## ----setup, echo=FALSE, warning=FALSE-----------------------------------------
set.seed(42)
# knitr::opts_chunk$set(comment=NA,
#                fig.align="center",
#                fig.width = 7,
#                fig.height = 7,
#                warning=FALSE)

## ----installation, eval=FALSE-------------------------------------------------
#  if (!requireNamespace("BiocManager", quietly=TRUE))
#      install.packages("BiocManager")
#  BiocManager::install("ideal")

## ----loadlibrary, message=FALSE-----------------------------------------------
library("ideal")

## ----citation-----------------------------------------------------------------
citation("ideal")

## ----using--------------------------------------------------------------------
library("ideal")

## ----installairway, eval=FALSE------------------------------------------------
#  if (!requireNamespace("BiocManager", quietly=TRUE))
#      install.packages("BiocManager")
#  BiocManager::install("airway")

## ----loadairway, message=FALSE------------------------------------------------
library(airway)
library(DESeq2)

data(airway)

dds_airway <- DESeqDataSet(airway,design= ~ cell + dex)
dds_airway
# run deseq on it
dds_airway <- DESeq(dds_airway)
# extract the results
res_airway <- results(dds_airway, contrast = c("dex","trt","untrt"),alpha = 0.05)

## ----launchairway, eval=FALSE-------------------------------------------------
#  ideal(dds_obj = dds_airway)
#  # or also providing the results object
#  ideal(dds_obj = dds_airway,res_obj = res_airway)

## ----annoairway, message = FALSE----------------------------------------------
library(org.Hs.eg.db)
genenames_airway <- mapIds(org.Hs.eg.db,keys = rownames(dds_airway),column = "SYMBOL",keytype="ENSEMBL")
annotation_airway <- data.frame(gene_id = rownames(dds_airway),
                                gene_name = genenames_airway,
                                row.names = rownames(dds_airway),
                                stringsAsFactors = FALSE)
head(annotation_airway)                                

## ----launchairwayanno, eval=FALSE---------------------------------------------
#  ideal(dds_obj = dds_airway,
#        annotation_obj = annotation_airway)

## ----fromedgerandlimma--------------------------------------------------------
library(DEFormats)
library(edgeR)
library(limma)
dge_airway <- as.DGEList(dds_airway) # this is your initial object
# your factors for the design:
dex <- colData(dds_airway)$dex
cell <- colData(dds_airway)$cell

redo_dds_airway <- as.DESeqDataSet(dge_airway)
# force the design to ~cell + dex
design(redo_dds_airway) <- ~cell+group  #TODO: this is due to the not 100% re-conversion via DEFormats 

### with edgeR
y <- calcNormFactors(dge_airway)
design <- model.matrix(~ cell + dex)
y <- estimateDisp(y,design)
# If you performed quasi-likelihood F-tests
fit <- glmQLFit(y,design)
qlf <- glmQLFTest(fit) # contrast for dexamethasone treatment
topTags(qlf)
# If you performed likelihood ratio tests
fit <- glmFit(y,design)
lrt <- glmLRT(fit)
topTags(lrt)

# lrt to DESeqResults

tbledger <- lrt$table
colnames(tbledger)[colnames(tbledger) == 'PValue'] <- 'pvalue'
colnames(tbledger)[colnames(tbledger) == 'logFC'] <- 'log2FoldChange'
colnames(tbledger)[colnames(tbledger) == 'logCPM'] <- 'baseMean'
# get from the logcpm to something more the baseMean for better 
tbledger$baseMean <- (2^tbledger$baseMean) * mean(dge_airway$samples$lib.size) / 1e6
# use the constructor for DESeqResults 
edger_resu <- DESeqResults(DataFrame(tbledger))
edger_resu <- DESeq2:::pvalueAdjustment(edger_resu, independentFiltering = TRUE, 
                                        alpha = 0.05, pAdjustMethod = "BH")
                                      
# cfr with res_airway
# plot(res_airway$pvalue,edger_resu$pvalue)


### with limma-voom
x <- calcNormFactors(dge_airway)
design <- model.matrix(~0+dex+cell)
contr.matrix <- makeContrasts(dextrt - dexuntrt,levels=colnames(design))
v <- voom(x, design)
vfit <- lmFit(v, design)
vfit <- contrasts.fit(vfit, contrasts=contr.matrix)
efit <- eBayes(vfit)

tbllimma <- topTable(efit,number= Inf, sort.by = "none")
colnames(tbllimma)[colnames(tbllimma) == 'P.Value'] <- 'pvalue'
colnames(tbllimma)[colnames(tbllimma) == 'logFC'] <- 'log2FoldChange'
colnames(tbllimma)[colnames(tbllimma) == 'AveExpr'] <- 'baseMean'
colnames(tbllimma)[colnames(tbllimma) == 'adj.P.Val'] <- 'padj'
# get from the logcpm to something more the baseMean for better 
tbllimma$baseMean <- (2^tbllimma$baseMean) * mean(dge_airway$samples$lib.size) / 1e6
# use the constructor for DESeqResults 
limma_resu <- DESeqResults(DataFrame(tbllimma))

# cfr with res_airway
# plot(res_airway$pvalue,limma_resu$pvalue)

## ----ideal-edgerlimma, eval=FALSE---------------------------------------------
#  ideal(redo_dds_airway,edger_resu)
#  # or ...
#  ideal(redo_dds_airway,limma_resu)

## ----res2tbl------------------------------------------------------------------
summary(res_airway)
res_airway

head(deseqresult2tbl(res_airway))

## ----res2de-------------------------------------------------------------------
head(deseqresult2DEgenes(res_airway,FDR = 0.05))
# the output in the first lines is the same, but
dim(res_airway)
dim(deseqresult2DEgenes(res_airway))

## ----res-enhance--------------------------------------------------------------
myde <- deseqresult2DEgenes(res_airway,FDR = 0.05)
myde$symbol <- mapIds(org.Hs.eg.db,keys = as.character(myde$id),column = "SYMBOL",keytype="ENSEMBL")

myde_enhanced <- myde
myde_enhanced$id <- ideal:::createLinkENS(myde_enhanced$id,species = "Homo_sapiens")
myde_enhanced$symbol <- ideal:::createLinkGeneSymbol(myde_enhanced$symbol)
DT::datatable(myde_enhanced[1:100,], escape = FALSE)

## ----ggpc1--------------------------------------------------------------------
ggplotCounts(dds = dds_airway,
             gene = "ENSG00000103196", # CRISPLD2 in the original publication
             intgroup = "dex")

## ----ggpc2--------------------------------------------------------------------
ggplotCounts(dds = dds_airway,
             gene = "ENSG00000103196", # CRISPLD2 in the original publication
             intgroup = "dex",
             annotation_obj = annotation_airway)

## ----goseq, eval=FALSE--------------------------------------------------------
#  res_subset <- deseqresult2DEgenes(res_airway)[1:100,] # taking only a subset of the DE genes
#  myde <- res_subset$id
#  myassayed <- rownames(res_airway)
#  mygo <- goseqTable(de.genes = myde,
#                     assayed.genes = myassayed,
#                     genome = "hg38",
#                     id = "ensGene",
#                     testCats = "GO:BP",
#                     addGeneToTerms = FALSE)
#  head(mygo)

## ----go-enhance, eval=FALSE---------------------------------------------------
#  mygo_enhanced <- mygo
#  # using the unexported function to link the GO term to the entry in the AmiGO db
#  mygo_enhanced$category <- ideal:::createLinkGO(mygo_enhanced$category)
#  DT::datatable(mygo_enhanced,escape=FALSE)

## ----plotma, eval=FALSE-------------------------------------------------------
#  plot_ma(res_airway, FDR = 0.05, hlines = 1,
#          title ="Adding horizontal lines", add_rug = FALSE)
#  plot_ma(res_airway, FDR = 0.1,
#          intgenes = c("ENSG00000103196",  # CRISPLD2
#                       "ENSG00000120129",  # DUSP1
#                       "ENSG00000163884",  # KLF15
#                       "ENSG00000179094"), # PER1
#          title = "Providing a shortlist of genes",
#          add_rug = FALSE
#         )

## ----plotma2------------------------------------------------------------------
res_airway2 <- res_airway
res_airway2$symbol <- mapIds(org.Hs.eg.db,keys = rownames(res_airway2),column = "SYMBOL",keytype="ENSEMBL")
plot_ma(res_airway2, FDR = 0.05,
        intgenes = c("CRISPLD2",  # CRISPLD2
                     "DUSP1",  # DUSP1
                     "KLF15",  # KLF15
                     "PER1"), # PER1
        annotation_obj = annotation_airway,
        hlines = 2,
        add_rug = FALSE,
        title = "Putting gene names..."
       )

## ----plotvolcano, eval = FALSE------------------------------------------------
#  plot_volcano(res_airway)

## ----plotvolcano2-------------------------------------------------------------
plot_volcano(res_airway2, FDR = 0.05,
        intgenes = c("CRISPLD2",  # CRISPLD2
                     "DUSP1",  # DUSP1
                     "KLF15",  # KLF15
                     "PER1"), # PER1
        title = "Selecting the handful of genes - and displaying the full range for the -log10(pvalue)",
        ylim_up = -log10(min(res_airway2$pvalue, na.rm =TRUE)))

## ----sepguess-----------------------------------------------------------------
sepguesser(system.file("extdata/design_commas.txt",package = "ideal"))
sepguesser(system.file("extdata/design_semicolons.txt",package = "ideal"))
sepguesser(system.file("extdata/design_spaces.txt",package = "ideal"))
anyfile <- system.file("extdata/design_tabs.txt",package = "ideal") # we know it is going to be TAB
guessed_sep <- sepguesser(anyfile)
guessed_sep
# to be used for reading in the same file, without having to specify the sep
read.delim(anyfile, header = TRUE, as.is = TRUE, 
           sep = guessed_sep, 
           quote = "", row.names = 1, check.names = FALSE)

## ----sessioninfo--------------------------------------------------------------
sessionInfo()