## ----setup, echo=FALSE, warning=FALSE----------------------------------------- set.seed(42) # knitr::opts_chunk$set(comment=NA, # fig.align="center", # fig.width = 7, # fig.height = 7, # warning=FALSE) ## ----installation, eval=FALSE------------------------------------------------- # if (!requireNamespace("BiocManager", quietly=TRUE)) # install.packages("BiocManager") # BiocManager::install("ideal") ## ----loadlibrary, message=FALSE----------------------------------------------- library("ideal") ## ----citation----------------------------------------------------------------- citation("ideal") ## ----using-------------------------------------------------------------------- library("ideal") ## ----installairway, eval=FALSE------------------------------------------------ # if (!requireNamespace("BiocManager", quietly=TRUE)) # install.packages("BiocManager") # BiocManager::install("airway") ## ----loadairway, message=FALSE------------------------------------------------ library(airway) library(DESeq2) data(airway) dds_airway <- DESeqDataSet(airway,design= ~ cell + dex) dds_airway # run deseq on it dds_airway <- DESeq(dds_airway) # extract the results res_airway <- results(dds_airway, contrast = c("dex","trt","untrt"),alpha = 0.05) ## ----launchairway, eval=FALSE------------------------------------------------- # ideal(dds_obj = dds_airway) # # or also providing the results object # ideal(dds_obj = dds_airway,res_obj = res_airway) ## ----annoairway, message = FALSE---------------------------------------------- library(org.Hs.eg.db) genenames_airway <- mapIds(org.Hs.eg.db,keys = rownames(dds_airway),column = "SYMBOL",keytype="ENSEMBL") annotation_airway <- data.frame(gene_id = rownames(dds_airway), gene_name = genenames_airway, row.names = rownames(dds_airway), stringsAsFactors = FALSE) head(annotation_airway) ## ----launchairwayanno, eval=FALSE--------------------------------------------- # ideal(dds_obj = dds_airway, # annotation_obj = annotation_airway) ## ----fromedgerandlimma-------------------------------------------------------- library(DEFormats) library(edgeR) library(limma) dge_airway <- as.DGEList(dds_airway) # this is your initial object # your factors for the design: dex <- colData(dds_airway)$dex cell <- colData(dds_airway)$cell redo_dds_airway <- as.DESeqDataSet(dge_airway) # force the design to ~cell + dex design(redo_dds_airway) <- ~cell+group #TODO: this is due to the not 100% re-conversion via DEFormats ### with edgeR y <- calcNormFactors(dge_airway) design <- model.matrix(~ cell + dex) y <- estimateDisp(y,design) # If you performed quasi-likelihood F-tests fit <- glmQLFit(y,design) qlf <- glmQLFTest(fit) # contrast for dexamethasone treatment topTags(qlf) # If you performed likelihood ratio tests fit <- glmFit(y,design) lrt <- glmLRT(fit) topTags(lrt) # lrt to DESeqResults tbledger <- lrt$table colnames(tbledger)[colnames(tbledger) == 'PValue'] <- 'pvalue' colnames(tbledger)[colnames(tbledger) == 'logFC'] <- 'log2FoldChange' colnames(tbledger)[colnames(tbledger) == 'logCPM'] <- 'baseMean' # get from the logcpm to something more the baseMean for better tbledger$baseMean <- (2^tbledger$baseMean) * mean(dge_airway$samples$lib.size) / 1e6 # use the constructor for DESeqResults edger_resu <- DESeqResults(DataFrame(tbledger)) edger_resu <- DESeq2:::pvalueAdjustment(edger_resu, independentFiltering = TRUE, alpha = 0.05, pAdjustMethod = "BH") # cfr with res_airway # plot(res_airway$pvalue,edger_resu$pvalue) ### with limma-voom x <- calcNormFactors(dge_airway) design <- model.matrix(~0+dex+cell) contr.matrix <- makeContrasts(dextrt - dexuntrt,levels=colnames(design)) v <- voom(x, design) vfit <- lmFit(v, design) vfit <- contrasts.fit(vfit, contrasts=contr.matrix) efit <- eBayes(vfit) tbllimma <- topTable(efit,number= Inf, sort.by = "none") colnames(tbllimma)[colnames(tbllimma) == 'P.Value'] <- 'pvalue' colnames(tbllimma)[colnames(tbllimma) == 'logFC'] <- 'log2FoldChange' colnames(tbllimma)[colnames(tbllimma) == 'AveExpr'] <- 'baseMean' colnames(tbllimma)[colnames(tbllimma) == 'adj.P.Val'] <- 'padj' # get from the logcpm to something more the baseMean for better tbllimma$baseMean <- (2^tbllimma$baseMean) * mean(dge_airway$samples$lib.size) / 1e6 # use the constructor for DESeqResults limma_resu <- DESeqResults(DataFrame(tbllimma)) # cfr with res_airway # plot(res_airway$pvalue,limma_resu$pvalue) ## ----ideal-edgerlimma, eval=FALSE--------------------------------------------- # ideal(redo_dds_airway,edger_resu) # # or ... # ideal(redo_dds_airway,limma_resu) ## ----res2tbl------------------------------------------------------------------ summary(res_airway) res_airway head(deseqresult2tbl(res_airway)) ## ----res2de------------------------------------------------------------------- head(deseqresult2DEgenes(res_airway,FDR = 0.05)) # the output in the first lines is the same, but dim(res_airway) dim(deseqresult2DEgenes(res_airway)) ## ----res-enhance-------------------------------------------------------------- myde <- deseqresult2DEgenes(res_airway,FDR = 0.05) myde$symbol <- mapIds(org.Hs.eg.db,keys = as.character(myde$id),column = "SYMBOL",keytype="ENSEMBL") myde_enhanced <- myde myde_enhanced$id <- ideal:::createLinkENS(myde_enhanced$id,species = "Homo_sapiens") myde_enhanced$symbol <- ideal:::createLinkGeneSymbol(myde_enhanced$symbol) DT::datatable(myde_enhanced[1:100,], escape = FALSE) ## ----ggpc1-------------------------------------------------------------------- ggplotCounts(dds = dds_airway, gene = "ENSG00000103196", # CRISPLD2 in the original publication intgroup = "dex") ## ----ggpc2-------------------------------------------------------------------- ggplotCounts(dds = dds_airway, gene = "ENSG00000103196", # CRISPLD2 in the original publication intgroup = "dex", annotation_obj = annotation_airway) ## ----goseq, eval=FALSE-------------------------------------------------------- # res_subset <- deseqresult2DEgenes(res_airway)[1:100,] # taking only a subset of the DE genes # myde <- res_subset$id # myassayed <- rownames(res_airway) # mygo <- goseqTable(de.genes = myde, # assayed.genes = myassayed, # genome = "hg38", # id = "ensGene", # testCats = "GO:BP", # addGeneToTerms = FALSE) # head(mygo) ## ----go-enhance, eval=FALSE--------------------------------------------------- # mygo_enhanced <- mygo # # using the unexported function to link the GO term to the entry in the AmiGO db # mygo_enhanced$category <- ideal:::createLinkGO(mygo_enhanced$category) # DT::datatable(mygo_enhanced,escape=FALSE) ## ----plotma, eval=FALSE------------------------------------------------------- # plot_ma(res_airway, FDR = 0.05, hlines = 1, # title ="Adding horizontal lines", add_rug = FALSE) # plot_ma(res_airway, FDR = 0.1, # intgenes = c("ENSG00000103196", # CRISPLD2 # "ENSG00000120129", # DUSP1 # "ENSG00000163884", # KLF15 # "ENSG00000179094"), # PER1 # title = "Providing a shortlist of genes", # add_rug = FALSE # ) ## ----plotma2------------------------------------------------------------------ res_airway2 <- res_airway res_airway2$symbol <- mapIds(org.Hs.eg.db,keys = rownames(res_airway2),column = "SYMBOL",keytype="ENSEMBL") plot_ma(res_airway2, FDR = 0.05, intgenes = c("CRISPLD2", # CRISPLD2 "DUSP1", # DUSP1 "KLF15", # KLF15 "PER1"), # PER1 annotation_obj = annotation_airway, hlines = 2, add_rug = FALSE, title = "Putting gene names..." ) ## ----plotvolcano, eval = FALSE------------------------------------------------ # plot_volcano(res_airway) ## ----plotvolcano2------------------------------------------------------------- plot_volcano(res_airway2, FDR = 0.05, intgenes = c("CRISPLD2", # CRISPLD2 "DUSP1", # DUSP1 "KLF15", # KLF15 "PER1"), # PER1 title = "Selecting the handful of genes - and displaying the full range for the -log10(pvalue)", ylim_up = -log10(min(res_airway2$pvalue, na.rm =TRUE))) ## ----sepguess----------------------------------------------------------------- sepguesser(system.file("extdata/design_commas.txt",package = "ideal")) sepguesser(system.file("extdata/design_semicolons.txt",package = "ideal")) sepguesser(system.file("extdata/design_spaces.txt",package = "ideal")) anyfile <- system.file("extdata/design_tabs.txt",package = "ideal") # we know it is going to be TAB guessed_sep <- sepguesser(anyfile) guessed_sep # to be used for reading in the same file, without having to specify the sep read.delim(anyfile, header = TRUE, as.is = TRUE, sep = guessed_sep, quote = "", row.names = 1, check.names = FALSE) ## ----sessioninfo-------------------------------------------------------------- sessionInfo()