## ----style, echo = FALSE, results = 'asis'--------------------------------- BiocStyle::markdown() ## ----setup, echo=FALSE, warning=FALSE-------------------------------------- library(knitr) set.seed(42) opts_chunk$set(comment=NA, fig.align="center", warning=FALSE) ## ----installation, eval=FALSE---------------------------------------------- # source("http://bioconductor.org/biocLite.R") # biocLite("pcaExplorer") ## ----installation-github, eval=FALSE--------------------------------------- # library("devtools") # install_github("federicomarini/pcaExplorer") ## ----loadlibrary, message=FALSE-------------------------------------------- library("pcaExplorer") ## -------------------------------------------------------------------------- citation("pcaExplorer") ## ----installairway, eval=FALSE--------------------------------------------- # source("https://bioconductor.org/biocLite.R") # biocLite("airway") ## ----loadairway, message=FALSE--------------------------------------------- library(airway) library(DESeq2) data(airway) dds_airway <- DESeqDataSet(airway,design= ~ cell + dex) dds_airway rld_airway <- rlogTransformation(dds_airway) rld_airway ## ----launchairway, eval=FALSE---------------------------------------------- # pcaExplorer(dds = dds_airway, # rlt = rld_airway) ## ----annoairway, message = FALSE------------------------------------------- library(org.Hs.eg.db) genenames_airway <- mapIds(org.Hs.eg.db,keys = rownames(dds_airway),column = "SYMBOL",keytype="ENSEMBL") annotation_airway <- data.frame(gene_name = genenames_airway, row.names = rownames(dds_airway), stringsAsFactors = FALSE) head(annotation_airway) ## ----annofuncs, eval=FALSE------------------------------------------------- # # anno_df_orgdb <- get_annotation_orgdb(dds = dds_airway, # orgdb_species = "org.Hs.eg.db", # idtype = "ENSEMBL") # # anno_df_biomart <- get_annotation(dds = dds_airway, # biomart_dataset = "hsapiens_gene_ensembl", # idtype = "ensembl_gene_id") # ## ----echo=FALSE------------------------------------------------------------ anno_df_orgdb <- get_annotation_orgdb(dds = dds_airway, orgdb_species = "org.Hs.eg.db", idtype = "ENSEMBL") ## -------------------------------------------------------------------------- head(anno_df_orgdb) ## ----launchairwayanno, eval=FALSE------------------------------------------ # pcaExplorer(dds = dds_airway, # rlt = rld_airway, # annotation = annotation_airway) # or anno_df_orgdb, or anno_df_biomart ## ----ddsmultifac----------------------------------------------------------- dds_multifac <- makeExampleDESeqDataSet_multifac(betaSD_condition = 3,betaSD_tissue = 1) ## ----prepsimul------------------------------------------------------------- dds_multifac <- makeExampleDESeqDataSet_multifac(betaSD_condition = 1,betaSD_tissue = 3) dds_multifac rld_multifac <- rlogTransformation(dds_multifac) rld_multifac ## checking how the samples cluster on the PCA plot pcaplot(rld_multifac,intgroup = c("condition","tissue")) ## ----launchsimul, eval=FALSE----------------------------------------------- # pcaExplorer(dds = dds_multifac, # rlt = rld_multifac) ## ----func-pcaplot---------------------------------------------------------- pcaplot(rld_airway,intgroup = c("cell","dex"),ntop = 1000, pcX = 1, pcY = 2, title = "airway dataset PCA on samples - PC1 vs PC2") # on a different set of principal components... pcaplot(rld_airway,intgroup = c("dex"),ntop = 1000, pcX = 1, pcY = 4, title = "airway dataset PCA on samples - PC1 vs PC4", ellipse = TRUE) ## ----func-pcaplot3d, eval=FALSE-------------------------------------------- # pcaplot3d(rld_airway,intgroup = c("cell","dex"),ntop = 1000, # pcX = 1, pcY = 2, pcZ = 3) # # will open up in the viewer ## ----func-pcascree--------------------------------------------------------- pcaobj_airway <- prcomp(t(assay(rld_airway))) pcascree(pcaobj_airway,type="pev", title="Proportion of explained proportion of variance - airway dataset") ## ----func-correlatepcs----------------------------------------------------- res_pcairway <- correlatePCs(pcaobj_airway,colData(dds_airway)) res_pcairway plotPCcorrs(res_pcairway) ## ----func-hiloadings------------------------------------------------------- # extract the table of the genes with high loadings hi_loadings(pcaobj_airway,topN = 10,exprTable=counts(dds_airway)) # or alternatively plot the values hi_loadings(pcaobj_airway,topN = 10,annotation = annotation_airway) ## ----func-genespca--------------------------------------------------------- groups_airway <- colData(dds_airway)$dex cols_airway <- scales::hue_pal()(2)[groups_airway] # with many genes, do not plot the labels of the genes genespca(rld_airway,ntop=5000, choices = c(1,2), arrowColors=cols_airway,groupNames=groups_airway, alpha = 0.2, useRownamesAsLabels=FALSE, varname.size = 5 ) # with a smaller number of genes, plot gene names included in the annotation genespca(rld_airway,ntop=100, choices = c(1,2), arrowColors=cols_airway,groupNames=groups_airway, alpha = 0.7, varname.size = 5, annotation = annotation_airway ) ## ----func-topGOtable, eval=FALSE------------------------------------------- # # example not run due to quite long runtime # dds_airway <- DESeq(dds_airway) # res_airway <- results(dds_airway) # res_airway$symbol <- mapIds(org.Hs.eg.db, # keys=row.names(res_airway), # column="SYMBOL", # keytype="ENSEMBL", # multiVals="first") # res_airway$entrez <- mapIds(org.Hs.eg.db, # keys=row.names(res_airway), # column="ENTREZID", # keytype="ENSEMBL", # multiVals="first") # resOrdered <- as.data.frame(res_airway[order(res_airway$padj),]) # head(resOrdered) # # extract DE genes # de_df <- resOrdered[resOrdered$padj < .05 & !is.na(resOrdered$padj),] # de_symbols <- de_df$symbol # # extract background genes # bg_ids <- rownames(dds_airway)[rowSums(counts(dds_airway)) > 0] # bg_symbols <- mapIds(org.Hs.eg.db, # keys=bg_ids, # column="SYMBOL", # keytype="ENSEMBL", # multiVals="first") # # run the function # topgoDE_airway <- topGOtable(de_symbols, bg_symbols, # ontology = "BP", # mapping = "org.Hs.eg.db", # geneID = "symbol") ## ----func-pca2go, eval=FALSE----------------------------------------------- # pca2go_airway <- pca2go(rld_airway, # annotation = annotation_airway, # organism = "Hs", # ensToGeneSymbol = TRUE, # background_genes = bg_ids) # # for a smooth interactive exploration, use DT::datatable # datatable(pca2go_airway$PC1$posLoad) # # display it in the normal R session... # head(pca2go_airway$PC1$posLoad) # # ... or use it for running the app and display in the dedicated tab # pcaExplorer(dds_airway,rld_airway, # pca2go = pca2go_airway, # annotation = annotation_airway) # ## ----func, message = FALSE, eval = FALSE----------------------------------- # goquick_airway <- limmaquickpca2go(rld_airway, # pca_ngenes = 10000, # inputType = "ENSEMBL", # organism = "Hs") # # display it in the normal R session... # head(goquick_airway$PC1$posLoad) # # ... or use it for running the app and display in the dedicated tab # pcaExplorer(dds_airway,rld_airway, # pca2go = goquick_airway, # annotation = annotation_airway) # # ## ----func-makedataset------------------------------------------------------ dds_simu <- makeExampleDESeqDataSet_multifac(betaSD_condition = 3,betaSD_tissue = 0.5) dds_simu dds2_simu <- makeExampleDESeqDataSet_multifac(betaSD_condition = 0.5,betaSD_tissue = 2) dds2_simu rld_simu <- rlogTransformation(dds_simu) rld2_simu <- rlogTransformation(dds2_simu) pcaplot(rld_simu,intgroup = c("condition","tissue")) + ggplot2::ggtitle("Simulated data - Big condition effect, small tissue effect") pcaplot(rld2_simu,intgroup = c("condition","tissue")) + ggplot2::ggtitle("Simulated data - Small condition effect, bigger tissue effect") ## ----eval=FALSE------------------------------------------------------------ # distro_expr(rld_airway,plot_type = "density") # distro_expr(rld_airway,plot_type = "violin") ## -------------------------------------------------------------------------- distro_expr(rld_airway,plot_type = "boxplot") ## -------------------------------------------------------------------------- dds <- makeExampleDESeqDataSet_multifac(betaSD_condition = 3,betaSD_tissue = 1) rlt <- DESeq2::rlogTransformation(dds) set.seed(42) geneprofiler(rlt,paste0("gene",sample(1:1000,20)), plotZ = FALSE) ## ----eval=FALSE------------------------------------------------------------ # anno_df_biomart <- get_annotation(dds = dds_airway, # biomart_dataset = "hsapiens_gene_ensembl", # idtype = "ensembl_gene_id") # # anno_df_orgdb <- get_annotation_orgdb(dds = dds_airway, # orgdb_species = "org.Hs.eg.db", # idtype = "ENSEMBL") ## -------------------------------------------------------------------------- # use a subset of the counts to reduce plotting time, it can be time consuming with many samples pair_corr(counts(dds_airway)[1:100,]) ## -------------------------------------------------------------------------- sessionInfo()