## ----bioconductor, eval=FALSE------------------------------------------------- # # if (!requireNamespace("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # # # The following initializes usage of Bioc devel # BiocManager::install(version='devel') # # BiocManager::install("multiGSEA") # ## ----devtools, eval=FALSE----------------------------------------------------- # # install.packages("devtools") # devtools::install_github("https://github.com/yigbt/multiGSEA") # ## ----load_multiGSEA, eval=FALSE----------------------------------------------- # library(multiGSEA) ## ----load_mapping_library, results='hide', warning=FALSE, message=FALSE------- library( "org.Hs.eg.db") ## ----organismsTable, results='asis', echo=FALSE------------------------------- caption <- "Supported organisms, their abbreviations being used in `multiGSEA`, and mapping database that are needed for feature conversion. Supported abbreviations can be seen with `getOrganisms()`" df <- data.frame( Organisms = c( "Human", "Mouse", "Rat", "Dog", "Cow", "Pig", "Chicken", "Zebrafish", "Frog", "Fruit Fly", "C.elegans"), Abbreviations = c( "hsapiens", "mmusculus", "rnorvegicus", "cfamiliaris", "btaurus", "sscrofa", "ggallus", "drerio", "xlaevis", "dmelanogaster", "celegans"), Mapping = c( "org.Hs.eg.db", "org.Mm.eg.db", "org.Rn.eg.db", "org.Cf.eg.db", "org.Bt.eg.db", "org.Ss.eg.db", "org.Gg.eg.db", "org.Xl.eg.db", "org.Dr.eg.db", "org.Dm.eg.db", "org.Ce.eg.db")) knitr::kable( df, caption = caption) ## ----load_multiGSEA_package, results='hide', message=FALSE, warning=FALSE----- library( multiGSEA) library( magrittr) ## ----load_omics_data---------------------------------------------------------- # load transcriptomic features data( transcriptome) # load proteomic features data( proteome) # load metabolomic features data( metabolome) ## ----omicsTable, results='asis', echo=FALSE----------------------------------- caption <- "Structure of the necessary omics data. For each layer (here: transcriptome), feature IDs, log2FCs, and p-values are needed." knitr::kable( transcriptome %>% dplyr::arrange( Symbol) %>% dplyr::slice( 1:6), caption = caption ) ## ----rank_features, results='hide'-------------------------------------------- # create data structure omics_data <- initOmicsDataStructure( layer = c("transcriptome", "proteome", "metabolome")) ## add transcriptome layer omics_data$transcriptome <- rankFeatures( transcriptome$logFC, transcriptome$pValue) names( omics_data$transcriptome) <- transcriptome$Symbol ## add proteome layer omics_data$proteome <- rankFeatures(proteome$logFC, proteome$pValue) names( omics_data$proteome) <- proteome$Symbol ## add metabolome layer ## HMDB features have to be updated to the new HMDB format omics_data$metabolome <- rankFeatures(metabolome$logFC, metabolome$pValue) names( omics_data$metabolome) <- metabolome$HMDB names( omics_data$metabolome) <- gsub( "HMDB", "HMDB00", names( omics_data$metabolome)) ## ----omics_short-------------------------------------------------------------- omics_short <- lapply( names( omics_data), function( name){ head( omics_data[[name]]) }) names( omics_short) <- names( omics_data) omics_short ## ----calculate_enrichment, results='hide', message=FALSE, warning=FALSE------- databases <- c( "kegg", "reactome") layers <- names( omics_data) pathways <- getMultiOmicsFeatures( dbs = databases, layer = layers, returnTranscriptome = "SYMBOL", returnProteome = "SYMBOL", returnMetabolome = "HMDB", useLocal = FALSE) ## ----pathways_short----------------------------------------------------------- pathways_short <- lapply( names( pathways), function( name){ head( pathways[[name]], 2) }) names( pathways_short) <- names( pathways) pathways_short ## ----run_enrichment, results='hide', message=FALSE, warning=FALSE------------- # use the multiGSEA function to calculate the enrichment scores # for all omics layer at once. enrichment_scores <- multiGSEA( pathways, omics_data) ## ----combine_pvalues---------------------------------------------------------- df <- extractPvalues( enrichmentScores = enrichment_scores, pathwayNames = names( pathways[[1]])) df$combined_pval <- combinePvalues( df) df$combined_padj <- p.adjust( df$combined_pval, method = "BH") df <- cbind( data.frame( pathway = names( pathways[[1]])), df) ## ----resultTable, results='asis', echo=FALSE---------------------------------- caption <- "Table summarizing the top 15 pathways where we can calculate an enrichment for all three layers . Pathways from KEGG, Reactome, and BioCarta are listed based on their aggregated adjusted p-value. Corrected p-values are displayed for each omics layer separately and aggregated by means of the Stouffer's Z-method." knitr::kable( df %>% dplyr::arrange( combined_padj) %>% dplyr::filter( !is.na(metabolome_padj) ) %>% dplyr::select( c( pathway, transcriptome_padj, proteome_padj, metabolome_padj, combined_pval)) %>% dplyr::slice( 1:15), caption = caption ) ## ----session, echo=FALSE------------------------------------------------------ sessionInfo()