## ----bioconductor, eval=FALSE-------------------------------------------------
#  
#  if (!requireNamespace("BiocManager", quietly = TRUE))
#      install.packages("BiocManager")
#  
#  # The following initializes usage of Bioc devel
#  BiocManager::install(version='devel')
#  
#  BiocManager::install("multiGSEA")
#  

## ----devtools, eval=FALSE-----------------------------------------------------
#  
#  install.packages("devtools")
#  devtools::install_github("https://github.com/yigbt/multiGSEA")
#  

## ----load_multiGSEA, eval=FALSE-----------------------------------------------
#  library(multiGSEA)

## ----load_mapping_library, results='hide', warning=FALSE, message=FALSE-------
library( "org.Hs.eg.db")

## ----organismsTable, results='asis', echo=FALSE-------------------------------
caption <- "Supported organisms, their abbreviations being used in `multiGSEA`,
            and mapping database that are needed for feature conversion. 
            Supported abbreviations can be seen with `getOrganisms()`"
df <- data.frame( Organisms = c( "Human", "Mouse", "Rat", "Dog", "Cow", "Pig",
                                 "Chicken", "Zebrafish", "Frog", "Fruit Fly", 
                                 "C.elegans"),
                  Abbreviations = c( "hsapiens", "mmusculus", "rnorvegicus", 
                                    "cfamiliaris", "btaurus", "sscrofa", 
                                    "ggallus", "drerio", "xlaevis", 
                                    "dmelanogaster", "celegans"),
                  Mapping = c( "org.Hs.eg.db", "org.Mm.eg.db", "org.Rn.eg.db",
                               "org.Cf.eg.db", "org.Bt.eg.db", "org.Ss.eg.db",
                               "org.Gg.eg.db", "org.Xl.eg.db", "org.Dr.eg.db",
                               "org.Dm.eg.db", "org.Ce.eg.db"))

knitr::kable( df, caption = caption)


## ----load_multiGSEA_package, results='hide', message=FALSE, warning=FALSE-----
library( multiGSEA)
library( magrittr)

## ----load_omics_data----------------------------------------------------------

# load transcriptomic features
data( transcriptome)

# load proteomic features
data( proteome)

# load metabolomic features
data( metabolome)


## ----omicsTable, results='asis', echo=FALSE-----------------------------------
caption <- "Structure of the necessary omics data. For each layer
            (here: transcriptome), feature IDs, log2FCs, and p-values 
            are needed."

knitr::kable( 
    transcriptome %>% 
    dplyr::arrange( Symbol) %>% 
    dplyr::slice( 1:6), 
    caption = caption
)

## ----rank_features, results='hide'--------------------------------------------

# create data structure
omics_data <- initOmicsDataStructure( layer = c("transcriptome", 
                                                "proteome",
                                                "metabolome"))

## add transcriptome layer
omics_data$transcriptome <- rankFeatures( transcriptome$logFC, 
                                          transcriptome$pValue)
names( omics_data$transcriptome) <- transcriptome$Symbol

## add proteome layer
omics_data$proteome <- rankFeatures(proteome$logFC, proteome$pValue)
names( omics_data$proteome) <- proteome$Symbol

## add metabolome layer
## HMDB features have to be updated to the new HMDB format
omics_data$metabolome <- rankFeatures(metabolome$logFC, metabolome$pValue)
names( omics_data$metabolome) <- metabolome$HMDB
names( omics_data$metabolome) <- gsub( "HMDB", "HMDB00", 
                                       names( omics_data$metabolome))


## ----omics_short--------------------------------------------------------------

omics_short <- lapply( names( omics_data), function( name){ 
                        head( omics_data[[name]])
                      })
names( omics_short) <- names( omics_data)
omics_short

## ----calculate_enrichment, results='hide', message=FALSE, warning=FALSE-------

databases <- c( "kegg", "reactome")
layers <- names( omics_data)

pathways <- getMultiOmicsFeatures( dbs = databases, layer = layers,
                                   returnTranscriptome = "SYMBOL",
                                   returnProteome = "SYMBOL",
                                   returnMetabolome = "HMDB",
                                   useLocal = FALSE)

## ----pathways_short-----------------------------------------------------------

pathways_short <- lapply( names( pathways), function( name){
                          head( pathways[[name]], 2)
                        })
names( pathways_short) <- names( pathways)
pathways_short

## ----run_enrichment, results='hide', message=FALSE, warning=FALSE-------------

# use the multiGSEA function to calculate the enrichment scores
# for all omics layer at once.
enrichment_scores <- multiGSEA( pathways, omics_data)


## ----combine_pvalues----------------------------------------------------------

df <- extractPvalues( enrichmentScores = enrichment_scores,
                      pathwayNames = names( pathways[[1]]))

df$combined_pval <- combinePvalues( df)
df$combined_padj <- p.adjust( df$combined_pval, method = "BH")

df <- cbind( data.frame( pathway = names( pathways[[1]])), df)


## ----resultTable, results='asis', echo=FALSE----------------------------------
caption <- "Table summarizing the top 15 pathways where we can calculate an
            enrichment for all three layers . Pathways from KEGG, Reactome,
            and BioCarta are listed based on their aggregated adjusted p-value. 
            Corrected p-values are displayed for each omics layer separately and 
            aggregated by means of the Stouffer's Z-method."

knitr::kable( 
    df %>% 
    dplyr::arrange( combined_padj) %>% 
    dplyr::filter( !is.na(metabolome_padj) ) %>%
    dplyr::select( c( pathway, transcriptome_padj, proteome_padj, metabolome_padj, combined_pval)) %>% 
    dplyr::slice( 1:15),
    caption = caption
)

## ----session, echo=FALSE------------------------------------------------------
sessionInfo()