Contents

library(TDbasedUFE)
library(TDbasedUFEadv)
#> Warning: replacing previous import 'utils::findMatches' by
#> 'S4Vectors::findMatches' when loading 'AnnotationDbi'
#> 
library(DOSE)
#> DOSE v3.26.0  For help: https://yulab-smu.top/biomedical-knowledge-mining-book/
#> 
#> If you use DOSE in published research, please cite:
#> Guangchuang Yu, Li-Gen Wang, Guang-Rong Yan, Qing-Yu He. DOSE: an R/Bioconductor package for Disease Ontology Semantic and Enrichment analysis. Bioinformatics 2015, 31(4):608-609
library(enrichplot)
library(RTCGA.rnaseq)
#> Loading required package: RTCGA
#> Welcome to the RTCGA (version: 1.30.0). Read more about the project under https://rtcga.github.io/RTCGA/
library(RTCGA.clinical)
library(enrichR)
#> Welcome to enrichR
#> Checking connection ...
#> Enrichr ... Connection is Live!
#> FlyEnrichr ... Connection is Live!
#> WormEnrichr ... Connection is Live!
#> YeastEnrichr ... Connection is Live!
#> FishEnrichr ... Connection is Live!
#> OxEnrichr ... Connection is Live!
library(STRINGdb)

1 Introduction

It might be helpful to demonstrate how to evaluate selected genes by enrichment analysis. Here, we show some useful tools applied to the output from TDbasedUFEadv. To do this, we reproduce one example from “How to use TDbasedUFEadv” as follows.

# The four TCGA RNA-seq tables used throughout this example.
rnaseq_cohorts <- list(BLCA.rnaseq, BRCA.rnaseq, CESC.rnaseq, COAD.rnaseq)

# For every cohort keep the first 100 rows and the 1000 columns that follow
# column 1 (column 1 is extracted separately below as `Multi_sample`).
Multi <- lapply(rnaseq_cohorts, function(cohort) {
  cohort[seq_len(100), 1 + seq_len(1000)]
})

# Build the tensor from the list, then swap its first two dimensions.
# NOTE(review): the meaning of the second argument (10L) is defined by
# prepareTensorfromList() -- see the TDbasedUFEadv documentation.
Z <- aperm(prepareTensorfromList(Multi, 10L), c(2, 1, 3))

# Clinical tables, listed in the same cohort order as `rnaseq_cohorts`.
Clinical <- list(BLCA.clinical, BRCA.clinical, CESC.clinical, COAD.clinical)

# First column of the same 100 rows of each cohort, kept as a one-column table.
Multi_sample <- lapply(rnaseq_cohorts, function(cohort) {
  cohort[seq_len(100), 1, drop = FALSE]
})

# patient.stage_event.tnm_categories.pathologic_categories.pathologic_m
ID_column_of_Multi_sample <- c(770, 1482, 773, 791)
# patient.bcr_patient_barcode
ID_column_of_Clinical <- c(20, 20, 12, 14)

# Per-cohort sample information derived from the clinical tables.
sample_conditions <- prepareCondTCGA(
  Multi_sample, Clinical,
  ID_column_of_Multi_sample, ID_column_of_Clinical
)

# Wrap the tensor together with its feature names and sample information.
Z <- PrepareSummarizedExperimentTensor(
  feature = colnames(ACC.rnaseq)[1 + seq_len(1000)],
  sample = array("", 1),
  value = Z,
  sampleData = sample_conditions
)

# Higher-order singular value decomposition of the prepared tensor.
HOSVD <- computeHosvd(Z)
#> 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |======================================================================| 100%
# Retrieve the "sampleData" attribute stored on Z (supplied above as the
# output of prepareCondTCGA()).
cond <- attr(Z, "sampleData")
# Select features from the HOSVD result, given the per-cohort data and `cond`.
# NOTE(review): `input_all = 3` presumably pre-selects the singular vector so
# that no interactive prompt is needed ("Batch mode"), and `de` is a tuning
# value passed to the selection step -- confirm both against the
# selectFeatureProj() documentation.
index <- selectFeatureProj(HOSVD, Multi, cond, de = 1e-3, input_all = 3) # Batch mode

# Show the first rows of the table of selected features.
head(tableFeatures(Z, index))
#>       Feature       p value adjusted p value
#> 10    ACTB|60  0.000000e+00     0.000000e+00
#> 11   ACTG1|71  0.000000e+00     0.000000e+00
#> 37  ALDOA|226  0.000000e+00     0.000000e+00
#> 19 ADAM6|8755 5.698305e-299    1.424576e-296
#> 22  AEBP1|165 1.057392e-218    2.114785e-216
#> 9    ACTA2|59 7.862975e-198    1.310496e-195
# Feature labels in the first column of the table look like "ACTB|60"
# (presumably gene symbol, then Entrez ID). Build the table and split each
# label on the literal "|" only once, then reuse the pieces for both vectors.
feature_parts <- strsplit(tableFeatures(Z, index)[, 1], "|", fixed = TRUE)
# vapply() guarantees a character vector (character(0) for an empty table)
# instead of the type-unstable unlist(lapply(...)).
genes <- vapply(feature_parts, `[`, character(1), 1)
entrez <- vapply(feature_parts, `[`, character(1), 2)

2 Enrichr

Enrichr (Kuleshov et al. 2016) is one of the tools that often provide significant results for genes selected by TDbasedUFE and TDbasedUFEadv.

# Point enrichR at the main Enrichr site.
setEnrichrSite("Enrichr")
#> Connection changed to https://maayanlab.cloud/Enrichr/
#> Connection is Live!
# Use the connection status recorded by enrichR instead of assuming the site
# is reachable, so the query and the plot are skipped cleanly when it is down.
websiteLive <- isTRUE(getOption("enrichR.live"))
# Gene Ontology libraries to query.
dbs <- c(
  "GO_Molecular_Function_2015", "GO_Cellular_Component_2015",
  "GO_Biological_Process_2015"
)
if (websiteLive) {
  enriched <- enrichr(genes, dbs)
}
#> Uploading data to Enrichr... Done.
#>   Querying GO_Molecular_Function_2015... Done.
#>   Querying GO_Cellular_Component_2015... Done.
#>   Querying GO_Biological_Process_2015... Done.
#> Parsing results... Done.
# Plot the top 20 biological-process terms, ordered by p-value.
if (websiteLive) {
  plotEnrich(enriched$GO_Biological_Process_2015,
    showTerms = 20, numChar = 40, y = "Count",
    orderBy = "P.value"
  )
}

Enrichr can provide a huge number of enrichment analyses, many of which, in our experience, work well with the genes selected by TDbasedUFE as well as TDbasedUFEadv. Please check Enrichr’s web site to see what kinds of enrichment analyses can be done.

3 STRING

STRING (Szklarczyk et al. 2018) provides enrichment analysis based upon protein-protein interactions, which is known to often provide significant results for genes selected by TDbasedUFE as well as TDbasedUFEadv.

# Allow at least 200 seconds for downloads, without lowering a longer timeout
# that may already be configured in this session.
options(timeout = max(200, getOption("timeout")))
# Create the STRING database handle.
# NOTE(review): species 9606 is presumably human (NCBI taxonomy ID) and
# score_threshold = 200 the minimum interaction score kept -- confirm against
# the STRINGdb documentation.
string_db <- STRINGdb$new(
  version = "11.5",
  species = 9606, score_threshold = 200,
  network_type = "full", input_directory = ""
)
# Map the selected genes (column "genes") to STRING identifiers, dropping
# rows that cannot be mapped.
example1_mapped <- string_db$map(data.frame(genes = genes),
  "genes",
  removeUnmappedRows = TRUE
)
#> Warning:  we couldn't map to STRING 1% of your identifiers
# Draw the interaction network of the mapped identifiers.
hits <- example1_mapped$STRING_id
string_db$plot_network(hits)