Results from the univariate regressions performed using can be combined in a post-processing step to perform multivariate hypothesis testing. In this example, we fit on transcript-level counts and then perform multivariate hypothesis testing by combining transcripts at the gene-level. This is done with the function.

Import transcript-level counts

Read in transcript counts from the package.

library(readr)
library(tximport)
library(tximportData)

# specify directory
path = system.file("extdata", package="tximportData")

# read sample meta-data
samples = read.table(file.path(path,"samples.txt"), header=TRUE)
samples.ext = read.table(file.path(path,"samples_extended.txt"), header=TRUE, sep="\t")

# read assignment of transcripts to genes
# remove genes on the PAR, since these are present twice
tx2gene = read_csv(file.path(path, "tx2gene.gencode.v27.csv"))
tx2gene = tx2gene[grep("PAR_Y", tx2gene$GENEID, invert=TRUE),]

# read transcript-level quatifictions
files = file.path(path, "salmon", samples$run, "quant.sf.gz")
txi = tximport(files, type = "salmon", txOut=TRUE)

# Create metadata simulating two conditions
sampleTable = data.frame(condition = factor(rep(c("A", "B"), each = 3)))
rownames(sampleTable) = paste0("Sample", 1:6)

Standard dream analysis

Perform standard analysis at the transcript-level

library(variancePartition)
library(edgeR)

# Prepare transcript-level reads
dge = DGEList(txi$counts)
design <- model.matrix(~condition, data = sampleTable)
isexpr = filterByExpr(dge, design)
dge = dge[isexpr,]
dge = calcNormFactors(dge) 

# Estimate precision weights
vobj = voomWithDreamWeights(dge, ~ condition, sampleTable)

# Fit regression model one transcript at a time
fit = dream(vobj, ~ condition, sampleTable)
fit = eBayes(fit)

Multivariate analysis

Combine the transcript-level results at the gene-level. The mapping between transcript and gene is stored in as a list.

# Prepare transcript to gene mapping
# keep only transcripts present in vobj
# then convert to list with key GENEID and values TXNAMEs
keep = tx2gene$TXNAME %in% rownames(vobj)
tx2gene.lst = unstack(tx2gene[keep,])

# Run multivariate test on entries in each feature set
res = mvTest(fit, vobj, tx2gene.lst, coef="conditionB")

# truncate gene names since they have version numbers
# ENST00000498289.5 -> ENST00000498289
res$ID.short = gsub("\\..+", "", res$ID)

Gene set analysis

Perform gene set analysis using on the gene-level test statistics.

# must have zenith > v1.0.2
library(zenith)
library(GSEABase)

gs = get_MSigDB("C1", to="ENSEMBL")

df_gsa = zenithPR_gsa( res$stat, res$ID.short, gs, inter.gene.cor=.05)

head(df_gsa)
##                 NGenes Correlation      delta       se       p.less p.greater       PValue
## M652_chr5q35        81        0.05 -9558.4102 1689.015 7.737973e-09 1.0000000 1.547595e-08
## M12945_chr12q21     35        0.05 -1506.2074 1897.386 2.136523e-01 0.7863477 4.273046e-01
## M5824_chr11p13      30        0.05  -988.0642 1952.248 3.063911e-01 0.6936089 6.127822e-01
## M1556_chr20q11      92        0.05   775.2733 1678.101 6.779541e-01 0.3220459 6.440917e-01
## M13160_chr16p11    105        0.05  -519.6880 1660.274 3.771373e-01 0.6228627 7.542747e-01
## M6742_chr2q36       20        0.05  -575.4590 2133.016 3.936640e-01 0.6063360 7.873280e-01
##                 Direction          FDR
## M652_chr5q35         Down 3.838035e-06
## M12945_chr12q21      Down 9.970286e-01
## M5824_chr11p13       Down 9.970286e-01
## M1556_chr20q11         Up 9.970286e-01
## M13160_chr16p11      Down 9.970286e-01
## M6742_chr2q36        Down 9.970286e-01

Session info

## R version 4.3.0 RC (2023-04-13 r84269)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.2 LTS
## 
## Matrix products: default
## BLAS:   /home/biocbuild/bbs-3.17-bioc/R/lib/libRblas.so 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C               LC_TIME=en_GB             
##  [4] LC_COLLATE=C               LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                  LC_ADDRESS=C              
## [10] LC_TELEPHONE=C             LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: America/New_York
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] org.Hs.eg.db_3.17.0      msigdbr_7.5.1            GSEABase_1.62.0         
##  [4] graph_1.78.0             annotate_1.78.0          XML_3.99-0.14           
##  [7] AnnotationDbi_1.62.1     IRanges_2.34.0           S4Vectors_0.38.1        
## [10] Biobase_2.60.0           BiocGenerics_0.46.0      zenith_1.2.0            
## [13] tximportData_1.28.0      tximport_1.28.0          readr_2.1.4             
## [16] edgeR_3.42.4             pander_0.6.5             variancePartition_1.30.2
## [19] BiocParallel_1.34.2      limma_3.56.2             ggplot2_3.4.2           
## [22] knitr_1.43              
## 
## loaded via a namespace (and not attached):
##   [1] jsonlite_1.8.5              magrittr_2.0.3              farver_2.1.1               
##   [4] nloptr_2.0.3                rmarkdown_2.22              zlibbioc_1.46.0            
##   [7] vctrs_0.6.2                 memoise_2.0.1               minqa_1.2.5                
##  [10] RCurl_1.98-1.12             S4Arrays_1.0.4              htmltools_0.5.5            
##  [13] progress_1.2.2              curl_5.0.0                  broom_1.0.4                
##  [16] sass_0.4.6                  KernSmooth_2.23-21          bslib_0.4.2                
##  [19] pbkrtest_0.5.2              plyr_1.8.8                  cachem_1.0.8               
##  [22] lifecycle_1.0.3             iterators_1.0.14            pkgconfig_2.0.3            
##  [25] Matrix_1.5-4.1              R6_2.5.1                    fastmap_1.1.1              
##  [28] MatrixGenerics_1.12.0       GenomeInfoDbData_1.2.10     rbibutils_2.2.13           
##  [31] digest_0.6.31               numDeriv_2016.8-1.1         colorspace_2.1-0           
##  [34] GenomicRanges_1.52.0        RSQLite_2.3.1               filelock_1.0.2             
##  [37] labeling_0.4.2              RcppZiggurat_0.1.6          clusterGeneration_1.3.7    
##  [40] fansi_1.0.4                 httr_1.4.6                  compiler_4.3.0             
##  [43] bit64_4.0.5                 aod_1.3.2                   withr_2.5.0                
##  [46] doParallel_1.0.17           backports_1.4.1             DBI_1.1.3                  
##  [49] highr_0.10                  gplots_3.1.3                MASS_7.3-60                
##  [52] DelayedArray_0.26.3         gtools_3.9.4                caTools_1.18.2             
##  [55] tools_4.3.0                 remaCor_0.0.11              glue_1.6.2                 
##  [58] nlme_3.1-162                grid_4.3.0                  reshape2_1.4.4             
##  [61] generics_0.1.3              snow_0.4-4                  gtable_0.3.3               
##  [64] tzdb_0.4.0                  tidyr_1.3.0                 hms_1.1.3                  
##  [67] utf8_1.2.3                  XVector_0.40.0              foreach_1.5.2              
##  [70] pillar_1.9.0                stringr_1.5.0               babelgene_22.9             
##  [73] vroom_1.6.3                 splines_4.3.0               dplyr_1.1.2                
##  [76] BiocFileCache_2.8.0         lattice_0.21-8              bit_4.0.5                  
##  [79] tidyselect_1.2.0            locfit_1.5-9.7              Biostrings_2.68.1          
##  [82] SummarizedExperiment_1.30.2 RhpcBLASctl_0.23-42         xfun_0.39                  
##  [85] statmod_1.5.0               matrixStats_1.0.0           KEGGgraph_1.60.0           
##  [88] stringi_1.7.12              yaml_2.3.7                  boot_1.3-28.1              
##  [91] evaluate_0.21               codetools_0.2-19            archive_1.1.5              
##  [94] tibble_3.2.1                Rgraphviz_2.44.0            cli_3.6.1                  
##  [97] xtable_1.8-4                Rdpack_2.4                  munsell_0.5.0              
## [100] jquerylib_0.1.4             Rcpp_1.0.10                 GenomeInfoDb_1.36.0        
## [103] dbplyr_2.3.2                png_0.1-8                   Rfast_2.0.7                
## [106] RUnit_0.4.32                parallel_4.3.0              blob_1.2.4                 
## [109] prettyunits_1.1.1           bitops_1.0-7                lme4_1.1-33                
## [112] mvtnorm_1.2-1               lmerTest_3.1-3              scales_1.2.1               
## [115] purrr_1.0.1                 crayon_1.5.2                rlang_1.1.1                
## [118] EnrichmentBrowser_2.30.1    KEGGREST_1.40.0

<>

References