## ---- echo = FALSE------------------------------------------------------------ library(knitr) ## ---- eval=FALSE-------------------------------------------------------------- # if(!requireNamespace("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # BiocManager::install("satuRn") ## ---- eval=FALSE-------------------------------------------------------------- # devtools::install_github("statOmics/satuRn") ## ---- message=FALSE, warning=FALSE-------------------------------------------- library(satuRn) library(AnnotationHub) library(ensembldb) library(edgeR) library(SummarizedExperiment) library(ggplot2) library(DEXSeq) library(stageR) ## ----------------------------------------------------------------------------- data(Tasic_counts_vignette) # transcript expression matrix data(Tasic_metadata_vignette) # metadata ## ---- message=FALSE, warning=FALSE-------------------------------------------- ah <- AnnotationHub() # load the annotation resource. all <- query(ah, "EnsDb") # query for all available EnsDb databases ahEdb <- all[["AH75036"]] # for Mus musculus (choose correct release date) txs <- transcripts(ahEdb) ## ----------------------------------------------------------------------------- # Get the transcript information in correct format txInfo <- as.data.frame(matrix(data = NA, nrow = length(txs), ncol = 2)) colnames(txInfo) <- c("isoform_id", "gene_id") txInfo$isoform_id <- txs$tx_id txInfo$gene_id <- txs$gene_id rownames(txInfo) <- txInfo$isoform_id # Remove transcripts that are the only isoform expressed of a certain gene rownames(Tasic_counts_vignette) <- sub("\\..*", "", rownames(Tasic_counts_vignette)) # remove transcript version identifiers txInfo <- txInfo[txInfo$isoform_id %in% rownames(Tasic_counts_vignette), ] txInfo <- subset(txInfo, duplicated(gene_id) | duplicated(gene_id, fromLast = TRUE)) Tasic_counts_vignette <- Tasic_counts_vignette[which( rownames(Tasic_counts_vignette) %in% txInfo$isoform_id), ] ## ----------------------------------------------------------------------------- filter_edgeR <- filterByExpr(Tasic_counts_vignette, design = NULL, group = Tasic_metadata_vignette$brain_region, lib.size = NULL, min.count = 10, min.total.count = 30, large.n = 20, min.prop = 0.7 ) # more stringent than default to reduce run time of the vignette table(filter_edgeR) Tasic_counts_vignette <- Tasic_counts_vignette[filter_edgeR, ] # Update txInfo according to the filtering procedure txInfo <- txInfo[which( txInfo$isoform_id %in% rownames(Tasic_counts_vignette)), ] # remove txs that are the only isoform expressed within a gene (after filtering) txInfo <- subset(txInfo, duplicated(gene_id) | duplicated(gene_id, fromLast = TRUE)) Tasic_counts_vignette <- Tasic_counts_vignette[which(rownames( Tasic_counts_vignette) %in% txInfo$isoform_id), ] # satuRn requires the transcripts in the rowData and # the transcripts in the count matrix to be in the same order. txInfo <- txInfo[match(rownames(Tasic_counts_vignette), txInfo$isoform_id), ] ## ----------------------------------------------------------------------------- Tasic_metadata_vignette$group <- paste(Tasic_metadata_vignette$brain_region, Tasic_metadata_vignette$cluster, sep = ".") ## ---- message=FALSE----------------------------------------------------------- sumExp <- SummarizedExperiment::SummarizedExperiment( assays = list(counts = Tasic_counts_vignette), colData = Tasic_metadata_vignette, rowData = txInfo ) # for sake of completeness: specify design formula from colData metadata(sumExp)$formula <- ~ 0 + as.factor(colData(sumExp)$group) sumExp ## ----------------------------------------------------------------------------- system.time({ sumExp <- satuRn::fitDTU( object = sumExp, formula = ~ 0 + group, parallel = FALSE, BPPARAM = BiocParallel::bpparam(), verbose = TRUE ) }) ## ----------------------------------------------------------------------------- rowData(sumExp)[["fitDTUModels"]]$"ENSMUST00000037739" ## ----------------------------------------------------------------------------- group <- as.factor(Tasic_metadata_vignette$group) design <- model.matrix(~ 0 + group) # construct design matrix colnames(design) <- levels(group) L <- matrix(0, ncol = 2, nrow = ncol(design)) # initialize contrast matrix rownames(L) <- colnames(design) colnames(L) <- c("Contrast1", "Contrast2") L[c("VISp.L5_IT_VISp_Hsd11b1_Endou","ALM.L5_IT_ALM_Tnc"),1] <-c(1,-1) L[c("VISp.L5_IT_VISp_Hsd11b1_Endou","ALM.L5_IT_ALM_Tmem163_Dmrtb1"),2] <-c(1,-1) L # contrast matrix ## ----------------------------------------------------------------------------- group <- as.factor(Tasic_metadata_vignette$group) design <- model.matrix(~ 0 + group) # construct design matrix colnames(design) <- levels(group) L <- limma::makeContrasts( Contrast1 = VISp.L5_IT_VISp_Hsd11b1_Endou - ALM.L5_IT_ALM_Tnc, Contrast2 = VISp.L5_IT_VISp_Hsd11b1_Endou - ALM.L5_IT_ALM_Tmem163_Dmrtb1, levels = design ) L # contrast matrix ## ----------------------------------------------------------------------------- sumExp <- satuRn::testDTU( object = sumExp, contrasts = L, diagplot1 = TRUE, diagplot2 = TRUE, sort = FALSE ) ## ----------------------------------------------------------------------------- head(rowData(sumExp)[["fitDTUResult_Contrast1"]]) # first contrast ## ----------------------------------------------------------------------------- head(rowData(sumExp)[["fitDTUResult_Contrast2"]]) # second contrast ## ---- warning=FALSE----------------------------------------------------------- group1 <- colnames(sumExp)[colData(sumExp)$group == "VISp.L5_IT_VISp_Hsd11b1_Endou"] group2 <- colnames(sumExp)[colData(sumExp)$group == "ALM.L5_IT_ALM_Tnc"] plots <- satuRn::plotDTU( object = sumExp, contrast = "Contrast1", groups = list(group1, group2), coefficients = list(c(0, 0, 1), c(0, 1, 0)), summaryStat = "model", transcripts = c("ENSMUST00000081554", "ENSMUST00000195963", "ENSMUST00000132062"), genes = NULL, top.n = 6 ) # to have same layout as in our paper for (i in seq_along(plots)) { current_plot <- plots[[i]] + scale_fill_manual(labels = c("VISp", "ALM"), values = c("royalblue4", "firebrick")) + scale_x_discrete(labels = c("Hsd11b1_Endou", "Tnc")) print(current_plot) } ## ---- stage-wise testing------------------------------------------------------ # transcript level p-values from satuRn pvals <- rowData(sumExp)[["fitDTUResult_Contrast1"]]$empirical_pval # compute gene level q-values geneID <- factor(rowData(sumExp)$gene_id) geneSplit <- split(seq(along = geneID), geneID) pGene <- sapply(geneSplit, function(i) min(pvals[i])) pGene[is.na(pGene)] <- 1 theta <- unique(sort(pGene)) # gene-level significance testing q <- DEXSeq:::perGeneQValueExact(pGene, theta, geneSplit) qScreen <- rep(NA_real_, length(pGene)) qScreen <- q[match(pGene, theta)] qScreen <- pmin(1, qScreen) names(qScreen) <- names(geneSplit) # prepare stageR input tx2gene <- as.data.frame(rowData(sumExp)[c("isoform_id", "gene_id")]) colnames(tx2gene) <- c("transcript", "gene") pConfirmation <- matrix(matrix(pvals), ncol = 1, dimnames = list(rownames(tx2gene), "transcript") ) # create a stageRTx object stageRObj <- stageR::stageRTx( pScreen = qScreen, pConfirmation = pConfirmation, pScreenAdjusted = TRUE, tx2gene = tx2gene ) # perform the two-stage testing procedure stageRObj <- stageR::stageWiseAdjustment( object = stageRObj, method = "dtu", alpha = 0.05, allowNA = TRUE ) # retrieves the adjusted p-values from the stageRTx object padj <- stageR::getAdjustedPValues(stageRObj, order = TRUE, onlySignificantGenes = FALSE ) head(padj) ## ----------------------------------------------------------------------------- sessionInfo()