## ---- include=FALSE----------------------------------------------------------- library(BiocStyle) ## ----echo = FALSE, fig.cap = "A: Overview of the diffUTR workflow. B: Bin creation scheme."---- knitr::include_graphics(system.file('docs', 'figure1.svg', package = 'diffUTR')) ## ---- eval=FALSE-------------------------------------------------------------- # if (!requireNamespace("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # BiocManager::install("diffUTR") ## ----------------------------------------------------------------------------- suppressPackageStartupMessages({ library(diffUTR) library(SummarizedExperiment) }) ## ---- eval=FALSE-------------------------------------------------------------- # library(AnnotationHub) # ah <- AnnotationHub() # # obtain the identifier of the latest mouse ensembl annotation: # ahid <- rev(query(ah, c("EnsDb", "Mus musculus"))$ah_id)[1] # ensdb <- ah[[ahid]] ## ----------------------------------------------------------------------------- data(example_gene_annotation, package="diffUTR") g <- example_gene_annotation head(g) ## ----------------------------------------------------------------------------- # If you know that your data will be stranded, use # bins <- prepareBins(g, stranded=TRUE) # Otherwise use bins <- prepareBins(g) ## ---- eval=FALSE-------------------------------------------------------------- # bamfiles <- c("path/to/sample1.bam", "path/to/sample2.bam", ...) # rse <- countFeatures(bamfiles, bins, strandSpecific=2, nthreads=6, isPairedEnd=FALSE) ## ----------------------------------------------------------------------------- data(example_bin_se, package="diffUTR") rse <- example_bin_se rse ## ----------------------------------------------------------------------------- head(rowRanges(rse)) ## ----------------------------------------------------------------------------- rse <- diffSpliceWrapper(rse, design = ~condition) ## ----------------------------------------------------------------------------- head(rowData(rse)) ## ----------------------------------------------------------------------------- perGene <- metadata(rse)$geneLevel head(perGene) ## ---- eval=FALSE-------------------------------------------------------------- # download.file("https://polyasite.unibas.ch/download/atlas/2.0/GRCm38.96/atlas.clusters.2.0.GRCm38.96.bed.gz", # destfile="apa.mm38.bed.gz") # bins <- prepareBins(g, "apa.mm38.bed.gz") # # (Again, if you know that your data will be stranded, use `stranded=TRUE`) ## ----------------------------------------------------------------------------- data(rn6_PAS) # bins <- prepareBins(g, rn6_PAS) ## ---- eval=FALSE-------------------------------------------------------------- # bamfiles <- c("path/to/sample1.bam", "path/to/sample2.bam", ...) # se <- countFeatures(bamfiles, bins, strandSpecific=2, nthreads=6, isPairedEnd=TRUE) ## ----------------------------------------------------------------------------- rse <- diffSpliceWrapper( rse, design = ~condition ) ## ----------------------------------------------------------------------------- # we can then only look for changes in CDS bins: cds <- geneLevelStats(rse, includeTypes="CDS", returnSE=FALSE) head(cds) # or only look for changes in UTR bins: utr <- geneLevelStats(rse, includeTypes="UTR", returnSE=FALSE) head(utr) # or only look for changes in bins that _could be_ UTRs: # geneLevelStats(rse, excludeTypes=c("CDS","non-coding"), returnSE=FALSE) ## ----------------------------------------------------------------------------- plotTopGenes(rse) ## ----------------------------------------------------------------------------- # `utr` being the output of the above # geneLevelStats(rse, includeTypes="UTR", returnSE=FALSE) plotTopGenes(utr, diffUTR = TRUE) ## ----------------------------------------------------------------------------- rse <- addNormalizedAssays(rse) ## ----------------------------------------------------------------------------- deuBinPlot(rse,"Jund") deuBinPlot(rse,"Jund", type="condition", colour="condition") deuBinPlot(rse,"Jund", type="sample", colour="condition") # shows separate samples ## ----------------------------------------------------------------------------- library(ggplot2) deuBinPlot(rse,"Jund",type="condition",colour="condition") + guides(colour = guide_legend(override.aes = list(size = 3))) + scale_colour_manual(values=c(CTRL="darkblue", LTP="red")) ## ----------------------------------------------------------------------------- geneBinHeatmap(rse, "Smg6") ## ----------------------------------------------------------------------------- #' binTxPlot #' #' @param se A bin-wise SummarizedExperiment as produced by #' \code{\link{countFeatures}} and including bin-level tests (i.e. having been #' passed through one of the DEU wrappers such as #' \code{\link{diffSpliceWrapper}} or \code{\link{DEXSeqWrapper}}) #' @param ensdb The `EnsDb` which was used to create the bins #' @param gene The gene of interest #' @param by The colData column of `se` used to split the samples #' @param assayName The assay to plot #' @param removeAmbiguous Logical; whether to remove bins that are #' gene-ambiguous (i.e. overlap multiple genes). #' @param size Size of the lines #' @param ... Passed to `plotRangesLinkedToData` #' #' @return A `ggbio` `Tracks` #' @importFrom AnnotationFilter GeneNameFilter GeneIdFilter #' @importFrom ensembldb getGeneRegionTrackForGviz #' @importFrom ggbio plotRangesLinkedToData autoplot #' @export binTxPlot <- function(se, ensdb, gene, by=NULL, assayName=c("logNormDensity"), removeAmbiguous=TRUE, size=3, threshold=0.05, ...){ w <- diffUTR:::.matchGene(se, gene) se <- sort(se[w,]) if(removeAmbiguous) se <- se[!rowData(se)$geneAmbiguous,] if(length(w)==0) return(NULL) if(!is.null(by)) by <- match.arg(by, colnames(colData(se))) assayName <- match.arg(assayName, assayNames(se)) if(rowData(se)$gene[[1]]==gene){ filt <- GeneIdFilter(gene) }else{ filt <- GeneNameFilter(gene) } gr <- ggbio::autoplot(ensdb, filt) gr2 <- rowRanges(se) if(!is.null(by)){ sp <- split(seq_len(ncol(se)), se[[by]]) for(f in names(sp)) mcols(gr2)[[f]] <- rowMeans(assays(se)[[assayName]][,sp[[f]],drop=FALSE]) y <- names(sp) }else{ mcols(gr2)[[assayName]] <- rowMeans(assays(se)[[assayName]]) y <- assayName } ggbio::plotRangesLinkedToData(gr2, stat.y=y, stat.ylab=assayName, annotation=list(gr), size=size, ...) + ggtitle(gene) } # example usage (not run): # binTxPlot(rse, ensdb, gene="Arid5a", by="condition") ## ----------------------------------------------------------------------------- sessionInfo()