## ----style, echo = FALSE, results = 'asis'------------------------------- BiocStyle::markdown() ## ----message = FALSE----------------------------------------------------- library(GenomicRanges) library(Rsamtools) library(bamsignals) ## ------------------------------------------------------------------------ bampath <- system.file("extdata", "randomBam.bam", package="bamsignals") genes <- get(load(system.file("extdata", "randomAnnot.Rdata", package="bamsignals"))) genes ## ------------------------------------------------------------------------ #sequence names of the GenomicRanges object seqinfo(genes) #sequence names in the bam file bf <- Rsamtools::BamFile(bampath) seqinfo(bf) #checking if there is an index file.exists(gsub(".bam$", ".bam.bai", bampath)) ## ------------------------------------------------------------------------ proms <- GenomicRanges::promoters(genes, upstream=100, downstream=100) counts <- bamCount(bampath, proms, verbose=FALSE) str(counts) ## ------------------------------------------------------------------------ counts <- bamCount(bampath, proms, verbose=FALSE, shift=75) str(counts) ## ------------------------------------------------------------------------ strand(proms) counts <- bamCount(bampath, proms, verbose=FALSE, ss=TRUE) str(counts) ## ------------------------------------------------------------------------ sigs <- bamProfile(bampath, genes, verbose=FALSE) sigs ## ------------------------------------------------------------------------ #CountSignals is conceptually like a list lsigs <- as.list(sigs) stopifnot(length(lsigs[[1]])==length(sigs[1])) #sapply and lapply can be used as if we were using a list stopifnot(all(sapply(sigs, sum) == sapply(lsigs, sum))) ## ------------------------------------------------------------------------ stopifnot(all(width(sigs)==width(genes))) ## ------------------------------------------------------------------------ sssigs <- bamProfile(bampath, genes, verbose=FALSE, ss=TRUE) sssigs ## ------------------------------------------------------------------------ str(sssigs[1]) #summing up the counts from the two strands is the same as using ss=FALSE stopifnot(colSums(sssigs[1])==sigs[1]) #the width function takes into account that now the signals are strand-specific stopifnot(width(sssigs)==width(sigs)) #the length function does not, a strand-specific signal is twice as long stopifnot(length(sssigs[1])==2*length(sigs[1])) ## ------------------------------------------------------------------------ xlab <- "offset from start of the region" ylab <- "counts per base pair (negative means antisense)" main <- paste0("read profile of the region ", seqnames(genes)[1], ":", start(genes)[1], "-", end(genes)[1]) plot(sigs[1], ylim=c(-max(sigs[1]), max(sigs[1])), ylab=ylab, xlab=xlab, main=main, type="l") lines(sssigs[1]["sense",], col="blue") lines(-sssigs[1]["antisense",], col="red") legend("bottom", c("sense", "antisense", "both"), col=c("blue", "red", "black"), lty=1) ## ------------------------------------------------------------------------ #The promoter regions have all the same width sigs <- bamProfile(bampath, proms, ss=FALSE, verbose=FALSE) sssigs <- bamProfile(bampath, proms, ss=TRUE, verbose=FALSE) sigsMat <- alignSignals(sigs) sigsArr <- alignSignals(sssigs) ## ------------------------------------------------------------------------ #the dimensions are [base pair, region] str(sigsMat) #the dimensions are [strand, base pair, region] str(sigsArr) stopifnot(all(sigsMat == sigsArr["sense",,] + sigsArr["antisense",,])) ## ------------------------------------------------------------------------ avgSig <- rowMeans(sigsMat) avgSenseSig <- rowMeans(sigsArr["sense",,]) avgAntisenseSig <- rowMeans(sigsArr["antisense",,]) ylab <- "average counts per base pair" xlab <- "distance from TSS" main <- paste0("average profile of ", length(proms), " promoters") xs <- -99:100 plot(xs, avgSig, ylim=c(0, max(avgSig)), xlab=xlab, ylab=ylab, main=main, type="l") lines(xs, avgSenseSig, col="blue") lines(xs, avgAntisenseSig, col="red") legend("bottom", c("sense", "antisense", "both"), col=c("blue", "red", "black"), lty=1) ## ------------------------------------------------------------------------ binsize <- 20 binnedSigs <- bamProfile(bampath, proms, binsize=binsize, verbose=FALSE) stopifnot(all(width(binnedSigs)==ceiling(width(sigs)/binsize))) binnedSigs ## ------------------------------------------------------------------------ avgBinnedSig <- rowMeans(alignSignals(binnedSigs)) #the counts in the bin are the sum of the counts in each base pair stopifnot(all.equal(colSums(matrix(avgSig, nrow=binsize)),avgBinnedSig)) #let's plot it ylab <- "average counts per base pair" plot(xs, avgSig, xlab=xlab, ylab=ylab, main=main, type="l") lines(xs, rep(avgBinnedSig, each=binsize)/binsize, lty=2) legend("topright", c("base pair count", "bin count"), lty=c(1, 2)) ## ------------------------------------------------------------------------ covSigs <- bamCoverage(bampath, genes, verbose=FALSE) puSigs <- bamProfile(bampath, genes, verbose=FALSE) xlab <- "offset from start of the region" ylab <- "reads per base pair" main <- paste0("read coverage and profile of the region ", seqnames(genes)[1], ":", start(genes)[1], "-", end(genes)[1]) plot(covSigs[1], ylim=c(0, max(covSigs[1])), ylab=ylab, xlab=xlab, main=main, type="l") lines(puSigs[1], lty=2) legend("topright", c("covering the base pair", "5' end maps to the base pair"), lty=c(1,2)) ## ------------------------------------------------------------------------ counts.all <- bamCount(bampath, proms, verbose=FALSE) summary(counts.all) counts.mapq <- bamCount(bampath, proms, mapq=20, verbose=FALSE) summary(counts.mapq) ## ------------------------------------------------------------------------ counts.mapq.noDups <- bamCount(bampath, proms, mapq=20, filteredFlag=1024, verbose=FALSE) summary(counts.mapq.noDups) ## ------------------------------------------------------------------------ #5' end falls into regions defined in `proms` counts.pe.filter <- bamCount(bampath, proms, paired.end="filter", verbose=FALSE) summary(counts.pe.filter) #fragment midpoint falls into regions defined in `proms` counts.pe.midpoint <- bamCount(bampath, proms, paired.end="midpoint", verbose=FALSE) summary(counts.pe.midpoint) #counts are similar but not identical cor(counts.pe.filter, counts.pe.midpoint) ## ------------------------------------------------------------------------ covSigs <- bamCoverage(bampath, genes, paired.end="extend", verbose=FALSE) puSigs <- bamProfile(bampath, genes, paired.end="midpoint", verbose=FALSE) xlab <- "offset from start of the region" ylab <- "reads per base pair" main <- paste0("Paired end whole fragment coverage and fragment midpoint profile\n", "of the region ", seqnames(genes)[1], ":", start(genes)[1], "-", end(genes)[1]) plot(covSigs[1], ylim=c(0, max(covSigs[1])), ylab=ylab, xlab=xlab, main=main, type="l") lines(puSigs[1], lty=2) legend("topright", c("covering the base pair", "fragment midpoint maps to the base pair"), lty=c(1,2)) ## ------------------------------------------------------------------------ counts.monoNucl <- bamCount(bampath, genes, paired.end="midpoint", tlenFilter=c(120,170), verbose=FALSE) summary(counts.monoNucl) #Coverage of mononucleosomal fragments covSigs.monoNucl <- bamCoverage(bampath, genes, paired.end="extend", tlenFilter=c(120,170), verbose=FALSE) xlab <- "offset from start of the region" ylab <- "reads per base pair" main <- paste0("Paired end whole fragment coverage for\n", "of the region ", seqnames(genes)[1], ":", start(genes)[1], "-", end(genes)[1]) plot(covSigs[1], ylim=c(0, max(covSigs[1])), ylab=ylab, xlab=xlab, main=main, type="l") lines(covSigs.monoNucl[1], lty=3) legend("topright", c("all fragment sizes", "mononucleosomal fragments only"), lty=c(1,3))