## ----setup, echo=FALSE, results="hide"---------------------------------------- knitr::opts_chunk$set(tidy=FALSE, cache=FALSE, dev="png", message=FALSE, error=FALSE, warning=FALSE) ## ----eval=FALSE--------------------------------------------------------------- # # 'coldata.csv': sample information table # coldata <- read.csv("coldata.csv") # library(tximeta) # y <- tximeta(coldata) # library(swish) # y <- scaleInfReps(y) # y <- labelKeep(y) # set.seed(1) # y <- swish(y, x="condition") ## ----eval=FALSE--------------------------------------------------------------- # table(mcols(y)$qvalue < .05) ## ----eval=FALSE--------------------------------------------------------------- # y <- y[mcols(y)$keep,] ## ----------------------------------------------------------------------------- library(macrophage) dir <- system.file("extdata", package="macrophage") ## ----------------------------------------------------------------------------- coldata <- read.csv(file.path(dir, "coldata.csv")) head(coldata) ## ----------------------------------------------------------------------------- coldata <- coldata[,c(1,2,3,5)] names(coldata) <- c("names","id","line","condition") ## ----------------------------------------------------------------------------- coldata$files <- file.path(dir, "quants", coldata$names, "quant.sf.gz") all(file.exists(coldata$files)) ## ----------------------------------------------------------------------------- suppressPackageStartupMessages(library(SummarizedExperiment)) ## ----include=FALSE------------------------------------------------------------ # This hidden code chunk is only needed for Bioc build machines, # so that 'fishpond' will build regardless of whether # the machine can connect to ftp.ebi.ac.uk. # Using linkedTxomes to point to a GTF that lives in the macrophage pkg. # The chunk can be skipped if you have internet connection, # as tximeta will automatically ID the transcriptome and DL the GTF. library(tximeta) makeLinkedTxome( indexDir=file.path(dir, "gencode.v29_salmon_0.12.0"), source="Gencode", organism="Homo sapiens", release="29", genome="GRCh38", fasta="ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_29/gencode.v29.transcripts.fa.gz", gtf=file.path(dir, "gencode.v29.annotation.gtf.gz"), # local version write=FALSE ) ## ----------------------------------------------------------------------------- library(tximeta) se <- tximeta(coldata) ## ----------------------------------------------------------------------------- assayNames(se) ## ----------------------------------------------------------------------------- head(rownames(se)) ## ----------------------------------------------------------------------------- y <- se y <- y[seqnames(y) == "chr1",] ## ----------------------------------------------------------------------------- y <- y[,y$condition %in% c("naive","IFNg")] y$condition <- factor(y$condition, c("naive","IFNg")) ## ----results="hide", message=FALSE-------------------------------------------- library(fishpond) y <- scaleInfReps(y) y <- labelKeep(y) y <- y[mcols(y)$keep,] set.seed(1) y <- swish(y, x="condition", pair="line", nperms=64) ## ----------------------------------------------------------------------------- table(mcols(y)$qvalue < .05) ## ----------------------------------------------------------------------------- hist(mcols(y)$pvalue, col="grey") ## ----------------------------------------------------------------------------- with(mcols(y), table(sig=qvalue < .05, sign.lfc=sign(log2FC)) ) sig <- mcols(y)$qvalue < .05 lo <- order(mcols(y)$log2FC * sig) hi <- order(-mcols(y)$log2FC * sig) ## ----------------------------------------------------------------------------- top.up <- mcols(y)[head(hi),] names(top.up) cols <- c("log10mean","log2FC","pvalue","qvalue") print(as.data.frame(top.up)[,cols], digits=3) ## ----------------------------------------------------------------------------- top.down <- mcols(y)[head(lo),] print(as.data.frame(top.down)[,cols], digits=3) ## ----------------------------------------------------------------------------- plotInfReps(y, idx=hi[100], x="condition", cov="line") ## ----------------------------------------------------------------------------- plotMASwish(y, alpha=.05) ## ----------------------------------------------------------------------------- library(org.Hs.eg.db) y <- addIds(y, "SYMBOL", gene=TRUE) ## ----------------------------------------------------------------------------- plotMASwish(y, alpha=.05, xlim=c(.5,5.5)) with( subset(mcols(y), qvalue < .05 & abs(log2FC) > 4), text(log10mean, log2FC, SYMBOL, col="blue", pos=4, cex=.7) ) ## ----------------------------------------------------------------------------- gse <- summarizeToGene(se) gy <- gse gy <- gy[seqnames(gy) == "chr1",] ## ----------------------------------------------------------------------------- gy <- gy[,gy$condition %in% c("naive","IFNg")] gy$condition <- factor(gy$condition, c("naive","IFNg")) ## ----results="hide", message=FALSE-------------------------------------------- gy <- scaleInfReps(gy) gy <- labelKeep(gy) gy <- gy[mcols(gy)$keep,] set.seed(1) gy <- swish(gy, x="condition", pair="line", nperms=64) ## ----------------------------------------------------------------------------- table(mcols(gy)$qvalue < .05) ## ----------------------------------------------------------------------------- hist(mcols(y)$pvalue, col="grey") ## ----------------------------------------------------------------------------- with(mcols(gy), table(sig=qvalue < .05, sign.lfc=sign(log2FC)) ) sig <- mcols(gy)$qvalue < .05 glo <- order(mcols(gy)$log2FC * sig) ghi <- order(-mcols(gy)$log2FC * sig) ## ----------------------------------------------------------------------------- gtop.up <- mcols(gy)[head(ghi),] print(as.data.frame(gtop.up)[,cols], digits=3) gtop.down <- mcols(gy)[head(glo),] print(as.data.frame(gtop.down)[,cols], digits=3) ## ----------------------------------------------------------------------------- plotInfReps(gy, idx=ghi[100], x="condition", cov="line") ## ----------------------------------------------------------------------------- plotMASwish(gy, alpha=.05) ## ----------------------------------------------------------------------------- library(org.Hs.eg.db) gy <- addIds(gy, "SYMBOL", gene=TRUE) ## ----------------------------------------------------------------------------- plotMASwish(gy, alpha=.05, xlim=c(.5,5.5)) with( subset(mcols(gy), qvalue < .05 & abs(log2FC) > 3), text(log10mean, log2FC, SYMBOL, col="blue", pos=4, cex=.7) ) ## ----------------------------------------------------------------------------- # run on the transcript-level dataset iso <- isoformProportions(y) iso <- swish(iso, x="condition", pair="line", nperms=64) ## ----eval=FALSE, echo=FALSE--------------------------------------------------- # # some unevaluated code for looking into DTE within non-DGE gene # # (DTE vs DGE plot) # fisherP <- function(p) { # pchisq(-2 * sum(log(p)), 2*length(p), lower.tail=FALSE) # } # stopifnot(all(lengths(mcols(y)$gene_id) == 1)) # dat <- as.data.frame(mcols(y)[,c("gene_id","pvalue")]) # dat$gene_id <- unlist(dat$gene_id) # pvals <- tapply(dat$pvalue, dat$gene_id, fisherP) # dte <- data.frame(gene_id=names(pvals), pvalue=pvals) # dte <- dte[rownames(gy),] # plot(-log10(mcols(gy)$pvalue), -log10(dte$pvalue)) # #identify(-log10(mcols(gy)$pvalue), -log10(dte$pvalue)) # idx <- 193 # idx2 <- which(unlist(mcols(y)$gene_id) == rownames(gy)[idx]) # plotInfReps(gy, idx, x="condition", cov="line", xaxis=FALSE) # par(mfrow=c(1,3)) # for (i in 1:3) { # plotInfReps(y, idx2[i], x="condition", cov="line", xaxis=FALSE) # } ## ----------------------------------------------------------------------------- se$ifng <- factor(ifelse( grepl("IFNg",se$condition), "treated","control")) se$salmonella <- factor(ifelse( grepl("SL1344",se$condition), "infected","control")) with(colData(se), table(ifng, salmonella) ) ## ----------------------------------------------------------------------------- y2 <- se y2 <- y2[seqnames(y2) == "chr1",] ## ----------------------------------------------------------------------------- y2$pair <- as.numeric(factor(y2$line)) y2$pair[y2$ifng == "control"] y2$pair[y2$ifng == "treated"] y2$pair[y2$ifng == "treated"] <- rep(7:12,each=2) y2$pair <- factor(y2$pair) table(y2$pair, y2$salmonella) ## ----results="hide", message=FALSE-------------------------------------------- y2 <- scaleInfReps(y2) y2 <- labelKeep(y2) y2 <- y2[mcols(y2)$keep,] set.seed(1) y2 <- swish(y2, x="salmonella", cov="ifng", pair="pair", interaction=TRUE) ## ----------------------------------------------------------------------------- hist(mcols(y2)$pvalue, col="grey") ## ----------------------------------------------------------------------------- plotMASwish(y2, alpha=.05) ## ----------------------------------------------------------------------------- idx <- with(mcols(y2), which(qvalue < .05 & log2FC > 5)) plotInfReps(y2, idx[1], x="ifng", cov="salmonella") plotInfReps(y2, idx[2], x="ifng", cov="salmonella") ## ----------------------------------------------------------------------------- gres <- mcols(gy)[mcols(gy)$keep,] min(gres$qvalue, na.rm=TRUE) # min nominal FDR is not 0 with(gres, plot(stat, -log10(qvalue))) with(gres, plot(log2FC, -log10(qvalue))) abline(v=0, col="red") with(gres, plot(log2FC, -log10(qvalue), xlim=c(-1.5,1.5), ylim=c(0,1.5))) abline(v=0, col="red") ## ----------------------------------------------------------------------------- y3 <- se y3 <- y3[seqnames(y3) == "chr1",] y3 <- y3[,y3$condition %in% c("naive","IFNg")] y3 <- labelKeep(y3) y3 <- y3[mcols(y3)$keep,] y3 <- computeInfRV(y3) mcols(y3)$meanCts <- rowMeans(assays(y3)[["counts"]]) with(mcols(y3), plot(meanCts, meanInfRV, log="xy")) hist(log10(mcols(y3)$meanInfRV), col="grey50", border="white", breaks=20, xlab="mean InfRV", main="Txp-level inferential uncertainty") ## ----eval=FALSE--------------------------------------------------------------- # y <- labelKeep(y, minCount=3, minN=10) # y <- y[mcols(y)$keep,] ## ----eval=FALSE--------------------------------------------------------------- # assays(y) <- lapply(assays(y), as.matrix) # y <- scaleInfReps(y, lengthCorrect=FALSE) # y <- swish(y, x="condition") ## ----echo=FALSE--------------------------------------------------------------- n <- 8 condition <- rep(1:2,length=2*n) group <- rep(1:2,each=n) pair <- rep(c(1:n),each=2) cols <- c("dodgerblue","goldenrod4") plot(1:(2*n), rep(0,2*n), ylim=c(-.5,3.5), type="n", xaxt="n", yaxt="n", xlab="samples", ylab="permutation") abline(v=8.5, lty=2) axis(2, 0:3, c("orig",1:3), las=2) text(1:(2*n), rep(0,2*n), pair, col=cols[condition], cex=2) set.seed(1) for (i in 1:3) { perms <- rep(2*sample(n,n),each=2) - rep(1:0,length=2*n) text(1:(2*n), rep(i,2*n), pair[perms], col=cols[condition[perms]], cex=2) } ## ----echo=FALSE--------------------------------------------------------------- n <- 8 condition <- rep(c(1:2,1:2),each=n/2) group <- rep(1:2,each=n) id <- LETTERS[1:(2*n)] cols <- c("dodgerblue","goldenrod4") plot(1:(2*n), rep(0,2*n), ylim=c(-.5,3.5), type="n", xaxt="n", yaxt="n", xlab="samples", ylab="permutation") abline(v=8.5, lty=2) axis(2, 0:3, c("orig",1:3), las=2) text(1:(2*n), rep(0,2*n), id, col=cols[condition], cex=2) set.seed(3) for (i in 1:3) { id.perms <- character(2*n) grp1 <- id[group==1] grp2 <- id[group==2] id.perms[c(1:4,9:12)] <- sample(id[condition==1],n) idx1 <- id.perms[c(1:4,9:12)] %in% grp1 id.perms[c(5:8,13:16)][idx1] <- sample(id[condition==2 & group==1],sum(idx1)) idx2 <- id.perms[c(1:4,9:12)] %in% grp2 id.perms[c(5:8,13:16)][idx2] <- sample(id[condition==2 & group==2],sum(idx2)) text(1:(2*n), rep(i,2*n), id.perms, col=cols[condition], cex=2) } arrows(3,1.5,1.3,1.15,,length=.1) arrows(3,1.5,4.7,1.15,length=.1) ## ----------------------------------------------------------------------------- sessionInfo()