## ----style, eval=TRUE, echo=FALSE, results="asis"------------------------ BiocStyle::latex() ## ----environment, cache=FALSE, echo=FALSE-------------------------------- suppressPackageStartupMessages(library("ggplot2")) suppressPackageStartupMessages(library("MSnbase")) suppressPackageStartupMessages(library("zoo")) suppressPackageStartupMessages(require("Rdisop")) suppressPackageStartupMessages(require("pRolocdata")) suppressPackageStartupMessages(require("pRoloc")) library("grid") library("reshape2") ## ----readdata, echo=TRUE, cache=FALSE, tidy=FALSE------------------------ file <- dir(system.file(package = "MSnbase", dir = "extdata"), full.names = TRUE, pattern = "mzXML$") rawdata <- readMSData(file, msLevel = 2, verbose = FALSE) ## ----MSnExp, cache=FALSE, echo=TRUE-------------------------------------- library("MSnbase") itraqdata head(fData(itraqdata)) ## ----experimentsize, echo=FALSE, cache=FALSE----------------------------- sz <- sum(sapply(assayData(itraqdata), object.size)) + object.size(itraqdata) sz <- as.numeric(sz) sz <- round(sz/(1024^2), 2) ## ----Spectrum, cache=FALSE, echo=TRUE------------------------------------ sp <- itraqdata[["X1"]] sp ## ----accessors, cache=FALSE, echo=TRUE----------------------------------- peaksCount(sp) head(peaksCount(itraqdata)) rtime(sp) head(rtime(itraqdata)) ## ----ReporterIons-------------------------------------------------------- iTRAQ4 TMT10 ## ----msmap, eval=FALSE--------------------------------------------------- ## ## downloads the data ## library("rpx") ## px1 <- PXDataset("PXD000001") ## mzf <- pxget(px1, 6) ## ## ## reads the data ## ms <- openMSfile(mzf) ## hd <- header(ms) ## ## ## a set of spectra of interest: MS1 spectra eluted ## ## between 30 and 35 minutes retention time ## ms1 <- which(hd$msLevel == 1) ## rtsel <- hd$retentionTime[ms1] / 60 > 30 & ## hd$retentionTime[ms1] / 60 < 35 ## ## ## the map ## M <- MSmap(ms, ms1[rtsel], 521, 523, .005, hd, zeroAsNA = TRUE) ## ----msmaplaod, echo=FALSE----------------------------------------------- mrda <- dir(system.file(package = "MSnbase", dir = "extdata"), full.names = TRUE, pattern = "M.rda$") mrda2 <- dir(system.file(package = "MSnbase", dir = "extdata"), full.names = TRUE, pattern = "M2.rda$") load(mrda) load(mrda2) ## ----msmaphow------------------------------------------------------------ M ## ----mapheat, dev='pdf', fig.width=5, fig.height=5----------------------- plot(M, aspect = 1, allTicks = FALSE) ## ----map3d1, dev='pdf', fig.width=4, fig.height=4------------------------ plot3D(M) ## ----msmap2, eval=FALSE-------------------------------------------------- ## i <- ms1[which(rtsel)][1] ## j <- ms1[which(rtsel)][2] ## M2 <- MSmap(ms, i:j, 100, 1000, 1, hd) ## ----m2------------------------------------------------------------------ M2 ## ----map3d2, dev='pdf', fig.width=4, fig.height=4------------------------ plot3D(M2) ## ----spectrumPlot, dev='pdf', fig.width=6, fig.height=4, fig.keep='high'---- plot(sp, reporters = iTRAQ4, full = TRUE) ## ----bsaSelect, eval=TRUE, echo=FALSE------------------------------------ ## bsasel <- fData(itraqdata)$ProteinAccession == "BSA" bsasel <- 1:3 ## ----subset, echo=TRUE--------------------------------------------------- sel <- fData(itraqdata)$ProteinAccession == "BSA" bsa <- itraqdata[sel] bsa as.character(fData(bsa)$ProteinAccession) ## ----msnexpPlot, dev='pdf', fig.width=3.7, fig.height=4.5, fig.keep='last'---- plot(bsa, reporters = iTRAQ4, full = FALSE) + theme_gray(8) ## ----msnexpIdentification, echo=TRUE, cache=FALSE, tidy=FALSE------------ ## find path to a mzXML file quantFile <- dir(system.file(package = "MSnbase", dir = "extdata"), full.name = TRUE, pattern = "mzXML$") ## find path to a mzIdentML file identFile <- dir(system.file(package = "MSnbase", dir = "extdata"), full.name = TRUE, pattern = "dummyiTRAQ.mzid") ## create basic MSnExp msexp <- readMSData(quantFile, verbose = FALSE) head(fData(msexp), n = 2) ## ----msnexpIdentification2, echo=TRUE, cache=FALSE, tidy=FALSE----------- ## add identification information msexp <- addIdentificationData(msexp, id = identFile, verbose = FALSE) head(fData(msexp), n = 2) ## ----msnexpIdentification3, echo=TRUE, cache=FALSE, tidy=FALSE----------- idSummary(msexp) ## ----fragplot------------------------------------------------------------ itraqdata2 <- pickPeaks(itraqdata, verbose=FALSE) i <- 14 s <- as.character(fData(itraqdata2)[i, "PeptideSequence"]) ## ----fragplotfig, dev='pdf', fig.width=5, fig.height=5------------------- plot(itraqdata2[[i]], s, main = s) ## ----msnexpIdentification4, echo=TRUE, cache=FALSE, tidy=FALSE----------- fData(msexp)$pepseq msexp <- removeNoId(msexp) fData(msexp)$pepseq idSummary(msexp) ## ----calculateFragments, echo=TRUE, cache=FALSE, tidy=FALSE-------------- calculateFragments("ACEK", type=c("a", "b", "c", "x", "y", "z")) ## ----msnexpcalculateFragments, echo=TRUE, cache=FALSE, tidy=FALSE-------- pepseq <- fData(msexp)$pepseq[1] calculateFragments(pepseq, msexp[[1]], type=c("b", "y")) ## ----removePeaks, echo=TRUE, cache=FALSE--------------------------------- experiment <- removePeaks(itraqdata, t = 400, verbose = FALSE) ## total ion current ionCount(itraqdata[["X55"]]) ionCount(experiment[["X55"]]) ## ----spectrum-clean-plot, dev='pdf', fig.width=6, fig.height=3, echo=FALSE, fig.keep='high'---- p1 <- plot(itraqdata[["X55"]], full = TRUE, plot = FALSE) + theme_gray(5) p2 <- plot(experiment[["X55"]], full = TRUE, plot = FALSE) + theme_gray(5) grid.newpage() pushViewport(viewport(layout = grid.layout(1, 2))) vplayout <- function(x, y) viewport(layout.pos.row = x, layout.pos.col = y) print(p1,vp=vplayout(1,1)) print(p2,vp=vplayout(1,2)) ## ----clean, echo=TRUE, cache=FALSE--------------------------------------- ## number of peaks peaksCount(itraqdata[["X55"]]) peaksCount(experiment[["X55"]]) experiment <- clean(experiment, verbose = FALSE) peaksCount(experiment[["X55"]]) ## ----preprosp, cache=FALSE, echo=FALSE----------------------------------- int <- c(0,1,1,3,1,1,0,0,0,1,3,7,3,1,0) mz <- c(113.9,114.0,114.05,114.1,114.15,114.2,114.25, 114.3,114.35,114.4,114.42,114.48,114.5,114.55,114.6) ppsp <- new("Spectrum2",intensity=int,mz=mz,centroided=FALSE) p1 <- plot(ppsp, full = TRUE, plot = FALSE) + theme_gray(5) + geom_point(size=3,alpha=I(1/3)) + geom_hline(yintercept=3,linetype=2) + ggtitle("Original spectrum") p2 <- plot(removePeaks(ppsp,t=3), full=TRUE, plot = FALSE) + theme_gray(5) + geom_point(size=3,alpha=I(1/3)) + geom_hline(yintercept=3,linetype=2) + ggtitle("Peaks < 3 removed") p3 <- plot(clean(removePeaks(ppsp,t=3)), full = TRUE, plot = FALSE) + theme_gray(5) + geom_point(size=3,alpha=I(1/3)) + geom_hline(yintercept=3,linetype=2) + ggtitle("Peaks < 3 removed and cleaned") ## ----preprocPlot, dev='pdf', fig.width=3, fig.height=6, echo=FALSE------- grid.newpage() pushViewport(viewport(layout = grid.layout(3, 1))) print(p1, vp=vplayout(1,1)) print(p2, vp=vplayout(2,1)) print(p3, vp=vplayout(3,1)) ## ----trimMz, echo=TRUE, cache=FALSE-------------------------------------- range(mz(itraqdata[["X55"]])) experiment <- trimMz(experiment, mzlim = c(112,120)) range(mz(experiment[["X55"]])) experiment ## ----reporters, echo=TRUE, cache=FALSE----------------------------------- mz(iTRAQ4) width(iTRAQ4) ## ----simplesp, cache=FALSE, echo=FALSE, fig.keep='none'------------------ int <- c(0,1,1,3,1,1,0) mz <- c(113.9,114.0,114.05,114.1,114.15,114.2,114.25) ssp <- new("Spectrum2",intensity=int,mz=mz,centroided=FALSE) p <- plot(ssp, full=TRUE, plot=FALSE) p <- p + theme_gray(5) ## ----quantitationPlot, dev='pdf',fig.width=5, fig.height=5, echo=FALSE---- grid.newpage() pushViewport(viewport(layout = grid.layout(2, 2))) vplayout <- function(x, y) viewport(layout.pos.row = x, layout.pos.col = y) print(p + ggtitle("Quantitation using 'sum'") + geom_point(size = 3, alpha = I(1/3), colour = "red"), vp = vplayout(1, 1)) print(p + ggtitle("Quantitation using 'max'") + geom_point(aes(x = 114.1, y = 3), alpha = I(1/18), colour = "red", size = 3), vp = vplayout(1, 2)) print(p + ggtitle("Trapezoidation and strict=FALSE") + geom_polygon(alpha = I(1/5), fill = "red"), vp = vplayout(2, 1)) print(p + ggtitle("Trapezoidation and strict=TRUE") + geom_polygon(aes(x = c(NA, 114.05, 114.05, 114.1, 114.15, 114.15, NA), y=c(NA, 0, 1, 3, 1, 0, NA)), fill = "red", alpha = I(1/5)), vp = vplayout(2,2)) ## ----quantify, echo=TRUE, cache=FALSE, tidy=FALSE------------------------ qnt <- quantify(experiment, method = "trap", reporters = iTRAQ4, strict = FALSE, verbose = FALSE) qnt head(exprs(qnt)) ## ----filterNA, echo=TRUE------------------------------------------------- table(is.na(qnt)) qnt <- filterNA(qnt, pNA = 0) sum(is.na(qnt)) ## ----removeNa, echo=TRUE, eval=FALSE------------------------------------- ## whichRow <- which(is.na((qnt))) %% nrow(qnt) ## qnt <- qnt[-whichRow, ] ## ----pheplus1, echo=TRUE, cache=FALSE------------------------------------ library(Rdisop) ## Phenylalanine immonium ion Fim <- getMolecule("C8H10N") getMass(Fim) isotopes <- getIsotope(Fim) F1 <- isotopes[2, 2] F1 ## ----purityCorrect, echo=TRUE, cache=FALSE, tidy = FALSE----------------- impurities <- matrix(c(0.929, 0.059, 0.002, 0.000, 0.020, 0.923, 0.056, 0.001, 0.000, 0.030, 0.924, 0.045, 0.000, 0.001, 0.040, 0.923), nrow = 4) qnt.crct <- purityCorrect(qnt, impurities) head(exprs(qnt)) head(exprs(qnt.crct)) ## ----impute-------------------------------------------------------------- ## an example MSnSet containing missing values data(naset) table(is.na(naset)) ## number of NAs per protein table(fData(naset)$nNA) x <- impute(naset, "min") processingData(x) table(is.na(x)) ## ----miximpfig, echo=FALSE, dev='pdf', fig.width=8, fig.width=8---------- x <- impute(naset, "zero") exprs(x)[exprs(x) != 0] <- 1 suppressPackageStartupMessages(library("gplots")) heatmap.2(exprs(x), col = c("lightgray", "black"), scale = "none", dendrogram = "none", trace = "none", keysize = 0.5, key = FALSE, RowSideColors = ifelse(fData(x)$randna, "orange", "brown"), ColSideColors = rep(c("steelblue", "darkolivegreen"), each = 8)) ## ----miximp-------------------------------------------------------------- x <- impute(naset, method = "mixed", randna = fData(naset)$randna, mar = "knn", mnar = "min") x ## ----normalise, echo=TRUE, cache=FALSE----------------------------------- qnt.max <- normalise(qnt, "max") qnt.sum <- normalise(qnt, "sum") qnt.quant <- normalise(qnt, "quantiles") qnt.qrob <- normalise(qnt, "quantiles.robust") qnt.vsn <- normalise(qnt, "vsn") ## ----normPlot, dev='pdf', fig.width=5, fig.height=7, echo=FALSE---------- .plot <- function(x,ttl=NULL) { boxplot(exprs(x), main=ifelse(is.null(ttl),processingData(x)@processing[2],ttl), cex.main=.8, cex.lab=.5, cex.axis=.5, cex=.8) grid() } oldmar <- par()$mar par(mfrow=c(3,2),mar=c(2.9,2.9,2.9,1)) .plot(qnt, ttl = "Non-normalised data") .plot(qnt.max, ttl = "Maximum") .plot(qnt.sum, ttl = "Sum") .plot(qnt.quant, ttl = "Quantile") .plot(qnt.qrob, ttl = "Robust quantile") .plot(qnt.vsn, ttl = "vsn") ## ----cvdata, echo=FALSE, cache=FALSE------------------------------------- sd1 <- apply(log2(exprs(qnt))+10,1,sd) mn1 <- apply(log2(exprs(qnt))+10,1,mean) cv1 <- sd1/mn1 sd2 <- apply(exprs(qnt.vsn)+10,1,sd) mn2 <- apply(exprs(qnt.vsn)+10,1,mean) cv2 <- sd2/mn2 dfr <- rbind(data.frame(rank=order(mn1),cv=cv1,norm="raw"), data.frame(rank=order(mn2),cv=cv2,norm="vsn")) library("zoo") ## rmed1 <- rollapply(cv1,7,function(x) median(x,na.rm=TRUE)) ## rmed2 <- rollapply(cv2,7,function(x) median(x,na.rm=TRUE)) ## ## Calling directly rollapply.zoo to make it zoo_1.6-4 compatible. ## The above requires zoo >= 1.7-0, which is as of 15 March 2011 ## not yet available on CRAN (only on r-forge). rmed1 <- zoo:::rollapply.zoo(cv1,7,function(x) median(x,na.rm=TRUE)) rmed2 <- zoo:::rollapply.zoo(cv2,7,function(x) median(x,na.rm=TRUE)) dfr2 <- rbind(data.frame(x=seq(1,70,by=(70/length(rmed1))), y=rmed1,norm="raw"), data.frame(x=seq(1,70,by=(70/length(rmed1))), y=rmed2,norm="vsn")) p <- qplot(rank,cv,data=dfr,col=norm) + geom_line(data=dfr2,aes(x=x,y=y,colour=norm)) + theme_gray(7) ## ----cvPlot, dev='pdf', fig.width=5, fig.height=4, echo=FALSE------------ print(p) ## ----prepareMsnsetNormPlot, cache=FALSE, echo=FALSE, keep.fig='none'----- p <- plot(normalise(experiment[bsasel], "max"), reporters = iTRAQ4, full = FALSE, plot = FALSE) p <- p + theme_gray(7) ## ----msnexpNormPlot, dev='pdf', fig.width=5, fig.height=6, echo=FALSE---- print(p) ## ----makeGroups1,echo=FALSE,cache=FALSE---------------------------------- gb <- fData(qnt)$ProteinAccession ## ----makeGroups2,echo=TRUE,cache=FALSE----------------------------------- gb <- fData(qnt)$ProteinAccession table(gb) length(unique(gb)) ## ----combineFeatures, echo=TRUE, cache=FALSE----------------------------- qnt2 <- combineFeatures(qnt, groupBy = gb, fun = "median") qnt2 ## ----count--------------------------------------------------------------- sc <- quantify(msexp, method = "count") ## lets modify out data for demonstration purposes fData(sc)$accession[1] <- fData(sc)$accession[2] fData(sc)$accession sc <- combineFeatures(sc, groupBy = fData(sc)$accession, fun = "sum") exprs(sc) ## ----labelfree----------------------------------------------------------- fData(msexp)[, c("accession", "nprot")] ## ----SIn----------------------------------------------------------------- siquant <- quantify(msexp, method = "SIn") processingData(siquant) exprs(siquant) ## ----plotSpectrumSpectrum, dev='pdf', fig.width=6.5, fig.height=6.5------ centroided <- pickPeaks(itraqdata, verbose=FALSE) (k <- which(fData(centroided)[, "PeptideSequence"] == "TAGIQIVADDLTVTNPK")) mzk <- precursorMz(centroided)[k] zk <- precursorCharge(centroided)[k] mzk * zk plot(centroided[[k[1]]], centroided[[k[2]]]) ## ----compareSpectra, tidy=FALSE------------------------------------------ compareSpectra(centroided[[2]], centroided[[3]], fun = "common") compareSpectra(centroided[[2]], centroided[[3]], fun = "cor") compareSpectra(centroided[[2]], centroided[[3]], fun = "dotproduct") ## ----msnexpcompareSpectra------------------------------------------------ compmat <- compareSpectra(centroided, fun="cor") compmat[1:10, 1:5] ## ----hclustcompareSpectra, dev='pdf', fig.width=5, fig.height=6---------- plot(hclust(as.dist(compmat))) ## ----incompdiss, echo=TRUE, cache=FALSE, tidy=FALSE---------------------- iTRAQ5 incompdiss <- quantify(itraqdata, method = "trap", reporters = iTRAQ5, strict = FALSE, verbose = FALSE) head(exprs(incompdiss)) ## ----incompdissPlot, dev='pdf', fig.width=5, fig.height=2.5, echo=FALSE, warning=FALSE---- dfr <- melt(exprs(incompdiss)) colnames(dfr) <- c("feature", "reporters", "intensity") dfr$reporters <- sub("iTRAQ5.", "", dfr$reporters) repsum <- apply(exprs(incompdiss), 1, function(x) sum(x[1:4])) dfr2 <- data.frame(iTRAQ1to4 = repsum, iTRAQ5 = exprs(incompdiss)[,5]) p1 <- ggplot(data = dfr, aes(x = reporters,y = log10(intensity))) + geom_boxplot() + theme_gray(6) p2 <- ggplot(data = dfr2, aes(x = log10(iTRAQ1to4), y = log10(iTRAQ5))) + geom_point(alpha = I(1/2)) + geom_abline(intercept = 0, slope = 1, linetype = "dotted") + stat_smooth(method = lm, se = FALSE) + xlab(label = expression(log[10]~sum~114~to~117)) + ylab(label = expression(log[10]~145)) + theme_gray(6) grid.newpage() pushViewport(viewport(layout = grid.layout(1, 2))) print(p1, vp = vplayout(1, 1)) print(p2, vp = vplayout(1, 2)) ## ----makeexp12, echo=TRUE, cache=FALSE, tidy = FALSE--------------------- exp1 <- quantify(itraqdata, reporters = iTRAQ4, verbose = FALSE) sampleNames(exp1) exp2 <- quantify(rawdata, reporters = iTRAQ4, verbose = FALSE) sampleNames(exp2) ## ----updateFnames, echo=TRUE, cache=FALSE-------------------------------- head(featureNames(exp1)) exp1 <- updateFeatureNames(exp1) head(featureNames(exp1)) head(featureNames(exp2)) exp2 <- updateFeatureNames(exp2) head(featureNames(exp2)) ## ----comb1, echo=TRUE, cache=FALSE--------------------------------------- exp12 <- combine(exp1, exp2) dim(exp1) dim(exp2) dim(exp12) ## ----make2exps2, echo=TRUE, cache=FALSE, tidy=FALSE---------------------- set.seed(1) i <- sample(length(itraqdata), 35) j <- sample(length(itraqdata), 35) exp1 <- quantify(itraqdata[i], reporters = iTRAQ4, verbose = FALSE) exp2 <- quantify(itraqdata[j], reporters = iTRAQ4, verbose = FALSE) exp1 <- droplevels(exp1) exp2 <- droplevels(exp2) table(featureNames(exp1) %in% featureNames(exp2)) exp1 <- combineFeatures(exp1, groupBy = fData(exp1)$ProteinAccession) exp2 <- combineFeatures(exp2, groupBy = fData(exp2)$ProteinAccession) head(featureNames(exp1)) head(featureNames(exp2)) ## ----renameSamples, echo=TRUE, cache=FALSE------------------------------- sampleNames(exp1) exp1 <- updateSampleNames(exp1) sampleNames(exp1) sampleNames(exp1) <- c("Ctrl1", "Cond1", "Ctrl2", "Cond2") sampleNames(exp2) <- c("Ctrl3", "Cond3", "Ctrl4", "Cond4") ## ----fdatanames, echo=TRUE, cache=FALSE---------------------------------- stopifnot(all(fvarLabels(exp1) == fvarLabels(exp2))) fData(exp1)["BSA", 1:4] fData(exp2)["BSA", 1:4] ## ----renameFvars, echo=TRUE, cache=FALSE--------------------------------- exp1 <- updateFvarLabels(exp1) exp2 <- updateFvarLabels(exp2) head(fvarLabels(exp1)) head(fvarLabels(exp2)) ## ----combine, echo=TRUE, cache=FALSE------------------------------------- exp12 <- combine(exp1, exp2) dim(exp12) pData(exp12) exprs(exp12)[25:28, ] exp12 ## ----avg----------------------------------------------------------------- library("pRolocdata") data(tan2009r1) data(tan2009r2) data(tan2009r3) avgtan <- averageMSnSet(list(tan2009r1, tan2009r2, tan2009r3)) head(exprs(avgtan)) head(fData(avgtan)$disp) head(fData(avgtan)$nNA) ## ----avg2, dev='pdf', fig.height=5, fig.width=5-------------------------- disp <- rowMax(fData(avgtan)$disp) disp[disp == 0] <- max(disp) range(disp) library("pRoloc") plot2D(avgtan, cex = 3 * disp) ## ----sessioninfo, results='asis', echo=FALSE, cache=FALSE---------------- toLatex(sessionInfo())