## ----environment, echo=FALSE----------------------------------------------- suppressPackageStartupMessages(library("synapter")) suppressPackageStartupMessages(library("synapterdata")) suppressPackageStartupMessages(library("BiocStyle")) suppressPackageStartupMessages(library("qvalue")) ## preparing some data data(ups25b) ## from synapterdata res <- ups25b ## for synergise and plotRt ## ----install, eval=FALSE-------------------------------------------------- # if (!require("BiocManager")) # install.packages("BiocManager") # BiocManager::install("synapter") ## ----library, eval=FALSE-------------------------------------------------- # library("synapter") ## ----prepare-synergise----------------------------------------------------- library("synapterdata") hdmsefile <- getHDMSeFinalPeptide()[2] basename(hdmsefile) msefile <- getMSeFinalPeptide()[2] basename(msefile) msepep3dfile <- getMSePep3D()[2] basename(msepep3dfile) fas <- getFasta() basename(fas) ## the synergise input is a (named) list of filenames input <- list(identpeptide = hdmsefile, quantpeptide = msefile, quantpep3d = msepep3dfile, fasta = fas) ## a report and result files will be stored ## in the 'output' directory output <- tempdir() output ## ----synergise, eval=FALSE------------------------------------------------- # res <- synergise(filenames = input, outputdir = output) ## ----performance----------------------------------------------------------- performance(res) ## ----inputfiles, echo=FALSE, eval=TRUE------------------------------------- inputfiles <- getHDMSeFinalPeptide() ## ----masterFdr, cache=TRUE------------------------------------------------- ## using the full set of 6 HDMSe files and a ## fasta database from the synapterdata package inputfiles <- getHDMSeFinalPeptide() fasta <- getFasta() cmb <- estimateMasterFdr(inputfiles, fasta, masterFdr = 0.02) cmb ## ----estimateFdrPlot, fig.cap="Figure illustrating the relation between the number of unique peptides in the combined HDMS$^E$ file and the resulting false discovery rate. The symbols on the figure represent the number of files for that particular combination. The dotted line is the user defined threshold for the combined FDR (`masterFdr` parameter). The best combination, i.e the one that maximises the number of unique peptides while keeping the FDR below `masterFdr` is highlighted in red."---- plot(cmb) ## ----bestComb-------------------------------------------------------------- bestComb(cmb) ## ----master, cache=TRUE, warning=FALSE------------------------------------- master <- makeMaster(inputfiles[bestComb(cmb)]) master ## ----loadups--------------------------------------------------------------- data(ups25a, ups25b, ups25c, ups50a, ups50b, ups50c) ## ----setas----------------------------------------------------------------- ms25a <- as(ups25a, "MSnSet") class(ups25a)[1] class(ms25a)[1] ms25a ## ----updatesamplename------------------------------------------------------ sampleNames(ms25a) sampleNames(ms25a) <- "ups25a" sampleNames(ms25a) ## ----accessor-------------------------------------------------------------- tail(exprs(ms25a)) tail(fData(ms25a)[, c(2,9)]) ## all fetaure metadata fvarLabels(ms25a) ## ----dotop3---------------------------------------------------------------- ms25a <- topN(ms25a, groupBy = fData(ms25a)$protein.Accession, n = 3) nPeps <- nQuants(ms25a, groupBy = fData(ms25a)$protein.Accession) ms25a <- combineFeatures(ms25a, fData(ms25a)$protein.Accession, "sum", na.rm = TRUE, verbose = FALSE) head(exprs(ms25a)) head(nPeps) ## scale intensities exprs(ms25a) <- exprs(ms25a) * (3/nPeps) ## NaN result from the division by 0, when ## no peptide was found for that protein head(exprs(ms25a)) ## ----batch----------------------------------------------------------------- nms <- c(paste0("ups", 25, c("b", "c")), paste0("ups", 50, c("a", "b", "c"))) tmp <- sapply(nms, function(.ups) { cat("Processing", .ups, "... ") ## get the object from workspace and convert to MSnSet x <- get(.ups, envir = .GlobalEnv) x <- as(x, "MSnSet") sampleNames(x) <- .ups ## extract top 3 peptides x <- topN(x, groupBy = fData(x)$protein.Accession, n = 3) ## calculate the number of peptides that are available nPeps <- nQuants(x, fData(x)$protein.Accession) ## sum top3 peptides into protein quantitation x <- combineFeatures(x, fData(x)$protein.Accession, "sum", na.rm = TRUE, verbose = FALSE) ## adjust protein intensity based on actual number of top peptides exprs(x) <- exprs(x) * (3/nPeps) ## adjust feature variable names for combine x <- updateFvarLabels(x, .ups) ## save the new MSnExp instance in the workspace varnm <- sub("ups", "ms", .ups) assign(varnm, x, envir = .GlobalEnv) cat("done\n") }) ## ----combine25, echo=TRUE-------------------------------------------------- ms25 <- combine(ms25a, ms25b) ms25 <- combine(ms25, ms25c) dim(ms25) ms25 <- filterNA(ms25, pNA = 1/3) dim(ms25) ## ----combine50------------------------------------------------------------- ms50 <- combine(ms50a, ms50b) ms50 <- combine(ms50, ms50c) dim(ms50) ms50 <- filterNA(ms50, pNA = 1/3) dim(ms50) ## ----msUps----------------------------------------------------------------- msUps <- combine(ms25, ms50) msUps <- filterNA(msUps, pNA = 2/6) head(exprs(msUps)) table(apply(exprs(msUps), 1, function(.x)sum(!is.na(.x)))) ## ----scale----------------------------------------------------------------- ecoli <- -grep("ups$", featureNames(msUps)) meds <- apply(exprs(msUps)[ecoli, ], 2, median, na.rm=TRUE) exprs(msUps) <- t(apply(exprs(msUps), 1, "/", meds)) ## ----stats----------------------------------------------------------------- ## use log2 data for t-test exprs(msUps) <- log2(exprs(msUps)) ## apply a t-test and extract the p-value pv <- apply(exprs(msUps), 1 , function(x)t.test(x[1:3], x[4:6])$p.value) ## calculate q-values library("qvalue") qv <- qvalue(pv)$qvalues ## calculate log2 fold-changes lfc <- apply(exprs(msUps), 1 , function(x) mean(x[1:3], na.rm=TRUE)-mean(x[4:6], na.rm=TRUE)) ## create a summary table res <- data.frame(cbind(exprs(msUps), pv, qv, lfc)) ## reorder based on q-values res <- res[order(res$qv), ] knitr::kable(head(round(res, 3))) ## ----writecsv, eval=FALSE-------------------------------------------------- # write.csv(res, file = "upsResults.csv") ## ----volcano, fig.cap="Volcano plot. On the volcano plot, each protein is represented by a dot and positioned according to its $log_2$ fold-change along the x axis and $-log_{10}$ of its q-value along the y axis. Proteins with large fold-changes are positioned on the sides of the plot, while proteins with low q-values are at the top of the figure. The most promising candidates are this located on the top corners. In our case, the UPS proteins (in blue) have $log2$ fold-changes around -1 (vertical dotted line), as expected. The horizontal dotted line represents a q-value threshold of 0.10."---- plot(res$lfc, -log10(res$qv), col = ifelse(grepl("ups$", rownames(res)), "#4582B3AA", "#A1A1A180"), pch = 19, xlab = expression(log[2]~fold-change), ylab = expression(-log[10]~q-value)) grid() abline(v = -1, lty = "dotted") abline(h = -log10(0.1), lty = "dotted") legend("topright", c("UPS", "E. coli"), col = c("#4582B3AA", "#A1A1A1AA"), pch = 19, bty = "n") ## ----hmap, fig.keep="last", fig.cap="A heatmap of all UPS1 proteins present in the final data set."---- heatmap(exprs(msUps)[grep("ups",featureNames(msUps)), ]) ## ----falsepos, echo=FALSE-------------------------------------------------- sel1 <- (res$qv < 0.1) signms <- rownames(res)[sel1] i <- grep("ups", signms) n1 <- length(i) n2 <- length(signms) - n1 ## ----upstab, echo=FALSE, results='asis'------------------------------------ k <- grep("ups", rownames(res)) knitr::kable(round(res[k, ], 3), caption = "UPS1 proteins.") ## ----sessioninfo----------------------------------------------------------- sessionInfo()