## ----style, eval=TRUE, echo=FALSE, results="asis"------------------------ BiocStyle::latex() ## ----env, include=FALSE, echo=FALSE, cache=FALSE------------------------- library("knitr") opts_chunk$set(fig.align = 'center', fig.show = 'hold', par = TRUE, prompt = TRUE, eval = TRUE, stop_on_error = 1L, comment = NA) options(replace.assign = TRUE, width = 55) suppressPackageStartupMessages(library("dplyr")) suppressPackageStartupMessages(library("MSnbase")) suppressWarnings(suppressPackageStartupMessages(library("pRoloc"))) suppressPackageStartupMessages(library("pRolocdata")) suppressPackageStartupMessages(library("class")) set.seed(1) ## ----ref, echo=FALSE, results='asis'------------------------------------- cits <- citation("pRoloc") i <- which(lengths(sapply(cits, function(xx) grep("heterogeneous", xx$title))) > 0) print(cits[[i]], style = "LaTeX") ## ----loadpkg------------------------------------------------------------- library("pRoloc") ## ----loaddata------------------------------------------------------------ library("pRolocdata") data(andy2011) ## ----ap------------------------------------------------------------------ ap <- setAnnotationParams(inputs = c("Homo sapiens", "UniProtKB/Swiss-Prot ID")) ap ## ----pdata--------------------------------------------------------------- data(andy2011) head(featureNames(andy2011)) ## ----andgoset------------------------------------------------------------ andygoset <- makeGoSet(andy2011) andygoset exprs(andygoset)[1:7, 1:4] ## ----testandsamefeats, echo=FALSE---------------------------------------- stopifnot(all.equal(featureNames(andy2011), featureNames(andygoset))) ## ----hparprep, eval=TRUE------------------------------------------------- fvarLabels(andy2011)[1] <- "accession" ## for left_join matching ## convert protein accession numbers to ensembl gene identifiers library("biomaRt") mart <- useMart("ensembl",dataset="hsapiens_gene_ensembl") filter <- "uniprotswissprot" attrib <- c("uniprot_gn", "uniprotswissprot", "ensembl_gene_id") bm <- getBM(attributes = attrib, filters = filter, values = fData(andy2011)[, "accession"], mart = mart) head(bm) ## HPA data library("hpar") ## using old version for traceability setHparOptions(hpadata = "hpaSubcellularLoc14") hpa <- getHpa(bm$ensembl_gene_id) hpa$Reliability <- droplevels(hpa$Reliability) colnames(hpa)[1] <- "ensembl_gene_id" library("dplyr") hpa <- left_join(hpa, bm) hpa <- hpa[!duplicated(hpa$uniprotswissprot), ] ## match HPA/LOPIT colnames(hpa)[7] <- "accession" fd <- left_join(fData(andy2011), hpa) rownames(fd) <- featureNames(andy2011) fData(andy2011) <- fd stopifnot(validObject(andy2011)) ## Let's get rid of features without any hpa data lopit <- andy2011[!is.na(fData(andy2011)$Main.location), ] ## ----hpadata, eval=TRUE-------------------------------------------------- ## HPA localisation hpalocs <- c(as.character(fData(lopit)$Main.location), as.character(fData(lopit)$Other.location)) hpalocs <- hpalocs[!is.na(hpalocs)] hpalocs <- unique(unlist(strsplit(hpalocs, ";"))) makeHpaSet <- function(x, score2, locs = hpalocs) { hpamat <- matrix(0, ncol = length(locs), nrow = nrow(x)) colnames(hpamat) <- locs rownames(hpamat) <- featureNames(x) for (i in 1:nrow(hpamat)) { loc <- unlist(strsplit(as.character(fData(x)[i, "Main.location"]), ";")) loc2 <- unlist(strsplit(as.character(fData(x)[i, "Other.location"]), ";")) score <- score2[fData(x)[i, "Reliability"]] hpamat[i, loc] <- score hpamat[i, loc2] <- score } new("MSnSet", exprs = hpamat, featureData = featureData(x)) } hpaset <- makeHpaSet(lopit, score2 = c(Supportive = 1, Uncertain = 0)) hpaset <- filterZeroRows(hpaset) dim(hpaset) exprs(hpaset)[c(1, 6, 200), 1:3] ## ----tabdelim------------------------------------------------------------ ppif <- system.file("extdata/tabdelimited._gHentss2F9k.txt.gz", package = "pRolocdata") ppidf <- read.delim(ppif, header = TRUE, stringsAsFactors = FALSE) head(ppidf) ## ----ppiset-------------------------------------------------------------- uid <- unique(c(ppidf$X.node1, ppidf$node2)) ppim <- diag(length(uid)) colnames(ppim) <- rownames(ppim) <- uid for (k in 1:nrow(ppidf)) { i <- ppidf[[k, "X.node1"]] j <- ppidf[[k, "node2"]] ppim[i, j] <- ppidf[[k, "combined_score"]] } ppim[1:5, 1:8] ## ----ppiset2------------------------------------------------------------- featureNames(andy2011) <- sub("_HUMAN", "", fData(andy2011)$UniProtKB.entry.name) cmn <- intersect(featureNames(andy2011), rownames(ppim)) ppim <- ppim[cmn, ] andy2011 <- andy2011[cmn, ] ppi <- MSnSet(ppim, fData = fData(andy2011), pData = data.frame(row.names = colnames(ppim))) ppi <- filterZeroCols(ppi) ## ----mclasses, echo=FALSE------------------------------------------------ data(andy2011) ## load clean LOPIT data ## marker classes for andy2011 m <- unique(fData(andy2011)$markers.tl) m <- m[m != "unknown"] ## ----andypca, fig.width=6, fig.height=6, echo=FALSE---------------------- setStockcol(paste0(getStockcol(), "80")) plot2D(andy2011, fcol = "markers.tl") setStockcol(NULL) addLegend(andy2011, fcol = "markers.tl", where = "topright", bty = "n", cex = .7) ## ----thetas0, echo=TRUE-------------------------------------------------- head(thetas(3, by = 0.5)) dim(thetas(3, by = 0.5)) ## ----thetas1, echo=TRUE-------------------------------------------------- dim(thetas(5, length.out = 4)) ## ----thetaandy----------------------------------------------------------- ## marker classes for andy2011 m <- unique(fData(andy2011)$markers.tl) m <- m[m != "unknown"] th <- thetas(length(m), length.out=4) dim(th) ## ----thetaopt0, eval=FALSE----------------------------------------------- ## topt <- knntlOptimisation(andy2011, andygoset, ## th = th, ## k = c(3, 3), ## fcol = "markers.tl", ## times = 50) ## ----thetaopt, eval=TRUE------------------------------------------------- set.seed(1) i <- sample(nrow(th), 12) topt <- knntlOptimisation(andy2011, andygoset, th = th[i, ], k = c(3, 3), fcol = "markers.tl", times = 5) topt ## ----getParam------------------------------------------------------------ getParams(topt) ## ----besttheta----------------------------------------------------------- (bw <- experimentData(andy2011)@other$knntl$thetas) ## ----tlclass------------------------------------------------------------- andy2011 <- knntlClassification(andy2011, andygoset, bestTheta = bw, k = c(3, 3), fcol = "markers.tl") ## ----tlpreds------------------------------------------------------------- andy2011 <- getPredictions(andy2011, fcol = "knntl") ## ----andypca2, fig.width=6, fig.height=6--------------------------------- setStockcol(paste0(getStockcol(), "80")) ptsze <- exp(fData(andy2011)$knntl.scores) - 1 plot2D(andy2011, fcol = "knntl", cex = ptsze) setStockcol(NULL) addLegend(andy2011, where = "topright", fcol = "markers.tl", bty = "n", cex = .7) ## ----sessioninfo, results='asis', echo=FALSE----------------------------- toLatex(sessionInfo())