## ----style, echo = FALSE, results = 'asis', message=FALSE---------------- BiocStyle::markdown() ## ----env, message=FALSE, echo=FALSE, cache=FALSE, warning=FALSE---------- library("Pbase") ## ----schema, echo=FALSE, fig.cap='Mapping proteins to a genome reference.', fig.align='center'---- Pbase:::mapplot() ## ----p, message=FALSE---------------------------------------------------- library("Pbase") data(p) p seqnames(p) ## ----startend------------------------------------------------------------ pcols(p)[1, c("start", "end")] ## ----bm, cache=TRUE, message=FALSE--------------------------------------- library("biomaRt") ens <- useMart("ensembl", "hsapiens_gene_ensembl") ens bm <- select(ens, keys = seqnames(p), keytype = "uniprot_swissprot", columns = c( "uniprot_swissprot", "hgnc_symbol", "ensembl_transcript_id")) bm ## ----enst---------------------------------------------------------------- acols(p)$ENST ## ----etris2grl----------------------------------------------------------- grl <- etrid2grl(acols(p)$ENST, ens) all.equal(names(grl), acols(p)$ENST, check.attributes=FALSE) grl ## ----pc------------------------------------------------------------------ pcgrl <- proteinCoding(grl) pcgrl ## ----echo=FALSE---------------------------------------------------------- pp <- p[5] n <- elementLengths(pranges(pp)) pepstring <- paste(unique(pcols(pp)[[1]]$pepseq), collapse = ", ") ## ------------------------------------------------------------------------ sort(pranges(p)[5]) plot(p[5]) ## ------------------------------------------------------------------------ plotAsGeneRegionTrack(grl[[5]], pcgrl[[5]]) ## ----protfromgenome------------------------------------------------------ library("BSgenome.Hsapiens.NCBI.GRCh38") head(seqnames(BSgenome.Hsapiens.NCBI.GRCh38)) if (!"chr1" %in% seqnames(BSgenome.Hsapiens.NCBI.GRCh38)) seqnames(BSgenome.Hsapiens.NCBI.GRCh38)[1:23] <- paste0("chr", seqnames(BSgenome.Hsapiens.NCBI.GRCh38)[1:23]) seqnames(BSgenome.Hsapiens.NCBI.GRCh38)[21:27] ## ----aaseq--------------------------------------------------------------- s <- getSeq(BSgenome.Hsapiens.NCBI.GRCh38, pcgrl[[5]]) s if (isReverse(pcgrl[[5]])) s <- rev(s) aaseq <- translate(unlist(s)) aaseq ## ----aln----------------------------------------------------------------- writePairwiseAlignments(pairwiseAlignment(aa(p[5]), aaseq)) ## ----map----------------------------------------------------------------- res <- mapToGenome(p[5], pcgrl[5]) res[[1]] ## ----pepcoords, fig.align='center'--------------------------------------- plotAsAnnotationTrack(res[[1]], grl[[5]]) ## ----msmsspectra, fig.align='center'------------------------------------- data(pms) library("ggplot2") details <- function(identifier, ...) { p <- plot(pms[[as.numeric(identifier)]], full=TRUE, plot=FALSE) + ggtitle("") p <- p + theme_bw() + theme(axis.text.y = element_blank(), axis.text.x = element_blank()) + labs(x = NULL, y = NULL) print(p, newpage=FALSE) } res <- res[[1]] deTrack <- AnnotationTrack(start = start(res), end = end(res), genome = "hg38", chromosom = "chrX", id = pcols(p)[[5]]$acquisitionNum, name = "MS2 spectra", stacking = "squish", fun = details) grTrack <- GeneRegionTrack(grl[[5]], name = acols(p)$ENST[5]) ideoTrack <- IdeogramTrack(genome = "hg38", chromosome = "chrX") axisTrack <- GenomeAxisTrack() plotTracks(list(ideoTrack, axisTrack, deTrack, grTrack), add53 = TRUE, add35 = TRUE) ## ----pmap---------------------------------------------------------------- pres <- pmapToGenome(p, pcgrl) pres ## ------------------------------------------------------------------------ k <- 6 seqnames(p)[k] ## ----remindbm------------------------------------------------------------ sel <- bm$uniprot_swissprot == seqnames(p)[k] bm[sel, ] acols(p)$ENST[k] ## ----etris2grl2---------------------------------------------------------- eid <- bm[sel, 3] names(eid) <- bm[sel, 1] eid grl <- etrid2grl(eid, ens, use.names = TRUE) pcgrl <- proteinCoding(grl) ## ----plot5--------------------------------------------------------------- plotAsGeneRegionTrack(pcgrl) ## ----getseq2, warning=FALSE---------------------------------------------- lseq <- lapply(getSeq(BSgenome.Hsapiens.NCBI.GRCh38, pcgrl), function(s) translate(unlist(s))) laln <- sapply(lseq, pairwiseAlignment, aa(p[k])) sapply(laln, nmatch)/width(aa(p[k])) ## ----ki, echo=FALSE------------------------------------------------------ ki <- which.max(sapply(laln, nmatch)) ## ----checkk-------------------------------------------------------------- ki <- which.max(sapply(laln, nmatch)) stopifnot(eid[ki] == acols(p)$ENST[k]) ## ----map2---------------------------------------------------------------- res <- pmapToGenome(p[k], pcgrl[ki]) ## ----pepcoords2---------------------------------------------------------- plotAsAnnotationTrack(res[[1]], pcgrl[[ki]]) ## ----coordall------------------------------------------------------------ alleid <- bm[, 3] names(alleid) <- bm[, 1] grl <- etrid2grl(alleid, ens, use.names = TRUE) pcgrl <- proteinCoding(grl) res <- mapToGenome(p, pcgrl) length(res) ## ----si------------------------------------------------------------------ sessionInfo()