NestLink 1.4.0
The following content is described in more detail in Egloff et al. (2018) (under review NMETH-A35040).
# Amino-acid sampling pools. Each pool repeats every letter of the 14-letter
# alphabet as often as its weight says (a weight of 0 leaves the letter out);
# the weights of each pool add up to 100.

# Pool "x8": S, E, L, F and W have weight 0.
aa_pool_x8 <- rep(
  c('A', 'S', 'T', 'N', 'Q', 'D', 'E', 'V', 'L', 'F', 'Y', 'W', 'G', 'P'),
  times = c(12, 0, 12, 12, 12, 8, 0, 12, 0, 0, 8, 0, 12, 12)
)

# Pool "1_2_9_10": P has weight 0.
aa_pool_1_2_9_10 <- rep(
  c('A', 'S', 'T', 'N', 'Q', 'D', 'E', 'V', 'L', 'F', 'Y', 'W', 'G', 'P'),
  times = c(8, 7, 7, 6, 6, 8, 8, 9, 6, 5, 9, 6, 15, 0)
)

# Pool "3_8": every letter is present, P carries the largest weight.
aa_pool_3_8 <- rep(
  c('A', 'S', 'T', 'N', 'Q', 'D', 'E', 'V', 'L', 'F', 'Y', 'W', 'G', 'P'),
  times = c(5, 4, 5, 2, 2, 8, 8, 7, 5, 4, 6, 4, 12, 28)
)
## aa_pool_x8
## A D G N P Q T V Y
## 12 8 12 12 12 12 12 12 8
## [1] 100
## aa_pool_1_2_9_10
## A D E F G L N Q S T V W Y
## 8 8 8 5 15 6 6 6 7 7 9 6 9
## [1] 100
## aa_pool_3_8
## A D E F G L N P Q S T V W Y
## 5 8 8 4 12 5 2 28 2 4 5 7 4 6
## [1] 100
# Evaluate compose_GPGx8cTerm ten times with the aa_pool_x8 pool, then call
# compose_GPx10R once with the aa_pool_1_2_9_10 and aa_pool_3_8 pools.
# Both results are only printed, not stored.
replicate(10, compose_GPGx8cTerm(pool=aa_pool_x8))
compose_GPx10R(aa_pool_1_2_9_10, aa_pool_3_8)
# Number of peptides generated per design; the outer parentheses print the
# assigned value.
(sample.size <- 3E+04)
## [1] 30000
# Evaluate each compose_* call sample.size times and keep the results.
# NOTE(review): presumably each call draws a new random peptide from the
# pool(s) - confirm against the NestLink documentation.
peptides.GPGx8cTerm <- replicate(sample.size, compose_GPGx8cTerm(pool=aa_pool_x8))
peptides.GPx10R <- replicate(sample.size, compose_GPx10R(aa_pool_1_2_9_10, aa_pool_3_8))
# write.table(peptides.GPGx8cTerm, file='/tmp/pp.txt')
# One additional example peptide, assigned to smp.peptide and printed.
(smp.peptide <- compose_GPGx8cTerm(aa_pool_x8))
## [1] 1496.665
# Parent ion mass of every simulated peptide and of the iRT peptides:
# parentIonMass is applied to one sequence at a time and the per-sequence
# results are flattened into a single vector.
pim.GPGx8cTerm <- unlist(lapply(peptides.GPGx8cTerm, parentIonMass))
pim.GPx10R <- unlist(lapply(peptides.GPx10R, parentIonMass))
pim.iRT <- unlist(lapply(as.character(iRTpeptides$peptide), parentIonMass))
# Smallest and largest parent ion mass over both simulated peptide sets.
(pim.min <- min(pim.GPGx8cTerm, pim.GPx10R))
## [1] 1037.512
(pim.max <- max(pim.GPGx8cTerm, pim.GPx10R))
## [1] 1890.877
# 75 equally spaced histogram break points covering [pim.min, pim.max].
# round(x - 1) is always below x and round(x + 1) always above it, so every
# mass falls inside the outermost breaks. `length.out` is spelled out in full
# instead of relying on partial matching of `length`.
(pim.breaks <- seq(round(pim.min - 1), round(pim.max + 1), length.out = 75))
## [1] 1037.000 1048.554 1060.108 1071.662 1083.216 1094.770 1106.324 1117.878
## [9] 1129.432 1140.986 1152.541 1164.095 1175.649 1187.203 1198.757 1210.311
## [17] 1221.865 1233.419 1244.973 1256.527 1268.081 1279.635 1291.189 1302.743
## [25] 1314.297 1325.851 1337.405 1348.959 1360.514 1372.068 1383.622 1395.176
## [33] 1406.730 1418.284 1429.838 1441.392 1452.946 1464.500 1476.054 1487.608
## [41] 1499.162 1510.716 1522.270 1533.824 1545.378 1556.932 1568.486 1580.041
## [49] 1591.595 1603.149 1614.703 1626.257 1637.811 1649.365 1660.919 1672.473
## [57] 1684.027 1695.581 1707.135 1718.689 1730.243 1741.797 1753.351 1764.905
## [65] 1776.459 1788.014 1799.568 1811.122 1822.676 1834.230 1845.784 1857.338
## [73] 1868.892 1880.446 1892.000
# Overlay the parent ion mass densities of both designs on shared breaks.
# The fill colours are semi-transparent so the overlap stays visible.
hist(pim.GPGx8cTerm, breaks = pim.breaks, probability = TRUE,
     col = '#1111AAAA', xlab = 'peptide mass [Dalton]', ylim = c(0, 0.006))
hist(pim.GPx10R, breaks = pim.breaks,
     probability = TRUE, add = TRUE, col = '#11AA1188')
# Mark the masses of the iRT peptides as vertical grey lines.
abline(v = pim.iRT, col = 'grey')
# Legend swatches use exactly the colours drawn above (the GPx10R swatch
# previously used a different alpha value than its histogram).
legend("topleft", c('GPGx8cTerm', 'GPx10R', 'iRT'),
       fill = c('#1111AAAA', '#11AA1188', 'grey'))
The SSRC model, see Krokhin et al. (2004), is implemented as the ssrc
function in protViz.
For a sanity check we apply the ssrc function
to a real-world LC-MS run: peptideStd
consists of a digest of the FETUIN_BOVINE
protein (400 amol) shipped with specL (Panse et al. 2015).
# Apply the ssrc function to the peptideSequence field of every peptideStd
# entry.
# NOTE(review): the result is stored in a variable that has the same name as
# the ssrc function. Later calls such as ssrc(...) still reach the function,
# because R skips non-function bindings when resolving a call, but the
# shadowing is easy to misread.
ssrc <- sapply(peptideStd, function(x){ssrc(x$peptideSequence)})
# The rt field of every entry (presumably the measured retention time).
rt <- unlist(lapply(peptideStd, function(x){x$rt}))
# Scatter plot of rt against the ssrc values; the linear fit is stored in
# ssrc.lm and drawn in red.
plot(ssrc, rt); abline(ssrc.lm <- lm(rt ~ ssrc), col='red');
# Show the Spearman correlation, rounded to two digits, in the legend.
legend("topleft", paste("spearman", round(cor(ssrc, rt, method='spearman'),2)))
Here we apply ssrc
to the simulated flycodes and iRT peptides (Escher et al. 2012).
# ssrc values of both simulated peptide sets and of the iRT peptides.
hyd.GPGx8cTerm <- ssrc(peptides.GPGx8cTerm)
hyd.GPx10R <- ssrc(peptides.GPx10R)
hyd.iRT <- ssrc(as.character(iRTpeptides$peptide))
# Smallest and largest value over both simulated sets.
(hyd.min <- min(hyd.GPGx8cTerm, hyd.GPx10R))
## [1] -7.63055
(hyd.max <- max(hyd.GPGx8cTerm, hyd.GPx10R))
## [1] 65.12112
# 75 equally spaced break points enclosing [hyd.min, hyd.max]; `length.out`
# is spelled out in full instead of relying on partial matching of `length`.
hyd.breaks <- seq(round(hyd.min - 1), round(hyd.max + 1), length.out = 75)
# First histogram; the call is closed here so that the second hist() is a
# separate statement drawn on top (add = TRUE) and not an argument of the first.
hist(hyd.GPGx8cTerm, breaks = hyd.breaks, probability = TRUE,
     col = '#1111AAAA', xlab = 'hydrophobicity',
     ylim = c(0, 0.06))
hist(hyd.GPx10R, breaks = hyd.breaks, probability = TRUE, add = TRUE,
     col = '#11AA1188')
# Mark the iRT peptides as vertical grey lines.
abline(v = hyd.iRT, col = 'grey')
# Legend swatches use exactly the colours drawn above (the GPx10R swatch
# previously used a different alpha value than its histogram).
legend("topleft", c('GPGx8cTerm', 'GPx10R', 'iRT'),
       fill = c('#1111AAAA', '#11AA1188', 'grey'))
# Relative frequency of each amino acid in the x8 pool, rounded to two digits.
round(prop.table(table(aa_pool_x8)), 2)
## aa_pool_x8
## A D G N P Q T V Y
## 0.12 0.08 0.12 0.12 0.12 0.12 0.12 0.12 0.08
# Split a window of each peptide sequence into single characters.
#
# seq:  vector of peptide sequences.
# from: first character position of the window (1-based, inclusive).
# to:   last character position of the window (inclusive).
#
# Returns one flat character vector holding the single letters of all
# windows, in input order (NULL for empty input).
#
# NOTE(review): the default window from=4 .. to=4+8 spans nine characters;
# if only eight positions are meant, callers should pass to=11 - confirm.
peptide2aa <- function(seq, from=4, to=4+8){
  # substr() and strsplit() are vectorised over seq, so no explicit loop is
  # needed; unlist() flattens the per-peptide letter vectors.
  unlist(strsplit(substr(seq, from, to), ''))
}
# Single letters at positions 4..12 (the peptide2aa defaults) of every
# simulated GPGx8cTerm peptide.
peptides.GPGx8cTerm.aa <- peptide2aa(peptides.GPGx8cTerm)
# Observed relative frequency of each letter, rounded to two digits, for
# comparison with the frequencies of the pool it was sampled from.
round(table(peptides.GPGx8cTerm.aa)/length(peptides.GPGx8cTerm.aa), 2)
## peptides.GPGx8cTerm.aa
## A D G N P Q T V Y
## 0.11 0.07 0.11 0.11 0.11 0.11 0.11 0.22 0.07
# Single letters at positions 3..12 (ten characters) of every simulated
# GPx10R peptide.
peptides.GPx10R.aa <- peptide2aa(peptides.GPx10R, from=3, to=12)
# Observed relative frequency of each letter, rounded to two digits.
round(table(peptides.GPx10R.aa)/length(peptides.GPx10R.aa), 2)
## peptides.GPx10R.aa
## A D E F G L N P Q S T V W Y
## 0.06 0.08 0.08 0.04 0.13 0.05 0.04 0.17 0.04 0.05 0.06 0.08 0.05 0.07
## [1] 30000
# Number of simulated peptides that start with GP, contain a further GP and
# end in R, counted separately for each design.
sum(grepl('^GP(.*)GP(.*)R$', peptides.GPGx8cTerm))
## [1] 6319
sum(grepl('^GP(.*)GP(.*)R$', peptides.GPx10R))
## [1] 5959
count the peptides having the same AA composition
## [1] 30000
function(x){paste(sort(unlist(strsplit(x, ''))), collapse='')}))))
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 16 17
## 9541 3606 1607 792 427 204 104 50 34 20 6 5 6 2 1 1
# write.table(tt, file='GPGx8cTerm.txt')
function(x){paste(sort(unlist(strsplit(x, ''))), collapse='')}))))
## 1 2 3 4 5
## 24844 2104 265 32 5
The NestLink function plot_in_silico_LCMS_map
graphs the LC-MS maps.
# Arrange the following plots on a 2 x 2 grid.
par(mfrow=c(2, 2))
# Draw the in-silico LC-MS map of each simulated peptide set with the
# non-exported NestLink helper (hence the ::: access). The return value is
# kept in h and overwritten by the second call.
h <- NestLink:::.plot_in_silico_LCMS_map(peptides.GPGx8cTerm, main='GPGx8cTerm')
h <- NestLink:::.plot_in_silico_LCMS_map(peptides.GPx10R, main='GPx10R')
Here is the output of the sessionInfo()
## R version 4.0.0 (2020-04-24)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.4 LTS
## Matrix products: default
## BLAS: /home/biocbuild/bbs-3.11-bioc/R/lib/
## LAPACK: /home/biocbuild/bbs-3.11-bioc/R/lib/
## locale:
## attached base packages:
## [1] stats4 parallel stats graphics grDevices utils datasets
## [8] methods base
## other attached packages:
## [1] specL_1.22.0 seqinr_3.6-1
## [3] RSQLite_2.2.0 DBI_1.1.0
## [5] knitr_1.28 scales_1.1.0
## [7] ggplot2_3.3.0 NestLink_1.4.0
## [9] ShortRead_1.46.0 GenomicAlignments_1.24.0
## [11] SummarizedExperiment_1.18.1 DelayedArray_0.14.0
## [13] matrixStats_0.56.0 Biobase_2.48.0
## [15] Rsamtools_2.4.0 GenomicRanges_1.40.0
## [17] GenomeInfoDb_1.24.0 BiocParallel_1.22.0
## [19] protViz_0.6.8 gplots_3.0.3
## [21] Biostrings_2.56.0 XVector_0.28.0
## [23] IRanges_2.22.1 S4Vectors_0.26.0
## [25] ExperimentHub_1.14.0 AnnotationHub_2.20.0
## [27] BiocFileCache_1.12.0 dbplyr_1.4.3
## [29] BiocGenerics_0.34.0 BiocStyle_2.16.0
## loaded via a namespace (and not attached):
## [1] nlme_3.1-147 bitops_1.0-6
## [3] bit64_0.9-7 RColorBrewer_1.1-2
## [5] httr_1.4.1 tools_4.0.0
## [7] R6_2.4.1 KernSmooth_2.23-17
## [9] mgcv_1.8-31 colorspace_1.4-1
## [11] ade4_1.7-15 withr_2.2.0
## [13] tidyselect_1.0.0 bit_1.1-15.2
## [15] curl_4.3 compiler_4.0.0
## [17] labeling_0.3 bookdown_0.18
## [19] caTools_1.18.0 rappdirs_0.3.1
## [21] stringr_1.4.0 digest_0.6.25
## [23] rmarkdown_2.1 jpeg_0.1-8.1
## [25] pkgconfig_2.0.3 htmltools_0.4.0
## [27] highr_0.8 fastmap_1.0.1
## [29] rlang_0.4.6 shiny_1.4.0.2
## [31] farver_2.0.3 hwriter_1.3.2
## [33] gtools_3.8.2 dplyr_0.8.5
## [35] RCurl_1.98-1.2 magrittr_1.5
## [37] GenomeInfoDbData_1.2.3 Matrix_1.2-18
## [39] Rcpp_1.0.4.6 munsell_0.5.0
## [41] lifecycle_0.2.0 stringi_1.4.6
## [43] yaml_2.2.1 MASS_7.3-51.6
## [45] zlibbioc_1.34.0 grid_4.0.0
## [47] blob_1.2.1 gdata_2.18.0
## [49] promises_1.1.0 crayon_1.3.4
## [51] lattice_0.20-41 splines_4.0.0
## [53] magick_2.3 pillar_1.4.4
## [55] codetools_0.2-16 glue_1.4.0
## [57] BiocVersion_3.11.1 evaluate_0.14
## [59] latticeExtra_0.6-29 BiocManager_1.30.10
## [61] vctrs_0.2.4 png_0.1-7
## [63] httpuv_1.5.2 gtable_0.3.0
## [65] purrr_0.3.4 assertthat_0.2.1
## [67] xfun_0.13 mime_0.9
## [69] xtable_1.8-4 later_1.0.0
## [71] tibble_3.0.1 AnnotationDbi_1.50.0
## [73] memoise_1.1.0 ellipsis_0.3.0
## [75] interactiveDisplayBase_1.26.0
Egloff, Pascal, Iwan Zimmermann, Fabian M. Arnold, Cedric A.J. Hutter, Damien Morger, Lennart Opitz, Lucy Poveda, et al. 2018. “Engineered Peptide Barcodes for In-Depth Analyses of Binding Protein Ensembles.” bioRxiv. Cold Spring Harbor Laboratory.
Escher, C., L. Reiter, B. MacLean, R. Ossola, F. Herzog, J. Chilton, M. J. MacCoss, and O. Rinner. 2012. “Using iRT, a normalized retention time for more targeted measurement of peptides.” Proteomics 12 (8):1111–21.
Krokhin, O. V., R. Craig, V. Spicer, W. Ens, K. G. Standing, R. C. Beavis, and J. A. Wilkins. 2004. “An improved model for prediction of retention times of tryptic peptides in ion pair reversed-phase HPLC: its application to protein peptide mapping by off-line HPLC-MALDI MS.” Mol. Cell Proteomics 3 (9):908–19.
Panse, C., C. Trachsel, J. Grossmann, and R. Schlapbach. 2015. “specL–an R/Bioconductor package to prepare peptide spectrum matches for use in targeted proteomics.” Bioinformatics 31 (13):2228–31.