Mapped reads from tumor and normal tissue can be processed with mutation caller software such as VarScan or MUTECT. In this example, we use a hypothetical vcf file from VarScan output which contains mutation calls from both normal and tumor samples.
Preparing CNV Data from DNAcopy
The object seg is the segment call output from DNAcopy and min.num here specifies the minimum segment size to keep
library(GenomicRanges)
min.num <- 10
cnv.gr <- with(subset(seg$output, num.mark >= min.num & ! chrom %in% c("chrX", "chrY")) , GRanges(chrom, IRanges(loc.start, loc.end), mcols=cbind(num.mark, seg.mean)))
Then merge the SNP and CNV GRanges objects.
Example data in the desired format is provided as part of this package as GRanges objects and can be loaded as shown below. To utilize this vignette, you must first load BubbleTree below. You don’t need to use “suppressMessages”.
suppressMessages(
library(BubbleTree)
)
allCall.lst is pre-calculated CNV data. allRBD.lst is simply the RBD data from below.
load(system.file("data", "allCall.lst.RData", package="BubbleTree"))
head(allCall.lst[[1]]@rbd)
## seqnames start end width strand seg.id num.mark lrr
## 1 chr10 93890 38769716 38675827 * 806 31699 0.1413
## 2 chr10 38877329 135523936 96646608 * 808 74425 0.1415
## 3 chr11 133952 134946370 134812419 * 812 102934 0.1412
## 4 chr12 60000 133841793 133781794 * 813 103392 0.1413
## 5 chr13 19020000 115109861 96089862 * 814 76080 0.1419
## 6 chr14 20191636 107288640 87097005 * 823 68709 0.1425
## kurtosis hds hds.sd het.cnt seg.size
## 1 -0.093044958 0.01851852 0.06051370 10400 1.487216
## 2 -0.048701274 0.01851852 0.06046402 25414 3.491784
## 3 -0.018840021 0.01851852 0.06052843 36183 4.829335
## 4 -0.007149860 0.01851852 0.06096056 36798 4.850823
## 5 -0.022536940 0.01851852 0.06057517 27278 3.569431
## 6 -0.004935614 0.01851852 0.06166893 25001 3.223607
However, if you wish to create your own RBD object from your input, you would use the code below. There is test data available in this package that is used for demonstration purposes.
# load sample files
load(system.file("data", "cnv.gr.rda", package="BubbleTree"))
load(system.file("data", "snp.gr.rda", package="BubbleTree"))
# load annotations
load(system.file("data", "centromere.dat.rda", package="BubbleTree"))
load(system.file("data", "cyto.gr.rda", package="BubbleTree"))
load(system.file("data", "cancer.genes.minus2.rda", package="BubbleTree"))
load(system.file("data", "vol.genes.rda", package="BubbleTree"))
load(system.file("data", "gene.uni.clean.gr.rda", package="BubbleTree"))
# initialize RBD object
r <- new("RBD", unimodal.kurtosis=-0.1)
# create new RBD object with GenomicRanges objects for SNPs and CNVs
rbd <- makeRBD(r, snp.gr, cnv.gr)
head(rbd)
## seqnames start end width strand seg.id num.mark lrr
## 1 chr1 65625 2066855 2001231 * 1 548 0.1997
## 2 chr1 2075796 38489397 36413602 * 2 5284 -0.4146
## 3 chr1 38511244 39761601 1250358 * 3 72 -0.0511
## 4 chr1 39763396 39982177 218782 * 4 112 0.0401
## 5 chr1 39988109 43905367 3917259 * 5 601 0.0372
## 6 chr1 43905709 44128685 222977 * 6 53 0.2822
## kurtosis hds hds.sd het.cnt seg.size
## 1 0.1830119 0.01220 0.05007095 71 0.24809178
## 2 -1.9020390 0.35600 0.06296830 501 2.39218420
## 3 -2.0000000 0.15555 0.03471894 2 0.03259600
## 4 -0.9912620 0.12060 0.06761821 4 0.05070489
## 5 -1.4809979 0.14560 0.06186714 47 0.27208605
## 6 -2.0000000 0.07280 0.05176022 2 0.02399428
# create a new prediction
btreepredictor <- new("BTreePredictor", rbd=rbd, max.ploidy=6, prev.grid=seq(0.2,1, by=0.01))
pred <- btpredict(btreepredictor)
# create rbd plot
btreeplotter <- new("BTreePlotter", max.ploidy=5, max.size=10)
btree <- drawBTree(btreeplotter, pred@rbd)
## Warning in drawBTree(btreeplotter, pred@rbd): More ploidy might be suggested: 1.6, 1.6, 2.2, 1.7, 1.9, 1.5, 1.7, 2.2, 1.9, 2.1, 1.7
print(btree)
# create rbd.adj plot
btreeplotter <- new("BTreePlotter", branch.col="gray50")
btree <- drawBTree(btreeplotter, pred@rbd.adj)
## Warning in drawBTree(btreeplotter, pred@rbd.adj): More ploidy might be suggested: 1.8, 1.9, 2.1, 2, 2.2, 2.9, 2.3, 2.6, 1.8, 1.9, 2, 2.3, 3, 2.6, 2.9, 2.3
print(btree)
# create a combined plot with rbd and rbd.adj that shows the arrows indicating change
btreeplotter <- new("BTreePlotter", max.ploidy=5, max.size=10)
arrows <- trackBTree(btreeplotter,
pred@rbd,
pred@rbd.adj,
min.srcSize=0.01,
min.trtSize=0.01)
## Warning: Ignoring unknown aesthetics: fill
btree <- drawBTree(btreeplotter, pred@rbd) + arrows
## Warning in drawBTree(btreeplotter, pred@rbd): More ploidy might be suggested: 1.6, 1.6, 2.2, 1.7, 1.9, 1.5, 1.7, 2.2, 1.9, 2.1, 1.7
print(btree)
# create a plot with overlays of significant genes
btreeplotter <- new("BTreePlotter", branch.col="gray50")
annotator <- new("Annotate")
comm <- btcompare(vol.genes, cancer.genes.minus2)
## 77 are common, whereas 35 and 302 are unique in each dataset
sample.name <- "22_cnv_snv"
btree <- drawBTree(btreeplotter, pred@rbd.adj) +
ggplot2::labs(title=sprintf("%s (%s)", sample.name, info(pred)))
## Warning in drawBTree(btreeplotter, pred@rbd.adj): More ploidy might be suggested: 1.8, 1.9, 2.1, 2, 2.2, 2.9, 2.3, 2.6, 1.8, 1.9, 2, 2.3, 3, 2.6, 2.9, 2.3
out <- pred@result$dist %>%
filter(seg.size >= 0.1 ) %>%
arrange(gtools::mixedorder(as.character(seqnames)), start)
ann <- with(out, {
annoByGenesAndCyto(annotator,
as.character(out$seqnames),
as.numeric(out$start),
as.numeric(out$end),
comm$comm,
gene.uni.clean.gr=gene.uni.clean.gr,
cyto.gr=cyto.gr)
})
out$cyto <- ann$cyto
out$genes <- ann$ann
btree <- btree + drawFeatures(btreeplotter, out)
print(btree)
# print out purity and ploidy values
info <- info(pred)
cat("\nPurity/Ploidy: ", info, "\n")
##
## Purity/Ploidy: Purity: 0.71, 0.26; Ploidy: 3.0; Deviation: 0.02
The remaining datasets used to support the CNV data display on the BubbleTree plots.
load(system.file("data", "cancer.genes.minus2.rda", package="BubbleTree"))
load(system.file("data", "vol.genes.rda", package="BubbleTree"))
load(system.file("data", "gene.uni.clean.gr.rda", package="BubbleTree"))
load(system.file("data", "cyto.gr.rda", package="BubbleTree"))
load(system.file("data", "centromere.dat.rda", package="BubbleTree"))
load(system.file("data", "all.somatic.lst.RData", package="BubbleTree"))
load(system.file("data", "allHetero.lst.RData", package="BubbleTree"))
load(system.file("data", "allCNV.lst.RData", package="BubbleTree"))
load(system.file("data", "hg19.seqinfo.rda", package="BubbleTree"))
# lists of 379 known cancer genes
head(cancer.genes.minus2)
## [1] "ABL1" "ACVR1B" "AKR1B1" "AKT1" "ALK" "APC"
# another list of 105 known cancer genes
head(vol.genes)
## [1] "ABL1" "AKT2" "ALK" "APC" "ATM" "AXIN2"
# full gene annotation Grange object
head(gene.uni.clean.gr)
## GRanges object with 6 ranges and 1 metadata column:
## seqnames ranges strand | gene.symbol
## <Rle> <IRanges> <Rle> | <character>
## A1BG chr19 [ 58858172, 58874214] - | A1BG
## NAT2 chr8 [ 18248755, 18258723] + | NAT2
## ADA chr20 [ 43248163, 43280376] - | ADA
## CDH2 chr18 [ 25530930, 25757445] - | CDH2
## AKT3 chr1 [243651535, 244006886] - | AKT3
## GAGE12F chrX [ 49217763, 49233491] + | GAGE12F
## -------
## seqinfo: 24 sequences from hg19 genome
# cytoband coordinate data
head(cyto.gr)
## GRanges object with 6 ranges and 2 metadata columns:
## seqnames ranges strand | name gieStain
## <Rle> <IRanges> <Rle> | <character> <character>
## [1] chr1 [ 0, 2300000] * | p36.33 gneg
## [2] chr1 [ 2300000, 5400000] * | p36.32 gpos25
## [3] chr1 [ 5400000, 7200000] * | p36.31 gneg
## [4] chr1 [ 7200000, 9200000] * | p36.23 gpos25
## [5] chr1 [ 9200000, 12700000] * | p36.22 gneg
## [6] chr1 [12700000, 16200000] * | p36.21 gpos50
## -------
## seqinfo: 24 sequences from hg19 genome
# centromere coordinate data
head(centromere.dat)
## X.bin chrom chromStart chromEnd ix n size type bridge
## 2 23 chr1 121535434 124535434 1270 N 3000000 centromere no
## 43 20 chr2 92326171 95326171 770 N 3000000 centromere no
## 60 2 chr3 90504854 93504854 784 N 3000000 centromere no
## 67 1 chr4 49660117 52660117 447 N 3000000 centromere no
## 80 14 chr5 46405641 49405641 452 N 3000000 centromere no
## 91 16 chr6 58830166 61830166 628 N 3000000 centromere no
# SNV location data
head(all.somatic.lst, n=1L)
## $HCC4.Primary.Tumor
## GRanges object with 229 ranges and 1 metadata column:
## seqnames ranges strand | score
## <Rle> <IRanges> <Rle> | <numeric>
## [1] chr1 [20982538, 20982538] * | 0.0741
## [2] chr1 [23419976, 23419976] * | 0.5696
## [3] chr1 [40859043, 40859043] * | 0.358
## [4] chr1 [41976299, 41976299] * | 0.3333
## [5] chr1 [47124291, 47124291] * | 0.2867
## ... ... ... ... . ...
## [225] chr20 [43964620, 43964620] * | 0.0508
## [226] chr20 [48503292, 48503292] * | 0.5333
## [227] chr21 [33043893, 33043893] * | 0.3987
## [228] chr21 [37618302, 37618302] * | 0.3883
## [229] chr22 [38087422, 38087422] * | 0.2037
## -------
## seqinfo: 22 sequences from an unspecified genome; no seqlengths
# sequence variants
head(allHetero.lst, n=1L)
## $sam2
## GRanges object with 592971 ranges and 1 metadata column:
## seqnames ranges strand | score
## <Rle> <IRanges> <Rle> | <numeric>
## [1] chr2 [95633262, 95633262] * | 0
## [2] chr2 [95818028, 95818028] * | 0
## [3] chr2 [95937209, 95937209] * | 1
## [4] chr2 [96742179, 96742179] * | 0
## [5] chr2 [96922986, 96922986] * | 1
## ... ... ... ... . ...
## [592967] chr22 [51171854, 51171854] * | 0.4
## [592968] chr22 [51174391, 51174391] * | 0.517241379310345
## [592969] chr22 [51174533, 51174533] * | 0.488888888888889
## [592970] chr22 [51178405, 51178405] * | 0.5
## [592971] chr22 [51178709, 51178709] * | 0.36
## -------
## seqinfo: 21 sequences from an unspecified genome; no seqlengths
# copy number variation data
head(allCNV.lst, n=1L)
## $sam2
## GRanges object with 1305 ranges and 2 metadata columns:
## seqnames ranges strand | num.mark score
## <Rle> <IRanges> <Rle> | <numeric> <numeric>
## [1] chr1 [ 10000, 50753] * | 38 -1.2722
## [2] chr1 [ 51753, 79890] * | 22 -2.6144
## [3] chr1 [ 81231, 88087] * | 5 -4.714
## [4] chr1 [ 89174, 110680] * | 17 -2.4498
## [5] chr1 [111680, 139748] * | 24 -1.6389
## ... ... ... ... . ... ...
## [1301] chr9 [ 10000, 37354] * | 29 -0.2345
## [1302] chr9 [ 40777, 138916379] * | 93375 0.1409
## [1303] chr9 [138930478, 138946579] * | 11 -0.382
## [1304] chr9 [138947579, 141141038] * | 1775 0.1411
## [1305] chr9 [141142038, 141152573] * | 12 -0.1797
## -------
## seqinfo: 25 sequences from an unspecified genome; no seqlengths
# hg19 sequence data
hg19.seqinfo
## Seqinfo object with 24 sequences from hg19 genome:
## seqnames seqlengths isCircular genome
## chr1 249250621 FALSE hg19
## chr2 243199373 FALSE hg19
## chr3 198022430 FALSE hg19
## chr4 191154276 FALSE hg19
## chr5 180915260 FALSE hg19
## ... ... ... ...
## chr20 63025520 FALSE hg19
## chr21 48129895 FALSE hg19
## chr22 51304566 FALSE hg19
## chrX 155270560 FALSE hg19
## chrY 59373566 FALSE hg19