TCGAbiolinks provides a few functions to download mutation data from GDC. There are three options to download the data:
- Use GDCquery, GDCdownload and GDCprepare to download MAF files aligned against hg38.
- Use GDCquery, GDCdownload and GDCprepare to download MAF files aligned against hg19.
- Use getMC3MAF() to download the MC3 MAF from https://gdc.cancer.gov/about-data/publications/mc3-2017.

This example will download Aggregate GDC MAFs. For more information, please see https://github.com/NCI-GDC/gdc-maf-tool and the GDC docs.
# Build the query for the open-access masked somatic mutation data of
# TCGA-CHOL.
query <- GDCquery(
  project = "TCGA-CHOL",
  data.category = "Simple Nucleotide Variation",
  access = "open",
  legacy = FALSE,
  data.type = "Masked Somatic Mutation",
  workflow.type = "Aliquot Ensemble Somatic Variant Merging and Masking"
)

# Fetch the files selected by the query, then load them into `maf`.
GDCdownload(query)
maf <- GDCprepare(query)

# Only the first 20 rows are shown to make rendering faster.
# head() is used instead of maf[1:20, ] so that a table with fewer than
# 20 rows is not padded with NA rows.
datatable(
  head(maf, 20),
  filter = "top",
  options = list(scrollX = TRUE, keys = TRUE, pageLength = 5),
  rownames = FALSE
)
This example will download MAF (Mutation Annotation Format) files aligned against hg19 (old TCGA MAF files).
# Build the query for the open-access simple somatic mutation files of
# TCGA-CHOL.
# NOTE(review): legacy = TRUE is kept as in the original; confirm that the
# installed TCGAbiolinks version still supports this argument.
query.maf.hg19 <- GDCquery(
  project = "TCGA-CHOL",
  data.category = "Simple nucleotide variation",
  data.type = "Simple somatic mutation",
  access = "open",
  legacy = TRUE
)

# Check which MAF files are available: list the query results without the
# "sample_type" and "cases" columns and render them as an interactive table.
getResults(query.maf.hg19) %>%
  dplyr::select(-contains("sample_type")) %>%
  dplyr::select(-contains("cases")) %>%
  DT::datatable(
    filter = "top",
    options = list(scrollX = TRUE, keys = TRUE, pageLength = 10),
    rownames = FALSE
  )
# Same query as for the availability check, restricted to a single MAF file
# through `file.type`.
# NOTE(review): legacy = TRUE is kept as in the original; confirm that the
# installed TCGAbiolinks version still supports this argument.
query.maf.hg19 <- GDCquery(
  project = "TCGA-CHOL",
  data.category = "Simple nucleotide variation",
  data.type = "Simple somatic mutation",
  access = "open",
  file.type = "bcgsc.ca_CHOL.IlluminaHiSeq_DNASeq.1.somatic.maf",
  legacy = TRUE
)

# Fetch the selected file, then load it into `maf`.
GDCdownload(query.maf.hg19)
maf <- GDCprepare(query.maf.hg19)

# Only the first 20 rows are shown to make rendering faster.
# head() is used instead of maf[1:20, ] so that a table with fewer than
# 20 rows is not padded with NA rows.
datatable(
  head(maf, 20),
  filter = "top",
  options = list(scrollX = TRUE, keys = TRUE, pageLength = 5),
  rownames = FALSE
)
This will download the MC3 MAF file from https://gdc.cancer.gov/about-data/publications/mc3-2017 and add the project each sample belongs to.
# Retrieve the MC3 MAF and keep it in `maf`.
maf <- getMC3MAF()
To visualize the data you can use the Bioconductor package maftools. For more information, please check its vignette.
library(maftools)
library(dplyr)
# Build the query for the open-access masked somatic mutation data of
# TCGA-CHOL.
query <- GDCquery(
  project = "TCGA-CHOL",
  data.category = "Simple Nucleotide Variation",
  access = "open",
  legacy = FALSE,
  data.type = "Masked Somatic Mutation",
  workflow.type = "Aliquot Ensemble Somatic Variant Merging and Masking"
)

# Fetch the files selected by the query, then load them into `maf`.
GDCdownload(query)
maf <- GDCprepare(query)

# Convert the prepared table with maftools::read.maf() so the maftools
# summary and plotting functions can be applied to `maf`.
# The call is written with parentheses because a bare namespaced function
# on the right-hand side of %>% is not reliably accepted.
maf <- maf %>%
  maftools::read.maf()
# Interactive table of the per-sample summary of the MAF object.
datatable(
  getSampleSummary(maf),
  filter = "top",
  options = list(scrollX = TRUE, keys = TRUE, pageLength = 5),
  rownames = FALSE
)

# Summary dashboard of the MAF object.
plotmafSummary(maf = maf, rmOutlier = TRUE, addStat = "median", dashboard = TRUE)

# Oncoplot limited to the top 10 genes.
oncoplot(maf = maf, top = 10, removeNonMutated = TRUE)

# Compute the titv summary without plotting it. The result keeps the name
# `titv`; the function titv() stays callable because R skips non-function
# objects when resolving a call.
titv <- titv(maf = maf, plot = FALSE, useSyn = TRUE)

# Plot the titv summary.
plotTiTv(res = titv)