## ----style, eval=TRUE, echo=FALSE, results="asis"---------------------------------------
BiocStyle::latex()

## ----echo=FALSE-------------------------------------------------------------------------
library(SomaticCancerAlterations)
library(GenomicRanges)
library(ggbio)

## ----list_datasets----------------------------------------------------------------------
# Names of the available data sets and their accompanying metadata.
all_datasets <- scaListDatasets()
print(all_datasets)

meta_data <- scaMetadata()
print(meta_data)

## ----load_dataset-----------------------------------------------------------------------
# Load a single study; `merge = TRUE` requests one merged object.
ov <- scaLoadDatasets("ov_tcga", merge = TRUE)

## ----print------------------------------------------------------------------------------
head(ov, 3)

## ----summary----------------------------------------------------------------------------
# Cross-tabulate variant classification against variant type.
with(mcols(ov), table(Variant_Classification, Variant_Type))

## ---------------------------------------------------------------------------------------
# Most frequent sample IDs (default `head()` length) and the 10 most frequent
# gene symbols among the records of `ov`.
head(sort(table(ov$Sample_ID), decreasing = TRUE))
head(sort(table(ov$Hugo_Symbol), decreasing = TRUE), 10)

## ----multiple_studies-------------------------------------------------------------------
# Load the first three data sets without merging them.
three_studies <- scaLoadDatasets(all_datasets[1:3])

# `elementNROWS()` replaces `elementLengths()`, which was deprecated and later
# removed from S4Vectors.
print(elementNROWS(three_studies))
class(three_studies)

## ----merge_studies----------------------------------------------------------------------
merged_studies <- scaLoadDatasets(all_datasets[1:3], merge = TRUE)
class(merged_studies)

## ----mutated_genes_study----------------------------------------------------------------
# Count records per gene symbol and data set, order genes by their total count
# across data sets (largest first), then append marginal sums.
gene_study_count <- with(mcols(merged_studies), table(Hugo_Symbol, Dataset))
gene_order <- order(rowSums(gene_study_count), decreasing = TRUE)
gene_study_count <- gene_study_count[gene_order, ]
gene_study_count <- addmargins(gene_study_count)
head(gene_study_count)

## ----subset_studies---------------------------------------------------------------------
# Genomic interval on seqname "17" used as the region of interest below.
# NOTE(review): coordinates presumably match the genome build of the packaged
# data sets -- confirm.
tp53_region <- GRanges("17", IRanges(7571720, 7590863))
tp53_studies <- subsetByOverlaps(merged_studies, tp53_region)

## ----variant_study_table----------------------------------------------------------------
addmargins(table(tp53_studies$Variant_Classification, tp53_studies$Dataset))

## ----mutateted_genes--------------------------------------------------------------------
#' Fraction of a study's patients with a record overlapping a region
#'
#' Divides the number of distinct `Patient_ID` values among the ranges of `y`
#' that overlap `region` by the `Number_Patients` entry of the subset's
#' metadata.
#'
#' @param y Ranges object with a `Patient_ID` column and a `Number_Patients`
#'   metadata entry.
#' @param region Ranges to test for overlap with `y`.
#' @return The ratio described above.
fraction_mutated_region <- function(y, region) {
  s <- subsetByOverlaps(y, region)
  # Read the patient total from the subset, as the original code did; this
  # relies on `subsetByOverlaps()` carrying the object-level metadata over.
  # `[[` avoids the partial name matching that `$` performs on lists.
  length(unique(s$Patient_ID)) / metadata(s)[["Number_Patients"]]
}

# `vapply()` pins the result to one number per study, so a missing
# `Number_Patients` entry fails here instead of silently yielding a list.
mutated_fraction <- vapply(
  three_studies,
  fraction_mutated_region,
  FUN.VALUE = numeric(1),
  region = tp53_region
)
mutated_fraction <- data.frame(
  name = names(three_studies),
  fraction = mutated_fraction
)

## ----plot_mutated_genes-----------------------------------------------------------------
library(ggplot2)

# `geom_bar` is namespace-qualified so the ggplot2 implementation is used
# regardless of what other attached packages export under the same name.
p <- ggplot(mutated_fraction) +
  ggplot2::geom_bar(aes(x = name, y = fraction, fill = name), stat = "identity") +
  ylim(0, 1) +
  xlab("Study") +
  ylab("Ratio") +
  theme_bw()
print(p)

## ----echo=FALSE-------------------------------------------------------------------------
sessionInfo()