## ----loadStyle, warning=FALSE, echo=FALSE, message=FALSE, results="hide"------ library(BiocStyle) ## ----packages, include=FALSE-------------------------------------------------- library(YAPSA) library(Biostrings) library(BSgenome.Hsapiens.UCSC.hg19) library(knitr) opts_chunk$set(echo=TRUE) opts_chunk$set(fig.show='asis') ## ----loadPCAWGsigs------------------------------------------------------------ data(sigs_pcawg) ## ----captionSpectra, echo=FALSE----------------------------------------------- cap <-": Nucleotide exchange spectra of the Indel signatures ID3, associated with tobacco smoking, and ID6, related to deficiencies in homologous recombination repair." ## ----INDELsigExample, include=TRUE, fig.width=15, fig.height=6, fig.cap= cap---- plotExchangeSpectra_indel(PCAWG_SP_ID_sigs_df[,c(3,6)]) ## ----INDELsigInfo------------------------------------------------------------- current_caption <- paste0("Information on Indel mutational signatures.") if(!exists("repress_tables")) kable(PCAWG_SP_ID_sigInd_df, row.names=FALSE, caption=current_caption) ## ----loadGoNL----------------------------------------------------------------- data(GenomeOfNl_raw) GenomeOfNl_raw <- GenomeOfNl_raw[, c(1,2,4,5)] ## ----loadGoNLraw-------------------------------------------------------------- load_data_new <- FALSE if(load_data_new){ data <- data.frame(matrix(ncol = 8, nrow = 0)) for(index in seq_along(1:22)){ print(index) temp <- tempfile() file_path <- paste0("https://molgenis26.target.rug.nl/ downloads/gonl_public/variants/release5/ gonl.chr", index, ".snps_indels.r5.vcf.gz") download.file(file_path, temp) data <- rbind(data, read.table(gzfile(temp, paste0("gonl.chr", index, ".snps_indels.r5.vcf")), header=FALSE, sep="\t", stringsAsFactors = FALSE)) data <- data[grep("INDEL", data$V8),] unlink(temp) } colnames(data) <- c("CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER","INFO") GenomeOfNl_raw <- data[, c(1,2,4,5)] } ## ----showsTopOfDf, echo=FALSE------------------------------------------------- kable(head(GenomeOfNl_raw), caption="Head of VCF file containing the GoNL INDEL data") ## ----randomizeDataSet, warning= FALSE----------------------------------------- seed = 2 set.seed(seed) number_of_indels <- sample(c(30:70), 15, replace = TRUE) index=0 seed=3 set.seed(seed) vcf_like_indel_lists <- lapply(number_of_indels, function(size){ df_per_PID <- GenomeOfNl_raw[sample(nrow(GenomeOfNl_raw), size, replace = FALSE), ] index <<- index+1 df_per_PID$PID <- rep(paste0("PID_", index), length(size)) df_PIDs <- df_per_PID[order(df_per_PID$CHROM),] return(df_PIDs) }) vcf_like_indel_df <- do.call(rbind.data.frame, vcf_like_indel_lists) kable(head(vcf_like_indel_df), caption="Head of the vcf_like_df containing the subsampled GoNL Indel data") ## ----createMutationalCatalog, warning=FALSE----------------------------------- vcf_like_indel_trans_df <- translate_to_hg19(vcf_like_indel_df,"CHROM") mutational_cataloge_indel_df <- create_indel_mutation_catalogue_from_df( in_dat = vcf_like_indel_trans_df, in_signature_df = PCAWG_SP_ID_sigs_df) kable(head(mutational_cataloge_indel_df[,1:5])) ## ----loadCutoffs, warning=FALSE----------------------------------------------- data(cutoffs_pcawg) ## ----LCDdecompostion, warning=FALSE------------------------------------------- current_catalogue_df <- mutational_cataloge_indel_df current_sig_df <- PCAWG_SP_ID_sigs_df current_cutoff_pid_vector <- cutoffPCAWG_ID_WGS_Pid_df[3,] current_sigInd_df <- PCAWG_SP_ID_sigInd_df current_LCDlistsList <- LCD_complex_cutoff_combined( current_catalogue_df, current_sig_df, in_cutoff_vector = current_cutoff_pid_vector, in_filename = NULL, in_method = "abs", in_sig_ind_df = current_sigInd_df) current_consensus_LCDlist <- current_LCDlistsList$consensus if(!exists("repress_tables")) as.character(current_consensus_LCDlist$out_sig_ind_df$sig) ## ----captionExposure, echo=FALSE---------------------------------------------- cap <- ":Exposures to Indel mutational signatures in the artificial data created by sampling GoNL variants. Exposures were obtained from a decomposition with PCAWG Indel signatures as well as their signature specific-cutoffs (cutoffPCAWG_ID_WGS_Pid_df)." ## ----plotExposure, echo=TRUE, warning=FALSE, fig.width=15, fig.height=6, fig.cap= cap---- exposures_barplot(current_LCDlistsList$perPID$exposures, current_LCDlistsList$perPID$out_sig_ind_df) ## ----captionCI, echo=FALSE---------------------------------------------------- cap <- "Confidence interval calculation for exposures to Indel mutational signatures" ## ----CI, echo=TRUE, warning=FALSE, fig.width=17, fig.height=15, fig.cap=cap---- confidence_intervals_ID <- confidence_indel_only_calulation( in_current_indel_df = current_catalogue_df) plot(confidence_intervals_ID$p_complete_PCAWG_ID)