### R code from vignette source 'CNVPanelizer.Rnw' ################################################### ### code chunk number 1: InstallingPackage (eval = FALSE) ################################################### ## # To install from Bioconductor ## source("http://bioconductor.org/biocLite.R") ## biocLite("CNVPanelizer") ################################################### ### code chunk number 2: LoadingPackage ################################################### # To load the package library(CNVPanelizer) ################################################### ### code chunk number 3: LoadingBED (eval = FALSE) ################################################### ## ## # Bed file defining the amplicons ## bedFilepath <- "/somePath/someFile.bed" ## ## # The column number of the gene Names in the BED file. ## amplColumnNumber <- 4 ## ## # Extract the information from a bed file ## genomicRangesFromBed <- BedToGenomicRanges(bedFilepath, ## ampliconColumn = amplColumnNumber, ## split = "_") ## ## metadataFromGenomicRanges <- elementMetadata(genomicRangesFromBed) ## geneNames = metadataFromGenomicRanges["geneNames"][, 1] ## ampliconNames = metadataFromGenomicRanges["ampliconNames"][, 1] ################################################### ### code chunk number 4: LoadingPathsAndFilenames (eval = FALSE) ################################################### ## ## # Directory with the test data ## sampleDirectory <- "/somePathToTestData" ## ## # Directory with the reference data ## referenceDirectory <- "/somePathToReferenceData" ## ## # Vector with test filenames ## sampleFilenames <- list.files(path = sampleDirectory, ## pattern = ".bam$", ## full.names = TRUE) ## ## # Vector with reference filenames ## referenceFilenames <- list.files(path = referenceDirectory, ## pattern = ".bam$", ## full.names = TRUE) ################################################### ### code chunk number 5: LoadingReadCountsData (eval = FALSE) ################################################### ## ## # Should duplicated reads (same start, end site and strand) be removed ## removePcrDuplicates <- FALSE # TRUE is only recommended for Ion Torrent data ## ## # Read the Reference data set ## referenceReadCounts <- ReadCountsFromBam(referenceFilenames, ## genomicRangesFromBed, ## sampleNames = referenceFilenames, ## ampliconNames = ampliconNames, ## removeDup = removePcrDuplicates) ## ## # Read the sample of interest data set ## sampleReadCounts <- ReadCountsFromBam(sampleFilenames, ## genomicRangesFromBed, ## sampleNames = sampleFilenames, ## ampliconNames = ampliconNames, ## removeDup = removePcrDuplicates) ################################################### ### code chunk number 6: LoadingSyntheticData ################################################### data(sampleReadCounts) data(referenceReadCounts) # Gene names should be same size as row columns # For real data this is a vector of the genes associated # with each row/amplicon. For example Gene1, Gene1, Gene2, Gene2, Gene3, ... geneNames <- row.names(referenceReadCounts) # Not defined for synthetic data # For real data this gives a unique name to each amplicon. # For example Gene1:Amplicon1, Gene1:Amplicon2, Gene2:Amplicon1, # Gene2:Amplicon2, Gene3:Amplicon1 ... ampliconNames <- NULL ################################################### ### code chunk number 7: NormalizedReadCounts ################################################### normalizedReadCounts <- CombinedNormalizedCounts(sampleReadCounts, referenceReadCounts, ampliconNames = ampliconNames) # After normalization data sets need to be splitted again to perform bootstrap samplesNormalizedReadCounts = normalizedReadCounts["samples"][[1]] referenceNormalizedReadCounts = normalizedReadCounts["reference"][[1]] ################################################### ### code chunk number 8: NumberOfReplicates ################################################### # Number of bootstrap replicates to be used replicates <- 10000 ################################################### ### code chunk number 9: RealNumberOfReplicatesToBeUsed ################################################### # To speed up vignette generation while debugging #replicates <- 10 ################################################### ### code chunk number 10: BootList ################################################### # Perform the bootstrap based analysis bootList <- BootList(geneNames, samplesNormalizedReadCounts, referenceNormalizedReadCounts, replicates = replicates) ################################################### ### code chunk number 11: BackgroundNoise ################################################### # Estimate the background noise left after normalization backgroundNoise <- Background(geneNames, samplesNormalizedReadCounts, referenceNormalizedReadCounts, bootList, replicates = replicates, significanceLevel = 0.1, robust = TRUE) ################################################### ### code chunk number 12: ReportTables ################################################### # Build report tables reportTables <- ReportTables(geneNames, samplesNormalizedReadCounts, referenceNormalizedReadCounts, bootList, backgroundNoise) ################################################### ### code chunk number 13: ReportTablesToShow ################################################### options(width=500) # to show the entire table.. # to avoid have to print to other page.. numberOfGenesViewport = 20 #if (nrow(reportTables[[1]]) > numberOfGenes) { # numberOfGenes = numberOfGenesViewport #} else { # numberOfGenes = nrow(reportTables[[1]]) #} #reportTables[[1]][1:numberOfGenes, ] # index of the sample to show sampleIndexToShow = 2 # TODO improve this.. remove the column.. # but for now just hide "Signif." column... no space available #indexToHide <- which(colnames(reportTables[[1]])=="Signif.") indexToHide <- which(colnames(reportTables[[1]])=="MeanRatio") reportTables[[sampleIndexToShow]][1:numberOfGenesViewport, -c(indexToHide)] #reportTables[[sampleIndexToShow]][1:numberOfGenesViewport, ] ################################################### ### code chunk number 14: HelperFunctions (eval = FALSE) ################################################### ## ## # Directory where the generated files with the analysis results will be saved. ## outputDirectory <- "./tmp" ## dir.create(outputDirectory, recursive = TRUE) ## ## # Export the report tables to excel format ## reportTablesFilepath <- file.path(outputDirectory, "report_tables.xlsx") ## WriteListToXLSX(reportTables, reportTablesFilepath) ## ## # # Export read counts to excel format ## readCountsFilepath <- file.path(outputDirectory, "readCounts.xlsx") ## normalizedReadCountsFilepath <- file.path(outputDirectory, ## "normalizedReadCounts.xlsx") ## WriteListToXLSX(list(samplesReadCount = sampleReadCounts, ## referenceReadCounts = referenceNormalizedReadCounts), ## readCountsFilepath) ## WriteListToXLSX(list(samplesReadCount = samplesNormalizedReadCounts, ## referenceReadCounts = referenceNormalizedReadCounts), ## normalizedReadCountsFilepath) ## ## justToHide = PlotBootstrapDistributions(bootList, reportTables, outputFolder=outputDirectory, save=TRUE) ## ## save.image(file = file.path(outputDirectory, "RSession.Rdata")) ## ################################################### ### code chunk number 15: JustToShowBootPlot (eval = FALSE) ################################################### ## PlotBootstrapDistributions(bootList, reportTables) ################################################### ### code chunk number 16: BootstrapPlot ################################################### PlotBootstrapDistributions(bootList, reportTables)[[sampleIndexToShow]]