## ----setup, echo=FALSE, results="hide"------------------------------------- knitr::opts_chunk$set(tidy=FALSE, cache=TRUE, dev="png", message=FALSE, error=FALSE, warning=FALSE) ## -------------------------------------------------------------------------- library(GenoGAM) ## A. ## specify folder and experiment design path wd <- system.file("extdata/Set1", package='GenoGAM') folder <- file.path(wd, "bam") expDesign <- file.path(wd, "experimentDesign.txt") ## B. ## register parallel backend (default is "the number of cores" - 2) BiocParallel::register(BiocParallel::MulticoreParam(worker = 2)) ## C. ## specify chunk and overhang size. chunkSize <- 50000 overhangSize <- 1000 ## D. ## the experiment design reflecting the underlying GAM ## x = position design <- ~ s(x) + s(x, by = genotype) ## -------------------------------------------------------------------------- ## build the GenoGAMDataSet ggd <- GenoGAMDataSet( expDesign, directory = folder, chunkSize = chunkSize, overhangSize = overhangSize, design = design) ggd ## -------------------------------------------------------------------------- ## compute size factors ggd <- computeSizeFactors(ggd) ggd ## the data assay(ggd) ## -------------------------------------------------------------------------- ## compute model without parameter estimation to save time in vignette result <- genogam(ggd, lambda = 4601, theta = 4.51) result ## the fit and standard error fits(result) se(result) ## -------------------------------------------------------------------------- result <- computeSignificance(result) pvalue(result) ## -------------------------------------------------------------------------- ranges = GRanges("chrI", IRanges(45000, 55000)) plotGenoGAM(result, ranges = ranges) ## -------------------------------------------------------------------------- library(GenoGAM) ## On multicore machines by default the number of available cores - 2 are registered on the default backend 'Multicore' BiocParallel::registered()[1] ## -------------------------------------------------------------------------- BiocParallel::register(BiocParallel::SnowParam(workers = 3)) ## -------------------------------------------------------------------------- BiocParallel::registered()[1] ## -------------------------------------------------------------------------- folder <- system.file("extdata/Set1", package='GenoGAM') expDesign <- read.delim( file.path(folder, "experimentDesign.txt") ) expDesign ## -------------------------------------------------------------------------- ## build the GenoGAMDataSet wd_folder <- file.path(folder, "bam") ggd <- GenoGAMDataSet( expDesign, directory = wd_folder, design = ~ s(x) + s(x, by = genotype) ) assay(ggd) ## overhang size getOverhangSize(ggd) ## chunk size getChunkSize(ggd) ## -------------------------------------------------------------------------- ggd <- GenoGAMDataSet( expDesign, directory = wd_folder, design = ~ s(x) + s(x, by = genotype), hdf5 = TRUE ) ## Note the difference in data type compared to the DataFrame above assay(ggd) ## -------------------------------------------------------------------------- GenoGAMSettings() ## -------------------------------------------------------------------------- range <- GRanges("chrI", IRanges(30000, 50000)) params <- Rsamtools::ScanBamParam(which = range) settings <- GenoGAMSettings(center = FALSE, bamParams = params) ggd <- GenoGAMDataSet( expDesign, directory = wd_folder, design = ~ s(x) + s(x, by = genotype), settings = settings) ## Note the higher counts assay(ggd) rowRanges(ggd) ## Now with chromosome specification. As only chrI is present ## in the example file, the read in will log an error and generate ## an empty GenoGAMDataSet object settings <- GenoGAMSettings(chromosomeList = c("chrII", "chrIII")) ggd <- GenoGAMDataSet( expDesign, directory = wd_folder, design = ~ s(x) + s(x, by = genotype), settings = settings) ## Empty GenoGAMDataSet due to lack of data in the example BAM file length(ggd) ggd ## -------------------------------------------------------------------------- ## Note, optimControl and estimControl is a list of more parameters. However none of them besides the shown are important settings <- GenoGAMSettings(optimMethod = "L-BFGS-B", optimControl = list(maxit = 100L), estimControl = list(eps = 1e-10, maxiter = 500L)) settings ## -------------------------------------------------------------------------- ## set HDF5 folder to 'myFolder' tmp <- tempdir() settings <- GenoGAMSettings(hdf5Control = list(dir = file.path(tmp, "myFolder"), level = 0)) HDF5Array::getHDF5DumpDir() HDF5Array::getHDF5DumpCompressionLevel() ## Another way to set this would be through the HDF5Array package HDF5Array::setHDF5DumpDir(file.path(tmp, "myOtherFolder")) settings ## -------------------------------------------------------------------------- ## For example, we choose a twice as long region but also two times less knots (double the knot spacing), ## which results in the same number of knots per tile as before. settings <- GenoGAMSettings(dataControl = list(regionSize = 8000, bpknots = 40)) settings ## -------------------------------------------------------------------------- ## read in data again, since the last read-in was an empty GenoGAM ggd <- GenoGAMDataSet( expDesign, directory = wd_folder, design = ~ s(x) + s(x, by = genotype) ) ggd <- computeSizeFactors(ggd) sizeFactors(ggd) ## -------------------------------------------------------------------------- ## fit model without parameters estimation fit <- genogam(ggd, lambda = 4601, theta = 4.51) fit ## ---- eval = FALSE--------------------------------------------------------- # ## Does not evaluate # fit_CV <- genogam(ggd, intervalSize = 200) ## -------------------------------------------------------------------------- ranges = GRanges("chrI", IRanges(45000, 55000)) plotGenoGAM(fit, ranges = ranges) ## -------------------------------------------------------------------------- plotGenoGAM(fit, ranges = ranges, scale = FALSE) ## -------------------------------------------------------------------------- fit <- computeSignificance(fit) pvalue(fit) ## -------------------------------------------------------------------------- gr <- GRanges("chrI", IRanges(c(1000, 7000), c(4000, 9000))) db <- computeRegionSignificance(fit, regions = gr, smooth = "s(x):genotype") db ## -------------------------------------------------------------------------- peaks <- callPeaks(fit, threshold = 1, smooth = "s(x):genotype") peaks peaks <- callPeaks(fit, threshold = 1, peakType = "broad", cutoff = 0.75, smooth = "s(x):genotype") peaks ## ---- eval = FALSE--------------------------------------------------------- # ## Not evaluated # writeToBEDFile(peaks, file = 'myPeaks') ## -------------------------------------------------------------------------- sessionInfo()