## ----setup, echo=FALSE, results="hide"-------------------------------------
knitr::opts_chunk$set(tidy=FALSE, cache=TRUE,
                      dev="png", 
                      message=FALSE, error=FALSE, warning=FALSE)

## --------------------------------------------------------------------------
library(GenoGAM)

## A.
## specify folder and experiment design path
wd <- system.file("extdata/Set1", package='GenoGAM')
folder <- file.path(wd, "bam")
expDesign <- file.path(wd, "experimentDesign.txt")

## B.
## register parallel backend (default is "the number of cores" - 2)
BiocParallel::register(BiocParallel::MulticoreParam(worker = 2))

## C.
## specify chunk and overhang size.
chunkSize <- 50000
overhangSize <- 1000

## D.
## the experiment design reflecting the underlying GAM
## x = position
design <- ~ s(x) + s(x, by = genotype)

## --------------------------------------------------------------------------
## build the GenoGAMDataSet
ggd <- GenoGAMDataSet(
  expDesign, directory = folder,
  chunkSize = chunkSize, overhangSize = overhangSize,
  design = design)

ggd

## --------------------------------------------------------------------------
## compute size factors
ggd <- computeSizeFactors(ggd)

ggd

## the data
assay(ggd)

## --------------------------------------------------------------------------
## compute model without parameter estimation to save time in vignette
result <- genogam(ggd, lambda = 4601, theta = 4.51)

result

## the fit and standard error
fits(result)
se(result)

## --------------------------------------------------------------------------
result <- computeSignificance(result)

pvalue(result)

## --------------------------------------------------------------------------
ranges = GRanges("chrI", IRanges(45000, 55000))
plotGenoGAM(result, ranges = ranges)

## --------------------------------------------------------------------------
library(GenoGAM)

## On multicore machines by default the number of available cores - 2 are registered on the default backend 'Multicore'
BiocParallel::registered()[1]

## --------------------------------------------------------------------------
BiocParallel::register(BiocParallel::SnowParam(workers = 3))

## --------------------------------------------------------------------------
BiocParallel::registered()[1]

## --------------------------------------------------------------------------
folder <- system.file("extdata/Set1", package='GenoGAM')

expDesign <- read.delim(
  file.path(folder, "experimentDesign.txt")
)

expDesign

## --------------------------------------------------------------------------
## build the GenoGAMDataSet
wd_folder <- file.path(folder, "bam")
ggd <- GenoGAMDataSet(
  expDesign, directory = wd_folder,
  design = ~ s(x) + s(x, by = genotype)
)

assay(ggd)

## overhang size
getOverhangSize(ggd)

## chunk size
getChunkSize(ggd)

## --------------------------------------------------------------------------
ggd <- GenoGAMDataSet(
  expDesign, directory = wd_folder,
  design = ~ s(x) + s(x, by = genotype), hdf5 = TRUE
)

## Note the difference in data type compared to the DataFrame above
assay(ggd)

## --------------------------------------------------------------------------
GenoGAMSettings()

## --------------------------------------------------------------------------
range <- GRanges("chrI", IRanges(30000, 50000))
params <- Rsamtools::ScanBamParam(which = range)
settings <- GenoGAMSettings(center = FALSE,
                            bamParams = params)

ggd <- GenoGAMDataSet(
  expDesign, directory = wd_folder,
  design = ~ s(x) + s(x, by = genotype),
  settings = settings)

## Note the higher counts
assay(ggd)
rowRanges(ggd)

## Now with chromosome specification. As only chrI is present
## in the example file, the read in will log an error and generate
## an empty GenoGAMDataSet object
settings <- GenoGAMSettings(chromosomeList = c("chrII", "chrIII"))

ggd <- GenoGAMDataSet(
  expDesign, directory = wd_folder,
  design = ~ s(x) + s(x, by = genotype),
  settings = settings)

## Empty GenoGAMDataSet due to lack of data in the example BAM file
length(ggd)
ggd

## --------------------------------------------------------------------------
## Note, optimControl and estimControl is a list of more parameters. However none of them besides the shown are important
settings <- GenoGAMSettings(optimMethod = "L-BFGS-B", optimControl = list(maxit = 100L),
                            estimControl = list(eps = 1e-10, maxiter = 500L)) 
settings

## --------------------------------------------------------------------------
## set HDF5 folder to 'myFolder'
tmp <- tempdir()
settings <- GenoGAMSettings(hdf5Control = list(dir = file.path(tmp, "myFolder"), level = 0))
HDF5Array::getHDF5DumpDir()
HDF5Array::getHDF5DumpCompressionLevel()

## Another way to set this would be through the HDF5Array package
HDF5Array::setHDF5DumpDir(file.path(tmp, "myOtherFolder"))
settings

## --------------------------------------------------------------------------
## For example, we choose a twice as long region but also two times less knots (double the knot spacing),
## which results in the same number of knots per tile as before.
settings <- GenoGAMSettings(dataControl = list(regionSize = 8000, bpknots = 40))
settings

## --------------------------------------------------------------------------
## read in data again, since the last read-in was an empty GenoGAM
ggd <- GenoGAMDataSet(
  expDesign, directory = wd_folder,
  design = ~ s(x) + s(x, by = genotype)
)

ggd <- computeSizeFactors(ggd)
sizeFactors(ggd)

## --------------------------------------------------------------------------
## fit model without parameters estimation
fit <- genogam(ggd, lambda = 4601, theta = 4.51)
fit

## ---- eval = FALSE---------------------------------------------------------
#  ## Does not evaluate
#  fit_CV <- genogam(ggd, intervalSize = 200)

## --------------------------------------------------------------------------
ranges = GRanges("chrI", IRanges(45000, 55000))
plotGenoGAM(fit, ranges = ranges)

## --------------------------------------------------------------------------
plotGenoGAM(fit, ranges = ranges, scale = FALSE)

## --------------------------------------------------------------------------
fit <- computeSignificance(fit)
pvalue(fit)

## --------------------------------------------------------------------------
gr <- GRanges("chrI", IRanges(c(1000, 7000), c(4000, 9000)))
db <- computeRegionSignificance(fit, regions = gr, smooth = "s(x):genotype")
db

## --------------------------------------------------------------------------
peaks <- callPeaks(fit, threshold = 1, smooth = "s(x):genotype")
peaks

peaks <- callPeaks(fit, threshold = 1, peakType = "broad",
                   cutoff = 0.75, smooth = "s(x):genotype")
peaks

## ---- eval = FALSE---------------------------------------------------------
#  ## Not evaluated
#  writeToBEDFile(peaks, file = 'myPeaks')

## --------------------------------------------------------------------------
sessionInfo()