## ---- echo = FALSE------------------------------------------------------------
suppressPackageStartupMessages({
  library(rtracklayer)
  library(GenomicRanges)
  library(RNAmodR.ML)
  library(RNAmodR.Data)
})

## ---- eval = FALSE------------------------------------------------------------
#  library(rtracklayer)
#  library(GenomicRanges)
#  library(RNAmodR.ML)
#  library(RNAmodR.Data)

## -----------------------------------------------------------------------------
setClass("ModMLExample",
         contains = c("RNAModifierML"),
         prototype = list(mod = c("D"),
                          score = "score",
                          dataType = c("PileupSequenceData",
                                       "CoverageSequenceData"),
                          mlModel = character(0)))
# constructor function for ModMLExample
ModMLExample <- function(x, annotation = NA, sequences = NA, seqinfo = NA,
                           ...){
  RNAmodR:::Modifier("ModMLExample", x = x, annotation = annotation,
                     sequences = sequences, seqinfo = seqinfo, ...)
}

setClass("ModSetMLExample",
         contains = "ModifierSet",
         prototype = list(elementType = "ModMLExample"))
# constructor function for ModSetMLExample
ModSetMLExample <- function(x, annotation = NA, sequences = NA, seqinfo = NA,
                              ...){
  RNAmodR:::ModifierSet("ModMLExample", x, annotation = annotation,
                        sequences = sequences, seqinfo = seqinfo, ...)
}

## -----------------------------------------------------------------------------
setMethod(
  f = "aggregateData",
  signature = signature(x = "ModMLExample"),
  definition =
    function(x){
      aggregate_example(x)
    }
)

## ----include=FALSE------------------------------------------------------------
annotation <- GFF3File(RNAmodR.Data.example.gff3())
sequences <- RNAmodR.Data.example.fasta()
files <- list("wt" = c(treated = RNAmodR.Data.example.bam.1(),
                       treated = RNAmodR.Data.example.bam.2(),
                       treated = RNAmodR.Data.example.bam.3()))

## -----------------------------------------------------------------------------
me <-  ModMLExample(files[[1]], annotation, sequences)

## -----------------------------------------------------------------------------
data("dmod",package = "RNAmodR.ML")
# we just select the next U position from known positions
nextUPos <- function(gr){
  nextU <- lapply(seq.int(1L,2L),
                  function(i){
                    subseq <- subseq(sequences(me)[dmod$Parent], start(dmod)+3L)
                    pos <- start(dmod) + 2L + 
                      vapply(strsplit(as.character(subseq),""),
                    function(y){which(y == "U")[i]},integer(1))
                    ans <- dmod
                    ranges(ans) <- IRanges(start = pos, width = 1L)
                    ans
                  })
  nextU <- do.call(c,nextU)
  nextU$mod <- NULL
  unique(nextU)
}
nondmod <- nextUPos(dmod)
nondmod <- nondmod[!(nondmod %in% dmod)]
coord <- unique(c(dmod,nondmod))
coord <- coord[order(as.integer(coord$Parent))]

## -----------------------------------------------------------------------------
trainingData <- trainingData(me,coord)
trainingData <- unlist(trainingData, use.names = FALSE)
# converting logical labels to integer
trainingData$labels <- as.integer(trainingData$labels)

## -----------------------------------------------------------------------------
library(ranger)
model <- ranger(labels ~ ., data.frame(trainingData))

## -----------------------------------------------------------------------------
setClass("ModifierMLexample",
         contains = c("ModifierMLranger"),
         prototype = list(model = model))
ModifierMLexample <- function(...){
  new("ModifierMLexample")
}
mlmodel <- ModifierMLexample()

## -----------------------------------------------------------------------------
getMethod("useModel", c("ModifierMLranger","ModifierML"))

## -----------------------------------------------------------------------------
setMLModel(me) <- mlmodel

## -----------------------------------------------------------------------------
setMethod(f = "useMLModel",
          signature = signature(x = "ModMLExample"),
          definition =
            function(x){
              predictions <- useModel(getMLModel(x), x)
              data <- getAggregateData(x)
              unlisted_data <- unlist(data, use.names = FALSE)
              unlisted_data$score <- unlist(predictions)
              x@aggregate <- relist(unlisted_data,data)
              x
            }
)

## -----------------------------------------------------------------------------
me <- aggregate(me, force = TRUE)

## ----plot1, fig.cap="Performance of the maching learning model to distinguish unmodified from modified nucleotides.", fig.asp=1.5, dev="png"----
plotROC(me, dmod)

## -----------------------------------------------------------------------------
setMethod(
  f = "findMod",
  signature = signature(x = "ModMLExample"),
  definition =
    function(x){
      find_mod_example(x, 25L)
    }
)

## -----------------------------------------------------------------------------
rm(me)
setClass("ModMLExample",
         contains = c("RNAModifierML"),
         prototype = list(mod = c("D"),
                          score = "score",
                          dataType = c("PileupSequenceData",
                                       "CoverageSequenceData"),
                          mlModel = "ModifierMLexample"))
me <-  ModMLExample(files[[1]], annotation, sequences)

## -----------------------------------------------------------------------------
mod <- modifications(me)
mod <- split(mod, factor(mod$Parent,levels = unique(mod$Parent)))
mod

## -----------------------------------------------------------------------------
options(ucscChromosomeNames=FALSE)

## ----plot2, fig.cap="Visualization of sequence data", dev="png"---------------
plotDataByCoord(sequenceData(me),mod[["4"]][1])

## -----------------------------------------------------------------------------
nonValidMod <- mod[c("1","4")]
nonValidMod[["18"]] <- nonValidMod[["18"]][2]
nonValidMod[["26"]] <- nonValidMod[["26"]][2]
nonValidMod <- unlist(nonValidMod)
nonValidMod <- nonValidMod[,"Parent"]
coord <- unique(c(dmod,nondmod,nonValidMod))
coord <- coord[order(as.integer(coord$Parent))]

## -----------------------------------------------------------------------------
trainingData <- trainingData(me,coord)
trainingData <- unlist(trainingData, use.names = FALSE)
trainingData$labels <- as.integer(trainingData$labels)

## -----------------------------------------------------------------------------
model2 <- ranger(labels ~ ., data.frame(trainingData), num.trees = 2000)
setClass("ModifierMLexample2",
         contains = c("ModifierMLranger"),
         prototype = list(model = model2))
ModifierMLexample2 <- function(...){
  new("ModifierMLexample2")
}
mlmodel2 <- ModifierMLexample2()
me2 <- me
setMLModel(me2) <- mlmodel2
me2 <- aggregate(me2, force = TRUE)

## ----plot3, fig.cap="Performance aggregation of multiple samples and strategies."----
plotROC(me2, dmod, score="score")

## -----------------------------------------------------------------------------
setMethod(
  f = "findMod",
  signature = signature(x = "ModMLExample"),
  definition =
    function(x){
      find_mod_example(x, 25L)
    }
)
me2 <- modify(me2, force = TRUE)
modifications(me2)

## -----------------------------------------------------------------------------
mse <- ModSetMLExample(list(one = me, two = me2))

## ----plot4, fig.cap="Performance average across models", dev="png"------------
plotROC(mse, dmod, score= "score",
        plot.args = list(avg = "threshold", spread.estimate = "stderror"))

## ----sessioninfo--------------------------------------------------------------
sessionInfo()