## ---- code=readLines(system.file('vignettes_inc.R', package='biodb')), echo=FALSE----
# Disable automatic loading of extra biodb packages
Sys.setenv(BIODB_AUTOLOAD_EXTRA_PKGS="FALSE")

biodbVignettes <- data.frame()
files <- Sys.glob('*.Rmd')
for (f in files) {
	name <- sub('^(.*)\\.Rmd', '\\1', f, perl=TRUE)
	firstLines <- readLines(f, n=20)
	title <- grep("^title:", firstLines, value=TRUE)
	title <- sub('^title: *"(.*)\\.?"$', '\\1', title, perl=TRUE)
	desc <- grep("%\\\\VignetteIndexEntry", firstLines, value=TRUE)
	desc <- sub('^.*VignetteIndexEntry{(.*)}.*$', '\\1', desc, perl=TRUE)
    html <- paste0(name, '.html')
    link <- paste0('[', title, '](', html, ')')
    biodbVignettes <- rbind(biodbVignettes, data.frame(name=name, title=title,
                                                       desc=desc, html=html,
                                                       link=link))
}

make_vignette_ref <- function(name) {
	cat(biodbVignettes[biodbVignettes$name == name, 'link', drop=TRUE])
}

insert_features_table <- function() {
    featuresFile <- system.file("features.tsv",
                                package='biodb')
    featuresDf <- read.table(featuresFile, sep="\t", header=TRUE, quote="",
                             stringsAsFactors=FALSE)
    knitr::kable(featuresDf, "pipe", label="features",
                 caption="*biodb* main features. These are generic features (i.e.: present at top-level of architecture or present in at least a group of connectors), unless specified otherwise.")
}

## -----------------------------------------------------------------------------
biodb::genNewExtPkg(path='biodbChebiEx', dbName='chebi.ex', connType='compound',
                    dbTitle='ChEBI connector example', entryType='xml', remote=TRUE)

## -----------------------------------------------------------------------------
list.files('biodbChebiEx', all.files=TRUE, recursive=TRUE)

## ---- eval=FALSE, highlight=FALSE, code=readLines('biodbChebiEx/inst/definitions.yml')----
#  # biodb example definitions file for extensions packages, version 1.0.0
#  
#  databases:
#    chebi.ex:
#      name: ChEBI connector example
#      description: Write here the description of this database.
#      entry.content.type: xml
#      parsing.expr:
#        accession: substring-after(//dbns:return/dbns:accessionId,'ACCESSION:')
#        name:
#        - //dbns:name
#        - //dbns:synonyms/dbns:data
#        mass: //dbns:mass
#        monoisotopic.mass: //dbns:monoisotopicMass
#        smiles: //dbns:return/dbns:smiles
#        inchi: //dbns:return/dbns:inchi
#        inchikey: //dbns:return/dbns:inchiKey
#        formula:
#        - //dbns:Formulae/dbns:source[text()='MyDatabase']/../dbns:data
#        - (//dbns:Formulae/dbns:data)[1]
#      xml.ns:
#        dbns: https://my.database.org/webservices/v1
#        xsd: http://www.w3.org/2001/XMLSchema
#      searchable.fields:
#        - name
#        - monoisotopic.mass
#        - molecular.mass
#        - average.mass
#        - nominal.mass
#      # Length in seconds of the connection sliding window
#      scheduler.t: 1
#      # Number of connections allowed inside the connection sliding window
#      scheduler.n: 3
#      urls:
#        # Base URL of the database server, where to find entry pages
#        base.url: https://my.database.org/mydb/
#        # Webservice URL to use to contact web services
#        ws.url: https://my.database.org/webservices/mydb/3.2/
#        # Add any other URL you need for the development of your connector
#        # Inside your code, you can get each of these URLs with a call like the following one:
#        #     .self$getPropValSlot('urls', 'ws.url')
#  
#  fields:
#    chebi.ex.id:
#      description: ChEBI connector example ID
#      case.insensitive: true
#      forbids.duplicates: true
#      type: id
#      card: many

## -----------------------------------------------------------------------------
defFile <- system.file("extdata", "chebi_ex.yml", package='biodb')

## ---- eval=FALSE, highlight=FALSE, code=readLines(system.file("extdata", "chebi_ex.yml", package='biodb'))----
#  databases:
#  
#    chebi.ex:
#      name: ChEBI example connector
#      description: An example connector for ChEBI.
#      entry.content.encoding: UTF-8
#      entry.content.type: xml
#      parsing.expr:
#        accession: substring-after(//chebi:return/chebi:chebiId,'CHEBI:')
#        formula:
#          - //chebi:Formulae/chebi:source[text()='ChEBI']/../chebi:data
#          - (//chebi:Formulae/chebi:data)[1]
#        inchi: //chebi:return/chebi:inchi
#        inchikey: //chebi:return/chebi:inchiKey
#        mass: //chebi:mass
#        monoisotopic.mass: //chebi:monoisotopicMass
#        name:
#          - //chebi:chebiAsciiName
#        smiles: //chebi:return/chebi:smiles
#      searchable.fields:
#        - name
#        - monoisotopic.mass
#        - molecular.mass
#      scheduler.t: 1
#      scheduler.n: 3
#      urls:
#        base.url: https://www.ebi.ac.uk/chebi/
#        ws.url: https://www.ebi.ac.uk/webservices/chebi/2.0/
#      xml.ns:
#        chebi: https://www.ebi.ac.uk/webservices/chebi
#        xsd: http://www.w3.org/2001/XMLSchema
#  
#  fields:
#  
#    chebi.ex.id:
#      description: ChEBI ID
#      type: id
#      card: many
#      forbids.duplicates: true
#      case.insensitive: true

## ---- eval=TRUE, highlight=TRUE, code=readLines('biodbChebiEx/R/ChebiExEntry.R')----
#' ChEBI connector example entry class.
#'
#' Entry class for ChEBI connector example. 
#'
#' @seealso
#' \code{\link{BiodbXmlEntry}}.
#'
#' @examples
#' # Create an instance with default settings:
#' mybiodb <- biodb::Biodb()
#'
#' # Get a connector that inherits from ChebiExConn:
#' conn <- mybiodb$getFactory()$createConn('chebi.ex')
#'
#' # Get the first entry
#' e <- conn$getEntry(conn$getEntryIds(1L))
#'
#' # Terminate instance.
#' mybiodb$terminate()
#'
#' @import biodb
#' @import methods
#' @export ChebiExEntry
#' @exportClass ChebiExEntry
ChebiExEntry <- methods::setRefClass("ChebiExEntry",
    contains=c(
        'BiodbXmlEntry'
    ),

methods=list(

initialize=function(...) {
    callSuper(...)
}

,.isContentCorrect=function(content) {
 
    correct <- callSuper(content)
    
    # You can do some more checks of the content here.
    
    return(correct)
}

,.parseFieldsStep2=function(parsed.content) {
    
    # TODO Implement your custom parsing processing here.
}
))

## ---- echo=FALSE, results='asis'----------------------------------------------
make_vignette_ref('details')

## ---- eval=TRUE, highlight=TRUE, code=readLines('biodbChebiEx/R/ChebiExConn.R')----
#' ChEBI connector example connector class.
#'
#' Connector class for ChEBI connector example.
#'
#' @seealso 
#' \code{\link{BiodbCompounddbConn}}
#' \code{\link{BiodbRemotedbConn}}
#'
#' @examples
#' # Create an instance with default settings:
#' mybiodb <- biodb::Biodb()
#'
#' # Get a connector:
#' conn <- mybiodb$getFactory()$createConn('chebi.ex')
#'
#' # Get the first entry
#' e <- conn$getEntry(conn$getEntryIds(1L))
#'
#' # Terminate instance.
#' mybiodb$terminate()
#'
#' @import biodb
#' @import methods
#' @export ChebiExConn
#' @exportClass ChebiExConn
ChebiExConn <- methods::setRefClass("ChebiExConn",
    contains=c(
        "BiodbCompounddbConn"
        ,"BiodbRemotedbConn"
    ),
    fields=list(
    ),

methods=list(

initialize=function(...) {
    callSuper(...)
}

,getNbEntries=function(count=FALSE) {
    # Overrides super class' method.

    # Replace the call below if you have a direct way (specific web service for
    # a remote database, provided method or information for a local database)
    # to count entries for your database.
    return(callSuper(count=count))
}

,getEntryContentFromDb=function(entry.id) {
    # Overrides super class' method.

    # Initialize return values
    content <- rep(NA_character_, length(entry.id))

    # TODO Implement retrieval of entry contents.

    # Some debug message
    if (length(content) > 0)
        .self$message('debug', paste("Content of first entry:", content[[1]]))

    return(content)
}

,.doGetEntryIds=function(max.results=NA_integer_) {
    # Overrides super class' method.

    ids <- NA_character_
 
    # TODO Implement retrieval of accession numbers.
    
    return(ids)
}

,.doSearchForEntries=function(fields=NULL, max.results=NA_integer_) {
    # Overrides super class' method.

    ids <- character()

    # TODO Implement search of entries by filtering on values of fields.
    
    return(ids)
}

,getEntryPageUrl=function(id) {
    # Overrides super class' method.

    # TODO Modify this code to build the individual URLs to the entry web pages
    fct <- function(x) {
        u <- c(.self$getPropValSlot('urls', 'base.url'), 'entries', x)
        BiodbUrl(url=u)$toString()
    }

    return(vapply(id, fct, FUN.VALUE=''))
}

,getEntryImageUrl=function(id) {
    # Overrides super class' method.

    # TODO Modify this code to build the individual URLs to the entry images 
    fct <- function(x) {
        u <- c(.self$getPropValSlot('urls', 'base.url'), 'images', x,
               'image.png')
        BiodbUrl(url=u)$toString()
    }

    return(vapply(id, fct, FUN.VALUE=''))
}

,wsFind=function(name="", retfmt=c('plain', 'parsed', 'ids', 'request')) {
    # This is the implementation of a fictive web service called "find" that
    # search for entries by name.
    # Use it as an example for implementing your own web services.

    retfmt <- match.arg(retfmt)

    # Build request
    params <- list(name=name)
    url <- BiodbUrl(url=c(.self$getPropValSlot('urls', 'ws.url'), 'find'),
                    params=params)
    request <- .self$makeRequest(method='get', url=url)

    # Return request
    if (retfmt == 'request')
        return(request)

    # Send request
    # This the line that should be run for sending the request and getting the
    # results:
    #results <- .self$getBiodb()$getRequestScheduler()$sendRequest(request)
    # Instead, for this example, we just generate the results of this fictive
    # web service:
    results <- paste('{"0001": {"name": "name1"},',
                     ' "0198": {"name": "name2"},',
                     ' "9834": {"name": "name3"}}')

    # Parse
    if (retfmt != 'plain') {
        
        # Parse JSON
        results <- jsonlite::fromJSON(results, simplifyDataFrame=FALSE)

        # Get IDs
        if (retfmt == 'ids')
            results <- names(results)
    }

    return(results)
}

,.doGetEntryContentRequest=function(id, concatenate=TRUE) {

    # TODO Modify the code below to build the URLs to get the contents of the
    # entries.
    # Depending on the database, you may have to build one URL for each
    # individual entry or may be able to write just one or a few URL for all
    # entries to retrieve.
    u <- c(.self$getPropValSlot('urls', 'base.url'), 'entries',
           paste(id, 'xml', sep='.'))
    url <- BiodbUrl(url=u)$toString()

    return(url)
}
))

## ---- eval=FALSE--------------------------------------------------------------
#  wsGetLiteEntity=function(search=NULL, search.category='ALL', stars='ALL',
#                           max.results=10,
#                           retfmt=c('plain', 'parsed', 'request', 'ids')) {
#  }

## ---- echo=FALSE, results='hide'----------------------------------------------
connClass <- system.file("extdata", "ChebiExConn.R", package='biodb')

## ---- code=readLines(connClass)-----------------------------------------------
ChebiExConn <- methods::setRefClass("ChebiExConn",
    contains=c("BiodbRemotedbConn", "BiodbCompounddbConn"),

methods=list(

initialize=function(...) {
    callSuper(...)
},

getEntryPageUrl=function(id) {
    # Overrides super class' method

    url <- c(.self$getPropValSlot('urls', 'base.url'), 'searchId.do')

    fct <- function(x) {
        BiodbUrl$new(url=url, params=list(chebiId=x))$toString()
    }
    
    urls <- vapply(id, fct, FUN.VALUE='')

    return(urls)
},

getEntryImageUrl=function(id) {
    # Overrides super class' method

    url <- c(.self$getPropValSlot('urls', 'base.url'), 'displayImage.do')

    fct <- function(x) {
        BiodbUrl$new(url=url, params=list(defaultImage='true', imageIndex=0,
                                      chebiId=x, dimensions=400))$toString()
    }
    
    urls <- vapply(id, fct, FUN.VALUE='')

    return(urls)
},

wsGetLiteEntity=function(search=NULL, search.category='ALL', stars='ALL',
                         max.results=10,
                         retfmt=c('plain', 'parsed', 'request', 'ids')) {

    # Check parameters
    chk::chk_string(search)
    chk::chk_in(search.category, .self$getSearchCategories())
    chk::chk_number(max.results)
    chk::chk_gte(max.results, 0)
    chk::chk_in(stars, .self$getStarsCategories())
    retfmt <- match.arg(retfmt)

    # Build request
    params <- c(search=search,
                searchCategory=search.category,
                maximumResults=max.results,
                starsCategory=stars)
    url <- c(.self$getPropValSlot('urls', 'ws.url'), 'test/getLiteEntity')
    request <- .self$makeRequest(method='get', url=BiodbUrl$new(url=url,
                                                                params=params),
                                 encoding='UTF-8')
    if (retfmt == 'request')
        return(request)

    # Send request
    results <- .self$getBiodb()$getRequestScheduler()$sendRequest(request)

    # Parse
    if (retfmt != 'plain') {

        # Parse XML
        results <-  XML::xmlInternalTreeParse(results, asText=TRUE)

        if (retfmt == 'ids') {
            ns <- .self$getPropertyValue('xml.ns')
            results <- XML::xpathSApply(results, "//chebi:chebiId",
                                        XML::xmlValue, namespaces=ns)
            results <- sub('CHEBI:', '', results)
            if (length(grep("^[0-9]+$", results)) != length(results))
                .self$error("Impossible to parse XML to get entry IDs.")
        }
    }

    return(results)
},

.doSearchForEntries=function(fields=NULL, max.results=0) {

    ids <- character()

    if ( ! is.null(fields)) {

        # Search by name
        if ('name' %in% names(fields))
            ids <- .self$wsGetLiteEntity(search=fields$name,
                                         search.category="ALL NAMES",
                                         max.results=0, retfmt='ids')
    }

    # Cut
    if (max.results > 0 && max.results < length(ids))
        ids <- ids[seq_len(max.results)]

    return(ids)
},

.doGetEntryContentRequest=function(id, concatenate=TRUE) {

    url <- c(.self$getPropValSlot('urls', 'ws.url'), 'test',
             'getCompleteEntity')

    urls <- vapply(id, function(x) BiodbUrl$new(url=url,
                                            params=list(chebiId=x))$toString(),
                   FUN.VALUE='')

    return(urls)
},

.doGetEntryIds=function(max.results=NA_integer_) {
    return(NULL)
}
))

## -----------------------------------------------------------------------------
mybiodb <- biodb::newInst()

## -----------------------------------------------------------------------------
mybiodb$loadDefinitions(defFile)

## -----------------------------------------------------------------------------
conn <- mybiodb$getFactory()$createConn('chebi.ex')

## -----------------------------------------------------------------------------
entry <- conn$getEntry('17001')
entry$getFieldsAsDataframe()

## ----Closing of the biodb instance--------------------------------------------
mybiodb$terminate()

## -----------------------------------------------------------------------------
MyEntryClass <- methods::setRefClass("MyEntryClass", contains="BiodbCsvEntry",
    methods=list(
        initialize=function() {
            callSuper(sep=';', na.strings=c('', 'NA'))
        }
))

## ---- eval=FALSE--------------------------------------------------------------
#  .doParseContent=function(content) {
#  
#      # Get lines of content
#      lines <- strsplit(content, "\r?\n")[[1]]
#  
#      return(lines)
#  },
#  
#  .parseFieldsStep1=function(parsed.content) {
#  
#      # Get parsing expressions
#      parsing.expr <- .self$getParent()$getPropertyValue('parsing.expr')
#  
#      .self$.assertNotNull(parsed.content)
#      .self$.assertNotNa(parsed.content)
#      .self$.assertNotNull(parsing.expr)
#      .self$.assertNotNa(parsing.expr)
#      .self$.assertNotNull(names(parsing.expr))
#  
#      # Loop on all parsing expressions
#      for (field in names(parsing.expr)) {
#  
#          # Match whole content
#          g <- stringr::str_match(parsed.content, parsing.expr[[field]])
#  
#          # Get positive results
#          results <- g[ ! is.na(g[, 1]), , drop=FALSE]
#  
#          # Any match ?
#          if (nrow(results) > 0)
#              .self$setFieldValue(field, results[, 2])
#      }
#  }

## ---- eval=FALSE--------------------------------------------------------------
#  .parseFieldsStep2=function(parsed.content) {
#  
#      # Remove fields with empty string
#      for (f in .self$getFieldNames()) {
#          v <- .self$getFieldValue(f)
#          if (is.character(v) && ! is.na(v) && v == '')
#              .self$removeField(f)
#      }
#  
#      # Correct InChIKey
#      if (.self$hasField('INCHIKEY')) {
#          v <- sub('^InChIKey=', '', .self$getFieldValue('INCHIKEY'), perl=TRUE)
#          .self$setFieldValue('INCHIKEY', v)
#      }
#  
#      # Synonyms
#      synonyms <- XML::xpathSApply(parsed.content, "//synonym", XML::xmlValue)
#      if (length(synonyms) > 0)
#          .self$appendFieldValue('name', synonyms)
#  }