## ---- code=readLines(system.file('vignettes_inc.R', package='biodb')), echo=FALSE---- # Disable automatic loading of extra biodb packages Sys.setenv(BIODB_AUTOLOAD_EXTRA_PKGS="FALSE") biodbVignettes <- data.frame() files <- Sys.glob('*.Rmd') for (f in files) { name <- sub('^(.*)\\.Rmd', '\\1', f, perl=TRUE) firstLines <- readLines(f, n=20) title <- grep("^title:", firstLines, value=TRUE) title <- sub('^title: *"(.*)\\.?"$', '\\1', title, perl=TRUE) desc <- grep("%\\\\VignetteIndexEntry", firstLines, value=TRUE) desc <- sub('^.*VignetteIndexEntry{(.*)}.*$', '\\1', desc, perl=TRUE) html <- paste0(name, '.html') link <- paste0('[', title, '](', html, ')') biodbVignettes <- rbind(biodbVignettes, data.frame(name=name, title=title, desc=desc, html=html, link=link)) } make_vignette_ref <- function(name) { cat(biodbVignettes[biodbVignettes$name == name, 'link', drop=TRUE]) } insert_features_table <- function() { featuresFile <- system.file("features.tsv", package='biodb') featuresDf <- read.table(featuresFile, sep="\t", header=TRUE, quote="", stringsAsFactors=FALSE) knitr::kable(featuresDf, "pipe", label="features", caption="*biodb* main features. These are generic features (i.e.: present at top-level of architecture or present in at least a group of connectors), unless specified otherwise.") } ## ----------------------------------------------------------------------------- biodb::genNewExtPkg(path='biodbChebiEx', dbName='chebi.ex', connType='compound', dbTitle='ChEBI connector example', entryType='xml', remote=TRUE) ## ----------------------------------------------------------------------------- list.files('biodbChebiEx', all.files=TRUE, recursive=TRUE) ## ---- eval=FALSE, highlight=FALSE, code=readLines('biodbChebiEx/inst/definitions.yml')---- # # biodb example definitions file for extensions packages, version 1.0.0 # # databases: # chebi.ex: # name: ChEBI connector example # description: Write here the description of this database. # entry.content.type: xml # parsing.expr: # accession: substring-after(//dbns:return/dbns:accessionId,'ACCESSION:') # name: # - //dbns:name # - //dbns:synonyms/dbns:data # mass: //dbns:mass # monoisotopic.mass: //dbns:monoisotopicMass # smiles: //dbns:return/dbns:smiles # inchi: //dbns:return/dbns:inchi # inchikey: //dbns:return/dbns:inchiKey # formula: # - //dbns:Formulae/dbns:source[text()='MyDatabase']/../dbns:data # - (//dbns:Formulae/dbns:data)[1] # xml.ns: # dbns: https://my.database.org/webservices/v1 # xsd: http://www.w3.org/2001/XMLSchema # searchable.fields: # - name # - monoisotopic.mass # - molecular.mass # - average.mass # - nominal.mass # # Length in seconds of the connection sliding window # scheduler.t: 1 # # Number of connections allowed inside the connection sliding window # scheduler.n: 3 # urls: # # Base URL of the database server, where to find entry pages # base.url: https://my.database.org/mydb/ # # Webservice URL to use to contact web services # ws.url: https://my.database.org/webservices/mydb/3.2/ # # Add any other URL you need for the development of your connector # # Inside your code, you can get each of these URLs with a call like the following one: # # .self$getPropValSlot('urls', 'ws.url') # # fields: # chebi.ex.id: # description: ChEBI connector example ID # case.insensitive: true # forbids.duplicates: true # type: id # card: many ## ----------------------------------------------------------------------------- defFile <- system.file("extdata", "chebi_ex.yml", package='biodb') ## ---- eval=FALSE, highlight=FALSE, code=readLines(system.file("extdata", "chebi_ex.yml", package='biodb'))---- # databases: # # chebi.ex: # name: ChEBI example connector # description: An example connector for ChEBI. # entry.content.encoding: UTF-8 # entry.content.type: xml # parsing.expr: # accession: substring-after(//chebi:return/chebi:chebiId,'CHEBI:') # formula: # - //chebi:Formulae/chebi:source[text()='ChEBI']/../chebi:data # - (//chebi:Formulae/chebi:data)[1] # inchi: //chebi:return/chebi:inchi # inchikey: //chebi:return/chebi:inchiKey # mass: //chebi:mass # monoisotopic.mass: //chebi:monoisotopicMass # name: # - //chebi:chebiAsciiName # smiles: //chebi:return/chebi:smiles # searchable.fields: # - name # - monoisotopic.mass # - molecular.mass # scheduler.t: 1 # scheduler.n: 3 # urls: # base.url: https://www.ebi.ac.uk/chebi/ # ws.url: https://www.ebi.ac.uk/webservices/chebi/2.0/ # xml.ns: # chebi: https://www.ebi.ac.uk/webservices/chebi # xsd: http://www.w3.org/2001/XMLSchema # # fields: # # chebi.ex.id: # description: ChEBI ID # type: id # card: many # forbids.duplicates: true # case.insensitive: true ## ---- eval=TRUE, highlight=TRUE, code=readLines('biodbChebiEx/R/ChebiExEntry.R')---- #' ChEBI connector example entry class. #' #' Entry class for ChEBI connector example. #' #' @seealso #' \code{\link{BiodbXmlEntry}}. #' #' @examples #' # Create an instance with default settings: #' mybiodb <- biodb::Biodb() #' #' # Get a connector that inherits from ChebiExConn: #' conn <- mybiodb$getFactory()$createConn('chebi.ex') #' #' # Get the first entry #' e <- conn$getEntry(conn$getEntryIds(1L)) #' #' # Terminate instance. #' mybiodb$terminate() #' #' @import biodb #' @import methods #' @export ChebiExEntry #' @exportClass ChebiExEntry ChebiExEntry <- methods::setRefClass("ChebiExEntry", contains=c( 'BiodbXmlEntry' ), methods=list( initialize=function(...) { callSuper(...) } ,.isContentCorrect=function(content) { correct <- callSuper(content) # You can do some more checks of the content here. return(correct) } ,.parseFieldsStep2=function(parsed.content) { # TODO Implement your custom parsing processing here. } )) ## ---- echo=FALSE, results='asis'---------------------------------------------- make_vignette_ref('details') ## ---- eval=TRUE, highlight=TRUE, code=readLines('biodbChebiEx/R/ChebiExConn.R')---- #' ChEBI connector example connector class. #' #' Connector class for ChEBI connector example. #' #' @seealso #' \code{\link{BiodbCompounddbConn}} #' \code{\link{BiodbRemotedbConn}} #' #' @examples #' # Create an instance with default settings: #' mybiodb <- biodb::Biodb() #' #' # Get a connector: #' conn <- mybiodb$getFactory()$createConn('chebi.ex') #' #' # Get the first entry #' e <- conn$getEntry(conn$getEntryIds(1L)) #' #' # Terminate instance. #' mybiodb$terminate() #' #' @import biodb #' @import methods #' @export ChebiExConn #' @exportClass ChebiExConn ChebiExConn <- methods::setRefClass("ChebiExConn", contains=c( "BiodbCompounddbConn" ,"BiodbRemotedbConn" ), fields=list( ), methods=list( initialize=function(...) { callSuper(...) } ,getNbEntries=function(count=FALSE) { # Overrides super class' method. # Replace the call below if you have a direct way (specific web service for # a remote database, provided method or information for a local database) # to count entries for your database. return(callSuper(count=count)) } ,getEntryContentFromDb=function(entry.id) { # Overrides super class' method. # Initialize return values content <- rep(NA_character_, length(entry.id)) # TODO Implement retrieval of entry contents. # Some debug message if (length(content) > 0) .self$message('debug', paste("Content of first entry:", content[[1]])) return(content) } ,.doGetEntryIds=function(max.results=NA_integer_) { # Overrides super class' method. ids <- NA_character_ # TODO Implement retrieval of accession numbers. return(ids) } ,.doSearchForEntries=function(fields=NULL, max.results=NA_integer_) { # Overrides super class' method. ids <- character() # TODO Implement search of entries by filtering on values of fields. return(ids) } ,getEntryPageUrl=function(id) { # Overrides super class' method. # TODO Modify this code to build the individual URLs to the entry web pages fct <- function(x) { u <- c(.self$getPropValSlot('urls', 'base.url'), 'entries', x) BiodbUrl(url=u)$toString() } return(vapply(id, fct, FUN.VALUE='')) } ,getEntryImageUrl=function(id) { # Overrides super class' method. # TODO Modify this code to build the individual URLs to the entry images fct <- function(x) { u <- c(.self$getPropValSlot('urls', 'base.url'), 'images', x, 'image.png') BiodbUrl(url=u)$toString() } return(vapply(id, fct, FUN.VALUE='')) } ,wsFind=function(name="", retfmt=c('plain', 'parsed', 'ids', 'request')) { # This is the implementation of a fictive web service called "find" that # search for entries by name. # Use it as an example for implementing your own web services. retfmt <- match.arg(retfmt) # Build request params <- list(name=name) url <- BiodbUrl(url=c(.self$getPropValSlot('urls', 'ws.url'), 'find'), params=params) request <- .self$makeRequest(method='get', url=url) # Return request if (retfmt == 'request') return(request) # Send request # This the line that should be run for sending the request and getting the # results: #results <- .self$getBiodb()$getRequestScheduler()$sendRequest(request) # Instead, for this example, we just generate the results of this fictive # web service: results <- paste('{"0001": {"name": "name1"},', ' "0198": {"name": "name2"},', ' "9834": {"name": "name3"}}') # Parse if (retfmt != 'plain') { # Parse JSON results <- jsonlite::fromJSON(results, simplifyDataFrame=FALSE) # Get IDs if (retfmt == 'ids') results <- names(results) } return(results) } ,.doGetEntryContentRequest=function(id, concatenate=TRUE) { # TODO Modify the code below to build the URLs to get the contents of the # entries. # Depending on the database, you may have to build one URL for each # individual entry or may be able to write just one or a few URL for all # entries to retrieve. u <- c(.self$getPropValSlot('urls', 'base.url'), 'entries', paste(id, 'xml', sep='.')) url <- BiodbUrl(url=u)$toString() return(url) } )) ## ---- eval=FALSE-------------------------------------------------------------- # wsGetLiteEntity=function(search=NULL, search.category='ALL', stars='ALL', # max.results=10, # retfmt=c('plain', 'parsed', 'request', 'ids')) { # } ## ---- echo=FALSE, results='hide'---------------------------------------------- connClass <- system.file("extdata", "ChebiExConn.R", package='biodb') ## ---- code=readLines(connClass)----------------------------------------------- ChebiExConn <- methods::setRefClass("ChebiExConn", contains=c("BiodbRemotedbConn", "BiodbCompounddbConn"), methods=list( initialize=function(...) { callSuper(...) }, getEntryPageUrl=function(id) { # Overrides super class' method url <- c(.self$getPropValSlot('urls', 'base.url'), 'searchId.do') fct <- function(x) { BiodbUrl$new(url=url, params=list(chebiId=x))$toString() } urls <- vapply(id, fct, FUN.VALUE='') return(urls) }, getEntryImageUrl=function(id) { # Overrides super class' method url <- c(.self$getPropValSlot('urls', 'base.url'), 'displayImage.do') fct <- function(x) { BiodbUrl$new(url=url, params=list(defaultImage='true', imageIndex=0, chebiId=x, dimensions=400))$toString() } urls <- vapply(id, fct, FUN.VALUE='') return(urls) }, wsGetLiteEntity=function(search=NULL, search.category='ALL', stars='ALL', max.results=10, retfmt=c('plain', 'parsed', 'request', 'ids')) { # Check parameters chk::chk_string(search) chk::chk_in(search.category, .self$getSearchCategories()) chk::chk_number(max.results) chk::chk_gte(max.results, 0) chk::chk_in(stars, .self$getStarsCategories()) retfmt <- match.arg(retfmt) # Build request params <- c(search=search, searchCategory=search.category, maximumResults=max.results, starsCategory=stars) url <- c(.self$getPropValSlot('urls', 'ws.url'), 'test/getLiteEntity') request <- .self$makeRequest(method='get', url=BiodbUrl$new(url=url, params=params), encoding='UTF-8') if (retfmt == 'request') return(request) # Send request results <- .self$getBiodb()$getRequestScheduler()$sendRequest(request) # Parse if (retfmt != 'plain') { # Parse XML results <- XML::xmlInternalTreeParse(results, asText=TRUE) if (retfmt == 'ids') { ns <- .self$getPropertyValue('xml.ns') results <- XML::xpathSApply(results, "//chebi:chebiId", XML::xmlValue, namespaces=ns) results <- sub('CHEBI:', '', results) if (length(grep("^[0-9]+$", results)) != length(results)) .self$error("Impossible to parse XML to get entry IDs.") } } return(results) }, .doSearchForEntries=function(fields=NULL, max.results=0) { ids <- character() if ( ! is.null(fields)) { # Search by name if ('name' %in% names(fields)) ids <- .self$wsGetLiteEntity(search=fields$name, search.category="ALL NAMES", max.results=0, retfmt='ids') } # Cut if (max.results > 0 && max.results < length(ids)) ids <- ids[seq_len(max.results)] return(ids) }, .doGetEntryContentRequest=function(id, concatenate=TRUE) { url <- c(.self$getPropValSlot('urls', 'ws.url'), 'test', 'getCompleteEntity') urls <- vapply(id, function(x) BiodbUrl$new(url=url, params=list(chebiId=x))$toString(), FUN.VALUE='') return(urls) }, .doGetEntryIds=function(max.results=NA_integer_) { return(NULL) } )) ## ----------------------------------------------------------------------------- mybiodb <- biodb::newInst() ## ----------------------------------------------------------------------------- mybiodb$loadDefinitions(defFile) ## ----------------------------------------------------------------------------- conn <- mybiodb$getFactory()$createConn('chebi.ex') ## ----------------------------------------------------------------------------- entry <- conn$getEntry('17001') entry$getFieldsAsDataframe() ## ----Closing of the biodb instance-------------------------------------------- mybiodb$terminate() ## ----------------------------------------------------------------------------- MyEntryClass <- methods::setRefClass("MyEntryClass", contains="BiodbCsvEntry", methods=list( initialize=function() { callSuper(sep=';', na.strings=c('', 'NA')) } )) ## ---- eval=FALSE-------------------------------------------------------------- # .doParseContent=function(content) { # # # Get lines of content # lines <- strsplit(content, "\r?\n")[[1]] # # return(lines) # }, # # .parseFieldsStep1=function(parsed.content) { # # # Get parsing expressions # parsing.expr <- .self$getParent()$getPropertyValue('parsing.expr') # # .self$.assertNotNull(parsed.content) # .self$.assertNotNa(parsed.content) # .self$.assertNotNull(parsing.expr) # .self$.assertNotNa(parsing.expr) # .self$.assertNotNull(names(parsing.expr)) # # # Loop on all parsing expressions # for (field in names(parsing.expr)) { # # # Match whole content # g <- stringr::str_match(parsed.content, parsing.expr[[field]]) # # # Get positive results # results <- g[ ! is.na(g[, 1]), , drop=FALSE] # # # Any match ? # if (nrow(results) > 0) # .self$setFieldValue(field, results[, 2]) # } # } ## ---- eval=FALSE-------------------------------------------------------------- # .parseFieldsStep2=function(parsed.content) { # # # Remove fields with empty string # for (f in .self$getFieldNames()) { # v <- .self$getFieldValue(f) # if (is.character(v) && ! is.na(v) && v == '') # .self$removeField(f) # } # # # Correct InChIKey # if (.self$hasField('INCHIKEY')) { # v <- sub('^InChIKey=', '', .self$getFieldValue('INCHIKEY'), perl=TRUE) # .self$setFieldValue('INCHIKEY', v) # } # # # Synonyms # synonyms <- XML::xpathSApply(parsed.content, "//synonym", XML::xmlValue) # if (length(synonyms) > 0) # .self$appendFieldValue('name', synonyms) # }