## ----start, message=FALSE, warning=FALSE----------------------------------- library(SIAMCAT) # this is data from Zeller et al., Mol. Syst. Biol. 2014 fn.feat.fr <- 'https://embl.de/download/zeller/FR-CRC/FR-CRC-N141_tax-ab-specI.tsv' fn.meta.fr <- 'https://embl.de/download/zeller/FR-CRC/FR-CRC-N141_metadata.tsv' # this is the external dataset from Yu et al., Gut 2017 fn.feat.cn <- 'https://embl.de/download/zeller/CN-CRC/CN-CRC-N128_tax-ab-specI.tsv' fn.meta.cn <- 'https://embl.de/download/zeller/CN-CRC/CN-CRC-N128_metadata.tsv' ## ----siamcat_fr------------------------------------------------------------ # features # be vary of the defaults in R!!! feat.fr <- read.table(fn.feat.fr, sep='\t', quote="", check.names = FALSE, stringsAsFactors = FALSE) # the features are counts, but we want to work with relative abundances feat.fr.rel <- prop.table(as.matrix(feat.fr), 2) # metadata meta.fr <- read.table(fn.meta.fr, sep='\t', quote="", check.names=FALSE, stringsAsFactors=FALSE) # create SIAMCAT object siamcat.fr <- siamcat(feat=feat.fr.rel, meta=meta.fr, label='Group', case='CRC') ## ----siamcat_cn------------------------------------------------------------ # features feat.cn <- read.table(fn.feat.cn, sep='\t', quote="", check.names = FALSE) feat.cn.rel <- prop.table(as.matrix(feat.cn), 2) # metadata meta.cn <- read.table(fn.meta.cn, sep='\t', quote="", check.names=FALSE, stringsAsFactors = FALSE) # SIAMCAT object siamcat.cn <- siamcat(feat=feat.cn.rel, meta=meta.cn, label='Group', case='CRC') ## ----preprocessing_fr------------------------------------------------------ siamcat.fr <- filter.features( siamcat.fr, filter.method = 'abundance', cutoff = 0.001, rm.unmapped = TRUE, verbose=2 ) siamcat.fr <- normalize.features( siamcat.fr, norm.method = "log.std", norm.param = list(log.n0 = 1e-06, sd.min.q = 0.1), verbose = 2 ) ## ----build_model_fr, results='hide'---------------------------------------- siamcat.fr <- create.data.split( siamcat.fr, num.folds = 5, num.resample = 2 ) siamcat.fr <- train.model( siamcat.fr, method = "lasso" ) ## ----predict_evaluate_fr, results='hide'----------------------------------- siamcat.fr <- make.predictions(siamcat.fr) siamcat.fr <- evaluate.predictions(siamcat.fr) ## ----normalize_cn---------------------------------------------------------- siamcat.cn <- normalize.features(siamcat.cn, norm.param=norm_params(siamcat.fr), feature.type='original', verbose = 2) ## ----predict_cn, results='hide'-------------------------------------------- siamcat.cn <- make.predictions( siamcat = siamcat.fr, siamcat.holdout = siamcat.cn, normalize.holdout = FALSE) ## ----alternative_pipeline_cn, eval=FALSE----------------------------------- # ## Alternative Code, not run here # siamcat.cn <- siamcat(feat=feat.cn.rel, meta=meta.cn, # label='Group', case='CRC') # siamcat.cn <- make.predictions(siamcat = siamcat.fr, # siamcat.holdout = siamcat.cn, # normalize.holdout = TRUE) ## ----eval_cn, message=FALSE------------------------------------------------ siamcat.cn <- evaluate.predictions(siamcat.cn) ## ----eval_plot, eval=FALSE------------------------------------------------- # model.evaluation.plot('FR-CRC'=siamcat.fr, # 'CN-CRC'=siamcat.cn, # colours=c('dimgrey', 'orange')) ## ----eval_plot_hidden, fig.width = 6, fig.asp=1, fig.align="left", echo=FALSE---- args <- list('FR-CRC'=siamcat.fr, 'CN-CRC'=siamcat.cn) colours=c('dimgrey', 'orange') plot(NULL, xlim = c(0, 1), ylim = c(0, 1), xlab = "False positive rate", ylab = "True positive rate", type = "n") title(paste("ROC curve for the model", sep = " ")) abline(a = 0, b = 1, lty = 3) legend.val <- c() for (i in 1:length(args)) { legend.val <- c(legend.val, as.numeric(SIAMCAT:::single.roc.plot(args[[i]], colours[i],verbose=0))) } legend('bottomright', legend= paste0(names(args), ' AUC: ' , format(legend.val, digits=3)), col=colours, lty=1, lwd=2, cex=0.8, y.intersp=1.5) ## ----session_info---------------------------------------------------------- sessionInfo()