## ----imports, echo=FALSE,eval=TRUE, message=FALSE, warning=FALSE-----------
# Packages used by the chunks below.
library(Onassis)
library(DT)
library(gplots)
library(org.Hs.eg.db)
library(kableExtra)

## ----echo=TRUE, eval=FALSE-------------------------------------------------
# source("https://bioconductor.org/biocLite.R")
# biocLite('org.Hs.eg.db')
# biocLite("GenomicRanges")
# install.packages('data.table')
# install.packages('DT')
# install.packages('gplots')

## ----connectTodb, echo=TRUE,eval=FALSE-------------------------------------
# ## Running this function might take long time if the database has to be downloaded.
# geo_con <- connectToGEODB(download=TRUE)
#
# #Showing the experiment types available in GEO
# experiments <- experiment_types(geo_con)
#
# #Showing the organism types available in GEO
# species <- organism_types(geo_con)
#
# #Retrieving the metadata associated to experiment type "Methylation profiling by high throughput sequencing"
# meth_metadata <- getGEOMetadata(geo_con, experiment_type='Methylation profiling by high throughput sequencing', organism = 'Homo sapiens')
#
# #Retrieving Human gene expression metadata, knowing the GEO platform identifier, e.g. the Affymetrix Human Genome U133 Plus 2.0 Array
# expression <- getGEOMetadata(geo_con, experiment_type='Expression profiling by array', gpl='GPL570')

## ----experimentTypesshow, echo=FALSE, eval=TRUE----------------------------
# Show the first ten experiment types from the copy shipped with the package.
experiments <- readRDS(system.file('extdata', 'vignette_data', 'experiment_types.rds', package = 'Onassis'))
knitr::kable(as.data.frame(experiments[1:10]), col.names = c('Experiment')) %>%
  kable_styling(bootstrap_options = c("striped"), position = "center") %>%
  scroll_box(width = "300px", height = "200px")

## ----speciesShow, echo=FALSE,eval=TRUE-------------------------------------
# Show the first ten organisms from the copy shipped with the package.
species <- readRDS(system.file('extdata', 'vignette_data', 'organisms.rds', package = 'Onassis'))
knitr::kable(as.data.frame(species[1:10]), col.names = c('Species')) %>%
  kable_styling(bootstrap_options = c("striped"), position = "center") %>%
  scroll_box(width = "300px", height = "200px")

## ----loadgeoMetadata, echo=TRUE, eval=TRUE---------------------------------
meth_metadata <- readRDS(system.file('extdata', 'vignette_data', 'GEOmethylation.rds', package = 'Onassis'))

## ----printmeta, echo=FALSE,eval=TRUE---------------------------------------
# Truncate the experiment summaries to 50 characters so the table stays readable.
methylation_tmp <- meth_metadata
# substr() is vectorised, so no sapply() wrapper is needed.
methylation_tmp$experiment_summary <- substr(methylation_tmp$experiment_summary, 1, 50)
knitr::kable(methylation_tmp[1:10, ], caption = 'Methylation profiling by high througput sequencing metadata from GEOmetadb.') %>%
  kable_styling(bootstrap_options = c("striped"), position = "center") %>%
  scroll_box(width = "80%", height = "300px")

## ----connectSRA, echo=TRUE,eval=FALSE--------------------------------------
# # Connection to the SRAmetadb and potential download of the sqlite file
# sqliteFileName <- './data/SRAdb.sqlite'
# sra_con <- dbConnect(SQLite(), sqliteFileName)
#
# # Query for the ChIP-Seq experiments contained in GEO for human samples
# library_strategy <- 'ChIP-Seq' #ChIP-Seq data
# library_source <- 'GENOMIC'
# taxon_id <- 9606 #Human samples
# center_name <- 'GEO' #Data from GEO
#
# # Query to the sample table
# samples_query <- paste0("select sample_accession, description, sample_attribute, sample_url_link from sample where taxon_id='", taxon_id, "' and sample_accession IS NOT NULL", " and center_name='", center_name, "'")
#
# samples_df <- dbGetQuery(sra_con, samples_query)
# samples <- unique(as.character(as.vector(samples_df[, 1])))
#
# # Query to the experiment table
# experiment_query <- paste0("select experiment_accession, center_name, title, sample_accession, sample_name, experiment_alias, library_strategy, library_layout, experiment_url_link, experiment_attribute from experiment where library_strategy='",
#                            library_strategy, "'" , " and library_source ='", library_source,
#                            "' " )
#
# experiment_df <- dbGetQuery(sra_con, experiment_query)
#
# #Merging the columns from the sample and the experiment table
# experiment_df <- merge(experiment_df, samples_df, by = "sample_accession")
#
# # Replacing the field separators with white spaces
# # (fixed = TRUE: "||" must be matched literally, as a regex it is an empty alternation)
# experiment_df$experiment_attribute <- gsub("||", " ", experiment_df$experiment_attribute, fixed = TRUE)
# experiment_df$sample_attribute <- gsub("||", " ", experiment_df$sample_attribute, fixed = TRUE)
# # Replacing the '_' character with white spaces
# experiment_df$sample_name <- gsub("_", " ", experiment_df$sample_name, fixed = TRUE)
# experiment_df$experiment_alias <- gsub("_", " ", experiment_df$experiment_alias, fixed = TRUE)
# sra_chip_seq <- experiment_df

## ----readCHIP, echo=TRUE, eval=TRUE----------------------------------------
sra_chip_seq <- readRDS(system.file('extdata', 'vignette_data', 'GEO_human_chip.rds', package = 'Onassis'))

## ----printchromatinIP, echo=FALSE,eval=TRUE--------------------------------
# kable()'s argument is `row.names`; `rownames` is not matched by it.
knitr::kable(head(sra_chip_seq, 10), row.names = FALSE, caption = 'ChIP-Seq metadata obtained from SRAdb') %>%
  kable_styling(bootstrap_options = c("striped"), position = "center") %>%
  scroll_box(width = "80%", height = "300px")

## ----createSampleAndTargetDict, echo=TRUE,eval=TRUE, message=FALSE---------
# If a Conceptmapper dictionary is already available the dictType CMDICT can be specified and the corresponding file loaded
sample_dict <- CMdictionary(inputFileOrDb = system.file('extdata', 'cmDict-sample.cs.xml', package = 'Onassis'), dictType = 'CMDICT')

#Creation of a dictionary from the file sample.cs.obo available in OnassisJavaLibs
obo <- system.file('extdata', 'sample.cs.obo', package = 'OnassisJavaLibs')

# This call replaces the sample_dict loaded from the CMDICT file above.
sample_dict <- CMdictionary(inputFileOrDb = obo, outputDir = getwd(), synonymType = 'ALL')

# Creation of a dictionary for human genes/proteins
# library() stops with an error if the package is missing, whereas require() only returns FALSE.
library(org.Hs.eg.db)
targets <- CMdictionary(dictType = 'TARGET', inputFileOrDb = 'org.Hs.eg.db')

## ----settingOptions, echo=TRUE,eval=TRUE-----------------------------------
#Creating a CMoptions object and showing the default parameters
opts <- CMoptions()
show(opts)

## ----listCombinations, echo=TRUE, eval=TRUE--------------------------------
combinations <- listCMOptions()

## ----setsynonymtype, echo=TRUE, eval=TRUE----------------------------------
myopts <- CMoptions(SynonymType = 'EXACT_ONLY')
myopts

## ----changeparameter, echo=TRUE, eval=TRUE---------------------------------
#Changing the SearchStrategy parameter
SearchStrategy(myopts) <- 'SKIP_ANY_MATCH_ALLOW_OVERLAP'
myopts

## ----EntityFinder, echo=TRUE, eval=TRUE, results='hide', message=FALSE, warning=FALSE----
# Annotate the first 20 ChIP-seq records with the sample dictionary.
chipseq_dict_annot <- EntityFinder(sra_chip_seq[1:20, c('sample_accession', 'title', 'experiment_attribute', 'sample_attribute', 'description')], dictionary = sample_dict, options = myopts)

## ----showchipresults, echo=FALSE, eval=TRUE, message=FALSE-----------------
#methylation_brenda_annot <- readRDS(system.file('extdata', 'vignette_data', 'methylation_brenda_annot.rds', package='Onassis'))
# TODO: update for ChIP-seq
# NOTE(review): the caption below still mentions methylation data and BRENDA,
# while the table shows chipseq_dict_annot -- confirm and reword.
knitr::kable(head(chipseq_dict_annot, 20), row.names = FALSE, caption = 'Annotations of the methylation profiling by high througput sequencing metadata obtained from GEO with BRENDA ontology concepts') %>%
  kable_styling() %>%
  scroll_box(width = "80%", height = "400px")

## ----annotateGenes, echo=TRUE, eval=TRUE, results='hide', message=FALSE, warning=FALSE----
#Finding the TARGET entities
target_entities <- EntityFinder(input = sra_chip_seq[1:20, c('sample_accession', 'title', 'experiment_attribute', 'sample_attribute', 'description')], options = myopts, dictionary = targets)

## ----printKable, echo=FALSE, eval=TRUE-------------------------------------
knitr::kable(target_entities, caption = 'Annotations of ChIP-seq test metadata obtained from SRAdb and stored into files with the TARGETs (genes and histone variants)') %>%
  kable_styling(bootstrap_options = c("striped"), position = "center") %>%
  scroll_box(width = "80%", height = "400px")

## ----similarity, echo=TRUE, eval=TRUE, message=FALSE-----------------------
#Instantiating the Similarity
similarities <- listSimilarities()

## ----computing measures, echo=TRUE, eval=TRUE, message=FALSE---------------
# Pairwise similarity between every unordered pair of distinct annotated terms.
found_terms <- as.character(unique(chipseq_dict_annot$term_url))
n <- length(found_terms)
ontologyfile <- obo

# Result schema; stays empty when fewer than two terms were found.
pairwise_results <- data.frame(term1 = character(0), term2 = character(0),
                               value = double(0L), stringsAsFactors = FALSE)
if (n > 1) {
  # Columns of term_pairs are the index pairs (i, k) with i < k, in the same
  # order as a nested i/k loop would visit them.
  term_pairs <- utils::combn(n, 2)
  pair_rows <- lapply(seq_len(ncol(term_pairs)), function(p) {
    term1 <- found_terms[term_pairs[1, p]]
    term2 <- found_terms[term_pairs[2, p]]
    # Typed one-row data frame: keeps the similarity numeric instead of
    # coercing the whole row to character.
    data.frame(term1 = term1, term2 = term2,
               value = as.numeric(Similarity(ontologyfile, term1, term2)),
               stringsAsFactors = FALSE)
  })
  # Bind once instead of growing the data frame inside the loop.
  pairwise_results <- do.call(rbind, pair_rows)
}
pairwise_results <- unique(pairwise_results)

# Attach the term names. The lookup is de-duplicated first so the merges do not
# multiply rows for terms that annotate more than one sample.
term_names <- unique(chipseq_dict_annot[, c('term_url', 'term_name')])
pairwise_results <- merge(pairwise_results, term_names, by.x = 'term2', by.y = 'term_url', all.x = TRUE)
colnames(pairwise_results)[length(colnames(pairwise_results))] <- 'term2_name'
pairwise_results <- merge(pairwise_results, term_names, by.x = 'term1', by.y = 'term_url', all.x = TRUE)
colnames(pairwise_results)[length(colnames(pairwise_results))] <- 'term1_name'
pairwise_results <- unique(pairwise_results)

## ----showSim, echo=FALSE, eval=TRUE----------------------------------------
knitr::kable(pairwise_results, caption = 'Pairwise similarities of cell line terms annotating the ChIP-seq metadata') %>%
  kable_styling(bootstrap_options = c("striped"), position = "center") %>%
  scroll_box(width = "80%", height = "400px")

## ----groupwise_measures, echo=TRUE, eval=TRUE, message=FALSE---------------
# Similarity between a group of two terms and a third term.
Similarity(obo, found_terms[1:2], found_terms[3])

## ----samples_similarity, echo=TRUE, eval=TRUE, message=FALSE---------------
# Symmetric sample-by-sample similarity matrix, filled from the upper triangle.
annotated_samples <- as.character(as.vector(unique(chipseq_dict_annot$sample_id)))
n <- length(annotated_samples)
samples_results <- matrix(0, nrow = n, ncol = n)
rownames(samples_results) <- colnames(samples_results) <- annotated_samples
# Guard: with fewer than two samples 1:(n - 1) would count backwards.
if (n > 1) {
  for (i in seq_len(n - 1)) {
    sample1 <- annotated_samples[i]
    for (k in (i + 1):n) {
      sample2 <- annotated_samples[k]
      two_samples_similarity <- Similarity(ontologyfile, sample1, sample2, chipseq_dict_annot)
      samples_results[i, k] <- samples_results[k, i] <- two_samples_similarity
    }
  }
}
diag(samples_results) <- 1
heatmap.2(samples_results, density.info = "none", trace = "none", main = 'Semantic similarity of annotated samples', margins = c(5, 5))

## ----onassis_class_usage, echo=TRUE, eval=TRUE, results='hide', message=FALSE, warning=FALSE----
onassis_annotations <- annotate(sra_chip_seq, 'OBO', obo)

## ----show_onassis_annotations, echo=TRUE, eval=TRUE------------------------
onassis_entities <- entities(onassis_annotations)

## ----showing_entities, echo=FALSE, eval=TRUE-------------------------------
# Show up to ten randomly chosen entities; the size is capped so that fewer
# than ten available rows do not make sample() fail.
shown_rows <- sample.int(nrow(onassis_entities), min(10L, nrow(onassis_entities)))
knitr::kable(onassis_entities[shown_rows, ], caption = 'Entities in Onassis object') %>%
  kable_styling(bootstrap_options = c("striped"), position = "center") %>%
  scroll_box(width = "80%", height = "400px")

## ----term_filtering, echo=TRUE, eval=TRUE----------------------------------
filtered_onassis <- filterconcepts(onassis_annotations, c('cell'))

## ----showing_filt_entities, echo=FALSE, eval=TRUE--------------------------
knitr::kable(entities(filtered_onassis), caption = 'Entities in filtered Onassis object') %>%
  kable_styling(bootstrap_options = c("striped"), position = "center") %>%
  scroll_box(width = "80%", height = "400px")

## ----similarity_of_samples, echo=TRUE, eval=TRUE---------------------------
filtered_onassis <- sim(filtered_onassis)

## ----collapsing_similarities, echo=TRUE, eval=TRUE, message=FALSE, results='hide', fig.width=6, fig.height=6----
collapsed_onassis <- Onassis::collapse(filtered_onassis, 0.8)
head(entities(collapsed_onassis))
heatmap.2(simil(collapsed_onassis), margins = c(15, 15), cexRow = 1, cexCol = 1)

## ----sessionInfo(), echo=FALSE, eval=TRUE----------------------------------
sessionInfo()