# Case study script (knitr-purled chunks): compares the gene list of the GALA
# table shipped with CTDquerier against CTD query results for "Asthma" and
# "Air Pollutants". Each `## ----label----` header marks one vignette chunk;
# bare expressions are kept so their values are printed when the script runs.

## ----eval=FALSE------------------------------------------------------------
# if (!requireNamespace("BiocManager", quietly=TRUE))
#     install.packages("BiocManager")
# BiocManager::install("CTDquerier")

## ----gala_read_csv---------------------------------------------------------
# mustWork = TRUE: system.file() otherwise returns "" for a missing file and
# read.csv("") would silently wait on stdin instead of failing.
table_e1_csv <- system.file(
  "extdata", "gala_table_e1.csv",
  package = "CTDquerier",
  mustWork = TRUE
)
table_e1 <- read.csv(table_e1_csv, stringsAsFactors = FALSE)

## ----gala_remove_na_file---------------------------------------------------
dim(table_e1)
# Drop rows without genes. The filter targets the placeholder text "NA"
# (the original comparison was against "NA " with a trailing blank) and also
# real missing values, which would otherwise produce all-NA rows.
has_genes <- !is.na(table_e1$Genes) & trimws(table_e1$Genes) != "NA"
table_e1 <- table_e1[has_genes, ]
dim(table_e1)

## ----gala_create_list------------------------------------------------------
# The Genes column holds comma-separated symbols; flatten to one vector.
gala_genes <- trimws(unlist(strsplit(table_e1$Genes, ",")))
length(gala_genes)
head(gala_genes, 15)

## ----load_ctdquerier, message=FALSE----------------------------------------
library(CTDquerier)

## ----gale_query, eval=FALSE------------------------------------------------
# gala <- query_ctd_gene( terms = gala_genes, verbose = TRUE )

## ----gala_data-------------------------------------------------------------
# Use the pre-computed query result bundled with the package instead of
# running the (disabled) query above.
data(gala, package = "CTDquerier")
gala

## ----gala_plot_query, message=FALSE----------------------------------------
library(ggplot2)
plot(gala) + ggtitle("Lost & Found Genes from GALA Study")

## ----gala_lost-------------------------------------------------------------
get_terms(gala)[["lost"]]

## ----gala_show_2-----------------------------------------------------------
gala

## ----gala_gda_all----------------------------------------------------------
gala_all_diseases <- get_table(gala, index_name = "diseases")
colnames(gala_all_diseases)
dim(gala_all_diseases)

## ----gala_disease_genes----------------------------------------------------
# Which of the "found" terms appear in the disease table at all?
found_genes <- get_terms(gala)[["found"]]
found_in_diseases <- found_genes %in% gala_all_diseases$GeneSymbol
length(unique(gala_all_diseases$GeneSymbol))
sum(found_in_diseases)
sum(!found_in_diseases)

## ----gala_diseases_no_genes------------------------------------------------
found_genes[!found_in_diseases]

## ----gala_diseases_unique--------------------------------------------------
length(unique(gala_all_diseases$Disease.Name))

## ----gala_diseases_curated-------------------------------------------------
# Keep only rows with a non-missing, non-empty Direct.Evidence value.
gala_all_diseases_cu <- gala_all_diseases[
  !is.na(gala_all_diseases$Direct.Evidence),
]
gala_all_diseases_cu <- gala_all_diseases_cu[
  gala_all_diseases_cu$Direct.Evidence != "",
]
dim(gala_all_diseases_cu)
length(unique(gala_all_diseases_cu$Disease.Name))

## ----gala_diseases_asthma--------------------------------------------------
gala_asthma <- gala_all_diseases[gala_all_diseases$Disease.Name == "Asthma", ]
dim(gala_asthma)

## ----gala_diseases_asthma_direct-------------------------------------------
sum(gala_asthma$Direct.Evidence != "" & !is.na(gala_asthma$Direct.Evidence))

## ----gala_diseases_asthma_evidence-----------------------------------------
mean(gala_asthma$Inference.Score, na.rm = TRUE)

## ----gala_diseases_asthma_reference----------------------------------------
sum(gala_asthma$Reference.Count, na.rm = TRUE)

## ----gala_diseases_asthma_evidence_plot------------------------------------
# NOTE(review): plot() is called with index_name = "disease" while get_table()
# above uses "diseases"; kept as written — confirm against the CTDquerier docs.
plot(
  gala,
  index_name = "disease",
  subset.disease = "Asthma",
  filter.score = 20
) +
  ggtitle("Evidence of the association between GALA genes and Asthma")

## ----gala_chemicals--------------------------------------------------------
gala_chem <- get_table(gala, index_name = "chemical interactions")
colnames(gala_chem)
length(unique(gala_chem$Chemical.Name))

## ---- gala_chemicals_table, results="asis"---------------------------------
knitr::kable(t(table(gala_chem$Reference.Count)))

## ---- gala_chemicals_plot--------------------------------------------------
plot(gala, index_name = "chemical interactions", filter.score = 6)

## ----ctd_asthma------------------------------------------------------------
asthma <- query_ctd_dise(terms = "Asthma", verbose = TRUE)

## ----ctd_asthma_show-------------------------------------------------------
asthma

## ----ctd_asthma_n_genes----------------------------------------------------
ctd_asthma <- get_table(asthma, index_name = "gene interactions")
length(unique(ctd_asthma$Gene.Symbol))

## ----ctd_asthma_n_genes_curated--------------------------------------------
sum(!is.na(ctd_asthma$Direct.Evidence) & ctd_asthma$Direct.Evidence != "")

## ----ctd_asthma_table, results="asis"--------------------------------------
library(knitr)
# Frequency of each disease name, printed most frequent first.
tt <- as.data.frame(table(ctd_asthma$Disease.Name))
colnames(tt) <- c("Disease", "Frequency")
kable(tt[order(tt$Frequency, decreasing = TRUE), ])

## ----ctd_asthma_chem-------------------------------------------------------
ctd_asthma_chem <- get_table(asthma, index_name = "chemical interactions")
colnames(ctd_asthma_chem)
length(unique(ctd_asthma_chem$Chemical.Name))

## ----ctd_asthma_chem_cur---------------------------------------------------
sum(
  !is.na(ctd_asthma_chem$Direct.Evidence) &
    ctd_asthma_chem$Direct.Evidence != ""
)

## ----ctd_asthma_plot-------------------------------------------------------
# NOTE(review): this plots the `asthma` query, yet the title mentions GALA
# genes; the title text is kept byte-for-byte — confirm whether it is intended.
plot(
  asthma,
  index_name = "chemical interactions",
  subset.disease = "Asthma",
  filter.score = 30
) +
  ggtitle("Evidence of the association between GALA genes and chemicals")

## ----intersect_gala_asthma_chem_1------------------------------------------
intr_chem <- intersect(gala_chem$Chemical.Name, ctd_asthma_chem$Chemical.Name)
length(intr_chem)

## ----intersect_gala_asthma_chem_2------------------------------------------
# NOTE(review): the denominators are row counts, not counts of distinct
# chemicals; kept as written so the reported percentages do not change.
length(intr_chem) / nrow(gala_chem) * 100
length(intr_chem) / nrow(ctd_asthma_chem) * 100

## ----intersect_gala_asthma_chem_2_temp, echo=FALSE-------------------------
p1 <- round(length(intr_chem) / nrow(gala_chem) * 100, 2)
p2 <- round(length(intr_chem) / nrow(ctd_asthma_chem) * 100, 2)

## ----intersect_gala_asthma_chem_cur_1--------------------------------------
# Restrict the asthma chemicals to rows with non-empty Direct.Evidence, then
# recompute the overlap (intr_chem is deliberately overwritten).
a <- ctd_asthma_chem$Chemical.Name[
  !is.na(ctd_asthma_chem$Direct.Evidence) &
    ctd_asthma_chem$Direct.Evidence != ""
]
intr_chem <- intersect(gala_chem$Chemical.Name, a)
length(intr_chem)

## ----intersect_gala_asthma_chem_plot_1-------------------------------------
# One row per shared chemical from each table (first occurrence wins), then
# join the two Reference.Count columns side by side.
gala_chem_r <- gala_chem[gala_chem$Chemical.Name %in% intr_chem, ]
gala_chem_r <- gala_chem_r[!duplicated(gala_chem_r$Chemical.Name), ]

ctd_asthma_chem_r <- ctd_asthma_chem[
  ctd_asthma_chem$Chemical.Name %in% intr_chem,
]
ctd_asthma_chem_r <- ctd_asthma_chem_r[
  !duplicated(ctd_asthma_chem_r$Chemical.Name),
]

dta <- merge(
  gala_chem_r[, c("Chemical.Name", "Reference.Count")],
  ctd_asthma_chem_r[, c("Chemical.Name", "Reference.Count")],
  by = "Chemical.Name"
)
colnames(dta) <- c("Chemical.Name", "Reference.Gala", "Reference.Asthma")
dta <- dta[
  order(dta$Reference.Gala, dta$Reference.Asthma, decreasing = TRUE),
]
# head() instead of dta[1:5, ]: no NA-padded rows when dta is short.
head(dta, 5)

## ----intersect_gala_asthma_chem_plot_2-------------------------------------
leaf_plot(
  head(dta, 25),
  label = "Chemical.Name",
  valueLeft = "Reference.Gala",
  valueRight = "Reference.Asthma",
  titleLeft = "GALA",
  titleRight = "Asthma"
)

## ----load_hugo-------------------------------------------------------------
# Gene universe for the enrichment calls below; the argument name is spelled
# out in full rather than relying on partial matching.
hgnc_tsv <- system.file(
  "extdata", "HGNC_Genes.tsv",
  package = "CTDquerier",
  mustWork = TRUE
)
hgnc_universe <- read.delim(hgnc_tsv, sep = "\t", stringsAsFactors = FALSE)

## ----gala_enrich_asthma_all------------------------------------------------
enrich(gala, asthma, use = "all", universe = hgnc_universe$Approved.Symbol)

## ----gala_enrich_asthma_curated--------------------------------------------
enrich(
  gala, asthma,
  universe = hgnc_universe$Approved.Symbol,
  use = "curated"
)

## ----air_ctd---------------------------------------------------------------
air <- query_ctd_chem(terms = "Air Pollutants")
air

## ----gala_enrich_air-------------------------------------------------------
enrich(gala, air, universe = hgnc_universe$Approved.Symbol, use = "all")

## ----sessionInfo, echo=FALSE-----------------------------------------------
sessionInfo()