## ----citation------------------------------------------------------------ citation("GEOmetadb") ## ----include=FALSE----------------------------------- library(knitr) options(width=55) opts_chunk$set(cache=TRUE,message=FALSE,warning=FALSE) ## ---------------------------------------------------- library(GEOmetadb) ## ---------------------------------------------------- if(!file.exists('GEOmetadb.sqlite')) getSQLiteFile() ## ---------------------------------------------------- file.info('GEOmetadb.sqlite') ## ---------------------------------------------------- con <- dbConnect(SQLite(),'GEOmetadb.sqlite') dbDisconnect(con) ## ---------------------------------------------------- con <- dbConnect(SQLite(),'GEOmetadb.sqlite') ## ---------------------------------------------------- geo_tables <- dbListTables(con) geo_tables ## ---------------------------------------------------- dbListFields(con,'gse') ## ---------------------------------------------------- dbGetQuery(con,'PRAGMA TABLE_INFO(gpl)') ## ---------------------------------------------------- rs <- dbGetQuery(con,'select * from gse limit 5') rs[,1:7] ## ---------------------------------------------------- rs <- dbGetQuery(con,paste("select gse,title from gse where", "contributor like '%Sean%Davis%'",sep=" ")) rs ## ---------------------------------------------------- rs <- dbGetQuery(con,paste("select gsm,supplementary_file", "from gsm where gpl='GPL96'", "and supplementary_file like '%CEL.gz'")) dim(rs) ## ---------------------------------------------------- rs <- dbGetQuery(con,paste("select gpl.bioc_package,gsm.gpl,", "gsm,gsm.supplementary_file", "from gsm join gpl on gsm.gpl=gpl.gpl", "where gpl.manufacturer='Affymetrix'", "and gsm.supplementary_file like '%CEL.gz' ")) rs[1:5,] ## ---------------------------------------------------- getTableCounts <- function(tableName,conn) { sql <- sprintf("select count(*) from %s",tableName) return(dbGetQuery(conn,sql)[1,1]) } do.call(rbind,sapply(geo_tables,getTableCounts,con,simplify=FALSE)) ## ---------------------------------------------------- conversion <- geoConvert('GPL96') ## ---------------------------------------------------- lapply(conversion, dim) conversion$gse[1:5,] conversion$gsm[1:5,] conversion$gds[1:5,] conversion$sMatrix[1:5,] ## ---------------------------------------------------- getBiocPlatformMap(con, bioc=c('hgu133a','hgu95av2')) ## ---------------------------------------------------- sql <- paste("SELECT DISTINCT gse.title,gse.gse", "FROM", " gsm JOIN gse_gsm ON gsm.gsm=gse_gsm.gsm", " JOIN gse ON gse_gsm.gse=gse.gse", " JOIN gse_gpl ON gse_gpl.gse=gse.gse", " JOIN gpl ON gse_gpl.gpl=gpl.gpl", "WHERE", " gsm.molecule_ch1 like '%total RNA%' AND", " gse.title LIKE '%breast cancer%' AND", " gpl.organism LIKE '%Homo sapiens%'",sep=" ") rs <- dbGetQuery(con,sql) dim(rs) print(rs[1:5,],right=FALSE) ## ----fig.width=10------------------------------------ library(tm) library(wordcloud) gseTitles = dbGetQuery(con,"select title from gse") corp = VCorpus(VectorSource(gseTitles)) corp <- tm_map(corp, removePunctuation) corp <- tm_map(corp, content_transformer(tolower)) corp <- tm_map(corp, removeNumbers) corp <- tm_map(corp, function(x)removeWords(x,stopwords())) term.matrix <- TermDocumentMatrix(corp) term.matrix <- as.matrix(term.matrix) v <- sort(rowSums(term.matrix),decreasing=TRUE) d <- data.frame(word = names(v),freq=v) n = 100 wordcloud(d[1:n,]$word,d[1:n,]$freq) ## ---------------------------------------------------- library(dplyr) db = src_sqlite('GEOmetadb.sqlite') gse = tbl(db,'gse') filter(gse,gse=='GSE2553') ## ---------------------------------------------------- dbDisconnect(con) ## ---- eval=FALSE------------------------------------- # file.remove('GEOmetadb.sqlite') ## ---------------------------------------------------- sessionInfo()