## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----getPackage, eval=FALSE--------------------------------------------------- # if (!requireNamespace("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # BiocManager::install("SQLDataFrame") ## ----getDevel, eval=FALSE----------------------------------------------------- # BiocManager::install("Liubuntu/SQLDataFrame") ## ----Load, message=FALSE, eval = TRUE----------------------------------------- library(SQLDataFrame) library(DBI) ## ----constructor_conn--------------------------------------------------------- dbfile <- system.file("extdata/test.db", package = "SQLDataFrame") conn <- DBI::dbConnect(DBI::dbDriver("SQLite"), dbname = dbfile) obj <- SQLDataFrame(conn = conn, dbtable = "state", dbkey = "state") ## ----constructor_credentials-------------------------------------------------- obj1 <- SQLDataFrame(dbname = dbfile, type = "SQLite", dbtable = "state", dbkey = "state") all.equal(obj, obj1) ## ----------------------------------------------------------------------------- obj dim(obj) colnames(obj) ## ----------------------------------------------------------------------------- slotNames(obj) dbtable(obj) dbkey(obj) connSQLDataFrame(obj) ## ----methods------------------------------------------------------------------ dim(obj) dimnames(obj) length(obj) ROWNAMES(obj) ## ----------------------------------------------------------------------------- dbtable(obj) aa <- rbind(obj[1:5, ], obj[6:10, ]) aa dbtable(aa) ## message bb <- saveSQLDataFrame(aa, dbname = tempfile(fileext=".db"), dbtable = "aa") connSQLDataFrame(bb) dbtable(bb) ## ----------------------------------------------------------------------------- mtc <- tibble::rownames_to_column(mtcars)[,1:6] filename <- file.path(tempdir(), "mtc.csv") write.csv(mtc, file= filename, row.names = FALSE) aa <- makeSQLDataFrame(filename, dbkey = "rowname", sep = ",", overwrite = TRUE) aa connSQLDataFrame(aa) dbtable(aa) ## ----------------------------------------------------------------------------- connSQLDataFrame(obj) dbtable(obj) obj1 <- saveSQLDataFrame(obj, dbname = tempfile(fileext = ".db"), dbtable = "obj_copy") connSQLDataFrame(obj1) dbtable(obj1) ## ----------------------------------------------------------------------------- head(obj[[1]]) head(obj[["region"]]) head(obj$size) ## ----------------------------------------------------------------------------- head(obj[["state"]]) ## ---- subsetting-------------------------------------------------------------- obj[1:3, 1:2] obj[c(TRUE, FALSE), c(TRUE, FALSE), drop=FALSE] obj[1:3, "population", drop=FALSE] obj[, "population"] ## realized column value ## ----------------------------------------------------------------------------- rnms <- ROWNAMES(obj) obj[c("Alabama", "Colorado"), ] ## ----------------------------------------------------------------------------- obj1 <- SQLDataFrame(conn = conn, dbtable = "state", dbkey = c("region", "population")) rnms <- ROWNAMES(obj1) obj1[c("South:3615.0", "West:365.0"), ] ## ----------------------------------------------------------------------------- obj[1] obj["region"] ## ----------------------------------------------------------------------------- obj1 %>% filter(division == "South Atlantic" & size == "medium") ## ----------------------------------------------------------------------------- obj1 %>% mutate(p1 = population/10, s1 = size) ## ----------------------------------------------------------------------------- dbfile1 <- system.file("extdata/test.db", package = "SQLDataFrame") con1 <- DBI::dbConnect(dbDriver("SQLite"), dbname = dbfile1) dbfile2 <- system.file("extdata/test1.db", package = "SQLDataFrame") con2 <- DBI::dbConnect(dbDriver("SQLite"), dbname = dbfile2) ss1 <- SQLDataFrame(conn = con1, dbtable = "state", dbkey = c("state")) ss2 <- SQLDataFrame(conn = con2, dbtable = "state1", dbkey = c("state")) ss11 <- ss1[sample(5), ] ss21 <- ss2[sample(10, 5), ] ## ---- eval=FALSE-------------------------------------------------------------- # obj1 <- union(ss11, ss21) # obj1 ## reordered by the "dbkey()" ## ----------------------------------------------------------------------------- obj2 <- rbind(ss11, ss21) obj2 ## keeping the original order by updating the row index ## ----------------------------------------------------------------------------- ss12 <- ss1[1:10, 1:2] ss22 <- ss2[6:15, 3:4] left_join(ss12, ss22) inner_join(ss12, ss22) semi_join(ss12, ss22) anti_join(ss12, ss22) ## ---- mysql------------------------------------------------------------------- library(RMySQL) ensbConn <- dbConnect(dbDriver("MySQL"), host="genome-mysql.soe.ucsc.edu", user = "genome", dbname = "xenTro9") enssdf <- SQLDataFrame(conn = ensbConn, dbtable = "xenoRefGene", dbkey = c("name", "txStart")) enssdf1 <- enssdf[1:20, 1:2] enssdf2 <- enssdf[11:30,3:4] res <- left_join(enssdf1, enssdf2) ## ----bigquery, eval=FALSE----------------------------------------------------- # library(bigrquery) # bigrquery::bq_auth() ## use this to authorize bigrquery in the # ## browser. # bqConn <- DBI::dbConnect(dbDriver("bigquery"), # project = "bigquery-public-data", # dataset = "human_variant_annotation", # billing = "") ## if not previous provided # ## authorization, must specify a # ## project name that was already # ## linked with Google Cloud with # ## billing info. # sdf <- SQLDataFrame(conn = bqConn, dbtable = "ncbi_clinvar_hg38_20180701") # sdf[1:5, 1:5] # sdf %>% select(GENEINFO) # sdf %>% filter(GENEINFO == "PYGL:5836") # sdf %>% filter(reference_name == "21") ## ----------------------------------------------------------------------------- sessionInfo()