## ----setup, include = FALSE------------------------------------------------ knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----getPackage, eval=FALSE------------------------------------------------ # if (!requireNamespace("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # BiocManager::install("SQLDataFrame") ## ----getDevel, eval=FALSE-------------------------------------------------- # BiocManager::install("Liubuntu/SQLDataFrame") ## ----Load, message=FALSE, eval = TRUE-------------------------------------- library(SQLDataFrame) library(DBI) ## ----constructor_conn------------------------------------------------------ dbfile <- system.file("extdata/test.db", package = "SQLDataFrame") conn <- DBI::dbConnect(DBI::dbDriver("SQLite"), dbname = dbfile) obj <- SQLDataFrame(conn = conn, dbtable = "state", dbkey = "state") ## ----constructor_credentials----------------------------------------------- obj1 <- SQLDataFrame(dbname = dbfile, type = "SQLite", dbtable = "state", dbkey = "state") all.equal(obj, obj1) ## -------------------------------------------------------------------------- obj dim(obj) colnames(obj) ## -------------------------------------------------------------------------- slotNames(obj) dbtable(obj) dbkey(obj) connSQLDataFrame(obj) ## ----methods--------------------------------------------------------------- dim(obj) dimnames(obj) length(obj) ROWNAMES(obj) ## -------------------------------------------------------------------------- dbtable(obj) aa <- rbind(obj[1:5, ], obj[6:10, ]) aa dbtable(aa) ## message bb <- saveSQLDataFrame(aa, dbname = tempfile(fileext=".db"), dbtable = "aa", overwrite = TRUE) connSQLDataFrame(bb) dbtable(bb) ## -------------------------------------------------------------------------- mtc <- tibble::rownames_to_column(mtcars)[,1:6] filename <- file.path(tempdir(), "mtc.csv") write.csv(mtc, file= filename, row.names = FALSE) aa <- makeSQLDataFrame(filename, dbkey = "rowname", sep = ",", overwrite = TRUE) aa connSQLDataFrame(aa) dbtable(aa) ## -------------------------------------------------------------------------- connSQLDataFrame(obj) dbtable(obj) obj1 <- saveSQLDataFrame(obj, dbname = tempfile(fileext = ".db"), dbtable = "obj_copy") connSQLDataFrame(obj1) dbtable(obj1) ## -------------------------------------------------------------------------- head(obj[[1]]) head(obj[["region"]]) head(obj$size) ## -------------------------------------------------------------------------- head(obj[["state"]]) ## ---- subsetting----------------------------------------------------------- obj[1:3, 1:2] obj[c(TRUE, FALSE), c(TRUE, FALSE), drop=FALSE] obj[1:3, "population", drop=FALSE] obj[, "population"] ## realized column value ## -------------------------------------------------------------------------- rnms <- ROWNAMES(obj) obj[c("Alabama", "Colorado"), ] ## -------------------------------------------------------------------------- obj1 <- SQLDataFrame(conn = conn, dbtable = "state", dbkey = c("region", "population")) rnms <- ROWNAMES(obj1) obj1[c("South:3615.0", "West:365.0"), ] ## -------------------------------------------------------------------------- obj[1] obj["region"] ## -------------------------------------------------------------------------- obj1 %>% filter(division == "South Atlantic" & size == "medium") ## -------------------------------------------------------------------------- obj1 %>% mutate(p1 = population/10, s1 = size) ## -------------------------------------------------------------------------- dbfile1 <- system.file("extdata/test.db", package = "SQLDataFrame") con1 <- DBI::dbConnect(dbDriver("SQLite"), dbname = dbfile1) dbfile2 <- system.file("extdata/test1.db", package = "SQLDataFrame") con2 <- DBI::dbConnect(dbDriver("SQLite"), dbname = dbfile2) ss1 <- SQLDataFrame(conn = con1, dbtable = "state", dbkey = c("state")) ss2 <- SQLDataFrame(conn = con2, dbtable = "state1", dbkey = c("state")) ss11 <- ss1[sample(5), ] ss21 <- ss2[sample(10, 5), ] ## ---- eval=FALSE----------------------------------------------------------- # obj1 <- union(ss11, ss21) # obj1 ## reordered by the "dbkey()" ## -------------------------------------------------------------------------- obj2 <- rbind(ss11, ss21) obj2 ## keeping the original order by updating the row index ## -------------------------------------------------------------------------- ss12 <- ss1[1:10, 1:2] ss22 <- ss2[6:15, 3:4] left_join(ss12, ss22) inner_join(ss12, ss22) semi_join(ss12, ss22) anti_join(ss12, ss22) ## ---- mysql---------------------------------------------------------------- library(RMySQL) ensbConn <- dbConnect(dbDriver("MySQL"), host="genome-mysql.soe.ucsc.edu", user = "genome", dbname = "xenTro9") enssdf <- SQLDataFrame(conn = ensbConn, dbtable = "xenoRefGene", dbkey = c("name", "txStart")) enssdf1 <- enssdf[1:20, 1:2] enssdf2 <- enssdf[11:30,3:4] res <- left_join(enssdf1, enssdf2) ## ----bigquery, eval=FALSE-------------------------------------------------- # library(bigrquery) # bigrquery::bq_auth() ## use this to authorize bigrquery in the # ## browser. # bqConn <- DBI::dbConnect(dbDriver("bigquery"), # project = "bigquery-public-data", # dataset = "human_variant_annotation", # billing = "") ## if not previous provided # ## authorization, must specify a # ## project name that was already # ## linked with Google Cloud with # ## billing info. # sdf <- SQLDataFrame(conn = bqConn, dbtable = "ncbi_clinvar_hg38_20180701") # sdf[1:5, 1:5] # sdf %>% filter(GENEINFO == "PYGL:5836") # sdf %>% filter(reference_name == "21") ## -------------------------------------------------------------------------- sessionInfo()