TileDBArray 1.14.1
TileDB implements a framework for local and remote storage of dense and sparse arrays.
We can use this as a DelayedArray
backend to provide an array-level abstraction,
thus allowing the data to be used in many places where an ordinary array or matrix might be used.
The TileDBArray package implements the necessary wrappers around TileDB-R
to support read/write operations on TileDB arrays within the DelayedArray framework.
TileDBArray
Creating a TileDBArray
is as easy as:
X <- matrix(rnorm(1000), ncol=10)
library(TileDBArray)
writeTileDBArray(X)
## <100 x 10> TileDBMatrix object of type "double":
## [,1] [,2] [,3] ... [,9] [,10]
## [1,] 0.56468446 1.40282463 0.98211325 . -1.9458459 -0.4267455
## [2,] 0.84809492 0.06558448 0.96386404 . -0.7734357 -0.5601530
## [3,] -0.22229781 2.01492141 -0.06090963 . -0.8556386 0.1146063
## [4,] -2.71221348 0.96248851 0.02337115 . 2.0541096 1.9850506
## [5,] 0.26629782 1.05906791 0.48867310 . -1.0283754 -1.2677685
## ... . . . . . .
## [96,] 1.95898701 0.18665308 0.56396222 . -0.21699191 -0.88835571
## [97,] 1.22722232 2.31052670 -0.76809465 . -0.82338299 0.21829123
## [98,] -2.09129483 -0.33386041 -0.14901772 . -0.01720534 -0.85923872
## [99,] 0.55019783 0.05184827 1.34038277 . 0.92877144 -1.43671942
## [100,] 0.66743583 -0.41381163 2.50616610 . 0.00867724 -2.03888816
Alternatively, we can use coercion methods:
as(X, "TileDBArray")
## <100 x 10> TileDBMatrix object of type "double":
## [,1] [,2] [,3] ... [,9] [,10]
## [1,] 0.56468446 1.40282463 0.98211325 . -1.9458459 -0.4267455
## [2,] 0.84809492 0.06558448 0.96386404 . -0.7734357 -0.5601530
## [3,] -0.22229781 2.01492141 -0.06090963 . -0.8556386 0.1146063
## [4,] -2.71221348 0.96248851 0.02337115 . 2.0541096 1.9850506
## [5,] 0.26629782 1.05906791 0.48867310 . -1.0283754 -1.2677685
## ... . . . . . .
## [96,] 1.95898701 0.18665308 0.56396222 . -0.21699191 -0.88835571
## [97,] 1.22722232 2.31052670 -0.76809465 . -0.82338299 0.21829123
## [98,] -2.09129483 -0.33386041 -0.14901772 . -0.01720534 -0.85923872
## [99,] 0.55019783 0.05184827 1.34038277 . 0.92877144 -1.43671942
## [100,] 0.66743583 -0.41381163 2.50616610 . 0.00867724 -2.03888816
This process works also for sparse matrices:
Y <- Matrix::rsparsematrix(1000, 1000, density=0.01)
writeTileDBArray(Y)
## <1000 x 1000> sparse TileDBMatrix object of type "double":
## [,1] [,2] [,3] ... [,999] [,1000]
## [1,] 0 0 0 . 0 0
## [2,] 0 0 0 . 0 0
## [3,] 0 0 0 . 0 0
## [4,] 0 0 0 . 0 0
## [5,] 0 -2 0 . 0 0
## ... . . . . . .
## [996,] 0 0 0 . 0 0
## [997,] 0 0 0 . 0 0
## [998,] 0 0 0 . 0 0
## [999,] 0 0 0 . 0 0
## [1000,] 0 0 0 . 0 0
Logical and integer matrices are supported:
writeTileDBArray(Y > 0)
## <1000 x 1000> sparse TileDBMatrix object of type "logical":
## [,1] [,2] [,3] ... [,999] [,1000]
## [1,] FALSE FALSE FALSE . FALSE FALSE
## [2,] FALSE FALSE FALSE . FALSE FALSE
## [3,] FALSE FALSE FALSE . FALSE FALSE
## [4,] FALSE FALSE FALSE . FALSE FALSE
## [5,] FALSE FALSE FALSE . FALSE FALSE
## ... . . . . . .
## [996,] FALSE FALSE FALSE . FALSE FALSE
## [997,] FALSE FALSE FALSE . FALSE FALSE
## [998,] FALSE FALSE FALSE . FALSE FALSE
## [999,] FALSE FALSE FALSE . FALSE FALSE
## [1000,] FALSE FALSE FALSE . FALSE FALSE
As are matrices with dimension names:
rownames(X) <- sprintf("GENE_%i", seq_len(nrow(X)))
colnames(X) <- sprintf("SAMP_%i", seq_len(ncol(X)))
writeTileDBArray(X)
## <100 x 10> TileDBMatrix object of type "double":
## SAMP_1 SAMP_2 SAMP_3 ... SAMP_9 SAMP_10
## GENE_1 0.56468446 1.40282463 0.98211325 . -1.9458459 -0.4267455
## GENE_2 0.84809492 0.06558448 0.96386404 . -0.7734357 -0.5601530
## GENE_3 -0.22229781 2.01492141 -0.06090963 . -0.8556386 0.1146063
## GENE_4 -2.71221348 0.96248851 0.02337115 . 2.0541096 1.9850506
## GENE_5 0.26629782 1.05906791 0.48867310 . -1.0283754 -1.2677685
## ... . . . . . .
## GENE_96 1.95898701 0.18665308 0.56396222 . -0.21699191 -0.88835571
## GENE_97 1.22722232 2.31052670 -0.76809465 . -0.82338299 0.21829123
## GENE_98 -2.09129483 -0.33386041 -0.14901772 . -0.01720534 -0.85923872
## GENE_99 0.55019783 0.05184827 1.34038277 . 0.92877144 -1.43671942
## GENE_100 0.66743583 -0.41381163 2.50616610 . 0.00867724 -2.03888816
TileDBArray
sTileDBArray
s are simply DelayedArray
objects and can be manipulated as such.
The usual conventions for extracting data from matrix-like objects work as expected:
out <- as(X, "TileDBArray")
dim(out)
## [1] 100 10
head(rownames(out))
## [1] "GENE_1" "GENE_2" "GENE_3" "GENE_4" "GENE_5" "GENE_6"
head(out[,1])
## GENE_1 GENE_2 GENE_3 GENE_4 GENE_5 GENE_6
## 0.5646845 0.8480949 -0.2222978 -2.7122135 0.2662978 -0.1073937
We can also perform manipulations like subsetting and arithmetic.
Note that these operations do not affect the data in the TileDB backend;
rather, they are delayed until the values are explicitly required,
hence the creation of the DelayedMatrix
object.
out[1:5,1:5]
## <5 x 5> DelayedMatrix object of type "double":
## SAMP_1 SAMP_2 SAMP_3 SAMP_4 SAMP_5
## GENE_1 0.564684463 1.402824632 0.982113245 -0.213393002 -1.397927112
## GENE_2 0.848094917 0.065584476 0.963864038 -0.009382973 0.851817027
## GENE_3 -0.222297807 2.014921414 -0.060909628 0.481387104 -0.261940816
## GENE_4 -2.712213482 0.962488509 0.023371152 -0.224582278 1.266597573
## GENE_5 0.266297822 1.059067907 0.488673097 -0.737131230 -0.390292156
out * 2
## <100 x 10> DelayedMatrix object of type "double":
## SAMP_1 SAMP_2 SAMP_3 ... SAMP_9 SAMP_10
## GENE_1 1.1293689 2.8056493 1.9642265 . -3.8916917 -0.8534910
## GENE_2 1.6961898 0.1311690 1.9277281 . -1.5468713 -1.1203061
## GENE_3 -0.4445956 4.0298428 -0.1218193 . -1.7112771 0.2292126
## GENE_4 -5.4244270 1.9249770 0.0467423 . 4.1082192 3.9701011
## GENE_5 0.5325956 2.1181358 0.9773462 . -2.0567508 -2.5355370
## ... . . . . . .
## GENE_96 3.9179740 0.3733062 1.1279244 . -0.43398381 -1.77671142
## GENE_97 2.4544446 4.6210534 -1.5361893 . -1.64676597 0.43658247
## GENE_98 -4.1825897 -0.6677208 -0.2980354 . -0.03441068 -1.71847744
## GENE_99 1.1003957 0.1036965 2.6807655 . 1.85754288 -2.87343885
## GENE_100 1.3348717 -0.8276233 5.0123322 . 0.01735448 -4.07777632
We can also do more complex matrix operations that are supported by DelayedArray:
colSums(out)
## SAMP_1 SAMP_2 SAMP_3 SAMP_4 SAMP_5 SAMP_6
## 5.6107880 5.3222219 17.9002502 -12.0759012 -0.8539484 15.2113426
## SAMP_7 SAMP_8 SAMP_9 SAMP_10
## -10.3245264 -13.8631608 8.8121142 -8.8986378
out %*% runif(ncol(out))
## [,1]
## GENE_1 -1.54213883
## GENE_2 -0.10426776
## GENE_3 -1.43387097
## GENE_4 -3.35786911
## GENE_5 -2.71857272
## GENE_6 -2.07980331
## GENE_7 2.12754427
## GENE_8 0.13157927
## GENE_9 -0.06731421
## GENE_10 -0.33347527
## GENE_11 -0.93796528
## GENE_12 0.72080776
## GENE_13 -4.35433095
## GENE_14 -0.16511428
## GENE_15 -0.41060032
## GENE_16 -3.35726745
## GENE_17 -0.01608337
## GENE_18 0.28082607
## GENE_19 -1.07267117
## GENE_20 1.47742098
## GENE_21 3.23532611
## GENE_22 -3.96949154
## GENE_23 -0.51821182
## GENE_24 -2.00407880
## GENE_25 0.73558014
## GENE_26 -2.39199426
## GENE_27 -1.37385935
## GENE_28 2.27352985
## GENE_29 0.08233810
## GENE_30 1.47840603
## GENE_31 0.23790690
## GENE_32 2.00446270
## GENE_33 -0.51278599
## GENE_34 -1.24214530
## GENE_35 -0.70729701
## GENE_36 -2.35908955
## GENE_37 -2.07154890
## GENE_38 2.18384498
## GENE_39 -0.01956332
## GENE_40 1.10827818
## GENE_41 -0.32085405
## GENE_42 4.90114793
## GENE_43 5.11628825
## GENE_44 -0.81138564
## GENE_45 0.77502892
## GENE_46 0.82449975
## GENE_47 2.65878313
## GENE_48 0.37188897
## GENE_49 0.25405040
## GENE_50 -0.60957261
## GENE_51 -0.69902115
## GENE_52 0.37357504
## GENE_53 1.06055823
## GENE_54 -2.57760154
## GENE_55 -1.45801154
## GENE_56 1.44856775
## GENE_57 3.10231168
## GENE_58 -2.39923808
## GENE_59 4.20977311
## GENE_60 -0.49940986
## GENE_61 -1.77894565
## GENE_62 1.16799833
## GENE_63 0.21764042
## GENE_64 -1.65967771
## GENE_65 -1.45420717
## GENE_66 -0.15349511
## GENE_67 1.49711165
## GENE_68 -1.40855704
## GENE_69 1.85295166
## GENE_70 1.80425318
## GENE_71 -3.77102480
## GENE_72 1.53225855
## GENE_73 -3.57691521
## GENE_74 -0.14828223
## GENE_75 1.69600393
## GENE_76 0.93717501
## GENE_77 -1.30925165
## GENE_78 -1.50979998
## GENE_79 -0.97847252
## GENE_80 -2.05564728
## GENE_81 -1.34288956
## GENE_82 0.72988682
## GENE_83 6.31062370
## GENE_84 1.39203671
## GENE_85 4.94263742
## GENE_86 -0.79041749
## GENE_87 -2.89018647
## GENE_88 1.09131042
## GENE_89 0.66771993
## GENE_90 3.21591694
## GENE_91 1.97444941
## GENE_92 -0.02114155
## GENE_93 0.92042494
## GENE_94 1.21977377
## GENE_95 -0.26279167
## GENE_96 0.68888951
## GENE_97 0.09181890
## GENE_98 -3.07033809
## GENE_99 -0.34495563
## GENE_100 -1.45253180
We can adjust some parameters for creating the backend with appropriate arguments to writeTileDBArray()
.
For example, the example below allows us to control the path to the backend
as well as the name of the attribute containing the data.
X <- matrix(rnorm(1000), ncol=10)
path <- tempfile()
writeTileDBArray(X, path=path, attr="WHEE")
## <100 x 10> TileDBMatrix object of type "double":
## [,1] [,2] [,3] ... [,9] [,10]
## [1,] 0.72328916 -0.67525889 1.42608218 . 1.812447187 -0.003680596
## [2,] -1.29923418 -1.44100329 -0.57882034 . -0.890769929 0.845519648
## [3,] 1.41897543 0.44474904 -1.22067351 . 0.329483668 0.897769745
## [4,] -0.31949091 1.24410707 0.73619044 . 0.605764854 0.046843184
## [5,] 0.69684304 -1.83873417 0.06803377 . 0.604778852 -1.337601724
## ... . . . . . .
## [96,] -0.44962861 0.90243750 0.43198444 . -0.4338502 1.1570949
## [97,] 0.11698243 -0.03017279 -0.32455752 . 0.7593744 0.3370095
## [98,] 1.08725418 -0.31808858 -0.34058483 . -1.0236778 0.6283832
## [99,] 0.67813287 -0.84776985 0.14176769 . -1.3925875 -2.1840231
## [100,] 0.30371160 0.07215880 -1.31972246 . 0.7475123 2.0211802
As these arguments cannot be passed during coercion, we instead provide global variables that can be set or unset to affect the outcome.
path2 <- tempfile()
setTileDBPath(path2)
as(X, "TileDBArray") # uses path2 to store the backend.
## <100 x 10> TileDBMatrix object of type "double":
## [,1] [,2] [,3] ... [,9] [,10]
## [1,] 0.72328916 -0.67525889 1.42608218 . 1.812447187 -0.003680596
## [2,] -1.29923418 -1.44100329 -0.57882034 . -0.890769929 0.845519648
## [3,] 1.41897543 0.44474904 -1.22067351 . 0.329483668 0.897769745
## [4,] -0.31949091 1.24410707 0.73619044 . 0.605764854 0.046843184
## [5,] 0.69684304 -1.83873417 0.06803377 . 0.604778852 -1.337601724
## ... . . . . . .
## [96,] -0.44962861 0.90243750 0.43198444 . -0.4338502 1.1570949
## [97,] 0.11698243 -0.03017279 -0.32455752 . 0.7593744 0.3370095
## [98,] 1.08725418 -0.31808858 -0.34058483 . -1.0236778 0.6283832
## [99,] 0.67813287 -0.84776985 0.14176769 . -1.3925875 -2.1840231
## [100,] 0.30371160 0.07215880 -1.31972246 . 0.7475123 2.0211802
sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: aarch64-apple-darwin20
## Running under: macOS Ventura 13.6.6
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0
##
## locale:
## [1] C/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/New_York
## tzcode source: internal
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] RcppSpdlog_0.0.18 TileDBArray_1.14.1 DelayedArray_0.30.1
## [4] SparseArray_1.4.8 S4Arrays_1.4.1 abind_1.4-8
## [7] IRanges_2.38.1 S4Vectors_0.42.1 MatrixGenerics_1.16.0
## [10] matrixStats_1.4.1 BiocGenerics_0.50.0 Matrix_1.7-0
## [13] BiocStyle_2.32.1
##
## loaded via a namespace (and not attached):
## [1] bit_4.5.0 jsonlite_1.8.9 compiler_4.4.1
## [4] BiocManager_1.30.25 crayon_1.5.3 Rcpp_1.0.13
## [7] nanoarrow_0.5.0.1 jquerylib_0.1.4 yaml_2.3.10
## [10] fastmap_1.2.0 lattice_0.22-6 R6_2.5.1
## [13] RcppCCTZ_0.2.12 XVector_0.44.0 tiledb_0.30.0
## [16] knitr_1.48 bookdown_0.40 bslib_0.8.0
## [19] rlang_1.1.4 cachem_1.1.0 xfun_0.47
## [22] sass_0.4.9 bit64_4.5.2 cli_3.6.3
## [25] zlibbioc_1.50.0 spdl_0.0.5 digest_0.6.37
## [28] grid_4.4.1 lifecycle_1.0.4 data.table_1.16.0
## [31] evaluate_1.0.0 nanotime_0.3.10 zoo_1.8-12
## [34] rmarkdown_2.28 tools_4.4.1 htmltools_0.5.8.1