TileDBArray 1.16.0
TileDB implements a framework for local and remote storage of dense and sparse arrays.
We can use this as a DelayedArray backend to provide an array-level abstraction,
thus allowing the data to be used in many places where an ordinary array or matrix might be used.
The TileDBArray package implements the necessary wrappers around TileDB-R
to support read/write operations on TileDB arrays within the DelayedArray framework.
TileDBArray
Creating a TileDBArray is as easy as:
X <- matrix(rnorm(1000), ncol=10)
library(TileDBArray)
writeTileDBArray(X)
## <100 x 10> TileDBMatrix object of type "double":
## [,1] [,2] [,3] ... [,9] [,10]
## [1,] -0.01385165 0.65178769 1.38788239 . -1.5961029 -0.6015149
## [2,] -0.44274435 -0.27941653 -1.76011682 . 0.5608735 0.5826867
## [3,] 0.78404910 -1.37689080 0.40040878 . 0.7109870 0.4451363
## [4,] -2.35166134 0.03807284 0.71822337 . 1.1899024 -0.7761541
## [5,] 0.52400498 -1.70292227 -0.18831766 . 1.4929429 1.1387584
## ... . . . . . .
## [96,] 0.70783840 -0.02687686 1.18565329 . -0.06835931 1.19029768
## [97,] -1.66209684 -0.01509888 0.88125229 . -0.42352859 -1.34763506
## [98,] -0.26498582 2.01629388 -1.00470258 . 0.90016691 1.48218463
## [99,] 1.87471307 0.11321374 0.33693063 . -0.79417827 -0.94825007
## [100,] -0.55649768 -1.23254846 -0.41323466 . 0.13361983 2.17183154
Alternatively, we can use coercion methods:
as(X, "TileDBArray")
## <100 x 10> TileDBMatrix object of type "double":
## [,1] [,2] [,3] ... [,9] [,10]
## [1,] -0.01385165 0.65178769 1.38788239 . -1.5961029 -0.6015149
## [2,] -0.44274435 -0.27941653 -1.76011682 . 0.5608735 0.5826867
## [3,] 0.78404910 -1.37689080 0.40040878 . 0.7109870 0.4451363
## [4,] -2.35166134 0.03807284 0.71822337 . 1.1899024 -0.7761541
## [5,] 0.52400498 -1.70292227 -0.18831766 . 1.4929429 1.1387584
## ... . . . . . .
## [96,] 0.70783840 -0.02687686 1.18565329 . -0.06835931 1.19029768
## [97,] -1.66209684 -0.01509888 0.88125229 . -0.42352859 -1.34763506
## [98,] -0.26498582 2.01629388 -1.00470258 . 0.90016691 1.48218463
## [99,] 1.87471307 0.11321374 0.33693063 . -0.79417827 -0.94825007
## [100,] -0.55649768 -1.23254846 -0.41323466 . 0.13361983 2.17183154
This process also works for sparse matrices:
Y <- Matrix::rsparsematrix(1000, 1000, density=0.01)
writeTileDBArray(Y)
## <1000 x 1000> sparse TileDBMatrix object of type "double":
## [,1] [,2] [,3] ... [,999] [,1000]
## [1,] 0 0 0 . 0 0
## [2,] 0 0 0 . 0 0
## [3,] 0 0 0 . 0 0
## [4,] 0 0 0 . 0 0
## [5,] 0 0 0 . 0 0
## ... . . . . . .
## [996,] 0 0 0 . 0 0
## [997,] 0 0 0 . 0 0
## [998,] 0 0 0 . 0 0
## [999,] 0 0 0 . 0 0
## [1000,] 0 0 0 . 0 0
Logical and integer matrices are supported:
writeTileDBArray(Y > 0)
## <1000 x 1000> sparse TileDBMatrix object of type "logical":
## [,1] [,2] [,3] ... [,999] [,1000]
## [1,] FALSE FALSE FALSE . FALSE FALSE
## [2,] FALSE FALSE FALSE . FALSE FALSE
## [3,] FALSE FALSE FALSE . FALSE FALSE
## [4,] FALSE FALSE FALSE . FALSE FALSE
## [5,] FALSE FALSE FALSE . FALSE FALSE
## ... . . . . . .
## [996,] FALSE FALSE FALSE . FALSE FALSE
## [997,] FALSE FALSE FALSE . FALSE FALSE
## [998,] FALSE FALSE FALSE . FALSE FALSE
## [999,] FALSE FALSE FALSE . FALSE FALSE
## [1000,] FALSE FALSE FALSE . FALSE FALSE
As are matrices with dimension names:
rownames(X) <- sprintf("GENE_%i", seq_len(nrow(X)))
colnames(X) <- sprintf("SAMP_%i", seq_len(ncol(X)))
writeTileDBArray(X)
## <100 x 10> TileDBMatrix object of type "double":
## SAMP_1 SAMP_2 SAMP_3 ... SAMP_9 SAMP_10
## GENE_1 -0.01385165 0.65178769 1.38788239 . -1.5961029 -0.6015149
## GENE_2 -0.44274435 -0.27941653 -1.76011682 . 0.5608735 0.5826867
## GENE_3 0.78404910 -1.37689080 0.40040878 . 0.7109870 0.4451363
## GENE_4 -2.35166134 0.03807284 0.71822337 . 1.1899024 -0.7761541
## GENE_5 0.52400498 -1.70292227 -0.18831766 . 1.4929429 1.1387584
## ... . . . . . .
## GENE_96 0.70783840 -0.02687686 1.18565329 . -0.06835931 1.19029768
## GENE_97 -1.66209684 -0.01509888 0.88125229 . -0.42352859 -1.34763506
## GENE_98 -0.26498582 2.01629388 -1.00470258 . 0.90016691 1.48218463
## GENE_99 1.87471307 0.11321374 0.33693063 . -0.79417827 -0.94825007
## GENE_100 -0.55649768 -1.23254846 -0.41323466 . 0.13361983 2.17183154
TileDBArrays
TileDBArrays are simply DelayedArray objects and can be manipulated as such.
The usual conventions for extracting data from matrix-like objects work as expected:
out <- as(X, "TileDBArray")
dim(out)
## [1] 100 10
head(rownames(out))
## [1] "GENE_1" "GENE_2" "GENE_3" "GENE_4" "GENE_5" "GENE_6"
head(out[,1])
## GENE_1 GENE_2 GENE_3 GENE_4 GENE_5 GENE_6
## -0.01385165 -0.44274435 0.78404910 -2.35166134 0.52400498 0.13081935
We can also perform manipulations like subsetting and arithmetic.
Note that these operations do not affect the data in the TileDB backend;
rather, they are delayed until the values are explicitly required,
hence the creation of the DelayedMatrix object.
out[1:5,1:5]
## <5 x 5> DelayedMatrix object of type "double":
## SAMP_1 SAMP_2 SAMP_3 SAMP_4 SAMP_5
## GENE_1 -0.01385165 0.65178769 1.38788239 -0.99100203 -0.82018945
## GENE_2 -0.44274435 -0.27941653 -1.76011682 -0.34968990 1.87653362
## GENE_3 0.78404910 -1.37689080 0.40040878 1.01570129 -0.04298868
## GENE_4 -2.35166134 0.03807284 0.71822337 -0.10694499 -1.63011543
## GENE_5 0.52400498 -1.70292227 -0.18831766 -0.59191811 1.25465405
out * 2
## <100 x 10> DelayedMatrix object of type "double":
## SAMP_1 SAMP_2 SAMP_3 ... SAMP_9 SAMP_10
## GENE_1 -0.02770330 1.30357537 2.77576477 . -3.1922059 -1.2030299
## GENE_2 -0.88548870 -0.55883305 -3.52023364 . 1.1217471 1.1653734
## GENE_3 1.56809819 -2.75378159 0.80081757 . 1.4219740 0.8902726
## GENE_4 -4.70332269 0.07614568 1.43644673 . 2.3798048 -1.5523083
## GENE_5 1.04800995 -3.40584455 -0.37663532 . 2.9858857 2.2775168
## ... . . . . . .
## GENE_96 1.41567680 -0.05375373 2.37130659 . -0.1367186 2.3805954
## GENE_97 -3.32419367 -0.03019776 1.76250459 . -0.8470572 -2.6952701
## GENE_98 -0.52997163 4.03258775 -2.00940517 . 1.8003338 2.9643693
## GENE_99 3.74942613 0.22642748 0.67386125 . -1.5883565 -1.8965001
## GENE_100 -1.11299536 -2.46509692 -0.82646933 . 0.2672397 4.3436631
We can also do more complex matrix operations that are supported by DelayedArray:
colSums(out)
## SAMP_1 SAMP_2 SAMP_3 SAMP_4 SAMP_5 SAMP_6
## 4.7856818 -7.4004828 5.3177152 6.0025478 -6.8448567 -15.2077629
## SAMP_7 SAMP_8 SAMP_9 SAMP_10
## -5.1158611 0.4807862 0.1915800 9.1494656
out %*% runif(ncol(out))
## [,1]
## GENE_1 2.10725452
## GENE_2 0.07567970
## GENE_3 1.50187422
## GENE_4 -0.80970398
## GENE_5 0.31926846
## GENE_6 1.07842559
## GENE_7 -0.13414122
## GENE_8 1.57194318
## GENE_9 -0.11853270
## GENE_10 -0.68283968
## GENE_11 0.51220685
## GENE_12 -0.99820734
## GENE_13 1.10193343
## GENE_14 -3.17118060
## GENE_15 -0.71559482
## GENE_16 1.35110416
## GENE_17 -0.18000292
## GENE_18 -3.38122020
## GENE_19 -1.46974104
## GENE_20 0.58420220
## GENE_21 0.18485506
## GENE_22 -1.94664827
## GENE_23 1.79858008
## GENE_24 -0.02023229
## GENE_25 -1.38244380
## GENE_26 1.48828932
## GENE_27 -0.39530771
## GENE_28 2.54575689
## GENE_29 -1.56564365
## GENE_30 -0.43556297
## GENE_31 1.10010192
## GENE_32 -0.40919862
## GENE_33 -2.07440646
## GENE_34 3.16249312
## GENE_35 2.00455520
## GENE_36 0.65369857
## GENE_37 -0.40150770
## GENE_38 -0.59264960
## GENE_39 -0.26103745
## GENE_40 -0.30548735
## GENE_41 -1.75066698
## GENE_42 0.71758239
## GENE_43 -0.50997927
## GENE_44 0.19470899
## GENE_45 -3.32257082
## GENE_46 -0.67972743
## GENE_47 0.26135587
## GENE_48 -0.33123188
## GENE_49 -1.86702908
## GENE_50 0.55003449
## GENE_51 1.15726535
## GENE_52 0.02064735
## GENE_53 -1.55362238
## GENE_54 0.77781202
## GENE_55 2.34119059
## GENE_56 -0.82605801
## GENE_57 -1.29288141
## GENE_58 0.12437789
## GENE_59 0.67535051
## GENE_60 -0.52365212
## GENE_61 -0.91356164
## GENE_62 -1.17906595
## GENE_63 -1.97366577
## GENE_64 -1.23505827
## GENE_65 1.80908696
## GENE_66 0.61132111
## GENE_67 -2.31105921
## GENE_68 -0.42414740
## GENE_69 -0.77335116
## GENE_70 1.32211466
## GENE_71 -0.92033294
## GENE_72 -1.34571481
## GENE_73 3.15113167
## GENE_74 0.72883605
## GENE_75 -0.80222845
## GENE_76 0.22489118
## GENE_77 0.79370508
## GENE_78 -1.79177819
## GENE_79 1.23232340
## GENE_80 0.14432505
## GENE_81 0.27836714
## GENE_82 -1.86756369
## GENE_83 -0.03178917
## GENE_84 0.36811785
## GENE_85 0.45538889
## GENE_86 -1.46961558
## GENE_87 0.27677688
## GENE_88 1.29149950
## GENE_89 0.78714520
## GENE_90 -0.77299792
## GENE_91 0.92827134
## GENE_92 0.72149012
## GENE_93 1.56237014
## GENE_94 0.25014042
## GENE_95 -0.36993539
## GENE_96 1.15960487
## GENE_97 -1.38610118
## GENE_98 2.88576002
## GENE_99 0.20635106
## GENE_100 1.46160043
We can adjust some parameters for creating the backend with appropriate arguments to writeTileDBArray().
For example, the code below allows us to control the path to the backend
as well as the name of the attribute containing the data.
X <- matrix(rnorm(1000), ncol=10)
path <- tempfile()
writeTileDBArray(X, path=path, attr="WHEE")
## <100 x 10> TileDBMatrix object of type "double":
## [,1] [,2] [,3] ... [,9] [,10]
## [1,] -0.2780192 -0.6403386 -0.8423215 . 1.10900147 -4.05574050
## [2,] -0.9917547 0.1077138 1.0607816 . 0.01364444 -0.95884991
## [3,] -0.1725374 -1.8865444 0.5772879 . 0.69850334 0.81655319
## [4,] -0.2222844 1.4127261 0.1305354 . -1.11237241 -1.47347031
## [5,] 0.7824242 0.6567201 1.8080879 . -0.85129804 -1.39582095
## ... . . . . . .
## [96,] 0.15607174 -0.69779674 -0.32803004 . -0.1315751 -1.5243496
## [97,] 1.21393674 -0.73105671 -0.03004298 . 0.7973614 -0.6267355
## [98,] 0.26131493 1.40528715 -0.20566268 . 0.3135253 0.6944263
## [99,] -0.40609019 -0.67559618 1.31587065 . -0.4768329 -2.1536995
## [100,] 1.00396656 0.49220947 0.08086783 . -0.6840942 -2.3717282
As these arguments cannot be passed during coercion, we instead provide global variables that can be set or unset to affect the outcome.
path2 <- tempfile()
setTileDBPath(path2)
as(X, "TileDBArray") # uses path2 to store the backend.
## <100 x 10> TileDBMatrix object of type "double":
## [,1] [,2] [,3] ... [,9] [,10]
## [1,] -0.2780192 -0.6403386 -0.8423215 . 1.10900147 -4.05574050
## [2,] -0.9917547 0.1077138 1.0607816 . 0.01364444 -0.95884991
## [3,] -0.1725374 -1.8865444 0.5772879 . 0.69850334 0.81655319
## [4,] -0.2222844 1.4127261 0.1305354 . -1.11237241 -1.47347031
## [5,] 0.7824242 0.6567201 1.8080879 . -0.85129804 -1.39582095
## ... . . . . . .
## [96,] 0.15607174 -0.69779674 -0.32803004 . -0.1315751 -1.5243496
## [97,] 1.21393674 -0.73105671 -0.03004298 . 0.7973614 -0.6267355
## [98,] 0.26131493 1.40528715 -0.20566268 . 0.3135253 0.6944263
## [99,] -0.40609019 -0.67559618 1.31587065 . -0.4768329 -2.1536995
## [100,] 1.00396656 0.49220947 0.08086783 . -0.6840942 -2.3717282
sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: aarch64-apple-darwin20
## Running under: macOS Ventura 13.6.7
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0
##
## locale:
## [1] C/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/New_York
## tzcode source: internal
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] RcppSpdlog_0.0.18 TileDBArray_1.16.0 DelayedArray_0.32.0
## [4] SparseArray_1.6.0 S4Arrays_1.6.0 IRanges_2.40.0
## [7] abind_1.4-8 S4Vectors_0.44.0 MatrixGenerics_1.18.0
## [10] matrixStats_1.4.1 BiocGenerics_0.52.0 Matrix_1.7-1
## [13] BiocStyle_2.34.0
##
## loaded via a namespace (and not attached):
## [1] bit_4.5.0 jsonlite_1.8.9 compiler_4.4.1
## [4] BiocManager_1.30.25 crayon_1.5.3 Rcpp_1.0.13-1
## [7] nanoarrow_0.6.0 jquerylib_0.1.4 yaml_2.3.10
## [10] fastmap_1.2.0 lattice_0.22-6 R6_2.5.1
## [13] RcppCCTZ_0.2.12 XVector_0.46.0 tiledb_0.30.2
## [16] knitr_1.49 bookdown_0.41 bslib_0.8.0
## [19] rlang_1.1.4 cachem_1.1.0 xfun_0.49
## [22] sass_0.4.9 bit64_4.5.2 cli_3.6.3
## [25] zlibbioc_1.52.0 spdl_0.0.5 digest_0.6.37
## [28] grid_4.4.1 lifecycle_1.0.4 data.table_1.16.2
## [31] evaluate_1.0.1 nanotime_0.3.10 zoo_1.8-12
## [34] rmarkdown_2.29 tools_4.4.1 htmltools_0.5.8.1