Contents

1 Overview

The alabaster.mae package implements methods to save MultiAssayExperiment objects to file artifacts and load them back into R. Check out the alabaster.base for more details on the motivation and concepts of the alabaster framework.

2 Quick start

Let’s create a mildly complicated MAE containing RNA-seq and ChIP-seq data with partial overlaps:

library(SummarizedExperiment)
rna.counts <- matrix(rpois(60, 10), ncol=6)
colnames(rna.counts) <- c("disease1", "disease2", "disease3", "control1", "control2", "control3")
rownames(rna.counts) <- c("ENSMUSG00000000001", "ENSMUSG00000000003", "ENSMUSG00000000028", 
    "ENSMUSG00000000031", "ENSMUSG00000000037", "ENSMUSG00000000049",  "ENSMUSG00000000056", 
    "ENSMUSG00000000058", "ENSMUSG00000000078",  "ENSMUSG00000000085")
rna.se <- SummarizedExperiment(list(counts=rna.counts))
colData(rna.se)$disease <- rep(c("disease", "control"), each=3)

chip.counts <- matrix(rpois(100, 10), ncol=4)
colnames(chip.counts) <- c("disease1", "disease2", "control1", "control3")
chip.peaks <- GRanges("chr1", IRanges(1:25*100+1, 1:25*100+100))
chip.se <- SummarizedExperiment(list(counts=chip.counts), rowRanges=chip.peaks)

library(MultiAssayExperiment)
mapping <- DataFrame(
    primary = c(colnames(rna.se), colnames(chip.se)), # sample identifiers
    assay = rep(c("rnaseq", "chipseq"), c(ncol(rna.se), ncol(chip.se))), # experiment name
    colname = c(colnames(rna.se), colnames(chip.se)) # column names inside each experiment
)
mae <- MultiAssayExperiment(list(rnaseq=rna.se, chipseq=chip.se), sampleMap=mapping)

We can use stageObject() to save it inside a staging directory:

library(alabaster.mae)
tmp <- tempfile()
dir.create(tmp)
meta <- stageObject(mae, tmp, "mae")
.writeMetadata(meta, tmp)
## $type
## [1] "local"
## 
## $path
## [1] "mae/dataset.json"
list.files(tmp, recursive=TRUE)
##  [1] "mae/dataset.json"                                     
##  [2] "mae/experiment-1/assay-1/array.h5"                    
##  [3] "mae/experiment-1/assay-1/array.h5.json"               
##  [4] "mae/experiment-1/coldata/simple.csv.gz"               
##  [5] "mae/experiment-1/coldata/simple.csv.gz.json"          
##  [6] "mae/experiment-1/experiment.json"                     
##  [7] "mae/experiment-1/rowdata/simple.csv.gz"               
##  [8] "mae/experiment-1/rowdata/simple.csv.gz.json"          
##  [9] "mae/experiment-2/assay-1/array.h5"                    
## [10] "mae/experiment-2/assay-1/array.h5.json"               
## [11] "mae/experiment-2/coldata/simple.csv.gz"               
## [12] "mae/experiment-2/coldata/simple.csv.gz.json"          
## [13] "mae/experiment-2/experiment.json"                     
## [14] "mae/experiment-2/rowranges/ranges.csv.gz"             
## [15] "mae/experiment-2/rowranges/ranges.csv.gz.json"        
## [16] "mae/experiment-2/rowranges/seqinfo/simple.csv.gz"     
## [17] "mae/experiment-2/rowranges/seqinfo/simple.csv.gz.json"
## [18] "mae/sample_data/simple.csv.gz"                        
## [19] "mae/sample_data/simple.csv.gz.json"                   
## [20] "mae/sample_mapping/simple.csv.gz"                     
## [21] "mae/sample_mapping/simple.csv.gz.json"

We can then load it back into the session with loadObject().

meta <- acquireMetadata(tmp, "mae/dataset.json")
roundtrip <- loadObject(meta, tmp)
class(roundtrip)
## [1] "MultiAssayExperiment"
## attr(,"package")
## [1] "MultiAssayExperiment"

More details on the metadata and on-disk layout are provided in the schema.

Session information

sessionInfo()
## R version 4.3.0 RC (2023-04-13 r84269)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.2 LTS
## 
## Matrix products: default
## BLAS:   /home/biocbuild/bbs-3.17-bioc/R/lib/libRblas.so 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_GB              LC_COLLATE=C              
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: America/New_York
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] alabaster.mae_1.0.0         alabaster.base_1.0.0       
##  [3] MultiAssayExperiment_1.26.0 SummarizedExperiment_1.30.0
##  [5] Biobase_2.60.0              GenomicRanges_1.52.0       
##  [7] GenomeInfoDb_1.36.0         IRanges_2.34.0             
##  [9] S4Vectors_0.38.0            BiocGenerics_0.46.0        
## [11] MatrixGenerics_1.12.0       matrixStats_0.63.0         
## [13] BiocStyle_2.28.0           
## 
## loaded via a namespace (and not attached):
##  [1] sass_0.4.5              bitops_1.0-7            lattice_0.21-8         
##  [4] alabaster.se_1.0.0      jsonvalidate_1.3.2      digest_0.6.31          
##  [7] evaluate_0.20           grid_4.3.0              bookdown_0.33          
## [10] fastmap_1.1.1           jsonlite_1.8.4          Matrix_1.5-4           
## [13] alabaster.schemas_1.0.0 BiocManager_1.30.20     HDF5Array_1.28.0       
## [16] jquerylib_0.1.4         cli_3.6.1               rlang_1.1.0            
## [19] XVector_0.40.0          cachem_1.0.7            DelayedArray_0.26.0    
## [22] yaml_2.3.7              tools_4.3.0             Rhdf5lib_1.22.0        
## [25] GenomeInfoDbData_1.2.10 alabaster.ranges_1.0.0  alabaster.matrix_1.0.0 
## [28] curl_5.0.0              R6_2.5.1                rhdf5_2.44.0           
## [31] zlibbioc_1.46.0         V8_4.3.0                bslib_0.4.2            
## [34] Rcpp_1.0.10             xfun_0.39               knitr_1.42             
## [37] rhdf5filters_1.12.0     htmltools_0.5.5         rmarkdown_2.21         
## [40] compiler_4.3.0          RCurl_1.98-1.12