1 Introduction

SpatialDE by Svensson et al., 2018, is a method to identify spatially variable genes (SVGs) in spatially resolved transcriptomics data.

2 Installation

You can install spatialDE from Bioconductor with the following code:

if (!requireNamespace("BiocManager", quietly = TRUE)) {
    install.packages("BiocManager")
}
BiocManager::install("spatialDE")

3 Example: Mouse Olfactory Bulb

Reproducing the example analysis from the original SpatialDE Python package.

library(spatialDE)
library(ggplot2)

3.1 Load data

Files originally retrieved from SpatialDE GitHub repository from the following links: https://github.com/Teichlab/SpatialDE/blob/master/Analysis/MouseOB/data/Rep11_MOB_0.csv https://github.com/Teichlab/SpatialDE/blob/master/Analysis/MouseOB/MOB_sample_info.csv

# Expression file used in python SpatialDE. 
data("Rep11_MOB_0")

# Sample Info file used in python SpatialDE
data("MOB_sample_info")

The Rep11_MOB_0 object contains spatial expression data for 16218 genes on 262 spots, with genes as rows and spots as columns.

Rep11_MOB_0[1:5, 1:5]
#>         16.92x9.015 16.945x11.075 16.97x10.118 16.939x12.132 16.949x13.055
#> Nrf1              1             0            0             1             0
#> Zbtb5             1             0            1             0             0
#> Ccnl1             1             3            1             1             0
#> Lrrfip1           2             2            0             0             3
#> Bbs1              1             2            0             4             0
dim(Rep11_MOB_0)
#> [1] 16218   262

The MOB_sample_info object contains a data.frame with coordinates for each spot.

head(MOB_sample_info)

3.1.1 Filter out pratically unobserved genes

Rep11_MOB_0 <- Rep11_MOB_0[rowSums(Rep11_MOB_0) >= 3, ]

3.1.2 Get total_counts for every spot

Rep11_MOB_0 <- Rep11_MOB_0[, row.names(MOB_sample_info)]
MOB_sample_info$total_counts <- colSums(Rep11_MOB_0)
head(MOB_sample_info)

3.1.3 Get coordinates from MOB_sample_info

X <- MOB_sample_info[, c("x", "y")]
head(X)

3.2 stabilize

The SpatialDE method assumes normally distributed data, so we stabilize the variance of the negative binomial distributed counts data using Anscombe’s approximation. The stabilize() function takes as input a data.frame of expression values with samples in columns and genes in rows. Thus, in this case, we have to transpose the data.

norm_expr <- stabilize(Rep11_MOB_0)
#> + '/home/biocbuild/.cache/R/basilisk/1.10.0/0/bin/conda' 'create' '--yes' '--prefix' '/home/biocbuild/.cache/R/basilisk/1.10.0/spatialDE/1.4.0/env' 'python=3.8.5' '--quiet' '-c' 'conda-forge'
#> + '/home/biocbuild/.cache/R/basilisk/1.10.0/0/bin/conda' 'install' '--yes' '--prefix' '/home/biocbuild/.cache/R/basilisk/1.10.0/spatialDE/1.4.0/env' 'python=3.8.5'
#> + '/home/biocbuild/.cache/R/basilisk/1.10.0/0/bin/conda' 'install' '--yes' '--prefix' '/home/biocbuild/.cache/R/basilisk/1.10.0/spatialDE/1.4.0/env' '-c' 'conda-forge' 'python=3.8.5' 'python=3.8.5' 'pandas=0.25.3' 'patsy=0.5.1' 'pip=21.0.1'
#> Error in py_module_import(module, convert = convert) : 
#>   ImportError: /usr/lib/x86_64-linux-gnu/libstdc++.so.6: version `GLIBCXX_3.4.30' not found (required by /home/biocbuild/.cache/R/basilisk/1.10.0/spatialDE/1.4.0/env/lib/python3.8/site-packages/scipy/optimize/_highs/_highs_wrapper.cpython-38-x86_64-linux-gnu.so)
#> Attempting to load the environment 'package:reticulate'
norm_expr[1:5, 1:5]
#>         16.92x9.015 16.945x11.075 16.97x10.118 16.939x12.132 16.949x13.055
#> Nrf1       1.227749     0.8810934    0.8810934     1.2277491     0.8810934
#> Zbtb5      1.227749     0.8810934    1.2277491     0.8810934     0.8810934
#> Ccnl1      1.227749     1.6889027    1.2277491     1.2277491     0.8810934
#> Lrrfip1    1.484676     1.4846765    0.8810934     0.8810934     1.6889027
#> Bbs1       1.227749     1.4846765    0.8810934     1.8584110     0.8810934

3.3 regress_out

Next, we account for differences in library size between the samples by regressing out the effect of the total counts for each gene using linear regression.

resid_expr <- regress_out(norm_expr, sample_info = MOB_sample_info)
resid_expr[1:5, 1:5]
#>         16.92x9.015 16.945x11.075 16.97x10.118 16.939x12.132 16.949x13.055
#> Nrf1    -0.75226761    -1.2352000   -1.0164479    -0.7903289    -1.0973214
#> Zbtb5    0.09242373    -0.3323719    0.1397144    -0.2760560    -0.2533134
#> Ccnl1   -2.77597164    -2.5903783   -2.6092013    -2.8529340    -3.1193883
#> Lrrfip1 -1.92331333    -2.1578718   -2.3849405    -2.5924072    -1.7163300
#> Bbs1    -1.12186064    -1.0266476   -1.3706460    -0.5363646    -1.4666155

3.4 run

To reduce running time, the SpatialDE test is run on a subset of 1000 genes. Running it on the complete data set takes about 10 minutes.

# For this example, run spatialDE on the first 1000 genes
sample_resid_expr <- head(resid_expr, 1000)

results <- spatialDE::run(sample_resid_expr, coordinates = X)
head(results[order(results$qval), ])

3.6 spatial_patterns

Furthermore, we can group spatially variable genes (SVGs) into spatial patterns using automatic expression histology (AEH).

sp <- spatial_patterns(
    sample_resid_expr,
    coordinates = X,
    de_results = de_results,
    n_patterns = 4L, length = 1.5
)
sp$pattern_results

3.7 Plots

Visualizing one of the most significant genes.

gene <- "Pcp4"

ggplot(data = MOB_sample_info, aes(x = x, y = y, color = norm_expr[gene, ])) +
    geom_point(size = 7) +
    ggtitle(gene) +
    scale_color_viridis_c() +
    labs(color = gene)

3.7.1 Plot Spatial Patterns of Multiple Genes

As an alternative to the previous figure, we can plot multiple genes using the normalized expression values.

ordered_de_results <- de_results[order(de_results$qval), ]

multiGenePlots(norm_expr,
    coordinates = X,
    ordered_de_results[1:6, ]$g,
    point_size = 4,
    viridis_option = "D",
    dark_theme = FALSE
)

3.8 Plot Fraction Spatial Variance vs Q-value

FSV_sig(results, ms_results)
#> Warning: ggrepel: 8 unlabeled data points (too many overlaps). Consider
#> increasing max.overlaps

4 SpatialExperiment integration

The SpatialDE workflow can also be executed with a SpatialExperiment object as input.

library(SpatialExperiment)

# For SpatialExperiment object, we neeed to transpose the counts matrix in order
# to have genes on rows and spot on columns. 
# For this example, run spatialDE on the first 1000 genes

partial_counts <- head(Rep11_MOB_0, 1000)

spe <- SpatialExperiment(
  assays = list(counts = partial_counts),
  spatialData = DataFrame(MOB_sample_info[, c("x", "y")]),
  spatialCoordsNames = c("x", "y")
)

out <- spatialDE(spe, assay_type = "counts", verbose = FALSE)
head(out[order(out$qval), ])

4.1 Plot Spatial Patterns of Multiple Genes (using SpatialExperiment object)

We can plot spatial patterns of multiples genes using the spe object.

spe_results <- out[out$qval < 0.05, ]

ordered_spe_results <- spe_results[order(spe_results$qval), ]

multiGenePlots(spe,
    assay_type = "counts",
    ordered_spe_results[1:6, ]$g,
    point_size = 4,
    viridis_option = "D",
    dark_theme = FALSE
)

4.2 Classify spatially variable genes with model_search and spatial_patterns

msearch <- modelSearch(spe,
    de_results = out, qval_thresh = 0.05,
    verbose = FALSE
)

head(msearch)
spatterns <- spatialPatterns(spe,
    de_results = de_results, qval_thresh = 0.05,
    n_patterns = 4L, length = 1.5, verbose = FALSE
)

spatterns$pattern_results

sessionInfo

Session info

#> [1] "2022-11-01 19:14:22 EDT"
#> R version 4.2.1 (2022-06-23)
#> Platform: x86_64-pc-linux-gnu (64-bit)
#> Running under: Ubuntu 20.04.5 LTS
#> 
#> Matrix products: default
#> BLAS:   /home/biocbuild/bbs-3.16-bioc/R/lib/libRblas.so
#> LAPACK: /home/biocbuild/bbs-3.16-bioc/R/lib/libRlapack.so
#> 
#> locale:
#>  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
#>  [3] LC_TIME=en_GB              LC_COLLATE=C              
#>  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
#>  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
#>  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
#> 
#> attached base packages:
#> [1] stats4    stats     graphics  grDevices utils     datasets  methods  
#> [8] base     
#> 
#> other attached packages:
#>  [1] SpatialExperiment_1.8.0     SingleCellExperiment_1.20.0
#>  [3] SummarizedExperiment_1.28.0 Biobase_2.58.0             
#>  [5] GenomicRanges_1.50.0        GenomeInfoDb_1.34.0        
#>  [7] IRanges_2.32.0              S4Vectors_0.36.0           
#>  [9] BiocGenerics_0.44.0         MatrixGenerics_1.10.0      
#> [11] matrixStats_0.62.0          reticulate_1.26            
#> [13] ggplot2_3.3.6               spatialDE_1.4.0            
#> [15] BiocStyle_2.26.0           
#> 
#> loaded via a namespace (and not attached):
#>  [1] bitops_1.0-7              filelock_1.0.2           
#>  [3] rprojroot_2.0.3           backports_1.4.1          
#>  [5] tools_4.2.1               bslib_0.4.0              
#>  [7] utf8_1.2.2                R6_2.5.1                 
#>  [9] HDF5Array_1.26.0          DBI_1.1.3                
#> [11] colorspace_2.0-3          rhdf5filters_1.10.0      
#> [13] withr_2.5.0               gridExtra_2.3            
#> [15] tidyselect_1.2.0          compiler_4.2.1           
#> [17] cli_3.4.1                 basilisk.utils_1.10.0    
#> [19] DelayedArray_0.24.0       labeling_0.4.2           
#> [21] bookdown_0.29             sass_0.4.2               
#> [23] scales_1.2.1              checkmate_2.1.0          
#> [25] stringr_1.4.1             digest_0.6.30            
#> [27] rmarkdown_2.17            R.utils_2.12.1           
#> [29] basilisk_1.10.0           XVector_0.38.0           
#> [31] pkgconfig_2.0.3           htmltools_0.5.3          
#> [33] sparseMatrixStats_1.10.0  highr_0.9                
#> [35] fastmap_1.1.0             limma_3.54.0             
#> [37] rlang_1.0.6               DelayedMatrixStats_1.20.0
#> [39] farver_2.1.1              jquerylib_0.1.4          
#> [41] generics_0.1.3            jsonlite_1.8.3           
#> [43] BiocParallel_1.32.0       dplyr_1.0.10             
#> [45] R.oo_1.25.0               RCurl_1.98-1.9           
#> [47] magrittr_2.0.3            GenomeInfoDbData_1.2.9   
#> [49] scuttle_1.8.0             Matrix_1.5-1             
#> [51] Rcpp_1.0.9                munsell_0.5.0            
#> [53] Rhdf5lib_1.20.0           fansi_1.0.3              
#> [55] lifecycle_1.0.3           R.methodsS3_1.8.2        
#> [57] stringi_1.7.8             yaml_2.3.6               
#> [59] edgeR_3.40.0              zlibbioc_1.44.0          
#> [61] rhdf5_2.42.0              grid_4.2.1               
#> [63] ggrepel_0.9.1             parallel_4.2.1           
#> [65] dqrng_0.3.0               dir.expiry_1.6.0         
#> [67] lattice_0.20-45           beachmat_2.14.0          
#> [69] locfit_1.5-9.6            magick_2.7.3             
#> [71] knitr_1.40                pillar_1.8.1             
#> [73] rjson_0.2.21              codetools_0.2-18         
#> [75] glue_1.6.2                evaluate_0.17            
#> [77] BiocManager_1.30.19       png_0.1-7                
#> [79] vctrs_0.5.0               gtable_0.3.1             
#> [81] assertthat_0.2.1          cachem_1.0.6             
#> [83] xfun_0.34                 DropletUtils_1.18.0      
#> [85] viridisLite_0.4.1         tibble_3.1.8             
#> [87] here_1.0.1