Contents

1 Installation

if (!require("BiocManager"))
    install.packages("BiocManager")
BiocManager::install("spicyR")

2 Overview

A SegmentedCells is an object designed to store data from imaging cytometry (FISH, IMC, CycIF, spatial transcriptomics, … ) that has already been segmented and reduced to individual cells. A SegmentedCells extends DataFrame and defines methods that take advantage of DataFrame nesting to represent various elements of cell-based experiments with spatial orientation that are commonly encountered. This object is able to store information on a cell’s spatial location, cellType, morphology, intensity of gene/protein marks as well as image level phenotype information. Ideally this type of data can be used for cell clustering, point process models or nearest neighbour analysis. Below we will consider a few examples of data formats that can be transformed into a SegmentedCells.

First, load the spicyR package.

library(spicyR)
library(S4Vectors)

3 Example 1 - Data resembles cellProfiler output

Here we create a SegmentedCells from data that was output from cellProfiler or similar programs. This assumes that there are columns with the string AreaShape_ and Intensity_Mean and that there are ObjectNumber and ImageNumber columns.

Here we create toy cellProfiler data.


### Something that resembles cellProfiler data

set.seed(51773)

n = 10

cells <- data.frame(row.names = seq_len(n))
cells$ObjectNumber <- seq_len(n)
cells$ImageNumber <- rep(1:2,c(n/2,n/2))
cells$AreaShape_Center_X <- runif(n)
cells$AreaShape_Center_Y <- runif(n)
cells$AreaShape_round <- rexp(n)
cells$AreaShape_diameter <- rexp(n, 2)
cells$Intensity_Mean_CD8 <- rexp(n, 10)
cells$Intensity_Mean_CD4 <- rexp(n, 10)

We can then create a SegmentedCells object.

cellExp <- SegmentedCells(cells, cellProfiler = TRUE)
#> There is no cellType column, setting to NA
cellExp
#> A SegmentedCells object with...
#> Number of images:2
#> Number of cells:10
#> Number of cell types: 1 [ NA ]
#> Number of intensities: 2 [ CD8, CD4 ]
#> Number of morphologies: 2 [ round, diameter ]
#> Number of image phenotypes: 0 [ ]

Extract the cellSummary information and overwrite it as well.

cellSum <- cellSummary(cellExp)
head(cellSum)
#> DataFrame with 6 rows and 6 columns
#>    imageID      cellID imageCellID         x         y cellType
#>   <factor> <character> <character> <numeric> <numeric> <factor>
#> 1        1      cell_1      cell_1  0.367243  0.820150       NA
#> 2        1      cell_2      cell_2  0.613818  0.846200       NA
#> 3        1      cell_3      cell_3  0.335930  0.318286       NA
#> 4        1      cell_4      cell_4  0.501737  0.373931       NA
#> 5        1      cell_5      cell_5  0.829350  0.140415       NA
#> 6        2      cell_6      cell_6  0.831271  0.175128       NA

cellSummary(cellExp) <- cellSum

We can then set the cell types of each cell by extracting and clustering marker intensity information.

markers <- cellMarks(cellExp)
kM <- kmeans(markers,2)
cellType(cellExp) <- paste('cluster',kM$cluster, sep = '')

cellSum <- cellSummary(cellExp)
head(cellSum)
#> DataFrame with 6 rows and 6 columns
#>    imageID      cellID imageCellID         x         y cellType
#>   <factor> <character> <character> <numeric> <numeric> <factor>
#> 1        1      cell_1      cell_1  0.367243  0.820150 cluster1
#> 2        1      cell_2      cell_2  0.613818  0.846200 cluster1
#> 3        1      cell_3      cell_3  0.335930  0.318286 cluster2
#> 4        1      cell_4      cell_4  0.501737  0.373931 cluster2
#> 5        1      cell_5      cell_5  0.829350  0.140415 cluster1
#> 6        2      cell_6      cell_6  0.831271  0.175128 cluster1

4 Example 2 - Three pancreatic islets from from Damond et al (2019)

Read in data.

isletFile <- system.file("extdata","isletCells.txt.gz", package = "spicyR")
cells <- read.table(isletFile, header = TRUE)

We can then create a SegmentedCells object.

cellExp <- SegmentedCells(cells, cellProfiler = TRUE)
#> There is no cellType column, setting to NA
cellExp
#> A SegmentedCells object with...
#> Number of images:1
#> Number of cells:4650
#> Number of cell types: 1 [ NA ]
#> Number of intensities: 38 [ c1, c10, ..., c9 ]
#> Number of morphologies: 15 [ Area, Compactness, ..., Solidity ]
#> Number of image phenotypes: 0 [ ]

We can then set the cell types of each cell by extracting and clustering marker intensity information.

markers <- cellMarks(cellExp)
kM <- kmeans(markers,4)
cellType(cellExp) <- paste('cluster',kM$cluster, sep = '')

cellSum <- cellSummary(cellExp)
head(cellSum)
#> DataFrame with 6 rows and 6 columns
#>    imageID      cellID imageCellID         x         y cellType
#>   <factor> <character> <character> <integer> <integer> <factor>
#> 1        1      cell_1      cell_1        53         0 cluster1
#> 2        1      cell_2      cell_2       128         0 cluster1
#> 3        1      cell_3      cell_3       135         0 cluster1
#> 4        1      cell_4      cell_4       450         0 cluster1
#> 5        1      cell_5      cell_5       458         0 cluster3
#> 6        1      cell_6      cell_6       551         0 cluster3

Here is a very simple plot in ggplot showing the spatial distribution of the cell types

plot(cellExp, imageID=1)

5 Example 3 - Custom markerintensity and morphology column names

Here we create toy data that has a slightly more fluid naming stucture.

set.seed(51773)

n = 10

cells <- data.frame(row.names = seq_len(n))
cells$cellID <- seq_len(n)
cells$imageCellID <- rep(seq_len(n/2),2)
cells$imageID <- rep(1:2,c(n/2,n/2))
cells$x <- runif(n)
cells$y <- runif(n)
cells$shape_round <- rexp(n)
cells$shape_diameter <- rexp(n, 2)
cells$intensity_CD8 <- rexp(n, 10)
cells$intensity_CD4 <- rexp(n, 10)
cells$cellType <- paste('cluster',sample(1:2,n,replace = TRUE), sep = '_')

We can then create a SegmentedCells object.


cellExp <- SegmentedCells(cells, 
                          cellTypeString = 'cellType', 
                          intensityString = 'intensity_', 
                          morphologyString = 'shape_')
cellExp
#> A SegmentedCells object with...
#> Number of images:2
#> Number of cells:10
#> Number of cell types: 2 [ cluster_1, cluster_2 ]
#> Number of intensities: 2 [ CD8, CD4 ]
#> Number of morphologies: 2 [ round, diameter ]
#> Number of image phenotypes: 0 [ ]

Extract morphology information

morph <- cellMorph(cellExp)
head(morph)
#> DataFrame with 6 rows and 2 columns
#>       round  diameter
#>   <numeric> <numeric>
#> 1  0.489959 1.3913000
#> 2  3.601540 0.3670445
#> 3  0.243015 0.0149731
#> 4  0.077238 1.1434083
#> 5  0.639661 0.0043457
#> 6  0.498723 0.0341195

5.1 Phenotype information

We can also include phenotype information for each image. Create some corresponding toy phenotype information which must have a imageID variable.

phenoData <- DataFrame(imageID = c('1','2'), 
                       age = c(21,81), 
                       status = c('dead','alive'))
imagePheno(cellExp) <- phenoData
imagePheno(cellExp)
#> DataFrame with 2 rows and 3 columns
#>       imageID       age      status
#>   <character> <numeric> <character>
#> 1           1        21        dead
#> 2           2        81       alive
imagePheno(cellExp, expand = TRUE)
#> DataFrame with 10 rows and 3 columns
#>       imageID       age      status
#>   <character> <numeric> <character>
#> 1           1        21        dead
#> 1           1        21        dead
#> 1           1        21        dead
#> 1           1        21        dead
#> 1           1        21        dead
#> 2           2        81       alive
#> 2           2        81       alive
#> 2           2        81       alive
#> 2           2        81       alive
#> 2           2        81       alive

6 Example 4 - Minimal example, cells only have spatial coordinates

Here we generate data where we only know the location of each cell.

set.seed(51773)

n = 10

cells <- data.frame(row.names = seq_len(n))
cells$x <- runif(n)
cells$y <- runif(n)
cellExp <- SegmentedCells(cells)
#> There is no cellType column, setting to NA
#> There is no cellID. I'll create these
#> There is no image specific imageCellID. I'll create these
#> There is no imageID. I'll assume this is only one image and create an arbitrary imageID
cellExp
#> A SegmentedCells object with...
#> Number of images:1
#> Number of cells:10
#> Number of cell types: 1 [ NA ]
#> Number of intensities: 0 [ ]
#> Number of morphologies: 0 [ ]
#> Number of image phenotypes: 0 [ ]

Extract the cellSummary information which now also has cellIDs and imageIDs.

cellSum <- cellSummary(cellExp)
head(cellSum)
#> DataFrame with 6 rows and 6 columns
#>    imageID      cellID imageCellID         x         y cellType
#>   <factor> <character> <character> <numeric> <numeric> <factor>
#> 1   image1      cell_1      cell_1  0.367243  0.820150       NA
#> 2   image1      cell_2      cell_2  0.613818  0.846200       NA
#> 3   image1      cell_3      cell_3  0.335930  0.318286       NA
#> 4   image1      cell_4      cell_4  0.501737  0.373931       NA
#> 5   image1      cell_5      cell_5  0.829350  0.140415       NA
#> 6   image1      cell_6      cell_6  0.831271  0.175128       NA

7 sessionInfo()

sessionInfo()
#> R version 4.1.0 (2021-05-18)
#> Platform: x86_64-pc-linux-gnu (64-bit)
#> Running under: Ubuntu 20.04.2 LTS
#> 
#> Matrix products: default
#> BLAS:   /home/biocbuild/bbs-3.13-bioc/R/lib/libRblas.so
#> LAPACK: /home/biocbuild/bbs-3.13-bioc/R/lib/libRlapack.so
#> 
#> locale:
#>  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
#>  [3] LC_TIME=en_GB              LC_COLLATE=C              
#>  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
#>  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
#>  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
#> 
#> attached base packages:
#> [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
#> [8] methods   base     
#> 
#> other attached packages:
#> [1] S4Vectors_0.30.0    BiocGenerics_0.38.0 spicyR_1.4.0       
#> [4] BiocStyle_2.20.0   
#> 
#> loaded via a namespace (and not attached):
#>  [1] sass_0.4.0            tidyr_1.1.3           jsonlite_1.7.2       
#>  [4] splines_4.1.0         bslib_0.2.5.1         assertthat_0.2.1     
#>  [7] highr_0.9             BiocManager_1.30.15   spatstat.geom_2.1-0  
#> [10] yaml_2.2.1            numDeriv_2016.8-1.1   pillar_1.6.1         
#> [13] lattice_0.20-44       glue_1.4.2            digest_0.6.27        
#> [16] RColorBrewer_1.1-2    polyclip_1.10-0       minqa_1.2.4          
#> [19] colorspace_2.0-1      htmltools_0.5.1.1     Matrix_1.3-3         
#> [22] spatstat.sparse_2.0-0 pkgconfig_2.0.3       pheatmap_1.0.12      
#> [25] magick_2.7.2          bookdown_0.22         purrr_0.3.4          
#> [28] spatstat.core_2.1-2   scales_1.1.1          tensor_1.5           
#> [31] spatstat.utils_2.1-0  BiocParallel_1.26.0   lme4_1.1-27          
#> [34] tibble_3.1.2          mgcv_1.8-35           farver_2.1.0         
#> [37] generics_0.1.0        IRanges_2.26.0        ggplot2_3.3.3        
#> [40] ellipsis_0.3.2        magrittr_2.0.1        crayon_1.4.1         
#> [43] deldir_0.2-10         evaluate_0.14         fansi_0.4.2          
#> [46] nlme_3.1-152          MASS_7.3-54           tools_4.1.0          
#> [49] data.table_1.14.0     lifecycle_1.0.0       stringr_1.4.0        
#> [52] munsell_0.5.0         compiler_4.1.0        jquerylib_0.1.4      
#> [55] concaveman_1.1.0      rlang_0.4.11          grid_4.1.0           
#> [58] nloptr_1.2.2.2        goftest_1.2-2         labeling_0.4.2       
#> [61] rmarkdown_2.8         boot_1.3-28           gtable_0.3.0         
#> [64] lmerTest_3.1-3        abind_1.4-5           DBI_1.1.1            
#> [67] R6_2.5.0              knitr_1.33            dplyr_1.0.6          
#> [70] utf8_1.2.1            stringi_1.6.2         spatstat.data_2.1-0  
#> [73] Rcpp_1.0.6            vctrs_0.3.8           rpart_4.1-15         
#> [76] tidyselect_1.1.1      xfun_0.23