spicyR 1.4.0
if (!require("BiocManager"))
install.packages("BiocManager")
BiocManager::install("spicyR")
A SegmentedCells
is an object designed to store data from imaging cytometry
(FISH, IMC, CycIF, spatial transcriptomics, … ) that has already been
segmented and reduced to individual cells. A SegmentedCells
extends DataFrame
and defines methods that take advantage of DataFrame nesting to represent
various elements of cell-based experiments with spatial orientation that are
commonly encountered. This object is able to store information on a cell’s
spatial location, cellType, morphology, intensity of gene/protein marks as well
as image level phenotype information. Ideally this type of data can be used for
cell clustering, point process models or nearest neighbour analysis. Below we
will consider a few examples of data formats that can be transformed into a
SegmentedCells
.
First, load the spicyR
package.
library(spicyR)
library(S4Vectors)
Here we create a SegmentedCells
from data that was output from cellProfiler or
similar programs. This assumes that there are columns with the string
AreaShape_ and Intensity_Mean and that there are ObjectNumber
and
ImageNumber
columns.
Here we create toy cellProfiler data.
### Something that resembles cellProfiler data
set.seed(51773)
n = 10
cells <- data.frame(row.names = seq_len(n))
cells$ObjectNumber <- seq_len(n)
cells$ImageNumber <- rep(1:2,c(n/2,n/2))
cells$AreaShape_Center_X <- runif(n)
cells$AreaShape_Center_Y <- runif(n)
cells$AreaShape_round <- rexp(n)
cells$AreaShape_diameter <- rexp(n, 2)
cells$Intensity_Mean_CD8 <- rexp(n, 10)
cells$Intensity_Mean_CD4 <- rexp(n, 10)
We can then create a SegmentedCells
object.
cellExp <- SegmentedCells(cells, cellProfiler = TRUE)
#> There is no cellType column, setting to NA
cellExp
#> A SegmentedCells object with...
#> Number of images:2
#> Number of cells:10
#> Number of cell types: 1 [ NA ]
#> Number of intensities: 2 [ CD8, CD4 ]
#> Number of morphologies: 2 [ round, diameter ]
#> Number of image phenotypes: 0 [ ]
Extract the cellSummary information and overwrite it as well.
cellSum <- cellSummary(cellExp)
head(cellSum)
#> DataFrame with 6 rows and 6 columns
#> imageID cellID imageCellID x y cellType
#> <factor> <character> <character> <numeric> <numeric> <factor>
#> 1 1 cell_1 cell_1 0.367243 0.820150 NA
#> 2 1 cell_2 cell_2 0.613818 0.846200 NA
#> 3 1 cell_3 cell_3 0.335930 0.318286 NA
#> 4 1 cell_4 cell_4 0.501737 0.373931 NA
#> 5 1 cell_5 cell_5 0.829350 0.140415 NA
#> 6 2 cell_6 cell_6 0.831271 0.175128 NA
cellSummary(cellExp) <- cellSum
We can then set the cell types of each cell by extracting and clustering marker intensity information.
markers <- cellMarks(cellExp)
kM <- kmeans(markers,2)
cellType(cellExp) <- paste('cluster',kM$cluster, sep = '')
cellSum <- cellSummary(cellExp)
head(cellSum)
#> DataFrame with 6 rows and 6 columns
#> imageID cellID imageCellID x y cellType
#> <factor> <character> <character> <numeric> <numeric> <factor>
#> 1 1 cell_1 cell_1 0.367243 0.820150 cluster1
#> 2 1 cell_2 cell_2 0.613818 0.846200 cluster1
#> 3 1 cell_3 cell_3 0.335930 0.318286 cluster2
#> 4 1 cell_4 cell_4 0.501737 0.373931 cluster2
#> 5 1 cell_5 cell_5 0.829350 0.140415 cluster1
#> 6 2 cell_6 cell_6 0.831271 0.175128 cluster1
Read in data.
isletFile <- system.file("extdata","isletCells.txt.gz", package = "spicyR")
cells <- read.table(isletFile, header = TRUE)
We can then create a SegmentedCells
object.
cellExp <- SegmentedCells(cells, cellProfiler = TRUE)
#> There is no cellType column, setting to NA
cellExp
#> A SegmentedCells object with...
#> Number of images:1
#> Number of cells:4650
#> Number of cell types: 1 [ NA ]
#> Number of intensities: 38 [ c1, c10, ..., c9 ]
#> Number of morphologies: 15 [ Area, Compactness, ..., Solidity ]
#> Number of image phenotypes: 0 [ ]
We can then set the cell types of each cell by extracting and clustering marker intensity information.
markers <- cellMarks(cellExp)
kM <- kmeans(markers,4)
cellType(cellExp) <- paste('cluster',kM$cluster, sep = '')
cellSum <- cellSummary(cellExp)
head(cellSum)
#> DataFrame with 6 rows and 6 columns
#> imageID cellID imageCellID x y cellType
#> <factor> <character> <character> <integer> <integer> <factor>
#> 1 1 cell_1 cell_1 53 0 cluster1
#> 2 1 cell_2 cell_2 128 0 cluster1
#> 3 1 cell_3 cell_3 135 0 cluster1
#> 4 1 cell_4 cell_4 450 0 cluster1
#> 5 1 cell_5 cell_5 458 0 cluster3
#> 6 1 cell_6 cell_6 551 0 cluster3
Here is a very simple plot in ggplot showing the spatial distribution of the cell types
plot(cellExp, imageID=1)
Here we create toy data that has a slightly more fluid naming stucture.
set.seed(51773)
n = 10
cells <- data.frame(row.names = seq_len(n))
cells$cellID <- seq_len(n)
cells$imageCellID <- rep(seq_len(n/2),2)
cells$imageID <- rep(1:2,c(n/2,n/2))
cells$x <- runif(n)
cells$y <- runif(n)
cells$shape_round <- rexp(n)
cells$shape_diameter <- rexp(n, 2)
cells$intensity_CD8 <- rexp(n, 10)
cells$intensity_CD4 <- rexp(n, 10)
cells$cellType <- paste('cluster',sample(1:2,n,replace = TRUE), sep = '_')
We can then create a SegmentedCells
object.
cellExp <- SegmentedCells(cells,
cellTypeString = 'cellType',
intensityString = 'intensity_',
morphologyString = 'shape_')
cellExp
#> A SegmentedCells object with...
#> Number of images:2
#> Number of cells:10
#> Number of cell types: 2 [ cluster_1, cluster_2 ]
#> Number of intensities: 2 [ CD8, CD4 ]
#> Number of morphologies: 2 [ round, diameter ]
#> Number of image phenotypes: 0 [ ]
Extract morphology information
morph <- cellMorph(cellExp)
head(morph)
#> DataFrame with 6 rows and 2 columns
#> round diameter
#> <numeric> <numeric>
#> 1 0.489959 1.3913000
#> 2 3.601540 0.3670445
#> 3 0.243015 0.0149731
#> 4 0.077238 1.1434083
#> 5 0.639661 0.0043457
#> 6 0.498723 0.0341195
We can also include phenotype information for each image. Create some
corresponding toy phenotype information which must have a imageID
variable.
phenoData <- DataFrame(imageID = c('1','2'),
age = c(21,81),
status = c('dead','alive'))
imagePheno(cellExp) <- phenoData
imagePheno(cellExp)
#> DataFrame with 2 rows and 3 columns
#> imageID age status
#> <character> <numeric> <character>
#> 1 1 21 dead
#> 2 2 81 alive
imagePheno(cellExp, expand = TRUE)
#> DataFrame with 10 rows and 3 columns
#> imageID age status
#> <character> <numeric> <character>
#> 1 1 21 dead
#> 1 1 21 dead
#> 1 1 21 dead
#> 1 1 21 dead
#> 1 1 21 dead
#> 2 2 81 alive
#> 2 2 81 alive
#> 2 2 81 alive
#> 2 2 81 alive
#> 2 2 81 alive
Here we generate data where we only know the location of each cell.
set.seed(51773)
n = 10
cells <- data.frame(row.names = seq_len(n))
cells$x <- runif(n)
cells$y <- runif(n)
cellExp <- SegmentedCells(cells)
#> There is no cellType column, setting to NA
#> There is no cellID. I'll create these
#> There is no image specific imageCellID. I'll create these
#> There is no imageID. I'll assume this is only one image and create an arbitrary imageID
cellExp
#> A SegmentedCells object with...
#> Number of images:1
#> Number of cells:10
#> Number of cell types: 1 [ NA ]
#> Number of intensities: 0 [ ]
#> Number of morphologies: 0 [ ]
#> Number of image phenotypes: 0 [ ]
Extract the cellSummary information which now also has cellIDs and imageIDs.
cellSum <- cellSummary(cellExp)
head(cellSum)
#> DataFrame with 6 rows and 6 columns
#> imageID cellID imageCellID x y cellType
#> <factor> <character> <character> <numeric> <numeric> <factor>
#> 1 image1 cell_1 cell_1 0.367243 0.820150 NA
#> 2 image1 cell_2 cell_2 0.613818 0.846200 NA
#> 3 image1 cell_3 cell_3 0.335930 0.318286 NA
#> 4 image1 cell_4 cell_4 0.501737 0.373931 NA
#> 5 image1 cell_5 cell_5 0.829350 0.140415 NA
#> 6 image1 cell_6 cell_6 0.831271 0.175128 NA
sessionInfo()
#> R version 4.1.0 (2021-05-18)
#> Platform: x86_64-pc-linux-gnu (64-bit)
#> Running under: Ubuntu 20.04.2 LTS
#>
#> Matrix products: default
#> BLAS: /home/biocbuild/bbs-3.13-bioc/R/lib/libRblas.so
#> LAPACK: /home/biocbuild/bbs-3.13-bioc/R/lib/libRlapack.so
#>
#> locale:
#> [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
#> [3] LC_TIME=en_GB LC_COLLATE=C
#> [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
#> [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
#> [9] LC_ADDRESS=C LC_TELEPHONE=C
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
#>
#> attached base packages:
#> [1] parallel stats4 stats graphics grDevices utils datasets
#> [8] methods base
#>
#> other attached packages:
#> [1] S4Vectors_0.30.0 BiocGenerics_0.38.0 spicyR_1.4.0
#> [4] BiocStyle_2.20.0
#>
#> loaded via a namespace (and not attached):
#> [1] sass_0.4.0 tidyr_1.1.3 jsonlite_1.7.2
#> [4] splines_4.1.0 bslib_0.2.5.1 assertthat_0.2.1
#> [7] highr_0.9 BiocManager_1.30.15 spatstat.geom_2.1-0
#> [10] yaml_2.2.1 numDeriv_2016.8-1.1 pillar_1.6.1
#> [13] lattice_0.20-44 glue_1.4.2 digest_0.6.27
#> [16] RColorBrewer_1.1-2 polyclip_1.10-0 minqa_1.2.4
#> [19] colorspace_2.0-1 htmltools_0.5.1.1 Matrix_1.3-3
#> [22] spatstat.sparse_2.0-0 pkgconfig_2.0.3 pheatmap_1.0.12
#> [25] magick_2.7.2 bookdown_0.22 purrr_0.3.4
#> [28] spatstat.core_2.1-2 scales_1.1.1 tensor_1.5
#> [31] spatstat.utils_2.1-0 BiocParallel_1.26.0 lme4_1.1-27
#> [34] tibble_3.1.2 mgcv_1.8-35 farver_2.1.0
#> [37] generics_0.1.0 IRanges_2.26.0 ggplot2_3.3.3
#> [40] ellipsis_0.3.2 magrittr_2.0.1 crayon_1.4.1
#> [43] deldir_0.2-10 evaluate_0.14 fansi_0.4.2
#> [46] nlme_3.1-152 MASS_7.3-54 tools_4.1.0
#> [49] data.table_1.14.0 lifecycle_1.0.0 stringr_1.4.0
#> [52] munsell_0.5.0 compiler_4.1.0 jquerylib_0.1.4
#> [55] concaveman_1.1.0 rlang_0.4.11 grid_4.1.0
#> [58] nloptr_1.2.2.2 goftest_1.2-2 labeling_0.4.2
#> [61] rmarkdown_2.8 boot_1.3-28 gtable_0.3.0
#> [64] lmerTest_3.1-3 abind_1.4-5 DBI_1.1.1
#> [67] R6_2.5.0 knitr_1.33 dplyr_1.0.6
#> [70] utf8_1.2.1 stringi_1.6.2 spatstat.data_2.1-0
#> [73] Rcpp_1.0.6 vctrs_0.3.8 rpart_4.1-15
#> [76] tidyselect_1.1.1 xfun_0.23