## ----global_options, include=FALSE-------------------------------------------- ## ThG: chunk added to enable global knitr options. The below turns on ## caching for faster vignette re-build during text editing. knitr::opts_chunk$set(cache=TRUE) ## ----css, echo = FALSE, results = 'asis'-------------------------------------- BiocStyle::markdown(css.files=c('file/custom.css')) ## ----setup0, eval=TRUE, echo=FALSE, message=FALSE, warning=FALSE-------------- library(knitr); opts_chunk$set(message=FALSE, warning=FALSE) ## ----covizOver, echo=FALSE, fig.wide=TRUE, out.width="100%", fig.cap=("Overview of single cell and bulk tissue co-visualization. (1) Single cell data are organized in a `SingleCellExperiment` object where cell grouping information is stored in the `colData` slot. (2) Bulk tissue expression data are stored in a `SummarizedExperiment` object, where tissues are labelled by aSVG feature identifiers (3) in the `colData` slot. (3) The aSVG instance is stored in an `SVG` class. The `coordinate` slot defines spatial feature shapes in spatial heatmap (5) while the `attribute` slot defines styling of these features such as color, line with, etc. (4) Bi-directional mapping between cell or tissue groups are supported. Cell groups and tissues can be colored by fixed colors or by expression values of selected genes. (5) Cells in (1) and bulk tissues in (2) are co-visualized in and embedding plot and spatial heatmap plot respectively, where mapped cell groups and respective tissues are indicated by same colors. ")---- include_graphics('img/covisualize.jpg') ## ---- eval=TRUE, echo=TRUE, warnings=FALSE, results='hide'-------------------- library(spatialHeatmap); library(SummarizedExperiment); library(scran); library(scater); library(igraph); library(SingleCellExperiment); library(BiocParallel) library(kableExtra) ## ---- eval=FALSE, echo=TRUE, warnings=FALSE----------------------------------- # browseVignettes('spatialHeatmap') ## ----eval=TRUE, echo=TRUE, message=FALSE, warnings=FALSE---------------------- cache.pa <- '~/.cache/shm' # Set path of the cache directory ## ---- eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------ set.seed(10) ## ---- eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------ sce.pa <- system.file("extdata/shinyApp/example", "cell_mouse_brain.rds", package="spatialHeatmap") sce <- readRDS(sce.pa) sce.dimred.quick <- process_cell_meta(sce, qc.metric=list(subsets=list(Mt=rowData(sce)$featureType=='mito'), threshold=1)) colData(sce.dimred.quick)[1:3, 1:2] ## ---- eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------ sce.aggr.quick <- aggr_rep(sce.dimred.quick, assay.na='logcounts', sam.factor='label', aggr='mean') ## ---- eval=TRUE, echo=TRUE, warnings=FALSE, results='hide'-------------------- svg.mus.brain.pa <- system.file("extdata/shinyApp/example", "mus_musculus.brain.svg", package="spatialHeatmap") svg.mus.brain <- read_svg(svg.mus.brain.pa) ## ---- eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------ tail(attribute(svg.mus.brain)[[1]])[, 1:4] ## ---- eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------ lis.match.quick <- list(hypothalamus=c('hypothalamus'), cortex.S1=c('cerebral.cortex', 'nose')) ## ---- eval=TRUE, echo=TRUE, warnings=FALSE, results='hide', fig.cap=('Co-visualization of cell-to-tissue mapping. The co-visualization is created on gene `Eif5b`. Single cells in the embedding plot and their matching aSVG features in the spatial heatmap are filled by the same color according to aggregated expression values of `Eif5b` in single cell data.')---- shm.lis.quick <- covis(svg=svg.mus.brain, data=sce.aggr.quick, ID=c('Eif5b'), sce.dimred=sce.dimred.quick, dimred='PCA', cell.group='label', tar.cell=names(lis.match.quick), lis.rematch=lis.match.quick, assay.na='logcounts', bar.width=0.11, dim.lgd.nrow=1, height=0.7, legend.r=1.5, legend.key.size=0.02, legend.text.size=12, legend.nrow=3) ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- blk.mus.pa <- system.file("extdata/shinyApp/example", "bulk_mouse_cocluster.rds", package="spatialHeatmap") blk.mus <- readRDS(blk.mus.pa) assay(blk.mus)[1:3, 1:5] colData(blk.mus)[1:3, , drop=FALSE] ## ----eval=TRUE, echo=TRUE, warnings=FALSE, results='hide'--------------------- # Normalization. blk.mus.nor <- norm_data(data=blk.mus, norm.fun='ESF', log2.trans=TRUE) # Aggregation. blk.mus.aggr <- aggr_rep(blk.mus.nor, sam.factor='tissue', aggr='mean') assay(blk.mus.aggr)[1:2, ] ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- colData(sce.dimred.quick)[1:3, 1:2] ## ----eval=TRUE, echo=TRUE, warnings=FALSE, fig.wide=TRUE, fig.cap=('Embedding plot of single cell data. The cells (dots) are colored by the grouping information stored in the `colData` slot of the corresponding `SCE` object'), out.width="100%", fig.show='show'---- plot_dim(sce.dimred.quick, color.by="label", dim='UMAP') ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- tail(attribute(svg.mus.brain)[[1]])[1:3, 1:4] ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- colnames(blk.mus) %in% attribute(svg.mus.brain)[[1]]$feature ## ----scLabList, eval=TRUE, echo=TRUE, warnings=FALSE-------------------------- lis.match.blk <- list(cerebral.cortex=c('cortex.S1'), hypothalamus=c('corpus.callosum', 'hypothalamus')) ## ----eval=TRUE, echo=TRUE, warnings=FALSE, fig.wide=TRUE, fig.cap=('Co-visualization plot through tissue-to-cell mapping. In this plot, `Actr3b` is used as an example. Each cell population is colored by its summarized expression value in the embedding plot on the left, and the corresponding tissue shares the same color in the spatial heatmap on the right.'), out.width="100%", fig.show='show', results='hide'---- covis(svg=svg.mus.brain, data=blk.mus.aggr, ID=c('Actr3b'), sce.dimred=sce.dimred.quick, dimred='UMAP', cell.group='label', tar.bulk=names(lis.match.blk), lis.rematch=lis.match.blk, bar.width=0.09, dim.lgd.nrow=2, dim.lgd.text.size=12, height=0.7, legend.r=1.5, legend.key.size=0.02, legend.text.size=12, legend.nrow=2) ## ----eval=TRUE, echo=TRUE, warnings=FALSE, fig.wide=TRUE, fig.cap=('Co-visualization plot through tissue-to-cell mapping without expression values. In this plot, mapping beween cell groups and tissues are indicated by fixed colors instead of expression values. '), out.width="100%", fig.show='show', results='hide'---- covis(svg=svg.mus.brain, data=blk.mus.aggr, ID=c('Actr3b'), profile=FALSE, sce.dimred=sce.dimred.quick, dimred='UMAP', cell.group='label', lis.rematch=lis.match.blk, tar.bulk=names(lis.match.blk), bar.width=0.09, dim.lgd.nrow=2, dim.lgd.text.size=12, height=0.7, legend.r=1.5, legend.key.size=0.02, legend.text.size=12, legend.nrow=2) ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- manual.clus.mus.sc.pa <- system.file("extdata/shinyApp/example", "manual_cluster_mouse_brain.txt", package="spatialHeatmap") manual.clus.mus.sc <- read.table(manual.clus.mus.sc.pa, header=TRUE, sep='\t') manual.clus.mus.sc[1:3, ] ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- sce.clus <- manual_group(sce=sce.dimred.quick, df.group=manual.clus.mus.sc, cell='cell', cell.group='cluster') colData(sce.clus)[1:3, c('cluster', 'label', 'expVar')] ## ----eval=TRUE, echo=TRUE, warnings=FALSE, fig.wide=TRUE, fig.cap=('Embedding plot of single cells. The cells (dots) are colored by the grouping information stored in the `colData` slot of the corresponding `SCE` object .'), out.width="100%", fig.show='show'---- plot_dim(sce.clus, color.by="cluster", dim='UMAP') ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- tail(attribute(svg.mus.brain)[[1]])[1:3, 1:4] ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- lis.match.clus <- list(clus1=c('cerebral.cortex'), clus3=c('brainstem', 'medulla.oblongata')) ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- sce.clus.aggr <- aggr_rep(sce.clus, assay.na='logcounts', sam.factor='cluster', con.factor='expVar', aggr='mean') colData(sce.clus.aggr)[1:3, c('cluster', 'label', 'expVar')] ## ----eval=TRUE, echo=TRUE, warnings=FALSE, fig.wide=TRUE, fig.cap=('Co-visualization of single cell data and tissue features with cluster groupings. Gene `Tcea1` is used as an example and the cell groupings were obtained by clustering.'), out.width="100%", fig.show='show', results='hide'---- covis(svg=svg.mus.brain, data=sce.clus.aggr, ID=c('Tcea1'), sce.dimred=sce.clus, dimred='UMAP', cell.group='cluster', assay.na='logcounts', tar.cell=names(lis.match.clus), lis.rematch=lis.match.clus, bar.width=0.09, dim.lgd.nrow=1, height=0.7, legend.r=1.5, legend.key.size=0.02, legend.text.size=12, legend.nrow=3) ## ----coclusOver, echo=FALSE, fig.wide=TRUE, out.width="100%", fig.cap=("Overview of co-clustering. (1) Raw count data are pre-processed. (2) Co-clustering is performed on embedding dimensions of combined bulk and single cell data. (3) Cells are labeled by bulk assignments. (4) Cells and bulk tissues are co-visualized according to (3). ")---- include_graphics('img/coclustering.jpg') ## ----optPar, eval=TRUE, echo=FALSE, warnings=FALSE---------------------------- df.opt <- cbind( Parameter=c('dimensionReduction', 'topDimensions', 'graphBuilding', 'clusterDetection'), Settings=c( '**denoisePCA** (**PCA**, scran), runUMAP (UMAP, scater)', '5 to 80 (**11**, **19**, **33**, **22**)', '**buildKNNGraph** (**knn**), buildSNNGraph (snn) (scran)', 'cluster_walktrap (wt), **cluster_fast_greedy** (**fg**), cluster_leading_eigen (le) (igraph)' ), Description=c( 'Dimension reduction methods', 'Number of top dimensions selected for co-clustering', 'Methods for building a graph where nodes are cells and edges are connections between nearest neighbors', 'Methods for partitioning the graph to generate clusters' ) ) #write.table(df.opt, 'cocluster_para.txt', col.names=TRUE, row.names=TRUE, sep='\t') kable(df.opt, caption='Parameter settings to optimize for co-clustering. Optimal settings are indicated by bold text.', col.names=colnames(df.opt), row.names=FALSE, escape=TRUE) %>% kable_styling("striped", full_width = TRUE) %>% scroll_box(width = "700px", height = "230px") ## ---- eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------ set.seed(10) ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- sc.mus.pa <- system.file("extdata/shinyApp/example", "cell_mouse_cocluster.rds", package="spatialHeatmap") sc.mus <- readRDS(sc.mus.pa) colData(sc.mus)[1:3, , drop=FALSE] ## ----eval=TRUE, echo=TRUE, warnings=FALSE, results='hide'--------------------- #mus.lis.nor <- read_cache(cache.pa, 'mus.lis.nor') #if (is.null(mus.lis.nor)) { mus.lis.nor <- norm_cell(sce=sc.mus, bulk=blk.mus, com=FALSE) # save_cache(dir=cache.pa, overwrite=TRUE, mus.lis.nor) #} ## ---- eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------ # Aggregate bulk replicates blk.mus.aggr <- aggr_rep(data=mus.lis.nor$bulk, assay.na='logcounts', sam.factor='sample', aggr='mean') # Filter bulk blk.mus.fil <- filter_data(data=blk.mus.aggr, pOA=c(0.1, 1), CV=c(0.1, 50), verbose=FALSE) # Filter cell and subset bulk to genes in cell blk.sc.mus.fil <- filter_cell(sce=mus.lis.nor$cell, bulk=blk.mus.fil, cutoff=1, p.in.cell=0.1, p.in.gen=0.01, verbose=FALSE) ## ---- eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------ tail(attribute(svg.mus.brain)[[1]])[1:3, 1:4] # Partial features are shown. ## ----eval=TRUE, echo=TRUE, warnings=FALSE, results='hide'--------------------- #coclus.mus <- read_cache(cache.pa, 'coclus.mus') #if (is.null(coclus.mus)) { coclus.mus <- cocluster(bulk=blk.sc.mus.fil$bulk, cell=blk.sc.mus.fil$cell, min.dim=11, dimred='PCA', graph.meth='knn', cluster='fg') # save_cache(dir=cache.pa, overwrite=TRUE, coclus.mus) #} ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- colData(coclus.mus) ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- coclus.mus <- filter_asg(coclus.mus, min.sim=0.1) ## ---- eval=TRUE, echo=TRUE, warnings=FALSE, fig.wide=TRUE, fig.cap=('Embedding plot of co-clusters. Large and small circles refer to bulk tissues and single cells respectively. '), out.width="80%", fig.show='show'---- plot_dim(coclus.mus, dim='PCA', color.by='cluster', cocluster.only=TRUE) ## ---- eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------ # Separate single cell data. coclus.sc <- subset(coclus.mus, , bulkCell=='cell') # Summarize expression values in each cell group. sc.aggr.coclus <- aggr_rep(data=coclus.sc, assay.na='logcounts', sam.factor='assignedBulk', aggr='mean') colData(sc.aggr.coclus) ## ---- eval=TRUE, echo=TRUE, warnings=FALSE, fig.wide=TRUE, fig.cap=('Co-visualization through cell-to-bulk mapping in automated method. This plot is created on gene Adcy1. Colors between the embedding plot and spatial heatmap indicate matching of cells with bulk tissues. '), out.width="100%", fig.show='show', results='hide'---- covis(svg=svg.mus.brain, data=sc.aggr.coclus, ID=c('Adcy1'), sce.dimred=coclus.sc, dimred='UMAP', tar.cell=c('hippocampus', 'hypothalamus', 'cerebellum', 'cerebral.cortex'), dim.lgd.text.size=10, dim.lgd.nrow=2, bar.width=0.1, legend.nrow=4) ## ---- eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------ coclus.blk <- subset(coclus.mus, , bulkCell=='bulk') ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- colnames(coclus.blk) %in% attribute(svg.mus.brain)[[1]]$feature ## ----eval=TRUE, echo=TRUE, warnings=FALSE, fig.wide=TRUE, fig.cap=('Co-visualization through bulk-to-cell mapping in automated method. This plot is created on gene Adcy1. Colors between the embedding plot and spatial heatmap indicate matching of cells with bulk tissues.'), out.width="100%", fig.show='show', results='hide'---- covis(svg=svg.mus.brain, data=coclus.blk, ID=c('Adcy1'), sce.dimred=coclus.sc, dimred='UMAP', tar.bulk=colnames(coclus.blk), assay.na='logcounts', legend.nrow=4, dim.lgd.text.size=10, dim.lgd.nrow=2, bar.width=0.08) ## ----eval=TRUE, echo=TRUE, warnings=FALSE, fig.wide=TRUE, fig.cap=('Co-visualization without expression values in automated method. Colors between the embedding plot and spatial heatmap only indicate matching of cells with bulk tissues.'), out.width="100%", fig.show='show', results='hide'---- covis(svg=svg.mus.brain, data=coclus.blk, ID=c('Adcy1'), sce.dimred=coclus.sc, dimred='UMAP', profile=FALSE, assay.na='logcounts', legend.nrow=4, dim.lgd.text.size=10, dim.lgd.nrow=2, bar.width=0.1) ## ----echo=FALSE, fig.wide=TRUE, out.width="100%", fig.cap=('Screenshot of the co-visualization output in Shiny app. The co-visualization plot is generated by the automatic method.')---- include_graphics('img/shiny_coviz.png') ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- shiny.dat.pa <- system.file("extdata/shinyApp/example", "shiny_covis_bulk_cell_mouse_brain.rds", package="spatialHeatmap") shiny.dat <- readRDS(shiny.dat.pa) colData(shiny.dat) ## ---- eval=TRUE, echo=TRUE, warnings=FALSE, fig.wide=TRUE, fig.cap=('UMAP embedding plot of mouse brain bulk tissues and single cells. Bulk tissues and single cells are indicated by large and small circles respectively. '), out.width="100%", fig.show='show'---- plot_dim(coclus.mus, dim='UMAP', color.by='sample', x.break=seq(-10, 10, 1), y.break=seq(-10, 10, 1), panel.grid=TRUE) ## ---- eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------ df.desired.bulk <- data.frame(x.min=c(6), x.max=c(8), y.min=c(-1), y.max=c(0.5), desiredBulk=c('hippocampus'), dimred='UMAP') df.desired.bulk ## ---- eval=TRUE, echo=TRUE, warnings=FALSE, results='hide'-------------------- # Incorporate desired bulk coclus.mus.tailor <- refine_asg(sce.all=coclus.mus, df.desired.bulk=df.desired.bulk) # Separate cells from bulk coclus.sc.tailor <- subset(coclus.mus.tailor, , bulkCell=='cell') ## ----eval=TRUE, echo=TRUE, warnings=FALSE, fig.wide=TRUE, fig.cap=('Co-visualization through bulk-to-cell mapping after tailoring. This plot is created on gene Adcy1. Only hippocampus cells and tissue are shown to display the tailoring result. '), out.width="100%", fig.show='show', results='hide'---- covis(svg=svg.mus.brain, data=coclus.blk, ID=c('Adcy1'), sce.dimred=coclus.sc.tailor, dimred='UMAP', tar.bulk=c('hippocampus'), assay.na='logcounts', legend.nrow=4, dim.lgd.text.size=10, dim.lgd.nrow=2, bar.width=0.08) ## ----tailorShiny, echo=FALSE, fig.wide=TRUE, out.width="100%", fig.cap=('Screenshot of the Shiny app for selecting desired bulk tissues. On the left is the embedding plot of single cells, where target cells are selected with the "Lasso Select" tool. On the right, desired bulk tissues are assigned for selected cell.')---- include_graphics('img/assign_bulk.png') ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- desired.blk.pa <- system.file("extdata/shinyApp/example", "selected_cells_with_desired_bulk.txt", package="spatialHeatmap") df.desired.bulk <- read.table(desired.blk.pa, header=TRUE, row.names=1, sep='\t') df.desired.bulk[1:3, ] ## ----eval=TRUE, echo=TRUE----------------------------------------------------- sessionInfo()