## ----global_options, include=FALSE-------------------------------------------- ## ThG: chunk added to enable global knitr options. The below turns on ## caching for faster vignette re-build during text editing. knitr::opts_chunk$set(cache=TRUE) ## ----css, echo = FALSE, results = 'asis'-------------------------------------- BiocStyle::markdown(css.files=c('file/custom.css')) ## ----setup0, eval=TRUE, echo=FALSE, message=FALSE, warning=FALSE-------------- library(knitr); opts_chunk$set(message=FALSE, warning=FALSE) ## ----illus, echo=FALSE, fig.wide=TRUE, out.width="100%", fig.cap=("Overview of spatialHeatmap. (A) The _saptialHeatmap_ package plots numeric assay data onto spatially annotated images. A wide range of omics technologies is supported including genomic, transcriptomic, proteomic and metabolomic profiling data. The assay data can be provided as numeric vectors, tabular data, or _SummarizedExperiment_ objects. The latter is a widely used data container for organizing both assay data as well as associated annotation and experimental design data. (B) Anatomical and other spatial images need to be provided as annotated SVG (aSVG) files where the spatial features and the corresponding data components of the assay data have matching labels (_e.g._ tissue labels). The assay data are used to color the matching spatial features in aSVG images according to a color key. The result is called a spatial heatmap (SHM). In the regular SHM (C), the feature profiles may or may not be contrasting, while in the enriched SHM (D) there are clear contrasting profiles across features. (E) Data mining graphics, such as matrix heatmaps and network graphs, are integrated to facilitate the identification of factors with similar assay profiles. The functionalities of _spatialHeatmap_ can be accessed from local computers via the R console or a graphical user interface based on Shiny. In addition, the latter can be deployed as a web service on custom servers or cloud-based systems.")---- include_graphics('img/graphical_overview_shm.png') ## ---- eval=TRUE, echo=TRUE, warnings=FALSE, results='hide'-------------------- library(spatialHeatmap); library(SummarizedExperiment); library(ExpressionAtlas); library(GEOquery); library(scran) library(scater); library(igraph); library(SingleCellExperiment) library(BiocParallel) ## ---- eval=FALSE, echo=TRUE, warnings=FALSE----------------------------------- # browseVignettes('spatialHeatmap') ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- svg.dir <- system.file("extdata/shinyApp/example", package="spatialHeatmap") svg.hum <- system.file("extdata/shinyApp/example", 'homo_sapiens.brain.svg', package="spatialHeatmap") ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- feature.df <- return_feature(feature=c('lobe'), species=c('homo sapiens'), remote=NULL, dir=svg.dir) feature.df fnames <- feature.df[, 1] ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- my_vec <- sample(1:100, length(unique(fnames))+1) names(my_vec) <- c(unique(fnames), 'notMapped') my_vec ## ----toyshm, eval=TRUE, echo=TRUE, warnings=FALSE, fig.wide=TRUE, fig.cap=("SHM of human brain with toy data. The plots from left to right represent: color key, SHM and legend. The colors in the first two plots depict the user provided numeric values, whereas in the legend plot they are used to map the feature labels to the corresponding spatial regions in the image. "), out.width="100%"---- shm.lis <- spatial_hm(svg.path=svg.hum, data=my_vec, ID='toy', ncol=1, height=0.9, width=0.8, sub.title.size=20, legend.nrow=2, ft.trans=c('g4320')) ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- # The SHM, mapped features, and feature attributes are stored in a list names(shm.lis) # Mapped features shm.lis[['mapped_feature']] # Feature attributes shm.lis[['feature_attribute']][1:3, ] ## ----eval=TRUE, echo=TRUE, message=FALSE, warnings=FALSE---------------------- cache.pa <- '~/.cache/shm' # The path of cache. all.hum <- read_cache(cache.pa, 'all.hum') # Retrieve data from cache. if (is.null(all.hum)) { # Save downloaded data to cache if it is not cached. all.hum <- searchAtlasExperiments(properties="cerebellum", species="Homo sapiens") save_cache(dir=cache.pa, overwrite=TRUE, all.hum) } ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- all.hum[2, ] ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- rse.hum <- read_cache(cache.pa, 'rse.hum') # Read data from cache. if (is.null(rse.hum)) { # Save downloaded data to cache if it is not cached. rse.hum <- getAtlasData('E-GEOD-67196')[[1]][[1]] save_cache(dir=cache.pa, overwrite=TRUE, rse.hum) } ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- colData(rse.hum)[1:5, 1:5] ## ----eval=FALSE, echo=TRUE, warnings=FALSE------------------------------------ # # Remote aSVG repos. # data(aSVG.remote.repo) # tmp.dir <- normalizePath(tempdir(check=TRUE), winslash="/", mustWork=FALSE) # tmp.dir.ebi <- paste0(tmp.dir, '/ebi.zip') # tmp.dir.shm <- paste0(tmp.dir, '/shm.zip') # # Download the remote aSVG repos as zip files. # download.file(aSVG.remote.repo$ebi, tmp.dir.ebi) # download.file(aSVG.remote.repo$shm, tmp.dir.shm) # remote <- list(tmp.dir.ebi, tmp.dir.shm) ## ----eval=FALSE, echo=TRUE, warnings=FALSE------------------------------------ # tmp.dir.shm <- paste0(normalizePath(tempdir(check=TRUE), winslash="/", mustWork=FALSE), '/shm') # Create empty directory # feature.df <- return_feature(feature=c('frontal cortex', 'cerebellum'), species=c('homo sapiens', 'brain'), keywords.any=TRUE, return.all=FALSE, dir=tmp.dir.shm, remote=remote, match.only=TRUE, desc=FALSE) # Query aSVGs # feature.df[1:8, ] # Return first 8 rows for checking # unique(feature.df$SVG) # Return all matching aSVGs ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- feature.df <- return_feature(feature=c('frontal cortex', 'cerebellum'), species=c('homo sapiens', 'brain'), keywords.any=TRUE, return.all=FALSE, dir=svg.dir, remote=NULL) ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- feature.df ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- hum.tar <- system.file('extdata/shinyApp/example/target_human.txt', package='spatialHeatmap') target.hum <- read.table(hum.tar, header=TRUE, row.names=1, sep='\t') ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- colData(rse.hum) <- DataFrame(target.hum) ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- colData(rse.hum)[c(1:3, 41:42), 4:5] ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- se.nor.hum <- norm_data(data=rse.hum, norm.fun='ESF', log2.trans=TRUE) ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- se.aggr.hum <- aggr_rep(data=se.nor.hum, sam.factor='organism_part', con.factor='disease', aggr='mean') assay(se.aggr.hum)[1:3, ] ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- se.fil.hum <- filter_data(data=se.aggr.hum, sam.factor='organism_part', con.factor='disease', pOA=c(0.01, 5), CV=c(0.3, 100), dir=NULL) ## ----eval=FALSE, echo=TRUE, warnings=FALSE------------------------------------ # assay(se.fil.hum)[c(5, 733:734), ] ## ----humtab, eval=TRUE, echo=FALSE, warnings=FALSE---------------------------- cna <- c("cerebellum\\_\\_ALS", "frontal.cortex\\_\\_ALS", "cerebellum\\_\\_normal", "frontal.cortex\\_\\_normal") kable(assay(se.fil.hum)[c(5, 733:734), ], caption='Slice of fully preprocessed expression matrix.', col.names=cna, escape=TRUE) ## ----humshm, eval=TRUE, echo=TRUE, warnings=FALSE, fig.wide=TRUE, fig.cap=("SHM of human brain. Only cerebellum and frontal cortex are colored, because they are present in both the aSVG and the expression data. The legend plot on the right maps the feature labels to the corresponding spatial regions in the image."), out.width="100%", fig.show='show'---- shm.lis <- spatial_hm(svg.path=svg.hum, data=se.fil.hum, ID=c('ENSG00000268433'), height=0.7, legend.r=1.5, legend.key.size=0.02, legend.text.size=12, legend.nrow=2, ft.trans=c('g4320')) ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- names(shm.lis) # All slots. shm.lis[['mapped_feature']] # Mapped features. shm.lis[['feature_attribute']][1:3, ] # Feature attributes. ## ----mul, eval=TRUE, echo=TRUE, warnings=FALSE, fig.wide=TRUE, fig.cap=("SHMs of two genes. The subplots are organized by \"condition\" with the `lay.shm='con'` setting."), out.width="100%"---- spatial_hm(svg.path=svg.hum, data=se.fil.hum, ID=c('ENSG00000268433', 'ENSG00000006047'), lay.shm='con', width=0.8, height=1, legend.r=1.5, legend.nrow=2, ft.trans=c('g4320')) ## ----eval=FALSE, echo=TRUE, warnings=FALSE------------------------------------ # tmp.dir.shm <- paste0(normalizePath(tempdir(check=TRUE), winslash="/"), '/shm') # spatial_hm(svg.path=svg.hum, data=se.fil.hum, ID=c('ENSG00000268433', 'ENSG00000006047'), lay.shm='con', width=0.8, height=1, legend.r=1.5, legend.nrow=2, out.dir=tmp.dir.shm, ft.trans=c('g4320')) ## ----arg, eval=TRUE, echo=FALSE, warnings=FALSE------------------------------- arg.df <- read.table('file/spatial_hm_arg.txt', header=TRUE, row.names=1, sep='\t') kable((arg.df), escape=TRUE, caption="List of important argumnets of \'spatial_hm\'.") ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- all.mus <- read_cache(cache.pa, 'all.mus') # Retrieve data from cache. if (is.null(all.mus)) { # Save downloaded data to cache if it is not cached. all.mus <- searchAtlasExperiments(properties="heart", species="Mus musculus") save_cache(dir=cache.pa, overwrite=TRUE, all.mus) } ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- all.mus[7, ] rse.mus <- read_cache(cache.pa, 'rse.mus') # Read data from cache. if (is.null(rse.mus)) { # Save downloaded data to cache if it is not cached. rse.mus <- getAtlasData('E-MTAB-2801')[[1]][[1]] save_cache(dir=cache.pa, overwrite=TRUE, rse.mus) } ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- colData(rse.mus)[1:3, ] ## ----eval=FALSE, echo=TRUE, warnings=FALSE------------------------------------ # tmp.dir.shm <- paste0(normalizePath(tempdir(check=TRUE), winslash="/", mustWork=FALSE), '/shm') # feature.df <- return_feature(feature=c('heart', 'kidney'), species=c('Mus musculus'), keywords.any=TRUE, return.all=FALSE, dir=tmp.dir.shm, remote=remote, match.only=FALSE) ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- feature.df <- return_feature(feature=c('heart', 'kidney'), species=NULL, keywords.any=TRUE, return.all=FALSE, dir=svg.dir, remote=NULL, match.only=FALSE) ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- unique(feature.df$SVG) ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- feature.df <- subset(feature.df, SVG=='mus_musculus.male.svg') feature.df[1:3, ] unique(feature.df[, 1]) ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- svg.mus <- system.file("extdata/shinyApp/example", "mus_musculus.male.svg", package="spatialHeatmap") ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- mus.tar <- system.file('extdata/shinyApp/example/target_mouse.txt', package='spatialHeatmap') target.mus <- read.table(mus.tar, header=TRUE, row.names=1, sep='\t') target.mus[1:3, ] unique(target.mus[, 3]) ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- colData(rse.mus) <- DataFrame(target.mus) ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- se.nor.mus <- norm_data(data=rse.mus, norm.fun='ESF', log2.trans=TRUE) # Normalization se.aggr.mus <- aggr_rep(data=se.nor.mus, sam.factor='organism_part', con.factor='strain', aggr='mean') # Aggregation of replicates se.fil.mus <- filter_data(data=se.aggr.mus, sam.factor='organism_part', con.factor='strain', pOA=c(0.01, 5), CV=c(0.6, 100), dir=NULL) # Filtering of genes with low counts and variance ## ----musshm, eval=TRUE, echo=TRUE, warnings=FALSE, fig.wide=TRUE, fig.cap=("SHM of mouse organs. This is a multiple-layer image where the shapes of the 'skeletal muscle' is set transparent to expose 'lung' and 'heart'."), out.width="100%"---- shm.lis <- spatial_hm(svg.path=svg.mus, data=se.fil.mus, ID=c('ENSMUSG00000000263'), height=0.7, legend.width=0.7, legend.text.size=10, sub.title.size=9, ncol=3, ft.trans=c('skeletal muscle', 'path4204'), legend.nrow=4, line.size=0.2, line.color='grey70') ## ---- musshm1, eval=TRUE, echo=TRUE, warnings=FALSE, fig.wide=TRUE, fig.cap=("SHM of mouse organs. This is a multiple-layer image where the view onto 'lung' and 'heart' is obstructed by displaying the 'skeletal muscle' tissue."), out.width="100%", fig.show='show'---- spatial_hm(svg.path=svg.mus, data=se.fil.mus, ID=c('ENSMUSG00000000263'), height=0.6, legend.text.size=10, sub.title.size=9, ncol=3, legend.ncol=2, line.size=0.1, line.color='grey70', ft.trans='path4204') ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- all.chk <- read_cache(cache.pa, 'all.chk') # Retrieve data from cache. if (is.null(all.chk)) { # Save downloaded data to cache if it is not cached. all.chk <- searchAtlasExperiments(properties="heart", species="gallus") save_cache(dir=cache.pa, overwrite=TRUE, all.chk) } ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- all.chk[3, ] rse.chk <- read_cache(cache.pa, 'rse.chk') # Read data from cache. if (is.null(rse.chk)) { # Save downloaded data to cache if it is not cached. rse.chk <- getAtlasData('E-MTAB-6769')[[1]][[1]] save_cache(dir=cache.pa, overwrite=TRUE, rse.chk) } ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- colData(rse.chk)[1:3, ] ## ----eval=FALSE, echo=TRUE, warnings=FALSE------------------------------------ # tmp.dir.shm <- paste0(normalizePath(tempdir(check=TRUE), winslash="/", mustWork=FALSE), '/shm') # # Query aSVGs. # feature.df <- return_feature(feature=c('heart', 'kidney'), species=c('gallus'), keywords.any=TRUE, return.all=FALSE, dir=tmp.dir.shm, remote=remote, match.only=FALSE) ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- feature.df <- return_feature(feature=c('heart', 'kidney'), species=c('gallus'), keywords.any=TRUE, return.all=FALSE, dir=svg.dir, remote=NULL, match.only=FALSE) feature.df[1:3, ] # A slice of the features. ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- svg.chk <- system.file("extdata/shinyApp/example", "gallus_gallus.svg", package="spatialHeatmap") ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- chk.tar <- system.file('extdata/shinyApp/example/target_chicken.txt', package='spatialHeatmap') target.chk <- read.table(chk.tar, header=TRUE, row.names=1, sep='\t') target.chk[1:3, ] ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- colData(rse.chk) <- DataFrame(target.chk) ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- unique(colData(rse.chk)[, 'organism_part']) ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- unique(colData(rse.chk)[, 'age']) ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- se.nor.chk <- norm_data(data=rse.chk, norm.fun='ESF', log2.trans=TRUE) # Normalization se.aggr.chk <- aggr_rep(data=se.nor.chk, sam.factor='organism_part', con.factor='age', aggr='mean') # Replicate agggregation using mean se.fil.chk <- filter_data(data=se.aggr.chk, sam.factor='organism_part', con.factor='age', pOA=c(0.01, 5), CV=c(0.6, 100), dir=NULL) # Filtering of genes with low counts and varince ## ----chkshm, eval=TRUE, echo=TRUE, warnings=FALSE, fig.wide=TRUE, fig.cap=("Time course of chicken organs. The SHM shows the expression profile of a single gene across nine time points and four organs."), out.width="100%"---- spatial_hm(svg.path=svg.chk, data=se.fil.chk, ID='ENSGALG00000006346', width=0.9, legend.width=0.9, legend.r=1.5, sub.title.size=9, ncol=3, legend.nrow=2, label=TRUE) ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- gset <- read_cache(cache.pa, 'gset') # Retrieve data from cache. if (is.null(gset)) { # Save downloaded data to cache if it is not cached. gset <- getGEO("GSE14502", GSEMatrix=TRUE, getGPL=TRUE)[[1]] save_cache(dir=cache.pa, overwrite=TRUE, gset) } se.sh <- as(gset, "SummarizedExperiment") ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- rownames(se.sh) <- make.names(rowData(se.sh)[, 'Gene.Symbol']) ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- colData(se.sh)[60:63, 1:4] ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- feature.df <- return_feature(feature=c('pGL2', 'pRBCS'), species=c('shoot'), keywords.any=TRUE, return.all=FALSE, dir=svg.dir, remote=NULL, match.only=FALSE) ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- unique(feature.df$SVG) ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- feature.df <- subset(feature.df, SVG=='arabidopsis.thaliana_shoot_shm.svg') feature.df[1:3, ] ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- svg.sh <- system.file("extdata/shinyApp/example", "arabidopsis.thaliana_shoot_shm.svg", package="spatialHeatmap") ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- sh.tar <- system.file('extdata/shinyApp/example/target_arab.txt', package='spatialHeatmap') target.sh <- read.table(sh.tar, header=TRUE, row.names=1, sep='\t') target.sh[60:63, ] ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- unique(target.sh[, 'samples']) ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- unique(target.sh[, 'conditions']) ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- colData(se.sh) <- DataFrame(target.sh) ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- se.aggr.sh <- aggr_rep(data=se.sh, sam.factor='samples', con.factor='conditions', aggr='mean') # Replicate agggregation using mean se.fil.arab <- filter_data(data=se.aggr.sh, sam.factor='samples', con.factor='conditions', pOA=c(0.03, 6), CV=c(0.30, 100), dir=NULL) # Filtering of genes with low intensities and variance ## ----shshm, eval=TRUE, echo=TRUE, warnings=FALSE, fig.wide=TRUE, fig.cap=('SHM of Arabidopsis shoots. The expression profile of the HRE2 gene is plotted for control and hypoxia treatment across six cell types.'), out.width="100%"---- spatial_hm(svg.path=svg.sh, data=se.fil.arab, ID=c("HRE2"), height=0.7, legend.nrow=3, legend.text.size=11) ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- rse.clp <- read_cache(cache.pa, 'rse.clp') # Retrieve data from cache. if (is.null(rse.clp)) { # Save downloaded data to cache if it is not cached. rse.clp <- getAtlasData('E-GEOD-115371')[[1]][[1]] save_cache(dir=cache.pa, overwrite=TRUE, rse.clp) } ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- clp.tar <- system.file('extdata/shinyApp/example/target_coleoptile.txt', package='spatialHeatmap') target.clp <- read_fr(clp.tar) ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- cdat <- colData(rse.clp) # Original targets file. unique(cdat$organism_part) # Original tissues. cdat <- edit_tar(cdat, column='organism_part', old=c('plant embryo', 'plant embryo coleoptile'), new=c('embryo', 'embryoColeoptile')) # Replace old entries with desired ones. unique(cdat$organism_part) # New tissue entries. ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- target.clp[1:3, c(6, 7, 9, 10)] # A slice of the targets file. unique(target.clp[, 'age']) # All development stages. unique(target.clp[, 'organism_part']) # All tissues. unique(target.clp[, 'stimulus']) # All conditions. ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- rse.clp <- com_factor(rse.clp, target.clp, factors2com=c('organism_part', 'age', 'con'), sep='.', factor.new='samTimeCon') colData(rse.clp)[1:3, c(6, 7, 9:11)] ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- target.clp <- colData(rse.clp) unique(target.clp$samTimeCon) ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- feature.df <- return_feature(feature=c('embryo.0h.A', 'embryoColeoptile.1h.A'), species=c('oryza', 'sativa'), keywords.any=FALSE, return.all=FALSE, dir=svg.dir, remote=NULL, match.only=FALSE) feature.df[1:2, ] # The first two rows of the query results. ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- unique(feature.df$SVG) ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- unique(target.clp$samTimeCon) %in% unique(feature.df$feature) ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- svg.clp1 <- system.file("extdata/shinyApp/example", "oryza.sativa_coleoptile.ANT_shm.svg", package="spatialHeatmap") ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- se.nor.clp <- norm_data(data=rse.clp, norm.fun='ESF', log2.trans=TRUE) # Normalization ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- se.aggr.clp1 <- aggr_rep(data=se.nor.clp, sam.factor='samTimeCon', con.factor=NULL, aggr='mean') # Replicate agggregation using mean se.fil.clp1 <- filter_data(data=se.aggr.clp1, sam.factor='samTimeCon', con.factor=NULL, pOA=c(0.07, 7), CV=c(0.7, 100), dir=NULL) # Filtering of genes with low counts and varince. ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- assay(se.fil.clp1)[1:3, 1:3] # A slice of the resulting data table. ## ----clpshm, eval=TRUE, echo=TRUE, warnings=FALSE, fig.wide=TRUE, fig.cap=("Spatiotemporal heatmap at sample-time-condition factor. Gene expression profile of two genes in coleoptile across eight time points under anoxia and re-oxygenation is visualized in a composite image."), out.width="100%"---- shm.lis <- spatial_hm(svg.path=svg.clp1, data=se.fil.clp1, ID=c('Os12g0630200', 'Os01g0106300'), legend.r=0.7, legend.key.size=0.01, legend.text.size=8, legend.nrow=8, ncol=1, width=0.8, line.size=0) ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- rse.clp <- read_cache(cache.pa, 'rse.clp') # Retrieve data from cache. if (is.null(rse.clp)) { # Save downloaded data to cache if it is not cached. rse.clp <- getAtlasData('E-GEOD-115371')[[1]][[1]] save_cache(dir=cache.pa, overwrite=TRUE, rse.clp) } ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- clp.tar <- system.file('extdata/shinyApp/example/target_coleoptile.txt', package='spatialHeatmap') target.clp <- read_fr(clp.tar) ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- target.clp[1:3, c(6, 7, 9, 10)] # A slice of the targets file. unique(target.clp[, 'age']) # All development stages. unique(target.clp[, 'organism_part']) # All tissues. unique(target.clp[, 'stimulus']) # All conditions. ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- rse.clp <- com_factor(rse.clp, target.clp, factors2com=c('organism_part', 'age'), factor.new='samTime') target.clp <- colData(rse.clp) target.clp[1:3, ] ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- feature.df <- return_feature(feature=c('embryo.0h', 'embryoColeoptile1h'), species=c('oryza', 'sativa'), keywords.any=FALSE, return.all=FALSE, dir=svg.dir, remote=NULL, match.only=FALSE) feature.df[1:2, ] # The first two rows of the query results. ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- unique(feature.df$SVG) ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- unique(target.clp$samTime) %in% unique(feature.df$feature) ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- svg.clp2 <- system.file("extdata/shinyApp/example", "oryza.sativa_coleoptile.NT_shm.svg", package="spatialHeatmap") ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- se.nor.clp <- norm_data(data=rse.clp, norm.fun='ESF', log2.trans=TRUE) # Normalization ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- se.aggr.clp2 <- aggr_rep(data=se.nor.clp, sam.factor='samTime', con.factor='stimulus', aggr='mean') # Replicate agggregation using mean. se.fil.clp2 <- filter_data(data=se.aggr.clp2, sam.factor='samTime', con.factor='stimulus', pOA=c(0.07, 7), CV=c(0.7, 100), dir=NULL) # Filtering of genes with low counts and varince. ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- df.fil.clp <- assay(se.fil.clp2) df.fil.clp[1:3, 1:3] # A slice of the resulting data table. ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- df.fil.clp1 <- df.fil.clp[, !grepl('__aerobic', colnames(df.fil.clp))] # Exclude aerobic data. df.fil.clp1[1:3, 1:3] # A slice of the data table without aerobic data. ## ----clpshm1, eval=TRUE, echo=TRUE, warnings=FALSE, fig.wide=TRUE, fig.cap=("Spatiotemporal heatmap at sample-time factor. Gene expression profile of one gene in coleoptile across eight time points under anoxia and re-oxygenation is visualized in two images."), out.width="100%", fig.show='show'---- shm.lis <- spatial_hm(svg.path=svg.clp2, data=df.fil.clp1, ID=c('Os12g0630200'), legend.r=0.9, legend.key.size=0.02, legend.text.size=9, legend.nrow=8, ncol=1, line.size=0) ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- df.random <- data.frame(matrix(sample(x=1:100, size=50, replace=TRUE), nrow=10)) colnames(df.random) <- c('shoot_totalA__condition1', 'shoot_totalA__condition2', 'shoot_totalB__condition1', 'shoot_totalB__condition2', 'notMapped') # Assign column names rownames(df.random) <- paste0('gene', 1:10) # Assign row names df.random[1:3, ] ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- svg.sh1 <- system.file("extdata/shinyApp/example", "arabidopsis.thaliana_organ_shm1.svg", package="spatialHeatmap") svg.sh2 <- system.file("extdata/shinyApp/example", "arabidopsis.thaliana_organ_shm2.svg", package="spatialHeatmap") ## ----arabshm, eval=TRUE, echo=TRUE, warnings=FALSE, fig.wide=TRUE, fig.cap=('Spatial heatmap of Arabidopsis at two growth stages. The expression profile of gene1 under condition1 and condition2 is plotted for two growth stages (top and bottom row).'), out.width="100%", fig.show='show'---- spatial_hm(svg.path=c(svg.sh1, svg.sh2), data=df.random, ID=c('gene1'), width=0.7, legend.r=0.2, legend.width=1, preserve.scale=TRUE, bar.width=0.09) ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- tmp.pa1 <- system.file('extdata/shinyApp/example/maize_leaf_shm1.png', package='spatialHeatmap') svg.pa1 <- system.file('extdata/shinyApp/example/maize_leaf_shm1.svg', package='spatialHeatmap') ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- tmp.pa2 <- system.file('extdata/shinyApp/example/maize_leaf_shm2.png', package='spatialHeatmap') svg.pa2 <- system.file('extdata/shinyApp/example/maize_leaf_shm2.svg', package='spatialHeatmap') ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- dat.overlay <- read_fr(system.file('extdata/shinyApp/example/dat_overlay.txt', package='spatialHeatmap')) dat.overlay[1:2, ] ## ----overCol, eval=TRUE, echo=TRUE, warnings=FALSE, fig.wide=TRUE, fig.cap=('Overlaying real images with SHMs (colorful backaground). The expression profiles of gene1 are plotted on ABS cells.'), out.width="100%", fig.show='show'---- spatial_hm(svg.path=c(svg.pa1, svg.pa2), data=dat.overlay, tmp.path=c(tmp.pa1, tmp.pa2), charcoal=FALSE, ID=c('gene1'), alpha.overlay=0.5, bar.width=0.09, sub.title.vjust=4) ## ----overChar, eval=TRUE, echo=TRUE, warnings=FALSE, fig.wide=TRUE, fig.cap=('Overlaying real images with SHMs (black-white background). The expression profiles of gene1 are plotted on ABS cells.'), out.width="100%", fig.show='show'---- spatial_hm(svg.path=c(svg.pa1, svg.pa2), data=dat.overlay, tmp.path=c(tmp.pa1, tmp.pa2), charcoal=TRUE, ID=c('gene1'), alpha.overlay=0.5, bar.width=0.09, sub.title.vjust=4) ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- sub.mat <- submatrix(data=se.fil.arab, ann='Target.Description', ID=c('RCA', 'HRE2'), p=0.1) ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- sub.mat[c('RCA', 'HRE2'), c(1:3, 37)] # Subsetted assay matrix ## ----static, eval=TRUE, echo=TRUE, warnings=FALSE, fig.cap=("Matrix Heatmap. Rows are genes and columns are samples. The input genes are tagged by black lines."), out.width='100%'---- matrix_hm(ID=c('RCA', 'HRE2'), data=sub.mat, angleCol=80, angleRow=35, cexRow=0.8, cexCol=0.8, margin=c(10, 6), static=TRUE, arg.lis1=list(offsetRow=0.01, offsetCol=0.01)) ## ----eval=TRUE, echo=TRUE, warnings=FALSE, results=FALSE---------------------- adj.mod <- adj_mod(data=sub.mat) ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- adj.mod[['adj']][1:3, 1:3] ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- adj.mod[['mod']][1:3, ] ## ----inter, eval=TRUE, echo=TRUE, warnings=FALSE, fig.cap=("Static network. Node size denotes gene connectivity while edge thickness stands for co-expression similarity.")---- network(ID="HRE2", data=sub.mat, adj.mod=adj.mod, adj.min=0.90, vertex.label.cex=1.2, vertex.cex=2, static=TRUE) ## ----eval=FALSE, echo=TRUE, warnings=FALSE------------------------------------ # network(ID="HRE2", data=sub.mat, adj.mod=adj.mod, static=FALSE) ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- unique(colData(rse.mus)$organism_part) ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- unique(colData(rse.mus)$strain) ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- data.sub.mus <- sub_data(data=rse.mus, feature='organism_part', features=c('brain', 'liver', 'kidney'), factor='strain', factors=c('DBA.2J', 'C57BL.6', 'CD1'), com.by='feature', target='brain') ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- data.sub.mus.fil <- filter_data(data=data.sub.mus, sam.factor='organism_part', con.factor='strain', pOA=c(0.2, 15), CV=c(0.8, 100)) ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- deg.lis.mus <- spatial_enrich(data.sub.mus.fil, methods=c('edgeR', 'limma'), log2.fc=1, fdr=0.05, aggr='mean', log2.trans.aggr=TRUE) ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- deg.lis.mus$lis.up.down$up.lis$edgeR.up[1:3] # Up-regulated. deg.lis.mus$lis.up.down$down.lis$edgeR.down[1:3] # Down-regulated. ## ----upset, eval=TRUE, echo=TRUE, warnings=FALSE, fig.wide=TRUE, fig.cap=('UpSet plot of up-regulated genes in mouse brain. The overlap of up-regulated genes detected by edgeR and limma is indicated by bars.'), out.width="100%", fig.show='show'---- deg_ovl(deg.lis.mus$lis.up.down, type='up', plot='upset') ## ----matrix, eval=TRUE, echo=TRUE, warnings=FALSE, fig.wide=TRUE, fig.cap=('Matrix plot of up-regulated genes in mouse brain. The matrix plot displays any overlap between up-regulated genes detected by edgeR and limma.'), out.width="70%", fig.show='show'---- deg_ovl(deg.lis.mus$lis.up.down, type='up', plot='matrix') ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- deg.table.mus <- deg.lis.mus$deg.table; deg.table.mus[1:2, ] ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- df.up.mus <- subset(deg.table.mus, type=='up' & total==2) df.up.mus[1:2, ] ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- df.down.mus <- subset(deg.table.mus, type=='down' & total==2) df.down.mus[1:2, ] ## ----enrich, eval=TRUE, echo=TRUE, warnings=FALSE, fig.wide=TRUE, fig.cap=('Spatially-enriched mouse genes. The two genes in the image are enriched in the mouse brain relative to kidney and liver. Top: down-regulated in brain. Bottom: up-regulated in brain.'), out.width="100%", fig.show='show'---- spatial_hm(svg.path=svg.mus, data=deg.lis.mus$deg.table, ID=c('ENSMUSG00000026764', 'ENSMUSG00000025479'), legend.r=1, legend.nrow=3, sub.title.size=6.1, ncol=3, bar.width=0.11) ## ----prof, eval=TRUE, echo=TRUE, warnings=FALSE, fig.wide=TRUE, fig.cap=('Line graph of gene expression profiles. The count data is normalized and replicates are aggregated.'), out.width="100%", fig.show='show'---- profile_gene(rbind(df.up.mus[1, ], df.down.mus[1, ])) ## ---- eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------ set.seed(10) ## ----scRead, eval=TRUE, echo=TRUE, warnings=FALSE----------------------------- sce.manual.pa <- system.file("extdata/shinyApp/example", "sce_manual_mouse.rds", package="spatialHeatmap") sce.manual <- readRDS(sce.manual.pa) ## ----scQC, eval=TRUE, echo=TRUE, warnings=FALSE------------------------------- stats <- perCellQCMetrics(sce.manual, subsets=list(Mt=rowData(sce.manual)$featureType=='mito'), threshold=1) sub.fields <- 'subsets_Mt_percent' ercc <- 'ERCC' %in% altExpNames(sce.manual) if (ercc) sub.fields <- c('altexps_ERCC_percent', sub.fields) qc <- perCellQCFilters(stats, sub.fields=sub.fields, nmads=3) # Discard unreliable cells. colSums(as.matrix(qc)) sce.manual <- sce.manual[, !qc$discard] ## ----scNorm, eval=TRUE, echo=TRUE, warnings=FALSE----------------------------- clusters <- quickCluster(sce.manual) sce.manual <- computeSumFactors(sce.manual, cluster=clusters) sce.manual <- logNormCounts(sce.manual) ## ----scDimred, eval=TRUE, echo=TRUE, warnings=FALSE--------------------------- # Variance modelling. df.var <- modelGeneVar(sce.manual) top.hvgs <- getTopHVGs(df.var, prop = 0.1, n = 3000) # Dimensionality reduction. sce.manual <- denoisePCA(sce.manual, technical=df.var, subset.row=top.hvgs) sce.manual <- runTSNE(sce.manual, dimred="PCA") sce.manual <- runUMAP(sce.manual, dimred = "PCA") ## ----scClus, eval=TRUE, echo=TRUE, warnings=FALSE----------------------------- # Build graph. snn.gr <- buildSNNGraph(sce.manual, use.dimred="PCA") # Partition graph to detect cell clusters. cluster <- paste0('clus', cluster_walktrap(snn.gr)$membership) table(cluster) ## ----scCdat, eval=TRUE, echo=TRUE, warnings=FALSE----------------------------- cdat <- colData(sce.manual) lab.lgc <- 'label' %in% make.names(colnames(cdat)) if (lab.lgc) { cdat <- cbind(cluster=cluster, colData(sce.manual)) idx <- colnames(cdat) %in% c('cluster', 'label') cdat <- cdat[, c(which(idx), which(!idx))] } else cdat <- cbind(cluster=cluster, colData(sce.manual)) colnames(cdat) <- make.names(colnames(cdat)) colData(sce.manual) <- cdat; cdat[1:3, ] ## ----scDim, eval=TRUE, echo=TRUE, warnings=FALSE, fig.wide=TRUE, fig.cap=('Embedding plot single cells. The cell clusters are defined by the `label` column in `colData`.'), out.width="100%", fig.show='show'---- plotUMAP(sce.manual, colour_by="label") ## ----scSVG, eval=TRUE, echo=TRUE, warnings=FALSE------------------------------ svg.mus <- system.file("extdata/shinyApp/example", "mus_musculus.brain.svg", package="spatialHeatmap") # Spatial features to match with single cell clusters. feature.df <- return_feature(svg.path=svg.mus) feature.df$feature ## ----scLab, eval=TRUE, echo=TRUE, warnings=FALSE------------------------------ unique(colData(sce.manual)$label) ## ----scLabAggr, eval=TRUE, echo=TRUE, warnings=FALSE-------------------------- sce.manual.aggr <- aggr_rep(sce.manual, assay.na='logcounts', sam.factor='label', con.factor='expVar', aggr='mean') ## ----scLabList, eval=TRUE, echo=TRUE, warnings=FALSE-------------------------- lis.match <- list(hypothalamus=c('hypothalamus'), cortex.S1=c('cerebral.cortex')) ## ----scLabVis, eval=TRUE, echo=TRUE, warnings=FALSE, fig.wide=TRUE, fig.cap=('Co-visualizing single cells and bulk tissues by custom cell clusters. The expression profiles of gene `St18` are used.'), out.width="100%", fig.show='show'---- shm.lis <- spatial_hm(svg.path=svg.mus, data=sce.manual.aggr, ID=c('St18'), height=0.7, legend.r=1.5, legend.key.size=0.02, legend.text.size=12, legend.nrow=2, sce.dimred=sce.manual, dimred='PCA', cell.group='label', assay.na='logcounts', tar.cell=c('matched'), lis.rematch=lis.match, bar.width=0.1, dim.lgd.nrow=1) ## ----scAuto, eval=TRUE, echo=TRUE, warnings=FALSE----------------------------- unique(colData(sce.manual)$cluster) ## ----scAutoAggr, eval=TRUE, echo=TRUE, warnings=FALSE------------------------- sce.manual.aggr <- aggr_rep(sce.manual, assay.na='logcounts', sam.factor='cluster', con.factor=NULL, aggr='mean') ## ----scAutoList, eval=TRUE, echo=TRUE, warnings=FALSE------------------------- lis.match <- list(clus1=c('hypothalamus'), clus3=c('cerebral.cortex', 'midbrain')) ## ----scAutoVis, eval=TRUE, echo=TRUE, warnings=FALSE, fig.wide=TRUE, fig.cap=('Co-visualizing single cells and bulk tissues by automatically-detected cell clusters. The expression profiles of gene `St18` are used.'), out.width="100%", fig.show='show'---- shm.lis <- spatial_hm(svg.path=svg.mus, data=sce.manual.aggr, ID=c('St18'), height=0.7, legend.r=1.5, legend.key.size=0.02, legend.text.size=12, legend.nrow=3, sce.dimred=sce.manual, dimred='PCA', cell.group='cluster', assay.na='logcounts', tar.cell=c('matched'), lis.rematch=lis.match, bar.width=0.11, dim.lgd.nrow=1) ## ----coclusOver, echo=FALSE, fig.wide=TRUE, out.width="80%", fig.cap=("Overview of coclustering. The coclustering are illustrated in 9 steps. Basically, bulk and single data are initially filtered, then combined, normalized, and separated. The normalized bulk and single cell data are filtered again. Single cells are clustered and resulting clusters are refined. Subsequently bulk and single cells are combined and co-clustered. In each co-cluster, bulk tissues are assigned to cells. The assignments are evaluated by ROC curves.")---- include_graphics('img/coclustering_overview.png') ## ---- eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------ set.seed(10) ## ----optRead, eval=TRUE, echo=TRUE, warnings=FALSE---------------------------- blk <- readRDS(system.file("extdata/cocluster/data", "bulk_cocluster.rds", package="spatialHeatmap")) # Bulk. sc10 <- readRDS(system.file("extdata/cocluster/data", "sc10_cocluster.rds", package="spatialHeatmap")) # Single cell. sc11 <- readRDS(system.file("extdata/cocluster/data", "sc11_cocluster.rds", package="spatialHeatmap")) # Single cell. blk; sc10; sc11 ## ----optInitFil, eval=FALSE, echo=TRUE, warnings=FALSE------------------------ # blk <- filter_data(data=blk, pOA=c(0.2, 15), CV=c(1.5, 100)) # fil.init <- filter_cell(lis=list(sc10=sc10, sc11=sc11), bulk=blk, gen.rm='^ATCG|^ATCG', min.cnt=1, p.in.cell=0.3, p.in.gen=0.1); fil.init ## ----optNorm, eval=FALSE, echo=TRUE, warnings=FALSE--------------------------- # norm.fct <- norm_multi(dat.lis=fil.init, cpm=FALSE) # fct. # norm.cpm <- norm_multi(dat.lis=fil.init, cpm=TRUE) # fct + cpm ## ----optFilPar, eval=TRUE, echo=TRUE, warnings=FALSE-------------------------- df.par.fil <- data.frame(p=c(0.1, 0.2, 0.3, 0.4), A=rep(1, 4), cv1=c(0.1, 0.2, 0.3, 0.4), cv2=rep(100, 4), min.cnt=rep(1, 4), p.in.cell=c(0.1, 0.25, 0.3, 0.35), p.in.gen=c(0.01, 0.05, 0.1, 0.15)) df.par.fil ## ----optFil, eval=FALSE, echo=TRUE, warnings=FALSE---------------------------- # if (!dir.exists('opt_res')) dir.create('opt_res') # fct.fil.all <- filter_iter(bulk=norm.fct$bulk, cell.lis=list(sc10=norm.fct$sc10, sc11=norm.fct$sc11), df.par.fil=df.par.fil, gen.rm='^ATCG|^ATCG', wk.dir='opt_res', norm.meth='fct') # # cpm.fil.all <- filter_iter(bulk=norm.cpm$bulk, cell.lis=list(sc10=norm.cpm$sc10, sc11=norm.cpm$sc11), df.par.fil=df.par.fil, gen.rm='^ATCG|^ATCG', wk.dir='opt_res', norm.meth='cpm') ## ----optMatch, eval=TRUE, echo=TRUE, warnings=FALSE--------------------------- match.pa <- system.file("extdata/cocluster/data", "match_arab_root_cocluster.txt", package="spatialHeatmap") df.match.arab <- read.table(match.pa, header=TRUE, row.names=1, sep='\t') df.match.arab[1:3, ] ## ----optGuide, eval=FALSE, echo=TRUE, warnings=FALSE-------------------------- # coclus_opt(wk.dir='opt_res', parallel.info=TRUE, dimred=c('PCA', 'UMAP'), graph.meth=c('knn', 'snn'), sim=seq(0.2, 0.4, by=0.1), sim.p=seq(0.2, 0.4, by=0.1), dim=seq(5, 7, by=1)) ## ----optTmpl, eval=FALSE, echo=TRUE, warnings=FALSE--------------------------- # file.copy(system.file("extdata/cocluster", "slurm.tmpl", package="spatialHeatmap"), './slurm.tmpl') ## ----optPara2, eval=FALSE, echo=TRUE, warnings=FALSE-------------------------- # opt <- coclus_opt(wk.dir='opt_res', dimred=c('PCA', 'UMAP'), graph.meth=c('knn', 'snn'), sim=seq(0.2, 0.4, by=0.1), sim.p=seq(0.2, 0.4, by=0.1), dim=seq(5, 7, by=1), df.match=df.match.arab, batch.par=BatchtoolsParam(workers=3, cluster="slurm", template='slurm.tmpl', RNGseed=100), multi.core.par=MulticoreParam(workers=2), verbose=FALSE) ## ----optPara1, eval=FALSE, echo=TRUE, warnings=FALSE-------------------------- # opt <- coclus_opt(wk.dir='opt_res', dimred=c('PCA', 'UMAP'), graph.meth=c('knn', 'snn'), sim=seq(0.2, 0.4, by=0.1), sim.p=seq(0.2, 0.4, by=0.1), dim=seq(5, 7, by=1), df.match=df.match.arab, batch.par=NULL, multi.core.par=MulticoreParam(workers=2)) ## ----optParFil, eval=FALSE, echo=TRUE, warnings=FALSE------------------------- # df.lis.fil <- auc_stat(wk.dir='opt_res', tar.par='filter', total.min=500, true.min=300, aucs=round(seq(0.5, 0.9, 0.1), 1)) ## ----optParFilMean, eval=FALSE, echo=TRUE, warnings=FALSE--------------------- # df.lis.fil$df.auc.mean[1:3, ] # mean_auc_bar(df.lis.fil[[1]], bar.width=0.07, title='Mean AUCs by filtering settings') ## ----optParFilMeanPlot, echo=FALSE, fig.wide=TRUE, out.width="80%", fig.cap=("Mean AUCs of filtering settings. One bar refers to mean AUCs of a filtering settings at a certain AUC cutoff.")---- include_graphics('img/filter_auc_mean.png') ## ----optParFilAll, eval=FALSE, echo=TRUE, warnings=FALSE---------------------- # auc_violin(df.lis=df.lis.fil, xlab='Filtering settings') ## ----optParFilAllPlot, echo=FALSE, fig.wide=TRUE, out.width="80%", fig.cap=("All AUCs of filtering settings. A violin plot refers to all AUCs of a filtering settings at a certain AUC cutoff.")---- include_graphics('img/filter_auc_all.png') ## ----optParFilSel, eval=TRUE, echo=TRUE, warnings=FALSE----------------------- df.par.fil[c(1, 2, 3), ] ## ----optParNor, eval=FALSE, echo=TRUE, warnings=FALSE------------------------- # df.lis.norm <- auc_stat(wk.dir='opt_res', tar.par='norm', total.min=500, true.min=300, aucs=round(seq(0.5, 0.9, 0.1), 1)) ## ----optParNorMean, eval=FALSE, echo=TRUE, warnings=FALSE--------------------- # df.lis.norm$df.auc.mean[1:3, ] # mean_auc_bar(df.lis.norm[[1]], bar.width=0.07, title='Mean AUCs by normalization methods') ## ----optParNorAll, eval=FALSE, echo=TRUE, warnings=FALSE---------------------- # auc_violin(df.lis=df.lis.norm, xlab='Normalization methods') ## ----optParGra, eval=FALSE, echo=TRUE, warnings=FALSE------------------------- # df.lis.graph <- auc_stat(wk.dir='opt_res', tar.par='graph', total.min=500, true.min=300, aucs=round(seq(0.5, 0.9, 0.1), 1)) ## ----optParGraMean, eval=FALSE, echo=TRUE, warnings=FALSE--------------------- # df.lis.graph$df.auc.mean[1:3, ] # mean_auc_bar(df.lis.graph[[1]], bar.width=0.07, title='Mean AUCs by graph-building methods') ## ----optParGraAll, eval=FALSE, echo=TRUE, warnings=FALSE---------------------- # auc_violin(df.lis=df.lis.graph, xlab='Graph-building methods') ## ----optParDim, eval=FALSE, echo=TRUE, warnings=FALSE------------------------- # df.lis.dimred <- auc_stat(wk.dir='opt_res', tar.par='dimred', total.min=500, true.min=300, aucs=round(seq(0.5, 0.9, 0.1), 1)) ## ----optParDimMean, eval=FALSE, echo=TRUE, warnings=FALSE--------------------- # df.lis.dimred$df.auc.mean[1:3, ] # # Mean AUCs by each dimensionality reduction method and AUC cutoff. # mean_auc_bar(df.lis.dimred[[1]], bar.width=0.07, title='Mean AUCs by dimensionality reduction methods') ## ----optParDimAll, eval=FALSE, echo=TRUE, warnings=FALSE---------------------- # auc_violin(df.lis=df.lis.dimred, xlab='Dimensionality reduction') ## ----optParSpd, eval=FALSE, echo=TRUE, warnings=FALSE------------------------- # df.lis.spd <- auc_stat(wk.dir='opt_res', tar.par='spd.set', total.min=500, true.min=300, aucs=round(seq(0.5, 0.9, 0.1), 1)) # df.lis.spd$auc0.5$df.frq[1:3, ] ## ----optParSpdTop, eval=FALSE, echo=TRUE, warnings=FALSE---------------------- # spd_auc_violin(df.lis=df.lis.spd, n=5, xlab='spd.sets', x.vjust=0.6) ## ----optParSpdTopFin, eval=FALSE, echo=TRUE, warnings=FALSE------------------- # n <- 5; df.spd.opt <- NULL # for (i in df.lis.spd) { # df.spd.opt <- rbind(df.spd.opt, i$df.frq[seq_len(n), c('sim', 'sim.p', 'dim')]) # } # df.spd.opt$spd.set <- paste0('s', df.spd.opt$sim, 'p', df.spd.opt$sim.p, 'd', df.spd.opt$dim) # df.spd.opt <- subset(df.spd.opt, !duplicated(spd.set)) # df.spd.opt[1:3, ] ## ---- eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------ set.seed(10) ## ---- eval=FALSE, echo=TRUE, warnings=FALSE----------------------------------- # blk.arab.rt <- filter_data(data=blk.arab.rt, pOA=c(0.05, 5), CV=c(0.05, 100)) # fil.init <- filter_cell(lis=list(sc10=sc.arab.rt10, sc11=sc.arab.rt11, sc12=sc.arab.rt12, sc30=sc.arab.rt30, sc31=sc.arab.rt31), bulk=blk, gen.rm='^ATCG|^ATCG', min.cnt=1, p.in.cell=0.01, p.in.gen=0.05); fil.init ## ---- eval=FALSE, echo=TRUE, warnings=FALSE----------------------------------- # norm.fct <- norm_multi(dat.lis=fil.init, cpm=FALSE) # fct. # norm.cpm <- norm_multi(dat.lis=fil.init, cpm=TRUE) # fct + cpm ## ---- eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------ df.par.fil <- data.frame(p=c(0.1, 0.2, 0.3, 0.4), A=rep(1, 4), cv1=c(0.1, 0.2, 0.3, 0.4), cv2=rep(100, 4), min.cnt=rep(1, 4), p.in.cell=c(0.1, 0.25, 0.3, 0.35), p.in.gen=c(0.01, 0.05, 0.1, 0.15)) df.par.fil ## ---- eval=FALSE, echo=TRUE, warnings=FALSE----------------------------------- # if (!dir.exists('opt_real_res')) dir.create('opt_real_res') # # fct.fil.all <- filter_iter(bulk=norm.fct$bulk, cell.lis=list(sc10=norm.fct$sc10, sc11=norm.fct$sc11, sc12=norm.fct$sc12, sc30=norm.fct$sc30, sc31=norm.fct$sc31), df.par.fil=df.par.fil, gen.rm='^ATCG|^ATCG', wk.dir='opt_real_res', norm.meth='fct') # # cpm.fil.all <- filter_iter(bulk=norm.cpm$bulk, cell.lis=list(sc10=norm.cpm$sc10, sc11=norm.cpm$sc11, sc12=norm.cpm$sc12, sc30=norm.cpm$sc30, sc31=norm.cpm$sc31), df.par.fil=df.par.fil, gen.rm='^ATCG|^ATCG', wk.dir='opt_real_res', norm.meth='cpm') ## ---- eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------ match.pa <- system.file("extdata/cocluster/data", "match_arab_root_cocluster.txt", package="spatialHeatmap") df.match.arab <- read.table(match.pa, header=TRUE, row.names=1, sep='\t') df.match.arab[1:3, ] ## ---- eval=FALSE, echo=TRUE, warnings=FALSE----------------------------------- # coclus_opt(wk.dir='opt_real_res', parallel.info=TRUE, dimred=c('PCA', 'UMAP'), graph.meth=c('knn', 'snn'), sim=seq(0.2, 0.8, by=0.1), sim.p=seq(0.2, 0.8, by=0.1), dim=seq(5, 40, by=1)) ## ---- eval=FALSE, echo=TRUE, warnings=FALSE----------------------------------- # file.copy(system.file("extdata/cocluster", "slurm.tmpl", package="spatialHeatmap"), './slurm.tmpl') ## ---- eval=FALSE, echo=TRUE, warnings=FALSE----------------------------------- # opt <- coclus_opt(wk.dir='opt_real_res', dimred=c('PCA', 'UMAP'), graph.meth=c('knn', 'snn'), sim=seq(0.2, 0.8, by=0.1), sim.p=seq(0.2, 0.8, by=0.1), dim=seq(5, 40, by=1), df.match=df.match.arab, batch.par=BatchtoolsParam(workers=3, cluster="slurm", template='slurm.tmpl', RNGseed=100), multi.core.par=MulticoreParam(workers=2), verbose=FALSE) ## ---- eval=FALSE, echo=TRUE, warnings=FALSE----------------------------------- # opt <- coclus_opt(wk.dir='opt_real_res', dimred=c('PCA', 'UMAP'), graph.meth=c('knn', 'snn'), sim=seq(0.2, 0.8, by=0.1), sim.p=seq(0.2, 0.8, by=0.1), dim=seq(5, 40, by=1), df.match=df.match.arab, batch.par=NULL, multi.core.par=MulticoreParam(workers=2)) ## ---- eval=FALSE, echo=TRUE, warnings=FALSE----------------------------------- # df.lis.fil <- auc_stat(wk.dir='opt_real_res', tar.par='filter', total.min=500, true.min=300, aucs=round(seq(0.5, 0.9, 0.1), 1)) ## ---- eval=FALSE, echo=TRUE, warnings=FALSE----------------------------------- # df.lis.fil$df.auc.mean[1:3, ] # mean_auc_bar(df.lis.fil[[1]], bar.width=0.07, title='Mean AUCs by filtering settings') ## ---- echo=FALSE, fig.wide=TRUE, out.width="70%", fig.cap=("Mean AUCs of filtering settings in real optimization. One bar refers to mean AUCs of a filtering settings at a certain AUC cutoff.")---- include_graphics('img/real_filter_auc_mean.png') ## ---- eval=FALSE, echo=TRUE, warnings=FALSE----------------------------------- # auc_violin(df.lis=df.lis.fil, xlab='Filtering settings') ## ---- echo=FALSE, fig.wide=TRUE, out.width="80%", fig.cap=("All AUCs of filtering settings in real optimization. A violin plot refers to all AUCs of a filtering settings at a certain AUC cutoff.")---- include_graphics('img/real_filter_auc_all.png') ## ---- eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------ df.par.fil[c(1, 2), ] ## ---- eval=FALSE, echo=TRUE, warnings=FALSE----------------------------------- # df.lis.norm <- auc_stat(wk.dir='opt_real_res', tar.par='norm', total.min=500, true.min=300, aucs=round(seq(0.5, 0.9, 0.1), 1)) ## ---- eval=FALSE, echo=TRUE, warnings=FALSE----------------------------------- # df.lis.norm$df.auc.mean[1:3, ] # mean_auc_bar(df.lis.norm[[1]], bar.width=0.07, title='Mean AUCs by normalization methods') ## ---- eval=FALSE, echo=TRUE, warnings=FALSE----------------------------------- # auc_violin(df.lis=df.lis.norm, xlab='Normalization methods') ## ---- eval=FALSE, echo=TRUE, warnings=FALSE----------------------------------- # df.lis.graph <- auc_stat(wk.dir='opt_real_res', tar.par='graph', total.min=500, true.min=300, aucs=round(seq(0.5, 0.9, 0.1), 1)) ## ---- eval=FALSE, echo=TRUE, warnings=FALSE----------------------------------- # df.lis.graph$df.auc.mean[1:3, ] # mean_auc_bar(df.lis.graph[[1]], bar.width=0.07, title='Mean AUCs by graph-building methods') ## ---- eval=FALSE, echo=TRUE, warnings=FALSE----------------------------------- # auc_violin(df.lis=df.lis.graph, xlab='Graph-building methods') ## ---- eval=FALSE, echo=TRUE, warnings=FALSE----------------------------------- # df.lis.dimred <- auc_stat(wk.dir='opt_real_res', tar.par='dimred', total.min=500, true.min=300, aucs=round(seq(0.5, 0.9, 0.1), 1)) ## ---- eval=FALSE, echo=TRUE, warnings=FALSE----------------------------------- # df.lis.dimred$df.auc.mean[1:3, ] # # Mean AUCs by each dimensionality reduction method and AUC cutoff. # mean_auc_bar(df.lis.dimred[[1]], bar.width=0.07, title='Mean AUCs by dimensionality reduction methods') ## ---- eval=FALSE, echo=TRUE, warnings=FALSE----------------------------------- # auc_violin(df.lis=df.lis.dimred, xlab='Dimensionality reduction') ## ---- eval=FALSE, echo=TRUE, warnings=FALSE----------------------------------- # df.lis.spd <- auc_stat(wk.dir='opt_real_res', tar.par='spd.set', total.min=500, true.min=300, aucs=round(seq(0.5, 0.9, 0.1), 1)) # df.lis.spd$auc0.5$df.frq[1:3, ] ## ---- eval=FALSE, echo=TRUE, warnings=FALSE----------------------------------- # spd_auc_violin(df.lis=df.lis.spd, n=5, xlab='spd.sets', x.vjust=0.6) ## ---- eval=FALSE, echo=TRUE, warnings=FALSE----------------------------------- # n <- 5; df.spd.opt <- NULL # for (i in df.lis.spd) { # df.spd.opt <- rbind(df.spd.opt, i$df.frq[seq_len(n), c('sim', 'sim.p', 'dim')]) # } # # df.spd.opt$spd.set <- paste0('s', df.spd.opt$sim, 'p', df.spd.opt$sim.p, 'd', df.spd.opt$dim) # # df.spd.opt <- subset(df.spd.opt, !duplicated(spd.set)) # df.spd.opt[1:3, ] ## ---- eval=TRUE, echo=FALSE, warnings=FALSE----------------------------------- df.spd.opt <- read.table(system.file("extdata/cocluster", "df_spd_opt.txt", package="spatialHeatmap"), header=TRUE, row.names=1, sep='\t') df.spd.opt[1:3, ] ## ----optPar, eval=TRUE, echo=FALSE, warnings=FALSE---------------------------- df.opt <- data.frame(normalization='fct', filtering.set='fil1, fil2', dimensionality.reduction='pca', graph.building='knn, snn', spd.set=df.spd.opt$spd.set) kable(df.opt, caption='Optimized parameter settings on Arabidopsis thaliana root data sets', col.names=colnames(df.opt), row.names=FALSE, escape=TRUE) ## ---- eval=FALSE, echo=TRUE, warnings=FALSE----------------------------------- # blk.mus.brain <- filter_data(data=blk.mus.brain, pOA=c(0.05, 5), CV=c(0.05, 100)) # mus.brain.lis <- filter_cell(lis=list(sc.mus=sc.mus.brain), bulk=blk.mus.brain, gen.rm=NULL, min.cnt=1, p.in.cell=0.01, p.in.gen=0.05, verbose=FALSE) ## ---- eval=FALSE, echo=TRUE, warnings=FALSE----------------------------------- # mus.brain.lis.nor <- norm_multi(dat.lis=mus.brain.lis, cpm=FALSE) ## ---- eval=FALSE, echo=TRUE, warnings=FALSE----------------------------------- # blk.mus.brain.fil <- filter_data(data=mus.brain.lis.nor$bulk, pOA=c(0.1, 1), CV=c(0.1, 100), verbose=FALSE) # mus.brain.lis.fil <- filter_cell(lis=list(sc.mus=mus.brain.lis.nor$sc.mus), bulk=blk.mus.brain.fil, gen.rm=NULL, min.cnt=1, p.in.cell=0.1, p.in.gen=0.01, verbose=FALSE) ## ---- eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------ match.mus.brain.pa <- system.file("extdata/shinyApp/example", "match_mouse_brain_cocluster.txt", package="spatialHeatmap") df.match.mus.brain <- read.table(match.mus.brain.pa, header=TRUE, row.names=1, sep='\t') df.match.mus.brain ## ---- eval=FALSE, echo=TRUE, warnings=FALSE----------------------------------- # mus.brain.df.para <- cocluster(bulk=mus.brain.lis.fil$bulk, cell=mus.brain.lis.fil$sc.mus, df.match=df.match.mus.brain, df.para=df.spd.opt[, c('sim', 'sim.p', 'dim')], graph.meth='knn', dimred='PCA', return.all=FALSE, multi.core.par=MulticoreParam(workers=2)) ## ---- eval=FALSE, echo=TRUE, warnings=FALSE----------------------------------- # df.par.rdn <- random_para(fil.set=c('fil3', 'fil4'), norm='cpm', dimred='umap', graph.meth=c('knn', 'snn'), sim=round(seq(0.2, 0.8, by=0.1), 1), sim.p=round(seq(0.2, 0.8,by=0.1), 1), dim=seq(5, 40, by=1), df.spd.opt=df.spd.opt) # df.par.rnd[1:3, ] ## ----validateOpt, echo=FALSE, fig.wide=TRUE, fig.cap=("Validating optimal settings. AUCs of optimal settings on each validating data sets. One bar refers to one AUC of one optimal settings. Asterisks indicate optimal settings have AUC >= 0.5, total bulk assignments >= 500, total true assignments >= 300 across all four data sets. If total bulk assignments < 500 or total true assignments < 300, AUCs are set 0. (a) Mouse brain. (b) Mouse kidndy. (c) Arabidopsis root of single cell 9. (d) Arabidopsis root of single cell 51. "), out.width="100%"---- include_graphics('img/validate_para_opt.png') ## ----validateRdm, echo=FALSE, fig.wide=TRUE, fig.cap=("Random settings. AUCs of random settings on each validating data sets. One bar refers to one AUC of one optimal settings. If total bulk assignments < 500 or total true assignments < 300, AUCs are set 0. (a) Mouse brain. (b) Mouse kidndy. (c) Arabidopsis root of single cell 9. (d) Arabidopsis root of single cell 51. "), out.width="100%"---- include_graphics('img/validate_para_rdm.png') ## ----optParFin, eval=TRUE, echo=FALSE, warnings=FALSE------------------------- df.opt.final <- subset(df.opt, spd.set %in% c('s0.3p0.5d13', 's0.2p0.8d12', 's0.3p0.5d11', 's0.2p0.2d12', 's0.2p0.5d12')) kable(df.opt.final, caption='Final optimal parameter settings after validation.', col.names=colnames(df.opt.final), row.names=FALSE, escape=TRUE) ## ---- eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------ set.seed(10) ## ---- eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------ # Example bulk data. blk.mus.pa <- system.file("extdata/shinyApp/example", "bulk_mouse_cocluster.txt", package="spatialHeatmap") blk.mus <- as.matrix(read.table(blk.mus.pa, header=TRUE, row.names=1, sep='\t', check.names=FALSE)) blk.mus[1:3, 1:5] # Example single cell data. sc.mus.pa <- system.file("extdata/shinyApp/example", "cell_mouse_cocluster.txt", package="spatialHeatmap") sc.mus <- as.matrix(read.table(sc.mus.pa, header=TRUE, row.names=1, sep='\t', check.names=FALSE)) sc.mus[1:3, 1:5] ## ---- eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------ blk.mus <- filter_data(data=blk.mus, sam.factor=NULL, con.factor=NULL, pOA=c(0.1, 5), CV=c(0.2, 100), verbose=FALSE) mus.lis <- filter_cell(lis=list(sc.mus=sc.mus), bulk=blk.mus, gen.rm=NULL, min.cnt=1, p.in.cell=0.5, p.in.gen=0.1, verbose=FALSE) ## ---- eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------ mus.lis.nor <- read_cache(cache.pa, 'mus.lis.nor') # Retrieve data from cache. if (is.null(mus.lis.nor)) { # Save normalized data to cache if not cached. mus.lis.nor <- norm_multi(dat.lis=mus.lis, cpm=FALSE) save_cache(dir=cache.pa, overwrite=TRUE, mus.lis.nor) } ## ---- eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------ blk.mus.fil <- filter_data(data=logcounts(mus.lis.nor$bulk), sam.factor=NULL, con.factor=NULL, pOA=c(0.1, 0.5), CV=c(0.2, 100), verbose=FALSE) mus.lis.fil <- filter_cell(lis=list(sc.mus=logcounts(mus.lis.nor$sc.mus)), bulk=blk.mus.fil, gen.rm=NULL, min.cnt=1, p.in.cell=0.05, p.in.gen=0.02, verbose=FALSE) ## ---- eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------ svg.mus <- system.file("extdata/shinyApp/example", "mus_musculus.brain.svg", package="spatialHeatmap") # Spatial features. feature.df <- return_feature(svg.path=svg.mus) ## ---- eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------ match.mus.brain.pa <- system.file("extdata/shinyApp/example", "match_mouse_brain_cocluster.txt", package="spatialHeatmap") df.match.mus.brain <- read.table(match.mus.brain.pa, header=TRUE, row.names=1, sep='\t') df.match.mus.brain ## ---- eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------ df.match.mus.brain$SVGBulk %in% feature.df$feature ## ---- eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------ clus.sc <- read_cache(cache.pa, 'clus.sc') # Retrieve data from cache. if (is.null(clus.sc)) { clus.sc <- cluster_cell(data=mus.lis.fil$sc.mus, min.dim=10, max.dim=50, graph.meth='knn', dimred='PCA') save_cache(dir=cache.pa, overwrite=TRUE, clus.sc) } colData(clus.sc)[1:3, ] ## ---- eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------ cell.refined <- refine_cluster(clus.sc, sim=0.2, sim.p=0.8, sim.meth='spearman', verbose=FALSE) ## ---- eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------ cell.refined <- true_bulk(cell.refined, df.match.mus.brain) colData(cell.refined)[1:3, ] ## ---- eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------ roc.lis <- read_cache(cache.pa, 'roc.lis') # Retrieve data from cache. if (is.null(roc.lis)) { roc.lis <- coclus_roc(bulk=mus.lis.fil$bulk, cell.refined=cell.refined, df.match=df.match.mus.brain, min.dim=12, graph.meth='knn', dimred='PCA') save_cache(dir=cache.pa, overwrite=TRUE, roc.lis) } ## ---- eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------ roc.lis$df.roc[1:3, ] table(roc.lis$df.roc$response) ## ---- eval=TRUE, echo=TRUE, warnings=FALSE, fig.wide=TRUE, fig.cap=('ROC curve of auto-matching on mouse brain data. The AUC value is a measure of accuracy on bulk assignments.'), out.width="70%", fig.show='show'---- plot(roc.lis$roc.obj, print.auc=TRUE) ## ---- eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------ res.lis <- c(list(cell.refined=cell.refined), roc.lis) ## ---- eval=FALSE, echo=TRUE, warnings=FALSE----------------------------------- # res.lis <- cocluster(bulk=mus.lis.fil$bulk, cell=mus.lis.fil$sc.mus, df.match=df.match.mus.brain, df.para=NULL, sim=0.2, sim.p=0.8, dim=12, graph.meth='knn', dimred='PCA', sim.meth='spearman', return.all=TRUE) # res.lis <- res.lis[[1]] ## ---- eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------ df.par <- data.frame(sim=c(0.2, 0.3), sim.p=c(0.8, 0.7), dim=c(12, 13)) df.par ## ---- eval=FALSE, echo=TRUE, warnings=FALSE----------------------------------- # res.multi <- cocluster(bulk=mus.lis.fil$bulk, cell=mus.lis.fil$sc.mus, df.match=df.match.mus.brain, df.para=df.par, sc.dim.min=10, max.dim=50, sim=0.2, sim.p=0.8, dim=12, graph.meth='knn', dimred='PCA', sim.meth='spearman', return.all=TRUE, multi.core.par=MulticoreParam(workers=2)) ## ---- eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------ desired.blk.pa <- system.file("extdata/shinyApp/example", "selected_cells_with_desired_bulk.txt", package="spatialHeatmap") df.desired.bulk <- read.table(desired.blk.pa, header=TRUE, row.names=1, sep='\t') df.desired.bulk[1:3, ] ## ---- eval=TRUE, echo=TRUE, warnings=FALSE, fig.wide=TRUE, fig.cap=('PCA embedding plot of mouse brain single cell data. Single cell data after cluster refining are plotted.'), out.width="70%", fig.show='show'---- plot_dim(res.lis$cell.refined, dim='PCA', color.by='cell', x.break=seq(-10, 10, 2), y.break=seq(-10, 10, 2)) ## ---- eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------ df.desired.bulk <- data.frame(x.min=c(2, 6), x.max=c(4, 10), y.min=c(6, 8), y.max=c(8, 10), desiredSVGBulk=c('cerebral.cortex', 'cerebral.cortex'), dimred='PCA') df.desired.bulk ## ---- eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------ sce.lis <- sub_asg(res.lis=res.lis, thr=0, df.desired.bulk=df.desired.bulk, df.match=df.match.mus.brain, true.only=TRUE) ## ---- eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------ sce.aggr <- aggr_rep(data=sce.lis$cell.sub, assay.na='logcounts', sam.factor='SVGBulk', con.factor=NULL, aggr='mean') ## ---- eval=TRUE, echo=TRUE, warnings=FALSE, fig.wide=TRUE, fig.cap=('Co-visualizing bulk and single cells of mouse brain with abundance profiles. The aggregated expression profiles of gene `Adcy1` are plotted.'), out.width="100%", fig.show='show'---- shm.lis1 <- spatial_hm(svg.path=svg.mus, data=sce.aggr, ID=c('Adcy1'), legend.nrow=4, sce.dimred=sce.lis$cell.refined, dimred='PCA', assay.na='logcounts', tar.bulk=c('hippocampus'), profile=TRUE, dim.lgd.text.size=10, dim.lgd.nrow=1, bar.width=0.1) ## ---- eval=TRUE, echo=TRUE, warnings=FALSE, fig.wide=TRUE, fig.cap=('Co-visualizing bulk and single cells of mouse brain without abundance profiles. The matching between cells and SVG bulk is denoted by color in-between.'), out.width="100%", fig.show='show'---- shm.lis2 <- spatial_hm(svg.path=svg.mus, data=sce.aggr, ID=c('Adcy1'), legend.nrow=4, sce.dimred=sce.lis$cell.refined, dimred='PCA', tar.bulk=c('hippocampus'), profile=FALSE, dim.lgd.text.size=10, dim.lgd.nrow=1) ## ---- eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------ sce.auto <- readRDS(system.file("extdata/shinyApp/example", 'sce_auto_bulk_cell_mouse_brain.rds', package="spatialHeatmap")) colData(sce.auto) metadata(sce.auto)$df.match ## ----eval=FALSE, echo=TRUE, warnings=FALSE------------------------------------ # shiny_shm() ## ----shiny, echo=FALSE, fig.wide=TRUE, fig.cap=("Screenshot of spatialHeatmap's Shiny App."), out.width="100%"---- include_graphics('img/shiny.png') ## ----eval=FALSE, echo=TRUE, warnings=FALSE------------------------------------ # se.fil.arab <- filter_data(data=se.aggr.sh, ann="Target.Description", sam.factor='sample', con.factor='condition', pOA=c(0.03, 6), CV=c(0.30, 100), dir='./') ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- vec <- sample(x=1:100, size=5) # Random numeric values names(vec) <- c('occipital lobe__condition1', 'occipital lobe__condition2', 'parietal lobe__condition1', 'parietal lobe__condition2', 'notMapped') # Assign unique names to random values vec ## ----vecshm, eval=FALSE, echo=TRUE, warnings=FALSE, fig.wide=FALSE, fig.cap=c('SHMs on a vector. \'occipital lobe\' and \'parietal lobe\' are 2 aSVG features and \'condition1\' and \'condition2\' are conditions.')---- # spatial_hm(svg.path=svg.hum, data=vec, ID='toy', ncol=1, legend.r=1.2, sub.title.size=14, ft.trans='g4320') ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- df.test <- data.frame(matrix(sample(x=1:1000, size=100), nrow=20)) # Create numeric data.frame colnames(df.test) <- names(vec) # Assign column names rownames(df.test) <- paste0('gene', 1:20) # Assign row names df.test[1:3, ] ## ----dfshm, eval=FALSE, echo=TRUE, warnings=FALSE, fig.wide=FALSE, fig.cap=c('SHMs on a data frame. \'occipital lobe\' and \'parietal lobe\' are 2 aSVG features and \'condition1\' and \'condition2\' are conditions.')---- # spatial_hm(svg.path=svg.hum, data=df.test, ID=c('gene1'), ncol=1, legend.r=1.2, sub.title.size=14) ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- df.test$ann <- paste0('ann', 1:20) df.test[1:3, ] ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- sample <- c(rep('occipital lobe', 4), rep('parietal lobe', 4)) condition <- rep(c('condition1', 'condition1', 'condition2', 'condition2'), 2) target.test <- data.frame(sample=sample, condition=condition, row.names=paste0('assay', 1:8)) target.test ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- df.se <- data.frame(matrix(sample(x=1:1000, size=160), nrow=20)) rownames(df.se) <- paste0('gene', 1:20) colnames(df.se) <- row.names(target.test) df.se[1:3, ] ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- se <- SummarizedExperiment(assays=df.se, colData=target.test) se ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- rowData(se) <- df.test['ann'] ## ----eval=TRUE, echo=TRUE, warnings=FALSE------------------------------------- se.aggr <- aggr_rep(data=se, sam.factor='sample', con.factor='condition', aggr='mean') assay(se.aggr)[1:3, ] ## ----seshm, eval=FALSE, echo=TRUE, warnings=FALSE, fig.wide=FALSE, fig.cap=c('SHMs on a SummarizedExperiment. \'occipital lobe\' and \'parietal lobe\' are 2 aSVG features and \'condition1\' and \'condition2\' are conditions.')---- # spatial_hm(svg.path=svg.hum, data=se.aggr, ID=c('gene1'), ncol=1, legend.r=1.2, sub.title.size=14, ft.trans=c('g4320')) ## ----eval=FALSE, echo=TRUE, warnings=FALSE------------------------------------ # tmp.dir1 <- paste0(normalizePath(tempdir(check=TRUE), winslash="/", mustWork=FALSE), '/shm1') # if (!dir.exists(tmp.dir1)) dir.create(tmp.dir1) # svg.hum <- system.file("extdata/shinyApp/example", 'homo_sapiens.brain.svg', package="spatialHeatmap") # file.copy(from=svg.hum, to=tmp.dir1, overwrite=TRUE) # Copy "homo_sapiens.brain.svg" file into 'tmp.dir1' ## ----eval=FALSE, echo=TRUE, warnings=FALSE------------------------------------ # feature.df <- return_feature(feature=c('frontal cortex', 'prefrontal cortex'), species=c('homo sapiens', 'brain'), dir=tmp.dir1, remote=NULL, keywords.any=FALSE) # feature.df ## ----eval=FALSE, echo=TRUE, warnings=FALSE------------------------------------ # f.new <- c('prefrontalCortex', 'frontalCortex') ## ----eval=FALSE, echo=TRUE, warnings=FALSE------------------------------------ # s.new <- c('0.05', '0.1') # New strokes. # c.new <- c('red', 'green') # New colors. ## ----eval=FALSE, echo=TRUE, warnings=FALSE------------------------------------ # feature.df.new <- cbind(featureNew=f.new, strokeNew=s.new, colorNew=c.new, feature.df) # feature.df.new ## ----eval=FALSE, echo=TRUE, warnings=FALSE------------------------------------ # update_feature(df.new=feature.df.new, dir=tmp.dir1) ## ----eval=TRUE, echo=TRUE----------------------------------------------------- sessionInfo()