## Walk-through script: loads the packaged `example_sce` data, runs scMerge()
## under several configurations (unsupervised, supervised, semi-supervised,
## fast SVD), and draws a PCA plot for each resulting assay.
## NOTE(review): the `## ----name----` markers look like knitr chunk headers
## (purl output); they are kept verbatim.

## ---- include = FALSE------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----loading packages, warning = FALSE, message = FALSE--------------------
suppressPackageStartupMessages({
  library(SingleCellExperiment)
  library(scMerge)
  library(scater)
})

## ----subsampling scMergeData, eval = FALSE, echo = FALSE-------------------
# library(genefilter)
#
# load("~/Downloads/sce_mESC.rda")
# data("segList_ensemblGeneID", package = "scMerge")
#
# set.seed(2019)
#
# example_sce = sce_mESC[, sce_mESC$batch %in% c("batch2", "batch3")]
# example_sce$batch = droplevels(example_sce$batch)
# batch2Sampled = sample(colnames(example_sce[,example_sce$batch == "batch2"]), 100)
# batch3Sampled = sample(colnames(example_sce[,example_sce$batch == "batch3"]), 100)
#
# countsMat = SingleCellExperiment::counts(example_sce)
#
# batchTest = rowFtests(countsMat, fac = example_sce$batch)
# celltypeTest = rowFtests(countsMat, fac = factor(example_sce$cellTypes))
#
# commonSegGenes = intersect(segList_ensemblGeneID$mouse$mouse_scSEG, rownames(sce_mESC))
#
# keepGenes = unique(c(commonSegGenes,
#                      rownames(batchTest)[rank(batchTest$p.value) < 50],
#                      rownames(celltypeTest)[rank(celltypeTest$p.value) < 250]
# ))
#
# example_sce = example_sce[keepGenes, c(batch2Sampled, batch3Sampled)]
# example_sce = example_sce[base::rowSums(counts(example_sce)) != 0, base::colSums(counts(example_sce)) != 0]
#
# table(example_sce$batch,
#       example_sce$cellTypes)
#
# dim(example_sce)
#
# scater::plotPCA(example_sce,
#                 colour_by = "cellTypes",
#                 shape_by = "batch")
#
# save(example_sce,
#      file = "data/example_sce.rda")

## ----loading data----------------------------------------------------------
## Subsetted mouse ESC data
data("example_sce", package = "scMerge")

## ----checking raw data-----------------------------------------------------
scater::plotPCA(
  example_sce,
  colour_by = "cellTypes",
  shape_by = "batch"
)

## ----load SEG--------------------------------------------------------------
## single-cell stably expressed gene list
data("segList_ensemblGeneID", package = "scMerge")
head(segList_ensemblGeneID$mouse$mouse_scSEG)

## ----t1, echo = FALSE------------------------------------------------------
# Timestamp taken immediately before the default scMerge() run.
t1 <- Sys.time()

## ----unsupervised_default, results='hide',fig.show='hide'------------------
scMerge_unsupervised <- scMerge(
  sce_combine = example_sce,
  ctl = segList_ensemblGeneID$mouse$mouse_scSEG,
  kmeansK = c(3, 3),
  assay_name = "scMerge_unsupervised"
)

## ----t2, echo = FALSE------------------------------------------------------
# Timestamp taken immediately after the default scMerge() run.
t2 <- Sys.time()

## ----unsupervised_default_plotting-----------------------------------------
scater::plotPCA(
  scMerge_unsupervised,
  colour_by = "cellTypes",
  shape_by = "batch",
  run_args = list(exprs_values = "scMerge_unsupervised")
)

## ----unsupervised_prop1, results='hide',fig.show='hide'--------------------
# Same call as the default run, with replicate_prop = 1 added.
scMerge_unsupervised_all <- scMerge(
  sce_combine = example_sce,
  ctl = segList_ensemblGeneID$mouse$mouse_scSEG,
  kmeansK = c(3, 3),
  assay_name = "scMerge_unsupervised_all",
  replicate_prop = 1
)

## ----unsupervised_prop1_plotting-------------------------------------------
scater::plotPCA(
  scMerge_unsupervised_all,
  colour_by = "cellTypes",
  shape_by = "batch",
  run_args = list(exprs_values = "scMerge_unsupervised_all")
)

## ----supervised, results='hide',fig.show='hide'----------------------------
# Supplies the cell type labels stored in example_sce via `cell_type`.
scMerge_supervised <- scMerge(
  sce_combine = example_sce,
  ctl = segList_ensemblGeneID$mouse$mouse_scSEG,
  kmeansK = c(3, 3),
  assay_name = "scMerge_supervised",
  cell_type = example_sce$cellTypes
)

## ----supervised_plotting---------------------------------------------------
scater::plotPCA(
  scMerge_supervised,
  colour_by = "cellTypes",
  shape_by = "batch",
  run_args = list(exprs_values = "scMerge_supervised")
)

## ----semi_supervised1, results='hide',fig.show='hide'----------------------
# `cell_type_inc` receives the indices of the cells labelled "2i".
scMerge_semisupervised1 <- scMerge(
  sce_combine = example_sce,
  ctl = segList_ensemblGeneID$mouse$mouse_scSEG,
  kmeansK = c(3, 3),
  assay_name = "scMerge_semisupervised1",
  cell_type = example_sce$cellTypes,
  cell_type_inc = which(example_sce$cellTypes == "2i"),
  cell_type_match = FALSE
)

## ----semi_supervised1_plotting---------------------------------------------
scater::plotPCA(
  scMerge_semisupervised1,
  colour_by = "cellTypes",
  shape_by = "batch",
  run_args = list(exprs_values = "scMerge_semisupervised1")
)

## ----semi_supervised2, results='hide',fig.show='hide'----------------------
# Variant with cell_type_inc = NULL and cell_type_match = TRUE.
scMerge_semisupervised2 <- scMerge(
  sce_combine = example_sce,
  ctl = segList_ensemblGeneID$mouse$mouse_scSEG,
  kmeansK = c(3, 3),
  assay_name = "scMerge_semisupervised2",
  cell_type = example_sce$cellTypes,
  cell_type_inc = NULL,
  cell_type_match = TRUE
)

## ----semi_supervised2_plotting---------------------------------------------
scater::plotPCA(
  scMerge_semisupervised2,
  colour_by = "cellTypes",
  shape_by = "batch",
  run_args = list(exprs_values = "scMerge_semisupervised2")
)

## ----segIndex1, eval = FALSE-----------------------------------------------
# exprs_mat = SummarizedExperiment::assay(example_sce, 'counts')
# result = scSEGIndex(exprs_mat = exprs_mat)

## ----segIndex2-------------------------------------------------------------
## SEG list in ensemblGene ID
data("segList_ensemblGeneID", package = "scMerge")
## SEG list in official gene symbols
data("segList", package = "scMerge")
## SEG list for human scRNA-Seq data
head(segList$human$human_scSEG)
## SEG list for human bulk microarray data
head(segList$human$bulkMicroarrayHK)
## SEG list for human bulk RNASeq data
head(segList$human$bulkRNAseqHK)

## ----t3, echo = FALSE------------------------------------------------------
# Timestamp taken immediately before the fast scMerge() run.
t3 <- Sys.time()

## ----computation_fast, results='hide',fig.show='hide'----------------------
library(BiocSingular)
scMerge_fast <- scMerge(
  sce_combine = example_sce,
  ctl = segList_ensemblGeneID$mouse$mouse_scSEG,
  kmeansK = c(3, 3),
  assay_name = "scMerge_fast",
  BSPARAM = IrlbaParam(),
  svd_k = 20
)

## ----t4, echo = FALSE------------------------------------------------------
# Timestamp taken immediately after the fast scMerge() run.
t4 <- Sys.time()

## ----computation_svd_plotting----------------------------------------------
# Pin the elapsed times to seconds. A bare `t2 - t1` lets difftime choose its
# own units (secs, mins, ...), which would contradict the hard-coded
# "seconds" label below whenever a run takes longer than a minute.
default_elapsed_secs <- as.numeric(difftime(t2, t1, units = "secs"))
fast_elapsed_secs <- as.numeric(difftime(t4, t3, units = "secs"))
paste("Normally, scMerge takes ", round(default_elapsed_secs, 2), " seconds")
paste("Fast version of scMerge takes ", round(fast_elapsed_secs, 2), " seconds")

# NOTE(review): labs() is not namespace-qualified and no library(ggplot2) call
# is visible here; presumably it is attached by one of the packages loaded
# above -- confirm.
scater::plotPCA(
  scMerge_fast,
  colour_by = "cellTypes",
  shape_by = "batch",
  run_args = list(exprs_values = "scMerge_fast")
) +
  labs(title = "fast scMerge yields similar results to the default version")

## ----parallel1, eval = FALSE-----------------------------------------------
# library(BiocParallel)
# scMerge_parallel1 <- scMerge(
#   sce_combine = example_sce,
#   ctl = segList_ensemblGeneID$mouse$mouse_scSEG,
#   kmeansK = c(3, 3),
#   assay_name = "scMerge_parallel1",
#   BPPARAM = MulticoreParam(workers = 2)
# )

## ----reference-------------------------------------------------------------
citation("scMerge")

## ----session info----------------------------------------------------------
sessionInfo()