## ----Initialize, echo=FALSE, results="hide", message=FALSE-------------------- require(knitr) opts_chunk$set(error=FALSE, message=FALSE, warning=FALSE) ## ----Install Dino BioC, eval = FALSE------------------------------------------ # # Install Bioconductor if not present, skip otherwise # if (!require("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # # # Install Dino package # BiocManager::install("Dino") # # # View (this) vignette from R # browseVignettes("Dino") ## ----Install Dino Github, eval = FALSE---------------------------------------- # devtools::install_github('JBrownBiostat/Dino', build_vignettes = TRUE) ## ----Quick Start, eval = FALSE------------------------------------------------ # library(Dino) # # # Return a sparse matrix of normalized expression # Norm_Mat <- Dino(UMI_Mat) # # # Return a Seurat object from already normalized expression # # Use normalized (doNorm = FALSE) and un-transformed (doLog = FALSE) expression # Norm_Seurat <- SeuratFromDino(Norm_Mat, doNorm = FALSE, doLog = FALSE) # # # Return a Seurat object from UMI expression # # Transform normalized expression as log(x + 1) to improve # # some types of downstream analysis # Norm_Seurat <- SeuratFromDino(UMI_Mat) ## ----load pbmcSmall data------------------------------------------------------ set.seed(1) # Bring pbmcSmall into R environment library(Dino) library(Seurat) library(Matrix) data("pbmcSmall") print(dim(pbmcSmall)) ## ----clean data--------------------------------------------------------------- # Filter genes for a minimum of non-zero expression pbmcSmall <- pbmcSmall[rowSums(pbmcSmall != 0) >= 20, ] print(dim(pbmcSmall)) ## ----normalize data, eval = FALSE--------------------------------------------- # # Normalize data # pbmcSmall_Norm <- Dino(pbmcSmall) ## ----normalize data background, echo = FALSE---------------------------------- invisible(capture.output(pbmcSmall_Norm <- Dino(pbmcSmall))) ## ----Seurat clustering-------------------------------------------------------- # Reformat normalized expression as a Seurat object pbmcSmall_Seurat <- SeuratFromDino(pbmcSmall_Norm, doNorm = FALSE) # Cluster pbmcSmall_Seurat pbmcSmall_Seurat <- FindVariableFeatures(pbmcSmall_Seurat, selection.method = "mvp") pbmcSmall_Seurat <- ScaleData(pbmcSmall_Seurat, features = rownames(pbmcSmall_Norm)) pbmcSmall_Seurat <- RunPCA(pbmcSmall_Seurat, features = VariableFeatures(object = pbmcSmall_Seurat), verbose = FALSE) pbmcSmall_Seurat <- FindNeighbors(pbmcSmall_Seurat, dims = 1:10) pbmcSmall_Seurat <- FindClusters(pbmcSmall_Seurat, verbose = FALSE) pbmcSmall_Seurat <- RunUMAP(pbmcSmall_Seurat, dims = 1:10) DimPlot(pbmcSmall_Seurat, reduction = "umap") ## ----SinglleCellExperiment, eval = FALSE-------------------------------------- # # Reformatting pbmcSmall as a SingleCellExperiment # library(SingleCellExperiment) # pbmc_SCE <- SingleCellExperiment(assays = list("counts" = pbmcSmall)) # # # Run Dino # pbmc_SCE <- Dino_SCE(pbmc_SCE) # str(normcounts(pbmc_SCE)) ## ----SinglleCellExperiment Background, echo = F------------------------------- # Reformatting pbmcSmall as a SingleCellExperiment library(SingleCellExperiment) pbmc_SCE <- SingleCellExperiment(assays = list("counts" = pbmcSmall)) # Run Dino invisible(capture.output(pbmc_SCE <- Dino_SCE(pbmc_SCE))) str(normcounts(pbmc_SCE)) ## ----Scran depths, eval = FALSE----------------------------------------------- # library(scran) # # # Compute scran size factors # scranSizes <- calculateSumFactors(pbmcSmall) # # # Re-normalize data # pbmcSmall_SNorm <- Dino(pbmcSmall, nCores = 1, depth = log(scranSizes)) ## ----Unimodal Simulation, echo = FALSE, warning = FALSE----------------------- library(ggplot2) library(gridExtra) library(ggpubr) library(grid) themeObj <- theme( title = element_text(size = 17), plot.subtitle = element_text(size = 15), axis.title = element_text(size = 14), axis.text = element_text(size = 12), strip.text = element_text(size = 14), legend.title = element_text(size = 14), legend.text = element_text(size = 12) ) p1_func <- function(simDat, dinoLam, muVec, themeObj) { p1 <- ggplot(simDat, aes(x = LS, y = y)) + theme_classic() + geom_hex(aes(fill = log10(..count..))) + scale_fill_viridis_c() + labs( x = "log-Depth", y = "Expression (log)", title = "Fitted means", fill = "Count\n(log10)" ) for(i in 1:length(dinoLam)) { p1 <- p1 + geom_abline( slope = 1, intercept = log1p(dinoLam[i]), col = 1, alpha = 0.5, lwd = 0.75 ) } for(i in 1:length(muVec)) { p1 <- p1 + geom_abline( slope = 1, intercept = log1p(muVec[i]), col = 2, lwd = 1.5 ) } p1 <- p1 + themeObj } p2_func <- function(plotDat, themeObj) { x.dens <- plotDat$x[plotDat$model == "NB"] y.dens <- plotDat$y[plotDat$model == "NB"] p2 <- ggplot(plotDat, aes(x = x, y = y, color = model)) + theme_classic() + geom_line() + scale_color_manual(values = 1:2) + labs( x = "Expression", y = "Density", color = "Model", title = "Reference vs.\nFitted distribution" ) + themeObj xInd <- which.max( y.dens < 1e-3 * max(y.dens) & x.dens > x.dens[which.max(y.dens)] ) if(xInd > 1) { p2 <- p2 + xlim(c( 0, x.dens[which.max( y.dens < 1e-3 * max(y.dens) & x.dens > x.dens[which.max(y.dens)] )] )) } dinoDens <- data.frame( x = plotDat$x[plotDat$model == "Dino"], y = plotDat$y[plotDat$model == "Dino"] ) if(max(y.dens) < max(dinoDens$y)) { p2 <- p2 + ylim(c(0, min(c(1.025 * max(dinoDens$y), 1.5 * max(y.dens))))) } return(p2) } plotSim_func <- function(plotList, themeObj) { p1 <- p1_func(plotList$simDat, plotList$dinoLam, plotList$muVec, themeObj) p2 <- p2_func(plotList$plotDat, themeObj) p <- grid.arrange( p1, p2, nrow = 1, top = textGrob(paste0("K = ", plotList$k), gp = gpar(fontsize = 17)) ) return(p) } data("unimodalDat") plotSim_func(unimodalDat, themeObj) ## ----Multimodal Simulation, echo = FALSE, warning = FALSE--------------------- data("multimodalDat") plotSim_func(multimodalDat, themeObj) ## ----------------------------------------------------------------------------- sessionInfo()