## ---- echo=FALSE, results="hide", message=FALSE------------------------------- require(knitr) opts_chunk$set(error=FALSE, message=FALSE, warning=FALSE) ## ----setup, echo=FALSE, message=FALSE----------------------------------------- library(scran) library(BiocParallel) register(SerialParam()) # avoid problems with fastMNN parallelization. set.seed(100) ## ----------------------------------------------------------------------------- library(scRNAseq) sce <- GrunPancreasData() sce ## ----------------------------------------------------------------------------- library(scater) qcstats <- perCellQCMetrics(sce) qcfilter <- quickPerCellQC(qcstats, percent_subsets="altexps_ERCC_percent") sce <- sce[,!qcfilter$discard] summary(qcfilter$discard) ## ----------------------------------------------------------------------------- library(scran) clusters <- quickCluster(sce) sce <- computeSumFactors(sce, clusters=clusters) summary(sizeFactors(sce)) ## ----------------------------------------------------------------------------- sce2 <- computeSpikeFactors(sce, "ERCC") summary(sizeFactors(sce2)) ## ----------------------------------------------------------------------------- sce <- logNormCounts(sce) ## ----------------------------------------------------------------------------- dec <- modelGeneVar(sce) plot(dec$mean, dec$total, xlab="Mean log-expression", ylab="Variance") curve(metadata(dec)$trend(x), col="blue", add=TRUE) ## ----------------------------------------------------------------------------- dec2 <- modelGeneVarWithSpikes(sce, 'ERCC') plot(dec2$mean, dec2$total, xlab="Mean log-expression", ylab="Variance") points(metadata(dec2)$mean, metadata(dec2)$var, col="red") curve(metadata(dec2)$trend(x), col="blue", add=TRUE) ## ---- fig.wide=TRUE, fig.asp=1.5---------------------------------------------- dec3 <- modelGeneVar(sce, block=sce$donor) per.block <- dec3$per.block par(mfrow=c(3, 2)) for (i in seq_along(per.block)) { decX <- per.block[[i]] plot(decX$mean, decX$total, xlab="Mean log-expression", ylab="Variance", main=names(per.block)[i]) curve(metadata(decX)$trend(x), col="blue", add=TRUE) } ## ----------------------------------------------------------------------------- # Get the top 10% of genes. top.hvgs <- getTopHVGs(dec, prop=0.1) # Get the top 2000 genes. top.hvgs2 <- getTopHVGs(dec, n=2000) # Get all genes with positive biological components. top.hvgs3 <- getTopHVGs(dec, var.threshold=0) # Get all genes with FDR below 5%. top.hvgs4 <- getTopHVGs(dec, fdr.threshold=0.05) # Running the PCA with the 10% of HVGs. sce <- runPCA(sce, subset_row=top.hvgs) reducedDimNames(sce) ## ----------------------------------------------------------------------------- sced <- denoisePCA(sce, dec2, subset.row=getTopHVGs(dec2, prop=0.1)) ncol(reducedDim(sced, "PCA")) ## ----------------------------------------------------------------------------- output <- getClusteredPCs(reducedDim(sce)) npcs <- metadata(output)$chosen reducedDim(sce, "PCAsub") <- reducedDim(sce, "PCA")[,1:npcs,drop=FALSE] npcs ## ----------------------------------------------------------------------------- # In this case, using the PCs that we chose from getClusteredPCs(). g <- buildSNNGraph(sce, use.dimred="PCAsub") cluster <- igraph::cluster_walktrap(g)$membership sce$cluster <- factor(cluster) table(sce$cluster) ## ----------------------------------------------------------------------------- sce <- runTSNE(sce, dimred="PCAsub") plotTSNE(sce, colour_by="cluster", text_by="cluster") ## ----------------------------------------------------------------------------- ratio <- clusterModularity(g, cluster, as.ratio=TRUE) library(pheatmap) pheatmap(log10(ratio+1), cluster_cols=FALSE, cluster_rows=FALSE, col=rev(heat.colors(100))) ## ----------------------------------------------------------------------------- markers <- findMarkers(sce, sce$cluster) markers[[1]][,1:3] ## ----------------------------------------------------------------------------- wmarkers <- findMarkers(sce, sce$cluster, test.type="wilcox", direction="up", lfc=1) wmarkers[[1]][,1:3] ## ----------------------------------------------------------------------------- markers <- findMarkers(sce, sce$cluster, pval.type="all") markers[[1]][,1:2] ## ----------------------------------------------------------------------------- # Using the first 200 HVs, which are the most interesting anyway. of.interest <- top.hvgs[1:200] cor.pairs <- correlatePairs(sce, subset.row=of.interest) cor.pairs ## ----------------------------------------------------------------------------- cor.pairs2 <- correlatePairs(sce, subset.row=of.interest, block=sce$donor) ## ----------------------------------------------------------------------------- cor.genes <- correlateGenes(cor.pairs) cor.genes ## ----------------------------------------------------------------------------- y <- convertTo(sce, type="edgeR") ## ----------------------------------------------------------------------------- sessionInfo()