## ---- eval=FALSE----------------------------------------------------------- # install.packages("BiocManager") # BiocManager::install("zinbwave") ## ----options, include=FALSE, echo=FALSE------------------------------------ knitr::opts_chunk$set(warning=FALSE, error=FALSE, message=FALSE) set.seed(1133) ## ----load_packs------------------------------------------------------------ library(zinbwave) library(scRNAseq) library(matrixStats) library(magrittr) library(ggplot2) library(biomaRt) # Register BiocParallel Serial Execution BiocParallel::register(BiocParallel::SerialParam()) ## ----pollen---------------------------------------------------------------- fluidigm <- ReprocessedFluidigmData(assays = "tophat_counts") fluidigm table(colData(fluidigm)$Coverage_Type) ## ----filter---------------------------------------------------------------- filter <- rowSums(assay(fluidigm)>5)>5 table(filter) fluidigm <- fluidigm[filter,] ## ----variance-------------------------------------------------------------- assay(fluidigm) %>% log1p %>% rowVars -> vars names(vars) <- rownames(fluidigm) vars <- sort(vars, decreasing = TRUE) head(vars) fluidigm <- fluidigm[names(vars)[1:100],] ## ----rename---------------------------------------------------------------- assayNames(fluidigm)[1] <- "counts" ## ----zinbwave-------------------------------------------------------------- fluidigm_zinb <- zinbwave(fluidigm, K = 2, epsilon=1000) ## ----zinb_plot------------------------------------------------------------- W <- reducedDim(fluidigm_zinb) data.frame(W, bio=colData(fluidigm)$Biological_Condition, coverage=colData(fluidigm)$Coverage_Type) %>% ggplot(aes(W1, W2, colour=bio, shape=coverage)) + geom_point() + scale_color_brewer(type = "qual", palette = "Set1") + theme_classic() ## ----zinb_coverage--------------------------------------------------------- fluidigm_cov <- zinbwave(fluidigm, K=2, X="~Coverage_Type", epsilon=1000) ## ----zinb_plot2------------------------------------------------------------ W <- reducedDim(fluidigm_cov) data.frame(W, bio=colData(fluidigm)$Biological_Condition, coverage=colData(fluidigm)$Coverage_Type) %>% ggplot(aes(W1, W2, colour=bio, shape=coverage)) + geom_point() + scale_color_brewer(type = "qual", palette = "Set1") + theme_classic() ## ----gcc, eval=FALSE------------------------------------------------------- # mart <- useMart("ensembl") # mart <- useDataset("hsapiens_gene_ensembl", mart = mart) # bm <- getBM(attributes=c('hgnc_symbol', 'start_position', # 'end_position', 'percentage_gene_gc_content'), # filters = 'hgnc_symbol', # values = rownames(fluidigm), # mart = mart) # # bm$length <- bm$end_position - bm$start_position # len <- tapply(bm$length, bm$hgnc_symbol, mean) # len <- len[rownames(fluidigm)] # gcc <- tapply(bm$percentage_gene_gc_content, bm$hgnc_symbol, mean) # gcc <- gcc[rownames(fluidigm)] ## ----rowdata, eval=FALSE--------------------------------------------------- # rowData(fluidigm) <- data.frame(gccontent = gcc, length = len) ## ----zinb_gcc, eval=FALSE-------------------------------------------------- # fluidigm_gcc <- zinbwave(fluidigm, K=2, V="~gccontent + log(length)", epsilon=1000) ## ----tsne------------------------------------------------------------------ set.seed(93024) library(Rtsne) W <- reducedDim(fluidigm_cov) tsne_data <- Rtsne(W, pca = FALSE, perplexity=10, max_iter=5000) data.frame(Dim1=tsne_data$Y[,1], Dim2=tsne_data$Y[,2], bio=colData(fluidigm)$Biological_Condition, coverage=colData(fluidigm)$Coverage_Type) %>% ggplot(aes(Dim1, Dim2, colour=bio, shape=coverage)) + geom_point() + scale_color_brewer(type = "qual", palette = "Set1") + theme_classic() ## ----norm------------------------------------------------------------------ fluidigm_norm <- zinbwave(fluidigm, K=2, epsilon=1000, normalizedValues=TRUE, residuals = TRUE) ## ----assays---------------------------------------------------------------- fluidigm_norm ## ----zinb------------------------------------------------------------------ zinb <- zinbFit(fluidigm, K=2, epsilon=1000) ## ----zinbwave2------------------------------------------------------------- fluidigm_zinb <- zinbwave(fluidigm, fitted_model = zinb, K = 2, epsilon=1000, observationalWeights = TRUE) ## ----weights--------------------------------------------------------------- weights <- assay(fluidigm_zinb, "weights") ## ----edger----------------------------------------------------------------- library(edgeR) dge <- DGEList(assay(fluidigm_zinb)) dge <- calcNormFactors(dge) design <- model.matrix(~Biological_Condition, data = colData(fluidigm)) dge$weights <- weights dge <- estimateDisp(dge, design) fit <- glmFit(dge, design) lrt <- glmWeightedF(fit, coef = 3) topTags(lrt) ## ----deseq2---------------------------------------------------------------- library(DESeq2) dds <- DESeqDataSet(fluidigm_zinb, design = ~ Biological_Condition) dds <- DESeq(dds, sfType="poscounts", useT=TRUE, minmu=1e-6) res <- lfcShrink(dds, contrast=c("Biological_Condition", "NPC", "GW16"), type = "normal") head(res) ## ----seurat---------------------------------------------------------------- library(Seurat) seu <- as.Seurat(x = fluidigm_zinb, counts = "counts", data = "counts") ## ----seurat2--------------------------------------------------------------- seu ## ----seurat3--------------------------------------------------------------- seu <- FindNeighbors(seu, reduction = "zinbwave", dims = 1:2 #this should match K ) seu <- FindClusters(object = seu) ## -------------------------------------------------------------------------- fluidigm_surf <- zinbsurf(fluidigm, K = 2, epsilon = 1000, prop_fit = 0.5) W2 <- reducedDim(fluidigm_surf) data.frame(W2, bio=colData(fluidigm)$Biological_Condition, coverage=colData(fluidigm)$Coverage_Type) %>% ggplot(aes(W1, W2, colour=bio, shape=coverage)) + geom_point() + scale_color_brewer(type = "qual", palette = "Set1") + theme_classic() ## ---- eval=FALSE----------------------------------------------------------- # library(BiocParallel) # zinb_res <- zinbwave(fluidigm, K=2, BPPARAM=MulticoreParam(2)) ## -------------------------------------------------------------------------- sessionInfo()