## ----knitr-options, include = FALSE------------------------------------------- knitr::opts_chunk$set(collapse = TRUE, comment = "#>") ## ----setup-------------------------------------------------------------------- suppressPackageStartupMessages({ library("splatter") library("scater") library("VariantAnnotation") library("ggplot2") }) set.seed(42) ## ----quick-start-------------------------------------------------------------- vcf <- mockVCF() gff <- mockGFF() sim <- splatPopSimulate(vcf = vcf, gff = gff, sparsify = FALSE) sim <- logNormCounts(sim) sim <- runPCA(sim, ncomponents = 5) plotPCA(sim, colour_by = "Sample") ## ----eqtlEstimate------------------------------------------------------------- bulk.means <- mockBulkMatrix(n.genes=100, n.samples=100) bulk.eqtl <- mockBulkeQTL(n.genes=100) counts <- mockSCE() params.est <- splatPopEstimate(means = bulk.means, eqtl = bulk.eqtl, counts = counts) params.est ## ----splatPopSimulateMeans---------------------------------------------------- params <- newSplatPopParams() sim.means <- splatPopSimulateMeans(vcf = vcf, gff = gff, params = params) round(sim.means$means[1:5, 1:3], digits = 2) print(sim.means$key[1:5, ], digits = 2) ## ----splatPopSimulateMeans-from-key------------------------------------------- sim.means.rep2 <- splatPopSimulateMeans(vcf = vcf, key=sim.means$key, params = newSplatPopParams()) round(sim.means.rep2$means[1:5, 1:5], digits = 2) ## ----quant-normalize-population-data------------------------------------------ bulk.qnorm <- splatPopQuantNorm(newSplatPopParams(), bulk.means) round(bulk.qnorm[1:5, 1:5], 3) ## ----empirical-mock-data------------------------------------------------------ emp <- mockEmpiricalSet() head(emp$eqtl[, c("geneID", "snpID", "snpCHR", "snpLOC", "snpMAF", "slope")]) ## ----empirical-based-simulation----------------------------------------------- emp.sim <- splatPopSimulateMeans(vcf = emp$vcf, gff = emp$gff, eqtl = emp$eqtl, means = emp$means, params = newSplatPopParams()) head(emp.sim$key) ## ----eqtl-splatPopSimulateSC-simple-object------------------------------------ sim.sc <- splatPopSimulateSC(params=params, key = sim.means$key, sim.means=sim.means$means, batchCells=50, sparsify = FALSE) sim.sc ## ----eqtl-splatPopSimulateSC-simple-plots------------------------------------- sim.sc <- logNormCounts(sim.sc) sim.sc <- runPCA(sim.sc, ncomponents = 10) plotPCA(sim.sc, colour_by = "Sample") ## ----splatPop-simulation-nCells-sampled--------------------------------------- params.nCells <- newSplatPopParams(nCells.sample = TRUE, nCells.shape = 1.5, nCells.rate = 0.01) sim.sc.nCells <- splatPopSimulate(vcf = vcf, gff = gff, params = params.nCells, sparsify = FALSE) sim.sc.nCells <- logNormCounts(sim.sc.nCells) sim.sc.nCells <- runPCA(sim.sc.nCells, ncomponents = 2) plotPCA(sim.sc.nCells, colour_by = "Sample") ## ----group-specific-eQTL-simulations------------------------------------------ params.group <- newSplatPopParams(batchCells = 50, group.prob = c(0.5, 0.5)) sim.sc.gr2 <- splatPopSimulate(vcf = vcf, gff = gff, params = params.group, sparsify = FALSE) sim.sc.gr2 <- logNormCounts(sim.sc.gr2) sim.sc.gr2 <- runPCA(sim.sc.gr2, ncomponents = 10) plotPCA(sim.sc.gr2, colour_by = "Group", shape_by = "Sample") ## ----group-specific-eQTL-simulations-bigger----------------------------------- params.group <- newSplatPopParams(batchCells = 50, similarity.scale = 8, eqtl.group.specific = 0.6, de.prob = 0.5, de.facLoc = 0.5, de.facScale = 0.5, group.prob = c(0.5, 0.5)) sim.sc.gr2 <- splatPopSimulate(vcf = vcf, gff = gff, params = params.group, sparsify = FALSE) sim.sc.gr2 <- logNormCounts(sim.sc.gr2) sim.sc.gr2 <- runPCA(sim.sc.gr2, ncomponents = 10) plotPCA(sim.sc.gr2, colour_by = "Group", shape_by = "Sample") ## ----simulate-population-with-condition-effects------------------------------- params.cond <- newSplatPopParams(eqtl.n = 0.5, batchCells = 50, similarity.scale = 5, condition.prob = c(0.5, 0.5), eqtl.condition.specific = 0.5, cde.facLoc = 0.5, cde.facScale = 0.5) sim.pop.cond <- splatPopSimulate(vcf = vcf, gff = gff, params = params.cond, sparsify = FALSE) sim.pop.cond <- logNormCounts(sim.pop.cond) sim.pop.cond <- runPCA(sim.pop.cond, ncomponents = 10) plotPCA(sim.pop.cond, colour_by = "Condition", shape_by = "Sample", point_size=3) ## ----simulate-population-with-batch-effects----------------------------------- params.batches <- newSplatPopParams(similarity.scale = 4, batchCells = c(20, 20), batch.size = 3, batch.facLoc = 0.1, batch.facScale = 0.2) sim.pop.batches <- splatPopSimulate(vcf = vcf, gff = gff, params = params.batches, sparsify = FALSE) sim.pop.batches <- logNormCounts(sim.pop.batches) sim.pop.batches <- runPCA(sim.pop.batches, ncomponents = 10) plotPCA(sim.pop.batches, colour_by = "Sample", shape_by = "Batch", point_size=3) ## ----simulate-population-with-different-sized-batch-effects------------------- params.batches <- newSplatPopParams(similarity.scale = 8, batchCells = c(5, 5, 5), batch.size = 3, batch.facLoc = c(0.5, 0.1, 0.1), batch.facScale = c(0.6, 0.15, 0.15)) sim.pop.batches <- splatPopSimulate(vcf = vcf, gff = gff, params = params.batches, sparsify = FALSE) sim.pop.batches <- logNormCounts(sim.pop.batches) sim.pop.batches <- runPCA(sim.pop.batches, ncomponents = 10) plotPCA(sim.pop.batches, colour_by = "Sample", shape_by = "Batch", point_size=3) ## ----simulate-population-with-coregulation, fig.height=2---------------------- params.coreg <- newSplatPopParams(eqtl.coreg = 0.4, eqtl.ES.rate=2) vcf <- mockVCF(n.sample = 20, n.snps = 1200) gff <- mockGFF(n.genes = 100) sim.coreg <- splatPopSimulateMeans(vcf = vcf, gff = gff, params = params.coreg) key <- sim.coreg$key sim.cor <- cor(t(sim.coreg$means)) coregSNPs <- names(which(table(sim.coreg$key$eSNP.ID) == 2)) coregCorList <- c() for(snp in coregSNPs){ coregGenes <- sim.coreg$key$geneID[which(sim.coreg$key$eSNP.ID == snp)] coregCorList <- c(coregCorList, sim.cor[coregGenes[1], coregGenes[2]]) } randCorList <- c() for (n in 1:1000){ genes <- sample(sim.coreg$key$geneID, 2) randCorList <- c(randCorList, sim.cor[genes[1], genes[2]]) } cor.df <- as.data.frame(list(type=c(rep("coregulated pairs", length(coregCorList)), rep("random pairs", length(randCorList))), correlation=c(coregCorList, randCorList), id = "x")) ggplot(cor.df, aes(x = correlation, y = id, color = type, alpha = type)) + geom_jitter(size = 2, width = 0.1) + scale_alpha_discrete(range = c(0.6, 0.1)) + stat_summary(fun = mean, geom = "pointrange", size = 1, alpha=1) + theme_minimal() + theme(axis.title.y=element_blank(), axis.text.y=element_blank(), legend.position="top") + xlim(c(-1, 1)) + scale_color_manual(values=c("#56B4E9", "#999999")) ## ----sessionInfo-------------------------------------------------------------- sessionInfo()