Example Workflow For Processing a CRISPR-based Pooled Screen

Russell Bainer

2017-04-24

Example Workflow

This is an example workflow for processing a pooled CRISPR-based screen using the provided sample data. See the various manpages for additional visualization options and algorithmic details.

Load dependencies and data

library(Biobase)
library(limma)
library(gCrisprTools)

data("es", package = "gCrisprTools")
data("ann", package = "gCrisprTools")
data("aln", package = "gCrisprTools")

Make a sample key, structured as a factor with control samples in the first level

sk <- relevel(as.factor(pData(es)$TREATMENT_NAME), "ControlReference")
names(sk) <- row.names(pData(es))

Generate a contrast of interest using voom/limma; pairing replicates is a good idea if that information is available.

design <- model.matrix(~ 0 + REPLICATE_POOL + TREATMENT_NAME, pData(es))
colnames(design) <- gsub('TREATMENT_NAME', '', colnames(design))
contrasts <-makeContrasts(DeathExpansion - ControlExpansion, levels = design)

Optionally, trim of trace reads from the unnormalized object (see man page for details)

es <- ct.filterReads(es, trim = 1000, sampleKey = sk)

Normalize, convert to a voom object, and generate a contrast

es <- ct.normalizeGuides(es, method = "scale", plot.it = TRUE) #See man page for other options
vm <- voom(exprs(es), design)

fit <- lmFit(vm, design)
fit <- contrasts.fit(fit, contrasts)
fit <- eBayes(fit)

Edit the annotation file if you used ct.filterReads above

ann <- ct.prepareAnnotation(ann, fit, controls = "NoTarget")

Summarize gRNA signals to identify target genes of interest

resultsDF <-
  ct.generateResults(
    fit,
    annotation = ann,
    RRAalphaCutoff = 0.1,
    permutations = 1000,
    scoring = "combined", 
    permutation.seed = 2
  )

Optionally, just load an example results object for testing purposes (trimming out reads as necessary)

data("fit", package = "gCrisprTools")
data("resultsDF", package = "gCrisprTools")

fit <- fit[(row.names(fit) %in% row.names(ann)),]
resultsDF <- resultsDF[(row.names(resultsDF) %in% row.names(ann)),]

Crispr-specific quality control and visualization tools (see man pages for details):

ct.alignmentChart(aln, sk)
ct.rawCountDensities(es, sk)

Visualize gRNA abundance distributions

ct.gRNARankByReplicate(es, sk) 
ct.gRNARankByReplicate(es, sk, annotation = ann, geneSymb = "NoTarget")  #Show locations of NTC gRNAs

Visualize control guide behavior across conditions

ct.viewControls(es, ann, sk, normalize = FALSE)
ct.viewControls(es, ann, sk, normalize = TRUE)

Visualize GC bias across samples, or within an experimental contrast

ct.GCbias(es, ann, sk)
ct.GCbias(fit, ann, sk)

View most variable gRNAs/Genes (as % of sequencing library)

ct.stackGuides(es,
               sk,
               plotType = "gRNA",
               annotation = ann,
               nguides = 40)
ct.stackGuides(es, 
               sk, 
               plotType = "Target", 
               annotation = ann)
ct.stackGuides(es,
               sk,
               plotType = "Target",
               annotation = ann,
               subset = names(sk)[grep('Expansion', sk)])

View a CDF of genes/guides

ct.guideCDF(es, sk, plotType = "gRNA")
ct.guideCDF(es, sk, plotType = "Target", annotation = ann)

View top enriched/depleted candidates

ct.topTargets(fit,
              resultsDF,
              ann,
              targets = 10,
              enrich = TRUE)
ct.topTargets(fit,
              resultsDF,
              ann,
              targets = 10,
              enrich = FALSE)

View the gRNA behavior of gRNAs targeting a particular gene of interest

ct.viewGuides("Target1633", fit, ann)
ct.gRNARankByReplicate(es, sk, annotation = ann, geneSymb = "Target1633")

View ontological enrichment within the depleted/enriched targets

enrichmentResults <-
  ct.PantherPathwayEnrichment(
    resultsDF,
    pvalue.cutoff = 0.01,
    enrich = TRUE,
    organism = 'mouse'
  )

Test a gene set for enrichment within target candidates

data("essential.genes", package = "gCrisprTools")
ROCs <- ct.ROC(resultsDF, essential.genes, stat = "deplete.p")
PRCs <- ct.PRC(resultsDF, essential.genes, stat = "deplete.p")

Make reports in a directory of interest

path2report <-      #Make a report of the whole experiment
  ct.makeReport(fit = fit, 
                eset = es, 
                sampleKey = sk, 
                annotation = ann, 
                results = resultsDF, 
                aln = aln, 
                outdir = ".") 

path2QC <-          #Or one focusing only on experiment QC
  ct.makeQCReport(es, 
                  trim = 1000, 
                  log2.ratio = 0.05, 
                  sampleKey = sk, 
                  annotation = ann, 
                  aln = aln, 
                  identifier = 'Crispr_QC_Report',
                  lib.size = NULL
                  )                

path2Contrast <-    #Or Contrast-specific one
  ct.makeContrastReport(eset = es, 
                        fit = fit, 
                        sampleKey = sk, 
                        results = resultsDF, 
                        annotation = ann, 
                        comparison.id = NULL, 
                        identifier = 'Crispr_Contrast_Report')