Contents

knitr::opts_chunk$set(tidy=FALSE, cache=TRUE,
                      dev="png", message=FALSE, error=FALSE, warning=TRUE)

Citation: if you use MAGeCKFlute in published research, please cite: Binbin Wang, Mei Wang, Wubing Zhang. “Integrative analysis of pooled CRISPR genetic screens using MAGeCKFlute.” Nature Protocols (2019), doi: 10.1038/s41596-018-0113-7.

0.1 Install and load the packages

if(!"MAGeCKFlute" %in% installed.packages()) BiocManager::install("MAGeCKFlute")
if(!"ggplot2" %in% installed.packages()) BiocManager::install("ggplot2")
library(MAGeCKFlute)
library(ggplot2)

1 Input data

MAGeCKFlute requires a weighted gene list for enrichment analysis.

file1 = file.path(system.file("extdata", package = "MAGeCKFlute"),
                  "testdata/rra.gene_summary.txt")
gdata = ReadRRA(file1)
genelist = gdata$Score
names(genelist) = gdata$id
genelist = sort(genelist, decreasing = TRUE)
head(genelist)
##    Jak1   Stat1  Ifngr2  Ifngr1     B2m   H2-D1 
## 10.7440  9.0330  5.6097  5.4719  4.3433  4.2110

2 Enrichment analysis methods

2.1 Hypergeometric test

# Alternative functions EnrichAnalyzer and enrich.HGT.
hgtRes1 = EnrichAnalyzer(genelist[1:200], method = "HGT", 
                         type = "Pathway", organism = "mmu")
head(hgtRes1@result)
##                                              ID                  Description
## BIOCARTA_IFNG_PATHWAY     BIOCARTA_IFNG_PATHWAY                 IFNG PATHWAY
## REACTOME_877312                 REACTOME_877312 Regulation of IFNG signaling
## BIOCARTA_IL22BP_PATHWAY BIOCARTA_IL22BP_PATHWAY               IL22BP PATHWAY
## BIOCARTA_IFNA_PATHWAY     BIOCARTA_IFNA_PATHWAY                 IFNA PATHWAY
## BIOCARTA_TID_PATHWAY       BIOCARTA_TID_PATHWAY                  TID PATHWAY
## REACTOME_877300                 REACTOME_877300   Interferon gamma signaling
##                               NES       pvalue     p.adjust GeneRatio BgRatio
## BIOCARTA_IFNG_PATHWAY   18.307949 9.401935e-11 5.048839e-08     5/107  6/9264
## REACTOME_877312          9.714240 2.440253e-05 4.081607e-03     3/107  9/9264
## BIOCARTA_IL22BP_PATHWAY 15.317503 2.440253e-05 4.081607e-03     3/107  9/9264
## BIOCARTA_IFNA_PATHWAY   13.883283 3.040303e-05 4.081607e-03     4/107 18/9264
## BIOCARTA_TID_PATHWAY     9.455281 4.054116e-05 4.354120e-03     4/107 19/9264
## REACTOME_877300          9.714240 9.109707e-05 6.444888e-03     3/107 12/9264
##                                                geneID
## BIOCARTA_IFNG_PATHWAY   16451/16452/20846/15979/15980
## REACTOME_877312                     16452/15979/15980
## BIOCARTA_IL22BP_PATHWAY             16451/16452/20846
## BIOCARTA_IFNA_PATHWAY         16451/15975/15976/20846
## BIOCARTA_TID_PATHWAY          16452/22059/15979/15980
## REACTOME_877300                     16452/15979/15980
##                                              geneName Count
## BIOCARTA_IFNG_PATHWAY   Jak1/Jak2/Stat1/Ifngr1/Ifngr2     5
## REACTOME_877312                    Jak2/Ifngr1/Ifngr2     3
## BIOCARTA_IL22BP_PATHWAY               Jak1/Jak2/Stat1     3
## BIOCARTA_IFNA_PATHWAY        Jak1/Ifnar1/Ifnar2/Stat1     4
## BIOCARTA_TID_PATHWAY         Jak2/Trp53/Ifngr1/Ifngr2     4
## REACTOME_877300                    Jak2/Ifngr1/Ifngr2     3
# hgtRes2 = enrich.HGT(genelist[1:200])
# head(hgtRes2@result)

2.2 Over-representation test

The ORT and GSEA are implemented in the R package clusterProfiler (Yu et al. 2012). So please install it prior to the analysis.

if(!"clusterProfiler" %in% installed.packages()){
  BiocManager::install("clusterProfiler")
}
library(clusterProfiler)
# Alternative functions EnrichAnalyzer and enrich.ORT.
ortRes1 = EnrichAnalyzer(genelist[1:200], method = "ORT", 
                         type = "KEGG", organism = "mmu")
head(ortRes1@result)
##                          ID
## KEGG_mmu05140 KEGG_mmu05140
## KEGG_mmu05235 KEGG_mmu05235
## KEGG_mmu04612 KEGG_mmu04612
## KEGG_mmu05340 KEGG_mmu05340
## KEGG_mmu05212 KEGG_mmu05212
## KEGG_mmu04658 KEGG_mmu04658
##                                                          Description       NES
## KEGG_mmu05140                                          Leishmaniasis 17.491523
## KEGG_mmu05235 PD-L1 expression and PD-1 checkpoint pathway in cancer 17.795521
## KEGG_mmu04612                    Antigen processing and presentation  9.038750
## KEGG_mmu05340                               Primary immunodeficiency  5.717129
## KEGG_mmu05212                                      Pancreatic cancer 13.069760
## KEGG_mmu04658                       Th1 and Th2 cell differentiation 18.307949
##                     pvalue    p.adjust GeneRatio BgRatio
## KEGG_mmu05140 1.530265e-05 0.001254818      7/54 70/4536
## KEGG_mmu05235 5.495082e-04 0.016338254      6/54 88/4536
## KEGG_mmu04612 6.199206e-04 0.016338254      6/54 90/4536
## KEGG_mmu05340 7.969880e-04 0.016338254      4/54 36/4536
## KEGG_mmu05212 1.927944e-03 0.031618287      5/54 76/4536
## KEGG_mmu04658 3.670334e-03 0.044654889      5/54 88/4536
##                                                  geneID
## KEGG_mmu05140 16451/20846/15980/15979/16452/17357/16412
## KEGG_mmu05235       16451/20846/15980/15979/16452/13645
## KEGG_mmu04612       12010/14964/21354/21355/19727/53970
## KEGG_mmu05340                   21354/21355/19727/53970
## KEGG_mmu05212             16451/20846/12575/13645/22059
## KEGG_mmu04658             16451/20846/15980/15979/16452
##                                                   geneName Count
## KEGG_mmu05140 Jak1/Stat1/Ifngr2/Ifngr1/Jak2/Marcksl1/Itgb1     7
## KEGG_mmu05235            Jak1/Stat1/Ifngr2/Ifngr1/Jak2/Egf     6
## KEGG_mmu04612              B2m/H2-D1/Tap1/Tap2/Rfxank/Rfx5     6
## KEGG_mmu05340                        Tap1/Tap2/Rfxank/Rfx5     4
## KEGG_mmu05212                  Jak1/Stat1/Cdkn1a/Egf/Trp53     5
## KEGG_mmu04658                Jak1/Stat1/Ifngr2/Ifngr1/Jak2     5
# ortRes2 = enrich.ORT(genelist[genelist< -1])
# head(ortRes2@result)

2.3 Gene set enrichment analysis

library(clusterProfiler)
# Alternative functions EnrichAnalyzer and enrich.GSE.
gseRes1 = EnrichAnalyzer(genelist, method = "GSEA", type = "Pathway", organism = "mmu")
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (8.16% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some pathways, in reality P-values are less than 1e-10. You can
## set the `eps` argument to zero for better estimation.

2.4 Visualize enrichment results.

2.4.1 Barplot

require(ggplot2)
df = hgtRes1@result
df$logFDR = -log10(df$p.adjust)
p = BarView(df[1:5,], "Description", 'logFDR')
p = p + labs(x = NULL) + coord_flip()
p

# Or use function barplot from enrichplot package
barplot(hgtRes1, showCategory = 5)

2.4.2 Dot plot

## top: up-regulated pathways; 
## bottom: down-regulated pathways
EnrichedView(hgtRes1, top = 5, bottom = 0, mode = 1)

EnrichedView(hgtRes1, top = 5, bottom = 0, mode = 2)

dotplot(hgtRes1, showCategory = 5)