## ---- eval=FALSE--------------------------------------------------------------
#  if (!requireNamespace("BiocManager", quietly = TRUE))
#      install.packages("BiocManager")
#  
#  BiocManager::install("progeny")
#  
#  ## To install the new version until it is submited to Bioconductor use:
#  devtools::install_github("saezlab/progeny")

## ---- message=FALSE-----------------------------------------------------------
library(progeny)
library(dplyr)
library(Seurat)
library(ggplot2)
library(tidyr)
library(readr)
library(pheatmap)
library(tibble)

## ---- eval=FALSE--------------------------------------------------------------
#  ## Load the PBMC dataset
#  pbmc.data <-
#      Read10X(data.dir = "../data/pbmc3k/filtered_gene_bc_matrices/hg19/")
#  ## Initialize the Seurat object with the raw (non-normalized data).
#  pbmc <-
#      CreateSeuratObject(counts = pbmc.data, project = "pbmc3k", min.cells = 3,
#      min.features = 200)

## ---- eval=TRUE , include=FALSE-----------------------------------------------
pbmc <- 
    readRDS(url("https://github.com/alberto-valdeolivas/ProgenyVignette/raw/master/SeuratObject.rds"))

## ---- message=FALSE-----------------------------------------------------------
## Identification of mithocondrial genes
pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-")

## Filtering cells following standard QC criteria.
pbmc <- subset(pbmc, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & 
    percent.mt < 5)

## Normalizing the data
pbmc <- NormalizeData(pbmc, normalization.method = "LogNormalize", 
    scale.factor = 10000)

pbmc <- NormalizeData(pbmc)

## Identify the 2000 most highly variable genes
pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000)

## In addition we scale the data
all.genes <- rownames(pbmc)
pbmc <- ScaleData(pbmc, features = all.genes)

## ---- message=FALSE, warning=FALSE--------------------------------------------
pbmc <- 
    RunPCA(pbmc, features = VariableFeatures(object = pbmc), verbose = FALSE)
pbmc <- FindNeighbors(pbmc, dims = 1:10, verbose = FALSE)
pbmc <- FindClusters(pbmc, resolution = 0.5, verbose = FALSE)
pbmc <- RunUMAP(pbmc, dims = 1:10,  umap.method = 'uwot', metric='cosine')

## -----------------------------------------------------------------------------
DimPlot(pbmc, reduction = "umap")

## ---- message = FALSE---------------------------------------------------------
## Finding differentially expressed features (cluster biomarkers)
pbmc.markers <- FindAllMarkers(pbmc, only.pos = TRUE, min.pct = 0.25, 
    logfc.threshold = 0.25, verbose = FALSE)

## Assigning cell type identity to clusters
new.cluster.ids <- c("Naive CD4 T", "Memory CD4 T", "CD14+ Mono", "B", "CD8 T", 
    "FCGR3A+ Mono", "NK", "DC", "Platelet")
names(new.cluster.ids) <- levels(pbmc)
pbmc <- RenameIdents(pbmc, new.cluster.ids)

## We create a data frame with the specification of the cells that belong to 
## each cluster to match with the Progeny scores.
CellsClusters <- data.frame(Cell = names(Idents(pbmc)), 
    CellType = as.character(Idents(pbmc)),
    stringsAsFactors = FALSE)

## ---- message = FALSE, warning = FALSE----------------------------------------
DimPlot(pbmc, reduction = "umap", label = TRUE, pt.size = 0.5) + NoLegend()

## ---- message = FALSE---------------------------------------------------------
## We compute the Progeny activity scores and add them to our Seurat object
## as a new assay called Progeny. 
pbmc <- progeny(pbmc, scale=FALSE, organism="Human", top=500, perm=1, 
    return_assay = TRUE)

## We can now directly apply Seurat functions in our Progeny scores. 
## For instance, we scale the pathway activity scores. 
pbmc <- Seurat::ScaleData(pbmc, assay = "progeny") 

## We transform Progeny scores into a data frame to better handling the results
progeny_scores_df <- 
    as.data.frame(t(GetAssayData(pbmc, slot = "scale.data", 
        assay = "progeny"))) %>%
    rownames_to_column("Cell") %>%
    gather(Pathway, Activity, -Cell) 

## We match Progeny scores with the cell clusters.
progeny_scores_df <- inner_join(progeny_scores_df, CellsClusters)

## We summarize the Progeny scores by cellpopulation
summarized_progeny_scores <- progeny_scores_df %>% 
    group_by(Pathway, CellType) %>%
    summarise(avg = mean(Activity), std = sd(Activity))

## ---- message=FALSE-----------------------------------------------------------
## We prepare the data for the plot
summarized_progeny_scores_df <- summarized_progeny_scores %>%
    dplyr::select(-std) %>%   
    spread(Pathway, avg) %>%
    data.frame(row.names = 1, check.names = FALSE, stringsAsFactors = FALSE) 

## -----------------------------------------------------------------------------
paletteLength = 100
myColor = colorRampPalette(c("Darkblue", "white","red"))(paletteLength)

progenyBreaks = c(seq(min(summarized_progeny_scores_df), 0, 
                      length.out=ceiling(paletteLength/2) + 1),
                  seq(max(summarized_progeny_scores_df)/paletteLength, 
                      max(summarized_progeny_scores_df), 
                      length.out=floor(paletteLength/2)))
progeny_hmap = pheatmap(t(summarized_progeny_scores_df[,-1]),fontsize=14, 
                        fontsize_row = 10, 
                        color=myColor, breaks = progenyBreaks, 
                        main = "PROGENy (500)", angle_col = 45,
                        treeheight_col = 0,  border_color = NA)

## ----sessionInfo, echo=FALSE--------------------------------------------------
sessionInfo()