## ---- eval=FALSE-------------------------------------------------------------- # if (!requireNamespace("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # # BiocManager::install("progeny") # # ## To install the new version until it is submited to Bioconductor use: # devtools::install_github("saezlab/progeny") ## ---- message=FALSE----------------------------------------------------------- library(progeny) library(dplyr) library(Seurat) library(ggplot2) library(tidyr) library(readr) library(pheatmap) library(tibble) ## ---- eval=FALSE-------------------------------------------------------------- # ## Load the PBMC dataset # pbmc.data <- # Read10X(data.dir = "../data/pbmc3k/filtered_gene_bc_matrices/hg19/") # ## Initialize the Seurat object with the raw (non-normalized data). # pbmc <- # CreateSeuratObject(counts = pbmc.data, project = "pbmc3k", min.cells = 3, # min.features = 200) ## ---- eval=TRUE , include=FALSE----------------------------------------------- pbmc <- readRDS(url("https://github.com/alberto-valdeolivas/ProgenyVignette/raw/master/SeuratObject.rds")) ## ---- message=FALSE----------------------------------------------------------- ## Identification of mithocondrial genes pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-") ## Filtering cells following standard QC criteria. pbmc <- subset(pbmc, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5) ## Normalizing the data pbmc <- NormalizeData(pbmc, normalization.method = "LogNormalize", scale.factor = 10000) pbmc <- NormalizeData(pbmc) ## Identify the 2000 most highly variable genes pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000) ## In addition we scale the data all.genes <- rownames(pbmc) pbmc <- ScaleData(pbmc, features = all.genes) ## ---- message=FALSE, warning=FALSE-------------------------------------------- pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc), verbose = FALSE) pbmc <- FindNeighbors(pbmc, dims = 1:10, verbose = FALSE) pbmc <- FindClusters(pbmc, resolution = 0.5, verbose = FALSE) pbmc <- RunUMAP(pbmc, dims = 1:10, umap.method = 'uwot', metric='cosine') ## ----------------------------------------------------------------------------- DimPlot(pbmc, reduction = "umap") ## ---- message = FALSE--------------------------------------------------------- ## Finding differentially expressed features (cluster biomarkers) pbmc.markers <- FindAllMarkers(pbmc, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25, verbose = FALSE) ## Assigning cell type identity to clusters new.cluster.ids <- c("Naive CD4 T", "Memory CD4 T", "CD14+ Mono", "B", "CD8 T", "FCGR3A+ Mono", "NK", "DC", "Platelet") names(new.cluster.ids) <- levels(pbmc) pbmc <- RenameIdents(pbmc, new.cluster.ids) ## We create a data frame with the specification of the cells that belong to ## each cluster to match with the Progeny scores. CellsClusters <- data.frame(Cell = names(Idents(pbmc)), CellType = as.character(Idents(pbmc)), stringsAsFactors = FALSE) ## ---- message = FALSE, warning = FALSE---------------------------------------- DimPlot(pbmc, reduction = "umap", label = TRUE, pt.size = 0.5) + NoLegend() ## ---- message = FALSE--------------------------------------------------------- ## We compute the Progeny activity scores and add them to our Seurat object ## as a new assay called Progeny. pbmc <- progeny(pbmc, scale=FALSE, organism="Human", top=500, perm=1, return_assay = TRUE) ## We can now directly apply Seurat functions in our Progeny scores. ## For instance, we scale the pathway activity scores. pbmc <- Seurat::ScaleData(pbmc, assay = "progeny") ## We transform Progeny scores into a data frame to better handling the results progeny_scores_df <- as.data.frame(t(GetAssayData(pbmc, slot = "scale.data", assay = "progeny"))) %>% rownames_to_column("Cell") %>% gather(Pathway, Activity, -Cell) ## We match Progeny scores with the cell clusters. progeny_scores_df <- inner_join(progeny_scores_df, CellsClusters) ## We summarize the Progeny scores by cellpopulation summarized_progeny_scores <- progeny_scores_df %>% group_by(Pathway, CellType) %>% summarise(avg = mean(Activity), std = sd(Activity)) ## ---- message=FALSE----------------------------------------------------------- ## We prepare the data for the plot summarized_progeny_scores_df <- summarized_progeny_scores %>% dplyr::select(-std) %>% spread(Pathway, avg) %>% data.frame(row.names = 1, check.names = FALSE, stringsAsFactors = FALSE) ## ----------------------------------------------------------------------------- paletteLength = 100 myColor = colorRampPalette(c("Darkblue", "white","red"))(paletteLength) progenyBreaks = c(seq(min(summarized_progeny_scores_df), 0, length.out=ceiling(paletteLength/2) + 1), seq(max(summarized_progeny_scores_df)/paletteLength, max(summarized_progeny_scores_df), length.out=floor(paletteLength/2))) progeny_hmap = pheatmap(t(summarized_progeny_scores_df[,-1]),fontsize=14, fontsize_row = 10, color=myColor, breaks = progenyBreaks, main = "PROGENy (500)", angle_col = 45, treeheight_col = 0, border_color = NA) ## ----sessionInfo, echo=FALSE-------------------------------------------------- sessionInfo()