## ----setup, include=FALSE----------------------------------------------------- knitr::opts_chunk$set(echo = TRUE) ## ----------------------------------------------------------------------------- suppressPackageStartupMessages(library(HDCytoData)) suppressPackageStartupMessages(library(SummarizedExperiment)) suppressPackageStartupMessages(library(Rtsne)) suppressPackageStartupMessages(library(umap)) suppressPackageStartupMessages(library(ggplot2)) ## ----------------------------------------------------------------------------- # --------- # Load data # --------- d_SE <- Levine_32dim_SE() ## ----------------------------------------------------------------------------- # ------------- # Preprocessing # ------------- # select 'cell type' marker columns for defining clusters d_sub <- assay(d_SE[, colData(d_SE)$marker_class == "type"]) # extract cell population labels population <- rowData(d_SE)$population_id dim(d_sub) stopifnot(nrow(d_sub) == length(population)) # transform data using asinh with cofactor 5 cofactor <- 5 d_sub <- asinh(d_sub / cofactor) summary(d_sub) # subsample cells for faster runtimes in vignette n <- 2000 set.seed(123) ix <- sample(seq_len(nrow(d_sub)), n) d_sub <- d_sub[ix, ] population <- population[ix] dim(d_sub) stopifnot(nrow(d_sub) == length(population)) # remove any near-duplicate rows (required by Rtsne) dups <- duplicated(d_sub) d_sub <- d_sub[!dups, ] population <- population[!dups] dim(d_sub) stopifnot(nrow(d_sub) == length(population)) ## ---- fig.width = 6----------------------------------------------------------- # ------------------------ # Dimension reduction: PCA # ------------------------ n_dims <- 2 # run PCA # (note: no scaling, since asinh-transformed dimensions are already comparable) out_PCA <- prcomp(d_sub, center = TRUE, scale. = FALSE) dims_PCA <- out_PCA$x[, seq_len(n_dims)] colnames(dims_PCA) <- c("PC_1", "PC_2") head(dims_PCA) stopifnot(nrow(dims_PCA) == length(population)) colnames(dims_PCA) <- c("dimension_x", "dimension_y") dims_PCA <- cbind(as.data.frame(dims_PCA), population, type = "PCA") head(dims_PCA) str(dims_PCA) # generate plot d_plot <- dims_PCA str(d_plot) colors <- c(rainbow(14), "gray75") ggplot(d_plot, aes(x = dimension_x, y = dimension_y, color = population)) + facet_wrap(~ type, scales = "free") + geom_point(size = 0.7, alpha = 0.5) + scale_color_manual(values = colors) + labs(x = "dimension x", y = "dimension y") + theme_bw() + theme(aspect.ratio = 1, legend.key.height = unit(4, "mm")) ## ---- fig.width = 6----------------------------------------------------------- # ------------------------- # Dimension reduction: tSNE # ------------------------- # run Rtsne set.seed(123) out_Rtsne <- Rtsne(as.matrix(d_sub), dims = n_dims) dims_Rtsne <- out_Rtsne$Y colnames(dims_Rtsne) <- c("tSNE_1", "tSNE_2") head(dims_Rtsne) stopifnot(nrow(dims_Rtsne) == length(population)) colnames(dims_Rtsne) <- c("dimension_x", "dimension_y") dims_Rtsne <- cbind(as.data.frame(dims_Rtsne), population, type = "tSNE") head(dims_Rtsne) str(dims_Rtsne) # generate plot d_plot <- dims_Rtsne ggplot(d_plot, aes(x = dimension_x, y = dimension_y, color = population)) + facet_wrap(~ type, scales = "free") + geom_point(size = 0.7, alpha = 0.5) + scale_color_manual(values = colors) + labs(x = "dimension x", y = "dimension y") + theme_bw() + theme(aspect.ratio = 1, legend.key.height = unit(4, "mm")) ## ---- fig.width = 6----------------------------------------------------------- # ------------------------- # Dimension reduction: UMAP # ------------------------- # run umap set.seed(123) out_umap <- umap(d_sub) dims_umap <- out_umap$layout colnames(dims_umap) <- c("UMAP_1", "UMAP_2") head(dims_umap) stopifnot(nrow(dims_umap) == length(population)) colnames(dims_umap) <- c("dimension_x", "dimension_y") dims_umap <- cbind(as.data.frame(dims_umap), population, type = "UMAP") head(dims_umap) str(dims_umap) # generate plot d_plot <- dims_umap ggplot(d_plot, aes(x = dimension_x, y = dimension_y, color = population)) + facet_wrap(~ type, scales = "free") + geom_point(size = 0.7, alpha = 0.5) + scale_color_manual(values = colors) + labs(x = "dimension x", y = "dimension y") + theme_bw() + theme(aspect.ratio = 1, legend.key.height = unit(4, "mm")) ## ----------------------------------------------------------------------------- suppressPackageStartupMessages(library(HDCytoData)) suppressPackageStartupMessages(library(FlowSOM)) suppressPackageStartupMessages(library(flowCore)) suppressPackageStartupMessages(library(mclust)) suppressPackageStartupMessages(library(umap)) suppressPackageStartupMessages(library(ggplot2)) ## ----------------------------------------------------------------------------- # --------- # Load data # --------- d_SE <- Samusik_01_SE() dim(d_SE) ## ----------------------------------------------------------------------------- # ------------- # Preprocessing # ------------- # select 'cell type' marker columns for defining clusters d_sub <- assay(d_SE[, colData(d_SE)$marker_class == "type"]) # extract cell population labels population <- rowData(d_SE)$population_id dim(d_sub) stopifnot(nrow(d_sub) == length(population)) # transform data using asinh with cofactor 5 cofactor <- 5 d_sub <- asinh(d_sub / cofactor) # create flowFrame object (required input format for FlowSOM) d_FlowSOM <- flowFrame(d_sub) ## ----------------------------------------------------------------------------- # ----------- # Run FlowSOM # ----------- # set seed for reproducibility set.seed(123) # run FlowSOM (initial steps prior to meta-clustering) out <- ReadInput(d_FlowSOM, transform = FALSE, scale = FALSE) out <- BuildSOM(out) out <- BuildMST(out) # optional FlowSOM visualization #PlotStars(out) # extract cluster labels (pre meta-clustering) from output object labels_pre <- out$map$mapping[, 1] # specify final number of clusters for meta-clustering k <- 40 # run meta-clustering seed <- 123 out <- metaClustering_consensus(out$map$codes, k = k, seed = seed) # extract cluster labels from output object labels <- out[labels_pre] # summary of cluster sizes and number of clusters table(labels) length(table(labels)) ## ----------------------------------------------------------------------------- # ------------------------------- # Evaluate clustering performance # ------------------------------- # calculate adjusted Rand index # note: this calculation weights all cells equally, which may not be # appropriate for some datasets (see above) stopifnot(nrow(d_sub) == length(labels)) stopifnot(length(population) == length(labels)) # remove "unassigned" cells from cluster evaluation (but note these were # included for clustering) ix_unassigned <- population == "unassigned" d_sub_eval <- d_sub[!ix_unassigned, ] population_eval <- population[!ix_unassigned] labels_eval <- labels[!ix_unassigned] stopifnot(nrow(d_sub_eval) == length(labels_eval)) stopifnot(length(population_eval) == length(labels_eval)) # calculate adjusted Rand index adjustedRandIndex(population_eval, labels_eval) ## ---- fig.width = 7----------------------------------------------------------- # ------------ # Plot results # ------------ # subsample cells for faster runtimes in vignette n <- 4000 set.seed(1004) ix <- sample(seq_len(nrow(d_sub)), n) d_sub <- d_sub[ix, ] population <- population[ix] labels <- labels[ix] dim(d_sub) stopifnot(nrow(d_sub) == length(population)) stopifnot(nrow(population) == length(labels)) # run umap set.seed(1234) out_umap <- umap(d_sub) dims_umap <- out_umap$layout colnames(dims_umap) <- c("UMAP_1", "UMAP_2") stopifnot(nrow(dims_umap) == length(population)) stopifnot(nrow(population) == length(labels)) d_plot <- cbind( as.data.frame(dims_umap), population, labels = as.factor(labels), type = "UMAP" ) # generate plots colors <- c(rainbow(24), "gray75") ggplot(d_plot, aes(x = UMAP_1, y = UMAP_2, color = population)) + geom_point(size = 0.7, alpha = 0.5) + scale_color_manual(values = colors) + ggtitle("Ground truth population labels") + theme_bw() + theme(aspect.ratio = 1, legend.key.height = unit(4, "mm")) ggplot(d_plot, aes(x = UMAP_1, y = UMAP_2, color = labels)) + geom_point(size = 0.7, alpha = 0.5) + ggtitle("FlowSOM cluster labels") + theme_bw() + theme(aspect.ratio = 1, legend.key.height = unit(4, "mm")) ## ----------------------------------------------------------------------------- suppressPackageStartupMessages(library(diffcyt)) suppressPackageStartupMessages(library(SummarizedExperiment)) ## ----------------------------------------------------------------------------- # --------- # Load data # --------- d_SE <- Weber_AML_sim_main_5pc_SE() ## ----------------------------------------------------------------------------- # --------------- # Set up metadata # --------------- # set column names colnames(d_SE) <- colData(d_SE)$marker_name # split input data into one matrix per sample d_input <- split(as.data.frame(assay(d_SE)), rowData(d_SE)$sample_id) # extract sample information experiment_info <- metadata(d_SE)$experiment_info experiment_info # extract marker information marker_info <- colData(d_SE) marker_info ## ----------------------------------------------------------------------------- # ----------------------------------- # Differential abundance (DA) testing # ----------------------------------- # create design matrix design <- createDesignMatrix( experiment_info, cols_design = c("group_id", "patient_id") ) design # create contrast matrix # note: testing condition CN vs. healthy contrast <- createContrast(c(0, 1, 0, 0, 0, 0, 0)) contrast # test for differential abundance (DA) of clusters out_DA <- diffcyt( d_input, experiment_info, marker_info, design = design, contrast = contrast, analysis_type = "DA", seed_clustering = 1234 ) # display results for top DA clusters topTable(out_DA, format_vals = TRUE) ## ----------------------------------------------------------------------------- # --------- # Load data # --------- d_SE <- Weber_BCR_XL_sim_main_SE() ## ----------------------------------------------------------------------------- # --------------- # Set up metadata # --------------- # set column names colnames(d_SE) <- colData(d_SE)$marker_name # split input data into one matrix per sample d_input <- split(as.data.frame(assay(d_SE)), rowData(d_SE)$sample_id) # extract sample information experiment_info <- metadata(d_SE)$experiment_info experiment_info # extract marker information marker_info <- colData(d_SE) marker_info ## ----------------------------------------------------------------------------- # ------------------------------- # Differential state (DS) testing # ------------------------------- # create design matrix design <- createDesignMatrix( experiment_info, cols_design = c("group_id", "patient_id") ) design # create contrast matrix # note: testing condition spike vs. base contrast <- createContrast(c(0, 1, 0, 0, 0, 0, 0, 0, 0)) contrast # test for differential abundance (DA) of clusters out_DS <- diffcyt( d_input, experiment_info, marker_info, design = design, contrast = contrast, analysis_type = "DS", seed_clustering = 1234 ) # display results for top DA clusters topTable(out_DS, format_vals = TRUE)