## ----setup, include=FALSE-------------------------------------------------- knitr::opts_chunk$set(echo = TRUE) ## -------------------------------------------------------------------------- suppressPackageStartupMessages(library(HDCytoData)) # Load 'SummarizedExperiment' object using named function Bodenmiller_BCR_XL_SE() # Load 'flowSet' object using named function Bodenmiller_BCR_XL_flowSet() ## -------------------------------------------------------------------------- # Create an ExperimentHub instance ehub <- ExperimentHub() # Query ExperimentHub instance to find data sets query(ehub, "HDCytoData") # Load 'SummarizedExperiment' object using index of data set ehub[["EH1119"]] # Load 'flowSet' object using index of data set ehub[["EH1120"]] ## -------------------------------------------------------------------------- # Load data set in 'SummarizedExperiment' format d_SE <- Bodenmiller_BCR_XL_SE() # Inspect the object d_SE assay(d_SE)[1:6, 1:6] rowData(d_SE) colData(d_SE) ## -------------------------------------------------------------------------- # Apply transformation to marker columns cofactor <- 5 cols <- colData(d_SE)$marker_class != "none" assay(d_SE)[, cols] <- asinh(assay(d_SE)[, cols] / cofactor) summary(assay(d_SE)[, 1:10]) ## -------------------------------------------------------------------------- suppressPackageStartupMessages(library(FlowSOM)) suppressPackageStartupMessages(library(Rtsne)) suppressPackageStartupMessages(library(ggplot2)) # -------------- # Run clustering # -------------- d_FlowSOM <- as.matrix(assay(d_SE)[, colData(d_SE)$marker_class == "type"]) d_FlowSOM <- flowFrame(d_FlowSOM) set.seed(123) out <- ReadInput(d_FlowSOM, transform = FALSE, scale = FALSE) out <- BuildSOM(out, colsToUse = NULL) labels_pre <- out$map$mapping[, 1] # number of meta-clusters k <- 20 out <- metaClustering_consensus(out$map$codes, k = k, seed = 123) labels <- out[labels_pre] # check meta-cluster labels table(labels) # ------------------------------ # t-SNE plot with cluster labels # ------------------------------ d_Rtsne <- as.matrix(assay(d_SE)[, colData(d_SE)$marker_class == "type"]) colnames(d_Rtsne) <- colData(d_SE)$marker_name[colData(d_SE)$marker_class == "type"] # subsampling n_sub <- 1000 set.seed(123) ix <- sample(seq_along(labels), n_sub) d_Rtsne <- d_Rtsne[ix, ] labels <- labels[ix] # remove any duplicate rows (required by Rtsne) dups <- duplicated(d_Rtsne) d_Rtnse <- d_Rtsne[!dups, ] labels <- labels[!dups] # run Rtsne set.seed(123) out_Rtsne <- Rtsne(d_Rtsne, pca = FALSE, verbose = TRUE) d_plot <- as.data.frame(out_Rtsne$Y) colnames(d_plot) <- c("tSNE_1", "tSNE_2") d_plot$cluster <- as.factor(labels) # create plot ggplot(d_plot, aes(x = tSNE_1, y = tSNE_2, color = cluster)) + geom_point() + ggtitle("t-SNE plot: clustering") + xlab("t-SNE dimension 1") + ylab("t-SNE dimension 2") + theme_classic() # ------------------------------------------- # t-SNE plot with reference population labels # ------------------------------------------- d_plot$population <- rowData(d_SE)$population_id[ix][!dups] # create plot ggplot(d_plot, aes(x = tSNE_1, y = tSNE_2, color = population)) + geom_point() + ggtitle("t-SNE plot: reference populations") + xlab("t-SNE dimension 1") + ylab("t-SNE dimension 2") + theme_classic()