## ----setup, include=FALSE-------------------------------------------------- knitr::opts_chunk$set(echo = TRUE) ## ---- out.width="190px", echo=FALSE---------------------------------------- knitr::include_graphics("diffcyt.png") ## ---- eval=FALSE----------------------------------------------------------- # # Install Bioconductor installer from CRAN # install.packages("BiocManager") # # # Install 'diffcyt' package from Bioconductor # BiocManager::install("diffcyt") ## ---- eval=FALSE----------------------------------------------------------- # BiocManager::install("HDCytoData") # BiocManager::install("CATALYST") ## -------------------------------------------------------------------------- suppressPackageStartupMessages(library(HDCytoData)) # Download and load 'Bodenmiller_BCR_XL' dataset in 'flowSet' format d_flowSet <- Bodenmiller_BCR_XL_flowSet() suppressPackageStartupMessages(library(flowCore)) # check data format d_flowSet # sample names pData(d_flowSet) # number of cells fsApply(d_flowSet, nrow) # number of columns dim(exprs(d_flowSet[[1]])) # expression values exprs(d_flowSet[[1]])[1:6, 1:6] ## ---- eval=FALSE----------------------------------------------------------- # # Alternatively: load data from '.fcs' files # files <- list.files( # path = "path/to/files", pattern = "\\.fcs$", full.names = TRUE # ) # d_flowSet <- read.flowSet( # files, transformation = FALSE, truncate_max_range = FALSE # ) ## -------------------------------------------------------------------------- # Meta-data: experiment information # check sample order filenames <- as.character(pData(d_flowSet)$name) # sample information sample_id <- gsub("^PBMC8_30min_", "", gsub("\\.fcs$", "", filenames)) group_id <- factor( gsub("^patient[0-9]+_", "", sample_id), levels = c("Reference", "BCR-XL") ) patient_id <- factor(gsub("_.*$", "", sample_id)) experiment_info <- data.frame( group_id, patient_id, sample_id, stringsAsFactors = FALSE ) experiment_info # Meta-data: marker information # source: Bruggner et al. (2014), Table 1 # column indices of all markers, lineage markers, and functional markers cols_markers <- c(3:4, 7:9, 11:19, 21:22, 24:26, 28:31, 33) cols_lineage <- c(3:4, 9, 11, 12, 14, 21, 29, 31, 33) cols_func <- setdiff(cols_markers, cols_lineage) # channel and marker names channel_name <- colnames(d_flowSet) marker_name <- gsub("\\(.*$", "", channel_name) # marker classes # note: using lineage markers for 'cell type', and functional markers for # 'cell state' marker_class <- rep("none", ncol(d_flowSet[[1]])) marker_class[cols_lineage] <- "type" marker_class[cols_func] <- "state" marker_class <- factor(marker_class, levels = c("type", "state", "none")) marker_info <- data.frame( channel_name, marker_name, marker_class, stringsAsFactors = FALSE ) marker_info ## -------------------------------------------------------------------------- suppressPackageStartupMessages(library(diffcyt)) # Create design matrix # note: selecting columns 1 and 2, which contain group IDs and patient IDs design <- createDesignMatrix(experiment_info, cols_design = 1:2) ## -------------------------------------------------------------------------- # Create contrast matrix contrast <- createContrast(c(0, 1, rep(0, 7))) # check dimensions nrow(contrast) == ncol(design) ## -------------------------------------------------------------------------- # Test for differential abundance (DA) of clusters # note: using default method 'diffcyt-DA-edgeR' and default parameters # note: include random seed for reproducible clustering out_DA <- diffcyt( d_input = d_flowSet, experiment_info = experiment_info, marker_info = marker_info, design = design, contrast = contrast, analysis_type = "DA", seed_clustering = 123 ) # display results for top DA clusters head(topClusters(out_DA$res)) # calculate number of significant detected DA clusters at 10% false discovery # rate (FDR) threshold <- 0.1 res_DA_all <- topClusters(out_DA$res, all = TRUE) table(res_DA_all$p_adj <= threshold) ## ---- fig.show='hide'------------------------------------------------------ # Test for differential states (DS) within clusters # note: using default method 'diffcyt-DS-limma' and default parameters # note: include random seed for reproducible clustering out_DS <- diffcyt( d_input = d_flowSet, experiment_info = experiment_info, marker_info = marker_info, design = design, contrast = contrast, analysis_type = "DS", seed_clustering = 123, plot = FALSE ) # display results for top DS cluster-marker combinations head(topClusters(out_DS$res)) # calculate number of significant detected DS cluster-marker combinations at # 10% false discovery rate (FDR) threshold <- 0.1 res_DS_all <- topClusters(out_DS$res, all = TRUE) table(res_DS_all$p_adj <= threshold) ## -------------------------------------------------------------------------- # Prepare data d_se <- prepareData(d_flowSet, experiment_info, marker_info) ## -------------------------------------------------------------------------- # Transform data d_se <- transformData(d_se) ## -------------------------------------------------------------------------- # Generate clusters # note: include random seed for reproducible clustering d_se <- generateClusters(d_se, seed_clustering = 123) ## -------------------------------------------------------------------------- # Calculate cluster cell counts d_counts <- calcCounts(d_se) # Calculate cluster medians d_medians <- calcMedians(d_se) ## -------------------------------------------------------------------------- # Test for differential abundance (DA) of clusters res_DA <- testDA_edgeR(d_counts, design, contrast) # display results for top DA clusters head(topClusters(res_DA)) # calculate number of significant detected DA clusters at 10% false discovery # rate (FDR) threshold <- 0.1 table(topClusters(res_DA, all = TRUE)$p_adj <= threshold) ## ---- fig.show='hide'------------------------------------------------------ # Test for differential states (DS) within clusters res_DS <- testDS_limma(d_counts, d_medians, design, contrast, plot = FALSE) # display results for top DS cluster-marker combinations head(topClusters(res_DS)) # calculate number of significant detected DS cluster-marker combinations at # 10% false discovery rate (FDR) threshold <- 0.1 table(topClusters(res_DS, all = TRUE)$p_adj <= threshold) ## -------------------------------------------------------------------------- # Heatmap for top detected DA clusters # note: use optional argument 'sample_order' to group samples by condition sample_order <- c(seq(2, 16, by = 2), seq(1, 16, by = 2)) plotHeatmap(out_DA, analysis_type = "DA", sample_order = sample_order) ## -------------------------------------------------------------------------- # Heatmap for top detected DS cluster-marker combinations # note: use optional argument 'sample_order' to group samples by condition sample_order <- c(seq(2, 16, by = 2), seq(1, 16, by = 2)) plotHeatmap(out_DS, analysis_type = "DS", sample_order = sample_order)