## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(echo = TRUE)

## ---- out.width="190px", echo=FALSE-------------------------------------------
knitr::include_graphics("diffcyt.png")

## ---- eval=FALSE--------------------------------------------------------------
#  # Install Bioconductor installer from CRAN
#  install.packages("BiocManager")
#  
#  # Install 'diffcyt' package from Bioconductor
#  BiocManager::install("diffcyt")

## ---- eval=FALSE--------------------------------------------------------------
#  BiocManager::install("HDCytoData")
#  BiocManager::install("CATALYST")

## -----------------------------------------------------------------------------
suppressPackageStartupMessages(library(HDCytoData))

# Download and load 'Bodenmiller_BCR_XL' dataset in 'flowSet' format
d_flowSet <- Bodenmiller_BCR_XL_flowSet()

suppressPackageStartupMessages(library(flowCore))

# check data format
d_flowSet
# sample names
pData(d_flowSet)
# number of cells
fsApply(d_flowSet, nrow)
# dimensions
dim(exprs(d_flowSet[[1]]))
# expression values
exprs(d_flowSet[[1]])[1:6, 1:5]

## ---- eval=FALSE--------------------------------------------------------------
#  # Alternatively: load data from '.fcs' files
#  files <- list.files(
#    path = "path/to/files", pattern = "\\.fcs$", full.names = TRUE
#  )
#  d_flowSet <- read.flowSet(
#    files, transformation = FALSE, truncate_max_range = FALSE
#  )

## -----------------------------------------------------------------------------
# Meta-data: experiment information

# check sample order
filenames <- as.character(pData(d_flowSet)$name)

# sample information
sample_id <- gsub("^PBMC8_30min_", "", gsub("\\.fcs$", "", filenames))
group_id <- factor(
  gsub("^patient[0-9]+_", "", sample_id), levels = c("Reference", "BCR-XL")
)
patient_id <- factor(gsub("_.*$", "", sample_id))

experiment_info <- data.frame(
  group_id, patient_id, sample_id, stringsAsFactors = FALSE
)
experiment_info


# Meta-data: marker information

# source: Bruggner et al. (2014), Table 1

# column indices of all markers, lineage markers, and functional markers
cols_markers <- c(3:4, 7:9, 11:19, 21:22, 24:26, 28:31, 33)
cols_lineage <- c(3:4, 9, 11, 12, 14, 21, 29, 31, 33)
cols_func <- setdiff(cols_markers, cols_lineage)

# channel and marker names
channel_name <- colnames(d_flowSet)
marker_name <- gsub("\\(.*$", "", channel_name)

# marker classes
# note: using lineage markers for 'cell type', and functional markers for 
# 'cell state'
marker_class <- rep("none", ncol(d_flowSet[[1]]))
marker_class[cols_lineage] <- "type"
marker_class[cols_func] <- "state"
marker_class <- factor(marker_class, levels = c("type", "state", "none"))

marker_info <- data.frame(
  channel_name, marker_name, marker_class, stringsAsFactors = FALSE
)
marker_info

## -----------------------------------------------------------------------------
suppressPackageStartupMessages(library(diffcyt))

# Create design matrix
# note: selecting columns containing group IDs and patient IDs (for an 
# unpaired dataset, only group IDs would be included)
design <- createDesignMatrix(
  experiment_info, cols_design = c("group_id", "patient_id")
)

## -----------------------------------------------------------------------------
# Create contrast matrix
contrast <- createContrast(c(0, 1, rep(0, 7)))

# check
nrow(contrast) == ncol(design)
data.frame(parameters = colnames(design), contrast)

## -----------------------------------------------------------------------------
# Test for differential abundance (DA) of clusters

# note: using default method 'diffcyt-DA-edgeR' and default parameters
# note: include random seed for reproducible clustering
out_DA <- diffcyt(
  d_input = d_flowSet, 
  experiment_info = experiment_info, 
  marker_info = marker_info, 
  design = design, 
  contrast = contrast, 
  analysis_type = "DA", 
  seed_clustering = 123
)

# display table of results for top DA clusters
topTable(out_DA, format_vals = TRUE)

# calculate number of significant detected DA clusters at 10% false discovery 
# rate (FDR)
threshold <- 0.1
res_DA_all <- topTable(out_DA, all = TRUE)
table(res_DA_all$p_adj <= threshold)

## ---- fig.show='hide'---------------------------------------------------------
# Test for differential states (DS) within clusters

# note: using default method 'diffcyt-DS-limma' and default parameters
# note: include random seed for reproducible clustering
out_DS <- diffcyt(
  d_input = d_flowSet, 
  experiment_info = experiment_info, 
  marker_info = marker_info, 
  design = design, 
  contrast = contrast, 
  analysis_type = "DS", 
  seed_clustering = 123, 
  plot = FALSE
)

# display table of results for top DS cluster-marker combinations
topTable(out_DS, format_vals = TRUE)

# calculate number of significant detected DS cluster-marker combinations at 
# 10% false discovery rate (FDR)
threshold <- 0.1
res_DS_all <- topTable(out_DS, all = TRUE)
table(res_DS_all$p_adj <= threshold)

## -----------------------------------------------------------------------------
# Prepare data
d_se <- prepareData(d_flowSet, experiment_info, marker_info)

## -----------------------------------------------------------------------------
# Transform data
d_se <- transformData(d_se)

## -----------------------------------------------------------------------------
# Generate clusters
# note: include random seed for reproducible clustering
d_se <- generateClusters(d_se, seed_clustering = 123)

## -----------------------------------------------------------------------------
# Calculate cluster cell counts
d_counts <- calcCounts(d_se)

# Calculate cluster medians
d_medians <- calcMedians(d_se)

## -----------------------------------------------------------------------------
# Test for differential abundance (DA) of clusters
res_DA <- testDA_edgeR(d_counts, design, contrast)

# display table of results for top DA clusters
topTable(res_DA, format_vals = TRUE)

# calculate number of significant detected DA clusters at 10% false discovery 
# rate (FDR)
threshold <- 0.1
table(topTable(res_DA, all = TRUE)$p_adj <= threshold)

## ---- fig.show='hide'---------------------------------------------------------
# Test for differential states (DS) within clusters
res_DS <- testDS_limma(d_counts, d_medians, design, contrast, plot = FALSE)

# display table of results for top DS cluster-marker combinations
topTable(res_DS, format_vals = TRUE)

# calculate number of significant detected DS cluster-marker combinations at 
# 10% false discovery rate (FDR)
threshold <- 0.1
table(topTable(res_DS, all = TRUE)$p_adj <= threshold)

## -----------------------------------------------------------------------------
# Output object from 'diffcyt()' wrapper function
names(out_DA)
dim(out_DA$d_se)
rowData(out_DA$d_se)
str(assay(out_DA$d_se))
head(assay(out_DA$d_se), 2)

# Extract cell-level table for export as .fcs file
# note: including group IDs, patient IDs, sample IDs, and cluster labels for 
# each cell
# note: table must be a numeric matrix (to save as .fcs file)
d_fcs <- assay(out_DA$d_se)
class(d_fcs)

## ---- eval=FALSE--------------------------------------------------------------
#  # Save as .fcs file
#  filename_fcs <- "exported_data.fcs"
#  write.FCS(
#    flowFrame(d_fcs), filename = filename_fcs
#  )
#  
#  # Alternatively, save as tab-delimited .txt file
#  filename_txt <- "exported_data.txt"
#  write.table(
#    d_fcs, file = filename_txt, quote = FALSE, sep = "\t", row.names = FALSE
#  )

## -----------------------------------------------------------------------------
# Heatmap for top detected DA clusters

# note: use optional argument 'sample_order' to group samples by condition
sample_order <- c(seq(2, 16, by = 2), seq(1, 16, by = 2))

plotHeatmap(out_DA, analysis_type = "DA", sample_order = sample_order)

## ---- eval=FALSE--------------------------------------------------------------
#  # Heatmap for top detected DA clusters (alternative code using 'CATALYST')
#  
#  suppressPackageStartupMessages(library(CATALYST))
#  
#  plotDiffHeatmap(out_DA$d_se, out_DA$res)

## -----------------------------------------------------------------------------
# Heatmap for top detected DS cluster-marker combinations

# note: use optional argument 'sample_order' to group samples by condition
sample_order <- c(seq(2, 16, by = 2), seq(1, 16, by = 2))

plotHeatmap(out_DS, analysis_type = "DS", sample_order = sample_order)

## ---- eval=FALSE--------------------------------------------------------------
#  # Heatmap for top detected DA clusters (alternative code using 'CATALYST')
#  
#  suppressPackageStartupMessages(library(CATALYST))
#  
#  plotDiffHeatmap(out_DS$d_se, out_DS$res)