## ----setup, include=FALSE-----------------------------------------------------
library(BiocStyle)
knitr::opts_chunk$set(
  echo = TRUE,
  message = FALSE,
  tidy = FALSE,
  warning = FALSE,
  fig.width = 7,
  fig.height = 5
)

## ----eval=FALSE---------------------------------------------------------------
# BiocManager::install("immGLIPH")

## ----eval=FALSE---------------------------------------------------------------
# BiocManager::install("BiocFileCache")
# ref <- getGLIPHreference()

## -----------------------------------------------------------------------------
library(immGLIPH)

## -----------------------------------------------------------------------------
data("gliph_input_data")
head(gliph_input_data)
dim(gliph_input_data)

## -----------------------------------------------------------------------------
library(scRepertoire)
data("contig_list")

# After processing with cellranger/etc, combine contigs
combined <- combineTCR(contig_list[seq_len(2)],
                       samples = c("P1", "P2"))

# Take a small slice so the example runs quickly. In real use, pass
# all samples and rely on the bundled reference downloaded by
# getGLIPHreference().
combined_small <- lapply(combined, function(x) x[seq_len(50), ])

# Use a small custom reference built from the bundled example data
small_ref <- gliph_input_data[, c("CDR3b", "TRBV")]

# Pass scRepertoire output directly to runGLIPH
results_sc <- runGLIPH(combined_small,
                       method     = "gliph2",
                       refdb_beta = small_ref,
                       sim_depth  = 100,
                       n_cores    = 1)

## -----------------------------------------------------------------------------
library(SingleCellExperiment)
data("gliph_sce")

# SingleCellExperiment object with TCR info in colData
results_sce <- runGLIPH(gliph_sce,
                        method     = "gliph2",
                        chains     = "TRB",
                        refdb_beta = small_ref,
                        sim_depth  = 100,
                        n_cores    = 1)

## -----------------------------------------------------------------------------
data("gliph_input_data")

# Use the input data itself as a small reference for demonstration
ref_df <- gliph_input_data[, c("CDR3b", "TRBV")]

res_gliph2 <- runGLIPH(
  cdr3_sequences = gliph_input_data[seq_len(200), ],
  method         = "gliph2",
  refdb_beta     = ref_df,
  sim_depth      = 100,
  n_cores        = 1
)

## -----------------------------------------------------------------------------
res_gliph1 <- runGLIPH(
  cdr3_sequences = gliph_input_data[seq_len(200), ],
  method         = "gliph1",
  refdb_beta     = ref_df,
  sim_depth      = 100,
  n_cores        = 1
)

## -----------------------------------------------------------------------------
names(res_gliph2)

## -----------------------------------------------------------------------------
head(res_gliph2$cluster_properties)

## -----------------------------------------------------------------------------
# Number of convergence groups
length(res_gliph2$cluster_list)

# Members of the first cluster (if any found)
if (length(res_gliph2$cluster_list) > 0) {
  head(res_gliph2$cluster_list[[1]])
}

## -----------------------------------------------------------------------------
# Significantly enriched motifs
if (!is.null(res_gliph2$motif_enrichment$selected_motifs)) {
  head(res_gliph2$motif_enrichment$selected_motifs)
}

## -----------------------------------------------------------------------------
if (!is.null(res_gliph2$connections)) {
  head(res_gliph2$connections)
}

## -----------------------------------------------------------------------------
res_custom <- runGLIPH(
  cdr3_sequences  = gliph_input_data[seq_len(200), ],
  method          = "custom",
  refdb_beta      = ref_df,
  local_method    = "fisher",    # or "rrs"
  global_method   = "cutoff",    # or "fisher"
  clustering_method = "GLIPH1.0", # or "GLIPH2.0"
  scoring_method  = "GLIPH2.0",  # or "GLIPH1.0"
  sim_depth       = 100,
  n_cores         = 1
)

## -----------------------------------------------------------------------------
res_strict <- runGLIPH(
  cdr3_sequences = gliph_input_data[seq_len(200), ],
  method         = "gliph2",
  refdb_beta     = ref_df,
  lcminp         = 0.001,            # Stricter p-value
  lcminove       = c(10000, 1000, 100), # Higher fold-change
  sim_depth      = 100,
  n_cores        = 1
)

## -----------------------------------------------------------------------------
custom_ref <- gliph_input_data[, c("CDR3b", "TRBV")]
head(custom_ref, 3)

res <- runGLIPH(
  cdr3_sequences = gliph_input_data[seq_len(100), ],
  refdb_beta     = custom_ref,
  method         = "gliph2",
  sim_depth      = 100,
  n_cores        = 1
)

## -----------------------------------------------------------------------------
data("gliph_input_data")
sample_seqs <- as.character(gliph_input_data$CDR3b[seq_len(100)])

# Find all 3-mers appearing at least 5 times
motifs <- findMotifs(seqs = sample_seqs, 
                     q = 3, 
                     kmer_mindepth = 5)
head(motifs[order(motifs$V1, decreasing = TRUE), ])

## -----------------------------------------------------------------------------
disc_motifs <- findMotifs(seqs          = sample_seqs,
                          q             = 2,
                          kmer_mindepth = 5,
                          discontinuous = TRUE
)
# Show discontinuous motifs (those containing a dot)
disc_only <- disc_motifs[grep("\\.", disc_motifs$motif), ]
head(disc_only[order(disc_only$V1, decreasing = TRUE), ])

## -----------------------------------------------------------------------------
# Re-score with GLIPH2 formula
if (length(res_gliph1$cluster_list) > 0) {
  rescored <- clusterScoring(
    cluster_list   = res_gliph1$cluster_list,
    cdr3_sequences = gliph_input_data[seq_len(200), ],
    refdb_beta     = ref_df,
    gliph_version  = 2,
    sim_depth      = 100,
    n_cores        = 1)
  head(rescored)
}

## -----------------------------------------------------------------------------
# Generate de novo TCRs for the first convergence group (if any found)
if (length(res_gliph1$cluster_list) > 0) {
  de_novo <- deNovoTCRs(
    convergence_group_tag = names(res_gliph1$cluster_list)[1],
    clustering_output     = res_gliph1,
    refdb_beta            = ref_df,
    sims                  = 10000,
    num_tops              = 100,
    make_figure           = FALSE,
    n_cores               = 1
  )

  # Top predicted sequences
  head(de_novo$de_novo_sequences)

  # Positional weight matrix used for generation
  head(de_novo$PWM_Scoring)
}

## -----------------------------------------------------------------------------
if (!is.null(res_gliph1$cluster_properties) &&
    nrow(res_gliph1$cluster_properties) > 0) {
  plotNetwork(
    clustering_output = res_gliph1,
    color_info        = "total.score",
    cluster_min_size  = 2,
    n_cores           = 1
  )
}

## -----------------------------------------------------------------------------
out_dir <- file.path(tempdir(), "gliph_results")

res_saved <- runGLIPH(
  cdr3_sequences = gliph_input_data[seq_len(200), ],
  method         = "gliph2",
  refdb_beta     = ref_df,
  result_folder  = out_dir,
  sim_depth      = 100,
  n_cores        = 1
)

reloaded <- loadGLIPH(result_folder = out_dir)
names(reloaded)

## ----eval=FALSE---------------------------------------------------------------
# # Install immApex for performance acceleration
# BiocManager::install("BorchLab/immApex")

## -----------------------------------------------------------------------------
sessionInfo()

