## ----knitr, echo=FALSE, results="hide"-----------------------------------
library("knitr")
opts_chunk$set(tidy=FALSE,tidy.opts=list(width.cutoff=30),dev="png",fig.show="hide",
               fig.width=4,fig.height=4.5,
               message=FALSE)

## ----style, eval=TRUE, echo=FALSE, results="asis"--------------------------
BiocStyle::latex()

## ----options, results="hide", echo=FALSE--------------------------------------
options(digits=3, width=80, prompt=" ", continue=" ")

## ----install_cellTree, eval=FALSE---------------------------------------------
#  if (!requireNamespace("BiocManager", quietly=TRUE))
#      install.packages("BiocManager")
#  BiocManager::install("cellTree")

## ----install_missing_bioconductor_packages, eval=FALSE------------------------
#  BiocManager::install(c("HSMMSingleCell", "org.Hs.eg.db", "biomaRt"))

## ----init_sincell, cache=FALSE, eval=TRUE,warning=FALSE-----------------------
library(cellTree)

## ----load_hsmm_data, eval=TRUE------------------------------------------------
# load HSMMSingleCell package and load the data set:
library(HSMMSingleCell)
data(HSMM_expr_matrix)

# Total number of genes * cells:
dim(HSMM_expr_matrix)

## ----compute_lda_maptpx, eval=FALSE-------------------------------------------
#  # Run LDA inference using 'maptpx' method
#  # finding best number of topics k between 3 and 8:
#  lda.results = compute.lda(HSMM_expr_matrix, k.topics=3:8, method="maptpx")

## ----compute_lda_gibbs, eval=FALSE--------------------------------------------
#  # Run LDA inference using 'Gibbs' method for k = 6 topics:
#  lda.results = compute.lda(HSMM_expr_matrix, k.topics=6, method="Gibbs")

## ----compute_lda_with_hgnc, eval=FALSE----------------------------------------
#  HSMM_expr_matrix.hgnc = HSMM_expr_matrix
#  
#  library("biomaRt")
#  ensembl.ids = sapply(strsplit(rownames(HSMM_expr_matrix), split=".",fixed=TRUE),
#  					 "[",
#  					 1)
#  ensembl.mart = useMart(host="www.ensembl.org",
#  					   "ENSEMBL_MART_ENSEMBL",
#  					   dataset = "hsapiens_gene_ensembl")
#  gene.map = getBM(attributes = c("ensembl_gene_id", "entrezgene", "hgnc_symbol"),
#  				 filters = "ensembl_gene_id",
#  				 values = ensembl.ids,
#  				 mart = ensembl.mart)
#  idx = match(ensembl.ids, gene.map$ensembl_gene_id)
#  hgnc.ids = gene.map$hgnc_symbol[idx]
#  has.hgnc.ids = !is.na(hgnc.ids)&(hgnc.ids!="")
#  rownames(HSMM_expr_matrix.hgnc)[has.hgnc.ids] = hgnc.ids[has.hgnc.ids]
#  
#  HSMM_lda_model = compute.lda(HSMM_expr_matrix.hgnc, k.topics=6)

## ----load_lda_with_hgnc, eval=TRUE--------------------------------------------
# Load pre-computed LDA model for skeletal myoblast RNA-Seq data
# from HSMMSingleCell package:
data(HSMM_lda_model)

# Number of topics of fitted model:
print(HSMM_lda_model$K)

# Model uses HGCN gene names:
head(rownames(HSMM_lda_model$theta))

## ----pairwise_distances, eval=TRUE--------------------------------------------
# Compute pairwise distance between cells
# based on topic distributions in the fitted model:
dists = get.cell.dists(HSMM_lda_model)

print(dists[1:5,1:5])

## ----day_annotation, eval=TRUE------------------------------------------------
# Recover sampling time point for each cell:
library(HSMMSingleCell)
data(HSMM_sample_sheet)
days.factor = HSMM_sample_sheet$Hours
days = as.numeric(levels(days.factor))[days.factor]

# Our grouping annotation (in hours):
print(unique(days))

## ----mst_tree, eval=TRUE------------------------------------------------------
# compute MST from a fitted LDA model:
mst.tree = compute.backbone.tree(HSMM_lda_model, days, only.mst=TRUE)

## ----plot_mst_tree_with_topics, eval=TRUE, echo=TRUE, fig.show="asis", dpi=144, fig.width=5, fig.height=5, out.width="5in", out.height="5in"----
# plot the tree (showing topic distribution for each cell):
mst.tree.with.layout = ct.plot.topics(mst.tree)

## ----plot_mst_tree_with_groups, echo=TRUE, fig.show="asis", dpi=144, fig.width=5, fig.height=5, out.width="5in", out.height="5in"----
# plot the tree (showing time point for each cell):
mst.tree.with.layout = ct.plot.grouping(mst.tree)

## ----plot_btree_with_groups, eval=TRUE, echo=TRUE, fig.show="asis", dpi=144, fig.width=5, fig.height=5, out.width="5in", out.height="5in"----
# compute backbone tree from a fitted LDA model:
b.tree = compute.backbone.tree(HSMM_lda_model, days)

# plot the tree (showing time label for each cell):
b.tree.with.layout = ct.plot.grouping(b.tree)

## ----plot_btree_with_groups_wider, eval=TRUE, echo=TRUE, fig.show="asis", dpi=144, fig.width=5, fig.height=5, out.width="5in", out.height="5in"----
# compute backbone tree from a fitted LDA model:
b.tree = compute.backbone.tree(HSMM_lda_model, days, width.scale.factor=1.5)

# plot the tree (showing time label for each cell):
b.tree.with.layout = ct.plot.grouping(b.tree)

## ----plot_btree_with_topics_wider, eval=TRUE, echo=TRUE, fig.show="asis", dpi=144, fig.width=5, fig.height=5, out.width="5in", out.height="5in"----
# plot the tree (showing topic distribution for each cell):
b.tree.with.layout = ct.plot.topics(b.tree)

## ----go_lib, eval=TRUE--------------------------------------------------------
# Load GO mappings for human:
library(org.Hs.eg.db)

## ----go_terms, eval=TRUE, results="hide"--------------------------------------
# Compute GO enrichment sets (using the Cellular Components category)
# for each topic
go.results = compute.go.enrichment(HSMM_lda_model,
                            org.Hs.eg.db, ontology.type="CC",
                            bonferroni.correct=TRUE, p.val.threshold=0.01)

## ----go_terms_print, eval=TRUE------------------------------------------------
# Print ranked table of significantly enriched terms for topic 1
# that do not appear in other topics:
go.results$unique[[1]]

## ----go_terms_dag_files, eval=FALSE-------------------------------------------
#  # Compute GO enrichment sets (using the Biological Process category)
#  # for each topic and saves DAG plots to files:
#  go.results.bp = compute.go.enrichment(HSMM_lda_model,
#                                  org.Hs.eg.db, ontology.type="BP",
#                                  bonferroni.correct=TRUE, p.val.threshold=0.01,
#                                  dag.file.prefix="hsmm_go_")

## ----plot_go_results, eval=TRUE, echo=TRUE, fig.show="asis", dpi=144, fig.width=5, fig.height=5, out.width="5in", out.height="5in"----
# plot GO sub-DAG for topics 1 to 3:
go.dag.subtree = ct.plot.go.dag(go.results,
                                up.generations = 2,
                                only.topics=c(1:3))

## ----cell_ordering_table, eval=TRUE-------------------------------------------
# Generate table summary of cells, ranked by tree position:
cell.table = cell.ordering.table(b.tree)

# Print first 5 cells:
cell.table[1:5,]

## ----cell_ordering_table_latex, eval=FALSE------------------------------------
#  # Generate table summary of cells, ranked by tree position:
#  cell.table = cell.ordering.table(b.tree,
#                                  write.to.tex.file="cell_summary.tex")

## ----session_info, eval=TRUE--------------------------------------------------
sessionInfo()