## ----setup, include = FALSE---------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----getPackage, eval=FALSE---------------------------------------------- # if (!requireNamespace("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # BiocManager::install("cicero") ## ---- eval = FALSE------------------------------------------------------- # BiocManager::install(cole-trapnell-lab/cicero) ## ----Load, message=FALSE------------------------------------------------- library(cicero) ## ------------------------------------------------------------------------ data(cicero_data) ## ---- eval=TRUE---------------------------------------------------------- input_cds <- make_atac_cds(cicero_data, binarize = TRUE) ## ---- eval=TRUE---------------------------------------------------------- set.seed(2017) input_cds <- detectGenes(input_cds) input_cds <- estimateSizeFactors(input_cds) input_cds <- reduceDimension(input_cds, max_components = 2, num_dim=6, reduction_method = 'tSNE', norm_method = "none") ## ---- eval=TRUE---------------------------------------------------------- tsne_coords <- t(reducedDimA(input_cds)) row.names(tsne_coords) <- row.names(pData(input_cds)) cicero_cds <- make_cicero_cds(input_cds, reduced_coordinates = tsne_coords) ## ---- eval=TRUE---------------------------------------------------------- data("human.hg19.genome") sample_genome <- subset(human.hg19.genome, V1 == "chr18") sample_genome$V2[1] <- 10000000 conns <- run_cicero(cicero_cds, sample_genome, sample_num = 2) # Takes a few minutes to run head(conns) ## ---- fig.width = 7, fig.height = 4, fig.align='center', eval=TRUE------- data(gene_annotation_sample) plot_connections(conns, "chr18", 8575097, 8839855, gene_model = gene_annotation_sample, coaccess_cutoff = .25, connection_width = .5, collapseTranscripts = "longest" ) ## ---- eval=TRUE---------------------------------------------------------- chia_conns <- data.frame(Peak1 = c("chr18_10000_10200", "chr18_10000_10200", "chr18_49500_49600"), Peak2 = c("chr18_10600_10700", "chr18_111700_111800", "chr18_10600_10700")) conns$in_chia <- compare_connections(conns, chia_conns) ## ---- eval=TRUE---------------------------------------------------------- conns$in_chia_100 <- compare_connections(conns, chia_conns, maxgap=100) head(conns) ## ---- eval=TRUE---------------------------------------------------------- # Add a column of 1s called "coaccess" chia_conns <- data.frame(Peak1 = c("chr18_10000_10200", "chr18_10000_10200", "chr18_49500_49600"), Peak2 = c("chr18_10600_10700", "chr18_111700_111800", "chr18_10600_10700"), coaccess = c(1, 1, 1)) plot_connections(conns, "chr18", 10000, 112367, gene_model = gene_annotation_sample, coaccess_cutoff = 0, connection_width = .5, comparison_track = chia_conns, include_axis_track = FALSE, collapseTranscripts = "longest") ## ---- eval=TRUE---------------------------------------------------------- CCAN_assigns <- generate_ccans(conns) head(CCAN_assigns) ## ---- eval=TRUE---------------------------------------------------------- # Make a subset of the gene annotation column containing just the coordinates # and the gene name gene_annotation_sub <- gene_annotation_sample[,c(1:3, 8)] # Rename the gene symbol column to "gene" names(gene_annotation_sub)[4] <- "gene" input_cds <- annotate_cds_by_site(input_cds, gene_annotation_sub) head(fData(input_cds)) # generate unnormalized gene activity matrix unnorm_ga <- build_gene_activity_matrix(input_cds, conns) # make a list of num_genes_expressed num_genes <- pData(input_cds)$num_genes_expressed names(num_genes) <- row.names(pData(input_cds)) # normalize cicero_gene_activities <- normalize_gene_activities(unnorm_ga, num_genes) # if you had two datasets to normalize, you would pass both: # num_genes should then include all cells from both sets unnorm_ga2 <- unnorm_ga cicero_gene_activities <- normalize_gene_activities(list(unnorm_ga, unnorm_ga2), num_genes) ## ---- eval=TRUE---------------------------------------------------------- data("cicero_data") input_cds <- make_atac_cds(cicero_data) # Add some cell meta-data data("cell_data") pData(input_cds) <- cbind(pData(input_cds), cell_data[row.names(pData(input_cds)),]) pData(input_cds)$cell <- NULL agg_cds <- aggregate_nearby_peaks(input_cds, distance = 10000) agg_cds <- detectGenes(agg_cds) agg_cds <- estimateSizeFactors(agg_cds) agg_cds <- estimateDispersions(agg_cds) ## ---- eval=TRUE---------------------------------------------------------- # This takes a few minutes to run diff_timepoint <- differentialGeneTest(agg_cds, fullModelFormulaStr="~timepoint + num_genes_expressed") # We chose a very high q-value cutoff because there are # so few sites in the sample dataset, in general a q-value # cutoff in the range of 0.01 to 0.1 would be appropriate ordering_sites <- row.names(subset(diff_timepoint, qval < .5)) length(ordering_sites) ## ---- fig.show='hold', eval=FALSE---------------------------------------- # plot_pc_variance_explained(agg_cds, return_all = FALSE) #Choose 2 PCs # agg_cds <- reduceDimension(agg_cds, # max_components = 2, # norm_method = 'log', # num_dim = 2, # reduction_method = 'tSNE', # verbose = TRUE) # # agg_cds <- clusterCells(agg_cds, verbose = FALSE) # # plot_cell_clusters(agg_cds, color_by = 'as.factor(Cluster)') + theme(text = element_text(size=8)) # clustering_DA_sites <- differentialGeneTest(agg_cds[1:10,], #Subset for example only # fullModelFormulaStr = '~Cluster') # # # Not run because using Option 1 to continue # # ordering_sites <- # # row.names(clustering_DA_sites)[order(clustering_DA_sites$qval)][1:1000] # ## ---- eval=TRUE---------------------------------------------------------- agg_cds <- setOrderingFilter(agg_cds, ordering_sites) ## ---- fig.align='center', fig.height=4, fig.width=4, eval=TRUE----------- agg_cds <- reduceDimension(agg_cds, max_components = 2, residualModelFormulaStr="~num_genes_expressed", reduction_method = 'DDRTree') agg_cds <- orderCells(agg_cds) plot_cell_trajectory(agg_cds, color_by = "timepoint") ## ---- fig.align='center', fig.height=4, fig.width=4, eval=TRUE----------- plot_cell_trajectory(agg_cds, color_by = "State") ## ---- fig.align='center', fig.height=4, fig.width=4, eval=TRUE----------- agg_cds <- orderCells(agg_cds, root_state = 4) plot_cell_trajectory(agg_cds, color_by = "Pseudotime") ## ---- fig.align='center', fig.height=4, fig.width=4, eval=TRUE----------- pData(input_cds)$Pseudotime <- pData(agg_cds)[colnames(input_cds),]$Pseudotime pData(input_cds)$State <- pData(agg_cds)[colnames(input_cds),]$State ## ---- fig.width = 3, fig.height = 4, fig.align='center', eval=TRUE------- input_cds_lin <- input_cds[,row.names(subset(pData(input_cds), State != 5))] plot_accessibility_in_pseudotime(input_cds_lin[c("chr18_38156577_38158261", "chr18_48373358_48374180", "chr18_60457956_60459080")]) ## ---- eval=TRUE---------------------------------------------------------- pData(input_cds_lin)$cell_subtype <- cut(pData(input_cds_lin)$Pseudotime, 10) binned_input_lin <- aggregate_by_cell_bin(input_cds_lin, "cell_subtype") ## ---- eval=TRUE---------------------------------------------------------- diff_test_res <- differentialGeneTest(binned_input_lin[1:10,], #Subset for example only fullModelFormulaStr="~sm.ns(Pseudotime, df=3) + sm.ns(num_genes_expressed, df=3)", reducedModelFormulaStr="~sm.ns(num_genes_expressed, df=3)", cores=1) head(diff_test_res) ## ------------------------------------------------------------------------ citation("cicero") ## ------------------------------------------------------------------------ sessionInfo()