## ---- echo=FALSE, include=FALSE----------------------------------------------- library(knitr) #library(kableExtra) knitr::opts_chunk$set(cache = TRUE, warning = FALSE, message = FALSE, cache.lazy = FALSE) #options(width = 120) options(pillar.min_title_chars = Inf) library(magrittr) library(tibble) library(dplyr) library(magrittr) library(tidyr) library(ggplot2) library(rlang) library(purrr) library(ggrepel) library(tidyHeatmap) library(pasilla) library(tidybulk) my_theme = theme_bw() + theme( panel.border = element_blank(), axis.line = element_line(), panel.grid.major = element_line(size = 0.2), panel.grid.minor = element_line(size = 0.1), text = element_text(size=12), legend.position="bottom", aspect.ratio=1, strip.background = element_blank(), axis.title.x = element_text(margin = margin(t = 10, r = 10, b = 10, l = 10)), axis.title.y = element_text(margin = margin(t = 10, r = 10, b = 10, l = 10)) ) ## ----------------------------------------------------------------------------- pasCts = system.file("extdata", "pasilla_gene_counts.tsv", package = "pasilla", mustWork = TRUE) pasAnno = system.file( "extdata", "pasilla_sample_annotation.csv", package = "pasilla", mustWork = TRUE ) cts = as.matrix(read.csv(pasCts, sep = "\t", row.names = "gene_id")) coldata = read.csv(pasAnno, row.names = 1) coldata = coldata[, c("condition", "type")] # Create tidybulk object counts = cts %>% as_tibble(rownames = "feature") %>% pivot_longer(names_to = "sample", values_to = "count", cols = -feature) %>% left_join( coldata %>% as_tibble(rownames = "sample") %>% mutate(sample = gsub("fb", "", sample)) ) %>% mutate_if(is.character, as.factor) ## ----------------------------------------------------------------------------- # Create a tt object with unique raw and normalised counts tt_scaled <- tidybulk(counts, sample, feature, count) %>% aggregate_duplicates() %>% identify_abundant() %>% scale_abundance() # Plot count densities tt_scaled %>% pivot_longer( c(count, count_scaled), values_to = "count", names_to = "Normalisation" ) %>% ggplot(aes(count + 1, group=sample, color=type)) + facet_grid(~Normalisation) + geom_density() + scale_x_log10() ## ----------------------------------------------------------------------------- # Reduce data dimensionality with arbitrary number of dimensions tt_mds <- tt_scaled %>% reduce_dimensions(method="MDS", .dims = 3) # Plot all-vs-all MDS dimensions tt_mds %>% pivot_sample() %>% GGally::ggpairs(columns = 7:9, ggplot2::aes(colour=condition)) ## ----------------------------------------------------------------------------- # Adjust for visualisation tt_adj <- tt_mds %>% adjust_abundance(~ condition + type) # Visualise the association between reduced dimensions and factors tt_mds_adj_mds <- tt_adj %>% filter( count_scaled_adjusted %>% is.na %>% `!`) %>% # Calculate reduced dimensions on the adjusted counts as well reduce_dimensions( .abundance = count_scaled_adjusted, method="MDS", .dim = 3 ) ## ----------------------------------------------------------------------------- # Data manipulation and visualisation tt_mds_adj_mds %>% pivot_sample() %>% # First level reshaping pivot_longer(contains("Dim"), names_to = "Dim", values_to = ".value") %>% separate(Dim, c("Dim", "Adj"), sep="\\.") %>% mutate(Adj = ifelse(Adj == "y", "non", "adj") %>% factor(c("scaled", "adj"))) %>% # Second level reshaping pivot_longer(c(type, condition), names_to = "covar", values_to = "which") %>% # Visualise the integrative plot ggplot(aes(y = .value, x = covar, fill = `which`)) + geom_boxplot() + facet_grid(Adj ~ Dim) ## ----------------------------------------------------------------------------- tt_test <- tt_adj %>% test_differential_abundance(~ condition + type) # MA plot tt_test %>% keep_abundant() %>% pivot_transcript() %>% # Subset data mutate(significant = FDR<0.05 & abs(logFC) >=2) %>% mutate(feature = ifelse(significant, as.character(feature), NA)) %>% # Plot ggplot(aes(x = logCPM, y = logFC, label=feature)) + geom_point(aes(color = significant, size = significant, alpha=significant)) + geom_text_repel() + scale_color_manual(values=c("black", "#e11f28")) + scale_size_discrete(range = c(0, 2)) ## ----------------------------------------------------------------------------- tt_test %>% # Select top genes and reshape data inner_join( arrange((.), PValue) %>% distinct(feature) %>% head(6)) %>% # High level reshaping of the data. # All three count columns are shaped as two columns: # (i) the columns name and (ii) the value of those columns pivot_longer( c(count, count_scaled, count_scaled_adjusted), names_to = "Stage", values_to = "count" ) %>% # This allows the faceted plot ggplot(aes(x = Stage, y = count + 1, fill = condition)) + geom_boxplot() + facet_wrap(~feature) + scale_y_log10() ## ----------------------------------------------------------------------------- # Heatmap tt_test %>% as_tibble() %>% # Select differentially abundant filter(FDR < 0.05 & abs(logFC) > 2) %>% # Plot heatmap( feature, sample, count_scaled_adjusted) %>% add_tile(condition) %>% add_tile(type)