## ---- eval = FALSE------------------------------------------------------------
# Installation (not evaluated when the vignette is built).
# ## try http:// if https:// URLs are not supported
# if (!requireNamespace("BiocManager", quietly=TRUE))
#     install.packages("BiocManager")
# BiocManager::install("scFeatureFilter")

## ---- message=FALSE, warning=FALSE, collapse=TRUE-----------------------------
# Packages used throughout the walkthrough.
library(scFeatureFilter)
library(ggplot2)
library(cowplot) # multipanel figures + nice theme

## ---- collapse=TRUE-----------------------------------------------------------
# Quick workflow: show the example dataset shipped with the package ...
scData_hESC

# ... then filter it with a single function call.
sc_feature_filter(scData_hESC)

## ---- collapse=TRUE-----------------------------------------------------------
# Step-by-step workflow starts here: the input expression table.
scData_hESC

## ---- collapse=TRUE-----------------------------------------------------------
# Result of calculate_cvs() on the input table.
calculate_cvs(scData_hESC)

## ---- collapse=TRUE-----------------------------------------------------------
library(magrittr) # provides the pipe %>%

# Mean-variance plot, without colouring by bin.
plot_mean_variance(calculate_cvs(scData_hESC), colourByBin = FALSE)

## ---- collapse=TRUE-----------------------------------------------------------
# Define the top window (100 genes), then bin the rest in windows of 1000.
scData_hESC %>%
  calculate_cvs() %>%
  define_top_genes(window_size = 100) %>%
  bin_scdata(window_size = 1000)

## ----collapse=TRUE------------------------------------------------------------
# Same pipeline, plotted and coloured by bin this time.
myPlot <- scData_hESC %>%
  calculate_cvs() %>%
  define_top_genes(window_size = 100) %>%
  bin_scdata(window_size = 1000) %>%
  plot_mean_variance(colourByBin = TRUE, density_color = "blue")

myPlot

## ---- collapse=TRUE-----------------------------------------------------------
# The returned plot can be extended with regular ggplot2 layers.
myPlot + annotation_logticks(sides = "l")

## ---- collapse=TRUE-----------------------------------------------------------
# Correlate each bin against the top window, with 3 randomised controls.
corDistrib <- scData_hESC %>%
  calculate_cvs() %>%
  define_top_genes(window_size = 100) %>%
  bin_scdata(window_size = 1000) %>%
  correlate_windows(n_random = 3)

## ---- collapse=TRUE-----------------------------------------------------------
# Turn the correlations into densities and plot them, 5 facets per row.
corDens <- corDistrib %>%
  correlations_to_densities(absolute_cc = TRUE)

plot_correlations_distributions(corDens, facet_ncol = 5) +
  scale_x_continuous(breaks = c(0, 0.5, 1), labels = c("0", "0.5", "1"))

## ---- collapse=TRUE-----------------------------------------------------------
# Summary metrics of the correlation distributions, overlaid on the same plot.
metrics <- corDistrib %>% get_mean_median()
metrics

plot_correlations_distributions(
  corDens,
  metrics = metrics,
  facet_ncol = 5
) +
  scale_x_continuous(breaks = c(0, 0.5, 1), labels = c("0", "0.5", "1"))

## ---- collapse=TRUE-----------------------------------------------------------
# Metric plot with neither controls nor threshold displayed.
plot_metric(metrics, show_ctrl = FALSE, show_threshold = FALSE)

## ---- collapse=TRUE-----------------------------------------------------------
# Metric plot with the controls displayed.
plot_metric(metrics, show_ctrl = TRUE, show_threshold = FALSE)

## ---- collapse=TRUE-----------------------------------------------------------
# Metric plot with controls and a threshold of 2 displayed.
plot_metric(
  metrics,
  show_ctrl = TRUE,
  show_threshold = TRUE,
  threshold = 2
)

## ---- collapse=TRUE-----------------------------------------------------------
# Bin cutoff obtained for that same threshold.
determine_bin_cutoff(metrics, threshold = 2)

## ---- collapse=TRUE-----------------------------------------------------------
# Full manual workflow: bin, compute metrics, then filter the binned table.
binned_data <- scData_hESC %>%
  calculate_cvs() %>%
  define_top_genes(window_size = 100) %>%
  bin_scdata(window_size = 1000)

metrics <- binned_data %>%
  correlate_windows(n_random = 3) %>%
  get_mean_median()

filtered_data <- binned_data %>%
  filter_expression_table(bin_cutoff = determine_bin_cutoff(metrics))

# Dimensions before and after filtering, then the filtered table itself.
dim(scData_hESC)
dim(filtered_data)

filtered_data

## ---- message=FALSE, warning=FALSE, collapse=TRUE-----------------------------
library(SingleCellExperiment)
library(scRNAseq) # example datasets

sce_allen <- ReprocessedAllenData()

# sce_allen is a SingleCellExperiment object
sce_allen

filtered_allen <- sc_feature_filter(sce_allen, sce_assay = "rsem_tpm")

# Report whether the filtered result is a matrix; its row names are used
# below to subset the original object.
is.matrix(filtered_allen)

sce_filtered_allen <- sce_allen[rownames(filtered_allen), ]
sce_filtered_allen

## ---- collapse=TRUE-----------------------------------------------------------
# Auto-correlation plot of the top window.
scData_hESC %>%
  calculate_cvs() %>%
  plot_top_window_autocor()

## ---- collapse=TRUE-----------------------------------------------------------
# Compare the metric obtained with two different bin sizes, side by side.
metrics_bigBins <- scData_hESC %>%
  calculate_cvs() %>%
  define_top_genes(window_size = 100) %>%
  bin_scdata(window_size = 1000) %>%
  correlate_windows(n_random = 3) %>%
  get_mean_median()

metrics_smallBins <- scData_hESC %>%
  calculate_cvs() %>%
  define_top_genes(window_size = 100) %>%
  bin_scdata(window_size = 500) %>%
  correlate_windows(n_random = 3) %>%
  get_mean_median()

plot_grid(
  plot_metric(metrics_bigBins) + labs(title = "1000 genes per bin"),
  plot_metric(metrics_smallBins) + labs(title = "500 genes per bin")
)