## ----setup, echo=FALSE, results="hide"---------------------------------------- knitr::opts_chunk$set(tidy=FALSE, cache=FALSE, dev="png", message=FALSE, error=FALSE, warning=TRUE) ## ---- warning=F--------------------------------------------------------------- library(profileplyr) example <- system.file("extdata", "example_deepTools_MAT", package = "profileplyr") proplyrObject <- import_deepToolsMat(example) ## ---- warning=FALSE, message=FALSE-------------------------------------------- signalFiles <- c(system.file("extdata","Sorted_Hindbrain_day_12_1_filtered.bam",package = "profileplyr"), system.file("extdata","Sorted_Liver_day_12_1_filtered.bam",package = "profileplyr")) # BAMs must be indexed require(Rsamtools) for (i in seq_along(signalFiles)){ indexBam(signalFiles[i]) } testRanges <- system.file("extdata", "newranges_small.bed", package = "profileplyr") chipProfile <- BamBigwig_to_chipProfile(signalFiles, testRanges, format = "bam", paired=FALSE, style="percentOfRegion", nOfWindows=20, distanceAround=40 ) chipProfile ## ----------------------------------------------------------------------------- proplyrObject <- as_profileplyr(chipProfile) params(proplyrObject)$rowGroupsInUse ## ---- echo=F------------------------------------------------------------------ #vignette compiles much faster if going off of deeptools input proplyrObject <- import_deepToolsMat(example) ## ----------------------------------------------------------------------------- proplyrObject ## ----------------------------------------------------------------------------- library(SummarizedExperiment) assays(proplyrObject) dim(proplyrObject) ## ----------------------------------------------------------------------------- rowRanges(proplyrObject)[1:3] ## ----------------------------------------------------------------------------- sampleData(proplyrObject) ## ----------------------------------------------------------------------------- params(proplyrObject)$rowGroupsInUse ## ----------------------------------------------------------------------------- params(proplyrObject)$mcolToOrderBy proplyrObject <- orderBy(proplyrObject, "score") params(proplyrObject)$mcolToOrderBy ## ---- echo=FALSE-------------------------------------------------------------- # change back to NULL for any other functions relying on this object proplyrObject <- orderBy(proplyrObject,NULL) ## ----------------------------------------------------------------------------- proplyrObject[1:10,1:10] ## ----------------------------------------------------------------------------- proplyrObject[ , , 1:2] ## ----------------------------------------------------------------------------- rownames(sampleData(proplyrObject)) ## ----------------------------------------------------------------------------- rownames(sampleData(proplyrObject)) <- c("Hindbrain", "Liver", "Kidney") ## ---- eval=T, results='hide'-------------------------------------------------- output_path <- file.path(tempdir(),"ATAC_example.MAT.gz") export_deepToolsMat(proplyrObject, con = output_path) ## ---- echo = FALSE------------------------------------------------------------ proplyrObject <- proplyrObject[1:2,1:2,1] ## ---- fig3, fig.height = 6, fig.width = 4, fig.keep='none'-------------------- # heatmap not printed, see below for examples of rendered heatmaps from this function heatmap <- generateEnrichedHeatmap(proplyrObject) class(heatmap) ## ---- echo=FALSE-------------------------------------------------------------- example <- system.file("extdata", "example_deepTools_MAT", package = "profileplyr") proplyrObject <- import_deepToolsMat(example) rownames(sampleData(proplyrObject)) <- c("Hindbrain", "Liver", "Kidney") ## ---- fig2, fig.height=8, fig.width=4, fig.keep='none'------------------------ EH_mat <- convertToEnrichedHeatmapMat(proplyrObject) EH_mat[[1]] # the matrices of this list can be inputs for EnrichedHeatmap() ## ----------------------------------------------------------------------------- object_sumMat <- summarize(proplyrObject, fun = rowMeans, output = "matrix") object_sumMat[1:3, ] ## ----------------------------------------------------------------------------- proplyrObject_long <- summarize(proplyrObject, fun = rowMeans, output = "long") proplyrObject_long[1:3, ] ## ---- fig15, fig.height=4, fig.width=4, fig.align='center'-------------------- library(ggplot2) ggplot(proplyrObject_long, aes(x = Sample, y = log(Signal))) + geom_boxplot() ## ---- results='hide'---------------------------------------------------------- proplyrObject_summ <- summarize(proplyrObject, fun = rowMeans, output = "object") ## ----------------------------------------------------------------------------- assays(proplyrObject_summ)[[1]][1:3, ] rowRanges(proplyrObject_summ)[1:3] ## ---- fig5, fig.height=6, fig.width=6, results='hide', fig.keep='none'-------- clusterRanges(proplyrObject, fun = rowMeans) # this code prints heatmap (does not return), but heatmap not shown here to save space ## ---- fig6, fig.height=6, fig.width=6----------------------------------------- set.seed(0) kmeans <- clusterRanges(proplyrObject, fun = rowMeans, kmeans_k = 4) rowRanges(kmeans)[1:3] params(kmeans)$rowGroupsInUse ## ---- fig7, fig.height=5, fig.width=5.5--------------------------------------- set.seed(0) kmeans <- clusterRanges(proplyrObject, fun = rowMeans, kmeans_k = 4, silent = FALSE) ## ---- fig8, fig.height=5, fig.width=5----------------------------------------- hclust <- clusterRanges(proplyrObject, fun = rowMeans, cutree_rows = 4, silent = FALSE) ## ---- results='hide'---------------------------------------------------------- output_path <- file.path(tempdir(),"kmeans_cluster_mat.gz") export_deepToolsMat(kmeans, con = output_path) ## ---- fig9, fig.height=6, fig.width=4----------------------------------------- generateEnrichedHeatmap(kmeans) ## ---- results='hide', fig.align="center"-------------------------------------- library(magrittr) set.seed(0) import_deepToolsMat(example) %>% clusterRanges(fun = rowMeans, kmeans_k = 4, silent = TRUE) %>% summarize(fun = rowMeans, output = "long") %>% ggplot(aes(x = Sample, y = log(Signal))) + geom_boxplot() + facet_grid(~cluster) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + scale_x_discrete(labels= c("Hindbrain", "Liver", "Kidney")) ## ----------------------------------------------------------------------------- anno <- annotateRanges(proplyrObject, TxDb = "mm10") rowRanges(anno)[1:3] ## ---- echo=FALSE-------------------------------------------------------------- # subset object to speed up demonstration proplyrObject <- proplyrObject[1:5, , ] ## ---- fig.keep="none"--------------------------------------------------------- anno_great <- annotateRanges_great(proplyrObject, species = "mm10") rowRanges(anno_great)[1:3] ## ----------------------------------------------------------------------------- mcols(proplyrObject)$newColumn <- rnorm(n = nrow(proplyrObject)) params(proplyrObject)$rowGroupsInUse switchGroup <- groupBy(proplyrObject, group = "newColumn") params(switchGroup)$rowGroupsInUse ## ---- echo=F------------------------------------------------------------------ # reload full object with all ranges proplyrObject <- import_deepToolsMat(example) rownames(sampleData(proplyrObject)) <- c("Hindbrain", "Liver", "Kidney") ## ---- results='hide'---------------------------------------------------------- #import pre-made GRangesList data("K27ac_GRlist_hind_liver_top5000") K27ac_GRlist_names <- c("hindbrain_K27ac", "liver_K27ac") # subset the profileplyr object to just hindbrain and liver, # and group by GRanges K27ac_groupByGR <- proplyrObject[,,grepl("Hindbrain|Liver", names(assays(proplyrObject)))] %>% groupBy(group = K27ac_GRlist_hind_liver_top5000, GRanges_names = K27ac_GRlist_names, include_nonoverlapping = FALSE, inherit_groups = FALSE) ## ----------------------------------------------------------------------------- rowRanges(K27ac_groupByGR)[1:3] ## ---- results='hide'---------------------------------------------------------- output_path <- file.path(tempdir(),"K27ac_GRoverlap.gz") export_deepToolsMat(K27ac_groupByGR, con = output_path) ## ----------------------------------------------------------------------------- data("gene_list_character") names(gene_list_character) head(gene_list_character[[1]]) ## ----message=FALSE------------------------------------------------------------ data("gene_list_dataframe") names(gene_list_dataframe) head(gene_list_dataframe[[1]]) signalFiles <- c(system.file("extdata","Sorted_Hindbrain_day_12_1_filtered.bam", package = "profileplyr"), system.file("extdata","Sorted_Liver_day_12_1_filtered.bam", package = "profileplyr")) # BAMs must be indexed require(Rsamtools) for (i in seq_along(signalFiles)){ indexBam(signalFiles[i]) } testRanges <- system.file("extdata", "newranges.bed", package = "profileplyr") ## ---- fig12, fig.height=7, fig.width=5, results='hide'------------------------ BamBigwig_to_chipProfile(signalFiles, testRanges, format = "bam", paired=FALSE, style="percentOfRegion", nOfWindows=20, distanceAround=40) %>% as_profileplyr %>% annotateRanges(TxDb = "mm10") %>% groupBy(group = gene_list_dataframe) %>% #subseting by our gene list generateEnrichedHeatmap(extra_annotation_columns = c("hindbrain_liv_stat"), sample_names = c("Hindbrain", "Liver")) ## ---- fig13, fig.height=8, out.width='100%', results='hide', dpi=35----------- # NOTE: could also use BamBigwig_to_chipProfile() and as_profileplyr() # within R to start from bigwig/BAM and BED file. set.seed(0) annotated_object <- import_deepToolsMat(example) %>% annotateRanges(TxDb = "mm10") %>% # annotate with ChIPseeker groupBy(group = gene_list_dataframe, include_nonoverlapping = TRUE, inherit_groups = FALSE) %>% # group by gene overlap groupBy(group = K27ac_GRlist_hind_liver_top5000, GRanges_names = K27ac_GRlist_names, include_nonoverlapping = TRUE, inherit_groups = FALSE) %>% # group by GRanges clusterRanges(kmeans_k = 3) %>% generateEnrichedHeatmap(extra_annotation_columns=c("annotation_short", "GR_overlap_names", "hindbrain_liv_stat"), sample_names = c("Hindbrain", "Liver", "Kidney"), extra_anno_width = c(5,5,8), matrices_color = c("white", "red"), extra_anno_color = list(NULL, c("black", "green", "red", "white"), c("#e7e1ef", "#c994c7", "#dd1c77")), genes_to_label = c("Myh9", "Sp4", "Rcan2"), gene_label_font_size = 10) ## ----sessionInfo-------------------------------------------------------------- sessionInfo()