script <- system.file("examples/atac_dnase_chip_example.R",package = "EpiCompare")
source(script)
htmltools::includeHTML("https://github.com/neurogenomics/EpiCompare/releases/download/1.3.3/atac_dnase_chip_example.html")
knitr::opts_chunk$set(echo = TRUE,
message = params$debug,
warning = params$debug,
cache = FALSE,
error = params$error,
results = if(isTRUE(params$interact)) 'asis' else 'markup')
### Create Output Directory ###
# if save_output = TRUE
if(params$save_output){
outfile_dir <- file.path(params$output_dir,"EpiCompare_file")
if(!dir.exists(outfile_dir)){
dir.create(outfile_dir, showWarnings = FALSE, recursive = TRUE)
}
}
#### ------ Prepare genome builds ------ ####
# e.g. genome_build <- list(reference="hg19",peakfiles="hg38",blacklist="hg19")
# or... genome_build <- "hg19"
builds <- prepare_genome_builds(genome_build = params$genome_build,
blacklist = params$blacklist)
## Standardise all data to hg19 build
output_build <- prepare_output_build(params$genome_build_output)
#### ------ Prepare peaklist(s) ------ ####
# and check that the list is named, if not default filenames are used
peaklist <- prepare_peaklist(peaklist = params$peakfiles)
peaklist <- liftover_grlist(grlist = peaklist,
input_build = builds$peaklist,
output_build = output_build)
#### ------ Prepare reference(s) ------ ####
reference <- prepare_reference(reference = params$reference)
reference <- liftover_grlist(grlist = reference,
input_build = builds$reference,
output_build = output_build)
#### ------ Prepare blacklist ------ ####
blacklist <- prepare_blacklist(blacklist = params$blacklist,
output_build = output_build,
blacklist_build = builds$blacklist)
### Standardise peaklist(s) ###
peaklist <- tidy_peakfile(peaklist = peaklist,
blacklist = blacklist)
peaklist_tidy <- peaklist
### Standardise reference(s) ###
# and include in peaklist
reference_tidy <- reference
if (!is.null(reference)){
reference_tidy <- tidy_peakfile(peaklist = reference,
blacklist = blacklist)
peaklist_tidy <- c(peaklist_tidy, reference_tidy)
}
### Obtain Genome Annotation ###
txdb <- check_genome_build(genome_build = output_build)
### Dynamic Figure Height ###
fig_height <- fig_length(default_size = 7,
number_of_items = length(peaklist_tidy),
max_items = 10)
needs_ref <- function(arg,
reference=NULL){
if(isTRUE(arg)){
if(is.null(reference)){
cat("NOTE: This plot is not generated when reference=NULL.")
return(FALSE)
} else{
return(TRUE)
}
} else {
return(FALSE)
}
}
EpiCompare
compares epigenomic datasets for quality control and benchmarking
purposes.
The report consists of three sections:
General Metrics: Metrics on peaks for each sample: % blacklisted and non-standard peaks, peak widths, fragments, duplication rates.
Peak Overlap: Frequency, percentage, statistical significance of overlapping and non-overlapping peaks. This also includes upset, precision-recall, and correlation plots.
Functional
Annotation: Functional annotation (ChromHMM
analysis, peak annotation, and enrichment analysis) of peaks. This also
includes peak distributions around transcription start sites
(TSS).
# print peak file names and numerate
cat(paste0(" - File ",seq_len(length(names(peaklist_tidy))),": ",
names(peaklist_tidy),
collapse = "\n\n"))
File 1: ATAC_ENCFF558BLC
File 2: Dnase_ENCFF274YGF
File 3: ChIP_ENCFF038DDS
File 4: Dnase_ENCFF185XRG_reference
Processed, filtered and lifted files for: peaklist
,
reference
, blacklist
download_button(object = list(peaklist = peaklist,
reference = reference_tidy,
blacklist = blacklist),
save_output = params$save_output,
filename = paste0("processed_peakfiles_",output_build),
outfile_dir = outfile_dir)
The EpiCompare
function call used to generate the
report:
cmd <- report_command(params = params,
peaklist_tidy = peaklist_tidy,
reference_tidy = reference_tidy)
cat(cmd)
EpiCompare(peakfiles = list('ATAC_ENCFF558BLC','Dnase_ENCFF274YGF','ChIP_ENCFF038DDS','Dnase_ENCFF185XRG_reference'),
genome_build = list('hg38'),
genome_build_output = 'hg38',
blacklist = NULL,
picard_files = list(),
reference = 'Dnase_ENCFF185XRG_reference',
upset_plot = TRUE,
stat_plot = TRUE,
chromHMM_plot = TRUE,
chromHMM_annotation = 'K562',
chipseeker_plot = TRUE,
enrichment_plot = TRUE,
tss_plot = TRUE,
tss_distance = c(-3000,3000),
precision_recall_plot = TRUE,
n_threshold = 20,
corr_plot = TRUE,
interact = TRUE,
save_output = TRUE,
output_dir = '/var/folders/zq/h7mtybc533b1qzkys_ttgpth0000gn/T//RtmphV63Tj',
workers = 1,
error = FALSE)
BRGenomics::tidyChromosomes()
.:information: Note: All EpiCompare
analyses conducted on the peak files after they have been
filtered (i.e. blacklisted regions and non-standard chromosomes
removed) and lifted (as needed).
peak_info_df <- peak_info(peaklist = peaklist,
blacklist = blacklist)
download_button(object = peak_info_df,
filename = "peak_info_df",
output_extension = ".csv")
# Print table
knitr::kable(peak_info_df, format = "markdown")
PeakN Before Tidy | Blacklisted Peaks Removed (%) | Non-standard Peaks Removed (%) | PeakN After Tidy | |
---|---|---|---|---|
ATAC_ENCFF558BLC | 12459 | 0 | 0 | 12459 |
Dnase_ENCFF274YGF | 12452 | 0 | 0 | 12452 |
ChIP_ENCFF038DDS | 5221 | 0 | 0 | 5221 |
save_output(save_output = params$save_output,
file = peak_info_df,
file_type = "data.frame",
filename = "peak_info",
outpath = outfile_dir)
remove(peak_info_df)
Metrics on fragments is shown only if Picard summary is provided. See manual for help.
if (!is.null(params$picard_files)){
fragment_info_df <- fragment_info(picard_list = params$picard_files)
download_button(object = fragment_info_df,
filename = "fragment_info",
output_extension = ".csv")
# Print data frame
knitr::kable(fragment_info_df, format = "markdown")
save_output(save_output = params$save_output,
file = fragment_info_df,
file_type = "data.frame",
filename = "fragment_info",
outpath = outfile_dir)
remove(fragment_info_df)
}
Distribution of peak widths in samples
width_plot <- width_boxplot(peaklist = peaklist_tidy,
interact = params$interact)
download_button(object = width_plot,
filename = "width_boxplot",
self_contained = params$interact,
add_download_button = params$add_download_button)
NULL
width_plot$plot
# Save boxplot
save_output(save_output = params$save_output,
file = width_plot$plot,
file_type = "ggplot",
filename = "width_plot",
outpath = outfile_dir,
interactive = params$interact)
# Remove variable
remove(width_plot)
Percentage of overlapping peaks between samples. Hover over heatmap for percentage values.
The heatmap can be interpreted as follows:
overlap_heatmap <- overlap_heatmap(peaklist = peaklist_tidy,
interact = params$interact)
download_button(object = overlap_heatmap,
filename = "overlap_heatmap",
self_contained = params$interact,add_download_button = params$add_download_button)
NULL
overlap_heatmap$plot
# Save output
save_output(save_output = params$save_output,
file = overlap_heatmap$plot,
file_type = "ggplot",
filename = "samples_percent_overlap",
outpath = outfile_dir,
interactive = params$interact)
# Delete variable
remove(overlap_heatmap)
Upset plot of overlapping peaks between samples. See here on how to interpret the plot.
upset_plot <- NULL
if(isTRUE(params$upset_plot)){
upset_plot <- overlap_upset_plot(peaklist = peaklist_tidy)
download_button(object = upset_plot,
filename = "upset_plot")
save_output(save_output = params$save_output,
file = upset_plot,
file_type = "image",
filename = "upset_plot",
outpath = outfile_dir)
}
Error : C stack usage 14613382 is too close to the limit quartz_off_screen 2
upset_plot
$plot $data peak ATAC_ENCFF558BLC ChIP_ENCFF038DDS Dnase_ENCFF185XRG_reference Dnase_ENCFF274YGF 1 1 1 0 1 1 2 2 1 1 1 1 3 3 1 0 0 0 4 4 1 0 1 0 5 5 1 0 1 0 6 6 1 0 0 0 7 7 1 0 0 0 8 8 1 1 1 1 9 9 1 0 1 0 10 10 1 1 1 1 11 11 1 0 1 1 12 12 1 0 0 1 13 13 1 1 1 1 14 14 1 1 1 1 15 15 1 1 1 1 16 16 1 0 0 0 17 17 1 0 0 0 18 18 1 1 1 0 19 19 1 1 1 1 20 20 1 1 1 1 21 21 1 0 0 0 22 22 1 0 0 0 23 23 1 0 0 0 24 24 1 1 0 0 25 25 1 1 1 1 26 26 1 1 1 1 27 27 1 0 1 0 28 28 1 0 0 0 29 29 1 0 1 0 30 30 1 0 1 1 31 31 1 0 0 0 32 32 1 0 1 1 33 33 1 0 0 0 34 34 1 1 1 1 35 35 1 0 1 1 36 36 1 0 1 1 37 37 1 0 0 0 38 38 1 1 1 1 39 39 1 0 0 0 40 40 1 0 0 0 41 41 1 0 1 1 42 42 1 0 0 0 43 43 1 1 1 1 44 44 1 1 1 1 45 45 1 0 0 0 46 46 1 0 0 0 47 47 1 0 1 1 48 48 1 0 1 1 49 49 1 0 0 0 50 50 1 0 1 1 51 51 1 1 1 1 52 52 1 1 1 1 53 53 1 1 1 0 54 54 1 0 1 0 55 55 1 0 1 1 56 56 1 1 0 0 57 57 1 0 1 1 58 58 1 1 1 1 59 59 1 0 1 1 60 60 1 0 0 0 61 61 1 0 1 0 62 62 1 0 1 0 63 63 1 1 1 1 64 64 1 0 0 0 65 65 1 0 1 1 66 66 1 1 1 1 67 67 1 0 0 0 68 68 1 0 1 0 69 69 1 0 1 1 70 70 1 0 0 0 71 71 1 1 1 1 72 72 1 0 0 0 73 73 1 0 1 1 74 74 1 1 1 1 75 75 1 0 0 0 76 76 1 0 0 0 77 77 1 0 0 0 78 78 1 0 1 0 79 79 1 0 1 1 80 80 1 0 1 0 81 81 1 0 0 0 82 82 1 0 0 0 83 83 1 0 1 1 84 84 1 0 0 0 85 85 1 0 1 0 86 86 1 1 1 1 87 87 1 0 1 1 88 88 1 0 1 1 89 89 1 0 0 0 90 90 1 1 1 1 91 91 1 0 1 1 92 92 1 0 1 1 93 93 1 0 0 0 94 94 1 0 1 1 95 95 1 1 1 1 96 96 1 0 0 0 97 97 1 0 0 0 98 98 1 1 1 1 99 99 1 1 1 1 100 100 1 1 1 1 101 101 1 1 1 1 102 102 1 0 1 1 103 103 1 0 1 1 104 104 1 0 0 0 105 105 1 0 0 0 106 106 1 0 1 1 107 107 1 0 1 1 108 108 1 0 1 1 109 109 1 0 1 0 110 110 1 1 1 1 111 111 1 0 0 0 112 112 1 0 1 1 113 113 1 0 1 1 114 114 1 1 1 1 115 115 1 1 1 1 116 116 1 0 1 1 117 117 1 1 1 1 118 118 1 0 1 0 119 119 1 0 1 1 120 120 1 1 1 1 121 121 1 0 0 0 122 122 1 1 1 1 123 123 1 1 1 1 124 124 1 1 1 1 125 125 1 0 0 0 126 126 1 0 0 0 127 127 1 0 1 1 128 128 1 0 1 1 129 129 1 0 0 0 130 130 1 1 1 1 131 131 1 0 0 0 132 132 1 0 0 1 133 133 1 1 1 1 134 134 1 0 1 0 135 135 1 0 0 0 136 136 1 0 1 1 137 137 1 1 1 1 138 138 1 0 0 0 139 139 1 1 1 1 140 140 1 0 1 1 141 141 1 0 1 1 142 142 1 0 1 1 143 143 1 0 1 1 144 144 1 0 0 0 145 145 1 0 0 0 146 146 1 0 0 0 147 147 1 0 0 0 148 148 1 0 1 1 149 149 1 0 1 1 150 150 1 0 1 1 151 151 1 0 1 1 152 152 1 0 1 1 153 153 1 0 1 0 154 154 1 0 1 0 155 155 1 0 1 0 156 156 1 1 1 1 157 157 1 0 0 0 158 158 1 0 1 1 159 159 1 0 1 1 160 160 1 0 1 0 161 161 1 0 1 1 162 162 1 0 1 1 163 163 1 0 0 0 164 164 1 0 0 0 165 165 1 0 1 1 166 166 1 0 1 1 167 167 1 0 1 0 168 168 1 1 1 1 169 169 1 0 1 1 170 170 1 0 1 1 171 171 1 0 0 0 172 172 1 0 1 0 173 173 1 0 0 0 174 174 1 0 1 1 175 175 1 0 0 0 176 176 1 0 1 0 177 177 1 0 1 1 178 178 1 0 1 1 179 179 1 0 1 1 180 180 1 0 0 0 181 181 1 0 0 0 182 182 1 0 1 1 183 183 1 1 1 1 184 184 1 0 1 1 185 185 1 0 1 0 186 186 1 0 1 0 187 187 1 0 0 0 188 188 1 0 0 0 189 189 1 0 0 0 190 190 1 0 0 0 191 191 1 0 0 0 192 192 1 0 0 0 193 193 1 0 0 0 194 194 1 0 0 0 195 195 1 0 0 0 196 196 1 0 1 1 197 197 1 0 0 0 198 198 1 0 1 1 199 199 1 0 1 1 200 200 1 0 0 0 [ reached ‘max’ / getOption(“max.print”) – omitted 46411 rows ]
remove(upset_plot)
Depending on the format of the reference file,
EpiCompare
produces different plots:
MACS2
):
EpiCompare
generates paired boxplot per sample showing the
distribution of -log10(q-value) of reference peaks that are
overlapping and non-overlapping with the sample dataset.EpiCompare
generates a barplot of percentage of overlapping
sample peaks with the reference. Statistical significance (adjusted
p-value) is written above each bar.Reference peakfile: Dnase_ENCFF185XRG_reference
if (needs_ref(params$stat_plot, reference)){
stat_plot <- overlap_stat_plot(reference = reference_tidy,
peaklist = peaklist,
txdb = txdb,
interact = params$interact)
download_button(object = stat_plot,
filename = "overlap_stat_plot",
button_label = "Download overlap stat plot",
self_contained = params$interact,
add_download_button = params$add_download_button)
stat_plot$plot
# Save output
save_output(save_output = params$save_output,
file = stat_plot$plot,
file_type = "ggplot",
filename = "stat_plot",
outpath = outfile_dir,
interactive = params$interact)
# Remove variables
remove(stat_plot)
}
The first plot shows the balance between precision and recall across multiple peak calling stringency thresholds.
The second plot shows F1 score (a score that combines precision and recall) across the different peak calling stringency thresholds.
2*(precision*recall) / (precision+recall)
pr_out <- NULL
if(needs_ref(params$precision_recall_plot, reference)){
#### Create save path ####
save_path <- if(isFALSE(params$save_output)){NULL}else{
file.path(outfile_dir,"precision_recall.csv")
}
pr_out <- plot_precision_recall(peakfiles = peaklist,
reference = reference_tidy,
n_threshold = params$n_threshold,
workers = params$workers,
show_plot = FALSE,
verbose = FALSE,
save_path = save_path,
interact = params$interact)
download_button(object = pr_out,
filename = "precision_recall",
self_contained = params$interact,
add_download_button = params$add_download_button)
}
NULL
pr_out$precision_recall_plot
cat("\n\n")
pr_out$f1_plot
remove(pr_out)
The correlation plot shows the correlation between the quantiles when the genome is binned at a set size. These quantiles are based on the intensity of the peak, dependent on the peak caller used (q-value for MACS2):
cp_out <- NULL
if(isTRUE(params$corr_plot)){
#### Create save path ####
save_path <- if(isFALSE(params$save_output)){NULL}else{
file.path(outfile_dir,"corr.csv.gz")
}
cp_out <- plot_corr(peakfiles = peaklist_tidy,
# reference can be NULL
reference = reference_tidy,
genome_build = output_build,
bin_size = params$bin_size,
workers = params$workers,
show_plot = FALSE,
save_path = save_path,
interact = params$interact)
download_button(object = cp_out,
filename = "correlation_plot",
self_contained = params$interact,add_download_button = params$add_download_button)
}
NULL
cp_out$corr_plot
remove(cp_out)
ChromHMM
annotates and characterises peaks into different chromatin states.
ChromHMM
annotations used in EpiCompare
were obtained from here.
ChromHMM annotation definitions:
For more information on ChromHMM states, please see here
ChromHMM annotation of individual samples.
samples_chromHMM <- NULL
if(isTRUE(params$chromHMM_plot)){
# Get ChromHMM annotation file
chromHMM_list <- get_chromHMM_annotation(cell_line = params$chromHMM_annotation)
# Plot chromHMM
samples_chromHMM <- plot_chromHMM(peaklist = peaklist_tidy,
chromHMM_annotation = chromHMM_list,
genome_build = output_build,
interact = params$interact)
download_button(object = samples_chromHMM,
filename = "samples_ChromHMM",
self_contained = params$interact,add_download_button = params$add_download_button)
save_output(save_output = params$save_output,
file = samples_chromHMM,
file_type = "ggplot",
filename = "samples_ChromHMM",
outpath = outfile_dir,
interactive = params$interact)
}
samples_chromHMM
remove(samples_chromHMM)
Percentage of Sample peaks found in Reference peaks (Reference peakfile: Dnase_ENCFF185XRG_reference)
if(needs_ref(params$chromHMM_plot, reference)){
# generate data frame of percentage overlap
sample_in_ref_df <- overlap_percent(peaklist1 = peaklist_tidy,
peaklist2 = reference_tidy,
invert = FALSE)
download_button(object = sample_in_ref_df,
filename = "sample_in_ref_df",
output_extension = ".csv")
knitr::kable(sample_in_ref_df, format = "markdown")
}
query | subject | overlap | total | Percentage | type | peaklist1 | peaklist2 |
---|---|---|---|---|---|---|---|
ATAC_ENCFF558BLC | Dnase_ENCFF185XRG_reference | 8604 | 12459 | 69.1 | precision | ATAC_ENCFF558BLC | Dnase_ENCFF185XRG_reference |
Dnase_ENCFF274YGF | Dnase_ENCFF185XRG_reference | 10226 | 12452 | 82.1 | precision | Dnase_ENCFF274YGF | Dnase_ENCFF185XRG_reference |
ChIP_ENCFF038DDS | Dnase_ENCFF185XRG_reference | 3760 | 5221 | 72.0 | precision | ChIP_ENCFF038DDS | Dnase_ENCFF185XRG_reference |
Dnase_ENCFF185XRG_reference | Dnase_ENCFF185XRG_reference | 16479 | 16479 | 100.0 | precision | Dnase_ENCFF185XRG_reference | Dnase_ENCFF185XRG_reference |
Dnase_ENCFF185XRG_reference | ATAC_ENCFF558BLC | 10891 | 16479 | 66.1 | recall | ATAC_ENCFF558BLC | Dnase_ENCFF185XRG_reference |
Dnase_ENCFF185XRG_reference | Dnase_ENCFF274YGF | 10190 | 16479 | 61.8 | recall | Dnase_ENCFF274YGF | Dnase_ENCFF185XRG_reference |
Dnase_ENCFF185XRG_reference | ChIP_ENCFF038DDS | 6038 | 16479 | 36.6 | recall | ChIP_ENCFF038DDS | Dnase_ENCFF185XRG_reference |
Dnase_ENCFF185XRG_reference | Dnase_ENCFF185XRG_reference | 16479 | 16479 | 100.0 | recall | Dnase_ENCFF185XRG_reference | Dnase_ENCFF185XRG_reference |
ChromHMM annotation of sample peaks found in reference peaks.
sample_in_ref_chromHMM <- NULL
if(needs_ref(params$chromHMM_plot, reference)){
# Obtain overlapping peaks
sample_in_ref_list <- mapply(peaklist_tidy, FUN=function(file){
IRanges::subsetByOverlaps(x = file,
ranges = reference_tidy[[1]])
})
# Run ChromHMM
sample_in_ref_chromHMM <- plot_chromHMM(peaklist = sample_in_ref_list,
chromHMM_annotation = chromHMM_list,
genome_build = output_build,
interact = params$interact)
download_button(object = sample_in_ref_chromHMM,
filename = "sample_in_ref_chromHMM",
self_contained = params$interact,add_download_button = params$add_download_button)
save_output(save_output = params$save_output,
file = sample_in_ref_chromHMM,
file_type = "ggplot",
filename = "sample_in_ref_ChromHMM",
outpath = outfile_dir,
interactive = params$interact)
}
sample_in_ref_chromHMM
remove(sample_in_ref_chromHMM)
Percentage of Reference peaks found in Sample peaks (Reference peakfile: Dnase_ENCFF185XRG_reference)
if (needs_ref(params$chromHMM_plot, reference)){
# Data frame of overlapping peaks
ref_in_sample_df <- overlap_percent(peaklist1 = reference_tidy,
peaklist2 = peaklist_tidy,
invert = FALSE)
download_button(object = ref_in_sample_df,
filename = "ref_in_sample_df",
output_extension = ".csv")
knitr::kable(ref_in_sample_df, format = "markdown")
}
query | subject | overlap | total | Percentage | type | peaklist1 | peaklist2 |
---|---|---|---|---|---|---|---|
Dnase_ENCFF185XRG_reference | ATAC_ENCFF558BLC | 10891 | 16479 | 66.1 | precision | Dnase_ENCFF185XRG_reference | ATAC_ENCFF558BLC |
Dnase_ENCFF185XRG_reference | Dnase_ENCFF274YGF | 10190 | 16479 | 61.8 | precision | Dnase_ENCFF185XRG_reference | Dnase_ENCFF274YGF |
Dnase_ENCFF185XRG_reference | ChIP_ENCFF038DDS | 6038 | 16479 | 36.6 | precision | Dnase_ENCFF185XRG_reference | ChIP_ENCFF038DDS |
Dnase_ENCFF185XRG_reference | Dnase_ENCFF185XRG_reference | 16479 | 16479 | 100.0 | precision | Dnase_ENCFF185XRG_reference | Dnase_ENCFF185XRG_reference |
ATAC_ENCFF558BLC | Dnase_ENCFF185XRG_reference | 8604 | 12459 | 69.1 | recall | Dnase_ENCFF185XRG_reference | ATAC_ENCFF558BLC |
Dnase_ENCFF274YGF | Dnase_ENCFF185XRG_reference | 10226 | 12452 | 82.1 | recall | Dnase_ENCFF185XRG_reference | Dnase_ENCFF274YGF |
ChIP_ENCFF038DDS | Dnase_ENCFF185XRG_reference | 3760 | 5221 | 72.0 | recall | Dnase_ENCFF185XRG_reference | ChIP_ENCFF038DDS |
Dnase_ENCFF185XRG_reference | Dnase_ENCFF185XRG_reference | 16479 | 16479 | 100.0 | recall | Dnase_ENCFF185XRG_reference | Dnase_ENCFF185XRG_reference |
ChromHMM annotation of reference peaks found in sample peaks.
ref_in_sample_chromHMM <- NULL
if (needs_ref(params$chromHMM_plot, reference)){
# Subset overlapping peaks
ref_in_sample_list <- mapply(peaklist_tidy, FUN = function(file){
IRanges::subsetByOverlaps(x = reference_tidy[[1]],
ranges = file)
})
# Plot ChromHMM
ref_in_sample_chromHMM <- plot_chromHMM(peaklist = ref_in_sample_list,
chromHMM_annotation = chromHMM_list,
genome_build = output_build,
interact = params$interact)
download_button(object = ref_in_sample_chromHMM,
filename = "ref_in_sample_chromHMM",
self_contained = params$interact,add_download_button = params$add_download_button)
save_output(save_output = params$save_output,
file = ref_in_sample_chromHMM,
file_type = "ggplot",
filename = "ref_in_sample_ChromHMM",
outpath = outfile_dir,
interactive = params$interact)
}
ref_in_sample_chromHMM
remove(ref_in_sample_chromHMM)
Percentage of sample peaks not found in reference peaks (Reference peakfile: Dnase_ENCFF185XRG_reference)
if (needs_ref(params$chromHMM_plot, reference)){
# Data frame of non-overlapping peaks
sample_not_in_ref_df <- overlap_percent(peaklist1 = peaklist_tidy,
peaklist2 = reference_tidy,
invert = TRUE)
download_button(object = sample_not_in_ref_df,
filename = "sample_not_in_ref_df",
output_extension = ".csv")
knitr::kable(sample_not_in_ref_df, format = "markdown")
}
query | subject | overlap | total | Percentage | type | peaklist1 | peaklist2 |
---|---|---|---|---|---|---|---|
ATAC_ENCFF558BLC | Dnase_ENCFF185XRG_reference | 3855 | 12459 | 30.9 | precision | ATAC_ENCFF558BLC | Dnase_ENCFF185XRG_reference |
Dnase_ENCFF274YGF | Dnase_ENCFF185XRG_reference | 2226 | 12452 | 17.9 | precision | Dnase_ENCFF274YGF | Dnase_ENCFF185XRG_reference |
ChIP_ENCFF038DDS | Dnase_ENCFF185XRG_reference | 1461 | 5221 | 28.0 | precision | ChIP_ENCFF038DDS | Dnase_ENCFF185XRG_reference |
Dnase_ENCFF185XRG_reference | Dnase_ENCFF185XRG_reference | 0 | 16479 | 0.0 | precision | Dnase_ENCFF185XRG_reference | Dnase_ENCFF185XRG_reference |
Dnase_ENCFF185XRG_reference | ATAC_ENCFF558BLC | 5588 | 16479 | 33.9 | recall | ATAC_ENCFF558BLC | Dnase_ENCFF185XRG_reference |
Dnase_ENCFF185XRG_reference | Dnase_ENCFF274YGF | 6289 | 16479 | 38.2 | recall | Dnase_ENCFF274YGF | Dnase_ENCFF185XRG_reference |
Dnase_ENCFF185XRG_reference | ChIP_ENCFF038DDS | 10441 | 16479 | 63.4 | recall | ChIP_ENCFF038DDS | Dnase_ENCFF185XRG_reference |
Dnase_ENCFF185XRG_reference | Dnase_ENCFF185XRG_reference | 0 | 16479 | 0.0 | recall | Dnase_ENCFF185XRG_reference | Dnase_ENCFF185XRG_reference |
ChromHMM annotation of sample peaks not found in reference peaks.
sample_not_in_ref_chromHMM <- NULL
if (needs_ref(params$chromHMM_plot, reference)){
sample_not_in_ref_list <- mapply(peaklist_tidy, FUN = function(file){
IRanges::subsetByOverlaps(x = file,
ranges = reference_tidy[[1]],
invert = TRUE)
})
# Run ChromHMM
sample_not_in_ref_chromHMM<-plot_chromHMM(peaklist = sample_not_in_ref_list,
chromHMM_annotation = chromHMM_list,
genome_build = output_build,
interact = params$interact)
download_button(object = sample_not_in_ref_chromHMM,
filename = "sample_not_in_ref_chromHMM",
self_contained = params$interact,add_download_button = params$add_download_button)
save_output(save_output = params$save_output,
file = sample_not_in_ref_chromHMM,
file_type = "ggplot",
filename = "sample_not_in_ref_ChromHMM",
outpath = outfile_dir,
interactive = params$interact)
}
sample_not_in_ref_chromHMM
remove(sample_not_in_ref_chromHMM)
Percentage of reference peaks not found in sample peaks (Reference peakfile: Dnase_ENCFF185XRG_reference)
if (needs_ref(params$chromHMM_plot, reference)){
# Data frame of non-overlapping peaks
ref_not_in_sample_df <- overlap_percent(peaklist1 = reference_tidy,
peaklist2 = peaklist_tidy,
invert = TRUE)
download_button(object = ref_not_in_sample_df,
filename = "ref_not_in_sample_df",
output_extension = ".csv")
knitr::kable(ref_not_in_sample_df, format = "markdown")
}
query | subject | overlap | total | Percentage | type | peaklist1 | peaklist2 |
---|---|---|---|---|---|---|---|
Dnase_ENCFF185XRG_reference | ATAC_ENCFF558BLC | 5588 | 16479 | 33.9 | precision | Dnase_ENCFF185XRG_reference | ATAC_ENCFF558BLC |
Dnase_ENCFF185XRG_reference | Dnase_ENCFF274YGF | 6289 | 16479 | 38.2 | precision | Dnase_ENCFF185XRG_reference | Dnase_ENCFF274YGF |
Dnase_ENCFF185XRG_reference | ChIP_ENCFF038DDS | 10441 | 16479 | 63.4 | precision | Dnase_ENCFF185XRG_reference | ChIP_ENCFF038DDS |
Dnase_ENCFF185XRG_reference | Dnase_ENCFF185XRG_reference | 0 | 16479 | 0.0 | precision | Dnase_ENCFF185XRG_reference | Dnase_ENCFF185XRG_reference |
ATAC_ENCFF558BLC | Dnase_ENCFF185XRG_reference | 3855 | 12459 | 30.9 | recall | Dnase_ENCFF185XRG_reference | ATAC_ENCFF558BLC |
Dnase_ENCFF274YGF | Dnase_ENCFF185XRG_reference | 2226 | 12452 | 17.9 | recall | Dnase_ENCFF185XRG_reference | Dnase_ENCFF274YGF |
ChIP_ENCFF038DDS | Dnase_ENCFF185XRG_reference | 1461 | 5221 | 28.0 | recall | Dnase_ENCFF185XRG_reference | ChIP_ENCFF038DDS |
Dnase_ENCFF185XRG_reference | Dnase_ENCFF185XRG_reference | 0 | 16479 | 0.0 | recall | Dnase_ENCFF185XRG_reference | Dnase_ENCFF185XRG_reference |
ChromHMM annotation of reference peaks not found in sample peaks.
ref_not_in_sample_chromHMM <- NULL
if (needs_ref(params$chromHMM_plot, reference)){
# Subset unique peaks
ref_not_in_sample_list <- mapply(peaklist_tidy, FUN = function(file){
IRanges::subsetByOverlaps(x = reference_tidy[[1]],
ranges = file,
invert = TRUE)
})
# Run ChromHMM
ref_not_in_sample_chromHMM<-plot_chromHMM(peaklist = ref_not_in_sample_list,
chromHMM_annotation = chromHMM_list,
genome_build = output_build,
interact = params$interact)
download_button(object = ref_not_in_sample_chromHMM,
filename = "ref_not_in_sample_chromHMM",
self_contained = params$interact,add_download_button = params$add_download_button)
save_output(save_output = params$save_output,
file = ref_not_in_sample_chromHMM,
file_type = "ggplot",
filename = "ref_not_in_sample_ChromHMM",
outpath = outfile_dir,
interactive = params$interact)
}
ref_not_in_sample_chromHMM
remove(ref_not_in_sample_chromHMM)
EpiCompare
uses ChIPseeker::annotatePeak()
to annotate peaks with the nearest gene and genomic regions where the
peak is located. The peaks are annotated with genes taken from human
genome annotations (hg19 or hg38) distributed by Bioconductor.
chipseeker_plot <- NULL
if(isTRUE(params$chipseeker_plot)){
chipseeker_plot <- plot_ChIPseeker_annotation(
peaklist = peaklist_tidy,
txdb = txdb,
tss_distance = params$tss_distance,
interact = params$interact)
download_button(object = chipseeker_plot,
filename = "chipseeker_plot",
self_contained = params$interact,add_download_button = params$add_download_button)
save_output(save_output = params$save_output,
file = chipseeker_plot,
file_type = "ggplot",
filename = "chipseeker_annotation",
outpath = outfile_dir,
interactive = params$interact)
}
chipseeker_plot
remove(chipseeker_plot)
EpiCompare
performs KEGG pathway and GO enrichment
analysis using clusterProfiler
.
ChIPseeker::annotatePeak()
is first used to assign peaks to
nearest genes. Biological themes amongst the genes are identified using
ontologies (KEGG and GO). The peaks are annotated with genes taken from
annotations of human genome (hg19 or hg38) provided by Bioconductor.
enrichment_plots <- NULL
if (isTRUE(params$enrichment_plot)){
enrichment_plots <- plot_enrichment(peaklist = peaklist_tidy,
txdb = txdb,
tss_distance = params$tss_distance,
interact = params$interact)
# Figure height
max_terms <- max(
length(unique(enrichment_plots$kegg_plot$data$Description)),
length(unique(enrichment_plots$go_plot$data$Description))
)
fig_height <- fig_length(default_size = 10,
number_of_items = max_terms,
max_items = 20)
}
Kyoto Encyclopedia of Genes and Genomes (KEGG) enrichment results.
if (isTRUE(params$enrichment_plot)){
download_button(object = enrichment_plots$kegg_plot,
filename = "KEGG_plot",
self_contained = params$interact,add_download_button = params$add_download_button)
save_output(save_output = params$save_output,
file = enrichment_plots$kegg_plot,
file_type = "ggplot",
filename = "KEGG_analysis",
outpath = outfile_dir,
interactive = params$interact)
}
enrichment_plots$kegg_plot
Gene Ontology (GO) enrichment results.
GeneRatio definition:
if (isTRUE(params$enrichment_plot)){
download_button(object = enrichment_plots$go_plot,
filename = "GO_plot",
self_contained = params$interact,add_download_button = params$add_download_button)
save_output(save_output = params$save_output,
file = enrichment_plots$go_plot,
file_type = "ggplot",
filename = "GO_analysis",
outpath = outfile_dir,
interactive = params$interact)
}
enrichment_plots$go_plot
remove(enrichment_plots)
This plots peaks that are mapping to transcriptional start sites (TSS). TSS regions are defined as the flanking sequence of the TSS sites.
By default, this function plots the frequency of peaks upstream
(-3000bp) and downstream (default: +3000bp) of TSS.
These ranges can be adjusted with the argument
EpCompare(tss_distance=)
.
The grey area around the main frequency line represents the 95% confidence interval estimated by bootstrapping.
tssplt <- NULL
if (isTRUE(params$tss_plot)){
tssplt <- tss_plot(peaklist = peaklist_tidy,
txdb = txdb,
tss_distance = params$tss_distance,
workers = params$workers,
interact = params$interact)
download_button(object = tssplt,
filename = "tss_plots",
self_contained = params$interact,add_download_button = params$add_download_button)
}
Running bootstrapping for tag matrix… 2023-02-24 19:58:54 Running bootstrapping for tag matrix… 2023-02-24 19:59:25 Running bootstrapping for tag matrix… 2023-02-24 19:59:46 Running bootstrapping for tag matrix… 2023-02-24 20:00:20 NULL
tssplt
$ATAC_ENCFF558BLC
$Dnase_ENCFF274YGF
$ChIP_ENCFF038DDS
$Dnase_ENCFF185XRG_reference
remove(tssplt,p)
If you use EpiCompare
, please cite:
cat(utils::citation("EpiCompare")$textVersion)
EpiCompare: R package for the comparison and quality control of epigenomic peak files (2022) Sera Choi, Brian M. Schilder, Leyla Abbasova, Alan E. Murphy, Nathan G. Skene, bioRxiv, 2022.07.22.501149; doi: https://doi.org/10.1101/2022.07.22.501149
utils::sessionInfo()
## R version 4.2.1 (2022-06-23)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Ventura 13.2.1
##
## Matrix products: default
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] EpiCompare_1.3.4 rtracklayer_1.58.0 GenomicRanges_1.50.2 GenomeInfoDb_1.34.9 IRanges_2.32.0
## [6] S4Vectors_0.36.1 BiocGenerics_0.44.0 PeakyFinders_0.99.3
##
## loaded via a namespace (and not attached):
## [1] rappdirs_0.3.3 R.methodsS3_1.8.2
## [3] tidyr_1.3.0 ggplot2_3.4.1
## [5] bit64_4.0.5 knitr_1.42
## [7] DelayedArray_0.24.0 R.utils_2.12.2
## [9] data.table_1.14.8 KEGGREST_1.38.0
## [11] RCurl_1.98-1.10 GEOquery_2.66.0
## [13] generics_0.1.3 GenomicFeatures_1.50.4
## [15] cowplot_1.1.1 RSQLite_2.3.0
## [17] shadowtext_0.1.2 bit_4.0.5
## [19] tzdb_0.3.0 enrichplot_1.18.3
## [21] webshot_0.5.4 xml2_1.3.3
## [23] lubridate_1.9.2 httpuv_1.6.9
## [25] SummarizedExperiment_1.28.0 assertthat_0.2.1
## [27] viridis_0.6.2 xfun_0.37
## [29] hms_1.1.2 jquerylib_0.1.4
## [31] evaluate_0.20 promises_1.2.0.1
## [33] TSP_1.2-2 fansi_1.0.4
## [35] restfulr_0.0.15 progress_1.2.2
## [37] caTools_1.18.2 dendextend_1.16.0
## [39] dbplyr_2.3.0 igraph_1.4.0
## [41] DBI_1.1.3 geneplotter_1.76.0
## [43] htmlwidgets_1.6.1 purrr_1.0.1
## [45] ellipsis_0.3.2 crosstalk_1.2.0
## [47] dplyr_1.1.0 annotate_1.76.0
## [49] gridBase_0.4-7 biomaRt_2.54.0
## [51] MatrixGenerics_1.10.0 vctrs_0.5.2
## [53] Biobase_2.58.0 cachem_1.0.6
## [55] withr_2.5.0 ggforce_0.4.1
## [57] BSgenome.Hsapiens.UCSC.hg38_1.4.5 HDO.db_0.99.1
## [59] BSgenome_1.66.3 genomation_1.30.0
## [61] vroom_1.6.1 GenomicAlignments_1.34.0
## [63] treeio_1.22.0 prettyunits_1.1.1
## [65] DOSE_3.24.2 ExperimentHub_2.6.0
## [67] ape_5.7 dir.expiry_1.6.0
## [69] lazyeval_0.2.2 crayon_1.5.2
## [71] basilisk.utils_1.10.0 labeling_0.4.2
## [73] pkgconfig_2.0.3 tweenr_2.0.2
## [75] nlme_3.1-162 pkgload_1.3.2
## [77] seriation_1.4.1 rlang_1.0.6
## [79] lifecycle_1.0.3 downloader_0.4
## [81] registry_0.5-1 filelock_1.0.2
## [83] BiocFileCache_2.6.1 seqPattern_1.30.0
## [85] AnnotationHub_3.6.0 polyclip_1.10-4
## [87] matrixStats_0.63.0 Matrix_1.5-3
## [89] aplot_0.1.9 boot_1.3-28.1
## [91] base64enc_0.1-3 png_0.1-8
## [93] viridisLite_0.4.1 rjson_0.2.21
## [95] ca_0.71.1 bitops_1.0-7
## [97] gson_0.0.9 R.oo_1.25.0
## [99] KernSmooth_2.23-20 Biostrings_2.66.0
## [101] blob_1.2.3 MACSr_1.6.0
## [103] stringr_1.5.0 qvalue_2.30.0
## [105] regioneR_1.30.0 readr_2.1.4
## [107] gridGraphics_0.5-1 echodata_0.99.16
## [109] scales_1.2.1 memoise_2.0.1
## [111] magrittr_2.0.3 plyr_1.8.8
## [113] gplots_3.1.3 zlibbioc_1.44.0
## [115] compiler_4.2.1 scatterpie_0.1.8
## [117] echoconda_0.99.9 TxDb.Hsapiens.UCSC.hg38.knownGene_3.16.0
## [119] BiocIO_1.8.0 RColorBrewer_1.1-3
## [121] plotrix_3.8-2 DESeq2_1.38.3
## [123] Rsamtools_2.14.0 cli_3.6.0
## [125] XVector_0.38.0 patchwork_1.1.2
## [127] MASS_7.3-58.2 tidyselect_1.2.0
## [129] stringi_1.7.12 highr_0.10
## [131] yaml_2.3.7 GOSemSim_2.24.0
## [133] locfit_1.5-9.7 ggrepel_0.9.3
## [135] grid_4.2.1 sass_0.4.5
## [137] fastmatch_1.1-3 tools_4.2.1
## [139] timechange_0.2.0 parallel_4.2.1
## [141] rstudioapi_0.14 foreach_1.5.2
## [143] TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2 piggyback_0.1.4
## [145] gridExtra_2.3 BRGenomics_1.10.0
## [147] farver_2.1.1 ComplexUpset_1.3.3
## [149] ggraph_2.1.0 digest_0.6.31
## [151] BiocManager_1.30.19 shiny_1.7.4
## [153] Rcpp_1.0.10 BiocVersion_3.16.0
## [155] later_1.3.0 org.Hs.eg.db_3.16.0
## [157] httr_1.4.4 AnnotationDbi_1.60.0
## [159] colorspace_2.1-0 brio_1.1.3
## [161] XML_3.99-0.13 fs_1.6.1
## [163] reticulate_1.28 splines_4.2.1
## [165] yulab.utils_0.0.6 tidytree_0.4.2
## [167] graphlayouts_0.8.4 basilisk_1.10.2
## [169] ggplotify_0.1.0 plotly_4.10.1
## [171] xtable_1.8-4 jsonlite_1.8.4
## [173] ggtree_3.6.2 downloadthis_0.3.2
## [175] heatmaply_1.4.2 tidygraph_1.2.3
## [177] ggfun_0.0.9 testthat_3.1.6
## [179] R6_2.5.1 pillar_1.8.1
## [181] htmltools_0.5.4 mime_0.12
## [183] clusterProfiler_4.6.0 glue_1.6.2
## [185] fastmap_1.1.0 DT_0.27
## [187] BiocParallel_1.32.5 interactiveDisplayBase_1.36.0
## [189] codetools_0.2-19 ChIPseeker_1.34.1
## [191] fgsea_1.24.0 utf8_1.2.3
## [193] lattice_0.20-45 bslib_0.4.2
## [195] tibble_3.1.8 curl_5.0.0
## [197] bsplus_0.1.4 gtools_3.9.4
## [199] zip_2.2.2 GO.db_3.16.0
## [201] openxlsx_4.2.5.2 limma_3.54.1
## [203] rmarkdown_2.20.1 munsell_0.5.0
## [205] GenomeInfoDbData_1.2.9 iterators_1.0.14
## [207] impute_1.72.3 reshape2_1.4.4
## [209] gtable_0.3.1