## ---- include = FALSE--------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) library(BiocStyle) ## ----"install", eval = FALSE-------------------------------------------------- # if (!requireNamespace("BiocManager", quietly = TRUE)) { # install.packages("BiocManager") # } # # Install the development version of Bioconductor (need 3.14 and above) # # BiocManager::install(version = "devel") # # Check that you have a valid Bioconductor installation # # BiocManager::valid() # # Install the package # BiocManager::install("excluderanges", version = "devel") # # # BiocManager::install("mdozmorov/excluderanges") ## ----AnnotationHub------------------------------------------------------------ suppressMessages(library(AnnotationHub)) ah <- AnnotationHub() query_data <- query(ah, "excluderanges") query_data ## ----hg38excluderanges-------------------------------------------------------- # Check titles # as.data.frame(mcols(query_data[1:10])["title"]) excludeGR.hg38.Kundaje.1 <- query_data[["AH95917"]] excludeGR.hg38.Kundaje.1 ## ----eval=FALSE--------------------------------------------------------------- # rtracklayer::export(excludeGR.hg38.Kundaje.1, "hg38.Kundaje.GRCh38_unified_Excludable.bed", format = "bed") ## ----allhg38excluderanges----------------------------------------------------- query_data <- query(ah, c("excluderanges", "hg38", "Exclusion regions")) query_data excludeGR.hg38.Bernstein <- query_data[["AH95915"]] excludeGR.hg38.Kundaje.2 <- query_data[["AH95916"]] excludeGR.hg38.Reddy <- query_data[["AH95918"]] excludeGR.hg38.Wold <- query_data[["AH95919"]] excludeGR.hg38.Yeo <- query_data[["AH95920"]] ## ----excluderanges_hg38_count, fig.width=6.5, fig.height=2-------------------- library(ggplot2) mtx_to_plot <- data.frame(Count = c(length(excludeGR.hg38.Bernstein), length(excludeGR.hg38.Kundaje.1), length(excludeGR.hg38.Kundaje.2), length(excludeGR.hg38.Reddy), length(excludeGR.hg38.Wold), length(excludeGR.hg38.Yeo)), Source = c("Bernstein.Mint_Excludable_GRCh38", "Kundaje.GRCh38_unified_Excludable", "Kundaje.GRCh38.Excludable", "Reddy.wgEncodeDacMapabilityConsensusExcludable", "Wold.hg38mitoExcludable", "Yeo.eCLIP_Excludableregions.hg38liftover.bed")) # Order Source by the number of regions mtx_to_plot$Source <- factor(mtx_to_plot$Source, levels = mtx_to_plot$Source[order(mtx_to_plot$Count)]) ggplot(mtx_to_plot, aes(x = Source, y = Count, fill = Source)) + geom_bar(stat = "identity") + coord_flip() + theme_bw() + theme(legend.position = "none") # ggsave("man/figures/excluderanges_hg38_count.png", width = 5.5, height = 2) ## ----echo=FALSE, eval=FALSE--------------------------------------------------- # knitr::include_graphics('man/figures/excluderanges_hg38_count.png') ## ----excluderanges_hg38_width, fig.width=6.5, fig.height=2-------------------- library(ggridges) mtx_to_plot <- data.frame(Width = c(width(excludeGR.hg38.Bernstein), width(excludeGR.hg38.Kundaje.1), width(excludeGR.hg38.Kundaje.2), width(excludeGR.hg38.Reddy), width(excludeGR.hg38.Wold), width(excludeGR.hg38.Yeo)), Source = c(rep("Bernstein.Mint_Excludable_GRCh38", length(excludeGR.hg38.Bernstein)), rep("Kundaje.GRCh38_unified_Excludable", length(excludeGR.hg38.Kundaje.1)), rep("Kundaje.GRCh38.Excludable", length(excludeGR.hg38.Kundaje.2)), rep("Reddy.wgEncodeDacMapabilityConsensusExcludable", length(excludeGR.hg38.Reddy)), rep("Wold.hg38mitoExcludable", length(excludeGR.hg38.Wold)), rep("Yeo.eCLIP_Excludableregions.hg38liftover.bed", length(excludeGR.hg38.Yeo)))) ggplot(mtx_to_plot, aes(x = log2(Width), y = Source, fill = Source)) + geom_density_ridges() + theme_bw() + theme(legend.position = "none") # ggsave("man/figures/excluderanges_hg38_width.png", width = 5.5, height = 2) ## ----echo=FALSE, eval=FALSE--------------------------------------------------- # knitr::include_graphics('man/figures/excluderanges_hg38_width.png') ## ----excluderanges_hg38_sumwidth, fig.width=6.5, fig.height=2----------------- mtx_to_plot <- data.frame(TotalWidth = c(sum(width(excludeGR.hg38.Bernstein)), sum(width(excludeGR.hg38.Kundaje.1)), sum(width(excludeGR.hg38.Kundaje.2)), sum(width(excludeGR.hg38.Reddy)), sum(width(excludeGR.hg38.Wold)), sum(width(excludeGR.hg38.Yeo))), Source = c("Bernstein.Mint_Excludable_GRCh38", "Kundaje.GRCh38_unified_Excludable", "Kundaje.GRCh38.Excludable", "Reddy.wgEncodeDacMapabilityConsensusExcludable", "Wold.hg38mitoExcludable", "Yeo.eCLIP_Excludableregions.hg38liftover")) ggplot(mtx_to_plot, aes(x = TotalWidth, y = Source, fill = Source)) + geom_bar(stat="identity") + scale_x_log10() + scale_y_discrete(label=abbreviate) + xlab("log10 total width") # ggsave("man/figures/excluderanges_hg38_sumwidth.png", width = 6.5, height = 2) ## ----echo=FALSE, eval=FALSE--------------------------------------------------- # knitr::include_graphics('man/figures/excluderanges_hg38_sumwidth.png') ## ----excluderanges_hg38_jaccard, warning=FALSE, fig.width=6.5, fig.height=6---- library(pheatmap) library(stringr) # Jaccard calculations jaccard <- function(gr_a, gr_b) { intersects <- GenomicRanges::intersect(gr_a, gr_b, ignore.strand = TRUE) intersection <- sum(width(intersects)) union <- sum(width(GenomicRanges::union(gr_a, gr_b, ignore.strand = TRUE))) DataFrame(intersection, union, jaccard = intersection/union, n_intersections = length(intersects)) } # List and names of all excludable regions all_excludeGR_list <- list(excludeGR.hg38.Bernstein, excludeGR.hg38.Kundaje.1, excludeGR.hg38.Kundaje.2, excludeGR.hg38.Reddy, excludeGR.hg38.Wold, excludeGR.hg38.Yeo) all_excludeGR_name <- c("Bernstein.Mint_Excludable_GRCh38", "Kundaje.GRCh38_unified_Excludable", "Kundaje.GRCh38.Excludable", "Reddy.wgEncodeDacMapabilityConsensusExcludable", "Wold.hg38mitoExcludable", "Yeo.eCLIP_Excludableregions.hg38liftover") # Correlation matrix, empty mtx_to_plot <- matrix(data = 0, nrow = length(all_excludeGR_list), ncol = length(all_excludeGR_list)) # Fill it in for (i in 1:length(all_excludeGR_list)) { for (j in 1:length(all_excludeGR_list)) { # If diagonal, set to zero if (i == j) mtx_to_plot[i, j] <- 0 # Process only one half, the other is symmetric if (i > j) { mtx_to_plot[i, j] <- mtx_to_plot[j, i] <- jaccard(all_excludeGR_list[[i]], all_excludeGR_list[[j]])[["jaccard"]] } } } # Trim row/colnames rownames(mtx_to_plot) <- colnames(mtx_to_plot) <- str_trunc(all_excludeGR_name, width = 25) # Save the plot # png("man/figures/excluderanges_hg38_jaccard.png", width = 1000, height = 900, res = 200) pheatmap(data.matrix(mtx_to_plot)) # dev.off() ## ----echo=FALSE, eval=FALSE--------------------------------------------------- # knitr::include_graphics('man/figures/excluderanges_hg38_jaccard.png') ## ----excluderanges_hg38_Reddy_metadata, fig.width=6.5, fig.height=3----------- mcols(excludeGR.hg38.Reddy) mtx_to_plot <- as.data.frame(table(mcols(excludeGR.hg38.Reddy)[["name"]])) colnames(mtx_to_plot) <- c("Type", "Number") mtx_to_plot <- mtx_to_plot[order(mtx_to_plot$Number), ] mtx_to_plot$Type <- factor(mtx_to_plot$Type, levels = mtx_to_plot$Type) ggplot(mtx_to_plot, aes(x = Number, y = Type, fill = Type)) + geom_bar(stat="identity") + theme_bw() + theme(legend.position = "none") # ggsave("man/figures/excluderanges_hg38_Reddy_metadata.png", width = 5, height = 2.5) ## ----echo=FALSE, eval=FALSE--------------------------------------------------- # knitr::include_graphics('man/figures/excluderanges_hg38_Reddy_metadata.png') ## ----combinedexcluderanges---------------------------------------------------- excludeGR.hg38.all <- reduce(c(excludeGR.hg38.Bernstein, excludeGR.hg38.Kundaje.1, excludeGR.hg38.Kundaje.2, excludeGR.hg38.Reddy, excludeGR.hg38.Wold, excludeGR.hg38.Yeo)) # Keep only standard chromosomes excludeGR.hg38.all <- keepStandardChromosomes(excludeGR.hg38.all, pruning.mode = "coarse") print(length(excludeGR.hg38.all)) summary(width(excludeGR.hg38.all)) ## ----eval=FALSE--------------------------------------------------------------- # # Search for the gap track # # ahData <- query(ah, c("gap", "Homo sapiens", "hg19")) # # ahData[ahData$title == "Gap"] # gaps <- ahData[["AH6444"]] ## ----echo=FALSE, out.height="70%", out.width="70%"---------------------------- knitr::include_graphics('../man/figures/excluderanges_hg19_gaps_number.png') ## ----gapshg19----------------------------------------------------------------- query_data <- query(ah, c("excluderanges", "UCSC", "Homo Sapiens", "hg19")) query_data gapsGR_hg19_centromere <- query_data[["AH95927"]] gapsGR_hg19_centromere ## ----gapshg38----------------------------------------------------------------- suppressPackageStartupMessages(library(rCGH)) suppressPackageStartupMessages(library(GenomicRanges)) # hg38 # data.frame # Adjust chromosome names hg38$chrom[hg38$chrom == 23] <- "X" hg38$chrom[hg38$chrom == 24] <- "Y" hg38$chrom <- paste0("chr", hg38$chrom) # Make GRanges object hg38.UCSC.centromere <- makeGRangesFromDataFrame(hg38, seqnames.field = "chrom", start.field = "centromerStart", end.field = "centromerEnd") # Assign seqinfo data seqlengths(hg38.UCSC.centromere) <- hg38$length genome(hg38.UCSC.centromere) <- "hg38" # Resulting object hg38.UCSC.centromere ## ----echo=FALSE--------------------------------------------------------------- mtx <- read.csv("../inst/extdata/table_excluderanges.csv") knitr::kable(mtx) ## ----echo=FALSE--------------------------------------------------------------- mtx <- read.csv("../inst/extdata/table_gap.csv") knitr::kable(mtx) ## ----reproduce3, echo=FALSE--------------------------------------------------- ## Session info sessionInfo()