## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", eval = TRUE, warning = FALSE, message = FALSE, fig.align = "center", out.width = "80%" ) ## ----logo, echo=FALSE, eval=TRUE, out.width='10%'----------------------------- knitr::include_graphics("../man/figures/UMI4Cats.png", dpi = 800) ## ----load--------------------------------------------------------------------- library(UMI4Cats) ## ----umi4cats-scheme, echo=FALSE, eval=TRUE, fig.cap="Overview of the different functions included in the UMI4Cats package to analyze UMI-4C data."---- knitr::include_graphics("figures/scheme.png", dpi = 400) ## ----processing-quick-start, eval=FALSE--------------------------------------- # ## 0) Download example data ------------------------------- # path <- downloadUMI4CexampleData() # # ## 1) Generate Digested genome ---------------------------- # # The selected RE in this case is DpnII (|GATC), so the cut_pos is 0, and the res_enz "GATC". # hg19_dpnii <- digestGenome( # cut_pos = 0, # res_enz = "GATC", # name_RE = "DpnII", # ref_gen = BSgenome.Hsapiens.UCSC.hg19::BSgenome.Hsapiens.UCSC.hg19, # out_path = file.path(tempdir(), "digested_genome/") # ) # # ## 2) Process UMI-4C fastq files -------------------------- # raw_dir <- file.path(path, "CIITA", "fastq") # # contactsUMI4C( # fastq_dir = raw_dir, # wk_dir = file.path(path, "CIITA"), # bait_seq = "GGACAAGCTCCCTGCAACTCA", # bait_pad = "GGACTTGCA", # res_enz = "GATC", # cut_pos = 0, # digested_genome = hg19_dpnii, # bowtie_index = file.path(path, "ref_genome", "ucsc.hg19.chr16"), # ref_gen = BSgenome.Hsapiens.UCSC.hg19::BSgenome.Hsapiens.UCSC.hg19, # threads = 5 # ) # # ## 3) Get filtering and alignment stats ------------------- # statsUMI4C(wk_dir = file.path(path, "CIITA")) # # ## 4) Analyze the results --------------------------------- # # Load sample processed file paths # files <- list.files(file.path(path, "CIITA", "count"), # pattern = "*_counts.tsv.gz", # full.names = TRUE # ) # # # Create colData including all relevant information # colData <- data.frame( # sampleID = gsub("_counts.tsv.gz", "", basename(files)), # file = files, # stringsAsFactors = FALSE # ) # # library(tidyr) # colData <- colData %>% # separate(sampleID, # into = c("condition", "replicate", "viewpoint"), # remove = FALSE # ) # # # Make UMI-4C object including grouping by condition # umi <- makeUMI4C( # colData = colData, # viewpoint_name = "CIITA", # grouping = "condition", # bait_expansion = 2e6 # ) # # # Plot replicates # plotUMI4C(umi, grouping=NULL) # # ## 5) Get significant interactions # # Generate windows # win_frags <- makeWindowFragments(umi, n_frags=8) # # # Call interactions # gr <- callInteractions(umi4c = umi, # design = ~condition, # query_regions = win_frags, # padj_threshold = 0.01, # zscore_threshold=2) # # # Plot interactions # all <- plotInteractionsUMI4C(umi, gr, grouping = NULL, significant=FALSE, xlim=c(10.75e6, 11.1e6)) # Plot all regions # sign <- plotInteractionsUMI4C(umi, gr, grouping = NULL, significant=TRUE, xlim=c(10.75e6, 11.1e6)) # Plot only significantly interacting regions # cowplot::plot_grid(all, sign, ncol=2, labels=c("All", "Significant")) # # # Obtain unique significant interactions # inter <- getSignInteractions(gr) # # ## 6) Differential testing ---------------------- # # Fisher test # umi_fisher <- fisherUMI4C(umi, query_regions = iter, # filter_low = 20, # grouping="condition") # plotUMI4C(umi_fisher, ylim = c(0, 10), grouping="condition") # # # DESeq2 Wald Test # umi_wald <- waldUMI4C(umi4c=umi, # query_regions = inter, # design=~condition) # # plotUMI4C(umi_wald, ylim = c(0, 10), grouping="condition") ## ----demultiplex, eval=TRUE--------------------------------------------------- ## Input files path <- downloadUMI4CexampleData(reduced=TRUE) fastq <- file.path(path, "CIITA", "fastq", "ctrl_hi24_CIITA_R1.fastq.gz") ## Barcode info barcodes <- data.frame( sample = c("CIITA"), barcode = c("GGACAAGCTCCCTGCAACTCA") ) ## Output path out_path <- tempdir() ## Demultiplex baits inside FastQ file demultiplexFastq( fastq = fastq, barcodes = barcodes, out_path = out_path ) ## ----digest------------------------------------------------------------------- library(BSgenome.Hsapiens.UCSC.hg19) refgen <- BSgenome.Hsapiens.UCSC.hg19 hg19_dpnii <- digestGenome( res_enz = "GATC", cut_pos = 0, name_RE = "dpnII", ref_gen = refgen, sel_chr = "chr16", # Select bait's chr (chr16) to make example faster out_path = file.path(tempdir(), "digested_genome/") ) hg19_dpnii ## ----read-scheme, echo=FALSE, eval=TRUE, fig.cap="Schematic of a UMI-4C read detailing the different elements that need to be used as input for processing the data."---- knitr::include_graphics("figures/read_scheme.png", dpi = 500) ## ----processing, message=TRUE------------------------------------------------- ## Use reduced example to make vignette faster ## If you want to download the full dataset, set reduced = FALSE or remove ## the reduce argument. ## The reduced example is already downloaded in the demultiplexFastq chunk. # path <- downloadUMI4CexampleData(reduced=TRUE) raw_dir <- file.path(path, "CIITA", "fastq") index_path <- file.path(path, "ref_genome", "ucsc.hg19.chr16") ## Run main function to process UMI-4C contacts contactsUMI4C( fastq_dir = raw_dir, wk_dir = file.path(path, "CIITA"), file_pattern = "ctrl_hi24_CIITA", # Select only one sample to reduce running time bait_seq = "GGACAAGCTCCCTGCAACTCA", bait_pad = "GGACTTGCA", res_enz = "GATC", cut_pos = 0, digested_genome = hg19_dpnii, bowtie_index = index_path, ref_gen = BSgenome.Hsapiens.UCSC.hg19::BSgenome.Hsapiens.UCSC.hg19, sel_seqname = "chr16", # Input bait chr to reduce running time threads = 2, numb_reads = 1e6 # Reduce memory usage ) ## ----stats-------------------------------------------------------------------- # Using the full dataset included in the package statsUMI4C(wk_dir = system.file("extdata", "CIITA", package = "UMI4Cats" )) # Read stats table stats <- read.delim(system.file("extdata", "CIITA", "logs", "stats_summary.txt", package = "UMI4Cats" )) knitr::kable(stats) ## ----make-umi4c--------------------------------------------------------------- # Load sample processed file paths files <- list.files(system.file("extdata", "CIITA", "count", package="UMI4Cats"), pattern = "*_counts.tsv.gz", full.names = TRUE ) # Create colData including all relevant information colData <- data.frame( sampleID = gsub("_counts.tsv.gz", "", basename(files)), file = files, stringsAsFactors = FALSE ) library(tidyr) colData <- colData %>% separate(sampleID, into = c("condition", "replicate", "viewpoint"), remove = FALSE ) # Load UMI-4C data and generate UMI4C object umi <- makeUMI4C( colData = colData, viewpoint_name = "CIITA", grouping = "condition", ref_umi4c = c("condition"="ctrl"), bait_expansion = 2e6 ) umi groupsUMI4C(umi) ## ----methods-umi4c------------------------------------------------------------ groupsUMI4C(umi) # Available grouped UMI-4C objects head(assay(umi)) # Retrieve raw UMIs head(assay(groupsUMI4C(umi)$condition)) # Retrieve UMIs grouped by condition colData(umi) # Retrieve column information rowRanges(umi) # Retrieve fragment coordinates dgram(umi) # Retrieve domainograms dgram(groupsUMI4C(umi)$condition) # Retrieve domainograms bait(umi) # Retrieve bait coordinates head(trend(umi)) # Retrieve adaptive smoothing trend ## ----interactions-win, fig.width=12, fig.height=7, results="hold"------------- # Generate windows win_frags <- makeWindowFragments(umi, n_frags=8) # Call interactions gr <- callInteractions(umi4c = umi, design = ~condition, query_regions = win_frags, padj_threshold = 0.01, zscore_threshold=2) # Plot interactions all <- plotInteractionsUMI4C(umi, gr, grouping = NULL, significant=FALSE, xlim=c(10.75e6, 11.1e6)) # Plot all regions sign <- plotInteractionsUMI4C(umi, gr, grouping = NULL, significant=TRUE, xlim=c(10.75e6, 11.1e6)) # Plot only significantly interacting regions cowplot::plot_grid(all, sign, ncol=2, labels=c("All", "Significant")) # Obtain unique significant interactions inter <- getSignInteractions(gr) ## ----dif-deseq, message=TRUE, eval=TRUE--------------------------------------- umi_wald <- waldUMI4C(umi, query_regions = inter, design = ~condition) ## ----dif-query---------------------------------------------------------------- # Perform differential test umi_fisher <- fisherUMI4C(umi, grouping = "condition", query_regions = inter, filter_low = 20) ## ----show-results-umi4c------------------------------------------------------- resultsUMI4C(umi_fisher, ordered = TRUE, counts = TRUE, format = "data.frame") ## ----plot-umi4c--------------------------------------------------------------- plotUMI4C(umi, grouping = NULL, TxDb = TxDb.Hsapiens.UCSC.hg19.knownGene::TxDb.Hsapiens.UCSC.hg19.knownGene, dgram_plot = FALSE ) ## ----plot-dif----------------------------------------------------------------- plotUMI4C(umi_fisher, grouping = "condition", xlim=c(10.75e6, 11.25e6), ylim=c(0,10))