## ---- echo=FALSE, warning=FALSE, message=FALSE, out.width="100%"-------------- knitr::include_graphics(system.file("scATAC-seq_workflow.png", package = "scPipe")) ## ---- message=FALSE----------------------------------------------------------- library(scPipe) data.folder <- system.file("extdata", package = "scPipe", mustWork = TRUE) ## ----------------------------------------------------------------------------- output_folder <- paste0(tempdir(), "/scPipeATACVignette") ## ----------------------------------------------------------------------------- # data fastq files r1 <- file.path(data.folder, "small_chr21_R1.fastq.gz") r2 <- file.path(data.folder, "small_chr21_R3.fastq.gz") # barcodes in fastq format barcode_fastq <- file.path(data.folder, "small_chr21_R2.fastq.gz") # barcodes in .csv format barcode_1000 <- file.path(data.folder, "chr21_modified_barcode_1000.csv") ## ----------------------------------------------------------------------------- sc_atac_trim_barcode (r1 = r1, r2 = r2, bc_file = barcode_fastq, rmN = FALSE, rmlow = FALSE, output_folder = output_folder) ## ----eval=FALSE--------------------------------------------------------------- # sc_atac_trim_barcode (r1 = r1, # r2 = r2, # bc_file = barcode_1000, # id1_st = -1, # id1_len = -1, # id2_st = -1, # id2_len = -10, # output_folder = output_folder, # rmN = TRUE) ## ----------------------------------------------------------------------------- demux_r1 <- file.path(output_folder, "demux_completematch_small_chr21_R1.fastq.gz") demux_r2 <- file.path(output_folder, "demux_completematch_small_chr21_R3.fastq.gz") reference = file.path(data.folder, "small_chr21.fa") aligned_bam <- sc_aligning(ref = reference, R1 = demux_r1, R2 = demux_r2, nthreads = 12, output_folder = output_folder) ## ----------------------------------------------------------------------------- sorted_tagged_bam <- sc_atac_bam_tagging (inbam = aligned_bam, output_folder = output_folder, bam_tags = list(bc="CB", mb="OX"), nthreads = 12) ## ---- eval=FALSE-------------------------------------------------------------- # # removed <- sc_atac_remove_duplicates(sorted_tagged_bam, # output_folder = output_folder) # if (!isFALSE(removed)) # sorted_tagged_bam <- removed # ## ----------------------------------------------------------------------------- sc_atac_create_fragments(inbam = sorted_tagged_bam, output_folder = output_folder) ## ---- eval=FALSE-------------------------------------------------------------- # # CHECK IF THE .narrowPeak file is small enough to include in the package, # # otherwise, we need to make this basilisk call work?? # sc_atac_peak_calling(inbam = sorted_tagged_bam, # ref = reference, # genome_size = NULL, # output_folder = output_folder) # ## ---- eval=FALSE-------------------------------------------------------------- # features <- file.path(output_folder, "NA_peaks.narrowPeak") # # sc_atac_feature_counting (fragment_file = file.path(output_folder, "fragments.bed"), # feature_input = features, # bam_tags = list(bc="CB", mb="OX"), # feature_type = "peak", # organism = "hg38", # yieldsize = 1000000, # exclude_regions = TRUE, # output_folder = output_folder, # fix_chr = "none" # ) ## ----eval=FALSE--------------------------------------------------------------- # reference <- file.path(data.folder, "small_chr21.fa") # sc_atac_feature_counting (fragment_file = file.path(output_folder, "fragments.bed"), # feature_input = reference, # bam_tags = list(bc="CB", mb="OX"), # feature_type = "genome_bin", # organism = "hg38", # cell_calling = FALSE, # yieldsize = 1000000, # exclude_regions = TRUE, # output_folder = output_folder, # fix_chr = "none" # ) # ## ----eval=FALSE--------------------------------------------------------------- # features <- file.path(output_folder, "NA_peaks.narrowPeak") ## ---- eval=TRUE, echo=FALSE--------------------------------------------------- features <- file.path(data.folder, "NA_peaks.narrowPeak") ## ----------------------------------------------------------------------------- sc_atac_feature_counting (fragment_file = file.path(output_folder, "fragments.bed"), feature_input = features, bam_tags = list(bc="CB", mb="OX"), feature_type = "peak", organism = "hg38", cell_calling = "filter", min_uniq_frags = 0, min_frac_peak = 0, min_frac_promoter = 0, yieldsize = 1000000, exclude_regions = TRUE, output_folder = output_folder, fix_chr = "none" ) ## ----------------------------------------------------------------------------- feature_matrix <- readRDS(file.path(output_folder, "unfiltered_feature_matrix.rds")) dplyr::glimpse(feature_matrix) sparseM <- readRDS(file.path(output_folder, "sparse_matrix.rds")) dplyr::glimpse(sparseM) ## ----------------------------------------------------------------------------- sce <- sc_atac_create_sce(input_folder = output_folder, organism = "hg38", feature_type = "peak", pheno_data = NULL, report = FALSE) ## ---- echo=FALSE, warning=FALSE, message=FALSE, out.width="100%"-------------- knitr::include_graphics(system.file("report_example.png", package = "scPipe")) ## ----eval = FALSE------------------------------------------------------------- # data.folder <- system.file("extdata", package = "scPipe", mustWork = TRUE) # output_folder <- tempdir() # sce <- sc_atac_pipeline(r1 = file.path(data.folder, "small_chr21_R1.fastq.gz"), # r2 = file.path(data.folder, "small_chr21_R3.fastq.gz"), # barcode_fastq = file.path(data.folder, "small_chr21_R2.fastq.gz"), # organism = "hg38", # reference = file.path(data.folder, "small_chr21.fa"), # feature_type = "peak", # remove_duplicates = FALSE, # min_uniq_frags = 0, # set to 0 as the sample dataset only has a small number of reads # min_frac_peak = 0, # min_frac_promoter = 0, # output_folder = output_folder) ## ---- echo=FALSE, results='hide'---------------------------------------------- # cleanup tempfiles unlink(output_folder, recursive=TRUE) ## ----------------------------------------------------------------------------- sessionInfo()