## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(
    collapse = TRUE,
    comment = "#>"
)

## -----------------------------------------------------------------------------
library(SpliceWiz)

## ----eval = FALSE-------------------------------------------------------------
#  ref_path <- "./Reference"

## ----eval=FALSE---------------------------------------------------------------
#  buildRef(
#      reference_path = ref_path,
#      fasta = "genome.fa", gtf = "transcripts.gtf",
#      genome_type = "hg38"
#  )

## ----eval=FALSE---------------------------------------------------------------
#  getResources(
#      reference_path = ref_path,
#      fasta = "genome.fa",
#      gtf = "transcripts.gtf"
#  )
#  
#  buildRef(
#      reference_path = ref_path,
#      genome_type = "hg38"
#  )

## ----eval=FALSE---------------------------------------------------------------
#  # Assuming hg38 genome:
#  
#  buildRef(
#      reference_path = ref_path,
#      genome_type = "hg38",
#      overwrite = TRUE
#  )

## ----eval=FALSE---------------------------------------------------------------
#  FTP <- "ftp://ftp.ensembl.org/pub/release-94/"
#  
#  buildRef(
#      reference_path = ref_path,
#      fasta = paste0(FTP, "fasta/homo_sapiens/dna/",
#          "Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz"),
#      gtf = paste0(FTP, "gtf/homo_sapiens/",
#          "Homo_sapiens.GRCh38.94.chr.gtf.gz"),
#      genome_type = "hg38"
#  )

## -----------------------------------------------------------------------------
require(AnnotationHub)

ah <- AnnotationHub()
query(ah, "Ensembl")

## -----------------------------------------------------------------------------
query(ah, c("Homo Sapiens", "release-94"))

## ----eval=FALSE---------------------------------------------------------------
#  buildRef(
#      reference_path = ref_path,
#      fasta = "AH65745",
#      gtf = "AH64631",
#      genome_type = "hg38"
#  )

## ----eval=FALSE---------------------------------------------------------------
#  buildRef(
#      reference_path = ref_path,
#      fasta = "genome.fa", gtf = "transcripts.gtf",
#      genome_type = ""
#  )

## -----------------------------------------------------------------------------
STAR_version()

## ----eval = FALSE-------------------------------------------------------------
#  ref_path = "./Reference"
#  
#  # Ensure genome resources are prepared from genome FASTA and GTF file:
#  
#  if(!dir.exists(file.path(ref_path, "resource"))) {
#      getResources(
#          reference_path = ref_path,
#          fasta = "genome.fa",
#          gtf = "transcripts.gtf"
#      )
#  }
#  
#  # Generate a STAR genome reference:
#  STAR_BuildRef(
#      reference_path = ref_path,
#      n_threads = 8
#  )
#  

## ----eval=FALSE---------------------------------------------------------------
#  buildFullRef(
#      reference_path = ref_path,
#      fasta = "genome.fa", gtf = "transcripts.gtf",
#      genome_type = "",
#      use_STAR_mappability = TRUE,
#      n_threads = 4
#  )

## ----eval = FALSE-------------------------------------------------------------
#  # (1a) Creates genome resource files
#  
#  ref_path <- file.path(tempdir(), "Reference")
#  
#  getResources(
#      reference_path = ref_path,
#      fasta = chrZ_genome(),
#      gtf = chrZ_gtf()
#  )
#  
#  # (1b) Systematically generate reads based on the SpliceWiz example genome:
#  
#  generateSyntheticReads(
#      reference_path = ref_path
#  )
#  
#  # (2) Align the generated reads using Rsubread:
#  
#  # (2a) Build the Rsubread genome index:
#  
#  setwd(ref_path)
#  Rsubread::buildindex(basename = "./reference_index",
#      reference = chrZ_genome())
#  
#  # (2b) Align the synthetic reads using Rsubread::subjunc()
#  
#  Rsubread::subjunc(
#      index = "./reference_index",
#      readfile1 = file.path(ref_path, "Mappability", "Reads.fa"),
#      output_file = file.path(ref_path, "Mappability", "AlignedReads.bam"),
#      useAnnotation = TRUE,
#      annot.ext = chrZ_gtf(),
#      isGTF = TRUE
#  )
#  
#  # (3) Analyse the aligned reads in the BAM file for low-mappability regions:
#  
#  calculateMappability(
#      reference_path = ref_path,
#      aligned_bam = file.path(ref_path, "Mappability", "AlignedReads.bam")
#  )
#  
#  # (4) Build the SpliceWiz reference using the calculated Mappability Exclusions
#  
#  buildRef(ref_path)
#  

## -----------------------------------------------------------------------------
STAR_version()

## ----eval = FALSE-------------------------------------------------------------
#  STAR_alignReads(
#      STAR_ref_path = file.path(ref_path, "STAR"),
#      BAM_output_path = "./bams/sample1",
#      fastq_1 = "sample1_1.fastq", fastq_2 = "sample1_2.fastq",
#      n_threads = 8
#  )

## ----eval = FALSE-------------------------------------------------------------
#  Experiment <- data.frame(
#      sample = c("sample_A", "sample_B"),
#      forward = file.path("raw_data", c("sample_A", "sample_B"),
#          c("sample_A_1.fastq", "sample_B_1.fastq")),
#      reverse = file.path("raw_data", c("sample_A", "sample_B"),
#          c("sample_A_2.fastq", "sample_B_2.fastq"))
#  )
#  
#  STAR_alignExperiment(
#      Experiment = Experiment,
#      STAR_ref_path = file.path("Reference_FTP", "STAR"),
#      BAM_output_path = "./bams",
#      n_threads = 8,
#      two_pass = FALSE
#  )

## ----eval = FALSE-------------------------------------------------------------
#  # Assuming sequencing files are named by their respective sample names
#  fastq_files <- findFASTQ("./sequencing_files", paired = TRUE,
#      fastq_suffix = ".fq.gz", level = 0)

## ----eval = FALSE-------------------------------------------------------------
#  STAR_alignExperiment(
#      Experiment = fastq_files,
#      STAR_ref_path = file.path("Reference_FTP", "STAR"),
#      BAM_output_path = "./bams",
#      n_threads = 8,
#      two_pass = FALSE
#  )

## ----eval=FALSE---------------------------------------------------------------
#  bams <- findBAMS("./bams", level = 1)

## ----eval=FALSE---------------------------------------------------------------
#  # assume SpliceWiz reference has been generated in `ref_path`
#  
#  processBAM(
#      bamfiles = bams$path,
#      sample_names = bams$sample,
#      reference_path = ref_path,
#      output_path = "./pb_output",
#      n_threads = 4,
#      useOpenMP = TRUE
#  )

## ----eval=FALSE---------------------------------------------------------------
#  BAM2COV(
#      bamfiles = bams$path,
#      sample_names = bams$sample,
#      output_path = "./pb_output",
#      n_threads = 4,
#      useOpenMP = TRUE
#  )

## ----eval=FALSE---------------------------------------------------------------
#  expr <- findSpliceWizOutput("./pb_output")

## ----eval = FALSE-------------------------------------------------------------
#  collateData(
#      Experiment = expr,
#      reference_path = ref_path,
#      output_path = "./NxtSE_output"
#  )

## ----eval = FALSE-------------------------------------------------------------
#  se <- makeSE("./NxtSE_output")

## -----------------------------------------------------------------------------
sessionInfo()