## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(
  comment = "",
  warning = FALSE,
  message = FALSE,
  cache = FALSE
)

## ----eval=FALSE---------------------------------------------------------------
# if (!requireNamespace("BiocManager", quietly = TRUE)) {
#   install.packages("BiocManager")
# }
# BiocManager::install("UPDhmm")
# BiocManager::install("regioneR")
# BiocManager::install("karyoploteR")
# 

## -----------------------------------------------------------------------------
library(UPDhmm)
library(dplyr)

## -----------------------------------------------------------------------------
library(VariantAnnotation)
library(regioneR)
library(karyoploteR)

#Example files
files <- c(
  system.file(package = "UPDhmm", "extdata", "sample1.vcf.gz"),
  system.file(package = "UPDhmm", "extdata", "sample2.vcf.gz")
)


# Define the trio structure
trio_list <- list(
  list(
    proband = "NA19675",
    mother  = "NA19678",
    father  = "NA19679"
  ),
  list(
    proband = "NA19685",
    mother  = "NA19688",
    father  = "NA19689"
  )
)

# Read and preprocess each VCF
vcf_list <- mapply(function(file, trio) {
  vcf <- readVcf(file)
  vcfCheck(vcf,
           proband = trio$proband,
           mother  = trio$mother,
           father  = trio$father)
}, 
files, trio_list, SIMPLIFY = FALSE)



## -----------------------------------------------------------------------------
events_list <- lapply(vcf_list, calculateEvents, add_ratios = TRUE)

head(events_list[[1]])


## ----eval=FALSE---------------------------------------------------------------
# 
# library(BiocParallel)
# BPPARAM <- MulticoreParam(workers = min(2, parallel::detectCores()))
# events_list <- lapply(vcf_list, calculateEvents, BPPARAM = BPPARAM)
# 

## -----------------------------------------------------------------------------
collapsed_list <- lapply(events_list, collapseEvents)
collapsed_all <- do.call(rbind.data.frame, c(collapsed_list, make.row.names = FALSE))
head(collapsed_all)

## -----------------------------------------------------------------------------
recurrentRegions <- identifyRecurrentRegions(
  df = collapsed_all,
  ID_col = "ID",
  error_threshold = 100,
  min_support = 2,
  max_dist = 0.97
)

head(recurrentRegions)
annotatedEvents <- markRecurrentRegions(
  df = collapsed_all,
  recurrent_regions = recurrentRegions
)

head(annotatedEvents)

## -----------------------------------------------------------------------------

library(regioneR)
bed <- system.file(package = "UPDhmm", "extdata", "SFARI.bed")
bed_df <- read.table(
    bed,
    header = TRUE
)

bed_gr <- toGRanges(bed_df)

annotatedEvents <- markRecurrentRegions(
  df = collapsed_all,
  recurrent_regions = bed_gr
)

head(annotatedEvents)

## -----------------------------------------------------------------------------
library(karyoploteR)
library(regioneR)

# Function to visualize UPD events across the genome
plotUPDKp <- function(updEvents) {
  #--------------------------------------------------------------
  # 1. Detect coordinate columns
  #--------------------------------------------------------------
  if (all(c("start", "end") %in% colnames(updEvents))) {
    start_col <- "start"
    end_col <- "end"
  } else if (all(c("min_start", "max_end") %in% colnames(updEvents))) {
    start_col <- "min_start"
    end_col <- "max_end"
  } else {
    stop("Input must contain either (start, end) or (min_start, max_end) columns.")
  }

  #--------------------------------------------------------------
  # 2. Ensure chromosome naming format (e.g. "chr3")
  #--------------------------------------------------------------
  updEvents$seqnames <- ifelse(grepl("^chr", updEvents$chromosome),
                               updEvents$seqnames,
                               paste0("chr", updEvents$chromosome))

  #--------------------------------------------------------------
  # 3. Helper function to safely create GRanges only if events exist
  #--------------------------------------------------------------
  to_gr_safe <- function(df, grp) {
    subset_df <- subset(df, group == grp)
    if (nrow(subset_df) > 0) {
      toGRanges(subset_df[, c("seqnames", start_col, end_col)])
    } else {
      NULL
    }
  }

  #--------------------------------------------------------------
  # 4. Separate UPD event types
  #--------------------------------------------------------------
  het_fat <- to_gr_safe(updEvents, "het_fat")
  het_mat <- to_gr_safe(updEvents, "het_mat")
  iso_fat <- to_gr_safe(updEvents, "iso_fat")
  iso_mat <- to_gr_safe(updEvents, "iso_mat")

  #--------------------------------------------------------------
  # 5. Create the genome ideogram
  #--------------------------------------------------------------
  kp <- plotKaryotype(genome = "hg19")

  #--------------------------------------------------------------
  # 6. Plot regions by inheritance type (if any exist)
  #--------------------------------------------------------------
  if (!is.null(het_fat)) kpPlotRegions(kp, het_fat, col = "#AAF593")
  if (!is.null(het_mat)) kpPlotRegions(kp, het_mat, col = "#FFB6C1")
  if (!is.null(iso_fat)) kpPlotRegions(kp, iso_fat, col = "#A6E5FC")
  if (!is.null(iso_mat)) kpPlotRegions(kp, iso_mat, col = "#E9B864")

  #--------------------------------------------------------------
  # 7. Add legend
  #--------------------------------------------------------------
  legend("topright",
         legend = c("Het_Fat", "Het_Mat", "Iso_Fat", "Iso_Mat"),
         fill = c("#AAF593", "#FFB6C1", "#A6E5FC", "#E9B864"))
}
# Example: visualize UPD events for the first trio
plotUPDKp(annotatedEvents)


## -----------------------------------------------------------------------------
sessionInfo()

