get_UTR3eSet {InPAS}    R Documentation

Prepare 3' UTR coverage data for usage test

Description

Generate a UTR3eSet object with PDUI information for statistical tests.

Usage

get_UTR3eSet(
  sqlite_db,
  normalize = c("none", "quantiles", "quantiles.robust", "mean", "median"),
  ...,
  singleSample = FALSE
)

Arguments

sqlite_db

A path to the SQLite database for InPAS, i.e. the output of setup_sqlitedb().

normalize

A character(1) vector, specifying the normalization method. It can be "none", "quantiles", "quantiles.robust", "mean", or "median".

...

Parameters that can be passed to preprocessCore::normalize.quantiles.robust().

singleSample

A logical(1) vector, indicating whether the data is prepared for analysis in singleSample mode. Default: FALSE.

Value

An object of class UTR3eSet, which contains the following elements:

- usage: a GenomicRanges::GRanges object with CP sites info.
- PDUI: a matrix of PDUI.
- PDUI.log2: a log2-transformed PDUI matrix.
- short: a matrix of usage of the short form.
- long: a matrix of usage of the long form.

If singleSample is TRUE, one more element, signals, will be included.

Author(s)

Jianhong Ou, Haibo Liu

Examples

if (interactive()) {
  # End-to-end example: build the InPAS SQLite database from two bedgraph
  # samples, search for CP sites on chr6, then assemble the UTR3eSet and
  # run a dPDUI test on it.
  library(BSgenome.Mmusculus.UCSC.mm10)
  library(TxDb.Mmusculus.UCSC.mm10.knownGene)
  genome <- BSgenome.Mmusculus.UCSC.mm10
  TxDb <- TxDb.Mmusculus.UCSC.mm10.knownGene

  # Load the UTR3 annotation and convert it into a GRangesList,
  # one element per sequence name.
  data(utr3.mm10)
  utr3 <- split(utr3.mm10, seqnames(utr3.mm10))

  # Example bedgraph files shipped with InPAS, one per sample tag.
  bedgraphs <- system.file(
    "extdata",
    c("Baf3.extract.bedgraph", "UM15.extract.bedgraph"),
    package = "InPAS"
  )
  tags <- c("Baf3", "UM15")
  metadata <- data.frame(
    tag = tags,
    condition = c("Baf3", "UM15"),
    bedgraph_file = bedgraphs
  )

  # Write the sample sheet to a tab-delimited file and set up the
  # SQLite database from it.
  outdir <- tempdir()
  metadata_file <- file.path(outdir, "metadata.txt")
  write.table(
    metadata,
    file = metadata_file,
    sep = "\t",
    quote = FALSE,
    row.names = FALSE
  )
  sqlite_db <- setup_sqlitedb(metadata = metadata_file, outdir)

  # Collect the per-sample coverage, keyed by sample tag.
  coverage <- list()
  for (i in seq_along(bedgraphs)) {
    coverage[[tags[i]]] <- get_ssRleCov(
      bedgraph = bedgraphs[i],
      tag = tags[i],
      genome = genome,
      sqlite_db = sqlite_db,
      outdir = outdir,
      removeScaffolds = TRUE,
      BPPARAM = NULL
    )
  }
  coverage_files <- assemble_allCov(
    sqlite_db,
    outdir,
    genome,
    removeScaffolds = TRUE
  )

  # Prepare the inputs that search_CPs() takes below
  # (background, z2s, depth.weight).
  data4CPsSearch <- setup_CPsSearch(
    sqlite_db,
    genome,
    utr3,
    background = "10K",
    TxDb = TxDb,
    removeScaffolds = TRUE,
    BPPARAM = NULL,
    hugeData = TRUE,
    outdir = outdir
  )

  # polyA_PWM
  # NOTE(review): presumably this .rda provides the `pwm` object passed
  # as PolyA_PWM below -- confirm.
  load(system.file("extdata", "polyA.rda", package = "InPAS"))

  # Load the Naive Bayes classifier model from the cleanUpdTSeq package.
  library(cleanUpdTSeq)
  data(classifier)

  CPs <- search_CPs(
    seqname = "chr6",
    sqlite_db = sqlite_db,
    utr3 = utr3,
    background = data4CPsSearch$background,
    z2s = data4CPsSearch$z2s,
    depth.weight = data4CPsSearch$depth.weight,
    genome = genome,
    MINSIZE = 10,
    window_size = 100,
    search_point_START = 50,
    search_point_END = NA,
    cutStart = 10,
    cutEnd = 0,
    adjust_distal_polyA_end = TRUE,
    coverage_threshold = 5,
    long_coverage_threshold = 2,
    PolyA_PWM = pwm,
    classifier = classifier,
    classifier_cutoff = 0.8,
    shift_range = 100,
    step = 5,
    two_way = FALSE,
    hugeData = TRUE,
    outdir = outdir
  )

  # NOTE(review): the next two results are assigned but not used later in
  # this example; presumably the calls are needed for what they record in
  # the SQLite database -- confirm before removing either one.
  utr3_cds <- InPAS:::get_UTR3CDS(
    sqlite_db,
    chr.utr3 = utr3[["chr6"]],
    BPPARAM = NULL
  )

  utr3_cds_cov <- get_regionCov(
    chr.utr3 = utr3[["chr6"]],
    sqlite_db,
    outdir,
    BPPARAM = NULL,
    phmm = FALSE
  )

  # Build the UTR3eSet without normalization and test for differential
  # PDUI between the two conditions.
  eSet <- get_UTR3eSet(
    sqlite_db,
    normalize = "none",
    singleSample = FALSE
  )
  test_out <- test_dPDUI(
    eset = eSet,
    method = "fisher.exact",
    normalize = "none",
    sqlite_db = sqlite_db
  )
}

[Package InPAS version 2.1.0 Index]