utils-sequence {universalmotif} | R Documentation |
Sequence-related utility functions.
calc_complexity(string, complexity.method = c("WoottonFederhen", "WoottonFederhenFast", "Trifonov", "TrifonovFast", "DUST"), alph = NULL, trifonov.max.word.size = 7) calc_windows(n, window = 1, overlap = 0, return.incomp = TRUE) count_klets(string, k = 1, alph) get_klets(lets, k = 1) mask_ranges(seqs, ranges, letter = "-") mask_seqs(seqs, pattern, RC = FALSE, letter = "-") shuffle_string(string, k = 1, method = c("euler", "linear", "markov"), rng.seed = sample.int(10000, 1)) slide_fun(string, FUN, FUN.VALUE, window = 1, overlap = 0, return.incomp = TRUE) window_string(string, window = 1, overlap = 0, return.incomp = TRUE, nthreads = 1)
string |
|
complexity.method |
|
alph |
|
trifonov.max.word.size |
|
n |
|
window |
|
overlap |
|
return.incomp |
|
k |
|
lets |
|
seqs |
|
ranges |
|
letter |
|
pattern |
|
RC |
|
method |
|
rng.seed |
|
FUN |
|
FUN.VALUE |
The expected return type for FUN.
nthreads |
|
For calc_complexity(): A vector of numeric values.
For calc_windows(): A data.frame with columns start and stop.
For count_klets(): A data.frame with columns lets and counts.
For get_klets(): A character vector of k-lets.
For mask_ranges(): The masked XStringSet object.
For mask_seqs(): The masked XStringSet object.
For shuffle_string(): A single character string.
For slide_fun(): A vector with type FUN.VALUE.
For window_string(): A character vector.
Benjamin Jean-Marie Tremblay, benjamin.tremblay@uwaterloo.ca
create_sequences(), get_bkg(), sequence_complexity(), shuffle_sequences()
####################################################################### ## calc_complexity ## Calculate complexity for arbitrary strings calc_complexity("GTGCCCCGCGGGAACCCCGC", c = "WoottonFederhen") calc_complexity("GTGCCCCGCGGGAACCCCGC", c = "WoottonFederhenFast") calc_complexity("GTGCCCCGCGGGAACCCCGC", c = "Trifonov") calc_complexity("GTGCCCCGCGGGAACCCCGC", c = "TrifonovFast") calc_complexity("GTGCCCCGCGGGAACCCCGC", c = "DUST") ####################################################################### ## calc_windows ## Calculate window coordinates for any value 'n'. calc_windows(100, 10, 5) ####################################################################### ## count_klets ## Count k-lets for any string of characters count_klets("GCAAATGTACGCAGGGCCGA", k = 2) ## The default 'k' value (1) counts individual letters count_klets("GCAAATGTACGCAGGGCCGA") ####################################################################### ## get_klets ## Generate all possible k-lets for a set of characters get_klets(c("A", "C", "G", "T"), 3) ## Note that each element in 'lets' is considered a single unit; ## see: get_klets(c("AA", "B"), k = 2) ####################################################################### ## mask_ranges ## Mask arbitrary ranges if (requireNamespace("GenomicRanges", quiet = TRUE)) { ranges <- GenomicRanges::GRanges("A", IRanges::IRanges(1, 5)) seq <- Biostrings::DNAStringSet(c(A = "ATGACTGATTACTTATA")) mask_ranges(seq, ranges, "-") } ####################################################################### ## mask_seqs ## Mask repetitive sequences data(ArabidopsisPromoters) mask_seqs(ArabidopsisPromoters, "AAAAAA") ####################################################################### ## shuffle_string ## Shuffle any string of characters shuffle_string("ASDADASDASDASD", k = 1) ####################################################################### ## slide_fun ## Apply a function to a character vector along sliding windows FUN <- function(x) grepl("[GC]", 
x) data.frame( Window = window_string("ATGCATCTATGCA", 2, 1), HasGC = slide_fun("ATGCATCTATGCA", FUN, logical(1), 2, 1) ) ####################################################################### ## window_string ## Get sliding windows for a string of characters window_string("ABCDEFGHIJ", 2, 1)