## ----setup, echo=FALSE-----------------------------------------------------
# Chunk display options, then quiet loading of the two packages and the
# example datasets used by the chunks below.
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
suppressPackageStartupMessages(library(universalmotif))
suppressPackageStartupMessages(library(Biostrings))
data(ArabidopsisPromoters)
data(ArabidopsisMotif)

## --------------------------------------------------------------------------
library(universalmotif)
library(Biostrings)

# 500 sequences over the default alphabet (DNA), with explicit `monofreqs`;
# useful as input for an external program.
sequences.dna <- create_sequences(
  seqnum = 500,
  monofreqs = c(0.3, 0.2, 0.2, 0.3)
)
## writeXStringSet(sequences.dna, "dna.fasta")
sequences.dna

# Amino acid alphabet.
create_sequences(alphabet = "AA")

# An arbitrary alphabet: here the 26 lowercase letters pasted into one string.
create_sequences(alphabet = paste(letters, collapse = ""))

## --------------------------------------------------------------------------
library(universalmotif)
data(ArabidopsisPromoters)

# The input could equally come from an external FASTA file:
# library(Biostrings)
# ArabidopsisPromoters <- readDNAStringSet("ArabidopsisPromoters.fasta")

# Shuffle the same promoters with each of the three methods, all at k = 2.
markov <- shuffle_sequences(ArabidopsisPromoters, k = 2, method = "markov")
linear <- shuffle_sequences(ArabidopsisPromoters, k = 2, method = "linear")
random <- shuffle_sequences(ArabidopsisPromoters, k = 2, method = "random")

## --------------------------------------------------------------------------
# Single-letter counts, summed over all sequences, for the original set and
# each shuffled set.
o.letter <- colSums(
  oligonucleotideFrequency(ArabidopsisPromoters, width = 1, as.prob = FALSE)
)
m.letter <- colSums(
  oligonucleotideFrequency(markov, width = 1, as.prob = FALSE)
)
l.letter <- colSums(
  oligonucleotideFrequency(linear, width = 1, as.prob = FALSE)
)
r.letter <- colSums(
  oligonucleotideFrequency(random, width = 1, as.prob = FALSE)
)

data.frame(
  original = o.letter,
  markov = m.letter,
  linear = l.letter,
  random = r.letter
)

# Two-letter counts for the same sets; the comparison table is printed once
# the count for the `random` set has been computed as well.
o.counts <- colSums(
  oligonucleotideFrequency(ArabidopsisPromoters, width = 2, as.prob = FALSE)
)
m.counts <- colSums(
  oligonucleotideFrequency(markov, width = 2, as.prob = FALSE)
)
l.counts <- colSums(
  oligonucleotideFrequency(linear, width = 2, as.prob = FALSE)
)
# Two-letter counts for the `random` shuffle, then the side-by-side table of
# two-letter counts for the original and all three shuffled sets.
r.counts <- colSums(
  oligonucleotideFrequency(random, width = 2, as.prob = FALSE)
)

data.frame(
  original = o.counts,
  markov = m.counts,
  linear = l.counts,
  random = r.counts
)

## --------------------------------------------------------------------------
library(universalmotif)

# Motif built from a consensus string.
m1 <- create_motif("TATATATATA", nsites = 50, type = "PWM", pseudocount = 1)

# Motif built from a 4 x 10 numeric matrix, filled row by row.
m2 <- matrix(
  data = c(
    0.10, 0.27, 0.23, 0.19, 0.29, 0.28, 0.51, 0.12, 0.34, 0.26,
    0.36, 0.29, 0.51, 0.38, 0.23, 0.16, 0.17, 0.21, 0.23, 0.36,
    0.45, 0.05, 0.02, 0.13, 0.27, 0.38, 0.26, 0.38, 0.12, 0.31,
    0.09, 0.40, 0.24, 0.30, 0.21, 0.19, 0.05, 0.30, 0.31, 0.08
  ),
  nrow = 4,
  byrow = TRUE
)
m2 <- create_motif(m2, alphabet = "DNA", type = "PWM")

# Show the "motif" slot of each object.
m1["motif"]
m2["motif"]

## --------------------------------------------------------------------------
motif_pvalue(m2, pvalue = 0.001, progress = FALSE)

## --------------------------------------------------------------------------
library(universalmotif)
library(Biostrings)
data(ArabidopsisPromoters)

# A 2-letter example: start from a consensus motif, then add multifreq
# information taken from six example sites.
motif.k2 <- create_motif("CWWWWCC", nsites = 6)
sequences.k2 <- DNAStringSet(rep(c("CAAAACC", "CTTTTCC"), times = 3))
motif.k2 <- add_multifreq(motif.k2, sequences.k2)

## --------------------------------------------------------------------------
# First rows of a scan of both strands with an explicit threshold.
head(
  scan_sequences(
    motif.k2,
    ArabidopsisPromoters,
    RC = TRUE,
    verbose = 0,
    threshold = 0.0001,
    progress = FALSE
  )
)

## --------------------------------------------------------------------------
# Same scan with use.freq = 2 and no explicit threshold.
head(
  scan_sequences(
    motif.k2,
    ArabidopsisPromoters,
    use.freq = 2,
    RC = TRUE,
    verbose = 0,
    progress = FALSE
  )
)

## --------------------------------------------------------------------------
library(universalmotif)
library(Biostrings)

# Build a motif directly from sequences, requesting multifreq for 2 and 3.
sequences <- DNAStringSet(rep(c("CAAAACC", "CTTTTCC"), times = 3))
motif <- create_motif(sequences, add.multifreq = 2:3)

## ----sessionInfo, echo=FALSE-----------------------------------------------
sessionInfo()