# Example seqArchR workflow (code chunks extracted from a knitr document).
# Steps: load the example FASTA shipped with the package, build the one-hot
# encoded matrix, configure seqArchR, load a stored result, and visualise the
# basis vectors and the sequence clusters for iterations 1 and 2.

## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----seqArchR-install, echo=TRUE, eval=FALSE----------------------------------
#
# if (!require("BiocManager", quietly = TRUE))
#     install.packages("BiocManager")
#
# BiocManager::install("seqArchR")

## ----setup-two, echo=TRUE-----------------------------------------------------
# Load seqArchR
library(seqArchR)
library(Biostrings, quietly = TRUE)

# Set seed for reproducibility
set.seed(1234)

## ----load-example-data, echo=TRUE---------------------------------------------
# Creation of one-hot encoded data matrix from FASTA file
inputFname <- system.file(
  "extdata", "example_data.fa.gz",
  package = "seqArchR",
  mustWork = TRUE
)

# Specifying `dinuc` generates dinucleotide features
inputSeqsMat <- seqArchR::prepare_data_from_FASTA(
  fasta_fname = inputFname,
  sinuc_or_dinuc = "dinuc"
)

# Same file again, this time requesting the raw sequences (`raw_seq = TRUE`)
inputSeqsRaw <- seqArchR::prepare_data_from_FASTA(
  fasta_fname = inputFname,
  raw_seq = TRUE
)

# Number of sequences read (not referenced again in this script)
nSeqs <- length(inputSeqsRaw)

# Position labels 1..width of the first sequence.
# seq_len() is used instead of seq(1, n) so a zero width yields an empty
# vector rather than c(1, 0).
positions <- seq_len(Biostrings::width(inputSeqsRaw[1]))

## ----load-example-data-2, echo=TRUE, eval=TRUE--------------------------------
# Creation of one-hot encoded data matrix from a DNAStringSet object
inputSeqs_direct <- seqArchR::get_one_hot_encoded_seqs(
  seqs = inputSeqsRaw,
  sinuc_or_dinuc = "dinuc"
)

# Compare the matrix built from the in-memory sequences with the one built
# directly from the FASTA file
identical(inputSeqs_direct, inputSeqsMat)

## ----plot-seqs, echo=TRUE, fig.dim=c(4,6)-------------------------------------
# Visualize the sequences in a image matrix where the DNA bases are
# assigned fixed colors
seqArchR::viz_seqs_acgt_mat(
  as.character(inputSeqsRaw),
  pos_lab = positions,
  save_fname = NULL
)

## ----setup-seqArchR-config-call, echo=TRUE------------------------------------
# Set seqArchR configuration
seqArchRconfig <- seqArchR::set_config(
  parallelize = TRUE,
  n_cores = 2,
  n_runs = 100,
  k_min = 1,
  k_max = 20,
  mod_sel_type = "stability",
  bound = 10^-6,
  chunk_size = 100,
  result_aggl = "ward.D",
  result_dist = "euclid",
  flags = list(
    debug = FALSE,
    time = TRUE,
    verbose = TRUE,
    plot = FALSE
  )
)

## ----call-seqArchR, echo=TRUE, eval=FALSE-------------------------------------
# # Call/Run seqArchR
# seqArchRresult <- seqArchR::seqArchR(config = seqArchRconfig,
#                             seqs_ohe_mat = inputSeqsMat,
#                             seqs_raw = inputSeqsRaw,
#                             seqs_pos = positions,
#                             total_itr = 2,
#                             set_ocollation = c(TRUE, FALSE))
#

## ----read-stored-result, echo=FALSE-------------------------------------------
# The call above is not evaluated (eval=FALSE); load the result object stored
# in the package's extdata instead.
seqArchRresult <- readRDS(
  system.file(
    "extdata", "example_seqArchRresult.rds",
    package = "seqArchR",
    mustWork = TRUE
  )
)

## ----seqArchR-result-clust-factors--------------------------------------------
# Basis vectors at iteration 2
seqArchR::get_clBasVec_k(seqArchRresult, iter = 2)

i2_bv <- seqArchR::get_clBasVec_m(seqArchRresult, iter = 2)
dim(i2_bv)
head(i2_bv)

## ----viz-BV-1, echo=TRUE, fig.height=5, fig.width=25--------------------------
# Heatmap and sequence-logo views of the iteration 1 basis vectors
seqArchR::viz_bas_vec(
  feat_mat = seqArchR::get_clBasVec_m(seqArchRresult, 1),
  ptype = c("heatmap", "seqlogo"),
  method = "bits",
  sinuc_or_dinuc = "dinuc"
)

## ----viz-BV-2, fig.height=5, fig.width=25, echo=TRUE, warning=FALSE-----------
# Heatmap and sequence-logo views of the iteration 2 basis vectors
seqArchR::viz_bas_vec(
  feat_mat = seqArchR::get_clBasVec_m(seqArchRresult, 2),
  ptype = c("heatmap", "seqlogo"),
  method = "bits",
  sinuc_or_dinuc = "dinuc"
)

## ----clust-itr1, fig.dim=c(4,6), fig.cap="Clusters at iteration 1"------------
seqArchR::viz_seqs_acgt_mat(
  seqArchR::seqs_str(seqArchRresult, iter = 1, ord = TRUE),
  pos_lab = positions
)

## ----clust-itr2, fig.dim=c(4,6), fig.cap="Clusters at iteration 2"------------
seqArchR::viz_seqs_acgt_mat(
  seqArchR::seqs_str(seqArchRresult, iter = 2, ord = TRUE),
  pos_lab = positions
)

## ----session_info, include=TRUE, echo=TRUE, results='markup'------------------
sessionInfo()