## ----installme, eval=FALSE----------------------------------------------- # source("http://bioconductor.org/biocLite.R") # biocLite("polyester") ## ----fcmat--------------------------------------------------------------- fold_changes = matrix(c(4,4,rep(1,18),1,1,4,4,rep(1,16)), nrow=20) head(fold_changes) ## ----builtinex, warning=FALSE, message=FALSE----------------------------- library(polyester) library(Biostrings) # FASTA annotation fasta_file = system.file('extdata', 'chr22.fa', package='polyester') fasta = readDNAStringSet(fasta_file) # subset the FASTA file to first 20 transcripts small_fasta = fasta[1:20] writeXStringSet(small_fasta, 'chr22_small.fa') # ~20x coverage ----> reads per transcript = transcriptlength/readlength * 20 # here all transcripts will have ~equal FPKM readspertx = round(20 * width(small_fasta) / 100) # simulation call: simulate_experiment('chr22_small.fa', reads_per_transcript=readspertx, num_reps=c(10,10), fold_changes=fold_changes, outdir='simulated_reads') ## ----countmat------------------------------------------------------------ # set up transcript-by-timepoint matrix: num_timepoints = 12 countmat = matrix(readspertx, nrow=length(small_fasta), ncol=num_timepoints) # add spikes in expression at certain timepoints to certain transcripts: up_early = c(1,2) up_late = c(3,4) countmat[up_early, 2] = 3*countmat[up_early, 2] countmat[up_early, 3] = round(1.5*countmat[up_early, 3]) countmat[up_late, 10] = 6*countmat[up_late, 10] countmat[up_late, 11] = round(1.2*countmat[up_late, 11]) # simulate reads: simulate_experiment_countmat('chr22_small.fa', readmat=countmat, outdir='timecourse_reads') ## ----installbg, eval=FALSE----------------------------------------------- # source("http://bioconductor.org/biocLite.R") # biocLite("ballgown") # library(ballgown) # data(bg) # bg = subset(bg, "chr=='22'") # # # load gtf file for annotation: # gtfpath = system.file('extdata', 'bg.gtf.gz', package='polyester') # gtf = subset(gffRead(gtfpath), seqname=='22') # # # load chromosome sequence corresponding to gtf file (just for this example -- requires download because of file size) # system('wget https://www.dropbox.com/s/04i6msi9vu2snif/chr22seq.rda') # load('chr22seq.rda') # names(chr22seq) = '22' # # # simulate reads based on this experiment's FPKMs # simulate_experiment_empirical(bg, grouplabels=pData(bg)$group, gtf=gtf, # seqpath=chr22seq, mean_rps=5000, outdir='empirical_reads', seed=1247) ## ----info, results='markup'---------------------------------------------- sessionInfo()