################################################### ### chunk number 1: seeding ################################################### #line 93 "vignettes/ChIPsim/inst/doc/ChIPsimIntro.Rnw" library(ChIPsim) set.seed(1) ################################################### ### chunk number 2: genome ################################################### #line 107 "vignettes/ChIPsim/inst/doc/ChIPsimIntro.Rnw" chrLen <- c(2e5, 1e5) chromosomes <- sapply(chrLen, function(n) paste(sample(DNA_BASES, n, replace = TRUE), collapse = "")) names(chromosomes) <- paste("CHR", seq_along(chromosomes), sep="") genome <- DNAStringSet(chromosomes) ################################################### ### chunk number 3: ################################################### #line 115 "vignettes/ChIPsim/inst/doc/ChIPsimIntro.Rnw" rm(chromosomes) ################################################### ### chunk number 4: simulateQualities ################################################### #line 125 "vignettes/ChIPsim/inst/doc/ChIPsimIntro.Rnw" randomQuality <- function(read, ...){ paste(sample(unlist(strsplit(rawToChar(as.raw(64:104)),"")), nchar(read), replace = TRUE), collapse="") } ################################################### ### chunk number 5: output ################################################### #line 140 "vignettes/ChIPsim/inst/doc/ChIPsimIntro.Rnw" dfReads <- function(readPos, readNames, sequence, readLen, ...){ ## create vector to hold read sequences and qualities readSeq <- character(sum(sapply(readPos, sapply, length))) readQual <- character(sum(sapply(readPos, sapply, length))) idx <- 1 ## process read positions for each chromosome and strand for(k in length(readPos)){ ## chromosome for(i in 1:2){ ## strand for(j in 1:length(readPos[[k]][[i]])){ ## get (true) sequence readSeq[idx] <- as.character(readSequence(readPos[[k]][[i]][j], sequence[[k]], strand=ifelse(i==1, 1, -1), readLen=readLen)) ## get quality readQual[idx] <- randomQuality(readSeq[idx]) ## introduce sequencing errors readSeq[idx] <- readError(readSeq[idx], decodeQuality(readQual[idx])) idx <- idx + 1 } } } data.frame(name=unlist(readNames), sequence=readSeq, quality=readQual, stringsAsFactors = FALSE) } ################################################### ### chunk number 6: simulation ################################################### #line 200 "vignettes/ChIPsim/inst/doc/ChIPsimIntro.Rnw" myFunctions <- defaultFunctions() myFunctions$readSequence <- dfReads nReads <- 1000 simulated <- simChIP(nReads, genome, file = "", functions = myFunctions, control = defaultControl(readDensity=list(meanLength = 150))) ################################################### ### chunk number 7: listSummary ################################################### #line 217 "vignettes/ChIPsim/inst/doc/ChIPsimIntro.Rnw" names(simulated) ################################################### ### chunk number 8: ################################################### #line 221 "vignettes/ChIPsim/inst/doc/ChIPsimIntro.Rnw" head(simulated$readSequence) ################################################### ### chunk number 9: eval=FALSE ################################################### ## #line 227 "vignettes/ChIPsim/inst/doc/ChIPsimIntro.Rnw" ## feat <- simulated$features[[1]] ## stableIdx <- which(sapply(feat, inherits, "StableFeature")) ## start <- feat[[stableIdx[1]]]$start ## plot((start-2000):(start+500), simulated$bindDensity[[1]][(start-2000):(start+500)], xlab="Chromosome 1", ## ylab="Density", type='l') ## lines((start-2000):(start+500), simulated$readDensity[[1]][(start-2000):(start+500),1], col=4) ## lines((start-2000):(start+500), simulated$readDensity[[1]][(start-2000):(start+500),2], col=2) ## legend("topright", legend=c("Nucleosome density", "Read density (+)", "Read density (-)"), col=c(1,4,2), lwd=1) ################################################### ### chunk number 10: ################################################### #line 239 "vignettes/ChIPsim/inst/doc/ChIPsimIntro.Rnw" feat <- simulated$features[[1]] stableIdx <- which(sapply(feat, inherits, "StableFeature")) start <- feat[[stableIdx[1]]]$start plot((start-2000):(start+500), simulated$bindDensity[[1]][(start-2000):(start+500)], xlab="Chromosome 1", ylab="Density", type='l') lines((start-2000):(start+500), simulated$readDensity[[1]][(start-2000):(start+500),1], col=4) lines((start-2000):(start+500), simulated$readDensity[[1]][(start-2000):(start+500),2], col=2) legend("topright", legend=c("Nucleosome density", "Read density (+)", "Read density (-)"), col=c(1,4,2), lwd=1) ################################################### ### chunk number 11: transitions ################################################### #line 318 "vignettes/ChIPsim/inst/doc/ChIPsimIntro.Rnw" transition <- list(Binding=c(Background=1), Background=c(Binding=0.05, Background=0.95)) transition <- lapply(transition, "class<-", "StateDistribution") ################################################### ### chunk number 12: initial ################################################### #line 329 "vignettes/ChIPsim/inst/doc/ChIPsimIntro.Rnw" init <- c(Binding=0, Background=1) class(init) <- "StateDistribution" ################################################### ### chunk number 13: bgEmission ################################################### #line 342 "vignettes/ChIPsim/inst/doc/ChIPsimIntro.Rnw" backgroundFeature <- function(start, length=1000, shape=1, scale=20){ weight <- rgamma(1, shape=1, scale=20) params <- list(start = start, length = length, weight = weight) class(params) <- c("Background", "SimulatedFeature") params } ################################################### ### chunk number 14: bindingEmission ################################################### #line 369 "vignettes/ChIPsim/inst/doc/ChIPsimIntro.Rnw" bindingFeature <- function(start, length=500, shape=1, scale=20, enrichment=5, r=1.5){ stopifnot(r > 1) avgWeight <- shape * scale * enrichment lowerBound <- ((r - 1) * avgWeight) weight <- actuar::rpareto1(1, r, lowerBound) params <- list(start = start, length = length, weight = weight) class(params) <- c("Binding", "SimulatedFeature") params } ################################################### ### chunk number 15: features1_1 ################################################### #line 389 "vignettes/ChIPsim/inst/doc/ChIPsimIntro.Rnw" set.seed(1) generator <- list(Binding=bindingFeature, Background=backgroundFeature) features <- ChIPsim::makeFeatures(generator, transition, init, start = 0, length = 1e6, globals=list(shape=1, scale=20), experimentType="TFExperiment", lastFeat=c(Binding = FALSE, Background = TRUE)) ################################################### ### chunk number 16: ################################################### #line 405 "vignettes/ChIPsim/inst/doc/ChIPsimIntro.Rnw" bindIdx <- sapply(features, inherits, "Binding") plot(density(sapply(features[!bindIdx], "[[", "weight")), xlim=c(0,max(sapply(features, "[[", "weight"))), xlab="Sampling weight", main="") lines(density(sapply(features[bindIdx], "[[", "weight")), col=2) legend("topright", legend=c("Background", "Binding"), col=1:2, lty=1) ################################################### ### chunk number 17: ################################################### #line 418 "vignettes/ChIPsim/inst/doc/ChIPsimIntro.Rnw" features[[1]] ################################################### ### chunk number 18: features1_2 ################################################### #line 435 "vignettes/ChIPsim/inst/doc/ChIPsimIntro.Rnw" set.seed(1) features <- ChIPsim::placeFeatures(generator, transition, init, start = 0, length = 1e6, globals=list(shape=1, scale=20), experimentType="TFExperiment", lastFeat=c(Binding = FALSE, Background = TRUE)) ################################################### ### chunk number 19: ################################################### #line 442 "vignettes/ChIPsim/inst/doc/ChIPsimIntro.Rnw" features[[1]] ################################################### ### chunk number 20: featureDensity1 ################################################### #line 455 "vignettes/ChIPsim/inst/doc/ChIPsimIntro.Rnw" constRegion <- function(weight, length) rep(weight, length) featureDensity.Binding <- function(feature, ...) constRegion(feature$weight, feature$length) featureDensity.Background <- function(feature, ...) constRegion(feature$weight, feature$length) ################################################### ### chunk number 21: density1 ################################################### #line 461 "vignettes/ChIPsim/inst/doc/ChIPsimIntro.Rnw" dens <- ChIPsim::feat2dens(features) ################################################### ### chunk number 22: readLoc1 ################################################### #line 467 "vignettes/ChIPsim/inst/doc/ChIPsimIntro.Rnw" readLoc <- sample(44000:64000, 1e3, prob=dens[44000:64000], replace=TRUE) ################################################### ### chunk number 23: ################################################### #line 472 "vignettes/ChIPsim/inst/doc/ChIPsimIntro.Rnw" start <- features[[min(which(bindIdx))]]$start length <- features[[min(which(bindIdx))]]$length par(mfrow=c(2,1)) par(mar=c(0, 4.1, 1.1, 2.1)) plot((start-10000):(start+10000), dens[(start-10000):(start+10000)], type='l', xlab="", ylab="Sampling weight", xaxt = 'n') abline(v=start, col="grey", lty=2) abline(v=start+length-1, col="grey", lty=2) par(mar=c(4.1, 4.1, 0, 2.1)) counts <- hist(readLoc, breaks=seq(44000-0.5, 64000.5, 1), plot = FALSE)$counts extReads <- zoo::rollmean(ts(counts), 200)*200 plot(44100:63901,extReads, xlab="Position in genomic region", ylab="Read count", main="", type='l', xlim = c(start-10000, start+10000)) abline(v=start, col="grey", lty=2) abline(v=start+length-1, col="grey", lty=2) ################################################### ### chunk number 24: reconcileFeatures ################################################### #line 520 "vignettes/ChIPsim/inst/doc/ChIPsimIntro.Rnw" reconcileFeatures.TFExperiment <- function(features, ...){ bindIdx <- sapply(features, inherits, "Binding") if(any(bindIdx)) bindLength <- features[[min(which(bindIdx))]]$length else bindLength <- 1 lapply(features, function(f){ if(inherits(f, "Background")) f$weight <- f$weight/bindLength ## The next three lines (or something to this effect) ## are required by all 'reconcileFeatures' implementations. f$overlap <- 0 currentClass <- class(f) class(f) <- c(currentClass[-length(currentClass)], "ReconciledFeature", currentClass[length(currentClass)]) f }) } ################################################### ### chunk number 25: features2 ################################################### #line 540 "vignettes/ChIPsim/inst/doc/ChIPsimIntro.Rnw" set.seed(1) features <- ChIPsim::placeFeatures(generator, transition, init, start = 0, length = 1e6, globals=list(shape=1, scale=20), experimentType="TFExperiment", lastFeat=c(Binding = FALSE, Background = TRUE), control=list(Binding=list(length=50))) ################################################### ### chunk number 26: ################################################### #line 548 "vignettes/ChIPsim/inst/doc/ChIPsimIntro.Rnw" features[[1]] ################################################### ### chunk number 27: featureDensity2 ################################################### #line 554 "vignettes/ChIPsim/inst/doc/ChIPsimIntro.Rnw" featureDensity.Binding <- function(feature, ...){ featDens <- numeric(feature$length) featDens[floor(feature$length/2)] <- feature$weight featDens } ################################################### ### chunk number 28: density2 ################################################### #line 562 "vignettes/ChIPsim/inst/doc/ChIPsimIntro.Rnw" dens <- ChIPsim::feat2dens(features, length = 1e6) ################################################### ### chunk number 29: fragmentLength ################################################### #line 576 "vignettes/ChIPsim/inst/doc/ChIPsimIntro.Rnw" fragLength <- function(x, minLength, maxLength, meanLength, ...){ sd <- (maxLength - minLength)/4 prob <- dnorm(minLength:maxLength, mean = meanLength, sd = sd) prob <- prob/sum(prob) prob[x - minLength + 1] } ################################################### ### chunk number 30: readDens ################################################### #line 586 "vignettes/ChIPsim/inst/doc/ChIPsimIntro.Rnw" readDens <- ChIPsim::bindDens2readDens(dens, fragLength, bind = 50, minLength = 150, maxLength = 250, meanLength = 200) ################################################### ### chunk number 31: ################################################### #line 595 "vignettes/ChIPsim/inst/doc/ChIPsimIntro.Rnw" bindIdx <- sapply(features, inherits, "Binding") start <- features[[min(which(bindIdx))]]$start length <- features[[min(which(bindIdx))]]$length par(mfrow=c(2,1)) par(mar=c(0, 4.1, 1.1, 2.1)) plot((start-10000):(start+10000), dens[(start-10000):(start+10000)], type='l', xlab="", ylab="Sampling weight", xaxt='n') par(mar=c(4.1, 4.1, 0, 2.1)) plot((start-10000):(start+10000), readDens[(start-10000):(start+10000), 1], col=4, type='l', xlab="Position in genomic region", ylab="Sampling weight") lines((start-10000):(start+10000), readDens[(start-10000):(start+10000), 2], col=2) legend("topleft", legend=c("forward", "reverse"), col=c(4,2), lty=1) ################################################### ### chunk number 32: readLoc2 ################################################### #line 619 "vignettes/ChIPsim/inst/doc/ChIPsimIntro.Rnw" readLoc <- ChIPsim::sampleReads(readDens, 1e5) ################################################### ### chunk number 33: ################################################### #line 627 "vignettes/ChIPsim/inst/doc/ChIPsimIntro.Rnw" fwdCount <- hist(readLoc$fwd, breaks=seq(0.5, 1000000.5, by=1), plot=FALSE)$counts revCount <- hist(readLoc$rev, breaks=seq(0.5, 1000000.5, by=1), plot=FALSE)$counts fwdExt <- zoo::rollmean(ts(fwdCount[(start-1000):(start+1050)]), 200, align="right")*200 revExt <- zoo::rollmean(ts(revCount[(start-1000):(start+1050)]), 200, align="left")*200 par(mfrow=c(2,1)) mar.old <- par("mar") par(mar=c(0, 4.1, 1.1, 2.1)) plot((start-1000):(start+1050) ,fwdCount[(start-1000):(start+1050)], col="lightblue", type='h', xlab="", ylab="Read count / density", ylim=c(0, 2), xlim=c(start-975, start+1025), xaxt='n') lines((start-975):(start+1025) ,revCount[(start-975):(start+1025)], col="mistyrose2", type='h') lines((start-10000):(start+10000), dens[(start-10000):(start+10000)]) lines((start-10000):(start+10000), readDens[(start-10000):(start+10000), 1], col=4) lines((start-10000):(start+10000), readDens[(start-10000):(start+10000), 2], col=2) par(mar=c(4.1, 4.1, 0, 2.1)) plot((start-801):(start+851), fwdExt+revExt, xlim=c(start-975, start+1025), xlab="Position in genomic region", ylab="Overlap count", type='s') lines((start-801):(start+1050), fwdExt, col=4) lines((start-1000):(start+851), revExt, col=2) abline(v=c(start-150, start+200), col="grey", lty=2) ################################################### ### chunk number 34: ################################################### #line 667 "vignettes/ChIPsim/inst/doc/ChIPsimIntro.Rnw" randomQuality <- function(read, ...){ paste(sample(unlist(strsplit(rawToChar(as.raw(64:104)),"")), nchar(read), replace = TRUE), collapse="") } dfReads <- function(readPos, readNames, sequence, readLen, ...){ ## create vector to hold read sequences and qualities readSeq <- character(sum(sapply(readPos, sapply, length))) readQual <- character(sum(sapply(readPos, sapply, length))) idx <- 1 ## process read positions for each chromosome and strand for(k in length(readPos)){ ## chromosome for(i in 1:2){ ## strand for(j in 1:length(readPos[[k]][[i]])){ ## get (true) sequence readSeq[idx] <- as.character(ChIPsim::readSequence(readPos[[k]][[i]][j], sequence[[k]], strand=ifelse(i==1, 1, -1), readLen=readLen)) ## get quality readQual[idx] <- randomQuality(readSeq[idx]) ## introduce sequencing errors readSeq[idx] <- ChIPsim::readError(readSeq[idx], ChIPsim::decodeQuality(readQual[idx])) idx <- idx + 1 } } } data.frame(name=unlist(readNames), sequence=readSeq, quality=readQual, stringsAsFactors = FALSE) } ################################################### ### chunk number 35: ################################################### #line 702 "vignettes/ChIPsim/inst/doc/ChIPsimIntro.Rnw" myFunctions <- ChIPsim::defaultFunctions() myFunctions$readSequence <- dfReads ################################################### ### chunk number 36: ################################################### #line 708 "vignettes/ChIPsim/inst/doc/ChIPsimIntro.Rnw" featureArgs <- list(generator=generator, transition=transition, init=init, start = 0, length = 1e6, globals=list(shape=1, scale=20), experimentType="TFExperiment", lastFeat=c(Binding = FALSE, Background = TRUE), control=list(Binding=list(length=50))) readDensArgs <- list(fragment=fragLength, bind = 50, minLength = 150, maxLength = 250, meanLength = 200) ################################################### ### chunk number 37: simulation ################################################### #line 716 "vignettes/ChIPsim/inst/doc/ChIPsimIntro.Rnw" genome <- Biostrings::DNAStringSet(c(CHR=paste(sample(Biostrings::DNA_BASES, 1e6, replace = TRUE), collapse = ""))) set.seed(1) simulated <- ChIPsim::simChIP(1e4, genome, file = "", functions = myFunctions, control = ChIPsim::defaultControl(features=featureArgs, readDensity=readDensArgs)) ################################################### ### chunk number 38: ################################################### #line 725 "vignettes/ChIPsim/inst/doc/ChIPsimIntro.Rnw" all.equal(readDens, simulated$readDensity[[1]]) ################################################### ### chunk number 39: ################################################### #line 731 "vignettes/ChIPsim/inst/doc/ChIPsimIntro.Rnw" sessionInfo()