## ----style-knitr, eval=TRUE, echo=FALSE, results="asis"------------------ BiocStyle::latex(use.unsrturl=FALSE) ## ----setup, include=FALSE, cache=FALSE----------------------------------- library(knitr) # set global chunk options for knitr opts_chunk$set(comment=NA, warning=FALSE, message=FALSE, fig.path='figure/systemPipeR-') options(formatR.arrow=TRUE, width=95) unlink("test.db") ## ----eval=TRUE----------------------------------------------------------- library(systemPipeR) ## ----eval=FALSE---------------------------------------------------------- ## library(systemPipeRdata) ## genWorkenvir(workflow="chipseq") ## setwd("chipseq") ## ----eval=FALSE---------------------------------------------------------- ## source("systemPipeChIPseq_Fct.R") ## ----eval=TRUE----------------------------------------------------------- targetspath <- system.file("extdata", "targets_chip.txt", package="systemPipeR") targets <- read.delim(targetspath, comment.char = "#") targets[1:4,-c(5,6)] ## ----eval=FALSE, messages=FALSE, warning=FALSE, cache=TRUE--------------- ## args <- systemArgs(sysma="param/trim.param", mytargets="targets_chip.txt") ## filterFct <- function(fq, cutoff=20, Nexceptions=0) { ## qcount <- rowSums(as(quality(fq), "matrix") <= cutoff) ## fq[qcount <= Nexceptions] # Retains reads where Phred scores are >= cutoff with N exceptions ## } ## preprocessReads(args=args, Fct="filterFct(fq, cutoff=20, Nexceptions=0)", batchsize=100000) ## writeTargetsout(x=args, file="targets_chip_trim.txt", overwrite=TRUE) ## ----eval=FALSE---------------------------------------------------------- ## args <- systemArgs(sysma="param/bowtieSE.param", mytargets="targets_chip_trim.txt") ## fqlist <- seeFastq(fastq=infile1(args), batchsize=100000, klength=8) ## pdf("./results/fastqReport.pdf", height=18, width=4*length(fqlist)) ## seeFastqPlot(fqlist) ## dev.off() ## ----eval=FALSE---------------------------------------------------------- ## args <- systemArgs(sysma="param/bowtieSE.param", mytargets="targets_chip_trim.txt") ## sysargs(args)[1] # Command-line parameters for first FASTQ file ## moduleload(modules(args)) # Skip if a module system is not used ## system("bowtie2-build ./data/tair10.fasta ./data/tair10.fasta") # Indexes reference genome ## runCommandline(args) ## writeTargetsout(x=args, file="targets_bam.txt", overwrite=TRUE) ## ----eval=FALSE---------------------------------------------------------- ## file.exists(outpaths(args)) ## ----eval=FALSE---------------------------------------------------------- ## read_statsDF <- alignStats(args=args) ## write.table(read_statsDF, "results/alignStats.xls", row.names=FALSE, quote=FALSE, sep="\t") ## read.delim("results/alignStats.xls") ## ----eval=FALSE---------------------------------------------------------- ## symLink2bam(sysargs=args, htmldir=c("~/.html/", "somedir/"), ## urlbase="http://biocluster.ucr.edu/~tgirke/", ## urlfile="./results/IGVurl.txt") ## ----eval=FALSE---------------------------------------------------------- ## args <- systemArgs(sysma=NULL, mytargets="targets_bam.txt") ## args_merge <- mergeBamByFactor(args, overwrite=TRUE) ## writeTargetsout(x=args_merge, file="targets_mergeBamByFactor.txt", overwrite=TRUE) ## ----eval=FALSE---------------------------------------------------------- ## args <- systemArgs(sysma="param/macs2_noinput.param", mytargets="targets_mergeBamByFactor.txt") ## sysargs(args)[1] # Command-line parameters for first FASTQ file ## runCommandline(args) ## file.exists(outpaths(args)) ## writeTargetsout(x=args, file="targets_macs.txt", overwrite=TRUE) ## ----eval=FALSE---------------------------------------------------------- ## writeTargetsRef(infile="targets_mergeBamByFactor.txt", outfile="targets_bam_ref.txt", silent=FALSE, overwrite=TRUE) ## args <- systemArgs(sysma="param/macs2.param", mytargets="targets_bam_ref.txt") ## sysargs(args)[1] # Command-line parameters for first FASTQ file ## runCommandline(args) ## file.exists(outpaths(args)) ## writeTargetsout(x=args, file="targets_macs.txt", overwrite=TRUE) ## ----eval=FALSE---------------------------------------------------------- ## library(ChIPpeakAnno); library(GenomicFeatures) ## args <- systemArgs(sysma="param/annotate_peaks.param", mytargets="targets_macs.txt") ## txdb <- loadDb("./data/tair10.sqlite") ## ge <- genes(txdb, columns=c("tx_name", "gene_id", "tx_type")) ## for(i in seq(along=args)) { ## peaksGR <- as(read.delim(infile1(args)[i], comment="#"), "GRanges") ## annotatedPeak <- annotatePeakInBatch(peaksGR, AnnotationData=genes(txdb)) ## df <- data.frame(as.data.frame(annotatedPeak), as.data.frame(values(ge[values(annotatedPeak)$feature,]))) ## write.table(df, outpaths(args[i]), quote=FALSE, row.names=FALSE, sep="\t") ## } ## writeTargetsout(x=args, file="targets_peakanno.txt", overwrite=TRUE) ## ----eval=FALSE, include=FALSE------------------------------------------- ## ## Perform previous step with full genome annotation from Biomart ## # txdb <- makeTxDbFromBiomart(biomart="ENSEMBL_MART_PLANT", dataset="athaliana_eg_gene") ## # tx <- transcripts(txdb, columns=c("tx_name", "gene_id", "tx_type")) ## # ge <- genes(txdb, columns=c("tx_name", "gene_id", "tx_type")) # works as well ## # seqlevels(ge) <- c("Chr1", "Chr2", "Chr3", "Chr4", "Chr5", "ChrC", "ChrM") ## # table(mcols(tx)$tx_type) ## # tx <- tx[!duplicated(unstrsplit(values(tx)$gene_id, sep=",")) # Keeps only first transcript model for each gene] ## # annotatedPeak <- annotatePeakInBatch(macsOutput, AnnotationData = tx) ## ----eval=FALSE---------------------------------------------------------- ## library(ChIPseeker) ## txdb <- loadDb("./data/tair10.sqlite") ## for(i in seq(along=args)) { ## peakAnno <- annotatePeak(infile1(args)[i], TxDb=txdb, verbose=FALSE) ## df <- as.data.frame(peakAnno) ## write.table(df, outpaths(args[i]), quote=FALSE, row.names=FALSE, sep="\t") ## } ## writeTargetsout(x=args, file="targets_peakanno.txt", overwrite=TRUE) ## ----eval=FALSE---------------------------------------------------------- ## peak <- readPeakFile(infile1(args)[1]) ## covplot(peak, weightCol="X.log10.pvalue.") ## peakHeatmap(outpaths(args)[1], TxDb=txdb, upstream=1000, downstream=1000, color="red") ## plotAvgProf2(outpaths(args)[1], TxDb=txdb, upstream=1000, downstream=1000, xlab="Genomic Region (5'->3')", ylab = "Read Count Frequency") ## ----eval=FALSE---------------------------------------------------------- ## library(GenomicRanges) ## args <- systemArgs(sysma="param/count_rangesets.param", mytargets="targets_macs.txt") ## args_bam <- systemArgs(sysma=NULL, mytargets="targets_bam.txt") ## bfl <- BamFileList(outpaths(args_bam), yieldSize=50000, index=character()) ## countDFnames <- countRangeset(bfl, args, mode="Union", ignore.strand=TRUE) ## writeTargetsout(x=args, file="targets_countDF.txt", overwrite=TRUE) ## ----eval=FALSE---------------------------------------------------------- ## args_diff <- systemArgs(sysma="param/rundiff.param", mytargets="targets_countDF.txt") ## cmp <- readComp(file=args_bam, format="matrix") ## dbrlist <- runDiff(args=args_diff, diffFct=run_edgeR, targets=targetsin(args_bam), ## cmp=cmp[[1]], independent=TRUE, dbrfilter=c(Fold=2, FDR=1)) ## writeTargetsout(x=args_diff, file="targets_rundiff.txt", overwrite=TRUE) ## ----eval=FALSE---------------------------------------------------------- ## args <- systemArgs(sysma="param/macs2.param", mytargets="targets_bam_ref.txt") ## args_anno <- systemArgs(sysma="param/annotate_peaks.param", mytargets="targets_macs.txt") ## annofiles <- outpaths(args_anno) ## gene_ids <- sapply(names(annofiles), function(x) unique(as.character(read.delim(annofiles[x])[,"gene_id"]))) ## load("data/GO/catdb.RData") ## BatchResult <- GOCluster_Report(catdb=catdb, setlist=gene_ids, method="all", id_type="gene", CLSZ=2, cutoff=0.9, gocats=c("MF", "BP", "CC"), recordSpecGO=NULL) ## ----eval=FALSE---------------------------------------------------------- ## library(Biostrings); library(seqLogo); library(BCRANK) ## args <- systemArgs(sysma="param/annotate_peaks.param", mytargets="targets_macs.txt") ## rangefiles <- infile1(args) ## for(i in seq(along=rangefiles)) { ## df <- read.delim(rangefiles[i], comment="#") ## peaks <- as(df, "GRanges") ## names(peaks) <- paste0(as.character(seqnames(peaks)), "_", start(peaks), "-", end(peaks)) ## peaks <- peaks[order(values(peaks)$X.log10.pvalue, decreasing=TRUE)] ## pseq <- getSeq(FaFile("./data/tair10.fasta"), peaks) ## names(pseq) <- names(peaks) ## writeXStringSet(pseq, paste0(rangefiles[i], ".fasta")) ## } ## ----eval=FALSE---------------------------------------------------------- ## set.seed(0) ## BCRANKout <- bcrank(paste0(rangefiles[1], ".fasta"), restarts=25, use.P1=TRUE, use.P2=TRUE) ## toptable(BCRANKout) ## topMotif <- toptable(BCRANKout, 1) ## weightMatrix <- pwm(topMotif, normalize = FALSE) ## weightMatrixNormalized <- pwm(topMotif, normalize = TRUE) ## pdf("results/seqlogo.pdf") ## seqLogo(weightMatrixNormalized) ## dev.off() ## ----sessionInfo, results='asis'----------------------------------------- toLatex(sessionInfo())