## ----style-knitr, eval=TRUE, echo=FALSE, results="asis"-------------------- BiocStyle::latex(use.unsrturl=FALSE) ## ----setup, include=FALSE, cache=FALSE------------------------------------------------------- library(knitr) # set global chunk options for knitr opts_chunk$set(comment=NA, warning=FALSE, message=FALSE, fig.path='figure/systemPipeR-') options(formatR.arrow=TRUE, width=95) unlink("test.db") ## ----eval=TRUE------------------------------------------------------------------------------- library(systemPipeR) ## ----eval=FALSE------------------------------------------------------------------------------ # library(systemPipeRdata) # genWorkenvir(workflow="chipseq") # setwd("chipseq") ## ----eval=FALSE------------------------------------------------------------------------------ # source("systemPipeChIPseq_Fct.R") ## ----eval=TRUE------------------------------------------------------------------------------- targetspath <- system.file("extdata", "targets_chip.txt", package="systemPipeR") targets <- read.delim(targetspath, comment.char = "#") targets[1:4,-c(5,6)] ## ----eval=FALSE, messages=FALSE, warning=FALSE, cache=TRUE----------------------------------- # args <- systemArgs(sysma="param/trim.param", mytargets="targets_chip.txt") # filterFct <- function(fq, cutoff=20, Nexceptions=0) { # qcount <- rowSums(as(quality(fq), "matrix") <= cutoff) # fq[qcount <= Nexceptions] # Retains reads where Phred scores are >= cutoff with N exceptions # } # preprocessReads(args=args, Fct="filterFct(fq, cutoff=20, Nexceptions=0)", batchsize=100000) # writeTargetsout(x=args, file="targets_chip_trim.txt", overwrite=TRUE) ## ----eval=FALSE------------------------------------------------------------------------------ # args <- systemArgs(sysma="param/bowtieSE.param", mytargets="targets_chip_trim.txt") # fqlist <- seeFastq(fastq=infile1(args), batchsize=100000, klength=8) # pdf("./results/fastqReport.pdf", height=18, width=4*length(fqlist)) # seeFastqPlot(fqlist) # dev.off() ## ----eval=FALSE------------------------------------------------------------------------------ # args <- systemArgs(sysma="param/bowtieSE.param", mytargets="targets_chip_trim.txt") # sysargs(args)[1] # Command-line parameters for first FASTQ file # moduleload(modules(args)) # Skip if a module system is not used # system("bowtie2-build ./data/tair10.fasta ./data/tair10.fasta") # Indexes reference genome # runCommandline(args) # writeTargetsout(x=args, file="targets_bam.txt", overwrite=TRUE) ## ----eval=FALSE------------------------------------------------------------------------------ # file.exists(outpaths(args)) ## ----eval=FALSE------------------------------------------------------------------------------ # read_statsDF <- alignStats(args=args) # write.table(read_statsDF, "results/alignStats.xls", row.names=FALSE, quote=FALSE, sep="\t") # read.delim("results/alignStats.xls") ## ----eval=FALSE------------------------------------------------------------------------------ # symLink2bam(sysargs=args, htmldir=c("~/.html/", "somedir/"), # urlbase="http://biocluster.ucr.edu/~tgirke/", # urlfile="./results/IGVurl.txt") ## ----eval=FALSE------------------------------------------------------------------------------ # args <- systemArgs(sysma=NULL, mytargets="targets_bam.txt") # args_merge <- mergeBamByFactor(args, overwrite=TRUE) # writeTargetsout(x=args_merge, file="targets_mergeBamByFactor.txt", overwrite=TRUE) ## ----eval=FALSE------------------------------------------------------------------------------ # args <- systemArgs(sysma="param/macs2_noinput.param", mytargets="targets_mergeBamByFactor.txt") # sysargs(args)[1] # Command-line parameters for first FASTQ file # runCommandline(args) # file.exists(outpaths(args)) # writeTargetsout(x=args, file="targets_macs.txt", overwrite=TRUE) ## ----eval=FALSE------------------------------------------------------------------------------ # writeTargetsRef(infile="targets_mergeBamByFactor.txt", outfile="targets_bam_ref.txt", silent=FALSE, overwrite=TRUE) # args <- systemArgs(sysma="param/macs2.param", mytargets="targets_bam_ref.txt") # sysargs(args)[1] # Command-line parameters for first FASTQ file # runCommandline(args) # file.exists(outpaths(args)) # writeTargetsout(x=args, file="targets_macs.txt", overwrite=TRUE) ## ----eval=FALSE------------------------------------------------------------------------------ # library(ChIPpeakAnno); library(GenomicFeatures) # args <- systemArgs(sysma="param/annotate_peaks.param", mytargets="targets_macs.txt") # txdb <- loadDb("./data/tair10.sqlite") # ge <- genes(txdb, columns=c("tx_name", "gene_id", "tx_type")) # for(i in seq(along=args)) { # peaksGR <- as(read.delim(infile1(args)[i], comment="#"), "GRanges") # annotatedPeak <- annotatePeakInBatch(peaksGR, AnnotationData=genes(txdb)) # df <- data.frame(as.data.frame(annotatedPeak), as.data.frame(values(ge[values(annotatedPeak)$feature,]))) # write.table(df, outpaths(args[i]), quote=FALSE, row.names=FALSE, sep="\t") # } # writeTargetsout(x=args, file="targets_peakanno.txt", overwrite=TRUE) ## ----eval=FALSE, include=FALSE--------------------------------------------------------------- # ## Perform previous step with full genome annotation from Biomart # # txdb <- makeTxDbFromBiomart(biomart="ENSEMBL_MART_PLANT", dataset="athaliana_eg_gene") # # tx <- transcripts(txdb, columns=c("tx_name", "gene_id", "tx_type")) # # ge <- genes(txdb, columns=c("tx_name", "gene_id", "tx_type")) # works as well # # seqlevels(ge) <- c("Chr1", "Chr2", "Chr3", "Chr4", "Chr5", "ChrC", "ChrM") # # table(mcols(tx)$tx_type) # # tx <- tx[!duplicated(unstrsplit(values(tx)$gene_id, sep=",")) # Keeps only first transcript model for each gene] # # annotatedPeak <- annotatePeakInBatch(macsOutput, AnnotationData = tx) ## ----eval=FALSE------------------------------------------------------------------------------ # library(ChIPseeker) # txdb <- loadDb("./data/tair10.sqlite") # for(i in seq(along=args)) { # peakAnno <- annotatePeak(infile1(args)[i], TxDb=txdb, verbose=FALSE) # df <- as.data.frame(peakAnno) # write.table(df, outpaths(args[i]), quote=FALSE, row.names=FALSE, sep="\t") # } # writeTargetsout(x=args, file="targets_peakanno.txt", overwrite=TRUE) ## ----eval=FALSE------------------------------------------------------------------------------ # peak <- readPeakFile(infile1(args)[1]) # covplot(peak, weightCol="X.log10.pvalue.") # peakHeatmap(outpaths(args)[1], TxDb=txdb, upstream=1000, downstream=1000, color="red") # plotAvgProf2(outpaths(args)[1], TxDb=txdb, upstream=1000, downstream=1000, xlab="Genomic Region (5'->3')", ylab = "Read Count Frequency") ## ----eval=FALSE------------------------------------------------------------------------------ # library(GenomicRanges) # args <- systemArgs(sysma="param/count_rangesets.param", mytargets="targets_macs.txt") # args_bam <- systemArgs(sysma=NULL, mytargets="targets_bam.txt") # bfl <- BamFileList(outpaths(args_bam), yieldSize=50000, index=character()) # countDFnames <- countRangeset(bfl, args, mode="Union", ignore.strand=TRUE) # writeTargetsout(x=args, file="targets_countDF.txt", overwrite=TRUE) ## ----eval=FALSE------------------------------------------------------------------------------ # args_diff <- systemArgs(sysma="param/rundiff.param", mytargets="targets_countDF.txt") # cmp <- readComp(file=args_bam, format="matrix") # dbrlist <- runDiff(args=args_diff, diffFct=run_edgeR, targets=targetsin(args_bam), # cmp=cmp[[1]], independent=TRUE, dbrfilter=c(Fold=2, FDR=1)) # writeTargetsout(x=args_diff, file="targets_rundiff.txt", overwrite=TRUE) ## ----eval=FALSE------------------------------------------------------------------------------ # args <- systemArgs(sysma="param/macs2.param", mytargets="targets_bam_ref.txt") # args_anno <- systemArgs(sysma="param/annotate_peaks.param", mytargets="targets_macs.txt") # annofiles <- outpaths(args_anno) # gene_ids <- sapply(names(annofiles), function(x) unique(as.character(read.delim(annofiles[x])[,"gene_id"]))) # load("data/GO/catdb.RData") # BatchResult <- GOCluster_Report(catdb=catdb, setlist=gene_ids, method="all", id_type="gene", CLSZ=2, cutoff=0.9, gocats=c("MF", "BP", "CC"), recordSpecGO=NULL) ## ----eval=FALSE------------------------------------------------------------------------------ # library(Biostrings); library(seqLogo); library(BCRANK) # args <- systemArgs(sysma="param/annotate_peaks.param", mytargets="targets_macs.txt") # rangefiles <- infile1(args) # for(i in seq(along=rangefiles)) { # df <- read.delim(rangefiles[i], comment="#") # peaks <- as(df, "GRanges") # names(peaks) <- paste0(as.character(seqnames(peaks)), "_", start(peaks), "-", end(peaks)) # peaks <- peaks[order(values(peaks)$X.log10.pvalue, decreasing=TRUE)] # pseq <- getSeq(FaFile("./data/tair10.fasta"), peaks) # names(pseq) <- names(peaks) # writeXStringSet(pseq, paste0(rangefiles[i], ".fasta")) # } ## ----eval=FALSE------------------------------------------------------------------------------ # set.seed(0) # BCRANKout <- bcrank(paste0(rangefiles[1], ".fasta"), restarts=25, use.P1=TRUE, use.P2=TRUE) # toptable(BCRANKout) # topMotif <- toptable(BCRANKout, 1) # weightMatrix <- pwm(topMotif, normalize = FALSE) # weightMatrixNormalized <- pwm(topMotif, normalize = TRUE) # pdf("results/seqlogo.pdf") # seqLogo(weightMatrixNormalized) # dev.off() ## ----sessionInfo, results='asis'------------------------------------------------------------- toLatex(sessionInfo())