### R code from vignette source 'RIPSeeker.Rnw' ################################################### ### code chunk number 1: RIPSeeker ################################################### library(RIPSeeker) ################################################### ### code chunk number 2: helppage (eval = FALSE) ################################################### ## ?RIPSeeker ################################################### ### code chunk number 3: fullPRC2dataDownload (eval = FALSE) ################################################### ## BiocManager::install("RIPSeekerData") ## ## library(RIPSeekerData) ## ## extdata.dir <- system.file("extdata", package="RIPSeekerData") ## ## bamFiles <- list.files(extdata.dir, "\\.bam$", ## recursive=TRUE, full.names=TRUE) ## ## bamFiles <- grep("PRC2/", bamFiles, value=TRUE) ################################################### ### code chunk number 4: bamFiles ################################################### # Retrieve system files # OR change it to the extdata.dir from the code chunk above # to get RIP predictions on the full alignment data extdata.dir <- system.file("extdata", package="RIPSeeker") bamFiles <- list.files(extdata.dir, "\\.bam$", recursive=TRUE, full.names=TRUE) bamFiles <- grep("PRC2", bamFiles, value=TRUE) # RIP alignment for Ezh2 in mESC ripGal <- combineAlignGals(grep(pattern="SRR039214", bamFiles, value=TRUE, invert=TRUE), reverseComplement=TRUE, genomeBuild="mm9") # Control RIP alignments for mutant Ezh2 -/- mESC ctlGal <- combineAlignGals(grep(pattern="SRR039214", bamFiles, value=TRUE, invert=FALSE), reverseComplement=TRUE, genomeBuild="mm9") ripGal ctlGal ################################################### ### code chunk number 5: plotCoverage ################################################### ripGR <- as(ripGal, "GRanges") ripGRList <- split(ripGR, seqnames(ripGR)) # get only the nonempty chromosome ripGRList <- ripGRList[elementNROWS(ripGRList) != 0] ctlGR <- as(ctlGal, "GRanges") ctlGRList <- GRangesList(as.list(split(ctlGR, seqnames(ctlGR)))) ctlGRList <- ctlGRList[lapply(ctlGRList, length) != 0] binSize <- 1000 #c(bottom, left, top, right) par(mfrow=c(1, 2), mar=c(2, 2, 2, 0) + 0.1) tmp <- lapply(ripGRList, plotStrandedCoverage, binSize=binSize, xlab="", ylab="", plotLegend=TRUE, legend.cex=1.5, main="RIP", cex.main=1.5) tmp <- lapply(ctlGRList, plotStrandedCoverage, binSize=binSize, xlab="", ylab="", plotLegend=TRUE, legend.cex=1.5, main="CTL", cex.main=1.5) ################################################### ### code chunk number 6: ripSeek (eval = FALSE) ################################################### ## # specify control name ## cNAME <- "SRR039214" ## ## # output file directory ## outDir <- file.path(getwd(), "RIPSeeker_vigenette_example_PRC2") ## ## # Parameters setting ## binSize <- NULL # set to NULL to automatically determine bin size ## minBinSize <- 10000 # min bin size in automatic bin size selection ## maxBinSize <- 10100 # max bin size in automatic bin size selection ## multicore <- TRUE # use multicore ## strandType <- "-" # set strand type to minus strand ## ## biomart <- "ENSEMBL_MART_ENSEMBL" # use archive to get ensembl 65 ## biomaRt_dataset <- "mmusculus_gene_ensembl" # mouse dataset id name ## host <- "dec2011.archive.ensembl.org" # use ensembl 65 for annotation for mm9 ## goAnno <- "org.Mm.eg.db" # GO annotation database ## ## ################ run main function ripSeek to predict RIP ################ ## seekOut.PRC2 <- ripSeek(bamPath = bamFiles, cNAME = cNAME, ## reverseComplement = TRUE, genomeBuild = "mm9", ## strandType = strandType, ## uniqueHit = TRUE, assignMultihits = TRUE, ## rerunWithDisambiguatedMultihits = TRUE, ## binSize=binSize, minBinSize = minBinSize, ## maxBinSize = maxBinSize, ## biomart=biomart, host=host, ## biomaRt_dataset = biomaRt_dataset, goAnno = goAnno, ## multicore=multicore, outDir=outDir) ################################################### ### code chunk number 7: ripSeekOutputs (eval = FALSE) ################################################### ## names(seekOut.PRC2) ## ## df <- elementMetadata(seekOut.PRC2$annotatedRIPGR$sigGRangesAnnotated) ## ## # order by eFDR ## df <- df[order(df$eFDR), ] ## ## # get top 100 predictions ## df.top100 <- head(df, 100) ## ## head(df.top100) ## ## # examine known PRC2-associated lncRNA transcripts ## df.top100[grep("Xist", df$external_gene_id), ] ################################################### ### code chunk number 8: ripSeekOutFiles (eval = FALSE) ################################################### ## list.files(outDir) ################################################### ### code chunk number 9: ucsc (eval = FALSE) ################################################### ## viewRIP(seekOut.PRC2$RIPGRList$chrX, ## seekOut.PRC2$mainSeekOutputRIP$alignGalFiltered, ## seekOut.PRC2$mainSeekOutputCTL$alignGalFiltered, scoreType="eFDR") ################################################### ### code chunk number 10: computeRPKM (eval = FALSE) ################################################### ## # Retrieve system files ## extdata.dir <- system.file("extdata", package="RIPSeeker") ## ## bamFiles <- list.files(extdata.dir, ".bam$", recursive=TRUE, full.names=TRUE) ## ## bamFiles <- grep("PRC2", bamFiles, value=TRUE) ## ## # use biomart ## txDbName <- "biomart" ## biomart <- "ENSEMBL_MART_ENSEMBL" # use archive to get ensembl 65 ## dataset <- "mmusculus_gene_ensembl" ## host <- "dec2011.archive.ensembl.org" # use ensembl 65 for annotation ## ## # compute transcript abundance in RIP ## ripRPKM <- computeRPKM(bamFiles=grep(pattern="SRR039214", bamFiles, value=TRUE, invert=TRUE), ## dataset=dataset, moreGeneInfo=TRUE, justRPKM=FALSE, ## idType="ensembl_transcript_id", txDbName=txDbName, ## biomart=biomart, host=host, by="tx") ## ## # compute transcript abundance in RIP and control as well as ## # foldchnage in RIP over control ## rulebase.results <- rulebaseRIPSeek(bamFiles=bamFiles, cNAME=cNAME, myMin=1, ## featureGRanges=ripRPKM$featureGRanges, ## biomart=biomart, host=host, dataset=dataset) ## ## head(ripRPKM$rpkmDF) ## ## df <- rulebase.results$rpkmDF ## ## df <- df[order(df$foldchange, decreasing=TRUE), ] ## ## # top 10 transcripts ## head(df, 10) ################################################### ### code chunk number 11: ripSeek_CCNT1 (eval = FALSE) ################################################### ## # Retrieve system files ## BiocManager::install("RIPSeekerData") ## ## extdata.dir <- system.file("extdata", package="RIPSeekerData") ## ## bamFiles <- list.files(extdata.dir, "\\.bam$", ## recursive=TRUE, full.names=TRUE) ## ## bamFiles <- grep("CCNT1/firstscreen", bamFiles, value=TRUE) ## ## # specify control name ## cNAME <- "GFP" ## ## # output file directory ## outDir <- file.path(getwd(), "RIPSeeker_vigenette_example_CCNT1") ## ## # Parameters setting ## binSize <- 10000 # automatically determine bin size ## minBinSize <- NULL # min bin size in automatic bin size selection ## maxBinSize <- NULL # max bin size in automatic bin size selection ## multicore <- TRUE # use multicore ## strandType <- "+" # set strand type to minus strand ## ## biomart <- "ensembl" # use archive to get ensembl 65 ## biomaRt_dataset <- "hsapiens_gene_ensembl" # human dataset id name ## goAnno <- "org.Hs.eg.db" # GO annotation database ## ## ################ run main function ripSeek to predict RIP ################ ## seekOut.CCNT1 <- ripSeek(bamPath = bamFiles, cNAME = cNAME, ## reverseComplement = TRUE, genomeBuild = "hg19", ## strandType = strandType, ## uniqueHit = TRUE, assignMultihits = TRUE, ## rerunWithDisambiguatedMultihits = TRUE, ## binSize=binSize, minBinSize = minBinSize, ## maxBinSize = maxBinSize, ## biomart=biomart, goAnno = goAnno, ## biomaRt_dataset = biomaRt_dataset, ## multicore=multicore, outDir=outDir) ## ## df <- elementMetadata(seekOut.CCNT1$annotatedRIPGR$sigGRangesAnnotated) ## ## # order by eFDR ## df <- df[order(df$eFDR), ] ## ## # get top 100 predictions ## df.top20 <- head(df, 20) ## ## # examine known PRC2-associated lncRNA transcripts ## df.top20[grep("RN7SK", df$external_gene_id)[1], ] ## ## list.files(outDir) ################################################### ### code chunk number 12: ucsc_CCNT1 (eval = FALSE) ################################################### ## viewRIP(seekOut.CCNT1$RIPGRList$chr6, seekOut.CCNT1$mainSeekOutputRIP$alignGalFiltered, seekOut.CCNT1$mainSeekOutputCTL$alignGalFiltered, scoreType="eFDR") ################################################### ### code chunk number 13: computeRPKM_CCNT1 (eval = FALSE) ################################################### ## # use biomart ## txDbName <- "biomart" ## biomart <- "ensembl" ## dataset <- "hsapiens_gene_ensembl" ## ## # compute transcript abundance in RIP ## ripRPKM <- computeRPKM(bamFiles=bamFiles[1], ## dataset=dataset, moreGeneInfo=TRUE, justRPKM=FALSE, ## idType="ensembl_transcript_id", txDbName=txDbName, ## biomart=biomart, by="tx") ## ## # compute transcript abundance in RIP and control as well as ## # foldchnage in RIP over control ## rulebase.results <- rulebaseRIPSeek(bamFiles=bamFiles, cNAME=cNAME, myMin=1, ## featureGRanges=ripRPKM$featureGRanges, ## biomart=biomart, dataset=dataset) ## ## head(ripRPKM$rpkmDF) ## ## df <- rulebase.results$rpkmDF ## ## df <- df[order(df$foldchange, decreasing=TRUE), ] ## ## # top 10 transcripts ## head(df, 10) ################################################### ### code chunk number 14: sessi ################################################### sessionInfo()