## ----pipeline, out.width = 500, ppi=100, fig.retina = NULL, fig.align="center", echo=FALSE, eval=TRUE, fig.cap="**Figure 1:** Diagram of IntEREst running pipeline"---- knitr::include_graphics("../inst/fig/IntEREst.png") ## ----collapseExons, out.width = 500, ppi=100, fig.retina = NULL, fig.align="center", echo=FALSE, eval=TRUE, fig.cap="**Figure 2:** Running reference isoforms a, b, c and d leads to gene A with exons that are results of collapsing of exons of all isoforms."---- knitr::include_graphics("../inst/fig/collapseExons.png") ## ----reference_build, out.width = 500, echo=TRUE, eval=TRUE-------------- # Load library quietly suppressMessages(library("IntEREst")) # Selecting rows related to RHBDD3 gene tmpGen<-u12[u12[,"gene_name"]=="RHBDD3",] # Extracting exons tmpEx<-tmpGen[tmpGen[,"int_ex"]=="exon",] # Building GFF3 file exonDat<- cbind(tmpEx[,3], ".", tmpEx[,c(7,4,5)], ".", tmpEx[,6], ".",paste("ID=exon", tmpEx[,11], "; Parent=ENST00000413811", sep="") ) trDat<- c(tmpEx[1,3], ".", "mRNA", as.numeric(min(tmpEx[,4])), as.numeric(max(tmpEx[,5])), ".", tmpEx[1,6], ".", "ID=ENST00000413811") outDir<- file.path(tempdir(),"tmpFolder") dir.create(outDir) outDir<- normalizePath(outDir) gff3File<-paste(outDir, "gffFile.gff", sep="/") cat("##gff-version 3\n",file=gff3File, append=FALSE) cat(paste(paste(trDat, collapse="\t"),"\n", sep=""), file=gff3File, append=TRUE) write.table(exonDat, gff3File, row.names=FALSE, col.names=FALSE, sep='\t', quote=FALSE, append=TRUE) # Extracting U12 introns info from 'u12' data u12Int<-u12[u12$int_ex=="intron"&u12$int_type=="U12",] # Building reference #Since it is based on one gene only (that does not feature alternative splicing #events) there is no difference if the collapseExons is set as TRUE or FALSE testRef<- referencePrepare (sourceBuild="file", filePath=gff3File, u12IntronsChr=u12Int[,"chr"], u12IntronsBeg=u12Int[,"begin"], u12IntronsEnd=u12Int[,"end"], collapseExons=TRUE, fileFormat="gff3", annotateGeneIds=FALSE) head (testRef) ## ----annotate_u12, out.width = 500, echo=TRUE, eval=TRUE----------------- # Improting genome BSgenome.Hsapiens.UCSC.hg19 <- BSgenome.Hsapiens.UCSC.hg19::BSgenome.Hsapiens.UCSC.hg19 #Index of the subset of rows ind<- u12$gene_name %in% c("RHBDD2", "YBX2") # Annotate U12 introns with strong U12 donor site, branch point # and acceptor site from the u12 data in the package annoU12<- annotateU12(pwmU12U2=list(pwmU12db[[1]][,11:17],pwmU12db[[2]], pwmU12db[[3]][,38:40],pwmU12db[[4]][,11:17], pwmU12db[[5]][,38:40]), pwmSsIndex=list(indexDonU12=1, indexBpU12=1, indexAccU12=3, indexDonU2=1, indexAccU2=3), referenceChr=u12[ind,'chr'], referenceBegin=u12[ind,'begin'], referenceEnd=u12[ind,'end'], referenceIntronExon=u12[ind,"int_ex"], intronExon="intron", matchWindowRelativeUpstreamPos=c(NA,-29,NA,NA,NA), matchWindowRelativeDownstreamPos=c(NA,-9,NA,NA,NA), minMatchScore=c(rep(paste(80,"%",sep=""),2), "40%", paste(80,"%",sep=""), "40%"), refGenome=BSgenome.Hsapiens.UCSC.hg19, setNaAs="U2", annotateU12Subtype=TRUE) # How many U12 and U2 type introns with strong U12 donor sites, # acceptor sites (and branch points for U12-type) are there? table(annoU12[,1]) ## ----testInterest, out.width = 500, echo=TRUE, eval=TRUE----------------- # Creating temp directory to store the results outDir<- file.path(tempdir(),"interestFolder") dir.create(outDir) outDir<- normalizePath(outDir) # Loading suitable bam file bamF <- system.file("extdata", "small_test_SRR1691637_ZRSR2Mut_RHBDD3.bam", package="IntEREst", mustWork=TRUE) # Choosing reference for the gene RHBDD3 ref<-u12[u12[,"gene_name"]=="RHBDD3",] # Intron retention analysis # Reads mapping to inner introns are considered, hence # junctionReadsOnly is FALSE testInterest<- interest( bamFileYieldSize=10000, junctionReadsOnly=FALSE, bamFile=bamF, isPaired=TRUE, isPairedDuplicate=FALSE, isSingleReadDuplicate=NA, reference=ref, referenceGeneNames=ref[,"ens_gene_id"], referenceIntronExon=ref[,"int_ex"], repeatsTableToFilter=c(), outFile=paste(outDir, "intRetRes.tsv", sep="/"), logFile=paste(outDir, "log.txt", sep="/"), method="IntRet", clusterNo=1, returnObj=FALSE, scaleLength= TRUE, scaleFragment= TRUE ) testIntRetObj<- readInterestResults( resultFiles= paste(outDir, "intRetRes.tsv", sep="/"), sampleNames="small_test_SRR1691637_ZRSR2Mut_RHBDD3", sampleAnnotation=data.frame( type="ZRSR2mut", test_ctrl="test"), commonColumns=1:ncol(ref), freqCol=ncol(ref)+1, scaledRetentionCol=ncol(ref)+2, scaleLength=TRUE, scaleFragment=TRUE, reScale=TRUE, geneIdCol="ens_gene_id") # Intron Spanning analysis # Reads mapping to inner introns are considered, hence # junctionReadsOnly is FALSE testInterest<- interest( bamFileYieldSize=10000, junctionReadsOnly=FALSE, bamFile=bamF, isPaired=TRUE, isPairedDuplicate=FALSE, isSingleReadDuplicate=NA, reference=ref, referenceGeneNames=ref[,"ens_gene_id"], referenceIntronExon=ref[,"int_ex"], repeatsTableToFilter=c(), outFile=paste(outDir, "intSpanRes.tsv", sep="/"), logFile=paste(outDir, "log.txt", sep="/"), method="IntSpan", clusterNo=1, returnObj=FALSE, scaleLength= TRUE, scaleFragment= TRUE ) testIntSpanObj<- readInterestResults( resultFiles= paste(outDir, "intSpanRes.tsv", sep="/"), sampleNames="small_test_SRR1691637_ZRSR2Mut_RHBDD3", sampleAnnotation=data.frame( type="ZRSR2mut", test_ctrl="test"), commonColumns=1:ncol(ref), freqCol=ncol(ref)+1, scaledRetentionCol=ncol(ref)+2, scaleLength=TRUE, scaleFragment=TRUE, reScale=TRUE, geneIdCol="ens_gene_id") # Exon-exon junction analysis # Reads mapping to inner exons are NOT considered, hence # junctionReadsOnly is TRUE testInterest<- interest( bamFileYieldSize=10000, junctionReadsOnly=TRUE, bamFile=bamF, isPaired=TRUE, isPairedDuplicate=FALSE, isSingleReadDuplicate=NA, reference=ref, referenceGeneNames=ref[,"ens_gene_id"], referenceIntronExon=ref[,"int_ex"], repeatsTableToFilter=c(), outFile=paste(outDir, "exExRes.tsv", sep="/"), logFile=paste(outDir, "log.txt", sep="/"), method="ExEx", clusterNo=1, returnObj=FALSE, scaleLength= TRUE, scaleFragment= TRUE ) testExExObj<- readInterestResults( resultFiles= paste(outDir, "exExRes.tsv", sep="/"), sampleNames="small_test_SRR1691637_ZRSR2Mut_RHBDD3", sampleAnnotation=data.frame( type="ZRSR2mut", test_ctrl="test"), commonColumns=1:ncol(ref), freqCol=ncol(ref)+1, scaledRetentionCol=ncol(ref)+2, scaleLength=TRUE, scaleFragment=TRUE, reScale=TRUE, geneIdCol="ens_gene_id") # View intron retention result object testIntRetObj # View intron spanning result object testIntSpanObj # View exon-exon junction result object testExExObj # View first rows of intron retention read counts table head(counts(testIntRetObj)) # View first rows of intron spanning read counts table head(counts(testIntSpanObj)) # View first rows of exon-exon junction read counts table head(counts(testExExObj)) ## ----view_object, out.width = 500, echo=TRUE, eval=TRUE------------------ # Load library quietly suppressMessages(library("IntEREst")) #View object mdsChr22Obj mdsChr22ExObj # See read counts head(counts(mdsChr22Obj)) # See FPKM Normalized values head(scaledRetention(mdsChr22Obj)) # See intron/exon annotations head(rowData(mdsChr22Obj)) # See sample annotations head(colData(mdsChr22Obj)) ## ----plot_intron_object, echo = TRUE, eval=TRUE, message = FALSE, fig.width=6, fig.height=4, fig.align="center", fig.cap="**Figure 3:** Plotting the distribution of the retention levels ($log_e$ scaled retention) of introns of genes located on chromosome 22. The values have been averaged across the sample types ZRSR2 mutated, ZRSR2 wild type, and healthy."---- # Retention of all introns plot(mdsChr22Obj, logScaleBase=exp(1), pch=20, loessLwd=1.2, summary="mean", col="black", sampleAnnoCol="type", lowerPlot=TRUE, upperPlot=TRUE) ## ----plot_u12intron_object, echo = TRUE, eval=TRUE, message = FALSE, fig.width=6, fig.height=4, fig.align="center", fig.cap="**Figure 4:** Plotting the distribution of the retention levels ($log_e$ scaled retention) of introns of genes located on chromosome 22. The values have been averaged across the sample types ZRSR2 mutated, ZRSR2 wild type, and healthy."---- #Retention of U12 introns plot(mdsChr22Obj, logScaleBase=exp(1), pch=20, plotLoess=FALSE, summary="mean", col="black", sampleAnnoCol="type", subsetRows=u12Index(mdsChr22Obj, intTypeCol="intron_type")) ## ----exact_test, echo = TRUE, eval=TRUE, message = FALSE, fig.width=6, fig.height=4, fig.align="center"---- # Check the sample annotation table getAnnotation(mdsChr22Obj) # Run exact test test<- exactTestInterest(mdsChr22Obj, sampleAnnoCol="test_ctrl", sampleAnnotation=c("ctrl","test"), geneIdCol= "collapsed_transcripts_id", silent=TRUE, disp="common") # Number of stabilized introns (in Chr 22) sInt<- length(which(test$table[,"PValue"]<0.05 & test$table[,"logFC"]>0 & rowData(mdsChr22Obj)[,"int_ex"]=="intron")) print(sInt) # Number of stabilized (significantly retained) U12 type introns numStU12Int<- length(which(test$table[,"PValue"]<0.05 & test$table[,"logFC"]>0 & rowData(mdsChr22Obj)[,"intron_type"]=="U12" & !is.na(rowData(mdsChr22Obj)[,"intron_type"]))) # Number of U12 introns numU12Int<- length(which(rowData(mdsChr22Obj)[,"intron_type"]=="U12" & !is.na(rowData(mdsChr22Obj)[,"intron_type"]))) # Fraction(%) of stabilized (significantly retained) U12 introns perStU12Int<- numStU12Int/numU12Int*100 print(perStU12Int) # Number of stabilized U2 type introns numStU2Int<- length(which(test$table[,"PValue"]<0.05 & test$table[,"logFC"]>0 & rowData(mdsChr22Obj)[,"intron_type"]=="U2" & !is.na(rowData(mdsChr22Obj)[,"intron_type"]))) # Number of U2 introns numU2Int<- length(which(rowData(mdsChr22Obj)[,"intron_type"]=="U2" & !is.na(rowData(mdsChr22Obj)[,"intron_type"]))) # Fraction(%) of stabilized U2 introns perStU2Int<- numStU2Int/numU2Int*100 print(perStU2Int) ## ----glr_test, echo = TRUE, eval=TRUE, message = FALSE, fig.width=6, fig.height=4, fig.align="center"---- # Extract type of samples group <- getAnnotation(mdsChr22Obj)[,"type"] group # Test retention levels' differentiation across 3 types samples qlfRes<- qlfInterest(x=mdsChr22Obj, design=model.matrix(~group), silent=TRUE, disp="tagwiseInitTrended", coef=2:3, contrast=NULL, poisson.bound=TRUE) # Extract index of the introns with significant retention changes ind= which(qlfRes$table$PValue< 0.05) # Extract introns with significant retention level changes variedIntrons= rowData(mdsChr22Obj)[ind,] #Show first 5 rows and columns of the result table print(variedIntrons[1:5,1:5]) ## ----boxplot_object, echo = TRUE, eval=TRUE, message = FALSE, fig.width=6, fig.height=4, fig.align="center", fig.cap= "**Figure 5:** Boxplot of the retention levels of U12 introns vs U2 introns, summed over samples with similar annotations *i.e.* ZRSR2 mutation, ZRSR2 wild type, or healthy."---- # boxplot U12 and U2-type introns par(mar=c(7,4,2,1)) u12Boxplot(mdsChr22Obj, sampleAnnoCol="type", intExCol="int_ex", intTypeCol="intron_type", intronExon="intron", col=rep(c("orange", "yellow"),3) , lasNames=3, outline=FALSE, ylab="FPKM", cex.axis=0.8) ## ----u12BoxplotNb_object, echo = TRUE, eval=TRUE, message = FALSE, fig.width=6, fig.height=4, fig.align="center", fig.cap="**Figure 6:** Boxplot of retention levels of U12 introns vs their up- and down-stream U2 introns across all samples."---- # boxplot U12-type intron and its up/downstream U2-type introns par(mar=c(2,4,1,1)) u12BoxplotNb(mdsChr22Obj, sampleAnnoCol="type", lasNames=1, intExCol="int_ex", intTypeCol="intron_type", intronExon="intron", boxplotNames=c(), outline=FALSE, plotLegend=TRUE, geneIdCol="collapsed_transcripts_id", xLegend="topleft", col=c("pink", "lightblue", "lightyellow"), ylim=c(0,1e+06), ylab="FPKM", cex.axis=0.8) ## ----density_plot, echo = TRUE, eval=TRUE, message = FALSE, fig.width=6, fig.height=4, fig.cap= "**Figure 7:** Density plot of the log fold change of U12-type introns, random U2-type introns and U2 introns (up / down / up and down)stream of U12-type introns. "---- u12DensityPlotIntron(mdsChr22Obj, type= c("U12", "U2Up", "U2Dn", "U2UpDn", "U2Rand"), fcType= "edgeR", sampleAnnoCol="test_ctrl", sampleAnnotation=c("ctrl","test"), intExCol="int_ex", intTypeCol="intron_type", strandCol= "strand", geneIdCol= "collapsed_transcripts_id", naUnstrand=FALSE, col=c(2,3,4,5,6), lty=c(1,2,3,4,5), lwd=1, plotLegend=TRUE, cexLegend=0.7, xLegend="topright", yLegend=NULL, legend=c(), randomSeed=10, ylim=c(0,0.6), xlab=expression("log"[2]*" fold change FPKM")) # estimate log fold-change of introns # by comparing test samples vs ctrl # and don't show warnings ! lfcRes<- lfc(mdsChr22Obj, fcType= "edgeR", sampleAnnoCol="test_ctrl",sampleAnnotation=c("ctrl","test")) # Build the order vector ord<- rep(1,length(lfcRes)) ord[u12Index(mdsChr22Obj, intTypeCol="intron_type")]=2 # Median of log fold change of U2 introns (ZRSR2 mut. vs ctrl) median(lfcRes[ord==1]) # Median of log fold change of U2 introns (ZRSR2 mut. vs ctrl) median(lfcRes[ord==2]) ## ----lfc-sig, echo = TRUE, eval=TRUE, message = FALSE, fig.width=6, fig.height=4, fig.align="center"---- # Run Jockheere Terpstra's trend test library(clinfun) jtRes<- jonckheere.test(lfcRes, ord, alternative = "increasing", nperm=1000) jtRes ## ----downstreamExample, echo = TRUE, eval=TRUE, message = FALSE, fig.width=6, fig.height=4, fig.align="center"---- mdsChr22RefIntRetSpObj<- cbind(mdsChr22Obj, mdsChr22IntSpObj) mdsChr22RefIntRetSpObj<- addAnnotation(x=mdsChr22RefIntRetSpObj, sampleAnnotationType="intronExon", sampleAnnotation=c(rep("intron",16), rep("exon",16)) ) library(BiocParallel) mdsChr22RefIntRetSpIntFilObj<- mdsChr22RefIntRetSpObj[rowData(mdsChr22RefIntRetSpObj)$int_ex=="intron",] # Differential IR analysis run ddsChr22Diff<- deseqInterest(mdsChr22RefIntRetSpIntFilObj, design=~test_ctrl+test_ctrl:intronExon, sizeFactor=rep(1,nrow(colData(mdsChr22RefIntRetSpIntFilObj))), contrast=list("test_ctrltest.intronExonintron", "test_ctrlctrl.intronExonintron"), bpparam = SnowParam(workers=20)) # See the number of significantly more retained U12 and U2 introns pThreshold<- 0.01 mdsChr22UpIntInd<- which(ddsChr22Diff$padj< pThreshold & ddsChr22Diff$padj>0) table(rowData(mdsChr22RefIntRetSpIntFilObj)$intron_type[mdsChr22UpIntInd]) # See the fraction of significantly more retained U12 and U2 introns 100*table(rowData(mdsChr22RefIntRetSpIntFilObj)$intron_type[mdsChr22UpIntInd])/ table(rowData(mdsChr22RefIntRetSpIntFilObj)$intron_type)