## ---- echo = FALSE------------------------------------------------------- knitr::opts_chunk$set(collapse = TRUE, comment = "#>") ## ---- results = "hide", message = FALSE, warning=FALSE------------------- library(IsoformSwitchAnalyzeR) ## ------------------------------------------------------------------------ packageVersion('IsoformSwitchAnalyzeR') ## ------------------------------------------------------------------------ ### Please note # The way of importing files in the following example with # "system.file('pathToFile', package="IsoformSwitchAnalyzeR") is # specialized way of accessing the example data in the IsoformSwitchAnalyzeR package # and not something you need to do - just supply the string e.g. # "/path/to/mySalmonQuantifications/" pointing to the parent directory (where # each sample is a separate sub-directory) to the function. ### Import quantifications salmonQuant <- importIsoformExpression( parentDir = system.file("extdata/",package="IsoformSwitchAnalyzeR") ) ## ------------------------------------------------------------------------ head(salmonQuant$abundance, 2) head(salmonQuant$counts, 2) ## ------------------------------------------------------------------------ myDesign <- data.frame( sampleID = colnames(salmonQuant$abundance)[-1], condition = gsub('_.*', '', colnames(salmonQuant$abundance)[-1]) ) myDesign ## ---- message = FALSE, warning=FALSE------------------------------------- ### Please note: # The way of importing files in the following example with # "system.file("extdata/example.gtf.gz", package="IsoformSwitchAnalyzeR")"" is # specialiced way of accessing the example data in the IsoformSwitchAnalyzeR package # and not somhting you need to do - just supply the string e.g. # "/myAnnotation/myQuantified.gtf" to the isoformExonAnnoation argument ### Create switchAnalyzeRlist aSwitchList <- importRdata( isoformCountMatrix = salmonQuant$counts, isoformRepExpression = salmonQuant$abundance, designMatrix = myDesign, isoformExonAnnoation = system.file("extdata/example.gtf.gz" , package="IsoformSwitchAnalyzeR"), isoformNtFasta = system.file("extdata/example_isoform_nt.fasta.gz", package="IsoformSwitchAnalyzeR"), showProgress = FALSE ) aSwitchList ## ------------------------------------------------------------------------ data("exampleSwitchList") exampleSwitchList ## ---- results = "hide", message = FALSE, warning=FALSE------------------- exampleSwitchList <- isoformSwitchAnalysisPart1( switchAnalyzeRlist = exampleSwitchList, dIFcutoff = 0.3, # Cutoff for finding switches - set high for short runtime in example data # pathToOutput = 'path/to/where/output/should/be/' outputSequences = FALSE, # change to TRUE whan analyzing your own data prepareForWebServers = FALSE # change to TRUE if you will use webservers for external sequence analysis ) ## ------------------------------------------------------------------------ extractSwitchSummary( exampleSwitchList, dIFcutoff = 0.3 # the same cutoff to the summary function as used above ) ## ------------------------------------------------------------------------ # Please note that in the following the part of the examples using # the "system.file()" command not nesseary when using your own # data - just supply the path as a string # (e.g. pathToCPC2resultFile = "/myFiles/myCPC2results.txt" ) exampleSwitchList <- isoformSwitchAnalysisPart2( switchAnalyzeRlist = exampleSwitchList, dIFcutoff = 0.3, # Cutoff for finding switches - set high for short runtime in example data n = 10, # if plotting was enabled, it would only output the top 10 switches removeNoncodinORFs = TRUE, # Because ORF was predicted de novo pathToCPC2resultFile = system.file("extdata/cpc2_result.txt" , package = "IsoformSwitchAnalyzeR"), pathToPFAMresultFile = system.file("extdata/pfam_results.txt" , package = "IsoformSwitchAnalyzeR"), pathToIUPred2AresultFile = system.file("extdata/iupred2a_result.txt.gz" , package = "IsoformSwitchAnalyzeR"), pathToSignalPresultFile = system.file("extdata/signalP_results.txt" , package = "IsoformSwitchAnalyzeR"), outputPlots = FALSE # keeps the function from outputting the plots from this example ) ## ---- message = FALSE---------------------------------------------------- extractTopSwitches(exampleSwitchList, filterForConsequences = TRUE, n=3) ## ------------------------------------------------------------------------ data("exampleSwitchListAnalyzed") ## ------------------------------------------------------------------------ extractSwitchSummary( exampleSwitchListAnalyzed, filterForConsequences = TRUE ) ## ------------------------------------------------------------------------ subset( extractTopSwitches( exampleSwitchListAnalyzed, filterForConsequences = TRUE, n=10, inEachComparison = TRUE )[,c('gene_name','condition_2','gene_switch_q_value','Rank')], gene_name == 'ZAK' ) ## ---- fig.width=12, fig.height=7, warning=FALSE-------------------------- switchPlot( exampleSwitchListAnalyzed, gene='ZAK', condition1 = 'COAD_ctrl', condition2 = 'COAD_cancer', localTheme = theme_bw(base_size = 13) # making text sightly larger for vignette ) ## ---- fig.width=9, fig.height=5------------------------------------------ extractSwitchOverlap( exampleSwitchListAnalyzed, filterForConsequences=TRUE ) ## ---- fig.width=12, fig.height=5----------------------------------------- extractConsequenceSummary( exampleSwitchListAnalyzed, consequencesToAnalyze='all', plotGenes = FALSE, # enables analysis of genes (instead of isoforms) asFractionTotal = FALSE # enables analysis of fraction of significant features ) ## ---- fig.width=14, fig.height=5----------------------------------------- extractConsequenceEnrichment( exampleSwitchListAnalyzed, consequencesToAnalyze='all', analysisOppositeConsequence = TRUE, returnResult = FALSE # if TRUE returns a data.frame with the results ) ## ---- fig.width=12, fig.height=4----------------------------------------- extractConsequenceEnrichmentComparison( exampleSwitchListAnalyzed, consequencesToAnalyze=c('domains_identified','intron_retention','coding_potential'), analysisOppositeConsequence = TRUE, returnResult = FALSE # if TRUE returns a data.frame with the results ) ## ---- fig.width=12, fig.height=4.5--------------------------------------- extractSplicingEnrichment( exampleSwitchListAnalyzed, returnResult = FALSE # if TRUE returns a data.frame with the results ) ## ---- fig.width=12, fig.height=4----------------------------------------- extractSplicingEnrichmentComparison( exampleSwitchListAnalyzed, splicingToAnalyze = c('A3','MES','ATSS','ATTS'), # the splicing highlighted above returnResult = FALSE # if TRUE returns a data.frame with the results ) ## ---- fig.width=8, fig.height=4, warning=FALSE--------------------------- ### Volcano like plot: ggplot(data=exampleSwitchListAnalyzed$isoformFeatures, aes(x=dIF, y=-log10(isoform_switch_q_value))) + geom_point( aes( color=abs(dIF) > 0.1 & isoform_switch_q_value < 0.05 ), # default cutoff size=1 ) + geom_hline(yintercept = -log10(0.05), linetype='dashed') + # default cutoff geom_vline(xintercept = c(-0.1, 0.1), linetype='dashed') + # default cutoff facet_wrap( ~ condition_2) + #facet_grid(condition_1 ~ condition_2) + # alternative to facet_wrap if you have overlapping conditions scale_color_manual('Signficant\nIsoform Switch', values = c('black','red')) + labs(x='dIF', y='-Log10 ( Isoform Switch Q Value )') + theme_bw() ## ---- fig.width=8, fig.height=4, warning=FALSE--------------------------- ### Switch vs Gene changes: ggplot(data=exampleSwitchListAnalyzed$isoformFeatures, aes(x=gene_log2_fold_change, y=dIF)) + geom_point( aes( color=abs(dIF) > 0.1 & isoform_switch_q_value < 0.05 ), # default cutoff size=1 ) + facet_wrap(~ condition_2) + #facet_grid(condition_1 ~ condition_2) + # alternative to facet_wrap if you have overlapping conditions geom_hline(yintercept = 0, linetype='dashed') + geom_vline(xintercept = 0, linetype='dashed') + scale_color_manual('Signficant\nIsoform Switch', values = c('black','red')) + labs(x='Gene log2 fold change', y='dIF') + theme_bw() ## ------------------------------------------------------------------------ data("exampleSwitchList") # A newly created switchAnalyzeRlist + switch analysis names(exampleSwitchList) data("exampleSwitchListAnalyzed") # A fully analyzed switchAnalyzeRlist names(exampleSwitchListAnalyzed) ## ------------------------------------------------------------------------ ### Preview head(exampleSwitchList, 2) # tail(exampleSwitchList, 2) ### Dimentions dim(exampleSwitchList$isoformFeatures) nrow(exampleSwitchList) ncol(exampleSwitchList) dim(exampleSwitchList) ## ------------------------------------------------------------------------ exampleSwitchList ### Subset subsetSwitchAnalyzeRlist( exampleSwitchList, exampleSwitchList$isoformFeatures$gene_name == 'ARHGEF19' ) ## ------------------------------------------------------------------------ head(exampleSwitchList$exons,2) ## ------------------------------------------------------------------------ ### Please note # The way of importing files in the following example with # "system.file('pathToFile', package="IsoformSwitchAnalyzeR") is # specialized way of accessing the example data in the IsoformSwitchAnalyzeR package # and not something you need to do - just supply the string e.g. # "/mySalmonQuantifications/" pointing to the parent directory (where # each sample is a seperate sub-directory) to the function. ### Import Salmon example data in R package salmonQuant <- importIsoformExpression( parentDir = system.file("extdata/", package="IsoformSwitchAnalyzeR"), addIsofomIdAsColumn = TRUE ) ## ------------------------------------------------------------------------ ### Make design matrix myDesign <- data.frame( sampleID = colnames(salmonQuant$abundance)[-1], condition = gsub('_.*', '', colnames(salmonQuant$abundance)[-1]) ) ### Create switchAnalyzeRlist aSwitchList <- importRdata( isoformCountMatrix = salmonQuant$counts, isoformRepExpression = salmonQuant$abundance, designMatrix = myDesign, isoformExonAnnoation = system.file("extdata/example.gtf.gz" , package="IsoformSwitchAnalyzeR"), isoformNtFasta = system.file("extdata/example_isoform_nt.fasta.gz", package="IsoformSwitchAnalyzeR"), showProgress = FALSE ) ## ------------------------------------------------------------------------ head(aSwitchList$isoformFeatures,2) head(aSwitchList$exons,2) head(aSwitchList$ntSequence,2) # Etc... ## ---- warning=FALSE, message=FALSE--------------------------------------- aSwitchList <- importGTF(pathToGTF = system.file("extdata/example.gtf.gz", package="IsoformSwitchAnalyzeR")) ## ------------------------------------------------------------------------ aSwitchList head(aSwitchList,2) head(aSwitchList$conditions,2) ## ------------------------------------------------------------------------ data("exampleSwitchList") exampleSwitchListFiltered <- preFilter( switchAnalyzeRlist = exampleSwitchList, geneExpressionCutoff = 1, isoformExpressionCutoff = 0, removeSingleIsoformGenes = TRUE ) exampleSwitchListFilteredStrict <- preFilter( switchAnalyzeRlist = exampleSwitchList, geneExpressionCutoff = 10, isoformExpressionCutoff = 3, removeSingleIsoformGenes = TRUE ) ## ---- warning=FALSE------------------------------------------------------ # Load example data and pre-filter data("exampleSwitchList") exampleSwitchList <- preFilter(exampleSwitchList) # preFilter to remove lowly expressed features # Perform test exampleSwitchListAnalyzed <- isoformSwitchTestDEXSeq( switchAnalyzeRlist = exampleSwitchList, reduceToSwitchingGenes=TRUE ) # Summarize switching features extractSwitchSummary(exampleSwitchListAnalyzed) ## ---- warning=FALSE------------------------------------------------------ # Load example data and pre-filter data("exampleSwitchList") exampleSwitchList <- preFilter(exampleSwitchList, geneExpressionCutoff = 4) # preFilter for fast runtime # Perform test (takes ~10 sec) exampleSwitchListAnalyzed <- isoformSwitchTestDRIMSeq( switchAnalyzeRlist = exampleSwitchList, testIntegration='isoform_only', reduceToSwitchingGenes=TRUE ) # Summarize switching features extractSwitchSummary(exampleSwitchListAnalyzed) ## ------------------------------------------------------------------------ ### This example relies on the example data from the 'Usage of The Statistical Test' section above exampleSwitchListAnalyzed <- analyzeORF( exampleSwitchListAnalyzed, orfMethod = "longest", # genomeObject = Hsapiens, # not necessary since sequences are already stored in the switchAnalyzeRlist showProgress=FALSE ) head(exampleSwitchListAnalyzed$orfAnalysis, 3) ## ------------------------------------------------------------------------ ### This example relies on the example data from the 'Analyzing Open Reading Frames' section above exampleSwitchListAnalyzed <- extractSequence( exampleSwitchListAnalyzed, pathToOutput = '', writeToFile=FALSE # to avoid output when running this example data ) ## ------------------------------------------------------------------------ ### Load test data (matching the external sequence analysis results) data("exampleSwitchListIntermediary") exampleSwitchListIntermediary ### Add CPAT analysis exampleSwitchListAnalyzed <- analyzeCPAT( switchAnalyzeRlist = exampleSwitchListIntermediary, pathToCPATresultFile = system.file("extdata/cpat_results.txt", package = "IsoformSwitchAnalyzeR"), codingCutoff = 0.725, # the coding potential cutoff we suggested for human removeNoncodinORFs = TRUE # because ORF was predicted de novo ) ### Add CPC2 analysis exampleSwitchListAnalyzed <- analyzeCPC2( switchAnalyzeRlist = exampleSwitchListAnalyzed, pathToCPC2resultFile = system.file("extdata/cpc2_result.txt", package = "IsoformSwitchAnalyzeR"), codingCutoff = 0.725, # the coding potential cutoff we suggested for human removeNoncodinORFs = TRUE # because ORF was predicted de novo ) ### Add PFAM analysis exampleSwitchListAnalyzed <- analyzePFAM( switchAnalyzeRlist = exampleSwitchListAnalyzed, pathToPFAMresultFile = system.file("extdata/pfam_results.txt", package = "IsoformSwitchAnalyzeR"), showProgress=FALSE ) ### Add SignalP analysis exampleSwitchListAnalyzed <- analyzeSignalP( switchAnalyzeRlist = exampleSwitchListAnalyzed, pathToSignalPresultFile = system.file("extdata/signalP_results.txt", package = "IsoformSwitchAnalyzeR") ) ### Add NetSurfP2 analysis exampleSwitchListAnalyzed <- analyzeIUPred2A( switchAnalyzeRlist = exampleSwitchListAnalyzed, pathToIUPred2AresultFile = system.file("extdata/iupred2a_result.txt.gz", package = "IsoformSwitchAnalyzeR"), showProgress = FALSE ) exampleSwitchListAnalyzed ## ------------------------------------------------------------------------ ### This example relies on the example data from the 'Importing External Sequence Analysis' section above exampleSwitchListAnalyzed <- analyzeAlternativeSplicing( switchAnalyzeRlist = exampleSwitchListAnalyzed, quiet=TRUE ) ### overview of number of intron retentions (IR) table( exampleSwitchListAnalyzed$AlternativeSplicingAnalysis$IR ) ## ------------------------------------------------------------------------ ### This example relies on the example data from the 'Predicting Alternative Splicing' section above # the consequences highlighted in the text above consequencesOfInterest <- c('intron_retention','coding_potential','NMD_status','domains_identified','ORF_seq_similarity') exampleSwitchListAnalyzed <- analyzeSwitchConsequences( exampleSwitchListAnalyzed, consequencesToAnalyze = consequencesOfInterest, dIFcutoff = 0.4, # very high cutoff for fast runtimes showProgress=FALSE ) extractSwitchSummary(exampleSwitchListAnalyzed, dIFcutoff = 0.4, filterForConsequences = FALSE) extractSwitchSummary(exampleSwitchListAnalyzed, dIFcutoff = 0.4, filterForConsequences = TRUE) ## ------------------------------------------------------------------------ ### Load already analyzed data data("exampleSwitchListAnalyzed") ### Let's reduce the switchAnalyzeRlist to only one condition exampleSwitchListAnalyzedSubset <- subsetSwitchAnalyzeRlist( exampleSwitchListAnalyzed, exampleSwitchListAnalyzed$isoformFeatures$condition_1 == 'COAD_ctrl' ) exampleSwitchListAnalyzedSubset ### Extract top switching genes (by q-value) extractTopSwitches( exampleSwitchListAnalyzedSubset, filterForConsequences = TRUE, n = 2, sortByQvals = TRUE ) ### Extract top 2 switching genes (by dIF values) extractTopSwitches( exampleSwitchListAnalyzedSubset, filterForConsequences = TRUE, n = 2, sortByQvals = FALSE ) ## ------------------------------------------------------------------------ ### Extract data.frame with all switching isoforms switchingIso <- extractTopSwitches( exampleSwitchListAnalyzedSubset, filterForConsequences = TRUE, n = NA, # n=NA: all features are returned extractGenes = FALSE, # when FALSE isoforms are returned sortByQvals = TRUE ) subset(switchingIso, gene_name == 'ZAK') ## ---- fig.width=12, fig.height=7----------------------------------------- switchPlot(exampleSwitchListAnalyzedSubset, gene = 'ZAK') ## ---- fig.width=12, fig.height=3----------------------------------------- ### exampleSwitchListAnalyzedSubset is created above switchPlotTranscript(exampleSwitchListAnalyzedSubset, gene = 'ZAK') ## ---- fig.width=3, fig.height=3------------------------------------------ switchPlotGeneExp (exampleSwitchListAnalyzedSubset, gene = 'ZAK') ## ---- fig.width=4, fig.height=3------------------------------------------ switchPlotIsoExp(exampleSwitchListAnalyzedSubset, gene = 'TNFRSF1B') ## ---- fig.width=4, fig.height=3------------------------------------------ switchPlotIsoUsage(exampleSwitchListAnalyzedSubset, gene = 'ZAK') ## ------------------------------------------------------------------------ data("exampleSwitchListAnalyzed") exampleSwitchListAnalyzed ## ---- fig.width=12, fig.height=5----------------------------------------- extractSplicingSummary( exampleSwitchListAnalyzed, asFractionTotal = FALSE, plotGenes=FALSE ) ## ---- fig.width=12, fig.height=4.5--------------------------------------- splicingEnrichment <- extractSplicingEnrichment( exampleSwitchListAnalyzed, splicingToAnalyze='all', returnResult=TRUE, returnSummary=TRUE ) ## ---- fig.width=12, fig.height=6----------------------------------------- subset(splicingEnrichment, splicingEnrichment$AStype == 'ATSS') ## ---- fig.width=12, fig.height=4.5--------------------------------------- extractSplicingEnrichmentComparison( exampleSwitchListAnalyzed, splicingToAnalyze=c('A3','MES','ATSS'), # Those significant in COAD in the previous analysis returnResult=FALSE # Preventing the summary statistics to be returned as a data.frame ) ## ---- fig.width=10, fig.height=6----------------------------------------- extractSplicingGenomeWide( exampleSwitchListAnalyzed, featureToExtract = 'all', # all isoforms stored in the switchAnalyzeRlist splicingToAnalyze = c('A3','MES','ATSS'), # Splice types significantly enriched in COAD plot=TRUE, returnResult=FALSE # Preventing the summary statistics to be returned as a data.frame ) ## ---- results = "hide", message = FALSE---------------------------------- ### Load example data data("exampleSwitchListAnalyzed") ### Reduce datasize for fast runtime selectedGenes <- unique(exampleSwitchListAnalyzed$isoformFeatures$gene_id)[50:100] exampleSwitchListAnalyzedSubset <- subsetSwitchAnalyzeRlist( exampleSwitchListAnalyzed, exampleSwitchListAnalyzed$isoformFeatures$gene_id %in% selectedGenes ) ### analyze the biological mechanisms bioMechanismeAnalysis <- analyzeSwitchConsequences( exampleSwitchListAnalyzedSubset, consequencesToAnalyze = c('tss','tts','intron_structure'), showProgress = FALSE )$switchConsequence # only the consequences are interesting here ### subset to those with differences bioMechanismeAnalysis <- bioMechanismeAnalysis[which(bioMechanismeAnalysis$isoformsDifferent),] ### extract the consequences of interest already stored in the switchAnalyzeRlist myConsequences <- exampleSwitchListAnalyzedSubset$switchConsequence myConsequences <- myConsequences[which(myConsequences$isoformsDifferent),] myConsequences$isoPair <- paste(myConsequences$isoformUpregulated, myConsequences$isoformDownregulated) # id for specific iso comparison ### Obtain the mechanisms of the isoform switches with consequences bioMechanismeAnalysis$isoPair <- paste(bioMechanismeAnalysis$isoformUpregulated, bioMechanismeAnalysis$isoformDownregulated) bioMechanismeAnalysis <- bioMechanismeAnalysis[which(bioMechanismeAnalysis$isoPair %in% myConsequences$isoPair),] # id for specific iso comparison ## ---- fig.width=3.5, fig.height=3.5-------------------------------------- ### Create list with the isoPair ids for each consequence AS <- bioMechanismeAnalysis$isoPair[ which( bioMechanismeAnalysis$featureCompared == 'intron_structure')] aTSS <- bioMechanismeAnalysis$isoPair[ which( bioMechanismeAnalysis$featureCompared == 'tss' )] aTTS <- bioMechanismeAnalysis$isoPair[ which( bioMechanismeAnalysis$featureCompared == 'tts' )] mechList <- list( AS=AS, aTSS=aTSS, aTTS=aTTS ) ### Create Venn diagram library(VennDiagram) myVenn <- venn.diagram( x = mechList, col='transparent', alpha=0.4, fill=RColorBrewer::brewer.pal(n=3,name='Dark2'), filename=NULL ) ### Plot the venn diagram grid.newpage() ; grid.draw(myVenn) ## ------------------------------------------------------------------------ myDesign <- data.frame( sampleID=1:6, condition=c('wt','wt','wt','ko','ko','ko') # WT = wildtype, KO = Knock out ) myDesign ## ------------------------------------------------------------------------ myDesign$batch <- factor(c('a','a','b','a','b','b')) myDesign ## ------------------------------------------------------------------------ packageVersion('IsoformSwitchAnalyzeR') sessionInfo()