## ---- echo = FALSE-------------------------------------------------------
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

## ---- results = "hide", message = FALSE, warning=FALSE-------------------
library(IsoformSwitchAnalyzeR)

## ------------------------------------------------------------------------
packageVersion('IsoformSwitchAnalyzeR')

## ------------------------------------------------------------------------
### Please note
# The way of importing files in the following example with
# "system.file('pathToFile', package="IsoformSwitchAnalyzeR") is
# specialized way of accessing the example data in the IsoformSwitchAnalyzeR package
# and not something  you need to do - just supply the string e.g.
# "/path/to/mySalmonQuantifications/" pointing to the parent directory (where 
# each sample is a separate sub-directory) to the function.

### Import quantifications
salmonQuant <- importIsoformExpression(
    parentDir = system.file("extdata/",package="IsoformSwitchAnalyzeR")
)

## ------------------------------------------------------------------------
head(salmonQuant$abundance, 2)

head(salmonQuant$counts, 2)

## ------------------------------------------------------------------------
myDesign <- data.frame(
    sampleID = colnames(salmonQuant$abundance)[-1],
    condition = gsub('_.*', '', colnames(salmonQuant$abundance)[-1])
)
myDesign

## ---- message = FALSE, warning=FALSE-------------------------------------
### Please note:
# The way of importing files in the following example with
# "system.file("extdata/example.gtf.gz", package="IsoformSwitchAnalyzeR")"" is
# specialiced way of accessing the example data in the IsoformSwitchAnalyzeR package
# and not somhting you need to do - just supply the string e.g.
# "/myAnnotation/myQuantified.gtf" to the isoformExonAnnoation argument

### Create switchAnalyzeRlist
aSwitchList <- importRdata(
    isoformCountMatrix   = salmonQuant$counts,
    isoformRepExpression = salmonQuant$abundance,
    designMatrix         = myDesign,
    isoformExonAnnoation = system.file("extdata/example.gtf.gz"             , package="IsoformSwitchAnalyzeR"),
    isoformNtFasta       = system.file("extdata/example_isoform_nt.fasta.gz", package="IsoformSwitchAnalyzeR"),
    showProgress = FALSE
)
aSwitchList

## ------------------------------------------------------------------------
data("exampleSwitchList")
exampleSwitchList

## ---- results = "hide", message = FALSE, warning=FALSE-------------------
exampleSwitchList <- isoformSwitchAnalysisPart1(
    switchAnalyzeRlist   = exampleSwitchList,
    dIFcutoff            = 0.3, # Cutoff for finding switches - set high for short runtime in example data
    # pathToOutput = 'path/to/where/output/should/be/'
    outputSequences      = FALSE, # change to TRUE whan analyzing your own data 
    prepareForWebServers = FALSE  # change to TRUE if you will use webservers for external sequence analysis
)

## ------------------------------------------------------------------------
extractSwitchSummary(
    exampleSwitchList,
    dIFcutoff = 0.3 # the same cutoff to the summary function as used above
)

## ------------------------------------------------------------------------
# Please note that in the following the part of the examples using
# the "system.file()" command not nesseary when using your own
# data - just supply the path as a string
# (e.g. pathToCPC2resultFile = "/myFiles/myCPC2results.txt" )

exampleSwitchList <- isoformSwitchAnalysisPart2(
  switchAnalyzeRlist        = exampleSwitchList, 
  dIFcutoff                 = 0.3,   # Cutoff for finding switches - set high for short runtime in example data
  n                         = 10,    # if plotting was enabled, it would only output the top 10 switches
  removeNoncodinORFs        = TRUE,  # Because ORF was predicted de novo
  pathToCPC2resultFile      = system.file("extdata/cpc2_result.txt"         , package = "IsoformSwitchAnalyzeR"),
  pathToPFAMresultFile      = system.file("extdata/pfam_results.txt"        , package = "IsoformSwitchAnalyzeR"),
  pathToIUPred2AresultFile  = system.file("extdata/iupred2a_result.txt.gz"  , package = "IsoformSwitchAnalyzeR"),
  pathToSignalPresultFile   = system.file("extdata/signalP_results.txt"     , package = "IsoformSwitchAnalyzeR"),
  outputPlots               = FALSE  # keeps the function from outputting the plots from this example
)

## ---- message = FALSE----------------------------------------------------
extractTopSwitches(exampleSwitchList, filterForConsequences = TRUE, n=3)

## ------------------------------------------------------------------------
data("exampleSwitchListAnalyzed")

## ------------------------------------------------------------------------
extractSwitchSummary(
    exampleSwitchListAnalyzed,
    filterForConsequences = TRUE
) 

## ------------------------------------------------------------------------
subset(
    extractTopSwitches(
        exampleSwitchListAnalyzed,
        filterForConsequences = TRUE,
        n=10,
        inEachComparison = TRUE
    )[,c('gene_name','condition_2','gene_switch_q_value','Rank')],
    gene_name == 'ZAK'
)

## ---- fig.width=12, fig.height=7, warning=FALSE--------------------------
switchPlot(
    exampleSwitchListAnalyzed,
    gene='ZAK',
    condition1 = 'COAD_ctrl',
    condition2 = 'COAD_cancer',
    localTheme = theme_bw(base_size = 13) # making text sightly larger for vignette
)

## ---- fig.width=9, fig.height=5------------------------------------------
extractSwitchOverlap(
    exampleSwitchListAnalyzed,
    filterForConsequences=TRUE
)

## ---- fig.width=12, fig.height=5-----------------------------------------
extractConsequenceSummary(
    exampleSwitchListAnalyzed,
    consequencesToAnalyze='all',
    plotGenes = FALSE,           # enables analysis of genes (instead of isoforms)
    asFractionTotal = FALSE      # enables analysis of fraction of significant features
)

## ---- fig.width=14, fig.height=5-----------------------------------------
extractConsequenceEnrichment(
    exampleSwitchListAnalyzed,
    consequencesToAnalyze='all',
    analysisOppositeConsequence = TRUE,
    returnResult = FALSE # if TRUE returns a data.frame with the results
)

## ---- fig.width=12, fig.height=4-----------------------------------------
extractConsequenceEnrichmentComparison(
    exampleSwitchListAnalyzed,
    consequencesToAnalyze=c('domains_identified','intron_retention','coding_potential'),
    analysisOppositeConsequence = TRUE,
    returnResult = FALSE # if TRUE returns a data.frame with the results
)

## ---- fig.width=12, fig.height=4.5---------------------------------------
extractSplicingEnrichment(
    exampleSwitchListAnalyzed,
    returnResult = FALSE # if TRUE returns a data.frame with the results
)

## ---- fig.width=12, fig.height=4-----------------------------------------
extractSplicingEnrichmentComparison(
    exampleSwitchListAnalyzed,
    splicingToAnalyze = c('A3','MES','ATSS','ATTS'), # the splicing highlighted above
    returnResult = FALSE # if TRUE returns a data.frame with the results
)

## ---- fig.width=8, fig.height=4, warning=FALSE---------------------------
### Volcano like plot:
ggplot(data=exampleSwitchListAnalyzed$isoformFeatures, aes(x=dIF, y=-log10(isoform_switch_q_value))) +
     geom_point(
        aes( color=abs(dIF) > 0.1 & isoform_switch_q_value < 0.05 ), # default cutoff
        size=1
    ) +
    geom_hline(yintercept = -log10(0.05), linetype='dashed') + # default cutoff
    geom_vline(xintercept = c(-0.1, 0.1), linetype='dashed') + # default cutoff
    facet_wrap( ~ condition_2) +
    #facet_grid(condition_1 ~ condition_2) + # alternative to facet_wrap if you have overlapping conditions
    scale_color_manual('Signficant\nIsoform Switch', values = c('black','red')) +
    labs(x='dIF', y='-Log10 ( Isoform Switch Q Value )') +
    theme_bw()

## ---- fig.width=8, fig.height=4, warning=FALSE---------------------------
### Switch vs Gene changes:
ggplot(data=exampleSwitchListAnalyzed$isoformFeatures, aes(x=gene_log2_fold_change, y=dIF)) +
    geom_point(
        aes( color=abs(dIF) > 0.1 & isoform_switch_q_value < 0.05 ), # default cutoff
        size=1
    ) + 
    facet_wrap(~ condition_2) +
    #facet_grid(condition_1 ~ condition_2) + # alternative to facet_wrap if you have overlapping conditions
    geom_hline(yintercept = 0, linetype='dashed') +
    geom_vline(xintercept = 0, linetype='dashed') +
    scale_color_manual('Signficant\nIsoform Switch', values = c('black','red')) +
    labs(x='Gene log2 fold change', y='dIF') +
    theme_bw()

## ------------------------------------------------------------------------
data("exampleSwitchList")         # A newly created switchAnalyzeRlist + switch analysis
names(exampleSwitchList)

data("exampleSwitchListAnalyzed") # A fully analyzed switchAnalyzeRlist
names(exampleSwitchListAnalyzed)

## ------------------------------------------------------------------------
### Preview
head(exampleSwitchList, 2)
# tail(exampleSwitchList, 2)

### Dimentions
dim(exampleSwitchList$isoformFeatures)

nrow(exampleSwitchList)
ncol(exampleSwitchList)
dim(exampleSwitchList)

## ------------------------------------------------------------------------
exampleSwitchList

### Subset
subsetSwitchAnalyzeRlist(
    exampleSwitchList,
    exampleSwitchList$isoformFeatures$gene_name == 'ARHGEF19'
)

## ------------------------------------------------------------------------
head(exampleSwitchList$exons,2)

## ------------------------------------------------------------------------
### Please note
# The way of importing files in the following example with
# "system.file('pathToFile', package="IsoformSwitchAnalyzeR") is
# specialized way of accessing the example data in the IsoformSwitchAnalyzeR package
# and not something  you need to do - just supply the string e.g.
# "/mySalmonQuantifications/" pointing to the parent directory (where 
# each sample is a seperate sub-directory) to the function.

### Import Salmon example data in R package
salmonQuant <- importIsoformExpression(
    parentDir = system.file("extdata/", package="IsoformSwitchAnalyzeR"),
    addIsofomIdAsColumn = TRUE
)

## ------------------------------------------------------------------------
### Make design matrix
myDesign <- data.frame(
    sampleID = colnames(salmonQuant$abundance)[-1],
    condition = gsub('_.*', '', colnames(salmonQuant$abundance)[-1])
)

### Create switchAnalyzeRlist
aSwitchList <- importRdata(
    isoformCountMatrix   = salmonQuant$counts,
    isoformRepExpression = salmonQuant$abundance,
    designMatrix         = myDesign,
    isoformExonAnnoation = system.file("extdata/example.gtf.gz"             , package="IsoformSwitchAnalyzeR"),
    isoformNtFasta       = system.file("extdata/example_isoform_nt.fasta.gz", package="IsoformSwitchAnalyzeR"),
    showProgress = FALSE
)

## ------------------------------------------------------------------------
head(aSwitchList$isoformFeatures,2)

head(aSwitchList$exons,2)

head(aSwitchList$ntSequence,2)

# Etc...

## ---- warning=FALSE, message=FALSE---------------------------------------
aSwitchList <- importGTF(pathToGTF = system.file("extdata/example.gtf.gz", package="IsoformSwitchAnalyzeR"))

## ------------------------------------------------------------------------
aSwitchList

head(aSwitchList,2)
head(aSwitchList$conditions,2)

## ------------------------------------------------------------------------
data("exampleSwitchList")

exampleSwitchListFiltered <- preFilter(
    switchAnalyzeRlist = exampleSwitchList,
    geneExpressionCutoff = 1,
    isoformExpressionCutoff = 0,
    removeSingleIsoformGenes = TRUE
)

exampleSwitchListFilteredStrict <- preFilter(
    switchAnalyzeRlist = exampleSwitchList,
    geneExpressionCutoff = 10,
    isoformExpressionCutoff = 3,
    removeSingleIsoformGenes = TRUE
)

## ---- warning=FALSE------------------------------------------------------
# Load example data and pre-filter
data("exampleSwitchList")
exampleSwitchList <- preFilter(exampleSwitchList) # preFilter to remove lowly expressed features

# Perform test
exampleSwitchListAnalyzed <- isoformSwitchTestDEXSeq(
    switchAnalyzeRlist = exampleSwitchList,
    reduceToSwitchingGenes=TRUE
)

# Summarize switching features
extractSwitchSummary(exampleSwitchListAnalyzed)

## ---- warning=FALSE------------------------------------------------------
# Load example data and pre-filter
data("exampleSwitchList")
exampleSwitchList <- preFilter(exampleSwitchList, geneExpressionCutoff = 4) # preFilter for fast runtime

# Perform test (takes ~10 sec)
exampleSwitchListAnalyzed <- isoformSwitchTestDRIMSeq(
    switchAnalyzeRlist = exampleSwitchList,
    testIntegration='isoform_only',
    reduceToSwitchingGenes=TRUE
)

# Summarize switching features
extractSwitchSummary(exampleSwitchListAnalyzed)

## ------------------------------------------------------------------------
### This example relies on the example data from the 'Usage of The Statistical Test' section above 

exampleSwitchListAnalyzed <- analyzeORF(
    exampleSwitchListAnalyzed,
    orfMethod = "longest",
    # genomeObject = Hsapiens, # not necessary since sequences are already stored in the switchAnalyzeRlist
    showProgress=FALSE
)

head(exampleSwitchListAnalyzed$orfAnalysis, 3)


## ------------------------------------------------------------------------
### This example relies on the example data from the 'Analyzing Open Reading Frames' section above 

exampleSwitchListAnalyzed <- extractSequence(
    exampleSwitchListAnalyzed, 
    pathToOutput = '<insert_path>',
    writeToFile=FALSE # to avoid output when running this example data
)

## ------------------------------------------------------------------------
### Load test data (matching the external sequence analysis results)
data("exampleSwitchListIntermediary")
exampleSwitchListIntermediary

### Add CPAT analysis
exampleSwitchListAnalyzed <- analyzeCPAT(
    switchAnalyzeRlist   = exampleSwitchListIntermediary,
    pathToCPATresultFile = system.file("extdata/cpat_results.txt", package = "IsoformSwitchAnalyzeR"),
    codingCutoff         = 0.725, # the coding potential cutoff we suggested for human
    removeNoncodinORFs   = TRUE   # because ORF was predicted de novo
)

### Add CPC2 analysis
exampleSwitchListAnalyzed <- analyzeCPC2(
    switchAnalyzeRlist   = exampleSwitchListAnalyzed,
    pathToCPC2resultFile = system.file("extdata/cpc2_result.txt", package = "IsoformSwitchAnalyzeR"),
    codingCutoff         = 0.725, # the coding potential cutoff we suggested for human
    removeNoncodinORFs   = TRUE   # because ORF was predicted de novo
)

### Add PFAM analysis
exampleSwitchListAnalyzed <- analyzePFAM(
    switchAnalyzeRlist   = exampleSwitchListAnalyzed,
    pathToPFAMresultFile = system.file("extdata/pfam_results.txt", package = "IsoformSwitchAnalyzeR"),
    showProgress=FALSE
)

### Add SignalP analysis
exampleSwitchListAnalyzed <- analyzeSignalP(
    switchAnalyzeRlist       = exampleSwitchListAnalyzed,
    pathToSignalPresultFile  = system.file("extdata/signalP_results.txt", package = "IsoformSwitchAnalyzeR")
)

### Add NetSurfP2 analysis
exampleSwitchListAnalyzed <- analyzeIUPred2A(
    switchAnalyzeRlist        = exampleSwitchListAnalyzed,
    pathToIUPred2AresultFile = system.file("extdata/iupred2a_result.txt.gz", package = "IsoformSwitchAnalyzeR"),
    showProgress = FALSE
)

exampleSwitchListAnalyzed

## ------------------------------------------------------------------------
### This example relies on the example data from the 'Importing External Sequence Analysis' section above 

exampleSwitchListAnalyzed <- analyzeAlternativeSplicing(
    switchAnalyzeRlist = exampleSwitchListAnalyzed,
    quiet=TRUE
)

### overview of number of intron retentions (IR)
table( exampleSwitchListAnalyzed$AlternativeSplicingAnalysis$IR )

## ------------------------------------------------------------------------
### This example relies on the example data from the 'Predicting Alternative Splicing' section above 

# the consequences highlighted in the text above
consequencesOfInterest <- c('intron_retention','coding_potential','NMD_status','domains_identified','ORF_seq_similarity')

exampleSwitchListAnalyzed <- analyzeSwitchConsequences(
    exampleSwitchListAnalyzed,
    consequencesToAnalyze = consequencesOfInterest, 
    dIFcutoff = 0.4, # very high cutoff for fast runtimes
    showProgress=FALSE
)

extractSwitchSummary(exampleSwitchListAnalyzed, dIFcutoff = 0.4, filterForConsequences = FALSE)
extractSwitchSummary(exampleSwitchListAnalyzed, dIFcutoff = 0.4, filterForConsequences = TRUE)

## ------------------------------------------------------------------------
### Load already analyzed data
data("exampleSwitchListAnalyzed")

### Let's reduce the switchAnalyzeRlist to only one condition
exampleSwitchListAnalyzedSubset <- subsetSwitchAnalyzeRlist(
    exampleSwitchListAnalyzed, 
    exampleSwitchListAnalyzed$isoformFeatures$condition_1 == 'COAD_ctrl'
)
exampleSwitchListAnalyzedSubset

### Extract top switching genes (by q-value)
extractTopSwitches(
    exampleSwitchListAnalyzedSubset, 
    filterForConsequences = TRUE, 
    n = 2, 
    sortByQvals = TRUE
)

### Extract top 2 switching genes (by dIF values)
extractTopSwitches(
    exampleSwitchListAnalyzedSubset, 
    filterForConsequences = TRUE, 
    n = 2, 
    sortByQvals = FALSE
)

## ------------------------------------------------------------------------
### Extract data.frame with all switching isoforms
switchingIso <- extractTopSwitches( 
    exampleSwitchListAnalyzedSubset, 
    filterForConsequences = TRUE, 
    n = NA,                  # n=NA: all features are returned
    extractGenes = FALSE,    # when FALSE isoforms are returned
    sortByQvals = TRUE
)

subset(switchingIso, gene_name == 'ZAK')

## ---- fig.width=12, fig.height=7-----------------------------------------
switchPlot(exampleSwitchListAnalyzedSubset, gene = 'ZAK')

## ---- fig.width=12, fig.height=3-----------------------------------------
### exampleSwitchListAnalyzedSubset is created above

switchPlotTranscript(exampleSwitchListAnalyzedSubset, gene = 'ZAK')

## ---- fig.width=3, fig.height=3------------------------------------------
switchPlotGeneExp (exampleSwitchListAnalyzedSubset, gene = 'ZAK')

## ---- fig.width=4, fig.height=3------------------------------------------
switchPlotIsoExp(exampleSwitchListAnalyzedSubset, gene = 'TNFRSF1B')

## ---- fig.width=4, fig.height=3------------------------------------------
switchPlotIsoUsage(exampleSwitchListAnalyzedSubset, gene = 'ZAK')

## ------------------------------------------------------------------------
data("exampleSwitchListAnalyzed")
exampleSwitchListAnalyzed

## ---- fig.width=12, fig.height=5-----------------------------------------
extractSplicingSummary(
    exampleSwitchListAnalyzed,
    asFractionTotal = FALSE,
    plotGenes=FALSE
)

## ---- fig.width=12, fig.height=4.5---------------------------------------
splicingEnrichment <- extractSplicingEnrichment(
    exampleSwitchListAnalyzed,
    splicingToAnalyze='all',
    returnResult=TRUE,
    returnSummary=TRUE
)

## ---- fig.width=12, fig.height=6-----------------------------------------
subset(splicingEnrichment, splicingEnrichment$AStype == 'ATSS')

## ---- fig.width=12, fig.height=4.5---------------------------------------
extractSplicingEnrichmentComparison(
    exampleSwitchListAnalyzed,
    splicingToAnalyze=c('A3','MES','ATSS'), # Those significant in COAD in the previous analysis
    returnResult=FALSE # Preventing the summary statistics to be returned as a data.frame
)

## ---- fig.width=10, fig.height=6-----------------------------------------
extractSplicingGenomeWide(
    exampleSwitchListAnalyzed,
    featureToExtract = 'all',                 # all isoforms stored in the switchAnalyzeRlist
    splicingToAnalyze = c('A3','MES','ATSS'), # Splice types significantly enriched in COAD
    plot=TRUE,
    returnResult=FALSE  # Preventing the summary statistics to be returned as a data.frame
)

## ---- results = "hide", message = FALSE----------------------------------
### Load example data
data("exampleSwitchListAnalyzed")

### Reduce datasize for fast runtime
selectedGenes <- unique(exampleSwitchListAnalyzed$isoformFeatures$gene_id)[50:100]
exampleSwitchListAnalyzedSubset <- subsetSwitchAnalyzeRlist(
    exampleSwitchListAnalyzed,
    exampleSwitchListAnalyzed$isoformFeatures$gene_id %in% selectedGenes
)

### analyze the biological mechanisms
bioMechanismeAnalysis <- analyzeSwitchConsequences(
    exampleSwitchListAnalyzedSubset, 
    consequencesToAnalyze = c('tss','tts','intron_structure'),
    showProgress = FALSE
)$switchConsequence # only the consequences are interesting here

### subset to those with differences
bioMechanismeAnalysis <- bioMechanismeAnalysis[which(bioMechanismeAnalysis$isoformsDifferent),]

### extract the consequences of interest already stored in the switchAnalyzeRlist
myConsequences <- exampleSwitchListAnalyzedSubset$switchConsequence
myConsequences <- myConsequences[which(myConsequences$isoformsDifferent),]
myConsequences$isoPair <- paste(myConsequences$isoformUpregulated, myConsequences$isoformDownregulated) # id for specific iso comparison

### Obtain the mechanisms of the isoform switches with consequences
bioMechanismeAnalysis$isoPair <- paste(bioMechanismeAnalysis$isoformUpregulated, bioMechanismeAnalysis$isoformDownregulated)
bioMechanismeAnalysis <- bioMechanismeAnalysis[which(bioMechanismeAnalysis$isoPair %in% myConsequences$isoPair),]  # id for specific iso comparison

## ---- fig.width=3.5, fig.height=3.5--------------------------------------
### Create list with the isoPair ids for each consequence
AS   <- bioMechanismeAnalysis$isoPair[ which( bioMechanismeAnalysis$featureCompared == 'intron_structure')]
aTSS <- bioMechanismeAnalysis$isoPair[ which( bioMechanismeAnalysis$featureCompared == 'tss'             )]
aTTS <- bioMechanismeAnalysis$isoPair[ which( bioMechanismeAnalysis$featureCompared == 'tts'             )]

mechList <- list(
    AS=AS,
    aTSS=aTSS,
    aTTS=aTTS
)

### Create Venn diagram
library(VennDiagram)
myVenn <- venn.diagram(
    x = mechList,
    col='transparent',
    alpha=0.4,
    fill=RColorBrewer::brewer.pal(n=3,name='Dark2'),
    filename=NULL
)

### Plot the venn diagram
grid.newpage() ; grid.draw(myVenn)

## ------------------------------------------------------------------------
myDesign <- data.frame(
    sampleID=1:6,
    condition=c('wt','wt','wt','ko','ko','ko') # WT = wildtype, KO = Knock out
)

myDesign

## ------------------------------------------------------------------------
myDesign$batch <- factor(c('a','a','b','a','b','b'))
myDesign

## ------------------------------------------------------------------------
packageVersion('IsoformSwitchAnalyzeR')

sessionInfo()