library(NanoMethViz)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:OrganismDbi':
## 
##     select
## The following objects are masked from 'package:GenomicRanges':
## 
##     intersect, setdiff, union
## The following object is masked from 'package:GenomeInfoDb':
## 
##     intersect
## The following object is masked from 'package:AnnotationDbi':
## 
##     select
## The following objects are masked from 'package:IRanges':
## 
##     collapse, desc, intersect, setdiff, slice, union
## The following objects are masked from 'package:S4Vectors':
## 
##     first, intersect, rename, setdiff, setequal, union
## The following object is masked from 'package:Biobase':
## 
##     combine
## The following objects are masked from 'package:BiocGenerics':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Importing Annotations

This package comes with helper functions that import exon annotations from the Bioconductor packages Homo.sapiens and Mus.musculus. The functions get_exons_homo_sapiens() and get_exons_mus_musculus() simply take data from the respective packages and reorganise the columns such that we have seven columns: gene_id, chr, strand, start, end, transcript_id and symbol.

This is used to provide gene annotations for the gene or region plots.

Other annotations can most likely be imported using rtracklayer::import() and manipulated into the desired format. As an example, we can use a small sample of the C. elegans gene annotation provided by Ensembl. rtracklayer will import the annotation as a GRanges object; this can be coerced into a data.frame and manipulated using dplyr.

# Import the example C. elegans GTF that is looked up inside the NanoMethViz
# package directory; rtracklayer returns it as a GRanges object.
anno <- rtracklayer::import(
    system.file("c_elegans.gtf.gz", package = "NanoMethViz")
)

# Preview the first few imported records.
head(anno)
## GRanges object with 6 ranges and 13 metadata columns:
##       seqnames          ranges strand |   source     type     score     phase
##          <Rle>       <IRanges>  <Rle> | <factor> <factor> <numeric> <integer>
##   [1]       IV 9601517-9601695      - | WormBase     exon        NA      <NA>
##   [2]       IV 9601040-9601345      - | WormBase     exon        NA      <NA>
##   [3]       IV 9600828-9600953      - | WormBase     exon        NA      <NA>
##   [4]       IV 9600627-9600780      - | WormBase     exon        NA      <NA>
##   [5]       IV 9600002-9600392      - | WormBase     exon        NA      <NA>
##   [6]       IV 9599702-9599873      - | WormBase     exon        NA      <NA>
##              gene_id transcript_id exon_number   gene_name gene_source
##          <character>   <character> <character> <character> <character>
##   [1] WBGene00000002     F27C8.1.1           1       aat-1    WormBase
##   [2] WBGene00000002     F27C8.1.1           2       aat-1    WormBase
##   [3] WBGene00000002     F27C8.1.1           3       aat-1    WormBase
##   [4] WBGene00000002     F27C8.1.1           4       aat-1    WormBase
##   [5] WBGene00000002     F27C8.1.1           5       aat-1    WormBase
##   [6] WBGene00000002     F27C8.1.1           6       aat-1    WormBase
##         gene_biotype transcript_source transcript_biotype      exon_id
##          <character>       <character>        <character>  <character>
##   [1] protein_coding          WormBase     protein_coding F27C8.1.1.e1
##   [2] protein_coding          WormBase     protein_coding F27C8.1.1.e2
##   [3] protein_coding          WormBase     protein_coding F27C8.1.1.e3
##   [4] protein_coding          WormBase     protein_coding F27C8.1.1.e4
##   [5] protein_coding          WormBase     protein_coding F27C8.1.1.e5
##   [6] protein_coding          WormBase     protein_coding F27C8.1.1.e6
##   -------
##   seqinfo: 3 sequences from an unspecified genome; no seqlengths
# Coerce the imported ranges to a data.frame and keep the seven annotation
# columns, renaming seqnames -> chr and gene_name -> symbol in the same step.
anno <- dplyr::select(
    as.data.frame(anno),
    gene_id,
    chr = seqnames,
    strand,
    start,
    end,
    transcript_id,
    symbol = gene_name
)

# Preview the reshaped annotation table.
head(anno)
##          gene_id chr strand   start     end transcript_id symbol
## 1 WBGene00000002  IV      - 9601517 9601695     F27C8.1.1  aat-1
## 2 WBGene00000002  IV      - 9601040 9601345     F27C8.1.1  aat-1
## 3 WBGene00000002  IV      - 9600828 9600953     F27C8.1.1  aat-1
## 4 WBGene00000002  IV      - 9600627 9600780     F27C8.1.1  aat-1
## 5 WBGene00000002  IV      - 9600002 9600392     F27C8.1.1  aat-1
## 6 WBGene00000002  IV      - 9599702 9599873     F27C8.1.1  aat-1

Alternative Annotations

Annotations can be simplified if full exon and isoform information is not required. For example, gene body annotation can be represented as single-exon genes. Here we take the example dataset and transform the isoform annotations of Peg3 into a single gene body block. The helper function exons_to_genes() can help with this common conversion.

# Load the example result object used throughout this section.
nmr <- load_example_nanomethresult()

# Plot Peg3 with the exon annotation as loaded.
plot_gene(nmr, "Peg3")

# Convert the exon annotation with exons_to_genes(), then set transcript_id
# to gene_id for every row.
new_exons <- dplyr::mutate(
    exons_to_genes(NanoMethViz::exons(nmr)),
    transcript_id = gene_id
)

# Replace the annotation stored in the result object and plot Peg3 again.
NanoMethViz::exons(nmr) <- new_exons

plot_gene(nmr, "Peg3")