library(NanoMethViz)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
This package comes with helper functions that import exon annotations from the Bioconductor packages `Homo.sapiens` and `Mus.musculus`. The functions `get_exons_homo_sapiens()` and `get_exons_mus_musculus()` simply take data from the respective packages and reorganise the columns such that we have seven columns: `gene_id`, `chr`, `strand`, `start`, `end`, `transcript_id` and `symbol`.
This is used to provide gene annotations for the gene or region plots.
Other annotations can most likely be imported using `rtracklayer::import()` and manipulated into the desired format. As an example, we can use a small sample of the C. elegans gene annotation provided by ENSEMBL. `rtracklayer` will import the annotation as a `GRanges` object; this can be coerced into a data.frame and manipulated using `dplyr`.
# Read the sample C. elegans GTF that ships with NanoMethViz; rtracklayer
# returns the annotation as a GRanges object.
anno <- rtracklayer::import(
  system.file("c_elegans.gtf.gz", package = "NanoMethViz")
)
# Preview the first few ranges.
head(anno)
## GRanges object with 6 ranges and 13 metadata columns:
## seqnames ranges strand | source type score phase
## <Rle> <IRanges> <Rle> | <factor> <factor> <numeric> <integer>
## [1] IV 9601517-9601695 - | WormBase exon NA <NA>
## [2] IV 9601040-9601345 - | WormBase exon NA <NA>
## [3] IV 9600828-9600953 - | WormBase exon NA <NA>
## [4] IV 9600627-9600780 - | WormBase exon NA <NA>
## [5] IV 9600002-9600392 - | WormBase exon NA <NA>
## [6] IV 9599702-9599873 - | WormBase exon NA <NA>
## gene_id transcript_id exon_number gene_name gene_source
## <character> <character> <character> <character> <character>
## [1] WBGene00000002 F27C8.1.1 1 aat-1 WormBase
## [2] WBGene00000002 F27C8.1.1 2 aat-1 WormBase
## [3] WBGene00000002 F27C8.1.1 3 aat-1 WormBase
## [4] WBGene00000002 F27C8.1.1 4 aat-1 WormBase
## [5] WBGene00000002 F27C8.1.1 5 aat-1 WormBase
## [6] WBGene00000002 F27C8.1.1 6 aat-1 WormBase
## gene_biotype transcript_source transcript_biotype exon_id
## <character> <character> <character> <character>
## [1] protein_coding WormBase protein_coding F27C8.1.1.e1
## [2] protein_coding WormBase protein_coding F27C8.1.1.e2
## [3] protein_coding WormBase protein_coding F27C8.1.1.e3
## [4] protein_coding WormBase protein_coding F27C8.1.1.e4
## [5] protein_coding WormBase protein_coding F27C8.1.1.e5
## [6] protein_coding WormBase protein_coding F27C8.1.1.e6
## -------
## seqinfo: 3 sequences from an unspecified genome; no seqlengths
# Flatten the GRanges into a data.frame and keep only the seven annotation
# columns, renaming seqnames -> chr and gene_name -> symbol on the way.
anno <- anno %>%
  as.data.frame() %>%
  dplyr::select(
    gene_id,
    chr = seqnames,
    strand,
    start,
    end,
    transcript_id,
    symbol = gene_name
  )
# Preview the reshaped annotation.
head(anno)
## gene_id chr strand start end transcript_id symbol
## 1 WBGene00000002 IV - 9601517 9601695 F27C8.1.1 aat-1
## 2 WBGene00000002 IV - 9601040 9601345 F27C8.1.1 aat-1
## 3 WBGene00000002 IV - 9600828 9600953 F27C8.1.1 aat-1
## 4 WBGene00000002 IV - 9600627 9600780 F27C8.1.1 aat-1
## 5 WBGene00000002 IV - 9600002 9600392 F27C8.1.1 aat-1
## 6 WBGene00000002 IV - 9599702 9599873 F27C8.1.1 aat-1
Annotations can be simplified if full exon and isoform information is not required. For example, gene body annotation can be represented as single-exon genes. Here we take the example dataset and transform the isoform annotations of Peg3 into a single gene body block. The helper function `exons_to_genes()` can help with this common conversion.
# Load the bundled example result and plot Peg3 with its original
# exon/isoform annotation.
nmr <- load_example_nanomethresult()
plot_gene(nmr, "Peg3")

# Convert the exon annotation with exons_to_genes() and reuse the gene ID as
# the transcript ID for every row.
new_exons <- dplyr::mutate(
  exons_to_genes(NanoMethViz::exons(nmr)),
  transcript_id = gene_id
)

# Swap the simplified annotation into the result object and plot Peg3 again.
NanoMethViz::exons(nmr) <- new_exons
plot_gene(nmr, "Peg3")