# Make sure BiocManager itself is installed before using it below.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
# orthogene is only available on Bioconductor>=3.14
if (BiocManager::version() < "3.14") {
  BiocManager::install(version = "devel")
}
BiocManager::install("orthogene")
library(orthogene)
data("exp_mouse")
# Setting to "homologene" for the purposes of quick demonstration.
# We generally recommend using method="gprofiler" (default).
method <- "homologene"
It’s not always clear whether a dataset is using the original species gene names, human gene names, or some other species’ gene names.
`infer_species` takes a list/matrix/data.frame with genes and
infers the species that they best match to!
For the sake of speed, the genes extracted from gene_df
are tested against genomes from only the following 6 test_species
by default:
- human
- monkey
- rat
- mouse
- zebrafish
- fly
However, you can supply your own list of `test_species`, which will
be automatically mapped and standardised using `map_species`.
# Infer which of the default test species the gene names in exp_mouse match.
matches <- orthogene::infer_species(
  gene_df = exp_mouse
)
## Preparing gene_df.
## sparseMatrix format detected.
## Extracting genes from rownames.
## 15,259 genes extracted.
## Testing for gene overlap with: human
## Retrieving all genes using: homologene.
## Retrieving all organisms available in gprofiler.
## Using stored `gprofiler_orgs`.
## Mapping species name: human
## Common name mapping found for human
## 1 organism identified from search: 9606
## Gene table with 19,129 rows retrieved.
## Returning all 19,129 genes from human.
## Testing for gene overlap with: monkey
## Retrieving all genes using: homologene.
## Retrieving all organisms available in gprofiler.
## Using stored `gprofiler_orgs`.
## Mapping species name: monkey
## Common name mapping found for monkey
## 1 organism identified from search: 9544
## Gene table with 16,843 rows retrieved.
## Returning all 16,843 genes from monkey.
## Testing for gene overlap with: rat
## Retrieving all genes using: homologene.
## Retrieving all organisms available in gprofiler.
## Using stored `gprofiler_orgs`.
## Mapping species name: rat
## Common name mapping found for rat
## 1 organism identified from search: 10116
## Gene table with 20,616 rows retrieved.
## Returning all 20,616 genes from rat.
## Testing for gene overlap with: mouse
## Retrieving all genes using: homologene.
## Retrieving all organisms available in gprofiler.
## Using stored `gprofiler_orgs`.
## Mapping species name: mouse
## Common name mapping found for mouse
## 1 organism identified from search: 10090
## Gene table with 21,207 rows retrieved.
## Returning all 21,207 genes from mouse.
## Testing for gene overlap with: zebrafish
## Retrieving all genes using: homologene.
## Retrieving all organisms available in gprofiler.
## Using stored `gprofiler_orgs`.
## Mapping species name: zebrafish
## Common name mapping found for zebrafish
## 1 organism identified from search: 7955
## Gene table with 20,897 rows retrieved.
## Returning all 20,897 genes from zebrafish.
## Testing for gene overlap with: fly
## Retrieving all genes using: homologene.
## Retrieving all organisms available in gprofiler.
## Using stored `gprofiler_orgs`.
## Mapping species name: fly
## Common name mapping found for fly
## 1 organism identified from search: 7227
## Gene table with 8,438 rows retrieved.
## Returning all 8,438 genes from fly.
## Top match:
## - species: mouse
## - percent_match: 92%
To create an example dataset, turn the gene names into rat genes.
# Build a rat-labelled example dataset by converting the mouse gene names in
# exp_mouse to their rat orthologs. The ortholog source comes from the
# `method` variable set during setup ("homologene" in this demonstration),
# so switching sources only requires changing that one assignment.
exp_rat <- orthogene::convert_orthologs(
  gene_df = exp_mouse,
  input_species = "mouse",
  output_species = "rat",
  method = method
)
## Preparing gene_df.
## sparseMatrix format detected.
## Extracting genes from rownames.
## 15,259 genes extracted.
## Converting mouse ==> rat orthologs using: homologene
## Retrieving all organisms available in gprofiler.
## Using stored `gprofiler_orgs`.
## Mapping species name: mouse
## Common name mapping found for mouse
## 1 organism identified from search: 10090
## Retrieving all organisms available in gprofiler.
## Using stored `gprofiler_orgs`.
## Mapping species name: rat
## Common name mapping found for rat
## 1 organism identified from search: 10116
## Checking for genes without orthologs in rat.
## Extracting genes from input_gene.
## 13,812 genes extracted.
## Extracting genes from ortholog_gene.
## 13,812 genes extracted.
## Checking for genes without 1:1 orthologs.
## Dropping 486 genes that have multiple input_gene per ortholog_gene.
## Dropping 148 genes that have multiple ortholog_gene per input_gene.
## Filtering gene_df with gene_map
## Setting ortholog_gene to rownames.
##
## =========== REPORT SUMMARY ===========
## Total genes dropped after convert_orthologs :
## 2,322 / 15,259 (15%)
## Total genes remaining after convert_orthologs :
## 12,937 / 15,259 (85%)
# Infer which of the default test species the gene names in exp_rat match.
matches <- orthogene::infer_species(
  gene_df = exp_rat
)
## Preparing gene_df.
## sparseMatrix format detected.
## Extracting genes from rownames.
## 12,937 genes extracted.
## Testing for gene overlap with: human
## Retrieving all genes using: homologene.
## Retrieving all organisms available in gprofiler.
## Using stored `gprofiler_orgs`.
## Mapping species name: human
## Common name mapping found for human
## 1 organism identified from search: 9606
## Gene table with 19,129 rows retrieved.
## Returning all 19,129 genes from human.
## Testing for gene overlap with: monkey
## Retrieving all genes using: homologene.
## Retrieving all organisms available in gprofiler.
## Using stored `gprofiler_orgs`.
## Mapping species name: monkey
## Common name mapping found for monkey
## 1 organism identified from search: 9544
## Gene table with 16,843 rows retrieved.
## Returning all 16,843 genes from monkey.
## Testing for gene overlap with: rat
## Retrieving all genes using: homologene.
## Retrieving all organisms available in gprofiler.
## Using stored `gprofiler_orgs`.
## Mapping species name: rat
## Common name mapping found for rat
## 1 organism identified from search: 10116
## Gene table with 20,616 rows retrieved.
## Returning all 20,616 genes from rat.
## Testing for gene overlap with: mouse
## Retrieving all genes using: homologene.
## Retrieving all organisms available in gprofiler.
## Using stored `gprofiler_orgs`.
## Mapping species name: mouse
## Common name mapping found for mouse
## 1 organism identified from search: 10090
## Gene table with 21,207 rows retrieved.
## Returning all 21,207 genes from mouse.
## Testing for gene overlap with: zebrafish
## Retrieving all genes using: homologene.
## Retrieving all organisms available in gprofiler.
## Using stored `gprofiler_orgs`.
## Mapping species name: zebrafish
## Common name mapping found for zebrafish
## 1 organism identified from search: 7955
## Gene table with 20,897 rows retrieved.
## Returning all 20,897 genes from zebrafish.
## Testing for gene overlap with: fly
## Retrieving all genes using: homologene.
## Retrieving all organisms available in gprofiler.
## Using stored `gprofiler_orgs`.
## Mapping species name: fly
## Common name mapping found for fly
## 1 organism identified from search: 7227
## Gene table with 8,438 rows retrieved.
## Returning all 8,438 genes from fly.
## Top match:
## - species: rat
## - percent_match: 100%
To create an example dataset, turn the gene names into human genes.
# Build a human-labelled example dataset by converting the mouse gene names in
# exp_mouse to their human orthologs. The ortholog source comes from the
# `method` variable set during setup ("homologene" in this demonstration),
# so switching sources only requires changing that one assignment.
exp_human <- orthogene::convert_orthologs(
  gene_df = exp_mouse,
  input_species = "mouse",
  output_species = "human",
  method = method
)
## Preparing gene_df.
## sparseMatrix format detected.
## Extracting genes from rownames.
## 15,259 genes extracted.
## Converting mouse ==> human orthologs using: homologene
## Retrieving all organisms available in gprofiler.
## Using stored `gprofiler_orgs`.
## Mapping species name: mouse
## Common name mapping found for mouse
## 1 organism identified from search: 10090
## Retrieving all organisms available in gprofiler.
## Using stored `gprofiler_orgs`.
## Mapping species name: human
## Common name mapping found for human
## 1 organism identified from search: 9606
## Checking for genes without orthologs in human.
## Extracting genes from input_gene.
## 13,416 genes extracted.
## Extracting genes from ortholog_gene.
## 13,416 genes extracted.
## Checking for genes without 1:1 orthologs.
## Dropping 46 genes that have multiple input_gene per ortholog_gene.
## Dropping 56 genes that have multiple ortholog_gene per input_gene.
## Filtering gene_df with gene_map
## Setting ortholog_gene to rownames.
##
## =========== REPORT SUMMARY ===========
## Total genes dropped after convert_orthologs :
## 2,016 / 15,259 (13%)
## Total genes remaining after convert_orthologs :
## 13,243 / 15,259 (87%)
# Infer which of the default test species the gene names in exp_human match.
matches <- orthogene::infer_species(
  gene_df = exp_human
)
## Preparing gene_df.
## sparseMatrix format detected.
## Extracting genes from rownames.
## 13,243 genes extracted.
## Testing for gene overlap with: human
## Retrieving all genes using: homologene.
## Retrieving all organisms available in gprofiler.
## Using stored `gprofiler_orgs`.
## Mapping species name: human
## Common name mapping found for human
## 1 organism identified from search: 9606
## Gene table with 19,129 rows retrieved.
## Returning all 19,129 genes from human.
## Testing for gene overlap with: monkey
## Retrieving all genes using: homologene.
## Retrieving all organisms available in gprofiler.
## Using stored `gprofiler_orgs`.
## Mapping species name: monkey
## Common name mapping found for monkey
## 1 organism identified from search: 9544
## Gene table with 16,843 rows retrieved.
## Returning all 16,843 genes from monkey.
## Testing for gene overlap with: rat
## Retrieving all genes using: homologene.
## Retrieving all organisms available in gprofiler.
## Using stored `gprofiler_orgs`.
## Mapping species name: rat
## Common name mapping found for rat
## 1 organism identified from search: 10116
## Gene table with 20,616 rows retrieved.
## Returning all 20,616 genes from rat.
## Testing for gene overlap with: mouse
## Retrieving all genes using: homologene.
## Retrieving all organisms available in gprofiler.
## Using stored `gprofiler_orgs`.
## Mapping species name: mouse
## Common name mapping found for mouse
## 1 organism identified from search: 10090
## Gene table with 21,207 rows retrieved.
## Returning all 21,207 genes from mouse.
## Testing for gene overlap with: zebrafish
## Retrieving all genes using: homologene.
## Retrieving all organisms available in gprofiler.
## Using stored `gprofiler_orgs`.
## Mapping species name: zebrafish
## Common name mapping found for zebrafish
## 1 organism identified from search: 7955
## Gene table with 20,897 rows retrieved.
## Returning all 20,897 genes from zebrafish.
## Testing for gene overlap with: fly
## Retrieving all genes using: homologene.
## Retrieving all organisms available in gprofiler.
## Using stored `gprofiler_orgs`.
## Mapping species name: fly
## Common name mapping found for fly
## 1 organism identified from search: 7227
## Gene table with 8,438 rows retrieved.
## Returning all 8,438 genes from fly.
## Top match:
## - species: human
## - percent_match: 100%
test_species
You can even supply `test_species` with the name of one of the R packages that
`orthogene` gets orthologs from. This will test against all species available
in that particular R package.
For example, by setting `test_species="homologene"` we automatically test for
% gene matches in each of the 20+ species available in `homologene`.
# Supplying a package name as test_species tests the genes against every
# species available in that package (here: homologene).
matches <- orthogene::infer_species(
  gene_df = exp_human,
  test_species = "homologene"
)
## Retrieving all organisms available in homologene.
## Preparing gene_df.
## sparseMatrix format detected.
## Extracting genes from rownames.
## 13,243 genes extracted.
## Testing for gene overlap with: Mus musculus
## Retrieving all genes using: homologene.
## Retrieving all organisms available in gprofiler.
## Using stored `gprofiler_orgs`.
## Mapping species name: Mus musculus
## 1 organism identified from search: 10090
## Gene table with 21,207 rows retrieved.
## Returning all 21,207 genes from Mus musculus.
## Testing for gene overlap with: Rattus norvegicus
## Retrieving all genes using: homologene.
## Retrieving all organisms available in gprofiler.
## Using stored `gprofiler_orgs`.
## Mapping species name: Rattus norvegicus
## 1 organism identified from search: 10116
## Gene table with 20,616 rows retrieved.
## Returning all 20,616 genes from Rattus norvegicus.
## Testing for gene overlap with: Kluyveromyces lactis
## Retrieving all genes using: homologene.
## Retrieving all organisms available in gprofiler.
## Using stored `gprofiler_orgs`.
## Mapping species name: Kluyveromyces lactis
## WARNING: No organisms identified matched 'kluyveromyces lactis' Try a different query.
## Gene table with 0 rows retrieved.
## Returning all 0 genes from Kluyveromyces lactis.
## Testing for gene overlap with: Magnaporthe oryzae
## Retrieving all genes using: homologene.
## Retrieving all organisms available in gprofiler.
## Using stored `gprofiler_orgs`.
## Mapping species name: Magnaporthe oryzae
## 1 organism identified from search: 242507
## Gene table with 0 rows retrieved.
## Returning all 0 genes from Magnaporthe oryzae.
## Testing for gene overlap with: Eremothecium gossypii
## Retrieving all genes using: homologene.
## Retrieving all organisms available in gprofiler.
## Using stored `gprofiler_orgs`.
## Mapping species name: Eremothecium gossypii
## 1 organism identified from search: 284811
## Gene table with 0 rows retrieved.
## Returning all 0 genes from Eremothecium gossypii.
## Testing for gene overlap with: Arabidopsis thaliana
## Retrieving all genes using: homologene.
## Retrieving all organisms available in gprofiler.
## Using stored `gprofiler_orgs`.
## Mapping species name: Arabidopsis thaliana
## 1 organism identified from search: 3702
## Gene table with 19,143 rows retrieved.
## Returning all 19,143 genes from Arabidopsis thaliana.
## Testing for gene overlap with: Oryza sativa
## Retrieving all genes using: homologene.
## Retrieving all organisms available in gprofiler.
## Using stored `gprofiler_orgs`.
## Mapping species name: Oryza sativa
## 2 organisms identified from search.
## Selecting first:
## - 39946
## - 39947
## Gene table with 0 rows retrieved.
## Returning all 0 genes from Oryza sativa.
## Testing for gene overlap with: Schizosaccharomyces pombe
## Retrieving all genes using: homologene.
## Retrieving all organisms available in gprofiler.
## Using stored `gprofiler_orgs`.
## Mapping species name: Schizosaccharomyces pombe
## 1 organism identified from search: 284812
## Gene table with 0 rows retrieved.
## Returning all 0 genes from Schizosaccharomyces pombe.
## Testing for gene overlap with: Saccharomyces cerevisiae
## Retrieving all genes using: homologene.
## Retrieving all organisms available in gprofiler.
## Using stored `gprofiler_orgs`.
## Mapping species name: Saccharomyces cerevisiae
## 1 organism identified from search: 4932
## Gene table with 4,579 rows retrieved.
## Returning all 4,579 genes from Saccharomyces cerevisiae.
## Testing for gene overlap with: Neurospora crassa
## Retrieving all genes using: homologene.
## Retrieving all organisms available in gprofiler.
## Using stored `gprofiler_orgs`.
## Mapping species name: Neurospora crassa
## 1 organism identified from search: 367110
## Gene table with 0 rows retrieved.
## Returning all 0 genes from Neurospora crassa.
## Testing for gene overlap with: Caenorhabditis elegans
## Retrieving all genes using: homologene.
## Retrieving all organisms available in gprofiler.
## Using stored `gprofiler_orgs`.
## Mapping species name: Caenorhabditis elegans
## 2 organisms identified from search.
## Selecting first:
## - 6239
## - 6239
## Gene table with 7,575 rows retrieved.
## Returning all 7,575 genes from Caenorhabditis elegans.
## Testing for gene overlap with: Anopheles gambiae
## Retrieving all genes using: homologene.
## Retrieving all organisms available in gprofiler.
## Using stored `gprofiler_orgs`.
## Mapping species name: Anopheles gambiae
## 1 organism identified from search: 7165
## Gene table with 8,428 rows retrieved.
## Returning all 8,428 genes from Anopheles gambiae.
## Testing for gene overlap with: Drosophila melanogaster
## Retrieving all genes using: homologene.
## Retrieving all organisms available in gprofiler.
## Using stored `gprofiler_orgs`.
## Mapping species name: Drosophila melanogaster
## 1 organism identified from search: 7227
## Gene table with 8,438 rows retrieved.
## Returning all 8,438 genes from Drosophila melanogaster.
## Testing for gene overlap with: Danio rerio
## Retrieving all genes using: homologene.
## Retrieving all organisms available in gprofiler.
## Using stored `gprofiler_orgs`.
## Mapping species name: Danio rerio
## 1 organism identified from search: 7955
## Gene table with 20,897 rows retrieved.
## Returning all 20,897 genes from Danio rerio.
## Testing for gene overlap with: Xenopus (Silurana) tropicalis
## Retrieving all genes using: homologene.
## Retrieving all organisms available in gprofiler.
## Using stored `gprofiler_orgs`.
## Mapping species name: Xenopus (Silurana) tropicalis
## Warning in grepl(spec_queries, display_name, ignore.case = TRUE): TRE pattern
## compilation error 'Missing ')''
## Testing for gene overlap with: Gallus gallus
## Retrieving all genes using: homologene.
## Retrieving all organisms available in gprofiler.
## Using stored `gprofiler_orgs`.
## Mapping species name: Gallus gallus
## 1 organism identified from search: 9031
## Gene table with 14,600 rows retrieved.
## Returning all 14,600 genes from Gallus gallus.
## Testing for gene overlap with: Macaca mulatta
## Retrieving all genes using: homologene.
## Retrieving all organisms available in gprofiler.
## Using stored `gprofiler_orgs`.
## Mapping species name: Macaca mulatta
## 1 organism identified from search: 9544
## Gene table with 16,843 rows retrieved.
## Returning all 16,843 genes from Macaca mulatta.
## Testing for gene overlap with: Pan troglodytes
## Retrieving all genes using: homologene.
## Retrieving all organisms available in gprofiler.
## Using stored `gprofiler_orgs`.
## Mapping species name: Pan troglodytes
## 1 organism identified from search: 9598
## Gene table with 18,730 rows retrieved.
## Returning all 18,730 genes from Pan troglodytes.
## Testing for gene overlap with: Homo sapiens
## Retrieving all genes using: homologene.
## Retrieving all organisms available in gprofiler.
## Using stored `gprofiler_orgs`.
## Mapping species name: Homo sapiens
## 1 organism identified from search: 9606
## Gene table with 19,129 rows retrieved.
## Returning all 19,129 genes from Homo sapiens.
## Testing for gene overlap with: Canis lupus familiaris
## Retrieving all genes using: homologene.
## Retrieving all organisms available in gprofiler.
## Using stored `gprofiler_orgs`.
## Mapping species name: Canis lupus familiaris
## 1 organism identified from search: 9615
## Gene table with 18,117 rows retrieved.
## Returning all 18,117 genes from Canis lupus familiaris.
## Testing for gene overlap with: Bos taurus
## Retrieving all genes using: homologene.
## Retrieving all organisms available in gprofiler.
## Using stored `gprofiler_orgs`.
## Mapping species name: Bos taurus
## 2 organisms identified from search.
## Selecting first:
## - 30522
## - 9913
## Gene table with 0 rows retrieved.
## Returning all 0 genes from Bos taurus.
## Top match:
## - species: Homo sapiens
## - percent_match: 100%
# Print the R version, platform and package versions used for this run,
# for reproducibility.
utils::sessionInfo()
## R version 4.1.1 Patched (2021-08-22 r80813)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Mojave 10.14.6
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
##
## locale:
## [1] C/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] orthogene_1.0.0 BiocStyle_2.22.0
##
## loaded via a namespace (and not attached):
## [1] httr_1.4.2 sass_0.4.0
## [3] tidyr_1.1.4 jsonlite_1.7.2
## [5] viridisLite_0.4.0 carData_3.0-4
## [7] gprofiler2_0.2.1 bslib_0.3.1
## [9] assertthat_0.2.1 BiocManager_1.30.16
## [11] highr_0.9 GenomeInfoDbData_1.2.7
## [13] cellranger_1.1.0 yaml_2.2.1
## [15] pillar_1.6.4 backports_1.2.1
## [17] lattice_0.20-45 glue_1.4.2
## [19] digest_0.6.28 ggsignif_0.6.3
## [21] colorspace_2.0-2 htmltools_0.5.2
## [23] Matrix_1.3-4 pkgconfig_2.0.3
## [25] babelgene_21.4 broom_0.7.9
## [27] magick_2.7.3 haven_2.4.3
## [29] bookdown_0.24 purrr_0.3.4
## [31] patchwork_1.1.1 scales_1.1.1
## [33] openxlsx_4.2.4 rio_0.5.27
## [35] tibble_3.1.5 generics_0.1.1
## [37] farver_2.1.0 car_3.0-11
## [39] ggplot2_3.3.5 ellipsis_0.3.2
## [41] ggpubr_0.4.0 lazyeval_0.2.2
## [43] cli_3.0.1 magrittr_2.0.1
## [45] crayon_1.4.1 readxl_1.3.1
## [47] evaluate_0.14 fansi_0.5.0
## [49] rstatix_0.7.0 homologene_1.4.68.19.3.27
## [51] forcats_0.5.1 foreign_0.8-81
## [53] tools_4.1.1 data.table_1.14.2
## [55] hms_1.1.1 lifecycle_1.0.1
## [57] stringr_1.4.0 plotly_4.10.0
## [59] munsell_0.5.0 zip_2.2.0
## [61] compiler_4.1.1 jquerylib_0.1.4
## [63] rlang_0.4.12 grid_4.1.1
## [65] htmlwidgets_1.5.4 labeling_0.4.2
## [67] rmarkdown_2.11 gtable_0.3.0
## [69] abind_1.4-5 DBI_1.1.1
## [71] curl_4.3.2 R6_2.5.1
## [73] knitr_1.36 dplyr_1.0.7
## [75] fastmap_1.1.0 utf8_1.2.2
## [77] stringi_1.7.5 parallel_4.1.1
## [79] Rcpp_1.0.7 vctrs_0.3.8
## [81] tidyselect_1.1.1 xfun_0.27