Benchmark the following strategies:
- `method="homologene"`
- `method="gorth"`
For each method, benchmark the following metrics:
- `all_genes()`
- `map_orthologs()`
# Attach orthogene (the package being benchmarked) and dplyr.
library(orthogene)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(patchwork)
Repeat tests across various common model organisms.
# Model organisms to benchmark, as a named character vector:
# names are common labels, values are abbreviated scientific names.
species <- c(
  human = "H sapiens",
  chimp = "P troglodytes",
  baboon = "P anubis",
  macaque = "M mulatta",
  marmoset = "C jacchus",
  mouse = "M musculus",
  rat = "R norvegicus",
  hamster = "M auratus",
  dog = "C lupus familiaris",
  cat = "F catus",
  cow = "B taurus",
  chicken = "G gallus",
  zebrafish = "D rerio",
  fly = "D melanogaster",
  worm = "C elegans",
  rice = "O sativa"
)

# Map each species name to its organism identifier (e.g. "H sapiens" is
# reported as "hsapiens"), then re-apply the common-name labels so the result
# can be subset by names such as "human" or "mouse".
species_mapped <- map_species(species = species) %>%
  `names<-`(names(species))
## Using stored `gprofiler_orgs`.
## Mapping species name: H sapiens
## 1 organism identified from search: hsapiens
## Mapping species name: P troglodytes
## 1 organism identified from search: ptroglodytes
## Mapping species name: P anubis
## 1 organism identified from search: panubis
## Mapping species name: M mulatta
## 1 organism identified from search: mmulatta
## Mapping species name: C jacchus
## 1 organism identified from search: cjacchus
## Mapping species name: M musculus
## 1 organism identified from search: mmusculus
## Mapping species name: R norvegicus
## 1 organism identified from search: rnorvegicus
## Mapping species name: M auratus
## 1 organism identified from search: mauratus
## Mapping species name: C lupus familiaris
## 1 organism identified from search: clfamiliaris
## Mapping species name: F catus
## 1 organism identified from search: fcatus
## Mapping species name: B taurus
## 1 organism identified from search: btaurus
## Mapping species name: G gallus
## 1 organism identified from search: ggallus
## Mapping species name: D rerio
## 1 organism identified from search: drerio
## Mapping species name: D melanogaster
## 1 organism identified from search: dmelanogaster
## Mapping species name: C elegans
## 1 organism identified from search: celegans
## Mapping species name: O sativa
## 1 organism identified from search: osativa
Note that the `orthogene:::function` notation is needed to use these benchmarking functions, as they are internal.
`run_benchmark()` will run the full benchmarking pipeline.
You can set `mc.cores` to speed this up with multi-core parallelisation.
WARNING: This step can take a long time. For the purposes of this example, we’ll not run the full benchmark and instead provide some pre-computed results.
# Run the benchmarking pipeline on a three-species subset (human, mouse, fly).
# `:::` is required because run_benchmark() is an internal orthogene function.
# mc.cores sets the number of cores used for parallelisation.
bench_res <- orthogene:::run_benchmark(
  species_mapped = species_mapped[c("human", "mouse", "fly")],
  run_convert_orthologs = TRUE,
  mc.cores = 10
)
# write.csv(bench_res, here::here("inst/benchmark/bench_res_example.csv"), row.names = FALSE)
Load stored benchmark results.
# Fall back to the pre-computed results shipped with orthogene when the
# benchmark was not run in this session (i.e. `bench_res` does not exist yet).
if (!exists("bench_res")) {
  bench_res <- read.csv(
    system.file(package = "orthogene", "benchmark/bench_res_example.csv")
  )
}
# Render the benchmark results as a table.
knitr::kable(bench_res)
species | method | test | time | genes |
---|---|---|---|---|
hsapiens | homologene | all_genes() | 0.4597051 | 19129 |
hsapiens | homologene | convert_orthologs() | 0.7699881 | 18713 |
hsapiens | gprofiler | all_genes() | 8.6186595 | 39357 |
hsapiens | gprofiler | convert_orthologs() | 134.0899084 | 1 |
mmusculus | homologene | all_genes() | 0.4631681 | 21207 |
mmusculus | homologene | convert_orthologs() | 0.8222713 | 16482 |
mmusculus | gprofiler | all_genes() | 10.9749143 | 53725 |
mmusculus | gprofiler | convert_orthologs() | 146.1204503 | 16189 |
dmelanogaster | homologene | all_genes() | 0.4492853 | 8437 |
dmelanogaster | homologene | convert_orthologs() | 0.7071683 | 4059 |
dmelanogaster | gprofiler | all_genes() | 6.1289411 | 4493 |
dmelanogaster | gprofiler | convert_orthologs() | 5.2325366 | 638 |
For each method, plot the run time (a) and the number of genes returned (b).
# Bar plots of the benchmark results; `:::` is required because
# plot_benchmark_bar() is an internal orthogene function.
bench_barplot <- orthogene:::plot_benchmark_bar(bench_res = bench_res)
# ggsave(here::here("inst/benchmark/bench_barplot.pdf"),bench_barplot, height = 8)
For each method, plot the relationship between number of genes returned and run time.
# Scatter plot of the benchmark results; `:::` is required because
# plot_benchmark_scatter() is an internal orthogene function.
bench_scatterplot <- orthogene:::plot_benchmark_scatter(bench_res = bench_res)
## `geom_smooth()` using formula 'y ~ x'
# ggsave(here::here("inst/benchmark/bench_scatterplot.pdf"),bench_scatterplot)
# Record the R version, platform and package versions used for this run.
utils::sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
##
## Matrix products: default
## BLAS/LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.8.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=C
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=C
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] patchwork_1.1.1 ggplot2_3.3.5 dplyr_1.0.7 orthogene_0.1.0
##
## loaded via a namespace (and not attached):
## [1] nlme_3.1-152 fs_1.5.0
## [3] usethis_2.0.1 devtools_2.4.2
## [5] httr_1.4.2 rprojroot_2.0.2
## [7] tools_4.1.0 backports_1.2.1
## [9] bslib_0.2.5.1 utf8_1.2.2
## [11] R6_2.5.0 mgcv_1.8-36
## [13] DBI_1.1.1 lazyeval_0.2.2
## [15] colorspace_2.0-2 withr_2.4.2
## [17] tidyselect_1.1.1 prettyunits_1.1.1
## [19] processx_3.5.2 curl_4.3.2
## [21] compiler_4.1.0 cli_3.0.1
## [23] xml2_1.3.2 desc_1.3.0
## [25] plotly_4.9.4.9000 labeling_0.4.2
## [27] sass_0.4.0 scales_1.1.1
## [29] callr_3.7.0 stringr_1.4.0
## [31] digest_0.6.27 foreign_0.8-81
## [33] rmarkdown_2.9 rio_0.5.27
## [35] pkgconfig_2.0.3 htmltools_0.5.1.1
## [37] sessioninfo_1.1.1 highr_0.9
## [39] fastmap_1.1.0 htmlwidgets_1.5.3
## [41] rlang_0.4.11 readxl_1.3.1
## [43] rstudioapi_0.13 farver_2.1.0
## [45] jquerylib_0.1.4 generics_0.1.0
## [47] jsonlite_1.7.2 zip_2.2.0
## [49] car_3.0-11 homologene_1.4.68.19.3.27
## [51] magrittr_2.0.1 Matrix_1.3-4
## [53] Rcpp_1.0.7 munsell_0.5.0
## [55] fansi_0.5.0 abind_1.4-5
## [57] lifecycle_1.0.0 stringi_1.7.3
## [59] yaml_2.2.1 carData_3.0-4
## [61] pkgbuild_1.2.0 grid_4.1.0
## [63] parallel_4.1.0 forcats_0.5.1
## [65] crayon_1.4.1 lattice_0.20-44
## [67] splines_4.1.0 haven_2.4.1
## [69] hms_1.1.0 knitr_1.33
## [71] ps_1.6.0 pillar_1.6.1
## [73] ggpubr_0.4.0 ggsignif_0.6.2
## [75] pkgload_1.2.1 glue_1.4.2
## [77] gprofiler2_0.2.0 evaluate_0.14
## [79] data.table_1.14.0 remotes_2.4.0
## [81] vctrs_0.3.8 testthat_3.0.4
## [83] cellranger_1.1.0 gtable_0.3.0
## [85] purrr_0.3.4 tidyr_1.1.3
## [87] assertthat_0.2.1 cachem_1.0.5
## [89] xfun_0.24 openxlsx_4.2.4
## [91] broom_0.7.8 roxygen2_7.1.1
## [93] rstatix_0.7.0 viridisLite_0.4.0
## [95] tibble_3.1.3 memoise_2.0.0
## [97] ellipsis_0.3.2 here_1.0.1