Authors: Koki Tsuyuzaki [aut, cre], Manabu Ishii [aut], Itoshi Nikaido [aut]
Last modified: 2022-10-19 10:00:54
Compiled: Wed Oct 19 10:02:29 2022

1 Installation

To install this package, start R (>= 4.1.0) and enter:

if(!requireNamespace("BiocManager", quietly = TRUE))
    install.packages("BiocManager")
BiocManager::install("AHPubMedDbs")

2 Fetch PubMed tibble datasets from AnnotationHub

The AHPubMedDbs package provides the metadata for all PubMed datasets, which are preprocessed into tibble format and stored in AnnotationHub. First, we load/update the AnnotationHub resource.

library(AnnotationHub)
ah <- AnnotationHub()

Next we list all PubMed entries from AnnotationHub.

query(ah, "PubMed")
## AnnotationHub with 84 records
## # snapshotDate(): 2022-10-18
## # $dataprovider: NCBI
## # $species: NA
## # $rdataclass: data.table, Tibble, SQLiteFile
## # additional mcols(): taxonomyid, genome, description,
## #   coordinate_1_based, maintainer, rdatadateadded, preparerclass, tags,
## #   rdatapath, sourceurl, sourcetype 
## # retrieve records with, e.g., 'object[["AH91771"]]' 
## 
##              title                                   
##   AH91771  | SQLite for PubMed ID                    
##   AH91772  | SQLite for PubMed Abstract              
##   AH91773  | SQLite for PubMed Author Information    
##   AH91774  | SQLite for PMC                          
##   AH91775  | SQLite for MeSH (Descriptor)            
##   ...        ...                                     
##   AH107299 | Data.table for PubMed Author Information
##   AH107300 | Data.table for PMC                      
##   AH107301 | Data.table for MeSH (Descriptor)        
##   AH107302 | Data.table for MeSH (Qualifier)         
##   AH107303 | Data.table for MeSH (SCR)

We can confirm the metadata that AnnotationHub holds for these records, which are stored in the Bioconductor S3 bucket, with mcols().

mcols(query(ah, "PubMed"))
## DataFrame with 84 rows and 15 columns
##                           title dataprovider     species taxonomyid      genome
##                     <character>  <character> <character>  <integer> <character>
## AH91771    SQLite for PubMed ID         NCBI          NA         NA          NA
## AH91772  SQLite for PubMed Ab..         NCBI          NA         NA          NA
## AH91773  SQLite for PubMed Au..         NCBI          NA         NA          NA
## AH91774          SQLite for PMC         NCBI          NA         NA          NA
## AH91775  SQLite for MeSH (Des..         NCBI          NA         NA          NA
## ...                         ...          ...         ...        ...         ...
## AH107299 Data.table for PubMe..         NCBI          NA         NA          NA
## AH107300     Data.table for PMC         NCBI          NA         NA          NA
## AH107301 Data.table for MeSH ..         NCBI          NA         NA          NA
## AH107302 Data.table for MeSH ..         NCBI          NA         NA          NA
## AH107303 Data.table for MeSH ..         NCBI          NA         NA          NA
##                     description coordinate_1_based             maintainer
##                     <character>          <integer>            <character>
## AH91771                    PMID                  1 Koki Tsuyuzaki <k.t...
## AH91772  Correspondence table..                  1 Koki Tsuyuzaki <k.t...
## AH91773  Correspondence table..                  1 Koki Tsuyuzaki <k.t...
## AH91774  Correspondence table..                  1 Koki Tsuyuzaki <k.t...
## AH91775  Correspondence table..                  1 Koki Tsuyuzaki <k.t...
## ...                         ...                ...                    ...
## AH107299 Correspondence table..                  1 Koki Tsuyuzaki <k.t...
## AH107300 Correspondence table..                  1 Koki Tsuyuzaki <k.t...
## AH107301 Correspondence table..                  1 Koki Tsuyuzaki <k.t...
## AH107302 Correspondence table..                  1 Koki Tsuyuzaki <k.t...
## AH107303 Correspondence table..                  1 Koki Tsuyuzaki <k.t...
##          rdatadateadded preparerclass                           tags
##             <character>   <character>                         <AsIs>
## AH91771      2021-04-19   AHPubMedDbs         NCBI,PubMed,SQLite,...
## AH91772      2021-04-19   AHPubMedDbs         NCBI,PubMed,SQLite,...
## AH91773      2021-04-19   AHPubMedDbs         NCBI,PubMed,SQLite,...
## AH91774      2021-04-19   AHPubMedDbs            NCBI,PMC,SQLite,...
## AH91775      2021-04-19   AHPubMedDbs       Descriptor,MeSH,NCBI,...
## ...                 ...           ...                            ...
## AH107299     2022-10-18   AHPubMedDbs     data.table,NCBI,PubMed,...
## AH107300     2022-10-18   AHPubMedDbs        data.table,NCBI,PMC,...
## AH107301     2022-10-18   AHPubMedDbs data.table,Descriptor,MeSH,...
## AH107302     2022-10-18   AHPubMedDbs       data.table,MeSH,NCBI,...
## AH107303     2022-10-18   AHPubMedDbs       data.table,MeSH,NCBI,...
##           rdataclass              rdatapath              sourceurl  sourcetype
##          <character>            <character>            <character> <character>
## AH91771   SQLiteFile AHPubMedDbs/v001/pub.. https://github.com/r..         XML
## AH91772   SQLiteFile AHPubMedDbs/v001/abs.. https://github.com/r..         XML
## AH91773   SQLiteFile AHPubMedDbs/v001/aut.. https://github.com/r..         XML
## AH91774   SQLiteFile AHPubMedDbs/v001/pmc.. https://github.com/r..         XML
## AH91775   SQLiteFile AHPubMedDbs/v001/des.. https://github.com/r..         XML
## ...              ...                    ...                    ...         ...
## AH107299  data.table AHPubMedDbs/v004/aut.. https://github.com/r..         XML
## AH107300  data.table AHPubMedDbs/v004/pmc.. https://github.com/r..         XML
## AH107301  data.table AHPubMedDbs/v004/des.. https://github.com/r..         XML
## AH107302  data.table AHPubMedDbs/v004/qua.. https://github.com/r..         XML
## AH107303  data.table AHPubMedDbs/v004/scr.. https://github.com/r..         XML

We can retrieve only the PubMedDb tibble files as follows.

qr <- query(ah, c("PubMedDb"))
# pubmed_tibble <- qr[[1]]

Session information

## R version 4.2.0 Patched (2022-05-05 r82321)
## Platform: x86_64-apple-darwin19.6.0 (64-bit)
## Running under: macOS Catalina 10.15.7
## 
## Matrix products: default
## BLAS:   /Users/ka36530_ca/R-stuff/bin/R-4-2/lib/libRblas.dylib
## LAPACK: /Users/ka36530_ca/R-stuff/bin/R-4-2/lib/libRlapack.dylib
## 
## locale:
## [1] C/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] AnnotationHub_3.5.2 BiocFileCache_2.5.0 dbplyr_2.2.1       
## [4] BiocGenerics_0.43.4 BiocStyle_2.25.0   
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.9                    png_0.1-7                    
##  [3] Biostrings_2.65.6             assertthat_0.2.1             
##  [5] digest_0.6.29                 utf8_1.2.2                   
##  [7] mime_0.12                     R6_2.5.1                     
##  [9] GenomeInfoDb_1.33.7           stats4_4.2.0                 
## [11] RSQLite_2.2.17                evaluate_0.16                
## [13] httr_1.4.4                    pillar_1.8.1                 
## [15] zlibbioc_1.43.0               rlang_1.0.6                  
## [17] curl_4.3.3                    jquerylib_0.1.4              
## [19] blob_1.2.3                    S4Vectors_0.35.4             
## [21] rmarkdown_2.16                stringr_1.4.1                
## [23] RCurl_1.98-1.8                bit_4.0.4                    
## [25] shiny_1.7.2                   compiler_4.2.0               
## [27] httpuv_1.6.6                  xfun_0.33                    
## [29] pkgconfig_2.0.3               htmltools_0.5.3              
## [31] tidyselect_1.2.0              KEGGREST_1.37.3              
## [33] GenomeInfoDbData_1.2.9        tibble_3.1.8                 
## [35] interactiveDisplayBase_1.35.0 bookdown_0.29                
## [37] IRanges_2.31.2                fansi_1.0.3                  
## [39] withr_2.5.0                   crayon_1.5.2                 
## [41] dplyr_1.0.10                  later_1.3.0                  
## [43] bitops_1.0-7                  rappdirs_0.3.3               
## [45] jsonlite_1.8.2                xtable_1.8-4                 
## [47] lifecycle_1.0.3               DBI_1.1.3                    
## [49] magrittr_2.0.3                cli_3.4.1                    
## [51] stringi_1.7.8                 cachem_1.0.6                 
## [53] XVector_0.37.1                promises_1.2.0.1             
## [55] bslib_0.4.0                   ellipsis_0.3.2               
## [57] filelock_1.0.2                generics_0.1.3               
## [59] vctrs_0.4.2                   tools_4.2.0                  
## [61] bit64_4.0.5                   Biobase_2.57.1               
## [63] glue_1.6.2                    purrr_0.3.5                  
## [65] BiocVersion_3.16.0            fastmap_1.1.0                
## [67] yaml_2.3.5                    AnnotationDbi_1.59.1         
## [69] BiocManager_1.30.18           memoise_2.0.1                
## [71] knitr_1.40                    sass_0.4.2