The MgDb class in the metagenomeFeatures package includes the sequences and taxonomic information for a 16S database. The following vignette demonstrates the class methods for exploring and subsetting an MgDb-class object using the demoMgDb included in the metagenomeFeatures package. MgDb-class objects with full databases are in separate packages, such as the greengenes13.5MgDb package.

Demonstration MgDb-class Object

library(metagenomeFeatures)
demoMgDb <- get_demoMgDb()
demoMgDb
## MgDb object:[1] "Metadata"
## |ACCESSION_DATE: 3/31/2015
## |URL: https://greengenes.microbio.me
## |DB_TYPE_NAME: GreenGenes-MgDb-Demo
## |DB_TYPE_VALUE: MgDb
## |DB_SCHEMA_VERSION: 1.0
## [1] "Sequence Data:"
##   A DNAStringSet instance of length 249
##       width seq                                        names               
##   [1]  1343 GACGAACGCTGGCGGCGTGC...TGAATACGTTCCCGGGCCT 1093016
##   [2]  1326 GACGAACGCTGGCGGCGTGC...TGAATACGTTCCCGGGCCT 1083934
##   [3]  1334 GATGAACGCTGGCGGCACGC...TGAATGCGTTCCCGGGCCT 1075456
##   [4]  1345 GATGAACGCTAGCGGGAGGC...TGAATACGTTCCCGGGCCT 1023948
##   [5]  1504 GACGAACGCTGGCGGCGCGC...GGGGTTGATGATTGGGGTG 983909
##   ...   ... ...
## [245]  1422 TCCGGTTGATCCTGCCGGAG...TCGAAACTGGGCCTCGCGA 4327819
## [246]  1419 CACTGCTATTGGAGTCCGAC...GGGGTTGCGTGAGGGGGGC 4344031
## [247]  1343 CGGTTGATCCTGCCGAAGGC...CCTTGCACACACCGCCCGT 4357608
## [248]  1270 TAACGTGAAGACCGGGATAA...CGAGCAGGTTTTAGGTGAG 4437875
## [249]  1554 TTTTTTCTGAGAATTTGATC...GGGCTGGATCACCTCCTTT 4485266
## [1] "Taxonomy Data:"
## Source: sqlite 3.8.6 [/tmp/RtmpjUAPWW/Rinst17802a100cbe/metagenomeFeatures/extdata/demoTaxa.sqlite]
## From: taxa [249 x 8]
## 
##       Keys     Kingdom             Phylum             Class
##      (chr)       (chr)              (chr)             (chr)
## 1  4324716 k__Bacteria   p__Bacteroidetes               c__
## 2   246960 k__Bacteria  p__Planctomycetes c__028H05-P-BN-P5
## 3   222675 k__Bacteria p__Armatimonadetes       c__0319-6E2
## 4   156874 k__Bacteria            p__NC10          c__12-24
## 5  4383832 k__Bacteria            p__GN02         c__3BR-5F
## 6  4383502 k__Bacteria   p__Elusimicrobia           c__4-29
## 7   315344 k__Bacteria   p__Cyanobacteria         c__4C0d-2
## 8  2655590 k__Bacteria            p__GN04       c__5bav_B12
## 9   552241 k__Bacteria         p__SBR1093        c__A712011
## 10 4327819  k__Archaea   p__Crenarchaeota            c__AAG
## ..     ...         ...                ...               ...
## Variables not shown: Order (chr), Family (chr), Genus (chr), Species (chr)

MgDb Methods

taxa_keytypes

taxa_keytypes(demoMgDb)
## [1] "Keys"    "Kingdom" "Phylum"  "Class"   "Order"   "Family"  "Genus"  
## [8] "Species"
taxa_columns(demoMgDb)
## [1] "Keys"    "Kingdom" "Phylum"  "Class"   "Order"   "Family"  "Genus"  
## [8] "Species"
head(taxa_keys(demoMgDb, keytype = c("Kingdom")))
## Source: local data frame [6 x 1]
## 
##       Kingdom
##         (chr)
## 1 k__Bacteria
## 2 k__Bacteria
## 3 k__Bacteria
## 4 k__Bacteria
## 5 k__Bacteria
## 6 k__Bacteria

Select Methods

The select method is used to retrieve database entries for a specified taxonomic group or ID list. It can return taxonomic information, sequence information, or both.

Selecting taxonomic information

select(demoMgDb, type = "taxa",
                keys = c("Vibrio", "Salmonella"),
                keytype = "Genus")
## Source: local data frame [0 x 8]
## 
## Variables not shown: Keys (chr), Kingdom (chr), Phylum (chr), Class (chr),
##   Order (chr), Family (chr), Genus (chr), Species (chr)

Selecting sequence information

select(demoMgDb, type = "seq",
                keys = c("Vibrio", "Salmonella"),
                keytype = "Genus")
##   A DNAStringSet instance of length 0

Selecting both

select(demoMgDb, type = "both",
                keys = c("Vibrio", "Salmonella"),
                keytype = "Genus")
## $taxa
## Source: local data frame [0 x 8]
## 
## Variables not shown: Keys (chr), Kingdom (chr), Phylum (chr), Class (chr),
##   Order (chr), Family (chr), Genus (chr), Species (chr)
## 
## $seq
##   A DNAStringSet instance of length 0
sessionInfo()
## R Under development (unstable) (2015-09-09 r69333)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 14.04.3 LTS
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=C              
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats4    parallel  stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] Biostrings_2.39.0        XVector_0.11.0          
##  [3] IRanges_2.5.0            S4Vectors_0.9.0         
##  [5] metagenomeSeq_1.13.0     RColorBrewer_1.1-2      
##  [7] glmnet_2.0-2             foreach_1.4.3           
##  [9] Matrix_1.2-2             limma_3.27.0            
## [11] metagenomeFeatures_1.1.0 Biobase_2.31.0          
## [13] BiocGenerics_0.17.0     
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_0.12.1                formatR_1.2.1             
##  [3] futile.logger_1.4.1        GenomeInfoDb_1.7.0        
##  [5] bitops_1.0-6               futile.options_1.0.0      
##  [7] iterators_1.0.8            tools_3.3.0               
##  [9] zlibbioc_1.17.0            digest_0.6.8              
## [11] evaluate_0.8               RSQLite_1.0.0             
## [13] lattice_0.20-33            DBI_0.3.1                 
## [15] yaml_2.1.13                dplyr_0.4.3               
## [17] stringr_1.0.0              hwriter_1.3.2             
## [19] knitr_1.11                 caTools_1.17.1            
## [21] gtools_3.5.0               grid_3.3.0                
## [23] R6_2.1.1                   BiocParallel_1.5.0        
## [25] rmarkdown_0.8.1            gdata_2.17.0              
## [27] latticeExtra_0.6-26        lambda.r_1.1.7            
## [29] magrittr_1.5               gplots_2.17.0             
## [31] matrixStats_0.14.2         Rsamtools_1.23.0          
## [33] codetools_0.2-14           htmltools_0.2.6           
## [35] GenomicRanges_1.23.0       GenomicAlignments_1.7.0   
## [37] assertthat_0.1             ShortRead_1.29.0          
## [39] SummarizedExperiment_1.1.0 KernSmooth_2.23-15        
## [41] stringi_0.5-5              lazyeval_0.1.10