## ----setup, include=FALSE-------------------------------------------------- knitr::opts_chunk$set(echo = TRUE) knitr::opts_chunk$set(cache = TRUE) suppressPackageStartupMessages(library(curatedMetagenomicData)) ## ---- message=FALSE-------------------------------------------------------- # BiocManager::install("curatedMetagenomicData") library(curatedMetagenomicData) ## ----eval=FALSE------------------------------------------------------------ # ?combined_metadata # View(combined_metadata) ## -------------------------------------------------------------------------- table(combined_metadata$antibiotics_current_use) table(combined_metadata$disease) ## -------------------------------------------------------------------------- dsranking <- combined_metadata %>% as.data.frame() %>% group_by(dataset_name) %>% summarize(mediandepth = median(number_reads) / 1e6) %>% mutate(dsorder = rank(mediandepth)) %>% arrange(dsorder) dsranking ## -------------------------------------------------------------------------- suppressPackageStartupMessages(library(ggplot2)) combined_metadata %>% as.data.frame() %>% mutate(ds = factor(combined_metadata$dataset_name, levels=dsranking$dataset_name)) %>% ggplot(aes(ds, number_reads / 1e6, fill=body_site)) + geom_boxplot() + theme(axis.text.x = element_text(angle=45, hjust=1)) + labs(x="Dataset", y="Read Depth (millions)") ## ---- message=FALSE-------------------------------------------------------- suppressPackageStartupMessages(library(curatedMetagenomicData)) loman.eset = LomanNJ_2013.metaphlan_bugs_list.stool() ## -------------------------------------------------------------------------- loman <- curatedMetagenomicData("LomanNJ_2013.metaphlan_bugs_list.stool", dryrun = FALSE) loman loman.eset <- loman[[1]] loman.eset ## ---- message=FALSE, results='hide'---------------------------------------- oral <- c("BritoIL_2016.metaphlan_bugs_list.oralcavity", "Castro-NallarE_2015.metaphlan_bugs_list.oralcavity") esl <- curatedMetagenomicData(oral, dryrun = FALSE) esl esl[[1]] esl[[2]] ## ---- eval=TRUE------------------------------------------------------------ curatedMetagenomicData("*metaphlan_bugs_list.stool*", dryrun = TRUE) ## -------------------------------------------------------------------------- eset <- mergeData(esl) eset ## -------------------------------------------------------------------------- experimentData( loman.eset ) ## -------------------------------------------------------------------------- head( pData( loman.eset ) ) ## -------------------------------------------------------------------------- exprs( loman.eset )[1:6, 1:5] #first 6 rows and 5 columns ## -------------------------------------------------------------------------- loman.counts = sweep(exprs( loman.eset ), 2, loman.eset$number_reads / 100, "*") loman.counts = round(loman.counts) loman.counts[1:6, 1:5] ## -------------------------------------------------------------------------- loman.eset2 = curatedMetagenomicData("LomanNJ_2013.metaphlan_bugs_list.stool", counts = TRUE, dryrun = FALSE)[[1]] all.equal(exprs(loman.eset2), loman.counts) ## -------------------------------------------------------------------------- grep("coli", rownames(loman.eset), value=TRUE) ## -------------------------------------------------------------------------- x = exprs( loman.eset )[grep("s__Escherichia_coli$", rownames( loman.eset)), ] summary( x ) ## -------------------------------------------------------------------------- hist( x, xlab = "Relative Abundance", main="Prevalence of E. Coli", breaks="FD") ## ---- warning=FALSE-------------------------------------------------------- suppressPackageStartupMessages(library(phyloseq)) loman.pseq = ExpressionSet2phyloseq( loman.eset ) ## -------------------------------------------------------------------------- loman.tree <- ExpressionSet2phyloseq( loman.eset, phylogenetictree = TRUE) ## -------------------------------------------------------------------------- wt = UniFrac(loman.tree, weighted=TRUE, normalized=FALSE, parallel=FALSE, fast=TRUE) plot(hclust(wt), main="Weighted UniFrac distances") ## -------------------------------------------------------------------------- loman.pseq ## ---- warning=FALSE-------------------------------------------------------- otu_table( loman.pseq )[1:6, 1:5] ## ---- warning=FALSE-------------------------------------------------------- sample_data( loman.pseq )[1:6, 1:5] ## ---- warning=FALSE-------------------------------------------------------- head( tax_table( loman.pseq ) ) ## ---- warning=FALSE-------------------------------------------------------- rank_names( loman.pseq ) ## ---- warning=FALSE-------------------------------------------------------- subset_taxa( loman.pseq, !is.na(Species)) ## -------------------------------------------------------------------------- subset_taxa( loman.pseq, is.na(Class) & !is.na(Phylum)) ## -------------------------------------------------------------------------- loman.bd = subset_taxa( loman.pseq, Phylum == "Bacteroidetes") head( taxa_names( loman.bd ) ) ## ---- warning=FALSE-------------------------------------------------------- keepotu = genefilter_sample(loman.pseq, filterfun_sample(topp(0.05)), A=5) summary(keepotu) subset_taxa(loman.pseq, keepotu) ## ---- warning=FALSE-------------------------------------------------------- loman.filt = subset_taxa(loman.pseq, keepotu & !is.na(Strain)) plot_heatmap(loman.filt, method="PCoA", distance="bray") ## ---- warning=FALSE-------------------------------------------------------- loman.sp = subset_taxa(loman.pseq, !is.na(Species) & is.na(Strain)) par(mar = c(20, 4, 0, 0) + 0.15) #increase margin size on the bottom barplot(sort(taxa_sums(loman.sp), TRUE)[1:20] / nsamples(loman.sp), ylab = "Total counts", las = 2) ## ---- warning=FALSE-------------------------------------------------------- alphas = c("Shannon", "Simpson", "InvSimpson") plot_richness(loman.sp, "stool_texture", measures = alphas) ## ---- warning=FALSE-------------------------------------------------------- pairs( estimate_richness(loman.sp, measures = alphas) ) ## ---- warning=FALSE-------------------------------------------------------- mydist = distance(loman.sp, method="bray") myhclust = hclust( mydist ) plot(myhclust, main="Bray-Curtis Dissimilarity", method="ward.D", xlab="Samples", sub = "") ## ---- warning=FALSE-------------------------------------------------------- ordinated_taxa = ordinate(loman.sp, method="PCoA", distance="bray") plot_ordination(loman.sp, ordinated_taxa, color="stool_texture", title = "Bray-Curtis Principal Coordinates Analysis") ## -------------------------------------------------------------------------- plot_scree(ordinated_taxa, title="Screeplot") ## -------------------------------------------------------------------------- suppressPackageStartupMessages(library(ExperimentHub)) eh = ExperimentHub() ## -------------------------------------------------------------------------- myquery = query(eh, "curatedMetagenomicData") ## -------------------------------------------------------------------------- myquery ## ---- eval=FALSE----------------------------------------------------------- # View(mcols(myquery)) ## ---- eval=FALSE----------------------------------------------------------- # write.csv(mcols(myquery), file="curatedMetagenomicData_allrecords.csv") ## -------------------------------------------------------------------------- head(myquery$tags) ## -------------------------------------------------------------------------- grep("(?=HMP)(?=.*metaphlan)", myquery$title, perl=TRUE, val=TRUE) ## -------------------------------------------------------------------------- grep("(?=.*stool)(?=.*metaphlan)", myquery$title, perl=TRUE, val=TRUE) ## -------------------------------------------------------------------------- (lomannames = grep("LomanNJ_2013", myquery$title, perl=TRUE, val=TRUE)) ## ---- eval=FALSE----------------------------------------------------------- # (idx = grep("LomanNJ_2013", myquery$title, perl=TRUE)) # loman.list = lapply(idx, function(i){ # return(myquery[[i]]) # }) # names(loman.list) = lomannames # loman.list