## ----setup, include=FALSE------------------------------------------------ knitr::opts_chunk$set(echo = TRUE) knitr::opts_chunk$set(cache = TRUE) ## ---- message=FALSE------------------------------------------------------ # BiocInstaller::biocLite("curatedMetagenomicData") #Bioconductor version # BiocInstaller::biocLite("vobencha/curatedMetagenomicData") #bleeding edge version library(curatedMetagenomicData) ## ------------------------------------------------------------------------ suppressPackageStartupMessages(library(ExperimentHub)) eh = ExperimentHub() ## ------------------------------------------------------------------------ myquery = query(eh, "curatedMetagenomicData") ## ------------------------------------------------------------------------ myquery ## ---- eval=FALSE--------------------------------------------------------- # View(mcols(myquery)) ## ---- eval=FALSE--------------------------------------------------------- # write.csv(mcols(myquery), file="curatedMetagenomicData_allrecords.csv") ## ------------------------------------------------------------------------ head(myquery$tags) ## ------------------------------------------------------------------------ grep("(?=HMP)(?=.*metaphlan)", myquery$title, perl=TRUE, val=TRUE) ## ------------------------------------------------------------------------ grep("(?=.*stool)(?=.*metaphlan)", myquery$title, perl=TRUE, val=TRUE) ## ------------------------------------------------------------------------ (lomannames = grep("LomanNJ_2013_Hi", myquery$title, perl=TRUE, val=TRUE)) ## ---- eval=FALSE--------------------------------------------------------- # (idx = grep("LomanNJ_2013_Hi", myquery$title, perl=TRUE)) # loman.list = lapply(idx, function(i){ # return(myquery[[i]]) # }) # names(loman.list) = lomannames # loman.list ## ------------------------------------------------------------------------ suppressPackageStartupMessages(library(curatedMetagenomicData)) loman.eset = LomanNJ_2013_Hi.metaphlan_bugs_list.stool() ## ------------------------------------------------------------------------ experimentData( loman.eset ) ## ------------------------------------------------------------------------ head( pData( loman.eset ) ) ## ------------------------------------------------------------------------ exprs( loman.eset )[1:6, 1:5] #first 6 rows and 5 columns ## ------------------------------------------------------------------------ grep("coli", rownames(loman.eset), value=TRUE) ## ------------------------------------------------------------------------ x = exprs( loman.eset )[grep("s__Escherichia_coli$", rownames( loman.eset)), ] summary( x ) ## ------------------------------------------------------------------------ hist( x, xlab = "Relative Abundance", main="Prevalence of E. Coli", breaks="FD") ## ---- warning=FALSE------------------------------------------------------ loman.pseq = ExpressionSet2phyloseq( loman.eset ) ## ------------------------------------------------------------------------ loman.counts = sweep(exprs( loman.eset ), 2, loman.eset$number_reads, "*") loman.counts[1:6, 1:5] ## ------------------------------------------------------------------------ loman.pseq2 = ExpressionSet2phyloseq( loman.eset, relab=FALSE ) otu_table( loman.pseq2 )[1:6, 1:5] ## ------------------------------------------------------------------------ loman.pseq ## ---- warning=FALSE------------------------------------------------------ otu_table( loman.pseq )[1:6, 1:5] ## ---- warning=FALSE------------------------------------------------------ sample_data( loman.pseq )[1:6, 1:5] ## ---- warning=FALSE------------------------------------------------------ head( tax_table( loman.pseq ) ) ## ---- warning=FALSE------------------------------------------------------ rank_names( loman.pseq ) ## ---- warning=FALSE------------------------------------------------------ subset_taxa( loman.pseq, !is.na(Species)) ## ------------------------------------------------------------------------ subset_taxa( loman.pseq, is.na(Class) & !is.na(Phylum)) ## ------------------------------------------------------------------------ loman.bd = subset_taxa( loman.pseq, Phylum == "Bacteroidetes") head( taxa_names( loman.bd ) ) ## ---- warning=FALSE------------------------------------------------------ keepotu = genefilter_sample(loman.pseq, filterfun_sample(topp(0.05)), A=5) summary(keepotu) subset_taxa(loman.pseq, keepotu) ## ---- warning=FALSE------------------------------------------------------ loman.filt = subset_taxa(loman.pseq, keepotu & !is.na(Strain)) plot_heatmap(loman.filt, method="PCoA", distance="bray") ## ---- warning=FALSE------------------------------------------------------ loman.sp = subset_taxa(loman.pseq, !is.na(Species) & is.na(Strain)) par(mar = c(20, 4, 0, 0) + 0.15) #increase margin size on the bottom barplot(sort(taxa_sums(loman.sp), TRUE)[1:20] / nsamples(loman.sp), ylab = "Total counts", las = 2) ## ---- warning=FALSE------------------------------------------------------ alphas = c("Shannon", "Simpson", "InvSimpson") plot_richness(loman.sp, "stooltexture", measures = alphas) ## ---- warning=FALSE------------------------------------------------------ pairs( estimate_richness(loman.sp, measures = alphas) ) ## ---- warning=FALSE------------------------------------------------------ mydist = distance(loman.sp, method="bray") myhclust = hclust( mydist ) plot(myhclust, main="Bray-Curtis Dissimilarity", method="ward.D", xlab="Samples", sub = "") ## ---- warning=FALSE------------------------------------------------------ ordinated_taxa = ordinate(loman.sp, method="PCoA", distance="bray") plot_ordination(loman.sp, ordinated_taxa, color="stooltexture", title = "Bray-Curtis Principal Coordinates Analysis") ## ------------------------------------------------------------------------ plot_scree(ordinated_taxa, title="Screeplot")