### R code from vignette source 'usingPathprintGEOData.Rnw'
### Encoding: UTF-8

###################################################
### code chunk number 1: usingPathprintGEOData.Rnw:61-119
###################################################
# Purpose: build a consensus fingerprint from a seed set of pluripotent GEO
# samples, compute every GEO sample's distance from that consensus, plot the
# distance distributions, and print metadata for the first 100 non-seed
# samples in the distance table.

# use the pathprint library
library(pathprint)
library(SummarizedExperiment)
library(pathprintGEOData)

# load the data
data(SummarizedExperimentGEO)

ds <- c(
  "chipframe", "genesets", "pathprint.Hs.gs",
  "platform.thresholds", "pluripotents.frame"
)
data(list = ds)

# see available platforms
names(chipframe)

# extract GEO.fingerprint.matrix and GEO.metadata.matrix
# (geo_sum_data is presumably the object loaded by
# data(SummarizedExperimentGEO) -- confirm against the package)
GEO.fingerprint.matrix <- assays(geo_sum_data)$fingerprint
GEO.metadata.matrix <- colData(geo_sum_data)

# create consensus fingerprint for pluripotent samples
pluripotent.consensus <- consensusFingerprint(
  GEO.fingerprint.matrix[, pluripotents.frame$GSM],
  threshold = 0.9
)

# calculate distance from the pluripotent consensus
geo.pluripotentDistance <- consensusDistance(
  pluripotent.consensus,
  GEO.fingerprint.matrix
)

# plot histograms
# par() returns the previous values of the settings it changes; keep them so
# the two-panel layout and margins do not leak into later plots.
old.par <- par(mfcol = c(2, 1), mar = c(0, 4, 4, 2))

# upper panel: all GEO samples; the histogram object is kept so the lower
# panel can reuse exactly the same bin boundaries
geo.pluripotentDistance.hist <- hist(
  geo.pluripotentDistance[, "distance"],
  breaks = 50,
  xlim = c(0, 1),
  main = "Distance from pluripotent consensus"
)

# lower panel: seed (pluripotent) samples only, with a larger bottom margin
# to leave room for the x-axis label
par(mar = c(7, 4, 4, 2))
hist(
  geo.pluripotentDistance[pluripotents.frame$GSM, "distance"],
  breaks = geo.pluripotentDistance.hist$breaks,
  xlim = c(0, 1),
  main = "",
  xlab = "above: all GEO, below: pluripotent samples"
)

# restore the graphics settings that were in effect before plotting
par(old.par)

# annotate top 100 matches not in original seed with metadata
# NOTE(review): head() takes the first 100 rows as they come; this assumes
# consensusDistance() returns rows ordered by increasing distance -- confirm.
geo.pluripotentDistance.noSeed <- geo.pluripotentDistance[
  !(rownames(geo.pluripotentDistance) %in% pluripotents.frame$GSM),
]

top.noSeed.meta <- GEO.metadata.matrix[
  match(
    head(rownames(geo.pluripotentDistance.noSeed), 100),
    rownames(GEO.metadata.matrix)
  ),
]

# show only the first four metadata columns
print(top.noSeed.meta[, 1:4])