## ----include=FALSE, cache=FALSE------------------------------------------ library(ggplot2) library(knitr) library(rTRM) library(org.Mm.eg.db) opts_chunk$set(fig.align = 'center', fig.width = 3, fig.height = 3, fig.show = 'hold', tidy = TRUE, comment = "", highlight = FALSE, prompt = TRUE) #options(replace.assign=TRUE,width = 80) knit_hooks$set(no.mar = function(before, options, envir) { if (before) par(mar = rep(0,4)) # no margins. }) ## ----fig.cap = "Identification of a TRM 1from a test network (left). In the resulting TRM (right) dark blue indicates the target node light blue are query nodes and white nodes are bridge nodes", no.mar = TRUE---- # load the rTRM package library(rTRM) # load network example. load(system.file(package = "rTRM", "extra/example.rda")) # plot network plot(g,vertex.size=20,vertex.label.cex=.8,layout=layout.graphopt) # define target and query nodes: target = "N6" query = c("N7", "N12", "N28") # find TRM: s = findTRM(g, target = target, query = query, method = "nsa", max.bridge = 1) # annotate nodes: V(s)$color = "white" V(s)[ name %in% query]$color = "steelblue2" V(s)[ name %in% target]$color = "steelblue4" # plot: plot(s,vertex.size=20,vertex.label.cex=.8) ## ------------------------------------------------------------------------ pwm = getMatrices() head(pwm, 1) ## ------------------------------------------------------------------------ ann = getAnnotations() head(ann) ## ------------------------------------------------------------------------ map = getMaps() head(map) ## ------------------------------------------------------------------------ o = getOrthologs(organism = "mouse") head(o) ## ------------------------------------------------------------------------ getOrthologFromMatrix("MA0009.1", organism="human") getOrthologFromMatrix("MA0009.1", organism="mouse") ## ------------------------------------------------------------------------ # check statistics about the network. biogrid_mm() # load mouse PPI network: data(biogrid_mm) ## ----eval = FALSE-------------------------------------------------------- # # obtain dataset. # db = getBiogridData() # retrieves latest release. # # db = getBiogridData("3.2.96") # to get a specific release. # # # check release: # db$release # db$data # # # process PPI data for different organisms (currently supported human and mouse): # biogrid_hs = processBiogrid(db, org = "human") # biogrid_mm = processBiogrid(db, org = "mouse") ## ----eval=FALSE---------------------------------------------------------- # library(PSICQUIC) # psicquic <- PSICQUIC() # providers(psicquic) # # # obtain BioGrid human PPIs (as data.frame): # tbl = interactions(psicquic, species="9606",provider="BioGrid") # # # the target and source node information needs to be polished (i.e. must be Entrez gene id only) # biogrid_hs = data.frame(source=tbl$A,target=tbl$B) # biogrid_hs$source = sub(".*locuslink:(.*)\\|BIOGRID:.*","\\1", biogrid_hs$source) # biogrid_hs$target = sub(".*locuslink:(.*)\\|BIOGRID:.*","\\1", biogrid_hs$target) # # # create graph. # library(igraph) # biogrid_hs=graph.data.frame(biogrid_hs,directed=FALSE) # biogrid_hs=simplify(biogrid_hs) # # # annotate with symbols. # library(org.Hs.eg.db) # V(biogrid_hs)$label=select(org.Hs.eg.db,keys=V(biogrid_hs)$name,columns=c("SYMBOL"))$SYMBOL ## ------------------------------------------------------------------------ # read motif enrichment results. motif_file = system.file("extra/sox2_motif_list.rda", package = "rTRM") load(motif_file) length(motif_list) head(motif_list) ## ------------------------------------------------------------------------ # get the corresponding gene. tfs_list = getOrthologFromMatrix(motif_list, organism = "mouse") tfs_list = unique(unlist(tfs_list, use.names = FALSE)) length(tfs_list) head(tfs_list) ## ------------------------------------------------------------------------ # load expression data. eg_esc_file = system.file("extra/ESC-expressed.txt", package = "rTRM") eg_esc = scan(eg_esc_file, what = "") length(eg_esc) head(eg_esc) tfs_list_esc = tfs_list[tfs_list %in% eg_esc] length(tfs_list_esc) head(tfs_list_esc) ## ------------------------------------------------------------------------ # load and process PPI data. biogrid_mm() data(biogrid_mm) ppi = biogrid_mm vcount(ppi) ecount(ppi) # remove outliers. f = c("Ubc", "Sumo1", "Sumo2", "Sumo3") f=select(org.Mm.eg.db,keys=f,columns="ENTREZID",keytype="SYMBOL")$ENTREZID f ppi = removeVertices(ppi, f) vcount(ppi) ecount(ppi) # filter by expression. ppi_esc = induced.subgraph(ppi, V(ppi)[ name %in% eg_esc ]) vcount(ppi_esc) ecount(ppi_esc) # ensure a single component. ppi_esc = getLargestComp(ppi_esc) vcount(ppi_esc) ecount(ppi_esc) ## ------------------------------------------------------------------------ # define target. target = select(org.Mm.eg.db,keys="Sox2",columns="ENTREZID",keytype="SYMBOL")$ENTREZID target # find TRM. s = findTRM(ppi_esc, target, tfs_list_esc, method = "nsa", max.bridge = 1) vcount(s) ecount(s) ## ----fig.cap = "Sox2 specific TRM in ESCs."------------------------------ # generate layout (order by cluster, then label) cl = getConcentricList(s, target, tfs_list_esc) l = layout.concentric(s, cl, order = "label") # plot TRM. plotTRM(s, layout = l, vertex.cex = 15, label.cex = .8) plotTRMlegend(s, title = "ESC Sox2 TRM", cex = .8) ## ------------------------------------------------------------------------ library(rTRM) library(BSgenome.Mmusculus.UCSC.mm8.masked) # Sox2 peaks found against mm8 library(PWMEnrich) registerCoresPWMEnrich(1) # register number of cores for parallelization in PWMEnrich library(MotifDb) # select mouse PWMs: sel.mm = values(MotifDb)$organism %in% c("Mmusculus") pwm.mm = MotifDb[sel.mm] ## ------------------------------------------------------------------------ # generate logn background model of PWMs: p = as.list(pwm.mm) p = lapply(p, function(x) round(x * 100)) p = lapply(p, function(x) t(apply(x, 1, as.integer))) ## ----eval=FALSE---------------------------------------------------------- # pwm_logn = makeBackground(p, Mmusculus, type = "logn") ## ----echo=FALSE---------------------------------------------------------- load(system.file("extra/pwm_mm_logn.rda", package = "rTRM")) ## ------------------------------------------------------------------------ sox2_bed = read.table(system.file("extra/ESC_Sox2_peaks.txt", package = "rTRM")) colnames(sox2_bed) = c("chr", "start", "end") sox2_seq = getSequencesFromGenome(sox2_bed, Mmusculus, append.id="Sox2") # PWMEnrich throws an error if the sequences are shorter than the motifs so we filter those sequences. min.width = max(sapply(p, ncol)) sox2_seq_filter = sox2_seq[width(sox2_seq) >= min.width] ## ----eval=FALSE---------------------------------------------------------- # # find enrichment: # sox2_enr = motifEnrichment(sox2_seq_filter, pwms=pwm_logn, group.only=TRUE) ## ----echo=FALSE---------------------------------------------------------- load(system.file("extra/sox2_enr.rda", package = "rTRM")) ## ----fig.cap="Density of log2(raw.score) for group. The selected cutoff is indicated with a red line."---- res = groupReport(sox2_enr) plot(density(res$raw.score),main="",log="x",xlab="log(raw.score)") abline(v=log2(5),col="red") mtext(text="log2(5)",at=log2(5),side=3,cex=.8,col="red") res.gene = unique(values(MotifDb[res$id[res$raw.score > 5]])$geneId) res.gene = unique(na.omit(res.gene)) ## ----fig.cap="Sox2 TRM identified using PWMEnrich for the motif enrichment."---- data(biogrid_mm) ppi = biogrid_mm vcount(ppi) ecount(ppi) f = c("Ubc", "Sumo1", "Sumo2", "Sumo3") f=select(org.Mm.eg.db,keys=f,columns="ENTREZID",keytype="SYMBOL")$ENTREZID ppi = removeVertices(ppi, f) vcount(ppi) ecount(ppi) # filter by expression. eg_esc = scan(system.file("extra/ESC-expressed.txt", package = "rTRM"), what = "") ppi_esc = induced.subgraph(ppi, V(ppi)[ name %in% eg_esc ]) vcount(ppi_esc) ecount(ppi_esc) # ensure a single component. ppi_esc = getLargestComp(ppi_esc) vcount(ppi_esc) ecount(ppi_esc) sox2.gene = select(org.Mm.eg.db,keys="Sox2",columns="ENTREZID",keytype="SYMBOL")$ENTREZID sox2_trm = findTRM(ppi_esc, target=sox2.gene, query = res.gene) cl = getConcentricList(sox2_trm, t=sox2.gene,e=res.gene) l = layout.concentric(sox2_trm, concentric=cl, order="label") plotTRM(sox2_trm, layout = l, vertex.cex = 15, label.cex = .8) plotTRMlegend(sox2_trm, title = "ESC Sox2 TRM", cex = .8) ## ----fig.cap="Similarity between the TRMs predicted using motif enrichment information from PWMEnrich and HOMER."---- m=getSimilarityMatrix(list(PWMEnrich=sox2_trm,HOMER=s)) m d=as.data.frame.table(m) g = ggplot(d, aes(x = Var1, y = Var2, fill = Freq)) g + geom_tile() + scale_fill_gradient2(limit = c(0, 100), low = "white", mid = "darkblue", high = "orange", guide = guide_legend("similarity", reverse=TRUE), midpoint = 50) + xlab(NULL) + ylab(NULL) + theme(aspect.ratio = 1, axis.text.x=element_text(angle = 90, vjust = .5, hjust = 1)) ## ----fig.width=2.5,fig.height=2.5,fig.cap="Sox2 TRM obtained with PWMEnrich workflow and layout.concentric is shown in the left. Same TRM with layout.arc is shown in the right."---- plotTRM(sox2_trm, layout = l, vertex.cex = 15, label.cex = .7) l=layout.arc(sox2_trm,target=sox2.gene,query=res.gene) plotTRM(sox2_trm, layout=l,vertex.cex=15,label.cex=.7) ## ------------------------------------------------------------------------ citation(package="rTRM") ## ------------------------------------------------------------------------ sessionInfo()