## ----knitrSetup, include=FALSE------------------------------------------------ library(knitr) opts_chunk$set(out.extra='style="display:block; margin: auto"', fig.align="center", tidy=TRUE) ## ----style, include=FALSE, echo=FALSE, results='asis'------------------------- BiocStyle::markdown() ## ----installPaxtoolsr, eval=FALSE--------------------------------------------- # if (!requireNamespace("BiocManager", quietly=TRUE)) # install.packages("BiocManager") # BiocManager::install("paxtoolsr") ## ----loadLibrary, message=FALSE, warning=FALSE-------------------------------- library(paxtoolsr) ## ----searchHelp, eval=FALSE, tidy=FALSE--------------------------------------- # help.search("paxtoolsr") ## ----showHelp, eval=FALSE, tidy=FALSE----------------------------------------- # help(graphPc) # ?graphPc ## ----paxtoolsMergeExample----------------------------------------------------- file1 <- system.file("extdata", "raf_map_kinase_cascade_reactome.owl", package="paxtoolsr") file2 <- system.file("extdata", "biopax3-short-metabolic-pathway.owl", package="paxtoolsr") mergedFile <- mergeBiopax(file1, file2) ## ----summarize, results='hold'------------------------------------------------ s1 <- summarize(file1) s2 <- summarize(file2) s3 <- summarize(mergedFile) s1$Catalysis s2$Catalysis s3$Catalysis ## ----paxtoolsValidationExample, eval=FALSE------------------------------------ # errorLog <- validate(system.file("extdata", "raf_map_kinase_cascade_reactome.owl", package="paxtoolsr"), onlyErrors=TRUE) ## ----loadSifFromFile---------------------------------------------------------- sif <- toSif(system.file("extdata", "biopax3-short-metabolic-pathway.owl", package="paxtoolsr")) ## ----viewSifHead-------------------------------------------------------------- head(sif) ## ----toSifnxExample, tidy=TRUE------------------------------------------------ # Select additional node and edge properties inputFile <- system.file("extdata", "raf_map_kinase_cascade_reactome.owl", package="paxtoolsr") results <- toSifnx(inputFile=inputFile) ## ----downloadSif, eval=FALSE-------------------------------------------------- # results <- downloadPc2(version="12") ## ----paxtoolsrCache, eval=FALSE----------------------------------------------- # Sys.getenv("PAXTOOLSR_CACHE") ## ----searchResultsExample, eval=FALSE----------------------------------------- # ## Search Pathway Commons for "glycolysis"-related pathways # searchResults <- searchPc(q="glycolysis", type="pathway") ## ----searchResultsExampleVerbose---------------------------------------------- ## Search Pathway Commons for "glycolysis"-related pathways searchResults <- searchPc(q="glycolysis", type="pathway", verbose=TRUE) ## ----searchResultsType-------------------------------------------------------- str(searchResults) ## ----searchResultsXpathExample------------------------------------------------ xpathSApply(searchResults, "/searchResponse/searchHit/name", xmlValue)[1] xpathSApply(searchResults, "/searchResponse/searchHit/pathway", xmlValue)[1] ## ----convertXmlToDf, message=FALSE-------------------------------------------- library(plyr) searchResultsDf <- ldply(xmlToList(searchResults), data.frame) # Simplified results simplifiedSearchResultsDf <- searchResultsDf[, c("name", "uri", "biopaxClass")] head(simplifiedSearchResultsDf) ## ----saveBiopaxFileFromPcQuery, message=FALSE, results='hide'----------------- ## Use an XPath expression to extract the results of interest. In this case, the URIs (IDs) for the pathways from the results tmpSearchResults <- xpathSApply(searchResults, "/searchResponse/searchHit/uri", xmlValue) ## Generate temporary file to save content into biopaxFile <- tempfile() ## Extract a URI for a pathway in the search results and save into a file idx <- which(grepl("panther", simplifiedSearchResultsDf$uri) & grepl("glycolysis", simplifiedSearchResultsDf$name, ignore.case=TRUE)) uri <- simplifiedSearchResultsDf$uri[idx] saveXML(getPc(uri, format="BIOPAX"), biopaxFile) ## ----traverse----------------------------------------------------------------- # Convert the Uniprot ID to a URI that would be found in Pathway Commons uri <- paste0("http://identifiers.org/uniprot/P31749") # Get URIs for only the ModificationFeatures of the protein xml <- traverse(uri=uri, path="ProteinReference/entityFeature:ModificationFeature") # Extract all the URIs uris <- xpathSApply(xml, "//value/text()", xmlValue) # For the first URI get the modification position and type tmpXml <- traverse(uri=uris[1], path="ModificationFeature/featureLocation:SequenceSite/sequencePosition") cat("Modification Position: ", xpathSApply(tmpXml, "//value/text()", xmlValue)) tmpXml <- traverse(uri=uris[1], path="ModificationFeature/modificationType/term") cat("Modification Type: ", xpathSApply(tmpXml, "//value/text()", xmlValue)) ## ----loadGraphLibraries, message=FALSE---------------------------------------- library(igraph) ## ----plotGraph---------------------------------------------------------------- sif <- toSif(system.file("extdata", "biopax3-short-metabolic-pathway.owl", package="paxtoolsr")) # graph.edgelist requires a matrix g <- graph.edgelist(as.matrix(sif[,c(1,3)]), directed=FALSE) plot(g, layout=layout.fruchterman.reingold) ## ----graphQueryExampleSingle, figure.width=20, figure.height=20--------------- gene <- "BDNF" t1 <- graphPc(source=gene, kind="neighborhood", format="SIF", verbose=TRUE) t2 <- t1[which(t1[,2] == "controls-state-change-of"),] # Show only 100 interactions for simplicity g <- graph.edgelist(as.matrix(t2[1:100, c(1,3)]), directed=FALSE) plot(g, layout=layout.fruchterman.reingold) ## ----graphQueryExample, fig.width=7, fig.height=7----------------------------- genes <- c("AKT1", "IRS1", "MTOR", "IGF1R") t1 <- graphPc(source=genes, kind="PATHSBETWEEN", format="SIF", verbose=TRUE) t2 <- t1[which(t1[,2] == "controls-state-change-of"),] # Show only 100 interactions for simplicity g <- graph.edgelist(as.matrix(t2[1:100, c(1,3)]), directed=FALSE) plot(g, layout=layout.fruchterman.reingold) ## ----dataOverlayExample, fig.width=7, fig.height=7---------------------------- library(RColorBrewer) # Generate a color palette that goes from white to red that contains 10 colors numColors <- 10 colors <- colorRampPalette(brewer.pal(9, "Reds"))(numColors) # Generate values that could represent some experimental values values <- runif(length(V(g)$name)) # Scale values to generate indicies from the color palette xrange <- range(values) newrange <- c(1, numColors) factor <- (newrange[2]-newrange[1]) / (xrange[2]-xrange[1]) scaledValues <- newrange[1] + (values-xrange[1]) * factor indicies <- as.integer(scaledValues) # Color the nodes based using the indicies and the color palette created above g <- set.vertex.attribute(g, "color", value=colors[indicies]) #get.vertex.attribute(h, "color") plot(g, layout = layout.fruchterman.reingold) ## ----sifNetStats-------------------------------------------------------------- # Degree for each node in the igraph network degree(g) #Number of nodes length(V(g)$name) #Clustering coefficient transitivity(g) #Network density graph.density(g) # Network diameter diameter(g) ## ----sifShortestPath---------------------------------------------------------- # Get the first shortest path between two nodes tmp <- get.shortest.paths(g, from="IRS1", to="MTOR") # igraph seems to return different objects on Linux versus OS X for get.shortest.paths() if(is(tmp[[1]], "list")) { path <- tmp[[1]][[1]] # Linux } else { path <- tmp[[1]] # OS X } # Convert from indicies to vertex names V(g)$name[path] ## ----gseaExampleLibraries, message=FALSE-------------------------------------- library(paxtoolsr) # To retrieve data from Pathway Commons library(clusterProfiler) # Enrichment analysis library(org.Hs.eg.db) library(XML) # To parse XML files ## ----gseaExampleGenGeneSet, results='hide', eval=FALSE------------------------ # # Generate a Gene Set # ## Search Pathway Commons for "glycolysis"-related pathways # searchResults <- searchPc(q="glycolysis", type="pathway") # # ## Use an XPath expression to extract the results of interest. In this case, the URIs (IDs) for the pathways from the results # searchResults <- xpathSApply(searchResults, "/searchResponse/searchHit/uri", xmlValue) # # ## Generate temporary files to save content into # biopaxFile <- tempfile() # # ## Extract the URI for the first pathway in the search results and save into a file # uri <- searchResults[2] # saveXML(getPc(uri, "BIOPAX"), biopaxFile) ## ----gseaExampleGenGeneSetSave, results='hide'-------------------------------- ## Generate temporary files to save content into gseaFile <- tempfile() ## Generate a gene set for the BioPAX pathway with gene symbols ### NOTE: Not all search results are guaranteed to result in gene sets tmp <- toGSEA(biopaxFile, gseaFile, "HGNC Symbol", FALSE) geneSet <- tmp$geneSet ## ----gseaExample-------------------------------------------------------------- library(clusterProfiler) # Example gene list at the end of some end analysis geneList <- c("ALDOA", "ENO1", "GAPDH", "GPI", "HK1", "PFKL", "PGK1", "PKM") # Read Pathway Commons V12 KEGG dataset inluded with package gmt <- readGmt(system.file("extdata", "test_PathwayCommons12.kegg.hgnc.gmt", package = "paxtoolsr"), returnInfo = TRUE) geneSetList <- lapply(seq_along(gmt), function(x, n, i) { tmp <- x[[i]] data.frame(id=n[i], name=tmp[["name"]], gene=tmp[["geneSet"]], stringsAsFactors=FALSE) }, x=gmt, n=names(gmt)) tmp <- do.call("rbind", geneSetList) rownames(tmp) <- 1:nrow(tmp) # For convenience pc2gene <- tmp[, c("id", "gene")] pc2name <- tmp[, c("id", "name")] enrichOutput <- clusterProfiler::enricher(geneList, pvalueCutoff=0.05, minGSSize=10, maxGSSize=500, TERM2GENE=pc2gene, TERM2NAME=pc2name) enrichOutput@result ## ----idMapping, eval=FALSE---------------------------------------------------- # sif <- toSif(system.file("extdata", "raf_map_kinase_cascade_reactome.owl", package="paxtoolsr")) # # ids <- c(sif$PARTICIPANT_A, sif$PARTICIPANT_B) # # output <- clusterProfiler::bitr(ids, fromType="SYMBOL", toType="ENTREZID", OrgDb="org.Hs.eg.db") # output ## ----filePathExample, eval=FALSE---------------------------------------------- # toSif("/directory/file") # #or # toSif("X:\\directory\\file") ## ----changeHeapSize, eval=FALSE----------------------------------------------- # options(java.parameters="-Xmx1024m") # # library(paxtoolsr) # # # Megabyte size # mbSize <- 1048576.0 # # runtime <- .jcall("java/lang/Runtime", "Ljava/lang/Runtime;", "getRuntime") # maxMemory <- .jcall(runtime, "J", "maxMemory") # maxMemoryMb <- maxMemory / mbSize # cat("Max Memory: ", maxMemoryMb, "\n") ## ----sessionInfo-------------------------------------------------------------- sessionInfo()