## ---- fig1, dpi=300, fig.width=10, fig.height=10, fig.cap="Overview of the resources featured in OmniPath. Causal resources (including activity-flow and enzyme-substrate resources) can provide direction (*) or sign and direction (+) of interactions.", echo=FALSE---- library(knitr) knitr::include_graphics("../man/figures/page1_1.png") ## ----installation, eval=FALSE------------------------------------------------- # if (!requireNamespace("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # # BiocManager::install("OmnipathR") ## ----libraries, message=FALSE------------------------------------------------- library(OmnipathR) library(tidyr) library(dnet) library(gprofiler2) ## ----interactions------------------------------------------------------------- ## We check some of the different interaction databases get_interaction_resources() ## The interactions are stored into a data frame. interactions <- import_omnipath_interactions(resources=c("SignaLink3","PhosphoSite", "SIGNOR")) ## We visualize the first interactions in the data frame. print_interactions(head(interactions)) ## ----sp, message=TRUE--------------------------------------------------------- ## We transform the interactions data frame into a graph OPI_g <- interaction_graph(interactions = interactions) ## Find and print shortest paths on the directed network between proteins ## of interest: print_path_es(shortest_paths(OPI_g,from = "TYRO3",to = "STAT3", output = 'epath')$epath[[1]],OPI_g) ## Find and print all shortest paths between proteins of interest: print_path_vs(all_shortest_paths(OPI_g,from = "DYRK2", to = "MAPKAPK2")$res,OPI_g) ## ----clustering, message=FALSE------------------------------------------------ ## We apply a clustering algorithm (Louvain) to group proteins in ## our network. We apply here Louvain which is fast but can only run ## on undirected graphs. Other clustering algorithms can deal with ## directed networks but with longer computational times, ## such as cluster_edge_betweenness. These cluster methods are directly ## available in the igraph package. OPI_g_undirected <- as.undirected(OPI_g, mode=c("mutual")) OPI_g_undirected <- simplify(OPI_g_undirected) cl_results <- cluster_fast_greedy(OPI_g_undirected) ## We extract the cluster where a protein of interest is contained cluster_id <- cl_results$membership[which(cl_results$names == "ERBB2")] module_graph <- induced_subgraph(OPI_g_undirected, V(OPI_g)$name[which(cl_results$membership == cluster_id)]) ## ----pathwayextra------------------------------------------------------------- ## We query and store the interactions into a dataframe interactions <- import_pathwayextra_interactions(resources=c("BioGRID","STRING"), organism = 10090) ## We select all the interactions in which Amfr gene is involved interactions_Amfr <- dplyr::filter(interactions, source_genesymbol == "Amfr" | target_genesymbol == "Amfr") ## We print these interactions: print_interactions(interactions_Amfr) ## ----kinaseextra-------------------------------------------------------------- ## We query and store the interactions into a dataframe interactions <- import_kinaseextra_interactions(resources=c("PhosphoPoint", "PhosphoSite"), organism = 10116) ## We select the interactions in which Dpysl2 gene is a target interactions_TargetDpysl2 <- dplyr::filter(interactions, target_genesymbol == "Dpysl2") ## We print these interactions: print_interactions(interactions_TargetDpysl2) ## ----ligrecextra-------------------------------------------------------------- ## We query and store the interactions into a dataframe interactions <- import_ligrecextra_interactions(resources=c("iTALK", "Baccin2019"), organism=9606) ## Receptors of the CDH1 ligand. interactions_ADM2 <- dplyr::filter(interactions, source_genesymbol == "ADM2") ## We transform the interactions data frame into a graph OPI_g <- interaction_graph(interactions = interactions_ADM2) ## We induce a network with these genes Induced_Network <- dNetInduce(g=OPI_g, nodes_query=as.character( V(OPI_g)$name), knn=0, remove.loops=FALSE, largest.comp=FALSE) ## ----dorothea----------------------------------------------------------------- ## We query and store the interactions into a dataframe interactions <- import_dorothea_interactions( resources=c("DoRothEA"), dorothea_levels = 'A', organism=9606 ) ## We select the most confident interactions for a given TF and we print ## the interactions to check the way it regulates its different targets interactions_A_GLI1 <- dplyr::filter(interactions, dorothea_level=="A", source_genesymbol == "GLI1") print_interactions(interactions_A_GLI1) ## ----mirnatarget-------------------------------------------------------------- ## We query and store the interactions into a dataframe interactions <- import_mirnatarget_interactions(resources=c("miRTarBase","miRecords")) ## We select the interactions where a miRNA is interacting with the TF ## used in the previous code chunk and we print these interactions. interactions_miRNA_GLI1 <- dplyr::filter(interactions, target_genesymbol == "GLI1") print_interactions(interactions_miRNA_GLI1) ## We transform the previous selections to graphs (igraph objects) OPI_g_1 <-interaction_graph(interactions = interactions_A_GLI1) OPI_g_2 <-interaction_graph(interactions = interactions_miRNA_GLI1) ## ----PTMs--------------------------------------------------------------------- ## We check the different PTMs databases get_enzsub_resources() ## We query and store the ptms into a dataframe. No filtering by ## databases in this case. ptms <- import_omnipath_enzsub() ## We can select and print the reactions between a specific kinase and ## a specific substrate print_interactions(dplyr::filter(ptms,enzyme_genesymbol=="MAP2K1", substrate_genesymbol=="MAPK3")) ## In the previous results, we can see that ptms does not contain sign ## (activation/inhibition). We can generate this information based on the ## protein-protein OmniPath interaction dataset. interactions <- import_omnipath_interactions() ptms <- get_signed_ptms(ptms, interactions) ## We select again the same kinase and substrate. Now we have information ## about inhibition or activation when we print the ptms print_interactions(dplyr::filter(ptms,enzyme_genesymbol=="MAP2K1", substrate_genesymbol=="MAPK3")) ## We can also transform the ptms into a graph. ptms_g <- ptms_graph(ptms = ptms) ## We download PTMs for mouse ptms <- import_omnipath_enzsub(resources=c("PhosphoSite", "SIGNOR"), organism=10090) ## ----complexes---------------------------------------------------------------- ## We check the different complexes databases get_complex_resources() ## We query and store complexes from some sources into a dataframe. complexes <- import_omnipath_complexes(resources=c("CORUM", "hu.MAP")) ## We check all the molecular complexes where a set of genes participate query_genes <- c("WRN","PARP1") ## Complexes where any of the input genes participate complexes_query_genes_any <- unique(get_complex_genes(complexes,query_genes, total_match=FALSE)) ## We print the components of the different selected components head(complexes_query_genes_any$components_genesymbols,6) ## Complexes where all the input genes participate jointly complexes_query_genes_join <- unique(get_complex_genes(complexes,query_genes, total_match=TRUE)) ## We print the components of the different selected components complexes_query_genes_join$components_genesymbols ## ----enrichment--------------------------------------------------------------- genes_complex <- unlist(strsplit(complexes_query_genes_join$components_genesymbols, "_")) ## We can perform an enrichment analyses with the genes in the complex EnrichmentResults <- gost(genes_complex, significant = TRUE, user_threshold = 0.001, correction_method = c("fdr"), sources=c("GO:BP","GO:CC","GO:MF")) ## We show the most significant results EnrichmentResults$result %>% dplyr::select(term_id, source, term_name,p_value) %>% dplyr::top_n(5,-p_value) ## ----complex_annotations------------------------------------------------------ ## We check the different annotation databases get_annotation_resources() ## We can further investigate the features of the complex selected ## in the previous section. ## We first get the annotations of the complex itself: annotations <- import_omnipath_annotations(proteins=paste0("COMPLEX:", complexes_query_genes_join$components_genesymbols)) head(dplyr::select(annotations,source,label,value),10) ## ----annotations_components--------------------------------------------------- ## Then, we explore some annotations of its individual components ## Pathways where the proteins belong: annotations <- import_omnipath_annotations(proteins=genes_complex, resources=c("NetPath")) dplyr::select(annotations,genesymbol,value) ## Cellular localization of our proteins annotations <-import_omnipath_annotations(proteins=genes_complex, resources=c("ComPPI")) ## Since we have same record_id for some results of our query, we spread ## these records across columns spread(annotations, label, value) %>% dplyr::arrange(desc(score)) %>% dplyr::top_n(10, score) ## ----intercell---------------------------------------------------------------- ## We check some of the different intercell categories get_intercell_generic_categories() ## We import the intercell data into a dataframe intercell <- import_omnipath_intercell(scope = 'generic', aspect = 'locational') ## We check the intercell annotations for the individual components of ## our previous complex. We filter our data to print it in a good format dplyr::filter(intercell,genesymbol %in% genes_complex) %>% dplyr::distinct(genesymbol, parent, .keep_all = TRUE) %>% dplyr::select(category, genesymbol, parent) %>% dplyr::arrange(genesymbol) ## We close graphical connections while (!is.null(dev.list())) dev.off() ## ----sessionInfo, echo=FALSE-------------------------------------------------- sessionInfo()