OmnipathR 2.0.0
In many applications we would like to understand how a specific drug interacts with the protein signaling network through its targets.
library(dplyr)
library(ggplot2)
library(OmnipathR)
library(igraph)
library(ggraph)
We query protein-protein interactions from the webservice of OmniPath [1,2] at https://omnipathdb.org/ using OmnipathR package:
# Download protein-protein interactions
interactions = import_omnipath_interactions() %>% as_tibble()
## Downloaded 41419 interactions.
# Convert to igraph objects:
OPI_g = interaction_graph(interactions = interactions )
For direct drug targets we will use DrugBank [3] database accessed via the dbparser package. Please note, that the following few chuncks of code is not evaluated. DrugBank requires registrations to access the data, therefore we ask the reader to register at DrugBank and download the data from here.
The next block of code is used to process the DrugBank dataset.
library(dbparser)
library(XML)
## parse data from XML and save it to memory
get_xml_db_rows("..path-to-DrugBank/full database.xml")
## load drugs data
drugs <- parse_drug() %>% select(primary_key, name)
drugs <- rename(drugs,drug_name = name)
## load drug target data
drug_targets <- parse_drug_targets() %>%
select(id, name,organism,parent_key) %>%
rename(target_name = name)
## load polypeptide data
drug_peptides <- parse_drug_targets_polypeptides() %>%
select(id, name, general_function, specific_function,
gene_name, parent_id) %>%
rename(target_name = name, gene_id = id)
# join the 3 datasets
drug_targets_full <- inner_join(drug_targets, drug_peptides,
by=c("id"="parent_id", "target_name")) %>%
inner_join(drugs, by=c("parent_key"="primary_key")) %>%
select(-other_keys)
Here we declare the names of drugs of interest.
drug_names = c("Valproat" = "Valproic Acid",
"Diclofenac" = "Diclofenac",
"Paracetamol" = "Acetaminophen",
"Ciproflaxin" = "Ciprofloxacin",
"Nitrofurantoin"= "Nitrofurantoin",
"Tolcapone",
"Azathioprine",
"Troglitazone",
"Nefazodone",
"Ketoconazole",
"Omeprazole",
"Phenytoin",
"Amiodarone",
"Cisplatin",
"Cyclosporin A" = "Cyclosporine",
"Verapamil",
"Buspirone",
"Melatonin",
"N-Acetylcysteine"= "Acetylcysteine",
"Vitamin C" = "Ascorbic acid",
"Famotidine",
"Vancomycin")
drug_target_data_sample <- drug_targets_full %>%
filter(organism == "Humans",drug_name %in% drug_names)
We only use a small sample of the database:
drug_targets <- OmnipathR:::drug_target_data_sample %>%
filter(organism == "Humans",drug_name %in% drug_names)
Check which drug targets are in Omnipath
drug_targets <- drug_targets %>%
select(-target_name, -organism) %>%
mutate(in_OP = gene_id %in% c(interactions$source))
# not all drug-targets are in OP.
print(all(drug_targets$in_OP))
## [1] FALSE
# But each drug has at least one target in OP.
drug_targets %>% group_by(drug_name) %>% summarise(any(in_OP))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 19 x 2
## drug_name `any(in_OP)`
## <chr> <lgl>
## 1 Acetaminophen TRUE
## 2 Acetylcysteine TRUE
## 3 Amiodarone TRUE
## 4 Ascorbic acid TRUE
## 5 Azathioprine TRUE
## 6 Buspirone TRUE
## 7 Ciprofloxacin FALSE
## 8 Cisplatin TRUE
## 9 Diclofenac TRUE
## 10 Famotidine TRUE
## 11 Ketoconazole TRUE
## 12 Melatonin TRUE
## 13 Nefazodone TRUE
## 14 Omeprazole TRUE
## 15 Phenytoin TRUE
## 16 Tolcapone TRUE
## 17 Troglitazone TRUE
## 18 Valproic Acid TRUE
## 19 Verapamil TRUE
We would like to investigate the effect of the drugs on some selected proteins. For example, the activity of these proteins are measured upon the drug perturbation. We’ll build a network from the drug targets to these selected nodes.
First we declare protein of interest (POI):
POI = tibble(protein = c("NFE2L2","HMOX1","TP53","CDKN1A","BTG2","NFKB1",
"ICAM1","HSPA5", "ATF4","DDIT3","XBP1"))
Checking which POI are in Omnipath
POI <- POI %>% mutate(in_OP = protein %in% interactions$target_genesymbol)
# all POI is in Omnipath
print(all(POI$in_OP))
## [1] TRUE
First, we find paths between the drug targets and the POIs. For the sake of this simplicity we focus on drug targets of one drug, Cisplatin.
The paths are represented by a set of nodes:
source_nodes <- drug_targets %>%
filter(in_OP, drug_name=="Cisplatin") %>%
pull(gene_name)
target_nodes <- POI %>% filter(in_OP) %>% pull(protein)
collected_path_nodes = list()
for(i_source in 1:length(source_nodes)){
paths <- shortest_paths(OPI_g, from = source_nodes[[i_source]],
to = target_nodes,
output = 'vpath')
path_nodes <- lapply(paths$vpath,names) %>% unlist() %>% unique()
collected_path_nodes[[i_source]] <- path_nodes
}
collected_path_nodes <- unlist(collected_path_nodes) %>% unique()
The direct drug targets, the POIs and the intermediate pathway members give rise to the network.
cisplatin_nodes <- c(source_nodes,target_nodes, collected_path_nodes) %>%
unique()
cisplatin_network <- induced_subgraph(graph = OPI_g,vids = cisplatin_nodes)
We annotate the nodes of the network and plot it.
V(cisplatin_network)$node_type = ifelse(
V(cisplatin_network)$name %in% source_nodes, "direct drug target",
ifelse(
V(cisplatin_network)$name %in% target_nodes,"POI","intermediate node"))
ggraph(
cisplatin_network,
layout = "lgl",
area = vcount(cisplatin_network)^2.3,
repulserad = vcount(cisplatin_network)^1.2,
coolexp = 1.1
) +
geom_edge_link(
aes(
start_cap = label_rect(node1.name),
end_cap = label_rect(node2.name)),
arrow = arrow(length = unit(4, 'mm')
),
edge_width = .5,
edge_alpha = .2
) +
geom_node_point() +
geom_node_label(aes(label = name, color = node_type)) +
scale_color_discrete(
guide = guide_legend(title = 'Node type')
) +
theme_bw() +
xlab("") +
ylab("") +
ggtitle("Cisplatin induced network")