## ----setup, include=FALSE----------------------------------------------------- knitr::opts_chunk$set( collapse=TRUE, comment="#>", warning=FALSE, error=FALSE, eval=FALSE, crop = NULL ) ## ----library, message=FALSE, warning=FALSE, error=FALSE----------------------- # library(BiocStyle) # library(HPAanalyze) # library(dplyr) # library(tibble) # library(readr) # library(tidyr) ## ----echo=FALSE, eval=TRUE, fig.cap="The 'Fields >>' button.", out.width = '100%'---- knitr::include_graphics("figures/query_fields.png") ## ----echo=FALSE, eval=TRUE, fig.cap="Build your query with the drop-down menus.", out.width = '100%'---- knitr::include_graphics("figures/query_dropdown.png") ## ----echo=FALSE, eval=TRUE, fig.cap="Click the 'Search' button.", out.width = '100%'---- knitr::include_graphics("figures/query_search.png") ## ----echo=FALSE, eval=TRUE, fig.cap="Copy the link to the tsv file.", out.width = '100%'---- knitr::include_graphics("figures/query_tsvlink.png") ## ----------------------------------------------------------------------------- # ## The link to your query tsv # my_hpa_query <- "https://www.proteinatlas.org/search/protein_class%3ACD+markers+AND+normal_expression%3ACerebral+cortex%3BAny%3BNot+detected%2CLow+AND+prognostic%3AGlioma%3BUnfavourable?format=tsv" # # ## Create a temporary file as destination for the download # temp <- tempfile("query", fileext=c(".tsv.gz")) # # ## Download to the temporary file # download.file(url=my_hpa_query, destfile = temp, method = "curl", mode = "wb") # # ## read the file into a data frame # query_df <- readr::read_tsv(temp) # # ## Unlink the temp file # unlink(temp) ## ----------------------------------------------------------------------------- # tibble::glimpse(query_df) # # #> Observations: 6 # #> Variables: 22 # #> $ Gene "CD81", "NRP1", "PRNP", "SDC1", "THY... # #> $ `Gene synonym` "TAPA-1, TAPA1, TSPAN28", "CD304, NR... # #> $ Ensembl "ENSG00000110651", "ENSG00000099250"... # #> $ `Gene description` "CD81 molecule", "Neuropilin 1", "Pr... # #> $ Chromosome 11, 10, 20, 2, 11, 17 # #> $ Position "2376177-2397419", "33177492-3333626... # #> $ `Protein class` "CD markers, Disease related genes, ... # #> $ Evidence "Evidence at protein level", "Eviden... # #> $ Antibody "CAB002507, HPA007234", "CAB004511, ... # #> $ `Reliability (IH)` "Supported", "Approved", "Enhanced",... # #> $ `Reliability (Mouse Brain)` NA, NA, NA, NA, NA, NA # #> $ `Reliability (IF)` "Supported", "Uncertain", "Approved"... # #> $ `Subcellular location` "Plasma membrane", "Mitochondria", "... # #> $ `Prognostic p-value` "Glioma:5.12e-5 (unfavourable), Panc... # #> $ `RNA cancer category` "Expressed in all", "Expressed in al... # #> $ `RNA tissue category` "Expressed in all", "Expressed in al... # #> $ `RNA TS` NA, NA, NA, NA, NA, NA # #> $ `RNA TS TPM` NA, NA, NA, "esophagus: 250.7;skin: ... # #> $ `TPM max in non-specific` "seminal vesicle: 2273.0", "placenta... # #> $ `RNA cell line category` "Cell line enhanced", "Cell line enh... # #> $ `RNA CS` NA, NA, NA, NA, NA, NA # #> $ `RNA CS TPM` "ASC diff: 2031.3", "U-87 MG: 437.4"... ## ----------------------------------------------------------------------------- # ## since the query give you the latest HPA version, get the latest datasets to match # latest_datasets <- hpaDownload() # # hpaVis(data = latest_datasets, # targetGene = query_df$Gene, # targetTissue = "cerebral cortex", # targetCancer = "glioma") ## ----------------------------------------------------------------------------- # ## Download and import the xml files for proteins of interest # query_xml_list <- lapply(query_df$Ensembl, hpaXmlGet) # # ## Extract protein classes as a list of data frame # query_protein_classes <- lapply(query_xml_list, hpaXmlProtClass) # names(query_protein_classes) <- query_df$Gene # name list items # # ## Turn the list into a data frame # query_protein_classes_df <- # tidyr::unnest(tibble::enframe(query_protein_classes, name = "protein")) # # glimpse(query_protein_classes_df) # # #> Observations: 122 # #> Variables: 5 # #> $ protein "CD81", "CD81", "CD81", "CD81", "CD81", "CD81", "CD81"... # #> $ id "Cd", "Ja", "Jf", "Ma", "Md", "Me", "Mf", "Mg", "Mh", ... # #> $ name "CD markers", "Transporters", "Accessory Factors Invol... # #> $ parent_id NA, NA, "Ja", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ... # #> $ source "UniProt", "TCDB", "TCDB", "MDM", "MDM", "MEMSAT3", "M... # # # ## Which proteins in our list are also potential drug targets? # filter(query_protein_classes_df, name == "Potential drug targets") # # #> # A tibble: 2 x 5 # #> protein id name parent_id source # #> # #> 1 CD81 Pd Potential drug targets HPA # #> 2 PRNP Pd Potential drug targets HPA