## ----setup, echo=FALSE, results='hide', warning=FALSE, error=FALSE, message=FALSE, cache=FALSE---- library(knitr) opts_chunk$set( cache = FALSE, echo = TRUE, warning = FALSE, error = FALSE, message = FALSE ) ## ----------------------------------------------------------------------------- library(airway) library(DESeq2) data(airway) # import data to DESeq2 and variance stabilize dset = DESeqDataSetFromMatrix(assay(airway), colData=as.data.frame(colData(airway)), design=~dex) dset = estimateSizeFactors(dset) dset = estimateDispersions(dset) gene_expr = getVarianceStabilizedData(dset) # annotate matrix with HGNC symbols library(biomaRt) mart = useDataset("hsapiens_gene_ensembl", useMart("ensembl")) genes = getBM(attributes = c("ensembl_gene_id","hgnc_symbol"), values=rownames(gene_expr), mart=mart) matched = match(rownames(gene_expr), genes$ensembl_gene_id) rownames(gene_expr) = genes$hgnc_symbol[matched] ## ----------------------------------------------------------------------------- library(progeny) pathways = progeny(gene_expr, scale=FALSE) controls = airway$dex == "untrt" ctl_mean = apply(pathways[controls,], 2, mean) ctl_sd = apply(pathways[controls,], 2, sd) pathways = t(apply(pathways, 1, function(x) x - ctl_mean)) pathways = apply(pathways, 1, function(x) x / ctl_sd) ## ----------------------------------------------------------------------------- library(dplyr) result = apply(pathways, 1, function(x) { broom::tidy(lm(x ~ !controls)) %>% filter(term == "!controlsTRUE") %>% dplyr::select(-term) }) mutate(bind_rows(result), pathway=names(result)) ## ----------------------------------------------------------------------------- # set up a file cache so we download only once library(BiocFileCache) bfc = BiocFileCache(".") # gene expression and drug response base = "http://www.cancerrxgene.org/gdsc1000/GDSC1000_WebResources/Data/" paths = bfcrpath(bfc, paste0(base, c("suppData/TableS4A.xlsx", "preprocessed/Cell_line_RMA_proc_basalExp.txt.zip"))) ## ----------------------------------------------------------------------------- # load the downloaded files drug_table <- readxl::read_excel(paths[1], skip=5, na="NA") drug_table <- replace(drug_table, is.na(drug_table), 0) gene_table <- readr::read_tsv(paths[2]) # we need drug response with COSMIC IDs drug_response <- data.matrix(drug_table[,3:ncol(drug_table)]) rownames(drug_response) <- drug_table[[1]] # we need genes in rows and samples in columns gene_expr <- data.matrix(gene_table[,3:ncol(gene_table)]) colnames(gene_expr) <- sub("DATA.", "", colnames(gene_expr), fixed=TRUE) rownames(gene_expr) <- gene_table$GENE_SYMBOLS ## ----------------------------------------------------------------------------- library(progeny) pathways <- progeny(gene_expr,scale = TRUE, organism = "Human", top = 100, perm = 1, verbose = FALSE) ## ---- fig.width=6, fig.height= 6---------------------------------------------- library(pheatmap) myColor = colorRampPalette(c("Darkblue", "white","red"))(100) pheatmap(pathways,fontsize=14, show_rownames = FALSE, color=myColor, main = "PROGENy", angle_col = 45, treeheight_col = 0, border_color = NA) ## ----------------------------------------------------------------------------- head(pathways) ## ----------------------------------------------------------------------------- cell_lines = intersect(rownames(pathways), rownames(drug_response)) trametinib = drug_response[cell_lines, "Trametinib"] mapk = pathways[cell_lines, "MAPK"] associations = lm(trametinib ~ mapk) summary(associations) ## ----echo=FALSE--------------------------------------------------------------- sessionInfo()