## ----knitrSetup, include=FALSE------------------------------------------------ library(knitr) opts_chunk$set(out.extra='style="display:block; margin: auto"', fig.align="center", fig.width=8, fig.height=8, tidy=FALSE) ## ----install, eval=FALSE------------------------------------------------------ # if (!requireNamespace("BiocManager", quietly=TRUE)) # install.packages("BiocManager") # BiocManager::install("rcellminer") # BiocManager::install("rcellminerData") ## ----loadLibrary, message=FALSE, warning=FALSE-------------------------------- library(rcellminer) library(rcellminerData) ## ----searchHelp, eval=FALSE, tidy=FALSE--------------------------------------- # help.search("rcellminer") ## ----searchForNscs------------------------------------------------------------ searchForNscs("nib$") ## ----plotCellminer------------------------------------------------------------ # Get Cellminer data drugAct <- exprs(getAct(rcellminerData::drugData)) molData <- getMolDataMatrices() # One drug nsc <- "94600" plots <- c("drug") plotCellMiner(drugAct, molData, plots, nsc, NULL) # One expression gene <- "TP53" plots <- c("exp") plotCellMiner(drugAct, molData, plots, NULL, gene) # Two drugs: Irinotecan and topotecan nsc <- c("616348", "609699") plots <- c("drug", "drug") plotCellMiner(drugAct, molData, plots, nsc, NULL) # Two genes # NOTE: subscript out of bounds Errors likely mean the gene is not present for that data type gene <- c("TP53", "MDM2") plots <- c("exp", "mut", "exp") plotCellMiner(drugAct, molData, plots, NULL, gene) # Gene and drug to plot nsc <- "609699" gene <- "SLFN11" plots <- c("exp", "cop", "drug") plotCellMiner(drugAct, molData, plots, nsc, gene) ## ----plotDrugSets------------------------------------------------------------- # Get CellMiner data drugAct <- exprs(getAct(rcellminerData::drugData)) drugs <- searchForNscs("camptothecin") drugAct <- drugAct[drugs,] mainLabel <- paste("Drug Set: Camptothecin Derivatives, Drugs:", length(drugs), sep=" ") plotDrugSets(drugAct, drugs, mainLabel) ## ----plotStructures, fig.width=3, fig.height=3-------------------------------- drugAnnot <- getFeatureAnnot(rcellminerData::drugData)[["drug"]] drugAnnot[1:5, "SMILES"] ## ----makeDrugInfoTable, results='hide', message=FALSE------------------------- # getFeatureAnnot() returns a named list of data frames with annotation data # for drugs ("drug") and drug activity repeat experiments ("drugRepeat"). drugAnnot <- getFeatureAnnot(rcellminerData::drugData)[["drug"]] # Get the names and MOA for compounds with MOA entries knownMoaDrugs <- unique(c(getMoaToCompounds(), recursive = TRUE)) knownMoaDrugInfo <- data.frame(NSC=knownMoaDrugs, stringsAsFactors = FALSE) knownMoaDrugInfo$Name <- drugAnnot[knownMoaDrugInfo$NSC, "NAME"] # Process all NSCs knownMoaDrugInfo$MOA <- sapply(knownMoaDrugInfo$NSC, getMoaStr) # Order drugs by mechanism of action. knownMoaDrugInfo <- knownMoaDrugInfo[order(knownMoaDrugInfo$MOA), ] # Drug_MOA_Key data frame provides further details on MOA abbrevations. Drug_MOA_Key[c("A2", "A7"), ] ## ----computeGI50Data, results='hide', message=FALSE--------------------------- negLogGI50Data <- getDrugActivityData(nscSet = knownMoaDrugInfo$NSC) gi50Data <- 10^(-negLogGI50Data) ## ----makeIntegratedTable, results='hide', message=FALSE----------------------- knownMoaDrugAct <- as.data.frame(cbind(knownMoaDrugInfo, gi50Data), stringsAsFactors = FALSE) # This table can be written out to a file #write.table(knownMoaDrugAct, file="knownMoaDrugAct.txt", quote=FALSE, sep="\t", row.names=FALSE, col.names=TRUE, na="NA") ## ----reinhold2015------------------------------------------------------------- # Gene of interest gene <- "PTEN" # Get Data drugAct <- exprs(getAct(rcellminerData::drugData)) molData <- getMolDataMatrices() # Get the cell lines names for cell lines meeting particular thresholds copKnockdown <- names(which(molData[["cop"]][paste0("cop", gene), ] < -1)) expKnockdown <- names(which(molData[["exp"]][paste0("exp", gene), ] < -1.5)) mutKnockdown <- names(which(molData[["mut"]][paste0("mut", gene), ] == 1)) # Make composite pattern pattern <- rep(0, length(molData[["cop"]][paste0("cop", gene), ])) names(pattern) <- names(molData[["cop"]][paste0("cop", gene), ]) tmp <- Reduce(union, list(copKnockdown, expKnockdown, mutKnockdown)) pattern[tmp] <- 1 # Composite plot data extraPlot <- list() extraPlot[["title"]] <- "Composite Pattern" extraPlot[["label"]] <- "Knockdown Composite (Binary)" extraPlot[["values"]] <- pattern # Plot data plotCellMiner(molData=molData, plots=c("cop", "exp", "mut"), gene=gene, extraPlot=extraPlot) # Significant drug correlations to FDA-approved/Clinical trial drugs # getFeatureAnnot() returns a named list of data frames with annotation data # for drugs ("drug") and drug activity repeat experiments ("drugRepeat"). drugAnnot <- getFeatureAnnot(rcellminerData::drugData)[["drug"]] tmpDA <- drugAnnot[drugAnnot$FDA_STATUS != "-", c("NSC", "FDA_STATUS")] tmpDrugAct <- drugAct[rownames(drugAct) %in% tmpDA$NSC,] results <- patternComparison(pattern, tmpDrugAct) sigResults <- results[results$PVAL < 0.01, ] # Get names of drugs that are FDA approved or in clinical trials getDrugName(rownames(sigResults)) ## ----varma2014, fig.height=7.5, fig.width=6----------------------------------- molData <- getMolDataMatrices() # Get Data copGenes <- removeMolDataType(rownames(molData[["cop"]])) expGenes <- removeMolDataType(rownames(molData[["exp"]])) genes <- intersect(copGenes, expGenes) # Generate the appropriate rownames expGeneLabels <- paste0("exp", genes) copGeneLabels <- paste0("cop", genes) a <- molData[["exp"]][expGeneLabels,] b <- molData[["cop"]][copGeneLabels,] allGenes <- rowCors(a, b) #selectedOncogenes <- c("FZD1", "JAK2", "ALK", "PIK3CG", "RET", "CDC25A", "PDGFB", "PIK3CB", "WNT3") selectedOncogenes <- c("ABL1", "ALK", "BRAF", "CCND1", "CCND3", "CCNE1", "CCNE2", "CDC25A", "EGFR", "ERBB2", "EZH2", "FOS", "FOXL2", "HRAS", "IDH1", "IDH2", "JAK2", "KIT", "KRAS", "MET", "MOS", "MYC", "NRAS", "PDGFB", "PDGFRA", "PIK3CA", "PIK3CB", "PIK3CD", "PIK3CG", "PIM1", "PML", "RAF1", "REL", "RET", "SRC", "STK11", "TP63", "WNT10B", "WNT4", "WNT2B", "WNT9A", "WNT3", "WNT5A", "WNT5B", "WNT10A", "WNT11", "WNT2", "WNT1", "WNT7B", "WISP1", "WNT8B", "WNT7A", "WNT16", "WISP2", "WISP3", "FZD5", "FZD1") selectedOncogenes <- intersect(selectedOncogenes, genes) # Generate the appropriate rownames expGeneLabels <- paste0("exp", selectedOncogenes) copGeneLabels <- paste0("cop", selectedOncogenes) a <- molData[["exp"]][expGeneLabels,] b <- molData[["cop"]][copGeneLabels,] selectedOncogenesCor <- rowCors(a, b) hist(allGenes$cor, main="", xlab="Pearson correlation between expression and copy number", breaks=200, col="lightgray", border="lightgray") segments(x0=median(allGenes$cor), y0=0, x1=median(allGenes$cor), y1=175, lty=2, lwd=2) text(median(allGenes$cor)+0.02, y=175, adj=0, labels="Median Correlation\nAll Genes", cex=0.75) segments(x0=median(selectedOncogenesCor$cor), y0=0, x1=median(selectedOncogenesCor$cor), y1=140, lty=2, lwd=2, col="red") text(median(selectedOncogenesCor$cor)+0.02, y=140, adj=0, labels="Median Correlation\nOncogenes", cex=0.75) rug(selectedOncogenesCor$cor, col="red") ## ----idTopo1InhCors----------------------------------------------------------- # Get normalized (Z-score) NCI-60 gene expression and drug activity data. nci60DrugActZ <- exprs(getAct(rcellminerData::drugData)) nci60GeneExpZ <- getAllFeatureData(rcellminerData::molData)[["exp"]] antiApoptosisGenes <- c("BAG1", "BAG3", "BAG4", "BCL10", "BCL2", "BCL2A1", "BCL2L1", "BCL2L10", "BCL2L2", "BFAR", "BIRC3", "BIRC6", "BNIP1", "BNIP2", "BNIP3", "BNIP3L", "BRAF", "CASP3", "CD27", "CD40LG", "CFLAR", "CIDEA", "DAPK1", "DFFA", "FAS", "IGF1R", "MCL1", "NOL3", "TMBIM1", "TP53", "TP73", "XIAP") camptothecinNsc <- "94600" # Compute table of correlations between camptothecin activity and anti-apoptosis gene expression. pcResults <- patternComparison(nci60DrugActZ[camptothecinNsc, ], nci60GeneExpZ[antiApoptosisGenes, ]) # Adjust p-values for multiple comparisons, sort with respect to q-values. pcResults$QVAL <- p.adjust(pcResults$PVAL, method = "fdr") pcResults <- pcResults[order(pcResults$QVAL), ] # Identify genes with significant negative correlations (FDR < 0.1) pcResults[((pcResults$COR < 0) & (pcResults$QVAL < 0.10)), ] ## ----plotCptActivityVsGeneExp------------------------------------------------- colorTab <- loadNciColorSet(returnDf = TRUE) tissueColorTab <- unique(colorTab[, c("tissues", "colors")]) plotData <- as.data.frame(t(rbind(nci60DrugActZ[camptothecinNsc, , drop=FALSE], nci60GeneExpZ[c("TMBIM1", "BCL2L2"), ]))) colnames(plotData) <- c("Camptothecin", "TMBIM1", "BCL2L2") plot(x=plotData$TMBIM1, y=plotData$Camptothecin, col=colorTab$colors, pch=16, xlab="TMBIM1 mRNA exp (Z-score)", ylab="Camptothecin Activity (Z-score)", main=paste0("Camptothecin Activity vs. TMBIM Expression (r = ", round(pcResults["TMBIM1", "COR"], 2), ")")) abline(lm(formula("Camptothecin ~ TMBIM1"), plotData), col="red") legend("bottomleft", legend=tissueColorTab$tissues, col=tissueColorTab$colors, cex=0.7, pch = 16) plot(x=plotData$BCL2L2, y=plotData$Camptothecin, col=colorTab$colors, pch=16, xlab="BCL2L2 mRNA exp (Z-score)", ylab="Camptothecin Activity (Z-score)", main=paste0("Camptothecin Activity vs. BCL2L2 Expression (r = ", round(pcResults["BCL2L2", "COR"], 2), ")")) abline(lm(formula("Camptothecin ~ BCL2L2"), plotData), col="red") legend("bottomleft", legend=tissueColorTab$tissues, col=tissueColorTab$colors, cex=0.7, pch = 16) ## ----getCellMinerData--------------------------------------------------------- # Get Cellminer data # getFeatureAnnot() returns a named list of data frames with annotation data # for drugs ("drug") and drug activity repeat experiments ("drugRepeat"). drugAnnot <- getFeatureAnnot(rcellminerData::drugData)[["drug"]] fdaDrugAnnot <- drugAnnot[which(drugAnnot$FDA_STATUS == "FDA approved"), ] nci60FdaDrugAct <- getDrugActivityData(nscSet = fdaDrugAnnot$NSC) nci60GeneExp <- getAllFeatureData(rcellminerData::molData)[["xai"]] nci60GeneMut <- getAllFeatureData(rcellminerData::molData)[["mut"]] ## ----vizDnarGeneMutations, fig.width=10, fig.height=7------------------------- dnarGeneSet <- c("APC", "APLF", "ATM", "CLSPN", "ERCC6", "FANCI", "FANCM", "GEN1", "HLTF", "MLH1", "POLD1", "POLE", "POLG", "POLQ", "RAD54L", "REV3L", "RMI1", "SLX4", "SMARCA4", "SMC2", "TP53", "WRN") dnarGeneSet <- intersect(dnarGeneSet, rownames(nci60GeneMut)) dnarGeneMut <- nci60GeneMut[dnarGeneSet, ] # Identify most frequently mutated genes. numMutLines <- apply(dnarGeneMut, MARGIN = 1, FUN = sum) sort(numMutLines, decreasing = TRUE) dnarGeneMutPlotData <- dnarGeneMut[order(numMutLines), ] heatmap(dnarGeneMutPlotData, scale="none", Rowv = NA, Colv = NA, col = c("grey", "red"), main="Deleterious mutations") ## ----vizDnarGeneExpKnockouts, fig.width=10, fig.height=7---------------------- dnarGeneExp <- nci60GeneExp[dnarGeneSet, ] hist(dnarGeneExp, xlab="mRNA expression (log2 intensity)", main="Distribution of DNA repair gene expression values") # Set low expression threshold to 1st percentile of expression values. lowExpThreshold <- quantile(dnarGeneExp, na.rm = TRUE, probs = c(0.01)) lowExpThreshold # Construct binary (potential) expression knockout matrix. dnarGeneExpKo <- matrix(0, nrow=nrow(dnarGeneExp), ncol=ncol(dnarGeneExp)) rownames(dnarGeneExpKo) <- rownames(dnarGeneExp) colnames(dnarGeneExpKo) <- colnames(dnarGeneExp) dnarGeneExpKo[which(dnarGeneExp < lowExpThreshold)] <- 1 # Restrict to wild type expression knockouts. dnarGeneExpKo[which(dnarGeneMut == 1)] <- 0 # Identify genes with most frequent loss of expression. numExpKoLines <- apply(dnarGeneExpKo, MARGIN = 1, FUN = sum) sort(numExpKoLines, decreasing = TRUE) dnarGeneExpKoPlotData <- dnarGeneExpKo[order(numExpKoLines), ] heatmap(dnarGeneExpKoPlotData, scale="none", Rowv = NA, Colv = NA, col = c("grey", "blue"), main="Potential expression knockouts") ## ----vizDnarGeneAlterations, fig.width=10, fig.height=7----------------------- dnarGeneAlt <- matrix(0, nrow=nrow(dnarGeneExp), ncol=ncol(dnarGeneExp)) rownames(dnarGeneAlt) <- rownames(dnarGeneExp) colnames(dnarGeneAlt) <- colnames(dnarGeneExp) dnarGeneAlt[which(dnarGeneMut == 1)] <- 1 dnarGeneAlt[which(dnarGeneExpKo == 1)] <- 2 # Identify genes with most frequent alterations (by mutation or lack of expression). numAltLines <- apply(dnarGeneAlt, MARGIN = 1, FUN = sum) sort(numAltLines, decreasing = TRUE) dnarGeneAltPlotData <- dnarGeneAlt[order(numAltLines), ] heatmap(dnarGeneAltPlotData, scale="none", Rowv = NA, Colv = NA, col = c("grey", "red", "blue"), main="Altered genes by mutation (red) or low expression (blue)") ## ----checkAplfDrugAssocs, fig.width=10, fig.height=7-------------------------- geneName <- "APLF" altLineIndices <- which(dnarGeneAlt[geneName, ] != 0) nonAltLineIndices <- which(dnarGeneAlt[geneName, ] == 0) drugAssocTab <- data.frame(GENE=geneName, DRUG_NAME=fdaDrugAnnot$NAME, DRUG_NSC=fdaDrugAnnot$NSC, TTEST_PVAL=NA, ADJ_PVAL=NA, MEAN_ACT_DIFF=NA, stringsAsFactors = FALSE) rownames(drugAssocTab) <- drugAssocTab$DRUG_NSC for (drugNsc in drugAssocTab$DRUG_NSC){ ttestResult <- t.test(x=nci60FdaDrugAct[drugNsc, altLineIndices], y=nci60FdaDrugAct[drugNsc, nonAltLineIndices]) drugAssocTab[drugNsc, "TTEST_PVAL"] <- ttestResult$p.value drugAssocTab[drugNsc, "MEAN_ACT_DIFF"] <- ttestResult$estimate[1] - ttestResult$estimate[2] } drugAssocTab$ADJ_PVAL <- p.adjust(drugAssocTab$TTEST_PVAL, method = "bonferroni") drugAssocTab <- drugAssocTab[order(drugAssocTab$ADJ_PVAL), ] drugAssocTab[drugAssocTab$ADJ_PVAL < 0.05, c("GENE", "DRUG_NAME", "DRUG_NSC", "ADJ_PVAL", "MEAN_ACT_DIFF")] meanActDiff <- drugAssocTab$MEAN_ACT_DIFF negLog10Pval <- -log10(drugAssocTab$TTEST_PVAL) plot(x=meanActDiff, y=negLog10Pval, xlim = c((min(meanActDiff)-1), (max(meanActDiff)+1)), xlab="Difference between -logGI50 means (altered vs. non-altered lines)", ylab="-log10(p-value)", main=(paste0(geneName, " Drug Response Associations"))) ptLabels <- character(length(drugAssocTab$DRUG_NAME)) numLabeledPts <- 7 # label the k most significant drug associations ptLabels[1:numLabeledPts] <- drugAssocTab$DRUG_NAME[1:numLabeledPts] text(x=meanActDiff, negLog10Pval, ptLabels, cex=0.9, pos=4, col="red") ## ----sessionInfo-------------------------------------------------------------- sessionInfo()