### R code from vignette source 'IdMappingAnalysis.Rnw' ### Encoding: UTF-8 ################################################### ### code chunk number 1: chunk1 ################################################### library(IdMappingAnalysis); ################################################### ### code chunk number 2: chunk1 ################################################### options(width=110) ################################################### ### code chunk number 3: chunk2 (eval = FALSE) ################################################### ## # Initialize the ID mapping retrieval system ## library(IdMappingRetrieval); ## Annotation$init(); ## AnnotationAffx$setCredentials(user=MY_AFFY_USERNAME, ## password=MY_AFFY_PASSWORD,verbose=FALSE); ## # Create a service manage object encapsulating default ID retrieval services. ## svm <- ServiceManager(ServiceManager$getDefaultServices()); ## # Retrieve the ID Map list for selected array type and services. ## identDfList <- getIdMapList(svm,arrayType="HG-U133_Plus_2", ## selection=names(svm$getServices()),verbose=TRUE); ## class(identDfList) ## class(identDfList)[[1]] ################################################### ### code chunk number 4: chunk3 ################################################### # A list of ID maps to be analysed. names(examples$identDfList) head(examples$identDfList[[1]], 5) # Define the primary and secondary IDs to work with. primaryIDs <- IdMapBase$primaryIDs(examples$msmsExperimentSet) head(primaryIDs) ### UniProt IDs for proteins. secondaryIDs <- IdMapBase$primaryIDs(examples$mrnaExperimentSet); head(secondaryIDs) ### Affymetrix probeset IDs. # Construct a JointIdMap object which combines # the various ID maps, aligned by the union of the primary ID vectors. jointIdMap <- JointIdMap(examples$identDfList,primaryIDs,verbose=FALSE); names(getMethods.Class(JointIdMap)[["JointIdMap"]]) ### Methods specific to JointIDMap str(jointIdMap$as.data.frame()) ## Structure of the data field of JointIdMap ################################################### ### code chunk number 5: chunkSwapKeys ################################################### identDfListReversed <- lapply(examples$identDfList, function(identDf) IdMap$swapKeys(IdMap(identDf))) jointIdMapReversed <- JointIdMap(identDfListReversed, secondaryIDs, verbose=FALSE) ################################################### ### code chunk number 6: chunk4 ################################################### # Assemble secondary ID counts object for a given set of DB's mapCounts <- getCounts(jointIdMap, idMapNames=c("NetAffx_Q","NetAffx_F","DAVID_Q","DAVID_F","EnVision_Q"), verbose=FALSE); # This shows the structure of the contents of an IdMapCounts object. str(mapCounts$as.data.frame()) names(getMethods.Class(mapCounts)[["IdMapCounts"]]) # Tabulating the number of returned secondary IDs per primary ID, aligned across services. statsByMatchCount <- mapCounts$getStats(summary=FALSE,verbose=FALSE); statsByMatchCount[1:6,1:10]; # ...and the same results with a simplified summary. mapCounts$getStats(summary=TRUE,cutoff=3,verbose=FALSE); # A plot of empirical cdf's of the secondary ID counts par(mfrow = c(1, 2)); mapCounts$plot(); ### Or alternatice syntax: plot(mapCounts) ################################################### ### code chunk number 7: chunk5 ################################################### diffsBetweenMap <- jointIdMap$getDiff("DAVID_Q","EnVision_Q",verbose=FALSE); class(diffsBetweenMap) diffCounts <- IdMapDiffCounts(diffsBetweenMap,verbose=FALSE); names(getMethods(IdMapDiffCounts)[["IdMapDiffCounts"]]) ### We characterize the differences betwen the two ID maps. diffCounts$summary(verbose); par(mfrow = c(1, 2)); diffCounts$plot(adj=0.1,sides=1); # The same result is achieved more succinctly (for a different pair of ID mapping resources in this example) in this way: # summary(jointIdMap$diffCounts.plot(c("DAVID_Q", "EnVision_Q"), # adj=0.1,sides=1,verbose=FALSE)); # We can also characterize the reverse mapping created above. summary(jointIdMapReversed$diffCounts.plot(c("DAVID_Q", "EnVision_Q"), adj=0.1,sides=1,verbose=FALSE)); ################################################### ### code chunk number 8: chunk6 (eval = FALSE) ################################################### ## jointIdMap$diffCounts.plot("loop",adj=0.1,sides=1,verbose=FALSE); ################################################### ### code chunk number 9: chunk7 ################################################### msmsExperimentSet <- DataFilter$do.apply(examples$msmsExperimentSet, byRows=TRUE,filterFun=DataFilter$minAvgCountConstraint,filtParams=0.52,verbose=FALSE); dim(examples$msmsExperimentSet) dim(msmsExperimentSet) msmsExperimentSet <- DataFilter$removeNASeries(msmsExperimentSet,byRows=TRUE,verbose=FALSE); dim(msmsExperimentSet) mrnaExperimentSet <- examples$mrnaExperimentSet # We log10-transform the mRNA signal data. mrnaExperimentSet[,-1] <- log10(mrnaExperimentSet[,-1]); # The primary IDs are now defined by the experiment. primaryIDs_from_dataset <- IdMapBase$primaryIDs(msmsExperimentSet); secondaryIDs <- IdMapBase$primaryIDs(mrnaExperimentSet); # The method name primaryIDs() is unfortunate! This will be changed. # Create a JointIdMap object as before, # but with a primary ID vector reduced by the filtering step above. jointIdMap_from_dataset <- JointIdMap(examples$identDfList, primaryIDs_from_dataset, verbose=FALSE); ################################################### ### code chunk number 10: chunk8 ################################################### uniquePairs <- as.UniquePairs( getUnionIdMap(jointIdMap_from_dataset,verbose=FALSE),secondaryIDs); str(uniquePairs$as.data.frame()) corrData <- CorrData(uniquePairs, examples$msmsExperimentSet,examples$mrnaExperimentSet,verbose=FALSE); str(corrData) names(getMethods(CorrData)[["CorrData"]]) # (The method as.MultiSet() is deprecated.) ################################################### ### code chunk number 11: chunkJointUniquePairs ################################################### # create pairs match object from unique pairs and joint ID map object jointUniquePairs <- JointUniquePairs(uniquePairs, getIdMapList(jointIdMap_from_dataset,verbose=FALSE),verbose=FALSE); str(jointUniquePairs$as.data.frame()) ################################################### ### code chunk number 12: chunkIndividualScatterplot ################################################### # cross-correlation matrix of the expression signals for probesets mapped with UniprotAcc="P07355" idMatchInfo <- jointIdMap_from_dataset$ getMatchInfo(IDs="P07355", idMapNames=c("NetAffx_Q", "DAVID_Q", "EnVision_Q"))[[1]] print(idMatchInfo) data_Uniprot = corrData$getExperimentSet(modality="Uniprot", IDs="P07355") data_Affy <- corrData$getExperimentSet(modality="Affy", IDs=colnames(idMatchInfo)); data <- cbind(t(data_Uniprot[,-1]), t(data_Affy[,-1])) cor(data,method="spearman"); # Scatterplot for Uniprot="P07355" (annexin 2), probe set ID="1568126_at". # Patient outcomes guide symbols and colors. par(mfrow = c(1, 2)); corrData$plot(input=list(c("P07355","1568126_at")), outcomePairs=examples$outcomeMap,proteinNames="ANXA2", cex=1.2,cex.main=1.2,cex.lab=1.2,cols=c("green","red","darkblue")); # scatterplot with outcome for Uniprot="P07384" (annexin 2) # for all matching probesets without outcome corrData$plot(input="P07384",proteinNames="ANXA2", cex=1.2,cex.main=1.2,cex.lab=1.2,cols=c("green","red","darkblue")); ################################################### ### code chunk number 13: chunk10 (eval = FALSE) ################################################### ## # interactive scatterplot with a single primary ID (uniprot) and outcomes ## corrData$interactive.plot(c("P07355"), ## outcomePairs=examples$outcomeMap,proteinNames="ANXA2"); ## ## # interactive scatterplot with a single primary ID (uniprot) and without outcomes ## corrData$interactive.plot(c("P07355"),proteinNames="ANXA2"); ## ## # interactive scatterplot with multiple probeset IDs (uniprot) and without outcomes ## corrData$interactive.plot(c("P07355","P07384","P09382")); ## ## # interactive scatterplot with all available probeset ## #IDs (uniprot) and without outcomes ## corrData$interactive.plot(); ################################################### ### code chunk number 14: chunkDensityFit ################################################### par(mfrow = c(1, 3)); corr <- Corr(corrData,method="pearson",verbose=FALSE); corr[1:6, ] ## Left-hand plot: density estimate for all correlations. corr$plot(cex=1.2,cex.main=1.4,cex.lab=1.2,cex.legend=1.2); ## Center plot: individual density estimates for selected Id mapping resources. corrSet <- getCorr(jointUniquePairs,corr, groups=c("union","EnVision_Q","NetAffx_Q","DAVID_Q","DAVID_F"), full.group=TRUE,verbose=FALSE); names(corrSet) Corr$plot(corrSet,cex=1.2,cex.main=1.4,cex.lab=1.2,cex.legend=1.2); ## Right-hand plot: individual density estimates for selected Id mapping resources. corrSet <- jointUniquePairs$corr.plot(corr, idMapNames=c("NetAffx_Q","DAVID_Q","EnVision_Q"), plot.Union=FALSE, subsetting=TRUE, verbose=FALSE, cex=1.2, cex.main=1.4 ,cex.lab=1.2, cex.legend=1.2); names(corrSet) ################################################### ### code chunk number 15: chunk13 (eval = FALSE) ################################################### ## jointUniquePairs$interactive.corr.plot(corr,verbose=FALSE); ################################################### ### code chunk number 16: chunkMIXTURE ################################################### # create and plot the mixture model for number of components = 2 mixture <- Mixture(corr,G=2,verbose=FALSE); par(mfrow=c(1, 2)); mixture$plot(); mixture$getStats(); ################################################### ### code chunk number 17: chunkPlotMixtureNotEvaluated (eval = FALSE) ################################################### ## # Create and plot the mixture model determining ## #the optimal number of components) ## # for a given DB subset treating the subset as a full group ## mixture.subset <- jointUniquePairs$getMixture(corr, groups=c("NetAffx_Q","DAVID_Q","EnVision_Q"), ## full.group=TRUE,G=c(1:5),verbose); ## ## mixture.subset <- jointUniquePairs$mixture.plot(corr, ## idMapNames=c("NetAffx_Q","DAVID_Q","EnVision_Q"), ## subsetting=TRUE,G=c(1:5),verbose=FALSE); ################################################### ### code chunk number 18: chunk17 ################################################### # retrieve the mixture parameters mixture$getStats(); ################################################### ### code chunk number 19: chunkBOXPLOTS ################################################### par(mfrow = c(1, 2)); # Choose the mapping services, and define the corresponding short names to use in the figure. mappingServicesToPlot <- list("NetAffx_Q"="AffQ","DAVID_Q"="DQ","EnVision_Q"="EnV"); # Plot correlation probability distributions by match group boxplotResult_correlation = jointUniquePairs$corr.boxplot( corr, idMapNames=mappingServicesToPlot, subsetting=TRUE, srt=35, col.points="green"); # The following extracts the correlations for the group of ID pairs # reported by DQ and EnV but not by AffQ. boxplotResult_correlation$response.grouped$`!AffQ & DQ & EnV` # Plot posterior second component probability distributions by match group boxplotResult_mixtureProb = jointUniquePairs$mixture.boxplot( corr, idMapNames=mappingServicesToPlot, subsetting=TRUE, plot.G=2, srt=35, col.points="green"); # invisible by default. boxplotResult_mixtureProb$response.grouped$`!AffQ & DQ & EnV` ################################################### ### code chunk number 20: chunk18 (eval = FALSE) ################################################### ## # plot the results of mixture fit interactively choosing the DB subset ## interactive.mixture.plot(jointUniquePairs,corr,verbose=FALSE); ################################################### ### code chunk number 21: chunkCOMPARE ################################################### # Perform regression analysis relating which mapping services predict good correlations. fit<-jointUniquePairs$do.glm(corr$getData(), idMapNames=c("DAVID_Q","EnVision_Q","NetAffx_Q")); coefficients(summary(fit)); # Perform regression analysis using the second mixture component as the outcome variable. qualityMeasure <- mixture$getData(G=2) ## 2nd component posterior probability fitLinear <- jointUniquePairs$do.glm(qualityMeasure, idMapNames=c("DAVID_Q","EnVision_Q","NetAffx_Q")); coefficients(summary(fitLinear)); #To do this directly: fitLinear <- with(as.data.frame(jointUniquePairs), glm(qualityMeasure ~ DAVID_Q + EnVision_Q + NetAffx_Q)) coefficients(summary(fitLinear)); # Another variation: fitHitCount <- with(as.data.frame(jointUniquePairs), glm(qualityMeasure ~ I(DAVID_Q + EnVision_Q + NetAffx_Q))) coefficients(summary(fitHitCount)); fitHitCount$dev - fitLinear$dev ################################################### ### code chunk number 22: chunkBootstrap ################################################### bootstrap<-Bootstrap(corrData,Fisher=TRUE,verbose=FALSE); str(as.data.frame(bootstrap)) # Scatterplot of the estimated standard deviation versus the observed correlation. bootstrap$plot(new.plot=TRUE,file.copy=TRUE,copy.zoom=2,bg="white"); # One can use these estimates as weights. fitLinear <- with(as.data.frame(jointUniquePairs), glm(qualityMeasure ~ DAVID_Q + EnVision_Q + NetAffx_Q, weights=(as.data.frame(bootstrap)$sd) ^ (-2)) ) coefficients(summary(fitLinear)); # Another variation: fitHitCount <- with(as.data.frame(jointUniquePairs), glm(qualityMeasure ~ I(DAVID_Q + EnVision_Q + NetAffx_Q), weights=(as.data.frame(bootstrap)$sd) ^ (-2)) ) coefficients(summary(fitHitCount)); fitHitCount$dev - fitLinear$dev ################################################### ### code chunk number 23: chunkTWOSAMPLE ################################################### affyOnly = boxplotResult_mixtureProb$response.grouped$`AffQ & !DQ & !EnV` envOnly = boxplotResult_mixtureProb$response.grouped$`!AffQ & !DQ & EnV` davidOnly = boxplotResult_mixtureProb$response.grouped$`!AffQ & DQ & !EnV` allThree = boxplotResult_mixtureProb$response.grouped$`AffQ & DQ & EnV` t.test(affyOnly, envOnly) wilcox.test(affyOnly, envOnly) t.test(affyOnly, davidOnly) wilcox.test(affyOnly, envOnly) t.test(affyOnly, allThree) wilcox.test(affyOnly, allThree) ################################################### ### code chunk number 24: chunkDeleteMe ################################################### head(jointUniquePairs$as.data.frame() ) pairs <- jointUniquePairs$as.data.frame() pairsNotReportedByNetAffx = pairs[ (!pairs$NetAffx_Q & (pairs$DAVID_Q | pairs$EnVision_Q)) , c("Uniprot", "Affy")] head(pairsNotReportedByNetAffx) ################################################### ### code chunk number 25: sessionInfo ################################################### toLatex(sessionInfo())