### R code from vignette source 'curatedBreastData-manual.Rnw' ################################################### ### code chunk number 1: style-Sweave ################################################### BiocStyle::latex() ################################################### ### code chunk number 2: curatedBreastData-manual.Rnw:61-66 ################################################### library("curatedBreastData") #load up datasets that are in S4 expressionSet format. #clinical data from master clinicalTable already linked to each sample #in these ExpressionSets in the phenoData slot. data(curatedBreastDataExprSetList); ################################################### ### code chunk number 3: curatedBreastData-manual.Rnw:70-76 ################################################### #check out the clinical data for dataset 3 #first look at the GEO study name names(curatedBreastDataExprSetList)[3] #only take the first 3 patients for sake of printing to screen #look at first 10 clinical variables head(pData(curatedBreastDataExprSetList[[3]])[c(1:3), c(1:10)]) ################################################### ### code chunk number 4: curatedBreastData-manual.Rnw:82-85 ################################################### #process only the first two datasets to avoid a long-running example: #take top 5000 genes by variance from each dataset. proc_curatedBreastDataExprSetList <- processExpressionSetList(exprSetList=curatedBreastDataExprSetList[1:2], outputFileDirectory = "./", numTopVarGenes=5000) ################################################### ### code chunk number 5: curatedBreastData-manual.Rnw:93-109 ################################################### #load up master clinical data table data(clinicalData) #look at some of the clinical variable name definitions clinicalData$clinicalVarDef[c(1:2),] #Check out the treatment information. #just do first three patients head(clinicalData$clinicalTable)[c(1:3), c(112:ncol(clinicalData$clinicalTable))] #how many had chemotherapy? numChemoPatients <- length(which( clinicalData$clinicalTable$chemotherapyClass==1)) #around 1500 had chemotherapy numChemoPatients #which patients specifically had a taxane chemotherapy? numChemoTaxane <- length(which(clinicalData$clinicalTable$taxane==1)) numChemoTaxane ################################################### ### code chunk number 6: curatedBreastData-manual.Rnw:114-119 ################################################### #how many had adjuvant therapy? numAdjPatients <- length(which( clinicalData$clinicalTable$neoadjuvant_or_adjuvant=="adj")) #over a 1000 had (documented) adjuvant therapy numAdjPatients ################################################### ### code chunk number 7: curatedBreastData-manual.Rnw:125-136 ################################################### #how many patients have non-NA OS binary data? length(which(!is.na(clinicalData$clinicalTable$OS))) #how many have OS data in the more granular form of months until OS? #this variable includes studies that had a cieling for tracking OS length(which(!is.na(clinicalData$clinicalTable$OS_months_or_MIN_months_of_OS))) #how many patients have OS information that is definitively #followed up until their death (details on how studies collect OS data can be surprising!) length(which(!is.na(clinicalData$clinicalTable$OS_up_until_death))) #finish up with sessionInfo sessionInfo()