### R code from vignette source 'Affymetrix.Rnw' ################################################### ### code chunk number 1: Affymetrix.Rnw:68-88 ################################################### library(GWASTools) library(GWASdata) # Load the SNP annotation (simple data frame) data(affy_snp_annot) # Create a SnpAnnotationDataFrame snpAnnot <- SnpAnnotationDataFrame(affy_snp_annot) # names of columns varLabels(snpAnnot) # data head(pData(snpAnnot)) # Add metadata to describe the columns meta <- varMetadata(snpAnnot) meta[c("snpID", "chromosome", "position", "rsID", "probeID"), "labelDescription"] <- c("unique integer ID for SNPs", paste("integer code for chromosome: 1:22=autosomes,", "23=X, 24=pseudoautosomal, 25=Y, 26=Mitochondrial, 27=Unknown"), "base pair position on chromosome (build 36)", "RS identifier", "unique ID from Affymetrix") varMetadata(snpAnnot) <- meta ################################################### ### code chunk number 2: Affymetrix.Rnw:110-137 ################################################### # Load the scan annotation (simple data frame) data(affy_scan_annot) # Create a ScanAnnotationDataFrame scanAnnot <- ScanAnnotationDataFrame(affy_scan_annot) # names of columns varLabels(scanAnnot) # data head(pData(scanAnnot)) # Add metadata to describe the columns meta <- varMetadata(scanAnnot) meta[c("scanID", "subjectID", "family", "father", "mother", "CoriellID", "race", "sex", "status", "genoRunID", "plate", "alleleFile", "chpFile"), "labelDescription"] <- c("unique integer ID for scans", "subject identifier (may have multiple scans)", "family identifier", "father identifier as subjectID", "mother identifier as subjectID", "Coriell subject identifier", "HapMap population group", "sex coded as M=male and F=female", "simulated case/control status" , "genotyping instance identifier", "plate containing samples processed together for genotyping chemistry", "data file with intensities", "data file with genotypes and quality scores") varMetadata(scanAnnot) <- meta ################################################### ### code chunk number 3: Affymetrix.Rnw:160-240 ################################################### geno.file <- "tmp.geno.gds" # first 3 samples only scan.annotation <- affy_scan_annot[1:3, c("scanID", "genoRunID", "chpFile")] names(scan.annotation) <- c("scanID", "scanName", "file") # indicate which column of SNP annotation is referenced in data files snp.annotation <- affy_snp_annot[,c("snpID", "probeID", "chromosome", "position")] names(snp.annotation)[2] <- "snpName" # col.nums is an integer vector indicating which columns of the raw text file # contain variables for input col.nums <- as.integer(c(2,3)) names(col.nums) <- c("snp", "geno") # Define a path to the raw data CHP text files path <- system.file("extdata", "affy_raw_data", package="GWASdata") diag.geno.file <- "diag.geno.RData" diag.geno <- createDataFile(path = path, filename = geno.file, file.type = "gds", variables = "genotype", snp.annotation = snp.annotation, scan.annotation = scan.annotation, sep.type = "\t", skip.num = 1, col.total = 6, col.nums = col.nums, scan.name.in.file = -1, diagnostics.filename = diag.geno.file, verbose = FALSE) # Look at the values included in the "diag.geno" object which holds # all output from the function call names(diag.geno) # `read.file' is a vector indicating whether (1) or not (0) each file # specified in the `files' argument was read successfully table(diag.geno$read.file) # `row.num' is a vector of the number of rows read from each file table(diag.geno$row.num) # `sample.match' is a vector indicating whether (1) or not (0) # the sample name inside the raw text file matches that in the # sample annotation data.frame table(diag.geno$sample.match) # `snp.chk' is a vector indicating whether (1) or not (0) # the raw text file has the expected set of SNP names table(diag.geno$snp.chk) # `chk' is a vector indicating whether (1) or not (0) all previous # checks were successful and the data were written to the file table(diag.geno$chk) # open the file we just created (genofile <- GdsGenotypeReader(geno.file)) # Take out genotype data for the first 3 samples and # the first 5 SNPs (genos <- getGenotype(genofile, snp=c(1,5), scan=c(1,3))) close(genofile) # check that the data file matches the raw text files check.geno.file <- "check.geno.RData" check.geno <- checkGenotypeFile(path = path, filename = geno.file, file.type = "gds", snp.annotation = snp.annotation, scan.annotation = scan.annotation, sep.type = "\t", skip.num = 1, col.total = 6, col.nums = col.nums, scan.name.in.file = -1, check.scan.index = 1:3, n.scans.loaded = 3, diagnostics.filename = check.geno.file, verbose = FALSE) # Look at the values included in the "check.geno" object which holds # all output from the function call names(check.geno) # 'geno.chk' is a vector indicating whether (1) or not (0) the genotypes # match the text file table(check.geno$geno.chk) ################################################### ### code chunk number 4: Affymetrix.Rnw:258-311 ################################################### qual.file <- "tmp.qual.gds" scan.annotation <- affy_scan_annot[1:3, c("scanID", "genoRunID", "chpFile")] names(scan.annotation) <- c("scanID", "scanName", "file") snp.annotation <- affy_snp_annot[,c("snpID", "probeID", "chromosome", "position")] names(snp.annotation)[2] <- "snpName" col.nums <- as.integer(c(2,4)) names(col.nums) <- c("snp", "quality") path <- system.file("extdata", "affy_raw_data", package="GWASdata") diag.qual.file <- "diag.qual.RData" diag.qual <- createDataFile(path = path, filename = qual.file, file.type = "gds", variables = "quality", snp.annotation = snp.annotation, scan.annotation = scan.annotation, sep.type = "\t", skip.num = 1, col.total = 6, col.nums = col.nums, scan.name.in.file = -1, diagnostics.filename = diag.qual.file, verbose = FALSE) # Open the GDS file we just created (qualfile <- GdsIntensityReader(qual.file)) # Take out the quality scores for the first # 5 SNPs for the first 3 samples (qual <- getQuality(qualfile, snp=c(1,5), scan=c(1,3))) close(qualfile) # check check.qual.file <- "check.qual.RData" check.qual <- checkIntensityFile(path = path, filename = qual.file, file.type = "gds", snp.annotation = snp.annotation, scan.annotation = scan.annotation, sep.type = "\t", skip.num = 1, col.total = 6, col.nums = col.nums, scan.name.in.file = -1, check.scan.index = 1:3, n.scans.loaded = 3, affy.inten = FALSE, diagnostics.filename = check.qual.file, verbose = FALSE) ################################################### ### code chunk number 5: Affymetrix.Rnw:317-359 ################################################### xy.file <- "tmp.xy.gds" # we need the allele files instead of CHP files scan.annotation <- affy_scan_annot[1:3, c("scanID", "genoRunID", "alleleFile")] names(scan.annotation) <- c("scanID", "scanName", "file") snp.annotation <- affy_snp_annot[,c("snpID", "probeID", "chromosome", "position")] names(snp.annotation)[2] <- "snpName" diag.xy.file <- "diag.xy.RData" diag.xy <- createAffyIntensityFile(path = path, filename = xy.file, file.type = "gds", snp.annotation = snp.annotation, scan.annotation = scan.annotation, diagnostics.filename = diag.xy.file, verbose = FALSE) # Open the file we just created (intenfile <- GdsIntensityReader(xy.file)) # Take out the normalized X intensity values for the first # 5 SNPs for the first 3 samples (xinten <- getX(intenfile, snp=c(1,5), scan=c(1,3))) close(intenfile) # check the intensity files - note affy.inten=TRUE check.xy.file <- "check.xy.RData" check.xy <- checkIntensityFile(path = path, filename = xy.file, file.type = "gds", snp.annotation = snp.annotation, scan.annotation = scan.annotation, sep.type = "\t", skip.num = 1, col.total = 2, col.nums = setNames(as.integer(c(1,2,2)), c("snp", "X", "Y")), scan.name.in.file = -1, check.scan.index = 1:3, n.scans.loaded = 3, affy.inten = TRUE, diagnostics.filename = check.xy.file, verbose = FALSE) ################################################### ### code chunk number 6: Affymetrix.Rnw:445-463 ################################################### bl.file <- "tmp.bl.gds" xyData <- IntensityData(GdsIntensityReader(xy.file), snpAnnot=snpAnnot) genoData <- GenotypeData(GdsGenotypeReader(geno.file), snpAnnot=snpAnnot) BAFfromGenotypes(xyData, genoData, filename = bl.file, file.type = "gds", min.n.genotypes = 0, call.method ="by.study", verbose = FALSE) close(xyData) close(genoData) # Open the file we just created (blfile <- GdsIntensityReader(bl.file)) # Look at the BAlleleFrequency values for the first 5 SNPs (baf <- getBAlleleFreq(blfile, snp=c(1,5), scan=c(1,3))) close(blfile) ################################################### ### code chunk number 7: Affymetrix.Rnw:468-471 ################################################### file.remove(geno.file, qual.file, xy.file, bl.file) file.remove(diag.geno.file, diag.qual.file, diag.xy.file) file.remove(check.geno.file, check.qual.file, check.xy.file)