### R code from vignette source 'vignettes/GWASTools/inst/doc/Affymetrix.Rnw' ### Encoding: UTF-8 ################################################### ### code chunk number 1: Affymetrix.Rnw:67-87 ################################################### library(GWASTools) library(GWASdata) # Load the SNP annotation (simple data frame) data(affy_snp_annot) # Create a SnpAnnotationDataFrame snpAnnot <- SnpAnnotationDataFrame(affy_snp_annot) # names of columns varLabels(snpAnnot) # data head(pData(snpAnnot)) # Add metadata to describe the columns meta <- varMetadata(snpAnnot) meta[c("snpID", "chromosome", "position", "rsID", "probeID"), "labelDescription"] <- c("unique integer ID for SNPs", paste("integer code for chromosome: 1:22=autosomes,", "23=X, 24=pseudoautosomal, 25=Y, 26=Mitochondrial, 27=Unknown"), "base pair position on chromosome (build 36)", "RS identifier", "unique ID from Affymetrix") varMetadata(snpAnnot) <- meta ################################################### ### code chunk number 2: Affymetrix.Rnw:109-136 ################################################### # Load the scan annotation (simple data frame) data(affy_scan_annot) # Create a ScanAnnotationDataFrame scanAnnot <- ScanAnnotationDataFrame(affy_scan_annot) # names of columns varLabels(scanAnnot) # data head(pData(scanAnnot)) # Add metadata to describe the columns meta <- varMetadata(scanAnnot) meta[c("scanID", "subjectID", "family", "father", "mother", "CoriellID", "race", "sex", "status", "genoRunID", "plate", "alleleFile", "chpFile"), "labelDescription"] <- c("unique integer ID for scans", "subject identifier (may have multiple scans)", "family identifier", "father identifier as subjectID", "mother identifier as subjectID", "Coriell subject identifier", "HapMap population group", "sex coded as M=male and F=female", "simulated case/control status" , "genotyping instance identifier", "plate containing samples processed together for genotyping chemistry", "data file with intensities", "data file with genotypes and quality scores") varMetadata(scanAnnot) <- meta ################################################### ### code chunk number 3: Affymetrix.Rnw:198-208 ################################################### geno.nc.file <- "tmp.geno.nc" # get snp annotation data frame for function snp <- affy_snp_annot[,c("snpID", "chromosome", "position")] ncdfCreate(ncdf.filename = geno.nc.file, snp.annotation = snp, variables = c("genotype"), array.name = "AffyGenomeWideSNP_6", genome.build = "36", n.samples = 3, precision = "single") ################################################### ### code chunk number 4: Affymetrix.Rnw:217-223 ################################################### # first 3 samples only scan.annotation <- affy_scan_annot[1:3, c("scanID", "genoRunID", "chpFile")] names(scan.annotation) <- c("scanID", "scanName", "file") # indicate which column of SNP annotation is referenced in data files snp.annotation <- affy_snp_annot[,c("snpID", "probeID")] names(snp.annotation) <- c("snpID", "snpName") ################################################### ### code chunk number 5: Affymetrix.Rnw:230-236 ################################################### col.nums <- as.integer(c(2,3)) names(col.nums) <- c("snp", "geno") # Define a path to the raw data CHP text files which are read by # ncdfAddData to access the raw genotypic data path <- system.file("extdata", "affy_raw_data", package="GWASdata") ################################################### ### code chunk number 6: Affymetrix.Rnw:245-276 ################################################### diag.geno.file <- "diag.geno.RData" diag.geno <- ncdfAddData(path = path, ncdf.filename = geno.nc.file, snp.annotation = snp.annotation, scan.annotation = scan.annotation, sep.type = "\t", skip.num = 1, col.total = 6, col.nums = col.nums, scan.name.in.file = -1, scan.start.index = 1, diagnostics.filename = diag.geno.file, verbose = FALSE) # Look at the values included in the "diag.geno" object which holds # all output from the function call names(diag.geno) # `read.file' is a vector indicating whether (1) or not (0) each file # specified in the `files' argument was read successfully table(diag.geno$read.file) # `row.num' is a vector of the number of rows read from each file table(diag.geno$row.num) # `sample.match' is a vector indicating whether (1) or not (0) # the sample name inside the raw text file matches that in the # sample annotation data.frame table(diag.geno$sample.match) # `snp.chk' is a vector indicating whether (1) or not (0) # the raw text file has the expected set of SNP names table(diag.geno$snp.chk) # `chk' is a vector indicating whether (1) or not (0) all previous # checks were successful and the data were written to the NetCDF file table(diag.geno$chk) ################################################### ### code chunk number 7: Affymetrix.Rnw:285-291 ################################################### (genofile <- NcdfGenotypeReader(geno.nc.file)) # Take out genotype data for the first 3 samples and # the first 5 SNPs (genos <- getGenotype(genofile, snp=c(1,5), scan=c(1,3))) # Close the NetCDF file close(genofile) ################################################### ### code chunk number 8: Affymetrix.Rnw:297-320 ################################################### check.geno.file <- "check.geno.RData" check.geno <- ncdfCheckGenotype(path = path, ncdf.filename = geno.nc.file, snp.annotation = snp.annotation, scan.annotation = scan.annotation, sep.type = "\t", skip.num = 1, col.total = 6, col.nums = col.nums, scan.name.in.file = -1, check.scan.index = 1:3, n.scans.loaded = 3, diagnostics.filename = check.geno.file, verbose = FALSE) # Look at the values included in the "check.geno" object which holds # all output from the function call names(check.geno) # 'snp.order' is a vector indicating whether (1) or not (0) the snp ids # are in the same order in each file. table(check.geno$snp.order) # 'geno.chk' is a vector indicating whether (1) or not (0) the genotypes # in the netCDF match the text file table(check.geno$geno.chk) ################################################### ### code chunk number 9: Affymetrix.Rnw:359-369 ################################################### qxy.nc.file <- "tmp.qxy.nc" # get snp annotation data frame for function snp <- affy_snp_annot[,c("snpID", "chromosome", "position")] ncdfCreate(ncdf.filename = qxy.nc.file, snp.annotation = snp, variables = c("quality","X","Y"), array.name = "AffyGenomeWideSNP_6", genome.build = "36", n.samples = 3, precision = "single") ################################################### ### code chunk number 10: Affymetrix.Rnw:378-384 ################################################### # first 3 samples only scan.annotation <- affy_scan_annot[1:3, c("scanID", "genoRunID", "chpFile")] names(scan.annotation) <- c("scanID", "scanName", "file") # indicate which column of SNP annotation is referenced in data files snp.annotation <- affy_snp_annot[,c("snpID", "probeID")] names(snp.annotation) <- c("snpID", "snpName") ################################################### ### code chunk number 11: Affymetrix.Rnw:391-397 ################################################### col.nums <- as.integer(c(2,4)) names(col.nums) <- c("snp","qs") # Define a path to the raw data CHP text files which are read by # ncdfAddData to access the raw genotypic data path <- system.file("extdata", "affy_raw_data", package="GWASdata") ################################################### ### code chunk number 12: Affymetrix.Rnw:406-419 ################################################### diag.qual.file <- "diag.qual.RData" diag.qual <- ncdfAddData(path = path, ncdf.filename = qxy.nc.file, snp.annotation = snp.annotation, scan.annotation = scan.annotation, sep.type = "\t", skip.num = 1, col.total = 6, col.nums = col.nums, scan.name.in.file = -1, scan.start.index = 1, diagnostics.filename = diag.qual.file, verbose = FALSE) ################################################### ### code chunk number 13: Affymetrix.Rnw:426-437 ################################################### scan.annotation <- affy_scan_annot[1:3, c("scanID", "genoRunID", "alleleFile")] names(scan.annotation) <- c("scanID", "scanName", "file") diag.xy.file <- "diag.xy.RData" diag.xy <- ncdfAddIntensity(path = path, ncdf.filename = qxy.nc.file, snp.annotation = snp.annotation, scan.annotation = scan.annotation, scan.start.index = 1, n.consecutive.scans = 3, diagnostics.filename = diag.xy.file, verbose = FALSE) ################################################### ### code chunk number 14: Affymetrix.Rnw:444-451 ################################################### # Open the NetCDF file we just created (intenfile <- NcdfIntensityReader(qxy.nc.file)) # Take out the normalized X intensity values for the first # 5 SNPs for the first 3 samples (xinten <- getX(intenfile, snp=c(1,5), scan=c(1,3))) # Close the NetCDF file close(intenfile) ################################################### ### code chunk number 15: Affymetrix.Rnw:457-476 ################################################### scan.annotation <- affy_scan_annot[1:5, c("scanID", "genoRunID", "chpFile", "alleleFile")] names(scan.annotation) <- c("scanID", "scanName", "file", "inten.file") check.qxy.file <- "check.qxy.RData" check.qxy <- ncdfCheckIntensity(path = path, intenpath = path, ncdf.filename = qxy.nc.file, snp.annotation = snp.annotation, scan.annotation = scan.annotation, sep.type = "\t", skip.num = 1, col.total = 6, col.nums = col.nums, scan.name.in.file = -1, check.scan.index = 1:3, n.scans.loaded = 3, affy.inten = TRUE, diagnostics.filename = check.qxy.file, verbose = FALSE) ################################################### ### code chunk number 16: Affymetrix.Rnw:564-574 ################################################### bl.nc.file <- "tmp.bl.nc" # get snp annotation data frame for function snp <- affy_snp_annot[,c("snpID", "chromosome", "position")] ncdfCreate(ncdf.filename = bl.nc.file, snp.annotation = snp, variables = c("BAlleleFreq","LogRRatio"), array.name = "AffyGenomeWideSNP_6", genome.build = "36", n.samples = 3, precision = "single") ################################################### ### code chunk number 17: Affymetrix.Rnw:586-600 ################################################### xyNC <- NcdfIntensityReader(qxy.nc.file) genoNC <- NcdfGenotypeReader(geno.nc.file) BAFfromGenotypes(xyNC, genoNC, bl.ncdf.filename = bl.nc.file, min.n.genotypes = 0, call.method ="by.study") close(xyNC) close(genoNC) # Open the NetCDF file we just created (blfile <- NcdfIntensityReader(bl.nc.file)) # Look at the BAlleleFrequency values for the first 5 SNPs (baf <- getBAlleleFreq(blfile, snp=c(1,5), scan=c(1,3))) # Close the NetCDF file close(blfile) ################################################### ### code chunk number 18: Affymetrix.Rnw:605-608 ################################################### file.remove(geno.nc.file, qxy.nc.file, bl.nc.file) file.remove(diag.geno.file, diag.qual.file, diag.xy.file) file.remove(check.geno.file, check.qxy.file)