### R code from vignette source 'vignettes/VariantAnnotation/inst/doc/filterVcf.Rnw' ################################################### ### code chunk number 1: setup ################################################### options(width=80, prompt = " ", continue = " ") ################################################### ### code chunk number 2: prefilters ################################################### isGermlinePrefilter <- function(x) { grepl("Germline", x, fixed=TRUE) } notInDbsnpPrefilter <- function(x) { !(grepl("dbsnp", x, fixed=TRUE)) } ################################################### ### code chunk number 3: filters ################################################### isSnp <- function(x) { refSnp <- nchar(ref(x)) == 1L a <- alt(x) altSnp <- elementLengths(a) == 1L ai <- unlist(a[altSnp]) # all length 1, so unlisting is 1:1 map altSnp[altSnp] <- nchar(ai) == 1L & (ai %in% c("A", "C", "G", "T")) refSnp & altSnp } allelicDepth <- function(x) { ## ratio of AD of the "alternate allele" for the tumor sample ## OR "reference allele" for normal samples to total reads for ## the sample should be greater than some threshold (say 0.1, ## that is: at least 10% of the sample should have the allele ## of interest) ad <- geno(x)$AD tumorPct <- ad[,1,2,drop=FALSE] / rowSums(ad[,1,,drop=FALSE]) normPct <- ad[,2,1, drop=FALSE] / rowSums(ad[,2,,drop=FALSE]) test <- (tumorPct > 0.1) | (normPct > 0.1) !is.na(test) & test } ################################################### ### code chunk number 4: createFilterRules ################################################### library(VariantAnnotation) prefilters <- FilterRules(list(germline=isGermlinePrefilter, dbsnp=notInDbsnpPrefilter)) filters <- FilterRules(list(isSnp=isSnp, AD=allelicDepth)) ################################################### ### code chunk number 5: createFilteredFile ################################################### file.gz <- system.file("extdata", "chr7-sub.vcf.gz", package="VariantAnnotation") file.gz.tbi <- system.file("extdata", "chr7-sub.vcf.gz.tbi", package="VariantAnnotation") destination.file <- tempfile() tabix.file <- TabixFile(file.gz, yieldSize=10000) filterVcf(tabix.file, "hg19", destination.file, prefilters=prefilters, filters=filters, verbose=TRUE) ################################################### ### code chunk number 6: mcf7regulatoryRegions (eval = FALSE) ################################################### ## library(AnnotationHub) ## hub <- AnnotationHub() ## # paste operation is only for display purposes on a narrow page ## ctcf.regs <- paste("goldenpath.hg19.encodeDCC.wgEncodeUwTfbs", ## "wgEncodeUwTfbsMcf7CtcfStdPkRep1.narrowPeak_0.0.1.RData", ## sep=".") ## mcf7.gr <- hub[[ctcf.regs]] ################################################### ### code chunk number 7: findOverlaps (eval = FALSE) ################################################### ## vcf <- readVcf(destination.file, "hg19") ## seqlevels(vcf) <- paste("chr", seqlevels(vcf), sep="") ## ov.mcf7 <- findOverlaps(vcf, mcf7.gr) ################################################### ### code chunk number 8: locateVariant (eval = FALSE) ################################################### ## library(TxDb.Hsapiens.UCSC.hg19.knownGene) ## txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene ## print(locateVariants(vcf[6,], txdb, AllVariants()))