## ----echo=FALSE--------------------------------------------------------------------------------------------- options(width=110) ## ----message=FALSE------------------------------------------------------------------------------------------ library(SeqArray) library(SAIGEgds) # the genotype file in SeqArray GDS format (1000 Genomes Phase1, chromosome 22) (geno_fn <- seqExampleFileName("KG_Phase1")) ## ----------------------------------------------------------------------------------------------------------- # open a SeqArray file in the package (1000 Genomes Phase1, chromosome 22) gds <- seqOpen(geno_fn) ## ----------------------------------------------------------------------------------------------------------- library(SNPRelate) set.seed(1000) snpset <- snpgdsLDpruning(gds) str(snpset) snpset.id <- unlist(snpset, use.names=FALSE) # get the variant IDs of a LD-pruned set head(snpset.id) ## ----------------------------------------------------------------------------------------------------------- grm_fn <- "grm_geno.gds" seqSetFilter(gds, variant.id=snpset.id) # export to a GDS genotype file without annotation data seqExport(gds, grm_fn, info.var=character(), fmt.var=character(), samp.var=character()) ## ----------------------------------------------------------------------------------------------------------- # close the file seqClose(gds) ## ----------------------------------------------------------------------------------------------------------- set.seed(1000) sampid <- seqGetData(grm_fn, "sample.id") # sample IDs in the genotype file pheno <- data.frame( sample.id = sampid, y = sample(c(0, 1), length(sampid), replace=TRUE, prob=c(0.95, 0.05)), x1 = rnorm(length(sampid)), x2 = rnorm(length(sampid)), stringsAsFactors = FALSE) head(pheno) # null model fitting using GRM from grm_fn grm_fn glmm <- seqFitNullGLMM_SPA(y ~ x1 + x2, pheno, grm_fn, trait.type="binary", sample.col="sample.id") ## ----------------------------------------------------------------------------------------------------------- # genetic variants stored in the file geno_fn geno_fn # calculate, using 2 processes assoc <- seqAssocGLMM_SPA(geno_fn, glmm, mac=10, parallel=2) head(assoc) # filtering based on p-value assoc[assoc$pval < 5e-4, ] ## ----------------------------------------------------------------------------------------------------------- # save to 'assoc.gds' seqAssocGLMM_SPA(geno_fn, glmm, mac=10, parallel=2, res.savefn="assoc.gds") ## ----------------------------------------------------------------------------------------------------------- # open the GDS file (f <- openfn.gds("assoc.gds")) # get p-values pval <- read.gdsn(index.gdsn(f, "pval")) summary(pval) closefn.gds(f) ## ----------------------------------------------------------------------------------------------------------- res <- seqSAIGE_LoadPval("assoc.gds") head(res) ## ----fig.width=4, fig.height=4, fig.align='center'---------------------------------------------------------- n <- nrow(assoc) # expected p-values exp.pval <- (1:n) / n # observed p-values obs.pval <- sort(assoc$pval) plot(-log10(exp.pval), -log10(obs.pval), xlab="-log10(expected P)", ylab="-log10(observed P)", cex=2/3) abline(0, 1, col="red", lty=2) ## ----------------------------------------------------------------------------------------------------------- sessionInfo() ## ----echo=FALSE--------------------------------------------------------------------------------------------- unlink(c("grm_geno.gds", "assoc.gds"), force=TRUE)