## ----setup, echo=FALSE, results="hide"---------------------------------------- knitr::opts_chunk$set(tidy = FALSE, cache = FALSE, dev = "png", message = FALSE, error = FALSE, warning = FALSE) ## ----------------------------------------------------------------------------- library(epistasisGA) data(case) data(dad) data(mom) ## ----------------------------------------------------------------------------- library(Matrix) block.ld.mat <- as.matrix(bdiag(list(matrix(rep(TRUE, 25^2), nrow = 25), matrix(rep(TRUE, 25^2), nrow = 25), matrix(rep(TRUE, 25^2), nrow = 25), matrix(rep(TRUE, 25^2), nrow = 25)))) ## ----------------------------------------------------------------------------- pp.list <- preprocess.genetic.data(case, father.genetic.data = dad, mother.genetic.data = mom, block.ld.mat = block.ld.mat) ## ---- eval=F------------------------------------------------------------------ # ### FOR ILLUSTRATION ONLY, DO NOT TRY TO RUN THIS CHUNK ### # pp.list <- preprocess.genetic.data(affected.sibling.genotypes, # complement.genetic.data = unaffected.sibling.genotypes, # block.ld.mat = block.ld.mat) # ## ----------------------------------------------------------------------------- complement <- mom + dad - case ## ---- eval = FALSE------------------------------------------------------------ # ### STILL JUST FOR ILLUSTRATION ### # combined.case <- rbind(case, affected.sibling.genotypes) # combined.complement <- rbind(complement, unaffected.sibling.genotypes) # pp.list <- preprocess.genetic.data(case.genetic.data = combined.case, # complement.genetic.data = combined.complement, # block.ld.mat = block.ld.mat) # ## ---- message = FALSE--------------------------------------------------------- run.gadgets(pp.list, n.chromosomes = 5, chromosome.size = 3, results.dir = "size3_res", cluster.type = "interactive", registryargs = list(file.dir = "size3_reg", seed = 1300), n.islands = 8, island.cluster.size = 4, n.migrations = 2) run.gadgets(pp.list, n.chromosomes = 5, chromosome.size = 4, results.dir = "size4_res", cluster.type = "interactive", registryargs = list(file.dir = "size4_reg", seed = 1400), n.islands = 8, island.cluster.size = 4, n.migrations = 2) ## ---- eval=F------------------------------------------------------------------ # ### FOR ILLUSTRATION ONLY, DO NOT TRY TO RUN THIS CHUNK ### # # library(BiocParallel) # fname <- batchtoolsTemplate("slurm") # run.gadgets(pp.list, n.chromosomes = 20, chromosome.size = 3, # results.dir = "size3_res", cluster.type = "slurm", # registryargs = list(file.dir = "size3_reg", seed = 1300), # cluster.template = fname, # resources = list(chunks.as.arrayjobs = TRUE), # n.islands = 12, # island.cluster.size = 4) # ## ----------------------------------------------------------------------------- data(snp.annotations) size3.combined.res <- combine.islands("size3_res", snp.annotations, pp.list, 2) size4.combined.res <- combine.islands("size4_res", snp.annotations, pp.list, 2) ## ----------------------------------------------------------------------------- library(magrittr) library(knitr) library(kableExtra) kable(head(size4.combined.res)) %>% kable_styling() %>% scroll_box(width = "750px") ## ----------------------------------------------------------------------------- set.seed(1400) perm.data.list <- permute.dataset(case, father.genetic.data = dad, mother.genetic.data = mom, n.permutations = 4) ## ----------------------------------------------------------------------------- preprocess.lists <- lapply(perm.data.list, function(permutation){ preprocess.genetic.data(permutation$case, complement.genetic.data = permutation$comp, block.ld.mat = block.ld.mat) }) ## ---- results = "hide"-------------------------------------------------------- #specify chromosome sizes chrom.sizes <- 3:4 #specify a different seed for each permutation seeds <- 4:7 #run GADGETS for each permutation and size lapply(chrom.sizes, function(chrom.size){ lapply(seq_along(preprocess.lists), function(permutation){ perm.data.list <- preprocess.lists[[permutation]] seed.val <- chrom.size*seeds[permutation] res.dir <- paste0("perm", permutation, "_size", chrom.size, "_res") reg.dir <- paste0("perm", permutation, "_size", chrom.size, "_reg") run.gadgets(perm.data.list, n.chromosomes = 5, chromosome.size = chrom.size, results.dir = res.dir, cluster.type = "interactive", registryargs = list(file.dir = reg.dir, seed = seed.val), n.islands = 8, island.cluster.size = 4, n.migrations = 2) }) }) #condense the results perm.res.list <- lapply(chrom.sizes, function(chrom.size){ lapply(seq_along(preprocess.lists), function(permutation){ perm.data.list <- preprocess.lists[[permutation]] res.dir <- paste0("perm", permutation, "_size", chrom.size, "_res") combine.islands(res.dir, snp.annotations, perm.data.list, 2) }) }) ## ----------------------------------------------------------------------------- # chromosome size 3 results # function requires a list of vectors of # permutation based fitness scores chrom3.perm.fs <- lapply(perm.res.list[[1]], function(x) x$fitness.score) chrom3.list <- list(observed.data = size3.combined.res$fitness.score, permutation.list = chrom3.perm.fs) # chromosome size 4 results chrom4.perm.fs <- lapply(perm.res.list[[2]], function(x) x$fitness.score) chrom4.list <- list(observed.data = size4.combined.res$fitness.score, permutation.list = chrom4.perm.fs) # list of results across chromosome sizes, with each list # element corresponding to a chromosome size final.results <- list(chrom3.list, chrom4.list) # run global test global.test.res <- global.test(final.results, n.top.scores = 5) # examine how many chromosomes were used for each chromosome size global.test.res$chrom.size.k # look at the global test stat and p-value global.test.res$obs.test.stat global.test.res$pval ## ---- echo = FALSE------------------------------------------------------------ library(ggplot2) plot.data <- data.frame(distance_type = c(rep("original", 4), rep("log", 4)), data = rep(rep("Permuted", 4), 2), distance = c(global.test.res$perm.test.stats, log(global.test.res$perm.test.stats))) obs.plot.data <- data.frame(distance_type = c("original", "log"), data = rep("Observed", 2), distance = c(global.test.res$obs.test.stat, log(global.test.res$obs.test.stat))) ggplot(plot.data, aes(x = factor(""), y = distance, color = data)) + geom_boxplot() + geom_point() + geom_point(data = obs.plot.data, aes(x = factor(""), y = distance, color = data)) + facet_wrap(distance_type ~ ., scales = "free_y", strip.position = "left", nrow = 2, labeller = as_labeller(c(original = "T", log = "log(T)"))) + ylab(NULL) + xlab(NULL) + theme(strip.background = element_blank(), strip.placement = "outside", axis.ticks.x = element_blank(), axis.text.x = element_blank()) + guides(color=guide_legend(title="Data Type")) ## ----------------------------------------------------------------------------- global.test.res$obs.marginal.test.stats global.test.res$marginal.pvals ## ----------------------------------------------------------------------------- global.test.res$max.obs.fitness global.test.res$max.order.pvals ## ---- fig.width=6------------------------------------------------------------- library(grid) grid.draw(global.test.res$boxplot.grob) ## ----------------------------------------------------------------------------- top.snps <- as.vector(t(size4.combined.res[1, 1:4])) set.seed(10) epi.test.res <- epistasis.test(top.snps, pp.list) epi.test.res$pval ## ----------------------------------------------------------------------------- # vector of 95th percentile of null fitness scores chrom.size.thresholds <- global.test.res$max.perm.95th.pctl # chromosome size 3 threshold d3.t <- chrom.size.thresholds[1] # chromosome size 4 threshold d4.t <- chrom.size.thresholds[2] # create results list obs.res.list <- list(size3.combined.res[size3.combined.res$fitness.score >= d3.t, ], size4.combined.res[size4.combined.res$fitness.score >= d4.t, ]) ## ----------------------------------------------------------------------------- obs.res.list.no.permutes <- list(size3.combined.res[1:5, ], size4.combined.res[1:5, ]) ## ----------------------------------------------------------------------------- set.seed(10) graphical.scores <- compute.graphical.scores(obs.res.list, pp.list) ## ---- fig.width = 14, fig.height = 12----------------------------------------- network.plot(graphical.scores, pp.list, graph.area = 200, node.size = 40, vertex.label.cex = 2) ## ---- fig.width = 14, fig.height = 12----------------------------------------- network.plot(graphical.scores, pp.list, n.top.scoring.pairs = 6, graph.area = 10, node.size = 40, vertex.label.cex = 2) ## ----------------------------------------------------------------------------- head(graphical.scores[["pair.scores"]]) head(graphical.scores[["snp.scores"]]) ## ---- fig.width = 14, fig.height = 12----------------------------------------- risk.numbers <- c(51, 52, 76, 77) graphical.scores[[1]]$SNP1.rsid <- ifelse(graphical.scores[[1]]$SNP1 %in% risk.numbers, "*", "") graphical.scores[[1]]$SNP2.rsid <- ifelse(graphical.scores[[1]]$SNP2 %in% risk.numbers, "*", "") graphical.scores[[2]]$rsid <- ifelse(graphical.scores[[2]]$SNP %in% risk.numbers, "*", "") network.plot(graphical.scores, pp.list, graph.area = 200, node.size = 40, vertex.label.cex = 10) ## ---- results="hide"---------------------------------------------------------- #remove all example directories perm.reg.dirs <- as.vector(outer(paste0("perm", 1:4), paste0("_size", chrom.sizes, "_reg"), paste0)) perm.res.dirs <- as.vector(outer(paste0("perm", 1:4), paste0("_size", chrom.sizes, "_res"), paste0)) lapply(c("size3_res", "size3_reg", "size4_res", "size4_reg", perm.reg.dirs, perm.res.dirs), unlink, recursive = TRUE) ## ----------------------------------------------------------------------------- #session information sessionInfo()