## ----style, echo = FALSE, results = 'asis'------------------------------------
BiocStyle::markdown()

## ---- echo=F------------------------------------------------------------------
#knitr::opts_chunk$set(echo = TRUE, fig.align = "left")

## ---- include = FALSE---------------------------------------------------------
# Display helpers used by the hidden (echo = FALSE) chunks below.

#' Preview the top-left corner of a matrix-like object as a data frame.
#'
#' @param M A matrix-like object supporting `[i, j, drop = FALSE]`.
#' @param w Maximum number of columns to show.
#' @param h Maximum number of rows to show.
#' @return A data frame with at most `h` rows and `w` columns.
show_matrix <- function(M, w, h) {
  # Clamp to the actual dimensions so small inputs do not raise
  # "subscript out of bounds"; seq_len() also handles a zero size correctly.
  rows <- seq_len(min(h, nrow(M)))
  cols <- seq_len(min(w, ncol(M)))
  # drop = FALSE keeps a single row/column as a matrix instead of a vector.
  as.data.frame(as.matrix(M[rows, cols, drop = FALSE]))
}

#' Describe the dimensions of a table as a single string.
#'
#' @param df Any object for which `ncol()` and `nrow()` are defined.
#' @return A character string reporting the column and row counts.
size_matrix <- function(df) {
  paste("ncol:", ncol(df), " - nrow:", nrow(df))
}

## ---- error=F, message=F, results = "hide"------------------------------------
library(CIMICE)

## ---- error=F, message=F, results = "hide"------------------------------------
# Dataframe manipulation
library(dplyr)
# Plot display
library(ggplot2)
# Improved string operations
library(glue)
# Dataframe manipulation
library(tidyr)
# Graph data management
library(igraph)
# Remove transitive edges on a graph
# library(relations)
# Interactive graph visualization
library(networkD3)
# Interactive graph visualization
library(visNetwork)
# Correlation plot visualization
library(ggcorrplot)
# Functional R programming
library(purrr)
# Graph Visualization
library(ggraph)
# sparse matrices
library(Matrix)

## ---- results = 'hide'--------------------------------------------------------
# Read input dataset in CAPRI/CAPRESE format
dataset.big <- read_CAPRI(
  system.file("extdata", "example.CAPRI", package = "CIMICE", mustWork = TRUE)
)

## ---- echo=F------------------------------------------------------------------
dataset.big %>% show_matrix(6, 6)
dataset.big %>% size_matrix()

## -----------------------------------------------------------------------------
# genes
dataset <- make_dataset(A, B, C, D) %>%
  # samples
  update_df("S1", 0, 0, 0, 1) %>%
  update_df("S2", 1, 0, 0, 0) %>%
  update_df("S3", 1, 0, 0, 0) %>%
  update_df("S4", 1, 0, 0, 1) %>%
  update_df("S5", 1, 1, 0, 1) %>%
  update_df("S6", 1, 1, 0, 1) %>%
  update_df("S7", 1, 0, 1, 1) %>%
  update_df("S8", 1, 1, 0, 1)

## ---- echo=FALSE--------------------------------------------------------------
dataset

## -----------------------------------------------------------------------------
# path to MAF file
read_MAF(
  system.file(
    "extdata", "paac_jhu_2014_500.maf",
    package = "CIMICE", mustWork = TRUE
  )
)[1:5, 1:5]

## -----------------------------------------------------------------------------
gene_mutations_hist(dataset.big)

## -----------------------------------------------------------------------------
sample_mutations_hist(dataset.big, binwidth = 10)

## ---- eval = FALSE------------------------------------------------------------
# select_genes_on_mutations(dataset.big, 100)

## ---- echo = FALSE------------------------------------------------------------
temp <- select_genes_on_mutations(dataset.big, 100)
temp %>% show_matrix(6, 6)
temp %>% size_matrix()

## ---- eval = FALSE------------------------------------------------------------
# select_samples_on_mutations(dataset.big, 100, desc = FALSE)

## ---- echo=FALSE--------------------------------------------------------------
temp <- select_samples_on_mutations(dataset.big, 100, desc = FALSE)
temp %>% show_matrix(6, 6)
temp %>% size_matrix()

## ---- eval = FALSE------------------------------------------------------------
# select_samples_on_mutations(dataset.big , 100, desc = FALSE) %>%
#     select_genes_on_mutations(100)

## ---- echo=FALSE--------------------------------------------------------------
temp <- select_samples_on_mutations(dataset.big, 100, desc = FALSE) %>%
  select_genes_on_mutations(100)
temp %>% show_matrix(6, 6)
temp %>% size_matrix()

## -----------------------------------------------------------------------------
corrplot_genes(dataset)

## -----------------------------------------------------------------------------
corrplot_samples(dataset)

## -----------------------------------------------------------------------------
# groups and counts equal genotypes
compactedDataset <- compact_dataset(dataset)

## ---- echo = FALSE------------------------------------------------------------
compactedDataset

## -----------------------------------------------------------------------------
samples <- compactedDataset$matrix

## ----echo=F-------------------------------------------------------------------
samples

## -----------------------------------------------------------------------------
genes <- colnames(samples)

## ----echo=F-------------------------------------------------------------------
genes

## -----------------------------------------------------------------------------
# relative frequency of each compacted genotype (counts normalised to sum 1)
freqs <- compactedDataset$counts / sum(compactedDataset$counts)

## ---- echo = FALSE------------------------------------------------------------
freqs

## -----------------------------------------------------------------------------
# prepare node labels listing the mutated genes for each node
labels <- prepare_labels(samples, genes)
# fall back to the matrix row names when the compacted dataset carries none
if (is.null(compactedDataset$row_names)) {
  compactedDataset$row_names <- rownames(compactedDataset$matrix)
}
matching_samples <- compactedDataset$row_names
# fix clonal genotype absence, if needed
fix <- fix_clonal_genotype(samples, freqs, labels, matching_samples)
samples <- fix[["samples"]]
freqs <- fix[["freqs"]]
labels <- fix[["labels"]]
matching_samples <- fix[["matching_samples"]]

## ----echo=F-------------------------------------------------------------------
samples

## -----------------------------------------------------------------------------
# compute edges based on subset relation
edges <- build_topology_subset(samples)

## -----------------------------------------------------------------------------
# remove transitive edges and prepare igraph object
g <- build_subset_graph(edges, labels)

## ---- echo=F, out.height="300px",dpi=300--------------------------------------
V(g)$vertex.size <- rep(10, length(V(g)))
plot(g, vertex.size = rep(55, length(V(g))))

## -----------------------------------------------------------------------------
# as_adjacency_matrix() is the non-deprecated name of igraph's as_adj()
A <- as_adjacency_matrix(g)

## ---- echo=F------------------------------------------------------------------
A

## -----------------------------------------------------------------------------
no.of.children <- get_no_of_children(A, g)

## ---- echo=F------------------------------------------------------------------
no.of.children

## -----------------------------------------------------------------------------
upWeights <- computeUPW(g, freqs, no.of.children, A)

## ---- echo=F------------------------------------------------------------------
upWeights

## -----------------------------------------------------------------------------
normUpWeights <- normalizeUPW(g, freqs, no.of.children, A, upWeights)

## ---- echo=F------------------------------------------------------------------
normUpWeights

## -----------------------------------------------------------------------------
downWeights <- computeDWNW(g, freqs, no.of.children, A, normUpWeights)

## ---- echo=F------------------------------------------------------------------
downWeights

## -----------------------------------------------------------------------------
normDownWeights <- normalizeDWNW(g, freqs, no.of.children, A, downWeights)

## ---- echo=F------------------------------------------------------------------
normDownWeights

## -----------------------------------------------------------------------------
draw_ggraph(quick_run(example_dataset()))

## ---- results = 'hide'--------------------------------------------------------
draw_networkD3(quick_run(example_dataset()))

## ---- echo=FALSE, out.width = "100%", out.height="100%"-----------------------
knitr::include_graphics("networkD3.png")

## ---- results = 'hide'--------------------------------------------------------
draw_visNetwork(quick_run(example_dataset()))

## ---- echo=FALSE, out.width = "100%", out.height="100%"-----------------------
knitr::include_graphics("visGraph.png")

## -----------------------------------------------------------------------------
cat(to_dot(quick_run(example_dataset())))

## -----------------------------------------------------------------------------
sessionInfo()

## ---- echo=FALSE--------------------------------------------------------------
# run ALL