## -----------------------------------------------------------------------------
## Attach the package that supplies the dataset accessor and the parameter
## settings used below, without printing its startup messages.
suppressPackageStartupMessages({
  library(DuoClustering2018)
})

## Dataset name, the dataset object itself, and the clustering method to use.
## `scename` and `method` are combined below to look up the parameter set.
scename <- "sce_filteredExpr10_Koh"
sce <- sce_filteredExpr10_Koh()
method <- "PCAHC"

## -----------------------------------------------------------------------------
## Load parameter files. General dataset and method parameters as well as
## dataset/method-specific parameters. The entry is selected by the key
## "<scename>_<method>".
params <- duo_clustering_all_parameter_settings_v2()[[
  paste0(scename, "_", method)
]]
## Show the selected parameters.
params

## ---- eval=FALSE--------------------------------------------------------------
## The chunk below is marked eval=FALSE and is therefore kept commented out; it
## is retained as a reference for how the clustering runs are produced.
## NOTE(review): if it is ever enabled, `%>%` and `colData()` must be in scope
## -- confirm that attaching DuoClustering2018 provides them.
# ## Set number of times to run clustering for each k
# n_rep <- 5
#
# ## Run clustering
# set.seed(1234)
# L <- lapply(seq_len(n_rep), function(i) { ## For each run
#   cat(paste0("run = ", i, "\n"))
#   if (method == "Seurat") {
#     tmp <- lapply(params$range_resolutions, function(resolution) {
#       ## For each resolution
#       cat(paste0("resolution = ", resolution, "\n"))
#       ## Run clustering
#       res <- get(paste0("apply_", method))(sce = sce, params = params,
#                                            resolution = resolution)
#
#       ## Put output in data frame
#       df <- data.frame(dataset = scename,
#                        method = method,
#                        cell = names(res$cluster),
#                        run = i,
#                        k = length(unique(res$cluster)),
#                        resolution = resolution,
#                        cluster = res$cluster,
#                        stringsAsFactors = FALSE, row.names = NULL)
#       tm <- data.frame(dataset = scename,
#                        method = method,
#                        run = i,
#                        k = length(unique(res$cluster)),
#                        resolution = resolution,
#                        user.self = res$st[["user.self"]],
#                        sys.self = res$st[["sys.self"]],
#                        user.child = res$st[["user.child"]],
#                        sys.child = res$st[["sys.child"]],
#                        elapsed = res$st[["elapsed"]],
#                        stringsAsFactors = FALSE, row.names = NULL)
#       kest <- data.frame(dataset = scename,
#                          method = method,
#                          run = i,
#                          k = length(unique(res$cluster)),
#                          resolution = resolution,
#                          est_k = res$est_k,
#                          stringsAsFactors = FALSE, row.names = NULL)
#       list(clusters = df, timing = tm, kest = kest)
#     }) ## End for each resolution
#   } else {
#     tmp <- lapply(params$range_clusters, function(k) { ## For each k
#       cat(paste0("k = ", k, "\n"))
#       ## Run clustering
#       res <- get(paste0("apply_", method))(sce = sce, params = params, k = k)
#
#       ## Put output in data frame
#       df <- data.frame(dataset = scename,
#                        method = method,
#                        cell = names(res$cluster),
#                        run = i,
#                        k = k,
#                        resolution = NA,
#                        cluster = res$cluster,
#                        stringsAsFactors = FALSE, row.names = NULL)
#       tm <- data.frame(dataset = scename,
#                        method = method,
#                        run = i,
#                        k = k,
#                        resolution = NA,
#                        user.self = res$st[["user.self"]],
#                        sys.self = res$st[["sys.self"]],
#                        user.child = res$st[["user.child"]],
#                        sys.child = res$st[["sys.child"]],
#                        elapsed = res$st[["elapsed"]],
#                        stringsAsFactors = FALSE, row.names = NULL)
#       kest <- data.frame(dataset = scename,
#                          method = method,
#                          run = i,
#                          k = k,
#                          resolution = NA,
#                          est_k = res$est_k,
#                          stringsAsFactors = FALSE, row.names = NULL)
#       list(clusters = df, timing = tm, kest = kest)
#     }) ## End for each k
#   }
#
#   ## Summarize across different values of k
#   assignments <- do.call(rbind, lapply(tmp, function(w) w$clusters))
#   timings <- do.call(rbind, lapply(tmp, function(w) w$timing))
#   k_estimates <- do.call(rbind, lapply(tmp, function(w) w$kest))
#   list(assignments = assignments, timings = timings, k_estimates = k_estimates)
# }) ## End for each run
#
# ## Summarize across different runs
# assignments <- do.call(rbind, lapply(L, function(w) w$assignments))
# timings <- do.call(rbind, lapply(L, function(w) w$timings))
# k_estimates <- do.call(rbind, lapply(L, function(w) w$k_estimates))
#
# ## Add true group for each cell
# truth <- data.frame(cell = as.character(rownames(colData(sce))),
#                     trueclass = as.character(colData(sce)$phenoid),
#                     stringsAsFactors = FALSE)
# assignments$trueclass <- truth$trueclass[match(assignments$cell, truth$cell)]
#
# ## Combine results
# res <- list(assignments = assignments, timings = timings,
#             k_estimates = k_estimates)
#
# df <- dplyr::full_join(res$assignments %>%
#                          dplyr::select(dataset, method, cell, run, k,
#                                        resolution, cluster, trueclass),
#                        res$k_estimates %>%
#                          dplyr::select(dataset, method, run, k,
#                                        resolution, est_k)
# ) %>% dplyr::full_join(res$timings %>% dplyr::select(dataset, method, run, k,
#                                                      resolution, elapsed))

## -----------------------------------------------------------------------------
## Record the R version and attached packages for reproducibility.
sessionInfo()