## -----------------------------------------------------------------------------
# Retrieve the HarmonizedTCGAData resources from ExperimentHub. The query
# result is indexed positionally: the first three records are bound to `Wall`,
# `project_ids` and `surv.plot` respectively.
library(ExperimentHub)
eh <- ExperimentHub()
myfiles <- query(eh, "HarmonizedTCGAData")
Wall <- myfiles[[1]]
project_ids <- myfiles[[2]]
surv.plot <- myfiles[[3]]

## -----------------------------------------------------------------------------
# Top-level names of the nested list `Wall`.
names(Wall)

## -----------------------------------------------------------------------------
# Names found two levels down, inside the first element of the first element.
names(Wall[[1]][[1]])
# Note: "fpkm" refers to the gene expression measurement, which can be
# HTSeq-Counts, transformed HTSeq-Counts (log2 transformation or
# variance-stabilizing transformation), or FPKM values. The name is kept for
# historical reasons even though it is broader than FPKM.

## -----------------------------------------------------------------------------
# Names found one level down, inside the first element of `Wall`.
names(Wall[[1]])

## -----------------------------------------------------------------------------
# Spectral clustering (k = 2) on a single affinity matrix.
library(ANF)
affinity.mat <- Wall[["adrenal_gland"]][["log.sel"]][["fpkm"]]
labels <- spectral_clustering(affinity.mat, k = 2)

## -----------------------------------------------------------------------------
# Compare the cluster labels with the disease types looked up in `project_ids`
# by the row names of the affinity matrix.
true.disease.types <- as.factor(project_ids[rownames(affinity.mat)])
print(table(labels, true.disease.types))

nmi <- igraph::compare(true.disease.types, labels, method = "nmi")
adjusted.rand <- igraph::compare(
  true.disease.types, labels,
  method = "adjusted.rand"
)

# We can also calculate a p-value using the `surv.plot` data.
# Subset the survival table to the rows named in the affinity matrix.
surv.plot <- surv.plot[rownames(affinity.mat), ]
# The event indicator passed to Surv() is the negation of `censored`.
f <- survival::Surv(surv.plot$time, !surv.plot$censored)
fit <- survival::survdiff(f ~ labels)
# Degrees of freedom: number of groups reported by survdiff() minus one.
pval <- stats::pchisq(fit$chisq, df = length(fit$n) - 1, lower.tail = FALSE)
print(paste("NMI =", nmi, ", ARI =", adjusted.rand, ", p-val =", pval))

## -----------------------------------------------------------------------------
# Same evaluation in a single call through `eval_clu`.
res <- eval_clu(project_ids, w = affinity.mat, surv = surv.plot)
res$labels

## -----------------------------------------------------------------------------
# Evaluate clustering on the uterus "raw.all" gene expression affinity matrix.
res <- eval_clu(project_ids, w = Wall$uterus$raw.all$fpkm)

## -----------------------------------------------------------------------------
# Fuse three matrices: "fpkm" (gene expression), "mirnas" (miRNA expression)
# and "methy450" (DNA methylation).
fused.mat <- ANF(Wall = Wall$uterus$raw.all)

# Spectral clustering on the fused patient affinity matrix.
labels <- spectral_clustering(A = fused.mat, k = 2)

# Or we can directly evaluate clustering results using the function `eval_clu`,
# which calls `spectral_clustering` and calculates NMI and ARI (and a p-value
# if patient survival data is available; `surv.plot` does not contain
# information for uterus cancer patients).
res <- eval_clu(true_class = project_ids[rownames(fused.mat)], w = fused.mat)