## ----setup, include=FALSE----------------------------------------------------- knitr::opts_chunk$set(echo = TRUE) ## ---- eval = FALSE------------------------------------------------------------ # if (!("devtools" %in% rownames(installed.packages()))) # install.packages("devtools") # # library(devtools) # devtools::install_github("SydneyBioX/scReClassify") ## ---- eval = FALSE------------------------------------------------------------ # if (!requireNamespace("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # BiocManager::install("scReClassify") ## ----------------------------------------------------------------------------- suppressPackageStartupMessages({ library(scReClassify) library(DT) library(mclust) library(dplyr) library(SummarizedExperiment) library(SingleCellExperiment) }) data("gse87795_subset_sce") dat <- gse87795_subset_sce cellTypes <- gse87795_subset_sce$cellTypes ## ----------------------------------------------------------------------------- # Cell types table(cellTypes) # We set the number of clusters nCs <- length(table(cellTypes)) nCs # This demo dataset is already pre-processed dim(dat) ## ----------------------------------------------------------------------------- reducedDim(dat, "matPCs") <- matPCs(dat, assay = "logNorm", 0.7) ## ----------------------------------------------------------------------------- lab <- cellTypes set.seed(1) # Function to create noise in the cell type label noisyCls <- function(dat, rho, cls.truth){ cls.noisy <- cls.truth names(cls.noisy) <- colnames(dat) for(i in seq_len(length(table(cls.noisy)))) { # class label starts from 0 if (i != length(table(cls.noisy))) { cls.noisy[sample(which(cls.truth == names(table(cls.noisy))[i]), floor(sum(cls.truth == names(table(cls.noisy))[i])* rho))] <- names(table(cls.noisy))[i+1] } else { cls.noisy[sample(which(cls.truth == names(table(cls.noisy))[i]), floor(sum(cls.truth == names(table(cls.noisy))[i])* rho))] <- names(table(cls.noisy))[1] } } print(sum(cls.truth != cls.noisy)) return(cls.noisy) } cls.noisy01 <- noisyCls(t(reducedDim(dat, "matPCs")), rho=0.1, lab) cls.noisy02 <- noisyCls(t(reducedDim(dat, "matPCs")), rho=0.2, lab) cls.noisy03 <- noisyCls(t(reducedDim(dat, "matPCs")), rho=0.3, lab) cls.noisy04 <- noisyCls(t(reducedDim(dat, "matPCs")), rho=0.4, lab) cls.noisy05 <- noisyCls(t(reducedDim(dat, "matPCs")), rho=0.5, lab) ## ----------------------------------------------------------------------------- ################################### # SVM ################################### base <- "svm" set.seed(1) result = lapply(seq_len(10), function(j) { final <- multiAdaSampling(dat, cls.noisy01, reducedDimName = "matPCs", classifier=base, percent=1, L=10)$final ari01 <- mclust::adjustedRandIndex(lab, final) acc01 <- bAccuracy(lab, final) final <- multiAdaSampling(dat, cls.noisy02, reducedDimName = "matPCs", classifier=base, percent=1, L=10)$final ari02 <- mclust::adjustedRandIndex(lab, final) acc02 <- bAccuracy(lab, final) final <- multiAdaSampling(dat, cls.noisy03, reducedDimName = "matPCs", classifier=base, percent=1, L=10)$final ari03 <- mclust::adjustedRandIndex(lab, final) acc03 <- bAccuracy(lab, final) final <- multiAdaSampling(dat, cls.noisy04, reducedDimName = "matPCs", classifier=base, percent=1, L=10)$final ari04 <- mclust::adjustedRandIndex(lab, final) acc04 <- bAccuracy(lab, final) final <- multiAdaSampling(dat, cls.noisy05, reducedDimName = "matPCs", classifier=base, percent=1, L=10)$final ari05 <- mclust::adjustedRandIndex(lab, final) acc05 <- bAccuracy(lab, final) c( acc01 = acc01, acc02 = acc02, acc03 = acc03, acc04 = acc04, acc05 = acc05, ari01 = ari01, ari02 = ari02, ari03 = ari03, ari04 = ari04, ari05 = ari05 ) }) result = do.call(rbind, result) acc = result[,seq_len(5)] colnames(acc) = seq(from=0.1,to=0.5,by=0.1) ari = result[,seq(from= 6, to = 10)] colnames(ari) = seq(from=0.1,to=0.5,by=0.1) ## ----------------------------------------------------------------------------- plot.new() par(mfrow = c(1,2)) boxplot(acc, col="lightblue", main="SVM Accuracy", ylim=c(0.45, 1), xlab = "rho", ylab = "Accuracy") points(x=seq_len(5), y=c( bAccuracy(lab, cls.noisy01), bAccuracy(lab, cls.noisy02), bAccuracy(lab, cls.noisy03), bAccuracy(lab, cls.noisy04), bAccuracy(lab, cls.noisy05)), col="red3", pch=c(2,3,4,5,6), cex=1) boxplot(ari, col="lightblue", main="SVM ARI", ylim=c(0.25, 1), xlab = "rho", ylab = "ARI") points(x=seq_len(5), y=c( mclust::adjustedRandIndex(lab, cls.noisy01), mclust::adjustedRandIndex(lab, cls.noisy02), mclust::adjustedRandIndex(lab, cls.noisy03), mclust::adjustedRandIndex(lab, cls.noisy04), mclust::adjustedRandIndex(lab, cls.noisy05)), col="red3", pch=c(2,3,4,5,6), cex=1) ## ----------------------------------------------------------------------------- # PCA procedure reducedDim(dat, "matPCs") <- matPCs(dat, assay = "logNorm", 0.7) # run scReClassify set.seed(1) cellTypes.reclassify <- multiAdaSampling(dat, cellTypes, reducedDimName = "matPCs", classifier = "svm", percent = 1, L = 10) # Verification by marker genes End <- c("ITGA2B", "ITGB3") ## ----------------------------------------------------------------------------- # check examples idx <- which(cellTypes.reclassify$final != cellTypes) cbind(original=cellTypes[idx], reclassify=cellTypes.reclassify$final[idx]) %>% DT::datatable() ## ----------------------------------------------------------------------------- mat <- assay(dat, "logNorm") c1 <- mat[, which(cellTypes=="Endothelial Cell")] c2 <- mat[, which(cellTypes=="Erythrocyte")] c3 <- mat[, which(cellTypes=="Hepatoblast")] c4 <- mat[, which(cellTypes=="Macrophage")] c5 <- mat[, which(cellTypes=="Megakaryocyte")] c6 <- mat[, which(cellTypes=="Mesenchymal Cell")] cs <- rainbow(length(table(cellTypes))) # (example 1 E13.5_C14) ##### par(mfrow=c(1,2)) marker <- End[1] boxplot(c1[marker,], c2[marker,], c3[marker,], c4[marker,], c5[marker,], c6[marker,], col=cs, main=marker, names=c("Others", "Others", "Others", "Orignal", "Reclassified", "Others"), las=2, xlab = "Labels", ylab = "log2FPKM") points(5, mat[marker, which(colnames(mat) %in% "E13.5_C14")], pch=16, col="red", cex=2) marker <- End[2] boxplot(c1[marker,], c2[marker,], c3[marker,], c4[marker,], c5[marker,], c6[marker,], col=cs, main=marker, names=c("Others", "Others", "Others", "Orignal", "Reclassified", "Others"), las=2, xlab = "Labels", ylab = "log2FPKM") points(5, mat[marker, which(colnames(mat) %in% "E13.5_C14")], pch=16, col="red", cex=2) ## ----------------------------------------------------------------------------- sessionInfo()