## ---- include = FALSE--------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, # fig.align = "center", comment = ">" ) ## ---- eval = FALSE------------------------------------------------------------ # # install.packages("BiocManager") # BiocManager::install("POMA") ## ---- warning = FALSE, message = FALSE, comment = FALSE----------------------- library(POMA) ## ---- eval = FALSE------------------------------------------------------------ # data("st000336") # PomaEDA(st000336) ## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------ library(patchwork) library(knitr) library(dplyr) library(tibble) library(ggplot2) library(reshape2) library(Biobase) e <- t(Biobase::exprs(st000336)) target <- Biobase::pData(st000336) %>% rownames_to_column("ID") %>% rename(Group = 2) %>% select(ID, Group) ## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------ imputed <- PomaImpute(st000336, method = "knn") pre_processed <- PomaNorm(imputed, method = "log_pareto") %>% PomaOutliers(coef = 1.5) ## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------ # zeros zeros <- data.frame(number = colSums(e == 0, na.rm = TRUE)) %>% rownames_to_column("names") %>% filter(number != 0) all_zero <- zeros %>% filter(number == nrow(e)) # missing values nas <- data.frame(number = colSums(is.na(e))) %>% rownames_to_column("names") %>% filter(number != 0) # zero variance var_zero <- e %>% as.data.frame() %>% summarise_all(~ var(., na.rm = TRUE)) %>% t() %>% as.data.frame() %>% rownames_to_column("names") %>% filter(V1 == 0) ## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------ summary_table1 <- data.frame(Samples = nrow(e), Features = ncol(e), Covariates = ncol(Biobase::pData(st000336)) - 1) summary_table2 <- data.frame(Counts_Zero = sum(zeros$number), Percent_Zero = paste(round((sum(zeros$number)/(nrow(e)*ncol(e)))*100, 2), "%")) summary_table3 <- data.frame(Counts_NA = sum(is.na(e)), Percent_NA = paste(round((sum(is.na(e))/(nrow(e)*ncol(e)))*100, 2), "%")) knitr::kable(summary_table1) knitr::kable(summary_table2) knitr::kable(summary_table3) ## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------ if (nrow(nas) >= 1){ ggplot(nas, aes(reorder(names, number), number, fill = number)) + geom_col() + ylab("Missing values") + xlab("") + ggtitle("Missing Value Plot") + theme_bw() + theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none") } ## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------ if (nrow(zeros) >= 1){ ggplot(zeros, aes(reorder(names, number), number, fill = number)) + geom_col() + ylab("Zeros") + xlab("") + ggtitle("Zeros Plot") + theme_bw() + theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none") } ## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------ counts <- data.frame(table(target$Group)) colnames(counts) <- c("Group", "Counts") ggplot(counts, aes(reorder(Group, Counts), Counts, fill = Group)) + geom_col() + ylab("Counts") + xlab("") + theme_bw() + theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none") ## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------ indNum <- nrow(Biobase::pData(pre_processed)) jttr <- ifelse(indNum <= 10, TRUE, FALSE) p1 <- PomaBoxplots(imputed, jitter = jttr) + xlab("Samples") + ylab("Value") + ggtitle("Not Normalized") + theme(legend.position = "bottom") p2 <- PomaBoxplots(pre_processed, jitter = jttr) + xlab("Samples") + ylab("Value") + ggtitle("Normalized ('log_pareto')") + theme(legend.position = "bottom") p1 + p2 ## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------ p3 <- PomaDensity(imputed) + ggtitle("Not Normalized") p4 <- PomaDensity(pre_processed) + ggtitle("Normalized ('log_pareto')") p3/p4 ## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------ outliers <- st000336 %>% PomaImpute(method = "knn") %>% PomaNorm(method = "log_pareto") %>% PomaOutliers(do = "analyze", coef = 1.5) outliers$polygon_plot ## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------ if(nrow(outliers$outliers) >= 1){ knitr::kable(outliers$outliers) } ## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------ correlations <- PomaCorr(pre_processed) high_correlations <- correlations$correlations %>% filter(abs(corr) > 0.97) ## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------ PomaHeatmap(pre_processed) ## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------ PomaMultivariate(pre_processed, method = "pca")$scoresplot