## Script form of a knitr document: each `## ----<label>, <options>----` line
## below marks the start of one chunk. Chunks flagged `eval=FALSE` are kept as
## commented-out code and are not executed when this file is sourced.
## The statements are order-dependent: every chunk reuses objects created by
## the chunks above it.

## ----style-knitr, eval=TRUE, echo=FALSE, results="asis"--------------------
BiocStyle::latex(relative.path = TRUE)

## ----knitr, echo=FALSE, results="hide"-------------------------------------
library("knitr")
opts_chunk$set(
  tidy = FALSE,
  dev = "png",
  fig.show = "hide",
  # fig.width=4, fig.height=4.5,
  fig.width = 10, fig.height = 8,
  fig.pos = "tbh",
  cache = TRUE,
  message = FALSE
)

## ----chu_citation, dev="pdf"-----------------------------------------------
citation("DaMiRseq")

## ----chu_1-----------------------------------------------------------------
library(DaMiRseq)
## only for example:
# rawdata.path <- system.file(package = "DaMiRseq","extdata")
# setwd(rawdata.path)
# filecounts <- list.files(rawdata.path, full.names = TRUE)[2]
# filecovariates <- list.files(rawdata.path, full.names = TRUE)[1]
# count_data <- read.delim(filecounts)
# covariate_data <- read.delim(filecovariates)
# SE<-DaMiR.makeSE(count_data, covariate_data)

## ----chu_2-----------------------------------------------------------------
# Load the example object `SE` and preview a corner of its assay matrix
# together with its column (sample) annotation.
data(SE)
assay(SE)[1:5, c(1:5, 21:25)]
colData(SE)

## ----chu_4-----------------------------------------------------------------
# Normalization with hyper = "no".
data_norm <- DaMiR.normalization(SE, minCounts = 10, fSample = 0.7,
                                 hyper = "no")

## ----chu_5-----------------------------------------------------------------
# Normalization with hyper = "yes" and th.cv = 3; this overwrites the
# `data_norm` created in the previous chunk.
data_norm <- DaMiR.normalization(SE, minCounts = 10, fSample = 0.7,
                                 hyper = "yes", th.cv = 3)
print(data_norm)
assay(data_norm)[1:5, c(1:5, 21:25)]

## ----chu_6, eval=FALSE-----------------------------------------------------
#  # Time Difference, using VST or rlog for normalization:
#  #
#  #data_norm <- DaMiR.normalization(SE, minCounts=10, fSample=0.7, th.cv=3)
#  # VST: about 80 seconds
#  #
#  #data_norm <- DaMiR.normalization(SE, minCounts=10, fSample=0.7, th.cv=3,
#  #                                 type="rlog")
#  # rlog: about 8890 seconds (i.e. 2 hours and 28 minutes!)

## ----chu_7-----------------------------------------------------------------
data_filt <- DaMiR.sampleFilt(data_norm, th.corr = 0.9)
dim(data_filt)

## ----chu_8, dev="pdf"------------------------------------------------------
sv <- DaMiR.SV(data_filt)

## ----chu_9, dev="pdf"------------------------------------------------------
DaMiR.corrplot(sv, colData(data_filt), sig.level = 0.01)

## ----chu_10, dev="pdf"-----------------------------------------------------
data_adjust <- DaMiR.SVadjust(data_filt, sv, n.sv = 4)
# Same preview pattern as in the chunks above: extract the assay matrix
# first, then take the corner.
assay(data_adjust)[1:5, c(1:5, 21:25)]

## ----chu_11, dev="pdf"-----------------------------------------------------
# After gene filtering and normalization
DaMiR.Allplot(data_filt, colData(data_filt))

## ----chu_12, dev="pdf"-----------------------------------------------------
# After sample filtering and sv adjusting
DaMiR.Allplot(data_adjust, colData(data_adjust))

## ----chu_13----------------------------------------------------------------
# Fix the RNG seed before the feature-selection steps so that reruns give
# the same result.
set.seed(12345)
data_clean <- DaMiR.transpose(assay(data_adjust))
df <- colData(data_adjust)
data_reduced <- DaMiR.FSelect(data_clean, df, th.corr = 0.4)

## ----chu_14, dev="pdf"-----------------------------------------------------
# `data_reduced` is reassigned: from here on it holds the output of
# DaMiR.FReduct applied to the `$data` element of the DaMiR.FSelect result.
data_reduced <- DaMiR.FReduct(data_reduced$data)
DaMiR.MDSplot(data_reduced, df)

## ----chu_15, dev="pdf"-----------------------------------------------------
# Rank genes by importance:
df.importance <- DaMiR.FSort(data_reduced, df)
head(df.importance)

## ----chu_16, dev="pdf"-----------------------------------------------------
# Select Best Predictors:
selected_features <- DaMiR.FBest(data_reduced, ranking = df.importance,
                                 n.pred = 5)
selected_features$predictors
# Dendrogram and heatmap:
DaMiR.Clustplot(selected_features$data, df)

## ----chu_17, dev="pdf"-----------------------------------------------------
Classification_res <- DaMiR.EnsembleLearning(selected_features$data,
                                             classes = df$class,
                                             fSample.tr = 0.5,
                                             fSample.tr.w = 0.5,
                                             iter = 30)

## ----chu_export, dev="pdf", eval=FALSE-------------------------------------
#  # `data_norm` is accessed through assay() elsewhere in this script, so the
#  # assay matrix is what gets written; the file name variable must match the
#  # one defined on the line above.
#  outputfile <- "DataNormalized.txt"
#  write.table(assay(data_norm), file = outputfile, quote = FALSE, sep = "\t")

## ----chu_18, dev="pdf"-----------------------------------------------------
# Same feature-selection and classification sequence as chu_13 to chu_17,
# but starting from `data_filt` instead of `data_adjust`; results are stored
# in the `*_2` objects.
## Feature Selection
set.seed(12345)
data_clean_2 <- DaMiR.transpose(assay(data_filt))
df_2 <- colData(data_filt)
data_reduced_2 <- DaMiR.FSelect(data_clean_2, df_2, th.corr = 0.4)
data_reduced_2 <- DaMiR.FReduct(data_reduced_2$data)
df.importance_2 <- DaMiR.FSort(data_reduced_2, df_2)
head(df.importance_2)
selected_features_2 <- DaMiR.FBest(data_reduced_2, ranking = df.importance_2,
                                   n.pred = 5)
selected_features_2$predictors
## Classification
Classification_res_2 <- DaMiR.EnsembleLearning(selected_features_2$data,
                                               classes = df_2$class,
                                               fSample.tr = 0.5,
                                               fSample.tr.w = 0.5,
                                               iter = 30)

## ----chu_ver_16, dev="pdf", eval=FALSE-------------------------------------
#
#  # Training Set
#  trainx <- selected_features$data[c(1:10,21:30),]
#  trainy <- df$class[c(1:10,21:30)]
#
#  # Test Set
#  testx <- selected_features$data[c(11:15,31:35),]
#  testy <- df$class[c(11:15,31:35)]
#
#  # New predictions
#  predx <- selected_features$data[c(16:20,36:40),]
#  predy <- df$class[c(16:20,36:40)]
#
#  # Training and Validating Classifiers
#  DaMiR.Ens.Tr_out <- DaMiR.EnsembleLearning2cl_Training(data = trainx,
#                                            classes = trainy,
#                                            fSample.tr.w = 0.8,
#                                            cl_type = c("RF","LR"),
#                                            type_model = "median",
#                                            iter = 100)
#
#  # Testing Classifiers
#  DaMiR.Ens.Ts_out <- DaMiR.EnsembleLearning2cl_Test(data = testx,
#                                            classes = testy,
#                                            models_List = DaMiR.Ens.Tr_out)
#  # New predictions
#  DaMiR.Ens.Pred <- DaMiR.EnsembleLearning2cl_Predict(data = predx,
#                                            models_List = DaMiR.Ens.Tr_out)
#

## ----sessInfo, results="asis", echo=FALSE----------------------------------
toLatex(sessionInfo())