## ----GlobalOptions, results="hide", include=FALSE, cache=FALSE----------- knitr::opts_chunk$set(fig.align="center", cache=FALSE, cache.path = "clusterExperimentTutorial_cache/", fig.path="clusterExperimentTutorial_figure/",error=FALSE, #make it stop on error fig.width=6,fig.height=6,autodep=TRUE,out.width="600px",out.height="600px", message=FALSE) #knitr::opts_knit$set(stop_on_error = 2L) #really make it stop #knitr::dep_auto() ## ----cache=FALSE--------------------------------------------------------- set.seed(14456) ## for reproducibility, just in case library(scRNAseq) data("fluidigm") ## ------------------------------------------------------------------------ assay(fluidigm)[1:5,1:10] colData(fluidigm)[,1:5] NCOL(fluidigm) #number of samples ## ----filter_high--------------------------------------------------------- se <- fluidigm[,colData(fluidigm)[,"Coverage_Type"]=="High"] ## ----filter-------------------------------------------------------------- wh_zero <- which(rowSums(assay(se))==0) pass_filter <- apply(assay(se), 1, function(x) length(x[x >= 10]) >= 10) se <- se[pass_filter,] dim(se) ## ----normalization------------------------------------------------------- fq <- round(limma::normalizeQuantiles(assay(se))) assays(se) <- list(normalized_counts=fq) ## ----clusterMany--------------------------------------------------------- options(getClass.msg=FALSE) #get rid of annoying messages about cache until fixed internally in R library(clusterExperiment) ce<-clusterMany(se, clusterFunction="pam",ks=5:10, isCount=TRUE,dimReduce=c("PCA","var"),nVarDims=c(100,500,1000), nPCADims=c(5,15,50),run=TRUE) ## ----plotClusterEx1------------------------------------------------------ defaultMar<-par("mar") plotCMar<-c(1.1,8.1,4.1,1.1) par(mar=plotCMar) plotClusters(ce,main="Clusters from clusterMany", whichClusters="workflow", sampleData=c("Biological_Condition","Cluster2"), axisLine=-1) ## ----plotCluster_newLabels----------------------------------------------- cl<-clusterLabels(ce) cl<-gsub("Features","",cl) clusterLabels(ce)<-cl ## ----plotCluster_newOrder------------------------------------------------ cl<-clusterLabels(ce) ndims<-sapply(sapply(strsplit(cl,","),function(x){strsplit(x[1],"=")}),function(x){x[2]}) ord<-order(as.numeric(ndims)) par(mar=plotCMar) plotClusters(ce,main="Clusters from clusterMany", whichClusters=ord, sampleData=c("Biological_Condition","Cluster2"), axisLine=-1) ## ----clusterMatrix------------------------------------------------------- head(clusterMatrix(ce)[,1:3]) ## ------------------------------------------------------------------------ ce<-combineMany(ce) ## ----lookAtCombineMany--------------------------------------------------- head(clusterMatrix(ce)[,1:3]) par(mar=plotCMar) plotClusters(ce,whichClusters="workflow") ## ----combineMany_changeLabel--------------------------------------------- wh<-which(clusterLabels(ce)=="combineMany") if(length(wh)!=1) stop() else clusterLabels(ce)[wh]<-"combineMany,default" ## ----combineMany_newCombineMany------------------------------------------ ce<-combineMany(ce,proportion=0.7,clusterLabel="combineMany,0.7") par(mar=plotCMar) plotClusters(ce,whichClusters="workflow") ## ----combineMany_changeMinSize------------------------------------------- ce<-combineMany(ce,proportion=0.7,minSize=3,clusterLabel="combineMany,final") par(mar=plotCMar) plotClusters(ce,whichClusters="workflow",main="Min. Size=3") ## ----plotCoClustering_quickstart----------------------------------------- plotCoClustering(ce) ## ----makeDendrogram------------------------------------------------------ ce<-makeDendrogram(ce,dimReduce="var",ndims=500) plotDendrogram(ce) ## ----makeDendrogram_show------------------------------------------------- ce ## ----mergeClustersPlot--------------------------------------------------- mergeClusters(ce,mergeMethod="adjP",plot="mergeMethod") ## ----mergeClusters------------------------------------------------------- ce<-mergeClusters(ce,mergeMethod="adjP",cutoff=0.01) par(mar=plotCMar) plotClusters(ce,whichClusters="workflow", sampleData=c("Biological_Condition","Cluster2")) plotCoClustering(ce,whichClusters=c("mergeClusters","combineMany"), sampleData=c("Biological_Condition","Cluster2"),annLegend=FALSE) ## ----plotHeatmap--------------------------------------------------------- plotHeatmap(ce,clusterSamplesData="dendrogramValue",breaks=.99, sampleData=c("Biological_Condition", "Cluster1", "Cluster2")) ## ----getBestFeatures----------------------------------------------------- pairsAll<-getBestFeatures(ce,contrastType="Pairs",p.value=0.05, number=nrow(ce),isCount=TRUE) head(pairsAll) ## ----getBestFeatures_size------------------------------------------------ length(pairsAll$Feature)==length(unique(pairsAll$Feature)) ## ----getBestFeatures_heatmap--------------------------------------------- plotHeatmap(ce, clusterSamplesData="dendrogramValue", clusterFeaturesData=unique(pairsAll[,"IndexInOriginal"]), main="Heatmap of features w/ significant pairwise differences", breaks=.99) ## ----show---------------------------------------------------------------- ce ## ----CEHelperCommands1--------------------------------------------------- head(clusterMatrix(ce))[,1:5] primaryCluster(ce) ## ----CEHelperCommands2--------------------------------------------------- head(clusterLabels(ce),10) ## ----CEHelperCommands3--------------------------------------------------- head(clusterTypes(ce),10) ## ----SECommandsOnCE------------------------------------------------------ colData(ce)[,1:5] ## ----CEClusterLengend---------------------------------------------------- length(clusterLegend(ce)) clusterLegend(ce)[1:2] ## ----plotClusterEx1_redo------------------------------------------------- par(mar=plotCMar) plotClusters(ce,main="Clusters from clusterMany", whichClusters="workflow", axisLine=-1) ## ----plotClusterEx1_newOrder--------------------------------------------- par(mar=plotCMar) plotClusters(ce,whichClusters="clusterMany", main="Only Clusters from clusterMany",axisLine=-1) ## ----plotClusterEx1_addData---------------------------------------------- par(mar=plotCMar) plotClusters(ce,whichClusters="workflow", sampleData=c("Biological_Condition","Cluster2"), main="Workflow clusters plus other data",axisLine=-1) ## ----plotClusterEx1_setColors-------------------------------------------- par(mar=plotCMar) ce_temp<-plotClusters(ce,whichClusters="workflow", sampleData=c("Biological_Condition","Cluster2"), main="Clusters from clusterMany, different order",axisLine=-1,resetNames=TRUE,resetColors=TRUE,resetOrderSamples=TRUE) clusterLegend(ce_temp)[c("mergeClusters","combineMany,final")] ## ----plotClusterEx1_forceColors,fig.width=18,fig.height=6---------------- par(mar=plotCMar) par(mfrow=c(1,2)) plotClusters(ce_temp, sampleData=c("Biological_Condition","Cluster2"), whichClusters="workflow", existingColors="all", main="Workflow Clusters, fix the color of clusters",axisLine=-1) plotClusters(ce_temp, sampleData=c("Biological_Condition","Cluster2"), existingColors="all", whichClusters="clusterMany", main="clusterMany Clusters, fix the color of clusters", axisLine=-1) ## ----plotHeatmap_Ex1----------------------------------------------------- par(mfrow=c(1,1)) par(mar=defaultMar) plotHeatmap(ce,main="Heatmap with clusterMany") ## ----plotHeatmap_Ex1.1--------------------------------------------------- whClusterPlot<-1:2 plotHeatmap(ce,whichClusters=whClusterPlot, annLegend=FALSE) ## ----plotHeatmap_primaryCluster------------------------------------------ plotHeatmap(ce,clusterSamplesData="primaryCluster", whichClusters="primaryCluster", main="Heatmap with clusterMany",annLegend=FALSE) ## ----showCE_dendrogram--------------------------------------------------- show(ce) ## ----plotHeatmap_dendro-------------------------------------------------- plotHeatmap(ce,clusterSamplesData="dendrogramValue", whichClusters=c("mergeClusters","combineMany"), main="Heatmap with clusterMany", sampleData=c("Biological_Condition","Cluster2"),annLegend=FALSE) ## ----tableArguments, echo=FALSE, message=FALSE, warnings=FALSE, results='asis'---- # simple table creation here tabl <- " Argument| Dependencies | Passed to | Argument passed to ---------------|-----------------|:-------------:|------:| ks | sequential=TRUE | seqCluster | k0 - | sequential=FALSE, findBestK=FALSE, clusterFunction of type 'K' | clusterD | k - | sequential=FALSE, findBestK=FALSE, subsample=TRUE | subsampleClustering | k - | sequential=FALSE, findBestK=TRUE, clusterFunction of type 'K' | clusterD | kRange dimReduce | none | transform | dimReduce nVarDims | dimReduce in 'mad','cv','var' | transform | nVarDims nPCADims | dimReduce='PCA' | transform | nPCADims clusterFunction| none | clusterD | clusterFunction minSizes | none | clusterD | minSize distFunction | subsample=FALSE | clusterD | distFunction alphas | clusterFunction of type '01'| clusterD | alpha findBestK | clusterFunction of type 'K' | clusterD | findBestK removeSil | clusterFunction of type 'K' | clusterD | removeSil silCutoff | clusterFunction of type 'K' | clusterD | silCutoff betas | sequential=TRUE | seqCluster | beta " cat(tabl) # output the table in a format good for HTML/PDF/docx conversion ## ----defineDist---------------------------------------------------------- corDist<-function(x){(1-cor(t(x),method="pearson"))/2} spearDist<-function(x){(1-cor(t(x),method="spearman"))/2} ## ----visualizeSubsamplingD----------------------------------------------- ceSub<-clusterSingle(ce,dimReduce="mad",ndims=1000,subsample=TRUE,clusterFunction="hierarchical01",subsampleArgs=list(k=8),clusterLabel="subsamplingCluster",clusterDArgs=list(minSize=5)) plotCoClustering(ceSub,colorScale=rev(seqPal5)) ## ----visualizeSpearmanDist----------------------------------------------- dSp<-spearDist(t(transform(ce,dimReduce="mad",nVarDims=1000))) plotHeatmap(dSp,isSymmetric=TRUE,colorScale=rev(seqPal5)) ## ----clusterManyDiffDist,fig.width=15,fig.height=6----------------------- ceDist<-clusterMany(ce, k=7:9, alpha=c(0.35,0.4,0.45), clusterFunction=c("tight","hierarchical01","pam","hierarchicalK"),findBestK=FALSE, removeSil=c(FALSE),dist=c("corDist","spearDist"), isCount=TRUE,dimReduce=c("mad"),nPCADims=1000,run=TRUE) clusterLabels(ceDist)<-gsub("clusterFunction","alg",clusterLabels(ceDist)) clusterLabels(ceDist)<-gsub("Dist","",clusterLabels(ceDist)) clusterLabels(ceDist)<-gsub("distFunction","dist",clusterLabels(ceDist)) clusterLabels(ceDist)<-gsub("hierarchical","hier",clusterLabels(ceDist)) par(mar=c(1.1,15.1,1.1,1.1)) plotClusters(ceDist,axisLine=-2,sampleData=c("Biological_Condition")) ## ----clusterManyCheckParam----------------------------------------------- checkParam<-clusterMany(se, clusterFunction="pam", ks=2:10, removeSil=c(TRUE,FALSE), isCount=TRUE, dimReduce=c("PCA","var"), nVarDims=c(100,500,1000),nPCADims=c(5,15,50),run=FALSE) dim(checkParam$paramMatrix) #number of rows is the number of clusterings ## ----clusterManyCheckParam2,eval=FALSE----------------------------------- # # ce<-clusterMany(se, paramMatrix=checkParam$paramMatrix, clusterDArgs=checkParam$clusterDArgs, seqArgs=checkParam$seqArgs,subsampleArgs=checkParam$subsampleArgs) # ce<-clusterMany(ce, clusterFunction="pam",ks=2:10,findBestK=TRUE,removeSil=c(TRUE), isCount=TRUE,dimReduce=c("PCA","var"),nVarDims=c(100,500,1000),nPCADims=c(5,15,50),run=TRUE) ## ----combineMany_detailed------------------------------------------------ plotCoClustering(ce) ## ----combineMany_chooseClusters------------------------------------------ wh<-grep("nVAR",clusterLabels(ce)) ce<-combineMany(ce,whichCluster=wh,proportion=0.7,minSize=3, clusterLabel="combineMany,nVAR") plotCoClustering(ce) ## ----combineMany_showDifferent------------------------------------------- wh<-grep("combineMany",clusterTypes(ce)) par(mar=plotCMar) plotClusters(ce,whichClusters=rev(wh),axisLine=-1) ## ----makeDendrogram_reducedFeatures-------------------------------------- ce<-makeDendrogram(ce,dimReduce="var",ndims=500) plotDendrogram(ce) ## ----showCe-------------------------------------------------------------- show(ce) ## ----remakeMakeDendrogram------------------------------------------------ ce<-makeDendrogram(ce,dimReduce="var",ndims=500, whichCluster="combineMany,final") plotDendrogram(ce) ## ----clusterTypeOfCombineMany-------------------------------------------- clusterTypes(ce)[which(clusterLabels(ce)=="combineMany,final")] ## ----getBackCombineMany-------------------------------------------------- ce<-setToCurrent(ce,whichCluster="combineMany,final") show(ce) ## ----checkWhatDendro----------------------------------------------------- ce ## ----mergeClusters_plot,fig.width=12------------------------------------- mergeClusters(ce,mergeMethod="none",plotType="all") ## ----mergeClusters_ex---------------------------------------------------- ce<-mergeClusters(ce,cutoff=0.05,mergeMethod="adjP",clusterLabel="mergeClusters,v2") ce ## ----mergeClusters_redo-------------------------------------------------- ce<-mergeClusters(ce,cutoff=0.15,mergeMethod="MB", clusterLabel="mergeClusters,v3") ce ## ----workflowTable------------------------------------------------------- workflowClusterTable(ce) ## ----workflowDetails----------------------------------------------------- head(workflowClusterDetails(ce),8) ## ----markFinal----------------------------------------------------------- ce<-setToFinal(ce,whichCluster="mergeClusters,v2", clusterLabel="Final Clustering") par(mar=plotCMar) plotClusters(ce,whichClusters="workflow") ## ----rsecTable, echo=FALSE, message=FALSE, warnings=FALSE, results='asis'---- # simple table creation here tabl <- " | | Arguments Internally Fixed | Arguments for the user | Correspondence | Notes | |:-----------------|:-----------------|:-------------|:------|:------| *clusterMany* | | | | -| sequential=TRUE | k0s | ks | only sets 'k0', no other k - | distFunction=NA | clusterFunction | | only tight or hierarchical01 - | removeSil=FALSE | dimReduce | | - | subsample=TRUE | nVarDims | | - | silCutoff=0 | nPCADims | | - | | alphas | | - | | betas | | - | | minSizes | | - | | clusterDArgs | | - | | subsampleArgs | | - | | seqArgs | | - | | run | | - | | ncores | | - | | random.seed | | - | | isCount | | - | | transFun | | *combineMany* | | | | - | propUnassigned = *(default)* | combineProportion | proportion - | combineMinSize | minSize | | *makeDendrogram* | | | | - | ignoreUnassignedVar=TRUE | dendroReduce | dimReduce | - | unassignedSamples= *(default)* | dendroNDims | ndims | *mergeClusters* | | | | - | plotType='none' | mergeMethod | | - | | mergeCutoff | cutoff | - | | isCount | | used for both mergeMethod and clusterMany " cat(tabl) # output the table in a format good for HTML/PDF/docx conversion ## ----resetToManyClusters------------------------------------------------- wh<-which(clusterLabels(ce)=="mergeClusters,v3") if(length(wh)==1) primaryClusterIndex(ce)<-wh else stop() ## ----getBestFeatures_onlyTopPairs---------------------------------------- pairsAllTop<-getBestFeatures(ce,contrastType="Pairs",p.value=0.05) dim(pairsAllTop) head(pairsAllTop) ## ----getBestFeatures_oneAgainstAll--------------------------------------- best1vsAll<-getBestFeatures(ce,contrastType="OneAgainstAll",p.value=0.05) head(best1vsAll) ## ----getBestFeatures_dendroFail,error=TRUE------------------------------- bestDendroTest1<-getBestFeatures(ce,contrastType="Dendro",p.value=0.05) ## ----showCeAgain--------------------------------------------------------- show(ce) ## ----getBestFeatures_dendro---------------------------------------------- ce<-makeDendrogram(ce,dimReduce="var",ndims=500) bestDendro<-getBestFeatures(ce,contrastType="Dendro",p.value=0.05) head(bestDendro) ## ----dendroContrastLevels------------------------------------------------ levels((bestDendro)$Contrast) ## ----dendroWithNodeNames------------------------------------------------- plotDendrogram(ce,show.node.label=TRUE) ## ----sessionInfo--------------------------------------------------------- sessionInfo()