Contents

library(COTAN)
library(zeallot)
library(data.table)
library(Rtsne)
library(qpdf)
library(GEOquery)
library(rlang)

options(parallelly.fork.enable = TRUE)

0.1 Introduction

This tutorial contains the same functionalities as the first release of the COTAN tutorial but done using the new and updated functions.

0.2 Get the data-set

Download the data-set for "mouse cortex E17.5".

dataDir <- tempdir()

GEO <- "GSM2861514"
fName <- "GSM2861514_E175_Only_Cortical_Cells_DGE.txt.gz"
dataSetFile <- file.path(dataDir, GEO, fName)

if (!file.exists(dataSetFile)) {
  getGEOSuppFiles(GEO, makeDirectory = TRUE,
                  baseDir = dataDir, fetch_files = TRUE,
                  filter_regex = fName)
}
#>                                                                              size
#> /tmp/RtmpFxTx4d/GSM2861514/GSM2861514_E175_Only_Cortical_Cells_DGE.txt.gz 1509523
#>                                                                           isdir
#> /tmp/RtmpFxTx4d/GSM2861514/GSM2861514_E175_Only_Cortical_Cells_DGE.txt.gz FALSE
#>                                                                           mode
#> /tmp/RtmpFxTx4d/GSM2861514/GSM2861514_E175_Only_Cortical_Cells_DGE.txt.gz  644
#>                                                                                         mtime
#> /tmp/RtmpFxTx4d/GSM2861514/GSM2861514_E175_Only_Cortical_Cells_DGE.txt.gz 2024-10-02 16:58:27
#>                                                                                         ctime
#> /tmp/RtmpFxTx4d/GSM2861514/GSM2861514_E175_Only_Cortical_Cells_DGE.txt.gz 2024-10-02 16:58:27
#>                                                                                         atime
#> /tmp/RtmpFxTx4d/GSM2861514/GSM2861514_E175_Only_Cortical_Cells_DGE.txt.gz 2024-10-02 16:58:25
#>                                                                            uid
#> /tmp/RtmpFxTx4d/GSM2861514/GSM2861514_E175_Only_Cortical_Cells_DGE.txt.gz 1001
#>                                                                            gid
#> /tmp/RtmpFxTx4d/GSM2861514/GSM2861514_E175_Only_Cortical_Cells_DGE.txt.gz 1001
#>                                                                               uname
#> /tmp/RtmpFxTx4d/GSM2861514/GSM2861514_E175_Only_Cortical_Cells_DGE.txt.gz biocbuild
#>                                                                              grname
#> /tmp/RtmpFxTx4d/GSM2861514/GSM2861514_E175_Only_Cortical_Cells_DGE.txt.gz biocbuild
sample.dataset <- read.csv(dataSetFile, sep = "\t", row.names = 1L)

Define a directory where the output will be stored.

outDir <- dataDir

# Log-level 2 was chosen to showcase better how the package works
# In normal usage a level of 0 or 1 is more appropriate
setLoggingLevel(2L)
#> Setting new log level to 2

# This file will contain all the logs produced by the package
# as if at the highest logging level
setLoggingFile(file.path(outDir, "vignette_v2.log"))
#> Setting log file to be: /tmp/RtmpFxTx4d/vignette_v2.log

message("COTAN uses the `torch` library when asked to `optimizeForSpeed`")
#> COTAN uses the `torch` library when asked to `optimizeForSpeed`
message("Run the command 'options(COTAN.UseTorch = FALSE)'",
        " in your session to disable `torch` completely!")
#> Run the command 'options(COTAN.UseTorch = FALSE)' in your session to disable `torch` completely!

# this command does check whether the torch library is properly installed
c(useTorch, deviceStr) %<-% COTAN:::canUseTorch(TRUE, "cuda")
#> While trying to load the `torch` library Error in doTryCatch(return(expr), name, parentenv, handler): The `torch` library is installed but the required additional libraries are not avalable yet
#> Warning in value[[3L]](cond): The `torch` library is installed, but might
#> require further initialization
#> Warning in value[[3L]](cond): Please look at the `torch` package installation
#> guide to complete the installation
#> Warning in COTAN:::canUseTorch(TRUE, "cuda"): Falling back to legacy
#> [non-torch] code.
if (useTorch) {
  message("The `torch` library is availble and ready to use")
  if (deviceStr == "cuda") {
    message("The `torch` library can use the `CUDA` GPU")
  } else {
    message("The `torch` library can only use the CPU")
    message("Please ensure you have the `OpenBLAS` libraries",
            " installed on the system")
  }
}

rm(useTorch, deviceStr)

1 Analytical pipeline

Initialize the COTAN object with the row count table and the metadata for the experiment.

cond <- "mouse_cortex_E17.5"

obj <- COTAN(raw = sample.dataset)
obj <- initializeMetaDataset(obj,
                             GEO = GEO,
                             sequencingMethod = "Drop_seq",
                             sampleCondition = cond)
#> Initializing `COTAN` meta-data

logThis(paste0("Condition ", getMetadataElement(obj, datasetTags()[["cond"]])),
        logLevel = 1L)
#> Condition mouse_cortex_E17.5

Before we proceed to the analysis, we need to clean the data. The analysis will use a matrix of raw UMI counts as the input. To obtain this matrix, we have to remove any potential cell doublets or multiplets, as well as any low quality or dying cells.

1.1 Data cleaning

We can check the library size (UMI number) with an empirical cumulative distribution function

ECDPlot(obj, yCut = 700L)

cellSizePlot(obj)
#> Warning: Removed 1 row containing missing values or values outside the scale range
#> (`geom_point()`).

genesSizePlot(obj)
#> Warning: Removed 1 row containing missing values or values outside the scale range
#> (`geom_point()`).

During the cleaning, every time we want to remove cells or genes we can use the dropGenesCells()function.

Drop cells with too many reads as they are probably doublets or multiplets

cellsSizeThr <- 6000L
obj <- addElementToMetaDataset(obj, "Cells size threshold", cellsSizeThr)

cells_to_rem <- getCells(obj)[getCellsSize(obj) > cellsSizeThr]
obj <- dropGenesCells(obj, cells = cells_to_rem)

cellSizePlot(obj)
#> Warning: Removed 1 row containing missing values or values outside the scale range
#> (`geom_point()`).

To drop cells by gene number: high genes count might also indicate doublets…

genesSizeHighThr <- 3000L
obj <- addElementToMetaDataset(obj, "Num genes high threshold", genesSizeHighThr)

cells_to_rem <- getCells(obj)[getNumExpressedGenes(obj) > genesSizeHighThr]
obj <- dropGenesCells(obj, cells = cells_to_rem)

genesSizePlot(obj)

Drop cells with too low genes expression as they are probably dead

genesSizeLowThr <- 500L
obj <- addElementToMetaDataset(obj, "Num genes low threshold", genesSizeLowThr)

cells_to_rem <- getCells(obj)[getNumExpressedGenes(obj) < genesSizeLowThr]
obj <- dropGenesCells(obj, cells = cells_to_rem)

genesSizePlot(obj)

Cells with a too high percentage of mitochondrial genes are likely dead (or at the last problematic) cells. So we drop them!

c(mitPlot, mitSizes) %<-% mitochondrialPercentagePlot(obj, genePrefix = "^Mt")

mitPlot

mitPercThr <- 1.0
obj <- addElementToMetaDataset(obj, "Mitoc. perc. threshold", mitPercThr)

cells_to_rem <- rownames(mitSizes)[mitSizes[["mit.percentage"]] > mitPercThr]
obj <- dropGenesCells(obj, cells = cells_to_rem)

c(mitPlot, mitSizes) %<-% mitochondrialPercentagePlot(obj, genePrefix = "^Mt")

plot(mitPlot)

If we do not want to consider the mitochondrial genes we can remove them before starting the analysis.

genes_to_rem <- getGenes(obj)[grep("^Mt", getGenes(obj))]
cells_to_rem <- getCells(obj)[which(getCellsSize(obj) == 0L)]

obj <- dropGenesCells(obj, genes_to_rem, cells_to_rem)

We want also to log the current status.

logThis(paste("n cells", getNumCells(obj)), logLevel = 1L)
#> n cells 859

The clean() function estimates all the parameters for the data. Therefore, we have to run it again every time we remove any genes or cells from the data.

obj <- addElementToMetaDataset(obj, "Num drop B group", 0)

obj <- clean(obj)
#> Genes/cells selection done: dropped [4787] genes and [0] cells
#> Working on [12235] genes and [859] cells

c(pcaCellsPlot, pcaCellsData, genesPlot,
  UDEPlot, nuPlot, zoomedNuPlot) %<-% cleanPlots(obj)
#> PCA: START
#> PCA: DONE
#> Hierarchical clustering: START
#> Hierarchical clustering: DONE

pcaCellsPlot

genesPlot

We can observe here that the red cells are really enriched in hemoglobin genes so we prefer to remove them. They can be extracted from the pcaCellsData object and removed.

cells_to_rem <- rownames(pcaCellsData)[pcaCellsData[["groups"]] == "B"]
obj <- dropGenesCells(obj, cells = cells_to_rem)

obj <- addElementToMetaDataset(obj, "Num drop B group", 1)

obj <- clean(obj)
#> Genes/cells selection done: dropped [5] genes and [0] cells
#> Working on [12230] genes and [855] cells

c(pcaCellsPlot, pcaCellsData, genesPlot, UDEPlot, nuPlot, zoomedNuPlot) %<-%
  cleanPlots(obj)
#> PCA: START
#> PCA: DONE
#> Hierarchical clustering: START
#> Hierarchical clustering: DONE

pcaCellsPlot

To color the PCA based on nu (so the cells’ efficiency)

UDEPlot

UDE (color) should not correlate with principal components! This is very important.

The next part is used to remove the cells with efficiency too low.

plot(nuPlot)

We can zoom on the smallest values and, if COTAN detects a clear elbow, we can decide to remove the cells.

plot(zoomedNuPlot)

We also save the defined threshold in the metadata and re-run the estimation

UDELowThr <- 0.28
obj <- addElementToMetaDataset(obj, "Low UDE cells' threshold", UDELowThr)

obj <- addElementToMetaDataset(obj, "Num drop B group", 2)

cells_to_rem <- getCells(obj)[getNu(obj) < UDELowThr]
obj <- dropGenesCells(obj, cells = cells_to_rem)
#> Asked to drop no genes or cells

Repeat the estimation after the cells are removed

obj <- clean(obj)
#> Genes/cells selection done: dropped [0] genes and [0] cells
#> Working on [12230] genes and [855] cells

c(pcaCellsPlot, pcaCellsData, genesPlot, UDEPlot, nuPlot, zoomedNuPlot) %<-%
  cleanPlots(obj)
#> PCA: START
#> PCA: DONE
#> Hierarchical clustering: START
#> Hierarchical clustering: DONE

plot(pcaCellsPlot)

scatterPlot(obj)

logThis(paste("n cells", getNumCells(obj)), logLevel = 1L)
#> n cells 855

1.2 COTAN analysis

In this part, all the contingency tables are computed and used to get the statistics necessary to COEX evaluation and storing

obj <- proceedToCoex(obj, calcCoex = TRUE,
                     optimizeForSpeed = TRUE, cores = 6L,
                     saveObj = TRUE, outDir = outDir)
#> Cotan analysis functions started
#> Genes/cells selection done: dropped [0] genes and [0] cells
#> Working on [12230] genes and [855] cells
#> PCA: START
#> PCA: DONE
#> Hierarchical clustering: START
#> Hierarchical clustering: DONE
#> Estimate dispersion: START
#> Estimate dispersion: DONE
#> dispersion | min: -0.056640625 | max: 373.75 | % negative: 19.5175797219951
#> Cotan genes' coex estimation started
#> While trying to load the `torch` library Error in doTryCatch(return(expr), name, parentenv, handler): The `torch` library is installed but the required additional libraries are not avalable yet
#> Warning in canUseTorch(optimizeForSpeed, deviceStr): Falling back to legacy
#> [non-torch] code.
#> Calculate genes' coex (legacy): START
#> Warning in asMethod(object): sparse->dense coercion: allocating vector of size
#> 1.1 GiB
#> Total calculations elapsed time: 175.195901632309
#> Calculate genes' coex (legacy): DONE
#> Saving elaborated data locally at: /tmp/RtmpFxTx4d/mouse_cortex_E17.5.cotan.RDS

When saveObj == TRUE, in the previous step, this step can be skipped as the COTAN object has already been saved in the outDir.

# saving the structure
saveRDS(obj, file = file.path(outDir, paste0(cond, ".cotan.RDS")))

1.3 Automatic run

It is also possible to run directly a single function if we don’t want to clean anything.

obj2 <- automaticCOTANObjectCreation(
  raw = sample.dataset,
  GEO = GEO,
  sequencingMethod = "Drop_seq",
  sampleCondition = cond,
  calcCoex = TRUE, cores = 6L, optimizeForSpeed = TRUE,
  saveObj = TRUE, outDir = outDir)

2 Analysis of the elaborated data

2.1 GDI

To calculate the GDI we can run:

quantP <- calculateGDI(obj)
#> Calculate GDI dataframe: START
#> Calculate GDI dataframe: DONE
head(quantP)
#>               sum.raw.norm      GDI exp.cells
#> 0610007N19Rik     3.330191 1.626328  2.222222
#> 0610007P14Rik     5.276520 1.348450 18.713450
#> 0610009B22Rik     4.730837 1.282753 11.578947
#> 0610009D07Rik     6.310666 1.372077 43.274854
#> 0610009E02Rik     2.587763 1.015792  1.169591
#> 0610009L18Rik     3.507004 1.257020  3.040936

The next function can either plot the GDI for the dataset directly or use the pre-computed dataframe. It marks the given threshold 1.43 (in red) and the 50% and 75% quantiles (in blue). We can also specify some gene sets (three in this case) that we want to label explicitly in the GDI plot.

genesList <- list(
  "NPGs" = c("Nes", "Vim", "Sox2", "Sox1", "Notch1", "Hes1", "Hes5", "Pax6"),
  "PNGs" = c("Map2", "Tubb3", "Neurod1", "Nefm", "Nefl", "Dcx", "Tbr1"),
  "hk"   = c("Calm1", "Cox6b1", "Ppia", "Rpl18", "Cox7c", "Erh", "H3f3a",
             "Taf1", "Taf2", "Gapdh", "Actb", "Golph3", "Zfr", "Sub1",
             "Tars", "Amacr")
)

# needs to be recalculated after the changes in nu/dispersion above
#obj <- calculateCoex(obj, actOnCells = FALSE)

GDIPlot(obj, GDIIn = quantP, cond = cond,
        genes = genesList, GDIThreshold = 1.43)
#> GDI plot
#> Removed 0 low GDI genes (such as the fully-expressed) in GDI plot

The percentage of cells expressing the gene in the third column of this data-frame is reported.

2.2 Heatmaps

To perform the Gene Pair Analysis, we can plot a heatmap of the COEX values between two gene sets. We have to define the different gene sets (list.genes) in a list. Then we can choose which sets to use in the function parameter sets (for example, from 1 to 3). We also have to provide an array of the file name prefixes for each condition (for example, “mouse_cortex_E17.5”). In fact, this function can plot genes relationships across many different conditions to get a complete overview.

heatmapPlot(obj, genesLists = genesList)
#> heatmap plot: START
#> Hangling COTAN object with condition: mouse_cortex_E17.5
#> calculating PValues: START
#> Get p-values on a set of genes on columns and on a set of genes on rows
#> calculating PValues: DONE
#> min coex: -0.471922870283673 max coex: 0.456570020977233

We can also plot a general heatmap of COEX values based on some markers like the following one.

genesHeatmapPlot(obj, primaryMarkers = c("Satb2", "Bcl11b", "Vim", "Hes1"),
                 pValueThreshold = 0.001, symmetric = TRUE)
#> calculating PValues: START
#> Get p-values genome wide on columns and genome wide on rows
#> calculating PValues: DONE

genesHeatmapPlot(obj, primaryMarkers = c("Satb2", "Bcl11b", "Fezf2"),
                 secondaryMarkers = c("Gabra3", "Meg3", "Cux1", "Neurod6"),
                 pValueThreshold = 0.001, symmetric = FALSE)

2.3 Get data tables

Sometimes we can also be interested in the numbers present directly in the contingency tables for two specific genes. To get them we can use two functions:

contingencyTables() to produce the observed and expected data

print("Contingency Tables:")
#> [1] "Contingency Tables:"
contingencyTables(obj, g1 = "Satb2", g2 = "Bcl11b")
#> $observed
#>            Satb2.yes Satb2.no
#> Bcl11b.yes        47      149
#> Bcl11b.no        289      370
#> 
#> $expected
#>            Satb2.yes Satb2.no
#> Bcl11b.yes  83.07467 112.9258
#> Bcl11b.no  252.92576 406.0737

print("Corresponding Coex")
#> [1] "Corresponding Coex"
getGenesCoex(obj)["Satb2", "Bcl11b"]
#> [1] -0.2038791

Another useful function is getGenesCoex(). This can be used to extract the whole or a partial COEX matrix from a COTAN object.

# For the whole matrix
coex <- getGenesCoex(obj, zeroDiagonal = FALSE)
coex[1000L:1005L, 1000L:1005L]
#> 6 x 6 Matrix of class "dspMatrix"
#>             Ap3s1       Ap3s2        Ap4b1       Ap4e1       Ap4m1        Ap4s1
#> Ap3s1  0.91801721 -0.02963354  0.028194218 -0.02849519  0.01155578 -0.032150754
#> Ap3s2 -0.02963354  0.91389403 -0.045625989 -0.03041560 -0.05190230  0.095438608
#> Ap4b1  0.02819422 -0.04562599  0.907264954 -0.03640411  0.02804359  0.009157585
#> Ap4e1 -0.02849519 -0.03041560 -0.036404109  0.82725213 -0.04361712 -0.025528838
#> Ap4m1  0.01155578 -0.05190230  0.028043593 -0.04361712  0.91384860  0.045156667
#> Ap4s1 -0.03215075  0.09543861  0.009157585 -0.02552884  0.04515667  0.907424044
# For a partial matrix
coex <- getGenesCoex(obj, genes = c("Satb2", "Bcl11b", "Fezf2"))
coex[1000L:1005L, ]
#> 6 x 3 Matrix of class "dgeMatrix"
#>            Bcl11b        Fezf2       Satb2
#> Ap3s1 -0.04561160 -0.003187684  0.01251261
#> Ap3s2  0.01328760  0.027019200  0.01659743
#> Ap4b1  0.04083893  0.010874563 -0.03229092
#> Ap4e1 -0.05263126 -0.014921995 -0.03497405
#> Ap4m1  0.05454023  0.018899232 -0.03385801
#> Ap4s1 -0.06814884  0.006308966 -0.01672474

2.4 Establishing genes’ clusters

COTAN provides a way to establish genes’ clusters given some lists of markers

layersGenes <- list(
  "L1"   = c("Reln",   "Lhx5"),
  "L2/3" = c("Satb2",  "Cux1"),
  "L4"   = c("Rorb",   "Sox5"),
  "L5/6" = c("Bcl11b", "Fezf2"),
  "Prog" = c("Vim",    "Hes1")
)
c(gSpace, eigPlot, pcaGenesClDF, treePlot) %<-%
  establishGenesClusters(obj, groupMarkers = layersGenes,
                         numGenesPerMarker = 25L, kCuts = 5L)
#> Establishing gene clusters - START
#> Calculating gene co-expression space - START
#> calculating PValues: START
#> Get p-values on a set of genes on columns and genome wide on rows
#> calculating PValues: DONE
#> Number of selected secondary markers: 184
#> Calculating gene co-expression space - DONE
#> Establishing gene clusters - DONE

plot(eigPlot)

plot(treePlot)

UMAPPlot(pcaGenesClDF[, 1L:10L],
         clusters = rlang::set_names(pcaGenesClDF[["hclust"]],
                                     rownames(pcaGenesClDF)),
         elements = layersGenes,
         title = "Genes' clusters UMAP Plot")
#> UMAP plot
#> Found more than one class "dist" in cache; using the first, from namespace 'BiocGenerics'
#> Also defined by 'spam'

2.5 Uniform Clustering

It is possible to obtain a cell clusterization based on the concept of uniformity of expression of the genes across the cells. That is the cluster satisfies the null hypothesis of the COTAN model: the genes expression is not dependent on the cell in consideration.

There are two functions involved into obtaining a proper clusterization: the first is cellsUniformClustering that uses standard tools clusterization methods, but then discards and re-clusters any non-uniform cluster. Please not that the most important parameter for the users is the GDIThreshold: it defines how strict is the uniformity check, good values are between 1.42 (more strict) and 1.43 (less strict).

c(splitClusters, splitCoexDF) %<-%
  cellsUniformClustering(obj, GDIThreshold = 1.43, initialResolution = 0.8,
                         optimizeForSpeed = TRUE, cores = 6L,
                         saveObj = TRUE, outDir = outDir)
obj <- addClusterization(obj, clName = "split",
                         clusters = splitClusters, coexDF = splitCoexDF)

In the case one already has an existing clusterization, it is possible to calculate the DEA data.frame and add it to the COTAN object.

data("vignette.split.clusters", package = "COTAN")
splitClusters <- vignette.split.clusters

splitCoexDF <- DEAOnClusters(obj, clusters = splitClusters)
#> Differential Expression Analysis - START
#> *********
#> Differential Expression Analysis - DONE

obj <- addClusterization(obj, clName = "split", clusters = splitClusters,
                         coexDF = splitCoexDF, override = FALSE)

It is possible to have some statistics about the clusterization

c(summaryData, summaryPlot) %<-%
  clustersSummaryPlot(obj, clName = "split", plotTitle = "split summary")

summaryData
#>   split NoCond CellNumber CellPercentage MeanUDE MedianUDE ExpGenes25 ExpGenes
#> 1    -1 NoCond          8            0.9    1.25      1.12       1722     5951
#> 2     1 NoCond         29            3.4    1.44      1.40       2216     8977
#> 3     2 NoCond        231           27.0    0.78      0.75       1175    11218
#> 4     3 NoCond        154           18.0    1.43      1.37       2186    11474
#> 5     4 NoCond         79            9.2    1.43      1.34       2261    10973
#> 6     5 NoCond         52            6.1    0.44      0.41        421     8713
#> 7     6 NoCond         70            8.2    1.04      0.91       1537    10682
#> 8     7 NoCond        146           17.1    0.91      0.77       1353    11057
#> 9     8 NoCond         86           10.1    0.73      0.69       1084    10057

The ExpGenes column contains the number of genes that are expressed in any cell of the relevant cluster, while the ExpGenes25 column contains the number of genes expressed in at the least 25% of the cells of the relevant cluster

plot(summaryPlot)

It is possible to visualize how relevant are the marker genes’ lists with respect to the given clusterization

c(., scoreDF) %<-% clustersMarkersHeatmapPlot(obj, groupMarkers = layersGenes,
                                              clName = "split", kCuts = 5L)
#> clustersDeltaExpression - START
#> clustersDeltaExpression - DONE

However since the algorithm that creates the clusters is not geared directly to achieve cluster uniformity, there might be clusters that can be merged back together and still be uniform. This is the purpose of the function mergeUniformCellsClusters that takes a clusterization and tries to merge cluster pairs after checking that together the pair forms a uniform cluster. In order to avoid the massive possible checks the function relies on a related distance the find the cluster pairs that will be most likely merged. Again the most important parameter for the users is the GDIThreshold, it is supposed to be the same as the one used to generate the clusterization in the first place, but if needed it can be slightly relaxed in order to reduce the number of clusters.

c(mergeClusters, mergeCoexDF) %<-%
  mergeUniformCellsClusters(obj, clusters = splitClusters, GDIThreshold = 1.43,
                            optimizeForSpeed = TRUE, cores = 6L,
                            saveObj = TRUE, outDir = outDir)

# merges are:
#  1 <- '-1' + 1 + 4
#  2 <- 2 + 3 + 8
#  3 <- 5
#  4 <- 6
#  5 <- 7

obj <- addClusterization(obj, clName = "merge",
                         clusters = mergeClusters, coexDF = mergeCoexDF)

In the case one already has an existing clusterization, it is possible to calculate the DEA data.frame and add it to the COTAN object.

data("vignette.merge.clusters", package = "COTAN")
mergeClusters <- vignette.merge.clusters

mergeCoexDF <- DEAOnClusters(obj, clusters = mergeClusters)
#> Differential Expression Analysis - START
#> *****
#> Differential Expression Analysis - DONE

obj <- addClusterization(obj, clName = "merge",
                         clusters = mergeClusters, coexDF = mergeCoexDF)

In the following graph, it is possible to check that the found clusters align very well to the expression of the layers’ genes defined above…

c(.,scoreDF) %<-% clustersMarkersHeatmapPlot(obj, groupMarkers = layersGenes,
                                             clName = "merge", kCuts = 5L)
#> clustersDeltaExpression - START
#> clustersDeltaExpression - DONE

2.6 Vignette clean-up stage

The next few lines are just to clean.

if (file.exists(file.path(outDir, paste0(cond, ".cotan.RDS")))) {
  #Delete file if it exists
  file.remove(file.path(outDir, paste0(cond, ".cotan.RDS")))
}
#> [1] TRUE
if (file.exists(file.path(outDir, paste0(cond, "_times.csv")))) {
  #Delete file if it exists
  file.remove(file.path(outDir, paste0(cond, "_times.csv")))
}
#> [1] TRUE
if (dir.exists(file.path(outDir, cond))) {
  unlink(file.path(outDir, cond), recursive = TRUE)
}
if (dir.exists(file.path(outDir, GEO))) {
  unlink(file.path(outDir, GEO), recursive = TRUE)
}

# stop logging to file
setLoggingFile("")
#> Closing previous log file - Setting log file to be:
file.remove(file.path(outDir, "vignette_v2.log"))
#> [1] TRUE

options(parallelly.fork.enable = FALSE)
Sys.time()
#> [1] "2024-10-02 17:04:53 EDT"
sessionInfo()
#> R version 4.4.1 (2024-06-14)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 22.04.5 LTS
#> 
#> Matrix products: default
#> BLAS:   /home/biocbuild/bbs-3.19-bioc/R/lib/libRblas.so 
#> LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
#> 
#> locale:
#>  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
#>  [3] LC_TIME=en_GB              LC_COLLATE=C              
#>  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
#>  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
#>  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
#> 
#> time zone: America/New_York
#> tzcode source: system (glibc)
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#>  [1] rlang_1.1.4         GEOquery_2.72.0     Biobase_2.64.0     
#>  [4] BiocGenerics_0.50.0 qpdf_1.3.3          Rtsne_0.17         
#>  [7] data.table_1.16.0   zeallot_0.1.0       COTAN_2.4.6        
#> [10] BiocStyle_2.32.1   
#> 
#> loaded via a namespace (and not attached):
#>   [1] RcppAnnoy_0.0.22          splines_4.4.1            
#>   [3] later_1.3.2               tibble_3.2.1             
#>   [5] polyclip_1.10-7           fastDummies_1.7.4        
#>   [7] lifecycle_1.0.4           doParallel_1.0.17        
#>   [9] processx_3.8.4            globals_0.16.3           
#>  [11] lattice_0.22-6            MASS_7.3-61              
#>  [13] dendextend_1.17.1         magrittr_2.0.3           
#>  [15] limma_3.60.6              plotly_4.10.4            
#>  [17] sass_0.4.9                rmarkdown_2.28           
#>  [19] jquerylib_0.1.4           yaml_2.3.10              
#>  [21] httpuv_1.6.15             Seurat_5.1.0             
#>  [23] sctransform_0.4.1         askpass_1.2.0            
#>  [25] spam_2.10-0               sp_2.1-4                 
#>  [27] spatstat.sparse_3.1-0     reticulate_1.39.0        
#>  [29] cowplot_1.1.3             pbapply_1.7-2            
#>  [31] RColorBrewer_1.1-3        abind_1.4-8              
#>  [33] zlibbioc_1.50.0           purrr_1.0.2              
#>  [35] coro_1.0.4                torch_0.13.0             
#>  [37] circlize_0.4.16           IRanges_2.38.1           
#>  [39] S4Vectors_0.42.1          ggrepel_0.9.6            
#>  [41] irlba_2.3.5.1             listenv_0.9.1            
#>  [43] spatstat.utils_3.1-0      umap_0.2.10.0            
#>  [45] goftest_1.2-3             RSpectra_0.16-2          
#>  [47] spatstat.random_3.3-2     dqrng_0.4.1              
#>  [49] fitdistrplus_1.2-1        parallelly_1.38.0        
#>  [51] DelayedMatrixStats_1.26.0 leiden_0.4.3.1           
#>  [53] codetools_0.2-20          DelayedArray_0.30.1      
#>  [55] xml2_1.3.6                tidyselect_1.2.1         
#>  [57] shape_1.4.6.1             farver_2.1.2             
#>  [59] ScaledMatrix_1.12.0       viridis_0.6.5            
#>  [61] matrixStats_1.4.1         stats4_4.4.1             
#>  [63] spatstat.explore_3.3-2    jsonlite_1.8.9           
#>  [65] GetoptLong_1.0.5          progressr_0.14.0         
#>  [67] ggridges_0.5.6            survival_3.7-0           
#>  [69] iterators_1.0.14          foreach_1.5.2            
#>  [71] tools_4.4.1               ica_1.0-3                
#>  [73] Rcpp_1.0.13               glue_1.8.0               
#>  [75] gridExtra_2.3             SparseArray_1.4.8        
#>  [77] xfun_0.47                 MatrixGenerics_1.16.0    
#>  [79] ggthemes_5.1.0            dplyr_1.1.4              
#>  [81] withr_3.0.1               BiocManager_1.30.25      
#>  [83] fastmap_1.2.0             fansi_1.0.6              
#>  [85] openssl_2.2.2             callr_3.7.6              
#>  [87] digest_0.6.37             rsvd_1.0.5               
#>  [89] parallelDist_0.2.6        R6_2.5.1                 
#>  [91] mime_0.12                 colorspace_2.1-1         
#>  [93] Cairo_1.6-2               scattermore_1.2          
#>  [95] tensor_1.5                spatstat.data_3.1-2      
#>  [97] utf8_1.2.4                tidyr_1.3.1              
#>  [99] generics_0.1.3            httr_1.4.7               
#> [101] htmlwidgets_1.6.4         S4Arrays_1.4.1           
#> [103] uwot_0.2.2                pkgconfig_2.0.3          
#> [105] gtable_0.3.5              ComplexHeatmap_2.20.0    
#> [107] lmtest_0.9-40             XVector_0.44.0           
#> [109] htmltools_0.5.8.1         dotCall64_1.1-1          
#> [111] bookdown_0.40             clue_0.3-65              
#> [113] SeuratObject_5.0.2        scales_1.3.0             
#> [115] png_0.1-8                 spatstat.univar_3.0-1    
#> [117] knitr_1.48                tzdb_0.4.0               
#> [119] reshape2_1.4.4            rjson_0.2.23             
#> [121] curl_5.2.3                nlme_3.1-166             
#> [123] cachem_1.1.0              zoo_1.8-12               
#> [125] GlobalOptions_0.1.2       stringr_1.5.1            
#> [127] KernSmooth_2.23-24        parallel_4.4.1           
#> [129] miniUI_0.1.1.1            RcppZiggurat_0.1.6       
#> [131] pillar_1.9.0              grid_4.4.1               
#> [133] vctrs_0.6.5               RANN_2.6.2               
#> [135] promises_1.3.0            BiocSingular_1.20.0      
#> [137] beachmat_2.20.0           xtable_1.8-4             
#> [139] cluster_2.1.6             evaluate_1.0.0           
#> [141] magick_2.8.5              tinytex_0.53             
#> [143] readr_2.1.5               cli_3.6.3                
#> [145] compiler_4.4.1            crayon_1.5.3             
#> [147] future.apply_1.11.2       labeling_0.4.3           
#> [149] ps_1.8.0                  plyr_1.8.9               
#> [151] stringi_1.8.4             viridisLite_0.4.2        
#> [153] deldir_2.0-4              BiocParallel_1.38.0      
#> [155] assertthat_0.2.1          munsell_0.5.1            
#> [157] lazyeval_0.2.2            spatstat.geom_3.3-3      
#> [159] PCAtools_2.16.0           Matrix_1.7-0             
#> [161] RcppHNSW_0.6.0            hms_1.1.3                
#> [163] patchwork_1.3.0           bit64_4.5.2              
#> [165] sparseMatrixStats_1.16.0  future_1.34.0            
#> [167] ggplot2_3.5.1             statmod_1.5.0            
#> [169] shiny_1.9.1               highr_0.11               
#> [171] ROCR_1.0-11               Rfast_2.1.0              
#> [173] igraph_2.0.3              RcppParallel_5.1.9       
#> [175] bslib_0.8.0               bit_4.5.0