library(gDRutils)
suppressPackageStartupMessages(library(MultiAssayExperiment))
gDRutils
is part of the gDR
suite. This package provides a set of tools for, among other things:
* data manipulation, especially of the output of the gDRcore
package (MultiAssayExperiment
and SummarizedExperiment
objects),
* data extraction,
* managing identifiers used for creating gDR
experiments,
* data validation.
The basic output of the gDRcore
package is a MultiAssayExperiment
object. The function MAEpply
allows for manipulation of the data in this object and can be used in a similar way to the base function lapply
.
mae <- get_synthetic_data("finalMAE_combo_matrix_small")
MAEpply(mae, dim)
#> $matrix
#> [1] 6 2
#>
#> $`single-agent`
#> [1] 5 2
MAEpply(mae, rowData)
#> $matrix
#> DataFrame with 6 rows and 7 columns
#> Gnumber DrugName
#> <character> <character>
#> G00004_drug_004_moa_A_G00021_drug_021_moa_D_72 G00004 drug_004
#> G00004_drug_004_moa_A_G00026_drug_026_moa_E_72 G00004 drug_004
#> G00005_drug_005_moa_A_G00021_drug_021_moa_D_72 G00005 drug_005
#> G00005_drug_005_moa_A_G00026_drug_026_moa_E_72 G00005 drug_005
#> G00006_drug_006_moa_A_G00021_drug_021_moa_D_72 G00006 drug_006
#> G00006_drug_006_moa_A_G00026_drug_026_moa_E_72 G00006 drug_006
#> drug_moa Gnumber_2
#> <character> <character>
#> G00004_drug_004_moa_A_G00021_drug_021_moa_D_72 moa_A G00021
#> G00004_drug_004_moa_A_G00026_drug_026_moa_E_72 moa_A G00026
#> G00005_drug_005_moa_A_G00021_drug_021_moa_D_72 moa_A G00021
#> G00005_drug_005_moa_A_G00026_drug_026_moa_E_72 moa_A G00026
#> G00006_drug_006_moa_A_G00021_drug_021_moa_D_72 moa_A G00021
#> G00006_drug_006_moa_A_G00026_drug_026_moa_E_72 moa_A G00026
#> DrugName_2 drug_moa_2
#> <character> <character>
#> G00004_drug_004_moa_A_G00021_drug_021_moa_D_72 drug_021 moa_D
#> G00004_drug_004_moa_A_G00026_drug_026_moa_E_72 drug_026 moa_E
#> G00005_drug_005_moa_A_G00021_drug_021_moa_D_72 drug_021 moa_D
#> G00005_drug_005_moa_A_G00026_drug_026_moa_E_72 drug_026 moa_E
#> G00006_drug_006_moa_A_G00021_drug_021_moa_D_72 drug_021 moa_D
#> G00006_drug_006_moa_A_G00026_drug_026_moa_E_72 drug_026 moa_E
#> Duration
#> <numeric>
#> G00004_drug_004_moa_A_G00021_drug_021_moa_D_72 72
#> G00004_drug_004_moa_A_G00026_drug_026_moa_E_72 72
#> G00005_drug_005_moa_A_G00021_drug_021_moa_D_72 72
#> G00005_drug_005_moa_A_G00026_drug_026_moa_E_72 72
#> G00006_drug_006_moa_A_G00021_drug_021_moa_D_72 72
#> G00006_drug_006_moa_A_G00026_drug_026_moa_E_72 72
#>
#> $`single-agent`
#> DataFrame with 5 rows and 4 columns
#> Gnumber DrugName drug_moa Duration
#> <character> <character> <character> <numeric>
#> G00004_drug_004_moa_A_72 G00004 drug_004 moa_A 72
#> G00005_drug_005_moa_A_72 G00005 drug_005 moa_A 72
#> G00006_drug_006_moa_A_72 G00006 drug_006 moa_A 72
#> G00021_drug_021_moa_D_72 G00021 drug_021 moa_D 72
#> G00026_drug_026_moa_E_72 G00026 drug_026 moa_E 72
This function also allows for the extraction of unified data across all the SummarizedExperiment
s inside a MultiAssayExperiment
, e.g.
MAEpply(mae, rowData, unify = TRUE)
#> Gnumber DrugName drug_moa Gnumber_2 DrugName_2 drug_moa_2 Duration
#> 1: G00004 drug_004 moa_A G00021 drug_021 moa_D 72
#> 2: G00004 drug_004 moa_A G00026 drug_026 moa_E 72
#> 3: G00005 drug_005 moa_A G00021 drug_021 moa_D 72
#> 4: G00005 drug_005 moa_A G00026 drug_026 moa_E 72
#> 5: G00006 drug_006 moa_A G00021 drug_021 moa_D 72
#> 6: G00006 drug_006 moa_A G00026 drug_026 moa_E 72
#> 7: G00004 drug_004 moa_A <NA> <NA> <NA> 72
#> 8: G00005 drug_005 moa_A <NA> <NA> <NA> 72
#> 9: G00006 drug_006 moa_A <NA> <NA> <NA> 72
#> 10: G00021 drug_021 moa_D <NA> <NA> <NA> 72
#> 11: G00026 drug_026 moa_E <NA> <NA> <NA> 72
All the metrics data are stored inside the assays
of a SummarizedExperiment
. For downstream analyses, we provide tools for extracting the data into a user-friendly data.table
format.
There are two functions: one working on a MultiAssayExperiment
object (convert_mae_assay_to_dt
) and one working on a SummarizedExperiment
object (convert_se_assay_to_dt
).
mdt <- convert_mae_assay_to_dt(mae, "Metrics")
#> Loading required package: BumpyMatrix
head(mdt, 3)
#> rId
#> 1: G00004_drug_004_moa_A_G00021_drug_021_moa_D_72
#> 2: G00004_drug_004_moa_A_G00021_drug_021_moa_D_72
#> 3: G00004_drug_004_moa_A_G00021_drug_021_moa_D_72
#> cId x_mean x_AOC x_AOC_range xc50 x_max ec50
#> 1: CL00016_cellline_GB_tissue_y_46 -0.7046 1.7046 1.7046 -Inf -0.7046 0
#> 2: CL00016_cellline_GB_tissue_y_46 -0.7039 1.7039 1.7039 -Inf -0.7039 0
#> 3: CL00016_cellline_GB_tissue_y_46 -0.6920 1.6920 1.6920 -Inf -0.6920 0
#> x_inf x_0 h r2 x_sd_avg fit_type maxlog10Concentration
#> 1: -0.7046 -0.7046 1e-04 0 0 DRCConstantFitResult 0.4996871
#> 2: -0.7039 -0.7039 1e-04 0 0 DRCConstantFitResult 0.4996871
#> 3: -0.6920 -0.6920 1e-04 0 0 DRCConstantFitResult 0.4996871
#> N_conc normalization_type fit_source cotrt_value ratio source Gnumber
#> 1: 8 GR gDR 3.160 NA row_fittings G00004
#> 2: 8 GR gDR 1.000 NA row_fittings G00004
#> 3: 8 GR gDR 0.316 NA row_fittings G00004
#> DrugName drug_moa Gnumber_2 DrugName_2 drug_moa_2 Duration clid
#> 1: drug_004 moa_A G00021 drug_021 moa_D 72 CL00016
#> 2: drug_004 moa_A G00021 drug_021 moa_D 72 CL00016
#> 3: drug_004 moa_A G00021 drug_021 moa_D 72 CL00016
#> CellLineName Tissue ReferenceDivisionTime
#> 1: cellline_GB tissue_y 46
#> 2: cellline_GB tissue_y 46
#> 3: cellline_GB tissue_y 46
or, alternatively, for a SummarizedExperiment
object:
se <- mae[[1]]
sdt <- convert_se_assay_to_dt(se, "Metrics")
head(sdt, 3)
#> rId
#> 1: G00004_drug_004_moa_A_G00021_drug_021_moa_D_72
#> 2: G00004_drug_004_moa_A_G00021_drug_021_moa_D_72
#> 3: G00004_drug_004_moa_A_G00021_drug_021_moa_D_72
#> cId x_mean x_AOC x_AOC_range xc50 x_max ec50
#> 1: CL00016_cellline_GB_tissue_y_46 -0.7046 1.7046 1.7046 -Inf -0.7046 0
#> 2: CL00016_cellline_GB_tissue_y_46 -0.7039 1.7039 1.7039 -Inf -0.7039 0
#> 3: CL00016_cellline_GB_tissue_y_46 -0.6920 1.6920 1.6920 -Inf -0.6920 0
#> x_inf x_0 h r2 x_sd_avg fit_type maxlog10Concentration
#> 1: -0.7046 -0.7046 1e-04 0 0 DRCConstantFitResult 0.4996871
#> 2: -0.7039 -0.7039 1e-04 0 0 DRCConstantFitResult 0.4996871
#> 3: -0.6920 -0.6920 1e-04 0 0 DRCConstantFitResult 0.4996871
#> N_conc normalization_type fit_source cotrt_value ratio source Gnumber
#> 1: 8 GR gDR 3.160 NA row_fittings G00004
#> 2: 8 GR gDR 1.000 NA row_fittings G00004
#> 3: 8 GR gDR 0.316 NA row_fittings G00004
#> DrugName drug_moa Gnumber_2 DrugName_2 drug_moa_2 Duration clid
#> 1: drug_004 moa_A G00021 drug_021 moa_D 72 CL00016
#> 2: drug_004 moa_A G00021 drug_021 moa_D 72 CL00016
#> 3: drug_004 moa_A G00021 drug_021 moa_D 72 CL00016
#> CellLineName Tissue ReferenceDivisionTime
#> 1: cellline_GB tissue_y 46
#> 2: cellline_GB tissue_y 46
#> 3: cellline_GB tissue_y 46
In gDR
we require standard identifiers that should be present in the input data, such as Gnumber
, clid
, Concentration
. However, users can define their own custom identifiers.
To display the default gDR identifiers, they can use the get_env_identifiers
function:
get_env_identifiers()
#> $duration
#> [1] "Duration"
#>
#> $cellline
#> [1] "clid"
#>
#> $cellline_name
#> [1] "CellLineName"
#>
#> $cellline_tissue
#> [1] "Tissue"
#>
#> $cellline_ref_div_time
#> [1] "ReferenceDivisionTime"
#>
#> $cellline_parental_identifier
#> [1] "parental_identifier"
#>
#> $cellline_subtype
#> [1] "subtype"
#>
#> $drug
#> [1] "Gnumber"
#>
#> $drug_name
#> [1] "DrugName"
#>
#> $drug_moa
#> [1] "drug_moa"
#>
#> $untreated_tag
#> [1] "vehicle" "untreated"
#>
#> $masked_tag
#> [1] "masked"
#>
#> $well_position
#> [1] "WellRow" "WellColumn"
#>
#> $concentration
#> [1] "Concentration"
#>
#> $template
#> [1] "Template"
#>
#> $barcode
#> [1] "Barcode" "Plate"
#>
#> $drug2
#> [1] "Gnumber_2"
#>
#> $drug_name2
#> [1] "DrugName_2"
#>
#> $drug_moa2
#> [1] "drug_moa_2"
#>
#> $concentration2
#> [1] "Concentration_2"
#>
#> $drug3
#> [1] "Gnumber_3"
#>
#> $drug_name3
#> [1] "DrugName_3"
#>
#> $drug_moa3
#> [1] "drug_moa_3"
#>
#> $concentration3
#> [1] "Concentration_3"
#>
#> $data_source
#> [1] "data_source"
#>
#> $replicate
#> [1] "Replicate"
To change any of these identifiers, users can use set_env_identifier
, e.g.
set_env_identifier("concentration", "Dose")
and confirm the change by displaying:
get_env_identifiers("concentration")
#> [1] "Dose"
To restore the default identifiers, users can use reset_env_identifiers
.
reset_env_identifiers()
get_env_identifiers("concentration")
#> [1] "Concentration"
Custom changes applied to the gDR output can disrupt the operation of internal functions. Custom changes can be validated using validate_MAE
validate_MAE(mae)
or validate_SE
.
validate_SE(se)
assay(se, "Normalized") <- NULL
validate_SE(se)
#> Error in validate_SE(se): Assertion on 'exp_assay_names' failed: Must be a subset of {'RawTreated','Controls','Averaged','SmoothMatrix','BlissExcess','HSAExcess','all_iso_points','isobolograms','BlissScore','HSAScore','CIScore_50','CIScore_80','Metrics'}, but has additional elements {'Normalized'}.
sessionInfo()
#> R version 4.3.1 (2023-06-16)
#> Platform: x86_64-pc-linux-gnu (64-bit)
#> Running under: Ubuntu 22.04.3 LTS
#>
#> Matrix products: default
#> BLAS: /home/biocbuild/bbs-3.18-bioc/R/lib/libRblas.so
#> LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
#>
#> locale:
#> [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
#> [3] LC_TIME=en_GB LC_COLLATE=C
#> [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
#> [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
#> [9] LC_ADDRESS=C LC_TELEPHONE=C
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
#>
#> time zone: America/New_York
#> tzcode source: system (glibc)
#>
#> attached base packages:
#> [1] stats4 stats graphics grDevices utils datasets methods
#> [8] base
#>
#> other attached packages:
#> [1] BumpyMatrix_1.10.0 MultiAssayExperiment_1.28.0
#> [3] SummarizedExperiment_1.32.0 Biobase_2.62.0
#> [5] GenomicRanges_1.54.0 GenomeInfoDb_1.38.0
#> [7] IRanges_2.36.0 S4Vectors_0.40.0
#> [9] BiocGenerics_0.48.0 MatrixGenerics_1.14.0
#> [11] matrixStats_1.0.0 gDRutils_1.0.0
#> [13] BiocStyle_2.30.0
#>
#> loaded via a namespace (and not attached):
#> [1] Matrix_1.6-1.1 jsonlite_1.8.7 compiler_4.3.1
#> [4] BiocManager_1.30.22 crayon_1.5.2 Rcpp_1.0.11
#> [7] bitops_1.0-7 jquerylib_0.1.4 yaml_2.3.7
#> [10] fastmap_1.1.1 lattice_0.22-5 R6_2.5.1
#> [13] XVector_0.42.0 S4Arrays_1.2.0 knitr_1.44
#> [16] backports_1.4.1 checkmate_2.2.0 DelayedArray_0.28.0
#> [19] bookdown_0.36 stringfish_0.15.8 GenomeInfoDbData_1.2.11
#> [22] bslib_0.5.1 rlang_1.1.1 cachem_1.0.8
#> [25] xfun_0.40 sass_0.4.7 RcppParallel_5.1.7
#> [28] SparseArray_1.2.0 cli_3.6.1 magrittr_2.0.3
#> [31] zlibbioc_1.48.0 digest_0.6.33 grid_4.3.1
#> [34] data.table_1.14.8 evaluate_0.22 abind_1.4-5
#> [37] RCurl_1.98-1.12 RApiSerialize_0.1.2 rmarkdown_2.25
#> [40] qs_0.25.5 tools_4.3.1 htmltools_0.5.6.1