library(gDRutils)
suppressPackageStartupMessages(library(MultiAssayExperiment))
gDRutils is part of the gDR suite. This package provides a set of tools for, among others:
* data manipulation, especially of the output of the gDRcore package (MultiAssayExperiment and SummarizedExperiment objects),
* data extraction,
* managing identifiers used for creating gDR experiments,
* data validation.
The basic output of the gDRcore package is a MultiAssayExperiment object. The function MAEpply allows for manipulation of the data in this object and can be used in a similar way to the base function lapply.
mae <- get_synthetic_data("finalMAE_combo_matrix_small")
MAEpply(mae, dim)
#> $combination
#> [1] 6 2
#>
#> $`single-agent`
#> [1] 5 2
MAEpply(mae, rowData)
#> $combination
#> DataFrame with 6 rows and 7 columns
#> Gnumber DrugName
#> <character> <character>
#> G00004_drug_004_moa_A_G00021_drug_021_moa_D_72 G00004 drug_004
#> G00004_drug_004_moa_A_G00026_drug_026_moa_E_72 G00004 drug_004
#> G00005_drug_005_moa_A_G00021_drug_021_moa_D_72 G00005 drug_005
#> G00005_drug_005_moa_A_G00026_drug_026_moa_E_72 G00005 drug_005
#> G00006_drug_006_moa_A_G00021_drug_021_moa_D_72 G00006 drug_006
#> G00006_drug_006_moa_A_G00026_drug_026_moa_E_72 G00006 drug_006
#> drug_moa Gnumber_2
#> <character> <character>
#> G00004_drug_004_moa_A_G00021_drug_021_moa_D_72 moa_A G00021
#> G00004_drug_004_moa_A_G00026_drug_026_moa_E_72 moa_A G00026
#> G00005_drug_005_moa_A_G00021_drug_021_moa_D_72 moa_A G00021
#> G00005_drug_005_moa_A_G00026_drug_026_moa_E_72 moa_A G00026
#> G00006_drug_006_moa_A_G00021_drug_021_moa_D_72 moa_A G00021
#> G00006_drug_006_moa_A_G00026_drug_026_moa_E_72 moa_A G00026
#> DrugName_2 drug_moa_2
#> <character> <character>
#> G00004_drug_004_moa_A_G00021_drug_021_moa_D_72 drug_021 moa_D
#> G00004_drug_004_moa_A_G00026_drug_026_moa_E_72 drug_026 moa_E
#> G00005_drug_005_moa_A_G00021_drug_021_moa_D_72 drug_021 moa_D
#> G00005_drug_005_moa_A_G00026_drug_026_moa_E_72 drug_026 moa_E
#> G00006_drug_006_moa_A_G00021_drug_021_moa_D_72 drug_021 moa_D
#> G00006_drug_006_moa_A_G00026_drug_026_moa_E_72 drug_026 moa_E
#> Duration
#> <numeric>
#> G00004_drug_004_moa_A_G00021_drug_021_moa_D_72 72
#> G00004_drug_004_moa_A_G00026_drug_026_moa_E_72 72
#> G00005_drug_005_moa_A_G00021_drug_021_moa_D_72 72
#> G00005_drug_005_moa_A_G00026_drug_026_moa_E_72 72
#> G00006_drug_006_moa_A_G00021_drug_021_moa_D_72 72
#> G00006_drug_006_moa_A_G00026_drug_026_moa_E_72 72
#>
#> $`single-agent`
#> DataFrame with 5 rows and 4 columns
#> Gnumber DrugName drug_moa Duration
#> <character> <character> <character> <numeric>
#> G00004_drug_004_moa_A_72 G00004 drug_004 moa_A 72
#> G00005_drug_005_moa_A_72 G00005 drug_005 moa_A 72
#> G00006_drug_006_moa_A_72 G00006 drug_006 moa_A 72
#> G00021_drug_021_moa_D_72 G00021 drug_021 moa_D 72
#> G00026_drug_026_moa_E_72 G00026 drug_026 moa_E 72
This function also allows for the extraction of unified data across all the SummarizedExperiments inside a MultiAssayExperiment, e.g.
MAEpply(mae, rowData, unify = TRUE)
#> Gnumber DrugName drug_moa Gnumber_2 DrugName_2 drug_moa_2 Duration
#> <char> <char> <char> <char> <char> <char> <num>
#> 1: G00004 drug_004 moa_A G00021 drug_021 moa_D 72
#> 2: G00004 drug_004 moa_A G00026 drug_026 moa_E 72
#> 3: G00005 drug_005 moa_A G00021 drug_021 moa_D 72
#> 4: G00005 drug_005 moa_A G00026 drug_026 moa_E 72
#> 5: G00006 drug_006 moa_A G00021 drug_021 moa_D 72
#> 6: G00006 drug_006 moa_A G00026 drug_026 moa_E 72
#> 7: G00004 drug_004 moa_A <NA> <NA> <NA> 72
#> 8: G00005 drug_005 moa_A <NA> <NA> <NA> 72
#> 9: G00006 drug_006 moa_A <NA> <NA> <NA> 72
#> 10: G00021 drug_021 moa_D <NA> <NA> <NA> 72
#> 11: G00026 drug_026 moa_E <NA> <NA> <NA> 72
All the metrics data are stored inside the assays of a SummarizedExperiment. For downstream analyses, we provide tools for extracting the data into a user-friendly data.table format.
There are two such functions: one working on a MultiAssayExperiment object (convert_mae_assay_to_dt) and one working on a SummarizedExperiment object (convert_se_assay_to_dt).
mdt <- convert_mae_assay_to_dt(mae, "Metrics")
#> Loading required package: BumpyMatrix
head(mdt, 3)
#> rId
#> <char>
#> 1: G00004_drug_004_moa_A_G00021_drug_021_moa_D_72
#> 2: G00004_drug_004_moa_A_G00021_drug_021_moa_D_72
#> 3: G00004_drug_004_moa_A_G00021_drug_021_moa_D_72
#> cId x_mean x_AOC x_AOC_range xc50 x_max
#> <char> <num> <num> <num> <num> <num>
#> 1: CL00016_cellline_GB_tissue_y_46 -0.7046 1.7046 1.7046 -Inf -0.7046
#> 2: CL00016_cellline_GB_tissue_y_46 -0.7039 1.7039 1.7039 -Inf -0.7039
#> 3: CL00016_cellline_GB_tissue_y_46 -0.6920 1.6920 1.6920 -Inf -0.6920
#> ec50 x_inf x_0 h r2 x_sd_avg fit_type
#> <num> <num> <num> <num> <num> <num> <char>
#> 1: 0 -0.7046 -0.7046 1e-04 0 0 DRCConstantFitResult
#> 2: 0 -0.7039 -0.7039 1e-04 0 0 DRCConstantFitResult
#> 3: 0 -0.6920 -0.6920 1e-04 0 0 DRCConstantFitResult
#> maxlog10Concentration N_conc normalization_type fit_source cotrt_value ratio
#> <num> <int> <char> <char> <num> <num>
#> 1: 0.4996871 8 GR gDR 3.160 NA
#> 2: 0.4996871 8 GR gDR 1.000 NA
#> 3: 0.4996871 8 GR gDR 0.316 NA
#> source Gnumber DrugName drug_moa Gnumber_2 DrugName_2 drug_moa_2
#> <char> <char> <char> <char> <char> <char> <char>
#> 1: row_fittings G00004 drug_004 moa_A G00021 drug_021 moa_D
#> 2: row_fittings G00004 drug_004 moa_A G00021 drug_021 moa_D
#> 3: row_fittings G00004 drug_004 moa_A G00021 drug_021 moa_D
#> Duration clid CellLineName Tissue ReferenceDivisionTime
#> <num> <char> <char> <char> <num>
#> 1: 72 CL00016 cellline_GB tissue_y 46
#> 2: 72 CL00016 cellline_GB tissue_y 46
#> 3: 72 CL00016 cellline_GB tissue_y 46
or, alternatively, for a SummarizedExperiment object:
se <- mae[[1]]
sdt <- convert_se_assay_to_dt(se, "Metrics")
head(sdt, 3)
#> rId
#> <char>
#> 1: G00004_drug_004_moa_A_G00021_drug_021_moa_D_72
#> 2: G00004_drug_004_moa_A_G00021_drug_021_moa_D_72
#> 3: G00004_drug_004_moa_A_G00021_drug_021_moa_D_72
#> cId x_mean x_AOC x_AOC_range xc50 x_max
#> <char> <num> <num> <num> <num> <num>
#> 1: CL00016_cellline_GB_tissue_y_46 -0.7046 1.7046 1.7046 -Inf -0.7046
#> 2: CL00016_cellline_GB_tissue_y_46 -0.7039 1.7039 1.7039 -Inf -0.7039
#> 3: CL00016_cellline_GB_tissue_y_46 -0.6920 1.6920 1.6920 -Inf -0.6920
#> ec50 x_inf x_0 h r2 x_sd_avg fit_type
#> <num> <num> <num> <num> <num> <num> <char>
#> 1: 0 -0.7046 -0.7046 1e-04 0 0 DRCConstantFitResult
#> 2: 0 -0.7039 -0.7039 1e-04 0 0 DRCConstantFitResult
#> 3: 0 -0.6920 -0.6920 1e-04 0 0 DRCConstantFitResult
#> maxlog10Concentration N_conc normalization_type fit_source cotrt_value ratio
#> <num> <int> <char> <char> <num> <num>
#> 1: 0.4996871 8 GR gDR 3.160 NA
#> 2: 0.4996871 8 GR gDR 1.000 NA
#> 3: 0.4996871 8 GR gDR 0.316 NA
#> source Gnumber DrugName drug_moa Gnumber_2 DrugName_2 drug_moa_2
#> <char> <char> <char> <char> <char> <char> <char>
#> 1: row_fittings G00004 drug_004 moa_A G00021 drug_021 moa_D
#> 2: row_fittings G00004 drug_004 moa_A G00021 drug_021 moa_D
#> 3: row_fittings G00004 drug_004 moa_A G00021 drug_021 moa_D
#> Duration clid CellLineName Tissue ReferenceDivisionTime
#> <num> <char> <char> <char> <num>
#> 1: 72 CL00016 cellline_GB tissue_y 46
#> 2: 72 CL00016 cellline_GB tissue_y 46
#> 3: 72 CL00016 cellline_GB tissue_y 46
In gDR we require standard identifiers that should be present in the input data, such as Gnumber, clid, and Concentration. However, users can define their own custom identifiers.
To display the gDR default identifiers, the user can use the get_env_identifiers function:
get_env_identifiers()
#> $duration
#> [1] "Duration"
#>
#> $cellline
#> [1] "clid"
#>
#> $cellline_name
#> [1] "CellLineName"
#>
#> $cellline_tissue
#> [1] "Tissue"
#>
#> $cellline_ref_div_time
#> [1] "ReferenceDivisionTime"
#>
#> $cellline_parental_identifier
#> [1] "parental_identifier"
#>
#> $cellline_subtype
#> [1] "subtype"
#>
#> $drug
#> [1] "Gnumber"
#>
#> $drug_name
#> [1] "DrugName"
#>
#> $drug_moa
#> [1] "drug_moa"
#>
#> $untreated_tag
#> [1] "vehicle" "untreated"
#>
#> $masked_tag
#> [1] "masked"
#>
#> $well_position
#> [1] "WellRow" "WellColumn"
#>
#> $concentration
#> [1] "Concentration"
#>
#> $template
#> [1] "Template" "Treatment"
#>
#> $barcode
#> [1] "Barcode" "Plate"
#>
#> $drug2
#> [1] "Gnumber_2"
#>
#> $drug_name2
#> [1] "DrugName_2"
#>
#> $drug_moa2
#> [1] "drug_moa_2"
#>
#> $concentration2
#> [1] "Concentration_2"
#>
#> $drug3
#> [1] "Gnumber_3"
#>
#> $drug_name3
#> [1] "DrugName_3"
#>
#> $drug_moa3
#> [1] "drug_moa_3"
#>
#> $concentration3
#> [1] "Concentration_3"
#>
#> $data_source
#> [1] "data_source"
#>
#> $replicate
#> [1] "Replicate"
To change any of these identifiers, the user can use set_env_identifier, e.g.
set_env_identifier("concentration", "Dose")
and confirm the change by displaying the identifier:
get_env_identifiers("concentration")
#> [1] "Dose"
To restore the default identifiers, the user can use reset_env_identifiers.
reset_env_identifiers()
get_env_identifiers("concentration")
#> [1] "Concentration"
Custom changes applied to the gDR output can disrupt the operation of internal functions. Such changes can be validated using validate_MAE
validate_MAE(mae)
or validate_SE
.
validate_SE(se)
assay(se, "Normalized") <- NULL
validate_SE(se)
#> Error in validate_SE(se): Assertion on 'exp_assay_names' failed: Must be a subset of {'RawTreated','Controls','Averaged','excess','all_iso_points','isobolograms','scores','Metrics'}, but has additional elements {'Normalized'}.
sessionInfo()
#> R version 4.4.0 beta (2024-04-15 r86425)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 22.04.4 LTS
#>
#> Matrix products: default
#> BLAS: /home/biocbuild/bbs-3.19-bioc/R/lib/libRblas.so
#> LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
#>
#> locale:
#> [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
#> [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
#> [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
#> [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
#> [9] LC_ADDRESS=C LC_TELEPHONE=C
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
#>
#> time zone: America/New_York
#> tzcode source: system (glibc)
#>
#> attached base packages:
#> [1] stats4 stats graphics grDevices utils datasets methods
#> [8] base
#>
#> other attached packages:
#> [1] BumpyMatrix_1.12.0 MultiAssayExperiment_1.30.0
#> [3] SummarizedExperiment_1.34.0 Biobase_2.64.0
#> [5] GenomicRanges_1.56.0 GenomeInfoDb_1.40.0
#> [7] IRanges_2.38.0 S4Vectors_0.42.0
#> [9] BiocGenerics_0.50.0 MatrixGenerics_1.16.0
#> [11] matrixStats_1.3.0 gDRutils_1.2.0
#> [13] BiocStyle_2.32.0
#>
#> loaded via a namespace (and not attached):
#> [1] Matrix_1.7-0 jsonlite_1.8.8 compiler_4.4.0
#> [4] BiocManager_1.30.22 crayon_1.5.2 Rcpp_1.0.12
#> [7] jquerylib_0.1.4 yaml_2.3.8 fastmap_1.1.1
#> [10] lattice_0.22-6 R6_2.5.1 XVector_0.44.0
#> [13] S4Arrays_1.4.0 knitr_1.46 backports_1.4.1
#> [16] checkmate_2.3.1 DelayedArray_0.30.0 bookdown_0.39
#> [19] stringfish_0.16.0 GenomeInfoDbData_1.2.12 bslib_0.7.0
#> [22] rlang_1.1.3 cachem_1.0.8 xfun_0.43
#> [25] sass_0.4.9 RcppParallel_5.1.7 SparseArray_1.4.0
#> [28] cli_3.6.2 zlibbioc_1.50.0 digest_0.6.35
#> [31] grid_4.4.0 lifecycle_1.0.4 data.table_1.15.4
#> [34] evaluate_0.23 abind_1.4-5 RApiSerialize_0.1.2
#> [37] rmarkdown_2.26 httr_1.4.7 qs_0.26.1
#> [40] tools_4.4.0 htmltools_0.5.8.1 UCSC.utils_1.0.0