Contents

library(gDRutils)
suppressPackageStartupMessages(library(MultiAssayExperiment))

1 Overview

gDRutils is part of the gDR suite. This package provides a bunch of tools for, among others: * data manipulation, especially of the output of the gDRcore package (MultiAssayExperiment and SummarizedExperiment objects), * data extraction, * managing identifiers used for creating gDR experiments, * data validation.

2 Use cases

2.1 Data manipulation

The basic output of the gDRcore package is a MultiAssayExperiment object. The function MAEpply allows for manipulation of the data in this object and can be used in a similar way to the base R function lapply.

mae <- get_synthetic_data("finalMAE_combo_matrix_small")
MAEpply(mae, dim)
#> $matrix
#> [1] 6 2
#> 
#> $`single-agent`
#> [1] 5 2
MAEpply(mae, rowData)
#> $matrix
#> DataFrame with 6 rows and 7 columns
#>                                                    Gnumber    DrugName
#>                                                <character> <character>
#> G00004_drug_004_moa_A_G00021_drug_021_moa_D_72      G00004    drug_004
#> G00004_drug_004_moa_A_G00026_drug_026_moa_E_72      G00004    drug_004
#> G00005_drug_005_moa_A_G00021_drug_021_moa_D_72      G00005    drug_005
#> G00005_drug_005_moa_A_G00026_drug_026_moa_E_72      G00005    drug_005
#> G00006_drug_006_moa_A_G00021_drug_021_moa_D_72      G00006    drug_006
#> G00006_drug_006_moa_A_G00026_drug_026_moa_E_72      G00006    drug_006
#>                                                   drug_moa   Gnumber_2
#>                                                <character> <character>
#> G00004_drug_004_moa_A_G00021_drug_021_moa_D_72       moa_A      G00021
#> G00004_drug_004_moa_A_G00026_drug_026_moa_E_72       moa_A      G00026
#> G00005_drug_005_moa_A_G00021_drug_021_moa_D_72       moa_A      G00021
#> G00005_drug_005_moa_A_G00026_drug_026_moa_E_72       moa_A      G00026
#> G00006_drug_006_moa_A_G00021_drug_021_moa_D_72       moa_A      G00021
#> G00006_drug_006_moa_A_G00026_drug_026_moa_E_72       moa_A      G00026
#>                                                 DrugName_2  drug_moa_2
#>                                                <character> <character>
#> G00004_drug_004_moa_A_G00021_drug_021_moa_D_72    drug_021       moa_D
#> G00004_drug_004_moa_A_G00026_drug_026_moa_E_72    drug_026       moa_E
#> G00005_drug_005_moa_A_G00021_drug_021_moa_D_72    drug_021       moa_D
#> G00005_drug_005_moa_A_G00026_drug_026_moa_E_72    drug_026       moa_E
#> G00006_drug_006_moa_A_G00021_drug_021_moa_D_72    drug_021       moa_D
#> G00006_drug_006_moa_A_G00026_drug_026_moa_E_72    drug_026       moa_E
#>                                                 Duration
#>                                                <numeric>
#> G00004_drug_004_moa_A_G00021_drug_021_moa_D_72        72
#> G00004_drug_004_moa_A_G00026_drug_026_moa_E_72        72
#> G00005_drug_005_moa_A_G00021_drug_021_moa_D_72        72
#> G00005_drug_005_moa_A_G00026_drug_026_moa_E_72        72
#> G00006_drug_006_moa_A_G00021_drug_021_moa_D_72        72
#> G00006_drug_006_moa_A_G00026_drug_026_moa_E_72        72
#> 
#> $`single-agent`
#> DataFrame with 5 rows and 4 columns
#>                              Gnumber    DrugName    drug_moa  Duration
#>                          <character> <character> <character> <numeric>
#> G00004_drug_004_moa_A_72      G00004    drug_004       moa_A        72
#> G00005_drug_005_moa_A_72      G00005    drug_005       moa_A        72
#> G00006_drug_006_moa_A_72      G00006    drug_006       moa_A        72
#> G00021_drug_021_moa_D_72      G00021    drug_021       moa_D        72
#> G00026_drug_026_moa_E_72      G00026    drug_026       moa_E        72

This function also allows for the extraction of unified data across all the SummarizedExperiments inside the MultiAssayExperiment, e.g.

MAEpply(mae, rowData, unify = TRUE)
#>     Gnumber DrugName drug_moa Gnumber_2 DrugName_2 drug_moa_2 Duration
#>  1:  G00004 drug_004    moa_A    G00021   drug_021      moa_D       72
#>  2:  G00004 drug_004    moa_A    G00026   drug_026      moa_E       72
#>  3:  G00005 drug_005    moa_A    G00021   drug_021      moa_D       72
#>  4:  G00005 drug_005    moa_A    G00026   drug_026      moa_E       72
#>  5:  G00006 drug_006    moa_A    G00021   drug_021      moa_D       72
#>  6:  G00006 drug_006    moa_A    G00026   drug_026      moa_E       72
#>  7:  G00004 drug_004    moa_A      <NA>       <NA>       <NA>       72
#>  8:  G00005 drug_005    moa_A      <NA>       <NA>       <NA>       72
#>  9:  G00006 drug_006    moa_A      <NA>       <NA>       <NA>       72
#> 10:  G00021 drug_021    moa_D      <NA>       <NA>       <NA>       72
#> 11:  G00026 drug_026    moa_E      <NA>       <NA>       <NA>       72

2.2 Data extraction

All the metrics data are stored inside the assays of a SummarizedExperiment. For downstream analyses, we provide tools for extracting the data into a user-friendly data.table format.

There are two functions for this: one that works on a MultiAssayExperiment object (convert_mae_assay_to_dt) and one that works on a SummarizedExperiment object (convert_se_assay_to_dt).

mdt <- convert_mae_assay_to_dt(mae, "Metrics")
#> Loading required package: BumpyMatrix
head(mdt, 3)
#>                                               rId
#> 1: G00004_drug_004_moa_A_G00021_drug_021_moa_D_72
#> 2: G00004_drug_004_moa_A_G00021_drug_021_moa_D_72
#> 3: G00004_drug_004_moa_A_G00021_drug_021_moa_D_72
#>                                cId  x_mean  x_AOC x_AOC_range xc50   x_max ec50
#> 1: CL00016_cellline_GB_tissue_y_46 -0.7046 1.7046      1.7046 -Inf -0.7046    0
#> 2: CL00016_cellline_GB_tissue_y_46 -0.7039 1.7039      1.7039 -Inf -0.7039    0
#> 3: CL00016_cellline_GB_tissue_y_46 -0.6920 1.6920      1.6920 -Inf -0.6920    0
#>      x_inf     x_0     h r2 x_sd_avg             fit_type maxlog10Concentration
#> 1: -0.7046 -0.7046 1e-04  0        0 DRCConstantFitResult             0.4996871
#> 2: -0.7039 -0.7039 1e-04  0        0 DRCConstantFitResult             0.4996871
#> 3: -0.6920 -0.6920 1e-04  0        0 DRCConstantFitResult             0.4996871
#>    N_conc normalization_type fit_source cotrt_value ratio       source Gnumber
#> 1:      8                 GR        gDR       3.160    NA row_fittings  G00004
#> 2:      8                 GR        gDR       1.000    NA row_fittings  G00004
#> 3:      8                 GR        gDR       0.316    NA row_fittings  G00004
#>    DrugName drug_moa Gnumber_2 DrugName_2 drug_moa_2 Duration    clid
#> 1: drug_004    moa_A    G00021   drug_021      moa_D       72 CL00016
#> 2: drug_004    moa_A    G00021   drug_021      moa_D       72 CL00016
#> 3: drug_004    moa_A    G00021   drug_021      moa_D       72 CL00016
#>    CellLineName   Tissue ReferenceDivisionTime
#> 1:  cellline_GB tissue_y                    46
#> 2:  cellline_GB tissue_y                    46
#> 3:  cellline_GB tissue_y                    46

or, alternatively, for a SummarizedExperiment object:

se <- mae[[1]]
sdt <- convert_se_assay_to_dt(se, "Metrics")
head(sdt, 3)
#>                                               rId
#> 1: G00004_drug_004_moa_A_G00021_drug_021_moa_D_72
#> 2: G00004_drug_004_moa_A_G00021_drug_021_moa_D_72
#> 3: G00004_drug_004_moa_A_G00021_drug_021_moa_D_72
#>                                cId  x_mean  x_AOC x_AOC_range xc50   x_max ec50
#> 1: CL00016_cellline_GB_tissue_y_46 -0.7046 1.7046      1.7046 -Inf -0.7046    0
#> 2: CL00016_cellline_GB_tissue_y_46 -0.7039 1.7039      1.7039 -Inf -0.7039    0
#> 3: CL00016_cellline_GB_tissue_y_46 -0.6920 1.6920      1.6920 -Inf -0.6920    0
#>      x_inf     x_0     h r2 x_sd_avg             fit_type maxlog10Concentration
#> 1: -0.7046 -0.7046 1e-04  0        0 DRCConstantFitResult             0.4996871
#> 2: -0.7039 -0.7039 1e-04  0        0 DRCConstantFitResult             0.4996871
#> 3: -0.6920 -0.6920 1e-04  0        0 DRCConstantFitResult             0.4996871
#>    N_conc normalization_type fit_source cotrt_value ratio       source Gnumber
#> 1:      8                 GR        gDR       3.160    NA row_fittings  G00004
#> 2:      8                 GR        gDR       1.000    NA row_fittings  G00004
#> 3:      8                 GR        gDR       0.316    NA row_fittings  G00004
#>    DrugName drug_moa Gnumber_2 DrugName_2 drug_moa_2 Duration    clid
#> 1: drug_004    moa_A    G00021   drug_021      moa_D       72 CL00016
#> 2: drug_004    moa_A    G00021   drug_021      moa_D       72 CL00016
#> 3: drug_004    moa_A    G00021   drug_021      moa_D       72 CL00016
#>    CellLineName   Tissue ReferenceDivisionTime
#> 1:  cellline_GB tissue_y                    46
#> 2:  cellline_GB tissue_y                    46
#> 3:  cellline_GB tissue_y                    46

2.3 Managing gDR identifiers

In gDR we require standard identifiers that should be visible in the input data, e.g. Gnumber, clid, Concentration. However, users can define their own custom identifiers.

To display the gDR default identifiers, users can call the get_env_identifiers function:

get_env_identifiers()
#> $duration
#> [1] "Duration"
#> 
#> $cellline
#> [1] "clid"
#> 
#> $cellline_name
#> [1] "CellLineName"
#> 
#> $cellline_tissue
#> [1] "Tissue"
#> 
#> $cellline_ref_div_time
#> [1] "ReferenceDivisionTime"
#> 
#> $cellline_parental_identifier
#> [1] "parental_identifier"
#> 
#> $cellline_subtype
#> [1] "subtype"
#> 
#> $drug
#> [1] "Gnumber"
#> 
#> $drug_name
#> [1] "DrugName"
#> 
#> $drug_moa
#> [1] "drug_moa"
#> 
#> $untreated_tag
#> [1] "vehicle"   "untreated"
#> 
#> $masked_tag
#> [1] "masked"
#> 
#> $well_position
#> [1] "WellRow"    "WellColumn"
#> 
#> $concentration
#> [1] "Concentration"
#> 
#> $template
#> [1] "Template"
#> 
#> $barcode
#> [1] "Barcode" "Plate"  
#> 
#> $drug2
#> [1] "Gnumber_2"
#> 
#> $drug_name2
#> [1] "DrugName_2"
#> 
#> $drug_moa2
#> [1] "drug_moa_2"
#> 
#> $concentration2
#> [1] "Concentration_2"
#> 
#> $drug3
#> [1] "Gnumber_3"
#> 
#> $drug_name3
#> [1] "DrugName_3"
#> 
#> $drug_moa3
#> [1] "drug_moa_3"
#> 
#> $concentration3
#> [1] "Concentration_3"
#> 
#> $data_source
#> [1] "data_source"
#> 
#> $replicate
#> [1] "Replicate"

To change any of these identifiers, users can call set_env_identifier, e.g.

set_env_identifier("concentration", "Dose")

and confirm the change by displaying the identifier:

get_env_identifiers("concentration")
#> [1] "Dose"

To restore the default identifiers, users can call reset_env_identifiers.

reset_env_identifiers()
get_env_identifiers("concentration")
#> [1] "Concentration"

2.4 Data validation

Custom changes applied to the gDR output can disrupt the operation of internal functions. Such changes can be validated using validate_MAE

validate_MAE(mae)

or validate_SE.

validate_SE(se)
assay(se, "Normalized") <- NULL
validate_SE(se)
#> Error in validate_SE(se): Assertion on 'exp_assay_names' failed: Must be a subset of {'RawTreated','Controls','Averaged','SmoothMatrix','BlissExcess','HSAExcess','all_iso_points','isobolograms','BlissScore','HSAScore','CIScore_50','CIScore_80','Metrics'}, but has additional elements {'Normalized'}.

SessionInfo

sessionInfo()
#> R version 4.3.1 (2023-06-16)
#> Platform: x86_64-pc-linux-gnu (64-bit)
#> Running under: Ubuntu 22.04.3 LTS
#> 
#> Matrix products: default
#> BLAS:   /home/biocbuild/bbs-3.18-bioc/R/lib/libRblas.so 
#> LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
#> 
#> locale:
#>  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
#>  [3] LC_TIME=en_GB              LC_COLLATE=C              
#>  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
#>  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
#>  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
#> 
#> time zone: America/New_York
#> tzcode source: system (glibc)
#> 
#> attached base packages:
#> [1] stats4    stats     graphics  grDevices utils     datasets  methods  
#> [8] base     
#> 
#> other attached packages:
#>  [1] BumpyMatrix_1.10.0          MultiAssayExperiment_1.28.0
#>  [3] SummarizedExperiment_1.32.0 Biobase_2.62.0             
#>  [5] GenomicRanges_1.54.0        GenomeInfoDb_1.38.0        
#>  [7] IRanges_2.36.0              S4Vectors_0.40.0           
#>  [9] BiocGenerics_0.48.0         MatrixGenerics_1.14.0      
#> [11] matrixStats_1.0.0           gDRutils_1.0.0             
#> [13] BiocStyle_2.30.0           
#> 
#> loaded via a namespace (and not attached):
#>  [1] Matrix_1.6-1.1          jsonlite_1.8.7          compiler_4.3.1         
#>  [4] BiocManager_1.30.22     crayon_1.5.2            Rcpp_1.0.11            
#>  [7] bitops_1.0-7            jquerylib_0.1.4         yaml_2.3.7             
#> [10] fastmap_1.1.1           lattice_0.22-5          R6_2.5.1               
#> [13] XVector_0.42.0          S4Arrays_1.2.0          knitr_1.44             
#> [16] backports_1.4.1         checkmate_2.2.0         DelayedArray_0.28.0    
#> [19] bookdown_0.36           stringfish_0.15.8       GenomeInfoDbData_1.2.11
#> [22] bslib_0.5.1             rlang_1.1.1             cachem_1.0.8           
#> [25] xfun_0.40               sass_0.4.7              RcppParallel_5.1.7     
#> [28] SparseArray_1.2.0       cli_3.6.1               magrittr_2.0.3         
#> [31] zlibbioc_1.48.0         digest_0.6.33           grid_4.3.1             
#> [34] data.table_1.14.8       evaluate_0.22           abind_1.4-5            
#> [37] RCurl_1.98-1.12         RApiSerialize_0.1.2     rmarkdown_2.25         
#> [40] qs_0.25.5               tools_4.3.1             htmltools_0.5.6.1