bigPint 1.4.0
Researchers who prefer using the SummarizedExperiment
class can feed their data and associated metrics into the bigPint
methods using one SummarizedExperiment
object (instead of both a data
and dataMetrics
object). We demonstrate here how to create a SummarizedExperiment
object. In the remaining articles of the vignette, the example code uses the data
and dataMetrics
objects as example input. However, at the bottom of each vignette, we also include the corresponding example code that uses instead the SummarizedExperiment
object to create the exact same plots.
As was shown in the article Data object, the data
object called soybean_ir_sub
contained 5,604 genes and two treatment groups, N and P (Lauter and Graham 2016). We can create a SummarizedExperiment
object that combines aspects of the data
object and dataMetrics
object. We start by preparing the data
and dataMetrics
objects.
library(bigPint)
library(edgeR)
library(data.table)
library(dplyr)
library(DelayedArray)
library(SummarizedExperiment)
data(soybean_ir_sub)
data = soybean_ir_sub
rownames(data) = data[,1]
y = DGEList(counts=data[,-1])
group = c(1,1,1,2,2,2)
y = DGEList(counts=y, group=group)
Group = factor(c(rep("N",3), rep("P",3)))
design <- model.matrix(~0+Group, data=y$samples)
colnames(design) <- levels(Group)
y <- estimateDisp(y, design)
fit <- glmFit(y, design)
dataMetrics <- data.frame(ID = rownames(data))
for (i in 1:(ncol(fit)-1)){
for (j in (i+1):ncol(fit)){
contrast=rep(0,ncol(fit))
contrast[i]=1
contrast[j]=-1
lrt <- glmLRT(fit, contrast=contrast)
lrt <- topTags(lrt, n = nrow(y[[1]]))[[1]]
setDT(lrt, keep.rownames = TRUE)[]
lrt <- as.data.frame(lrt)
colnames(lrt) <- paste0(colnames(fit)[i], "_", colnames(fit)[j], ".", colnames(lrt))
colnames(lrt)[1] = "ID"
reorderID <- full_join(data, lrt, by = "ID")
dataMetrics <- cbind(dataMetrics, reorderID[,-c(1:ncol(data))])
}
}
Now, we can tranform our data
object into a DelayedMatrix
class object and then input both our data
and dataMetrics
objects combined into a SummarizedExperiment
class object. We can verify that the assay()
and rowData()
methods work for accessing our SummarizedExperiment
object.
dataMetrics$ID <- as.character(dataMetrics$ID)
data = data[,-1]
data <- DelayedArray(data)
se_soybean_ir_sub <- SummarizedExperiment(assays = data, rowData = dataMetrics)
assay(se_soybean_ir_sub)
## <5604 x 6> matrix of class DelayedMatrix and type "integer":
## N.1 N.2 N.3 P.1 P.2 P.3
## Glyma.06G158700.Wm82.a2.v1 48 28 15 29 16 8
## Glyma.08G156000.Wm82.a2.v1 0 0 0 0 0 0
## Glyma.12G070600.Wm82.a2.v1 8 11 11 5 8 7
## Glyma.19G045200.Wm82.a2.v1 200 192 187 186 193 183
## Glyma.05G050000.Wm82.a2.v1 0 0 0 0 0 0
## ... . . . . . .
## Glyma.08G043300.Wm82.a2.v1 31 32 17 23 24 23
## Glyma.01G089300.Wm82.a2.v1 1859 1904 1607 1869 1735 1479
## Glyma.04G168000.Wm82.a2.v1 1 2 0 1 1 6
## Glyma.U001000.Wm82.a2.v1 0 0 0 0 0 0
## Glyma.17G217700.Wm82.a2.v1 0 0 0 0 0 0
rowData(se_soybean_ir_sub)
## DataFrame with 5604 rows and 6 columns
## ID N_P.logFC N_P.logCPM
## <character> <numeric> <numeric>
## Glyma.06G158700.Wm82.a2.v1 Glyma.06G158700.Wm82.a2.v1 0.604141 5.26602
## Glyma.08G156000.Wm82.a2.v1 Glyma.08G156000.Wm82.a2.v1 0.000000 1.57995
## Glyma.12G070600.Wm82.a2.v1 Glyma.12G070600.Wm82.a2.v1 0.430824 3.94990
## Glyma.19G045200.Wm82.a2.v1 Glyma.19G045200.Wm82.a2.v1 -0.102641 8.17766
## Glyma.05G050000.Wm82.a2.v1 Glyma.05G050000.Wm82.a2.v1 0.000000 1.57995
## ... ... ... ...
## Glyma.08G043300.Wm82.a2.v1 Glyma.08G043300.Wm82.a2.v1 0.0281390 5.33083
## Glyma.01G089300.Wm82.a2.v1 Glyma.01G089300.Wm82.a2.v1 -0.0690497 11.35453
## Glyma.04G168000.Wm82.a2.v1 Glyma.04G168000.Wm82.a2.v1 -1.5000373 2.52153
## Glyma.U001000.Wm82.a2.v1 Glyma.U001000.Wm82.a2.v1 0.0000000 1.57995
## Glyma.17G217700.Wm82.a2.v1 Glyma.17G217700.Wm82.a2.v1 0.0000000 1.57995
## N_P.LR N_P.PValue N_P.FDR
## <numeric> <numeric> <numeric>
## Glyma.06G158700.Wm82.a2.v1 1.83612 0.175406 0.459549
## Glyma.08G156000.Wm82.a2.v1 0.00000 1.000000 1.000000
## Glyma.12G070600.Wm82.a2.v1 0.91407 0.339037 0.653582
## Glyma.19G045200.Wm82.a2.v1 0.38498 0.534950 0.862391
## Glyma.05G050000.Wm82.a2.v1 0.00000 1.000000 1.000000
## ... ... ... ...
## Glyma.08G043300.Wm82.a2.v1 0.00922095 0.923500 1.000000
## Glyma.01G089300.Wm82.a2.v1 0.17565563 0.675134 0.979359
## Glyma.04G168000.Wm82.a2.v1 2.31443887 0.128178 0.370071
## Glyma.U001000.Wm82.a2.v1 0.00000000 1.000000 1.000000
## Glyma.17G217700.Wm82.a2.v1 0.00000000 1.000000 1.000000
Similarly, as was shown in the data page, the data
object called soybean_cn_sub
contained 7,332 genes and three treatment groups, S1, S2, and S3 (Brown and Hudson 2015). We can create a SummarizedExperiment
object that combines aspects of the data
object and dataMetrics
object. We start by preparing the data
and dataMetrics
objects.
data(soybean_cn_sub)
data = soybean_cn_sub
rownames(data) = data[,1]
y = DGEList(counts=data[,-1])
group = c(1,1,1,2,2,2,3,3,3)
y = DGEList(counts=y, group=group)
Group = factor(c(rep("S1",3), rep("S2",3), rep("S3",3)))
design <- model.matrix(~0+Group, data=y$samples)
colnames(design) <- levels(Group)
y <- estimateDisp(y, design)
fit <- glmFit(y, design)
dataMetrics <- data.frame(ID = rownames(data))
for (i in 1:(ncol(fit)-1)){
for (j in (i+1):ncol(fit)){
contrast=rep(0,ncol(fit))
contrast[i]=1
contrast[j]=-1
lrt <- glmLRT(fit, contrast=contrast)
lrt <- topTags(lrt, n = nrow(y[[1]]))[[1]]
setDT(lrt, keep.rownames = TRUE)[]
lrt <- as.data.frame(lrt)
colnames(lrt) <- paste0(colnames(fit)[i], "_", colnames(fit)[j], ".", colnames(lrt))
colnames(lrt)[1] = "ID"
reorderID <- full_join(data, lrt, by = "ID")
dataMetrics <- cbind(dataMetrics, reorderID[,-c(1:ncol(data))])
}
}
Now, we can tranform our data
object into a DelayedMatrix
class object and then input both our data
and dataMetrics
objects combined into a SummarizedExperiment
class object. We can verify that the assay()
and rowData()
methods work for accessing our SummarizedExperiment
object.
dataMetrics$ID <- as.character(dataMetrics$ID)
data = data[,-1]
data <- DelayedArray(data)
se_soybean_cn_sub <- SummarizedExperiment(assays = data, rowData = dataMetrics)
assay(se_soybean_cn_sub)
## <7332 x 9> matrix of class DelayedMatrix and type "double":
## S1.1 S1.2 S1.3 ... S3.2 S3.3
## Glyma06g12670.1 0.8024444 2.7088837 1.7634068 . 8.3675932 8.3893470
## Glyma08g12390.2 4.7687202 5.2357774 5.1666308 . 4.0314271 4.2693124
## Glyma12g02076.11 3.1899340 2.9021310 2.9065021 . 2.7311049 3.2556486
## Glyma18g53450.1 0.8024444 0.8024444 0.8024444 . 0.8024444 0.8024444
## Glyma05g03470.4 3.1899340 2.4801080 3.2620818 . 3.4196490 4.1435591
## ... . . . . . .
## Glyma02g14300.1 4.6005599 4.9369766 3.8996875 . 5.7465508 5.8893165
## Glyma06g17451.4 0.8024444 3.0701936 2.2841221 . 2.4400407 0.8024444
## Glyma06g01600.2 2.5761594 4.0431593 4.6920688 . 3.8463167 3.2556486
## Glyma15g08800.1 0.8024444 2.7088837 2.2841221 . 4.1727846 3.4845295
## Glyma02g34995.1 0.8024444 0.8024444 0.8024444 . 0.8024444 0.8024444
rowData(se_soybean_cn_sub)
## DataFrame with 7332 rows and 16 columns
## ID S1_S2.logFC S1_S2.logCPM S1_S2.LR
## <character> <numeric> <numeric> <numeric>
## Glyma06g12670.1 Glyma06g12670.1 -1.978636 8.18978 1.08123e+01
## Glyma08g12390.2 Glyma08g12390.2 0.451359 7.86566 6.59893e-01
## Glyma12g02076.11 Glyma12g02076.11 -0.140494 7.58726 4.78955e-02
## Glyma18g53450.1 Glyma18g53450.1 -0.035308 6.71937 9.64288e-04
## Glyma05g03470.4 Glyma05g03470.4 0.319366 7.57941 2.10906e-01
## ... ... ... ... ...
## Glyma02g14300.1 Glyma02g14300.1 -0.3907852 8.10283 0.586513744
## Glyma06g17451.4 Glyma06g17451.4 0.0931248 7.24670 0.013834139
## Glyma06g01600.2 Glyma06g01600.2 1.4610614 7.50687 3.752409950
## Glyma15g08800.1 Glyma15g08800.1 -1.0684358 7.65877 2.528633939
## Glyma02g34995.1 Glyma02g34995.1 -0.0353080 6.71937 0.000964288
## S1_S2.PValue S1_S2.FDR S1_S3.logFC S1_S3.logCPM S1_S3.LR
## <numeric> <numeric> <numeric> <numeric> <numeric>
## Glyma06g12670.1 0.00100827 0.189554 -2.2258968 8.18978 14.87797823
## Glyma08g12390.2 0.41659763 0.999675 0.2906365 7.86566 0.28867032
## Glyma12g02076.11 0.82676658 0.999675 -0.0946709 7.58726 0.02140064
## Glyma18g53450.1 0.97522729 0.999675 -0.0372724 6.71937 0.00107434
## Glyma05g03470.4 0.64605806 0.999675 -0.4482166 7.57941 0.53623771
## ... ... ... ... ... ...
## Glyma02g14300.1 0.4437704 0.999675 -0.3833138 8.10283 0.56244469
## Glyma06g17451.4 0.9063699 0.999675 -0.1368297 7.24670 0.03217222
## Glyma06g01600.2 0.0527314 0.999675 0.0665984 7.50687 0.01240261
## Glyma15g08800.1 0.1117970 0.999675 -1.0852520 7.65877 2.62164024
## Glyma02g34995.1 0.9752273 0.999675 -0.0372724 6.71937 0.00107434
## S1_S3.PValue S1_S3.FDR S2_S3.logFC S2_S3.logCPM S2_S3.LR
## <numeric> <numeric> <numeric> <numeric> <numeric>
## Glyma06g12670.1 0.000114694 0.0347301 -0.24726041 8.18978 3.51797e-01
## Glyma08g12390.2 0.591073883 0.9997596 -0.16072237 7.86566 7.48433e-02
## Glyma12g02076.11 0.883692803 0.9997596 0.04582293 7.58726 5.17944e-03
## Glyma18g53450.1 0.973852272 0.9997596 -0.00196448 6.71937 2.97320e-06
## Glyma05g03470.4 0.463996139 0.9997596 -0.76758228 7.57941 1.39286e+00
## ... ... ... ... ... ...
## Glyma02g14300.1 0.453277 0.99976 0.00747137 8.10283 2.40346e-04
## Glyma06g17451.4 0.857650 0.99976 -0.22995447 7.24670 8.68999e-02
## Glyma06g01600.2 0.911325 0.99976 -1.39446297 7.50687 3.30368e+00
## Glyma15g08800.1 0.105415 0.99976 -0.01681614 7.65877 8.85502e-04
## Glyma02g34995.1 0.973852 0.99976 -0.00196448 6.71937 2.97320e-06
## S2_S3.PValue S2_S3.FDR
## <numeric> <numeric>
## Glyma06g12670.1 0.553098 0.999825
## Glyma08g12390.2 0.784411 0.999825
## Glyma12g02076.11 0.942627 0.999825
## Glyma18g53450.1 0.998624 0.999825
## Glyma05g03470.4 0.237922 0.999825
## ... ... ...
## Glyma02g14300.1 0.9876308 0.999825
## Glyma06g17451.4 0.7681559 0.999825
## Glyma06g01600.2 0.0691248 0.999825
## Glyma15g08800.1 0.9762605 0.999825
## Glyma02g34995.1 0.9986242 0.999825
Brown, Anne V., and Karen A. Hudson. 2015. “Developmental Profiling of Gene Expression in Soybean Trifoliate Leaves and Cotyledons.” BMC Plant Biology 15 (1). BioMed Central:169.
Lauter, AN Moran, and MA Graham. 2016. “NCBI Sra Bioproject Accession: PRJNA318409.”