The gctsc package provides fast and scalable tools for
fitting Gaussian copula models for count time series,
supporting a wide range of discrete marginals:
These models are constructed from a latent Gaussian ARMA process combined with a discrete marginal distribution. Likelihood evaluation requires the multivariate normal rectangle probability
\[ P\big( a_t(y_t;\psi) \le Z_t \le b_t(y_t;\psi),\ t=1,\ldots,n \big), \]
where \((Z_t)\) follows a Gaussian ARMA\((p,q)\) process with correlation matrix \(\Psi\), and the marginal-specific truncation bounds are
\[ a_t = \Phi^{-1}\!\big(F_t(y_t - 1;\psi)\big), \qquad b_t = \Phi^{-1}\!\big(F_t(y_t;\psi)\big). \]
The package provides several likelihood approximation methods:
In addition, the package offers:
The TMET method is an adaptation of minimax exponential tilting to causal and invertible ARMA processes, enabling fast, stable, and accurate likelihood computation even for large \(n\). The package also includes utilities for simulating count time series under various marginal models, fitting Gaussian copula models, performing diagnostics, and generating forecasts.
This section illustrates basic usage of the gctsc
package, including data simulation, likelihood approximation, and model
fitting under several margins.
library(gctsc)
n <- 100
mu <- 10
phi <- 0.2
arma_order <- c(1, 0)
tau <- c(phi)
# Simulate Poisson count data
sim_data <- sim_poisson(mu, tau, arma_order, nsim = n, seed = 7)
y <- sim_data$y
plot(y, type = "l", main = "Simulated Poisson AR(1) Counts")n <- 100
phi <- 0.5
tau <- c(phi)
beta_0 <- 1.2
prob <- plogis(beta_0)
rho <- 0.1
pi0 <- 0.8
size <- 24
set.seed(7)
sim_data <- sim_zibb(prob = prob * rep(1,n),
rho = rho, pi0 = pi0,
size = size, tau = tau,
arma_order = c(1,0),
nsim = n)
y <- sim_data$y
plot(y, type = "l", main = "Simulated ZIBB AR(1) Counts")n <- 300
mu <- 10
phi <- 0.5
theta <- 0.2
arma_order <- c(1,1)
tau <- c(phi, theta)
# Simulate Poisson count data
sim_data <- sim_poisson(mu, tau, arma_order, nsim = n, seed = 1)
y <- sim_data$y
# Compute bounds for truncated MVN using marginal object
marg <- poisson.marg()
X <- matrix(1, nrow = n)
ab <- marg$bounds(y, X, lambda = mu)
lower <- ab[,1]
upper <- ab[,2]
# Likelihood approximation
llk_tmet <- pmvn_tmet(lower, upper, tau, od = arma_order, pm = 30, QMC = TRUE)
llk_ghk <- pmvn_ghk(lower, upper, tau, od = arma_order, QMC = TRUE)
c(TMET = llk_tmet, GHK = llk_ghk)
#> TMET GHK
#> -689.3957 -689.5311fit <- gctsc(
formula = y ~ 1,
marginal = poisson.marg(),
cormat = arma.cormat(p = 1, q = 1),
method = "CE"
)
summary(fit)
#>
#> --- Summary of Gaussian Copula Time Series Model ---
#> Call:
#> gctsc(formula = y ~ 1, marginal = poisson.marg(), cormat = arma.cormat(p = 1, q = 1), method = "CE")
#>
#> Marginal Model Coefficients:
#> Estimate Std. Error z value Pr(>|z|)
#> mu_(Intercept) 9.8800 0.3457 28.58 <2e-16 ***
#>
#> Copula (Dependence) Coefficients:
#> Estimate Std. Error z value Pr(>|z|)
#> ar1 0.54119 0.06393 8.465 <2e-16 ***
#> ma1 0.19588 0.08804 2.225 0.0261 *
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Model Fit Statistics:
#> Log-likelihood: -688.6073
#> AIC: 1383.2147
#> BIC: 1394.3260
predict(fit)
#> $mean
#> [1] 11.28665
#>
#> $median
#> [1] 11
#>
#> $mode
#> [1] 11
#>
#> $variance
#> [1] 6.22632
#>
#> $p_y
#> [1] 4.169107e-09 4.004506e-07 1.047309e-05 1.281917e-04 9.164282e-04
#> [6] 4.319817e-03 1.449642e-02 3.651350e-02 7.175403e-02 1.133094e-01
#> [11] 1.471716e-01 1.602127e-01 1.484676e-01 1.186562e-01 8.269738e-02
#> [16] 5.074418e-02 2.764304e-02 1.346672e-02 5.905024e-03 2.344048e-03
#> [21] 8.467194e-04 2.796171e-04 8.477591e-05 2.368836e-05 6.121777e-06
#> [26] 1.467913e-06 3.275622e-07 6.821017e-08 1.328835e-08 2.427647e-09
#> [31] 4.168203e-10 6.739905e-11 1.674515e-11 1.674515e-11
#>
#> $lower
#> [1] 7
#>
#> $upper
#> [1] 16pred_tmet <- predict(
fit,
method = "TMET",
y_obs = 10
)
pred_tmet
#> $mean
#> [1] 11.00809
#>
#> $median
#> [1] 11
#>
#> $mode
#> [1] 11
#>
#> $variance
#> [1] 5.929456
#>
#> $p_y
#> [1] 4.669191e-09 4.732837e-07 1.273629e-05 1.579037e-04 1.130171e-03
#> [6] 5.284239e-03 1.745280e-02 4.297591e-02 8.207544e-02 1.252930e-01
#> [11] 1.565649e-01 1.632569e-01 1.443291e-01 1.096304e-01 7.236548e-02
#> [16] 4.191805e-02 2.148999e-02 9.823879e-03 4.031023e-03 1.493464e-03
#> [21] 5.022490e-04 1.540497e-04 4.328116e-05 1.118263e-05 2.666618e-06
#> [26] 5.888241e-07 1.207650e-07 2.307012e-08 4.115713e-09 6.873528e-10
#> [31] 1.077048e-10 1.586821e-11 3.760333e-12 3.760333e-12
#>
#> $lower
#> [1] 7
#>
#> $upper
#> [1] 16
#>
#> $CRPS
#> [1] 0.6905651
#>
#> $LOGS
#> [1] 1.854284## Load weekly Campylobacter incidence data
data("campyl", package = "gctsc")
y <- campyl[,"X1"]
n <- length(y)
## Plot the time series
ts_y <- ts(y, start = c(2001, 1), frequency = 52)
plot(ts_y, main = "Weekly Campylobacter Incidence",
ylab = "Cases", xlab = "Year")
## ---------------------------------------------------------------
## Construct seasonal covariates
## ---------------------------------------------------------------
## Seasonal structure appears to have yearly periodicity,
## so we include sine/cosine terms with period = 52 weeks.
time <- 1:n
X <- data.frame(
intercept = 1,
sin52 = sin(2 * pi * time / 52),
cos52 = cos(2 * pi * time / 52)
)
## Combine response and covariates
data_df <- data.frame(Y = y, X)
## Use the first 800 observations for model fitting
train_end <- 1000
data_train <- data_df[1:train_end, ]
## ---------------------------------------------------------------
## Fit a Negative Binomial Gaussian Copula model
## ---------------------------------------------------------------
## We use:
## - Negative Binomial marginal (log link)
## - ARMA(1,1) latent correlation structure
## - GHK likelihood approximation
##
## The model is:
## E[Y_t] = exp(β0 + β1 sin + β2 cos)
##
## Covariates enter only through the marginal mean.
fit <- gctsc(
formula = Y ~ sin52 + cos52,
data = data_train,
marginal = negbin.marg(link = "log"),
cormat = arma.cormat(p = 1, q = 1),
method = "CE", ### method can be changed to TMET or GHK
options = gctsc.opts(seed = 1) # fixed seed for reproducibility
)
summary(fit)
#>
#> --- Summary of Gaussian Copula Time Series Model ---
#> Call:
#> gctsc(formula = Y ~ sin52 + cos52, data = data_train, marginal = negbin.marg(link = "log"), cormat = arma.cormat(p = 1, q = 1), method = "CE", options = gctsc.opts(seed = 1))
#>
#> Marginal Model Coefficients:
#> Estimate Std. Error z value Pr(>|z|)
#> mu_(Intercept) 1.57385 0.07968 19.753 < 2e-16 ***
#> mu_sin52 -0.31857 0.02911 -10.945 < 2e-16 ***
#> mu_cos52 -0.30412 0.02860 -10.634 < 2e-16 ***
#> dispersion 0.13623 0.03234 4.212 2.53e-05 ***
#>
#> Copula (Dependence) Coefficients:
#> Estimate Std. Error z value Pr(>|z|)
#> ar1 0.988033 0.006538 151.13 <2e-16 ***
#> ma1 -0.914750 0.020137 -45.42 <2e-16 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Model Fit Statistics:
#> Log-likelihood: -2318.2763
#> AIC: 4648.5526
#> BIC: 4677.9992
## ---------------------------------------------------------------
## Residual diagnostics
## ---------------------------------------------------------------
plot(fit)
## ---------------------------------------------------------------
## One-step-ahead prediction
## ---------------------------------------------------------------
## Predict Y_{801} using fitted model
new_obs_index <- train_end + 1
pred <- predict(
fit,
X_test = data_df[new_obs_index, ],
y_obs = data_df[new_obs_index, "Y"]
)
pred
#> $mean
#> [1] 4.022336
#>
#> $median
#> [1] 4
#>
#> $mode
#> [1] 3
#>
#> $variance
#> [1] 4.855465
#>
#> $p_y
#> [1] 2.133817e-02 8.592509e-02 1.558846e-01 1.893890e-01 1.785033e-01
#> [6] 1.407713e-01 9.716988e-02 6.045921e-02 3.460629e-02 1.849376e-02
#> [11] 9.330130e-03 4.481838e-03 2.063762e-03 9.159080e-04 3.935089e-04
#> [16] 1.642699e-04 6.683347e-05 2.656987e-05 1.034443e-05 3.951611e-06
#> [21] 1.483580e-06 5.482073e-07 1.996306e-07 7.172078e-08 2.544668e-08
#> [26] 8.924245e-09 3.009186e-09 3.009186e-09 3.009186e-09 3.009186e-09
#>
#> $lower
#> [1] 1
#>
#> $upper
#> [1] 9
#>
#> $CRPS
#> [1] 1.066654
#>
#> $LOGS
#> [1] 1.858639Further examples (Negative Binomial, Binomial,Beta-Binomial, ZIP,
ZIB, ZIBB, including cases with covariates) and another real data
analysis are available in the inst/examples/ directory of
the package source.