Examples of Nonstandard Copulas – “Wild Animals”

Marius Hofert and Martin Mächler

2023-12-07

1 Swiss Alps copulas of Hofert, Vrins (2013)

This example implements the Swiss Alps copulas of Hofert, Vrins (2013, “Sibuya copulas”).

Lambda and its inverse

Lambda <- function(t) pmax(log(t), 0)
LambdaInv <- function(t) (t != 0)*exp(t) ## <=>  ifelse(t == 0, 0, exp(t))

M and its inverse (for \(M_i, i=1,2\)):

M1 <- function(t) 0.01*t
M1Inv <- function(t) t/0.01

M2 <- function(t) {
    f <- function(t.) {
        ct <- ceiling(t.)
        if(ct %% 2) t. - (ct-1)/2 else ct/2
    }
    unlist(lapply(t, f))
}
M2Inv <- function(t) {
    f <- function(t.) {
        if(t. == 0) return(0)
        t. + ceiling(t) - 1
    }
    unlist(lapply(t, f))
}

S and its inverse (for \(S_i, i=1,2\))

S1 <- function(t, H) exp(-(M1(t) + Lambda(t)*(1-exp(-H))))
S1Inv <- function(u, H, upper=1e6) unlist(lapply(u, function(u.)
    uniroot(function(x) S1(x,H=H)-u., interval=c(0, upper))$root))
S2 <- function(t, H) exp(-(M2(t) + Lambda(t)*(1-exp(-H))))
S2Inv <- function(u, H, upper=1e6) unlist(lapply(u, function(u.)
    uniroot(function(x) S2(x,H=H)-u., interval=c(0, upper))$root))

Wrappers for \(p_1\) and \(p_2\) and their inverses:

p and its inverse (for \(p_i(t_k-)\) and \(p_i(t_k), i = 1,2\)):

p1 <- function(t, k, H) exp(-M1(t)-H*k)
p1Inv <- function(u, k, H, upper=1e6) unlist(lapply(u, function(u.)
    uniroot(function(x) p1(x,k=k,H=H)-u., interval=c(0, upper))$root))
p2 <- function(t, k, H) exp(-M2(t)-H*k)
p2Inv <- function(u, k, H, upper=1e6) unlist(lapply(u, function(u.)
    uniroot(function(x) p2(x,k=k,H=H)-u., interval=c(0, upper))$root))

and the wrappers, which work with p1(), p2(), p1Inv(), p2Inv() as arguments:

p <- function(t, k, H, I, p1, p2){
    if((lI <- length(I)) == 0){
        stop("error in p")
    }else if(lI==1){
        if(I==1) p1(t, k=k, H=H) else p2(t, k=k, H=H)
    }else{ # lI == 2
        c(p1(t, k=k, H=H), p2(t, k=k, H=H))
    }
}
pInv <- function(u, k, H, I, p1Inv, p2Inv){
    if((lI <- length(I)) == 0){
        stop("error in pInv")
    }else if(lI==1){
        if(I==1) p1Inv(u, k=k, H=H) else p2Inv(u, k=k, H=H)
    }else{ # lI == 2
        c(p1Inv(u[1], k=k, H=H), p2Inv(u[2], k=k, H=H))
    }
}

Define the copula \(C\)

C <- function(u, H, Lambda, S1Inv, S2Inv) {
    if(all(u == 0)) 0 else
    u[1]*u[2] * exp(expm1(-H)^2 * Lambda(min(S1Inv(u[1],H), S2Inv(u[2],H))))
}

Compute the singular component (given \(u_1=\) u1, find \(u_2=\) u2 on the singular component) \(u_2 = S2(S1^{-1}(u_1))\):

s.comp <- function(u1, H, S1Inv, S2){
    if(u1 == 0) 0 else S2 (S1Inv(u1,H), H)
}

Generate one bivariate random vector from C:

rC1 <- function(H, LambdaInv, S1, S2, p1, p2, p1Inv, p2Inv) {
    d <- 2 # dim = 2
    ## (1)
    U <- runif(d) # for determining the default times of all components
    ## (2) -- t_{h,0} := initial value for the occurrence of the homogeneous
    ##                   Poisson process with unit intensity
    t_h <- 0
    t <- 0 # t_0; initial value for the occurrence of the jump process
    k <- 1 # indices for the sets I
    I_ <- list(1:d) # I_k; indices for which tau has to be determined
    tau <- rep(Inf,d) # in the beginning, set all default times to Inf (= "no default")
    ## (3)
    repeat{
        ## (4)
        ## k-th occurrence of a homogeneous Poisson process with unit intensity:
        t_h[k] <- rexp(1) + if(k == 1) 0 else t_h[k-1]
        ## k-th occ. of non-homogeneous Poisson proc. with integrated rate function Lambda:
        t[k] <- LambdaInv(t_h[k])
        ## (5)
        pvec <- p(t[k], k=k, H=H, I=c(1,2), p1=p1, p2=p2) # c(p_1(t_k), p_2(t_k))
        I. <- (1:d)[U >= pvec] # determine all i in I
        I <- intersect(I., I_[[k]]) # determine I
        ## (6)--(10)
        if(length(I) > 0){
            default.at.t <- U[I] <= p(t[k], k=k-1, H=H, I=I, p1=p1, p2=p2)
            tau[I[default.at.t]] <- t[k]
            Ic <- I[!default.at.t] # I complement
            if(length(Ic) > 0) tau[Ic] <- pInv(U[Ic], k=k-1, H=H, I=Ic,
                                               p1Inv=p1Inv, p2Inv=p2Inv)
        }
        ## (11) --  define I_{k+1} := I_k \ I
        I_[[k+1]] <- setdiff(I_[[k]], I)
        ## (12)
        if(length(I_[[k+1]]) == 0) break else k <- k+1
    }
    ## (14)
    c(S1(tau[1], H=H), S2(tau[2], H=H))
}

Draw n vectors of random variates from \(C\)

rC <- function(n, H, LambdaInv, S1, S2, p1, p2, p1Inv, p2Inv){
    mat <- t(sapply(rep(H, n), rC1, LambdaInv=LambdaInv, S1=S1, S2=S2,
                    p1=p1, p2=p2, p1Inv=p1Inv, p2Inv=p2Inv))
    row.names(mat) <- NULL
    mat
}

## Generate copula data
n <- 2e4  # <<< use for niceness
n <- 4000 # (rather use to decrease *.html and final package size)
H <- 10
set.seed(271)
U <- rC(n, H=H, LambdaInv=LambdaInv, S1=S1, S2=S2, p1=p1, p2=p2, p1Inv=p1Inv, p2Inv=p2Inv)

## Check margins of U
par(pty="s")
hist(U[,1], probability=TRUE, main="Histogram of the first component",
     xlab=expression(italic(U[1])))

hist(U[,2], probability=TRUE, main="Histogram of the second component",
     xlab=expression(italic(U[2])))

## Plot U (copula sample)
plot(U, pch=".", xlab=expression(italic(U[1])%~%~"U[0,1]"),
               , ylab=expression(italic(U[2])%~%~"U[0,1]"))

Wireframe plot to incorporate singular component :

require(lattice)
wf.plot <- function(grid, val.grid, s.comp, val.s.comp, Lambda, S1Inv, S2Inv){
    wireframe(val.grid ~ grid[,1]*grid[,2], xlim=c(0,1), ylim=c(0,1), zlim=c(0,1),
              aspect=1, scales = list(arrows=FALSE, col=1), # remove arrows
              par.settings= list(axis.line = list(col="transparent"), # remove global box
                                 clip = list(panel="off")),
              pts = cbind(s.comp, val.s.comp), # <- add singular component
              panel.3d.wireframe = function(x, y, z, xlim, ylim, zlim, xlim.scaled,
                                            ylim.scaled, zlim.scaled, pts, ...) {
                  panel.3dwire(x=x, y=y, z=z, xlim=xlim, ylim=ylim, zlim=zlim,
                               xlim.scaled=xlim.scaled, ylim.scaled=ylim.scaled,
                               zlim.scaled=zlim.scaled, ...)
                  xx <- xlim.scaled[1]+diff(xlim.scaled)*(pts[,1]-xlim[1])/diff(xlim)
                  yy <- ylim.scaled[1]+diff(ylim.scaled)*(pts[,2]-ylim[1])/diff(ylim)
                  zz <- zlim.scaled[1]+diff(zlim.scaled)*(pts[,3]-zlim[1])/diff(zlim)
                  panel.3dscatter(x=xx, y=yy, z=zz, xlim=xlim, ylim=ylim, zlim=zlim,
                                  xlim.scaled=xlim.scaled, ylim.scaled=ylim.scaled,
                                  zlim.scaled=zlim.scaled, type="l", col=1, ...)
              },
              xlab = expression(italic(u[1])),
              ylab = expression(italic(u[2])),
              zlab = list(expression(italic(C(u[1],u[2]))), rot=90))
}

## Copula plot with singular component
u <- seq(0, 1, length.out=20) # grid points per dimension
grid <- expand.grid(u1=u, u2=u) # grid
val.grid <- apply(grid, 1, C, H=H, Lambda=Lambda, S1Inv=S1Inv, S2Inv=S2Inv) # copula values on grid
s.comp <- cbind(u, sapply(u, s.comp, H=H, S1Inv=S1Inv, S2=S2)) # pairs (u1, u2) on singular component
val.s.comp <- apply(s.comp, 1, C, H=H, Lambda=Lambda, S1Inv=S1Inv, S2Inv=S2Inv) # corresponding z-values
wf.plot(grid=grid, val.grid=val.grid, s.comp=s.comp, val.s.comp=val.s.comp,
        Lambda=Lambda, S1Inv=S1Inv, S2Inv=S2Inv)

2 An example from Wolfgang Trutschnig and Manuela Schreyer

For more details, see Trutschnig, Fernandez Sanchez (2014) “Copulas with continuous, strictly increasing singular conditional distribution functions”

Roughly, one defines an Iterated Function System whose attractor is the word “Copula” and starts the chaos game.

Define the Iterated Function System

IFS <- local({ ## Using `local`, so `n` is part of IFS
    n <- 23
    list(function(x) c(3*x[1]/n,      x[2]/4),
         function(x) c(-(x[2]-1)/n,   x[1]/2+1/4),
         function(x) c(3*x[1]/n,      x[2]/4+3/4),
         function(x) c((3*x[1]+4)/n,  x[2]/4),
         function(x) c(-(x[2]-5)/n,   x[1]/2+1/4),
         function(x) c((3*x[1]+4)/n,  x[2]/4+3/4),
         function(x) c(-(x[2]-7)/n,   x[1]/2+1/4),
         function(x) c(-x[2]/n+9/n,   3*x[1]/4),
         function(x) c((3*x[1]+8)/n,  x[2]/4+3/4),
         function(x) c(x[1]/n+10/n,   x[2]/8+1/2+1/8),
         function(x) c(2*x[1]/n+9/n,  x[2]/4+1/4+1/8),
         function(x) c(-x[2]/n+13/n,  (3*x[1]+1)/4),
         function(x) c((3*x[1]+12)/n, x[2]/4),
         function(x) c((3*x[1]+12)/n, x[2]/4),
         function(x) c(-x[2]/n+15/n,  (3*x[1]+1)/4),
         function(x) c((3*x[1]+16)/n, x[2]/4),
         function(x) c(-(x[2]-21)/n,  3*x[1]/4),
         function(x) c((3*x[1]+20)/n, x[2]/4+3/4),
         function(x) c((x[1]+21)/n,   x[2]/4+1/4+1/8),
         function(x) c(-(x[2]-23)/n,  3*x[1]/4))
})

Run chaos game B times

B <- 20 # replications
n.steps <- 20000 # number of steps
AA <- vector("list", length=B)
set.seed(271)
for(i in 1:B) {
    ind <- sample(length(IFS), size=n.steps, replace=TRUE) # (randomly) 'bootstrap' functions
    res <- matrix(0, nrow=n.steps+1, ncol=2) # result matrix (for each i)
    pt <- c(0, 0) # initial point
    for(r in seq_len(n.steps)) {
        res[r+1,] <- IFS[[ind[r]]](pt) # evaluate randomly chosen functions at pt
        pt <- res[r+1,] # redefine point
    }
    AA[[i]] <- res # keep this matrix
}

A <- do.call(rbind, AA) # rbind (n.steps+1, 2)-matrices
n <- nrow(A)
stopifnot(ncol(A) == 2, n == B*(n.steps+1)) # sanity check

\(X :=\) Rotate \(A\) by \(-45^{o} = -\pi/4\) :

phi <- -pi/4
X <- cbind(cos(phi)*A[,1] - sin(phi)*A[,2]/3,
           sin(phi)*A[,1] + cos(phi)*A[,2]/3)
stopifnot(identical(dim(X), dim(A)))

Now transform the margings by their marginal ECDF’s so we get uniform margins. Note that, it is equivalent but faster to use rank(*, ties.method="max"):

U <- apply(X, 2, function(x) ecdf(x)(x))
## Prove equivalence:
stopifnot(all.equal(U,
                    apply(X, 2, rank, ties.method="max") / n,
                    tolerance = 1e-14))

Now, visually check the margins of U; they are perfectly uniform:

par(pty="s")
sfsmisc::mult.fig(mfcol = c(1,2), main = "Margins are uniform")
hist(U[,1], probability=TRUE, main="Histogram of U[,1]", xlab=quote(italic(U[1])))
hist(U[,2], probability=TRUE, main="Histogram of U[,2]", xlab=quote(italic(U[2])))

whereas U, the copula sample, indeed is peculiar and contains the word “COPULA” many times if you look closely (well, the “L” is defect …):

par(pty="s")
plot(U, pch=".", xlab = quote(italic(U[1]) %~% ~ "U[0,1]"),
     asp = 1,    ylab = quote(italic(U[2]) %~% ~ "U[0,1]"))

3 Sierpinski tetrahedron

This is an implementation of Example 2.3 in https://arxiv.org/pdf/0906.4853.pdf

library(abind) # for merging arrays via abind()
library(lattice) # for cloud()
library(sfsmisc) # for polyn.eval()
## 
## Attache Paket: 'sfsmisc'
## Die folgenden Objekte sind maskiert durch '.GlobalEnv':
## 
##     Sys.memGB, relErr, relErrV

Implement the random number generator:

##' @title Generate samples from the Sierpinski tetrahedron
##' @param n sample size
##' @param N digits in the base-2 expansion
##' @return (n, 3)-matrix
##' @author Marius Hofert
rSierpinskyTetrahedron <- function(n, N)
{
    stopifnot(n >= 1, N >= 1)
    ## Build coefficients in the base-2 expansion
    U12coeff <- array(sample(0:1, size = 2*n*N, replace = TRUE),
                      dim = c(2, n, N), dimnames = list(U12 = c("U1", "U2"),
                                                        sample = 1:n,
                                                        base2digit = 1:N)) # (2, n, N)-array
    U3coeff <- apply(U12coeff, 2:3, function(x) sum(x) %% 2) # (n, N)-matrix
    Ucoeff <- abind(U12coeff, U3 = U3coeff, along = 1)
    ## Convert to U's
    t(apply(Ucoeff, 1:2, function(x)
        polyn.eval(coef = rev(x), x = 2))/2^N) # see sfsmisc::bi2int
}

Draw vectors of random numbers following a “Sierpinski tetrahedron copula”:

set.seed(271)
U <- rSierpinskyTetrahedron(1e4, N = 6)

Use a scatterplot matrix to check all bivariate margins:

pairs(U, gap = 0, cex = 0.25, col = "black",
      labels = as.expression( sapply(1:3, function(j) bquote(U[.(j)])) ))

All pairs “look” independent but, of course, they aren’t:

cloud(U[,3] ~ U[,1] * U[,2], cex = 0.25, col = "black", zoom = 1,
      scales = list(arrows = FALSE, col = "black"), # ticks instead of arrows
      par.settings = list(axis.line = list(col = "transparent"), # to remove box
                          clip = list(panel = "off"),
                          standard.theme(color = FALSE)),
      xlab = expression(U[1]), ylab = expression(U[2]), zlab = expression(U[3]))

Session information

## R version 4.3.2 Patched (2023-12-01 r85659)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Fedora Linux 38 (Thirty Eight)
## 
## Matrix products: default
## BLAS:   /u/maechler/R/D/r-patched/F38-64-inst/lib/libRblas.so 
## LAPACK: /usr/lib64/liblapack.so.3.11.0
## 
## attached base packages:
##  [1] splines   parallel  grid      stats4    tools     stats     graphics 
##  [8] grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] sfsmisc_1.1-16    abind_1.4-5       randtoolbox_2.0.4 rngWELL_0.10-9   
##  [5] qrng_0.0-9        gridExtra_2.3     VGAM_1.1-9        rugarch_1.5-1    
##  [9] gsl_2.1-8         mev_1.16          lattice_0.22-5    bbmle_1.0.25     
## [13] copula_1.1-3     
## 
## loaded via a namespace (and not attached):
##  [1] gtable_0.3.4                pspline_1.0-19             
##  [3] xfun_0.41                   bslib_0.6.1                
##  [5] partitions_1.10-7           ks_1.14.1                  
##  [7] Runuran_0.38                mathjaxr_1.6-0             
##  [9] numDeriv_2016.8-1.1         Rdpack_2.6                 
## [11] highr_0.10                  xts_0.13.1                 
## [13] Matrix_1.6-4                KernSmooth_2.23-22         
## [15] DistributionUtils_0.6-1     alabama_2023.1.0           
## [17] lifecycle_1.0.4             truncnorm_1.0-9            
## [19] compiler_4.3.2              spd_2.0-1                  
## [21] htmltools_0.5.7             SkewHyperbolic_0.4-2       
## [23] sass_0.4.8                  Rsolnp_1.16                
## [25] yaml_2.3.7                  gmp_0.7-3                  
## [27] pracma_2.4.4                nloptr_2.0.3               
## [29] jquerylib_0.1.4             GeneralizedHyperbolic_0.8-6
## [31] MASS_7.3-60                 cachem_1.0.8               
## [33] mclust_6.0.1                bdsmatrix_1.3-6            
## [35] digest_0.6.33               mvtnorm_1.2-4              
## [37] pcaPP_2.0-3                 fastmap_1.1.1              
## [39] cli_3.6.1                   rmarkdown_2.25             
## [41] zoo_1.8-12                  evaluate_0.23              
## [43] knitr_1.45                  rbibutils_2.2.16           
## [45] ADGofTest_0.3               stabledist_0.7-1           
## [47] nleqslv_3.3.5               rlang_1.1.2                
## [49] Rcpp_1.0.11                 glue_1.6.2                 
## [51] polynom_1.4-1               jsonlite_1.8.7             
## [53] R6_2.5.1