## ----setup, include=FALSE-----------------------------------------------------
library(knitr)
opts_chunk$set(fig.align = "center", 
               out.width = "90%",
               fig.width = 6, 
               fig.height = 5.5,
               dev.args = list(pointsize=10),
               par = TRUE, # needed for setting hook 
               collapse = TRUE, # collapse input & ouput code in chunks
               cache = FALSE,
               warning = FALSE, message = FALSE)
knit_hooks$set(par = function(before, options, envir)
  { if(before && options$fig.show != "none") 
       par(mar=c(4.1,4.1,1.1,1.1), mgp=c(3,1,0), tcl=-0.5)
})

## -----------------------------------------------------------------------------
library(mclust)
library(clustvarsel)

## -----------------------------------------------------------------------------
n <- 200      # sample size
pro <- 0.5    # mixing proportion
mu1 <- c(0,0) # mean vector for the first cluster
mu2 <- c(3,3) # mean vector for the second cluster
sigma1 <- matrix(c(1,0.5,0.5,1),2,2)       # covar matrix for the first cluster
sigma2 <- matrix(c(1.5,-0.7,-0.7,1.5),2,2) # covar matrix for the second cluster
X <- matrix(0, n, 5, dimnames = list(NULL, paste0("X", 1:5)))
set.seed(1234) # for replication
u <- runif(n)
Class <- ifelse(u < pro, 1, 2)
X[u < pro, 1:2]  <- MASS::mvrnorm(sum(u < pro), mu = mu1, Sigma = sigma1)
X[u >= pro, 1:2] <- MASS::mvrnorm(sum(u >= pro), mu = mu2, Sigma = sigma2)
X[, 3] <- X[, 1] + rnorm(n)
X[, 4] <- rnorm(n, mean = 1.5, sd = 2)
X[, 5] <- rnorm(n, mean = 2, sd = 1)
clPairs(X, Class)

## ----eval=FALSE---------------------------------------------------------------
# out <- clustvarsel(X)
# ## iter 1
# ## + adding step
# ##   Var  BICdiff Step Decision
# ## 1  X2 11.17931  Add Accepted
# ## iter 2
# ## + adding step
# ##   Var BICdiff Step Decision
# ## 2  X1 85.1953  Add Accepted
# ## iter 3
# ## + adding step
# ## - removing step
# ##   Var   BICdiff   Step Decision
# ## 3  X3 -14.91130    Add Rejected
# ## 4  X1  85.19104 Remove Rejected
# ## final iter
# ## * fitting model on selected subset
# 
# out
# ## ------------------------------------------------------
# ## Variable selection for Gaussian model-based clustering
# ## Stepwise (forward/backward) greedy search
# ## ------------------------------------------------------
# ##
# ##  Variable proposed Type of step   BICclust Model G   BICdiff Decision
# ##                 X2          Add  -822.6398     E 2  11.17931 Accepted
# ##                 X1          Add -1482.8408   VEV 2  85.19530 Accepted
# ##                 X3          Add -2047.7064   EEV 2 -14.91130 Rejected
# ##                 X1       Remove  -822.6355     E 2  85.19104 Rejected
# ##
# ## Selected subset: X2, X1
# 
# summary(out$model)
# ## ----------------------------------------------------
# ## Gaussian finite mixture model fitted by EM algorithm
# ## ----------------------------------------------------
# ##
# ## Mclust VEV (ellipsoidal, equal shape) model with 2 components:
# ##
# ##  log.likelihood   n df       BIC       ICL
# ##       -714.9288 200 10 -1482.841 -1493.898
# ##
# ## Clustering table:
# ##   1   2
# ##  99 101

## ----eval=FALSE---------------------------------------------------------------
# out <- clustvarsel(X, direction = "backward", fit = FALSE)
# ## iter 1
# ## - removing step
# ##   Var   BICdiff   Step Decision
# ## 1  X3 -38.09195 Remove Accepted
# ## iter 2
# ## - removing step
# ##   Var   BICdiff   Step Decision
# ## 2  X4 -23.50212 Remove Accepted
# ## iter 3
# ## - removing step
# ## + adding step
# ##   Var   BICdiff   Step Decision
# ## 3  X5 -16.25831 Remove Accepted
# ## 4  X3 -14.91130    Add Rejected
# ## iter 4
# ## - removing step
# ## + adding step
# ##   Var   BICdiff   Step Decision
# ## 5  X1  95.55735 Remove Rejected
# ## 6  X3 -14.91130    Add Rejected
# 
# out
# ## ------------------------------------------------------
# ## Variable selection for Gaussian model-based clustering
# ## Stepwise (backward/forward) greedy search
# ## ------------------------------------------------------
# ##
# ##  Variable proposed Type of step   BICclust Model G   BICdiff Decision
# ##                 X3       Remove -2925.4851   EVE 2 -38.09195 Accepted
# ##                 X4       Remove -2067.0668   EVE 2 -23.50212 Accepted
# ##                 X5       Remove -1482.8408   VEV 2 -16.25831 Accepted
# ##                 X3          Add -2047.7064   EEV 2 -14.91130 Rejected
# ##                 X1       Remove  -833.0019     V 2  95.55735 Rejected
# ##                 X3          Add -2047.7064   EEV 2 -14.91130 Rejected
# ##
# ## Selected subset: X1, X2

## ----eval=FALSE---------------------------------------------------------------
# out <- clustvarsel(X, search = "headlong")
# ## iter 1
# ## + adding step
# ##   Var  BICdiff Step Decision
# ## 1  X2 11.17931  Add Accepted
# ## iter 2
# ## + adding step
# ##   Var BICdiff Step Decision
# ## 2  X1 85.1953  Add Accepted
# ## iter 3
# ## + adding step
# ## - removing step
# ##   Var  BICdiff   Step Decision
# ## 3  X3 -14.9113    Add Rejected
# ## 4  X1  85.1953 Remove Rejected
# ## final iter
# ## * fitting model on selected subset
# 
# out
# ## ------------------------------------------------------
# ## Variable selection for Gaussian model-based clustering
# ## Headlong (forward/backward) search
# ## ------------------------------------------------------
# ##
# ##  Variable proposed Type of step   BICclust Model G   BICdiff Decision
# ##                 X2          Add  -822.6398     E 2  11.17931 Accepted
# ##                 X1          Add -1482.8408   VEV 2  85.19530 Accepted
# ##                 X3          Add -2047.7064   EEV 2 -14.91130 Rejected
# ##                 X1       Remove  -822.6398     E 2  85.19530 Rejected
# ##
# ## Selected subset: X2, X1

## -----------------------------------------------------------------------------
n <- 200
p <- 10
mu <- rep(0,p)
sigma1 <- matrix(c(1,0.5,0.5,1),2,2)
sigma2 <- matrix(c(1.5,-0.7,-0.7,1.5),2,2)
sigma <- Matrix::bdiag(sigma1, sigma2, diag(6))
set.seed(12345)
X <- MASS::mvrnorm(n, mu, sigma)
colnames(X) <- paste0("X", 1:p)
clPairs(X)

## ----eval=FALSE---------------------------------------------------------------
# mod <- Mclust(X)
# summary(mod$BIC)
# ## Best BIC values:
# ##              EII,2       VII,2       EII,3
# ## BIC      -5899.073 -5901.88582 -5922.53452
# ## BIC diff     0.000    -2.81261   -23.46131
# summary(mod)
# ## ----------------------------------------------------
# ## Gaussian finite mixture model fitted by EM algorithm
# ## ----------------------------------------------------
# ##
# ## Mclust EII (spherical, equal volume) model with 2 components:
# ##
# ##  log.likelihood   n df       BIC       ICL
# ##       -2891.255 200 22 -5899.073 -5953.953
# ##
# ## Clustering table:
# ##   1   2
# ##  90 110

## ----eval=FALSE---------------------------------------------------------------
# (out1 <- clustvarsel(X, verbose = FALSE))
# ## ------------------------------------------------------
# ## Variable selection for Gaussian model-based clustering
# ## Stepwise (forward/backward) greedy search
# ## ------------------------------------------------------
# ##
# ##  Variable proposed Type of step   BICclust Model G    BICdiff Decision
# ##                 X3          Add  -666.9232     E 2 -4.8472740 Accepted
# ##                 X7          Add -1242.8998   EII 2  1.5678544 Accepted
# ##                X10          Add -1814.8848   EII 2  1.5172709 Accepted
# ##                X10       Remove -1242.8998   EII 2  1.5172709 Rejected
# ##                 X6          Add -2390.3224   EII 2  0.6539224 Accepted
# ##                 X6       Remove -1814.8848   EII 2  0.6539224 Rejected
# ##                 X2          Add -2942.2744   EII 2  0.1214452 Accepted
# ##                 X2       Remove -2390.3224   EII 2  0.1214452 Rejected
# ##                 X8          Add -3495.9758   EII 2  0.1104619 Accepted
# ##                 X8       Remove -2942.2744   EII 2  0.1104619 Rejected
# ##                 X9          Add -4081.9212   EII 2 -2.0296117 Rejected
# ##                 X8       Remove -2942.2744   EII 2  0.1104619 Rejected
# ##
# ## Selected subset: X3, X7, X10, X6, X2, X8
# 
# summary(out1$model)
# # or
# # mod1 <- Mclust(X[,out1$subset])
# # summary(mod1)
# ## ----------------------------------------------------
# ## Gaussian finite mixture model fitted by EM algorithm
# ## ----------------------------------------------------
# ##
# ## Mclust XII (spherical multivariate normal) model with 1 component:
# ##
# ##  log.likelihood   n df       BIC       ICL
# ##       -1727.188 200  7 -3491.465 -3491.465
# ##
# ## Clustering table:
# ##   1
# ## 200

## ----eval=FALSE---------------------------------------------------------------
# (out2 <- clustvarsel(X, direction = "backward", verbose = FALSE))
# ## ------------------------------------------------------
# ## Variable selection for Gaussian model-based clustering
# ## Stepwise (backward/forward) greedy search
# ## ------------------------------------------------------
# ##
# ##  Variable proposed Type of step  BICclust Model G     BICdiff Decision
# ##                 X2       Remove -5346.204   EII 2 -48.0289667 Accepted
# ##                 X4       Remove -4696.525   EII 2  -7.1909995 Accepted
# ##                 X3       Remove -4032.114   EII 2  -2.3352781 Accepted
# ##                 X3          Add -4696.525   EII 2  -2.3352781 Rejected
# ##                 X7       Remove -3453.786   EII 2  -1.1967844 Accepted
# ##                 X7          Add -4032.114   EII 2  -1.1967844 Rejected
# ##                 X8       Remove -2899.545   EII 2  -0.4288443 Accepted
# ##                 X8          Add -3453.786   EII 2  -0.4288443 Rejected
# ##                 X5       Remove -2308.785   EII 2   1.0712786 Rejected
# ##                 X8          Add -3453.786   EII 2  -0.4288443 Rejected
# ##
# ## Selected subset: X1, X5, X6, X9, X10
# 
# summary(out2$model)
# ## ----------------------------------------------------
# ## Gaussian finite mixture model fitted by EM algorithm
# ## ----------------------------------------------------
# ##
# ## Mclust XII (spherical multivariate normal) model with 1 component:
# ##
# ##  log.likelihood   n df       BIC       ICL
# ##       -1424.072 200  6 -2879.934 -2879.934
# ##
# ## Clustering table:
# ##   1
# ## 200

## -----------------------------------------------------------------------------
sessionInfo()