## ----setup, include = FALSE----------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----eval=FALSE----------------------------------------------------------
#  library(MASS)
#  n <- 100 # Sample size
#  B <- 30 # Number of iterations in the simulated annealing algorithm.
#  p <- 50 # Number of columns of Y.

## ----eval=FALSE----------------------------------------------------------
#  S <- matrix(0.2, p, p)
#  S[1:(p/2),(p/2+1):p] <- 0
#  S[(p/2+1):p,1:(p/2)] <- 0
#  S <- S-diag(diag(S)) + diag(p)
#  mu <- rep(0, p)
#  
#  W0 <- matrix(1,p,p)
#  W0[1:(p/2),1:(p/2)] <- 0
#  W0[(p/2+1):p,(p/2+1):p] <- 0
#  Denum <- sum(W0)
#  
#  Y <- mvrnorm(n, mu, S)

## ----eval=FALSE----------------------------------------------------------
#  Res <- ncut(Y,
#              K     = 2,
#              B     = 30,
#              N     = 1000,
#              dist  = 'correlation',
#              scale = TRUE,
#              q     = 0.2,
#              sigma = 0.1)
#  
#  Cx  <- Res[[2]]
#  f11 <- matrix(Cx[ ,1], p, 1)
#  f12 <- matrix(Cx[ ,2], p, 1)
#  
#  errorL <- sum((f11%*%t(f11))*W0)/Denum + sum((f12%*%t(f12))*W0)/Denum
#  # This is the true error of the clustering solution.
#  errorL

## ----eval=FALSE----------------------------------------------------------
#  n  <- 200 # Sample size
#  B  <- 5000 # Number of iterations in the simulated annealing algorithm.
#  L  <- 10000 # Temperature coefficient.
#  p  <- 200 # Number of columns of Y.
#  q  <- p # Number of columns of X.
#  h1 <- 0.05 # Lower bound for the B coefficiens in Y = X*B+e.
#  h2 <- 0.15 # Upper bound for the B coefficients in the model Y = X*B+e.

## ----eval=FALSE----------------------------------------------------------
#   S <- matrix(0.2,q,q)
#   S[1:(q/2),(q/2+1):q] <- 0
#   S[(q/2+1):q,1:(q/2)] <- 0
#   S <- S - diag(diag(S)) + diag(q)
#  
#   mu <- rep(0,q)
#  
#   W0 <- matrix(1,p,p)
#   W0[1:(p/2),1:(p/2)] <- 0
#   W0[(p/2+1):p,(p/2+1):p] <- 0
#  
#   B <- matrix(0,q,p)
#   for (i in 1:(p/2)){
#      B[1:(q/2),i] <- runif(q/2, h1, h2)
#      in1          <- sample.int(q/2, 6)
#      B[-in1,i]    <- 0#This makes B sparse.
#   }
#  
#   for (i in (p/2+1):p){
#      B[(q/2+1):q,i] <- runif(q/2, h1, h2)
#      in2            <- sample(seq(q/2+1,q), 6)
#      B[-in2,i]      <- 0 # This makes B sparse.
#   }
#  
#   X <- mvrnorm(n, mu, S)
#   Z <- X%*%B
#   Y <- Z + matrix(rnorm(n*p,0,2), n, p)

## ----eval=FALSE----------------------------------------------------------
#  # ANCut method
#  Res <- anut(Y, X, B, L, K=2, alpha = 0, ncv = 5)
#  Cx  <- Res[[2]]
#  f11 <- matrix(Cx[ ,1], p, 1)
#  f12 <- matrix(Cx[ ,2], p, 1)
#  
#  errorL <- sum((f11%*%t(f11))*W0)/Denum+sum((f12%*%t(f12))*W0)/p^2
#  # This is the true error of the clustering solution.
#  errorL

## ----eval=FALSE----------------------------------------------------------
#  # Below is a plot of the simulated annealing path
#  plot(Res[[1]], type='l')
#  # Clusters found by ANCut
#  image.plot(Cx)

## ----eval=FALSE----------------------------------------------------------
#  n   <- 50
#  p   <- 50
#  h   <- 0.5
#  rho <- 0.5
#  
#  Sigma <- matrix(rho,p,p)
#  Sigma[1:(p/5),1:(p/5)] <- 2*rho
#  Sigma[(p/5+1):(3*p/5),(p/5+1):(3*p/5)] <- 2*rho
#  Sigma[(3*p/5+1):(4*p/5),(3*p/5+1):(4*p/5)] <- 2*rho
#  Sigma <- Sigma - diag(diag(Sigma))
#  Sigma <- Sigma + diag(p)

## ----eval=FALSE----------------------------------------------------------
#  W0 <- matrix(1,p,p)
#  W0[1:(p/5),1:(p/5)] <- 0
#  W0[(p/5+1):(3*p/5),(p/5+1):(3*p/5)] <- 0
#  W0[(3*p/5+1):(4*p/5),(3*p/5+1):(4*p/5)] <- 0
#  W0[(4*p/5+1):p,(4*p/5+1):p] <- 0
#  W0 <- cbind(W0,W0,W0)
#  W0 <- rbind(W0,W0,W0)

## ----eval=FALSE----------------------------------------------------------
#  Y <- matrix(0, n, p)
#  Z <- matrix(0, n, p)
#  
#  X  <- mvrnorm(n, rep(0, p), Sigma)
#  B1 <- matrix(0, p, p)
#  B2 <- matrix(0, p, p)
#  
#  B1[1:(p/5),1:(p/5)]                     <- runif((p/5)^2, h/2, h)*rbinom((p/5)^2, 1, 0.2)
#  B1[(p/5+1):(3*p/5),(p/5+1):(3*p/5)]     <- runif((2*p/5)^2, h/2, h)*rbinom((2*p/5)^2, 1, 0.2)
#  B1[(3*p/5+1):(4*p/5),(3*p/5+1):(4*p/5)] <- runif((p/5)^2, h/2, h)*rbinom((p/5)^2, 1, 0.2)
#  
#  B2[1:(p/5),1:(p/5)]                     <- runif((p/5)^2, h/2, h)*rbinom((p/5)^2, 1, 0.2)
#  B2[(p/5+1):(3*p/5),(p/5+1):(3*p/5)]     <- runif((2*p/5)^2, h/2, h)*rbinom((2*p/5)^2, 1, 0.2)
#  B2[(3*p/5+1):(4*p/5),(3*p/5+1):(4*p/5)] <- runif((p/5)^2, h/2, h)*rbinom((p/5)^2, 1, 0.2)
#  
#  Y <- X%*%B1 + matrix(rnorm(n*p, 0, 0.5), n, p)
#  Z <- Y%*%B2 + matrix(rnorm(n*p, 0, 0.5), n, p)

## ----eval=FALSE----------------------------------------------------------
#  clust <- muncut(Z,
#                  Y,
#                  X,
#                  K        = 4,
#                  B        = 15000,
#                  L        = 500,
#                  sampling = 'size',
#                  alpha    = 0.5,
#                  ncv      = 3,
#                  nlambdas = 20,
#                  sigma    = 10,
#                  scale    = TRUE,
#                  model    = FALSE,
#                  gamma    = 0.1)
#  
#  A <- clust[[2]][ ,1]%*%t(clust[[2]][ ,1]) + clust[[2]][ ,2]%*%t(clust[[2]][ ,2]) +
#       clust[[2]][ ,3]%*%t(clust[[2]][ ,3]) + clust[[2]][ ,4]%*%t(clust[[2]][ ,4])
#  
#  errorK <- sum(A*W0)/(3*p)^2
#  errorK

## ----eval=FALSE----------------------------------------------------------
#  n <- 100 # Sample size
#  p <- 100 # Number of columns of Y.
#  K <- 3

## ----eval=FALSE----------------------------------------------------------
#  C0            <- matrix(0,p,K)
#  C0[1:25,1]    <- matrix(1,25,1)
#  C0[26:75,1:3] <- matrix(1/3,50,3)
#  C0[76:100,3]  <- matrix(1,25,1)
#  
#  A0 <- C0[ ,1]%*%t(C0[ ,1]) + C0[ ,2]%*%t(C0[ ,2]) +
#        C0[ ,3]%*%t(C0[ ,3])
#  A0 <- A0 - diag(diag(A0)) + diag(p)
#  
#  Z1 <- rnorm(n,0,2)
#  Z2 <- rnorm(n,0,2)
#  Z3 <- rnorm(n,0,2)
#  
#  Y <- matrix(0,n,p)
#  Y[ ,1:25]   <-  matrix(rnorm(n*25, 0, 2), n, 25) + matrix(Z1, n, 25, byrow=FALSE)
#  Y[ ,26:75]  <-  matrix(rnorm(n*50, 0, 2), n, 50) + matrix(Z1, n, 50, byrow=FALSE) +
#                  matrix(Z2, n, 50, byrow=FALSE) + matrix(Z3, n, 50, byrow=FALSE)
#  Y[ ,76:100] <-  matrix(rnorm(n*25, 0, 2), n, 25) + matrix(Z3, n, 25, byrow=FALSE)

## ----eval=FALSE----------------------------------------------------------
#  trial <- pwncut(Y,
#                  K       = 3,
#                  B       = 10000,
#                  L       = 1000,
#                  lambda  = 1.5,
#                  start   = 'default',
#                  scale   = TRUE,
#                  nstarts = 1,
#                  epsilon = 0,
#                  dist    = 'correlation',
#                  sigma   = 10)
#  
#  A1 <- trial[[2]][ ,1]%*%t(trial[[2]][ ,1]) +
#        trial[[2]][ ,2]%*%t(trial[[2]][ ,2]) +
#        trial[[2]][ ,3]%*%t(trial[[2]][ ,3])
#  
#  A1 <- A1 - diag(diag(A1)) + diag(p)
#  
#  plot(trial[[1]], type='l')
#  errorL <- sum(abs(A0-A1))/p^2
#  errorL

## ----eval=FALSE----------------------------------------------------------
#  n   <- 50
#  p   <- 50
#  h   <- 0.15
#  rho <- 0.15
#  mu  <- 1

## ----eval=FALSE----------------------------------------------------------
#  W0 <- matrix(1,p,p)
#  W0[1:(p/5),1:(p/5)] <- 0
#  W0[(p/5+1):(3*p/5),(p/5+1):(3*p/5)] <- 0
#  W0[(3*p/5+1):(4*p/5),(3*p/5+1):(4*p/5)] <- 0
#  W0[(4*p/5+1):p,(4*p/5+1):p]=0
#  W0=cbind(W0,W0,W0)
#  W0=rbind(W0,W0,W0)
#  
#  W1 <- matrix(1,n,n)
#  W1[1:(n/2),1:(n/2)] <- 0
#  W1[(n/2+1):n,(n/2+1):n] <- 0

## ----eval=FALSE----------------------------------------------------------
#  X <- matrix(0,n,p)
#  Y <- matrix(0,n,p)
#  Z <- matrix(0,n,p)
#  
#  Sigma=matrix(0,p,p)
#  Sigma[1:(p/5),1:(p/5)] <- rho
#  Sigma[(p/5+1):(3*p/5),(p/5+1):(3*p/5)] <- rho
#  Sigma[(3*p/5+1):(4*p/5),(3*p/5+1):(4*p/5)] <- rho
#  Sigma[(4*p/5+1):p,(4*p/5+1):p] <- rho
#  Sigma <- Sigma - diag(diag(Sigma))
#  Sigma <- Sigma + diag(p)
#  
#  X[1:(n/2),]   <- mvrnorm(n/2,rep(mu,p),Sigma)
#  X[(n/2+1):n,] <- mvrnorm(n/2,rep(-mu,p),Sigma)
#  
#  B11 <- matrix(0,p,p)
#  B12 <- matrix(0,p,p)
#  B21 <- matrix(0,p,p)
#  B22 <- matrix(0,p,p)
#  
#  B11[1:(p/5),1:(p/5)]                     <- runif((p/5)^2, h/2, h)*rbinom((p/5)^2, 1, 0.5)
#  B11[(p/5+1):(3*p/5),(p/5+1):(3*p/5)]     <- runif((2*p/5)^2, h/2, h)*rbinom((2*p/5)^2, 1, 0.5)
#  B11[(3*p/5+1):(4*p/5),(3*p/5+1):(4*p/5)] <- runif((p/5)^2, h/2, h)*rbinom((p/5)^2, 1, 0.5)
#  B11[(4*p/5+1):p,(4*p/5+1):p]             <- runif((1*p/5)^2, h/2, h)*rbinom((1*p/5)^2, 1, 0.5)
#  
#  B12[1:(p/5),1:(p/5)]                     <- runif((p/5)^2, -h, -h/2)*rbinom((p/5)^2, 1, 0.5)
#  B12[(p/5+1):(3*p/5),(p/5+1):(3*p/5)]     <- runif((2*p/5)^2, -h, -h/2)*rbinom((2*p/5)^2, 1, 0.5)
#  B12[(3*p/5+1):(4*p/5),(3*p/5+1):(4*p/5)] <- runif((p/5)^2, -h, -h/2)*rbinom((p/5)^2, 1, 0.5)
#  B12[(4*p/5+1):p,(4*p/5+1):p]             <- runif((1*p/5)^2, -h, -h/2)*rbinom((1*p/5)^2, 1, 0.5)
#  
#  B21[1:(p/5),1:(p/5)]                     <- runif((p/5)^2, h/2, h)*rbinom((p/5)^2,1,0.5)
#  B21[(p/5+1):(3*p/5),(p/5+1):(3*p/5)]     <- runif((2*p/5)^2, h/2, h)*rbinom((2*p/5)^2,1,0.5)
#  B21[(3*p/5+1):(4*p/5),(3*p/5+1):(4*p/5)] <- runif((p/5)^2, h/2, h)*rbinom((p/5)^2,1,0.5)
#  B21[(4*p/5+1):p,(4*p/5+1):p]             <- runif((1*p/5)^2, h/2, h)*rbinom((1*p/5)^2,1,0.5)
#  
#  B22[1:(p/5),1:(p/5)]                     <- runif((p/5)^2, -h, -h/2)*rbinom((p/5)^2, 1, 0.5)
#  B22[(p/5+1):(3*p/5),(p/5+1):(3*p/5)]     <- runif((2*p/5)^2, -h, -h/2)*rbinom((2*p/5)^2, 1, 0.5)
#  B22[(3*p/5+1):(4*p/5),(3*p/5+1):(4*p/5)] <- runif((p/5)^2, -h, -h/2)*rbinom((p/5)^2, 1, 0.5)
#  B22[(4*p/5+1):p,(4*p/5+1):p]             <- runif((1*p/5)^2, -h, -h/2)*rbinom((1*p/5)^2, 1, 0.5)
#  
#  Y[1:(n/2), ]   <- X[1:(n/2),]%*%B11+matrix(rnorm((n/2)*p, 0, 0.25), n/2, p)
#  Y[(n/2+1):n, ] <- X[(n/2+1):n,]%*%B12+matrix(rnorm((n/2)*p, 0, 0.25), n/2, p)
#  
#  Z[1:(n/2), ]   <- Y[1:(n/2),]%*%B21+matrix(rnorm((n/2)*p, 0, 0.25),n/2,p)
#  Z[(n/2+1):n, ] <- Y[(n/2+1):n,]%*%B22+matrix(rnorm((n/2)*p, 0, 0.25),n/2,p)

## ----eval=FALSE----------------------------------------------------------
#  trial <- mlbncut(Z,
#                   Y,
#                   X,
#                   K=4,
#                   R=2,
#                   B=10,
#                   N=100,
#                   dist='correlation',
#                   q0=0.15,
#                   scale=TRUE,
#                   sigmas=0.05,
#                   sigmac=1)
#  
#  plot(trial[[1]],type='l')
#  image.plot(trial[[2]])
#  image.plot(trial[[3]])
#  
#  errorK <- sum((trial[[3]][ ,1]%*%t(trial[[3]][ ,1]) +
#                 trial[[3]][ ,2]%*%t(trial[[3]][ ,2]) +
#                 trial[[3]][ ,3]%*%t(trial[[3]][ ,3]) +
#                 trial[[3]][ ,4]%*%t(trial[[3]][ ,4]))*W0)/(3*p)^2 +
#            sum((trial[[2]][ ,1]%*%t(trial[[2]][ ,1]) +
#                 trial[[2]][ ,2]%*%t(trial[[2]][ ,2]))*W1)/(n)^2
#  errorK

## ----eval=FALSE----------------------------------------------------------
#  set.seed(123456)
#  #This sets up the initial parameters for the simulation.
#  lambda <- seq(2,6,1) #Tuning parameter lambda
#  Tau    <- seq(0.2,0.8,0.2) #Tuning parameter tau
#  
#  n=30; n1=10; n2=10; n3=n-n1-n2 #Sample size
#  p1=30; p2=30; r1=28; r2=28; #Number of variables and noises in each dataset
#  
#  K=3; #Number of clusters
#  
#  mu=1; #Mean of the marginal distribution
#  u1=0.5; #Range of enties in the coefficient matrix
#  
#  epsilon <- matrix(rnorm(n*(p1-r1),0,1), n, (p1-r1)) # Generation of random error
#  
#  Sigma1 <- matrix(rep(0.8,(p1-r1)^2),(p1-r1),(p1-r1)) # Generation of the covariance matrix
#  diag(Sigma1) <- 1

## ----eval=FALSE----------------------------------------------------------
#  # Generation of the original distribution of the three clusters
#  T1 <- matrix(rmvnorm(n1,mean=rep(-mu,(p1-r1)),sigma=Sigma1),n1,(p1-r1))
#  T2 <- matrix(rmvnorm(n2,mean=rep(0,(p1-r1)),sigma=Sigma1),n2,(p1-r1))
#  T3 <- matrix(rmvnorm(n3,mean=rep(mu,(p1-r1)),sigma=Sigma1),n3,(p1-r1))
#  
#  X1 <- sign(T1)*(exp(abs(T1))) #Generation of signals in X
#  X2 <- sign(T2)*(exp(abs(T2)))
#  X3 <- sign(T3)*(exp(abs(T3)))
#  ep1 <- (matrix(rnorm(n*r1,0,1),n,r1)) #Generation of noises in X
#  X <- rbind(X1,X2,X3)
#  
#  beta1 <- matrix(runif((p1-r1)*(p2-r2),-u1,u1),(p1-r1),(p2-r2)) #Generation of the coefficient matrix
#  Z     <- X%*%beta1+epsilon #Generation of signals in Z
#  ep2   <- (matrix(rnorm(n*r2,0.5,1),n,r2)) #Generation of noises in Z
#  
#  X <- cbind(X,ep1)
#  Z <- cbind(Z,ep2)

## ----eval=FALSE----------------------------------------------------------
#  # AWNCut method
#  Tune1         <- awncut.selection(X, Z, K, lambda, Tau, B = 30, L = 1000)
#  awncut.result <- awncut(X, Z, 3, Tune1$lam, Tune1$tau, B = 30, L = 1000)
#  ErrorRate(awncut.resu