library(tip)
#> Registered S3 method overwritten by 'GGally':
#> method from
#> +.gg ggplot2
# A function to generate random matrices from a matrix normal distribution
random_mat_normal < function(mu, num_rows, num_cols){
LaplacesDemon::rmatrixnorm(M = matrix(mu,
nrow = num_rows,
ncol = num_cols),
U = diag(num_rows),
V = diag(num_cols))
}
# Generate 3 clusters of matrices
p < 5
m < 3
c1 < lapply(1:10, function(x) random_mat_normal(mu = 0, num_rows = m, num_cols = p))
c2 < lapply(1:10, function(x) random_mat_normal(mu = 5, num_rows = m, num_cols = p))
c3 < lapply(1:10, function(x) random_mat_normal(mu = 5, num_rows = m, num_cols = p))
# Put all the data into a list
data_list < c(c1,c2,c3)
# Create a vector of true labels. True labels are only necessary
# for constructing network graphs that incorporate the true labels;
# this is often useful for research.
true_labels < c(rep("Cluster 1", length(c1)),
rep("Cluster 2", length(c2)),
rep("Cluster 3", length(c3)))
distance_matrix < matrix(NA,
nrow = length(true_labels),
ncol = length(true_labels))
# Distance matrix
for(i in 1:length(true_labels)){
for(j in i:length(true_labels)){
distance_matrix[i,j] < SMFilter::FDist2(mX = data_list[[i]],
mY = data_list[[j]])
distance_matrix[j,i] < distance_matrix[i,j]
}
}
# Compute the temperature parameter estiamte
temperature < 1/median(distance_matrix[upper.tri(distance_matrix)])
# For each subject, compute the point estimate for the number of similar
# subjects using univariate multiple change point detection (i.e.)
init_num_neighbors = get_cpt_neighbors(.distance_matrix = distance_matrix)
#> Warning in BINSEG(sumstat, pen = pen.value, cost_func = costfunc, minseglen
#> = minseglen, : The number of changepoints identified is Q, it is advised to
#> increase Q to make sure changepoints have not been missed.
#> Warning in BINSEG(sumstat, pen = pen.value, cost_func = costfunc, minseglen
#> = minseglen, : The number of changepoints identified is Q, it is advised to
#> increase Q to make sure changepoints have not been missed.
#> Warning in BINSEG(sumstat, pen = pen.value, cost_func = costfunc, minseglen
#> = minseglen, : The number of changepoints identified is Q, it is advised to
#> increase Q to make sure changepoints have not been missed.
#> Warning in BINSEG(sumstat, pen = pen.value, cost_func = costfunc, minseglen
#> = minseglen, : The number of changepoints identified is Q, it is advised to
#> increase Q to make sure changepoints have not been missed.
#> Warning in BINSEG(sumstat, pen = pen.value, cost_func = costfunc, minseglen
#> = minseglen, : The number of changepoints identified is Q, it is advised to
#> increase Q to make sure changepoints have not been missed.
#> Warning in BINSEG(sumstat, pen = pen.value, cost_func = costfunc, minseglen
#> = minseglen, : The number of changepoints identified is Q, it is advised to
#> increase Q to make sure changepoints have not been missed.
#> Warning in BINSEG(sumstat, pen = pen.value, cost_func = costfunc, minseglen
#> = minseglen, : The number of changepoints identified is Q, it is advised to
#> increase Q to make sure changepoints have not been missed.
#> Warning in BINSEG(sumstat, pen = pen.value, cost_func = costfunc, minseglen
#> = minseglen, : The number of changepoints identified is Q, it is advised to
#> increase Q to make sure changepoints have not been missed.
# Set the number of burnin iterations in the Gibbs samlper
# RECOMMENDATION: burn >= 1000
burn < 10
# Set the number of sampling iterations in the Gibbs sampler
# RECOMMENDATION: samples >= 1000
samples < 10
# Set the subject names
names_subjects < paste(1:dim(distance_matrix)[1])
# Run TIP clustering using only the prior
# > That is, the likelihood function is constant
tip1 < tip(.data = data_list,
.burn = burn,
.samples = samples,
.similarity_matrix = exp(1.0*temperature*distance_matrix),
.init_num_neighbors = init_num_neighbors,
.likelihood_model = "CONSTANT",
.subject_names = names_subjects,
.num_cores = 1)
#> Bayesian Clustering: Table Invitation Prior Gibbs Sampler
#> burnin: 10
#> samples: 10
#> Likelihood Model: CONSTANT
#>

  0%

====  6%

========  11%

============  17%

================  22%

===================  28%

=======================  33%

===========================  39%

===============================  44%

===================================  50%

=======================================  56%

===========================================  61%

===============================================  67%

===================================================  72%

======================================================  78%

==========================================================  83%

==============================================================  89%

==================================================================  94%

====================================================================== 100%
# Associate class labels and colors for the plot
class_palette_colors < c("Cluster 1" = "blue",
"Cluster 2" = 'green',
"Cluster 3" = "red")
# Associate class labels and shapes for the plot
class_palette_shapes < c("Cluster 1" = 19,
"Cluster 2" = 18,
"Cluster 3" = 17)
# Visualize the posterior similarity matrix by constructing a graph plot of
# the onecluster graph. The true labels are used here (below they are not).
ggnet2_network_plot(.matrix_graph = partition_list$partitioned_graph_matrix,
.subject_names = NA,
.subject_class_names = true_labels,
.class_colors = class_palette_colors,
.class_shapes = class_palette_shapes,
.node_size = 2,
.add_node_labels = FALSE)
#> Warning: Duplicated override.aes is ignored.