--- title: "High Performance Benchmarks" author: "Joseph Wood" date: "01/31/2025" output: rmarkdown::html_vignette: toc: true number_sections: false vignette: > %\VignetteIndexEntry{High Performance Benchmarks} %\VignetteEngine{knitr::rmarkdown} \usepackage[utf8]{inputenc} --- This document serves as an overview for measuring the performance of `RcppAlgos` against other tools for generating combinations, permutations, and partitions. This stackoverflow post: [How to generate permutations or combinations of object in R?]() has some benchmarks. You will note that the examples in that post are relatively small. The benchmarks below will focus on larger examples where performance really matters and for this reason we only consider the packages [arrangements](), [partitions](), and [RcppAlgos](). ## Setup Information For the benchmarks below, we used a `2022 Macbook Air Apple M2 24 GB` machine. ``` r library(RcppAlgos) library(partitions) library(arrangements) #> #> Attaching package: 'arrangements' #> The following object is masked from 'package:partitions': #> #> compositions library(microbenchmark) options(digits = 4) options(width = 90) pertinent_output <- capture.output(sessionInfo()) cat(paste(pertinent_output[1:3], collapse = "\n")) #> R version 4.4.2 (2024-10-31) #> Platform: aarch64-apple-darwin20 #> Running under: macOS Sonoma 14.5 pkgs <- c("RcppAlgos", "arrangements", "partitions", "microbenchmark") sapply(pkgs, packageVersion, simplify = FALSE) #> $RcppAlgos #> [1] '2.9.2' #> #> $arrangements #> [1] '1.1.9' #> #> $partitions #> [1] '1.10.7' #> #> $microbenchmark #> [1] '1.4.10' numThreads <- min(as.integer(RcppAlgos::stdThreadMax() / 2), 6) numThreads #> [1] 4 ``` ## Combinations ### Combinations - Distinct ``` r set.seed(13) v1 <- sort(sample(100, 30)) m <- 21 t1 <- comboGeneral(v1, m, Parallel = T) t2 <- combinations(v1, m) stopifnot(identical(t1, t2)) dim(t1) #> [1] 14307150 21 rm(t1, t2) invisible(gc()) microbenchmark(cbRcppAlgosPar = comboGeneral(v1, m, nThreads = numThreads), cbRcppAlgosSer = comboGeneral(v1, m), cbArrangements = combinations(v1, m), times = 15, unit = "relative") #> Warning in microbenchmark(cbRcppAlgosPar = comboGeneral(v1, m, nThreads = numThreads), : #> less accurate nanosecond times to avoid potential integer overflows #> Unit: relative #> expr min lq mean median uq max neval #> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 15 #> cbRcppAlgosSer 2.666 2.427 2.432 2.423 2.402 2.401 15 #> cbArrangements 2.517 2.290 2.292 2.282 2.263 2.252 15 ``` ### Combinations - Repetition ``` r v2 <- v1[1:10] m <- 20 t1 <- comboGeneral(v2, m, repetition = TRUE, nThreads = numThreads) t2 <- combinations(v2, m, replace = TRUE) stopifnot(identical(t1, t2)) dim(t1) #> [1] 10015005 20 rm(t1, t2) invisible(gc()) microbenchmark(cbRcppAlgosPar = comboGeneral(v2, m, TRUE, nThreads = numThreads), cbRcppAlgosSer = comboGeneral(v2, m, TRUE), cbArrangements = combinations(v2, m, replace = TRUE), times = 15, unit = "relative") #> Unit: relative #> expr min lq mean median uq max neval #> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 15 #> cbRcppAlgosSer 3.014 2.937 2.910 2.921 2.850 2.844 15 #> cbArrangements 2.771 2.782 2.766 2.762 2.689 2.944 15 ``` ### Combinations - Multisets ``` r myFreqs <- c(2, 4, 4, 5, 3, 2, 2, 2, 3, 4, 1, 4, 2, 5) v3 <- as.integer(c(1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610)) t1 <- comboGeneral(v3, 20, freqs = myFreqs, nThreads = numThreads) t2 <- combinations(freq = myFreqs, k = 20, x = v3) stopifnot(identical(t1, t2)) dim(t1) #> [1] 14594082 20 rm(t1, t2) invisible(gc()) microbenchmark(cbRcppAlgosPar = comboGeneral(v3, 20, freqs = myFreqs, nThreads = numThreads), cbRcppAlgosSer = comboGeneral(v3, 20, freqs = myFreqs), cbArrangements = combinations(freq = myFreqs, k = 20, x = v3), times = 10, unit = "relative") #> Unit: relative #> expr min lq mean median uq max neval #> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 10 #> cbRcppAlgosSer 3.105 3.069 3.058 3.040 2.996 3.170 10 #> cbArrangements 5.664 5.726 5.662 5.672 5.598 5.604 10 ``` ## Permutations ### Permutations - Distinct ``` r v4 <- as.integer(c(2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59)) t1 <- permuteGeneral(v4, 6, nThreads = numThreads) t2 <- permutations(v4, 6) stopifnot(identical(t1, t2)) dim(t1) #> [1] 8910720 6 rm(t1, t2) invisible(gc()) microbenchmark(cbRcppAlgosPar = permuteGeneral(v4, 6, nThreads = numThreads), cbRcppAlgosSer = permuteGeneral(v4, 6), cbArrangements = permutations(v4, 6), times = 15, unit = "relative") #> Unit: relative #> expr min lq mean median uq max neval #> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 15 #> cbRcppAlgosSer 1.167 1.174 1.090 1.160 1.132 1.056 15 #> cbArrangements 2.042 2.052 1.953 2.025 2.167 1.656 15 ## Indexing permutation example with the partitions package t1 <- permuteGeneral(11, nThreads = 4) t2 <- permutations(11) t3 <- perms(11) dim(t1) #> [1] 39916800 11 stopifnot(identical(t1, t2), identical(t1, t(as.matrix(t3)))) rm(t1, t2, t3) invisible(gc()) microbenchmark(cbRcppAlgosPar = permuteGeneral(11, nThreads = 4), cbRcppAlgosSer = permuteGeneral(11), cbArrangements = permutations(11), cbPartitions = perms(11), times = 5, unit = "relative") #> Unit: relative #> expr min lq mean median uq max neval #> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 5 #> cbRcppAlgosSer 2.540 2.477 2.434 2.841 2.825 1.867 5 #> cbArrangements 3.915 4.136 3.692 4.158 4.246 2.684 5 #> cbPartitions 10.436 10.282 9.375 10.392 10.990 6.637 5 ``` ### Permutations - Repetition ``` r v5 <- v3[1:5] t1 <- permuteGeneral(v5, 10, repetition = TRUE, nThreads = numThreads) t2 <- permutations(v5, 10, replace = TRUE) stopifnot(identical(t1, t2)) dim(t1) #> [1] 9765625 10 rm(t1, t2) invisible(gc()) microbenchmark(cbRcppAlgosPar = permuteGeneral(v5, 10, TRUE, nThreads = numThreads), cbRcppAlgosSer = permuteGeneral(v5, 10, TRUE), cbArrangements = permutations(x = v5, k = 10, replace = TRUE), times = 10, unit = "relative") #> Unit: relative #> expr min lq mean median uq max neval #> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 10 #> cbRcppAlgosSer 3.002 2.803 2.334 2.778 2.780 0.937 10 #> cbArrangements 3.289 3.055 2.739 3.077 3.077 1.668 10 ``` ### Permutations - Multisets ``` r v6 <- sort(runif(12)) t1 <- permuteGeneral(v6, 7, freqs = rep(1:3, 4), nThreads = numThreads) t2 <- permutations(freq = rep(1:3, 4), k = 7, x = v6) stopifnot(identical(t1, t2)) dim(t1) #> [1] 19520760 7 rm(t1, t2) invisible(gc()) microbenchmark(cbRcppAlgosPar = permuteGeneral(v6, 7, freqs = rep(1:3, 4), nThreads = numThreads), cbRcppAlgosSer = permuteGeneral(v6, 7, freqs = rep(1:3, 4)), cbArrangements = permutations(freq = rep(1:3, 4), k = 7, x = v6), times = 10, unit = "relative") #> Unit: relative #> expr min lq mean median uq max neval #> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 10 #> cbRcppAlgosSer 3.531 3.460 3.217 3.437 2.758 2.815 10 #> cbArrangements 3.901 3.824 3.591 3.816 3.128 3.084 10 ``` ## Partitions ### Partitions - Distinct #### All Distinct Partitions ``` r t1 <- comboGeneral(0:140, freqs=c(140, rep(1, 140)), constraintFun = "sum", comparisonFun = "==", limitConstraints = 140) t2 <- partitions(140, distinct = TRUE) t3 <- diffparts(140) # Each package has different output formats... we only examine dimensions # and that each result is a partition of 140 stopifnot(identical(dim(t1), dim(t2)), identical(dim(t1), dim(t(t3))), all(rowSums(t1) == 140), all(rowSums(t2) == 140), all(colSums(t3) == 140)) dim(t1) #> [1] 9617150 16 rm(t1, t2, t3) invisible(gc()) microbenchmark(cbRcppAlgosPar = partitionsGeneral(0:140, freqs=c(140, rep(1, 140)), nThreads = numThreads), cbRcppAlgosSer = partitionsGeneral(0:140, freqs=c(140, rep(1, 140))), cbArrangements = partitions(140, distinct = TRUE), cbPartitions = diffparts(140), times = 10, unit = "relative") #> Unit: relative #> expr min lq mean median uq max neval #> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 10 #> cbRcppAlgosSer 3.172 3.178 2.887 3.098 2.696 2.539 10 #> cbArrangements 2.510 2.528 2.316 2.488 2.205 2.173 10 #> cbPartitions 16.918 17.160 15.042 17.001 13.310 12.298 10 ``` #### Restricted Distinct Partitions ``` r t1 <- comboGeneral(160, 10, constraintFun = "sum", comparisonFun = "==", limitConstraints = 160) t2 <- partitions(160, 10, distinct = TRUE) stopifnot(identical(t1, t2)) dim(t1) #> [1] 8942920 10 rm(t1, t2) invisible(gc()) microbenchmark(cbRcppAlgosPar = partitionsGeneral(160, 10, nThreads = numThreads), cbRcppAlgosSer = partitionsGeneral(160, 10), cbArrangements = partitions(160, 10, distinct = TRUE), times = 10, unit = "relative") #> Unit: relative #> expr min lq mean median uq max neval #> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 10 #> cbRcppAlgosSer 3.428 3.401 3.013 3.220 3.200 2.316 10 #> cbArrangements 4.657 4.587 3.951 4.307 4.274 2.936 10 ``` ### Partitions - Repetition #### All Partitions ``` r t1 <- comboGeneral(0:65, repetition = TRUE, constraintFun = "sum", comparisonFun = "==", limitConstraints = 65) t2 <- partitions(65) t3 <- parts(65) # Each package has different output formats... we only examine dimensions # and that each result is a partition of 65 stopifnot(identical(dim(t1), dim(t2)), identical(dim(t1), dim(t(t3))), all(rowSums(t1) == 65), all(rowSums(t2) == 65), all(colSums(t3) == 65)) dim(t1) #> [1] 2012558 65 rm(t1, t2, t3) invisible(gc()) microbenchmark(cbRcppAlgosPar = partitionsGeneral(0:65, repetition = TRUE, nThreads = numThreads), cbRcppAlgosSer = partitionsGeneral(0:65, repetition = TRUE), cbArrangements = partitions(65), cbPartitions = parts(65), times = 20, unit = "relative") #> Unit: relative #> expr min lq mean median uq max neval #> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 20 #> cbRcppAlgosSer 2.849 2.767 2.311 2.461 2.313 1.751 20 #> cbArrangements 2.122 2.047 1.712 1.834 1.794 1.130 20 #> cbPartitions 8.855 8.639 6.886 7.736 6.792 4.217 20 ``` #### Restricted Partitions ``` r t1 <- comboGeneral(100, 15, TRUE, constraintFun = "sum", comparisonFun = "==", limitConstraints = 100) t2 <- partitions(100, 15) stopifnot(identical(t1, t2)) dim(t1) #> [1] 9921212 15 rm(t1, t2) # This takes a really long time... not because of restrictedparts, # but because apply is not that fast. This transformation is # needed for proper comparisons. As a result, we will compare # a smaller example # t3 <- t(apply(as.matrix(restrictedparts(100, 15, include.zero = F)), 2, sort)) t3 <- t(apply(as.matrix(restrictedparts(50, 15, include.zero = F)), 2, sort)) stopifnot(identical(partitions(50, 15), t3)) rm(t3) invisible(gc()) microbenchmark(cbRcppAlgosPar = partitionsGeneral(100, 15, TRUE, nThreads = numThreads), cbRcppAlgosSer = partitionsGeneral(100, 15, TRUE), cbArrangements = partitions(100, 15), cbPartitions = restrictedparts(100, 15, include.zero = FALSE), times = 10, unit = "relative") #> Unit: relative #> expr min lq mean median uq max neval #> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.00 1.000 10 #> cbRcppAlgosSer 3.402 3.281 3.055 3.150 2.85 2.827 10 #> cbArrangements 4.194 4.045 3.884 4.087 3.51 3.985 10 #> cbPartitions 14.250 14.076 12.877 13.534 11.57 11.824 10 ``` ### Partitions - Multisets Currenlty, `RcppAlgos` is the only package capable of efficiently generating partitions of multisets. Therefore, we will only time `RcppAlgos` and use this as a reference for future improvements. ``` r t1 <- comboGeneral(120, 10, freqs=rep(1:8, 15), constraintFun = "sum", comparisonFun = "==", limitConstraints = 120) dim(t1) #> [1] 7340225 10 stopifnot(all(rowSums(t1) == 120)) microbenchmark(cbRcppAlgos = partitionsGeneral(120, 10, freqs=rep(1:8, 15)), times = 10) #> Unit: milliseconds #> expr min lq mean median uq max neval #> cbRcppAlgos 246.8 250.6 255.3 252.6 256.4 269.3 10 ``` ## Compositions ### Compositions - Repetition #### All Compositions (Small case) ``` r t1 <- compositionsGeneral(0:15, repetition = TRUE) t2 <- arrangements::compositions(15) t3 <- partitions::compositions(15) # Each package has different output formats... we only examine dimensions # and that each result is a partition of 15 stopifnot(identical(dim(t1), dim(t2)), identical(dim(t1), dim(t(t3))), all(rowSums(t1) == 15), all(rowSums(t2) == 15), all(colSums(t3) == 15)) dim(t1) #> [1] 16384 15 rm(t1, t2, t3) invisible(gc()) microbenchmark(cbRcppAlgosSer = compositionsGeneral(0:15, repetition = TRUE), cbArrangements = arrangements::compositions(15), cbPartitions = partitions::compositions(15), times = 20, unit = "relative") #> Unit: relative #> expr min lq mean median uq max neval #> cbRcppAlgosSer 1.000 1.000 1.00 1.000 1.000 1.000 20 #> cbArrangements 1.182 1.205 1.18 1.194 1.179 1.054 20 #> cbPartitions 129.267 145.992 186.50 192.884 219.196 230.413 20 ``` For the next two examples, we will exclude the `partitions` package for efficiency reasons. #### All Compositions (Larger case) ``` r t1 <- compositionsGeneral(0:23, repetition = TRUE) t2 <- arrangements::compositions(23) # Each package has different output formats... we only examine dimensions # and that each result is a partition of 23 stopifnot(identical(dim(t1), dim(t2)), all(rowSums(t1) == 23), all(rowSums(t2) == 23)) dim(t1) #> [1] 4194304 23 rm(t1, t2) invisible(gc()) microbenchmark(cbRcppAlgosPar = compositionsGeneral(0:23, repetition = TRUE, nThreads = numThreads), cbRcppAlgosSer = compositionsGeneral(0:23, repetition = TRUE), cbArrangements = arrangements::compositions(23), times = 20, unit = "relative") #> Unit: relative #> expr min lq mean median uq max neval #> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 20 #> cbRcppAlgosSer 3.410 3.329 3.331 3.327 3.324 3.397 20 #> cbArrangements 3.797 3.699 3.691 3.698 3.689 3.611 20 ``` #### Restricted Compositions ``` r t1 <- compositionsGeneral(30, 10, repetition = TRUE) t2 <- arrangements::compositions(30, 10) stopifnot(identical(t1, t2), all(rowSums(t1) == 30)) dim(t1) #> [1] 10015005 10 rm(t1, t2) invisible(gc()) microbenchmark(cbRcppAlgosPar = compositionsGeneral(30, 10, repetition = TRUE, nThreads = numThreads), cbRcppAlgosSer = compositionsGeneral(30, 10, repetition = TRUE), cbArrangements = arrangements::compositions(30, 10), times = 20, unit = "relative") #> Unit: relative #> expr min lq mean median uq max neval #> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 20 #> cbRcppAlgosSer 2.988 3.077 2.952 3.020 3.082 1.924 20 #> cbArrangements 3.199 3.170 3.036 3.113 3.074 2.213 20 ``` ## Iterators We will show one example from each category to demonstrate the efficiency of the iterators in `RcppAlgos`. The results are similar for the rest of the cases not shown. ### Combinations ``` r pkg_arrangements <- function(n, total) { a <- icombinations(n, as.integer(n / 2)) for (i in 1:total) a$getnext() } pkg_RcppAlgos <- function(n, total) { a <- comboIter(n, as.integer(n / 2)) for (i in 1:total) a@nextIter() } total <- comboCount(18, 9) total #> [1] 48620 microbenchmark(cbRcppAlgos = pkg_RcppAlgos(18, total), cbArrangements = pkg_arrangements(18, total), times = 15, unit = "relative") #> Unit: relative #> expr min lq mean median uq max neval #> cbRcppAlgos 1.00 1.00 1.00 1.00 1.0 1.00 15 #> cbArrangements 19.31 19.12 18.91 18.81 18.6 18.81 15 ``` ### Permutations ``` r pkg_arrangements <- function(n, total) { a <- ipermutations(n) for (i in 1:total) a$getnext() } pkg_RcppAlgos <- function(n, total) { a <- permuteIter(n) for (i in 1:total) a@nextIter() } total <- permuteCount(8) total #> [1] 40320 microbenchmark(cbRcppAlgos = pkg_RcppAlgos(8, total), cbArrangements = pkg_arrangements(8, total), times = 15, unit = "relative") #> Unit: relative #> expr min lq mean median uq max neval #> cbRcppAlgos 1.00 1.00 1.00 1.00 1.00 1.00 15 #> cbArrangements 19.61 19.41 18.93 19.14 18.66 17.41 15 ``` ### Partitions ``` r pkg_partitions <- function(n, total) { a <- firstpart(n) for (i in 1:(total - 1)) a <- nextpart(a) } pkg_arrangements <- function(n, total) { a <- ipartitions(n) for (i in 1:total) a$getnext() } pkg_RcppAlgos <- function(n, total) { a <- partitionsIter(0:n, repetition = TRUE) for (i in 1:total) a@nextIter() } total <- partitionsCount(0:40, repetition = TRUE) total #> [1] 37338 microbenchmark(cbRcppAlgos = pkg_RcppAlgos(40, total), cbArrangements = pkg_arrangements(40, total), cbPartitions = pkg_partitions(40, total), times = 15, unit = "relative") #> Unit: relative #> expr min lq mean median uq max neval #> cbRcppAlgos 1.00 1.00 1.00 1.00 1.00 1.00 15 #> cbArrangements 15.40 15.11 14.18 14.50 13.21 13.74 15 #> cbPartitions 24.79 24.45 23.08 23.48 21.81 21.54 15 ``` ### Compositions ``` r pkg_partitions <- function(n, total) { a <- firstcomposition(n) for (i in 1:(total - 1)) a <- nextcomposition(a, FALSE) } pkg_arrangements <- function(n, total) { a <- icompositions(n) for (i in 1:total) a$getnext() } pkg_RcppAlgos <- function(n, total) { a <- compositionsIter(0:n, repetition = TRUE) for (i in 1:total) a@nextIter() } total <- compositionsCount(0:15, repetition = TRUE) total #> [1] 16384 microbenchmark(cbRcppAlgos = pkg_RcppAlgos(15, total), cbArrangements = pkg_arrangements(15, total), cbPartitions = pkg_partitions(15, total), times = 15, unit = "relative") #> Unit: relative #> expr min lq mean median uq max neval #> cbRcppAlgos 1.00 1.00 1.00 1.00 1.00 1.00 15 #> cbArrangements 14.23 14.15 13.45 14.01 12.98 12.10 15 #> cbPartitions 43.88 43.91 41.42 43.40 40.17 34.25 15 ```