---
title: "High Performance Benchmarks"
author: "Joseph Wood"
date: "01/31/2025"
output:
  rmarkdown::html_vignette:
    toc: true
    number_sections: false
vignette: >
  %\VignetteIndexEntry{High Performance Benchmarks}
  %\VignetteEngine{knitr::rmarkdown}
  \usepackage[utf8]{inputenc}
---

This document serves as an overview for measuring the performance of `RcppAlgos` against other tools for generating combinations, permutations, and partitions. This stackoverflow post: [How to generate permutations or combinations of object in R?](<https://stackoverflow.com/a/47983855/4408538>) has some benchmarks. You will note that the examples in that post are relatively small. The benchmarks below will focus on larger examples where performance really matters and for this reason we only consider the packages [arrangements](<https://cran.r-project.org/package=arrangements>), [partitions](<https://cran.r-project.org/package=partitions>), and [RcppAlgos](<https://cran.r-project.org/package=RcppAlgos>).

## Setup Information

For the benchmarks below, we used a `2022 Macbook Air Apple M2 24 GB` machine.

``` r
library(RcppAlgos)
library(partitions)
library(arrangements)
#> 
#> Attaching package: 'arrangements'
#> The following object is masked from 'package:partitions':
#> 
#>     compositions
library(microbenchmark)

options(digits = 4)
options(width = 90)

pertinent_output <- capture.output(sessionInfo())
cat(paste(pertinent_output[1:3], collapse = "\n"))
#> R version 4.4.2 (2024-10-31)
#> Platform: aarch64-apple-darwin20
#> Running under: macOS Sonoma 14.5

pkgs <- c("RcppAlgos", "arrangements", "partitions", "microbenchmark")
sapply(pkgs, packageVersion, simplify = FALSE)
#> $RcppAlgos
#> [1] '2.9.2'
#> 
#> $arrangements
#> [1] '1.1.9'
#> 
#> $partitions
#> [1] '1.10.7'
#> 
#> $microbenchmark
#> [1] '1.4.10'

numThreads <- min(as.integer(RcppAlgos::stdThreadMax() / 2), 6)
numThreads
#> [1] 4
```

## Combinations

### Combinations - Distinct

``` r
set.seed(13)
v1 <- sort(sample(100, 30))
m <- 21
t1 <- comboGeneral(v1, m, Parallel = T)
t2 <- combinations(v1, m)
stopifnot(identical(t1, t2))
dim(t1)
#> [1] 14307150       21
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = comboGeneral(v1, m, nThreads = numThreads),
               cbRcppAlgosSer = comboGeneral(v1, m),
               cbArrangements = combinations(v1, m),
               times = 15, unit = "relative")
#> Warning in microbenchmark(cbRcppAlgosPar = comboGeneral(v1, m, nThreads = numThreads), :
#> less accurate nanosecond times to avoid potential integer overflows
#> Unit: relative
#>            expr   min    lq  mean median    uq   max neval
#>  cbRcppAlgosPar 1.000 1.000 1.000  1.000 1.000 1.000    15
#>  cbRcppAlgosSer 2.666 2.427 2.432  2.423 2.402 2.401    15
#>  cbArrangements 2.517 2.290 2.292  2.282 2.263 2.252    15
```

### Combinations - Repetition

``` r
v2 <- v1[1:10]
m <- 20
t1 <- comboGeneral(v2, m, repetition = TRUE, nThreads = numThreads)
t2 <- combinations(v2, m, replace = TRUE)
stopifnot(identical(t1, t2))
dim(t1)
#> [1] 10015005       20
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = comboGeneral(v2, m, TRUE, nThreads = numThreads),
               cbRcppAlgosSer = comboGeneral(v2, m, TRUE),
               cbArrangements = combinations(v2, m, replace = TRUE),
               times = 15, unit = "relative")
#> Unit: relative
#>            expr   min    lq  mean median    uq   max neval
#>  cbRcppAlgosPar 1.000 1.000 1.000  1.000 1.000 1.000    15
#>  cbRcppAlgosSer 3.014 2.937 2.910  2.921 2.850 2.844    15
#>  cbArrangements 2.771 2.782 2.766  2.762 2.689 2.944    15
```

### Combinations - Multisets

``` r
myFreqs <- c(2, 4, 4, 5, 3, 2, 2, 2, 3, 4, 1, 4, 2, 5)
v3 <- as.integer(c(1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610))
t1 <- comboGeneral(v3, 20, freqs = myFreqs, nThreads = numThreads)
t2 <- combinations(freq = myFreqs, k = 20, x = v3)
stopifnot(identical(t1, t2))
dim(t1)
#> [1] 14594082       20
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = comboGeneral(v3, 20, freqs = myFreqs, nThreads = numThreads),
               cbRcppAlgosSer = comboGeneral(v3, 20, freqs = myFreqs),
               cbArrangements = combinations(freq = myFreqs, k = 20, x = v3),
               times = 10, unit = "relative")
#> Unit: relative
#>            expr   min    lq  mean median    uq   max neval
#>  cbRcppAlgosPar 1.000 1.000 1.000  1.000 1.000 1.000    10
#>  cbRcppAlgosSer 3.105 3.069 3.058  3.040 2.996 3.170    10
#>  cbArrangements 5.664 5.726 5.662  5.672 5.598 5.604    10
```

## Permutations

### Permutations - Distinct

``` r
v4 <- as.integer(c(2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59))
t1 <- permuteGeneral(v4, 6, nThreads = numThreads)
t2 <- permutations(v4, 6)
stopifnot(identical(t1, t2))
dim(t1)
#> [1] 8910720       6
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = permuteGeneral(v4, 6, nThreads = numThreads),
               cbRcppAlgosSer = permuteGeneral(v4, 6),
               cbArrangements = permutations(v4, 6),
               times = 15, unit = "relative")
#> Unit: relative
#>            expr   min    lq  mean median    uq   max neval
#>  cbRcppAlgosPar 1.000 1.000 1.000  1.000 1.000 1.000    15
#>  cbRcppAlgosSer 1.167 1.174 1.090  1.160 1.132 1.056    15
#>  cbArrangements 2.042 2.052 1.953  2.025 2.167 1.656    15


## Indexing permutation example with the partitions package
t1 <- permuteGeneral(11, nThreads = 4)
t2 <- permutations(11)
t3 <- perms(11)

dim(t1)
#> [1] 39916800       11

stopifnot(identical(t1, t2), identical(t1, t(as.matrix(t3))))
rm(t1, t2, t3)
invisible(gc())

microbenchmark(cbRcppAlgosPar = permuteGeneral(11, nThreads = 4),
               cbRcppAlgosSer = permuteGeneral(11),
               cbArrangements = permutations(11),
               cbPartitions   = perms(11),
               times = 5, unit = "relative")
#> Unit: relative
#>            expr    min     lq  mean median     uq   max neval
#>  cbRcppAlgosPar  1.000  1.000 1.000  1.000  1.000 1.000     5
#>  cbRcppAlgosSer  2.540  2.477 2.434  2.841  2.825 1.867     5
#>  cbArrangements  3.915  4.136 3.692  4.158  4.246 2.684     5
#>    cbPartitions 10.436 10.282 9.375 10.392 10.990 6.637     5
```

### Permutations - Repetition

``` r
v5 <- v3[1:5]
t1 <- permuteGeneral(v5, 10, repetition = TRUE, nThreads = numThreads)
t2 <- permutations(v5, 10, replace = TRUE)
stopifnot(identical(t1, t2))
dim(t1)
#> [1] 9765625      10
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = permuteGeneral(v5, 10, TRUE, nThreads = numThreads),
               cbRcppAlgosSer = permuteGeneral(v5, 10, TRUE),
               cbArrangements = permutations(x = v5, k = 10, replace = TRUE),
               times = 10, unit = "relative")
#> Unit: relative
#>            expr   min    lq  mean median    uq   max neval
#>  cbRcppAlgosPar 1.000 1.000 1.000  1.000 1.000 1.000    10
#>  cbRcppAlgosSer 3.002 2.803 2.334  2.778 2.780 0.937    10
#>  cbArrangements 3.289 3.055 2.739  3.077 3.077 1.668    10
```

### Permutations - Multisets

``` r
v6 <- sort(runif(12))
t1 <- permuteGeneral(v6, 7, freqs = rep(1:3, 4), nThreads = numThreads)
t2 <- permutations(freq = rep(1:3, 4), k = 7, x = v6)
stopifnot(identical(t1, t2))
dim(t1)
#> [1] 19520760        7
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = permuteGeneral(v6, 7, freqs = rep(1:3, 4), nThreads = numThreads),
               cbRcppAlgosSer = permuteGeneral(v6, 7, freqs = rep(1:3, 4)),
               cbArrangements = permutations(freq = rep(1:3, 4), k = 7, x = v6),
               times = 10, unit = "relative")
#> Unit: relative
#>            expr   min    lq  mean median    uq   max neval
#>  cbRcppAlgosPar 1.000 1.000 1.000  1.000 1.000 1.000    10
#>  cbRcppAlgosSer 3.531 3.460 3.217  3.437 2.758 2.815    10
#>  cbArrangements 3.901 3.824 3.591  3.816 3.128 3.084    10
```

## Partitions

### Partitions - Distinct

#### All Distinct Partitions

``` r
t1 <- comboGeneral(0:140, freqs=c(140, rep(1, 140)),
                   constraintFun = "sum", comparisonFun = "==",
                   limitConstraints = 140)
t2 <- partitions(140, distinct = TRUE)
t3 <- diffparts(140)

# Each package has different output formats... we only examine dimensions
# and that each result is a partition of 140
stopifnot(identical(dim(t1), dim(t2)), identical(dim(t1), dim(t(t3))),
                    all(rowSums(t1) == 140), all(rowSums(t2) == 140),
                    all(colSums(t3) == 140))
dim(t1)
#> [1] 9617150      16
rm(t1, t2, t3)
invisible(gc())
microbenchmark(cbRcppAlgosPar = partitionsGeneral(0:140, freqs=c(140, rep(1, 140)), nThreads = numThreads),
               cbRcppAlgosSer = partitionsGeneral(0:140, freqs=c(140, rep(1, 140))),
               cbArrangements = partitions(140, distinct = TRUE),
               cbPartitions   = diffparts(140),
               times = 10, unit = "relative")
#> Unit: relative
#>            expr    min     lq   mean median     uq    max neval
#>  cbRcppAlgosPar  1.000  1.000  1.000  1.000  1.000  1.000    10
#>  cbRcppAlgosSer  3.172  3.178  2.887  3.098  2.696  2.539    10
#>  cbArrangements  2.510  2.528  2.316  2.488  2.205  2.173    10
#>    cbPartitions 16.918 17.160 15.042 17.001 13.310 12.298    10
```

#### Restricted Distinct Partitions

``` r
t1 <- comboGeneral(160, 10,
                   constraintFun = "sum", comparisonFun = "==",
                   limitConstraints = 160)
t2 <- partitions(160, 10, distinct = TRUE)
stopifnot(identical(t1, t2))
dim(t1)
#> [1] 8942920      10
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = partitionsGeneral(160, 10, nThreads = numThreads),
               cbRcppAlgosSer = partitionsGeneral(160, 10),
               cbArrangements = partitions(160, 10, distinct = TRUE),
               times = 10, unit = "relative")
#> Unit: relative
#>            expr   min    lq  mean median    uq   max neval
#>  cbRcppAlgosPar 1.000 1.000 1.000  1.000 1.000 1.000    10
#>  cbRcppAlgosSer 3.428 3.401 3.013  3.220 3.200 2.316    10
#>  cbArrangements 4.657 4.587 3.951  4.307 4.274 2.936    10
```

### Partitions - Repetition

#### All Partitions

``` r
t1 <- comboGeneral(0:65, repetition = TRUE, constraintFun = "sum",
                   comparisonFun = "==", limitConstraints = 65)
t2 <- partitions(65)
t3 <- parts(65)

# Each package has different output formats... we only examine dimensions
# and that each result is a partition of 65
stopifnot(identical(dim(t1), dim(t2)), identical(dim(t1), dim(t(t3))),
          all(rowSums(t1) == 65), all(rowSums(t2) == 65),
          all(colSums(t3) == 65))
dim(t1)
#> [1] 2012558      65
rm(t1, t2, t3)
invisible(gc())
microbenchmark(cbRcppAlgosPar = partitionsGeneral(0:65, repetition = TRUE,
                                                  nThreads = numThreads),
               cbRcppAlgosSer = partitionsGeneral(0:65, repetition = TRUE),
               cbArrangements = partitions(65),
               cbPartitions   = parts(65),
               times = 20, unit = "relative")
#> Unit: relative
#>            expr   min    lq  mean median    uq   max neval
#>  cbRcppAlgosPar 1.000 1.000 1.000  1.000 1.000 1.000    20
#>  cbRcppAlgosSer 2.849 2.767 2.311  2.461 2.313 1.751    20
#>  cbArrangements 2.122 2.047 1.712  1.834 1.794 1.130    20
#>    cbPartitions 8.855 8.639 6.886  7.736 6.792 4.217    20
```

#### Restricted Partitions

``` r
t1 <- comboGeneral(100, 15, TRUE, constraintFun = "sum",
                   comparisonFun = "==", limitConstraints = 100)
t2 <- partitions(100, 15)
stopifnot(identical(t1, t2))
dim(t1)
#> [1] 9921212      15
rm(t1, t2)

# This takes a really long time... not because of restrictedparts,
# but because apply is not that fast. This transformation is
# needed for proper comparisons. As a result, we will compare
# a smaller example
# t3 <- t(apply(as.matrix(restrictedparts(100, 15, include.zero = F)), 2, sort))
t3 <- t(apply(as.matrix(restrictedparts(50, 15, include.zero = F)), 2, sort))
stopifnot(identical(partitions(50, 15), t3))
rm(t3)
invisible(gc())
microbenchmark(cbRcppAlgosPar = partitionsGeneral(100, 15, TRUE,
                                                  nThreads = numThreads),
               cbRcppAlgosSer = partitionsGeneral(100, 15, TRUE),
               cbArrangements = partitions(100, 15),
               cbPartitions   = restrictedparts(100, 15,
                                                include.zero = FALSE),
               times = 10, unit = "relative")
#> Unit: relative
#>            expr    min     lq   mean median    uq    max neval
#>  cbRcppAlgosPar  1.000  1.000  1.000  1.000  1.00  1.000    10
#>  cbRcppAlgosSer  3.402  3.281  3.055  3.150  2.85  2.827    10
#>  cbArrangements  4.194  4.045  3.884  4.087  3.51  3.985    10
#>    cbPartitions 14.250 14.076 12.877 13.534 11.57 11.824    10
```

### Partitions - Multisets

Currenlty, `RcppAlgos` is the only package capable of efficiently generating partitions of multisets. Therefore, we will only time `RcppAlgos` and use this as a reference for future improvements.

``` r
t1 <- comboGeneral(120, 10, freqs=rep(1:8, 15),
                   constraintFun = "sum", comparisonFun = "==",
                   limitConstraints = 120)
dim(t1)
#> [1] 7340225      10
stopifnot(all(rowSums(t1) == 120))
microbenchmark(cbRcppAlgos = partitionsGeneral(120, 10, freqs=rep(1:8, 15)),
               times = 10)
#> Unit: milliseconds
#>         expr   min    lq  mean median    uq   max neval
#>  cbRcppAlgos 246.8 250.6 255.3  252.6 256.4 269.3    10
```

## Compositions

### Compositions - Repetition

#### All Compositions (Small case)

``` r
t1 <- compositionsGeneral(0:15, repetition = TRUE)
t2 <- arrangements::compositions(15)
t3 <- partitions::compositions(15)

# Each package has different output formats... we only examine dimensions
# and that each result is a partition of 15
stopifnot(identical(dim(t1), dim(t2)), identical(dim(t1), dim(t(t3))),
          all(rowSums(t1) == 15), all(rowSums(t2) == 15),
          all(colSums(t3) == 15))
dim(t1)
#> [1] 16384    15
rm(t1, t2, t3)
invisible(gc())
microbenchmark(cbRcppAlgosSer = compositionsGeneral(0:15, repetition = TRUE),
               cbArrangements = arrangements::compositions(15),
               cbPartitions   = partitions::compositions(15),
               times = 20, unit = "relative")
#> Unit: relative
#>            expr     min      lq   mean  median      uq     max neval
#>  cbRcppAlgosSer   1.000   1.000   1.00   1.000   1.000   1.000    20
#>  cbArrangements   1.182   1.205   1.18   1.194   1.179   1.054    20
#>    cbPartitions 129.267 145.992 186.50 192.884 219.196 230.413    20
```

For the next two examples, we will exclude the `partitions` package for efficiency reasons.

#### All Compositions (Larger case)

``` r
t1 <- compositionsGeneral(0:23, repetition = TRUE)
t2 <- arrangements::compositions(23)

# Each package has different output formats... we only examine dimensions
# and that each result is a partition of 23
stopifnot(identical(dim(t1), dim(t2)), all(rowSums(t1) == 23),
          all(rowSums(t2) == 23))
dim(t1)
#> [1] 4194304      23
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = compositionsGeneral(0:23, repetition = TRUE,
                                                    nThreads = numThreads),
               cbRcppAlgosSer = compositionsGeneral(0:23, repetition = TRUE),
               cbArrangements = arrangements::compositions(23),
               times = 20, unit = "relative")
#> Unit: relative
#>            expr   min    lq  mean median    uq   max neval
#>  cbRcppAlgosPar 1.000 1.000 1.000  1.000 1.000 1.000    20
#>  cbRcppAlgosSer 3.410 3.329 3.331  3.327 3.324 3.397    20
#>  cbArrangements 3.797 3.699 3.691  3.698 3.689 3.611    20
```

#### Restricted Compositions

``` r
t1 <- compositionsGeneral(30, 10, repetition = TRUE)
t2 <- arrangements::compositions(30, 10)

stopifnot(identical(t1, t2), all(rowSums(t1) == 30))
dim(t1)
#> [1] 10015005       10
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = compositionsGeneral(30, 10, repetition = TRUE,
                                                    nThreads = numThreads),
               cbRcppAlgosSer = compositionsGeneral(30, 10, repetition = TRUE),
               cbArrangements = arrangements::compositions(30, 10),
               times = 20, unit = "relative")
#> Unit: relative
#>            expr   min    lq  mean median    uq   max neval
#>  cbRcppAlgosPar 1.000 1.000 1.000  1.000 1.000 1.000    20
#>  cbRcppAlgosSer 2.988 3.077 2.952  3.020 3.082 1.924    20
#>  cbArrangements 3.199 3.170 3.036  3.113 3.074 2.213    20
```

## Iterators

We will show one example from each category to demonstrate the efficiency of the iterators in `RcppAlgos`. The results are similar for the rest of the cases not shown.

### Combinations

``` r
pkg_arrangements <- function(n, total) {
    a <- icombinations(n, as.integer(n / 2))
    for (i in 1:total) a$getnext()
}

pkg_RcppAlgos <- function(n, total) {
    a <- comboIter(n, as.integer(n / 2))
    for (i in 1:total) a@nextIter()
}

total <- comboCount(18, 9)
total
#> [1] 48620

microbenchmark(cbRcppAlgos    = pkg_RcppAlgos(18, total),
               cbArrangements = pkg_arrangements(18, total),
               times = 15, unit = "relative")
#> Unit: relative
#>            expr   min    lq  mean median   uq   max neval
#>     cbRcppAlgos  1.00  1.00  1.00   1.00  1.0  1.00    15
#>  cbArrangements 19.31 19.12 18.91  18.81 18.6 18.81    15
```

### Permutations

``` r
pkg_arrangements <- function(n, total) {
    a <- ipermutations(n)
    for (i in 1:total) a$getnext()
}

pkg_RcppAlgos <- function(n, total) {
    a <- permuteIter(n)
    for (i in 1:total) a@nextIter()
}

total <- permuteCount(8)
total
#> [1] 40320

microbenchmark(cbRcppAlgos    = pkg_RcppAlgos(8, total),
               cbArrangements = pkg_arrangements(8, total),
               times = 15, unit = "relative")
#> Unit: relative
#>            expr   min    lq  mean median    uq   max neval
#>     cbRcppAlgos  1.00  1.00  1.00   1.00  1.00  1.00    15
#>  cbArrangements 19.61 19.41 18.93  19.14 18.66 17.41    15
```

### Partitions

``` r
pkg_partitions <- function(n, total) {
    a <- firstpart(n)
    for (i in 1:(total - 1)) a <- nextpart(a)
}

pkg_arrangements <- function(n, total) {
    a <- ipartitions(n)
    for (i in 1:total) a$getnext()
}

pkg_RcppAlgos <- function(n, total) {
    a <- partitionsIter(0:n, repetition = TRUE)
    for (i in 1:total) a@nextIter()
}

total <- partitionsCount(0:40, repetition = TRUE)
total
#> [1] 37338

microbenchmark(cbRcppAlgos    = pkg_RcppAlgos(40, total),
               cbArrangements = pkg_arrangements(40, total),
               cbPartitions   = pkg_partitions(40, total),
               times = 15, unit = "relative")
#> Unit: relative
#>            expr   min    lq  mean median    uq   max neval
#>     cbRcppAlgos  1.00  1.00  1.00   1.00  1.00  1.00    15
#>  cbArrangements 15.40 15.11 14.18  14.50 13.21 13.74    15
#>    cbPartitions 24.79 24.45 23.08  23.48 21.81 21.54    15
```

### Compositions

``` r
pkg_partitions <- function(n, total) {
    a <- firstcomposition(n)
    for (i in 1:(total - 1)) a <- nextcomposition(a, FALSE)
}

pkg_arrangements <- function(n, total) {
    a <- icompositions(n)
    for (i in 1:total) a$getnext()
}

pkg_RcppAlgos <- function(n, total) {
    a <- compositionsIter(0:n, repetition = TRUE)
    for (i in 1:total) a@nextIter()
}

total <- compositionsCount(0:15, repetition = TRUE)
total
#> [1] 16384

microbenchmark(cbRcppAlgos    = pkg_RcppAlgos(15, total),
               cbArrangements = pkg_arrangements(15, total),
               cbPartitions   = pkg_partitions(15, total),
               times = 15, unit = "relative")
#> Unit: relative
#>            expr   min    lq  mean median    uq   max neval
#>     cbRcppAlgos  1.00  1.00  1.00   1.00  1.00  1.00    15
#>  cbArrangements 14.23 14.15 13.45  14.01 12.98 12.10    15
#>    cbPartitions 43.88 43.91 41.42  43.40 40.17 34.25    15
```