vignettes/HighPerformanceBenchmarks.Rmd
HighPerformanceBenchmarks.Rmd
This document serves as an overview for measuring the performance of
RcppAlgos
against other tools for generating combinations,
permutations, and partitions. This stackoverflow post: How to generate
permutations or combinations of object in R? has some benchmarks.
You will note that the examples in that post are relatively small. The
benchmarks below will focus on larger examples where performance really
matters and for this reason we only consider the packages arrangements,
partitions,
and RcppAlgos.
For the benchmarks below, we used a
2022 Macbook Air Apple M2 24 GB
machine.
library(RcppAlgos)
library(partitions)
library(arrangements)
#>
#> Attaching package: 'arrangements'
#> The following object is masked from 'package:partitions':
#>
#> compositions
library(microbenchmark)
options(digits = 4)
options(width = 90)
pertinent_output <- capture.output(sessionInfo())
cat(paste(pertinent_output[1:3], collapse = "\n"))
#> R version 4.4.2 (2024-10-31)
#> Platform: aarch64-apple-darwin20
#> Running under: macOS Sonoma 14.5
pkgs <- c("RcppAlgos", "arrangements", "partitions", "microbenchmark")
sapply(pkgs, packageVersion, simplify = FALSE)
#> $RcppAlgos
#> [1] '2.9.0'
#>
#> $arrangements
#> [1] '1.1.9'
#>
#> $partitions
#> [1] '1.10.7'
#>
#> $microbenchmark
#> [1] '1.4.10'
numThreads <- min(as.integer(RcppAlgos::stdThreadMax() / 2), 6)
numThreads
#> [1] 4
set.seed(13)
v1 <- sort(sample(100, 30))
m <- 21
t1 <- comboGeneral(v1, m, Parallel = T)
t2 <- combinations(v1, m)
stopifnot(identical(t1, t2))
dim(t1)
#> [1] 14307150 21
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = comboGeneral(v1, m, nThreads = numThreads),
cbRcppAlgosSer = comboGeneral(v1, m),
cbArrangements = combinations(v1, m),
times = 15, unit = "relative")
#> Warning in microbenchmark(cbRcppAlgosPar = comboGeneral(v1, m, nThreads = numThreads), :
#> less accurate nanosecond times to avoid potential integer overflows
#> Unit: relative
#> expr min lq mean median uq max neval
#> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 15
#> cbRcppAlgosSer 3.394 3.089 3.080 3.079 3.047 2.937 15
#> cbArrangements 3.230 2.931 2.921 2.922 2.885 2.793 15
v2 <- v1[1:10]
m <- 20
t1 <- comboGeneral(v2, m, repetition = TRUE, nThreads = numThreads)
t2 <- combinations(v2, m, replace = TRUE)
stopifnot(identical(t1, t2))
dim(t1)
#> [1] 10015005 20
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = comboGeneral(v2, m, TRUE, nThreads = numThreads),
cbRcppAlgosSer = comboGeneral(v2, m, TRUE),
cbArrangements = combinations(v2, m, replace = TRUE),
times = 15, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval
#> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 15
#> cbRcppAlgosSer 3.020 2.955 2.944 2.960 2.960 2.781 15
#> cbArrangements 2.728 2.782 2.762 2.786 2.779 2.620 15
myFreqs <- c(2, 4, 4, 5, 3, 2, 2, 2, 3, 4, 1, 4, 2, 5)
v3 <- as.integer(c(1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610))
t1 <- comboGeneral(v3, 20, freqs = myFreqs, nThreads = numThreads)
t2 <- combinations(freq = myFreqs, k = 20, x = v3)
stopifnot(identical(t1, t2))
dim(t1)
#> [1] 14594082 20
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = comboGeneral(v3, 20, freqs = myFreqs, nThreads = numThreads),
cbRcppAlgosSer = comboGeneral(v3, 20, freqs = myFreqs),
cbArrangements = combinations(freq = myFreqs, k = 20, x = v3),
times = 10, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval
#> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 10
#> cbRcppAlgosSer 3.151 3.167 3.115 3.160 3.075 2.906 10
#> cbArrangements 5.705 5.800 5.680 5.766 5.599 5.290 10
v4 <- as.integer(c(2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59))
t1 <- permuteGeneral(v4, 6, nThreads = numThreads)
t2 <- permutations(v4, 6)
stopifnot(identical(t1, t2))
dim(t1)
#> [1] 8910720 6
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = permuteGeneral(v4, 6, nThreads = numThreads),
cbRcppAlgosSer = permuteGeneral(v4, 6),
cbArrangements = permutations(v4, 6),
times = 15, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval
#> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 15
#> cbRcppAlgosSer 1.182 1.182 1.114 1.183 1.139 1.089 15
#> cbArrangements 2.065 2.055 1.987 2.065 2.132 1.738 15
## Indexing permutation example with the partitions package
t1 <- permuteGeneral(11, nThreads = 4)
t2 <- permutations(11)
t3 <- perms(11)
dim(t1)
#> [1] 39916800 11
stopifnot(identical(t1, t2), identical(t1, t(as.matrix(t3))))
rm(t1, t2, t3)
invisible(gc())
microbenchmark(cbRcppAlgosPar = permuteGeneral(11, nThreads = 4),
cbRcppAlgosSer = permuteGeneral(11),
cbArrangements = permutations(11),
cbPartitions = perms(11),
times = 5, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval
#> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 5
#> cbRcppAlgosSer 2.489 2.494 2.423 2.798 2.778 1.885 5
#> cbArrangements 3.789 3.773 3.455 3.726 3.715 2.727 5
#> cbPartitions 10.104 10.188 9.216 10.147 10.583 6.650 5
v5 <- v3[1:5]
t1 <- permuteGeneral(v5, 10, repetition = TRUE, nThreads = numThreads)
t2 <- permutations(v5, 10, replace = TRUE)
stopifnot(identical(t1, t2))
dim(t1)
#> [1] 9765625 10
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = permuteGeneral(v5, 10, TRUE, nThreads = numThreads),
cbRcppAlgosSer = permuteGeneral(v5, 10, TRUE),
cbArrangements = permutations(x = v5, k = 10, replace = TRUE),
times = 10, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval
#> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.0000 10
#> cbRcppAlgosSer 2.960 2.822 2.309 2.788 2.637 0.9449 10
#> cbArrangements 3.287 3.097 2.698 3.062 2.916 1.6584 10
v6 <- sort(runif(12))
t1 <- permuteGeneral(v6, 7, freqs = rep(1:3, 4), nThreads = numThreads)
t2 <- permutations(freq = rep(1:3, 4), k = 7, x = v6)
stopifnot(identical(t1, t2))
dim(t1)
#> [1] 19520760 7
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = permuteGeneral(v6, 7, freqs = rep(1:3, 4), nThreads = numThreads),
cbRcppAlgosSer = permuteGeneral(v6, 7, freqs = rep(1:3, 4)),
cbArrangements = permutations(freq = rep(1:3, 4), k = 7, x = v6),
times = 10, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval
#> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 10
#> cbRcppAlgosSer 3.457 3.503 3.197 3.510 2.614 2.720 10
#> cbArrangements 3.867 3.894 3.600 3.886 3.112 3.011 10
t1 <- comboGeneral(0:140, freqs=c(140, rep(1, 140)),
constraintFun = "sum", comparisonFun = "==",
limitConstraints = 140)
t2 <- partitions(140, distinct = TRUE)
t3 <- diffparts(140)
# Each package has different output formats... we only examine dimensions
# and that each result is a partition of 140
stopifnot(identical(dim(t1), dim(t2)), identical(dim(t1), dim(t(t3))),
all(rowSums(t1) == 140), all(rowSums(t2) == 140),
all(colSums(t3) == 140))
dim(t1)
#> [1] 9617150 16
rm(t1, t2, t3)
invisible(gc())
microbenchmark(cbRcppAlgosPar = partitionsGeneral(0:140, freqs=c(140, rep(1, 140)), nThreads = numThreads),
cbRcppAlgosSer = partitionsGeneral(0:140, freqs=c(140, rep(1, 140))),
cbArrangements = partitions(140, distinct = TRUE),
cbPartitions = diffparts(140),
times = 10, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval
#> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 10
#> cbRcppAlgosSer 3.236 3.197 2.913 3.107 2.599 2.593 10
#> cbArrangements 2.565 2.540 2.321 2.469 2.153 2.203 10
#> cbPartitions 16.860 17.088 14.995 16.855 13.122 12.187 10
t1 <- comboGeneral(160, 10,
constraintFun = "sum", comparisonFun = "==",
limitConstraints = 160)
t2 <- partitions(160, 10, distinct = TRUE)
stopifnot(identical(t1, t2))
dim(t1)
#> [1] 8942920 10
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = partitionsGeneral(160, 10, nThreads = numThreads),
cbRcppAlgosSer = partitionsGeneral(160, 10),
cbArrangements = partitions(160, 10, distinct = TRUE),
times = 10, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval
#> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 10
#> cbRcppAlgosSer 3.419 3.404 3.039 3.375 3.319 2.239 10
#> cbArrangements 4.648 4.641 3.963 4.594 4.434 2.519 10
t1 <- comboGeneral(0:65, repetition = TRUE, constraintFun = "sum",
comparisonFun = "==", limitConstraints = 65)
t2 <- partitions(65)
t3 <- parts(65)
# Each package has different output formats... we only examine dimensions
# and that each result is a partition of 65
stopifnot(identical(dim(t1), dim(t2)), identical(dim(t1), dim(t(t3))),
all(rowSums(t1) == 65), all(rowSums(t2) == 65),
all(colSums(t3) == 65))
dim(t1)
#> [1] 2012558 65
rm(t1, t2, t3)
invisible(gc())
microbenchmark(cbRcppAlgosPar = partitionsGeneral(0:65, repetition = TRUE,
nThreads = numThreads),
cbRcppAlgosSer = partitionsGeneral(0:65, repetition = TRUE),
cbArrangements = partitions(65),
cbPartitions = parts(65),
times = 20, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval
#> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 20
#> cbRcppAlgosSer 2.872 2.809 2.296 2.433 2.271 1.730 20
#> cbArrangements 2.124 2.111 1.712 1.830 1.768 1.116 20
#> cbPartitions 8.977 8.755 6.807 7.582 6.544 4.177 20
t1 <- comboGeneral(100, 15, TRUE, constraintFun = "sum",
comparisonFun = "==", limitConstraints = 100)
t2 <- partitions(100, 15)
stopifnot(identical(t1, t2))
dim(t1)
#> [1] 9921212 15
rm(t1, t2)
# This takes a really long time... not because of restrictedparts,
# but because apply is not that fast. This transformation is
# needed for proper comparisons. As a result, we will compare
# a smaller example
# t3 <- t(apply(as.matrix(restrictedparts(100, 15, include.zero = F)), 2, sort))
t3 <- t(apply(as.matrix(restrictedparts(50, 15, include.zero = F)), 2, sort))
stopifnot(identical(partitions(50, 15), t3))
rm(t3)
invisible(gc())
microbenchmark(cbRcppAlgosPar = partitionsGeneral(100, 15, TRUE,
nThreads = numThreads),
cbRcppAlgosSer = partitionsGeneral(100, 15, TRUE),
cbArrangements = partitions(100, 15),
cbPartitions = restrictedparts(100, 15,
include.zero = FALSE),
times = 10, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval
#> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 10
#> cbRcppAlgosSer 3.344 3.331 3.032 3.142 2.783 2.758 10
#> cbArrangements 4.248 4.228 3.932 4.148 3.490 3.924 10
#> cbPartitions 14.467 14.480 12.950 13.647 11.458 11.611 10
Currenlty, RcppAlgos
is the only package capable of
efficiently generating partitions of multisets. Therefore, we will only
time RcppAlgos
and use this as a reference for future
improvements.
t1 <- comboGeneral(120, 10, freqs=rep(1:8, 15),
constraintFun = "sum", comparisonFun = "==",
limitConstraints = 120)
dim(t1)
#> [1] 7340225 10
stopifnot(all(rowSums(t1) == 120))
microbenchmark(cbRcppAlgos = partitionsGeneral(120, 10, freqs=rep(1:8, 15)),
times = 10)
#> Unit: milliseconds
#> expr min lq mean median uq max neval
#> cbRcppAlgos 246.7 248.3 254.1 252.1 255.1 268.9 10
t1 <- compositionsGeneral(0:15, repetition = TRUE)
t2 <- arrangements::compositions(15)
t3 <- partitions::compositions(15)
# Each package has different output formats... we only examine dimensions
# and that each result is a partition of 15
stopifnot(identical(dim(t1), dim(t2)), identical(dim(t1), dim(t(t3))),
all(rowSums(t1) == 15), all(rowSums(t2) == 15),
all(colSums(t3) == 15))
dim(t1)
#> [1] 16384 15
rm(t1, t2, t3)
invisible(gc())
microbenchmark(cbRcppAlgosSer = compositionsGeneral(0:15, repetition = TRUE),
cbArrangements = arrangements::compositions(15),
cbPartitions = partitions::compositions(15),
times = 20, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval
#> cbRcppAlgosSer 1.000 1.000 1.000 1.000 1.000 1.00 20
#> cbArrangements 1.193 1.213 1.197 1.199 1.197 1.15 20
#> cbPartitions 133.151 148.603 194.927 208.164 224.685 256.15 20
For the next two examples, we will exclude the
partitions
package for efficiency reasons.
t1 <- compositionsGeneral(0:23, repetition = TRUE)
t2 <- arrangements::compositions(23)
# Each package has different output formats... we only examine dimensions
# and that each result is a partition of 23
stopifnot(identical(dim(t1), dim(t2)), all(rowSums(t1) == 23),
all(rowSums(t2) == 23))
dim(t1)
#> [1] 4194304 23
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = compositionsGeneral(0:23, repetition = TRUE,
nThreads = numThreads),
cbRcppAlgosSer = compositionsGeneral(0:23, repetition = TRUE),
cbArrangements = arrangements::compositions(23),
times = 20, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval
#> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 20
#> cbRcppAlgosSer 3.392 3.317 3.284 3.292 3.269 3.166 20
#> cbArrangements 3.792 3.701 3.659 3.666 3.639 3.528 20
t1 <- compositionsGeneral(30, 10, repetition = TRUE)
t2 <- arrangements::compositions(30, 10)
stopifnot(identical(t1, t2), all(rowSums(t1) == 30))
dim(t1)
#> [1] 10015005 10
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = compositionsGeneral(30, 10, repetition = TRUE,
nThreads = numThreads),
cbRcppAlgosSer = compositionsGeneral(30, 10, repetition = TRUE),
cbArrangements = arrangements::compositions(30, 10),
times = 20, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval
#> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 20
#> cbRcppAlgosSer 2.964 3.096 2.932 3.064 3.007 1.804 20
#> cbArrangements 3.185 3.178 3.025 3.158 3.102 1.862 20
We will show one example from each category to demonstrate the
efficiency of the iterators in RcppAlgos
. The results are
similar for the rest of the cases not shown.
pkg_arrangements <- function(n, total) {
a <- icombinations(n, as.integer(n / 2))
for (i in 1:total) a$getnext()
}
pkg_RcppAlgos <- function(n, total) {
a <- comboIter(n, as.integer(n / 2))
for (i in 1:total) a@nextIter()
}
total <- comboCount(18, 9)
total
#> [1] 48620
microbenchmark(cbRcppAlgos = pkg_RcppAlgos(18, total),
cbArrangements = pkg_arrangements(18, total),
times = 15, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval
#> cbRcppAlgos 1.00 1.00 1.00 1 1.00 1.00 15
#> cbArrangements 19.53 19.19 18.73 19 18.01 18.22 15
pkg_arrangements <- function(n, total) {
a <- ipermutations(n)
for (i in 1:total) a$getnext()
}
pkg_RcppAlgos <- function(n, total) {
a <- permuteIter(n)
for (i in 1:total) a@nextIter()
}
total <- permuteCount(8)
total
#> [1] 40320
microbenchmark(cbRcppAlgos = pkg_RcppAlgos(8, total),
cbArrangements = pkg_arrangements(8, total),
times = 15, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval
#> cbRcppAlgos 1.00 1.00 1.0 1.0 1.00 1.00 15
#> cbArrangements 19.77 19.53 19.2 19.4 18.74 18.59 15
pkg_partitions <- function(n, total) {
a <- firstpart(n)
for (i in 1:(total - 1)) a <- nextpart(a)
}
pkg_arrangements <- function(n, total) {
a <- ipartitions(n)
for (i in 1:total) a$getnext()
}
pkg_RcppAlgos <- function(n, total) {
a <- partitionsIter(0:n, repetition = TRUE)
for (i in 1:total) a@nextIter()
}
total <- partitionsCount(0:40, repetition = TRUE)
total
#> [1] 37338
microbenchmark(cbRcppAlgos = pkg_RcppAlgos(40, total),
cbArrangements = pkg_arrangements(40, total),
cbPartitions = pkg_partitions(40, total),
times = 15, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval
#> cbRcppAlgos 1.00 1.00 1.00 1.00 1.00 1.00 15
#> cbArrangements 15.42 15.21 14.18 14.16 13.27 13.62 15
#> cbPartitions 25.12 24.90 23.20 23.18 21.70 21.44 15
pkg_partitions <- function(n, total) {
a <- firstcomposition(n)
for (i in 1:(total - 1)) a <- nextcomposition(a, FALSE)
}
pkg_arrangements <- function(n, total) {
a <- icompositions(n)
for (i in 1:total) a$getnext()
}
pkg_RcppAlgos <- function(n, total) {
a <- compositionsIter(0:n, repetition = TRUE)
for (i in 1:total) a@nextIter()
}
total <- compositionsCount(0:15, repetition = TRUE)
total
#> [1] 16384
microbenchmark(cbRcppAlgos = pkg_RcppAlgos(15, total),
cbArrangements = pkg_arrangements(15, total),
cbPartitions = pkg_partitions(15, total),
times = 15, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval
#> cbRcppAlgos 1.00 1.00 1.00 1.00 1.00 1.00 15
#> cbArrangements 14.46 14.20 13.34 13.99 12.91 10.24 15
#> cbPartitions 44.40 44.47 41.82 43.63 40.39 33.11 15