This document serves as an overview for measuring the performance of
RcppAlgos against other tools for generating combinations,
permutations, and partitions. The Stack Overflow post "How to generate
permutations or combinations of object in R?" has some benchmarks.
You will note that the examples in that post are relatively small. The
benchmarks below will focus on larger examples where performance really
matters, and for this reason we only consider the packages arrangements,
partitions, and RcppAlgos.
For the benchmarks below, we used a 2022 MacBook Air (Apple M2, 24 GB)
machine.
library(RcppAlgos)
library(partitions)
library(arrangements)
#>
#> Attaching package: 'arrangements'
#> The following object is masked from 'package:partitions':
#>
#> compositions
library(microbenchmark)
# Console formatting for the benchmark tables printed below.
options(digits = 4)
options(width = 90)

# Report the first three lines of sessionInfo() so the timings can be
# tied to a concrete environment.
pertinent_output <- capture.output(sessionInfo())
cat(paste(pertinent_output[1:3], collapse = "\n"))
#> R version 4.2.1 (2022-06-23)
#> Platform: aarch64-apple-darwin20 (64-bit)
#> Running under: macOS Monterey 12.6

# Versions of the packages used in this document, returned as a named list
# (simplify = FALSE keeps one 'package_version' object per package).
pkgs <- c("RcppAlgos", "arrangements", "partitions", "microbenchmark")
sapply(pkgs, packageVersion, simplify = FALSE)
#> $RcppAlgos
#> [1] '2.7.1'
#>
#> $arrangements
#> [1] '1.1.9'
#>
#> $partitions
#> [1] '1.10.7'
#>
#> $microbenchmark
#> [1] '1.4.7'
# Thread count used by the parallel runs below: half of what
# RcppAlgos::stdThreadMax() reports, capped at 6.
numThreads <- min(as.integer(RcppAlgos::stdThreadMax() / 2), 6)
numThreads
#> [1] 4
# Combinations without repetition: choose m = 21 from 30 sampled values.
set.seed(13)
v1 <- sort(sample(100, 30))
m <- 21

# Check that both packages return identical results before timing them.
t1 <- comboGeneral(v1, m, Parallel = TRUE)
t2 <- combinations(v1, m)
stopifnot(identical(t1, t2))
dim(t1)
#> [1] 14307150 21

# Release the large result matrices so they do not affect the timings.
rm(t1, t2)
invisible(gc())
microbenchmark(
  cbRcppAlgosPar = comboGeneral(v1, m, nThreads = numThreads),
  cbRcppAlgosSer = comboGeneral(v1, m),
  cbArrangements = combinations(v1, m),
  times = 15, unit = "relative"
)
#> Warning in microbenchmark(cbRcppAlgosPar = comboGeneral(v1, m, nThreads = numThreads), :
#> less accurate nanosecond times to avoid potential integer overflows
#> Unit: relative
#> expr min lq mean median uq max neval
#> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 15
#> cbRcppAlgosSer 3.438 2.954 2.893 2.866 2.826 2.605 15
#> cbArrangements 3.471 3.003 2.934 2.912 2.856 2.643 15
# Combinations with repetition: choose m = 20 from the first 10 values of v1.
v2 <- v1[1:10]
m <- 20

# Check that both packages return identical results before timing them.
t1 <- comboGeneral(v2, m, repetition = TRUE, nThreads = numThreads)
t2 <- combinations(v2, m, replace = TRUE)
stopifnot(identical(t1, t2))
dim(t1)
#> [1] 10015005 20

# Release the large result matrices so they do not affect the timings.
rm(t1, t2)
invisible(gc())
microbenchmark(
  cbRcppAlgosPar = comboGeneral(v2, m, TRUE, nThreads = numThreads),
  cbRcppAlgosSer = comboGeneral(v2, m, TRUE),
  cbArrangements = combinations(v2, m, replace = TRUE),
  times = 15, unit = "relative"
)
#> Unit: relative
#> expr min lq mean median uq max neval
#> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 15
#> cbRcppAlgosSer 2.919 2.837 2.732 2.713 2.647 2.494 15
#> cbArrangements 2.837 2.836 2.717 2.717 2.634 2.453 15
# Combinations of a multiset: myFreqs[i] is passed as the frequency of v3[i];
# 20 elements are chosen.
myFreqs <- c(2, 4, 4, 5, 3, 2, 2, 2, 3, 4, 1, 4, 2, 5)
v3 <- as.integer(c(1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610))

# Check that both packages return identical results before timing them.
t1 <- comboGeneral(v3, 20, freqs = myFreqs, nThreads = numThreads)
t2 <- combinations(freq = myFreqs, k = 20, x = v3)
stopifnot(identical(t1, t2))
dim(t1)
#> [1] 14594082 20

# Release the large result matrices so they do not affect the timings.
rm(t1, t2)
invisible(gc())
microbenchmark(
  cbRcppAlgosPar = comboGeneral(v3, 20, freqs = myFreqs, nThreads = numThreads),
  cbRcppAlgosSer = comboGeneral(v3, 20, freqs = myFreqs),
  cbArrangements = combinations(freq = myFreqs, k = 20, x = v3),
  times = 10, unit = "relative"
)
#> Unit: relative
#> expr min lq mean median uq max neval
#> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 10
#> cbRcppAlgosSer 3.048 3.014 2.936 2.935 2.906 2.727 10
#> cbArrangements 5.676 5.722 5.567 5.571 5.520 5.203 10
# Permutations without repetition: arrange 6 of the 17 values in v4.
v4 <- as.integer(c(2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59))

# Check that both packages return identical results before timing them.
t1 <- permuteGeneral(v4, 6, nThreads = numThreads)
t2 <- permutations(v4, 6)
stopifnot(identical(t1, t2))
dim(t1)
#> [1] 8910720 6

# Release the large result matrices so they do not affect the timings.
rm(t1, t2)
invisible(gc())
microbenchmark(
  cbRcppAlgosPar = permuteGeneral(v4, 6, nThreads = numThreads),
  cbRcppAlgosSer = permuteGeneral(v4, 6),
  cbArrangements = permutations(v4, 6),
  times = 15, unit = "relative"
)
#> Unit: relative
#> expr min lq mean median uq max neval
#> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 15
#> cbRcppAlgosSer 1.462 1.437 1.296 1.418 1.345 1.167 15
#> cbArrangements 2.508 2.495 2.279 2.441 2.537 1.777 15
## Indexing permutation example with the partitions package
# All permutations of 1:11, generated by all three packages. The parallel
# runs use numThreads (4 on the machine that produced the output below),
# matching every other benchmark in this document.
t1 <- permuteGeneral(11, nThreads = numThreads)
t2 <- permutations(11)
t3 <- perms(11)

dim(t1)
#> [1] 39916800 11

# perms() output is compared after as.matrix() and a transpose.
stopifnot(identical(t1, t2), identical(t1, t(as.matrix(t3))))

# Release the large result matrices so they do not affect the timings.
rm(t1, t2, t3)
invisible(gc())
microbenchmark(
  cbRcppAlgosPar = permuteGeneral(11, nThreads = numThreads),
  cbRcppAlgosSer = permuteGeneral(11),
  cbArrangements = permutations(11),
  cbPartitions = perms(11),
  times = 5, unit = "relative"
)
#> Unit: relative
#> expr min lq mean median uq max neval
#> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 5
#> cbRcppAlgosSer 2.545 2.843 2.787 2.809 2.799 2.918 5
#> cbArrangements 4.326 4.229 4.388 4.552 4.490 4.340 5
#> cbPartitions 7.967 8.048 8.428 8.788 8.733 8.563 5
# Permutations with repetition: length-10 arrangements of the first 5
# values of v3.
v5 <- v3[1:5]

# Check that both packages return identical results before timing them.
t1 <- permuteGeneral(v5, 10, repetition = TRUE, nThreads = numThreads)
t2 <- permutations(v5, 10, replace = TRUE)
stopifnot(identical(t1, t2))
dim(t1)
#> [1] 9765625 10

# Release the large result matrices so they do not affect the timings.
rm(t1, t2)
invisible(gc())
microbenchmark(
  cbRcppAlgosPar = permuteGeneral(v5, 10, TRUE, nThreads = numThreads),
  cbRcppAlgosSer = permuteGeneral(v5, 10, TRUE),
  cbArrangements = permutations(x = v5, k = 10, replace = TRUE),
  times = 10, unit = "relative"
)
#> Unit: relative
#> expr min lq mean median uq max neval
#> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.0000 10
#> cbRcppAlgosSer 2.689 2.599 2.055 2.477 2.363 0.8071 10
#> cbArrangements 3.255 3.156 2.644 2.993 2.821 1.6139 10
# Permutations of a multiset: 12 sorted random values with frequencies
# rep(1:3, 4); 7 elements are arranged.
v6 <- sort(runif(12))

# Check that both packages return identical results before timing them.
t1 <- permuteGeneral(v6, 7, freqs = rep(1:3, 4), nThreads = numThreads)
t2 <- permutations(freq = rep(1:3, 4), k = 7, x = v6)
stopifnot(identical(t1, t2))
dim(t1)
#> [1] 19520760 7

# Release the large result matrices so they do not affect the timings.
rm(t1, t2)
invisible(gc())
microbenchmark(
  cbRcppAlgosPar = permuteGeneral(v6, 7, freqs = rep(1:3, 4), nThreads = numThreads),
  cbRcppAlgosSer = permuteGeneral(v6, 7, freqs = rep(1:3, 4)),
  cbArrangements = permutations(freq = rep(1:3, 4), k = 7, x = v6),
  times = 10, unit = "relative"
)
#> Unit: relative
#> expr min lq mean median uq max neval
#> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.00 1.000 10
#> cbRcppAlgosSer 3.549 3.555 3.156 3.520 2.41 2.622 10
#> cbArrangements 3.916 3.925 3.559 3.912 2.97 2.860 10
# Partitions of 140 into distinct parts, generated by all three packages.
t1 <- comboGeneral(0:140, freqs = c(140, rep(1, 140)),
                   constraintFun = "sum", comparisonFun = "==",
                   limitConstraints = 140)
t2 <- partitions(140, distinct = TRUE)
t3 <- diffparts(140)

# Each package has different output formats... we only examine dimensions
# and that each result is a partition of 140
stopifnot(identical(dim(t1), dim(t2)), identical(dim(t1), dim(t(t3))),
          all(rowSums(t1) == 140), all(rowSums(t2) == 140),
          all(colSums(t3) == 140))
dim(t1)
#> [1] 9617150 16

# Release the large result matrices so they do not affect the timings.
rm(t1, t2, t3)
invisible(gc())
microbenchmark(
  cbRcppAlgosPar = partitionsGeneral(0:140, freqs = c(140, rep(1, 140)),
                                     nThreads = numThreads),
  cbRcppAlgosSer = partitionsGeneral(0:140, freqs = c(140, rep(1, 140))),
  cbArrangements = partitions(140, distinct = TRUE),
  cbPartitions = diffparts(140),
  times = 10, unit = "relative"
)
#> Unit: relative
#> expr min lq mean median uq max neval
#> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 10
#> cbRcppAlgosSer 3.247 3.225 2.711 2.621 2.564 2.138 10
#> cbArrangements 2.578 2.562 2.146 2.016 2.127 1.680 10
#> cbPartitions 18.349 18.331 14.919 14.522 13.754 11.296 10
# Partitions of 160 into exactly 10 distinct parts.
t1 <- comboGeneral(160, 10,
                   constraintFun = "sum", comparisonFun = "==",
                   limitConstraints = 160)
t2 <- partitions(160, 10, distinct = TRUE)
stopifnot(identical(t1, t2))
dim(t1)
#> [1] 8942920 10

# Release the large result matrices so they do not affect the timings.
rm(t1, t2)
invisible(gc())
microbenchmark(
  cbRcppAlgosPar = partitionsGeneral(160, 10, nThreads = numThreads),
  cbRcppAlgosSer = partitionsGeneral(160, 10),
  cbArrangements = partitions(160, 10, distinct = TRUE),
  times = 10, unit = "relative"
)
#> Unit: relative
#> expr min lq mean median uq max neval
#> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 10
#> cbRcppAlgosSer 3.360 3.343 3.317 3.118 3.519 3.441 10
#> cbArrangements 4.405 4.344 4.223 4.060 4.029 4.284 10
# Partitions of 65 with repeated parts allowed, generated by all three
# packages.
t1 <- comboGeneral(0:65, repetition = TRUE, constraintFun = "sum",
                   comparisonFun = "==", limitConstraints = 65)
t2 <- partitions(65)
t3 <- parts(65)

# Each package has different output formats... we only examine dimensions
# and that each result is a partition of 65
stopifnot(identical(dim(t1), dim(t2)), identical(dim(t1), dim(t(t3))),
          all(rowSums(t1) == 65), all(rowSums(t2) == 65),
          all(colSums(t3) == 65))
dim(t1)
#> [1] 2012558 65

# Release the large result matrices so they do not affect the timings.
rm(t1, t2, t3)
invisible(gc())
microbenchmark(
  cbRcppAlgosPar = partitionsGeneral(0:65, repetition = TRUE,
                                     nThreads = numThreads),
  cbRcppAlgosSer = partitionsGeneral(0:65, repetition = TRUE),
  cbArrangements = partitions(65),
  cbPartitions = parts(65),
  times = 20, unit = "relative"
)
#> Unit: relative
#> expr min lq mean median uq max neval
#> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 20
#> cbRcppAlgosSer 2.888 2.742 2.210 2.371 2.090 1.636 20
#> cbArrangements 2.157 2.049 1.653 1.814 1.638 1.046 20
#> cbPartitions 9.243 8.946 6.833 7.935 6.458 4.037 20
# Partitions of 100 into exactly 15 parts, repeated parts allowed.
t1 <- comboGeneral(100, 15, TRUE, constraintFun = "sum",
                   comparisonFun = "==", limitConstraints = 100)
t2 <- partitions(100, 15)
stopifnot(identical(t1, t2))
dim(t1)
#> [1] 9921212 15
rm(t1, t2)

# This takes a really long time... not because of restrictedparts,
# but because apply is not that fast. This transformation is
# needed for proper comparisons. As a result, we will compare
# a smaller example
# t3 <- t(apply(as.matrix(restrictedparts(100, 15, include.zero = F)), 2, sort))
t3 <- t(apply(as.matrix(restrictedparts(50, 15, include.zero = FALSE)), 2, sort))
stopifnot(identical(partitions(50, 15), t3))
rm(t3)
invisible(gc())
microbenchmark(
  cbRcppAlgosPar = partitionsGeneral(100, 15, TRUE,
                                     nThreads = numThreads),
  cbRcppAlgosSer = partitionsGeneral(100, 15, TRUE),
  cbArrangements = partitions(100, 15),
  cbPartitions = restrictedparts(100, 15,
                                 include.zero = FALSE),
  times = 10, unit = "relative"
)
#> Unit: relative
#> expr min lq mean median uq max neval
#> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 10
#> cbRcppAlgosSer 3.420 3.341 2.948 3.131 2.682 2.479 10
#> cbArrangements 4.296 4.227 3.772 4.200 3.352 3.087 10
#> cbPartitions 15.608 15.547 13.366 14.594 11.587 10.851 10
Currently, RcppAlgos is the only package capable of
efficiently generating partitions of multisets. Therefore, we will only
time RcppAlgos and use this as a reference for future
improvements.
# Partitions of 120 into 10 parts drawn from a multiset with frequencies
# rep(1:8, 15). Only RcppAlgos is timed, so absolute times are reported.
t1 <- comboGeneral(120, 10, freqs = rep(1:8, 15),
                   constraintFun = "sum", comparisonFun = "==",
                   limitConstraints = 120)
dim(t1)
#> [1] 7340225 10

# Every row must sum to the target.
stopifnot(all(rowSums(t1) == 120))
microbenchmark(
  cbRcppAlgos = partitionsGeneral(120, 10, freqs = rep(1:8, 15)),
  times = 10
)
#> Unit: milliseconds
#> expr min lq mean median uq max neval
#> cbRcppAlgos 281.6 283.3 289.5 285.3 288.5 318.8 10
# Compositions of 15 with repetition, generated by all three packages.
# Both arrangements and partitions export compositions(), so the calls
# are namespace-qualified.
t1 <- compositionsGeneral(0:15, repetition = TRUE)
t2 <- arrangements::compositions(15)
t3 <- partitions::compositions(15)

# Each package has different output formats... we only examine dimensions
# and that each result is a composition of 15
stopifnot(identical(dim(t1), dim(t2)), identical(dim(t1), dim(t(t3))),
          all(rowSums(t1) == 15), all(rowSums(t2) == 15),
          all(colSums(t3) == 15))
dim(t1)
#> [1] 16384 15
rm(t1, t2, t3)
invisible(gc())
microbenchmark(
  cbRcppAlgosSer = compositionsGeneral(0:15, repetition = TRUE),
  cbArrangements = arrangements::compositions(15),
  cbPartitions = partitions::compositions(15),
  times = 20, unit = "relative"
)
#> Unit: relative
#> expr min lq mean median uq max neval
#> cbRcppAlgosSer 1.000 1.000 1.000 1.00 1.000 1.000 20
#> cbArrangements 1.173 1.233 1.232 1.23 1.229 1.184 20
#> cbPartitions 131.727 138.033 187.548 184.94 222.031 232.619 20
For the next two examples, we will exclude the
partitions
package for efficiency reasons.
# Compositions of 23 with repetition (RcppAlgos vs. arrangements only).
t1 <- compositionsGeneral(0:23, repetition = TRUE)
t2 <- arrangements::compositions(23)

# Each package has different output formats... we only examine dimensions
# and that each result is a composition of 23
stopifnot(identical(dim(t1), dim(t2)), all(rowSums(t1) == 23),
          all(rowSums(t2) == 23))
dim(t1)
#> [1] 4194304 23

# Release the large result matrices so they do not affect the timings.
rm(t1, t2)
invisible(gc())
microbenchmark(
  cbRcppAlgosPar = compositionsGeneral(0:23, repetition = TRUE,
                                       nThreads = numThreads),
  cbRcppAlgosSer = compositionsGeneral(0:23, repetition = TRUE),
  cbArrangements = arrangements::compositions(23),
  times = 20, unit = "relative"
)
#> Unit: relative
#> expr min lq mean median uq max neval
#> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 20
#> cbRcppAlgosSer 3.431 3.381 3.365 3.377 3.348 3.314 20
#> cbArrangements 3.836 3.777 3.758 3.772 3.743 3.680 20
# Compositions of 30 into exactly 10 parts, with repetition.
t1 <- compositionsGeneral(30, 10, repetition = TRUE)
t2 <- arrangements::compositions(30, 10)

# Here the two packages agree exactly, so the results are compared directly.
stopifnot(identical(t1, t2), all(rowSums(t1) == 30))
dim(t1)
#> [1] 10015005 10

# Release the large result matrices so they do not affect the timings.
rm(t1, t2)
invisible(gc())
microbenchmark(
  cbRcppAlgosPar = compositionsGeneral(30, 10, repetition = TRUE,
                                       nThreads = numThreads),
  cbRcppAlgosSer = compositionsGeneral(30, 10, repetition = TRUE),
  cbArrangements = arrangements::compositions(30, 10),
  times = 20, unit = "relative"
)
#> Unit: relative
#> expr min lq mean median uq max neval
#> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 20
#> cbRcppAlgosSer 3.050 3.082 3.040 3.048 2.956 3.305 20
#> cbArrangements 3.149 3.117 3.039 3.074 2.971 2.803 20
We will show one example from each category to demonstrate the
efficiency of the iterators in RcppAlgos. The results are
similar for the rest of the cases not shown.
# Step an arrangements combination iterator `total` times.
#
# n:     size of the source set; combinations of length as.integer(n / 2)
#        are iterated.
# total: number of getnext() calls to make.
# The retrieved values are discarded; the function exists only to be timed.
pkg_arrangements <- function(n, total) {
  a <- icombinations(n, as.integer(n / 2))
  # seq_len() makes the loop a no-op when total is 0 (1:0 would run twice).
  for (i in seq_len(total)) {
    a$getnext()
  }
}
# Step an RcppAlgos combination iterator `total` times.
#
# n:     size of the source set; combinations of length as.integer(n / 2)
#        are iterated.
# total: number of nextIter() calls to make.
# The retrieved values are discarded; the function exists only to be timed.
pkg_RcppAlgos <- function(n, total) {
  a <- comboIter(n, as.integer(n / 2))
  # seq_len() makes the loop a no-op when total is 0 (1:0 would run twice).
  for (i in seq_len(total)) {
    a@nextIter()
  }
}
# Number of combinations of 18 choose 9, i.e. how many steps each
# iterator takes in the timing below.
total <- comboCount(18, 9)
total
#> [1] 48620
microbenchmark(
  cbRcppAlgos = pkg_RcppAlgos(18, total),
  cbArrangements = pkg_arrangements(18, total),
  times = 15, unit = "relative"
)
#> Unit: relative
#> expr min lq mean median uq max neval
#> cbRcppAlgos 1.00 1.00 1.00 1.00 1.00 1.00 15
#> cbArrangements 19.78 19.54 18.99 19.07 18.56 17.43 15
# Step an arrangements permutation iterator over n elements `total` times.
#
# n:     number of elements being permuted.
# total: number of getnext() calls to make.
# The retrieved values are discarded; the function exists only to be timed.
pkg_arrangements <- function(n, total) {
  a <- ipermutations(n)
  # seq_len() makes the loop a no-op when total is 0 (1:0 would run twice).
  for (i in seq_len(total)) {
    a$getnext()
  }
}
# Step an RcppAlgos permutation iterator over n elements `total` times.
#
# n:     number of elements being permuted.
# total: number of nextIter() calls to make.
# The retrieved values are discarded; the function exists only to be timed.
pkg_RcppAlgos <- function(n, total) {
  a <- permuteIter(n)
  # seq_len() makes the loop a no-op when total is 0 (1:0 would run twice).
  for (i in seq_len(total)) {
    a@nextIter()
  }
}
# Number of permutations of 8 elements, i.e. how many steps each
# iterator takes in the timing below.
total <- permuteCount(8)
total
#> [1] 40320
microbenchmark(
  cbRcppAlgos = pkg_RcppAlgos(8, total),
  cbArrangements = pkg_arrangements(8, total),
  times = 15, unit = "relative"
)
#> Unit: relative
#> expr min lq mean median uq max neval
#> cbRcppAlgos 1.00 1.00 1.00 1.00 1.00 1.0 15
#> cbArrangements 19.64 19.27 18.91 18.79 18.43 18.8 15
# Walk through partitions of n with the partitions package.
#
# n:     integer being partitioned.
# total: number of partitions to visit; firstpart() supplies the first one,
#        so nextpart() is called total - 1 times.
# The visited values are discarded; the function exists only to be timed.
pkg_partitions <- function(n, total) {
  a <- firstpart(n)
  # seq_len() skips the loop when total is 1 (1:0 would call nextpart twice).
  for (i in seq_len(total - 1)) {
    a <- nextpart(a)
  }
}
# Step an arrangements partition iterator for n `total` times.
#
# n:     integer being partitioned.
# total: number of getnext() calls to make.
# The retrieved values are discarded; the function exists only to be timed.
pkg_arrangements <- function(n, total) {
  a <- ipartitions(n)
  # seq_len() makes the loop a no-op when total is 0 (1:0 would run twice).
  for (i in seq_len(total)) {
    a$getnext()
  }
}
# Step an RcppAlgos partition iterator (source 0:n, repetition allowed)
# `total` times.
#
# n:     integer being partitioned.
# total: number of nextIter() calls to make.
# The retrieved values are discarded; the function exists only to be timed.
pkg_RcppAlgos <- function(n, total) {
  a <- partitionsIter(0:n, repetition = TRUE)
  # seq_len() makes the loop a no-op when total is 0 (1:0 would run twice).
  for (i in seq_len(total)) {
    a@nextIter()
  }
}
# Number of partitions of 40 (repetition allowed), i.e. how many steps
# each iterator takes in the timing below.
total <- partitionsCount(0:40, repetition = TRUE)
total
#> [1] 37338
microbenchmark(
  cbRcppAlgos = pkg_RcppAlgos(40, total),
  cbArrangements = pkg_arrangements(40, total),
  cbPartitions = pkg_partitions(40, total),
  times = 15, unit = "relative"
)
#> Unit: relative
#> expr min lq mean median uq max neval
#> cbRcppAlgos 1.00 1.00 1.00 1.00 1.00 1.00 15
#> cbArrangements 16.07 15.87 15.08 15.73 14.21 13.51 15
#> cbPartitions 25.90 25.58 24.56 25.33 22.91 22.94 15
# Walk through compositions of n with the partitions package.
#
# n:     integer being composed.
# total: number of compositions to visit; firstcomposition() supplies the
#        first one, so nextcomposition() is called total - 1 times.
# The visited values are discarded; the function exists only to be timed.
pkg_partitions <- function(n, total) {
  a <- firstcomposition(n)
  # seq_len() skips the loop when total is 1 (1:0 would step twice).
  for (i in seq_len(total - 1)) {
    a <- nextcomposition(a, FALSE)
  }
}
# Step an arrangements composition iterator for n `total` times.
#
# n:     integer being composed.
# total: number of getnext() calls to make.
# The retrieved values are discarded; the function exists only to be timed.
pkg_arrangements <- function(n, total) {
  a <- icompositions(n)
  # seq_len() makes the loop a no-op when total is 0 (1:0 would run twice).
  for (i in seq_len(total)) {
    a$getnext()
  }
}
# Step an RcppAlgos composition iterator (source 0:n, repetition allowed)
# `total` times.
#
# n:     integer being composed.
# total: number of nextIter() calls to make.
# The retrieved values are discarded; the function exists only to be timed.
pkg_RcppAlgos <- function(n, total) {
  a <- compositionsIter(0:n, repetition = TRUE)
  # seq_len() makes the loop a no-op when total is 0 (1:0 would run twice).
  for (i in seq_len(total)) {
    a@nextIter()
  }
}
# Number of compositions of 15 (repetition allowed), i.e. how many steps
# each iterator takes in the timing below.
total <- compositionsCount(0:15, repetition = TRUE)
total
#> [1] 16384
microbenchmark(
  cbRcppAlgos = pkg_RcppAlgos(15, total),
  cbArrangements = pkg_arrangements(15, total),
  cbPartitions = pkg_partitions(15, total),
  times = 15, unit = "relative"
)
#> Unit: relative
#> expr min lq mean median uq max neval
#> cbRcppAlgos 1.00 1.00 1.00 1.00 1.00 1.00 15
#> cbArrangements 14.26 14.06 13.52 13.97 13.63 11.71 15
#> cbPartitions 46.44 46.50 44.77 46.16 44.63 40.38 15