I have a list of lists of the following form
xx = list("a_1" = list("A", "C"),
"a_2" = list("B", "C"),
"a_3" = list("B", "B"),
"a_4" = list("C", "B"),
"a_5" = list("B", "A"),
"a_6" = list("B", "A"))
Note that this list contains sublists that are duplicates, like "a_5" and "a_6" in the above example. Using this list of lists, I want to update a matrix of the form
m = matrix(data = 0, nrow = 3, ncol = 3) # initialise matrix of zeros
rownames(m) = c("A", "B", "C") # name rows
colnames(m) = c("A", "B", "C") # name columns
such that we add one to the index of the matrix corresponding to a given pair. This is easy to do using a for loop
for (item in xx) {
# add one to the matrix index if item in xx
m[item[[1]], item[[2]]] = m[item[[1]], item[[2]]] + 1
}
which yields the expected output
A B C
A 0 0 1
B 2 1 1
C 0 1 0
though this can be slightly cumbersome for large lists. I was hoping to do this in a vectorised way using the apply()
methods but I wasn't able to get anything working using a nested combination of lapply()
to iterate through the list and sapply()
to update the values to the matrix.
So I was wondering how to do this using variant(s) of apply()
?
Fully vectorized:
x <- matrix(unlist(xx), length(xx), 2, 1)
m + tabulate(
match(x[,1], row.names(m)) + nrow(m)*(match(x[,2], colnames(m)) - 1L),
length(m)
)
#> A B C
#> A 0 0 1
#> B 2 1 1
#> C 0 1 0
Define the various approaches as functions.
floop <- function(m, xx) {
for (item in xx) m[item[[1]], item[[2]]] = m[item[[1]], item[[2]]] + 1L
m
}
fsapply <- function(mm, xx) {
sapply(xx, \(x) mm[x[[1]], x[[2]]] <<- mm[x[[1]], x[[2]]] + 1L)
mm
}
faggregate <- function(m, xx) {
sel <- data.frame(t(sapply(xx, unlist)))
sel <- aggregate(cbind(sel[0], value=1), sel, FUN=sum)
m[as.matrix(sel[1:2])] <- m[as.matrix(sel[1:2])] + sel$value
m
}
fvectorized <- function(m, xx) {
x <- matrix(unlist(xx), length(xx), 2, 1)
m + tabulate(
match(x[,1], row.names(m)) + nrow(m)*(match(x[,2], colnames(m)) - 1L),
length(m)
)
}
ftable <- function(m, xx) {
m + c(table(as.data.frame(matrix(unlist(xx), length(xx), 2, 1))))
}
Create a larger test example:
xx <- lapply(1:1e4, \(i) as.list(sample(LETTERS, 2, 1)))
m <- matrix(0L, 26, 26, 0, rep(list(LETTERS), 2))
Benchmark:
microbenchmark::microbenchmark(
floop = floop(m, xx),
fsapply = fsapply(m, xx),
faggregate = faggregate(m, xx),
fvectorized = fvectorized(m, xx),
ftable = ftable(m, xx),
check = "equal"
)
#> Unit: microseconds
#> expr min lq mean median uq max neval
#> floop 8611.9 9051.65 11519.122 9877.15 12723.70 54497.1 100
#> fsapply 15160.5 15813.15 17868.492 17069.90 19383.50 28591.2 100
#> faggregate 14888.5 15540.90 16733.166 16100.20 17810.55 20828.0 100
#> fvectorized 910.6 1019.10 1201.803 1078.90 1226.75 5886.9 100
#> ftable 1297.0 1573.40 1765.658 1691.40 1823.95 4364.7 100