Let's say I have n dictionaries of different sizes, dictA
, ..., dictN
(n may be as great as 18). They all have strings as their keys and positive numbers as their values. What I need is a dictionary dictComb
that has as its keys all combinations of keys from dictA
to dictN
(one key from each dictionary) with the value of each combination equal to the sum of the constituent keys' values.
Example (n=2):
dictA = {"Beefsteak": 20, "Campari": 100}
dictB = {"McIntosh": 5, "Fuji": 11, "Honeycrisp": 32}
dictComb = {
"Beefsteak McIntosh": 25, "Beefsteak Fuji": 31, "Beefsteak Honeycrisp": 52,
"Campari McIntosh": 105, "Campari Fuji": 111, "Campari Honeycrisp": 132
}
I tried something like this:
for (a, b) in zip(dictA, dictB):
dictComb.update({" ".join([a, b]): sum([dictA[a], dictB[b]])})
Unfortunately, that gives:
dictComb = {"Beefsteak McIntosh": 25, "Campari Fuji": 111}
One more product
usage (with dicts = dictA, dictB
):
from itertools import product
dictComb = dict(zip(
map(' '.join, product(*dicts)),
map(sum, product(*map(dict.values, dicts)))
))
With 18 dicts of two items each:
0.54 seconds no_comment
0.70 seconds no_comment_2
0.57 seconds no_comment_3
1.37 seconds Olivier_1
1.63 seconds Olivier_2
Benchmark code:
def no_comment(dicts):
return dict(zip(
map(' '.join, product(*dicts)),
map(sum, product(*map(dict.values, dicts)))
))
def no_comment_2(dicts):
result = {}
for d in dicts:
result = d.copy() if not result else {
f'{K} {k}': V + v
for K, V in result.items()
for k, v in d.items()
}
return result
def no_comment_3(dicts):
result = {}
for d in dicts:
result = d.copy() if not result else {
f'{K} {k}': V + v
for k, v in d.items()
for K, V in result.items()
}
return result
def Olivier_1(dicts):
dictComb = {}
for keys in itertools.product(*dicts):
keyComb = " ".join(keys)
sum = 0
for i, key in enumerate(keys):
sum += dicts[i][key]
dictComb[keyComb] = sum
return dictComb
def Olivier_2(dicts):
dictComb = {}
for keys in itertools.product(*dicts):
keyComb = " ".join(keys)
dictComb[keyComb] = sum(dicts[i][key] for i, key in enumerate(keys))
return dictComb
funcs = no_comment, no_comment_2, no_comment_3, Olivier_1, Olivier_2
from itertools import product
import itertools
from timeit import repeat
# Correctness
dictA = {"Beefsteak": 20, "Campari": 100}
dictB = {"McIntosh": 5, "Fuji": 11, "Honeycrisp": 32}
dicts = dictA, dictB
want = {
"Beefsteak McIntosh": 25, "Beefsteak Fuji": 31, "Beefsteak Honeycrisp": 52,
"Campari McIntosh": 105, "Campari Fuji": 111, "Campari Honeycrisp": 132
}
for f in funcs:
print(f(dicts) == want, f.__name__)
print()
# Speed
dicts = [dictA] * 18
for _ in range(2):
for f in funcs:
t = min(repeat(lambda: f(dicts), number=1))
print(f'{t:.2f} seconds ', f.__name__)
print()
(Bah, my no_comment_2
and no_comment_3
are wrong if a middle dict is empty. Oh well, since they're not better anyway, I'm too lazy to fix that. Keeping them just to show the speed of that approach.)