python arrays combinations set-intersection

Python find all non-repeat combinations and get both the values they have in common and their unique values


I have an array of sets [setA, setB, setC, setD, ..., setX] and I want to find a way to get the intersection of each combination (without repeat combos), so:

# Desired intersections for four sets: every pair, every triple, and all four.
# Python sets expose .intersection() (there is no .intersect() method), and
# it accepts several sets at once, so no nesting is needed.
AB = setA.intersection(setB)
AC = setA.intersection(setC)
AD = setA.intersection(setD)
BC = setB.intersection(setC)
BD = setB.intersection(setD)
CD = setC.intersection(setD)
ABC = setA.intersection(setB, setC)
ABD = setA.intersection(setB, setD)
ACD = setA.intersection(setC, setD)
BCD = setB.intersection(setC, setD)
ABCD = setA.intersection(setB, setC, setD)

I would also like to get the values that are unique to each set or combination, i.e. the values that are not present in any larger combination containing it: values in setA which are not in AB, AC, AD, ABC, ABD, ACD or ABCD; values of AB which are not in ABC, ABD or ABCD; values of ABC which are not in ABCD; and so forth.

I'd like the final output to be a list of tuples, where each tuple looks like this:

(combo_name, unique_values, intersected_set)

So far I have been doing it manually, which is cumbersome:

import pandas as pd 
# Labels that end up in the 'Set' column of the report.
setA_name, setB_name, setC_name = 'A', 'B', 'C'

# The three input sets.
setA = {1,2,3,4,5,6,7,8,9,10}
setB = {2,3,7,11,13,17,23}
setC = {3,6,7,9,10,12,13,15,16}

# Pairwise overlaps and the overlap shared by all three sets.
setA_B = setA & setB
setA_C = setA & setC
setB_C = setB & setC
setA_B_C = setA & (setB & setC)

# Part of each pairwise overlap that is not shared by all three sets.
setA_B_only, setA_C_only, setB_C_only = (
    pair - setA_B_C for pair in (setA_B, setA_C, setB_C)
)

# Part of each single set that appears in none of the overlaps above.
setA_only = setA.difference(setA_B_only).difference(setA_C_only).difference(setA_B_C)
setB_only = setB.difference(setA_B_only).difference(setB_C_only).difference(setA_B_C)
setC_only = setC.difference(setA_C_only).difference(setB_C_only).difference(setA_B_C)

# One (label, unique values, full intersection) row per combination;
# multi-set labels are the member labels joined with ';'.
results = [
    (setA_name, setA_only, setA),
    (setB_name, setB_only, setB),
    (setC_name, setC_only, setC),
    (f'{setA_name};{setB_name}', setA_B_only, setA_B),
    (f'{setA_name};{setC_name}', setA_C_only, setA_C),
    (f'{setB_name};{setC_name}', setB_C_only, setB_C),
    (f'{setA_name};{setB_name};{setC_name}', setA_B_C, setA_B_C),
]

tab = pd.DataFrame(results, columns=['Set', 'Unique', 'Common'])
print(tab)
     Set        Unique                            Common
0      A  {8, 1, 4, 5}   {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}
1      B  {17, 11, 23}         {2, 3, 7, 11, 13, 17, 23}
2      C  {16, 12, 15}  {3, 6, 7, 9, 10, 12, 13, 15, 16}
3    A;B           {2}                         {2, 3, 7}
4    A;C    {9, 10, 6}                  {3, 6, 7, 9, 10}
5    B;C          {13}                        {3, 13, 7}
6  A;B;C        {3, 7}                            {3, 7}

I don't know where to start.

Updated method using the suggestion from @TheEngineerProgrammer

import pandas as pd
from itertools import combinations as combi
# Named input sets. The keys become the labels in the 'Set' column and are
# joined with ';' to name combinations, so a key must not contain ';'.
set_dict = {'A': {1,2,3,4,5,6,7,8,9,10}, 'B':{2,3,7,11,13,17,23}, 'C':{3,6,7,9,10,12,13,15,16}}
dict_keys = list(set_dict.keys())

# Shallow copy: the combination entries added below must not leak back into
# set_dict (a plain assignment would make both names refer to the same dict).
common_dict = dict(set_dict)

# One loop handles every combination size from 2 up to "all sets", so no
# extra hand-written loop is needed when set_dict grows.
for size in range(2, len(dict_keys) + 1):
    for combo in combi(dict_keys, size):
        common = set.intersection(*(set_dict[key] for key in combo))
        common_dict[';'.join(combo)] = common

# For each key x, remove everything that also occurs in a larger combination y
# containing all of x's labels. Keys are stored in increasing combination
# size, so every such y comes after x in the pair enumeration.
uniq_dict = dict()
for x, y in combi(list(common_dict.keys()),2):
    x_split = x.split(';')
    # Compare whole labels; a substring test against the joined key would
    # wrongly match label 'A' inside a key such as 'AB;C'.
    y_split = y.split(';')
    if all(item in y_split for item in x_split):
        print(x,'-',y)
        # Continue from the partially reduced set if x was already seen.
        x_set = uniq_dict.get(x, common_dict[x])
        y_set = common_dict.get(y)
        x_uniq = x_set-y_set
        print(x_uniq)
        uniq_dict[x] = x_uniq

# Keys with no larger combination (the one made of all sets) keep their full
# intersection. Walking common_dict keeps the row order deterministic.
for key in common_dict:
    if key not in uniq_dict:
        uniq_dict[key] = common_dict[key]

# One (label, unique values, full intersection) row per combination.
results = [(key, uniq_dict[key], common_dict[key]) for key in uniq_dict]

tab = pd.DataFrame(results, columns = ['Set', 'Unique', 'Common'])
print(tab)
     Set        Unique                            Common
0      A  {8, 1, 4, 5}   {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}
1      B  {17, 11, 23}         {2, 3, 7, 11, 13, 17, 23}
2      C  {16, 12, 15}  {3, 6, 7, 9, 10, 12, 13, 15, 16}
3    A;B           {2}                         {2, 3, 7}
4    A;C    {9, 10, 6}                  {3, 6, 7, 9, 10}
5    B;C          {13}                        {3, 13, 7}
6  A;B;C        {3, 7}                            {3, 7}

How do I grow this part increasingly, depending on the number of items in my set_dict?

# The hand-written pattern in question: one loop per combination size, each
# unpacking that many keys from dict_keys. Loop bodies are elided.

# Size 2: every pair of keys.
for i, j in combi(dict_keys,2):
    ...

# Size 3: every triple of keys.
for i, j, k in combi(dict_keys,3):
    ...

# Size 4: every group of four keys.
for i, j, k, l in combi(dict_keys,4):
    ...

# Size 5: every group of five keys.
for i, j, k, l, m in combi(dict_keys,5):
    ...

Solution

  • It is a bit hard to understand the requirement as you described it, especially the part about "unique values in the different sets". I think I have it right now, but I can't imagine what the real-world use of that set operation's result would be, so I invite you to review the requirement and check whether that is actually what you want.

    Anyway, here it is below. With this approach you can have as many sets as you like, without having to declare an exponential number of variables to operate on them (by the way, here is a bit of terminology you might find useful: you are operating on the "powerset" of your sets).

    from itertools import combinations
    from functools import reduce
    
    
    # Input sets, keyed by the label used to build each combination's name.
    sets = {
        "A": {1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
        "B": {2, 3, 7, 11, 13, 17, 23},
        "C": {3, 6, 7, 9, 10, 12, 13, 15, 16},
    }

    # Visit every non-empty combination of labels, smallest combinations first.
    labels = tuple(sets)
    for size in range(1, len(labels) + 1):
        for combo in combinations(labels, size):
            inside = [sets[label] for label in combo]
            outside = [sets[label] for label in labels if label not in combo]
            # Values shared by every set in the combination.
            intersection = reduce(set.intersection, inside)
            # Shared values that occur in none of the remaining sets.
            unique = reduce(set.difference, outside, intersection)
            name = "".join(combo)
            print(name, unique, intersection)
    

    Output:

    A   {8, 1, 4, 5} {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}
    B   {17, 11, 23} {17, 2, 3, 23, 7, 11, 13}
    C   {16, 12, 15} {3, 6, 7, 9, 10, 12, 13, 15, 16}
    AB  {2}          {2, 3, 7}
    AC  {9, 10, 6}   {3, 6, 7, 9, 10}
    BC  {13}         {3, 13, 7}
    ABC {3, 7}       {3, 7}