pythonnumpypython-itertoolspowerset

Convert itertools powerset into columnized numpy array


Given a tuple items, I get the powerset with itertools:

items = tuple(('a', 'b', 'c'))
combos = powerset(items)

def powerset(iterable):
    "powerset([1,2,3]) --> () (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3)"
    s = list(iterable)
    return tuple(chain.from_iterable(combinations(s, r) for r in range(1, len(s)+1)))

My goal is to convert the powerset output combos:

#combos =

(('a',), 
 ('b',), 
 ('c',), 
 ('a', 'b'), 
 ('a', 'c'), 
 ('b', 'c'), 
 ('a', 'b', 'c'))

into an np.array where for each row, each item is placed in the column corresponding to the location of that item in the original tuple cols.

Desired result:

#res = 

   [['a', '',   ''],
    ['',  'b',  ''],
    ['',  '',  'c'],
    ['a', 'b',  ''],
    ['a', '',  'c'],
    ['',  'b', 'c'],
    ['a', 'b', 'c']]

#shape = (7,3)

I was able to achieve this with the code below. However, is there a more pythonic, faster/memory efficient way to convert the output than my attempt where I loop through the array and place each individual item in the correct column in the result array?

Full code:

from itertools import combinations, chain
import numpy as np

def columnizePowerset(powerset, items):
    combo_arr = np.array([list(x) for x in powerset], dtype=object) # Convert to np.array
    res = np.empty([len(powerset), len(items)], dtype=str)          # Initialize result array

    for i in range(len(combo_arr)):                                 # Loop through rows
        for j in range(len(combo_arr[i])):                          # Loop through items in row
            col = np.where(np.array(items) == combo_arr[i][j])      # Identify column for item
            res[i][col] = combo_arr[i][j]                           # Place item in correct column

    return res

def powerset(iterable):
    "powerset([1,2,3]) --> () (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3)"
    s = list(iterable)
    return tuple(chain.from_iterable(combinations(s, r) for r in range(1, len(s)+1)))

if __name__ == "__main__":
    items = tuple(('a', 'b', 'c'))                                  # Given
    combos = powerset(items)                                        # Perform powerset
    res = columnizePowerset(combos, items)                          # Convert powerset to columnized array
    print(res)

Solution

  • The order isn't exactly the same as powerset yields, but the pattern reminded me of binary – you can get the same items by looking at the bits of increasing integers...

    I'm not a NumPy wizard, but this seems reasonably numpy-er. (See post edit history for worse solutions.)

    import numpy as np
    
    
    def powerset_numpy(items):
        n = len(items)
        bit_indexes = np.arange(1, 2 ** n)
        n_bits = bit_indexes.shape[0]
        item_bits = np.repeat(np.arange(0, n), n_bits).reshape((n, -1))
        item_values = np.repeat(items, n_bits).reshape((n, -1))
        n_bits = (bit_indexes & (1 << item_bits)).astype(bool)
        return np.where(n_bits, item_values, '').T
    
    print(powerset_numpy(['a', 'b', 'c']))
    

    prints out

    [['a' '' '']
     ['' 'b' '']
     ['a' 'b' '']
     ['' '' 'c']
     ['a' '' 'c']
     ['' 'b' 'c']
     ['a' 'b' 'c']]