javascriptsetsubset

Generate subsets of length n


Given a Set, generate all subsets of length n.

How to do this efficiently, without converting the Set to an Array?

I already have a good solution for arrays as input:

function* subsets(array, length, start = 0) {
  if (start >= array.length || length < 1) {
    yield new Set();
  } else {
    while (start <= array.length - length) {
      let first = array[start];
      for (subset of subsets(array, length - 1, start + 1)) {
        subset.add(first);
        yield subset;
      }
      ++start;
    }
  }
}

let array = ['a', 'b', 'c', 'd'];

for (subset of subsets(array, 2)) {
  console.log(...subset);
}

However, I couldn't come up with an efficient implementation for sets without converting the set to an array - which I would like to avoid in favour of e.g. purely using iterators.


Solution

  • I implemented your idea of deleting and reinserting values with ordered multi subsets:

    function orderedMultiSubsets(set, n) {
      if(!Number.isInteger(n) || n < 0) return function*(){}();
      var subset = new Array(n),
          iterator = set.values();
      return (function* backtrack(index) {
        if(index === n) {
          yield subset.slice();
        } else {
          for(var i=0; i<set.size; ++i) {
            subset[index] = iterator.next().value; /* Get first item */
            set.delete(subset[index]); /* Remove it */
            set.add(subset[index]); /* Insert it at the end */
            yield* backtrack(index+1);
          }
        }
      })(0);
    }
    for(var subset of orderedMultiSubsets(new Set([1,2,3]), 2)) {
      console.log(subset.join());
    }

    And then I think I managed to prune as early as possible for the unordered subsets case:

    function subsets(set, n) {
      if(!Number.isInteger(n) || n < 0 || n > set.size) return function*(){}();
      var subset = new Array(n),
          iterator = set.values();
      return (function* backtrack(index, remaining) {
        if(index === n) {
          yield subset.slice();
        } else {
          for(var i=0; i<set.size; ++i) {
            subset[index] = iterator.next().value; /* Get first item */
            set.delete(subset[index]); /* Remove it */
            set.add(subset[index]); /* Insert it at the end */
            if(i <= remaining) {
              yield* backtrack(index+1, remaining-i);
            }
          }
        }
      })(0, set.size-n);
    }
    for(var subset of subsets(new Set([1,2,3,4]), 2)) {
      console.log(subset.join());
    }

    I still use an array for the subsets, but its size is only n. So in case the size of the set is much bigger than n, this approach might use less memory than duplicating the set into an array, which I guess is the point of your question. But note that the deletions and insertions are probably more expensive than array lookups.

    Bonus: at the end, the order of the set is the same as before calling the function.