I have a numpy array of floats and a target sum. I am trying to find all possible combinations of elements that would add up to the target sum.
I am struggling to come up with anything computationally effective that wouldn't require days to run.
Constraints:
0 < array.size < 1000000
0 <= element <= 999,999,999,999.99
Here is the solution I was able to come up with so far. But trying to process even 50 elements takes forever:
import numpy as np
from typing import List, Tuple, Set
from collections import defaultdict
def generate_random_array(size=1000, min_val=0, max_val=100, seed=42):
np.random.seed(seed)
arr = np.random.uniform(min_val, max_val, size)
return np.round(arr, 2)
def find_target_sum_combinations(arr: np.ndarray, target: float, min_elements: int = 2, epsilon: float = 1e-10) -> List[Tuple[float, ...]]:
arr = np.asarray(arr, dtype=np.float64)
n = len(arr)
# HM to store combinations Key: Sum, Value: indices
dp = defaultdict(set)
# Convert sum to int
def get_sum_key(value: float) -> int:
return int(round(value / epsilon))
# Add all individual elements
for i, num in enumerate(arr):
dp[get_sum_key(num)].add((i,))
result = []
# Combining each new number with all existing combinations
for i in range(n):
curr_num = arr[i]
# Make a copy of current sums to avoid modifying while iterating
current_sums = list(dp.items())
for sum_key, combinations in current_sums:
new_sum = sum_key * epsilon + curr_num
new_sum_key = get_sum_key(new_sum)
# Add new combination
for comb in combinations:
# Check to ensure no duplicate indices
if i > max(comb):
new_comb = comb + (i,)
dp[new_sum_key].add(new_comb)
# Check for target sum
if (abs(new_sum - target) < epsilon and
len(new_comb) >= min_elements):
result.append(tuple(float(arr[idx]) for idx in new_comb))
return sorted(result, key=len)
arr=generate_random_array(size=20, min_val=1000, max_val=100000, seed=42)
target_sum=arr[1]+arr[2]+arr[4]+arr[5]
combinations = find_target_sum_combinations(arr, target_sum)
print(f"\nCombinations that sum to {target_sum}:")
for comb in combinations:
print(f"{comb} = {sum(comb)}")
As there are no negative values in the array, there are two types of early stopping that you can introduce to the code.
Adjusted code including proposed changes:
import numpy as np
from itertools import combinations
from typing import List, Tuple
def find_target_sum_combinations(arr: np.ndarray, target: float, min_elements: int = 2, epsilon: float = 1e-10) -> List[Tuple[float, ...]]:
arr.sort() # Sort array to help with early stopping
result = []
def find_combinations(start, path, current_sum):
if len(path) >= min_elements and abs(current_sum - target) < epsilon:
result.append(tuple(path))
# Continue searching for other combinations
for i in range(start, len(arr)):
if current_sum + arr[i] > target + epsilon:
break # Early stopping because the array is sorted
find_combinations(i + 1, path + [arr[i]], current_sum + arr[i])
find_combinations(0, [], 0)
print(result)
return sorted(result, key=len)
Note that this is still has exponential worst case running time so it will still not be able to handle very large arrays.
I tested the efficiency compared to the old code with timeit
and it gave an improvement of a bit more than 150x on my system with arrays of length 20.