I have an array of positive integers, for example [10,25,40,55,80,110]
.
I want to able to specify a range, say 100-150
, and generate a new array from this pre-existing array where the sum of the new array values is within the specified range. Values from the original array may be duplicated in the new array.
For example the arrays [110]
, [25, 40, 40]
are all valid arrays for the range of 100-150
.
All valid arrays should have equal probability of generation.
The algorithm should offer similar randomization like if we were to generate all possible combinations and then filter the ones in the range and then select randomly from them (which is not effectively possible since array is quite large (>1000) elements).
How can I achieve this?
What I am doing right now is:
sum = 0
while sum < lower limit of range {
sum += rand(array)
}
But this is just selecting random numbers until it gets within the range. It gives me incorrect results a lot of times and exceeds the range.
This answer is based on my previous answer at Sampling from a Superexponential Population in Polynomial Time.
First, you write a dynamic programming algorithm to count solutions.
def count_subset_sums (array, lower, upper):
# Start with the empty set.
sums = {0: 1}
for i in array:
next_sums = sums.copy()
for s, count in sums.items():
if upper < s + i:
pass # Skip too large of sums.
elif s + i not in next_sums:
next_sums[s + i] = count
else:
next_sums[s + i] += count
sums = next_sums
answer = 0
for s, count in sums.items():
if lower <= s and s <= upper:
answer += count
return answer
Then convert it to instead capture the structure of the solutions. This is a fairly straight translation - only longer because I added some comments.
def analyze_subset_sums (array, lower, upper):
sums = {0: SolutionStructure()}
for i in array:
next_sums = sums.copy()
for s, structure in sums.items():
if upper < s + i:
pass
elif s+i in next_sums:
# choice = this choice
# prev = prev part of these solution.
# prev_solutions = solutions found previously
next_sums[s+i] = SolutionStructure(
choice = i,
prev = structure,
prev_solutions = next_sums[s+i],
)
else:
next_sums[s+i] = SolutionStructure(
choice = i,
prev = structure,
)
sums = next_sums
answer = None
for s, structure in sums.items():
if lower <= s and s <= upper:
if answer is None:
answer = structure
else:
# These have a null choice that will need to be
# later filtered out.
answer = SolutionStructure(
prev = structure,
prev_solutions = answer
)
return answer
And then we use it.
import random
# Our original function.
count_solutions = count_subset_sums([10,25,40,55,80,110], 100, 150)
print("Dynamic programming gives us {} solutions.".format(count_solutions))
structure = analyze_subset_sums([10,25,40,55,80,110], 100, 150)
# Should give the same answer.
print("Our structure gives us {} solutions.".format(structure.total_count))
for _ in range(10):
index = random.randrange(0, structure.total_count)
# (choice, num) where num is in the range 0..multiplicity.
# In our case num is always zero.
decisions = structure.decisions(index)
# Look for non-null choices - that's our answer.
choices = [d[0] for d in decisions if d[0] is not None]
print("Solution {} is {}.".format(index, choices))
Here is the complete working program, including the class from my previous answer.
class SolutionStructure:
# This node gives a set of solutions, and a link to previous ones.
# The parameters are passed in decreasing likelihood of need.
# The assignments are in how to think about this.
def __init__ (self, prev=None, prev_solutions=None, choice=None, multiplicity=1):
# By which overall choice did we get here? Might not be recorded.
self.choice = choice
# How many ways can we get from prev to here? Default 1.
# When decoded we'll get a number in range(mutiplicity),
# which is to say 0, 1, ..., multiplicity-1.
self.multiplicity = multiplicity
# What came before this decision in these solutions?
self.prev = prev
# The promised link to previous solutions.
self.prev_solutions = prev_solutions
# Now let's figure out the counts.
# this_count: The count of this set of solutions.
# total_count: The count of all, including prev_solutions.
if prev is None:
# None represents an empty set, has count 1.
self.this_count = multiplicity
else:
self.this_count = multiplicity * prev.total_count
if prev_solutions is None:
# No prev_solutions means no solutions.
self.total_count = self.this_count
else:
self.total_count = self.this_count + prev_solutions.total_count
# What set of decisions lead to the solution n? (Counting from 0.)
def decisions(self, n):
if self.total_count <= n:
# No answer possible. Maybe throw an exception?
return None
# We will produce the answer as a linked list of decisions.
# From last to first.
start = []
tail = start
current = self
while current is not None:
if n < current.total_count - current.this_count:
# Give prev_solutions.
current = current.prev_solutions
else:
n -= current.total_count - current.this_count
# Made this decision.
# A decision looks like (choice, number).
decision = (current.choice, n % current.multiplicity)
tail.append([decision])
tail = tail[-1]
# Now we have to find which solution out of prev to use.
n = n // current.multiplicity
current = current.prev
# Now decode the linked list into a list.
reversed_decisions = []
if 0 < len(start):
# We have decisions.
node = start[0]
while 1 < len(node):
reversed_decisions.append(node[0])
node = node[1]
reversed_decisions.append(node[0])
return list(reversed(reversed_decisions))
def count_subset_sums (array, lower, upper):
# Start with the empty set.
sums = {0: 1}
for i in array:
next_sums = sums.copy()
for s, count in sums.items():
if upper < s + i:
pass # Skip too large of sums.
elif s + i not in next_sums:
next_sums[s + i] = count
else:
next_sums[s + i] += count
sums = next_sums
answer = 0
for s, count in sums.items():
if lower <= s and s <= upper:
answer += count
return answer
def analyze_subset_sums (array, lower, upper):
sums = {0: SolutionStructure()}
for i in array:
next_sums = sums.copy()
for s, structure in sums.items():
if upper < s + i:
pass
elif s+i in next_sums:
# choice = this choice
# prev = prev part of these solution.
# prev_solutions = solutions found previously
next_sums[s+i] = SolutionStructure(
choice = i,
prev = structure,
prev_solutions = next_sums[s+i],
)
else:
next_sums[s+i] = SolutionStructure(
choice = i,
prev = structure,
)
sums = next_sums
answer = None
for s, structure in sums.items():
if lower <= s and s <= upper:
if answer is None:
answer = structure
else:
# These have a null choice that will need to be
# later filtered out.
answer = SolutionStructure(
prev = structure,
prev_solutions = answer
)
return answer
import random
# Our original function.
count_solutions = count_subset_sums([10,25,40,55,80,110], 100, 150)
print("Dynamic programming gives us {} solutions.".format(count_solutions))
structure = analyze_subset_sums([10,25,40,55,80,110], 100, 150)
# Should give the same answer.
print("Our structure gives us {} solutions.".format(structure.total_count))
for _ in range(10):
index = random.randrange(0, structure.total_count)
# (choice, num) where num is in the range 0..multiplicity.
# In our case num is always zero.
decisions = structure.decisions(index)
# Look for non-null choices - that's our answer.
choices = [d[0] for d in decisions if d[0] is not None]
print("Solution {} is {}.".format(index, choices))