I am trying to solve a Binary Search problem, where the output is the location of the first appearance of the key in a sorted list.
I have managed to solve the first part, where I find any location where the key appears in the sorted list by using Binary Search. But the second part, where its first appearance is found, is going wrong for an unknown key and sorted list.
The code for binary search with duplicates:
import math
import random
def binary_search(keys, query):
low = 0
high = len(keys) - 1
def searchdef(keys, query, low, high):
if low > high:
return -1, 0, 0
x = math.floor(low + (high-low)/2)
if query != keys[x]:
if query > keys[x]:
for i in range(0, len(keys)-1):
return searchdef(keys, query, x+1, high)
if query < keys[x]:
return searchdef(keys, query, low, x-1)
else:
return x, high, low
def finddup2(x, low):
z = x-1
while query == keys[z] and z != -1:
return min(z, finddup2(z, low))
else: return x
x, high, low = searchdef(keys, query, low, high)
ans = finddup2(x, low)
return ans
if __name__ == '__main__':
num_keys = int(input())
input_keys = list(map(int, input().split()))
assert len(input_keys) == num_keys
num_queries = int(input())
input_queries = list(map(int, input().split()))
assert len(input_queries) == num_queries
for q in input_queries:
print(binary_search(input_keys, q), end=' ')
I have tried to stress test the code by using a linear search, but after many checks it produces a recursion error.
Stress Test Code:
import math
import random
def binary_search(keys, query):
low = 0
high = len(keys) - 1
def searchdef(keys, query, low, high):
if low > high:
return -1, 0, 0
x = math.floor(low + (high-low)/2)
if query != keys[x]:
if query > keys[x]:
for i in range(0, len(keys)-1):
return searchdef(keys, query, x+1, high)
if query < keys[x]:
return searchdef(keys, query, low, x-1)
else:
return x, high, low
def finddup2(x, low):
z = x-1
while query == keys[z] and z != -1:
return min(z, finddup2(z-1, low))
else: return x
x, high, low = searchdef(keys, query, low, high)
ans = finddup2(x, low)
return ans
def search(arr, N, x):
for i in range(0, N):
if (arr[i] == x):
return i
return -1
if __name__ == '__main__':
def random_no():
num_keys = int(50)
input_keys = random.sample(range(1, 100), 50)
input_keys.sort()
assert len(input_keys) == num_keys
num_queries = int(1)
input_queries = [random.choice(input_keys)]
assert len(input_queries) == num_queries
return input_keys, input_queries
input_keys, input_queries = random_no()
def check(input_keys, input_queries):
for q in input_queries:
a = binary_search(input_keys, q)
b = search(input_keys, len(input_keys), q)
print(a, end=' ')
print(b, end=' ')
while a == b:
print(a, end=' ')
print(b, end=' ')
print("ok")
input_keys, input_queries = random_no()
check(input_keys, input_queries)
else:
print(input_keys)
print(q)
print(a, end=' ')
print(b, end=' ')
print("wrong")
check(input_keys, input_queries)
Error I'm getting:
RecursionError: maximum recursion depth exceeded in comparison
I would like to know what has gone wrong with my code, and why the stress test is also not working.
Edit:
My new code after some changes:
import math
import random
def binary_search(keys, query):
low = 0
high = len(keys) - 1
def searchdef(keys, query, low, high):
if low > high:
return -1, high, low
x = math.floor(low + (high-low)/2)
if query != keys[x]:
if query > keys[x]:
return searchdef(keys, query, x+1, high)
if query < keys[x]:
return searchdef(keys, query, low, x-1)
else:
return x, high, low
def finddup2(x, low):
z = x-1
if query == keys[z] and z != -1:
return min(z, finddup2(z, low))
else: return x
x, high, low = searchdef(keys, query, low, high)
ans = finddup2(x, low)
return ans
if __name__ == '__main__':
num_keys = int(input())
input_keys = list(map(int, input().split()))
assert len(input_keys) == num_keys
num_queries = int(input())
input_queries = list(map(int, input().split()))
assert len(input_queries) == num_queries
for q in input_queries:
print(binary_search(input_keys, q), end=' ')
It still returns a wrong answer after black box testing. My stress test cannot find what the value causing the wrong answer is. Could you please help me?
Edit 2: I came up with a slight change. Now, the code is returning the correct answer, but is exceeding the time limit. I request help.
import math
def binary_search(keys, query):
low = 0
high = len(keys) - 1
def searchdef(keys, query, low, high):
if low > high:
return -1
if query < keys[low]:
return -1
if query > keys[high]:
return -1
if low <= high :
x = math.floor(low + (high-low)/2)
if query == keys[x]:
while query == keys[x-1] and x != 0:
x -=1
return x
if query > keys[x]:
return searchdef(keys, query, x+1, high)
if query < keys[x]:
return searchdef(keys, query, low, x-1)
x = searchdef(keys, query, low, high)
return x
if __name__ == '__main__':
num_keys = int(input())
input_keys = list(map(int, input().split()))
assert len(input_keys) == num_keys
num_queries = int(input())
input_queries = list(map(int, input().split()))
assert len(input_queries) == num_queries
for q in input_queries:
print(binary_search(input_keys, q), end=' ')
The problem with the binary search is that return min(z, finddup2(z-1, low))
skips an index: z
is already one less than the current index, so you should not subtract 1 from it again. It should be return min(z, finddup2(z, low))
Secondly, when the value is not found, and x == -1
, the function finddup2
should not be called. So:
x, high, low = searchdef(keys, query, low, high)
if x == -1:
return -1
ans = finddup2(x, low)
return ans
There are many other comments to make, like why you have a while
loop that returns in its first iteration. It should just be an if
. Also a for
loop
has a return
in its first iteration, so that loop should not be there at all. Making finddup2
recursive is not a good idea as this may use a lot of stack space. Make it iterative.
The problem with the stress test code is that it is calling itself recursively and causes a stack overflow.
Here is proposed correction of your testing code:
def random_no(num_keys=50, num_queries=10):
input_keys = random.sample(range(1, 100), num_keys)
input_keys.sort()
input_queries = random.choices(input_keys, k=num_queries)
return input_keys, input_queries
def check(input_keys, input_queries):
for q in input_queries:
a = binary_search(input_keys, q)
b = search(input_keys, len(input_keys), q)
if a != b:
print(input_keys)
print(q)
print(a, b, "wrong")
raise ValueError("wrong result")
for _ in range(100):
input_keys, input_queries = random_no()
check(input_keys, input_queries)
print("all done")
The function finddup2
is inefficient, both in terms of space (stack) and time. In the worst case it would have to scan half of the input list, which makes it O(𝑛) and kills the advantage you would have from the binary search algorithm, which has a time complexity of O(log𝑛). But O(log𝑛) + O(𝑛) = O(𝑛), so it has degraded to the performance of a linear search.
Also using math
to convert a floating point number to integer is worse than just using the integer division operator (//
).
To make this efficient, you should make sure that the binary search algorithm continues to drill down the tree until it is certain to have found the leftmost occurrence of a number.
Here is an adaptation of your binary_search
that doesn't need finddup2
. Note that you don't need to pass keys
and query
as arguments to your inner function, as it has access to those names anyhow:
def binary_search(keys, query):
low = 0
high = len(keys) - 1
def searchdef(low, high):
if low > high:
if low >= len(keys) or keys[low] != query:
return -1
return low
x = (low + high) // 2
if query > keys[x]:
return searchdef(x + 1, high)
else: # Continue also when equal!
return searchdef(low, x - 1)
return searchdef(low, high)