I have a minimax function that normally works fine, but when I implement both alpha-beta pruning and a hash table it loses in exactly one situation. If I apply only one of the optimizations, or neither, it works correctly in every case. The losing case only occurs when the AI goes second:
I play 9, 1, 3, 6.
The AI plays 5, 7, 2.
X | O | X
---+---+---
4 | O | X
---+---+---
O | 8 | X
The AI is O.
function:
def minimax(depth, alpha, beta):
    new_depth = depth + 1
    avails = core_func.availables()
    if new_depth % 2:  # maximizer
        if core_func.check_win():
            return -1  # previously minimizer
        elif not avails:
            return 0
        best_score = -float('inf')
        for c in avails:
            i = c - 1
            variables.grid[i] = variables.symbol
            t = tuple(variables.grid)
            if t not in variables.states:
                variables.states[t] = minimax(new_depth, alpha, beta)
            best_score = max(best_score, variables.states[t])
            alpha = max(alpha, best_score)
            variables.grid[i] = c
            if beta <= alpha: break
    else:  # minimizer
        if core_func.check_win():
            return 1  # previously maximizer
        elif not avails:
            return 0
        opponent = variables.valid_symbols - {variables.symbol}
        opponent = opponent.pop()
        best_score = float('inf')
        for c in avails:
            i = c - 1
            variables.grid[i] = opponent
            t = tuple(variables.grid)
            if t not in variables.states:
                variables.states[t] = minimax(new_depth, alpha, beta)
            best_score = min(best_score, variables.states[t])
            beta = min(beta, best_score)
            variables.grid[i] = c
            if beta <= alpha: break
    return best_score
In case anyone needs it, I have added all the code required to run the game below:
core_func.py
import variables
import turns_func
def update_grid():
    print(f' {variables.grid[0]} | {variables.grid[1]} | {variables.grid[2]} \n'
          f'---+---+---\n'
          f' {variables.grid[3]} | {variables.grid[4]} | {variables.grid[5]} \n'
          f'---+---+---\n'
          f' {variables.grid[6]} | {variables.grid[7]} | {variables.grid[8]} \n')

def correct_input(message, lst):
    i = int(input(f'{message}: '))
    if i not in lst:
        print(f'input error: input is not valid. valid inputs: {lst}')
        return correct_input(message, lst)
    return i

def initialization():
    option = correct_input(variables.initialization_message, variables.valid_initials)
    if option in {1, 4, 5}:
        variables.turn = 'H'
    else:
        variables.turn = 'C'
    if option in {2, 4}:
        variables.difficulty = 'R'
    if option in {3, 5}:
        variables.difficulty = 'P'
    print('Coordinates of the grid:')
    update_grid()

def check_win():
    for x, y, z in (0, 1, 2), (3, 4, 5), (6, 7, 8), (0, 3, 6), (1, 4, 7), (2, 5, 8), (0, 4, 8), (2, 4, 6):
        if variables.grid[x] == variables.grid[y] == variables.grid[z]:
            return True
    return False

def availables():
    availables = []
    for c in variables.grid:
        if c not in variables.valid_symbols:
            availables.append(c)
    return availables

def get_coordinate():
    if not variables.difficulty:
        return turns_func.human_turn()
    elif variables.turn == 'H':
        variables.turn = 'C'
        return turns_func.human_turn()
    else:
        print(f'computer ({variables.symbol}):')
        variables.turn = 'H'
        if variables.difficulty == 'R':
            return turns_func.random_turn()
        else:
            return turns_func.perfect_turn()

def play():
    coordinate = get_coordinate()
    variables.grid[coordinate - 1] = variables.symbol
    update_grid()
    variables.moves += 1
    if check_win():
        print(f'"{variables.symbol}" wins in {variables.moves} moves!')
    elif not availables():
        print('"Game Ties"')
    else:
        changed = variables.valid_symbols - {variables.symbol}
        variables.symbol = changed.pop()
        play()
turns_func.py
import variables
import core_func
from random import choice
def human_turn():
    return core_func.correct_input(variables.get_coordinate_message + variables.symbol, core_func.availables())

def random_turn():
    return choice(core_func.availables())

# alpha - maximizer's max - it should be less than its parent node minimizer's min
# beta - minimizer's min - it should be greater than its parent node maximizer's max
def minimax(depth, alpha, beta):
    new_depth = depth + 1
    avails = core_func.availables()
    if new_depth % 2:  # maximizer
        if core_func.check_win():
            return -1  # previously minimizer
        elif not avails:
            return 0
        best_score = -float('inf')
        for c in avails:
            i = c - 1
            variables.grid[i] = variables.symbol
            t = tuple(variables.grid)
            if t not in variables.states:
                variables.states[t] = minimax(new_depth, alpha, beta)
            best_score = max(best_score, variables.states[t])
            alpha = max(alpha, best_score)
            variables.grid[i] = c
            if beta <= alpha: break
    else:  # minimizer
        if core_func.check_win():
            return 1  # previously maximizer
        elif not avails:
            return 0
        opponent = variables.valid_symbols - {variables.symbol}
        opponent = opponent.pop()
        best_score = float('inf')
        for c in avails:
            i = c - 1
            variables.grid[i] = opponent
            t = tuple(variables.grid)
            if t not in variables.states:
                variables.states[t] = minimax(new_depth, alpha, beta)
            best_score = min(best_score, variables.states[t])
            beta = min(beta, best_score)
            variables.grid[i] = c
            if beta <= alpha: break
    return best_score

def perfect_turn():
    max_score = -float('inf')
    coordinate = None
    for c in variables.grid:
        if c in variables.valid_symbols: continue
        i = c - 1
        variables.grid[i] = variables.symbol
        score = minimax(1, -float('inf'), float('inf'))
        variables.grid[i] = c
        if score > max_score:
            max_score = score
            coordinate = c
    return coordinate
variables.py
initialization_message = ('1. Play with Human\n'
                          'Play with computer:\n'
                          'For computer going first, Difficulties:\n'
                          '\t2. Random\n'
                          '\t3. Perfect\n'
                          'For computer going second, Difficulties:\n'
                          '\t4. Random\n'
                          '\t5. Perfect\n'
                          'Choose an option. Enter the number')
get_coordinate_message = 'Input a coordinate for '
valid_initials = (1, 2, 3, 4, 5)
grid = [1, 2, 3, 4, 5, 6, 7, 8, 9]
valid_symbols = {'X', 'O'}
symbol = 'X'
difficulty = None
turn = None
moves = 0
states = {}
main.py
import core_func
core_func.initialization()
core_func.play()
The problem is indeed in the combination of memoization ("hashing") and alpha-beta pruning. The key t you use represents the state of the grid, but it does not include the alpha/beta window that is in effect at that moment. When the value returned by minimax results from an alpha-beta cut (a break), it is not guaranteed to be the exact value for that grid; it is only a bound, accurate enough to let a node higher up the recursion tree choose another move. Memoization, however, needs exact values: when that state is encountered again, the alpha/beta window might be different, and the cached value would then have to be one obtained from a complete evaluation of the node, without an early exit (without a break).
That is why it works when you remove either of the two features: memoization or alpha-beta pruning.
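To make the failure concrete, here is a minimal, hypothetical sketch on an abstract two-level game tree (the names TREE, LEAVES, A, B and so on are made up for illustration and are not part of your code). The minimizer node 'B' gets cut off after its first child because alpha has already been raised to 0, and the naive cache then remembers 0 for it, even though its exact value is min(0, -1) = -1:

TREE = {                    # internal nodes -> ordered children
    'A': ['A1', 'B'],       # 'A' is the maximizer root
    'B': ['B1', 'B2'],      # 'B' is a minimizer; its exact value is min(0, -1) = -1
}
LEAVES = {'A1': 0, 'B1': 0, 'B2': -1}   # leaf scores

memo = {}

def alphabeta(node, maximizing, alpha, beta):
    if node in LEAVES:
        return LEAVES[node]
    if node in memo:                    # naive reuse: the window is ignored
        return memo[node]
    best = -float('inf') if maximizing else float('inf')
    for child in TREE[node]:
        score = alphabeta(child, not maximizing, alpha, beta)
        if maximizing:
            best = max(best, score)
            alpha = max(alpha, best)
        else:
            best = min(best, score)
            beta = min(beta, best)
        if beta <= alpha:               # cutoff: 'best' is only a bound here
            break
    memo[node] = best                   # cached even after a cutoff, so it may be wrong
    return best

alphabeta('A', True, -float('inf'), float('inf'))
print(memo['B'])                        # prints 0, although B's exact value is -1

If that cached 0 were later reused in a search where the window is still wide open, the maximizer above would treat the branch as a draw even though it can actually be lost.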
To have both features activated, you have some options. I would suggest that you don't store a score in your states dict if its evaluation loop was prematurely exited because of pruning. Here is your code adapted to do just that:
def minimax(depth, alpha, beta):
    new_depth = depth + 1
    avails = core_func.availables()
    count = 0  # count the number of evaluated variants
    if new_depth % 2:  # maximizer
        if core_func.check_win():
            return -1  # previously minimizer
        elif not avails:
            return 0
        best_score = -float('inf')
        for c in avails:
            i = c - 1
            variables.grid[i] = variables.symbol
            t = tuple(variables.grid)
            score = variables.states[t] if t in variables.states else minimax(new_depth, alpha, beta)
            best_score = max(best_score, score)
            alpha = max(alpha, best_score)
            variables.grid[i] = c
            count += 1  # update counter
            if beta <= alpha:
                break
    else:  # minimizer
        opponent = variables.valid_symbols - {variables.symbol}
        opponent = opponent.pop()
        if core_func.check_win():
            return 1  # previously maximizer
        elif not avails:
            return 0
        best_score = float('inf')
        for c in avails:
            i = c - 1
            variables.grid[i] = opponent
            t = tuple(variables.grid)
            score = variables.states[t] if t in variables.states else minimax(new_depth, alpha, beta)
            best_score = min(best_score, score)
            beta = min(beta, best_score)
            variables.grid[i] = c
            count += 1  # update counter
            if beta <= alpha:
                break
    if count == len(avails):  # we did a complete evaluation
        # only then update variables.states
        t = tuple(variables.grid)
        variables.states[t] = best_score
    return best_score
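As a quick sanity check of that guard, here is the same hypothetical toy tree from the sketch above (reusing its TREE and LEAVES; none of these names come from your modules). The pruned result for 'B' is no longer cached, and a later visit with a wide-open window stores the exact value:

memo = {}

def alphabeta_exact_cache(node, maximizing, alpha, beta):
    if node in LEAVES:
        return LEAVES[node]
    if node in memo:
        return memo[node]
    best = -float('inf') if maximizing else float('inf')
    count = 0                            # how many children were actually evaluated
    for child in TREE[node]:
        score = alphabeta_exact_cache(child, not maximizing, alpha, beta)
        if maximizing:
            best = max(best, score)
            alpha = max(alpha, best)
        else:
            best = min(best, score)
            beta = min(beta, best)
        count += 1
        if beta <= alpha:
            break
    if count == len(TREE[node]):         # cache only complete evaluations
        memo[node] = best
    return best

alphabeta_exact_cache('A', True, -float('inf'), float('inf'))
print('B' in memo)                                                     # False: the pruned result was not cached
print(alphabeta_exact_cache('B', False, -float('inf'), float('inf')))  # -1, the exact value, now cached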