pythontic-tac-toeminimax

Trying to implement tic-tac-toe using minimax algortighm, but getting not optimal moves


im trying to implement a tic-tac-toe game using minimax algortihm. I have gotten the program running but it dosen't chose an optimal move and im really lost why it dosen't, it is probably a problem in minimax function but i maybe completley wrong. So for example if i put x anywhere it always begings at 3,3

  1 2 3
1
2 X
3     O

It also don't try to block the 3 in a row so if i put another x in 1,1 I get this result:

  1 2 3
1 X
2 X
3   O O

It feels like it activley have switched to doing the worst move instead, I end up with a reult looking like this:

  1 2 3
1 X X O
2 O X
3 X O O

This is consistent with different states of the board.

Cred to Luke Miles on github for pieces of the code!

from abc import ABC, abstractmethod
import numpy as np
import math
#create the nodes for a board state
class Node(ABC):
    @abstractmethod
    def find_children(self):
        "All possible successors of this board state"
        return set()
    @abstractmethod
    def is_terminal(self):
        "Returns True if the node has no children"
        return True
    @abstractmethod
    def reward(self):
        "Assumes `self` is terminal node. 1=win, 0=loss, .5=tie, etc"
        return 0

class MinimaxTreeSearch:
    def __init__(self, depth=5):
        self.depth = depth  # Define search depth limit

    def choose(self, node):
        if node.is_terminal() or self.depth==0:
            raise RuntimeError(f"choose called on terminal node {node}")
        best_move,_ = self.minimax(node, self.depth,maxi=True)
        return best_move
    def minimax(self, node, depth,maxi):
        if depth == 0 or node.is_terminal():
            return None, self.evaluate(node)
        moves = list(node.find_children()) 
        if maxi:  
            best = -math.inf
            best_move = None
            for move in moves:
                _, value = self.minimax(node=move, depth=depth-1,maxi=False)  
                if value > best:
                    best = value
                    best_move = move
            return best_move, best  
        else:  
            best = math.inf
            best_move = None
            for move in moves:
                _, value = self.minimax(node=move, depth=depth-1,maxi=True)
                if value < best:
                    best = value
                    best_move = move
            return best_move, best  

    def evaluate(self, node):
        if node.is_terminal():
            return node.reward()  # Use game-specific reward
        return 0  # Neutral score for intermediate states
    
    
class ticboard(Node):
    def __init__(board, board_state=[None,] * 9, winner=None,turn=True, terminal=False):
        board.board_state=board_state

        board.turn = turn
        board.winner = winner
        board.terminal = terminal
    def find_children(board):
        if board.terminal:
            return set()
        return{board.make_move(i) for i, value in enumerate(board.board_state) if value is None}
    
    def reward(board):
        if not board.terminal:
            raise RuntimeError(f"reward on no terminal board {board}")  
        if board.winner is None:
            return 0.5
        if board.winner == board.turn:
            return 1 
        return 0
        
    def is_terminal(board):
        return board.terminal
    
    def make_move(board, index):
        board_state= board.board_state.copy()
        board_state[index] = board.turn
        turn = not board.turn
        winner = find_winner(board_state)
        is_terminal = (winner is not None) or not any(spot is None for spot in board_state)
        return ticboard(board_state,winner,turn,is_terminal)
    
    def to_pretty_string(board):
        to_char = lambda v: ("X" if v is True else ("O" if v is False else " "))
        rows = [
            [to_char(board.board_state[3 * row + col]) for col in range(3)] for row in range(3)
        ]
        return (
            "\n  1 2 3\n"
            + "\n".join(str(i + 1) + " " + " ".join(row) for i, row in enumerate(rows))
            + "\n"
        )
    
def play_game():
    tree = MinimaxTreeSearch(depth=5)
    board = ticboard()
    print(board.to_pretty_string())
    while True:
        row_col = input("enter row,col: ")
        row, col = map(int, row_col.split(","))
        index = 3 * (row - 1) + (col - 1)
        if board.board_state[index] is not None:
            raise RuntimeError("Invalid move")
        board = board.make_move(index)
        print(board.to_pretty_string())
        if board.terminal:
            break
        board = tree.choose(board)
        print(board.to_pretty_string())
        if board.terminal:
            break

def win_combos():
    combos=[]
    for start in range(0, 9, 3):  # three in a row
        combos.append([start, start + 1, start + 2])
    for start in range(3):  # three in a column
        combos.append([start, start + 3, start + 6])
    combos.append([0, 4, 8])
    combos.append([2, 4, 6])
    return(combos)

def find_winner(board_state):
    win_combo= win_combos()
    for combo in win_combo:
        for player in [True,False]:
            a, b ,c = combo
            if board_state[a] == board_state[b] == board_state[c] and board_state[a] is player:
                return player
    return None

if __name__ == "__main__":
    play_game()

i have tried using node.turn instead of maxi(maximizingplayer) but i have a hard time grasping on how I should use it. Thanks for any help and have a wonderful day!


Solution

  • There are these issues:

    This will fix the problems you encountered.

    There is much more to say about this code. For instance: