I'm making a simple CLI tic-tac-toe game with an AI that uses a negamax algorithm with alpha-beta pruning using LISP and I am having problems with how the AI makes its move. Instead of making the single move that it should, it is playing out the game completely and so games only last two moves. I have run it through (step) and it looked like the problem is that the bestPath variable is being set in the (when (> value bestValue)) block in the negamax function even when it says that that block is not being executed as. Also, the value it is set to is not the correct one it should be if it was appropriate for it to be set. Any suggestions? Here is my code.
;
; Prints an ASCII tic tac toe board
;
(defun print-board (board)
(format t "~% ~d | ~d | ~d 0 | 1 | 2~% --------- ---------~% ~d | ~d | ~d 3 | 4 | 5~% --------- ---------~% ~d | ~d | ~d 6 | 7 | 8~%~%"
(or (nth 0 board) ".") (or (nth 1 board) ".") (or (nth 2 board) ".")
(or (nth 3 board) ".") (or (nth 4 board) ".") (or (nth 5 board) ".")
(or (nth 6 board) ".") (or (nth 7 board) ".") (or (nth 8 board) ".")))
;
; Returns the symbol representing the other player
;
(defun opposite (player)
(if (eq player 'x) 'o 'x))
;
; Checks if the player won
;
(defun won-p (board player)
(or (and (eq (nth 0 board) player)
(eq (nth 1 board) player)
(eq (nth 2 board) player))
(and (eq (nth 3 board) player)
(eq (nth 4 board) player)
(eq (nth 5 board) player))
(and (eq (nth 6 board) player)
(eq (nth 7 board) player)
(eq (nth 8 board) player))
(and (eq (nth 0 board) player)
(eq (nth 3 board) player)
(eq (nth 6 board) player))
(and (eq (nth 1 board) player)
(eq (nth 4 board) player)
(eq (nth 7 board) player))
(and (eq (nth 2 board) player)
(eq (nth 5 board) player)
(eq (nth 8 board) player))
(and (eq (nth 0 board) player)
(eq (nth 4 board) player)
(eq (nth 8 board) player))
(and (eq (nth 2 board) player)
(eq (nth 4 board) player)
(eq (nth 6 board) player))))
;
; Checks if neither player won and there are no valid moves
;
(defun draw-p (board)
(and (not (won-p board 'o))
(not (won-p board 'x))
(not (member nil board))))
;
; Places a token at the desired position unless
; it is already occupied
;
(defun make-move (board player move)
(unless (nth move board)
(let ((boardCopy (copy-list board)))
(setf (nth move boardCopy) player)
boardCopy)))
;
; Starts a human v human game of tic tac toe
;
(defun play ()
(setf currentPlayer 'x)
(setf currentBoard (list nil nil nil nil nil nil nil nil nil))
(print-board currentBoard)
(do ()
((or (won-p currentBoard 'x)
(won-p currentBoard 'o)
(draw-p currentBoard))
(opposite currentPlayer))
(format t "~%Enter move for ~a's: " currentPlayer)
(setf move (read))
(do ()
((setf nextBoard (make-move currentBoard currentPlayer move)))
(format t "~%Illegal move. Try again: ")
(setf move (read)))
(setf currentBoard nextBoard)
(print-board currentBoard)
(if (won-p currentBoard currentPlayer)
(format t "~%Player ~a wins!" currentPlayer))
(if (draw-p currentBoard)
(format t "~%Draw!"))
(setf currentPlayer (opposite currentPlayer))))
Here is the code for the AI.
;
; Evaluates the heuristic value of the board position
; from the viewpoint of the player
;
(defun evaluate (board player depth)
(cond ((won-p board player) (- 10 depth))
((won-p board (opposite player)) (+ -10 depth))
(t 0)))
;
; Generates all possible legal moves from the current position
;
(defun generate-moves (board player)
(loop for move from 0 to 8
unless (nth move board)
collect (make-move board player move)))
;
; Checks if the algorithm has searched deep enough into the tree.
;
(defun deep-enough (board player)
(or (won-p board player)
(won-p board (opposite player))
(draw-p board)))
;
; Algorithm for deciding which move to choose
;
(defun negamax(board player depth)
(cond ((deep-enough board player)
(cons (evaluate board player depth) board))
(t (setq bestValue -10)
(setq bestPath nil)
(setq successors (generate-moves board player))
(loop for successor in successors
do
(let* ((result (negamax successor (opposite player) (+ depth 1)))
(value (- (first result))))
(when (> value bestValue)
(setq bestValue value)
(setq bestPath successor))))
(cons bestValue bestPath))))
;
; Starts a game of tic-tac-toe with the computer
;
(defun play-ai()
(setq currentPlayer 'x)
(setq currentBoard (list nil nil nil nil nil nil nil nil nil))
(print-board currentBoard)
(do ()
((or (won-p currentBoard 'x)
(won-p currentBoard 'o)
(draw-p currentBoard))
(opposite currentPlayer))
(format t "~%Enter move for ~a's: " currentPlayer)
(cond ((eq currentPlayer 'x)
(setf move (read))
(do ()
((setf nextBoard (make-move currentBoard currentPlayer move)))
(format t "~%Illegal move. Try again: ")
(setf move (read)))
(setf currentBoard nextBoard)
(print-board currentBoard)
(if (won-p currentBoard currentPlayer)
(format t "~%Player ~a wins!" currentPlayer))
(if (draw-p currentBoard)
(format t "~%Draw!"))
(setf currentPlayer (opposite currentPlayer)))
(t (setq currentBoard (rest (negamax currentBoard currentPlayer 1)))
(write-line "")
(print-board currentBoard)
(if (won-p currentBoard currentPlayer)
(format t "~%Player ~a wins!" currentPlayer))
(if (draw-p currentBoard)
(format t "~%Draw!"))
(setf currentPlayer (opposite currentPlayer))))))
To solve the actual problem, you need to use LET
instead of SETQ
to define local variables. In particular in the NEGAMAX
function:
(defun negamax(board player depth)
(cond ((deep-enough board player)
(cons (evaluate board player depth) board))
(t (let ((best-value -10)
(best-path nil)
(successors (generate-moves board player)))
(loop for successor in successors
do (let* ((result (negamax successor (opposite player) (+ depth 1)))
(value (- (first result))))
(when (> value best-value)
(setf best-value value)
(setf best-path successor))))
(cons best-value best-path)))))
There are other problems with the code too. I'll focus on PLAY-AI
here to keep this short, but these apply to rest of the code too.
COND
(everything from (PRINT-BOARD ...)
). You should move that outside the COND
.IMO, it would be cleaner to use LOOP
instead of DO
here. Or even better, use iterate. If you have quicklisp set up, you can just do
(ql:quickload :iterate)
(use-package :iterate)
The ending message ("x won", "draw") could be moved outside the loop or done in another function.
DRAW-P
doesn't need to check if one of the players won, since that is checked before calling DRAW-P
anyway.Here's a somewhat cleaned up version using iterate:
(defun ask-move (board player)
(format t "~&Enter move for ~a's: " player)
(iter (for move = (make-move board player (read)))
(when move (return move))
(format t "~&Illegal move. Try again: ")))
(defun game-loop ()
(let ((player 'x)
(board (list nil nil nil nil nil nil nil nil nil)))
(print-board board)
(iter (setf board (if (eq player 'x)
(ask-move board player)
(rest (negamax board player 1))))
(print-board board)
(cond ((won-p board player) (return player))
((draw-p board) (return :draw)))
(setf player (opposite player)))))
(defun play-ai ()
(case (game-loop)
(x (format t "~&Player wins!"))
(o (format t "~&AI wins!"))
(:draw (format t "~&Draw!"))))
Or the same with the regular loop. Not much difference, but iterate has slightly nicer syntax.
(defun ask-move (board player)
(format t "~&Enter move for ~a's: " player)
(loop
for move = (make-move board player (read))
when move do (return move)
do (format t "~&Illegal move. Try again: ")))
(defun game-loop ()
(let ((player 'x)
(board (list nil nil nil nil nil nil nil nil nil)))
(print-board board)
(loop
do (setf board (if (eq player 'x)
(ask-move board player)
(rest (negamax board player 1))))
do (print-board board)
when (won-p board player) do (return player)
when (draw-p board) do (return :draw)
do (setf player (opposite player)))))
(defun play-ai ()
(case (game-loop)
(x (format t "~&Player wins!"))
(o (format t "~&AI wins!"))
(:draw (format t "~&Draw!"))))