I have this code written in Dart that implements the minmax algorithm, With a tic-tac-toe, the computer is playing as "O" and trying to maximize the score, but I get some wrong decision like this, it says (look at the top, the first is the move coordinate and the second is the score) that I have to move in (0,2). But this is not the right decision.
This is my code:
class Ai {
play(state) {
return max_Value(state);
}
// the action parameter holds the move that got as to this state
List max_Value(List state, {action}) {
List v = [null, double.negativeInfinity];
if (terminal(state)) {
return [action, utility(state)];
}
for (var action in actions(state)) {
if (v[1] == 1) return v;
v = max(v, min_Value(result(state, action), action: action));
}
return v;
}
List min_Value(List state, {action}) {
List v = [null, double.infinity];
if (terminal(state)) {
return [action, utility(state)];
}
for (var action in actions(state)) {
if (v[1] == -1) return v;
v = min(v, max_Value(result(state, action), action: action));
}
return v;
}
List min(List prv, List curr) {
return prv[1] < curr[1] ? prv : curr;
}
List max(List prv, List curr) {
return prv[1] > curr[1] ? prv : curr;
}
bool terminal(List state) {
if (state[0][0] == state[0][1] &&
state[0][0] == state[0][2] &&
state[0][0] != "") {
return true;
}
if (state[1][0] == state[1][1] &&
state[1][0] == state[1][2] &&
state[1][0] != "") {
return true;
}
if (state[2][0] == state[2][1] &&
state[2][0] == state[2][2] &&
state[2][0] != "") {
return true;
}
// vertical
if (state[0][0] == state[1][0] &&
state[0][0] == state[2][0] &&
state[0][0] != "") {
return true;
}
if (state[0][1] == state[1][1] &&
state[0][1] == state[2][1] &&
state[0][1] != "") {
return true;
}
if (state[0][2] == state[1][2] &&
state[0][2] == state[2][2] &&
state[0][2] != "") {
return true;
}
// diagonal
if (state[0][0] == state[1][1] &&
state[2][2] == state[0][0] &&
state[0][0] != "") {
return true;
}
if (state[2][0] == state[1][1] &&
state[2][0] == state[0][2] &&
state[2][0] != "") {
return true;
}
for (int i = 0; i < 3; i++) {
for (int j = 0; j < 3; j++) {
if (state[i][j] == "") {
return false;
}
}
}
return true;
}
double utility(List state) {
String winner = "";
// horizontal
if (state[0][0] == state[0][1] && state[0][0] == state[0][2]) {
winner = state[0][0];
}
if (state[1][0] == state[1][1] && state[1][0] == state[1][2]) {
winner = state[1][0];
}
if (state[2][0] == state[2][1] && state[2][0] == state[2][2]) {
winner = state[2][0];
}
// vertical
if (state[0][0] == state[1][0] && state[0][0] == state[2][0]) {
winner = state[0][0];
}
if (state[0][1] == state[1][1] && state[0][1] == state[2][1]) {
winner = state[0][1];
}
if (state[0][2] == state[1][2] && state[0][2] == state[2][2]) {
winner = state[0][2];
}
// diagonal
if (state[0][0] == state[1][1] && state[2][2] == state[0][0]) {
winner = state[0][0];
}
if (state[2][0] == state[1][1] && state[2][0] == state[0][2]) {
winner = state[2][0];
}
if (winner == "O") {
return 1;
} else if (winner == "X") {
return -1;
} else {
return 0;
}
}
List actions(List state) {
List acs = [];
for (int i = 0; i < 3; i++) {
for (int j = 0; j < 3; j++) {
if (state[i][j] == "") {
acs.add([i, j]);
}
}
}
return acs;
}
List result(List state, List action) {
List newState = [
["", "", ""],
["", "", ""],
["", "", ""]
];
for (int i = 0; i < 3; i++) {
for (int j = 0; j < 3; j++) {
newState[i][j] = state[i][j];
}
}
newState[action[0]][action[1]] = player(state);
return newState;
}
player(List state) {
int xnum = 0;
int onum = 0;
for (int i = 0; i < 3; i++) {
for (int j = 0; j < 3; j++) {
if (state[i][j] == "X") {
xnum++;
} else if (state[i][j] == "O") {
onum++;
}
}
}
if (xnum > onum) {
return "O";
} else {
return "X";
}
}
}
Some mistakes:
This needs to depend on the player to move whether you return max or min:
play(state) {
return max_Value(state);
}
Here (and same for min) v might become an action done later and not the actual action that was performed and lead to the better value.
v = max(v, min_Value(result(state, action), action: action));
Possible improvements:
With a little modification terminal could return the evaluation too and then you don't need the utility function.
You constantly evaluate player with the function. You should have to do that at most once (see first mistake). min and max know whether they are playing X or O. Everything could be simplified a lot and the number of functions reduced without increasing their complexity.