I tried to implement the prisoner's dilemma in Python, but instead of showing that tit for tat is the better strategy, my results show that defecting gives better results.
Can someone look at my code, and tell me what I have done wrong here?
import random
from colorama import Fore, Style
import numpy as np
# The two moves a player can make in a round.
COOPERATE, DEFECT = 'cooperate', 'defect'
# Define the strategies
def always_cooperate(history):
    """Cooperate unconditionally; ``history`` is ignored."""
    return COOPERATE
def always_defect(history):
    """Defect unconditionally; ``history`` is ignored."""
    return DEFECT
def random_choice_cooperate(history):
    """Cooperate with probability 0.75, otherwise defect; ignores ``history``."""
    if random.random() < 0.75:
        return COOPERATE
    return DEFECT
def random_choice_defect(history):
    """Cooperate with probability 0.25, otherwise defect; ignores ``history``."""
    if random.random() < 0.25:
        return COOPERATE
    return DEFECT
def random_choice_neutral(history):
    """Cooperate or defect with equal probability; ignores ``history``."""
    if random.random() < 0.5:
        return COOPERATE
    return DEFECT
def tit_for_tat(history):
    """Open with cooperation, then copy the opponent's previous move.

    ``history`` is this player's list of ``(own_move, opponent_move)``
    tuples, oldest first.
    """
    # Index 1 of the latest entry is what the opponent played last round.
    return history[-1][1] if history else COOPERATE
def tat_for_tit(history):
    """Open with defection, then copy the opponent's previous move.

    Same mirroring rule as ``tit_for_tat``; only the opening move differs.
    ``history`` is this player's list of ``(own_move, opponent_move)``
    tuples, oldest first.
    """
    # First round: defect (not cooperate) -- that is the whole difference.
    return history[-1][1] if history else DEFECT
def tit_for_two_tats(history):
    """Defect only after the opponent has defected twice in a row.

    Cooperates during the first two rounds. ``history`` is this player's
    list of ``(own_move, opponent_move)`` tuples, oldest first.
    """
    if len(history) < 2:
        return COOPERATE
    # Index 1 of each entry is the opponent's move in that round.
    two_defections = history[-2][1] == DEFECT and history[-1][1] == DEFECT
    return DEFECT if two_defections else COOPERATE
# Payoffs keyed by (first player's move, second player's move); each value
# is (first player's score, second player's score).
payoff_matrix = {
    (COOPERATE, COOPERATE): (3, 3),  # both cooperate
    (COOPERATE, DEFECT): (0, 5),     # first is exploited by second
    (DEFECT, COOPERATE): (5, 0),     # first exploits second
    (DEFECT, DEFECT): (1, 1),        # both defect
}
# Strategies entered in the tournament. The order fixes the pairing order.
players = [
    always_cooperate,
    always_defect,
    random_choice_defect,
    tit_for_tat,
    tit_for_two_tats,
    random_choice_cooperate,
    tat_for_tit,
    random_choice_neutral,
]

# Terminal colour for each strategy, looked up by the function's __name__.
player_colors = {
    'always_cooperate': Fore.GREEN,
    'always_defect': Fore.RED,
    'tit_for_tat': Fore.BLUE,
    'random_choice_cooperate': Fore.MAGENTA,
    'random_choice_defect': Fore.LIGHTRED_EX,
    'tat_for_tit': Fore.LIGHTYELLOW_EX,
    'random_choice_neutral': Fore.WHITE,
    'tit_for_two_tats': Fore.LIGHTBLACK_EX,
}
def tournament(players, rounds=100, *, include_self_play=False):
    """Run a round-robin iterated prisoner's dilemma and rank the strategies.

    Every pair of distinct entries in ``players`` meets once for ``rounds``
    rounds. Each strategy is called with its own history, a list of
    ``(own_move, opponent_move)`` tuples (oldest first), and must return a
    move that ``payoff_matrix`` knows.

    Args:
        players: Strategy functions. Scores are keyed by ``__name__``, so
            the names must be unique.
        rounds: Number of rounds played in each match.
        include_self_play: When true, every strategy also plays one match
            against an independent copy of itself. By default (false) a
            strategy never meets its own kind, which favours strategies
            that would score poorly against themselves.

    Returns:
        A list of ``(strategy_name, total_score)`` tuples sorted by total
        score, highest first.
    """
    total_scores = {player.__name__: 0 for player in players}
    for i, player1 in enumerate(players):
        first_opponent = i if include_self_play else i + 1
        for j in range(first_opponent, len(players)):
            player2 = players[j]
            is_self_match = i == j
            # Each seat keeps its own view of the match, even in self-play.
            history1 = []
            history2 = []
            for _ in range(rounds):
                move1 = player1(history1)
                move2 = player2(history2)
                score1, score2 = payoff_matrix[(move1, move2)]
                total_scores[player1.__name__] += score1
                # In a self-match both seats share one score entry; credit
                # only one seat so the match is not counted twice.
                if not is_self_match:
                    total_scores[player2.__name__] += score2
                history1.append((move1, move2))
                history2.append((move2, move1))
    return sorted(total_scores.items(), key=lambda item: item[1], reverse=True)
# Repeat the tournament many times: several strategies are randomized, so
# a single run is noisy.
num_tournaments = 1000
results = {player.__name__: [] for player in players}
for _ in range(num_tournaments):
    for player, score in tournament(players):
        results[player].append(score)

# Median total score per strategy, as (name, median) tuples.
medians = [(player, np.median(scores)) for player, scores in results.items()]
# Ascending order, so the highest-scoring strategy is printed last.
sorted_medians = sorted(medians, key=lambda x: x[1])

# Print each strategy in its assigned colour.
for player, median_score in sorted_medians:
    player_color = player_colors.get(player, Fore.RESET)
    # Reset after every line so the colour does not leak into later output.
    print(f'{player_color}{player}: {median_score} coins{Style.RESET_ALL}')
The code plays each match for 100 rounds, and then repeats the whole tournament 1000 times to get the median score over many iterations.
Here is the output of the results:
always_cooperate: 1347.024 coins
random_choice_cooperate: 1535.651 coins
tit_for_two_tats: 1561.442 coins
tit_for_tat: 1609.444 coins
tat_for_tit: 1619.43 coins
random_choice_neutral: 1663.855 coins
always_defect: 1711.764 coins
random_choice_defect: 1726.992 coins
In the latest Veritasium video the dilemma is presented with the reward matrix, but Tit for Tat is presented as the most efficient solution. I cannot replicate that result, and thus I'm opening this question.
I think the problem lies in the setup of your tournament. It is set up in such a way that `always_defect` never has to play against `always_defect`. So a player of any type never plays against a player of the same type.
It seems to be an advantage to be the only `always_defect` in the group.
Modifying the lines
for i in range(len(players)):
for j in range(i+1, len(players)):
to
for i in range(len(players)):
for j in range(i, len(players)):
makes it so that an `always_defect` also has to play against an `always_defect`, which changes the picture.
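However, I am not 100% sure that the accounting is done correctly for the case that a player type plays against a player of the same type. Looking at the scoring lines, it is not: in a self-match `player1.__name__` and `player2.__name__` are the same key, so `total_scores` receives both `score1` and `score2`, and that match counts double compared with every other match. Only one of the two scores should be added when `i == j`.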