I am trying to write a simple python program that implements Q-Learning on the OpenAI Gym Environment Frozen Lake. I found the program code on data camp website you will find the code and link below:
Link: Q_Learning_Code
import numpy as np
import gym
import random
from tqdm import trange
env = gym.make("FrozenLake-v1", render_mode="rgb_array")
env.reset()
env.render()
print("Observation Space", env.observation_space)
print("Sample Observation", env.observation_space.sample())
print("Action Space Shape", env.action_space.n)
print("Action Space Sample", env.action_space.sample())
state_space = env.observation_space.n
print("There are ", state_space, " possible states")
action_space = env.action_space.n
print("There are ", action_space, " possible actions")
def initialize_q_table(state_space, action_space):
Qtable = np.zeros((state_space, action_space))
return Qtable
Qtable_frozenlake = initialize_q_table(state_space, action_space)
def epsilon_greedy_policy(Qtable, state, epsilon):
random_init = random.uniform(0, 1)
if(random_init > epsilon):
action = np.argmax(Qtable[state])
else:
action = env.action_space.sample()
return action
def greedy_policy(Qtable, state):
action = np.argmax(Qtable[state])
return action
n_training_episodes = 10000
learning_rate = 0.7
n_eval_episodes = 100
env_id = "FrozenLake-v1"
max_steps = 99
gamma = 0.95
eval_seed = []
max_epsilon = 1.0
min_epsilon = 0.05
decay_rate = 0.0005
def train(n_training_episodes, min_epsilon, max_epsilon, decay_rate, env, max_steps, Qtable):
for episode in trange(n_training_episodes):
epsilon = min_epsilon + (max_epsilon - min_epsilon)*np.exp(-decay_rate*episode)
state = env.reset()
step = 0
done = False
for step in range(max_steps):
action = epsilon_greedy_policy(Qtable, state, epsilon)
new_state, reward, done, trunc, info = env.step(action)
Qtable[state][action] = Qtable[state][action] + learning_rate * (reward + gamma * np.max(Qtable[new_state]) - Qtable[state][action])
if(done):
break
state = new_state
return Qtable
Qtable_frozenlake = train(n_training_episodes, min_epsilon, max_epsilon, decay_rate, env, max_steps, Qtable_frozenlake)
When I run the program I get the following error:
Traceback (most recent call last):
File "/tmp/ipykernel_15859/3962363982.py", line 80, in <module>
Qtable_frozenlake = train(n_training_episodes, min_epsilon, max_epsilon, decay_rate, env, max_steps, Qtable_frozenlake)
File "/tmp/ipykernel_15859/3962363982.py", line 71, in train
Qtable[state][action] = Qtable[state][action] + learning_rate * (reward + gamma * np.max(Qtable[new_state]) - Qtable[state][action])
IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices
What does the error mean? How can I fix this error?
env.reset()
usually returns a tuple of state
and info
. This is the case here as well:
>> env.reset()
(0, {'prob': 1})
A tuple is then not recognized by numpy
as a way of indexing, and thus will raise an Exception. What you want to be willing to do is either:
state, info = env.reset()
# or
state, _ = env.reset()
# or
state = env.reset()[0]
Another point I would like to add, is that you will also need to check for truncation of the environment. Instead of checking if done
, you will want to check for if done or trunc
in order to reset the environment. This may not be relevant in this scenario, but is good practise for other environments.