import random
import gym
import numpy as np
from collections import deque
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
import os
env = gym.make('CartPole-v0')
state_size = env.observation_space.shape[0]
action_size = env.action_space.n
batch_size = 32
n_episodes = 1000
output_dir = 'model_output/cartpole'
if not os.path.exists(output_dir):
    os.makedirs(output_dir)
class DQNAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.9
        self.epsilon = 1.0
        self.epsilon_decay = 0.995
        self.epsilon_min = 0.05
        self._learning_rate = 0.01
        self.model = self._build_model()

    def _build_model(self):
        model = Sequential()
        model.add(Dense(24, input_dim=self.state_size, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(50, activation='relu'))
        model.add(Dense(self.action_size, activation='sigmoid'))
        model.compile(loss='mse', optimizer=Adam(lr=self._learning_rate))
        return model
    def remember(self, state, action, reward, next_state, done):
        self.memory.append((self, state, action, reward, next_state, done))

    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        act_values = self.model.predict(state)
        return np.argmax(act_values[0])
    def replay(self, batch_size):
        minibatch = random.sample(self.memory, batch_size)
        print(len(minibatch))
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                target = (reward + self.gamma * np.amax(self.model.predict(next_state)[0]))
            target_f = self.model.predict(state)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay
    def load(self, name):
        self.model.load_weights(name)

    def save(self, name):
        self.model.save_weights(name)
agent = DQNAgent(state_size, action_size)
done = False
for e in range(n_episodes):
    state = env.reset()
    state = np.reshape(state, [1, state_size])
    if agent.epsilon > agent.epsilon_min:
        agent.epsilon *= agent.epsilon_decay
    for time in range(5000):
        # env.render()
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        reward = reward if not done else -10
        next_state = np.reshape(next_state, [1, state_size])
        agent.remember(state, action, reward, next_state, done)
        state = next_state
        if done:
            print("episode: {}/{}, score: {}, e: {:.2}".format(e, n_episodes, time, agent.epsilon))
            break
    if len(agent.memory) > batch_size:
        agent.replay(batch_size)
    if e % 50 == 0:
        agent.save(output_dir + "weights_" + '{:04d}'.format(e) + ".hdf5")
I am creating an algorithm for the CartPole environment in OpenAI Gym, but I am getting this error:
Traceback (most recent call last):
  File "C:/Users/ardao/Desktop/Ardaficial Intelligence/DQNs/CartPole.py", line 145, in <module>
    agent.replay(batch_size)
  File "C:/Users/ardao/Desktop/Ardaficial Intelligence/DQNs/CartPole.py", line 93, in replay
    for state, action, reward, next_state, done in minibatch:
ValueError: too many values to unpack (expected 5)
I am following this tutorial: https://www.youtube.com/watch?v=OYhFoMySoVs&t=2444s
Thanks
Arda
You just added an extra self in remember. The error is fairly self-explanatory once you look at it:

too many values to unpack (expected 5)

The for loop in replay unpacks five names, but each tuple in memory holds six values because of the stray self. Comparing against the code in the YouTube tutorial shows the same thing; mistakes like this are easy to miss when you are starting out. The fix is to append the tuple without self:

self.memory.append((state, action, reward, next_state, done))
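If it helps, here is a minimal standalone sketch (toy values, not taken from your script) that reproduces the same ValueError and shows how dropping the extra item fixes it:

from collections import deque

memory = deque(maxlen=10)
agent = "stand-in for the DQNAgent instance (the stray self)"

# With the stray `self`, each entry is a 6-tuple...
memory.append((agent, [0.1, 0.2, 0.3, 0.4], 1, 1.0, [0.1, 0.2, 0.3, 0.5], False))
try:
    for state, action, reward, next_state, done in memory:  # 5 names, 6 values
        pass
except ValueError as err:
    print(err)  # too many values to unpack (expected 5)

# ...without it, each entry is a 5-tuple and the unpacking works.
memory.clear()
memory.append(([0.1, 0.2, 0.3, 0.4], 1, 1.0, [0.1, 0.2, 0.3, 0.5], False))
for state, action, reward, next_state, done in memory:
    print(reward, done)  # 1.0 False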