python, python-3.x, openai-gym

Getting error: ValueError: too many values to unpack (expected 5)


import random
import gym
import numpy as np
from collections import deque
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
import os


env = gym.make('CartPole-v0')
state_size = env.observation_space.shape[0]

action_size = env.action_space.n


batch_size = 32

n_episodes = 1000

output_dir = 'model_output/cartpole'

if not os.path.exists(output_dir):
     os.makedirs(output_dir)


class DQNAgent:
     def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size

        self.memory = deque(maxlen=2000)

        self.gamma = 0.9
        self.epsilon = 1.0
        self.epsilon_decay = 0.995
        self.epsilon_min = 0.05

        self._learning_rate = 0.01

        self.model = self._build_model()

     def _build_model(self):

         model = Sequential()

         model.add(Dense(24, input_dim = self.state_size, activation='relu'))
         model.add(Dense(24,activation='relu'))
         model.add(Dense(24,activation='relu'))
         model.add(Dense(24,activation='relu'))
         model.add(Dense(24,activation='relu'))
         model.add(Dense(24,activation='relu'))
         model.add(Dense(24,activation='relu'))
         model.add(Dense(24,activation='relu'))
         model.add(Dense(24,activation='relu'))
         model.add(Dense(24,activation='relu'))
         model.add(Dense(24,activation='relu'))
         model.add(Dense(24,activation='relu'))
         model.add(Dense(24,activation='relu'))
         model.add(Dense(24,activation='relu'))
         model.add(Dense(24,activation='relu'))
         model.add(Dense(24,activation='relu'))
         model.add(Dense(24,activation='relu'))
         model.add(Dense(24,activation='relu'))
         model.add(Dense(24,activation='relu'))
         model.add(Dense(24,activation='relu'))
         model.add(Dense(24,activation='relu'))
         model.add(Dense(24,activation='relu'))
         model.add(Dense(24,activation='relu'))
         model.add(Dense(24,activation='relu'))
         model.add(Dense(24,activation='relu'))
         model.add(Dense(50,activation='relu'))

         model.add(Dense(self.action_size, activation='sigmoid'))
         model.compile(loss='mse', optimizer=Adam(lr=self._learning_rate))

         return model


     def remember(self, state, action, reward, next_state, done):
        self.memory.append((self, state, action, reward, next_state, done))

     def act(self, state):
        if np.random.rand() <= self.epsilon:
           return random.randrange(self.action_size)
        act_values = self.model.predict(state)
        return np.argmax(act_values[0])

     def replay(self, batch_size):

         minibatch = random.sample(self.memory, batch_size)
         print(len(minibatch))
         for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                target = (reward + self.gamma*np.amax(self.model.predict(next_state)[0]))

            target_f = self.model.predict(state)
            target_f[0][action] = target

            self.model.fit(state, target_f, epochs=1, verboss=0)

            if self.epsilon > self.epsilon_min:
               self.epsilon *= self.epsilon_decay

     def load(self,name):
         self.model.load_weights(name)

     def save(self, name):
          self.model.save_weights(name)



agent = DQNAgent(state_size, action_size)

done = False

for e in range(n_episodes):
     state = env.reset()
     state = np.reshape(state, [1, state_size])
     if agent.epsilon > agent.epsilon_min:
        agent.epsilon *= agent.epsilon_decay

     for time in range(5000):

         # env.render()
          action = agent.act(state)

          next_state, reward, done,  _ = env.step(action)

          reward = reward if not done else -10

          next_state = np.reshape(next_state, [1, state_size])

          agent.remember(state, action, reward, next_state, done)

          state = next_state

          if done:
             print("episode: {}/{}, score: {}, e: {:.2}".format(e, n_episodes, time, agent.epsilon))
             break

     if len(agent.memory) > batch_size:

        agent.replay(batch_size)

if e % 50 == 0:
    agent.save(output_dir + "weights_" + '{:04d}'.format(e) + ".hdf5")          

I am writing a DQN algorithm for the CartPole environment in OpenAI Gym, but I am getting this error:

Traceback (most recent call last):
  File "C:/Users/ardao/Desktop/Ardaficial Intelligence/DQNs/CartPole.py", line 145, in <module>
    agent.replay(batch_size)
  File "C:/Users/ardao/Desktop/Ardaficial Intelligence/DQNs/CartPole.py", line 93, in replay
    for state, action, reward, next_state, done in minibatch:
ValueError: too many values to unpack (expected 5)

I am following this tutorial: https://www.youtube.com/watch?v=OYhFoMySoVs&t=2444s

Thanks

Arda


Solution

  • You just added an extra `self` when appending to memory; removing it should fix it. The error is fairly self-explanatory once you look at it:

    too many values to unpack (expected 5)

    The loop in replay unpacks 5 values per tuple, but the line in remember appends 6 because of the extra self. Checking the code in the YouTube tutorial shows the tuple should hold only the transition itself. Slips like this are easy to miss when you are starting out, so take a breath and read the line over slowly next time; you may well catch it yourself. The corrected line is:

     self.memory.append((state, action, reward, next_state, done))
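
    If it helps to see the failure in isolation, here is a minimal, self-contained sketch (the transition values are made up for illustration) that reproduces the unpacking error with a 6-tuple and shows the corrected 5-tuple working:

     from collections import deque

     memory = deque(maxlen=2000)

     # Buggy version: six items per tuple because of the stray `self`.
     memory.append(("self", [0.1, 0.2], 1, 1.0, [0.3, 0.4], False))
     try:
         for state, action, reward, next_state, done in memory:
             pass
     except ValueError as err:
         print(err)  # too many values to unpack (expected 5)

     # Fixed version: exactly the five values the loop unpacks.
     memory.clear()
     memory.append(([0.1, 0.2], 1, 1.0, [0.3, 0.4], False))
     for state, action, reward, next_state, done in memory:
         print(action, reward, done)  # 1 1.0 False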