python tensorflow keras openai-gym dqn

How to take two arrays as output from Gym.Env to fit to DQN NN


I can't figure out how to make my gym.Env return two separate arrays as its observation. The two arrays end up combined into a single array that contains both of them, but the DQN network I'm fitting has two inputs and expects two separate arrays. I'd like to feed each array into the network separately.

I've tried to show as much of the code as I can, but there's a lot of it.

I've tried playing around with the observation space a bit (both Box and Tuple), but I can't figure out where I'm going wrong.

class GoEnv(gym.Env):
    """Gym environment whose observation is a pair of float32 arrays.

    The declared observation space is a ``Tuple`` of two unbounded ``Box``
    spaces with shapes ``(2, 11)`` and ``(1, 11)``; the action space has
    three discrete actions.
    """

    def __init__(self):
        self.action_space = spaces.Discrete(3)

        # Both sub-spaces are unbounded float32 boxes; only the shape differs.
        data_space = spaces.Box(low=-np.inf, high=np.inf,
                                shape=(2, 11), dtype=np.float32)
        account_space = spaces.Box(low=-np.inf, high=np.inf,
                                   shape=(1, 11), dtype=np.float32)
        self.observation_space = spaces.Tuple([data_space, account_space])

    def step(self, action):
        """Return ``(state, reward, done, info)`` for the current step.

        ``state`` is a two-element list: ``self.data`` and ``self.account``,
        each converted to a NumPy array.
        """
        # NOTE(review): `reward` and `info` are not assigned anywhere in this
        # excerpt -- presumably the code that computes them was elided.
        data_obs = np.array(self.data)
        account_obs = np.array(self.account)
        return [data_obs, account_obs], reward, self.done, info

# Instantiate the environment; GoEnv is reached through the `env` module here.
envi = env.GoEnv()

def data_model():
    """Build the two-input Keras model that is handed to the DQN agent.

    Inputs (batch dimension omitted):
        * data input of shape ``(500, 2, 11)``
        * account input of shape ``(500, 1, 11)``

    Each input goes through its own small branch, the flattened branch
    outputs are concatenated, and a dense head produces 3 softmax outputs.

    Returns:
        A ``keras.Model`` taking ``[data_input, account_input]``.
    """
    # Layers are created in a fixed order on purpose: Keras auto-generated
    # layer names (input_1, input_2, ...) depend on construction order.
    data_in = layers.Input(shape=(500, 2, 11))
    account_in = layers.Input(shape=(500, 1, 11))

    # Data branch: one convolution spanning all 500 rows of the first axis
    # (kernel and stride are both (500, 1)), followed by two dense layers.
    window_conv = layers.Conv2D(
        filters=32,
        activation='swish',
        kernel_size=(500, 1),
        padding='valid',
        strides=(500, 1),
    )
    data_features = window_conv(data_in)
    data_features = layers.Dense(3, activation='swish')(data_features)
    data_features = layers.Dense(3, activation='softmax')(data_features)
    data_features = layers.Flatten()(data_features)
    data_branch = keras.Model(inputs=data_in, outputs=data_features)

    # Account branch: two dense layers applied directly to the input.
    account_features = layers.Dense(3, activation='swish')(account_in)
    account_features = layers.Dense(3, activation='softmax')(account_features)
    account_features = layers.Flatten()(account_features)
    account_branch = keras.Model(inputs=account_in, outputs=account_features)

    # Join both branches and map to one output per action.
    merged = layers.concatenate([data_branch.output, account_branch.output])

    head = layers.Flatten()(merged)
    head = layers.Dense(64, activation='swish')(head)
    head = layers.Dense(3, activation='softmax')(head)

    return keras.Model(inputs=[data_branch.input, account_branch.input],
                       outputs=head)

# Build the two-input network and print its layer summary.
model = data_model()
model.summary()
# Number of discrete actions; same count as spaces.Discrete(3) in GoEnv.
actions = 3

def build_agent(model, actions):
    """Build a keras-rl ``DQNAgent`` around ``model``.

    When ``model`` has more than one input, a ``MultiInputProcessor`` is
    attached so that each windowed state (a sequence of multi-part
    observations) is split into one batch array per model input. Without
    it, keras-rl passes the whole state to ``predict_on_batch`` as a single
    nested array and Keras raises "Expected to see 2 array(s) ... but
    instead got the following list of 1 arrays".

    Args:
        model: Keras model used by the agent; may have one or several inputs.
        actions: Number of discrete actions (forwarded as ``nb_actions``).

    Returns:
        The constructed ``DQNAgent``. It is not compiled here.
    """
    nb_inputs = len(model.inputs)
    if nb_inputs > 1:
        # Imported locally so single-input callers do not need this module.
        from rl.processors import MultiInputProcessor

        # Each observation from the environment must then be a sequence
        # with exactly `nb_inputs` parts, in the same order as model.inputs.
        processor = MultiInputProcessor(nb_inputs=nb_inputs)
    else:
        # Same as keras-rl's default: observations are used as-is.
        processor = None

    policy = BoltzmannQPolicy()
    memory = SequentialMemory(limit=50000, window_length=500)
    dqn = DQNAgent(model=model,
                   memory=memory,
                   policy=policy,
                   processor=processor,
                   nb_actions=actions,
                   nb_steps_warmup=600,
                   target_model_update=1e-2)
    return dqn
# Create the agent and train it on the environment for 6000 steps,
# with rendering disabled.
dqn = build_agent(model, actions)
dqn.fit(envi, nb_steps=6000, visualize=False, verbose=1)
Traceback (most recent call last):
  File "C:/Users/Worrall/PycharmProjects/Prject/main.py", line 46, in <module>
    dqn.fit(envi, nb_steps=6000, visualize=False, verbose=1)
  File "C:\Users\Worrall\PycharmProjects\DocumentRecog\venv\lib\site-packages\rl\core.py", line 168, in fit
    action = self.forward(observation)
  File "C:\Users\Worrall\PycharmProjects\DocumentRecog\venv\lib\site-packages\rl\agents\dqn.py", line 224, in forward
    q_values = self.compute_q_values(state)
  File "C:\Users\Worrall\PycharmProjects\DocumentRecog\venv\lib\site-packages\rl\agents\dqn.py", line 68, in compute_q_values
    q_values = self.compute_batch_q_values([state]).flatten()
  File "C:\Users\Worrall\PycharmProjects\DocumentRecog\venv\lib\site-packages\rl\agents\dqn.py", line 63, in compute_batch_q_values
    q_values = self.model.predict_on_batch(batch)
  File "C:\Users\Worrall\PycharmProjects\DocumentRecog\venv\lib\site-packages\tensorflow\python\keras\engine\training_v1.py", line 1200, in predict_on_batch
    inputs, _, _ = self._standardize_user_data(
  File "C:\Users\Worrall\PycharmProjects\DocumentRecog\venv\lib\site-packages\tensorflow\python\keras\engine\training_v1.py", line 2328, in _standardize_user_data
    return self._standardize_tensors(
  File "C:\Users\Worrall\PycharmProjects\DocumentRecog\venv\lib\site-packages\tensorflow\python\keras\engine\training_v1.py", line 2356, in _standardize_tensors
    x = training_utils.standardize_input_data(
  File "C:\Users\Worrall\PycharmProjects\DocumentRecog\venv\lib\site-packages\tensorflow\python\keras\engine\training_utils.py", line 533, in standardize_input_data
    raise ValueError('Error when checking model ' + exception_prefix +
ValueError: Error when checking model input: the list of Numpy arrays that you are passing to your model is not the size the model expected. Expected to see 2 array(s), for inputs ['input_1', 'input_2'] but instead got the following list of 1 arrays: [array([[[array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]),
         array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])],
        [array([[...

Solution

  • Okay, so I found a workaround. I hope this is helpful to anyone having a similar problem, as there doesn't seem to be much information about the original issue.

    I feed the data in as a single array, with the (2, 11) data array and the (1, 11) account array stacked into one (3, 11) array:

    data_input = Input(shape=(500, 3, 11))
    

    and then I am able to split the input inside the model itself using tf.split():

    min1, min15, acc = tf.split(data_input, num_or_size_splits=3, axis=2)
    

    and then run each piece through its own network before combining them again with concatenate:

        z = concatenate([acc, combined])
        z = Flatten()(z)
        z = Dense(512, activation='swish')(z)
        z = Dense(512, activation='swish')(z)
        z = Dense(3, activation='softmax')(z)
        model = keras.Model(inputs=data_input, outputs=z)
        return model