I am trying to avoid calling model.predict() and model.fit() inside a for loop, in order to speed up training, so I'm trying to adapt this solution to my case, but I get an error. The model has three inputs.
This is my code:
n_possible_movements = 9
MINIBATCH_SIZE = 32
class DQNAgent(object):
def __init__(self):
self.epsilon = 1.0
self.epsilon_decay = 0.8
self.epsilon_min = 0.1
self.learning_rate = 10e-4
self.tau = 1e-3
# Main models
self.model_uav_pos = self._build_pos_model()
# Target networks
self.target_model_uav_pos = self._build_pos_model()
# Copy weights
self.target_model_uav_pos.set_weights(self.model_uav_pos.get_weights())
# An array with last n steps for training
self.replay_memory_pos_nn = deque(maxlen=REPLAY_MEMORY_SIZE)
def _build_pos_model(self): # compile the DNN
# create the DNN model
dnn = self.create_pos_dnn()
opt = Adam(learning_rate=self.learning_rate) #, decay=self.epsilon_decay)
dnn.compile(loss="mse", optimizer=opt)
return dnn
def create_pos_dnn(self):
# initialize the input shape
pos_input_shape = (2,)
requests_input_shape = (len(env.ues),)
number_of_satisfied_ues_input_shape = (1,)
# How many possible outputs we can have
output_nodes = n_possible_movements
# Initialize the inputs
uav_current_position = Input(shape=pos_input_shape, name='pos')
ues_requests = Input(shape=requests_input_shape, name='requests')
number_of_satisfied_ues = Input(shape=number_of_satisfied_ues_input_shape, name='number_of_satisfied_ues')
# Put them in a list
list_inputs = [uav_current_position, ues_requests, number_of_satisfied_ues]
# Merge all input features into a single large vector
x = layers.concatenate(list_inputs)
# Add a 1st Hidden (Dense) Layer
dense_layer_1 = Dense(512, activation="relu")(x)
# Add a 2nd Hidden (Dense) Layer
dense_layer_2 = Dense(512, activation="relu")(dense_layer_1)
# Add a 3rd Hidden (Dense) Layer
dense_layer_3 = Dense(256, activation="relu")(dense_layer_2)
# Output layer
output_layer = Dense(output_nodes, activation="linear")(dense_layer_3)
model = Model(inputs=list_inputs, outputs=output_layer)
# return the DNN
return model
def remember_pos_nn(self, state, action, reward, next_state, done):
self.replay_memory_pos_nn.append((state, action, reward, next_state, done)) # list of previous experiences, enabling re-training later
def act_upon_choosing_a_new_position(self, state): # state is a tuple (uav_position, requests_array, number_satisfaction)
if np.random.rand() <= self.epsilon: # if acting randomly, take random action
return random.randrange(n_possible_movements)
pos = np.array([state[0]])
reqs = np.array([state[1]])
number_satisfaction = np.array([state[2]])
act_values = self.model_uav_pos.predict([pos, reqs, number_satisfaction]) # if not acting randomly, predict reward value based on current state
return np.argmax(act_values[0])
def target_train(self):
weights = self.model_uav_pos.get_weights()
target_weights = self.target_model_uav_pos.get_weights()
for i in range(len(target_weights)):
target_weights[i] = weights[i] * self.tau + target_weights[i] * (1 - self.tau)
self.target_model_uav_pos.set_weights(target_weights)
This is the training function before I introduced the changes suggested in the link:
def train_pos_nn(self):
print("In Training..")
# Start training only if certain number of samples is already saved
if len(self.replay_memory_pos_nn) < MIN_REPLAY_MEMORY_SIZE:
print("Exiting Training: Replay Memory Not Full Enough...")
return
# Get a minibatch of random samples from memory replay table
list_memory = list(self.replay_memory_pos_nn)
random.shuffle(list_memory)
minibatch = random.sample(list_memory, MINIBATCH_SIZE)
start_time = time.time()
# Enumerate our batches
for index, (current_state, action, reward, new_current_state, done) in enumerate(minibatch):
print('...Starting Training...')
target = 0
pos = np.array([current_state[0]])
reqs = np.array([current_state[1]])
number_satisfaction = np.array([current_state[2]])
pos_next = np.array([new_current_state[0]])
reqs_next = np.array([new_current_state[1]])
number_satisfaction_next = np.array([new_current_state[2]])
# If not a terminal state, get new q from future states, otherwise set it to 0
# almost like with Q Learning, but we use just part of equation here
if not done:
print("Predict Next State")
target = reward + DISCOUNT * np.amax(self.target_model_uav_pos.predict([pos_next, reqs_next, number_satisfaction_next]))
else:
target = reward
# Update Q value for given state
print("Predict State")
target_f = self.model_uav_pos.predict([pos, reqs, number_satisfaction])
target_f = np.array(target_f)
target_f[0][action] = target
self.model_uav_pos.fit([pos, reqs, number_satisfaction], \
target_f, \
verbose=2, \
shuffle=False, \
callbacks=None, \
epochs=1 \
)
end_time = time.time()
print("Time", end_time - start_time)
# Update target network counter every episode
self.target_train()
This is the training function after I introduced the changes:
def train_pos_nn(self):
print("In Training..")
# Start training only if certain number of samples is already saved
if len(self.replay_memory_pos_nn) < MIN_REPLAY_MEMORY_SIZE:
print("Exiting Training: Replay Memory Not Full Enough...")
return
# Get a minibatch of random samples from memory replay table
list_memory = list(self.replay_memory_pos_nn)
random.shuffle(list_memory)
# Draw a sample
samples = random.sample(list_memory, MINIBATCH_SIZE)
start_time = time.time()
# Prepare the batch
state, action, reward, new_state, done = zip(*samples)
nstate = []
cstate = []
start_time_2 = time.time()
for n_state in new_state:
pos_next = np.array([n_state[0]])
reqs_next = np.array([n_state[1]])
number_satisfaction_next = np.array([n_state[2]])
nstate.append([pos_next,reqs_next,number_satisfaction_next])
for curr_state in state:
pos = np.array([curr_state[0]])
reqs = np.array([curr_state[1]])
number_satisfaction = np.array([curr_state[2]])
cstate.append([pos,reqs,number_satisfaction])
end_time_2 = time.time()
print("Time 2", end_time_2 - start_time_2)
#next_state = np.concatenate(new_state)
#next_state = np.concatenate(nstate)
#print("next_state", nstate[0], "len", len(nstate))#np.asarray(nstate).shape)# np.shape(nstate))
done = np.array(done)[:,None]
#state = np.concatenate(state)
reward = np.array(reward)[:,None]
q_future = self.target_model_uav_pos.predict(nstate)#np.vstack(nstate))
targets = reward + DISCOUNT * np.max(q_future, axis=1, keepdims=True)
# Fit the model
self.model_uav_pos.fit(cstate, targets, epochs=1, verbose=2)
end_time = time.time()
print("Time", end_time - start_time)
self.target_train()
The line q_future = self.target_model_uav_pos.predict(nstate) throws ValueError: Layer "model_69" expects 3 input(s), but it received 96 input tensors (3 inputs for each of the 32 examples in nstate; I get the same error when I use predict_on_batch()).
I don't know how to do it correctly. Any help would be appreciated.
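For reference, here is the shape difference as I understand it (a toy sketch with made-up sizes; I'm assuming len(env.ues) == 5 here purely for illustration):
import numpy as np
# what my code builds: a Python list of 32 per-sample lists, each holding 3 arrays
# with batch size 1 -- which Keras apparently counts as 32 * 3 = 96 input tensors
nstate_wrong = [[np.zeros((1, 2)), np.zeros((1, 5)), np.zeros((1, 1))] for _ in range(32)]
# what (I think) the 3-input model wants: ONE list of 3 arrays, each with 32 rows
nstate_right = [np.zeros((32, 2)), np.zeros((32, 5)), np.zeros((32, 1))]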
Edit
Using the answer below and the code here, I wrote the following:
def train_pos_nn(self):
print("In Training..")
# Start training only if certain number of samples is already saved
if len(self.replay_memory_pos_nn) < MIN_REPLAY_MEMORY_SIZE:
print("Exiting Training: Replay Memory Not Full Enough...")
return
# Get a minibatch of random samples from memory replay table
list_memory = list(self.replay_memory_pos_nn)
random.shuffle(list_memory)
samples = random.sample(list_memory, MINIBATCH_SIZE)
start_time = time.time()
state = []
new_state = []
action, reward, done = [], [], []
# do this before prediction
# for speedup, this could be done on the tensor level
# but easier to understand using a loop
for i in range(MINIBATCH_SIZE):
state.append(samples[i][0])
action.append(samples[i][1])
reward.append(samples[i][2])
new_state.append(samples[i][3])
done.append(samples[i][4])
nstate = OrderedDict(pos=[], reqs=[], num_satisfaction=[])
cstate = OrderedDict(pos=[], reqs=[], num_satisfaction=[])
for n_state in new_state:
pos_next = np.array([n_state[0]])
reqs_next = np.array([n_state[1]])
number_satisfaction_next = np.array([n_state[2]])
# add elements
for k, v in zip(nstate.keys(), [pos_next, reqs_next, number_satisfaction_next]):
nstate[k].append(v)
for curr_state in state:
pos = np.array([curr_state[0]])
reqs = np.array([curr_state[1]])
number_satisfaction = np.array([curr_state[2]])
# add elements
for k, v in zip(cstate.keys(), [pos, reqs, number_satisfaction]):
cstate[k].append(v)
# now concat each list of values in nstate (and cstate),
# to get a list with 3 arrays each of MINIBATCH size
nstate = [np.concatenate(v, axis=0) for v in nstate.values()]
cstate = [np.concatenate(v, axis=0) for v in cstate.values()]
# do batch prediction to save speed
# predict Q-values for starting state using the main network
target = self.model_uav_pos.predict(cstate)
#q_target = target.copy()
target_ = np.array(target)
# predict Q-values for ending state using the target network
target_val = self.target_model_uav_pos.predict(nstate)
target_val_ = np.array(target_val)
for i in range(len(samples)):
# correction on the Q value for the action used
if done[i]:
target_[i][action[i]] = reward[i]
else:
# Standard - DQN
# DQN chooses the max Q value among next actions
# selection and evaluation of action is on the target Q Network
# Q_max = max_a' Q_target(s', a')
#print("Target Val", target_val)
target_[i][action[i]] = reward[i] + DISCOUNT * (np.amax(target_val_[i]))
_ = self.model_uav_pos.fit(cstate, target_, verbose=2)
end_time = time.time()
print("Time", end_time - start_time)
self.target_train()
I guess the issue is in how you append the 3 inputs to nstate and cstate in train_pos_nn(): as the error suggests, that yields 96 tensors, meaning your nstate is a list of lists (and likewise cstate) instead of a list of 3 numpy arrays, each of batch size MINIBATCH_SIZE.
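In other words, the structure needs to be transposed: instead of 32 inner lists of 3 arrays, you want 3 arrays with 32 rows each. One compact way to do that (equivalent to the OrderedDict approach below, and just as untested) would be:
# nstate currently looks like: [[pos_1, reqs_1, num_1], [pos_2, reqs_2, num_2], ...]
# the model wants: [all_pos, all_reqs, all_num], each with MINIBATCH_SIZE rows;
# this works because each per-sample array already has a leading batch axis of 1
# (that is what np.array([...]) gives you)
nstate = [np.concatenate(component, axis=0) for component in zip(*nstate)]
cstate = [np.concatenate(component, axis=0) for component in zip(*cstate)]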
Try this:
from collections import OrderedDict
def train_pos_nn(self):
print("In Training..")
# Start training only if certain number of samples is already saved
if len(self.replay_memory_pos_nn) < MIN_REPLAY_MEMORY_SIZE:
print("Exiting Training: Replay Memory Not Full Enough...")
return
# Get a minibatch of random samples from memory replay table
list_memory = list(self.replay_memory_pos_nn)
random.shuffle(list_memory)
# Draw a sample
samples = random.sample(list_memory, MINIBATCH_SIZE)
start_time = time.time()
# Prepare the batch
state, action, reward, new_state, done = zip(*samples)
nstate = OrderedDict(pos=[], reqs=[], num_satisfaction=[])
cstate = OrderedDict(pos=[], reqs=[], num_satisfaction=[])
start_time_2 = time.time()
for n_state in new_state:
pos_next = np.array([n_state[0]])
reqs_next = np.array([n_state[1]])
number_satisfaction_next = np.array([n_state[2]])
# nstate.append([pos_next,reqs_next,number_satisfaction_next])
# add elements
for k, v in zip(nstate.keys(), [pos_next, reqs_next, number_satisfaction_next]):
nstate[k].append(v)
for curr_state in state:
pos = np.array([curr_state[0]])
reqs = np.array([curr_state[1]])
number_satisfaction = np.array([curr_state[2]])
# cstate.append([pos,reqs,number_satisfaction])
# add elements
for k, v in zip(cstate.keys(), [pos, reqs, number_satisfaction]):
cstate[k].append(v)
# now concat each list of values in nstate (and cstate),
# to get a list with 3 arrays each of MINIBATCH size
nstate = [np.concatenate(v, axis=0) for v in nstate.values()]
cstate = [np.concatenate(v, axis=0) for v in cstate.values()]
end_time_2 = time.time()
print("Time 2", end_time_2 - start_time_2)
#print("next_state", nstate[0], "len", len(nstate))#np.asarray(nstate).shape)# np.shape(nstate))
done = np.array(done)[:,None]
reward = np.array(reward)[:,None]
q_future = self.target_model_uav_pos.predict(nstate)
targets = reward + DISCOUNT * np.max(q_future, axis=1, keepdims=True)
# Fit the model
self.model_uav_pos.fit(cstate, targets, epochs=1, verbose=2)
end_time = time.time()
print("Time", end_time - start_time)
self.target_train()
I haven't tested it, so it may not run on the first try; if so, check the shape of each element of nstate and cstate, adjust, then try again.
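For example, a quick sanity check right before the predict call could look like this (assuming numpy is imported as np):
for name, arr in zip(['pos', 'reqs', 'num_satisfaction'], nstate):
    print(name, np.asarray(arr).shape)  # each should be (MINIBATCH_SIZE, feature_dim)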
Hope it helps.