I am starting to learn about DQN, and I am trying to solve the FrozenLake-v0 problem from scratch by myself using PyTorch. I will post the whole code since it is all connected.
import gym
import numpy as np
import torch as T
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from tqdm import tqdm

class LinearDeepQNetwork(nn.Module):
    def __init__(self, lr, n_action, input_dim):
        super(LinearDeepQNetwork, self).__init__()
        self.f1 = nn.Linear(input_dim, 128)
        self.f2 = nn.Linear(128, n_action)
        self.optimizer = optim.Adam(self.parameters(), lr=lr)
        self.loss = nn.MSELoss()
        self.device = T.device('cuda' if T.cuda.is_available() else 'cpu')
        self.to(self.device)

    def forward(self, state):
        layer1 = F.relu(self.f1(state))
        actions = self.f2(layer1)
        return actions
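As a quick shape check (a hypothetical sketch, assuming FrozenLake's 16 states and 4 actions), the network maps a batch of one-hot states to one Q-value per action:

net = LinearDeepQNetwork(lr=0.0001, n_action=4, input_dim=16)
dummy = T.zeros(1, 16).to(net.device)  # a batch containing one one-hot state
print(net.forward(dummy).shape)        # torch.Size([1, 4])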
The second class is the agent, and the problem is in the learning function:
class Agent():
    def __init__(self, input_dim, n_action, lr, gamma=0.99,
                 epsilon=1.0, eps_dec=1e-5, eps_min=0.01):
        self.input_dim = input_dim
        self.n_action = n_action
        self.lr = lr
        self.gamma = gamma
        self.epsilon = epsilon
        self.eps_dec = eps_dec
        self.eps_min = eps_min
        self.action_space = [i for i in range(self.n_action)]
        self.Q = LinearDeepQNetwork(self.lr, self.n_action, self.input_dim)

    def choose_action(self, observation):
        if np.random.random() > self.epsilon:
            # convert the state into a tensor and pick the greedy action
            state = T.tensor(observation).to(self.Q.device)
            actions = self.Q.forward(state)
            action = T.argmax(actions).item()
        else:
            action = np.random.choice(self.action_space)
        return action

    def decrement_epsilon(self):
        self.epsilon = self.epsilon - self.eps_dec \
            if self.epsilon > self.eps_min else self.eps_min

    def OH(self, x, l):
        # one-hot encode the discrete state x into a (1, l) tensor
        x = T.LongTensor([[x]])
        one_hot = T.FloatTensor(1, l)
        return one_hot.zero_().scatter_(1, x, 1)

    def learn(self, state, action, reward, state_):
        self.Q.optimizer.zero_grad()
        states = Variable(self.OH(state, 16)).to(self.Q.device)
        actions = T.tensor(action).to(self.Q.device)
        rewards = T.tensor(reward).to(self.Q.device)
        state_s = Variable(self.OH(state_, 16)).to(self.Q.device)

        # TD update: predicted Q for the taken action vs. bootstrapped target
        q_pred = self.Q.forward(states)[actions]
        q_next = self.Q.forward(state_s).max()
        q_target = reward + self.gamma * q_next
        loss = self.Q.loss(q_target, q_pred).to(self.Q.device)
        loss.backward()
        self.Q.optimizer.step()
        self.decrement_epsilon()
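For reference, OH returns a (1, l) one-hot row, so every state fed to the network carries a leading batch dimension of 1. A small check (state 3 of 16 is an arbitrary example):

agent = Agent(input_dim=16, n_action=4, lr=0.0001)
oh = agent.OH(3, 16)
print(oh.shape)   # torch.Size([1, 16])
print(oh[0, 3])   # tensor(1.): only position 3 is set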
Now the problem: when I run the following code, it fails in the learning phase with this error: index 1 is out of bounds for dimension 0 with size 1.
env = gym.make('FrozenLake-v0')
n_games = 5000
scores = []
eps_history = []
agent = Agent(env.observation_space.n, env.action_space.n, 0.0001)

for i in tqdm(range(n_games)):
    score = 0
    done = False
    obs = env.reset()
    while not done:
        action = agent.choose_action(obs)
        obs_, reward, done, _ = env.step(action)
        score += reward
        agent.learn(obs, action, reward, obs_)
        obs = obs_
    scores.append(score)
    eps_history.append(agent.epsilon)
    if i % 100 == 0:
        avg_score = np.mean(scores[-100:])
        print(f'score={score} avg_score={avg_score} epsilon={agent.epsilon} i={i}')
I think the problem is in the shapes of the tensors passed between the network and the agent class, but I can't figure out where it goes wrong.
Error traceback:
IndexError Traceback (most recent call last)
<ipython-input-10-2e279f658721> in <module>()
17 score+=reward
18
---> 19 agent.learn(obs,action,reward,obs_)
20 obs=obs_
21 scores.append(score)
<ipython-input-8-5359b19ec4fa> in learn(self, state, action, reward, state_)
39 state_s=Variable(self.OH(state_,16)).to(self.Q.device)
40
---> 41 q_pred=self.Q.forward(states)[actions]
42
43 q_next=self.Q.forward(state_s).max()
IndexError: index 1 is out of bounds for dimension 0 with size 1
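For intuition, the failing index operation can be reproduced in isolation; this standalone sketch assumes a (1, 4)-shaped Q-value tensor like the one forward returns for FrozenLake's 4 actions:

import torch as T

q_values = T.zeros(1, 4)  # one row of Q-values, shape (1, 4)
row = q_values[0]         # fine: index 0 is the only row
bad = q_values[1]         # IndexError: index 1 is out of bounds for dimension 0 with size 1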
Since self.Q.forward(states) returns a 2-D tensor (a matrix with a single row), you need to specify which row you are indexing. In your case, adding [0] to the result of the forward call solves the problem, and [actions] should be replaced with [actions.item()]:

self.Q.forward(states)[0][actions.item()]
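A minimal sketch of the corrected line inside learn, under that change:

# row 0 selects the single row of the (1, n_action) output,
# then actions.item() picks the scalar action index within it
q_pred = self.Q.forward(states)[0][actions.item()]

Equivalently, one could squeeze the batch dimension of the output before indexing; the point is that the action index must be applied to the action dimension, not the batch dimension.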