I am starting to learn about DQN, and I am trying to solve the FrozenLake-v0 problem from scratch by myself using PyTorch. I will post the whole code since it is all connected.
import gym
import numpy as np
import torch as T
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from tqdm import tqdm

class LinearDeepQNetwork(nn.Module):
    def __init__(self, lr, n_action, input_dim):
        super(LinearDeepQNetwork, self).__init__()
        self.f1 = nn.Linear(input_dim, 128)
        self.f2 = nn.Linear(128, n_action)
        self.optimizer = optim.Adam(self.parameters(), lr=lr)
        self.loss = nn.MSELoss()
        self.device = T.device('cuda' if T.cuda.is_available() else 'cpu')
        self.to(self.device)

    def forward(self, state):
        layer1 = F.relu(self.f1(state))
        actions = self.f2(layer1)
        return actions
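As a quick shape check (a hypothetical sketch, assuming FrozenLake's 16 states and 4 actions), the network maps a batch of one-hot states to one Q-value per action:

net = LinearDeepQNetwork(lr=0.0001, n_action=4, input_dim=16)
dummy = T.zeros(1, 16).to(net.device)  # a batch containing one one-hot state
print(net.forward(dummy).shape)        # torch.Size([1, 4])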
The second class is the agent, and the problem is in the learning function:
class Agent():
    def __init__(self, input_dim, n_action, lr, gamma=0.99,
                 epsilon=1.0, eps_dec=1e-5, eps_min=0.01):
        self.input_dim = input_dim
        self.n_action = n_action
        self.lr = lr
        self.gamma = gamma
        self.epsilon = epsilon
        self.eps_dec = eps_dec
        self.eps_min = eps_min
        self.action_space = [i for i in range(self.n_action)]
        self.Q = LinearDeepQNetwork(self.lr, self.n_action, self.input_dim)

    def choose_action(self, observation):
        if np.random.random() > self.epsilon:
            # convert the state into a tensor and pick the greedy action
            state = T.tensor(observation).to(self.Q.device)
            actions = self.Q.forward(state)
            action = T.argmax(actions).item()
        else:
            action = np.random.choice(self.action_space)
        return action

    def decrement_epsilon(self):
        self.epsilon = self.epsilon - self.eps_dec \
            if self.epsilon > self.eps_min else self.eps_min

    def OH(self, x, l):
        # one-hot encode the discrete state x into a (1, l) tensor
        x = T.LongTensor([[x]])
        one_hot = T.FloatTensor(1, l)
        return one_hot.zero_().scatter_(1, x, 1)

    def learn(self, state, action, reward, state_):
        self.Q.optimizer.zero_grad()
        states = Variable(self.OH(state, 16)).to(self.Q.device)
        actions = T.tensor(action).to(self.Q.device)
        rewards = T.tensor(reward).to(self.Q.device)
        state_s = Variable(self.OH(state_, 16)).to(self.Q.device)

        # TD update: predicted Q for the taken action vs. bootstrapped target
        q_pred = self.Q.forward(states)[actions]
        q_next = self.Q.forward(state_s).max()
        q_target = reward + self.gamma * q_next
        loss = self.Q.loss(q_target, q_pred).to(self.Q.device)
        loss.backward()
        self.Q.optimizer.step()
        self.decrement_epsilon()
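For reference, OH returns a (1, l) one-hot row, so every state fed to the network carries a leading batch dimension of 1. A small check (state 3 of 16 is an arbitrary example):

agent = Agent(input_dim=16, n_action=4, lr=0.0001)
oh = agent.OH(3, 16)
print(oh.shape)   # torch.Size([1, 16])
print(oh[0, 3])   # tensor(1.): only position 3 is set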
Now the problem: when I run the following code, it fails in the learning phase with this error: index 1 is out of bounds for dimension 0 with size 1.
env = gym.make('FrozenLake-v0')
n_games = 5000
scores = []
eps_history = []
agent = Agent(env.observation_space.n, env.action_space.n, 0.0001)

for i in tqdm(range(n_games)):
    score = 0
    done = False
    obs = env.reset()
    while not done:
        action = agent.choose_action(obs)
        obs_, reward, done, _ = env.step(action)
        score += reward
        agent.learn(obs, action, reward, obs_)
        obs = obs_
    scores.append(score)
    eps_history.append(agent.epsilon)
    if i % 100 == 0:
        avg_score = np.mean(scores[-100:])
        print(f'score={score} avg_score={avg_score} epsilon={agent.epsilon} i={i}')
I think the problem is in the shapes of the tensors passed between the network and the agent class, but I can't figure out where it goes wrong.
Error traceback:
IndexError Traceback (most recent call last)
<ipython-input-10-2e279f658721> in <module>()
17 score+=reward
18
---> 19 agent.learn(obs,action,reward,obs_)
20 obs=obs_
21 scores.append(score)
<ipython-input-8-5359b19ec4fa> in learn(self, state, action, reward, state_)
39 state_s=Variable(self.OH(state_,16)).to(self.Q.device)
40
---> 41 q_pred=self.Q.forward(states)[actions]
42
43 q_next=self.Q.forward(state_s).max()
IndexError: index 1 is out of bounds for dimension 0 with size 1
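For intuition, the failing index operation can be reproduced in isolation; this standalone sketch assumes a (1, 4)-shaped Q-value tensor like the one forward returns for FrozenLake's 4 actions:

import torch as T

q_values = T.zeros(1, 4)  # one row of Q-values, shape (1, 4)
row = q_values[0]         # fine: index 0 is the only row
bad = q_values[1]         # IndexError: index 1 is out of bounds for dimension 0 with size 1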
Since self.Q.forward(states) returns a 2-D tensor (a matrix with a single row), you need to specify which row you are indexing. In your case, adding [0] to the result of the forward call solves the problem, and [actions] should be replaced with [actions.item()]:

self.Q.forward(states)[0][actions.item()]
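A minimal sketch of the corrected line inside learn, under that change:

# row 0 selects the single row of the (1, n_action) output,
# then actions.item() picks the scalar action index within it
q_pred = self.Q.forward(states)[0][actions.item()]

Equivalently, one could squeeze the batch dimension of the output before indexing; the point is that the action index must be applied to the action dimension, not the batch dimension.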