I'm trying to build training code for an LSTM and a GRU. The LSTM trains perfectly, but once I switch to GRU training I get a size mismatch error.
This is my code:
import os
import torch
import torch.nn as nn

path = "new_z_axis"
device = "cuda:0"
in_size = 3
h_size = 50
n_layers = 3
fc = 20
out = 1
batch_size = 16
seq = 100
epoch = 100
learning_rate = 1e-3
ratio = 0.8
checkpoint = os.path.join("checkpoints","model_"+path+"_"+str(in_size)+".pth")
class GRUNet(nn.Module):
    def __init__(self,in_size,h_size,n_layers,fc_out,out_size,dropout=0.5):
        super(GRUNet, self).__init__()
        self.gru = nn.GRU(input_size=in_size,hidden_size=h_size,num_layers=n_layers,dropout=dropout,bias=False)
        self.fc = nn.Linear(in_features=h_size,out_features=fc_out,bias=False)
        self.relu = nn.ReLU(inplace=True)
        self.out = nn.Linear(in_features=fc_out,out_features=out_size,bias=False)
        self.tanh = nn.Tanh()

    def forward(self, x, hidden):
        out, hidden = self.gru(x, hidden)
        x = self.fc(x)
        x = self.relu(x)
        x = self.out(x)
        x = self.tanh(x)
        return x, hidden
class MyLstm(nn.Module):
    def __init__(self,in_size,h_size,n_layers,fc_out,out_size,dropout=0.5):
        super(MyLstm, self).__init__()
        self.lstm = nn.LSTM(input_size=in_size,hidden_size=h_size,num_layers=n_layers,dropout=dropout,bias=False)
        self.fc = nn.Linear(in_features=h_size,out_features=fc_out,bias=False)
        self.relu = nn.ReLU(inplace=True)
        self.out = nn.Linear(in_features=fc_out,out_features=out_size,bias=False)
        self.tanh = nn.Tanh()

    def forward(self,x,hidden):
        x, hidden = self.lstm(x,hidden)
        # x = x[-1:]
        x = self.fc(x)
        x = self.relu(x)
        x = self.out(x)
        x = self.tanh(x)
        return x, hidden
def train(model,train_list,val_list,path,seq,epoch,batch_size,criterion,optimizer,model_type):
    for e in range(epoch):
        train_data = load_data(train_list,batch_size)
        a_loss = 0
        a_size = 0
        model.train()
        for x,y in train_data:
            x,y = x.to(device),y.to(device)
            bs = x.size()[1]
            # hidden = (hidden[0].detach(),hidden[1].detach())
            # print(x.size(),hidden[0].size())
            if model_type == "GRU":
                h1 = torch.zeros((n_layers,bs,h_size)).to("cuda:0")
                hidden = h1
                hidden = hidden.data
            else:
                h1 = torch.zeros((n_layers,bs,h_size)).to("cuda:0")
                h2 = torch.zeros((n_layers,bs,h_size)).to("cuda:0")
                hidden = (h1,h2)
                hidden = tuple([e.data for e in hidden])
            model.zero_grad()
            print(len(hidden))
            pred,hidden = model(x,hidden)
            loss = criterion(pred,y)
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(),5)
            optimizer.step()
            a_loss += loss.detach()
            a_size += bs
        # print(e,a_loss/a_size*1e+6)
        model.eval()
        with torch.no_grad():
            val_data = load_data(val_list,batch_size)
            b_loss = 0
            b_size = 0
            for x,y in val_data:
                x,y = x.to(device),y.to(device)
                bs = x.size()[1]
                if model_type == "GRU":
                    h1 = torch.zeros((n_layers,bs,h_size)).to("cuda:0")
                    hidden = h1
                    hidden = hidden.data
                else:
                    h1 = torch.zeros((n_layers,bs,h_size)).to("cuda:0")
                    h2 = torch.zeros((n_layers,bs,h_size)).to("cuda:0")
                    hidden = (h1,h2)
                    hidden = tuple([e.data for e in hidden])
                pred,hidden = model(x,hidden)
                loss = criterion(pred,y)
                b_loss += loss.detach()
                b_size += bs
        print("epoch: {} - train_loss: {} - val_loss: {}".format(e+1,float(a_loss.item()/a_size*1e+6),b_loss.item()/b_size*1e+6))

train(modelGRU,train_list,val_list,path,seq,epoch,batch_size,criterionGRU,optimizerGRU,model_type="GRU")
This is the error I got:
RuntimeError                              Traceback (most recent call last)
<ipython-input-9-a382a9688da2> in <module>
----> 1 train(modelGRU, train_list, val_list, path, seq, epoch, batch_size, criterionGRU, optimizerGRU, model_type="GRU")

<ipython-input-6-4565cf358824> in train(model, train_list, val_list, path, seq, epoch, batch_size, criterion, optimizer, model_type)
     61             model.zero_grad()
     62             print(len(hidden))
---> 63             pred, hidden = model(x, hidden)
     64             loss = criterion(pred, y)
     65             loss.backward()

~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
    539             result = self._slow_forward(*input, **kwargs)
    540         else:
--> 541             result = self.forward(*input, **kwargs)
    542         for hook in self._forward_hooks.values():
    543             hook_result = hook(self, input, result)

<ipython-input-5-4ecae472cc96> in forward(self, x, hidden)
     11     def forward(self, x, hidden):
     12         out, hidden = self.gru(x, hidden)
---> 13         x = self.fc(x)
     14         x = self.relu(x)
     15         x = self.out(x)

~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
    539             result = self._slow_forward(*input, **kwargs)
    540         else:
--> 541             result = self.forward(*input, **kwargs)
    542         for hook in self._forward_hooks.values():
    543             hook_result = hook(self, input, result)

~\Anaconda3\lib\site-packages\torch\nn\modules\linear.py in forward(self, input)
     85 
     86     def forward(self, input):
---> 87         return F.linear(input, self.weight, self.bias)
     88 
     89     def extra_repr(self):

~\Anaconda3\lib\site-packages\torch\nn\functional.py in linear(input, weight, bias)
   1370         ret = torch.addmm(bias, input, weight.t())
   1371     else:
-> 1372         output = input.matmul(weight.t())
   1373         if bias is not None:
   1374             output += bias

RuntimeError: size mismatch, m1: [1600 x 3], m2: [50 x 20] at C:/w/1/s/tmp_conda_3.7_104508/conda/conda-bld/pytorch_1572950778684/work/aten/src\THC/generic/THCTensorMathBlas.cu:290
Any advice?
This might have to do with the fact that you are not passing the output of your nn.GRU to the first linear layer in GRUNet's forward function. As written, self.fc receives the raw input x, which still has only 3 features, so the [1600 x 3] matrix in the error (seq 100 x batch 16 rows, 3 input features) cannot be multiplied against self.fc, which expects h_size = 50 input features:
def forward(self, x, hidden):
    out, hidden = self.gru(x, hidden)
    x = self.fc(out)  # pass the GRU output, not the original input x
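
As a quick way to confirm the fix, here is a minimal shape check, assuming GRUNet is defined as in the question with fc(out) applied and using the question's hyperparameters; the dummy tensors are illustrative only:

import torch

# Smoke test with the question's sizes: in_size=3, h_size=50, n_layers=3,
# fc=20, out=1, seq=100, batch_size=16. nn.GRU defaults to batch_first=False,
# so the input layout is (seq_len, batch, input_size).
model = GRUNet(in_size=3, h_size=50, n_layers=3, fc_out=20, out_size=1)
x = torch.randn(100, 16, 3)       # (seq_len, batch, input_size)
hidden = torch.zeros(3, 16, 50)   # (num_layers, batch, hidden_size)
pred, hidden = model(x, hidden)
print(pred.shape)    # expected: torch.Size([100, 16, 1])
print(hidden.shape)  # expected: torch.Size([3, 16, 50])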