Despite the numerous existing answers on this very topic, I fail to see in the example below (an extract from https://gist.github.com/lirnli/c16ef186c75588e705d9864fb816a13c on Variational Recurrent Networks) which input and output dimensions trigger the error.
I have tried changing the dimension passed to `torch.cat` and also removing the call to `squeeze()`, but the error persists:
<ipython-input-51-cdc928891ad7> in generate(self, hidden, temperature)
56 x_sample = x = x_out.div(temperature).exp().multinomial(1).squeeze()
57 x = self.phi_x(x)
---> 58 tc = torch.cat([x,z], dim=1)
59
60 hidden_next = self.rnn(tc,hidden)
IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)
So how should `x` and `z` be shaped so that `tc = torch.cat([x,z], dim=1)` works?
The full code is as follows:
import torch
from torch import nn, optim
from torch.autograd import Variable
# Single-step variational recurrent cell. It combines a feature extractor for
# input symbols (phi_x), one for latent samples (phi_z), an inference network
# (encoder), a prior network, a decoder producing logits, and a GRU cell.
class VRNNCell(nn.Module):
# Build the sub-networks. All sizes are hard-coded: 128 embedding entries /
# output logits and 64-dimensional features, latents and hidden state.
def __init__(self):
super(VRNNCell,self).__init__()
# Integer symbol index -> 64-d feature vector.
self.phi_x = nn.Sequential(nn.Embedding(128,64), nn.Linear(64,64), nn.ELU())
# Takes [x features, hidden] (64 + 64) and emits 2 * 64 values.
self.encoder = nn.Linear(128,64*2) # output hyperparameters
self.phi_z = nn.Sequential(nn.Linear(64,64), nn.ELU())
# Takes [hidden, z features] (64 + 64) and emits 128 logits.
self.decoder = nn.Linear(128,128) # logits
# Takes hidden (64) and emits 2 * 64 values.
self.prior = nn.Linear(64,64*2) # output hyperparameters
# Input is [x features, z features] (64 + 64); hidden state is 64-d.
self.rnn = nn.GRUCell(128,64)
# One step of the cell on observed input.
# x: integer indices (it is fed to nn.Embedding); hidden: 2-D state with 64
# columns, one row per element of x (they are concatenated along dim=1).
# Returns (x_out, hidden_next, z_prior, z_infer).
def forward(self, x, hidden):
x = self.phi_x(x)
# 1. h => z
z_prior = self.prior(hidden)
# 2. x + h => z
z_infer = self.encoder(torch.cat([x,hidden], dim=1))
# sampling
# Standard-normal noise scaled by exp(second half of z_infer) and shifted by
# the first half of z_infer.
z = Variable(torch.randn(x.size(0),64))*z_infer[:,64:].exp()+z_infer[:,:64]
z = self.phi_z(z)
# 3. h + z => x
x_out = self.decoder(torch.cat([hidden, z], dim=1))
# 4. x + z => h
hidden_next = self.rnn(torch.cat([x,z], dim=1),hidden)
return x_out, hidden_next, z_prior, z_infer
# Run forward() and compute the two loss terms for one step.
# Returns (loss1, loss2, hidden_next).
def calculate_loss(self, x, hidden):
x_out, hidden_next, z_prior, z_infer = self.forward(x, hidden)
# 1. logistic regression loss
# Cross-entropy between the decoder logits and the input indices themselves.
loss1 = nn.functional.cross_entropy(x_out, x)
# 2. KL Divergence between Multivariate Gaussian
# The first 64 columns are used as means, the last 64 as log standard deviations.
mu_infer, log_sigma_infer = z_infer[:,:64], z_infer[:,64:]
mu_prior, log_sigma_prior = z_prior[:,:64], z_prior[:,64:]
# Per-dimension term: variance ratio + squared scaled mean difference
# - log variance ratio - 1.
loss2 = (2*(log_sigma_infer-log_sigma_prior)).exp() \
+ ((mu_infer-mu_prior)/log_sigma_prior.exp())**2 \
- 2*(log_sigma_infer-log_sigma_prior) - 1
# Sum over the 64 latent dimensions, then average over rows.
loss2 = 0.5*loss2.sum(dim=1).mean()
return loss1, loss2, hidden_next
# Sample one symbol from the prior and advance the hidden state.
# hidden defaults to a single all-zero row; temperature defaults to 0.8.
# Returns (x_sample, hidden_next).
def generate(self, hidden=None, temperature=None):
if hidden is None:
hidden=Variable(torch.zeros(1,64))
if temperature is None:
temperature = 0.8
# 1. h => z
z_prior = self.prior(hidden)
# sampling
z = Variable(torch.randn(z_prior.size(0),64))*z_prior[:,64:].exp()+z_prior[:,:64]
z = self.phi_z(z)
# 2. h + z => x
x_out = self.decoder(torch.cat([hidden, z], dim=1))
# sampling
# NOTE(review): multinomial(1) returns one column per row of x_out, and the
# argument-less squeeze() then removes every size-1 dimension. With the default
# single-row hidden this leaves a 0-dim index, so phi_x(x) below is 1-D.
x_sample = x = x_out.div(temperature).exp().multinomial(1).squeeze()
x = self.phi_x(x)
# 3. x + z => h
# hidden_next = self.rnn(torch.cat([x,z], dim=1),hidden)
# NOTE(review): z is 2-D here but x is 1-D in the single-row case, so asking
# for dim=1 raises "IndexError: Dimension out of range".
tc = torch.cat([x,z], dim=1)
hidden_next = self.rnn(tc,hidden)
return x_sample, hidden_next
# Call generate() n times, feeding each returned hidden state into the next
# call, and join the sampled indices into a string via chr().
def generate_text(self, hidden=None,temperature=None, n=100):
res = []
# NOTE(review): this overwrites the `hidden` argument, so a state passed by
# the caller is never used.
hidden = None
for _ in range(n):
x_sample, hidden = self.generate(hidden,temperature)
res.append(chr(x_sample.data[0]))
return "".join(res)
# Test
# Smoke test: one forward pass and one loss evaluation on three input indices,
# followed by text generation.
net = VRNNCell()
x = Variable(torch.LongTensor([12,13,14]))
hidden = Variable(torch.rand(3,64))
# NOTE(review): forward() returns (x_out, hidden_next, z_prior, z_infer), so the
# last two names on the next line are bound in swapped order.
output, hidden_next, z_infer, z_prior = net(x, hidden)
loss1, loss2, _ = net.calculate_loss(x, hidden)
# Bare expression: has no effect in a script (it only displays the values in
# an interactive session).
loss1, loss2
# NOTE(review): this tensor is not passed to generate_text() below, which is
# called without arguments.
hidden = Variable(torch.zeros(1,64))
# This is the call that ends in the IndexError raised inside generate().
net.generate_text()
The error
IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)
means that you're trying to address a dimension that doesn't exist in the tensor: `dim=1` was requested, but the tensor is one-dimensional, so the only valid values are `-1` and `0`. For instance, the following code would cause the same IndexError you're experiencing.
# sample input tensors
In [210]: x = torch.arange(4)
In [211]: z = torch.arange(6)
# trying to concatenate along the second dimension
# but the tensors have only one dimension (i.e., `0`).
In [212]: torch.cat([x, z], dim=1)
So, one way to overcome this is to promote the tensors to higher dimensions before concatenation, if that is what you need.
# promoting tensors to 2D before concatenation
In [216]: torch.cat([x[None, :], z[None, :]], dim=1)
Out[216]: tensor([[0, 1, 2, 3, 0, 1, 2, 3, 4, 5]])
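Thus, in your case, you have to analyze and understand what shape you need for `x` so that it can be concatenated with `z` along dimension 1, with the result `tc` then passed as input to `self.rnn()` along with `hidden`.
As far as I can see, `x[None, :]`, `z[None, :]` should work. Note that in `generate` `z` already has a leading batch dimension (it is computed from the 2-D `hidden`), so only `x` is actually missing one.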
The code you posted was written for PyTorch v0.4.1. A lot has changed in the PyTorch Python API since then, but the code was not updated.
Below are the changes you need to make the code run and train successfully. Copy the functions below and paste them at the appropriate places in your code.
def generate(self, hidden=None, temperature=None):
    """Sample one symbol from the prior and advance the recurrent state.

    Args:
        hidden: Recurrent state of shape ``(batch, hidden_size)``. When
            ``None``, a single all-zero row is used.
        temperature: Positive divisor applied to the decoder logits before
            sampling. When ``None``, ``0.8`` is used.

    Returns:
        Tuple ``(x_sample, hidden_next)``. ``x_sample`` holds one sampled
        symbol index per row of ``hidden`` (shape ``(batch,)``) and
        ``hidden_next`` has the same shape as ``hidden``.
    """
    if hidden is None:
        # Created from the layer's own weights so dtype and device match the model.
        hidden = self.prior.weight.new_zeros(1, self.prior.in_features)
    if temperature is None:
        temperature = 0.8
    # 1. h => z
    z_prior = self.prior(hidden)
    # sampling: the first half of the prior output is the mean, the second
    # half the log standard deviation.
    mu, log_sigma = z_prior.chunk(2, dim=1)
    z = torch.randn_like(mu) * log_sigma.exp() + mu
    z = self.phi_z(z)
    # 2. h + z => x
    x_out = self.decoder(torch.cat([hidden, z], dim=1))
    # sampling
    # changed here: softmax gives the same distribution as exp() of the scaled
    # logits but cannot overflow.
    probs = torch.softmax(x_out / temperature, dim=1)
    # changed here: squeeze only the sample axis, so the batch axis survives
    # even when batch == 1 and x stays 2-D after phi_x.
    x_sample = probs.multinomial(1).squeeze(1)
    x = self.phi_x(x_sample)
    # 3. x + z => h
    xz = torch.cat([x, z], dim=1)  # changed here
    hidden_next = self.rnn(xz, hidden)  # changed here
    return x_sample, hidden_next
def generate_text(self, hidden=None, temperature=None, n=100):
    """Generate ``n`` characters by calling ``self.generate`` repeatedly.

    Args:
        hidden: Initial recurrent state passed to the first ``generate`` call.
            ``None`` lets ``generate`` choose its own default.
        temperature: Sampling temperature, passed unchanged to ``generate``.
        n: Number of characters to produce.

    Returns:
        The sampled characters joined into one string (empty when ``n`` is 0).
    """
    res = []
    # changed here: ``hidden`` is no longer reset to None, so a state supplied
    # by the caller is actually used for the first step.
    for _ in range(n):
        x_sample, hidden = self.generate(hidden, temperature)
        # changed here: flatten first so both a 0-dim sample and a one-element
        # (batch of one) sample convert to a Python int.
        res.append(chr(int(x_sample.reshape(-1)[0])))
    return "".join(res)
# Training loop: each epoch draws one batch from the generator ``g``, sums the
# two loss terms over every element of the batch, and takes one optimizer step.
for epoch in range(max_epoch):
    batch = next(g)
    loss_seq = 0
    loss1_seq, loss2_seq = 0, 0
    optimizer.zero_grad()
    for x in batch:
        loss1, loss2, hidden = net.calculate_loss(Variable(x), hidden)
        # .item() yields plain Python floats for logging, detached from autograd.
        loss1_seq += loss1.item()  # changed here
        loss2_seq += loss2.item()  # changed here
        # The differentiable total keeps the tensors so backward() can run.
        loss_seq = loss_seq + loss1 + loss2
    loss_seq.backward()
    optimizer.step()
    # Cut the graph so the next epoch does not backpropagate into this one.
    hidden = hidden.detach()
    if epoch % 100 == 0:
        print('>> epoch {}, loss {:12.4f}, decoder loss {:12.4f}, latent loss {:12.4f}'.format(epoch, loss_seq.item(), loss1_seq, loss2_seq))  # changed here
        print(net.generate_text())
        print()
Note: After these changes, the training loop runs without any errors at my end on PyTorch v1.7.1. Have a look at the comments marked `# changed here` to understand the changes.