I am training a graph convolutional network (GCN) with a classifier attached to the end. When I train the whole model as a single-label (multiclass) classifier, it does very well: 91% weighted F1. But when I train the same model as a multilabel classifier, training takes much longer and the model fails to learn.
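For context, the two modes expect differently shaped targets, which is easy to trip over. A minimal illustration (shapes only; the numbers are made up):

import torch

# SINGLE (multiclass): one class index per sample -> shape (N,), dtype long.
single_labels = torch.tensor([0, 3, 5, 0])

# MULTI (multilabel): a 0/1 indicator per class -> shape (N, tag_size), float.
multi_labels = torch.tensor([[1, 0, 0, 0, 0, 0],
                             [0, 0, 0, 1, 0, 1],
                             [0, 1, 1, 0, 0, 0],
                             [0, 0, 0, 0, 0, 0]]).float()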
The classifier code is below; it branches on whether the prediction type is single-label or multilabel.
import torch
import torch.nn as nn
import torch.nn.functional as F


class Classifier(nn.Module):
    def __init__(self, input_dim, hidden_size, tag_size, args, pred_type='SINGLE'):
        super(Classifier, self).__init__()
        self.emotion_att = MaskedEmotionAtt(input_dim)
        self.lin1 = nn.Linear(input_dim, hidden_size)
        self.drop = nn.Dropout(args.drop_rate)
        self.lin2 = nn.Linear(hidden_size, tag_size)
        self.pred_type = pred_type
        if args.class_weight:
            # Random per-class weights in [1, 10); broadcast over the
            # (batch, tag_size) predictions in the BCE case.
            self.loss_weights = (torch.rand(tag_size) * 9 + 1).to(args.device)
            if self.pred_type == 'SINGLE':
                self.loss_func = nn.NLLLoss(self.loss_weights)
            elif self.pred_type == 'MULTI':
                self.loss_func = nn.BCELoss(self.loss_weights)
        else:
            if self.pred_type == 'SINGLE':
                self.loss_func = nn.NLLLoss()
            elif self.pred_type == 'MULTI':
                self.loss_func = nn.BCELoss()

    def get_prob(self, h, text_len_tensor):
        # h_hat = self.emotion_att(h, text_len_tensor)
        hidden = self.drop(F.relu(self.lin1(h)))
        scores = self.lin2(hidden)
        if self.pred_type == 'SINGLE':
            # Log-probabilities over mutually exclusive classes.
            return F.log_softmax(scores, dim=-1)
        elif self.pred_type == 'MULTI':
            # Independent per-label probabilities (torch.sigmoid replaces
            # the deprecated F.sigmoid).
            return torch.sigmoid(scores)
        # Any other pred_type falls back to the raw scores.
        return scores

    def forward(self, h, text_len_tensor):
        # Log-probs for SINGLE, plain probabilities for MULTI.
        prob = self.get_prob(h, text_len_tensor)
        if self.pred_type == 'SINGLE':
            y_hat = torch.argmax(prob, dim=-1)
        elif self.pred_type == 'MULTI':
            # Threshold each label independently at 0.5.
            y_hat = (prob > 0.5).long()
        else:
            log.error("Prediction type should be one of ['SINGLE', 'MULTI']")
            raise ValueError(f'Unknown pred_type: {self.pred_type}')
        return y_hat

    def get_loss(self, h, label_tensor, text_len_tensor):
        prob = self.get_prob(h, text_len_tensor)
        if self.pred_type == 'SINGLE':
            loss = self.loss_func(prob, label_tensor)
        elif self.pred_type == 'MULTI':
            # BCELoss needs float targets shaped like the predictions.
            loss = self.loss_func(prob, label_tensor.float())
        return loss
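As a side note, for the MULTI case it is often more stable to skip the sigmoid in the loss path and feed the raw scores to nn.BCEWithLogitsLoss; its pos_weight argument also lets you upweight rare positive labels. A minimal sketch, not my actual training code (tag_size and the pos_weight value here are placeholder assumptions):

import torch
import torch.nn as nn

tag_size = 6
# Assuming roughly 20% positives per label -> upweight positives ~4x.
pos_weight = torch.full((tag_size,), 4.0)
loss_func = nn.BCEWithLogitsLoss(pos_weight=pos_weight)

scores = torch.randn(8, tag_size)                    # raw logits, no sigmoid
labels = torch.randint(0, 2, (8, tag_size)).float()  # multi-hot targets
loss = loss_func(scores, labels)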
Multilabel and single-label setups can behave very differently when the data contains a "no label" class.
In my case, only 20% of the samples had real labels; I assigned a placeholder 0-label to the rest. Those 0-labeled samples contributed no positive targets in multilabel training, but in single-label training they were full-fledged examples of class 0. As a result, the single-label model quickly reached 85% largely by predicting 0.
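To see why predicting 0 is enough, consider a degenerate always-predict-0 baseline under a similar label split (a rough sanity check, assuming an 80/20 distribution like mine; the numbers are illustrative):

import numpy as np
from sklearn.metrics import f1_score

rng = np.random.default_rng(0)
# ~80% of samples carry the placeholder 0-label.
y_true = rng.choice([0, 1, 2, 3], size=1000, p=[0.8, 0.07, 0.07, 0.06])
y_pred = np.zeros_like(y_true)  # always predict the 0 class

print(f1_score(y_true, y_pred, average='weighted', zero_division=0))  # ~0.71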
In addition, reducing the model complexity (fewer layers, fewer neurons) required more training epochs but fit faster overall, and it overfit less.