pytorch, time-series, lstm, early-stopping

Validation loss and early stopping in an LSTM model for multivariate time series forecasting in PyTorch


This is my first attempt at training an LSTM model to predict oil prices, following some tutorials. My dataset looks like this:

Date        USD Index   Oil Price
12-10-2019  50          66
13-10-2019  51          60

where Oil Price is the target column. The sequence size is 7 and the output size is 1. I couldn't figure out how to add validation data and print the validation loss in addition to the train and test loss. This is my code and my attempts:

  # split into train, validation and test sets
  # (dataset size is 2380: 150 samples for test, 100 for validation, the rest for training)
  X_train = X_seq[:-150]
  y_train = y_seq[:-150]
  X_test = X_seq[-150:]
  y_test = y_seq[-150:]
  X_val = X_train[-100:]
  y_val = y_train[-100:]
  X_train = X_train[:-100]
  y_train = y_train[:-100]
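
X_seq and y_seq are built with a sliding window over the feature array, roughly like this (data here stands for a hypothetical (2380, 3) NumPy array of scaled features, with the oil price in the last column):

import numpy as np

seq_len = 7  # sequence size
X_seq, y_seq = [], []
for i in range(len(data) - seq_len):
    X_seq.append(data[i:i + seq_len])     # one window of 7 timesteps, all features
    y_seq.append(data[i + seq_len, -1:])  # next day's oil price (output = 1)
X_seq = np.array(X_seq)
y_seq = np.array(y_seq)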

The LSTM model:

import torch
import torch.nn as nn

class LSTM(nn.Module):
    def __init__(self, num_classes, input_size, hidden_size, num_layers):
        super().__init__()
        self.num_classes = num_classes # output size
        self.num_layers = num_layers # number of recurrent layers in the LSTM
        self.input_size = input_size # number of input features
        self.hidden_size = hidden_size # neurons in each LSTM layer
        # LSTM layer (note: dropout only applies between stacked layers,
        # so it has no effect when num_layers == 1)
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                            num_layers=num_layers, batch_first=True, dropout=0.2)
        self.fc_1 = nn.Linear(hidden_size, 128) # fully connected
        self.fc_2 = nn.Linear(128, num_classes) # fully connected last layer
        self.relu = nn.ReLU()

    def forward(self, x):
        # initial hidden and cell states (torch.autograd.Variable is deprecated;
        # plain tensors work)
        h_0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size)
        c_0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size)
        # propagate input through the LSTM
        output, (hn, cn) = self.lstm(x, (h_0, c_0))
        hn = hn.view(-1, self.hidden_size) # reshape the last hidden state for the dense layers
        out = self.relu(hn)
        out = self.fc_1(out) # first dense layer
        out = self.relu(out)
        out = self.fc_2(out) # final output
        return out

This is the training loop:

def training_loop(n_epochs, lstm, optimiser, loss_fn, X_train, y_train, X_test, y_test,
                  X_val, y_val):
    for epoch in range(n_epochs):
        lstm.train()
        outputs = lstm(X_train) # forward pass
        optimiser.zero_grad() # reset the gradients to zero
        loss = loss_fn(outputs, y_train) # training loss
        #val_loss = loss_fn(y_val, y_test).item()  # my failed attempt at a validation loss
        loss.backward() # backpropagate the training loss
        optimiser.step() # update the weights
        lstm.eval()
        test_preds = lstm(X_test)
        test_loss = loss_fn(test_preds, y_test)
        if epoch % 100 == 0:
            print("Epoch: %d, train loss: %1.5f, test loss: %1.5f" % (epoch,
                                                                      loss.item(),
                                                                      test_loss.item()))

This is how I call the model:

n_epochs = 1000 
learning_rate = 0.001 

input_size = 3 # number of features
hidden_size = 2 # number of features in hidden state
num_layers = 1 # number of stacked lstm layers
num_classes = 1 # number of output classes 

lstm = LSTM(num_classes, input_size, hidden_size, num_layers)
                      
loss_fn = torch.nn.MSELoss()    # mean-squared error for regression
optimiser = torch.optim.Adam(lstm.parameters(), lr=learning_rate)

training_loop(n_epochs=n_epochs, lstm=lstm, optimiser=optimiser, loss_fn=loss_fn,
              X_train=X_train_tensors, y_train=y_train_tensors,
              X_test=X_test_tensors, y_test=y_test_tensors,
              X_val=X_val_tensors, y_val=y_val_tensors)
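
where the *_tensors variables are the splits converted to PyTorch float tensors, e.g. (names assumed, conversion not shown above):

import torch

# hypothetical conversion of the NumPy splits to float32 tensors
# shapes: X -> (samples, 7, 3), y -> (samples, 1)
X_train_tensors = torch.tensor(X_train, dtype=torch.float32)
y_train_tensors = torch.tensor(y_train, dtype=torch.float32)
X_val_tensors = torch.tensor(X_val, dtype=torch.float32)
y_val_tensors = torch.tensor(y_val, dtype=torch.float32)
X_test_tensors = torch.tensor(X_test, dtype=torch.float32)
y_test_tensors = torch.tensor(y_test, dtype=torch.float32)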
              

So my problem is that I don't know how to pass the validation set so it is used during training, compute the validation loss on it, and do early stopping based on that loss.
Another question, please: are 100 validation and 150 test samples suitable for a dataset of size 2380?

Any help will be appreciated.


Solution

  • You can use the following EarlyStopping class to implement an early stopping mechanism:

    class EarlyStopping:
        """Early stops the training if validation loss doesn't improve after a given patience."""

        def __init__(self, patience=7, verbose=False, delta=0, path='checkpoint.pt', trace_func=print):
            """
            Args:
                patience (int): How long to wait after the last time the validation loss improved.
                                Default: 7
                verbose (bool): If True, prints a message for each validation loss improvement.
                                Default: False
                delta (float): Minimum change in the monitored quantity to qualify as an improvement.
                                Default: 0
                path (str): Path for the checkpoint to be saved to.
                                Default: 'checkpoint.pt'
                trace_func (function): trace print function.
                                Default: print
            """
            self.patience = patience
            self.verbose = verbose
            self.counter = 0
            self.best_score = None
            self.early_stop = False
            self.val_loss_min = float('inf')  # plain float instead of the deprecated np.Inf
            self.delta = delta
            self.path = path
            self.trace_func = trace_func

        def __call__(self, val_loss, model):
            score = -val_loss
            if self.best_score is None:
                self.best_score = score
                self.save_checkpoint(val_loss, model)
            elif score < self.best_score + self.delta:
                self.counter += 1
                self.trace_func(f'EarlyStopping counter: {self.counter} out of {self.patience}')
                if self.counter >= self.patience:
                    self.early_stop = True
            else:
                self.best_score = score
                self.save_checkpoint(val_loss, model)
                self.counter = 0

        def save_checkpoint(self, val_loss, model):
            """Saves the model when the validation loss decreases."""
            if self.verbose:
                self.trace_func(
                    f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
            torch.save(model.state_dict(), self.path)  # write the best weights to self.path
            self.val_loss_min = val_loss


    Example usage (the original snippet computed loss_fn(y_val, y_test), which compares two label tensors; the validation loss has to come from the model's predictions on X_val, and the model instance lstm is what gets passed to early_stopping):

    early_stopping = EarlyStopping(patience=20, verbose=True)
    for epoch in range(n_epochs):
        lstm.train()
        outputs = lstm(X_train) # forward pass
        optimiser.zero_grad() # reset the gradients to zero
        loss = loss_fn(outputs, y_train) # training loss
        loss.backward() # backpropagate the training loss
        optimiser.step() # update the weights
        lstm.eval()
        with torch.no_grad():
            val_preds = lstm(X_val)
            val_loss = loss_fn(val_preds, y_val).item() # validation loss on held-out data
            test_preds = lstm(X_test)
            test_loss = loss_fn(test_preds, y_test).item()
        early_stopping(val_loss, lstm) # pass the model instance
        if early_stopping.early_stop:
            print("Early stopping")
            break
        if epoch % 100 == 0:
            print("Epoch: %d, train loss: %1.5f, val loss: %1.5f, test loss: %1.5f"
                  % (epoch, loss.item(), val_loss, test_loss))
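
    Once the loop exits, you can reload the checkpoint written by save_checkpoint so the final evaluation uses the weights from the best validation epoch; a short sketch, assuming the default 'checkpoint.pt' path:

    lstm.load_state_dict(torch.load('checkpoint.pt')) # restore best-validation weights
    lstm.eval()
    with torch.no_grad():
        test_preds = lstm(X_test)
        print("final test loss: %1.5f" % loss_fn(test_preds, y_test).item())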