Tags: pytorch, bayesian, conv-neural-network, dropout, uncertainty

Measuring uncertainty using MC Dropout in PyTorch


I am trying to implement a Bayesian CNN using MC Dropout in PyTorch. The main idea is that by applying dropout at test time and running many forward passes, you get predictions from a variety of different models. I have found an application of MC Dropout, but I do not really understand how they applied this method and how exactly they chose the correct prediction from the list of predictions.


def mcdropout_test(model):
    model.train()
    test_loss = 0
    correct = 0
    T = 100
    for data, target in test_loader:
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data, volatile=True), Variable(target)
        output_list = []
        for i in xrange(T):
            output_list.append(torch.unsqueeze(model(data), 0))
        output_mean = torch.cat(output_list, 0).mean(0)
        test_loss += F.nll_loss(F.log_softmax(output_mean), target, size_average=False).data[0]  # sum up batch loss
        pred = output_mean.data.max(1, keepdim=True)[1]  # get the index of the max log-probability
        correct += pred.eq(target.data.view_as(pred)).cpu().sum()

    test_loss /= len(test_loader.dataset)
    print('\nMC Dropout Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))


train()
mcdropout_test(model)

I have replaced

data, target = Variable(data, volatile=True), Variable(target)

by adding

with torch.no_grad():

at the beginning of the loop.
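So the test loop is now wrapped like this (just a sketch of my change; the loss and accuracy parts stay exactly as above, and I use range instead of xrange):

with torch.no_grad():
    for data, target in test_loader:
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        output_list = []
        # T stochastic forward passes, each with a different dropout mask
        for i in range(T):
            output_list.append(torch.unsqueeze(model(data), 0))
        output_mean = torch.cat(output_list, 0).mean(0)
        # ... loss and accuracy computed as before ...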

And this is how I have defined my CNN

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 192, 5, padding=2)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(192, 192, 5, padding=2)
        self.fc1 = nn.Linear(192 * 8 * 8, 1024)
        self.fc2 = nn.Linear(1024, 256)
        self.fc3 = nn.Linear(256, 10)
        self.dropout = nn.Dropout(p=0.3)
        
        nn.init.xavier_uniform_(self.conv1.weight)
        nn.init.constant_(self.conv1.bias, 0.0)
        nn.init.xavier_uniform_(self.conv2.weight)
        nn.init.constant_(self.conv2.bias, 0.0)
        nn.init.xavier_uniform_(self.fc1.weight)
        nn.init.constant_(self.fc1.bias, 0.0)
        nn.init.xavier_uniform_(self.fc2.weight)
        nn.init.constant_(self.fc2.bias, 0.0)
        nn.init.xavier_uniform_(self.fc3.weight)
        nn.init.constant_(self.fc3.bias, 0.0)


    def forward(self, x):
        x = self.pool(F.relu(self.dropout(self.conv1(x))))  # recommended to add the relu
        x = self.pool(F.relu(self.dropout(self.conv2(x))))  # recommended to add the relu
        x = x.view(-1, 192 * 8 * 8)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(self.dropout(x)))
        x = self.fc3(self.dropout(x))  # no activation function needed for the last layer
        return x

Can anyone help me get a correct implementation of the Monte Carlo Dropout method for a CNN?


Solution

  • Implementing MC Dropout in PyTorch is easy. All that needs to be done is to set the dropout layers of your model to train mode. This allows different dropout masks to be used during each of the multiple forward passes. Below is an implementation of MC Dropout in PyTorch illustrating how predictions from the multiple forward passes are stacked together and used to compute different uncertainty metrics.

    import sys
    
    import numpy as np
    
    import torch
    import torch.nn as nn
    
    def enable_dropout(model):
        """ Function to enable the dropout layers during test-time """
        for m in model.modules():
            if m.__class__.__name__.startswith('Dropout'):
                m.train()
    
    def get_monte_carlo_predictions(data_loader,
                                    forward_passes,
                                    model,
                                    n_classes,
                                    n_samples):
        """ Function to get the monte-carlo samples and uncertainty estimates
        through multiple forward passes
    
        Parameters
        ----------
        data_loader : object
            data loader object from the data loader module
        forward_passes : int
            number of monte-carlo samples/forward passes
        model : object
            pytorch model
        n_classes : int
            number of classes in the dataset
        n_samples : int
            number of samples in the test set
        """
    
        dropout_predictions = np.empty((0, n_samples, n_classes))
        softmax = nn.Softmax(dim=1)
        for i in range(forward_passes):
            predictions = np.empty((0, n_classes))
            model.eval()
            enable_dropout(model)
            for image, label in data_loader:
                image = image.to(torch.device('cuda'))
                with torch.no_grad():
                    output = model(image)
                    output = softmax(output)  # shape (batch_size, n_classes)
                predictions = np.vstack((predictions, output.cpu().numpy()))
    
            dropout_predictions = np.vstack((dropout_predictions,
                                             predictions[np.newaxis, :, :]))
            # dropout predictions - shape (forward_passes, n_samples, n_classes)
    
        # Calculating mean across multiple MCD forward passes 
        mean = np.mean(dropout_predictions, axis=0)  # shape (n_samples, n_classes)
    
        # Calculating variance across multiple MCD forward passes 
        variance = np.var(dropout_predictions, axis=0)  # shape (n_samples, n_classes)
    
        epsilon = sys.float_info.min
        # Calculating entropy across multiple MCD forward passes 
        entropy = -np.sum(mean * np.log(mean + epsilon), axis=-1)  # shape (n_samples,)
    
        # Calculating mutual information across multiple MCD forward passes 
        mutual_info = entropy - np.mean(np.sum(-dropout_predictions * np.log(dropout_predictions + epsilon),
                                               axis=-1), axis=0)  # shape (n_samples,)

        return mean, variance, entropy, mutual_info
    

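    As a rough usage sketch (this assumes the return added at the end of the function above; net, test_loader and the class/sample counts are placeholders rather than anything from the original post):

    # Hypothetical call: 10 classes, 10000 test images, 50 stochastic forward passes
    mean, variance, entropy, mutual_info = get_monte_carlo_predictions(
        data_loader=test_loader,
        forward_passes=50,
        model=net,
        n_classes=10,
        n_samples=10000)

    # The class with the highest mean softmax probability is the prediction;
    # entropy and mutual information indicate how uncertain each prediction is
    predicted_class = mean.argmax(axis=1)  # shape (n_samples,)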
    Moving on to the implementation posted in the question above: multiple predictions from T different forward passes are obtained by first setting the entire model to train mode (model.train()). This is not desirable, because it introduces unwanted stochasticity in the predictions if the model contains layers other than dropout that behave differently at train time, such as batch-norm. Hence, the best way is to keep the model in eval mode and set only the dropout layers to train mode, as in the enable_dropout snippet above.
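    For instance, the mcdropout_test function from the question could be adjusted along these lines (a sketch only; it reuses the question's globals test_loader and args, assumes F is torch.nn.functional as in the question, and uses the enable_dropout helper defined above):

    def mcdropout_test(model, T=100):
        # Keep batch-norm and other layers in eval mode, but re-activate dropout
        model.eval()
        enable_dropout(model)
        test_loss = 0
        correct = 0
        with torch.no_grad():
            for data, target in test_loader:
                if args.cuda:
                    data, target = data.cuda(), target.cuda()
                # T stochastic forward passes, each with a different dropout mask
                output_list = [torch.unsqueeze(model(data), 0) for _ in range(T)]
                output_mean = torch.cat(output_list, 0).mean(0)
                test_loss += F.nll_loss(F.log_softmax(output_mean, dim=1),
                                        target, reduction='sum').item()
                pred = output_mean.argmax(dim=1, keepdim=True)
                correct += pred.eq(target.view_as(pred)).sum().item()
        test_loss /= len(test_loader.dataset)
        print('\nMC Dropout Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
            test_loss, correct, len(test_loader.dataset),
            100. * correct / len(test_loader.dataset)))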