Tags: python, tensorflow, machine-learning, pytorch, pysyft

PyTorch and Opacus for Differential Privacy


While running an example from the TensorFlow website (available here) in a Jupyter Notebook, I encountered an error. My Stack Overflow question about that error is here.

As a result, I decided to write equivalent implementations for the same functionality using PyTorch with Opacus and PySyft. However, I unfortunately encountered another error.

Below is my port of the TensorFlow example to PyTorch with Opacus and PySyft, followed by the error message it produces. (Only Opacus is imported in the snippet so far; PySyft is not used yet.)

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from opacus import PrivacyEngine

# Define a simple model
class SimpleCNN(nn.Module):  # 3x3 conv -> ReLU -> flatten -> linear -> log-softmax
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3)  # 1 input channel -> 32 feature maps
        self.fc1 = nn.Linear(32*26*26, 10)  # unpadded 3x3 conv: 26x26 maps assume 28x28 inputs

    def forward(self, x):
        x = torch.relu(self.conv1(x))
        x = x.view(x.numel() // (32*26*26), 32*26*26)  # explicit rows: -1 is ambiguous for an empty batch
        x = self.fc1(x)
        return torch.log_softmax(x, dim=1)  # log-probabilities over the 10 outputs

# Data loaders: MNIST training split served in shuffled batches of 64
transform = transforms.Compose([transforms.ToTensor()])  # the only preprocessing step
train_dataset = datasets.MNIST('.', train=True, download=True, transform=transform)  # stored under the current directory
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

# Initialize model, optimizer, and loss function
model = SimpleCNN()
optimizer = optim.SGD(model.parameters(), lr=0.01)
criterion = nn.NLLLoss()  # consumes the log_softmax output returned by SimpleCNN.forward

# Initialize PrivacyEngine (NOTE: this constructor call is where the TypeError shown below is raised)
privacy_engine = PrivacyEngine(
    model,
    batch_size=64,  # the keyword the TypeError reports as unexpected
    sample_size=len(train_loader.dataset),
    epochs=1,
    max_grad_norm=1.0,
)

privacy_engine.attach(optimizer)  # not reached in the run shown below: the constructor fails first

# Training loop: a single pass over train_loader
model.train()
for epoch in range(1):
    for data, target in train_loader:
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

# Print privacy statistics
epsilon, best_alpha = optimizer.privacy_engine.get_privacy_spent(1e-5)  # 1e-5 is presumably the target delta (see the print)
print(f"Epsilon: {epsilon}, Delta: 1e-5")

Error:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[1], line 32
     29 criterion = nn.NLLLoss()
     31 # Initialize PrivacyEngine
---> 32 privacy_engine = PrivacyEngine(
     33     model,
     34     batch_size=64,
     35     sample_size=len(train_loader.dataset),
     36     epochs=1,
     37     max_grad_norm=1.0,
     38 )
     40 privacy_engine.attach(optimizer)
     42 # Training loop

TypeError: PrivacyEngine.__init__() got an unexpected keyword argument 'batch_size'

Solution

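  • The error says that PrivacyEngine.__init__() does not accept batch_size. Passing the model and training settings to the constructor and then calling privacy_engine.attach(optimizer) is the pre-1.0 Opacus API. Since Opacus 1.0, PrivacyEngine() is created without those arguments, the model, optimizer and data loader are wrapped together by make_private(...), and the privacy budget is read with privacy_engine.get_epsilon(delta). Following the docs, the code becomes something like the following (noise_multiplier=1.1 is only an example value):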

    import torch
    import torch.nn as nn
    import torch.optim as optim
    from torchvision import datasets, transforms
    from torch.utils.data import DataLoader
    from opacus import PrivacyEngine
    
    class SimpleCNN(nn.Module):
        """Minimal MNIST-style classifier: one 3x3 convolution and one linear layer.

        ``forward`` returns log-probabilities over 10 outputs, which is the
        input format ``nn.NLLLoss`` works with.
        """

        def __init__(self):
            super().__init__()
            # 1 input channel -> 32 feature maps. The kernel is unpadded, so each
            # spatial side shrinks by 2; the 26x26 maps below assume 28x28 inputs.
            self.conv1 = nn.Conv2d(1, 32, kernel_size=3)
            self.fc1 = nn.Linear(32*26*26, 10)

        def forward(self, x):
            """Return per-class log-probabilities, one row per input image.

            Args:
                x: Image tensor accepted by ``conv1``; the linear layer's input
                    size assumes 1x28x28 images.

            Returns:
                Tensor with 10 columns holding log-softmax scores. An empty
                batch yields an empty ``(0, 10)`` result instead of raising.
            """
            x = torch.relu(self.conv1(x))
            # Pass ``view`` an explicit row count: with ``-1`` the row dimension
            # cannot be inferred from a zero-element tensor, so an empty batch
            # (a sampled data loader may produce one) would raise RuntimeError.
            # For non-empty inputs this equals ``view(-1, 32*26*26)``.
            flat_features = self.fc1.in_features
            x = x.view(x.numel() // flat_features, flat_features)
            x = self.fc1(x)
            return torch.log_softmax(x, dim=1)
    
    # Data: MNIST training split, converted to tensors and served in shuffled
    # batches of 64.
    transform = transforms.Compose([transforms.ToTensor()])
    train_dataset = datasets.MNIST(
        root='.',
        train=True,
        transform=transform,
        download=True,
    )
    train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)

    # Ordinary (non-private) model, optimizer and loss.
    model = SimpleCNN()
    optimizer = optim.SGD(params=model.parameters(), lr=0.01)
    criterion = nn.NLLLoss()

    # The engine is created without arguments; make_private() hands back the
    # model, optimizer and loader that replace the originals from here on.
    # Per the Opacus docs, max_grad_norm is the per-sample gradient clipping
    # threshold and noise_multiplier scales the added noise relative to it.
    privacy_engine = PrivacyEngine()
    model, optimizer, train_loader = privacy_engine.make_private(
        module=model,
        optimizer=optimizer,
        data_loader=train_loader,
        noise_multiplier=1.1,
        max_grad_norm=1.0,
    )

    # One epoch of the usual zero_grad / forward / backward / step cycle, run
    # on the wrapped objects.
    model.train()
    for epoch in range(1):
        for images, labels in train_loader:
            optimizer.zero_grad()
            output = model(images)
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step()

    # Report the epsilon spent for delta = 1e-5.
    epsilon = privacy_engine.get_epsilon(1e-5)
    print(f"Epsilon: {epsilon}, Delta: 1e-5")