pythonpytorchpre-trained-model

How to traverse and copy a CNN pre-trained model by entering layer by layer?


I am trying to make a code that allows me to generalize the layers of a pre-trained model to generate a metamodel that is equivalent to twice the original model.

For this, I am using the convolutional layers that, using the group size concept, generate two outputs, and for now, I want the rest of the layers to be duplicated simply. In such a way, the first half of the convolution output will go to layer, for example, relu 1_1, and the second to relu1_2.

The main problem I have found is that many pre-trained networks are not defined using only simple layers but instead use groupings of layers in their topology.

In the following code:

    for name, layer in model.named_children():
        if (isinstance(layer, nn.Sequential) or list(layer.children())):
            for subname, sublayer in layer.named_children():

I have used it as a condition to try to differentiate between simple layers and groupings of layers, but even so, in most models, there are groupings of layers that the condition takes as if they were simple layers.

here, I leave the complete code:

import torch
import torch.nn as nn
import copy
from torchvision import models
from collections import OrderedDict

# Custom class for performing double convolutions with group_size
class ConvolutionalBlock(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, groups=2, filter_values=None, bias=True):
        super(ConvolutionalBlock, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, groups=groups, bias=bias)

        if filter_values is not None:
            self.initialize_manual_weights(filter_values)
        else:
            self.initialize_integer_weights()
            
        # Set the weight and bias to float16 after initialization
        self.conv.weight.data = self.conv.weight.data.to(torch.float16)
        if self.conv.bias is not None:
            self.conv.bias.data = self.conv.bias.data.to(torch.float16)

    def initialize_integer_weights(self):
        int_weights = torch.randint(-5, 5, self.conv.weight.shape, dtype=torch.int16, device=self.conv.weight.device)
        self.conv.weight.data = int_weights.to(dtype=torch.float16)  # Ensure weights are float16
        if self.conv.bias is not None:
            int_bias = torch.zeros(self.conv.bias.shape, dtype=torch.int16, device=self.conv.bias.device)
            self.conv.bias.data = int_bias.to(dtype=torch.float16)  # Ensure bias is also float16

    def initialize_manual_weights(self, filter_values):
        filter_tensor = torch.tensor(filter_values, dtype=torch.float16, device=self.conv.weight.device).view(self.conv.weight.shape)
        self.conv.weight.data = filter_tensor  # Weights are already float16
        if self.conv.bias is not None:
            self.conv.bias.data.fill_(0.0)
            self.conv.bias.data = self.conv.bias.data.to(dtype=torch.float16)  # Ensure bias is float16

    def forward(self, x):
        return self.conv(x)

# Function to copy and modify the model
def copy_and_modify_cnn_model(model):
    modified_layers = OrderedDict()
    
    for name, layer in model.named_children():
        if (isinstance(layer, nn.Sequential) or list(layer.children())):
            for subname, sublayer in layer.named_children():

                if isinstance(sublayer, nn.Conv2d):
                    in_channels = sublayer.in_channels
                    out_channels = sublayer.out_channels
                    kernel_size = sublayer.kernel_size
                    stride = sublayer.stride
                    padding = sublayer.padding
                    
                    modified_layers[subname] = ConvolutionalBlock(
                        in_channels=in_channels * 2,
                        out_channels=out_channels,
                        kernel_size=kernel_size,
                        stride=stride,
                        padding=padding,
                        groups=2
                    )
                else:
                        modified_layers[subname + "_1"] = copy.deepcopy(sublayer)
                        modified_layers[subname + "_2"] = copy.deepcopy(sublayer)

        else:
            if isinstance(layer, nn.Conv2d):
                in_channels = layer.in_channels
                out_channels = layer.out_channels
                kernel_size = layer.kernel_size
                stride = layer.stride
                padding = layer.padding
                
                modified_layers[name] = ConvolutionalBlock(
                    in_channels=in_channels * 2,
                    out_channels=out_channels,
                    kernel_size=kernel_size,
                    stride=stride,
                    padding=padding,
                    groups=2
                )
            else:
                    modified_layers[name + "_1"] = copy.deepcopy(layer)
                    modified_layers[name + "_2"] = copy.deepcopy(layer)
    
    new_model = nn.Sequential(modified_layers)
    return new_model

# Load a pre-trained model
original_model = models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1)

# Set the random seed for reproducibility
torch.manual_seed(0)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Move the model to the GPU if available
device = 'cuda' if torch.cuda.is_available() else 'cpu'
original_model.to(device)

# Create the modified model
new_model = copy_and_modify_cnn_model(original_model).to(device).half()  # Convert the model to float16


# Check the new model
print("modelo original")
print(original_model)
print("nuevo modelo ")
print(new_model)

# Create a synthetic input tensor
input_tensor_1 = torch.randn(1, 3, 224, 224, device=device)  # Original input tensor with 3 channels
synthetic_input_tensor = torch.cat((input_tensor_1, input_tensor_1), dim=1).to(torch.float16)

# Test the output of the modified model
output_modified = new_model(synthetic_input_tensor)
print("\nOutput of the modified model:")
print(output_modified)

Finally, I leave examples when trying to execute code for a model such as resnet18 or vgg16 and where you can see that when trying to duplicate the model there are layers that it does not detect or groupings of layers that it is not able to detect as loose layers.

VGG16 original layers:

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (17): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (18): ReLU(inplace=True)
    (19): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (20): ReLU(inplace=True)
    (21): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (22): ReLU(inplace=True)
    (23): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (24): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (25): ReLU(inplace=True)
    (26): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (27): ReLU(inplace=True)
    (28): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (29): ReLU(inplace=True)
    (30): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(7, 7))
  (classifier): Sequential(
    (0): Linear(in_features=25088, out_features=4096, bias=True)
    (1): ReLU(inplace=True)
    (2): Dropout(p=0.5, inplace=False)
    (3): Linear(in_features=4096, out_features=4096, bias=True)
    (4): ReLU(inplace=True)
    (5): Dropout(p=0.5, inplace=False)
    (6): Linear(in_features=4096, out_features=1000, bias=True)
  )
)

VGG16 duplicate:

Sequential(
  (0): ConvolutionalBlock(
    (conv): Conv2d(6, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2)
  )
  (1_1): ReLU(inplace=True)
  (1_2): ReLU(inplace=True)
  (2): ConvolutionalBlock(
    (conv): Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2)
  )
  (3_1): Linear(in_features=4096, out_features=4096, bias=True)
  (3_2): Linear(in_features=4096, out_features=4096, bias=True)
  (4_1): ReLU(inplace=True)
  (4_2): ReLU(inplace=True)
  (5): ConvolutionalBlock(
    (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2)
  )
  (6_1): Linear(in_features=4096, out_features=1000, bias=True)
  (6_2): Linear(in_features=4096, out_features=1000, bias=True)
  (7): ConvolutionalBlock(
    (conv): Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2)
  )
  (8_1): ReLU(inplace=True)
  (8_2): ReLU(inplace=True)
  (9_1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (9_2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (10): ConvolutionalBlock(
    (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2)
  )
  (11_1): ReLU(inplace=True)
  (11_2): ReLU(inplace=True)
  (12): ConvolutionalBlock(
    (conv): Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2)
  )
  (13_1): ReLU(inplace=True)
  (13_2): ReLU(inplace=True)
  (14): ConvolutionalBlock(
    (conv): Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2)
  )
  (15_1): ReLU(inplace=True)
  (15_2): ReLU(inplace=True)
  (16_1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (16_2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (17): ConvolutionalBlock(
    (conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2)
  )
  (18_1): ReLU(inplace=True)
  (18_2): ReLU(inplace=True)
  (19): ConvolutionalBlock(
    (conv): Conv2d(1024, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2)
  )
  (20_1): ReLU(inplace=True)
  (20_2): ReLU(inplace=True)
  (21): ConvolutionalBlock(
    (conv): Conv2d(1024, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2)
  )
  (22_1): ReLU(inplace=True)
  (22_2): ReLU(inplace=True)
  (23_1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (23_2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (24): ConvolutionalBlock(
    (conv): Conv2d(1024, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2)
  )
  (25_1): ReLU(inplace=True)
  (25_2): ReLU(inplace=True)
  (26): ConvolutionalBlock(
    (conv): Conv2d(1024, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2)
  )
  (27_1): ReLU(inplace=True)
  (27_2): ReLU(inplace=True)
  (28): ConvolutionalBlock(
    (conv): Conv2d(1024, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2)
  )
  (29_1): ReLU(inplace=True)
  (29_2): ReLU(inplace=True)
  (30_1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (30_2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (avgpool_1): AdaptiveAvgPool2d(output_size=(7, 7))
  (avgpool_2): AdaptiveAvgPool2d(output_size=(7, 7))
  (0_1): Linear(in_features=25088, out_features=4096, bias=True)
  (0_2): Linear(in_features=25088, out_features=4096, bias=True)
  (2_1): Dropout(p=0.5, inplace=False)
  (2_2): Dropout(p=0.5, inplace=False)
  (5_1): Dropout(p=0.5, inplace=False)
  (5_2): Dropout(p=0.5, inplace=False)
)

In this case, layers 1,3,4,6 of the classifier are not detected

ResNet18 original layers :

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (layer2): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (downsample): Sequential(
        (0): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2), bias=False)
        (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): BasicBlock(
      (conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, aoutput
  (layer4): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (downsample): Sequential(
        (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
        (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): BasicBlock(
      (conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(1, 1))
  (fc): Linear(in_features=512, out_features=1000, bias=True)
)

ResNet18 duplicate:

Sequential(
  (conv1): ConvolutionalBlock(
    (conv): Conv2d(6, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), groups=2)
  )
  (bn1_1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn1_2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu_1): ReLU(inplace=True)
  (relu_2): ReLU(inplace=True)
  (maxpool_1): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (maxpool_2): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (0_1): BasicBlock(
    (conv1): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (downsample): Sequential(
      (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
      (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (0_2): BasicBlock(
    (conv1): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (downsample): Sequential(
      (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
      (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (1_1): BasicBlock(
    (conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (1_2): BasicBlock(
    (conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (avgpool_1): AdaptiveAvgPool2d(output_size=(1, 1))
  (avgpool_2): AdaptiveAvgPool2d(output_size=(1, 1))
  (fc_1): Linear(in_features=512, out_features=1000, bias=True)
  (fc_2): Linear(in_features=512, out_features=1000, bias=True)

In this case, which is more complicated, layers are also lost, and others are copied together as blocks and not as individual layers.

My main question is, how can I copy the topology of any pre-trained model layer by layer, regardless of how it is defined?


Solution

  • You can simply dive into the source code of your target pretrained model and directly manipulate the target layers anyway you want. They are no different from any other variables in a python class.

    By the way, the module names printed on the terminal are not their actual names in the python code. They are synthesized based on a simple rule. For example, conv1 usually means the source code looks like self.conv = nn.ModuleList([nn.Conv2d for _ in range(n)]), and it's refering to the 2nd convolution layer in the list.