How to correctly change layers in onnx model and restore them in onnxruntime

I want to change the weights of an ONNX model and then restore them when running it with onnxruntime. However, the model doesn't seem to be changed.

First, I load the existing model and change the weights based on this output. Then I save the model and try to check if the outputs are the same as in the original model.

With onnxruntime, the model should be restorable (when restore_layer is True), as in this example based on this output, but it doesn't seem to work: the outputs of the two models are always the same and the model doesn't seem to be changed. The expected behavior is that with change_layer = True and restore_layer = False the script raises an error, but it doesn't.

I tried to change part of the model's weights when loading it with onnxruntime, as in this example based on this issue, but it doesn't seem to work.

How can I fix it?

Here is the code to reproduce.

import onnx # onnx=="1.17.0"
import torch # torch=="2.3.0"
from onnx import numpy_helper
import numpy as np # numpy==1.26.4
import torchvision.models as models # torchvision==0.18.0
from torchvision.models import ResNet18_Weights

# Script configuration:
#   change_layer      - zero out the leading initializers before saving.
#   restore_layer     - read later in the script when the session for the
#                       modified model is created.
#   no_changed_layers - how many leading initializers are zeroed.
change_layer = True
restore_layer = False
no_changed_layers = 20

# Single source of truth for the exported model path (used for both the
# export and the reload below).
MODEL_PATH = "some_model.onnx"

model = models.resnet18(weights=ResNet18_Weights.DEFAULT)
model.eval()

input_data = torch.randn(1, 3, 224, 224)

torch.onnx.export(model, input_data, MODEL_PATH, verbose=False)

_model = onnx.load(MODEL_PATH)
INTIALIZERS = _model.graph.initializer

# Snapshot every initializer by position (array and name) before anything
# is overwritten, so the original values stay available afterwards.
Weight = {}
initializer_num_name = {}
for num, initializer in enumerate(INTIALIZERS):
    Weight[num] = numpy_helper.to_array(initializer)
    initializer_num_name[num] = initializer.name

# Position -> name of every initializer that has been zeroed.
Weight_num_name = {}
if change_layer:
    for weight_zeros in range(no_changed_layers):
        updating_weight_name = initializer_num_name[weight_zeros]
        # Same shape/dtype as the original tensor, all zeros, same name.
        updated_weight = numpy_helper.from_array(
            np.zeros_like(Weight[weight_zeros]), updating_weight_name
        )
        Weight_num_name[weight_zeros] = updating_weight_name
        INTIALIZERS[weight_zeros].CopyFrom(updated_weight)

onnx.save(_model, "model.onnx")


import onnxruntime
import numpy as np


# Create the session for the modified model ("model.onnx"). With
# restore_layer set, the saved original arrays are registered on the session
# options under the names of the zeroed initializers.
if not restore_layer:
    onnx_session3 = onnxruntime.InferenceSession(
        "model.onnx",
        providers=["CPUExecutionProvider"],
    )
else:
    options = onnxruntime.SessionOptions()
    # options.log_verbosity_level=2
    # options.log_severity_level=0
    # options.enable_profiling=1
    ortvalue_initializers = []
    for num in range(no_changed_layers):
        initializer = onnxruntime.OrtValue.ortvalue_from_numpy(Weight[num])
        # Every OrtValue is also collected in a list -- presumably so it
        # stays referenced while the session is built (TODO confirm).
        ortvalue_initializers.append(initializer)
        options.add_initializer(Weight_num_name[num], initializer)
    import logging
    logging.basicConfig(level=logging.DEBUG)
    onnx_session3 = onnxruntime.InferenceSession(
        "model.onnx",
        sess_options=options,
        providers=["CPUExecutionProvider"],
    )

# Session for the untouched export, used as the reference.
onnx_session = onnxruntime.InferenceSession(MODEL_PATH)
reference_input = onnx_session.get_inputs()[0]

# Random input whose four dimensions are read from the reference model's
# first input.
dummy_shape = tuple(reference_input.shape[axis] for axis in range(4))
generated_dummy_input_data = torch.randn(size=dummy_shape)
dummy_array = generated_dummy_input_data.numpy()

# Feed the very same array to both sessions.
onnx_inputs = {reference_input.name: dummy_array}
onnx_output = onnx_session.run(None, onnx_inputs)

onnx_inputs3 = {onnx_session3.get_inputs()[0].name: dummy_array}
onnx_output3 = onnx_session3.run(None, onnx_inputs3)

def softmax(x: np.ndarray, axis: int = -1) -> np.ndarray:
    """Return the softmax of *x* taken along *axis*.

    The maximum along *axis* is subtracted before exponentiating, so the
    largest exponent is always zero; the exponentials are then divided by
    their sum along the same axis.
    """
    exponentials = np.exp(x - np.max(x, axis=axis, keepdims=True))
    return exponentials / np.sum(exponentials, axis=axis, keepdims=True)

# Compare the first sample's logits of the original and the modified model.
onnx_model_predictions = onnx_output[0][0]
onnx_model_predictions3 = onnx_output3[0][0]
diff = np.abs(onnx_model_predictions - onnx_model_predictions3)

# The check must FAIL when the two outputs diverge. The previous condition,
# `(diff > 1e-2).any()`, was inverted: it passed whenever some logit differed
# by more than the tolerance and failed when the outputs matched, so a
# modified (and not restored) model still printed "all good".
# `<=` combined with `.all()` also fails if any difference is NaN.
assert (diff <= 1e-2).all(), "Big difference between logits of an original model and onnx one: \n%s\n%s" % (
    onnx_model_predictions[:50],
    onnx_model_predictions3[:50],
)
print("all good")

Solution

I can propose this solution: load the ONNX model and restore the layers in the model itself, not in the runtime. For some reason, restoring them in the runtime doesn't work properly. I also modified the assertion, as the previous one didn't work properly. Upvote if it helped, and leave a comment or another answer if you have another approach.

import onnx  # onnx=="1.17.0"
import torch  # torch=="2.3.0"
from onnx import numpy_helper
import numpy as np  # numpy==1.26.4
import torchvision.models as models  # torchvision==0.18.0
from torchvision.models import ResNet18_Weights

# Script configuration:
#   change_layer      - zero out the leading initializers before saving.
#   restore_layer     - write the original weights back before inference.
#   no_changed_layers - how many leading initializers are zeroed.
change_layer = True
restore_layer = True
no_changed_layers = 20

# Path of the exported model; used for both the export and the reload.
MODEL_PATH = "some_model.onnx"

# Load the pre-trained ResNet18 model
model = models.resnet18(weights=ResNet18_Weights.DEFAULT)
model.eval()

# Generate random input data
input_data = torch.randn(1, 3, 224, 224)

# Export the model to ONNX format
torch.onnx.export(model, input_data, MODEL_PATH, verbose=False)

# Load the ONNX model
_model = onnx.load(MODEL_PATH)
INTIALIZERS = _model.graph.initializer

# Store weights and initializer names, keyed by position, before any
# initializer is overwritten
Weight = {}
initializer_num_name = {}
for num, initializer in enumerate(INTIALIZERS):
    Weight[num] = numpy_helper.to_array(initializer)
    initializer_num_name[num] = initializer.name

# Change layers if required: replace the first `no_changed_layers`
# initializers with zero tensors of the same shape, dtype and name
Weight_num_name = {}
if change_layer:
    for weight_zeros in range(no_changed_layers):
        updating_weight_name = initializer_num_name[weight_zeros]
        updated_weight = numpy_helper.from_array(
            np.zeros_like(Weight[weight_zeros]), updating_weight_name
        )
        Weight_num_name[weight_zeros] = updating_weight_name
        INTIALIZERS[weight_zeros].CopyFrom(updated_weight)

# Save the modified ONNX model
onnx.save(_model, "model.onnx")

# Load the modified ONNX model with ONNX Runtime
import onnxruntime

if restore_layer:
    # NOTE: registering the original arrays on the session through
    # SessionOptions.add_initializer was the first approach and reportedly
    # did not restore the outputs, so the weights are written back into the
    # model proto itself before the session is created.
    modified_model = onnx.load("model.onnx")

    # Only indices 0 .. no_changed_layers - 1 were zeroed, so exactly those
    # initializers are restored (the previous `<=` touched one extra tensor).
    for num, init in enumerate(modified_model.graph.initializer):
        if num >= no_changed_layers:
            break
        init.CopyFrom(
            numpy_helper.from_array(Weight[num], initializer_num_name[num])
        )

    # Create the inference session directly from the restored, serialized
    # model; nothing is written back to disk.
    restored_model_bytes = modified_model.SerializeToString()
    onnx_session3 = onnxruntime.InferenceSession(
        restored_model_bytes, providers=["CPUExecutionProvider"]
    )
else:
    # Use the modified model exactly as it was saved.
    onnx_session3 = onnxruntime.InferenceSession(
        "model.onnx", providers=["CPUExecutionProvider"]
    )

# Reference session: the untouched export
onnx_session = onnxruntime.InferenceSession(MODEL_PATH)
reference_input = onnx_session.get_inputs()[0]

# Random input whose four dimensions are read from the reference model's
# first input
dummy_shape = tuple(reference_input.shape[axis] for axis in range(4))
generated_dummy_input_data = torch.randn(size=dummy_shape)
dummy_array = generated_dummy_input_data.numpy()

# Inference with the original model
onnx_inputs = {reference_input.name: dummy_array}
onnx_output = onnx_session.run(None, onnx_inputs)

# Inference with the modified (or restored) model on the very same array
onnx_inputs3 = {onnx_session3.get_inputs()[0].name: dummy_array}
onnx_output3 = onnx_session3.run(None, onnx_inputs3)

# Compare the outputs: logits of the first sample from each session
onnx_model_predictions = onnx_output[0][0]
onnx_model_predictions3 = onnx_output3[0][0]

# Assert that the outputs are close (absolute tolerance 1e-2). The element-wise
# difference that used to be computed here was never read, so it is gone.
assert np.allclose(onnx_model_predictions, onnx_model_predictions3, atol=1e-2), "Big difference between logits of an original model and onnx one: \n%s\n%s" % (
    onnx_model_predictions[:50],
    onnx_model_predictions3[:50],
)
print("all good")