Tags: python, pytorch, onnx, onnxruntime

How to correctly change layers in onnx model and restore them in onnxruntime


I want to change the weights of an ONNX model and then restore the original weights when loading it with onnxruntime. However, the model doesn't seem to be changed.

First, I load the existing model and change the weights based on this output. Then I save the model and try to check if the outputs are the same as in the original model.

The model should be restorable with onnxruntime (when restore_layer is True), as in this example based on this output, but it doesn't seem to work: the outputs are always the same and the model doesn't seem to be changed. The expected behavior is that when change_layer = True and restore_layer = False the script raises an error, but it doesn't.

I tried to change a part of a model weights when loading it with onnxruntime like in this example based on this issue, but it doesn't seem to work.

How can I fix it?

Here is the code to reproduce.

import onnx # onnx=="1.17.0"
import torch # torch=="2.3.0"
from onnx import numpy_helper
import numpy as np # numpy==1.26.4
import torchvision.models as models # torchvision==0.18.0
from torchvision.models import ResNet18_Weights

# Experiment switches.
change_layer = True       # overwrite the leading initializers with zeros
restore_layer = False     # hand the original weights back to onnxruntime
no_changed_layers = 20    # number of leading initializers that are affected

# Location of the untouched reference export.
MODEL_PATH = "some_model.onnx"

# Pretrained ResNet-18, switched to inference mode before tracing.
model = models.resnet18(weights=ResNet18_Weights.DEFAULT).eval()

# Random sample that drives the export trace.
input_data = torch.randn(1, 3, 224, 224)

torch.onnx.export(model, input_data, MODEL_PATH, verbose=False)

# Load the exported graph; its initializers carry the trained parameters.
_model = onnx.load(MODEL_PATH)
INTIALIZERS = _model.graph.initializer

# Snapshot every initializer before anything is overwritten:
#   Weight[i]               -> original array of initializer i
#   initializer_num_name[i] -> name of initializer i
Weight = {}
initializer_num_name = {}
for num, initializer in enumerate(INTIALIZERS):
    Weight[num] = numpy_helper.to_array(initializer)
    initializer_num_name[num] = initializer.name

# Weight_num_name[i] -> name of each initializer that was zeroed.
Weight_num_name = {}
if change_layer:
    # Fail with a readable message instead of an opaque protobuf IndexError.
    if no_changed_layers > len(INTIALIZERS):
        raise ValueError(
            "no_changed_layers=%d exceeds the %d initializers in %s"
            % (no_changed_layers, len(INTIALIZERS), MODEL_PATH)
        )
    for weight_zeros in range(no_changed_layers):
        updating_weight_name = initializer_num_name[weight_zeros]
        # Same shape and dtype as the original, but all zeros; the tensor
        # keeps the original name so graph nodes still resolve it.
        updated_weight = numpy_helper.from_array(
            np.zeros_like(Weight[weight_zeros]), name=updating_weight_name
        )
        Weight_num_name[weight_zeros] = updating_weight_name
        INTIALIZERS[weight_zeros].CopyFrom(updated_weight)

# Persist the (possibly zeroed) graph next to the reference export.
onnx.save(_model, "model.onnx")


import onnxruntime
import numpy as np


# Build the session for "model.onnx". With restore_layer set, the original
# weights are supplied through SessionOptions.add_initializer under the names
# of the zeroed initializers; otherwise the file is loaded as saved.
session_kwargs = {"providers": ["CPUExecutionProvider"]}
if restore_layer:
    import logging

    options = onnxruntime.SessionOptions()
    # Optional diagnostics: options.log_verbosity_level = 2,
    # options.log_severity_level = 0, options.enable_profiling = 1

    # The OrtValues are kept in a module-level list alongside the session.
    ortvalue_initializers = [
        onnxruntime.OrtValue.ortvalue_from_numpy(Weight[num])
        for num in range(0, no_changed_layers)
    ]
    for num, initializer in enumerate(ortvalue_initializers):
        options.add_initializer(Weight_num_name[num], initializer)

    logging.basicConfig(level=logging.DEBUG)
    session_kwargs["sess_options"] = options

onnx_session3 = onnxruntime.InferenceSession("model.onnx", **session_kwargs)


# Reference session on the untouched export. It is pinned to the same
# execution provider as onnx_session3 so the two outputs are comparable.
onnx_session = onnxruntime.InferenceSession(MODEL_PATH,
                                            providers=["CPUExecutionProvider"])
# Named model_input rather than input to avoid shadowing the builtin.
model_input = onnx_session.get_inputs()[0]

# One random batch shaped like the model's declared input, fed to both
# sessions. Assumes the exported shape is fully static (all dims are ints).
generated_dummy_input_data = torch.randn(size=tuple(model_input.shape))
dummy_input_array = generated_dummy_input_data.numpy()

# Inference on the reference model.
onnx_inputs = {model_input.name: dummy_input_array}
onnx_output = onnx_session.run(None, onnx_inputs)

# Inference on the modified (or restored) model with the very same data.
onnx_inputs3 = {onnx_session3.get_inputs()[0].name: dummy_input_array}
onnx_output3 = onnx_session3.run(None, onnx_inputs3)

def softmax(x: np.ndarray, axis: int = -1) -> np.ndarray:
    """Return the softmax of *x* along *axis*.

    The per-axis maximum is subtracted before exponentiating so that large
    inputs do not overflow; the result sums to 1 along *axis* and has the
    same shape as *x*.
    """
    exponentials = np.exp(x - np.amax(x, axis=axis, keepdims=True))
    return exponentials / np.sum(exponentials, axis=axis, keepdims=True)

# Logits of the single batch element from each session.
onnx_model_predictions = onnx_output[0][0]
onnx_model_predictions3 = onnx_output3[0][0]
diff = np.abs(onnx_model_predictions - onnx_model_predictions3)

# The check must FAIL when the two models disagree: every logit has to be
# within 1e-2 of its counterpart. Written as "all <= tol" so that NaNs also
# count as a mismatch, and raised explicitly so it still runs under -O.
if not (diff <= 1e-2).all():
    raise AssertionError(
        "Big difference between logits of an original model and onnx one: \n%s\n%s"
        % (onnx_model_predictions[:50], onnx_model_predictions3[:50])
    )
print("all good")






Solution

  • I can propose this solution: load the ONNX model and restore the layers in the model itself rather than at runtime. For some reason, restoring them at runtime (via SessionOptions.add_initializer) doesn't work properly. I also modified the assertion, because the previous one passed when the outputs differed instead of when they matched. Upvote if it helped, and leave a comment or another answer if you have another approach.

    import onnx  # onnx=="1.17.0"
    import torch  # torch=="2.3.0"
    from onnx import numpy_helper
    import numpy as np  # numpy==1.26.4
    import torchvision.models as models  # torchvision==0.18.0
    from torchvision.models import ResNet18_Weights
    
    # Experiment switches.
    change_layer = True       # overwrite the leading initializers with zeros
    restore_layer = True      # put the original weights back before inference
    no_changed_layers = 20    # number of leading initializers that are affected

    # Location of the untouched reference export.
    MODEL_PATH = "some_model.onnx"

    # Pretrained ResNet-18, switched to inference mode before tracing.
    model = models.resnet18(weights=ResNet18_Weights.DEFAULT).eval()

    # Random sample that drives the export trace.
    input_data = torch.randn(1, 3, 224, 224)

    # Write the reference ONNX file.
    torch.onnx.export(model, input_data, MODEL_PATH, verbose=False)

    # Load the exported graph; its initializers carry the trained parameters.
    _model = onnx.load(MODEL_PATH)
    INTIALIZERS = _model.graph.initializer

    # Snapshot every initializer before anything is overwritten:
    #   Weight[i]               -> original array of initializer i
    #   initializer_num_name[i] -> name of initializer i
    Weight = {}
    initializer_num_name = {}
    for num, initializer in enumerate(INTIALIZERS):
        Weight[num] = numpy_helper.to_array(initializer)
        initializer_num_name[num] = initializer.name

    # Weight_num_name[i] -> name of each initializer that was zeroed.
    Weight_num_name = {}
    if change_layer:
        # Fail with a readable message instead of an opaque protobuf IndexError.
        if no_changed_layers > len(INTIALIZERS):
            raise ValueError(
                "no_changed_layers=%d exceeds the %d initializers in %s"
                % (no_changed_layers, len(INTIALIZERS), MODEL_PATH)
            )
        for weight_zeros in range(no_changed_layers):
            updating_weight_name = initializer_num_name[weight_zeros]
            # Same shape and dtype as the original, but all zeros; the tensor
            # keeps the original name so graph nodes still resolve it.
            updated_weight = numpy_helper.from_array(
                np.zeros_like(Weight[weight_zeros]), name=updating_weight_name
            )
            Weight_num_name[weight_zeros] = updating_weight_name
            INTIALIZERS[weight_zeros].CopyFrom(updated_weight)

    # Save the modified ONNX model
    onnx.save(_model, "model.onnx")
    
    # Load the modified ONNX model with ONNX Runtime
    import onnxruntime
    
    if restore_layer:
        # The weights are restored inside the ONNX graph itself. The
        # alternative of injecting them at session creation through
        # SessionOptions.add_initializer was reported not to work here.
        modified_model = onnx.load("model.onnx")

        # Put the original arrays back into exactly the first
        # no_changed_layers initializers (indices 0 .. no_changed_layers - 1).
        # zip() also stops early if the graph has fewer initializers.
        for num, init in zip(range(no_changed_layers),
                             modified_model.graph.initializer):
            init.CopyFrom(
                numpy_helper.from_array(Weight[num], initializer_num_name[num])
            )

        # Create the inference session straight from the restored model bytes.
        restored_model_bytes = modified_model.SerializeToString()
        onnx_session3 = onnxruntime.InferenceSession(restored_model_bytes, providers=['CPUExecutionProvider'])
    else:
        # No restoration requested: run the modified file as saved.
        onnx_session3 = onnxruntime.InferenceSession("model.onnx", providers=["CPUExecutionProvider"])
    
    # Load the original ONNX model. It is pinned to the same execution
    # provider as onnx_session3 so the two outputs are comparable.
    onnx_session = onnxruntime.InferenceSession(MODEL_PATH, providers=["CPUExecutionProvider"])
    # Named model_input rather than input to avoid shadowing the builtin.
    model_input = onnx_session.get_inputs()[0]

    # Generate one random batch shaped like the model's declared input.
    # Assumes the exported shape is fully static (all dims are ints).
    generated_dummy_input_data = torch.randn(size=tuple(model_input.shape))
    dummy_input_array = generated_dummy_input_data.numpy()

    # Run inference on the original model
    onnx_inputs = {model_input.name: dummy_input_array}
    onnx_output = onnx_session.run(None, onnx_inputs)

    # Run inference on the modified model with the very same data
    onnx_inputs3 = {onnx_session3.get_inputs()[0].name: dummy_input_array}
    onnx_output3 = onnx_session3.run(None, onnx_inputs3)
    
    # Logits of the single batch element from each session.
    onnx_model_predictions, onnx_model_predictions3 = onnx_output[0][0], onnx_output3[0][0]
    diff = np.absolute(onnx_model_predictions - onnx_model_predictions3)

    # Both sessions must agree under np.allclose with atol=1e-2.
    assert np.allclose(
        onnx_model_predictions, onnx_model_predictions3, atol=1e-2
    ), "Big difference between logits of an original model and onnx one: \n%s\n%s" % (
        onnx_model_predictions[:50],
        onnx_model_predictions3[:50],
    )
    print("all good")