python, huggingface-transformers, onnx, huggingface-tokenizers

How can I save a tokenizer from Huggingface transformers to ONNX?


I load a tokenizer and a BERT model from Huggingface transformers, and export the BERT model to ONNX:

from transformers import AutoTokenizer, AutoModelForTokenClassification
import torch

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained("huawei-noah/TinyBERT_General_4L_312D")

# Load the model
model = AutoModelForTokenClassification.from_pretrained("huawei-noah/TinyBERT_General_4L_312D")

# Example usage
text = "Hugging Face is creating a tool that democratizes AI."
inputs = tokenizer(text, return_tensors="pt")

# We need to use the inputs to trace the model
input_names = ["input_ids", "attention_mask"]
output_names = ["output"]

# Export the model to ONNX
torch.onnx.export(
    model,                                           # model being run
    (inputs["input_ids"], inputs["attention_mask"]), # model input (or a tuple for multiple inputs)
    "TinyBERT_General_4L_312D.onnx",                 # where to save the model
    export_params=True,                              # store the trained parameter weights inside the model file
    opset_version=11,                                # the ONNX version to export the model to
    do_constant_folding=True,                        # whether to execute constant folding for optimization
    input_names=input_names,                         # the model's input names
    output_names=output_names,                       # the model's output names
    dynamic_axes={                                   # variable length axes
        "input_ids": {0: "batch_size"}, 
        "attention_mask": {0: "batch_size"},
        "output": {0: "batch_size"}
    }
)

print("Model has been successfully exported to ONNX")

Requirements:

pip install transformers torch onnx
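
For reference, the exported model itself runs fine from the already-tokenized inputs (a quick sanity check with onnxruntime, which would need to be installed separately; it reuses the inputs dict from above):

import onnxruntime as ort

# Run the exported ONNX model on the already-tokenized inputs
session = ort.InferenceSession("TinyBERT_General_4L_312D.onnx")
ort_inputs = {
    "input_ids": inputs["input_ids"].numpy(),
    "attention_mask": inputs["attention_mask"].numpy(),
}
logits = session.run(None, ort_inputs)[0]
print(logits.shape)  # (batch_size, sequence_length, num_labels)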

How should I save the tokenizer to ONNX?


Solution

  • One can follow that tutorial (MIT license), which relies on onnxruntime-extensions (MIT license), to save a tokenizer from Huggingface transformers to ONNX:

    import onnx
    import torch
    
    from pathlib import Path
    from onnxruntime_extensions import pnp, OrtPyFunction
    from transformers import AutoTokenizer
    from transformers.onnx import export, FeaturesManager
    
    # get an onnx model by converting HuggingFace pretrained model
    model_name = "bert-base-cased"
    model_path = Path("onnx-model/bert-base-cased.onnx")
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    if not model_path.exists():
        if not model_path.parent.exists():
            model_path.parent.mkdir(parents=True, exist_ok=True)
        model = FeaturesManager.get_model_from_feature("default", model_name)
        model_kind, model_onnx_config = FeaturesManager.check_supported_model_or_raise(model, feature="default")
        onnx_config = model_onnx_config(model.config)
        export(tokenizer,
               model=model,
               config=onnx_config,
               opset=12,
               output=model_path)
    
    
    # a silly post-processing example function, demo-purpose only
    def post_processing_forward(*pred):
        return torch.softmax(pred[1], dim=1)
    
    
    # mapping the BertTokenizer outputs into the onnx model inputs
    def mapping_token_output(_1, _2, _3):
        return _1.unsqueeze(0), _3.unsqueeze(0), _2.unsqueeze(0)
    
    
    test_sentence = ["this is a test sentence."]
    ort_tok = pnp.PreHuggingFaceBert(hf_tok=tokenizer)
    onnx_model = onnx.load_model(str(model_path))
    
    
    augmented_model_name = 'temp_bert_tok_all.onnx'
    # create the final onnx model which includes pre- and post- processing.
    augmented_model = pnp.export(pnp.SequentialProcessingModule(
                                 ort_tok, mapping_token_output,
                                 onnx_model, post_processing_forward),
                                 test_sentence,
                                 opset_version=12,
                                 output_path=augmented_model_name)
    
    # test the augmented onnx model with raw string input.
    model_func = OrtPyFunction.from_model(augmented_model_name)
    result = model_func(test_sentence)
    print(result)
    

    Requirements:

    pip install transformers torch onnx onnxruntime_extensions
    

    Tested with Python 3.11 on Ubuntu 20.04 with:

    onnx==1.16.1
    onnxruntime_extensions==0.11.0
    torch==2.1.2
    transformers==4.36.2
    

    Thanks Chien Nguyen for pointing me to this tutorial.
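
    If you prefer to drive the merged model with a plain onnxruntime InferenceSession rather than OrtPyFunction (a minimal sketch, assuming the temp_bert_tok_all.onnx file produced above), the custom tokenizer op has to be registered from the onnxruntime-extensions shared library:

    import numpy as np
    import onnxruntime as ort
    from onnxruntime_extensions import get_library_path

    # register the custom ops (e.g. the BERT tokenizer) shipped with onnxruntime-extensions
    sess_options = ort.SessionOptions()
    sess_options.register_custom_ops_library(get_library_path())

    # the merged model takes raw strings and returns the post-processed probabilities
    session = ort.InferenceSession("temp_bert_tok_all.onnx", sess_options)
    input_name = session.get_inputs()[0].name
    result = session.run(None, {input_name: np.array(["this is a test sentence."])})
    print(result[0])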

    Another option would be to use a tokenizer coded in the target language, e.g. in C++ or in C#.
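
    In that case the tokenizer is not exported to ONNX at all; one typically just dumps its vocabulary and configuration from Python (a sketch; the output directory name is arbitrary) and loads the resulting tokenizer.json / vocab.txt from the native implementation:

    from transformers import AutoTokenizer

    # write out tokenizer.json, vocab.txt, tokenizer_config.json, etc. so that a
    # C++/C#/Rust tokenizer implementation can load them directly
    tokenizer = AutoTokenizer.from_pretrained("huawei-noah/TinyBERT_General_4L_312D")
    tokenizer.save_pretrained("tinybert-tokenizer")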