Tags: python, tokenize, llama

TypeError: llama_tokenize() missing 2 required positional arguments: 'add_bos' and 'special'


I am running Python 3.11 and the latest version of llama-cpp-python with a GGUF model.

I expect the code to run normally, like a chatbot, but instead I get this error:

Traceback (most recent call last):
  File "d:\AI Custom\AI Arush\server.py", line 223, in <module>
    init()
  File "d:\AI Custom\AI Arush\server.py", line 57, in init
    m_eval(model, m_tokenize(model, PROMPT_INIT, True), False, "Starting up...")
                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\AI Custom\AI Arush\server.py", line 182, in m_tokenize
    n_tokens = llama_cpp.llama_tokenize(
               ^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: llama_tokenize() missing 2 required positional arguments: 'add_bos' and 'special'

This is my tokenize code:

def m_tokenize(model: llama_cpp.Llama, text: bytes, add_bos=False, special=False):
    assert model.ctx is not None
    n_ctx = llama_cpp.llama_n_ctx(model.ctx)
    tokens = (llama_cpp.llama_token * int(n_ctx))()
    n_tokens = llama_cpp.llama_tokenize(
        model.ctx,
        text,
        tokens,
        n_ctx,
        llama_cpp.c_bool(add_bos),
    )
    if int(n_tokens) < 0:
        raise RuntimeError(f'Failed to tokenize: text="{text}" n_tokens={n_tokens}')
    return list(tokens[:n_tokens])


Solution

  • TypeError: llama_tokenize() missing 2 required positional arguments: 'add_bos' and 'special'
    

    To resolve the error, you need to pass the add_bos and special arguments to llama_tokenize(). Note that the current signature (quoted below) also takes the model pointer rather than the context, plus a text_len argument, so those are fixed in the call as well:

    def m_tokenize(model: llama_cpp.Llama, text: bytes, add_bos=False, special=False):
        assert model.ctx is not None
        n_ctx = llama_cpp.llama_n_ctx(model.ctx)
        tokens = (llama_cpp.llama_token * int(n_ctx))()

        # llama_tokenize() now takes the model pointer (Llama.model), the text
        # length, and the two previously missing arguments (see the signature
        # quoted below from llama_cpp.py).
        n_tokens = llama_cpp.llama_tokenize(
            model.model,
            text,
            len(text),
            tokens,
            n_ctx,
            add_bos,  # plain Python bools are fine: the declared c_bool
            special,  # argtypes convert them, so llama_cpp.c_bool() is unneeded
        )

        if int(n_tokens) < 0:
            raise RuntimeError(f'Failed to tokenize: text="{text}" n_tokens={n_tokens}')

        return list(tokens[:n_tokens])
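
    For reference, a minimal usage sketch (the model path is a hypothetical placeholder; adjust it to your setup):

    import llama_cpp

    # Hypothetical example: load a GGUF model with the high-level wrapper,
    # then tokenize a prompt with the fixed helper above.
    model = llama_cpp.Llama(model_path="models/your-model.gguf")
    tokens = m_tokenize(model, b"Hello, world!", add_bos=True)
    print(tokens)  # a list of int token ids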
    

    From llama_cpp.py (GitHub), starting at line 1817:

    def llama_tokenize(
        model: llama_model_p,
        text: bytes,
        text_len: Union[c_int, int],
        tokens,  # type: Array[llama_token]
        n_max_tokens: Union[c_int, int],
        add_bos: Union[c_bool, bool],
        special: Union[c_bool, bool],
    ) -> int:
        """Convert the provided text into tokens."""
        return _lib.llama_tokenize(
            model, text, text_len, tokens, n_max_tokens, add_bos, special
        )
    
    
    _lib.llama_tokenize.argtypes = [
        llama_model_p,
        c_char_p,
        c_int32,
        llama_token_p,
        c_int32,
        c_bool,
        c_bool,
    ]
    _lib.llama_tokenize.restype = c_int32
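
    Because the argtypes declare c_bool, ctypes converts plain Python booleans at call time, which is why wrapping with llama_cpp.c_bool() is unnecessary (though harmless). A standalone ctypes sketch (not llama-cpp-python code) illustrating this conversion rule:

    import ctypes

    # A callback prototype that declares c_bool parameters, mirroring how
    # _lib.llama_tokenize declares its add_bos/special arguments.
    PROTO = ctypes.CFUNCTYPE(ctypes.c_int, ctypes.c_bool, ctypes.c_bool)

    @PROTO
    def demo(add_bos, special):
        return int(add_bos) + int(special)

    # Plain Python bools are converted automatically...
    print(demo(True, False))                                # -> 1
    # ...and explicit c_bool wrappers are accepted too.
    print(demo(ctypes.c_bool(True), ctypes.c_bool(False)))  # -> 1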