pythonmetaprogrammingtracebytecode-manipulationoverhead

Low-overhead tracing function in Python by modify the code object


sys.settrace is inefficient, to say the least. It adds a lot of overhead to every function call in Python.

Instead, I'm looking for a way to trace the "call" event for only a couple hundred functions in Python, without incurring additional overhead. I'm thinking of modifying the code object for each function so that they all call a tracing function.

import sys


def trace():
    frame = sys._getframe(1)
    ...


def func():
    trace()  # TODO: Add programmatically
    ...

Does anyone know how to do this? So far, I've found these resources...

I can't be the only person interested in low overhead localized tracing? Thank you for any help you can provide me!


Solution

  • Okay, so following the approach outlined in Modifying Code of Function at Runtime, I took a stab at it...

    # boo.py
    import inspect
    import sys
    import types
    
    
    def ___trace():
        """
        NOTE: This function needs a unique name, so, it doesn't interfere with globals.
        """
        frame = sys._getframe(1)
        print("Tracing:", frame.f_code.co_name)
        print("Locals:", frame.f_locals)
    
    
    def _unwrap(fn):
        while hasattr(fn, "__wrapped__"):
            fn = fn.__wrapped__
        return fn
    
    
    _orig_code = "___trace_orig_code"
    _closure = "___closure"
    
    
    def set_trace(fn):
        """Call `trace` at the beginning of `fn`."""
        fn = _unwrap(fn)
        if getattr(fn, _orig_code, None) is None:
            setattr(fn, _orig_code, fn.__code__)
        else:
            raise ValueError("Function is already being traced.")
        lines = inspect.getsourcelines(fn)[0]
        lines = lines[next(i for i, l in enumerate(lines) if "@" != l[0]) :]
        init_indent = len(lines[0]) - len(lines[0].lstrip())
        lines = ["    " + l[init_indent:] for l in lines]
        whitespace = lines[1][: len(lines[1]) - len(lines[1].lstrip())]
        # NOTE: So that the line numbers remain the name, the trace function is added to the first
        # line.
        lines[1] = f"{whitespace}{___trace.__name__}(); {lines[1].strip()}\n"
        free_vars = " ".join([f"    {var} = None;" for var in fn.__code__.co_freevars])
        code = "".join(lines)
        code = f"""
    def {_closure}():
    {free_vars}
    
    {code}
    
        return {fn.__name__}.__code__
    """
        module = fn.__globals__.copy()
        try:
            exec(code, module)
        except SyntaxError:
            raise SyntaxError("Unable to add `___trace` to function definition.")
        new = module[_closure]()
        fn.__code__ = types.CodeType(
            fn.__code__.co_argcount,
            fn.__code__.co_posonlyargcount,
            fn.__code__.co_kwonlyargcount,
            fn.__code__.co_nlocals,
            fn.__code__.co_stacksize,
            fn.__code__.co_flags,
            new.co_code,
            fn.__code__.co_consts,
            tuple([___trace.__name__] + list(fn.__code__.co_names)),
            fn.__code__.co_varnames,
            fn.__code__.co_filename,
            fn.__code__.co_name,
            fn.__code__.co_firstlineno,
            new.co_lnotab,
            fn.__code__.co_freevars,
            fn.__code__.co_cellvars,
        )
    
        if ___trace.__name__ in fn.__globals__:
            if fn.__globals__[___trace.__name__] is not ___trace:
                raise RuntimeError()
        else:
            fn.__globals__[___trace.__name__] = ___trace
    
    
    def unset_trace(fn):
        """Remove `trace` from the beginning of `fn`."""
        if hasattr(fn, _orig_code):
            fn.__code__ = getattr(fn, _orig_code)
    

    And I tested this on classes, closures, decorators, line numbers, traceback, globals...

    import functools
    import sys
    import traceback
    import typing
    
    from boo import set_trace, unset_trace
    
    
    def testing(*args, **kwargs):
        print("Inside `testing`...")
    
    
    def testing_closure(*args, **kwargs):
        variable = "hello!"
    
        def func(*args, **kwargs):
            print(f"Inside `testing_closure`... {variable}")
    
        func()
        set_trace(func)
        func()
    
    
    def test_lineno(*args, **kwargs):
        print("Inside `test_lineno`...")
        print("Line no", sys._getframe(0).f_lineno)
    
    
    def test_args(a: typing.List, b: typing.List):
        print("Inside `test_args`...")
    
    
    def test_traceback(*args, **kwargs):
        print("".join(traceback.format_stack()))
    
    
    def test_cellvars():
        print("Inside `test_cellvars`...")
    
        a = 10
    
        def func():
            return a
    
        return func()
    
    
    def test_funky_first_line():
        def func():
            return
    
    
    @functools.lru_cache()
    @functools.lru_cache()
    def testing_decorator(*args, **kwargs):
        print("Inside `testing_decorator`...")
    
    
    class Test:
        def __init__(self, *args, **kwargs):
            print("Inside `__init__`...")
    
    
    if __name__ == "__main__":
        set_trace(testing)
        testing()
        unset_trace(testing)
        testing()
    
        Test()
        set_trace(Test.__init__)
        Test()
        set_trace(testing_decorator)
        testing_decorator()
        testing_closure()
        set_trace(test_lineno)
        test_lineno()
        set_trace(test_traceback)
        test_traceback()
        set_trace(test_cellvars)
        test_cellvars()
        set_trace(test_args)
        test_args(["a"], ["b"])
        try:
            set_trace(test_funky_first_line)
        except SyntaxError:
            print("Unable to modify handle.")
    

    Let me know if I'm missing anything, please! I don't have much experience with code editing.

    Here is my fully working library implementation (tested in a production code base!): https://github.com/PetrochukM/HParams/blob/master/config/trace.py