python abstract-syntax-tree pycparser

How to remove AST nodes with pycparser?


Let's start by considering this snippet:

import sys

from pycparser import c_parser, c_ast, c_generator


# Sample C source handed to the parser below; note that some of the calls
# (foo2, foo3) are spread over several lines.
text = r"""
void main() {
    foo(1,3);

    foo1(4);

    x = 1;

     foo2(4,

        10,


        3);
    foo3(
        "xxx"

    );
}
"""


class FuncCallVisitor(c_ast.NodeVisitor):
    """Visitor that prints the name and coordinate of each function call it visits."""

    def visit_FuncCall(self, node):
        """Report one call, then continue the traversal into its arguments."""
        callee = node.name
        print('%s called at %s' % (callee.name, callee.coord))

        # A call without arguments has nothing further to traverse.
        if not node.args:
            return
        self.visit(node.args)


class RemoveFuncCalls(c_generator.CGenerator):
    """C generator that renders every function call as an empty string."""

    def visit_FuncCall(self, n):
        """Return '' for the call node ``n`` instead of generating any C text."""
        # Only the call's own text is suppressed here; whatever the caller
        # appends around it (e.g. a statement terminator) is still produced.
        return ''


if __name__ == '__main__':
    separator = '-' * 80

    # Parse the sample source and report every function call found in it.
    parser = c_parser.CParser()
    ast = parser.parse(text)
    FuncCallVisitor().visit(ast)
    print(separator)

    # Dump the AST, including source coordinates.
    ast.show(showcoord=True)

    # Regenerate C code with the call-suppressing generator.
    generator = RemoveFuncCalls()
    print(separator)
    print(generator.visit(ast))

The output of the above will be:

void main()
{
  ;
  ;
  x = 1;
  ;
  ;
}

But I'd like it to become this instead:

void main()
{
  x = 1;
}

So my question is, what's the canonical/idiomatic way to delete nodes/subtrees from the AST with pycparser?


Solution

  • It looks like c_generator.CGenerator calls its _generate_stmt method for scope-like structures, and that method appends ';\n' (with indentation) to the result of visiting the statement, even if that result is an empty string.

    To remove function calls, we can override it like this:

    class RemoveFuncCalls(c_generator.CGenerator):
        """C generator that emits nothing for statements that are function calls."""

        def _generate_stmt(self, n, add_indent=False):
            """Return '' when statement ``n`` is a FuncCall; otherwise defer to the base class."""
            if not isinstance(n, c_ast.FuncCall):
                return super()._generate_stmt(n, add_indent)
            # Skipping the base implementation entirely means no indentation
            # or terminator is emitted for the removed call.
            return ''
    

    with that

    void main()
    {
      x = 1;
    }
    

    which looks like what you want.

    Let's consider a case

    if (bar(42, "something"))
        return;
    

    if we need it to become

    if ()
        return;
    

    then we need to add

        def visit_FuncCall(self, n):
            # Render the call as an empty string wherever this visitor is
            # invoked for it.
            return ''
    

    as in the original post, because RemoveFuncCalls.visit_If does not call _generate_stmt when serializing the cond field.

    Going further

    I don't know what the "canonical/idiomatic way to delete nodes/subtrees from the AST with pycparser" is, but I do know one for the ast module from the stdlib: the ast.NodeTransformer class (which is absent from pycparser for some reason).

    It lets us modify the AST itself, instead of messing with how the AST is serialized to str by overriding private-ish methods:

    from pycparser import c_ast
    
    class NodeTransformer(c_ast.NodeVisitor):
        """Visitor whose visit methods may replace or remove the nodes they visit.

        A visit method returns the node to keep in place of the visited one.
        For a node stored in a list field it may also return ``None`` (the
        entry is dropped) or an iterable of values (spliced into the list).
        For a single-node field the return value is assigned to the field
        as-is, so ``None`` clears the field.
        """

        def generic_visit(self, node):
            """Visit every child field of ``node``, update it in place, and return ``node``."""
            for field, current in iter_fields(node):
                if isinstance(current, c_ast.Node):
                    setattr(node, field, self.visit(current))
                elif isinstance(current, list):
                    kept = []
                    for item in current:
                        if not isinstance(item, c_ast.Node):
                            # Non-node entries are carried over untouched.
                            kept.append(item)
                            continue
                        result = self.visit(item)
                        if result is None:
                            continue
                        if isinstance(result, c_ast.Node):
                            kept.append(result)
                        else:
                            kept.extend(result)
                    # Slice-assign so the list object held by the node is
                    # itself updated.
                    current[:] = kept
            return node
    
    
    def iter_fields(node):
        """Yield ``(field_name, value)`` pairs for the children of ``node``.

        ``node.children()`` is expected to return ``(name, child)`` pairs.
        Names containing ``'['`` (such as ``args[0]``) are treated as entries
        of a list-valued attribute: the attribute is yielded once, as a whole,
        under the name before the bracket, and its remaining entries are
        skipped. Any other pair is yielded unchanged.
        """
        children = node.children()
        position = 0
        while position < len(children):
            name, child = children[position]
            base, bracket, _ = name.partition('[')
            if bracket:
                # Indexed entry: hand out the whole list attribute and jump
                # past all of its entries in ``children``.
                values = getattr(node, base)
                position += len(values)
                yield base, values
            else:
                yield name, child
                position += 1
    

    and for our case it can be simply subclassed

    class FuncCallsRemover(NodeTransformer):
        """Transformer that discards every function-call node it visits."""

        def visit_FuncCall(self, node):
            # Returning None tells NodeTransformer.generic_visit to drop the
            # node from its parent.
            return None
    

    and used like

    ...
    ast = parser.parse(text)
    v = FuncCallsRemover()
    ast = v.visit(ast)  # `NodeTransformer` returns the (modified) AST rather than `None`, so keep the result
    

    After that, we can use an unmodified c_generator.CGenerator instance and get the same result.