Tags: c, enums, clang, libclang

Get function call argument enum type with libclang


I'm using libclang to parse some code, and I want to find calls to a specific function and the types of its arguments.

For example, let's say the code is:

void foo(int a, ...) {}  // variadic: only `a` has a declared type

enum test {
  ENUM_VAL1,
  ENUM_VAL2
}; 

int main() {
  enum test e = ENUM_VAL1;
  int a = 1;
  foo(a, e);  // `e` is passed through the `...` part of foo's parameter list
}

In this case, I want to find function "foo" and see that it has two arguments, the first is an integer, and the second is an "enum test".

Running the following code:

/*
 * Visitor for clang_visitChildren: looks for call expressions whose spelling
 * is FUNCTION_NAME and prints the type spelling of each of their arguments.
 *
 * Returns CXChildVisit_Recurse for cursors that are not call expressions or
 * that call a different function, so the search continues into their
 * children.  Returns CXChildVisit_Continue after reporting a matching call.
 * `parent` and `client_data` are not used.
 */
static enum CXChildVisitResult visitFuncCalls(CXCursor current_cursor,
                                              CXCursor parent,
                                              CXClientData client_data) {
  static const char *FUNCTION_NAME = "foo";

  if (clang_getCursorKind(current_cursor) != CXCursor_CallExpr) {
    return CXChildVisit_Recurse;
  }

  // Decide on the match first, then release the CXString on every path.
  // Returning before clang_disposeString would leak the string for each
  // call expression that does not match.
  const CXString spelling = clang_getCursorSpelling(current_cursor);
  const char *callee_name = clang_getCString(spelling);
  const int is_target =
      callee_name != NULL && strcmp(callee_name, FUNCTION_NAME) == 0;
  clang_disposeString(spelling);
  if (!is_target) {
    return CXChildVisit_Recurse;
  }

  // The argument count does not change while iterating, so query it once.
  const int num_arguments = clang_Cursor_getNumArguments(current_cursor);
  for (int i = 0; i < num_arguments; i++) {
    CXCursor argument = clang_Cursor_getArgument(current_cursor, i);
    CXType argument_type = clang_getCursorType(argument);
    CXString argument_type_spelling = clang_getTypeSpelling(argument_type);

    printf("Argument %d: %s\n", i, clang_getCString(argument_type_spelling));

    clang_disposeString(argument_type_spelling);
  }

  return CXChildVisit_Continue;
}

// Parses "file.c" into a translation unit and walks it with visitFuncCalls
// as the visitor.
// NOTE(review): `index` is used below but is not declared anywhere in this
// snippet; presumably a CXIndex has to be created first -- confirm.
int main() {
  const CXTranslationUnit unit = clang_parseTranslationUnit(
        index, "file.c", NULL, 0, NULL, 0, CXTranslationUnit_None);
  const CXCursor cursor = clang_getTranslationUnitCursor(unit);
  clang_visitChildren(cursor, visitFuncCalls, NULL /* client_data*/);
}

I get:

Argument 0: int
Argument 1: unsigned int

So, basically, the compiler is ignoring the fact that this type is an enum, and shows it as an unsigned int. Is there a way to know this argument is an enum?


Solution

  • The issue is variable arguments

    First, note that the code in the question (after fixing by declaring index) works for the case of calling a function that explicitly accepts an argument of type enum test. That is, if we change the line:

    void foo(int a, ...) {}
    

    to:

    void foo(int a, enum test) {}
    

    and move it below the declaration of enum test, then the code in the question prints:

    Argument 0: int
    Argument 1: enum test      <-- what we want
    

    So the focus of the question is how to make this work when the callee is a variable-argument function.

    Looking at the AST

    We can better understand what is going on by dumping the AST of the original file.c:

    $ clang -fsyntax-only -Xclang -ast-dump file.c -fno-diagnostics-color
    TranslationUnitDecl 0x56530660a508 <<invalid sloc>> <invalid sloc>
    |-TypedefDecl 0x56530660ad30 <<invalid sloc>> <invalid sloc> implicit __int128_t '__int128'
    | `-BuiltinType 0x56530660aad0 '__int128'
    |-TypedefDecl 0x56530660ada0 <<invalid sloc>> <invalid sloc> implicit __uint128_t 'unsigned __int128'
    | `-BuiltinType 0x56530660aaf0 'unsigned __int128'
    |-TypedefDecl 0x56530660b0a8 <<invalid sloc>> <invalid sloc> implicit __NSConstantString 'struct __NSConstantString_tag'
    | `-RecordType 0x56530660ae80 'struct __NSConstantString_tag'
    |   `-Record 0x56530660adf8 '__NSConstantString_tag'
    |-TypedefDecl 0x56530660b140 <<invalid sloc>> <invalid sloc> implicit __builtin_ms_va_list 'char *'
    | `-PointerType 0x56530660b100 'char *'
    |   `-BuiltinType 0x56530660a5b0 'char'
    |-TypedefDecl 0x56530660b438 <<invalid sloc>> <invalid sloc> implicit __builtin_va_list 'struct __va_list_tag[1]'
    | `-ConstantArrayType 0x56530660b3e0 'struct __va_list_tag[1]' 1 
    |   `-RecordType 0x56530660b220 'struct __va_list_tag'
    |     `-Record 0x56530660b198 '__va_list_tag'
    |-FunctionDecl 0x565306666930 <file.c:1:1, col:23> col:6 used foo 'void (int, ...)'
    | |-ParmVarDecl 0x565306666860 <col:10, col:14> col:14 a 'int'
    | `-CompoundStmt 0x565306666a28 <col:22, col:23>
    |-EnumDecl 0x565306666a38 <line:3:1, line:6:1> line:3:6 test
    | |-EnumConstantDecl 0x565306666b00 <line:4:3> col:3 referenced ENUM_VAL1 'int'
    | `-EnumConstantDecl 0x565306666b50 <line:5:3> col:3 ENUM_VAL2 'int'
    `-FunctionDecl 0x565306666bf0 <line:8:1, line:12:1> line:8:5 main 'int ()'
      `-CompoundStmt 0x565306666f78 <col:12, line:12:1>
        |-DeclStmt 0x565306666d90 <line:9:3, col:26>
        | `-VarDecl 0x565306666cf0 <col:3, col:17> col:13 used e 'enum test':'enum test' cinit
        |   `-ImplicitCastExpr 0x565306666d78 <col:17> 'enum test':'enum test' <IntegralCast>
        |     `-DeclRefExpr 0x565306666d58 <col:17> 'int' EnumConstant 0x565306666b00 'ENUM_VAL1' 'int'
        |-DeclStmt 0x565306666e48 <line:10:3, col:12>
        | `-VarDecl 0x565306666dc0 <col:3, col:11> col:7 used a 'int' cinit
        |   `-IntegerLiteral 0x565306666e28 <col:11> 'int' 1
        `-CallExpr 0x565306666f00 <line:11:3, col:11> 'void'
          |-ImplicitCastExpr 0x565306666ee8 <col:3> 'void (*)(int, ...)' <FunctionToPointerDecay>
          | `-DeclRefExpr 0x565306666e60 <col:3> 'void (int, ...)' Function 0x565306666930 'foo' 'void (int, ...)'
          |-ImplicitCastExpr 0x565306666f30 <col:7> 'int' <LValueToRValue>
          | `-DeclRefExpr 0x565306666e80 <col:7> 'int' lvalue Var 0x565306666dc0 'a' 'int'
          `-ImplicitCastExpr 0x565306666f60 <col:10> 'unsigned int' <IntegralCast>
            `-ImplicitCastExpr 0x565306666f48 <col:10> 'enum test':'enum test' <LValueToRValue>
              `-DeclRefExpr 0x565306666ea0 <col:10> 'enum test':'enum test' lvalue Var 0x565306666cf0 'e' 'enum test':'enum test'
    

    Note the key lines at the end:

          `-ImplicitCastExpr 0x565306666f60 <col:10> 'unsigned int' <IntegralCast>
            `-ImplicitCastExpr 0x565306666f48 <col:10> 'enum test':'enum test' <LValueToRValue>
              `-DeclRefExpr 0x565306666ea0 <col:10> 'enum test':'enum test' lvalue Var 0x565306666cf0 'e' 'enum test':'enum test'
    

    What is happening is the argument expression e undergoes two implicit conversions, the first being an lvalue-to-rvalue conversion and the second being a promotion from enum test to unsigned int. It is the second conversion that causes the type to be reported as unsigned int in the original code, because that is the correct argument type after the promotions mandated by the semantics of variable-argument function calls.

    So, our goal now is to get the type of the expression underneath the ImplicitCastExpr node.

    Getting the type underneath ImplicitCastExpr

    In the C++ API, skipping ImplicitCastExpr is easy since you just call CastExpr::getSubExpr.

    But in the C API, ImplicitCastExpr is unfortunately only indicated by the cursor kind being CXCursor_UnexposedExpr, recognizable directly or with clang_isUnexposed.

    Consequently, we have to check for that and assume it means ImplicitCastExpr. That is not safe in general, since other kinds of nodes are also mapped to CXCursor_UnexposedExpr, but in the context of having already recognized an argument expression of a function call in the C language, I think ImplicitCastExpr is the only possibility. (Unfortunately, the C API is often ambiguous in ways like this, requiring various fragile heuristics to overcome. I recommend using the C++ API instead if possible.)

    Given a CXCursor to such an expression, here is code that will search the tree for the first node that is not an unexposed kind and yield its type:

    // Client data for `getUnderTypeVisitor`: carries the visitor's result
    // back to `getUnderType`.
    typedef struct GetUnderTypeData {
      // Underlying type, if any.  Written by the visitor together with
      // `found`; not meaningful while `found` is false.
      CXType underType;
    
      // True if we find a type to use.  `getUnderType` clears this before
      // visiting; the visitor sets it when it stores a type.
      bool found;
    } GetUnderTypeData;
    
    // Child visitor used by `getUnderType`.  `client_data` must point to a
    // `GetUnderTypeData`.  Keeps descending while the cursor kind is
    // "unexposed"; at the first cursor of any other kind it records that
    // cursor's type and ends the traversal.  `parent` is not used.
    enum CXChildVisitResult getUnderTypeVisitor(
      CXCursor c, CXCursor parent, CXClientData client_data)
    {
      // The C API surfaces the AST node `ImplicitCastExpr` as an
      // "unexposed" kind, so an unexposed cursor is assumed to be such a
      // cast and we look through it into its children.
      if (clang_isUnexposed(clang_getCursorKind(c))) {
        return CXChildVisit_Recurse;
      }
    
      // Any other kind probably has a usable type: store it and stop.
      GetUnderTypeData *result = (GetUnderTypeData *)client_data;
      result->underType = clang_getCursorType(c);
      result->found = true;
      return CXChildVisit_Break;
    }
    
    // Look beneath `c` for the first cursor that is not an "unexposed"
    // (assumed `ImplicitCastExpr`) node.  On success, store that cursor's
    // type in `*underType` and return true; otherwise return false and
    // leave `*underType` untouched.
    //
    // Only the descendants of `c` are examined (via `clang_visitChildren`);
    // the type of `c` itself is never considered.
    bool getUnderType(CXCursor c, CXType * /*OUT*/ underType)
    {
      GetUnderTypeData result;
      result.found = false;
    
      clang_visitChildren(c, getUnderTypeVisitor, &result);
      if (!result.found) {
        return false;
      }
    
      *underType = result.underType;
      return true;
    }
    

    Complete example

    Inserting the above code into the original question code (plus a couple other fixes), we have:

    // ---------------------------- BEGIN ADDED ----------------------------
    #include <clang-c/Index.h>
    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>
    
    // Client data for `getUnderTypeVisitor`: carries the visitor's result
    // back to `getUnderType`.
    typedef struct GetUnderTypeData {
      // Underlying type, if any.  Written by the visitor together with
      // `found`; not meaningful while `found` is false.
      CXType underType;
    
      // True if we find a type to use.  `getUnderType` clears this before
      // visiting; the visitor sets it when it stores a type.
      bool found;
    } GetUnderTypeData;
    
    // Child visitor used by `getUnderType`.  `client_data` must point to a
    // `GetUnderTypeData`.  Keeps descending while the cursor kind is
    // "unexposed"; at the first cursor of any other kind it records that
    // cursor's type and ends the traversal.  `parent` is not used.
    enum CXChildVisitResult getUnderTypeVisitor(
      CXCursor c, CXCursor parent, CXClientData client_data)
    {
      // The C API surfaces the AST node `ImplicitCastExpr` as an
      // "unexposed" kind, so an unexposed cursor is assumed to be such a
      // cast and we look through it into its children.
      if (clang_isUnexposed(clang_getCursorKind(c))) {
        return CXChildVisit_Recurse;
      }
    
      // Any other kind probably has a usable type: store it and stop.
      GetUnderTypeData *result = (GetUnderTypeData *)client_data;
      result->underType = clang_getCursorType(c);
      result->found = true;
      return CXChildVisit_Break;
    }
    
    // Look beneath `c` for the first cursor that is not an "unexposed"
    // (assumed `ImplicitCastExpr`) node.  On success, store that cursor's
    // type in `*underType` and return true; otherwise return false and
    // leave `*underType` untouched.
    //
    // Only the descendants of `c` are examined (via `clang_visitChildren`);
    // the type of `c` itself is never considered.
    bool getUnderType(CXCursor c, CXType * /*OUT*/ underType)
    {
      GetUnderTypeData result;
      result.found = false;
    
      clang_visitChildren(c, getUnderTypeVisitor, &result);
      if (!result.found) {
        return false;
      }
    
      *underType = result.underType;
      return true;
    }
    // ----------------------------- END ADDED -----------------------------
    
    // Visitor for clang_visitChildren: looks for call expressions whose
    // spelling is FUNCTION_NAME and, for each argument, prints both the
    // argument's own type and the type found beneath any implicit casts
    // (when `getUnderType` finds one).
    //
    // Returns CXChildVisit_Recurse for cursors that are not call
    // expressions or that call a different function, so the search
    // continues into their children.  Returns CXChildVisit_Continue after
    // reporting a matching call.  `parent` and `client_data` are not used.
    static enum CXChildVisitResult visitFuncCalls(CXCursor current_cursor,
                                                  CXCursor parent,
                                                  CXClientData client_data) {
      static const char *FUNCTION_NAME = "foo";
    
      if (clang_getCursorKind(current_cursor) != CXCursor_CallExpr) {
        return CXChildVisit_Recurse;
      }
    
      // Decide on the match first, then release the CXString on every
      // path.  Returning before clang_disposeString would leak the string
      // for each call expression that does not match.
      const CXString spelling = clang_getCursorSpelling(current_cursor);
      const char *callee_name = clang_getCString(spelling);
      const bool is_target =
          callee_name != NULL && strcmp(callee_name, FUNCTION_NAME) == 0;
      clang_disposeString(spelling);
      if (!is_target) {
        return CXChildVisit_Recurse;
      }
    
      // The argument count does not change while iterating, so query it
      // once.
      const int num_arguments = clang_Cursor_getNumArguments(current_cursor);
      for (int i = 0; i < num_arguments; i++) {
        CXCursor argument = clang_Cursor_getArgument(current_cursor, i);
        CXType argument_type = clang_getCursorType(argument);
        CXString argument_type_spelling = clang_getTypeSpelling(argument_type);
    
        printf("Argument %d: %s\n", i, clang_getCString(argument_type_spelling));
    
        // -------------------------- BEGIN ADDED --------------------------
        CXType underType;
        if (getUnderType(argument, &underType)) {
          CXString underTypeSpelling = clang_getTypeSpelling(underType);
          printf("underType: %s\n", clang_getCString(underTypeSpelling));
          clang_disposeString(underTypeSpelling);
        }
        // --------------------------- END ADDED ---------------------------
    
        clang_disposeString(argument_type_spelling);
      }
    
      return CXChildVisit_Continue;
    }
    
    // Parse "file.c" with libclang and walk it with `visitFuncCalls` as
    // the visitor.  Returns 0 on success, or 1 if the file could not be
    // parsed.
    int main() {
      // --------------------------- BEGIN ADDED ---------------------------
      CXIndex index = clang_createIndex(0, 0);
      // ---------------------------- END ADDED ----------------------------
      const CXTranslationUnit unit = clang_parseTranslationUnit(
            index, "file.c", NULL, 0, NULL, 0, CXTranslationUnit_None);
      if (unit == NULL) {
        // Without this check a parse failure would silently print nothing.
        fprintf(stderr, "failed to parse file.c\n");
        clang_disposeIndex(index);
        return 1;
      }
    
      const CXCursor cursor = clang_getTranslationUnitCursor(unit);
      clang_visitChildren(cursor, visitFuncCalls, NULL /* client_data*/);
    
      // Release the libclang resources: the translation unit first, then
      // the index it was created from.
      clang_disposeTranslationUnit(unit);
      clang_disposeIndex(index);
      return 0;
    }
    

    When run on the original file.c, the output is:

    Argument 0: int
    underType: int
    Argument 1: unsigned int
    underType: enum test         <--- got it
    

    Update: The above code has two bugs:

    1. getUnderType should check if c itself can yield a type.

    2. It does not properly handle the case of an enumerator passed directly as an argument.

    See the updated answer to the question "How do I get the enum type of a clang::EnumConstantDecl?" for fixes to those issues.