I'm using libclang to parse some code, and I want to find calls to a specific function and the types of its arguments.
For example, let's say the code is:
void foo(int a, ...) {}  // variadic: only `a` has a declared parameter type
enum test {
ENUM_VAL1,
ENUM_VAL2
};
int main() {
enum test e = ENUM_VAL1;
int a = 1;
foo(a, e);  // `e` is passed through the `...` part of foo's parameter list
}
In this case, I want to find function "foo" and see that it has two arguments, the first is an integer, and the second is an "enum test".
Running the following code:
// Visitor for clang_visitChildren(): finds call expressions whose spelling
// is "foo" and prints the type spelling of each argument.
//
// Returns CXChildVisit_Recurse for every cursor that is not a call to
// "foo" (so nested calls are still found), and CXChildVisit_Continue after
// reporting a matching call.  `parent` and `client_data` are unused.
static enum CXChildVisitResult visitFuncCalls(CXCursor current_cursor,
                                              CXCursor parent,
                                              CXClientData client_data) {
  (void)parent;
  (void)client_data;

  if (clang_getCursorKind(current_cursor) != CXCursor_CallExpr) {
    return CXChildVisit_Recurse;
  }

  static const char *FUNCTION_NAME = "foo";
  const CXString spelling = clang_getCursorSpelling(current_cursor);
  const char *callee_name = clang_getCString(spelling);
  // Defensive NULL check: strcmp() on a NULL pointer is undefined behavior.
  const int is_target =
      callee_name != NULL && strcmp(callee_name, FUNCTION_NAME) == 0;
  // Release the string before branching so it is freed on every path; the
  // earlier version returned without disposing it when the name differed.
  clang_disposeString(spelling);
  if (!is_target) {
    return CXChildVisit_Recurse;
  }

  // Query the argument count once instead of on every loop iteration.
  const int num_arguments = clang_Cursor_getNumArguments(current_cursor);
  for (int i = 0; i < num_arguments; i++) {
    CXCursor argument = clang_Cursor_getArgument(current_cursor, i);
    CXType argument_type = clang_getCursorType(argument);
    CXString argument_type_spelling = clang_getTypeSpelling(argument_type);
    printf("Argument %d: %s\n", i, clang_getCString(argument_type_spelling));
    clang_disposeString(argument_type_spelling);
  }
  return CXChildVisit_Continue;
}
// Parses "file.c" and runs visitFuncCalls over the resulting AST.
int main() {
// NOTE(review): `index` is used below but is not declared anywhere in this
// snippet; it has to be created (as a CXIndex) before this compiles.
const CXTranslationUnit unit = clang_parseTranslationUnit(
index, "file.c", NULL, 0, NULL, 0, CXTranslationUnit_None);
// Start at the translation unit's root cursor and visit its children with
// visitFuncCalls; no client data is passed.
const CXCursor cursor = clang_getTranslationUnitCursor(unit);
clang_visitChildren(cursor, visitFuncCalls, NULL /* client_data*/);
}
I get:
Argument 0: int
Argument 1: unsigned int
So, basically, the compiler is ignoring the fact that this type is an enum, and shows it as an unsigned int. Is there a way to know this argument is an enum?
First, note that the code in the question (after fixing by declaring `index`) works for the case of calling a function that explicitly accepts an argument of type `enum test`. That is, if we change the line:
void foo(int a, ...) {}
to:
void foo(int a, enum test) {}
and move it below the declaration of `enum test`, then the code in the question prints:
Argument 0: int
Argument 1: enum test <-- what we want
So the focus of the question is how to make this work when the callee is a variable-argument function.
We can better understand what is going on by dumping the AST of the original `file.c`:
$ clang -fsyntax-only -Xclang -ast-dump file.c -fno-diagnostics-color
TranslationUnitDecl 0x56530660a508 <<invalid sloc>> <invalid sloc>
|-TypedefDecl 0x56530660ad30 <<invalid sloc>> <invalid sloc> implicit __int128_t '__int128'
| `-BuiltinType 0x56530660aad0 '__int128'
|-TypedefDecl 0x56530660ada0 <<invalid sloc>> <invalid sloc> implicit __uint128_t 'unsigned __int128'
| `-BuiltinType 0x56530660aaf0 'unsigned __int128'
|-TypedefDecl 0x56530660b0a8 <<invalid sloc>> <invalid sloc> implicit __NSConstantString 'struct __NSConstantString_tag'
| `-RecordType 0x56530660ae80 'struct __NSConstantString_tag'
| `-Record 0x56530660adf8 '__NSConstantString_tag'
|-TypedefDecl 0x56530660b140 <<invalid sloc>> <invalid sloc> implicit __builtin_ms_va_list 'char *'
| `-PointerType 0x56530660b100 'char *'
| `-BuiltinType 0x56530660a5b0 'char'
|-TypedefDecl 0x56530660b438 <<invalid sloc>> <invalid sloc> implicit __builtin_va_list 'struct __va_list_tag[1]'
| `-ConstantArrayType 0x56530660b3e0 'struct __va_list_tag[1]' 1
| `-RecordType 0x56530660b220 'struct __va_list_tag'
| `-Record 0x56530660b198 '__va_list_tag'
|-FunctionDecl 0x565306666930 <file.c:1:1, col:23> col:6 used foo 'void (int, ...)'
| |-ParmVarDecl 0x565306666860 <col:10, col:14> col:14 a 'int'
| `-CompoundStmt 0x565306666a28 <col:22, col:23>
|-EnumDecl 0x565306666a38 <line:3:1, line:6:1> line:3:6 test
| |-EnumConstantDecl 0x565306666b00 <line:4:3> col:3 referenced ENUM_VAL1 'int'
| `-EnumConstantDecl 0x565306666b50 <line:5:3> col:3 ENUM_VAL2 'int'
`-FunctionDecl 0x565306666bf0 <line:8:1, line:12:1> line:8:5 main 'int ()'
`-CompoundStmt 0x565306666f78 <col:12, line:12:1>
|-DeclStmt 0x565306666d90 <line:9:3, col:26>
| `-VarDecl 0x565306666cf0 <col:3, col:17> col:13 used e 'enum test':'enum test' cinit
| `-ImplicitCastExpr 0x565306666d78 <col:17> 'enum test':'enum test' <IntegralCast>
| `-DeclRefExpr 0x565306666d58 <col:17> 'int' EnumConstant 0x565306666b00 'ENUM_VAL1' 'int'
|-DeclStmt 0x565306666e48 <line:10:3, col:12>
| `-VarDecl 0x565306666dc0 <col:3, col:11> col:7 used a 'int' cinit
| `-IntegerLiteral 0x565306666e28 <col:11> 'int' 1
`-CallExpr 0x565306666f00 <line:11:3, col:11> 'void'
|-ImplicitCastExpr 0x565306666ee8 <col:3> 'void (*)(int, ...)' <FunctionToPointerDecay>
| `-DeclRefExpr 0x565306666e60 <col:3> 'void (int, ...)' Function 0x565306666930 'foo' 'void (int, ...)'
|-ImplicitCastExpr 0x565306666f30 <col:7> 'int' <LValueToRValue>
| `-DeclRefExpr 0x565306666e80 <col:7> 'int' lvalue Var 0x565306666dc0 'a' 'int'
`-ImplicitCastExpr 0x565306666f60 <col:10> 'unsigned int' <IntegralCast>
`-ImplicitCastExpr 0x565306666f48 <col:10> 'enum test':'enum test' <LValueToRValue>
`-DeclRefExpr 0x565306666ea0 <col:10> 'enum test':'enum test' lvalue Var 0x565306666cf0 'e' 'enum test':'enum test'
Note the key lines at the end:
`-ImplicitCastExpr 0x565306666f60 <col:10> 'unsigned int' <IntegralCast>
`-ImplicitCastExpr 0x565306666f48 <col:10> 'enum test':'enum test' <LValueToRValue>
`-DeclRefExpr 0x565306666ea0 <col:10> 'enum test':'enum test' lvalue Var 0x565306666cf0 'e' 'enum test':'enum test'
What is happening is the argument expression `e` undergoes two implicit conversions, the first being an lvalue-to-rvalue conversion and the second being a promotion from `enum test` to `unsigned int`. It is the second conversion that causes the type to be reported as `unsigned int` in the original code, because that is the correct argument type after the promotions mandated by the semantics of variable-argument function calls.
So, our goal now is to get the type of the expression underneath the `ImplicitCastExpr` node.
ImplicitCastExpr
In the C++ API, skipping `ImplicitCastExpr` is easy since you just call `CastExpr::getSubExpr`.
But in the C API, `ImplicitCastExpr` is unfortunately only indicated by the cursor kind being `CXCursor_UnexposedExpr`, recognizable directly or with `clang_isUnexposed`.
Consequently, we have to check for that and assume it means `ImplicitCastExpr`. That is not safe in general, since other kinds of nodes are also mapped to `CXCursor_UnexposedExpr`, but in the context of having already recognized an argument expression of a function call in the C language, I think `ImplicitCastExpr` is the only possibility.
(Unfortunately, the C API is often ambiguous in ways like this,
requiring various fragile heuristics to overcome. I recommend using the
C++ API instead if possible.)
Given a `CXCursor` to such an expression, here is code that will search the tree for the first node that is not an unexposed kind and yield its type:
// Client data for `getUnderTypeVisitor`: carries the search result back
// to `getUnderType`.
typedef struct GetUnderTypeData {
// Underlying type, if any.  Only meaningful when `found` is true;
// `getUnderType` does not initialize it before the search.
CXType underType;
// True if we find a type to use.
bool found;
} GetUnderTypeData;
// Child visitor used by `getUnderType`.
//
// `client_data` must point to a `GetUnderTypeData`.  Unexposed cursors are
// descended into; the first cursor of any other kind has its type stored
// in the client data and ends the traversal.  `parent` is unused.
enum CXChildVisitResult getUnderTypeVisitor(
  CXCursor c, CXCursor parent, CXClientData client_data)
{
  // `ImplicitCastExpr` has no cursor kind of its own in the C API; it
  // shows up as an "unexposed" kind.  Assume every unexposed cursor is
  // such a cast and keep searching beneath it.
  if (clang_isUnexposed(clang_getCursorKind(c))) {
    return CXChildVisit_Recurse;
  }

  // Any other kind probably carries a usable type: record it and stop.
  GetUnderTypeData *result = (GetUnderTypeData *)client_data;
  result->underType = clang_getCursorType(c);
  result->found = true;
  return CXChildVisit_Break;
}
// Look beneath any `ImplicitCastExpr` wrappers of `c` and report the type
// of the first cursor found there that is not an unexposed kind.
//
// Returns true and stores that type in `*underType` on success; returns
// false and leaves `*underType` untouched otherwise.
//
// Known limitation: only the children of `c` are searched, never `c`
// itself.
bool getUnderType(CXCursor c, CXType * /*OUT*/ underType)
{
  GetUnderTypeData search;
  search.found = false;
  clang_visitChildren(c, getUnderTypeVisitor, &search);

  if (!search.found) {
    return false;
  }
  *underType = search.underType;
  return true;
}
Inserting the above code into the original question code (plus a couple other fixes), we have:
// ---------------------------- BEGIN ADDED ----------------------------
#include <clang-c/Index.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
// Client data for `getUnderTypeVisitor`: carries the search result back
// to `getUnderType`.
typedef struct GetUnderTypeData {
// Underlying type, if any.  Only meaningful when `found` is true;
// `getUnderType` does not initialize it before the search.
CXType underType;
// True if we find a type to use.
bool found;
} GetUnderTypeData;
// Child visitor used by `getUnderType`.
//
// `client_data` must point to a `GetUnderTypeData`.  Unexposed cursors are
// descended into; the first cursor of any other kind has its type stored
// in the client data and ends the traversal.  `parent` is unused.
enum CXChildVisitResult getUnderTypeVisitor(
  CXCursor c, CXCursor parent, CXClientData client_data)
{
  // `ImplicitCastExpr` has no cursor kind of its own in the C API; it
  // shows up as an "unexposed" kind.  Assume every unexposed cursor is
  // such a cast and keep searching beneath it.
  if (clang_isUnexposed(clang_getCursorKind(c))) {
    return CXChildVisit_Recurse;
  }

  // Any other kind probably carries a usable type: record it and stop.
  GetUnderTypeData *result = (GetUnderTypeData *)client_data;
  result->underType = clang_getCursorType(c);
  result->found = true;
  return CXChildVisit_Break;
}
// Look beneath any `ImplicitCastExpr` wrappers of `c` and report the type
// of the first cursor found there that is not an unexposed kind.
//
// Returns true and stores that type in `*underType` on success; returns
// false and leaves `*underType` untouched otherwise.
//
// Known limitation: only the children of `c` are searched, never `c`
// itself.
bool getUnderType(CXCursor c, CXType * /*OUT*/ underType)
{
  GetUnderTypeData search;
  search.found = false;
  clang_visitChildren(c, getUnderTypeVisitor, &search);

  if (!search.found) {
    return false;
  }
  *underType = search.underType;
  return true;
}
// ----------------------------- END ADDED -----------------------------
// Visitor for clang_visitChildren(): finds call expressions whose spelling
// is "foo" and, for each argument, prints both the type of the argument
// expression as passed and the type found beneath any implicit casts.
//
// Returns CXChildVisit_Recurse for every cursor that is not a call to
// "foo" (so nested calls are still found), and CXChildVisit_Continue after
// reporting a matching call.  `parent` and `client_data` are unused.
static enum CXChildVisitResult visitFuncCalls(CXCursor current_cursor,
                                              CXCursor parent,
                                              CXClientData client_data) {
  (void)parent;
  (void)client_data;

  if (clang_getCursorKind(current_cursor) != CXCursor_CallExpr) {
    return CXChildVisit_Recurse;
  }

  static const char *FUNCTION_NAME = "foo";
  const CXString spelling = clang_getCursorSpelling(current_cursor);
  const char *callee_name = clang_getCString(spelling);
  // Defensive NULL check: strcmp() on a NULL pointer is undefined behavior.
  const bool is_target =
      callee_name != NULL && strcmp(callee_name, FUNCTION_NAME) == 0;
  // Release the string before branching so it is freed on every path; the
  // earlier version returned without disposing it when the name differed.
  clang_disposeString(spelling);
  if (!is_target) {
    return CXChildVisit_Recurse;
  }

  // Query the argument count once instead of on every loop iteration.
  const int num_arguments = clang_Cursor_getNumArguments(current_cursor);
  for (int i = 0; i < num_arguments; i++) {
    CXCursor argument = clang_Cursor_getArgument(current_cursor, i);

    // Type of the argument expression itself, i.e. after any implicit
    // conversions applied at the call site.
    CXType argument_type = clang_getCursorType(argument);
    CXString argument_type_spelling = clang_getTypeSpelling(argument_type);
    printf("Argument %d: %s\n", i, clang_getCString(argument_type_spelling));
    clang_disposeString(argument_type_spelling);

    // -------------------------- BEGIN ADDED --------------------------
    // Type of the expression underneath the implicit casts, if one is found.
    CXType underType;
    if (getUnderType(argument, &underType)) {
      CXString underTypeSpelling = clang_getTypeSpelling(underType);
      printf("underType: %s\n", clang_getCString(underTypeSpelling));
      clang_disposeString(underTypeSpelling);
    }
    // --------------------------- END ADDED ---------------------------
  }
  return CXChildVisit_Continue;
}
// Parses "file.c" and runs visitFuncCalls over the resulting AST.
//
// Returns 0 on success and 1 if the file could not be parsed.
int main() {
  // --------------------------- BEGIN ADDED ---------------------------
  CXIndex index = clang_createIndex(0, 0);
  // ---------------------------- END ADDED ----------------------------
  const CXTranslationUnit unit = clang_parseTranslationUnit(
      index, "file.c", NULL, 0, NULL, 0, CXTranslationUnit_None);
  // Parsing can fail (for example if the file cannot be read); report it
  // instead of handing a NULL translation unit to the cursor API.
  if (unit == NULL) {
    fprintf(stderr, "Failed to parse file.c\n");
    clang_disposeIndex(index);
    return 1;
  }

  const CXCursor cursor = clang_getTranslationUnitCursor(unit);
  clang_visitChildren(cursor, visitFuncCalls, NULL /* client_data*/);

  // Release libclang resources in reverse order of creation.
  clang_disposeTranslationUnit(unit);
  clang_disposeIndex(index);
  return 0;
}
When run on the original `file.c`, the output is:
Argument 0: int
underType: int
Argument 1: unsigned int
underType: enum test <--- got it
Update: The above code has two bugs:

1. `getUnderType` should check if `c` itself can yield a type.
2. It does not properly handle the case of an enumerator passed directly as an argument.

See this updated answer to the question "How do I get the enum type of a clang::EnumConstantDecl?" for fixes to those issues.