c++ clang llvm inline-assembly clang-plugin

Rewriting MSAsmStmt with llvm plugin segfaults


I have this C code:

#include <inttypes.h>
#include <stdio.h>

uint8_t count1(uint32_t x) {  // returns the number of 1 bits in x
    int out;                  // receives AL (the running bit count) from the asm block
    __asm {
        mov edx, [x]
        mov al, 0
next:   cmp edx, 0
        je done
        mov cl, dl
        and cl, 1
        add al, cl
        shr edx, 1
        jmp next
done:
        mov out, al
    }
    return out;               // converted to uint8_t by the return type
}

/* Demo driver: counts the set bits of a fixed sample value and prints it. */
int main() {
    uint32_t sample = 0x5789ABCD;
    uint8_t ones = count1(sample);
    printf("Number of 1s in 0x%X: %hhu\n", sample, ones);
}

I want to write a plugin for LLVM that will transform it behind the scenes into something like this:

#include <inttypes.h>
#include <stdio.h>

// Desired plugin output: the same bit-counting routine with assembler
// line-table directives injected into the asm block.
// `.file 1 "main.c"` declares file number 1; each `.loc 1 N` marks the
// instruction that follows it as belonging to line N of that file, where N is
// the instruction's line in the original, untransformed source.
uint8_t count1(uint32_t x) {
    int out;
    __asm {
        .file 1 "main.c"
        .loc 1 7
        mov edx, [x]
        .loc 1 8
        mov al, 0
        .loc 1 9
next:   cmp edx, 0
        .loc 1 10
        je done
        .loc 1 11
        mov cl, dl
        .loc 1 12
        and cl, 1
        .loc 1 13
        add al, cl
        .loc 1 14
        shr edx, 1
        .loc 1 15
        jmp next
done:
        .loc 1 17
        mov out, al
    }
    return out;
}

/* Demo driver: counts the set bits of a fixed sample value and prints it. */
int main() {
    uint32_t sample = 0x5789ABCD;
    uint8_t ones = count1(sample);
    printf("Number of 1s in 0x%X: %hhu\n", sample, ones);
}

This will enable me to single-step through the __asm block in a debugger. Notice that the line numbers increment only by 1. That is intentional, because when the plugin modifies the AST, the line numbers in the original file don't change.

I wrote a plugin that can detect these statements. I can compile it, and I can compile this program with clang with that plugin enabled.

But when I attempt to call Rewriter.ReplaceText(SourceRange(StartLoc, EndLoc), ModifiedAsm); the plugin segfaults.

I have tried checking whether the ranges are valid (they are). I have also checked that they don't come from a macro expansion (isMacroID() returns false).

I have narrowed it down to the Rewriter.getRangeSize(..) function. But I have no idea what I'm doing wrong.

This is the relevant part of the plugin:

// AST visitor that looks for MS-style inline assembly blocks (MSAsmStmt) and
// asks the Rewriter to replace each block's source range with new text.
class MyASTVisitor : public RecursiveASTVisitor<MyASTVisitor> {
  public:
    // Context: AST context of the translation unit (stored; not otherwise used
    //          by this class).
    // R:       rewriter that receives the edits. Only a reference is kept, so
    //          the referenced Rewriter has to outlive this visitor.
    explicit MyASTVisitor(ASTContext *Context, Rewriter &R)
        : Context(Context), TheRewriter(R) {}

    // Invoked for every statement during traversal; acts only on MSAsmStmt.
    // Always returns true so that the traversal keeps going.
    bool VisitStmt(Stmt *S) {
        if (auto *Asm = dyn_cast<MSAsmStmt>(S)) {
            // Get the assembly string
            StringRef AsmString = Asm->getAsmString();

            // Source range covered by the asm statement.
            SourceLocation StartLoc = Asm->getBeginLoc();
            SourceLocation EndLoc = Asm->getEndLoc();

            // This will segfault
            // (Placeholder edit: the range is replaced with the unmodified asm
            // string. Rewriter::ReplaceText reports failure with `true`, so a
            // printed 0 means the edit was accepted.)
            bool result = TheRewriter.ReplaceText(SourceRange(StartLoc, EndLoc),
                                                  AsmString);
            llvm::errs() << "Replace result: " << result << "\n";
        }
        return true;
    }

  private:
    ASTContext *Context;
    Rewriter &TheRewriter; // non-owning reference
};

Here is the error:

0.      Program arguments: /opt/homebrew/Cellar/llvm@16/16.0.6_1/bin/clang-16 -cc1 -triple x86_64-apple-macosx15.0.0 -Wundef-prefix=TARGET_OS_ -Werror=undef-prefix -Wdeprecated-objc-isa-usage -Werror=deprecated-objc-isa-usage -emit-obj -mrelax-all -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name testfile.c -mrelocation-model pic -pic-level 2 -mframe-pointer=all -ffp-contract=on -fno-rounding-math -funwind-tables=2 -fcompatibility-qualified-id-block-type-checking -fvisibility-inlines-hidden-static-local-var -target-cpu penryn -tune-cpu generic -mllvm -treat-scalable-fixed-error-as-warning -debug-info-kind=standalone -dwarf-version=4 -debugger-tuning=lldb -target-linker-version 1053.12 -fcoverage-compilation-dir=/Users/jurajpetras/dev/asm_debug -resource-dir /opt/homebrew/Cellar/llvm@16/16.0.6_1/lib/clang/16 -isysroot /Library/Developer/CommandLineTools/SDKs/MacOSX14.sdk -internal-isystem /Library/Developer/CommandLineTools/SDKs/MacOSX14.sdk/usr/local/include -internal-isystem /opt/homebrew/Cellar/llvm@16/16.0.6_1/lib/clang/16/include -internal-externc-isystem /Library/Developer/CommandLineTools/SDKs/MacOSX14.sdk/usr/include -O0 -fdebug-compilation-dir=/Users/jurajpetras/dev/asm_debug -ferror-limit 19 -stack-protector 1 -fblocks -fencode-extended-block-signature -fregister-global-dtors-with-atexit -fgnuc-version=4.2.1 -fmax-type-align=16 -fcolor-diagnostics -fasm-blocks -load ./build/libasm_debug.dylib -add-plugin asm_debug -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /var/folders/88/yqhhhgms02vcwxdp_x2dgbp40000gn/T/testfile-0c3ab4.o -x c testfile.c
1.      <eof> parser at end of file
Stack dump without symbol names (ensure you have llvm-symbolizer in your PATH or set the environment var `LLVM_SYMBOLIZER_PATH` to point to it):
0  libLLVM.dylib            0x0000000110344b20 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) + 56
1  libLLVM.dylib            0x00000001103439a4 llvm::sys::RunSignalHandlers() + 112
2  libLLVM.dylib            0x00000001103451b4 SignalHandler(int) + 360
3  libsystem_platform.dylib 0x0000000199b08184 _sigtramp + 56
4  libasm_debug.dylib       0x00000001091de914 clang::Rewriter::getRangeSize(clang::CharSourceRange const&, clang::Rewriter::RewriteOptions) const + 212
5  libasm_debug.dylib       0x00000001091deb74 clang::Rewriter::getRangeSize(clang::SourceRange, clang::Rewriter::RewriteOptions) const + 36
6  libasm_debug.dylib       0x0000000108acc49c clang::Rewriter::ReplaceText(clang::SourceRange, llvm::StringRef) + 100
7  libasm_debug.dylib       0x0000000108acc30c (anonymous namespace)::MyASTVisitor::VisitStmt(clang::Stmt*) + 180
8  libasm_debug.dylib       0x0000000108acc22c clang::RecursiveASTVisitor<(anonymous namespace)::MyASTVisitor>::WalkUpFromStmt(clang::Stmt*) + 36
9  libasm_debug.dylib       0x0000000108acc170 clang::RecursiveASTVisitor<(anonymous namespace)::MyASTVisitor>::WalkUpFromAsmStmt(clang::AsmStmt*) + 48
10 libasm_debug.dylib       0x0000000108aa3b3c clang::RecursiveASTVisitor<(anonymous namespace)::MyASTVisitor>::WalkUpFromMSAsmStmt(clang::MSAsmStmt*) + 48
11 libasm_debug.dylib       0x0000000108aa3978 clang::RecursiveASTVisitor<(anonymous namespace)::MyASTVisitor>::TraverseMSAsmStmt(clang::MSAsmStmt*, llvm::SmallVectorImpl<llvm::PointerIntPair<clang::Stmt*, 1u, bool, llvm::PointerLikeTypeTraits<clang::Stmt*>, llvm::PointerIntPairInfo<clang::Stmt*, 1u, llvm::PointerLikeTypeTraits<clang::Stmt*>>>>*) + 80
12 libasm_debug.dylib       0x0000000108aa1184 clang::RecursiveASTVisitor<(anonymous namespace)::MyASTVisitor>::dataTraverseNode(clang::Stmt*, llvm::SmallVectorImpl<llvm::PointerIntPair<clang::Stmt*, 1u, bool, llvm::PointerLikeTypeTraits<clang::Stmt*>, llvm::PointerIntPairInfo<clang::Stmt*, 1u, llvm::PointerLikeTypeTraits<clang::Stmt*>>>>*) + 144
13 libasm_debug.dylib       0x0000000108a67288 clang::RecursiveASTVisitor<(anonymous namespace)::MyASTVisitor>::TraverseStmt(clang::Stmt*, llvm::SmallVectorImpl<llvm::PointerIntPair<clang::Stmt*, 1u, bool, llvm::PointerLikeTypeTraits<clang::Stmt*>, llvm::PointerIntPairInfo<clang::Stmt*, 1u, llvm::PointerLikeTypeTraits<clang::Stmt*>>>>*) + 672
14 libasm_debug.dylib       0x0000000108b4ef78 clang::RecursiveASTVisitor<(anonymous namespace)::MyASTVisitor>::TraverseFunctionHelper(clang::FunctionDecl*) + 1404
15 libasm_debug.dylib       0x0000000108a61c3c clang::RecursiveASTVisitor<(anonymous namespace)::MyASTVisitor>::TraverseFunctionDecl(clang::FunctionDecl*) + 128
16 libasm_debug.dylib       0x0000000108a58914 clang::RecursiveASTVisitor<(anonymous namespace)::MyASTVisitor>::TraverseDecl(clang::Decl*) + 2852
17 libasm_debug.dylib       0x0000000108ae473c clang::RecursiveASTVisitor<(anonymous namespace)::MyASTVisitor>::TraverseDeclContextHelper(clang::DeclContext*) + 216
18 libasm_debug.dylib       0x0000000108a6684c clang::RecursiveASTVisitor<(anonymous namespace)::MyASTVisitor>::TraverseTranslationUnitDecl(clang::TranslationUnitDecl*) + 564
19 libasm_debug.dylib       0x0000000108a58f14 clang::RecursiveASTVisitor<(anonymous namespace)::MyASTVisitor>::TraverseDecl(clang::Decl*) + 4388
20 libasm_debug.dylib       0x0000000108a57d8c (anonymous namespace)::MyASTConsumer::HandleTranslationUnit(clang::ASTContext&) + 52
21 libclang-cpp.dylib       0x00000001066383a8 clang::MultiplexConsumer::HandleTranslationUnit(clang::ASTContext&) + 52
22 libclang-cpp.dylib       0x0000000104911fa8 clang::ParseAST(clang::Sema&, bool, bool) + 752
23 libclang-cpp.dylib       0x00000001065ff750 clang::FrontendAction::Execute() + 112
24 libclang-cpp.dylib       0x0000000106582da8 clang::CompilerInstance::ExecuteAction(clang::FrontendAction&) + 868
25 libclang-cpp.dylib       0x0000000106677440 clang::ExecuteCompilerInvocation(clang::CompilerInstance*) + 524
26 clang-16                 0x00000001004c2ca4 cc1_main(llvm::ArrayRef<char const*>, char const*, void*) + 1464
27 clang-16                 0x00000001004bfb50 ExecuteCC1Tool(llvm::SmallVectorImpl<char const*>&) + 948
28 clang-16                 0x00000001004beddc clang_main(int, char**) + 11192
29 dyld                     0x0000000199750274 start + 2840
clang-16: error: unable to execute command: Segmentation fault: 11

I can read the AST however I want. I can print whatever I want. The segfault only happens when I call the Rewriter.

I am using an M2 MacBook Pro and compiling like this:

clang -Xclang -load -Xclang ./build/libasm_debug.dylib -Xclang -add-plugin -Xclang asm_debug testfile.c -target x86_64-apple-macos -fasm-blocks -g -O0

The whole code can be found here: https://github.com/Hackder/asm_debug

Is my approach even correct? If so, why is it hitting a segfault?

Minimum reproducible example:

src/main.cpp:

#include "clang/AST/ASTConsumer.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Expr.h"
#include "clang/AST/RecursiveASTVisitor.h"
#include "clang/AST/Stmt.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Frontend/CompilerInstance.h"
#include "clang/Frontend/FrontendPluginRegistry.h"
#include "clang/Rewrite/Core/Rewriter.h"
#include "clang/Rewrite/Frontend/Rewriters.h"
#include "clang/Sema/Sema.h"
#include "llvm/Support/raw_ostream.h"

using namespace clang;

namespace {

// AST visitor that looks for MS-style inline assembly blocks (MSAsmStmt) and
// asks the Rewriter to replace each block's source range with new text.
class MyASTVisitor : public RecursiveASTVisitor<MyASTVisitor> {
  public:
    // Context: AST context of the translation unit (stored; not otherwise used
    //          by this class).
    // R:       rewriter that receives the edits. Only a reference is kept, so
    //          the referenced Rewriter has to outlive this visitor.
    explicit MyASTVisitor(ASTContext *Context, Rewriter &R)
        : Context(Context), TheRewriter(R) {}

    // Invoked for every statement during traversal; acts only on MSAsmStmt.
    // Always returns true so that the traversal keeps going.
    bool VisitStmt(Stmt *S) {
        if (auto *Asm = dyn_cast<MSAsmStmt>(S)) {
            // Get the assembly string
            StringRef AsmString = Asm->getAsmString();

            // Source range covered by the asm statement.
            SourceLocation StartLoc = Asm->getBeginLoc();
            SourceLocation EndLoc = Asm->getEndLoc();

            // This will segfault
            // (Placeholder edit: the range is replaced with the unmodified asm
            // string. Rewriter::ReplaceText reports failure with `true`, so a
            // printed 0 means the edit was accepted.)
            bool result = TheRewriter.ReplaceText(SourceRange(StartLoc, EndLoc),
                                                  AsmString);
            llvm::errs() << "Replace result: " << result << "\n";
        }
        return true;
    }

  private:
    ASTContext *Context;
    Rewriter &TheRewriter; // non-owning reference
};

// AST consumer that owns the visitor and runs it over the whole translation
// unit when Clang hands the finished AST to the consumer.
class MyASTConsumer : public ASTConsumer {
  public:
    // NOTE(review): R is received by value, while MyASTVisitor keeps a
    // reference to whatever Rewriter it is given. The visitor therefore refers
    // to this constructor parameter, whose lifetime ends when the constructor
    // returns.
    explicit MyASTConsumer(ASTContext *Context, Rewriter R)
        : Visitor(Context, R) {}

    // Walks every declaration reachable from the translation unit, which is
    // where the visitor's VisitStmt gets called.
    virtual void HandleTranslationUnit(ASTContext &Context) override {
        Visitor.TraverseDecl(Context.getTranslationUnitDecl());
    }

  private:
    MyASTVisitor Visitor;
};

// Plugin entry point: builds the AST consumer that does the actual work.
class MyPluginAction : public PluginASTAction {
  protected:
    // Points the member Rewriter at this compilation's SourceManager and
    // language options, then constructs the consumer from the AST context and
    // that Rewriter. The second (unnamed) argument is ignored.
    std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &CI,
                                                   llvm::StringRef) override {
        TheRewriter.setSourceMgr(CI.getSourceManager(), CI.getLangOpts());
        return std::make_unique<MyASTConsumer>(&CI.getASTContext(),
                                               TheRewriter);
    }

    // No plugin arguments are interpreted; every argument list is accepted.
    bool ParseArgs(const CompilerInstance &CI,
                   const std::vector<std::string> &args) override {
        return true;
    }

  private:
    // The Rewriter state lives inside the action object.
    // NOTE(review): anything that keeps using this Rewriter after
    // CreateASTConsumer returns depends on the action object still being alive
    // at that point — confirm that this holds for both -plugin and -add-plugin.
    Rewriter TheRewriter;
};

} // namespace

// Registers MyPluginAction with Clang's plugin registry under the name
// "asm_debug", which is the name passed to -add-plugin on the command line.
static FrontendPluginRegistry::Add<MyPluginAction>
    X("asm_debug", "Inject debug steps into inline assembly");

CMakeLists.txt:

# Build script for the asm_debug Clang plugin: a shared library built from
# src/main.cpp.
cmake_minimum_required(VERSION 3.13)

project(MyClangPlugin LANGUAGES CXX C)

# Write compile_commands.json into the build directory.
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_CXX_STANDARD 20)
# Build for arm64 only.
# NOTE(review): presumably chosen to match the architecture of the clang binary
# that loads the plugin — confirm.
set(CMAKE_OSX_ARCHITECTURES "arm64")

# Locate the installed Clang CMake package and add its headers.
find_package(Clang REQUIRED CONFIG)
include_directories(${CLANG_INCLUDE_DIRS})

# Locate the installed LLVM CMake package and make its CMake modules available.
find_package(LLVM REQUIRED CONFIG)
list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_DIR}")
include(AddLLVM)

add_definitions(${LLVM_DEFINITIONS})
include_directories(${LLVM_INCLUDE_DIRS})

# The plugin itself.
# NOTE(review): -fno-rtti presumably matches how the LLVM/Clang libraries were
# built — confirm.
add_library(asm_debug SHARED src/main.cpp)
set_target_properties(asm_debug PROPERTIES
    COMPILE_FLAGS "-fno-rtti"
    LINK_FLAGS "-shared"
)

# NOTE(review): LLVM appears in both link lines, and Clang is linked both as
# `clang` and as individual clang* components — check whether both are needed.
target_link_libraries(asm_debug PRIVATE LLVM clang)
target_link_libraries(asm_debug PRIVATE LLVM clangSupport clangFrontend clangAST clangBasic clangRewrite)

Tested with clang 16.0.6 and 19.1.1


Solution

  • First bug: Passing Rewriter by value

    The first bug is here:

    class MyASTConsumer : public ASTConsumer {
      public:
        explicit MyASTConsumer(ASTContext *Context, Rewriter R)   // <--- bug
            : Visitor(Context, R) {}
    

    This accepts a Rewriter by value, then passes it as a reference to the constructor of Visitor. Then, R is destroyed, leaving Visitor with a dangling reference to a destroyed object.

    To fix this, change R to be a reference:

        explicit MyASTConsumer(ASTContext *Context, Rewriter &R)
                                                             ^ inserted
    

  • Second bug: Plugin action gets destroyed early

    After fixing the above, the plugin works properly with -plugin, but with -add-plugin, it still crashes at ReplaceText. The cause is that the Clang plugin infrastructure creates and destroys the PluginASTAction objects seemingly haphazardly (certainly nothing about this is explained in the documentation), and in the -add-plugin case, the object used (along with its Rewriter) gets destroyed before the rest of the code runs (whereas it survives with -plugin).

    The fix is to not store any data in the PluginASTAction, and instead move that data into the ASTConsumer (or somewhere else that will survive). Here is a diff relative to the original (unfixed) code that solves that problem, also incidentally removing the first bug:

    @@ -44,14 +44,16 @@ class MyASTVisitor : public RecursiveASTVisitor<MyASTVisitor> {
     
     class MyASTConsumer : public ASTConsumer {
       public:
    -    explicit MyASTConsumer(ASTContext *Context, Rewriter R)
    -        : Visitor(Context, R) {}
    +    explicit MyASTConsumer(ASTContext *Context)
    +        : TheRewriter(Context->getSourceManager(), Context->getLangOpts()),
    +          Visitor(Context, TheRewriter) {}
     
         virtual void HandleTranslationUnit(ASTContext &Context) override {
             Visitor.TraverseDecl(Context.getTranslationUnitDecl());
         }
     
       private:
    +    Rewriter TheRewriter;
         MyASTVisitor Visitor;
     };
     
    @@ -59,18 +61,13 @@ class MyPluginAction : public PluginASTAction {
       protected:
         std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &CI,
                                                        llvm::StringRef) override {
    -        TheRewriter.setSourceMgr(CI.getSourceManager(), CI.getLangOpts());
    -        return std::make_unique<MyASTConsumer>(&CI.getASTContext(),
    -                                               TheRewriter);
    +        return std::make_unique<MyASTConsumer>(&CI.getASTContext());
         }
     
         bool ParseArgs(const CompilerInstance &CI,
                        const std::vector<std::string> &args) override {
             return true;
         }
    -
    -  private:
    -    Rewriter TheRewriter;
     };
     
     } // namespace
    

    Post-fixes test

    With the code in the question, I get a very similar crash to that shown in the question when run on Linux. After applying the above diff, the plugin appears to work as intended (on Linux at least) when provided the test input from the question:

    $ g++ -c -o main.o src/main.cpp  -I/home/scott/opt/clang+llvm-16.0.0-x86_64-linux-gnu-ubuntu-18.04/include -std=c++17 -fPIC -fno-rtti -Wall
    $ g++ -o rewrite.so main.o  -fPIC -shared
    $ rm test.o
    $ /home/scott/opt/clang+llvm-16.0.0-x86_64-linux-gnu-ubuntu-18.04/bin/clang -fplugin=./rewrite.so -Xclang -add-plugin -Xclang asm_debug -c test.c -fasm-blocks
    Replace result: 0
    $ ls test.o
    test.o
    

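    This sequence confirms that test.o gets created, which is the primary difference between -plugin (which only runs the plugin) and -add-plugin (which runs the plugin along with the rest of the Clang back end). The printed "Replace result: 0" also indicates success: Rewriter::ReplaceText returns true only when the range could not be rewritten.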