llvmllvm-irllc

How to get Instruction in MachineInstr?


I want to track variable dependences down to physical registers (e.g. X86 EAX, EBX, ...). To do this, I created an IR pass that identifies dependencies at the IR level. This pass uses two newly added bit-fields, unsigned HasDependency : 1; and unsigned HasMaybeDependency : 1;, in the Value class.

      .
      .
// Use the same type as the bitfield above so that MSVC will pack them.
unsigned IsUsedByMD : 1;
unsigned HasName : 1;
unsigned HasHungOffUses : 1;
unsigned HasDescriptor : 1;
unsigned HasDependency : 1;
unsigned HasMaybeDependency : 1;
      .
      .
      .
// Accessors for the two 1-bit dependency flags added to Value.
// The setters only ever set a flag to true; no way to clear it is shown here.
void setDependency() { HasDependency = true; }
void setMaybeDependency() { HasMaybeDependency = true; }
// Read-only queries returning the current state of each flag.
bool hasDependency() const { return HasDependency; }
bool hasMaybeDependency() const { return HasMaybeDependency; }

  //static_assert(sizeof(Value) == 2 * sizeof(void *) + 2 * sizeof(unsigned),
  //              "Value too big");

When applied to a code snippet like this:

// External callee; only its declaration is visible in this snippet.
extern int foo_called(int a);

// Sample input for the dependency pass: `a` carries the annotation "xxx"
// and is updated inside the loop from the result of foo_called.
int foo(int k)
{
    // Annotated variable. It has no initializer, so the first read of `a`
    // in the loop uses an indeterminate value.
    int __attribute__((annotate("xxx"))) a;
    for (int i = 0; i < k; i++)
    {
        int c = a + k;        // c is computed from the annotated variable
        a += foo_called(c);   // a is updated from a call that receives c
    }
    // The result does not use `a`; the function always returns 0.
    return 0;
}

which produces this bitcode:

define i32 @"\01?foo@@YAHH@Z"(i32 %k) local_unnamed_addr #0 {
entry:
  %a = alloca i32, align 4
  %0 = bitcast i32* %a to i8*
  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #2
  call void @llvm.var.annotation(i8* nonnull %0, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.1, i32 0, i32 0), i32 17)
  %cmp7 = icmp sgt i32 %k, 0
  br i1 %cmp7, label %for.body.lr.ph, label %for.cond.cleanup

for.body.lr.ph:                                   ; preds = %entry
  %.pre = load i32, i32* %a, align 4, !tbaa !3
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body, %entry
  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #2
  ret i32 0

for.body:                                         ; preds = %for.body, %for.body.lr.ph
  %1 = phi i32 [ %.pre, %for.body.lr.ph ], [ %add2, %for.body ]
  %i.08 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
  %add = add nsw i32 %1, %k
  %call = call i32 @"\01?foo_called@@YAHH@Z"(i32 %add)
  %2 = load i32, i32* %a, align 4, !tbaa !3
  %add2 = add nsw i32 %2, %call
  store i32 %add2, i32* %a, align 4, !tbaa !3
  %inc = add nuw nsw i32 %i.08, 1
  %exitcond = icmp eq i32 %inc, %k
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

declare i32 @"\01?foo_called@@YAHH@Z"(i32) local_unnamed_addr #3

The result of the pass on the above bitcode is:

Function - ?foo@@YAHH@Z
    Annotated Variable List :
        - Annotated : a(message: xxx)

    Annotated-Variable : a
        (Perpect)  %add2 = add nsw i32 %2, %call
        (Perpect)  %2 = load i32, i32* %a, align 4, !tbaa !3
        (Perpect)  %a = alloca i32, align 4
        (Perpect)  %cmp7 = icmp sgt i32 %k, 0
        (Maybe)  %exitcond = icmp eq i32 %inc, %k
        (Maybe)  %inc = add nuw nsw i32 %i.08, 1
        (Maybe)  %i.08 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
        (Perpect)  %call = call i32 @"\01?foo_called@@YAHH@Z"(i32 %add)
        (Perpect)  %add = add nsw i32 %1, %k
        (Perpect)  %1 = phi i32 [ %.pre, %for.body.lr.ph ], [ %add2, %for.body ]
        (Perpect)  %.pre = load i32, i32* %a, align 4, !tbaa !3

I followed the SelectAllBasicBlocks function in SelectionDAGISel.cpp to get this information in the backend, but with the following code I was only able to recover AllocaInst, StoreInst, and LoadInst:

// Visit every machine instruction in the function and print the IR Value
// recorded on each of its memory operands, when one is present.
for (MachineBasicBlock &Block : mf) {
  for (MachineInstr &Instr : Block) {
    MachineInstr::mmo_iterator OperandIt = Instr.memoperands_begin();
    const MachineInstr::mmo_iterator OperandEnd = Instr.memoperands_end();
    while (OperandIt != OperandEnd) {
      // A memory operand may have no associated IR Value; skip those.
      const Value *IRValue = (*OperandIt)->getValue();
      if (IRValue != nullptr)
        errs() << *IRValue << "\n";
      ++OperandIt;
    }
  }
}

How can I find the correspondence between a MachineInstr and the Instruction it was generated from? If LLVM does not provide this, which parts would need to be changed?


Solution

  • This is not the standard way; it is a trick. But I have found this method very useful. If you know the standard way, please leave a comment.

    I solved this problem using DebugLoc. It is used to represent source-location information (line, column, function name, etc.) of .c and .cpp files. This information is preserved from LLVM IR all the way down to MachineInstr.

    So, if it is guaranteed that DebugLoc is not otherwise used in your compilation pipeline, you can store the address of the class that holds the information you need in the line (row) field. This lets you cast the DebugLoc line back to the desired class at the right time. (You cannot use the column for this, because a column value must be less than 2^16.)

    The following describes in detail the method I used.

    Change the files described below and rebuild your project.

    Several design patterns were used to maximize memory efficiency, so I could not easily change the class.

    First, modify the DebugLoc print routine. Go to DebugLoc.cpp and remove the DIScope printing code as shown below. This saves you from a runtime error.

    void DebugLoc::print(raw_ostream &OS) const {
      if (!Loc)
        return;
    
      // Print source line info.
      //auto *Scope = cast<DIScope>(getScope());
      //OS << Scope->getFilename();
      OS << ':' << getLine();
      if (getCol() != 0)
        OS << ':' << getCol();
    

    Second, the verifier should be modified. The following diff shows the change.

    void Verifier::visitDILocation(const DILocation &N) {
    -  AssertDI(N.getRawScope() && isa<DILocalScope>(N.getRawScope()),
    -           "location requires a valid scope", &N, N.getRawScope());
    +  //AssertDI(N.getRawScope() && isa<DILocalScope>(N.getRawScope()),
    +  //         "location requires a valid scope", &N, N.getRawScope());
       if (auto *IA = N.getRawInlinedAt())
         AssertDI(isa<DILocation>(IA), "inlined-at should be a location", &N, IA);
    }
    

    Third, some setup is required before a class can be registered in a DebugLoc. Create an initialization function for this.

    // Context and metadata node shared by the DebugLoc helpers in this answer.
    static LLVMContext cnt;
    static MDNode *md;
    
    // Build one placeholder node; it is later passed as the last argument of
    // DebugLoc::get. The line/column values (100, 100) are dummies.
    // NOTE(review): this line is a sketch — verify these calls against the
    // LLVM version in use before relying on it.
    md = MDNode::get(cnt, DILocation::get(cnt, 100, 100, DIScope::get(cnt, nullptr)));
    

    Last, create the register functions.

    /// Packs the address of \p info into the line argument of a DebugLoc so
    /// that the manager can later be recovered from the location's line value.
    ///
    /// The address is passed as an `unsigned`, so the round trip is only
    /// lossless when a pointer fits in `unsigned` (e.g. a 32-bit host). The
    /// static_assert turns any other configuration into a clear build error
    /// instead of an obscure cast diagnostic or a truncated pointer.
    ///
    /// \param info manager whose address is stored in the line field.
    /// \return a DebugLoc whose line is the address of \p info, whose column
    ///         is the maximum uint16_t value, and whose scope is `md`.
    static DebugLoc getDebugLoc(DependencyInstrInfoManager *info)
    {
      static_assert(sizeof(DependencyInstrInfoManager *) <= sizeof(unsigned),
                    "pointer does not fit in the DebugLoc line field");
      const auto address = reinterpret_cast<uintptr_t>(info);
      return DebugLoc::get(static_cast<unsigned>(address),
                           static_cast<uint16_t>(-1), md);
    }
    
    // Attaches one more DependencyInstrInfo record to instruction I, using the
    // line value of the instruction's DebugLoc as storage for the manager pointer.
    // NOTE(review): S, T and the `...` are not declared in this snippet; they
    // appear to be placeholders for caller-supplied details.
    static void setDebugLoc(Instruction *I, ...)
    {
      DependencyInstrInfoManager *mgr;
      if (I->getDebugLoc()) {
        // A DebugLoc is already attached: reinterpret its line value as the
        // manager address.
        // NOTE(review): assumes any existing DebugLoc was produced by
        // getDebugLoc (i.e. real debug info is not in use) — confirm.
        mgr = reinterpret_cast<DependencyInstrInfoManager *>
          (I->getDebugLoc()->getLine());
      } else {
        // First record for this instruction: allocate a manager and store its
        // address in a fresh DebugLoc. No matching delete is shown here.
        mgr = new DependencyInstrInfoManager();
        I->setDebugLoc(getDebugLoc(mgr));
      }
      mgr->addInfo(new DependencyInstrInfo(I, S, T, ...));
    }
    

    DependencyInstrInfoManager is the class for answering the above questions.

    Finally, you can print your own information in XXXMCInstLower.cpp:EmitInstruction() (for example, X86MCInstLower.cpp). The following is an example from my case.

    void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
      X86MCInstLower MCInstLowering(*MF, *this);
      const X86RegisterInfo *RI = MF->getSubtarget<X86Subtarget>().getRegisterInfo();
    
      if (MI->getDebugLoc()) {
        DependencyInstrInfoManager *mgr = reinterpret_cast<DependencyInstrInfoManager *>
          (MI->getDebugLoc()->getLine());
        mgr->doFolding();
        for (auto DI : *mgr)
          OutStreamer->AddComment(DI->getInfo());
      }
    

    Dependency Marking

    I have done dependency marking using this method.

    // Second sample input: two annotated variables, `a` and `b`, both updated
    // inside the loop and both feeding the return value.
    int foo(int k)
    {
      // NOTE(review): ANNORATE is not defined in this snippet; presumably a
      // macro wrapping __attribute__((annotate(...))) — confirm.
      int ANNORATE("b") b = 0;
      int ANNORATE("a") a = 0;
    
      for (int i = 0; i < k; i++)
      {
        int c = a + k;          // computed from a
        int d = b + k;          // computed from b; d is not used afterwards
        a += foo_called(c);
        b += foo_called2(c);    // receives c (not d), so b is updated from a value derived from a
      }
    
      // The result combines a with a call that receives b.
      return a + foo_called(b);
    }
    

    to

    # BB#1:                                 # %for.body.preheader
    movl    %esi, %ebx
    .p2align    4, 0x90
    LBB0_2 : # %for.body
    # =>This Inner Loop Header: Depth=1
      addl  %esi, %edi              # [Perpect, Source:b]
    # [Perpect, Source: a]
      pushl %edi                    # [Maybe, Source:b]
    # [Perpect, Source: a]
      calll "?foo_called@@YAHH@Z"   # [Maybe, Source:b]
    # [Perpect, Source: a]
      addl  $4, %esp                # [Maybe, Source:b]
    # [Perpect, Source: a]
      addl  %eax, 4(%esp)
      pushl %edi                    # [Perpect, Source:b]
      calll "?foo_called2@@YAHH@Z"  # [Perpect, Source:b]
      addl  $4, %esp                # [Perpect, Source:b]
      addl(%esp), %eax            # [Annotated, Source:b]
      movl  4(%esp), %edi           # [Perpect, Source:b]
    # [Perpect, Source: a]
      decl  %ebx                    # [Maybe, Source:b]
      movl  %eax, (%esp)
      jne   LBB0_2
      jmp   LBB0_4