I wanted to know variable dependence in a real register (like X86:EAX, EBX ...). So, I have created an IR-PASS that can identify dependencies on the IR. This pass uses the newly added variables unsigned HasDependency: 1;
and unsigned HasMaybeDependency: 1;
in the Value
class.
.
.
// Use the same type as the bitfield above so that MSVC will pack them.
unsigned IsUsedByMD : 1;
unsigned HasName : 1;
unsigned HasHungOffUses : 1;
unsigned HasDescriptor : 1;
unsigned HasDependency : 1;
unsigned HasMaybeDependency : 1;
.
.
.
void setDependency() { HasDependency = true; }
void setMaybeDependency() { HasMaybeDependency = true; }
bool hasDependency() const { return HasDependency; }
bool hasMaybeDependency() const { return HasMaybeDependency; }
//static_assert(sizeof(Value) == 2 * sizeof(void *) + 2 * sizeof(unsigned),
// "Value too big");
When applied to a code snippet like this:
extern int foo_called(int a);
int foo(int k)
{
int __attribute__((annotate("xxx"))) a;
for (int i = 0; i < k; i++)
{
int c = a + k;
a += foo_called(c);
}
return 0;
}
which produces this bitcode:
define i32 @"\01?foo@@YAHH@Z"(i32 %k) local_unnamed_addr #0 {
entry:
%a = alloca i32, align 4
%0 = bitcast i32* %a to i8*
call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #2
call void @llvm.var.annotation(i8* nonnull %0, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.1, i32 0, i32 0), i32 17)
%cmp7 = icmp sgt i32 %k, 0
br i1 %cmp7, label %for.body.lr.ph, label %for.cond.cleanup
for.body.lr.ph: ; preds = %entry
%.pre = load i32, i32* %a, align 4, !tbaa !3
br label %for.body
for.cond.cleanup: ; preds = %for.body, %entry
call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #2
ret i32 0
for.body: ; preds = %for.body, %for.body.lr.ph
%1 = phi i32 [ %.pre, %for.body.lr.ph ], [ %add2, %for.body ]
%i.08 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
%add = add nsw i32 %1, %k
%call = call i32 @"\01?foo_called@@YAHH@Z"(i32 %add)
%2 = load i32, i32* %a, align 4, !tbaa !3
%add2 = add nsw i32 %2, %call
store i32 %add2, i32* %a, align 4, !tbaa !3
%inc = add nuw nsw i32 %i.08, 1
%exitcond = icmp eq i32 %inc, %k
br i1 %exitcond, label %for.cond.cleanup, label %for.body
}
declare i32 @"\01?foo_called@@YAHH@Z"(i32) local_unnamed_addr #3
The result of the the pass on the above bitcode is:
Function - ?foo@@YAHH@Z
Annotated Variable List :
- Annotated : a(message: xxx)
Annotated-Variable : a
(Perpect) %add2 = add nsw i32 %2, %call
(Perpect) %2 = load i32, i32* %a, align 4, !tbaa !3
(Perpect) %a = alloca i32, align 4
(Perpect) %cmp7 = icmp sgt i32 %k, 0
(Maybe) %exitcond = icmp eq i32 %inc, %k
(Maybe) %inc = add nuw nsw i32 %i.08, 1
(Maybe) %i.08 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
(Perpect) %call = call i32 @"\01?foo_called@@YAHH@Z"(i32 %add)
(Perpect) %add = add nsw i32 %1, %k
(Perpect) %1 = phi i32 [ %.pre, %for.body.lr.ph ], [ %add2, %for.body ]
(Perpect) %.pre = load i32, i32* %a, align 4, !tbaa !3
I followed the SelectionDAGISel.cpp: SelectAllBasicBlocks
function to get information from the backend, but I was able to get only AllocaInst
, StoreInst
, and LoadInst
using as follows:
for (MachineBasicBlock &MBB : mf) {
for (MachineInstr& I : MBB) {
for (MachineInstr::mmo_iterator i = I.memoperands_begin(),
e = I.memoperands_end();
i != e; ++i) {
if (const Value *V = (*i)->getValue())
errs() << *V << "\n";
}
}
}
How do I know the correlation between MachineInstr
and Instruction
? If it is not provided in LLVM, which parts need to be fixed?
This is not normal. This is an trick. But I am using this method very usefully. If you know the normal way, please give me a comment.
I solved this problem using DebugLoc
. It is used to represent the line-column-row, function-name etc... information of .c
, .cpp
files. This information will remain from the time of ;;vm-ir
until MachineInstr
.
So, if it is guaranteed that DebugLoc
is not used in your compiler processing, you can put the address of the class that contains the information needed for the row information. This will allow you to cast the DebugLoc
row to the desired class at the right time. (You can use column, because column must less than 2^16.)
The following describes in detail the method I used.
Several design patterns were used to maximize memory efficiency, so I could not easily change the class.
First, modify DebugLoc-print
routine. GOTO DebugLoc.cpp
and delete DIScope
print routine like this. This processing save you form runtime-error.
void DebugLoc::print(raw_ostream &OS) const {
if (!Loc)
return;
// Print source line info.
//auto *Scope = cast<DIScope>(getScope());
//OS << Scope->getFilename();
OS << ':' << getLine();
if (getCol() != 0)
OS << ':' << getCol();
Second, The verifier should be modified. This syntax will be helpful.
void Verifier::visitDILocation(const DILocation &N) {
- AssertDI(N.getRawScope() && isa<DILocalScope>(N.getRawScope()),
- "location requires a valid scope", &N, N.getRawScope());
+ //AssertDI(N.getRawScope() && isa<DILocalScope>(N.getRawScope()),
+ // "location requires a valid scope", &N, N.getRawScope());
if (auto *IA = N.getRawInlinedAt())
AssertDI(isa<DILocation>(IA), "inlined-at should be a location", &N, IA);
}
Third, there are some formal steps to register a class in DebugLoc
. Create initialize function for this.
static LLVMContext cnt;
static MDNode *md;
md = MDNode::get(cnt, DILocation::get(cnt, 100, 100, DIScope::get(cnt, nullptr)));
Last, create register function.
static DebugLoc getDebugLoc(DependencyInstrInfoManager *info)
{
return DebugLoc::get(reinterpret_cast<unsigned> (info), (uint16_t)-1, md);
}
static void setDebugLoc(Instruction *I, ...)
{
DependencyInstrInfoManager *mgr;
if (I->getDebugLoc()) {
mgr = reinterpret_cast<DependencyInstrInfoManager *>
(I->getDebugLoc()->getLine());
} else {
mgr = new DependencyInstrInfoManager();
I->setDebugLoc(getDebugLoc(mgr));
}
mgr->addInfo(new DependencyInstrInfo(I, S, T, ...));
}
DependencyInstrInfoManager
is the class for answering the above questions.
Finally, you can print your own information in XXXMCInstLower.cpp:EmitInstruction();
(like X86MCInstLower.cpp). The following statement is an example of the output of my case.
void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
X86MCInstLower MCInstLowering(*MF, *this);
const X86RegisterInfo *RI = MF->getSubtarget<X86Subtarget>().getRegisterInfo();
if (MI->getDebugLoc()) {
DependencyInstrInfoManager *mgr = reinterpret_cast<DependencyInstrInfoManager *>
(MI->getDebugLoc()->getLine());
mgr->doFolding();
for (auto DI : *mgr)
OutStreamer->AddComment(DI->getInfo());
}
I have done dependency marking using this method.
int foo(int k)
{
int ANNORATE("b") b = 0;
int ANNORATE("a") a = 0;
for (int i = 0; i < k; i++)
{
int c = a + k;
int d = b + k;
a += foo_called(c);
b += foo_called2(c);
}
return a + foo_called(b);
}
to
# BB#1: # %for.body.preheader
movl %esi, %ebx
.p2align 4, 0x90
LBB0_2 : # %for.body
# =>This Inner Loop Header: Depth=1
addl %esi, %edi # [Perpect, Source:b]
# [Perpect, Source: a]
pushl %edi # [Maybe, Source:b]
# [Perpect, Source: a]
calll "?foo_called@@YAHH@Z" # [Maybe, Source:b]
# [Perpect, Source: a]
addl $4, %esp # [Maybe, Source:b]
# [Perpect, Source: a]
addl %eax, 4(%esp)
pushl %edi # [Perpect, Source:b]
calll "?foo_called2@@YAHH@Z" # [Perpect, Source:b]
addl $4, %esp # [Perpect, Source:b]
addl(%esp), %eax # [Annotated, Source:b]
movl 4(%esp), %edi # [Perpect, Source:b]
# [Perpect, Source: a]
decl %ebx # [Maybe, Source:b]
movl %eax, (%esp)
jne LBB0_2
jmp LBB0_4