What is the compiler doing at the beginning of the switch statement (see the snippet below) to compute the address in %rax, so that notrack jmpq *%rax
jumps to the correct offset?
Are the constants 0xe07 and 0xdfb (in the two lea instructions below) padding?
lea 0xe07(%rip),%rax
lea 0xdfb(%rip),%rdx
10 [1] switch (c) {
0x5555555551ec <+ 35> 0f be 45 fc movsbl -0x4(%rbp),%eax
0x5555555551f0 <+ 39> 83 e8 2d sub $0x2d,%eax
0x5555555551f3 <+ 42> 83 f8 32 cmp $0x32,%eax
0x5555555551f6 <+ 45> 0f 87 18 01 00 00 ja 0x555555555314 <aa2i(char)+331>
0x5555555551fc <+ 51> 89 c0 mov %eax,%eax
0x5555555551fe <+ 53> 48 8d 14 85 00 00 00 00 lea 0x0(,%rax,4),%rdx
0x555555555206 <+ 61> 48 8d 05 07 0e 00 00 lea 0xe07(%rip),%rax # 0x555555556014
0x55555555520d <+ 68> 8b 04 02 mov (%rdx,%rax,1),%eax
0x555555555210 <+ 71> 48 98 cltq
0x555555555212 <+ 73> 48 8d 15 fb 0d 00 00 lea 0xdfb(%rip),%rdx # 0x555555556014
0x555555555219 <+ 80> 48 01 d0 add %rdx,%rax
0x55555555521c <+ 83> 3e ff e0 notrack jmpq *%rax
#include <QCoreApplication>
// Internal number used for 'X', i.e. any amino acid.
const int ANY = 20;
// Internal number used for a gap.
const int GAP = 21;
// Maps a one-letter amino-acid code to its internal number.
// Lowercase letters are folded to uppercase first, so 'a' and 'A' give the same result.
// Returns:
//   0..19 for the residues in the order A R N D C Q E G H I L K M F P S T W Y V,
//   ANY   for 'X', 'J' and 'O',
//   GAP   for '-', '.' and '_',
//   4, 3, 6 for 'U', 'B', 'Z' (the same numbers as 'C', 'D', 'E'),
//   -1    for characters in the range 0..32 (white space and control characters),
//   -2    for every other character.
// NOTE(review): the return type is plain char; the negative results -1/-2 assume
// that char is signed on the target -- confirm.
char aa2i(char c) {
// Numbering of the 20 residues, 0..19 from left to right:
//A R N D C Q E G H I L K M F P S T W Y V
// Fold lowercase letters to uppercase so the switch only needs uppercase labels.
if (c >= 'a' && c <= 'z') c += 'A' - 'a';
// There is no default label: characters without a case fall through to the
// checks after the switch.
switch (c) {
case 'A':
return 0;
case 'R':
return 1;
case 'N':
return 2;
case 'D':
return 3;
case 'C':
return 4;
case 'Q':
return 5;
case 'E':
return 6;
case 'G':
return 7;
case 'H':
return 8;
case 'I':
return 9;
case 'L':
return 10;
case 'K':
return 11;
case 'M':
return 12;
case 'F':
return 13;
case 'P':
return 14;
case 'S':
return 15;
case 'T':
return 16;
case 'W':
return 17;
case 'Y':
return 18;
case 'V':
return 19;
case 'X':
return ANY;
case 'J':
return ANY;
case 'O':
return ANY;
case 'U':
return 4; // U: selenocysteine -> cysteine (same number as 'C')
case 'B':
return 3; // B: D (or N) -> same number as 'D'
case 'Z':
return 6; // Z: E (or Q) -> same number as 'E'
case '-':
return GAP;
case '.':
return GAP;
case '_':
return GAP;
}
// Reached only when no case matched.
if (c >= 0 && c <= 32) return -1; // white space and control characters
return -2; // any other unrecognised character
}
// Minimal driver: calls aa2i once so that the function ends up in the binary
// and can be inspected in the debugger.
int main(int argc, char *argv[])
{
    // The command-line arguments are not used.
    (void)argc;
    (void)argv;

    (void)aa2i('R'); // result intentionally discarded
    return 0;
}
6 [1] char aa2i(char c) {
0x5555555551c9 f3 0f 1e fa endbr64
0x5555555551cd <+ 4> 55 push %rbp
0x5555555551ce <+ 5> 48 89 e5 mov %rsp,%rbp
0x5555555551d1 <+ 8> 89 f8 mov %edi,%eax
0x5555555551d3 <+ 10> 88 45 fc mov %al,-0x4(%rbp)
9 [1] if (c >= 'a' && c <= 'z') c += 'A' - 'a';
0x5555555551d6 <+ 13> 80 7d fc 60 cmpb $0x60,-0x4(%rbp)
0x5555555551da <+ 17> 7e 10 jle 0x5555555551ec <aa2i(char)+35>
0x5555555551dc <+ 19> 80 7d fc 7a cmpb $0x7a,-0x4(%rbp)
0x5555555551e0 <+ 23> 7f 0a jg 0x5555555551ec <aa2i(char)+35>
0x5555555551e2 <+ 25> 0f b6 45 fc movzbl -0x4(%rbp),%eax
0x5555555551e6 <+ 29> 83 e8 20 sub $0x20,%eax
0x5555555551e9 <+ 32> 88 45 fc mov %al,-0x4(%rbp)
10 [1] switch (c) {
0x5555555551ec <+ 35> 0f be 45 fc movsbl -0x4(%rbp),%eax
0x5555555551f0 <+ 39> 83 e8 2d sub $0x2d,%eax
0x5555555551f3 <+ 42> 83 f8 32 cmp $0x32,%eax
0x5555555551f6 <+ 45> 0f 87 18 01 00 00 ja 0x555555555314 <aa2i(char)+331>
0x5555555551fc <+ 51> 89 c0 mov %eax,%eax
0x5555555551fe <+ 53> 48 8d 14 85 00 00 00 00 lea 0x0(,%rax,4),%rdx
0x555555555206 <+ 61> 48 8d 05 07 0e 00 00 lea 0xe07(%rip),%rax # 0x555555556014
0x55555555520d <+ 68> 8b 04 02 mov (%rdx,%rax,1),%eax
0x555555555210 <+ 71> 48 98 cltq
0x555555555212 <+ 73> 48 8d 15 fb 0d 00 00 lea 0xdfb(%rip),%rdx # 0x555555556014
0x555555555219 <+ 80> 48 01 d0 add %rdx,%rax
0x55555555521c <+ 83> 3e ff e0 notrack jmpq *%rax
12 [1] return 0;
0x55555555521f <+ 86> b8 00 00 00 00 mov $0x0,%eax
0x555555555224 <+ 91> e9 03 01 00 00 jmpq 0x55555555532c <aa2i(char)+355>
14 [1] return 1;
0x555555555229 <+ 96> b8 01 00 00 00 mov $0x1,%eax
0x55555555522e <+ 101> e9 f9 00 00 00 jmpq 0x55555555532c <aa2i(char)+355>
16 [1] return 2;
0x555555555233 <+ 106> b8 02 00 00 00 mov $0x2,%eax
0x555555555238 <+ 111> e9 ef 00 00 00 jmpq 0x55555555532c <aa2i(char)+355>
18 [1] return 3;
0x55555555523d <+ 116> b8 03 00 00 00 mov $0x3,%eax
0x555555555242 <+ 121> e9 e5 00 00 00 jmpq 0x55555555532c <aa2i(char)+355>
20 [1] return 4;
0x555555555247 <+ 126> b8 04 00 00 00 mov $0x4,%eax
0x55555555524c <+ 131> e9 db 00 00 00 jmpq 0x55555555532c <aa2i(char)+355>
22 [1] return 5;
0x555555555251 <+ 136> b8 05 00 00 00 mov $0x5,%eax
0x555555555256 <+ 141> e9 d1 00 00 00 jmpq 0x55555555532c <aa2i(char)+355>
24 [1] return 6;
0x55555555525b <+ 146> b8 06 00 00 00 mov $0x6,%eax
0x555555555260 <+ 151> e9 c7 00 00 00 jmpq 0x55555555532c <aa2i(char)+355>
26 [1] return 7;
0x555555555265 <+ 156> b8 07 00 00 00 mov $0x7,%eax
0x55555555526a <+ 161> e9 bd 00 00 00 jmpq 0x55555555532c <aa2i(char)+355>
28 [1] return 8;
0x55555555526f <+ 166> b8 08 00 00 00 mov $0x8,%eax
0x555555555274 <+ 171> e9 b3 00 00 00 jmpq 0x55555555532c <aa2i(char)+355>
30 [1] return 9;
0x555555555279 <+ 176> b8 09 00 00 00 mov $0x9,%eax
0x55555555527e <+ 181> e9 a9 00 00 00 jmpq 0x55555555532c <aa2i(char)+355>
32 [1] return 10;
0x555555555283 <+ 186> b8 0a 00 00 00 mov $0xa,%eax
0x555555555288 <+ 191> e9 9f 00 00 00 jmpq 0x55555555532c <aa2i(char)+355>
34 [1] return 11;
0x55555555528d <+ 196> b8 0b 00 00 00 mov $0xb,%eax
0x555555555292 <+ 201> e9 95 00 00 00 jmpq 0x55555555532c <aa2i(char)+355>
36 [1] return 12;
0x555555555297 <+ 206> b8 0c 00 00 00 mov $0xc,%eax
0x55555555529c <+ 211> e9 8b 00 00 00 jmpq 0x55555555532c <aa2i(char)+355>
38 [1] return 13;
0x5555555552a1 <+ 216> b8 0d 00 00 00 mov $0xd,%eax
0x5555555552a6 <+ 221> e9 81 00 00 00 jmpq 0x55555555532c <aa2i(char)+355>
40 [1] return 14;
0x5555555552ab <+ 226> b8 0e 00 00 00 mov $0xe,%eax
0x5555555552b0 <+ 231> eb 7a jmp 0x55555555532c <aa2i(char)+355>
42 [1] return 15;
0x5555555552b2 <+ 233> b8 0f 00 00 00 mov $0xf,%eax
0x5555555552b7 <+ 238> eb 73 jmp 0x55555555532c <aa2i(char)+355>
44 [1] return 16;
0x5555555552b9 <+ 240> b8 10 00 00 00 mov $0x10,%eax
0x5555555552be <+ 245> eb 6c jmp 0x55555555532c <aa2i(char)+355>
46 [1] return 17;
0x5555555552c0 <+ 247> b8 11 00 00 00 mov $0x11,%eax
0x5555555552c5 <+ 252> eb 65 jmp 0x55555555532c <aa2i(char)+355>
48 [1] return 18;
0x5555555552c7 <+ 254> b8 12 00 00 00 mov $0x12,%eax
0x5555555552cc <+ 259> eb 5e jmp 0x55555555532c <aa2i(char)+355>
50 [1] return 19;
0x5555555552ce <+ 261> b8 13 00 00 00 mov $0x13,%eax
0x5555555552d3 <+ 266> eb 57 jmp 0x55555555532c <aa2i(char)+355>
52 [1] return ANY;
0x5555555552d5 <+ 268> b8 14 00 00 00 mov $0x14,%eax
0x5555555552da <+ 273> eb 50 jmp 0x55555555532c <aa2i(char)+355>
54 [1] return ANY;
0x5555555552dc <+ 275> b8 14 00 00 00 mov $0x14,%eax
0x5555555552e1 <+ 280> eb 49 jmp 0x55555555532c <aa2i(char)+355>
56 [1] return ANY;
0x5555555552e3 <+ 282> b8 14 00 00 00 mov $0x14,%eax
0x5555555552e8 <+ 287> eb 42 jmp 0x55555555532c <aa2i(char)+355>
58 [1] return 4; //Selenocystein -> Cystein
0x5555555552ea <+ 289> b8 04 00 00 00 mov $0x4,%eax
0x5555555552ef <+ 294> eb 3b jmp 0x55555555532c <aa2i(char)+355>
60 [1] return 3; //D (or N)
0x5555555552f1 <+ 296> b8 03 00 00 00 mov $0x3,%eax
0x5555555552f6 <+ 301> eb 34 jmp 0x55555555532c <aa2i(char)+355>
62 [1] return 6; //E (or Q)
0x5555555552f8 <+ 303> b8 06 00 00 00 mov $0x6,%eax
0x5555555552fd <+ 308> eb 2d jmp 0x55555555532c <aa2i(char)+355>
64 [1] return GAP;
0x5555555552ff <+ 310> b8 15 00 00 00 mov $0x15,%eax
0x555555555304 <+ 315> eb 26 jmp 0x55555555532c <aa2i(char)+355>
66 [1] return GAP;
0x555555555306 <+ 317> b8 15 00 00 00 mov $0x15,%eax
0x55555555530b <+ 322> eb 1f jmp 0x55555555532c <aa2i(char)+355>
68 [1] return GAP;
0x55555555530d <+ 324> b8 15 00 00 00 mov $0x15,%eax
0x555555555312 <+ 329> eb 18 jmp 0x55555555532c <aa2i(char)+355>
70 [1] if (c >= 0 && c <= 32) return -1; // white space and control characters
0x555555555314 <+ 331> 80 7d fc 00 cmpb $0x0,-0x4(%rbp)
0x555555555318 <+ 335> 78 0d js 0x555555555327 <aa2i(char)+350>
0x55555555531a <+ 337> 80 7d fc 20 cmpb $0x20,-0x4(%rbp)
0x55555555531e <+ 341> 7f 07 jg 0x555555555327 <aa2i(char)+350>
0x555555555320 <+ 343> b8 ff ff ff ff mov $0xffffffff,%eax
0x555555555325 <+ 348> eb 05 jmp 0x55555555532c <aa2i(char)+355>
71 [1] return -2;
0x555555555327 <+ 350> b8 fe ff ff ff mov $0xfffffffe,%eax
72 [1] }
0x55555555532c <+ 355> 5d pop %rbp
0x55555555532d <+ 356> c3 retq
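This is so-called RIP-relative addressing (described in the Intel SDM, Volume 2, Section 2.2.1.6). The constants 0xe07 and 0xdfb are not padding: each one is a displacement that is added to the address of the next instruction (the value of %rip while the lea executes). Both lea instructions therefore compute the same address, 0x555555556014. Most probably this is where the jump table is located, and it looks like it is placed after the code of the function. The mov (%rdx,%rax,1),%eax in between loads the 4-byte table entry selected by c - 0x2d, cltq sign-extends it, and the final add adds the table base again, which suggests that each entry stores its jump target as an offset relative to the table itself.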
In the first case, the address of the next instruction is 0x55555555520d, and 0x55555555520d + 0xe07 = 0x555555556014.
In the second case, the address of the next instruction is 0x555555555219, and 0x555555555219 + 0xdfb = 0x555555556014.