test.S
/*
 * Minimal entry stub (GAS, AT&T syntax: "op src, dst").
 * Clears AX and then copies that zero into each data segment register.
 * NOTE(review): no .code16/.code32 directive is given here, so the operand
 * size used for encoding comes from the assembler invocation -- confirm it
 * matches the CPU mode this code is meant to run in.
 */
.text
.global _start
_start:
/* AX = 0: xor of a register with itself always yields zero. */
xor %ax, %ax
/* Copy the zero in AX into DS, SS, ES, FS and GS in turn. */
mov %ax, %ds
mov %ax, %ss
mov %ax, %es
mov %ax, %fs
mov %ax, %gs
I produced the disassembly file with the following commands:
$ x86_64-elf-gcc -g -c -O0 -m32 -fno-pie -fno-stack-protector -fno-asynchronous-unwind-tables .\test.S
$ x86_64-elf-ld .\test.o -m elf_i386 -Ttext=0x7c00 -o test.elf
$ x86_64-elf-objdump -x -d -S -m i386 ./test.elf > test_dis.txt
test_dis.txt
./test.elf: file format elf32-i386
./test.elf
architecture: i386, flags 0x00000112:
EXEC_P, HAS_SYMS, D_PAGED
start address 0x00007c00
Program Header:
LOAD off 0x00000000 vaddr 0x00007000 paddr 0x00007000 align 2**12
filesz 0x00000c0d memsz 0x00000c0d flags r-x
Sections:
Idx Name Size VMA LMA File off Algn
0 .text 0000000d 00007c00 00007c00 00000c00 2**0
CONTENTS, ALLOC, LOAD, READONLY, CODE
1 .debug_aranges 00000020 00000000 00000000 00000c10 2**3
CONTENTS, READONLY, DEBUGGING
2 .debug_info 00000049 00000000 00000000 00000c30 2**0
CONTENTS, READONLY, DEBUGGING
3 .debug_abbrev 00000014 00000000 00000000 00000c79 2**0
CONTENTS, READONLY, DEBUGGING
4 .debug_line 0000003b 00000000 00000000 00000c8d 2**0
CONTENTS, READONLY, DEBUGGING
SYMBOL TABLE:
00007c00 l d .text 00000000 .text
00000000 l d .debug_aranges 00000000 .debug_aranges
00000000 l d .debug_info 00000000 .debug_info
00000000 l d .debug_abbrev 00000000 .debug_abbrev
00000000 l d .debug_line 00000000 .debug_line
00007c00 g .text 00000000 _start
00008c0d g .text 00000000 __bss_start
00008c0d g .text 00000000 _edata
00008c10 g .text 00000000 _end
Disassembly of section .text:
00007c00 <_start>:
.text
.global _start
_start:
xor %ax, %ax
7c00: 66 31 c0 xor %ax,%ax
mov %ax, %ds
7c03: 8e d8 mov %eax,%ds
mov %ax, %ss
7c05: 8e d0 mov %eax,%ss
mov %ax, %es
7c07: 8e c0 mov %eax,%es
mov %ax, %fs
7c09: 8e e0 mov %eax,%fs
7c0b: 8e e8 mov %eax,%gs
Why does the disassembly show `mov %eax,%ds`, which does not match my original assembly source? The objdump output looks contradictory: where it prints `mov %eax,%ds` I expected `mov %ax,%ds`, and I don't see how `%eax` (32 bits) can be moved into `%ds` (16 bits).
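The instructions `mov %eax, %ds` and `mov %ax, %ds` do exactly the same thing (you could say they are really the same instruction): a segment register is only 16 bits wide, so only the low 16 bits of the source register are used either way. The sole difference is that, in 32-bit code, the former has a shorter encoding because it omits the `66` operand-size prefix byte. The assembler kindly picks the shorter encoding for you, while the disassembler artificially distinguishes the two encodings by printing a different register size.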