I'm experiencing exceptions when calling functions that are at a lower memory address than the current function while in protected mode. The exception will vary depending on code configuration, sometimes a general protection fault, sometimes an invalid opcode, etc.
Here's the source code of a program that produces a general protection fault on hardware, and a double fault in DOSBox. The relevant code is in segment `seg32`. The fault occurs in `func1`, when it attempts to call back to `func2`:
;single: segment holding the descriptor tables, the real mode init/exit code and
;the real mode stack (s16). Declared with the "stack" combine type.
single segment stack
assume cs:single,ds:single,ss:single
;gdesc: layout of one 8-byte entry of the global descriptor table, fields in
;memory order. Fields left empty in an initializer take the defaults below.
gdesc struc ;global descriptor structure definition
limit_lo dw 0ffffh ;low word of 20-bit limit (bits 15:0), defaults to 0ffffh
base_lo dw ? ;low word of base address (bits 15:0)
base_mid db ? ;middle byte of base address (bits 23:16)
priv db ? ;access byte: privilege and type bits
limit_hi db ? ;granularity, operand size, hi nybble of limit (bits 19:16)
base_hi db ? ;high byte of base address (bits 31:24)
gdesc ends
;idesc: layout of one 8-byte entry of the interrupt descriptor table, fields in
;memory order. The handler offset is split into a low and a high word.
idesc struc ;interrupt descriptor structure definition
offset_lo dw ? ;low word of handler offset (bits 15:0)
selector dw ? ;code segment selector in gdt
unused db 0 ;always zero
type_attr db ? ;type attribute bits
offset_hi dw ? ;high word of handler offset (bits 31:16)
idesc ends
;global descriptor table, phys addresses calculated by init code
;selectors (8 bytes per entry): 0=nulld, 8=pcode, 10h=pdata, 18h=rcode,
;20h=rdata, 28h=vbuff, 30h=tbuff
;access byte: 09eh = present, dpl 0, code, conforming, readable
;             09ah = present, dpl 0, code, readable
;             092h = present, dpl 0, data, writable
;flags byte:  0cfh = 4k granularity, 32-bit, limit bits 19:16 = 0fh (4 gb limit)
;             0    = byte granularity, 16-bit, limit bits 19:16 = 0 (64 kb limit)
nulld gdesc <0,0,0,0,0,0> ;null descriptor
pcode gdesc <,,,09eh,0cfh,> ;protected mode code descriptor
pdata gdesc <,,,092h,0cfh,> ;protected mode data descriptor
;The real mode descriptors are loaded into every segment register just before
;cr0.pe is cleared. The limit and attributes they carry stay cached in real mode,
;so they must describe ordinary real mode segments: 64 kb limit, byte granular,
;16-bit. A 4 gb page-granular limit here would return to dos in "unreal mode".
rcode gdesc <,,,09ah,0,> ;real mode code descriptor, 64 kb limit
rdata gdesc <,,,092h,0,> ;real mode data descriptor, 64 kb limit
vbuff gdesc <,0,0ah,092h,0cfh,> ;vga pixel buffer data descriptor, base 0a0000h
tbuff gdesc <,8000h,0bh,092h,0cfh,> ;text buffer data descriptor, base 0b8000h
gdt_limit dw offset gdt_limit-offset nulld-1 ;gdt_limit <- gdt size in bytes-1
gdt_addr dd offset nulld ;gdt_addr <- offset of gdt, physical address calculated at runtime
;interrupt descriptor table: 32 cpu exception vectors followed by the two remapped
;hardware irqs (timer at int 20h, keyboard at int 21h).
;Every gate uses selector 8 (pcode) and type_attr 0eeh = present, dpl 3, 32-bit
;interrupt gate. Handler offsets are relative to offset_0, the start of seg32;
;offset_hi is 0 in every entry, so handlers must lie in the first 64 kb of seg32.
;Vector numbering follows the cpu: 14 is the page fault, 15 is reserved and the
;coprocessor (x87) error is vector 16.
idt_div idesc <div_err-offset_0,8,0,0eeh,0> ;0: div error
idesc <dont_care-offset_0,8,0,0eeh,0> ;1: debugger call
idesc <nmi-offset_0,8,0,0eeh,0> ;2: nmi interrupt
idesc <dont_care-offset_0,8,0,0eeh,0> ;3: breakpoint
idesc <dont_care-offset_0,8,0,0eeh,0> ;4: into overflow
idesc <dont_care-offset_0,8,0,0eeh,0> ;5: bound overflow
idesc <invalid_op-offset_0,8,0,0eeh,0> ;6: invalid opcode
idesc <fpu_err-offset_0,8,0,0eeh,0> ;7: coprocessor unavailable
idesc <double_fault-offset_0,8,0,0eeh,0> ;8: double fault
idesc <fpu_err-offset_0,8,0,0eeh,0> ;9: coprocessor overrun
idesc <dont_care-offset_0,8,0,0eeh,0> ;10: invalid tss
idesc <not_present-offset_0,8,0,0eeh,0> ;11: segment not present
idesc <dont_care-offset_0,8,0,0eeh,0> ;12: stack exception
idesc <gp_fault-offset_0,8,0,0eeh,0> ;13: general protection fault
idesc <dont_care-offset_0,8,0,0eeh,0> ;14: page fault (paging is never enabled)
idesc <dont_care-offset_0,8,0,0eeh,0> ;15: reserved
idesc <fpu_err-offset_0,8,0,0eeh,0> ;16: coprocessor error
idesc 15 dup (<dont_care-offset_0,8,0,0eeh,0>) ;17-31: reserved
idt_pit idesc <pit_isr-offset_0,8,0,0eeh,0> ;20h: timer isr
idt_kbd idesc <kbd_isr-offset_0,8,0,0eeh,0> ;21h: keyboard isr
idt_limit dw offset idt_limit-offset idt_div-1 ;idt_limit <- idt size in bytes-1
idt_addr dd offset idt_div ;idt_addr <- offset of idt, complete physical address
;calculated at runtime
;6-byte idtr image (limit word + base dword) loaded with lidt after returning to
;real mode: 3ffh-byte limit at physical address 0
ridt_limit dw 3ffh ;real mode idt limit
ridt_addr dd 0 ;real mode idt address
;pic interrupt mask registers, stored by start and written back by real_cs
m_pic_mask db ? ;original master pic mask
s_pic_mask db ? ;original slave pic mask
;start: program entry point (named by "end start"), runs in real mode.
;  1. disables maskable interrupts and nmi
;  2. saves both pic mask registers, then re-initializes the pics so irq 0-7
;     raise int 20h-27h and irq 8-15 raise int 28h-2fh, leaving only the timer
;     (irq 0) and keyboard (irq 1) unmasked
;  3. converts the gdt/idt offsets to physical addresses and loads idtr and gdtr
;  4. patches the physical bases of segment single and segment seg32 into the
;     real mode and protected mode descriptors
;  5. sets cr0.pe and far jumps to 8:start32
;Does not return; real mode is re-entered through real_mode/real_cs.
;Clobbers ax/eax and ds.
start:
mov ax, cs
mov ds, ax ;ds = cs, single segment
cli ;disable maskable interrupts
in al, 70h ;al <- cmos ram index register port
or al, 80h ;set bit 7 to disable nmi
out 70h, al ;non-maskable interrupts disabled
;check for 386+ (not implemented)
;enable a20 (not implemented)
;save the pic masks BEFORE the init sequence: once ICW1 has been sent the mask
;registers no longer hold the values dos was using
in al, 21h
mov m_pic_mask, al ;store original master pic mask register, restored by real_cs
jmp $+2
jmp $+2
in al, 0a1h
mov s_pic_mask, al ;store original slave pic mask register, restored by real_cs
jmp $+2
jmp $+2
;reinit PICs
mov al, 11h ;ICW1, IC4 bit set, cascade bit clr, edge trig, init bit set
out 20h, al ;send ICW1 to primary pic cmd register
jmp $+2
jmp $+2 ;delay needed on older systems
out 0a0h, al ;send ICW1 to slave pic cmd register
jmp $+2
jmp $+2
mov al, 20h ;ICW2 base address for primary pic = 20h
out 21h, al ;send ICW2 to primary pic data register
jmp $+2
jmp $+2
mov al, 28h ;ICW2 base address for slave pic = 28h
out 0a1h, al ;send ICW2 to slave pic data register
jmp $+2
jmp $+2
mov al, 4 ;ICW3, on primary pic, bits map to irq lines, use irq 2 for cascade
out 21h, al ;send ICW3 to primary pic data register
jmp $+2
jmp $+2
mov al, 2 ;ICW3, on slave pic, byte value = irq line, use irq 2 for cascade
out 0a1h, al ;send ICW3 to slave pic data register
jmp $+2
jmp $+2
mov al, 1 ;ICW4, bit 0 set selects 80x86 mode
out 21h, al ;send ICW4 to primary pic data register
jmp $+2
jmp $+2
out 0a1h, al ;send ICW4 to slave pic data register
jmp $+2
jmp $+2
;only need keyboard and timer irq enabled for now
mov al, 0fch ;mask out all but irq 0 and 1
out 21h, al ;master pic mask updated
jmp $+2
jmp $+2
mov al, 0ffh ;mask out every slave irq
out 0a1h, al
jmp $+2
jmp $+2
.386p ;calc and insert phys address into gdt entries
xor eax, eax ;clear high word of eax
mov ax, cs ;eax <- code segment address
shl eax, 4 ;multiply segment address by 16 to convert it to physical address
add gdt_addr, eax ;gdt_addr is defined with offset of gdt, gdt_addr + cs*16 = physical address of gdt
add idt_addr, eax ;idt_addr is defined with offset of idt, idt_addr + cs*16 = physical address of idt
lidt idt_limit ;load idtr
lgdt gdt_limit ;load gdtr
mov rcode.base_lo, ax
mov rdata.base_lo, ax ;store low word of cs phys address to real mode descriptors
shr eax, 16 ;shift eax to access high word: al = bits 23:16, ah = bits 31:24
mov rcode.base_mid, al
mov rdata.base_mid, al ;store middle byte of cs phys address to real mode descriptors
mov rcode.base_hi, ah
mov rdata.base_hi, ah ;store high byte of cs phys address to real mode descriptors
xor eax, eax ;clear high word of eax
mov ax, seg seg32 ;eax <- seg32 segment address (fixed up by dos at runtime)
shl eax, 4 ;multiply segment address by 16 to convert it to physical address
mov pcode.base_lo, ax
mov pdata.base_lo, ax ;store low word of seg32 phys address to protected mode descriptors
shr eax, 16 ;shift eax to access high word: al = bits 23:16, ah = bits 31:24
mov pcode.base_mid, al
mov pdata.base_mid, al ;store middle byte of seg32 phys address to protected mode descriptors
mov pcode.base_hi, ah
mov pdata.base_hi, ah ;store high byte of seg32 phys address to protected mode descriptors
mov eax, cr0 ;load control register 0
or al, 1 ;set pe bit
mov cr0, eax ;enable protected mode
;manually encoded jmp 8h:start32
db 66h ;specify 32-bit operand
db 0eah ;jmp opcode
dd offset start32 ;32 bit offset
dw 8 ;global descriptor selector (select protected mode code segment)
;real_mode: reached from exit_pm through a far jump to selector 18h (rcode) with
;interrupts disabled and the data/stack segment registers holding selector 20h
;(rdata). Clears cr0.pe, then far jumps to single:real_cs so that cs holds a
;real mode segment value. Clobbers eax.
real_mode: ;transition back to real mode
.386p
mov eax, cr0 ;load control register into eax
and al, 0feh ;clear pe bit
mov cr0, eax ;real mode enabled
;manually encoded far jmp, 16-bit offset followed by 16-bit segment
db 0eah ;jmp single:real_cs to load cs:ip
dw offset real_cs ;offset real_cs
dw seg single ;segment single (fixed up by dos at runtime)
;real_cs: real mode cleanup and exit.
;  1. reloads ds and ss with the real mode segment value of cs
;  2. re-initializes the pics with vector bases 8 (master) and 70h (slave) and
;     writes back the masks stored in m_pic_mask/s_pic_mask
;  3. loads the real mode idtr (ridt_limit/ridt_addr)
;  4. empties the keyboard buffer by setting head = tail in segment 40h
;  5. re-enables nmi and maskable interrupts, then terminates via int 21h/4c00h
;Does not return.
real_cs: ;back in real mode
.8086
mov ax, cs
mov ds, ax ;ds = cs
mov ss, ax ;ss = cs
mov al, 11h ;ICW1, IC4 bit set, cascade bit clr, edge trig, init bit set
out 20h, al ;send ICW1 to primary pic cmd register
jmp $+2
jmp $+2 ;delay needed on older systems
out 0a0h, al ;send ICW1 to slave pic cmd register
jmp $+2
jmp $+2
mov al, 8 ;ICW2 base address for primary pic = 8
out 21h, al ;send ICW2 to primary pic data register
jmp $+2
jmp $+2
mov al, 70h ;ICW2 base address for slave pic = 70h
out 0a1h, al ;send ICW2 to slave pic data register
jmp $+2
jmp $+2
mov al, 4 ;ICW3, on primary pic, bits map to irq lines, use irq 2 for cascade
out 21h, al ;send ICW3 to primary pic data register
jmp $+2
jmp $+2
mov al, 2 ;ICW3, on slave pic, byte value = irq line, use irq 2 for cascade
out 0a1h, al ;send ICW3 to slave pic data register
jmp $+2
jmp $+2
mov al, 1 ;ICW4, bit 0 set selects 80x86 mode
out 21h, al ;send ICW4 to primary pic data register
jmp $+2
jmp $+2
out 0a1h, al ;send ICW4 to slave pic data register
jmp $+2
jmp $+2
xor al, al ;clear the data registers
out 21h, al
jmp $+2
jmp $+2
out 0a1h, al
jmp $+2
jmp $+2
mov al, m_pic_mask ;al <- master pic mask stored by start
out 21h, al ;master pic mask restored
jmp $+2
jmp $+2
mov al, s_pic_mask ;al <- slave pic mask stored by start
out 0a1h, al ;slave pic mask restored
jmp $+2
jmp $+2
.386p
lidt ridt_limit ;setup idtr for real mode
.8086
mov ax, 40h
mov es, ax ;access kbd data area via segment 40h
mov word ptr es:[1ah], 1eh ;set the kbd buff head to start of buff
mov word ptr es:[1ch], 1eh ;kbd buff tail = head to clear kbd buffer
in al, 70h ;al <- cmos ram index register port
and al, 7fh ;clear bit 7 to enable nmi
out 70h, al ;nmi enabled
sti ;interrupts enabled
mov ax, 4c00h ;Terminate process function selected
int 21h ;return to dos
align 2 ;align stack for 16-bit accesses
;256 byte real mode stack, filled with 0ffh. At least 256 bytes are needed to
;change video modes (int 10h) with some vga bios.
s16 db 256 dup (0ffh)
single ends ;end of segment single
;seg32: 32-bit (use32) segment holding the protected mode handlers, code and
;stack. pcode and pdata in the gdt are both given this segment's physical base,
;so code and data offsets are relative to offset_0.
.386p
seg32 segment use32
assume cs:seg32,ds:seg32,ss:seg32
offset_0: ;used to generate 16-bit offsets in idt descriptor definitions
db "start" ;used to find start of segment in debug
;Fault and exception handlers. Each one writes a single marker character to
;offset 0 of the segment addressed by es (start32 loads es with the text buffer
;selector) and then halts, so the character on screen identifies the exception.
;markers: '0' div error, '1' don't care, '2' nmi, '3' invalid opcode,
;         '4' double fault, '5' fpu error, '6' not present, '7' gp fault

;fault_stub: shared handler body. marker = character to display. Clobbers edi.
fault_stub macro marker
xor edi, edi ;edi <- 0, first byte of the es segment
mov byte ptr es:[edi], marker ;show which handler was entered
hlt ;stop here
iretd ;only reached if execution resumes after the hlt
endm

div_err: ;division error isr
fault_stub '0'

dont_care: ;rare/obscure faults and exceptions
fault_stub '1'

nmi: ;non maskable interrupt isr
fault_stub '2'

invalid_op: ;invalid opcode isr
fault_stub '3'

double_fault: ;double fault isr
fault_stub '4'

fpu_err: ;fpu error isr
fault_stub '5'

not_present: ;descriptor not present isr
fault_stub '6'

gp_fault: ;general protection fault isr
fault_stub '7'
;pit_isr: timer interrupt handler (int 20h). Does no work beyond acknowledging
;the interrupt at the master pic. Preserves eax.
pit_isr: ;int 20h timer isr
push eax
mov al, 20h ;al <- end of interrupt command
out 20h, al ;send eoi to master pic cmd register
pop eax
iretd
;kbd_isr: keyboard interrupt handler (int 21h). Reads and discards the byte
;waiting at port 60h, then acknowledges the interrupt at the master pic.
;Preserves eax.
kbd_isr: ;int 21h keyboard isr
push eax
in al, 60h ;read the keyboard data port, value is discarded
mov al, 20h ;al <- end of interrupt command
out 20h, al ;send eoi to master pic cmd register
pop eax
iretd
;sp16: real mode sp, written by start32 and read back by exit_pm. It lives in
;seg32 and is accessed through ds = 10h (pdata).
sp16 dw ? ;16-bit stack pointer
;start32: first code executed in protected mode, reached by the far jump at the
;end of start. Loads ds/fs/gs/ss with selector 10h (pdata), saves the real mode
;sp, points esp at the top of the s32 stack, loads es with selector 30h (text
;buffer), enables interrupts and calls func1. Execution then falls through into
;exit_pm.
start32:
mov ax, 10h
mov ds, ax ;ds <- protected mode data descriptor (same physical address as code descriptor)
mov fs, ax
mov gs, ax ;setup extra segments
mov ss, ax ;setup stack segment
mov sp16, sp ;store old stack pointer, restore before returning to real mode
mov esp, offset s32_end ;setup 32-bit stack pointer
mov ax, 30h
mov es, ax ;es <- vga compatible text buffer
sti ;ready for interrupts, leave nmi disabled
call func1
;exit_pm: first half of the return to real mode. Disables interrupts, restores
;the saved 16-bit stack pointer (read through ds while it still selects pdata),
;loads every data/stack segment register with selector 20h (rdata) and far jumps
;to 18h:real_mode in segment single. Does not return.
exit_pm: ;return to real mode
cli ;interrupts disabled
mov sp, sp16 ;restore 16-bit stack pointer
mov ax, 20h
mov ds, ax
mov es, ax
mov fs, ax
mov gs, ax
mov ss, ax ;load real mode data descriptor selectors
;manually encoded far jmp, 32-bit offset followed by 16-bit selector
db 0eah ;jmp 18h:real_mode to load real mode code descriptor
dd offset real_mode ;offset to 16-bit code in single segment
dw 18h ;real mode code selector
db "call_here" ;use this to find call target in debug
;func2: call target placed at a lower address than its caller func1. Pushes
;eax, ebx, ecx, edx and esi, pops them again in reverse order and returns; all
;registers are preserved.
func2 proc
push eax
push ebx
push ecx
push edx
push esi
pop esi
pop edx
pop ecx
pop ebx
pop eax
ret
func2 endp
;func1: called from start32. Saves eax, ebx, ecx, edx and esi, performs some
;arbitrary register arithmetic, calls func2 (which sits at a lower address, so
;the near call needs a negative displacement), then restores the registers and
;returns.
func1 proc
push eax
push ebx
push ecx
push edx
push esi
;do arbitrary work
mov eax, 934875h
xor eax, ebx
inc ecx
mul edx
add edx, 94357h
jmp target1
;the next three instructions are skipped by the jmp above
xor ecx, ecx
add edx, 987h
dec esi
target1:
call func2 ;IT NEVER MAKES IT TO FUNC2
jmp over_marker ;skip the marker string below
db "calladdress" ;use this to find call instruction in debug
over_marker:
pop esi
pop edx
pop ecx
pop ebx
pop eax
ret
func1 endp
align 4 ;align stack for 32-bit accesses
;256 byte protected mode stack, filled with 0ffh; start32 sets esp to s32_end
s32 db 256 dup (0ffh) ;256 byte stack
s32_end: ;used to initialize esp
seg32 ends
end start ;program entry point is start in segment single
I believe the issue is that MASM is generating the wrong call target and I'm executing garbage.
I tested this by loading the program with debug (just to examine the opcodes). Debug loads the call instruction to `06CA:05A9` and the call target (`push eax`) to `06CA:057B`. The call instruction is encoded as `E8 CD FF 00 00`, which is `call loc_0000ffd2`.
0x5a9 plus 0xffd2 would roll over into 0x57b if it was a 16-bit segment. Or maybe the offset is signed and that's a negative number? Am I using the wrong type of call?
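The problem is that the MASM 5.10 linker is deficient and doesn't properly handle this kind of 32-bit relocation. As you suspected, it treats the 32-bit relative displacement as a 16-bit value, which, as you correctly observed, produces the wrong value (notably when calling code at a negative displacement). In your example the displacement from the end of the call instruction (offset 05AEh) back to the target (057Bh) is -33h, which should be encoded as `E8 CD FF FF FF`; the linker filled in only the low 16 bits and left the upper word zero, giving `E8 CD FF 00 00`. To test your code I have been using MASM 5.10a, and the linker is version 3.64.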
You can continue to use MASM.EXE 5.10a, but you will need to replace your linker. The 16-bit Microsoft Overlay Linker (LINK.EXE) that comes with MASM 6.11 does work correctly. You will need to have an expanded memory manager present for the LINK.EXE and/or MASM.EXE to function correctly. MASM 6.11 was the last version of the MASM products that can be run from DOS. MASM 6.11 install disks can be downloaded from here.
If you download and install Borland's Turbo Assembler v2.0x you can assemble your code with TASM and link with TLINK. If you run TLINK on the object file produced by TASM it will actually warn you of this problem! The error will look something like:
32-bit record encountered in module Use "/3" option
If you use the `/3` option, it enables 32-bit processing and a proper executable should be generated.
To assemble with TASM (it will still work with MASM) a small adjustment must be made to these lines:
lidt idt_limit ;load idtr
lgdt gdt_limit ;load gdtr
...
lidt ridt_limit ;setup idtr for real mode
TASM is picky about the type and they have to be written as:
lidt fword ptr idt_limit ;load idtr
lgdt fword ptr gdt_limit ;load gdtr
...
lidt fword ptr ridt_limit ;setup idtr for real mode
JWasm is a MASM-compatible open source solution based on Watcom's assembler (WASM) with more modern updates. JWasm can also be built and run on other platforms such as Windows, Linux and macOS. JWasm is able to assemble files to DOS object files (OMF) like MASM, but it also has an integrated 16-bit linker which allows you to build a DOS MZ executable directly. You can download a pre-built DOS version of JWasm from here.
JWasm is picky about the types, just like TASM, so see the TASM section above regarding `fword ptr`.
To assemble and link a single source assembly file to a DOS executable you can simply do:
jwasmr -mz filename.asm
This should produce a file called `filename.exe`.