Tags: assembly, x86, dos, masm, protected-mode

MASM generating wrong call target in protected mode


I'm experiencing exceptions when calling functions that are at a lower memory address than the current function while in protected mode. The exception will vary depending on code configuration, sometimes a general protection fault, sometimes an invalid opcode, etc.

Here's the source code of a program that produces a general protection fault on hardware and a double fault in DOSBox. The relevant code is in segment seg32. The fault occurs in func1 when it attempts to call back to func2.

                                               ;segment "single": holds the descriptor tables, the real mode
                                               ;code and the 16-bit stack; declared before any .386p
                                               ;directive and with the stack combine type
single segment stack                                                      
assume cs:single,ds:single,ss:single 

                                               ;gdesc: layout of one 8-byte gdt entry, fields in memory order
                                               ;limit_lo defaults to 0ffffh, the other fields default to
                                               ;uninitialized (?) unless given in the <> initializer
gdesc struc                                    ;global descriptor structure definition
limit_lo  dw 0ffffh                            ;low word of 20-bit limit (bits 15:0)
base_lo   dw ?                                 ;low word of base address (bits 15:0)
base_mid  db ?                                 ;middle byte of base address (bits 23:16)
priv      db ?                                 ;privilege and type bits 
limit_hi  db ?                                 ;granularity, operand size, hi nybble of limit (bits 19:16)
base_hi   db ?                                 ;high byte of base address (bits 31:24)
gdesc ends

                                               ;idesc: layout of one 8-byte idt entry, fields in memory order
                                               ;the handler offset is split: low word first, high word last
idesc struc                                    ;interrupt descriptor structure definition
offset_lo dw ?                                 ;low word of offset
selector  dw ?                                 ;selector in gdt 
unused    db 0                                 ;always zero 
type_attr db ?                                 ;type attribute bits
offset_hi dw ?                                 ;high word of offset
idesc ends
                                               ;global descriptor table, phys addresses calculated by init code
                                               ;entries are 8 bytes each, so the selector of an entry is its
                                               ;index * 8; the selector values below are the ones the code
                                               ;in this file loads (8, 10h, 18h, 20h, 30h)
                                               ;NOTE(review): rcode/rdata use limit_hi 08fh (granularity bit
                                               ;set, limit nybble 0fh) - confirm whether 64 kb byte-granular
                                               ;limits are wanted for the segments used to return to real mode
nulld gdesc <0,0,0,0,0,0>                      ;null descriptor, selector 0
pcode gdesc <,,,09eh,0cfh,>                    ;protected mode code descriptor, selector 8, base set in start
pdata gdesc <,,,092h,0cfh,>                    ;protected mode data descriptor, selector 10h, base set in start
rcode gdesc <,,,09ah,08fh,>                    ;real mode code descriptor, selector 18h, base set in start 
rdata gdesc <,,,092h,08fh,>                    ;real mode data descriptor, selector 20h, base set in start                                
vbuff gdesc <,0,0ah,092h,0cfh,>                ;vga pixel buffer data descriptor, selector 28h, base 0a0000h
tbuff gdesc <,8000h,0bh,092h,0cfh,>            ;text buffer data descriptor, selector 30h, base 0b8000h

                                               ;6-byte operand for lgdt: limit word followed by address dword
gdt_limit dw offset gdt_limit-offset nulld-1   ;gdt_limit <- gdt size in bytes-1
gdt_addr  dd offset nulld                      ;gdt_addr <- offset of gdt, physical address calculated at runtime

idt_div idesc <div_err-offset_0,8,0,0eeh,0>    ;interrupt descriptor table, div error
idesc <dont_care-offset_0,8,0,0eeh,0>          ;debugger call
idesc <nmi-offset_0,8,0,0eeh,0>                ;nmi interrupt
idesc <dont_care-offset_0,8,0,0eeh,0>          ;breakpoint
idesc <dont_care-offset_0,8,0,0eeh,0>          ;into overflow
idesc <dont_care-offset_0,8,0,0eeh,0>          ;bound overflow
idesc <invalid_op-offset_0,8,0,0eeh,0>         ;invalid opcode
idesc <fpu_err-offset_0,8,0,0eeh,0>            ;coprocessor unavailable 
idesc <double_fault-offset_0,8,0,0eeh,0>       ;double fault
idesc <fpu_err-offset_0,8,0,0eeh,0>            ;coprocessor overrun 
idesc <dont_care-offset_0,8,0,0eeh,0>          ;invalid tss
idesc <not_present-offset_0,8,0,0eeh,0>        ;segment not present
idesc <dont_care-offset_0,8,0,0eeh,0>          ;stack exception
idesc <gp_fault-offset_0,8,0,0eeh,0>           ;general protection fault
idesc <dont_care-offset_0,8,0,0eeh,0>          ;reserved
idesc <fpu_err-offset_0,8,0,0eeh,0>            ;coprocessor error   
idesc 16 dup (<dont_care-offset_0,8,0,0eeh,0>) ;16 reserved
idt_pit idesc <pit_isr-offset_0,8,0,0eeh,0>    ;timer isr
idt_kbd idesc <kbd_isr-offset_0,8,0,0eeh,0>    ;keyboard isr

idt_limit dw offset idt_limit-offset idt_div-1 ;idt_limit <- idt size in bytes-1
idt_addr  dd offset idt_div                    ;idt_addr <- offset of idt, complete physical address
                                               ;calculated at runtime

                                               ;6-byte operand for the lidt in real_cs: limit word followed
                                               ;directly by the address dword
ridt_limit dw 3ffh                             ;real mode idt limit                            
ridt_addr dd 0                                 ;real mode idt address

                                               ;written by start, read back by real_cs before returning to dos
m_pic_mask db ?                                ;original master pic mask
s_pic_mask db ?                                ;original slave pic mask

start:

    mov ax, cs 
    mov ds, ax                                 ;ds = cs, single segment

    cli                                        ;disable maskable interrupts
    in al, 70h                                 ;al <- cmos ram index register port
    or al, 80h                                 ;set bit 7 to disable nmi 
    out 70h, al                                ;non-maskable interrupts disabled 

    ;check for 386+              
    ;enable a20
                                               ;reinit PICs

    mov al, 11h                                ;ICW1, IC4 bit set, cascade bit clr, edge trig, init bit set
    out 20h, al                                ;send ICW1 to primary pic cmd register
    jmp $+2
    jmp $+2                                    ;delay needed on older systems
    out 0a0h, al                               ;send ICW1 to slave pic cmd register
    jmp $+2
    jmp $+2

    mov al, 20h                                ;ICW2 base address for primary pic = 20h
    out 21h, al                                ;send ICW2 to primary pic data register
    jmp $+2          
    jmp $+2
    mov al, 28h                                ;ICW2 base address for slave pic = 28h
    out 0a1h, al                               ;send ICW2 to slave pic data register
    jmp $+2
    jmp $+2

    mov al, 4                                  ;ICW3, on primary pic, bits map to irq lines, use irq 2 for cascade
    out 21h, al                                ;send ICW3 to primary pic data register
    jmp $+2
    jmp $+2
    mov al, 2                                  ;ICW3, on slave pic, byte value = irq line, use irq 2 for cascade  
    out 0a1h, al                               ;send ICW3 to slave pic data register
    jmp $+2
    jmp $+2

    mov al, 1                                  ;ICW4 set bit 1 to enable 80x86 mode
    out 21h, al                                ;send ICW4 to primary pic data register
    jmp $+2
    jmp $+2
    out 0a1h, al                               ;send ICW4 to slave pic data register
    jmp $+2
    jmp $+2

    xor al, al                                 ;clear the data registers
    out 21h, al
    jmp $+2
    jmp $+2   
    out 0a1h, al
    jmp $+2
    jmp $+2

    in al, 21h                                 ;only need keyboard and timer irq enabled for now
    mov m_pic_mask, al                         ;store original master pic mask register, restore before exit
    or al, 0fch                                ;mask out all but irq 0 and 1 
    out 21h, al                                ;master pic mask updated
    jmp $+2
    jmp $+2

    in al, 0a1h
    mov s_pic_mask, al                         ;store original slave pic mask register, restore before exit
    or al, 0ffh                                ;mask out every slave irq 
    out 0a1h, al
    jmp $+2
    jmp $+2

    .386p                                      ;calc and insert phys address into gdt entries
    xor eax, eax                               ;clear high word of eax
    mov ax, cs                                 ;eax <- code segment address
    shl eax, 4                                 ;multiply segment address by 16 to convert it to physical address
    add gdt_addr, eax                          ;gdt_addr is defined with offset of gdt, gdt_addr + cs*16 = physical addres of gdt
    add idt_addr, eax                          ;idt_addr is defined with offset of idt, idt_addr + cs*16 = physical addres of idt
    lidt idt_limit                             ;load idtr 
    lgdt gdt_limit                             ;load gdtr

    mov rcode.base_lo, ax  
    mov rdata.base_lo, ax                      ;store low word of cs phys address to real mode descriptors
    shr eax, 16                                ;shift eax to access high word
    mov rcode.base_mid, al  
    mov rdata.base_mid, al                     ;store middle byte of cs phys address to real mode descriptors
    mov rcode.base_hi, al
    mov rdata.base_hi, al                      ;store high byte of cs phys address to real mode descriptors

    xor eax, eax                               ;clear high word of eax
    mov ax, seg seg32                          ;eax <- seg32 segment address (fixed up by dos at runtime)
    shl eax, 4                                 ;multiply segment address by 16 to convert it to physical address
    mov pcode.base_lo, ax  
    mov pdata.base_lo, ax                      ;store low word of seg32 phys address to protected mode descriptors
    shr eax, 16                                ;shift eax to access high word
    mov pcode.base_mid, al 
    mov pdata.base_mid, al                     ;store middle byte of seg32 phys address to protected mode descriptors 
    mov pcode.base_hi, al
    mov pdata.base_hi, al                      ;store high byte of seg32 phys address to protected mode descriptors 

    mov eax, cr0                               ;load control register 0
    or al, 1                                   ;set pe bit
    mov cr0, eax                               ;enable protected mode 

                                               ;manually encoded jmp 8h:start32
    db 66h                                     ;specify 32-bit operand
    db 0eah                                    ;jmp opcode 
    dd offset start32                          ;32 bit offset             
    dw 8                                       ;global descriptor selector (select protected mode code segment)

                                               ;real_mode: reached from exit_pm by a far jump through
                                               ;selector 18h. clears the pe bit, then reloads cs with the
                                               ;real mode segment value of "single" via a hand-encoded far
                                               ;jump; execution continues at real_cs
real_mode:                                     ;transition back to real mode
    .386p          
    mov eax, cr0                               ;load control register into eax
    and al, 0feh                               ;clear pe bit
    mov cr0, eax                               ;real mode enabled     
    db 0eah                                    ;jmp single:real_cs to load cs:ip
    dw offset real_cs                          ;offset real_cs
    dw seg single                              ;segment single (fixed up by dos at runtime)

                                               ;real_cs: first code executed after the return to real mode.
                                               ;reloads ds/ss from cs, reprograms both pics back to vector
                                               ;bases 8 and 70h, writes m_pic_mask/s_pic_mask back to the
                                               ;mask registers, reloads the real mode idtr, empties the bios
                                               ;keyboard buffer, re-enables nmi and interrupts, and
                                               ;terminates through int 21h function 4ch
real_cs:                                       ;back in real mode
    .8086
    mov ax, cs                      
    mov ds, ax                                 ;ds = cs
    mov ss, ax                                 ;ss = cs

    mov al, 11h                                ;ICW1, IC4 bit set, cascade bit clr, edge trig, init bit set
    out 20h, al                                ;send ICW1 to primary pic cmd register
    jmp $+2
    jmp $+2                                    ;delay needed on older systems
    out 0a0h, al                               ;send ICW1 to slave pic cmd register
    jmp $+2
    jmp $+2

    mov al, 8                                  ;ICW2 base address for primary pic = 8
    out 21h, al                                ;send ICW2 to primary pic data register
    jmp $+2
    jmp $+2
    mov al, 70h                                ;ICW2 base address for slave pic = 70h
    out 0a1h, al                               ;send ICW2 to slave pic data register
    jmp $+2
    jmp $+2

    mov al, 4                                  ;ICW3, on primary pic, bits map to irq lines, use irq 2 for cascade
    out 21h, al                                ;send ICW3 to primary pic data register
    jmp $+2
    jmp $+2
    mov al, 2                                  ;ICW3, on slave pic, byte value = irq line, use irq 2 for cascade  
    out 0a1h, al                               ;send ICW3 to slave pic data register
    jmp $+2
    jmp $+2

    mov al, 1                                  ;ICW4 = 01h, bit 0 set to enable 80x86 mode
    out 21h, al                                ;send ICW4 to primary pic data register
    jmp $+2
    jmp $+2
    out 0a1h, al                               ;send ICW4 to slave pic data register
    jmp $+2
    jmp $+2

    xor al, al                                 ;clear the data registers
    out 21h, al
    jmp $+2
    jmp $+2   
    out 0a1h, al
    jmp $+2
    jmp $+2

    mov al, m_pic_mask                         ;al <- master pic mask as stored by start
    out 21h, al                                ;master pic mask restored
    jmp $+2
    jmp $+2    

    mov al, s_pic_mask                         ;al <- slave pic mask as stored by start
    out 0a1h, al                               ;slave pic mask restored
    jmp $+2
    jmp $+2  

    .386p
    lidt ridt_limit                            ;setup idtr for real mode
    .8086
    mov ax, 40h
    mov es, ax                                 ;access kbd data area via segment 40h
    mov word ptr es:[1ah], 1eh                 ;set the kbd buff head to start of buff
    mov word ptr es:[1ch], 1eh                 ;kbd buff tail = head to clear kbd buffer
    in al, 70h                                 ;al <- cmos ram index register port
    and al, 7fh                                ;clear bit 7 to enable nmi 
    out 70h, al                                ;nmi enabled 
    sti                                        ;interrupts enabled   
    mov ax, 4c00h                              ;Terminate process function selected
    int 21h                                    ;return to dos

                                               ;16-bit stack: last item in segment "single", filled with 0ffh
                                               ;256 bytes because some vga bioses need at least that much
                                               ;stack to change video modes (int 10h)
align 2                                        ;align stack for 16-bit accesses
s16 db 256 dup (0ffh)                          ;256 byte stack, need at least 256 bytes to change video
single ends                                    ;modes (int 10h) with some vga bios         



                                               ;segment "seg32": 32-bit (use32) segment holding the
                                               ;protected mode handlers, code and stack. the pcode and
                                               ;pdata descriptors are given this segment's physical
                                               ;address as their base by start, so offsets within seg32
                                               ;are the offsets used in protected mode
.386p
seg32 segment use32 
assume cs:seg32,ds:seg32,ss:seg32 
offset_0:                                      ;used to generate 16-bit offsets in idt descriptor definitions

db "start"                                     ;used to find start of segment in debug 

div_err:                                       ;division error isr
    xor edi, edi
    mov byte ptr es:[edi], '0'          
    hlt
    iretd

dont_care:                                     ;rare/obscure faults and exceptions
    xor edi, edi
    mov byte ptr es:[edi], '1'
    hlt
    iretd

nmi:                                           ;non maskable interrupt isr
    xor edi, edi
    mov byte ptr es:[edi], '2'
    hlt
    iretd

invalid_op:                                    ;invalid opcode isr
    xor edi, edi
    mov byte ptr es:[edi], '3'
    hlt
    iretd

double_fault:                                  ;double fault isr
    xor edi, edi
    mov byte ptr es:[edi], '4'
    hlt
    iretd

fpu_err:                                       ;fpu error isr
    xor edi, edi
    mov byte ptr es:[edi], '5'
    hlt
    iretd

not_present:                                   ;descriptor not present isr
    xor edi, edi
    mov byte ptr es:[edi], '6'
    hlt
    iretd

gp_fault:                                      ;general protection fault isr
    xor edi, edi
    mov byte ptr es:[edi], '7'
    hlt
    iretd

pit_isr:                                       ;int 20h timer isr
    push eax
    mov al, 20h
    out 20h, al
    pop eax    
    iretd

kbd_isr:                                       ;int 21h keyboard isr
    push eax
    in al, 60h
    mov al, 20h
    out 20h, al
    pop eax
    iretd

sp16 dw ?                                      ;16-bit stack pointer, written by start32, read by exit_pm

                                               ;start32: first code executed in protected mode, reached by
                                               ;the far jump through selector 8 at the end of start.
                                               ;loads the data segment registers, switches to the 32-bit
                                               ;stack, enables interrupts, calls func1, then falls through
                                               ;to exit_pm
start32: 

    mov ax, 30h              
    mov es, ax                                 ;es <- vga compatible text buffer; loaded first because every
                                               ;fault handler in this segment reports through es:[0]
    mov ax, 10h              
    mov ds, ax                                 ;ds <- protected mode data descriptor (same physical address as code descriptor) 
    mov fs, ax               
    mov gs, ax                                 ;setup extra segments
    mov sp16, sp                               ;store old stack pointer, restore before returning to real mode
    mov ss, ax                                 ;setup stack segment
    mov esp, offset s32_end                    ;setup 32-bit stack pointer, kept directly after the ss load
                                               ;so ss and esp are never out of step for longer than
                                               ;one instruction
    sti                                        ;ready for interrupts, leave nmi disabled

    call func1


                                               ;exit_pm: leaves 32-bit protected mode code. restores the
                                               ;16-bit stack pointer saved by start32, loads every data
                                               ;segment register and ss with selector 20h (rdata), then
                                               ;far-jumps through selector 18h (rcode) to real_mode in
                                               ;segment "single". reached by falling out of start32
exit_pm:                                       ;return to real mode
    cli                                        ;interrupts disabled
    mov sp, sp16                               ;restore 16-bit stack pointer
    mov ax, 20h
    mov ds, ax
    mov es, ax
    mov fs, ax
    mov gs, ax
    mov ss, ax                                 ;load real mode data descriptor selectors 
    db 0eah                                    ;jmp 18h:real_mode to load real mode code descriptor 
    dd offset real_mode                        ;offset to 16-bit code in single segment
    dw 18h                                     ;real mode code selector 


                                               ;func2: target of the call in func1. located before func1 in
                                               ;the segment, so the call to it has a negative displacement.
                                               ;does no work: saves and restores eax, ebx, ecx, edx, esi and
                                               ;returns. the string in front of it is only a search marker
    db "call_here"                             ;use this to find call target in debug
func2 proc

    push eax                                   ;save registers
    push ebx
    push ecx
    push edx
    push esi

    pop esi                                    ;restore registers in reverse order
    pop edx
    pop ecx
    pop ebx
    pop eax
    ret

func2 endp


                                               ;func1: called from start32. saves eax, ebx, ecx, edx, esi,
                                               ;runs some filler instructions, calls func2 (which lies at a
                                               ;lower offset), restores the registers and returns. the call
                                               ;to func2 is the instruction under investigation
func1 proc

    push eax                                   ;save registers
    push ebx
    push ecx
    push edx
    push esi

    ;do arbitrary work
    mov eax, 934875h
    xor eax, ebx
    inc ecx
    mul edx
    add edx, 94357h
    jmp target1                                ;skips the next three instructions, which never execute
    xor ecx, ecx
    add edx, 987h
    dec esi

target1:
    call func2                                 ;IT NEVER MAKES IT TO FUNC2
    jmp over_marker                            ;step over the marker bytes below
    db "calladdress"                           ;use this to find call instruction in debug
over_marker:

    pop esi                                    ;restore registers in reverse order
    pop edx
    pop ecx
    pop ebx
    pop eax
    ret

func1 endp    


                                               ;32-bit stack: last item in seg32, filled with 0ffh.
                                               ;start32 loads esp with s32_end, the first byte past the
                                               ;buffer. "end start" names start as the program entry point
align 4                                        ;align stack for 32-bit accesses
s32 db 256 dup (0ffh)                          ;256 byte stack
s32_end:                                       ;used to initialize esp 
seg32 ends                     
end start

I believe the issue is that MASM is generating the wrong call target and I'm executing garbage.

I tested this by loading the program with debug (just to examine the opcodes.) Debug loads the call instruction to 06CA:05A9 and the call target (push eax) to 06CA:057B. The call instruction is encoded as E8 CD FF 00 00 which is call loc_0000ffd2.

0x5a9 plus 0xffd2 would roll over into 0x57b if it was a 16-bit segment. Or maybe the offset is signed and that's a negative number? Am I using the wrong type of call?


Solution

  • The problem is that the linker shipped with MASM 5.10 is deficient and doesn't properly handle this kind of 32-bit relocation. As you suspected, it treats the 32-bit relative displacement as a 16-bit value, which, as you correctly observed, produces the wrong value (notably when calling code at a negative displacement). To test your code I have been using MASM 5.10a, whose linker is version 3.64.

    You can continue to use MASM.EXE 5.10a, but you will need to replace your linker. The 16-bit Microsoft Overlay Linker (LINK.EXE) that comes with MASM 6.11 does work correctly. You will need to have an expanded memory manager present for the LINK.EXE and/or MASM.EXE to function correctly. MASM 6.11 was the last version of the MASM products that can be run from DOS. MASM 6.11 install disks can be downloaded from here.


    Borland's TASM and TLINK as an Alternative

    If you download and install Borland's Turbo Assembler v2.0x you can assemble your code with TASM and link with TLINK. If you run TLINK on the object file produced by TASM it will actually warn you of this problem! The error will look something like:

    32-bit record encountered in module Use "/3" option

    If you use the /3 option it enables 32-bit processing and a proper executable should be generated.

    To assemble with TASM (it will still work with MASM) a small adjustment must be made to these lines:

    lidt idt_limit                             ;load idtr 
    lgdt gdt_limit                             ;load gdtr
    
    ...
    
    lidt ridt_limit                            ;setup idtr for real mode
    

    TASM is picky about the type and they have to be written as:

    lidt fword ptr idt_limit                   ;load idtr 
    lgdt fword ptr gdt_limit                   ;load gdtr
    
    ...
    
    lidt fword ptr ridt_limit                  ;setup idtr for real mode
    

    JWasm as an Alternative

    JWasm is a MASM-compatible open source solution based on Watcom's assembler (WASM) with more modern updates. JWasm can also be built and run on other platforms such as Windows, Linux and macOS. JWasm is able to assemble files to DOS object files (OMF) like MASM, but it also has an integrated 16-bit linker which allows you to build a DOS MZ executable directly. You can download a pre-built DOS version of JWasm from here.

    JWasm is picky about the types, just like TASM, so see the TASM section above regarding fword ptr.

    To assemble and link a single source assembly file to a DOS executable you can simply do:

    jwasmr -mz filename.asm
    

    This should produce a file called filename.exe