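I have a hobby OS and I want it to jump to 64-bit long mode. Everything works fine up to the far jump to the 64-bit long mode entry point, and paging works correctly, but the CPU then triple-faults. The QEMU log file shows that long mode is already active (EFER.LMA is set) when it happens: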
Triple fault
CPU Reset (CPU 0)
RAX=0000000000000100 RBX=0000000080000011 RCX=00000000c0000080 RDX=0000000000000000
RSI=0000000000000015 RDI=000000000020102d RBP=0000000000000000 RSP=000000000020b000
R8 =0000000000000000 R9 =0000000000000000 R10=0000000000000000 R11=0000000000000000
R12=0000000000000000 R13=0000000000000000 R14=0000000000000000 R15=0000000000000000
RIP=000000008020015b RFL=00000086 [--S--P-] CPL=0 II=0 A20=1 SMM=0 HLT=0
ES =0018 0000000000000000 ffffffff 00cf9300 DPL=0 DS [-WA]
CS =0008 0000000000000000 00000000 00209a00 DPL=0 CS64 [-R-]
SS =0018 0000000000000000 ffffffff 00cf9300 DPL=0 DS [-WA]
DS =0018 0000000000000000 ffffffff 00cf9300 DPL=0 DS [-WA]
FS =0018 0000000000000000 ffffffff 00cf9300 DPL=0 DS [-WA]
GS =0018 0000000000000000 ffffffff 00cf9300 DPL=0 DS [-WA]
LDT=0000 0000000000000000 0000ffff 00008200 DPL=0 LDT
TR =0000 0000000000000000 0000ffff 00008b00 DPL=0 TSS64-busy
GDT= 000000000020103b 00000017
IDT= 0000000000000000 00000000
CR0=80000011 CR2=000000008020015b CR3=0000000000202000 CR4=00000020
DR0=0000000000000000 DR1=0000000000000000 DR2=0000000000000000 DR3=0000000000000000
DR6=00000000ffff0ff0 DR7=0000000000000400
CCS=0000000000000400 CCD=ffffffff80000011 CCO=LOGICL
EFER=0000000000000500
FCW=037f FSW=0000 [ST=0] FTW=00 MXCSR=00001f80
FPR0=0000000000000000 0000 FPR1=0000000000000000 0000
FPR2=0000000000000000 0000 FPR3=0000000000000000 0000
FPR4=0000000000000000 0000 FPR5=0000000000000000 0000
FPR6=0000000000000000 0000 FPR7=0000000000000000 0000
XMM00=0000000000000000 0000000000000000 XMM01=0000000000000000 0000000000000000
XMM02=0000000000000000 0000000000000000 XMM03=0000000000000000 0000000000000000
XMM04=0000000000000000 0000000000000000 XMM05=0000000000000000 0000000000000000
XMM06=0000000000000000 0000000000000000 XMM07=0000000000000000 0000000000000000
XMM08=0000000000000000 0000000000000000 XMM09=0000000000000000 0000000000000000
XMM10=0000000000000000 0000000000000000 XMM11=0000000000000000 0000000000000000
XMM12=0000000000000000 0000000000000000 XMM13=0000000000000000 0000000000000000
XMM14=0000000000000000 0000000000000000 XMM15=0000000000000000 0000000000000000
The CR2 register shows that a page fault happened while fetching the code that should be at virtual address 0x20015b (CR2 itself reads 0x8020015b). This is my code:
; Virtual base of the higher-half kernel. Symbols in the higher-half sections
; are linked at (load address + KERNEL_VIRTUAL_ADDR), so 32-bit code subtracts
; this value to obtain the address the symbol is actually loaded at.
%define KERNEL_VIRTUAL_ADDR 0xFFFFFFFF80000000
section .multiboot_header
; Multiboot2 header: magic, architecture, header length, checksum, then a
; terminating end tag.
header_start:
align 8
; Multiboot2 header magic
dd 0xE85250D6
; architecture field (0)
dd 0
; total header length in bytes
dd header_end - header_start
; checksum: chosen so magic + architecture + length + checksum == 0 (mod 2^32)
dd 0x100000000 - (0xE85250D6 + 0 + (header_end - header_start))
; end tag: type = 0, flags = 0, size = 8
dw 0
dw 0
dd 8
header_end:
section .multiboot.text
global start
bits 32
;functions
; check_cpuid -- report whether bit 21 of EFLAGS can be toggled.
; In:    esi = current screen cell index (consumed and advanced by print)
; Out:   "CPUID:1 " is printed if the bit could be flipped, "CPUID:0 " if not.
;        The routine returns in both cases; it does not halt on failure.
; Clobb: eax, ecx, edi, plus whatever print clobbers
check_cpuid:
; eax = ecx = current EFLAGS
pushfd
pop eax
mov ecx, eax
; try to flip bit 21 and write the result back to EFLAGS
xor eax, 1 << 21
push eax
popfd
; read EFLAGS again to see whether the flip stuck
pushfd
pop eax
; restore the original EFLAGS saved in ecx
push ecx
popfd
; eax == ecx means the bit could not be changed
xor eax, ecx
jz .no_cpuid
mov edi, cpuid_av - KERNEL_VIRTUAL_ADDR
call print
.cont:
ret
.no_cpuid:
mov edi, cpuid_err - KERNEL_VIRTUAL_ADDR
call print
jmp check_cpuid.cont
; check_long_mode -- report whether CPUID advertises long mode.
; In:    esi = current screen cell index (consumed and advanced by print)
; Out:   "long-mode: 1 " is printed if CPUID leaf 0x80000001 exists and has
;        EDX bit 29 set, "long-mode: 0 " otherwise.
;        The routine returns in both cases; it does not halt on failure.
; Clobb: eax, ebx, ecx, edx (cpuid), edi, plus whatever print clobbers
check_long_mode:
; leaf 0x80000000 returns the highest supported extended leaf in eax
mov eax, 0x80000000
cpuid
cmp eax, 0x80000001
jb .no_long
; leaf 0x80000001: EDX bit 29 is the long-mode flag
mov eax, 0x80000001
cpuid
test edx, 1 << 29
jz .no_long
mov edi, lm - KERNEL_VIRTUAL_ADDR
call print
.cont:
ret
.no_long:
mov edi, no_lm - KERNEL_VIRTUAL_ADDR
call print
jmp check_long_mode.cont
; print -- copy a NUL-terminated string into the text buffer at 0xb8000.
; In:    edi = address of the string
;        esi = index of the first screen cell to write (2 bytes per cell)
; Out:   esi = index of the cell that received the terminating NUL
;        ecx = number of bytes before the NUL
; Clobb: edx (dh = attribute byte 0x0f, dl = last byte read), flags
; NOTE(review): the first byte is stored before any NUL test, and the loop
; tests only the bytes after it, so an empty string makes the routine keep
; reading past the terminator. The terminating NUL is also written to the
; screen.
print:
; dh = attribute byte, ecx = offset into the string
mov dh, 0x0f
xor ecx, ecx
; unconditionally emit the first character
mov dl, [edi + ecx]
mov word [0xb8000 + esi*2], dx
.loopx:
; advance to the next character and the next cell, store, then test for NUL
inc ecx
inc esi
mov dl, [edi + ecx]
mov word [0xb8000 + esi*2], dx
cmp byte [edi + ecx], 0
jnz .loopx
ret
; start -- 32-bit entry point (ENTRY(start) in the linker script).
; Sets up a stack, enables PAE, builds page tables that identity-map the
; first 4 MiB, sets EFER.LME, enables paging, loads the GDT and far-jumps to
; 64-bit code.
start:
; NOTE(review): both outcomes of this compare reach `loader`, so the value
; in eax is not actually enforced.
cmp eax, 0x36d76289
je loader
loader:
; stack.top is linked in the higher half; subtract the offset to get the
; address it is loaded at
mov esp, stack.top - KERNEL_VIRTUAL_ADDR
;disable paging
cli
; NOTE(review): `0 << 31` is 0, so this OR leaves CR0 unchanged
mov eax, cr0
or eax, 0 << 31
mov cr0, eax
; set CR4 bit 5 (PAE)
mov eax, cr4
or eax, 1 << 5
mov cr4, eax
; esi = screen cell index used by print; start at cell 0
xor esi, esi
call check_cpuid
call check_long_mode
; p4[0] -> p3 (0b11 = present + writable)
mov eax, p3_table - KERNEL_VIRTUAL_ADDR
or eax, 0b11
mov dword [p4_table - KERNEL_VIRTUAL_ADDR], eax
; p3[0] -> p2
mov eax, p2_table - KERNEL_VIRTUAL_ADDR
or eax, 0b11
mov dword [p3_table - KERNEL_VIRTUAL_ADDR], eax
; p2[0] -> first 4 KiB half of p1_table_1
mov eax, p1_table_1 - KERNEL_VIRTUAL_ADDR
or eax, 0b11
mov dword [p2_table - KERNEL_VIRTUAL_ADDR], eax
; p2[1] -> second 4 KiB half of p1_table_1
mov eax, p1_table_1 - KERNEL_VIRTUAL_ADDR + 4096
or eax, 0b11
mov dword [p2_table - KERNEL_VIRTUAL_ADDR + 8], eax
; fill 1024 page-table entries: entry i maps physical address i * 4096,
; i.e. the first 4 MiB are identity-mapped
mov ecx, 0
.map_p1_table:
; eax = ecx * 4096 (mul also overwrites edx)
mov eax, 4096
mul ecx
or eax, 0b11
; only the low dword of each 8-byte entry is written
mov [p1_table_1 - KERNEL_VIRTUAL_ADDR + ecx*8], eax
inc ecx
cmp ecx, 1024
jne .map_p1_table
; CR3 = address of the top-level table
mov eax, p4_table - KERNEL_VIRTUAL_ADDR
mov cr3, eax
; set bit 8 (LME) in MSR 0xC0000080 (EFER)
mov ecx, 0xC0000080
rdmsr
or eax, (1 << 8)
wrmsr
; set CR0 bit 31 (PG) to enable paging
mov ebx, cr0
or ebx, 1 << 31
mov cr0, ebx
; load the GDT through the pointer that holds the low (load) address
lgdt [gdt64.pointer_low - KERNEL_VIRTUAL_ADDR]
; NOTE(review): kernel_jumper lives in .multiboot.text, which is linked at
; its load address, so subtracting KERNEL_VIRTUAL_ADDR here produces an
; unmapped target (0x8020015b in the register dump).
jmp (0x8):(kernel_jumper - KERNEL_VIRTUAL_ADDR)
[bits 64]
kernel_jumper:
.h:
jmp .h ;this is 0x20015b that made page fault
section .data
; NUL-terminated status strings shown by print during early boot.
cpuid_err:  db "CPUID:0 ", 0
cpuid_av:   db "CPUID:1 ", 0
os_err:     db " multiboot: ", 0
no_lm:      db "long-mode: 0 ", 0
lm:         db "long-mode: 1 ", 0

; Segment-descriptor bits used by the GDT entries below.
%define SEG_ACCESS_RW   (1 << 41)   ; readable (code) / writable (data)
%define SEG_EXECUTABLE  (1 << 43)   ; must be 1 for code segments
%define SEG_CODE_DATA   (1 << 44)   ; must be 1 for code and data segments
%define SEG_PRESENT     (1 << 47)   ; must be 1 for a valid entry
%define SEG_LONG_MODE   (1 << 53)   ; 64-bit code segment

; 64-bit GDT: null descriptor, code descriptor, data descriptor.
gdt64:
            dq 0                                    ; first entry = 0
; .code / .data are the byte offsets of the entries from gdt64.
.code       equ $ - gdt64
            dq SEG_PRESENT | SEG_CODE_DATA | SEG_EXECUTABLE | SEG_ACCESS_RW | SEG_LONG_MODE ; second entry=code=0x8
.data       equ $ - gdt64
            dq SEG_PRESENT | SEG_CODE_DATA | SEG_ACCESS_RW ; third entry = data = 0x10
; GDT pointer holding the linked (higher-half) address of gdt64.
.pointer:
            dw $ - gdt64 - 1                        ; limit = table size - 1
            dq gdt64
; GDT pointer holding the load (lower-half) address of gdt64.
.pointer_low:
            dw gdt64.pointer - gdt64 - 1            ; limit = table size - 1
            dq gdt64 - KERNEL_VIRTUAL_ADDR
section .bss
; Page tables: each table is 512 eight-byte entries (4096 bytes) and is
; 4 KiB aligned.
alignb 4096
p4_table:   resq 512
p3_table:   resq 512
p2_table:   resq 512
; Two consecutive page tables (1024 entries in total).
p1_table_1: resq 2 * 512
; 16 KiB boot stack; the stack pointer is initialised to stack.top.
alignb 16
stack:      resb 16 * 1024
.top:
this is my linker.ld file:
/* NOTE(review): in GNU ld, OUTPUT(name) sets the name of the output file.
   If the intent was to select the object format, the usual command is
   OUTPUT_FORMAT(elf64-x86-64) -- confirm against the build. */
OUTPUT(X86-64)
/* Execution starts at the `start` label. */
ENTRY(start)
SECTIONS {
/* The boot sections are linked at 2 MiB; no AT() is given for them, so
   their load address follows their virtual address. */
. = 2M;
_kernel_start = .;
_kern_virtual_offset = 0xffffffff80000000;
.multiboot_header :
{
/* Be sure that multiboot header is at the beginning */
*(.multiboot_header)
}
.multiboot.text :
{
*(.multiboot.text)
}
/* From here on the location counter is in the higher half. Each section's
   AT() sets its load address to (virtual address - _kern_virtual_offset). */
. += _kern_virtual_offset;
/* Higher-half kernel code. */
.text ALIGN (4K) : AT (ADDR (.text) - _kern_virtual_offset)
{
*(.text)
*(.text.*)
}
/* Read-only data. */
.rodata ALIGN (4K) : AT (ADDR (.rodata) - _kern_virtual_offset)
{
*(.rodata)
*(.rodata.*)
}
/* Initialised data. */
.data ALIGN (4K) : AT (ADDR (.data) - _kern_virtual_offset)
{
*(.data)
*(.data.*)
}
/* Uninitialised data (page tables and the boot stack in this kernel). */
.bss ALIGN (4K) : AT (ADDR (.bss) - _kern_virtual_offset)
{
*(.bss)
}
/* End of the kernel as a higher-half virtual address and as a load address. */
_kernel_end = .;
_kernel_physical_end = . - _kern_virtual_offset;
}
From the given info it seems that a page fault happened, but if the OS had not mapped the second megabyte, the code before the jump could not have been fetched either; my OS entry point is at 2 MB.
Seeing your linker script I can confirm that the issue is this FAR JMP to the kernel_jumper
label:
; Faulty far jump: KERNEL_VIRTUAL_ADDR is subtracted from a label that is
; already linked at its load address.
jmp (0x8):(kernel_jumper - KERNEL_VIRTUAL_ADDR)
[bits 64]
kernel_jumper:
.h:
jmp .h ;this is 0x20015b that made page fault
The problem is that kernel_jumper is already a lower half address (it is defined in section .multiboot.text, which the linker script places at 2MiB) and requires no adjustment. It should have been:
jmp (0x8):(kernel_jumper)
There is a hint in the debug info where it shows RIP=000000008020015b. This is neither in the first 4MiB nor is it in the higher half KERNEL_VIRTUAL_ADDR address space. It appears this address was computed as 0x20015b - 0xFFFFFFFF80000000 = 0x8020015B (with the result truncated to 32 bits).
I believe this is beyond the scope of what is being asked, but the code is incomplete as it doesn't have the higher half addresses mapped and it doesn't transition from the lower half to the higher half. The following code adds additional support to handle mapping the higher half addresses; transitioning from the lower half to higher half; removing the lower half mapping; reloading the GDT from higher half; and setting the segment registers:
; Virtual base of the higher-half kernel. Symbols in the higher-half sections
; are linked at (load address + KERNEL_VIRTUAL_ADDR), so 32-bit code subtracts
; this value to obtain the address the symbol is actually loaded at.
%define KERNEL_VIRTUAL_ADDR 0xFFFFFFFF80000000
%define MB2_HEADER_MAGIC    0xE85250D6
%define MB2_ARCHITECTURE    0

section .multiboot_header
; Multiboot2 header: magic, architecture, header length, checksum, then a
; terminating end tag.
; The alignment directive comes BEFORE header_start so that any padding it
; emits is not counted in (header_end - header_start) and the label always
; points at the magic.
align 8
header_start:
        dd MB2_HEADER_MAGIC
        dd MB2_ARCHITECTURE
        dd header_end - header_start            ; total header length in bytes
        ; checksum: magic + architecture + length + checksum == 0 (mod 2^32)
        dd 0x100000000 - (MB2_HEADER_MAGIC + MB2_ARCHITECTURE + (header_end - header_start))
        ; end tag: type = 0, flags = 0, size = 8
        dw 0
        dw 0
        dd 8
header_end:
section .multiboot.text
global start
bits 32
;functions
; check_cpuid -- report whether bit 21 of EFLAGS can be toggled.
; In:    esi = current screen cell index (consumed and advanced by print)
; Out:   "CPUID:1 " is printed if the bit could be flipped, "CPUID:0 " if not.
;        The routine returns in both cases.
; Clobb: eax, ecx, edi, plus whatever print clobbers
check_cpuid:
        pushfd
        pop     eax                     ; eax = current EFLAGS
        mov     ecx, eax                ; ecx = copy used to restore/compare
        xor     eax, 1 << 21            ; flip bit 21
        push    eax
        popfd                           ; try to install the flipped value
        pushfd
        pop     eax                     ; eax = EFLAGS as the CPU kept them
        push    ecx
        popfd                           ; restore the original EFLAGS
        xor     eax, ecx                ; ZF = 1 when the bit did not change
        mov     edi, cpuid_av - KERNEL_VIRTUAL_ADDR     ; mov leaves ZF intact
        jnz     .report
        mov     edi, cpuid_err - KERNEL_VIRTUAL_ADDR
.report:
        call    print
.cont:
        ret
; check_long_mode -- report whether CPUID advertises long mode.
; In:    esi = current screen cell index (consumed and advanced by print)
; Out:   "long-mode: 1 " is printed if CPUID leaf 0x80000001 exists and has
;        EDX bit 29 set, "long-mode: 0 " otherwise.
;        The routine returns in both cases.
; Clobb: eax, ebx, ecx, edx (cpuid), edi, plus whatever print clobbers
check_long_mode:
        ; Start with the failure message; it is replaced only when both
        ; checks pass. cpuid does not write edi.
        mov     edi, no_lm - KERNEL_VIRTUAL_ADDR
        mov     eax, 0x80000000         ; highest extended leaf -> eax
        cpuid
        cmp     eax, 0x80000001
        jb      .report                 ; leaf 0x80000001 not available
        mov     eax, 0x80000001
        cpuid
        test    edx, 1 << 29            ; long-mode flag
        jz      .report
        mov     edi, lm - KERNEL_VIRTUAL_ADDR
.report:
        call    print
.cont:
        ret
; print -- copy a NUL-terminated string into the text buffer at 0xb8000.
; In:    edi = address of the string
;        esi = index of the first screen cell to write (2 bytes per cell)
; Out:   esi = index of the cell following the last character written
;        ecx = string length (bytes before the NUL)
; Clobb: edx (dh = attribute byte 0x0f, dl = 0), flags
; Each byte is tested BEFORE it is stored, so an empty string writes nothing
; and the routine never reads past the terminator. The terminating NUL is
; not written to the screen.
print:
        mov     dh, 0x0f                ; attribute byte for every cell
        xor     ecx, ecx                ; ecx = offset into the string
.loopx:
        mov     dl, [edi + ecx]
        test    dl, dl
        jz      .done                   ; stop at the terminator
        mov     word [0xb8000 + esi*2], dx
        inc     ecx
        inc     esi
        jmp     .loopx
.done:
        ret
; start -- 32-bit entry point (ENTRY(start) in the linker script).
; Sets up a stack, makes sure paging is off, enables PAE, builds page tables
; that map the first 4 MiB both at address 0 and in the higher half, sets
; EFER.LME, enables paging, loads the GDT, far-jumps to 64-bit code in the
; lower half and from there jumps to kernel_jumper_high.
start:
        ; NOTE(review): both outcomes of this compare reach `loader`, so the
        ; value in eax is not actually enforced.
        cmp     eax, 0x36d76289
        je      loader
loader:
        ; stack.top is linked in the higher half; subtract the offset to get
        ; the address it is loaded at.
        mov     esp, stack.top - KERNEL_VIRTUAL_ADDR
        cli

        ; Disable paging: clear CR0 bit 31 (PG). The previous
        ; `or eax, 0 << 31` OR-ed in zero and therefore changed nothing.
        mov     eax, cr0
        and     eax, 0x7FFFFFFF
        mov     cr0, eax

        ; Set CR4 bit 5 (PAE).
        mov     eax, cr4
        or      eax, 1 << 5
        mov     cr4, eax

        xor     esi, esi                ; screen cell index used by print
        call    check_cpuid
        call    check_long_mode

        ; Page-table entries are written as dwords with 0b11 = present +
        ; writable; the upper dword of each 8-byte entry is left untouched.
        mov     eax, p3_table - KERNEL_VIRTUAL_ADDR
        or      eax, 0b11
        ; Map the lower half addresses
        mov     dword [p4_table - KERNEL_VIRTUAL_ADDR], eax
        ; Map the higher half addresses
        mov     dword [p4_table+511*8 - KERNEL_VIRTUAL_ADDR], eax

        mov     eax, p2_table - KERNEL_VIRTUAL_ADDR
        or      eax, 0b11
        ; Map the lower half addresses
        mov     dword [p3_table - KERNEL_VIRTUAL_ADDR], eax
        ; Map the higher half addresses
        mov     dword [p3_table+510*8 - KERNEL_VIRTUAL_ADDR], eax

        ; p2[0] and p2[1] point at the two consecutive page tables.
        mov     eax, p1_table_1 - KERNEL_VIRTUAL_ADDR
        or      eax, 0b11
        mov     dword [p2_table - KERNEL_VIRTUAL_ADDR], eax
        mov     eax, p1_table_1 - KERNEL_VIRTUAL_ADDR + 4096
        or      eax, 0b11
        mov     dword [p2_table - KERNEL_VIRTUAL_ADDR + 8], eax

        ; Fill 1024 page-table entries: entry i maps physical address
        ; i * 4096, covering the first 4 MiB.
        mov     ecx, 0
.map_p1_table:
        mov     eax, 4096
        mul     ecx                     ; eax = ecx * 4096 (edx overwritten)
        or      eax, 0b11
        mov     [p1_table_1 - KERNEL_VIRTUAL_ADDR + ecx*8], eax
        inc     ecx
        cmp     ecx, 1024
        jne     .map_p1_table

        ; CR3 = address of the top-level table.
        mov     eax, p4_table - KERNEL_VIRTUAL_ADDR
        mov     cr3, eax

        ; Set bit 8 (LME) in MSR 0xC0000080 (EFER).
        mov     ecx, 0xC0000080
        rdmsr
        or      eax, (1 << 8)
        wrmsr

        ; Set CR0 bit 31 (PG) to enable paging.
        mov     ebx, cr0
        or      ebx, 1 << 31
        mov     cr0, ebx

        ; Load the GDT through the pointer that holds the low (load) address.
        lgdt    [gdt64.pointer_low - KERNEL_VIRTUAL_ADDR]
        ; We need to reload CS by a FAR JMP to the lower half label kernel_jumper
        jmp     (0x8):(kernel_jumper)
[bits 64]
kernel_jumper:
        ; Jump to the higher half entry point kernel_jumper_high
        mov     rax, kernel_jumper_high
        jmp     rax
; Section .text has higher half addresses
section .text
; kernel_jumper_high -- first code executed at a higher-half address.
; Reloads the GDT and stack through higher-half addresses, nulls the data
; segment registers, removes the lower-half page mappings, flushes the TLB,
; prints "HHLM" and then spins forever. Never returns.
kernel_jumper_high:
        ; Load the GDT from the higher half
        lgdt    [gdt64.pointer]
        ; Set a higher half stack
        lea     rsp, [stack.top]
        ; Initialize the segment registers to NULL segment
        xor     eax, eax
        mov     ds, eax
        mov     es, eax
        mov     ss, eax
        mov     fs, eax
        mov     gs, eax
        ; Remove the lower half page mappings. Page-table entries are
        ; 8 bytes wide, so clear the whole entry, not only its low dword.
        mov     rax, p4_table
        mov     qword [rax], 0
        mov     rax, p3_table
        mov     qword [rax], 0
        ; Flush the TLB by reloading CR3
        mov     rax, cr3
        mov     cr3, rax
        ; Add higher half long mode code here
        ; Print HHLM to upper right of screen (white on magenta)
        lea     rax, [0xb8000 + KERNEL_VIRTUAL_ADDR]
        mov     word [rax+76*2], 0x57 << 8 | 'H'
        mov     word [rax+77*2], 0x57 << 8 | 'H'
        mov     word [rax+78*2], 0x57 << 8 | 'L'
        mov     word [rax+79*2], 0x57 << 8 | 'M'
        ;
        ; Infinite loop
.h:
        jmp     .h
section .data
; NUL-terminated status strings shown by print during early boot.
cpuid_err:  db "CPUID:0 ", 0
cpuid_av:   db "CPUID:1 ", 0
os_err:     db " multiboot: ", 0
no_lm:      db "long-mode: 0 ", 0
lm:         db "long-mode: 1 ", 0

; Segment-descriptor bits used by the GDT entries below.
%define SEG_ACCESS_RW   (1 << 41)   ; readable (code) / writable (data)
%define SEG_EXECUTABLE  (1 << 43)   ; must be 1 for code segments
%define SEG_CODE_DATA   (1 << 44)   ; must be 1 for code and data segments
%define SEG_PRESENT     (1 << 47)   ; must be 1 for a valid entry
%define SEG_LONG_MODE   (1 << 53)   ; 64-bit code segment

; 64-bit GDT: null descriptor, code descriptor, data descriptor.
gdt64:
            dq 0                                    ; first entry = 0
; .code / .data are the byte offsets of the entries from gdt64.
.code       equ $ - gdt64
            dq SEG_PRESENT | SEG_CODE_DATA | SEG_EXECUTABLE | SEG_ACCESS_RW | SEG_LONG_MODE ; second entry=code=0x8
.data       equ $ - gdt64
            dq SEG_PRESENT | SEG_CODE_DATA | SEG_ACCESS_RW ; third entry = data = 0x10
; GDT pointer holding the linked (higher-half) address of gdt64; loaded by
; kernel_jumper_high.
.pointer:
            dw $ - gdt64 - 1                        ; limit = table size - 1
            dq gdt64
; GDT pointer holding the load (lower-half) address of gdt64; loaded by the
; 32-bit boot code.
.pointer_low:
            dw gdt64.pointer - gdt64 - 1            ; limit = table size - 1
            dq gdt64 - KERNEL_VIRTUAL_ADDR
section .bss
; Page tables: each table is 512 eight-byte entries (4096 bytes) and is
; 4 KiB aligned.
alignb 4096
p4_table:   resq 512
p3_table:   resq 512
p2_table:   resq 512
; Two consecutive page tables (1024 entries in total).
p1_table_1: resq 2 * 512
; 16 KiB boot stack; the stack pointer is initialised to stack.top.
alignb 16
stack:      resb 16 * 1024
.top:
I have added appropriate comments in the code where I made the changes.
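The astute observer may have noticed that the page mappings in the code above identity map the lower half, map the higher half, and also happen to map 2 additional regions. The extra regions appear because the same p3_table is referenced from two p4_table entries (0 and 511) and the same p2_table from two p3_table entries (0 and 510), giving 2 x 2 = 4 virtual ranges. This is okay as the extra mappings will disappear when the lower half is unmapped. The initial mappings actually look like this: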
0x0000000000000000-0x00000000003fffff -> 0x000000000000-0x0000003fffff
0x0000007f80000000-0x0000007f803fffff -> 0x000000000000-0x0000003fffff
0xffffff8000000000-0xffffff80003fffff -> 0x000000000000-0x0000003fffff
0xffffffff80000000-0xffffffff803fffff -> 0x000000000000-0x0000003fffff
After the lower half is unmapped it should look like this:
0xffffffff80000000-0xffffffff803fffff -> 0x000000000000-0x0000003fffff
This is okay, as the extra mappings simplified the code a bit and ultimately do no harm.