mirror of
https://github.com/taigrr/gopher-os
synced 2025-01-18 04:43:13 -08:00
The switch to 64-bit mode allows us to use 48-bit addressing and to relocate the kernel to virtual address 0xffff800000000000 + 1M. The actual kernel is loaded by the bootloader at physical address 1M. The rt0 code has been split in two parts. The 32-bit part provides the entrypoint that the bootloader jumps to after loading the kernel. Its purpose is to make sure that: - the kernel was booted by a multiboot-compliant bootloader - the multiboot info structures are copied to a reserved memory block where they can be accessed after enabling paging - the CPU meets the minimum requirements for the kernel (CPUID, SSE, support for long-mode) Since paging is not enabled when the 32-bit code runs, it needs to translate all memory addresses it accesses to physical memory addresses by subtracting PAGE_OFFSET. The 32-bit rt0 code will set up a page table that identity-maps region: 0 to 8M and region: PAGE_OFFSET to PAGE_OFFSET+8M. This ensures that when paging gets enabled, we will still be able to access the kernel using both physical and virtual memory addresses. After enabling paging, the 32-bit rt0 will jump to a small 64-bit trampoline function that updates the stack pointer to use the proper virtual address and jumps to the virtual address of the 64-bit entry point. The 64-bit entrypoint sets up the minimal g0 structure required by the go function prologue for stack checks and sets up the FS register to point to it. The principle is the same as with 32-bit code (a segment register has the address of a pointer to the active g) with the difference that in 64-bit mode, the FS register is used instead of GS and that in order to set its value we need to write to a MSR.
357 lines
11 KiB
ArmAsm
357 lines
11 KiB
ArmAsm
; vim: set ft=nasm :
|
||
%include "constants.inc"
|
||
|
||
section .data
align 4

;------------------------------------------------------------------------------
; Bootstrap GDT used for the switch to long mode.
;
; The table holds three 8-byte descriptors (null, code, data). Every base and
; limit field is zero; only the access byte and the flags byte differ between
; the descriptors.
;------------------------------------------------------------------------------
gdt0:

gdt0_nil_seg:
    dw 0                        ; limit 15:0
    dw 0                        ; base 15:0
    db 0                        ; base 23:16
    db 0                        ; access byte (all clear: null descriptor)
    db 0                        ; flags / limit 19:16
    db 0                        ; base 31:24

gdt0_cs_seg:
    dw 0                        ; limit 15:0
    dw 0                        ; base 15:0
    db 0                        ; base 23:16
    db 0x9a                     ; access 10011010b: present, ring 0, code, exec/read
    db 0x20                     ; flags  00100000b: L bit set (64-bit code segment)
    db 0                        ; base 31:24

gdt0_ds_seg:
    dw 0                        ; limit 15:0
    dw 0                        ; base 15:0
    db 0                        ; base 23:16
    db 0x92                     ; access 10010010b: present, ring 0, data, read/write
    db 0x00                     ; flags  00000000b
    db 0                        ; base 31:24

; Pseudo-descriptor consumed by lgdt: 16-bit limit followed by the table base.
; The base is the *physical* address of gdt0 since the table is loaded by the
; 32-bit code which addresses everything relative to PAGE_OFFSET.
gdt0_desc:
    dw gdt0_desc - gdt0 - 1     ; limit = table length in bytes minus one
    dq gdt0 - PAGE_OFFSET

; Segment selectors are simply the byte offsets of each descriptor in the table.
NULL_SEG    equ gdt0_nil_seg - gdt0
CS_SEG      equ gdt0_cs_seg - gdt0
DS_SEG      equ gdt0_ds_seg - gdt0
|
||
|
||
;------------------------------------------------------------------------------
; Error messages
;
; NUL-terminated strings printed via write_string just before the CPU is
; halted. Callers pass the physical address (label - PAGE_OFFSET) in edi.
;------------------------------------------------------------------------------
err_unsupported_bootloader:
    db '[rt0_32] kernel not loaded by multiboot-compliant bootloader', 0
err_multiboot_data_too_big:
    db '[rt0_32] multiboot information data length exceeds local buffer size', 0
err_cpuid_not_supported:
    db '[rt0_32] the processor does not support the CPUID instruction', 0
err_longmode_not_supported:
    db '[rt0_32] the processor does not support longmode which is required by this kernel', 0
err_sse_not_supported:
    db '[rt0_32] the processor does not support SSE instructions which are required by this kernel', 0
|
||
|
||
section .bss
align 4096

; Three page-sized tables (512 eight-byte entries each) backing the initial
; L4 -> L3 -> L2 page table hierarchy.
page_table_l4:  resq 512
page_table_l3:  resq 512
page_table_l2:  resq 512

; These symbols are exported so that the 64-bit entrypoint can use them.
global multiboot_data
global stack_bottom
global stack_top

; 16K buffer that receives a copy of the multiboot info, followed by a 16K
; kernel stack. The stack pointer is initialised to stack_top, which labels
; the first byte past the reserved stack area.
multiboot_data: resb 16 * 1024
stack_bottom:   resb 16 * 1024
stack_top:
|
||
|
||
section .rt0
bits 32
align 4

;------------------------------------------------------------------------------
; Kernel 32-bit entry point
;
; The boot loader will jump to this symbol after setting up the CPU according
; to the multiboot standard. At this point:
; - A20 is enabled
; - The CPU is using 32-bit protected mode
; - Interrupts are disabled
; - Paging is disabled
; - EAX contains the magic value '0x36d76289'; the presence of this value
;   indicates to the operating system that it was loaded by a
;   Multiboot-compliant boot loader
; - EBX contains the 32-bit physical address of the Multiboot information
;   structure
;
; In:    eax = multiboot magic, ebx = physical address of multiboot info
; Out:   never returns; either control is handed to the 64-bit trampoline or
;        an error is printed and the CPU is parked in .halt
;
; Paging is off while this code runs, so every symbol is accessed through its
; physical address (symbol - PAGE_OFFSET).
;------------------------------------------------------------------------------
global _rt0_32_entry
_rt0_32_entry:
    ; Provide a stack
    mov esp, stack_top - PAGE_OFFSET

    ; Ensure we were booted by a bootloader supporting multiboot
    cmp eax, 0x36d76289
    jne .unsupported_bootloader

    ; Copy multiboot struct to our own buffer (expects the info pointer in ebx)
    call _rt0_copy_multiboot_data

    ; Check processor features
    call _rt0_check_cpuid_support
    call _rt0_check_longmode_support
    call _rt0_check_sse_support

    ; Setup initial page tables, enable paging and enter longmode
    call _rt0_populate_initial_page_tables
    call _rt0_enter_long_mode

    ; The CPU is now executing in 64-bit mode. The trampoline replaces the
    ; stack pointer and never returns, so it is entered with a plain jmp; a
    ; call would only push an 8-byte return address that nobody consumes.
    jmp _rt0_64_entry_trampoline

.unsupported_bootloader:
    mov edi, err_unsupported_bootloader - PAGE_OFFSET
    call write_string
    ; fall through to .halt

    ; Shared "park the CPU" target; the feature checks jump here after
    ; printing their error message.
.halt:
    cli
.halt_loop:
    hlt
    ; hlt can still be left by an NMI or SMI even with interrupts masked.
    ; Loop so that execution never falls through into the code that follows.
    jmp .halt_loop
|
||
|
||
;------------------------------------------------------------------------------
; Copy multiboot information blocks from the address pointed to by ebx into a
; local buffer. This enables the kernel code to access them once paging is
; enabled.
;
; In:    ebx = physical address of the multiboot info; its first dword is
;              read as the total length of the data in bytes
; Out:   multiboot_data holds a copy of the info (length rounded up to a
;        whole number of dwords)
; Clobb: eax, ecx, esi, edi, flags (ebx is preserved on the success path)
;
; If the reported length exceeds the 16384-byte multiboot_data buffer an error
; is printed and the CPU is halted.
;------------------------------------------------------------------------------
_rt0_copy_multiboot_data:
    mov esi, ebx
    mov edi, multiboot_data - PAGE_OFFSET

    ; The length is an unsigned quantity: use an unsigned comparison so that
    ; values with the top bit set are rejected instead of being treated as
    ; negative numbers that pass the bounds check.
    mov ecx, dword [esi]
    cmp ecx, 16384              ; size of the multiboot_data buffer
    jbe .size_ok

    mov edi, err_multiboot_data_too_big - PAGE_OFFSET
    call write_string
    jmp _rt0_32_entry.halt

.size_ok:
    ; Convert the byte length into a dword count, rounding up. This keeps the
    ; loop finite for lengths that are not a multiple of 4; since the buffer
    ; size is itself a multiple of 4 the rounded length still fits.
    add ecx, 3
    shr ecx, 2
    jz .done                    ; nothing to copy

.copy:
    ; ecx = dwords remaining (always > 0 here)
    mov eax, dword [esi]
    mov dword [edi], eax
    add esi, 4
    add edi, 4
    dec ecx
    jnz .copy

.done:
    ret
|
||
|
||
;------------------------------------------------------------------------------
; Verify that the CPUID instruction is available.
;
; CPUID exists if software is able to toggle the ID flag (bit 21) of EFLAGS.
; The routine flips the bit, reads EFLAGS back and checks whether the change
; stuck. The original EFLAGS value is restored before returning.
;
; Based on: http://wiki.osdev.org/Setting_Up_Long_Mode#x86_or_x86-64
;
; Clobb: eax, ecx, flags
; Returns normally when CPUID is available; otherwise prints an error and
; halts the CPU.
;------------------------------------------------------------------------------
_rt0_check_cpuid_support:
    ; Keep a copy of the original EFLAGS on the stack (for the final restore)
    ; and another one in ecx (for the comparison).
    pushfd
    mov ecx, dword [esp]

    ; Try to load EFLAGS with the ID bit inverted
    mov eax, ecx
    xor eax, 1 << 21
    push eax
    popfd

    ; Read EFLAGS back; the ID bit is only inverted if CPUID is supported
    pushfd
    pop eax

    ; Put the original EFLAGS back, undoing the flip if it happened
    popfd

    ; A difference between the two snapshots means the bit could be toggled
    cmp eax, ecx
    jne .has_cpuid

.no_cpuid:
    mov edi, err_cpuid_not_supported - PAGE_OFFSET
    call write_string
    jmp _rt0_32_entry.halt

.has_cpuid:
    ret
|
||
|
||
;------------------------------------------------------------------------------
; Verify that the processor implements long mode.
;
; Based on: http://wiki.osdev.org/Setting_Up_Long_Mode#x86_or_x86-64
;
; Clobb: eax, ebx, ecx, edx (written by cpuid), flags
; Returns normally when long mode is available; otherwise prints an error and
; halts the CPU.
;------------------------------------------------------------------------------
_rt0_check_longmode_support:
    ; Ask for the highest supported extended CPUID function. The long mode
    ; flag lives in function 0x80000001, so that one must be available.
    mov eax, 0x80000000
    cpuid
    cmp eax, 0x80000001
    jae .query_lm_bit

.no_long_mode:
    mov edi, err_longmode_not_supported - PAGE_OFFSET
    call write_string
    jmp _rt0_32_entry.halt

.query_lm_bit:
    ; Extended feature flags: bit 29 of edx is the LM (long mode) bit.
    mov eax, 0x80000001
    cpuid
    test edx, 1 << 29
    jz .no_long_mode

    ret
|
||
|
||
;------------------------------------------------------------------------------
; Check for and enable SSE support.
;
; Based on: http://wiki.osdev.org/SSE#Checking_for_SSE
;
; Clobb: eax, ebx, ecx, edx (written by cpuid), flags
; Returns normally with SSE enabled; if the CPU lacks SSE an error is printed
; and the CPU is halted.
;------------------------------------------------------------------------------
_rt0_check_sse_support:
    ; CPUID function 1 reports SSE in bit 25 of edx
    mov eax, 1
    cpuid
    test edx, 1 << 25
    jnz .enable_sse

.no_sse:
    mov edi, err_sse_not_supported - PAGE_OFFSET
    call write_string
    jmp _rt0_32_entry.halt

.enable_sse:
    ; CR0: clear EM (bit 2, coprocessor emulation) and set MP (bit 1,
    ; coprocessor monitoring)
    mov eax, cr0
    and eax, 0xfffffffb
    or eax, 1 << 1
    mov cr0, eax

    ; CR4: set OSFXSR (bit 9) and OSXMMEXCPT (bit 10)
    mov eax, cr4
    or eax, (1 << 9) | (1 << 10)
    mov cr4, eax

    ret
|
||
|
||
;------------------------------------------------------------------------------
; Build the minimal set of page tables that makes these regions accessible:
; - 0 to 8M
; - PAGE_OFFSET to PAGE_OFFSET + 8M
;
; Both regions resolve to the same physical memory (0 to 8M); the second one
; lets us reach the kernel through its VMA once paging is switched on.
;
; Only the low dword of each 8-byte entry is written.
;
; Clobb: eax, ebx, ecx, flags
;------------------------------------------------------------------------------
PAGE_PRESENT    equ (1 << 0)
PAGE_WRITABLE   equ (1 << 1)
PAGE_2MB        equ (1 << 7)

_rt0_populate_initial_page_tables:
    ; --- L4 table: indexed by bits 39-47 of the virtual address -------------
    ; Entry 0 covers the low (physical == virtual) addresses.
    mov ebx, page_table_l4 - PAGE_OFFSET
    mov eax, page_table_l3 - PAGE_OFFSET
    or eax, PAGE_PRESENT | PAGE_WRITABLE
    mov [ebx], eax

    ; The entry selected by bits 39-47 of PAGE_OFFSET points to the very same
    ; L3 table so that the high addresses alias the low ones.
    mov ecx, (PAGE_OFFSET >> 39) & 511
    mov [ebx + ecx*8], eax

    ; --- L3 table: indexed by bits 30-38 ------------------------------------
    ; Only entry 0 is needed; it points to the L2 table.
    mov ebx, page_table_l3 - PAGE_OFFSET
    mov eax, page_table_l2 - PAGE_OFFSET
    or eax, PAGE_PRESENT | PAGE_WRITABLE
    mov [ebx], eax

    ; --- L2 table -----------------------------------------------------------
    ; Setting the huge page bit lets each L2 entry map a 2M page directly,
    ; without an L1 table. Covering 0-8M takes four entries (indices 0 to 3);
    ; entry i maps the physical range starting at i * 2M.
    mov ebx, page_table_l2 - PAGE_OFFSET
    xor ecx, ecx                ; ecx = entry index
.next_page:
    mov eax, ecx
    shl eax, 21                 ; eax = index * 2M
    or eax, PAGE_PRESENT | PAGE_WRITABLE | PAGE_2MB
    mov [ebx + ecx*8], eax

    inc ecx
    cmp ecx, 4
    jne .next_page

    ret
|
||
|
||
;------------------------------------------------------------------------------
; Load P4 table, enable PAE, enter long mode and finally enable paging
;
; Must be entered through a 32-bit near call while the CPU is in protected
; mode with paging disabled. Control returns to the caller with the CPU
; executing in 64-bit mode (the code segment loaded from gdt0 has its L bit
; set).
;
; Clobb: eax, ecx, edx, cs, ds, es, fs, gs, flags
;------------------------------------------------------------------------------
_rt0_enter_long_mode:
    ; Load page table map pointer to cr3
    mov eax, page_table_l4 - PAGE_OFFSET
    mov cr3, eax

    ; Enable PAE support (CR4 bit 5)
    mov eax, cr4
    or eax, 1 << 5
    mov cr4, eax

    ; Now enable long mode by setting bit 8 (LME) of the EFER MSR
    mov ecx, 0xc0000080
    rdmsr                       ; edx:eax = current EFER value
    or eax, 1 << 8
    wrmsr

    ; Finally enable paging (CR0 bit 31)
    mov eax, cr0
    or eax, 1 << 31
    mov cr0, eax

    ; We are in 32-bit compatibility submode. We need to load a 64bit GDT
    ; and perform a far jmp to switch to long mode
    mov eax, gdt0_desc - PAGE_OFFSET
    lgdt [eax]

    ; Point the data segment registers at the new data descriptor. The code
    ; segment can only be changed with a far jmp.
    mov ax, DS_SEG
    mov ds, ax
    mov es, ax
    mov fs, ax
    mov gs, ax

    jmp CS_SEG:.flush_gdt - PAGE_OFFSET

bits 64
.flush_gdt:
    ; From here on the CPU decodes instructions in 64-bit mode. The caller
    ; pushed a 4-byte return address (32-bit near call), whereas a 64-bit ret
    ; would pop 8 bytes and pick up whatever lies beyond the return address
    ; as the upper half of RIP. Pop exactly 4 bytes by hand instead. The
    ; 32-bit operations zero-extend into rax/rsp, so stale upper register
    ; halves cannot leak into the addresses.
    mov eax, dword [esp]        ; rax = zero-extended 32-bit return address
    add esp, 4                  ; discard it; rsp = zero-extended esp
    jmp rax                     ; deliberate jmp: stands in for the ret above

bits 32                         ; the code following this routine is 32-bit
|
||
|
||
;------------------------------------------------------------------------------
; Print a NUL-terminated string to the screen using white text on a red
; background. Text mode is assumed to be active with its buffer located at
; physical address 0xb8000; output always starts at the first cell.
;
; In:    edi = address of the NUL-terminated string
; Clobb: eax, ebx, edi, flags
;------------------------------------------------------------------------------
write_string:
    mov ebx, 0xb8000            ; ebx = next screen cell to write
    mov ah, 0x4F                ; attribute byte shared by every character
    jmp .fetch

.emit:
    mov word [ebx], ax          ; cell = attribute (ah) : character (al)
    add ebx, 2
    inc edi

.fetch:
    mov al, byte [edi]
    test al, al
    jnz .emit                   ; keep going until the terminating NUL

    ret
|
||
|
||
|
||
;------------------------------------------------------------------------------
; 64-bit trampoline: switch the stack pointer over to the virtual address of
; the stack and hand control to the 64-bit entrypoint. Does not return.
;------------------------------------------------------------------------------
bits 64
extern _rt0_64_entry

_rt0_64_entry_trampoline:
    ; Paging is enabled at this point, so the stack can be addressed through
    ; its virtual address from now on.
    mov rsp, stack_top

    ; The entrypoint is an absolute 64-bit address: load it into rax and
    ; reach it with an indirect jump.
    mov rax, _rt0_64_entry
    jmp rax
|