1
0
mirror of https://github.com/taigrr/gopher-os synced 2025-01-18 04:43:13 -08:00

Switch to a 64-bit version of the kernel and rt0 code

The switch to 64-bit mode allows us to use 48-bit addressing and to
relocate the kernel to virtual address 0xffff800000000000 + 1M. The
actual kernel is loaded by the bootloader at physical address 1M.

The rt0 code has been split in two parts. The 32-bit part provides the
entrypoint that the bootloader jumps to after loading the kernel. Its
purpose is to make sure that:
- the kernel was booted by a multiboot-compliant bootloader
- the multiboot info structures are copied to a reserved memory block
  where they can be accessed after enabling paging
- the CPU meets the minimum requirements for the kernel (CPUID, SSE,
  support for long-mode)

Since paging is not enabled when the 32-bit code runs, it needs to
translate all memory addresses it accesses to physical memory addresses
by subtracting PAGE_OFFSET. The 32-bit rt0 code will set up page tables
that map two virtual regions, 0 to 8M and PAGE_OFFSET to PAGE_OFFSET+8M,
onto the first 8M of physical memory. This ensures that when paging gets
enabled, we will still be able to access the kernel using both physical
and virtual memory
addresses. After enabling paging, the 32-bit rt0 will jump to a small
64-bit trampoline function that updates the stack pointer to use the
proper virtual address and jumps to the virtual address of the 64-bit
entry point.

The 64-bit entrypoint sets up the minimal g0 structure required by the
go function prologue for stack checks and sets up the FS register to
point to it. The principle is the same as with 32-bit code (a segment
register has the address of a pointer to the active g) with the
difference that in 64-bit mode, the FS register is used instead of GS
and that in order to set its value we need to write to an MSR.
This commit is contained in:
Achilleas Anagnostopoulos 2017-04-26 08:30:38 +01:00
parent 4829115647
commit 2558f79fbf
14 changed files with 560 additions and 288 deletions

View File

@ -1,5 +1,5 @@
OS = $(shell uname -s)
ARCH := x86
ARCH := x86_64
BUILD_DIR := build
BUILD_ABS_DIR := $(CURDIR)/$(BUILD_DIR)
@ -13,10 +13,10 @@ LD := ld
AS := nasm
GOOS := linux
GOARCH := 386
GOARCH := amd64
LD_FLAGS := -n -melf_i386 -T arch/$(ARCH)/script/linker.ld -static --no-ld-generated-unwind-info
AS_FLAGS := -g -f elf32 -F dwarf -I arch/$(ARCH)/asm/
LD_FLAGS := -n -T $(BUILD_DIR)/linker.ld -static --no-ld-generated-unwind-info
AS_FLAGS := -g -f elf64 -F dwarf -I arch/$(ARCH)/asm/
MIN_OBJCOPY_VERSION := 2.26.0
HAVE_VALID_OBJCOPY := $(shell objcopy -V | head -1 | awk -F ' ' '{print "$(MIN_OBJCOPY_VERSION)\n" $$NF}' | sort -ct. -k1,1n -k2,2n && echo "y")
@ -28,7 +28,7 @@ asm_obj_files := $(patsubst arch/$(ARCH)/asm/%.s, $(BUILD_DIR)/arch/$(ARCH)/asm/
kernel: binutils_version_check $(kernel_target)
$(kernel_target): $(asm_obj_files) go.o
$(kernel_target): $(asm_obj_files) linker_script go.o
@echo "[$(LD)] linking kernel-$(ARCH).bin"
@$(LD) $(LD_FLAGS) -o $(kernel_target) $(asm_obj_files) $(BUILD_DIR)/go.o
@ -36,14 +36,14 @@ go.o:
@mkdir -p $(BUILD_DIR)
@echo "[go] compiling go sources into a standalone .o file"
@GOARCH=386 GOOS=linux go build -n 2>&1 | sed \
@GOARCH=$(GOARCH) GOOS=$(GOOS) go build -n 2>&1 | sed \
-e "1s|^|set -e\n|" \
-e "1s|^|export GOOS=linux\n|" \
-e "1s|^|export GOARCH=386\n|" \
-e "1s|^|export GOOS=$(GOOS)\n|" \
-e "1s|^|export GOARCH=$(GOARCH)\n|" \
-e "1s|^|WORK='$(BUILD_ABS_DIR)'\n|" \
-e "1s|^|alias pack='go tool pack'\n|" \
-e "/^mv/d" \
-e "s|-extld|-tmpdir='$(BUILD_ABS_DIR)' -linkmode=external -extldflags='-nostdlib' -extld|g" \
-e "s|-extld|-tmpdir='$(BUILD_ABS_DIR)' -linkmode=external -extldflags='-nostartfiles -nodefaultlibs -nostdlib -r' -extld|g" \
| sh 2>&1 | sed -e "s/^/ | /g"
@# build/go.o is a elf32 object file but all go symbols are unexported. Our
@ -59,6 +59,13 @@ binutils_version_check:
@echo "[binutils] checking that installed objcopy version is >= $(MIN_OBJCOPY_VERSION)"
@if [ "$(HAVE_VALID_OBJCOPY)" != "y" ]; then echo "[binutils] error: a more up to date binutils installation is required" ; exit 1 ; fi
# linker_script generates $(BUILD_DIR)/linker.ld from the linker.ld.in template.
#
# Every "NAME equ VALUE" line of arch/$(ARCH)/asm/constants.inc is turned into
# a -DNAME=VALUE flag (blank lines and lines starting with ';' are dropped) and
# the template is run through the C pre-processor so that the linker script and
# the assembly sources share the same constants. The line markers emitted by
# the pre-processor (lines starting with '#') are stripped from the output.
linker_script:
	@# Make sure the output directory exists; do not rely on another rule
	@# having created it first (rule ordering is arbitrary under make -j).
	@mkdir -p $(BUILD_DIR)
	@echo "[sed] extracting LMA and VMA from constants.inc"
	@echo "[gcc] pre-processing arch/$(ARCH)/script/linker.ld.in"
	@gcc `sed -e "/^$$/d; /^;/d; s/^/-D/g; s/\s*equ\s*/=/g;" arch/$(ARCH)/asm/constants.inc | tr '\n' ' '` \
		-E -x \
		c arch/$(ARCH)/script/linker.ld.in | grep -v "^#" > $(BUILD_DIR)/linker.ld
$(BUILD_DIR)/arch/$(ARCH)/asm/%.o: arch/$(ARCH)/asm/%.s
@mkdir -p $(shell dirname $@)
@echo "[$(AS)] $<"
@ -87,22 +94,20 @@ iso:
vagrant ssh -c 'cd $(VAGRANT_SRC_FOLDER); make iso'
run: iso
qemu-system-i386 -cdrom $(iso_target)
qemu-system-$(ARCH) -cdrom $(iso_target)
gdb: iso
qemu-system-i386 -s -S -cdrom $(iso_target) &
qemu-system-$(ARCH) -M accel=tcg -s -S -cdrom $(iso_target) &
sleep 1
gdb \
-ex "add-auto-load-safe-path $(pwd)" \
-ex "file $(kernel_target)" \
-ex "set disassembly-flavor intel" \
-ex 'set arch i386:intel' \
-ex 'target remote localhost:1234' \
-ex 'add-auto-load-safe-path $(pwd)' \
-ex 'set disassembly-flavor intel' \
-ex 'layout asm' \
-ex 'b _rt0_entry' \
-ex 'continue' \
-ex 'disass'
@killall qemu-system-i386 || true
-ex 'set arch i386:intel' \
-ex 'file $(kernel_target)' \
-ex 'target remote localhost:1234' \
-ex 'set arch i386:x86-64:intel'
@killall qemu-system-$(ARCH) || true
endif
clean:

View File

@ -1,31 +0,0 @@
; vim: set ft=nasm :
%define SEG_NOEXEC (0 << 3)
%define SEG_EXEC (1 << 3)
%define SEG_NORW (0 << 1)
%define SEG_R (1 << 1)
%define SEG_W (1 << 1)
%define SEG_GRAN_BYTE (0 << 7)
%define SEG_GRAN_4K_PAGE (1 << 7)
;------------------------------------------------------------------------------
; GDT_ENTRY_32 creates a GDT entry for a 32-bit descriptor. It automatically sets
; the following bits:
; - Privl (ring) bits to 00 (ring 0)
; - Pr (present) bit to 1
; - Sz (size) bit to 1 (32-bit selector)
; - L (long-mode) bit to 0
;
; Args: base, limit, access, flags
;   %1 base   - segment base address (bits 0:31 are emitted)
;   %2 limit  - segment limit (bits 0:19 are emitted)
;   %3 access - OR-ed into the access byte (e.g. SEG_EXEC | SEG_R)
;   %4 flags  - only bits 7 and 6 are kept (mask 0xC0), e.g. SEG_GRAN_4K_PAGE
;
; The macro emits exactly 8 bytes (2 words + 4 bytes).
;------------------------------------------------------------------------------
%macro GDT_ENTRY_32 4
dw (%2 & 0xFFFF) ; limit 0:15
dw (%1 & 0xFFFF) ; base 0:15
db ((%1 >> 16) & 0xFF) ; base 16:23
db (0x90 | %3) ; 0x90 sets bit 7 (Pr = 1) and bit 4 (required)
; and apply access byte flags
db 0x40 | (%4 & 0xC0) | ((%2 >> 16) & 0xF) ; set Sz and flags and limit bits 16:19
db ((%1 >> 24) & 0xFF) ; base 24:31
%endmacro

View File

@ -1,199 +0,0 @@
; vim: set ft=nasm :
section .bss
; NOTE(review): the section is aligned to 4 bytes although the comment below
; asks for 16 byte stack alignment - confirm which one is intended.
align 4
; Reserve 16K for our stack. Stacks should be aligned to 16 byte boundaries.
stack_bottom:
resb 16384 ; 16 KiB
stack_top:
; Reserve some extra space for our tls_0 block; GO functions expect the
; GS segment register to point to the current TLS so we need to initialize this
; first before invoking any go functions
tls0:
g0_ptr: resd 1 ; gs:0x00 is a pointer to the current g struct
; in our case it should point to g0
; Minimal g0 layout: only the four stack-related dwords below are reserved.
g0:
g0_stack_lo: resd 1
g0_stack_hi: resd 1
g0_stackguard0: resd 1 ; sp compared to this value in go stack growth prologue
g0_stackguard1: resd 1 ; sp compared to this value in C stack growth prologue
section .text
bits 32
align 4
MULTIBOOT_MAGIC equ 0x36d76289
err_unsupported_bootloader db '[rt0] kernel not loaded by multiboot-compliant bootloader', 0
err_sse_not_available db '[rt0] kernel requires a CPU with SSE support', 0
err_kmain_returned db '[rt0] kMain returned; halting system', 0
;------------------------------------------------------------------------------
; Kernel arch-specific entry point
;
; The boot loader will jump to this symbol after setting up the CPU according
; to the multiboot standard. At this point:
; - A20 is enabled
; - The CPU is using 32-bit protected mode
; - Interrupts are disabled
; - Paging is disabled
; - EAX contains the magic value 0x36d76289; the presence of this value indicates
; to the operating system that it was loaded by a Multiboot-compliant boot loader
; - EBX contains the 32-bit physical address of the Multiboot information structure
;
; Flow: verify the magic value, set up the stack, enable SSE, load the GDT,
; initialize g0 and call kernel.Kmain with the multiboot pointer as its only
; (stack-passed) argument. The routine never returns to the bootloader.
;------------------------------------------------------------------------------
global _rt0_entry
_rt0_entry:
cmp eax, MULTIBOOT_MAGIC
; NOTE(review): this branch is taken before ESP is loaded below, so the call
; to write_string on the error path runs on whatever stack the bootloader left.
jne unsupported_bootloader
; Initialize our stack by pointing ESP to the BSS-allocated stack. In x86,
; stack grows downwards so we need to point ESP to stack_top
mov esp, stack_top
; Enable SSE (saves/restores all general purpose registers, so EBX survives)
call _rt0_enable_sse
; Load initial GDT (saves/restores EAX and EBX)
call _rt0_load_gdt
; init g0 so we can invoke Go functions
mov dword [gs:0x00], g0
mov dword [g0_stack_hi], stack_top
mov dword [g0_stack_lo], stack_bottom
mov dword [g0_stackguard0], stack_bottom
; push multiboot info ptr to the stack and call the kernel entrypoint
push ebx
extern kernel.Kmain
call kernel.Kmain
; kmain should never return
mov edi, err_kmain_returned
call write_string
; Main should never return; halt the CPU
halt:
cli
hlt
; Error path: report that the magic value in EAX did not match and halt.
unsupported_bootloader:
mov edi, err_unsupported_bootloader
call write_string
jmp halt
.end:
;------------------------------------------------------------------------------
; Write the NULL-terminated string contained in edi to the screen using white
; text on red background. Assumes that text-mode is enabled and that its
; physical address is 0xb8000.
;
; In:  EDI = address of the NULL-terminated string
; Out: EAX and EBX are preserved; EDI is left pointing at the terminating NULL.
; Output always starts at the first character cell (0xb8000).
;------------------------------------------------------------------------------
write_string:
push eax
push ebx
mov ebx,0xb8000
mov ah, 0x4F ; attribute byte stored with every character
next_char:
mov al, byte[edi]
test al, al ; stop at the NULL terminator
jz done
mov word [ebx], ax ; character in the low byte, attribute in the high byte
add ebx, 2
inc edi
jmp next_char
done:
pop ebx
pop eax
ret
;------------------------------------------------------------------------------
; Load GDT and flush CPU caches
;
; Patches the base address of the GS descriptor so that it points to tls0,
; loads gdt0 and reloads every segment register. EAX and EBX are preserved.
;------------------------------------------------------------------------------
_rt0_load_gdt:
push eax
push ebx
; Go code uses the GS register to access the TLS. Set the base address
; for the GS descriptor to point to our tls0 table
mov eax, tls0
mov ebx, gdt0_gs_seg
mov [ebx+2], al ; base bits 0:7
mov [ebx+3], ah ; base bits 8:15
shr eax, 16
mov [ebx+4], al ; base bits 16:23
; NOTE(review): the descriptor byte at offset 7 (base bits 24:31) is not
; written here, so this only works while tls0 lives below 16M - confirm.
lgdt [gdt0_desc]
; GDT has been loaded but the CPU still has the previous GDT data in cache.
; We need to manually update the descriptors and use a JMP command to set
; the CS segment descriptor
jmp CS_SEG:update_descriptors
update_descriptors:
mov ax, DS_SEG
mov ds, ax
mov es, ax
mov fs, ax
mov ax, GS_SEG
mov gs, ax
pop ebx
pop eax
ret
;------------------------------------------------------------------------------
; GDT definition
;
; Four 8-byte descriptors built with the GDT_ENTRY_32 macro followed by the
; 6-byte descriptor (size, address) consumed by lgdt. The *_SEG constants at
; the end are the byte offsets of each descriptor from gdt0, i.e. the selector
; values loaded into the segment registers.
;------------------------------------------------------------------------------
%include "gdt.inc"
align 2
gdt0:
gdt0_nil_seg: GDT_ENTRY_32 0x00, 0x0, 0x0, 0x0 ; nil descriptor (not used by CPU but required by some emulators)
gdt0_cs_seg: GDT_ENTRY_32 0x00, 0xFFFFF, SEG_EXEC | SEG_R, SEG_GRAN_4K_PAGE ; code descriptor
gdt0_ds_seg: GDT_ENTRY_32 0x00, 0xFFFFF, SEG_NOEXEC | SEG_W, SEG_GRAN_4K_PAGE ; data descriptor
; The base of the GS descriptor is patched at runtime by _rt0_load_gdt.
gdt0_gs_seg: GDT_ENTRY_32 0x00, 0x40, SEG_NOEXEC | SEG_W, SEG_GRAN_BYTE ; TLS descriptor (required in order to use go segmented stacks)
gdt0_desc:
dw gdt0_desc - gdt0 - 1 ; gdt size should be 1 byte less than actual length
dd gdt0
NULL_SEG equ gdt0_nil_seg - gdt0
CS_SEG equ gdt0_cs_seg - gdt0
DS_SEG equ gdt0_ds_seg - gdt0
GS_SEG equ gdt0_gs_seg - gdt0
;------------------------------------------------------------------------------
; Enable SSE support. Code taken from:
; http://wiki.osdev.org/SSE#Checking_for_SSE
;
; All general purpose registers are preserved on the success path
; (pushad/popad). If SSE is not reported by CPUID an error is printed and the
; CPU is halted; the routine does not return in that case.
;------------------------------------------------------------------------------
_rt0_enable_sse:
pushad
; check for SSE
mov eax, 0x1
cpuid
test edx, 1<<25
jz .no_sse
; enable SSE
mov eax, cr0
and ax, 0xFFFB ; clear coprocessor emulation CR0.EM
or ax, 0x2 ; set coprocessor monitoring CR0.MP
mov cr0, eax
mov eax, cr4
or ax, 3 << 9 ; set CR4.OSFXSR and CR4.OSXMMEXCPT at the same time
mov cr4, eax
popad
ret
.no_sse:
mov edi, err_sse_not_available
call write_string
cli
hlt

View File

@ -1,35 +0,0 @@
/* Linker script for the 32-bit kernel image: execution starts at _rt0_entry
 * and all sections are placed sequentially starting at 1M. */
ENTRY(_rt0_entry)
SECTIONS {
/* Kernel starts at 1M */
. = 1M;
/* ensure that the multiboot header is at the beginning */
.multiboot :
{
*(.multiboot_header)
}
/* Executable code; like every section below it starts on a 4K boundary */
.text BLOCK(4K) : ALIGN(4K)
{
*(.text)
}
/* Read-only data. */
.rodata BLOCK(4K) : ALIGN(4K)
{
*(.rodata)
}
/* Read-write data (initialized) */
.data BLOCK(4K) : ALIGN(4K)
{
*(.data)
}
/* Read-write data (uninitialized), including common symbols */
.bss BLOCK(4K) : ALIGN(4K)
{
*(COMMON)
*(.bss)
}
}

View File

@ -0,0 +1,24 @@
; vim: set ft=nasm :
section .text
bits 64
; Export every cgo hook symbol so that the linker can resolve references to it.
global x_cgo_callers
global x_cgo_init
global x_cgo_mmap
global x_cgo_notify_runtime_init_done
global x_cgo_sigaction
global x_cgo_thread_start
global x_cgo_setenv
global x_cgo_unsetenv
; Stubs for missing cgo functions to keep the linker happy
; All labels below share the same address: a single ret instruction, so
; calling any of them returns immediately without doing anything.
x_cgo_callers:
x_cgo_init:
x_cgo_mmap:
x_cgo_notify_runtime_init_done:
x_cgo_sigaction:
x_cgo_thread_start:
x_cgo_setenv:
x_cgo_unsetenv:
ret

View File

@ -0,0 +1,12 @@
; vim: set ft=nasm :
; NOTE: the Makefile's linker_script rule converts every "NAME equ VALUE" line
; of this file into a -DNAME=VALUE pre-processor flag (dropping blank lines and
; lines that start with ';'), so keep one plain definition per line.
; The bootloader loads the kernel at LOAD_ADDRESS and jumps to the rt0_32 entrypoint
; at this address.
LOAD_ADDRESS equ 0x100000
; Page offset is the start of the 48-bit upper half canonical memory region
; The kernel is compiled with a VMA equal to PAGE_OFFSET + LOAD_ADDRESS but
; loaded at physical address LOAD_ADDRESS.
PAGE_OFFSET equ 0xffff800000000000

0
arch/x86_64/asm/data.s Normal file
View File

356
arch/x86_64/asm/rt0_32.s Normal file
View File

@ -0,0 +1,356 @@
; vim: set ft=nasm :
%include "constants.inc"
section .data
align 4
; GDT definition
; Three 8-byte descriptors (nil, code, data) followed by the descriptor that
; is passed to lgdt. Base and limit are left at 0 in every entry.
gdt0:
gdt0_nil_seg: dw 0 ; Limit (low)
dw 0 ; Base (low)
db 0 ; Base (middle)
db 0 ; Access (all zero: nil descriptor)
db 0 ; Granularity
db 0 ; Base (high)
gdt0_cs_seg: dw 0 ; Limit (low)
dw 0 ; Base (low)
db 0 ; Base (middle)
db 10011010b ; Access (exec/read)
db 00100000b ; Granularity (bit 5 set: 64-bit code segment)
db 0 ; Base (high)
gdt0_ds_seg: dw 0 ; Limit (low)
dw 0 ; Base (low)
db 0 ; Base (middle)
db 10010010b ; Access (read/write)
db 00000000b ; Granularity
db 0 ; Base (high)
gdt0_desc:
dw $ - gdt0 - 1 ; gdt size should be 1 byte less than actual length
dq gdt0 - PAGE_OFFSET ; physical address of the table (VMA minus PAGE_OFFSET)
; Selector values: byte offset of each descriptor from the start of the table.
NULL_SEG equ gdt0_nil_seg - gdt0
CS_SEG equ gdt0_cs_seg - gdt0
DS_SEG equ gdt0_ds_seg - gdt0
;------------------------------------------------------------------------------
; Error messages
;------------------------------------------------------------------------------
err_unsupported_bootloader db '[rt0_32] kernel not loaded by multiboot-compliant bootloader', 0
err_multiboot_data_too_big db '[rt0_32] multiboot information data length exceeds local buffer size', 0
err_cpuid_not_supported db '[rt0_32] the processor does not support the CPUID instruction', 0
err_longmode_not_supported db '[rt0_32] the processor does not support longmode which is required by this kernel', 0
err_sse_not_supported db '[rt0_32] the processor does not support SSE instructions which are required by this kernel', 0
section .bss
align 4096
; Reserve 3 pages for the initial page tables
; (one page per level; filled in by _rt0_populate_initial_page_tables)
page_table_l4: resb 4096
page_table_l3: resb 4096
page_table_l2: resb 4096
; Reserve 16K for storing multiboot data and another 16K for the kernel stack.
; The stack grows downwards from stack_top towards stack_bottom.
global multiboot_data ; Make this available to the 64-bit entrypoint
global stack_bottom
global stack_top
multiboot_data: resb 16384
stack_bottom: resb 16384
stack_top:
section .rt0
bits 32
align 4
;------------------------------------------------------------------------------
; Kernel 32-bit entry point
;
; The boot loader will jump to this symbol after setting up the CPU according
; to the multiboot standard. At this point:
; - A20 is enabled
; - The CPU is using 32-bit protected mode
; - Interrupts are disabled
; - Paging is disabled
; - EAX contains the magic value 0x36d76289; the presence of this value indicates
; to the operating system that it was loaded by a Multiboot-compliant boot loader
; - EBX contains the 32-bit physical address of the Multiboot information structure
;
; Paging is not enabled yet, so every symbol referenced here is converted to
; its physical address by subtracting PAGE_OFFSET.
;
; The routine never returns. The .halt label is also used by the other 32-bit
; helpers as their common "fatal error" exit.
;------------------------------------------------------------------------------
global _rt0_32_entry
_rt0_32_entry:
; Provide a stack
mov esp, stack_top - PAGE_OFFSET
; Ensure we were booted by a bootloader supporting multiboot
cmp eax, 0x36d76289
jne _rt0_32_entry.unsupported_bootloader
; Copy multiboot struct to our own buffer. This must run before the CPU
; feature checks as they execute cpuid which overwrites EBX.
call _rt0_copy_multiboot_data
; Check processor features
call _rt0_check_cpuid_support
call _rt0_check_longmode_support
call _rt0_check_sse_support
; Setup initial page tables, enable paging and enter longmode
call _rt0_populate_initial_page_tables
call _rt0_enter_long_mode
call _rt0_64_entry_trampoline
; The trampoline jumps to the 64-bit entrypoint and is not expected to come
; back. Should that ever happen, halt instead of falling through into the
; error path below: the CPU is in 64-bit mode at this point and write_string
; is assembled as 32-bit code.
jmp _rt0_32_entry.halt
.unsupported_bootloader:
mov edi, err_unsupported_bootloader - PAGE_OFFSET
call write_string
jmp _rt0_32_entry.halt
.halt:
cli
hlt
; hlt can still be left (e.g. by a non-maskable interrupt); loop so that
; execution never runs into the code that follows this routine.
jmp _rt0_32_entry.halt
;------------------------------------------------------------------------------
; Copy multiboot information blocks from the address pointed to by ebx into a
; local buffer. This enables the kernel code to access them once paging is enabled.
;
; In:  EBX = physical address of the multiboot info; its first dword is read
;      as the total size (in bytes) of the data to copy.
; Clobbers: EAX, ECX, ESI, EDI.
;
; If the reported size exceeds the 16384 byte multiboot_data buffer an error
; is printed and the CPU is halted.
;------------------------------------------------------------------------------
_rt0_copy_multiboot_data:
mov esi, ebx
mov edi, multiboot_data - PAGE_OFFSET
mov ecx, dword [esi]
; The size is an unsigned quantity: use an unsigned comparison so that values
; with the top bit set are rejected instead of being treated as negative.
cmp ecx, 16384
jbe _rt0_copy_multiboot_data.size_ok
mov edi, err_multiboot_data_too_big - PAGE_OFFSET
call write_string
jmp _rt0_32_entry.halt
.size_ok:
; The loop below copies one dword per iteration and stops when the counter
; reaches exactly zero. Round the size up to a multiple of 4 so that the
; counter cannot skip over zero. The rounded value never exceeds the buffer
; size as 16384 is itself a multiple of 4.
add ecx, 3
and ecx, 0xfffffffc
.copy:
test ecx, ecx
jz _rt0_copy_multiboot_data.done
mov eax, dword[esi]
mov dword [edi], eax
add esi, 4
add edi, 4
sub ecx, 4
jmp _rt0_copy_multiboot_data.copy
.done:
ret
;------------------------------------------------------------------------------
; Check that the processor supports the CPUID instruction.
;
; To check if CPUID is supported, we need to attempt to flip the ID bit (bit 21)
; in the FLAGS register. If that works, CPUID is available.
;
; Code taken from: http://wiki.osdev.org/Setting_Up_Long_Mode#x86_or_x86-64
;
; Clobbers: EAX, ECX. Returns normally when CPUID is available; otherwise an
; error is printed and the CPU is halted.
;------------------------------------------------------------------------------
_rt0_check_cpuid_support:
; Copy FLAGS in to EAX via stack
pushfd
pop eax
; Copy to ECX as well for comparing later on
mov ecx, eax
; Flip the ID bit
xor eax, 1 << 21
; Copy EAX to FLAGS via the stack
push eax
popfd
; Copy FLAGS back to EAX (with the flipped bit if CPUID is supported)
pushfd
pop eax
; Restore FLAGS from the old version stored in ECX (i.e. flipping the
; ID bit back if it was ever flipped).
push ecx
popfd
; Compare EAX and ECX. If they are equal then that means the bit
; wasn't flipped, and CPUID isn't supported.
cmp eax, ecx
je _rt0_check_cpuid_support.no_cpuid
ret
.no_cpuid:
mov edi, err_cpuid_not_supported - PAGE_OFFSET
call write_string
jmp _rt0_32_entry.halt
;------------------------------------------------------------------------------
; Check that the processor supports long mode
; Code taken from: http://wiki.osdev.org/Setting_Up_Long_Mode#x86_or_x86-64
;
; Clobbers: EAX, EBX, ECX, EDX (all of them are written by cpuid). Returns
; normally when long mode is available; otherwise an error is printed and the
; CPU is halted.
;------------------------------------------------------------------------------
_rt0_check_longmode_support:
; To check for longmode support we need to ensure that the CPUID instruction
; can report it. To do this we need to query it first.
mov eax, 0x80000000 ; Set the A-register to 0x80000000.
cpuid
cmp eax, 0x80000001 ; We need at least 0x80000001 to check for long mode.
jb _rt0_check_longmode_support.no_long_mode
mov eax, 0x80000001 ; Set the A-register to 0x80000001.
cpuid
test edx, 1 << 29 ; Test if the LM-bit, which is bit 29, is set in the D-register.
jz _rt0_check_longmode_support.no_long_mode
ret
.no_long_mode:
mov edi, err_longmode_not_supported - PAGE_OFFSET
call write_string
jmp _rt0_32_entry.halt
;------------------------------------------------------------------------------
; Check for and enable SSE support. Code taken from:
; http://wiki.osdev.org/SSE#Checking_for_SSE
;
; Clobbers: EAX, EBX, ECX, EDX (all of them are written by cpuid). Returns
; normally once SSE has been enabled; if CPUID does not report SSE an error is
; printed and the CPU is halted.
;------------------------------------------------------------------------------
_rt0_check_sse_support:
; check for SSE
mov eax, 0x1
cpuid
test edx, 1<<25
jz _rt0_check_sse_support.no_sse
; Enable SSE
mov eax, cr0
and ax, 0xfffb ; Clear coprocessor emulation CR0.EM
or ax, 0x2 ; Set coprocessor monitoring CR0.MP
mov cr0, eax
mov eax, cr4
or ax, 3 << 9 ; Set CR4.OSFXSR and CR4.OSXMMEXCPT at the same time
mov cr4, eax
ret
.no_sse:
mov edi, err_sse_not_supported - PAGE_OFFSET
call write_string
jmp _rt0_32_entry.halt
;------------------------------------------------------------------------------
; Setup minimal page tables to allow access to the following regions:
; - 0 to 8M
; - PAGE_OFFSET to PAGE_OFFSET + 8M
;
; The second region mapping allows us to access the kernel at its VMA when
; paging is enabled.
;
; Both regions are backed by the same L3/L2 tables, so they refer to the same
; physical memory (0 to 8M).
;
; Clobbers: EAX, EBX, ECX, EDX (mul writes its upper result half to EDX).
;
; NOTE(review): only the low dword of each 8-byte table entry is written; the
; upper dword is presumably expected to be zero because the tables live in
; .bss - confirm that the bootloader zeroes that section.
;------------------------------------------------------------------------------
PAGE_PRESENT equ (1 << 0)
PAGE_WRITABLE equ (1 << 1)
PAGE_2MB equ (1 << 7)
_rt0_populate_initial_page_tables:
; The CPU uses bits 39-47 of the virtual address as an index to the P4 table.
mov eax, page_table_l3 - PAGE_OFFSET
or eax, PAGE_PRESENT | PAGE_WRITABLE
mov ebx, page_table_l4 - PAGE_OFFSET
mov [ebx], eax
; Also map the addresses starting at PAGE_OFFSET to the same P3 table.
; To find the P4 index for PAGE_OFFSET we need to extract bits 39-47
; of its address.
mov ecx, (PAGE_OFFSET >> 39) & 511
mov [ebx + ecx*8], eax
; The CPU uses bits 30-38 as an index to the P3 table. We just need to map
; entry 0 from the P3 table to point to the P2 table .
mov eax, page_table_l2 - PAGE_OFFSET
or eax, PAGE_PRESENT | PAGE_WRITABLE
mov ebx, page_table_l3 - PAGE_OFFSET
mov [ebx], eax
; For the L2 table we enable the huge page bit which allows us to specify
; 2M pages without needing to use the L1 table. To cover the required
; 0-8M region we need to provide 4 2M page entries at indices 0 to 3.
mov ecx, 0
mov ebx, page_table_l2 - PAGE_OFFSET
.next_page:
mov eax, 1 << 21 ; 2M
mul ecx ; eax *= ecx
or eax, PAGE_PRESENT | PAGE_WRITABLE | PAGE_2MB
mov [ebx + ecx*8], eax
inc ecx
cmp ecx, 4
jne _rt0_populate_initial_page_tables.next_page
ret
;------------------------------------------------------------------------------
; Load P4 table, enable PAE, enter long mode and finally enable paging
;
; This routine is called from 32-bit protected mode and returns to its caller
; with the CPU running in 64-bit mode. The instructions that follow the call
; site are therefore executed as 64-bit code.
;
; Clobbers: EAX, ECX, EDX and the DS, ES, FS, GS and CS segment registers.
;------------------------------------------------------------------------------
_rt0_enter_long_mode:
; Load page table map pointer to cr3
mov eax, page_table_l4 - PAGE_OFFSET
mov cr3, eax
; Enable PAE support
mov eax, cr4
or eax, 1 << 5
mov cr4, eax
; Now enable long mode by modifying the EFER MSR
mov ecx, 0xc0000080
rdmsr ; read msr value to edx:eax
or eax, 1 << 8
wrmsr
; Finally enable paging
mov eax, cr0
or eax, 1 << 31
mov cr0, eax
; We are in 32-bit compatibility submode. We need to load a 64bit GDT
; and perform a far jmp to switch to long mode
mov eax, gdt0_desc - PAGE_OFFSET
lgdt [eax]
; set ds and es segments
; to set the cs segment we need to perform a far jmp
mov ax, DS_SEG
mov ds, ax
mov es, ax
mov fs, ax
mov gs, ax
jmp CS_SEG:.flush_gdt - PAGE_OFFSET
; From this point on the CPU decodes instructions as 64-bit code.
bits 64
.flush_gdt:
; Our caller executed a 32-bit call which pushed a 4-byte return address. A
; near ret in 64-bit mode pops 8 bytes and would pick up whatever dword lies
; above the return address as the upper half of the target. Pop the 4-byte
; return address manually (the 32-bit load zero-extends into rax) and jump
; to it instead.
mov eax, dword [rsp]
add rsp, 4
jmp rax
; The code that follows this routine in the file is 32-bit code again.
bits 32
;------------------------------------------------------------------------------
; Write the NULL-terminated string contained in edi to the screen using white
; text on red background. Assumes that text-mode is enabled and that its
; physical address is 0xb8000.
;
; In:  EDI = physical address of the NULL-terminated string
; Clobbers: EAX, EBX, EDI (no register is saved). In this file the routine is
; only called right before halting the CPU.
; Output always starts at the first character cell (0xb8000).
;------------------------------------------------------------------------------
write_string:
mov ebx,0xb8000
mov ah, 0x4F ; attribute byte stored with every character
.next_char:
mov al, byte[edi]
test al, al ; stop at the NULL terminator
jz write_string.done
mov word [ebx], ax ; character in the low byte, attribute in the high byte
add ebx, 2
inc edi
jmp write_string.next_char
.done:
ret
;------------------------------------------------------------------------------
; Set up the stack pointer to the virtual address of the stack and jump to the
; 64-bit entrypoint.
;
; This routine does not return to its caller: the stack pointer is replaced
; (discarding the return address pushed by the call) and control is
; transferred with a jmp.
;------------------------------------------------------------------------------
bits 64
_rt0_64_entry_trampoline:
mov rsp, stack_top ; now that paging is enabled we can load the stack
; with the virtual address of the allocated stack.
; Jump to 64-bit entry
; The 64-bit address of the entrypoint is loaded into a register first and
; the jump is performed through that register.
extern _rt0_64_entry
mov rax, _rt0_64_entry
jmp rax

95
arch/x86_64/asm/rt0_64.s Normal file
View File

@ -0,0 +1,95 @@
; vim: set ft=nasm :
section .bss
align 8
; _rt0_64_entry sets the FS base to the address of r0_g. r0_g_ptr is reserved
; immediately before r0_g, so it is the quadword found at fs:-8.
r0_g_ptr: resq 1 ; fs:-8 is a pointer to the current g struct
; Minimal g layout: only the four stack-related quadwords below are reserved.
r0_g:
r0_g_stack_lo: resq 1
r0_g_stack_hi: resq 1
r0_g_stackguard0: resq 1 ; rsp compared to this value in go stack growth prologue
r0_g_stackguard1: resq 1 ; rsp compared to this value in C stack growth prologue
section .text
bits 64
;------------------------------------------------------------------------------
; Kernel 64-bit entry point
;
; The 32-bit entrypoint code jumps to this entrypoint after:
; - it has entered long mode and enabled paging
; - it has loaded a 64bit GDT
; - it has set up identity paging for the physical 0-8M region and the
; PAGE_OFFSET to PAGE_OFFSET+8M region.
;
; The routine populates the r0_g structure, points the FS base at it, calls
; kernel.Kmain with a pointer to the copied multiboot data as its single
; stack-passed argument and halts the CPU if Kmain ever returns.
;------------------------------------------------------------------------------
global _rt0_64_entry
_rt0_64_entry:
; According to the x86_64 ABI, the fs:0 should point to the address of
; the user-space thread structure. The actual TLS structure is located
; just before that (aligned). Go code tries to fetch the address to the
; active go-routine's g struct by accessing fs:-8. What we need to do
; is to setup a mock g0 struct, populate its stack_lo/hi/guard fields
; and then use wrmsr to update the FS register
extern stack_top
extern stack_bottom
; Setup r0_g
mov rax, stack_bottom
mov rbx, stack_top
mov rsi, r0_g
mov qword [rsi+0], rax ; stack_lo
mov qword [rsi+8], rbx ; stack_hi
mov qword [rsi+16], rax ; stackguard0
; Store the address of r0_g in the pointer slot that precedes it (fs:-8)
mov rax, r0_g_ptr
mov qword [rax], rsi
; Load 64-bit FS register address
; wrmsr writes edx:eax to the MSR selected by ecx:
; eax -> lower 32 bits
; edx -> upper 32 bits
mov ecx, 0xc0000100 ; fs_base
mov rax, rsi ; lower 32 bits (wrmsr only reads eax)
shr rsi, 32
mov rdx, rsi ; high 32 bits
wrmsr
; Call the kernel entry point passing a pointer to the multiboot data
; copied by the 32-bit entry code
extern multiboot_data
extern kernel.Kmain
mov rax, multiboot_data
push rax
call kernel.Kmain
; Main should never return; halt the CPU
mov rdi, err_kmain_returned
call write_string
.halt:
cli
hlt
; hlt can still be left (e.g. by a non-maskable interrupt); loop so that the
; CPU never starts executing the data bytes that follow this routine.
jmp _rt0_64_entry.halt
;------------------------------------------------------------------------------
; Error messages
;------------------------------------------------------------------------------
err_kmain_returned db '[rt0_64] kmain returned', 0
;------------------------------------------------------------------------------
; Write the NULL-terminated string contained in rdi to the screen using white
; text on red background. Assumes that text-mode is enabled and that its
; physical address is 0xb8000.
;
; In:  RDI = address of the NULL-terminated string
; Clobbers: RAX (AX), RBX, RDI (no register is saved).
; Output always starts at the first character cell (0xb8000).
;------------------------------------------------------------------------------
write_string:
mov rbx,0xb8000
mov ah, 0x4F ; attribute byte stored with every character
.next_char:
mov al, byte[rdi]
test al, al ; stop at the NULL terminator
jz write_string.done
mov word [rbx], ax ; character in the low byte, attribute in the high byte
add rbx, 2
inc rdi
jmp write_string.next_char
.done:
ret

View File

@ -0,0 +1,39 @@
/* Template for the kernel linker script. PAGE_OFFSET and LOAD_ADDRESS are not
 * defined in this file: the Makefile runs the template through the C
 * pre-processor and supplies them as -D flags derived from constants.inc. */
VMA = PAGE_OFFSET + LOAD_ADDRESS;
ENTRY(_rt0_32_entry)
SECTIONS {
/* Set the kernel VMA at PAGE_OFFSET + 1M
* but load it at physical address 1M */
. = VMA;
/* Every output section below uses AT() to place its load address (LMA) at
 * its virtual address minus PAGE_OFFSET. */
.text BLOCK(4K) : AT(ADDR(.text) - PAGE_OFFSET)
{
/* The multiboot header must be present in the first 4K of the kernel
* image so that the bootloader can find it */
*(.multiboot_header)
*(.rt0)
*(.text)
}
/* Read-only data. */
.rodata ALIGN(4K) : AT(ADDR(.rodata) - PAGE_OFFSET)
{
*(.rodata)
}
/* Read-write data (initialized) */
.data ALIGN(4K) : AT(ADDR(.data) - PAGE_OFFSET)
{
*(.data)
}
/* Read-write data (zeroed) */
.bss ALIGN(4K) : AT(ADDR(.bss) - PAGE_OFFSET)
{
*(COMMON)
*(.bss)
}
}

View File

@ -5,6 +5,7 @@ import (
"github.com/achilleasa/gopher-os/kernel/hal"
"github.com/achilleasa/gopher-os/kernel/hal/multiboot"
"github.com/achilleasa/gopher-os/kernel/kfmt/early"
)
// Kmain is the only Go symbol that is visible (exported) from the rt0 initialization
@ -18,10 +19,15 @@ import (
// Kmain is not expected to return. If it does, the rt0 code will halt the CPU.
//
//go:noinline
func Kmain(multibootInfoPtr uint32) {
multiboot.SetInfoPtr(uintptr(multibootInfoPtr))
func Kmain(multibootInfoPtr uintptr) {
multiboot.SetInfoPtr(multibootInfoPtr)
// Initialize and clear the terminal
hal.InitTerminal()
hal.ActiveTerminal.Clear()
early.Printf("Starting gopher-os\n")
// Prevent Kmain from returning
for {
}
}

View File

@ -2,7 +2,7 @@ package main
import "github.com/achilleasa/gopher-os/kernel"
var multibootInfoPtr uint32
var multibootInfoPtr uintptr
// main makes a dummy call to the actual kernel main entrypoint function. It
// is intentionally defined to prevent the Go compiler from optimizing away the