mirror of
https://github.com/taigrr/gopher-os
synced 2025-01-18 04:43:13 -08:00
Switch to a 64-bit version of the kernel and rt0 code
The switch to 64-bit mode allows us to use 48-bit addressing and to relocate the kernel to virtual address 0xffff800000000000 + 1M. The actual kernel is loaded by the bootloader at physical address 1M. The rt0 code has been split in two parts. The 32-bit part provides the entrypoint that the bootloader jumps to after loading the kernel. Its purpose is to make sure that: - the kernel was booted by a multiboot-compliant bootloader - the multiboot info structures are copied to a reserved memory block where they can be accessed after enabling paging - the CPU meets the minimum requirements for the kernel (CPUID, SSE, support for long-mode) Since paging is not enabled when the 32-bit code runs, it needs to translate all memory addresses it accesses to physical memory addresses by subtracting PAGE_OFFSET. The 32-bit rt0 code will set up a page table that identity-maps region: 0 to 8M and region: PAGE_OFFSET to PAGE_OFFSET+8M. This ensures that when paging gets enabled, we will still be able to access the kernel using both physical and virtual memory addresses. After enabling paging, the 32-bit rt0 will jump to a small 64-bit trampoline function that updates the stack pointer to use the proper virtual address and jumps to the virtual address of the 64-bit entry point. The 64-bit entrypoint sets up the minimal g0 structure required by the go function prologue for stack checks and sets up the FS register to point to it. The principle is the same as with 32-bit code (a segment register has the address of a pointer to the active g) with the difference that in 64-bit mode, the FS register is used instead of GS and that in order to set its value we need to write to a MSR.
This commit is contained in:
parent
4829115647
commit
2558f79fbf
45
Makefile
45
Makefile
@ -1,5 +1,5 @@
|
||||
OS = $(shell uname -s)
|
||||
ARCH := x86
|
||||
ARCH := x86_64
|
||||
BUILD_DIR := build
|
||||
BUILD_ABS_DIR := $(CURDIR)/$(BUILD_DIR)
|
||||
|
||||
@ -13,10 +13,10 @@ LD := ld
|
||||
AS := nasm
|
||||
|
||||
GOOS := linux
|
||||
GOARCH := 386
|
||||
GOARCH := amd64
|
||||
|
||||
LD_FLAGS := -n -melf_i386 -T arch/$(ARCH)/script/linker.ld -static --no-ld-generated-unwind-info
|
||||
AS_FLAGS := -g -f elf32 -F dwarf -I arch/$(ARCH)/asm/
|
||||
LD_FLAGS := -n -T $(BUILD_DIR)/linker.ld -static --no-ld-generated-unwind-info
|
||||
AS_FLAGS := -g -f elf64 -F dwarf -I arch/$(ARCH)/asm/
|
||||
|
||||
MIN_OBJCOPY_VERSION := 2.26.0
|
||||
HAVE_VALID_OBJCOPY := $(shell objcopy -V | head -1 | awk -F ' ' '{print "$(MIN_OBJCOPY_VERSION)\n" $$NF}' | sort -ct. -k1,1n -k2,2n && echo "y")
|
||||
@ -28,7 +28,7 @@ asm_obj_files := $(patsubst arch/$(ARCH)/asm/%.s, $(BUILD_DIR)/arch/$(ARCH)/asm/
|
||||
|
||||
kernel: binutils_version_check $(kernel_target)
|
||||
|
||||
$(kernel_target): $(asm_obj_files) go.o
|
||||
$(kernel_target): $(asm_obj_files) linker_script go.o
|
||||
@echo "[$(LD)] linking kernel-$(ARCH).bin"
|
||||
@$(LD) $(LD_FLAGS) -o $(kernel_target) $(asm_obj_files) $(BUILD_DIR)/go.o
|
||||
|
||||
@ -36,14 +36,14 @@ go.o:
|
||||
@mkdir -p $(BUILD_DIR)
|
||||
|
||||
@echo "[go] compiling go sources into a standalone .o file"
|
||||
@GOARCH=386 GOOS=linux go build -n 2>&1 | sed \
|
||||
@GOARCH=$(GOARCH) GOOS=$(GOOS) go build -n 2>&1 | sed \
|
||||
-e "1s|^|set -e\n|" \
|
||||
-e "1s|^|export GOOS=linux\n|" \
|
||||
-e "1s|^|export GOARCH=386\n|" \
|
||||
-e "1s|^|export GOOS=$(GOOS)\n|" \
|
||||
-e "1s|^|export GOARCH=$(GOARCH)\n|" \
|
||||
-e "1s|^|WORK='$(BUILD_ABS_DIR)'\n|" \
|
||||
-e "1s|^|alias pack='go tool pack'\n|" \
|
||||
-e "/^mv/d" \
|
||||
-e "s|-extld|-tmpdir='$(BUILD_ABS_DIR)' -linkmode=external -extldflags='-nostdlib' -extld|g" \
|
||||
-e "s|-extld|-tmpdir='$(BUILD_ABS_DIR)' -linkmode=external -extldflags='-nostartfiles -nodefaultlibs -nostdlib -r' -extld|g" \
|
||||
| sh 2>&1 | sed -e "s/^/ | /g"
|
||||
|
||||
@# build/go.o is an elf64 object file but all go symbols are unexported. Our
|
||||
@ -59,6 +59,13 @@ binutils_version_check:
|
||||
@echo "[binutils] checking that installed objcopy version is >= $(MIN_OBJCOPY_VERSION)"
|
||||
@if [ "$(HAVE_VALID_OBJCOPY)" != "y" ]; then echo "[binutils] error: a more up to date binutils installation is required" ; exit 1 ; fi
|
||||
|
||||
linker_script:
|
||||
@echo "[sed] extracting LMA and VMA from constants.inc"
|
||||
@echo "[gcc] pre-processing arch/$(ARCH)/script/linker.ld.in"
|
||||
@gcc `cat arch/$(ARCH)/asm/constants.inc | sed -e "/^$$/d; /^;/d; s/^/-D/g; s/\s*equ\s*/=/g;" | tr '\n' ' '` \
|
||||
-E -x \
|
||||
c arch/$(ARCH)/script/linker.ld.in | grep -v "^#" > $(BUILD_DIR)/linker.ld
|
||||
|
||||
$(BUILD_DIR)/arch/$(ARCH)/asm/%.o: arch/$(ARCH)/asm/%.s
|
||||
@mkdir -p $(shell dirname $@)
|
||||
@echo "[$(AS)] $<"
|
||||
@ -87,22 +94,20 @@ iso:
|
||||
vagrant ssh -c 'cd $(VAGRANT_SRC_FOLDER); make iso'
|
||||
|
||||
run: iso
|
||||
qemu-system-i386 -cdrom $(iso_target)
|
||||
qemu-system-$(ARCH) -cdrom $(iso_target)
|
||||
|
||||
gdb: iso
|
||||
qemu-system-i386 -s -S -cdrom $(iso_target) &
|
||||
qemu-system-$(ARCH) -M accel=tcg -s -S -cdrom $(iso_target) &
|
||||
sleep 1
|
||||
gdb \
|
||||
-ex "add-auto-load-safe-path $(pwd)" \
|
||||
-ex "file $(kernel_target)" \
|
||||
-ex "set disassembly-flavor intel" \
|
||||
-ex 'set arch i386:intel' \
|
||||
-ex 'target remote localhost:1234' \
|
||||
-ex 'add-auto-load-safe-path $(CURDIR)' \
|
||||
-ex 'set disassembly-flavor intel' \
|
||||
-ex 'layout asm' \
|
||||
-ex 'b _rt0_entry' \
|
||||
-ex 'continue' \
|
||||
-ex 'disass'
|
||||
@killall qemu-system-i386 || true
|
||||
-ex 'set arch i386:intel' \
|
||||
-ex 'file $(kernel_target)' \
|
||||
-ex 'target remote localhost:1234' \
|
||||
-ex 'set arch i386:x86-64:intel'
|
||||
@killall qemu-system-$(ARCH) || true
|
||||
endif
|
||||
|
||||
clean:
|
||||
|
@ -1,31 +0,0 @@
|
||||
; vim: set ft=nasm :
|
||||
|
||||
%define SEG_NOEXEC (0 << 3)
|
||||
%define SEG_EXEC (1 << 3)
|
||||
|
||||
%define SEG_NORW (0 << 1)
|
||||
%define SEG_R (1 << 1)
|
||||
%define SEG_W (1 << 1)
|
||||
|
||||
%define SEG_GRAN_BYTE (0 << 7)
|
||||
%define SEG_GRAN_4K_PAGE (1 << 7)
|
||||
|
||||
;------------------------------------------------------------------------------
|
||||
; GDT_ENTRY_32 creates a GDT entry for a 32-bit descriptor. It automatically sets
|
||||
; the following bits:
|
||||
; - Privl (ring) bits to 00 (ring 0)
|
||||
; - Pr (present) bit to 1
|
||||
; - Sz (size) bit to 1 (32-bit selector)
|
||||
; - L (long-mode) bit to 0
|
||||
;
|
||||
; Args: base, limit, access, flags
|
||||
;------------------------------------------------------------------------------
|
||||
%macro GDT_ENTRY_32 4
|
||||
dw (%2 & 0xFFFF) ; limit 0:15
|
||||
dw (%1 & 0xFFFF) ; base 0:15
|
||||
db ((%1 >> 16) & 0xFF) ; base 16:23
|
||||
db (0x90 | %3) ; set Pr = 1, bit 5 = 1 (required)
|
||||
; and apply access byte flags
|
||||
db 0x40 | (%4 & 0xC0) | ((%2 >> 16) & 0xF) ; set Sz and flags and limit bits 16:19
|
||||
db ((%1 >> 24) & 0xFF) ; base 24:31
|
||||
%endmacro
|
@ -1,199 +0,0 @@
|
||||
; vim: set ft=nasm :
|
||||
|
||||
section .bss
|
||||
align 4
|
||||
|
||||
; Reserve 16K for our stack. Stacks should be aligned to 16 byte boundaries.
|
||||
stack_bottom:
|
||||
resb 16384 ; 16 KiB
|
||||
stack_top:
|
||||
|
||||
; Reserve some extra space for our tls_0 block; GO functions expect the
|
||||
; GS segment register to point to the current TLS so we need to initialize this
|
||||
; first before invoking any go functions
|
||||
tls0:
|
||||
g0_ptr: resd 1 ; gs:0x00 is a pointer to the current g struct
|
||||
; in our case it should point to g0
|
||||
g0:
|
||||
g0_stack_lo: resd 1
|
||||
g0_stack_hi: resd 1
|
||||
g0_stackguard0: resd 1 ; sp compared to this value in go stack growth prologue
|
||||
g0_stackguard1: resd 1 ; sp compared to this value in C stack growth prologue
|
||||
|
||||
section .text
|
||||
bits 32
|
||||
align 4
|
||||
|
||||
MULTIBOOT_MAGIC equ 0x36d76289
|
||||
|
||||
err_unsupported_bootloader db '[rt0] kernel not loaded by multiboot-compliant bootloader', 0
|
||||
err_sse_not_available db '[rt0] kernel requires a CPU with SSE support', 0
|
||||
err_kmain_returned db '[rt0] kMain returned; halting system', 0
|
||||
|
||||
;------------------------------------------------------------------------------
|
||||
; Kernel arch-specific entry point
|
||||
;
|
||||
; The boot loader will jump to this symbol after setting up the CPU according
|
||||
; to the multiboot standard. At this point:
|
||||
; - A20 is enabled
|
||||
; - The CPU is using 32-bit protected mode
|
||||
; - Interrupts are disabled
|
||||
; - Paging is disabled
|
||||
; - EAX contains the magic value ‘0x36d76289’; the presence of this value indicates
|
||||
; to the operating system that it was loaded by a Multiboot-compliant boot loader
|
||||
; - EBX contains the 32-bit physical address of the Multiboot information structure
|
||||
;------------------------------------------------------------------------------
|
||||
global _rt0_entry
|
||||
_rt0_entry:
|
||||
cmp eax, MULTIBOOT_MAGIC
|
||||
jne unsupported_bootloader
|
||||
|
||||
; Initialize our stack by pointing ESP to the BSS-allocated stack. In x86,
|
||||
; stack grows downwards so we need to point ESP to stack_top
|
||||
mov esp, stack_top
|
||||
|
||||
; Enable SSE
|
||||
call _rt0_enable_sse
|
||||
|
||||
; Load initial GDT
|
||||
call _rt0_load_gdt
|
||||
|
||||
; init g0 so we can invoke Go functions
|
||||
mov dword [gs:0x00], g0
|
||||
mov dword [g0_stack_hi], stack_top
|
||||
mov dword [g0_stack_lo], stack_bottom
|
||||
mov dword [g0_stackguard0], stack_bottom
|
||||
|
||||
; push multiboot info ptr to the stack and call the kernel entrypoint
|
||||
push ebx
|
||||
extern kernel.Kmain
|
||||
call kernel.Kmain
|
||||
|
||||
; kmain should never return
|
||||
mov edi, err_kmain_returned
|
||||
call write_string
|
||||
|
||||
; Main should never return; halt the CPU
|
||||
halt:
|
||||
cli
|
||||
hlt
|
||||
|
||||
unsupported_bootloader:
|
||||
mov edi, err_unsupported_bootloader
|
||||
call write_string
|
||||
jmp halt
|
||||
.end:
|
||||
|
||||
;------------------------------------------------------------------------------
|
||||
; Write the NULL-terminated string contained in edi to the screen using white
|
||||
; text on red background. Assumes that text-mode is enabled and that its
|
||||
; physical address is 0xb8000.
|
||||
;------------------------------------------------------------------------------
|
||||
write_string:
|
||||
push eax
|
||||
push ebx
|
||||
|
||||
mov ebx,0xb8000
|
||||
mov ah, 0x4F
|
||||
next_char:
|
||||
mov al, byte[edi]
|
||||
test al, al
|
||||
jz done
|
||||
|
||||
mov word [ebx], ax
|
||||
add ebx, 2
|
||||
inc edi
|
||||
jmp next_char
|
||||
|
||||
done:
|
||||
pop ebx
|
||||
pop eax
|
||||
ret
|
||||
|
||||
;------------------------------------------------------------------------------
|
||||
; Load GDT and flush CPU caches
|
||||
;------------------------------------------------------------------------------
|
||||
|
||||
_rt0_load_gdt:
|
||||
push eax
|
||||
push ebx
|
||||
|
||||
; Go code uses the GS register to access the TLS. Set the base address
|
||||
; for the GS descriptor to point to our tls0 table
|
||||
mov eax, tls0
|
||||
mov ebx, gdt0_gs_seg
|
||||
mov [ebx+2], al
|
||||
mov [ebx+3], ah
|
||||
shr eax, 16
|
||||
mov [ebx+4], al
|
||||
|
||||
lgdt [gdt0_desc]
|
||||
|
||||
; GDT has been loaded but the CPU still has the previous GDT data in cache.
|
||||
; We need to manually update the descriptors and use a JMP command to set
|
||||
; the CS segment descriptor
|
||||
jmp CS_SEG:update_descriptors
|
||||
update_descriptors:
|
||||
mov ax, DS_SEG
|
||||
mov ds, ax
|
||||
mov es, ax
|
||||
mov fs, ax
|
||||
mov ax, GS_SEG
|
||||
mov gs, ax
|
||||
|
||||
pop ebx
|
||||
pop eax
|
||||
ret
|
||||
|
||||
;------------------------------------------------------------------------------
|
||||
; GDT definition
|
||||
;------------------------------------------------------------------------------
|
||||
%include "gdt.inc"
|
||||
|
||||
align 2
|
||||
gdt0:
|
||||
|
||||
gdt0_nil_seg: GDT_ENTRY_32 0x00, 0x0, 0x0, 0x0 ; nil descriptor (not used by CPU but required by some emulators)
|
||||
gdt0_cs_seg: GDT_ENTRY_32 0x00, 0xFFFFF, SEG_EXEC | SEG_R, SEG_GRAN_4K_PAGE ; code descriptor
|
||||
gdt0_ds_seg: GDT_ENTRY_32 0x00, 0xFFFFF, SEG_NOEXEC | SEG_W, SEG_GRAN_4K_PAGE ; data descriptor
|
||||
gdt0_gs_seg: GDT_ENTRY_32 0x00, 0x40, SEG_NOEXEC | SEG_W, SEG_GRAN_BYTE ; TLS descriptor (required in order to use go segmented stacks)
|
||||
|
||||
gdt0_desc:
|
||||
dw gdt0_desc - gdt0 - 1 ; gdt size should be 1 byte less than actual length
|
||||
dd gdt0
|
||||
|
||||
NULL_SEG equ gdt0_nil_seg - gdt0
|
||||
CS_SEG equ gdt0_cs_seg - gdt0
|
||||
DS_SEG equ gdt0_ds_seg - gdt0
|
||||
GS_SEG equ gdt0_gs_seg - gdt0
|
||||
|
||||
;------------------------------------------------------------------------------
|
||||
; Enable SSE support. Code taken from:
|
||||
; http://wiki.osdev.org/SSE#Checking_for_SSE
|
||||
;------------------------------------------------------------------------------
|
||||
_rt0_enable_sse:
|
||||
pushad
|
||||
|
||||
; check for SSE
|
||||
mov eax, 0x1
|
||||
cpuid
|
||||
test edx, 1<<25
|
||||
jz .no_sse
|
||||
|
||||
; enable SSE
|
||||
mov eax, cr0
|
||||
and ax, 0xFFFB ; clear coprocessor emulation CR0.EM
|
||||
or ax, 0x2 ; set coprocessor monitoring CR0.MP
|
||||
mov cr0, eax
|
||||
mov eax, cr4
|
||||
or ax, 3 << 9 ; set CR4.OSFXSR and CR4.OSXMMEXCPT at the same time
|
||||
mov cr4, eax
|
||||
|
||||
popad
|
||||
ret
|
||||
.no_sse:
|
||||
mov edi, err_sse_not_available
|
||||
call write_string
|
||||
|
||||
cli
|
||||
hlt
|
@ -1,35 +0,0 @@
|
||||
ENTRY(_rt0_entry)
|
||||
|
||||
SECTIONS {
|
||||
/* Kernel starts at 1M */
|
||||
. = 1M;
|
||||
|
||||
/* ensure that the multiboot header is at the beginning */
|
||||
.multiboot :
|
||||
{
|
||||
*(.multiboot_header)
|
||||
}
|
||||
|
||||
.text BLOCK(4K) : ALIGN(4K)
|
||||
{
|
||||
*(.text)
|
||||
}
|
||||
|
||||
/* Read-only data. */
|
||||
.rodata BLOCK(4K) : ALIGN(4K)
|
||||
{
|
||||
*(.rodata)
|
||||
}
|
||||
|
||||
/* Read-write data (initialized) */
|
||||
.data BLOCK(4K) : ALIGN(4K)
|
||||
{
|
||||
*(.data)
|
||||
}
|
||||
|
||||
.bss BLOCK(4K) : ALIGN(4K)
|
||||
{
|
||||
*(COMMON)
|
||||
*(.bss)
|
||||
}
|
||||
}
|
24
arch/x86_64/asm/cgo_stubs.s
Normal file
24
arch/x86_64/asm/cgo_stubs.s
Normal file
@ -0,0 +1,24 @@
|
||||
; vim: set ft=nasm :
|
||||
|
||||
section .text
|
||||
bits 64
|
||||
|
||||
global x_cgo_callers
|
||||
global x_cgo_init
|
||||
global x_cgo_mmap
|
||||
global x_cgo_notify_runtime_init_done
|
||||
global x_cgo_sigaction
|
||||
global x_cgo_thread_start
|
||||
global x_cgo_setenv
|
||||
global x_cgo_unsetenv
|
||||
|
||||
; Stubs for missing cgo functions to keep the linker happy
|
||||
x_cgo_callers:
|
||||
x_cgo_init:
|
||||
x_cgo_mmap:
|
||||
x_cgo_notify_runtime_init_done:
|
||||
x_cgo_sigaction:
|
||||
x_cgo_thread_start:
|
||||
x_cgo_setenv:
|
||||
x_cgo_unsetenv:
|
||||
ret
|
12
arch/x86_64/asm/constants.inc
Normal file
12
arch/x86_64/asm/constants.inc
Normal file
@ -0,0 +1,12 @@
|
||||
; vim: set ft=nasm :
|
||||
|
||||
; The bootloader loads the kernel at LOAD_ADDRESS and jumps to the rt0_32 entrypoint
|
||||
; at this address.
|
||||
LOAD_ADDRESS equ 0x100000
|
||||
|
||||
; Page offset is the start of the 48-bit upper half canonical memory region
|
||||
; The kernel is compiled with a VMA equal to PAGE_OFFSET + LOAD_ADDRESS but
|
||||
; loaded at physical address LOAD_ADDRESS.
|
||||
PAGE_OFFSET equ 0xffff800000000000
|
||||
|
||||
|
0
arch/x86_64/asm/data.s
Normal file
0
arch/x86_64/asm/data.s
Normal file
356
arch/x86_64/asm/rt0_32.s
Normal file
356
arch/x86_64/asm/rt0_32.s
Normal file
@ -0,0 +1,356 @@
|
||||
; vim: set ft=nasm :
|
||||
%include "constants.inc"
|
||||
|
||||
section .data
|
||||
align 4
|
||||
|
||||
; GDT definition
|
||||
gdt0:
|
||||
gdt0_nil_seg: dw 0 ; Limit (low)
|
||||
dw 0 ; Base (low)
|
||||
db 0 ; Base (middle)
|
||||
db 0 ; Access (exec/read)
|
||||
db 0 ; Granularity
|
||||
db 0 ; Base (high)
|
||||
gdt0_cs_seg: dw 0 ; Limit (low)
|
||||
dw 0 ; Base (low)
|
||||
db 0 ; Base (middle)
|
||||
db 10011010b ; Access (exec/read)
|
||||
db 00100000b ; Granularity
|
||||
db 0 ; Base (high)
|
||||
gdt0_ds_seg: dw 0 ; Limit (low)
|
||||
dw 0 ; Base (low)
|
||||
db 0 ; Base (middle)
|
||||
db 10010010b ; Access (read/write)
|
||||
db 00000000b ; Granularity
|
||||
db 0 ; Base (high)
|
||||
|
||||
gdt0_desc:
|
||||
dw $ - gdt0 - 1 ; gdt size should be 1 byte less than actual length
|
||||
dq gdt0 - PAGE_OFFSET
|
||||
|
||||
NULL_SEG equ gdt0_nil_seg - gdt0
|
||||
CS_SEG equ gdt0_cs_seg - gdt0
|
||||
DS_SEG equ gdt0_ds_seg - gdt0
|
||||
|
||||
;------------------------------------------------------------------------------
|
||||
; Error messages
|
||||
;------------------------------------------------------------------------------
|
||||
err_unsupported_bootloader db '[rt0_32] kernel not loaded by multiboot-compliant bootloader', 0
|
||||
err_multiboot_data_too_big db '[rt0_32] multiboot information data length exceeds local buffer size', 0
|
||||
err_cpuid_not_supported db '[rt0_32] the processor does not support the CPUID instruction', 0
|
||||
err_longmode_not_supported db '[rt0_32] the processor does not support longmode which is required by this kernel', 0
|
||||
err_sse_not_supported db '[rt0_32] the processor does not support SSE instructions which are required by this kernel', 0
|
||||
|
||||
section .bss
|
||||
align 4096
|
||||
|
||||
; Reserve 3 pages for the initial page tables
|
||||
page_table_l4: resb 4096
|
||||
page_table_l3: resb 4096
|
||||
page_table_l2: resb 4096
|
||||
|
||||
; Reserve 16K for storing multiboot data and another 16K for the kernel stack
|
||||
global multiboot_data ; Make this available to the 64-bit entrypoint
|
||||
global stack_bottom
|
||||
global stack_top
|
||||
multiboot_data: resb 16384
|
||||
stack_bottom: resb 16384
|
||||
stack_top:
|
||||
|
||||
section .rt0
|
||||
bits 32
|
||||
align 4
|
||||
|
||||
;------------------------------------------------------------------------------
|
||||
; Kernel 32-bit entry point
|
||||
;
|
||||
; The boot loader will jump to this symbol after setting up the CPU according
|
||||
; to the multiboot standard. At this point:
|
||||
; - A20 is enabled
|
||||
; - The CPU is using 32-bit protected mode
|
||||
; - Interrupts are disabled
|
||||
; - Paging is disabled
|
||||
; - EAX contains the magic value ‘0x36d76289’; the presence of this value indicates
|
||||
; to the operating system that it was loaded by a Multiboot-compliant boot loader
|
||||
; - EBX contains the 32-bit physical address of the Multiboot information structure
|
||||
;------------------------------------------------------------------------------
|
||||
global _rt0_32_entry
|
||||
_rt0_32_entry:
|
||||
; Provide a stack
|
||||
mov esp, stack_top - PAGE_OFFSET
|
||||
|
||||
; Ensure we were booted by a bootloader supporting multiboot
|
||||
cmp eax, 0x36d76289
|
||||
jne _rt0_32_entry.unsupported_bootloader
|
||||
|
||||
; Copy multiboot struct to our own buffer
|
||||
call _rt0_copy_multiboot_data
|
||||
|
||||
; Check processor features
|
||||
call _rt0_check_cpuid_support
|
||||
call _rt0_check_longmode_support
|
||||
call _rt0_check_sse_support
|
||||
|
||||
; Setup initial page tables, enable paging and enter longmode
|
||||
call _rt0_populate_initial_page_tables
|
||||
call _rt0_enter_long_mode
|
||||
|
||||
call _rt0_64_entry_trampoline
|
||||
|
||||
.unsupported_bootloader:
|
||||
mov edi, err_unsupported_bootloader - PAGE_OFFSET
|
||||
call write_string
|
||||
jmp _rt0_32_entry.halt
|
||||
|
||||
.halt:
|
||||
cli
|
||||
hlt
|
||||
|
||||
;------------------------------------------------------------------------------
|
||||
; Copy multiboot information blocks from the address pointed to by ebx into a
|
||||
; local buffer. This enables the kernel code to access them once paging is enabled.
|
||||
;------------------------------------------------------------------------------
|
||||
_rt0_copy_multiboot_data:
|
||||
mov esi, ebx
|
||||
mov edi, multiboot_data - PAGE_OFFSET
|
||||
|
||||
mov ecx, dword [esi]
|
||||
cmp ecx, 16384
|
||||
jbe _rt0_copy_multiboot_data.copy ; unsigned compare: a size with bit 31 set must not pass the bound check as a negative value
|
||||
|
||||
mov edi, err_multiboot_data_too_big - PAGE_OFFSET
|
||||
call write_string
|
||||
jmp _rt0_32_entry.halt
|
||||
|
||||
.copy:
|
||||
test ecx, ecx
|
||||
jz _rt0_copy_multiboot_data.done
|
||||
|
||||
mov eax, dword[esi]
|
||||
mov dword [edi], eax
|
||||
add esi, 4
|
||||
add edi, 4
|
||||
sub ecx, 4
|
||||
jmp _rt0_copy_multiboot_data.copy
|
||||
|
||||
.done:
|
||||
ret
|
||||
|
||||
;------------------------------------------------------------------------------
|
||||
; Check that the processor supports the CPUID instruction.
|
||||
;
|
||||
; To check if CPUID is supported, we need to attempt to flip the ID bit (bit 21)
|
||||
; in the FLAGS register. If that works, CPUID is available.
|
||||
;
|
||||
; Code taken from: http://wiki.osdev.org/Setting_Up_Long_Mode#x86_or_x86-64
|
||||
;------------------------------------------------------------------------------
|
||||
_rt0_check_cpuid_support:
|
||||
; Copy FLAGS in to EAX via stack
|
||||
pushfd
|
||||
pop eax
|
||||
|
||||
; Copy to ECX as well for comparing later on
|
||||
mov ecx, eax
|
||||
|
||||
; Flip the ID bit
|
||||
xor eax, 1 << 21
|
||||
|
||||
; Copy EAX to FLAGS via the stack
|
||||
push eax
|
||||
popfd
|
||||
|
||||
; Copy FLAGS back to EAX (with the flipped bit if CPUID is supported)
|
||||
pushfd
|
||||
pop eax
|
||||
|
||||
; Restore FLAGS from the old version stored in ECX (i.e. flipping the
|
||||
; ID bit back if it was ever flipped).
|
||||
push ecx
|
||||
popfd
|
||||
|
||||
; Compare EAX and ECX. If they are equal then that means the bit
|
||||
; wasn't flipped, and CPUID isn't supported.
|
||||
cmp eax, ecx
|
||||
je _rt0_check_cpuid_support.no_cpuid
|
||||
ret
|
||||
|
||||
.no_cpuid:
|
||||
mov edi, err_cpuid_not_supported - PAGE_OFFSET
|
||||
call write_string
|
||||
jmp _rt0_32_entry.halt
|
||||
|
||||
;------------------------------------------------------------------------------
|
||||
; Check that the processor supports long mode
|
||||
; Code taken from: http://wiki.osdev.org/Setting_Up_Long_Mode#x86_or_x86-64
|
||||
;------------------------------------------------------------------------------
|
||||
_rt0_check_longmode_support:
|
||||
; To check for longmode support we need to ensure that the CPUID instruction
|
||||
; can report it. To do this we need to query it first.
|
||||
mov eax, 0x80000000 ; Set the A-register to 0x80000000.
|
||||
cpuid
|
||||
cmp eax, 0x80000001 ; We need at least 0x80000001 to check for long mode.
|
||||
jb _rt0_check_longmode_support.no_long_mode
|
||||
|
||||
mov eax, 0x80000001 ; Set the A-register to 0x80000001.
|
||||
cpuid
|
||||
test edx, 1 << 29 ; Test if the LM-bit, which is bit 29, is set in the D-register.
|
||||
jz _rt0_check_longmode_support.no_long_mode
|
||||
ret
|
||||
|
||||
.no_long_mode:
|
||||
mov edi, err_longmode_not_supported - PAGE_OFFSET
|
||||
call write_string
|
||||
jmp _rt0_32_entry.halt
|
||||
|
||||
;------------------------------------------------------------------------------
|
||||
; Check for and enable SSE support. Code taken from:
|
||||
; http://wiki.osdev.org/SSE#Checking_for_SSE
|
||||
;------------------------------------------------------------------------------
|
||||
_rt0_check_sse_support:
|
||||
; check for SSE
|
||||
mov eax, 0x1
|
||||
cpuid
|
||||
test edx, 1<<25
|
||||
jz _rt0_check_sse_support.no_sse
|
||||
|
||||
; Enable SSE
|
||||
mov eax, cr0
|
||||
and ax, 0xfffb ; Clear coprocessor emulation CR0.EM
|
||||
or ax, 0x2 ; Set coprocessor monitoring CR0.MP
|
||||
mov cr0, eax
|
||||
mov eax, cr4
|
||||
or ax, 3 << 9 ; Set CR4.OSFXSR and CR4.OSXMMEXCPT at the same time
|
||||
mov cr4, eax
|
||||
|
||||
ret
|
||||
.no_sse:
|
||||
mov edi, err_sse_not_supported - PAGE_OFFSET
|
||||
call write_string
|
||||
jmp _rt0_32_entry.halt
|
||||
|
||||
;------------------------------------------------------------------------------
|
||||
; Setup minimal page tables to allow access to the following regions:
|
||||
; - 0 to 8M
|
||||
; - PAGE_OFFSET to PAGE_OFFSET + 8M
|
||||
;
|
||||
; The second region mapping allows us to access the kernel at its VMA when
|
||||
; paging is enabled.
|
||||
;------------------------------------------------------------------------------
|
||||
PAGE_PRESENT equ (1 << 0)
|
||||
PAGE_WRITABLE equ (1 << 1)
|
||||
PAGE_2MB equ (1 << 7)
|
||||
|
||||
_rt0_populate_initial_page_tables:
|
||||
; The CPU uses bits 39-47 of the virtual address as an index to the P4 table.
|
||||
mov eax, page_table_l3 - PAGE_OFFSET
|
||||
or eax, PAGE_PRESENT | PAGE_WRITABLE
|
||||
mov ebx, page_table_l4 - PAGE_OFFSET
|
||||
mov [ebx], eax
|
||||
|
||||
; Also map the addresses starting at PAGE_OFFSET to the same P3 table.
|
||||
; To find the P4 index for PAGE_OFFSET we need to extract bits 39-47
|
||||
; of its address.
|
||||
mov ecx, (PAGE_OFFSET >> 39) & 511
|
||||
mov [ebx + ecx*8], eax
|
||||
|
||||
; The CPU uses bits 30-38 as an index to the P3 table. We just need to map
|
||||
; entry 0 from the P3 table to point to the P2 table.
|
||||
mov eax, page_table_l2 - PAGE_OFFSET
|
||||
or eax, PAGE_PRESENT | PAGE_WRITABLE
|
||||
mov ebx, page_table_l3 - PAGE_OFFSET
|
||||
mov [ebx], eax
|
||||
|
||||
; For the L2 table we enable the huge page bit which allows us to specify
|
||||
; 2M pages without needing to use the L1 table. To cover the required
|
||||
; 0-8M region we need to provide 4 2M page entries at indices 0 to 3.
|
||||
mov ecx, 0
|
||||
mov ebx, page_table_l2 - PAGE_OFFSET
|
||||
.next_page:
|
||||
mov eax, 1 << 21 ; 2M
|
||||
mul ecx ; eax *= ecx
|
||||
or eax, PAGE_PRESENT | PAGE_WRITABLE | PAGE_2MB
|
||||
mov [ebx + ecx*8], eax
|
||||
|
||||
inc ecx
|
||||
cmp ecx, 4
|
||||
jne _rt0_populate_initial_page_tables.next_page
|
||||
|
||||
ret
|
||||
|
||||
;------------------------------------------------------------------------------
|
||||
; Load P4 table, enable PAE, enter long mode and finally enable paging
|
||||
;------------------------------------------------------------------------------
|
||||
_rt0_enter_long_mode:
|
||||
; Load page table map pointer to cr3
|
||||
mov eax, page_table_l4 - PAGE_OFFSET
|
||||
mov cr3, eax
|
||||
|
||||
; Enable PAE support
|
||||
mov eax, cr4
|
||||
or eax, 1 << 5
|
||||
mov cr4, eax
|
||||
|
||||
; Now enable long mode by modifying the EFER MSR
|
||||
mov ecx, 0xc0000080
|
||||
rdmsr ; read msr value to eax
|
||||
or eax, 1 << 8
|
||||
wrmsr
|
||||
|
||||
; Finally enable paging
|
||||
mov eax, cr0
|
||||
or eax, 1 << 31
|
||||
mov cr0, eax
|
||||
|
||||
; We are in 32-bit compatibility submode. We need to load a 64bit GDT
|
||||
; and perform a far jmp to switch to long mode
|
||||
mov eax, gdt0_desc - PAGE_OFFSET
|
||||
lgdt [eax]
|
||||
|
||||
; set ds and es segments
|
||||
; to set the cs segment we need to perform a far jmp
|
||||
mov ax, DS_SEG
|
||||
mov ds, ax
|
||||
mov es, ax
|
||||
mov fs, ax
|
||||
mov gs, ax
|
||||
|
||||
jmp CS_SEG:.flush_gdt - PAGE_OFFSET
|
||||
.flush_gdt:
|
||||
ret
|
||||
|
||||
;------------------------------------------------------------------------------
|
||||
; Write the NULL-terminated string contained in edi to the screen using white
|
||||
; text on red background. Assumes that text-mode is enabled and that its
|
||||
; physical address is 0xb8000.
|
||||
;------------------------------------------------------------------------------
|
||||
write_string:
|
||||
mov ebx,0xb8000
|
||||
mov ah, 0x4F
|
||||
.next_char:
|
||||
mov al, byte[edi]
|
||||
test al, al
|
||||
jz write_string.done
|
||||
|
||||
mov word [ebx], ax
|
||||
add ebx, 2
|
||||
inc edi
|
||||
jmp write_string.next_char
|
||||
|
||||
.done:
|
||||
ret
|
||||
|
||||
|
||||
;------------------------------------------------------------------------------
|
||||
; Set up the stack pointer to the virtual address of the stack and jump to the
|
||||
; 64-bit entrypoint.
|
||||
;------------------------------------------------------------------------------
|
||||
bits 64
|
||||
_rt0_64_entry_trampoline:
|
||||
mov rsp, stack_top ; now that paging is enabled we can load the stack
|
||||
; with the virtual address of the allocated stack.
|
||||
|
||||
; Jump to 64-bit entry
|
||||
extern _rt0_64_entry
|
||||
mov rax, _rt0_64_entry
|
||||
jmp rax
|
95
arch/x86_64/asm/rt0_64.s
Normal file
95
arch/x86_64/asm/rt0_64.s
Normal file
@ -0,0 +1,95 @@
|
||||
; vim: set ft=nasm :
|
||||
|
||||
section .bss
|
||||
align 8
|
||||
|
||||
r0_g_ptr: resq 1 ; pointer to the current g struct; it sits 8 bytes below r0_g, so it is reachable at fs:-8 once the FS base is set to r0_g
|
||||
r0_g:
|
||||
r0_g_stack_lo: resq 1
|
||||
r0_g_stack_hi: resq 1
|
||||
r0_g_stackguard0: resq 1 ; rsp compared to this value in go stack growth prologue
|
||||
r0_g_stackguard1: resq 1 ; rsp compared to this value in C stack growth prologue
|
||||
|
||||
section .text
|
||||
bits 64
|
||||
|
||||
;------------------------------------------------------------------------------
|
||||
; Kernel 64-bit entry point
|
||||
;
|
||||
; The 32-bit entrypoint code jumps to this entrypoint after:
|
||||
; - it has entered long mode and enabled paging
|
||||
; - it has loaded a 64bit GDT
|
||||
; - it has set up identity paging for the physical 0-8M region and the
|
||||
; PAGE_OFFSET to PAGE_OFFSET+8M region.
|
||||
;------------------------------------------------------------------------------
|
||||
global _rt0_64_entry
|
||||
_rt0_64_entry:
|
||||
; According to the x86_64 ABI, the fs:0 should point to the address of
|
||||
; the user-space thread structure. The actual TLS structure is located
|
||||
; just before that (aligned). Go code tries to fetch the address to the
|
||||
; active go-routine's g struct by accessing fs:-8. What we need to do
|
||||
; is to setup a mock g0 struct, populate its stack_lo/hi/guard fields
|
||||
; and then use wrmsr to update the FS register
|
||||
extern stack_top
|
||||
extern stack_bottom
|
||||
|
||||
; Setup r0_g
|
||||
mov rax, stack_bottom
|
||||
mov rbx, stack_top
|
||||
mov rsi, r0_g
|
||||
mov qword [rsi+0], rax ; stack_lo
|
||||
mov qword [rsi+8], rbx ; stack_hi
|
||||
mov qword [rsi+16], rax ; stackguard0
|
||||
mov rax, r0_g_ptr
|
||||
mov qword [rax], rsi
|
||||
|
||||
; Load 64-bit FS register address
|
||||
; rax -> lower 32 bits
|
||||
; rdx -> upper 32 bits
|
||||
mov ecx, 0xc0000100 ; fs_base
|
||||
mov rax, rsi ; lower 32 bits
|
||||
shr rsi, 32
|
||||
mov rdx, rsi ; high 32 bits
|
||||
wrmsr
|
||||
|
||||
; Call the kernel entry point passing a pointer to the multiboot data
|
||||
; copied by the 32-bit entry code
|
||||
extern multiboot_data
|
||||
extern kernel.Kmain
|
||||
|
||||
mov rax, multiboot_data
|
||||
push rax
|
||||
call kernel.Kmain
|
||||
|
||||
; Main should never return; halt the CPU
|
||||
mov rdi, err_kmain_returned
|
||||
call write_string
|
||||
|
||||
cli
|
||||
hlt
|
||||
|
||||
;------------------------------------------------------------------------------
|
||||
; Error messages
|
||||
;------------------------------------------------------------------------------
|
||||
err_kmain_returned db '[rt0_64] kmain returned', 0
|
||||
|
||||
;------------------------------------------------------------------------------
|
||||
; Write the NULL-terminated string contained in rdi to the screen using white
|
||||
; text on red background. Assumes that text-mode is enabled and that its
|
||||
; physical address is 0xb8000.
|
||||
;------------------------------------------------------------------------------
|
||||
write_string:
|
||||
mov rbx,0xb8000
|
||||
mov ah, 0x4F
|
||||
.next_char:
|
||||
mov al, byte[rdi]
|
||||
test al, al
|
||||
jz write_string.done
|
||||
|
||||
mov word [rbx], ax
|
||||
add rbx, 2
|
||||
inc rdi
|
||||
jmp write_string.next_char
|
||||
|
||||
.done:
|
||||
ret
|
39
arch/x86_64/script/linker.ld.in
Normal file
39
arch/x86_64/script/linker.ld.in
Normal file
@ -0,0 +1,39 @@
|
||||
VMA = PAGE_OFFSET + LOAD_ADDRESS;
|
||||
|
||||
ENTRY(_rt0_32_entry)
|
||||
|
||||
SECTIONS {
|
||||
/* Set the kernel VMA at PAGE_OFFSET + 1M
|
||||
* but load it at physical address 1M */
|
||||
. = VMA;
|
||||
|
||||
.text BLOCK(4K) : AT(ADDR(.text) - PAGE_OFFSET)
|
||||
{
|
||||
/* The multiboot header must be present in the first 4K of the kernel
|
||||
* image so that the bootloader can find it */
|
||||
*(.multiboot_header)
|
||||
|
||||
*(.rt0)
|
||||
|
||||
*(.text)
|
||||
}
|
||||
|
||||
/* Read-only data. */
|
||||
.rodata ALIGN(4K) : AT(ADDR(.rodata) - PAGE_OFFSET)
|
||||
{
|
||||
*(.rodata)
|
||||
}
|
||||
|
||||
/* Read-write data (initialized) */
|
||||
.data ALIGN(4K) : AT(ADDR(.data) - PAGE_OFFSET)
|
||||
{
|
||||
*(.data)
|
||||
}
|
||||
|
||||
/* Read-write data (zeroed) */
|
||||
.bss ALIGN(4K) : AT(ADDR(.bss) - PAGE_OFFSET)
|
||||
{
|
||||
*(COMMON)
|
||||
*(.bss)
|
||||
}
|
||||
}
|
@ -5,6 +5,7 @@ import (
|
||||
|
||||
"github.com/achilleasa/gopher-os/kernel/hal"
|
||||
"github.com/achilleasa/gopher-os/kernel/hal/multiboot"
|
||||
"github.com/achilleasa/gopher-os/kernel/kfmt/early"
|
||||
)
|
||||
|
||||
// Kmain is the only Go symbol that is visible (exported) from the rt0 initialization
|
||||
@ -18,10 +19,15 @@ import (
|
||||
// Kmain is not expected to return. If it does, the rt0 code will halt the CPU.
|
||||
//
|
||||
//go:noinline
|
||||
func Kmain(multibootInfoPtr uint32) {
|
||||
multiboot.SetInfoPtr(uintptr(multibootInfoPtr))
|
||||
func Kmain(multibootInfoPtr uintptr) {
|
||||
multiboot.SetInfoPtr(multibootInfoPtr)
|
||||
|
||||
// Initialize and clear the terminal
|
||||
hal.InitTerminal()
|
||||
hal.ActiveTerminal.Clear()
|
||||
early.Printf("Starting gopher-os\n")
|
||||
|
||||
// Prevent Kmain from returning
|
||||
for {
|
||||
}
|
||||
}
|
||||
|
2
stub.go
2
stub.go
@ -2,7 +2,7 @@ package main
|
||||
|
||||
import "github.com/achilleasa/gopher-os/kernel"
|
||||
|
||||
var multibootInfoPtr uint32
|
||||
var multibootInfoPtr uintptr
|
||||
|
||||
// main makes a dummy call to the actual kernel main entrypoint function. It
|
||||
// is intentionally defined to prevent the Go compiler from optimizing away the
|
||||
|
Loading…
x
Reference in New Issue
Block a user