mirror of
https://github.com/taigrr/gopher-os
synced 2025-01-18 04:43:13 -08:00
rt0_64: preserve XMM registers when calling gate handlers
Some gate handlers may invoke Go runtime code that clobbers the XMM registers that could be in use by the code that triggered the gate handler. One case where this happens is when a growing a slice and the new location for the slice data resides in a RO page with a CoW flag. In this scenario, runtime.growslice will invoke runtime.memmove which may take a code path that uses XMM registers for copying data around. During the copy, a page fault occurs and the kernel page fault handler will detect the CoW flag, allocate a new frame and copy the original data to the new frame before resuming execution. As the page copy is performed using the built-in copy function this will cause the XMM registers to be clobbered. To prevent this from happening, the asm gate code that gets executed when an exception occurs will now preserve the XMM regs on the stack before invoking the registered exception handler. In addition, for Go 1.9+ the gate code will also temporarily disable use of AVX instructions by runtime.memmove by setting runtime.useAVXmemmove to 0. This is required so the gate does not need to also preserve any AVX registers.
This commit is contained in:
parent
f1cf7466c7
commit
fd862d0d15
6
Makefile
6
Makefile
@ -32,7 +32,8 @@ GOROOT := $(shell $(GO) env GOROOT)
|
|||||||
GC_FLAGS ?=
|
GC_FLAGS ?=
|
||||||
LD_FLAGS := -n -T $(BUILD_DIR)/linker.ld -static --no-ld-generated-unwind-info
|
LD_FLAGS := -n -T $(BUILD_DIR)/linker.ld -static --no-ld-generated-unwind-info
|
||||||
AS_FLAGS := -g -f elf64 -F dwarf -I $(BUILD_DIR)/ -I src/arch/$(ARCH)/asm/ \
|
AS_FLAGS := -g -f elf64 -F dwarf -I $(BUILD_DIR)/ -I src/arch/$(ARCH)/asm/ \
|
||||||
-dNUM_REDIRECTS=$(shell GOPATH=$(GOPATH) $(GO) run tools/redirects/redirects.go count)
|
-dNUM_REDIRECTS=$(shell GOPATH=$(GOPATH) $(GO) run tools/redirects/redirects.go count) \
|
||||||
|
-dWITH_RUNTIME_AVXMEMMOVE=$(shell grep -r "var useAVXmemmove" $(GOROOT)/src/runtime/ | wc -l)
|
||||||
|
|
||||||
MIN_OBJCOPY_VERSION := 2.26.0
|
MIN_OBJCOPY_VERSION := 2.26.0
|
||||||
HAVE_VALID_OBJCOPY := $(shell objcopy -V | head -1 | awk -F ' ' '{print "$(MIN_OBJCOPY_VERSION)\n" $$NF}' | sort -ct. -k1,1n -k2,2n && echo "y")
|
HAVE_VALID_OBJCOPY := $(shell objcopy -V | head -1 | awk -F ' ' '{print "$(MIN_OBJCOPY_VERSION)\n" $$NF}' | sort -ct. -k1,1n -k2,2n && echo "y")
|
||||||
@ -74,13 +75,14 @@ go.o:
|
|||||||
@# objcopy to make that symbol exportable. Since nasm does not support externs
|
@# objcopy to make that symbol exportable. Since nasm does not support externs
|
||||||
@# with slashes we create a global symbol alias for kernel.Kmain
|
@# with slashes we create a global symbol alias for kernel.Kmain
|
||||||
@echo "[objcopy] create kernel.Kmain alias to gopheros/kernel/kmain.Kmain"
|
@echo "[objcopy] create kernel.Kmain alias to gopheros/kernel/kmain.Kmain"
|
||||||
@echo "[objcopy] globalizing symbols {_rt0_interrupt_handlers, runtime.g0/m0/physPageSize}"
|
@echo "[objcopy] globalizing symbols {_rt0_interrupt_handlers, runtime.g0/m0/physPageSize/useAVXmemmove}"
|
||||||
@objcopy \
|
@objcopy \
|
||||||
--add-symbol kernel.Kmain=.text:0x`nm $(BUILD_DIR)/go.o | grep "kmain.Kmain$$" | cut -d' ' -f1` \
|
--add-symbol kernel.Kmain=.text:0x`nm $(BUILD_DIR)/go.o | grep "kmain.Kmain$$" | cut -d' ' -f1` \
|
||||||
--globalize-symbol _rt0_interrupt_handlers \
|
--globalize-symbol _rt0_interrupt_handlers \
|
||||||
--globalize-symbol runtime.g0 \
|
--globalize-symbol runtime.g0 \
|
||||||
--globalize-symbol runtime.m0 \
|
--globalize-symbol runtime.m0 \
|
||||||
--globalize-symbol runtime.physPageSize \
|
--globalize-symbol runtime.physPageSize \
|
||||||
|
--globalize-symbol runtime.useAVXmemmove \
|
||||||
$(BUILD_DIR)/go.o $(BUILD_DIR)/go.o
|
$(BUILD_DIR)/go.o $(BUILD_DIR)/go.o
|
||||||
|
|
||||||
binutils_version_check:
|
binutils_version_check:
|
||||||
|
@ -27,6 +27,14 @@ _rt0_irq_handlers resq IDT_ENTRIES
|
|||||||
r0_g_ptr: resq 1
|
r0_g_ptr: resq 1
|
||||||
tcb_ptr: resq 1
|
tcb_ptr: resq 1
|
||||||
|
|
||||||
|
; Go < 1.9 does not define runtime.useAVXmemmove; to avoid linker errors define
|
||||||
|
; a dummy symbol so that the gate entry code can work as expected.
|
||||||
|
%if WITH_RUNTIME_AVXMEMMOVE == 0
|
||||||
|
runtime.useAVXmemmove resb 1
|
||||||
|
%else
|
||||||
|
extern runtime.useAVXmemmove
|
||||||
|
%endif
|
||||||
|
|
||||||
section .text
|
section .text
|
||||||
|
|
||||||
;------------------------------------------------------------------------------
|
;------------------------------------------------------------------------------
|
||||||
@ -182,7 +190,7 @@ _rt0_64_gate_entry_%+ gate_num:
|
|||||||
%assign gate_num gate_num+1
|
%assign gate_num gate_num+1
|
||||||
%endrep
|
%endrep
|
||||||
|
|
||||||
%macro save_regs 0
|
%macro save_gp_regs 0
|
||||||
push r15
|
push r15
|
||||||
push r14
|
push r14
|
||||||
push r13
|
push r13
|
||||||
@ -200,7 +208,7 @@ _rt0_64_gate_entry_%+ gate_num:
|
|||||||
push rax
|
push rax
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
%macro restore_regs 0
|
%macro restore_gp_regs 0
|
||||||
pop rax
|
pop rax
|
||||||
pop rbx
|
pop rbx
|
||||||
pop rcx
|
pop rcx
|
||||||
@ -218,97 +226,163 @@ _rt0_64_gate_entry_%+ gate_num:
|
|||||||
pop r15
|
pop r15
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
|
%macro save_xmm_regs 0
|
||||||
|
sub rsp, 16*16
|
||||||
|
movdqu [rsp+0*16], xmm0
|
||||||
|
movdqu [rsp+1*16], xmm1
|
||||||
|
movdqu [rsp+2*16], xmm2
|
||||||
|
movdqu [rsp+3*16], xmm3
|
||||||
|
movdqu [rsp+4*16], xmm4
|
||||||
|
movdqu [rsp+5*16], xmm5
|
||||||
|
movdqu [rsp+6*16], xmm6
|
||||||
|
movdqu [rsp+7*16], xmm7
|
||||||
|
movdqu [rsp+8*16], xmm8
|
||||||
|
movdqu [rsp+9*16], xmm9
|
||||||
|
movdqu [rsp+10*16], xmm10
|
||||||
|
movdqu [rsp+11*16], xmm11
|
||||||
|
movdqu [rsp+12*16], xmm12
|
||||||
|
movdqu [rsp+13*16], xmm13
|
||||||
|
movdqu [rsp+14*16], xmm14
|
||||||
|
movdqu [rsp+15*16], xmm15
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
%macro restore_xmm_regs 0
|
||||||
|
movdqu xmm0, [rsp+0*16]
|
||||||
|
movdqu xmm1, [rsp+1*16]
|
||||||
|
movdqu xmm2, [rsp+2*16]
|
||||||
|
movdqu xmm3, [rsp+3*16]
|
||||||
|
movdqu xmm4, [rsp+4*16]
|
||||||
|
movdqu xmm5, [rsp+5*16]
|
||||||
|
movdqu xmm6, [rsp+6*16]
|
||||||
|
movdqu xmm7, [rsp+7*16]
|
||||||
|
movdqu xmm8, [rsp+8*16]
|
||||||
|
movdqu xmm9, [rsp+9*16]
|
||||||
|
movdqu xmm10, [rsp+10*16]
|
||||||
|
movdqu xmm11, [rsp+11*16]
|
||||||
|
movdqu xmm12, [rsp+12*16]
|
||||||
|
movdqu xmm13, [rsp+13*16]
|
||||||
|
movdqu xmm14, [rsp+14*16]
|
||||||
|
movdqu xmm15, [rsp+15*16]
|
||||||
|
add rsp, 16*16
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
|
||||||
;------------------------------------------------------------------------------
|
;------------------------------------------------------------------------------
|
||||||
; This dispatcher is invoked by gate entries that expect a code to be pushed
|
; This dispatcher is invoked by gate entries that expect a code to be pushed
|
||||||
; by the CPU to the stack. It performs the following functions:
|
; by the CPU to the stack.
|
||||||
; - save registers
|
|
||||||
; - push pointer to saved regs
|
|
||||||
; - push pointer to stack frame
|
|
||||||
; - read and push exception code
|
|
||||||
; - invoke handler(code, &frame, ®s)
|
|
||||||
; - restore registers
|
|
||||||
; - pop exception code from stack so rsp points to the stack frame
|
|
||||||
;------------------------------------------------------------------------------
|
|
||||||
_rt0_64_gate_dispatcher_with_code:
|
|
||||||
; This is how the stack looks like when entering this function:
|
|
||||||
; (each item is 8-bytes wide)
|
|
||||||
;
|
;
|
||||||
;------------------
|
; This is the stack layout used by this function. Items are 8-bytes
|
||||||
; handler address | <- pushed by gate_entry_xxx (RSP points here)
|
; wide with the exception of the xmm regs that are 16 bytes wide
|
||||||
|
;
|
||||||
|
; ----------------|
|
||||||
|
; useAVXmemmove | <- original value of runtime.useAVXmemmove
|
||||||
;-----------------|
|
;-----------------|
|
||||||
; Exception code | <- needs to be removed from stack before calling iretq
|
; xmm regs (16) |
|
||||||
|
;-----------------| <- RBP will point at the last pushed GP reg
|
||||||
|
; gp regs (15) |
|
||||||
;-----------------|
|
;-----------------|
|
||||||
; RIP | <- exception frame
|
; handler address | <- pushed by gate_entry_xxx (RSP initially points here)
|
||||||
|
;-----------------|
|
||||||
|
; exception code | <- pushed by CPU (must be popped before returning)
|
||||||
|
;-----------------|
|
||||||
|
; RIP | <- pushed by CPU (exception frame)
|
||||||
; CS |
|
; CS |
|
||||||
; RFLAGS |
|
; RFLAGS |
|
||||||
; RSP |
|
; RSP |
|
||||||
; SS |
|
; SS |
|
||||||
;-----------------
|
;-----------------
|
||||||
|
;------------------------------------------------------------------------------
|
||||||
|
_rt0_64_gate_dispatcher_with_code:
|
||||||
cld
|
cld
|
||||||
|
|
||||||
; save regs and push a pointer to them
|
; save general-purpose regs
|
||||||
save_regs
|
save_gp_regs
|
||||||
mov rax, rsp ; rax points to saved rax
|
mov rbp, rsp ; rbp points to saved rax
|
||||||
push rax ; push pointer to saved regs
|
|
||||||
|
|
||||||
; push pointer to exception stack frame (we have used 15 qwords for the
|
; save xmm regs as the fault handler may clobber them by calling an
|
||||||
; saved registers plus one qword for the data pushed by the gate entry
|
; SSE-enabled runtime function like copy (calls runtime.memmove). In
|
||||||
; plus one extra qword to jump over the exception code)
|
; addition temporarily disable AVX support for runtime.memmove so we
|
||||||
add rax, 17*8
|
; don't need to also preserve the avx regs.
|
||||||
push rax
|
save_xmm_regs
|
||||||
|
mov rax, runtime.useAVXmemmove
|
||||||
; push exception code (located between the stack frame and the saved regs)
|
|
||||||
sub rax, 8
|
|
||||||
push qword [rax]
|
push qword [rax]
|
||||||
|
mov byte [rax], 0
|
||||||
|
|
||||||
call [rsp + 18*8] ; call registered irq handler
|
; push exception handler args and call registered handler
|
||||||
|
push qword [rbp] ; ptr to regs
|
||||||
|
push qword [rbp+17*8] ; ptr to exception frame
|
||||||
|
push qword [rbp+16*8] ; exception code
|
||||||
|
call qword [rbp+15*8]
|
||||||
|
add rsp, 3 * 8
|
||||||
|
|
||||||
add rsp, 3 * 8 ; unshift the pushed arguments so rsp points to the saved regs
|
; restore xmm regs and restore AVX support for runtime.memmove
|
||||||
restore_regs
|
mov rax, runtime.useAVXmemmove
|
||||||
|
pop rbx
|
||||||
|
mov byte [rax], bl
|
||||||
|
restore_xmm_regs
|
||||||
|
|
||||||
add rsp, 16 ; pop handler address and exception code off the stack before returning
|
; restore general purpose regs
|
||||||
|
restore_gp_regs
|
||||||
|
|
||||||
|
; pop handler address + exception code so RSP points to the stack frame.
|
||||||
|
add rsp, 2*8
|
||||||
iretq
|
iretq
|
||||||
|
|
||||||
;------------------------------------------------------------------------------
|
;------------------------------------------------------------------------------
|
||||||
; This dispatcher is invoked by gate entries that do not use exception codes.
|
; This dispatcher is invoked by gate entries that do not use exception codes.
|
||||||
; It performs the following functions:
|
|
||||||
; - save registers
|
|
||||||
; - push pointer to saved regs
|
|
||||||
; - push pointer to stack frame
|
|
||||||
; - invoke handler(&frame, ®s)
|
|
||||||
; - restore registers
|
|
||||||
;------------------------------------------------------------------------------
|
|
||||||
_rt0_64_gate_dispatcher_without_code:
|
|
||||||
; This is how the stack looks like when entering this function:
|
|
||||||
; (each item is 8-bytes wide)
|
|
||||||
;
|
;
|
||||||
;------------------
|
; This is the stack layout used by this function. Items are 8-bytes
|
||||||
; handler address | <- pushed by gate_entry_xxx (RSP points here)
|
; wide with the exception of the xmm regs that are 16 bytes wide
|
||||||
|
;
|
||||||
|
; ----------------|
|
||||||
|
; useAVXmemmove | <- original value of runtime.useAVXmemmove
|
||||||
;-----------------|
|
;-----------------|
|
||||||
; RIP | <- exception frame
|
; xmm regs (16) |
|
||||||
|
;-----------------| <- RBP will point at the last pushed GP reg
|
||||||
|
; gp regs (15) |
|
||||||
|
;-----------------|
|
||||||
|
; handler address | <- pushed by gate_entry_xxx (RSP initially points here)
|
||||||
|
;-----------------|
|
||||||
|
; RIP | <- pushed by CPU (exception frame)
|
||||||
; CS |
|
; CS |
|
||||||
; RFLAGS |
|
; RFLAGS |
|
||||||
; RSP |
|
; RSP |
|
||||||
; SS |
|
; SS |
|
||||||
;-----------------
|
;-----------------
|
||||||
|
;------------------------------------------------------------------------------
|
||||||
|
_rt0_64_gate_dispatcher_without_code:
|
||||||
cld
|
cld
|
||||||
|
|
||||||
; save regs and push a pointer to them
|
; save general-purpose regs
|
||||||
save_regs
|
save_gp_regs
|
||||||
mov rax, rsp ; rax points to saved rax
|
mov rbp, rsp ; rbp points to saved rax
|
||||||
push rax ; push pointer to saved regs
|
|
||||||
|
|
||||||
; push pointer to exception stack frame (we have used 15 qwords for the
|
; save xmm regs as the fault handler may clobber them by calling an
|
||||||
; saved registers plus one qword for the data pushed by the gate entry)
|
; SSE-enabled runtime function like copy (calls runtime.memmove). In
|
||||||
add rax, 16*8
|
; addition temporarily disable AVX support for runtime.memmove so we
|
||||||
push rax
|
; don't need to also preserve the avx regs.
|
||||||
|
save_xmm_regs
|
||||||
|
mov rax, runtime.useAVXmemmove
|
||||||
|
push qword [rax]
|
||||||
|
mov byte [rax], 0
|
||||||
|
|
||||||
call [rsp + 17*8] ; call registered irq handler
|
; push exception handler args and call registered handler
|
||||||
|
push qword [rbp] ; ptr to regs
|
||||||
|
push qword [rbp+16*8] ; ptr to exception frame
|
||||||
|
call qword [rbp+15*8]
|
||||||
|
add rsp, 2 * 8
|
||||||
|
|
||||||
add rsp, 2 * 8 ; unshift the pushed arguments so rsp points to the saved regs
|
; restore xmm regs and restore AVX support for runtime.memmove
|
||||||
restore_regs
|
mov rax, runtime.useAVXmemmove
|
||||||
|
pop rbx
|
||||||
|
mov byte [rax], bl
|
||||||
|
restore_xmm_regs
|
||||||
|
|
||||||
add rsp, 8 ; pop handler address off the stack before returning
|
; restore general purpose regs
|
||||||
|
restore_gp_regs
|
||||||
|
|
||||||
|
; pop handler address so RSP points to the stack frame.
|
||||||
|
add rsp, 8
|
||||||
iretq
|
iretq
|
||||||
|
|
||||||
;------------------------------------------------------------------------------
|
;------------------------------------------------------------------------------
|
||||||
|
Loading…
x
Reference in New Issue
Block a user