1
0
mirror of https://github.com/taigrr/gopher-os synced 2025-01-18 04:43:13 -08:00

rt0_64: preserve XMM registers when calling gate handlers

Some gate handlers may invoke Go runtime code that clobbers the XMM
registers that could be in use by the code that triggered the gate
handler.

One case where this happens is when a growing a slice and the new
location for the slice data resides in a RO page with a CoW flag.  In
this scenario, runtime.growslice will invoke runtime.memmove which may
take a code path that uses XMM registers for copying data around.

During the copy, a page fault occurs and the kernel page fault handler
will detect the CoW flag, allocate a new frame and copy the original
data to the new frame before resuming execution. As the page copy is
performed using the built-in copy function this will cause the XMM
registers to be clobbered.

To prevent this from happening, the asm gate code that gets executed
when an exception occurs will now preserve the XMM regs on the stack
before invoking the registered exception handler.

In addition, for Go 1.9+ the gate code will also temporarily disable use
of AVX instructions by runtime.memmove by setting runtime.useAVXmemmove
to 0. This is required so the gate does not need to also preserve any
AVX registers.
This commit is contained in:
Achilleas Anagnostopoulos 2018-04-27 20:13:57 +01:00
parent f1cf7466c7
commit fd862d0d15
2 changed files with 148 additions and 72 deletions

View File

@ -32,7 +32,8 @@ GOROOT := $(shell $(GO) env GOROOT)
GC_FLAGS ?= GC_FLAGS ?=
LD_FLAGS := -n -T $(BUILD_DIR)/linker.ld -static --no-ld-generated-unwind-info LD_FLAGS := -n -T $(BUILD_DIR)/linker.ld -static --no-ld-generated-unwind-info
AS_FLAGS := -g -f elf64 -F dwarf -I $(BUILD_DIR)/ -I src/arch/$(ARCH)/asm/ \ AS_FLAGS := -g -f elf64 -F dwarf -I $(BUILD_DIR)/ -I src/arch/$(ARCH)/asm/ \
-dNUM_REDIRECTS=$(shell GOPATH=$(GOPATH) $(GO) run tools/redirects/redirects.go count) -dNUM_REDIRECTS=$(shell GOPATH=$(GOPATH) $(GO) run tools/redirects/redirects.go count) \
-dWITH_RUNTIME_AVXMEMMOVE=$(shell grep -r "var useAVXmemmove" $(GOROOT)/src/runtime/ | wc -l)
MIN_OBJCOPY_VERSION := 2.26.0 MIN_OBJCOPY_VERSION := 2.26.0
HAVE_VALID_OBJCOPY := $(shell objcopy -V | head -1 | awk -F ' ' '{print "$(MIN_OBJCOPY_VERSION)\n" $$NF}' | sort -ct. -k1,1n -k2,2n && echo "y") HAVE_VALID_OBJCOPY := $(shell objcopy -V | head -1 | awk -F ' ' '{print "$(MIN_OBJCOPY_VERSION)\n" $$NF}' | sort -ct. -k1,1n -k2,2n && echo "y")
@ -74,13 +75,14 @@ go.o:
@# objcopy to make that symbol exportable. Since nasm does not support externs @# objcopy to make that symbol exportable. Since nasm does not support externs
@# with slashes we create a global symbol alias for kernel.Kmain @# with slashes we create a global symbol alias for kernel.Kmain
@echo "[objcopy] create kernel.Kmain alias to gopheros/kernel/kmain.Kmain" @echo "[objcopy] create kernel.Kmain alias to gopheros/kernel/kmain.Kmain"
@echo "[objcopy] globalizing symbols {_rt0_interrupt_handlers, runtime.g0/m0/physPageSize}" @echo "[objcopy] globalizing symbols {_rt0_interrupt_handlers, runtime.g0/m0/physPageSize/useAVXmemmove}"
@objcopy \ @objcopy \
--add-symbol kernel.Kmain=.text:0x`nm $(BUILD_DIR)/go.o | grep "kmain.Kmain$$" | cut -d' ' -f1` \ --add-symbol kernel.Kmain=.text:0x`nm $(BUILD_DIR)/go.o | grep "kmain.Kmain$$" | cut -d' ' -f1` \
--globalize-symbol _rt0_interrupt_handlers \ --globalize-symbol _rt0_interrupt_handlers \
--globalize-symbol runtime.g0 \ --globalize-symbol runtime.g0 \
--globalize-symbol runtime.m0 \ --globalize-symbol runtime.m0 \
--globalize-symbol runtime.physPageSize \ --globalize-symbol runtime.physPageSize \
--globalize-symbol runtime.useAVXmemmove \
$(BUILD_DIR)/go.o $(BUILD_DIR)/go.o $(BUILD_DIR)/go.o $(BUILD_DIR)/go.o
binutils_version_check: binutils_version_check:

View File

@ -27,6 +27,14 @@ _rt0_irq_handlers resq IDT_ENTRIES
r0_g_ptr: resq 1 r0_g_ptr: resq 1
tcb_ptr: resq 1 tcb_ptr: resq 1
; Go < 1.9 does not define runtime.useAVXmemmove; to avoid linker errors define
; a dummy symbol so that the gate entry code can work as expected.
%if WITH_RUNTIME_AVXMEMMOVE == 0
runtime.useAVXmemmove resb 1
%else
extern runtime.useAVXmemmove
%endif
section .text section .text
;------------------------------------------------------------------------------ ;------------------------------------------------------------------------------
@ -182,7 +190,7 @@ _rt0_64_gate_entry_%+ gate_num:
%assign gate_num gate_num+1 %assign gate_num gate_num+1
%endrep %endrep
%macro save_regs 0 %macro save_gp_regs 0
push r15 push r15
push r14 push r14
push r13 push r13
@ -200,7 +208,7 @@ _rt0_64_gate_entry_%+ gate_num:
push rax push rax
%endmacro %endmacro
%macro restore_regs 0 %macro restore_gp_regs 0
pop rax pop rax
pop rbx pop rbx
pop rcx pop rcx
@ -218,97 +226,163 @@ _rt0_64_gate_entry_%+ gate_num:
pop r15 pop r15
%endmacro %endmacro
%macro save_xmm_regs 0
sub rsp, 16*16
movdqu [rsp+0*16], xmm0
movdqu [rsp+1*16], xmm1
movdqu [rsp+2*16], xmm2
movdqu [rsp+3*16], xmm3
movdqu [rsp+4*16], xmm4
movdqu [rsp+5*16], xmm5
movdqu [rsp+6*16], xmm6
movdqu [rsp+7*16], xmm7
movdqu [rsp+8*16], xmm8
movdqu [rsp+9*16], xmm9
movdqu [rsp+10*16], xmm10
movdqu [rsp+11*16], xmm11
movdqu [rsp+12*16], xmm12
movdqu [rsp+13*16], xmm13
movdqu [rsp+14*16], xmm14
movdqu [rsp+15*16], xmm15
%endmacro
%macro restore_xmm_regs 0
movdqu xmm0, [rsp+0*16]
movdqu xmm1, [rsp+1*16]
movdqu xmm2, [rsp+2*16]
movdqu xmm3, [rsp+3*16]
movdqu xmm4, [rsp+4*16]
movdqu xmm5, [rsp+5*16]
movdqu xmm6, [rsp+6*16]
movdqu xmm7, [rsp+7*16]
movdqu xmm8, [rsp+8*16]
movdqu xmm9, [rsp+9*16]
movdqu xmm10, [rsp+10*16]
movdqu xmm11, [rsp+11*16]
movdqu xmm12, [rsp+12*16]
movdqu xmm13, [rsp+13*16]
movdqu xmm14, [rsp+14*16]
movdqu xmm15, [rsp+15*16]
add rsp, 16*16
%endmacro
;------------------------------------------------------------------------------ ;------------------------------------------------------------------------------
; This dispatcher is invoked by gate entries that expect a code to be pushed ; This dispatcher is invoked by gate entries that expect a code to be pushed
; by the CPU to the stack. It performs the following functions: ; by the CPU to the stack.
; - save registers ;
; - push pointer to saved regs ; This is the stack layout used by this function. Items are 8-bytes
; - push pointer to stack frame ; wide with the exception of the xmm regs that are 16 bytes wide
; - read and push exception code ;
; - invoke handler(code, &frame, &regs) ; ----------------|
; - restore registers ; useAVXmemmove | <- original value of runtime.useAVXmemmove
; - pop exception code from stack so rsp points to the stack frame ;-----------------|
; xmm regs (16) |
;-----------------| <- RBP will point at the last pushed GP reg
; gp regs (15) |
;-----------------|
; handler address | <- pushed by gate_entry_xxx (RSP initially points here)
;-----------------|
; exception code | <- pushed by CPU (must be popped before returning)
;-----------------|
; RIP | <- pushed by CPU (exception frame)
; CS |
; RFLAGS |
; RSP |
; SS |
;-----------------
;------------------------------------------------------------------------------ ;------------------------------------------------------------------------------
_rt0_64_gate_dispatcher_with_code: _rt0_64_gate_dispatcher_with_code:
; This is how the stack looks like when entering this function:
; (each item is 8-bytes wide)
;
;------------------
; handler address | <- pushed by gate_entry_xxx (RSP points here)
;-----------------|
; Exception code | <- needs to be removed from stack before calling iretq
;-----------------|
; RIP | <- exception frame
; CS |
; RFLAGS |
; RSP |
; SS |
;-----------------
cld cld
; save regs and push a pointer to them ; save general-purpose regs
save_regs save_gp_regs
mov rax, rsp ; rax points to saved rax mov rbp, rsp ; rbp points to saved rax
push rax ; push pointer to saved regs
; push pointer to exception stack frame (we have used 15 qwords for the ; save xmm regs as the fault handler may clobber them by calling an
; saved registers plus one qword for the data pushed by the gate entry ; SSE-enabled runtime function like copy (calls runtime.memmove). In
; plus one extra qword to jump over the exception code) ; addition temporarily disable AVX support for runtime.memmove so we
add rax, 17*8 ; don't need to also preserve the avx regs.
push rax save_xmm_regs
mov rax, runtime.useAVXmemmove
push qword [rax]
mov byte [rax], 0
; push exception code (located between the stack frame and the saved regs) ; push exception handler args and call registered handler
sub rax, 8 push qword [rbp] ; ptr to regs
push qword [rax] push qword [rbp+17*8] ; ptr to exception frame
push qword [rbp+16*8] ; exception code
call qword [rbp+15*8]
add rsp, 3 * 8
call [rsp + 18*8] ; call registered irq handler ; restore xmm regs and restore AVX support for runtime.memmove
mov rax, runtime.useAVXmemmove
pop rbx
mov byte [rax], bl
restore_xmm_regs
add rsp, 3 * 8 ; unshift the pushed arguments so rsp points to the saved regs ; restore general purpose regs
restore_regs restore_gp_regs
add rsp, 16 ; pop handler address and exception code off the stack before returning ; pop handler address + exception code so RSP points to the stack frame.
add rsp, 2*8
iretq iretq
;------------------------------------------------------------------------------ ;------------------------------------------------------------------------------
; This dispatcher is invoked by gate entries that do not use exception codes. ; This dispatcher is invoked by gate entries that do not use exception codes.
; It performs the following functions: ;
; - save registers ; This is the stack layout used by this function. Items are 8-bytes
; - push pointer to saved regs ; wide with the exception of the xmm regs that are 16 bytes wide
; - push pointer to stack frame ;
; - invoke handler(&frame, &regs) ; ----------------|
; - restore registers ; useAVXmemmove | <- original value of runtime.useAVXmemmove
;-----------------|
; xmm regs (16) |
;-----------------| <- RBP will point at the last pushed GP reg
; gp regs (15) |
;-----------------|
; handler address | <- pushed by gate_entry_xxx (RSP initially points here)
;-----------------|
; RIP | <- pushed by CPU (exception frame)
; CS |
; RFLAGS |
; RSP |
; SS |
;-----------------
;------------------------------------------------------------------------------ ;------------------------------------------------------------------------------
_rt0_64_gate_dispatcher_without_code: _rt0_64_gate_dispatcher_without_code:
; This is how the stack looks like when entering this function:
; (each item is 8-bytes wide)
;
;------------------
; handler address | <- pushed by gate_entry_xxx (RSP points here)
;-----------------|
; RIP | <- exception frame
; CS |
; RFLAGS |
; RSP |
; SS |
;-----------------
cld cld
; save regs and push a pointer to them ; save general-purpose regs
save_regs save_gp_regs
mov rax, rsp ; rax points to saved rax mov rbp, rsp ; rbp points to saved rax
push rax ; push pointer to saved regs
; push pointer to exception stack frame (we have used 15 qwords for the ; save xmm regs as the fault handler may clobber them by calling an
; saved registers plus one qword for the data pushed by the gate entry) ; SSE-enabled runtime function like copy (calls runtime.memmove). In
add rax, 16*8 ; addition temporarily disable AVX support for runtime.memmove so we
push rax ; don't need to also preserve the avx regs.
save_xmm_regs
mov rax, runtime.useAVXmemmove
push qword [rax]
mov byte [rax], 0
call [rsp + 17*8] ; call registered irq handler ; push exception handler args and call registered handler
push qword [rbp] ; ptr to regs
push qword [rbp+16*8] ; ptr to exception frame
call qword [rbp+15*8]
add rsp, 2 * 8
add rsp, 2 * 8 ; unshift the pushed arguments so rsp points to the saved regs ; restore xmm regs and restore AVX support for runtime.memmove
restore_regs mov rax, runtime.useAVXmemmove
pop rbx
mov byte [rax], bl
restore_xmm_regs
add rsp, 8 ; pop handler address off the stack before returning ; restore general purpose regs
restore_gp_regs
; pop handler address so RSP points to the stack frame.
add rsp, 8
iretq iretq
;------------------------------------------------------------------------------ ;------------------------------------------------------------------------------