1
0
mirror of https://github.com/taigrr/gopher-os synced 2025-01-18 04:43:13 -08:00

Merge pull request #67 from achilleasa/preserve-xmm-regs-when-handling-exceptions

Preserve xmm regs when handling exceptions
This commit is contained in:
Achilleas Anagnostopoulos 2018-04-28 11:21:07 +01:00 committed by GitHub
commit 170e2449d5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 156 additions and 81 deletions

View File

@ -32,7 +32,8 @@ GOROOT := $(shell $(GO) env GOROOT)
GC_FLAGS ?= GC_FLAGS ?=
LD_FLAGS := -n -T $(BUILD_DIR)/linker.ld -static --no-ld-generated-unwind-info LD_FLAGS := -n -T $(BUILD_DIR)/linker.ld -static --no-ld-generated-unwind-info
AS_FLAGS := -g -f elf64 -F dwarf -I $(BUILD_DIR)/ -I src/arch/$(ARCH)/asm/ \ AS_FLAGS := -g -f elf64 -F dwarf -I $(BUILD_DIR)/ -I src/arch/$(ARCH)/asm/ \
-dNUM_REDIRECTS=$(shell GOPATH=$(GOPATH) $(GO) run tools/redirects/redirects.go count) -dNUM_REDIRECTS=$(shell GOPATH=$(GOPATH) $(GO) run tools/redirects/redirects.go count) \
-dWITH_RUNTIME_AVXMEMMOVE=$(shell grep -r "var useAVXmemmove" $(GOROOT)/src/runtime/ | wc -l)
MIN_OBJCOPY_VERSION := 2.26.0 MIN_OBJCOPY_VERSION := 2.26.0
HAVE_VALID_OBJCOPY := $(shell objcopy -V | head -1 | awk -F ' ' '{print "$(MIN_OBJCOPY_VERSION)\n" $$NF}' | sort -ct. -k1,1n -k2,2n && echo "y") HAVE_VALID_OBJCOPY := $(shell objcopy -V | head -1 | awk -F ' ' '{print "$(MIN_OBJCOPY_VERSION)\n" $$NF}' | sort -ct. -k1,1n -k2,2n && echo "y")
@ -74,13 +75,14 @@ go.o:
@# objcopy to make that symbol exportable. Since nasm does not support externs @# objcopy to make that symbol exportable. Since nasm does not support externs
@# with slashes we create a global symbol alias for kernel.Kmain @# with slashes we create a global symbol alias for kernel.Kmain
@echo "[objcopy] create kernel.Kmain alias to gopheros/kernel/kmain.Kmain" @echo "[objcopy] create kernel.Kmain alias to gopheros/kernel/kmain.Kmain"
@echo "[objcopy] globalizing symbols {_rt0_interrupt_handlers, runtime.g0/m0/physPageSize}" @echo "[objcopy] globalizing symbols {_rt0_interrupt_handlers, runtime.g0/m0/physPageSize/useAVXmemmove}"
@objcopy \ @objcopy \
--add-symbol kernel.Kmain=.text:0x`nm $(BUILD_DIR)/go.o | grep "kmain.Kmain$$" | cut -d' ' -f1` \ --add-symbol kernel.Kmain=.text:0x`nm $(BUILD_DIR)/go.o | grep "kmain.Kmain$$" | cut -d' ' -f1` \
--globalize-symbol _rt0_interrupt_handlers \ --globalize-symbol _rt0_interrupt_handlers \
--globalize-symbol runtime.g0 \ --globalize-symbol runtime.g0 \
--globalize-symbol runtime.m0 \ --globalize-symbol runtime.m0 \
--globalize-symbol runtime.physPageSize \ --globalize-symbol runtime.physPageSize \
--globalize-symbol runtime.useAVXmemmove \
$(BUILD_DIR)/go.o $(BUILD_DIR)/go.o $(BUILD_DIR)/go.o $(BUILD_DIR)/go.o
binutils_version_check: binutils_version_check:

View File

@ -27,6 +27,14 @@ _rt0_irq_handlers resq IDT_ENTRIES
r0_g_ptr: resq 1 r0_g_ptr: resq 1
tcb_ptr: resq 1 tcb_ptr: resq 1
; Go < 1.9 does not define runtime.useAVXmemmove; to avoid linker errors define
; a dummy symbol so that the gate entry code can work as expected.
%if WITH_RUNTIME_AVXMEMMOVE == 0
runtime.useAVXmemmove resb 1
%else
extern runtime.useAVXmemmove
%endif
section .text section .text
;------------------------------------------------------------------------------ ;------------------------------------------------------------------------------
@ -182,7 +190,7 @@ _rt0_64_gate_entry_%+ gate_num:
%assign gate_num gate_num+1 %assign gate_num gate_num+1
%endrep %endrep
%macro save_regs 0 %macro save_gp_regs 0
push r15 push r15
push r14 push r14
push r13 push r13
@ -200,7 +208,7 @@ _rt0_64_gate_entry_%+ gate_num:
push rax push rax
%endmacro %endmacro
%macro restore_regs 0 %macro restore_gp_regs 0
pop rax pop rax
pop rbx pop rbx
pop rcx pop rcx
@ -218,97 +226,163 @@ _rt0_64_gate_entry_%+ gate_num:
pop r15 pop r15
%endmacro %endmacro
;------------------------------------------------------------------------------
; save_xmm_regs
;
; Reserves a 16*16 byte area at the top of the stack and spills xmm0..xmm15
; into it, with xmmN stored at [rsp + N*16]. The unaligned store form (movdqu)
; is used for every slot.
;
; Side effects: rsp is lowered by 256 bytes and the flags are modified by the
; sub instruction. Must be paired with restore_xmm_regs.
;
; The %rep loop below expands to exactly the same instruction sequence as a
; hand-unrolled list of 16 movdqu stores.
;------------------------------------------------------------------------------
%macro save_xmm_regs 0
	sub rsp, 16*16
%assign xmm_slot 0
%rep 16
	movdqu [rsp+xmm_slot*16], xmm %+ xmm_slot
%assign xmm_slot xmm_slot+1
%endrep
%endmacro
;------------------------------------------------------------------------------
; restore_xmm_regs
;
; Reloads xmm0..xmm15 from the 16*16 byte area that starts at rsp (xmmN is
; read from [rsp + N*16]) and then releases that area by raising rsp by 256
; bytes. rsp must point at the start of the spill area when the macro is used.
;
; Side effects: xmm0..xmm15 are overwritten, rsp is raised and the flags are
; modified by the add instruction.
;
; The %rep loop below expands to exactly the same instruction sequence as a
; hand-unrolled list of 16 movdqu loads.
;------------------------------------------------------------------------------
%macro restore_xmm_regs 0
%assign xmm_slot 0
%rep 16
	movdqu xmm %+ xmm_slot, [rsp+xmm_slot*16]
%assign xmm_slot xmm_slot+1
%endrep
	add rsp, 16*16
%endmacro
;------------------------------------------------------------------------------ ;------------------------------------------------------------------------------
; This dispatcher is invoked by gate entries that expect a code to be pushed ; This dispatcher is invoked by gate entries that expect a code to be pushed
; by the CPU to the stack. It performs the following functions: ; by the CPU to the stack.
; - save registers ;
; - push pointer to saved regs ; This is the stack layout used by this function. Items are 8-bytes
; - push pointer to stack frame ; wide with the exception of the xmm regs that are 16 bytes wide
; - read and push exception code ;
; - invoke handler(code, &frame, &regs) ; ----------------|
; - restore registers ; useAVXmemmove | <- original value of runtime.useAVXmemmove
; - pop exception code from stack so rsp points to the stack frame ;-----------------|
; xmm regs (16) |
;-----------------| <- RBP will point at the last pushed GP reg
; gp regs (15) |
;-----------------|
; handler address | <- pushed by gate_entry_xxx (RSP initially points here)
;-----------------|
; exception code | <- pushed by CPU (must be popped before returning)
;-----------------|
; RIP | <- pushed by CPU (exception frame)
; CS |
; RFLAGS |
; RSP |
; SS |
;-----------------
;------------------------------------------------------------------------------ ;------------------------------------------------------------------------------
_rt0_64_gate_dispatcher_with_code: _rt0_64_gate_dispatcher_with_code:
; This is how the stack looks like when entering this function:
; (each item is 8-bytes wide)
;
;------------------
; handler address | <- pushed by gate_entry_xxx (RSP points here)
;-----------------|
; Exception code | <- needs to be removed from stack before calling iretq
;-----------------|
; RIP | <- exception frame
; CS |
; RFLAGS |
; RSP |
; SS |
;-----------------
cld cld
; save regs and push a pointer to them ; save general-purpose regs
save_regs save_gp_regs
mov rax, rsp ; rax points to saved rax mov rbp, rsp ; rbp points to saved rax
push rax ; push pointer to saved regs
; push pointer to exception stack frame (we have used 15 qwords for the ; save xmm regs as the fault handler may clobber them by calling an
; saved registers plus one qword for the data pushed by the gate entry ; SSE-enabled runtime function like copy (calls runtime.memmove). In
; plus one extra qword to jump over the exception code) ; addition temporarily disable AVX support for runtime.memmove so we
add rax, 17*8 ; don't need to also preserve the avx regs.
push rax save_xmm_regs
mov rax, runtime.useAVXmemmove
push qword [rax]
mov byte [rax], 0
; push exception code (located between the stack frame and the saved regs) ; push exception handler args and call registered handler
sub rax, 8 push qword [rbp] ; ptr to regs
push qword [rax] push qword [rbp+17*8] ; ptr to exception frame
push qword [rbp+16*8] ; exception code
call qword [rbp+15*8]
add rsp, 3 * 8
call [rsp + 18*8] ; call registered irq handler ; restore xmm regs and restore AVX support for runtime.memmove
mov rax, runtime.useAVXmemmove
pop rbx
mov byte [rax], bl
restore_xmm_regs
add rsp, 3 * 8 ; unshift the pushed arguments so rsp points to the saved regs ; restore general purpose regs
restore_regs restore_gp_regs
add rsp, 16 ; pop handler address and exception code off the stack before returning ; pop handler address + exception code so RSP points to the stack frame.
add rsp, 2*8
iretq iretq
;------------------------------------------------------------------------------ ;------------------------------------------------------------------------------
; This dispatcher is invoked by gate entries that do not use exception codes. ; This dispatcher is invoked by gate entries that do not use exception codes.
; It performs the following functions: ;
; - save registers ; This is the stack layout used by this function. Items are 8-bytes
; - push pointer to saved regs ; wide with the exception of the xmm regs that are 16 bytes wide
; - push pointer to stack frame ;
; - invoke handler(&frame, &regs) ; ----------------|
; - restore registers ; useAVXmemmove | <- original value of runtime.useAVXmemmove
;-----------------|
; xmm regs (16) |
;-----------------| <- RBP will point at the last pushed GP reg
; gp regs (15) |
;-----------------|
; handler address | <- pushed by gate_entry_xxx (RSP initially points here)
;-----------------|
; RIP | <- pushed by CPU (exception frame)
; CS |
; RFLAGS |
; RSP |
; SS |
;-----------------
;------------------------------------------------------------------------------ ;------------------------------------------------------------------------------
_rt0_64_gate_dispatcher_without_code: _rt0_64_gate_dispatcher_without_code:
; This is how the stack looks like when entering this function:
; (each item is 8-bytes wide)
;
;------------------
; handler address | <- pushed by gate_entry_xxx (RSP points here)
;-----------------|
; RIP | <- exception frame
; CS |
; RFLAGS |
; RSP |
; SS |
;-----------------
cld cld
; save regs and push a pointer to them ; save general-purpose regs
save_regs save_gp_regs
mov rax, rsp ; rax points to saved rax mov rbp, rsp ; rbp points to saved rax
push rax ; push pointer to saved regs
; push pointer to exception stack frame (we have used 15 qwords for the ; save xmm regs as the fault handler may clobber them by calling an
; saved registers plus one qword for the data pushed by the gate entry) ; SSE-enabled runtime function like copy (calls runtime.memmove). In
add rax, 16*8 ; addition temporarily disable AVX support for runtime.memmove so we
push rax ; don't need to also preserve the avx regs.
save_xmm_regs
mov rax, runtime.useAVXmemmove
push qword [rax]
mov byte [rax], 0
call [rsp + 17*8] ; call registered irq handler ; push exception handler args and call registered handler
push qword [rbp] ; ptr to regs
push qword [rbp+16*8] ; ptr to exception frame
call qword [rbp+15*8]
add rsp, 2 * 8
add rsp, 2 * 8 ; unshift the pushed arguments so rsp points to the saved regs ; restore xmm regs and restore AVX support for runtime.memmove
restore_regs mov rax, runtime.useAVXmemmove
pop rbx
mov byte [rax], bl
restore_xmm_regs
add rsp, 8 ; pop handler address off the stack before returning ; restore general purpose regs
restore_gp_regs
; pop handler address so RSP points to the stack frame.
add rsp, 8
iretq iretq
;------------------------------------------------------------------------------ ;------------------------------------------------------------------------------

View File

@ -40,9 +40,10 @@ func (rb *ringBuffer) Read(p []byte) (n int, err error) {
n = pLen n = pLen
} }
for i := 0; i < n; i, rb.rIndex = i+1, rb.rIndex+1 { copy(p, rb.buffer[rb.rIndex:rb.rIndex+n])
p[i] = rb.buffer[rb.rIndex] rb.rIndex += n
}
return n, nil
case rb.rIndex > rb.wIndex: case rb.rIndex > rb.wIndex:
// Read up to min(len(buf) - rIndex, len(p)) bytes // Read up to min(len(buf) - rIndex, len(p)) bytes
n = len(rb.buffer) - rb.rIndex n = len(rb.buffer) - rb.rIndex
@ -50,17 +51,15 @@ func (rb *ringBuffer) Read(p []byte) (n int, err error) {
n = pLen n = pLen
} }
for i := 0; i < n; i, rb.rIndex = i+1, rb.rIndex+1 { copy(p, rb.buffer[rb.rIndex:rb.rIndex+n])
p[i] = rb.buffer[rb.rIndex] rb.rIndex += n
}
if rb.rIndex == len(rb.buffer) { if rb.rIndex == len(rb.buffer) {
rb.rIndex = 0 rb.rIndex = 0
} }
return n, nil
default: // rIndex == wIndex default: // rIndex == wIndex
n, err = 0, io.EOF return 0, io.EOF
} }
return n, err
} }