[haiku-commits] haiku: hrev54291 - src/system/kernel/arch/x86/64 src/system/kernel/arch/x86 headers/private/kernel/arch/x86 src/system/kernel headers/posix/arch/x86_64

  • From: Adrien Destugues <pulkomandy@xxxxxxxxx>
  • To: haiku-commits@xxxxxxxxxxxxx
  • Date: Wed, 3 Jun 2020 02:16:51 -0400 (EDT)

hrev54291 adds 1 changeset to branch 'master'
old head: 734c1e049163f67ee02bf02caae5fb9a0108db84
new head: 9495126984d664dfa8afc1382b5614ee69ba2a1e
overview: 
https://git.haiku-os.org/haiku/log/?qt=range&q=9495126984d6+%5E734c1e049163

----------------------------------------------------------------------------

9495126984d6: kernel/x86_64: AVX support
  
  xsave or xsavec are supported.
  Breaks vregs compatibility.
  Change the thread structure object cache alignment to 64.
  The xsave fpu_state size isn't fixed (the kernel reads it from CPUID at
  runtime); it is for instance 832 bytes here, thus I picked 1024.
  
  Change-Id: I4a0cab0bc42c1d37f24dcafb8259f8ff24a330d2
  Reviewed-on: https://review.haiku-os.org/c/haiku/+/2849
  Reviewed-by: Adrien Destugues <pulkomandy@xxxxxxxxx>

                                   [ Jérôme Duval <jerome.duval@xxxxxxxxx> ]

----------------------------------------------------------------------------

Revision:    hrev54291
Commit:      9495126984d664dfa8afc1382b5614ee69ba2a1e
URL:         https://git.haiku-os.org/haiku/commit/?id=9495126984d6
Author:      Jérôme Duval <jerome.duval@xxxxxxxxx>
Date:        Tue May  5 21:03:39 2020 UTC
Committer:   Adrien Destugues <pulkomandy@xxxxxxxxx>
Commit-Date: Wed Jun  3 06:16:48 2020 UTC

----------------------------------------------------------------------------

10 files changed, 191 insertions(+), 27 deletions(-)
headers/posix/arch/x86_64/signal.h               | 14 ++++-
.../private/kernel/arch/x86/arch_altcodepatch.h  |  2 +
headers/private/kernel/arch/x86/arch_cpu.h       | 16 +++++
.../private/kernel/arch/x86/arch_thread_types.h  |  5 ++
src/system/kernel/arch/x86/64/arch.S             | 13 ++++
src/system/kernel/arch/x86/64/interrupts.S       | 66 ++++++++++++++++----
src/system/kernel/arch/x86/64/thread.cpp         | 49 +++++++++++----
src/system/kernel/arch/x86/arch_altcodepatch.cpp |  3 +-
src/system/kernel/arch/x86/arch_cpu.cpp          | 46 ++++++++++++++
src/system/kernel/thread.cpp                     |  4 +-

----------------------------------------------------------------------------

diff --git a/headers/posix/arch/x86_64/signal.h b/headers/posix/arch/x86_64/signal.h
index 871d1f59b6..ad4fa4b128 100644
--- a/headers/posix/arch/x86_64/signal.h
+++ b/headers/posix/arch/x86_64/signal.h
@@ -89,6 +89,18 @@ struct fpu_state {
        unsigned char           _reserved_416_511[96];
 };
 
+struct xstate_hdr {
+       unsigned long           bv;     /* presumably the XSTATE_BV bitmap of saved components -- confirm against the Intel SDM */
+       unsigned long           xcomp_bv;       /* presumably the XCOMP_BV bitmap (compacted format, XSAVEC) -- confirm */
+       unsigned char           _reserved[48];  /* pads the header to 8 + 8 + 48 = 64 bytes (with 8-byte unsigned long) */
+};
+
+struct savefpu {
+       struct fpu_state        fp_fxsave;      /* the previously used fpu_state layout, kept as the first member */
+       struct xstate_hdr       fp_xstate;      /* header placed directly after the fpu_state area */
+       unsigned long           fp_ymm[16][2];  /* 16 entries of two unsigned long each; presumably the upper YMM halves -- confirm */
+};
+
 struct vregs {
        unsigned long           rax;
        unsigned long           rbx;
@@ -110,7 +122,7 @@ struct vregs {
        unsigned long           rip;
        unsigned long           rflags;
 
-       struct fpu_state        fpu;
+       struct savefpu          fpu;
 };
 
 
diff --git a/headers/private/kernel/arch/x86/arch_altcodepatch.h b/headers/private/kernel/arch/x86/arch_altcodepatch.h
index 99f01e2784..762a083de8 100644
--- a/headers/private/kernel/arch/x86/arch_altcodepatch.h
+++ b/headers/private/kernel/arch/x86/arch_altcodepatch.h
@@ -22,6 +22,8 @@
 
 #define ALTCODEPATCH_TAG_STAC          1
 #define ALTCODEPATCH_TAG_CLAC          2
+#define ALTCODEPATCH_TAG_XSAVE         3
+#define ALTCODEPATCH_TAG_XRSTOR                4
 
 
 #ifdef _ASSEMBLER
diff --git a/headers/private/kernel/arch/x86/arch_cpu.h b/headers/private/kernel/arch/x86/arch_cpu.h
index fa5215eead..155c88d9d2 100644
--- a/headers/private/kernel/arch/x86/arch_cpu.h
+++ b/headers/private/kernel/arch/x86/arch_cpu.h
@@ -354,9 +354,15 @@
 #define IA32_CR4_GLOBAL_PAGES  (1UL << 7)
 #define CR4_OS_FXSR                            (1UL << 9)
 #define CR4_OS_XMM_EXCEPTION   (1UL << 10)
+#define IA32_CR4_OSXSAVE               (1UL << 18)
 #define IA32_CR4_SMEP                  (1UL << 20)
 #define IA32_CR4_SMAP                  (1UL << 21)
 
+// Extended Control Register XCR0 flags
+#define IA32_XCR0_X87                  (1UL << 0)
+#define IA32_XCR0_SSE                  (1UL << 1)
+#define IA32_XCR0_AVX                  (1UL << 2)
+
 // page fault error codes (http://wiki.osdev.org/Page_Fault)
 #define PGFAULT_P                                              0x01    // 
Protection violation
 #define PGFAULT_W                                              0x02    // Write
@@ -547,6 +553,16 @@ struct intel_microcode_extended_signature {
 #define clear_ac() \
        __asm__ volatile (ASM_CLAC : : : "memory")
 
+#define xgetbv(reg) ({ /* read extended control register `reg`; evaluates to EDX:EAX as a uint64 */ \
+       uint32 low, high; \
+       __asm__ volatile ("xgetbv" : "=a" (low), "=d" (high) : "c" (reg)); \
+       (low | (uint64)high << 32); \
+})
+
+#define xsetbv(reg, value) do { /* write `value` to extended control register `reg`; value is evaluated once */ \
+       uint64 _value = (value); uint32 low = _value; uint32 high = _value >> 32; \
+       __asm__ volatile ("xsetbv" : : "a" (low), "d" (high), "c" (reg)); } while (0)
+
 #define out8(value,port) \
        __asm__ ("outb %%al,%%dx" : : "a" (value), "d" (port))
 
diff --git a/headers/private/kernel/arch/x86/arch_thread_types.h b/headers/private/kernel/arch/x86/arch_thread_types.h
index f02ae41ad6..5bd1b637d9 100644
--- a/headers/private/kernel/arch/x86/arch_thread_types.h
+++ b/headers/private/kernel/arch/x86/arch_thread_types.h
@@ -53,8 +53,13 @@ struct arch_thread {
        struct farcall  interrupt_stack;
 #endif
 
+#ifndef __x86_64__
        // 512 byte floating point save point - this must be 16 byte aligned
        uint8                   fpu_state[512] _ALIGNED(16);
+#else
+       // floating point save point - this must be 64 byte aligned for xsave
+       uint8                   fpu_state[1024] _ALIGNED(64);
+#endif
 
        addr_t                  GetFramePointer() const;
 } _ALIGNED(16);
diff --git a/src/system/kernel/arch/x86/64/arch.S b/src/system/kernel/arch/x86/64/arch.S
index d88b05bfed..4c4aaa66cc 100644
--- a/src/system/kernel/arch/x86/64/arch.S
+++ b/src/system/kernel/arch/x86/64/arch.S
@@ -119,3 +119,16 @@ FUNCTION_END(_stac)
 FUNCTION(_clac):
        clac
 FUNCTION_END(_clac)
+
+FUNCTION(_xsave):      /* no ret: arch_cpu_init_post_vm() passes this address to arch_altcodepatch_replace() with length 4 */
+       xsave64         (%rdi)  /* replaces "fxsaveq (%rdi)" at ALTCODEPATCH_TAG_XSAVE sites; mask is loaded into %edx:%eax there */
+FUNCTION_END(_xsave)
+
+FUNCTION(_xsavec):     /* no ret: used instead of _xsave as the ALTCODEPATCH_TAG_XSAVE patch source when gHasXsavec is set */
+       xsavec64        (%rdi)  /* replaces "fxsaveq (%rdi)" at the patched sites; 4 bytes are copied */
+FUNCTION_END(_xsavec)
+
+FUNCTION(_xrstor):     /* no ret: patch source handed to arch_altcodepatch_replace() for ALTCODEPATCH_TAG_XRSTOR, length 4 */
+       xrstor64        (%rdi)  /* replaces "fxrstorq (%rdi)" at the patched sites; mask is loaded into %edx:%eax there */
+FUNCTION_END(_xrstor)
+
diff --git a/src/system/kernel/arch/x86/64/interrupts.S b/src/system/kernel/arch/x86/64/interrupts.S
index f58135cace..124fe061a3 100644
--- a/src/system/kernel/arch/x86/64/interrupts.S
+++ b/src/system/kernel/arch/x86/64/interrupts.S
@@ -221,16 +221,32 @@ STATIC_FUNCTION(int_bottom):
        // exception.
        orq             $X86_EFLAGS_RESUME, IFRAME_flags(%rbp)
 
-       subq    $512, %rsp
-       andq    $~15, %rsp
-       fxsaveq (%rsp)
+       // xsave needs a 64-byte alignment
+       andq    $~63, %rsp
+       movq    (gFPUSaveLength), %rcx
+       subq    %rcx, %rsp
+       leaq    (%rsp), %rdi
+       shrq    $3, %rcx
+       movq    $0, %rax
+       rep stosq
+       movl    (gXsaveMask), %eax
+       movl    (gXsaveMask+4), %edx
+       movq    %rsp, %rdi
+       CODEPATCH_START
+       fxsaveq (%rdi)
+       CODEPATCH_END(ALTCODEPATCH_TAG_XSAVE)
 
        // Call the interrupt handler.
        movq    %rbp, %rdi
        movq    IFRAME_vector(%rbp), %rax
        call    *gInterruptHandlerTable(, %rax, 8)
 
-       fxrstorq        (%rsp)
+       movl    (gXsaveMask), %eax
+       movl    (gXsaveMask+4), %edx
+       movq    %rsp, %rdi
+       CODEPATCH_START
+       fxrstorq        (%rdi)
+       CODEPATCH_END(ALTCODEPATCH_TAG_XRSTOR)
        movq    %rbp, %rsp
 
        // Restore the saved registers.
@@ -253,9 +269,22 @@ STATIC_FUNCTION(int_bottom_user):
        // Frame pointer is the iframe.
        movq    %rsp, %rbp
 
-       subq    $512, %rsp
-       andq    $~15, %rsp
-       fxsaveq (%rsp)
+       // xsave needs a 64-byte alignment
+       andq    $~63, %rsp
+       movq    (gFPUSaveLength), %rcx
+       subq    %rcx, %rsp
+       leaq    (%rsp), %rdi
+       shrq    $3, %rcx
+       movq    $0, %rax
+       rep stosq
+       movl    (gXsaveMask), %eax
+       movl    (gXsaveMask+4), %edx
+
+       movq    %rsp, %rdi
+       CODEPATCH_START
+       fxsaveq (%rdi)
+       CODEPATCH_END(ALTCODEPATCH_TAG_XSAVE)
+
        movq    %rsp, IFRAME_fpu(%rbp)
 
        // Set the RF (resume flag) in RFLAGS. This prevents an instruction
@@ -286,7 +315,12 @@ STATIC_FUNCTION(int_bottom_user):
 
        UPDATE_THREAD_KERNEL_TIME()
 
-       fxrstorq        (%rsp)
+       movl    (gXsaveMask), %eax
+       movl    (gXsaveMask+4), %edx
+       movq    %rsp, %rdi
+       CODEPATCH_START
+       fxrstorq        (%rdi)
+       CODEPATCH_END(ALTCODEPATCH_TAG_XRSTOR)
        movq    %rbp, %rsp
 
        // Restore the saved registers.
@@ -315,7 +349,12 @@ STATIC_FUNCTION(int_bottom_user):
        movq    %rbp, %rdi
        call    x86_init_user_debug_at_kernel_exit
 1:
-       fxrstorq        (%rsp)
+       movl    (gXsaveMask), %eax
+       movl    (gXsaveMask+4), %edx
+       movq    %rsp, %rdi
+       CODEPATCH_START
+       fxrstorq        (%rdi)
+       CODEPATCH_END(ALTCODEPATCH_TAG_XRSTOR)
        movq    %rbp, %rsp
 
        // Restore the saved registers.
@@ -522,8 +561,13 @@ FUNCTION(x86_64_syscall_entry):
        jmp .Liret
 
 .Lrestore_fpu:
-       movq    IFRAME_fpu(%rbp), %rax
-       fxrstorq        (%rax)
+       movq    IFRAME_fpu(%rbp), %rdi
+
+       movl    (gXsaveMask), %eax
+       movl    (gXsaveMask+4), %edx
+       CODEPATCH_START
+       fxrstorq        (%rdi)
+       CODEPATCH_END(ALTCODEPATCH_TAG_XRSTOR)
 .Liret:
        // Restore the saved registers.
        RESTORE_IFRAME()
diff --git a/src/system/kernel/arch/x86/64/thread.cpp b/src/system/kernel/arch/x86/64/thread.cpp
index a29697a03f..e50c3a8fa2 100644
--- a/src/system/kernel/arch/x86/64/thread.cpp
+++ b/src/system/kernel/arch/x86/64/thread.cpp
@@ -68,7 +68,10 @@ class RestartSyscall : public AbstractTraceEntry {
 extern "C" void x86_64_thread_entry();
 
 // Initial thread saved state.
-static arch_thread sInitialState;
+static arch_thread sInitialState _ALIGNED(64);
+extern uint64 gFPUSaveLength;
+extern bool gHasXsave;
+extern bool gHasXsavec;
 
 
 void
@@ -140,12 +143,36 @@ arch_thread_init(kernel_args* args)
 {
        // Save one global valid FPU state; it will be copied in the arch 
dependent
        // part of each new thread.
-       asm volatile (
-               "clts;"         \
-               "fninit;"       \
-               "fnclex;"       \
-               "fxsave %0;"
-               : "=m" (sInitialState.fpu_state));
+       if (gHasXsave || gHasXsavec) {
+               ASSERT(gFPUSaveLength <= sizeof(sInitialState.fpu_state));
+               memset(sInitialState.fpu_state, 0, gFPUSaveLength);
+               if (gHasXsavec) {
+                       asm volatile (
+                               "clts;"         \
+                               "fninit;"       \
+                               "fnclex;"       \
+                               "movl $0x7,%%eax;"      \
+                               "movl $0x0,%%edx;"      \
+                               "xsavec64 %0"
+                               :: "m" (sInitialState.fpu_state));
+               } else {
+                       asm volatile (
+                               "clts;"         \
+                               "fninit;"       \
+                               "fnclex;"       \
+                               "movl $0x7,%%eax;"      \
+                               "movl $0x0,%%edx;"      \
+                               "xsave64 %0"
+                               :: "m" (sInitialState.fpu_state));
+               }
+       } else {
+               asm volatile (
+                       "clts;"         \
+                       "fninit;"       \
+                       "fnclex;"       \
+                       "fxsaveq %0"
+                       :: "m" (sInitialState.fpu_state));
+       }
        return B_OK;
 }
 
@@ -309,11 +336,10 @@ arch_setup_signal_frame(Thread* thread, struct sigaction* 
action,
 
        if (frame->fpu != nullptr) {
                memcpy((void*)&signalFrameData->context.uc_mcontext.fpu, 
frame->fpu,
-                       sizeof(signalFrameData->context.uc_mcontext.fpu));
+                       gFPUSaveLength);
        } else {
                memcpy((void*)&signalFrameData->context.uc_mcontext.fpu,
-                       sInitialState.fpu_state,
-                       sizeof(signalFrameData->context.uc_mcontext.fpu));
+                       sInitialState.fpu_state, gFPUSaveLength);
        }
 
        // Fill in signalFrameData->context.uc_stack.
@@ -385,8 +411,7 @@ arch_restore_signal_frame(struct signal_frame_data* 
signalFrameData)
        Thread* thread = thread_get_current_thread();
 
        memcpy(thread->arch_info.fpu_state,
-               (void*)&signalFrameData->context.uc_mcontext.fpu,
-               sizeof(thread->arch_info.fpu_state));
+               (void*)&signalFrameData->context.uc_mcontext.fpu, 
gFPUSaveLength);
        frame->fpu = &thread->arch_info.fpu_state;
 
        // The syscall return code overwrites frame->ax with the return value of
diff --git a/src/system/kernel/arch/x86/arch_altcodepatch.cpp b/src/system/kernel/arch/x86/arch_altcodepatch.cpp
index b99a7d12c7..958448a12b 100644
--- a/src/system/kernel/arch/x86/arch_altcodepatch.cpp
+++ b/src/system/kernel/arch/x86/arch_altcodepatch.cpp
@@ -52,6 +52,7 @@ arch_altcodepatch_replace(uint16 tag, void* newcodepatch, 
size_t length)
        // disable write after patch
        set_area_protection(info->text_region.id, kernelProtection);
 
-       dprintf("arch_altcodepatch_replace found %" B_PRIu32 " 
altcodepatches\n", count);
+       dprintf("arch_altcodepatch_replace found %" B_PRIu32 " altcodepatches "
+               "for tag %u\n", count, tag);
 }
 
diff --git a/src/system/kernel/arch/x86/arch_cpu.cpp b/src/system/kernel/arch/x86/arch_cpu.cpp
index d6fbd74c10..e62c85e28f 100644
--- a/src/system/kernel/arch/x86/arch_cpu.cpp
+++ b/src/system/kernel/arch/x86/arch_cpu.cpp
@@ -84,6 +84,13 @@ struct set_mtrrs_parameter {
 #ifdef __x86_64__
 extern addr_t _stac;
 extern addr_t _clac;
+extern addr_t _xsave;
+extern addr_t _xsavec;
+extern addr_t _xrstor;
+uint64 gXsaveMask;
+uint64 gFPUSaveLength = 512;
+bool gHasXsave = false;
+bool gHasXsavec = false;
 #endif
 
 extern "C" void x86_reboot(void);
@@ -1406,6 +1413,20 @@ enable_smep(void* dummy, int cpu)
 {
        x86_write_cr4(x86_read_cr4() | IA32_CR4_SMEP);
 }
+
+
+static void
+enable_osxsave(void* dummy, int cpu)   // both parameters are unused; run on every CPU via call_all_cpus_sync()
+{
+       x86_write_cr4(x86_read_cr4() | IA32_CR4_OSXSAVE);       // set CR4 bit 18 (OSXSAVE), keeping all other CR4 bits
+}
+
+
+static void
+enable_xsavemask(void* dummy, int cpu) // both parameters are unused; run on every CPU via call_all_cpus_sync()
+{
+       xsetbv(0, gXsaveMask);  // load the global gXsaveMask into extended control register 0 (XCR0)
+}
 #endif
 
 
@@ -1459,6 +1480,31 @@ arch_cpu_init_post_vm(kernel_args* args)
                } else
                        dprintf("SMAP disabled per safemode setting\n");
        }
+
+       // if available enable XSAVE (XSAVE and extended states)
+       gHasXsave = x86_check_feature(IA32_FEATURE_EXT_XSAVE, FEATURE_EXT);
+       if (gHasXsave) {
+               gHasXsavec = x86_check_feature(IA32_FEATURE_XSAVEC,
+                       FEATURE_D_1_EAX);
+
+               call_all_cpus_sync(&enable_osxsave, NULL);
+               gXsaveMask = IA32_XCR0_X87 | IA32_XCR0_SSE;
+               cpuid_info cpuid;
+               get_current_cpuid(&cpuid, 0xd, 0);
+               gXsaveMask |= (cpuid.regs.eax & IA32_XCR0_AVX);
+               call_all_cpus_sync(&enable_xsavemask, NULL);
+               get_current_cpuid(&cpuid, 0xd, 0);
+               gFPUSaveLength = cpuid.regs.ebx;
+
+               arch_altcodepatch_replace(ALTCODEPATCH_TAG_XSAVE,
+                       gHasXsavec ? &_xsavec : &_xsave, 4);
+               arch_altcodepatch_replace(ALTCODEPATCH_TAG_XRSTOR,
+                       &_xrstor, 4);
+
+               dprintf("enable %s 0x%" B_PRIx64 " %" B_PRId64 "\n",
+                       gHasXsavec ? "XSAVEC" : "XSAVE", gXsaveMask, 
gFPUSaveLength);
+       }
+
 #endif
 
        return B_OK;
diff --git a/src/system/kernel/thread.cpp b/src/system/kernel/thread.cpp
index 4ea3f605c7..2fae73aa2f 100644
--- a/src/system/kernel/thread.cpp
+++ b/src/system/kernel/thread.cpp
@@ -2679,9 +2679,9 @@ thread_init(kernel_args *args)
                panic("thread_init(): failed to init thread hash table!");
 
        // create the thread structure object cache
-       sThreadCache = create_object_cache("threads", sizeof(Thread), 16, NULL,
+       sThreadCache = create_object_cache("threads", sizeof(Thread), 64, NULL,
                NULL, NULL);
-               // Note: The x86 port requires 16 byte alignment of thread 
structures.
+               // Note: The x86 port requires 64 byte alignment of thread 
structures.
        if (sThreadCache == NULL)
                panic("thread_init(): failed to allocate thread object cache!");
 


Other related posts:

  • » [haiku-commits] haiku: hrev54291 - src/system/kernel/arch/x86/64 src/system/kernel/arch/x86 headers/private/kernel/arch/x86 src/system/kernel headers/posix/arch/x86_64 - Adrien Destugues