[haiku-commits] Change in haiku[master]: [WIP] Various fixes for AVX/AVX512 support

  • From: Gerrit <review@xxxxxxxxxxxxxxxxxxx>
  • To: waddlesplash <waddlesplash@xxxxxxxxx>, haiku-commits@xxxxxxxxxxxxx
  • Date: Thu, 16 Jul 2020 18:55:36 +0000

From Adrien Destugues <pulkomandy@xxxxxxxxx>:

Adrien Destugues has uploaded this change for review. ( https://review.haiku-os.org/c/haiku/+/3038 )


Change subject: [WIP] Various fixes for AVX/AVX512 support
......................................................................

[WIP] Various fixes for AVX/AVX512 support

Completely untested. I don't have a 64-bit install to try these changes.
Feel free to adopt this commit, test it and complete it.
---
M headers/os/arch/x86_64/arch_debugger.h
M headers/posix/arch/x86_64/signal.h
M src/kits/debugger/arch/x86_64/ArchitectureX8664.cpp
M src/kits/debugger/arch/x86_64/ArchitectureX8664.h
M src/kits/debugger/arch/x86_64/CpuStateX8664.cpp
M src/kits/debugger/arch/x86_64/CpuStateX8664.h
6 files changed, 140 insertions(+), 75 deletions(-)



  git pull ssh://git.haiku-os.org:22/haiku refs/changes/38/3038/1

diff --git a/headers/os/arch/x86_64/arch_debugger.h b/headers/os/arch/x86_64/arch_debugger.h
index e4607d2..97ec49f 100644
--- a/headers/os/arch/x86_64/arch_debugger.h
+++ b/headers/os/arch/x86_64/arch_debugger.h
@@ -16,6 +16,14 @@
        uint8           value[16];
 } x86_64_xmm_register;

+typedef struct x86_64_ymm_register {
+       uint8           value[32];
+} x86_64_ymm_register;
+
+typedef struct x86_64_zmm_register {
+       uint8           value[64];
+} x86_64_zmm_register;
+

 typedef struct x86_64_extended_registers {
        uint16                                  control;
@@ -31,8 +39,8 @@
                x86_64_fp_register      fp_registers[8];        // st0-st7
                x86_64_fp_register      mmx_registers[8];       // mm0-mm7
        };
-       x86_64_xmm_register             xmm_registers[16];      // xmm0-xmm15
-       uint8                                   reserved2[96];          // 416 - 512
+       x86_64_zmm_register             zmm_registers[32];      // zmm0-zmm31
+       uint8                                   reserved2[1888];        // 2208 - 4096
 } x86_64_extended_registers;


diff --git a/headers/posix/arch/x86_64/signal.h b/headers/posix/arch/x86_64/signal.h
index ad4fa4b..2d69a1d 100644
--- a/headers/posix/arch/x86_64/signal.h
+++ b/headers/posix/arch/x86_64/signal.h
@@ -98,7 +98,7 @@
 struct savefpu {
        struct fpu_state        fp_fxsave;
        struct xstate_hdr       fp_xstate;
-       unsigned long           fp_ymm[16][2];
+       unsigned long           fp_zmm[32][8];
 };

 struct vregs {
diff --git a/src/kits/debugger/arch/x86_64/ArchitectureX8664.cpp b/src/kits/debugger/arch/x86_64/ArchitectureX8664.cpp
index cb03343..ca29023 100644
--- a/src/kits/debugger/arch/x86_64/ArchitectureX8664.cpp
+++ b/src/kits/debugger/arch/x86_64/ArchitectureX8664.cpp
@@ -29,6 +29,7 @@

 #include "disasm/DisassemblerX8664.h"

+#define X86_64_EXTENDED_FEATURE_AVX    (1 << 28)

 static const int32 kFromDwarfRegisters[] = {
        X86_64_REGISTER_RAX,
@@ -48,22 +49,22 @@
        X86_64_REGISTER_R14,
        X86_64_REGISTER_R15,
        X86_64_REGISTER_RIP,
-       X86_64_REGISTER_XMM0,
-       X86_64_REGISTER_XMM1,
-       X86_64_REGISTER_XMM2,
-       X86_64_REGISTER_XMM3,
-       X86_64_REGISTER_XMM4,
-       X86_64_REGISTER_XMM5,
-       X86_64_REGISTER_XMM6,
-       X86_64_REGISTER_XMM7,
-       X86_64_REGISTER_XMM8,
-       X86_64_REGISTER_XMM9,
-       X86_64_REGISTER_XMM10,
-       X86_64_REGISTER_XMM11,
-       X86_64_REGISTER_XMM12,
-       X86_64_REGISTER_XMM13,
-       X86_64_REGISTER_XMM14,
-       X86_64_REGISTER_XMM15,
+       X86_64_REGISTER_YMM0,
+       X86_64_REGISTER_YMM1,
+       X86_64_REGISTER_YMM2,
+       X86_64_REGISTER_YMM3,
+       X86_64_REGISTER_YMM4,
+       X86_64_REGISTER_YMM5,
+       X86_64_REGISTER_YMM6,
+       X86_64_REGISTER_YMM7,
+       X86_64_REGISTER_YMM8,
+       X86_64_REGISTER_YMM9,
+       X86_64_REGISTER_YMM10,
+       X86_64_REGISTER_YMM11,
+       X86_64_REGISTER_YMM12,
+       X86_64_REGISTER_YMM13,
+       X86_64_REGISTER_YMM14,
+       X86_64_REGISTER_YMM15,
        X86_64_REGISTER_ST0,
        X86_64_REGISTER_ST1,
        X86_64_REGISTER_ST2,
@@ -170,6 +171,20 @@
        if (fAssemblyLanguage == NULL)
                return B_NO_MEMORY;

+#if defined(__i386__)
+       // TODO: this needs to be determined/retrieved indirectly from the
+       // target host interface, as in the remote case the CPU features may
+       // differ from those of the local CPU.
+       cpuid_info info;
+       status_t error = get_cpuid(&info, 1, 0);
+       if (error != B_OK)
+               return error;
+
+       if ((info.eax_1.extended_features & IA32_EXTENDED_FEATURE_AVX) != 0)
+               fFeatureFlags |= X86_CPU_FEATURE_FLAG_AVX;
+
+#endif
+
        try {
                _AddIntegerRegister(X86_64_REGISTER_RIP, "rip", B_UINT64_TYPE,
                        REGISTER_TYPE_INSTRUCTION_POINTER, false);
@@ -240,38 +255,73 @@
                _AddSIMDRegister(X86_64_REGISTER_MM6, "mm6", sizeof(uint64));
                _AddSIMDRegister(X86_64_REGISTER_MM7, "mm7", sizeof(uint64));

-               _AddSIMDRegister(X86_64_REGISTER_XMM0, "xmm0",
-                       sizeof(x86_64_xmm_register));
-               _AddSIMDRegister(X86_64_REGISTER_XMM1, "xmm1",
-                       sizeof(x86_64_xmm_register));
-               _AddSIMDRegister(X86_64_REGISTER_XMM2, "xmm2",
-                       sizeof(x86_64_xmm_register));
-               _AddSIMDRegister(X86_64_REGISTER_XMM3, "xmm3",
-                       sizeof(x86_64_xmm_register));
-               _AddSIMDRegister(X86_64_REGISTER_XMM4, "xmm4",
-                       sizeof(x86_64_xmm_register));
-               _AddSIMDRegister(X86_64_REGISTER_XMM5, "xmm5",
-                       sizeof(x86_64_xmm_register));
-               _AddSIMDRegister(X86_64_REGISTER_XMM6, "xmm6",
-                       sizeof(x86_64_xmm_register));
-               _AddSIMDRegister(X86_64_REGISTER_XMM7, "xmm7",
-                       sizeof(x86_64_xmm_register));
-               _AddSIMDRegister(X86_64_REGISTER_XMM8, "xmm8",
-                       sizeof(x86_64_xmm_register));
-               _AddSIMDRegister(X86_64_REGISTER_XMM9, "xmm9",
-                       sizeof(x86_64_xmm_register));
-               _AddSIMDRegister(X86_64_REGISTER_XMM10, "xmm10",
-                       sizeof(x86_64_xmm_register));
-               _AddSIMDRegister(X86_64_REGISTER_XMM11, "xmm11",
-                       sizeof(x86_64_xmm_register));
-               _AddSIMDRegister(X86_64_REGISTER_XMM12, "xmm12",
-                       sizeof(x86_64_xmm_register));
-               _AddSIMDRegister(X86_64_REGISTER_XMM13, "xmm13",
-                       sizeof(x86_64_xmm_register));
-               _AddSIMDRegister(X86_64_REGISTER_XMM14, "xmm14",
-                       sizeof(x86_64_xmm_register));
-               _AddSIMDRegister(X86_64_REGISTER_XMM15, "xmm15",
-                       sizeof(x86_64_xmm_register));
+               if ((fFeatureFlags & X86_CPU_FEATURE_FLAG_AVX) != 0) {
+                       _AddSIMDRegister(X86_64_REGISTER_XMM0, "ymm0",
+                                       sizeof(x86_64_ymm_register));
+                       _AddSIMDRegister(X86_64_REGISTER_XMM1, "ymm1",
+                                       sizeof(x86_64_ymm_register));
+                       _AddSIMDRegister(X86_64_REGISTER_XMM2, "ymm2",
+                                       sizeof(x86_64_ymm_register));
+                       _AddSIMDRegister(X86_64_REGISTER_XMM3, "ymm3",
+                                       sizeof(x86_64_ymm_register));
+                       _AddSIMDRegister(X86_64_REGISTER_XMM4, "ymm4",
+                                       sizeof(x86_64_ymm_register));
+                       _AddSIMDRegister(X86_64_REGISTER_XMM5, "ymm5",
+                                       sizeof(x86_64_ymm_register));
+                       _AddSIMDRegister(X86_64_REGISTER_XMM6, "ymm6",
+                                       sizeof(x86_64_ymm_register));
+                       _AddSIMDRegister(X86_64_REGISTER_XMM7, "ymm7",
+                                       sizeof(x86_64_ymm_register));
+                       _AddSIMDRegister(X86_64_REGISTER_XMM8, "ymm8",
+                                       sizeof(x86_64_ymm_register));
+                       _AddSIMDRegister(X86_64_REGISTER_XMM9, "ymm9",
+                                       sizeof(x86_64_ymm_register));
+                       _AddSIMDRegister(X86_64_REGISTER_XMM10, "ymm10",
+                                       sizeof(x86_64_ymm_register));
+                       _AddSIMDRegister(X86_64_REGISTER_XMM11, "ymm11",
+                                       sizeof(x86_64_ymm_register));
+                       _AddSIMDRegister(X86_64_REGISTER_XMM12, "ymm12",
+                                       sizeof(x86_64_ymm_register));
+                       _AddSIMDRegister(X86_64_REGISTER_XMM13, "ymm13",
+                                       sizeof(x86_64_ymm_register));
+                       _AddSIMDRegister(X86_64_REGISTER_XMM14, "ymm14",
+                                       sizeof(x86_64_ymm_register));
+                       _AddSIMDRegister(X86_64_REGISTER_XMM15, "ymm15",
+                                       sizeof(x86_64_ymm_register));
+               } else {
+                       _AddSIMDRegister(X86_64_REGISTER_XMM0, "xmm0",
+                                       sizeof(x86_64_xmm_register));
+                       _AddSIMDRegister(X86_64_REGISTER_XMM1, "xmm1",
+                                       sizeof(x86_64_xmm_register));
+                       _AddSIMDRegister(X86_64_REGISTER_XMM2, "xmm2",
+                                       sizeof(x86_64_xmm_register));
+                       _AddSIMDRegister(X86_64_REGISTER_XMM3, "xmm3",
+                                       sizeof(x86_64_xmm_register));
+                       _AddSIMDRegister(X86_64_REGISTER_XMM4, "xmm4",
+                                       sizeof(x86_64_xmm_register));
+                       _AddSIMDRegister(X86_64_REGISTER_XMM5, "xmm5",
+                                       sizeof(x86_64_xmm_register));
+                       _AddSIMDRegister(X86_64_REGISTER_XMM6, "xmm6",
+                                       sizeof(x86_64_xmm_register));
+                       _AddSIMDRegister(X86_64_REGISTER_XMM7, "xmm7",
+                                       sizeof(x86_64_xmm_register));
+                       _AddSIMDRegister(X86_64_REGISTER_XMM8, "xmm8",
+                                       sizeof(x86_64_xmm_register));
+                       _AddSIMDRegister(X86_64_REGISTER_XMM9, "xmm9",
+                                       sizeof(x86_64_xmm_register));
+                       _AddSIMDRegister(X86_64_REGISTER_XMM10, "xmm10",
+                                       sizeof(x86_64_xmm_register));
+                       _AddSIMDRegister(X86_64_REGISTER_XMM11, "xmm11",
+                                       sizeof(x86_64_xmm_register));
+                       _AddSIMDRegister(X86_64_REGISTER_XMM12, "xmm12",
+                                       sizeof(x86_64_xmm_register));
+                       _AddSIMDRegister(X86_64_REGISTER_XMM13, "xmm13",
+                                       sizeof(x86_64_xmm_register));
+                       _AddSIMDRegister(X86_64_REGISTER_XMM14, "xmm14",
+                                       sizeof(x86_64_xmm_register));
+                       _AddSIMDRegister(X86_64_REGISTER_XMM15, "xmm15",
+                                       sizeof(x86_64_xmm_register));
+               }

        } catch (std::bad_alloc&) {
                return B_NO_MEMORY;
diff --git a/src/kits/debugger/arch/x86_64/ArchitectureX8664.h b/src/kits/debugger/arch/x86_64/ArchitectureX8664.h
index dfeb2ed..b0188d2 100644
--- a/src/kits/debugger/arch/x86_64/ArchitectureX8664.h
+++ b/src/kits/debugger/arch/x86_64/ArchitectureX8664.h
@@ -14,6 +14,13 @@
 #include "Register.h"


+enum {
+       X86_CPU_FEATURE_FLAG_NONE = 0,
+       X86_CPU_FEATURE_FLAG_AVX = 1,
+       X86_CPU_FEATURE_FLAG_AVX512 = 2
+};
+
+
 class SourceLanguage;


diff --git a/src/kits/debugger/arch/x86_64/CpuStateX8664.cpp b/src/kits/debugger/arch/x86_64/CpuStateX8664.cpp
index 36e4b08..0c540e4 100644
--- a/src/kits/debugger/arch/x86_64/CpuStateX8664.cpp
+++ b/src/kits/debugger/arch/x86_64/CpuStateX8664.cpp
@@ -78,22 +78,22 @@
        SetMMXRegister(X86_64_REGISTER_MM6, extended.mmx_registers[6].value);
        SetMMXRegister(X86_64_REGISTER_MM7, extended.mmx_registers[7].value);

-       SetXMMRegister(X86_64_REGISTER_XMM0, extended.xmm_registers[0].value);
-       SetXMMRegister(X86_64_REGISTER_XMM1, extended.xmm_registers[1].value);
-       SetXMMRegister(X86_64_REGISTER_XMM2, extended.xmm_registers[2].value);
-       SetXMMRegister(X86_64_REGISTER_XMM3, extended.xmm_registers[3].value);
-       SetXMMRegister(X86_64_REGISTER_XMM4, extended.xmm_registers[4].value);
-       SetXMMRegister(X86_64_REGISTER_XMM5, extended.xmm_registers[5].value);
-       SetXMMRegister(X86_64_REGISTER_XMM6, extended.xmm_registers[6].value);
-       SetXMMRegister(X86_64_REGISTER_XMM7, extended.xmm_registers[7].value);
-       SetXMMRegister(X86_64_REGISTER_XMM8, extended.xmm_registers[8].value);
-       SetXMMRegister(X86_64_REGISTER_XMM9, extended.xmm_registers[9].value);
-       SetXMMRegister(X86_64_REGISTER_XMM10, extended.xmm_registers[10].value);
-       SetXMMRegister(X86_64_REGISTER_XMM11, extended.xmm_registers[11].value);
-       SetXMMRegister(X86_64_REGISTER_XMM12, extended.xmm_registers[12].value);
-       SetXMMRegister(X86_64_REGISTER_XMM13, extended.xmm_registers[13].value);
-       SetXMMRegister(X86_64_REGISTER_XMM14, extended.xmm_registers[14].value);
-       SetXMMRegister(X86_64_REGISTER_XMM15, extended.xmm_registers[15].value);
+       SetXMMRegister(X86_64_REGISTER_XMM0, extended.zmm_registers[0].value);
+       SetXMMRegister(X86_64_REGISTER_XMM1, extended.zmm_registers[1].value);
+       SetXMMRegister(X86_64_REGISTER_XMM2, extended.zmm_registers[2].value);
+       SetXMMRegister(X86_64_REGISTER_XMM3, extended.zmm_registers[3].value);
+       SetXMMRegister(X86_64_REGISTER_XMM4, extended.zmm_registers[4].value);
+       SetXMMRegister(X86_64_REGISTER_XMM5, extended.zmm_registers[5].value);
+       SetXMMRegister(X86_64_REGISTER_XMM6, extended.zmm_registers[6].value);
+       SetXMMRegister(X86_64_REGISTER_XMM7, extended.zmm_registers[7].value);
+       SetXMMRegister(X86_64_REGISTER_XMM8, extended.zmm_registers[8].value);
+       SetXMMRegister(X86_64_REGISTER_XMM9, extended.zmm_registers[9].value);
+       SetXMMRegister(X86_64_REGISTER_XMM10, extended.zmm_registers[10].value);
+       SetXMMRegister(X86_64_REGISTER_XMM11, extended.zmm_registers[11].value);
+       SetXMMRegister(X86_64_REGISTER_XMM12, extended.zmm_registers[12].value);
+       SetXMMRegister(X86_64_REGISTER_XMM13, extended.zmm_registers[13].value);
+       SetXMMRegister(X86_64_REGISTER_XMM14, extended.zmm_registers[14].value);
+       SetXMMRegister(X86_64_REGISTER_XMM15, extended.zmm_registers[15].value);

        fInterruptVector = state.vector;
 }
@@ -171,11 +171,11 @@

        for (int32 i = 0; i < 16; i++) {
                if (IsRegisterSet(X86_64_REGISTER_XMM0 + i)) {
-                       memcpy(&x64State->extended_registers.xmm_registers[i],
-                               &fXMMRegisters[i], sizeof(x86_64_xmm_register));
+                       memcpy(&x64State->extended_registers.zmm_registers[i],
+                               &fXMMRegisters[i], sizeof(x86_64_ymm_register));
                } else {
-                       memset(&x64State->extended_registers.xmm_registers[i],
-                               0, sizeof(x86_64_xmm_register));
+                       memset(&x64State->extended_registers.zmm_registers[i],
+                               0, sizeof(x86_64_ymm_register));
                }
        }

@@ -269,11 +269,11 @@
                        value.ToPointer(), value.Size());
        } else if (index >= X86_64_REGISTER_XMM0
                        && index < X86_64_XMM_REGISTER_END) {
-               if (value.Size() > sizeof(x86_64_xmm_register))
+               if (value.Size() > sizeof(x86_64_ymm_register))
                        return false;

                memset(&fXMMRegisters[index - X86_64_REGISTER_XMM0], 0,
-                       sizeof(x86_64_xmm_register));
+                       sizeof(x86_64_ymm_register));
                memcpy(fXMMRegisters[index - X86_64_REGISTER_XMM0].value,
                        value.ToPointer(), value.Size());
        } else
@@ -378,7 +378,7 @@
                return;

        memcpy(fXMMRegisters[index - X86_64_REGISTER_XMM0].value, value,
-               sizeof(x86_64_xmm_register));
+               sizeof(x86_64_ymm_register));
        fSetRegisters[index] = 1;
 }

diff --git a/src/kits/debugger/arch/x86_64/CpuStateX8664.h b/src/kits/debugger/arch/x86_64/CpuStateX8664.h
index da7ca8a..c7cc276 100644
--- a/src/kits/debugger/arch/x86_64/CpuStateX8664.h
+++ b/src/kits/debugger/arch/x86_64/CpuStateX8664.h
@@ -148,7 +148,7 @@
                        uint64                          fIntRegisters[X86_64_INT_REGISTER_COUNT];
                        double                          fFloatRegisters[X86_64_FP_REGISTER_COUNT];
                        x86_64_fp_register      fMMXRegisters[X86_64_MMX_REGISTER_COUNT];
-                       x86_64_xmm_register     fXMMRegisters[X86_64_XMM_REGISTER_COUNT];
+                       x86_64_ymm_register     fXMMRegisters[X86_64_XMM_REGISTER_COUNT];
                        RegisterBitSet          fSetRegisters;
                        uint64                          fInterruptVector;
 };

--
To view, visit https://review.haiku-os.org/c/haiku/+/3038
To unsubscribe, or for help writing mail filters, visit 
https://review.haiku-os.org/settings

Gerrit-Project: haiku
Gerrit-Branch: master
Gerrit-Change-Id: If93680ffa0339c19bab517876b4e029f5d66b240
Gerrit-Change-Number: 3038
Gerrit-PatchSet: 1
Gerrit-Owner: Adrien Destugues <pulkomandy@xxxxxxxxx>
Gerrit-MessageType: newchange

Other related posts:

  • » [haiku-commits] Change in haiku[master]: [WIP] Various fixes for AVX/AVX512 support - Gerrit