From Adrien Destugues <pulkomandy@xxxxxxxxx>:
Adrien Destugues has uploaded this change for review. (
https://review.haiku-os.org/c/haiku/+/3038 )
Change subject: [WIP] Various fixes for AVX/AVX512 support
......................................................................
[WIP] Various fixes for AVX/AVX512 support
Completely untested. I don't have a 64-bit install to try these changes.
Feel free to adopt this commit, test it and complete it.
---
M headers/os/arch/x86_64/arch_debugger.h
M headers/posix/arch/x86_64/signal.h
M src/kits/debugger/arch/x86_64/ArchitectureX8664.cpp
M src/kits/debugger/arch/x86_64/ArchitectureX8664.h
M src/kits/debugger/arch/x86_64/CpuStateX8664.cpp
M src/kits/debugger/arch/x86_64/CpuStateX8664.h
6 files changed, 140 insertions(+), 75 deletions(-)
git pull ssh://git.haiku-os.org:22/haiku refs/changes/38/3038/1
diff --git a/headers/os/arch/x86_64/arch_debugger.h b/headers/os/arch/x86_64/arch_debugger.h
index e4607d2..97ec49f 100644
--- a/headers/os/arch/x86_64/arch_debugger.h
+++ b/headers/os/arch/x86_64/arch_debugger.h
@@ -16,6 +16,14 @@
uint8 value[16];
} x86_64_xmm_register;
+typedef struct x86_64_ymm_register {
+ uint8 value[32];
+} x86_64_ymm_register;
+
+typedef struct x86_64_zmm_register {
+ uint8 value[64];
+} x86_64_zmm_register;
+
typedef struct x86_64_extended_registers {
uint16 control;
@@ -31,8 +39,8 @@
x86_64_fp_register fp_registers[8]; // st0-st7
x86_64_fp_register mmx_registers[8]; // mm0-mm7
};
- x86_64_xmm_register xmm_registers[16]; // xmm0-xmm15
- uint8 reserved2[96]; // 416 - 512
+ x86_64_zmm_register zmm_registers[32]; // zmm0-zmm31
+ uint8 reserved2[1888]; // 2208 - 4096
} x86_64_extended_registers;
diff --git a/headers/posix/arch/x86_64/signal.h b/headers/posix/arch/x86_64/signal.h
index ad4fa4b..2d69a1d 100644
--- a/headers/posix/arch/x86_64/signal.h
+++ b/headers/posix/arch/x86_64/signal.h
@@ -98,7 +98,7 @@
struct savefpu {
struct fpu_state fp_fxsave;
struct xstate_hdr fp_xstate;
- unsigned long fp_ymm[16][2];
+ unsigned long fp_zmm[32][8];
};
struct vregs {
diff --git a/src/kits/debugger/arch/x86_64/ArchitectureX8664.cpp b/src/kits/debugger/arch/x86_64/ArchitectureX8664.cpp
index cb03343..ca29023 100644
--- a/src/kits/debugger/arch/x86_64/ArchitectureX8664.cpp
+++ b/src/kits/debugger/arch/x86_64/ArchitectureX8664.cpp
@@ -29,6 +29,7 @@
#include "disasm/DisassemblerX8664.h"
+#define X86_64_EXTENDED_FEATURE_AVX (1 << 28)
static const int32 kFromDwarfRegisters[] = {
X86_64_REGISTER_RAX,
@@ -48,22 +49,22 @@
X86_64_REGISTER_R14,
X86_64_REGISTER_R15,
X86_64_REGISTER_RIP,
- X86_64_REGISTER_XMM0,
- X86_64_REGISTER_XMM1,
- X86_64_REGISTER_XMM2,
- X86_64_REGISTER_XMM3,
- X86_64_REGISTER_XMM4,
- X86_64_REGISTER_XMM5,
- X86_64_REGISTER_XMM6,
- X86_64_REGISTER_XMM7,
- X86_64_REGISTER_XMM8,
- X86_64_REGISTER_XMM9,
- X86_64_REGISTER_XMM10,
- X86_64_REGISTER_XMM11,
- X86_64_REGISTER_XMM12,
- X86_64_REGISTER_XMM13,
- X86_64_REGISTER_XMM14,
- X86_64_REGISTER_XMM15,
+ X86_64_REGISTER_YMM0,
+ X86_64_REGISTER_YMM1,
+ X86_64_REGISTER_YMM2,
+ X86_64_REGISTER_YMM3,
+ X86_64_REGISTER_YMM4,
+ X86_64_REGISTER_YMM5,
+ X86_64_REGISTER_YMM6,
+ X86_64_REGISTER_YMM7,
+ X86_64_REGISTER_YMM8,
+ X86_64_REGISTER_YMM9,
+ X86_64_REGISTER_YMM10,
+ X86_64_REGISTER_YMM11,
+ X86_64_REGISTER_YMM12,
+ X86_64_REGISTER_YMM13,
+ X86_64_REGISTER_YMM14,
+ X86_64_REGISTER_YMM15,
X86_64_REGISTER_ST0,
X86_64_REGISTER_ST1,
X86_64_REGISTER_ST2,
@@ -170,6 +171,20 @@
if (fAssemblyLanguage == NULL)
return B_NO_MEMORY;
+#if defined(__i386__)
+ // TODO: this needs to be determined/retrieved indirectly from the
+ // target host interface, as in the remote case the CPU features may
+ // differ from those of the local CPU.
+ cpuid_info info;
+ status_t error = get_cpuid(&info, 1, 0);
+ if (error != B_OK)
+ return error;
+
+ if ((info.eax_1.extended_features & IA32_EXTENDED_FEATURE_AVX) != 0)
+ fFeatureFlags |= X86_CPU_FEATURE_FLAG_AVX;
+
+#endif
+
try {
_AddIntegerRegister(X86_64_REGISTER_RIP, "rip", B_UINT64_TYPE,
REGISTER_TYPE_INSTRUCTION_POINTER, false);
@@ -240,38 +255,73 @@
_AddSIMDRegister(X86_64_REGISTER_MM6, "mm6", sizeof(uint64));
_AddSIMDRegister(X86_64_REGISTER_MM7, "mm7", sizeof(uint64));
- _AddSIMDRegister(X86_64_REGISTER_XMM0, "xmm0",
- sizeof(x86_64_xmm_register));
- _AddSIMDRegister(X86_64_REGISTER_XMM1, "xmm1",
- sizeof(x86_64_xmm_register));
- _AddSIMDRegister(X86_64_REGISTER_XMM2, "xmm2",
- sizeof(x86_64_xmm_register));
- _AddSIMDRegister(X86_64_REGISTER_XMM3, "xmm3",
- sizeof(x86_64_xmm_register));
- _AddSIMDRegister(X86_64_REGISTER_XMM4, "xmm4",
- sizeof(x86_64_xmm_register));
- _AddSIMDRegister(X86_64_REGISTER_XMM5, "xmm5",
- sizeof(x86_64_xmm_register));
- _AddSIMDRegister(X86_64_REGISTER_XMM6, "xmm6",
- sizeof(x86_64_xmm_register));
- _AddSIMDRegister(X86_64_REGISTER_XMM7, "xmm7",
- sizeof(x86_64_xmm_register));
- _AddSIMDRegister(X86_64_REGISTER_XMM8, "xmm8",
- sizeof(x86_64_xmm_register));
- _AddSIMDRegister(X86_64_REGISTER_XMM9, "xmm9",
- sizeof(x86_64_xmm_register));
- _AddSIMDRegister(X86_64_REGISTER_XMM10, "xmm10",
- sizeof(x86_64_xmm_register));
- _AddSIMDRegister(X86_64_REGISTER_XMM11, "xmm11",
- sizeof(x86_64_xmm_register));
- _AddSIMDRegister(X86_64_REGISTER_XMM12, "xmm12",
- sizeof(x86_64_xmm_register));
- _AddSIMDRegister(X86_64_REGISTER_XMM13, "xmm13",
- sizeof(x86_64_xmm_register));
- _AddSIMDRegister(X86_64_REGISTER_XMM14, "xmm14",
- sizeof(x86_64_xmm_register));
- _AddSIMDRegister(X86_64_REGISTER_XMM15, "xmm15",
- sizeof(x86_64_xmm_register));
+ if ((fFeatureFlags & X86_CPU_FEATURE_FLAG_AVX) != 0) {
+ _AddSIMDRegister(X86_64_REGISTER_XMM0, "ymm0",
+ sizeof(x86_64_ymm_register));
+ _AddSIMDRegister(X86_64_REGISTER_XMM1, "ymm1",
+ sizeof(x86_64_ymm_register));
+ _AddSIMDRegister(X86_64_REGISTER_XMM2, "ymm2",
+ sizeof(x86_64_ymm_register));
+ _AddSIMDRegister(X86_64_REGISTER_XMM3, "ymm3",
+ sizeof(x86_64_ymm_register));
+ _AddSIMDRegister(X86_64_REGISTER_XMM4, "ymm4",
+ sizeof(x86_64_ymm_register));
+ _AddSIMDRegister(X86_64_REGISTER_XMM5, "ymm5",
+ sizeof(x86_64_ymm_register));
+ _AddSIMDRegister(X86_64_REGISTER_XMM6, "ymm6",
+ sizeof(x86_64_ymm_register));
+ _AddSIMDRegister(X86_64_REGISTER_XMM7, "ymm7",
+ sizeof(x86_64_ymm_register));
+ _AddSIMDRegister(X86_64_REGISTER_XMM8, "ymm8",
+ sizeof(x86_64_ymm_register));
+ _AddSIMDRegister(X86_64_REGISTER_XMM9, "ymm9",
+ sizeof(x86_64_ymm_register));
+ _AddSIMDRegister(X86_64_REGISTER_XMM10, "ymm10",
+ sizeof(x86_64_ymm_register));
+ _AddSIMDRegister(X86_64_REGISTER_XMM11, "ymm11",
+ sizeof(x86_64_ymm_register));
+ _AddSIMDRegister(X86_64_REGISTER_XMM12, "ymm12",
+ sizeof(x86_64_ymm_register));
+ _AddSIMDRegister(X86_64_REGISTER_XMM13, "ymm13",
+ sizeof(x86_64_ymm_register));
+ _AddSIMDRegister(X86_64_REGISTER_XMM14, "ymm14",
+ sizeof(x86_64_ymm_register));
+ _AddSIMDRegister(X86_64_REGISTER_XMM15, "ymm15",
+ sizeof(x86_64_ymm_register));
+ } else {
+ _AddSIMDRegister(X86_64_REGISTER_XMM0, "xmm0",
+ sizeof(x86_64_xmm_register));
+ _AddSIMDRegister(X86_64_REGISTER_XMM1, "xmm1",
+ sizeof(x86_64_xmm_register));
+ _AddSIMDRegister(X86_64_REGISTER_XMM2, "xmm2",
+ sizeof(x86_64_xmm_register));
+ _AddSIMDRegister(X86_64_REGISTER_XMM3, "xmm3",
+ sizeof(x86_64_xmm_register));
+ _AddSIMDRegister(X86_64_REGISTER_XMM4, "xmm4",
+ sizeof(x86_64_xmm_register));
+ _AddSIMDRegister(X86_64_REGISTER_XMM5, "xmm5",
+ sizeof(x86_64_xmm_register));
+ _AddSIMDRegister(X86_64_REGISTER_XMM6, "xmm6",
+ sizeof(x86_64_xmm_register));
+ _AddSIMDRegister(X86_64_REGISTER_XMM7, "xmm7",
+ sizeof(x86_64_xmm_register));
+ _AddSIMDRegister(X86_64_REGISTER_XMM8, "xmm8",
+ sizeof(x86_64_xmm_register));
+ _AddSIMDRegister(X86_64_REGISTER_XMM9, "xmm9",
+ sizeof(x86_64_xmm_register));
+ _AddSIMDRegister(X86_64_REGISTER_XMM10, "xmm10",
+ sizeof(x86_64_xmm_register));
+ _AddSIMDRegister(X86_64_REGISTER_XMM11, "xmm11",
+ sizeof(x86_64_xmm_register));
+ _AddSIMDRegister(X86_64_REGISTER_XMM12, "xmm12",
+ sizeof(x86_64_xmm_register));
+ _AddSIMDRegister(X86_64_REGISTER_XMM13, "xmm13",
+ sizeof(x86_64_xmm_register));
+ _AddSIMDRegister(X86_64_REGISTER_XMM14, "xmm14",
+ sizeof(x86_64_xmm_register));
+ _AddSIMDRegister(X86_64_REGISTER_XMM15, "xmm15",
+ sizeof(x86_64_xmm_register));
+ }
} catch (std::bad_alloc&) {
return B_NO_MEMORY;
diff --git a/src/kits/debugger/arch/x86_64/ArchitectureX8664.h b/src/kits/debugger/arch/x86_64/ArchitectureX8664.h
index dfeb2ed..b0188d2 100644
--- a/src/kits/debugger/arch/x86_64/ArchitectureX8664.h
+++ b/src/kits/debugger/arch/x86_64/ArchitectureX8664.h
@@ -14,6 +14,13 @@
#include "Register.h"
+enum {
+ X86_CPU_FEATURE_FLAG_NONE = 0,
+ X86_CPU_FEATURE_FLAG_AVX = 1,
+ X86_CPU_FEATURE_FLAG_AVX512 = 2
+};
+
+
class SourceLanguage;
diff --git a/src/kits/debugger/arch/x86_64/CpuStateX8664.cpp b/src/kits/debugger/arch/x86_64/CpuStateX8664.cpp
index 36e4b08..0c540e4 100644
--- a/src/kits/debugger/arch/x86_64/CpuStateX8664.cpp
+++ b/src/kits/debugger/arch/x86_64/CpuStateX8664.cpp
@@ -78,22 +78,22 @@
SetMMXRegister(X86_64_REGISTER_MM6, extended.mmx_registers[6].value);
SetMMXRegister(X86_64_REGISTER_MM7, extended.mmx_registers[7].value);
- SetXMMRegister(X86_64_REGISTER_XMM0, extended.xmm_registers[0].value);
- SetXMMRegister(X86_64_REGISTER_XMM1, extended.xmm_registers[1].value);
- SetXMMRegister(X86_64_REGISTER_XMM2, extended.xmm_registers[2].value);
- SetXMMRegister(X86_64_REGISTER_XMM3, extended.xmm_registers[3].value);
- SetXMMRegister(X86_64_REGISTER_XMM4, extended.xmm_registers[4].value);
- SetXMMRegister(X86_64_REGISTER_XMM5, extended.xmm_registers[5].value);
- SetXMMRegister(X86_64_REGISTER_XMM6, extended.xmm_registers[6].value);
- SetXMMRegister(X86_64_REGISTER_XMM7, extended.xmm_registers[7].value);
- SetXMMRegister(X86_64_REGISTER_XMM8, extended.xmm_registers[8].value);
- SetXMMRegister(X86_64_REGISTER_XMM9, extended.xmm_registers[9].value);
- SetXMMRegister(X86_64_REGISTER_XMM10, extended.xmm_registers[10].value);
- SetXMMRegister(X86_64_REGISTER_XMM11, extended.xmm_registers[11].value);
- SetXMMRegister(X86_64_REGISTER_XMM12, extended.xmm_registers[12].value);
- SetXMMRegister(X86_64_REGISTER_XMM13, extended.xmm_registers[13].value);
- SetXMMRegister(X86_64_REGISTER_XMM14, extended.xmm_registers[14].value);
- SetXMMRegister(X86_64_REGISTER_XMM15, extended.xmm_registers[15].value);
+ SetXMMRegister(X86_64_REGISTER_XMM0, extended.zmm_registers[0].value);
+ SetXMMRegister(X86_64_REGISTER_XMM1, extended.zmm_registers[1].value);
+ SetXMMRegister(X86_64_REGISTER_XMM2, extended.zmm_registers[2].value);
+ SetXMMRegister(X86_64_REGISTER_XMM3, extended.zmm_registers[3].value);
+ SetXMMRegister(X86_64_REGISTER_XMM4, extended.zmm_registers[4].value);
+ SetXMMRegister(X86_64_REGISTER_XMM5, extended.zmm_registers[5].value);
+ SetXMMRegister(X86_64_REGISTER_XMM6, extended.zmm_registers[6].value);
+ SetXMMRegister(X86_64_REGISTER_XMM7, extended.zmm_registers[7].value);
+ SetXMMRegister(X86_64_REGISTER_XMM8, extended.zmm_registers[8].value);
+ SetXMMRegister(X86_64_REGISTER_XMM9, extended.zmm_registers[9].value);
+ SetXMMRegister(X86_64_REGISTER_XMM10, extended.zmm_registers[10].value);
+ SetXMMRegister(X86_64_REGISTER_XMM11, extended.zmm_registers[11].value);
+ SetXMMRegister(X86_64_REGISTER_XMM12, extended.zmm_registers[12].value);
+ SetXMMRegister(X86_64_REGISTER_XMM13, extended.zmm_registers[13].value);
+ SetXMMRegister(X86_64_REGISTER_XMM14, extended.zmm_registers[14].value);
+ SetXMMRegister(X86_64_REGISTER_XMM15, extended.zmm_registers[15].value);
fInterruptVector = state.vector;
}
@@ -171,11 +171,11 @@
for (int32 i = 0; i < 16; i++) {
if (IsRegisterSet(X86_64_REGISTER_XMM0 + i)) {
- memcpy(&x64State->extended_registers.xmm_registers[i],
- &fXMMRegisters[i], sizeof(x86_64_xmm_register));
+ memcpy(&x64State->extended_registers.zmm_registers[i],
+ &fXMMRegisters[i], sizeof(x86_64_ymm_register));
} else {
- memset(&x64State->extended_registers.xmm_registers[i],
- 0, sizeof(x86_64_xmm_register));
+ memset(&x64State->extended_registers.zmm_registers[i],
+ 0, sizeof(x86_64_ymm_register));
}
}
@@ -269,11 +269,11 @@
value.ToPointer(), value.Size());
} else if (index >= X86_64_REGISTER_XMM0
&& index < X86_64_XMM_REGISTER_END) {
- if (value.Size() > sizeof(x86_64_xmm_register))
+ if (value.Size() > sizeof(x86_64_ymm_register))
return false;
memset(&fXMMRegisters[index - X86_64_REGISTER_XMM0], 0,
- sizeof(x86_64_xmm_register));
+ sizeof(x86_64_ymm_register));
memcpy(fXMMRegisters[index - X86_64_REGISTER_XMM0].value,
value.ToPointer(), value.Size());
} else
@@ -378,7 +378,7 @@
return;
memcpy(fXMMRegisters[index - X86_64_REGISTER_XMM0].value, value,
- sizeof(x86_64_xmm_register));
+ sizeof(x86_64_ymm_register));
fSetRegisters[index] = 1;
}
diff --git a/src/kits/debugger/arch/x86_64/CpuStateX8664.h b/src/kits/debugger/arch/x86_64/CpuStateX8664.h
index da7ca8a..c7cc276 100644
--- a/src/kits/debugger/arch/x86_64/CpuStateX8664.h
+++ b/src/kits/debugger/arch/x86_64/CpuStateX8664.h
@@ -148,7 +148,7 @@
uint64 fIntRegisters[X86_64_INT_REGISTER_COUNT];
double fFloatRegisters[X86_64_FP_REGISTER_COUNT];
x86_64_fp_register fMMXRegisters[X86_64_MMX_REGISTER_COUNT];
- x86_64_xmm_register fXMMRegisters[X86_64_XMM_REGISTER_COUNT];
+ x86_64_ymm_register fXMMRegisters[X86_64_XMM_REGISTER_COUNT];
RegisterBitSet fSetRegisters;
uint64 fInterruptVector;
};
--
To view, visit https://review.haiku-os.org/c/haiku/+/3038
To unsubscribe, or for help writing mail filters, visit
https://review.haiku-os.org/settings
Gerrit-Project: haiku
Gerrit-Branch: master
Gerrit-Change-Id: If93680ffa0339c19bab517876b4e029f5d66b240
Gerrit-Change-Number: 3038
Gerrit-PatchSet: 1
Gerrit-Owner: Adrien Destugues <pulkomandy@xxxxxxxxx>
Gerrit-MessageType: newchange