added 1 changeset to branch 'refs/remotes/pdziepak-github/lock_elision' old head: 13afefafb877852db7fbb6acde12add448ea56eb new head: a18aae134137f5e8e1142d4fdbd038c50298cd56 overview: https://github.com/pdziepak/Haiku/compare/13afefa...a18aae1 ---------------------------------------------------------------------------- a18aae1: libroot, kernel: Introduce atomic_{get,set}_aligned() functions Atomically accessing unaligned memory locations is very costly on x86 and x86_64: it may require locking the bus, and it is not wait-free. Moreover, atomic_get() writes to the location it reads the value from, which may result in cache line bouncing and the abort of a memory transaction. The atomic_{get,set}_aligned() functions assume that the access is to an aligned memory location, which on the x86 and x86_64 architectures reduces these functions to just a simple mov operation plus an appropriate memory barrier. As a result, the bus is not locked, both functions are wait-free, and atomic_get() doesn't perform any write operation on the shared memory location. The x86 versions of these functions use "lock; addl $0, (%esp)", which acts as a full memory barrier. Despite the explicit "lock" prefix, the bus is not locked as long as the top of the stack is in the cache (which is a safe thing to assume). Also, since a thread's stack isn't supposed to be accessed by other threads, in most cases this instruction won't invalidate any cache lines on other CPUs. 
[ Pawel Dziepak <pdziepak@xxxxxxxxxxx> ] ---------------------------------------------------------------------------- Commit: a18aae134137f5e8e1142d4fdbd038c50298cd56 Author: Pawel Dziepak <pdziepak@xxxxxxxxxxx> Date: Mon Jul 22 20:10:07 2013 UTC ---------------------------------------------------------------------------- 8 files changed, 205 insertions(+), 2 deletions(-) headers/os/support/SupportDefs.h | 4 ++ headers/private/kernel/util/atomic.h | 6 ++- src/system/libroot/os/arch/arm/atomic.S | 22 ++++++++++ src/system/libroot/os/arch/m68k/atomic.S | 58 ++++++++++++++++++++++++++ src/system/libroot/os/arch/mipsel/atomic.S | 12 ++++++ src/system/libroot/os/arch/ppc/atomic.S | 20 +++++++++ src/system/libroot/os/arch/x86/atomic.S | 56 +++++++++++++++++++++++++ src/system/libroot/os/arch/x86_64/atomic.S | 29 +++++++++++++ ---------------------------------------------------------------------------- diff --git a/headers/os/support/SupportDefs.h b/headers/os/support/SupportDefs.h index 7cc5aaf..8b91d30 100644 --- a/headers/os/support/SupportDefs.h +++ b/headers/os/support/SupportDefs.h @@ -197,18 +197,22 @@ extern "C" { /* Atomic functions; previous value is returned */ extern int32 atomic_set(vint32 *value, int32 newValue); +extern void atomic_set_aligned(vint32* value, int32 newValue); extern int32 atomic_test_and_set(vint32 *value, int32 newValue, int32 testAgainst); extern int32 atomic_add(vint32 *value, int32 addValue); extern int32 atomic_and(vint32 *value, int32 andValue); extern int32 atomic_or(vint32 *value, int32 orValue); extern int32 atomic_get(vint32 *value); +extern int32 atomic_get_aligned(vint32 *value); extern int64 atomic_set64(vint64 *value, int64 newValue); +extern void atomic_set64_aligned(vint64* value, int64 newValue); extern int64 atomic_test_and_set64(vint64 *value, int64 newValue, int64 testAgainst); extern int64 atomic_add64(vint64 *value, int64 addValue); extern int64 atomic_and64(vint64 *value, int64 andValue); extern int64 
atomic_or64(vint64 *value, int64 orValue); extern int64 atomic_get64(vint64 *value); +extern int64 atomic_get64_aligned(vint64* value); /* Other stuff */ extern void* get_stack_frame(void); diff --git a/headers/private/kernel/util/atomic.h b/headers/private/kernel/util/atomic.h index 510df65..f69c0f8 100644 --- a/headers/private/kernel/util/atomic.h +++ b/headers/private/kernel/util/atomic.h @@ -30,6 +30,7 @@ atomic_pointer_test_and_set(PointerType** _pointer, const PointerType* set, template<typename PointerType> PointerType* atomic_pointer_set(PointerType** _pointer, const PointerType* set) { + ASSERT((_pointer & sizeof(PointerType*) - 1) == 0); #if LONG_MAX == INT_MAX return (PointerType*)atomic_set((vint32*)_pointer, (int32)set); #else @@ -41,10 +42,11 @@ atomic_pointer_set(PointerType** _pointer, const PointerType* set) template<typename PointerType> PointerType* atomic_pointer_get(PointerType** _pointer) { + ASSERT((_pointer & sizeof(PointerType*) - 1) == 0); #if LONG_MAX == INT_MAX - return (PointerType*)atomic_get((vint32*)_pointer); + return (PointerType*)atomic_get_aligned((vint32*)_pointer); #else - return (PointerType*)atomic_get64((vint64*)_pointer); + return (PointerType*)atomic_get64_aligned((vint64*)_pointer); #endif } diff --git a/src/system/libroot/os/arch/arm/atomic.S b/src/system/libroot/os/arch/arm/atomic.S index 0127fd0..1446abd 100644 --- a/src/system/libroot/os/arch/arm/atomic.S +++ b/src/system/libroot/os/arch/arm/atomic.S @@ -126,6 +126,22 @@ miss4: ldrex r12, [r0] #endif FUNCTION_END(atomic_set) +/* int atomic_set_aligned(int *value, int setTo) + */ +FUNCTION(atomic_set_aligned): +#if __ARM_ARCH__ >= 6 +0: ldrex r12, [r0] + strex r3, r1, [r0] + teq r3, #0 + bne 0b + bx lr +#else + mov r3, r0 + swp r0, r1, [r3] + bx lr +#endif +FUNCTION_END(atomic_set_aligned) + /* int atomic_test_and_set(int *value, int setTo, int testValue) */ FUNCTION(atomic_test_and_set): @@ -164,6 +180,12 @@ FUNCTION(atomic_get): bx lr FUNCTION_END(atomic_get) +/* int 
atomic_get_aligned(int *value) + */ +FUNCTION(atomic_get_aligned): + ldr r0, [r0] + bx lr +FUNCTION_END(atomic_get_aligned) /* int64 atomic_add64(vint64 *value, int64 addValue) */ //FUNCTION(atomic_add64): diff --git a/src/system/libroot/os/arch/m68k/atomic.S b/src/system/libroot/os/arch/m68k/atomic.S index e23c558..2d6107a 100644 --- a/src/system/libroot/os/arch/m68k/atomic.S +++ b/src/system/libroot/os/arch/m68k/atomic.S @@ -69,6 +69,17 @@ miss4: cas.l %d0,%d1,(%a0) rts FUNCTION_END(atomic_set) +/* int atomic_set_aligned(int *value, int setTo) + */ +FUNCTION(atomic_set_aligned): + move.l (4,%a7),%a0 + move.l (%a0),%d0 + move.l (8,%a7),%d1 +miss4: cas.l %d0,%d1,(%a0) + bne miss4 + rts +FUNCTION_END(atomic_set_aligned) + /* int atomic_test_and_set(int *value, int setTo, int testValue) */ FUNCTION(atomic_test_and_set): @@ -91,6 +102,18 @@ FUNCTION(atomic_get): rts FUNCTION_END(atomic_get) +/* int atomic_get_aligned(int *value) + */ +FUNCTION(atomic_get_aligned): + move.l (4,%a7),%a0 + move.l (%a0),%d0 + move.l %d0,%d1 + cas.l %d0,%d1,(%a0) + // we must use cas... so we change to the same value if matching, + // else we get the correct one anyway + rts +FUNCTION_END(atomic_get_aligned) + /* m68k elf convention is to return structs in (a0) * but use d0/d1 for int64 and small structs. 
* d0 MSB, d1 LSB @@ -176,6 +199,24 @@ miss8: cas2.l %d0:%d1,%d2:%d3,(%a2):(%a1) rts FUNCTION_END(atomic_set64) +/* int64 atomic_set64_aligned(vint64 *value, int64 newValue) */ +FUNCTION(atomic_set64_aligned): + movem.l %d2-%d3/%a2,-(%a7) + move.l (4,%a7),%a2 + lea.l (4,%a2),%a1 + // new value + move.l (12,%a7),%d3 /*LSB*/ + move.l (8,%a7),%d2 /*MSB*/ + // old value + move.l (%a1),%d1 /*LSB*/ + move.l (%a2),%d0 /*MSB*/ +miss8: cas2.l %d0:%d1,%d2:%d3,(%a2):(%a1) + bne miss8 + // return value d0:d1 + movem.l (%a7)+,%d2-%d3/%a2 + rts +FUNCTION_END(atomic_set64_aligned) + /* int64 atomic_test_and_set64(vint64 *value, int64 newValue, int64 testAgainst) */ FUNCTION(atomic_test_and_set64): movem.l %d2-%d3/%a2,-(%a7) @@ -209,3 +250,20 @@ FUNCTION(atomic_get64): movem.l (%a7)+,%d2-%d3/%a2 rts FUNCTION_END(atomic_get64) + +/* int64 atomic_get64_aligned(vint64 *value) */ +FUNCTION(atomic_get64_aligned): + movem.l %d2-%d3/%a2,-(%a7) + move.l (4,%a7),%a2 + lea.l (4,%a2),%a1 + move.l (%a1),%d1 /*LSB*/ + move.l (%a2),%d0 /*MSB*/ + move.l %d1,%d3 + move.l %d0,%d2 + // we must use cas... 
so we change to the same value if matching, + // else we get the correct one anyway + cas2.l %d0:%d1,%d2:%d3,(%a2):(%a1) + // return value + movem.l (%a7)+,%d2-%d3/%a2 + rts +FUNCTION_END(atomic_get64_aligned) diff --git a/src/system/libroot/os/arch/mipsel/atomic.S b/src/system/libroot/os/arch/mipsel/atomic.S index 8270007..70efab9 100644 --- a/src/system/libroot/os/arch/mipsel/atomic.S +++ b/src/system/libroot/os/arch/mipsel/atomic.S @@ -44,3 +44,15 @@ lost5: jr $ra */ FUNCTION(atomic_get): lost6: jr $ra + +/* int atomic_set_aligned(int *value, int setTo) + * (r3) r3 r4 + */ +FUNCTION(atomic_set_aligned): +lost7: jr $ra + +/* int atomic_get_aligned(int *value) + * (r3) r3 + */ +FUNCTION(atomic_get_aligned): +lost8: jr $ra diff --git a/src/system/libroot/os/arch/ppc/atomic.S b/src/system/libroot/os/arch/ppc/atomic.S index cde60fd..9396c5c 100644 --- a/src/system/libroot/os/arch/ppc/atomic.S +++ b/src/system/libroot/os/arch/ppc/atomic.S @@ -50,6 +50,16 @@ lost4: lwarx %r5, 0, %r3 mr %r3, %r5 blr +/* int atomic_set_aligned(int *value, int setTo) + * (r3) r3 r4 + */ +FUNCTION(atomic_set_aligned): +0: lwarx %r5, 0, %r3 + stwcx. %r4, 0, %r3 + bne- 0b + mr %r3, %r5 + blr + /* int atomic_test_and_set(int *value, int setTo, int testValue) * (r3) r3 r4 r5 */ @@ -71,3 +81,13 @@ lost6: lwarx %r5, 0, %r3 bne- lost6 mr %r3, %r5 blr + +/* int atomic_get_aligned(int *value) + * (r3) r3 + */ +FUNCTION(atomic_get_aligned): +0: lwarx %r5, 0, %r3 + stwcx. 
%r5, 0, %r3 + bne- 0b + mr %r3, %r5 + blr diff --git a/src/system/libroot/os/arch/x86/atomic.S b/src/system/libroot/os/arch/x86/atomic.S index 31b46f1..e20b355 100644 --- a/src/system/libroot/os/arch/x86/atomic.S +++ b/src/system/libroot/os/arch/x86/atomic.S @@ -20,6 +20,16 @@ FUNCTION(atomic_set): ret FUNCTION_END(atomic_set) +/* void atomic_set_aligned(vint32 *value, int32 newValue) */ +FUNCTION(atomic_set_aligned): + movl 4(%esp),%edx + movl 8(%esp),%eax + lock + addl $0, (%esp) + movl %eax, (%edx) + ret +FUNCTION_END(atomic_set_aligned) + /* int32 atomic_test_and_set(vint32 *value, int32 newValue, int32 testAgainst) */ FUNCTION(atomic_test_and_set): movl 4(%esp),%edx @@ -77,6 +87,15 @@ _atomic_get1: ret FUNCTION_END(atomic_get) +/* int32 atomic_get_aligned(vint32* value) */ +FUNCTION(atomic_get_aligned): + movl 4(%esp), %edx + movl (%edx), %eax + lock + addl $0, (%esp) + ret +FUNCTION_END(atomic_get_aligned) + /* int64 atomic_set64(vint64 *value, int64 newValue) */ FUNCTION(atomic_set64): push %esi @@ -95,6 +114,24 @@ _atomic_set64_1: ret FUNCTION_END(atomic_set64) +/* void atomic_set64_aligned(vint64 *value, int64 newValue) */ +FUNCTION(atomic_set64_aligned): + push %esi + push %ebx + movl 12(%esp), %esi /* value */ + movl 16(%esp), %ebx /* newValue low */ + movl 20(%esp), %ecx /* newValue high */ +1: + movl (%esi), %eax /* testAgainst low */ + movl 4(%esi), %edx /* testAgainst high */ + lock + cmpxchg8b (%esi) + jnz 1b + pop %ebx + pop %esi + ret +FUNCTION_END(atomic_set64_aligned) + /* int64 atomic_test_and_set64(vint64 *value, int64 newValue, int64 testAgainst) */ FUNCTION(atomic_test_and_set64): push %esi @@ -188,3 +225,22 @@ _atomic_get64_1: pop %esi ret FUNCTION_END(atomic_get64) + +/* int64 atomic_get64_aligned(vint64 *value) */ +FUNCTION(atomic_get64_aligned): + push %esi + push %ebx + movl 12(%esp), %esi +1: + movl (%esi), %eax + movl 4(%esi), %edx + movl %eax, %ebx + movl %edx, %ecx + lock + cmpxchg8b (%esi) + jnz 1b + pop %ebx + pop %esi + ret 
+FUNCTION_END(atomic_get64_aligned) + diff --git a/src/system/libroot/os/arch/x86_64/atomic.S b/src/system/libroot/os/arch/x86_64/atomic.S index 7abf2d2..33c4efe 100644 --- a/src/system/libroot/os/arch/x86_64/atomic.S +++ b/src/system/libroot/os/arch/x86_64/atomic.S @@ -17,6 +17,13 @@ FUNCTION(atomic_set): ret FUNCTION_END(atomic_set) +/* void atomic_set_aligned(vint64 *value, int64 newValue) */ +FUNCTION(atomic_set_aligned): + sfence + movl %esi, (%rdi) + ret +FUNCTION_END(atomic_set_aligned) + /* int32 atomic_test_and_set(vint32 *value, int32 newValue, int32 testAgainst) */ FUNCTION(atomic_test_and_set): movl %edx, %eax @@ -68,6 +75,13 @@ FUNCTION(atomic_get): ret FUNCTION_END(atomic_get) +/* int32 atomic_get_aligned(vint32* value) */ +FUNCTION(atomic_get_aligned): + movl (%rdi), %eax + lfence + ret +FUNCTION_END(atomic_get_aligned) + /* int64 atomic_set64(vint64 *value, int64 newValue) */ FUNCTION(atomic_set64): movq %rsi, %rax @@ -76,6 +90,13 @@ FUNCTION(atomic_set64): ret FUNCTION_END(atomic_set64) +/* void atomic_set64_aligned(vint64 *value, int64 newValue) */ +FUNCTION(atomic_set64_aligned): + sfence + movq %rsi, (%rdi) + ret +FUNCTION_END(atomic_set64_aligned) + /* int64 atomic_test_and_set64(vint64 *value, int64 newValue, int64 testAgainst) */ FUNCTION(atomic_test_and_set64): movq %rdx, %rax @@ -126,3 +147,11 @@ FUNCTION(atomic_get64): jnz 1b ret FUNCTION_END(atomic_get64) + +/* int32 atomic_get64_aligned(vint32* value) */ +FUNCTION(atomic_get64_aligned): + movq (%rdi), %rax + lfence + ret +FUNCTION_END(atomic_get64_aligned) +