[haiku-commits] BRANCH pdziepak-github.lock_elision [a18aae1] in src/system/libroot/os/arch: m68k x86 x86_64 arm ppc

  • From: pdziepak-github.lock_elision <community@xxxxxxxxxxxx>
  • To: haiku-commits@xxxxxxxxxxxxx
  • Date: Mon, 22 Jul 2013 22:30:34 +0200 (CEST)

added 1 changeset to branch 'refs/remotes/pdziepak-github/lock_elision'
old head: 13afefafb877852db7fbb6acde12add448ea56eb
new head: a18aae134137f5e8e1142d4fdbd038c50298cd56
overview: https://github.com/pdziepak/Haiku/compare/13afefa...a18aae1

----------------------------------------------------------------------------

a18aae1: libroot, kernel: Introduce atomic_{get,set}_aligned() functions
  
  Atomically accessing unaligned locations in memory is very costly on x86
  and x86_64. It may require locking the bus and is not wait-free.
  Moreover, atomic_get() writes to the location it reads the value from,
  which may result in cache line bouncing and the abort of a memory
  transaction.
  
  The atomic_{get,set}_aligned() functions assume that the access is to an
  aligned memory location, which on the x86 and x86_64 architectures reduces
  these functions to a simple mov operation and an appropriate memory barrier.
  As a result, the bus is not locked, both functions are wait-free, and
  atomic_get() doesn't perform any write operation on the shared memory location.
  
  The x86 versions of these functions use "lock; addl $0, (%esp)", which acts
  as a full memory barrier. Despite the explicit "lock" prefix, the bus is
  not locked as long as the top of the stack is in the cache (which is a safe
  thing to assume). Also, since the thread stack isn't supposed to be
  accessed by other threads, this instruction in most cases won't invalidate
  any cache line on other CPUs.

                                    [ Pawel Dziepak <pdziepak@xxxxxxxxxxx> ]

----------------------------------------------------------------------------

Commit:      a18aae134137f5e8e1142d4fdbd038c50298cd56
Author:      Pawel Dziepak <pdziepak@xxxxxxxxxxx>
Date:        Mon Jul 22 20:10:07 2013 UTC

----------------------------------------------------------------------------

8 files changed, 205 insertions(+), 2 deletions(-)
headers/os/support/SupportDefs.h           |  4 ++
headers/private/kernel/util/atomic.h       |  6 ++-
src/system/libroot/os/arch/arm/atomic.S    | 22 ++++++++++
src/system/libroot/os/arch/m68k/atomic.S   | 58 ++++++++++++++++++++++++++
src/system/libroot/os/arch/mipsel/atomic.S | 12 ++++++
src/system/libroot/os/arch/ppc/atomic.S    | 20 +++++++++
src/system/libroot/os/arch/x86/atomic.S    | 56 +++++++++++++++++++++++++
src/system/libroot/os/arch/x86_64/atomic.S | 29 +++++++++++++

----------------------------------------------------------------------------

diff --git a/headers/os/support/SupportDefs.h b/headers/os/support/SupportDefs.h
index 7cc5aaf..8b91d30 100644
--- a/headers/os/support/SupportDefs.h
+++ b/headers/os/support/SupportDefs.h
@@ -197,18 +197,22 @@ extern "C" {
 
 /* Atomic functions; previous value is returned */
 extern int32   atomic_set(vint32 *value, int32 newValue);
+extern void            atomic_set_aligned(vint32* value, int32 newValue);
 extern int32   atomic_test_and_set(vint32 *value, int32 newValue, int32 
testAgainst);
 extern int32   atomic_add(vint32 *value, int32 addValue);
 extern int32   atomic_and(vint32 *value, int32 andValue);
 extern int32   atomic_or(vint32 *value, int32 orValue);
 extern int32   atomic_get(vint32 *value);
+extern int32   atomic_get_aligned(vint32 *value);
 
 extern int64   atomic_set64(vint64 *value, int64 newValue);
+extern void            atomic_set64_aligned(vint64* value, int64 newValue);
 extern int64   atomic_test_and_set64(vint64 *value, int64 newValue, int64 
testAgainst);
 extern int64   atomic_add64(vint64 *value, int64 addValue);
 extern int64   atomic_and64(vint64 *value, int64 andValue);
 extern int64   atomic_or64(vint64 *value, int64 orValue);
 extern int64   atomic_get64(vint64 *value);
+extern int64   atomic_get64_aligned(vint64* value);
 
 /* Other stuff */
 extern void*   get_stack_frame(void);
diff --git a/headers/private/kernel/util/atomic.h 
b/headers/private/kernel/util/atomic.h
index 510df65..f69c0f8 100644
--- a/headers/private/kernel/util/atomic.h
+++ b/headers/private/kernel/util/atomic.h
@@ -30,6 +30,7 @@ atomic_pointer_test_and_set(PointerType** _pointer, const 
PointerType* set,
 template<typename PointerType> PointerType*
 atomic_pointer_set(PointerType** _pointer, const PointerType* set)
 {
+       ASSERT((_pointer & sizeof(PointerType*) - 1) == 0);
 #if LONG_MAX == INT_MAX
        return (PointerType*)atomic_set((vint32*)_pointer, (int32)set);
 #else
@@ -41,10 +42,11 @@ atomic_pointer_set(PointerType** _pointer, const 
PointerType* set)
 template<typename PointerType> PointerType*
 atomic_pointer_get(PointerType** _pointer)
 {
+       ASSERT((_pointer & sizeof(PointerType*) - 1) == 0);
 #if LONG_MAX == INT_MAX
-       return (PointerType*)atomic_get((vint32*)_pointer);
+       return (PointerType*)atomic_get_aligned((vint32*)_pointer);
 #else
-       return (PointerType*)atomic_get64((vint64*)_pointer);
+       return (PointerType*)atomic_get64_aligned((vint64*)_pointer);
 #endif
 }
 
diff --git a/src/system/libroot/os/arch/arm/atomic.S 
b/src/system/libroot/os/arch/arm/atomic.S
index 0127fd0..1446abd 100644
--- a/src/system/libroot/os/arch/arm/atomic.S
+++ b/src/system/libroot/os/arch/arm/atomic.S
@@ -126,6 +126,22 @@ miss4:             ldrex   r12, [r0]
 #endif
 FUNCTION_END(atomic_set)
 
+/* int atomic_set_aligned(int *value, int setTo)
+ */
+FUNCTION(atomic_set_aligned):
+#if __ARM_ARCH__ >= 6
+0:             ldrex   r12, [r0]
+                       strex   r3, r1, [r0]
+                       teq     r3, #0
+                       bne     0b
+               bx  lr
+#else
+       mov     r3, r0
+       swp     r0, r1, [r3]
+        bx      lr
+#endif
+FUNCTION_END(atomic_set_aligned)
+
 /* int atomic_test_and_set(int *value, int setTo, int testValue) 
  */
 FUNCTION(atomic_test_and_set):
@@ -164,6 +180,12 @@ FUNCTION(atomic_get):
         bx     lr
 FUNCTION_END(atomic_get)
 
+/* int atomic_get_aligned(int *value) 
+ */
+FUNCTION(atomic_get_aligned):
+       ldr     r0, [r0]
+        bx     lr
+FUNCTION_END(atomic_get_aligned)
 
 /* int64       atomic_add64(vint64 *value, int64 addValue) */
 //FUNCTION(atomic_add64):
diff --git a/src/system/libroot/os/arch/m68k/atomic.S 
b/src/system/libroot/os/arch/m68k/atomic.S
index e23c558..2d6107a 100644
--- a/src/system/libroot/os/arch/m68k/atomic.S
+++ b/src/system/libroot/os/arch/m68k/atomic.S
@@ -69,6 +69,17 @@ miss4:       cas.l   %d0,%d1,(%a0)
                rts
 FUNCTION_END(atomic_set)
 
+/* int atomic_set_aligned(int *value, int setTo)
+ */
+FUNCTION(atomic_set_aligned):
+               move.l  (4,%a7),%a0
+               move.l  (%a0),%d0
+               move.l  (8,%a7),%d1
+miss4: cas.l   %d0,%d1,(%a0)
+               bne             miss4
+               rts
+FUNCTION_END(atomic_set_aligned)
+
 /* int atomic_test_and_set(int *value, int setTo, int testValue) 
  */
 FUNCTION(atomic_test_and_set):
@@ -91,6 +102,18 @@ FUNCTION(atomic_get):
                rts
 FUNCTION_END(atomic_get)
 
+/* int atomic_get_aligned(int *value) 
+ */
+FUNCTION(atomic_get_aligned):
+               move.l  (4,%a7),%a0
+               move.l  (%a0),%d0
+               move.l  %d0,%d1
+               cas.l   %d0,%d1,(%a0)
+               // we must use cas... so we change to the same value if 
matching,
+               // else we get the correct one anyway
+               rts
+FUNCTION_END(atomic_get_aligned)
+
 /* m68k elf convention is to return structs in (a0)
  * but use d0/d1 for int64 and small structs.
  * d0 MSB, d1 LSB
@@ -176,6 +199,24 @@ miss8:     cas2.l  %d0:%d1,%d2:%d3,(%a2):(%a1)
                rts
 FUNCTION_END(atomic_set64)
 
+/* int64       atomic_set64_aligned(vint64 *value, int64 newValue) */
+FUNCTION(atomic_set64_aligned):
+               movem.l %d2-%d3/%a2,-(%a7)
+               move.l  (4,%a7),%a2
+               lea.l   (4,%a2),%a1
+               // new value
+               move.l  (12,%a7),%d3    /*LSB*/
+               move.l  (8,%a7),%d2             /*MSB*/
+               // old value
+               move.l  (%a1),%d1               /*LSB*/
+               move.l  (%a2),%d0               /*MSB*/
+miss8: cas2.l  %d0:%d1,%d2:%d3,(%a2):(%a1)
+               bne             miss8
+               // return value d0:d1
+               movem.l (%a7)+,%d2-%d3/%a2
+               rts
+FUNCTION_END(atomic_set64_aligned)
+
 /* int64       atomic_test_and_set64(vint64 *value, int64 newValue, int64 
testAgainst) */
 FUNCTION(atomic_test_and_set64):
                movem.l %d2-%d3/%a2,-(%a7)
@@ -209,3 +250,20 @@ FUNCTION(atomic_get64):
                movem.l (%a7)+,%d2-%d3/%a2
                rts
 FUNCTION_END(atomic_get64)
+
+/* int64       atomic_get64_aligned(vint64 *value) */
+FUNCTION(atomic_get64_aligned):
+               movem.l %d2-%d3/%a2,-(%a7)
+               move.l  (4,%a7),%a2
+               lea.l   (4,%a2),%a1
+               move.l  (%a1),%d1       /*LSB*/
+               move.l  (%a2),%d0               /*MSB*/
+               move.l  %d1,%d3
+               move.l  %d0,%d2
+               // we must use cas... so we change to the same value if 
matching,
+               // else we get the correct one anyway
+               cas2.l  %d0:%d1,%d2:%d3,(%a2):(%a1)
+               // return value
+               movem.l (%a7)+,%d2-%d3/%a2
+               rts
+FUNCTION_END(atomic_get64_aligned)
diff --git a/src/system/libroot/os/arch/mipsel/atomic.S 
b/src/system/libroot/os/arch/mipsel/atomic.S
index 8270007..70efab9 100644
--- a/src/system/libroot/os/arch/mipsel/atomic.S
+++ b/src/system/libroot/os/arch/mipsel/atomic.S
@@ -44,3 +44,15 @@ lost5:               jr      $ra
  */
 FUNCTION(atomic_get):
 lost6:         jr      $ra
+
+/* int atomic_set_aligned(int *value, int setTo)
+ * (r3)           r3          r4
+ */
+FUNCTION(atomic_set_aligned):
+lost7:         jr      $ra
+
+/* int atomic_get_aligned(int *value)
+ * (r3)           r3
+ */
+FUNCTION(atomic_get_aligned):
+lost8:         jr      $ra
diff --git a/src/system/libroot/os/arch/ppc/atomic.S 
b/src/system/libroot/os/arch/ppc/atomic.S
index cde60fd..9396c5c 100644
--- a/src/system/libroot/os/arch/ppc/atomic.S
+++ b/src/system/libroot/os/arch/ppc/atomic.S
@@ -50,6 +50,16 @@ lost4:       lwarx   %r5, 0, %r3
                mr              %r3, %r5
                blr
 
+/* int atomic_set_aligned(int *value, int setTo)
+ * (r3)           r3          r4
+ */
+FUNCTION(atomic_set_aligned):
+0:             lwarx   %r5, 0, %r3
+               stwcx.  %r4, 0, %r3
+               bne-    0b
+               mr              %r3, %r5
+               blr
+
 /* int atomic_test_and_set(int *value, int setTo, int testValue)
  * (r3)                    r3          r4         r5
  */
@@ -71,3 +81,13 @@ lost6:       lwarx   %r5, 0, %r3
                bne-    lost6
                mr              %r3, %r5
                blr
+
+/* int atomic_get_aligned(int *value)
+ * (r3)           r3
+ */
+FUNCTION(atomic_get_aligned):
+0:             lwarx   %r5, 0, %r3
+               stwcx.  %r5, 0, %r3
+               bne-    0b
+               mr              %r3, %r5
+               blr
diff --git a/src/system/libroot/os/arch/x86/atomic.S 
b/src/system/libroot/os/arch/x86/atomic.S
index 31b46f1..e20b355 100644
--- a/src/system/libroot/os/arch/x86/atomic.S
+++ b/src/system/libroot/os/arch/x86/atomic.S
@@ -20,6 +20,16 @@ FUNCTION(atomic_set):
        ret
 FUNCTION_END(atomic_set)
 
+/* void                atomic_set_aligned(vint32 *value, int32 newValue) */
+FUNCTION(atomic_set_aligned):
+       movl            4(%esp),%edx
+       movl            8(%esp),%eax
+       lock
+       addl            $0, (%esp)
+       movl            %eax, (%edx)
+       ret
+FUNCTION_END(atomic_set_aligned)
+
 /* int32       atomic_test_and_set(vint32 *value, int32 newValue, int32 
testAgainst) */
 FUNCTION(atomic_test_and_set):
        movl            4(%esp),%edx
@@ -77,6 +87,15 @@ _atomic_get1:
        ret
 FUNCTION_END(atomic_get)
 
+/* int32       atomic_get_aligned(vint32* value) */
+FUNCTION(atomic_get_aligned):
+       movl            4(%esp), %edx
+       movl            (%edx), %eax
+       lock
+       addl            $0, (%esp)
+       ret
+FUNCTION_END(atomic_get_aligned)
+
 /* int64       atomic_set64(vint64 *value, int64 newValue) */
 FUNCTION(atomic_set64):
        push            %esi
@@ -95,6 +114,24 @@ _atomic_set64_1:
        ret
 FUNCTION_END(atomic_set64)
 
+/* void        atomic_set64_aligned(vint64 *value, int64 newValue) */
+FUNCTION(atomic_set64_aligned):
+       push            %esi
+       push            %ebx
+       movl            12(%esp), %esi  /* value */
+       movl            16(%esp), %ebx  /* newValue low */
+       movl            20(%esp), %ecx  /* newValue high */
+1:
+       movl            (%esi), %eax    /* testAgainst low */
+       movl            4(%esi), %edx   /* testAgainst high */
+       lock
+       cmpxchg8b       (%esi)
+       jnz                     1b
+       pop                     %ebx
+       pop                     %esi
+       ret
+FUNCTION_END(atomic_set64_aligned)
+
 /* int64       atomic_test_and_set64(vint64 *value, int64 newValue, int64 
testAgainst) */
 FUNCTION(atomic_test_and_set64):
        push            %esi
@@ -188,3 +225,22 @@ _atomic_get64_1:
        pop                     %esi
        ret
 FUNCTION_END(atomic_get64)
+
+/* int64       atomic_get64_aligned(vint64 *value) */
+FUNCTION(atomic_get64_aligned):
+       push            %esi
+       push            %ebx
+       movl            12(%esp), %esi
+1:
+       movl            (%esi), %eax
+       movl            4(%esi), %edx
+       movl            %eax, %ebx
+       movl            %edx, %ecx
+       lock
+       cmpxchg8b       (%esi)
+       jnz                     1b
+       pop                     %ebx
+       pop                     %esi
+       ret
+FUNCTION_END(atomic_get64_aligned)
+
diff --git a/src/system/libroot/os/arch/x86_64/atomic.S 
b/src/system/libroot/os/arch/x86_64/atomic.S
index 7abf2d2..33c4efe 100644
--- a/src/system/libroot/os/arch/x86_64/atomic.S
+++ b/src/system/libroot/os/arch/x86_64/atomic.S
@@ -17,6 +17,13 @@ FUNCTION(atomic_set):
        ret
 FUNCTION_END(atomic_set)
 
+/* void atomic_set_aligned(vint64 *value, int64 newValue) */
+FUNCTION(atomic_set_aligned):
+       sfence
+       movl            %esi, (%rdi)
+       ret
+FUNCTION_END(atomic_set_aligned)
+
 /* int32 atomic_test_and_set(vint32 *value, int32 newValue, int32 testAgainst) 
*/
 FUNCTION(atomic_test_and_set):
        movl            %edx, %eax
@@ -68,6 +75,13 @@ FUNCTION(atomic_get):
        ret
 FUNCTION_END(atomic_get)
 
+/* int32       atomic_get_aligned(vint32* value) */
+FUNCTION(atomic_get_aligned):
+       movl            (%rdi), %eax
+       lfence
+       ret
+FUNCTION_END(atomic_get_aligned)
+
 /* int64 atomic_set64(vint64 *value, int64 newValue) */
 FUNCTION(atomic_set64):
        movq            %rsi, %rax
@@ -76,6 +90,13 @@ FUNCTION(atomic_set64):
        ret
 FUNCTION_END(atomic_set64)
 
+/* void atomic_set64_aligned(vint64 *value, int64 newValue) */
+FUNCTION(atomic_set64_aligned):
+       sfence
+       movq            %rsi, (%rdi)
+       ret
+FUNCTION_END(atomic_set64_aligned)
+
 /* int64 atomic_test_and_set64(vint64 *value, int64 newValue, int64 
testAgainst) */
 FUNCTION(atomic_test_and_set64):
        movq            %rdx, %rax
@@ -126,3 +147,11 @@ FUNCTION(atomic_get64):
        jnz                     1b
        ret
 FUNCTION_END(atomic_get64)
+
+/* int32       atomic_get64_aligned(vint32* value) */
+FUNCTION(atomic_get64_aligned):
+       movq            (%rdi), %rax
+       lfence
+       ret
+FUNCTION_END(atomic_get64_aligned)
+


Other related posts: