[haiku-commits] BRANCH xyzzy-github.x86_64 - src/system/kernel/lib/arch/x86_64

  • From: xyzzy-github.x86_64 <community@xxxxxxxxxxxx>
  • To: haiku-commits@xxxxxxxxxxxxx
  • Date: Sat, 21 Jul 2012 13:49:07 +0200 (CEST)

added 1 changeset to branch 'refs/remotes/xyzzy-github/x86_64'
old head: 6497f6b1ec4dd21d85ec01a18098138b03986a98
new head: 5234e66d32184c0843e7c5020c23e28f88e50569

----------------------------------------------------------------------------

5234e66: Optimized memcpy/memset for x86_64.

                                      [ Alex Smith <alex@xxxxxxxxxxxxxxxx> ]

----------------------------------------------------------------------------

Commit:      5234e66d32184c0843e7c5020c23e28f88e50569

Author:      Alex Smith <alex@xxxxxxxxxxxxxxxx>
Date:        Sat Jul 21 10:55:13 2012 UTC

----------------------------------------------------------------------------

4 files changed, 106 insertions(+), 40 deletions(-)
src/system/kernel/arch/x86/arch_cpu.cpp           |    3 -
src/system/kernel/lib/arch/x86_64/Jamfile         |   11 +-
src/system/kernel/lib/arch/x86_64/arch_string.S   |   96 +++++++++++++++++
src/system/kernel/lib/arch/x86_64/arch_string.cpp |   36 -------

----------------------------------------------------------------------------

diff --git a/src/system/kernel/arch/x86/arch_cpu.cpp b/src/system/kernel/arch/x86/arch_cpu.cpp
index 4fc23e4..bcc9050 100644
--- a/src/system/kernel/arch/x86/arch_cpu.cpp
+++ b/src/system/kernel/arch/x86/arch_cpu.cpp
@@ -113,15 +113,12 @@ extern int memcpy_generic_end;
 extern "C" void memset_generic(void* dest, int value, size_t count);
 extern int memset_generic_end;
 
-// TODO x86_64
-#ifndef __x86_64__
 x86_optimized_functions gOptimizedFunctions = {
        memcpy_generic,
        &memcpy_generic_end,
        memset_generic,
        &memset_generic_end
 };
-#endif
 
 
 static status_t
diff --git a/src/system/kernel/lib/arch/x86_64/Jamfile b/src/system/kernel/lib/arch/x86_64/Jamfile
index 2e43cfd..66f5fb8 100644
--- a/src/system/kernel/lib/arch/x86_64/Jamfile
+++ b/src/system/kernel/lib/arch/x86_64/Jamfile
@@ -1,5 +1,9 @@
 SubDir HAIKU_TOP src system kernel lib arch x86_64 ;
 
+# find the generated asm_offsets.h
+SubDirHdrs [ FDirName $(TARGET_COMMON_DEBUG_OBJECT_DIR) system kernel arch
+       $(TARGET_KERNEL_ARCH) ] ;
+
 SEARCH_SOURCE += [ FDirName $(SUBDIR) $(DOTDOT) generic ] ;
 
 local librootSources = [ FDirName $(HAIKU_TOP) src system libroot ] ;
@@ -24,7 +28,12 @@ KernelMergeObject kernel_lib_posix_arch_$(TARGET_ARCH).o :
        kernel_longjmp_return.c
        kernel_setjmp_save_sigs.c
 
-       arch_string.cpp
+       arch_string.S
 
        : $(TARGET_KERNEL_PIC_CCFLAGS)
 ;
+
+# Explicitly tell the build system that arch_string.S includes the generated
+# asm_offsets.h.
+Includes [ FGristFiles arch_string.S ]
+       : <src!system!kernel!arch!x86>asm_offsets.h ;
diff --git a/src/system/kernel/lib/arch/x86_64/arch_string.S b/src/system/kernel/lib/arch/x86_64/arch_string.S
new file mode 100644
index 0000000..a24bbc8
--- /dev/null
+++ b/src/system/kernel/lib/arch/x86_64/arch_string.S
@@ -0,0 +1,96 @@
+/*
+ * Copyright 2012, Alex Smith, alex@xxxxxxxxxxxxxxxxx
+ * Distributed under the terms of the MIT License.
+ */
+
+
+#include <asm_defs.h>
+
+#include "asm_offsets.h"
+
+
+/*
+ * memcpy_generic: copy %rdx bytes from (%rsi) to (%rdi), lowest address
+ * first.
+ *
+ * In:       %rdi = destination, %rsi = source, %rdx = byte count
+ * Out:      %rax = original destination pointer
+ * Clobbers: %rcx, %rdx, %rsi, %rdi, %r8, flags
+ *
+ * Counts below 24, or buffers whose addresses differ in their low three
+ * bits, are copied bytewise. Otherwise the destination is first brought to
+ * an 8-byte boundary with byte moves, the bulk is moved as quadwords, and
+ * the 0-7 leftover bytes are moved bytewise.
+ *
+ * NOTE(review): the rep movs instructions assume the direction flag is
+ * clear on entry - confirm against the kernel's calling convention.
+ */
+.align 8
+FUNCTION(memcpy_generic):
+       push    %rbp
+       movq    %rsp, %rbp
+
+       // Preserve original destination address for return value.
+       movq    %rdi, %rax
+
+       // size -> %rcx
+       movq    %rdx, %rcx
+
+       // For small copies, always do it bytewise, the additional overhead is
+       // not worth it. The count is an unsigned size, so the comparison has
+       // to be unsigned as well (jb rather than jl).
+       cmp     $24, %rcx
+       jb      .Lmemcpy_generic_byte_copy
+
+       // Do both source and dest have the same alignment? If their low three
+       // address bits differ they can never be 8-byte aligned together.
+       movq    %rsi, %r8
+       xorq    %rdi, %r8
+       test    $7, %r8
+       jnz     .Lmemcpy_generic_byte_copy
+
+       // Align up to an 8-byte boundary. %rcx becomes the 1-7 bytes needed
+       // to reach it; the count is at least 24 here, so removing them from
+       // the overall count in %rdx cannot underflow.
+       movq    %rdi, %r8
+       andq    $7, %r8
+       jz      .Lmemcpy_generic_qword_copy
+       movq    $8, %rcx
+       subq    %r8, %rcx
+       subq    %rcx, %rdx
+       rep
+       movsb
+
+       // Reload the count that remains after the alignment bytes.
+       movq    %rdx, %rcx
+.Lmemcpy_generic_qword_copy:
+       // Move by quadwords.
+       shrq    $3, %rcx
+       rep
+       movsq
+
+       // Get the remaining count (0-7 bytes).
+       movq    %rdx, %rcx
+       andq    $7, %rcx
+.Lmemcpy_generic_byte_copy:
+       // Move any remaining data by bytes.
+       rep
+       movsb
+
+       pop     %rbp
+       ret
+FUNCTION_END(memcpy_generic)
+SYMBOL(memcpy_generic_end):
+
+
+/*
+ * memset_generic: store the low byte of %esi into %rdx consecutive bytes
+ * starting at (%rdi).
+ *
+ * In:       %rdi = destination, %esi = fill value, %rdx = byte count
+ * Out:      %rax = original destination pointer
+ * Clobbers: %rcx, %rdi, %r8
+ *
+ * NOTE(review): rep stosb assumes the direction flag is clear on entry -
+ * confirm against the kernel's calling convention.
+ */
+.align 8
+FUNCTION(memset_generic):
+       pushq   %rbp
+       movq    %rsp, %rbp
+
+       // stosb takes the fill byte from %al, so the destination pointer that
+       // has to be returned is parked in %r8 until the fill is done.
+       movq    %rdi, %r8
+
+       // value -> %al, size -> %rcx
+       movl    %esi, %eax
+       movq    %rdx, %rcx
+
+       // Fill bytewise.
+       rep stosb
+
+       // Hand back the original destination.
+       movq    %r8, %rax
+       popq    %rbp
+       ret
+FUNCTION_END(memset_generic)
+SYMBOL(memset_generic_end):
+
+
+// memcpy: tail-jumps through the function pointer stored in the memcpy slot
+// of gOptimizedFunctions, leaving all argument registers untouched, so the
+// selected implementation returns directly to memcpy's caller.
+// X86_OPTIMIZED_FUNCTIONS_memcpy is the slot's byte offset, presumably
+// provided by the generated asm_offsets.h - confirm.
+FUNCTION(memcpy):
+       jmp             *(gOptimizedFunctions + X86_OPTIMIZED_FUNCTIONS_memcpy)
+FUNCTION_END(memcpy)
+
+// memset: tail-jumps through the function pointer stored in the memset slot
+// of gOptimizedFunctions, leaving all argument registers untouched, so the
+// selected implementation returns directly to memset's caller.
+// X86_OPTIMIZED_FUNCTIONS_memset is the slot's byte offset, presumably
+// provided by the generated asm_offsets.h - confirm.
+FUNCTION(memset):
+       jmp             *(gOptimizedFunctions + X86_OPTIMIZED_FUNCTIONS_memset)
+FUNCTION_END(memset)
+
diff --git a/src/system/kernel/lib/arch/x86_64/arch_string.cpp b/src/system/kernel/lib/arch/x86_64/arch_string.cpp
deleted file mode 100644
index a3d6c8b..0000000
--- a/src/system/kernel/lib/arch/x86_64/arch_string.cpp
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright 2012, Alex Smith, alex@xxxxxxxxxxxxxxxxx
- * Distributed under the terms of the MIT License.
-*/
-
-// TODO: Replace these with optimized implementations.
-
-
-#include <string.h>
-
-
-void *
-memcpy(void *dest, const void *src, size_t count)
-{
-       const unsigned char *s = reinterpret_cast<const unsigned char *>(src);
-       unsigned char *d = reinterpret_cast<unsigned char *>(dest);
-
-       for (; count != 0; count--) {
-               *d++ = *s++;
-       }
-
-       return dest;
-}
-
-
-void *
-memset(void *dest, int val, size_t count)
-{
-       unsigned char *d = reinterpret_cast<unsigned char *>(dest);
-
-       for (; count != 0; count--) {
-               *d++ = static_cast<unsigned char>(val);
-       }
-
-       return dest;
-}


Other related posts:

  • » [haiku-commits] BRANCH xyzzy-github.x86_64 - src/system/kernel/lib/arch/x86_64 - xyzzy-github . x86_64