added 1 changeset to branch 'refs/remotes/xyzzy-github/x86_64' old head: 6497f6b1ec4dd21d85ec01a18098138b03986a98 new head: 5234e66d32184c0843e7c5020c23e28f88e50569 ---------------------------------------------------------------------------- 5234e66: Optimized memcpy/memset for x86_64. [ Alex Smith <alex@xxxxxxxxxxxxxxxx> ] ---------------------------------------------------------------------------- Commit: 5234e66d32184c0843e7c5020c23e28f88e50569 Author: Alex Smith <alex@xxxxxxxxxxxxxxxx> Date: Sat Jul 21 10:55:13 2012 UTC ---------------------------------------------------------------------------- 4 files changed, 106 insertions(+), 40 deletions(-) src/system/kernel/arch/x86/arch_cpu.cpp | 3 - src/system/kernel/lib/arch/x86_64/Jamfile | 11 +- src/system/kernel/lib/arch/x86_64/arch_string.S | 96 +++++++++++++++++ src/system/kernel/lib/arch/x86_64/arch_string.cpp | 36 ------- ---------------------------------------------------------------------------- diff --git a/src/system/kernel/arch/x86/arch_cpu.cpp b/src/system/kernel/arch/x86/arch_cpu.cpp index 4fc23e4..bcc9050 100644 --- a/src/system/kernel/arch/x86/arch_cpu.cpp +++ b/src/system/kernel/arch/x86/arch_cpu.cpp @@ -113,15 +113,12 @@ extern int memcpy_generic_end; extern "C" void memset_generic(void* dest, int value, size_t count); extern int memset_generic_end; -// TODO x86_64 -#ifndef __x86_64__ x86_optimized_functions gOptimizedFunctions = { memcpy_generic, &memcpy_generic_end, memset_generic, &memset_generic_end }; -#endif static status_t diff --git a/src/system/kernel/lib/arch/x86_64/Jamfile b/src/system/kernel/lib/arch/x86_64/Jamfile index 2e43cfd..66f5fb8 100644 --- a/src/system/kernel/lib/arch/x86_64/Jamfile +++ b/src/system/kernel/lib/arch/x86_64/Jamfile @@ -1,5 +1,9 @@ SubDir HAIKU_TOP src system kernel lib arch x86_64 ; +# find the generated asm_offsets.h +SubDirHdrs [ FDirName $(TARGET_COMMON_DEBUG_OBJECT_DIR) system kernel arch + $(TARGET_KERNEL_ARCH) ] ; + SEARCH_SOURCE += [ FDirName $(SUBDIR) $(DOTDOT) generic ] ; local librootSources = [ FDirName $(HAIKU_TOP) src system libroot ] ; @@ -24,7 +28,12 @@ KernelMergeObject kernel_lib_posix_arch_$(TARGET_ARCH).o : kernel_longjmp_return.c kernel_setjmp_save_sigs.c - arch_string.cpp + arch_string.S : $(TARGET_KERNEL_PIC_CCFLAGS) ; + +# Explicitly tell the build system that arch_string.S includes the generated +# asm_offsets.h. +Includes [ FGristFiles arch_string.S ] + : <src!system!kernel!arch!x86>asm_offsets.h ; diff --git a/src/system/kernel/lib/arch/x86_64/arch_string.S b/src/system/kernel/lib/arch/x86_64/arch_string.S new file mode 100644 index 0000000..a24bbc8 --- /dev/null +++ b/src/system/kernel/lib/arch/x86_64/arch_string.S @@ -0,0 +1,96 @@ +/* + * Copyright 2012, Alex Smith, alex@xxxxxxxxxxxxxxxxx + * Distributed under the terms of the MIT License. + */ + + +#include <asm_defs.h> + +#include "asm_offsets.h" + + +.align 8 +FUNCTION(memcpy_generic): + push %rbp + movq %rsp, %rbp + + // Preserve original destination address for return value. + movq %rdi, %rax + + // size -> %rcx + movq %rdx, %rcx + + // For small copies, always do it bytewise, the additional overhead is + // not worth it. + cmp $24, %rcx + jl .Lmemcpy_generic_byte_copy + + // Do both source and dest have the same alignment? + movq %rsi, %r8 + xorq %rdi, %r8 + test $7, %r8 + jnz .Lmemcpy_generic_byte_copy + + // Align up to an 8-byte boundary. + movq %rdi, %r8 + andq $7, %r8 + jz .Lmemcpy_generic_qword_copy + movq $8, %rcx + subq %r8, %rcx + subq %rcx, %rdx // Subtract from the overall count. + rep + movsb + + // Get back the original count value. + movq %rdx, %rcx +.Lmemcpy_generic_qword_copy: + // Move by quadwords. + shrq $3, %rcx + rep + movsq + + // Get the remaining count. + movq %rdx, %rcx + andq $7, %rcx +.Lmemcpy_generic_byte_copy: + // Move any remaining data by bytes. + rep + movsb + + pop %rbp + ret +FUNCTION_END(memcpy_generic) +SYMBOL(memcpy_generic_end): + + +.align 8 +FUNCTION(memset_generic): + push %rbp + movq %rsp, %rbp + + // Preserve original destination address for return value. + movq %rdi, %r8 + + // size -> %rcx, value -> %al + movq %rdx, %rcx + movl %esi, %eax + + // Move by bytes. + rep + stosb + + movq %r8, %rax + pop %rbp + ret +FUNCTION_END(memset_generic) +SYMBOL(memset_generic_end): + + +FUNCTION(memcpy): + jmp *(gOptimizedFunctions + X86_OPTIMIZED_FUNCTIONS_memcpy) +FUNCTION_END(memcpy) + +FUNCTION(memset): + jmp *(gOptimizedFunctions + X86_OPTIMIZED_FUNCTIONS_memset) +FUNCTION_END(memset) + diff --git a/src/system/kernel/lib/arch/x86_64/arch_string.cpp b/src/system/kernel/lib/arch/x86_64/arch_string.cpp deleted file mode 100644 index a3d6c8b..0000000 --- a/src/system/kernel/lib/arch/x86_64/arch_string.cpp +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright 2012, Alex Smith, alex@xxxxxxxxxxxxxxxxx - * Distributed under the terms of the MIT License. -*/ - -// TODO: Replace these with optimized implementations. - - -#include <string.h> - - -void * -memcpy(void *dest, const void *src, size_t count) -{ - const unsigned char *s = reinterpret_cast<const unsigned char *>(src); - unsigned char *d = reinterpret_cast<unsigned char *>(dest); - - for (; count != 0; count--) { - *d++ = *s++; - } - - return dest; -} - - -void * -memset(void *dest, int val, size_t count) -{ - unsigned char *d = reinterpret_cast<unsigned char *>(dest); - - for (; count != 0; count--) { - *d++ = static_cast<unsigned char>(val); - } - - return dest; -}