[haiku-commits] Re: BRANCH pdziepak-github.memcpy [ae2225b] in src/system: kernel/lib/arch/x86_64 kernel/arch/x86 libroot/posix/string/arch/x86_64 kernel/arch/x86/32 kernel/lib/arch/x86

  • From: Paweł Dziepak <pdziepak@xxxxxxxxxxx>
  • To: haiku-commits@xxxxxxxxxxxxx
  • Date: Sat, 6 Sep 2014 19:26:28 +0200

2014-09-06 17:01 GMT+02:00 pdziepak-github.memcpy <community@xxxxxxxxxxxx>:
<snip, lengthy introduction>

> Commit:      238ce16756c9be013262c224da70d8758ff4162a
> Author:      Paweł Dziepak <pdziepak@xxxxxxxxxxx>
> Date:        Sat Sep  6 14:25:00 2014 UTC
>
> kernel/x86[_64]: remove get_optimized_functions from cpu modules
>
> The possibility to specify custom memcpy and memset implementations
> in cpu modules is currently unused and there is generally no point
> in such feature.
>
> There are only 2 x86 vendors that really matter and there isn't
> very big difference in performance of the generic optmized versions
> of these funcions across different models. Even if we wanted different
> versions of memset and memcpy depending on the processor model or
> features much better solution would be to use STT_GNU_IFUNC and save
> one indirect call.
>
> Long story short, we don't really benefit in any way from
> get_optimized_functions and the feature it implements and it only adds
> unnecessary complexity to the code.
>
> Signed-off-by: Paweł Dziepak <pdziepak@xxxxxxxxxxx>
>
>
> ----------------------------------------------------------------------------
>
> diff --git a/headers/private/kernel/arch/x86/arch_cpu.h
> b/headers/private/kernel/arch/x86/arch_cpu.h
> index 88c5eb8..800f489 100644
> --- a/headers/private/kernel/arch/x86/arch_cpu.h
> +++ b/headers/private/kernel/arch/x86/arch_cpu.h
> @@ -262,13 +262,6 @@ typedef struct x86_mtrr_info {
>         uint8   type;
>  } x86_mtrr_info;
>
> -typedef struct x86_optimized_functions {
> -       void    (*memcpy)(void* dest, const void* source, size_t count);
> -       void*   memcpy_end;
> -       void    (*memset)(void* dest, int value, size_t count);
> -       void*   memset_end;
> -} x86_optimized_functions;
> -
>  typedef struct x86_cpu_module_info {
>         module_info     info;
>         uint32          (*count_mtrrs)(void);
> @@ -280,8 +273,6 @@ typedef struct x86_cpu_module_info {
>                                         uint8* _type);
>         void            (*set_mtrrs)(uint8 defaultType, const
> x86_mtrr_info* infos,
>                                         uint32 count);
> -
> -       void
> (*get_optimized_functions)(x86_optimized_functions* functions);
>  } x86_cpu_module_info;
>
>  // features
> diff --git a/src/system/kernel/arch/x86/arch_cpu.cpp
> b/src/system/kernel/arch/x86/arch_cpu.cpp
> index 93d58f8..085cdc6 100644
> --- a/src/system/kernel/arch/x86/arch_cpu.cpp
> +++ b/src/system/kernel/arch/x86/arch_cpu.cpp
> @@ -105,17 +105,8 @@ static const size_t kDoubleFaultStackSize = 4096;  //
> size per CPU
>  static x86_cpu_module_info* sCpuModule;
>
>
> -extern "C" void memcpy_generic(void* dest, const void* source, size_t
> count);
> -extern int memcpy_generic_end;
> -extern "C" void memset_generic(void* dest, int value, size_t count);
> -extern int memset_generic_end;
> -
> -x86_optimized_functions gOptimizedFunctions = {
> -       memcpy_generic,
> -       &memcpy_generic_end,
> -       memset_generic,
> -       &memset_generic_end
> -};
> +extern int memcpy_end;
> +extern int memset_end;
>
>  /* CPU topology information */
>  static uint32 (*sGetCPUTopologyID)(int currentCPU);
> @@ -1163,33 +1154,13 @@ arch_cpu_init_post_modules(kernel_args* args)
>                 call_all_cpus(&init_mtrrs, NULL);
>         }
>
> -       // get optimized functions from the CPU module
> -       if (sCpuModule != NULL && sCpuModule->get_optimized_functions !=
> NULL) {
> -               x86_optimized_functions functions;
> -               memset(&functions, 0, sizeof(functions));
> -
> -               sCpuModule->get_optimized_functions(&functions);
> -
> -               if (functions.memcpy != NULL) {
> -                       gOptimizedFunctions.memcpy = functions.memcpy;
> -                       gOptimizedFunctions.memcpy_end =
> functions.memcpy_end;
> -               }
> -
> -               if (functions.memset != NULL) {
> -                       gOptimizedFunctions.memset = functions.memset;
> -                       gOptimizedFunctions.memset_end =
> functions.memset_end;
> -               }
> -       }
> -
>         // put the optimized functions into the commpage
> -       size_t memcpyLen = (addr_t)gOptimizedFunctions.memcpy_end
> -               - (addr_t)gOptimizedFunctions.memcpy;
> +       size_t memcpyLen = (addr_t)memcpy_end - (addr_t)memcpy;
>         addr_t memcpyPosition =
> fill_commpage_entry(COMMPAGE_ENTRY_X86_MEMCPY,
> -               (const void*)gOptimizedFunctions.memcpy, memcpyLen);
> -       size_t memsetLen = (addr_t)gOptimizedFunctions.memset_end
> -               - (addr_t)gOptimizedFunctions.memset;
> +               (const void*)memcpy, memcpyLen);
> +       size_t memsetLen = (addr_t)memset_end - (addr_t)memset;
>         addr_t memsetPosition =
> fill_commpage_entry(COMMPAGE_ENTRY_X86_MEMSET,
> -               (const void*)gOptimizedFunctions.memset, memsetLen);
> +               (const void*)memset, memsetLen);
>

Yep, I it should be &memcpy_end and &memset_end instead of memcpy_end and
memset_end. I will fix that when I respin the whole patchset (hopefully)
before the merge.


>         size_t threadExitLen = (addr_t)x86_end_userspace_thread_exit
>                 - (addr_t)x86_userspace_thread_exit;
>         addr_t threadExitPosition = fill_commpage_entry(
> diff --git a/src/system/kernel/arch/x86/asm_offsets.cpp
> b/src/system/kernel/arch/x86/asm_offsets.cpp
> index 787fef1..db89298 100644
> --- a/src/system/kernel/arch/x86/asm_offsets.cpp
> +++ b/src/system/kernel/arch/x86/asm_offsets.cpp
> @@ -79,12 +79,6 @@ dummy()
>         DEFINE_OFFSET_MACRO(SYSCALL_INFO, syscall_info, function);
>         DEFINE_OFFSET_MACRO(SYSCALL_INFO, syscall_info, parameter_size);
>
> -       // struct x86_optimized_functions
> -       DEFINE_OFFSET_MACRO(X86_OPTIMIZED_FUNCTIONS,
> x86_optimized_functions,
> -               memcpy);
> -       DEFINE_OFFSET_MACRO(X86_OPTIMIZED_FUNCTIONS,
> x86_optimized_functions,
> -               memset);
> -
>         // struct signal_frame_data
>         DEFINE_SIZEOF_MACRO(SIGNAL_FRAME_DATA, signal_frame_data);
>         DEFINE_OFFSET_MACRO(SIGNAL_FRAME_DATA, signal_frame_data, info);
> diff --git a/src/system/kernel/lib/arch/x86/arch_string.S
> b/src/system/kernel/lib/arch/x86/arch_string.S
> index 6263062..7a3480b 100644
> --- a/src/system/kernel/lib/arch/x86/arch_string.S
> +++ b/src/system/kernel/lib/arch/x86/arch_string.S
> @@ -6,22 +6,12 @@
>   * Distributed under the terms of the NewOS License.
>  */
>
> -#if !_BOOT_MODE
> -#      include "asm_offsets.h"
> -#endif
>
>  #include <asm_defs.h>
>
>
> -// We don't need the indirection in the boot loader.
> -#if _BOOT_MODE
> -#      define memcpy_generic   memcpy
> -#      define memset_generic   memset
> -#endif
> -
> -
>  .align 4
> -FUNCTION(memcpy_generic):
> +FUNCTION(memcpy):
>         pushl   %esi
>         pushl   %edi
>         movl    12(%esp),%edi   /* dest */
> @@ -45,13 +35,13 @@ FUNCTION(memcpy_generic):
>         popl    %edi
>         popl    %esi
>         ret
> -FUNCTION_END(memcpy_generic)
> -SYMBOL(memcpy_generic_end):
> +FUNCTION_END(memcpy)
> +SYMBOL(memcpy_end):
>
>
>  /* void *memset(void *dest, int value, size_t length); */
>  .align 4
> -FUNCTION(memset_generic):
> +FUNCTION(memset):
>         push    %ebp
>         mov             %esp, %ebp
>
> @@ -111,19 +101,6 @@ FUNCTION(memset_generic):
>         mov             %ebp, %esp
>         pop             %ebp
>         ret
> -FUNCTION_END(memset_generic)
> -SYMBOL(memset_generic_end):
> -
> -
> -#if !_BOOT_MODE
> -
> -.align 4
> -FUNCTION(memcpy):
> -       jmp             *(gOptimizedFunctions +
> X86_OPTIMIZED_FUNCTIONS_memcpy)
> -FUNCTION_END(memcpy)
> -
> -FUNCTION(memset):
> -       jmp             *(gOptimizedFunctions +
> X86_OPTIMIZED_FUNCTIONS_memset)
>  FUNCTION_END(memset)
> +SYMBOL(memset_end):
>
> -#endif // !_BOOT_MODE
> diff --git a/src/system/kernel/lib/arch/x86_64/arch_string.S
> b/src/system/kernel/lib/arch/x86_64/arch_string.S
> index a24bbc8..f0849e8 100644
> --- a/src/system/kernel/lib/arch/x86_64/arch_string.S
> +++ b/src/system/kernel/lib/arch/x86_64/arch_string.S
> @@ -6,11 +6,9 @@
>
>  #include <asm_defs.h>
>
> -#include "asm_offsets.h"
> -
>
>  .align 8
> -FUNCTION(memcpy_generic):
> +FUNCTION(memcpy):
>         push    %rbp
>         movq    %rsp, %rbp
>
> @@ -59,12 +57,12 @@ FUNCTION(memcpy_generic):
>
>         pop             %rbp
>         ret
> -FUNCTION_END(memcpy_generic)
> -SYMBOL(memcpy_generic_end):
> +FUNCTION_END(memcpy)
> +SYMBOL(memcpy_end):
>
>
>  .align 8
> -FUNCTION(memset_generic):
> +FUNCTION(memset):
>         push    %rbp
>         movq    %rsp, %rbp
>
> @@ -82,15 +80,6 @@ FUNCTION(memset_generic):
>         movq    %r8, %rax
>         pop             %rbp
>         ret
> -FUNCTION_END(memset_generic)
> -SYMBOL(memset_generic_end):
> -
> -
> -FUNCTION(memcpy):
> -       jmp             *(gOptimizedFunctions +
> X86_OPTIMIZED_FUNCTIONS_memcpy)
> -FUNCTION_END(memcpy)
> -
> -FUNCTION(memset):
> -       jmp             *(gOptimizedFunctions +
> X86_OPTIMIZED_FUNCTIONS_memset)
>  FUNCTION_END(memset)
> +SYMBOL(memset_end):
>
>
> <snip, another patch>

Other related posts: