2014-09-06 17:01 GMT+02:00 pdziepak-github.memcpy <community@xxxxxxxxxxxx>: <snip, lengthy introduction> > Commit: 238ce16756c9be013262c224da70d8758ff4162a > Author: Paweł Dziepak <pdziepak@xxxxxxxxxxx> > Date: Sat Sep 6 14:25:00 2014 UTC > > kernel/x86[_64]: remove get_optimized_functions from cpu modules > > The possibility to specify custom memcpy and memset implementations > in cpu modules is currently unused and there is generally no point > in such feature. > > There are only 2 x86 vendors that really matter and there isn't > very big difference in performance of the generic optmized versions > of these funcions across different models. Even if we wanted different > versions of memset and memcpy depending on the processor model or > features much better solution would be to use STT_GNU_IFUNC and save > one indirect call. > > Long story short, we don't really benefit in any way from > get_optimized_functions and the feature it implements and it only adds > unnecessary complexity to the code. > > Signed-off-by: Paweł Dziepak <pdziepak@xxxxxxxxxxx> > > > ---------------------------------------------------------------------------- > > diff --git a/headers/private/kernel/arch/x86/arch_cpu.h > b/headers/private/kernel/arch/x86/arch_cpu.h > index 88c5eb8..800f489 100644 > --- a/headers/private/kernel/arch/x86/arch_cpu.h > +++ b/headers/private/kernel/arch/x86/arch_cpu.h > @@ -262,13 +262,6 @@ typedef struct x86_mtrr_info { > uint8 type; > } x86_mtrr_info; > > -typedef struct x86_optimized_functions { > - void (*memcpy)(void* dest, const void* source, size_t count); > - void* memcpy_end; > - void (*memset)(void* dest, int value, size_t count); > - void* memset_end; > -} x86_optimized_functions; > - > typedef struct x86_cpu_module_info { > module_info info; > uint32 (*count_mtrrs)(void); > @@ -280,8 +273,6 @@ typedef struct x86_cpu_module_info { > uint8* _type); > void (*set_mtrrs)(uint8 defaultType, const > x86_mtrr_info* infos, > uint32 count); > - > - void > (*get_optimized_functions)(x86_optimized_functions* functions); > } x86_cpu_module_info; > > // features > diff --git a/src/system/kernel/arch/x86/arch_cpu.cpp > b/src/system/kernel/arch/x86/arch_cpu.cpp > index 93d58f8..085cdc6 100644 > --- a/src/system/kernel/arch/x86/arch_cpu.cpp > +++ b/src/system/kernel/arch/x86/arch_cpu.cpp > @@ -105,17 +105,8 @@ static const size_t kDoubleFaultStackSize = 4096; // > size per CPU > static x86_cpu_module_info* sCpuModule; > > > -extern "C" void memcpy_generic(void* dest, const void* source, size_t > count); > -extern int memcpy_generic_end; > -extern "C" void memset_generic(void* dest, int value, size_t count); > -extern int memset_generic_end; > - > -x86_optimized_functions gOptimizedFunctions = { > - memcpy_generic, > - &memcpy_generic_end, > - memset_generic, > - &memset_generic_end > -}; > +extern int memcpy_end; > +extern int memset_end; > > /* CPU topology information */ > static uint32 (*sGetCPUTopologyID)(int currentCPU); > @@ -1163,33 +1154,13 @@ arch_cpu_init_post_modules(kernel_args* args) > call_all_cpus(&init_mtrrs, NULL); > } > > - // get optimized functions from the CPU module > - if (sCpuModule != NULL && sCpuModule->get_optimized_functions != > NULL) { > - x86_optimized_functions functions; > - memset(&functions, 0, sizeof(functions)); > - > - sCpuModule->get_optimized_functions(&functions); > - > - if (functions.memcpy != NULL) { > - gOptimizedFunctions.memcpy = functions.memcpy; > - gOptimizedFunctions.memcpy_end = > functions.memcpy_end; > - } > - > - if (functions.memset != NULL) { > - gOptimizedFunctions.memset = functions.memset; > - gOptimizedFunctions.memset_end = > functions.memset_end; > - } > - } > - > // put the optimized functions into the commpage > - size_t memcpyLen = (addr_t)gOptimizedFunctions.memcpy_end > - - (addr_t)gOptimizedFunctions.memcpy; > + size_t memcpyLen = (addr_t)memcpy_end - (addr_t)memcpy; > addr_t memcpyPosition = > fill_commpage_entry(COMMPAGE_ENTRY_X86_MEMCPY, > - (const void*)gOptimizedFunctions.memcpy, memcpyLen); > - size_t memsetLen = (addr_t)gOptimizedFunctions.memset_end > - - (addr_t)gOptimizedFunctions.memset; > + (const void*)memcpy, memcpyLen); > + size_t memsetLen = (addr_t)memset_end - (addr_t)memset; > addr_t memsetPosition = > fill_commpage_entry(COMMPAGE_ENTRY_X86_MEMSET, > - (const void*)gOptimizedFunctions.memset, memsetLen); > + (const void*)memset, memsetLen); > Yep, I it should be &memcpy_end and &memset_end instead of memcpy_end and memset_end. I will fix that when I respin the whole patchset (hopefully) before the merge. > size_t threadExitLen = (addr_t)x86_end_userspace_thread_exit > - (addr_t)x86_userspace_thread_exit; > addr_t threadExitPosition = fill_commpage_entry( > diff --git a/src/system/kernel/arch/x86/asm_offsets.cpp > b/src/system/kernel/arch/x86/asm_offsets.cpp > index 787fef1..db89298 100644 > --- a/src/system/kernel/arch/x86/asm_offsets.cpp > +++ b/src/system/kernel/arch/x86/asm_offsets.cpp > @@ -79,12 +79,6 @@ dummy() > DEFINE_OFFSET_MACRO(SYSCALL_INFO, syscall_info, function); > DEFINE_OFFSET_MACRO(SYSCALL_INFO, syscall_info, parameter_size); > > - // struct x86_optimized_functions > - DEFINE_OFFSET_MACRO(X86_OPTIMIZED_FUNCTIONS, > x86_optimized_functions, > - memcpy); > - DEFINE_OFFSET_MACRO(X86_OPTIMIZED_FUNCTIONS, > x86_optimized_functions, > - memset); > - > // struct signal_frame_data > DEFINE_SIZEOF_MACRO(SIGNAL_FRAME_DATA, signal_frame_data); > DEFINE_OFFSET_MACRO(SIGNAL_FRAME_DATA, signal_frame_data, info); > diff --git a/src/system/kernel/lib/arch/x86/arch_string.S > b/src/system/kernel/lib/arch/x86/arch_string.S > index 6263062..7a3480b 100644 > --- a/src/system/kernel/lib/arch/x86/arch_string.S > +++ b/src/system/kernel/lib/arch/x86/arch_string.S > @@ -6,22 +6,12 @@ > * Distributed under the terms of the NewOS License. > */ > > -#if !_BOOT_MODE > -# include "asm_offsets.h" > -#endif > > #include <asm_defs.h> > > > -// We don't need the indirection in the boot loader. > -#if _BOOT_MODE > -# define memcpy_generic memcpy > -# define memset_generic memset > -#endif > - > - > .align 4 > -FUNCTION(memcpy_generic): > +FUNCTION(memcpy): > pushl %esi > pushl %edi > movl 12(%esp),%edi /* dest */ > @@ -45,13 +35,13 @@ FUNCTION(memcpy_generic): > popl %edi > popl %esi > ret > -FUNCTION_END(memcpy_generic) > -SYMBOL(memcpy_generic_end): > +FUNCTION_END(memcpy) > +SYMBOL(memcpy_end): > > > /* void *memset(void *dest, int value, size_t length); */ > .align 4 > -FUNCTION(memset_generic): > +FUNCTION(memset): > push %ebp > mov %esp, %ebp > > @@ -111,19 +101,6 @@ FUNCTION(memset_generic): > mov %ebp, %esp > pop %ebp > ret > -FUNCTION_END(memset_generic) > -SYMBOL(memset_generic_end): > - > - > -#if !_BOOT_MODE > - > -.align 4 > -FUNCTION(memcpy): > - jmp *(gOptimizedFunctions + > X86_OPTIMIZED_FUNCTIONS_memcpy) > -FUNCTION_END(memcpy) > - > -FUNCTION(memset): > - jmp *(gOptimizedFunctions + > X86_OPTIMIZED_FUNCTIONS_memset) > FUNCTION_END(memset) > +SYMBOL(memset_end): > > -#endif // !_BOOT_MODE > diff --git a/src/system/kernel/lib/arch/x86_64/arch_string.S > b/src/system/kernel/lib/arch/x86_64/arch_string.S > index a24bbc8..f0849e8 100644 > --- a/src/system/kernel/lib/arch/x86_64/arch_string.S > +++ b/src/system/kernel/lib/arch/x86_64/arch_string.S > @@ -6,11 +6,9 @@ > > #include <asm_defs.h> > > -#include "asm_offsets.h" > - > > .align 8 > -FUNCTION(memcpy_generic): > +FUNCTION(memcpy): > push %rbp > movq %rsp, %rbp > > @@ -59,12 +57,12 @@ FUNCTION(memcpy_generic): > > pop %rbp > ret > -FUNCTION_END(memcpy_generic) > -SYMBOL(memcpy_generic_end): > +FUNCTION_END(memcpy) > +SYMBOL(memcpy_end): > > > .align 8 > -FUNCTION(memset_generic): > +FUNCTION(memset): > push %rbp > movq %rsp, %rbp > > @@ -82,15 +80,6 @@ FUNCTION(memset_generic): > movq %r8, %rax > pop %rbp > ret > -FUNCTION_END(memset_generic) > -SYMBOL(memset_generic_end): > - > - > -FUNCTION(memcpy): > - jmp *(gOptimizedFunctions + > X86_OPTIMIZED_FUNCTIONS_memcpy) > -FUNCTION_END(memcpy) > - > -FUNCTION(memset): > - jmp *(gOptimizedFunctions + > X86_OPTIMIZED_FUNCTIONS_memset) > FUNCTION_END(memset) > +SYMBOL(memset_end): > > > <snip, another patch>