Author: mmlr Date: 2009-10-22 10:57:34 +0200 (Thu, 22 Oct 2009) New Revision: 33721 Changeset: http://dev.haiku-os.org/changeset/33721/haiku Modified: haiku/trunk/headers/private/kernel/heap.h haiku/trunk/src/system/kernel/device_manager/IORequest.cpp haiku/trunk/src/system/kernel/heap.cpp Log: * Implement per-CPU heaps. They only get enabled in case there's enough memory. * Allow an allocator to be created on the heap to allow for non-locked allocators to be created. * Some cleanup. Modified: haiku/trunk/headers/private/kernel/heap.h =================================================================== --- haiku/trunk/headers/private/kernel/heap.h 2009-10-22 08:30:06 UTC (rev 33720) +++ haiku/trunk/headers/private/kernel/heap.h 2009-10-22 08:57:34 UTC (rev 33721) @@ -55,7 +55,7 @@ void malloc_referenced_release(void* data); heap_allocator* heap_create_allocator(const char* name, addr_t base, - size_t size, const heap_class* heapClass); + size_t size, const heap_class* heapClass, bool allocateOnHeap); void* heap_memalign(heap_allocator* heap, size_t alignment, size_t size); status_t heap_free(heap_allocator* heap, void* address); Modified: haiku/trunk/src/system/kernel/device_manager/IORequest.cpp =================================================================== --- haiku/trunk/src/system/kernel/device_manager/IORequest.cpp 2009-10-22 08:30:06 UTC (rev 33720) +++ haiku/trunk/src/system/kernel/device_manager/IORequest.cpp 2009-10-22 08:57:34 UTC (rev 33721) @@ -1395,7 +1395,7 @@ } sVIPHeap = heap_create_allocator("VIP I/O heap", (addr_t)address, - VIP_HEAP_SIZE, &heapClass); + VIP_HEAP_SIZE, &heapClass, false); if (sVIPHeap == NULL) { panic("vip_io_request_allocator_init(): failed to create VIP I/O " "heap\n"); Modified: haiku/trunk/src/system/kernel/heap.cpp =================================================================== --- haiku/trunk/src/system/kernel/heap.cpp 2009-10-22 08:30:06 UTC (rev 33720) +++ haiku/trunk/src/system/kernel/heap.cpp 2009-10-22 08:57:34 UTC (rev 33721) @@ -24,6 +24,7 @@ #include <tracing.h> #include <util/AutoLock.h> #include <vm.h> +#include <vm_page.h> //#define TRACE_HEAP @@ -53,8 +54,10 @@ static int32 sCallerInfoCount = 0; #endif + typedef struct heap_page_s heap_page; + typedef struct heap_area_s { area_id area; @@ -72,6 +75,9 @@ heap_area_s * all_next; } heap_area; + +#define MAX_BIN_COUNT 31 // depends on the size of the bin_index field + typedef struct heap_page_s { heap_area * area; uint16 index; @@ -87,6 +93,7 @@ addr_t * free_list; } heap_page; + typedef struct heap_bin_s { mutex lock; uint32 element_size; @@ -94,6 +101,7 @@ heap_page * page_list; // sorted so that the desired page is always first } heap_bin; + struct heap_allocator_s { rw_lock area_lock; mutex page_lock; @@ -115,6 +123,7 @@ heap_area * all_areas; // all areas including full ones }; + static const uint32 kAreaAllocationMagic = 'AAMG'; typedef struct area_allocation_info_s { area_id area; @@ -126,12 +135,15 @@ void * allocation_base; } area_allocation_info; + struct DeferredFreeListEntry : SinglyLinkedListLinkImpl<DeferredFreeListEntry> { }; + typedef SinglyLinkedList<DeferredFreeListEntry> DeferredFreeList; typedef SinglyLinkedList<DeferredDeletable> DeferredDeletableList; + // Heap class configuration #define HEAP_CLASS_COUNT 3 static heap_class sHeapClasses[HEAP_CLASS_COUNT] = { @@ -167,15 +179,17 @@ } }; -static heap_allocator *sHeaps[HEAP_CLASS_COUNT]; -static uint32 *sLastGrowRequest[HEAP_CLASS_COUNT]; -static uint32 *sLastHandledGrowRequest[HEAP_CLASS_COUNT]; + +static uint32 
sHeapCount; +static heap_allocator *sHeaps[HEAP_CLASS_COUNT * B_MAX_CPU_COUNT]; +static uint32 *sLastGrowRequest[HEAP_CLASS_COUNT * B_MAX_CPU_COUNT]; +static uint32 *sLastHandledGrowRequest[HEAP_CLASS_COUNT * B_MAX_CPU_COUNT]; + static heap_allocator *sGrowHeap = NULL; static thread_id sHeapGrowThread = -1; static sem_id sHeapGrowSem = -1; static sem_id sHeapGrownNotify = -1; static bool sAddGrowHeap = false; - static DeferredFreeList sDeferredFreeList; static DeferredDeletableList sDeferredDeletableList; static spinlock sDeferredFreeListLock; @@ -355,7 +369,7 @@ kprintf("dedicated grow heap:\n"); dump_allocator(sGrowHeap, true, true); } else if (strcmp(argv[1], "stats") == 0) { - for (uint32 i = 0; i < HEAP_CLASS_COUNT; i++) + for (uint32 i = 0; i < sHeapCount; i++) dump_allocator(sHeaps[i], false, false); } else if (evaluate_debug_expression(argv[1], &heapAddress, true)) { dump_allocator((heap_allocator*)(addr_t)heapAddress, true, true); @@ -365,7 +379,7 @@ return 0; } - for (uint32 i = 0; i < HEAP_CLASS_COUNT; i++) + for (uint32 i = 0; i < sHeapCount; i++) dump_allocator(sHeaps[i], true, true); return 0; @@ -390,8 +404,8 @@ size_t totalSize = 0; uint32 totalCount = 0; - for (uint32 classIndex = 0; classIndex < HEAP_CLASS_COUNT; classIndex++) { - heap_allocator *heap = sHeaps[classIndex]; + for (uint32 heapIndex = 0; heapIndex < sHeapCount; heapIndex++) { + heap_allocator *heap = sHeaps[heapIndex]; if (heapAddress != 0) heap = (heap_allocator *)(addr_t)heapAddress; @@ -495,8 +509,8 @@ size_t totalSize = 0; uint32 totalCount = 0; - for (uint32 classIndex = 0; classIndex < HEAP_CLASS_COUNT; classIndex++) { - heap_allocator *heap = sHeaps[classIndex]; + for (uint32 heapIndex = 0; heapIndex < sHeapCount; heapIndex++) { + heap_allocator *heap = sHeaps[heapIndex]; // go through all the pages in all the areas heap_area *area = heap->all_areas; @@ -738,9 +752,8 @@ if (!analyze_allocation_callers(heap)) return 0; } else { - for (uint32 classIndex = 0; classIndex < HEAP_CLASS_COUNT; - classIndex++) { - if (!analyze_allocation_callers(sHeaps[classIndex])) + for (uint32 heapIndex = 0; heapIndex < sHeapCount; heapIndex++) { + if (!analyze_allocation_callers(sHeaps[heapIndex])) return 0; } } @@ -1090,11 +1103,19 @@ heap_allocator * heap_create_allocator(const char *name, addr_t base, size_t size, - const heap_class *heapClass) + const heap_class *heapClass, bool allocateOnHeap) { - heap_allocator *heap = (heap_allocator *)base; - base += sizeof(heap_allocator); - size -= sizeof(heap_allocator); + heap_allocator *heap; + if (allocateOnHeap) { + // allocate seperately on the heap + heap = (heap_allocator *)malloc(sizeof(heap_allocator) + + sizeof(heap_bin) * MAX_BIN_COUNT); + } else { + // use up the first part of the area + heap = (heap_allocator *)base; + base += sizeof(heap_allocator); + size -= sizeof(heap_allocator); + } heap->name = name; heap->page_size = heapClass->page_size; @@ -1123,7 +1144,7 @@ bin->page_list = NULL; heap->bin_count++; - if (heap->bin_count >= 32) + if (heap->bin_count > MAX_BIN_COUNT) panic("heap configuration invalid - max bin count reached\n"); }; @@ -1751,7 +1772,7 @@ inline uint32 -heap_class_for(size_t size) +heap_index_for(size_t size, int32 cpu) { #if KERNEL_HEAP_LEAK_CHECK // take the extra info size into account @@ -1761,10 +1782,10 @@ uint32 index = 0; for (; index < HEAP_CLASS_COUNT - 1; index++) { if (size <= sHeapClasses[index].max_allocation_size) - return index; + break; } - return index; + return index + cpu * HEAP_CLASS_COUNT; } @@ -1834,7 +1855,7 @@ 
dprintf("heap_grower: failed to create new grow heap area\n"); } - for (uint32 i = 0; i < HEAP_CLASS_COUNT; i++) { + for (uint32 i = 0; i < sHeapCount; i++) { heap_allocator *heap = sHeaps[i]; if (sLastGrowRequest[i] > sLastHandledGrowRequest[i] || heap_should_grow(heap)) { @@ -1863,9 +1884,10 @@ for (uint32 i = 0; i < HEAP_CLASS_COUNT; i++) { size_t partSize = size * sHeapClasses[i].initial_percentage / 100; sHeaps[i] = heap_create_allocator(sHeapClasses[i].name, base, partSize, - &sHeapClasses[i]); + &sHeapClasses[i], false); sLastGrowRequest[i] = sLastHandledGrowRequest[i] = 0; base += partSize; + sHeapCount++; } // set up some debug commands @@ -1944,14 +1966,14 @@ area_id growHeapArea = create_area("dedicated grow heap", &address, B_ANY_KERNEL_BLOCK_ADDRESS, HEAP_DEDICATED_GROW_SIZE, B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); - if (growHeapArea < B_OK) { + if (growHeapArea < 0) { panic("heap_init_post_thread(): couldn't allocate dedicate grow heap " "area"); return growHeapArea; } sGrowHeap = heap_create_allocator("grow", (addr_t)address, - HEAP_DEDICATED_GROW_SIZE, &sHeapClasses[0]); + HEAP_DEDICATED_GROW_SIZE, &sHeapClasses[0], false); if (sGrowHeap == NULL) { panic("heap_init_post_thread(): failed to create dedicated grow heap\n"); return B_ERROR; @@ -1964,6 +1986,30 @@ return sHeapGrowThread; } + // create per-cpu heaps if there's enough memory + int32 heapCount = MIN(smp_get_num_cpus(), + (int32)vm_page_num_pages() / 60 / 1024); + for (int32 i = 1; i < heapCount; i++) { + addr_t base = 0; + size_t size = HEAP_GROW_SIZE * HEAP_CLASS_COUNT; + area_id perCPUHeapArea = create_area("per cpu inital heap", + (void **)&base, B_ANY_KERNEL_ADDRESS, size, B_FULL_LOCK, + B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); + if (perCPUHeapArea < 0) + break; + + for (uint32 j = 0; j < HEAP_CLASS_COUNT; j++) { + int32 heapIndex = i * HEAP_CLASS_COUNT + j; + size_t partSize = size * sHeapClasses[j].initial_percentage / 100; + sHeaps[heapIndex] = heap_create_allocator(sHeapClasses[j].name, + base, partSize, &sHeapClasses[j], false); + sLastGrowRequest[heapIndex] = 0; + sLastHandledGrowRequest[heapIndex] = 0; + base += partSize; + sHeapCount++; + } + } + // run the deferred deleter roughly once a second if (register_kernel_daemon(deferred_deleter, NULL, 10) != B_OK) panic("heap_init_post_thread(): failed to init deferred deleter"); @@ -2028,20 +2074,33 @@ return address; } - uint32 heapClass = heap_class_for(size); - heap_allocator *heap = sHeaps[heapClass]; - void *result = heap_memalign(heap, alignment, size); + void *result = NULL; + bool shouldGrow = false; + int32 cpuCount = MIN(smp_get_num_cpus(), + (int32)sHeapCount / HEAP_CLASS_COUNT); + int32 cpuNumber = smp_get_current_cpu(); + for (int32 i = 0; i < cpuCount; i++) { + uint32 heapIndex = heap_index_for(size, cpuNumber++ % cpuCount); + heap_allocator *heap = sHeaps[heapIndex]; + result = heap_memalign(heap, alignment, size); + if (result != NULL) { + shouldGrow = heap_should_grow(heap); + break; + } + } + if (result == NULL) { // request an urgent grow and wait - we don't do it ourselfs here to // serialize growing through the grow thread, as otherwise multiple // threads hitting this situation (likely when memory ran out) would // all add areas - sLastGrowRequest[heapClass]++; + uint32 heapIndex = heap_index_for(size, smp_get_current_cpu()); + sLastGrowRequest[heapIndex]++; switch_sem(sHeapGrowSem, sHeapGrownNotify); // and then try again - result = heap_memalign(heap, alignment, size); - } else if (heap_should_grow(heap)) { + result 
= heap_memalign(sHeaps[heapIndex], alignment, size); + } else if (shouldGrow) { // should grow sometime soon, notify the grower release_sem_etc(sHeapGrowSem, 1, B_DO_NOT_RESCHEDULE); } @@ -2075,10 +2134,17 @@ } // try public memory, there might be something available - heap_allocator *heap = sHeaps[heap_class_for(size)]; - void *result = heap_memalign(heap, alignment, size); - if (result != NULL) - return result; + void *result = NULL; + int32 cpuCount = MIN(smp_get_num_cpus(), + (int32)sHeapCount / HEAP_CLASS_COUNT); + int32 cpuNumber = smp_get_current_cpu(); + for (int32 i = 0; i < cpuCount; i++) { + uint32 heapIndex = heap_index_for(size, cpuNumber++ % cpuCount); + heap_allocator *heap = sHeaps[heapIndex]; + result = heap_memalign(heap, alignment, size); + if (result != NULL) + return result; + } // no memory available if (thread_get_current_thread_id() == sHeapGrowThread) @@ -2112,8 +2178,9 @@ return; } - for (uint32 i = 0; i < HEAP_CLASS_COUNT; i++) { - heap_allocator *heap = sHeaps[i]; + int32 offset = smp_get_current_cpu() * HEAP_CLASS_COUNT; + for (uint32 i = 0; i < sHeapCount; i++) { + heap_allocator *heap = sHeaps[(i + offset) % sHeapCount]; if (heap_free(heap, address) == B_OK) { #if PARANOID_HEAP_VALIDATION heap_validate_heap(heap); @@ -2164,8 +2231,9 @@ } void *newAddress = NULL; - for (uint32 i = 0; i < HEAP_CLASS_COUNT; i++) { - heap_allocator *heap = sHeaps[i]; + int32 offset = smp_get_current_cpu() * HEAP_CLASS_COUNT; + for (uint32 i = 0; i < sHeapCount; i++) { + heap_allocator *heap = sHeaps[(i + offset) % sHeapCount]; if (heap_realloc(heap, address, &newAddress, newSize) == B_OK) { #if PARANOID_HEAP_VALIDATION heap_validate_heap(heap);
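
For readers following the allocation path in the diff above: the changeset lays out sHeaps[] as HEAP_CLASS_COUNT consecutive heaps per CPU, so heap_index_for(size, cpu) is the size class index plus cpu * HEAP_CLASS_COUNT. memalign_etc() first tries the matching class on the current CPU and then the same class on the other CPUs, while free()/realloc() probe all heaps but start at the current CPU's block, where the allocation most likely came from. Below is a minimal user-space C++ sketch of just that indexing and probe order; the class size limits (kMaxAllocationSize), the CPU count (kCPUCount) and the printf output are illustrative placeholders, not the kernel's actual values or APIs.

// Minimal sketch of the per-CPU heap indexing introduced above. The class
// limits, CPU count and printf output are placeholders for illustration;
// the kernel uses its real heap_allocator structures and smp_* calls.
#include <cstddef>
#include <cstdint>
#include <cstdio>

static const uint32_t kHeapClassCount = 3;          // mirrors HEAP_CLASS_COUNT
static const size_t kMaxAllocationSize[kHeapClassCount] = {
	256, 2048, SIZE_MAX                             // placeholder class limits
};

static const uint32_t kCPUCount = 4;                // pretend SMP machine
static const uint32_t kHeapCount = kHeapClassCount * kCPUCount;
	// sHeaps[] analog: class 0..2 for CPU 0, then class 0..2 for CPU 1, ...


static uint32_t
heap_index_for(size_t size, uint32_t cpu)
{
	uint32_t index = 0;
	for (; index < kHeapClassCount - 1; index++) {
		if (size <= kMaxAllocationSize[index])
			break;
	}

	return index + cpu * kHeapClassCount;
}


static void
allocation_order(size_t size, uint32_t currentCPU)
{
	// memalign_etc() analog: try the matching size class on the current CPU
	// first, then the same class on the other CPUs
	for (uint32_t i = 0; i < kCPUCount; i++) {
		printf("try heap %u\n", (unsigned)heap_index_for(size,
			(currentCPU + i) % kCPUCount));
	}
}


static void
free_order(uint32_t currentCPU)
{
	// free()/realloc() analog: the block may live in any heap, so probe all
	// of them, but start with the current CPU's block where a hit is likely
	uint32_t offset = currentCPU * kHeapClassCount;
	for (uint32_t i = 0; i < kHeapCount; i++)
		printf("probe heap %u\n", (unsigned)((i + offset) % kHeapCount));
}


int
main()
{
	allocation_order(1024, 2);
	free_order(2);
	return 0;
}

With the placeholder limits above, a 1 KiB request on CPU 2 maps to the medium class (index 1) and therefore heap 1 + 2 * 3 = 7, with fallbacks 10, 1 and 4; a free on CPU 2 probes heaps 6 through 11 first and then wraps around to 0 through 5.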