From c773e2ed07b0e10d28ef7bcc3800995117f2583e Mon Sep 17 00:00:00 2001 From: Bananymous Date: Thu, 14 Dec 2023 23:42:25 +0200 Subject: [PATCH] LibC: Optimize malloc even further aoc2023/day12 now runs in 3.5 seconds on my machine. This is way better than the old almost hour. --- libc/malloc.cpp | 142 ++++++++++++++++++++++-------------------------- 1 file changed, 65 insertions(+), 77 deletions(-) diff --git a/libc/malloc.cpp b/libc/malloc.cpp index e0da1326..aef0fccd 100644 --- a/libc/malloc.cpp +++ b/libc/malloc.cpp @@ -17,14 +17,19 @@ static consteval size_t log_size_t(size_t value, size_t base) static constexpr size_t s_malloc_pool_size_initial = 4096; static constexpr size_t s_malloc_pool_size_multiplier = 2; static constexpr size_t s_malloc_pool_count = sizeof(size_t) * 8 - log_size_t(s_malloc_pool_size_initial, s_malloc_pool_size_multiplier); -static constexpr size_t s_malloc_default_align = 16; +static constexpr size_t s_malloc_default_align = alignof(max_align_t); +// This is indirectly smallest allowed allocation +static constexpr size_t s_malloc_shrink_threshold = 64; struct malloc_node_t { + // TODO: these two pointers could be put into data region + malloc_node_t* prev_free; + malloc_node_t* next_free; + size_t size; bool allocated; bool last; - size_t size; - uint8_t data[0]; + alignas(s_malloc_default_align) uint8_t data[0]; size_t data_size() const { return size - sizeof(malloc_node_t); } malloc_node_t* next() { return (malloc_node_t*)(data + data_size()); } @@ -35,7 +40,7 @@ struct malloc_pool_t uint8_t* start; size_t size; - malloc_node_t* first_free; + malloc_node_t* free_list; uint8_t* end() { return start + size; } bool contains(malloc_node_t* node) { return start <= (uint8_t*)node && (uint8_t*)node < end(); } @@ -50,7 +55,7 @@ void init_malloc() { s_malloc_pools[i].start = nullptr; s_malloc_pools[i].size = pool_size; - s_malloc_pools[i].first_free = nullptr; + s_malloc_pools[i].free_list = nullptr;; pool_size *= s_malloc_pool_size_multiplier; } } @@ -72,12 +77,31 @@ static bool allocate_pool(size_t pool_index) node->allocated = false; node->size = pool.size; node->last = true; + node->prev_free = nullptr; + node->next_free = nullptr; - pool.first_free = node; + pool.free_list = node; return true; } +static void remove_node_from_pool_free_list(malloc_pool_t& pool, malloc_node_t* node) +{ + if (node == pool.free_list) + { + pool.free_list = pool.free_list->next_free; + if (pool.free_list) + pool.free_list->prev_free = nullptr; + } + else + { + if (node->next_free) + node->next_free->prev_free = node->prev_free; + if (node->prev_free) + node->prev_free->next_free = node->next_free; + } +} + static void* allocate_from_pool(size_t pool_index, size_t size) { assert(size % s_malloc_default_align == 0); @@ -85,39 +109,30 @@ static void* allocate_from_pool(size_t pool_index, size_t size) auto& pool = s_malloc_pools[pool_index]; assert(pool.start != nullptr); - if (!pool.first_free) + if (!pool.free_list) return nullptr; - assert(!pool.first_free->allocated); - for (auto* node = pool.first_free;; node = node->next()) + for (auto* node = pool.free_list; node; node = node->next_free) { - if (node->allocated) + assert(!node->allocated); + + // merge nodes right after current one + while (!node->last && !node->next()->allocated) { - if (node->last) - break; - continue; + auto* next = node->next(); + remove_node_from_pool_free_list(pool, next); + node->last = next->last; + node->size += next->size; } - if (!node->last && !node->next()->allocated) - { - node->last = node->next()->last; - node->size += node->next()->size; - } - if (node->data_size() < size) - { - if (node->last) - break; continue; - } node->allocated = true; - - if (node == pool.first_free) - pool.first_free = nullptr; + remove_node_from_pool_free_list(pool, node); // shrink node if needed - if (node->data_size() - size > sizeof(malloc_node_t)) + if (node->data_size() - size >= sizeof(malloc_node_t) + s_malloc_shrink_threshold) { uint8_t* node_end = (uint8_t*)node->next(); @@ -130,23 +145,12 @@ static void* allocate_from_pool(size_t pool_index, size_t size) node->last = false; - if (!pool.first_free || next < pool.first_free) - pool.first_free = next; - } - - // Find next free node - if (!pool.first_free) - { - for (auto* free_node = node;; free_node = free_node->next()) - { - if (!free_node->allocated) - { - pool.first_free = free_node; - break; - } - if (free_node->last) - break; - } + // insert excess node to free list + if (pool.free_list) + pool.free_list->prev_free = next; + next->next_free = pool.free_list; + next->prev_free = nullptr; + pool.free_list = next; } return node->data; @@ -216,31 +220,7 @@ void* realloc(void* ptr, size_t size) if (oldsize == size) return ptr; - // shrink allocation if needed - if (oldsize > size) - { - if (node->data_size() - size > sizeof(malloc_node_t)) - { - uint8_t* node_end = (uint8_t*)node->next(); - - node->size = sizeof(malloc_node_t) + size; - - auto* next = node->next(); - next->allocated = false; - next->size = node_end - (uint8_t*)next; - next->last = node->last; - - node->last = false; - - auto& pool = pool_from_node(node); - if (!pool.first_free || next < pool.first_free) - pool.first_free = next; - } - - return ptr; - } - - // FIXME: try to expand allocation + // TODO: try to shrink or expand allocation // allocate new pointer void* new_ptr = malloc(size); @@ -262,17 +242,25 @@ void free(void* ptr) auto* node = node_from_data_pointer(ptr); - // mark node as unallocated and try to merge with the next node node->allocated = false; - if (!node->last && !node->next()->allocated) - { - node->last = node->next()->last; - node->size += node->next()->size; - } auto& pool = pool_from_node(node); - if (!pool.first_free || node < pool.first_free) - pool.first_free = node; + + // merge nodes right after freed one + while (!node->last && !node->next()->allocated) + { + auto* next = node->next(); + remove_node_from_pool_free_list(pool, next); + node->last = next->last; + node->size += next->size; + } + + // add node to free list + if (pool.free_list) + pool.free_list->prev_free = node; + node->prev_free = nullptr; + node->next_free = pool.free_list; + pool.free_list = node; } void* calloc(size_t nmemb, size_t size)