LibC: Optimize malloc even further

aoc2023/day12 now runs in 3.5 seconds on my machine. This is way
better than the nearly an hour it took before.
This commit is contained in:
Bananymous 2023-12-14 23:42:25 +02:00
parent c4186bd5f0
commit c773e2ed07
1 changed files with 65 additions and 77 deletions

View File

@ -17,14 +17,19 @@ static consteval size_t log_size_t(size_t value, size_t base)
static constexpr size_t s_malloc_pool_size_initial = 4096; static constexpr size_t s_malloc_pool_size_initial = 4096;
static constexpr size_t s_malloc_pool_size_multiplier = 2; static constexpr size_t s_malloc_pool_size_multiplier = 2;
static constexpr size_t s_malloc_pool_count = sizeof(size_t) * 8 - log_size_t(s_malloc_pool_size_initial, s_malloc_pool_size_multiplier); static constexpr size_t s_malloc_pool_count = sizeof(size_t) * 8 - log_size_t(s_malloc_pool_size_initial, s_malloc_pool_size_multiplier);
static constexpr size_t s_malloc_default_align = 16; static constexpr size_t s_malloc_default_align = alignof(max_align_t);
// This is indirectly the smallest allowed allocation
static constexpr size_t s_malloc_shrink_threshold = 64;
struct malloc_node_t struct malloc_node_t
{ {
// TODO: these two pointers could be put into data region
malloc_node_t* prev_free;
malloc_node_t* next_free;
size_t size;
bool allocated; bool allocated;
bool last; bool last;
size_t size; alignas(s_malloc_default_align) uint8_t data[0];
uint8_t data[0];
size_t data_size() const { return size - sizeof(malloc_node_t); } size_t data_size() const { return size - sizeof(malloc_node_t); }
malloc_node_t* next() { return (malloc_node_t*)(data + data_size()); } malloc_node_t* next() { return (malloc_node_t*)(data + data_size()); }
@ -35,7 +40,7 @@ struct malloc_pool_t
uint8_t* start; uint8_t* start;
size_t size; size_t size;
malloc_node_t* first_free; malloc_node_t* free_list;
uint8_t* end() { return start + size; } uint8_t* end() { return start + size; }
bool contains(malloc_node_t* node) { return start <= (uint8_t*)node && (uint8_t*)node < end(); } bool contains(malloc_node_t* node) { return start <= (uint8_t*)node && (uint8_t*)node < end(); }
@ -50,7 +55,7 @@ void init_malloc()
{ {
s_malloc_pools[i].start = nullptr; s_malloc_pools[i].start = nullptr;
s_malloc_pools[i].size = pool_size; s_malloc_pools[i].size = pool_size;
s_malloc_pools[i].first_free = nullptr; s_malloc_pools[i].free_list = nullptr;;
pool_size *= s_malloc_pool_size_multiplier; pool_size *= s_malloc_pool_size_multiplier;
} }
} }
@ -72,12 +77,31 @@ static bool allocate_pool(size_t pool_index)
node->allocated = false; node->allocated = false;
node->size = pool.size; node->size = pool.size;
node->last = true; node->last = true;
node->prev_free = nullptr;
node->next_free = nullptr;
pool.first_free = node; pool.free_list = node;
return true; return true;
} }
// Unlinks `node` from the doubly linked free list rooted at `pool.free_list`.
//
// When `node` is the current list head, the head is advanced to the node's
// successor and that successor (if any) gets its back-link cleared. Otherwise
// the node's neighbours are linked to each other. The `prev_free`/`next_free`
// fields of `node` itself are left untouched.
static void remove_node_from_pool_free_list(malloc_pool_t& pool, malloc_node_t* node)
{
	if (pool.free_list != node)
	{
		// node is somewhere after the head: bridge its neighbours
		malloc_node_t* const before = node->prev_free;
		malloc_node_t* const after = node->next_free;
		if (after)
			after->prev_free = before;
		if (before)
			before->next_free = after;
		return;
	}

	// node is the head: its successor becomes the new head
	malloc_node_t* const new_head = node->next_free;
	pool.free_list = new_head;
	if (new_head)
		new_head->prev_free = nullptr;
}
static void* allocate_from_pool(size_t pool_index, size_t size) static void* allocate_from_pool(size_t pool_index, size_t size)
{ {
assert(size % s_malloc_default_align == 0); assert(size % s_malloc_default_align == 0);
@ -85,39 +109,30 @@ static void* allocate_from_pool(size_t pool_index, size_t size)
auto& pool = s_malloc_pools[pool_index]; auto& pool = s_malloc_pools[pool_index];
assert(pool.start != nullptr); assert(pool.start != nullptr);
if (!pool.first_free) if (!pool.free_list)
return nullptr; return nullptr;
assert(!pool.first_free->allocated);
for (auto* node = pool.first_free;; node = node->next()) for (auto* node = pool.free_list; node; node = node->next_free)
{ {
if (node->allocated) assert(!node->allocated);
// merge nodes right after current one
while (!node->last && !node->next()->allocated)
{ {
if (node->last) auto* next = node->next();
break; remove_node_from_pool_free_list(pool, next);
continue; node->last = next->last;
node->size += next->size;
} }
if (!node->last && !node->next()->allocated)
{
node->last = node->next()->last;
node->size += node->next()->size;
}
if (node->data_size() < size) if (node->data_size() < size)
{
if (node->last)
break;
continue; continue;
}
node->allocated = true; node->allocated = true;
remove_node_from_pool_free_list(pool, node);
if (node == pool.first_free)
pool.first_free = nullptr;
// shrink node if needed // shrink node if needed
if (node->data_size() - size > sizeof(malloc_node_t)) if (node->data_size() - size >= sizeof(malloc_node_t) + s_malloc_shrink_threshold)
{ {
uint8_t* node_end = (uint8_t*)node->next(); uint8_t* node_end = (uint8_t*)node->next();
@ -130,23 +145,12 @@ static void* allocate_from_pool(size_t pool_index, size_t size)
node->last = false; node->last = false;
if (!pool.first_free || next < pool.first_free) // insert excess node to free list
pool.first_free = next; if (pool.free_list)
} pool.free_list->prev_free = next;
next->next_free = pool.free_list;
// Find next free node next->prev_free = nullptr;
if (!pool.first_free) pool.free_list = next;
{
for (auto* free_node = node;; free_node = free_node->next())
{
if (!free_node->allocated)
{
pool.first_free = free_node;
break;
}
if (free_node->last)
break;
}
} }
return node->data; return node->data;
@ -216,31 +220,7 @@ void* realloc(void* ptr, size_t size)
if (oldsize == size) if (oldsize == size)
return ptr; return ptr;
// shrink allocation if needed // TODO: try to shrink or expand allocation
if (oldsize > size)
{
if (node->data_size() - size > sizeof(malloc_node_t))
{
uint8_t* node_end = (uint8_t*)node->next();
node->size = sizeof(malloc_node_t) + size;
auto* next = node->next();
next->allocated = false;
next->size = node_end - (uint8_t*)next;
next->last = node->last;
node->last = false;
auto& pool = pool_from_node(node);
if (!pool.first_free || next < pool.first_free)
pool.first_free = next;
}
return ptr;
}
// FIXME: try to expand allocation
// allocate new pointer // allocate new pointer
void* new_ptr = malloc(size); void* new_ptr = malloc(size);
@ -262,17 +242,25 @@ void free(void* ptr)
auto* node = node_from_data_pointer(ptr); auto* node = node_from_data_pointer(ptr);
// mark node as unallocated and try to merge with the next node
node->allocated = false; node->allocated = false;
if (!node->last && !node->next()->allocated)
{
node->last = node->next()->last;
node->size += node->next()->size;
}
auto& pool = pool_from_node(node); auto& pool = pool_from_node(node);
if (!pool.first_free || node < pool.first_free)
pool.first_free = node; // merge nodes right after freed one
while (!node->last && !node->next()->allocated)
{
auto* next = node->next();
remove_node_from_pool_free_list(pool, next);
node->last = next->last;
node->size += next->size;
}
// add node to free list
if (pool.free_list)
pool.free_list->prev_free = node;
node->prev_free = nullptr;
node->next_free = pool.free_list;
pool.free_list = node;
} }
void* calloc(size_t nmemb, size_t size) void* calloc(size_t nmemb, size_t size)