From d9c05b737891723095e42abacf7b548b8778fc36 Mon Sep 17 00:00:00 2001 From: Bananymous Date: Wed, 22 Feb 2023 16:18:14 +0200 Subject: [PATCH] Kernel: rewrite the whole kmalloc (again) Performance of the old kmalloc implementation was terrible. We now use fixed-width linked list allocations for sizes <= 60 bytes. This is much faster than variable size allocation. We don't use bitmap scanning anymore since it was probably the slow part. Instead we use headers that tell allocation size and alignment. I removed the kmalloc_eternal, even though it was very fast, there is not really any need for it, since the only place it was used in was IDT. These changes allowed my psf (font) parsing to go from ~500 ms to ~20 ms. (coming soon :D) --- kernel/arch/i386/IDT.cpp | 2 +- kernel/arch/x86_64/IDT.cpp | 2 +- kernel/include/kernel/kmalloc.h | 1 - kernel/kernel/kmalloc.cpp | 391 ++++++++++++++++++++++++-------- 4 files changed, 295 insertions(+), 101 deletions(-) diff --git a/kernel/arch/i386/IDT.cpp b/kernel/arch/i386/IDT.cpp index 6908a421..b9f7ce4c 100644 --- a/kernel/arch/i386/IDT.cpp +++ b/kernel/arch/i386/IDT.cpp @@ -179,7 +179,7 @@ found: { constexpr size_t idt_size = 0x100 * sizeof(GateDescriptor); - s_idt = (GateDescriptor*)kmalloc_eternal(idt_size); + s_idt = (GateDescriptor*)kmalloc(idt_size); memset(s_idt, 0x00, idt_size); s_idtr.offset = s_idt; diff --git a/kernel/arch/x86_64/IDT.cpp b/kernel/arch/x86_64/IDT.cpp index 5c035dd2..e6439133 100644 --- a/kernel/arch/x86_64/IDT.cpp +++ b/kernel/arch/x86_64/IDT.cpp @@ -205,7 +205,7 @@ namespace IDT void initialize() { - s_idt = (GateDescriptor*)kmalloc_eternal(0x100 * sizeof(GateDescriptor)); + s_idt = (GateDescriptor*)kmalloc(0x100 * sizeof(GateDescriptor)); memset(s_idt, 0x00, 0x100 * sizeof(GateDescriptor)); s_idtr.offset = (uint64_t)s_idt; diff --git a/kernel/include/kernel/kmalloc.h b/kernel/include/kernel/kmalloc.h index aaf7010b..ba7d288e 100644 --- 
b/kernel/include/kernel/kmalloc.h @@ -5,7 +5,6 @@ void kmalloc_initialize(); void kmalloc_dump_info(); -void* kmalloc_eternal(size_t); void* kmalloc(size_t); void* kmalloc(size_t, size_t); void kfree(void*); diff --git a/kernel/kernel/kmalloc.cpp b/kernel/kernel/kmalloc.cpp index fbc5ccea..585642e1 100644 --- a/kernel/kernel/kmalloc.cpp +++ b/kernel/kernel/kmalloc.cpp @@ -8,57 +8,115 @@ #define MB (1 << 20) -/* +static constexpr size_t s_kmalloc_min_align = alignof(max_align_t); -Kmalloc holds a bitmap of free/allocated chunks +struct kmalloc_node +{ + void set_align(ptrdiff_t align) { m_align = align; } + void set_end(uintptr_t end) { m_size = end - (uintptr_t)m_data; } + void set_used(bool used) { m_used = used; } -When allocating n chunks, kmalloc will put the number of chunks -to address, and return pointer to the byte after the stored size + bool can_align(uint32_t align) { return align < m_size; } + bool can_fit_before() { return m_align > sizeof(kmalloc_node); } + bool can_fit_after(size_t new_size) { return data() + new_size < end() - sizeof(kmalloc_node); } -*/ + void split_in_align() + { + uintptr_t node_end = end(); + set_end(data() - sizeof(kmalloc_node)); + set_align(0); -static constexpr uintptr_t s_kmalloc_base = 0x00200000; -static constexpr size_t s_kmalloc_size = 1 * MB; -static constexpr uintptr_t s_kmalloc_end = s_kmalloc_base + s_kmalloc_size; + auto* next = after(); + next->set_end(node_end); + next->set_align(0); + } -static constexpr uintptr_t s_kmalloc_eternal_base = s_kmalloc_end; -static constexpr size_t s_kmalloc_eternal_size = 1 * MB; -static constexpr uintptr_t s_kmalloc_eternal_end = s_kmalloc_eternal_base + s_kmalloc_eternal_size; -static uintptr_t s_kmalloc_eternal_ptr = s_kmalloc_eternal_base; + void split_after_size(size_t size) + { + uintptr_t node_end = end(); + set_end(data() + size); + + auto* next = after(); + next->set_end(node_end); + next->set_align(0); + } -static constexpr size_t s_kmalloc_default_align = 
alignof(max_align_t); -static constexpr size_t s_kmalloc_chunk_size = s_kmalloc_default_align; -static constexpr size_t s_kmalloc_chunks_per_size = sizeof(size_t) * 8 / s_kmalloc_chunk_size; -static constexpr size_t s_kmalloc_total_chunks = s_kmalloc_size / s_kmalloc_chunk_size; -static uint8_t s_kmalloc_bitmap[s_kmalloc_total_chunks / 8] { 0 }; + bool used() { return m_used; } + uintptr_t size_no_align() { return m_size; } + uintptr_t size() { return size_no_align() - m_align; } + uintptr_t data_no_align() { return (uintptr_t)m_data; } + uintptr_t data() { return data_no_align() + m_align; } + uintptr_t end() { return data_no_align() + m_size; } + kmalloc_node* after() { return (kmalloc_node*)end(); } -static size_t s_kmalloc_free = s_kmalloc_size; -static size_t s_kmalloc_used = 0; +private: + uint32_t m_size; + uint32_t m_align; + bool m_used; + uint8_t m_padding[s_kmalloc_min_align - sizeof(m_size) - sizeof(m_align) - sizeof(m_used)]; + uint8_t m_data[0]; +}; +static_assert(sizeof(kmalloc_node) == s_kmalloc_min_align); -static size_t s_kmalloc_eternal_free = s_kmalloc_eternal_size; -static size_t s_kmalloc_eternal_used = 0; +struct kmalloc_info +{ + static constexpr uintptr_t base = 0x00200000; + static constexpr size_t size = 1 * MB; + static constexpr uintptr_t end = base + size; + + kmalloc_node* first() { return (kmalloc_node*)base; } + kmalloc_node* from_address(void* addr) + { + for (auto* node = first(); node->end() < end; node = node->after()) + if (node->data() == (uintptr_t)addr) + return node; + return nullptr; + } + + size_t used = 0; + size_t free = size; +}; +static kmalloc_info s_kmalloc_info; + +template +struct kmalloc_fixed_node +{ + uint8_t data[SIZE - 2 * sizeof(uint16_t)]; + uint16_t prev = NULL; + uint16_t next = NULL; + static constexpr uint16_t invalid = ~0; +}; + +struct kmalloc_fixed_info +{ + using node = kmalloc_fixed_node<64>; + + static constexpr uintptr_t base = s_kmalloc_info.end; + static constexpr size_t size = 1 * MB; + static 
constexpr uintptr_t end = base + size; + static constexpr size_t node_count = size / sizeof(node); + static_assert(node_count < (1 << 16)); + + node* free_list_head = NULL; + node* used_list_head = NULL; + + node* node_at(size_t index) { return (node*)(base + index * sizeof(node)); } + uint16_t index_of(const node* p) { return ((uintptr_t)p - base) / sizeof(node); } + + size_t used = 0; + size_t free = size; +}; +static kmalloc_fixed_info s_kmalloc_fixed_info; extern "C" uintptr_t g_kernel_end; -static bool is_kmalloc_chunk_used(size_t index) -{ - ASSERT(index < s_kmalloc_total_chunks); - return s_kmalloc_bitmap[index / 8] & (1 << (index % 8)); -} - -static uintptr_t chunk_address(size_t index) -{ - ASSERT(index < s_kmalloc_total_chunks); - return s_kmalloc_base + s_kmalloc_chunk_size * index; -} - void kmalloc_initialize() { if (!(g_multiboot_info->flags & (1 << 6))) Kernel::panic("Kmalloc: Bootloader didn't provide a memory map"); - if ((uintptr_t)&g_kernel_end > s_kmalloc_base) - Kernel::panic("Kmalloc: Kernel end ({}) is over kmalloc base ({})", &g_kernel_end, (void*)s_kmalloc_base); + if ((uintptr_t)&g_kernel_end > s_kmalloc_info.base) + Kernel::panic("Kmalloc: Kernel end ({}) is over kmalloc base ({})", &g_kernel_end, (void*)s_kmalloc_info.base); // Validate kmalloc memory bool valid = false; @@ -68,7 +126,7 @@ void kmalloc_initialize() if (mmmt->type == 1) { - if (mmmt->base_addr <= s_kmalloc_base && s_kmalloc_eternal_end <= mmmt->base_addr + mmmt->length) + if (mmmt->base_addr <= s_kmalloc_info.base && s_kmalloc_fixed_info.end <= mmmt->base_addr + mmmt->length) { dprintln("Total usable RAM: {}.{} MB", mmmt->length / MB, mmmt->length % MB); valid = true; @@ -81,100 +139,237 @@ void kmalloc_initialize() if (!valid) { + size_t kmalloc_total_size = s_kmalloc_info.size + s_kmalloc_fixed_info.size; Kernel::panic("Kmalloc: Could not find {}.{} MB of memory", - (s_kmalloc_eternal_end - s_kmalloc_base) / MB, - (s_kmalloc_eternal_end - s_kmalloc_base) % MB + 
kmalloc_total_size / MB, + kmalloc_total_size % MB ); } + + // initialize fixed size allocations + { + auto& info = s_kmalloc_fixed_info; + + for (size_t i = 0; i < info.node_count; i++) + { + auto* node = info.node_at(i); + node->next = i - 1; + node->prev = i + 1; + } + + info.node_at(0)->next = kmalloc_fixed_info::node::invalid; + info.node_at(info.node_count - 1)->prev = kmalloc_fixed_info::node::invalid; + + info.free_list_head = info.node_at(0); + info.used_list_head = nullptr; + } + + // initial general allocations + { + auto& info = s_kmalloc_info; + auto* node = info.first(); + node->set_end(info.end); + node->set_align(0); + node->set_used(false); + } } void kmalloc_dump_info() { - kprintln("kmalloc: 0x{8H}->0x{8H}", s_kmalloc_base, s_kmalloc_end); - kprintln(" used: 0x{8H}", s_kmalloc_used); - kprintln(" free: 0x{8H}", s_kmalloc_free); + kprintln("kmalloc: 0x{8H}->0x{8H}", s_kmalloc_info.base, s_kmalloc_info.end); + kprintln(" used: 0x{8H}", s_kmalloc_info.used); + kprintln(" free: 0x{8H}", s_kmalloc_info.free); - kprintln("kmalloc eternal: 0x{8H}->0x{8H}", s_kmalloc_eternal_base, s_kmalloc_eternal_end); - kprintln(" used: 0x{8H}", s_kmalloc_eternal_used); - kprintln(" free: 0x{8H}", s_kmalloc_eternal_free); + kprintln("kmalloc fixed {} byte: 0x{8H}->0x{8H}", sizeof(kmalloc_fixed_info::node), s_kmalloc_fixed_info.base, s_kmalloc_fixed_info.end); + kprintln(" used: 0x{8H}", s_kmalloc_fixed_info.used); + kprintln(" free: 0x{8H}", s_kmalloc_fixed_info.free); } -void* kmalloc_eternal(size_t size) +static void* kmalloc_fixed() { - if (size_t rem = size % alignof(max_align_t)) - size += alignof(max_align_t) - rem; - ASSERT(s_kmalloc_eternal_ptr + size < s_kmalloc_eternal_end); - void* result = (void*)s_kmalloc_eternal_ptr; - s_kmalloc_eternal_ptr += size; - s_kmalloc_eternal_used += size; - s_kmalloc_eternal_free -= size; - return result; + auto& info = s_kmalloc_fixed_info; + + if (!info.free_list_head) + return nullptr; + + // allocate the node on top of 
free list + auto* node = info.free_list_head; + ASSERT(node->next == kmalloc_fixed_info::node::invalid); + + // remove the node from free list + node->prev = kmalloc_fixed_info::node::invalid; + node->next = kmalloc_fixed_info::node::invalid; + if (info.free_list_head->prev != kmalloc_fixed_info::node::invalid) + { + info.free_list_head = info.node_at(info.free_list_head->prev); + info.free_list_head->next = kmalloc_fixed_info::node::invalid; + } + else + { + info.free_list_head = nullptr; + } + + // move the node to the top of used nodes + if (info.used_list_head) + { + info.used_list_head->next = info.index_of(node); + node->prev = info.index_of(info.used_list_head); + } + info.used_list_head = node; + + info.used += sizeof(kmalloc_fixed_info::node); + info.free -= sizeof(kmalloc_fixed_info::node); + + return (void*)node->data; +} + +static void* kmalloc_impl(size_t size, size_t align) +{ + ASSERT(align % s_kmalloc_min_align == 0); + + auto& info = s_kmalloc_info; + + for (auto* node = info.first(); node->end() <= info.end; node = node->after()) + { + if (node->used()) + continue; + + if (auto* next = node->after(); next->end() <= info.end) + if (!next->used()) + node->set_end(next->end()); + + if (node->size_no_align() < size) + continue; + + ptrdiff_t needed_align = 0; + if (ptrdiff_t rem = node->data_no_align() % align) + needed_align = align - rem; + + if (!node->can_align(needed_align)) + continue; + + node->set_align(needed_align); + ASSERT(node->data() % align == 0); + + if (node->size() < size) + continue; + + if (node->can_fit_before()) + { + node->split_in_align(); + node->set_used(false); + + node = node->after(); + ASSERT(node->data() % align == 0); + } + + node->set_used(true); + + if (node->can_fit_after(size)) + { + node->split_after_size(size); + node->after()->set_used(false); + ASSERT(node->data() % align == 0); + } + + info.used += sizeof(kmalloc_node) + node->size_no_align(); + info.free -= sizeof(kmalloc_node) + node->size_no_align(); + + 
return (void*)node->data(); + } + + return nullptr; } void* kmalloc(size_t size) { - return kmalloc(size, s_kmalloc_default_align); + return kmalloc(size, s_kmalloc_min_align); } void* kmalloc(size_t size, size_t align) { - if (size == 0 || size >= s_kmalloc_size) + kmalloc_info& info = s_kmalloc_info; + + if (size == 0 || size >= info.size) return nullptr; - if (align == 0) - align = s_kmalloc_chunk_size; + ASSERT(align); - if (align < s_kmalloc_chunk_size || align % s_kmalloc_chunk_size) + if (align % s_kmalloc_min_align) { - size_t new_align = BAN::Math::lcm(align, s_kmalloc_chunk_size); - dwarnln("kmalloc asked to align to {}, aliging to {} instead", align, new_align); + size_t new_align = BAN::Math::lcm(align, s_kmalloc_min_align); + dwarnln("Asked to align to {}, aligning to {} instead", align, new_align); align = new_align; } - size_t needed_chunks = (size - 1) / s_kmalloc_chunk_size + 1 + s_kmalloc_chunks_per_size; - for (size_t i = 0; i < s_kmalloc_total_chunks - needed_chunks; i++) - { - if (chunk_address(i + s_kmalloc_chunks_per_size) % align) - continue; + // if the size fits into fixed node, we will try to use that since it is faster + if (align == s_kmalloc_min_align && size < sizeof(kmalloc_fixed_info::node::data)) + if (void* result = kmalloc_fixed()) + return result; - bool free = true; - for (size_t j = 0; j < needed_chunks; j++) - { - if (is_kmalloc_chunk_used(i + j)) - { - free = false; - i += j; - break; - } - } - if (free) - { - *(size_t*)chunk_address(i) = needed_chunks; - for (size_t j = 0; j < needed_chunks; j++) - s_kmalloc_bitmap[(i + j) / 8] |= (1 << ((i + j) % 8)); - s_kmalloc_used += needed_chunks * s_kmalloc_chunk_size; - s_kmalloc_free -= needed_chunks * s_kmalloc_chunk_size; - return (void*)chunk_address(i + s_kmalloc_chunks_per_size); - } - } - - dwarnln("Could not allocate {} bytes", size); - return nullptr; + return kmalloc_impl(size, align); } void kfree(void* address) { - if (!address) + if (address == nullptr) return; - 
ASSERT(((uintptr_t)address % s_kmalloc_chunk_size) == 0); - ASSERT(s_kmalloc_base <= (uintptr_t)address && (uintptr_t)address < s_kmalloc_end); - size_t first_chunk = ((uintptr_t)address - s_kmalloc_base) / s_kmalloc_chunk_size - s_kmalloc_chunks_per_size; - ASSERT(is_kmalloc_chunk_used(first_chunk)); + uintptr_t address_uint = (uintptr_t)address; + ASSERT(address_uint % s_kmalloc_min_align == 0); + + if (s_kmalloc_fixed_info.base <= address_uint && address_uint < s_kmalloc_fixed_info.end) + { + auto& info = s_kmalloc_fixed_info; + ASSERT(info.used_list_head); + + // get node from fixed info buffer + auto* node = (kmalloc_fixed_info::node*)address; + ASSERT(node->next < info.node_count || node->next == kmalloc_fixed_info::node::invalid); + ASSERT(node->prev < info.node_count || node->prev == kmalloc_fixed_info::node::invalid); + + // remove from used list + if (node->prev != kmalloc_fixed_info::node::invalid) + info.node_at(node->prev)->next = node->next; + if (node->next != kmalloc_fixed_info::node::invalid) + info.node_at(node->next)->prev = node->prev; + if (info.used_list_head == node) + info.used_list_head = info.used_list_head->prev != kmalloc_fixed_info::node::invalid ? 
info.node_at(info.used_list_head->prev) : nullptr; + + // add to free list + node->next = kmalloc_fixed_info::node::invalid; + node->prev = kmalloc_fixed_info::node::invalid; + if (info.free_list_head) + { + info.free_list_head->next = info.index_of(node); + node->prev = info.index_of(info.free_list_head); + } + info.free_list_head = node; + + info.used -= sizeof(kmalloc_fixed_info::node); + info.free += sizeof(kmalloc_fixed_info::node); + } + else if (s_kmalloc_info.base <= address_uint && address_uint < s_kmalloc_info.end) + { + auto& info = s_kmalloc_info; + + auto* node = info.from_address(address); + ASSERT(node && node->data() == (uintptr_t)address); + ASSERT(node->used()); + + ptrdiff_t size = node->size_no_align(); + + if (auto* next = node->after(); next->end() <= info.end) + if (!next->used()) + node->set_end(node->after()->end()); + node->set_used(false); + + info.used -= sizeof(kmalloc_node) + size; + info.free += sizeof(kmalloc_node) + size; + } + else + { + Kernel::panic("Trying to free a pointer outside of kmalloc memory"); + } - size_t size = *(size_t*)chunk_address(first_chunk); - for (size_t i = 0; i < size; i++) - s_kmalloc_bitmap[(first_chunk + i) / 8] &= ~(1 << ((first_chunk + i) % 8)); - s_kmalloc_used -= size * s_kmalloc_chunk_size; - s_kmalloc_free += size * s_kmalloc_chunk_size; } \ No newline at end of file