Kernel: rewrite the whole kmalloc (again)

Performance of the old kmalloc implementation was terrible.
We now use fixed-width linked list allocations for sizes <= 60 bytes.
This is much faster than variable size allocation.

We don't use bitmap scanning anymore, since it was probably the slow
part. Instead we use headers that store each allocation's size and alignment.

I removed kmalloc_eternal; even though it was very fast, there is
not really any need for it, since the only place it was used was the IDT.

These changes allowed my psf (font) parsing to go from ~500 ms to ~20 ms.
(coming soon :D)
This commit is contained in:
Bananymous 2023-02-22 16:18:14 +02:00
parent 4afc4660a4
commit d9c05b7378
4 changed files with 295 additions and 101 deletions

View File

@ -179,7 +179,7 @@ found:
{ {
constexpr size_t idt_size = 0x100 * sizeof(GateDescriptor); constexpr size_t idt_size = 0x100 * sizeof(GateDescriptor);
s_idt = (GateDescriptor*)kmalloc_eternal(idt_size); s_idt = (GateDescriptor*)kmalloc(idt_size);
memset(s_idt, 0x00, idt_size); memset(s_idt, 0x00, idt_size);
s_idtr.offset = s_idt; s_idtr.offset = s_idt;

View File

@ -205,7 +205,7 @@ namespace IDT
void initialize() void initialize()
{ {
s_idt = (GateDescriptor*)kmalloc_eternal(0x100 * sizeof(GateDescriptor)); s_idt = (GateDescriptor*)kmalloc(0x100 * sizeof(GateDescriptor));
memset(s_idt, 0x00, 0x100 * sizeof(GateDescriptor)); memset(s_idt, 0x00, 0x100 * sizeof(GateDescriptor));
s_idtr.offset = (uint64_t)s_idt; s_idtr.offset = (uint64_t)s_idt;

View File

@ -5,7 +5,6 @@
void kmalloc_initialize(); void kmalloc_initialize();
void kmalloc_dump_info(); void kmalloc_dump_info();
void* kmalloc_eternal(size_t);
void* kmalloc(size_t); void* kmalloc(size_t);
void* kmalloc(size_t, size_t); void* kmalloc(size_t, size_t);
void kfree(void*); void kfree(void*);

View File

@ -8,57 +8,115 @@
#define MB (1 << 20) #define MB (1 << 20)
// Minimum alignment of every general-purpose allocation. A node header is
// exactly this size, so data placed right after a header stays aligned.
static constexpr size_t s_kmalloc_min_align = alignof(max_align_t);

// Header stored in front of every general-purpose allocation. It records the
// node's total size and the padding (m_align) needed to align its data.
struct kmalloc_node
{
	void set_align(ptrdiff_t align) { m_align = align; }
	void set_end(uintptr_t end) { m_size = end - (uintptr_t)m_data; }
	void set_used(bool used) { m_used = used; }

	// Can this node still hold data after 'align' bytes of front padding?
	bool can_align(uint32_t align) { return align < m_size; }
	// Is the front padding big enough to carve a whole new node out of it?
	bool can_fit_before() { return m_align > sizeof(kmalloc_node); }
	// After taking new_size bytes, is there room left for another node + data?
	bool can_fit_after(size_t new_size) { return data() + new_size < end() - sizeof(kmalloc_node); }

	// Carve the alignment padding off into a node of its own: this node keeps
	// the front part, and a fresh node begins where the aligned data started.
	void split_in_align()
	{
		uintptr_t node_end = end();
		set_end(data() - sizeof(kmalloc_node));
		set_align(0);

		auto* next = after();
		next->set_end(node_end);
		next->set_align(0);
	}

	// Shrink this node to 'size' data bytes and create a new node covering
	// the remainder up to the old end.
	void split_after_size(size_t size)
	{
		uintptr_t node_end = end();
		set_end(data() + size);

		auto* next = after();
		next->set_end(node_end);
		next->set_align(0);
	}

	bool used() { return m_used; }

	uintptr_t size_no_align() { return m_size; }
	uintptr_t size() { return size_no_align() - m_align; }

	uintptr_t data_no_align() { return (uintptr_t)m_data; }
	uintptr_t data() { return data_no_align() + m_align; }
	uintptr_t end() { return data_no_align() + m_size; }

	// The node immediately following this one in memory.
	kmalloc_node* after() { return (kmalloc_node*)end(); }

private:
	uint32_t m_size;
	uint32_t m_align;
	bool m_used;
	// pad the header to exactly s_kmalloc_min_align bytes
	uint8_t m_padding[s_kmalloc_min_align - sizeof(m_size) - sizeof(m_align) - sizeof(m_used)];
	uint8_t m_data[0];
};
static_assert(sizeof(kmalloc_node) == s_kmalloc_min_align);
static size_t s_kmalloc_eternal_free = s_kmalloc_eternal_size; struct kmalloc_info
static size_t s_kmalloc_eternal_used = 0; {
static constexpr uintptr_t base = 0x00200000;
static constexpr size_t size = 1 * MB;
static constexpr uintptr_t end = base + size;
kmalloc_node* first() { return (kmalloc_node*)base; }
kmalloc_node* from_address(void* addr)
{
for (auto* node = first(); node->end() < end; node = node->after())
if (node->data() == (uintptr_t)addr)
return node;
return nullptr;
}
size_t used = 0;
size_t free = size;
};
static kmalloc_info s_kmalloc_info;
// Node for fixed-size allocations. prev/next are indices into the node area,
// not pointers; 'data' must stay the first member so the pointer handed to
// the caller is also the node pointer (kfree casts it straight back).
template<size_t SIZE>
struct kmalloc_fixed_node
{
	uint8_t data[SIZE - 2 * sizeof(uint16_t)];
	uint16_t prev = 0;
	uint16_t next = 0;
	static constexpr uint16_t invalid = ~0;
};

// Fast fixed-width allocator: a 1 MB pool of 64-byte nodes kept on intrusive
// free/used lists. Used for small allocations with default alignment.
struct kmalloc_fixed_info
{
	using node = kmalloc_fixed_node<64>;

	// placed directly after the general allocator's region
	static constexpr uintptr_t base = s_kmalloc_info.end;
	static constexpr size_t size = 1 * MB;
	static constexpr uintptr_t end = base + size;
	static constexpr size_t node_count = size / sizeof(node);
	static_assert(node_count < (1 << 16)); // indices must fit in uint16_t

	node* free_list_head = nullptr;
	node* used_list_head = nullptr;

	node* node_at(size_t index) { return (node*)(base + index * sizeof(node)); }
	uint16_t index_of(const node* p) { return ((uintptr_t)p - base) / sizeof(node); }

	size_t used = 0;
	size_t free = size;
};
static kmalloc_fixed_info s_kmalloc_fixed_info;
extern "C" uintptr_t g_kernel_end; extern "C" uintptr_t g_kernel_end;
static bool is_kmalloc_chunk_used(size_t index)
{
ASSERT(index < s_kmalloc_total_chunks);
return s_kmalloc_bitmap[index / 8] & (1 << (index % 8));
}
static uintptr_t chunk_address(size_t index)
{
ASSERT(index < s_kmalloc_total_chunks);
return s_kmalloc_base + s_kmalloc_chunk_size * index;
}
void kmalloc_initialize() void kmalloc_initialize()
{ {
if (!(g_multiboot_info->flags & (1 << 6))) if (!(g_multiboot_info->flags & (1 << 6)))
Kernel::panic("Kmalloc: Bootloader didn't provide a memory map"); Kernel::panic("Kmalloc: Bootloader didn't provide a memory map");
if ((uintptr_t)&g_kernel_end > s_kmalloc_base) if ((uintptr_t)&g_kernel_end > s_kmalloc_info.base)
Kernel::panic("Kmalloc: Kernel end ({}) is over kmalloc base ({})", &g_kernel_end, (void*)s_kmalloc_base); Kernel::panic("Kmalloc: Kernel end ({}) is over kmalloc base ({})", &g_kernel_end, (void*)s_kmalloc_info.base);
// Validate kmalloc memory // Validate kmalloc memory
bool valid = false; bool valid = false;
@ -68,7 +126,7 @@ void kmalloc_initialize()
if (mmmt->type == 1) if (mmmt->type == 1)
{ {
if (mmmt->base_addr <= s_kmalloc_base && s_kmalloc_eternal_end <= mmmt->base_addr + mmmt->length) if (mmmt->base_addr <= s_kmalloc_info.base && s_kmalloc_fixed_info.end <= mmmt->base_addr + mmmt->length)
{ {
dprintln("Total usable RAM: {}.{} MB", mmmt->length / MB, mmmt->length % MB); dprintln("Total usable RAM: {}.{} MB", mmmt->length / MB, mmmt->length % MB);
valid = true; valid = true;
@ -81,100 +139,237 @@ void kmalloc_initialize()
if (!valid) if (!valid)
{ {
size_t kmalloc_total_size = s_kmalloc_info.size + s_kmalloc_fixed_info.size;
Kernel::panic("Kmalloc: Could not find {}.{} MB of memory", Kernel::panic("Kmalloc: Could not find {}.{} MB of memory",
(s_kmalloc_eternal_end - s_kmalloc_base) / MB, kmalloc_total_size / MB,
(s_kmalloc_eternal_end - s_kmalloc_base) % MB kmalloc_total_size % MB
); );
} }
// initialize fixed size allocations
{
auto& info = s_kmalloc_fixed_info;
for (size_t i = 0; i < info.node_count; i++)
{
auto* node = info.node_at(i);
node->next = i - 1;
node->prev = i + 1;
}
info.node_at(0)->next = kmalloc_fixed_info::node::invalid;
info.node_at(info.node_count - 1)->prev = kmalloc_fixed_info::node::invalid;
info.free_list_head = info.node_at(0);
info.used_list_head = nullptr;
}
// initial general allocations
{
auto& info = s_kmalloc_info;
auto* node = info.first();
node->set_end(info.end);
node->set_align(0);
node->set_used(false);
}
} }
void kmalloc_dump_info() void kmalloc_dump_info()
{ {
kprintln("kmalloc: 0x{8H}->0x{8H}", s_kmalloc_base, s_kmalloc_end); kprintln("kmalloc: 0x{8H}->0x{8H}", s_kmalloc_info.base, s_kmalloc_info.end);
kprintln(" used: 0x{8H}", s_kmalloc_used); kprintln(" used: 0x{8H}", s_kmalloc_info.used);
kprintln(" free: 0x{8H}", s_kmalloc_free); kprintln(" free: 0x{8H}", s_kmalloc_info.free);
kprintln("kmalloc eternal: 0x{8H}->0x{8H}", s_kmalloc_eternal_base, s_kmalloc_eternal_end); kprintln("kmalloc fixed {} byte: 0x{8H}->0x{8H}", sizeof(kmalloc_fixed_info::node), s_kmalloc_fixed_info.base, s_kmalloc_fixed_info.end);
kprintln(" used: 0x{8H}", s_kmalloc_eternal_used); kprintln(" used: 0x{8H}", s_kmalloc_fixed_info.used);
kprintln(" free: 0x{8H}", s_kmalloc_eternal_free); kprintln(" free: 0x{8H}", s_kmalloc_fixed_info.free);
} }
void* kmalloc_eternal(size_t size) static void* kmalloc_fixed()
{ {
if (size_t rem = size % alignof(max_align_t)) auto& info = s_kmalloc_fixed_info;
size += alignof(max_align_t) - rem;
ASSERT(s_kmalloc_eternal_ptr + size < s_kmalloc_eternal_end); if (!info.free_list_head)
void* result = (void*)s_kmalloc_eternal_ptr; return nullptr;
s_kmalloc_eternal_ptr += size;
s_kmalloc_eternal_used += size; // allocate the node on top of free list
s_kmalloc_eternal_free -= size; auto* node = info.free_list_head;
return result; ASSERT(node->next == kmalloc_fixed_info::node::invalid);
// remove the node from free list
node->prev = kmalloc_fixed_info::node::invalid;
node->next = kmalloc_fixed_info::node::invalid;
if (info.free_list_head->prev != kmalloc_fixed_info::node::invalid)
{
info.free_list_head = info.node_at(info.free_list_head->prev);
info.free_list_head->next = kmalloc_fixed_info::node::invalid;
}
else
{
info.free_list_head = nullptr;
}
// move the node to the top of used nodes
if (info.used_list_head)
{
info.used_list_head->next = info.index_of(node);
node->prev = info.index_of(info.used_list_head);
}
info.used_list_head = node;
info.used += sizeof(kmalloc_fixed_info::node);
info.free -= sizeof(kmalloc_fixed_info::node);
return (void*)node->data;
}
// First-fit allocation from the general-purpose node list.
// align must be a multiple of s_kmalloc_min_align (caller guarantees this).
// Returns nullptr when no node can satisfy the request.
static void* kmalloc_impl(size_t size, size_t align)
{
	ASSERT(align % s_kmalloc_min_align == 0);

	auto& info = s_kmalloc_info;

	for (auto* node = info.first(); node->end() <= info.end; node = node->after())
	{
		if (node->used())
			continue;

		// lazily coalesce ALL consecutive free followers into this node
		// (the original merged at most one per visit)
		while (true)
		{
			auto* next = node->after();
			if (next->end() > info.end || next->used())
				break;
			node->set_end(next->end());
		}

		if (node->size_no_align() < size)
			continue;

		// padding needed to bring the data pointer up to 'align'
		ptrdiff_t needed_align = 0;
		if (ptrdiff_t rem = node->data_no_align() % align)
			needed_align = align - rem;

		if (!node->can_align(needed_align))
			continue;

		node->set_align(needed_align);
		ASSERT(node->data() % align == 0);

		if (node->size() < size)
			continue;

		// if the padding can hold a whole node, split it off as free space
		if (node->can_fit_before())
		{
			node->split_in_align();
			node->set_used(false);
			node = node->after();
			ASSERT(node->data() % align == 0);
		}

		node->set_used(true);

		// give the unused tail back as a free node
		if (node->can_fit_after(size))
		{
			node->split_after_size(size);
			node->after()->set_used(false);
			ASSERT(node->data() % align == 0);
		}

		info.used += sizeof(kmalloc_node) + node->size_no_align();
		info.free -= sizeof(kmalloc_node) + node->size_no_align();

		return (void*)node->data();
	}

	return nullptr;
}
void* kmalloc(size_t size) void* kmalloc(size_t size)
{ {
return kmalloc(size, s_kmalloc_default_align); return kmalloc(size, s_kmalloc_min_align);
} }
void* kmalloc(size_t size, size_t align) void* kmalloc(size_t size, size_t align)
{ {
if (size == 0 || size >= s_kmalloc_size) kmalloc_info& info = s_kmalloc_info;
if (size == 0 || size >= info.size)
return nullptr; return nullptr;
if (align == 0) ASSERT(align);
align = s_kmalloc_chunk_size;
if (align < s_kmalloc_chunk_size || align % s_kmalloc_chunk_size) if (align % s_kmalloc_min_align)
{ {
size_t new_align = BAN::Math::lcm(align, s_kmalloc_chunk_size); size_t new_align = BAN::Math::lcm(align, s_kmalloc_min_align);
dwarnln("kmalloc asked to align to {}, aliging to {} instead", align, new_align); dwarnln("Asked to align to {}, aliging to {} instead", align, new_align);
align = new_align; align = new_align;
} }
size_t needed_chunks = (size - 1) / s_kmalloc_chunk_size + 1 + s_kmalloc_chunks_per_size; // if the size fits into fixed node, we will try to use that since it is faster
for (size_t i = 0; i < s_kmalloc_total_chunks - needed_chunks; i++) if (align == s_kmalloc_min_align && size < sizeof(kmalloc_fixed_info::node::data))
{ if (void* result = kmalloc_fixed())
if (chunk_address(i + s_kmalloc_chunks_per_size) % align) return result;
continue;
bool free = true; return kmalloc_impl(size, align);
for (size_t j = 0; j < needed_chunks; j++)
{
if (is_kmalloc_chunk_used(i + j))
{
free = false;
i += j;
break;
}
}
if (free)
{
*(size_t*)chunk_address(i) = needed_chunks;
for (size_t j = 0; j < needed_chunks; j++)
s_kmalloc_bitmap[(i + j) / 8] |= (1 << ((i + j) % 8));
s_kmalloc_used += needed_chunks * s_kmalloc_chunk_size;
s_kmalloc_free -= needed_chunks * s_kmalloc_chunk_size;
return (void*)chunk_address(i + s_kmalloc_chunks_per_size);
}
}
dwarnln("Could not allocate {} bytes", size);
return nullptr;
} }
void kfree(void* address) void kfree(void* address)
{ {
if (!address) if (address == nullptr)
return; return;
ASSERT(((uintptr_t)address % s_kmalloc_chunk_size) == 0);
ASSERT(s_kmalloc_base <= (uintptr_t)address && (uintptr_t)address < s_kmalloc_end);
size_t first_chunk = ((uintptr_t)address - s_kmalloc_base) / s_kmalloc_chunk_size - s_kmalloc_chunks_per_size; uintptr_t address_uint = (uintptr_t)address;
ASSERT(is_kmalloc_chunk_used(first_chunk)); ASSERT(address_uint % s_kmalloc_min_align == 0);
if (s_kmalloc_fixed_info.base <= address_uint && address_uint < s_kmalloc_fixed_info.end)
{
auto& info = s_kmalloc_fixed_info;
ASSERT(info.used_list_head);
// get node from fixed info buffer
auto* node = (kmalloc_fixed_info::node*)address;
ASSERT(node->next < info.node_count || node->next == kmalloc_fixed_info::node::invalid);
ASSERT(node->prev < info.node_count || node->prev == kmalloc_fixed_info::node::invalid);
// remove from used list
if (node->prev != kmalloc_fixed_info::node::invalid)
info.node_at(node->prev)->next = node->next;
if (node->next != kmalloc_fixed_info::node::invalid)
info.node_at(node->next)->prev = node->prev;
if (info.used_list_head == node)
info.used_list_head = info.used_list_head->prev != kmalloc_fixed_info::node::invalid ? info.node_at(info.used_list_head->prev) : nullptr;
// add to free list
node->next = kmalloc_fixed_info::node::invalid;
node->prev = kmalloc_fixed_info::node::invalid;
if (info.free_list_head)
{
info.free_list_head->next = info.index_of(node);
node->prev = info.index_of(info.free_list_head);
}
info.free_list_head = node;
info.used -= sizeof(kmalloc_fixed_info::node);
info.free += sizeof(kmalloc_fixed_info::node);
}
else if (s_kmalloc_info.base <= address_uint && address_uint < s_kmalloc_info.end)
{
auto& info = s_kmalloc_info;
auto* node = info.from_address(address);
ASSERT(node && node->data() == (uintptr_t)address);
ASSERT(node->used());
ptrdiff_t size = node->size_no_align();
if (auto* next = node->after(); next->end() <= info.end)
if (!next->used())
node->set_end(node->after()->end());
node->set_used(false);
info.used -= sizeof(kmalloc_node) + size;
info.free += sizeof(kmalloc_node) + size;
}
else
{
Kernel::panic("Trying to free a pointer outsize of kmalloc memory");
}
size_t size = *(size_t*)chunk_address(first_chunk);
for (size_t i = 0; i < size; i++)
s_kmalloc_bitmap[(first_chunk + i) / 8] &= ~(1 << ((first_chunk + i) % 8));
s_kmalloc_used -= size * s_kmalloc_chunk_size;
s_kmalloc_free += size * s_kmalloc_chunk_size;
} }