Kernel: rewrite the whole kmalloc (again)

Performance of the old kmalloc implementation was terrible.
We now use fixed-width linked list allocations for sizes <= 60 bytes.
This is much faster than variable size allocation.

We don't use bitmap scanning anymore since it was probably the slow
part. Instead we use headers that tell each allocation's size and alignment.

I removed kmalloc_eternal; even though it was very fast, there is
not really any need for it, since the only place it was used was the IDT.

These changes allowed my psf (font) parsing to go from ~500 ms to ~20 ms.
(coming soon :D)
This commit is contained in:
Bananymous 2023-02-22 16:18:14 +02:00
parent 4afc4660a4
commit d9c05b7378
4 changed files with 295 additions and 101 deletions

View File

@ -179,7 +179,7 @@ found:
{
constexpr size_t idt_size = 0x100 * sizeof(GateDescriptor);
s_idt = (GateDescriptor*)kmalloc_eternal(idt_size);
s_idt = (GateDescriptor*)kmalloc(idt_size);
memset(s_idt, 0x00, idt_size);
s_idtr.offset = s_idt;

View File

@ -205,7 +205,7 @@ namespace IDT
void initialize()
{
s_idt = (GateDescriptor*)kmalloc_eternal(0x100 * sizeof(GateDescriptor));
s_idt = (GateDescriptor*)kmalloc(0x100 * sizeof(GateDescriptor));
memset(s_idt, 0x00, 0x100 * sizeof(GateDescriptor));
s_idtr.offset = (uint64_t)s_idt;

View File

@ -5,7 +5,6 @@
void kmalloc_initialize();
void kmalloc_dump_info();
void* kmalloc_eternal(size_t);
void* kmalloc(size_t);
void* kmalloc(size_t, size_t);
void kfree(void*);

View File

@ -8,57 +8,115 @@
#define MB (1 << 20)

// Minimum alignment and granularity of general purpose allocations.
// Every kmalloc_node header is exactly this many bytes.
static constexpr size_t s_kmalloc_min_align = alignof(max_align_t);

// Header placed immediately before every general purpose allocation.
// Layout in memory: [header][align gap of m_align bytes][payload].
//   m_size  -- bytes from m_data to the end of this node (gap + payload)
//   m_align -- bytes skipped after the header so data() is aligned
//   m_used  -- whether the payload is currently allocated
// NOTE(review): removed the interleaved bitmap/eternal allocator statics
// (s_kmalloc_bitmap, s_kmalloc_eternal_*, ...) that were diff residue from
// the old implementation this commit deletes.
struct kmalloc_node
{
	void set_align(ptrdiff_t align) { m_align = align; }
	void set_end(uintptr_t end) { m_size = end - (uintptr_t)m_data; }
	void set_used(bool used) { m_used = used; }

	bool can_align(uint32_t align) { return align < m_size; }
	bool can_fit_before() { return m_align > sizeof(kmalloc_node); }
	bool can_fit_after(size_t new_size) { return data() + new_size < end() - sizeof(kmalloc_node); }

	// Turn the alignment gap into its own node: this node is shrunk to end
	// right before a new header, and the new node takes over the rest.
	void split_in_align()
	{
		uintptr_t node_end = end();
		set_end(data() - sizeof(kmalloc_node));
		set_align(0);

		auto* next = after();
		next->set_end(node_end);
		next->set_align(0);
	}

	// Split the tail beyond 'size' payload bytes into a new node.
	void split_after_size(size_t size)
	{
		uintptr_t node_end = end();
		set_end(data() + size);

		auto* next = after();
		next->set_end(node_end);
		next->set_align(0);
	}

	bool used() { return m_used; }

	uintptr_t size_no_align() { return m_size; }
	uintptr_t size() { return size_no_align() - m_align; }

	uintptr_t data_no_align() { return (uintptr_t)m_data; }
	uintptr_t data() { return data_no_align() + m_align; }

	uintptr_t end() { return data_no_align() + m_size; }
	kmalloc_node* after() { return (kmalloc_node*)end(); }

private:
	uint32_t m_size;
	uint32_t m_align;
	bool m_used;
	uint8_t m_padding[s_kmalloc_min_align - sizeof(m_size) - sizeof(m_align) - sizeof(m_used)];
	uint8_t m_data[0];	// GCC zero-length array extension; payload starts here
};
static_assert(sizeof(kmalloc_node) == s_kmalloc_min_align);
// Bookkeeping for the general purpose allocator region: a fixed 1 MB window
// of physical memory managed as a list of kmalloc_node headers.
struct kmalloc_info
{
	static constexpr uintptr_t base = 0x00200000;
	static constexpr size_t size = 1 * MB;
	static constexpr uintptr_t end = base + size;

	kmalloc_node* first() { return (kmalloc_node*)base; }

	// Finds the node whose payload starts at 'addr', or nullptr.
	kmalloc_node* from_address(void* addr)
	{
		// NOTE: '<=' (matching kmalloc_impl's walk) so the last node, whose
		// end() equals the region end, is also checked; with '<' an unsplit
		// allocation reaching the region end could never be freed.
		for (auto* node = first(); node->end() <= end; node = node->after())
			if (node->data() == (uintptr_t)addr)
				return node;
		return nullptr;
	}

	size_t used = 0;
	size_t free = size;
};
static kmalloc_info s_kmalloc_info;
// Node for the fixed-width allocator: SIZE bytes total, of which the last
// two uint16_t fields are indices linking the node into the free/used list.
// 'invalid' (0xFFFF) is the list terminator sentinel.
template<size_t SIZE>
struct kmalloc_fixed_node
{
	uint8_t data[SIZE - 2 * sizeof(uint16_t)];
	uint16_t prev = 0;	// was 'NULL' -- a pointer constant misused as an integer
	uint16_t next = 0;	// both links are rewritten by kmalloc_initialize()
	static constexpr uint16_t invalid = ~0;
};
// Bookkeeping for the fixed-width allocator: a 1 MB region right after the
// general region, carved into 64-byte nodes kept on intrusive free/used
// lists addressed by 16-bit node indices.
struct kmalloc_fixed_info
{
	using node = kmalloc_fixed_node<64>;

	static constexpr uintptr_t base = s_kmalloc_info.end;
	static constexpr size_t size = 1 * MB;
	static constexpr uintptr_t end = base + size;
	static constexpr size_t node_count = size / sizeof(node);
	static_assert(node_count < (1 << 16));	// indices must fit in uint16_t

	node* free_list_head = nullptr;	// was 'NULL'; file uses nullptr elsewhere
	node* used_list_head = nullptr;

	node* node_at(size_t index) { return (node*)(base + index * sizeof(node)); }
	uint16_t index_of(const node* p) { return ((uintptr_t)p - base) / sizeof(node); }

	size_t used = 0;
	size_t free = size;
};
static kmalloc_fixed_info s_kmalloc_fixed_info;
extern "C" uintptr_t g_kernel_end;
// NOTE(review): the two helpers below belong to the REMOVED bitmap allocator
// (they reference s_kmalloc_total_chunks / s_kmalloc_bitmap / s_kmalloc_base /
// s_kmalloc_chunk_size, which the new design deletes) and appear to be
// leftover diff residue; nothing in the new allocator calls them.
static bool is_kmalloc_chunk_used(size_t index)
{
ASSERT(index < s_kmalloc_total_chunks);
return s_kmalloc_bitmap[index / 8] & (1 << (index % 8));
}
// Physical address of bitmap chunk 'index' (old allocator).
static uintptr_t chunk_address(size_t index)
{
ASSERT(index < s_kmalloc_total_chunks);
return s_kmalloc_base + s_kmalloc_chunk_size * index;
}
// Boot-time initialization of both kmalloc regions.
// NOTE(review): this span is a scraped diff view -- old (bitmap/eternal) and
// new (info-struct) lines are interleaved, two raw hunk headers are embedded
// below, and the multiboot memory-map loop between the hunks is not visible
// here; do not edit logic without the full file.
void kmalloc_initialize()
{
// multiboot flags bit 6 signals a valid mmap_* memory map -- TODO confirm
if (!(g_multiboot_info->flags & (1 << 6)))
Kernel::panic("Kmalloc: Bootloader didn't provide a memory map");
// NOTE(review): old-version guard (s_kmalloc_base), superseded by the
// s_kmalloc_info.base check right below
if ((uintptr_t)&g_kernel_end > s_kmalloc_base)
Kernel::panic("Kmalloc: Kernel end ({}) is over kmalloc base ({})", &g_kernel_end, (void*)s_kmalloc_base);
if ((uintptr_t)&g_kernel_end > s_kmalloc_info.base)
Kernel::panic("Kmalloc: Kernel end ({}) is over kmalloc base ({})", &g_kernel_end, (void*)s_kmalloc_info.base);
// Validate kmalloc memory
bool valid = false;
@ -68,7 +126,7 @@ void kmalloc_initialize()
if (mmmt->type == 1)
{
// NOTE(review): old and new bounds checks interleaved; the new one covers
// both regions via the info structs
if (mmmt->base_addr <= s_kmalloc_base && s_kmalloc_eternal_end <= mmmt->base_addr + mmmt->length)
if (mmmt->base_addr <= s_kmalloc_info.base && s_kmalloc_fixed_info.end <= mmmt->base_addr + mmmt->length)
{
dprintln("Total usable RAM: {}.{} MB", mmmt->length / MB, mmmt->length % MB);
valid = true;
@ -81,100 +139,237 @@ void kmalloc_initialize()
if (!valid)
{
size_t kmalloc_total_size = s_kmalloc_info.size + s_kmalloc_fixed_info.size;
// NOTE(review): the two '(s_kmalloc_eternal_end - ...)' argument lines are
// old-version residue replaced by kmalloc_total_size above
Kernel::panic("Kmalloc: Could not find {}.{} MB of memory",
(s_kmalloc_eternal_end - s_kmalloc_base) / MB,
(s_kmalloc_eternal_end - s_kmalloc_base) % MB
kmalloc_total_size / MB,
kmalloc_total_size % MB
);
}
// initialize fixed size allocations
{
auto& info = s_kmalloc_fixed_info;
// chain every node into one free list; 'prev' walks away from the head
// (head is node 0, whose next is the invalid sentinel)
for (size_t i = 0; i < info.node_count; i++)
{
auto* node = info.node_at(i);
node->next = i - 1;
node->prev = i + 1;
}
info.node_at(0)->next = kmalloc_fixed_info::node::invalid;
info.node_at(info.node_count - 1)->prev = kmalloc_fixed_info::node::invalid;
info.free_list_head = info.node_at(0);
info.used_list_head = nullptr;
}
// initial general allocations
{
auto& info = s_kmalloc_info;
// a single free node spanning the whole general region
auto* node = info.first();
node->set_end(info.end);
node->set_align(0);
node->set_used(false);
}
}
// Prints base/end addresses and used/free byte counts of both allocator
// regions. NOTE(review): removed the interleaved old-version lines
// (s_kmalloc_base/end and the kmalloc-eternal prints) left over from the diff.
void kmalloc_dump_info()
{
	kprintln("kmalloc: 0x{8H}->0x{8H}", s_kmalloc_info.base, s_kmalloc_info.end);
	kprintln(" used: 0x{8H}", s_kmalloc_info.used);
	kprintln(" free: 0x{8H}", s_kmalloc_info.free);

	kprintln("kmalloc fixed {} byte: 0x{8H}->0x{8H}", sizeof(kmalloc_fixed_info::node), s_kmalloc_fixed_info.base, s_kmalloc_fixed_info.end);
	kprintln(" used: 0x{8H}", s_kmalloc_fixed_info.used);
	kprintln(" free: 0x{8H}", s_kmalloc_fixed_info.free);
}
void* kmalloc_eternal(size_t size)
static void* kmalloc_fixed()
{
if (size_t rem = size % alignof(max_align_t))
size += alignof(max_align_t) - rem;
ASSERT(s_kmalloc_eternal_ptr + size < s_kmalloc_eternal_end);
void* result = (void*)s_kmalloc_eternal_ptr;
s_kmalloc_eternal_ptr += size;
s_kmalloc_eternal_used += size;
s_kmalloc_eternal_free -= size;
return result;
auto& info = s_kmalloc_fixed_info;
if (!info.free_list_head)
return nullptr;
// allocate the node on top of free list
auto* node = info.free_list_head;
ASSERT(node->next == kmalloc_fixed_info::node::invalid);
// remove the node from free list
node->prev = kmalloc_fixed_info::node::invalid;
node->next = kmalloc_fixed_info::node::invalid;
if (info.free_list_head->prev != kmalloc_fixed_info::node::invalid)
{
info.free_list_head = info.node_at(info.free_list_head->prev);
info.free_list_head->next = kmalloc_fixed_info::node::invalid;
}
else
{
info.free_list_head = nullptr;
}
// move the node to the top of used nodes
if (info.used_list_head)
{
info.used_list_head->next = info.index_of(node);
node->prev = info.index_of(info.used_list_head);
}
info.used_list_head = node;
info.used += sizeof(kmalloc_fixed_info::node);
info.free -= sizeof(kmalloc_fixed_info::node);
return (void*)node->data;
}
// General purpose first-fit allocator over the kmalloc_node region.
// Walks the node list, lazily merging a free node with a free successor,
// then carves out 'size' bytes aligned to 'align'.
// Returns the payload pointer, or nullptr when no node fits.
static void* kmalloc_impl(size_t size, size_t align)
{
// caller (kmalloc) guarantees align is a multiple of the minimum alignment
ASSERT(align % s_kmalloc_min_align == 0);
auto& info = s_kmalloc_info;
for (auto* node = info.first(); node->end() <= info.end; node = node->after())
{
if (node->used())
continue;
// merge this free node with its successor if that is also free
// (one merge per visit; longer free runs coalesce over repeated passes)
if (auto* next = node->after(); next->end() <= info.end)
if (!next->used())
node->set_end(next->end());
if (node->size_no_align() < size)
continue;
// bytes to skip after the header so data() lands on 'align'
ptrdiff_t needed_align = 0;
if (ptrdiff_t rem = node->data_no_align() % align)
needed_align = align - rem;
if (!node->can_align(needed_align))
continue;
node->set_align(needed_align);
ASSERT(node->data() % align == 0);
if (node->size() < size)
continue;
// if the alignment gap can hold a whole header, split it off as its own
// free node instead of wasting the space
if (node->can_fit_before())
{
node->split_in_align();
node->set_used(false);
node = node->after();
ASSERT(node->data() % align == 0);
}
node->set_used(true);
// split the tail into a new free node when header + data still fit there
if (node->can_fit_after(size))
{
node->split_after_size(size);
node->after()->set_used(false);
ASSERT(node->data() % align == 0);
}
// accounting includes the node header itself
info.used += sizeof(kmalloc_node) + node->size_no_align();
info.free -= sizeof(kmalloc_node) + node->size_no_align();
return (void*)node->data();
}
return nullptr;
}
// Allocates 'size' bytes with the default minimum alignment.
// NOTE(review): the diff also showed the old 's_kmalloc_default_align'
// return line; the new allocator's default is s_kmalloc_min_align.
void* kmalloc(size_t size)
{
	return kmalloc(size, s_kmalloc_min_align);
}
// Allocates 'size' bytes aligned to 'align'. Small default-aligned requests
// are served from the fast fixed-width allocator, everything else from the
// general node allocator.
// NOTE(review): removed the old bitmap-scanning loop interleaved here by the
// diff; fixed the "aliging" typo in the warning message.
void* kmalloc(size_t size, size_t align)
{
	kmalloc_info& info = s_kmalloc_info;

	if (size == 0 || size >= info.size)
		return nullptr;

	ASSERT(align);
	// round a non-conforming alignment up to a multiple of the minimum
	if (align % s_kmalloc_min_align)
	{
		size_t new_align = BAN::Math::lcm(align, s_kmalloc_min_align);
		dwarnln("Asked to align to {}, aligning to {} instead", align, new_align);
		align = new_align;
	}

	// if the size fits into fixed node, we will try to use that since it is faster
	// ('<=': a request of exactly sizeof(data) bytes still fits the node)
	if (align == s_kmalloc_min_align && size <= sizeof(kmalloc_fixed_info::node::data))
		if (void* result = kmalloc_fixed())
			return result;

	void* result = kmalloc_impl(size, align);
	if (result == nullptr)
		dwarnln("Could not allocate {} bytes", size);
	return result;
}
// Returns 'address' to whichever allocator region it came from; panics if it
// belongs to neither. nullptr is a no-op.
// NOTE(review): removed the old bitmap-free code interleaved here (duplicate
// null guard, chunk asserts, trailing bitmap-clear loop) and fixed the
// "outsize" typo in the panic message.
void kfree(void* address)
{
	if (address == nullptr)
		return;

	uintptr_t address_uint = (uintptr_t)address;
	ASSERT(address_uint % s_kmalloc_min_align == 0);

	if (s_kmalloc_fixed_info.base <= address_uint && address_uint < s_kmalloc_fixed_info.end)
	{
		auto& info = s_kmalloc_fixed_info;
		ASSERT(info.used_list_head);

		// get node from fixed info buffer
		auto* node = (kmalloc_fixed_info::node*)address;
		ASSERT(node->next < info.node_count || node->next == kmalloc_fixed_info::node::invalid);
		ASSERT(node->prev < info.node_count || node->prev == kmalloc_fixed_info::node::invalid);

		// remove from used list
		if (node->prev != kmalloc_fixed_info::node::invalid)
			info.node_at(node->prev)->next = node->next;
		if (node->next != kmalloc_fixed_info::node::invalid)
			info.node_at(node->next)->prev = node->prev;
		if (info.used_list_head == node)
			info.used_list_head = info.used_list_head->prev != kmalloc_fixed_info::node::invalid ? info.node_at(info.used_list_head->prev) : nullptr;

		// add to free list
		node->next = kmalloc_fixed_info::node::invalid;
		node->prev = kmalloc_fixed_info::node::invalid;
		if (info.free_list_head)
		{
			info.free_list_head->next = info.index_of(node);
			node->prev = info.index_of(info.free_list_head);
		}
		info.free_list_head = node;

		info.used -= sizeof(kmalloc_fixed_info::node);
		info.free += sizeof(kmalloc_fixed_info::node);
	}
	else if (s_kmalloc_info.base <= address_uint && address_uint < s_kmalloc_info.end)
	{
		auto& info = s_kmalloc_info;

		auto* node = info.from_address(address);
		ASSERT(node && node->data() == (uintptr_t)address);
		ASSERT(node->used());

		// remember the size before a possible merge for the accounting below
		ptrdiff_t size = node->size_no_align();

		// merge with the next node if it is free
		if (auto* next = node->after(); next->end() <= info.end)
			if (!next->used())
				node->set_end(node->after()->end());
		node->set_used(false);

		info.used -= sizeof(kmalloc_node) + size;
		info.free += sizeof(kmalloc_node) + size;
	}
	else
	{
		Kernel::panic("Trying to free a pointer outside of kmalloc memory");
	}
}