Kernel: Move TLB invalidation out of standard SMPMessages

This makes accessing TLB messages much faster, as TLB flushes are very
frequent in comparison to other messages.
This commit is contained in:
2026-05-19 23:51:01 +03:00
parent fb9c67ab15
commit 24c37e7381
2 changed files with 108 additions and 40 deletions

View File

@@ -29,6 +29,13 @@ namespace Kernel
BAN_NON_MOVABLE(Processor);
public:
// One TLB invalidation request covering `page_count` pages starting at `vaddr`.
// Used both as the FlushTLB payload of SMPMessage and as the element type of
// the per-processor pending TLB queue.
struct TLBEntry
{
// First virtual address of the range to invalidate.
vaddr_t vaddr;
// Number of pages in the range.
size_t page_count;
// Page table the request targets. A nullptr request is applied regardless
// of which page table is current on the receiving processor.
class PageTable* page_table;
};
struct SMPMessage
{
enum class Type
@@ -43,12 +50,7 @@ namespace Kernel
Type type;
union
{
struct
{
uintptr_t vaddr;
size_t page_count;
void* page_table;
} flush_tlb;
TLBEntry flush_tlb;
SchedulerQueue::Node* new_thread;
SchedulerQueue::Node* unblock_thread;
bool dummy;
@@ -178,6 +180,9 @@ namespace Kernel
asm volatile("mov %[value], %%gs:%a[offset]" :: [value]"r"(value), [offset]"ir"(offset) : "memory");
}
// Spin until this processor's TLB lock (m_tlb_lock) has been acquired.
void lock_tlb_lock();
// Release this processor's TLB lock.
void unlock_tlb_lock();
private:
static ProcessorID s_bsp_id;
static BAN::Atomic<uint8_t> s_processor_count;
@@ -211,6 +216,11 @@ namespace Kernel
BAN::Atomic<SMPMessage*> m_smp_free { nullptr };
SMPMessage* m_smp_message_storage { nullptr };
// Spin flag taken by lock_tlb_lock() and released by unlock_tlb_lock();
// held while the m_tlb_* fields below are read or modified.
BAN::Atomic<bool> m_tlb_lock { false };
// Number of valid requests stored at the front of m_tlb_entries.
size_t m_tlb_entry_count { 0 };
// Fixed-capacity queue of pending TLB invalidation requests. Once it is
// full, further requests are not stored and the consumer falls back to
// invalidating the full address space.
BAN::Array<TLBEntry, 32> m_tlb_entries;
// Set when any queued request had a null page_table; forwarded to
// invalidate_full_address_space() when a full flush is performed.
bool m_tlb_global { false };
void* m_current_page_table { nullptr };
friend class BAN::Array<Processor, 0xFF>;

View File

@@ -351,6 +351,51 @@ namespace Kernel
handle_smp_messages();
}
// Load the FS and GS bases of the current thread by calling load_fsbase()
// and load_gsbase().
void Processor::load_segments()
{
load_fsbase();
load_gsbase();
}
// Apply the FS base of the scheduler's current thread to this processor.
void Processor::load_fsbase()
{
	const auto fsbase = scheduler().current_thread().get_fsbase();
#if ARCH(x86_64)
	// wrmsr takes the MSR index in ecx and the value split across edx (upper
	// 32 bits) and eax (lower 32 bits).
	const uint32_t lower_half = fsbase & 0xFFFFFFFF;
	const uint32_t upper_half = fsbase >> 32;
	asm volatile("wrmsr" :: "d"(upper_half), "a"(lower_half), "c"(MSR_IA32_FS_BASE));
#elif ARCH(i686)
	// On i686 the base is installed through the GDT instead of an MSR.
	gdt().set_fsbase(fsbase);
#endif
}
// Apply the GS base of the scheduler's current thread to this processor.
void Processor::load_gsbase()
{
	const auto gsbase = scheduler().current_thread().get_gsbase();
#if ARCH(x86_64)
	// The value is written to MSR_IA32_KERNEL_GS_BASE; wrmsr takes the MSR
	// index in ecx and the value split across edx (upper 32 bits) and eax
	// (lower 32 bits).
	const uint32_t lower_half = gsbase & 0xFFFFFFFF;
	const uint32_t upper_half = gsbase >> 32;
	asm volatile("wrmsr" :: "d"(upper_half), "a"(lower_half), "c"(MSR_IA32_KERNEL_GS_BASE));
#elif ARCH(i686)
	// On i686 the base is installed through the GDT instead of an MSR.
	gdt().set_gsbase(gsbase);
#endif
}
// Spin until this caller has flipped m_tlb_lock from false to true.
void Processor::lock_tlb_lock()
{
	for (;;)
	{
		// compare_exchange may overwrite its first argument on failure, so a
		// fresh `false` is supplied for every attempt.
		bool unlocked = false;
		if (m_tlb_lock.compare_exchange(unlocked, true, BAN::MemoryOrder::memory_order_acquire))
			return;
		// Lock is held elsewhere; hint the CPU that we are busy-waiting.
		__builtin_ia32_pause();
	}
}
// Release the TLB lock taken by lock_tlb_lock() by storing false into
// m_tlb_lock with release ordering.
void Processor::unlock_tlb_lock()
{
m_tlb_lock.store(false, BAN::MemoryOrder::memory_order_release);
}
void Processor::handle_smp_messages()
{
auto state = get_interrupt_state();
@@ -386,10 +431,7 @@ namespace Kernel
switch (message->type)
{
case SMPMessage::Type::FlushTLB:
if (message->flush_tlb.page_table && message->flush_tlb.page_table != processor.m_current_page_table)
break;
PageTable::current().invalidate_range(message->flush_tlb.vaddr, message->flush_tlb.page_count, false);
break;
ASSERT_NOT_REACHED();
case SMPMessage::Type::NewThread:
processor.m_scheduler->add_thread(message->new_thread);
break;
@@ -420,39 +462,32 @@ namespace Kernel
last_handled->next = processor.m_smp_free;
}
{
processor.lock_tlb_lock();
const size_t tlb_entry_count = processor.m_tlb_entry_count;
const auto tlb_entries = processor.m_tlb_entries;
const bool tlb_global = processor.m_tlb_global;
processor.m_tlb_entry_count = 0;
processor.m_tlb_global = false;
processor.unlock_tlb_lock();
auto& page_table = PageTable::current();
size_t pages = 0;
for (size_t i = 0; i < tlb_entry_count; i++)
if (tlb_entries[i].page_table == nullptr || tlb_entries[i].page_table == &page_table)
pages += tlb_entries[i].page_count;
if (pages >= PageTable::full_tlb_flush_threshold || tlb_entry_count >= processor.m_tlb_entries.size())
page_table.invalidate_full_address_space(tlb_global);
else for (size_t i = 0; i < tlb_entry_count; i++)
if (tlb_entries[i].page_table == nullptr || tlb_entries[i].page_table == &page_table)
page_table.invalidate_range(tlb_entries[i].vaddr, tlb_entries[i].page_count, false);
}
set_interrupt_state(state);
}
// Load the FS and GS bases of the current thread by calling load_fsbase()
// and load_gsbase().
void Processor::load_segments()
{
load_fsbase();
load_gsbase();
}
// Apply the FS base of the scheduler's current thread to this processor.
void Processor::load_fsbase()
{
	const auto fsbase = scheduler().current_thread().get_fsbase();
#if ARCH(x86_64)
	// wrmsr takes the MSR index in ecx and the value split across edx (upper
	// 32 bits) and eax (lower 32 bits).
	const uint32_t lower_half = fsbase & 0xFFFFFFFF;
	const uint32_t upper_half = fsbase >> 32;
	asm volatile("wrmsr" :: "d"(upper_half), "a"(lower_half), "c"(MSR_IA32_FS_BASE));
#elif ARCH(i686)
	// On i686 the base is installed through the GDT instead of an MSR.
	gdt().set_fsbase(fsbase);
#endif
}
// Apply the GS base of the scheduler's current thread to this processor.
void Processor::load_gsbase()
{
	const auto gsbase = scheduler().current_thread().get_gsbase();
#if ARCH(x86_64)
	// The value is written to MSR_IA32_KERNEL_GS_BASE; wrmsr takes the MSR
	// index in ecx and the value split across edx (upper 32 bits) and eax
	// (lower 32 bits).
	const uint32_t lower_half = gsbase & 0xFFFFFFFF;
	const uint32_t upper_half = gsbase >> 32;
	asm volatile("wrmsr" :: "d"(upper_half), "a"(lower_half), "c"(MSR_IA32_KERNEL_GS_BASE));
#elif ARCH(i686)
	// On i686 the base is installed through the GDT instead of an MSR.
	gdt().set_gsbase(gsbase);
#endif
}
void Processor::send_smp_message(ProcessorID processor_id, const SMPMessage& message, bool send_ipi)
{
auto state = get_interrupt_state();
@@ -460,6 +495,29 @@ namespace Kernel
auto& processor = s_processors[processor_id.m_id];
if (message.type == SMPMessage::Type::FlushTLB)
{
processor.lock_tlb_lock();
const auto& tlb_msg = message.flush_tlb;
processor.m_tlb_global |= (tlb_msg.page_table == nullptr);
if (processor.m_tlb_entry_count < processor.m_tlb_entries.size())
{
processor.m_tlb_entries[processor.m_tlb_entry_count++] = {
.vaddr = tlb_msg.vaddr,
.page_count = tlb_msg.page_count,
.page_table = static_cast<PageTable*>(tlb_msg.page_table),
};
}
processor.unlock_tlb_lock();
set_interrupt_state(state);
return;
}
// find a slot for message
auto* storage = processor.m_smp_free.exchange(nullptr);
while (storage == nullptr)