From 4af9699b22e68075df2d660bb29f322d67a691e1 Mon Sep 17 00:00:00 2001 From: Bananymous Date: Sun, 11 Jan 2026 03:06:39 +0200 Subject: [PATCH] Kernel: Only save/load sse state when it is used There is no need to save and load sse state on every interrupt. Instead we can use CR0.TS to make threads trigger an interrupt when they use sse instructions. This can be used to only save and load sse state when needed. Processor now keeps track of its current "sse thread" and the scheduler either enables or disables sse based on which thread it is starting up. When a thread dies, it checks if it was the current sse thread to avoid use-after-free bugs. When load balancing, the processor has to save the thread's sse state before sending it to a new processor (if it was the current sse thread). This ensures the thread's sse state will be correct when the new processor ends up loading it. --- kernel/arch/i686/Thread.S | 2 - kernel/arch/x86_64/Thread.S | 2 - kernel/include/kernel/Processor.h | 20 ++++++++ kernel/kernel/IDT.cpp | 77 ++++++++++++++++--------------- kernel/kernel/Processor.cpp | 2 + kernel/kernel/Scheduler.cpp | 12 +++++ kernel/kernel/Thread.cpp | 20 ++++++-- 7 files changed, 89 insertions(+), 46 deletions(-) diff --git a/kernel/arch/i686/Thread.S b/kernel/arch/i686/Thread.S index de3b9f98..bd74655a 100644 --- a/kernel/arch/i686/Thread.S +++ b/kernel/arch/i686/Thread.S @@ -34,8 +34,6 @@ start_kernel_thread: .global start_userspace_thread start_userspace_thread: - call load_thread_sse - call get_thread_start_sp movl %eax, %esp diff --git a/kernel/arch/x86_64/Thread.S b/kernel/arch/x86_64/Thread.S index 1f58e228..dfd09801 100644 --- a/kernel/arch/x86_64/Thread.S +++ b/kernel/arch/x86_64/Thread.S @@ -27,8 +27,6 @@ start_kernel_thread: .global start_userspace_thread start_userspace_thread: - call load_thread_sse - call get_thread_start_sp movq %rax, %rsp diff --git a/kernel/include/kernel/Processor.h b/kernel/include/kernel/Processor.h index 66b85c26..d7ea6d16 100644 --- 
a/kernel/include/kernel/Processor.h +++ b/kernel/include/kernel/Processor.h @@ -120,6 +120,9 @@ namespace Kernel static void update_tsc(); static uint64_t ns_since_boot_tsc(); + static Thread* get_current_sse_thread() { return read_gs_sized<Thread*>(offsetof(Processor, m_sse_thread)); }; + static void set_current_sse_thread(Thread* thread) { write_gs_sized<Thread*>(offsetof(Processor, m_sse_thread), thread); }; + static paddr_t shared_page_paddr() { return s_shared_page_paddr; } static volatile API::SharedPage& shared_page() { return *reinterpret_cast<volatile API::SharedPage*>(s_shared_page_vaddr); } @@ -133,6 +136,21 @@ namespace Kernel static void load_fsbase(); static void load_gsbase(); + static void disable_sse() + { + uintptr_t dummy; +#if ARCH(x86_64) + asm volatile("movq %%cr0, %0; orq $0x08, %0; movq %0, %%cr0" : "=r"(dummy)); +#elif ARCH(i686) + asm volatile("movl %%cr0, %0; orl $0x08, %0; movl %0, %%cr0" : "=r"(dummy)); +#endif + } + + static void enable_sse() + { + asm volatile("clts"); + } + private: Processor() = default; ~Processor() { ASSERT_NOT_REACHED(); } @@ -194,6 +212,8 @@ namespace Kernel vaddr_t m_thread_syscall_stack; + Thread* m_sse_thread { nullptr }; + static constexpr size_t s_stack_size { 4096 }; void* m_stack { nullptr }; diff --git a/kernel/kernel/IDT.cpp b/kernel/kernel/IDT.cpp index 03b532e1..2350d5ba 100644 --- a/kernel/kernel/IDT.cpp +++ b/kernel/kernel/IDT.cpp @@ -177,34 +177,47 @@ namespace Kernel const pid_t tid = Thread::current_tid(); const pid_t pid = (tid && Thread::current().has_process()) ? 
Process::current().pid() : 0; - const char* process_name = ""; - - if (tid) + switch (isr) { - auto& thread = Thread::current(); - thread.save_sse(); - - if (isr == ISR::PageFault && Thread::current().is_userspace()) + case ISR::PageFault: { - if (pid) + if (pid == 0 || !Thread::current().is_userspace()) + break; + + PageFaultError page_fault_error; + page_fault_error.raw = error; + + Processor::set_interrupt_state(InterruptState::Enabled); + auto result = Process::current().allocate_page_for_demand_paging(regs->cr2, page_fault_error.write, page_fault_error.instruction); + Processor::set_interrupt_state(InterruptState::Disabled); + + if (result.is_error()) { - PageFaultError page_fault_error; - page_fault_error.raw = error; - - Processor::set_interrupt_state(InterruptState::Enabled); - auto result = Process::current().allocate_page_for_demand_paging(regs->cr2, page_fault_error.write, page_fault_error.instruction); - Processor::set_interrupt_state(InterruptState::Disabled); - - if (!result.is_error() && result.value()) - goto done; - - if (result.is_error()) - { - dwarnln("Demand paging: {}", result.error()); - Thread::current().handle_signal(SIGKILL, {}); - goto done; - } + dwarnln("Demand paging: {}", result.error()); + Thread::current().handle_signal(SIGKILL, {}); + return; } + + if (result.value()) + return; + + break; + } + case ISR::DeviceNotAvailable: + { + if (pid == 0 || !Thread::current().is_userspace()) + break; + + Processor::enable_sse(); + + if (auto* sse_thread = Processor::get_current_sse_thread()) + sse_thread->save_sse(); + + auto* current_thread = &Thread::current(); + current_thread->load_sse(); + Processor::set_current_sse_thread(current_thread); + + return; } } @@ -225,8 +238,9 @@ namespace Kernel ); } - if (Thread::current().has_process()) - process_name = Process::current().name(); + const char* process_name = (tid && Thread::current().has_process()) + ? 
Process::current().name() + : nullptr; #if ARCH(x86_64) dwarnln( @@ -320,9 +334,6 @@ namespace Kernel } ASSERT(Thread::current().state() != Thread::State::Terminated); - - done: - Thread::current().load_sse(); } extern "C" void cpp_ipi_handler() @@ -343,8 +354,6 @@ namespace Kernel asm volatile("cli; 1: hlt; jmp 1b"); } - Thread::current().save_sse(); - ASSERT(InterruptController::get().is_in_service(IRQ_TIMER - IRQ_VECTOR_BASE)); InterruptController::get().eoi(IRQ_TIMER - IRQ_VECTOR_BASE); @@ -356,8 +365,6 @@ namespace Kernel auto& current_thread = Thread::current(); if (current_thread.can_add_signal_to_execute()) current_thread.handle_signal(); - - Thread::current().load_sse(); } extern "C" void cpp_irq_handler(uint32_t irq) @@ -375,8 +382,6 @@ namespace Kernel if (!InterruptController::get().is_in_service(irq)) return; - Thread::current().save_sse(); - InterruptController::get().eoi(irq); if (auto* handler = s_interruptables[irq]) handler->handle_irq(); @@ -390,8 +395,6 @@ namespace Kernel Processor::scheduler().reschedule_if_idle(); ASSERT(Thread::current().state() != Thread::State::Terminated); - - Thread::current().load_sse(); } void IDT::register_interrupt_handler(uint8_t index, void (*handler)(), uint8_t ist) diff --git a/kernel/kernel/Processor.cpp b/kernel/kernel/Processor.cpp index 1ff9d8fd..d5e27a37 100644 --- a/kernel/kernel/Processor.cpp +++ b/kernel/kernel/Processor.cpp @@ -147,6 +147,8 @@ namespace Kernel ASSERT(processor.m_idt); processor.idt().load(); + disable_sse(); + return processor; } diff --git a/kernel/kernel/Scheduler.cpp b/kernel/kernel/Scheduler.cpp index 6cb7953e..0decdfde 100644 --- a/kernel/kernel/Scheduler.cpp +++ b/kernel/kernel/Scheduler.cpp @@ -294,6 +294,10 @@ namespace Kernel Processor::load_segments(); } + (Processor::get_current_sse_thread() == thread) + ? 
Processor::enable_sse() + : Processor::disable_sse(); + *yield_registers = thread->yield_registers(); m_current->last_start_ns = SystemTimer::get().ns_since_boot(); @@ -568,6 +572,14 @@ namespace Kernel dprintln_if(DEBUG_SCHEDULER, "CPU {}: sending tid {} to CPU {}", Processor::current_id(), thread_info.node->thread->tid(), least_loaded_id); } + if (auto* thread = thread_info.node->thread; thread == Processor::get_current_sse_thread()) + { + Processor::enable_sse(); + thread->save_sse(); + Processor::set_current_sse_thread(nullptr); + Processor::disable_sse(); + } + thread_info.node->time_used_ns = 0; { diff --git a/kernel/kernel/Thread.cpp b/kernel/kernel/Thread.cpp index 9f690493..287562f7 100644 --- a/kernel/kernel/Thread.cpp +++ b/kernel/kernel/Thread.cpp @@ -29,11 +29,6 @@ namespace Kernel return Thread::current().yield_registers().sp; } - extern "C" void load_thread_sse() - { - Thread::current().load_sse(); - } - static pid_t s_next_tid = 1; alignas(16) static uint8_t s_default_sse_storage[512]; @@ -51,6 +46,11 @@ namespace Kernel return; } + const auto state = Processor::get_interrupt_state(); + Processor::set_interrupt_state(InterruptState::Disabled); + + Processor::enable_sse(); + const uint32_t mxcsr = 0x1F80; asm volatile( "finit;" @@ -66,6 +66,10 @@ namespace Kernel : [mxcsr]"m"(mxcsr) ); + Processor::disable_sse(); + + Processor::set_interrupt_state(state); + s_default_sse_storage_initialized = true; } @@ -261,6 +265,12 @@ namespace Kernel Thread::~Thread() { + if (Processor::get_current_sse_thread() == this) + { + Processor::set_current_sse_thread(nullptr); + Processor::disable_sse(); + } + if (m_delete_process) { ASSERT(m_process);