Kernel: Only save/load sse state when it is used

There is no need to save and load sse state on every interrupt. Instead
we can use CR0.TS to make threads trigger an interrupt when they use sse
instructions. This can be used to only save and load sse state when
needed.

Processor now keeps track of its current "sse thread" and the scheduler
either enables or disables sse based on which thread it is starting up.
When a thread dies, it checks if it was the current sse thread to avoid
use-after-free bugs. When load balancing, the processor has to save the
thread's sse state before sending it to a new processor (if it was the
current sse thread). This ensures the thread's sse state will be correct
when the new processor ends up loading it.
This commit is contained in:
Bananymous 2026-01-11 03:06:39 +02:00
parent 35c97e2ff8
commit 4af9699b22
7 changed files with 89 additions and 46 deletions

View File

@ -34,8 +34,6 @@ start_kernel_thread:
.global start_userspace_thread
start_userspace_thread:
call load_thread_sse
call get_thread_start_sp
movl %eax, %esp

View File

@ -27,8 +27,6 @@ start_kernel_thread:
.global start_userspace_thread
start_userspace_thread:
call load_thread_sse
call get_thread_start_sp
movq %rax, %rsp

View File

@ -120,6 +120,9 @@ namespace Kernel
static void update_tsc();
static uint64_t ns_since_boot_tsc();
// Returns the thread stored in this processor's m_sse_thread slot, read
// through the gs-relative accessor. The member defaults to nullptr, so the
// result may be null when no thread has been recorded.
static Thread* get_current_sse_thread()
{
	const auto sse_thread_offset = offsetof(Processor, m_sse_thread);
	return read_gs_sized<Thread*>(sse_thread_offset);
}
// Records `thread` in this processor's m_sse_thread slot through the
// gs-relative accessor. Callers in this change pass nullptr to clear it.
static void set_current_sse_thread(Thread* thread)
{
	const auto sse_thread_offset = offsetof(Processor, m_sse_thread);
	write_gs_sized<Thread*>(sse_thread_offset, thread);
}
static paddr_t shared_page_paddr() { return s_shared_page_paddr; }
static volatile API::SharedPage& shared_page() { return *reinterpret_cast<API::SharedPage*>(s_shared_page_vaddr); }
@ -133,6 +136,21 @@ namespace Kernel
static void load_fsbase();
static void load_gsbase();
// Sets CR0.TS (mask 0x08) on the executing CPU with a read-modify-write of
// CR0. While TS is set, a thread that executes an SSE instruction triggers
// an interrupt (handled as ISR::DeviceNotAvailable), which is what lets the
// kernel save/load SSE state only when it is actually used.
static void disable_sse()
{
// Scratch register for the CR0 read-modify-write; its value is not used afterwards.
uintptr_t dummy;
#if ARCH(x86_64)
// 64-bit variant: CR0 is moved through a 64-bit general purpose register.
asm volatile("movq %%cr0, %0; orq $0x08, %0; movq %0, %%cr0" : "=r"(dummy));
#elif ARCH(i686)
// 32-bit variant of the same sequence.
asm volatile("movl %%cr0, %0; orl $0x08, %0; movl %0, %%cr0" : "=r"(dummy));
#endif
}
// Counterpart of disable_sse(): clears CR0.TS on the executing CPU so SSE
// instructions no longer trap.
static void enable_sse()
{
// `clts` clears only the TS flag; no other CR0 bit is touched.
asm volatile("clts");
}
private:
Processor() = default;
~Processor() { ASSERT_NOT_REACHED(); }
@ -194,6 +212,8 @@ namespace Kernel
vaddr_t m_thread_syscall_stack;
Thread* m_sse_thread { nullptr };
static constexpr size_t s_stack_size { 4096 };
void* m_stack { nullptr };

View File

@ -177,17 +177,13 @@ namespace Kernel
const pid_t tid = Thread::current_tid();
const pid_t pid = (tid && Thread::current().has_process()) ? Process::current().pid() : 0;
const char* process_name = "";
switch (isr)
{
case ISR::PageFault:
{
if (pid == 0 || !Thread::current().is_userspace())
break;
if (tid)
{
auto& thread = Thread::current();
thread.save_sse();
if (isr == ISR::PageFault && Thread::current().is_userspace())
{
if (pid)
{
PageFaultError page_fault_error;
page_fault_error.raw = error;
@ -195,16 +191,33 @@ namespace Kernel
auto result = Process::current().allocate_page_for_demand_paging(regs->cr2, page_fault_error.write, page_fault_error.instruction);
Processor::set_interrupt_state(InterruptState::Disabled);
if (!result.is_error() && result.value())
goto done;
if (result.is_error())
{
dwarnln("Demand paging: {}", result.error());
Thread::current().handle_signal(SIGKILL, {});
goto done;
return;
}
if (result.value())
return;
break;
}
case ISR::DeviceNotAvailable:
{
if (pid == 0 || !Thread::current().is_userspace())
break;
Processor::enable_sse();
if (auto* sse_thread = Processor::get_current_sse_thread())
sse_thread->save_sse();
auto* current_thread = &Thread::current();
current_thread->load_sse();
Processor::set_current_sse_thread(current_thread);
return;
}
}
@ -225,8 +238,9 @@ namespace Kernel
);
}
if (Thread::current().has_process())
process_name = Process::current().name();
const char* process_name = (tid && Thread::current().has_process())
? Process::current().name()
: nullptr;
#if ARCH(x86_64)
dwarnln(
@ -320,9 +334,6 @@ namespace Kernel
}
ASSERT(Thread::current().state() != Thread::State::Terminated);
done:
Thread::current().load_sse();
}
extern "C" void cpp_ipi_handler()
@ -343,8 +354,6 @@ namespace Kernel
asm volatile("cli; 1: hlt; jmp 1b");
}
Thread::current().save_sse();
ASSERT(InterruptController::get().is_in_service(IRQ_TIMER - IRQ_VECTOR_BASE));
InterruptController::get().eoi(IRQ_TIMER - IRQ_VECTOR_BASE);
@ -356,8 +365,6 @@ namespace Kernel
auto& current_thread = Thread::current();
if (current_thread.can_add_signal_to_execute())
current_thread.handle_signal();
Thread::current().load_sse();
}
extern "C" void cpp_irq_handler(uint32_t irq)
@ -375,8 +382,6 @@ namespace Kernel
if (!InterruptController::get().is_in_service(irq))
return;
Thread::current().save_sse();
InterruptController::get().eoi(irq);
if (auto* handler = s_interruptables[irq])
handler->handle_irq();
@ -390,8 +395,6 @@ namespace Kernel
Processor::scheduler().reschedule_if_idle();
ASSERT(Thread::current().state() != Thread::State::Terminated);
Thread::current().load_sse();
}
void IDT::register_interrupt_handler(uint8_t index, void (*handler)(), uint8_t ist)

View File

@ -147,6 +147,8 @@ namespace Kernel
ASSERT(processor.m_idt);
processor.idt().load();
disable_sse();
return processor;
}

View File

@ -294,6 +294,10 @@ namespace Kernel
Processor::load_segments();
}
(Processor::get_current_sse_thread() == thread)
? Processor::enable_sse()
: Processor::disable_sse();
*yield_registers = thread->yield_registers();
m_current->last_start_ns = SystemTimer::get().ns_since_boot();
@ -568,6 +572,14 @@ namespace Kernel
dprintln_if(DEBUG_SCHEDULER, "CPU {}: sending tid {} to CPU {}", Processor::current_id(), thread_info.node->thread->tid(), least_loaded_id);
}
if (auto* thread = thread_info.node->thread; thread == Processor::get_current_sse_thread())
{
Processor::enable_sse();
thread->save_sse();
Processor::set_current_sse_thread(nullptr);
Processor::disable_sse();
}
thread_info.node->time_used_ns = 0;
{

View File

@ -29,11 +29,6 @@ namespace Kernel
return Thread::current().yield_registers().sp;
}
// C-linkage wrapper so the assembly start_userspace_thread stubs can
// `call load_thread_sse`; loads the SSE state of the current thread.
extern "C" void load_thread_sse()
{
Thread::current().load_sse();
}
static pid_t s_next_tid = 1;
alignas(16) static uint8_t s_default_sse_storage[512];
@ -51,6 +46,11 @@ namespace Kernel
return;
}
const auto state = Processor::get_interrupt_state();
Processor::set_interrupt_state(InterruptState::Disabled);
Processor::enable_sse();
const uint32_t mxcsr = 0x1F80;
asm volatile(
"finit;"
@ -66,6 +66,10 @@ namespace Kernel
: [mxcsr]"m"(mxcsr)
);
Processor::disable_sse();
Processor::set_interrupt_state(state);
s_default_sse_storage_initialized = true;
}
@ -261,6 +265,12 @@ namespace Kernel
Thread::~Thread()
{
if (Processor::get_current_sse_thread() == this)
{
Processor::set_current_sse_thread(nullptr);
Processor::disable_sse();
}
if (m_delete_process)
{
ASSERT(m_process);