From 001e95f973fcd0af92adb410b9645df6cc3daf34 Mon Sep 17 00:00:00 2001 From: Bananymous Date: Wed, 3 Jan 2024 02:06:49 +0200 Subject: [PATCH] Kernel: Optimize sse saving/loading Only save and load the SSE state when a new thread tries to execute an SSE instruction. There is no need to do that every time we enter the kernel. --- kernel/arch/x86_64/IDT.cpp | 33 +++++++++++++++++---------------- kernel/include/kernel/Thread.h | 7 ++++--- kernel/kernel/Scheduler.cpp | 11 +++++++++++ kernel/kernel/Syscall.cpp | 8 -------- kernel/kernel/Thread.cpp | 29 +++++++++++++++++++++++++++++ 5 files changed, 61 insertions(+), 27 deletions(-) diff --git a/kernel/arch/x86_64/IDT.cpp b/kernel/arch/x86_64/IDT.cpp index 2f1a963c17..0139c90496 100644 --- a/kernel/arch/x86_64/IDT.cpp +++ b/kernel/arch/x86_64/IDT.cpp @@ -218,6 +218,23 @@ namespace Kernel::IDT } } } +#if __enable_sse + else if (isr == ISR::DeviceNotAvailable) + { + asm volatile( + "movq %cr0, %rax;" + "andq $~(1 << 3), %rax;" + "movq %rax, %cr0;" + ); + if (auto* current = &Thread::current(); current != Thread::sse_thread()) + { + if (auto* sse = Thread::sse_thread()) + sse->save_sse(); + current->load_sse(); + } + goto done; + } +#endif } if (PageTable::current().get_page_flags(interrupt_stack.rip & PAGE_ADDR_MASK) & PageTable::Flags::Present) @@ -259,7 +276,6 @@ namespace Kernel::IDT int signal = 0; switch (isr) { - case ISR::DeviceNotAvailable: case ISR::DivisionError: case ISR::SIMDFloatingPointException: case ISR::x87FloatingPointException: @@ -290,22 +306,11 @@ namespace Kernel::IDT ASSERT(Thread::current().state() != Thread::State::Terminated); done: -#if __enable_sse - if (from_userspace) - { - ASSERT(Thread::current().state() == Thread::State::Executing); - Thread::current().load_sse(); - } -#endif return; } extern "C" void cpp_irq_handler(uint64_t irq, InterruptStack& interrupt_stack) { -#if __enable_sse - Thread::current().save_sse(); -#endif - if (Scheduler::current_tid()) { 
Thread::current().set_return_rsp(interrupt_stack.rsp); @@ -326,10 +331,6 @@ done: Scheduler::get().reschedule_if_idling(); ASSERT(Thread::current().state() != Thread::State::Terminated); - -#if __enable_sse - Thread::current().load_sse(); -#endif } static void flush_idt() diff --git a/kernel/include/kernel/Thread.h b/kernel/include/kernel/Thread.h index 7ae32ec15e..1692579d2d 100644 --- a/kernel/include/kernel/Thread.h +++ b/kernel/include/kernel/Thread.h @@ -75,7 +75,7 @@ namespace Kernel vaddr_t interrupt_stack_base() const { return m_interrupt_stack ? m_interrupt_stack->vaddr() : 0; } size_t interrupt_stack_size() const { return m_interrupt_stack ? m_interrupt_stack->size() : 0; } - static Thread& current() ; + static Thread& current(); static pid_t current_tid(); Process& process(); @@ -87,8 +87,9 @@ namespace Kernel size_t physical_page_count() const { return virtual_page_count(); } #if __enable_sse - void save_sse() { asm volatile("fxsave %0" :: "m"(m_sse_storage)); } - void load_sse() { asm volatile("fxrstor %0" :: "m"(m_sse_storage)); } + void save_sse(); + void load_sse(); + static Thread* sse_thread(); #endif private: diff --git a/kernel/kernel/Scheduler.cpp b/kernel/kernel/Scheduler.cpp index bd5ebdc6a9..fefce50446 100644 --- a/kernel/kernel/Scheduler.cpp +++ b/kernel/kernel/Scheduler.cpp @@ -231,6 +231,17 @@ namespace Kernel Thread* current = &current_thread(); +#if __enable_sse + if (current != Thread::sse_thread()) + { + asm volatile( + "movq %cr0, %rax;" + "orq $(1 << 3), %rax;" + "movq %rax, %cr0" + ); + } +#endif + while (current->state() == Thread::State::Terminated) { Thread* thread = m_current_thread->thread; diff --git a/kernel/kernel/Syscall.cpp b/kernel/kernel/Syscall.cpp index 4e8b54bc82..2b5f266847 100644 --- a/kernel/kernel/Syscall.cpp +++ b/kernel/kernel/Syscall.cpp @@ -26,10 +26,6 @@ namespace Kernel Thread::current().set_return_rsp(interrupt_stack.rsp); Thread::current().set_return_rip(interrupt_stack.rip); -#if __enable_sse - 
Thread::current().save_sse(); -#endif - asm volatile("sti"); (void)arg1; @@ -230,10 +226,6 @@ namespace Kernel ASSERT(Kernel::Thread::current().state() == Kernel::Thread::State::Executing); -#if __enable_sse - current_thread.load_sse(); -#endif - if (ret.is_error()) return -ret.error().get_error_code(); return ret.value(); diff --git a/kernel/kernel/Thread.cpp b/kernel/kernel/Thread.cpp index 9727291677..58cabb3194 100644 --- a/kernel/kernel/Thread.cpp +++ b/kernel/kernel/Thread.cpp @@ -101,7 +101,16 @@ namespace Kernel : m_tid(tid), m_process(process) { #if __enable_sse + uintptr_t cr0; + asm volatile( + "movq %%cr0, %%rax;" + "movq %%rax, %%rbx;" + "andq $~(1 << 3), %%rax;" + "movq %%rax, %%cr0;" + : "=b"(cr0) + ); save_sse(); + asm volatile("movq %0, %%cr0" :: "r"(cr0)); #endif } @@ -361,4 +370,24 @@ namespace Kernel ASSERT_NOT_REACHED(); } +#if __enable_sse + static Thread* s_sse_thread = nullptr; + + void Thread::save_sse() + { + asm volatile("fxsave %0" :: "m"(m_sse_storage)); + } + + void Thread::load_sse() + { + asm volatile("fxrstor %0" :: "m"(m_sse_storage)); + s_sse_thread = this; + } + + Thread* Thread::sse_thread() + { + return s_sse_thread; + } +#endif + } \ No newline at end of file