Kernel: optimize yielding

Doing a yield no longer raises a software interrupt. Instead it just
saves all the callee-saved registers, ip, sp and the return value. Because
yield is only called in the kernel, it can just restore registers and
jump to the target address. There is never a need to use iret :)
This commit is contained in:
2026-01-11 01:31:09 +02:00
parent 83e5cb81e8
commit 35c97e2ff8
13 changed files with 109 additions and 118 deletions

View File

@@ -325,14 +325,6 @@ namespace Kernel
Thread::current().load_sse();
}
// Old software-interrupt entry point for a kernel yield (this commit removes it:
// yielding now calls asm_yield_trampoline directly instead of raising IRQ_YIELD).
extern "C" void cpp_yield_handler(InterruptStack* interrupt_stack, InterruptRegisters* interrupt_registers)
{
// yield is raised through kernel software interrupt
ASSERT(!InterruptController::get().is_in_service(IRQ_YIELD - IRQ_VECTOR_BASE));
// Yield must only ever come from kernel mode: the saved CS must be a kernel segment.
ASSERT(!GDT::is_user_segment(interrupt_stack->cs));
// Hand the saved CPU state to the scheduler, which may overwrite it with
// another thread's state before the interrupt returns.
Processor::scheduler().reschedule(interrupt_stack, interrupt_registers);
}
extern "C" void cpp_ipi_handler()
{
ASSERT(InterruptController::get().is_in_service(IRQ_IPI - IRQ_VECTOR_BASE));
@@ -477,7 +469,6 @@ namespace Kernel
static_assert(DoubleFault == 8);
#endif
idt->register_interrupt_handler(IRQ_YIELD, asm_yield_handler);
idt->register_interrupt_handler(IRQ_IPI, asm_ipi_handler);
idt->register_interrupt_handler(IRQ_TIMER, asm_timer_handler);
#if ARCH(i686)

View File

@@ -36,6 +36,7 @@ namespace Kernel
static BAN::Array<ProcessorID, 0xFF> s_processor_ids { PROCESSOR_NONE };
extern "C" void asm_syscall_handler();
extern "C" void asm_yield_trampoline(uintptr_t);
ProcessorID Processor::read_processor_id()
{
@@ -556,33 +557,7 @@ namespace Kernel
if (!scheduler().is_idle())
Thread::current().set_cpu_time_stop();
#if ARCH(x86_64)
asm volatile(
"movq %%rsp, %%rcx;"
"movq %[load_sp], %%rsp;"
"int %[yield];"
"movq %%rcx, %%rsp;"
// NOTE: This is offset by 2 pointers since interrupt without PL change
// does not push SP and SS. This allows accessing "whole" interrupt stack.
:: [load_sp]"r"(Processor::current_stack_top() - 2 * sizeof(uintptr_t)),
[yield]"i"(static_cast<int>(IRQ_YIELD)) // WTF GCC 15
: "memory", "rcx"
);
#elif ARCH(i686)
asm volatile(
"movl %%esp, %%ecx;"
"movl %[load_sp], %%esp;"
"int %[yield];"
"movl %%ecx, %%esp;"
// NOTE: This is offset by 2 pointers since interrupt without PL change
// does not push SP and SS. This allows accessing "whole" interrupt stack.
:: [load_sp]"r"(Processor::current_stack_top() - 2 * sizeof(uintptr_t)),
[yield]"i"(static_cast<int>(IRQ_YIELD)) // WTF GCC 15
: "memory", "ecx"
);
#else
#error
#endif
asm_yield_trampoline(Processor::current_stack_top());
processor_info.m_start_ns = SystemTimer::get().ns_since_boot();

View File

@@ -207,7 +207,7 @@ namespace Kernel
m_most_loaded_threads.back().queue = nullptr;
}
void Scheduler::reschedule(InterruptStack* interrupt_stack, InterruptRegisters* interrupt_registers)
void Scheduler::reschedule(YieldRegisters* yield_registers)
{
ASSERT(Processor::get_interrupt_state() == InterruptState::Disabled);
@@ -232,8 +232,7 @@ namespace Kernel
case Thread::State::Executing:
{
const uint64_t current_ns = SystemTimer::get().ns_since_boot();
m_current->thread->interrupt_stack() = *interrupt_stack;
m_current->thread->interrupt_registers() = *interrupt_registers;
m_current->thread->yield_registers() = *yield_registers;
m_current->time_used_ns += current_ns - m_current->last_start_ns;
add_current_to_most_loaded(m_current->blocked ? &m_block_queue : &m_run_queue);
if (!m_current->blocked)
@@ -267,8 +266,7 @@ namespace Kernel
{
if (&PageTable::current() != &PageTable::kernel())
PageTable::kernel().load();
*interrupt_stack = m_idle_thread->interrupt_stack();
*interrupt_registers = m_idle_thread->interrupt_registers();
*yield_registers = m_idle_thread->yield_registers();
m_idle_thread->m_state = Thread::State::Executing;
m_idle_start_ns = SystemTimer::get().ns_since_boot();
return;
@@ -296,8 +294,7 @@ namespace Kernel
Processor::load_segments();
}
*interrupt_stack = thread->interrupt_stack();
*interrupt_registers = thread->interrupt_registers();
*yield_registers = thread->yield_registers();
m_current->last_start_ns = SystemTimer::get().ns_since_boot();
}
@@ -333,6 +330,11 @@ namespace Kernel
Processor::yield();
}
// C entry point invoked from the assembly yield path with the register state
// it saved; forwards that state to the current processor's scheduler, which
// stores it on the outgoing thread and loads the incoming thread's state.
extern "C" void scheduler_on_yield(YieldRegisters* yield_registers)
{
Processor::scheduler().reschedule(yield_registers);
}
void Scheduler::timer_interrupt()
{
ASSERT(Processor::get_interrupt_state() == InterruptState::Disabled);

View File

@@ -26,7 +26,7 @@ namespace Kernel
// Returns the stack pointer the current thread should start executing with.
extern "C" uintptr_t get_thread_start_sp()
{
// NOTE(review): both return statements appear here because this is a diff
// rendering with the -/+ markers stripped; the post-commit code keeps only
// the yield_registers() line below.
return Thread::current().interrupt_stack().sp;
return Thread::current().yield_registers().sp;
}
extern "C" void load_thread_sse()
@@ -179,13 +179,9 @@ namespace Kernel
write_to_stack(sp, data);
write_to_stack(sp, entry);
thread->m_interrupt_stack.ip = reinterpret_cast<vaddr_t>(start_kernel_thread);
thread->m_interrupt_stack.cs = 0x08;
thread->m_interrupt_stack.flags = 0x002;
thread->m_interrupt_stack.sp = sp;
thread->m_interrupt_stack.ss = 0x10;
memset(&thread->m_interrupt_registers, 0, sizeof(InterruptRegisters));
thread->m_yield_registers = {};
thread->m_yield_registers.ip = reinterpret_cast<vaddr_t>(start_kernel_thread);
thread->m_yield_registers.sp = sp;
thread_deleter.disable();
@@ -347,20 +343,13 @@ namespace Kernel
thread->m_state = State::NotStarted;
thread->m_interrupt_stack.ip = ip;
thread->m_interrupt_stack.cs = 0x08;
thread->m_interrupt_stack.flags = 0x002;
thread->m_interrupt_stack.sp = sp;
thread->m_interrupt_stack.ss = 0x10;
save_sse();
memcpy(thread->m_sse_storage, m_sse_storage, sizeof(m_sse_storage));
#if ARCH(x86_64)
thread->m_interrupt_registers.rax = 0;
#elif ARCH(i686)
thread->m_interrupt_registers.eax = 0;
#endif
thread->m_yield_registers = {};
thread->m_yield_registers.ip = ip;
thread->m_yield_registers.sp = sp;
thread->m_yield_registers.ret = 0;
thread_deleter.disable();
@@ -498,13 +487,9 @@ namespace Kernel
write_to_stack(cur_sp, ip);
});
m_interrupt_stack.ip = reinterpret_cast<vaddr_t>(start_userspace_thread);
m_interrupt_stack.cs = 0x08;
m_interrupt_stack.flags = 0x002;
m_interrupt_stack.sp = kernel_stack_top() - 5 * sizeof(uintptr_t);
m_interrupt_stack.ss = 0x10;
memset(&m_interrupt_registers, 0, sizeof(InterruptRegisters));
m_yield_registers = {};
m_yield_registers.ip = reinterpret_cast<vaddr_t>(start_userspace_thread);
m_yield_registers.sp = kernel_stack_top() - 5 * sizeof(uintptr_t);
}
void Thread::setup_process_cleanup()
@@ -539,13 +524,9 @@ namespace Kernel
write_to_stack(sp, entry);
});
m_interrupt_stack.ip = reinterpret_cast<vaddr_t>(start_kernel_thread);
m_interrupt_stack.cs = 0x08;
m_interrupt_stack.flags = 0x002;
m_interrupt_stack.sp = kernel_stack_top() - 4 * sizeof(uintptr_t);
m_interrupt_stack.ss = 0x10;
memset(&m_interrupt_registers, 0, sizeof(InterruptRegisters));
m_yield_registers = {};
m_yield_registers.ip = reinterpret_cast<vaddr_t>(start_kernel_thread);
m_yield_registers.sp = kernel_stack_top() - 4 * sizeof(uintptr_t);
}
bool Thread::is_interrupted_by_signal(bool skip_stop_and_cont) const
@@ -811,7 +792,7 @@ namespace Kernel
const vaddr_t stack_bottom = reinterpret_cast<vaddr_t>(m_signal_alt_stack.ss_sp);
const vaddr_t stack_top = stack_bottom + m_signal_alt_stack.ss_size;
const vaddr_t sp = m_interrupt_stack.sp;
const vaddr_t sp = m_yield_registers.sp;
return stack_bottom <= sp && sp <= stack_top;
}