From 5050047cef0683e16fc67c054a9e362a6580e850 Mon Sep 17 00:00:00 2001 From: Bananymous Date: Fri, 29 Mar 2024 18:02:12 +0200 Subject: [PATCH] Kernel: Rewrite whole scheduler Current context saving was very hacky and dependent on compiler behaviour that was not consistent. Now we always use iret for context saving. This makes everything cleaner. --- kernel/arch/x86_64/Thread.S | 45 +--- kernel/arch/x86_64/interrupts.S | 14 +- kernel/include/kernel/InterruptStack.h | 20 ++ kernel/include/kernel/Lock/Mutex.h | 4 +- kernel/include/kernel/Processor.h | 9 + kernel/include/kernel/Scheduler.h | 13 +- kernel/include/kernel/SchedulerQueue.h | 1 + kernel/include/kernel/Thread.h | 29 +- kernel/kernel/IDT.cpp | 31 +-- kernel/kernel/Networking/ARPTable.cpp | 2 +- kernel/kernel/Networking/UNIX/Socket.cpp | 2 +- kernel/kernel/Process.cpp | 30 ++- kernel/kernel/Processor.cpp | 30 +++ kernel/kernel/Scheduler.cpp | 312 +++++++++------------- kernel/kernel/Storage/ATA/AHCI/Device.cpp | 2 +- kernel/kernel/Syscall.cpp | 3 - kernel/kernel/Thread.cpp | 146 ++++++---- libc/arch/x86_64/crt0.S | 35 ++- 18 files changed, 364 insertions(+), 364 deletions(-) diff --git a/kernel/arch/x86_64/Thread.S b/kernel/arch/x86_64/Thread.S index de1e59ab..e7ec7015 100644 --- a/kernel/arch/x86_64/Thread.S +++ b/kernel/arch/x86_64/Thread.S @@ -4,36 +4,19 @@ read_ip: popq %rax jmp *%rax -exit_thread_trampoline: +# void start_thread() +.global start_kernel_thread +start_kernel_thread: + # STACK LAYOUT + # on_exit arg + # on_exit func + # entry arg + # entry func + movq 8(%rsp), %rdi - ret + movq 0(%rsp), %rsi + call *%rsi -# void start_thread(uint64_t sp, uint64_t ip) -.global start_thread -start_thread: - movq %rdi, %rsp - popq %rdi - movq $0, %rbp - pushq $exit_thread_trampoline - sti - jmp *%rsi - -# void continue_thread(uint64_t sp, uint64_t ip) -.global continue_thread -continue_thread: - movq %rdi, %rsp - movq $0, %rax - jmp *%rsi - -# void thread_userspace_trampoline(uint64_t sp, uint64_t ip, int 
argc, char** argv, char** envp) -.global thread_userspace_trampoline -thread_userspace_trampoline: - pushq $0x23 - pushq %rdi - pushfq - pushq $0x1B - pushq %rsi - movq %rdx, %rdi - movq %rcx, %rsi - movq %r8, %rdx - iretq + movq 24(%rsp), %rdi + movq 16(%rsp), %rsi + call *%rsi diff --git a/kernel/arch/x86_64/interrupts.S b/kernel/arch/x86_64/interrupts.S index a9aabc05..7d3e5f4d 100644 --- a/kernel/arch/x86_64/interrupts.S +++ b/kernel/arch/x86_64/interrupts.S @@ -72,9 +72,7 @@ isr_stub: irq_stub: pushaq - movq 0x78(%rsp), %rdi # irq number - movq %rsp, %rsi - addq $136, %rsi + movq 120(%rsp), %rdi # irq number call cpp_irq_handler popaq addq $16, %rsp @@ -168,7 +166,15 @@ irq 28 irq 29 irq 30 irq 31 -irq 32 + +.global asm_reschedule_handler +asm_reschedule_handler: + pushaq + leaq 120(%rsp), %rdi # interrupt stack ptr + movq %rsp, %rsi # interrupt register ptr + call cpp_reschedule_handler + popaq + iretq // arguments in RAX, RBX, RCX, RDX, RSI, RDI // System V ABI: RDI, RSI, RDX, RCX, R8, R9 diff --git a/kernel/include/kernel/InterruptStack.h b/kernel/include/kernel/InterruptStack.h index 6a112874..cd91c0fb 100644 --- a/kernel/include/kernel/InterruptStack.h +++ b/kernel/include/kernel/InterruptStack.h @@ -14,4 +14,24 @@ namespace Kernel uintptr_t ss; }; + struct InterruptRegisters + { + uintptr_t r15; + uintptr_t r14; + uintptr_t r13; + uintptr_t r12; + uintptr_t r11; + uintptr_t r10; + uintptr_t r9; + uintptr_t r8; + + uintptr_t rdi; + uintptr_t rsi; + uintptr_t rbp; + uintptr_t rbx; + uintptr_t rdx; + uintptr_t rcx; + uintptr_t rax; + }; + } diff --git a/kernel/include/kernel/Lock/Mutex.h b/kernel/include/kernel/Lock/Mutex.h index 257c1050..a4c895a2 100644 --- a/kernel/include/kernel/Lock/Mutex.h +++ b/kernel/include/kernel/Lock/Mutex.h @@ -25,7 +25,7 @@ namespace Kernel else { while (!m_locker.compare_exchange(-1, tid)) - Scheduler::get().reschedule(); + Scheduler::get().yield(); ASSERT(m_lock_depth == 0); } m_lock_depth++; @@ -81,7 +81,7 @@ namespace 
Kernel if (has_priority) m_queue_length++; while (!(has_priority || m_queue_length == 0) || !m_locker.compare_exchange(-1, tid)) - Scheduler::get().reschedule(); + Scheduler::get().yield(); ASSERT(m_lock_depth == 0); } m_lock_depth++; diff --git a/kernel/include/kernel/Processor.h b/kernel/include/kernel/Processor.h index f01f1163..c9d6234c 100644 --- a/kernel/include/kernel/Processor.h +++ b/kernel/include/kernel/Processor.h @@ -5,6 +5,7 @@ #include #include #include +#include #include namespace Kernel @@ -68,6 +69,11 @@ namespace Kernel static SchedulerQueue::Node* get_current_thread() { return reinterpret_cast(read_gs_ptr(offsetof(Processor, m_current_thread))); } static void set_current_thread(SchedulerQueue::Node* thread) { write_gs_ptr(offsetof(Processor, m_current_thread), thread); } + static void enter_interrupt(InterruptStack*, InterruptRegisters*); + static void leave_interrupt(); + static InterruptStack& get_interrupt_stack(); + static InterruptRegisters& get_interrupt_registers(); + private: Processor() = default; ~Processor() { ASSERT_NOT_REACHED(); } @@ -121,6 +127,9 @@ namespace Kernel Thread* m_idle_thread { nullptr }; SchedulerQueue::Node* m_current_thread { nullptr }; + InterruptStack* m_interrupt_stack { nullptr }; + InterruptRegisters* m_interrupt_registers { nullptr }; + void* m_current_page_table { nullptr }; friend class BAN::Array; diff --git a/kernel/include/kernel/Scheduler.h b/kernel/include/kernel/Scheduler.h index c2091d68..2784d269 100644 --- a/kernel/include/kernel/Scheduler.h +++ b/kernel/include/kernel/Scheduler.h @@ -16,8 +16,10 @@ namespace Kernel [[noreturn]] void start(); + void yield(); + void timer_reschedule(); - void reschedule(); + void irq_reschedule(); void reschedule_if_idling(); void set_current_thread_sleeping(uint64_t wake_time); @@ -30,9 +32,6 @@ namespace Kernel Thread& current_thread(); static pid_t current_tid(); - [[noreturn]] void execute_current_thread(); - [[noreturn]] void delete_current_process_and_thread(); 
- // This is no return if called on current thread void terminate_thread(Thread*); @@ -41,11 +40,7 @@ namespace Kernel void set_current_thread_sleeping_impl(Semaphore* semaphore, uint64_t wake_time); - [[nodiscard]] bool save_current_thread(); - void advance_current_thread(); - - [[noreturn]] void execute_current_thread_locked(); - [[noreturn]] void execute_current_thread_stack_loaded(); + void setup_next_thread(); BAN::ErrorOr add_thread(Thread*); diff --git a/kernel/include/kernel/SchedulerQueue.h b/kernel/include/kernel/SchedulerQueue.h index c43455df..8c6d51a9 100644 --- a/kernel/include/kernel/SchedulerQueue.h +++ b/kernel/include/kernel/SchedulerQueue.h @@ -26,6 +26,7 @@ namespace Kernel Thread* thread; uint64_t wake_time { 0 }; Semaphore* semaphore { nullptr }; + bool should_block { false }; private: Node* next { nullptr }; diff --git a/kernel/include/kernel/Thread.h b/kernel/include/kernel/Thread.h index dd2e2763..fb3b0d79 100644 --- a/kernel/include/kernel/Thread.h +++ b/kernel/include/kernel/Thread.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -25,7 +26,7 @@ namespace Kernel { NotStarted, Executing, - Terminated + Terminated, }; public: @@ -52,19 +53,8 @@ namespace Kernel BAN::ErrorOr block_or_eintr_or_timeout(Semaphore& semaphore, uint64_t timeout_ms, bool etimedout); BAN::ErrorOr block_or_eintr_or_waketime(Semaphore& semaphore, uint64_t wake_time_ms, bool etimedout); - void set_return_sp(uintptr_t& sp) { m_return_sp = &sp; } - void set_return_ip(uintptr_t& ip) { m_return_ip = &ip; } - uintptr_t return_sp() { ASSERT(m_return_sp); return *m_return_sp; } - uintptr_t return_ip() { ASSERT(m_return_ip); return *m_return_ip; } - pid_t tid() const { return m_tid; } - void set_sp(uintptr_t sp) { m_sp = sp; validate_stack(); } - void set_ip(uintptr_t ip) { m_ip = ip; } - uintptr_t sp() const { return m_sp; } - uintptr_t ip() const { return m_ip; } - - void set_started() { ASSERT(m_state == State::NotStarted); m_state = 
State::Executing; } State state() const { return m_state; } vaddr_t kernel_stack_bottom() const { return m_kernel_stack->vaddr(); } @@ -87,6 +77,10 @@ namespace Kernel size_t virtual_page_count() const { return (m_kernel_stack->size() / PAGE_SIZE) + (m_userspace_stack->size() / PAGE_SIZE); } size_t physical_page_count() const { return virtual_page_count(); } + uintptr_t& interrupt_sp() { return m_interrupt_sp; } + InterruptStack& interrupt_stack() { return m_interrupt_stack; } + InterruptRegisters& interrupt_registers() { return m_interrupt_registers; } + #if __enable_sse void save_sse(); void load_sse(); @@ -97,22 +91,20 @@ namespace Kernel Thread(pid_t tid, Process*); void on_exit(); - void validate_stack() const; - private: static constexpr size_t m_kernel_stack_size = PAGE_SIZE * 4; static constexpr size_t m_userspace_stack_size = PAGE_SIZE * 4; BAN::UniqPtr m_kernel_stack; BAN::UniqPtr m_userspace_stack; - uintptr_t m_ip { 0 }; - uintptr_t m_sp { 0 }; const pid_t m_tid { 0 }; State m_state { State::NotStarted }; Process* m_process { nullptr }; bool m_is_userspace { false }; + bool m_delete_process { false }; - uintptr_t* m_return_sp { nullptr }; - uintptr_t* m_return_ip { nullptr }; + InterruptStack m_interrupt_stack { }; + InterruptRegisters m_interrupt_registers { }; + uintptr_t m_interrupt_sp { }; uint64_t m_signal_pending_mask { 0 }; uint64_t m_signal_block_mask { 0 }; @@ -123,6 +115,7 @@ namespace Kernel alignas(16) uint8_t m_sse_storage[512] {}; #endif + friend class Process; friend class Scheduler; }; diff --git a/kernel/kernel/IDT.cpp b/kernel/kernel/IDT.cpp index 2d52b2a0..434aef33 100644 --- a/kernel/kernel/IDT.cpp +++ b/kernel/kernel/IDT.cpp @@ -10,7 +10,7 @@ #include #define ISR_LIST_X X(0) X(1) X(2) X(3) X(4) X(5) X(6) X(7) X(8) X(9) X(10) X(11) X(12) X(13) X(14) X(15) X(16) X(17) X(18) X(19) X(20) X(21) X(22) X(23) X(24) X(25) X(26) X(27) X(28) X(29) X(30) X(31) -#define IRQ_LIST_X X(0) X(1) X(2) X(3) X(4) X(5) X(6) X(7) X(8) X(9) X(10) X(11) 
X(12) X(13) X(14) X(15) X(16) X(17) X(18) X(19) X(20) X(21) X(22) X(23) X(24) X(25) X(26) X(27) X(28) X(29) X(30) X(31) X(32) +#define IRQ_LIST_X X(0) X(1) X(2) X(3) X(4) X(5) X(6) X(7) X(8) X(9) X(10) X(11) X(12) X(13) X(14) X(15) X(16) X(17) X(18) X(19) X(20) X(21) X(22) X(23) X(24) X(25) X(26) X(27) X(28) X(29) X(30) X(31) namespace Kernel { @@ -173,9 +173,6 @@ namespace Kernel if (tid) { - Thread::current().set_return_sp(interrupt_stack->sp); - Thread::current().set_return_ip(interrupt_stack->ip); - if (isr == ISR::PageFault) { // Check if stack is OOB @@ -202,9 +199,9 @@ namespace Kernel page_fault_error.raw = error; if (!page_fault_error.present) { - asm volatile("sti"); + Processor::set_interrupt_state(InterruptState::Enabled); auto result = Process::current().allocate_page_for_demand_paging(regs->cr2); - asm volatile("cli"); + Processor::set_interrupt_state(InterruptState::Disabled); if (!result.is_error() && result.value()) goto done; @@ -332,7 +329,14 @@ done: return; } - extern "C" void cpp_irq_handler(uint32_t irq, InterruptStack* interrupt_stack) + extern "C" void cpp_reschedule_handler(InterruptStack* interrupt_stack, InterruptRegisters* interrupt_registers) + { + Processor::enter_interrupt(interrupt_stack, interrupt_registers); + Scheduler::get().irq_reschedule(); + Processor::leave_interrupt(); + } + + extern "C" void cpp_irq_handler(uint32_t irq) { if (g_paniced) { @@ -342,20 +346,14 @@ done: asm volatile("cli; 1: hlt; jmp 1b"); } - if (Scheduler::current_tid()) - { - Thread::current().set_return_sp(interrupt_stack->sp); - Thread::current().set_return_ip(interrupt_stack->ip); - } + ASSERT(irq != IRQ_IPI); if (!InterruptController::get().is_in_service(irq)) dprintln("spurious irq 0x{2H}", irq); else { InterruptController::get().eoi(irq); - if (irq == IRQ_IPI) - Scheduler::get().reschedule(); - else if (auto* handler = s_interruptables[irq]) + if (auto* handler = s_interruptables[irq]) handler->handle_irq(); else dprintln("no handler for irq 0x{2H}", 
irq); @@ -402,6 +400,7 @@ done: IRQ_LIST_X #undef X + extern "C" void asm_reschedule_handler(); extern "C" void syscall_asm(); IDT* IDT::create() @@ -419,6 +418,8 @@ done: IRQ_LIST_X #undef X + idt->register_interrupt_handler(IRQ_VECTOR_BASE + IRQ_IPI, asm_reschedule_handler); + idt->register_syscall_handler(0x80, syscall_asm); return idt; diff --git a/kernel/kernel/Networking/ARPTable.cpp b/kernel/kernel/Networking/ARPTable.cpp index bdf62f88..38deb152 100644 --- a/kernel/kernel/Networking/ARPTable.cpp +++ b/kernel/kernel/Networking/ARPTable.cpp @@ -79,7 +79,7 @@ namespace Kernel if (it != m_arp_table.end()) return it->value; } - Scheduler::get().reschedule(); + Scheduler::get().yield(); } return BAN::Error::from_errno(ETIMEDOUT); diff --git a/kernel/kernel/Networking/UNIX/Socket.cpp b/kernel/kernel/Networking/UNIX/Socket.cpp index e986cbc1..16a0e8ce 100644 --- a/kernel/kernel/Networking/UNIX/Socket.cpp +++ b/kernel/kernel/Networking/UNIX/Socket.cpp @@ -164,7 +164,7 @@ namespace Kernel } while (!connection_info.connection_done) - Scheduler::get().reschedule(); + Scheduler::get().yield(); return {}; } diff --git a/kernel/kernel/Process.cpp b/kernel/kernel/Process.cpp index 927ac3fa..74bd73e4 100644 --- a/kernel/kernel/Process.cpp +++ b/kernel/kernel/Process.cpp @@ -127,9 +127,6 @@ namespace Kernel } process->m_loadable_elf->reserve_address_space(); - process->m_is_userspace = true; - process->m_userspace_info.entry = process->m_loadable_elf->entry_point(); - char** argv = nullptr; { size_t needed_bytes = sizeof(char*) * 2 + path.size() + 1; @@ -155,6 +152,8 @@ namespace Kernel MUST(process->m_mapped_regions.push_back(BAN::move(argv_region))); } + process->m_is_userspace = true; + process->m_userspace_info.entry = process->m_loadable_elf->entry_point(); process->m_userspace_info.argc = 1; process->m_userspace_info.argv = argv; process->m_userspace_info.envp = nullptr; @@ -207,7 +206,7 @@ namespace Kernel m_exit_status.semaphore.unblock(); while 
(m_exit_status.waiting > 0) - Scheduler::get().reschedule(); + Scheduler::get().yield(); m_process_lock.lock(); @@ -220,7 +219,7 @@ namespace Kernel bool Process::on_thread_exit(Thread& thread) { - ASSERT(Processor::get_interrupt_state() == InterruptState::Disabled); + LockGuard _(m_process_lock); ASSERT(m_threads.size() > 0); @@ -228,8 +227,6 @@ namespace Kernel { ASSERT(m_threads.front() == &thread); m_threads.clear(); - - thread.setup_process_cleanup(); return true; } @@ -248,11 +245,18 @@ namespace Kernel void Process::exit(int status, int signal) { m_exit_status.exit_code = __WGENEXITCODE(status, signal); - for (auto* thread : m_threads) - if (thread != &Thread::current()) - Scheduler::get().terminate_thread(thread); - if (this == &Process::current()) - Scheduler::get().terminate_thread(&Thread::current()); + while (!m_threads.empty()) + m_threads.front()->on_exit(); + //for (auto* thread : m_threads) + // if (thread != &Thread::current()) + // Scheduler::get().terminate_thread(thread); + //if (this == &Process::current()) + //{ + // m_threads.clear(); + // Processor::set_interrupt_state(InterruptState::Disabled); + // Thread::current().setup_process_cleanup(); + // Scheduler::get().yield(); + //} } size_t Process::proc_meminfo(off_t offset, BAN::ByteSpan buffer) const @@ -533,7 +537,7 @@ namespace Kernel m_has_called_exec = true; m_threads.front()->setup_exec(); - Scheduler::get().execute_current_thread(); + Scheduler::get().yield(); ASSERT_NOT_REACHED(); } diff --git a/kernel/kernel/Processor.cpp b/kernel/kernel/Processor.cpp index 2fbab9b8..12be1c22 100644 --- a/kernel/kernel/Processor.cpp +++ b/kernel/kernel/Processor.cpp @@ -79,4 +79,34 @@ namespace Kernel write_gs_ptr(offsetof(Processor, m_idle_thread), idle_thread); } + void Processor::enter_interrupt(InterruptStack* interrupt_stack, InterruptRegisters* interrupt_registers) + { + ASSERT(get_interrupt_state() == InterruptState::Disabled); + ASSERT(read_gs_ptr(offsetof(Processor, m_interrupt_stack)) == 
nullptr); + write_gs_ptr(offsetof(Processor, m_interrupt_stack), interrupt_stack); + write_gs_ptr(offsetof(Processor, m_interrupt_registers), interrupt_registers); + } + + void Processor::leave_interrupt() + { + ASSERT(get_interrupt_state() == InterruptState::Disabled); + ASSERT(read_gs_ptr(offsetof(Processor, m_interrupt_stack)) != nullptr); + write_gs_ptr(offsetof(Processor, m_interrupt_stack), nullptr); + write_gs_ptr(offsetof(Processor, m_interrupt_registers), nullptr); + } + + InterruptStack& Processor::get_interrupt_stack() + { + ASSERT(get_interrupt_state() == InterruptState::Disabled); + ASSERT(read_gs_ptr(offsetof(Processor, m_interrupt_stack))); + return *read_gs_sized(offsetof(Processor, m_interrupt_stack)); + } + + InterruptRegisters& Processor::get_interrupt_registers() + { + ASSERT(get_interrupt_state() == InterruptState::Disabled); + ASSERT(read_gs_ptr(offsetof(Processor, m_interrupt_registers))); + return *read_gs_sized(offsetof(Processor, m_interrupt_registers)); + } + } diff --git a/kernel/kernel/Scheduler.cpp b/kernel/kernel/Scheduler.cpp index cf59a8a3..4e33af33 100644 --- a/kernel/kernel/Scheduler.cpp +++ b/kernel/kernel/Scheduler.cpp @@ -11,9 +11,6 @@ namespace Kernel { - extern "C" [[noreturn]] void start_thread(uintptr_t sp, uintptr_t ip); - extern "C" [[noreturn]] void continue_thread(uintptr_t sp, uintptr_t ip); - static Scheduler* s_instance = nullptr; static BAN::Atomic s_started { false }; @@ -46,10 +43,8 @@ namespace Kernel void Scheduler::start() { ASSERT(Processor::get_interrupt_state() == InterruptState::Disabled); - m_lock.lock(); - s_started = true; - advance_current_thread(); - execute_current_thread_locked(); + ASSERT(!m_active_threads.empty()); + yield(); ASSERT_NOT_REACHED(); } @@ -71,41 +66,125 @@ namespace Kernel return Scheduler::get().current_thread().tid(); } + void Scheduler::setup_next_thread() + { + ASSERT(m_lock.current_processor_has_lock()); + + if (auto* current = Processor::get_current_thread()) + { + auto* thread 
= current->thread; + + if (thread->state() == Thread::State::Terminated) + { + PageTable::kernel().load(); + delete thread; + delete current; + } + else + { + // thread->state() can be NotStarted when calling exec or cleaning up process + if (thread->state() != Thread::State::NotStarted) + { + thread->interrupt_stack() = Processor::get_interrupt_stack(); + thread->interrupt_stack().sp = thread->interrupt_sp(); + thread->interrupt_registers() = Processor::get_interrupt_registers(); + } + + if (current->should_block) + { + current->should_block = false; + m_blocking_threads.add_with_wake_time(current); + } + else + { + m_active_threads.push_back(current); + } + } + } + + SchedulerQueue::Node* node = nullptr; + while (!m_active_threads.empty()) + { + node = m_active_threads.pop_front(); + if (node->thread->state() != Thread::State::Terminated) + break; + + PageTable::kernel().load(); + delete node->thread; + delete node; + node = nullptr; + } + + Processor::set_current_thread(node); + + auto* thread = node ? 
node->thread : Processor::idle_thread(); + + if (thread->has_process()) + thread->process().page_table().load(); + else + PageTable::kernel().load(); + + if (thread->state() == Thread::State::NotStarted) + thread->m_state = Thread::State::Executing; + + ASSERT(thread->interrupt_stack().ip); + ASSERT(thread->interrupt_stack().sp); + + Processor::gdt().set_tss_stack(thread->kernel_stack_top()); + + Processor::get_interrupt_stack() = thread->interrupt_stack(); + Processor::get_interrupt_registers() = thread->interrupt_registers(); + } + void Scheduler::timer_reschedule() { // Broadcast IPI to all other processors for them // to perform reschedule InterruptController::get().broadcast_ipi(); - auto state = m_lock.lock(); - m_blocking_threads.remove_with_wake_time(m_active_threads, SystemTimer::get().ms_since_boot()); - if (save_current_thread()) - return Processor::set_interrupt_state(state); - advance_current_thread(); - execute_current_thread_locked(); - ASSERT_NOT_REACHED(); + { + SpinLockGuard _(m_lock); + m_blocking_threads.remove_with_wake_time(m_active_threads, SystemTimer::get().ms_since_boot()); + } + + yield(); } - void Scheduler::reschedule() + void Scheduler::yield() { - auto state = m_lock.lock(); - if (save_current_thread()) - return Processor::set_interrupt_state(state); - advance_current_thread(); - execute_current_thread_locked(); - ASSERT_NOT_REACHED(); + auto state = Processor::get_interrupt_state(); + Processor::set_interrupt_state(InterruptState::Disabled); + + asm volatile( + "movq %%rsp, %[save_sp];" + "movq %[load_sp], %%rsp;" + "int %[ipi];" + : [save_sp]"=m"(Thread::current().interrupt_sp()) + : [load_sp]"r"(Processor::current_stack_top()), + [ipi]"i"(IRQ_VECTOR_BASE + IRQ_IPI) + : "memory" + ); + + Processor::set_interrupt_state(state); + } + + void Scheduler::irq_reschedule() + { + SpinLockGuard _(m_lock); + setup_next_thread(); } void Scheduler::reschedule_if_idling() { - auto state = m_lock.lock(); - if (m_active_threads.empty() || 
Processor::get_current_thread()) - return m_lock.unlock(state); - if (save_current_thread()) - return Processor::set_interrupt_state(state); - advance_current_thread(); - execute_current_thread_locked(); - ASSERT_NOT_REACHED(); + { + SpinLockGuard _(m_lock); + if (Processor::get_current_thread()) + return; + if (m_active_threads.empty()) + return; + } + + yield(); } BAN::ErrorOr Scheduler::add_thread(Thread* thread) @@ -120,190 +199,49 @@ namespace Kernel void Scheduler::terminate_thread(Thread* thread) { - SpinLockGuard _(m_lock); + auto state = m_lock.lock(); + + ASSERT(thread->state() == Thread::State::Executing); thread->m_state = Thread::State::Terminated; - if (thread == ¤t_thread()) - execute_current_thread_locked(); - } + thread->interrupt_stack().sp = Processor::current_stack_top(); - void Scheduler::advance_current_thread() - { - ASSERT(m_lock.current_processor_has_lock()); + m_lock.unlock(InterruptState::Disabled); - if (auto* current = Processor::get_current_thread()) - m_active_threads.push_back(current); - Processor::set_current_thread(nullptr); + // actual deletion will be done while rescheduling - if (!m_active_threads.empty()) - Processor::set_current_thread(m_active_threads.pop_front()); - } - - // NOTE: this is declared always inline, so we don't corrupt the stack - // after getting the rsp - ALWAYS_INLINE bool Scheduler::save_current_thread() - { - ASSERT(m_lock.current_processor_has_lock()); - - uintptr_t sp, ip; - push_callee_saved(); - if (!(ip = read_ip())) + if (¤t_thread() == thread) { - pop_callee_saved(); - return true; - } - read_rsp(sp); - - Thread& current = current_thread(); - current.set_ip(ip); - current.set_sp(sp); - - load_temp_stack(); - - return false; - } - - void Scheduler::delete_current_process_and_thread() - { - m_lock.lock(); - - load_temp_stack(); - PageTable::kernel().load(); - - auto* current = Processor::get_current_thread(); - ASSERT(current); - delete ¤t->thread->process(); - delete current->thread; - delete 
current; - Processor::set_current_thread(nullptr); - - advance_current_thread(); - execute_current_thread_locked(); - ASSERT_NOT_REACHED(); - } - - void Scheduler::execute_current_thread() - { - m_lock.lock(); - load_temp_stack(); - PageTable::kernel().load(); - execute_current_thread_stack_loaded(); - ASSERT_NOT_REACHED(); - } - - void Scheduler::execute_current_thread_locked() - { - ASSERT(m_lock.current_processor_has_lock()); - load_temp_stack(); - PageTable::kernel().load(); - execute_current_thread_stack_loaded(); - ASSERT_NOT_REACHED(); - } - - NEVER_INLINE void Scheduler::execute_current_thread_stack_loaded() - { - ASSERT(m_lock.current_processor_has_lock()); - -#if SCHEDULER_VERIFY_STACK - vaddr_t rsp; - read_rsp(rsp); - ASSERT(Processor::current_stack_bottom() <= rsp && rsp <= Processor::current_stack_top()); - ASSERT(&PageTable::current() == &PageTable::kernel()); -#endif - - Thread* current = ¤t_thread(); - -#if __enable_sse - if (current != Thread::sse_thread()) - { -#if ARCH(x86_64) - asm volatile( - "movq %cr0, %rax;" - "orq $(1 << 3), %rax;" - "movq %rax, %cr0" - ); -#elif ARCH(i686) - asm volatile( - "movl %cr0, %eax;" - "orl $(1 << 3), %eax;" - "movl %eax, %cr0" - ); -#else - #error -#endif - } -#endif - - while (current->state() == Thread::State::Terminated) - { - auto* node = Processor::get_current_thread(); - if (node->thread->has_process()) - if (node->thread->process().on_thread_exit(*node->thread)) - break; - - delete node->thread; - delete node; - Processor::set_current_thread(nullptr); - - advance_current_thread(); - current = ¤t_thread(); + yield(); + ASSERT_NOT_REACHED(); } - if (current->has_process()) - { - current->process().page_table().load(); - Processor::gdt().set_tss_stack(current->kernel_stack_top()); - } - else - PageTable::kernel().load(); - - switch (current->state()) - { - case Thread::State::NotStarted: - current->set_started(); - m_lock.unlock(InterruptState::Disabled); - start_thread(current->sp(), current->ip()); - case 
Thread::State::Executing: - m_lock.unlock(InterruptState::Disabled); - while (current->can_add_signal_to_execute()) - current->handle_signal(); - continue_thread(current->sp(), current->ip()); - case Thread::State::Terminated: - ASSERT_NOT_REACHED(); - } - - ASSERT_NOT_REACHED(); + Processor::set_interrupt_state(state); } void Scheduler::set_current_thread_sleeping_impl(Semaphore* semaphore, uint64_t wake_time) { - ASSERT(m_lock.current_processor_has_lock()); - - if (save_current_thread()) - return; + auto state = m_lock.lock(); auto* current = Processor::get_current_thread(); current->semaphore = semaphore; current->wake_time = wake_time; - m_blocking_threads.add_with_wake_time(current); - Processor::set_current_thread(nullptr); + current->should_block = true; - advance_current_thread(); - execute_current_thread_locked(); - ASSERT_NOT_REACHED(); + m_lock.unlock(InterruptState::Disabled); + + yield(); + + Processor::set_interrupt_state(state); } void Scheduler::set_current_thread_sleeping(uint64_t wake_time) { - auto state = m_lock.lock(); set_current_thread_sleeping_impl(nullptr, wake_time); - Processor::set_interrupt_state(state); } void Scheduler::block_current_thread(Semaphore* semaphore, uint64_t wake_time) { - auto state = m_lock.lock(); set_current_thread_sleeping_impl(semaphore, wake_time); - Processor::set_interrupt_state(state); } void Scheduler::unblock_threads(Semaphore* semaphore) diff --git a/kernel/kernel/Storage/ATA/AHCI/Device.cpp b/kernel/kernel/Storage/ATA/AHCI/Device.cpp index c671f633..1bc945a8 100644 --- a/kernel/kernel/Storage/ATA/AHCI/Device.cpp +++ b/kernel/kernel/Storage/ATA/AHCI/Device.cpp @@ -168,7 +168,7 @@ namespace Kernel // This doesn't allow scheduler to go properly idle. 
while (SystemTimer::get().ms_since_boot() < start_time + s_ata_timeout) { - Scheduler::get().reschedule(); + Scheduler::get().yield(); if (!(m_port->ci & (1 << command_slot))) return {}; } diff --git a/kernel/kernel/Syscall.cpp b/kernel/kernel/Syscall.cpp index c941bb4d..22ca55e1 100644 --- a/kernel/kernel/Syscall.cpp +++ b/kernel/kernel/Syscall.cpp @@ -32,9 +32,6 @@ namespace Kernel { ASSERT((interrupt_stack.cs & 0b11) == 0b11); - Thread::current().set_return_sp(interrupt_stack.sp); - Thread::current().set_return_ip(interrupt_stack.ip); - asm volatile("sti"); BAN::ErrorOr ret = BAN::Error::from_errno(ENOSYS); diff --git a/kernel/kernel/Thread.cpp b/kernel/kernel/Thread.cpp index 7a38469d..a7ad2e22 100644 --- a/kernel/kernel/Thread.cpp +++ b/kernel/kernel/Thread.cpp @@ -12,8 +12,8 @@ namespace Kernel { - extern "C" void thread_userspace_trampoline(uint64_t sp, uint64_t ip, int argc, char** argv, char** envp); - extern "C" uintptr_t read_ip(); + extern "C" [[noreturn]] void start_userspace_thread(); + extern "C" [[noreturn]] void start_kernel_thread(); extern "C" void signal_trampoline(); @@ -46,14 +46,21 @@ namespace Kernel PageTable::Flags::ReadWrite | PageTable::Flags::Present, true )); - thread->m_sp = thread->kernel_stack_top(); - thread->m_ip = (uintptr_t)entry; // Initialize stack for returning - write_to_stack(thread->m_sp, nullptr); // alignment - write_to_stack(thread->m_sp, thread); - write_to_stack(thread->m_sp, &Thread::on_exit); - write_to_stack(thread->m_sp, data); + uintptr_t sp = thread->kernel_stack_top(); + write_to_stack(sp, thread); + write_to_stack(sp, &Thread::on_exit); + write_to_stack(sp, data); + write_to_stack(sp, entry); + + thread->m_interrupt_stack.ip = reinterpret_cast(start_kernel_thread); + thread->m_interrupt_stack.cs = 0x08; + thread->m_interrupt_stack.flags = 0x202; + thread->m_interrupt_stack.sp = sp; + thread->m_interrupt_stack.ss = 0x10; + + memset(&thread->m_interrupt_registers, 0, sizeof(InterruptRegisters)); 
thread_deleter.disable(); @@ -72,14 +79,6 @@ namespace Kernel thread->m_is_userspace = true; - thread->m_userspace_stack = TRY(VirtualRange::create_to_vaddr_range( - process->page_table(), - 0x300000, KERNEL_OFFSET, - m_userspace_stack_size, - PageTable::Flags::UserSupervisor | PageTable::Flags::ReadWrite | PageTable::Flags::Present, - true - )); - thread->m_kernel_stack = TRY(VirtualRange::create_to_vaddr_range( process->page_table(), 0x300000, KERNEL_OFFSET, @@ -88,6 +87,14 @@ namespace Kernel true )); + thread->m_userspace_stack = TRY(VirtualRange::create_to_vaddr_range( + process->page_table(), + 0x300000, KERNEL_OFFSET, + m_userspace_stack_size, + PageTable::Flags::UserSupervisor | PageTable::Flags::ReadWrite | PageTable::Flags::Present, + true + )); + thread->setup_exec(); thread_deleter.disable(); @@ -148,6 +155,11 @@ namespace Kernel Thread::~Thread() { + if (m_delete_process) + { + ASSERT(m_process); + delete m_process; + } } BAN::ErrorOr Thread::clone(Process* new_process, uintptr_t sp, uintptr_t ip) @@ -165,10 +177,13 @@ namespace Kernel thread->m_kernel_stack = TRY(m_kernel_stack->clone(new_process->page_table())); thread->m_userspace_stack = TRY(m_userspace_stack->clone(new_process->page_table())); - thread->m_state = State::Executing; + thread->m_state = State::NotStarted; - thread->m_ip = ip; - thread->m_sp = sp; + thread->m_interrupt_stack.ip = ip; + thread->m_interrupt_stack.cs = 0x08; + thread->m_interrupt_stack.flags = 0x002; + thread->m_interrupt_stack.sp = sp; + thread->m_interrupt_stack.ss = 0x10; thread_deleter.disable(); @@ -179,58 +194,69 @@ namespace Kernel { ASSERT(is_userspace()); m_state = State::NotStarted; - static entry_t entry_trampoline( - [](void*) - { - const auto& info = Process::current().userspace_info(); - thread_userspace_trampoline(Thread::current().userspace_stack_top(), info.entry, info.argc, info.argv, info.envp); - ASSERT_NOT_REACHED(); - } - ); - m_sp = kernel_stack_top(); - m_ip = (uintptr_t)entry_trampoline; // 
Signal mask is inherited - // Setup stack for returning - ASSERT(m_sp % PAGE_SIZE == 0); - PageTable::with_fast_page(process().page_table().physical_address_of(m_sp - PAGE_SIZE), [&] { + auto& userspace_info = process().userspace_info(); + ASSERT(userspace_info.entry); + + // Initialize stack for returning + PageTable::with_fast_page(process().page_table().physical_address_of(userspace_stack_top() - PAGE_SIZE), [&] { uintptr_t sp = PageTable::fast_page() + PAGE_SIZE; - write_to_stack(sp, nullptr); // alignment - write_to_stack(sp, this); - write_to_stack(sp, &Thread::on_exit); write_to_stack(sp, nullptr); - m_sp -= 4 * sizeof(uintptr_t); + write_to_stack(sp, userspace_info.argc); + write_to_stack(sp, userspace_info.argv); + write_to_stack(sp, userspace_info.envp); }); + + m_interrupt_stack.ip = userspace_info.entry; + m_interrupt_stack.cs = 0x18 | 3; + m_interrupt_stack.flags = 0x202; + m_interrupt_stack.sp = userspace_stack_top() - 4 * sizeof(uintptr_t); + m_interrupt_stack.ss = 0x20 | 3; + + memset(&m_interrupt_registers, 0, sizeof(InterruptRegisters)); } void Thread::setup_process_cleanup() { + ASSERT(Processor::get_interrupt_state() == InterruptState::Disabled); + m_state = State::NotStarted; static entry_t entry( [](void* process_ptr) { - auto& process = *reinterpret_cast(process_ptr); - process.cleanup_function(); - Scheduler::get().delete_current_process_and_thread(); - ASSERT_NOT_REACHED(); + auto* thread = &Thread::current(); + auto* process = static_cast(process_ptr); + + ASSERT(thread->m_process == process); + + process->cleanup_function(); + + thread->m_delete_process = true; + + // will call on thread exit after return } ); - m_sp = kernel_stack_top(); - m_ip = (uintptr_t)entry; m_signal_pending_mask = 0; m_signal_block_mask = ~0ull; - ASSERT(m_sp % PAGE_SIZE == 0); - PageTable::with_fast_page(process().page_table().physical_address_of(m_sp - PAGE_SIZE), [&] { + PageTable::with_fast_page(process().page_table().physical_address_of(kernel_stack_top() - 
PAGE_SIZE), [&] { uintptr_t sp = PageTable::fast_page() + PAGE_SIZE; - write_to_stack(sp, nullptr); // alignment write_to_stack(sp, this); write_to_stack(sp, &Thread::on_exit); write_to_stack(sp, m_process); - m_sp -= 4 * sizeof(uintptr_t); + write_to_stack(sp, entry); }); + + m_interrupt_stack.ip = reinterpret_cast(start_kernel_thread); + m_interrupt_stack.cs = 0x08; + m_interrupt_stack.flags = 0x202; + m_interrupt_stack.sp = kernel_stack_top() - 4 * sizeof(uintptr_t); + m_interrupt_stack.ss = 0x10; + + memset(&m_interrupt_registers, 0, sizeof(InterruptRegisters)); } bool Thread::is_interrupted_by_signal() @@ -396,22 +422,24 @@ namespace Kernel return {}; } - void Thread::validate_stack() const - { - if (kernel_stack_bottom() <= m_sp && m_sp <= kernel_stack_top()) - return; - if (userspace_stack_bottom() <= m_sp && m_sp <= userspace_stack_top()) - return; - Kernel::panic("sp {8H}, kernel stack {8H}->{8H}, userspace stack {8H}->{8H}", m_sp, - kernel_stack_bottom(), kernel_stack_top(), - userspace_stack_bottom(), userspace_stack_top() - ); - } - void Thread::on_exit() { ASSERT(this == &Thread::current()); - Scheduler::get().terminate_thread(this); + if (!m_delete_process && has_process()) + { + if (process().on_thread_exit(*this)) + { + Processor::set_interrupt_state(InterruptState::Disabled); + setup_process_cleanup(); + Scheduler::get().yield(); + } + else + Scheduler::get().terminate_thread(this); + } + else + { + Scheduler::get().terminate_thread(this); + } ASSERT_NOT_REACHED(); } diff --git a/libc/arch/x86_64/crt0.S b/libc/arch/x86_64/crt0.S index 67804f19..474a8bf4 100644 --- a/libc/arch/x86_64/crt0.S +++ b/libc/arch/x86_64/crt0.S @@ -2,34 +2,29 @@ .global _start _start: - # Set up end of the stack frame linked list. 
- movq $0, %rbp - pushq %rbp # rip=0 - pushq %rbp # rbp=0 - movq %rsp, %rbp + # STACK LAYOUT + # null + # argc + # argv + # envp - # Save argc, argv, environ - pushq %rdx - pushq %rsi - pushq %rdi + xorq %rbp, %rbp - # Prepare malloc, environment - movq %rdx, %rdi + # init libc + movq 0(%rsp), %rdi call _init_libc - # Call global constructos + # call global constructors call _init - # Restore argc, argv, environ - popq %rdi - popq %rsi - popq %rdx - - # Run main + # call main + movq 16(%rsp), %rdi + movq 8(%rsp), %rsi + movq 0(%rsp), %rdx call main - # Cleanly exit the process - movl %eax, %edi + # call exit + movq %rax, %rdi call exit .size _start, . - _start