From f8261c60c0ffc01b76428f1c8675a8d74fe89815 Mon Sep 17 00:00:00 2001 From: Bananymous Date: Mon, 22 Jul 2024 00:33:50 +0300 Subject: [PATCH] Kernel: Rewrite the whole scheduler and re-architecture SMP handling Change Semaphore -> ThreadBlocker This was not a semaphore, I just named it one because I didn't know what semaphore was. I have meant to change this sooner, but it was in no way urgent :D Implement SMP events. Processors can now be sent SMP events through IPIs. SMP events can be sent either to a single processor or broadcasted to every processor. PageTable::{map_page,map_range,unmap_page,unmap_range}() now send SMP event to invalidate TLB caches for the changed pages. Scheduler no longer uses a global run queue. Each processor has its own scheduler that keeps track of the load on the processor. Once every second schedulers do load balancing. Schedulers have no access to other processors' schedulers, they just see approximate loads. If scheduler decides that it has too much load, it will send a thread to another processor through a SMP event. Schedulers are currently run using the timer interrupt on BSB. This should be not the case, and each processor should use its LAPIC timer for interrupts. There is no reason to broadcast SMP event to all processors when BSB gets timer interrupt. Old scheduler only achieved 20% idle load on qemu. That was probably a very inefficient implementation. This new scheduler seems to average around 1% idle load. This is much closer to what I would expect. On my own laptop idle load seems to be only around 0.5% on each processor. --- kernel/CMakeLists.txt | 2 +- kernel/arch/i686/PageTable.cpp | 57 +- kernel/arch/i686/interrupts.S | 17 +- kernel/arch/x86_64/PageTable.cpp | 58 +- kernel/arch/x86_64/interrupts.S | 8 +- kernel/include/kernel/ACPI/ACPI.h | 3 +- kernel/include/kernel/ACPI/AML/Mutex.h | 2 +- kernel/include/kernel/ACPI/AML/Sleep.h | 2 +- kernel/include/kernel/FS/DevFS/FileSystem.h | 6 +- kernel/include/kernel/FS/Pipe.h | 4 +- kernel/include/kernel/IDT.h | 4 +- kernel/include/kernel/Input/InputDevice.h | 11 +- kernel/include/kernel/InterruptStack.h | 2 + kernel/include/kernel/Lock/Mutex.h | 31 +- kernel/include/kernel/Lock/SpinLock.h | 58 +- kernel/include/kernel/Memory/PageTable.h | 7 +- kernel/include/kernel/Networking/ARPTable.h | 4 +- kernel/include/kernel/Networking/IPv4Layer.h | 2 +- kernel/include/kernel/Networking/TCPSocket.h | 4 +- kernel/include/kernel/Networking/UDPSocket.h | 4 +- .../include/kernel/Networking/UNIX/Socket.h | 4 +- kernel/include/kernel/Process.h | 2 +- kernel/include/kernel/Processor.h | 123 ++- kernel/include/kernel/Scheduler.h | 144 ++- kernel/include/kernel/SchedulerQueue.h | 127 --- kernel/include/kernel/Semaphore.h | 15 - .../include/kernel/Storage/ATA/AHCI/Device.h | 2 +- kernel/include/kernel/Storage/NVMe/Queue.h | 4 +- kernel/include/kernel/Terminal/TTY.h | 6 +- kernel/include/kernel/Terminal/VirtualTTY.h | 2 +- kernel/include/kernel/Thread.h | 10 +- kernel/include/kernel/ThreadBlocker.h | 17 + kernel/include/kernel/Timer/Timer.h | 3 +- kernel/include/kernel/USB/XHCI/Controller.h | 3 +- kernel/kernel/ACPI/ACPI.cpp | 6 +- kernel/kernel/APIC.cpp | 27 +- kernel/kernel/FS/DevFS/FileSystem.cpp | 10 +- kernel/kernel/FS/Pipe.cpp | 8 +- kernel/kernel/IDT.cpp | 31 +- kernel/kernel/Input/InputDevice.cpp | 8 +- kernel/kernel/Networking/ARPTable.cpp | 6 +- kernel/kernel/Networking/IPv4Layer.cpp | 4 +- kernel/kernel/Networking/TCPSocket.cpp | 22 +- kernel/kernel/Networking/UDPSocket.cpp | 4 +- kernel/kernel/Networking/UNIX/Socket.cpp | 18 +- kernel/kernel/Process.cpp | 57 +- kernel/kernel/Processor.cpp | 312 ++++++- kernel/kernel/Scheduler.cpp | 825 ++++++++++++++---- kernel/kernel/Semaphore.cpp | 28 - kernel/kernel/Storage/ATA/AHCI/Device.cpp | 16 +- kernel/kernel/Storage/ATA/ATABus.cpp | 8 +- kernel/kernel/Storage/NVMe/Queue.cpp | 14 +- kernel/kernel/Terminal/TTY.cpp | 12 +- kernel/kernel/Thread.cpp | 36 +- kernel/kernel/ThreadBlocker.cpp | 28 + kernel/kernel/Timer/HPET.cpp | 4 +- kernel/kernel/Timer/PIT.cpp | 4 +- kernel/kernel/Timer/Timer.cpp | 16 +- kernel/kernel/USB/XHCI/Controller.cpp | 4 +- kernel/kernel/kernel.cpp | 18 +- 60 files changed, 1559 insertions(+), 715 deletions(-) delete mode 100644 kernel/include/kernel/SchedulerQueue.h delete mode 100644 kernel/include/kernel/Semaphore.h create mode 100644 kernel/include/kernel/ThreadBlocker.h delete mode 100644 kernel/kernel/Semaphore.cpp create mode 100644 kernel/kernel/ThreadBlocker.cpp diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt index cf185ea7..c3307b9e 100644 --- a/kernel/CMakeLists.txt +++ b/kernel/CMakeLists.txt @@ -71,7 +71,7 @@ set(KERNEL_SOURCES kernel/Processor.cpp kernel/Random.cpp kernel/Scheduler.cpp - kernel/Semaphore.cpp + kernel/ThreadBlocker.cpp kernel/SSP.cpp kernel/Storage/ATA/AHCI/Controller.cpp kernel/Storage/ATA/AHCI/Device.cpp diff --git a/kernel/arch/i686/PageTable.cpp b/kernel/arch/i686/PageTable.cpp index 94e98039..c953b2b0 100644 --- a/kernel/arch/i686/PageTable.cpp +++ b/kernel/arch/i686/PageTable.cpp @@ -204,7 +204,7 @@ namespace Kernel ASSERT(!(pt[pte] & Flags::Present)); pt[pte] = paddr | Flags::ReadWrite | Flags::Present; - invalidate(fast_page()); + invalidate(fast_page(), false); } void PageTable::unmap_fast_page() @@ -224,7 +224,7 @@ namespace Kernel ASSERT(pt[pte] & Flags::Present); pt[pte] = 0; - invalidate(fast_page()); + invalidate(fast_page(), false); } BAN::ErrorOr PageTable::create_userspace() @@ -283,13 +283,24 @@ namespace Kernel Processor::set_current_page_table(this); } - void PageTable::invalidate(vaddr_t vaddr) + void PageTable::invalidate(vaddr_t vaddr, bool send_smp_message) { ASSERT(vaddr % PAGE_SIZE == 0); asm volatile("invlpg (%0)" :: "r"(vaddr) : "memory"); + + if (send_smp_message) + { + Processor::broadcast_smp_message({ + .type = Processor::SMPMessage::Type::FlushTLB, + .flush_tlb = { + .vaddr = vaddr, + .page_count = 1 + } + }); + } } - void PageTable::unmap_page(vaddr_t vaddr) + void PageTable::unmap_page(vaddr_t vaddr, bool send_smp_message) { ASSERT(vaddr); ASSERT(vaddr % PAGE_SIZE == 0); @@ -306,30 +317,36 @@ namespace Kernel SpinLockGuard _(m_lock); if (is_page_free(vaddr)) - { - dwarnln("unmapping unmapped page {8H}", vaddr); - return; - } + Kernel::panic("trying to unmap unmapped page 0x{H}", vaddr); uint64_t* pdpt = reinterpret_cast(P2V(m_highest_paging_struct)); uint64_t* pd = reinterpret_cast(P2V(pdpt[pdpte] & PAGE_ADDR_MASK)); uint64_t* pt = reinterpret_cast(P2V(pd[pde] & PAGE_ADDR_MASK)); pt[pte] = 0; - invalidate(vaddr); + invalidate(vaddr, send_smp_message); } void PageTable::unmap_range(vaddr_t vaddr, size_t size) { - vaddr_t s_page = vaddr / PAGE_SIZE; - vaddr_t e_page = BAN::Math::div_round_up(vaddr + size, PAGE_SIZE); + ASSERT(vaddr % PAGE_SIZE == 0); + + size_t page_count = range_page_count(vaddr, size); SpinLockGuard _(m_lock); - for (vaddr_t page = s_page; page < e_page; page++) - unmap_page(page * PAGE_SIZE); + for (vaddr_t page = 0; page < page_count; page++) + unmap_page(vaddr + page * PAGE_SIZE, false); + + Processor::broadcast_smp_message({ + .type = Processor::SMPMessage::Type::FlushTLB, + .flush_tlb = { + .vaddr = vaddr, + .page_count = page_count + } + }); } - void PageTable::map_page_at(paddr_t paddr, vaddr_t vaddr, flags_t flags, MemoryType memory_type) + void PageTable::map_page_at(paddr_t paddr, vaddr_t vaddr, flags_t flags, MemoryType memory_type, bool send_smp_message) { ASSERT(vaddr); ASSERT(vaddr != fast_page()); @@ -383,7 +400,7 @@ namespace Kernel uint64_t* pt = reinterpret_cast(P2V(pd[pde] & PAGE_ADDR_MASK)); pt[pte] = paddr | uwr_flags | extra_flags; - invalidate(vaddr); + invalidate(vaddr, send_smp_message); } void PageTable::map_range_at(paddr_t paddr, vaddr_t vaddr, size_t size, flags_t flags, MemoryType memory_type) @@ -396,7 +413,15 @@ namespace Kernel SpinLockGuard _(m_lock); for (size_t page = 0; page < page_count; page++) - map_page_at(paddr + page * PAGE_SIZE, vaddr + page * PAGE_SIZE, flags, memory_type); + map_page_at(paddr + page * PAGE_SIZE, vaddr + page * PAGE_SIZE, flags, memory_type, false); + + Processor::broadcast_smp_message({ + .type = Processor::SMPMessage::Type::FlushTLB, + .flush_tlb = { + .vaddr = vaddr, + .page_count = page_count + } + }); } uint64_t PageTable::get_page_data(vaddr_t vaddr) const diff --git a/kernel/arch/i686/interrupts.S b/kernel/arch/i686/interrupts.S index f93d1905..43b8ee69 100644 --- a/kernel/arch/i686/interrupts.S +++ b/kernel/arch/i686/interrupts.S @@ -97,6 +97,22 @@ asm_yield_handler: popal iret +.global asm_ipi_handler +asm_ipi_handler: + push_userspace + load_kernel_segments + + movl %esp, %ebp + subl $15, %esp + andl $0xFFFFFFF0, %esp + + call cpp_ipi_handler + + movl %ebp, %esp + + pop_userspace + iret + .macro isr n .global isr\n isr\n: @@ -185,4 +201,3 @@ irq 28 irq 29 irq 30 irq 31 -irq 32 diff --git a/kernel/arch/x86_64/PageTable.cpp b/kernel/arch/x86_64/PageTable.cpp index 26b7ca53..313d8d2b 100644 --- a/kernel/arch/x86_64/PageTable.cpp +++ b/kernel/arch/x86_64/PageTable.cpp @@ -78,7 +78,6 @@ namespace Kernel ASSERT(s_kernel); s_kernel->initialize_kernel(); - s_kernel->initial_load(); } void PageTable::initial_load() @@ -237,7 +236,7 @@ namespace Kernel ASSERT(!(pt[pte] & Flags::Present)); pt[pte] = paddr | Flags::ReadWrite | Flags::Present; - invalidate(fast_page()); + invalidate(fast_page(), false); } void PageTable::unmap_fast_page() @@ -260,7 +259,7 @@ namespace Kernel ASSERT(pt[pte] & Flags::Present); pt[pte] = 0; - invalidate(fast_page()); + invalidate(fast_page(), false); } BAN::ErrorOr PageTable::create_userspace() @@ -322,13 +321,24 @@ namespace Kernel Processor::set_current_page_table(this); } - void PageTable::invalidate(vaddr_t vaddr) + void PageTable::invalidate(vaddr_t vaddr, bool send_smp_message) { ASSERT(vaddr % PAGE_SIZE == 0); asm volatile("invlpg (%0)" :: "r"(vaddr) : "memory"); + + if (send_smp_message) + { + Processor::broadcast_smp_message({ + .type = Processor::SMPMessage::Type::FlushTLB, + .flush_tlb = { + .vaddr = vaddr, + .page_count = 1 + } + }); + } } - void PageTable::unmap_page(vaddr_t vaddr) + void PageTable::unmap_page(vaddr_t vaddr, bool send_smp_message) { ASSERT(vaddr); ASSERT(vaddr != fast_page()); @@ -350,10 +360,7 @@ namespace Kernel SpinLockGuard _(m_lock); if (is_page_free(vaddr)) - { - dwarnln("unmapping unmapped page {8H}", vaddr); - return; - } + Kernel::panic("trying to unmap unmapped page 0x{H}", vaddr); uint64_t* pml4 = (uint64_t*)P2V(m_highest_paging_struct); uint64_t* pdpt = (uint64_t*)P2V(pml4[pml4e] & PAGE_ADDR_MASK); @@ -361,20 +368,29 @@ namespace Kernel uint64_t* pt = (uint64_t*)P2V(pd[pde] & PAGE_ADDR_MASK); pt[pte] = 0; - invalidate(vaddr); + invalidate(vaddr, send_smp_message); } void PageTable::unmap_range(vaddr_t vaddr, size_t size) { - vaddr_t s_page = vaddr / PAGE_SIZE; - vaddr_t e_page = BAN::Math::div_round_up(vaddr + size, PAGE_SIZE); + ASSERT(vaddr % PAGE_SIZE == 0); + + size_t page_count = range_page_count(vaddr, size); SpinLockGuard _(m_lock); - for (vaddr_t page = s_page; page < e_page; page++) - unmap_page(page * PAGE_SIZE); + for (vaddr_t page = 0; page < page_count; page++) + unmap_page(vaddr + page * PAGE_SIZE, false); + + Processor::broadcast_smp_message({ + .type = Processor::SMPMessage::Type::FlushTLB, + .flush_tlb = { + .vaddr = vaddr, + .page_count = page_count + } + }); } - void PageTable::map_page_at(paddr_t paddr, vaddr_t vaddr, flags_t flags, MemoryType memory_type) + void PageTable::map_page_at(paddr_t paddr, vaddr_t vaddr, flags_t flags, MemoryType memory_type, bool send_smp_message) { ASSERT(vaddr); ASSERT(vaddr != fast_page()); @@ -441,7 +457,7 @@ namespace Kernel uint64_t* pt = (uint64_t*)P2V(pd[pde] & PAGE_ADDR_MASK); pt[pte] = paddr | uwr_flags | extra_flags; - invalidate(vaddr); + invalidate(vaddr, send_smp_message); } void PageTable::map_range_at(paddr_t paddr, vaddr_t vaddr, size_t size, flags_t flags, MemoryType memory_type) @@ -456,7 +472,15 @@ namespace Kernel SpinLockGuard _(m_lock); for (size_t page = 0; page < page_count; page++) - map_page_at(paddr + page * PAGE_SIZE, vaddr + page * PAGE_SIZE, flags, memory_type); + map_page_at(paddr + page * PAGE_SIZE, vaddr + page * PAGE_SIZE, flags, memory_type, false); + + Processor::broadcast_smp_message({ + .type = Processor::SMPMessage::Type::FlushTLB, + .flush_tlb = { + .vaddr = vaddr, + .page_count = page_count + } + }); } uint64_t PageTable::get_page_data(vaddr_t vaddr) const diff --git a/kernel/arch/x86_64/interrupts.S b/kernel/arch/x86_64/interrupts.S index 3d01e09b..ca9fb50b 100644 --- a/kernel/arch/x86_64/interrupts.S +++ b/kernel/arch/x86_64/interrupts.S @@ -70,6 +70,13 @@ asm_yield_handler: popaq iretq +.global asm_ipi_handler +asm_ipi_handler: + pushaq + call cpp_ipi_handler + popaq + iretq + .macro isr n .global isr\n isr\n: @@ -158,4 +165,3 @@ irq 28 irq 29 irq 30 irq 31 -irq 32 diff --git a/kernel/include/kernel/ACPI/ACPI.h b/kernel/include/kernel/ACPI/ACPI.h index f5bccd48..4706da17 100644 --- a/kernel/include/kernel/ACPI/ACPI.h +++ b/kernel/include/kernel/ACPI/ACPI.h @@ -5,6 +5,7 @@ #include #include #include +#include namespace Kernel::ACPI { @@ -63,7 +64,7 @@ namespace Kernel::ACPI FADT* m_fadt { nullptr }; - Semaphore m_event_semaphore; + ThreadBlocker m_event_thread_blocker; BAN::Array, 0xFF> m_gpe_methods; bool m_hardware_reduced { false }; diff --git a/kernel/include/kernel/ACPI/AML/Mutex.h b/kernel/include/kernel/ACPI/AML/Mutex.h index f29dfa65..4fa9ee94 100644 --- a/kernel/include/kernel/ACPI/AML/Mutex.h +++ b/kernel/include/kernel/ACPI/AML/Mutex.h @@ -120,7 +120,7 @@ namespace Kernel::ACPI::AML { if (SystemTimer::get().ms_since_boot() >= wake_time) return ParseResult(Integer::Constants::Ones); - SystemTimer::get().sleep(1); + Processor::yield(); } } diff --git a/kernel/include/kernel/ACPI/AML/Sleep.h b/kernel/include/kernel/ACPI/AML/Sleep.h index 599a99d1..fe23293d 100644 --- a/kernel/include/kernel/ACPI/AML/Sleep.h +++ b/kernel/include/kernel/ACPI/AML/Sleep.h @@ -30,7 +30,7 @@ namespace Kernel::ACPI::AML AML_DEBUG_PRINTLN("Sleeping for {} ms", sleep_time.value()); #endif - SystemTimer::get().sleep(sleep_time.value()); + SystemTimer::get().sleep_ms(sleep_time.value()); return ParseResult::Success; } }; diff --git a/kernel/include/kernel/FS/DevFS/FileSystem.h b/kernel/include/kernel/FS/DevFS/FileSystem.h index a27c606e..18ebbce5 100644 --- a/kernel/include/kernel/FS/DevFS/FileSystem.h +++ b/kernel/include/kernel/FS/DevFS/FileSystem.h @@ -4,7 +4,7 @@ #include #include #include -#include +#include namespace Kernel { @@ -34,8 +34,8 @@ namespace Kernel BAN::Vector> m_devices; - Semaphore m_sync_done; - Semaphore m_sync_semaphore; + ThreadBlocker m_sync_done; + ThreadBlocker m_sync_thread_blocker; volatile bool m_should_sync { false }; }; diff --git a/kernel/include/kernel/FS/Pipe.h b/kernel/include/kernel/FS/Pipe.h index 2d36dd72..d8bd0372 100644 --- a/kernel/include/kernel/FS/Pipe.h +++ b/kernel/include/kernel/FS/Pipe.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include namespace Kernel { @@ -47,7 +47,7 @@ namespace Kernel timespec m_mtime {}; timespec m_ctime {}; BAN::Vector m_buffer; - Semaphore m_semaphore; + ThreadBlocker m_thread_blocker; uint32_t m_writing_count { 1 }; }; diff --git a/kernel/include/kernel/IDT.h b/kernel/include/kernel/IDT.h index 24dd302a..921819e9 100644 --- a/kernel/include/kernel/IDT.h +++ b/kernel/include/kernel/IDT.h @@ -8,8 +8,8 @@ #include constexpr uint8_t IRQ_VECTOR_BASE = 0x20; -constexpr uint8_t IRQ_IPI = 32; -constexpr uint8_t IRQ_YIELD = 33; +constexpr uint8_t IRQ_YIELD = 32; +constexpr uint8_t IRQ_IPI = 33; namespace Kernel { diff --git a/kernel/include/kernel/Input/InputDevice.h b/kernel/include/kernel/Input/InputDevice.h index a01efda4..5adf0b89 100644 --- a/kernel/include/kernel/Input/InputDevice.h +++ b/kernel/include/kernel/Input/InputDevice.h @@ -3,6 +3,7 @@ #include #include +#include namespace Kernel { @@ -42,7 +43,7 @@ namespace Kernel const Type m_type; mutable SpinLock m_event_lock; - Semaphore m_event_semaphore; + ThreadBlocker m_event_thread_blocker; static constexpr size_t m_max_event_count { 128 }; @@ -63,7 +64,7 @@ namespace Kernel public: static BAN::ErrorOr> create(mode_t mode, uid_t uid, gid_t gid); - void notify() { m_semaphore.unblock(); } + void notify() { m_thread_blocker.unblock(); } private: KeyboardDevice(mode_t mode, uid_t uid, gid_t gid); @@ -79,7 +80,7 @@ namespace Kernel private: const dev_t m_rdev; const BAN::StringView m_name; - Semaphore m_semaphore; + ThreadBlocker m_thread_blocker; friend class BAN::RefPtr; }; @@ -89,7 +90,7 @@ namespace Kernel public: static BAN::ErrorOr> create(mode_t mode, uid_t uid, gid_t gid); - void notify() { m_semaphore.unblock(); } + void notify() { m_thread_blocker.unblock(); } private: MouseDevice(mode_t mode, uid_t uid, gid_t gid); @@ -105,7 +106,7 @@ namespace Kernel private: const dev_t m_rdev; const BAN::StringView m_name; - Semaphore m_semaphore; + ThreadBlocker m_thread_blocker; friend class BAN::RefPtr; }; diff --git a/kernel/include/kernel/InterruptStack.h b/kernel/include/kernel/InterruptStack.h index f08581ca..4828bbf3 100644 --- a/kernel/include/kernel/InterruptStack.h +++ b/kernel/include/kernel/InterruptStack.h @@ -1,5 +1,7 @@ #pragma once +#include + #include namespace Kernel diff --git a/kernel/include/kernel/Lock/Mutex.h b/kernel/include/kernel/Lock/Mutex.h index 691e2d01..db9946af 100644 --- a/kernel/include/kernel/Lock/Mutex.h +++ b/kernel/include/kernel/Lock/Mutex.h @@ -2,7 +2,7 @@ #include #include -#include +#include #include @@ -19,7 +19,7 @@ namespace Kernel void lock() { - auto tid = Scheduler::current_tid(); + const auto tid = Thread::current_tid(); if (tid == m_locker) ASSERT(m_lock_depth > 0); else @@ -27,11 +27,11 @@ namespace Kernel pid_t expected = -1; while (!m_locker.compare_exchange(expected, tid)) { - Scheduler::get().yield(); + Processor::yield(); expected = -1; } ASSERT(m_lock_depth == 0); - if (Scheduler::current_tid()) + if (tid) Thread::current().add_mutex(); } m_lock_depth++; @@ -39,7 +39,7 @@ namespace Kernel bool try_lock() { - auto tid = Scheduler::current_tid(); + const auto tid = Thread::current_tid(); if (tid == m_locker) ASSERT(m_lock_depth > 0); else @@ -48,7 +48,7 @@ namespace Kernel if (!m_locker.compare_exchange(expected, tid)) return false; ASSERT(m_lock_depth == 0); - if (Scheduler::current_tid()) + if (tid) Thread::current().add_mutex(); } m_lock_depth++; @@ -57,12 +57,13 @@ namespace Kernel void unlock() { - ASSERT(m_locker == Scheduler::current_tid()); + const auto tid = Thread::current_tid(); + ASSERT(m_locker == tid); ASSERT(m_lock_depth > 0); if (--m_lock_depth == 0) { m_locker = -1; - if (Scheduler::current_tid()) + if (tid) Thread::current().remove_mutex(); } } @@ -86,7 +87,7 @@ namespace Kernel void lock() { - auto tid = Scheduler::current_tid(); + const auto tid = Thread::current_tid(); if (tid == m_locker) ASSERT(m_lock_depth > 0); else @@ -97,11 +98,11 @@ namespace Kernel pid_t expected = -1; while (!(has_priority || m_queue_length == 0) || !m_locker.compare_exchange(expected, tid)) { - Scheduler::get().yield(); + Processor::yield(); expected = -1; } ASSERT(m_lock_depth == 0); - if (Scheduler::current_tid()) + if (tid) Thread::current().add_mutex(); } m_lock_depth++; @@ -109,7 +110,7 @@ namespace Kernel bool try_lock() { - auto tid = Scheduler::current_tid(); + const auto tid = Thread::current_tid(); if (tid == m_locker) ASSERT(m_lock_depth > 0); else @@ -121,7 +122,7 @@ namespace Kernel if (has_priority) m_queue_length++; ASSERT(m_lock_depth == 0); - if (Scheduler::current_tid()) + if (tid) Thread::current().add_mutex(); } m_lock_depth++; @@ -130,7 +131,7 @@ namespace Kernel void unlock() { - auto tid = Scheduler::current_tid(); + const auto tid = Thread::current_tid(); ASSERT(m_locker == tid); ASSERT(m_lock_depth > 0); if (--m_lock_depth == 0) @@ -139,7 +140,7 @@ namespace Kernel if (has_priority) m_queue_length--; m_locker = -1; - if (Scheduler::current_tid()) + if (tid) Thread::current().remove_mutex(); } } diff --git a/kernel/include/kernel/Lock/SpinLock.h b/kernel/include/kernel/Lock/SpinLock.h index 563bf8bc..ff2f6fa5 100644 --- a/kernel/include/kernel/Lock/SpinLock.h +++ b/kernel/include/kernel/Lock/SpinLock.h @@ -23,34 +23,45 @@ namespace Kernel auto state = Processor::get_interrupt_state(); Processor::set_interrupt_state(InterruptState::Disabled); - auto id = Processor::current_id(); - ASSERT(m_locker != id); + auto id = Processor::current_id().as_u32(); + ASSERT(m_locker.load(BAN::MemoryOrder::memory_order_relaxed) != id); - ProcessorID expected = PROCESSOR_NONE; + auto expected = PROCESSOR_NONE.as_u32(); while (!m_locker.compare_exchange(expected, id, BAN::MemoryOrder::memory_order_acquire)) { - __builtin_ia32_pause(); - expected = PROCESSOR_NONE; + Processor::pause(); + expected = PROCESSOR_NONE.as_u32(); } return state; } + bool try_lock_interrupts_disabled() + { + ASSERT(Processor::get_interrupt_state() == InterruptState::Disabled); + + auto id = Processor::current_id().as_u32(); + ASSERT(m_locker.load(BAN::MemoryOrder::memory_order_relaxed) != id); + + auto expected = PROCESSOR_NONE.as_u32(); + return m_locker.compare_exchange(expected, id, BAN::MemoryOrder::memory_order_acquire); + } + void unlock(InterruptState state) { ASSERT(Processor::get_interrupt_state() == InterruptState::Disabled); - ASSERT(m_locker == Processor::current_id()); - m_locker.store(PROCESSOR_NONE, BAN::MemoryOrder::memory_order_release); + ASSERT(current_processor_has_lock()); + m_locker.store(PROCESSOR_NONE.as_u32(), BAN::MemoryOrder::memory_order_release); Processor::set_interrupt_state(state); } bool current_processor_has_lock() const { - return m_locker == Processor::current_id(); + return m_locker.load(BAN::MemoryOrder::memory_order_relaxed) == Processor::current_id().as_u32(); } private: - BAN::Atomic m_locker { PROCESSOR_NONE }; + BAN::Atomic m_locker { PROCESSOR_NONE.as_u32() }; }; class RecursiveSpinLock @@ -66,18 +77,15 @@ namespace Kernel auto state = Processor::get_interrupt_state(); Processor::set_interrupt_state(InterruptState::Disabled); - auto id = Processor::current_id(); - if (m_locker == id) - ASSERT(m_lock_depth > 0); - else + auto id = Processor::current_id().as_u32(); + + ProcessorID::value_type expected = PROCESSOR_NONE.as_u32(); + while (!m_locker.compare_exchange(expected, id, BAN::MemoryOrder::memory_order_acq_rel)) { - ProcessorID expected = PROCESSOR_NONE; - while (!m_locker.compare_exchange(expected, id, BAN::MemoryOrder::memory_order_acquire)) - { - __builtin_ia32_pause(); - expected = PROCESSOR_NONE; - } - ASSERT(m_lock_depth == 0); + if (expected == id) + break; + Processor::pause(); + expected = PROCESSOR_NONE.as_u32(); } m_lock_depth++; @@ -88,21 +96,21 @@ namespace Kernel void unlock(InterruptState state) { ASSERT(Processor::get_interrupt_state() == InterruptState::Disabled); - ASSERT(m_locker == Processor::current_id()); + ASSERT(current_processor_has_lock()); ASSERT(m_lock_depth > 0); if (--m_lock_depth == 0) - m_locker.store(PROCESSOR_NONE, BAN::MemoryOrder::memory_order_release); + m_locker.store(PROCESSOR_NONE.as_u32(), BAN::MemoryOrder::memory_order_release); Processor::set_interrupt_state(state); } bool current_processor_has_lock() const { - return m_locker == Processor::current_id(); + return m_locker.load(BAN::MemoryOrder::memory_order_relaxed) == Processor::current_id().as_u32(); } private: - BAN::Atomic m_locker { PROCESSOR_NONE }; - uint32_t m_lock_depth { 0 }; + BAN::Atomic m_locker { PROCESSOR_NONE.as_u32() }; + uint32_t m_lock_depth { 0 }; }; template diff --git a/kernel/include/kernel/Memory/PageTable.h b/kernel/include/kernel/Memory/PageTable.h index d67618ad..aa92b273 100644 --- a/kernel/include/kernel/Memory/PageTable.h +++ b/kernel/include/kernel/Memory/PageTable.h @@ -95,11 +95,11 @@ namespace Kernel static BAN::ErrorOr create_userspace(); ~PageTable(); - void unmap_page(vaddr_t); + void unmap_page(vaddr_t, bool send_smp_message = true); void unmap_range(vaddr_t, size_t bytes); + void map_page_at(paddr_t, vaddr_t, flags_t, MemoryType = MemoryType::Normal, bool send_smp_message = true); void map_range_at(paddr_t, vaddr_t, size_t bytes, flags_t, MemoryType = MemoryType::Normal); - void map_page_at(paddr_t, vaddr_t, flags_t, MemoryType = MemoryType::Normal); paddr_t physical_address_of(vaddr_t) const; flags_t get_page_flags(vaddr_t) const; @@ -127,7 +127,8 @@ namespace Kernel void initialize_kernel(); void map_kernel_memory(); void prepare_fast_page(); - static void invalidate(vaddr_t); + + static void invalidate(vaddr_t, bool send_smp_message); static void map_fast_page(paddr_t); static void unmap_fast_page(); diff --git a/kernel/include/kernel/Networking/ARPTable.h b/kernel/include/kernel/Networking/ARPTable.h index 7df13abb..ae93f6d7 100644 --- a/kernel/include/kernel/Networking/ARPTable.h +++ b/kernel/include/kernel/Networking/ARPTable.h @@ -5,7 +5,7 @@ #include #include #include -#include +#include namespace Kernel { @@ -58,7 +58,7 @@ namespace Kernel Process* m_process = nullptr; BAN::CircularQueue m_pending_packets; - Semaphore m_pending_semaphore; + ThreadBlocker m_pending_thread_blocker; friend class BAN::UniqPtr; }; diff --git a/kernel/include/kernel/Networking/IPv4Layer.h b/kernel/include/kernel/Networking/IPv4Layer.h index 0fb1b208..b8c23c6a 100644 --- a/kernel/include/kernel/Networking/IPv4Layer.h +++ b/kernel/include/kernel/Networking/IPv4Layer.h @@ -77,7 +77,7 @@ namespace Kernel static constexpr size_t pending_packet_buffer_size = 128 * PAGE_SIZE; BAN::UniqPtr m_pending_packet_buffer; BAN::CircularQueue m_pending_packets; - Semaphore m_pending_semaphore; + ThreadBlocker m_pending_thread_blocker; SpinLock m_pending_lock; size_t m_pending_total_size { 0 }; diff --git a/kernel/include/kernel/Networking/TCPSocket.h b/kernel/include/kernel/Networking/TCPSocket.h index f52f8702..bbdbc19a 100644 --- a/kernel/include/kernel/Networking/TCPSocket.h +++ b/kernel/include/kernel/Networking/TCPSocket.h @@ -7,7 +7,7 @@ #include #include #include -#include +#include namespace Kernel { @@ -161,7 +161,7 @@ namespace Kernel uint64_t m_time_wait_start_ms { 0 }; - Semaphore m_semaphore; + ThreadBlocker m_thread_blocker; RecvWindowInfo m_recv_window; SendWindowInfo m_send_window; diff --git a/kernel/include/kernel/Networking/UDPSocket.h b/kernel/include/kernel/Networking/UDPSocket.h index 76714699..0cbda27d 100644 --- a/kernel/include/kernel/Networking/UDPSocket.h +++ b/kernel/include/kernel/Networking/UDPSocket.h @@ -6,7 +6,7 @@ #include #include #include -#include +#include namespace Kernel { @@ -57,7 +57,7 @@ namespace Kernel BAN::CircularQueue m_packets; size_t m_packet_total_size { 0 }; SpinLock m_packet_lock; - Semaphore m_packet_semaphore; + ThreadBlocker m_packet_thread_blocker; friend class BAN::RefPtr; }; diff --git a/kernel/include/kernel/Networking/UNIX/Socket.h b/kernel/include/kernel/Networking/UNIX/Socket.h index 867aa777..a2a78945 100644 --- a/kernel/include/kernel/Networking/UNIX/Socket.h +++ b/kernel/include/kernel/Networking/UNIX/Socket.h @@ -48,7 +48,7 @@ namespace Kernel mutable BAN::Atomic target_closed { false }; BAN::WeakPtr connection; BAN::Queue> pending_connections; - Semaphore pending_semaphore; + ThreadBlocker pending_thread_blocker; SpinLock pending_lock; }; @@ -67,7 +67,7 @@ namespace Kernel size_t m_packet_size_total { 0 }; BAN::UniqPtr m_packet_buffer; SpinLock m_packet_lock; - Semaphore m_packet_semaphore; + ThreadBlocker m_packet_thread_blocker; friend class BAN::RefPtr; }; diff --git a/kernel/include/kernel/Process.h b/kernel/include/kernel/Process.h index ad702826..7c750eee 100644 --- a/kernel/include/kernel/Process.h +++ b/kernel/include/kernel/Process.h @@ -251,7 +251,7 @@ namespace Kernel private: struct ExitStatus { - Semaphore semaphore; + ThreadBlocker thread_blocker; int exit_code { 0 }; BAN::Atomic exited { false }; BAN::Atomic waiting { 0 }; diff --git a/kernel/include/kernel/Processor.h b/kernel/include/kernel/Processor.h index c9d6234c..e13d4033 100644 --- a/kernel/include/kernel/Processor.h +++ b/kernel/include/kernel/Processor.h @@ -1,12 +1,14 @@ #pragma once +#include +#include #include #include #include #include #include -#include +#include namespace Kernel { @@ -17,8 +19,28 @@ namespace Kernel Enabled, }; - using ProcessorID = uint32_t; - constexpr ProcessorID PROCESSOR_NONE = 0xFFFFFFFF; + class ProcessorID + { + public: + using value_type = uint32_t; + + public: + ProcessorID() = default; + + uint32_t as_u32() const { return m_id; } + bool operator==(ProcessorID other) const { return m_id == other.m_id; } + + private: + explicit ProcessorID(uint32_t id) : m_id(id) {} + + private: + uint32_t m_id = static_cast(-1); + + friend class Processor; + friend class APIC; + }; + + constexpr ProcessorID PROCESSOR_NONE { }; #if ARCH(x86_64) || ARCH(i686) class Processor @@ -26,12 +48,44 @@ namespace Kernel BAN_NON_COPYABLE(Processor); BAN_NON_MOVABLE(Processor); + public: + struct SMPMessage + { + enum class Type + { + FlushTLB, + NewThread, + UnblockThread, + // FIXME: all processors should LAPIC for their preemption + SchedulerPreemption, + }; + SMPMessage* next { nullptr }; + Type type; + union + { + struct + { + uintptr_t vaddr; + size_t page_count; + } flush_tlb; + Scheduler::NewThreadRequest new_thread; + Scheduler::UnblockRequest unblock_thread; + uintptr_t scheduler_preemption; + }; + }; + public: static Processor& create(ProcessorID id); static Processor& initialize(); - static void allocate_idle_thread(); static ProcessorID current_id() { return read_gs_sized(offsetof(Processor, m_id)); } + static ProcessorID id_from_index(size_t index); + + static uint8_t count() { return s_processor_count; } + static bool is_smp_enabled() { return s_is_smp_enabled; } + static void wait_until_processors_ready(); + + static void toggle_should_print_cpu_load() { s_should_print_cpu_load = !s_should_print_cpu_load; } static ProcessorID bsb_id() { return s_bsb_id; } static bool current_is_bsb() { return current_id() == bsb_id(); } @@ -53,31 +107,40 @@ namespace Kernel return InterruptState::Disabled; }; - static uintptr_t current_stack_bottom() { return reinterpret_cast(read_gs_ptr(offsetof(Processor, m_stack))); } + static void pause() + { + __builtin_ia32_pause(); + if (is_smp_enabled()) + handle_smp_messages(); + } + + static uintptr_t current_stack_bottom() { return read_gs_sized(offsetof(Processor, m_stack)); } static uintptr_t current_stack_top() { return current_stack_bottom() + s_stack_size; } uintptr_t stack_bottom() const { return reinterpret_cast(m_stack); } uintptr_t stack_top() const { return stack_bottom() + s_stack_size; } - static GDT& gdt() { return *reinterpret_cast(read_gs_ptr(offsetof(Processor, m_gdt))); } - static IDT& idt() { return *reinterpret_cast(read_gs_ptr(offsetof(Processor, m_idt))); } + static GDT& gdt() { return *read_gs_sized(offsetof(Processor, m_gdt)); } + static IDT& idt() { return *read_gs_sized(offsetof(Processor, m_idt)); } - static void* get_current_page_table() { return read_gs_ptr(offsetof(Processor, m_current_page_table)); } - static void set_current_page_table(void* page_table) { write_gs_ptr(offsetof(Processor, m_current_page_table), page_table); } + static void* get_current_page_table() { return read_gs_sized(offsetof(Processor, m_current_page_table)); } + static void set_current_page_table(void* page_table) { write_gs_sized(offsetof(Processor, m_current_page_table), page_table); } - static Thread* idle_thread() { return reinterpret_cast(read_gs_ptr(offsetof(Processor, m_idle_thread))); } - static SchedulerQueue::Node* get_current_thread() { return reinterpret_cast(read_gs_ptr(offsetof(Processor, m_current_thread))); } - static void set_current_thread(SchedulerQueue::Node* thread) { write_gs_ptr(offsetof(Processor, m_current_thread), thread); } + static void yield(); + static Scheduler& scheduler() { return *read_gs_sized(offsetof(Processor, m_scheduler)); } - static void enter_interrupt(InterruptStack*, InterruptRegisters*); - static void leave_interrupt(); - static InterruptStack& get_interrupt_stack(); - static InterruptRegisters& get_interrupt_registers(); + static void handle_ipi(); + + static void handle_smp_messages(); + static void send_smp_message(ProcessorID, const SMPMessage&, bool send_ipi = true); + static void broadcast_smp_message(const SMPMessage&); private: Processor() = default; ~Processor() { ASSERT_NOT_REACHED(); } + static ProcessorID read_processor_id(); + template static T read_gs_sized(uintptr_t offset) requires(sizeof(T) <= 8) { @@ -110,11 +173,10 @@ namespace Kernel #undef __ASM_INPUT } - static void* read_gs_ptr(uintptr_t offset) { return read_gs_sized(offset); } - static void write_gs_ptr(uintptr_t offset, void* value) { write_gs_sized(offset, value); } - private: static ProcessorID s_bsb_id; + static BAN::Atomic s_processor_count; + static BAN::Atomic s_is_smp_enabled; ProcessorID m_id { PROCESSOR_NONE }; @@ -124,11 +186,15 @@ namespace Kernel GDT* m_gdt { nullptr }; IDT* m_idt { nullptr }; - Thread* m_idle_thread { nullptr }; - SchedulerQueue::Node* m_current_thread { nullptr }; + Scheduler* m_scheduler { nullptr }; - InterruptStack* m_interrupt_stack { nullptr }; - InterruptRegisters* m_interrupt_registers { nullptr }; + BAN::Atomic m_smp_pending_lock { false }; + SMPMessage* m_smp_pending { nullptr }; + + BAN::Atomic m_smp_free_lock { false }; + SMPMessage* m_smp_free { nullptr }; + + SMPMessage* m_smp_message_storage; void* m_current_page_table { nullptr }; @@ -139,3 +205,14 @@ namespace Kernel #endif } + +namespace BAN::Formatter +{ + + template + void print_argument(F putc, Kernel::ProcessorID processor_id, const ValueFormat& format) + { + print_argument(putc, processor_id.as_u32(), format); + } + +} diff --git a/kernel/include/kernel/Scheduler.h b/kernel/include/kernel/Scheduler.h index bd0a75ed..3b4a8644 100644 --- a/kernel/include/kernel/Scheduler.h +++ b/kernel/include/kernel/Scheduler.h @@ -1,55 +1,149 @@ #pragma once -#include -#include -#include +#include +#include +#include +#include + +#include namespace Kernel { - class Scheduler + class Thread; + class ThreadBlocker; + + class SchedulerQueue { public: - static BAN::ErrorOr initialize(); - static Scheduler& get(); + struct Node + { + Node(Thread* thread) + : thread(thread) + {} - [[noreturn]] void start(); + Node* next { nullptr }; + Node* prev { nullptr }; - void yield(); + Thread* thread; + ThreadBlocker* blocker { nullptr }; + uint64_t wake_time_ns { static_cast(-1) }; - void timer_reschedule(); - void irq_reschedule(); - void reschedule_if_idling(); + uint64_t last_start_ns { 0 }; + uint64_t time_used_ns { 0 }; + }; - void set_current_thread_sleeping(uint64_t wake_time); + public: + void add_thread_to_back(Node*); + void add_thread_with_wake_time(Node*); + template + Node* remove_with_condition(F callback); + void remove_node(Node*); + Node* front(); + Node* pop_front(); - void block_current_thread(Semaphore*, uint64_t wake_time); - void unblock_threads(Semaphore*); - // Makes sleeping or blocked thread with tid active. + bool empty() const { return m_head == nullptr; } + + private: + Node* m_head { nullptr }; + Node* m_tail { nullptr }; + }; + + class Scheduler + { + BAN_NON_COPYABLE(Scheduler); + BAN_NON_MOVABLE(Scheduler); + + public: + struct NewThreadRequest + { + SchedulerQueue::Node* node; + bool blocked; + }; + + struct UnblockRequest + { + enum class Type + { + ThreadBlocker, + ThreadID, + }; + Type type; + union + { + ThreadBlocker* blocker; + pid_t tid; + }; + }; + + public: + static BAN::ErrorOr create(); + BAN::ErrorOr initialize(); + + void reschedule(InterruptStack*, InterruptRegisters*); + void reschedule_if_idle(); + + void timer_interrupt(); + + BAN::ErrorOr add_thread(Thread*); + + void block_current_thread(ThreadBlocker* thread_blocker, uint64_t wake_time_ns); + void unblock_threads(ThreadBlocker*); void unblock_thread(pid_t tid); Thread& current_thread(); - static pid_t current_tid(); + Thread& idle_thread(); - // This is no return if called on current thread - void terminate_thread(Thread*); + pid_t current_tid() const; + bool is_idle() const; private: Scheduler() = default; - void set_current_thread_sleeping_impl(Semaphore* semaphore, uint64_t wake_time); + void add_current_to_most_loaded(SchedulerQueue* target_queue); + void update_most_loaded_node_queue(SchedulerQueue::Node*, SchedulerQueue* target_queue); + void remove_node_from_most_loaded(SchedulerQueue::Node*); - void setup_next_thread(); + bool do_unblock(ThreadBlocker*); + bool do_unblock(pid_t); + void do_load_balancing(); - BAN::ErrorOr add_thread(Thread*); + class ProcessorID find_least_loaded_processor() const; + + void preempt(); + + void handle_unblock_request(const UnblockRequest&); + void handle_new_thread_request(const NewThreadRequest&); private: - SpinLock m_lock; + SchedulerQueue m_run_queue; + SchedulerQueue m_block_queue; + SchedulerQueue::Node* m_current { nullptr }; + bool m_current_will_block { false }; - SchedulerQueue m_active_threads; - SchedulerQueue m_blocking_threads; + uint32_t m_thread_count { 0 }; - friend class Process; + InterruptStack* m_interrupt_stack { nullptr }; + InterruptRegisters* m_interrupt_registers { nullptr }; + + uint64_t m_last_reschedule_ns { 0 }; + uint64_t m_last_load_balance_ns { 0 }; + + struct ThreadInfo + { + SchedulerQueue* queue { nullptr }; + SchedulerQueue::Node* node { nullptr }; + }; + BAN::Array m_most_loaded_threads; + + uint64_t m_idle_start_ns { 0 }; + uint64_t m_idle_ns { 0 }; + + bool m_should_calculate_max_load_threads { true }; + + Thread* m_idle_thread { nullptr }; + + friend class Processor; }; } diff --git a/kernel/include/kernel/SchedulerQueue.h b/kernel/include/kernel/SchedulerQueue.h deleted file mode 100644 index 8c6d51a9..00000000 --- a/kernel/include/kernel/SchedulerQueue.h +++ /dev/null @@ -1,127 +0,0 @@ -#pragma once - -#include -#include - -#include - -namespace Kernel -{ - - class Thread; - class Semaphore; - - class SchedulerQueue - { - BAN_NON_COPYABLE(SchedulerQueue); - BAN_NON_MOVABLE(SchedulerQueue); - - public: - struct Node - { - Node(Thread* thread) - : thread(thread) - {} - - Thread* thread; - uint64_t wake_time { 0 }; - Semaphore* semaphore { nullptr }; - bool should_block { false }; - - private: - Node* next { nullptr }; - friend class SchedulerQueue; - friend class Scheduler; - }; - - public: - SchedulerQueue() = default; - ~SchedulerQueue() { ASSERT_NOT_REACHED(); } - - bool empty() const { return m_front == nullptr; } - - Node* pop_front() - { - ASSERT(!empty()); - - Node* node = m_front; - - m_front = m_front->next; - if (m_front == nullptr) - m_back = nullptr; - - node->next = nullptr; - - return node; - } - - void push_back(Node* node) - { - ASSERT(node); - node->next = nullptr; - - (empty() ? m_front : m_back->next) = node; - m_back = node; - } - - void add_with_wake_time(Node* node) - { - ASSERT(node); - node->next = nullptr; - - if (empty() || node->wake_time >= m_back->wake_time) - { - push_back(node); - return; - } - - if (node->wake_time < m_front->wake_time) - { - node->next = m_front; - m_front = node; - return; - } - - Node* prev = m_front; - for (; node->wake_time >= prev->next->wake_time; prev = prev->next) - continue; - node->next = prev->next; - prev->next = node; - } - - void remove_with_wake_time(SchedulerQueue& out, uint64_t current_time) - { - while (!empty() && m_front->wake_time <= current_time) - out.push_back(pop_front()); - } - - template - void remove_with_condition(SchedulerQueue& out, F comp) - { - while (!empty() && comp(m_front)) - out.push_back(pop_front()); - - if (empty()) - return; - - for (Node* prev = m_front; prev->next;) - { - Node* node = prev->next; - if (!comp(node)) - prev = prev->next; - else - { - prev->next = node->next; - if (node == m_back) - m_back = prev; - out.push_back(node); - } - } - } - - private: - Node* m_front { nullptr }; - Node* m_back { nullptr }; - }; - -} diff --git a/kernel/include/kernel/Semaphore.h b/kernel/include/kernel/Semaphore.h deleted file mode 100644 index 4bced988..00000000 --- a/kernel/include/kernel/Semaphore.h +++ /dev/null @@ -1,15 +0,0 @@ -#pragma once - -namespace Kernel -{ - - class Semaphore - { - public: - void block_indefinite(); - void block_with_timeout(uint64_t timeout_ms); - void block_with_wake_time(uint64_t wake_time_ms); - void unblock(); - }; - -} diff --git a/kernel/include/kernel/Storage/ATA/AHCI/Device.h b/kernel/include/kernel/Storage/ATA/AHCI/Device.h index bf6fe840..0281e38e 100644 --- a/kernel/include/kernel/Storage/ATA/AHCI/Device.h +++ b/kernel/include/kernel/Storage/ATA/AHCI/Device.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include diff --git a/kernel/include/kernel/Storage/NVMe/Queue.h b/kernel/include/kernel/Storage/NVMe/Queue.h index 8cbe97d0..1206c28c 100644 --- a/kernel/include/kernel/Storage/NVMe/Queue.h +++ b/kernel/include/kernel/Storage/NVMe/Queue.h @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include namespace Kernel @@ -31,7 +31,7 @@ namespace Kernel uint32_t m_cq_head { 0 }; uint16_t m_cq_valid_phase { 1 }; - Semaphore m_semaphore; + ThreadBlocker m_thread_blocker; SpinLock m_lock; BAN::Atomic m_used_mask { 0 }; BAN::Atomic m_done_mask { 0 }; diff --git a/kernel/include/kernel/Terminal/TTY.h b/kernel/include/kernel/Terminal/TTY.h index 9ec278bb..11545d78 100644 --- a/kernel/include/kernel/Terminal/TTY.h +++ b/kernel/include/kernel/Terminal/TTY.h @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include namespace Kernel @@ -74,7 +74,7 @@ namespace Kernel { bool draw_graphics { true }; bool receive_input { true }; - Semaphore semaphore; + ThreadBlocker thread_blocker; }; tty_ctrl_t m_tty_ctrl; @@ -83,7 +83,7 @@ namespace Kernel BAN::Array buffer; size_t bytes { 0 }; bool flush { false }; - Semaphore semaphore; + ThreadBlocker thread_blocker; }; Buffer m_output; diff --git a/kernel/include/kernel/Terminal/VirtualTTY.h b/kernel/include/kernel/Terminal/VirtualTTY.h index 781495a7..949f710b 100644 --- a/kernel/include/kernel/Terminal/VirtualTTY.h +++ b/kernel/include/kernel/Terminal/VirtualTTY.h @@ -5,7 +5,7 @@ #include #include #include -#include +#include namespace Kernel { diff --git a/kernel/include/kernel/Thread.h b/kernel/include/kernel/Thread.h index 701fab7a..35ba9840 100644 --- a/kernel/include/kernel/Thread.h +++ b/kernel/include/kernel/Thread.h @@ -47,10 +47,12 @@ namespace Kernel void handle_signal(int signal = 0); bool add_signal(int signal); - // blocks semaphore and returns either on unblock, eintr, spuriously or after timeout - BAN::ErrorOr block_or_eintr_indefinite(Semaphore& semaphore); - BAN::ErrorOr block_or_eintr_or_timeout(Semaphore& semaphore, uint64_t timeout_ms, bool etimedout); - BAN::ErrorOr block_or_eintr_or_waketime(Semaphore& semaphore, uint64_t wake_time_ms, bool etimedout); + // blocks current thread and returns either on unblock, eintr, spuriously or after timeout + BAN::ErrorOr block_or_eintr_indefinite(ThreadBlocker& thread_blocker); + BAN::ErrorOr block_or_eintr_or_timeout_ms(ThreadBlocker& thread_blocker, uint64_t timeout_ms, bool etimedout) { return block_or_eintr_or_timeout_ns(thread_blocker, timeout_ms * 1'000'000, etimedout); } + BAN::ErrorOr block_or_eintr_or_waketime_ms(ThreadBlocker& thread_blocker, uint64_t wake_time_ms, bool etimedout) { return block_or_eintr_or_waketime_ns(thread_blocker, wake_time_ms * 1'000'000, etimedout); } + BAN::ErrorOr block_or_eintr_or_timeout_ns(ThreadBlocker& thread_blocker, uint64_t timeout_ns, bool etimedout); + BAN::ErrorOr block_or_eintr_or_waketime_ns(ThreadBlocker& thread_blocker, uint64_t wake_time_ns, bool etimedout); pid_t tid() const { return m_tid; } diff --git a/kernel/include/kernel/ThreadBlocker.h b/kernel/include/kernel/ThreadBlocker.h new file mode 100644 index 00000000..856a7c60 --- /dev/null +++ b/kernel/include/kernel/ThreadBlocker.h @@ -0,0 +1,17 @@ +#pragma once + +namespace Kernel +{ + + class ThreadBlocker + { + public: + void block_indefinite(); + void block_with_timeout_ms(uint64_t timeout_ms) { return block_with_timeout_ns(timeout_ms * 1'000'000); } + void block_with_wake_time_ms(uint64_t wake_time_ms) { return block_with_wake_time_ns(wake_time_ms * 1'000'000); } + void block_with_timeout_ns(uint64_t timeout_ns); + void block_with_wake_time_ns(uint64_t wake_time_ns); + void unblock(); + }; + +} diff --git a/kernel/include/kernel/Timer/Timer.h b/kernel/include/kernel/Timer/Timer.h index 1afe6461..941febd0 100644 --- a/kernel/include/kernel/Timer/Timer.h +++ b/kernel/include/kernel/Timer/Timer.h @@ -29,7 +29,8 @@ namespace Kernel virtual uint64_t ns_since_boot() const override; virtual timespec time_since_boot() const override; - void sleep(uint64_t ms) const; + void sleep_ms(uint64_t ms) const { return sleep_ns(ms * 1'000'000); } + void sleep_ns(uint64_t ns) const; timespec real_time() const; diff --git a/kernel/include/kernel/USB/XHCI/Controller.h b/kernel/include/kernel/USB/XHCI/Controller.h index 92820261..aa7582f1 100644 --- a/kernel/include/kernel/USB/XHCI/Controller.h +++ b/kernel/include/kernel/USB/XHCI/Controller.h @@ -4,6 +4,7 @@ #include #include +#include #include #include @@ -79,7 +80,7 @@ namespace Kernel Mutex m_mutex; Process* m_port_updater { nullptr }; - Semaphore m_port_semaphore; + ThreadBlocker m_port_thread_blocker; BAN::Atomic m_port_changed { false }; PCI::Device& m_pci_device; diff --git a/kernel/kernel/ACPI/ACPI.cpp b/kernel/kernel/ACPI/ACPI.cpp index 33999d79..716f8c9e 100644 --- a/kernel/kernel/ACPI/ACPI.cpp +++ b/kernel/kernel/ACPI/ACPI.cpp @@ -602,7 +602,7 @@ acpi_release_global_lock: { if (IO::inw(fadt().pm1a_cnt_blk) & PM1_CNT_SCI_EN) break; - SystemTimer::get().sleep(10); + SystemTimer::get().sleep_ms(10); } if (!(IO::inw(fadt().pm1a_cnt_blk) & PM1_CNT_SCI_EN)) @@ -761,7 +761,7 @@ acpi_release_global_lock: // FIXME: this can cause missing of event if it happens between // reading the status and blocking - m_event_semaphore.block_with_timeout(100); + m_event_thread_blocker.block_with_timeout_ms(100); continue; handle_event: @@ -782,7 +782,7 @@ handle_event: void ACPI::handle_irq() { - m_event_semaphore.unblock(); + m_event_thread_blocker.unblock(); } } diff --git a/kernel/kernel/APIC.cpp b/kernel/kernel/APIC.cpp index 393c4506..d3f0e425 100644 --- a/kernel/kernel/APIC.cpp +++ b/kernel/kernel/APIC.cpp @@ -244,7 +244,7 @@ namespace Kernel dprintln("System has {} processors", m_processors.size()); - uint8_t bsp_id = Kernel::Processor::current_id(); + uint8_t bsp_id = Kernel::Processor::current_id().as_u32(); dprintln("BSP lapic id: {}", bsp_id); if (m_processors.size() == 1) @@ -267,7 +267,7 @@ namespace Kernel dprintln("Trying to enable processor (lapic id {})", processor.apic_id); - auto& proc = Kernel::Processor::create(processor.apic_id); + auto& proc = Kernel::Processor::create(ProcessorID(processor.apic_id)); PageTable::with_fast_page((paddr_t)g_ap_init_addr, [&] { PageTable::fast_page_as_sized(2) = V2P(proc.stack_top()); }); @@ -308,14 +308,24 @@ namespace Kernel } // give processor upto 100 * 100 us + 200 us to boot - for (int i = 0; *g_ap_stack_loaded == 0 && i < 100; i++) + for (int i = 0; i < 100; i++) + { + if (__atomic_load_n(&g_ap_stack_loaded[0], __ATOMIC_SEQ_CST)) + break; udelay(100); + } } - *g_ap_startup_done = 1; + __atomic_store_n(&g_ap_startup_done[0], 1, __ATOMIC_SEQ_CST); + + const size_t timeout_ms = SystemTimer::get().ms_since_boot() + 500; + while (__atomic_load_n(&g_ap_running_count[0], __ATOMIC_SEQ_CST) < m_processors.size() - 1) + { + if (SystemTimer::get().ms_since_boot() >= timeout_ms) + Kernel::panic("Could not start all processors"); + __builtin_ia32_pause(); + } - // give processors 100 us time to increment running count - udelay(100); dprintln("{} processors started", *g_ap_running_count); } @@ -325,7 +335,7 @@ namespace Kernel ASSERT(Kernel::Processor::get_interrupt_state() == InterruptState::Disabled); while ((read_from_local_apic(LAPIC_ICR_LO_REG) & ICR_LO_delivery_status_send_pending) == ICR_LO_delivery_status_send_pending) __builtin_ia32_pause(); - write_to_local_apic(LAPIC_ICR_HI_REG, (read_from_local_apic(LAPIC_ICR_HI_REG) & 0x00FFFFFF) | (target << 24)); + write_to_local_apic(LAPIC_ICR_HI_REG, (read_from_local_apic(LAPIC_ICR_HI_REG) & 0x00FFFFFF) | (target.as_u32() << 24)); write_to_local_apic(LAPIC_ICR_LO_REG, (read_from_local_apic(LAPIC_ICR_LO_REG) & ICR_LO_reserved_mask) | ICR_LO_delivery_mode_fixed @@ -359,7 +369,6 @@ namespace Kernel write_to_local_apic(LAPIC_SIV_REG, read_from_local_apic(LAPIC_SIV_REG) | 0x1FF); } - uint32_t APIC::read_from_local_apic(ptrdiff_t offset) { return MMIO::read32(m_local_apic_vaddr + offset); @@ -418,7 +427,7 @@ namespace Kernel redir.vector = IRQ_VECTOR_BASE + irq; redir.mask = 0; // FIXME: distribute IRQs more evenly? - redir.destination = Kernel::Processor::bsb_id(); + redir.destination = Kernel::Processor::bsb_id().as_u32(); ioapic->write(IOAPIC_REDIRS + gsi * 2, redir.lo_dword); ioapic->write(IOAPIC_REDIRS + gsi * 2 + 1, redir.hi_dword); diff --git a/kernel/kernel/FS/DevFS/FileSystem.cpp b/kernel/kernel/FS/DevFS/FileSystem.cpp index 5316bd47..8db2b3d4 100644 --- a/kernel/kernel/FS/DevFS/FileSystem.cpp +++ b/kernel/kernel/FS/DevFS/FileSystem.cpp @@ -49,7 +49,7 @@ namespace Kernel for (auto& device : s_instance->m_devices) device->update(); } - SystemTimer::get().sleep(10); + SystemTimer::get().sleep_ms(10); } }, nullptr ); @@ -65,7 +65,7 @@ namespace Kernel while (!s_instance->m_should_sync) { LockFreeGuard _(s_instance->m_device_lock); - s_instance->m_sync_semaphore.block_indefinite(); + s_instance->m_sync_thread_blocker.block_indefinite(); } for (auto& device : s_instance->m_devices) @@ -84,11 +84,11 @@ namespace Kernel { while (true) { - SystemTimer::get().sleep(10000); + SystemTimer::get().sleep_ms(10'000); LockGuard _(s_instance->m_device_lock); s_instance->m_should_sync = true; - s_instance->m_sync_semaphore.unblock(); + s_instance->m_sync_thread_blocker.unblock(); } }, nullptr, sync_process ))); @@ -101,7 +101,7 @@ namespace Kernel { LockGuard _(m_device_lock); m_should_sync = true; - m_sync_semaphore.unblock(); + m_sync_thread_blocker.unblock(); } if (should_block) m_sync_done.block_indefinite(); diff --git a/kernel/kernel/FS/Pipe.cpp b/kernel/kernel/FS/Pipe.cpp index 8d159b5c..4afe645f 100644 --- a/kernel/kernel/FS/Pipe.cpp +++ b/kernel/kernel/FS/Pipe.cpp @@ -37,7 +37,7 @@ namespace Kernel ASSERT(m_writing_count > 0); m_writing_count--; if (m_writing_count == 0) - m_semaphore.unblock(); + m_thread_blocker.unblock(); } BAN::ErrorOr Pipe::read_impl(off_t, BAN::ByteSpan buffer) @@ -48,7 +48,7 @@ namespace Kernel if (m_writing_count == 0) return 0; LockFreeGuard lock_free(m_mutex); - TRY(Thread::current().block_or_eintr_indefinite(m_semaphore)); + TRY(Thread::current().block_or_eintr_indefinite(m_thread_blocker)); } size_t to_copy = BAN::Math::min(buffer.size(), m_buffer.size()); @@ -59,7 +59,7 @@ namespace Kernel m_atime = SystemTimer::get().real_time(); - m_semaphore.unblock(); + m_thread_blocker.unblock(); return to_copy; } @@ -77,7 +77,7 @@ namespace Kernel m_mtime = current_time; m_ctime = current_time; - m_semaphore.unblock(); + m_thread_blocker.unblock(); return buffer.size(); } diff --git a/kernel/kernel/IDT.cpp b/kernel/kernel/IDT.cpp index de6af94d..c200e3d1 100644 --- a/kernel/kernel/IDT.cpp +++ b/kernel/kernel/IDT.cpp @@ -10,7 +10,7 @@ #include #define ISR_LIST_X X(0) X(1) X(2) X(3) X(4) X(5) X(6) X(7) X(8) X(9) X(10) X(11) X(12) X(13) X(14) X(15) X(16) X(17) X(18) X(19) X(20) X(21) X(22) X(23) X(24) X(25) X(26) X(27) X(28) X(29) X(30) X(31) -#define IRQ_LIST_X X(0) X(1) X(2) X(3) X(4) X(5) X(6) X(7) X(8) X(9) X(10) X(11) X(12) X(13) X(14) X(15) X(16) X(17) X(18) X(19) X(20) X(21) X(22) X(23) X(24) X(25) X(26) X(27) X(28) X(29) X(30) X(31) X(32) +#define IRQ_LIST_X X(0) X(1) X(2) X(3) X(4) X(5) X(6) X(7) X(8) X(9) X(10) X(11) X(12) X(13) X(14) X(15) X(16) X(17) X(18) X(19) X(20) X(21) X(22) X(23) X(24) X(25) X(26) X(27) X(28) X(29) X(30) X(31) namespace Kernel { @@ -168,8 +168,8 @@ namespace Kernel asm volatile("cli; 1: hlt; jmp 1b"); } - pid_t tid = Scheduler::current_tid(); - pid_t pid = tid ? Process::current().pid() : 0; + const pid_t tid = Thread::current_tid(); + const pid_t pid = (tid && Thread::current().has_process()) ? Process::current().pid() : 0; if (tid) { @@ -241,13 +241,13 @@ namespace Kernel #if ARCH(x86_64) dwarnln( - "{} (error code: 0x{8H}), pid {}, tid {}\r\n" + "CPU {}: {} (error code: 0x{8H}), pid {}, tid {}\r\n" "Register dump\r\n" "rax=0x{16H}, rbx=0x{16H}, rcx=0x{16H}, rdx=0x{16H}\r\n" "rsp=0x{16H}, rbp=0x{16H}, rdi=0x{16H}, rsi=0x{16H}\r\n" "rip=0x{16H}, rflags=0x{16H}\r\n" "cr0=0x{16H}, cr2=0x{16H}, cr3=0x{16H}, cr4=0x{16H}", - isr_exceptions[isr], error, pid, tid, + Processor::current_id(), isr_exceptions[isr], error, pid, tid, regs->rax, regs->rbx, regs->rcx, regs->rdx, interrupt_stack->sp, regs->rbp, regs->rdi, regs->rsi, interrupt_stack->ip, interrupt_stack->flags, @@ -255,13 +255,13 @@ namespace Kernel ); #elif ARCH(i686) dwarnln( - "{} (error code: 0x{8H}), pid {}, tid {}\r\n" + "CPU {}: {} (error code: 0x{8H}), pid {}, tid {}\r\n" "Register dump\r\n" "eax=0x{8H}, ebx=0x{8H}, ecx=0x{8H}, edx=0x{8H}\r\n" "esp=0x{8H}, ebp=0x{8H}, edi=0x{8H}, esi=0x{8H}\r\n" "eip=0x{8H}, eflags=0x{8H}\r\n" "cr0=0x{8H}, cr2=0x{8H}, cr3=0x{8H}, cr4=0x{8H}", - isr_exceptions[isr], error, pid, tid, + Processor::current_id(), isr_exceptions[isr], error, pid, tid, regs->eax, regs->ebx, regs->ecx, regs->edx, interrupt_stack->sp, regs->ebp, regs->edi, regs->esi, interrupt_stack->ip, interrupt_stack->flags, @@ -320,12 +320,17 @@ done: extern "C" void cpp_yield_handler(InterruptStack* interrupt_stack, InterruptRegisters* interrupt_registers) { + // yield is raised through kernel software interrupt ASSERT(!InterruptController::get().is_in_service(IRQ_YIELD)); ASSERT(!GDT::is_user_segment(interrupt_stack->cs)); + Processor::scheduler().reschedule(interrupt_stack, interrupt_registers); + } - Processor::enter_interrupt(interrupt_stack, interrupt_registers); - Scheduler::get().irq_reschedule(); - Processor::leave_interrupt(); + extern "C" void cpp_ipi_handler() + { + ASSERT(InterruptController::get().is_in_service(IRQ_IPI)); + InterruptController::get().eoi(IRQ_IPI); + Processor::handle_ipi(); } extern "C" void cpp_irq_handler(uint32_t irq) @@ -349,8 +354,6 @@ done: InterruptController::get().eoi(irq); if (auto* handler = s_interruptables[irq]) handler->handle_irq(); - else if (irq == IRQ_IPI) - Scheduler::get().yield(); else dprintln("no handler for irq 0x{2H}", irq); } @@ -359,7 +362,7 @@ done: if (current_thread.can_add_signal_to_execute()) current_thread.handle_signal(); - Scheduler::get().reschedule_if_idling(); + Processor::scheduler().reschedule_if_idle(); ASSERT(Thread::current().state() != Thread::State::Terminated); @@ -405,6 +408,7 @@ done: #undef X extern "C" void asm_yield_handler(); + extern "C" void asm_ipi_handler(); extern "C" void asm_syscall_handler(); IDT* IDT::create() @@ -423,6 +427,7 @@ done: #undef X idt->register_interrupt_handler(IRQ_VECTOR_BASE + IRQ_YIELD, asm_yield_handler); + idt->register_interrupt_handler(IRQ_VECTOR_BASE + IRQ_IPI, asm_ipi_handler); idt->register_syscall_handler(0x80, asm_syscall_handler); diff --git a/kernel/kernel/Input/InputDevice.cpp b/kernel/kernel/Input/InputDevice.cpp index 273a1c44..da6fe0fc 100644 --- a/kernel/kernel/Input/InputDevice.cpp +++ b/kernel/kernel/Input/InputDevice.cpp @@ -131,7 +131,7 @@ namespace Kernel m_event_head = (m_event_head + 1) % m_max_event_count; m_event_count++; - m_event_semaphore.unblock(); + m_event_thread_blocker.unblock(); if (m_type == Type::Keyboard && s_keyboard_device) s_keyboard_device->notify(); if (m_type == Type::Mouse && s_mouse_device) @@ -149,7 +149,7 @@ namespace Kernel m_event_lock.unlock(state); { LockFreeGuard _(m_mutex); - TRY(Thread::current().block_or_eintr_indefinite(m_event_semaphore)); + TRY(Thread::current().block_or_eintr_indefinite(m_event_thread_blocker)); } state = m_event_lock.lock(); } @@ -213,7 +213,7 @@ namespace Kernel } LockFreeGuard _(m_mutex); - TRY(Thread::current().block_or_eintr_indefinite(m_semaphore)); + TRY(Thread::current().block_or_eintr_indefinite(m_thread_blocker)); } } @@ -259,7 +259,7 @@ namespace Kernel } LockFreeGuard _(m_mutex); - TRY(Thread::current().block_or_eintr_indefinite(m_semaphore)); + TRY(Thread::current().block_or_eintr_indefinite(m_thread_blocker)); } } diff --git a/kernel/kernel/Networking/ARPTable.cpp b/kernel/kernel/Networking/ARPTable.cpp index 38deb152..a91de20e 100644 --- a/kernel/kernel/Networking/ARPTable.cpp +++ b/kernel/kernel/Networking/ARPTable.cpp @@ -79,7 +79,7 @@ namespace Kernel if (it != m_arp_table.end()) return it->value; } - Scheduler::get().yield(); + Processor::yield(); } return BAN::Error::from_errno(ETIMEDOUT); @@ -150,7 +150,7 @@ namespace Kernel while (m_pending_packets.empty()) { m_pending_lock.unlock(state); - m_pending_semaphore.block_indefinite(); + m_pending_thread_blocker.block_indefinite(); state = m_pending_lock.lock(); } auto packet = m_pending_packets.front(); @@ -178,7 +178,7 @@ namespace Kernel } m_pending_packets.push({ .interface = interface, .packet = arp_packet }); - m_pending_semaphore.unblock(); + m_pending_thread_blocker.unblock(); } } diff --git a/kernel/kernel/Networking/IPv4Layer.cpp b/kernel/kernel/Networking/IPv4Layer.cpp index 7fa678a5..5e22a463 100644 --- a/kernel/kernel/Networking/IPv4Layer.cpp +++ b/kernel/kernel/Networking/IPv4Layer.cpp @@ -308,7 +308,7 @@ namespace Kernel while (m_pending_packets.empty()) { m_pending_lock.unlock(state); - m_pending_semaphore.block_indefinite(); + m_pending_thread_blocker.block_indefinite(); state = m_pending_lock.lock(); } auto packet = m_pending_packets.front(); @@ -367,7 +367,7 @@ namespace Kernel m_pending_total_size += ipv4_header.total_length; m_pending_packets.push({ .interface = interface }); - m_pending_semaphore.unblock(); + m_pending_thread_blocker.unblock(); } } diff --git a/kernel/kernel/Networking/TCPSocket.cpp b/kernel/kernel/Networking/TCPSocket.cpp index dbf0b30e..09a09c74 100644 --- a/kernel/kernel/Networking/TCPSocket.cpp +++ b/kernel/kernel/Networking/TCPSocket.cpp @@ -75,7 +75,7 @@ namespace Kernel while (m_pending_connections.empty()) { LockFreeGuard _(m_mutex); - TRY(Thread::current().block_or_eintr_indefinite(m_semaphore)); + TRY(Thread::current().block_or_eintr_indefinite(m_thread_blocker)); } auto connection = m_pending_connections.front(); @@ -113,7 +113,7 @@ namespace Kernel if (SystemTimer::get().ms_since_boot() >= wake_time_ms) return BAN::Error::from_errno(ECONNABORTED); LockFreeGuard free(m_mutex); - TRY(Thread::current().block_or_eintr_or_waketime(return_inode->m_semaphore, wake_time_ms, true)); + TRY(Thread::current().block_or_eintr_or_waketime_ms(return_inode->m_thread_blocker, wake_time_ms, true)); } if (address) @@ -170,7 +170,7 @@ namespace Kernel if (SystemTimer::get().ms_since_boot() >= wake_time_ms) return BAN::Error::from_errno(ECONNREFUSED); LockFreeGuard free(m_mutex); - TRY(Thread::current().block_or_eintr_or_waketime(m_semaphore, wake_time_ms, true)); + TRY(Thread::current().block_or_eintr_or_waketime_ms(m_thread_blocker, wake_time_ms, true)); } return {}; @@ -207,7 +207,7 @@ namespace Kernel if (m_state != State::Established) return return_with_maybe_zero(); LockFreeGuard free(m_mutex); - TRY(Thread::current().block_or_eintr_indefinite(m_semaphore)); + TRY(Thread::current().block_or_eintr_indefinite(m_thread_blocker)); } const uint32_t to_recv = BAN::Math::min(buffer.size(), m_recv_window.data_size); @@ -249,7 +249,7 @@ namespace Kernel if (m_send_window.data_size + message.size() <= m_send_window.buffer->size()) break; LockFreeGuard free(m_mutex); - TRY(Thread::current().block_or_eintr_indefinite(m_semaphore)); + TRY(Thread::current().block_or_eintr_indefinite(m_thread_blocker)); } { @@ -259,14 +259,14 @@ namespace Kernel } const uint32_t target_ack = m_send_window.start_seq + m_send_window.data_size; - m_semaphore.unblock(); + m_thread_blocker.unblock(); while (m_send_window.current_ack < target_ack) { if (m_state != State::Established) return return_with_maybe_zero(); LockFreeGuard free(m_mutex); - TRY(Thread::current().block_or_eintr_indefinite(m_semaphore)); + TRY(Thread::current().block_or_eintr_indefinite(m_thread_blocker)); } return message.size(); @@ -597,7 +597,7 @@ namespace Kernel } } - m_semaphore.unblock(); + m_thread_blocker.unblock(); } void TCPSocket::set_connection_as_closed() @@ -743,11 +743,11 @@ namespace Kernel } } - m_semaphore.unblock(); - m_semaphore.block_with_wake_time(current_ms + retransmit_timeout_ms); + m_thread_blocker.unblock(); + m_thread_blocker.block_with_wake_time_ms(current_ms + retransmit_timeout_ms); } - m_semaphore.unblock(); + m_thread_blocker.unblock(); } } diff --git a/kernel/kernel/Networking/UDPSocket.cpp b/kernel/kernel/Networking/UDPSocket.cpp index 6a6f4c8d..1db1666c 100644 --- a/kernel/kernel/Networking/UDPSocket.cpp +++ b/kernel/kernel/Networking/UDPSocket.cpp @@ -70,7 +70,7 @@ namespace Kernel m_packets.emplace(packet_info); m_packet_total_size += payload.size(); - m_packet_semaphore.unblock(); + m_packet_thread_blocker.unblock(); } BAN::ErrorOr UDPSocket::bind_impl(const sockaddr* address, socklen_t address_len) @@ -93,7 +93,7 @@ namespace Kernel while (m_packets.empty()) { m_packet_lock.unlock(state); - TRY(Thread::current().block_or_eintr_indefinite(m_packet_semaphore)); + TRY(Thread::current().block_or_eintr_indefinite(m_packet_thread_blocker)); state = m_packet_lock.lock(); } diff --git a/kernel/kernel/Networking/UNIX/Socket.cpp b/kernel/kernel/Networking/UNIX/Socket.cpp index 7258f505..4bc8c462 100644 --- a/kernel/kernel/Networking/UNIX/Socket.cpp +++ b/kernel/kernel/Networking/UNIX/Socket.cpp @@ -73,7 +73,7 @@ namespace Kernel return BAN::Error::from_errno(EINVAL); while (connection_info.pending_connections.empty()) - TRY(Thread::current().block_or_eintr_indefinite(connection_info.pending_semaphore)); + TRY(Thread::current().block_or_eintr_indefinite(connection_info.pending_thread_blocker)); BAN::RefPtr pending; @@ -81,7 +81,7 @@ namespace Kernel SpinLockGuard _(connection_info.pending_lock); pending = connection_info.pending_connections.front(); connection_info.pending_connections.pop(); - connection_info.pending_semaphore.unblock(); + connection_info.pending_thread_blocker.unblock(); } BAN::RefPtr return_inode; @@ -162,15 +162,15 @@ namespace Kernel if (target_info.pending_connections.size() < target_info.pending_connections.capacity()) { MUST(target_info.pending_connections.push(this)); - target_info.pending_semaphore.unblock(); + target_info.pending_thread_blocker.unblock(); break; } } - TRY(Thread::current().block_or_eintr_indefinite(target_info.pending_semaphore)); + TRY(Thread::current().block_or_eintr_indefinite(target_info.pending_thread_blocker)); } while (!connection_info.connection_done) - Scheduler::get().yield(); + Processor::yield(); return {}; } @@ -241,7 +241,7 @@ namespace Kernel while (m_packet_sizes.full() || m_packet_size_total + packet.size() > s_packet_buffer_size) { m_packet_lock.unlock(state); - TRY(Thread::current().block_or_eintr_indefinite(m_packet_semaphore)); + TRY(Thread::current().block_or_eintr_indefinite(m_packet_thread_blocker)); state = m_packet_lock.lock(); } @@ -252,7 +252,7 @@ namespace Kernel if (!is_streaming()) m_packet_sizes.push(packet.size()); - m_packet_semaphore.unblock(); + m_packet_thread_blocker.unblock(); m_packet_lock.unlock(state); return {}; } @@ -357,7 +357,7 @@ namespace Kernel while (m_packet_size_total == 0) { m_packet_lock.unlock(state); - TRY(Thread::current().block_or_eintr_indefinite(m_packet_semaphore)); + TRY(Thread::current().block_or_eintr_indefinite(m_packet_thread_blocker)); state = m_packet_lock.lock(); } @@ -376,7 +376,7 @@ namespace Kernel memmove(packet_buffer, packet_buffer + nread, m_packet_size_total - nread); m_packet_size_total -= nread; - m_packet_semaphore.unblock(); + m_packet_thread_blocker.unblock(); m_packet_lock.unlock(state); return nread; diff --git a/kernel/kernel/Process.cpp b/kernel/kernel/Process.cpp index 922776a0..dcbfdc57 100644 --- a/kernel/kernel/Process.cpp +++ b/kernel/kernel/Process.cpp @@ -82,12 +82,13 @@ namespace Kernel void Process::register_to_scheduler() { + // FIXME: Allow failing... { SpinLockGuard _(s_process_lock); MUST(s_processes.push_back(this)); } for (auto* thread : m_threads) - MUST(Scheduler::get().add_thread(thread)); + MUST(Processor::scheduler().add_thread(thread)); } Process* Process::create_kernel() @@ -206,10 +207,10 @@ namespace Kernel ProcFileSystem::get().on_process_delete(*this); m_exit_status.exited = true; - m_exit_status.semaphore.unblock(); + m_exit_status.thread_blocker.unblock(); while (m_exit_status.waiting > 0) - Scheduler::get().yield(); + Processor::yield(); m_process_lock.lock(); @@ -250,16 +251,6 @@ namespace Kernel m_exit_status.exit_code = __WGENEXITCODE(status, signal); while (!m_threads.empty()) m_threads.front()->on_exit(); - //for (auto* thread : m_threads) - // if (thread != &Thread::current()) - // Scheduler::get().terminate_thread(thread); - //if (this == &Process::current()) - //{ - // m_threads.clear(); - // Processor::set_interrupt_state(InterruptState::Disabled); - // Thread::current().setup_process_cleanup(); - // Scheduler::get().yield(); - //} } size_t Process::proc_meminfo(off_t offset, BAN::ByteSpan buffer) const @@ -534,13 +525,13 @@ namespace Kernel m_cmdline = BAN::move(str_argv); m_environ = BAN::move(str_envp); - asm volatile("cli"); + Processor::set_interrupt_state(InterruptState::Disabled); } m_has_called_exec = true; m_threads.front()->setup_exec(); - Scheduler::get().yield(); + Processor::yield(); ASSERT_NOT_REACHED(); } @@ -603,12 +594,12 @@ namespace Kernel if (seconds == 0) return 0; - uint64_t wake_time = SystemTimer::get().ms_since_boot() + seconds * 1000; - Scheduler::get().set_current_thread_sleeping(wake_time); + const uint64_t wake_time_ms = SystemTimer::get().ms_since_boot() + (seconds * 1000); + SystemTimer::get().sleep_ms(seconds * 1000); - uint64_t current_time = SystemTimer::get().ms_since_boot(); - if (current_time < wake_time) - return BAN::Math::div_round_up(wake_time - current_time, 1000); + const uint64_t current_ms = SystemTimer::get().ms_since_boot(); + if (current_ms < wake_time_ms) + return BAN::Math::div_round_up(wake_time_ms - current_ms, 1000); return 0; } @@ -622,23 +613,21 @@ namespace Kernel TRY(validate_pointer_access(rmtp, sizeof(timespec))); } - uint64_t sleep_ms = rqtp->tv_sec * 1000 + BAN::Math::div_round_up(rqtp->tv_nsec, 1'000'000); - if (sleep_ms == 0) + const uint64_t sleep_ns = (rqtp->tv_sec * 1'000'000'000) + rqtp->tv_nsec; + if (sleep_ns == 0) return 0; - uint64_t wake_time_ms = SystemTimer::get().ms_since_boot() + sleep_ms; + const uint64_t wake_time_ns = SystemTimer::get().ns_since_boot() + sleep_ns; + SystemTimer::get().sleep_ns(sleep_ns); - Scheduler::get().set_current_thread_sleeping(wake_time_ms); - - uint64_t current_ms = SystemTimer::get().ms_since_boot(); - - if (current_ms < wake_time_ms) + const uint64_t current_ns = SystemTimer::get().ns_since_boot(); + if (current_ns < wake_time_ns) { if (rmtp) { - uint64_t remaining_ms = wake_time_ms - current_ms; - rmtp->tv_sec = remaining_ms / 1000; - rmtp->tv_nsec = (remaining_ms % 1000) * 1'000'000; + const uint64_t remaining_ns = wake_time_ns - current_ns; + rmtp->tv_sec = remaining_ns / 1'000'000'000; + rmtp->tv_nsec = remaining_ns % 1'000'000'000; } return BAN::Error::from_errno(EINTR); } @@ -1140,7 +1129,7 @@ namespace Kernel break; LockFreeGuard free(m_process_lock); - SystemTimer::get().sleep(1); + SystemTimer::get().sleep_ms(1); } if (arguments->readfds) @@ -1347,7 +1336,7 @@ namespace Kernel TRY(validate_pointer_access(args, sizeof(sys_mmap_t))); } - if (args->prot != PROT_NONE && args->prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC)) + if (args->prot != PROT_NONE && (args->prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC))) return BAN::Error::from_errno(EINVAL); if (args->flags & MAP_FIXED) @@ -1628,7 +1617,7 @@ namespace Kernel { process.add_pending_signal(signal); // FIXME: This feels hacky - Scheduler::get().unblock_thread(process.m_threads.front()->tid()); + Processor::scheduler().unblock_thread(process.m_threads.front()->tid()); } return (pid > 0) ? BAN::Iteration::Break : BAN::Iteration::Continue; } diff --git a/kernel/kernel/Processor.cpp b/kernel/kernel/Processor.cpp index 12be1c22..67fe4e26 100644 --- a/kernel/kernel/Processor.cpp +++ b/kernel/kernel/Processor.cpp @@ -1,17 +1,31 @@ +#include #include #include +#include #include +#include + +extern Kernel::TerminalDriver* g_terminal_driver; namespace Kernel { static constexpr uint32_t MSR_IA32_GS_BASE = 0xC0000101; - ProcessorID Processor::s_bsb_id { PROCESSOR_NONE }; + ProcessorID Processor::s_bsb_id { PROCESSOR_NONE }; + BAN::Atomic Processor::s_processor_count { 0 }; + BAN::Atomic Processor::s_is_smp_enabled { false }; + BAN::Atomic Processor::s_should_print_cpu_load { false }; - static BAN::Array s_processors; + static BAN::Atomic s_processors_created { 0 }; - static ProcessorID read_processor_id() + // 32 bit milli seconds are definitely enough as APs start on boot + static BAN::Atomic s_first_ap_ready_ms { 0 }; + + static BAN::Array s_processors; + static BAN::Array s_processor_ids { PROCESSOR_NONE }; + + ProcessorID Processor::read_processor_id() { uint32_t id; asm volatile( @@ -21,16 +35,18 @@ namespace Kernel : "=b"(id) :: "eax", "ecx", "edx" ); - return id; + return ProcessorID(id); } Processor& Processor::create(ProcessorID id) { // bsb is the first processor - if (s_bsb_id == PROCESSOR_NONE) + if (s_bsb_id == PROCESSOR_NONE && id == PROCESSOR_NONE) s_bsb_id = id = read_processor_id(); + if (s_bsb_id == PROCESSOR_NONE || id == PROCESSOR_NONE || id.m_id >= s_processors.size()) + Kernel::panic("Trying to initialize invalid processor {}", id.m_id); - auto& processor = s_processors[id]; + auto& processor = s_processors[id.m_id]; ASSERT(processor.m_id == PROCESSOR_NONE); processor.m_id = id; @@ -44,13 +60,27 @@ namespace Kernel processor.m_idt = IDT::create(); ASSERT(processor.m_idt); + processor.m_scheduler = MUST(Scheduler::create()); + ASSERT(processor.m_scheduler); + + SMPMessage* smp_storage = new SMPMessage[0x1000]; + ASSERT(smp_storage); + for (size_t i = 0; i < 0xFFF; i++) + smp_storage[i].next = &smp_storage[i + 1]; + smp_storage[0xFFF].next = nullptr; + + processor.m_smp_pending = nullptr; + processor.m_smp_free = smp_storage; + + s_processors_created++; + return processor; } Processor& Processor::initialize() { auto id = read_processor_id(); - auto& processor = s_processors[id]; + auto& processor = s_processors[id.m_id]; ASSERT(processor.m_gdt); processor.m_gdt->load(); @@ -72,41 +102,265 @@ namespace Kernel return processor; } - void Processor::allocate_idle_thread() + ProcessorID Processor::id_from_index(size_t index) { - ASSERT(idle_thread() == nullptr); - auto* idle_thread = MUST(Thread::create_kernel([](void*) { for (;;) asm volatile("hlt"); }, nullptr, nullptr)); - write_gs_ptr(offsetof(Processor, m_idle_thread), idle_thread); + ASSERT(index < s_processor_count); + ASSERT(s_processor_ids[index] != PROCESSOR_NONE); + return s_processor_ids[index]; } - void Processor::enter_interrupt(InterruptStack* interrupt_stack, InterruptRegisters* interrupt_registers) + void Processor::wait_until_processors_ready() { - ASSERT(get_interrupt_state() == InterruptState::Disabled); - ASSERT(read_gs_ptr(offsetof(Processor, m_interrupt_stack)) == nullptr); - write_gs_ptr(offsetof(Processor, m_interrupt_stack), interrupt_stack); - write_gs_ptr(offsetof(Processor, m_interrupt_registers), interrupt_registers); + if (s_processors_created == 1) + { + ASSERT(current_is_bsb()); + s_processor_count++; + s_processor_ids[0] = current_id(); + } + + // wait until bsb is ready + if (current_is_bsb()) + { + s_processor_count = 1; + s_processor_ids[0] = current_id(); + + // single processor system + if (s_processors_created == 1) + return; + + // wait until first AP is ready + const uint64_t timeout_ms = SystemTimer::get().ms_since_boot() + 1000; + while (s_first_ap_ready_ms == 0) + { + if (SystemTimer::get().ms_since_boot() >= timeout_ms) + { + dprintln("Could not initialize any APs :("); + return; + } + __builtin_ia32_pause(); + } + } + else + { + // wait until bsb is ready, it shall get index 0 + while (s_processor_count == 0) + __builtin_ia32_pause(); + + auto lookup_index = s_processor_count++; + ASSERT(s_processor_ids[lookup_index] == PROCESSOR_NONE); + s_processor_ids[lookup_index] = current_id(); + + uint32_t expected = 0; + s_first_ap_ready_ms.compare_exchange(expected, SystemTimer::get().ms_since_boot()); + } + + // wait until all processors are initialized + { + const uint32_t timeout_ms = s_first_ap_ready_ms + 1000; + while (s_processor_count < s_processors_created) + { + if (SystemTimer::get().ms_since_boot() >= timeout_ms) + { + if (current_is_bsb()) + dprintln("Could not initialize {} processors :(", s_processors_created - s_processor_count); + break; + } + __builtin_ia32_pause(); + } + } + + s_is_smp_enabled = true; } - void Processor::leave_interrupt() + void Processor::handle_ipi() { - ASSERT(get_interrupt_state() == InterruptState::Disabled); - ASSERT(read_gs_ptr(offsetof(Processor, m_interrupt_stack)) != nullptr); - write_gs_ptr(offsetof(Processor, m_interrupt_stack), nullptr); - write_gs_ptr(offsetof(Processor, m_interrupt_registers), nullptr); + handle_smp_messages(); } - InterruptStack& Processor::get_interrupt_stack() + template + void with_atomic_lock(BAN::Atomic& lock, F callback) { - ASSERT(get_interrupt_state() == InterruptState::Disabled); - ASSERT(read_gs_ptr(offsetof(Processor, m_interrupt_stack))); - return *read_gs_sized(offsetof(Processor, m_interrupt_stack)); + bool expected = false; + while (!lock.compare_exchange(expected, true, BAN::MemoryOrder::memory_order_acquire)) + { + __builtin_ia32_pause(); + expected = false; + } + + callback(); + + lock.store(false, BAN::MemoryOrder::memory_order_release); } - InterruptRegisters& Processor::get_interrupt_registers() + void Processor::handle_smp_messages() { - ASSERT(get_interrupt_state() == InterruptState::Disabled); - ASSERT(read_gs_ptr(offsetof(Processor, m_interrupt_registers))); - return *read_gs_sized(offsetof(Processor, m_interrupt_registers)); + auto state = get_interrupt_state(); + set_interrupt_state(InterruptState::Disabled); + + auto processor_id = current_id(); + auto& processor = s_processors[processor_id.m_id]; + + SMPMessage* pending = nullptr; + with_atomic_lock(processor.m_smp_pending_lock, + [&]() + { + pending = processor.m_smp_pending; + processor.m_smp_pending = nullptr; + } + ); + + bool should_preempt = false; + + if (pending) + { + // reverse smp message queue from LIFO to FIFO + { + SMPMessage* reversed = nullptr; + + for (SMPMessage* message = pending; message;) + { + SMPMessage* next = message->next; + message->next = reversed; + reversed = message; + message = next; + } + + pending = reversed; + } + + SMPMessage* last_handled = nullptr; + + // handle messages + for (auto* message = pending; message; message = message->next) + { + switch (message->type) + { + case SMPMessage::Type::FlushTLB: + for (size_t i = 0; i < message->flush_tlb.page_count; i++) + asm volatile("invlpg (%0)" :: "r"(message->flush_tlb.vaddr + i * PAGE_SIZE) : "memory"); + break; + case SMPMessage::Type::NewThread: + processor.m_scheduler->handle_new_thread_request(message->new_thread); + break; + case SMPMessage::Type::UnblockThread: + processor.m_scheduler->handle_unblock_request(message->unblock_thread); + break; + case SMPMessage::Type::SchedulerPreemption: + should_preempt = true; + break; + } + + last_handled = message; + } + + with_atomic_lock(processor.m_smp_free_lock, + [&]() + { + last_handled->next = processor.m_smp_free; + processor.m_smp_free = pending; + } + ); + } + + if (should_preempt) + processor.m_scheduler->preempt(); + + set_interrupt_state(state); + } + + void Processor::send_smp_message(ProcessorID processor_id, const SMPMessage& message, bool send_ipi) + { + ASSERT(processor_id != current_id()); + + auto state = get_interrupt_state(); + set_interrupt_state(InterruptState::Disabled); + + auto& processor = s_processors[processor_id.m_id]; + + // take free message slot + SMPMessage* storage = nullptr; + with_atomic_lock(processor.m_smp_free_lock, + [&]() + { + storage = processor.m_smp_free; + ASSERT(storage && storage->next); + + processor.m_smp_free = storage->next; + } + ); + + // write message + *storage = message; + + // push message to pending queue + with_atomic_lock(processor.m_smp_pending_lock, + [&]() + { + storage->next = processor.m_smp_pending; + processor.m_smp_pending = storage; + } + ); + + if (send_ipi) + InterruptController::get().send_ipi(processor_id); + + set_interrupt_state(state); + } + + void Processor::broadcast_smp_message(const SMPMessage& message) + { + if (!is_smp_enabled()) + return; + + auto state = get_interrupt_state(); + set_interrupt_state(InterruptState::Disabled); + + for (size_t i = 0; i < Processor::count(); i++) + { + auto processor_id = s_processor_ids[i]; + if (processor_id != current_id()) + send_smp_message(processor_id, message, false); + } + + InterruptController::get().broadcast_ipi(); + + set_interrupt_state(state); + } + + void Processor::yield() + { + auto state = get_interrupt_state(); + set_interrupt_state(InterruptState::Disabled); + +#if ARCH(x86_64) + asm volatile( + "movq %%rsp, %%rcx;" + "movq %[load_sp], %%rsp;" + "int %[yield];" + "movq %%rcx, %%rsp;" + // NOTE: This is offset by 2 pointers since interrupt without PL change + // does not push SP and SS. This allows accessing "whole" interrupt stack. + :: [load_sp]"r"(Processor::current_stack_top() - 2 * sizeof(uintptr_t)), + [yield]"i"(IRQ_VECTOR_BASE + IRQ_YIELD) + : "memory", "rcx" + ); +#elif ARCH(i686) + asm volatile( + "movl %%esp, %%ecx;" + "movl %[load_sp], %%esp;" + "int %[yield];" + "movl %%ecx, %%esp;" + // NOTE: This is offset by 2 pointers since interrupt without PL change + // does not push SP and SS. This allows accessing "whole" interrupt stack. + :: [load_sp]"r"(Processor::current_stack_top() - 2 * sizeof(uintptr_t)), + [yield]"i"(IRQ_VECTOR_BASE + IRQ_YIELD) + : "memory", "ecx" + ); +#else + #error +#endif + + Processor::set_interrupt_state(state); } } diff --git a/kernel/kernel/Scheduler.cpp b/kernel/kernel/Scheduler.cpp index 19c41090..445072bf 100644 --- a/kernel/kernel/Scheduler.cpp +++ b/kernel/kernel/Scheduler.cpp @@ -1,260 +1,715 @@ -#include -#include -#include +#include +#include #include #include #include +#include #include -#define SCHEDULER_VERIFY_STACK 1 +#define DEBUG_SCHEDULER 0 +#define SCHEDULER_ASSERT 1 + +#if SCHEDULER_ASSERT == 0 +#undef ASSERT +#define ASSERT(...) +#endif namespace Kernel { - static Scheduler* s_instance = nullptr; + static constexpr uint64_t s_reschedule_interval_ns = 10'000'000; + static constexpr uint64_t s_load_balance_interval_ns = 1'000'000'000; + + static BAN::Atomic s_schedulers_initialized { 0 }; + + struct ProcessorInfo + { + uint64_t idle_time_ns { s_load_balance_interval_ns }; + uint32_t max_load_threads { 0 }; + }; + + static SpinLock s_processor_info_time_lock; + static BAN::Array s_processor_infos; + + + static BAN::Atomic s_next_processor_index { 0 }; + + + void SchedulerQueue::add_thread_to_back(Node* node) + { + ASSERT(Processor::get_interrupt_state() == InterruptState::Disabled); + node->next = nullptr; + node->prev = m_tail; + (m_tail ? m_tail->next : m_head) = node; + m_tail = node; + } + + void SchedulerQueue::add_thread_with_wake_time(Node* node) + { + ASSERT(Processor::get_interrupt_state() == InterruptState::Disabled); + + if (m_tail == nullptr || node->wake_time_ns >= m_tail->wake_time_ns) + return add_thread_to_back(node); + + Node* next = m_head; + Node* prev = nullptr; + while (next && node->wake_time_ns > next->wake_time_ns) + { + prev = next; + next = next->next; + } + + node->next = next; + node->prev = prev; + (next ? next->prev : m_tail) = node; + (prev ? prev->next : m_head) = node; + } + + template + SchedulerQueue::Node* SchedulerQueue::remove_with_condition(F callback) + { + ASSERT(Processor::get_interrupt_state() == InterruptState::Disabled); + + for (Node* node = m_head; node; node = node->next) + { + if (!callback(node)) + continue; + remove_node(node); + return node; + } + + return nullptr; + } + + void SchedulerQueue::remove_node(Node* node) + { + (node->prev ? node->prev->next : m_head) = node->next; + (node->next ? node->next->prev : m_tail) = node->prev; + node->prev = nullptr; + node->next = nullptr; + } + + SchedulerQueue::Node* SchedulerQueue::front() + { + ASSERT(Processor::get_interrupt_state() == InterruptState::Disabled); + ASSERT(!empty()); + return m_head; + } + + SchedulerQueue::Node* SchedulerQueue::pop_front() + { + ASSERT(Processor::get_interrupt_state() == InterruptState::Disabled); + if (empty()) + return nullptr; + Node* result = m_head; + m_head = m_head->next; + (m_head ? m_head->prev : m_tail) = nullptr; + result->next = nullptr; + return result; + } + + BAN::ErrorOr Scheduler::create() + { + auto* scheduler = new Scheduler(); + if (scheduler == nullptr) + return BAN::Error::from_errno(ENOMEM); + return scheduler; + } BAN::ErrorOr Scheduler::initialize() { - ASSERT(s_instance == nullptr); - s_instance = new Scheduler(); - ASSERT(s_instance); - Processor::allocate_idle_thread(); + m_idle_thread = TRY(Thread::create_kernel([](void*) { asm volatile("1: hlt; jmp 1b"); }, nullptr, nullptr)); + ASSERT(m_idle_thread); + + size_t processor_index = 0; + for (; processor_index < Processor::count(); processor_index++) + if (Processor::id_from_index(processor_index) == Processor::current_id()) + break; + ASSERT(processor_index < Processor::count()); + + // each CPU does load balance at different times. This calulates the offset to other CPUs + m_last_load_balance_ns = s_load_balance_interval_ns * processor_index / Processor::count(); + m_idle_ns = -m_last_load_balance_ns; + + s_schedulers_initialized++; + while (s_schedulers_initialized < Processor::count()) + __builtin_ia32_pause(); + return {}; } - Scheduler& Scheduler::get() - { - ASSERT(s_instance); - return *s_instance; - } - - void Scheduler::start() + void Scheduler::add_current_to_most_loaded(SchedulerQueue* target_queue) { ASSERT(Processor::get_interrupt_state() == InterruptState::Disabled); - ASSERT(!m_active_threads.empty()); - // broadcast ipi (yield) for each processor - InterruptController::get().broadcast_ipi(); - yield(); - - ASSERT_NOT_REACHED(); - } - - Thread& Scheduler::current_thread() - { - auto* current = Processor::get_current_thread(); - return current ? *current->thread : *Processor::idle_thread(); - } - - pid_t Scheduler::current_tid() - { - if (s_instance == nullptr) - return 0; - return Scheduler::get().current_thread().tid(); - } - - void Scheduler::setup_next_thread() - { - ASSERT(m_lock.current_processor_has_lock()); - - if (auto* current = Processor::get_current_thread()) + bool has_current = false; + for (auto& info : m_most_loaded_threads) { - auto* thread = current->thread; - - if (thread->state() == Thread::State::Terminated) + if (info.node == m_current) { - PageTable::kernel().load(); - delete thread; - delete current; - } - else - { - // thread->state() can be NotStarted when calling exec or cleaning up process - if (thread->state() != Thread::State::NotStarted) - { - thread->interrupt_stack() = Processor::get_interrupt_stack(); - thread->interrupt_registers() = Processor::get_interrupt_registers(); - } - - if (current->should_block) - { - current->should_block = false; - m_blocking_threads.add_with_wake_time(current); - } - else - { - m_active_threads.push_back(current); - } + info.queue = target_queue; + has_current = true; + break; } } - SchedulerQueue::Node* node = nullptr; - while (!m_active_threads.empty()) + if (!has_current) { - node = m_active_threads.pop_front(); - if (node->thread->state() != Thread::State::Terminated) + size_t index = 0; + for (; index < m_most_loaded_threads.size() - 1; index++) + if (m_most_loaded_threads[index].node == nullptr) + break; + m_most_loaded_threads[index].queue = target_queue; + m_most_loaded_threads[index].node = m_current; + } + + BAN::sort::sort(m_most_loaded_threads.begin(), m_most_loaded_threads.end(), + [](const ThreadInfo& a, const ThreadInfo& b) -> bool + { + if (a.node == nullptr || b.node == nullptr) + return a.node; + return a.node->time_used_ns > b.node->time_used_ns; + } + ); + } + + void Scheduler::update_most_loaded_node_queue(SchedulerQueue::Node* node, SchedulerQueue* target_queue) + { + ASSERT(Processor::get_interrupt_state() == InterruptState::Disabled); + + for (auto& info : m_most_loaded_threads) + { + if (info.node == node) + { + info.queue = target_queue; + break; + } + } + } + + void Scheduler::remove_node_from_most_loaded(SchedulerQueue::Node* node) + { + ASSERT(Processor::get_interrupt_state() == InterruptState::Disabled); + + size_t i = 0; + for (; i < m_most_loaded_threads.size(); i++) + if (m_most_loaded_threads[i].node == node) break; - PageTable::kernel().load(); - delete node->thread; - delete node; - node = nullptr; + for (; i < m_most_loaded_threads.size() - 1; i++) + m_most_loaded_threads[i] = m_most_loaded_threads[i + 1]; + + m_most_loaded_threads.back().node = nullptr; + m_most_loaded_threads.back().queue = nullptr; + } + + void Scheduler::reschedule(InterruptStack* interrupt_stack, InterruptRegisters* interrupt_registers) + { + ASSERT(Processor::get_interrupt_state() == InterruptState::Disabled); + + // If there are no other threads in run queue, reschedule can be no-op :) + if (m_run_queue.empty() && !m_current_will_block && current_thread().state() == Thread::State::Executing) + return; + + if (m_current == nullptr) + m_idle_ns += SystemTimer::get().ns_since_boot() - m_idle_start_ns; + else + { + switch (m_current->thread->state()) + { + case Thread::State::Terminated: + remove_node_from_most_loaded(m_current); + PageTable::kernel().load(); + delete m_current->thread; + delete m_current; + m_thread_count--; + break; + case Thread::State::Executing: + { + const uint64_t current_ns = SystemTimer::get().ns_since_boot(); + m_current->thread->interrupt_stack() = *interrupt_stack; + m_current->thread->interrupt_registers() = *interrupt_registers; + m_current->time_used_ns += current_ns - m_current->last_start_ns; + add_current_to_most_loaded(m_current_will_block ? &m_block_queue : &m_run_queue); + if (!m_current_will_block) + m_run_queue.add_thread_to_back(m_current); + else + { + m_current_will_block = false; + m_block_queue.add_thread_with_wake_time(m_current); + } + break; + } + case Thread::State::NotStarted: + ASSERT(!m_current_will_block); + m_current->time_used_ns = 0; + remove_node_from_most_loaded(m_current); + m_run_queue.add_thread_to_back(m_current); + break; + } } - Processor::set_current_thread(node); - - auto* thread = node ? node->thread : Processor::idle_thread(); - - if (thread->has_process()) - thread->process().page_table().load(); - else + while ((m_current = m_run_queue.pop_front())) + { + if (m_current->thread->state() != Thread::State::Terminated) + break; + remove_node_from_most_loaded(m_current); PageTable::kernel().load(); + delete m_current->thread; + delete m_current; + m_thread_count--; + } + + if (m_current == nullptr) + { + PageTable::kernel().load(); + *interrupt_stack = m_idle_thread->interrupt_stack(); + *interrupt_registers = m_idle_thread->interrupt_registers(); + m_idle_thread->m_state = Thread::State::Executing; + m_idle_start_ns = SystemTimer::get().ns_since_boot(); + return; + } + + update_most_loaded_node_queue(m_current, nullptr); + + auto* thread = m_current->thread; + + auto& page_table = thread->has_process() ? thread->process().page_table() : PageTable::kernel(); + page_table.load(); if (thread->state() == Thread::State::NotStarted) thread->m_state = Thread::State::Executing; Processor::gdt().set_tss_stack(thread->kernel_stack_top()); - Processor::get_interrupt_stack() = thread->interrupt_stack(); - Processor::get_interrupt_registers() = thread->interrupt_registers(); + *interrupt_stack = thread->interrupt_stack(); + *interrupt_registers = thread->interrupt_registers(); + + m_current->last_start_ns = SystemTimer::get().ns_since_boot(); } - void Scheduler::timer_reschedule() + void Scheduler::reschedule_if_idle() { + ASSERT(Processor::get_interrupt_state() == InterruptState::Disabled); + if (!m_current && !m_run_queue.empty()) + Processor::yield(); + } + + void Scheduler::preempt() + { + ASSERT(Processor::get_interrupt_state() == InterruptState::Disabled); + + if (Processor::is_smp_enabled()) + do_load_balancing(); + { - SpinLockGuard _(m_lock); - m_blocking_threads.remove_with_wake_time(m_active_threads, SystemTimer::get().ms_since_boot()); + const uint64_t current_ns = SystemTimer::get().ns_since_boot(); + while (!m_block_queue.empty() && current_ns >= m_block_queue.front()->wake_time_ns) + { + auto* node = m_block_queue.pop_front(); + update_most_loaded_node_queue(node, &m_run_queue); + m_run_queue.add_thread_to_back(node); + } } - // Broadcast IPI to all other processors for them - // to perform reschedule - InterruptController::get().broadcast_ipi(); - yield(); - } - - void Scheduler::yield() - { - auto state = Processor::get_interrupt_state(); - Processor::set_interrupt_state(InterruptState::Disabled); - - ASSERT(!m_lock.current_processor_has_lock()); - -#if ARCH(x86_64) - asm volatile( - "movq %%rsp, %%rcx;" - "movq %[load_sp], %%rsp;" - "int %[yield];" - "movq %%rcx, %%rsp;" - // NOTE: This is offset by 2 pointers since interrupt without PL change - // does not push SP and SS. This allows accessing "whole" interrupt stack. - :: [load_sp]"r"(Processor::current_stack_top() - 2 * sizeof(uintptr_t)), - [yield]"i"(IRQ_VECTOR_BASE + IRQ_YIELD) - : "memory", "rcx" - ); -#elif ARCH(i686) - asm volatile( - "movl %%esp, %%ecx;" - "movl %[load_sp], %%esp;" - "int %[yield];" - "movl %%ecx, %%esp;" - // NOTE: This is offset by 2 pointers since interrupt without PL change - // does not push SP and SS. This allows accessing "whole" interrupt stack. - :: [load_sp]"r"(Processor::current_stack_top() - 2 * sizeof(uintptr_t)), - [yield]"i"(IRQ_VECTOR_BASE + IRQ_YIELD) - : "memory", "ecx" - ); -#else - #error -#endif - - Processor::set_interrupt_state(state); - } - - void Scheduler::irq_reschedule() - { - SpinLockGuard _(m_lock); - setup_next_thread(); - } - - void Scheduler::reschedule_if_idling() - { { - SpinLockGuard _(m_lock); - if (Processor::get_current_thread()) - return; - if (m_active_threads.empty()) - return; + const uint64_t current_ns = SystemTimer::get().ns_since_boot(); + if (current_ns >= m_last_reschedule_ns + s_reschedule_interval_ns) + { + m_last_reschedule_ns = current_ns; + Processor::yield(); + } + } + } + + void Scheduler::timer_interrupt() + { + ASSERT(Processor::get_interrupt_state() == InterruptState::Disabled); + + // FIXME: all processors should LAPIC for their preemption + if (Processor::is_smp_enabled()) + { + ASSERT(Processor::current_is_bsb()); + Processor::broadcast_smp_message({ + .type = Processor::SMPMessage::Type::SchedulerPreemption, + .scheduler_preemption = 0 // dummy value + }); } - yield(); + preempt(); + } + + void Scheduler::handle_unblock_request(const UnblockRequest& request) + { + ASSERT(Processor::get_interrupt_state() == InterruptState::Disabled); + + switch (request.type) + { + case UnblockRequest::Type::ThreadBlocker: + do_unblock(request.blocker); + break; + case UnblockRequest::Type::ThreadID: + do_unblock(request.tid); + break; + default: + ASSERT_NOT_REACHED(); + } + } + + void Scheduler::handle_new_thread_request(const NewThreadRequest& reqeuest) + { + ASSERT(Processor::get_interrupt_state() == InterruptState::Disabled); + + if (reqeuest.blocked) + m_block_queue.add_thread_with_wake_time(reqeuest.node); + else + m_run_queue.add_thread_to_back(reqeuest.node); + } + + bool Scheduler::do_unblock(ThreadBlocker* blocker) + { + ASSERT(Processor::get_interrupt_state() == InterruptState::Disabled); + + // FIXME: This could _easily_ be O(1) + + bool did_unblock = false; + + if (m_current && m_current->blocker == blocker && m_current_will_block) + { + m_current_will_block = false; + did_unblock = true; + } + + SchedulerQueue::Node* match; + while ((match = m_block_queue.remove_with_condition([blocker](const auto* node) { return node->blocker == blocker; }))) + { + dprintln_if(DEBUG_SCHEDULER, "CPU {}: unblock blocker {} (tid {})", Processor::current_id(), blocker, match->thread->tid()); + update_most_loaded_node_queue(match, &m_run_queue); + m_run_queue.add_thread_to_back(match); + did_unblock = true; + } + + return did_unblock; + } + + bool Scheduler::do_unblock(pid_t tid) + { + ASSERT(Processor::get_interrupt_state() == InterruptState::Disabled); + + // FIXME: This could _easily_ be O(1) + + if (m_current && m_current->thread->tid() == tid && m_current_will_block) + { + m_current_will_block = false; + return true; + } + + auto* match = m_block_queue.remove_with_condition([tid](const auto* node) { return node->thread->tid() == tid; }); + if (match == nullptr) + return false; + + dprintln_if(DEBUG_SCHEDULER, "CPU {}: unblock tid {}", Processor::current_id(), tid); + update_most_loaded_node_queue(match, &m_run_queue); + m_run_queue.add_thread_to_back(match); + return true; + } + + ProcessorID Scheduler::find_least_loaded_processor() const + { + ProcessorID least_loaded_id = Processor::current_id(); + uint64_t most_idle_ns = m_idle_ns; + uint32_t least_max_load_threads = static_cast(-1); + for (uint8_t i = 0; i < Processor::count(); i++) + { + auto processor_id = Processor::id_from_index(i); + if (processor_id == Processor::current_id()) + continue; + const auto& info = s_processor_infos[i]; + if (info.idle_time_ns < most_idle_ns || info.max_load_threads > least_max_load_threads) + continue; + least_loaded_id = processor_id; + most_idle_ns = info.idle_time_ns; + least_max_load_threads = info.max_load_threads; + } + return least_loaded_id; + } + + void Scheduler::do_load_balancing() + { + ASSERT(Processor::get_interrupt_state() == InterruptState::Disabled); + + const uint64_t current_ns = SystemTimer::get().ns_since_boot(); + if (current_ns < m_last_load_balance_ns + s_load_balance_interval_ns) + return; + + if (m_current == nullptr) + { + m_idle_ns += current_ns - m_idle_start_ns; + m_idle_start_ns = current_ns; + } + else + { + m_current->time_used_ns += current_ns - m_current->last_start_ns; + m_current->last_start_ns = current_ns; + add_current_to_most_loaded(nullptr); + } + + if constexpr(DEBUG_SCHEDULER) + { + const uint64_t duration_ns = current_ns - m_last_load_balance_ns; + const uint64_t processing_ns = duration_ns - m_idle_ns; + + { + const uint64_t load_percent_x1000 = BAN::Math::div_round_up(processing_ns * 100'000, duration_ns); + dprintln("CPU {}: { 2}.{3}% ({} threads)", Processor::current_id(), load_percent_x1000 / 1000, load_percent_x1000 % 1000, m_thread_count); + } + + if (m_current) + { + const char* name = "unknown"; + if (m_current->thread->has_process() && m_current->thread->process().is_userspace() && m_current->thread->process().userspace_info().argv) + name = m_current->thread->process().userspace_info().argv[0]; + const uint64_t load_percent_x1000 = BAN::Math::div_round_up(m_current->time_used_ns * 100'000, processing_ns); + dprintln(" tid { 2}: { 3}.{3}% <{}> current", m_current->thread->tid(), load_percent_x1000 / 1000, load_percent_x1000 % 1000, name); + } + m_run_queue.remove_with_condition( + [&](SchedulerQueue::Node* node) + { + const uint64_t load_percent_x1000 = BAN::Math::div_round_up(node->time_used_ns * 100'000, processing_ns); + dprintln(" tid { 2}: { 3}.{3}% active", node->thread->tid(), load_percent_x1000 / 1000, load_percent_x1000 % 1000); + return false; + } + ); + m_block_queue.remove_with_condition( + [&](SchedulerQueue::Node* node) + { + const uint64_t load_percent_x1000 = BAN::Math::div_round_up(node->time_used_ns * 100'000, processing_ns); + dprintln(" tid { 2}: { 3}.{3}% blocked", node->thread->tid(), load_percent_x1000 / 1000, load_percent_x1000 % 1000); + return false; + } + ); + } + + if (!s_processor_info_time_lock.try_lock_interrupts_disabled()) + { + dprintln_if(DEBUG_SCHEDULER, "Load balancing cannot keep up"); + return; + } + + if (m_idle_ns == 0 && m_should_calculate_max_load_threads) + { + const auto& most_loaded_thread = m_most_loaded_threads.front(); + if (most_loaded_thread.node == nullptr || most_loaded_thread.node->time_used_ns == 0) + s_processor_infos[Processor::current_id().as_u32()].max_load_threads = 0; + else + { + const uint64_t duration_ns = current_ns - m_last_load_balance_ns; + const uint64_t max_thread_load_x1000 = 1000 * m_most_loaded_threads.front().node->time_used_ns / duration_ns; + const uint64_t max_load_thread_count = ((2000 / max_thread_load_x1000) + 1) / 2; + s_processor_infos[Processor::current_id().as_u32()].max_load_threads = max_load_thread_count; + } + } + + constexpr auto absolute_difference_u64 = [](uint64_t a, uint64_t b) { return (a < b) ? (b - a) : (a - b); }; + + for (size_t i = 1; i < m_most_loaded_threads.size(); i++) + { + auto& thread_info = m_most_loaded_threads[i]; + if (thread_info.node == nullptr) + break; + if (thread_info.node == m_current || thread_info.queue == nullptr) + continue; + + auto least_loaded_id = find_least_loaded_processor(); + if (least_loaded_id == Processor::current_id()) + break; + + auto& most_idle_info = s_processor_infos[least_loaded_id.as_u32()]; + auto& my_info = s_processor_infos[Processor::current_id().as_u32()]; + + if (m_idle_ns == 0) + { + if (my_info.max_load_threads == 0) + break; + + if (most_idle_info.idle_time_ns == 0) + { + if (most_idle_info.max_load_threads + 1 > my_info.max_load_threads - 1) + break; + + my_info.max_load_threads -= 1; + most_idle_info.max_load_threads += 1; + + dprintln_if(DEBUG_SCHEDULER, "CPU {}: sending tid {} to CPU {} (max load)", Processor::current_id(), thread_info.node->thread->tid(), least_loaded_id); + } + else + { + my_info.max_load_threads -= 1; + most_idle_info.idle_time_ns = 0; + most_idle_info.max_load_threads = 1; + + dprintln_if(DEBUG_SCHEDULER, "CPU {}: sending tid {} to CPU {}", Processor::current_id(), thread_info.node->thread->tid(), least_loaded_id); + } + } + else + { + const uint64_t my_current_proc_ns = s_load_balance_interval_ns - BAN::Math::min(s_load_balance_interval_ns, m_idle_ns); + const uint64_t other_current_proc_ns = s_load_balance_interval_ns - BAN::Math::min(s_load_balance_interval_ns, most_idle_info.idle_time_ns); + const uint64_t current_proc_diff_ns = absolute_difference_u64(my_current_proc_ns, other_current_proc_ns); + + const uint64_t my_new_proc_ns = my_current_proc_ns - BAN::Math::min(thread_info.node->time_used_ns, my_current_proc_ns); + const uint64_t other_new_proc_ns = other_current_proc_ns + thread_info.node->time_used_ns; + const uint64_t new_proc_diff_ns = absolute_difference_u64(my_new_proc_ns, other_new_proc_ns); + + // require 10% decrease between CPU loads to do send thread to other CPU + if (new_proc_diff_ns >= current_proc_diff_ns || (100 * (current_proc_diff_ns - new_proc_diff_ns) / current_proc_diff_ns) < 10) + continue; + + most_idle_info.idle_time_ns -= BAN::Math::min(thread_info.node->time_used_ns, most_idle_info.idle_time_ns); + m_idle_ns += thread_info.node->time_used_ns; + + dprintln_if(DEBUG_SCHEDULER, "CPU {}: sending tid {} to CPU {}", Processor::current_id(), thread_info.node->thread->tid(), least_loaded_id); + } + + thread_info.node->time_used_ns = 0; + + { + auto& my_queue = (thread_info.queue == &m_run_queue) ? m_run_queue : m_block_queue; + my_queue.remove_node(thread_info.node); + m_thread_count--; + } + + Processor::send_smp_message(least_loaded_id, { + .type = Processor::SMPMessage::Type::NewThread, + .new_thread = { + .node = thread_info.node, + .blocked = thread_info.queue == &m_block_queue + } + }); + + thread_info.node = nullptr; + thread_info.queue = nullptr; + + if (m_idle_ns == 0) + break; + } + + s_processor_infos[Processor::current_id().as_u32()].idle_time_ns = m_idle_ns; + s_processor_info_time_lock.unlock(InterruptState::Disabled); + + if (m_current) + m_current->time_used_ns = 0; + for (auto& thread_info : m_most_loaded_threads) + thread_info = {}; + m_run_queue .remove_with_condition([&](SchedulerQueue::Node* node) { node->time_used_ns = 0; return false; }); + m_block_queue.remove_with_condition([&](SchedulerQueue::Node* node) { node->time_used_ns = 0; return false; }); + m_idle_ns = 0; + + m_should_calculate_max_load_threads = true; + + m_last_load_balance_ns += s_load_balance_interval_ns; } BAN::ErrorOr Scheduler::add_thread(Thread* thread) { - auto* node = new SchedulerQueue::Node(thread); - if (node == nullptr) + auto* new_node = new SchedulerQueue::Node(thread); + if (new_node == nullptr) return BAN::Error::from_errno(ENOMEM); - SpinLockGuard _(m_lock); - m_active_threads.push_back(node); + + const size_t processor_index = s_next_processor_index++ % Processor::count(); + const auto processor_id = Processor::id_from_index(processor_index); + + if (processor_id == Processor::current_id()) + { + auto state = Processor::get_interrupt_state(); + Processor::set_interrupt_state(InterruptState::Disabled); + m_run_queue.add_thread_to_back(new_node); + m_thread_count++; + Processor::set_interrupt_state(state); + } + else + { + Processor::send_smp_message(processor_id, { + .type = Processor::SMPMessage::Type::NewThread, + .new_thread = { + .node = new_node, + .blocked = false + } + }); + } + return {}; } - void Scheduler::terminate_thread(Thread* thread) + void Scheduler::block_current_thread(ThreadBlocker* blocker, uint64_t wake_time_ns) { - auto state = m_lock.lock(); + auto state = Processor::get_interrupt_state(); + Processor::set_interrupt_state(InterruptState::Disabled); - ASSERT(thread->state() == Thread::State::Executing); - thread->m_state = Thread::State::Terminated; - thread->interrupt_stack().sp = Processor::current_stack_top(); + m_current->blocker = blocker; + m_current->wake_time_ns = wake_time_ns; + m_current_will_block = true; + Processor::yield(); - m_lock.unlock(InterruptState::Disabled); + Processor::set_interrupt_state(state); + } - // actual deletion will be done while rescheduling + void Scheduler::unblock_threads(ThreadBlocker* blocker) + { + auto state = Processor::get_interrupt_state(); + Processor::set_interrupt_state(InterruptState::Disabled); - if (¤t_thread() == thread) + do_unblock(blocker); + + Processor::broadcast_smp_message({ + .type = Processor::SMPMessage::Type::UnblockThread, + .unblock_thread = { + .type = UnblockRequest::Type::ThreadBlocker, + .blocker = blocker + } + }); + + Processor::set_interrupt_state(state); + } + + void Scheduler::unblock_thread(pid_t tid) + { + auto state = Processor::get_interrupt_state(); + Processor::set_interrupt_state(InterruptState::Disabled); + + if (!do_unblock(tid)) { - yield(); - ASSERT_NOT_REACHED(); + Processor::broadcast_smp_message({ + .type = Processor::SMPMessage::Type::UnblockThread, + .unblock_thread = { + .type = UnblockRequest::Type::ThreadID, + .tid = tid + } + }); } Processor::set_interrupt_state(state); } - void Scheduler::set_current_thread_sleeping_impl(Semaphore* semaphore, uint64_t wake_time) + Thread& Scheduler::current_thread() { - auto state = m_lock.lock(); - - auto* current = Processor::get_current_thread(); - current->semaphore = semaphore; - current->wake_time = wake_time; - current->should_block = true; - - m_lock.unlock(InterruptState::Disabled); - - yield(); - - Processor::set_interrupt_state(state); + if (m_current) + return *m_current->thread; + return *m_idle_thread; } - void Scheduler::set_current_thread_sleeping(uint64_t wake_time) + Thread& Scheduler::idle_thread() { - set_current_thread_sleeping_impl(nullptr, wake_time); + return *m_idle_thread; } - void Scheduler::block_current_thread(Semaphore* semaphore, uint64_t wake_time) + pid_t Scheduler::current_tid() const { - set_current_thread_sleeping_impl(semaphore, wake_time); + return m_current ? m_current->thread->tid() : 0; } - void Scheduler::unblock_threads(Semaphore* semaphore) + bool Scheduler::is_idle() const { - SpinLockGuard _(m_lock); - m_blocking_threads.remove_with_condition(m_active_threads, [&](auto* node) { return node->semaphore == semaphore; }); - } - - void Scheduler::unblock_thread(pid_t tid) - { - SpinLockGuard _(m_lock); - m_blocking_threads.remove_with_condition(m_active_threads, [&](auto* node) { return node->thread->tid() == tid; }); + return m_current == nullptr; } } diff --git a/kernel/kernel/Semaphore.cpp b/kernel/kernel/Semaphore.cpp deleted file mode 100644 index 6f57b6d4..00000000 --- a/kernel/kernel/Semaphore.cpp +++ /dev/null @@ -1,28 +0,0 @@ -#include -#include -#include - -namespace Kernel -{ - - void Semaphore::block_indefinite() - { - Scheduler::get().block_current_thread(this, ~(uint64_t)0); - } - - void Semaphore::block_with_timeout(uint64_t timeout_ms) - { - Scheduler::get().block_current_thread(this, SystemTimer::get().ms_since_boot() + timeout_ms); - } - - void Semaphore::block_with_wake_time(uint64_t wake_time) - { - Scheduler::get().block_current_thread(this, wake_time); - } - - void Semaphore::unblock() - { - Scheduler::get().unblock_threads(this); - } - -} diff --git a/kernel/kernel/Storage/ATA/AHCI/Device.cpp b/kernel/kernel/Storage/ATA/AHCI/Device.cpp index 1bc945a8..92b70844 100644 --- a/kernel/kernel/Storage/ATA/AHCI/Device.cpp +++ b/kernel/kernel/Storage/ATA/AHCI/Device.cpp @@ -8,7 +8,7 @@ namespace Kernel { - static constexpr uint64_t s_ata_timeout = 1000; + static constexpr uint64_t s_ata_timeout_ms = 1000; static void start_cmd(volatile HBAPortMemorySpace* port) { @@ -118,9 +118,9 @@ namespace Kernel command.c = 1; command.command = ATA_COMMAND_IDENTIFY; - uint64_t timeout = SystemTimer::get().ms_since_boot() + s_ata_timeout; + const uint64_t timeout_ms = SystemTimer::get().ms_since_boot() + s_ata_timeout_ms; while (m_port->tfd & (ATA_STATUS_BSY | ATA_STATUS_DRQ)) - if (SystemTimer::get().ms_since_boot() >= timeout) + if (SystemTimer::get().ms_since_boot() >= timeout_ms) return BAN::Error::from_errno(ETIMEDOUT); m_port->ci = 1 << slot.value(); @@ -158,17 +158,17 @@ namespace Kernel { static constexpr uint64_t poll_timeout_ms = 10; - auto start_time = SystemTimer::get().ms_since_boot(); + const auto start_time_ms = SystemTimer::get().ms_since_boot(); - while (SystemTimer::get().ms_since_boot() < start_time + poll_timeout_ms) + while (SystemTimer::get().ms_since_boot() < start_time_ms + poll_timeout_ms) if (!(m_port->ci & (1 << command_slot))) return {}; - // FIXME: This should actually block once semaphores support blocking with timeout. + // FIXME: This should actually block once ThreadBlocker support blocking with timeout. // This doesn't allow scheduler to go properly idle. - while (SystemTimer::get().ms_since_boot() < start_time + s_ata_timeout) + while (SystemTimer::get().ms_since_boot() < start_time_ms + s_ata_timeout_ms) { - Scheduler::get().yield(); + Processor::yield(); if (!(m_port->ci & (1 << command_slot))) return {}; } diff --git a/kernel/kernel/Storage/ATA/ATABus.cpp b/kernel/kernel/Storage/ATA/ATABus.cpp index 1084651c..2b6922f6 100644 --- a/kernel/kernel/Storage/ATA/ATABus.cpp +++ b/kernel/kernel/Storage/ATA/ATABus.cpp @@ -67,9 +67,7 @@ namespace Kernel static void select_delay() { - auto time = SystemTimer::get().ns_since_boot(); - while (SystemTimer::get().ns_since_boot() < time + 400) - continue; + SystemTimer::get().sleep_ns(400); } void ATABus::select_device(bool secondary) @@ -106,7 +104,7 @@ namespace Kernel io_write(ATA_PORT_CONTROL, ATA_CONTROL_nIEN); io_write(ATA_PORT_COMMAND, ATA_COMMAND_IDENTIFY); - SystemTimer::get().sleep(1); + SystemTimer::get().sleep_ms(1); // No device on port if (io_read(ATA_PORT_STATUS) == 0) @@ -130,7 +128,7 @@ namespace Kernel } io_write(ATA_PORT_COMMAND, ATA_COMMAND_IDENTIFY_PACKET); - SystemTimer::get().sleep(1); + SystemTimer::get().sleep_ms(1); if (auto res = wait(true); res.is_error()) { diff --git a/kernel/kernel/Storage/NVMe/Queue.cpp b/kernel/kernel/Storage/NVMe/Queue.cpp index 63b48d01..3960299a 100644 --- a/kernel/kernel/Storage/NVMe/Queue.cpp +++ b/kernel/kernel/Storage/NVMe/Queue.cpp @@ -1,6 +1,6 @@ #include -#include #include +#include #include namespace Kernel @@ -44,7 +44,7 @@ namespace Kernel m_doorbell.cq_head = m_cq_head; - m_semaphore.unblock(); + m_thread_blocker.unblock(); } uint16_t NVMeQueue::submit_command(NVMe::SubmissionQueueEntry& sqe) @@ -66,15 +66,15 @@ namespace Kernel m_doorbell.sq_tail = m_sq_tail; } - const uint64_t start_time = SystemTimer::get().ms_since_boot(); - while (!(m_done_mask & cid_mask) && SystemTimer::get().ms_since_boot() < start_time + s_nvme_command_poll_timeout_ms) + const uint64_t start_time_ms = SystemTimer::get().ms_since_boot(); + while (!(m_done_mask & cid_mask) && SystemTimer::get().ms_since_boot() < start_time_ms + s_nvme_command_poll_timeout_ms) continue; // FIXME: Here is a possible race condition if done mask is set before // scheduler has put the current thread blocking. // EINTR should also be handled here. - while (!(m_done_mask & cid_mask) && SystemTimer::get().ms_since_boot() < start_time + s_nvme_command_timeout_ms) - Scheduler::get().block_current_thread(&m_semaphore, start_time + s_nvme_command_timeout_ms); + while (!(m_done_mask & cid_mask) && SystemTimer::get().ms_since_boot() < start_time_ms + s_nvme_command_timeout_ms) + m_thread_blocker.block_with_wake_time_ms(start_time_ms + s_nvme_command_timeout_ms); if (m_done_mask & cid_mask) { @@ -93,7 +93,7 @@ namespace Kernel while (~m_used_mask == 0) { m_lock.unlock(state); - m_semaphore.block_with_timeout(s_nvme_command_timeout_ms); + m_thread_blocker.block_with_timeout_ms(s_nvme_command_timeout_ms); state = m_lock.lock(); } diff --git a/kernel/kernel/Terminal/TTY.cpp b/kernel/kernel/Terminal/TTY.cpp index 92b92019..0eb02f4c 100644 --- a/kernel/kernel/Terminal/TTY.cpp +++ b/kernel/kernel/Terminal/TTY.cpp @@ -57,7 +57,7 @@ namespace Kernel if ((flags & TTY_FLAG_ENABLE_INPUT) && !m_tty_ctrl.receive_input) { m_tty_ctrl.receive_input = true; - m_tty_ctrl.semaphore.unblock(); + m_tty_ctrl.thread_blocker.unblock(); } if (flags & TTY_FLAG_ENABLE_OUTPUT) m_tty_ctrl.draw_graphics = true; @@ -94,7 +94,7 @@ namespace Kernel while (true) { while (!TTY::current()->m_tty_ctrl.receive_input) - TTY::current()->m_tty_ctrl.semaphore.block_indefinite(); + TTY::current()->m_tty_ctrl.thread_blocker.block_indefinite(); LibInput::RawKeyEvent event; size_t read = MUST(inode->read(0, BAN::ByteSpan::from(event))); @@ -237,7 +237,7 @@ namespace Kernel if (ch == '\x04' && m_termios.canonical) { m_output.flush = true; - m_output.semaphore.unblock(); + m_output.thread_blocker.unblock(); return; } @@ -279,7 +279,7 @@ namespace Kernel if (ch == '\n' || !m_termios.canonical) { m_output.flush = true; - m_output.semaphore.unblock(); + m_output.thread_blocker.unblock(); } } @@ -338,7 +338,7 @@ namespace Kernel while (!m_output.flush) { LockFreeGuard _(m_mutex); - TRY(Thread::current().block_or_eintr_indefinite(m_output.semaphore)); + TRY(Thread::current().block_or_eintr_indefinite(m_output.thread_blocker)); } if (m_output.bytes == 0) @@ -356,7 +356,7 @@ namespace Kernel if (m_output.bytes == 0) m_output.flush = false; - m_output.semaphore.unblock(); + m_output.thread_blocker.unblock(); return to_copy; } diff --git a/kernel/kernel/Thread.cpp b/kernel/kernel/Thread.cpp index c7d255b0..71cf31f4 100644 --- a/kernel/kernel/Thread.cpp +++ b/kernel/kernel/Thread.cpp @@ -120,7 +120,12 @@ namespace Kernel Thread& Thread::current() { - return Scheduler::get().current_thread(); + return Processor::scheduler().current_thread(); + } + + pid_t Thread::current_tid() + { + return Processor::scheduler().current_tid(); } Process& Thread::process() @@ -396,36 +401,36 @@ namespace Kernel { m_signal_pending_mask |= mask; if (this != &Thread::current()) - Scheduler::get().unblock_thread(tid()); + Processor::scheduler().unblock_thread(tid()); return true; } return false; } - BAN::ErrorOr Thread::block_or_eintr_indefinite(Semaphore& semaphore) + BAN::ErrorOr Thread::block_or_eintr_indefinite(ThreadBlocker& thread_blocker) { if (is_interrupted_by_signal()) return BAN::Error::from_errno(EINTR); - semaphore.block_indefinite(); + thread_blocker.block_indefinite(); if (is_interrupted_by_signal()) return BAN::Error::from_errno(EINTR); return {}; } - BAN::ErrorOr Thread::block_or_eintr_or_timeout(Semaphore& semaphore, uint64_t timeout_ms, bool etimedout) + BAN::ErrorOr Thread::block_or_eintr_or_timeout_ns(ThreadBlocker& thread_blocker, uint64_t timeout_ns, bool etimedout) { - uint64_t wake_time_ms = SystemTimer::get().ms_since_boot() + timeout_ms; - return block_or_eintr_or_waketime(semaphore, wake_time_ms, etimedout); + const uint64_t wake_time_ns = SystemTimer::get().ns_since_boot() + timeout_ns; + return block_or_eintr_or_waketime_ns(thread_blocker, wake_time_ns, etimedout); } - BAN::ErrorOr Thread::block_or_eintr_or_waketime(Semaphore& semaphore, uint64_t wake_time_ms, bool etimedout) + BAN::ErrorOr Thread::block_or_eintr_or_waketime_ns(ThreadBlocker& thread_blocker, uint64_t wake_time_ns, bool etimedout) { if (is_interrupted_by_signal()) return BAN::Error::from_errno(EINTR); - semaphore.block_with_wake_time(wake_time_ms); + thread_blocker.block_with_timeout_ns(wake_time_ns); if (is_interrupted_by_signal()) return BAN::Error::from_errno(EINTR); - if (etimedout && SystemTimer::get().ms_since_boot() >= wake_time_ms) + if (etimedout && SystemTimer::get().ms_since_boot() >= wake_time_ns) return BAN::Error::from_errno(ETIMEDOUT); return {}; } @@ -444,15 +449,12 @@ namespace Kernel { Processor::set_interrupt_state(InterruptState::Disabled); setup_process_cleanup(); - Scheduler::get().yield(); + Processor::yield(); + ASSERT_NOT_REACHED(); } - else - Scheduler::get().terminate_thread(this); - } - else - { - Scheduler::get().terminate_thread(this); } + m_state = State::Terminated; + Processor::yield(); ASSERT_NOT_REACHED(); } diff --git a/kernel/kernel/ThreadBlocker.cpp b/kernel/kernel/ThreadBlocker.cpp new file mode 100644 index 00000000..6d55dc97 --- /dev/null +++ b/kernel/kernel/ThreadBlocker.cpp @@ -0,0 +1,28 @@ +#include +#include +#include + +namespace Kernel +{ + + void ThreadBlocker::block_indefinite() + { + Processor::scheduler().block_current_thread(this, ~static_cast(0)); + } + + void ThreadBlocker::block_with_timeout_ns(uint64_t timeout_ns) + { + Processor::scheduler().block_current_thread(this, SystemTimer::get().ns_since_boot() + timeout_ns); + } + + void ThreadBlocker::block_with_wake_time_ns(uint64_t wake_time_ns) + { + Processor::scheduler().block_current_thread(this, wake_time_ns); + } + + void ThreadBlocker::unblock() + { + Processor::scheduler().unblock_threads(this); + } + +} diff --git a/kernel/kernel/Timer/HPET.cpp b/kernel/kernel/Timer/HPET.cpp index 226ca60b..c5b8e05d 100644 --- a/kernel/kernel/Timer/HPET.cpp +++ b/kernel/kernel/Timer/HPET.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #define HPET_PERIOD_MAX 0x05F5E100 @@ -272,7 +272,7 @@ namespace Kernel m_last_ticks = current_ticks; } - Scheduler::get().timer_reschedule(); + Processor::scheduler().timer_interrupt(); } uint64_t HPET::ms_since_boot() const diff --git a/kernel/kernel/Timer/PIT.cpp b/kernel/kernel/Timer/PIT.cpp index f4419c88..ec15b5f1 100644 --- a/kernel/kernel/Timer/PIT.cpp +++ b/kernel/kernel/Timer/PIT.cpp @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include #define PIT_IRQ 0 @@ -58,7 +58,7 @@ namespace Kernel SpinLockGuard _(m_lock); m_system_time++; } - Kernel::Scheduler::get().timer_reschedule(); + Processor::scheduler().timer_interrupt(); } uint64_t PIT::read_counter() const diff --git a/kernel/kernel/Timer/Timer.cpp b/kernel/kernel/Timer/Timer.cpp index 06d2df3b..1c3186f6 100644 --- a/kernel/kernel/Timer/Timer.cpp +++ b/kernel/kernel/Timer/Timer.cpp @@ -69,15 +69,17 @@ namespace Kernel return m_timer->time_since_boot(); } - void SystemTimer::sleep(uint64_t ms) const + void SystemTimer::sleep_ns(uint64_t ns) const { - if (ms == 0) + if (ns == 0) return; - uint64_t wake_time = ms_since_boot() + ms; - Scheduler::get().set_current_thread_sleeping(wake_time); - uint64_t current_time = ms_since_boot(); - if (current_time < wake_time) - dwarnln("sleep woke {} ms too soon", wake_time - current_time); + + const uint64_t wake_time_ns = ns_since_boot() + ns; + Processor::scheduler().block_current_thread(nullptr, wake_time_ns); + + //const uint64_t current_time_ns = ns_since_boot(); + //if (current_time_ns < wake_time_ns) + // dwarnln("sleep woke {} ms too soon", BAN::Math::div_round_up(wake_time_ns - current_time_ns, 1'000'000)); } timespec SystemTimer::real_time() const diff --git a/kernel/kernel/USB/XHCI/Controller.cpp b/kernel/kernel/USB/XHCI/Controller.cpp index b546aefa..9b30ab94 100644 --- a/kernel/kernel/USB/XHCI/Controller.cpp +++ b/kernel/kernel/USB/XHCI/Controller.cpp @@ -250,7 +250,7 @@ namespace Kernel bool expected { true }; while (!m_port_changed.compare_exchange(expected, false)) { - m_port_semaphore.block_with_timeout(100); + m_port_thread_blocker.block_with_timeout_ms(100); expected = true; } } @@ -482,7 +482,7 @@ namespace Kernel break; } m_port_changed = true; - m_port_semaphore.unblock(); + m_port_thread_blocker.unblock(); break; } case XHCI::TRBType::BandwidthRequestEvent: diff --git a/kernel/kernel/kernel.cpp b/kernel/kernel/kernel.cpp index f9718003..753c32b8 100644 --- a/kernel/kernel/kernel.cpp +++ b/kernel/kernel/kernel.cpp @@ -108,7 +108,7 @@ extern "C" void kernel_main(uint32_t boot_magic, uint32_t boot_info) parse_boot_info(boot_magic, boot_info); dprintln("boot info parsed"); - Processor::create(0); + Processor::create(PROCESSOR_NONE); Processor::initialize(); dprintln("BSP initialized"); @@ -167,12 +167,11 @@ extern "C" void kernel_main(uint32_t boot_magic, uint32_t boot_info) Random::initialize(); dprintln("RNG initialized"); - MUST(Scheduler::initialize()); - dprintln("Scheduler initialized"); + Processor::wait_until_processors_ready(); + MUST(Processor::scheduler().initialize()); - Scheduler& scheduler = Scheduler::get(); Process::create_kernel(init2, nullptr); - scheduler.start(); + Processor::yield(); ASSERT_NOT_REACHED(); } @@ -233,14 +232,11 @@ extern "C" void ap_main() Processor::initialize(); PageTable::kernel().initial_load(); - Processor::allocate_idle_thread(); InterruptController::get().enable(); - dprintln("ap{} initialized", Processor::current_id()); + Processor::wait_until_processors_ready(); + MUST(Processor::scheduler().initialize()); - // wait until scheduler is started and we get irq for reschedule - Processor::set_interrupt_state(InterruptState::Enabled); - while (true) - asm volatile("hlt"); + asm volatile("sti; 1: hlt; jmp 1b"); ASSERT_NOT_REACHED(); }