Kernel: Rewrite the whole scheduler and re-architect SMP handling

Change Semaphore -> ThreadBlocker
  This was not a semaphore; I just named it one because I didn't know
  what a semaphore was. I have meant to change this sooner, but it was
  in no way urgent :D

Implement SMP events. Processors can now be sent SMP events through
IPIs. SMP events can be sent either to a single processor or broadcast
to every processor.

PageTable::{map_page,map_range,unmap_page,unmap_range}() now send SMP
event to invalidate TLB caches for the changed pages.

Scheduler no longer uses a global run queue. Each processor has its own
scheduler that keeps track of the load on that processor. Once every
second the schedulers do load balancing. Schedulers have no access to
other processors' schedulers; they only see approximate loads. If a
scheduler decides that it has too much load, it will send a thread to
another processor through an SMP event.

Schedulers are currently run using the timer interrupt on the BSP. This
should not be the case; each processor should instead use its own LAPIC
timer for interrupts. There is no reason to broadcast an SMP event to
all processors when the BSP gets a timer interrupt.

The old scheduler only achieved around 20% idle load on QEMU, probably
due to a very inefficient implementation. This new scheduler seems to
average around 1% idle load, which is much closer to what I would
expect. On my own laptop the idle load seems to be only around 0.5% on
each processor.
This commit is contained in:
2024-07-22 00:33:50 +03:00
parent 9f90eeab05
commit f8261c60c0
60 changed files with 1559 additions and 715 deletions

View File

@@ -602,7 +602,7 @@ acpi_release_global_lock:
{
if (IO::inw(fadt().pm1a_cnt_blk) & PM1_CNT_SCI_EN)
break;
SystemTimer::get().sleep(10);
SystemTimer::get().sleep_ms(10);
}
if (!(IO::inw(fadt().pm1a_cnt_blk) & PM1_CNT_SCI_EN))
@@ -761,7 +761,7 @@ acpi_release_global_lock:
// FIXME: this can cause missing of event if it happens between
// reading the status and blocking
m_event_semaphore.block_with_timeout(100);
m_event_thread_blocker.block_with_timeout_ms(100);
continue;
handle_event:
@@ -782,7 +782,7 @@ handle_event:
void ACPI::handle_irq()
{
m_event_semaphore.unblock();
m_event_thread_blocker.unblock();
}
}

View File

@@ -244,7 +244,7 @@ namespace Kernel
dprintln("System has {} processors", m_processors.size());
uint8_t bsp_id = Kernel::Processor::current_id();
uint8_t bsp_id = Kernel::Processor::current_id().as_u32();
dprintln("BSP lapic id: {}", bsp_id);
if (m_processors.size() == 1)
@@ -267,7 +267,7 @@ namespace Kernel
dprintln("Trying to enable processor (lapic id {})", processor.apic_id);
auto& proc = Kernel::Processor::create(processor.apic_id);
auto& proc = Kernel::Processor::create(ProcessorID(processor.apic_id));
PageTable::with_fast_page((paddr_t)g_ap_init_addr, [&] {
PageTable::fast_page_as_sized<uint32_t>(2) = V2P(proc.stack_top());
});
@@ -308,14 +308,24 @@ namespace Kernel
}
// give processor upto 100 * 100 us + 200 us to boot
for (int i = 0; *g_ap_stack_loaded == 0 && i < 100; i++)
for (int i = 0; i < 100; i++)
{
if (__atomic_load_n(&g_ap_stack_loaded[0], __ATOMIC_SEQ_CST))
break;
udelay(100);
}
}
*g_ap_startup_done = 1;
__atomic_store_n(&g_ap_startup_done[0], 1, __ATOMIC_SEQ_CST);
const size_t timeout_ms = SystemTimer::get().ms_since_boot() + 500;
while (__atomic_load_n(&g_ap_running_count[0], __ATOMIC_SEQ_CST) < m_processors.size() - 1)
{
if (SystemTimer::get().ms_since_boot() >= timeout_ms)
Kernel::panic("Could not start all processors");
__builtin_ia32_pause();
}
// give processors 100 us time to increment running count
udelay(100);
dprintln("{} processors started", *g_ap_running_count);
}
@@ -325,7 +335,7 @@ namespace Kernel
ASSERT(Kernel::Processor::get_interrupt_state() == InterruptState::Disabled);
while ((read_from_local_apic(LAPIC_ICR_LO_REG) & ICR_LO_delivery_status_send_pending) == ICR_LO_delivery_status_send_pending)
__builtin_ia32_pause();
write_to_local_apic(LAPIC_ICR_HI_REG, (read_from_local_apic(LAPIC_ICR_HI_REG) & 0x00FFFFFF) | (target << 24));
write_to_local_apic(LAPIC_ICR_HI_REG, (read_from_local_apic(LAPIC_ICR_HI_REG) & 0x00FFFFFF) | (target.as_u32() << 24));
write_to_local_apic(LAPIC_ICR_LO_REG,
(read_from_local_apic(LAPIC_ICR_LO_REG) & ICR_LO_reserved_mask)
| ICR_LO_delivery_mode_fixed
@@ -359,7 +369,6 @@ namespace Kernel
write_to_local_apic(LAPIC_SIV_REG, read_from_local_apic(LAPIC_SIV_REG) | 0x1FF);
}
uint32_t APIC::read_from_local_apic(ptrdiff_t offset)
{
return MMIO::read32(m_local_apic_vaddr + offset);
@@ -418,7 +427,7 @@ namespace Kernel
redir.vector = IRQ_VECTOR_BASE + irq;
redir.mask = 0;
// FIXME: distribute IRQs more evenly?
redir.destination = Kernel::Processor::bsb_id();
redir.destination = Kernel::Processor::bsb_id().as_u32();
ioapic->write(IOAPIC_REDIRS + gsi * 2, redir.lo_dword);
ioapic->write(IOAPIC_REDIRS + gsi * 2 + 1, redir.hi_dword);

View File

@@ -49,7 +49,7 @@ namespace Kernel
for (auto& device : s_instance->m_devices)
device->update();
}
SystemTimer::get().sleep(10);
SystemTimer::get().sleep_ms(10);
}
}, nullptr
);
@@ -65,7 +65,7 @@ namespace Kernel
while (!s_instance->m_should_sync)
{
LockFreeGuard _(s_instance->m_device_lock);
s_instance->m_sync_semaphore.block_indefinite();
s_instance->m_sync_thread_blocker.block_indefinite();
}
for (auto& device : s_instance->m_devices)
@@ -84,11 +84,11 @@ namespace Kernel
{
while (true)
{
SystemTimer::get().sleep(10000);
SystemTimer::get().sleep_ms(10'000);
LockGuard _(s_instance->m_device_lock);
s_instance->m_should_sync = true;
s_instance->m_sync_semaphore.unblock();
s_instance->m_sync_thread_blocker.unblock();
}
}, nullptr, sync_process
)));
@@ -101,7 +101,7 @@ namespace Kernel
{
LockGuard _(m_device_lock);
m_should_sync = true;
m_sync_semaphore.unblock();
m_sync_thread_blocker.unblock();
}
if (should_block)
m_sync_done.block_indefinite();

View File

@@ -37,7 +37,7 @@ namespace Kernel
ASSERT(m_writing_count > 0);
m_writing_count--;
if (m_writing_count == 0)
m_semaphore.unblock();
m_thread_blocker.unblock();
}
BAN::ErrorOr<size_t> Pipe::read_impl(off_t, BAN::ByteSpan buffer)
@@ -48,7 +48,7 @@ namespace Kernel
if (m_writing_count == 0)
return 0;
LockFreeGuard lock_free(m_mutex);
TRY(Thread::current().block_or_eintr_indefinite(m_semaphore));
TRY(Thread::current().block_or_eintr_indefinite(m_thread_blocker));
}
size_t to_copy = BAN::Math::min<size_t>(buffer.size(), m_buffer.size());
@@ -59,7 +59,7 @@ namespace Kernel
m_atime = SystemTimer::get().real_time();
m_semaphore.unblock();
m_thread_blocker.unblock();
return to_copy;
}
@@ -77,7 +77,7 @@ namespace Kernel
m_mtime = current_time;
m_ctime = current_time;
m_semaphore.unblock();
m_thread_blocker.unblock();
return buffer.size();
}

View File

@@ -10,7 +10,7 @@
#include <kernel/Timer/PIT.h>
#define ISR_LIST_X X(0) X(1) X(2) X(3) X(4) X(5) X(6) X(7) X(8) X(9) X(10) X(11) X(12) X(13) X(14) X(15) X(16) X(17) X(18) X(19) X(20) X(21) X(22) X(23) X(24) X(25) X(26) X(27) X(28) X(29) X(30) X(31)
#define IRQ_LIST_X X(0) X(1) X(2) X(3) X(4) X(5) X(6) X(7) X(8) X(9) X(10) X(11) X(12) X(13) X(14) X(15) X(16) X(17) X(18) X(19) X(20) X(21) X(22) X(23) X(24) X(25) X(26) X(27) X(28) X(29) X(30) X(31) X(32)
#define IRQ_LIST_X X(0) X(1) X(2) X(3) X(4) X(5) X(6) X(7) X(8) X(9) X(10) X(11) X(12) X(13) X(14) X(15) X(16) X(17) X(18) X(19) X(20) X(21) X(22) X(23) X(24) X(25) X(26) X(27) X(28) X(29) X(30) X(31)
namespace Kernel
{
@@ -168,8 +168,8 @@ namespace Kernel
asm volatile("cli; 1: hlt; jmp 1b");
}
pid_t tid = Scheduler::current_tid();
pid_t pid = tid ? Process::current().pid() : 0;
const pid_t tid = Thread::current_tid();
const pid_t pid = (tid && Thread::current().has_process()) ? Process::current().pid() : 0;
if (tid)
{
@@ -241,13 +241,13 @@ namespace Kernel
#if ARCH(x86_64)
dwarnln(
"{} (error code: 0x{8H}), pid {}, tid {}\r\n"
"CPU {}: {} (error code: 0x{8H}), pid {}, tid {}\r\n"
"Register dump\r\n"
"rax=0x{16H}, rbx=0x{16H}, rcx=0x{16H}, rdx=0x{16H}\r\n"
"rsp=0x{16H}, rbp=0x{16H}, rdi=0x{16H}, rsi=0x{16H}\r\n"
"rip=0x{16H}, rflags=0x{16H}\r\n"
"cr0=0x{16H}, cr2=0x{16H}, cr3=0x{16H}, cr4=0x{16H}",
isr_exceptions[isr], error, pid, tid,
Processor::current_id(), isr_exceptions[isr], error, pid, tid,
regs->rax, regs->rbx, regs->rcx, regs->rdx,
interrupt_stack->sp, regs->rbp, regs->rdi, regs->rsi,
interrupt_stack->ip, interrupt_stack->flags,
@@ -255,13 +255,13 @@ namespace Kernel
);
#elif ARCH(i686)
dwarnln(
"{} (error code: 0x{8H}), pid {}, tid {}\r\n"
"CPU {}: {} (error code: 0x{8H}), pid {}, tid {}\r\n"
"Register dump\r\n"
"eax=0x{8H}, ebx=0x{8H}, ecx=0x{8H}, edx=0x{8H}\r\n"
"esp=0x{8H}, ebp=0x{8H}, edi=0x{8H}, esi=0x{8H}\r\n"
"eip=0x{8H}, eflags=0x{8H}\r\n"
"cr0=0x{8H}, cr2=0x{8H}, cr3=0x{8H}, cr4=0x{8H}",
isr_exceptions[isr], error, pid, tid,
Processor::current_id(), isr_exceptions[isr], error, pid, tid,
regs->eax, regs->ebx, regs->ecx, regs->edx,
interrupt_stack->sp, regs->ebp, regs->edi, regs->esi,
interrupt_stack->ip, interrupt_stack->flags,
@@ -320,12 +320,17 @@ done:
extern "C" void cpp_yield_handler(InterruptStack* interrupt_stack, InterruptRegisters* interrupt_registers)
{
// yield is raised through kernel software interrupt
ASSERT(!InterruptController::get().is_in_service(IRQ_YIELD));
ASSERT(!GDT::is_user_segment(interrupt_stack->cs));
Processor::scheduler().reschedule(interrupt_stack, interrupt_registers);
}
Processor::enter_interrupt(interrupt_stack, interrupt_registers);
Scheduler::get().irq_reschedule();
Processor::leave_interrupt();
extern "C" void cpp_ipi_handler()
{
ASSERT(InterruptController::get().is_in_service(IRQ_IPI));
InterruptController::get().eoi(IRQ_IPI);
Processor::handle_ipi();
}
extern "C" void cpp_irq_handler(uint32_t irq)
@@ -349,8 +354,6 @@ done:
InterruptController::get().eoi(irq);
if (auto* handler = s_interruptables[irq])
handler->handle_irq();
else if (irq == IRQ_IPI)
Scheduler::get().yield();
else
dprintln("no handler for irq 0x{2H}", irq);
}
@@ -359,7 +362,7 @@ done:
if (current_thread.can_add_signal_to_execute())
current_thread.handle_signal();
Scheduler::get().reschedule_if_idling();
Processor::scheduler().reschedule_if_idle();
ASSERT(Thread::current().state() != Thread::State::Terminated);
@@ -405,6 +408,7 @@ done:
#undef X
extern "C" void asm_yield_handler();
extern "C" void asm_ipi_handler();
extern "C" void asm_syscall_handler();
IDT* IDT::create()
@@ -423,6 +427,7 @@ done:
#undef X
idt->register_interrupt_handler(IRQ_VECTOR_BASE + IRQ_YIELD, asm_yield_handler);
idt->register_interrupt_handler(IRQ_VECTOR_BASE + IRQ_IPI, asm_ipi_handler);
idt->register_syscall_handler(0x80, asm_syscall_handler);

View File

@@ -131,7 +131,7 @@ namespace Kernel
m_event_head = (m_event_head + 1) % m_max_event_count;
m_event_count++;
m_event_semaphore.unblock();
m_event_thread_blocker.unblock();
if (m_type == Type::Keyboard && s_keyboard_device)
s_keyboard_device->notify();
if (m_type == Type::Mouse && s_mouse_device)
@@ -149,7 +149,7 @@ namespace Kernel
m_event_lock.unlock(state);
{
LockFreeGuard _(m_mutex);
TRY(Thread::current().block_or_eintr_indefinite(m_event_semaphore));
TRY(Thread::current().block_or_eintr_indefinite(m_event_thread_blocker));
}
state = m_event_lock.lock();
}
@@ -213,7 +213,7 @@ namespace Kernel
}
LockFreeGuard _(m_mutex);
TRY(Thread::current().block_or_eintr_indefinite(m_semaphore));
TRY(Thread::current().block_or_eintr_indefinite(m_thread_blocker));
}
}
@@ -259,7 +259,7 @@ namespace Kernel
}
LockFreeGuard _(m_mutex);
TRY(Thread::current().block_or_eintr_indefinite(m_semaphore));
TRY(Thread::current().block_or_eintr_indefinite(m_thread_blocker));
}
}

View File

@@ -79,7 +79,7 @@ namespace Kernel
if (it != m_arp_table.end())
return it->value;
}
Scheduler::get().yield();
Processor::yield();
}
return BAN::Error::from_errno(ETIMEDOUT);
@@ -150,7 +150,7 @@ namespace Kernel
while (m_pending_packets.empty())
{
m_pending_lock.unlock(state);
m_pending_semaphore.block_indefinite();
m_pending_thread_blocker.block_indefinite();
state = m_pending_lock.lock();
}
auto packet = m_pending_packets.front();
@@ -178,7 +178,7 @@ namespace Kernel
}
m_pending_packets.push({ .interface = interface, .packet = arp_packet });
m_pending_semaphore.unblock();
m_pending_thread_blocker.unblock();
}
}

View File

@@ -308,7 +308,7 @@ namespace Kernel
while (m_pending_packets.empty())
{
m_pending_lock.unlock(state);
m_pending_semaphore.block_indefinite();
m_pending_thread_blocker.block_indefinite();
state = m_pending_lock.lock();
}
auto packet = m_pending_packets.front();
@@ -367,7 +367,7 @@ namespace Kernel
m_pending_total_size += ipv4_header.total_length;
m_pending_packets.push({ .interface = interface });
m_pending_semaphore.unblock();
m_pending_thread_blocker.unblock();
}
}

View File

@@ -75,7 +75,7 @@ namespace Kernel
while (m_pending_connections.empty())
{
LockFreeGuard _(m_mutex);
TRY(Thread::current().block_or_eintr_indefinite(m_semaphore));
TRY(Thread::current().block_or_eintr_indefinite(m_thread_blocker));
}
auto connection = m_pending_connections.front();
@@ -113,7 +113,7 @@ namespace Kernel
if (SystemTimer::get().ms_since_boot() >= wake_time_ms)
return BAN::Error::from_errno(ECONNABORTED);
LockFreeGuard free(m_mutex);
TRY(Thread::current().block_or_eintr_or_waketime(return_inode->m_semaphore, wake_time_ms, true));
TRY(Thread::current().block_or_eintr_or_waketime_ms(return_inode->m_thread_blocker, wake_time_ms, true));
}
if (address)
@@ -170,7 +170,7 @@ namespace Kernel
if (SystemTimer::get().ms_since_boot() >= wake_time_ms)
return BAN::Error::from_errno(ECONNREFUSED);
LockFreeGuard free(m_mutex);
TRY(Thread::current().block_or_eintr_or_waketime(m_semaphore, wake_time_ms, true));
TRY(Thread::current().block_or_eintr_or_waketime_ms(m_thread_blocker, wake_time_ms, true));
}
return {};
@@ -207,7 +207,7 @@ namespace Kernel
if (m_state != State::Established)
return return_with_maybe_zero();
LockFreeGuard free(m_mutex);
TRY(Thread::current().block_or_eintr_indefinite(m_semaphore));
TRY(Thread::current().block_or_eintr_indefinite(m_thread_blocker));
}
const uint32_t to_recv = BAN::Math::min<uint32_t>(buffer.size(), m_recv_window.data_size);
@@ -249,7 +249,7 @@ namespace Kernel
if (m_send_window.data_size + message.size() <= m_send_window.buffer->size())
break;
LockFreeGuard free(m_mutex);
TRY(Thread::current().block_or_eintr_indefinite(m_semaphore));
TRY(Thread::current().block_or_eintr_indefinite(m_thread_blocker));
}
{
@@ -259,14 +259,14 @@ namespace Kernel
}
const uint32_t target_ack = m_send_window.start_seq + m_send_window.data_size;
m_semaphore.unblock();
m_thread_blocker.unblock();
while (m_send_window.current_ack < target_ack)
{
if (m_state != State::Established)
return return_with_maybe_zero();
LockFreeGuard free(m_mutex);
TRY(Thread::current().block_or_eintr_indefinite(m_semaphore));
TRY(Thread::current().block_or_eintr_indefinite(m_thread_blocker));
}
return message.size();
@@ -597,7 +597,7 @@ namespace Kernel
}
}
m_semaphore.unblock();
m_thread_blocker.unblock();
}
void TCPSocket::set_connection_as_closed()
@@ -743,11 +743,11 @@ namespace Kernel
}
}
m_semaphore.unblock();
m_semaphore.block_with_wake_time(current_ms + retransmit_timeout_ms);
m_thread_blocker.unblock();
m_thread_blocker.block_with_wake_time_ms(current_ms + retransmit_timeout_ms);
}
m_semaphore.unblock();
m_thread_blocker.unblock();
}
}

View File

@@ -70,7 +70,7 @@ namespace Kernel
m_packets.emplace(packet_info);
m_packet_total_size += payload.size();
m_packet_semaphore.unblock();
m_packet_thread_blocker.unblock();
}
BAN::ErrorOr<void> UDPSocket::bind_impl(const sockaddr* address, socklen_t address_len)
@@ -93,7 +93,7 @@ namespace Kernel
while (m_packets.empty())
{
m_packet_lock.unlock(state);
TRY(Thread::current().block_or_eintr_indefinite(m_packet_semaphore));
TRY(Thread::current().block_or_eintr_indefinite(m_packet_thread_blocker));
state = m_packet_lock.lock();
}

View File

@@ -73,7 +73,7 @@ namespace Kernel
return BAN::Error::from_errno(EINVAL);
while (connection_info.pending_connections.empty())
TRY(Thread::current().block_or_eintr_indefinite(connection_info.pending_semaphore));
TRY(Thread::current().block_or_eintr_indefinite(connection_info.pending_thread_blocker));
BAN::RefPtr<UnixDomainSocket> pending;
@@ -81,7 +81,7 @@ namespace Kernel
SpinLockGuard _(connection_info.pending_lock);
pending = connection_info.pending_connections.front();
connection_info.pending_connections.pop();
connection_info.pending_semaphore.unblock();
connection_info.pending_thread_blocker.unblock();
}
BAN::RefPtr<UnixDomainSocket> return_inode;
@@ -162,15 +162,15 @@ namespace Kernel
if (target_info.pending_connections.size() < target_info.pending_connections.capacity())
{
MUST(target_info.pending_connections.push(this));
target_info.pending_semaphore.unblock();
target_info.pending_thread_blocker.unblock();
break;
}
}
TRY(Thread::current().block_or_eintr_indefinite(target_info.pending_semaphore));
TRY(Thread::current().block_or_eintr_indefinite(target_info.pending_thread_blocker));
}
while (!connection_info.connection_done)
Scheduler::get().yield();
Processor::yield();
return {};
}
@@ -241,7 +241,7 @@ namespace Kernel
while (m_packet_sizes.full() || m_packet_size_total + packet.size() > s_packet_buffer_size)
{
m_packet_lock.unlock(state);
TRY(Thread::current().block_or_eintr_indefinite(m_packet_semaphore));
TRY(Thread::current().block_or_eintr_indefinite(m_packet_thread_blocker));
state = m_packet_lock.lock();
}
@@ -252,7 +252,7 @@ namespace Kernel
if (!is_streaming())
m_packet_sizes.push(packet.size());
m_packet_semaphore.unblock();
m_packet_thread_blocker.unblock();
m_packet_lock.unlock(state);
return {};
}
@@ -357,7 +357,7 @@ namespace Kernel
while (m_packet_size_total == 0)
{
m_packet_lock.unlock(state);
TRY(Thread::current().block_or_eintr_indefinite(m_packet_semaphore));
TRY(Thread::current().block_or_eintr_indefinite(m_packet_thread_blocker));
state = m_packet_lock.lock();
}
@@ -376,7 +376,7 @@ namespace Kernel
memmove(packet_buffer, packet_buffer + nread, m_packet_size_total - nread);
m_packet_size_total -= nread;
m_packet_semaphore.unblock();
m_packet_thread_blocker.unblock();
m_packet_lock.unlock(state);
return nread;

View File

@@ -82,12 +82,13 @@ namespace Kernel
void Process::register_to_scheduler()
{
// FIXME: Allow failing...
{
SpinLockGuard _(s_process_lock);
MUST(s_processes.push_back(this));
}
for (auto* thread : m_threads)
MUST(Scheduler::get().add_thread(thread));
MUST(Processor::scheduler().add_thread(thread));
}
Process* Process::create_kernel()
@@ -206,10 +207,10 @@ namespace Kernel
ProcFileSystem::get().on_process_delete(*this);
m_exit_status.exited = true;
m_exit_status.semaphore.unblock();
m_exit_status.thread_blocker.unblock();
while (m_exit_status.waiting > 0)
Scheduler::get().yield();
Processor::yield();
m_process_lock.lock();
@@ -250,16 +251,6 @@ namespace Kernel
m_exit_status.exit_code = __WGENEXITCODE(status, signal);
while (!m_threads.empty())
m_threads.front()->on_exit();
//for (auto* thread : m_threads)
// if (thread != &Thread::current())
// Scheduler::get().terminate_thread(thread);
//if (this == &Process::current())
//{
// m_threads.clear();
// Processor::set_interrupt_state(InterruptState::Disabled);
// Thread::current().setup_process_cleanup();
// Scheduler::get().yield();
//}
}
size_t Process::proc_meminfo(off_t offset, BAN::ByteSpan buffer) const
@@ -534,13 +525,13 @@ namespace Kernel
m_cmdline = BAN::move(str_argv);
m_environ = BAN::move(str_envp);
asm volatile("cli");
Processor::set_interrupt_state(InterruptState::Disabled);
}
m_has_called_exec = true;
m_threads.front()->setup_exec();
Scheduler::get().yield();
Processor::yield();
ASSERT_NOT_REACHED();
}
@@ -603,12 +594,12 @@ namespace Kernel
if (seconds == 0)
return 0;
uint64_t wake_time = SystemTimer::get().ms_since_boot() + seconds * 1000;
Scheduler::get().set_current_thread_sleeping(wake_time);
const uint64_t wake_time_ms = SystemTimer::get().ms_since_boot() + (seconds * 1000);
SystemTimer::get().sleep_ms(seconds * 1000);
uint64_t current_time = SystemTimer::get().ms_since_boot();
if (current_time < wake_time)
return BAN::Math::div_round_up<long>(wake_time - current_time, 1000);
const uint64_t current_ms = SystemTimer::get().ms_since_boot();
if (current_ms < wake_time_ms)
return BAN::Math::div_round_up<long>(wake_time_ms - current_ms, 1000);
return 0;
}
@@ -622,23 +613,21 @@ namespace Kernel
TRY(validate_pointer_access(rmtp, sizeof(timespec)));
}
uint64_t sleep_ms = rqtp->tv_sec * 1000 + BAN::Math::div_round_up<uint64_t>(rqtp->tv_nsec, 1'000'000);
if (sleep_ms == 0)
const uint64_t sleep_ns = (rqtp->tv_sec * 1'000'000'000) + rqtp->tv_nsec;
if (sleep_ns == 0)
return 0;
uint64_t wake_time_ms = SystemTimer::get().ms_since_boot() + sleep_ms;
const uint64_t wake_time_ns = SystemTimer::get().ns_since_boot() + sleep_ns;
SystemTimer::get().sleep_ns(sleep_ns);
Scheduler::get().set_current_thread_sleeping(wake_time_ms);
uint64_t current_ms = SystemTimer::get().ms_since_boot();
if (current_ms < wake_time_ms)
const uint64_t current_ns = SystemTimer::get().ns_since_boot();
if (current_ns < wake_time_ns)
{
if (rmtp)
{
uint64_t remaining_ms = wake_time_ms - current_ms;
rmtp->tv_sec = remaining_ms / 1000;
rmtp->tv_nsec = (remaining_ms % 1000) * 1'000'000;
const uint64_t remaining_ns = wake_time_ns - current_ns;
rmtp->tv_sec = remaining_ns / 1'000'000'000;
rmtp->tv_nsec = remaining_ns % 1'000'000'000;
}
return BAN::Error::from_errno(EINTR);
}
@@ -1140,7 +1129,7 @@ namespace Kernel
break;
LockFreeGuard free(m_process_lock);
SystemTimer::get().sleep(1);
SystemTimer::get().sleep_ms(1);
}
if (arguments->readfds)
@@ -1347,7 +1336,7 @@ namespace Kernel
TRY(validate_pointer_access(args, sizeof(sys_mmap_t)));
}
if (args->prot != PROT_NONE && args->prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC))
if (args->prot != PROT_NONE && (args->prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC)))
return BAN::Error::from_errno(EINVAL);
if (args->flags & MAP_FIXED)
@@ -1628,7 +1617,7 @@ namespace Kernel
{
process.add_pending_signal(signal);
// FIXME: This feels hacky
Scheduler::get().unblock_thread(process.m_threads.front()->tid());
Processor::scheduler().unblock_thread(process.m_threads.front()->tid());
}
return (pid > 0) ? BAN::Iteration::Break : BAN::Iteration::Continue;
}

View File

@@ -1,17 +1,31 @@
#include <kernel/InterruptController.h>
#include <kernel/Memory/kmalloc.h>
#include <kernel/Processor.h>
#include <kernel/Terminal/TerminalDriver.h>
#include <kernel/Thread.h>
#include <kernel/Timer/Timer.h>
extern Kernel::TerminalDriver* g_terminal_driver;
namespace Kernel
{
static constexpr uint32_t MSR_IA32_GS_BASE = 0xC0000101;
ProcessorID Processor::s_bsb_id { PROCESSOR_NONE };
ProcessorID Processor::s_bsb_id { PROCESSOR_NONE };
BAN::Atomic<uint8_t> Processor::s_processor_count { 0 };
BAN::Atomic<bool> Processor::s_is_smp_enabled { false };
BAN::Atomic<bool> Processor::s_should_print_cpu_load { false };
static BAN::Array<Processor, 0xFF> s_processors;
static BAN::Atomic<uint8_t> s_processors_created { 0 };
static ProcessorID read_processor_id()
// 32 bit milli seconds are definitely enough as APs start on boot
static BAN::Atomic<uint32_t> s_first_ap_ready_ms { 0 };
static BAN::Array<Processor, 0xFF> s_processors;
static BAN::Array<ProcessorID, 0xFF> s_processor_ids { PROCESSOR_NONE };
ProcessorID Processor::read_processor_id()
{
uint32_t id;
asm volatile(
@@ -21,16 +35,18 @@ namespace Kernel
: "=b"(id)
:: "eax", "ecx", "edx"
);
return id;
return ProcessorID(id);
}
Processor& Processor::create(ProcessorID id)
{
// bsb is the first processor
if (s_bsb_id == PROCESSOR_NONE)
if (s_bsb_id == PROCESSOR_NONE && id == PROCESSOR_NONE)
s_bsb_id = id = read_processor_id();
if (s_bsb_id == PROCESSOR_NONE || id == PROCESSOR_NONE || id.m_id >= s_processors.size())
Kernel::panic("Trying to initialize invalid processor {}", id.m_id);
auto& processor = s_processors[id];
auto& processor = s_processors[id.m_id];
ASSERT(processor.m_id == PROCESSOR_NONE);
processor.m_id = id;
@@ -44,13 +60,27 @@ namespace Kernel
processor.m_idt = IDT::create();
ASSERT(processor.m_idt);
processor.m_scheduler = MUST(Scheduler::create());
ASSERT(processor.m_scheduler);
SMPMessage* smp_storage = new SMPMessage[0x1000];
ASSERT(smp_storage);
for (size_t i = 0; i < 0xFFF; i++)
smp_storage[i].next = &smp_storage[i + 1];
smp_storage[0xFFF].next = nullptr;
processor.m_smp_pending = nullptr;
processor.m_smp_free = smp_storage;
s_processors_created++;
return processor;
}
Processor& Processor::initialize()
{
auto id = read_processor_id();
auto& processor = s_processors[id];
auto& processor = s_processors[id.m_id];
ASSERT(processor.m_gdt);
processor.m_gdt->load();
@@ -72,41 +102,265 @@ namespace Kernel
return processor;
}
void Processor::allocate_idle_thread()
ProcessorID Processor::id_from_index(size_t index)
{
ASSERT(idle_thread() == nullptr);
auto* idle_thread = MUST(Thread::create_kernel([](void*) { for (;;) asm volatile("hlt"); }, nullptr, nullptr));
write_gs_ptr(offsetof(Processor, m_idle_thread), idle_thread);
ASSERT(index < s_processor_count);
ASSERT(s_processor_ids[index] != PROCESSOR_NONE);
return s_processor_ids[index];
}
void Processor::enter_interrupt(InterruptStack* interrupt_stack, InterruptRegisters* interrupt_registers)
void Processor::wait_until_processors_ready()
{
ASSERT(get_interrupt_state() == InterruptState::Disabled);
ASSERT(read_gs_ptr(offsetof(Processor, m_interrupt_stack)) == nullptr);
write_gs_ptr(offsetof(Processor, m_interrupt_stack), interrupt_stack);
write_gs_ptr(offsetof(Processor, m_interrupt_registers), interrupt_registers);
if (s_processors_created == 1)
{
ASSERT(current_is_bsb());
s_processor_count++;
s_processor_ids[0] = current_id();
}
// wait until bsb is ready
if (current_is_bsb())
{
s_processor_count = 1;
s_processor_ids[0] = current_id();
// single processor system
if (s_processors_created == 1)
return;
// wait until first AP is ready
const uint64_t timeout_ms = SystemTimer::get().ms_since_boot() + 1000;
while (s_first_ap_ready_ms == 0)
{
if (SystemTimer::get().ms_since_boot() >= timeout_ms)
{
dprintln("Could not initialize any APs :(");
return;
}
__builtin_ia32_pause();
}
}
else
{
// wait until bsb is ready, it shall get index 0
while (s_processor_count == 0)
__builtin_ia32_pause();
auto lookup_index = s_processor_count++;
ASSERT(s_processor_ids[lookup_index] == PROCESSOR_NONE);
s_processor_ids[lookup_index] = current_id();
uint32_t expected = 0;
s_first_ap_ready_ms.compare_exchange(expected, SystemTimer::get().ms_since_boot());
}
// wait until all processors are initialized
{
const uint32_t timeout_ms = s_first_ap_ready_ms + 1000;
while (s_processor_count < s_processors_created)
{
if (SystemTimer::get().ms_since_boot() >= timeout_ms)
{
if (current_is_bsb())
dprintln("Could not initialize {} processors :(", s_processors_created - s_processor_count);
break;
}
__builtin_ia32_pause();
}
}
s_is_smp_enabled = true;
}
void Processor::leave_interrupt()
void Processor::handle_ipi()
{
ASSERT(get_interrupt_state() == InterruptState::Disabled);
ASSERT(read_gs_ptr(offsetof(Processor, m_interrupt_stack)) != nullptr);
write_gs_ptr(offsetof(Processor, m_interrupt_stack), nullptr);
write_gs_ptr(offsetof(Processor, m_interrupt_registers), nullptr);
handle_smp_messages();
}
InterruptStack& Processor::get_interrupt_stack()
template<typename F>
void with_atomic_lock(BAN::Atomic<bool>& lock, F callback)
{
ASSERT(get_interrupt_state() == InterruptState::Disabled);
ASSERT(read_gs_ptr(offsetof(Processor, m_interrupt_stack)));
return *read_gs_sized<InterruptStack*>(offsetof(Processor, m_interrupt_stack));
bool expected = false;
while (!lock.compare_exchange(expected, true, BAN::MemoryOrder::memory_order_acquire))
{
__builtin_ia32_pause();
expected = false;
}
callback();
lock.store(false, BAN::MemoryOrder::memory_order_release);
}
InterruptRegisters& Processor::get_interrupt_registers()
void Processor::handle_smp_messages()
{
ASSERT(get_interrupt_state() == InterruptState::Disabled);
ASSERT(read_gs_ptr(offsetof(Processor, m_interrupt_registers)));
return *read_gs_sized<InterruptRegisters*>(offsetof(Processor, m_interrupt_registers));
auto state = get_interrupt_state();
set_interrupt_state(InterruptState::Disabled);
auto processor_id = current_id();
auto& processor = s_processors[processor_id.m_id];
SMPMessage* pending = nullptr;
with_atomic_lock(processor.m_smp_pending_lock,
[&]()
{
pending = processor.m_smp_pending;
processor.m_smp_pending = nullptr;
}
);
bool should_preempt = false;
if (pending)
{
// reverse smp message queue from LIFO to FIFO
{
SMPMessage* reversed = nullptr;
for (SMPMessage* message = pending; message;)
{
SMPMessage* next = message->next;
message->next = reversed;
reversed = message;
message = next;
}
pending = reversed;
}
SMPMessage* last_handled = nullptr;
// handle messages
for (auto* message = pending; message; message = message->next)
{
switch (message->type)
{
case SMPMessage::Type::FlushTLB:
for (size_t i = 0; i < message->flush_tlb.page_count; i++)
asm volatile("invlpg (%0)" :: "r"(message->flush_tlb.vaddr + i * PAGE_SIZE) : "memory");
break;
case SMPMessage::Type::NewThread:
processor.m_scheduler->handle_new_thread_request(message->new_thread);
break;
case SMPMessage::Type::UnblockThread:
processor.m_scheduler->handle_unblock_request(message->unblock_thread);
break;
case SMPMessage::Type::SchedulerPreemption:
should_preempt = true;
break;
}
last_handled = message;
}
with_atomic_lock(processor.m_smp_free_lock,
[&]()
{
last_handled->next = processor.m_smp_free;
processor.m_smp_free = pending;
}
);
}
if (should_preempt)
processor.m_scheduler->preempt();
set_interrupt_state(state);
}
// Queue an SMP message for another processor and optionally notify it with an IPI.
// The message is copied into a node taken from the target processor's free list
// and pushed onto its pending stack (LIFO; the receiving side reverses it back
// to FIFO before handling). Interrupts are disabled for the duration so the
// lock-protected list manipulation cannot be preempted on this CPU.
void Processor::send_smp_message(ProcessorID processor_id, const SMPMessage& message, bool send_ipi)
{
// messages to the current processor make no sense; handle them directly instead
ASSERT(processor_id != current_id());
auto state = get_interrupt_state();
set_interrupt_state(InterruptState::Disabled);
auto& processor = s_processors[processor_id.m_id];
// take free message slot
SMPMessage* storage = nullptr;
with_atomic_lock(processor.m_smp_free_lock,
[&]()
{
storage = processor.m_smp_free;
// NOTE: also asserts storage->next != nullptr, i.e. the very last free
// node is never handed out -- exhausting message storage is fatal here
ASSERT(storage && storage->next);
processor.m_smp_free = storage->next;
}
);
// write message
*storage = message;
// push message to pending queue
with_atomic_lock(processor.m_smp_pending_lock,
[&]()
{
storage->next = processor.m_smp_pending;
processor.m_smp_pending = storage;
}
);
// the IPI can be suppressed (send_ipi == false) when the caller follows
// up with a single broadcast IPI, as broadcast_smp_message() does
if (send_ipi)
InterruptController::get().send_ipi(processor_id);
set_interrupt_state(state);
}
// Queue an SMP message on every other processor and wake them all with one
// broadcast IPI. Silently a no-op until SMP initialization has completed.
void Processor::broadcast_smp_message(const SMPMessage& message)
{
if (!is_smp_enabled())
return;
auto state = get_interrupt_state();
set_interrupt_state(InterruptState::Disabled);
// enqueue on every processor except the current one; per-target IPIs are
// suppressed (send_ipi == false) since a single broadcast IPI follows below
for (size_t i = 0; i < Processor::count(); i++)
{
auto processor_id = s_processor_ids[i];
if (processor_id != current_id())
send_smp_message(processor_id, message, false);
}
InterruptController::get().broadcast_ipi();
set_interrupt_state(state);
}
// Voluntarily give up the CPU by raising the yield software interrupt.
// The stack pointer is temporarily switched to this processor's stack top so
// the interrupt frame is built at a known location; the caller's SP is kept
// in rcx/ecx across the int and restored right after the handler returns.
// Interrupts are disabled around the sequence so the SP switch cannot be
// preempted; the previous interrupt state is restored on exit.
void Processor::yield()
{
auto state = get_interrupt_state();
set_interrupt_state(InterruptState::Disabled);
#if ARCH(x86_64)
asm volatile(
"movq %%rsp, %%rcx;"
"movq %[load_sp], %%rsp;"
"int %[yield];"
"movq %%rcx, %%rsp;"
// NOTE: This is offset by 2 pointers since interrupt without PL change
// does not push SP and SS. This allows accessing "whole" interrupt stack.
:: [load_sp]"r"(Processor::current_stack_top() - 2 * sizeof(uintptr_t)),
[yield]"i"(IRQ_VECTOR_BASE + IRQ_YIELD)
: "memory", "rcx"
);
#elif ARCH(i686)
asm volatile(
"movl %%esp, %%ecx;"
"movl %[load_sp], %%esp;"
"int %[yield];"
"movl %%ecx, %%esp;"
// NOTE: This is offset by 2 pointers since interrupt without PL change
// does not push SP and SS. This allows accessing "whole" interrupt stack.
:: [load_sp]"r"(Processor::current_stack_top() - 2 * sizeof(uintptr_t)),
[yield]"i"(IRQ_VECTOR_BASE + IRQ_YIELD)
: "memory", "ecx"
);
#else
#error
#endif
Processor::set_interrupt_state(state);
}
}

View File

@@ -1,260 +1,715 @@
#include <kernel/Arch.h>
#include <kernel/Attributes.h>
#include <kernel/GDT.h>
#include <BAN/Optional.h>
#include <BAN/Sort.h>
#include <kernel/InterruptController.h>
#include <kernel/Process.h>
#include <kernel/Scheduler.h>
#include <kernel/Thread.h>
#include <kernel/Timer/Timer.h>
#define SCHEDULER_VERIFY_STACK 1
#define DEBUG_SCHEDULER 0
#define SCHEDULER_ASSERT 1
#if SCHEDULER_ASSERT == 0
#undef ASSERT
#define ASSERT(...)
#endif
namespace Kernel
{
static Scheduler* s_instance = nullptr;
static constexpr uint64_t s_reschedule_interval_ns = 10'000'000;
static constexpr uint64_t s_load_balance_interval_ns = 1'000'000'000;
static BAN::Atomic<uint8_t> s_schedulers_initialized { 0 };
struct ProcessorInfo
{
// Nanoseconds this processor reported idle during the last load balance
// window. Initialized to a full window so a fresh processor looks fully idle.
uint64_t idle_time_ns { s_load_balance_interval_ns };
// When the processor has no idle time, an estimate of how many maximally
// loaded threads it is running (computed in Scheduler::do_load_balancing()).
uint32_t max_load_threads { 0 };
};
static SpinLock s_processor_info_time_lock;
static BAN::Array<ProcessorInfo, 0xFF> s_processor_infos;
static BAN::Atomic<size_t> s_next_processor_index { 0 };
void SchedulerQueue::add_thread_to_back(Node* node)
{
	ASSERT(Processor::get_interrupt_state() == InterruptState::Disabled);

	// Append node as the new tail of the doubly linked list.
	node->next = nullptr;
	node->prev = m_tail;
	if (m_tail != nullptr)
		m_tail->next = node;
	else
		m_head = node;
	m_tail = node;
}
void SchedulerQueue::add_thread_with_wake_time(Node* node)
{
ASSERT(Processor::get_interrupt_state() == InterruptState::Disabled);
// Insert node keeping the queue sorted by wake_time_ns (earliest first).
// Fast path: empty queue, or node wakes no earlier than the current tail.
if (m_tail == nullptr || node->wake_time_ns >= m_tail->wake_time_ns)
return add_thread_to_back(node);
// Walk forward to the first node that wakes strictly later than the new one.
Node* next = m_head;
Node* prev = nullptr;
while (next && node->wake_time_ns > next->wake_time_ns)
{
prev = next;
next = next->next;
}
// Splice node between prev and next, fixing head/tail when at either end.
node->next = next;
node->prev = prev;
(next ? next->prev : m_tail) = node;
(prev ? prev->next : m_head) = node;
}
template<typename F>
SchedulerQueue::Node* SchedulerQueue::remove_with_condition(F callback)
{
	ASSERT(Processor::get_interrupt_state() == InterruptState::Disabled);

	// Scan front to back; unlink and return the first node accepted by the
	// callback, or nullptr when no node matches.
	Node* current = m_head;
	while (current != nullptr)
	{
		if (callback(current))
		{
			remove_node(current);
			return current;
		}
		current = current->next;
	}
	return nullptr;
}
void SchedulerQueue::remove_node(Node* node)
{
	// Unlink node from the doubly linked list, updating the head/tail
	// pointers when the node sits at either end.
	if (node->prev != nullptr)
		node->prev->next = node->next;
	else
		m_head = node->next;

	if (node->next != nullptr)
		node->next->prev = node->prev;
	else
		m_tail = node->prev;

	node->prev = nullptr;
	node->next = nullptr;
}
SchedulerQueue::Node* SchedulerQueue::front()
{
ASSERT(Processor::get_interrupt_state() == InterruptState::Disabled);
// Peek at the first node without removing it; the queue must not be empty.
ASSERT(!empty());
return m_head;
}
SchedulerQueue::Node* SchedulerQueue::pop_front()
{
	ASSERT(Processor::get_interrupt_state() == InterruptState::Disabled);

	// Detach and return the head node, or nullptr when the queue is empty.
	if (empty())
		return nullptr;

	Node* const popped = m_head;
	m_head = popped->next;
	if (m_head != nullptr)
		m_head->prev = nullptr;
	else
		m_tail = nullptr;
	popped->next = nullptr;
	return popped;
}
BAN::ErrorOr<Scheduler*> Scheduler::create()
{
auto* scheduler = new Scheduler();
if (scheduler == nullptr)
return BAN::Error::from_errno(ENOMEM);
return scheduler;
}
BAN::ErrorOr<void> Scheduler::initialize()
{
ASSERT(s_instance == nullptr);
s_instance = new Scheduler();
ASSERT(s_instance);
Processor::allocate_idle_thread();
m_idle_thread = TRY(Thread::create_kernel([](void*) { asm volatile("1: hlt; jmp 1b"); }, nullptr, nullptr));
ASSERT(m_idle_thread);
size_t processor_index = 0;
for (; processor_index < Processor::count(); processor_index++)
if (Processor::id_from_index(processor_index) == Processor::current_id())
break;
ASSERT(processor_index < Processor::count());
// each CPU does load balancing at a different time; this calculates the offset relative to other CPUs
m_last_load_balance_ns = s_load_balance_interval_ns * processor_index / Processor::count();
m_idle_ns = -m_last_load_balance_ns;
s_schedulers_initialized++;
while (s_schedulers_initialized < Processor::count())
__builtin_ia32_pause();
return {};
}
Scheduler& Scheduler::get()
{
ASSERT(s_instance);
return *s_instance;
}
void Scheduler::start()
void Scheduler::add_current_to_most_loaded(SchedulerQueue* target_queue)
{
ASSERT(Processor::get_interrupt_state() == InterruptState::Disabled);
ASSERT(!m_active_threads.empty());
// broadcast ipi (yield) for each processor
InterruptController::get().broadcast_ipi();
yield();
ASSERT_NOT_REACHED();
}
Thread& Scheduler::current_thread()
{
// Thread scheduled on this processor, or the idle thread when the
// processor has nothing scheduled.
auto* current = Processor::get_current_thread();
return current ? *current->thread : *Processor::idle_thread();
}
pid_t Scheduler::current_tid()
{
	// During early boot the scheduler does not exist yet; report tid 0.
	return (s_instance == nullptr) ? 0 : Scheduler::get().current_thread().tid();
}
void Scheduler::setup_next_thread()
{
ASSERT(m_lock.current_processor_has_lock());
if (auto* current = Processor::get_current_thread())
bool has_current = false;
for (auto& info : m_most_loaded_threads)
{
auto* thread = current->thread;
if (thread->state() == Thread::State::Terminated)
if (info.node == m_current)
{
PageTable::kernel().load();
delete thread;
delete current;
}
else
{
// thread->state() can be NotStarted when calling exec or cleaning up process
if (thread->state() != Thread::State::NotStarted)
{
thread->interrupt_stack() = Processor::get_interrupt_stack();
thread->interrupt_registers() = Processor::get_interrupt_registers();
}
if (current->should_block)
{
current->should_block = false;
m_blocking_threads.add_with_wake_time(current);
}
else
{
m_active_threads.push_back(current);
}
info.queue = target_queue;
has_current = true;
break;
}
}
SchedulerQueue::Node* node = nullptr;
while (!m_active_threads.empty())
if (!has_current)
{
node = m_active_threads.pop_front();
if (node->thread->state() != Thread::State::Terminated)
size_t index = 0;
for (; index < m_most_loaded_threads.size() - 1; index++)
if (m_most_loaded_threads[index].node == nullptr)
break;
m_most_loaded_threads[index].queue = target_queue;
m_most_loaded_threads[index].node = m_current;
}
BAN::sort::sort(m_most_loaded_threads.begin(), m_most_loaded_threads.end(),
[](const ThreadInfo& a, const ThreadInfo& b) -> bool
{
if (a.node == nullptr || b.node == nullptr)
return a.node;
return a.node->time_used_ns > b.node->time_used_ns;
}
);
}
void Scheduler::update_most_loaded_node_queue(SchedulerQueue::Node* node, SchedulerQueue* target_queue)
{
	ASSERT(Processor::get_interrupt_state() == InterruptState::Disabled);

	// If node is tracked as one of the most loaded threads, record which
	// queue it now lives on; otherwise this is a no-op.
	for (size_t i = 0; i < m_most_loaded_threads.size(); i++)
	{
		if (m_most_loaded_threads[i].node != node)
			continue;
		m_most_loaded_threads[i].queue = target_queue;
		return;
	}
}
void Scheduler::remove_node_from_most_loaded(SchedulerQueue::Node* node)
{
ASSERT(Processor::get_interrupt_state() == InterruptState::Disabled);
size_t i = 0;
for (; i < m_most_loaded_threads.size(); i++)
if (m_most_loaded_threads[i].node == node)
break;
PageTable::kernel().load();
delete node->thread;
delete node;
node = nullptr;
for (; i < m_most_loaded_threads.size() - 1; i++)
m_most_loaded_threads[i] = m_most_loaded_threads[i + 1];
m_most_loaded_threads.back().node = nullptr;
m_most_loaded_threads.back().queue = nullptr;
}
void Scheduler::reschedule(InterruptStack* interrupt_stack, InterruptRegisters* interrupt_registers)
{
ASSERT(Processor::get_interrupt_state() == InterruptState::Disabled);
// If there are no other threads in run queue, reschedule can be no-op :)
if (m_run_queue.empty() && !m_current_will_block && current_thread().state() == Thread::State::Executing)
return;
if (m_current == nullptr)
m_idle_ns += SystemTimer::get().ns_since_boot() - m_idle_start_ns;
else
{
switch (m_current->thread->state())
{
case Thread::State::Terminated:
remove_node_from_most_loaded(m_current);
PageTable::kernel().load();
delete m_current->thread;
delete m_current;
m_thread_count--;
break;
case Thread::State::Executing:
{
const uint64_t current_ns = SystemTimer::get().ns_since_boot();
m_current->thread->interrupt_stack() = *interrupt_stack;
m_current->thread->interrupt_registers() = *interrupt_registers;
m_current->time_used_ns += current_ns - m_current->last_start_ns;
add_current_to_most_loaded(m_current_will_block ? &m_block_queue : &m_run_queue);
if (!m_current_will_block)
m_run_queue.add_thread_to_back(m_current);
else
{
m_current_will_block = false;
m_block_queue.add_thread_with_wake_time(m_current);
}
break;
}
case Thread::State::NotStarted:
ASSERT(!m_current_will_block);
m_current->time_used_ns = 0;
remove_node_from_most_loaded(m_current);
m_run_queue.add_thread_to_back(m_current);
break;
}
}
Processor::set_current_thread(node);
auto* thread = node ? node->thread : Processor::idle_thread();
if (thread->has_process())
thread->process().page_table().load();
else
while ((m_current = m_run_queue.pop_front()))
{
if (m_current->thread->state() != Thread::State::Terminated)
break;
remove_node_from_most_loaded(m_current);
PageTable::kernel().load();
delete m_current->thread;
delete m_current;
m_thread_count--;
}
if (m_current == nullptr)
{
PageTable::kernel().load();
*interrupt_stack = m_idle_thread->interrupt_stack();
*interrupt_registers = m_idle_thread->interrupt_registers();
m_idle_thread->m_state = Thread::State::Executing;
m_idle_start_ns = SystemTimer::get().ns_since_boot();
return;
}
update_most_loaded_node_queue(m_current, nullptr);
auto* thread = m_current->thread;
auto& page_table = thread->has_process() ? thread->process().page_table() : PageTable::kernel();
page_table.load();
if (thread->state() == Thread::State::NotStarted)
thread->m_state = Thread::State::Executing;
Processor::gdt().set_tss_stack(thread->kernel_stack_top());
Processor::get_interrupt_stack() = thread->interrupt_stack();
Processor::get_interrupt_registers() = thread->interrupt_registers();
*interrupt_stack = thread->interrupt_stack();
*interrupt_registers = thread->interrupt_registers();
m_current->last_start_ns = SystemTimer::get().ns_since_boot();
}
void Scheduler::timer_reschedule()
void Scheduler::reschedule_if_idle()
{
ASSERT(Processor::get_interrupt_state() == InterruptState::Disabled);
// Only yield when this processor is idling (no current thread) and there
// is runnable work waiting; otherwise this is a cheap no-op.
if (!m_current && !m_run_queue.empty())
Processor::yield();
}
void Scheduler::preempt()
{
ASSERT(Processor::get_interrupt_state() == InterruptState::Disabled);
if (Processor::is_smp_enabled())
do_load_balancing();
{
SpinLockGuard _(m_lock);
m_blocking_threads.remove_with_wake_time(m_active_threads, SystemTimer::get().ms_since_boot());
const uint64_t current_ns = SystemTimer::get().ns_since_boot();
while (!m_block_queue.empty() && current_ns >= m_block_queue.front()->wake_time_ns)
{
auto* node = m_block_queue.pop_front();
update_most_loaded_node_queue(node, &m_run_queue);
m_run_queue.add_thread_to_back(node);
}
}
// Broadcast IPI to all other processors for them
// to perform reschedule
InterruptController::get().broadcast_ipi();
yield();
}
void Scheduler::yield()
{
// Voluntarily hand the CPU to the scheduler by raising the yield interrupt.
auto state = Processor::get_interrupt_state();
Processor::set_interrupt_state(InterruptState::Disabled);
// Yielding while holding the scheduler lock would deadlock in the handler.
ASSERT(!m_lock.current_processor_has_lock());
#if ARCH(x86_64)
// Save the stack pointer, switch to this processor's stack before raising
// the yield vector, then restore the original stack pointer.
asm volatile(
"movq %%rsp, %%rcx;"
"movq %[load_sp], %%rsp;"
"int %[yield];"
"movq %%rcx, %%rsp;"
// NOTE: This is offset by 2 pointers since interrupt without PL change
// does not push SP and SS. This allows accessing "whole" interrupt stack.
:: [load_sp]"r"(Processor::current_stack_top() - 2 * sizeof(uintptr_t)),
[yield]"i"(IRQ_VECTOR_BASE + IRQ_YIELD)
: "memory", "rcx"
);
#elif ARCH(i686)
// Same sequence as x86_64, using 32-bit registers.
asm volatile(
"movl %%esp, %%ecx;"
"movl %[load_sp], %%esp;"
"int %[yield];"
"movl %%ecx, %%esp;"
// NOTE: This is offset by 2 pointers since interrupt without PL change
// does not push SP and SS. This allows accessing "whole" interrupt stack.
:: [load_sp]"r"(Processor::current_stack_top() - 2 * sizeof(uintptr_t)),
[yield]"i"(IRQ_VECTOR_BASE + IRQ_YIELD)
: "memory", "ecx"
);
#else
#error
#endif
Processor::set_interrupt_state(state);
}
void Scheduler::irq_reschedule()
{
// Entry point from the yield interrupt handler: select the next thread
// to run while holding the scheduler lock.
SpinLockGuard _(m_lock);
setup_next_thread();
}
void Scheduler::reschedule_if_idling()
{
{
SpinLockGuard _(m_lock);
if (Processor::get_current_thread())
return;
if (m_active_threads.empty())
return;
const uint64_t current_ns = SystemTimer::get().ns_since_boot();
if (current_ns >= m_last_reschedule_ns + s_reschedule_interval_ns)
{
m_last_reschedule_ns = current_ns;
Processor::yield();
}
}
}
void Scheduler::timer_interrupt()
{
ASSERT(Processor::get_interrupt_state() == InterruptState::Disabled);
// FIXME: all processors should LAPIC for their preemption
if (Processor::is_smp_enabled())
{
ASSERT(Processor::current_is_bsb());
Processor::broadcast_smp_message({
.type = Processor::SMPMessage::Type::SchedulerPreemption,
.scheduler_preemption = 0 // dummy value
});
}
yield();
preempt();
}
void Scheduler::handle_unblock_request(const UnblockRequest& request)
{
	ASSERT(Processor::get_interrupt_state() == InterruptState::Disabled);

	// Dispatch an SMP unblock request to the matching do_unblock() overload.
	if (request.type == UnblockRequest::Type::ThreadBlocker)
		do_unblock(request.blocker);
	else if (request.type == UnblockRequest::Type::ThreadID)
		do_unblock(request.tid);
	else
		ASSERT_NOT_REACHED();
}
void Scheduler::handle_new_thread_request(const NewThreadRequest& reqeuest)
{
ASSERT(Processor::get_interrupt_state() == InterruptState::Disabled);
if (reqeuest.blocked)
m_block_queue.add_thread_with_wake_time(reqeuest.node);
else
m_run_queue.add_thread_to_back(reqeuest.node);
}
bool Scheduler::do_unblock(ThreadBlocker* blocker)
{
ASSERT(Processor::get_interrupt_state() == InterruptState::Disabled);
// Wake every thread on this scheduler waiting on blocker; returns true if
// at least one thread was unblocked.
// FIXME: This could _easily_ be O(1)
bool did_unblock = false;
// The running thread may be about to block on this blocker; cancel that.
if (m_current && m_current->blocker == blocker && m_current_will_block)
{
m_current_will_block = false;
did_unblock = true;
}
// Move every matching thread from the block queue back to the run queue.
SchedulerQueue::Node* match;
while ((match = m_block_queue.remove_with_condition([blocker](const auto* node) { return node->blocker == blocker; })))
{
dprintln_if(DEBUG_SCHEDULER, "CPU {}: unblock blocker {} (tid {})", Processor::current_id(), blocker, match->thread->tid());
update_most_loaded_node_queue(match, &m_run_queue);
m_run_queue.add_thread_to_back(match);
did_unblock = true;
}
return did_unblock;
}
bool Scheduler::do_unblock(pid_t tid)
{
	ASSERT(Processor::get_interrupt_state() == InterruptState::Disabled);
	// FIXME: This could _easily_ be O(1)

	// The target may be the running thread that is about to block.
	if (m_current_will_block && m_current && m_current->thread->tid() == tid)
	{
		m_current_will_block = false;
		return true;
	}

	// Otherwise search the block queue for the thread.
	auto* node = m_block_queue.remove_with_condition(
		[tid](const auto* candidate) { return candidate->thread->tid() == tid; });
	if (node == nullptr)
		return false;

	dprintln_if(DEBUG_SCHEDULER, "CPU {}: unblock tid {}", Processor::current_id(), tid);
	update_most_loaded_node_queue(node, &m_run_queue);
	m_run_queue.add_thread_to_back(node);
	return true;
}
ProcessorID Scheduler::find_least_loaded_processor() const
{
// Pick the processor that reported the most idle time during the last
// load balance window (breaking ties via max_load_threads). Falls back to
// the current processor when no other candidate is at least as idle.
ProcessorID least_loaded_id = Processor::current_id();
uint64_t most_idle_ns = m_idle_ns;
uint32_t least_max_load_threads = static_cast<uint32_t>(-1);
// NOTE(review): loop index is uint8_t; assumes Processor::count() <= 0xFF
// (size of s_processor_infos) -- confirm.
for (uint8_t i = 0; i < Processor::count(); i++)
{
auto processor_id = Processor::id_from_index(i);
if (processor_id == Processor::current_id())
continue;
// NOTE(review): s_processor_infos is indexed by processor *index* here but
// by processor_id.as_u32() in do_load_balancing() -- verify id == index.
const auto& info = s_processor_infos[i];
// Skip processors that are less idle or more loaded than the best so far.
if (info.idle_time_ns < most_idle_ns || info.max_load_threads > least_max_load_threads)
continue;
least_loaded_id = processor_id;
most_idle_ns = info.idle_time_ns;
least_max_load_threads = info.max_load_threads;
}
return least_loaded_id;
}
void Scheduler::do_load_balancing()
{
ASSERT(Processor::get_interrupt_state() == InterruptState::Disabled);
// Runs at most once per load balance interval; called from preemption.
const uint64_t current_ns = SystemTimer::get().ns_since_boot();
if (current_ns < m_last_load_balance_ns + s_load_balance_interval_ns)
return;
// Account time since the last bookkeeping point either as idle time or as
// CPU time used by the currently running thread.
if (m_current == nullptr)
{
m_idle_ns += current_ns - m_idle_start_ns;
m_idle_start_ns = current_ns;
}
else
{
m_current->time_used_ns += current_ns - m_current->last_start_ns;
m_current->last_start_ns = current_ns;
add_current_to_most_loaded(nullptr);
}
// Debug dump of per-CPU load and per-thread usage. remove_with_condition()
// is only used to iterate the queues: the callbacks always return false,
// so nothing is actually removed.
if constexpr(DEBUG_SCHEDULER)
{
const uint64_t duration_ns = current_ns - m_last_load_balance_ns;
const uint64_t processing_ns = duration_ns - m_idle_ns;
{
const uint64_t load_percent_x1000 = BAN::Math::div_round_up<uint64_t>(processing_ns * 100'000, duration_ns);
dprintln("CPU {}: { 2}.{3}% ({} threads)", Processor::current_id(), load_percent_x1000 / 1000, load_percent_x1000 % 1000, m_thread_count);
}
if (m_current)
{
const char* name = "unknown";
if (m_current->thread->has_process() && m_current->thread->process().is_userspace() && m_current->thread->process().userspace_info().argv)
name = m_current->thread->process().userspace_info().argv[0];
const uint64_t load_percent_x1000 = BAN::Math::div_round_up<uint64_t>(m_current->time_used_ns * 100'000, processing_ns);
dprintln(" tid { 2}: { 3}.{3}% <{}> current", m_current->thread->tid(), load_percent_x1000 / 1000, load_percent_x1000 % 1000, name);
}
m_run_queue.remove_with_condition(
[&](SchedulerQueue::Node* node)
{
const uint64_t load_percent_x1000 = BAN::Math::div_round_up<uint64_t>(node->time_used_ns * 100'000, processing_ns);
dprintln(" tid { 2}: { 3}.{3}% active", node->thread->tid(), load_percent_x1000 / 1000, load_percent_x1000 % 1000);
return false;
}
);
m_block_queue.remove_with_condition(
[&](SchedulerQueue::Node* node)
{
const uint64_t load_percent_x1000 = BAN::Math::div_round_up<uint64_t>(node->time_used_ns * 100'000, processing_ns);
dprintln(" tid { 2}: { 3}.{3}% blocked", node->thread->tid(), load_percent_x1000 / 1000, load_percent_x1000 % 1000);
return false;
}
);
}
// The shared per-processor info table is only try-locked, never waited on;
// if another processor holds it, skip this round of balancing.
if (!s_processor_info_time_lock.try_lock_interrupts_disabled())
{
dprintln_if(DEBUG_SCHEDULER, "Load balancing cannot keep up");
return;
}
// Fully loaded processor: estimate how many "maximum load" threads it is
// running from the single most loaded thread's CPU usage.
if (m_idle_ns == 0 && m_should_calculate_max_load_threads)
{
const auto& most_loaded_thread = m_most_loaded_threads.front();
if (most_loaded_thread.node == nullptr || most_loaded_thread.node->time_used_ns == 0)
s_processor_infos[Processor::current_id().as_u32()].max_load_threads = 0;
else
{
const uint64_t duration_ns = current_ns - m_last_load_balance_ns;
const uint64_t max_thread_load_x1000 = 1000 * m_most_loaded_threads.front().node->time_used_ns / duration_ns;
// NOTE(review): if the most loaded thread used less than 1/1000 of the
// window, max_thread_load_x1000 truncates to 0 and the division below
// divides by zero -- confirm this cannot happen when m_idle_ns == 0.
const uint64_t max_load_thread_count = ((2000 / max_thread_load_x1000) + 1) / 2;
s_processor_infos[Processor::current_id().as_u32()].max_load_threads = max_load_thread_count;
}
}
constexpr auto absolute_difference_u64 = [](uint64_t a, uint64_t b) { return (a < b) ? (b - a) : (a - b); };
// Try to migrate the heaviest tracked threads (index 0, the very heaviest,
// and the currently running thread are never migrated) to the least loaded
// processor, updating the shared estimates as threads are handed over.
for (size_t i = 1; i < m_most_loaded_threads.size(); i++)
{
auto& thread_info = m_most_loaded_threads[i];
if (thread_info.node == nullptr)
break;
if (thread_info.node == m_current || thread_info.queue == nullptr)
continue;
auto least_loaded_id = find_least_loaded_processor();
if (least_loaded_id == Processor::current_id())
break;
auto& most_idle_info = s_processor_infos[least_loaded_id.as_u32()];
auto& my_info = s_processor_infos[Processor::current_id().as_u32()];
if (m_idle_ns == 0)
{
// This processor had no idle time: balance by max load thread counts.
if (my_info.max_load_threads == 0)
break;
if (most_idle_info.idle_time_ns == 0)
{
// Target is also fully loaded; move only if it strictly helps.
if (most_idle_info.max_load_threads + 1 > my_info.max_load_threads - 1)
break;
my_info.max_load_threads -= 1;
most_idle_info.max_load_threads += 1;
dprintln_if(DEBUG_SCHEDULER, "CPU {}: sending tid {} to CPU {} (max load)", Processor::current_id(), thread_info.node->thread->tid(), least_loaded_id);
}
else
{
my_info.max_load_threads -= 1;
most_idle_info.idle_time_ns = 0;
most_idle_info.max_load_threads = 1;
dprintln_if(DEBUG_SCHEDULER, "CPU {}: sending tid {} to CPU {}", Processor::current_id(), thread_info.node->thread->tid(), least_loaded_id);
}
}
else
{
// Partially idle processor: balance by estimated processing time.
const uint64_t my_current_proc_ns = s_load_balance_interval_ns - BAN::Math::min(s_load_balance_interval_ns, m_idle_ns);
const uint64_t other_current_proc_ns = s_load_balance_interval_ns - BAN::Math::min(s_load_balance_interval_ns, most_idle_info.idle_time_ns);
const uint64_t current_proc_diff_ns = absolute_difference_u64(my_current_proc_ns, other_current_proc_ns);
const uint64_t my_new_proc_ns = my_current_proc_ns - BAN::Math::min(thread_info.node->time_used_ns, my_current_proc_ns);
const uint64_t other_new_proc_ns = other_current_proc_ns + thread_info.node->time_used_ns;
const uint64_t new_proc_diff_ns = absolute_difference_u64(my_new_proc_ns, other_new_proc_ns);
// require 10% decrease between CPU loads to send thread to other CPU
if (new_proc_diff_ns >= current_proc_diff_ns || (100 * (current_proc_diff_ns - new_proc_diff_ns) / current_proc_diff_ns) < 10)
continue;
most_idle_info.idle_time_ns -= BAN::Math::min(thread_info.node->time_used_ns, most_idle_info.idle_time_ns);
m_idle_ns += thread_info.node->time_used_ns;
dprintln_if(DEBUG_SCHEDULER, "CPU {}: sending tid {} to CPU {}", Processor::current_id(), thread_info.node->thread->tid(), least_loaded_id);
}
thread_info.node->time_used_ns = 0;
// Unlink the thread from its local queue before handing it over.
{
auto& my_queue = (thread_info.queue == &m_run_queue) ? m_run_queue : m_block_queue;
my_queue.remove_node(thread_info.node);
m_thread_count--;
}
Processor::send_smp_message(least_loaded_id, {
.type = Processor::SMPMessage::Type::NewThread,
.new_thread = {
.node = thread_info.node,
.blocked = thread_info.queue == &m_block_queue
}
});
thread_info.node = nullptr;
thread_info.queue = nullptr;
if (m_idle_ns == 0)
break;
}
// Publish this processor's idle time for other schedulers to read.
s_processor_infos[Processor::current_id().as_u32()].idle_time_ns = m_idle_ns;
s_processor_info_time_lock.unlock(InterruptState::Disabled);
// Reset per-window accounting for the next load balance interval.
if (m_current)
m_current->time_used_ns = 0;
for (auto& thread_info : m_most_loaded_threads)
thread_info = {};
m_run_queue .remove_with_condition([&](SchedulerQueue::Node* node) { node->time_used_ns = 0; return false; });
m_block_queue.remove_with_condition([&](SchedulerQueue::Node* node) { node->time_used_ns = 0; return false; });
m_idle_ns = 0;
m_should_calculate_max_load_threads = true;
m_last_load_balance_ns += s_load_balance_interval_ns;
}
BAN::ErrorOr<void> Scheduler::add_thread(Thread* thread)
{
auto* node = new SchedulerQueue::Node(thread);
if (node == nullptr)
auto* new_node = new SchedulerQueue::Node(thread);
if (new_node == nullptr)
return BAN::Error::from_errno(ENOMEM);
SpinLockGuard _(m_lock);
m_active_threads.push_back(node);
const size_t processor_index = s_next_processor_index++ % Processor::count();
const auto processor_id = Processor::id_from_index(processor_index);
if (processor_id == Processor::current_id())
{
auto state = Processor::get_interrupt_state();
Processor::set_interrupt_state(InterruptState::Disabled);
m_run_queue.add_thread_to_back(new_node);
m_thread_count++;
Processor::set_interrupt_state(state);
}
else
{
Processor::send_smp_message(processor_id, {
.type = Processor::SMPMessage::Type::NewThread,
.new_thread = {
.node = new_node,
.blocked = false
}
});
}
return {};
}
void Scheduler::terminate_thread(Thread* thread)
void Scheduler::block_current_thread(ThreadBlocker* blocker, uint64_t wake_time_ns)
{
auto state = m_lock.lock();
auto state = Processor::get_interrupt_state();
Processor::set_interrupt_state(InterruptState::Disabled);
ASSERT(thread->state() == Thread::State::Executing);
thread->m_state = Thread::State::Terminated;
thread->interrupt_stack().sp = Processor::current_stack_top();
m_current->blocker = blocker;
m_current->wake_time_ns = wake_time_ns;
m_current_will_block = true;
Processor::yield();
m_lock.unlock(InterruptState::Disabled);
Processor::set_interrupt_state(state);
}
// actual deletion will be done while rescheduling
void Scheduler::unblock_threads(ThreadBlocker* blocker)
{
auto state = Processor::get_interrupt_state();
Processor::set_interrupt_state(InterruptState::Disabled);
if (&current_thread() == thread)
do_unblock(blocker);
Processor::broadcast_smp_message({
.type = Processor::SMPMessage::Type::UnblockThread,
.unblock_thread = {
.type = UnblockRequest::Type::ThreadBlocker,
.blocker = blocker
}
});
Processor::set_interrupt_state(state);
}
void Scheduler::unblock_thread(pid_t tid)
{
auto state = Processor::get_interrupt_state();
Processor::set_interrupt_state(InterruptState::Disabled);
if (!do_unblock(tid))
{
yield();
ASSERT_NOT_REACHED();
Processor::broadcast_smp_message({
.type = Processor::SMPMessage::Type::UnblockThread,
.unblock_thread = {
.type = UnblockRequest::Type::ThreadID,
.tid = tid
}
});
}
Processor::set_interrupt_state(state);
}
void Scheduler::set_current_thread_sleeping_impl(Semaphore* semaphore, uint64_t wake_time)
Thread& Scheduler::current_thread()
{
auto state = m_lock.lock();
auto* current = Processor::get_current_thread();
current->semaphore = semaphore;
current->wake_time = wake_time;
current->should_block = true;
m_lock.unlock(InterruptState::Disabled);
yield();
Processor::set_interrupt_state(state);
if (m_current)
return *m_current->thread;
return *m_idle_thread;
}
void Scheduler::set_current_thread_sleeping(uint64_t wake_time)
Thread& Scheduler::idle_thread()
{
set_current_thread_sleeping_impl(nullptr, wake_time);
return *m_idle_thread;
}
void Scheduler::block_current_thread(Semaphore* semaphore, uint64_t wake_time)
pid_t Scheduler::current_tid() const
{
set_current_thread_sleeping_impl(semaphore, wake_time);
return m_current ? m_current->thread->tid() : 0;
}
void Scheduler::unblock_threads(Semaphore* semaphore)
bool Scheduler::is_idle() const
{
SpinLockGuard _(m_lock);
m_blocking_threads.remove_with_condition(m_active_threads, [&](auto* node) { return node->semaphore == semaphore; });
}
void Scheduler::unblock_thread(pid_t tid)
{
SpinLockGuard _(m_lock);
m_blocking_threads.remove_with_condition(m_active_threads, [&](auto* node) { return node->thread->tid() == tid; });
return m_current == nullptr;
}
}

View File

@@ -1,28 +0,0 @@
#include <kernel/Scheduler.h>
#include <kernel/Semaphore.h>
#include <kernel/Timer/Timer.h>
namespace Kernel
{
// Semaphore: thin wrappers around the global scheduler's block/unblock
// interface. Wake times are absolute milliseconds since boot.
void Semaphore::block_indefinite()
{
// ~0 as the wake time: block until explicitly unblocked.
Scheduler::get().block_current_thread(this, ~(uint64_t)0);
}
void Semaphore::block_with_timeout(uint64_t timeout_ms)
{
// Relative timeout converted to an absolute wake time.
Scheduler::get().block_current_thread(this, SystemTimer::get().ms_since_boot() + timeout_ms);
}
void Semaphore::block_with_wake_time(uint64_t wake_time)
{
Scheduler::get().block_current_thread(this, wake_time);
}
void Semaphore::unblock()
{
// Wake every thread currently blocked on this semaphore.
Scheduler::get().unblock_threads(this);
}
}

View File

@@ -8,7 +8,7 @@
namespace Kernel
{
static constexpr uint64_t s_ata_timeout = 1000;
static constexpr uint64_t s_ata_timeout_ms = 1000;
static void start_cmd(volatile HBAPortMemorySpace* port)
{
@@ -118,9 +118,9 @@ namespace Kernel
command.c = 1;
command.command = ATA_COMMAND_IDENTIFY;
uint64_t timeout = SystemTimer::get().ms_since_boot() + s_ata_timeout;
const uint64_t timeout_ms = SystemTimer::get().ms_since_boot() + s_ata_timeout_ms;
while (m_port->tfd & (ATA_STATUS_BSY | ATA_STATUS_DRQ))
if (SystemTimer::get().ms_since_boot() >= timeout)
if (SystemTimer::get().ms_since_boot() >= timeout_ms)
return BAN::Error::from_errno(ETIMEDOUT);
m_port->ci = 1 << slot.value();
@@ -158,17 +158,17 @@ namespace Kernel
{
static constexpr uint64_t poll_timeout_ms = 10;
auto start_time = SystemTimer::get().ms_since_boot();
const auto start_time_ms = SystemTimer::get().ms_since_boot();
while (SystemTimer::get().ms_since_boot() < start_time + poll_timeout_ms)
while (SystemTimer::get().ms_since_boot() < start_time_ms + poll_timeout_ms)
if (!(m_port->ci & (1 << command_slot)))
return {};
// FIXME: This should actually block once semaphores support blocking with timeout.
// FIXME: This should actually block once ThreadBlocker support blocking with timeout.
// This doesn't allow scheduler to go properly idle.
while (SystemTimer::get().ms_since_boot() < start_time + s_ata_timeout)
while (SystemTimer::get().ms_since_boot() < start_time_ms + s_ata_timeout_ms)
{
Scheduler::get().yield();
Processor::yield();
if (!(m_port->ci & (1 << command_slot)))
return {};
}

View File

@@ -67,9 +67,7 @@ namespace Kernel
static void select_delay()
{
auto time = SystemTimer::get().ns_since_boot();
while (SystemTimer::get().ns_since_boot() < time + 400)
continue;
SystemTimer::get().sleep_ns(400);
}
void ATABus::select_device(bool secondary)
@@ -106,7 +104,7 @@ namespace Kernel
io_write(ATA_PORT_CONTROL, ATA_CONTROL_nIEN);
io_write(ATA_PORT_COMMAND, ATA_COMMAND_IDENTIFY);
SystemTimer::get().sleep(1);
SystemTimer::get().sleep_ms(1);
// No device on port
if (io_read(ATA_PORT_STATUS) == 0)
@@ -130,7 +128,7 @@ namespace Kernel
}
io_write(ATA_PORT_COMMAND, ATA_COMMAND_IDENTIFY_PACKET);
SystemTimer::get().sleep(1);
SystemTimer::get().sleep_ms(1);
if (auto res = wait(true); res.is_error())
{

View File

@@ -1,6 +1,6 @@
#include <kernel/Lock/LockGuard.h>
#include <kernel/Scheduler.h>
#include <kernel/Storage/NVMe/Queue.h>
#include <kernel/Thread.h>
#include <kernel/Timer/Timer.h>
namespace Kernel
@@ -44,7 +44,7 @@ namespace Kernel
m_doorbell.cq_head = m_cq_head;
m_semaphore.unblock();
m_thread_blocker.unblock();
}
uint16_t NVMeQueue::submit_command(NVMe::SubmissionQueueEntry& sqe)
@@ -66,15 +66,15 @@ namespace Kernel
m_doorbell.sq_tail = m_sq_tail;
}
const uint64_t start_time = SystemTimer::get().ms_since_boot();
while (!(m_done_mask & cid_mask) && SystemTimer::get().ms_since_boot() < start_time + s_nvme_command_poll_timeout_ms)
const uint64_t start_time_ms = SystemTimer::get().ms_since_boot();
while (!(m_done_mask & cid_mask) && SystemTimer::get().ms_since_boot() < start_time_ms + s_nvme_command_poll_timeout_ms)
continue;
// FIXME: Here is a possible race condition if done mask is set before
// scheduler has put the current thread blocking.
// EINTR should also be handled here.
while (!(m_done_mask & cid_mask) && SystemTimer::get().ms_since_boot() < start_time + s_nvme_command_timeout_ms)
Scheduler::get().block_current_thread(&m_semaphore, start_time + s_nvme_command_timeout_ms);
while (!(m_done_mask & cid_mask) && SystemTimer::get().ms_since_boot() < start_time_ms + s_nvme_command_timeout_ms)
m_thread_blocker.block_with_wake_time_ms(start_time_ms + s_nvme_command_timeout_ms);
if (m_done_mask & cid_mask)
{
@@ -93,7 +93,7 @@ namespace Kernel
while (~m_used_mask == 0)
{
m_lock.unlock(state);
m_semaphore.block_with_timeout(s_nvme_command_timeout_ms);
m_thread_blocker.block_with_timeout_ms(s_nvme_command_timeout_ms);
state = m_lock.lock();
}

View File

@@ -57,7 +57,7 @@ namespace Kernel
if ((flags & TTY_FLAG_ENABLE_INPUT) && !m_tty_ctrl.receive_input)
{
m_tty_ctrl.receive_input = true;
m_tty_ctrl.semaphore.unblock();
m_tty_ctrl.thread_blocker.unblock();
}
if (flags & TTY_FLAG_ENABLE_OUTPUT)
m_tty_ctrl.draw_graphics = true;
@@ -94,7 +94,7 @@ namespace Kernel
while (true)
{
while (!TTY::current()->m_tty_ctrl.receive_input)
TTY::current()->m_tty_ctrl.semaphore.block_indefinite();
TTY::current()->m_tty_ctrl.thread_blocker.block_indefinite();
LibInput::RawKeyEvent event;
size_t read = MUST(inode->read(0, BAN::ByteSpan::from(event)));
@@ -237,7 +237,7 @@ namespace Kernel
if (ch == '\x04' && m_termios.canonical)
{
m_output.flush = true;
m_output.semaphore.unblock();
m_output.thread_blocker.unblock();
return;
}
@@ -279,7 +279,7 @@ namespace Kernel
if (ch == '\n' || !m_termios.canonical)
{
m_output.flush = true;
m_output.semaphore.unblock();
m_output.thread_blocker.unblock();
}
}
@@ -338,7 +338,7 @@ namespace Kernel
while (!m_output.flush)
{
LockFreeGuard _(m_mutex);
TRY(Thread::current().block_or_eintr_indefinite(m_output.semaphore));
TRY(Thread::current().block_or_eintr_indefinite(m_output.thread_blocker));
}
if (m_output.bytes == 0)
@@ -356,7 +356,7 @@ namespace Kernel
if (m_output.bytes == 0)
m_output.flush = false;
m_output.semaphore.unblock();
m_output.thread_blocker.unblock();
return to_copy;
}

View File

@@ -120,7 +120,12 @@ namespace Kernel
Thread& Thread::current()
{
return Scheduler::get().current_thread();
return Processor::scheduler().current_thread();
}
pid_t Thread::current_tid()
{
// Tid of the thread running on the current processor's scheduler.
return Processor::scheduler().current_tid();
}
Process& Thread::process()
@@ -396,36 +401,36 @@ namespace Kernel
{
m_signal_pending_mask |= mask;
if (this != &Thread::current())
Scheduler::get().unblock_thread(tid());
Processor::scheduler().unblock_thread(tid());
return true;
}
return false;
}
// Block on `thread_blocker` with no timeout; returns EINTR if a signal is
// pending either before or after the block (renamed from Semaphore overload).
BAN::ErrorOr<void> Thread::block_or_eintr_indefinite(Semaphore& semaphore)
BAN::ErrorOr<void> Thread::block_or_eintr_indefinite(ThreadBlocker& thread_blocker)
{
if (is_interrupted_by_signal())
return BAN::Error::from_errno(EINTR);
semaphore.block_indefinite();
thread_blocker.block_indefinite();
if (is_interrupted_by_signal())
return BAN::Error::from_errno(EINTR);
return {};
}
// Relative-timeout variant: converts `timeout_ns` to an absolute wake time
// and delegates to the waketime overload below.
BAN::ErrorOr<void> Thread::block_or_eintr_or_timeout(Semaphore& semaphore, uint64_t timeout_ms, bool etimedout)
BAN::ErrorOr<void> Thread::block_or_eintr_or_timeout_ns(ThreadBlocker& thread_blocker, uint64_t timeout_ns, bool etimedout)
{
uint64_t wake_time_ms = SystemTimer::get().ms_since_boot() + timeout_ms;
return block_or_eintr_or_waketime(semaphore, wake_time_ms, etimedout);
const uint64_t wake_time_ns = SystemTimer::get().ns_since_boot() + timeout_ns;
return block_or_eintr_or_waketime_ns(thread_blocker, wake_time_ns, etimedout);
}
// Absolute-waketime variant; `etimedout` selects whether expiry maps to
// ETIMEDOUT or is reported as success.
BAN::ErrorOr<void> Thread::block_or_eintr_or_waketime(Semaphore& semaphore, uint64_t wake_time_ms, bool etimedout)
BAN::ErrorOr<void> Thread::block_or_eintr_or_waketime_ns(ThreadBlocker& thread_blocker, uint64_t wake_time_ns, bool etimedout)
{
if (is_interrupted_by_signal())
return BAN::Error::from_errno(EINTR);
semaphore.block_with_wake_time(wake_time_ms);
// BUG(review): `wake_time_ns` is already absolute, but block_with_timeout_ns()
// treats its argument as relative and adds ns_since_boot() again (see
// ThreadBlocker.cpp), roughly doubling the wait. Should call
// block_with_wake_time_ns(wake_time_ns) instead.
thread_blocker.block_with_timeout_ns(wake_time_ns);
if (is_interrupted_by_signal())
return BAN::Error::from_errno(EINTR);
if (etimedout && SystemTimer::get().ms_since_boot() >= wake_time_ms)
// BUG(review): unit mismatch — ms_since_boot() (milliseconds) is compared
// against wake_time_ns (nanoseconds), so the ETIMEDOUT check is effectively
// off by a factor of 1'000'000. Should compare ns_since_boot() >= wake_time_ns.
if (etimedout && SystemTimer::get().ms_since_boot() >= wake_time_ns)
return BAN::Error::from_errno(ETIMEDOUT);
return {};
}
@@ -444,15 +449,12 @@ namespace Kernel
{
Processor::set_interrupt_state(InterruptState::Disabled);
setup_process_cleanup();
Scheduler::get().yield();
Processor::yield();
ASSERT_NOT_REACHED();
}
else
Scheduler::get().terminate_thread(this);
}
else
{
Scheduler::get().terminate_thread(this);
}
m_state = State::Terminated;
Processor::yield();
ASSERT_NOT_REACHED();
}

View File

@@ -0,0 +1,28 @@
#include <kernel/Processor.h>
#include <kernel/ThreadBlocker.h>
#include <kernel/Timer/Timer.h>
namespace Kernel
{
// ThreadBlocker replaces the old misnamed "Semaphore": threads park on it
// through the current processor's scheduler and are woken by unblock().
// Block the calling thread until unblock(); the all-ones wake time acts as
// the scheduler's "no timeout" sentinel.
void ThreadBlocker::block_indefinite()
{
Processor::scheduler().block_current_thread(this, ~static_cast<uint64_t>(0));
}
// Block with a RELATIVE timeout: converted here to an absolute wake time
// (ns since boot) before handing off to the scheduler. Callers must not
// pass an absolute time to this overload.
void ThreadBlocker::block_with_timeout_ns(uint64_t timeout_ns)
{
Processor::scheduler().block_current_thread(this, SystemTimer::get().ns_since_boot() + timeout_ns);
}
// Block until an ABSOLUTE wake time (ns since boot) or until unblock(),
// whichever comes first.
void ThreadBlocker::block_with_wake_time_ns(uint64_t wake_time_ns)
{
Processor::scheduler().block_current_thread(this, wake_time_ns);
}
// Wake all threads parked on this blocker. NOTE(review): only the local
// processor's scheduler is consulted here; threads blocked on this object
// from another processor are presumably reached via the SMP-event path —
// TODO confirm.
void ThreadBlocker::unblock()
{
Processor::scheduler().unblock_threads(this);
}
}

View File

@@ -4,7 +4,7 @@
#include <kernel/InterruptController.h>
#include <kernel/Memory/PageTable.h>
#include <kernel/MMIO.h>
#include <kernel/Scheduler.h>
#include <kernel/Processor.h>
#include <kernel/Timer/HPET.h>
#define HPET_PERIOD_MAX 0x05F5E100
@@ -272,7 +272,7 @@ namespace Kernel
m_last_ticks = current_ticks;
}
Scheduler::get().timer_reschedule();
Processor::scheduler().timer_interrupt();
}
uint64_t HPET::ms_since_boot() const

View File

@@ -1,7 +1,7 @@
#include <kernel/IDT.h>
#include <kernel/InterruptController.h>
#include <kernel/IO.h>
#include <kernel/Scheduler.h>
#include <kernel/Processor.h>
#include <kernel/Timer/PIT.h>
#define PIT_IRQ 0
@@ -58,7 +58,7 @@ namespace Kernel
SpinLockGuard _(m_lock);
m_system_time++;
}
Kernel::Scheduler::get().timer_reschedule();
Processor::scheduler().timer_interrupt();
}
uint64_t PIT::read_counter() const

View File

@@ -69,15 +69,17 @@ namespace Kernel
return m_timer->time_since_boot();
}
// Sleep the current thread for at least `ns` nanoseconds (renamed from the
// millisecond-based sleep()). A zero duration returns immediately.
void SystemTimer::sleep(uint64_t ms) const
void SystemTimer::sleep_ns(uint64_t ns) const
{
if (ms == 0)
if (ns == 0)
return;
uint64_t wake_time = ms_since_boot() + ms;
Scheduler::get().set_current_thread_sleeping(wake_time);
uint64_t current_time = ms_since_boot();
if (current_time < wake_time)
dwarnln("sleep woke {} ms too soon", wake_time - current_time);
const uint64_t wake_time_ns = ns_since_boot() + ns;
// nullptr blocker: a pure timed sleep — nothing can unblock() us early,
// only the scheduler's wake-time expiry.
Processor::scheduler().block_current_thread(nullptr, wake_time_ns);
// NOTE(review): the early-wake diagnostic below was carried over from the
// old implementation but left disabled — re-enable once wake-time accuracy
// of the new per-processor scheduler is trusted.
//const uint64_t current_time_ns = ns_since_boot();
//if (current_time_ns < wake_time_ns)
// dwarnln("sleep woke {} ms too soon", BAN::Math::div_round_up<uint64_t>(wake_time_ns - current_time_ns, 1'000'000));
}
timespec SystemTimer::real_time() const

View File

@@ -250,7 +250,7 @@ namespace Kernel
bool expected { true };
while (!m_port_changed.compare_exchange(expected, false))
{
m_port_semaphore.block_with_timeout(100);
m_port_thread_blocker.block_with_timeout_ms(100);
expected = true;
}
}
@@ -482,7 +482,7 @@ namespace Kernel
break;
}
m_port_changed = true;
m_port_semaphore.unblock();
m_port_thread_blocker.unblock();
break;
}
case XHCI::TRBType::BandwidthRequestEvent:

View File

@@ -108,7 +108,7 @@ extern "C" void kernel_main(uint32_t boot_magic, uint32_t boot_info)
parse_boot_info(boot_magic, boot_info);
dprintln("boot info parsed");
Processor::create(0);
Processor::create(PROCESSOR_NONE);
Processor::initialize();
dprintln("BSP initialized");
@@ -167,12 +167,11 @@ extern "C" void kernel_main(uint32_t boot_magic, uint32_t boot_info)
Random::initialize();
dprintln("RNG initialized");
MUST(Scheduler::initialize());
dprintln("Scheduler initialized");
Processor::wait_until_processors_ready();
MUST(Processor::scheduler().initialize());
Scheduler& scheduler = Scheduler::get();
Process::create_kernel(init2, nullptr);
scheduler.start();
Processor::yield();
ASSERT_NOT_REACHED();
}
@@ -233,14 +232,11 @@ extern "C" void ap_main()
Processor::initialize();
PageTable::kernel().initial_load();
Processor::allocate_idle_thread();
InterruptController::get().enable();
dprintln("ap{} initialized", Processor::current_id());
Processor::wait_until_processors_ready();
MUST(Processor::scheduler().initialize());
// wait until scheduler is started and we get irq for reschedule
Processor::set_interrupt_state(InterruptState::Enabled);
while (true)
asm volatile("hlt");
asm volatile("sti; 1: hlt; jmp 1b");
ASSERT_NOT_REACHED();
}