From 9eb3834ae54f9586a900aec956c38f21c6d0cc06 Mon Sep 17 00:00:00 2001 From: Bananymous Date: Thu, 8 Jan 2026 13:30:04 +0200 Subject: [PATCH] Kernel: Add syscall-less clock_gettime If the processor has invariant TSC it can be used to measure time. We keep track of the last nanosecond and TSC values and offset them based on the current TSC. This allows getting current time in userspace. The implementation maps a single RO page to every process's address space. The page contains the TSC info which gets updated every 100 ms. If the processor does not have invariant TSC, this page will not indicate the capability for TSC based timing. There was the problem of how a process knows which CPU it is running on without doing a syscall. TSC counters may or may not be synchronized between cores, so we need a separate TSC info for each processor. I ended up adding a sequence of bytes 0..255 at the start of the shared page. When a scheduler gets a new thread, it updates the thread's gs/fs segment to point to the byte corresponding to the current CPU. This TSC based timing is also used in the kernel. With 64 bit HPET this probably does not bring much of a benefit, but on PIT or 32 bit HPET this removes the need to acquire a spinlock to get the current time. This change does force the userspace to not use gs/fs themselves and they are both now reserved. One is used for TLS (this can be technically used if user does not call libc code) and the other for the current processor index (cannot be used as kernel unconditionally resets it after each load balance). I was looking at how many times timer's current time was polled (userspace and kernel combined). When idling in window manager, it was around 8k times/s. When running doom it peaked at over 1 million times per second when loading and settled at ~30k times/s. 
--- kernel/arch/i686/boot.S | 9 ++ kernel/arch/x86_64/boot.S | 9 ++ kernel/include/kernel/API/SharedPage.h | 37 +++++ kernel/include/kernel/CPUID.h | 1 + kernel/include/kernel/Process.h | 4 + kernel/include/kernel/Processor.h | 15 +++ kernel/include/kernel/Thread.h | 2 + kernel/include/kernel/Timer/Timer.h | 9 ++ kernel/kernel/CPUID.cpp | 10 ++ kernel/kernel/Process.cpp | 39 ++++++ kernel/kernel/Processor.cpp | 127 ++++++++++++++++-- kernel/kernel/Scheduler.cpp | 3 + kernel/kernel/Thread.cpp | 14 ++ kernel/kernel/Timer/HPET.cpp | 2 + kernel/kernel/Timer/PIT.cpp | 2 + kernel/kernel/Timer/Timer.cpp | 99 +++++++++++++- kernel/kernel/kernel.cpp | 2 + userspace/libraries/LibC/time.cpp | 55 +++++++- userspace/libraries/LibC/unistd.cpp | 22 +++ .../LibELF/include/LibELF/AuxiliaryVector.h | 2 + 20 files changed, 448 insertions(+), 15 deletions(-) create mode 100644 kernel/include/kernel/API/SharedPage.h diff --git a/kernel/arch/i686/boot.S b/kernel/arch/i686/boot.S index ba9e0bac..40aae335 100644 --- a/kernel/arch/i686/boot.S +++ b/kernel/arch/i686/boot.S @@ -184,6 +184,13 @@ enable_sse: movl %eax, %cr4 ret +enable_tsc: + # allow userspace to use RDTSC + movl %cr4, %ecx + andl $0xFFFFFFFB, %ecx + movl %ecx, %cr4 + ret + initialize_paging: # enable PAE movl %cr4, %ecx @@ -226,6 +233,7 @@ gdt_flush: # do processor initialization call check_requirements call enable_sse + call enable_tsc call initialize_paging # load higher half stack pointer @@ -302,6 +310,7 @@ ap_protected_mode: movb $1, AP_V2P(ap_stack_loaded) leal V2P(enable_sse), %ecx; call *%ecx + leal V2P(enable_tsc), %ecx; call *%ecx leal V2P(initialize_paging), %ecx; call *%ecx # load boot gdt and enter long mode diff --git a/kernel/arch/x86_64/boot.S b/kernel/arch/x86_64/boot.S index 1af0f72b..49612f81 100644 --- a/kernel/arch/x86_64/boot.S +++ b/kernel/arch/x86_64/boot.S @@ -179,6 +179,13 @@ enable_sse: movl %eax, %cr4 ret +enable_tsc: + # allow userspace to use RDTSC + movl %cr4, %ecx + andl $0xFFFFFFFB, %ecx + 
movl %ecx, %cr4 + ret + initialize_paging: # enable PAE movl %cr4, %ecx @@ -215,6 +222,7 @@ _start: call check_requirements call enable_sse + call enable_tsc call initialize_paging # flush gdt and jump to 64 bit @@ -301,6 +309,7 @@ ap_protected_mode: movb $1, AP_V2P(ap_stack_loaded) leal V2P(enable_sse), %ecx; call *%ecx + leal V2P(enable_tsc), %ecx; call *%ecx leal V2P(initialize_paging), %ecx; call *%ecx # load boot gdt and enter long mode diff --git a/kernel/include/kernel/API/SharedPage.h b/kernel/include/kernel/API/SharedPage.h new file mode 100644 index 00000000..1685f445 --- /dev/null +++ b/kernel/include/kernel/API/SharedPage.h @@ -0,0 +1,37 @@ +#pragma once + +#include + +namespace Kernel::API +{ + + enum SharedPageFeature : uint32_t + { + SPF_GETTIME = 1 << 0, + }; + + struct SharedPage + { + uint8_t __sequence[0x100]; + + uint32_t features; + + struct + { + uint8_t shift; + uint64_t mult; + uint64_t realtime_seconds; + } gettime_shared; + + struct + { + struct + { + uint32_t seq; + uint64_t last_ns; + uint64_t last_tsc; + } gettime_local; + } cpus[]; + }; + +} diff --git a/kernel/include/kernel/CPUID.h b/kernel/include/kernel/CPUID.h index e4aef13a..9d0c7c4c 100644 --- a/kernel/include/kernel/CPUID.h +++ b/kernel/include/kernel/CPUID.h @@ -81,5 +81,6 @@ namespace CPUID bool has_pge(); bool has_pat(); bool has_1gib_pages(); + bool has_invariant_tsc(); } diff --git a/kernel/include/kernel/Process.h b/kernel/include/kernel/Process.h index f10fabc5..d619d0c0 100644 --- a/kernel/include/kernel/Process.h +++ b/kernel/include/kernel/Process.h @@ -228,6 +228,8 @@ namespace Kernel static Process& current() { return Thread::current().process(); } + vaddr_t shared_page_vaddr() const { return m_shared_page_vaddr; } + PageTable& page_table() { return m_page_table ? 
*m_page_table : PageTable::kernel(); } size_t proc_meminfo(off_t offset, BAN::ByteSpan) const; @@ -342,6 +344,8 @@ namespace Kernel VirtualFileSystem::File m_working_directory; VirtualFileSystem::File m_root_file; + vaddr_t m_shared_page_vaddr { 0 }; + BAN::Vector m_threads; struct pthread_info_t diff --git a/kernel/include/kernel/Processor.h b/kernel/include/kernel/Processor.h index c63b34ec..d7e7fbc9 100644 --- a/kernel/include/kernel/Processor.h +++ b/kernel/include/kernel/Processor.h @@ -3,10 +3,12 @@ #include #include +#include #include #include #include #include +#include #include #include @@ -33,6 +35,7 @@ namespace Kernel FlushTLB, NewThread, UnblockThread, + UpdateTSC, StackTrace, }; SMPMessage* next { nullptr }; @@ -55,6 +58,7 @@ namespace Kernel static Processor& initialize(); static ProcessorID current_id() { return read_gs_sized(offsetof(Processor, m_id)); } + static uint8_t current_index() { return read_gs_sized(offsetof(Processor, m_index)); } static ProcessorID id_from_index(size_t index); static uint8_t count() { return s_processor_count; } @@ -107,6 +111,13 @@ namespace Kernel static void yield(); static Scheduler& scheduler() { return *read_gs_sized(offsetof(Processor, m_scheduler)); } + static void initialize_tsc(uint8_t shift, uint64_t mult, uint64_t realtime_seconds); + static void update_tsc(); + static uint64_t ns_since_boot_tsc(); + + static paddr_t shared_page_paddr() { return s_shared_page_paddr; } + static volatile API::SharedPage& shared_page() { return *reinterpret_cast(s_shared_page_vaddr); } + static void handle_ipi(); static void handle_smp_messages(); @@ -124,6 +135,7 @@ namespace Kernel static ProcessorID read_processor_id(); static void initialize_smp(); + static void initialize_shared_page(); template static T read_gs_sized(uintptr_t offset) requires(sizeof(T) <= 8) @@ -162,8 +174,11 @@ namespace Kernel static BAN::Atomic s_processor_count; static BAN::Atomic s_is_smp_enabled; static BAN::Atomic s_should_print_cpu_load; + static 
paddr_t s_shared_page_paddr; + static vaddr_t s_shared_page_vaddr; ProcessorID m_id { 0 }; + uint8_t m_index { 0xFF }; static constexpr size_t s_stack_size { 4096 }; void* m_stack { nullptr }; diff --git a/kernel/include/kernel/Thread.h b/kernel/include/kernel/Thread.h index 8833a8ab..454c6345 100644 --- a/kernel/include/kernel/Thread.h +++ b/kernel/include/kernel/Thread.h @@ -122,6 +122,8 @@ namespace Kernel void set_cpu_time_start(); void set_cpu_time_stop(); + void update_processor_index_address(); + void set_fsbase(vaddr_t base) { m_fsbase = base; } vaddr_t get_fsbase() const { return m_fsbase; } void set_gsbase(vaddr_t base) { m_gsbase = base; } diff --git a/kernel/include/kernel/Timer/Timer.h b/kernel/include/kernel/Timer/Timer.h index f30523b6..6ef5aaef 100644 --- a/kernel/include/kernel/Timer/Timer.h +++ b/kernel/include/kernel/Timer/Timer.h @@ -35,6 +35,8 @@ namespace Kernel static SystemTimer& get(); static bool is_initialized(); + void initialize_tsc(); + virtual uint64_t ms_since_boot() const override; virtual uint64_t ns_since_boot() const override; virtual timespec time_since_boot() const override; @@ -47,6 +49,9 @@ namespace Kernel void dont_invoke_scheduler() { m_timer->m_should_invoke_scheduler = false; } + void update_tsc() const; + uint64_t ns_since_boot_no_tsc() const; + timespec real_time() const; private: @@ -54,10 +59,14 @@ namespace Kernel void initialize_timers(bool force_pic); + uint64_t get_tsc_frequency() const; + private: uint64_t m_boot_time { 0 }; BAN::UniqPtr m_rtc; BAN::UniqPtr m_timer; + bool m_has_invariant_tsc { false }; + mutable uint32_t m_timer_ticks { 0 }; }; } diff --git a/kernel/kernel/CPUID.cpp b/kernel/kernel/CPUID.cpp index e1ed1270..fb01f9a2 100644 --- a/kernel/kernel/CPUID.cpp +++ b/kernel/kernel/CPUID.cpp @@ -75,6 +75,16 @@ namespace CPUID return buffer[3] & (1 << 26); } + bool has_invariant_tsc() + { + uint32_t buffer[4] {}; + get_cpuid(0x80000000, buffer); + if (buffer[0] < 0x80000007) + return false; + 
get_cpuid(0x80000007, buffer); + return buffer[3] & (1 << 8); + } + const char* feature_string_ecx(uint32_t feat) { switch (feat) diff --git a/kernel/kernel/Process.cpp b/kernel/kernel/Process.cpp index 077bac02..87df969d 100644 --- a/kernel/kernel/Process.cpp +++ b/kernel/kernel/Process.cpp @@ -152,6 +152,20 @@ namespace Kernel })); } + process->m_shared_page_vaddr = process->page_table().reserve_free_page(process->m_mapped_regions.back()->vaddr(), USERSPACE_END); + if (process->m_shared_page_vaddr == 0) + return BAN::Error::from_errno(ENOMEM); + process->page_table().map_page_at( + Processor::shared_page_paddr(), + process->m_shared_page_vaddr, + PageTable::UserSupervisor | PageTable::Present + ); + + TRY(auxiliary_vector.push_back({ + .a_type = LibELF::AT_SHARED_PAGE, + .a_un = { .a_ptr = reinterpret_cast(process->m_shared_page_vaddr) }, + })); + TRY(auxiliary_vector.push_back({ .a_type = LibELF::AT_NULL, .a_un = { .a_val = 0 }, @@ -683,6 +697,13 @@ namespace Kernel for (auto& mapped_region : m_mapped_regions) MUST(mapped_regions.push_back(TRY(mapped_region->clone(*page_table)))); + const vaddr_t shared_page_vaddr = m_shared_page_vaddr; + page_table->map_page_at( + Processor::shared_page_paddr(), + shared_page_vaddr, + PageTable::UserSupervisor | PageTable::Present + ); + Process* forked = create_process(m_credentials, m_pid, m_sid, m_pgrp); forked->m_controlling_terminal = m_controlling_terminal; forked->m_working_directory = BAN::move(working_directory); @@ -691,6 +712,7 @@ namespace Kernel forked->m_environ = BAN::move(environ); forked->m_executable = BAN::move(executable); forked->m_page_table = BAN::move(page_table); + forked->m_shared_page_vaddr = BAN::move(shared_page_vaddr); forked->m_open_file_descriptors = BAN::move(*open_file_descriptors); forked->m_mapped_regions = BAN::move(mapped_regions); forked->m_has_called_exec = false; @@ -766,6 +788,20 @@ namespace Kernel })); } + const vaddr_t shared_page_vaddr = 
new_page_table->reserve_free_page(new_mapped_regions.back()->vaddr(), USERSPACE_END); + if (shared_page_vaddr == 0) + return BAN::Error::from_errno(ENOMEM); + new_page_table->map_page_at( + Processor::shared_page_paddr(), + shared_page_vaddr, + PageTable::UserSupervisor | PageTable::Present + ); + + TRY(auxiliary_vector.push_back({ + .a_type = LibELF::AT_SHARED_PAGE, + .a_un = { .a_ptr = reinterpret_cast(shared_page_vaddr) }, + })); + TRY(auxiliary_vector.push_back({ .a_type = LibELF::AT_NULL, .a_un = { .a_val = 0 }, @@ -837,6 +873,9 @@ namespace Kernel m_mapped_regions = BAN::move(new_mapped_regions); m_page_table = BAN::move(new_page_table); + m_shared_page_vaddr = shared_page_vaddr; + m_threads.front()->update_processor_index_address(); + execfd_guard.disable(); m_cmdline = BAN::move(str_argv); diff --git a/kernel/kernel/Processor.cpp b/kernel/kernel/Processor.cpp index 3a83571c..db6f31ac 100644 --- a/kernel/kernel/Processor.cpp +++ b/kernel/kernel/Processor.cpp @@ -15,10 +15,12 @@ namespace Kernel static constexpr uint32_t MSR_IA32_KERNEL_GS_BASE = 0xC0000102; #endif - ProcessorID Processor::s_bsp_id { PROCESSOR_NONE }; - BAN::Atomic Processor::s_processor_count { 0 }; - BAN::Atomic Processor::s_is_smp_enabled { false }; - BAN::Atomic Processor::s_should_print_cpu_load { false }; + ProcessorID Processor::s_bsp_id { PROCESSOR_NONE }; + BAN::Atomic Processor::s_processor_count { 0 }; + BAN::Atomic Processor::s_is_smp_enabled { false }; + BAN::Atomic Processor::s_should_print_cpu_load { false }; + paddr_t Processor::s_shared_page_paddr { 0 }; + vaddr_t Processor::s_shared_page_vaddr { 0 }; static BAN::Atomic s_processors_created { 0 }; @@ -128,6 +130,33 @@ namespace Kernel processor.m_smp_free = smp_storage; } + void Processor::initialize_shared_page() + { + [[maybe_unused]] constexpr size_t max_processors = (PAGE_SIZE - sizeof(API::SharedPage)) / sizeof(decltype(*API::SharedPage::cpus)); + ASSERT(s_processors_created < max_processors); + + s_shared_page_paddr = 
Heap::get().take_free_page(); + ASSERT(s_shared_page_paddr); + + s_shared_page_vaddr = PageTable::kernel().reserve_free_page(KERNEL_OFFSET); + ASSERT(s_shared_page_vaddr); + + PageTable::kernel().map_page_at( + s_shared_page_paddr, + s_shared_page_vaddr, + PageTable::ReadWrite | PageTable::Present + ); + + memset(reinterpret_cast(s_shared_page_vaddr), 0, PAGE_SIZE); + + auto& shared_page = *reinterpret_cast(s_shared_page_vaddr); + for (size_t i = 0; i <= 0xFF; i++) + shared_page.__sequence[i] = i; + shared_page.features = 0; + + ASSERT(Processor::count() + sizeof(Kernel::API::SharedPage) <= PAGE_SIZE); + } + ProcessorID Processor::id_from_index(size_t index) { ASSERT(index < s_processor_count); @@ -142,8 +171,11 @@ namespace Kernel // wait until bsp is ready if (current_is_bsp()) { + initialize_shared_page(); + s_processor_count = 1; s_processor_ids[0] = current_id(); + s_processors[current_id().as_u32()].m_index = 0; // single processor system if (s_processors_created == 1) @@ -167,9 +199,10 @@ namespace Kernel while (s_processor_count == 0) __builtin_ia32_pause(); - auto lookup_index = s_processor_count++; - ASSERT(s_processor_ids[lookup_index] == PROCESSOR_NONE); - s_processor_ids[lookup_index] = current_id(); + const auto index = s_processor_count++; + ASSERT(s_processor_ids[index] == PROCESSOR_NONE); + s_processor_ids[index] = current_id(); + s_processors[current_id().as_u32()].m_index = index; uint32_t expected = static_cast(-1); s_first_ap_ready_ms.compare_exchange(expected, SystemTimer::get().ms_since_boot()); @@ -191,6 +224,76 @@ namespace Kernel } } + void Processor::initialize_tsc(uint8_t shift, uint64_t mult, uint64_t realtime_seconds) + { + auto& shared_page = Processor::shared_page(); + + shared_page.gettime_shared.shift = shift; + shared_page.gettime_shared.mult = mult; + shared_page.gettime_shared.realtime_seconds = realtime_seconds; + + update_tsc(); + + broadcast_smp_message({ + .type = SMPMessage::Type::UpdateTSC, + .dummy = 0, + }); + + bool 
everyone_initialized { false }; + while (!everyone_initialized) + { + everyone_initialized = true; + for (size_t i = 0; i < count(); i++) + { + if (shared_page.cpus[i].gettime_local.seq != 0) + continue; + everyone_initialized = false; + break; + } + } + + shared_page.features |= API::SPF_GETTIME; + } + + void Processor::update_tsc() + { + const auto read_tsc = + []() -> uint64_t { + uint32_t high, low; + asm volatile("lfence; rdtsc" : "=d"(high), "=a"(low)); + return (static_cast(high) << 32) | low; + }; + + auto& sgettime = shared_page().cpus[current_index()].gettime_local; + sgettime.seq = sgettime.seq + 1; + sgettime.last_ns = SystemTimer::get().ns_since_boot_no_tsc(); + sgettime.last_tsc = read_tsc(); + sgettime.seq = sgettime.seq + 1; + } + + uint64_t Processor::ns_since_boot_tsc() + { + const auto read_tsc = + []() -> uint64_t { + uint32_t high, low; + asm volatile("lfence; rdtsc" : "=d"(high), "=a"(low)); + return (static_cast(high) << 32) | low; + }; + + const auto& shared_page = Processor::shared_page(); + const auto& sgettime = shared_page.gettime_shared; /* shift/mult are shared by all CPUs */ + + auto state = get_interrupt_state(); + set_interrupt_state(InterruptState::Disabled); /* must happen BEFORE current_index(): otherwise a preemption + migration could pair another CPU's slot with this CPU's TSC */ + + const auto& lgettime = shared_page.cpus[current_index()].gettime_local; /* per-CPU slot, stable while interrupts are off */ + const auto current_ns = lgettime.last_ns + (((read_tsc() - lgettime.last_tsc) * sgettime.mult) >> sgettime.shift); + + set_interrupt_state(state); + + return current_ns; + } + void Processor::handle_ipi() { handle_smp_messages(); @@ -240,6 +343,9 @@ namespace Kernel case SMPMessage::Type::UnblockThread: processor.m_scheduler->unblock_thread(message->unblock_thread); break; + case SMPMessage::Type::UpdateTSC: + update_tsc(); + break; #if WITH_PROFILING case SMPMessage::Type::StartProfiling: processor.start_profiling(); @@ -375,13 +481,14 @@ namespace Kernel if (!is_smp_enabled()) return; - auto state = get_interrupt_state(); + const auto state = get_interrupt_state(); set_interrupt_state(InterruptState::Disabled); + const auto current_id = 
Processor::current_id(); for (size_t i = 0; i < Processor::count(); i++) { - auto processor_id = s_processor_ids[i]; - if (processor_id != current_id()) + const auto processor_id = s_processor_ids[i]; + if (processor_id != current_id) send_smp_message(processor_id, message, false); } diff --git a/kernel/kernel/Scheduler.cpp b/kernel/kernel/Scheduler.cpp index d44b1483..f2e3a98b 100644 --- a/kernel/kernel/Scheduler.cpp +++ b/kernel/kernel/Scheduler.cpp @@ -387,6 +387,9 @@ namespace Kernel else m_block_queue.add_thread_with_wake_time(node); + if (auto* thread = node->thread; thread->is_userspace() && thread->has_process()) + thread->update_processor_index_address(); + m_thread_count++; Processor::set_interrupt_state(state); diff --git a/kernel/kernel/Thread.cpp b/kernel/kernel/Thread.cpp index da435630..848b6e7d 100644 --- a/kernel/kernel/Thread.cpp +++ b/kernel/kernel/Thread.cpp @@ -295,6 +295,20 @@ namespace Kernel m_cpu_time_start_ns = UINT64_MAX; } + void Thread::update_processor_index_address() + { + if (!is_userspace() || !has_process()) + return; + + const vaddr_t vaddr = process().shared_page_vaddr() + Processor::current_index(); + +#if ARCH(x86_64) + set_gsbase(vaddr); +#elif ARCH(i686) + set_fsbase(vaddr); +#endif + } + BAN::ErrorOr Thread::pthread_create(entry_t entry, void* arg) { auto* thread = TRY(create_userspace(m_process, m_process->page_table())); diff --git a/kernel/kernel/Timer/HPET.cpp b/kernel/kernel/Timer/HPET.cpp index e4b9cd1c..133a1e20 100644 --- a/kernel/kernel/Timer/HPET.cpp +++ b/kernel/kernel/Timer/HPET.cpp @@ -272,6 +272,8 @@ namespace Kernel m_last_ticks = current_ticks; } + SystemTimer::get().update_tsc(); + if (should_invoke_scheduler()) Processor::scheduler().timer_interrupt(); } diff --git a/kernel/kernel/Timer/PIT.cpp b/kernel/kernel/Timer/PIT.cpp index c498f0f2..7447bca6 100644 --- a/kernel/kernel/Timer/PIT.cpp +++ b/kernel/kernel/Timer/PIT.cpp @@ -58,6 +58,8 @@ namespace Kernel m_system_time_ms++; } + 
SystemTimer::get().update_tsc(); + if (should_invoke_scheduler()) Processor::scheduler().timer_interrupt(); } diff --git a/kernel/kernel/Timer/Timer.cpp b/kernel/kernel/Timer/Timer.cpp index e9929ff2..026017a8 100644 --- a/kernel/kernel/Timer/Timer.cpp +++ b/kernel/kernel/Timer/Timer.cpp @@ -1,3 +1,6 @@ +#include + +#include #include #include #include @@ -54,19 +57,107 @@ namespace Kernel Kernel::panic("Could not initialize any timer"); } - uint64_t SystemTimer::ms_since_boot() const + void SystemTimer::initialize_tsc() { - return m_timer->ms_since_boot(); + if (!CPUID::has_invariant_tsc()) + { + dwarnln("CPU does not have an invariant TSC"); + return; + } + + const uint64_t tsc_freq = get_tsc_frequency(); + + dprintln("Initialized invariant TSC ({} Hz)", tsc_freq); + + const uint8_t tsc_shift = 22; + const uint64_t tsc_mult = (static_cast(1'000'000'000) << tsc_shift) / tsc_freq; + Processor::initialize_tsc(tsc_shift, tsc_mult, m_boot_time); + + m_has_invariant_tsc = true; } - uint64_t SystemTimer::ns_since_boot() const + uint64_t SystemTimer::get_tsc_frequency() const + { + // take 5x 50 ms samples and use the median value + + const auto read_tsc = + []() -> uint64_t { + uint32_t high, low; + asm volatile("lfence; rdtsc" : "=d"(high), "=a"(low)); + return (static_cast(high) << 32) | low; + }; + + constexpr size_t tsc_sample_count = 5; + constexpr size_t tsc_sample_ns = 50'000'000; + + uint64_t tsc_freq_samples[tsc_sample_count]; + for (size_t i = 0; i < tsc_sample_count; i++) + { + const auto start_ns = m_timer->ns_since_boot(); + + const auto start_tsc = read_tsc(); + while (m_timer->ns_since_boot() < start_ns + tsc_sample_ns) + Processor::pause(); + const auto stop_tsc = read_tsc(); + + const auto stop_ns = m_timer->ns_since_boot(); + + const auto duration_ns = stop_ns - start_ns; + const auto count_tsc = stop_tsc - start_tsc; + + tsc_freq_samples[i] = count_tsc * 1'000'000'000 / duration_ns; + } + + BAN::sort::sort(tsc_freq_samples, tsc_freq_samples + 
tsc_sample_count); + + return tsc_freq_samples[tsc_sample_count / 2]; + } + + void SystemTimer::update_tsc() const + { + if (!m_has_invariant_tsc) + return; + + // only update every 100 ms + if (++m_timer_ticks < 100) + return; + m_timer_ticks = 0; + + Processor::update_tsc(); + Processor::broadcast_smp_message({ + .type = Processor::SMPMessage::Type::UpdateTSC, + .dummy = 0, + }); + } + + uint64_t SystemTimer::ns_since_boot_no_tsc() const { return m_timer->ns_since_boot(); } + uint64_t SystemTimer::ms_since_boot() const + { + if (!m_has_invariant_tsc) + return m_timer->ms_since_boot(); + return Processor::ns_since_boot_tsc() / 1'000'000; + } + + uint64_t SystemTimer::ns_since_boot() const + { + if (!m_has_invariant_tsc) + return m_timer->ns_since_boot(); + return Processor::ns_since_boot_tsc(); + } + timespec SystemTimer::time_since_boot() const { - return m_timer->time_since_boot(); + if (!m_has_invariant_tsc) + return m_timer->time_since_boot(); + const auto ns_since_boot = Processor::ns_since_boot_tsc(); + return { + .tv_sec = static_cast(ns_since_boot / 1'000'000'000), + .tv_nsec = static_cast(ns_since_boot % 1'000'000'000) + }; } bool SystemTimer::pre_scheduler_sleep_needs_lock() const diff --git a/kernel/kernel/kernel.cpp b/kernel/kernel/kernel.cpp index eccbcfa9..6b005aac 100644 --- a/kernel/kernel/kernel.cpp +++ b/kernel/kernel/kernel.cpp @@ -208,6 +208,8 @@ static void init2(void*) dprintln("Scheduler started"); + SystemTimer::get().initialize_tsc(); + auto console = MUST(DevFileSystem::get().root_inode()->find_inode(cmdline.console)); ASSERT(console->is_tty()); static_cast(console.ptr())->set_as_current(); diff --git a/userspace/libraries/LibC/time.cpp b/userspace/libraries/LibC/time.cpp index a7f13463..4b84b699 100644 --- a/userspace/libraries/LibC/time.cpp +++ b/userspace/libraries/LibC/time.cpp @@ -2,6 +2,8 @@ #include #include +#include + #include #include #include @@ -15,9 +17,60 @@ int daylight; long timezone; char* tzname[2]; +extern volatile 
Kernel::API::SharedPage* g_shared_page; + int clock_gettime(clockid_t clock_id, struct timespec* tp) { - return syscall(SYS_CLOCK_GETTIME, clock_id, tp); + if (clock_id != CLOCK_MONOTONIC && clock_id != CLOCK_REALTIME) + return syscall(SYS_CLOCK_GETTIME, clock_id, tp); + + if (g_shared_page == nullptr || !(g_shared_page->features & Kernel::API::SPF_GETTIME)) + return syscall(SYS_CLOCK_GETTIME, clock_id, tp); + + const auto get_cpu = + []() -> uint8_t { + uint8_t cpu; +#if defined(__x86_64__) + asm volatile("movb %%gs:0, %0" : "=r"(cpu)); +#elif defined(__i686__) + asm volatile("movb %%fs:0, %0" : "=q"(cpu)); +#endif + return cpu; + }; + + const auto read_tsc = + []() -> uint64_t { + uint32_t high, low; + asm volatile("lfence; rdtsc" : "=d"(high), "=a"(low)); + return (static_cast(high) << 32) | low; + }; + + for (;;) + { + const auto cpu = get_cpu(); + + const auto& sgettime = g_shared_page->gettime_shared; + const auto& lgettime = g_shared_page->cpus[cpu].gettime_local; + + const auto old_seq = lgettime.seq; + if (old_seq & 1) + continue; + + const auto monotonic_ns = lgettime.last_ns + (((read_tsc() - lgettime.last_tsc) * sgettime.mult) >> sgettime.shift); + + if (old_seq != lgettime.seq || cpu != get_cpu()) + continue; + + *tp = { + .tv_sec = static_cast(monotonic_ns / 1'000'000'000), + .tv_nsec = static_cast(monotonic_ns % 1'000'000'000) + }; + + if (clock_id == CLOCK_REALTIME) + tp->tv_sec += sgettime.realtime_seconds; + + return 0; /* POSIX: 0 on success; the time itself is delivered through *tp, not the int return value */ + } } int clock_getres(clockid_t clock_id, struct timespec* res) diff --git a/userspace/libraries/LibC/unistd.cpp b/userspace/libraries/LibC/unistd.cpp index 1ea5f819..968adf3b 100644 --- a/userspace/libraries/LibC/unistd.cpp +++ b/userspace/libraries/LibC/unistd.cpp @@ -2,6 +2,9 @@ #include #include +#include + +#include #include #include @@ -31,6 +34,8 @@ struct init_funcs_t extern "C" char** environ; +volatile Kernel::API::SharedPage* g_shared_page = nullptr; + #define DUMP_BACKTRACE 1 #define DEMANGLE_BACKTRACE 0 @@ 
-40,11 +45,28 @@ extern "C" char** environ; static void __dump_backtrace(int, siginfo_t*, void*); +static LibELF::AuxiliaryVector* find_auxv(char** envp) +{ + if (envp == nullptr) + return nullptr; + + char** null_env = envp; + while (*null_env) + null_env++; + + return reinterpret_cast(null_env + 1); +} + extern "C" void _init_libc(char** environ, init_funcs_t init_funcs, init_funcs_t fini_funcs) { if (::environ == nullptr) ::environ = environ; + if (auto* auxv = find_auxv(environ)) + for (auto* aux = auxv; aux->a_type != LibELF::AT_NULL; aux++) + if (aux->a_type == LibELF::AT_SHARED_PAGE) + g_shared_page = static_cast(aux->a_un.a_ptr); + #if defined(__x86_64__) if (uthread* self = reinterpret_cast(syscall(SYS_GET_FSBASE))) #elif defined(__i686__) diff --git a/userspace/libraries/LibELF/include/LibELF/AuxiliaryVector.h b/userspace/libraries/LibELF/include/LibELF/AuxiliaryVector.h index e133a102..0694fe8b 100644 --- a/userspace/libraries/LibELF/include/LibELF/AuxiliaryVector.h +++ b/userspace/libraries/LibELF/include/LibELF/AuxiliaryVector.h @@ -23,6 +23,8 @@ namespace LibELF AT_PHDR = 3, AT_PHENT = 4, AT_PHNUM = 5, + + AT_SHARED_PAGE = 0xFFFF0001, }; }