Kernel: Add syscall-less clock_gettime

If the processor has invariant TSC it can be used to measure time. We
keep track of the last nanosecond and TSC values and offset them based
on the current TSC. This allows getting current time in userspace.

The implementation maps a single RO page into every process's address
space. The page contains the TSC info which gets updated every 100 ms.
If the processor does not have invariant TSC, this page will not
indicate the capability for TSC based timing.

There was the problem of how code can find out which cpu it is running
on without doing a syscall. TSC counters may or may not be synchronized
between cores, so we need separate TSC info for each processor. I ended
up adding a sequence of bytes 0..255 at the start of the shared page.
When a scheduler gets a new thread, it updates the thread's gs/fs
segment to point to the byte corresponding to the current cpu.

This TSC based timing is also used in kernel. With 64 bit HPET this
probably does not bring much of a benefit, but on PIT or 32 bit HPET
this removes the need to acquire a spinlock to get the current time.

This change does force userspace to not use gs/fs itself, as both are
now reserved. One is used for TLS (this one can technically be used if
the user does not call libc code) and the other for the current
processor index (this one cannot be used, as the kernel unconditionally
resets it after each load balance).

I was looking at how many times timer's current time was polled
(userspace and kernel combined). When idling in window manager, it was
around 8k times/s. When running doom it peaked at over 1 million times
per second when loading and settled at ~30k times/s.
This commit is contained in:
2026-01-08 13:30:04 +02:00
parent ee57cf3e9a
commit 9eb3834ae5
20 changed files with 448 additions and 15 deletions

View File

@@ -2,6 +2,8 @@
#include <BAN/Debug.h>
#include <BAN/Math.h>
#include <kernel/API/SharedPage.h>
#include <ctype.h>
#include <errno.h>
#include <langinfo.h>
@@ -15,9 +17,60 @@ int daylight;
long timezone;
char* tzname[2];
extern volatile Kernel::API::SharedPage* g_shared_page;
// Stores the current time of `clock_id` in `*tp`.
//
// CLOCK_MONOTONIC and CLOCK_REALTIME are answered without entering the kernel
// when the shared page exists and advertises SPF_GETTIME. Every other case
// (different clock, no shared page, capability missing) is forwarded to
// SYS_CLOCK_GETTIME and that syscall's result is returned unchanged.
//
// Returns 0 when the time was produced from the shared page.
int clock_gettime(clockid_t clock_id, struct timespec* tp)
{
	const bool tsc_clock = (clock_id == CLOCK_MONOTONIC || clock_id == CLOCK_REALTIME);
	if (!tsc_clock || g_shared_page == nullptr || !(g_shared_page->features & Kernel::API::SPF_GETTIME))
		return syscall(SYS_CLOCK_GETTIME, clock_id, tp);

	// Reads the byte at offset 0 of the gs (x86_64) / fs (i686) segment.
	// The kernel points that segment at the shared-page byte whose value is
	// the index of the cpu the thread is currently running on.
	const auto get_cpu =
		[]() -> uint8_t {
			uint8_t cpu;
#if defined(__x86_64__)
			asm volatile("movb %%gs:0, %0" : "=r"(cpu));
#elif defined(__i686__)
			asm volatile("movb %%fs:0, %0" : "=q"(cpu));
#endif
			return cpu;
		};

	// Reads the 64 bit time stamp counter; lfence is issued first so rdtsc is
	// not executed ahead of the preceding instructions.
	const auto read_tsc =
		[]() -> uint64_t {
			uint32_t high, low;
			asm volatile("lfence; rdtsc" : "=d"(high), "=a"(low));
			return (static_cast<uint64_t>(high) << 32) | low;
		};

	for (;;)
	{
		const auto cpu = get_cpu();

		const auto& sgettime = g_shared_page->gettime_shared;
		const auto& lgettime = g_shared_page->cpus[cpu].gettime_local;

		// NOTE(review): an odd sequence number presumably means the kernel is
		// in the middle of updating this cpu's entry -- retry until it is even.
		const auto old_seq = lgettime.seq;
		if (old_seq & 1)
			continue;

		// Extrapolate from the last published sample: ns = last_ns + ((tsc_delta * mult) >> shift)
		const auto monotonic_ns = lgettime.last_ns + (((read_tsc() - lgettime.last_tsc) * sgettime.mult) >> sgettime.shift);

		// Throw the sample away if the entry changed while it was being read
		// or if the thread is now on a different cpu than the one sampled.
		if (old_seq != lgettime.seq || cpu != get_cpu())
			continue;

		*tp = {
			.tv_sec = static_cast<time_t>(monotonic_ns / 1'000'000'000),
			.tv_nsec = static_cast<long>(monotonic_ns % 1'000'000'000)
		};

		if (clock_id == CLOCK_REALTIME)
			tp->tv_sec += sgettime.realtime_seconds;

		// Success is reported as 0; the nanosecond count must not leak into
		// the int return value.
		return 0;
	}
}
int clock_getres(clockid_t clock_id, struct timespec* res)

View File

@@ -2,6 +2,9 @@
#include <BAN/Debug.h>
#include <BAN/StringView.h>
#include <LibELF/AuxiliaryVector.h>
#include <kernel/API/SharedPage.h>
#include <kernel/Memory/Types.h>
#include <kernel/Syscall.h>
@@ -31,6 +34,8 @@ struct init_funcs_t
extern "C" char** environ;
volatile Kernel::API::SharedPage* g_shared_page = nullptr;
#define DUMP_BACKTRACE 1
#define DEMANGLE_BACKTRACE 0
@@ -40,11 +45,28 @@ extern "C" char** environ;
static void __dump_backtrace(int, siginfo_t*, void*);
// Finds the auxiliary vector relative to the environment array.
//
// Returns nullptr when `envp` is nullptr. Otherwise walks `envp` to its
// terminating null entry and returns the address of the slot right after it,
// reinterpreted as a pointer to LibELF::AuxiliaryVector.
static LibELF::AuxiliaryVector* find_auxv(char** envp)
{
	if (!envp)
		return nullptr;

	// Advance to the null entry that terminates the environment array.
	char** cursor = envp;
	for (; *cursor != nullptr; ++cursor)
		continue;

	// The slot following the terminator is treated as the first aux entry.
	return reinterpret_cast<LibELF::AuxiliaryVector*>(&cursor[1]);
}
extern "C" void _init_libc(char** environ, init_funcs_t init_funcs, init_funcs_t fini_funcs)
{
if (::environ == nullptr)
::environ = environ;
if (auto* auxv = find_auxv(environ))
for (auto* aux = auxv; aux->a_type != LibELF::AT_NULL; aux++)
if (aux->a_type == LibELF::AT_SHARED_PAGE)
g_shared_page = static_cast<Kernel::API::SharedPage*>(aux->a_un.a_ptr);
#if defined(__x86_64__)
if (uthread* self = reinterpret_cast<uthread*>(syscall(SYS_GET_FSBASE)))
#elif defined(__i686__)