LibC/DynamicLoader: Add support for dynamically loaded TLS

Previously, dlopen() failed if any of the loaded objects contained a
TLS section.
This commit is contained in:
Bananymous 2026-03-17 20:01:51 +02:00
parent 05b2424fca
commit 1d07d8e08e
5 changed files with 193 additions and 70 deletions

View File

@ -379,7 +379,7 @@ namespace Kernel
const auto [master_addr, master_size] = master_tls; const auto [master_addr, master_size] = master_tls;
ASSERT(master_size % alignof(uthread) == 0); ASSERT(master_size % alignof(uthread) == 0);
const size_t tls_size = master_size + PAGE_SIZE; const size_t tls_size = master_size + sizeof(uthread);
auto region = TRY(MemoryBackedRegion::create( auto region = TRY(MemoryBackedRegion::create(
page_table, page_table,
@ -408,28 +408,26 @@ namespace Kernel
bytes_copied += to_copy; bytes_copied += to_copy;
} }
const uthread uthread { auto uthread = TRY(BAN::UniqPtr<struct uthread>::create());
*uthread = {
.self = reinterpret_cast<struct uthread*>(region->vaddr() + master_size), .self = reinterpret_cast<struct uthread*>(region->vaddr() + master_size),
.master_tls_addr = reinterpret_cast<void*>(master_addr), .master_tls_addr = reinterpret_cast<void*>(master_addr),
.master_tls_size = master_size, .master_tls_size = master_size,
.master_tls_module_count = 1,
.dynamic_tls = nullptr,
.cleanup_stack = nullptr, .cleanup_stack = nullptr,
.id = 0, .id = 0,
.errno_ = 0, .errno_ = 0,
.cancel_type = 0, .cancel_type = 0,
.cancel_state = 0, .cancel_state = 0,
.canceled = 0, .canceled = 0,
.dtv = { 0, region->vaddr() }
}; };
const uintptr_t dtv[2] { 1, region->vaddr() };
TRY(region->copy_data_to_region( TRY(region->copy_data_to_region(
master_size, master_size,
reinterpret_cast<const uint8_t*>(&uthread), reinterpret_cast<const uint8_t*>(uthread.ptr()),
sizeof(uthread) sizeof(struct uthread)
));
TRY(region->copy_data_to_region(
master_size + sizeof(uthread),
reinterpret_cast<const uint8_t*>(&dtv),
sizeof(dtv)
)); ));
TLSResult result; TLSResult result;

View File

@ -17,25 +17,41 @@ __BEGIN_DECLS
#include <bits/types/pthread_types.h> #include <bits/types/pthread_types.h>
struct _pthread_cleanup_t typedef struct _pthread_cleanup_t
{ {
void (*routine)(void*); void (*routine)(void*);
void* arg; void* arg;
struct _pthread_cleanup_t* next; struct _pthread_cleanup_t* next;
}; } _pthread_cleanup_t;
typedef struct _dynamic_tls_entry_t
{
void* master_addr;
size_t master_size;
} _dynamic_tls_entry_t;
typedef struct _dynamic_tls_t
{
int lock;
size_t entry_count;
_dynamic_tls_entry_t* entries;
} _dynamic_tls_t;
struct uthread struct uthread
{ {
struct uthread* self; struct uthread* self;
void* master_tls_addr; void* master_tls_addr;
size_t master_tls_size; size_t master_tls_size;
struct _pthread_cleanup_t* cleanup_stack; size_t master_tls_module_count;
_dynamic_tls_t* dynamic_tls;
_pthread_cleanup_t* cleanup_stack;
pthread_t id; pthread_t id;
int errno_; int errno_;
int cancel_type; int cancel_type;
int cancel_state; int cancel_state;
int canceled; int canceled;
uintptr_t dtv[]; // FIXME: make this dynamic
uintptr_t dtv[1 + 128];
}; };
#define PTHREAD_CANCELED (void*)1 #define PTHREAD_CANCELED (void*)1

View File

@ -63,13 +63,35 @@ extern "C" void _pthread_trampoline_cpp(void* arg)
static void free_uthread(uthread* uthread) static void free_uthread(uthread* uthread)
{ {
if (uthread->dtv[0] == 0) const auto lock_dynamic_tls =
return free(uthread); [uthread] {
int expected = 0;
while (BAN::atomic_compare_exchange(uthread->dynamic_tls->lock, expected, 1))
{
sched_yield();
expected = 0;
}
};
const auto unlock_dynamic_tls =
[uthread] {
BAN::atomic_store(uthread->dynamic_tls->lock, 0);
};
for (size_t i = uthread->master_tls_module_count; i < uthread->dtv[0]; i++)
{
if (uthread->dtv[i] == 0)
continue;
lock_dynamic_tls();
const size_t size = uthread->dynamic_tls->entries[i].master_size;
unlock_dynamic_tls();
munmap(reinterpret_cast<void*>(uthread->dtv[i]), size);
}
uint8_t* tls_addr = reinterpret_cast<uint8_t*>(uthread) - uthread->master_tls_size; uint8_t* tls_addr = reinterpret_cast<uint8_t*>(uthread) - uthread->master_tls_size;
const size_t tls_size = uthread->master_tls_size const size_t tls_size = uthread->master_tls_size + sizeof(struct uthread);
+ sizeof(struct uthread)
+ (uthread->dtv[0] + 1) * sizeof(uintptr_t);
munmap(tls_addr, tls_size); munmap(tls_addr, tls_size);
} }
@ -358,58 +380,37 @@ int pthread_create(pthread_t* __restrict thread_id, const pthread_attr_t* __rest
long syscall_ret = 0; long syscall_ret = 0;
if (uthread* self = _get_uthread(); self->master_tls_addr == nullptr)
{ {
uthread* uthread = static_cast<struct uthread*>(malloc(sizeof(struct uthread) + sizeof(uintptr_t))); uthread* self = _get_uthread();
if (uthread == nullptr)
goto pthread_create_error;
*uthread = { const size_t tls_size = self->master_tls_size + sizeof(uthread);
.self = uthread,
.master_tls_addr = nullptr,
.master_tls_size = 0,
.cleanup_stack = nullptr,
.id = -1,
.errno_ = 0,
.cancel_type = PTHREAD_CANCEL_DEFERRED,
.cancel_state = PTHREAD_CANCEL_ENABLE,
.canceled = false,
};
uthread->dtv[0] = 0;
info->uthread = uthread;
}
else
{
const size_t module_count = self->dtv[0];
const size_t tls_size = self->master_tls_size
+ sizeof(uthread)
+ (module_count + 1) * sizeof(uintptr_t);
uint8_t* tls_addr = static_cast<uint8_t*>(mmap(nullptr, tls_size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0)); uint8_t* tls_addr = static_cast<uint8_t*>(mmap(nullptr, tls_size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0));
if (tls_addr == MAP_FAILED) if (tls_addr == MAP_FAILED)
goto pthread_create_error; goto pthread_create_error;
memcpy(tls_addr, self->master_tls_addr, self->master_tls_size);
if (self->master_tls_addr)
memcpy(tls_addr, self->master_tls_addr, self->master_tls_size);
uthread* uthread = reinterpret_cast<struct uthread*>(tls_addr + self->master_tls_size); uthread* uthread = reinterpret_cast<struct uthread*>(tls_addr + self->master_tls_size);
*uthread = { *uthread = {
.self = uthread, .self = uthread,
.master_tls_addr = self->master_tls_addr, .master_tls_addr = self->master_tls_addr,
.master_tls_size = self->master_tls_size, .master_tls_size = self->master_tls_size,
.master_tls_module_count = self->master_tls_module_count,
.dynamic_tls = self->dynamic_tls,
.cleanup_stack = nullptr, .cleanup_stack = nullptr,
.id = -1, .id = -1,
.errno_ = 0, .errno_ = 0,
.cancel_type = PTHREAD_CANCEL_DEFERRED, .cancel_type = PTHREAD_CANCEL_DEFERRED,
.cancel_state = PTHREAD_CANCEL_ENABLE, .cancel_state = PTHREAD_CANCEL_ENABLE,
.canceled = 0, .canceled = 0,
.dtv = { self->dtv[0] }
}; };
const uintptr_t self_addr = reinterpret_cast<uintptr_t>(self); const uintptr_t self_addr = reinterpret_cast<uintptr_t>(self);
const uintptr_t uthread_addr = reinterpret_cast<uintptr_t>(uthread); const uintptr_t uthread_addr = reinterpret_cast<uintptr_t>(uthread);
for (size_t i = 1; i <= self->master_tls_module_count; i++)
uthread->dtv[0] = module_count;
for (size_t i = 1; i <= module_count; i++)
uthread->dtv[i] = self->dtv[i] - self_addr + uthread_addr; uthread->dtv[i] = self->dtv[i] - self_addr + uthread_addr;
info->uthread = uthread; info->uthread = uthread;
@ -1276,6 +1277,36 @@ int pthread_barrier_wait(pthread_barrier_t* barrier)
return 0; return 0;
} }
static void load_dynamic_tls_module(size_t module)
{
auto* uthread = _get_uthread();
ASSERT(uthread->dynamic_tls);
ASSERT(module > uthread->master_tls_module_count);
const _dynamic_tls_entry_t entry = ({
int expected = 0;
while (BAN::atomic_compare_exchange(uthread->dynamic_tls->lock, expected, 1))
{
sched_yield();
expected = 0;
}
ASSERT(module <= uthread->master_tls_module_count + uthread->dynamic_tls->entry_count);
auto result = uthread->dynamic_tls->entries[module - uthread->master_tls_module_count - 1];
BAN::atomic_store(uthread->dynamic_tls->lock, 0);
result;
});
void* dtv_data = mmap(nullptr, entry.master_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
ASSERT(dtv_data != MAP_FAILED);
memcpy(dtv_data, entry.master_addr, entry.master_size);
uthread->dtv[module] = reinterpret_cast<uintptr_t>(dtv_data);
}
struct tls_index struct tls_index
{ {
unsigned long int ti_module; unsigned long int ti_module;
@ -1284,12 +1315,18 @@ struct tls_index
extern "C" void* __tls_get_addr(tls_index* ti) extern "C" void* __tls_get_addr(tls_index* ti)
{ {
return reinterpret_cast<void*>(_get_uthread()->dtv[ti->ti_module] + ti->ti_offset); auto* uthread = _get_uthread();
if (uthread->dtv[ti->ti_module] == 0) [[unlikely]]
load_dynamic_tls_module(ti->ti_module);
return reinterpret_cast<void*>(uthread->dtv[ti->ti_module] + ti->ti_offset);
} }
#if ARCH(i686) #if ARCH(i686)
extern "C" void* __attribute__((__regparm__(1))) ___tls_get_addr(tls_index* ti) extern "C" void* __attribute__((__regparm__(1))) ___tls_get_addr(tls_index* ti)
{ {
return reinterpret_cast<void*>(_get_uthread()->dtv[ti->ti_module] + ti->ti_offset); auto* uthread = _get_uthread();
if (uthread->dtv[ti->ti_module] == 0) [[unlikely]]
load_dynamic_tls_module(ti->ti_module);
return reinterpret_cast<void*>(uthread->dtv[ti->ti_module] + ti->ti_offset);
} }
#endif #endif

View File

@ -84,21 +84,23 @@ extern "C" void _init_libc(char** environ, init_funcs_t init_funcs, init_funcs_t
} }
else else
{ {
alignas(uthread) static uint8_t storage[sizeof(uthread) + sizeof(uintptr_t)]; alignas(uthread) static uint8_t storage[sizeof(uthread)];
uthread& uthread = *reinterpret_cast<struct uthread*>(storage); uthread& uthread = *reinterpret_cast<struct uthread*>(storage);
uthread = { uthread = {
.self = &uthread, .self = &uthread,
.master_tls_addr = nullptr, .master_tls_addr = nullptr,
.master_tls_size = 0, .master_tls_size = 0,
.master_tls_module_count = 1,
.dynamic_tls = nullptr,
.cleanup_stack = nullptr, .cleanup_stack = nullptr,
.id = static_cast<pthread_t>(syscall(SYS_PTHREAD_SELF)), .id = static_cast<pthread_t>(syscall(SYS_PTHREAD_SELF)),
.errno_ = 0, .errno_ = 0,
.cancel_type = PTHREAD_CANCEL_DEFERRED, .cancel_type = PTHREAD_CANCEL_DEFERRED,
.cancel_state = PTHREAD_CANCEL_ENABLE, .cancel_state = PTHREAD_CANCEL_ENABLE,
.canceled = false, .canceled = false,
.dtv = { 0 },
}; };
uthread.dtv[0] = 0;
#if defined(__x86_64__) #if defined(__x86_64__)
syscall(SYS_SET_FSBASE, &uthread); syscall(SYS_SET_FSBASE, &uthread);

View File

@ -207,9 +207,13 @@ struct LoadedElf
const uint8_t* real_strtab_addr; const uint8_t* real_strtab_addr;
}; };
static LoadedElf s_loaded_files[128]; static constexpr size_t s_max_loaded_files = 128;
static LoadedElf s_loaded_files[s_max_loaded_files];
static size_t s_loaded_file_count = 0; static size_t s_loaded_file_count = 0;
static size_t s_tls_module = 1;
static _dynamic_tls_t* s_dynamic_tls = nullptr;
static const char* s_ld_library_path = nullptr; static const char* s_ld_library_path = nullptr;
static BAN::Atomic<pthread_t> s_global_locker = 0; static BAN::Atomic<pthread_t> s_global_locker = 0;
@ -1072,6 +1076,9 @@ static LoadedElf& load_elf(const char* path, int fd)
} }
} }
if (s_loaded_file_count == s_max_loaded_files)
print_error_and_exit("cannot load more dynamic libraries", 0);
if (fd == -1 && (fd = syscall(SYS_OPENAT, AT_FDCWD, path, O_RDONLY)) < 0) if (fd == -1 && (fd = syscall(SYS_OPENAT, AT_FDCWD, path, O_RDONLY)) < 0)
print_error_and_exit("could not open library", fd); print_error_and_exit("could not open library", fd);
@ -1277,7 +1284,7 @@ static MasterTLS initialize_master_tls()
master_tls_addr = reinterpret_cast<uint8_t*>(ret); master_tls_addr = reinterpret_cast<uint8_t*>(ret);
} }
for (size_t i = 0, tls_offset = 0, tls_module = 1; i < s_loaded_file_count; i++) for (size_t i = 0, tls_offset = 0; i < s_loaded_file_count; i++)
{ {
const auto& tls_header = s_loaded_files[i].tls_header; const auto& tls_header = s_loaded_files[i].tls_header;
if (tls_header.p_type != PT_TLS) if (tls_header.p_type != PT_TLS)
@ -1298,18 +1305,42 @@ static MasterTLS initialize_master_tls()
auto& elf = s_loaded_files[i]; auto& elf = s_loaded_files[i];
elf.tls_addr = tls_buffer; elf.tls_addr = tls_buffer;
elf.tls_module = tls_module++; elf.tls_module = s_tls_module++;
elf.tls_offset = tls_offset; elf.tls_offset = tls_offset;
} }
return { .addr = master_tls_addr, .size = master_tls_size, .module_count = module_count }; {
const sys_mmap_t mmap_args {
.addr = nullptr,
.len = sizeof(_dynamic_tls_t) + s_max_loaded_files * sizeof(_dynamic_tls_entry_t),
.prot = PROT_READ | PROT_WRITE,
.flags = MAP_ANONYMOUS | MAP_PRIVATE,
.fildes = -1,
.off = 0,
};
const auto ret = syscall(SYS_MMAP, &mmap_args);
if (ret < 0)
print_error_and_exit("failed to allocate dynamic TLS", ret);
s_dynamic_tls = reinterpret_cast<_dynamic_tls_t*>(ret);
*s_dynamic_tls = {
.lock = 0,
.entry_count = 0,
.entries = reinterpret_cast<_dynamic_tls_entry_t*>(s_dynamic_tls + 1),
};
}
return {
.addr = master_tls_addr,
.size = master_tls_size,
.module_count = module_count,
};
} }
static void initialize_tls(MasterTLS master_tls) static void initialize_tls(MasterTLS master_tls)
{ {
const size_t tls_size = master_tls.size const size_t tls_size = master_tls.size + sizeof(uthread);
+ sizeof(uthread)
+ (master_tls.module_count + 1) * sizeof(uintptr_t);
uint8_t* tls_addr; uint8_t* tls_addr;
@ -1339,12 +1370,15 @@ static void initialize_tls(MasterTLS master_tls)
.self = &uthread, .self = &uthread,
.master_tls_addr = master_tls.addr, .master_tls_addr = master_tls.addr,
.master_tls_size = master_tls.size, .master_tls_size = master_tls.size,
.master_tls_module_count = master_tls.module_count,
.dynamic_tls = s_dynamic_tls,
.cleanup_stack = nullptr, .cleanup_stack = nullptr,
.id = static_cast<pthread_t>(syscall<>(SYS_PTHREAD_SELF)), .id = static_cast<pthread_t>(syscall<>(SYS_PTHREAD_SELF)),
.errno_ = 0, .errno_ = 0,
.cancel_type = PTHREAD_CANCEL_DEFERRED, .cancel_type = PTHREAD_CANCEL_DEFERRED,
.cancel_state = PTHREAD_CANCEL_ENABLE, .cancel_state = PTHREAD_CANCEL_ENABLE,
.canceled = false, .canceled = false,
.dtv = {},
}; };
uthread.dtv[0] = master_tls.module_count; uthread.dtv[0] = master_tls.module_count;
@ -1478,6 +1512,51 @@ static void register_fini_funcs(LoadedElf& elf, bool is_main_elf)
} }
} }
static void load_dynamic_tls(LoadedElf& elf)
{
if (elf.tls_header.p_type != PT_TLS)
return;
if (elf.tls_module != 0)
print_error_and_exit("TLS module already loaded??", 0);
elf.tls_module = s_tls_module++;
elf.tls_offset = 0;
{
const sys_mmap_t mmap_args {
.addr = nullptr,
.len = elf.tls_header.p_memsz,
.prot = PROT_READ | PROT_WRITE,
.flags = MAP_ANONYMOUS | MAP_PRIVATE,
.fildes = -1,
.off = 0,
};
const auto ret = syscall(SYS_MMAP, &mmap_args);
if (ret < 0)
print_error_and_exit("failed to allocate dynamic TLS", ret);
elf.tls_addr = reinterpret_cast<uint8_t*>(ret);
memset(elf.tls_addr + elf.tls_header.p_filesz, 0, elf.tls_header.p_memsz - elf.tls_header.p_filesz);
if (const auto ret = syscall(SYS_PREAD, elf.fd, elf.tls_addr, elf.tls_header.p_filesz, elf.tls_header.p_offset); ret < static_cast<long>(elf.tls_header.p_filesz))
print_error_and_exit("failed to read TLS data", 0);
}
int expected = 0;
while (!BAN::atomic_compare_exchange(s_dynamic_tls->lock, expected, 1))
{
syscall<>(SYS_YIELD);
expected = 0;
}
s_dynamic_tls->entries[s_dynamic_tls->entry_count++] = {
.master_addr = elf.tls_addr,
.master_size = elf.tls_header.p_memsz,
};
BAN::atomic_store(s_dynamic_tls->lock, 0);
}
int __dlclose(void* handle) int __dlclose(void* handle)
{ {
// TODO: maybe actually close handles? (not required by spec) // TODO: maybe actually close handles? (not required by spec)
@ -1521,17 +1600,8 @@ void* __dlopen(const char* file, int mode)
if (!elf.is_relocating && !elf.is_calling_init) if (!elf.is_relocating && !elf.is_calling_init)
{ {
for (size_t i = old_loaded_count; i < s_loaded_file_count; i++) for (size_t i = old_loaded_count; i < s_loaded_file_count; i++)
{ load_dynamic_tls(s_loaded_files[i]);
if (s_loaded_files[i].tls_header.p_type == PT_TLS)
{
s_dlerror_string = "TODO: __dlopen with TLS";
// FIXME: leaks loaded files :)
s_loaded_file_count = old_loaded_count;
return nullptr;
}
}
relocate_elf(elf, lazy); relocate_elf(elf, lazy);
call_init_funcs(elf, false); call_init_funcs(elf, false);