# Review notes. Everything above the first "diff --git" line is free-form
# commentary and is not part of any hunk.
#
# What this patch does (as far as the hunks below show):
#   * struct uthread gains master_tls_module_count and dynamic_tls, and its
#     dtv member becomes a fixed array of 1 + 128 slots, so a thread's TLS
#     area is always master_tls_size + sizeof(struct uthread) bytes.
#   * The dynamic loader stops rejecting dlopen() of objects that carry a
#     PT_TLS segment: load_dynamic_tls() assigns the next module id and
#     appends the module's TLS image to the shared _dynamic_tls_t table.
#   * LibC's __tls_get_addr() maps and initialises such a module for the
#     calling thread on first access, i.e. while its dtv slot is still 0.
#
# Changes made during review (both in userspace/libraries/LibC/pthread.cpp):
#   1. The spin-lock acquire loops in free_uthread() and
#      load_dynamic_tls_module() looped while the compare-exchange
#      succeeded, so a contended lock was never actually taken. They now
#      loop while it fails, matching the acquire loop in DynamicLoader's
#      load_dynamic_tls().
#   2. free_uthread() walked i in [master_tls_module_count, dtv[0]) and read
#      entries[i]. Dynamic modules live at dtv[module] with
#      module > master_tls_module_count and are described by
#      entries[module - master_tls_module_count - 1] (see
#      load_dynamic_tls_module()), and none of the visible hunks raises
#      dtv[0] above master_tls_module_count, so nothing was ever unmapped.
#      It now scans every dtv slot above the master modules.
#   Both changes keep the per-hunk line counts intact.
#
# Caveats about this copy of the patch:
#   * The text arrived with its line breaks collapsed. Line structure, blank
#     lines and indentation (tabs assumed) were reconstructed; blank lines
#     were placed so that every hunk matches its @@ line counts. Verify
#     against the tree, or apply with whitespace checks relaxed.
#   * Several <...> spans appear to have been stripped before review (for
#     example the target types of reinterpret_cast/static_cast and the
#     header name after #include). They are left exactly as received and
#     must be restored from the original commit.
#   * The final context line of the last hunk was not visible, so that hunk
#     is emitted with 16/7 lines instead of 17/8.
#   * The post-image blob id on pthread.cpp's "index" line predates the
#     review changes.

diff --git a/kernel/kernel/Process.cpp b/kernel/kernel/Process.cpp
index 0515c5f3..fa8df3ac 100644
--- a/kernel/kernel/Process.cpp
+++ b/kernel/kernel/Process.cpp
@@ -379,7 +379,7 @@ namespace Kernel
 		const auto [master_addr, master_size] = master_tls;
 		ASSERT(master_size % alignof(uthread) == 0);

-		const size_t tls_size = master_size + PAGE_SIZE;
+		const size_t tls_size = master_size + sizeof(uthread);

 		auto region = TRY(MemoryBackedRegion::create(
 			page_table,
@@ -408,28 +408,26 @@ namespace Kernel
 			bytes_copied += to_copy;
 		}

-		const uthread uthread {
+		auto uthread = TRY(BAN::UniqPtr::create());
+		*uthread = {
 			.self = reinterpret_cast(region->vaddr() + master_size),
 			.master_tls_addr = reinterpret_cast(master_addr),
 			.master_tls_size = master_size,
+			.master_tls_module_count = 1,
+			.dynamic_tls = nullptr,
 			.cleanup_stack = nullptr,
 			.id = 0,
 			.errno_ = 0,
 			.cancel_type = 0,
 			.cancel_state = 0,
 			.canceled = 0,
+			.dtv = { 0, region->vaddr() }
 		};
-		const uintptr_t dtv[2] { 1, region->vaddr() };

 		TRY(region->copy_data_to_region(
 			master_size,
-			reinterpret_cast(&uthread),
-			sizeof(uthread)
-		));
-		TRY(region->copy_data_to_region(
-			master_size + sizeof(uthread),
-			reinterpret_cast(&dtv),
-			sizeof(dtv)
+			reinterpret_cast(uthread.ptr()),
+			sizeof(struct uthread)
 		));

 		TLSResult result;
diff --git a/userspace/libraries/LibC/include/pthread.h b/userspace/libraries/LibC/include/pthread.h
index 11c934e9..f6de8968 100644
--- a/userspace/libraries/LibC/include/pthread.h
+++ b/userspace/libraries/LibC/include/pthread.h
@@ -17,25 +17,41 @@ __BEGIN_DECLS

 #include

-struct _pthread_cleanup_t
+typedef struct _pthread_cleanup_t
 {
 	void (*routine)(void*);
 	void* arg;
 	struct _pthread_cleanup_t* next;
-};
+} _pthread_cleanup_t;
+
+typedef struct _dynamic_tls_entry_t
+{
+	void* master_addr;
+	size_t master_size;
+} _dynamic_tls_entry_t;
+
+typedef struct _dynamic_tls_t
+{
+	int lock;
+	size_t entry_count;
+	_dynamic_tls_entry_t* entries;
+} _dynamic_tls_t;

 struct uthread
 {
 	struct uthread* self;
 	void* master_tls_addr;
 	size_t master_tls_size;
-	struct _pthread_cleanup_t* cleanup_stack;
+	size_t master_tls_module_count;
+	_dynamic_tls_t* dynamic_tls;
+	_pthread_cleanup_t* cleanup_stack;
 	pthread_t id;
 	int errno_;
 	int cancel_type;
 	int cancel_state;
 	int canceled;
-	uintptr_t dtv[];
+	// FIXME: make this dynamic
+	uintptr_t dtv[1 + 128];
 };

 #define PTHREAD_CANCELED (void*)1
diff --git a/userspace/libraries/LibC/pthread.cpp b/userspace/libraries/LibC/pthread.cpp
index 5c8a0287..60b8a252 100644
--- a/userspace/libraries/LibC/pthread.cpp
+++ b/userspace/libraries/LibC/pthread.cpp
@@ -63,13 +63,35 @@ extern "C" void _pthread_trampoline_cpp(void* arg)

 static void free_uthread(uthread* uthread)
 {
-	if (uthread->dtv[0] == 0)
-		return free(uthread);
+	const auto lock_dynamic_tls =
+		[uthread] {
+			int expected = 0;
+			while (!BAN::atomic_compare_exchange(uthread->dynamic_tls->lock, expected, 1)) // retry until the 0 -> 1 swap succeeds
+			{
+				sched_yield();
+				expected = 0;
+			}
+		};
+
+	const auto unlock_dynamic_tls =
+		[uthread] {
+			BAN::atomic_store(uthread->dynamic_tls->lock, 0);
+		};
+
+	for (size_t i = uthread->master_tls_module_count + 1; i < sizeof(uthread->dtv) / sizeof(uthread->dtv[0]); i++) // dynamic modules only; unused slots are 0
+	{
+		if (uthread->dtv[i] == 0)
+			continue;
+
+		lock_dynamic_tls();
+		const size_t size = uthread->dynamic_tls->entries[i - uthread->master_tls_module_count - 1].master_size; // same indexing as load_dynamic_tls_module()
+		unlock_dynamic_tls();
+
+		munmap(reinterpret_cast(uthread->dtv[i]), size);
+	}

 	uint8_t* tls_addr = reinterpret_cast(uthread) - uthread->master_tls_size;
-	const size_t tls_size = uthread->master_tls_size
-		+ sizeof(struct uthread)
-		+ (uthread->dtv[0] + 1) * sizeof(uintptr_t);
+	const size_t tls_size = uthread->master_tls_size + sizeof(struct uthread);
 	munmap(tls_addr, tls_size);
 }

@@ -358,58 +380,37 @@ int pthread_create(pthread_t* __restrict thread_id, const pthread_attr_t* __rest

 	long syscall_ret = 0;

-	if (uthread* self = _get_uthread(); self->master_tls_addr == nullptr)
 	{
-		uthread* uthread = static_cast(malloc(sizeof(struct uthread) + sizeof(uintptr_t)));
-		if (uthread == nullptr)
-			goto pthread_create_error;
+		uthread* self = _get_uthread();

-		*uthread = {
-			.self = uthread,
-			.master_tls_addr = nullptr,
-			.master_tls_size = 0,
-			.cleanup_stack = nullptr,
-			.id = -1,
-			.errno_ = 0,
-			.cancel_type = PTHREAD_CANCEL_DEFERRED,
-			.cancel_state = PTHREAD_CANCEL_ENABLE,
-			.canceled = false,
-		};
-		uthread->dtv[0] = 0;
-
-		info->uthread = uthread;
-	}
-	else
-	{
-		const size_t module_count = self->dtv[0];
-
-		const size_t tls_size = self->master_tls_size
-			+ sizeof(uthread)
-			+ (module_count + 1) * sizeof(uintptr_t);
+		const size_t tls_size = self->master_tls_size + sizeof(uthread);

 		uint8_t* tls_addr = static_cast(mmap(nullptr, tls_size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0));
 		if (tls_addr == MAP_FAILED)
 			goto pthread_create_error;
-		memcpy(tls_addr, self->master_tls_addr, self->master_tls_size);
+
+		if (self->master_tls_addr)
+			memcpy(tls_addr, self->master_tls_addr, self->master_tls_size);

 		uthread* uthread = reinterpret_cast(tls_addr + self->master_tls_size);
 		*uthread = {
 			.self = uthread,
 			.master_tls_addr = self->master_tls_addr,
 			.master_tls_size = self->master_tls_size,
+			.master_tls_module_count = self->master_tls_module_count,
+			.dynamic_tls = self->dynamic_tls,
 			.cleanup_stack = nullptr,
 			.id = -1,
 			.errno_ = 0,
 			.cancel_type = PTHREAD_CANCEL_DEFERRED,
 			.cancel_state = PTHREAD_CANCEL_ENABLE,
 			.canceled = 0,
+			.dtv = { self->dtv[0] }
 		};

 		const uintptr_t self_addr = reinterpret_cast(self);
 		const uintptr_t uthread_addr = reinterpret_cast(uthread);
-
-		uthread->dtv[0] = module_count;
-		for (size_t i = 1; i <= module_count; i++)
+		for (size_t i = 1; i <= self->master_tls_module_count; i++)
 			uthread->dtv[i] = self->dtv[i] - self_addr + uthread_addr;

 		info->uthread = uthread;
@@ -1276,6 +1277,36 @@ int pthread_barrier_wait(pthread_barrier_t* barrier)
 	return 0;
 }

+static void load_dynamic_tls_module(size_t module)
+{
+	auto* uthread = _get_uthread();
+	ASSERT(uthread->dynamic_tls);
+	ASSERT(module > uthread->master_tls_module_count);
+
+	const _dynamic_tls_entry_t entry = ({
+		int expected = 0;
+		while (!BAN::atomic_compare_exchange(uthread->dynamic_tls->lock, expected, 1)) // retry until the 0 -> 1 swap succeeds
+		{
+			sched_yield();
+			expected = 0;
+		}
+
+		ASSERT(module <= uthread->master_tls_module_count + uthread->dynamic_tls->entry_count);
+		auto result = uthread->dynamic_tls->entries[module - uthread->master_tls_module_count - 1];
+
+		BAN::atomic_store(uthread->dynamic_tls->lock, 0);
+
+		result;
+	});
+
+	void* dtv_data = mmap(nullptr, entry.master_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	ASSERT(dtv_data != MAP_FAILED);
+
+	memcpy(dtv_data, entry.master_addr, entry.master_size);
+
+	uthread->dtv[module] = reinterpret_cast(dtv_data);
+}
+
 struct tls_index
 {
 	unsigned long int ti_module;
@@ -1284,12 +1315,18 @@ struct tls_index

 extern "C" void* __tls_get_addr(tls_index* ti)
 {
-	return reinterpret_cast(_get_uthread()->dtv[ti->ti_module] + ti->ti_offset);
+	auto* uthread = _get_uthread();
+	if (uthread->dtv[ti->ti_module] == 0) [[unlikely]]
+		load_dynamic_tls_module(ti->ti_module);
+	return reinterpret_cast(uthread->dtv[ti->ti_module] + ti->ti_offset);
 }

 #if ARCH(i686)
 extern "C" void* __attribute__((__regparm__(1))) ___tls_get_addr(tls_index* ti)
 {
-	return reinterpret_cast(_get_uthread()->dtv[ti->ti_module] + ti->ti_offset);
+	auto* uthread = _get_uthread();
+	if (uthread->dtv[ti->ti_module] == 0) [[unlikely]]
+		load_dynamic_tls_module(ti->ti_module);
+	return reinterpret_cast(uthread->dtv[ti->ti_module] + ti->ti_offset);
 }
 #endif
diff --git a/userspace/libraries/LibC/unistd.cpp b/userspace/libraries/LibC/unistd.cpp
index d268df5c..ff3941fa 100644
--- a/userspace/libraries/LibC/unistd.cpp
+++ b/userspace/libraries/LibC/unistd.cpp
@@ -84,21 +84,23 @@ extern "C" void _init_libc(char** environ, init_funcs_t init_funcs, init_funcs_t
 	}
 	else
 	{
-		alignas(uthread) static uint8_t storage[sizeof(uthread) + sizeof(uintptr_t)];
+		alignas(uthread) static uint8_t storage[sizeof(uthread)];

 		uthread& uthread = *reinterpret_cast(storage);
 		uthread = {
 			.self = &uthread,
 			.master_tls_addr = nullptr,
 			.master_tls_size = 0,
+			.master_tls_module_count = 1,
+			.dynamic_tls = nullptr,
 			.cleanup_stack = nullptr,
 			.id = static_cast(syscall(SYS_PTHREAD_SELF)),
 			.errno_ = 0,
 			.cancel_type = PTHREAD_CANCEL_DEFERRED,
 			.cancel_state = PTHREAD_CANCEL_ENABLE,
 			.canceled = false,
+			.dtv = { 0 },
 		};
-		uthread.dtv[0] = 0;

 #if defined(__x86_64__)
 		syscall(SYS_SET_FSBASE, &uthread);
diff --git a/userspace/programs/DynamicLoader/main.cpp b/userspace/programs/DynamicLoader/main.cpp
index f7fb6441..c8a67968 100644
--- a/userspace/programs/DynamicLoader/main.cpp
+++ b/userspace/programs/DynamicLoader/main.cpp
@@ -207,9 +207,13 @@ struct LoadedElf
 	const uint8_t* real_strtab_addr;
 };

-static LoadedElf s_loaded_files[128];
+static constexpr size_t s_max_loaded_files = 128;
+static LoadedElf s_loaded_files[s_max_loaded_files];
 static size_t s_loaded_file_count = 0;

+static size_t s_tls_module = 1;
+static _dynamic_tls_t* s_dynamic_tls = nullptr;
+
 static const char* s_ld_library_path = nullptr;

 static BAN::Atomic s_global_locker = 0;
@@ -1072,6 +1076,9 @@ static LoadedElf& load_elf(const char* path, int fd)
 		}
 	}

+	if (s_loaded_file_count == s_max_loaded_files)
+		print_error_and_exit("cannot load more dynamic libraries", 0);
+
 	if (fd == -1 && (fd = syscall(SYS_OPENAT, AT_FDCWD, path, O_RDONLY)) < 0)
 		print_error_and_exit("could not open library", fd);

@@ -1277,7 +1284,7 @@ static MasterTLS initialize_master_tls()
 		master_tls_addr = reinterpret_cast(ret);
 	}

-	for (size_t i = 0, tls_offset = 0, tls_module = 1; i < s_loaded_file_count; i++)
+	for (size_t i = 0, tls_offset = 0; i < s_loaded_file_count; i++)
 	{
 		const auto& tls_header = s_loaded_files[i].tls_header;
 		if (tls_header.p_type != PT_TLS)
@@ -1298,18 +1305,42 @@ static MasterTLS initialize_master_tls()

 		auto& elf = s_loaded_files[i];
 		elf.tls_addr = tls_buffer;
-		elf.tls_module = tls_module++;
+		elf.tls_module = s_tls_module++;
 		elf.tls_offset = tls_offset;
 	}

-	return { .addr = master_tls_addr, .size = master_tls_size, .module_count = module_count };
+	{
+		const sys_mmap_t mmap_args {
+			.addr = nullptr,
+			.len = sizeof(_dynamic_tls_t) + s_max_loaded_files * sizeof(_dynamic_tls_entry_t),
+			.prot = PROT_READ | PROT_WRITE,
+			.flags = MAP_ANONYMOUS | MAP_PRIVATE,
+			.fildes = -1,
+			.off = 0,
+		};
+
+		const auto ret = syscall(SYS_MMAP, &mmap_args);
+		if (ret < 0)
+			print_error_and_exit("failed to allocate dynamic TLS", ret);
+		s_dynamic_tls = reinterpret_cast<_dynamic_tls_t*>(ret);
+
+		*s_dynamic_tls = {
+			.lock = 0,
+			.entry_count = 0,
+			.entries = reinterpret_cast<_dynamic_tls_entry_t*>(s_dynamic_tls + 1),
+		};
+	}
+
+	return {
+		.addr = master_tls_addr,
+		.size = master_tls_size,
+		.module_count = module_count,
+	};
 }

 static void initialize_tls(MasterTLS master_tls)
 {
-	const size_t tls_size = master_tls.size
-		+ sizeof(uthread)
-		+ (master_tls.module_count + 1) * sizeof(uintptr_t);
+	const size_t tls_size = master_tls.size + sizeof(uthread);

 	uint8_t* tls_addr;

@@ -1339,12 +1370,15 @@ static void initialize_tls(MasterTLS master_tls)
 		.self = &uthread,
 		.master_tls_addr = master_tls.addr,
 		.master_tls_size = master_tls.size,
+		.master_tls_module_count = master_tls.module_count,
+		.dynamic_tls = s_dynamic_tls,
 		.cleanup_stack = nullptr,
 		.id = static_cast(syscall<>(SYS_PTHREAD_SELF)),
 		.errno_ = 0,
 		.cancel_type = PTHREAD_CANCEL_DEFERRED,
 		.cancel_state = PTHREAD_CANCEL_ENABLE,
 		.canceled = false,
+		.dtv = {},
 	};

 	uthread.dtv[0] = master_tls.module_count;
@@ -1478,6 +1512,51 @@ static void register_fini_funcs(LoadedElf& elf, bool is_main_elf)
 	}
 }

+static void load_dynamic_tls(LoadedElf& elf)
+{
+	if (elf.tls_header.p_type != PT_TLS)
+		return;
+	if (elf.tls_module != 0)
+		print_error_and_exit("TLS module already loaded??", 0);
+
+	elf.tls_module = s_tls_module++;
+	elf.tls_offset = 0;
+
+	{
+		const sys_mmap_t mmap_args {
+			.addr = nullptr,
+			.len = elf.tls_header.p_memsz,
+			.prot = PROT_READ | PROT_WRITE,
+			.flags = MAP_ANONYMOUS | MAP_PRIVATE,
+			.fildes = -1,
+			.off = 0,
+		};
+
+		const auto ret = syscall(SYS_MMAP, &mmap_args);
+		if (ret < 0)
+			print_error_and_exit("failed to allocate dynamic TLS", ret);
+		elf.tls_addr = reinterpret_cast(ret);
+
+		memset(elf.tls_addr + elf.tls_header.p_filesz, 0, elf.tls_header.p_memsz - elf.tls_header.p_filesz);
+		if (const auto ret = syscall(SYS_PREAD, elf.fd, elf.tls_addr, elf.tls_header.p_filesz, elf.tls_header.p_offset); ret < static_cast(elf.tls_header.p_filesz))
+			print_error_and_exit("failed to read TLS data", 0);
+	}
+
+	int expected = 0;
+	while (!BAN::atomic_compare_exchange(s_dynamic_tls->lock, expected, 1))
+	{
+		syscall<>(SYS_YIELD);
+		expected = 0;
+	}
+
+	s_dynamic_tls->entries[s_dynamic_tls->entry_count++] = {
+		.master_addr = elf.tls_addr,
+		.master_size = elf.tls_header.p_memsz,
+	};
+
+	BAN::atomic_store(s_dynamic_tls->lock, 0);
+}
+
 int __dlclose(void* handle)
 {
 	// TODO: maybe actually close handles? (not required by spec)
@@ -1521,16 +1600,7 @@ void* __dlopen(const char* file, int mode)
 	if (!elf.is_relocating && !elf.is_calling_init)
 	{
 		for (size_t i = old_loaded_count; i < s_loaded_file_count; i++)
-		{
-			if (s_loaded_files[i].tls_header.p_type == PT_TLS)
-			{
-				s_dlerror_string = "TODO: __dlopen with TLS";
+			load_dynamic_tls(s_loaded_files[i]);

-				// FIXME: leaks loaded files :)
-				s_loaded_file_count = old_loaded_count;
-
-				return nullptr;
-			}
-		}
 		relocate_elf(elf, lazy);
 		call_init_funcs(elf, false);