Compare commits

...

22 Commits

Author SHA1 Message Date
Bananymous ee9e941a56 LibC: Implement getitimer 2025-04-15 23:35:25 +03:00
Bananymous ac90800c3c Kernel/LibC/DynamicLoader: Implement thread local storage
For some reason this does not work on 32 bit version, so it is disabled
on that platform. I'll have to look into it later to find the bug :)
2025-04-15 23:33:39 +03:00
Bananymous 08f5833ca8 Kernel/LibC: Implement pthread_attr_init 2025-04-15 23:31:17 +03:00
Bananymous 4bcd3ed86f Kernel: Start working on TLS, add SYS_{SET,GET}_TLS 2025-04-15 23:31:17 +03:00
Bananymous 254fd80088 DynamicLoader: Cleanup and prepare for TLS 2025-04-15 23:31:17 +03:00
Bananymous d7e6df1e44 LibELF: Add definitions for ELF TLS values 2025-04-15 23:31:17 +03:00
Bananymous a933fabb86 Kernel: Define constant USERSPACE_END
This should be used for userspace generic allocations. Currently I used
KERNEL_OFFSET, but I want to limit userspace to the actual lower half of
the address space
2025-04-15 23:31:17 +03:00
Bananymous 36baf7b0af Kernel/LibC/DynamicLoader: Update process start ABI
We now use SysV abi for process startup
2025-04-15 23:05:52 +03:00
Bananymous e6026cb0b8 DynamicLoader: Handle circular dependencies 2025-04-15 23:05:52 +03:00
Bananymous cc2b4967ea LibC: Make malloc thread safe with pthread_spinlock 2025-04-15 23:05:52 +03:00
Bananymous cf59f89bfb LibC: Rework constructor/destructor calling
constructors are now called in _init_libc instead of crt0

destructors are now registered with atexit() instead of called manually
2025-04-15 23:05:52 +03:00
Bananymous 066ed7e4a1 LibC: merge atexit and __cxa_atexit into common function 2025-04-15 23:05:52 +03:00
Bananymous 4f49d60e4a DynamicLoader/LibC: lazy malloc environ
This allows DynamicLoader to just set the value of global environ symbol
without libc needing to malloc it at startup
2025-04-15 23:05:52 +03:00
Bananymous 3721dadd72 LibC: Make stdio and malloc initialization constructors 2025-04-15 22:01:57 +03:00
Bananymous 5539d5eed0 LibC: Implement pthread_cleanup_{pop,push} 2025-04-15 21:48:38 +03:00
Bananymous 64002626b9 LibC: Add h_addr definition for hostent backwards compatibility 2025-04-15 21:39:50 +03:00
Bananymous 4b1c20fa36 Kernel: Make SYS_{,P}READ work with count == 0 2025-04-15 21:36:00 +03:00
Bananymous b418683337 LibELF: Add definition and values for auxiliary vectors 2025-04-15 21:31:02 +03:00
Bananymous 00015285ab Kernel: Add vaddr->paddr getter to VirtualRange 2025-04-15 21:25:15 +03:00
Bananymous 0f936fc163 Kernel: Cleanup 32 bit interrupt stack alignment 2025-04-15 21:21:23 +03:00
Bananymous 15045cc486 Kernel: Make nonblocking sockets unblocking :) 2025-04-05 18:42:02 +03:00
Bananymous c9132d984b Kernel: Fix TCP window scaling 2025-04-05 18:40:23 +03:00
40 changed files with 1328 additions and 551 deletions

View File

@ -41,31 +41,15 @@ start_userspace_thread:
call get_thread_start_sp
movl %eax, %esp
# STACK LAYOUT
# entry
# argc
# argv
# envp
# userspace stack
call get_userspace_thread_stack_top
# ds, es = user data
movw $(0x20 | 3), %bx
movw %bx, %ds
movw %bx, %es
movw %bx, %fs
# gs = thread local
movw $(0x30 | 3), %bx
movw %bx, %gs
# fs = 0
xorw %bx, %bx
movw %bx, %fs
popl %edi
popl %esi
popl %edx
popl %ecx
popl %ebx
pushl $(0x20 | 3)
pushl %eax
pushl $0x202
pushl $(0x18 | 3)
pushl %ebx
iret

View File

@ -40,8 +40,7 @@ isr_stub:
movl 56(%esp), %edx // isr number
movl %esp, %ebp
subl $15, %esp
andl $0xFFFFFFF0, %esp
andl $-16, %esp
pushl %eax
pushl %ebx
@ -61,14 +60,13 @@ irq_stub:
load_kernel_segments
cld
movl 40(%esp), %eax # interrupt number
movl 40(%esp), %edi # interrupt number
movl %esp, %ebp
subl $15, %esp
andl $0xFFFFFFF0, %esp
andl $-16, %esp
subl $12, %esp
pushl %eax
pushl %edi
call cpp_irq_handler
movl %ebp, %esp
@ -83,16 +81,15 @@ asm_yield_handler:
pushal
cld
movl %esp, %eax # interrupt registers ptr
leal 32(%esp), %ebx # interrupt stack ptr
leal 32(%esp), %edi # interrupt stack ptr
movl %esp, %esi # interrupt registers ptr
movl %esp, %ebp
subl $15, %esp
andl $0xFFFFFFF0, %esp
andl $-16, %esp
subl $8, %esp
pushl %eax
pushl %ebx
pushl %esi
pushl %edi
call cpp_yield_handler
movl %ebp, %esp
@ -107,8 +104,7 @@ asm_ipi_handler:
cld
movl %esp, %ebp
subl $15, %esp
andl $0xFFFFFFF0, %esp
andl $-16, %esp
call cpp_ipi_handler
@ -117,7 +113,6 @@ asm_ipi_handler:
pop_userspace
iret
.global asm_timer_handler
asm_timer_handler:
push_userspace
@ -125,8 +120,7 @@ asm_timer_handler:
cld
movl %esp, %ebp
subl $15, %esp
andl $0xFFFFFFF0, %esp
andl $-16, %esp
call cpp_timer_handler

View File

@ -32,24 +32,4 @@ start_userspace_thread:
call get_thread_start_sp
movq %rax, %rsp
# STACK LAYOUT
# entry
# argc
# argv
# envp
# userspace stack
call get_userspace_thread_stack_top
popq %rdi
popq %rsi
popq %rdx
popq %rcx
popq %rbx
pushq $(0x20 | 3)
pushq %rax
pushq $0x202
pushq $(0x18 | 3)
pushq %rbx
iretq

View File

@ -8,8 +8,15 @@ namespace Kernel::ELF
struct LoadResult
{
bool has_interpreter;
struct TLS
{
vaddr_t addr;
size_t size;
};
bool open_execfd;
vaddr_t entry_point;
BAN::Optional<TLS> master_tls;
BAN::Vector<BAN::UniqPtr<MemoryRegion>> regions;
};

View File

@ -169,6 +169,7 @@ namespace Kernel
private:
BAN::WeakPtr<SharedFileData> m_shared_region;
friend class FileBackedRegion;
friend class OpenFileDescriptorSet;
friend class SharedFileData;
friend class TTY;
};

View File

@ -128,6 +128,10 @@ namespace Kernel
#endif
}
#if ARCH(i686)
void set_tls(uintptr_t addr);
#endif
private:
GDT() = default;
@ -149,8 +153,8 @@ namespace Kernel
BAN::Array<SegmentDescriptor, 7> m_gdt; // null, kernel code, kernel data, user code, user data, tss low, tss high
static constexpr uint16_t m_tss_offset = 0x28;
#elif ARCH(i686)
BAN::Array<SegmentDescriptor, 7> m_gdt; // null, kernel code, kernel data, user code, user data, processor data, tss
static constexpr uint16_t m_tss_offset = 0x30;
BAN::Array<SegmentDescriptor, 8> m_gdt; // null, kernel code, kernel data, user code, user data, processor data, tls, tss
static constexpr uint16_t m_tss_offset = 0x38;
#endif
TaskStateSegment m_tss;
const GDTR m_gdtr {

View File

@ -4,8 +4,10 @@
#if ARCH(x86_64)
#define KERNEL_OFFSET 0xFFFFFFFF80000000
#define USERSPACE_END 0xFFFF800000000000
#elif ARCH(i686)
#define KERNEL_OFFSET 0xC0000000
#define USERSPACE_END 0xC0000000
#else
#error
#endif

View File

@ -26,6 +26,16 @@ namespace Kernel
size_t size() const { return m_size; }
PageTable::flags_t flags() const { return m_flags; }
// Translates a page-aligned virtual address inside this range to the
// physical address of the page backing it.
// Asserts (rather than returning an error) on a misaligned address, an
// address outside the range, or a page with no backing physical frame.
paddr_t paddr_of(vaddr_t vaddr) const
{
// Only whole-page translations are supported.
ASSERT(vaddr % PAGE_SIZE == 0);
// Index of the page within this range's per-page paddr table.
const size_t index = (vaddr - m_vaddr) / PAGE_SIZE;
ASSERT(index < m_paddrs.size());
const paddr_t paddr = m_paddrs[index];
// A zero entry presumably means the page has not been allocated yet
// (e.g. pending demand paging) -- treated as a caller bug here.
ASSERT(paddr);
return paddr;
}
bool contains(vaddr_t address) const { return vaddr() <= address && address < vaddr() + size(); }
BAN::ErrorOr<void> allocate_page_for_demand_paging(vaddr_t address);

View File

@ -91,7 +91,7 @@ namespace Kernel
bool has_ghost_byte { false };
uint32_t data_size { 0 }; // number of bytes in this buffer
uint8_t scale { 1 }; // window scale
uint8_t scale_shift { 0 }; // window scale
BAN::UniqPtr<VirtualRange> buffer;
};
@ -99,8 +99,8 @@ namespace Kernel
{
uint32_t mss { 0 }; // maximum segment size
uint16_t non_scaled_size { 0 }; // window size without scaling
uint8_t scale { 0 }; // window scale
uint32_t scaled_size() const { return (uint32_t)non_scaled_size << scale; }
uint8_t scale_shift { 0 }; // window scale
uint32_t scaled_size() const { return (uint32_t)non_scaled_size << scale_shift; }
uint32_t start_seq { 0 }; // sequence number of first byte in buffer
uint32_t current_seq { 0 }; // sequence number of next send
@ -118,6 +118,7 @@ namespace Kernel
{
sockaddr_storage address;
socklen_t address_len;
bool has_window_scale;
};
struct PendingConnection

View File

@ -47,6 +47,9 @@ namespace Kernel
BAN::ErrorOr<size_t> read_dir_entries(int fd, struct dirent* list, size_t list_len);
BAN::ErrorOr<size_t> recvfrom(int fd, BAN::ByteSpan buffer, sockaddr* address, socklen_t* address_len);
BAN::ErrorOr<size_t> sendto(int fd, BAN::ConstByteSpan buffer, const sockaddr* address, socklen_t address_len);
BAN::ErrorOr<VirtualFileSystem::File> file_of(int) const;
BAN::ErrorOr<BAN::StringView> path_of(int) const;
BAN::ErrorOr<BAN::RefPtr<Inode>> inode_of(int);

View File

@ -6,6 +6,7 @@
#include <BAN/StringView.h>
#include <BAN/Vector.h>
#include <kernel/Credentials.h>
#include <kernel/ELF.h>
#include <kernel/FS/Inode.h>
#include <kernel/Lock/Mutex.h>
#include <kernel/Memory/Heap.h>
@ -34,15 +35,6 @@ namespace Kernel
public:
using entry_t = Thread::entry_t;
struct userspace_info_t
{
uintptr_t entry { 0 };
int argc { 0 };
char** argv { nullptr };
char** envp { nullptr };
int file_fd { -1 };
};
public:
static Process* create_kernel();
static Process* create_kernel(entry_t, void*);
@ -183,7 +175,9 @@ namespace Kernel
BAN::ErrorOr<long> sys_sigprocmask(int how, const sigset_t* set, sigset_t* oset);
BAN::ErrorOr<long> sys_yield();
BAN::ErrorOr<long> sys_pthread_create(const pthread_attr_t* __restrict attr, void (*entry)(void*), void* arg);
BAN::ErrorOr<long> sys_set_tls(void*);
BAN::ErrorOr<long> sys_get_tls();
BAN::ErrorOr<long> sys_pthread_create(const pthread_attr_t* attr, void (*entry)(void*), void* arg);
BAN::ErrorOr<long> sys_pthread_exit(void* value);
BAN::ErrorOr<long> sys_pthread_join(pthread_t thread, void** value);
BAN::ErrorOr<long> sys_pthread_self();
@ -208,7 +202,6 @@ namespace Kernel
size_t proc_environ(off_t offset, BAN::ByteSpan) const;
bool is_userspace() const { return m_is_userspace; }
const userspace_info_t& userspace_info() const { return m_userspace_info; }
// Returns error if page could not be allocated
// Returns true if the page was allocated successfully
@ -227,6 +220,13 @@ namespace Kernel
Process(const Credentials&, pid_t pid, pid_t parent, pid_t sid, pid_t pgrp);
static Process* create_process(const Credentials&, pid_t parent, pid_t sid = 0, pid_t pgrp = 0);
struct TLSResult
{
BAN::UniqPtr<MemoryRegion> region;
vaddr_t addr;
};
static BAN::ErrorOr<TLSResult> initialize_thread_local_storage(PageTable&, ELF::LoadResult::TLS master_tls);
struct FileParent
{
VirtualFileSystem::File parent;
@ -315,7 +315,6 @@ namespace Kernel
BAN::Vector<BAN::String> m_environ;
bool m_is_userspace { false };
userspace_info_t m_userspace_info;
SpinLock m_child_exit_lock;
BAN::Vector<ChildExitStatus> m_child_exit_statuses;

View File

@ -110,6 +110,8 @@ namespace Kernel
static void send_smp_message(ProcessorID, const SMPMessage&, bool send_ipi = true);
static void broadcast_smp_message(const SMPMessage&);
static void load_tls();
private:
Processor() = default;
~Processor() { ASSERT_NOT_REACHED(); }

View File

@ -7,6 +7,8 @@
#include <kernel/Memory/VirtualRange.h>
#include <kernel/ThreadBlocker.h>
#include <LibELF/AuxiliaryVector.h>
#include <signal.h>
#include <sys/types.h>
@ -41,9 +43,10 @@ namespace Kernel
BAN::ErrorOr<Thread*> pthread_create(entry_t, void*);
BAN::ErrorOr<Thread*> clone(Process*, uintptr_t sp, uintptr_t ip);
void setup_exec();
void setup_process_cleanup();
BAN::ErrorOr<void> initialize_userspace(vaddr_t entry, BAN::Span<BAN::String> argv, BAN::Span<BAN::String> envp, BAN::Span<LibELF::AuxiliaryVector> auxv);
// Returns true, if thread is going to trigger signal
bool is_interrupted_by_signal() const;
@ -85,6 +88,9 @@ namespace Kernel
bool is_userspace() const { return m_is_userspace; }
void set_tls(vaddr_t tls) { m_tls = tls; }
vaddr_t get_tls() const { return m_tls; }
size_t virtual_page_count() const { return (m_kernel_stack ? (m_kernel_stack->size() / PAGE_SIZE) : 0) + (m_userspace_stack ? (m_userspace_stack->size() / PAGE_SIZE) : 0); }
size_t physical_page_count() const { return virtual_page_count(); }
@ -100,7 +106,7 @@ namespace Kernel
private:
Thread(pid_t tid, Process*);
void setup_exec_impl(uintptr_t entry, uintptr_t arg0, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3);
void setup_exec(vaddr_t ip, vaddr_t sp);
static void on_exit_trampoline(Thread*);
void on_exit();
@ -118,6 +124,8 @@ namespace Kernel
bool m_is_userspace { false };
bool m_delete_process { false };
vaddr_t m_tls { 0 };
SchedulerQueue::Node* m_scheduler_node { nullptr };
InterruptStack m_interrupt_stack { };

View File

@ -8,6 +8,7 @@
#include <ctype.h>
#include <fcntl.h>
#include <pthread.h>
namespace Kernel::ELF
{
@ -108,13 +109,13 @@ namespace Kernel::ELF
auto file_header = TRY(read_and_validate_file_header(inode));
auto program_headers = TRY(read_program_headers(inode, file_header));
vaddr_t executable_end = 0;
size_t exec_max_offset { 0 };
BAN::String interpreter;
for (const auto& program_header : program_headers)
{
if (program_header.p_type == PT_LOAD)
executable_end = BAN::Math::max<vaddr_t>(executable_end, program_header.p_vaddr + program_header.p_memsz);
exec_max_offset = BAN::Math::max<vaddr_t>(exec_max_offset, program_header.p_vaddr + program_header.p_memsz);
else if (program_header.p_type == PT_INTERP)
{
BAN::Vector<uint8_t> interp_buffer;
@ -140,9 +141,6 @@ namespace Kernel::ELF
}
}
if (file_header.e_type == ET_DYN)
executable_end = 0x400000;
if (!interpreter.empty())
{
auto interpreter_inode = TRY(VirtualFileSystem::get().file_from_absolute_path(credentials, interpreter, O_EXEC)).inode;
@ -164,15 +162,16 @@ namespace Kernel::ELF
}
const vaddr_t load_base_vaddr =
[&file_header, executable_end]() -> vaddr_t
[&file_header, exec_max_offset]() -> vaddr_t
{
if (file_header.e_type == ET_EXEC)
return 0;
if (file_header.e_type == ET_DYN)
return (executable_end + PAGE_SIZE - 1) & PAGE_ADDR_MASK;
return (exec_max_offset + PAGE_SIZE - 1) & PAGE_ADDR_MASK;
ASSERT_NOT_REACHED();
}();
vaddr_t last_loaded_address = 0;
BAN::Vector<BAN::UniqPtr<MemoryRegion>> memory_regions;
for (const auto& program_header : program_headers)
{
@ -241,10 +240,57 @@ namespace Kernel::ELF
TRY(memory_regions.emplace_back(BAN::move(region)));
}
last_loaded_address = BAN::Math::max(last_loaded_address, pheader_base + program_header.p_memsz);
}
LoadResult result;
result.has_interpreter = !interpreter.empty();
for (const auto& program_header : program_headers)
{
if (program_header.p_type != PT_TLS)
continue;
if (!BAN::Math::is_power_of_two(program_header.p_align))
return BAN::Error::from_errno(EINVAL);
size_t region_size = program_header.p_memsz;
if (auto rem = region_size % program_header.p_align)
region_size += program_header.p_align - rem;
size_t offset = 0;
if (auto rem = region_size % alignof(uthread))
offset = alignof(uthread) - rem;
auto region = TRY(MemoryBackedRegion::create(
page_table,
offset + region_size,
{ .start = last_loaded_address, .end = USERSPACE_END },
MemoryRegion::Type::PRIVATE,
PageTable::Flags::UserSupervisor | PageTable::Flags::Present
));
for (vaddr_t vaddr = region->vaddr(); vaddr < region->vaddr() + offset + region->size(); vaddr += PAGE_SIZE)
TRY(region->allocate_page_containing(vaddr, false));
if (program_header.p_filesz > 0)
{
BAN::Vector<uint8_t> file_data_buffer;
TRY(file_data_buffer.resize(program_header.p_filesz));
if (TRY(inode->read(program_header.p_offset, file_data_buffer.span())) != file_data_buffer.size())
return BAN::Error::from_errno(EFAULT);
TRY(region->copy_data_to_region(offset, file_data_buffer.data(), file_data_buffer.size()));
}
result.master_tls = LoadResult::TLS {
.addr = region->vaddr(),
.size = region->size(),
};
TRY(memory_regions.emplace_back(BAN::move(region)));
}
result.open_execfd = !interpreter.empty();
result.entry_point = load_base_vaddr + file_header.e_entry;
result.regions = BAN::move(memory_regions);
return BAN::move(result);

View File

@ -26,12 +26,20 @@ namespace Kernel
gdt->write_entry(0x20, 0x00000000, 0xFFFFF, 0xF2, data_flags); // user data
#if ARCH(i686)
gdt->write_entry(0x28, reinterpret_cast<uint32_t>(processor), sizeof(Processor), 0x92, 0x4); // processor data
gdt->write_entry(0x30, 0x00000000, 0x00000, 0x00, 0x0); // tls
#endif
gdt->write_tss();
return gdt;
}
#if ARCH(i686)
// Points the i686 thread-local-storage segment (GDT entry 0x30, loaded
// into %gs for userspace threads) at the given linear base address.
// write_entry arguments: base = addr, limit = 0xFFFF,
// access = 0xF2 (present, DPL 3, writable data),
// flags = 0xC (4 KiB granularity, 32-bit segment).
// NOTE(review): with 4 KiB granularity a limit of 0xFFFF spans ~256 MiB,
// while the other flat segments use 0xFFFFF for the full 4 GiB -- confirm
// the smaller limit is intentional.
void GDT::set_tls(uintptr_t addr)
{
write_entry(0x30, addr, 0xFFFF, 0xF2, 0xC);
}
#endif
void GDT::write_entry(uint8_t offset, uint32_t base, uint32_t limit, uint8_t access, uint8_t flags)
{
ASSERT(offset % sizeof(SegmentDescriptor) == 0);

View File

@ -248,11 +248,7 @@ namespace Kernel
}
if (Thread::current().has_process() && Process::current().is_userspace())
{
const char* const* argv = Process::current().userspace_info().argv;
if (argv && *argv)
process_name = *argv;
}
process_name = Process::current().name();
#if ARCH(x86_64)
dwarnln(

View File

@ -32,7 +32,7 @@ namespace Kernel
PageTable::Flags::ReadWrite | PageTable::Flags::Present,
true
));
socket->m_recv_window.scale = 12; // use PAGE_SIZE windows
socket->m_recv_window.scale_shift = PAGE_SIZE_SHIFT; // use PAGE_SIZE windows
socket->m_send_window.buffer = TRY(VirtualRange::create_to_vaddr_range(
PageTable::kernel(),
KERNEL_OFFSET,
@ -80,7 +80,6 @@ namespace Kernel
auto connection = m_pending_connections.front();
m_pending_connections.pop();
auto listen_key = ListenKey(
reinterpret_cast<const sockaddr*>(&connection.target.address),
connection.target.address_len
@ -104,6 +103,9 @@ namespace Kernel
return_inode->m_next_state = State::SynReceived;
return_inode->m_mutex.unlock();
if (!return_inode->m_connection_info->has_window_scale)
return_inode->m_recv_window.scale_shift = 0;
TRY(m_listen_children.emplace(listen_key, return_inode));
const uint64_t wake_time_ms = SystemTimer::get().ms_since_boot() + 5000;
@ -155,7 +157,7 @@ namespace Kernel
if (!is_bound())
TRY(m_network_layer.bind_socket_to_unused(this, address, address_len));
m_connection_info.emplace(sockaddr_storage {}, address_len);
m_connection_info.emplace(sockaddr_storage {}, address_len, true);
memcpy(&m_connection_info->address, address, address_len);
m_next_flags = SYN;
@ -224,51 +226,33 @@ namespace Kernel
BAN::ErrorOr<size_t> TCPSocket::sendto_impl(BAN::ConstByteSpan message, const sockaddr* address, socklen_t address_len)
{
(void)address;
(void)address_len;
if (address)
return BAN::Error::from_errno(EISCONN);
if (!m_has_connected)
return BAN::Error::from_errno(ENOTCONN);
if (message.size() > m_send_window.buffer->size())
{
size_t nsent = 0;
while (nsent < message.size())
{
const size_t to_send = BAN::Math::min<size_t>(message.size() - nsent, m_send_window.buffer->size());
TRY(sendto_impl(message.slice(nsent, to_send), address, address_len));
nsent += to_send;
}
return nsent;
}
while (true)
while (m_send_window.data_size == m_send_window.buffer->size())
{
if (m_state != State::Established)
return return_with_maybe_zero();
if (m_send_window.data_size + message.size() <= m_send_window.buffer->size())
break;
LockFreeGuard free(m_mutex);
TRY(Thread::current().block_or_eintr_or_timeout_ms(m_thread_blocker, 100, false));
}
const size_t to_send = BAN::Math::min<size_t>(message.size(), m_send_window.buffer->size() - m_send_window.data_size);
{
auto* buffer = reinterpret_cast<uint8_t*>(m_send_window.buffer->vaddr());
memcpy(buffer + m_send_window.data_size, message.data(), message.size());
m_send_window.data_size += message.size();
memcpy(buffer + m_send_window.data_size, message.data(), to_send);
m_send_window.data_size += to_send;
}
const uint32_t target_ack = m_send_window.start_seq + m_send_window.data_size;
m_thread_blocker.unblock();
while (m_send_window.current_ack < target_ack)
{
if (m_state != State::Established)
return return_with_maybe_zero();
LockFreeGuard free(m_mutex);
TRY(Thread::current().block_or_eintr_or_timeout_ms(m_thread_blocker, 100, false));
}
return message.size();
return to_send;
}
bool TCPSocket::can_read_impl() const
@ -377,16 +361,18 @@ namespace Kernel
header.seq_number = m_send_window.current_seq + m_send_window.has_ghost_byte;
header.ack_number = m_recv_window.start_seq + m_recv_window.data_size + m_recv_window.has_ghost_byte;
header.data_offset = (sizeof(TCPHeader) + m_tcp_options_bytes) / sizeof(uint32_t);
header.window_size = BAN::Math::min<size_t>(0xFFFF, m_recv_window.buffer->size() >> m_recv_window.scale);
header.window_size = BAN::Math::min<size_t>(0xFFFF, m_recv_window.buffer->size() >> m_recv_window.scale_shift);
header.flags = m_next_flags;
if (header.flags & FIN)
m_send_window.has_ghost_byte = true;
m_next_flags = 0;
if (m_state == State::Closed)
if (m_state == State::Closed || m_state == State::SynReceived)
{
add_tcp_header_option<0, TCPOption::MaximumSeqmentSize>(header, m_interface->payload_mtu() - m_network_layer.header_size());
add_tcp_header_option<4, TCPOption::WindowScale>(header, m_recv_window.scale);
if (m_connection_info->has_window_scale)
add_tcp_header_option<4, TCPOption::WindowScale>(header, m_recv_window.scale_shift);
header.window_size = BAN::Math::min<size_t>(0xFFFF, m_recv_window.buffer->size());
m_send_window.mss = 1440;
@ -463,9 +449,12 @@ namespace Kernel
if (options.maximum_seqment_size.has_value())
m_send_window.mss = *options.maximum_seqment_size;
if (options.window_scale.has_value())
m_send_window.scale = *options.window_scale;
m_send_window.scale_shift = *options.window_scale;
else
m_recv_window.scale = 1;
{
m_recv_window.scale_shift = 0;
m_connection_info->has_window_scale = false;
}
m_send_window.start_seq = m_send_window.current_seq;
m_send_window.current_ack = m_send_window.current_seq;
@ -492,6 +481,7 @@ namespace Kernel
ConnectionInfo connection_info;
memcpy(&connection_info.address, sender, sender_len);
connection_info.address_len = sender_len;
connection_info.has_window_scale = parse_tcp_options(header).window_scale.has_value();
MUST(m_pending_connections.emplace(
connection_info,
header.seq_number + 1

View File

@ -1,5 +1,6 @@
#include <kernel/FS/Pipe.h>
#include <kernel/FS/VirtualFileSystem.h>
#include <kernel/Lock/LockGuard.h>
#include <kernel/Networking/NetworkManager.h>
#include <kernel/OpenFileDescriptorSet.h>
@ -298,11 +299,13 @@ namespace Kernel
{
TRY(validate_fd(fd));
auto& open_file = m_open_files[fd];
if (open_file.inode()->mode().ifsock())
return recvfrom(fd, buffer, nullptr, nullptr);
if (!(open_file.status_flags() & O_RDONLY))
return BAN::Error::from_errno(EBADF);
if ((open_file.status_flags() & O_NONBLOCK) && !open_file.inode()->can_read())
return 0;
size_t nread = TRY(open_file.inode()->read(open_file.offset(), buffer));
const size_t nread = TRY(open_file.inode()->read(open_file.offset(), buffer));
open_file.offset() += nread;
return nread;
}
@ -311,13 +314,15 @@ namespace Kernel
{
TRY(validate_fd(fd));
auto& open_file = m_open_files[fd];
if (open_file.inode()->mode().ifsock())
return sendto(fd, buffer, nullptr, 0);
if (!(open_file.status_flags() & O_WRONLY))
return BAN::Error::from_errno(EBADF);
if ((open_file.status_flags() & O_NONBLOCK) && !open_file.inode()->can_write())
return 0;
return BAN::Error::from_errno(EWOULDBLOCK);
if (open_file.status_flags() & O_APPEND)
open_file.offset() = open_file.inode()->size();
size_t nwrite = TRY(open_file.inode()->write(open_file.offset(), buffer));
const size_t nwrite = TRY(open_file.inode()->write(open_file.offset(), buffer));
open_file.offset() += nwrite;
return nwrite;
}
@ -340,6 +345,43 @@ namespace Kernel
}
}
// Receives data from the socket open on `fd`.
// Returns ENOTSOCK for non-socket fds. With O_NONBLOCK set and no data
// available, fails with EWOULDBLOCK instead of blocking. Otherwise
// forwards to the inode's recvfrom.
BAN::ErrorOr<size_t> OpenFileDescriptorSet::recvfrom(int fd, BAN::ByteSpan buffer, sockaddr* address, socklen_t* address_len)
{
TRY(validate_fd(fd));
auto& open_file = m_open_files[fd];
if (!open_file.inode()->mode().ifsock())
return BAN::Error::from_errno(ENOTSOCK);
// Hold the inode mutex so the can_read() check and the actual receive
// are not interleaved with another reader draining the socket.
LockGuard _(open_file.inode()->m_mutex);
if ((open_file.status_flags() & O_NONBLOCK) && !open_file.inode()->can_read())
return BAN::Error::from_errno(EWOULDBLOCK);
return open_file.inode()->recvfrom(buffer, address, address_len);
}
// Sends the whole buffer to the socket open on `fd`, looping over partial
// sends. Returns ENOTSOCK for non-socket fds. With O_NONBLOCK set:
// fails with EWOULDBLOCK if nothing can be sent up front, or returns a
// short count if the socket stops accepting data mid-transfer.
BAN::ErrorOr<size_t> OpenFileDescriptorSet::sendto(int fd, BAN::ConstByteSpan buffer, const sockaddr* address, socklen_t address_len)
{
TRY(validate_fd(fd));
auto& open_file = m_open_files[fd];
if (!open_file.inode()->mode().ifsock())
return BAN::Error::from_errno(ENOTSOCK);
// NOTE(review): this can_write() probe happens before the mutex is
// taken (unlike recvfrom) -- confirm the ordering is intentional.
if ((open_file.status_flags() & O_NONBLOCK) && !open_file.inode()->can_write())
return BAN::Error::from_errno(EWOULDBLOCK);
LockGuard _(open_file.inode()->m_mutex);
size_t total_sent = 0;
while (total_sent < buffer.size())
{
// Re-check writability each round so a nonblocking socket returns
// a partial count instead of blocking on the next chunk.
if ((open_file.status_flags() & O_NONBLOCK) && !open_file.inode()->can_write())
return total_sent;
const size_t nsend = TRY(open_file.inode()->sendto(buffer.slice(total_sent), address, address_len));
// NOTE(review): a zero-byte send aborts with 0 even when some bytes
// were already sent -- confirm callers expect 0 over total_sent.
if (nsend == 0)
return 0;
total_sent += nsend;
}
return total_sent;
}
BAN::ErrorOr<VirtualFileSystem::File> OpenFileDescriptorSet::file_of(int fd) const
{
TRY(validate_fd(fd));

View File

@ -17,9 +17,12 @@
#include <kernel/Terminal/PseudoTerminal.h>
#include <kernel/Timer/Timer.h>
#include <LibELF/AuxiliaryVector.h>
#include <LibInput/KeyboardLayout.h>
#include <fcntl.h>
#include <pthread.h>
#include <stdio.h>
#include <sys/banan-os.h>
#include <sys/sysmacros.h>
@ -116,76 +119,61 @@ namespace Kernel
process->m_working_directory = VirtualFileSystem::get().root_file();
process->m_page_table = BAN::UniqPtr<PageTable>::adopt(MUST(PageTable::create_userspace()));
TRY(process->m_cmdline.push_back({}));
TRY(process->m_cmdline.emplace_back());
TRY(process->m_cmdline.back().append(path));
for (auto argument : arguments)
{
TRY(process->m_cmdline.emplace_back());
TRY(process->m_cmdline.back().append(argument));
}
LockGuard _(process->m_process_lock);
auto executable_inode = TRY(process->find_file(AT_FDCWD, path.data(), O_EXEC)).inode;
auto executable_file = TRY(process->find_file(AT_FDCWD, path.data(), O_EXEC));
auto executable_inode = executable_file.inode;
auto executable = TRY(ELF::load_from_inode(executable_inode, process->m_credentials, process->page_table()));
process->m_mapped_regions = BAN::move(executable.regions);
char** argv_addr = nullptr;
{
size_t needed_bytes = sizeof(char*) + path.size() + 1;
for (auto argument : arguments)
needed_bytes += sizeof(char*) + argument.size() + 1;
needed_bytes += sizeof(char*);
if (auto rem = needed_bytes % PAGE_SIZE)
needed_bytes += PAGE_SIZE - rem;
auto argv_region = MUST(MemoryBackedRegion::create(
process->page_table(),
needed_bytes,
{ .start = 0x400000, .end = KERNEL_OFFSET },
MemoryRegion::Type::PRIVATE,
PageTable::Flags::UserSupervisor | PageTable::Flags::Present
));
argv_addr = reinterpret_cast<char**>(argv_region->vaddr());
uintptr_t offset = sizeof(char*) * (1 + arguments.size() + 1);
for (size_t i = 0; i <= arguments.size(); i++)
{
const uintptr_t addr = argv_region->vaddr() + offset;
TRY(argv_region->copy_data_to_region(i * sizeof(char*), reinterpret_cast<const uint8_t*>(&addr), sizeof(char*)));
const auto current = (i == 0) ? path : arguments[i - 1];
TRY(argv_region->copy_data_to_region(offset, reinterpret_cast<const uint8_t*>(current.data()), current.size()));
const uint8_t zero = 0;
TRY(argv_region->copy_data_to_region(offset + current.size(), &zero, 1));
offset += current.size() + 1;
}
const uintptr_t zero = 0;
TRY(argv_region->copy_data_to_region((1 + arguments.size()) * sizeof(char*), reinterpret_cast<const uint8_t*>(&zero), sizeof(char*)));
TRY(process->m_mapped_regions.push_back(BAN::move(argv_region)));
}
if (executable_inode->mode().mode & +Inode::Mode::ISUID)
process->m_credentials.set_euid(executable_inode->uid());
if (executable_inode->mode().mode & +Inode::Mode::ISGID)
process->m_credentials.set_egid(executable_inode->gid());
if (executable.has_interpreter)
BAN::Vector<LibELF::AuxiliaryVector> auxiliary_vector;
TRY(auxiliary_vector.reserve(1 + executable.open_execfd));
if (executable.open_execfd)
{
VirtualFileSystem::File file;
TRY(file.canonical_path.append("<self>"));
file.inode = executable_inode;
process->m_userspace_info.file_fd = TRY(process->m_open_file_descriptors.open(BAN::move(file), O_RDONLY));
const int execfd = TRY(process->m_open_file_descriptors.open(BAN::move(executable_file), O_RDONLY));
TRY(auxiliary_vector.push_back({
.a_type = LibELF::AT_EXECFD,
.a_un = { .a_val = static_cast<uint32_t>(execfd) },
}));
}
process->m_is_userspace = true;
process->m_userspace_info.entry = executable.entry_point;
process->m_userspace_info.argc = 1 + arguments.size();
process->m_userspace_info.argv = argv_addr;
process->m_userspace_info.envp = nullptr;
TRY(auxiliary_vector.push_back({
.a_type = LibELF::AT_NULL,
.a_un = { .a_val = 0 },
}));
BAN::Optional<vaddr_t> tls_addr;
if (executable.master_tls.has_value())
{
auto tls_result = TRY(process->initialize_thread_local_storage(process->page_table(), *executable.master_tls));
TRY(process->m_mapped_regions.emplace_back(BAN::move(tls_result.region)));
tls_addr = tls_result.addr;
}
auto* thread = MUST(Thread::create_userspace(process, process->page_table()));
thread->setup_exec();
MUST(thread->initialize_userspace(
executable.entry_point,
process->m_cmdline.span(),
process->m_environ.span(),
auxiliary_vector.span()
));
if (tls_addr.has_value())
thread->set_tls(*tls_addr);
process->add_thread(thread);
process->register_to_scheduler();
@ -318,6 +306,63 @@ namespace Kernel
ASSERT_NOT_REACHED();
}
// Builds a thread's TLS block in `page_table` from the ELF master TLS
// image described by `master_tls` (address + size of the image that the
// ELF loader already mapped).
//
// Region layout (x86 style: TLS data directly below the thread pointer):
//   [0, master_size)                       copy of the master TLS image
//   [master_size, +sizeof(uthread))        struct uthread; its `self`
//                                          field points at itself
//   [master_size + sizeof(uthread), ...)   dtv[2] = { 1, region base }
//                                          -- presumably {generation,
//                                          module-1 block} per ELF TLS
//
// Returns the new region and the address of the uthread structure, i.e.
// the value to install as the thread pointer (Thread::set_tls).
BAN::ErrorOr<Process::TLSResult> Process::initialize_thread_local_storage(PageTable& page_table, ELF::LoadResult::TLS master_tls)
{
	const auto [master_addr, master_size] = master_tls;
	// The ELF loader pads the master image so uthread lands aligned
	// immediately after the TLS data.
	ASSERT(master_size % alignof(uthread) == 0);

	// One extra page holds the uthread + dtv after the TLS data.
	const size_t tls_size = master_size + PAGE_SIZE;

	auto region = TRY(MemoryBackedRegion::create(
		page_table,
		tls_size,
		{ .start = master_addr, .end = USERSPACE_END },
		MemoryRegion::Type::PRIVATE,
		PageTable::Flags::UserSupervisor | PageTable::Flags::ReadWrite | PageTable::Flags::Present
	));

	// Copy the master image into the new region at most one page at a
	// time, going through the kernel's temporary fast-page mapping.
	// NOTE(review): assumes master_addr is page aligned so each memcpy
	// stays within one fast page -- confirm the loader guarantees this.
	BAN::Vector<uint8_t> temp_buffer;
	TRY(temp_buffer.resize(BAN::Math::min<size_t>(master_size, PAGE_SIZE)));

	size_t bytes_copied = 0;
	while (bytes_copied < master_size)
	{
		const size_t to_copy = BAN::Math::min(master_size - bytes_copied, temp_buffer.size());

		const vaddr_t vaddr = master_addr + bytes_copied;
		const paddr_t paddr = page_table.physical_address_of(vaddr & PAGE_ADDR_MASK);
		PageTable::with_fast_page(paddr, [&] {
			memcpy(temp_buffer.data(), PageTable::fast_page_as_ptr(vaddr % PAGE_SIZE), to_copy);
		});

		TRY(region->copy_data_to_region(bytes_copied, temp_buffer.data(), to_copy));

		bytes_copied += to_copy;
	}

	// uthread sits right after the TLS data; `self` points at its own
	// final userspace address.
	const uthread uthread {
		.self = reinterpret_cast<struct uthread*>(region->vaddr() + master_size),
		.master_tls_addr = reinterpret_cast<void*>(master_addr),
		.master_tls_size = master_size,
	};
	const uintptr_t dtv[2] { 1, region->vaddr() };

	TRY(region->copy_data_to_region(
		master_size,
		reinterpret_cast<const uint8_t*>(&uthread),
		sizeof(uthread)
	));
	TRY(region->copy_data_to_region(
		master_size + sizeof(uthread),
		reinterpret_cast<const uint8_t*>(&dtv),
		sizeof(dtv)
	));

	TLSResult result;
	result.addr = region->vaddr() + master_size; // was terminated with a stray ';;'
	result.region = BAN::move(region);
	return result;
}
size_t Process::proc_meminfo(off_t offset, BAN::ByteSpan buffer) const
{
ASSERT(offset >= 0);
@ -534,7 +579,6 @@ namespace Kernel
forked->m_open_file_descriptors = BAN::move(*open_file_descriptors);
forked->m_mapped_regions = BAN::move(mapped_regions);
forked->m_is_userspace = m_is_userspace;
forked->m_userspace_info = m_userspace_info;
forked->m_has_called_exec = false;
memcpy(forked->m_signal_handlers, m_signal_handlers, sizeof(m_signal_handlers));
@ -561,77 +605,69 @@ namespace Kernel
TRY(validate_string_access(path));
auto executable_file = TRY(find_file(AT_FDCWD, path, O_EXEC));
auto executable_inode = executable_file.inode;
BAN::Vector<BAN::String> str_argv;
for (int i = 0; argv && argv[i]; i++)
{
TRY(validate_pointer_access(argv + i, sizeof(char*), false));
TRY(validate_string_access(argv[i]));
TRY(str_argv.emplace_back(argv[i]));
TRY(str_argv.emplace_back());
TRY(str_argv.back().append(argv[i]));
}
BAN::Vector<BAN::String> str_envp;
for (int i = 0; envp && envp[i]; i++)
{
TRY(validate_pointer_access(envp + 1, sizeof(char*), false));
TRY(validate_pointer_access(envp + i, sizeof(char*), false));
TRY(validate_string_access(envp[i]));
TRY(str_envp.emplace_back(envp[i]));
TRY(str_envp.emplace_back());
TRY(str_envp.back().append(envp[i]));
}
auto executable_file = TRY(find_file(AT_FDCWD, path, O_EXEC));
auto executable_inode = executable_file.inode;
auto executable = TRY(ELF::load_from_inode(executable_inode, m_credentials, *new_page_table));
auto new_mapped_regions = BAN::move(executable.regions);
int file_fd = -1;
if (executable.has_interpreter)
BAN::Vector<LibELF::AuxiliaryVector> auxiliary_vector;
TRY(auxiliary_vector.reserve(1 + executable.open_execfd));
BAN::ScopeGuard execfd_guard([this, &auxiliary_vector] {
if (auxiliary_vector.empty())
return;
if (auxiliary_vector.front().a_type != LibELF::AT_EXECFD)
return;
MUST(m_open_file_descriptors.close(auxiliary_vector.front().a_un.a_val));
});
if (executable.open_execfd)
{
VirtualFileSystem::File file;
file.canonical_path = BAN::move(executable_file.canonical_path);
file.inode = executable_inode;
file_fd = TRY(m_open_file_descriptors.open(BAN::move(file), O_RDONLY));
const int execfd = TRY(m_open_file_descriptors.open(BAN::move(executable_file), O_RDONLY));
TRY(auxiliary_vector.push_back({
.a_type = LibELF::AT_EXECFD,
.a_un = { .a_val = static_cast<uint32_t>(execfd) },
}));
}
BAN::ScopeGuard file_closer([&] { if (file_fd != -1) MUST(m_open_file_descriptors.close(file_fd)); });
// allocate memory on the new process for arguments and environment
auto create_region =
[&](BAN::Span<BAN::String> container) -> BAN::ErrorOr<BAN::UniqPtr<MemoryRegion>>
{
size_t bytes = sizeof(char*);
for (auto& elem : container)
bytes += sizeof(char*) + elem.size() + 1;
if (auto rem = bytes % PAGE_SIZE)
bytes += PAGE_SIZE - rem;
auto region = TRY(MemoryBackedRegion::create(
*new_page_table,
bytes,
{ .start = executable.entry_point, .end = KERNEL_OFFSET },
MemoryRegion::Type::PRIVATE,
PageTable::Flags::UserSupervisor | PageTable::Flags::ReadWrite | PageTable::Flags::Present
));
size_t data_offset = sizeof(char*) * (container.size() + 1);
for (size_t i = 0; i < container.size(); i++)
{
uintptr_t ptr_addr = region->vaddr() + data_offset;
TRY(region->copy_data_to_region(sizeof(char*) * i, (const uint8_t*)&ptr_addr, sizeof(char*)));
TRY(region->copy_data_to_region(data_offset, (const uint8_t*)container[i].data(), container[i].size()));
data_offset += container[i].size() + 1;
}
uintptr_t null = 0;
TRY(region->copy_data_to_region(sizeof(char*) * container.size(), (const uint8_t*)&null, sizeof(char*)));
return BAN::UniqPtr<MemoryRegion>(BAN::move(region));
};
TRY(new_mapped_regions.reserve(new_mapped_regions.size() + 2));
MUST(new_mapped_regions.push_back(TRY(create_region(str_argv.span()))));
MUST(new_mapped_regions.push_back(TRY(create_region(str_envp.span()))));
TRY(auxiliary_vector.push_back({
.a_type = LibELF::AT_NULL,
.a_un = { .a_val = 0 },
}));
auto* new_thread = TRY(Thread::create_userspace(this, *new_page_table));
TRY(new_thread->initialize_userspace(
executable.entry_point,
str_argv.span(),
str_envp.span(),
auxiliary_vector.span()
));
if (executable.master_tls.has_value())
{
auto tls_result = TRY(initialize_thread_local_storage(*new_page_table, *executable.master_tls));
TRY(new_mapped_regions.emplace_back(BAN::move(tls_result.region)));
new_thread->set_tls(tls_result.addr);
}
ASSERT(Processor::get_interrupt_state() == InterruptState::Enabled);
Processor::set_interrupt_state(InterruptState::Disabled);
@ -655,8 +691,8 @@ namespace Kernel
m_threads.front()->m_process = nullptr;
m_threads.front()->give_keep_alive_page_table(BAN::move(m_page_table));
MUST(Processor::scheduler().add_thread(new_thread));
m_threads.front() = new_thread;
MUST(Processor::scheduler().add_thread(m_threads.front()));
for (size_t i = 0; i < sizeof(m_signal_handlers) / sizeof(*m_signal_handlers); i++)
{
@ -673,21 +709,13 @@ namespace Kernel
m_mapped_regions = BAN::move(new_mapped_regions);
m_page_table = BAN::move(new_page_table);
file_closer.disable();
m_userspace_info.argc = str_argv.size();
m_userspace_info.argv = reinterpret_cast<char**>(m_mapped_regions[m_mapped_regions.size() - 2]->vaddr());
m_userspace_info.envp = reinterpret_cast<char**>(m_mapped_regions[m_mapped_regions.size() - 1]->vaddr());
m_userspace_info.entry = executable.entry_point;
m_userspace_info.file_fd = file_fd;
execfd_guard.disable();
m_cmdline = BAN::move(str_argv);
m_environ = BAN::move(str_envp);
}
m_has_called_exec = true;
m_threads.front()->setup_exec();
Processor::yield();
ASSERT_NOT_REACHED();
}
@ -1022,6 +1050,11 @@ namespace Kernel
// Reads up to `count` bytes from file descriptor `fd` into the userspace
// buffer. Returns the number of bytes read, or an error (e.g. EBADF for an
// invalid descriptor).
BAN::ErrorOr<long> Process::sys_read(int fd, void* buffer, size_t count)
{
	LockGuard _(m_process_lock);
	// A zero-byte read must still detect a bad descriptor, but must not
	// touch the buffer (which may be null) nor the file state.
	if (count == 0)
	{
		TRY(m_open_file_descriptors.inode_of(fd));
		return 0;
	}
	// Verify [buffer, buffer + count) is writable userspace memory before
	// handing it to the filesystem layer.
	TRY(validate_pointer_access(buffer, count, true));
	// static_cast (not a C-style cast) for consistency with the reworked
	// sendto/recvfrom paths in this file.
	return TRY(m_open_file_descriptors.read(fd, BAN::ByteSpan(static_cast<uint8_t*>(buffer), count)));
}
@ -1136,6 +1169,11 @@ namespace Kernel
BAN::ErrorOr<long> Process::sys_pread(int fd, void* buffer, size_t count, off_t offset)
{
LockGuard _(m_process_lock);
if (count == 0)
{
TRY(m_open_file_descriptors.inode_of(fd));
return 0;
}
TRY(validate_pointer_access(buffer, count, true));
auto inode = TRY(m_open_file_descriptors.inode_of(fd));
return TRY(inode->read(offset, { (uint8_t*)buffer, count }));
@ -1321,16 +1359,8 @@ namespace Kernel
TRY(validate_pointer_access(arguments->message, arguments->length, false));
TRY(validate_pointer_access(arguments->dest_addr, arguments->dest_len, false));
auto inode = TRY(m_open_file_descriptors.inode_of(arguments->socket));
if (!inode->mode().ifsock())
return BAN::Error::from_errno(ENOTSOCK);
const auto status_flags = TRY(m_open_file_descriptors.status_flags_of(arguments->socket));
if ((status_flags & O_NONBLOCK) && !inode->can_write())
return BAN::Error::from_errno(EAGAIN);
BAN::ConstByteSpan message { reinterpret_cast<const uint8_t*>(arguments->message), arguments->length };
return TRY(inode->sendto(message, arguments->dest_addr, arguments->dest_len));
auto message = BAN::ConstByteSpan(static_cast<const uint8_t*>(arguments->message), arguments->length);
return TRY(m_open_file_descriptors.sendto(arguments->socket, message, arguments->dest_addr, arguments->dest_len));
}
BAN::ErrorOr<long> Process::sys_recvfrom(sys_recvfrom_t* arguments)
@ -1349,16 +1379,8 @@ namespace Kernel
TRY(validate_pointer_access(arguments->address, *arguments->address_len, true));
}
auto inode = TRY(m_open_file_descriptors.inode_of(arguments->socket));
if (!inode->mode().ifsock())
return BAN::Error::from_errno(ENOTSOCK);
const auto status_flags = TRY(m_open_file_descriptors.status_flags_of(arguments->socket));
if ((status_flags & O_NONBLOCK) && !inode->can_read())
return BAN::Error::from_errno(EAGAIN);
BAN::ByteSpan buffer { reinterpret_cast<uint8_t*>(arguments->buffer), arguments->length };
return TRY(inode->recvfrom(buffer, arguments->address, arguments->address_len));
auto message = BAN::ByteSpan(static_cast<uint8_t*>(arguments->buffer), arguments->length);
return TRY(m_open_file_descriptors.recvfrom(arguments->socket, message, arguments->address, arguments->address_len));
}
BAN::ErrorOr<long> Process::sys_ioctl(int fildes, int request, void* arg)
@ -1675,7 +1697,7 @@ namespace Kernel
else
page_flags |= PageTable::Flags::UserSupervisor;
AddressRange address_range { .start = 0x400000, .end = KERNEL_OFFSET };
AddressRange address_range { .start = 0x400000, .end = USERSPACE_END };
if (args->flags & MAP_FIXED)
{
vaddr_t base_addr = reinterpret_cast<vaddr_t>(args->addr);
@ -1820,7 +1842,7 @@ namespace Kernel
BAN::ErrorOr<long> Process::sys_smo_map(SharedMemoryObjectManager::Key key)
{
auto region = TRY(SharedMemoryObjectManager::get().map_object(key, page_table(), { .start = 0x400000, .end = KERNEL_OFFSET }));
auto region = TRY(SharedMemoryObjectManager::get().map_object(key, page_table(), { .start = 0x400000, .end = USERSPACE_END }));
LockGuard _(m_process_lock);
TRY(m_mapped_regions.push_back(BAN::move(region)));
@ -2090,12 +2112,28 @@ namespace Kernel
return 0;
}
BAN::ErrorOr<long> Process::sys_pthread_create(const pthread_attr_t* __restrict attr, void (*entry)(void*), void* arg)
BAN::ErrorOr<long> Process::sys_set_tls(void* addr)
{
if (attr != nullptr)
Thread::current().set_tls(reinterpret_cast<vaddr_t>(addr));
Processor::load_tls();
return 0;
}
// Returns the calling thread's TLS base address (as previously installed by
// sys_set_tls, or by exec when the executable carries a master TLS segment).
BAN::ErrorOr<long> Process::sys_get_tls()
{
	return Thread::current().get_tls();
}
BAN::ErrorOr<long> Process::sys_pthread_create(const pthread_attr_t* attr, void (*entry)(void*), void* arg)
{
if (attr)
{
dwarnln("pthread attr not supported");
return BAN::Error::from_errno(ENOTSUP);
TRY(validate_pointer_access(attr, sizeof(*attr), false));
if (*attr)
{
dwarnln("pthread attr not supported");
return BAN::Error::from_errno(ENOTSUP);
}
}
LockGuard _(m_process_lock);
@ -2575,7 +2613,7 @@ namespace Kernel
goto unauthorized_access;
// trying to access kernel space memory
if (vaddr + size > KERNEL_OFFSET)
if (vaddr + size > USERSPACE_END)
goto unauthorized_access;
if (vaddr == 0)

View File

@ -10,7 +10,10 @@ extern Kernel::TerminalDriver* g_terminal_driver;
namespace Kernel
{
#if ARCH(x86_64)
static constexpr uint32_t MSR_IA32_FS_BASE = 0xC0000100;
static constexpr uint32_t MSR_IA32_GS_BASE = 0xC0000101;
#endif
ProcessorID Processor::s_bsb_id { PROCESSOR_NONE };
BAN::Atomic<uint8_t> Processor::s_processor_count { 0 };
@ -260,6 +263,18 @@ namespace Kernel
set_interrupt_state(state);
}
// Loads the current thread's TLS base address into the architecture's thread
// pointer so userspace TLS accesses resolve to this thread's storage.
void Processor::load_tls()
{
	const auto addr = scheduler().current_thread().get_tls();
#if ARCH(x86_64)
	// wrmsr writes EDX:EAX into the MSR selected by ECX: split the 64-bit
	// address into halves and store it in IA32_FS_BASE (%fs base).
	uint32_t ptr_hi = addr >> 32;
	uint32_t ptr_lo = addr & 0xFFFFFFFF;
	asm volatile("wrmsr" :: "d"(ptr_hi), "a"(ptr_lo), "c"(MSR_IA32_FS_BASE));
#elif ARCH(i686)
	// On i686 the TLS base is carried in a GDT segment instead of an MSR.
	gdt().set_tls(addr);
#endif
}
void Processor::send_smp_message(ProcessorID processor_id, const SMPMessage& message, bool send_ipi)
{
ASSERT(processor_id != current_id());

View File

@ -278,6 +278,7 @@ namespace Kernel
thread->m_state = Thread::State::Executing;
Processor::gdt().set_tss_stack(thread->kernel_stack_top());
Processor::load_tls();
*interrupt_stack = thread->interrupt_stack();
*interrupt_registers = thread->interrupt_registers();

View File

@ -44,11 +44,7 @@ namespace Kernel
BAN::ErrorOr<long> ret = BAN::Error::from_errno(ENOSYS);
const char* process_path = nullptr;
if (Process::current().userspace_info().argc > 0 && Process::current().userspace_info().argv)
process_path = Process::current().userspace_info().argv[0];
if (process_path == nullptr)
process_path = "<null>";
const char* process_path = Process::current().name();
#if DUMP_ALL_SYSCALLS
dprintln("{} pid {}: {}", process_path, Process::current().pid(), s_syscall_names[syscall]);

View File

@ -29,11 +29,6 @@ namespace Kernel
return Thread::current().interrupt_stack().sp;
}
extern "C" uintptr_t get_userspace_thread_stack_top()
{
return Thread::current().userspace_stack_top() - 4 * sizeof(uintptr_t);
}
extern "C" void load_thread_sse()
{
Thread::current().load_sse();
@ -108,7 +103,7 @@ namespace Kernel
thread->m_kernel_stack = TRY(VirtualRange::create_to_vaddr_range(
page_table,
0x200000, KERNEL_OFFSET,
0x200000, USERSPACE_END,
kernel_stack_size,
PageTable::Flags::ReadWrite | PageTable::Flags::Present,
true
@ -116,7 +111,7 @@ namespace Kernel
thread->m_userspace_stack = TRY(VirtualRange::create_to_vaddr_range(
page_table,
0x200000, KERNEL_OFFSET,
0x200000, USERSPACE_END,
userspace_stack_size,
PageTable::Flags::UserSupervisor | PageTable::Flags::ReadWrite | PageTable::Flags::Present,
true
@ -176,11 +171,12 @@ namespace Kernel
save_sse();
memcpy(thread->m_sse_storage, m_sse_storage, sizeof(m_sse_storage));
thread->setup_exec_impl(
reinterpret_cast<uintptr_t>(entry),
reinterpret_cast<uintptr_t>(arg),
0, 0, 0
);
PageTable::with_fast_page(thread->userspace_stack().paddr_of(thread->userspace_stack_top() - PAGE_SIZE), [=] {
PageTable::fast_page_as<void*>(PAGE_SIZE - sizeof(uintptr_t)) = arg;
});
const vaddr_t entry_addr = reinterpret_cast<vaddr_t>(entry);
thread->setup_exec(entry_addr, thread->userspace_stack_top() - sizeof(uintptr_t));
return thread;
}
@ -200,6 +196,8 @@ namespace Kernel
thread->m_kernel_stack = TRY(m_kernel_stack->clone(new_process->page_table()));
thread->m_userspace_stack = TRY(m_userspace_stack->clone(new_process->page_table()));
thread->m_tls = m_tls;
thread->m_state = State::NotStarted;
thread->m_interrupt_stack.ip = ip;
@ -222,21 +220,112 @@ namespace Kernel
return thread;
}
void Thread::setup_exec()
BAN::ErrorOr<void> Thread::initialize_userspace(vaddr_t entry, BAN::Span<BAN::String> argv, BAN::Span<BAN::String> envp, BAN::Span<LibELF::AuxiliaryVector> auxv)
{
const auto& userspace_info = process().userspace_info();
ASSERT(userspace_info.entry);
// System V ABI: Initial process stack
setup_exec_impl(
userspace_info.entry,
userspace_info.argc,
reinterpret_cast<uintptr_t>(userspace_info.argv),
reinterpret_cast<uintptr_t>(userspace_info.envp),
userspace_info.file_fd
);
ASSERT(m_is_userspace);
ASSERT(m_userspace_stack);
size_t needed_size = 0;
// argc
needed_size += sizeof(uintptr_t);
// argv
needed_size += (argv.size() + 1) * sizeof(uintptr_t);
for (auto arg : argv)
needed_size += arg.size() + 1;
// envp
needed_size += (envp.size() + 1) * sizeof(uintptr_t);
for (auto env : envp)
needed_size += env.size() + 1;
// auxv
needed_size += auxv.size() * sizeof(LibELF::AuxiliaryVector);
if (needed_size > m_userspace_stack->size())
return BAN::Error::from_errno(ENOBUFS);
vaddr_t vaddr = userspace_stack_top() - needed_size;
const auto stack_copy_buf =
[this](BAN::ConstByteSpan buffer, vaddr_t vaddr) -> void
{
ASSERT(vaddr + buffer.size() <= userspace_stack_top());
size_t bytes_copied = 0;
while (bytes_copied < buffer.size())
{
const size_t to_copy = BAN::Math::min<size_t>(buffer.size() - bytes_copied, PAGE_SIZE - (vaddr % PAGE_SIZE));
PageTable::with_fast_page(userspace_stack().paddr_of(vaddr & PAGE_ADDR_MASK), [=]() {
memcpy(PageTable::fast_page_as_ptr(vaddr % PAGE_SIZE), buffer.data() + bytes_copied, to_copy);
});
vaddr += to_copy;
bytes_copied += to_copy;
}
};
const auto stack_push_buf =
[&stack_copy_buf, &vaddr](BAN::ConstByteSpan buffer) -> void
{
stack_copy_buf(buffer, vaddr);
vaddr += buffer.size();
};
const auto stack_push_uint =
[&stack_push_buf](uintptr_t value) -> void
{
stack_push_buf(BAN::ConstByteSpan::from(value));
};
const auto stack_push_str =
[&stack_push_buf](BAN::StringView string) -> void
{
const uint8_t* string_u8 = reinterpret_cast<const uint8_t*>(string.data());
stack_push_buf(BAN::ConstByteSpan(string_u8, string.size() + 1));
};
// argc
stack_push_uint(argv.size());
// argv
const vaddr_t argv_vaddr = vaddr;
vaddr += argv.size() * sizeof(uintptr_t);
stack_push_uint(0);
// envp
const vaddr_t envp_vaddr = vaddr;
vaddr += envp.size() * sizeof(uintptr_t);
stack_push_uint(0);
// auxv
for (auto aux : auxv)
stack_push_buf(BAN::ConstByteSpan::from(aux));
// information
for (size_t i = 0; i < argv.size(); i++)
{
stack_copy_buf(BAN::ConstByteSpan::from(vaddr), argv_vaddr + i * sizeof(uintptr_t));
stack_push_str(argv[i]);
}
for (size_t i = 0; i < envp.size(); i++)
{
stack_copy_buf(BAN::ConstByteSpan::from(vaddr), envp_vaddr + i * sizeof(uintptr_t));
stack_push_str(envp[i]);
}
ASSERT(vaddr == userspace_stack_top());
setup_exec(entry, userspace_stack_top() - needed_size);
return {};
}
void Thread::setup_exec_impl(uintptr_t entry, uintptr_t arg0, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3)
void Thread::setup_exec(vaddr_t ip, vaddr_t sp)
{
ASSERT(is_userspace());
m_state = State::NotStarted;
@ -244,13 +333,13 @@ namespace Kernel
// Signal mask is inherited
// Initialize stack for returning
PageTable::with_fast_page(process().page_table().physical_address_of(kernel_stack_top() - PAGE_SIZE), [&] {
uintptr_t sp = PageTable::fast_page() + PAGE_SIZE;
write_to_stack(sp, entry);
write_to_stack(sp, arg3);
write_to_stack(sp, arg2);
write_to_stack(sp, arg1);
write_to_stack(sp, arg0);
PageTable::with_fast_page(kernel_stack().paddr_of(kernel_stack_top() - PAGE_SIZE), [=] {
uintptr_t cur_sp = PageTable::fast_page() + PAGE_SIZE;
write_to_stack(cur_sp, 0x20 | 3);
write_to_stack(cur_sp, sp);
write_to_stack(cur_sp, 0x202);
write_to_stack(cur_sp, 0x18 | 3);
write_to_stack(cur_sp, ip);
});
m_interrupt_stack.ip = reinterpret_cast<vaddr_t>(start_userspace_thread);
@ -286,7 +375,7 @@ namespace Kernel
m_signal_pending_mask = 0;
m_signal_block_mask = ~0ull;
PageTable::with_fast_page(process().page_table().physical_address_of(kernel_stack_top() - PAGE_SIZE), [&] {
PageTable::with_fast_page(kernel_stack().paddr_of(kernel_stack_top() - PAGE_SIZE), [&] {
uintptr_t sp = PageTable::fast_page() + PAGE_SIZE;
write_to_stack(sp, this);
write_to_stack(sp, &Thread::on_exit_trampoline);

View File

@ -58,6 +58,10 @@ target_compile_definitions(objlibc PRIVATE __arch=${BANAN_ARCH})
target_compile_options(objlibc PRIVATE -O2 -g -Wstack-usage=512 -fno-exceptions -fpic -nolibc)
target_compile_options(objlibc PUBLIC -Wall -Wextra -Werror -Wno-error=stack-usage=)
if("${BANAN_ARCH}" STREQUAL "i686")
target_compile_definitions(objlibc PRIVATE __disable_thread_local_storage)
endif()
function(add_crtx crtx)
add_custom_target(${crtx}
COMMAND ${CMAKE_CXX_COMPILER} -c -o ${CMAKE_INSTALL_LIBDIR}/${crtx}.o ${CMAKE_CURRENT_SOURCE_DIR}/arch/${BANAN_ARCH}/${crtx}.S
@ -71,6 +75,7 @@ add_crtx(crtn)
banan_include_headers(objlibc ban)
banan_include_headers(objlibc kernel)
banan_include_headers(objlibc libelf)
banan_install_headers(objlibc)

View File

@ -2,40 +2,38 @@
.global _start
_start:
pushl $0
pushl %edi
pushl %esi
pushl %edx
# STACK LAYOUT
# null
# argc
# argv
# envp
# get argc, argv, envp
movl (%esp), %edi
leal 4(%esp), %esi
leal 4(%esi, %edi, 4), %edx
# align stack
andl $-16, %esp
xorl %ebp, %ebp
# init libc (envp already as argument)
# save argc, argv, envp
subl $4, %esp
pushl %edx
pushl %esi
pushl %edi
subl $4, %esp
pushl $__fini_array_end
pushl $__fini_array_start
pushl $_fini
pushl $__init_array_end
pushl $__init_array_start
pushl $_init
pushl %edx
call _init_libc
# call global constructors
movl $_init, %eax
testl %eax, %eax
jz 1f
call *%eax
1:
addl $(4 * 8), %esp
movl $__init_array_start, %ebx
jmp 2f
1: call *(%ebx)
addl $4, %ebx
2: cmpl $__init_array_end, %ebx
jne 1b
# call main
movl 0(%esp), %eax
xchgl %eax, 8(%esp)
movl %eax, (%esp)
# argc, argv, envp already on stack
call main
subl $12, %esp

View File

@ -2,41 +2,40 @@
.global _start
_start:
pushq $0
# get argc, argv, envp
movq (%rsp), %rdi
leaq 8(%rsp), %rsi
leaq 8(%rsi, %rdi, 8), %rdx
# align stack
andq $-16, %rsp
xorq %rbp, %rbp
# save argc, argv, envp
subq $8, %rsp
pushq %rdi
pushq %rsi
pushq %rdx
# STACK LAYOUT
# null
# argc
# argv
# envp
movq %rdx, %rdi # environ
xorq %rbp, %rbp
pushq $__fini_array_end
pushq $__fini_array_start
pushq $_fini
pushq $__init_array_end
pushq $__init_array_start
pushq $_init
# init libc
movq 0(%rsp), %rdi
call _init_libc
# call global constructors
movq $_init, %rax
testq %rax, %rax
jz 1f
call *%rax
1:
movq $__init_array_start, %rbx
jmp 2f
1: call *(%rbx)
addq $8, %rbx
2: cmpq $__init_array_end, %rbx
jne 1b
addq $(6 * 8), %rsp
# call main
movq 16(%rsp), %rdi
movq 8(%rsp), %rsi
movq 0(%rsp), %rdx
popq %rdx
popq %rsi
popq %rdi
addq $8, %rsp
call main
# call exit

View File

@ -1,6 +1,10 @@
#include <errno.h>
// Per-thread errno storage. Falls back to a single process-wide variable on
// targets where thread-local storage is disabled (currently i686 — see the
// __disable_thread_local_storage define in the libc CMakeLists).
#if __disable_thread_local_storage
static int s_errno = 0;
#else
static thread_local int s_errno = 0;
#endif
int* __errno_location()
{

View File

@ -1,41 +1,39 @@
#include <BAN/Assert.h>
#include <stdint.h>
#include <stddef.h>
#define ATEXIT_MAX_FUNCS 128
struct atexit_func_entry_t
{
void(*func)(void*);
void (*func)(void*);
void* arg;
void* dso_handle;
};
static atexit_func_entry_t __atexit_funcs[ATEXIT_MAX_FUNCS];
static size_t __atexit_func_count = 0;
static atexit_func_entry_t s_atexit_funcs[ATEXIT_MAX_FUNCS];
static size_t s_atexit_func_count = 0;
extern "C" int __cxa_atexit(void(*func)(void*), void* arg, void* dso_handle)
{
if (__atexit_func_count >= ATEXIT_MAX_FUNCS)
if (s_atexit_func_count >= ATEXIT_MAX_FUNCS)
return -1;
auto& atexit_func = __atexit_funcs[__atexit_func_count++];
atexit_func.func = func;
atexit_func.arg = arg;
atexit_func.dso_handle = dso_handle;
s_atexit_funcs[s_atexit_func_count++] = {
.func = func,
.arg = arg,
.dso_handle = dso_handle,
};
return 0;
};
extern "C" void __cxa_finalize(void* f)
extern "C" void __cxa_finalize(void* dso_handle)
{
for (size_t i = __atexit_func_count; i > 0; i--)
for (size_t i = s_atexit_func_count; i > 0; i--)
{
auto& atexit_func = __atexit_funcs[i - 1];
auto& atexit_func = s_atexit_funcs[i - 1];
if (atexit_func.func == nullptr)
continue;
if (f == nullptr || f == atexit_func.func)
{
atexit_func.func(atexit_func.arg);
atexit_func.func = nullptr;
}
if (dso_handle && dso_handle != atexit_func.dso_handle)
continue;
atexit_func.func(atexit_func.arg);
atexit_func.func = nullptr;
}
};

View File

@ -54,6 +54,7 @@ struct hostent
int h_addrtype; /* Address type. */
int h_length; /* The length, in bytes, of the address. */
char** h_addr_list; /* A pointer to an array of pointers to network addresses (in network byte order) for the host, terminated by a null pointer. */
#define h_addr h_addr_list[0] /* Backwards compatibility */
};
struct netent

View File

@ -8,6 +8,7 @@
__BEGIN_DECLS
#include <sched.h>
#include <stdint.h>
#include <time.h>
#define __need_size_t
@ -27,6 +28,14 @@ __BEGIN_DECLS
#define __need_pthread_t
#include <sys/types.h>
/* Per-thread userspace TLS control block. The thread pointer points at this
 * structure, which is placed immediately above the thread's TLS data block. */
struct uthread
{
	struct uthread* self;       /* points to this uthread itself; read via the thread register at offset 0 */
	void* master_tls_addr;      /* master TLS image this thread's TLS was copied from (null if none) */
	size_t master_tls_size;     /* size of the master TLS image in bytes */
	uintptr_t dtv[];            /* dynamic thread vector; dtv[0] holds the module count */
};
#define PTHREAD_BARRIER_SERIAL_THREAD 1
#define PTHREAD_CANCEL_ASYNCHRONOUS 2
#define PTHREAD_CANCEL_ENABLE 3

View File

@ -91,6 +91,8 @@ __BEGIN_DECLS
O(SYS_SYMLINKAT, symlinkat) \
O(SYS_HARDLINKAT, hardlinkat) \
O(SYS_YIELD, yield) \
O(SYS_SET_TLS, set_tls) \
O(SYS_GET_TLS, get_tls) \
O(SYS_PTHREAD_CREATE, pthread_create) \
O(SYS_PTHREAD_EXIT, pthread_exit) \
O(SYS_PTHREAD_JOIN, pthread_join) \

View File

@ -2,6 +2,7 @@
#include <assert.h>
#include <errno.h>
#include <pthread.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
@ -50,19 +51,29 @@ struct malloc_pool_t
bool contains(malloc_node_t* node) { return start <= (uint8_t*)node && (uint8_t*)node < end(); }
};
static malloc_pool_t s_malloc_pools[s_malloc_pool_count];
void _init_malloc()
struct malloc_info_t
{
size_t pool_size = s_malloc_pool_size_initial;
for (size_t i = 0; i < s_malloc_pool_count; i++)
consteval malloc_info_t()
{
s_malloc_pools[i].start = nullptr;
s_malloc_pools[i].size = pool_size;
s_malloc_pools[i].free_list = nullptr;;
pool_size *= s_malloc_pool_size_multiplier;
size_t pool_size = s_malloc_pool_size_initial;
for (auto& pool : pools)
{
pool = {
.start = nullptr,
.size = pool_size,
.free_list = nullptr,
};
pool_size *= s_malloc_pool_size_multiplier;
}
}
}
malloc_pool_t pools[s_malloc_pool_count];
};
static malloc_info_t s_malloc_info;
static auto& s_malloc_pools = s_malloc_info.pools;
static pthread_spinlock_t s_malloc_lock;
static bool allocate_pool(size_t pool_index)
{
@ -192,19 +203,31 @@ void* malloc(size_t size)
// try to find any already existing pools that we can allocate in
for (size_t i = first_usable_pool; i < s_malloc_pool_count; i++)
if (s_malloc_pools[i].start != nullptr)
if (void* ret = allocate_from_pool(i, size))
return ret;
{
if (s_malloc_pools[i].start == nullptr)
continue;
pthread_spin_lock(&s_malloc_lock);
void* ret = allocate_from_pool(i, size);
pthread_spin_unlock(&s_malloc_lock);
if (ret != nullptr)
return ret;
}
// allocate new pool
for (size_t i = first_usable_pool; i < s_malloc_pool_count; i++)
{
if (s_malloc_pools[i].start != nullptr)
continue;
if (!allocate_pool(i))
pthread_spin_lock(&s_malloc_lock);
void* ret = nullptr;
if (allocate_pool(i))
ret = allocate_from_pool(i, size);
pthread_spin_unlock(&s_malloc_lock);
if (ret == nullptr)
break;
// NOTE: always works since we just created the pool
return allocate_from_pool(i, size);
return ret;
}
errno = ENOMEM;
@ -250,6 +273,8 @@ void free(void* ptr)
if (ptr == nullptr)
return;
pthread_spin_lock(&s_malloc_lock);
auto* node = node_from_data_pointer(ptr);
node->allocated = false;
@ -271,6 +296,8 @@ void free(void* ptr)
node->prev_free = nullptr;
node->next_free = pool.free_list;
pool.free_list = node;
pthread_spin_unlock(&s_malloc_lock);
}
void* calloc(size_t nmemb, size_t size)

View File

@ -2,55 +2,195 @@
#include <BAN/Atomic.h>
#include <BAN/PlacementNew.h>
#include <kernel/Arch.h>
#include <errno.h>
#include <pthread.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>
// Heap-allocated bundle passed from pthread_create to the new thread's entry
// trampoline; the trampoline copies it locally and then free()s it.
struct pthread_trampoline_info_t
{
	struct uthread* uthread;       // TLS control block prepared for the new thread
	void* (*start_routine)(void*); // user-supplied thread entry function
	void* arg;                     // argument forwarded to start_routine
};
// stack is 16 byte aligned on entry, this `call` is used to align it
extern "C" void pthread_trampoline(void*);
asm("pthread_trampoline: call pthread_trampoline_cpp");
// Assembly entry point for threads created by pthread_create. The kernel
// starts the new thread with the pthread_trampoline_info_t* on top of the
// stack; this stub pops it, aligns the stack to 16 bytes per the SysV ABI,
// clears the frame pointer to terminate backtraces, and calls into C++.
extern "C" void _pthread_trampoline(void*);
asm(
#if ARCH(x86_64)
"_pthread_trampoline:"
"popq %rdi;"        // info pointer -> first argument register
"andq $-16, %rsp;"  // 16-byte stack alignment for the call
"xorq %rbp, %rbp;"  // null frame pointer
"call _pthread_trampoline_cpp"
#elif ARCH(i686)
"_pthread_trampoline:"
"ud2;"              // NOTE(review): traps immediately — presumably deliberate while TLS is disabled on i686; confirm before removing
"popl %edi;"
"andl $-16, %esp;"
"xorl %ebp, %ebp;"
"subl $12, %esp;"   // padding so the stack is 16-byte aligned after the push
"pushl %edi;"       // info pointer as the stack argument
"call _pthread_trampoline_cpp"
#endif
);
extern "C" void pthread_trampoline_cpp(void* arg)
extern "C" void _pthread_trampoline_cpp(void* arg)
{
pthread_trampoline_info_t info;
memcpy(&info, arg, sizeof(pthread_trampoline_info_t));
auto info = *reinterpret_cast<pthread_trampoline_info_t*>(arg);
syscall(SYS_SET_TLS, info.uthread);
free(arg);
pthread_exit(info.start_routine(info.arg));
ASSERT_NOT_REACHED();
}
int pthread_create(pthread_t* __restrict thread, const pthread_attr_t* __restrict attr, void* (*start_routine)(void*), void* __restrict arg)
// Returns the calling thread's uthread. The thread register's base points at
// struct uthread, whose first member (self) is the pointer itself, so a
// single load from offset 0 of the TLS segment suffices.
static uthread* get_uthread()
{
	uthread* result;
#if ARCH(x86_64)
	asm volatile("movq %%fs:0, %0" : "=r"(result));
#elif ARCH(i686)
	asm volatile("movl %%gs:0, %0" : "=r"(result));
#endif
	return result;
}
// Releases a thread's TLS block. dtv[0] (the module count) doubles as a tag:
// 0 means the uthread is a bare malloc()ed stub with no TLS image; otherwise
// the whole mmap()ed region (TLS data + uthread + dtv) is unmapped.
static void free_uthread(uthread* uthread)
{
	if (uthread->dtv[0] == 0)
		return free(uthread);
	// The TLS image sits immediately below the uthread; reconstruct the
	// original mapping's base address and total size (mirrors the layout
	// built in pthread_create).
	uint8_t* tls_addr = reinterpret_cast<uint8_t*>(uthread) - uthread->master_tls_size;
	const size_t tls_size = uthread->master_tls_size
		+ sizeof(struct uthread)
		+ (uthread->dtv[0] + 1) * sizeof(uintptr_t);
	munmap(tls_addr, tls_size);
}
#if not __disable_thread_local_storage
// Node in a thread's stack of pthread_cleanup_push() handlers.
struct pthread_cleanup_t
{
	void (*routine)(void*);  // cleanup handler to invoke
	void* arg;               // argument passed to routine
	pthread_cleanup_t* next; // next (older) handler on the stack
};

// Per-thread LIFO list of pending cleanup handlers; popped by
// pthread_cleanup_pop() and drained by pthread_exit().
static thread_local pthread_cleanup_t* s_cleanup_stack = nullptr;
// Removes the most recently pushed cleanup handler from the calling thread's
// cleanup stack, running it first when `execute` is non-zero. Must be
// matched with a prior pthread_cleanup_push().
void pthread_cleanup_pop(int execute)
{
	ASSERT(s_cleanup_stack);
	auto* top = s_cleanup_stack;
	s_cleanup_stack = top->next;
	if (execute)
		top->routine(top->arg);
	free(top);
}
// Pushes a cleanup handler onto the calling thread's cleanup stack. The node
// is heap-allocated; allocation failure asserts, since this POSIX interface
// returns void and has no way to report an error.
void pthread_cleanup_push(void (*routine)(void*), void* arg)
{
	auto* node = static_cast<pthread_cleanup_t*>(malloc(sizeof(pthread_cleanup_t)));
	ASSERT(node);
	node->routine = routine;
	node->arg = arg;
	node->next = s_cleanup_stack;
	s_cleanup_stack = node;
}
#endif
// Initializes a thread attribute object to its default (empty) state.
// Only default attributes are currently supported: sys_pthread_create
// rejects any non-zero attribute value with ENOTSUP.
int pthread_attr_init(pthread_attr_t* attr)
{
	*attr = 0;
	return 0;
}
int pthread_create(pthread_t* __restrict thread_id, const pthread_attr_t* __restrict attr, void* (*start_routine)(void*), void* __restrict arg)
{
auto* info = static_cast<pthread_trampoline_info_t*>(malloc(sizeof(pthread_trampoline_info_t)));
if (info == nullptr)
return -1;
info->start_routine = start_routine;
info->arg = arg;
return errno;
const auto ret = syscall(SYS_PTHREAD_CREATE, attr, pthread_trampoline, info);
if (ret == -1)
*info = {
.uthread = nullptr,
.start_routine = start_routine,
.arg = arg,
};
long syscall_ret = 0;
if (uthread* self = get_uthread(); self->master_tls_addr == nullptr)
{
free(info);
return -1;
uthread* uthread = static_cast<struct uthread*>(malloc(sizeof(struct uthread) + sizeof(uintptr_t)));
if (uthread == nullptr)
goto pthread_create_error;
uthread->self = uthread;
uthread->master_tls_addr = nullptr;
uthread->master_tls_size = 0;
uthread->dtv[0] = 0;
info->uthread = uthread;
}
else
{
const size_t module_count = self->dtv[0];
const size_t tls_size = self->master_tls_size
+ sizeof(uthread)
+ (module_count + 1) * sizeof(uintptr_t);
uint8_t* tls_addr = static_cast<uint8_t*>(mmap(nullptr, tls_size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0));
if (tls_addr == MAP_FAILED)
goto pthread_create_error;
memcpy(tls_addr, self->master_tls_addr, self->master_tls_size);
uthread* uthread = reinterpret_cast<struct uthread*>(tls_addr + self->master_tls_size);
uthread->self = uthread;
uthread->master_tls_addr = self->master_tls_addr;
uthread->master_tls_size = self->master_tls_size;
const uintptr_t self_addr = reinterpret_cast<uintptr_t>(self);
const uintptr_t uthread_addr = reinterpret_cast<uintptr_t>(uthread);
uthread->dtv[0] = module_count;
for (size_t i = 1; i <= module_count; i++)
uthread->dtv[i] = self->dtv[i] - self_addr + uthread_addr;
info->uthread = uthread;
}
if (thread)
*thread = ret;
syscall_ret = syscall(SYS_PTHREAD_CREATE, attr, _pthread_trampoline, info);
if (syscall_ret == -1)
goto pthread_create_error;
if (thread_id)
*thread_id = syscall_ret;
return 0;
pthread_create_error:
const int return_code = errno;
if (info->uthread)
free_uthread(info->uthread);
free(info);
return return_code;
}
void pthread_exit(void* value_ptr)
{
#if not __disable_thread_local_storage
while (s_cleanup_stack)
pthread_cleanup_pop(1);
#endif
free_uthread(get_uthread());
syscall(SYS_PTHREAD_EXIT, value_ptr);
ASSERT_NOT_REACHED();
}
@ -62,7 +202,14 @@ int pthread_join(pthread_t thread, void** value_ptr)
// Returns the calling thread's id.
pthread_t pthread_self(void)
{
#if __disable_thread_local_storage
	// No TLS available on this target: ask the kernel on every call.
	return syscall(SYS_PTHREAD_SELF);
#else
	// Cache the id in thread-local storage after the first syscall.
	static thread_local pthread_t s_pthread_self { -1 };
	if (s_pthread_self == -1) [[unlikely]]
		s_pthread_self = syscall(SYS_PTHREAD_SELF);
	return s_pthread_self;
#endif
}
static inline BAN::Atomic<pthread_t>& pthread_spin_get_atomic(pthread_spinlock_t* lock)
@ -123,3 +270,21 @@ int pthread_spin_unlock(pthread_spinlock_t* lock)
atomic.store(0, BAN::MemoryOrder::memory_order_release);
return 0;
}
// Argument block the compiler/dynamic linker passes to __tls_get_addr for
// dynamic TLS lookups: a module index into the dtv plus an offset within
// that module's TLS block.
struct tls_index
{
	unsigned long int ti_module;
	unsigned long int ti_offset;
};
// Resolves a dynamic TLS reference: fetches the module's TLS base from the
// calling thread's dtv and adds the in-module offset.
extern "C" void* __tls_get_addr(tls_index* ti)
{
	return reinterpret_cast<void*>(get_uthread()->dtv[ti->ti_module] + ti->ti_offset);
}
#if ARCH(i686)
// i686 variant of the same lookup: the IA-32 TLS ABI passes the tls_index
// pointer in a register, hence regparm(1) and the extra leading underscore.
extern "C" void* __attribute__((__regparm__(1))) ___tls_get_addr(tls_index* ti)
{
	return reinterpret_cast<void*>(get_uthread()->dtv[ti->ti_module] + ti->ti_offset);
}
#endif

View File

@ -94,7 +94,8 @@ static int drop_read_buffer(FILE* file)
return 0;
}
void _init_stdio()
__attribute__((constructor))
static void _init_stdio()
{
for (size_t i = 0; i < FOPEN_MAX; i++)
{

View File

@ -14,16 +14,14 @@
#include <strings.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/weak_alias.h>
#include <unistd.h>
#include <icxxabi.h>
extern "C" char** environ;
extern "C" __attribute__((weak)) void _fini();
static void (*at_exit_funcs[64])();
static uint32_t at_exit_funcs_count = 0;
char** __environ;
weak_alias(__environ, environ);
static bool s_environ_malloced = false;
void abort(void)
{
@ -41,11 +39,8 @@ void abort(void)
void exit(int status)
{
for (uint32_t i = at_exit_funcs_count; i > 0; i--)
at_exit_funcs[i - 1]();
fflush(nullptr);
__cxa_finalize(nullptr);
if (_fini) _fini();
_exit(status);
ASSERT_NOT_REACHED();
}
@ -62,13 +57,8 @@ int abs(int val)
int atexit(void (*func)(void))
{
if (at_exit_funcs_count > sizeof(at_exit_funcs) / sizeof(*at_exit_funcs))
{
errno = ENOBUFS;
return -1;
}
at_exit_funcs[at_exit_funcs_count++] = func;
return 0;
void* func_addr = reinterpret_cast<void*>(func);
return __cxa_atexit([](void* func_ptr) { reinterpret_cast<void (*)(void)>(func_ptr)(); }, func_addr, nullptr);
}
static constexpr int get_base_digit(char c, int base)
@ -516,27 +506,35 @@ int putenv(char* string)
return -1;
}
if (!environ)
if (!s_environ_malloced)
{
environ = (char**)malloc(sizeof(char*) * 2);
if (!environ)
size_t env_count = 0;
while (environ[env_count])
env_count++;
char** new_environ = static_cast<char**>(malloc((env_count + 1) * sizeof(char*)));
if (new_environ == nullptr)
return -1;
environ[0] = string;
environ[1] = nullptr;
return 0;
for (size_t i = 0; i < env_count; i++)
{
const size_t bytes = strlen(environ[i]) + 1;
new_environ[i] = (char*)malloc(bytes);
memcpy(new_environ[i], environ[i], bytes);
}
new_environ[env_count] = nullptr;
environ = new_environ;
s_environ_malloced = true;
}
int cnt = 0;
for (int i = 0; string[i]; i++)
if (string[i] == '=')
cnt++;
if (cnt != 1)
const char* eq_addr = strchr(string, '=');
if (eq_addr == nullptr)
{
errno = EINVAL;
return -1;
}
int namelen = strchr(string, '=') - string;
size_t namelen = eq_addr - string;
for (int i = 0; environ[i]; i++)
{
if (strncmp(environ[i], string, namelen + 1) == 0)
@ -547,15 +545,15 @@ int putenv(char* string)
}
}
int env_count = 0;
size_t env_count = 0;
while (environ[env_count])
env_count++;
char** new_envp = (char**)malloc(sizeof(char*) * (env_count + 2));
char** new_envp = static_cast<char**>(malloc(sizeof(char*) * (env_count + 2)));
if (new_envp == nullptr)
return -1;
for (int i = 0; i < env_count; i++)
for (size_t i = 0; i < env_count; i++)
new_envp[i] = environ[i];
new_envp[env_count] = string;
new_envp[env_count + 1] = nullptr;

View File

@ -17,6 +17,11 @@ int gettimeofday(struct timeval* __restrict tp, void* __restrict tzp)
return 0;
}
// Query the current value of an interval timer without changing it.
// Implemented on top of setitimer(): a null new-value pointer asks the
// kernel to only report the old timer into `value`.
int getitimer(int which, struct itimerval* value)
{
	const int ret = setitimer(which, nullptr, value);
	return ret;
}
int setitimer(int which, const struct itimerval* __restrict value, struct itimerval* __restrict ovalue)
{
return syscall(SYS_SETITIMER, which, value, ovalue);

View File

@ -1,10 +1,12 @@
#include <BAN/Assert.h>
#include <BAN/Debug.h>
#include <kernel/Memory/Types.h>
#include <kernel/Syscall.h>
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <pwd.h>
#include <stdarg.h>
#include <stdio.h>
@ -16,36 +18,48 @@
#include <termios.h>
#include <unistd.h>
char** __environ;
extern char** environ __attribute__((weak, alias("__environ")));
extern void _init_malloc();
extern void _init_stdio();
extern "C" void _init_libc(char** _environ)
struct init_funcs_t
{
static bool is_initialized = false;
if (is_initialized)
return;
is_initialized = true;
void (*func)();
void (**array_start)();
void (**array_end)();
};
_init_malloc();
_init_stdio();
extern "C" char** environ;
if (!_environ)
return;
extern "C" void _init_libc(char** environ, init_funcs_t init_funcs, init_funcs_t fini_funcs)
{
if (::environ == nullptr)
::environ = environ;
size_t env_count = 0;
while (_environ[env_count])
env_count++;
environ = (char**)malloc(sizeof(char*) * env_count + 1);
for (size_t i = 0; i < env_count; i++)
if (syscall(SYS_GET_TLS) == 0)
{
size_t bytes = strlen(_environ[i]) + 1;
environ[i] = (char*)malloc(bytes);
memcpy(environ[i], _environ[i], bytes);
alignas(uthread) static uint8_t storage[sizeof(uthread) + sizeof(uintptr_t)];
uthread& uthread = *reinterpret_cast<struct uthread*>(storage);
uthread = {
.self = &uthread,
.master_tls_addr = nullptr,
.master_tls_size = 0,
};
uthread.dtv[0] = 0;
syscall(SYS_SET_TLS, &uthread);
}
environ[env_count] = nullptr;
// call global constructors
if (init_funcs.func)
init_funcs.func();
const size_t init_array_count = init_funcs.array_end - init_funcs.array_start;
for (size_t i = 0; i < init_array_count; i++)
init_funcs.array_start[i]();
// register global destructors
const size_t fini_array_count = fini_funcs.array_end - fini_funcs.array_start;
for (size_t i = 0; i < fini_array_count; i++)
atexit(fini_funcs.array_start[i]);
if (fini_funcs.func)
atexit(fini_funcs.func);
}
void _exit(int status)

View File

@ -0,0 +1,28 @@
#pragma once
#include <stdint.h>
namespace LibELF
{

	// One entry of the auxiliary vector the kernel places on the process
	// stack (immediately after the environment's terminating null
	// pointer — see find_auxv in the dynamic loader).
	//
	// NOTE(review): a_type/a_val are fixed at 32 bits while a_ptr is
	// pointer-sized; on 64-bit targets this is smaller than the
	// conventional Elf64_auxv_t (64-bit a_type/a_val) — confirm the
	// kernel writes auxv entries with this exact layout.
	struct AuxiliaryVector
	{
		uint32_t a_type; // entry type, one of AuxiliaryVectorValues
		union
		{
			uint32_t a_val; // integer payload
			void* a_ptr;    // pointer payload
		} a_un;
	};

	enum AuxiliaryVectorValues
	{
		AT_NULL = 0,   // marks the end of the vector
		AT_IGNORE = 1, // entry should be skipped
		AT_EXECFD = 2, // open file descriptor of the program to load
		AT_PHDR = 3,   // address of the program headers
		AT_PHENT = 4,  // size of one program header entry
		AT_PHNUM = 5,  // number of program headers
	};

}

View File

@ -109,6 +109,7 @@ namespace LibELF
STT_FUNC = 2,
STT_SECTION = 3,
STT_FILE = 4,
STT_TLS = 6,
STT_LOOS = 10,
STT_HIOS = 12,
STT_LOPROC = 13,
@ -124,6 +125,7 @@ namespace LibELF
PT_NOTE = 4,
PT_SHLIB = 5,
PT_PHDR = 6,
PT_TLS = 7,
PT_LOOS = 0x60000000,
PT_GNU_EH_FRAME = 0x6474E550,
PT_GNU_STACK = 0x6474E551,
@ -194,6 +196,26 @@ namespace LibELF
R_386_RELATIVE = 8,
R_386_GOTOFF = 9,
R_386_GOTPC = 10,
R_386_TLS_TPOFF = 14,
R_386_TLS_IE = 15,
R_386_TLS_GOTIE = 16,
R_386_TLS_LE = 17,
R_386_TLS_GD = 18,
R_386_TLS_LDM = 19,
R_386_TLS_GD_32 = 24,
R_386_TLS_GD_PUSH = 25,
R_386_TLS_GD_CALL = 26,
R_386_TLS_GD_POP = 27,
R_386_TLS_LDM_32 = 28,
R_386_TLS_LDM_PUSH = 29,
R_386_TLS_LDM_CALL = 30,
R_386_TLS_LDM_POP = 31,
R_386_TLS_LDO_32 = 32,
R_386_TLS_IE_32 = 33,
R_386_TLS_LE_32 = 34,
R_386_TLS_DTPMOD32 = 35,
R_386_TLS_DTPOFF32 = 36,
R_386_TLS_TPOFF32 = 37,
};
#define ELF64_R_SYM(i) ((i) >> 32)

View File

@ -1,56 +1,66 @@
#include "utils.h"
#include <LibELF/AuxiliaryVector.h>
#include <LibELF/Types.h>
#include <LibELF/Values.h>
#include <fcntl.h>
#include <limits.h>
#include <pthread.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>
#if defined(__x86_64__)
#define ELF_R_SYM ELF64_R_SYM
#elif defined(__i686__)
#define ELF_R_SYM ELF32_R_SYM
#endif
extern "C"
__attribute__((naked))
void _start()
{
#if defined(__x86_64__)
asm volatile(
"xorq %rbp, %rbp;"
"movq (%rsp), %rdi;"
"leaq 8(%rsp), %rsi;"
"leaq 8(%rsi, %rdi, 8), %rdx;"
"movq %rsp, %rbp;"
"andq $-16, %rsp;"
"call _entry;"
"movq %rbp, %rsp;"
"xorq %rbp, %rbp;"
"jmp *%rax;"
"ud2;"
);
#elif defined(__i686__)
asm volatile(
"xorl %ebp, %ebp;"
"pushl %ecx;"
"movl (%esp), %edi;"
"leal 4(%esp), %esi;"
"leal 4(%esi, %edi, 4), %edx;"
"movl %esp, %ebp;"
"andl $-16, %esp;"
"subl $4, %esp;"
"pushl %edx;"
"pushl %esi;"
"pushl %edi;"
"call _entry;"
"ud2;"
);
#else
#error "unsupported architecture"
#endif
}
__attribute__((naked, noreturn))
static void call_entry_point(int, char**, char**, uintptr_t)
{
#if defined(__x86_64__)
asm volatile(
"andq $-16, %rsp;"
"jmp *%rcx;"
);
#elif defined(__i686__)
asm volatile(
"addl $4, %esp;"
"popl %edi;"
"popl %esi;"
"popl %edx;"
"popl %ecx;"
"andl $-16, %esp;"
"jmp *%ecx;"
"call _entry;"
"movl %ebp, %esp;"
"xorl %ebp, %ebp;"
"jmp *%eax;"
"ud2;"
);
#else
#error "unsupported architecture"
@ -136,8 +146,15 @@ static void resolve_symbol_trampoline()
struct LoadedElf
{
ElfNativeFileHeader file_header;
ElfNativeProgramHeader tls_header;
ElfNativeDynamic* dynamics;
uint8_t* tls_addr;
size_t tls_module;
size_t tls_offset;
int fd;
uintptr_t base;
uintptr_t hash;
@ -163,8 +180,8 @@ struct LoadedElf
uintptr_t init_array;
size_t init_arraysz;
bool has_called_init;
bool is_relocated;
bool is_calling_init;
bool is_relocating;
char path[PATH_MAX];
};
@ -207,20 +224,71 @@ static ElfNativeSymbol* find_symbol(const LoadedElf& elf, const char* name)
}
// Tell whether `reloc` is any of the architecture's thread-local
// storage relocation types; those are skipped by the generic pass and
// handled separately in handle_tls_relocation().
template<typename RelocT> requires BAN::is_same_v<RelocT, ElfNativeRelocation> || BAN::is_same_v<RelocT, ElfNativeRelocationA>
static bool is_tls_relocation(const RelocT& reloc)
{
#if defined(__x86_64__)
	switch (ELF64_R_TYPE(reloc.r_info))
	{
		case R_X86_64_DTPMOD64:
		case R_X86_64_DTPOFF64:
		case R_X86_64_TPOFF64:
		case R_X86_64_TLSGD:
		case R_X86_64_TLSLD:
		case R_X86_64_DTPOFF32:
		case R_X86_64_GOTTPOFF:
		case R_X86_64_TPOFF32:
			return true;
	}
#elif defined(__i686__)
	switch (ELF32_R_TYPE(reloc.r_info))
	{
		case R_386_TLS_TPOFF:
		case R_386_TLS_IE:
		case R_386_TLS_GOTIE:
		case R_386_TLS_LE:
		case R_386_TLS_GD:
		case R_386_TLS_LDM:
		case R_386_TLS_GD_32:
		case R_386_TLS_GD_PUSH:
		case R_386_TLS_GD_CALL:
		case R_386_TLS_GD_POP:
		case R_386_TLS_LDM_32:
		case R_386_TLS_LDM_PUSH:
		case R_386_TLS_LDM_CALL:
		case R_386_TLS_LDM_POP:
		case R_386_TLS_LDO_32:
		case R_386_TLS_IE_32:
		case R_386_TLS_LE_32:
		case R_386_TLS_DTPMOD32:
		case R_386_TLS_DTPOFF32:
		case R_386_TLS_TPOFF32:
			return true;
	}
#else
#error "unsupported architecture"
#endif
	return false;
}
// Tell whether `reloc` is a COPY relocation; those are processed in
// their own pass, separate from the generic relocation handling.
template<typename RelocT> requires BAN::is_same_v<RelocT, ElfNativeRelocation> || BAN::is_same_v<RelocT, ElfNativeRelocationA>
static bool is_copy_relocation(const RelocT& reloc)
{
#if defined(__x86_64__)
	const auto reloc_type = ELF64_R_TYPE(reloc.r_info);
	return reloc_type == R_X86_64_COPY;
#elif defined(__i686__)
	const auto reloc_type = ELF32_R_TYPE(reloc.r_info);
	return reloc_type == R_386_COPY;
#else
#error "unsupported architecture"
#endif
}
template<typename RelocT> requires BAN::is_same_v<RelocT, ElfNativeRelocation> || BAN::is_same_v<RelocT, ElfNativeRelocationA>
static void handle_copy_relocation(const LoadedElf& elf, const RelocT& reloc)
{
if (!is_copy_relocation(reloc))
return;
const uint32_t symbol_index = ELF_R_SYM(reloc.r_info);
if (symbol_index == 0)
print_error_and_exit("copy relocation without a symbol", 0);
@ -259,25 +327,60 @@ static void handle_copy_relocation(const LoadedElf& elf, const RelocT& reloc)
}
// Apply a thread-local storage relocation. Currently only the
// module-id relocation (DTPMOD) without a symbol reference is
// supported: it stores the module's 1-based TLS module id (assigned in
// initialize_master_tls) at the relocated address for dtv lookups.
template<typename RelocT> requires BAN::is_same_v<RelocT, ElfNativeRelocation> || BAN::is_same_v<RelocT, ElfNativeRelocationA>
static void handle_tls_relocation(const LoadedElf& elf, const RelocT& reloc)
{
	if (!is_tls_relocation(reloc))
		return;
	if (ELF_R_SYM(reloc.r_info))
		print_error_and_exit("tls relocation with symbol index", 0);
	// a TLS relocation only makes sense if this object has a PT_TLS segment
	if (elf.tls_addr == nullptr)
		print_error_and_exit("tls relocation without tls", 0);
#if defined(__x86_64__)
	switch (ELF64_R_TYPE(reloc.r_info))
	{
		case R_X86_64_DTPMOD64:
			*reinterpret_cast<uint64_t*>(elf.base + reloc.r_offset) = elf.tls_module;
			break;
		default:
			print(STDERR_FILENO, "unsupported tls reloc type ");
			print_uint(STDERR_FILENO, ELF64_R_TYPE(reloc.r_info));
			print(STDERR_FILENO, " in ");
			print(STDERR_FILENO, elf.path);
			print_error_and_exit("", 0);
	}
#elif defined(__i686__)
	switch (ELF32_R_TYPE(reloc.r_info))
	{
		case R_386_TLS_DTPMOD32:
			*reinterpret_cast<uint32_t*>(elf.base + reloc.r_offset) = elf.tls_module;
			break;
		default:
			print(STDERR_FILENO, "unsupported tls reloc type ");
			// fix: was ELF64_R_TYPE, which on a 32-bit r_info also
			// includes the symbol-index bits and prints a bogus value
			print_uint(STDERR_FILENO, ELF32_R_TYPE(reloc.r_info));
			print(STDERR_FILENO, " in ");
			print(STDERR_FILENO, elf.path);
			print_error_and_exit("", 0);
	}
#else
#error "unsupported architecture"
#endif
}
template<typename RelocT> requires BAN::is_same_v<RelocT, ElfNativeRelocation> || BAN::is_same_v<RelocT, ElfNativeRelocationA>
static uintptr_t handle_relocation(const LoadedElf& elf, const RelocT& reloc, bool resolve_symbols)
{
if (is_copy_relocation(reloc) || is_tls_relocation(reloc))
return 0;
const uint32_t symbol_index = ELF_R_SYM(reloc.r_info);
if (resolve_symbols == !symbol_index)
return 0;
uintptr_t symbol_address = 0;
if (symbol_index)
{
const auto& symbol = *reinterpret_cast<ElfNativeSymbol*>(elf.symtab + symbol_index * elf.syment);
@ -311,6 +414,9 @@ static uintptr_t handle_relocation(const LoadedElf& elf, const RelocT& reloc, bo
symbol_address = 0;
}
}
if (ELF_ST_TYPE(symbol.st_info) == STT_TLS)
print_error_and_exit("relocating TLS symbol", 0);
}
size_t size = 0;
@ -417,10 +523,9 @@ static uintptr_t handle_relocation(const LoadedElf& elf, const RelocT& reloc, bo
static void relocate_elf(LoadedElf& elf, bool lazy_load)
{
// FIXME: handle circular dependencies
if (elf.is_relocated)
if (elf.is_relocating)
return;
elf.is_relocating = true;
// do copy relocations
if (elf.rel && elf.relent)
@ -449,6 +554,14 @@ static void relocate_elf(LoadedElf& elf, bool lazy_load)
for (size_t i = 0; i < elf.relasz / elf.relaent; i++)
handle_relocation(elf, *reinterpret_cast<ElfNativeRelocationA*>(elf.rela + i * elf.relaent), true);
// do tls relocations
if (elf.rel && elf.relent)
for (size_t i = 0; i < elf.relsz / elf.relent; i++)
handle_tls_relocation(elf, *reinterpret_cast<ElfNativeRelocation*>(elf.rel + i * elf.relent));
if (elf.rela && elf.relaent)
for (size_t i = 0; i < elf.relasz / elf.relaent; i++)
handle_tls_relocation(elf, *reinterpret_cast<ElfNativeRelocationA*>(elf.rela + i * elf.relaent));
// do jumprel relocations
if (elf.jmprel && elf.pltrelsz)
{
@ -498,8 +611,6 @@ static void relocate_elf(LoadedElf& elf, bool lazy_load)
}
}
}
elf.is_relocated = true;
}
extern "C"
@ -527,17 +638,19 @@ static void handle_dynamic(LoadedElf& elf)
switch (dynamic.d_tag)
{
case DT_PLTGOT: dynamic.d_un.d_ptr += elf.base; break;
case DT_HASH: dynamic.d_un.d_ptr += elf.base; break;
case DT_STRTAB: dynamic.d_un.d_ptr += elf.base; break;
case DT_SYMTAB: dynamic.d_un.d_ptr += elf.base; break;
case DT_RELA: dynamic.d_un.d_ptr += elf.base; break;
case DT_INIT: dynamic.d_un.d_ptr += elf.base; break;
case DT_FINI: dynamic.d_un.d_ptr += elf.base; break;
case DT_REL: dynamic.d_un.d_ptr += elf.base; break;
case DT_JMPREL: dynamic.d_un.d_ptr += elf.base; break;
case DT_INIT_ARRAY: dynamic.d_un.d_ptr += elf.base; break;
case DT_FINI_ARRAY: dynamic.d_un.d_ptr += elf.base; break;
case DT_PLTGOT:
case DT_HASH:
case DT_STRTAB:
case DT_SYMTAB:
case DT_RELA:
case DT_INIT:
case DT_FINI:
case DT_REL:
case DT_JMPREL:
case DT_INIT_ARRAY:
case DT_FINI_ARRAY:
dynamic.d_un.d_ptr += elf.base;
break;
}
switch (dynamic.d_tag)
@ -593,8 +706,6 @@ static void handle_dynamic(LoadedElf& elf)
const auto& loaded_elf = load_elf(realpath, library_fd);
dynamic.d_un.d_ptr = reinterpret_cast<uintptr_t>(&loaded_elf);
syscall(SYS_CLOSE, library_fd);
}
// do relocations without symbols
@ -798,6 +909,9 @@ static LoadedElf& load_elf(const char* path, int fd)
break;
}
ElfNativeProgramHeader tls_header {};
tls_header.p_type = PT_NULL;
for (size_t i = 0; i < file_header.e_phnum; i++)
{
ElfNativeProgramHeader program_header;
@ -817,6 +931,9 @@ static LoadedElf& load_elf(const char* path, int fd)
case PT_GNU_RELRO:
print(STDDBG_FILENO, "TODO: PT_GNU_*\n");
break;
case PT_TLS:
tls_header = program_header;
break;
case PT_LOAD:
program_header.p_vaddr += base;
load_program_header(program_header, fd, needs_writable);
@ -829,7 +946,9 @@ static LoadedElf& load_elf(const char* path, int fd)
}
auto& elf = s_loaded_files[s_loaded_file_count++];
elf.tls_header = tls_header;
elf.base = base;
elf.fd = fd;
elf.dynamics = nullptr;
memcpy(&elf.file_header, &file_header, sizeof(file_header));
strcpy(elf.path, path);
@ -857,20 +976,165 @@ static LoadedElf& load_elf(const char* path, int fd)
return elf;
}
static void call_init_libc(LoadedElf& elf, char** envp)
// Describes the master TLS image built from every loaded object's
// PT_TLS segment; per-thread TLS areas are copied from it.
struct MasterTLS
{
	uint8_t* addr;       // start of the mmap'd master image
	size_t size;         // image size in bytes, padded to max alignment
	size_t module_count; // number of modules with a PT_TLS segment
};
// Build the master TLS image: lay out every loaded module's PT_TLS
// segment back-to-front inside one anonymous mapping, load each
// module's initialization data and record per-module TLS address,
// 1-based module id and offset for later dtv setup and relocations.
static MasterTLS initialize_master_tls()
{
	// round `a` up to the next multiple of `b`
	constexpr auto round =
		[](size_t a, size_t b) -> size_t
		{
			return b * ((a + b - 1) / b);
		};

	// First pass: total size, strictest alignment and module count.
	size_t max_align = alignof(uthread);
	size_t tls_m_offset = 0;
	size_t tls_m_size = 0;
	size_t module_count = 0;
	for (size_t i = 0; i < s_loaded_file_count; i++)
	{
		const auto& tls_header = s_loaded_files[i].tls_header;
		if (tls_header.p_type != PT_TLS)
			continue;
		if (tls_header.p_align == 0)
			print_error_and_exit("TLS alignment is 0", 0);
		max_align = max<size_t>(max_align, tls_header.p_align);
		// offsets are measured back from the end of the image and
		// grow with each module (same formula as the second pass)
		tls_m_offset = round(tls_m_offset + tls_header.p_memsz, tls_header.p_align);
		tls_m_size = tls_header.p_memsz;
		module_count++;
	}

	if (module_count == 0)
		return { .addr = nullptr, .size = 0, .module_count = 0 };

	// pad the total image size to the strictest alignment seen
	size_t master_tls_size = tls_m_offset + tls_m_size;
	if (auto rem = master_tls_size % max_align)
		master_tls_size += max_align - rem;

	// allocate the image with an anonymous read/write mapping
	uint8_t* master_tls_addr;
	{
		const sys_mmap_t mmap_args {
			.addr = nullptr,
			.len = master_tls_size,
			.prot = PROT_READ | PROT_WRITE,
			.flags = MAP_ANONYMOUS | MAP_PRIVATE,
			.fildes = -1,
			.off = 0,
		};
		const auto ret = syscall(SYS_MMAP, &mmap_args);
		if (ret < 0)
			print_error_and_exit("failed to allocate master TLS", ret);
		master_tls_addr = reinterpret_cast<uint8_t*>(ret);
	}

	// Second pass: read each module's p_filesz initialization bytes
	// into its slot, zero the remaining p_memsz - p_filesz bytes, and
	// stamp the module's address / id / offset into its LoadedElf.
	for (size_t i = 0, tls_offset = 0, tls_module = 1; i < s_loaded_file_count; i++)
	{
		const auto& tls_header = s_loaded_files[i].tls_header;
		if (tls_header.p_type != PT_TLS)
			continue;

		// same offset recurrence as pass one, so slots line up
		tls_offset = round(tls_offset + tls_header.p_memsz, tls_header.p_align);
		uint8_t* tls_buffer = master_tls_addr + master_tls_size - tls_offset;

		if (tls_header.p_filesz > 0)
		{
			const int fd = s_loaded_files[i].fd;
			if (auto ret = syscall(SYS_PREAD, fd, tls_buffer, tls_header.p_filesz, tls_header.p_offset); ret != static_cast<long>(tls_header.p_filesz))
				print_error_and_exit("failed to read TLS data", ret);
		}
		memset(tls_buffer + tls_header.p_filesz, 0, tls_header.p_memsz - tls_header.p_filesz);

		auto& elf = s_loaded_files[i];
		elf.tls_addr = tls_buffer;
		elf.tls_module = tls_module++; // module ids are 1-based (dtv[0] is the count)
		elf.tls_offset = master_tls_size - tls_offset;
	}

	return { .addr = master_tls_addr, .size = master_tls_size, .module_count = module_count };
}
static void call_init_funcs(LoadedElf& elf, char** envp, bool skip)
// Create the initial thread's TLS area from the master image: map a
// fresh area, copy the image into it, place the uthread control block
// right after the image, fill the dtv (dynamic thread vector) with
// each module's TLS address and hand the result to the kernel.
static void initialize_tls(MasterTLS master_tls)
{
	if (master_tls.addr == nullptr)
		return;

	// layout: [master image copy][uthread][dtv[0..module_count]]
	// where dtv[0] holds the module count
	const size_t tls_size = master_tls.size
		+ sizeof(uthread)
		+ (master_tls.module_count + 1) * sizeof(uintptr_t);

	uint8_t* tls_addr;
	{
		const sys_mmap_t mmap_args {
			.addr = nullptr,
			.len = tls_size,
			.prot = PROT_READ | PROT_WRITE,
			.flags = MAP_ANONYMOUS | MAP_PRIVATE,
			.fildes = -1,
			.off = 0,
		};
		const auto ret = syscall(SYS_MMAP, &mmap_args);
		if (ret < 0)
			print_error_and_exit("failed to allocate TLS", ret);
		tls_addr = reinterpret_cast<uint8_t*>(ret);
	}

	memcpy(tls_addr, master_tls.addr, master_tls.size);

	// the control block keeps a reference to the master image so new
	// threads can be given their own copy later
	uthread* uthread = reinterpret_cast<struct uthread*>(tls_addr + master_tls.size);
	uthread->self = uthread;
	uthread->master_tls_addr = master_tls.addr;
	uthread->master_tls_size = master_tls.size;

	uthread->dtv[0] = master_tls.module_count;
	for (size_t i = 0; i < s_loaded_file_count; i++)
	{
		const auto& elf = s_loaded_files[i];
		if (elf.tls_addr == nullptr)
			continue;
		uthread->dtv[elf.tls_module] = reinterpret_cast<uintptr_t>(tls_addr) + elf.tls_offset;
	}

	syscall(SYS_SET_TLS, uthread);
}
static void initialize_environ(char** envp)
{
uintptr_t environ = SYM_NOT_FOUND;
for (size_t i = 0; i < s_loaded_file_count; i++)
{
const auto* match = find_symbol(s_loaded_files[i], "environ");
if (match == nullptr)
continue;
if (environ == SYM_NOT_FOUND || ELF_ST_BIND(match->st_info) != STB_WEAK)
environ = s_loaded_files[i].base + match->st_value;
if (ELF_ST_BIND(match->st_info) != STB_WEAK)
break;
}
if (environ == SYM_NOT_FOUND)
return;
*reinterpret_cast<char***>(environ) = envp;
}
static void call_init_funcs(LoadedElf& elf, bool is_main_elf)
{
if (elf.is_calling_init)
return;
elf.is_calling_init = true;
if (elf.dynamics)
{
for (size_t i = 0;; i++)
@ -879,11 +1143,12 @@ static void call_init_funcs(LoadedElf& elf, char** envp, bool skip)
if (dynamic.d_tag == DT_NULL)
break;
if (dynamic.d_tag == DT_NEEDED)
call_init_funcs(*reinterpret_cast<LoadedElf*>(dynamic.d_un.d_ptr), envp, false);
call_init_funcs(*reinterpret_cast<LoadedElf*>(dynamic.d_un.d_ptr), false);
}
}
if (elf.has_called_init || skip)
// main executable calls its init functions in _start
if (is_main_elf)
return;
using init_t = void(*)();
@ -891,19 +1156,33 @@ static void call_init_funcs(LoadedElf& elf, char** envp, bool skip)
reinterpret_cast<init_t>(elf.init)();
for (size_t i = 0; i < elf.init_arraysz / sizeof(init_t); i++)
reinterpret_cast<init_t*>(elf.init_array)[i]();
}
if (strcmp(elf.path, "/usr/lib/libc.so") == 0)
call_init_libc(elf, envp);
// Locate the auxiliary vector, which lives on the process stack
// immediately after the environment's terminating null pointer.
// Returns nullptr when there is no environment at all.
static LibELF::AuxiliaryVector* find_auxv(char** envp)
{
	if (envp == nullptr)
		return nullptr;
	char** null_env = envp;
	while (*null_env)
		null_env++;
	return reinterpret_cast<LibELF::AuxiliaryVector*>(null_env + 1);
}
extern "C"
__attribute__((used, noreturn))
int _entry(int argc, char** argv, char** envp, int fd)
__attribute__((used))
uintptr_t _entry(int argc, char* argv[], char* envp[])
{
const bool invoked_directly = (fd < 0);
if (invoked_directly)
int execfd = -1;
if (auto* auxv = find_auxv(envp))
for (auto* aux = auxv; aux->a_type != LibELF::AT_NULL; aux++)
if (aux->a_type == LibELF::AT_EXECFD) {
execfd = aux->a_un.a_val;
aux->a_type = LibELF::AT_IGNORE;
}
if (execfd == -1)
{
if (argc < 2)
print_error_and_exit("missing program name", 0);
@ -911,17 +1190,23 @@ int _entry(int argc, char** argv, char** envp, int fd)
argc--;
argv++;
fd = syscall(SYS_OPENAT, AT_FDCWD, argv[0], O_RDONLY);
if (fd < 0)
print_error_and_exit("could not open program", fd);
execfd = syscall(SYS_OPENAT, AT_FDCWD, argv[0], O_RDONLY);
if (execfd < 0)
print_error_and_exit("could not open program", execfd);
}
init_random();
auto& elf = load_elf(argv[0], fd);
syscall(SYS_CLOSE, fd);
auto& elf = load_elf(argv[0], execfd);
fini_random();
const auto master_tls = initialize_master_tls();
relocate_elf(elf, true);
call_init_funcs(elf, envp, true);
call_entry_point(argc, argv, envp, elf.base + elf.file_header.e_entry);
initialize_tls(master_tls);
initialize_environ(envp);
call_init_funcs(elf, true);
for (size_t i = 0; i < s_loaded_file_count; i++)
syscall(SYS_CLOSE, s_loaded_files[i].fd);
return elf.base + elf.file_header.e_entry;
}