Compare commits

..

No commits in common. "ee9e941a5694258c241f80acd3b2f2e71fac0733" and "a0123e7c2ddeb2514774ab3c13048498a04d9af5" have entirely different histories.

40 changed files with 560 additions and 1337 deletions

View File

@ -41,15 +41,31 @@ start_userspace_thread:
call get_thread_start_sp
movl %eax, %esp
# ds, es = user data
# STACK LAYOUT
# entry
# argc
# argv
# envp
# userspace stack
call get_userspace_thread_stack_top
movw $(0x20 | 3), %bx
movw %bx, %ds
movw %bx, %es
# gs = thread local
movw $(0x30 | 3), %bx
movw %bx, %gs
# fs = 0
xorw %bx, %bx
movw %bx, %fs
movw %bx, %gs
xorw %bx, %bx
popl %edi
popl %esi
popl %edx
popl %ecx
popl %ebx
pushl $(0x20 | 3)
pushl %eax
pushl $0x202
pushl $(0x18 | 3)
pushl %ebx
iret

View File

@ -40,7 +40,8 @@ isr_stub:
movl 56(%esp), %edx // isr number
movl %esp, %ebp
andl $-16, %esp
subl $15, %esp
andl $0xFFFFFFF0, %esp
pushl %eax
pushl %ebx
@ -60,13 +61,14 @@ irq_stub:
load_kernel_segments
cld
movl 40(%esp), %edi # interrupt number
movl 40(%esp), %eax # interrupt number
movl %esp, %ebp
andl $-16, %esp
subl $15, %esp
andl $0xFFFFFFF0, %esp
subl $12, %esp
pushl %edi
pushl %eax
call cpp_irq_handler
movl %ebp, %esp
@ -81,15 +83,16 @@ asm_yield_handler:
pushal
cld
leal 32(%esp), %edi # interrupt stack ptr
movl %esp, %esi # interrupt registers ptr
movl %esp, %eax # interrupt registers ptr
leal 32(%esp), %ebx # interrupt stack ptr
movl %esp, %ebp
andl $-16, %esp
subl $15, %esp
andl $0xFFFFFFF0, %esp
subl $8, %esp
pushl %esi
pushl %edi
pushl %eax
pushl %ebx
call cpp_yield_handler
movl %ebp, %esp
@ -104,7 +107,8 @@ asm_ipi_handler:
cld
movl %esp, %ebp
andl $-16, %esp
subl $15, %esp
andl $0xFFFFFFF0, %esp
call cpp_ipi_handler
@ -113,6 +117,7 @@ asm_ipi_handler:
pop_userspace
iret
.global asm_timer_handler
asm_timer_handler:
push_userspace
@ -120,7 +125,8 @@ asm_timer_handler:
cld
movl %esp, %ebp
andl $-16, %esp
subl $15, %esp
andl $0xFFFFFFF0, %esp
call cpp_timer_handler

View File

@ -32,4 +32,24 @@ start_userspace_thread:
call get_thread_start_sp
movq %rax, %rsp
# STACK LAYOUT
# entry
# argc
# argv
# envp
# userspace stack
call get_userspace_thread_stack_top
popq %rdi
popq %rsi
popq %rdx
popq %rcx
popq %rbx
pushq $(0x20 | 3)
pushq %rax
pushq $0x202
pushq $(0x18 | 3)
pushq %rbx
iretq

View File

@ -8,15 +8,8 @@ namespace Kernel::ELF
// Result handed back by the ELF loader after mapping an executable into a page table.
struct LoadResult
{
// Describes the region created for a PT_TLS program header (the master TLS image).
struct TLS
{
// virtual address of the region holding the master TLS image
vaddr_t addr;
// size of that region
size_t size;
};
// set by the loader to whether a non-empty interpreter path was found
bool open_execfd;
// set by the loader to whether a non-empty interpreter path was found
bool has_interpreter;
// load base address plus the ELF header's e_entry
vaddr_t entry_point;
// filled in only when a PT_TLS program header was processed
BAN::Optional<TLS> master_tls;
// memory regions created while loading; ownership is moved to the caller
BAN::Vector<BAN::UniqPtr<MemoryRegion>> regions;
};

View File

@ -169,7 +169,6 @@ namespace Kernel
private:
BAN::WeakPtr<SharedFileData> m_shared_region;
friend class FileBackedRegion;
friend class OpenFileDescriptorSet;
friend class SharedFileData;
friend class TTY;
};

View File

@ -128,10 +128,6 @@ namespace Kernel
#endif
}
#if ARCH(i686)
void set_tls(uintptr_t addr);
#endif
private:
GDT() = default;
@ -153,8 +149,8 @@ namespace Kernel
BAN::Array<SegmentDescriptor, 7> m_gdt; // null, kernel code, kernel data, user code, user data, tss low, tss high
static constexpr uint16_t m_tss_offset = 0x28;
#elif ARCH(i686)
BAN::Array<SegmentDescriptor, 8> m_gdt; // null, kernel code, kernel data, user code, user data, processor data, tls, tss
static constexpr uint16_t m_tss_offset = 0x38;
BAN::Array<SegmentDescriptor, 7> m_gdt; // null, kernel code, kernel data, user code, user data, processor data, tss
static constexpr uint16_t m_tss_offset = 0x30;
#endif
TaskStateSegment m_tss;
const GDTR m_gdtr {

View File

@ -4,10 +4,8 @@
#if ARCH(x86_64)
#define KERNEL_OFFSET 0xFFFFFFFF80000000
#define USERSPACE_END 0xFFFF800000000000
#elif ARCH(i686)
#define KERNEL_OFFSET 0xC0000000
#define USERSPACE_END 0xC0000000
#else
#error
#endif

View File

@ -26,16 +26,6 @@ namespace Kernel
size_t size() const { return m_size; }
PageTable::flags_t flags() const { return m_flags; }
// Looks up the physical page backing a page-aligned virtual address of this range.
// Asserts that the address is page-aligned, that it maps to an index inside
// m_paddrs and that the stored physical address is non-zero.
paddr_t paddr_of(vaddr_t vaddr) const
{
	ASSERT((vaddr % PAGE_SIZE) == 0);

	const size_t page_index = (vaddr - m_vaddr) / PAGE_SIZE;
	ASSERT(page_index < m_paddrs.size());

	const paddr_t backing_page = m_paddrs[page_index];
	ASSERT(backing_page);

	return backing_page;
}
bool contains(vaddr_t address) const { return vaddr() <= address && address < vaddr() + size(); }
BAN::ErrorOr<void> allocate_page_for_demand_paging(vaddr_t address);

View File

@ -91,7 +91,7 @@ namespace Kernel
bool has_ghost_byte { false };
uint32_t data_size { 0 }; // number of bytes in this buffer
uint8_t scale_shift { 0 }; // window scale
uint8_t scale { 1 }; // window scale
BAN::UniqPtr<VirtualRange> buffer;
};
@ -99,8 +99,8 @@ namespace Kernel
{
uint32_t mss { 0 }; // maximum segment size
uint16_t non_scaled_size { 0 }; // window size without scaling
uint8_t scale_shift { 0 }; // window scale
uint32_t scaled_size() const { return (uint32_t)non_scaled_size << scale_shift; }
uint8_t scale { 0 }; // window scale
uint32_t scaled_size() const { return (uint32_t)non_scaled_size << scale; }
uint32_t start_seq { 0 }; // sequence number of first byte in buffer
uint32_t current_seq { 0 }; // sequence number of next send
@ -118,7 +118,6 @@ namespace Kernel
{
sockaddr_storage address;
socklen_t address_len;
bool has_window_scale;
};
struct PendingConnection

View File

@ -47,9 +47,6 @@ namespace Kernel
BAN::ErrorOr<size_t> read_dir_entries(int fd, struct dirent* list, size_t list_len);
BAN::ErrorOr<size_t> recvfrom(int fd, BAN::ByteSpan buffer, sockaddr* address, socklen_t* address_len);
BAN::ErrorOr<size_t> sendto(int fd, BAN::ConstByteSpan buffer, const sockaddr* address, socklen_t address_len);
BAN::ErrorOr<VirtualFileSystem::File> file_of(int) const;
BAN::ErrorOr<BAN::StringView> path_of(int) const;
BAN::ErrorOr<BAN::RefPtr<Inode>> inode_of(int);

View File

@ -6,7 +6,6 @@
#include <BAN/StringView.h>
#include <BAN/Vector.h>
#include <kernel/Credentials.h>
#include <kernel/ELF.h>
#include <kernel/FS/Inode.h>
#include <kernel/Lock/Mutex.h>
#include <kernel/Memory/Heap.h>
@ -35,6 +34,15 @@ namespace Kernel
public:
using entry_t = Thread::entry_t;
// Start-up parameters of a userspace process, recorded when the process is
// created or replaces its image through exec.
struct userspace_info_t
{
// entry point of the loaded executable
uintptr_t entry { 0 };
// number of entries in argv
int argc { 0 };
// userspace address of the null-terminated argument pointer array
char** argv { nullptr };
// userspace address of the null-terminated environment pointer array,
// nullptr when no environment region was created
char** envp { nullptr };
// descriptor of the executable opened read-only when it has an interpreter, -1 otherwise
int file_fd { -1 };
};
public:
static Process* create_kernel();
static Process* create_kernel(entry_t, void*);
@ -175,9 +183,7 @@ namespace Kernel
BAN::ErrorOr<long> sys_sigprocmask(int how, const sigset_t* set, sigset_t* oset);
BAN::ErrorOr<long> sys_yield();
BAN::ErrorOr<long> sys_set_tls(void*);
BAN::ErrorOr<long> sys_get_tls();
BAN::ErrorOr<long> sys_pthread_create(const pthread_attr_t* attr, void (*entry)(void*), void* arg);
BAN::ErrorOr<long> sys_pthread_create(const pthread_attr_t* __restrict attr, void (*entry)(void*), void* arg);
BAN::ErrorOr<long> sys_pthread_exit(void* value);
BAN::ErrorOr<long> sys_pthread_join(pthread_t thread, void** value);
BAN::ErrorOr<long> sys_pthread_self();
@ -202,6 +208,7 @@ namespace Kernel
size_t proc_environ(off_t offset, BAN::ByteSpan) const;
bool is_userspace() const { return m_is_userspace; }
const userspace_info_t& userspace_info() const { return m_userspace_info; }
// Returns error if page could not be allocated
// Returns true if the page was allocated successfully
@ -220,13 +227,6 @@ namespace Kernel
Process(const Credentials&, pid_t pid, pid_t parent, pid_t sid, pid_t pgrp);
static Process* create_process(const Credentials&, pid_t parent, pid_t sid = 0, pid_t pgrp = 0);
// Result of initialize_thread_local_storage().
struct TLSResult
{
// region holding the copy of the master TLS image followed by the uthread and dtv
BAN::UniqPtr<MemoryRegion> region;
// address of the uthread inside the region (region start + master TLS size)
vaddr_t addr;
};
static BAN::ErrorOr<TLSResult> initialize_thread_local_storage(PageTable&, ELF::LoadResult::TLS master_tls);
struct FileParent
{
VirtualFileSystem::File parent;
@ -315,6 +315,7 @@ namespace Kernel
BAN::Vector<BAN::String> m_environ;
bool m_is_userspace { false };
userspace_info_t m_userspace_info;
SpinLock m_child_exit_lock;
BAN::Vector<ChildExitStatus> m_child_exit_statuses;

View File

@ -110,8 +110,6 @@ namespace Kernel
static void send_smp_message(ProcessorID, const SMPMessage&, bool send_ipi = true);
static void broadcast_smp_message(const SMPMessage&);
static void load_tls();
private:
Processor() = default;
~Processor() { ASSERT_NOT_REACHED(); }

View File

@ -7,8 +7,6 @@
#include <kernel/Memory/VirtualRange.h>
#include <kernel/ThreadBlocker.h>
#include <LibELF/AuxiliaryVector.h>
#include <signal.h>
#include <sys/types.h>
@ -43,10 +41,9 @@ namespace Kernel
BAN::ErrorOr<Thread*> pthread_create(entry_t, void*);
BAN::ErrorOr<Thread*> clone(Process*, uintptr_t sp, uintptr_t ip);
void setup_exec();
void setup_process_cleanup();
BAN::ErrorOr<void> initialize_userspace(vaddr_t entry, BAN::Span<BAN::String> argv, BAN::Span<BAN::String> envp, BAN::Span<LibELF::AuxiliaryVector> auxv);
// Returns true, if thread is going to trigger signal
bool is_interrupted_by_signal() const;
@ -88,9 +85,6 @@ namespace Kernel
bool is_userspace() const { return m_is_userspace; }
void set_tls(vaddr_t tls) { m_tls = tls; }
vaddr_t get_tls() const { return m_tls; }
size_t virtual_page_count() const { return (m_kernel_stack ? (m_kernel_stack->size() / PAGE_SIZE) : 0) + (m_userspace_stack ? (m_userspace_stack->size() / PAGE_SIZE) : 0); }
size_t physical_page_count() const { return virtual_page_count(); }
@ -106,7 +100,7 @@ namespace Kernel
private:
Thread(pid_t tid, Process*);
void setup_exec(vaddr_t ip, vaddr_t sp);
void setup_exec_impl(uintptr_t entry, uintptr_t arg0, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3);
static void on_exit_trampoline(Thread*);
void on_exit();
@ -124,8 +118,6 @@ namespace Kernel
bool m_is_userspace { false };
bool m_delete_process { false };
vaddr_t m_tls { 0 };
SchedulerQueue::Node* m_scheduler_node { nullptr };
InterruptStack m_interrupt_stack { };

View File

@ -8,7 +8,6 @@
#include <ctype.h>
#include <fcntl.h>
#include <pthread.h>
namespace Kernel::ELF
{
@ -109,13 +108,13 @@ namespace Kernel::ELF
auto file_header = TRY(read_and_validate_file_header(inode));
auto program_headers = TRY(read_program_headers(inode, file_header));
size_t exec_max_offset { 0 };
vaddr_t executable_end = 0;
BAN::String interpreter;
for (const auto& program_header : program_headers)
{
if (program_header.p_type == PT_LOAD)
exec_max_offset = BAN::Math::max<vaddr_t>(exec_max_offset, program_header.p_vaddr + program_header.p_memsz);
executable_end = BAN::Math::max<vaddr_t>(executable_end, program_header.p_vaddr + program_header.p_memsz);
else if (program_header.p_type == PT_INTERP)
{
BAN::Vector<uint8_t> interp_buffer;
@ -141,6 +140,9 @@ namespace Kernel::ELF
}
}
if (file_header.e_type == ET_DYN)
executable_end = 0x400000;
if (!interpreter.empty())
{
auto interpreter_inode = TRY(VirtualFileSystem::get().file_from_absolute_path(credentials, interpreter, O_EXEC)).inode;
@ -162,16 +164,15 @@ namespace Kernel::ELF
}
const vaddr_t load_base_vaddr =
[&file_header, exec_max_offset]() -> vaddr_t
[&file_header, executable_end]() -> vaddr_t
{
if (file_header.e_type == ET_EXEC)
return 0;
if (file_header.e_type == ET_DYN)
return (exec_max_offset + PAGE_SIZE - 1) & PAGE_ADDR_MASK;
return (executable_end + PAGE_SIZE - 1) & PAGE_ADDR_MASK;
ASSERT_NOT_REACHED();
}();
vaddr_t last_loaded_address = 0;
BAN::Vector<BAN::UniqPtr<MemoryRegion>> memory_regions;
for (const auto& program_header : program_headers)
{
@ -240,57 +241,10 @@ namespace Kernel::ELF
TRY(memory_regions.emplace_back(BAN::move(region)));
}
last_loaded_address = BAN::Math::max(last_loaded_address, pheader_base + program_header.p_memsz);
}
LoadResult result;
for (const auto& program_header : program_headers)
{
if (program_header.p_type != PT_TLS)
continue;
if (!BAN::Math::is_power_of_two(program_header.p_align))
return BAN::Error::from_errno(EINVAL);
size_t region_size = program_header.p_memsz;
if (auto rem = region_size % program_header.p_align)
region_size += program_header.p_align - rem;
size_t offset = 0;
if (auto rem = region_size % alignof(uthread))
offset = alignof(uthread) - rem;
auto region = TRY(MemoryBackedRegion::create(
page_table,
offset + region_size,
{ .start = last_loaded_address, .end = USERSPACE_END },
MemoryRegion::Type::PRIVATE,
PageTable::Flags::UserSupervisor | PageTable::Flags::Present
));
for (vaddr_t vaddr = region->vaddr(); vaddr < region->vaddr() + offset + region->size(); vaddr += PAGE_SIZE)
TRY(region->allocate_page_containing(vaddr, false));
if (program_header.p_filesz > 0)
{
BAN::Vector<uint8_t> file_data_buffer;
TRY(file_data_buffer.resize(program_header.p_filesz));
if (TRY(inode->read(program_header.p_offset, file_data_buffer.span())) != file_data_buffer.size())
return BAN::Error::from_errno(EFAULT);
TRY(region->copy_data_to_region(offset, file_data_buffer.data(), file_data_buffer.size()));
}
result.master_tls = LoadResult::TLS {
.addr = region->vaddr(),
.size = region->size(),
};
TRY(memory_regions.emplace_back(BAN::move(region)));
}
result.open_execfd = !interpreter.empty();
result.has_interpreter = !interpreter.empty();
result.entry_point = load_base_vaddr + file_header.e_entry;
result.regions = BAN::move(memory_regions);
return BAN::move(result);

View File

@ -26,20 +26,12 @@ namespace Kernel
gdt->write_entry(0x20, 0x00000000, 0xFFFFF, 0xF2, data_flags); // user data
#if ARCH(i686)
gdt->write_entry(0x28, reinterpret_cast<uint32_t>(processor), sizeof(Processor), 0x92, 0x4); // processor data
gdt->write_entry(0x30, 0x00000000, 0x00000, 0x00, 0x0); // tls
#endif
gdt->write_tss();
return gdt;
}
#if ARCH(i686)
// Rewrites the descriptor at GDT offset 0x30 (the "tls" slot) so that its
// base is `addr`.
void GDT::set_tls(uintptr_t addr)
{
	constexpr uint8_t  tls_descriptor_offset = 0x30;
	constexpr uint32_t tls_descriptor_limit  = 0xFFFF;
	constexpr uint8_t  tls_descriptor_access = 0xF2;
	constexpr uint8_t  tls_descriptor_flags  = 0xC;

	write_entry(
		tls_descriptor_offset,
		addr,
		tls_descriptor_limit,
		tls_descriptor_access,
		tls_descriptor_flags
	);
}
#endif
void GDT::write_entry(uint8_t offset, uint32_t base, uint32_t limit, uint8_t access, uint8_t flags)
{
ASSERT(offset % sizeof(SegmentDescriptor) == 0);

View File

@ -248,7 +248,11 @@ namespace Kernel
}
if (Thread::current().has_process() && Process::current().is_userspace())
process_name = Process::current().name();
{
const char* const* argv = Process::current().userspace_info().argv;
if (argv && *argv)
process_name = *argv;
}
#if ARCH(x86_64)
dwarnln(

View File

@ -32,7 +32,7 @@ namespace Kernel
PageTable::Flags::ReadWrite | PageTable::Flags::Present,
true
));
socket->m_recv_window.scale_shift = PAGE_SIZE_SHIFT; // use PAGE_SIZE windows
socket->m_recv_window.scale = 12; // use PAGE_SIZE windows
socket->m_send_window.buffer = TRY(VirtualRange::create_to_vaddr_range(
PageTable::kernel(),
KERNEL_OFFSET,
@ -80,6 +80,7 @@ namespace Kernel
auto connection = m_pending_connections.front();
m_pending_connections.pop();
auto listen_key = ListenKey(
reinterpret_cast<const sockaddr*>(&connection.target.address),
connection.target.address_len
@ -103,9 +104,6 @@ namespace Kernel
return_inode->m_next_state = State::SynReceived;
return_inode->m_mutex.unlock();
if (!return_inode->m_connection_info->has_window_scale)
return_inode->m_recv_window.scale_shift = 0;
TRY(m_listen_children.emplace(listen_key, return_inode));
const uint64_t wake_time_ms = SystemTimer::get().ms_since_boot() + 5000;
@ -157,7 +155,7 @@ namespace Kernel
if (!is_bound())
TRY(m_network_layer.bind_socket_to_unused(this, address, address_len));
m_connection_info.emplace(sockaddr_storage {}, address_len, true);
m_connection_info.emplace(sockaddr_storage {}, address_len);
memcpy(&m_connection_info->address, address, address_len);
m_next_flags = SYN;
@ -226,15 +224,43 @@ namespace Kernel
BAN::ErrorOr<size_t> TCPSocket::sendto_impl(BAN::ConstByteSpan message, const sockaddr* address, socklen_t address_len)
{
(void)address;
(void)address_len;
if (address)
return BAN::Error::from_errno(EISCONN);
if (!m_has_connected)
return BAN::Error::from_errno(ENOTCONN);
while (m_send_window.data_size == m_send_window.buffer->size())
if (message.size() > m_send_window.buffer->size())
{
size_t nsent = 0;
while (nsent < message.size())
{
const size_t to_send = BAN::Math::min<size_t>(message.size() - nsent, m_send_window.buffer->size());
TRY(sendto_impl(message.slice(nsent, to_send), address, address_len));
nsent += to_send;
}
return nsent;
}
while (true)
{
if (m_state != State::Established)
return return_with_maybe_zero();
if (m_send_window.data_size + message.size() <= m_send_window.buffer->size())
break;
LockFreeGuard free(m_mutex);
TRY(Thread::current().block_or_eintr_or_timeout_ms(m_thread_blocker, 100, false));
}
{
auto* buffer = reinterpret_cast<uint8_t*>(m_send_window.buffer->vaddr());
memcpy(buffer + m_send_window.data_size, message.data(), message.size());
m_send_window.data_size += message.size();
}
const uint32_t target_ack = m_send_window.start_seq + m_send_window.data_size;
m_thread_blocker.unblock();
while (m_send_window.current_ack < target_ack)
{
if (m_state != State::Established)
return return_with_maybe_zero();
@ -242,17 +268,7 @@ namespace Kernel
TRY(Thread::current().block_or_eintr_or_timeout_ms(m_thread_blocker, 100, false));
}
const size_t to_send = BAN::Math::min<size_t>(message.size(), m_send_window.buffer->size() - m_send_window.data_size);
{
auto* buffer = reinterpret_cast<uint8_t*>(m_send_window.buffer->vaddr());
memcpy(buffer + m_send_window.data_size, message.data(), to_send);
m_send_window.data_size += to_send;
}
m_thread_blocker.unblock();
return to_send;
return message.size();
}
bool TCPSocket::can_read_impl() const
@ -361,18 +377,16 @@ namespace Kernel
header.seq_number = m_send_window.current_seq + m_send_window.has_ghost_byte;
header.ack_number = m_recv_window.start_seq + m_recv_window.data_size + m_recv_window.has_ghost_byte;
header.data_offset = (sizeof(TCPHeader) + m_tcp_options_bytes) / sizeof(uint32_t);
header.window_size = BAN::Math::min<size_t>(0xFFFF, m_recv_window.buffer->size() >> m_recv_window.scale_shift);
header.window_size = BAN::Math::min<size_t>(0xFFFF, m_recv_window.buffer->size() >> m_recv_window.scale);
header.flags = m_next_flags;
if (header.flags & FIN)
m_send_window.has_ghost_byte = true;
m_next_flags = 0;
if (m_state == State::Closed || m_state == State::SynReceived)
if (m_state == State::Closed)
{
add_tcp_header_option<0, TCPOption::MaximumSeqmentSize>(header, m_interface->payload_mtu() - m_network_layer.header_size());
if (m_connection_info->has_window_scale)
add_tcp_header_option<4, TCPOption::WindowScale>(header, m_recv_window.scale_shift);
add_tcp_header_option<4, TCPOption::WindowScale>(header, m_recv_window.scale);
header.window_size = BAN::Math::min<size_t>(0xFFFF, m_recv_window.buffer->size());
m_send_window.mss = 1440;
@ -449,12 +463,9 @@ namespace Kernel
if (options.maximum_seqment_size.has_value())
m_send_window.mss = *options.maximum_seqment_size;
if (options.window_scale.has_value())
m_send_window.scale_shift = *options.window_scale;
m_send_window.scale = *options.window_scale;
else
{
m_recv_window.scale_shift = 0;
m_connection_info->has_window_scale = false;
}
m_recv_window.scale = 1;
m_send_window.start_seq = m_send_window.current_seq;
m_send_window.current_ack = m_send_window.current_seq;
@ -481,7 +492,6 @@ namespace Kernel
ConnectionInfo connection_info;
memcpy(&connection_info.address, sender, sender_len);
connection_info.address_len = sender_len;
connection_info.has_window_scale = parse_tcp_options(header).window_scale.has_value();
MUST(m_pending_connections.emplace(
connection_info,
header.seq_number + 1

View File

@ -1,6 +1,5 @@
#include <kernel/FS/Pipe.h>
#include <kernel/FS/VirtualFileSystem.h>
#include <kernel/Lock/LockGuard.h>
#include <kernel/Networking/NetworkManager.h>
#include <kernel/OpenFileDescriptorSet.h>
@ -299,13 +298,11 @@ namespace Kernel
{
TRY(validate_fd(fd));
auto& open_file = m_open_files[fd];
if (open_file.inode()->mode().ifsock())
return recvfrom(fd, buffer, nullptr, nullptr);
if (!(open_file.status_flags() & O_RDONLY))
return BAN::Error::from_errno(EBADF);
if ((open_file.status_flags() & O_NONBLOCK) && !open_file.inode()->can_read())
return 0;
const size_t nread = TRY(open_file.inode()->read(open_file.offset(), buffer));
size_t nread = TRY(open_file.inode()->read(open_file.offset(), buffer));
open_file.offset() += nread;
return nread;
}
@ -314,15 +311,13 @@ namespace Kernel
{
TRY(validate_fd(fd));
auto& open_file = m_open_files[fd];
if (open_file.inode()->mode().ifsock())
return sendto(fd, buffer, nullptr, 0);
if (!(open_file.status_flags() & O_WRONLY))
return BAN::Error::from_errno(EBADF);
if ((open_file.status_flags() & O_NONBLOCK) && !open_file.inode()->can_write())
return BAN::Error::from_errno(EWOULDBLOCK);
return 0;
if (open_file.status_flags() & O_APPEND)
open_file.offset() = open_file.inode()->size();
const size_t nwrite = TRY(open_file.inode()->write(open_file.offset(), buffer));
size_t nwrite = TRY(open_file.inode()->write(open_file.offset(), buffer));
open_file.offset() += nwrite;
return nwrite;
}
@ -345,43 +340,6 @@ namespace Kernel
}
}
// Receives data from the socket behind `fd`.
//
// Fails with ENOTSOCK when the descriptor's inode is not a socket and with
// EWOULDBLOCK when the descriptor has O_NONBLOCK set and the inode reports
// that it cannot be read. Errors from validate_fd and from the inode's
// recvfrom are propagated. The inode's mutex is held for the readiness check
// and for the receive itself.
BAN::ErrorOr<size_t> OpenFileDescriptorSet::recvfrom(int fd, BAN::ByteSpan buffer, sockaddr* address, socklen_t* address_len)
{
	TRY(validate_fd(fd));

	auto& descriptor = m_open_files[fd];
	if (!descriptor.inode()->mode().ifsock())
		return BAN::Error::from_errno(ENOTSOCK);

	LockGuard _(descriptor.inode()->m_mutex);

	if (descriptor.status_flags() & O_NONBLOCK)
	{
		if (!descriptor.inode()->can_read())
			return BAN::Error::from_errno(EWOULDBLOCK);
	}

	return descriptor.inode()->recvfrom(buffer, address, address_len);
}
// Sends the whole of `buffer` through the socket behind `fd`, calling the
// inode's sendto repeatedly until every byte has been accepted.
//
// Returns the number of bytes accepted. This is less than buffer.size() when
// the descriptor is non-blocking and the socket stops being writable, or when
// the inode accepts no more data (its sendto returns 0) after a partial send.
//
// Fails with ENOTSOCK when the descriptor's inode is not a socket and with
// EWOULDBLOCK when the descriptor has O_NONBLOCK set and nothing can be
// written at all. Errors from validate_fd and from the inode's sendto are
// propagated.
BAN::ErrorOr<size_t> OpenFileDescriptorSet::sendto(int fd, BAN::ConstByteSpan buffer, const sockaddr* address, socklen_t address_len)
{
	TRY(validate_fd(fd));
	auto& open_file = m_open_files[fd];
	if (!open_file.inode()->mode().ifsock())
		return BAN::Error::from_errno(ENOTSOCK);

	// take the inode lock before the first readiness check so that the check
	// and the send observe the same socket state (recvfrom does the same)
	LockGuard _(open_file.inode()->m_mutex);

	if ((open_file.status_flags() & O_NONBLOCK) && !open_file.inode()->can_write())
		return BAN::Error::from_errno(EWOULDBLOCK);

	size_t total_sent = 0;
	while (total_sent < buffer.size())
	{
		if ((open_file.status_flags() & O_NONBLOCK) && !open_file.inode()->can_write())
			return total_sent;
		const size_t nsend = TRY(open_file.inode()->sendto(buffer.slice(total_sent), address, address_len));
		// no progress: report what was accepted so far instead of discarding the count
		if (nsend == 0)
			return total_sent;
		total_sent += nsend;
	}

	return total_sent;
}
BAN::ErrorOr<VirtualFileSystem::File> OpenFileDescriptorSet::file_of(int fd) const
{
TRY(validate_fd(fd));

View File

@ -17,12 +17,9 @@
#include <kernel/Terminal/PseudoTerminal.h>
#include <kernel/Timer/Timer.h>
#include <LibELF/AuxiliaryVector.h>
#include <LibInput/KeyboardLayout.h>
#include <fcntl.h>
#include <pthread.h>
#include <stdio.h>
#include <sys/banan-os.h>
#include <sys/sysmacros.h>
@ -119,61 +116,76 @@ namespace Kernel
process->m_working_directory = VirtualFileSystem::get().root_file();
process->m_page_table = BAN::UniqPtr<PageTable>::adopt(MUST(PageTable::create_userspace()));
TRY(process->m_cmdline.emplace_back());
TRY(process->m_cmdline.push_back({}));
TRY(process->m_cmdline.back().append(path));
for (auto argument : arguments)
{
TRY(process->m_cmdline.emplace_back());
TRY(process->m_cmdline.back().append(argument));
}
LockGuard _(process->m_process_lock);
auto executable_file = TRY(process->find_file(AT_FDCWD, path.data(), O_EXEC));
auto executable_inode = executable_file.inode;
auto executable_inode = TRY(process->find_file(AT_FDCWD, path.data(), O_EXEC)).inode;
auto executable = TRY(ELF::load_from_inode(executable_inode, process->m_credentials, process->page_table()));
process->m_mapped_regions = BAN::move(executable.regions);
char** argv_addr = nullptr;
{
size_t needed_bytes = sizeof(char*) + path.size() + 1;
for (auto argument : arguments)
needed_bytes += sizeof(char*) + argument.size() + 1;
needed_bytes += sizeof(char*);
if (auto rem = needed_bytes % PAGE_SIZE)
needed_bytes += PAGE_SIZE - rem;
auto argv_region = MUST(MemoryBackedRegion::create(
process->page_table(),
needed_bytes,
{ .start = 0x400000, .end = KERNEL_OFFSET },
MemoryRegion::Type::PRIVATE,
PageTable::Flags::UserSupervisor | PageTable::Flags::Present
));
argv_addr = reinterpret_cast<char**>(argv_region->vaddr());
uintptr_t offset = sizeof(char*) * (1 + arguments.size() + 1);
for (size_t i = 0; i <= arguments.size(); i++)
{
const uintptr_t addr = argv_region->vaddr() + offset;
TRY(argv_region->copy_data_to_region(i * sizeof(char*), reinterpret_cast<const uint8_t*>(&addr), sizeof(char*)));
const auto current = (i == 0) ? path : arguments[i - 1];
TRY(argv_region->copy_data_to_region(offset, reinterpret_cast<const uint8_t*>(current.data()), current.size()));
const uint8_t zero = 0;
TRY(argv_region->copy_data_to_region(offset + current.size(), &zero, 1));
offset += current.size() + 1;
}
const uintptr_t zero = 0;
TRY(argv_region->copy_data_to_region((1 + arguments.size()) * sizeof(char*), reinterpret_cast<const uint8_t*>(&zero), sizeof(char*)));
TRY(process->m_mapped_regions.push_back(BAN::move(argv_region)));
}
if (executable_inode->mode().mode & +Inode::Mode::ISUID)
process->m_credentials.set_euid(executable_inode->uid());
if (executable_inode->mode().mode & +Inode::Mode::ISGID)
process->m_credentials.set_egid(executable_inode->gid());
BAN::Vector<LibELF::AuxiliaryVector> auxiliary_vector;
TRY(auxiliary_vector.reserve(1 + executable.open_execfd));
if (executable.open_execfd)
if (executable.has_interpreter)
{
const int execfd = TRY(process->m_open_file_descriptors.open(BAN::move(executable_file), O_RDONLY));
TRY(auxiliary_vector.push_back({
.a_type = LibELF::AT_EXECFD,
.a_un = { .a_val = static_cast<uint32_t>(execfd) },
}));
VirtualFileSystem::File file;
TRY(file.canonical_path.append("<self>"));
file.inode = executable_inode;
process->m_userspace_info.file_fd = TRY(process->m_open_file_descriptors.open(BAN::move(file), O_RDONLY));
}
TRY(auxiliary_vector.push_back({
.a_type = LibELF::AT_NULL,
.a_un = { .a_val = 0 },
}));
BAN::Optional<vaddr_t> tls_addr;
if (executable.master_tls.has_value())
{
auto tls_result = TRY(process->initialize_thread_local_storage(process->page_table(), *executable.master_tls));
TRY(process->m_mapped_regions.emplace_back(BAN::move(tls_result.region)));
tls_addr = tls_result.addr;
}
process->m_is_userspace = true;
process->m_userspace_info.entry = executable.entry_point;
process->m_userspace_info.argc = 1 + arguments.size();
process->m_userspace_info.argv = argv_addr;
process->m_userspace_info.envp = nullptr;
auto* thread = MUST(Thread::create_userspace(process, process->page_table()));
MUST(thread->initialize_userspace(
executable.entry_point,
process->m_cmdline.span(),
process->m_environ.span(),
auxiliary_vector.span()
));
if (tls_addr.has_value())
thread->set_tls(*tls_addr);
thread->setup_exec();
process->add_thread(thread);
process->register_to_scheduler();
@ -306,63 +318,6 @@ namespace Kernel
ASSERT_NOT_REACHED();
}
// Creates a TLS block in `page_table` from the master TLS image.
//
// A new read-write userspace region of master_size + PAGE_SIZE bytes is
// created at or after the master image and filled as follows:
//   [0, master_size)                  copy of the master TLS image
//   [master_size, +sizeof(uthread))   uthread whose `self` points at itself
//   directly after the uthread        two-entry dtv { 1, region start }
//
// Returns the region together with the address of the uthread inside it.
// Errors from region creation, the temporary buffer allocation and the copies
// into the region are propagated.
BAN::ErrorOr<Process::TLSResult> Process::initialize_thread_local_storage(PageTable& page_table, ELF::LoadResult::TLS master_tls)
{
	const auto [master_addr, master_size] = master_tls;
	ASSERT(master_size % alignof(uthread) == 0);

	const size_t tls_size = master_size + PAGE_SIZE;

	auto region = TRY(MemoryBackedRegion::create(
		page_table,
		tls_size,
		{ .start = master_addr, .end = USERSPACE_END },
		MemoryRegion::Type::PRIVATE,
		PageTable::Flags::UserSupervisor | PageTable::Flags::ReadWrite | PageTable::Flags::Present
	));

	// The master image is read through the fast page, one physical page at a
	// time, into a bounce buffer and copied from there into the new region.
	BAN::Vector<uint8_t> temp_buffer;
	TRY(temp_buffer.resize(BAN::Math::min<size_t>(master_size, PAGE_SIZE)));

	size_t bytes_copied = 0;
	while (bytes_copied < master_size)
	{
		const vaddr_t vaddr = master_addr + bytes_copied;
		const size_t page_offset = vaddr % PAGE_SIZE;

		// Only a single page is mapped by with_fast_page, so a chunk must
		// never extend past the end of the page it starts in.
		const size_t to_copy = BAN::Math::min<size_t>(
			BAN::Math::min<size_t>(master_size - bytes_copied, temp_buffer.size()),
			PAGE_SIZE - page_offset
		);

		const paddr_t paddr = page_table.physical_address_of(vaddr & PAGE_ADDR_MASK);
		PageTable::with_fast_page(paddr, [&] {
			memcpy(temp_buffer.data(), PageTable::fast_page_as_ptr(page_offset), to_copy);
		});

		TRY(region->copy_data_to_region(bytes_copied, temp_buffer.data(), to_copy));
		bytes_copied += to_copy;
	}

	// The uthread lives directly after the copied image.
	const vaddr_t uthread_addr = region->vaddr() + master_size;

	const struct uthread initial_uthread {
		.self = reinterpret_cast<struct uthread*>(uthread_addr),
		.master_tls_addr = reinterpret_cast<void*>(master_addr),
		.master_tls_size = master_size,
	};
	const uintptr_t dtv[2] { 1, region->vaddr() };

	TRY(region->copy_data_to_region(
		master_size,
		reinterpret_cast<const uint8_t*>(&initial_uthread),
		sizeof(struct uthread)
	));
	TRY(region->copy_data_to_region(
		master_size + sizeof(struct uthread),
		reinterpret_cast<const uint8_t*>(&dtv),
		sizeof(dtv)
	));

	TLSResult result;
	result.addr = uthread_addr;
	result.region = BAN::move(region);
	return result;
}
size_t Process::proc_meminfo(off_t offset, BAN::ByteSpan buffer) const
{
ASSERT(offset >= 0);
@ -579,6 +534,7 @@ namespace Kernel
forked->m_open_file_descriptors = BAN::move(*open_file_descriptors);
forked->m_mapped_regions = BAN::move(mapped_regions);
forked->m_is_userspace = m_is_userspace;
forked->m_userspace_info = m_userspace_info;
forked->m_has_called_exec = false;
memcpy(forked->m_signal_handlers, m_signal_handlers, sizeof(m_signal_handlers));
@ -605,69 +561,77 @@ namespace Kernel
TRY(validate_string_access(path));
auto executable_file = TRY(find_file(AT_FDCWD, path, O_EXEC));
auto executable_inode = executable_file.inode;
BAN::Vector<BAN::String> str_argv;
for (int i = 0; argv && argv[i]; i++)
{
TRY(validate_pointer_access(argv + i, sizeof(char*), false));
TRY(validate_string_access(argv[i]));
TRY(str_argv.emplace_back());
TRY(str_argv.back().append(argv[i]));
TRY(str_argv.emplace_back(argv[i]));
}
BAN::Vector<BAN::String> str_envp;
for (int i = 0; envp && envp[i]; i++)
{
TRY(validate_pointer_access(envp + i, sizeof(char*), false));
TRY(validate_pointer_access(envp + 1, sizeof(char*), false));
TRY(validate_string_access(envp[i]));
TRY(str_envp.emplace_back());
TRY(str_envp.back().append(envp[i]));
TRY(str_envp.emplace_back(envp[i]));
}
auto executable_file = TRY(find_file(AT_FDCWD, path, O_EXEC));
auto executable_inode = executable_file.inode;
auto executable = TRY(ELF::load_from_inode(executable_inode, m_credentials, *new_page_table));
auto new_mapped_regions = BAN::move(executable.regions);
BAN::Vector<LibELF::AuxiliaryVector> auxiliary_vector;
TRY(auxiliary_vector.reserve(1 + executable.open_execfd));
BAN::ScopeGuard execfd_guard([this, &auxiliary_vector] {
if (auxiliary_vector.empty())
return;
if (auxiliary_vector.front().a_type != LibELF::AT_EXECFD)
return;
MUST(m_open_file_descriptors.close(auxiliary_vector.front().a_un.a_val));
});
if (executable.open_execfd)
int file_fd = -1;
if (executable.has_interpreter)
{
const int execfd = TRY(m_open_file_descriptors.open(BAN::move(executable_file), O_RDONLY));
TRY(auxiliary_vector.push_back({
.a_type = LibELF::AT_EXECFD,
.a_un = { .a_val = static_cast<uint32_t>(execfd) },
}));
VirtualFileSystem::File file;
file.canonical_path = BAN::move(executable_file.canonical_path);
file.inode = executable_inode;
file_fd = TRY(m_open_file_descriptors.open(BAN::move(file), O_RDONLY));
}
BAN::ScopeGuard file_closer([&] { if (file_fd != -1) MUST(m_open_file_descriptors.close(file_fd)); });
TRY(auxiliary_vector.push_back({
.a_type = LibELF::AT_NULL,
.a_un = { .a_val = 0 },
}));
// allocate memory on the new process for arguments and environment
// Builds a read-write userspace region in the new page table that starts with
// a null-terminated array of pointers, one per string in `container`, followed
// by the bytes of the strings themselves. One byte is reserved after every
// string for its terminator; that byte is not written here.
auto create_region =
	[&](BAN::Span<BAN::String> container) -> BAN::ErrorOr<BAN::UniqPtr<MemoryRegion>>
	{
		const size_t string_count = container.size();
		const size_t pointer_table_size = sizeof(char*) * (string_count + 1);

		// pointer table + every string with its terminator, rounded up to whole pages
		size_t region_size = pointer_table_size;
		for (const auto& str : container)
			region_size += str.size() + 1;
		if (const auto partial_page = region_size % PAGE_SIZE)
			region_size += PAGE_SIZE - partial_page;

		auto region = TRY(MemoryBackedRegion::create(
			*new_page_table,
			region_size,
			{ .start = executable.entry_point, .end = KERNEL_OFFSET },
			MemoryRegion::Type::PRIVATE,
			PageTable::Flags::UserSupervisor | PageTable::Flags::ReadWrite | PageTable::Flags::Present
		));

		size_t string_offset = pointer_table_size;
		for (size_t index = 0; index < string_count; index++)
		{
			const auto& str = container[index];

			// slot `index` of the table points at where the string is about to be placed
			const uintptr_t string_vaddr = region->vaddr() + string_offset;
			TRY(region->copy_data_to_region(sizeof(char*) * index, reinterpret_cast<const uint8_t*>(&string_vaddr), sizeof(char*)));
			TRY(region->copy_data_to_region(string_offset, reinterpret_cast<const uint8_t*>(str.data()), str.size()));

			string_offset += str.size() + 1;
		}

		// terminate the pointer table
		const uintptr_t table_terminator = 0;
		TRY(region->copy_data_to_region(sizeof(char*) * string_count, reinterpret_cast<const uint8_t*>(&table_terminator), sizeof(char*)));

		return BAN::UniqPtr<MemoryRegion>(BAN::move(region));
	};
TRY(new_mapped_regions.reserve(new_mapped_regions.size() + 2));
MUST(new_mapped_regions.push_back(TRY(create_region(str_argv.span()))));
MUST(new_mapped_regions.push_back(TRY(create_region(str_envp.span()))));
auto* new_thread = TRY(Thread::create_userspace(this, *new_page_table));
TRY(new_thread->initialize_userspace(
executable.entry_point,
str_argv.span(),
str_envp.span(),
auxiliary_vector.span()
));
if (executable.master_tls.has_value())
{
auto tls_result = TRY(initialize_thread_local_storage(*new_page_table, *executable.master_tls));
TRY(new_mapped_regions.emplace_back(BAN::move(tls_result.region)));
new_thread->set_tls(tls_result.addr);
}
ASSERT(Processor::get_interrupt_state() == InterruptState::Enabled);
Processor::set_interrupt_state(InterruptState::Disabled);
@ -691,8 +655,8 @@ namespace Kernel
m_threads.front()->m_process = nullptr;
m_threads.front()->give_keep_alive_page_table(BAN::move(m_page_table));
MUST(Processor::scheduler().add_thread(new_thread));
m_threads.front() = new_thread;
MUST(Processor::scheduler().add_thread(m_threads.front()));
for (size_t i = 0; i < sizeof(m_signal_handlers) / sizeof(*m_signal_handlers); i++)
{
@ -709,13 +673,21 @@ namespace Kernel
m_mapped_regions = BAN::move(new_mapped_regions);
m_page_table = BAN::move(new_page_table);
execfd_guard.disable();
file_closer.disable();
m_userspace_info.argc = str_argv.size();
m_userspace_info.argv = reinterpret_cast<char**>(m_mapped_regions[m_mapped_regions.size() - 2]->vaddr());
m_userspace_info.envp = reinterpret_cast<char**>(m_mapped_regions[m_mapped_regions.size() - 1]->vaddr());
m_userspace_info.entry = executable.entry_point;
m_userspace_info.file_fd = file_fd;
m_cmdline = BAN::move(str_argv);
m_environ = BAN::move(str_envp);
}
m_has_called_exec = true;
m_threads.front()->setup_exec();
Processor::yield();
ASSERT_NOT_REACHED();
}
@ -1050,11 +1022,6 @@ namespace Kernel
// Read up to `count` bytes from file descriptor `fd` into `buffer`.
// Runs with m_process_lock held; any error from the helpers is propagated by TRY.
BAN::ErrorOr<long> Process::sys_read(int fd, void* buffer, size_t count)
{
LockGuard _(m_process_lock);
// Zero-length read: only check that `fd` resolves to an inode. `buffer` is
// neither validated nor touched on this path.
if (count == 0)
{
TRY(m_open_file_descriptors.inode_of(fd));
return 0;
}
// Check the caller-supplied range before handing it to the read path.
// NOTE(review): the trailing `true` presumably requests write access to the range - confirm against validate_pointer_access.
TRY(validate_pointer_access(buffer, count, true));
return TRY(m_open_file_descriptors.read(fd, BAN::ByteSpan((uint8_t*)buffer, count)));
}
@ -1169,11 +1136,6 @@ namespace Kernel
BAN::ErrorOr<long> Process::sys_pread(int fd, void* buffer, size_t count, off_t offset)
{
LockGuard _(m_process_lock);
if (count == 0)
{
TRY(m_open_file_descriptors.inode_of(fd));
return 0;
}
TRY(validate_pointer_access(buffer, count, true));
auto inode = TRY(m_open_file_descriptors.inode_of(fd));
return TRY(inode->read(offset, { (uint8_t*)buffer, count }));
@ -1359,8 +1321,16 @@ namespace Kernel
TRY(validate_pointer_access(arguments->message, arguments->length, false));
TRY(validate_pointer_access(arguments->dest_addr, arguments->dest_len, false));
auto message = BAN::ConstByteSpan(static_cast<const uint8_t*>(arguments->message), arguments->length);
return TRY(m_open_file_descriptors.sendto(arguments->socket, message, arguments->dest_addr, arguments->dest_len));
auto inode = TRY(m_open_file_descriptors.inode_of(arguments->socket));
if (!inode->mode().ifsock())
return BAN::Error::from_errno(ENOTSOCK);
const auto status_flags = TRY(m_open_file_descriptors.status_flags_of(arguments->socket));
if ((status_flags & O_NONBLOCK) && !inode->can_write())
return BAN::Error::from_errno(EAGAIN);
BAN::ConstByteSpan message { reinterpret_cast<const uint8_t*>(arguments->message), arguments->length };
return TRY(inode->sendto(message, arguments->dest_addr, arguments->dest_len));
}
BAN::ErrorOr<long> Process::sys_recvfrom(sys_recvfrom_t* arguments)
@ -1379,8 +1349,16 @@ namespace Kernel
TRY(validate_pointer_access(arguments->address, *arguments->address_len, true));
}
auto message = BAN::ByteSpan(static_cast<uint8_t*>(arguments->buffer), arguments->length);
return TRY(m_open_file_descriptors.recvfrom(arguments->socket, message, arguments->address, arguments->address_len));
auto inode = TRY(m_open_file_descriptors.inode_of(arguments->socket));
if (!inode->mode().ifsock())
return BAN::Error::from_errno(ENOTSOCK);
const auto status_flags = TRY(m_open_file_descriptors.status_flags_of(arguments->socket));
if ((status_flags & O_NONBLOCK) && !inode->can_read())
return BAN::Error::from_errno(EAGAIN);
BAN::ByteSpan buffer { reinterpret_cast<uint8_t*>(arguments->buffer), arguments->length };
return TRY(inode->recvfrom(buffer, arguments->address, arguments->address_len));
}
BAN::ErrorOr<long> Process::sys_ioctl(int fildes, int request, void* arg)
@ -1697,7 +1675,7 @@ namespace Kernel
else
page_flags |= PageTable::Flags::UserSupervisor;
AddressRange address_range { .start = 0x400000, .end = USERSPACE_END };
AddressRange address_range { .start = 0x400000, .end = KERNEL_OFFSET };
if (args->flags & MAP_FIXED)
{
vaddr_t base_addr = reinterpret_cast<vaddr_t>(args->addr);
@ -1842,7 +1820,7 @@ namespace Kernel
BAN::ErrorOr<long> Process::sys_smo_map(SharedMemoryObjectManager::Key key)
{
auto region = TRY(SharedMemoryObjectManager::get().map_object(key, page_table(), { .start = 0x400000, .end = USERSPACE_END }));
auto region = TRY(SharedMemoryObjectManager::get().map_object(key, page_table(), { .start = 0x400000, .end = KERNEL_OFFSET }));
LockGuard _(m_process_lock);
TRY(m_mapped_regions.push_back(BAN::move(region)));
@ -2112,28 +2090,12 @@ namespace Kernel
return 0;
}
BAN::ErrorOr<long> Process::sys_set_tls(void* addr)
BAN::ErrorOr<long> Process::sys_pthread_create(const pthread_attr_t* __restrict attr, void (*entry)(void*), void* arg)
{
Thread::current().set_tls(reinterpret_cast<vaddr_t>(addr));
Processor::load_tls();
return 0;
}
// Return whatever Thread::current().get_tls() reports for the calling thread.
BAN::ErrorOr<long> Process::sys_get_tls()
{
return Thread::current().get_tls();
}
BAN::ErrorOr<long> Process::sys_pthread_create(const pthread_attr_t* attr, void (*entry)(void*), void* arg)
{
if (attr)
if (attr != nullptr)
{
TRY(validate_pointer_access(attr, sizeof(*attr), false));
if (*attr)
{
dwarnln("pthread attr not supported");
return BAN::Error::from_errno(ENOTSUP);
}
dwarnln("pthread attr not supported");
return BAN::Error::from_errno(ENOTSUP);
}
LockGuard _(m_process_lock);
@ -2613,7 +2575,7 @@ namespace Kernel
goto unauthorized_access;
// trying to access kernel space memory
if (vaddr + size > USERSPACE_END)
if (vaddr + size > KERNEL_OFFSET)
goto unauthorized_access;
if (vaddr == 0)

View File

@ -10,10 +10,7 @@ extern Kernel::TerminalDriver* g_terminal_driver;
namespace Kernel
{
#if ARCH(x86_64)
static constexpr uint32_t MSR_IA32_FS_BASE = 0xC0000100;
static constexpr uint32_t MSR_IA32_GS_BASE = 0xC0000101;
#endif
ProcessorID Processor::s_bsb_id { PROCESSOR_NONE };
BAN::Atomic<uint8_t> Processor::s_processor_count { 0 };
@ -263,18 +260,6 @@ namespace Kernel
set_interrupt_state(state);
}
// Install the TLS address of the scheduler's current thread on this processor.
void Processor::load_tls()
{
const auto addr = scheduler().current_thread().get_tls();
#if ARCH(x86_64)
// wrmsr takes the MSR index in ecx and the 64-bit value split across
// edx (upper 32 bits) and eax (lower 32 bits); the target is IA32_FS_BASE.
uint32_t ptr_hi = addr >> 32;
uint32_t ptr_lo = addr & 0xFFFFFFFF;
asm volatile("wrmsr" :: "d"(ptr_hi), "a"(ptr_lo), "c"(MSR_IA32_FS_BASE));
#elif ARCH(i686)
// On i686 the address is handed to the GDT rather than written to an MSR.
gdt().set_tls(addr);
#endif
}
void Processor::send_smp_message(ProcessorID processor_id, const SMPMessage& message, bool send_ipi)
{
ASSERT(processor_id != current_id());

View File

@ -278,7 +278,6 @@ namespace Kernel
thread->m_state = Thread::State::Executing;
Processor::gdt().set_tss_stack(thread->kernel_stack_top());
Processor::load_tls();
*interrupt_stack = thread->interrupt_stack();
*interrupt_registers = thread->interrupt_registers();

View File

@ -44,7 +44,11 @@ namespace Kernel
BAN::ErrorOr<long> ret = BAN::Error::from_errno(ENOSYS);
const char* process_path = Process::current().name();
const char* process_path = nullptr;
if (Process::current().userspace_info().argc > 0 && Process::current().userspace_info().argv)
process_path = Process::current().userspace_info().argv[0];
if (process_path == nullptr)
process_path = "<null>";
#if DUMP_ALL_SYSCALLS
dprintln("{} pid {}: {}", process_path, Process::current().pid(), s_syscall_names[syscall]);

View File

@ -29,6 +29,11 @@ namespace Kernel
return Thread::current().interrupt_stack().sp;
}
// C-linkage accessor for the current thread's userspace stack pointer:
// the top of the userspace stack lowered by four pointer-sized slots.
// NOTE(review): the four reserved slots presumably correspond to the values the
// assembly entry stub places for the new thread - confirm against start_userspace_thread.
extern "C" uintptr_t get_userspace_thread_stack_top()
{
return Thread::current().userspace_stack_top() - 4 * sizeof(uintptr_t);
}
extern "C" void load_thread_sse()
{
Thread::current().load_sse();
@ -103,7 +108,7 @@ namespace Kernel
thread->m_kernel_stack = TRY(VirtualRange::create_to_vaddr_range(
page_table,
0x200000, USERSPACE_END,
0x200000, KERNEL_OFFSET,
kernel_stack_size,
PageTable::Flags::ReadWrite | PageTable::Flags::Present,
true
@ -111,7 +116,7 @@ namespace Kernel
thread->m_userspace_stack = TRY(VirtualRange::create_to_vaddr_range(
page_table,
0x200000, USERSPACE_END,
0x200000, KERNEL_OFFSET,
userspace_stack_size,
PageTable::Flags::UserSupervisor | PageTable::Flags::ReadWrite | PageTable::Flags::Present,
true
@ -171,12 +176,11 @@ namespace Kernel
save_sse();
memcpy(thread->m_sse_storage, m_sse_storage, sizeof(m_sse_storage));
PageTable::with_fast_page(thread->userspace_stack().paddr_of(thread->userspace_stack_top() - PAGE_SIZE), [=] {
PageTable::fast_page_as<void*>(PAGE_SIZE - sizeof(uintptr_t)) = arg;
});
const vaddr_t entry_addr = reinterpret_cast<vaddr_t>(entry);
thread->setup_exec(entry_addr, thread->userspace_stack_top() - sizeof(uintptr_t));
thread->setup_exec_impl(
reinterpret_cast<uintptr_t>(entry),
reinterpret_cast<uintptr_t>(arg),
0, 0, 0
);
return thread;
}
@ -196,8 +200,6 @@ namespace Kernel
thread->m_kernel_stack = TRY(m_kernel_stack->clone(new_process->page_table()));
thread->m_userspace_stack = TRY(m_userspace_stack->clone(new_process->page_table()));
thread->m_tls = m_tls;
thread->m_state = State::NotStarted;
thread->m_interrupt_stack.ip = ip;
@ -220,112 +222,21 @@ namespace Kernel
return thread;
}
BAN::ErrorOr<void> Thread::initialize_userspace(vaddr_t entry, BAN::Span<BAN::String> argv, BAN::Span<BAN::String> envp, BAN::Span<LibELF::AuxiliaryVector> auxv)
void Thread::setup_exec()
{
// System V ABI: Initial process stack
const auto& userspace_info = process().userspace_info();
ASSERT(userspace_info.entry);
ASSERT(m_is_userspace);
ASSERT(m_userspace_stack);
size_t needed_size = 0;
// argc
needed_size += sizeof(uintptr_t);
// argv
needed_size += (argv.size() + 1) * sizeof(uintptr_t);
for (auto arg : argv)
needed_size += arg.size() + 1;
// envp
needed_size += (envp.size() + 1) * sizeof(uintptr_t);
for (auto env : envp)
needed_size += env.size() + 1;
// auxv
needed_size += auxv.size() * sizeof(LibELF::AuxiliaryVector);
if (needed_size > m_userspace_stack->size())
return BAN::Error::from_errno(ENOBUFS);
vaddr_t vaddr = userspace_stack_top() - needed_size;
const auto stack_copy_buf =
[this](BAN::ConstByteSpan buffer, vaddr_t vaddr) -> void
{
ASSERT(vaddr + buffer.size() <= userspace_stack_top());
size_t bytes_copied = 0;
while (bytes_copied < buffer.size())
{
const size_t to_copy = BAN::Math::min<size_t>(buffer.size() - bytes_copied, PAGE_SIZE - (vaddr % PAGE_SIZE));
PageTable::with_fast_page(userspace_stack().paddr_of(vaddr & PAGE_ADDR_MASK), [=]() {
memcpy(PageTable::fast_page_as_ptr(vaddr % PAGE_SIZE), buffer.data() + bytes_copied, to_copy);
});
vaddr += to_copy;
bytes_copied += to_copy;
}
};
const auto stack_push_buf =
[&stack_copy_buf, &vaddr](BAN::ConstByteSpan buffer) -> void
{
stack_copy_buf(buffer, vaddr);
vaddr += buffer.size();
};
const auto stack_push_uint =
[&stack_push_buf](uintptr_t value) -> void
{
stack_push_buf(BAN::ConstByteSpan::from(value));
};
const auto stack_push_str =
[&stack_push_buf](BAN::StringView string) -> void
{
const uint8_t* string_u8 = reinterpret_cast<const uint8_t*>(string.data());
stack_push_buf(BAN::ConstByteSpan(string_u8, string.size() + 1));
};
// argc
stack_push_uint(argv.size());
// argv
const vaddr_t argv_vaddr = vaddr;
vaddr += argv.size() * sizeof(uintptr_t);
stack_push_uint(0);
// envp
const vaddr_t envp_vaddr = vaddr;
vaddr += envp.size() * sizeof(uintptr_t);
stack_push_uint(0);
// auxv
for (auto aux : auxv)
stack_push_buf(BAN::ConstByteSpan::from(aux));
// information
for (size_t i = 0; i < argv.size(); i++)
{
stack_copy_buf(BAN::ConstByteSpan::from(vaddr), argv_vaddr + i * sizeof(uintptr_t));
stack_push_str(argv[i]);
}
for (size_t i = 0; i < envp.size(); i++)
{
stack_copy_buf(BAN::ConstByteSpan::from(vaddr), envp_vaddr + i * sizeof(uintptr_t));
stack_push_str(envp[i]);
}
ASSERT(vaddr == userspace_stack_top());
setup_exec(entry, userspace_stack_top() - needed_size);
return {};
setup_exec_impl(
userspace_info.entry,
userspace_info.argc,
reinterpret_cast<uintptr_t>(userspace_info.argv),
reinterpret_cast<uintptr_t>(userspace_info.envp),
userspace_info.file_fd
);
}
void Thread::setup_exec(vaddr_t ip, vaddr_t sp)
void Thread::setup_exec_impl(uintptr_t entry, uintptr_t arg0, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3)
{
ASSERT(is_userspace());
m_state = State::NotStarted;
@ -333,13 +244,13 @@ namespace Kernel
// Signal mask is inherited
// Initialize stack for returning
PageTable::with_fast_page(kernel_stack().paddr_of(kernel_stack_top() - PAGE_SIZE), [=] {
uintptr_t cur_sp = PageTable::fast_page() + PAGE_SIZE;
write_to_stack(cur_sp, 0x20 | 3);
write_to_stack(cur_sp, sp);
write_to_stack(cur_sp, 0x202);
write_to_stack(cur_sp, 0x18 | 3);
write_to_stack(cur_sp, ip);
PageTable::with_fast_page(process().page_table().physical_address_of(kernel_stack_top() - PAGE_SIZE), [&] {
uintptr_t sp = PageTable::fast_page() + PAGE_SIZE;
write_to_stack(sp, entry);
write_to_stack(sp, arg3);
write_to_stack(sp, arg2);
write_to_stack(sp, arg1);
write_to_stack(sp, arg0);
});
m_interrupt_stack.ip = reinterpret_cast<vaddr_t>(start_userspace_thread);
@ -375,7 +286,7 @@ namespace Kernel
m_signal_pending_mask = 0;
m_signal_block_mask = ~0ull;
PageTable::with_fast_page(kernel_stack().paddr_of(kernel_stack_top() - PAGE_SIZE), [&] {
PageTable::with_fast_page(process().page_table().physical_address_of(kernel_stack_top() - PAGE_SIZE), [&] {
uintptr_t sp = PageTable::fast_page() + PAGE_SIZE;
write_to_stack(sp, this);
write_to_stack(sp, &Thread::on_exit_trampoline);

View File

@ -58,10 +58,6 @@ target_compile_definitions(objlibc PRIVATE __arch=${BANAN_ARCH})
target_compile_options(objlibc PRIVATE -O2 -g -Wstack-usage=512 -fno-exceptions -fpic -nolibc)
target_compile_options(objlibc PUBLIC -Wall -Wextra -Werror -Wno-error=stack-usage=)
if("${BANAN_ARCH}" STREQUAL "i686")
target_compile_definitions(objlibc PRIVATE __disable_thread_local_storage)
endif()
function(add_crtx crtx)
add_custom_target(${crtx}
COMMAND ${CMAKE_CXX_COMPILER} -c -o ${CMAKE_INSTALL_LIBDIR}/${crtx}.o ${CMAKE_CURRENT_SOURCE_DIR}/arch/${BANAN_ARCH}/${crtx}.S
@ -75,7 +71,6 @@ add_crtx(crtn)
banan_include_headers(objlibc ban)
banan_include_headers(objlibc kernel)
banan_include_headers(objlibc libelf)
banan_install_headers(objlibc)

View File

@ -2,38 +2,40 @@
.global _start
_start:
# get argc, argv, envp
movl (%esp), %edi
leal 4(%esp), %esi
leal 4(%esi, %edi, 4), %edx
pushl $0
pushl %edi
pushl %esi
pushl %edx
# STACK LAYOUT
# null
# argc
# argv
# envp
# align stack
andl $-16, %esp
xorl %ebp, %ebp
# save argc, argv, envp
subl $4, %esp
pushl %edx
pushl %esi
pushl %edi
subl $4, %esp
pushl $__fini_array_end
pushl $__fini_array_start
pushl $_fini
pushl $__init_array_end
pushl $__init_array_start
pushl $_init
pushl %edx
# init libc (envp already as argument)
call _init_libc
addl $(4 * 8), %esp
# call global constructors
movl $_init, %eax
testl %eax, %eax
jz 1f
call *%eax
1:
# argc, argv, envp already on stack
movl $__init_array_start, %ebx
jmp 2f
1: call *(%ebx)
addl $4, %ebx
2: cmpl $__init_array_end, %ebx
jne 1b
# call main
movl 0(%esp), %eax
xchgl %eax, 8(%esp)
movl %eax, (%esp)
call main
subl $12, %esp

View File

@ -2,40 +2,41 @@
.global _start
_start:
# get argc, argv, envp
movq (%rsp), %rdi
leaq 8(%rsp), %rsi
leaq 8(%rsi, %rdi, 8), %rdx
# align stack
andq $-16, %rsp
xorq %rbp, %rbp
# save argc, argv, envp
subq $8, %rsp
pushq $0
pushq %rdi
pushq %rsi
pushq %rdx
movq %rdx, %rdi # environ
# STACK LAYOUT
# null
# argc
# argv
# envp
pushq $__fini_array_end
pushq $__fini_array_start
pushq $_fini
pushq $__init_array_end
pushq $__init_array_start
pushq $_init
xorq %rbp, %rbp
# init libc
movq 0(%rsp), %rdi
call _init_libc
addq $(6 * 8), %rsp
# call global constructors
movq $_init, %rax
testq %rax, %rax
jz 1f
call *%rax
1:
movq $__init_array_start, %rbx
jmp 2f
1: call *(%rbx)
addq $8, %rbx
2: cmpq $__init_array_end, %rbx
jne 1b
# call main
popq %rdx
popq %rsi
popq %rdi
addq $8, %rsp
movq 16(%rsp), %rdi
movq 8(%rsp), %rsi
movq 0(%rsp), %rdx
call main
# call exit

View File

@ -1,10 +1,6 @@
#include <errno.h>
#if __disable_thread_local_storage
static int s_errno = 0;
#else
static thread_local int s_errno = 0;
#endif
int* __errno_location()
{

View File

@ -1,39 +1,41 @@
#include <BAN/Assert.h>
#include <stdint.h>
#include <stddef.h>
#define ATEXIT_MAX_FUNCS 128
struct atexit_func_entry_t
{
void (*func)(void*);
void(*func)(void*);
void* arg;
void* dso_handle;
};
static atexit_func_entry_t s_atexit_funcs[ATEXIT_MAX_FUNCS];
static size_t s_atexit_func_count = 0;
static atexit_func_entry_t __atexit_funcs[ATEXIT_MAX_FUNCS];
static size_t __atexit_func_count = 0;
extern "C" int __cxa_atexit(void(*func)(void*), void* arg, void* dso_handle)
{
if (s_atexit_func_count >= ATEXIT_MAX_FUNCS)
if (__atexit_func_count >= ATEXIT_MAX_FUNCS)
return -1;
s_atexit_funcs[s_atexit_func_count++] = {
.func = func,
.arg = arg,
.dso_handle = dso_handle,
};
auto& atexit_func = __atexit_funcs[__atexit_func_count++];
atexit_func.func = func;
atexit_func.arg = arg;
atexit_func.dso_handle = dso_handle;
return 0;
};
// Run registered exit handlers in reverse registration order, clearing each
// entry's `func` after it has been called.
// This span is a rendered diff hunk: the lines of both revisions of the function
// are interleaved (two signatures, two loop headers, two filters), so it is not
// compilable as written.
extern "C" void __cxa_finalize(void* dso_handle)
extern "C" void __cxa_finalize(void* f)
{
for (size_t i = s_atexit_func_count; i > 0; i--)
for (size_t i = __atexit_func_count; i > 0; i--)
{
auto& atexit_func = s_atexit_funcs[i - 1];
auto& atexit_func = __atexit_funcs[i - 1];
// Entries that were already run have `func` reset to nullptr.
if (atexit_func.func == nullptr)
continue;
// `dso_handle` revision: a null handle matches every entry, otherwise only
// entries registered with the same dso_handle are run.
if (dso_handle && dso_handle != atexit_func.dso_handle)
continue;
atexit_func.func(atexit_func.arg);
atexit_func.func = nullptr;
// NOTE(review): the `f` revision compares the argument against the registered
// callback pointer, while __cxa_atexit stores a separate dso_handle per entry.
// The Itanium C++ ABI describes this argument as a DSO handle - confirm which
// comparison is intended.
if (f == nullptr || f == atexit_func.func)
{
atexit_func.func(atexit_func.arg);
atexit_func.func = nullptr;
}
}
};

View File

@ -54,7 +54,6 @@ struct hostent
int h_addrtype; /* Address type. */
int h_length; /* The length, in bytes, of the address. */
char** h_addr_list; /* A pointer to an array of pointers to network addresses (in network byte order) for the host, terminated by a null pointer. */
#define h_addr h_addr_list[0] /* Backwards compatibility */
};
struct netent

View File

@ -8,7 +8,6 @@
__BEGIN_DECLS
#include <sched.h>
#include <stdint.h>
#include <time.h>
#define __need_size_t
@ -28,14 +27,6 @@ __BEGIN_DECLS
#define __need_pthread_t
#include <sys/types.h>
struct uthread
{
struct uthread* self;
void* master_tls_addr;
size_t master_tls_size;
uintptr_t dtv[];
};
#define PTHREAD_BARRIER_SERIAL_THREAD 1
#define PTHREAD_CANCEL_ASYNCHRONOUS 2
#define PTHREAD_CANCEL_ENABLE 3

View File

@ -91,8 +91,6 @@ __BEGIN_DECLS
O(SYS_SYMLINKAT, symlinkat) \
O(SYS_HARDLINKAT, hardlinkat) \
O(SYS_YIELD, yield) \
O(SYS_SET_TLS, set_tls) \
O(SYS_GET_TLS, get_tls) \
O(SYS_PTHREAD_CREATE, pthread_create) \
O(SYS_PTHREAD_EXIT, pthread_exit) \
O(SYS_PTHREAD_JOIN, pthread_join) \

View File

@ -2,7 +2,6 @@
#include <assert.h>
#include <errno.h>
#include <pthread.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
@ -51,29 +50,19 @@ struct malloc_pool_t
bool contains(malloc_node_t* node) { return start <= (uint8_t*)node && (uint8_t*)node < end(); }
};
struct malloc_info_t
static malloc_pool_t s_malloc_pools[s_malloc_pool_count];
void _init_malloc()
{
consteval malloc_info_t()
size_t pool_size = s_malloc_pool_size_initial;
for (size_t i = 0; i < s_malloc_pool_count; i++)
{
size_t pool_size = s_malloc_pool_size_initial;
for (auto& pool : pools)
{
pool = {
.start = nullptr,
.size = pool_size,
.free_list = nullptr,
};
pool_size *= s_malloc_pool_size_multiplier;
}
s_malloc_pools[i].start = nullptr;
s_malloc_pools[i].size = pool_size;
s_malloc_pools[i].free_list = nullptr;;
pool_size *= s_malloc_pool_size_multiplier;
}
malloc_pool_t pools[s_malloc_pool_count];
};
static malloc_info_t s_malloc_info;
static auto& s_malloc_pools = s_malloc_info.pools;
static pthread_spinlock_t s_malloc_lock;
}
static bool allocate_pool(size_t pool_index)
{
@ -203,31 +192,19 @@ void* malloc(size_t size)
// try to find any already existing pools that we can allocate in
for (size_t i = first_usable_pool; i < s_malloc_pool_count; i++)
{
if (s_malloc_pools[i].start == nullptr)
continue;
pthread_spin_lock(&s_malloc_lock);
void* ret = allocate_from_pool(i, size);
pthread_spin_unlock(&s_malloc_lock);
if (ret != nullptr)
return ret;
}
if (s_malloc_pools[i].start != nullptr)
if (void* ret = allocate_from_pool(i, size))
return ret;
// allocate new pool
for (size_t i = first_usable_pool; i < s_malloc_pool_count; i++)
{
if (s_malloc_pools[i].start != nullptr)
continue;
pthread_spin_lock(&s_malloc_lock);
void* ret = nullptr;
if (allocate_pool(i))
ret = allocate_from_pool(i, size);
pthread_spin_unlock(&s_malloc_lock);
if (ret == nullptr)
if (!allocate_pool(i))
break;
return ret;
// NOTE: always works since we just created the pool
return allocate_from_pool(i, size);
}
errno = ENOMEM;
@ -273,8 +250,6 @@ void free(void* ptr)
if (ptr == nullptr)
return;
pthread_spin_lock(&s_malloc_lock);
auto* node = node_from_data_pointer(ptr);
node->allocated = false;
@ -296,8 +271,6 @@ void free(void* ptr)
node->prev_free = nullptr;
node->next_free = pool.free_list;
pool.free_list = node;
pthread_spin_unlock(&s_malloc_lock);
}
void* calloc(size_t nmemb, size_t size)

View File

@ -2,195 +2,55 @@
#include <BAN/Atomic.h>
#include <BAN/PlacementNew.h>
#include <kernel/Arch.h>
#include <errno.h>
#include <pthread.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>
struct pthread_trampoline_info_t
{
struct uthread* uthread;
void* (*start_routine)(void*);
void* arg;
};
// stack is 16 byte aligned on entry, this `call` is used to align it
extern "C" void _pthread_trampoline(void*);
asm(
#if ARCH(x86_64)
"_pthread_trampoline:"
"popq %rdi;"
"andq $-16, %rsp;"
"xorq %rbp, %rbp;"
"call _pthread_trampoline_cpp"
#elif ARCH(i686)
"_pthread_trampoline:"
"ud2;"
"popl %edi;"
"andl $-16, %esp;"
"xorl %ebp, %ebp;"
"subl $12, %esp;"
"pushl %edi;"
"call _pthread_trampoline_cpp"
#endif
);
extern "C" void pthread_trampoline(void*);
asm("pthread_trampoline: call pthread_trampoline_cpp");
extern "C" void _pthread_trampoline_cpp(void* arg)
extern "C" void pthread_trampoline_cpp(void* arg)
{
auto info = *reinterpret_cast<pthread_trampoline_info_t*>(arg);
syscall(SYS_SET_TLS, info.uthread);
pthread_trampoline_info_t info;
memcpy(&info, arg, sizeof(pthread_trampoline_info_t));
free(arg);
pthread_exit(info.start_routine(info.arg));
ASSERT_NOT_REACHED();
}
// Fetch the calling thread's uthread pointer by loading the pointer-sized value
// at offset 0 of the thread segment: %fs on x86_64, %gs on i686.
static uthread* get_uthread()
{
uthread* result;
#if ARCH(x86_64)
asm volatile("movq %%fs:0, %0" : "=r"(result));
#elif ARCH(i686)
asm volatile("movl %%gs:0, %0" : "=r"(result));
#endif
return result;
}
// Release the memory backing `uthread`.
// dtv[0] holds the module count. A count of zero selects the free() path;
// otherwise the uthread is treated as part of one mapping laid out as
// [master TLS copy][struct uthread][dtv entries], which is unmapped as a whole.
// NOTE(review): a uthread with dtv[0] == 0 that lives in static storage (as the
// one set up during libc initialisation appears to) would be passed to free()
// here - confirm callers never reach this with such a pointer.
static void free_uthread(uthread* uthread)
{
if (uthread->dtv[0] == 0)
return free(uthread);
// The mapping starts master_tls_size bytes below the uthread itself.
uint8_t* tls_addr = reinterpret_cast<uint8_t*>(uthread) - uthread->master_tls_size;
// TLS image + header + dtv[0] plus one dtv slot per module.
const size_t tls_size = uthread->master_tls_size
+ sizeof(struct uthread)
+ (uthread->dtv[0] + 1) * sizeof(uintptr_t);
munmap(tls_addr, tls_size);
}
#if not __disable_thread_local_storage
// One node of the singly linked stack of cleanup handlers.
struct pthread_cleanup_t
{
void (*routine)(void*); // handler to call
void* arg; // argument passed to `routine`
pthread_cleanup_t* next; // node that was on top before this one was pushed
};
static thread_local pthread_cleanup_t* s_cleanup_stack = nullptr;
// Remove the most recently pushed cleanup handler and free its node.
// The handler is called with its stored argument only when `execute` is non-zero.
// Asserts if the cleanup stack is empty.
void pthread_cleanup_pop(int execute)
{
ASSERT(s_cleanup_stack);
auto* cleanup = s_cleanup_stack;
// Unlink first so the handler runs with the stack already popped.
s_cleanup_stack = cleanup->next;
if (execute)
cleanup->routine(cleanup->arg);
free(cleanup);
}
// Push a cleanup handler (`routine` with `arg`) on top of the cleanup stack.
// The node is heap-allocated; allocation failure trips the ASSERT instead of
// being reported to the caller.
void pthread_cleanup_push(void (*routine)(void*), void* arg)
{
auto* cleanup = static_cast<pthread_cleanup_t*>(malloc(sizeof(pthread_cleanup_t)));
ASSERT(cleanup);
cleanup->routine = routine;
cleanup->arg = arg;
cleanup->next = s_cleanup_stack;
s_cleanup_stack = cleanup;
}
#endif
int pthread_attr_init(pthread_attr_t* attr)
{
*attr = 0;
return 0;
}
int pthread_create(pthread_t* __restrict thread_id, const pthread_attr_t* __restrict attr, void* (*start_routine)(void*), void* __restrict arg)
int pthread_create(pthread_t* __restrict thread, const pthread_attr_t* __restrict attr, void* (*start_routine)(void*), void* __restrict arg)
{
auto* info = static_cast<pthread_trampoline_info_t*>(malloc(sizeof(pthread_trampoline_info_t)));
if (info == nullptr)
return errno;
return -1;
info->start_routine = start_routine;
info->arg = arg;
*info = {
.uthread = nullptr,
.start_routine = start_routine,
.arg = arg,
};
long syscall_ret = 0;
if (uthread* self = get_uthread(); self->master_tls_addr == nullptr)
const auto ret = syscall(SYS_PTHREAD_CREATE, attr, pthread_trampoline, info);
if (ret == -1)
{
uthread* uthread = static_cast<struct uthread*>(malloc(sizeof(struct uthread) + sizeof(uintptr_t)));
if (uthread == nullptr)
goto pthread_create_error;
uthread->self = uthread;
uthread->master_tls_addr = nullptr;
uthread->master_tls_size = 0;
uthread->dtv[0] = 0;
info->uthread = uthread;
}
else
{
const size_t module_count = self->dtv[0];
const size_t tls_size = self->master_tls_size
+ sizeof(uthread)
+ (module_count + 1) * sizeof(uintptr_t);
uint8_t* tls_addr = static_cast<uint8_t*>(mmap(nullptr, tls_size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0));
if (tls_addr == MAP_FAILED)
goto pthread_create_error;
memcpy(tls_addr, self->master_tls_addr, self->master_tls_size);
uthread* uthread = reinterpret_cast<struct uthread*>(tls_addr + self->master_tls_size);
uthread->self = uthread;
uthread->master_tls_addr = self->master_tls_addr;
uthread->master_tls_size = self->master_tls_size;
const uintptr_t self_addr = reinterpret_cast<uintptr_t>(self);
const uintptr_t uthread_addr = reinterpret_cast<uintptr_t>(uthread);
uthread->dtv[0] = module_count;
for (size_t i = 1; i <= module_count; i++)
uthread->dtv[i] = self->dtv[i] - self_addr + uthread_addr;
info->uthread = uthread;
free(info);
return -1;
}
syscall_ret = syscall(SYS_PTHREAD_CREATE, attr, _pthread_trampoline, info);
if (syscall_ret == -1)
goto pthread_create_error;
if (thread_id)
*thread_id = syscall_ret;
if (thread)
*thread = ret;
return 0;
pthread_create_error:
const int return_code = errno;
if (info->uthread)
free_uthread(info->uthread);
free(info);
return return_code;
}
void pthread_exit(void* value_ptr)
{
#if not __disable_thread_local_storage
while (s_cleanup_stack)
pthread_cleanup_pop(1);
#endif
free_uthread(get_uthread());
syscall(SYS_PTHREAD_EXIT, value_ptr);
ASSERT_NOT_REACHED();
}
@ -202,14 +62,7 @@ int pthread_join(pthread_t thread, void** value_ptr)
pthread_t pthread_self(void)
{
#if __disable_thread_local_storage
return syscall(SYS_PTHREAD_SELF);
#else
static thread_local pthread_t s_pthread_self { -1 };
if (s_pthread_self == -1) [[unlikely]]
s_pthread_self = syscall(SYS_PTHREAD_SELF);
return s_pthread_self;
#endif
}
static inline BAN::Atomic<pthread_t>& pthread_spin_get_atomic(pthread_spinlock_t* lock)
@ -270,21 +123,3 @@ int pthread_spin_unlock(pthread_spinlock_t* lock)
atomic.store(0, BAN::MemoryOrder::memory_order_release);
return 0;
}
// Argument of the __tls_get_addr family: names a TLS variable as a
// (module, offset) pair.
struct tls_index
{
unsigned long int ti_module; // index into the thread's dtv array
unsigned long int ti_offset; // byte offset added to that dtv entry
};
// Resolve a TLS variable address: the calling thread's dtv entry for
// ti->ti_module plus ti->ti_offset. No bounds check is made on ti_module.
extern "C" void* __tls_get_addr(tls_index* ti)
{
return reinterpret_cast<void*>(get_uthread()->dtv[ti->ti_module] + ti->ti_offset);
}
#if ARCH(i686)
// i686-only variant of __tls_get_addr with the same computation; it differs only
// in being declared regparm(1), i.e. taking `ti` in a register.
extern "C" void* __attribute__((__regparm__(1))) ___tls_get_addr(tls_index* ti)
{
return reinterpret_cast<void*>(get_uthread()->dtv[ti->ti_module] + ti->ti_offset);
}
#endif

View File

@ -94,8 +94,7 @@ static int drop_read_buffer(FILE* file)
return 0;
}
__attribute__((constructor))
static void _init_stdio()
void _init_stdio()
{
for (size_t i = 0; i < FOPEN_MAX; i++)
{

View File

@ -14,14 +14,16 @@
#include <strings.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/weak_alias.h>
#include <unistd.h>
#include <icxxabi.h>
char** __environ;
weak_alias(__environ, environ);
static bool s_environ_malloced = false;
extern "C" char** environ;
extern "C" __attribute__((weak)) void _fini();
static void (*at_exit_funcs[64])();
static uint32_t at_exit_funcs_count = 0;
void abort(void)
{
@ -39,8 +41,11 @@ void abort(void)
void exit(int status)
{
for (uint32_t i = at_exit_funcs_count; i > 0; i--)
at_exit_funcs[i - 1]();
fflush(nullptr);
__cxa_finalize(nullptr);
if (_fini) _fini();
_exit(status);
ASSERT_NOT_REACHED();
}
@ -57,8 +62,13 @@ int abs(int val)
// Register `func` to be called at exit.
// This span is a rendered diff hunk holding both revisions of the body: one
// forwards to __cxa_atexit through a captureless lambda that casts the stored
// pointer back to a function; the other appends to the fixed-size
// at_exit_funcs array and returns -1 with errno = ENOBUFS when it is full.
int atexit(void (*func)(void))
{
void* func_addr = reinterpret_cast<void*>(func);
return __cxa_atexit([](void* func_ptr) { reinterpret_cast<void (*)(void)>(func_ptr)(); }, func_addr, nullptr);
// NOTE(review): `>` still passes when at_exit_funcs_count equals the array
// length, so the store below can write one element past the end of
// at_exit_funcs; the bound check needs `>=`.
if (at_exit_funcs_count > sizeof(at_exit_funcs) / sizeof(*at_exit_funcs))
{
errno = ENOBUFS;
return -1;
}
at_exit_funcs[at_exit_funcs_count++] = func;
return 0;
}
static constexpr int get_base_digit(char c, int base)
@ -506,35 +516,27 @@ int putenv(char* string)
return -1;
}
if (!s_environ_malloced)
if (!environ)
{
size_t env_count = 0;
while (environ[env_count])
env_count++;
char** new_environ = static_cast<char**>(malloc((env_count + 1) * sizeof(char*)));
if (new_environ == nullptr)
environ = (char**)malloc(sizeof(char*) * 2);
if (!environ)
return -1;
for (size_t i = 0; i < env_count; i++)
{
const size_t bytes = strlen(environ[i]) + 1;
new_environ[i] = (char*)malloc(bytes);
memcpy(new_environ[i], environ[i], bytes);
}
new_environ[env_count] = nullptr;
environ = new_environ;
s_environ_malloced = true;
environ[0] = string;
environ[1] = nullptr;
return 0;
}
const char* eq_addr = strchr(string, '=');
if (eq_addr == nullptr)
int cnt = 0;
for (int i = 0; string[i]; i++)
if (string[i] == '=')
cnt++;
if (cnt != 1)
{
errno = EINVAL;
return -1;
}
size_t namelen = eq_addr - string;
int namelen = strchr(string, '=') - string;
for (int i = 0; environ[i]; i++)
{
if (strncmp(environ[i], string, namelen + 1) == 0)
@ -545,15 +547,15 @@ int putenv(char* string)
}
}
size_t env_count = 0;
int env_count = 0;
while (environ[env_count])
env_count++;
char** new_envp = static_cast<char**>(malloc(sizeof(char*) * (env_count + 2)));
char** new_envp = (char**)malloc(sizeof(char*) * (env_count + 2));
if (new_envp == nullptr)
return -1;
for (size_t i = 0; i < env_count; i++)
for (int i = 0; i < env_count; i++)
new_envp[i] = environ[i];
new_envp[env_count] = string;
new_envp[env_count + 1] = nullptr;

View File

@ -17,11 +17,6 @@ int gettimeofday(struct timeval* __restrict tp, void* __restrict tzp)
return 0;
}
// Query timer `which` by calling setitimer with a null new value, so the
// timer's setting is written to `value` through setitimer's old-value output.
int getitimer(int which, struct itimerval* value)
{
return setitimer(which, nullptr, value);
}
int setitimer(int which, const struct itimerval* __restrict value, struct itimerval* __restrict ovalue)
{
return syscall(SYS_SETITIMER, which, value, ovalue);

View File

@ -1,12 +1,10 @@
#include <BAN/Assert.h>
#include <BAN/Debug.h>
#include <kernel/Memory/Types.h>
#include <kernel/Syscall.h>
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <pwd.h>
#include <stdarg.h>
#include <stdio.h>
@ -18,48 +16,36 @@
#include <termios.h>
#include <unistd.h>
// libc start-up entry point called from _start.
// This span is a rendered diff hunk: the declarations and body of two revisions
// are interleaved, so it is not compilable as written.
//  - one revision takes (environ, init_funcs, fini_funcs), installs a static
//    uthread when SYS_GET_TLS returns 0, runs the init function/array and
//    registers the fini function/array with atexit;
//  - the other takes only `_environ`, runs _init_malloc/_init_stdio once and
//    deep-copies the environment into freshly malloc'ed storage.
struct init_funcs_t
char** __environ;
extern char** environ __attribute__((weak, alias("__environ")));
extern void _init_malloc();
extern void _init_stdio();
extern "C" void _init_libc(char** _environ)
{
void (*func)();
void (**array_start)();
void (**array_end)();
};
// Guard so repeated calls only initialise once.
static bool is_initialized = false;
if (is_initialized)
return;
is_initialized = true;
extern "C" char** environ;
_init_malloc();
_init_stdio();
extern "C" void _init_libc(char** environ, init_funcs_t init_funcs, init_funcs_t fini_funcs)
{
if (::environ == nullptr)
::environ = environ;
if (!_environ)
return;
if (syscall(SYS_GET_TLS) == 0)
size_t env_count = 0;
while (_environ[env_count])
env_count++;
// NOTE(review): `sizeof(char*) * env_count + 1` adds one byte, not one pointer
// slot. The `environ[env_count] = nullptr` terminator written further down needs
// `sizeof(char*) * (env_count + 1)`, so this under-allocates the array.
// The malloc results here and in the copy loop are also used without a null check.
environ = (char**)malloc(sizeof(char*) * env_count + 1);
for (size_t i = 0; i < env_count; i++)
{
alignas(uthread) static uint8_t storage[sizeof(uthread) + sizeof(uintptr_t)];
uthread& uthread = *reinterpret_cast<struct uthread*>(storage);
uthread = {
.self = &uthread,
.master_tls_addr = nullptr,
.master_tls_size = 0,
};
uthread.dtv[0] = 0;
syscall(SYS_SET_TLS, &uthread);
size_t bytes = strlen(_environ[i]) + 1;
environ[i] = (char*)malloc(bytes);
memcpy(environ[i], _environ[i], bytes);
}
// call global constructors
if (init_funcs.func)
init_funcs.func();
const size_t init_array_count = init_funcs.array_end - init_funcs.array_start;
for (size_t i = 0; i < init_array_count; i++)
init_funcs.array_start[i]();
// register global destructors
const size_t fini_array_count = fini_funcs.array_end - fini_funcs.array_start;
for (size_t i = 0; i < fini_array_count; i++)
atexit(fini_funcs.array_start[i]);
if (fini_funcs.func)
atexit(fini_funcs.func);
environ[env_count] = nullptr;
}
void _exit(int status)

View File

@ -1,28 +0,0 @@
#pragma once
#include <stdint.h>
// Definitions for the ELF auxiliary vector: an array of (type, value) entries
// that is scanned until an entry whose a_type is AT_NULL.
namespace LibELF
{
// A single auxiliary vector entry.
struct AuxiliaryVector
{
// Entry tag; one of AuxiliaryVectorValues.
uint32_t a_type;
// Payload; whether a_val or a_ptr is meaningful depends on a_type.
union
{
uint32_t a_val;
void* a_ptr;
} a_un;
};
// Tags stored in AuxiliaryVector::a_type.
// NOTE(review): names and numeric values look like the System V ABI AT_*
// constants; the per-tag notes below follow that convention and should be
// confirmed against the code that produces the vector.
enum AuxiliaryVectorValues
{
// Terminates the vector.
AT_NULL = 0,
// Entry carries no information and should be skipped.
AT_IGNORE = 1,
// a_val holds a file descriptor of the program to execute.
AT_EXECFD = 2,
// Presumably the address of the program header table.
AT_PHDR = 3,
// Presumably the size of one program header entry.
AT_PHENT = 4,
// Presumably the number of program header entries.
AT_PHNUM = 5,
};
}

View File

@ -109,7 +109,6 @@ namespace LibELF
STT_FUNC = 2,
STT_SECTION = 3,
STT_FILE = 4,
STT_TLS = 6,
STT_LOOS = 10,
STT_HIOS = 12,
STT_LOPROC = 13,
@ -125,7 +124,6 @@ namespace LibELF
PT_NOTE = 4,
PT_SHLIB = 5,
PT_PHDR = 6,
PT_TLS = 7,
PT_LOOS = 0x60000000,
PT_GNU_EH_FRAME = 0x6474E550,
PT_GNU_STACK = 0x6474E551,
@ -196,26 +194,6 @@ namespace LibELF
R_386_RELATIVE = 8,
R_386_GOTOFF = 9,
R_386_GOTPC = 10,
R_386_TLS_TPOFF = 14,
R_386_TLS_IE = 15,
R_386_TLS_GOTIE = 16,
R_386_TLS_LE = 17,
R_386_TLS_GD = 18,
R_386_TLS_LDM = 19,
R_386_TLS_GD_32 = 24,
R_386_TLS_GD_PUSH = 25,
R_386_TLS_GD_CALL = 26,
R_386_TLS_GD_POP = 27,
R_386_TLS_LDM_32 = 28,
R_386_TLS_LDM_PUSH = 29,
R_386_TLS_LDM_CALL = 30,
R_386_TLS_LDM_POP = 31,
R_386_TLS_LDO_32 = 32,
R_386_TLS_IE_32 = 33,
R_386_TLS_LE_32 = 34,
R_386_TLS_DTPMOD32 = 35,
R_386_TLS_DTPOFF32 = 36,
R_386_TLS_TPOFF32 = 37,
};
#define ELF64_R_SYM(i) ((i) >> 32)

View File

@ -1,65 +1,32 @@
#include "utils.h"
#include <LibELF/AuxiliaryVector.h>
#include <LibELF/Types.h>
#include <LibELF/Values.h>
#include <fcntl.h>
#include <limits.h>
#include <pthread.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>
// ELF_R_SYM extracts the symbol index from a relocation's r_info field using
// the accessor that matches the target's ELF class (64-bit on x86_64, 32-bit
// on i686). On any other architecture the macro is left undefined.
#if defined(__x86_64__)
#define ELF_R_SYM ELF64_R_SYM
#elif defined(__i686__)
#define ELF_R_SYM ELF32_R_SYM
#endif
extern "C"
__attribute__((naked))
void _start()
{
#if defined(__x86_64__)
asm volatile(
"movq (%rsp), %rdi;"
"leaq 8(%rsp), %rsi;"
"leaq 8(%rsi, %rdi, 8), %rdx;"
"movq %rsp, %rbp;"
"andq $-16, %rsp;"
"call _entry;"
"movq %rbp, %rsp;"
"xorq %rbp, %rbp;"
"jmp *%rax;"
"call _entry;"
"ud2;"
);
#elif defined(__i686__)
asm volatile(
"movl (%esp), %edi;"
"leal 4(%esp), %esi;"
"leal 4(%esi, %edi, 4), %edx;"
"movl %esp, %ebp;"
"andl $-16, %esp;"
"subl $4, %esp;"
"xorl %ebp, %ebp;"
"pushl %ecx;"
"pushl %edx;"
"pushl %esi;"
"pushl %edi;"
"call _entry;"
"movl %ebp, %esp;"
"xorl %ebp, %ebp;"
"jmp *%eax;"
"ud2;"
);
#else
@ -67,6 +34,29 @@ void _start()
#endif
}
// Transfers control to the loaded program's entry point and never returns.
//
// Parameters, in order: argc, argv, envp and the absolute entry address.
// The function is naked, so the compiler emits no prologue/epilogue and the
// arguments are still exactly where the caller's calling convention put them.
__attribute__((naked, noreturn))
static void call_entry_point(int, char**, char**, uintptr_t)
{
#if defined(__x86_64__)
// The first three arguments are left untouched in their incoming registers;
// the jump target is taken from %rcx (presumably the fourth integer argument
// register of the System V AMD64 calling convention).
asm volatile(
// Round the stack pointer down to a 16-byte boundary before jumping.
"andq $-16, %rsp;"
"jmp *%rcx;"
);
#elif defined(__i686__)
asm volatile(
// Discard the top stack slot (presumably the return address pushed by the
// caller's `call`, since this function never returns).
"addl $4, %esp;"
// Pop the four stack-passed arguments: argc, argv, envp, entry address.
"popl %edi;"
"popl %esi;"
"popl %edx;"
"popl %ecx;"
// Round the stack pointer down to a 16-byte boundary, then jump to the entry.
"andl $-16, %esp;"
"jmp *%ecx;"
);
#else
#error "unsupported architecture"
#endif
}
using namespace LibELF;
static void validate_program_header(const ElfNativeFileHeader& file_header)
@ -146,15 +136,8 @@ static void resolve_symbol_trampoline()
struct LoadedElf
{
ElfNativeFileHeader file_header;
ElfNativeProgramHeader tls_header;
ElfNativeDynamic* dynamics;
uint8_t* tls_addr;
size_t tls_module;
size_t tls_offset;
int fd;
uintptr_t base;
uintptr_t hash;
@ -180,8 +163,8 @@ struct LoadedElf
uintptr_t init_array;
size_t init_arraysz;
bool is_calling_init;
bool is_relocating;
bool has_called_init;
bool is_relocated;
char path[PATH_MAX];
};
@ -223,72 +206,21 @@ static ElfNativeSymbol* find_symbol(const LoadedElf& elf, const char* name)
return nullptr;
}
// Reports whether `reloc` has one of the thread-local-storage relocation
// types of the target architecture. Every other type yields false.
template<typename RelocT> requires BAN::is_same_v<RelocT, ElfNativeRelocation> || BAN::is_same_v<RelocT, ElfNativeRelocationA>
static bool is_tls_relocation(const RelocT& reloc)
{
	bool is_tls = false;

#if defined(__x86_64__)
	switch (ELF64_R_TYPE(reloc.r_info))
	{
		case R_X86_64_DTPMOD64:
		case R_X86_64_DTPOFF64:
		case R_X86_64_TPOFF64:
		case R_X86_64_TLSGD:
		case R_X86_64_TLSLD:
		case R_X86_64_DTPOFF32:
		case R_X86_64_GOTTPOFF:
		case R_X86_64_TPOFF32:
			is_tls = true;
			break;
		default:
			break;
	}
#elif defined(__i686__)
	switch (ELF32_R_TYPE(reloc.r_info))
	{
		case R_386_TLS_TPOFF:
		case R_386_TLS_IE:
		case R_386_TLS_GOTIE:
		case R_386_TLS_LE:
		case R_386_TLS_GD:
		case R_386_TLS_LDM:
		case R_386_TLS_GD_32:
		case R_386_TLS_GD_PUSH:
		case R_386_TLS_GD_CALL:
		case R_386_TLS_GD_POP:
		case R_386_TLS_LDM_32:
		case R_386_TLS_LDM_PUSH:
		case R_386_TLS_LDM_CALL:
		case R_386_TLS_LDM_POP:
		case R_386_TLS_LDO_32:
		case R_386_TLS_IE_32:
		case R_386_TLS_LE_32:
		case R_386_TLS_DTPMOD32:
		case R_386_TLS_DTPOFF32:
		case R_386_TLS_TPOFF32:
			is_tls = true;
			break;
		default:
			break;
	}
#else
	#error "unsupported architecture"
#endif

	return is_tls;
}
// Reports whether `reloc` is the architecture's COPY relocation type.
template<typename RelocT> requires BAN::is_same_v<RelocT, ElfNativeRelocation> || BAN::is_same_v<RelocT, ElfNativeRelocationA>
static bool is_copy_relocation(const RelocT& reloc)
{
#if defined(__x86_64__)
	const bool is_copy = (ELF64_R_TYPE(reloc.r_info) == R_X86_64_COPY);
#elif defined(__i686__)
	const bool is_copy = (ELF32_R_TYPE(reloc.r_info) == R_386_COPY);
#else
	#error "unsupported architecture"
#endif
	return is_copy;
}
template<typename RelocT> requires BAN::is_same_v<RelocT, ElfNativeRelocation> || BAN::is_same_v<RelocT, ElfNativeRelocationA>
static void handle_copy_relocation(const LoadedElf& elf, const RelocT& reloc)
{
if (!is_copy_relocation(reloc))
#if defined(__x86_64__)
if (ELF64_R_TYPE(reloc.r_info) != R_X86_64_COPY)
return;
const uint32_t symbol_index = ELF64_R_SYM(reloc.r_info);
#elif defined(__i686__)
if (ELF32_R_TYPE(reloc.r_info) != R_386_COPY)
return;
const uint32_t symbol_index = ELF32_R_SYM(reloc.r_info);
#else
#error "unsupported architecture"
#endif
const uint32_t symbol_index = ELF_R_SYM(reloc.r_info);
if (symbol_index == 0)
print_error_and_exit("copy relocation without a symbol", 0);
@ -327,60 +259,25 @@ static void handle_copy_relocation(const LoadedElf& elf, const RelocT& reloc)
}
template<typename RelocT> requires BAN::is_same_v<RelocT, ElfNativeRelocation> || BAN::is_same_v<RelocT, ElfNativeRelocationA>
static void handle_tls_relocation(const LoadedElf& elf, const RelocT& reloc)
static uintptr_t handle_relocation(const LoadedElf& elf, const RelocT& reloc, bool resolve_symbols)
{
if (!is_tls_relocation(reloc))
return;
if (ELF_R_SYM(reloc.r_info))
print_error_and_exit("tls relocation with symbol index", 0);
if (elf.tls_addr == nullptr)
print_error_and_exit("tls relocation without tls", 0);
uintptr_t symbol_address = 0;
#if defined(__x86_64__)
switch (ELF64_R_TYPE(reloc.r_info))
{
case R_X86_64_DTPMOD64:
*reinterpret_cast<uint64_t*>(elf.base + reloc.r_offset) = elf.tls_module;
break;
default:
print(STDERR_FILENO, "unsupported tls reloc type ");
print_uint(STDERR_FILENO, ELF64_R_TYPE(reloc.r_info));
print(STDERR_FILENO, " in ");
print(STDERR_FILENO, elf.path);
print_error_and_exit("", 0);
}
if (ELF64_R_TYPE(reloc.r_info) == R_X86_64_COPY)
return 0;
const uint32_t symbol_index = ELF64_R_SYM(reloc.r_info);
#elif defined(__i686__)
switch (ELF32_R_TYPE(reloc.r_info))
{
case R_386_TLS_DTPMOD32:
*reinterpret_cast<uint32_t*>(elf.base + reloc.r_offset) = elf.tls_module;
break;
default:
print(STDERR_FILENO, "unsupported tls reloc type ");
print_uint(STDERR_FILENO, ELF64_R_TYPE(reloc.r_info));
print(STDERR_FILENO, " in ");
print(STDERR_FILENO, elf.path);
print_error_and_exit("", 0);
}
if (ELF32_R_TYPE(reloc.r_info) == R_386_COPY)
return 0;
const uint32_t symbol_index = ELF32_R_SYM(reloc.r_info);
#else
#error "unsupported architecture"
#endif
}
template<typename RelocT> requires BAN::is_same_v<RelocT, ElfNativeRelocation> || BAN::is_same_v<RelocT, ElfNativeRelocationA>
static uintptr_t handle_relocation(const LoadedElf& elf, const RelocT& reloc, bool resolve_symbols)
{
if (is_copy_relocation(reloc) || is_tls_relocation(reloc))
return 0;
const uint32_t symbol_index = ELF_R_SYM(reloc.r_info);
if (resolve_symbols == !symbol_index)
return 0;
uintptr_t symbol_address = 0;
if (symbol_index)
{
const auto& symbol = *reinterpret_cast<ElfNativeSymbol*>(elf.symtab + symbol_index * elf.syment);
@ -414,9 +311,6 @@ static uintptr_t handle_relocation(const LoadedElf& elf, const RelocT& reloc, bo
symbol_address = 0;
}
}
if (ELF_ST_TYPE(symbol.st_info) == STT_TLS)
print_error_and_exit("relocating TLS symbol", 0);
}
size_t size = 0;
@ -523,9 +417,10 @@ static uintptr_t handle_relocation(const LoadedElf& elf, const RelocT& reloc, bo
static void relocate_elf(LoadedElf& elf, bool lazy_load)
{
if (elf.is_relocating)
// FIXME: handle circular dependencies
if (elf.is_relocated)
return;
elf.is_relocating = true;
// do copy relocations
if (elf.rel && elf.relent)
@ -554,14 +449,6 @@ static void relocate_elf(LoadedElf& elf, bool lazy_load)
for (size_t i = 0; i < elf.relasz / elf.relaent; i++)
handle_relocation(elf, *reinterpret_cast<ElfNativeRelocationA*>(elf.rela + i * elf.relaent), true);
// do tls relocations
if (elf.rel && elf.relent)
for (size_t i = 0; i < elf.relsz / elf.relent; i++)
handle_tls_relocation(elf, *reinterpret_cast<ElfNativeRelocation*>(elf.rel + i * elf.relent));
if (elf.rela && elf.relaent)
for (size_t i = 0; i < elf.relasz / elf.relaent; i++)
handle_tls_relocation(elf, *reinterpret_cast<ElfNativeRelocationA*>(elf.rela + i * elf.relaent));
// do jumprel relocations
if (elf.jmprel && elf.pltrelsz)
{
@ -611,6 +498,8 @@ static void relocate_elf(LoadedElf& elf, bool lazy_load)
}
}
}
elf.is_relocated = true;
}
extern "C"
@ -638,19 +527,17 @@ static void handle_dynamic(LoadedElf& elf)
switch (dynamic.d_tag)
{
case DT_PLTGOT:
case DT_HASH:
case DT_STRTAB:
case DT_SYMTAB:
case DT_RELA:
case DT_INIT:
case DT_FINI:
case DT_REL:
case DT_JMPREL:
case DT_INIT_ARRAY:
case DT_FINI_ARRAY:
dynamic.d_un.d_ptr += elf.base;
break;
case DT_PLTGOT: dynamic.d_un.d_ptr += elf.base; break;
case DT_HASH: dynamic.d_un.d_ptr += elf.base; break;
case DT_STRTAB: dynamic.d_un.d_ptr += elf.base; break;
case DT_SYMTAB: dynamic.d_un.d_ptr += elf.base; break;
case DT_RELA: dynamic.d_un.d_ptr += elf.base; break;
case DT_INIT: dynamic.d_un.d_ptr += elf.base; break;
case DT_FINI: dynamic.d_un.d_ptr += elf.base; break;
case DT_REL: dynamic.d_un.d_ptr += elf.base; break;
case DT_JMPREL: dynamic.d_un.d_ptr += elf.base; break;
case DT_INIT_ARRAY: dynamic.d_un.d_ptr += elf.base; break;
case DT_FINI_ARRAY: dynamic.d_un.d_ptr += elf.base; break;
}
switch (dynamic.d_tag)
@ -706,6 +593,8 @@ static void handle_dynamic(LoadedElf& elf)
const auto& loaded_elf = load_elf(realpath, library_fd);
dynamic.d_un.d_ptr = reinterpret_cast<uintptr_t>(&loaded_elf);
syscall(SYS_CLOSE, library_fd);
}
// do relocations without symbols
@ -909,9 +798,6 @@ static LoadedElf& load_elf(const char* path, int fd)
break;
}
ElfNativeProgramHeader tls_header {};
tls_header.p_type = PT_NULL;
for (size_t i = 0; i < file_header.e_phnum; i++)
{
ElfNativeProgramHeader program_header;
@ -931,9 +817,6 @@ static LoadedElf& load_elf(const char* path, int fd)
case PT_GNU_RELRO:
print(STDDBG_FILENO, "TODO: PT_GNU_*\n");
break;
case PT_TLS:
tls_header = program_header;
break;
case PT_LOAD:
program_header.p_vaddr += base;
load_program_header(program_header, fd, needs_writable);
@ -946,9 +829,7 @@ static LoadedElf& load_elf(const char* path, int fd)
}
auto& elf = s_loaded_files[s_loaded_file_count++];
elf.tls_header = tls_header;
elf.base = base;
elf.fd = fd;
elf.dynamics = nullptr;
memcpy(&elf.file_header, &file_header, sizeof(file_header));
strcpy(elf.path, path);
@ -976,165 +857,20 @@ static LoadedElf& load_elf(const char* path, int fd)
return elf;
}
struct MasterTLS
static void call_init_libc(LoadedElf& elf, char** envp)
{
uint8_t* addr;
size_t size;
size_t module_count;
};
static MasterTLS initialize_master_tls()
{
constexpr auto round =
[](size_t a, size_t b) -> size_t
{
return b * ((a + b - 1) / b);
};
size_t max_align = alignof(uthread);
size_t tls_m_offset = 0;
size_t tls_m_size = 0;
size_t module_count = 0;
for (size_t i = 0; i < s_loaded_file_count; i++)
{
const auto& tls_header = s_loaded_files[i].tls_header;
if (tls_header.p_type != PT_TLS)
continue;
if (tls_header.p_align == 0)
print_error_and_exit("TLS alignment is 0", 0);
max_align = max<size_t>(max_align, tls_header.p_align);
tls_m_offset = round(tls_m_offset + tls_header.p_memsz, tls_header.p_align);
tls_m_size = tls_header.p_memsz;
module_count++;
}
if (module_count == 0)
return { .addr = nullptr, .size = 0, .module_count = 0 };
size_t master_tls_size = tls_m_offset + tls_m_size;
if (auto rem = master_tls_size % max_align)
master_tls_size += max_align - rem;
uint8_t* master_tls_addr;
{
const sys_mmap_t mmap_args {
.addr = nullptr,
.len = master_tls_size,
.prot = PROT_READ | PROT_WRITE,
.flags = MAP_ANONYMOUS | MAP_PRIVATE,
.fildes = -1,
.off = 0,
};
const auto ret = syscall(SYS_MMAP, &mmap_args);
if (ret < 0)
print_error_and_exit("failed to allocate master TLS", ret);
master_tls_addr = reinterpret_cast<uint8_t*>(ret);
}
for (size_t i = 0, tls_offset = 0, tls_module = 1; i < s_loaded_file_count; i++)
{
const auto& tls_header = s_loaded_files[i].tls_header;
if (tls_header.p_type != PT_TLS)
continue;
tls_offset = round(tls_offset + tls_header.p_memsz, tls_header.p_align);
uint8_t* tls_buffer = master_tls_addr + master_tls_size - tls_offset;
if (tls_header.p_filesz > 0)
{
const int fd = s_loaded_files[i].fd;
if (auto ret = syscall(SYS_PREAD, fd, tls_buffer, tls_header.p_filesz, tls_header.p_offset); ret != static_cast<long>(tls_header.p_filesz))
print_error_and_exit("failed to read TLS data", ret);
}
memset(tls_buffer + tls_header.p_filesz, 0, tls_header.p_memsz - tls_header.p_filesz);
auto& elf = s_loaded_files[i];
elf.tls_addr = tls_buffer;
elf.tls_module = tls_module++;
elf.tls_offset = master_tls_size - tls_offset;
}
return { .addr = master_tls_addr, .size = master_tls_size, .module_count = module_count };
const auto* _init_libc = find_symbol(elf, "_init_libc");
if (_init_libc == nullptr)
return;
using _init_libc_t = void(*)(char**);
reinterpret_cast<_init_libc_t>(elf.base + _init_libc->st_value)(envp);
}
static void initialize_tls(MasterTLS master_tls)
static void call_init_funcs(LoadedElf& elf, char** envp, bool skip)
{
if (master_tls.addr == nullptr)
if (elf.has_called_init)
return;
const size_t tls_size = master_tls.size
+ sizeof(uthread)
+ (master_tls.module_count + 1) * sizeof(uintptr_t);
uint8_t* tls_addr;
{
const sys_mmap_t mmap_args {
.addr = nullptr,
.len = tls_size,
.prot = PROT_READ | PROT_WRITE,
.flags = MAP_ANONYMOUS | MAP_PRIVATE,
.fildes = -1,
.off = 0,
};
const auto ret = syscall(SYS_MMAP, &mmap_args);
if (ret < 0)
print_error_and_exit("failed to allocate master TLS", ret);
tls_addr = reinterpret_cast<uint8_t*>(ret);
}
memcpy(tls_addr, master_tls.addr, master_tls.size);
uthread* uthread = reinterpret_cast<struct uthread*>(tls_addr + master_tls.size);
uthread->self = uthread;
uthread->master_tls_addr = master_tls.addr;
uthread->master_tls_size = master_tls.size;
uthread->dtv[0] = master_tls.module_count;
for (size_t i = 0; i < s_loaded_file_count; i++)
{
const auto& elf = s_loaded_files[i];
if (elf.tls_addr == nullptr)
continue;
uthread->dtv[elf.tls_module] = reinterpret_cast<uintptr_t>(tls_addr) + elf.tls_offset;
}
syscall(SYS_SET_TLS, uthread);
}
// Stores `envp` into the `environ` variable exported by the loaded objects.
//
// Loaded files are searched in order. The first non-weak definition of
// "environ" wins and ends the search; if only weak definitions exist, the
// first one found is used. Nothing is written when no object defines it.
static void initialize_environ(char** envp)
{
	uintptr_t target = SYM_NOT_FOUND;

	for (size_t i = 0; i < s_loaded_file_count; i++)
	{
		const auto& candidate = s_loaded_files[i];

		const auto* symbol = find_symbol(candidate, "environ");
		if (symbol == nullptr)
			continue;

		const bool is_strong = ELF_ST_BIND(symbol->st_info) != STB_WEAK;

		// A weak definition is only accepted while nothing has been found yet.
		if (is_strong || target == SYM_NOT_FOUND)
			target = candidate.base + symbol->st_value;

		if (is_strong)
			break;
	}

	if (target != SYM_NOT_FOUND)
		*reinterpret_cast<char***>(target) = envp;
}
static void call_init_funcs(LoadedElf& elf, bool is_main_elf)
{
if (elf.is_calling_init)
return;
elf.is_calling_init = true;
if (elf.dynamics)
{
for (size_t i = 0;; i++)
@ -1143,12 +879,11 @@ static void call_init_funcs(LoadedElf& elf, bool is_main_elf)
if (dynamic.d_tag == DT_NULL)
break;
if (dynamic.d_tag == DT_NEEDED)
call_init_funcs(*reinterpret_cast<LoadedElf*>(dynamic.d_un.d_ptr), false);
call_init_funcs(*reinterpret_cast<LoadedElf*>(dynamic.d_un.d_ptr), envp, false);
}
}
// main executable calls its init functions in _start
if (is_main_elf)
if (elf.has_called_init || skip)
return;
using init_t = void(*)();
@ -1156,33 +891,19 @@ static void call_init_funcs(LoadedElf& elf, bool is_main_elf)
reinterpret_cast<init_t>(elf.init)();
for (size_t i = 0; i < elf.init_arraysz / sizeof(init_t); i++)
reinterpret_cast<init_t*>(elf.init_array)[i]();
}
static LibELF::AuxiliaryVector* find_auxv(char** envp)
{
if (envp == nullptr)
return nullptr;
if (strcmp(elf.path, "/usr/lib/libc.so") == 0)
call_init_libc(elf, envp);
char** null_env = envp;
while (*null_env)
null_env++;
return reinterpret_cast<LibELF::AuxiliaryVector*>(null_env + 1);
elf.has_called_init = true;
}
extern "C"
__attribute__((used))
uintptr_t _entry(int argc, char* argv[], char* envp[])
__attribute__((used, noreturn))
int _entry(int argc, char** argv, char** envp, int fd)
{
int execfd = -1;
if (auto* auxv = find_auxv(envp))
for (auto* aux = auxv; aux->a_type != LibELF::AT_NULL; aux++)
if (aux->a_type == LibELF::AT_EXECFD) {
execfd = aux->a_un.a_val;
aux->a_type = LibELF::AT_IGNORE;
}
if (execfd == -1)
const bool invoked_directly = (fd < 0);
if (invoked_directly)
{
if (argc < 2)
print_error_and_exit("missing program name", 0);
@ -1190,23 +911,17 @@ uintptr_t _entry(int argc, char* argv[], char* envp[])
argc--;
argv++;
execfd = syscall(SYS_OPENAT, AT_FDCWD, argv[0], O_RDONLY);
if (execfd < 0)
print_error_and_exit("could not open program", execfd);
fd = syscall(SYS_OPENAT, AT_FDCWD, argv[0], O_RDONLY);
if (fd < 0)
print_error_and_exit("could not open program", fd);
}
init_random();
auto& elf = load_elf(argv[0], execfd);
auto& elf = load_elf(argv[0], fd);
syscall(SYS_CLOSE, fd);
fini_random();
const auto master_tls = initialize_master_tls();
relocate_elf(elf, true);
initialize_tls(master_tls);
initialize_environ(envp);
call_init_funcs(elf, true);
for (size_t i = 0; i < s_loaded_file_count; i++)
syscall(SYS_CLOSE, s_loaded_files[i].fd);
return elf.base + elf.file_header.e_entry;
call_init_funcs(elf, envp, true);
call_entry_point(argc, argv, envp, elf.base + elf.file_header.e_entry);
}