Kernel: Rework kernel-side ELF loading

ELFs are now loaded as MemoryRegions so they don't need special handling
anywhere. This also allows file backed COW optimizations to work. This
was not the case before.

This patch removes now obsolete LoadableELF and unused ELF files from
LibElf.
This commit is contained in:
2024-09-15 23:20:32 +03:00
parent 54732edff4
commit a084f83f4c
9 changed files with 300 additions and 1044 deletions

View File

@@ -22,6 +22,7 @@ set(KERNEL_SOURCES
kernel/Device/NullDevice.cpp
kernel/Device/RandomDevice.cpp
kernel/Device/ZeroDevice.cpp
kernel/ELF.cpp
kernel/Errors.cpp
kernel/FS/DevFS/FileSystem.cpp
kernel/FS/Ext2/FileSystem.cpp
@@ -151,10 +152,6 @@ set(KLIBC_SOURCES
klibc/string.cpp
)
set(LIBELF_SOURCES
../userspace/libraries/LibELF/LibELF/LoadableELF.cpp
)
set(LIBFONT_SOURCES
../userspace/libraries/LibFont/Font.cpp
../userspace/libraries/LibFont/PSF.cpp
@@ -169,7 +166,6 @@ set(KERNEL_SOURCES
${KERNEL_SOURCES}
${BAN_SOURCES}
${KLIBC_SOURCES}
${LIBELF_SOURCES}
${LIBFONT_SOURCES}
${LIBINPUT_SOURCE}
)

View File

@@ -0,0 +1,18 @@
#pragma once
#include <kernel/FS/Inode.h>
#include <kernel/Memory/MemoryRegion.h>
namespace Kernel::ELF
{
struct LoadResult
{
bool has_interpreter;
vaddr_t entry_point;
BAN::Vector<BAN::UniqPtr<MemoryRegion>> regions;
};
BAN::ErrorOr<LoadResult> load_from_inode(BAN::RefPtr<Inode>, const Credentials&, PageTable&);
}

View File

@@ -22,8 +22,6 @@
#include <sys/time.h>
#include <termios.h>
namespace LibELF { class LoadableELF; }
namespace Kernel
{
@@ -269,7 +267,6 @@ namespace Kernel
OpenFileDescriptorSet m_open_file_descriptors;
BAN::UniqPtr<LibELF::LoadableELF> m_loadable_elf;
BAN::Vector<BAN::UniqPtr<MemoryRegion>> m_mapped_regions;
pid_t m_sid;

253
kernel/kernel/ELF.cpp Normal file
View File

@@ -0,0 +1,253 @@
#include <kernel/ELF.h>
#include <kernel/FS/VirtualFileSystem.h>
#include <kernel/Memory/FileBackedRegion.h>
#include <kernel/Memory/MemoryBackedRegion.h>
#include <LibELF/Types.h>
#include <LibELF/Values.h>
#include <ctype.h>
#include <fcntl.h>
namespace Kernel::ELF
{
using namespace LibELF;
static BAN::ErrorOr<ElfNativeFileHeader> read_and_validate_file_header(BAN::RefPtr<Inode> inode)
{
if ((size_t)inode->size() < sizeof(ElfNativeFileHeader))
{
dprintln("File is too small to be ELF");
return BAN::Error::from_errno(ENOEXEC);
}
ElfNativeFileHeader file_header;
size_t nread = TRY(inode->read(0, BAN::ByteSpan::from(file_header)));
ASSERT(nread == sizeof(file_header));
if (file_header.e_ident[EI_MAG0] != ELFMAG0 ||
file_header.e_ident[EI_MAG1] != ELFMAG1 ||
file_header.e_ident[EI_MAG2] != ELFMAG2 ||
file_header.e_ident[EI_MAG3] != ELFMAG3)
{
dprintln("Not an ELF file");
return BAN::Error::from_errno(ENOEXEC);
}
if (file_header.e_ident[EI_DATA] != ELFDATA2LSB)
{
dprintln("Not in little-endian");
return BAN::Error::from_errno(ENOEXEC);
}
if (file_header.e_ident[EI_VERSION] != EV_CURRENT)
{
dprintln("Unsupported version {}", file_header.e_ident[EI_VERSION]);
return BAN::Error::from_errno(ENOEXEC);
}
#if ARCH(i686)
if (file_header.e_ident[EI_CLASS] != ELFCLASS32)
#elif ARCH(x86_64)
if (file_header.e_ident[EI_CLASS] != ELFCLASS64)
#endif
{
dprintln("Not in native format");
return BAN::Error::from_errno(EINVAL);
}
if (file_header.e_type != ET_EXEC && file_header.e_type != ET_DYN)
{
dprintln("Unsupported file header type {}", file_header.e_type);
return BAN::Error::from_errno(ENOTSUP);
}
if (file_header.e_version != EV_CURRENT)
{
dprintln("Unsupported version {}", file_header.e_version);
return BAN::Error::from_errno(EINVAL);
}
if (file_header.e_phentsize < sizeof(ElfNativeProgramHeader))
{
dprintln("Too small program header size ({} bytes)", file_header.e_phentsize);
return BAN::Error::from_errno(EINVAL);
}
return file_header;
}
static BAN::ErrorOr<BAN::Vector<ElfNativeProgramHeader>> read_program_headers(BAN::RefPtr<Inode> inode, const ElfNativeFileHeader& file_header)
{
BAN::Vector<uint8_t> program_header_buffer;
TRY(program_header_buffer.resize(file_header.e_phnum * file_header.e_phentsize));
TRY(inode->read(file_header.e_phoff, BAN::ByteSpan(program_header_buffer.span())));
BAN::Vector<ElfNativeProgramHeader> program_headers;
TRY(program_headers.reserve(file_header.e_phnum));
for (size_t i = 0; i < file_header.e_phnum; i++)
{
const auto& pheader = *reinterpret_cast<ElfNativeProgramHeader*>(program_header_buffer.data() + i * file_header.e_phentsize);
if (pheader.p_memsz < pheader.p_filesz)
{
dprintln("Invalid program header, memsz less than filesz");
return BAN::Error::from_errno(EINVAL);
}
MUST(program_headers.emplace_back(pheader));
}
return BAN::move(program_headers);
}
BAN::ErrorOr<LoadResult> load_from_inode(BAN::RefPtr<Inode> inode, const Credentials& credentials, PageTable& page_table)
{
auto file_header = TRY(read_and_validate_file_header(inode));
auto program_headers = TRY(read_program_headers(inode, file_header));
vaddr_t executable_end = 0;
BAN::String interpreter;
for (const auto& program_header : program_headers)
{
if (program_header.p_type == PT_LOAD)
executable_end = BAN::Math::max<vaddr_t>(executable_end, program_header.p_vaddr + program_header.p_memsz);
else if (program_header.p_type == PT_INTERP)
{
BAN::Vector<uint8_t> interp_buffer;
TRY(interp_buffer.resize(program_header.p_memsz, 0));
TRY(inode->read(program_header.p_offset, BAN::ByteSpan(interp_buffer.data(), program_header.p_filesz)));
if (interp_buffer.empty() || interp_buffer.front() != '/' || interp_buffer.back() != '\0')
{
dprintln("ELF interpreter is not an valid absolute path");
return BAN::Error::from_errno(EINVAL);
}
auto interpreter_sv = BAN::StringView(reinterpret_cast<const char*>(interp_buffer.data()), interp_buffer.size() - 1);
for (char ch : interpreter_sv)
{
if (isprint(ch))
continue;
dprintln("ELF interpreter name contains non-printable characters");
return BAN::Error::from_errno(EINVAL);
}
interpreter.clear();
TRY(interpreter.append(interpreter_sv));
}
}
if (file_header.e_type == ET_DYN)
executable_end = 0x400000;
if (!interpreter.empty())
{
auto interpreter_inode = TRY(VirtualFileSystem::get().file_from_absolute_path(credentials, interpreter, O_EXEC)).inode;
auto interpreter_file_header = TRY(read_and_validate_file_header(interpreter_inode));
auto interpreter_program_headers = TRY(read_program_headers(interpreter_inode, interpreter_file_header));
for (const auto& program_header : interpreter_program_headers)
{
if (program_header.p_type == PT_INTERP)
{
dprintln("ELF interpreter has an interpreter specified");
return BAN::Error::from_errno(EINVAL);
}
}
inode = interpreter_inode;
file_header = interpreter_file_header;
program_headers = BAN::move(interpreter_program_headers);
}
const vaddr_t load_base_vaddr =
[&file_header, executable_end]() -> vaddr_t
{
if (file_header.e_type == ET_EXEC)
return 0;
if (file_header.e_type == ET_DYN)
return (executable_end + PAGE_SIZE - 1) & PAGE_ADDR_MASK;
ASSERT_NOT_REACHED();
}();
BAN::Vector<BAN::UniqPtr<MemoryRegion>> memory_regions;
for (const auto& program_header : program_headers)
{
if (program_header.p_type != PT_LOAD)
continue;
const PageTable::flags_t flags =
[&program_header]() -> int
{
PageTable::flags_t result = PageTable::Flags::UserSupervisor;
if (program_header.p_flags & PF_R)
result |= PageTable::Flags::Present;
if (program_header.p_flags & PF_W)
result |= PageTable::Flags::ReadWrite;
if (program_header.p_flags & PF_X)
result |= PageTable::Flags::Execute;
return result;
}();
const size_t file_backed_size =
[&program_header]() -> size_t
{
if ((program_header.p_vaddr & 0xFFF) || (program_header.p_offset & 0xFFF))
return 0;
if (program_header.p_filesz == program_header.p_memsz)
return program_header.p_filesz;
return program_header.p_filesz & ~(uintptr_t)0xFFF;
}();
const vaddr_t pheader_base = load_base_vaddr + program_header.p_vaddr;
if (file_backed_size)
{
auto region = TRY(FileBackedRegion::create(
inode,
page_table,
program_header.p_offset,
file_backed_size,
{ .start = pheader_base, .end = pheader_base + file_backed_size },
MemoryRegion::Type::PRIVATE,
flags
));
TRY(memory_regions.emplace_back(BAN::move(region)));
}
if (file_backed_size < program_header.p_memsz)
{
const vaddr_t aligned_vaddr = pheader_base & PAGE_ADDR_MASK;
auto region = TRY(MemoryBackedRegion::create(
page_table,
(pheader_base + program_header.p_memsz) - (aligned_vaddr + file_backed_size),
{ .start = aligned_vaddr + file_backed_size, .end = pheader_base + program_header.p_memsz },
MemoryRegion::Type::PRIVATE,
flags
));
if (file_backed_size < program_header.p_filesz)
{
BAN::Vector<uint8_t> file_data_buffer;
TRY(file_data_buffer.resize(program_header.p_filesz - file_backed_size));
if (TRY(inode->read(program_header.p_offset + file_backed_size, file_data_buffer.span())) != file_data_buffer.size())
return BAN::Error::from_errno(EFAULT);
TRY(region->copy_data_to_region(pheader_base - aligned_vaddr, file_data_buffer.data(), file_data_buffer.size()));
}
TRY(memory_regions.emplace_back(BAN::move(region)));
}
}
LoadResult result;
result.has_interpreter = !interpreter.empty();
result.entry_point = load_base_vaddr + file_header.e_entry;
result.regions = BAN::move(memory_regions);
return BAN::move(result);
}
}

View File

@@ -1,6 +1,7 @@
#include <BAN/ScopeGuard.h>
#include <BAN/StringView.h>
#include <kernel/ACPI/ACPI.h>
#include <kernel/ELF.h>
#include <kernel/FS/DevFS/FileSystem.h>
#include <kernel/FS/ProcFS/FileSystem.h>
#include <kernel/FS/VirtualFileSystem.h>
@@ -16,7 +17,6 @@
#include <kernel/Terminal/PseudoTerminal.h>
#include <kernel/Timer/Timer.h>
#include <LibELF/LoadableELF.h>
#include <LibInput/KeyboardLayout.h>
#include <fcntl.h>
@@ -121,13 +121,8 @@ namespace Kernel
auto absolute_path = TRY(process->absolute_path_of(path));
auto executable_inode = TRY(VirtualFileSystem::get().file_from_absolute_path(process->m_credentials, absolute_path, O_EXEC)).inode;
process->m_loadable_elf = TRY(LibELF::LoadableELF::load_from_inode(process->page_table(), process->m_credentials, executable_inode));
if (!process->m_loadable_elf->is_address_space_free())
{
dprintln("Could not load ELF address space");
return BAN::Error::from_errno(ENOEXEC);
}
process->m_loadable_elf->reserve_address_space();
auto executable = TRY(ELF::load_from_inode(executable_inode, process->m_credentials, process->page_table()));
process->m_mapped_regions = BAN::move(executable.regions);
char** argv = nullptr;
{
@@ -154,8 +149,21 @@ namespace Kernel
MUST(process->m_mapped_regions.push_back(BAN::move(argv_region)));
}
if (executable_inode->mode().mode & +Inode::Mode::ISUID)
process->m_credentials.set_euid(executable_inode->uid());
if (executable_inode->mode().mode & +Inode::Mode::ISGID)
process->m_credentials.set_egid(executable_inode->gid());
if (executable.has_interpreter)
{
VirtualFileSystem::File file;
TRY(file.canonical_path.append("<self>"));
file.inode = executable_inode;
process->m_userspace_info.file_fd = TRY(process->m_open_file_descriptors.open(BAN::move(file), O_RDONLY));
}
process->m_is_userspace = true;
process->m_userspace_info.entry = process->m_loadable_elf->entry_point();
process->m_userspace_info.entry = executable.entry_point;
process->m_userspace_info.argc = 1;
process->m_userspace_info.argv = argv;
process->m_userspace_info.envp = nullptr;
@@ -185,7 +193,6 @@ namespace Kernel
{
ASSERT(m_threads.empty());
ASSERT(m_mapped_regions.empty());
ASSERT(!m_loadable_elf);
ASSERT(&PageTable::current() != m_page_table.ptr());
}
@@ -216,7 +223,6 @@ namespace Kernel
// NOTE: We must unmap ranges while the page table is still alive
m_mapped_regions.clear();
m_loadable_elf.clear();
}
bool Process::on_thread_exit(Thread& thread)
@@ -302,11 +308,6 @@ namespace Kernel
meminfo.virt_pages += region->virtual_page_count();
meminfo.phys_pages += region->physical_page_count();
}
if (m_loadable_elf)
{
meminfo.virt_pages += m_loadable_elf->virtual_page_count();
meminfo.phys_pages += m_loadable_elf->physical_page_count();
}
}
size_t bytes = BAN::Math::min<size_t>(sizeof(proc_meminfo_t) - offset, buffer.size());
@@ -424,15 +425,12 @@ namespace Kernel
for (auto& mapped_region : m_mapped_regions)
MUST(mapped_regions.push_back(TRY(mapped_region->clone(*page_table))));
auto loadable_elf = TRY(m_loadable_elf->clone(*page_table));
Process* forked = create_process(m_credentials, m_pid, m_sid, m_pgrp);
forked->m_controlling_terminal = m_controlling_terminal;
forked->m_working_directory = BAN::move(working_directory);
forked->m_page_table = BAN::move(page_table);
forked->m_open_file_descriptors = BAN::move(open_file_descriptors);
forked->m_mapped_regions = BAN::move(mapped_regions);
forked->m_loadable_elf = BAN::move(loadable_elf);
forked->m_is_userspace = m_is_userspace;
forked->m_userspace_info = m_userspace_info;
forked->m_has_called_exec = false;
@@ -461,7 +459,6 @@ namespace Kernel
auto absolute_path = TRY(absolute_path_of(path));
auto executable_inode = TRY(VirtualFileSystem::get().file_from_absolute_path(m_credentials, absolute_path, O_EXEC)).inode;
auto loadable_elf = TRY(LibELF::LoadableELF::load_from_inode(page_table(), m_credentials, executable_inode));
BAN::Vector<BAN::String> str_argv;
for (int i = 0; argv && argv[i]; i++)
@@ -479,29 +476,24 @@ namespace Kernel
TRY(str_envp.emplace_back(envp[i]));
}
BAN::String executable_path;
TRY(executable_path.append(path));
m_open_file_descriptors.close_cloexec();
m_mapped_regions.clear();
m_loadable_elf = BAN::move(loadable_elf);
if (!m_loadable_elf->is_address_space_free())
{
dprintln("ELF has unloadable address space");
MUST(sys_kill(pid(), SIGKILL));
// NOTE: signal will only execute after return from syscall
return BAN::Error::from_errno(EINTR);
}
m_loadable_elf->reserve_address_space();
m_loadable_elf->update_suid_sgid(m_credentials);
m_userspace_info.entry = m_loadable_elf->entry_point();
if (m_loadable_elf->has_interpreter())
auto executable = TRY(ELF::load_from_inode(executable_inode, m_credentials, page_table()));
m_mapped_regions = BAN::move(executable.regions);
if (executable_inode->mode().mode & +Inode::Mode::ISUID)
m_credentials.set_euid(executable_inode->uid());
if (executable_inode->mode().mode & +Inode::Mode::ISGID)
m_credentials.set_egid(executable_inode->gid());
m_userspace_info.entry = executable.entry_point;
if (executable.has_interpreter)
{
VirtualFileSystem::File file;
TRY(file.canonical_path.append("<self>"));
file.inode = m_loadable_elf->executable();
file.inode = executable_inode;
m_userspace_info.file_fd = TRY(m_open_file_descriptors.open(BAN::move(file), O_RDONLY));
}
@@ -845,12 +837,6 @@ namespace Kernel
return true;
}
if (m_loadable_elf && m_loadable_elf->contains(address))
{
TRY(m_loadable_elf->load_page_to_memory(address));
return true;
}
return false;
}
@@ -2387,10 +2373,6 @@ namespace Kernel
return {};
}
// FIXME: elf should use MemoryRegions instead of mapping executables itself
if (m_loadable_elf->contains(vaddr))
return {};
unauthorized_access:
dwarnln("process {}, thread {} attempted to make an invalid pointer access to 0x{H}->0x{H}", pid(), Thread::current().tid(), vaddr, vaddr + size);
Debug::dump_stack_trace();