Kernel: Use the new on-demand ELF structure

All executable files are now read from disk and paged on demand.
This was a big rewrite of the old ELF library, but in the end
everything seems much cleaner, since all the old functionality was
not actually needed for execution.

I have to do some measurements, but I feel like memory usage dropped
quite a bit after this change.
This commit is contained in:
Bananymous 2023-09-29 02:00:10 +03:00
parent be13120554
commit c11e84b248
3 changed files with 61 additions and 152 deletions

View File

@ -118,7 +118,7 @@ set(LIBC_SOURCES
) )
set(LIBELF_SOURCES set(LIBELF_SOURCES
../LibELF/LibELF/ELF.cpp ../LibELF/LibELF/LoadableELF.cpp
) )
set(KERNEL_SOURCES set(KERNEL_SOURCES

View File

@ -16,7 +16,7 @@
#include <sys/mman.h> #include <sys/mman.h>
#include <termios.h> #include <termios.h>
namespace LibELF { class ELF; } namespace LibELF { class LoadableELF; }
namespace Kernel namespace Kernel
{ {
@ -150,11 +150,8 @@ namespace Kernel
Process(const Credentials&, pid_t pid, pid_t parent, pid_t sid, pid_t pgrp); Process(const Credentials&, pid_t pid, pid_t parent, pid_t sid, pid_t pgrp);
static Process* create_process(const Credentials&, pid_t parent, pid_t sid = 0, pid_t pgrp = 0); static Process* create_process(const Credentials&, pid_t parent, pid_t sid = 0, pid_t pgrp = 0);
// Load an elf file to virtual address space of the current page table // Load elf from a file
static BAN::ErrorOr<BAN::UniqPtr<LibELF::ELF>> load_elf_for_exec(const Credentials&, BAN::StringView file_path, const BAN::String& cwd); static BAN::ErrorOr<BAN::UniqPtr<LibELF::LoadableELF>> load_elf_for_exec(const Credentials&, BAN::StringView file_path, const BAN::String& cwd, Kernel::PageTable&);
// Copy an elf file from the current page table to the processes own
void load_elf_to_memory(LibELF::ELF&);
int block_until_exit(); int block_until_exit();
@ -172,17 +169,12 @@ namespace Kernel
int waiting { 0 }; int waiting { 0 };
}; };
struct MappedRange
{
bool can_be_unmapped;
BAN::UniqPtr<VirtualRange> range;
};
Credentials m_credentials; Credentials m_credentials;
OpenFileDescriptorSet m_open_file_descriptors; OpenFileDescriptorSet m_open_file_descriptors;
BAN::Vector<MappedRange> m_mapped_ranges; BAN::UniqPtr<LibELF::LoadableELF> m_loadable_elf;
BAN::Vector<BAN::UniqPtr<VirtualRange>> m_mapped_ranges;
pid_t m_sid; pid_t m_sid;
pid_t m_pgrp; pid_t m_pgrp;

View File

@ -12,8 +12,8 @@
#include <kernel/Scheduler.h> #include <kernel/Scheduler.h>
#include <kernel/Storage/StorageDevice.h> #include <kernel/Storage/StorageDevice.h>
#include <kernel/Timer/Timer.h> #include <kernel/Timer/Timer.h>
#include <LibELF/ELF.h>
#include <LibELF/Values.h> #include <LibELF/LoadableELF.h>
#include <lai/helpers/pm.h> #include <lai/helpers/pm.h>
@ -110,19 +110,15 @@ namespace Kernel
BAN::ErrorOr<Process*> Process::create_userspace(const Credentials& credentials, BAN::StringView path) BAN::ErrorOr<Process*> Process::create_userspace(const Credentials& credentials, BAN::StringView path)
{ {
auto elf = TRY(load_elf_for_exec(credentials, path, "/"sv));
auto* process = create_process(credentials, 0); auto* process = create_process(credentials, 0);
MUST(process->m_working_directory.push_back('/')); MUST(process->m_working_directory.push_back('/'));
process->m_page_table = BAN::UniqPtr<PageTable>::adopt(MUST(PageTable::create_userspace()));; process->m_page_table = BAN::UniqPtr<PageTable>::adopt(MUST(PageTable::create_userspace()));
process->load_elf_to_memory(*elf); process->m_loadable_elf = TRY(load_elf_for_exec(credentials, path, "/"sv, process->page_table()));
process->m_loadable_elf->reserve_address_space();
process->m_is_userspace = true; process->m_is_userspace = true;
process->m_userspace_info.entry = elf->file_header_native().e_entry; process->m_userspace_info.entry = process->m_loadable_elf->entry_point();
// NOTE: we clear the elf since we don't need the memory anymore
elf.clear();
char** argv = nullptr; char** argv = nullptr;
{ {
@ -148,7 +144,7 @@ namespace Kernel
argv_range->copy_from(sizeof(char*) * 2, (const uint8_t*)path.data(), path.size()); argv_range->copy_from(sizeof(char*) * 2, (const uint8_t*)path.data(), path.size());
MUST(process->m_mapped_ranges.emplace_back(false, BAN::move(argv_range))); MUST(process->m_mapped_ranges.push_back(BAN::move(argv_range)));
} }
process->m_userspace_info.argc = 1; process->m_userspace_info.argc = 1;
@ -290,7 +286,7 @@ namespace Kernel
return 0; return 0;
} }
BAN::ErrorOr<BAN::UniqPtr<LibELF::ELF>> Process::load_elf_for_exec(const Credentials& credentials, BAN::StringView file_path, const BAN::String& cwd) BAN::ErrorOr<BAN::UniqPtr<LibELF::LoadableELF>> Process::load_elf_for_exec(const Credentials& credentials, BAN::StringView file_path, const BAN::String& cwd, PageTable& page_table)
{ {
if (file_path.empty()) if (file_path.empty())
return BAN::Error::from_errno(ENOENT); return BAN::Error::from_errno(ENOENT);
@ -307,29 +303,7 @@ namespace Kernel
} }
auto file = TRY(VirtualFileSystem::get().file_from_absolute_path(credentials, absolute_path, O_EXEC)); auto file = TRY(VirtualFileSystem::get().file_from_absolute_path(credentials, absolute_path, O_EXEC));
return TRY(LibELF::LoadableELF::load_from_inode(page_table, file.inode));
auto elf_or_error = LibELF::ELF::load_from_file(file.inode);
if (elf_or_error.is_error())
{
if (elf_or_error.error().get_error_code() == EINVAL)
return BAN::Error::from_errno(ENOEXEC);
return elf_or_error.error();
}
auto elf = elf_or_error.release_value();
if (!elf->is_native())
{
derrorln("ELF has invalid architecture");
return BAN::Error::from_errno(EINVAL);
}
if (elf->file_header_native().e_type != LibELF::ET_EXEC)
{
derrorln("Not an executable");
return BAN::Error::from_errno(ENOEXEC);
}
return BAN::move(elf);
} }
BAN::ErrorOr<long> Process::sys_fork(uintptr_t rsp, uintptr_t rip) BAN::ErrorOr<long> Process::sys_fork(uintptr_t rsp, uintptr_t rip)
@ -344,10 +318,12 @@ namespace Kernel
OpenFileDescriptorSet open_file_descriptors(m_credentials); OpenFileDescriptorSet open_file_descriptors(m_credentials);
TRY(open_file_descriptors.clone_from(m_open_file_descriptors)); TRY(open_file_descriptors.clone_from(m_open_file_descriptors));
BAN::Vector<MappedRange> mapped_ranges; BAN::Vector<BAN::UniqPtr<VirtualRange>> mapped_ranges;
TRY(mapped_ranges.reserve(m_mapped_ranges.size())); TRY(mapped_ranges.reserve(m_mapped_ranges.size()));
for (auto& mapped_range : m_mapped_ranges) for (auto& mapped_range : m_mapped_ranges)
MUST(mapped_ranges.emplace_back(mapped_range.can_be_unmapped, TRY(mapped_range.range->clone(*page_table)))); MUST(mapped_ranges.push_back(TRY(mapped_range->clone(*page_table))));
auto loadable_elf = TRY(m_loadable_elf->clone(*page_table));
Process* forked = create_process(m_credentials, m_pid, m_sid, m_pgrp); Process* forked = create_process(m_credentials, m_pid, m_sid, m_pgrp);
forked->m_controlling_terminal = m_controlling_terminal; forked->m_controlling_terminal = m_controlling_terminal;
@ -355,6 +331,7 @@ namespace Kernel
forked->m_page_table = BAN::move(page_table); forked->m_page_table = BAN::move(page_table);
forked->m_open_file_descriptors = BAN::move(open_file_descriptors); forked->m_open_file_descriptors = BAN::move(open_file_descriptors);
forked->m_mapped_ranges = BAN::move(mapped_ranges); forked->m_mapped_ranges = BAN::move(mapped_ranges);
forked->m_loadable_elf = BAN::move(loadable_elf);
forked->m_is_userspace = m_is_userspace; forked->m_is_userspace = m_is_userspace;
forked->m_userspace_info = m_userspace_info; forked->m_userspace_info = m_userspace_info;
forked->m_has_called_exec = false; forked->m_has_called_exec = false;
@ -373,52 +350,39 @@ namespace Kernel
{ {
// NOTE: We scope everything for automatic deletion // NOTE: We scope everything for automatic deletion
{ {
LockGuard _(m_lock);
BAN::Vector<BAN::String> str_argv; BAN::Vector<BAN::String> str_argv;
for (int i = 0; argv && argv[i]; i++)
{
validate_pointer_access(argv + i, sizeof(char*));
validate_string_access(argv[i]);
TRY(str_argv.emplace_back(argv[i]));
}
BAN::Vector<BAN::String> str_envp; BAN::Vector<BAN::String> str_envp;
for (int i = 0; envp && envp[i]; i++)
{ {
LockGuard _(m_lock); validate_pointer_access(envp + 1, sizeof(char*));
validate_string_access(envp[i]);
for (int i = 0; argv && argv[i]; i++) TRY(str_envp.emplace_back(envp[i]));
{
validate_pointer_access(argv + i, sizeof(char*));
validate_string_access(argv[i]);
TRY(str_argv.emplace_back(argv[i]));
}
for (int i = 0; envp && envp[i]; i++)
{
validate_pointer_access(envp + 1, sizeof(char*));
validate_string_access(envp[i]);
TRY(str_envp.emplace_back(envp[i]));
}
} }
BAN::String working_directory; BAN::String executable_path;
TRY(executable_path.append(path));
{
LockGuard _(m_lock);
TRY(working_directory.append(m_working_directory));
}
auto elf = TRY(load_elf_for_exec(m_credentials, path, working_directory));
LockGuard lock_guard(m_lock);
m_open_file_descriptors.close_cloexec(); m_open_file_descriptors.close_cloexec();
m_mapped_ranges.clear(); m_mapped_ranges.clear();
m_loadable_elf.clear();
load_elf_to_memory(*elf); m_loadable_elf = TRY(load_elf_for_exec(m_credentials, executable_path, m_working_directory, page_table()));
m_loadable_elf->reserve_address_space();
m_userspace_info.entry = elf->file_header_native().e_entry; m_userspace_info.entry = m_loadable_elf->entry_point();
for (size_t i = 0; i < sizeof(m_signal_handlers) / sizeof(*m_signal_handlers); i++) for (size_t i = 0; i < sizeof(m_signal_handlers) / sizeof(*m_signal_handlers); i++)
m_signal_handlers[i] = (vaddr_t)SIG_DFL; m_signal_handlers[i] = (vaddr_t)SIG_DFL;
// NOTE: we clear the elf since we don't need the memory anymore
elf.clear();
ASSERT(m_threads.size() == 1); ASSERT(m_threads.size() == 1);
ASSERT(&Process::current() == this); ASSERT(&Process::current() == this);
@ -458,17 +422,19 @@ namespace Kernel
auto argv_range = create_range(str_argv); auto argv_range = create_range(str_argv);
m_userspace_info.argv = (char**)argv_range->vaddr(); m_userspace_info.argv = (char**)argv_range->vaddr();
MUST(m_mapped_ranges.emplace_back(false, BAN::move(argv_range))); MUST(m_mapped_ranges.push_back(BAN::move(argv_range)));
auto envp_range = create_range(str_envp); auto envp_range = create_range(str_envp);
m_userspace_info.envp = (char**)envp_range->vaddr(); m_userspace_info.envp = (char**)envp_range->vaddr();
MUST(m_mapped_ranges.emplace_back(false, BAN::move(envp_range))); MUST(m_mapped_ranges.push_back(BAN::move(envp_range)));
m_userspace_info.argc = str_argv.size(); m_userspace_info.argc = str_argv.size();
asm volatile("cli"); asm volatile("cli");
} }
m_has_called_exec = true;
m_threads.front()->setup_exec(); m_threads.front()->setup_exec();
Scheduler::get().execute_current_thread(); Scheduler::get().execute_current_thread();
ASSERT_NOT_REACHED(); ASSERT_NOT_REACHED();
@ -546,62 +512,6 @@ namespace Kernel
return 0; return 0;
} }
void Process::load_elf_to_memory(LibELF::ELF& elf)
{
ASSERT(elf.is_native());
auto& elf_file_header = elf.file_header_native();
for (size_t i = 0; i < elf_file_header.e_phnum; i++)
{
auto& elf_program_header = elf.program_header_native(i);
switch (elf_program_header.p_type)
{
case LibELF::PT_NULL:
break;
case LibELF::PT_LOAD:
{
PageTable::flags_t flags = PageTable::Flags::UserSupervisor | PageTable::Flags::Present;
if (elf_program_header.p_flags & LibELF::PF_W)
flags |= PageTable::Flags::ReadWrite;
if (elf_program_header.p_flags & LibELF::PF_X)
flags |= PageTable::Flags::Execute;
size_t page_start = elf_program_header.p_vaddr / PAGE_SIZE;
size_t page_end = BAN::Math::div_round_up<size_t>(elf_program_header.p_vaddr + elf_program_header.p_memsz, PAGE_SIZE);
size_t page_count = page_end - page_start;
page_table().lock();
if (!page_table().is_range_free(page_start * PAGE_SIZE, page_count * PAGE_SIZE))
{
page_table().debug_dump();
Kernel::panic("vaddr {8H}-{8H} not free {8H}-{8H}",
page_start * PAGE_SIZE,
page_start * PAGE_SIZE + page_count * PAGE_SIZE
);
}
{
LockGuard _(m_lock);
auto range = MUST(VirtualRange::create_to_vaddr(page_table(), page_start * PAGE_SIZE, page_count * PAGE_SIZE, flags, true));
range->copy_from(elf_program_header.p_vaddr % PAGE_SIZE, elf.data() + elf_program_header.p_offset, elf_program_header.p_filesz);
MUST(m_mapped_ranges.emplace_back(false, BAN::move(range)));
}
page_table().unlock();
break;
}
default:
ASSERT_NOT_REACHED();
}
}
m_has_called_exec = true;
}
BAN::ErrorOr<void> Process::create_file(BAN::StringView path, mode_t mode) BAN::ErrorOr<void> Process::create_file(BAN::StringView path, mode_t mode)
{ {
LockGuard _(m_lock); LockGuard _(m_lock);
@ -622,23 +532,29 @@ namespace Kernel
return {}; return {};
} }
BAN::ErrorOr<bool> Process::allocate_page_for_demand_paging(vaddr_t addr) BAN::ErrorOr<bool> Process::allocate_page_for_demand_paging(vaddr_t address)
{ {
ASSERT(&Process::current() == this); ASSERT(&Process::current() == this);
LockGuard _(m_lock); LockGuard _(m_lock);
if (Thread::current().stack().contains(addr)) if (Thread::current().stack().contains(address))
{ {
TRY(Thread::current().stack().allocate_page_for_demand_paging(addr)); TRY(Thread::current().stack().allocate_page_for_demand_paging(address));
return true; return true;
} }
for (auto& mapped_range : m_mapped_ranges) for (auto& mapped_range : m_mapped_ranges)
{ {
if (!mapped_range.range->contains(addr)) if (!mapped_range->contains(address))
continue; continue;
TRY(mapped_range.range->allocate_page_for_demand_paging(addr)); TRY(mapped_range->allocate_page_for_demand_paging(address));
return true;
}
if (m_loadable_elf && m_loadable_elf->contains(address))
{
TRY(m_loadable_elf->load_page_to_memory(address));
return true; return true;
} }
@ -917,8 +833,8 @@ namespace Kernel
)); ));
LockGuard _(m_lock); LockGuard _(m_lock);
TRY(m_mapped_ranges.emplace_back(true, BAN::move(range))); TRY(m_mapped_ranges.push_back(BAN::move(range)));
return m_mapped_ranges.back().range->vaddr(); return m_mapped_ranges.back()->vaddr();
} }
return BAN::Error::from_errno(ENOTSUP); return BAN::Error::from_errno(ENOTSUP);
@ -937,9 +853,7 @@ namespace Kernel
for (size_t i = 0; i < m_mapped_ranges.size(); i++) for (size_t i = 0; i < m_mapped_ranges.size(); i++)
{ {
if (!m_mapped_ranges[i].can_be_unmapped) auto& range = m_mapped_ranges[i];
continue;
auto& range = m_mapped_ranges[i].range;
if (vaddr + len < range->vaddr() || vaddr >= range->vaddr() + range->size()) if (vaddr + len < range->vaddr() || vaddr >= range->vaddr() + range->size())
continue; continue;
m_mapped_ranges.remove(i); m_mapped_ranges.remove(i);
@ -1428,9 +1342,12 @@ namespace Kernel
// FIXME: should we allow cross mapping access? // FIXME: should we allow cross mapping access?
for (auto& mapped_range : m_mapped_ranges) for (auto& mapped_range : m_mapped_ranges)
if (vaddr >= mapped_range.range->vaddr() && vaddr + size <= mapped_range.range->vaddr() + mapped_range.range->size()) if (vaddr >= mapped_range->vaddr() && vaddr + size <= mapped_range->vaddr() + mapped_range->size())
return; return;
if (m_loadable_elf->contains(vaddr))
return;
unauthorized_access: unauthorized_access:
dwarnln("process {}, thread {} attempted to make an invalid pointer access", pid(), Thread::current().tid()); dwarnln("process {}, thread {} attempted to make an invalid pointer access", pid(), Thread::current().tid());
Debug::dump_stack_trace(); Debug::dump_stack_trace();