Kernel: Big commit. Rewrite ELF loading code

We now load ELF files into VirtualRanges instead of using kmalloc.
We only have a fixed 1 MiB kmalloc region for big allocations, so this
allows loading files even when they don't fit in there.

This meant rewriting the whole ELF loading process, since the loaded
ELF no longer lives in memory that is mapped into every process.

Virtual ranges now support zeroing out their memory and copying into
them from arbitrary byte buffers, even when the range belongs to a page
table that is not currently active.
Bananymous 2023-06-09 00:37:43 +03:00
parent 96d6453ea8
commit 8af390e0f6
9 changed files with 206 additions and 49 deletions
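
To make the new flow concrete: each PT_LOAD segment is now backed by a VirtualRange owned by the process, which is zeroed and then filled from the ELF's backing buffer. The sketch below mirrors what Process::load_elf_to_memory does in the diff further down; the standalone helper name and signature are illustrative only, and it assumes the kernel-side helpers the real code uses (MUST, PAGE_SIZE, BAN::Math) plus the 64-bit program header type from LibELF.

    #include <kernel/Memory/VirtualRange.h>
    #include <LibELF/ELF.h>
    #include <LibELF/Values.h>

    // Illustrative sketch only: map one PT_LOAD segment of `elf` into `page_table`.
    // `mapped_ranges` stands in for Process::m_mapped_ranges.
    static void map_load_segment(Kernel::PageTable& page_table,
                                 BAN::Vector<Kernel::VirtualRange*>& mapped_ranges,
                                 LibELF::ELF& elf,
                                 const LibELF::Elf64ProgramHeader& header)
    {
        uint8_t flags = Kernel::PageTable::Flags::UserSupervisor | Kernel::PageTable::Flags::Present;
        if (header.p_flags & LibELF::PF_W)
            flags |= Kernel::PageTable::Flags::ReadWrite;

        // Same page range computation as in the commit.
        size_t page_start = header.p_vaddr / PAGE_SIZE;
        size_t page_end   = BAN::Math::div_round_up<size_t>(header.p_vaddr + header.p_memsz, PAGE_SIZE);
        size_t page_count = page_end - page_start + 1;

        // Reserve the range in the target page table, zero it, then copy the
        // file-backed part in. Both helpers work even when `page_table` is not
        // the currently active page table.
        MUST(mapped_ranges.push_back(Kernel::VirtualRange::create(page_table, page_start * PAGE_SIZE, page_count * PAGE_SIZE, flags)));
        mapped_ranges.back()->set_zero();
        mapped_ranges.back()->copy_from(header.p_vaddr % PAGE_SIZE, elf.data() + header.p_offset, header.p_filesz);
    }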

View File

@@ -1,15 +1,54 @@
 #include <BAN/ScopeGuard.h>
-#include <kernel/Process.h>
 #include <LibELF/ELF.h>
 #include <LibELF/Values.h>
+#ifdef __is_kernel
+#include <kernel/FS/VirtualFileSystem.h>
+#include <kernel/Memory/PageTableScope.h>
+#include <kernel/Process.h>
+#endif
 #include <fcntl.h>
 #define ELF_PRINT_HEADERS 0
+#ifdef __is_kernel
+extern uint8_t g_kernel_end[];
+using namespace Kernel;
+#endif
 namespace LibELF
 {
+#ifdef __is_kernel
+    BAN::ErrorOr<BAN::UniqPtr<ELF>> ELF::load_from_file(BAN::StringView file_path)
+    {
+        auto file = TRY(VirtualFileSystem::get().file_from_absolute_path(file_path, true));
+        PageTable::current().lock();
+        size_t page_count = BAN::Math::div_round_up<size_t>(file.inode->size(), PAGE_SIZE);
+        vaddr_t vaddr = PageTable::current().get_free_contiguous_pages(page_count, (vaddr_t)g_kernel_end);
+        auto virtual_range = BAN::UniqPtr<VirtualRange>::adopt(
+            VirtualRange::create(
+                PageTable::current(),
+                vaddr, page_count * PAGE_SIZE,
+                PageTable::Flags::ReadWrite | PageTable::Flags::Present
+            )
+        );
+        PageTable::current().unlock();
+        TRY(file.inode->read(0, (void*)vaddr, file.inode->size()));
+        ELF* elf_ptr = new ELF(BAN::move(virtual_range), file.inode->size());
+        if (elf_ptr == nullptr)
+            return BAN::Error::from_errno(ENOMEM);
+        auto elf = BAN::UniqPtr<ELF>::adopt(elf_ptr);
+        TRY(elf->load());
+        return BAN::move(elf);
+    }
+#else
     BAN::ErrorOr<ELF*> ELF::load_from_file(BAN::StringView file_path)
     {
         ELF* elf = nullptr;
@@ -39,6 +78,7 @@ namespace LibELF
         return elf;
     }
+#endif
     BAN::ErrorOr<void> ELF::load()
     {

View File

@@ -1,6 +1,11 @@
 #pragma once
+#ifdef __is_kernel
+#include <kernel/Memory/VirtualRange.h>
+#endif
 #include <BAN/StringView.h>
+#include <BAN/UniqPtr.h>
 #include <BAN/Vector.h>
 #include <kernel/Arch.h>
 #include "Types.h"
@@ -11,7 +16,7 @@ namespace LibELF
     class ELF
     {
     public:
-        static BAN::ErrorOr<ELF*> load_from_file(BAN::StringView);
+        static BAN::ErrorOr<BAN::UniqPtr<ELF>> load_from_file(BAN::StringView);
         const Elf64FileHeader& file_header64() const;
         const Elf64ProgramHeader& program_header64(size_t) const;
@@ -47,9 +52,16 @@ namespace LibELF
         bool is_x86_64() const;
     private:
+#ifdef __is_kernel
+        ELF(BAN::UniqPtr<Kernel::VirtualRange>&& storage, size_t size)
+            : m_storage(BAN::move(storage))
+            , m_data((const uint8_t*)m_storage->vaddr(), size)
+        {}
+#else
         ELF(BAN::Vector<uint8_t>&& data)
            : m_data(BAN::move(data))
         {}
+#endif
         BAN::ErrorOr<void> load();
         bool parse_elf64_file_header(const Elf64FileHeader&);
@@ -61,7 +73,12 @@ namespace LibELF
         bool parse_elf32_section_header(const Elf32SectionHeader&);
     private:
+#ifdef __is_kernel
+        BAN::UniqPtr<Kernel::VirtualRange> m_storage;
+        BAN::Span<const uint8_t> m_data;
+#else
         const BAN::Vector<uint8_t> m_data;
+#endif
     };
 }

View File

@@ -23,11 +23,15 @@ namespace Kernel
         size_t size() const { return m_size; }
         uint8_t flags() const { return m_flags; }
+        void set_zero();
+        void copy_from(size_t offset, const uint8_t* buffer, size_t bytes);
     private:
         VirtualRange(PageTable&);
     private:
         PageTable& m_page_table;
+        bool m_kmalloc { false };
         vaddr_t m_vaddr { 0 };
         size_t m_size { 0 };
         uint8_t m_flags { 0 };

View File

@@ -49,8 +49,6 @@ namespace Kernel
         pid_t pid() const { return m_pid; }
-        static BAN::ErrorOr<LibELF::ELF*> load_elf_for_exec(BAN::StringView file_path, const BAN::String& cwd, const BAN::Vector<BAN::StringView>& path_env);
         BAN::ErrorOr<Process*> fork(uintptr_t rsp, uintptr_t rip);
         BAN::ErrorOr<void> exec(BAN::StringView path, const char* const* argv, const char* const* envp);
@@ -96,7 +94,11 @@ namespace Kernel
         static Process* create_process();
         static void register_process(Process*);
-        void load_elf(LibELF::ELF&);
+        // Load an elf file to virtual address space of the current page table
+        static BAN::ErrorOr<BAN::UniqPtr<LibELF::ELF>> load_elf_for_exec(BAN::StringView file_path, const BAN::String& cwd, const BAN::Vector<BAN::StringView>& path_env);
+        // Copy an elf file from the current page table to the processes own
+        void load_elf_to_memory(LibELF::ELF&);
         BAN::ErrorOr<BAN::String> absolute_path_of(BAN::StringView) const;

View File

@@ -1,5 +1,4 @@
 #include <kernel/Memory/GeneralAllocator.h>
-#include <kernel/Memory/PageTableScope.h>
 namespace Kernel
 {

View File

@@ -8,11 +8,11 @@ namespace Kernel
     {
         ASSERT(size % PAGE_SIZE == 0);
         ASSERT(vaddr % PAGE_SIZE == 0);
-        ASSERT(&page_table != &PageTable::kernel());
         VirtualRange* result = new VirtualRange(page_table);
         ASSERT(result);
+        result->m_kmalloc = false;
         result->m_size = size;
         result->m_flags = flags;
         MUST(result->m_physical_pages.reserve(size / PAGE_SIZE));
@@ -21,7 +21,7 @@ namespace Kernel
         if (vaddr == 0)
         {
-            vaddr = page_table.get_free_contiguous_pages(size / PAGE_SIZE);
+            vaddr = page_table.get_free_contiguous_pages(size / PAGE_SIZE, 0x400000);
             ASSERT(vaddr);
         }
@@ -43,8 +43,9 @@ namespace Kernel
     VirtualRange* VirtualRange::create_kmalloc(size_t size)
     {
         VirtualRange* result = new VirtualRange(PageTable::kernel());
-        if (result == nullptr)
-            return nullptr;
+        ASSERT(result);
+        result->m_kmalloc = true;
         result->m_size = size;
         result->m_flags = PageTable::Flags::ReadWrite | PageTable::Flags::Present;
         result->m_vaddr = (vaddr_t)kmalloc(size);
@@ -53,6 +54,7 @@ namespace Kernel
             delete result;
             return nullptr;
         }
         return result;
     }
@@ -62,7 +64,7 @@ namespace Kernel
     VirtualRange::~VirtualRange()
     {
-        if (&m_page_table == &PageTable::kernel())
+        if (m_kmalloc)
         {
             kfree((void*)m_vaddr);
             return;
@@ -94,4 +96,82 @@ namespace Kernel
         return result;
     }
+    void VirtualRange::set_zero()
+    {
+        PageTable& page_table = PageTable::current();
+        if (&page_table == &m_page_table)
+        {
+            memset((void*)vaddr(), 0, size());
+            return;
+        }
+        page_table.lock();
+        ASSERT(page_table.is_page_free(0));
+        for (size_t i = 0; i < m_physical_pages.size(); i++)
+        {
+            page_table.map_page_at(m_physical_pages[i], 0, PageTable::Flags::ReadWrite | PageTable::Flags::Present);
+            page_table.invalidate(0);
+            memset((void*)0, 0, PAGE_SIZE);
+        }
+        page_table.unmap_page(0);
+        page_table.invalidate(0);
+        page_table.unlock();
+    }
+    void VirtualRange::copy_from(size_t offset, const uint8_t* buffer, size_t bytes)
+    {
+        if (bytes == 0)
+            return;
+        // NOTE: Handling overflow
+        ASSERT(offset <= size());
+        ASSERT(bytes <= size());
+        ASSERT(offset + bytes <= size());
+        PageTable& page_table = PageTable::current();
+        if (&page_table == &m_page_table)
+        {
+            memcpy((void*)(vaddr() + offset), buffer, bytes);
+            return;
+        }
+        page_table.lock();
+        ASSERT(page_table.is_page_free(0));
+        size_t off = offset % PAGE_SIZE;
+        size_t i = offset / PAGE_SIZE;
+        // NOTE: we map the first page separately since it needs extra calculations
+        page_table.map_page_at(m_physical_pages[i], 0, PageTable::Flags::ReadWrite | PageTable::Flags::Present);
+        page_table.invalidate(0);
+        memcpy((void*)off, buffer, PAGE_SIZE - off);
+        buffer += PAGE_SIZE - off;
+        bytes -= PAGE_SIZE - off;
+        i++;
+        while (bytes > 0)
+        {
+            size_t len = BAN::Math::min<size_t>(PAGE_SIZE, bytes);
+            page_table.map_page_at(m_physical_pages[i], 0, PageTable::Flags::ReadWrite | PageTable::Flags::Present);
+            page_table.invalidate(0);
+            memcpy((void*)0, buffer, len);
+            buffer += len;
+            bytes -= len;
+            i++;
+        }
+        page_table.unmap_page(0);
+        page_table.invalidate(0);
+        page_table.unlock();
+    }
 }
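
The two helpers above, like the DiskCache changes later in this commit, rely on the same trick: the physical pages of a range that lives in another address space are temporarily mapped at virtual address 0 of the currently active page table. A condensed sketch of that pattern, using the method names from this commit (paddr_t and the exact flag combination are assumptions here):

    // Illustrative sketch: zero one physical page that is not mapped in the
    // current address space by borrowing virtual address 0 as scratch space.
    // `page_table` is expected to be PageTable::current().
    static void zero_physical_page(Kernel::PageTable& page_table, Kernel::paddr_t paddr)
    {
        page_table.lock();
        ASSERT(page_table.is_page_free(0));   // vaddr 0 must be unused

        page_table.map_page_at(paddr, 0, Kernel::PageTable::Flags::ReadWrite | Kernel::PageTable::Flags::Present);
        page_table.invalidate(0);             // flush the stale TLB entry for vaddr 0

        memset((void*)0, 0, PAGE_SIZE);       // access the page through vaddr 0

        page_table.unmap_page(0);
        page_table.invalidate(0);
        page_table.unlock();
    }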

View File

@@ -1,4 +1,5 @@
 #include <BAN/StringView.h>
+#include <kernel/CriticalScope.h>
 #include <kernel/FS/VirtualFileSystem.h>
 #include <kernel/LockGuard.h>
 #include <kernel/Memory/Heap.h>
@@ -46,13 +47,18 @@ namespace Kernel
     BAN::ErrorOr<Process*> Process::create_userspace(BAN::StringView path)
     {
-        auto* elf = TRY(load_elf_for_exec(path, "/"sv, {}));
+        auto elf = TRY(load_elf_for_exec(path, "/"sv, {}));
         auto* process = create_process();
         MUST(process->m_working_directory.push_back('/'));
-        process->m_page_table = BAN::UniqPtr<PageTable>::adopt(MUST(PageTable::create_userspace()));
-        process->load_elf(*elf);
+        process->m_page_table = BAN::UniqPtr<PageTable>::adopt(MUST(PageTable::create_userspace()));;
+        process->load_elf_to_memory(*elf);
+        process->m_userspace_info.entry = elf->file_header_native().e_entry;
+        // NOTE: we clear the elf since we don't need the memory anymore
+        elf.clear();
         char** argv = nullptr;
         char** envp = nullptr;
@@ -76,9 +82,6 @@ namespace Kernel
         process->m_userspace_info.argc = 1;
         process->m_userspace_info.argv = argv;
         process->m_userspace_info.envp = envp;
-        process->m_userspace_info.entry = elf->file_header_native().e_entry;
-        delete elf;
         auto* thread = MUST(Thread::create_userspace(process));
         process->add_thread(thread);
@@ -164,7 +167,7 @@ namespace Kernel
         return {};
     }
-    BAN::ErrorOr<LibELF::ELF*> Process::load_elf_for_exec(BAN::StringView file_path, const BAN::String& cwd, const BAN::Vector<BAN::StringView>& path_env)
+    BAN::ErrorOr<BAN::UniqPtr<LibELF::ELF>> Process::load_elf_for_exec(BAN::StringView file_path, const BAN::String& cwd, const BAN::Vector<BAN::StringView>& path_env)
     {
         if (file_path.empty())
             return BAN::Error::from_errno(ENOENT);
@@ -219,22 +222,20 @@ namespace Kernel
             return elf_or_error.error();
         }
-        auto* elf = elf_or_error.release_value();
+        auto elf = elf_or_error.release_value();
         if (!elf->is_native())
         {
             derrorln("ELF has invalid architecture");
-            delete elf;
             return BAN::Error::from_errno(EINVAL);
         }
         if (elf->file_header_native().e_type != LibELF::ET_EXEC)
         {
             derrorln("Not an executable");
-            delete elf;
             return BAN::Error::from_errno(ENOEXEC);
         }
-        return elf;
+        return BAN::move(elf);
     }
     BAN::ErrorOr<Process*> Process::fork(uintptr_t rsp, uintptr_t rip)
@@ -285,7 +286,7 @@ namespace Kernel
             path_env = TRY(BAN::StringView(envp[i]).substring(5).split(':'));
         }
-        auto* elf = TRY(load_elf_for_exec(path, TRY(working_directory()), path_env));
+        auto elf = TRY(load_elf_for_exec(path, TRY(working_directory()), path_env));
         LockGuard lock_guard(m_lock);
@@ -298,17 +299,18 @@ namespace Kernel
         m_open_files.clear();
-        load_elf(*elf);
+        load_elf_to_memory(*elf);
         m_userspace_info.entry = elf->file_header_native().e_entry;
-        delete elf;
+        // NOTE: we clear the elf since we don't need the memory anymore
+        elf.clear();
         ASSERT(m_threads.size() == 1);
         ASSERT(&Process::current() == this);
         {
-            PageTableScope _(page_table());
+            LockGuard _(page_table());
             m_userspace_info.argv = (char**)MUST(allocate(sizeof(char**) * (str_argv.size() + 1)));
             for (size_t i = 0; i < str_argv.size(); i++)
@@ -396,7 +398,7 @@ namespace Kernel
         return {};
     }
-    void Process::load_elf(LibELF::ELF& elf)
+    void Process::load_elf_to_memory(LibELF::ELF& elf)
     {
         ASSERT(elf.is_native());
@@ -411,6 +413,16 @@ namespace Kernel
                 break;
             case LibELF::PT_LOAD:
             {
+                uint8_t flags = PageTable::Flags::UserSupervisor | PageTable::Flags::Present;
+                if (elf_program_header.p_flags & LibELF::PF_W)
+                    flags |= PageTable::Flags::ReadWrite;
+                size_t page_start = elf_program_header.p_vaddr / PAGE_SIZE;
+                size_t page_end = BAN::Math::div_round_up<size_t>(elf_program_header.p_vaddr + elf_program_header.p_memsz, PAGE_SIZE);
+                size_t page_count = page_end - page_start + 1;
+                page_table().lock();
                 if (!page_table().is_range_free(elf_program_header.p_vaddr, elf_program_header.p_memsz))
                 {
                     page_table().debug_dump();
@@ -419,20 +431,16 @@ namespace Kernel
                         elf_program_header.p_vaddr + elf_program_header.p_memsz
                     );
                 }
-                uint8_t flags = PageTable::Flags::UserSupervisor | PageTable::Flags::Present;
-                if (elf_program_header.p_flags & LibELF::PF_W)
-                    flags |= PageTable::Flags::ReadWrite;
-                size_t page_start = elf_program_header.p_vaddr / PAGE_SIZE;
-                size_t page_end = BAN::Math::div_round_up<size_t>(elf_program_header.p_vaddr + elf_program_header.p_memsz, PAGE_SIZE);
-                size_t page_count = page_end - page_start + 1;
-                MUST(m_mapped_ranges.push_back(VirtualRange::create(page_table(), page_start * PAGE_SIZE, page_count * PAGE_SIZE, flags)));
                 {
-                    PageTableScope _(page_table());
-                    memcpy((void*)elf_program_header.p_vaddr, elf.data() + elf_program_header.p_offset, elf_program_header.p_filesz);
-                    memset((void*)(elf_program_header.p_vaddr + elf_program_header.p_filesz), 0, elf_program_header.p_memsz - elf_program_header.p_filesz);
+                    LockGuard _(m_lock);
+                    MUST(m_mapped_ranges.push_back(VirtualRange::create(page_table(), page_start * PAGE_SIZE, page_count * PAGE_SIZE, flags)));
+                    m_mapped_ranges.back()->set_zero();
+                    m_mapped_ranges.back()->copy_from(elf_program_header.p_vaddr % PAGE_SIZE, elf.data() + elf_program_header.p_offset, elf_program_header.p_filesz);
                 }
+                page_table().unlock();
                 break;
             }
             default:

View File

@@ -1,6 +1,6 @@
 #include <kernel/LockGuard.h>
 #include <kernel/Memory/Heap.h>
-#include <kernel/Memory/PageTableScope.h>
+#include <kernel/Memory/PageTable.h>
 #include <kernel/Storage/DiskCache.h>
 #include <kernel/Storage/StorageDevice.h>
@@ -225,24 +225,30 @@ namespace Kernel
     {
         ASSERT(index < sectors.size());
-        PageTableScope _(PageTable::current());
-        ASSERT(PageTable::current().is_page_free(0));
-        PageTable::current().map_page_at(paddr, 0, PageTable::Flags::Present);
+        PageTable& page_table = PageTable::current();
+        page_table.lock();
+        ASSERT(page_table.is_page_free(0));
+        page_table.map_page_at(paddr, 0, PageTable::Flags::Present);
         memcpy(buffer, (void*)(index * device.sector_size()), device.sector_size());
-        PageTable::current().unmap_page(0);
-        PageTable::current().invalidate(0);
+        page_table.unmap_page(0);
+        page_table.invalidate(0);
+        page_table.unlock();
     }
     void DiskCache::CacheBlock::write_sector(StorageDevice& device, size_t index, const uint8_t* buffer)
     {
         ASSERT(index < sectors.size());
-        PageTableScope _(PageTable::current());
-        ASSERT(PageTable::current().is_page_free(0));
-        PageTable::current().map_page_at(paddr, 0, PageTable::Flags::ReadWrite | PageTable::Flags::Present);
+        PageTable& page_table = PageTable::current();
+        page_table.lock();
+        ASSERT(page_table.is_page_free(0));
+        page_table.map_page_at(paddr, 0, PageTable::Flags::ReadWrite | PageTable::Flags::Present);
         memcpy((void*)(index * device.sector_size()), buffer, device.sector_size());
-        PageTable::current().unmap_page(0);
-        PageTable::current().invalidate(0);
+        page_table.unmap_page(0);
+        page_table.invalidate(0);
+        page_table.unlock();
     }
 }

View File

@@ -140,6 +140,7 @@ namespace Kernel
         // Setup stack for returning
         {
+            // FIXME: don't use PageTableScope
             PageTableScope _(m_process->page_table());
             write_to_stack<sizeof(void*)>(m_rsp, this);
             write_to_stack<sizeof(void*)>(m_rsp, &Thread::on_exit);