Kernel: Allow SYS_EXEC to fail at any point

This patch builds new executable image to another pml4 structure and
only after everything is validated will current context be replaced.
This allows exec to fail "late" where previously it would panic the
kernel or kill the process. This allows graceful handling of exec
failures in userspace!
This commit is contained in:
Bananymous 2024-09-24 16:29:38 +03:00
parent ebb87ccdde
commit 39802b56c1
3 changed files with 73 additions and 39 deletions

View File

@ -32,7 +32,7 @@ namespace Kernel
public: public:
static BAN::ErrorOr<Thread*> create_kernel(entry_t, void*, Process*); static BAN::ErrorOr<Thread*> create_kernel(entry_t, void*, Process*);
static BAN::ErrorOr<Thread*> create_userspace(Process*); static BAN::ErrorOr<Thread*> create_userspace(Process*, PageTable&);
~Thread(); ~Thread();
BAN::ErrorOr<Thread*> clone(Process*, uintptr_t sp, uintptr_t ip); BAN::ErrorOr<Thread*> clone(Process*, uintptr_t sp, uintptr_t ip);
@ -72,6 +72,8 @@ namespace Kernel
static Thread& current(); static Thread& current();
static pid_t current_tid(); static pid_t current_tid();
void give_keep_alive_page_table(BAN::UniqPtr<PageTable>&& page_table) { m_keep_alive_page_table = BAN::move(page_table); }
Process& process(); Process& process();
const Process& process() const; const Process& process() const;
bool has_process() const { return m_process; } bool has_process() const { return m_process; }
@ -99,6 +101,10 @@ namespace Kernel
void on_exit(); void on_exit();
private: private:
// NOTE: this is the first member to force it being last destructed
// {kernel,userspace}_stack has to be destroyed before page table
BAN::UniqPtr<PageTable> m_keep_alive_page_table;
static constexpr size_t m_kernel_stack_size { PAGE_SIZE * 64 }; static constexpr size_t m_kernel_stack_size { PAGE_SIZE * 64 };
static constexpr size_t m_userspace_stack_size { PAGE_SIZE * 64 }; static constexpr size_t m_userspace_stack_size { PAGE_SIZE * 64 };
BAN::UniqPtr<VirtualRange> m_kernel_stack; BAN::UniqPtr<VirtualRange> m_kernel_stack;

View File

@ -168,7 +168,7 @@ namespace Kernel
process->m_userspace_info.argv = argv; process->m_userspace_info.argv = argv;
process->m_userspace_info.envp = nullptr; process->m_userspace_info.envp = nullptr;
auto* thread = MUST(Thread::create_userspace(process)); auto* thread = MUST(Thread::create_userspace(process, process->page_table()));
process->add_thread(thread); process->add_thread(thread);
process->register_to_scheduler(); process->register_to_scheduler();
return process; return process;
@ -477,10 +477,13 @@ namespace Kernel
{ {
LockGuard _(m_process_lock); LockGuard _(m_process_lock);
auto new_page_table = BAN::UniqPtr<PageTable>::adopt(TRY(PageTable::create_userspace()));
TRY(validate_string_access(path)); TRY(validate_string_access(path));
auto absolute_path = TRY(absolute_path_of(path)); auto absolute_path = TRY(absolute_path_of(path));
auto executable_inode = TRY(VirtualFileSystem::get().file_from_absolute_path(m_credentials, absolute_path, O_EXEC)).inode; auto executable_file = TRY(VirtualFileSystem::get().file_from_absolute_path(m_credentials, absolute_path, O_EXEC));
auto executable_inode = executable_file.inode;
BAN::Vector<BAN::String> str_argv; BAN::Vector<BAN::String> str_argv;
for (int i = 0; argv && argv[i]; i++) for (int i = 0; argv && argv[i]; i++)
@ -498,35 +501,18 @@ namespace Kernel
TRY(str_envp.emplace_back(envp[i])); TRY(str_envp.emplace_back(envp[i]));
} }
m_open_file_descriptors.close_cloexec(); auto executable = TRY(ELF::load_from_inode(executable_inode, m_credentials, *new_page_table));
auto new_mapped_regions = BAN::move(executable.regions);
m_mapped_regions.clear(); int file_fd = -1;
auto executable = TRY(ELF::load_from_inode(executable_inode, m_credentials, page_table()));
m_mapped_regions = BAN::move(executable.regions);
if (executable_inode->mode().mode & +Inode::Mode::ISUID)
m_credentials.set_euid(executable_inode->uid());
if (executable_inode->mode().mode & +Inode::Mode::ISGID)
m_credentials.set_egid(executable_inode->gid());
m_userspace_info.entry = executable.entry_point;
if (executable.has_interpreter) if (executable.has_interpreter)
{ {
VirtualFileSystem::File file; VirtualFileSystem::File file;
TRY(file.canonical_path.append("<self>")); file.canonical_path = BAN::move(executable_file.canonical_path);
file.inode = executable_inode; file.inode = executable_inode;
m_userspace_info.file_fd = TRY(m_open_file_descriptors.open(BAN::move(file), O_RDONLY)); file_fd = TRY(m_open_file_descriptors.open(BAN::move(file), O_RDONLY));
} }
BAN::ScopeGuard file_closer([&] { if (file_fd != -1) MUST(m_open_file_descriptors.close(file_fd)); });
for (size_t i = 0; i < sizeof(m_signal_handlers) / sizeof(*m_signal_handlers); i++)
{
m_signal_handlers[i].sa_handler = SIG_DFL;
m_signal_handlers[i].sa_flags = 0;
}
ASSERT(m_threads.size() == 1);
ASSERT(&Process::current() == this);
// allocate memory on the new process for arguments and environment // allocate memory on the new process for arguments and environment
auto create_region = auto create_region =
@ -540,9 +526,9 @@ namespace Kernel
bytes += PAGE_SIZE - rem; bytes += PAGE_SIZE - rem;
auto region = TRY(MemoryBackedRegion::create( auto region = TRY(MemoryBackedRegion::create(
page_table(), *new_page_table,
bytes, bytes,
{ .start = m_userspace_info.entry, .end = KERNEL_OFFSET }, { .start = executable.entry_point, .end = KERNEL_OFFSET },
MemoryRegion::Type::PRIVATE, MemoryRegion::Type::PRIVATE,
PageTable::Flags::UserSupervisor | PageTable::Flags::ReadWrite | PageTable::Flags::Present PageTable::Flags::UserSupervisor | PageTable::Flags::ReadWrite | PageTable::Flags::Present
)); ));
@ -562,20 +548,62 @@ namespace Kernel
return BAN::UniqPtr<MemoryRegion>(BAN::move(region)); return BAN::UniqPtr<MemoryRegion>(BAN::move(region));
}; };
auto argv_region = MUST(create_region(str_argv.span())); TRY(new_mapped_regions.reserve(new_mapped_regions.size() + 2));
m_userspace_info.argv = (char**)argv_region->vaddr(); MUST(new_mapped_regions.push_back(TRY(create_region(str_argv.span()))));
MUST(m_mapped_regions.push_back(BAN::move(argv_region))); MUST(new_mapped_regions.push_back(TRY(create_region(str_envp.span()))));
auto envp_region = MUST(create_region(str_envp.span())); auto* new_thread = TRY(Thread::create_userspace(this, *new_page_table));
m_userspace_info.envp = (char**)envp_region->vaddr();
MUST(m_mapped_regions.push_back(BAN::move(envp_region))); ASSERT(Processor::get_interrupt_state() == InterruptState::Enabled);
Processor::set_interrupt_state(InterruptState::Disabled);
// NOTE: bind new thread to this processor so it wont be rescheduled before end of this function
if (auto ret = Scheduler::bind_thread_to_processor(new_thread, Processor::current_id()); ret.is_error())
{
Processor::set_interrupt_state(InterruptState::Enabled);
return ret.release_error();
}
// after this point, everything is initialized and nothing can fail!
ASSERT(m_threads.size() == 1);
ASSERT(&Thread::current() == m_threads.front());
// Make current thread standalone and terminated
// We need to give it the current page table to keep it alive
// while its kernel stack is in use
m_threads.front()->m_state = Thread::State::Terminated;
m_threads.front()->m_process = nullptr;
m_threads.front()->give_keep_alive_page_table(BAN::move(m_page_table));
m_threads.front() = new_thread;
MUST(Processor::scheduler().add_thread(m_threads.front()));
for (size_t i = 0; i < sizeof(m_signal_handlers) / sizeof(*m_signal_handlers); i++)
{
m_signal_handlers[i].sa_handler = SIG_DFL;
m_signal_handlers[i].sa_flags = 0;
}
if (executable_inode->mode().mode & +Inode::Mode::ISUID)
m_credentials.set_euid(executable_inode->uid());
if (executable_inode->mode().mode & +Inode::Mode::ISGID)
m_credentials.set_egid(executable_inode->gid());
m_open_file_descriptors.close_cloexec();
m_mapped_regions = BAN::move(new_mapped_regions);
m_page_table = BAN::move(new_page_table);
file_closer.disable();
m_userspace_info.argc = str_argv.size(); m_userspace_info.argc = str_argv.size();
m_userspace_info.argv = reinterpret_cast<char**>(m_mapped_regions[m_mapped_regions.size() - 2]->vaddr());
m_userspace_info.envp = reinterpret_cast<char**>(m_mapped_regions[m_mapped_regions.size() - 1]->vaddr());
m_userspace_info.entry = executable.entry_point;
m_userspace_info.file_fd = file_fd;
m_cmdline = BAN::move(str_argv); m_cmdline = BAN::move(str_argv);
m_environ = BAN::move(str_envp); m_environ = BAN::move(str_envp);
Processor::set_interrupt_state(InterruptState::Disabled);
} }
m_has_called_exec = true; m_has_called_exec = true;

View File

@ -74,7 +74,7 @@ namespace Kernel
return thread; return thread;
} }
BAN::ErrorOr<Thread*> Thread::create_userspace(Process* process) BAN::ErrorOr<Thread*> Thread::create_userspace(Process* process, PageTable& page_table)
{ {
ASSERT(process); ASSERT(process);
@ -87,7 +87,7 @@ namespace Kernel
thread->m_is_userspace = true; thread->m_is_userspace = true;
thread->m_kernel_stack = TRY(VirtualRange::create_to_vaddr_range( thread->m_kernel_stack = TRY(VirtualRange::create_to_vaddr_range(
process->page_table(), page_table,
0x300000, KERNEL_OFFSET, 0x300000, KERNEL_OFFSET,
m_kernel_stack_size, m_kernel_stack_size,
PageTable::Flags::ReadWrite | PageTable::Flags::Present, PageTable::Flags::ReadWrite | PageTable::Flags::Present,
@ -95,7 +95,7 @@ namespace Kernel
)); ));
thread->m_userspace_stack = TRY(VirtualRange::create_to_vaddr_range( thread->m_userspace_stack = TRY(VirtualRange::create_to_vaddr_range(
process->page_table(), page_table,
0x300000, KERNEL_OFFSET, 0x300000, KERNEL_OFFSET,
m_userspace_stack_size, m_userspace_stack_size,
PageTable::Flags::UserSupervisor | PageTable::Flags::ReadWrite | PageTable::Flags::Present, PageTable::Flags::UserSupervisor | PageTable::Flags::ReadWrite | PageTable::Flags::Present,