From af4af1cae9c92f924af27f3177585623271f8db6 Mon Sep 17 00:00:00 2001 From: Bananymous Date: Fri, 22 Sep 2023 15:41:05 +0300 Subject: [PATCH] Kernel/LibC: add mmap for private anonymous mappings This will be used by the userspace to get more memory. Currently kernel handles all allocations, which is not preferable. --- kernel/include/kernel/Process.h | 6 +++ kernel/kernel/Process.cpp | 69 +++++++++++++++++++++++++++++++++ kernel/kernel/Syscall.cpp | 6 +++ libc/CMakeLists.txt | 1 + libc/include/sys/mman.h | 10 +++++ libc/include/sys/syscall.h | 2 + libc/sys/mman.cpp | 23 +++++++++++ 7 files changed, 117 insertions(+) create mode 100644 libc/sys/mman.cpp diff --git a/kernel/include/kernel/Process.h b/kernel/include/kernel/Process.h index ffab05d1..32bb2c18 100644 --- a/kernel/include/kernel/Process.h +++ b/kernel/include/kernel/Process.h @@ -15,6 +15,7 @@ #include #include +#include #include namespace LibELF { class ELF; } @@ -115,6 +116,9 @@ namespace Kernel BAN::ErrorOr sys_read_dir_entries(int fd, DirectoryEntryList* buffer, size_t buffer_size); + BAN::ErrorOr sys_mmap(const sys_mmap_t&); + BAN::ErrorOr sys_munmap(void* addr, size_t len); + BAN::ErrorOr sys_alloc(size_t); BAN::ErrorOr sys_free(void*); @@ -177,6 +181,8 @@ namespace Kernel BAN::String m_working_directory; BAN::Vector m_threads; + BAN::Vector> m_private_anonymous_mappings; + BAN::Vector> m_fixed_width_allocators; BAN::UniqPtr m_general_allocator; diff --git a/kernel/kernel/Process.cpp b/kernel/kernel/Process.cpp index db2e9abc..13de5435 100644 --- a/kernel/kernel/Process.cpp +++ b/kernel/kernel/Process.cpp @@ -159,6 +159,7 @@ namespace Kernel ASSERT(m_threads.empty()); ASSERT(m_fixed_width_allocators.empty()); ASSERT(!m_general_allocator); + ASSERT(m_private_anonymous_mappings.empty()); ASSERT(m_mapped_ranges.empty()); ASSERT(m_exit_status.waiting == 0); ASSERT(&PageTable::current() != m_page_table.ptr()); @@ -192,6 +193,7 @@ namespace Kernel m_open_file_descriptors.close_all(); // NOTE: We must 
unmap ranges while the page table is still alive + m_private_anonymous_mappings.clear(); m_mapped_ranges.clear(); // NOTE: We must clear allocators while the page table is still alive @@ -358,6 +360,11 @@ namespace Kernel OpenFileDescriptorSet open_file_descriptors(m_credentials); TRY(open_file_descriptors.clone_from(m_open_file_descriptors)); + BAN::Vector> private_anonymous_mappings; + TRY(private_anonymous_mappings.reserve(m_private_anonymous_mappings.size())); + for (auto& private_anonymous_mapping : m_private_anonymous_mappings) + MUST(private_anonymous_mappings.push_back(TRY(private_anonymous_mapping->clone(*page_table)))); + BAN::Vector> mapped_ranges; TRY(mapped_ranges.reserve(m_mapped_ranges.size())); for (auto& mapped_range : m_mapped_ranges) @@ -378,6 +385,7 @@ namespace Kernel forked->m_working_directory = BAN::move(working_directory); forked->m_page_table = BAN::move(page_table); forked->m_open_file_descriptors = BAN::move(open_file_descriptors); + forked->m_private_anonymous_mappings = BAN::move(private_anonymous_mappings); forked->m_mapped_ranges = BAN::move(mapped_ranges); forked->m_fixed_width_allocators = BAN::move(fixed_width_allocators); forked->m_general_allocator = BAN::move(general_allocator); @@ -428,6 +436,7 @@ namespace Kernel m_fixed_width_allocators.clear(); m_general_allocator.clear(); + m_private_anonymous_mappings.clear(); m_mapped_ranges.clear(); load_elf_to_memory(*elf); @@ -811,6 +820,66 @@ namespace Kernel return (long)buffer; } + BAN::ErrorOr Process::sys_mmap(const sys_mmap_t& args) /* only MAP_ANONYMOUS | MAP_PRIVATE with addr == nullptr is handled; returns the new mapping's vaddr */ + { + if (args.prot != PROT_NONE && args.prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC)) + return BAN::Error::from_errno(EINVAL); + + PageTable::flags_t flags = PageTable::Flags::UserSupervisor; + if (args.prot & PROT_READ) + flags |= PageTable::Flags::Present; + if (args.prot & PROT_WRITE) + flags |= PageTable::Flags::ReadWrite | PageTable::Flags::Present; + if (args.prot & PROT_EXEC) + flags |= PageTable::Flags::Execute | PageTable::Flags::Present; +
+ if (args.flags == (MAP_ANONYMOUS | MAP_PRIVATE)) + { + if (args.addr != nullptr) + return BAN::Error::from_errno(ENOTSUP); + if (args.off != 0) + return BAN::Error::from_errno(EINVAL); + if (args.len == 0 || args.len % PAGE_SIZE != 0) /* POSIX: a zero-length mapping is EINVAL */ + return BAN::Error::from_errno(EINVAL); + + auto range = TRY(VirtualRange::create_to_vaddr_range( + page_table(), + 0x400000, KERNEL_OFFSET, + args.len, + PageTable::Flags::UserSupervisor | PageTable::Flags::ReadWrite | PageTable::Flags::Present /* NOTE(review): the prot-derived 'flags' computed above is never used, so every mapping is created read-write */ + )); + range->set_zero(); + + LockGuard _(m_lock); + TRY(m_private_anonymous_mappings.push_back(BAN::move(range))); + return m_private_anonymous_mappings.back()->vaddr(); + } + + return BAN::Error::from_errno(ENOTSUP); + } + + BAN::ErrorOr Process::sys_munmap(void* addr, size_t len) /* drops, in its entirety, every private anonymous mapping that overlaps [addr, addr + len) */ + { + if (len == 0) + return BAN::Error::from_errno(EINVAL); + + vaddr_t vaddr = (vaddr_t)addr; + if (vaddr % PAGE_SIZE != 0) + return BAN::Error::from_errno(EINVAL); + + LockGuard _(m_lock); + + for (size_t i = m_private_anonymous_mappings.size(); i-- > 0;) /* walk backwards so remove(i) cannot make the loop skip an entry */ + { + auto& mapping = m_private_anonymous_mappings[i]; + if (vaddr + len <= mapping->vaddr() || vaddr >= mapping->vaddr() + mapping->size()) /* half-open ranges: a mapping that merely touches the range does not overlap it */ + continue; + m_private_anonymous_mappings.remove(i); + } + + return 0; + } + static constexpr size_t allocator_size_for_allocation(size_t value) { if (value <= 256) { diff --git a/kernel/kernel/Syscall.cpp b/kernel/kernel/Syscall.cpp index f95aa910..05034427 100644 --- a/kernel/kernel/Syscall.cpp +++ b/kernel/kernel/Syscall.cpp @@ -194,6 +194,12 @@ namespace Kernel case SYS_SYNC: ret = Process::current().sys_sync(); break; + case SYS_MMAP: + ret = Process::current().sys_mmap(*(const sys_mmap_t*)arg1); /* NOTE(review): arg1 is a user-supplied pointer dereferenced without validation here - confirm it is checked elsewhere */ + break; + case SYS_MUNMAP: + ret = Process::current().sys_munmap((void*)arg1, (size_t)arg2); + break; default: dwarnln("Unknown syscall {}", syscall); break; diff --git a/libc/CMakeLists.txt b/libc/CMakeLists.txt index c7d32bf8..f7db3d60 100644 --- a/libc/CMakeLists.txt +++ b/libc/CMakeLists.txt @@ -13,6 +13,7 @@ set(LIBC_SOURCES
stdio.cpp stdlib.cpp string.cpp + sys/mman.cpp sys/stat.cpp sys/wait.cpp termios.cpp diff --git a/libc/include/sys/mman.h b/libc/include/sys/mman.h index ee79bc85..9c168fe0 100644 --- a/libc/include/sys/mman.h +++ b/libc/include/sys/mman.h @@ -46,6 +46,16 @@ struct posix_typed_mem_info size_t posix_tmi_length; /* Maximum length which may be allocated from a typed memory object. */ }; +struct sys_mmap_t /* mmap() arguments bundled into one struct; SYS_MMAP receives a pointer to it */ +{ + void* addr; + size_t len; + int prot; + int flags; + int fildes; + off_t off; +}; + int mlock(const void* addr, size_t len); int mlockall(int flags); void* mmap(void* addr, size_t len, int prot, int flags, int fildes, off_t off); diff --git a/libc/include/sys/syscall.h b/libc/include/sys/syscall.h index 67b05641..ee452d0c 100644 --- a/libc/include/sys/syscall.h +++ b/libc/include/sys/syscall.h @@ -55,6 +55,8 @@ __BEGIN_DECLS #define SYS_FSTATAT 48 #define SYS_STAT 49 // stat/lstat #define SYS_SYNC 50 +#define SYS_MMAP 51 +#define SYS_MUNMAP 52 __END_DECLS diff --git a/libc/sys/mman.cpp b/libc/sys/mman.cpp new file mode 100644 index 00000000..c34e65f2 --- /dev/null +++ b/libc/sys/mman.cpp @@ -0,0 +1,23 @@ +#include +#include +#include + +void* mmap(void* addr, size_t len, int prot, int flags, int fildes, off_t off) /* packs the arguments into sys_mmap_t and issues SYS_MMAP; returns MAP_FAILED when the syscall returns -1 */ +{ + sys_mmap_t args { + .addr = addr, + .len = len, + .prot = prot, + .flags = flags, .fildes = fildes, /* forward the caller's descriptor instead of silently sending 0 */ + .off = off + }; + long ret = syscall(SYS_MMAP, &args); + if (ret == -1) + return MAP_FAILED; /* POSIX failure sentinel; assumes sys/mman.h defines MAP_FAILED - confirm */ + return (void*)ret; +} + +int munmap(void* addr, size_t len) /* thin wrapper: forwards addr and len to SYS_MUNMAP and returns its result */ +{ + return syscall(SYS_MUNMAP, addr, len); +}