diff --git a/README.md b/README.md index 511dd6d581..cb676612ed 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,9 @@ You can find a live demo [here](https://bananymous.com/banan-os) - [ ] Program launcher - [ ] Some nice apps - [x] ELF dynamic linking -- [ ] copy-on-write memory +- [x] copy-on-write memory + - [x] file mappings + - [ ] anonymous mappings #### Drivers - [x] NVMe disks diff --git a/kernel/include/kernel/FS/Inode.h b/kernel/include/kernel/FS/Inode.h index c79615d8b4..4a8d60f0cf 100644 --- a/kernel/include/kernel/FS/Inode.h +++ b/kernel/include/kernel/FS/Inode.h @@ -159,6 +159,7 @@ namespace Kernel private: BAN::WeakPtr m_shared_region; friend class FileBackedRegion; + friend class SharedFileData; }; } diff --git a/kernel/include/kernel/Memory/FileBackedRegion.h b/kernel/include/kernel/Memory/FileBackedRegion.h index dcbf2e9656..3ab47c45af 100644 --- a/kernel/include/kernel/Memory/FileBackedRegion.h +++ b/kernel/include/kernel/Memory/FileBackedRegion.h @@ -12,6 +12,8 @@ namespace Kernel void sync(size_t page_index); + Mutex mutex; + // FIXME: this should probably be ordered tree like map // for fast lookup and less memory usage BAN::Vector pages; @@ -33,7 +35,7 @@ namespace Kernel virtual BAN::ErrorOr> clone(PageTable& new_page_table) override; protected: - virtual BAN::ErrorOr allocate_page_containing_impl(vaddr_t vaddr) override; + virtual BAN::ErrorOr allocate_page_containing_impl(vaddr_t vaddr, bool wants_write) override; private: FileBackedRegion(BAN::RefPtr, PageTable&, off_t offset, ssize_t size, Type flags, PageTable::flags_t page_flags); @@ -42,7 +44,7 @@ namespace Kernel BAN::RefPtr m_inode; const off_t m_offset; - // FIXME: is this even synchronized? + BAN::Vector m_dirty_pages; BAN::RefPtr m_shared_data; }; diff --git a/kernel/include/kernel/Memory/MemoryBackedRegion.h b/kernel/include/kernel/Memory/MemoryBackedRegion.h index 627b3a08bb..a030dd069b 100644 --- a/kernel/include/kernel/Memory/MemoryBackedRegion.h +++ b/kernel/include/kernel/Memory/MemoryBackedRegion.h @@ -23,7 +23,7 @@ namespace Kernel BAN::ErrorOr copy_data_to_region(size_t offset_into_region, const uint8_t* buffer, size_t buffer_size); protected: - virtual BAN::ErrorOr allocate_page_containing_impl(vaddr_t vaddr) override; + virtual BAN::ErrorOr allocate_page_containing_impl(vaddr_t vaddr, bool wants_write) override; private: MemoryBackedRegion(PageTable&, size_t size, Type, PageTable::flags_t); diff --git a/kernel/include/kernel/Memory/MemoryRegion.h b/kernel/include/kernel/Memory/MemoryRegion.h index f6a7142cbc..42e90c14cf 100644 --- a/kernel/include/kernel/Memory/MemoryRegion.h +++ b/kernel/include/kernel/Memory/MemoryRegion.h @@ -34,6 +34,8 @@ namespace Kernel bool contains_fully(vaddr_t address, size_t size) const; bool overlaps(vaddr_t address, size_t size) const; + bool writable() const { return m_flags & PageTable::Flags::ReadWrite; } + size_t size() const { return m_size; } vaddr_t vaddr() const { return m_vaddr; } @@ -45,7 +47,7 @@ namespace Kernel // Returns error if no memory was available // Returns true if page was succesfully allocated // Returns false if page was already allocated - BAN::ErrorOr allocate_page_containing(vaddr_t address); + BAN::ErrorOr allocate_page_containing(vaddr_t address, bool wants_write); virtual BAN::ErrorOr> clone(PageTable& new_page_table) = 0; @@ -53,7 +55,7 @@ namespace Kernel MemoryRegion(PageTable&, size_t size, Type type, PageTable::flags_t flags); BAN::ErrorOr initialize(AddressRange); - virtual BAN::ErrorOr allocate_page_containing_impl(vaddr_t address) = 0; + virtual BAN::ErrorOr allocate_page_containing_impl(vaddr_t address, bool wants_write) = 0; protected: PageTable& m_page_table; diff --git a/kernel/include/kernel/Memory/SharedMemoryObject.h b/kernel/include/kernel/Memory/SharedMemoryObject.h index 94f82b545d..b30e5f4a62 100644 --- a/kernel/include/kernel/Memory/SharedMemoryObject.h +++ b/kernel/include/kernel/Memory/SharedMemoryObject.h @@ -59,7 +59,7 @@ namespace Kernel virtual BAN::ErrorOr msync(vaddr_t, size_t, int) override { return {}; } protected: - virtual BAN::ErrorOr allocate_page_containing_impl(vaddr_t vaddr) override; + virtual BAN::ErrorOr allocate_page_containing_impl(vaddr_t vaddr, bool wants_write) override; private: SharedMemoryObject(BAN::RefPtr object, PageTable& page_table) diff --git a/kernel/include/kernel/Process.h b/kernel/include/kernel/Process.h index 53417976c0..6c9b474962 100644 --- a/kernel/include/kernel/Process.h +++ b/kernel/include/kernel/Process.h @@ -210,7 +210,7 @@ namespace Kernel // Returns error if page could not be allocated // Returns true if the page was allocated successfully // Return false if access was page violation (segfault) - BAN::ErrorOr allocate_page_for_demand_paging(vaddr_t addr); + BAN::ErrorOr allocate_page_for_demand_paging(vaddr_t addr, bool wants_write); BAN::ErrorOr absolute_path_of(BAN::StringView) const; @@ -225,8 +225,8 @@ namespace Kernel static BAN::ErrorOr> load_elf_for_exec(const Credentials&, BAN::StringView file_path, const BAN::String& cwd, Kernel::PageTable&); BAN::ErrorOr validate_string_access(const char*); - BAN::ErrorOr validate_pointer_access_check(const void*, size_t); - BAN::ErrorOr validate_pointer_access(const void*, size_t); + BAN::ErrorOr validate_pointer_access_check(const void*, size_t, bool needs_write); + BAN::ErrorOr validate_pointer_access(const void*, size_t, bool needs_write); uint64_t signal_pending_mask() const { diff --git a/kernel/kernel/Device/FramebufferDevice.cpp b/kernel/kernel/Device/FramebufferDevice.cpp index 5a2e27b2ab..80397c41c5 100644 --- a/kernel/kernel/Device/FramebufferDevice.cpp +++ b/kernel/kernel/Device/FramebufferDevice.cpp @@ -280,8 +280,10 @@ namespace Kernel // Returns error if no memory was available // Returns true if page was succesfully allocated // Returns false if page was already allocated - virtual BAN::ErrorOr allocate_page_containing_impl(vaddr_t vaddr) override + virtual BAN::ErrorOr allocate_page_containing_impl(vaddr_t vaddr, bool wants_write) override { + (void)wants_write; + vaddr &= PAGE_ADDR_MASK; if (m_page_table.physical_address_of(vaddr)) return false; diff --git a/kernel/kernel/IDT.cpp b/kernel/kernel/IDT.cpp index 2211c1b449..df5214fa43 100644 --- a/kernel/kernel/IDT.cpp +++ b/kernel/kernel/IDT.cpp @@ -200,13 +200,13 @@ namespace Kernel goto done; } - // Try demand paging on non present pages - PageFaultError page_fault_error; - page_fault_error.raw = error; - if (pid && !page_fault_error.present) + if (pid) { + PageFaultError page_fault_error; + page_fault_error.raw = error; + Processor::set_interrupt_state(InterruptState::Enabled); - auto result = Process::current().allocate_page_for_demand_paging(regs->cr2); + auto result = Process::current().allocate_page_for_demand_paging(regs->cr2, page_fault_error.write); Processor::set_interrupt_state(InterruptState::Disabled); if (!result.is_error() && result.value()) diff --git a/kernel/kernel/Memory/FileBackedRegion.cpp b/kernel/kernel/Memory/FileBackedRegion.cpp index 2871e449b4..6609e72b64 100644 --- a/kernel/kernel/Memory/FileBackedRegion.cpp +++ b/kernel/kernel/Memory/FileBackedRegion.cpp @@ -13,20 +13,8 @@ namespace Kernel if (offset < 0 || offset % PAGE_SIZE || size == 0) return BAN::Error::from_errno(EINVAL); - switch (type) - { - case Type::PRIVATE: - if (offset >= inode->size()) - return BAN::Error::from_errno(EOVERFLOW); - break; - case Type::SHARED: - if ((size > (size_t)inode->size() || (size_t)offset > (size_t)inode->size() - size)) - return BAN::Error::from_errno(EOVERFLOW); - break; - default: - ASSERT_NOT_REACHED(); - break; - } + if ((size > (size_t)inode->size() || (size_t)offset > (size_t)inode->size() - size)) + return BAN::Error::from_errno(EOVERFLOW); auto* region_ptr = new FileBackedRegion(inode, page_table, offset, size, type, flags); if (region_ptr == nullptr) @@ -35,19 +23,19 @@ namespace Kernel TRY(region->initialize(address_range)); - if (type == Type::SHARED) + if (type == Type::PRIVATE && (flags & PageTable::Flags::ReadWrite)) + TRY(region->m_dirty_pages.resize(BAN::Math::div_round_up(size, PAGE_SIZE))); + + LockGuard _(inode->m_mutex); + if (inode->m_shared_region.valid()) + region->m_shared_data = inode->m_shared_region.lock(); + else { - LockGuard _(inode->m_mutex); - if (inode->m_shared_region.valid()) - region->m_shared_data = inode->m_shared_region.lock(); - else - { - auto shared_data = TRY(BAN::RefPtr::create()); - TRY(shared_data->pages.resize(BAN::Math::div_round_up(inode->size(), PAGE_SIZE))); - shared_data->inode = inode; - inode->m_shared_region = TRY(shared_data->get_weak_ptr()); - region->m_shared_data = BAN::move(shared_data); - } + auto shared_data = TRY(BAN::RefPtr::create()); + TRY(shared_data->pages.resize(BAN::Math::div_round_up(inode->size(), PAGE_SIZE))); + shared_data->inode = inode; + inode->m_shared_region = TRY(shared_data->get_weak_ptr()); + region->m_shared_data = BAN::move(shared_data); } return region; @@ -64,32 +52,27 @@ namespace Kernel { if (m_vaddr == 0) return; - - if (m_type == Type::SHARED) - return; - - size_t needed_pages = BAN::Math::div_round_up(m_size, PAGE_SIZE); - for (size_t i = 0; i < needed_pages; i++) - { - paddr_t paddr = m_page_table.physical_address_of(m_vaddr + i * PAGE_SIZE); - if (paddr != 0) - Heap::get().release_page(paddr); - } + for (paddr_t dirty_page : m_dirty_pages) + if (dirty_page) + Heap::get().release_page(dirty_page); } SharedFileData::~SharedFileData() { + // no-one should be referencing this anymore + [[maybe_unused]] bool success = mutex.try_lock(); + ASSERT(success); + for (size_t i = 0; i < pages.size(); i++) - { - if (pages[i] == 0) - continue; - sync(i); - } + if (pages[i]) + sync(i); + + mutex.unlock(); } void SharedFileData::sync(size_t page_index) { - // FIXME: should this be locked? + ASSERT(mutex.is_locked()); if (pages[page_index] == 0) return; @@ -105,13 +88,14 @@ namespace Kernel BAN::ErrorOr FileBackedRegion::msync(vaddr_t address, size_t size, int flags) { if (flags != MS_SYNC) - return BAN::Error::from_errno(ENOTSUP); + dprintln("async file backed mmap msync"); if (m_type != Type::SHARED) return {}; - vaddr_t first_page = address & PAGE_ADDR_MASK; - vaddr_t last_page = BAN::Math::div_round_up(address + size, PAGE_SIZE) * PAGE_SIZE; + const vaddr_t first_page = address & PAGE_ADDR_MASK; + const vaddr_t last_page = BAN::Math::div_round_up(address + size, PAGE_SIZE) * PAGE_SIZE; + LockGuard _(m_shared_data->mutex); for (vaddr_t page_addr = first_page; page_addr < last_page; page_addr += PAGE_SIZE) if (contains(page_addr)) m_shared_data->sync((page_addr - m_vaddr) / PAGE_SIZE); @@ -119,89 +103,96 @@ namespace Kernel return {}; } - BAN::ErrorOr FileBackedRegion::allocate_page_containing_impl(vaddr_t address) + BAN::ErrorOr FileBackedRegion::allocate_page_containing_impl(vaddr_t address, bool wants_write) { ASSERT(contains(address)); + ASSERT(m_type == Type::SHARED || m_type == Type::PRIVATE); + ASSERT(!wants_write || writable()); - // Check if address is already mapped const vaddr_t vaddr = address & PAGE_ADDR_MASK; - if (m_page_table.physical_address_of(vaddr) != 0) - return false; - if (m_type == Type::PRIVATE) + const size_t local_page_index = (vaddr - m_vaddr) / PAGE_SIZE; + const size_t shared_page_index = local_page_index + m_offset / PAGE_SIZE; + + if (m_page_table.physical_address_of(vaddr) == 0) { - // Map new physcial page to address - paddr_t paddr = Heap::get().take_free_page(); - if (paddr == 0) - return BAN::Error::from_errno(ENOMEM); + ASSERT(m_shared_data); + LockGuard _(m_shared_data->mutex); - // Temporarily force mapping to be writable so kernel can write to it - m_page_table.map_page_at(paddr, vaddr, m_flags | PageTable::Flags::ReadWrite); - - ASSERT(&PageTable::current() == &m_page_table); - memset(reinterpret_cast(vaddr), 0x00, PAGE_SIZE); - - const size_t file_offset = m_offset + (vaddr - m_vaddr); - - if (file_offset < static_cast(m_inode->size())) + bool shared_data_has_correct_page = false; + if (m_shared_data->pages[shared_page_index] == 0) { - const size_t bytes = BAN::Math::min(BAN::Math::min(m_offset + m_size, m_inode->size()) - file_offset, PAGE_SIZE); - auto read_ret = m_inode->read(file_offset, BAN::ByteSpan((uint8_t*)vaddr, bytes)); - - if (read_ret.is_error()) - { - Heap::get().release_page(paddr); - m_page_table.unmap_page(vaddr); - return read_ret.release_error(); - } - - if (read_ret.value() < bytes) - { - dwarnln("Only {}/{} bytes read", read_ret.value(), bytes); - Heap::get().release_page(paddr); - m_page_table.unmap_page(vaddr); - return BAN::Error::from_errno(EIO); - } - } - - // Disable writable if not wanted - if (!(m_flags & PageTable::Flags::ReadWrite)) - m_page_table.map_page_at(paddr, vaddr, m_flags); - } - else if (m_type == Type::SHARED) - { - LockGuard _(m_inode->m_mutex); - ASSERT(m_inode->m_shared_region.valid()); - ASSERT(m_shared_data->pages.size() == BAN::Math::div_round_up(m_inode->size(), PAGE_SIZE)); - - auto& pages = m_shared_data->pages; - size_t page_index = (vaddr - m_vaddr) / PAGE_SIZE; - - if (pages[page_index] == 0) - { - pages[page_index] = Heap::get().take_free_page(); - if (pages[page_index] == 0) + m_shared_data->pages[shared_page_index] = Heap::get().take_free_page(); + if (m_shared_data->pages[shared_page_index] == 0) return BAN::Error::from_errno(ENOMEM); - size_t offset = vaddr - m_vaddr; - size_t bytes = BAN::Math::min(m_size - offset, PAGE_SIZE); + const size_t offset = (vaddr - m_vaddr) + m_offset; + ASSERT(offset % 4096 == 0); + const size_t bytes = BAN::Math::min(m_inode->size() - offset, PAGE_SIZE); + + memset(m_shared_data->page_buffer, 0x00, PAGE_SIZE); TRY(m_inode->read(offset, BAN::ByteSpan(m_shared_data->page_buffer, bytes))); + shared_data_has_correct_page = true; - PageTable::with_fast_page(pages[page_index], [&] { - memcpy(PageTable::fast_page_as_ptr(), m_shared_data->page_buffer, bytes); - memset(PageTable::fast_page_as_ptr(bytes), 0x00, PAGE_SIZE - bytes); + PageTable::with_fast_page(m_shared_data->pages[shared_page_index], [&] { + memcpy(PageTable::fast_page_as_ptr(), m_shared_data->page_buffer, PAGE_SIZE); }); } - paddr_t paddr = pages[page_index]; - ASSERT(paddr); - - m_page_table.map_page_at(paddr, vaddr, m_flags); + if (m_type == Type::PRIVATE && wants_write) + { + const paddr_t paddr = Heap::get().take_free_page(); + if (paddr == 0) + return BAN::Error::from_errno(ENOMEM); + if (!shared_data_has_correct_page) + { + PageTable::with_fast_page(m_shared_data->pages[shared_page_index], [&] { + memcpy(m_shared_data->page_buffer, PageTable::fast_page_as_ptr(), PAGE_SIZE); + }); + } + PageTable::with_fast_page(paddr, [&] { + memcpy(PageTable::fast_page_as_ptr(), m_shared_data->page_buffer, PAGE_SIZE); + }); + m_dirty_pages[local_page_index] = paddr; + m_page_table.map_page_at(paddr, vaddr, m_flags); + } + else + { + const paddr_t paddr = m_shared_data->pages[shared_page_index]; + auto flags = m_flags; + if (m_type == Type::PRIVATE) + flags &= ~PageTable::Flags::ReadWrite; + m_page_table.map_page_at(paddr, vaddr, flags); + } } else { - ASSERT_NOT_REACHED(); + // page does not need remappings + if (m_type != Type::PRIVATE || !wants_write) + return false; + ASSERT(writable()); + + // page is already mapped as writable + if (m_page_table.get_page_flags(vaddr) & PageTable::Flags::ReadWrite) + return false; + + const paddr_t paddr = Heap::get().take_free_page(); + if (paddr == 0) + return BAN::Error::from_errno(ENOMEM); + + ASSERT(m_shared_data); + LockGuard _(m_shared_data->mutex); + ASSERT(m_shared_data->pages[shared_page_index]); + + PageTable::with_fast_page(m_shared_data->pages[shared_page_index], [&] { + memcpy(m_shared_data->page_buffer, PageTable::fast_page_as_ptr(), PAGE_SIZE); + }); + PageTable::with_fast_page(paddr, [&] { + memcpy(PageTable::fast_page_as_ptr(), m_shared_data->page_buffer, PAGE_SIZE); + }); + m_dirty_pages[local_page_index] = paddr; + m_page_table.map_page_at(paddr, vaddr, m_flags); } return true; @@ -212,31 +203,26 @@ namespace Kernel const size_t aligned_size = (m_size + PAGE_SIZE - 1) & PAGE_ADDR_MASK; auto result = TRY(FileBackedRegion::create(m_inode, page_table, m_offset, m_size, { .start = m_vaddr, .end = m_vaddr + aligned_size }, m_type, m_flags)); - // shared regions can just go through demand paging - if (m_type == Type::SHARED) - return BAN::UniqPtr(BAN::move(result)); + // non-dirty pages can go through demand paging - ASSERT(m_type == Type::PRIVATE); - - for (size_t offset = 0; offset < m_size; offset += PAGE_SIZE) + for (size_t i = 0; i < m_dirty_pages.size(); i++) { - const vaddr_t vaddr = m_vaddr + offset; - if (m_page_table.physical_address_of(vaddr) == 0) + if (m_dirty_pages[i] == 0) continue; - ASSERT(&PageTable::current() == &m_page_table); + const vaddr_t vaddr = m_vaddr + i * PAGE_SIZE; const paddr_t paddr = Heap::get().take_free_page(); if (paddr == 0) return BAN::Error::from_errno(ENOMEM); - page_table.map_page_at(paddr, vaddr, m_flags); - - const size_t to_copy = BAN::Math::min(PAGE_SIZE, m_size - offset); + ASSERT(&m_page_table == &PageTable::current() || &m_page_table == &PageTable::kernel()); PageTable::with_fast_page(paddr, [&] { - memcpy(PageTable::fast_page_as_ptr(), reinterpret_cast(vaddr), to_copy); - memset(PageTable::fast_page_as_ptr(to_copy), 0, PAGE_SIZE - to_copy); + memcpy(PageTable::fast_page_as_ptr(), reinterpret_cast(vaddr), PAGE_SIZE); }); + + result->m_page_table.map_page_at(paddr, vaddr, m_flags); + result->m_dirty_pages[i] = paddr; } return BAN::UniqPtr(BAN::move(result)); diff --git a/kernel/kernel/Memory/MemoryBackedRegion.cpp b/kernel/kernel/Memory/MemoryBackedRegion.cpp index 59ca2a4eed..ede8fa5a60 100644 --- a/kernel/kernel/Memory/MemoryBackedRegion.cpp +++ b/kernel/kernel/Memory/MemoryBackedRegion.cpp @@ -38,11 +38,12 @@ namespace Kernel } } - BAN::ErrorOr MemoryBackedRegion::allocate_page_containing_impl(vaddr_t address) + BAN::ErrorOr MemoryBackedRegion::allocate_page_containing_impl(vaddr_t address, bool wants_write) { ASSERT(m_type == Type::PRIVATE); ASSERT(contains(address)); + (void)wants_write; // Check if address is already mapped vaddr_t vaddr = address & PAGE_ADDR_MASK; @@ -93,7 +94,7 @@ namespace Kernel vaddr_t page_offset = write_vaddr % PAGE_SIZE; size_t bytes = BAN::Math::min(buffer_size - written, PAGE_SIZE - page_offset); - TRY(allocate_page_containing(write_vaddr)); + TRY(allocate_page_containing(write_vaddr, true)); PageTable::with_fast_page(m_page_table.physical_address_of(write_vaddr & PAGE_ADDR_MASK), [&] { memcpy(PageTable::fast_page_as_ptr(page_offset), (void*)(buffer + written), bytes); diff --git a/kernel/kernel/Memory/MemoryRegion.cpp b/kernel/kernel/Memory/MemoryRegion.cpp index b95dc2f84d..1dfb2b4d76 100644 --- a/kernel/kernel/Memory/MemoryRegion.cpp +++ b/kernel/kernel/Memory/MemoryRegion.cpp @@ -47,9 +47,12 @@ namespace Kernel return true; } - BAN::ErrorOr MemoryRegion::allocate_page_containing(vaddr_t address) + BAN::ErrorOr MemoryRegion::allocate_page_containing(vaddr_t address, bool wants_write) { - auto ret = allocate_page_containing_impl(address); + ASSERT(contains(address)); + if (wants_write && !writable()) + return false; + auto ret = allocate_page_containing_impl(address, wants_write); if (!ret.is_error() && ret.value()) m_physical_page_count++; return ret; diff --git a/kernel/kernel/Memory/SharedMemoryObject.cpp b/kernel/kernel/Memory/SharedMemoryObject.cpp index e6bb7285a6..76480bea35 100644 --- a/kernel/kernel/Memory/SharedMemoryObject.cpp +++ b/kernel/kernel/Memory/SharedMemoryObject.cpp @@ -87,9 +87,10 @@ namespace Kernel return BAN::UniqPtr(BAN::move(region)); } - BAN::ErrorOr SharedMemoryObject::allocate_page_containing_impl(vaddr_t address) + BAN::ErrorOr SharedMemoryObject::allocate_page_containing_impl(vaddr_t address, bool wants_write) { ASSERT(contains(address)); + (void)wants_write; // Check if address is already mapped vaddr_t vaddr = address & PAGE_ADDR_MASK; diff --git a/kernel/kernel/PCI.cpp b/kernel/kernel/PCI.cpp index 828a03b911..06e19b828e 100644 --- a/kernel/kernel/PCI.cpp +++ b/kernel/kernel/PCI.cpp @@ -655,7 +655,11 @@ namespace Kernel::PCI if (m_vaddr == 0) return BAN::Error::from_errno(ENOMEM); - PageTable::kernel().map_range_at(m_paddr, m_vaddr, m_size, PageTable::Flags::ReadWrite | PageTable::Flags::Present); + PageTable::kernel().map_range_at( + m_paddr, m_vaddr, m_size, + PageTable::Flags::ReadWrite | PageTable::Flags::Present, + PageTable::MemoryType::Uncached + ); return {}; } diff --git a/kernel/kernel/Process.cpp b/kernel/kernel/Process.cpp index 56027d5f88..c6f3e25459 100644 --- a/kernel/kernel/Process.cpp +++ b/kernel/kernel/Process.cpp @@ -362,7 +362,7 @@ namespace Kernel { LockGuard _(m_process_lock); - TRY(validate_pointer_access(termios, sizeof(termios))); + TRY(validate_pointer_access(termios, sizeof(termios), true)); auto inode = TRY(m_open_file_descriptors.inode_of(fildes)); if (!inode->is_tty()) @@ -381,7 +381,7 @@ namespace Kernel LockGuard _(m_process_lock); - TRY(validate_pointer_access(termios, sizeof(termios))); + TRY(validate_pointer_access(termios, sizeof(termios), false)); auto inode = TRY(m_open_file_descriptors.inode_of(fildes)); if (!inode->is_tty()) @@ -484,7 +484,7 @@ namespace Kernel BAN::Vector str_argv; for (int i = 0; argv && argv[i]; i++) { - TRY(validate_pointer_access(argv + i, sizeof(char*))); + TRY(validate_pointer_access(argv + i, sizeof(char*), false)); TRY(validate_string_access(argv[i])); TRY(str_argv.emplace_back(argv[i])); } @@ -492,7 +492,7 @@ namespace Kernel BAN::Vector str_envp; for (int i = 0; envp && envp[i]; i++) { - TRY(validate_pointer_access(envp + 1, sizeof(char*))); + TRY(validate_pointer_access(envp + 1, sizeof(char*), false)); TRY(validate_string_access(envp[i])); TRY(str_envp.emplace_back(envp[i])); } @@ -640,7 +640,7 @@ namespace Kernel if (stat_loc) { LockGuard _(m_process_lock); - TRY(validate_pointer_access(stat_loc, sizeof(stat_loc))); + TRY(validate_pointer_access(stat_loc, sizeof(stat_loc), true)); *stat_loc = exit_code; } remove_pending_signal(SIGCHLD); @@ -676,9 +676,9 @@ namespace Kernel { { LockGuard _(m_process_lock); - TRY(validate_pointer_access(rqtp, sizeof(timespec))); + TRY(validate_pointer_access(rqtp, sizeof(timespec), false)); if (rmtp) - TRY(validate_pointer_access(rmtp, sizeof(timespec))); + TRY(validate_pointer_access(rmtp, sizeof(timespec), true)); } const uint64_t sleep_ns = (rqtp->tv_sec * 1'000'000'000) + rqtp->tv_nsec; @@ -718,9 +718,9 @@ namespace Kernel LockGuard _(m_process_lock); if (value) - TRY(validate_pointer_access(value, sizeof(itimerval))); + TRY(validate_pointer_access(value, sizeof(itimerval), false)); if (ovalue) - TRY(validate_pointer_access(ovalue, sizeof(itimerval))); + TRY(validate_pointer_access(ovalue, sizeof(itimerval), true)); { SpinLockGuard _(s_process_lock); @@ -846,7 +846,7 @@ namespace Kernel return {}; } - BAN::ErrorOr Process::allocate_page_for_demand_paging(vaddr_t address) + BAN::ErrorOr Process::allocate_page_for_demand_paging(vaddr_t address, bool wants_write) { ASSERT(&Process::current() == this); @@ -862,7 +862,7 @@ namespace Kernel { if (!region->contains(address)) continue; - TRY(region->allocate_page_containing(address)); + TRY(region->allocate_page_containing(address, wants_write)); return true; } @@ -971,14 +971,14 @@ namespace Kernel BAN::ErrorOr Process::sys_read(int fd, void* buffer, size_t count) { LockGuard _(m_process_lock); - TRY(validate_pointer_access(buffer, count)); + TRY(validate_pointer_access(buffer, count, true)); return TRY(m_open_file_descriptors.read(fd, BAN::ByteSpan((uint8_t*)buffer, count))); } BAN::ErrorOr Process::sys_write(int fd, const void* buffer, size_t count) { LockGuard _(m_process_lock); - TRY(validate_pointer_access(buffer, count)); + TRY(validate_pointer_access(buffer, count, false)); return TRY(m_open_file_descriptors.write(fd, BAN::ByteSpan((uint8_t*)buffer, count))); } @@ -1064,7 +1064,7 @@ namespace Kernel { LockGuard _(m_process_lock); TRY(validate_string_access(path)); - TRY(validate_pointer_access(buffer, bufsize)); + TRY(validate_pointer_access(buffer, bufsize, true)); auto absolute_path = TRY(absolute_path_of(path)); @@ -1075,7 +1075,7 @@ namespace Kernel { LockGuard _(m_process_lock); TRY(validate_string_access(path)); - TRY(validate_pointer_access(buffer, bufsize)); + TRY(validate_pointer_access(buffer, bufsize, true)); // FIXME: handle O_SEARCH in fd auto parent_path = TRY(m_open_file_descriptors.path_of(fd)); @@ -1091,7 +1091,7 @@ namespace Kernel BAN::ErrorOr Process::sys_pread(int fd, void* buffer, size_t count, off_t offset) { LockGuard _(m_process_lock); - TRY(validate_pointer_access(buffer, count)); + TRY(validate_pointer_access(buffer, count, true)); auto inode = TRY(m_open_file_descriptors.inode_of(fd)); return TRY(inode->read(offset, { (uint8_t*)buffer, count })); } @@ -1144,8 +1144,8 @@ namespace Kernel BAN::ErrorOr Process::sys_getsockname(int socket, sockaddr* address, socklen_t* address_len) { LockGuard _(m_process_lock); - TRY(validate_pointer_access(address_len, sizeof(address_len))); - TRY(validate_pointer_access(address, *address_len)); + TRY(validate_pointer_access(address_len, sizeof(address_len), true)); + TRY(validate_pointer_access(address, *address_len, true)); auto inode = TRY(m_open_file_descriptors.inode_of(socket)); if (!inode->mode().ifsock()) @@ -1158,8 +1158,8 @@ namespace Kernel BAN::ErrorOr Process::sys_getsockopt(int socket, int level, int option_name, void* option_value, socklen_t* option_len) { LockGuard _(m_process_lock); - TRY(validate_pointer_access(option_len, sizeof(option_len))); - TRY(validate_pointer_access(option_value, *option_len)); + TRY(validate_pointer_access(option_len, sizeof(option_len), true)); + TRY(validate_pointer_access(option_value, *option_len, true)); auto inode = TRY(m_open_file_descriptors.inode_of(socket)); if (!inode->mode().ifsock()) @@ -1180,7 +1180,7 @@ namespace Kernel BAN::ErrorOr Process::sys_setsockopt(int socket, int level, int option_name, const void* option_value, socklen_t option_len) { LockGuard _(m_process_lock); - TRY(validate_pointer_access(option_value, option_len)); + TRY(validate_pointer_access(option_value, option_len, false)); auto inode = TRY(m_open_file_descriptors.inode_of(socket)); if (!inode->mode().ifsock()) @@ -1202,8 +1202,8 @@ namespace Kernel LockGuard _(m_process_lock); if (address) { - TRY(validate_pointer_access(address_len, sizeof(*address_len))); - TRY(validate_pointer_access(address, *address_len)); + TRY(validate_pointer_access(address_len, sizeof(*address_len), true)); + TRY(validate_pointer_access(address, *address_len, true)); } auto inode = TRY(m_open_file_descriptors.inode_of(socket)); @@ -1216,7 +1216,7 @@ namespace Kernel BAN::ErrorOr Process::sys_bind(int socket, const sockaddr* address, socklen_t address_len) { LockGuard _(m_process_lock); - TRY(validate_pointer_access(address, address_len)); + TRY(validate_pointer_access(address, address_len, false)); auto inode = TRY(m_open_file_descriptors.inode_of(socket)); if (!inode->mode().ifsock()) @@ -1229,7 +1229,7 @@ namespace Kernel BAN::ErrorOr Process::sys_connect(int socket, const sockaddr* address, socklen_t address_len) { LockGuard _(m_process_lock); - TRY(validate_pointer_access(address, address_len)); + TRY(validate_pointer_access(address, address_len, false)); auto inode = TRY(m_open_file_descriptors.inode_of(socket)); if (!inode->mode().ifsock()) @@ -1254,9 +1254,9 @@ namespace Kernel BAN::ErrorOr Process::sys_sendto(const sys_sendto_t* arguments) { LockGuard _(m_process_lock); - TRY(validate_pointer_access(arguments, sizeof(sys_sendto_t))); - TRY(validate_pointer_access(arguments->message, arguments->length)); - TRY(validate_pointer_access(arguments->dest_addr, arguments->dest_len)); + TRY(validate_pointer_access(arguments, sizeof(sys_sendto_t), false)); + TRY(validate_pointer_access(arguments->message, arguments->length, false)); + TRY(validate_pointer_access(arguments->dest_addr, arguments->dest_len, false)); auto inode = TRY(m_open_file_descriptors.inode_of(arguments->socket)); if (!inode->mode().ifsock()) @@ -1278,12 +1278,12 @@ namespace Kernel return BAN::Error::from_errno(EINVAL); LockGuard _(m_process_lock); - TRY(validate_pointer_access(arguments, sizeof(sys_recvfrom_t))); - TRY(validate_pointer_access(arguments->buffer, arguments->length)); + TRY(validate_pointer_access(arguments, sizeof(sys_recvfrom_t), false)); + TRY(validate_pointer_access(arguments->buffer, arguments->length, true)); if (arguments->address) { - TRY(validate_pointer_access(arguments->address_len, sizeof(*arguments->address_len))); - TRY(validate_pointer_access(arguments->address, *arguments->address_len)); + TRY(validate_pointer_access(arguments->address_len, sizeof(*arguments->address_len), true)); + TRY(validate_pointer_access(arguments->address, *arguments->address_len, true)); } auto inode = TRY(m_open_file_descriptors.inode_of(arguments->socket)); @@ -1309,17 +1309,17 @@ namespace Kernel { LockGuard _(m_process_lock); - TRY(validate_pointer_access(arguments, sizeof(sys_pselect_t))); + TRY(validate_pointer_access(arguments, sizeof(sys_pselect_t), false)); if (arguments->readfds) - TRY(validate_pointer_access(arguments->readfds, sizeof(fd_set))); + TRY(validate_pointer_access(arguments->readfds, sizeof(fd_set), true)); if (arguments->writefds) - TRY(validate_pointer_access(arguments->writefds, sizeof(fd_set))); + TRY(validate_pointer_access(arguments->writefds, sizeof(fd_set), true)); if (arguments->errorfds) - TRY(validate_pointer_access(arguments->errorfds, sizeof(fd_set))); + TRY(validate_pointer_access(arguments->errorfds, sizeof(fd_set), true)); if (arguments->timeout) - TRY(validate_pointer_access(arguments->timeout, sizeof(timespec))); + TRY(validate_pointer_access(arguments->timeout, sizeof(timespec), false)); if (arguments->sigmask) - TRY(validate_pointer_access(arguments->sigmask, sizeof(sigset_t))); + TRY(validate_pointer_access(arguments->sigmask, sizeof(sigset_t), false)); if (arguments->sigmask) return BAN::Error::from_errno(ENOTSUP); @@ -1399,7 +1399,7 @@ namespace Kernel BAN::ErrorOr Process::sys_pipe(int fildes[2]) { LockGuard _(m_process_lock); - TRY(validate_pointer_access(fildes, sizeof(int) * 2)); + TRY(validate_pointer_access(fildes, sizeof(int) * 2, true)); TRY(m_open_file_descriptors.pipe(fildes)); return 0; } @@ -1456,7 +1456,7 @@ namespace Kernel BAN::ErrorOr Process::sys_fstat(int fd, struct stat* buf) { LockGuard _(m_process_lock); - TRY(validate_pointer_access(buf, sizeof(struct stat))); + TRY(validate_pointer_access(buf, sizeof(struct stat), true)); TRY(m_open_file_descriptors.fstat(fd, buf)); return 0; } @@ -1464,7 +1464,7 @@ namespace Kernel BAN::ErrorOr Process::sys_fstatat(int fd, const char* path, struct stat* buf, int flag) { LockGuard _(m_process_lock); - TRY(validate_pointer_access(buf, sizeof(struct stat))); + TRY(validate_pointer_access(buf, sizeof(struct stat), true)); TRY(m_open_file_descriptors.fstatat(fd, path, buf, flag)); return 0; } @@ -1472,7 +1472,7 @@ namespace Kernel BAN::ErrorOr Process::sys_stat(const char* path, struct stat* buf, int flag) { LockGuard _(m_process_lock); - TRY(validate_pointer_access(buf, sizeof(struct stat))); + TRY(validate_pointer_access(buf, sizeof(struct stat), true)); TRY(m_open_file_descriptors.stat(TRY(absolute_path_of(path)), buf, flag)); return 0; } @@ -1481,7 +1481,7 @@ namespace Kernel { LockGuard _(m_process_lock); TRY(validate_string_access(path)); - TRY(validate_pointer_access(buffer, PATH_MAX)); + TRY(validate_pointer_access(buffer, PATH_MAX, true)); auto absolute_path = TRY(absolute_path_of(path)); @@ -1534,7 +1534,7 @@ namespace Kernel BAN::ErrorOr Process::sys_readdir(int fd, struct dirent* list, size_t list_len) { LockGuard _(m_process_lock); - TRY(validate_pointer_access(list, sizeof(dirent) * list_len)); + TRY(validate_pointer_access(list, sizeof(dirent) * list_len, true)); return TRY(m_open_file_descriptors.read_dir_entries(fd, list, list_len)); } @@ -1562,7 +1562,7 @@ namespace Kernel { LockGuard _(m_process_lock); - TRY(validate_pointer_access(buffer, size)); + TRY(validate_pointer_access(buffer, size, true)); if (size < m_working_directory.size() + 1) return BAN::Error::from_errno(ERANGE); @@ -1577,7 +1577,7 @@ namespace Kernel { { LockGuard _(m_process_lock); - TRY(validate_pointer_access(args, sizeof(sys_mmap_t))); + TRY(validate_pointer_access(args, sizeof(sys_mmap_t), true)); } if (args->prot != PROT_NONE && (args->prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC))) @@ -1745,7 +1745,7 @@ namespace Kernel BAN::ErrorOr Process::sys_ttyname(int fildes, char* storage) { LockGuard _(m_process_lock); - TRY(validate_pointer_access(storage, TTY_NAME_MAX)); + TRY(validate_pointer_access(storage, TTY_NAME_MAX, true)); auto inode = TRY(m_open_file_descriptors.inode_of(fildes)); if (!inode->is_tty()) return BAN::Error::from_errno(ENOTTY); @@ -1794,7 +1794,7 @@ namespace Kernel BAN::ErrorOr Process::sys_ptsname(int fildes, char* buffer, size_t buffer_len) { LockGuard _(m_process_lock); - TRY(validate_pointer_access(buffer, buffer_len)); + TRY(validate_pointer_access(buffer, buffer_len, true)); auto inode = TRY(m_open_file_descriptors.inode_of(fildes)); if (TRY(m_open_file_descriptors.path_of(fildes)) != ""_sv) @@ -1846,7 +1846,7 @@ namespace Kernel { { LockGuard _(m_process_lock); - TRY(validate_pointer_access(tp, sizeof(timespec))); + TRY(validate_pointer_access(tp, sizeof(timespec), true)); } switch (clock_id) @@ -1925,9 +1925,9 @@ namespace Kernel LockGuard _(m_process_lock); if (act) - TRY(validate_pointer_access(act, sizeof(struct sigaction))); + TRY(validate_pointer_access(act, sizeof(struct sigaction), false)); if (oact) - TRY(validate_pointer_access(oact, sizeof(struct sigaction))); + TRY(validate_pointer_access(oact, sizeof(struct sigaction), true)); SpinLockGuard signal_lock_guard(m_signal_lock); @@ -1947,7 +1947,7 @@ namespace Kernel BAN::ErrorOr Process::sys_sigpending(sigset_t* set) { LockGuard _(m_process_lock); - TRY(validate_pointer_access(set, sizeof(sigset_t))); + TRY(validate_pointer_access(set, sizeof(sigset_t), true)); *set = (signal_pending_mask() | Thread::current().m_signal_pending_mask) & Thread::current().m_signal_block_mask; return 0; } @@ -1956,9 +1956,9 @@ namespace Kernel { LockGuard _(m_process_lock); if (set) - TRY(validate_pointer_access(set, sizeof(sigset_t))); + TRY(validate_pointer_access(set, sizeof(sigset_t), false)); if (oset) - TRY(validate_pointer_access(oset, sizeof(sigset_t))); + TRY(validate_pointer_access(oset, sizeof(sigset_t), true)); if (oset) *oset = Thread::current().m_signal_block_mask; @@ -2342,10 +2342,10 @@ namespace Kernel { // NOTE: we will page fault here, if str is not actually mapped // outcome is still the same; SIGSEGV - return validate_pointer_access(str, strlen(str) + 1); + return validate_pointer_access(str, strlen(str) + 1, false); } - BAN::ErrorOr Process::validate_pointer_access_check(const void* ptr, size_t size) + BAN::ErrorOr Process::validate_pointer_access_check(const void* ptr, size_t size, bool needs_write) { ASSERT(&Process::current() == this); auto& thread = Thread::current(); @@ -2368,10 +2368,15 @@ namespace Kernel // FIXME: should we allow cross mapping access? for (auto& mapped_region : m_mapped_regions) - if (mapped_region->contains_fully(vaddr, size)) - return {}; + { + if (!mapped_region->contains_fully(vaddr, size)) + continue; + if (needs_write && !mapped_region->writable()) + goto unauthorized_access; + return {}; + } - // FIXME: elf should contain full range [vaddr, vaddr + size) + // FIXME: elf should use MemoryRegions instead of mapping executables itself if (m_loadable_elf->contains(vaddr)) return {}; @@ -2382,11 +2387,11 @@ unauthorized_access: return BAN::Error::from_errno(EINTR); } - BAN::ErrorOr Process::validate_pointer_access(const void* ptr, size_t size) + BAN::ErrorOr Process::validate_pointer_access(const void* ptr, size_t size, bool needs_write) { // TODO: This seems very slow as we loop over the range twice - TRY(validate_pointer_access_check(ptr, size)); + TRY(validate_pointer_access_check(ptr, size, needs_write)); const vaddr_t vaddr = reinterpret_cast(ptr); @@ -2399,7 +2404,7 @@ unauthorized_access: const vaddr_t current = page_start + i * PAGE_SIZE; if (page_table().get_page_flags(current) & PageTable::Flags::Present) continue; - TRY(Process::allocate_page_for_demand_paging(current)); + TRY(Process::allocate_page_for_demand_paging(current, needs_write)); } return {};