Kernel: Implement copy-on-write memory for file backed mmaps

2024-09-11 19:32:42 +03:00
parent 4006a04817
commit c77ad5fb34
15 changed files with 217 additions and 208 deletions
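What this change does: previously a private file-backed mmap populated every faulting page by reading the inode into a freshly allocated physical page, so each mapping owned a full copy of everything it touched. After this commit both shared and private mappings demand-load clean pages into a single per-inode SharedFileData cache; a private mapping maps those cached pages read-only and only allocates a physical page of its own on the first write, recording it in m_dirty_pages. The following is a stand-alone model of that policy with illustrative names, not kernel code:

// Stand-alone model of the copy-on-write policy introduced by this commit.
// Illustrative only: the real code deals in physical pages and page tables.
#include <cstdint>
#include <unordered_map>
#include <vector>

constexpr size_t PAGE_SIZE = 4096;

enum class Type { PRIVATE, SHARED };

struct SharedFileData // one per inode, shared by every mapping of the file
{
    std::vector<std::vector<uint8_t>> pages; // cached page per file page
};

struct Mapping
{
    Type type;
    SharedFileData* shared;
    std::unordered_map<size_t, std::vector<uint8_t>> dirty; // private copies

    void fault(size_t page, bool wants_write)
    {
        auto& cached = shared->pages[page];
        if (cached.empty())              // demand-load a clean page
            cached.assign(PAGE_SIZE, 0); // (the kernel reads the inode here)
        if (type == Type::PRIVATE && wants_write && !dirty.count(page))
            dirty[page] = cached;        // copy-on-write: first write copies
        // SHARED faults and read faults on PRIVATE map the cached page directly;
        // a PRIVATE read mapping stays read-only so the next write faults again.
    }
};

int main()
{
    SharedFileData file;
    file.pages.resize(4);

    Mapping m{ Type::PRIVATE, &file, {} };
    m.fault(0, false); // read: maps the shared cached page, no copy
    m.fault(0, true);  // write: takes a private copy of the page
    return m.dirty.count(0) == 1 ? 0 : 1;
}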


@@ -13,20 +13,8 @@ namespace Kernel
     if (offset < 0 || offset % PAGE_SIZE || size == 0)
         return BAN::Error::from_errno(EINVAL);
 
-    switch (type)
-    {
-        case Type::PRIVATE:
-            if (offset >= inode->size())
-                return BAN::Error::from_errno(EOVERFLOW);
-            break;
-        case Type::SHARED:
-            if ((size > (size_t)inode->size() || (size_t)offset > (size_t)inode->size() - size))
-                return BAN::Error::from_errno(EOVERFLOW);
-            break;
-        default:
-            ASSERT_NOT_REACHED();
-            break;
-    }
+    if ((size > (size_t)inode->size() || (size_t)offset > (size_t)inode->size() - size))
+        return BAN::Error::from_errno(EOVERFLOW);
 
     auto* region_ptr = new FileBackedRegion(inode, page_table, offset, size, type, flags);
     if (region_ptr == nullptr)
@@ -35,19 +23,19 @@ namespace Kernel
     TRY(region->initialize(address_range));
 
-    if (type == Type::SHARED)
-    {
-        LockGuard _(inode->m_mutex);
-        if (inode->m_shared_region.valid())
-            region->m_shared_data = inode->m_shared_region.lock();
-        else
-        {
-            auto shared_data = TRY(BAN::RefPtr<SharedFileData>::create());
-            TRY(shared_data->pages.resize(BAN::Math::div_round_up<size_t>(inode->size(), PAGE_SIZE)));
-            shared_data->inode = inode;
-            inode->m_shared_region = TRY(shared_data->get_weak_ptr());
-            region->m_shared_data = BAN::move(shared_data);
-        }
+    if (type == Type::PRIVATE && (flags & PageTable::Flags::ReadWrite))
+        TRY(region->m_dirty_pages.resize(BAN::Math::div_round_up<size_t>(size, PAGE_SIZE)));
+
+    LockGuard _(inode->m_mutex);
+    if (inode->m_shared_region.valid())
+        region->m_shared_data = inode->m_shared_region.lock();
+    else
+    {
+        auto shared_data = TRY(BAN::RefPtr<SharedFileData>::create());
+        TRY(shared_data->pages.resize(BAN::Math::div_round_up<size_t>(inode->size(), PAGE_SIZE)));
+        shared_data->inode = inode;
+        inode->m_shared_region = TRY(shared_data->get_weak_ptr());
+        region->m_shared_data = BAN::move(shared_data);
     }
 
     return region;
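For reference, the per-inode state both mapping types now attach to is not declared in this diff. Reconstructed from its uses here, SharedFileData has roughly the following shape (base classes, member order and exact container types are assumptions):

// Inferred from usage in this commit; the real declaration may differ.
struct SharedFileData : BAN::RefCounted<SharedFileData>, BAN::Weakable<SharedFileData> // assumption
{
    ~SharedFileData();            // flushes loaded pages back to the inode
    void sync(size_t page_index); // caller must hold `mutex`

    BAN::Vector<paddr_t> pages;   // one physical page per file page, 0 = not loaded
    BAN::RefPtr<Inode> inode;     // backing file
    Mutex mutex;                  // guards `pages` and `page_buffer`
    uint8_t page_buffer[PAGE_SIZE]; // bounce buffer for fast-page copies
};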
@@ -64,32 +52,27 @@ namespace Kernel
 {
     if (m_vaddr == 0)
         return;
 
-    if (m_type == Type::SHARED)
-        return;
-
-    size_t needed_pages = BAN::Math::div_round_up<size_t>(m_size, PAGE_SIZE);
-    for (size_t i = 0; i < needed_pages; i++)
-    {
-        paddr_t paddr = m_page_table.physical_address_of(m_vaddr + i * PAGE_SIZE);
-        if (paddr != 0)
-            Heap::get().release_page(paddr);
-    }
+    for (paddr_t dirty_page : m_dirty_pages)
+        if (dirty_page)
+            Heap::get().release_page(dirty_page);
 }
 
 SharedFileData::~SharedFileData()
 {
+    // no-one should be referencing this anymore
+    [[maybe_unused]] bool success = mutex.try_lock();
+    ASSERT(success);
+
     for (size_t i = 0; i < pages.size(); i++)
-    {
-        if (pages[i] == 0)
-            continue;
-        sync(i);
-    }
+        if (pages[i])
+            sync(i);
+
+    mutex.unlock();
 }
 
 void SharedFileData::sync(size_t page_index)
 {
-    // FIXME: should this be locked?
+    ASSERT(mutex.is_locked());
+
     if (pages[page_index] == 0)
         return;
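The body of sync() is cut off by the next hunk. Judging from the new assertion and the fast-page pattern used elsewhere in this file, the elided remainder presumably copies the physical page into page_buffer and writes it back to the inode, along these lines (a sketch, not the commit's code):

// Sketch of the elided remainder of SharedFileData::sync(); hypothetical.
void SharedFileData::sync(size_t page_index)
{
    ASSERT(mutex.is_locked());
    if (pages[page_index] == 0)
        return;
    // Copy the physical page through the fast page into the bounce buffer.
    PageTable::with_fast_page(pages[page_index], [&] {
        memcpy(page_buffer, PageTable::fast_page_as_ptr(), PAGE_SIZE);
    });
    // Write the page back to its offset in the backing file.
    const size_t offset = page_index * PAGE_SIZE;
    const size_t bytes = BAN::Math::min<size_t>(inode->size() - offset, PAGE_SIZE);
    if (inode->write(offset, BAN::ByteSpan(page_buffer, bytes)).is_error())
        dwarnln("failed to sync page {}", page_index);
}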
@@ -105,13 +88,14 @@ namespace Kernel
 BAN::ErrorOr<void> FileBackedRegion::msync(vaddr_t address, size_t size, int flags)
 {
     if (flags != MS_SYNC)
-        return BAN::Error::from_errno(ENOTSUP);
+        dprintln("async file backed mmap msync");
 
     if (m_type != Type::SHARED)
         return {};
 
-    vaddr_t first_page = address & PAGE_ADDR_MASK;
-    vaddr_t last_page = BAN::Math::div_round_up<vaddr_t>(address + size, PAGE_SIZE) * PAGE_SIZE;
+    const vaddr_t first_page = address & PAGE_ADDR_MASK;
+    const vaddr_t last_page = BAN::Math::div_round_up<vaddr_t>(address + size, PAGE_SIZE) * PAGE_SIZE;
 
+    LockGuard _(m_shared_data->mutex);
     for (vaddr_t page_addr = first_page; page_addr < last_page; page_addr += PAGE_SIZE)
         if (contains(page_addr))
             m_shared_data->sync((page_addr - m_vaddr) / PAGE_SIZE);
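At the system-call level, the path above is what an explicit flush of a shared mapping goes through. A minimal userspace exercise of it (plain POSIX, nothing banan-os specific):

#include <fcntl.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main()
{
    int fd = open("data.bin", O_RDWR);
    if (fd < 0)
        return 1;
    void* addr = mmap(nullptr, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    if (addr == MAP_FAILED)
        return 1;
    memcpy(addr, "hello", 5);   // dirties the shared page in SharedFileData
    msync(addr, 4096, MS_SYNC); // FileBackedRegion::msync -> SharedFileData::sync
    munmap(addr, 4096);
    close(fd);
    return 0;
}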
@@ -119,89 +103,96 @@ namespace Kernel
     return {};
 }
 
-BAN::ErrorOr<bool> FileBackedRegion::allocate_page_containing_impl(vaddr_t address)
+BAN::ErrorOr<bool> FileBackedRegion::allocate_page_containing_impl(vaddr_t address, bool wants_write)
 {
     ASSERT(contains(address));
+    ASSERT(m_type == Type::SHARED || m_type == Type::PRIVATE);
+    ASSERT(!wants_write || writable());
 
-    // Check if address is already mapped
     const vaddr_t vaddr = address & PAGE_ADDR_MASK;
-    if (m_page_table.physical_address_of(vaddr) != 0)
-        return false;
-
-    if (m_type == Type::PRIVATE)
+    const size_t local_page_index = (vaddr - m_vaddr) / PAGE_SIZE;
+    const size_t shared_page_index = local_page_index + m_offset / PAGE_SIZE;
+
+    if (m_page_table.physical_address_of(vaddr) == 0)
     {
-        // Map new physical page to address
-        paddr_t paddr = Heap::get().take_free_page();
-        if (paddr == 0)
-            return BAN::Error::from_errno(ENOMEM);
+        ASSERT(m_shared_data);
+        LockGuard _(m_shared_data->mutex);
 
-        // Temporarily force mapping to be writable so kernel can write to it
-        m_page_table.map_page_at(paddr, vaddr, m_flags | PageTable::Flags::ReadWrite);
-        ASSERT(&PageTable::current() == &m_page_table);
-        memset(reinterpret_cast<void*>(vaddr), 0x00, PAGE_SIZE);
-
-        const size_t file_offset = m_offset + (vaddr - m_vaddr);
-        if (file_offset < static_cast<size_t>(m_inode->size()))
+        bool shared_data_has_correct_page = false;
+        if (m_shared_data->pages[shared_page_index] == 0)
         {
-            const size_t bytes = BAN::Math::min<size_t>(BAN::Math::min<size_t>(m_offset + m_size, m_inode->size()) - file_offset, PAGE_SIZE);
-            auto read_ret = m_inode->read(file_offset, BAN::ByteSpan((uint8_t*)vaddr, bytes));
-            if (read_ret.is_error())
-            {
-                Heap::get().release_page(paddr);
-                m_page_table.unmap_page(vaddr);
-                return read_ret.release_error();
-            }
-            if (read_ret.value() < bytes)
-            {
-                dwarnln("Only {}/{} bytes read", read_ret.value(), bytes);
-                Heap::get().release_page(paddr);
-                m_page_table.unmap_page(vaddr);
-                return BAN::Error::from_errno(EIO);
-            }
-        }
-
-        // Disable writable if not wanted
-        if (!(m_flags & PageTable::Flags::ReadWrite))
-            m_page_table.map_page_at(paddr, vaddr, m_flags);
-    }
-    else if (m_type == Type::SHARED)
-    {
-        LockGuard _(m_inode->m_mutex);
-        ASSERT(m_inode->m_shared_region.valid());
-        ASSERT(m_shared_data->pages.size() == BAN::Math::div_round_up<size_t>(m_inode->size(), PAGE_SIZE));
-
-        auto& pages = m_shared_data->pages;
-        size_t page_index = (vaddr - m_vaddr) / PAGE_SIZE;
-        if (pages[page_index] == 0)
-        {
-            pages[page_index] = Heap::get().take_free_page();
-            if (pages[page_index] == 0)
+            m_shared_data->pages[shared_page_index] = Heap::get().take_free_page();
+            if (m_shared_data->pages[shared_page_index] == 0)
                 return BAN::Error::from_errno(ENOMEM);
 
-            size_t offset = vaddr - m_vaddr;
-            size_t bytes = BAN::Math::min<size_t>(m_size - offset, PAGE_SIZE);
+            const size_t offset = (vaddr - m_vaddr) + m_offset;
+            ASSERT(offset % 4096 == 0);
+            const size_t bytes = BAN::Math::min<size_t>(m_inode->size() - offset, PAGE_SIZE);
 
+            memset(m_shared_data->page_buffer, 0x00, PAGE_SIZE);
             TRY(m_inode->read(offset, BAN::ByteSpan(m_shared_data->page_buffer, bytes)));
+            shared_data_has_correct_page = true;
 
-            PageTable::with_fast_page(pages[page_index], [&] {
-                memcpy(PageTable::fast_page_as_ptr(), m_shared_data->page_buffer, bytes);
-                memset(PageTable::fast_page_as_ptr(bytes), 0x00, PAGE_SIZE - bytes);
+            PageTable::with_fast_page(m_shared_data->pages[shared_page_index], [&] {
+                memcpy(PageTable::fast_page_as_ptr(), m_shared_data->page_buffer, PAGE_SIZE);
             });
         }
 
-        paddr_t paddr = pages[page_index];
-        ASSERT(paddr);
-        m_page_table.map_page_at(paddr, vaddr, m_flags);
+        if (m_type == Type::PRIVATE && wants_write)
+        {
+            const paddr_t paddr = Heap::get().take_free_page();
+            if (paddr == 0)
+                return BAN::Error::from_errno(ENOMEM);
+
+            if (!shared_data_has_correct_page)
+            {
+                PageTable::with_fast_page(m_shared_data->pages[shared_page_index], [&] {
+                    memcpy(m_shared_data->page_buffer, PageTable::fast_page_as_ptr(), PAGE_SIZE);
+                });
+            }
+
+            PageTable::with_fast_page(paddr, [&] {
+                memcpy(PageTable::fast_page_as_ptr(), m_shared_data->page_buffer, PAGE_SIZE);
+            });
+
+            m_dirty_pages[local_page_index] = paddr;
+            m_page_table.map_page_at(paddr, vaddr, m_flags);
+        }
+        else
+        {
+            const paddr_t paddr = m_shared_data->pages[shared_page_index];
+            auto flags = m_flags;
+            if (m_type == Type::PRIVATE)
+                flags &= ~PageTable::Flags::ReadWrite;
+            m_page_table.map_page_at(paddr, vaddr, flags);
+        }
     }
     else
     {
-        ASSERT_NOT_REACHED();
+        // page does not need remapping
+        if (m_type != Type::PRIVATE || !wants_write)
+            return false;
+        ASSERT(writable());
+
+        // page is already mapped as writable
+        if (m_page_table.get_page_flags(vaddr) & PageTable::Flags::ReadWrite)
+            return false;
+
+        const paddr_t paddr = Heap::get().take_free_page();
+        if (paddr == 0)
+            return BAN::Error::from_errno(ENOMEM);
+
+        ASSERT(m_shared_data);
+        LockGuard _(m_shared_data->mutex);
+
+        ASSERT(m_shared_data->pages[shared_page_index]);
+        PageTable::with_fast_page(m_shared_data->pages[shared_page_index], [&] {
+            memcpy(m_shared_data->page_buffer, PageTable::fast_page_as_ptr(), PAGE_SIZE);
+        });
+
+        PageTable::with_fast_page(paddr, [&] {
+            memcpy(PageTable::fast_page_as_ptr(), m_shared_data->page_buffer, PAGE_SIZE);
+        });
+
+        m_dirty_pages[local_page_index] = paddr;
+        m_page_table.map_page_at(paddr, vaddr, m_flags);
     }
 
     return true;
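Taken together, the rewritten fault handler resolves a page fault according to the following matrix; the last two rows are the new copy-on-write behavior:

mapping | fault | page state       | action
--------|-------|------------------|------------------------------------------------------
SHARED  | any   | not mapped       | demand-load into SharedFileData, map the shared page
PRIVATE | read  | not mapped       | demand-load the shared page, map it read-only
PRIVATE | write | not mapped       | copy the shared page into a fresh one, map the copy writable
PRIVATE | write | mapped read-only | copy the shared page, remap the private copy writable

In each private-write case the new copy is recorded in m_dirty_pages, which is exactly the set of pages the destructor releases and clone() duplicates.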
@@ -212,31 +203,26 @@ namespace Kernel
     const size_t aligned_size = (m_size + PAGE_SIZE - 1) & PAGE_ADDR_MASK;
     auto result = TRY(FileBackedRegion::create(m_inode, page_table, m_offset, m_size, { .start = m_vaddr, .end = m_vaddr + aligned_size }, m_type, m_flags));
 
     // shared regions can just go through demand paging
     if (m_type == Type::SHARED)
         return BAN::UniqPtr<MemoryRegion>(BAN::move(result));
 
-    for (size_t offset = 0; offset < m_size; offset += PAGE_SIZE)
+    // non-dirty pages can go through demand paging
+    ASSERT(m_type == Type::PRIVATE);
+    for (size_t i = 0; i < m_dirty_pages.size(); i++)
     {
-        const vaddr_t vaddr = m_vaddr + offset;
-        if (m_page_table.physical_address_of(vaddr) == 0)
+        if (m_dirty_pages[i] == 0)
             continue;
 
-        ASSERT(&PageTable::current() == &m_page_table);
-
+        const vaddr_t vaddr = m_vaddr + i * PAGE_SIZE;
         const paddr_t paddr = Heap::get().take_free_page();
         if (paddr == 0)
             return BAN::Error::from_errno(ENOMEM);
 
-        page_table.map_page_at(paddr, vaddr, m_flags);
-
-        const size_t to_copy = BAN::Math::min<size_t>(PAGE_SIZE, m_size - offset);
+        ASSERT(&m_page_table == &PageTable::current() || &m_page_table == &PageTable::kernel());
         PageTable::with_fast_page(paddr, [&] {
-            memcpy(PageTable::fast_page_as_ptr(), reinterpret_cast<void*>(vaddr), to_copy);
-            memset(PageTable::fast_page_as_ptr(to_copy), 0, PAGE_SIZE - to_copy);
+            memcpy(PageTable::fast_page_as_ptr(), reinterpret_cast<void*>(vaddr), PAGE_SIZE);
         });
+
+        result->m_page_table.map_page_at(paddr, vaddr, m_flags);
+        result->m_dirty_pages[i] = paddr;
     }
 
     return BAN::UniqPtr<MemoryRegion>(BAN::move(result));
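clone() runs when a process forks: shared regions and clean private pages go back through demand paging, and only the parent's dirty private pages are copied eagerly. The userspace semantics this preserves are standard POSIX MAP_PRIVATE behavior (illustrative example):

#include <fcntl.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>

int main()
{
    int fd = open("data.bin", O_RDONLY);
    auto* p = static_cast<char*>(mmap(nullptr, 4096, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0));
    if (p == MAP_FAILED)
        return 1;
    p[0] = 'x';             // parent dirties its private copy before forking
    if (fork() == 0)
        _exit(p[0] == 'x'); // child sees the pre-fork write (cloned dirty page)...
    p[0] = 'y';             // ...but never writes made after fork()
    wait(nullptr);
    return 0;
}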


@@ -38,11 +38,12 @@ namespace Kernel
     }
 }
 
-BAN::ErrorOr<bool> MemoryBackedRegion::allocate_page_containing_impl(vaddr_t address)
+BAN::ErrorOr<bool> MemoryBackedRegion::allocate_page_containing_impl(vaddr_t address, bool wants_write)
 {
     ASSERT(m_type == Type::PRIVATE);
     ASSERT(contains(address));
+    (void)wants_write;
 
     // Check if address is already mapped
     vaddr_t vaddr = address & PAGE_ADDR_MASK;
@@ -93,7 +94,7 @@ namespace Kernel
     vaddr_t page_offset = write_vaddr % PAGE_SIZE;
     size_t bytes = BAN::Math::min<size_t>(buffer_size - written, PAGE_SIZE - page_offset);
 
-    TRY(allocate_page_containing(write_vaddr));
+    TRY(allocate_page_containing(write_vaddr, true));
 
     PageTable::with_fast_page(m_page_table.physical_address_of(write_vaddr & PAGE_ADDR_MASK), [&] {
         memcpy(PageTable::fast_page_as_ptr(page_offset), (void*)(buffer + written), bytes);


@@ -47,9 +47,12 @@ namespace Kernel
     return true;
 }
 
-BAN::ErrorOr<bool> MemoryRegion::allocate_page_containing(vaddr_t address)
+BAN::ErrorOr<bool> MemoryRegion::allocate_page_containing(vaddr_t address, bool wants_write)
 {
-    auto ret = allocate_page_containing_impl(address);
+    ASSERT(contains(address));
+    if (wants_write && !writable())
+        return false;
+    auto ret = allocate_page_containing_impl(address, wants_write);
     if (!ret.is_error() && ret.value())
         m_physical_page_count++;
     return ret;
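The new wants_write parameter threads the access type from the page-fault handler down to each region implementation, and the early return false is what lets a write to a read-only region surface as an unhandled fault instead of silently mapping a page. A hypothetical call site follows (the fault handler itself is not in this section; fault_is_write and raise_sigsegv are illustrative names):

// Hypothetical caller in the page-fault path, not part of this commit.
const bool wants_write = fault_is_write(error_code);
const bool resolved = TRY(region->allocate_page_containing(fault_address, wants_write));
if (!resolved && wants_write && !region->writable())
    raise_sigsegv(); // write fault on a read-only mapping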


@@ -87,9 +87,10 @@ namespace Kernel
     return BAN::UniqPtr<MemoryRegion>(BAN::move(region));
 }
 
-BAN::ErrorOr<bool> SharedMemoryObject::allocate_page_containing_impl(vaddr_t address)
+BAN::ErrorOr<bool> SharedMemoryObject::allocate_page_containing_impl(vaddr_t address, bool wants_write)
 {
     ASSERT(contains(address));
+    (void)wants_write;
 
     // Check if address is already mapped
     vaddr_t vaddr = address & PAGE_ADDR_MASK;