forked from Bananymous/banan-os
Kernel: Implement copy-on-write memory for file backed mmaps
This commit is contained in:
@@ -13,20 +13,8 @@ namespace Kernel
|
||||
|
||||
if (offset < 0 || offset % PAGE_SIZE || size == 0)
|
||||
return BAN::Error::from_errno(EINVAL);
|
||||
switch (type)
|
||||
{
|
||||
case Type::PRIVATE:
|
||||
if (offset >= inode->size())
|
||||
return BAN::Error::from_errno(EOVERFLOW);
|
||||
break;
|
||||
case Type::SHARED:
|
||||
if ((size > (size_t)inode->size() || (size_t)offset > (size_t)inode->size() - size))
|
||||
return BAN::Error::from_errno(EOVERFLOW);
|
||||
break;
|
||||
default:
|
||||
ASSERT_NOT_REACHED();
|
||||
break;
|
||||
}
|
||||
if ((size > (size_t)inode->size() || (size_t)offset > (size_t)inode->size() - size))
|
||||
return BAN::Error::from_errno(EOVERFLOW);
|
||||
|
||||
auto* region_ptr = new FileBackedRegion(inode, page_table, offset, size, type, flags);
|
||||
if (region_ptr == nullptr)
|
||||
@@ -35,19 +23,19 @@ namespace Kernel
|
||||
|
||||
TRY(region->initialize(address_range));
|
||||
|
||||
if (type == Type::SHARED)
|
||||
if (type == Type::PRIVATE && (flags & PageTable::Flags::ReadWrite))
|
||||
TRY(region->m_dirty_pages.resize(BAN::Math::div_round_up<size_t>(size, PAGE_SIZE)));
|
||||
|
||||
LockGuard _(inode->m_mutex);
|
||||
if (inode->m_shared_region.valid())
|
||||
region->m_shared_data = inode->m_shared_region.lock();
|
||||
else
|
||||
{
|
||||
LockGuard _(inode->m_mutex);
|
||||
if (inode->m_shared_region.valid())
|
||||
region->m_shared_data = inode->m_shared_region.lock();
|
||||
else
|
||||
{
|
||||
auto shared_data = TRY(BAN::RefPtr<SharedFileData>::create());
|
||||
TRY(shared_data->pages.resize(BAN::Math::div_round_up<size_t>(inode->size(), PAGE_SIZE)));
|
||||
shared_data->inode = inode;
|
||||
inode->m_shared_region = TRY(shared_data->get_weak_ptr());
|
||||
region->m_shared_data = BAN::move(shared_data);
|
||||
}
|
||||
auto shared_data = TRY(BAN::RefPtr<SharedFileData>::create());
|
||||
TRY(shared_data->pages.resize(BAN::Math::div_round_up<size_t>(inode->size(), PAGE_SIZE)));
|
||||
shared_data->inode = inode;
|
||||
inode->m_shared_region = TRY(shared_data->get_weak_ptr());
|
||||
region->m_shared_data = BAN::move(shared_data);
|
||||
}
|
||||
|
||||
return region;
|
||||
@@ -64,32 +52,27 @@ namespace Kernel
|
||||
{
|
||||
if (m_vaddr == 0)
|
||||
return;
|
||||
|
||||
if (m_type == Type::SHARED)
|
||||
return;
|
||||
|
||||
size_t needed_pages = BAN::Math::div_round_up<size_t>(m_size, PAGE_SIZE);
|
||||
for (size_t i = 0; i < needed_pages; i++)
|
||||
{
|
||||
paddr_t paddr = m_page_table.physical_address_of(m_vaddr + i * PAGE_SIZE);
|
||||
if (paddr != 0)
|
||||
Heap::get().release_page(paddr);
|
||||
}
|
||||
for (paddr_t dirty_page : m_dirty_pages)
|
||||
if (dirty_page)
|
||||
Heap::get().release_page(dirty_page);
|
||||
}
|
||||
|
||||
// Destructor for SharedFileData: takes `mutex` with try_lock() (asserting that
// the attempt succeeded), calls sync(i) for every index whose pages[i] is
// non-zero, then releases the mutex.
// NOTE(review): this span comes from a rendered commit diff whose +/- markers
// were lost, so two spellings of the loop body appear back to back (the braced
// `continue` form and the brace-less `if (pages[i]) sync(i);` form).
// Presumably the former is the removed version and the latter its
// replacement -- confirm against the repository before relying on either.
SharedFileData::~SharedFileData()
|
||||
{
|
||||
// no-one should be referencing this anymore
|
||||
[[maybe_unused]] bool success = mutex.try_lock();
|
||||
ASSERT(success);
|
||||
|
||||
for (size_t i = 0; i < pages.size(); i++)
|
||||
{
|
||||
if (pages[i] == 0)
|
||||
continue;
|
||||
sync(i);
|
||||
}
|
||||
if (pages[i])
|
||||
sync(i);
|
||||
|
||||
mutex.unlock();
|
||||
}
|
||||
|
||||
void SharedFileData::sync(size_t page_index)
|
||||
{
|
||||
// FIXME: should this be locked?
|
||||
ASSERT(mutex.is_locked());
|
||||
|
||||
if (pages[page_index] == 0)
|
||||
return;
|
||||
@@ -105,13 +88,14 @@ namespace Kernel
|
||||
BAN::ErrorOr<void> FileBackedRegion::msync(vaddr_t address, size_t size, int flags)
|
||||
{
|
||||
if (flags != MS_SYNC)
|
||||
return BAN::Error::from_errno(ENOTSUP);
|
||||
dprintln("async file backed mmap msync");
|
||||
if (m_type != Type::SHARED)
|
||||
return {};
|
||||
|
||||
vaddr_t first_page = address & PAGE_ADDR_MASK;
|
||||
vaddr_t last_page = BAN::Math::div_round_up<vaddr_t>(address + size, PAGE_SIZE) * PAGE_SIZE;
|
||||
const vaddr_t first_page = address & PAGE_ADDR_MASK;
|
||||
const vaddr_t last_page = BAN::Math::div_round_up<vaddr_t>(address + size, PAGE_SIZE) * PAGE_SIZE;
|
||||
|
||||
LockGuard _(m_shared_data->mutex);
|
||||
for (vaddr_t page_addr = first_page; page_addr < last_page; page_addr += PAGE_SIZE)
|
||||
if (contains(page_addr))
|
||||
m_shared_data->sync((page_addr - m_vaddr) / PAGE_SIZE);
|
||||
@@ -119,89 +103,96 @@ namespace Kernel
|
||||
return {};
|
||||
}
|
||||
|
||||
BAN::ErrorOr<bool> FileBackedRegion::allocate_page_containing_impl(vaddr_t address)
|
||||
BAN::ErrorOr<bool> FileBackedRegion::allocate_page_containing_impl(vaddr_t address, bool wants_write)
|
||||
{
|
||||
ASSERT(contains(address));
|
||||
ASSERT(m_type == Type::SHARED || m_type == Type::PRIVATE);
|
||||
ASSERT(!wants_write || writable());
|
||||
|
||||
// Check if address is already mapped
|
||||
const vaddr_t vaddr = address & PAGE_ADDR_MASK;
|
||||
if (m_page_table.physical_address_of(vaddr) != 0)
|
||||
return false;
|
||||
|
||||
if (m_type == Type::PRIVATE)
|
||||
const size_t local_page_index = (vaddr - m_vaddr) / PAGE_SIZE;
|
||||
const size_t shared_page_index = local_page_index + m_offset / PAGE_SIZE;
|
||||
|
||||
if (m_page_table.physical_address_of(vaddr) == 0)
|
||||
{
|
||||
// Map new physical page to address
|
||||
paddr_t paddr = Heap::get().take_free_page();
|
||||
if (paddr == 0)
|
||||
return BAN::Error::from_errno(ENOMEM);
|
||||
ASSERT(m_shared_data);
|
||||
LockGuard _(m_shared_data->mutex);
|
||||
|
||||
// Temporarily force mapping to be writable so kernel can write to it
|
||||
m_page_table.map_page_at(paddr, vaddr, m_flags | PageTable::Flags::ReadWrite);
|
||||
|
||||
ASSERT(&PageTable::current() == &m_page_table);
|
||||
memset(reinterpret_cast<void*>(vaddr), 0x00, PAGE_SIZE);
|
||||
|
||||
const size_t file_offset = m_offset + (vaddr - m_vaddr);
|
||||
|
||||
if (file_offset < static_cast<size_t>(m_inode->size()))
|
||||
bool shared_data_has_correct_page = false;
|
||||
if (m_shared_data->pages[shared_page_index] == 0)
|
||||
{
|
||||
const size_t bytes = BAN::Math::min<size_t>(BAN::Math::min<size_t>(m_offset + m_size, m_inode->size()) - file_offset, PAGE_SIZE);
|
||||
auto read_ret = m_inode->read(file_offset, BAN::ByteSpan((uint8_t*)vaddr, bytes));
|
||||
|
||||
if (read_ret.is_error())
|
||||
{
|
||||
Heap::get().release_page(paddr);
|
||||
m_page_table.unmap_page(vaddr);
|
||||
return read_ret.release_error();
|
||||
}
|
||||
|
||||
if (read_ret.value() < bytes)
|
||||
{
|
||||
dwarnln("Only {}/{} bytes read", read_ret.value(), bytes);
|
||||
Heap::get().release_page(paddr);
|
||||
m_page_table.unmap_page(vaddr);
|
||||
return BAN::Error::from_errno(EIO);
|
||||
}
|
||||
}
|
||||
|
||||
// Disable writable if not wanted
|
||||
if (!(m_flags & PageTable::Flags::ReadWrite))
|
||||
m_page_table.map_page_at(paddr, vaddr, m_flags);
|
||||
}
|
||||
else if (m_type == Type::SHARED)
|
||||
{
|
||||
LockGuard _(m_inode->m_mutex);
|
||||
ASSERT(m_inode->m_shared_region.valid());
|
||||
ASSERT(m_shared_data->pages.size() == BAN::Math::div_round_up<size_t>(m_inode->size(), PAGE_SIZE));
|
||||
|
||||
auto& pages = m_shared_data->pages;
|
||||
size_t page_index = (vaddr - m_vaddr) / PAGE_SIZE;
|
||||
|
||||
if (pages[page_index] == 0)
|
||||
{
|
||||
pages[page_index] = Heap::get().take_free_page();
|
||||
if (pages[page_index] == 0)
|
||||
m_shared_data->pages[shared_page_index] = Heap::get().take_free_page();
|
||||
if (m_shared_data->pages[shared_page_index] == 0)
|
||||
return BAN::Error::from_errno(ENOMEM);
|
||||
|
||||
size_t offset = vaddr - m_vaddr;
|
||||
size_t bytes = BAN::Math::min<size_t>(m_size - offset, PAGE_SIZE);
|
||||
const size_t offset = (vaddr - m_vaddr) + m_offset;
|
||||
ASSERT(offset % 4096 == 0);
|
||||
|
||||
const size_t bytes = BAN::Math::min<size_t>(m_inode->size() - offset, PAGE_SIZE);
|
||||
|
||||
memset(m_shared_data->page_buffer, 0x00, PAGE_SIZE);
|
||||
TRY(m_inode->read(offset, BAN::ByteSpan(m_shared_data->page_buffer, bytes)));
|
||||
shared_data_has_correct_page = true;
|
||||
|
||||
PageTable::with_fast_page(pages[page_index], [&] {
|
||||
memcpy(PageTable::fast_page_as_ptr(), m_shared_data->page_buffer, bytes);
|
||||
memset(PageTable::fast_page_as_ptr(bytes), 0x00, PAGE_SIZE - bytes);
|
||||
PageTable::with_fast_page(m_shared_data->pages[shared_page_index], [&] {
|
||||
memcpy(PageTable::fast_page_as_ptr(), m_shared_data->page_buffer, PAGE_SIZE);
|
||||
});
|
||||
}
|
||||
|
||||
paddr_t paddr = pages[page_index];
|
||||
ASSERT(paddr);
|
||||
|
||||
m_page_table.map_page_at(paddr, vaddr, m_flags);
|
||||
if (m_type == Type::PRIVATE && wants_write)
|
||||
{
|
||||
const paddr_t paddr = Heap::get().take_free_page();
|
||||
if (paddr == 0)
|
||||
return BAN::Error::from_errno(ENOMEM);
|
||||
if (!shared_data_has_correct_page)
|
||||
{
|
||||
PageTable::with_fast_page(m_shared_data->pages[shared_page_index], [&] {
|
||||
memcpy(m_shared_data->page_buffer, PageTable::fast_page_as_ptr(), PAGE_SIZE);
|
||||
});
|
||||
}
|
||||
PageTable::with_fast_page(paddr, [&] {
|
||||
memcpy(PageTable::fast_page_as_ptr(), m_shared_data->page_buffer, PAGE_SIZE);
|
||||
});
|
||||
m_dirty_pages[local_page_index] = paddr;
|
||||
m_page_table.map_page_at(paddr, vaddr, m_flags);
|
||||
}
|
||||
else
|
||||
{
|
||||
const paddr_t paddr = m_shared_data->pages[shared_page_index];
|
||||
auto flags = m_flags;
|
||||
if (m_type == Type::PRIVATE)
|
||||
flags &= ~PageTable::Flags::ReadWrite;
|
||||
m_page_table.map_page_at(paddr, vaddr, flags);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
ASSERT_NOT_REACHED();
|
||||
// page does not need remappings
|
||||
if (m_type != Type::PRIVATE || !wants_write)
|
||||
return false;
|
||||
ASSERT(writable());
|
||||
|
||||
// page is already mapped as writable
|
||||
if (m_page_table.get_page_flags(vaddr) & PageTable::Flags::ReadWrite)
|
||||
return false;
|
||||
|
||||
const paddr_t paddr = Heap::get().take_free_page();
|
||||
if (paddr == 0)
|
||||
return BAN::Error::from_errno(ENOMEM);
|
||||
|
||||
ASSERT(m_shared_data);
|
||||
LockGuard _(m_shared_data->mutex);
|
||||
ASSERT(m_shared_data->pages[shared_page_index]);
|
||||
|
||||
PageTable::with_fast_page(m_shared_data->pages[shared_page_index], [&] {
|
||||
memcpy(m_shared_data->page_buffer, PageTable::fast_page_as_ptr(), PAGE_SIZE);
|
||||
});
|
||||
PageTable::with_fast_page(paddr, [&] {
|
||||
memcpy(PageTable::fast_page_as_ptr(), m_shared_data->page_buffer, PAGE_SIZE);
|
||||
});
|
||||
m_dirty_pages[local_page_index] = paddr;
|
||||
m_page_table.map_page_at(paddr, vaddr, m_flags);
|
||||
}
|
||||
|
||||
return true;
|
||||
@@ -212,31 +203,26 @@ namespace Kernel
|
||||
const size_t aligned_size = (m_size + PAGE_SIZE - 1) & PAGE_ADDR_MASK;
|
||||
auto result = TRY(FileBackedRegion::create(m_inode, page_table, m_offset, m_size, { .start = m_vaddr, .end = m_vaddr + aligned_size }, m_type, m_flags));
|
||||
|
||||
// shared regions can just go through demand paging
|
||||
if (m_type == Type::SHARED)
|
||||
return BAN::UniqPtr<MemoryRegion>(BAN::move(result));
|
||||
// non-dirty pages can go through demand paging
|
||||
|
||||
ASSERT(m_type == Type::PRIVATE);
|
||||
|
||||
for (size_t offset = 0; offset < m_size; offset += PAGE_SIZE)
|
||||
for (size_t i = 0; i < m_dirty_pages.size(); i++)
|
||||
{
|
||||
const vaddr_t vaddr = m_vaddr + offset;
|
||||
if (m_page_table.physical_address_of(vaddr) == 0)
|
||||
if (m_dirty_pages[i] == 0)
|
||||
continue;
|
||||
|
||||
ASSERT(&PageTable::current() == &m_page_table);
|
||||
const vaddr_t vaddr = m_vaddr + i * PAGE_SIZE;
|
||||
|
||||
const paddr_t paddr = Heap::get().take_free_page();
|
||||
if (paddr == 0)
|
||||
return BAN::Error::from_errno(ENOMEM);
|
||||
|
||||
page_table.map_page_at(paddr, vaddr, m_flags);
|
||||
|
||||
const size_t to_copy = BAN::Math::min<size_t>(PAGE_SIZE, m_size - offset);
|
||||
ASSERT(&m_page_table == &PageTable::current() || &m_page_table == &PageTable::kernel());
|
||||
PageTable::with_fast_page(paddr, [&] {
|
||||
memcpy(PageTable::fast_page_as_ptr(), reinterpret_cast<void*>(vaddr), to_copy);
|
||||
memset(PageTable::fast_page_as_ptr(to_copy), 0, PAGE_SIZE - to_copy);
|
||||
memcpy(PageTable::fast_page_as_ptr(), reinterpret_cast<void*>(vaddr), PAGE_SIZE);
|
||||
});
|
||||
|
||||
result->m_page_table.map_page_at(paddr, vaddr, m_flags);
|
||||
result->m_dirty_pages[i] = paddr;
|
||||
}
|
||||
|
||||
return BAN::UniqPtr<MemoryRegion>(BAN::move(result));
|
||||
|
||||
@@ -38,11 +38,12 @@ namespace Kernel
|
||||
}
|
||||
}
|
||||
|
||||
BAN::ErrorOr<bool> MemoryBackedRegion::allocate_page_containing_impl(vaddr_t address)
|
||||
BAN::ErrorOr<bool> MemoryBackedRegion::allocate_page_containing_impl(vaddr_t address, bool wants_write)
|
||||
{
|
||||
ASSERT(m_type == Type::PRIVATE);
|
||||
|
||||
ASSERT(contains(address));
|
||||
(void)wants_write;
|
||||
|
||||
// Check if address is already mapped
|
||||
vaddr_t vaddr = address & PAGE_ADDR_MASK;
|
||||
@@ -93,7 +94,7 @@ namespace Kernel
|
||||
vaddr_t page_offset = write_vaddr % PAGE_SIZE;
|
||||
size_t bytes = BAN::Math::min<size_t>(buffer_size - written, PAGE_SIZE - page_offset);
|
||||
|
||||
TRY(allocate_page_containing(write_vaddr));
|
||||
TRY(allocate_page_containing(write_vaddr, true));
|
||||
|
||||
PageTable::with_fast_page(m_page_table.physical_address_of(write_vaddr & PAGE_ADDR_MASK), [&] {
|
||||
memcpy(PageTable::fast_page_as_ptr(page_offset), (void*)(buffer + written), bytes);
|
||||
|
||||
@@ -47,9 +47,12 @@ namespace Kernel
|
||||
return true;
|
||||
}
|
||||
|
||||
BAN::ErrorOr<bool> MemoryRegion::allocate_page_containing(vaddr_t address)
|
||||
BAN::ErrorOr<bool> MemoryRegion::allocate_page_containing(vaddr_t address, bool wants_write)
|
||||
{
|
||||
auto ret = allocate_page_containing_impl(address);
|
||||
ASSERT(contains(address));
|
||||
if (wants_write && !writable())
|
||||
return false;
|
||||
auto ret = allocate_page_containing_impl(address, wants_write);
|
||||
if (!ret.is_error() && ret.value())
|
||||
m_physical_page_count++;
|
||||
return ret;
|
||||
|
||||
@@ -87,9 +87,10 @@ namespace Kernel
|
||||
return BAN::UniqPtr<MemoryRegion>(BAN::move(region));
|
||||
}
|
||||
|
||||
BAN::ErrorOr<bool> SharedMemoryObject::allocate_page_containing_impl(vaddr_t address)
|
||||
BAN::ErrorOr<bool> SharedMemoryObject::allocate_page_containing_impl(vaddr_t address, bool wants_write)
|
||||
{
|
||||
ASSERT(contains(address));
|
||||
(void)wants_write;
|
||||
|
||||
// Check if address is already mapped
|
||||
vaddr_t vaddr = address & PAGE_ADDR_MASK;
|
||||
|
||||
Reference in New Issue
Block a user