Kernel: Implement MAP_SHARED for regular files

Every inode holds a weak pointer to its shared file data, which contains
the physical addresses of the pages backing the inode's file contents.
Physical pages are allocated and populated on demand.

When the last shared mapping is unmapped, the inode's shared data is
freed and written back to the inode.
This commit is contained in:
Bananymous 2023-09-29 18:46:44 +03:00
parent 8ff4e1f8c8
commit 43c23db4a6
4 changed files with 139 additions and 41 deletions

View File

@ -3,6 +3,7 @@
#include <BAN/RefPtr.h>
#include <BAN/String.h>
#include <BAN/StringView.h>
#include <BAN/WeakPtr.h>
#include <BAN/Vector.h>
#include <kernel/API/DirectoryEntry.h>
@ -17,6 +18,9 @@ namespace Kernel
using namespace API;
class FileBackedRegion;
class SharedFileData;
class Inode : public BAN::RefCounted<Inode>
{
public:
@ -112,6 +116,9 @@ namespace Kernel
private:
mutable RecursiveSpinLock m_lock;
BAN::WeakPtr<SharedFileData> m_shared_region;
friend class FileBackedRegion;
};
}

View File

@ -6,6 +6,17 @@
namespace Kernel
{
// Shared, reference-counted cache of an inode's file data used by
// MAP_SHARED mappings: all shared mappings of one inode point at the
// same instance, while the inode itself keeps only a weak pointer.
struct SharedFileData : public BAN::RefCounted<SharedFileData>, public BAN::Weakable<SharedFileData>
{
// Writes every cached page back to the inode (defined in the .cpp).
~SharedFileData();
// FIXME: this should probably be ordered tree like map
// for fast lookup and less memory usage
// Physical address of each cached page, indexed by page number within
// the file; 0 means that page has not been faulted in yet.
BAN::Vector<paddr_t> pages;
// Strong reference keeps the inode alive while any shared mapping exists.
BAN::RefPtr<Inode> inode;
// Scratch buffer for copying a page's contents when the physical page
// is not mapped in the current address space.
uint8_t page_buffer[PAGE_SIZE];
};
class FileBackedRegion final : public MemoryRegion
{
BAN_NON_COPYABLE(FileBackedRegion);
@ -25,6 +36,8 @@ namespace Kernel
private:
BAN::RefPtr<Inode> m_inode;
const off_t m_offset;
BAN::RefPtr<SharedFileData> m_shared_data;
};
}

View File

@ -9,9 +9,6 @@ namespace Kernel
{
ASSERT(inode->mode().ifreg());
if (type != Type::PRIVATE)
return BAN::Error::from_errno(ENOTSUP);
if (offset < 0 || offset % PAGE_SIZE || size == 0)
return BAN::Error::from_errno(EINVAL);
if (size > (size_t)inode->size() || (size_t)offset > (size_t)inode->size() - size)
@ -24,6 +21,21 @@ namespace Kernel
TRY(region->initialize(address_range));
if (type == Type::SHARED)
{
LockGuard _(inode->m_lock);
if (inode->m_shared_region.valid())
region->m_shared_data = inode->m_shared_region.lock();
else
{
auto shared_data = TRY(BAN::RefPtr<SharedFileData>::create());
TRY(shared_data->pages.resize(BAN::Math::div_round_up<size_t>(inode->size(), PAGE_SIZE)));
shared_data->inode = inode;
inode->m_shared_region = TRY(shared_data->get_weak_ptr());
region->m_shared_data = BAN::move(shared_data);
}
}
return region;
}
@ -38,8 +50,9 @@ namespace Kernel
{
if (m_vaddr == 0)
return;
ASSERT(m_type == Type::PRIVATE);
if (m_type == Type::SHARED)
return;
size_t needed_pages = BAN::Math::div_round_up<size_t>(m_size, PAGE_SIZE);
for (size_t i = 0; i < needed_pages; i++)
@ -50,10 +63,30 @@ namespace Kernel
}
}
// Flushes the shared page cache back to the inode. Runs when the last
// shared mapping is unmapped (the RefPtr count drops to zero).
SharedFileData::~SharedFileData()
{
for (size_t i = 0; i < pages.size(); i++)
{
// paddr 0 marks a page that was never faulted in -- nothing to flush.
if (pages[i] == 0)
continue;
{
// Temporarily map the physical page at virtual address 0 of the
// current address space so its contents can be copied out.
// Assumes vaddr 0 is reserved as a scratch slot -- TODO confirm.
auto& page_table = PageTable::current();
LockGuard _(page_table);
ASSERT(page_table.is_page_free(0));
page_table.map_page_at(pages[i], 0, PageTable::Flags::Present);
memcpy(page_buffer, (void*)0, PAGE_SIZE);
page_table.unmap_page(0);
}
// NOTE(review): always writes a full PAGE_SIZE, including for the
// final page -- verify the inode tolerates a write past its size.
// NOTE(review): pages[i] is never released back to the Heap here --
// confirm the physical pages are freed elsewhere, else this leaks.
if (auto ret = inode->write(i * PAGE_SIZE, page_buffer, PAGE_SIZE); ret.is_error())
dwarnln("{}", ret.error());
}
}
// Demand-fault handler for a file-backed region: ensures the page
// containing `address` is mapped. Returns false if it was already
// mapped, true after mapping a new page.
//
// NOTE(review): this span comes from a web diff view that interleaves
// removed (pre-change) and added (post-change) lines without +/-
// markers. Several statements therefore appear twice and dead
// pre-change fragments remain; see the inline notes below.
BAN::ErrorOr<bool> FileBackedRegion::allocate_page_containing(vaddr_t address)
{
// NOTE(review): stale pre-change line -- contradicted by the
// Type::SHARED handling added further down.
ASSERT(m_type == Type::PRIVATE);
ASSERT(contains(address));
// Check if address is already mapped
@ -61,44 +94,89 @@ namespace Kernel
if (m_page_table.physical_address_of(vaddr) != 0)
return false;
// NOTE(review): pre-change allocation code, superseded by the
// identical logic inside the `if (m_type == Type::PRIVATE)` below.
// Map new physical page to address
paddr_t paddr = Heap::get().take_free_page();
if (paddr == 0)
return BAN::Error::from_errno(ENOMEM);
m_page_table.map_page_at(paddr, vaddr, m_flags);
// Private mapping: back the page with a fresh physical page owned by
// this region alone.
if (m_type == Type::PRIVATE)
{
// Map new physical page to address
paddr_t paddr = Heap::get().take_free_page();
if (paddr == 0)
return BAN::Error::from_errno(ENOMEM);
m_page_table.map_page_at(paddr, vaddr, m_flags);
size_t file_offset = m_offset + (vaddr - m_vaddr);
size_t bytes = BAN::Math::min<size_t>(m_size - file_offset, PAGE_SIZE);
// NOTE(review): duplicate of the two lines above (diff interleave).
size_t file_offset = m_offset + (vaddr - m_vaddr);
size_t bytes = BAN::Math::min<size_t>(m_size - file_offset, PAGE_SIZE);
BAN::ErrorOr<size_t> read_ret = 0;
// NOTE(review): duplicate declaration (diff interleave).
BAN::ErrorOr<size_t> read_ret = 0;
// Fill the new page from the file; when this region's page table is
// the active one, the page can be read through vaddr directly.
if (&PageTable::current() == &m_page_table)
read_ret = m_inode->read(file_offset, (void*)vaddr, bytes);
// NOTE(review): duplicate of the branch above (diff interleave).
if (&PageTable::current() == &m_page_table)
read_ret = m_inode->read(file_offset, (void*)vaddr, bytes);
else
{
// Otherwise map the page temporarily at vaddr 0 of the current
// address space (assumed reserved as scratch -- TODO confirm).
auto& page_table = PageTable::current();
LockGuard _(page_table);
ASSERT(page_table.is_page_free(0));
page_table.map_page_at(paddr, 0, PageTable::Flags::ReadWrite | PageTable::Flags::Present);
read_ret = m_inode->read(file_offset, (void*)0, bytes);
// NOTE(review): memset after the read would clobber the data just
// read -- presumably meant to zero only the tail; verify upstream.
memset((void*)0, 0x00, PAGE_SIZE);
page_table.unmap_page(0);
}
// On a failed read, undo the mapping and release the page.
if (read_ret.is_error())
{
Heap::get().release_page(paddr);
m_page_table.unmap_page(vaddr);
return read_ret.release_error();
}
// A short read is treated as an I/O failure.
if (read_ret.value() < bytes)
{
dwarnln("Only {}/{} bytes read", read_ret.value(), bytes);
Heap::get().release_page(paddr);
m_page_table.unmap_page(vaddr);
return BAN::Error::from_errno(EIO);
}
}
// Shared mapping: back the page from the inode's shared page cache so
// every MAP_SHARED mapping of this inode sees the same memory.
else if (m_type == Type::SHARED)
{
LockGuard _(m_inode->m_lock);
ASSERT(m_inode->m_shared_region.valid());
ASSERT(m_shared_data->pages.size() == BAN::Math::div_round_up<size_t>(m_inode->size(), PAGE_SIZE));
auto& pages = m_shared_data->pages;
size_t page_index = (vaddr - m_vaddr) / PAGE_SIZE;
// First fault on this file page anywhere: allocate the backing
// physical page and populate it from the inode.
if (pages[page_index] == 0)
{
pages[page_index] = Heap::get().take_free_page();
if (pages[page_index] == 0)
return BAN::Error::from_errno(ENOMEM);
// NOTE(review): `offset` ignores m_offset, unlike the PRIVATE
// path's file_offset -- confirm shared mappings assume offset 0.
size_t offset = vaddr - m_vaddr;
size_t bytes = BAN::Math::min<size_t>(m_size - offset, PAGE_SIZE);
TRY(m_inode->read(offset, m_shared_data->page_buffer, bytes));
auto& page_table = PageTable::current();
// TODO: check if this can cause deadlock?
LockGuard page_table_lock(page_table);
ASSERT(page_table.is_page_free(0));
// Copy the buffered contents into the new physical page through
// the scratch mapping at vaddr 0.
page_table.map_page_at(pages[page_index], 0, PageTable::Flags::ReadWrite | PageTable::Flags::Present);
memcpy((void*)0, m_shared_data->page_buffer, PAGE_SIZE);
page_table.unmap_page(0);
}
// Map the (now populated) shared physical page into this region.
paddr_t paddr = pages[page_index];
ASSERT(paddr);
m_page_table.map_page_at(paddr, vaddr, m_flags);
}
// NOTE(review): everything from here to ASSERT_NOT_REACHED() is dead
// pre-change diff residue -- `paddr`, `read_ret`, `file_offset` and
// `bytes` are out of scope here in the post-change code.
else
{
LockGuard _(PageTable::current());
ASSERT(PageTable::current().is_page_free(0));
PageTable::current().map_page_at(paddr, 0, PageTable::Flags::ReadWrite | PageTable::Flags::Present);
read_ret = m_inode->read(file_offset, (void*)0, bytes);
memset((void*)0, 0x00, PAGE_SIZE);
PageTable::current().unmap_page(0);
}
if (read_ret.is_error())
{
Heap::get().release_page(paddr);
m_page_table.unmap_page(vaddr);
return read_ret.release_error();
}
if (read_ret.value() < bytes)
{
dwarnln("Only {}/{} bytes read", read_ret.value(), bytes);
Heap::get().release_page(paddr);
m_page_table.unmap_page(vaddr);
return BAN::Error::from_errno(EIO);
ASSERT_NOT_REACHED();
}
return true;

View File

@ -863,7 +863,7 @@ namespace Kernel
if (!(inode_flags & O_RDONLY))
return BAN::Error::from_errno(EACCES);
if (region_type == MemoryRegion::Type::SHARED)
if (!(args->prot & PROT_WRITE) || !(inode_flags & O_WRONLY))
if ((args->prot & PROT_WRITE) && !(inode_flags & O_WRONLY))
return BAN::Error::from_errno(EACCES);
auto region = TRY(FileBackedRegion::create(