Kernel: Rewrite DiskCache

We now cache only clean pages since I don't want to think about
syncing the disk later.
This commit is contained in:
Bananymous 2023-07-27 21:57:32 +03:00
parent 6c0f864a6e
commit a9cf9bceef
4 changed files with 110 additions and 217 deletions

View File

@ -7,16 +7,14 @@
namespace Kernel
{
class StorageDevice;
class DiskCache
{
public:
DiskCache(StorageDevice&);
DiskCache(size_t sector_size);
~DiskCache();
BAN::ErrorOr<void> read_sector(uint64_t sector, uint8_t* buffer);
BAN::ErrorOr<void> write_sector(uint64_t sector, const uint8_t* buffer);
bool read_from_cache(uint64_t sector, uint8_t* buffer);
BAN::ErrorOr<void> write_to_cache(uint64_t sector, const uint8_t* buffer, bool dirty);
void sync();
size_t release_clean_pages(size_t);
@ -30,16 +28,11 @@ namespace Kernel
uint64_t first_sector { 0 };
uint8_t sector_mask { 0 };
uint8_t dirty_mask { 0 };
void sync(StorageDevice&);
BAN::ErrorOr<void> read_sector(StorageDevice&, uint64_t sector, uint8_t* buffer);
BAN::ErrorOr<void> write_sector(StorageDevice&, uint64_t sector, const uint8_t* buffer);
};
private:
SpinLock m_lock;
StorageDevice& m_device;
BAN::Vector<PageCache> m_cache;
const size_t m_sector_size;
BAN::Vector<PageCache> m_cache;
};
}

View File

@ -81,8 +81,8 @@ namespace Kernel
void add_disk_cache();
private:
DiskCache* m_disk_cache { nullptr };
BAN::Vector<Partition*> m_partitions;
BAN::Optional<DiskCache> m_disk_cache;
BAN::Vector<Partition*> m_partitions;
friend class DiskCache;
};

View File

@ -8,105 +8,121 @@
namespace Kernel
{
DiskCache::DiskCache(StorageDevice& device)
: m_device(device)
{ }
DiskCache::DiskCache(size_t sector_size)
: m_sector_size(sector_size)
{
ASSERT(PAGE_SIZE % m_sector_size == 0);
ASSERT(PAGE_SIZE / m_sector_size <= sizeof(PageCache::sector_mask) * 8);
ASSERT(PAGE_SIZE / m_sector_size <= sizeof(PageCache::dirty_mask) * 8);
}
DiskCache::~DiskCache()
{
release_all_pages();
}
BAN::ErrorOr<void> DiskCache::read_sector(uint64_t sector, uint8_t* buffer)
bool DiskCache::read_from_cache(uint64_t sector, uint8_t* buffer)
{
ASSERT(m_device.sector_size() <= PAGE_SIZE);
uint64_t sectors_per_page = PAGE_SIZE / m_sector_size;
uint64_t page_cache_offset = sector % sectors_per_page;
uint64_t page_cache_start = sector - page_cache_offset;
LockGuard _(m_lock);
PageTable& page_table = PageTable::current();
LockGuard page_table_locker(page_table);
ASSERT(page_table.is_page_free(0));
uint64_t sectors_per_page = PAGE_SIZE / m_device.sector_size();
ASSERT(sectors_per_page <= sizeof(PageCache::sector_mask) * 8);
CriticalScope _;
for (auto& cache : m_cache)
{
if (cache.first_sector < page_cache_start)
continue;
if (cache.first_sector > page_cache_start)
break;
uint64_t page_cache_start = sector / sectors_per_page * sectors_per_page;
if (!(cache.sector_mask & (1 << page_cache_offset)))
continue;
page_table.map_page_at(cache.paddr, 0, PageTable::Flags::Present);
memcpy(buffer, (void*)(page_cache_offset * m_sector_size), m_sector_size);
page_table.unmap_page(0);
return true;
}
return false;
};
BAN::ErrorOr<void> DiskCache::write_to_cache(uint64_t sector, const uint8_t* buffer, bool dirty)
{
uint64_t sectors_per_page = PAGE_SIZE / m_sector_size;
uint64_t page_cache_offset = sector % sectors_per_page;
uint64_t page_cache_start = sector - page_cache_offset;
PageTable& page_table = PageTable::current();
LockGuard page_table_locker(page_table);
ASSERT(page_table.is_page_free(0));
CriticalScope _;
// Check if we already have a cache for this page
// FIXME: binary search
size_t index = 0;
// Search the cache if the have this sector in memory
for (; index < m_cache.size(); index++)
{
if (m_cache[index].first_sector < page_cache_start)
auto& cache = m_cache[index];
if (cache.first_sector < page_cache_start)
continue;
if (m_cache[index].first_sector > page_cache_start)
if (cache.first_sector > page_cache_start)
break;
TRY(m_cache[index].read_sector(m_device, sector, buffer));
page_table.map_page_at(cache.paddr, 0, PageTable::Flags::ReadWrite | PageTable::Flags::Present);
memcpy((void*)(page_cache_offset * m_sector_size), buffer, m_sector_size);
page_table.unmap_page(0);
cache.sector_mask |= 1 << page_cache_offset;
if (dirty)
cache.dirty_mask |= 1 << page_cache_offset;
return {};
}
// Try to allocate new cache
if (paddr_t paddr = Heap::get().take_free_page())
// Try to add new page to the cache
paddr_t paddr = Heap::get().take_free_page();
if (paddr == 0)
return BAN::Error::from_errno(ENOMEM);
PageCache cache;
cache.paddr = paddr;
cache.first_sector = page_cache_start;
cache.sector_mask = 1 << page_cache_offset;
cache.dirty_mask = dirty ? cache.sector_mask : 0;
if (auto ret = m_cache.insert(index, cache); ret.is_error())
{
MUST(m_cache.insert(index, { .paddr = paddr, .first_sector = page_cache_start }));
TRY(m_cache[index].read_sector(m_device, sector, buffer));
return {};
Heap::get().release_page(paddr);
return ret.error();
}
// Could not allocate new cache, read from disk
TRY(m_device.read_sectors_impl(sector, 1, buffer));
return {};
}
page_table.map_page_at(cache.paddr, 0, PageTable::Flags::Present);
memcpy((void*)(page_cache_offset * m_sector_size), buffer, m_sector_size);
page_table.unmap_page(0);
BAN::ErrorOr<void> DiskCache::write_sector(uint64_t sector, const uint8_t* buffer)
{
ASSERT(m_device.sector_size() <= PAGE_SIZE);
LockGuard _(m_lock);
uint64_t sectors_per_page = PAGE_SIZE / m_device.sector_size();
ASSERT(sectors_per_page <= sizeof(PageCache::sector_mask) * 8);
uint64_t page_cache_start = sector / sectors_per_page * sectors_per_page;
// Check if we already have a cache for this page
// FIXME: binary search
size_t index = 0;
for (; index < m_cache.size(); index++)
{
if (m_cache[index].first_sector < page_cache_start)
continue;
if (m_cache[index].first_sector > page_cache_start)
break;
TRY(m_cache[index].write_sector(m_device, sector, buffer));
return {};
}
// Try to allocate new cache
if (paddr_t paddr = Heap::get().take_free_page())
{
MUST(m_cache.insert(index, { .paddr = paddr, .first_sector = page_cache_start }));
TRY(m_cache[index].write_sector(m_device, sector, buffer));
return {};
}
// Could not allocate new cache, write to disk
TRY(m_device.write_sectors_impl(sector, 1, buffer));
return {};
}
void DiskCache::sync()
{
ASSERT(m_device.sector_size() <= PAGE_SIZE);
LockGuard _(m_lock);
for (auto& cache_block : m_cache)
cache_block.sync(m_device);
CriticalScope _;
for (auto& cache : m_cache)
ASSERT(cache.dirty_mask == 0);
}
size_t DiskCache::release_clean_pages(size_t page_count)
{
ASSERT(m_device.sector_size() <= PAGE_SIZE);
// NOTE: There might not actually be page_count pages after this
// function returns. The synchronization must be done elsewhere.
LockGuard _(m_lock);
CriticalScope _;
size_t released = 0;
for (size_t i = 0; i < m_cache.size() && released < page_count;)
@ -128,138 +144,16 @@ namespace Kernel
size_t DiskCache::release_pages(size_t page_count)
{
ASSERT(m_device.sector_size() <= PAGE_SIZE);
size_t released = release_clean_pages(page_count);
if (released >= page_count)
return released;
// NOTE: There might not actually be page_count pages after this
// function returns. The synchronization must be done elsewhere.
LockGuard _(m_lock);
while (!m_cache.empty() && released < page_count)
{
m_cache.back().sync(m_device);
Heap::get().release_page(m_cache.back().paddr);
m_cache.pop_back();
released++;
}
(void)m_cache.shrink_to_fit();
return released;
ASSERT_NOT_REACHED();
}
void DiskCache::release_all_pages()
{
ASSERT(m_device.sector_size() <= PAGE_SIZE);
LockGuard _(m_lock);
for (auto& cache_block : m_cache)
{
cache_block.sync(m_device);
Heap::get().release_page(cache_block.paddr);
}
m_cache.clear();
}
void DiskCache::PageCache::sync(StorageDevice& device)
{
if (this->dirty_mask == 0)
return;
ASSERT(device.sector_size() <= PAGE_SIZE);
PageTable& page_table = PageTable::current();
page_table.lock();
ASSERT(page_table.is_page_free(0));
page_table.map_page_at(this->paddr, 0, PageTable::Flags::ReadWrite | PageTable::Flags::Present);
for (size_t i = 0; i < PAGE_SIZE / device.sector_size(); i++)
{
if (!(this->dirty_mask & (1 << i)))
continue;
MUST(device.write_sectors_impl(this->first_sector + i, 1, (const uint8_t*)(i * device.sector_size())));
// FIXME: race condition between here :)
this->dirty_mask &= ~(1 << i);
}
page_table.unmap_page(0);
page_table.unlock();
}
BAN::ErrorOr<void> DiskCache::PageCache::read_sector(StorageDevice& device, uint64_t sector, uint8_t* buffer)
{
ASSERT(device.sector_size() <= PAGE_SIZE);
uint64_t sectors_per_page = PAGE_SIZE / device.sector_size();
uint64_t sector_offset = sector - this->first_sector;
ASSERT(sector_offset < sectors_per_page);
PageTable& page_table = PageTable::current();
page_table.lock();
ASSERT(page_table.is_page_free(0));
page_table.map_page_at(this->paddr, 0, PageTable::Flags::ReadWrite | PageTable::Flags::Present);
// Sector not yet cached
if (!(this->sector_mask & (1 << sector_offset)))
{
TRY(device.read_sectors_impl(sector, 1, buffer));
CriticalScope _;
memcpy((void*)(sector_offset * device.sector_size()), buffer, device.sector_size());
this->sector_mask |= 1 << sector_offset;
}
else
{
CriticalScope _;
memcpy(buffer, (const void*)(sector_offset * device.sector_size()), device.sector_size());
}
page_table.unmap_page(0);
page_table.unlock();
return {};
}
BAN::ErrorOr<void> DiskCache::PageCache::write_sector(StorageDevice& device, uint64_t sector, const uint8_t* buffer)
{
ASSERT(device.sector_size() <= PAGE_SIZE);
uint64_t sectors_per_page = PAGE_SIZE / device.sector_size();
uint64_t sector_offset = sector - this->first_sector;
ASSERT(sector_offset < sectors_per_page);
PageTable& page_table = PageTable::current();
page_table.lock();
ASSERT(page_table.is_page_free(0));
page_table.map_page_at(this->paddr, 0, PageTable::Flags::ReadWrite | PageTable::Flags::Present);
{
CriticalScope _;
memcpy((void*)(sector_offset * device.sector_size()), buffer, device.sector_size());
this->sector_mask |= 1 << sector_offset;
this->dirty_mask |= 1 << sector_offset;
}
page_table.unmap_page(0);
page_table.unlock();
return {};
release_pages(m_cache.size());
}
}

View File

@ -251,35 +251,41 @@ namespace Kernel
StorageDevice::~StorageDevice()
{
if (m_disk_cache)
delete m_disk_cache;
m_disk_cache = nullptr;
}
void StorageDevice::add_disk_cache()
{
ASSERT(m_disk_cache == nullptr);
m_disk_cache = new DiskCache(*this);
ASSERT(m_disk_cache);
ASSERT(!m_disk_cache.has_value());
m_disk_cache = DiskCache(sector_size());
}
BAN::ErrorOr<void> StorageDevice::read_sectors(uint64_t lba, uint8_t sector_count, uint8_t* buffer)
{
if (!m_disk_cache)
return read_sectors_impl(lba, sector_count, buffer);
for (uint8_t sector = 0; sector < sector_count; sector++)
TRY(m_disk_cache->read_sector(lba + sector, buffer + sector * sector_size()));
for (uint8_t offset = 0; offset < sector_count; offset++)
{
uint8_t* buffer_ptr = buffer + offset * sector_size();
if (m_disk_cache.has_value())
if (m_disk_cache->read_from_cache(lba + offset, buffer_ptr))
continue;
TRY(read_sectors_impl(lba + offset, 1, buffer_ptr));
if (m_disk_cache.has_value())
(void)m_disk_cache->write_to_cache(lba + offset, buffer_ptr, false);
}
return {};
}
BAN::ErrorOr<void> StorageDevice::write_sectors(uint64_t lba, uint8_t sector_count, const uint8_t* buffer)
{
if (!m_disk_cache)
return write_sectors_impl(lba, sector_count, buffer);
// TODO: use disk cache for dirty pages. I don't wanna think about how to do it safely now
for (uint8_t sector = 0; sector < sector_count; sector++)
TRY(m_disk_cache->write_sector(lba + sector, buffer + sector * sector_size()));
{
TRY(write_sectors_impl(lba + sector, 1, buffer));
if (m_disk_cache.has_value())
(void)m_disk_cache->write_to_cache(lba + sector, buffer + sector * sector_size(), false);
}
return {};
}
}