Kernel: Rewrite and optimize DiskCache

DiskCache now consists of PageCaches which are caches of contiguous
sectors. This allows the disk cache to be ordered and faster traversal.

We seem to have a problem somewhere during reading. The stack gets
corrupted.
This commit is contained in:
Bananymous 2023-06-19 10:31:47 +03:00
parent 4dce0f9074
commit 0d620f3e0f
2 changed files with 142 additions and 137 deletions

View File

@ -18,30 +18,28 @@ namespace Kernel
BAN::ErrorOr<void> read_sector(uint64_t sector, uint8_t* buffer); BAN::ErrorOr<void> read_sector(uint64_t sector, uint8_t* buffer);
BAN::ErrorOr<void> write_sector(uint64_t sector, const uint8_t* buffer); BAN::ErrorOr<void> write_sector(uint64_t sector, const uint8_t* buffer);
void sync();
size_t release_clean_pages(size_t); size_t release_clean_pages(size_t);
size_t release_pages(size_t); size_t release_pages(size_t);
void release_all_pages(); void release_all_pages();
private: private:
struct SectorCache struct PageCache
{
uint64_t sector { 0 };
bool dirty { false };
};
struct CacheBlock
{ {
paddr_t paddr { 0 }; paddr_t paddr { 0 };
BAN::Array<SectorCache, 4> sectors; uint64_t first_sector { 0 };
uint8_t sector_mask { 0 };
uint8_t dirty_mask { 0 };
void sync(StorageDevice&); void sync(StorageDevice&);
void read_sector(StorageDevice&, size_t, uint8_t*); BAN::ErrorOr<void> read_sector(StorageDevice&, uint64_t sector, uint8_t* buffer);
void write_sector(StorageDevice&, size_t, const uint8_t*); BAN::ErrorOr<void> write_sector(StorageDevice&, uint64_t sector, const uint8_t* buffer);
}; };
private: private:
SpinLock m_lock; SpinLock m_lock;
StorageDevice& m_device; StorageDevice& m_device;
BAN::Vector<CacheBlock> m_cache; BAN::Vector<PageCache> m_cache;
}; };
} }

View File

@ -13,145 +13,111 @@ namespace Kernel
DiskCache::~DiskCache() DiskCache::~DiskCache()
{ {
if (m_device.sector_size() == 0)
return;
release_all_pages(); release_all_pages();
} }
BAN::ErrorOr<void> DiskCache::read_sector(uint64_t sector, uint8_t* buffer) BAN::ErrorOr<void> DiskCache::read_sector(uint64_t sector, uint8_t* buffer)
{ {
LockGuard _(m_lock);
ASSERT(m_device.sector_size() > 0);
ASSERT(m_device.sector_size() <= PAGE_SIZE); ASSERT(m_device.sector_size() <= PAGE_SIZE);
for (auto& cache_block : m_cache) LockGuard _(m_lock);
uint64_t sectors_per_page = PAGE_SIZE / m_device.sector_size();
ASSERT(sectors_per_page <= sizeof(PageCache::sector_mask) * 8);
uint64_t page_cache_start = sector / sectors_per_page * sectors_per_page;
// Check if we already have a cache for this page
// FIXME: binary search
size_t index = 0;
for (; index < m_cache.size(); index++)
{ {
for (size_t i = 0; i < cache_block.sectors.size(); i++) if (m_cache[index].first_sector < page_cache_start)
{
if (cache_block.sectors[i].sector != sector)
continue; continue;
cache_block.read_sector(m_device, i, buffer); if (m_cache[index].first_sector > page_cache_start)
break;
TRY(m_cache[index].read_sector(m_device, sector, buffer));
return {}; return {};
} }
}
// Sector was not cached so we must read it from disk // Try to allocate new cache
TRY(m_device.read_sectors_impl(sector, 1, buffer));
// We try to add the sector to exisiting cache block
if (!m_cache.empty())
{
auto& cache_block = m_cache.back();
for (size_t i = 0; i < m_cache.back().sectors.size(); i++)
{
if (cache_block.sectors[i].sector)
continue;
cache_block.write_sector(m_device, i, buffer);
cache_block.sectors[i].sector = sector;
cache_block.sectors[i].dirty = false;
return {};
}
}
// We try to allocate new cache block for this sector
if (!m_cache.emplace_back().is_error())
{
if (paddr_t paddr = Heap::get().take_free_page()) if (paddr_t paddr = Heap::get().take_free_page())
{ {
auto& cache_block = m_cache.back(); MUST(m_cache.insert(index, { .paddr = paddr, .first_sector = page_cache_start }));
cache_block.paddr = paddr; TRY(m_cache[index].read_sector(m_device, sector, buffer));
cache_block.write_sector(m_device, 0, buffer);
cache_block.sectors[0].sector = sector;
cache_block.sectors[0].dirty = false;
return {}; return {};
} }
m_cache.pop_back();
}
// We could not cache the sector // Could not allocate new cache, read from disk
TRY(m_device.read_sectors_impl(sector, 1, buffer));
return {}; return {};
} }
BAN::ErrorOr<void> DiskCache::write_sector(uint64_t sector, const uint8_t* buffer) BAN::ErrorOr<void> DiskCache::write_sector(uint64_t sector, const uint8_t* buffer)
{ {
LockGuard _(m_lock);
ASSERT(m_device.sector_size() > 0);
ASSERT(m_device.sector_size() <= PAGE_SIZE); ASSERT(m_device.sector_size() <= PAGE_SIZE);
// Try to find this sector in the cache LockGuard _(m_lock);
for (auto& cache_block : m_cache)
uint64_t sectors_per_page = PAGE_SIZE / m_device.sector_size();
ASSERT(sectors_per_page <= sizeof(PageCache::sector_mask) * 8);
uint64_t page_cache_start = sector / sectors_per_page * sectors_per_page;
// Check if we already have a cache for this page
// FIXME: binary search
size_t index = 0;
for (; index < m_cache.size(); index++)
{ {
for (size_t i = 0; i < cache_block.sectors.size(); i++) if (m_cache[index].first_sector < page_cache_start)
{
if (cache_block.sectors[i].sector != sector)
continue; continue;
cache_block.write_sector(m_device, i, buffer); if (m_cache[index].first_sector > page_cache_start)
cache_block.sectors[i].dirty = true; break;
TRY(m_cache[index].write_sector(m_device, sector, buffer));
return {}; return {};
} }
}
// Sector was not in the cache, we try to add it to exisiting cache block // Try to allocate new cache
if (!m_cache.empty())
{
auto& cache_block = m_cache.back();
for (size_t i = 0; i < m_cache.back().sectors.size(); i++)
{
if (cache_block.sectors[i].sector)
continue;
cache_block.write_sector(m_device, i, buffer);
cache_block.sectors[i].sector = sector;
cache_block.sectors[i].dirty = true;
return {};
}
}
// We try to allocate new cache block
if (!m_cache.emplace_back().is_error())
{
if (paddr_t paddr = Heap::get().take_free_page()) if (paddr_t paddr = Heap::get().take_free_page())
{ {
auto& cache_block = m_cache.back(); MUST(m_cache.insert(index, { .paddr = paddr, .first_sector = page_cache_start }));
cache_block.paddr = paddr; TRY(m_cache[index].write_sector(m_device, sector, buffer));
cache_block.write_sector(m_device, 0, buffer);
cache_block.sectors[0].sector = sector;
cache_block.sectors[0].dirty = true;
return {}; return {};
} }
m_cache.pop_back();
}
// We could not allocate cache, so we must sync it to disk // Could not allocate new cache, write to disk
// right away
TRY(m_device.write_sectors_impl(sector, 1, buffer)); TRY(m_device.write_sectors_impl(sector, 1, buffer));
return {}; return {};
} }
void DiskCache::sync()
{
ASSERT(m_device.sector_size() <= PAGE_SIZE);
LockGuard _(m_lock);
for (auto& cache_block : m_cache)
cache_block.sync(m_device);
}
size_t DiskCache::release_clean_pages(size_t page_count) size_t DiskCache::release_clean_pages(size_t page_count)
{ {
LockGuard _(m_lock);
ASSERT(m_device.sector_size() > 0);
ASSERT(m_device.sector_size() <= PAGE_SIZE); ASSERT(m_device.sector_size() <= PAGE_SIZE);
// NOTE: There might not actually be page_count pages after this
// function returns. The synchronization must be done elsewhere.
LockGuard _(m_lock);
size_t released = 0; size_t released = 0;
for (size_t i = 0; i < m_cache.size() && released < page_count;) for (size_t i = 0; i < m_cache.size() && released < page_count;)
{ {
bool dirty = false; if (m_cache[i].dirty_mask == 0)
for (size_t j = 0; j < sizeof(m_cache[i].sectors) / sizeof(SectorCache); j++)
if (m_cache[i].sectors[j].dirty)
dirty = true;
if (dirty)
{ {
i++;
continue;
}
Heap::get().release_page(m_cache[i].paddr); Heap::get().release_page(m_cache[i].paddr);
m_cache.remove(i); m_cache.remove(i);
released++; released++;
continue;
}
i++;
} }
(void)m_cache.shrink_to_fit(); (void)m_cache.shrink_to_fit();
@ -161,12 +127,11 @@ namespace Kernel
size_t DiskCache::release_pages(size_t page_count) size_t DiskCache::release_pages(size_t page_count)
{ {
ASSERT(m_device.sector_size() > 0);
ASSERT(m_device.sector_size() <= PAGE_SIZE); ASSERT(m_device.sector_size() <= PAGE_SIZE);
size_t released = release_clean_pages(page_count); size_t released = release_clean_pages(page_count);
if (released >= page_count) if (released >= page_count)
return page_count; return released;
// NOTE: There might not actually be page_count pages after this // NOTE: There might not actually be page_count pages after this
// function returns. The synchronization must be done elsewhere. // function returns. The synchronization must be done elsewhere.
@ -187,13 +152,9 @@ namespace Kernel
void DiskCache::release_all_pages() void DiskCache::release_all_pages()
{ {
LockGuard _(m_lock);
ASSERT(m_device.sector_size() > 0);
ASSERT(m_device.sector_size() <= PAGE_SIZE); ASSERT(m_device.sector_size() <= PAGE_SIZE);
uint8_t* temp_buffer = (uint8_t*)kmalloc(m_device.sector_size()); LockGuard _(m_lock);
ASSERT(temp_buffer);
for (auto& cache_block : m_cache) for (auto& cache_block : m_cache)
{ {
@ -204,51 +165,97 @@ namespace Kernel
m_cache.clear(); m_cache.clear();
} }
void DiskCache::CacheBlock::sync(StorageDevice& device) void DiskCache::PageCache::sync(StorageDevice& device)
{ {
uint8_t* temp_buffer = (uint8_t*)kmalloc(device.sector_size()); if (this->dirty_mask == 0)
ASSERT(temp_buffer); return;
for (size_t i = 0; i < sectors.size(); i++) ASSERT(device.sector_size() <= PAGE_SIZE);
PageTable& page_table = PageTable::current();
page_table.lock();
ASSERT(page_table.is_page_free(0));
page_table.map_page_at(this->paddr, 0, PageTable::Flags::ReadWrite | PageTable::Flags::Present);
page_table.invalidate(0);
for (size_t i = 0; i < PAGE_SIZE / device.sector_size(); i++)
{ {
if (!sectors[i].dirty) if (!(this->dirty_mask & (1 << i)))
continue; continue;
read_sector(device, i, temp_buffer); MUST(device.write_sectors_impl(this->first_sector + i, 1, (const uint8_t*)(i * device.sector_size())));
MUST(device.write_sectors_impl(sectors[i].sector, 1, temp_buffer));
sectors[i].dirty = false;
} }
kfree(temp_buffer); page_table.unmap_page(0);
page_table.invalidate(0);
page_table.unlock();
this->dirty_mask = 0;
} }
void DiskCache::CacheBlock::read_sector(StorageDevice& device, size_t index, uint8_t* buffer) BAN::ErrorOr<void> DiskCache::PageCache::read_sector(StorageDevice& device, uint64_t sector, uint8_t* buffer)
{ {
ASSERT(index < sectors.size()); ASSERT(device.sector_size() <= PAGE_SIZE);
uint64_t sectors_per_page = PAGE_SIZE / device.sector_size();
uint64_t sector_offset = sector - this->first_sector;
ASSERT(sector_offset < sectors_per_page);
PageTable& page_table = PageTable::current(); PageTable& page_table = PageTable::current();
page_table.lock(); page_table.lock();
ASSERT(page_table.is_page_free(0)); ASSERT(page_table.is_page_free(0));
page_table.map_page_at(paddr, 0, PageTable::Flags::Present);
memcpy(buffer, (void*)(index * device.sector_size()), device.sector_size()); page_table.map_page_at(this->paddr, 0, PageTable::Flags::ReadWrite | PageTable::Flags::Present);
page_table.unmap_page(0);
page_table.invalidate(0); page_table.invalidate(0);
page_table.unlock();
// Sector not yet cached
if (!(this->sector_mask & (1 << sector_offset)))
{
TRY(device.read_sectors_impl(sector, 1, (uint8_t*)(sector_offset * device.sector_size())));
this->sector_mask |= 1 << sector_offset;
} }
void DiskCache::CacheBlock::write_sector(StorageDevice& device, size_t index, const uint8_t* buffer) memcpy(buffer, (const void*)(sector_offset * device.sector_size()), device.sector_size());
page_table.unmap_page(0);
page_table.invalidate(0);
page_table.unlock();
return {};
}
BAN::ErrorOr<void> DiskCache::PageCache::write_sector(StorageDevice& device, uint64_t sector, const uint8_t* buffer)
{ {
ASSERT(index < sectors.size()); ASSERT(device.sector_size() <= PAGE_SIZE);
uint64_t sectors_per_page = PAGE_SIZE / device.sector_size();
uint64_t sector_offset = sector - this->first_sector;
ASSERT(sector_offset < sectors_per_page);
PageTable& page_table = PageTable::current(); PageTable& page_table = PageTable::current();
page_table.lock(); page_table.lock();
ASSERT(page_table.is_page_free(0)); ASSERT(page_table.is_page_free(0));
page_table.map_page_at(paddr, 0, PageTable::Flags::ReadWrite | PageTable::Flags::Present);
memcpy((void*)(index * device.sector_size()), buffer, device.sector_size()); page_table.map_page_at(this->paddr, 0, PageTable::Flags::ReadWrite | PageTable::Flags::Present);
page_table.invalidate(0);
memcpy((void*)(sector_offset * device.sector_size()), buffer, device.sector_size());
this->sector_mask |= 1 << sector_offset;
this->dirty_mask |= 1 << sector_offset;
page_table.unmap_page(0); page_table.unmap_page(0);
page_table.invalidate(0); page_table.invalidate(0);
page_table.unlock(); page_table.unlock();
return {};
} }
} }