From f31c6b847fb845738eb2cbbc318c0688ed08bec3 Mon Sep 17 00:00:00 2001 From: Bananymous Date: Thu, 31 Aug 2023 23:40:31 +0300 Subject: [PATCH] Kernel: Start work on writable ext2 filesystem --- kernel/include/kernel/FS/Ext2.h | 27 +- kernel/kernel/FS/Ext2.cpp | 817 ++++++++++++++++++++++++-------- 2 files changed, 635 insertions(+), 209 deletions(-) diff --git a/kernel/include/kernel/FS/Ext2.h b/kernel/include/kernel/FS/Ext2.h index d702b7a080..111469c47c 100644 --- a/kernel/include/kernel/FS/Ext2.h +++ b/kernel/include/kernel/FS/Ext2.h @@ -123,7 +123,7 @@ namespace Kernel class Ext2Inode final : public Inode { public: - virtual ino_t ino() const override { return m_index; }; + virtual ino_t ino() const override { return m_ino; }; virtual Mode mode() const override { return { m_inode.mode }; } virtual nlink_t nlink() const override { return m_inode.links_count; } virtual uid_t uid() const override { return m_inode.uid; } @@ -144,28 +144,31 @@ namespace Kernel virtual BAN::ErrorOr read(size_t, void*, size_t) override; virtual BAN::ErrorOr write(size_t, const void*, size_t) override; + virtual BAN::ErrorOr truncate(size_t) override; virtual BAN::ErrorOr create_file(BAN::StringView, mode_t, uid_t, gid_t) override; private: - BAN::ErrorOr data_block_index(uint32_t); + BAN::ErrorOr for_data_block_index(uint32_t, const BAN::Function&, bool allocate); - uint32_t index() const { return m_index; } + BAN::ErrorOr data_block_index(uint32_t); + BAN::ErrorOr allocate_new_block(); + BAN::ErrorOr sync(); + + uint32_t block_group() const; private: - Ext2Inode(Ext2FS& fs, Ext2::Inode inode, uint32_t index, BAN::StringView name) + Ext2Inode(Ext2FS& fs, Ext2::Inode inode, uint32_t ino) : m_fs(fs) - , m_inode(inode) - , m_name(name) - , m_index(index) + , m_inode(inode) + , m_ino(ino) {} - static BAN::ErrorOr> create(Ext2FS&, uint32_t, BAN::StringView); + static BAN::ErrorOr> create(Ext2FS&, uint32_t); private: Ext2FS& m_fs; Ext2::Inode m_inode; - BAN::String m_name; - uint32_t m_index; + const uint32_t m_ino; friend class Ext2FS; friend class BAN::RefPtr; @@ -192,6 +195,9 @@ namespace Kernel void read_block(uint32_t, BAN::Span); void write_block(uint32_t, BAN::Span); + void sync_superblock(); + + BAN::ErrorOr reserve_free_block(uint32_t primary_bgd); const Ext2::Superblock& superblock() const { return m_superblock; } @@ -209,6 +215,7 @@ namespace Kernel Partition& m_partition; BAN::RefPtr m_root_inode; + BAN::Vector m_superblock_backups; Ext2::Superblock m_superblock; diff --git a/kernel/kernel/FS/Ext2.cpp b/kernel/kernel/FS/Ext2.cpp index 49378350e4..0a2ce8840c 100644 --- a/kernel/kernel/FS/Ext2.cpp +++ b/kernel/kernel/FS/Ext2.cpp @@ -10,7 +10,9 @@ namespace Kernel { namespace Ext2::Enum - { + { + + constexpr uint16_t SUPER_MAGIC = 0xEF53; enum State { @@ -40,6 +42,12 @@ namespace Kernel DYNAMIC_REV = 1, }; + enum Rev0Constant + { + GOOD_OLD_FIRST_INO = 11, + GOOD_OLD_INODE_SIZE = 128, + }; + enum FeatureCompat { FEATURE_COMPAT_DIR_PREALLOC = 0x0001, @@ -160,52 +168,90 @@ namespace Kernel return m_inode.blocks / (2 << m_fs.superblock().log_block_size); } - BAN::ErrorOr> Ext2Inode::create(Ext2FS& fs, uint32_t inode_inode, BAN::StringView name) + uint32_t Ext2Inode::block_group() const { + return (m_ino - 1) / m_fs.superblock().blocks_per_group; + } + + BAN::ErrorOr> Ext2Inode::create(Ext2FS& fs, uint32_t inode_ino) + { + auto inode_location = TRY(fs.locate_inode(inode_ino)); + BAN::Vector block_buffer; TRY(block_buffer.resize(fs.block_size())); - auto inode_location = TRY(fs.locate_inode(inode_inode)); fs.read_block(inode_location.block, block_buffer.span()); + auto& inode = *(Ext2::Inode*)(block_buffer.data() + inode_location.offset); - Ext2Inode* result = new Ext2Inode(fs, inode, inode_inode, name); + + Ext2Inode* result = new Ext2Inode(fs, inode, inode_ino); if (result == nullptr) return BAN::Error::from_errno(ENOMEM); return BAN::RefPtr::adopt(result); } - BAN::ErrorOr Ext2Inode::data_block_index(uint32_t asked_data_block) +#define READ_INDIRECT(block, container) \ + if (block) \ + m_fs.read_block(block, block_buffer.span()); \ + else \ + { \ + if (!allocate) \ + return BAN::Error::from_error_code(ErrorCode::Ext2_Corrupted); \ + memset(block_buffer.data(), 0, block_size); \ + block = TRY(m_fs.reserve_free_block(block_group())); \ + m_fs.write_block(container, block_buffer.span()); \ + } + +#define READ_INDIRECT_TOP(block) \ + if (block) \ + m_fs.read_block(block, block_buffer.span()); \ + else \ + { \ + if (!allocate) \ + return BAN::Error::from_error_code(ErrorCode::Ext2_Corrupted); \ + memset(block_buffer.data(), 0, block_size); \ + block = TRY(m_fs.reserve_free_block(block_group())); \ + } + + BAN::ErrorOr Ext2Inode::for_data_block_index(uint32_t asked_data_block, const BAN::Function& callback, bool allocate) { - uint32_t data_blocks_count = m_inode.blocks / (2 << m_fs.superblock().log_block_size); - uint32_t blocks_per_array = (1024 << m_fs.superblock().log_block_size) / sizeof(uint32_t); + const uint32_t block_size = blksize(); + const uint32_t data_blocks_count = blocks(); + const uint32_t blocks_per_array = block_size / sizeof(uint32_t); ASSERT(asked_data_block < data_blocks_count); // Direct block if (asked_data_block < 12) { - uint32_t block = m_inode.block[asked_data_block]; - if (block == 0) - return BAN::Error::from_errno(EIO); - return block; + uint32_t& block = m_inode.block[asked_data_block]; + uint32_t block_copy = block; + callback(block); + + if (block != block_copy) + TRY(sync()); + + return {}; } asked_data_block -= 12; - uint32_t block_size = m_fs.block_size(); BAN::Vector block_buffer; TRY(block_buffer.resize(block_size)); // Singly indirect block if (asked_data_block < blocks_per_array) { - if (m_inode.block[12] == 0) - return BAN::Error::from_errno(EIO); - m_fs.read_block(m_inode.block[12], block_buffer.span()); // Block array - uint32_t block = ((uint32_t*)block_buffer.data())[asked_data_block]; - if (block == 0) - return BAN::Error::from_errno(EIO); - return block; + READ_INDIRECT_TOP(m_inode.block[12]); + + uint32_t& block = ((uint32_t*)block_buffer.data())[asked_data_block]; + uint32_t block_copy = block; + callback(block); + + if (block != block_copy) + m_fs.write_block(m_inode.block[12], block_buffer.span()); + + return {}; } asked_data_block -= blocks_per_array; @@ -213,15 +259,19 @@ namespace Kernel // Doubly indirect blocks if (asked_data_block < blocks_per_array * blocks_per_array) { - m_fs.read_block(m_inode.block[13], block_buffer.span()); // Singly indirect array - uint32_t direct_block = ((uint32_t*)block_buffer.data())[asked_data_block / blocks_per_array]; - if (direct_block == 0) - return BAN::Error::from_errno(EIO); - m_fs.read_block(direct_block, block_buffer.span()); // Block array - uint32_t block = ((uint32_t*)block_buffer.data())[asked_data_block % blocks_per_array]; - if (block == 0) - return BAN::Error::from_errno(EIO); - return block; + READ_INDIRECT_TOP(m_inode.block[13]); + + uint32_t& direct_block = ((uint32_t*)block_buffer.data())[asked_data_block / blocks_per_array]; + READ_INDIRECT(direct_block, m_inode.block[13]); + + uint32_t& block = ((uint32_t*)block_buffer.data())[asked_data_block % blocks_per_array]; + uint32_t block_copy = block; + callback(block); + + if (block != block_copy) + m_fs.write_block(direct_block, block_buffer.span()); + + return {}; } asked_data_block -= blocks_per_array * blocks_per_array; @@ -229,24 +279,38 @@ namespace Kernel // Triply indirect blocks if (asked_data_block < blocks_per_array * blocks_per_array * blocks_per_array) { - m_fs.read_block(m_inode.block[14], block_buffer.span()); // Doubly indirect array - uint32_t singly_indirect_block = ((uint32_t*)block_buffer.data())[asked_data_block / (blocks_per_array * blocks_per_array)]; - if (singly_indirect_block == 0) - return BAN::Error::from_errno(EIO); - m_fs.read_block(singly_indirect_block, block_buffer.span()); // Singly indirect array - uint32_t direct_block = ((uint32_t*)block_buffer.data())[(asked_data_block / blocks_per_array) % blocks_per_array]; - if (direct_block == 0) - return BAN::Error::from_errno(EIO); - m_fs.read_block(direct_block, block_buffer.span()); // Block array - uint32_t block = ((uint32_t*)block_buffer.data())[asked_data_block % blocks_per_array]; - if (block == 0) - return BAN::Error::from_errno(EIO); - return block; + READ_INDIRECT_TOP(m_inode.block[14]); + + uint32_t& doubly_indirect_block = ((uint32_t*)block_buffer.data())[asked_data_block / (blocks_per_array * blocks_per_array)]; + READ_INDIRECT(doubly_indirect_block, m_inode.block[14]); + + uint32_t& singly_direct_block = ((uint32_t*)block_buffer.data())[(asked_data_block / blocks_per_array) % blocks_per_array]; + READ_INDIRECT(singly_direct_block, doubly_indirect_block); + + uint32_t& block = ((uint32_t*)block_buffer.data())[asked_data_block % blocks_per_array]; + uint32_t block_copy = block; + callback(block); + + if (block != block_copy) + m_fs.write_block(singly_direct_block, block_buffer.span()); + + return {}; } ASSERT_NOT_REACHED(); } - + +#undef READ_INDIRECT +#undef READ_INDIRECT_TOP + + BAN::ErrorOr Ext2Inode::data_block_index(uint32_t asked_data_block) + { + uint32_t result; + TRY(for_data_block_index(asked_data_block, [&result] (uint32_t& index) { result = index; }, false)); + ASSERT(result != 0); + return result; + } + BAN::ErrorOr Ext2Inode::link_target() { ASSERT(mode().iflnk()); @@ -267,7 +331,8 @@ namespace Kernel if (offset + count > m_inode.size) count = m_inode.size - offset; - uint32_t block_size = 1024 << m_fs.superblock().log_block_size; + const uint32_t block_size = blksize(); + BAN::Vector block_buffer; TRY(block_buffer.resize(block_size)); @@ -293,28 +358,118 @@ namespace Kernel BAN::ErrorOr Ext2Inode::write(size_t offset, const void* buffer, size_t count) { - (void)offset; - (void)buffer; - (void)count; + if (offset >= UINT32_MAX || count == UINT32_MAX || offset + count >= UINT32_MAX) + return BAN::Error::from_errno(EOVERFLOW); + if (mode().ifdir()) return BAN::Error::from_errno(EISDIR); - return BAN::Error::from_errno(ENOTSUP); + + if (m_inode.size < offset + count) + TRY(truncate(offset + count)); + + const uint32_t block_size = blksize(); + + BAN::Vector block_buffer; + TRY(block_buffer.resize(block_size)); + + const uint8_t* u8buffer = (const uint8_t*)buffer; + + // Write partial block + if (offset % block_size) + { + uint32_t block_index = offset / block_size; + uint32_t block_offset = offset % block_size; + + uint32_t data_block_index = TRY(this->data_block_index(block_index)); + uint32_t to_copy = BAN::Math::min(block_size - block_offset, count); + + m_fs.read_block(data_block_index, block_buffer.span()); + memcpy(block_buffer.data() + block_offset, buffer, to_copy); + m_fs.write_block(data_block_index, block_buffer.span()); + + u8buffer += to_copy; + offset += to_copy; + count -= to_copy; + } + + while (count >= block_size) + { + uint32_t data_block_index = TRY(this->data_block_index(offset / block_size)); + + m_fs.write_block(data_block_index, BAN::Span(u8buffer, block_size)); + + u8buffer += block_size; + offset += block_size; + count -= block_size; + } + + if (count > 0) + { + uint32_t data_block_index = TRY(this->data_block_index(offset / block_size)); + + m_fs.read_block(data_block_index, block_buffer.span()); + memcpy(block_buffer.data(), u8buffer, count); + m_fs.write_block(data_block_index, block_buffer.span()); + } + + return count; + } + + BAN::ErrorOr Ext2Inode::truncate(size_t new_size) + { + if (m_inode.size == new_size) + return {}; + + const uint32_t block_size = blksize(); + const uint32_t current_data_blocks = blocks(); + const uint32_t needed_data_blocks = BAN::Math::div_round_up(new_size, block_size); + + if (new_size < m_inode.size) + { + m_inode.size = new_size; + TRY(sync()); + return {}; + } + + BAN::Vector block_buffer; + TRY(block_buffer.resize(block_size)); + + if (uint32_t rem = m_inode.size % block_size) + { + uint32_t last_block_index = TRY(data_block_index(current_data_blocks - 1)); + + m_fs.read_block(last_block_index, block_buffer.span()); + memset(block_buffer.data() + rem, 0, block_size - rem); + m_fs.write_block(last_block_index, block_buffer.span()); + } + + memset(block_buffer.data(), 0, block_size); + while (blocks() < needed_data_blocks) + { + uint32_t block_index = TRY(allocate_new_block()); + m_fs.write_block(block_index, block_buffer.span()); + } + + m_inode.size = new_size; + TRY(sync()); + + return {}; } BAN::ErrorOr Ext2Inode::directory_read_next_entries(off_t offset, DirectoryEntryList* list, size_t list_size) { if (!mode().ifdir()) return BAN::Error::from_errno(ENOTDIR); - - uint32_t data_block_count = blocks(); + + const uint32_t data_block_count = blocks(); if (offset >= data_block_count) { list->entry_count = 0; return {}; } - uint32_t block_size = blksize(); - uint32_t block_index = TRY(data_block_index(offset)); + const uint32_t block_size = blksize(); + const uint32_t block_index = TRY(data_block_index(offset)); BAN::Vector block_buffer; TRY(block_buffer.resize(block_size)); @@ -374,9 +529,14 @@ namespace Kernel if (name.size() > 255) return BAN::Error::from_errno(ENAMETOOLONG); - uint32_t block_size = m_fs.block_size(); - BAN::Vector block_buffer; - TRY(block_buffer.resize(block_size)); + if (!(Mode(mode).ifreg())) + return BAN::Error::from_errno(EINVAL); + + if (m_inode.flags & Ext2::Enum::INDEX_FL) + { + dwarnln("file creation to indexed directory not supported"); + return BAN::Error::from_errno(ENOTSUP); + } auto error_or = directory_find_inode(name); if (!error_or.is_error()) @@ -386,67 +546,149 @@ namespace Kernel timespec current_time = SystemTimer::get().real_time(); - Ext2::Inode ext2_inode; - ext2_inode.mode = mode; - ext2_inode.uid = uid; - ext2_inode.size = 0; - ext2_inode.atime = current_time.tv_sec; - ext2_inode.ctime = current_time.tv_sec; - ext2_inode.mtime = current_time.tv_sec; - ext2_inode.dtime = current_time.tv_sec; - ext2_inode.gid = gid; - ext2_inode.links_count = 1; - ext2_inode.blocks = 0; - ext2_inode.flags = 0; - ext2_inode.osd1 = 0; - memset(ext2_inode.block, 0, sizeof(ext2_inode.block)); - ext2_inode.generation = 0; - ext2_inode.file_acl = 0; - ext2_inode.dir_acl = 0; - ext2_inode.faddr = 0; - memset(ext2_inode.osd2, 0, sizeof(ext2_inode.osd2)); + Ext2::Inode ext2_inode + { + .mode = (uint16_t)mode, + .uid = (uint16_t)uid, + .size = 0, + .atime = (uint32_t)current_time.tv_sec, + .ctime = (uint32_t)current_time.tv_sec, + .mtime = (uint32_t)current_time.tv_sec, + .dtime = 0, + .gid = (uint16_t)gid, + .links_count = 1, + .blocks = 0, + .flags = 0, + .osd1 = 0, + .block = {}, + .generation = 0, + .file_acl = 0, + .dir_acl = 0, + .faddr = 0, + .osd2 = {} + }; - uint32_t inode_index = TRY(m_fs.create_inode(ext2_inode)); + const uint32_t inode_index = TRY(m_fs.create_inode(ext2_inode)); - // Insert inode to this directory - uint32_t data_block_count = m_inode.blocks / (2 << m_fs.superblock().log_block_size); - uint32_t block_index = TRY(data_block_index(data_block_count - 1)); - m_fs.read_block(block_index, block_buffer.span()); + const uint32_t block_size = m_fs.block_size(); + BAN::Vector block_buffer; + TRY(block_buffer.resize(block_size)); - const uint8_t* block_buffer_end = block_buffer.data() + block_size; - const uint8_t* entry_addr = block_buffer.data(); + auto write_inode = [&](uint32_t entry_offset, uint32_t entry_rec_len) + { + auto typed_mode = Mode { mode }; + uint8_t file_type = (m_fs.superblock().rev_level == Ext2::Enum::GOOD_OLD_REV) ? 0 + : typed_mode.ifreg() ? Ext2::Enum::REG_FILE + : typed_mode.ifdir() ? Ext2::Enum::DIR + : typed_mode.ifchr() ? Ext2::Enum::CHRDEV + : typed_mode.ifblk() ? Ext2::Enum::BLKDEV + : typed_mode.ififo() ? Ext2::Enum::FIFO + : typed_mode.ifsock() ? Ext2::Enum::SOCK + : typed_mode.iflnk() ? Ext2::Enum::SYMLINK + : 0; + + auto& new_entry = *(Ext2::LinkedDirectoryEntry*)(block_buffer.data() + entry_offset); + new_entry.inode = inode_index; + new_entry.rec_len = entry_rec_len; + new_entry.name_len = name.size(); + new_entry.file_type = file_type; + memcpy(new_entry.name, name.data(), name.size()); + }; + + uint32_t block_index = 0; + uint32_t entry_offset = 0; uint32_t needed_entry_len = sizeof(Ext2::LinkedDirectoryEntry) + name.size(); + if (auto rem = needed_entry_len % 4) + needed_entry_len += 4 - rem; - bool insered = false; - while (entry_addr < block_buffer_end) + const uint32_t data_block_count = blocks(); + if (data_block_count == 0) + goto needs_new_block; + + // Try to insert inode to last data block + block_index = TRY(data_block_index(data_block_count - 1)); + m_fs.read_block(block_index, block_buffer.span()); + + while (entry_offset < block_size) { - auto& entry = *(Ext2::LinkedDirectoryEntry*)entry_addr; + auto& entry = *(Ext2::LinkedDirectoryEntry*)(block_buffer.data() + entry_offset); - if (needed_entry_len <= entry.rec_len - entry.name_len - sizeof(Ext2::LinkedDirectoryEntry)) + uint32_t entry_min_rec_len = sizeof(Ext2::LinkedDirectoryEntry) + entry.name_len; + if (auto rem = entry_min_rec_len % 4) + entry_min_rec_len += 4 - rem; + + if (entry.inode == 0 && needed_entry_len <= entry.rec_len) { - entry.rec_len = sizeof(Ext2::LinkedDirectoryEntry) + entry.name_len; - if (uint32_t rem = entry.rec_len % 4) - entry.rec_len += 4 - rem; - - auto& new_entry = *(Ext2::LinkedDirectoryEntry*)(entry_addr + entry.rec_len); - new_entry.inode = inode_index; - new_entry.rec_len = block_buffer_end - (uint8_t*)&new_entry; - new_entry.name_len = name.size(); - new_entry.file_type = Ext2::Enum::REG_FILE; - memcpy(new_entry.name, name.data(), name.size()); - + write_inode(entry_offset, entry.rec_len); m_fs.write_block(block_index, block_buffer.span()); - - insered = true; - break; + return {}; } - entry_addr += entry.rec_len; + else if (needed_entry_len <= entry.rec_len - entry_min_rec_len) + { + uint32_t new_rec_len = entry.rec_len - entry_min_rec_len; + entry.rec_len = entry_min_rec_len; + + write_inode(entry_offset + entry.rec_len, new_rec_len); + m_fs.write_block(block_index, block_buffer.span()); + return {}; + } + + entry_offset += entry.rec_len; } - // FIXME: If an entry cannot completely fit in one block, it must be pushed to the - // next data block and the rec_len of the previous entry properly adjusted. - ASSERT(insered); +needs_new_block: + ASSERT_NOT_REACHED(); + + block_index = TRY(allocate_new_block()); + + m_fs.read_block(block_index, block_buffer.span()); + write_inode(0, block_size); + m_fs.write_block(block_index, block_buffer.span()); + + return {}; + } + + BAN::ErrorOr Ext2Inode::allocate_new_block() + { + uint32_t new_block_index = TRY(m_fs.reserve_free_block(block_group())); + auto set_index_func = [new_block_index] (uint32_t& index) { index = new_block_index; }; + + const uint32_t blocks_per_data_block = blksize() / 512; + + m_inode.blocks += blocks_per_data_block; + if (auto res = for_data_block_index(blocks() - 1, set_index_func, true); res.is_error()) + { + m_inode.blocks -= blocks_per_data_block; + return res.release_error(); + } + + TRY(sync()); + return new_block_index; + } + + BAN::ErrorOr Ext2Inode::sync() + { + auto inode_location_or_error = m_fs.locate_inode(ino()); + if (inode_location_or_error.is_error()) + { + dwarnln("Open inode not found from filesystem"); + return BAN::Error::from_error_code(ErrorCode::Ext2_Corrupted); + } + + auto inode_location = inode_location_or_error.release_value(); + + const uint32_t block_size = blksize(); + + BAN::Vector block_buffer; + TRY(block_buffer.resize(block_size)); + + m_fs.read_block(inode_location.block, block_buffer.span()); + if (memcmp(block_buffer.data() + inode_location.offset, &m_inode, sizeof(Ext2::Inode))) + { + memcpy(block_buffer.data() + inode_location.offset, &m_inode, sizeof(Ext2::Inode)); + m_fs.write_block(inode_location.block, block_buffer.span()); + } return {}; } @@ -456,15 +698,15 @@ namespace Kernel if (!mode().ifdir()) return BAN::Error::from_errno(ENOTDIR); - uint32_t block_size = m_fs.block_size(); + const uint32_t block_size = blksize(); + const uint32_t data_block_count = blocks(); + BAN::Vector block_buffer; TRY(block_buffer.resize(block_size)); - uint32_t data_block_count = blocks(); - for (uint32_t i = 0; i < data_block_count; i++) { - uint32_t block_index = TRY(data_block_index(i)); + const uint32_t block_index = TRY(data_block_index(i)); m_fs.read_block(block_index, block_buffer.span()); const uint8_t* block_buffer_end = block_buffer.data() + block_size; @@ -475,7 +717,7 @@ namespace Kernel const auto& entry = *(const Ext2::LinkedDirectoryEntry*)entry_addr; BAN::StringView entry_name(entry.name, entry.name_len); if (entry.inode && entry_name == file_name) - return TRY(Ext2Inode::create(m_fs, entry.inode, entry_name)); + return TRY(Ext2Inode::create(m_fs, entry.inode)); entry_addr += entry.rec_len; } } @@ -497,42 +739,62 @@ namespace Kernel BAN::ErrorOr Ext2FS::initialize_superblock() { - const uint32_t sector_size = m_partition.device().sector_size(); - ASSERT(1024 % sector_size == 0); - // Read superblock from disk { - BAN::Vector superblock_buffer; - TRY(superblock_buffer.resize(1024)); + const uint32_t sector_size = m_partition.device().sector_size(); + ASSERT(1024 % sector_size == 0); - uint32_t lba = 1024 / sector_size; - uint32_t sector_count = 1024 / sector_size; + const uint32_t lba = 1024 / sector_size; + const uint32_t sector_count = BAN::Math::div_round_up(sizeof(Ext2::Superblock), sector_size); + + BAN::Vector superblock_buffer; + TRY(superblock_buffer.resize(sector_count * sector_size)); TRY(m_partition.read_sectors(lba, sector_count, superblock_buffer.data())); memcpy(&m_superblock, superblock_buffer.data(), sizeof(Ext2::Superblock)); } - if (m_superblock.magic != 0xEF53) + if (m_superblock.magic != Ext2::Enum::SUPER_MAGIC) return BAN::Error::from_error_code(ErrorCode::Ext2_Invalid); - if (m_superblock.rev_level < 1) + if (m_superblock.rev_level == Ext2::Enum::GOOD_OLD_REV) { memset(m_superblock.__extension_start, 0, sizeof(Ext2::Superblock) - offsetof(Ext2::Superblock, Ext2::Superblock::__extension_start)); - m_superblock.first_ino = 11; - m_superblock.inode_size = 128; + m_superblock.first_ino = Ext2::Enum::GOOD_OLD_FIRST_INO; + m_superblock.inode_size = Ext2::Enum::GOOD_OLD_INODE_SIZE; } - uint32_t number_of_block_groups = BAN::Math::div_round_up(superblock().inodes_count, superblock().inodes_per_group); - uint32_t number_of_block_groups_check = BAN::Math::div_round_up(superblock().blocks_count, superblock().blocks_per_group); + const uint32_t number_of_block_groups = BAN::Math::div_round_up(superblock().inodes_count, superblock().inodes_per_group); + const uint32_t number_of_block_groups_check = BAN::Math::div_round_up(superblock().blocks_count, superblock().blocks_per_group); if (number_of_block_groups != number_of_block_groups_check) return BAN::Error::from_error_code(ErrorCode::Ext2_Corrupted); - ASSERT(!(m_superblock.feature_incompat & Ext2::Enum::FEATURE_INCOMPAT_COMPRESSION)); - //ASSERT(!(m_superblock.feature_incompat & Ext2::Enum::FEATURE_INCOMPAT_FILETYPE)); - ASSERT(!(m_superblock.feature_incompat & Ext2::Enum::FEATURE_INCOMPAT_JOURNAL_DEV)); - ASSERT(!(m_superblock.feature_incompat & Ext2::Enum::FEATURE_INCOMPAT_META_BG)); - ASSERT(!(m_superblock.feature_incompat & Ext2::Enum::FEATURE_INCOMPAT_RECOVER)); + if (!(m_superblock.feature_incompat & Ext2::Enum::FEATURE_INCOMPAT_FILETYPE)) + { + dwarnln("Directory entries without filetype not supported"); + return BAN::Error::from_errno(ENOTSUP); + } + if (m_superblock.feature_incompat & Ext2::Enum::FEATURE_INCOMPAT_COMPRESSION) + { + dwarnln("Required FEATURE_INCOMPAT_COMPRESSION"); + return BAN::Error::from_errno(ENOTSUP); + } + if (m_superblock.feature_incompat & Ext2::Enum::FEATURE_INCOMPAT_JOURNAL_DEV) + { + dwarnln("Required FEATURE_INCOMPAT_JOURNAL_DEV"); + return BAN::Error::from_errno(ENOTSUP); + } + if (m_superblock.feature_incompat & Ext2::Enum::FEATURE_INCOMPAT_META_BG) + { + dwarnln("Required FEATURE_INCOMPAT_META_BG"); + return BAN::Error::from_errno(ENOTSUP); + } + if (m_superblock.feature_incompat & Ext2::Enum::FEATURE_INCOMPAT_RECOVER) + { + dwarnln("Required FEATURE_INCOMPAT_RECOVER"); + return BAN::Error::from_errno(ENOTSUP); + } #if EXT2_DEBUG_PRINT dprintln("EXT2"); @@ -541,20 +803,58 @@ namespace Kernel dprintln(" version {}.{}", m_superblock.rev_level, m_superblock.minor_rev_level); dprintln(" first data at {}", m_superblock.first_data_block); dprintln(" block size {}", 1024 << m_superblock.log_block_size); + dprintln(" inode size {}", m_superblock.inode_size); #endif + { + BAN::Vector block_buffer; + TRY(block_buffer.resize(block_size())); + + if (superblock().rev_level == Ext2::Enum::GOOD_OLD_REV) + { + // In revision 0 all blockgroups contain superblock backup + TRY(m_superblock_backups.reserve(number_of_block_groups - 1)); + for (uint32_t i = 1; i < number_of_block_groups; i++) + MUST(block_buffer.push_back(i)); + } + else + { + // In other revision superblock backups are on blocks 1 and powers of 3, 5 and 7 + TRY(m_superblock_backups.push_back(1)); + for (uint32_t i = 3; i < number_of_block_groups; i *= 3) + TRY(m_superblock_backups.push_back(i)); + for (uint32_t i = 5; i < number_of_block_groups; i *= 5) + TRY(m_superblock_backups.push_back(i)); + for (uint32_t i = 7; i < number_of_block_groups; i *= 7) + TRY(m_superblock_backups.push_back(i)); + + // We don't really care if this succeeds or not + (void)m_superblock_backups.shrink_to_fit(); + } + + for (uint32_t bg : m_superblock_backups) + { + read_block(superblock().first_data_block + superblock().blocks_per_group * bg, block_buffer.span()); + Ext2::Superblock& superblock_backup = *(Ext2::Superblock*)block_buffer.data(); + if (superblock_backup.magic != Ext2::Enum::SUPER_MAGIC) + derrorln("superblock backup at block {} is invalid ({4H})", bg, superblock_backup.magic); + } + } + return {}; } BAN::ErrorOr Ext2FS::initialize_root_inode() { - m_root_inode = TRY(Ext2Inode::create(*this, Ext2::Enum::ROOT_INO, "")); + m_root_inode = TRY(Ext2Inode::create(*this, Ext2::Enum::ROOT_INO)); + + //(void)locate_inode(2271); #if EXT2_DEBUG_PRINT dprintln("root inode:"); - dprintln(" created {}", ext2_root_inode().ctime); - dprintln(" modified {}", ext2_root_inode().mtime); - dprintln(" accessed {}", ext2_root_inode().atime); + dprintln(" created {}", root_inode()->ctime().tv_sec); + dprintln(" modified {}", root_inode()->mtime().tv_sec); + dprintln(" accessed {}", root_inode()->atime().tv_sec); #endif return {}; } @@ -563,107 +863,224 @@ namespace Kernel { ASSERT(ext2_inode.size == 0); - uint32_t block_size = this->block_size(); + if (m_superblock.free_inodes_count == 0) + return BAN::Error::from_errno(ENOSPC); + + const uint32_t block_size = this->block_size(); + BAN::Vector bgd_buffer; TRY(bgd_buffer.resize(block_size)); + BAN::Vector inode_bitmap; TRY(inode_bitmap.resize(block_size)); - uint32_t number_of_block_groups = BAN::Math::div_round_up(superblock().inodes_count, superblock().inodes_per_group); - for (uint32_t group = 0; group < number_of_block_groups; group++) - { - auto bgd_location = locate_block_group_descriptior(group); - read_block(bgd_location.block, bgd_buffer.span()); + uint32_t current_group = -1; + BlockLocation bgd_location {}; + Ext2::BlockGroupDescriptor* bgd = nullptr; - auto& bgd = *(Ext2::BlockGroupDescriptor*)(bgd_buffer.data() + bgd_location.offset); - if (bgd.free_inodes_count == 0) + for (uint32_t ino = superblock().first_ino; ino <= superblock().inodes_count; ino++) + { + const uint32_t ino_group = (ino - 1) / superblock().inodes_per_group; + const uint32_t ino_index = (ino - 1) % superblock().inodes_per_group; + + if (ino_group != current_group) + { + current_group = ino_group; + + bgd_location = locate_block_group_descriptior(current_group); + read_block(bgd_location.block, bgd_buffer.span()); + + bgd = (Ext2::BlockGroupDescriptor*)(bgd_buffer.data() + bgd_location.offset); + if (bgd->free_inodes_count == 0) + { + ino = (current_group + 1) * superblock().inodes_per_group; + continue; + } + + read_block(bgd->inode_bitmap, inode_bitmap.span()); + } + + const uint32_t ino_bitmap_byte = ino_index / 8; + const uint32_t ino_bitmap_bit = ino_index % 8; + if (inode_bitmap[ino_bitmap_byte] & (1 << ino_bitmap_bit)) continue; - read_block(bgd.inode_bitmap, inode_bitmap.span()); - for (uint32_t inode_offset = 0; inode_offset < superblock().inodes_per_group; inode_offset++) + inode_bitmap[ino_bitmap_byte] |= 1 << ino_bitmap_bit; + write_block(bgd->inode_bitmap, inode_bitmap.span()); + + bgd->free_inodes_count--; + write_block(bgd_location.block, bgd_buffer.span()); + + const uint32_t inode_table_offset = ino_index * superblock().inode_size; + const BlockLocation inode_location { - uint32_t byte = inode_offset / 8; - uint32_t bit = inode_offset % 8; - if ((inode_bitmap[byte] & (1 << bit)) == 0) - { - inode_bitmap[byte] |= (1 << bit); - write_block(bgd.inode_bitmap, inode_bitmap.span()); + .block = inode_table_offset / block_size + bgd->inode_table, + .offset = inode_table_offset % block_size + }; - bgd.free_inodes_count--; - write_block(bgd_location.block, bgd_buffer.span()); + // NOTE: we don't need inode bitmap anymore, so we can reuse it + auto& inode_buffer = inode_bitmap; - uint32_t inode_table_byte_offset = inode_offset * superblock().inode_size; - BlockLocation inode_location - { - .block = bgd.inode_table + inode_table_byte_offset / block_size, - .offset = inode_table_byte_offset % block_size - }; + read_block(inode_location.block, inode_buffer.span()); + memcpy(inode_buffer.data() + inode_location.offset, &ext2_inode, sizeof(Ext2::Inode)); + if (superblock().inode_size > sizeof(Ext2::Inode)) + memset(inode_buffer.data() + inode_location.offset + sizeof(Ext2::Inode), 0, superblock().inode_size - sizeof(Ext2::Inode)); + write_block(inode_location.block, inode_buffer.span()); - ASSERT(block_size - inode_location.offset >= sizeof(ext2_inode)); + m_superblock.free_inodes_count--; + sync_superblock(); - // NOTE: we don't need bgd_buffer anymore, so we can reuse it to avoid allocations - auto& inode_buffer = bgd_buffer; - read_block(inode_location.block, inode_buffer.span()); - memcpy(inode_buffer.data() + inode_location.offset, &ext2_inode, sizeof(ext2_inode)); - write_block(inode_location.block, inode_buffer.span()); - - return group * superblock().inodes_per_group + inode_offset + 1; - } - } + return ino; } - return BAN::Error::from_error_code(ErrorCode::Ext2_NoInodes); + derrorln("Corrupted file system. Superblock indicates free inodes but none were found."); + return BAN::Error::from_error_code(ErrorCode::Ext2_Corrupted); } void Ext2FS::read_block(uint32_t block, BAN::Span buffer) { - uint32_t sector_size = m_partition.device().sector_size(); - uint32_t block_size = this->block_size(); - uint32_t sectors_per_block = block_size / sector_size; + const uint32_t sector_size = m_partition.device().sector_size(); + const uint32_t block_size = this->block_size(); + const uint32_t sectors_per_block = block_size / sector_size; + const uint32_t sectors_before = 2048 / sector_size; + ASSERT(block >= 2); ASSERT(buffer.size() >= block_size); - - MUST(m_partition.read_sectors(block * sectors_per_block, sectors_per_block, buffer.data())); + MUST(m_partition.read_sectors(sectors_before + (block - 2) * sectors_per_block, sectors_per_block, buffer.data())); } - void Ext2FS::write_block(uint32_t block, BAN::Span data) + void Ext2FS::write_block(uint32_t block, BAN::Span buffer) { - uint32_t sector_size = m_partition.device().sector_size(); - uint32_t block_size = this->block_size(); - uint32_t sectors_per_block = block_size / sector_size; + const uint32_t sector_size = m_partition.device().sector_size(); + const uint32_t block_size = this->block_size(); + const uint32_t sectors_per_block = block_size / sector_size; + const uint32_t sectors_before = 2048 / sector_size; - ASSERT(data.size() <= block_size); - MUST(m_partition.write_sectors(block * sectors_per_block, sectors_per_block, data.data())); + ASSERT(block >= 2); + ASSERT(buffer.size() >= block_size); + MUST(m_partition.write_sectors(sectors_before + (block - 2) * sectors_per_block, sectors_per_block, buffer.data())); } - BAN::ErrorOr Ext2FS::locate_inode(uint32_t inode_index) + void Ext2FS::sync_superblock() { - ASSERT(inode_index < superblock().inodes_count); + const uint32_t sector_size = m_partition.device().sector_size(); + ASSERT(1024 % sector_size == 0); + + const uint32_t superblock_bytes = + (m_superblock.rev_level == Ext2::Enum::GOOD_OLD_REV) + ? offsetof(Ext2::Superblock, __extension_start) + : sizeof(Ext2::Superblock); + + const uint32_t lba = 1024 / sector_size; + const uint32_t sector_count = BAN::Math::div_round_up(superblock_bytes, sector_size); + + BAN::Vector superblock_buffer; + MUST(superblock_buffer.resize(sector_count * sector_size)); + + MUST(m_partition.read_sectors(lba, sector_count, superblock_buffer.data())); + if (memcmp(superblock_buffer.data(), &m_superblock, superblock_bytes)) + { + memcpy(superblock_buffer.data(), &m_superblock, superblock_bytes); + MUST(m_partition.write_sectors(lba, sector_count, superblock_buffer.data())); + } + } + + BAN::ErrorOr Ext2FS::reserve_free_block(uint32_t primary_bgd) + { + if (m_superblock.r_blocks_count >= m_superblock.free_blocks_count) + return BAN::Error::from_errno(ENOSPC); + + const uint32_t block_size = this->block_size(); - uint32_t block_size = this->block_size(); BAN::Vector bgd_buffer; TRY(bgd_buffer.resize(block_size)); - uint32_t inode_block_group = (inode_index - 1) / superblock().inodes_per_group; - uint32_t local_inode_index = (inode_index - 1) % superblock().inodes_per_group; + BAN::Vector block_bitmap; + TRY(block_bitmap.resize(block_size)); - uint32_t inode_table_byte_offset = (local_inode_index * superblock().inode_size); + auto check_block_group = + [&](uint32_t block_group) -> uint32_t + { + auto bgd_location = locate_block_group_descriptior(block_group); + read_block(bgd_location.block, bgd_buffer.span()); + + auto& bgd = *(Ext2::BlockGroupDescriptor*)(bgd_buffer.data() + bgd_location.offset); + if (bgd.free_blocks_count == 0) + return 0; + + read_block(bgd.block_bitmap, block_bitmap.span()); + for (uint32_t block_offset = 0; block_offset < m_superblock.blocks_per_group; block_offset++) + { + uint32_t byte = block_offset / 8; + uint32_t bit = block_offset % 8; + if (block_bitmap[byte] & (1 << bit)) + continue; + + block_bitmap[byte] |= 1 << bit; + write_block(bgd.block_bitmap, block_bitmap.span()); + + bgd.free_blocks_count--; + write_block(bgd_location.block, bgd_buffer.span()); + + m_superblock.free_blocks_count--; + sync_superblock(); + + return m_superblock.first_data_block + m_superblock.blocks_per_group * block_group + block_offset; + } + + derrorln("Corrupted file system. Block group descriptor indicates free blocks but none were found"); + return 0; + }; + + if (auto ret = check_block_group(primary_bgd)) + return ret; + + uint32_t number_of_block_groups = BAN::Math::div_round_up(m_superblock.blocks_count, m_superblock.blocks_per_group); + for (uint32_t block_group = 0; block_group < number_of_block_groups; block_group++) + if (block_group != primary_bgd) + if (auto ret = check_block_group(block_group)) + return ret; + + derrorln("Corrupted file system. Superblock indicates free blocks but none were found."); + return BAN::Error::from_error_code(ErrorCode::Ext2_Corrupted); + } + + BAN::ErrorOr Ext2FS::locate_inode(uint32_t ino) + { + ASSERT(ino < superblock().inodes_count); + + const uint32_t block_size = this->block_size(); + + BAN::Vector bgd_buffer; + TRY(bgd_buffer.resize(block_size)); + + const uint32_t inode_group = (ino - 1) / superblock().inodes_per_group; + const uint32_t inode_index = (ino - 1) % superblock().inodes_per_group; + + auto bgd_location = locate_block_group_descriptior(inode_group); - auto bgd_location = locate_block_group_descriptior(inode_block_group); read_block(bgd_location.block, bgd_buffer.span()); + auto& bgd = *(Ext2::BlockGroupDescriptor*)(bgd_buffer.data() + bgd_location.offset); - BlockLocation location; - location.block = bgd.inode_table + inode_table_byte_offset / block_size; - location.offset = inode_table_byte_offset % block_size; + const uint32_t inode_byte_offset = inode_index * superblock().inode_size; + BlockLocation location + { + .block = inode_byte_offset / block_size + bgd.inode_table, + .offset = inode_byte_offset % block_size + }; #if VERIFY_INODE_EXISTANCE - // Note we reuse the bgd_buffer since it is not needed anymore - ASSERT(superblock().inodes_per_group <= block_size * 8); - read_block(bgd.inode_bitmap, bgd_buffer.span()); - uint32_t byte = local_inode_index / 8; - uint32_t bit = local_inode_index % 8; - ASSERT(bgd_buffer[byte] & (1 << bit)); + const uint32_t inode_bitmap_block = bgd.inode_bitmap; + + // NOTE: we can reuse the bgd_buffer since it is not needed anymore + auto& inode_bitmap = bgd_buffer; + + read_block(inode_bitmap_block, inode_bitmap.span()); + + const uint32_t byte = inode_index / 8; + const uint32_t bit = inode_index % 8; + ASSERT(inode_bitmap[byte] & (1 << bit)); #endif return location; @@ -671,18 +1088,20 @@ namespace Kernel Ext2FS::BlockLocation Ext2FS::locate_block_group_descriptior(uint32_t group_index) { - uint32_t block_size = this->block_size(); + const uint32_t block_size = this->block_size(); - uint32_t block_group_count = BAN::Math::div_round_up(superblock().inodes_count, superblock().inodes_per_group); + const uint32_t block_group_count = BAN::Math::div_round_up(superblock().inodes_count, superblock().inodes_per_group); ASSERT(group_index < block_group_count); - uint32_t bgd_byte_offset = sizeof(Ext2::BlockGroupDescriptor) * group_index; - uint32_t bgd_table_block = superblock().first_data_block + (bgd_byte_offset / block_size) + 1; + // Block Group Descriptor table is always after the superblock + // Superblock begins at byte 1024 and is exactly 1024 bytes wide + const uint32_t bgd_byte_offset = 2048 + sizeof(Ext2::BlockGroupDescriptor) * group_index; - BlockLocation location; - location.block = bgd_table_block + (bgd_byte_offset / block_size); - location.offset = (bgd_byte_offset % block_size); - return location; + return + { + .block = bgd_byte_offset / block_size, + .offset = bgd_byte_offset % block_size + }; } } \ No newline at end of file