From c849293f3d4a4dfab240193538c2ec2689c2826a Mon Sep 17 00:00:00 2001
From: Bananymous
Date: Mon, 13 Apr 2026 16:36:35 +0300
Subject: [PATCH] Kernel: Add support for loading gzip compressed initrd

---
 kernel/include/kernel/FS/USTARModule.h |   5 +-
 kernel/kernel/FS/USTARModule.cpp       | 406 ++++++++++++++++++-------
 kernel/kernel/FS/VirtualFileSystem.cpp |  12 +-
 3 files changed, 313 insertions(+), 110 deletions(-)

diff --git a/kernel/include/kernel/FS/USTARModule.h b/kernel/include/kernel/FS/USTARModule.h
index 4abb7f50..ac73e4de 100644
--- a/kernel/include/kernel/FS/USTARModule.h
+++ b/kernel/include/kernel/FS/USTARModule.h
@@ -1,12 +1,11 @@
 #pragma once
 
 #include
-#include
+#include
 
 namespace Kernel
 {
 
-	bool is_ustar_boot_module(const BootModule&);
-	BAN::ErrorOr unpack_boot_module_into_filesystem(BAN::RefPtr, const BootModule&);
+	BAN::ErrorOr unpack_boot_module_into_directory(BAN::RefPtr, const BootModule&);
 
 }
diff --git a/kernel/kernel/FS/USTARModule.cpp b/kernel/kernel/FS/USTARModule.cpp
index 524f2d6c..a5be5103 100644
--- a/kernel/kernel/FS/USTARModule.cpp
+++ b/kernel/kernel/FS/USTARModule.cpp
@@ -1,94 +1,272 @@
 #include
 #include
+#include
+#include
 
 #include
 
 namespace Kernel
 {
 
-	bool is_ustar_boot_module(const BootModule& module)
+	// Pull-based byte stream with a fixed 4096-byte staging buffer: produce_data()
+	// refills the buffer, data() exposes it and pop_data() drops bytes from its front.
+	class DataSource
 	{
-		if (module.start % PAGE_SIZE)
+	public:
+		DataSource() = default;
+		virtual ~DataSource() = default;
+
+		size_t data_size() const
 		{
-			dprintln("ignoring non-page-aligned module");
+			return m_data_size;
+		}
+
+		BAN::ConstByteSpan data()
+		{
+			return { m_data_buffer, m_data_size };
+		}
+
+		void pop_data(size_t size)
+		{
+			ASSERT(size <= m_data_size);
+			if (size > 0 && size < m_data_size)
+				memmove(m_data_buffer, m_data_buffer + size, m_data_size - size);
+			m_data_size -= size;
+			m_bytes_produced += size;
+		}
+
+		virtual BAN::ErrorOr produce_data() = 0;
+
+		uint64_t bytes_produced() const
+		{
+			return m_bytes_produced;
+		}
+
+		virtual uint64_t bytes_consumed() const = 0;
+
+	protected:
+		uint8_t m_data_buffer[4096];
+		size_t m_data_size { 0 };
+
+	private:
+		uint64_t m_bytes_produced { 0 };
+	};
+
+	// Reads the boot module's physical memory page by page through PageTable::with_fast_page.
+	// NOTE(review): the in-page offset is derived from m_offset alone, which presumes m_module.start is page aligned — confirm.
+	class DataSourceRaw final : public DataSource
+	{
+	public:
+		DataSourceRaw(const BootModule& module)
+			: m_module(module)
+		{ }
+
+		BAN::ErrorOr produce_data() override
+		{
+			if (m_offset >= m_module.size || m_data_size >= sizeof(m_data_buffer))
+				return false;
+
+			while (m_offset < m_module.size && m_data_size < sizeof(m_data_buffer))
+			{
+				// never copy past the free buffer space, the end of the module or the current page
+				const size_t to_copy = BAN::Math::min(
+					BAN::Math::min<size_t>(sizeof(m_data_buffer) - m_data_size, m_module.size - m_offset),
+					PAGE_SIZE - (m_offset % PAGE_SIZE)
+				);
+				PageTable::with_fast_page((m_module.start + m_offset) & PAGE_ADDR_MASK, [&] {
+					memcpy(m_data_buffer + m_data_size, PageTable::fast_page_as_ptr(m_offset % PAGE_SIZE), to_copy);
+				});
+				m_data_size += to_copy;
+				m_offset += to_copy;
+			}
+
+			return true;
+		}
+
+		uint64_t bytes_consumed() const override
+		{
+			return bytes_produced();
+		}
+
+	private:
+		const BootModule& m_module;
+		size_t m_offset { 0 };
+	};
+
+	// Decompresses a gzip stream that is pulled from another DataSource.
+	class DataSourceGZip final : public DataSource
+	{
+	public:
+		DataSourceGZip(BAN::UniqPtr&& data_source)
+			: m_data_source(BAN::move(data_source))
+			, m_decompressor(LibDEFLATE::StreamType::GZip)
+		{ }
+
+		BAN::ErrorOr produce_data() override
+		{
+			if (m_is_done)
+				return false;
+
+			bool did_produce_data { false };
+			for (;;)
+			{
+				TRY(m_data_source->produce_data());
+
+				size_t input_consumed, output_produced;
+				const auto status = TRY(m_decompressor.decompress(
+					m_data_source->data(),
+					input_consumed,
+					{ m_data_buffer + m_data_size, sizeof(m_data_buffer) - m_data_size },
+					output_produced
+				));
+
+				m_data_source->pop_data(input_consumed);
+				m_data_size += output_produced;
+
+				if (output_produced)
+					did_produce_data = true;
+
+				switch (status)
+				{
+					using DecompStatus = LibDEFLATE::Decompressor::Status;
+					case DecompStatus::Done:
+						m_is_done = true;
+						return did_produce_data;
+					case DecompStatus::NeedMoreInput:
+						break; // NOTE(review): once the inner source is exhausted this retries with no new input — confirm decompress() fails on a truncated stream
+					case DecompStatus::NeedMoreOutput:
+						return did_produce_data;
+				}
+			}
+		}
+
+		uint64_t bytes_consumed() const override
+		{
+			return m_data_source->bytes_consumed();
+		}
+
+	private:
+		BAN::UniqPtr m_data_source;
+		LibDEFLATE::Decompressor m_decompressor;
+		bool m_is_done { false };
+	};
+
+	static BAN::ErrorOr unpack_boot_module_into_directory(BAN::RefPtr root_inode, DataSource& data_source);
+
+	// Unpacks an optionally gzip compressed ustar archive from a boot module into root_inode.
+	// Returns false when the (decompressed) data does not start with a ustar header.
+	BAN::ErrorOr unpack_boot_module_into_directory(BAN::RefPtr root_inode, const BootModule& module)
+	{
+		ASSERT(root_inode->mode().ifdir());
+
+		BAN::UniqPtr data_source = TRY(BAN::UniqPtr::create(module));
+
+		bool is_compressed = false;
+
+		TRY(data_source->produce_data());
+		if (data_source->data_size() >= 2 && memcmp(&data_source->data()[0], "\x1F\x8B", 2) == 0)
+		{
+			data_source = TRY(BAN::UniqPtr::create(BAN::move(data_source)));
+			is_compressed = true;
+		}
+
+		TRY(data_source->produce_data());
+		if (data_source->data_size() < 512 || memcmp(&data_source->data()[257], "ustar", 5) != 0)
+		{
+			dwarnln("Unrecognized initrd format");
 			return false;
 		}
 
-		if (module.size < 512)
-			return false;
+		const auto module_size_kib = module.size / 1024;
+		dprintln("unpacking {}.{3} MiB{} initrd",
+			module_size_kib / 1024, (module_size_kib % 1024) * 1000 / 1024,
+			is_compressed ? " compressed" : ""
+		);
 
-		bool has_ustar_signature;
-		PageTable::with_fast_page(module.start, [&] {
-			has_ustar_signature = memcmp(PageTable::fast_page_as_ptr(257), "ustar", 5) == 0;
-		});
+		const auto unpack_ms1 = SystemTimer::get().ms_since_boot();
+		TRY(unpack_boot_module_into_directory(root_inode, *data_source));
+		const auto unpack_ms2 = SystemTimer::get().ms_since_boot();
 
-		return has_ustar_signature;
+		const auto duration_ms = unpack_ms2 - unpack_ms1;
+		dprintln("unpacking {}.{3} MiB{} initrd took {}.{3} s",
+			module_size_kib / 1024, (module_size_kib % 1024) * 1000 / 1024,
+			is_compressed ? " compressed" : "",
+			duration_ms / 1000, duration_ms % 1000
+		);
+
+		if (is_compressed)
+		{
+			const auto uncompressed_kib = data_source->bytes_produced() / 1024;
+			dprintln("uncompressed size {}.{3} MiB",
+				uncompressed_kib / 1024, (uncompressed_kib % 1024) * 1000 / 1024
+			);
+		}
+
+		return true;
 	}
 
-	BAN::ErrorOr unpack_boot_module_into_filesystem(BAN::RefPtr filesystem, const BootModule& module)
+	// Walks the ustar headers in data_source and recreates each entry below root_inode.
+	BAN::ErrorOr unpack_boot_module_into_directory(BAN::RefPtr root_inode, DataSource& data_source)
 	{
-		ASSERT(is_ustar_boot_module(module));
-
-		auto root_inode = filesystem->root_inode();
-
-		uint8_t* temp_page = static_cast(kmalloc(PAGE_SIZE));
-		if (temp_page == nullptr)
-			return BAN::Error::from_errno(ENOMEM);
-		BAN::ScopeGuard _([temp_page] { kfree(temp_page); });
-
 		BAN::String next_file_name;
 		BAN::String next_link_name;
 
-		size_t offset = 0;
-		while (offset + 512 <= module.size)
+		constexpr uint32_t print_interval_ms = 1000;
+		auto next_print_ms = SystemTimer::get().ms_since_boot() + print_interval_ms;
+
+		while (TRY(data_source.produce_data()), data_source.data_size() >= 512)
 		{
-			size_t file_size = 0;
-			mode_t file_mode = 0;
-			uid_t file_uid = 0;
-			gid_t file_gid = 0;
-			uint8_t file_type = 0;
-			char file_path[100 + 1 + 155 + 1] {};
-
-			PageTable::with_fast_page((module.start + offset) & PAGE_ADDR_MASK, [&] {
-				const size_t page_off = offset % PAGE_SIZE;
-
-				const auto parse_octal =
-					[page_off](size_t offset, size_t length) -> size_t
-					{
-						size_t result = 0;
-						for (size_t i = 0; i < length; i++)
-						{
-							const char ch = PageTable::fast_page_as(page_off + offset + i);
-							if (ch == '\0')
-								break;
-							result = (result * 8) + (ch - '0');
-						}
-						return result;
-					};
-
-				if (memcmp(PageTable::fast_page_as_ptr(page_off + 257), "ustar", 5)) {
-					file_size = SIZE_MAX;
-					return;
+			if (SystemTimer::get().ms_since_boot() >= next_print_ms)
+			{
+				const auto kib_consumed = data_source.bytes_consumed() / 1024;
+				const auto kib_produced = data_source.bytes_produced() / 1024;
+				if (kib_consumed == kib_produced)
+				{
+					dprintln(" ... {}.{3} MiB",
+						kib_consumed / 1024, (kib_consumed % 1024) * 1000 / 1024
+					);
 				}
+				else
+				{
+					dprintln(" ... {}.{3} MiB ({}.{3} MiB)",
+						kib_consumed / 1024, (kib_consumed % 1024) * 1000 / 1024,
+						kib_produced / 1024, (kib_produced % 1024) * 1000 / 1024
+					);
+				}
+				next_print_ms = SystemTimer::get().ms_since_boot() + print_interval_ms;
+			}
 
-				memcpy(file_path, PageTable::fast_page_as_ptr(page_off + 345), 155);
-				const size_t prefix_len = strlen(file_path);
-				file_path[prefix_len] = '/';
-				memcpy(file_path + prefix_len + 1, PageTable::fast_page_as_ptr(page_off), 100);
+			const auto parse_octal =
+				[&data_source](size_t offset, size_t length) -> size_t
+				{
+					size_t result = 0;
+					for (size_t i = 0; i < length; i++)
+					{
+						const char ch = data_source.data()[offset + i];
+						if (ch == '\0')
+							break;
+						result = (result * 8) + (ch - '0');
+					}
+					return result;
+				};
 
-				file_mode = parse_octal(100, 8);
-				file_uid = parse_octal(108, 8);
-				file_gid = parse_octal(116, 8);
-				file_size = parse_octal(124, 12);
-				file_type = PageTable::fast_page_as(page_off + 156);
-			});
-
-			if (file_size == SIZE_MAX)
-				break;
-			if (offset + 512 + file_size > module.size)
+			if (memcmp(&data_source.data()[257], "ustar", 5) != 0)
 				break;
 
-			auto parent_inode = filesystem->root_inode();
+			// zero-initialized: strlen() below and the final path rely on the NUL bytes left after the copied fields
+			char file_path[100 + 1 + 155 + 1] {};
+			memcpy(file_path, &data_source.data()[345], 155);
+			const size_t prefix_len = strlen(file_path);
+			file_path[prefix_len] = '/';
+			memcpy(file_path + prefix_len + 1, &data_source.data()[0], 100);
+
+			mode_t file_mode = parse_octal(100, 8);
+			const uid_t file_uid = parse_octal(108, 8);
+			const gid_t file_gid = parse_octal(116, 8);
+			const size_t file_size = parse_octal(124, 12);
+			const uint8_t file_type = data_source.data()[156];
+
+			auto parent_inode = root_inode;
 
 			auto file_path_parts = TRY(BAN::StringView(next_file_name.empty() ? file_path : next_file_name.sv()).split('/'));
 			for (size_t i = 0; i < file_path_parts.size() - 1; i++)
@@ -111,27 +289,33 @@ namespace Kernel
 
 			auto file_name_sv = file_path_parts.back();
 
+			bool did_consume_data = false;
+
 			if (file_type == 'L' || file_type == 'K')
 			{
-				auto& target = (file_type == 'L') ? next_file_name : next_link_name;
-				TRY(target.resize(file_size));
+				auto& target_str = (file_type == 'L') ? next_file_name : next_link_name;
+				TRY(target_str.resize(file_size));
+
+				data_source.pop_data(512);
 
 				size_t nwritten = 0;
 				while (nwritten < file_size)
 				{
-					const paddr_t paddr = module.start + offset + 512 + nwritten;
-					PageTable::with_fast_page(paddr & PAGE_ADDR_MASK, [&] {
-						memcpy(temp_page, PageTable::fast_page_as_ptr(), PAGE_SIZE);
-					});
+					TRY(data_source.produce_data());
+					if (data_source.data_size() == 0)
+						return {};
 
-					const size_t page_off = paddr % PAGE_SIZE;
-					const size_t to_write = BAN::Math::min(file_size - nwritten, PAGE_SIZE - page_off);
-					memcpy(target.data() + nwritten, temp_page + page_off, to_write);
-					nwritten += to_write;
+					const size_t to_copy = BAN::Math::min(data_source.data_size(), file_size - nwritten);
+					memcpy(target_str.data() + nwritten, data_source.data().data(), to_copy);
+					nwritten += to_copy;
+
+					data_source.pop_data(to_copy);
 				}
 
-				while (!target.empty() && target.back() == '\0')
-					target.pop_back();
+				did_consume_data = true;
+
+				while (!target_str.empty() && target_str.back() == '\0')
+					target_str.pop_back();
 			}
 			else if (file_type == DIRTYPE)
 			{
@@ -149,14 +333,11 @@ namespace Kernel
 					link_name = next_link_name.sv();
 				else
 				{
-					const paddr_t paddr = module.start + offset;
-					PageTable::with_fast_page(paddr & PAGE_ADDR_MASK, [&] {
-						memcpy(link_buffer, PageTable::fast_page_as_ptr((paddr % PAGE_SIZE) + 157), 100);
-					});
+					memcpy(link_buffer, &data_source.data()[157], 100);
 					link_name = link_buffer;
 				}
 
-				auto target_inode = filesystem->root_inode();
+				auto target_inode = root_inode;
 
 				auto link_path_parts = TRY(link_name.split('/'));
 				for (const auto part : link_path_parts)
@@ -188,10 +369,7 @@ namespace Kernel
 					link_name = next_link_name.sv();
 				else
 				{
-					const paddr_t paddr = module.start + offset;
-					PageTable::with_fast_page(paddr & PAGE_ADDR_MASK, [&] {
-						memcpy(link_buffer, PageTable::fast_page_as_ptr((paddr % PAGE_SIZE) + 157), 100);
-					});
+					memcpy(link_buffer, &data_source.data()[157], 100);
 					link_name = link_buffer;
 				}
 
@@ -203,26 +381,26 @@ namespace Kernel
 			{
 				if (auto ret = parent_inode->create_file(file_name_sv, file_mode, file_uid, file_gid); ret.is_error())
 					dwarnln("failed to create file '{}': {}", file_name_sv, ret.error());
-				else
+				else if (file_size)
 				{
-					if (file_size)
+					auto inode = TRY(parent_inode->find_inode(file_name_sv));
+
+					data_source.pop_data(512);
+
+					size_t nwritten = 0;
+					while (nwritten < file_size)
 					{
-						auto inode = TRY(parent_inode->find_inode(file_name_sv));
+						TRY(data_source.produce_data());
+						ASSERT(data_source.data_size() > 0); // what to do?
 
-						size_t nwritten = 0;
-						while (nwritten < file_size)
-						{
-							const paddr_t paddr = module.start + offset + 512 + nwritten;
-							PageTable::with_fast_page(paddr & PAGE_ADDR_MASK, [&] {
-								memcpy(temp_page, PageTable::fast_page_as_ptr(), PAGE_SIZE);
-							});
+						const size_t to_write = BAN::Math::min(file_size - nwritten, data_source.data_size());
+						TRY(inode->write(nwritten, data_source.data().slice(0, to_write)));
+						nwritten += to_write;
 
-							const size_t page_off = paddr % PAGE_SIZE;
-							const size_t to_write = BAN::Math::min(file_size - nwritten, PAGE_SIZE - page_off);
-							TRY(inode->write(nwritten, { temp_page + page_off, to_write }));
-							nwritten += to_write;
-						}
+						data_source.pop_data(to_write);
 					}
+
+					did_consume_data = true;
 				}
 			}
 
@@ -232,9 +410,31 @@ namespace Kernel
 				next_link_name.clear();
 			}
 
-			offset += 512 + file_size;
-			if (auto rem = offset % 512)
-				offset += 512 - rem;
+			// skip the header and payload of entries whose data was not read above
+			if (!did_consume_data)
+			{
+				data_source.pop_data(512);
+
+				size_t consumed = 0;
+				while (consumed < file_size)
+				{
+					TRY(data_source.produce_data());
+					if (data_source.data_size() == 0)
+						return {};
+					const size_t to_skip = BAN::Math::min(file_size - consumed, data_source.data_size());
+					data_source.pop_data(to_skip);
+					consumed += to_skip;
+				}
+			}
+
+			// entry data is padded to a multiple of 512 bytes; drop the padding
+			if (const auto rem = file_size % 512)
+			{
+				TRY(data_source.produce_data());
+				if (data_source.data_size() < 512 - rem)
+					return {};
+				data_source.pop_data(512 - rem);
+			}
 		}
 
 		return {};
diff --git a/kernel/kernel/FS/VirtualFileSystem.cpp b/kernel/kernel/FS/VirtualFileSystem.cpp
index 682d90a9..cbdc813b 100644
--- a/kernel/kernel/FS/VirtualFileSystem.cpp
+++ b/kernel/kernel/FS/VirtualFileSystem.cpp
@@ -61,18 +61,22 @@ namespace Kernel
 		if (filesystem_or_error.is_error())
 			panic("Failed to create fallback filesystem: {}", filesystem_or_error.error());
 
-		dprintln("Loading fallback filesystem from {} modules", g_boot_info.modules.size());
+		dprintln("Trying to load fallback filesystem from {} modules", g_boot_info.modules.size());
 
 		auto filesystem = BAN::RefPtr::adopt(filesystem_or_error.release_value());
 
+		bool loaded_initrd = false;
 		for (const auto& module : g_boot_info.modules)
 		{
-			if (!is_ustar_boot_module(module))
-				continue;
-			if (auto ret = unpack_boot_module_into_filesystem(filesystem, module); ret.is_error())
+			if (auto ret = unpack_boot_module_into_directory(filesystem->root_inode(), module); ret.is_error())
 				dwarnln("Failed to unpack boot module: {}", ret.error());
+			else
+				loaded_initrd |= ret.value();
 		}
 
+		if (!loaded_initrd)
+			panic("Could not load initrd from any boot module :(");
+
 		return filesystem;
 	}
 