userspace: Add LibDEFLATE

This can be used to compress and decompress DEFLATE data either in raw
or zlib format.
This commit is contained in:
2025-10-26 22:25:11 +02:00
parent 9f0addbd8b
commit fecda6a034
11 changed files with 1385 additions and 0 deletions

View File

@@ -0,0 +1,118 @@
#pragma once
#include <BAN/Vector.h>
#include <BAN/ByteSpan.h>
namespace LibDEFLATE
{
/// Reads bits, least-significant bit first, from an in-memory byte span.
///
/// Bytes are pulled from the front of the span into a small bit buffer on
/// demand. The span is not owned; the caller keeps the underlying memory alive.
class BitInputStream
{
public:
	BitInputStream(BAN::ConstByteSpan data)
		: m_data(data)
	{ }

	/// Returns the next `count` bits (at most 16) without consuming them.
	/// Fails with ENOBUFS if the input ends before `count` bits are available.
	BAN::ErrorOr<uint16_t> peek_bits(size_t count)
	{
		ASSERT(count <= 16);

		// Refill one byte at a time; new bytes go above the bits already buffered.
		while (m_bit_buffer_len < count)
		{
			if (m_data.empty())
				return BAN::Error::from_errno(ENOBUFS);
			m_bit_buffer |= static_cast<uint32_t>(m_data[0]) << m_bit_buffer_len;
			m_bit_buffer_len += 8;
			m_data = m_data.slice(1);
		}

		return static_cast<uint16_t>(m_bit_buffer & ((1u << count) - 1));
	}

	/// Returns the next `count` bits (at most 16) and consumes them.
	/// Propagates the error from peek_bits() without consuming anything.
	BAN::ErrorOr<uint16_t> take_bits(size_t count)
	{
		const uint16_t result = TRY(peek_bits(count));
		m_bit_buffer >>= count;
		m_bit_buffer_len -= count;
		return result;
	}

	/// Copies `bytes` whole bytes into `output`. The stream must be byte
	/// aligned, i.e. the bit buffer must hold a whole number of bytes.
	///
	/// Bytes still sitting in the bit buffer are handed out first, the rest is
	/// copied straight from the input span. Fails with EINVAL if the span does
	/// not hold enough data; bytes already drained from the bit buffer stay
	/// consumed in that case.
	BAN::ErrorOr<void> take_byte_aligned(uint8_t* output, size_t bytes)
	{
		// Alignment depends on how many bits are buffered, not on their value.
		ASSERT(m_bit_buffer_len % 8 == 0);

		while (m_bit_buffer_len && bytes)
		{
			*output++ = static_cast<uint8_t>(m_bit_buffer);
			m_bit_buffer >>= 8;
			m_bit_buffer_len -= 8;
			bytes--;
		}

		if (bytes > m_data.size())
			return BAN::Error::from_errno(EINVAL);

		// Nothing left to copy; avoid calling memcpy with a zero length.
		if (bytes == 0)
			return {};

		memcpy(output, m_data.data(), bytes);
		m_data = m_data.slice(bytes);

		return {};
	}

	/// Discards buffered bits until a whole number of bytes remains buffered.
	void skip_to_byte_boundary()
	{
		const size_t bits_to_remove = m_bit_buffer_len % 8;
		m_bit_buffer >>= bits_to_remove;
		m_bit_buffer_len -= bits_to_remove;
	}

private:
	// Input bytes that have not been moved into the bit buffer yet.
	BAN::ConstByteSpan m_data;
	// Buffered bits; bit 0 is the next bit to be read.
	uint32_t m_bit_buffer { 0 };
	// Number of valid bits in m_bit_buffer.
	uint8_t m_bit_buffer_len { 0 };
};
/// Collects bits, least-significant bit first, into a growing byte vector.
///
/// Between calls fewer than eight bits are pending in the bit buffer; every
/// completed byte is appended to the vector right away.
class BitOutputStream
{
public:
	/// Appends the low `count` bits (at most 16) of `value` to the stream.
	/// Returns the error from the vector if appending a byte fails.
	BAN::ErrorOr<void> write_bits(uint16_t value, size_t count)
	{
		ASSERT(m_bit_buffer_len < 8);
		ASSERT(count <= 16);

		// Keep only the requested bits and place them above the pending ones.
		const uint16_t keep_mask = (1 << count) - 1;
		const uint16_t payload = value & keep_mask;
		m_bit_buffer |= payload << m_bit_buffer_len;
		m_bit_buffer_len += count;

		// Emit every byte that is now complete.
		for (; m_bit_buffer_len >= 8; m_bit_buffer_len -= 8)
		{
			TRY(m_data.push_back(m_bit_buffer));
			m_bit_buffer >>= 8;
		}

		return {};
	}

	/// Emits the pending bits, if any, as one byte so that the next write
	/// starts on a byte boundary.
	BAN::ErrorOr<void> pad_to_byte_boundary()
	{
		ASSERT(m_bit_buffer_len < 8);

		if (m_bit_buffer_len != 0)
		{
			TRY(m_data.push_back(m_bit_buffer));
			m_bit_buffer_len = 0;
			m_bit_buffer = 0;
		}

		return {};
	}

	/// Moves the collected bytes out of the stream. No bits may be pending.
	BAN::Vector<uint8_t> take_buffer()
	{
		ASSERT(m_bit_buffer_len == 0);
		return BAN::move(m_data);
	}

private:
	// Bytes that have been completed so far.
	BAN::Vector<uint8_t> m_data;
	// Pending bits; bit 0 is the oldest one.
	uint32_t m_bit_buffer { 0 };
	// Number of valid bits in m_bit_buffer.
	uint8_t m_bit_buffer_len { 0 };
};
}

View File

@@ -0,0 +1,67 @@
#pragma once
#include <BAN/ByteSpan.h>
#include <BAN/HashMap.h>
#include <BAN/LinkedList.h>
#include <BAN/NoCopyMove.h>
#include <BAN/Vector.h>
#include <LibDEFLATE/BitStream.h>
#include <LibDEFLATE/StreamType.h>
namespace LibDEFLATE
{
// Compresses a byte span into a stream of the requested StreamType.
// Only declarations are visible in this header; the member functions are
// defined elsewhere. Instances can be neither copied nor moved.
class Compressor
{
BAN_NON_COPYABLE(Compressor);
BAN_NON_MOVABLE(Compressor);
public:
// List of input spans kept per hash key in m_hash_chain.
using HashChain = BAN::LinkedList<BAN::ConstByteSpan>;
// One element produced by lz77_compress(): either a single literal byte or a
// (length, distance) pair. Presumably `type` tells which member of `as` is
// valid - confirm against the implementation.
struct LZ77Entry
{
enum class Type
{
Literal,
DistLength,
} type;
union
{
uint8_t literal;
struct
{
uint16_t length;
uint16_t distance;
} dist_length;
} as;
};
public:
// Stores the span and the stream type; the span is not copied, so the
// underlying bytes have to stay valid while the compressor is used.
Compressor(BAN::ConstByteSpan data, StreamType type)
: m_type(type)
, m_data(data)
{ }
// Entry point: returns the compressed bytes or an error.
BAN::ErrorOr<BAN::Vector<uint8_t>> compress();
private:
// NOTE(review): `final` presumably marks the last block of the stream - confirm.
BAN::ErrorOr<void> compress_block(BAN::ConstByteSpan, bool final);
uint32_t get_hash_key(BAN::ConstByteSpan needle) const;
BAN::ErrorOr<void> update_hash_chain(size_t count);
BAN::ErrorOr<LZ77Entry> find_longest_match(BAN::ConstByteSpan needle) const;
BAN::ErrorOr<BAN::Vector<LZ77Entry>> lz77_compress(BAN::ConstByteSpan data);
private:
// Output format selected at construction.
const StreamType m_type;
// Input bytes to compress.
BAN::ConstByteSpan m_data;
// Bit-level sink for the compressed output.
BitOutputStream m_stream;
// NOTE(review): presumably the input offset up to which m_hash_chain has been filled - confirm.
size_t m_hash_chain_index { 0 };
// Maps a hash key (see get_hash_key) to the chain of spans stored under it.
BAN::HashMap<uint32_t, HashChain> m_hash_chain;
};
}

View File

@@ -0,0 +1,46 @@
#pragma once
#include <BAN/ByteSpan.h>
#include <BAN/NoCopyMove.h>
#include <BAN/Vector.h>
#include <LibDEFLATE/BitStream.h>
#include <LibDEFLATE/HuffmanTree.h>
#include <LibDEFLATE/StreamType.h>
namespace LibDEFLATE
{
// Decompresses a byte span holding a stream of the given StreamType.
// Only declarations are visible in this header; the member functions are
// defined elsewhere. Instances can be neither copied nor moved.
class Decompressor
{
BAN_NON_COPYABLE(Decompressor);
BAN_NON_MOVABLE(Decompressor);
public:
// Wraps `data` in a BitInputStream and remembers the stream type.
Decompressor(BAN::ConstByteSpan data, StreamType type)
: m_type(type)
, m_stream(data)
{ }
// Entry point: returns the decompressed bytes or an error.
BAN::ErrorOr<BAN::Vector<uint8_t>> decompress();
private:
BAN::ErrorOr<uint16_t> read_symbol(const HuffmanTree& tree);
BAN::ErrorOr<void> inflate_block(const HuffmanTree& length_tree, const HuffmanTree& distance_tree);
// NOTE(review): type0/1/2 presumably correspond to the DEFLATE block types
// stored / fixed Huffman / dynamic Huffman (RFC 1951) - confirm.
BAN::ErrorOr<void> decompress_type0();
BAN::ErrorOr<void> decompress_type1();
BAN::ErrorOr<void> decompress_type2();
// NOTE(review): presumably process the container framing selected by m_type - confirm.
BAN::ErrorOr<void> handle_header();
BAN::ErrorOr<void> handle_footer();
private:
// Input format selected at construction.
const StreamType m_type;
// Bit-level reader over the compressed input.
BitInputStream m_stream;
// Decompressed bytes produced so far.
BAN::Vector<uint8_t> m_output;
// Empty until set; presumably caches HuffmanTree::fixed_tree() - confirm.
BAN::Optional<HuffmanTree> m_fixed_tree;
};
}

View File

@@ -0,0 +1,61 @@
#pragma once
#include <BAN/Array.h>
#include <BAN/NoCopyMove.h>
#include <BAN/Optional.h>
#include <BAN/Vector.h>
namespace LibDEFLATE
{
// Lookup structure for Huffman codes, built from a list of code bit lengths.
// Movable but not copyable. The table-building and lookup functions are
// defined elsewhere.
class HuffmanTree
{
BAN_NON_COPYABLE(HuffmanTree);
public:
// Sizes m_min_code (MAX_BITS + 1 entries); presumably the longest code length
// supported - confirm.
static constexpr uint8_t MAX_BITS = 15;
// A code value together with its length in bits.
struct Leaf
{
uint16_t code;
uint8_t len;
};
// A decoded symbol together with a bit length, as returned by get_symbol_instant().
struct Instant
{
uint16_t symbol;
uint8_t len;
};
// Creates a tree for which empty() is true (m_min_bits stays 0).
HuffmanTree() {}
// Move construction is implemented through move assignment.
HuffmanTree(HuffmanTree&& other) { *this = BAN::move(other); }
HuffmanTree& operator=(HuffmanTree&& other);
// Factory functions; both report failure through ErrorOr.
static BAN::ErrorOr<HuffmanTree> create(BAN::Span<const uint8_t> bit_lengths);
static BAN::ErrorOr<HuffmanTree> fixed_tree();
// NOTE(review): presumably a fast-path lookup for codes of at most
// instant_bits() bits, with get_symbol() as the general path - confirm.
BAN::Optional<Instant> get_symbol_instant(uint16_t code) const;
BAN::Optional<uint16_t> get_symbol(uint16_t code, uint8_t len) const;
uint8_t instant_bits() const { return m_instant_bits; }
uint8_t min_bits() const { return m_min_bits; }
uint8_t max_bits() const { return m_max_bits; }
// True while m_min_bits is 0, which is the state of a default-constructed tree.
bool empty() const { return m_min_bits == 0; }
private:
BAN::ErrorOr<void> initialize(BAN::Span<const uint8_t> bit_lengths);
BAN::ErrorOr<void> build_instant_table(BAN::Span<const Leaf> tree);
BAN::ErrorOr<void> build_slow_table(BAN::Span<const Leaf> tree);
private:
uint8_t m_instant_bits { 0 };
uint8_t m_min_bits { 0 };
uint8_t m_max_bits { 0 };
// Entries returned by get_symbol_instant(); filled by build_instant_table() (assumed from the name).
BAN::Vector<Instant> m_instant;
// One entry per code length 0..MAX_BITS; has no default initializer here.
BAN::Array<uint16_t, MAX_BITS + 1> m_min_code;
// Filled by build_slow_table() (assumed from the name); layout is not visible in this header.
BAN::Vector<BAN::Vector<uint16_t>> m_slow_table;
};
}

View File

@@ -0,0 +1,12 @@
#pragma once
namespace LibDEFLATE
{
// Selects the container format handled by Compressor and Decompressor.
enum class StreamType
{
// DEFLATE data in raw format.
Raw,
// DEFLATE data in zlib format.
Zlib,
};
}

View File

@@ -0,0 +1,30 @@
#pragma once
#include <BAN/ByteSpan.h>
namespace LibDEFLATE
{
/// Computes the Adler-32 checksum of `data`.
///
/// Two running sums are kept modulo 65521: the low one starts at 1 and adds
/// every byte, the high one starts at 0 and adds the low sum after each byte.
/// The result packs the high sum into the upper 16 bits and the low sum into
/// the lower 16 bits. An empty span yields 1.
inline uint32_t calculate_adler32(BAN::ConstByteSpan data)
{
	constexpr uint32_t adler_modulus = 65521;

	uint32_t low_sum = 1;
	uint32_t high_sum = 0;

	const size_t byte_count = data.size();
	size_t index = 0;
	while (index < byte_count)
	{
		low_sum += data[index];
		low_sum %= adler_modulus;
		high_sum += low_sum;
		high_sum %= adler_modulus;
		index++;
	}

	return low_sum | (high_sum << 16);
}
/// Reverses the order of the lowest `count` bits of `value`.
///
/// Bit 0 of `value` ends up at bit `count - 1` of the result and so on; bits
/// of `value` at or above `count` are ignored. A `count` of 0 yields 0.
inline constexpr uint16_t reverse_bits(uint16_t value, size_t count)
{
	uint16_t mirrored = 0;
	uint16_t pending = value;

	// Peel bits off the low end of the input and push them into the low end of
	// the result, so the first bit taken ends up highest.
	for (size_t taken = 0; taken < count; taken++)
	{
		mirrored = static_cast<uint16_t>((mirrored << 1) | (pending & 1));
		pending = static_cast<uint16_t>(pending >> 1);
	}

	return mirrored;
}
}