LibDEFLATE: Optimize decompression
Instead of calculating bit-by-bit crc32, we now calculate a lookup table during compile time. The old crc32 calculation was taking almost 50% of the decompression time. Also handle multiple symbols at once without outputting to user. It is much more efficient to output many bytes instead of the up to 258 that a single symbol can decode to :^)
This commit is contained in:
@@ -22,6 +22,26 @@ namespace LibDEFLATE
|
|||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct crc32_table_t
|
||||||
|
{
|
||||||
|
consteval crc32_table_t()
|
||||||
|
{
|
||||||
|
for (uint32_t i = 0; i < 256; i++)
|
||||||
|
{
|
||||||
|
uint32_t crc32 = i;
|
||||||
|
for (size_t j = 0; j < 8; j++) {
|
||||||
|
if (crc32 & 1)
|
||||||
|
crc32 = (crc32 >> 1) ^ 0xEDB88320;
|
||||||
|
else
|
||||||
|
crc32 >>= 1;
|
||||||
|
}
|
||||||
|
table[i] = crc32;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
uint32_t table[256];
|
||||||
|
};
|
||||||
|
static constexpr crc32_table_t s_crc32_table;
|
||||||
|
|
||||||
BAN::ErrorOr<uint16_t> Decompressor::read_symbol(const HuffmanTree& tree)
|
BAN::ErrorOr<uint16_t> Decompressor::read_symbol(const HuffmanTree& tree)
|
||||||
{
|
{
|
||||||
const uint8_t instant_bits = tree.instant_bits();
|
const uint8_t instant_bits = tree.instant_bits();
|
||||||
@@ -177,7 +197,7 @@ namespace LibDEFLATE
|
|||||||
m_stream.skip_to_byte_boundary();
|
m_stream.skip_to_byte_boundary();
|
||||||
|
|
||||||
auto& gzip = m_stream_info.gzip;
|
auto& gzip = m_stream_info.gzip;
|
||||||
gzip.crc32 = ~gzip.crc32;
|
gzip.crc32 ^= 0xFFFFFFFF;
|
||||||
|
|
||||||
const uint32_t crc32 =
|
const uint32_t crc32 =
|
||||||
static_cast<uint32_t>(TRY(m_stream.take_bits(16))) |
|
static_cast<uint32_t>(TRY(m_stream.take_bits(16))) |
|
||||||
@@ -370,16 +390,7 @@ namespace LibDEFLATE
|
|||||||
auto& gzip = m_stream_info.gzip;
|
auto& gzip = m_stream_info.gzip;
|
||||||
gzip.isize += to_write;
|
gzip.isize += to_write;
|
||||||
for (size_t i = 0; i < to_write; i++)
|
for (size_t i = 0; i < to_write; i++)
|
||||||
{
|
gzip.crc32 = (gzip.crc32 >> 8) ^ s_crc32_table.table[(gzip.crc32 ^ output[i]) & 0xFF];
|
||||||
gzip.crc32 ^= output[i];
|
|
||||||
|
|
||||||
for (size_t j = 0; j < 8; j++) {
|
|
||||||
if (gzip.crc32 & 1)
|
|
||||||
gzip.crc32 = (gzip.crc32 >> 1) ^ 0xEDB88320;
|
|
||||||
else
|
|
||||||
gzip.crc32 >>= 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -515,7 +526,7 @@ namespace LibDEFLATE
|
|||||||
bool need_more_input = false;
|
bool need_more_input = false;
|
||||||
bool restore_saved_stream = false;
|
bool restore_saved_stream = false;
|
||||||
|
|
||||||
const auto saved_stream = m_stream;
|
auto saved_stream = m_stream;
|
||||||
|
|
||||||
switch (m_state)
|
switch (m_state)
|
||||||
{
|
{
|
||||||
@@ -645,12 +656,17 @@ namespace LibDEFLATE
|
|||||||
}
|
}
|
||||||
case State::Symbol:
|
case State::Symbol:
|
||||||
{
|
{
|
||||||
|
while (m_produced_bytes + 258 < total_window_size && m_state == State::Symbol)
|
||||||
|
{
|
||||||
|
saved_stream = m_stream;
|
||||||
if (auto ret = handle_symbol(); ret.is_error())
|
if (auto ret = handle_symbol(); ret.is_error())
|
||||||
{
|
{
|
||||||
if (ret.error().get_error_code() != ENOBUFS)
|
if (ret.error().get_error_code() != ENOBUFS)
|
||||||
return ret.release_error();
|
return ret.release_error();
|
||||||
need_more_input = true;
|
need_more_input = true;
|
||||||
restore_saved_stream = true;
|
restore_saved_stream = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user