Kernel: Fix PC Screen font parsing

I had misread the format, so the parsing code was incorrect. I also
changed fonts to store Unicode codepoints as 32-bit integers, so
every character can be represented.
This commit is contained in:
Bananymous 2023-03-20 14:52:42 +02:00
parent 43f4657566
commit 22e45278a2
3 changed files with 81 additions and 61 deletions

View File

@ -6,23 +6,41 @@
namespace BAN
{
static constexpr uint16_t utf8_to_codepoint(uint8_t* bytes, size_t count)
namespace UTF8
{
if (count > 3)
return 0xFFFF;
for (size_t i = 1; i < count; i++)
if ((bytes[i] & 0xC0) != 0x80)
return 0xFFFF;
switch (count)
{
case 1: return bytes[0];
case 2: return ((bytes[0] & 0x1F) << 6) | (bytes[1] & 0x3F);
case 3: return ((bytes[0] & 0x1F) << 12) | ((bytes[1] & 0x3F) << 6) | (bytes[2] & 0x3F);
static constexpr uint32_t invalid = 0xFFFFFFFF;
}
return 0xFFFF;
// Returns how many bytes the UTF-8 sequence introduced by `first_byte`
// occupies (1-4), or 0 when `first_byte` cannot start a sequence.
static constexpr uint32_t utf8_byte_length(uint8_t first_byte)
{
	// The number of consecutive set bits at the top of the lead byte
	// encodes the sequence length.
	uint32_t leading_ones = 0;
	for (uint32_t mask = 0x80; mask != 0 && (first_byte & mask) != 0; mask >>= 1)
		leading_ones++;

	switch (leading_ones)
	{
		case 0: return 1; // 0xxxxxxx
		case 2: return 2; // 110xxxxx
		case 3: return 3; // 1110xxxx
		case 4: return 4; // 11110xxx
	}

	// A single leading one is a continuation byte (10xxxxxx); five or more
	// leading ones never start a sequence.
	return 0;
}
// Decodes the single UTF-8 sequence that starts at bytes[0] into a codepoint.
//
// The sequence length is derived from the lead byte with utf8_byte_length(),
// so the caller must make that many bytes readable behind `bytes`.
//
// Returns UTF8::invalid when
//   - the lead byte cannot start a sequence,
//   - a continuation byte is not of the form 10xxxxxx, or
//   - the sequence is ill-formed per RFC 3629: an overlong encoding, a UTF-16
//     surrogate (U+D800..U+DFFF) or a value above U+10FFFF.
static constexpr uint32_t utf8_to_codepoint(const uint8_t* bytes)
{
	const uint32_t length = utf8_byte_length(bytes[0]);

	uint32_t codepoint = 0;
	// Smallest value that genuinely needs `length` bytes; anything below it
	// is an overlong encoding of a shorter sequence.
	uint32_t smallest = 0;
	switch (length)
	{
		case 1: return bytes[0];
		case 2: codepoint = bytes[0] & 0x1F; smallest = 0x80;    break;
		case 3: codepoint = bytes[0] & 0x0F; smallest = 0x800;   break;
		case 4: codepoint = bytes[0] & 0x07; smallest = 0x10000; break;
		default: return UTF8::invalid;
	}

	// Every continuation byte contributes its low six bits.
	for (uint32_t i = 1; i < length; i++)
	{
		if ((bytes[i] & 0xC0) != 0x80)
			return UTF8::invalid;
		codepoint = (codepoint << 6) | (bytes[i] & 0x3F);
	}

	if (codepoint < smallest)
		return UTF8::invalid;
	if (codepoint >= 0xD800 && codepoint <= 0xDFFF)
		return UTF8::invalid;
	if (codepoint > 0x10FFFF)
		return UTF8::invalid;

	return codepoint;
}
}

View File

@ -17,15 +17,15 @@ namespace Kernel
uint32_t height() const { return m_height; }
uint32_t pitch() const { return m_pitch; }
bool has_glyph(uint16_t) const;
const uint8_t* glyph(uint16_t) const;
bool has_glyph(uint32_t) const;
const uint8_t* glyph(uint32_t) const;
private:
static BAN::ErrorOr<Font> parse_psf1(const BAN::Span<uint8_t>);
static BAN::ErrorOr<Font> parse_psf2(const BAN::Span<uint8_t>);
private:
BAN::HashMap<uint16_t, uint32_t> m_glyph_offsets;
BAN::HashMap<uint32_t, uint32_t> m_glyph_offsets;
BAN::Vector<uint8_t> m_glyph_data;
uint32_t m_width = 0;
uint32_t m_height = 0;

View File

@ -73,17 +73,16 @@ namespace Kernel
TRY(glyph_data.resize(glyph_data_size));
memcpy(glyph_data.data(), font_data.data() + sizeof(PSF1Header), glyph_data_size);
BAN::HashMap<uint16_t, uint32_t> glyph_offsets;
BAN::HashMap<uint32_t, uint32_t> glyph_offsets;
TRY(glyph_offsets.reserve(glyph_count));
bool unsupported_utf = false;
bool codepoint_redef = false;
bool codepoint_sequence = false;
if (header->magic & (PSF1_MODE_HASTAB | PSF1_MODE_SEQ))
{
uint32_t current_index = sizeof(PSF1Header) + glyph_data_size;
bool in_sequence = false;
uint32_t glyph_index = 0;
while (current_index < font_data.size())
{
@ -91,17 +90,16 @@ namespace Kernel
uint16_t hi = font_data[current_index + 1];
uint16_t codepoint = (hi << 8) | lo;
if (codepoint == 0xFFFF)
if (codepoint == 0xFFFE)
{
codepoint_sequence = true;
break;
}
else if (codepoint == 0xFFFF)
{
glyph_index++;
in_sequence = false;
}
else if (codepoint == 0xFFFE)
{
in_sequence = true;
unsupported_utf = true;
}
else if (!in_sequence)
else
{
if (glyph_offsets.contains(codepoint))
codepoint_redef = true;
@ -111,9 +109,6 @@ namespace Kernel
current_index += 2;
}
if (glyph_index != glyph_count)
return BAN::Error::from_c_string("Font did not contain unicode entry for all glyphs");
}
else
{
@ -121,10 +116,10 @@ namespace Kernel
TRY(glyph_offsets.insert(i, i * glyph_size));
}
if (unsupported_utf)
dwarnln("Font contains invalid/unsupported UTF-8 codepoint(s)");
if (codepoint_redef)
dwarnln("Font contsins multiple definitions for same codepoint(s)");
if (codepoint_sequence)
dwarnln("Font contains codepoint sequences (not supported)");
Font result;
result.m_glyph_offsets = BAN::move(glyph_offsets);
@ -171,11 +166,12 @@ namespace Kernel
TRY(glyph_data.resize(glyph_data_size));
memcpy(glyph_data.data(), font_data.data() + header.header_size, glyph_data_size);
BAN::HashMap<uint16_t, uint32_t> glyph_offsets;
BAN::HashMap<uint32_t, uint32_t> glyph_offsets;
TRY(glyph_offsets.reserve(400));
bool unsupported_utf = false;
bool invalid_utf = false;
bool codepoint_redef = false;
bool codepoint_sequence = false;
uint8_t bytes[4] {};
uint32_t byte_index = 0;
@ -186,32 +182,44 @@ namespace Kernel
{
uint8_t byte = font_data[i];
if ((byte >> 1) == 0x7F)
if (byte == 0xFE)
{
if (byte_index <= 4)
{
uint16_t codepoint = BAN::utf8_to_codepoint(bytes, byte_index);
if (codepoint == 0xFFFF)
unsupported_utf = true;
else if (glyph_offsets.contains(codepoint))
codepoint_redef = true;
else
TRY(glyph_offsets.insert(codepoint, glyph_index * header.glyph_size));
codepoint_sequence = true;
break;
}
else if (byte == 0xFF)
{
if (byte_index)
{
invalid_utf = true;
byte_index = 0;
if (byte == 0xFF)
}
glyph_index++;
}
else
{
if (byte_index < 4)
ASSERT(byte_index < 4);
bytes[byte_index++] = byte;
uint32_t len = BAN::utf8_byte_length(bytes[0]);
if (len == 0)
{
invalid_utf = true;
byte_index = 0;
}
else if (len == byte_index)
{
uint32_t codepoint = BAN::utf8_to_codepoint(bytes);
if (codepoint == BAN::UTF8::invalid)
invalid_utf = true;
else if (glyph_offsets.contains(codepoint))
codepoint_redef = true;
else
unsupported_utf = true;
TRY(glyph_offsets.insert(codepoint, glyph_index * header.glyph_size));
byte_index = 0;
}
}
}
if (glyph_index != header.glyph_count)
return BAN::Error::from_c_string("Font did not contain unicode entry for all glyphs");
}
else
{
@ -219,18 +227,12 @@ namespace Kernel
TRY(glyph_offsets.insert(i, i * header.glyph_size));
}
// Manually add space (empty) character if it is not present
if (!glyph_offsets.contains(' '))
{
TRY(glyph_data.resize(glyph_data_size + header.glyph_size));
memset(glyph_data.data() + glyph_data_size, 0, header.glyph_size);
TRY(glyph_offsets.insert(' ', glyph_data_size));
}
if (unsupported_utf)
dwarnln("Font contains invalid/unsupported UTF-8 codepoint(s)");
if (invalid_utf)
dwarnln("Font contains invalid UTF-8 codepoint(s)");
if (codepoint_redef)
dwarnln("Font contsins multiple definitions for same codepoint(s)");
if (codepoint_sequence)
dwarnln("Font contains codepoint sequences (not supported)");
Font result;
result.m_glyph_offsets = BAN::move(glyph_offsets);
@ -241,12 +243,12 @@ namespace Kernel
return result;
}
bool Font::has_glyph(uint16_t codepoint) const
// Tells whether this font has a glyph offset registered for `codepoint`.
bool Font::has_glyph(uint32_t codepoint) const
{
	const bool is_known = m_glyph_offsets.contains(codepoint);
	return is_known;
}
const uint8_t* Font::glyph(uint16_t codepoint) const
// Returns a pointer into the glyph data at the offset stored for `codepoint`.
// NOTE(review): presumably callers check has_glyph() first; what happens for
// a missing key depends on BAN::HashMap::operator[] - confirm.
const uint8_t* Font::glyph(uint32_t codepoint) const
{
	const uint32_t offset = m_glyph_offsets[codepoint];
	const uint8_t* glyph_base = m_glyph_data.data();
	return glyph_base + offset;
}