Kernel: Fix PC Screen font parsing

I had misread the format and the parsing code was incorrect. I also
changed fonts to store Unicode codepoints as 32-bit integers, so
every character can be represented.
This commit is contained in:
Bananymous 2023-03-20 14:52:42 +02:00
parent 43f4657566
commit 22e45278a2
3 changed files with 81 additions and 61 deletions

View File

@ -6,23 +6,41 @@
namespace BAN namespace BAN
{ {
static constexpr uint16_t utf8_to_codepoint(uint8_t* bytes, size_t count) namespace UTF8
{ {
if (count > 3) static constexpr uint32_t invalid = 0xFFFFFFFF;
return 0xFFFF;
for (size_t i = 1; i < count; i++)
if ((bytes[i] & 0xC0) != 0x80)
return 0xFFFF;
switch (count)
{
case 1: return bytes[0];
case 2: return ((bytes[0] & 0x1F) << 6) | (bytes[1] & 0x3F);
case 3: return ((bytes[0] & 0x1F) << 12) | ((bytes[1] & 0x3F) << 6) | (bytes[2] & 0x3F);
} }
return 0xFFFF; static constexpr uint32_t utf8_byte_length(uint8_t first_byte)
{
if ((first_byte & 0x80) == 0x00)
return 1;
if ((first_byte & 0xE0) == 0xC0)
return 2;
if ((first_byte & 0xF0) == 0xE0)
return 3;
if ((first_byte & 0xF8) == 0xF0)
return 4;
return 0;
}
static constexpr uint32_t utf8_to_codepoint(uint8_t* bytes)
{
uint32_t length = utf8_byte_length(bytes[0]);
for (uint32_t i = 1; i < length; i++)
if ((bytes[i] & 0xC0) != 0x80)
return UTF8::invalid;
switch (length)
{
case 1: return ((bytes[0] & 0x80) != 0x00) ? UTF8::invalid : bytes[0];
case 2: return ((bytes[0] & 0xE0) != 0xC0) ? UTF8::invalid : ((bytes[0] & 0x1F) << 6) | (bytes[1] & 0x3F);
case 3: return ((bytes[0] & 0xF0) != 0xE0) ? UTF8::invalid : ((bytes[0] & 0x0F) << 12) | ((bytes[1] & 0x3F) << 6) | (bytes[2] & 0x3F);
case 4: return ((bytes[0] & 0xF8) != 0xF0) ? UTF8::invalid : ((bytes[0] & 0x07) << 18) | ((bytes[1] & 0x3F) << 12) | ((bytes[2] & 0x3F) << 6) | (bytes[3] & 0x3F);
}
return UTF8::invalid;
} }
} }

View File

@ -17,15 +17,15 @@ namespace Kernel
uint32_t height() const { return m_height; } uint32_t height() const { return m_height; }
uint32_t pitch() const { return m_pitch; } uint32_t pitch() const { return m_pitch; }
bool has_glyph(uint16_t) const; bool has_glyph(uint32_t) const;
const uint8_t* glyph(uint16_t) const; const uint8_t* glyph(uint32_t) const;
private: private:
static BAN::ErrorOr<Font> parse_psf1(const BAN::Span<uint8_t>); static BAN::ErrorOr<Font> parse_psf1(const BAN::Span<uint8_t>);
static BAN::ErrorOr<Font> parse_psf2(const BAN::Span<uint8_t>); static BAN::ErrorOr<Font> parse_psf2(const BAN::Span<uint8_t>);
private: private:
BAN::HashMap<uint16_t, uint32_t> m_glyph_offsets; BAN::HashMap<uint32_t, uint32_t> m_glyph_offsets;
BAN::Vector<uint8_t> m_glyph_data; BAN::Vector<uint8_t> m_glyph_data;
uint32_t m_width = 0; uint32_t m_width = 0;
uint32_t m_height = 0; uint32_t m_height = 0;

View File

@ -73,17 +73,16 @@ namespace Kernel
TRY(glyph_data.resize(glyph_data_size)); TRY(glyph_data.resize(glyph_data_size));
memcpy(glyph_data.data(), font_data.data() + sizeof(PSF1Header), glyph_data_size); memcpy(glyph_data.data(), font_data.data() + sizeof(PSF1Header), glyph_data_size);
BAN::HashMap<uint16_t, uint32_t> glyph_offsets; BAN::HashMap<uint32_t, uint32_t> glyph_offsets;
TRY(glyph_offsets.reserve(glyph_count)); TRY(glyph_offsets.reserve(glyph_count));
bool unsupported_utf = false;
bool codepoint_redef = false; bool codepoint_redef = false;
bool codepoint_sequence = false;
if (header->magic & (PSF1_MODE_HASTAB | PSF1_MODE_SEQ)) if (header->magic & (PSF1_MODE_HASTAB | PSF1_MODE_SEQ))
{ {
uint32_t current_index = sizeof(PSF1Header) + glyph_data_size; uint32_t current_index = sizeof(PSF1Header) + glyph_data_size;
bool in_sequence = false;
uint32_t glyph_index = 0; uint32_t glyph_index = 0;
while (current_index < font_data.size()) while (current_index < font_data.size())
{ {
@ -91,17 +90,16 @@ namespace Kernel
uint16_t hi = font_data[current_index + 1]; uint16_t hi = font_data[current_index + 1];
uint16_t codepoint = (hi << 8) | lo; uint16_t codepoint = (hi << 8) | lo;
if (codepoint == 0xFFFF) if (codepoint == 0xFFFE)
{
codepoint_sequence = true;
break;
}
else if (codepoint == 0xFFFF)
{ {
glyph_index++; glyph_index++;
in_sequence = false;
} }
else if (codepoint == 0xFFFE) else
{
in_sequence = true;
unsupported_utf = true;
}
else if (!in_sequence)
{ {
if (glyph_offsets.contains(codepoint)) if (glyph_offsets.contains(codepoint))
codepoint_redef = true; codepoint_redef = true;
@ -111,9 +109,6 @@ namespace Kernel
current_index += 2; current_index += 2;
} }
if (glyph_index != glyph_count)
return BAN::Error::from_c_string("Font did not contain unicode entry for all glyphs");
} }
else else
{ {
@ -121,10 +116,10 @@ namespace Kernel
TRY(glyph_offsets.insert(i, i * glyph_size)); TRY(glyph_offsets.insert(i, i * glyph_size));
} }
if (unsupported_utf)
dwarnln("Font contains invalid/unsupported UTF-8 codepoint(s)");
if (codepoint_redef) if (codepoint_redef)
dwarnln("Font contsins multiple definitions for same codepoint(s)"); dwarnln("Font contsins multiple definitions for same codepoint(s)");
if (codepoint_sequence)
dwarnln("Font contains codepoint sequences (not supported)");
Font result; Font result;
result.m_glyph_offsets = BAN::move(glyph_offsets); result.m_glyph_offsets = BAN::move(glyph_offsets);
@ -171,11 +166,12 @@ namespace Kernel
TRY(glyph_data.resize(glyph_data_size)); TRY(glyph_data.resize(glyph_data_size));
memcpy(glyph_data.data(), font_data.data() + header.header_size, glyph_data_size); memcpy(glyph_data.data(), font_data.data() + header.header_size, glyph_data_size);
BAN::HashMap<uint16_t, uint32_t> glyph_offsets; BAN::HashMap<uint32_t, uint32_t> glyph_offsets;
TRY(glyph_offsets.reserve(400)); TRY(glyph_offsets.reserve(400));
bool unsupported_utf = false; bool invalid_utf = false;
bool codepoint_redef = false; bool codepoint_redef = false;
bool codepoint_sequence = false;
uint8_t bytes[4] {}; uint8_t bytes[4] {};
uint32_t byte_index = 0; uint32_t byte_index = 0;
@ -186,32 +182,44 @@ namespace Kernel
{ {
uint8_t byte = font_data[i]; uint8_t byte = font_data[i];
if ((byte >> 1) == 0x7F) if (byte == 0xFE)
{ {
if (byte_index <= 4) codepoint_sequence = true;
{ break;
uint16_t codepoint = BAN::utf8_to_codepoint(bytes, byte_index);
if (codepoint == 0xFFFF)
unsupported_utf = true;
else if (glyph_offsets.contains(codepoint))
codepoint_redef = true;
else
TRY(glyph_offsets.insert(codepoint, glyph_index * header.glyph_size));
} }
else if (byte == 0xFF)
{
if (byte_index)
{
invalid_utf = true;
byte_index = 0; byte_index = 0;
if (byte == 0xFF) }
glyph_index++; glyph_index++;
} }
else else
{ {
if (byte_index < 4) ASSERT(byte_index < 4);
bytes[byte_index++] = byte; bytes[byte_index++] = byte;
uint32_t len = BAN::utf8_byte_length(bytes[0]);
if (len == 0)
{
invalid_utf = true;
byte_index = 0;
}
else if (len == byte_index)
{
uint32_t codepoint = BAN::utf8_to_codepoint(bytes);
if (codepoint == BAN::UTF8::invalid)
invalid_utf = true;
else if (glyph_offsets.contains(codepoint))
codepoint_redef = true;
else else
unsupported_utf = true; TRY(glyph_offsets.insert(codepoint, glyph_index * header.glyph_size));
byte_index = 0;
}
} }
} }
if (glyph_index != header.glyph_count)
return BAN::Error::from_c_string("Font did not contain unicode entry for all glyphs");
} }
else else
{ {
@ -219,18 +227,12 @@ namespace Kernel
TRY(glyph_offsets.insert(i, i * header.glyph_size)); TRY(glyph_offsets.insert(i, i * header.glyph_size));
} }
// Manually add space (empty) character if it is not present if (invalid_utf)
if (!glyph_offsets.contains(' ')) dwarnln("Font contains invalid UTF-8 codepoint(s)");
{
TRY(glyph_data.resize(glyph_data_size + header.glyph_size));
memset(glyph_data.data() + glyph_data_size, 0, header.glyph_size);
TRY(glyph_offsets.insert(' ', glyph_data_size));
}
if (unsupported_utf)
dwarnln("Font contains invalid/unsupported UTF-8 codepoint(s)");
if (codepoint_redef) if (codepoint_redef)
dwarnln("Font contsins multiple definitions for same codepoint(s)"); dwarnln("Font contsins multiple definitions for same codepoint(s)");
if (codepoint_sequence)
dwarnln("Font contains codepoint sequences (not supported)");
Font result; Font result;
result.m_glyph_offsets = BAN::move(glyph_offsets); result.m_glyph_offsets = BAN::move(glyph_offsets);
@ -241,12 +243,12 @@ namespace Kernel
return result; return result;
} }
bool Font::has_glyph(uint16_t codepoint) const bool Font::has_glyph(uint32_t codepoint) const
{ {
return m_glyph_offsets.contains(codepoint); return m_glyph_offsets.contains(codepoint);
} }
const uint8_t* Font::glyph(uint16_t codepoint) const const uint8_t* Font::glyph(uint32_t codepoint) const
{ {
return m_glyph_data.data() + m_glyph_offsets[codepoint]; return m_glyph_data.data() + m_glyph_offsets[codepoint];
} }