From 22e45278a2da93e3480840496acd0a919600db29 Mon Sep 17 00:00:00 2001 From: Bananymous Date: Mon, 20 Mar 2023 14:52:42 +0200 Subject: [PATCH] Kernel: Fix PC Screen font parsing I had misread the format and the parsing code was incorrect. I also changed fonts to store Unicode codepoints as 32-bit integers, so every character can be represented. --- BAN/include/BAN/UTF8.h | 40 ++++++++++----- kernel/include/kernel/Font.h | 6 +-- kernel/kernel/Font.cpp | 96 ++++++++++++++++++------------------ 3 files changed, 81 insertions(+), 61 deletions(-) diff --git a/BAN/include/BAN/UTF8.h b/BAN/include/BAN/UTF8.h index b122a68b4..d712a3954 100644 --- a/BAN/include/BAN/UTF8.h +++ b/BAN/include/BAN/UTF8.h @@ -6,23 +6,41 @@ namespace BAN { - static constexpr uint16_t utf8_to_codepoint(uint8_t* bytes, size_t count) + namespace UTF8 { - if (count > 3) - return 0xFFFF; + static constexpr uint32_t invalid = 0xFFFFFFFF; + } - for (size_t i = 1; i < count; i++) + static constexpr uint32_t utf8_byte_length(uint8_t first_byte) + { + if ((first_byte & 0x80) == 0x00) + return 1; + if ((first_byte & 0xE0) == 0xC0) + return 2; + if ((first_byte & 0xF0) == 0xE0) + return 3; + if ((first_byte & 0xF8) == 0xF0) + return 4; + return 0; + } + + static constexpr uint32_t utf8_to_codepoint(uint8_t* bytes) + { + uint32_t length = utf8_byte_length(bytes[0]); + + for (uint32_t i = 1; i < length; i++) if ((bytes[i] & 0xC0) != 0x80) - return 0xFFFF; - - switch (count) + return UTF8::invalid; + + switch (length) { - case 1: return bytes[0]; - case 2: return ((bytes[0] & 0x1F) << 6) | (bytes[1] & 0x3F); - case 3: return ((bytes[0] & 0x1F) << 12) | ((bytes[1] & 0x3F) << 6) | (bytes[2] & 0x3F); + case 1: return ((bytes[0] & 0x80) != 0x00) ? UTF8::invalid : bytes[0]; + case 2: return ((bytes[0] & 0xE0) != 0xC0) ? UTF8::invalid : ((bytes[0] & 0x1F) << 6) | (bytes[1] & 0x3F); + case 3: return ((bytes[0] & 0xF0) != 0xE0) ? 
UTF8::invalid : ((bytes[0] & 0x0F) << 12) | ((bytes[1] & 0x3F) << 6) | (bytes[2] & 0x3F); + case 4: return ((bytes[0] & 0xF8) != 0xF0) ? UTF8::invalid : ((bytes[0] & 0x07) << 18) | ((bytes[1] & 0x3F) << 12) | ((bytes[2] & 0x3F) << 6) | (bytes[3] & 0x3F); } - return 0xFFFF; + return UTF8::invalid; } } \ No newline at end of file diff --git a/kernel/include/kernel/Font.h b/kernel/include/kernel/Font.h index 89470c774..bb10264be 100644 --- a/kernel/include/kernel/Font.h +++ b/kernel/include/kernel/Font.h @@ -17,15 +17,15 @@ namespace Kernel uint32_t height() const { return m_height; } uint32_t pitch() const { return m_pitch; } - bool has_glyph(uint16_t) const; - const uint8_t* glyph(uint16_t) const; + bool has_glyph(uint32_t) const; + const uint8_t* glyph(uint32_t) const; private: static BAN::ErrorOr parse_psf1(const BAN::Span); static BAN::ErrorOr parse_psf2(const BAN::Span); private: - BAN::HashMap m_glyph_offsets; + BAN::HashMap m_glyph_offsets; BAN::Vector m_glyph_data; uint32_t m_width = 0; uint32_t m_height = 0; diff --git a/kernel/kernel/Font.cpp b/kernel/kernel/Font.cpp index 9f79cea59..043aaa951 100644 --- a/kernel/kernel/Font.cpp +++ b/kernel/kernel/Font.cpp @@ -73,17 +73,16 @@ namespace Kernel TRY(glyph_data.resize(glyph_data_size)); memcpy(glyph_data.data(), font_data.data() + sizeof(PSF1Header), glyph_data_size); - BAN::HashMap glyph_offsets; + BAN::HashMap glyph_offsets; TRY(glyph_offsets.reserve(glyph_count)); - bool unsupported_utf = false; bool codepoint_redef = false; + bool codepoint_sequence = false; if (header->magic & (PSF1_MODE_HASTAB | PSF1_MODE_SEQ)) { uint32_t current_index = sizeof(PSF1Header) + glyph_data_size; - bool in_sequence = false; uint32_t glyph_index = 0; while (current_index < font_data.size()) { @@ -91,17 +90,16 @@ namespace Kernel uint16_t hi = font_data[current_index + 1]; uint16_t codepoint = (hi << 8) | lo; - if (codepoint == 0xFFFF) + if (codepoint == 0xFFFE) + { + codepoint_sequence = true; + break; + } + else if (codepoint 
== 0xFFFF) { glyph_index++; - in_sequence = false; } - else if (codepoint == 0xFFFE) - { - in_sequence = true; - unsupported_utf = true; - } - else if (!in_sequence) + else { if (glyph_offsets.contains(codepoint)) codepoint_redef = true; @@ -111,9 +109,6 @@ namespace Kernel current_index += 2; } - - if (glyph_index != glyph_count) - return BAN::Error::from_c_string("Font did not contain unicode entry for all glyphs"); } else { @@ -121,10 +116,10 @@ namespace Kernel TRY(glyph_offsets.insert(i, i * glyph_size)); } - if (unsupported_utf) - dwarnln("Font contains invalid/unsupported UTF-8 codepoint(s)"); if (codepoint_redef) dwarnln("Font contsins multiple definitions for same codepoint(s)"); + if (codepoint_sequence) + dwarnln("Font contains codepoint sequences (not supported)"); Font result; result.m_glyph_offsets = BAN::move(glyph_offsets); @@ -171,11 +166,12 @@ namespace Kernel TRY(glyph_data.resize(glyph_data_size)); memcpy(glyph_data.data(), font_data.data() + header.header_size, glyph_data_size); - BAN::HashMap glyph_offsets; + BAN::HashMap glyph_offsets; TRY(glyph_offsets.reserve(400)); - bool unsupported_utf = false; + bool invalid_utf = false; bool codepoint_redef = false; + bool codepoint_sequence = false; uint8_t bytes[4] {}; uint32_t byte_index = 0; @@ -186,32 +182,44 @@ namespace Kernel { uint8_t byte = font_data[i]; - if ((byte >> 1) == 0x7F) + if (byte == 0xFE) { - if (byte_index <= 4) + codepoint_sequence = true; + break; + } + else if (byte == 0xFF) + { + if (byte_index) { - uint16_t codepoint = BAN::utf8_to_codepoint(bytes, byte_index); - if (codepoint == 0xFFFF) - unsupported_utf = true; + invalid_utf = true; + byte_index = 0; + } + glyph_index++; + } + else + { + ASSERT(byte_index < 4); + bytes[byte_index++] = byte; + uint32_t len = BAN::utf8_byte_length(bytes[0]); + + if (len == 0) + { + invalid_utf = true; + byte_index = 0; + } + else if (len == byte_index) + { + uint32_t codepoint = BAN::utf8_to_codepoint(bytes); + if (codepoint == 
BAN::UTF8::invalid) + invalid_utf = true; else if (glyph_offsets.contains(codepoint)) codepoint_redef = true; else TRY(glyph_offsets.insert(codepoint, glyph_index * header.glyph_size)); + byte_index = 0; } - byte_index = 0; - if (byte == 0xFF) - glyph_index++; - } - else - { - if (byte_index < 4) - bytes[byte_index++] = byte; - else - unsupported_utf = true; } } - if (glyph_index != header.glyph_count) - return BAN::Error::from_c_string("Font did not contain unicode entry for all glyphs"); } else { @@ -219,18 +227,12 @@ namespace Kernel TRY(glyph_offsets.insert(i, i * header.glyph_size)); } - // Manually add space (empty) character if it is not present - if (!glyph_offsets.contains(' ')) - { - TRY(glyph_data.resize(glyph_data_size + header.glyph_size)); - memset(glyph_data.data() + glyph_data_size, 0, header.glyph_size); - TRY(glyph_offsets.insert(' ', glyph_data_size)); - } - - if (unsupported_utf) - dwarnln("Font contains invalid/unsupported UTF-8 codepoint(s)"); + if (invalid_utf) + dwarnln("Font contains invalid UTF-8 codepoint(s)"); if (codepoint_redef) dwarnln("Font contsins multiple definitions for same codepoint(s)"); + if (codepoint_sequence) + dwarnln("Font contains codepoint sequences (not supported)"); Font result; result.m_glyph_offsets = BAN::move(glyph_offsets); @@ -241,12 +243,12 @@ namespace Kernel return result; } - bool Font::has_glyph(uint16_t codepoint) const + bool Font::has_glyph(uint32_t codepoint) const { return m_glyph_offsets.contains(codepoint); } - const uint8_t* Font::glyph(uint16_t codepoint) const + const uint8_t* Font::glyph(uint32_t codepoint) const { return m_glyph_data.data() + m_glyph_offsets[codepoint]; }