From f08d42985153f47add1bdebd8a3c718baf38e797 Mon Sep 17 00:00:00 2001 From: Bananymous Date: Sun, 11 Aug 2024 00:54:51 +0300 Subject: [PATCH] Terminal: Add utf8 parsing for input This patch makes it possible to render multibyte UTF-8 codepoints! --- userspace/programs/Terminal/Terminal.cpp | 45 ++++++++++++++++++++---- userspace/programs/Terminal/Terminal.h | 5 ++- 2 files changed, 43 insertions(+), 7 deletions(-) diff --git a/userspace/programs/Terminal/Terminal.cpp b/userspace/programs/Terminal/Terminal.cpp index 27690bd0..c80faf86 100644 --- a/userspace/programs/Terminal/Terminal.cpp +++ b/userspace/programs/Terminal/Terminal.cpp @@ -295,13 +295,13 @@ void Terminal::handle_csi(char ch) m_state = State::Normal; } -void Terminal::putchar(uint32_t codepoint) +void Terminal::putchar(uint8_t ch) { if (m_state == State::ESC) { - if (codepoint != '[') + if (ch != '[') { - dprintln("unknown escape character 0x{H}", codepoint); + dprintln("unknown escape character 0x{2H}", ch); m_state = State::Normal; return; } @@ -314,13 +314,46 @@ void Terminal::putchar(uint32_t codepoint) if (m_state == State::CSI) { - if (codepoint < 0x20 || codepoint > 0xFE) + if (ch < 0x20 || ch > 0xFE) { - dprintln("invalid CSI 0x{H}", codepoint); + dprintln("invalid CSI 0x{2H}", ch); m_state = State::Normal; return; } - handle_csi(codepoint); + handle_csi(ch); + return; + } + + m_utf8_bytes[m_utf8_index++] = ch; + + const size_t utf8_len = BAN::UTF8::byte_length(m_utf8_bytes[0]); + if (utf8_len == 0) + { + dwarnln("invalid utf8 leading byte 0x{2H}", ch); + m_utf8_index = 0; + return; + } + if (m_utf8_index < utf8_len) + return; + + const uint32_t codepoint = BAN::UTF8::to_codepoint(m_utf8_bytes); + m_utf8_index = 0; + + if (codepoint == BAN::UTF8::invalid) + { + char utf8_hex[20]; + char* ptr = utf8_hex; + for (uint8_t i = 0; i < utf8_len; i++) + { + *ptr++ = '0'; + *ptr++ = 'x'; + *ptr++ = (m_utf8_bytes[i] >> 4) < 10 ? 
(m_utf8_bytes[i] >> 4) + '0' : (m_utf8_bytes[i] >> 4) - 10 + 'A'; + *ptr++ = (m_utf8_bytes[i] & 0xF) < 10 ? (m_utf8_bytes[i] & 0xF) + '0' : (m_utf8_bytes[i] & 0xF) - 10 + 'A'; + *ptr++ = ' '; + } + *--ptr = '\0'; + + dwarnln("invalid utf8 {}", utf8_hex); return; } diff --git a/userspace/programs/Terminal/Terminal.h b/userspace/programs/Terminal/Terminal.h index 26d19c11..f4c63389 100644 --- a/userspace/programs/Terminal/Terminal.h +++ b/userspace/programs/Terminal/Terminal.h @@ -15,7 +15,7 @@ public: private: void handle_csi(char ch); void handle_sgr(); - void putchar(uint32_t codepoint); + void putchar(uint8_t ch); bool read_shell(int fd); void on_key_event(LibGUI::EventPacket::KeyEvent); @@ -58,6 +58,9 @@ private: State m_state { State::Normal }; CSIInfo m_csi_info; + uint8_t m_utf8_index { 0 }; + uint8_t m_utf8_bytes[4] { }; + Cursor m_saved_cursor { 0, 0 }; uint32_t m_fg_color { 0xFFFFFF }; uint32_t m_bg_color { 0x000000 };