Terminal: Add utf8 parsing for input

This patch adds the possibility to render multibyte UTF-8 codepoints!
This commit is contained in:
Bananymous 2024-08-11 00:54:51 +03:00
parent bac3219a01
commit f08d429851
2 changed files with 43 additions and 7 deletions
userspace/programs/Terminal

View File

@ -295,13 +295,13 @@ void Terminal::handle_csi(char ch)
m_state = State::Normal;
}
void Terminal::putchar(uint32_t codepoint)
void Terminal::putchar(uint8_t ch)
{
if (m_state == State::ESC)
{
if (codepoint != '[')
if (ch != '[')
{
dprintln("unknown escape character 0x{H}", codepoint);
dprintln("unknown escape character 0x{2H}", ch);
m_state = State::Normal;
return;
}
@ -314,13 +314,46 @@ void Terminal::putchar(uint32_t codepoint)
if (m_state == State::CSI)
{
if (codepoint < 0x20 || codepoint > 0xFE)
if (ch < 0x20 || ch > 0xFE)
{
dprintln("invalid CSI 0x{H}", codepoint);
dprintln("invalid CSI 0x{2H}", ch);
m_state = State::Normal;
return;
}
handle_csi(codepoint);
handle_csi(ch);
return;
}
m_utf8_bytes[m_utf8_index++] = ch;
const size_t utf8_len = BAN::UTF8::byte_length(m_utf8_bytes[0]);
if (utf8_len == 0)
{
dwarnln("invalid utf8 leading byte 0x{2H}", ch);
m_utf8_index = 0;
return;
}
if (m_utf8_index < utf8_len)
return;
const uint32_t codepoint = BAN::UTF8::to_codepoint(m_utf8_bytes);
m_utf8_index = 0;
if (codepoint == BAN::UTF8::invalid)
{
char utf8_hex[20];
char* ptr = utf8_hex;
for (uint8_t i = 0; i < utf8_len; i++)
{
*ptr++ = '0';
*ptr++ = 'x';
*ptr++ = (m_utf8_bytes[i] >> 4) < 10 ? (m_utf8_bytes[i] >> 4) + '0' : (m_utf8_bytes[i] >> 4) - 10 + 'A';
*ptr++ = (m_utf8_bytes[i] & 0xF) < 10 ? (m_utf8_bytes[i] & 0xF) + '0' : (m_utf8_bytes[i] & 0xF) - 10 + 'A';
*ptr++ = ' ';
}
*--ptr = '\0';
dwarnln("invalid utf8 {}", utf8_hex);
return;
}

View File

@ -15,7 +15,7 @@ public:
private:
void handle_csi(char ch);
void handle_sgr();
void putchar(uint32_t codepoint);
void putchar(uint8_t ch);
bool read_shell(int fd);
void on_key_event(LibGUI::EventPacket::KeyEvent);
@ -58,6 +58,9 @@ private:
State m_state { State::Normal };
CSIInfo m_csi_info;
uint8_t m_utf8_index { 0 };
uint8_t m_utf8_bytes[4] { };
Cursor m_saved_cursor { 0, 0 };
uint32_t m_fg_color { 0xFFFFFF };
uint32_t m_bg_color { 0x000000 };