// Origin: commit a845f8696c7166db4f7392c76550ee17aa665d5e
// Author: Bananymous
// Date:   Wed, 22 Feb 2023 21:45:26 +0200
// Subject: BAN: Add function to decode utf-8 encoded strings
//          (works for 3 byte unicode characters)
// File:   BAN/include/BAN/UTF8.h

#pragma once

#include <stddef.h>
#include <stdint.h>

namespace BAN
{

    /// Decodes a single UTF-8 encoded sequence of 1 to 3 bytes.
    ///
    /// @param bytes  Pointer to the first byte of the sequence (only read).
    /// @param count  Number of bytes in the sequence; must be 1, 2 or 3.
    /// @return The decoded code point (U+0000..U+FFFF), or 0xFFFF when the
    ///         input is not a well-formed sequence of exactly `count` bytes.
    ///
    /// The sequence is rejected (0xFFFF) when:
    ///  - `bytes` is null, or `count` is 0 or greater than 3,
    ///  - the lead byte does not match the pattern for `count` bytes,
    ///  - any trailing byte is not a continuation byte (10xxxxxx),
    ///  - the encoding is overlong (value would fit in fewer bytes),
    ///  - the value is a UTF-16 surrogate (U+D800..U+DFFF).
    ///
    /// Note: the sentinel 0xFFFF is also what a well-formed encoding of the
    /// noncharacter U+FFFF (EF BF BF) decodes to; the two cannot be told apart
    /// through this interface.
    static constexpr uint16_t utf8_to_codepoint(const uint8_t* bytes, size_t count)
    {
        constexpr uint16_t invalid = 0xFFFF;

        if (bytes == nullptr || count == 0 || count > 3)
            return invalid;

        // Every byte after the lead must look like 10xxxxxx.
        for (size_t i = 1; i < count; i++)
            if ((bytes[i] & 0xC0) != 0x80)
                return invalid;

        switch (count)
        {
            case 1:
            {
                // Single byte sequences are plain ASCII: 0xxxxxxx.
                if ((bytes[0] & 0x80) != 0x00)
                    return invalid;
                return bytes[0];
            }
            case 2:
            {
                // Lead byte 110xxxxx carries 5 payload bits.
                if ((bytes[0] & 0xE0) != 0xC0)
                    return invalid;
                const uint16_t codepoint = static_cast<uint16_t>(
                    ((bytes[0] & 0x1F) << 6) | (bytes[1] & 0x3F));
                // Values below U+0080 must use the 1 byte form (overlong).
                if (codepoint < 0x80)
                    return invalid;
                return codepoint;
            }
            case 3:
            {
                // Lead byte 1110xxxx carries only 4 payload bits, so the mask
                // is 0x0F; a wider mask would shift a bit past the 16 bit result.
                if ((bytes[0] & 0xF0) != 0xE0)
                    return invalid;
                const uint16_t codepoint = static_cast<uint16_t>(
                    ((bytes[0] & 0x0F) << 12) | ((bytes[1] & 0x3F) << 6) | (bytes[2] & 0x3F));
                // Values below U+0800 must use a shorter form (overlong).
                if (codepoint < 0x800)
                    return invalid;
                // Surrogate code points are not valid in UTF-8.
                if (codepoint >= 0xD800 && codepoint <= 0xDFFF)
                    return invalid;
                return codepoint;
            }
        }

        return invalid;
    }

}