LibC: Add some missing wchar.h functions

isw* functions are just wrappers around is*
tow* functions are just wrappers around to*
This commit is contained in:
Bananymous 2025-06-01 13:45:49 +03:00
parent cc07c3df94
commit 4ac6cbe70e
4 changed files with 298 additions and 3 deletions

View File

@ -47,6 +47,7 @@ set(LIBC_SOURCES
time.cpp
unistd.cpp
utime.cpp
wchar-stdio.cpp
wchar.cpp
icxxabi.cpp

View File

@ -30,7 +30,7 @@ typedef __WINT_TYPE__ wint_t;
wint_t btowc(int c);
wint_t fgetwc(FILE* stream);
wchar_t* fgetws(wchar_t* __restrict ws, int n, FILE* __restrict stream);
// NOTE(review): fputwc is declared twice below, the two declarations differ
// only in the parameter name (c vs. wc) -- one of them looks redundant.
wint_t fputwc(wchar_t c, FILE* stream);
wint_t fputwc(wchar_t wc, FILE* stream);
int fputws(const wchar_t* __restrict ws, FILE* __restrict stream);
int fwide(FILE* stream, int mode);
int fwprintf(FILE* __restrict stream, const wchar_t* __restrict format, ...);

View File

@ -0,0 +1,102 @@
#include <BAN/UTF8.h>
#include <errno.h>
#include <wchar.h>
// Scope guard for a FILE lock: the constructor calls flockfile() on the
// stream and the destructor calls funlockfile() on the same stream.
struct FILEScopeLock
{
	explicit FILEScopeLock(FILE* stream)
		: m_stream(stream)
	{
		flockfile(m_stream);
	}

	~FILEScopeLock()
	{
		funlockfile(m_stream);
	}

	// A copy would call funlockfile() a second time for a single flockfile(),
	// so copying is disabled.
	FILEScopeLock(const FILEScopeLock&) = delete;
	FILEScopeLock& operator=(const FILEScopeLock&) = delete;

	FILE* m_stream;
};
// Reads one wide character from `stream` by forwarding to fgetwc() and
// returning its result unchanged.
wint_t getwc(FILE* stream)
{
	const wint_t result = fgetwc(stream);
	return result;
}
// Reads one UTF-8 encoded character from `stream` and returns its code point.
//
// The stream stays locked for the whole call. Returns WEOF when
// getc_unlocked() reports EOF (also in the middle of a sequence), and WEOF
// with errno set to EILSEQ when BAN::UTF8 rejects the lead byte or the
// complete sequence.
wint_t fgetwc(FILE* stream)
{
	FILEScopeLock _(stream);

	char buffer[4];

	// Keep the result of getc_unlocked() in an int until it has been compared
	// against EOF. Once narrowed to char, the byte 0xFF compares equal to EOF
	// when char is signed, and EOF is never matched when char is unsigned.
	const int first = getc_unlocked(stream);
	if (first == EOF)
		return WEOF;
	buffer[0] = static_cast<char>(first);

	const auto length = BAN::UTF8::byte_length(buffer[0]);
	if (length == BAN::UTF8::invalid)
	{
		errno = EILSEQ;
		return WEOF;
	}

	// Fetch the remaining bytes of the sequence announced by the lead byte.
	for (uint32_t i = 1; i < length; i++)
	{
		const int next = getc_unlocked(stream);
		if (next == EOF)
			return WEOF;
		buffer[i] = static_cast<char>(next);
	}

	const auto ret = BAN::UTF8::to_codepoint(buffer);
	if (ret == BAN::UTF8::invalid)
	{
		errno = EILSEQ;
		return WEOF;
	}

	return ret;
}
// Writes the wide character `wc` to `stream` by forwarding to fputwc() and
// returning its result unchanged.
wint_t putwc(wchar_t wc, FILE* stream)
{
	const wint_t result = fputwc(wc, stream);
	return result;
}
// Encodes `wc` with BAN::UTF8::from_codepoints() and writes the resulting
// bytes to `stream` while holding the stream lock.
//
// Returns `wc` on success. Returns WEOF with errno set to EILSEQ when the
// encoding step fails, and WEOF when putc_unlocked() reports EOF.
wint_t fputwc(wchar_t wc, FILE* stream)
{
	// NOTE(review): this assumes from_codepoints() writes at most 4 bytes for
	// a single code point (i.e. no terminator is appended) -- TODO confirm.
	char encoded[4];
	if (!BAN::UTF8::from_codepoints(&wc, 1, encoded))
	{
		errno = EILSEQ;
		return WEOF;
	}

	// The lock is only taken once the character is known to be encodable.
	FILEScopeLock lock(stream);

	// The lead byte tells how many bytes were produced.
	const auto count = BAN::UTF8::byte_length(encoded[0]);

	uint32_t written = 0;
	while (written < count)
	{
		if (putc_unlocked(encoded[written], stream) == EOF)
			return WEOF;
		written++;
	}

	return wc;
}
// Pushes the wide character `wc` back onto `stream` as its UTF-8 encoding.
//
// Returns `wc` on success. Returns WEOF when `wc` is WEOF (stream untouched),
// WEOF with errno set to EILSEQ when `wc` can not be encoded, and WEOF when
// ungetc() refuses a byte; in the last case the bytes already pushed by this
// call are read back again.
wint_t ungetwc(wint_t wc, FILE* stream)
{
	// Pushing back WEOF must fail and leave the stream unchanged.
	if (wc == WEOF)
		return WEOF;

	char buffer[4];
	if (!BAN::UTF8::from_codepoints(&wc, 1, buffer))
	{
		errno = EILSEQ;
		return WEOF;
	}

	FILEScopeLock _(stream);

	const auto bytes = BAN::UTF8::byte_length(buffer[0]);

	// ungetc() is last-in-first-out: the byte pushed last is read first.
	// Push the sequence back to front so that it is read in encoding order.
	for (uint32_t pushed = 0; pushed < bytes; pushed++)
	{
		// Pass the byte as unsigned char so it can never look like EOF.
		const unsigned char byte = static_cast<unsigned char>(buffer[bytes - 1 - pushed]);
		if (ungetc(byte, stream) != EOF)
			continue;

		// Undo the partial push-back so no fragment of the sequence remains.
		for (uint32_t j = 0; j < pushed; j++)
			fgetc(stream);
		return WEOF;
	}

	return wc;
}

View File

@ -1,10 +1,112 @@
#include <BAN/Assert.h>
#include <BAN/UTF8.h>
#include <errno.h>
#include <locale.h>
#include <stdio.h>
#include <wchar.h>
size_t mbrtowc(wchar_t* __restrict, const char* __restrict, size_t, mbstate_t* __restrict)
// Converts a single-byte character to its wide character equivalent.
//
// Only the values 0x00..0x7F are accepted and are returned unchanged.
// Everything else, including EOF and other negative values, yields WEOF.
wint_t btowc(int c)
{
	// The null byte is a valid single-byte character and maps to L'\0';
	// negative values and bytes above 0x7F are rejected.
	if (c < 0 || c > 0x7F)
		return WEOF;
	return c;
}
// Converts a wide character to its single-byte representation.
//
// Values up to 0x7F are returned unchanged; every other value (including
// WEOF) yields EOF.
int wctob(wint_t c)
{
	// The return type is int, so the failure value is EOF rather than WEOF.
	if (c > 0x7F)
		return EOF;
	return c;
}
// Returns 0 for the null wide character and 1 for every other value.
int wcwidth(wchar_t wc)
{
	if (wc == '\0')
		return 0;
	return 1;
}
// Encodes the wide character `ws` into `s` using BAN::UTF8::from_codepoints().
//
// `ps` is ignored. Returns 1 when `s` is null, (size_t)-1 with errno set to
// EILSEQ when the encoding step fails, and otherwise the byte length reported
// by BAN::UTF8::byte_length() for the first byte written.
size_t wcrtomb(char* __restrict s, wchar_t ws, mbstate_t* __restrict ps)
{
	(void)ps;

	// ws == '\0' doesn't seem to apply to UTF8?
	if (!s)
		return 1;

	if (BAN::UTF8::from_codepoints(&ws, 1, s))
		return BAN::UTF8::byte_length(s[0]);

	errno = EILSEQ;
	return static_cast<size_t>(-1);
}
// Decodes one UTF-8 encoded character from the first `n` bytes of `s`.
//
// `ps` is ignored. Return values:
//   0           `s` is null, or the decoded character is the null character
//   (size_t)-2  the first `n` bytes are only the beginning of a character
//   (size_t)-1  BAN::UTF8 rejects the lead byte or the sequence (errno = EILSEQ)
//   otherwise   number of bytes that make up the decoded character
// The decoded character is stored in `*pwc` when `pwc` is not null.
size_t mbrtowc(wchar_t* __restrict pwc, const char* __restrict s, size_t n, mbstate_t* __restrict ps)
{
	(void)ps;

	if (s == nullptr)
		return 0;

	// With n == 0 not even the lead byte may be read from s.
	if (n == 0)
		return static_cast<size_t>(-2);

	const auto bytes = BAN::UTF8::byte_length(*s);
	if (bytes == BAN::UTF8::invalid)
	{
		errno = EILSEQ;
		return static_cast<size_t>(-1);
	}

	// A truncated sequence is "incomplete", not an encoding error.
	// NOTE(review): the partial sequence is not remembered in ps, callers
	// have to present the complete sequence again on the next call.
	if (n < bytes)
		return static_cast<size_t>(-2);

	const auto codepoint = BAN::UTF8::to_codepoint(s);
	if (codepoint == BAN::UTF8::invalid)
	{
		errno = EILSEQ;
		return static_cast<size_t>(-1);
	}

	if (pwc != nullptr)
		*pwc = codepoint;

	if (codepoint == 0)
		return 0;
	return bytes;
}
// Compares two wide strings by calling wcscoll_l() with the locale returned
// by __getlocale(LC_COLLATE).
int wcscoll(const wchar_t* ws1, const wchar_t* ws2)
{
	const locale_t collate_locale = __getlocale(LC_COLLATE);
	return wcscoll_l(ws1, ws2, collate_locale);
}
// Compares two wide strings. `locale` is ignored and the result is whatever
// wcscmp() returns for the two strings.
int wcscoll_l(const wchar_t* ws1, const wchar_t* ws2, locale_t locale)
{
	// TODO: this isn't really correct :D
	(void)locale;

	const int order = wcscmp(ws1, ws2);
	return order;
}
// Transforms `ws2` into `ws1` by calling wcsxfrm_l() with the locale returned
// by __getlocale(LC_COLLATE).
size_t wcsxfrm(wchar_t* __restrict ws1, const wchar_t* __restrict ws2, size_t n)
{
	const locale_t collate_locale = __getlocale(LC_COLLATE);
	return wcsxfrm_l(ws1, ws2, n, collate_locale);
}
// Copies `ws2` into `ws1` with wcsncpy() (limited to `n` wide characters) and
// returns wcslen(ws2). `locale` is ignored.
size_t wcsxfrm_l(wchar_t* __restrict ws1, const wchar_t* __restrict ws2, size_t n, locale_t locale)
{
	// TODO: this isn't really correct :D
	(void)locale;

	wcsncpy(ws1, ws2, n);

	const size_t source_length = wcslen(ws2);
	return source_length;
}
// Placeholder: no formatting is performed. All arguments are ignored, a TODO
// message is written to stddbg and 0 is returned.
size_t wcsftime(wchar_t* __restrict wcs, size_t maxsize, const wchar_t* __restrict format, const struct tm* __restrict timeptr)
{
	// None of the parameters are used yet.
	(void)timeptr;
	(void)format;
	(void)maxsize;
	(void)wcs;

	fprintf(stddbg, "TODO: wcsftime");

	const size_t written = 0;
	return written;
}
int wcscmp(const wchar_t* ws1, const wchar_t* ws2)
@ -182,3 +284,93 @@ wchar_t* wmemset(wchar_t* ws, wchar_t wc, size_t n)
ws[i] = wc;
return ws;
}
// FIXME: actually support multibyte :D

// Converts `wc` to lower case by delegating to tolower().
//
// Only values up to 0x7F are handed to tolower(); any other value (including
// WEOF) is returned unchanged, because the <ctype.h> functions are only
// defined for arguments representable as unsigned char or equal to EOF.
wint_t towlower(wint_t wc)
{
	if (wc > 0x7F)
		return wc;
	return tolower(wc);
}
// Converts `wc` to upper case by delegating to toupper().
//
// Only values up to 0x7F are handed to toupper(); any other value (including
// WEOF) is returned unchanged, because the <ctype.h> functions are only
// defined for arguments representable as unsigned char or equal to EOF.
wint_t towupper(wint_t wc)
{
	if (wc > 0x7F)
		return wc;
	return toupper(wc);
}
// Generates isw<class>(), a thin wrapper around the matching <ctype.h>
// function is<class>().
//
// Values above 0x7F (including WEOF) are never members of any class here:
// they are rejected before the call, because the <ctype.h> functions are only
// defined for arguments representable as unsigned char or equal to EOF.
#define DEFINE_ISW(class) \
	int isw##class(wint_t wc) { \
		if (wc > 0x7F) \
			return 0; \
		return is##class(wc); \
	}

DEFINE_ISW(alnum);
DEFINE_ISW(alpha);
DEFINE_ISW(blank);
DEFINE_ISW(cntrl);
DEFINE_ISW(digit);
DEFINE_ISW(graph);
DEFINE_ISW(lower);
DEFINE_ISW(print);
DEFINE_ISW(punct);
DEFINE_ISW(space);
DEFINE_ISW(upper);
DEFINE_ISW(xdigit);

#undef DEFINE_ISW
// Identifiers for the character classes handled by wctype() and iswctype().
// Numbering starts at 1, so 0 never names a class.
typedef enum {
	_alnum  = 1,
	_alpha  = 2,
	_blank  = 3,
	_cntrl  = 4,
	_digit  = 5,
	_graph  = 6,
	_lower  = 7,
	_print  = 8,
	_punct  = 9,
	_space  = 10,
	_upper  = 11,
	_xdigit = 12,
} wctype_values;
// Maps a character class name ("alnum", "alpha", ..., "xdigit") to the
// matching wctype_values constant. Returns 0 when `property` names none of
// the known classes.
wctype_t wctype(const char* property)
{
	// Expands to: if `property` equals the class name, return its constant.
#define CHECK_PROPERTY(class) \
	if (strcmp(property, #class) == 0) \
		return _##class

	CHECK_PROPERTY(alnum);
	CHECK_PROPERTY(alpha);
	CHECK_PROPERTY(blank);
	CHECK_PROPERTY(cntrl);
	CHECK_PROPERTY(digit);
	CHECK_PROPERTY(graph);
	CHECK_PROPERTY(lower);
	CHECK_PROPERTY(print);
	CHECK_PROPERTY(punct);
	CHECK_PROPERTY(space);
	CHECK_PROPERTY(upper);
	CHECK_PROPERTY(xdigit);

	// Undefine the helper that was actually defined above so it does not
	// leak into the rest of the translation unit.
#undef CHECK_PROPERTY

	return 0;
}
// Tests whether `wc` belongs to the character class `charclass`, where
// `charclass` is one of the wctype_values constants.
//
// Returns the result of the matching <ctype.h> function, or 0 when
// `charclass` is not a known class or `wc` is above 0x7F.
int iswctype(wint_t wc, wctype_t charclass)
{
	// The <ctype.h> functions are only defined for arguments representable
	// as unsigned char or equal to EOF, so wider values (including WEOF) are
	// rejected before dispatching.
	if (wc > 0x7F)
		return 0;

	switch (charclass)
	{
#define CLASS_CASE(class) \
		case _##class: \
			return is##class(wc)

		CLASS_CASE(alnum);
		CLASS_CASE(alpha);
		CLASS_CASE(blank);
		CLASS_CASE(cntrl);
		CLASS_CASE(digit);
		CLASS_CASE(graph);
		CLASS_CASE(lower);
		CLASS_CASE(print);
		CLASS_CASE(punct);
		CLASS_CASE(space);
		CLASS_CASE(upper);
		CLASS_CASE(xdigit);

#undef CLASS_CASE
	}

	// Unknown class identifier.
	return 0;
}