LibC: Start work on locales

This patch adds two locales: the POSIX locale and the UTF8 locale.

The functions `mbstowcs()` and `strcoll()` now use the current locale to do
conversions and comparisons, respectively.
This commit is contained in:
Bananymous 2024-08-07 19:00:05 +03:00
parent 7afdfb150f
commit 3b23458ecc
5 changed files with 183 additions and 13 deletions

View File

@ -9,7 +9,7 @@ __BEGIN_DECLS
#ifndef __locale_t_defined
#define __locale_t_defined 1
/*
 * Identifies one of the locales built into this libc.
 * LOCALE_INVALID is the value used for an unknown or unsupported locale.
 */
typedef enum { LOCALE_INVALID, LOCALE_POSIX, LOCALE_UTF8 } locale_t;
#endif
__END_DECLS

View File

@ -68,6 +68,8 @@ locale_t newlocale(int category_mask, const char* locale, locale_t base);
/*
 * Query (when `locale` is a null pointer) or set the locale used for `category`.
 * Returns a string naming the locale, or a null pointer when the category or
 * the locale name is not recognised.
 */
char* setlocale(int category, const char* locale);
locale_t uselocale(locale_t newloc);
/*
 * libc-internal helper: returns the locale currently selected for a single
 * category (LC_COLLATE, LC_CTYPE, LC_MESSAGES, LC_MONETARY, LC_NUMERIC or
 * LC_TIME). Any other value, including LC_ALL, yields LOCALE_INVALID.
 */
locale_t __getlocale(int category);
__END_DECLS
#endif

View File

@ -1,15 +1,112 @@
#include <BAN/Assert.h>
#include <locale.h>
#include <stdio.h>
#include <string.h>
// Locale currently selected for each category, indexed by the category's LC_*
// value. Every category starts out in the POSIX locale. Only the POSIX and
// UTF8 locales are supported so far.
static locale_t s_current_locales[LC_ALL] {
	LOCALE_POSIX,
	LOCALE_POSIX,
	LOCALE_POSIX,
	LOCALE_POSIX,
	LOCALE_POSIX,
	LOCALE_POSIX,
};
// The initializer above lists exactly six categories.
static_assert(LC_ALL == 6);
// Maps a locale name to the matching built-in locale.
//
// The empty string selects the UTF8 locale. "C" and "POSIX" both name the
// POSIX locale, and "C.UTF8" names the UTF8 locale. Any other name yields
// LOCALE_INVALID. `locale` must not be a null pointer.
static locale_t str_to_locale(const char* locale)
{
	if (*locale == '\0')
		return LOCALE_UTF8;

	// The locale is named "POSIX"; "LOCALE_POSIX" is only the enum identifier.
	if (strcmp(locale, "C") == 0 || strcmp(locale, "POSIX") == 0)
		return LOCALE_POSIX;

	if (strcmp(locale, "C.UTF8") == 0)
		return LOCALE_UTF8;

	return LOCALE_INVALID;
}
static const char* locale_to_str(locale_t locale)
{
if (locale == LOCALE_POSIX)
return "C";
if (locale == LOCALE_UTF8)
return "C.UTF8";
ASSERT_NOT_REACHED();
}
char* setlocale(int category, const char* locale_str)
{
static char s_locale_buffer[128];
if (locale_str == nullptr)
{
switch (category)
{
case LC_COLLATE:
case LC_CTYPE:
case LC_MESSAGES:
case LC_MONETARY:
case LC_NUMERIC:
case LC_TIME:
strcpy(s_locale_buffer, locale_to_str(s_current_locales[category]));
break;
case LC_ALL:
sprintf(s_locale_buffer, "%s;%s;%s;%s;%s;%s",
locale_to_str(s_current_locales[0]),
locale_to_str(s_current_locales[1]),
locale_to_str(s_current_locales[2]),
locale_to_str(s_current_locales[3]),
locale_to_str(s_current_locales[4]),
locale_to_str(s_current_locales[5])
);
break;
default:
return nullptr;
}
return s_locale_buffer;
}
locale_t locale = str_to_locale(locale_str);
if (locale == LOCALE_INVALID)
return nullptr;
switch (category)
{
case LC_COLLATE:
case LC_CTYPE:
case LC_MESSAGES:
case LC_MONETARY:
case LC_NUMERIC:
case LC_TIME:
s_current_locales[category] = locale;
break;
case LC_ALL:
for (auto& current : s_current_locales)
current = locale;
break;
default:
return nullptr;
}
strcpy(s_locale_buffer, locale_to_str(locale));
return s_locale_buffer;
}
// Returns the locale currently selected for one individual category.
// LC_ALL and unknown category values yield LOCALE_INVALID.
locale_t __getlocale(int category)
{
	const bool is_single_category =
		category == LC_COLLATE  ||
		category == LC_CTYPE    ||
		category == LC_MESSAGES ||
		category == LC_MONETARY ||
		category == LC_NUMERIC  ||
		category == LC_TIME;

	if (!is_single_category)
		return LOCALE_INVALID;

	return s_current_locales[category];
}

View File

@ -1,8 +1,10 @@
#include <BAN/Assert.h>
#include <BAN/Limits.h>
#include <BAN/Math.h>
#include <BAN/UTF8.h>
#include <ctype.h>
#include <errno.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@ -512,6 +514,41 @@ int putenv(char* string)
return 0;
}
// Converts the multibyte string `s` to wide characters using the LC_CTYPE
// locale.
//
// In the POSIX locale every byte becomes one wide character; in the UTF8
// locale `s` is decoded as UTF-8. At most `n` wide characters are stored in
// `pwcs`, and a terminating null wide character is added only when fewer than
// `n` were written.
//
// When `pwcs` is a null pointer nothing is stored and `n` is ignored: the
// whole string is scanned and the number of wide characters it would need is
// returned.
//
// Returns the number of wide characters converted, not counting the
// terminator. On an invalid UTF-8 sequence errno is set to EILSEQ and
// (size_t)-1 is returned.
size_t mbstowcs(wchar_t* __restrict pwcs, const char* __restrict s, size_t n)
{
	auto* us = reinterpret_cast<const unsigned char*>(s);

	// A null destination turns the call into a pure length query.
	const bool count_only = (pwcs == nullptr);

	size_t len = 0;

	switch (__getlocale(LC_CTYPE))
	{
		case LOCALE_INVALID:
			ASSERT_NOT_REACHED();
		case LOCALE_POSIX:
			while (*us && (count_only || len < n))
			{
				if (!count_only)
					pwcs[len] = *us;
				len++;
				us++;
			}
			break;
		case LOCALE_UTF8:
			while (*us && (count_only || len < n))
			{
				auto wch = BAN::UTF8::to_codepoint(us);
				if (wch == BAN::UTF8::invalid)
				{
					errno = EILSEQ;
					return static_cast<size_t>(-1);
				}
				if (!count_only)
					pwcs[len] = wch;
				len++;
				// The sequence decoded successfully, so its lead byte gives
				// the number of bytes to skip.
				us += BAN::UTF8::byte_length(*us);
			}
			break;
	}

	if (!count_only && len < n)
		pwcs[len] = 0;

	return len;
}
void* bsearch(const void* key, const void* base, size_t nel, size_t width, int (*compar)(const void*, const void*))
{
if (nel == 0)

View File

@ -1,4 +1,8 @@
#include <BAN/Assert.h>
#include <BAN/UTF8.h>
#include <errno.h>
#include <locale.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
@ -138,8 +142,38 @@ char* strncat(char* __restrict__ dest, const char* __restrict__ src, size_t n)
// Compares two strings according to the LC_COLLATE locale.
//
// In the POSIX locale this is plain strcmp(). In the UTF8 locale both strings
// are decoded as UTF-8 and ordered by code point; a string that is a proper
// prefix of the other sorts first.
//
// Returns a negative value, zero or a positive value when `s1` sorts before,
// equal to or after `s2`. There is no error return value: if either string
// contains an invalid UTF-8 sequence, errno is set to EINVAL and the rest of
// the strings is ordered byte-wise so the result is still consistent in both
// directions.
int strcoll(const char* s1, const char* s2)
{
	switch (__getlocale(LC_COLLATE))
	{
		case LOCALE_INVALID:
			ASSERT_NOT_REACHED();
		case LOCALE_POSIX:
			return strcmp(s1, s2);
		case LOCALE_UTF8:
		{
			auto* u1 = reinterpret_cast<const unsigned char*>(s1);
			auto* u2 = reinterpret_cast<const unsigned char*>(s2);

			while (*u1 && *u2)
			{
				const wchar_t wc1 = BAN::UTF8::to_codepoint(u1);
				const wchar_t wc2 = BAN::UTF8::to_codepoint(u2);
				if (wc1 == (wchar_t)BAN::UTF8::invalid || wc2 == (wchar_t)BAN::UTF8::invalid)
				{
					errno = EINVAL;
					// Everything before this point compared equal, so the
					// remaining bytes decide the order.
					return strcmp(reinterpret_cast<const char*>(u1), reinterpret_cast<const char*>(u2));
				}

				// Compare instead of subtracting so the result does not depend
				// on the width or signedness of wchar_t.
				if (wc1 != wc2)
					return (wc1 < wc2) ? -1 : 1;

				u1 += BAN::UTF8::byte_length(*u1);
				u2 += BAN::UTF8::byte_length(*u2);
			}

			// At least one string has ended. Both terminators mean equal;
			// otherwise the string that ended first sorts first.
			return *u1 - *u2;
		}
	}
	ASSERT_NOT_REACHED();
}
char* strdup(const char* str)