diff --git a/BAN/include/BAN/Math.h b/BAN/include/BAN/Math.h index 78a77941..87f09b50 100644 --- a/BAN/include/BAN/Math.h +++ b/BAN/include/BAN/Math.h @@ -1,18 +1,18 @@ #pragma once #include +#include #include -#include -#include +#include namespace BAN::Math { template - inline constexpr T abs(T val) + inline constexpr T abs(T x) { - return val < 0 ? -val : val; + return x < 0 ? -x : x; } template @@ -59,11 +59,11 @@ namespace BAN::Math } template - inline constexpr bool is_power_of_two(T value) + inline constexpr bool is_power_of_two(T x) { - if (value == 0) + if (x == 0) return false; - return (value & (value - 1)) == 0; + return (x & (x - 1)) == 0; } template @@ -89,43 +89,181 @@ namespace BAN::Math template requires is_same_v || is_same_v || is_same_v - inline constexpr T ilog2(T value) + inline constexpr T ilog2(T x) { if constexpr(is_same_v) - return sizeof(T) * 8 - __builtin_clz(value) - 1; + return sizeof(T) * 8 - __builtin_clz(x) - 1; if constexpr(is_same_v) - return sizeof(T) * 8 - __builtin_clzl(value) - 1; - return sizeof(T) * 8 - __builtin_clzll(value) - 1; + return sizeof(T) * 8 - __builtin_clzl(x) - 1; + return sizeof(T) * 8 - __builtin_clzll(x) - 1; } template - inline constexpr T log2(T value) + inline constexpr T floor(T x) { - T result; - asm volatile("fyl2x" : "=t"(result) : "0"(value), "u"((T)1.0) : "st(1)"); - return result; + if constexpr(is_same_v) + return __builtin_floorf(x); + if constexpr(is_same_v) + return __builtin_floor(x); + if constexpr(is_same_v) + return __builtin_floorl(x); } template - inline constexpr T log10(T value) + inline constexpr T ceil(T x) { - constexpr T INV_LOG_2_10 = 0.3010299956639811952137388947244930267681898814621085413104274611; - T result; - asm volatile("fyl2x" : "=t"(result) : "0"(value), "u"(INV_LOG_2_10) : "st(1)"); - return result; + if constexpr(is_same_v) + return __builtin_ceilf(x); + if constexpr(is_same_v) + return __builtin_ceil(x); + if constexpr(is_same_v) + return __builtin_ceill(x); } template - inline constexpr T log(T value, T base) + inline constexpr T round(T x) { - return log2(value) / log2(base); + if (x == (T)0.0) + return x; + if (x > (T)0.0) + return floor(x + (T)0.5); + return ceil(x - (T)0.5); } template - inline constexpr T pow(T base, T exp) + inline constexpr T trunc(T x) { - T result; - asm volatile( + if constexpr(is_same_v) + return __builtin_truncf(x); + if constexpr(is_same_v) + return __builtin_trunc(x); + if constexpr(is_same_v) + return __builtin_truncl(x); + } + + template + inline constexpr T rint(T x) + { + asm("frndint" : "+t"(x)); + return x; + } + + template + inline constexpr T fmod(T a, T b) + { + asm( + "1:" + "fprem;" + "fnstsw %%ax;" + "testb $4, %%ah;" + "jne 1b;" + : "+t"(a) + : "u"(b) + : "ax" + ); + return a; + } + + template + static T modf(T x, T* iptr) + { + const T frac = BAN::Math::fmod(x, 1); + *iptr = x - frac; + return frac; + } + + template + inline constexpr T frexp(T num, int* exp) + { + if (num == 0.0) + { + *exp = 0; + return 0.0; + } + + T _exp; + asm("fxtract" : "+t"(num), "=u"(_exp)); + *exp = (int)_exp + 1; + return num / (T)2.0; + } + + template + inline constexpr T copysign(T x, T y) + { + if ((x < (T)0.0) != (y < (T)0.0)) + x = -x; + return x; + } + + namespace detail + { + + template + inline constexpr T fyl2x(T x, T y) + { + asm("fyl2x" : "+t"(x) : "u"(y) : "st(1)"); + return x; + } + + } + + template + inline constexpr T log(T x) + { + return detail::fyl2x(x, numbers::ln2_v); + } + + template + inline constexpr T log2(T x) + { + return detail::fyl2x(x, 1.0); + } + + template + inline constexpr T log10(T x) + { + return detail::fyl2x(x, numbers::lg2_v); + } + + template + inline constexpr T logb(T x) + { + static_assert(FLT_RADIX == 2); + return log2(x); + } + + template + inline constexpr T exp2(T x) + { + if (abs(x) <= (T)1.0) + { + asm("f2xm1" : "+t"(x)); + return x + (T)1.0; + } + + asm( + "fld1;" + "fld %%st(1);" + "fprem;" + "f2xm1;" + "faddp;" + "fscale;" + "fstp %%st(1);" + : "+t"(x) + ); + return x; + } + + template + inline constexpr T exp(T x) + { + return exp2(x * numbers::log2e_v); + } + + template + inline constexpr T pow(T x, T y) + { + asm( "fyl2x;" "fld1;" "fld %%st(1);" @@ -133,12 +271,170 @@ namespace BAN::Math "f2xm1;" "faddp;" "fscale;" - "fxch %%st(1);" - "fstp %%st;" - : "=t"(result) - : "0"(base), "u"(exp) + : "+t"(x), "+u"(y) ); - return result; + + return x; + } + + template + inline constexpr T scalbn(T x, int n) + { + asm("fscale" : "+t"(x) : "u"(static_cast(n))); + return x; + } + + template + inline constexpr T ldexp(T x, int y) + { + const bool exp_sign = y < 0; + if (exp_sign) + y = -y; + + T exp = (T)1.0; + T mult = (T)2.0; + while (y) + { + if (y & 1) + exp *= mult; + mult *= mult; + y >>= 1; + } + + if (exp_sign) + exp = (T)1.0 / exp; + + return x * exp; + } + + template + inline constexpr T sqrt(T x) + { + asm("fsqrt" : "+t"(x)); + return x; + } + + template + inline constexpr T cbrt(T value) + { + if (value == 0.0) + return 0.0; + return pow(value, 1.0 / 3.0); + } + + template + inline constexpr T sin(T x) + { + x = fmod(x, (T)2.0 * numbers::pi_v); + asm("fsin" : "+t"(x)); + return x; + } + + template + inline constexpr T cos(T x) + { + if (abs(x) >= (T)9223372036854775808.0) + x = fmod(x, (T)2.0 * numbers::pi_v); + asm("fcos" : "+t"(x)); + return x; + } + + template + inline constexpr T tan(T x) + { + T one, ret; + asm( + "fptan" + : "=t"(one), "=u"(ret) + : "0"(x) + ); + return ret; + } + + template + inline constexpr T atan2(T y, T x) + { + asm( + "fpatan" + : "+t"(x) + : "u"(y) + : "st(1)" + ); + return x; + } + + template + inline constexpr T atan(T x) + { + return atan2(x, 1.0); + } + + template + inline constexpr T asin(T x) + { + if (x == (T)0.0) + return (T)0.0; + if (x == (T)1.0) + return numbers::pi_v / (T)2.0; + if (x == (T)-1.0) + return -numbers::pi_v / (T)2.0; + return (T)2.0 * atan(x / (T(1.0) + sqrt((T)1.0 - x * x))); + } + + template + inline constexpr T acos(T x) + { + if (x == (T)0.0) + return numbers::pi_v / (T)2.0; + if (x == (T)1.0) + return (T)0.0; + if (x == (T)-1.0) + return numbers::pi_v; + return (T)2.0 * atan(sqrt((T)1.0 - x * x) / ((T)1.0 + x)); + } + + template + inline constexpr T sinh(T x) + { + return (exp(x) - exp(-x)) / (T)2.0; + } + + template + inline constexpr T cosh(T x) + { + return (exp(x) + exp(-x)) / (T)2.0; + } + + template + inline constexpr T tanh(T x) + { + const T exp_px = exp(x); + const T exp_nx = exp(-x); + return (exp_px - exp_nx) / (exp_px + exp_nx); + } + + template + inline constexpr T asinh(T x) + { + return log(x + sqrt(x * x + (T)1.0)); + } + + template + inline constexpr T acosh(T x) + { + return log(x + sqrt(x * x - (T)1.0)); + } + + template + inline constexpr T atanh(T x) + { + return (T)0.5 * log(((T)1.0 + x) / ((T)1.0 - x)); + } + + template + inline constexpr T hypot(T x, T y) + { + return sqrt(x * x + y * y); } } diff --git a/CMakeLists.txt b/CMakeLists.txt index 4db2613d..250b5157 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,8 +4,6 @@ if (NOT ${CMAKE_SYSTEM_NAME} STREQUAL "banan-os") message(FATAL_ERROR "CMAKE_SYSTEM_NAME is not banan-os") endif () -set(BANAN_ENABLE_SSE 1) - project(banan-os CXX C ASM) set(BANAN_BASE_SYSROOT ${CMAKE_SOURCE_DIR}/base-sysroot.tar.gz) diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt index c3509e79..718a8527 100644 --- a/kernel/CMakeLists.txt +++ b/kernel/CMakeLists.txt @@ -175,7 +175,6 @@ add_executable(kernel ${KERNEL_SOURCES}) target_compile_definitions(kernel PRIVATE __is_kernel) target_compile_definitions(kernel PRIVATE __arch=${BANAN_ARCH}) -target_compile_definitions(kernel PRIVATE __enable_sse=${BANAN_ENABLE_SSE}) target_compile_options(kernel PRIVATE -O2 -g diff --git a/kernel/include/kernel/Thread.h b/kernel/include/kernel/Thread.h index 63c6126f..921b56eb 100644 --- a/kernel/include/kernel/Thread.h +++ b/kernel/include/kernel/Thread.h @@ -86,10 +86,8 @@ namespace Kernel InterruptStack& interrupt_stack() { return m_interrupt_stack; } InterruptRegisters& interrupt_registers() { return m_interrupt_registers; } -#if __enable_sse void save_sse(); void load_sse(); -#endif void add_mutex() { m_mutex_count++; } void remove_mutex() { m_mutex_count--; } @@ -127,9 +125,7 @@ namespace Kernel BAN::Atomic m_mutex_count { 0 }; -#if __enable_sse alignas(16) uint8_t m_sse_storage[512] {}; -#endif friend class Process; friend class Scheduler; diff --git a/kernel/kernel/IDT.cpp b/kernel/kernel/IDT.cpp index d43ce705..edc5e061 100644 --- a/kernel/kernel/IDT.cpp +++ b/kernel/kernel/IDT.cpp @@ -182,9 +182,7 @@ namespace Kernel if (tid) { auto& thread = Thread::current(); -#if __enable_sse thread.save_sse(); -#endif if (isr == ISR::PageFault && Thread::current().is_userspace()) { @@ -318,12 +316,8 @@ namespace Kernel ASSERT(Thread::current().state() != Thread::State::Terminated); -done: -#if __enable_sse + done: Thread::current().load_sse(); -#else - return; -#endif } extern "C" void cpp_yield_handler(InterruptStack* interrupt_stack, InterruptRegisters* interrupt_registers) @@ -376,9 +370,7 @@ done: asm volatile("cli; 1: hlt; jmp 1b"); } -#if __enable_sse Thread::current().save_sse(); -#endif if (!InterruptController::get().is_in_service(irq)) dprintln("spurious irq 0x{2H}", irq); @@ -399,9 +391,7 @@ done: ASSERT(Thread::current().state() != Thread::State::Terminated); -#if __enable_sse Thread::current().load_sse(); -#endif } void IDT::register_interrupt_handler(uint8_t index, void (*handler)()) diff --git a/kernel/kernel/Thread.cpp b/kernel/kernel/Thread.cpp index 1e4148c8..4533fbb4 100644 --- a/kernel/kernel/Thread.cpp +++ b/kernel/kernel/Thread.cpp @@ -36,7 +36,6 @@ namespace Kernel static pid_t s_next_tid = 1; -#if __enable_sse alignas(16) static uint8_t s_default_sse_storage[512]; static bool s_default_sse_storage_initialized = false; @@ -51,7 +50,6 @@ namespace Kernel : [mxcsr]"m"(mxcsr) ); } -#endif BAN::ErrorOr Thread::create_kernel(entry_t entry, void* data, Process* process) { @@ -129,14 +127,12 @@ namespace Kernel Thread::Thread(pid_t tid, Process* process) : m_tid(tid), m_process(process) { -#if __enable_sse if (!s_default_sse_storage_initialized) { initialize_default_sse_storage(); s_default_sse_storage_initialized = true; } memcpy(m_sse_storage, s_default_sse_storage, sizeof(m_sse_storage)); -#endif } Thread& Thread::current() @@ -506,7 +502,6 @@ namespace Kernel ASSERT_NOT_REACHED(); } -#if __enable_sse void Thread::save_sse() { asm volatile("fxsave %0" :: "m"(m_sse_storage)); @@ -516,6 +511,5 @@ namespace Kernel { asm volatile("fxrstor %0" :: "m"(m_sse_storage)); } -#endif } diff --git a/userspace/libraries/CMakeLists.txt b/userspace/libraries/CMakeLists.txt index d34fe70e..e6eab60f 100644 --- a/userspace/libraries/CMakeLists.txt +++ b/userspace/libraries/CMakeLists.txt @@ -21,10 +21,5 @@ foreach(library ${USERSPACE_LIBRARIES}) target_link_options(${library_lower} PRIVATE -nolibc) # Default compile options target_compile_options(${library_lower} PRIVATE -g -O2 -Wall -Wextra -Werror) - - target_compile_definitions(${library_lower} PRIVATE __enable_sse=${BANAN_ENABLE_SSE}) - if (NOT BANAN_ENABLE_SSE) - target_compile_options(${library_lower} PRIVATE -mno-sse -mno-sse2) - endif () endif() endforeach() diff --git a/userspace/libraries/LibC/CMakeLists.txt b/userspace/libraries/LibC/CMakeLists.txt index b1e15d35..f0bef2c6 100644 --- a/userspace/libraries/LibC/CMakeLists.txt +++ b/userspace/libraries/LibC/CMakeLists.txt @@ -6,6 +6,7 @@ set(LIBC_SOURCES dlfcn.cpp errno.cpp fcntl.cpp + fenv.cpp ftw.cpp grp.cpp inttypes.cpp @@ -48,10 +49,6 @@ set(LIBC_SOURCES add_library(objlibc OBJECT ${LIBC_SOURCES}) target_compile_definitions(objlibc PRIVATE __arch=${BANAN_ARCH}) -target_compile_definitions(objlibc PRIVATE __enable_sse=${BANAN_ENABLE_SSE}) -if (NOT BANAN_ENABLE_SSE) - target_compile_options(objlibc PRIVATE -mno-sse -mno-sse2) -endif () target_compile_options(objlibc PRIVATE -O2 -g -Wstack-usage=512 -fno-tree-loop-distribute-patterns -fno-exceptions -fpic -nolibc) target_compile_options(objlibc PUBLIC -Wall -Wextra -Werror -Wno-error=stack-usage=) diff --git a/userspace/libraries/LibC/fenv.cpp b/userspace/libraries/LibC/fenv.cpp new file mode 100644 index 00000000..1fbc8cee --- /dev/null +++ b/userspace/libraries/LibC/fenv.cpp @@ -0,0 +1,134 @@ +#include + +static uint16_t read_fpu_control_word() +{ + uint16_t control; + asm volatile("fstcw %0" : "=m"(control)); + return control; +} + +static uint16_t read_fpu_status_word() +{ + uint16_t status; + asm volatile("fstsw %0" : "=m"(status)); + return status; +} + +static uint32_t read_mxcsr() +{ + uint32_t mxcsr; + asm volatile("stmxcsr %0" : "=m"(mxcsr)); + return mxcsr; +} + +static void write_fpu_control_word(uint16_t control) +{ + asm volatile("fldcw %0" :: "m"(control)); +} + +static void write_mxcsr(uint32_t mxcsr) +{ + asm volatile("ldmxcsr %0" :: "m"(mxcsr)); +} + +int fegetenv(fenv_t* envp) +{ + asm volatile("fstenv %0" : "=m"(envp->x87_fpu)); + envp->mxcsr = read_mxcsr(); + return 0; +} + +int fesetenv(const fenv_t* envp) +{ + if (envp == FE_DFL_ENV) + { + asm volatile("finit"); + write_mxcsr(0x1F80); + return 0; + } + + asm volatile("fldenv %0" :: "m"(envp->x87_fpu)); + write_mxcsr(envp->mxcsr); + return 0; +} + +int fegetround(void) +{ + return (read_fpu_control_word() >> 10) & 0x03; +} + +int fesetround(int round) +{ + uint16_t control = read_fpu_control_word(); + control &= ~(3 << 10); + control |= round << 10; + write_fpu_control_word(control); + + uint32_t mxcsr = read_mxcsr(); + mxcsr &= ~(3 << 13); + mxcsr |= round << 13; + write_mxcsr(mxcsr); + + return 0; +} + +int feclearexcept(int excepts) +{ + excepts &= FE_ALL_EXCEPT; + + fenv_t temp_env; + fegetenv(&temp_env); + temp_env.x87_fpu.status &= ~(excepts | (1 << 7)); + temp_env.mxcsr &= ~excepts; + fesetenv(&temp_env); + return 0; +} + +int fetestexcept(int excepts) +{ + excepts &= FE_ALL_EXCEPT; + + const uint16_t status = read_fpu_status_word(); + const uint32_t mxcsr = read_mxcsr(); + return (status | mxcsr) & excepts; +} + +int feholdexcept(fenv_t*); + +int feraiseexcept(int excepts) +{ + excepts &= FE_ALL_EXCEPT; + + fenv_t temp_env; + fegetenv(&temp_env); + temp_env.x87_fpu.status |= excepts; + fesetenv(&temp_env); + asm volatile("fwait"); + return 0; +} + +int fegetexceptflag(fexcept_t* flagp, int excepts) +{ + *flagp = fetestexcept(excepts); + return 0; +} + +int fesetexceptflag(const fexcept_t* flagp, int excepts) +{ + excepts &= FE_ALL_EXCEPT; + + fenv_t temp_env; + fegetenv(&temp_env); + temp_env.x87_fpu.status &= ~(FE_ALL_EXCEPT | (1 << 7)); + temp_env.x87_fpu.status |= *flagp & excepts; + fesetenv(&temp_env); + return 0; +} + +int feupdateenv(const fenv_t* envp) +{ + int excepts = fetestexcept(FE_ALL_EXCEPT); + fesetenv(envp); + feraiseexcept(excepts); + return 0; +} diff --git a/userspace/libraries/LibC/include/fenv.h b/userspace/libraries/LibC/include/fenv.h index 3dfaf0fd..2b3e6350 100644 --- a/userspace/libraries/LibC/include/fenv.h +++ b/userspace/libraries/LibC/include/fenv.h @@ -7,7 +7,46 @@ __BEGIN_DECLS -// FIXME +#include + +#define FE_INVALID (1 << 0) +#define FE_DIVBYZERO (1 << 2) +#define FE_OVERFLOW (1 << 3) +#define FE_UNDERFLOW (1 << 4) +#define FE_INEXACT (1 << 5) +#define FE_ALL_EXCEPT (FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW | FE_INEXACT) + +#define FE_TONEAREST 0 +#define FE_DOWNWARD 1 +#define FE_UPWARD 2 +#define FE_TOWARDZERO 3 + +typedef struct { + uint32_t control; + uint32_t status; + uint32_t __unused[5]; +} __x87_fpu_t; + +typedef struct { + __x87_fpu_t x87_fpu; + uint32_t mxcsr; +} fenv_t; + +typedef uint8_t fexcept_t; + +#define FE_DFL_ENV ((const fenv_t*)0x1) + +int feclearexcept(int); +int fegetenv(fenv_t*); +int fegetexceptflag(fexcept_t*, int); +int fegetround(void); +int feholdexcept(fenv_t*); +int feraiseexcept(int); +int fesetenv(const fenv_t*); +int fesetexceptflag(const fexcept_t*, int); +int fesetround(int); +int fetestexcept(int); +int feupdateenv(const fenv_t*); __END_DECLS diff --git a/userspace/libraries/LibC/include/math.h b/userspace/libraries/LibC/include/math.h index 12583f8d..9b62cbc3 100644 --- a/userspace/libraries/LibC/include/math.h +++ b/userspace/libraries/LibC/include/math.h @@ -5,6 +5,8 @@ #include +#include + __BEGIN_DECLS #ifndef FLT_EVAL_METHOD @@ -22,52 +24,50 @@ __BEGIN_DECLS typedef long double double_t; #endif -// FIXME: define this -// int fpclassify(real-floating x); +#define fpclassify(x) __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO, x) +#define isfinite(x) __builtin_isfinite(x) +#define isgreater(x, y) __builtin_isgreater(x, y) +#define isgreaterequal(x, y) __builtin_isgreaterequal(x, y) +#define isinf(x) __builtin_isinf_sign(x) +#define isless(x, y) __builtin_isless(x, y) +#define islessequal(x, y) __builtin_islessequal(x, y) +#define islessgreater(x, y) __builtin_islessgreater(x, y) +#define isnan(x) __builtin_isnan(x) +#define isnormal(x) __builtin_isnormal(x) +#define isunordered(x, y) __builtin_isunordered(x, y) +#define signbit(x) __builtin_signbit(x) -#define isfinite(x) __builtin_isfinite(x) -#define isgreater(x, y) __builtin_isgreater(x, y) -#define isgreaterequal(x, y) __builtin_isgreaterequal(x, y) -#define isinf(x) __builtin_isinf_sign(x) -#define isless(x, y) __builtin_isless(x, y) -#define islessequal(x, y) __builtin_islessequal(x, y) -#define islessgreater(x, y) __builtin_islessgreater(x, y) -#define isnan(x) __builtin_isnan(x) -#define isnormal(x) __builtin_isnormal(x) -#define isunordered(x, y) __builtin_isunordered(x, y) -#define signbit(x) __builtin_signbit(x) +#define M_E 2.7182818284590452354 +#define M_LOG2E 1.4426950408889634074 +#define M_LOG10E 0.43429448190325182765 +#define M_LN2 0.69314718055994530942 +#define M_LN10 2.30258509299404568402 +#define M_PI 3.14159265358979323846 +#define M_PI_2 1.57079632679489661923 +#define M_PI_4 0.78539816339744830962 +#define M_1_PI 0.31830988618379067154 +#define M_2_PI 0.63661977236758134308 +#define M_2_SQRTPI 1.12837916709551257390 +#define M_SQRT2 1.41421356237309504880 +#define M_SQRT1_2 0.70710678118654752440 -#define M_E 2.7182818284590452354 -#define M_LOG2E 1.4426950408889634074 -#define M_LOG10E 0.43429448190325182765 -#define M_LN2 0.69314718055994530942 -#define M_LN10 2.30258509299404568402 -#define M_PI 3.14159265358979323846 -#define M_PI_2 1.57079632679489661923 -#define M_PI_4 0.78539816339744830962 -#define M_1_PI 0.31830988618379067154 -#define M_2_PI 0.63661977236758134308 -#define M_2_SQRTPI 1.12837916709551257390 -#define M_SQRT2 1.41421356237309504880 -#define M_SQRT1_2 0.70710678118654752440 +#define HUGE_VAL __builtin_huge_val() +#define HUGE_VALF __builtin_huge_valf() +#define HUGE_VALL __builtin_huge_vall() +#define INFINITY __builtin_inff() +#define NAN __builtin_nanf("") -#define HUGE_VAL __builtin_huge_val() -#define HUGE_VALF __builtin_huge_valf() -#define HUGE_VALL __builtin_huge_vall() -#define INFINITY __builtin_inff() -#define NAN __builtin_nanf("") +#define FP_INFINITE 0 +#define FP_NAN 1 +#define FP_NORMAL 2 +#define FP_SUBNORMAL 3 +#define FP_ZERO 4 -#define FP_INFINITE 0 -#define FP_NAN 1 -#define FP_NORMAL 2 -#define FP_SUBNORMAL 3 -#define FP_ZERO 4 +#define FP_ILOGB0 -2147483647 +#define FP_ILOGBNAN +2147483647 -#define FP_ILOGB0 -2147483647 -#define FP_ILOGBNAN +2147483647 - -#define MATH_ERRNO 1 -#define MATH_ERREXCEPT 2 +#define MATH_ERRNO 1 +#define MATH_ERREXCEPT 2 #define math_errhandling (MATH_ERRNO | MATH_ERREXCEPT) @@ -191,9 +191,9 @@ long lroundl(long double); double modf(double, double*); float modff(float, float*); long double modfl(long double, long double*); -double nan(const char*); -float nanf(const char*); -long double nanl(const char*); +#define nan(tagp) __builtin_nan(tagp) +#define nanf(tagp) __builtin_nanf(tagp) +#define nanl(tagp) __builtin_nanl(tagp) double nearbyint(double); float nearbyintf(float); long double nearbyintl(long double); diff --git a/userspace/libraries/LibC/math.cpp b/userspace/libraries/LibC/math.cpp index b9053042..ffec6812 100644 --- a/userspace/libraries/LibC/math.cpp +++ b/userspace/libraries/LibC/math.cpp @@ -1,71 +1,285 @@ +#include + #include +#include -#define BUILTINS1(func) \ - float func##f(float a) { return __builtin_##func##f(a); } \ - double func(double a) { return __builtin_##func(a); } \ - long double func##l(long double a) { return __builtin_##func##l(a); } +#define BAN_FUNC1(func) \ + float func##f(float a) { return BAN::Math::func(a); } \ + double func(double a) { return BAN::Math::func(a); } \ + long double func##l(long double a) { return BAN::Math::func(a); } -#define BUILTINS2(func) \ - float func##f(float a, float b) { return __builtin_##func##f(a, b); } \ - double func(double a, double b) { return __builtin_##func(a, b); } \ - long double func##l(long double a, long double b) { return __builtin_##func##l(a, b); } +#define BAN_FUNC2(func) \ + float func##f(float a, float b) { return BAN::Math::func(a, b); } \ + double func(double a, double b) { return BAN::Math::func(a, b); } \ + long double func##l(long double a, long double b) { return BAN::Math::func(a, b); } -#define BUILTINS2_TYPE(func, type) \ - float func##f(float a, type b) { return __builtin_##func##f(a, b); } \ - double func(double a, type b) { return __builtin_##func(a, b); } \ - long double func##l(long double a, type b) { return __builtin_##func##l(a, b); } +#define BAN_FUNC2_PTR(func) \ + float func##f(float a, float* b) { return BAN::Math::func(a, b); } \ + double func(double a, double* b) { return BAN::Math::func(a, b); } \ + long double func##l(long double a, long double* b) { return BAN::Math::func(a, b); } + +#define BAN_FUNC2_TYPE(func, type) \ + float func##f(float a, type b) { return BAN::Math::func(a, b); } \ + double func(double a, type b) { return BAN::Math::func(a, b); } \ + long double func##l(long double a, type b) { return BAN::Math::func(a, b); } + +#define FUNC_EXPR1(func, ...) \ + float func##f(float a) { return __VA_ARGS__; } \ + double func(double a) { return __VA_ARGS__; } \ + long double func##l(long double a) { return __VA_ARGS__; } + +#define FUNC_EXPR2(func, ...) \ + float func##f(float a, float b) { return __VA_ARGS__; } \ + double func(double a, double b) { return __VA_ARGS__; } \ + long double func##l(long double a, long double b) { return __VA_ARGS__; } + +#define FUNC_EXPR3(func, ...) \ + float func##f(float a, float b, float c) { return __VA_ARGS__; } \ + double func(double a, double b, double c) { return __VA_ARGS__; } \ + long double func##l(long double a, long double b, long double c) { return __VA_ARGS__; } + +#define FUNC_EXPR1_RET(func, ret, ...) \ + ret func##f(float a) { return __VA_ARGS__; } \ + ret func(double a) { return __VA_ARGS__; } \ + ret func##l(long double a) { return __VA_ARGS__; } + +#define FUNC_EXPR2_TYPE(func, type, ...) \ + float func##f(float a, type b) { return __VA_ARGS__; } \ + double func(double a, type b) { return __VA_ARGS__; } \ + long double func##l(long double a, type b) { return __VA_ARGS__; } + +template +struct integral_atleast; + +template requires(sizeof(T) <= sizeof(uint8_t)) +struct integral_atleast { using type = uint8_t; }; + +template requires(sizeof(T) > sizeof(uint8_t) && sizeof(T) <= sizeof(uint16_t)) +struct integral_atleast { using type = uint16_t; }; + +template requires(sizeof(T) > sizeof(uint16_t) && sizeof(T) <= sizeof(uint32_t)) +struct integral_atleast { using type = uint32_t; }; + +template requires(sizeof(T) > sizeof(uint32_t) && sizeof(T) <= sizeof(uint64_t)) +struct integral_atleast { using type = uint64_t; }; + +template requires(sizeof(T) > sizeof(uint64_t) && sizeof(T) <= sizeof(__uint128_t)) +struct integral_atleast { using type = __uint128_t; }; + +template +struct __FloatDecompose; + +template +struct __FloatDecompose +{ + T mantissa : mantissa_bits; + T : 1; + T exponent : exponent_bits; + T sign : 1; +}; + +template +struct __FloatDecompose +{ + T mantissa : mantissa_bits; + T exponent : exponent_bits; + T sign : 1; +}; + +template +struct FloatDecompose +{ + using value_type = integral_atleast::type; + + static constexpr size_t mantissa_bits + = BAN::is_same_v ? FLT_MANT_DIG - 1 + : BAN::is_same_v ? DBL_MANT_DIG - 1 + : BAN::is_same_v ? LDBL_MANT_DIG - 1 + : 0; + + static constexpr size_t exponent_bits + = BAN::is_same_v ? 8 + : BAN::is_same_v ? 11 + : BAN::is_same_v ? 15 + : 0; + + static constexpr value_type mantissa_max + = (static_cast(1) << mantissa_bits) - 1; + + static constexpr value_type exponent_max + = (static_cast(1) << exponent_bits) - 1; + + FloatDecompose(T x) + : raw(x) + {} + + union + { + T raw; + __FloatDecompose< + value_type, + mantissa_bits, + exponent_bits, + BAN::is_same_v + > bits; + }; +}; + +template +static T1 nextafter_impl(T1 x, T2 y) +{ + if (isnan(x) || isnan(y)) + return BAN::numeric_limits::quiet_NaN(); + + if (!isfinite(x) || x == y) + return x; + + FloatDecompose decompose(x); + + // at zero + if ((T2)x == (T2)0.0) + { + decompose.bits.mantissa = 1; + decompose.bits.sign = (x > y); + return decompose.raw; + } + + // away from zero + if ((x > y) == decompose.bits.sign) + { + decompose.bits.mantissa++; + if (decompose.bits.mantissa == 0) + { + decompose.bits.exponent++; + if (decompose.bits.exponent == decompose.exponent_max) + decompose.bits.mantissa = 0; + } + return decompose.raw; + } + + // towards zero + decompose.bits.mantissa--; + if (decompose.bits.mantissa == decompose.mantissa_max) + { + if (decompose.bits.exponent == 0) + return 0.0; + decompose.bits.exponent--; + } + + return decompose.raw; +} + +static long double tgamma_impl(long double x) +{ + constexpr long double pi = BAN::numbers::pi_v; + + if (x == 0.0L) + return BAN::numeric_limits::infinity(); + + // reflection formula + if (x < 0.5L) + return pi / (BAN::Math::sin(pi * x) * tgamma_impl(1.0L - x)); + x -= 1.0L; + + // Lanczos approximation + + constexpr long double g = 8.0L; + constexpr long double p[] { + 0.9999999999999999298e+0L, + 1.9753739023578852322e+3L, + -4.3973823927922428918e+3L, + 3.4626328459862717019e+3L, + -1.1569851431631167820e+3L, + 1.5453815050252775060e+2L, + -6.2536716123689161798e+0L, + 3.4642762454736807441e-2L, + -7.4776171974442977377e-7L, + 6.3041253821852264261e-8L, + -2.7405717035683877489e-8L, + 4.0486948817567609101e-9L + }; + constexpr long double sqrt_2pi = 2.5066282746310005024L; + + long double A = p[0]; + for (size_t i = 1; i < sizeof(p) / sizeof(*p); i++) + A += p[i] / (x + i); + + const long double t = x + g + 0.5L; + return sqrt_2pi * BAN::Math::pow(t, x + 0.5L) * BAN::Math::exp(-t) * A; +} + +static long double erf_impl(long double x) +{ + long double sum = 0.0L; + for (size_t n = 0; n < 100; n++) + sum += x / (2.0L * n + 1.0L) * BAN::Math::ldexp(-x * x, n) / tgamma_impl(n + 1.0L); + + constexpr long double sqrt_pi = 1.77245385090551602729L; + return 2.0L / sqrt_pi * sum; +} __BEGIN_DECLS -#if __enable_sse -BUILTINS1(acos) -BUILTINS1(acosh) -BUILTINS1(asin) -BUILTINS1(asinh) -BUILTINS1(atan) -BUILTINS2(atan2) -BUILTINS1(atanh) -BUILTINS1(cbrt) -BUILTINS1(ceil) -BUILTINS2(copysign) -BUILTINS1(cos) -BUILTINS1(cosh) -BUILTINS1(erf) -BUILTINS1(erfc) -BUILTINS1(exp) -BUILTINS1(exp2) -BUILTINS1(expm1) -BUILTINS1(fabs) -BUILTINS2(fdim) -BUILTINS1(floor) -BUILTINS2(fmax) -BUILTINS2(fmin) -BUILTINS2(fmod) -BUILTINS2(hypot) -BUILTINS1(j0) -BUILTINS1(j1) -BUILTINS2_TYPE(ldexp, int) -BUILTINS1(lgamma) -BUILTINS1(log) -BUILTINS1(log10) -BUILTINS1(log1p) -BUILTINS1(log2) -BUILTINS1(logb) -BUILTINS1(nearbyint) -BUILTINS2(nextafter) -BUILTINS2(pow) -BUILTINS2(remainder) -BUILTINS1(rint) -BUILTINS1(round) -BUILTINS1(sin) -BUILTINS1(sinh) -BUILTINS1(sqrt) -BUILTINS1(tan) -BUILTINS1(tanh) -BUILTINS1(tgamma) -BUILTINS1(trunc) -BUILTINS1(y0) -BUILTINS1(y1) -#endif +// FIXME: add handling for nan and infinity values + +BAN_FUNC1(acos) +BAN_FUNC1(acosh) +BAN_FUNC1(asin) +BAN_FUNC1(asinh) +BAN_FUNC1(atan) +BAN_FUNC2(atan2) +BAN_FUNC1(atanh) +BAN_FUNC1(cbrt) +BAN_FUNC1(ceil) +BAN_FUNC2(copysign) +BAN_FUNC1(cos) +BAN_FUNC1(cosh) +FUNC_EXPR1(erf, erf_impl(a)) +FUNC_EXPR1(erfc, 1.0 - erf_impl(a)) +BAN_FUNC1(exp) +BAN_FUNC1(exp2) +FUNC_EXPR1(expm1, ({ a -= 1.0; BAN::Math::exp(a); })) +FUNC_EXPR1(fabs, BAN::Math::abs(a)) +FUNC_EXPR2(fdim, a > b ? a - b : 0.0) +BAN_FUNC1(floor) +FUNC_EXPR3(fma, (a * b) + c) +FUNC_EXPR2(fmax, a < b ? b : a) +FUNC_EXPR2(fmin, a < b ? a : b) +BAN_FUNC2(fmod) +BAN_FUNC2_TYPE(frexp, int*) +BAN_FUNC2(hypot) +FUNC_EXPR1_RET(ilogb, int, BAN::Math::logb(a)) +// j0, j1, jn +BAN_FUNC2_TYPE(ldexp, int) +FUNC_EXPR1(lgamma, BAN::Math::log(BAN::Math::abs(tgamma_impl(a)))) +FUNC_EXPR1_RET(llrint, long long, BAN::Math::rint(a)) +FUNC_EXPR1_RET(llround, long long, BAN::Math::round(a)) +BAN_FUNC1(log) +BAN_FUNC1(log10) +FUNC_EXPR1(log1p, ({ a += 1.0; BAN::Math::log(a); })); +BAN_FUNC1(log2) +BAN_FUNC1(logb) +FUNC_EXPR1_RET(lrint, long, BAN::Math::rint(a)) +FUNC_EXPR1_RET(lround, long, BAN::Math::round(a)) +BAN_FUNC2_PTR(modf) +FUNC_EXPR1(nearbyint, BAN::Math::rint(a)) +FUNC_EXPR2(nextafter, nextafter_impl(a, b)) +FUNC_EXPR2_TYPE(nexttoward, long double, nextafter_impl(a, b)) +FUNC_EXPR2(pow, ({ if (isnan(a)) return a; if (isnan(b)) return b; BAN::Math::pow(a, b); })) +// remainder +// remquo +BAN_FUNC1(rint) +FUNC_EXPR1(round, ({ if (!isfinite(a)) return a; BAN::Math::round(a); })) +FUNC_EXPR2_TYPE(scalbln, long, BAN::Math::scalbn(a, b)) +FUNC_EXPR2_TYPE(scalbn, int, BAN::Math::scalbn(a, b)) +BAN_FUNC1(sin) +BAN_FUNC1(sinh) +BAN_FUNC1(sqrt) +BAN_FUNC1(tan) +BAN_FUNC1(tanh) +FUNC_EXPR1(tgamma, tgamma_impl(a)) +BAN_FUNC1(trunc) +// y0, y1, yn + __END_DECLS diff --git a/userspace/libraries/LibC/printf_impl.cpp b/userspace/libraries/LibC/printf_impl.cpp index f2b5660e..58153ed5 100644 --- a/userspace/libraries/LibC/printf_impl.cpp +++ b/userspace/libraries/LibC/printf_impl.cpp @@ -129,7 +129,6 @@ static void integer_to_string(char* buffer, T value, int base, bool upper, forma buffer[offset++] = '\0'; } -#if __enable_sse template static void floating_point_to_string(char* buffer, T value, bool upper, const format_options_t options) { @@ -259,7 +258,6 @@ static void floating_point_to_exponent_string(char* buffer, T value, bool upper, exponent_options.width = 3; integer_to_string(buffer + offset, exponent, 10, upper, exponent_options); } -#endif extern "C" int printf_impl(const char* format, va_list arguments, int (*putc_fun)(int, void*), void* data) { @@ -496,7 +494,6 @@ extern "C" int printf_impl(const char* format, va_list arguments, int (*putc_fun format++; break; } -#if __enable_sse case 'e': case 'E': { @@ -529,7 +526,6 @@ extern "C" int printf_impl(const char* format, va_list arguments, int (*putc_fun case 'A': // TODO break; -#endif case 'c': { conversion[0] = va_arg(arguments, int); diff --git a/userspace/libraries/LibC/scanf_impl.cpp b/userspace/libraries/LibC/scanf_impl.cpp index b5b35c1f..9d0e11b9 100644 --- a/userspace/libraries/LibC/scanf_impl.cpp +++ b/userspace/libraries/LibC/scanf_impl.cpp @@ -260,7 +260,6 @@ int scanf_impl(const char* format, va_list arguments, int (*__getc_fun)(bool adv } }; -#if __enable_sse auto parse_floating_point_internal = [&parse_integer_internal, &get_input, &in](BASE_TYPE, bool negative, int width, T* out, bool require_start = true) -> ConversionResult { @@ -435,7 +434,6 @@ int scanf_impl(const char* format, va_list arguments, int (*__getc_fun)(bool adv return ConversionResult::MATCH_FAILURE; } }; -#endif auto parse_string = [&arguments, &get_input, &in](uint8_t* mask, bool exclude, bool suppress, bool allocate, int min_len, int max_len, bool terminate) -> ConversionResult @@ -520,11 +518,9 @@ int scanf_impl(const char* format, va_list arguments, int (*__getc_fun)(bool adv case 'x': result = parse_integer(BASE_TYPE<16>{}, IS_UNSIGNED {}, conversion.suppress, conversion.field_width, conversion.length); break; case 'X': result = parse_integer(BASE_TYPE<16>{}, IS_UNSIGNED {}, conversion.suppress, conversion.field_width, conversion.length); break; case 'p': result = parse_integer(BASE_TYPE<16>{}, IS_UNSIGNED {}, conversion.suppress, conversion.field_width, LengthModifier::j); break; -#if __enable_sse case 'a': case 'e': case 'f': case 'g': result = parse_floating_point(conversion.suppress, conversion.field_width, conversion.length); break; -#endif case 'S': conversion.length = LengthModifier::l; // fall through diff --git a/userspace/libraries/LibC/stdlib.cpp b/userspace/libraries/LibC/stdlib.cpp index 4fc4b561..5c336671 100644 --- a/userspace/libraries/LibC/stdlib.cpp +++ b/userspace/libraries/LibC/stdlib.cpp @@ -162,7 +162,6 @@ static T strtoT(const char* str, char** endp, int base, int& error) return result; } -#if __enable_sse template static T strtoT(const char* str, char** endp, int& error) { @@ -306,16 +305,13 @@ static T strtoT(const char* str, char** endp, int& error) if (exponent) result *= BAN::Math::pow((base == 10) ? 10 : 2, exponent); - return result; + return negative ? -result : result; } -#endif -#if __enable_sse double atof(const char* str) { return strtod(str, nullptr); } -#endif int atoi(const char* str) { @@ -332,7 +328,6 @@ long long atoll(const char* str) return strtoll(str, nullptr, 10); } -#if __enable_sse float strtof(const char* __restrict str, char** __restrict endp) { return strtoT(str, endp, errno); @@ -347,7 +342,6 @@ long double strtold(const char* __restrict str, char** __restrict endp) { return strtoT(str, endp, errno); } -#endif long strtol(const char* __restrict str, char** __restrict endp, int base) { diff --git a/userspace/libraries/LibImage/Image.cpp b/userspace/libraries/LibImage/Image.cpp index 1809baac..aa5d737b 100644 --- a/userspace/libraries/LibImage/Image.cpp +++ b/userspace/libraries/LibImage/Image.cpp @@ -6,7 +6,6 @@ #include #include -#include #include namespace LibImage @@ -132,13 +131,8 @@ namespace LibImage { const double src_x = x * ratio_x; const double src_y = y * ratio_y; -#if __enable_sse - const double weight_x = src_x - floor(src_x); - const double weight_y = src_y - floor(src_y); -#else - const double weight_x = src_x - (uint64_t)src_x; - const double weight_y = src_y - (uint64_t)src_y; -#endif + const double weight_x = src_x - BAN::Math::floor(src_x); + const double weight_y = src_y - BAN::Math::floor(src_y); const Color avg_t = Color::average( get_clamped_color(src_x + 0.0, src_y), @@ -177,13 +171,8 @@ namespace LibImage { const double src_x = x * ratio_x; const double src_y = y * ratio_y; -#if __enable_sse - const double weight_x = src_x - floor(src_x); - const double weight_y = src_y - floor(src_y); -#else - const double weight_x = src_x - (uint64_t)src_x; - const double weight_y = src_y - (uint64_t)src_y; -#endif + const double weight_x = src_x - BAN::Math::floor(src_x); + const double weight_y = src_y - BAN::Math::floor(src_y); FloatingColor values[4]; for (int64_t m = -1; m <= 2; m++) diff --git a/userspace/programs/CMakeLists.txt b/userspace/programs/CMakeLists.txt index 342554f7..6e49c53f 100644 --- a/userspace/programs/CMakeLists.txt +++ b/userspace/programs/CMakeLists.txt @@ -46,9 +46,4 @@ foreach(project ${USERSPACE_PROGRAMS}) target_link_options(${project} PRIVATE -nolibc) # Default compile options target_compile_options(${project} PRIVATE -g -O2 -Wall -Wextra -Werror) - - target_compile_definitions(${project} PRIVATE __enable_sse=${BANAN_ENABLE_SSE}) - if (NOT BANAN_ENABLE_SSE) - target_compile_options(${project} PRIVATE -mno-sse -mno-sse2) - endif () endforeach() diff --git a/userspace/tests/CMakeLists.txt b/userspace/tests/CMakeLists.txt index bbbba1cb..26a8912f 100644 --- a/userspace/tests/CMakeLists.txt +++ b/userspace/tests/CMakeLists.txt @@ -21,9 +21,4 @@ foreach(project ${USERSPACE_TESTS}) target_link_options(${project} PRIVATE -nolibc) # Default compile options target_compile_options(${project} PRIVATE -g -O2) - - target_compile_definitions(${project} PRIVATE __enable_sse=${BANAN_ENABLE_SSE}) - if (NOT BANAN_ENABLE_SSE) - target_compile_options(${project} PRIVATE -mno-sse -mno-sse2) - endif () endforeach()