Kernel/userspace: rework floating point math

SSE is now unconditionally enabled everywhere, and most of math.h is now
actually implemented. Using __builtin_<func> led to many hangs where
the builtin function would just call itself.
This commit is contained in:
2024-11-03 20:25:35 +02:00
parent ed19bb11fe
commit f4be37700f
18 changed files with 827 additions and 210 deletions

View File

@@ -1,18 +1,18 @@
#pragma once
#include <BAN/Limits.h>
#include <BAN/Numbers.h>
#include <BAN/Traits.h>
#include <stddef.h>
#include <stdint.h>
#include <float.h>
namespace BAN::Math
{
template<typename T>
inline constexpr T abs(T val)
inline constexpr T abs(T x)
{
return val < 0 ? -val : val;
return x < 0 ? -x : x;
}
template<typename T>
@@ -59,11 +59,11 @@ namespace BAN::Math
}
template<integral T>
inline constexpr bool is_power_of_two(T value)
inline constexpr bool is_power_of_two(T x)
{
if (value == 0)
if (x == 0)
return false;
return (value & (value - 1)) == 0;
return (x & (x - 1)) == 0;
}
template<BAN::integral T>
@@ -89,43 +89,181 @@ namespace BAN::Math
template<typename T>
requires is_same_v<T, unsigned int> || is_same_v<T, unsigned long> || is_same_v<T, unsigned long long>
inline constexpr T ilog2(T value)
inline constexpr T ilog2(T x)
{
if constexpr(is_same_v<T, unsigned int>)
return sizeof(T) * 8 - __builtin_clz(value) - 1;
return sizeof(T) * 8 - __builtin_clz(x) - 1;
if constexpr(is_same_v<T, unsigned long>)
return sizeof(T) * 8 - __builtin_clzl(value) - 1;
return sizeof(T) * 8 - __builtin_clzll(value) - 1;
return sizeof(T) * 8 - __builtin_clzl(x) - 1;
return sizeof(T) * 8 - __builtin_clzll(x) - 1;
}
template<floating_point T>
inline constexpr T log2(T value)
inline constexpr T floor(T x)
{
T result;
asm volatile("fyl2x" : "=t"(result) : "0"(value), "u"((T)1.0) : "st(1)");
return result;
if constexpr(is_same_v<T, float>)
return __builtin_floorf(x);
if constexpr(is_same_v<T, double>)
return __builtin_floor(x);
if constexpr(is_same_v<T, long double>)
return __builtin_floorl(x);
}
template<floating_point T>
inline constexpr T log10(T value)
inline constexpr T ceil(T x)
{
constexpr T INV_LOG_2_10 = 0.3010299956639811952137388947244930267681898814621085413104274611;
T result;
asm volatile("fyl2x" : "=t"(result) : "0"(value), "u"(INV_LOG_2_10) : "st(1)");
return result;
if constexpr(is_same_v<T, float>)
return __builtin_ceilf(x);
if constexpr(is_same_v<T, double>)
return __builtin_ceil(x);
if constexpr(is_same_v<T, long double>)
return __builtin_ceill(x);
}
template<floating_point T>
inline constexpr T log(T value, T base)
inline constexpr T round(T x)
{
return log2(value) / log2(base);
if (x == (T)0.0)
return x;
if (x > (T)0.0)
return floor<T>(x + (T)0.5);
return ceil<T>(x - (T)0.5);
}
template<floating_point T>
inline constexpr T pow(T base, T exp)
inline constexpr T trunc(T x)
{
T result;
asm volatile(
if constexpr(is_same_v<T, float>)
return __builtin_truncf(x);
if constexpr(is_same_v<T, double>)
return __builtin_trunc(x);
if constexpr(is_same_v<T, long double>)
return __builtin_truncl(x);
}
// Rounds x to an integral value using the x87 `frndint` instruction
// (which rounds according to the FPU's current rounding mode).
// The "+t" constraint makes x both the input and the output on the top of
// the x87 register stack.
template<floating_point T>
inline constexpr T rint(T x)
{
asm("frndint" : "+t"(x));
return x;
}
// Floating-point remainder of a / b, computed with the x87 `fprem` instruction.
// A single `fprem` may only produce a partial remainder; it signals this via
// the C2 bit of the FPU status word, so the asm loops until C2 is clear.
template<floating_point T>
inline constexpr T fmod(T a, T b)
{
asm(
"1:"
"fprem;"
// copy the FPU status word into ax; C2 ends up as bit 2 (mask 4) of ah
"fnstsw %%ax;"
"testb $4, %%ah;"
// C2 still set -> reduction incomplete, run fprem again
"jne 1b;"
// st(0): dividend on entry, remainder on exit
: "+t"(a)
// st(1): divisor, left untouched
: "u"(b)
// ax is overwritten by fnstsw
: "ax"
);
return a;
}
// Splits x into an integral part (stored in *iptr) and a fractional part
// (returned). The fractional part is obtained as fmod(x, 1) and the integral
// part as x minus that fraction.
//
// Infinite inputs are handled explicitly: fmod(+-inf, 1) is NaN, but modf is
// required to store +-inf in *iptr and return a zero with the sign of x.
template<floating_point T>
inline constexpr T modf(T x, T* iptr)
{
	// x is +-infinity exactly when it compares equal to itself (so it is not
	// NaN) while x - x is NaN. Done arithmetically to avoid relying on a
	// library/builtin classification call.
	const T self_diff = x - x;
	if (x == x && self_diff != self_diff)
	{
		*iptr = x;
		return x < (T)0.0 ? (T)-0.0 : (T)0.0;
	}

	const T frac = BAN::Math::fmod<T>(x, 1);
	*iptr = x - frac;
	return frac;
}
// Decomposes num into a fraction (returned) and a power-of-two exponent
// (stored in *exp). Zero is special-cased: *exp is set to 0 and 0 is returned.
//
// `fxtract` replaces st(0) with the significand and leaves the exponent in
// st(1); the significand is then halved and the exponent incremented by one,
// which keeps the product fraction * 2^exp unchanged.
// NOTE(review): infinities and NaN are not special-cased here; the (int) cast
// of the extracted exponent for such inputs should be confirmed against callers.
template<floating_point T>
inline constexpr T frexp(T num, int* exp)
{
if (num == 0.0)
{
*exp = 0;
return 0.0;
}
T _exp;
// out: num = significand (st(0)), _exp = exponent (st(1))
asm("fxtract" : "+t"(num), "=u"(_exp));
*exp = (int)_exp + 1;
return num / (T)2.0;
}
// Returns a value with the magnitude of x and the sign of y.
//
// The sign is taken from the sign bit rather than from a `< 0` comparison, so
// negative zero and NaN operands are handled: e.g. copysign(1, -0.0) is -1 and
// copysign(-0.0, 1) is +0.0, which a comparison-based test gets wrong because
// -0.0 < 0 is false.
template<floating_point T>
inline constexpr T copysign(T x, T y)
{
	// __builtin_signbit is type-generic and returns non-zero when the sign bit is set
	const bool x_negative = __builtin_signbit(x) != 0;
	const bool y_negative = __builtin_signbit(y) != 0;
	if (x_negative != y_negative)
		x = -x; // unary minus flips only the sign bit
	return x;
}
namespace detail
{
// Thin wrapper around the x87 `fyl2x` instruction, which computes
// st(1) * log2(st(0)): here y * log2(x).
// x is passed in st(0) and receives the result; y is passed in st(1).
// The instruction pops the register stack, which is why st(1) is listed as
// clobbered.
template<floating_point T>
inline constexpr T fyl2x(T x, T y)
{
asm("fyl2x" : "+t"(x) : "u"(y) : "st(1)");
return x;
}
}
// Natural logarithm of x, evaluated as ln2_v * log2(x) through detail::fyl2x.
// (Assumes numbers::ln2_v<T> is ln(2), as its name suggests.)
template<floating_point T>
inline constexpr T log(T x)
{
	const T scale = numbers::ln2_v<T>;
	return detail::fyl2x<T>(x, scale);
}
// Base-2 logarithm of x: detail::fyl2x with a multiplier of one.
template<floating_point T>
inline constexpr T log2(T x)
{
	const T unit = 1.0;
	return detail::fyl2x<T>(x, unit);
}
// Base-10 logarithm of x, evaluated as lg2_v * log2(x) through detail::fyl2x.
// (Assumes numbers::lg2_v<T> is log10(2), as its name suggests.)
template<floating_point T>
inline constexpr T log10(T x)
{
	const T scale = numbers::lg2_v<T>;
	return detail::fyl2x<T>(x, scale);
}
// Returns the unbiased radix-2 exponent of x as a floating-point value,
// e.g. logb(10) is 3 and logb(0.75) is -1.
//
// A plain log2(x) is not a valid implementation: it is not integer-valued for
// non powers of two and is NaN for negative x. The x87 `fxtract` instruction
// extracts the exponent exactly and ignores the sign of x.
template<floating_point T>
inline constexpr T logb(T x)
{
	static_assert(FLT_RADIX == 2);

	T exponent;
	// after fxtract: st(0) = significand (written back to x, unused),
	//                st(1) = exponent
	asm("fxtract" : "+t"(x), "=u"(exponent));
	return exponent;
}
// Computes 2^x with x87 instructions.
// For |x| <= 1 the result is `f2xm1` (2^st(0) - 1) plus one. Larger arguments
// are split: `fprem` against 1.0 yields the fractional part for `f2xm1`, and
// `fscale` then scales by the power of two taken from the original x.
// NOTE(review): the second asm statement pushes two extra values onto the x87
// register stack that are not declared as operands or clobbers; confirm the
// stack cannot be full at this point.
template<floating_point T>
inline constexpr T exp2(T x)
{
if (abs(x) <= (T)1.0)
{
asm("f2xm1" : "+t"(x));
return x + (T)1.0;
}
asm(
// stack (top first): 1, x
"fld1;"
// stack: x, 1, x
"fld %%st(1);"
// stack: remainder of x by 1, 1, x
"fprem;"
// stack: 2^rem - 1, 1, x
"f2xm1;"
// stack: 2^rem, x
"faddp;"
// stack: 2^rem scaled by the power of two from st(1), x
"fscale;"
// drop the leftover x, leaving only the result
"fstp %%st(1);"
: "+t"(x)
);
return x;
}
// e^x, evaluated as 2^(x * log2e_v) via exp2.
// (Assumes numbers::log2e_v<T> is log2(e), as its name suggests.)
template<floating_point T>
inline constexpr T exp(T x)
{
	const T base2_exponent = x * numbers::log2e_v<T>;
	return exp2<T>(base2_exponent);
}
template<floating_point T>
inline constexpr T pow(T x, T y)
{
asm(
"fyl2x;"
"fld1;"
"fld %%st(1);"
@@ -133,12 +271,170 @@ namespace BAN::Math
"f2xm1;"
"faddp;"
"fscale;"
"fxch %%st(1);"
"fstp %%st;"
: "=t"(result)
: "0"(base), "u"(exp)
: "+t"(x), "+u"(y)
);
return result;
return x;
}
// Scales x by a power of two using the x87 `fscale` instruction.
// x is placed in st(0) (input and output) and n, converted to T, in st(1);
// `fscale` does not pop the stack, so no clobber is needed.
template<floating_point T>
inline constexpr T scalbn(T x, int n)
{
asm("fscale" : "+t"(x) : "u"(static_cast<T>(n)));
return x;
}
// Returns x * 2^y.
//
// With a binary radix this is the same operation as scalbn, so it is delegated
// there. Building 2^|y| separately by repeated squaring and multiplying
// afterwards is avoided on purpose: that intermediate overflows to infinity
// (or its reciprocal underflows to zero) for exponents where x * 2^y itself is
// still representable, and negating y is undefined for INT_MIN.
template<floating_point T>
inline constexpr T ldexp(T x, int y)
{
	static_assert(FLT_RADIX == 2);
	return scalbn<T>(x, y);
}
// Square root of x, computed in place on the top of the x87 register stack
// with the `fsqrt` instruction.
template<floating_point T>
inline constexpr T sqrt(T x)
{
asm("fsqrt" : "+t"(x));
return x;
}
// Cube root of value.
//
// pow() is evaluated through a base-2 logarithm and therefore cannot take a
// negative base; the cube root is an odd function, so negative inputs are
// computed on the magnitude and negated. Zero is returned as-is so that the
// sign of -0.0 is preserved.
template<floating_point T>
inline constexpr T cbrt(T value)
{
	if (value == (T)0.0)
		return value;

	// computed in T so long double does not get a double-precision exponent
	const T one_third = (T)1.0 / (T)3.0;
	if (value < (T)0.0)
		return -pow<T>(-value, one_third);
	return pow<T>(value, one_third);
}
// Sine of x (radians) using the x87 `fsin` instruction.
//
// `fsin` only accepts |x| < 2^63, so arguments at or beyond that bound are
// first reduced modulo 2*pi. Smaller arguments are passed through untouched,
// matching cos(): reducing them with a T-precision 2*pi would only add
// rounding error on top of the instruction's own argument reduction.
template<floating_point T>
inline constexpr T sin(T x)
{
	// 9223372036854775808 == 2^63
	if (abs(x) >= (T)9223372036854775808.0)
		x = fmod<T>(x, (T)2.0 * numbers::pi_v<T>);
	asm("fsin" : "+t"(x));
	return x;
}
// Cosine of x (radians) using the x87 `fcos` instruction.
// Arguments whose magnitude is at least 9223372036854775808 (2^63) are first
// reduced modulo 2*pi with fmod; presumably because `fcos` does not accept
// operands outside that range (see the Intel SDM) - smaller arguments are
// handed to the instruction unchanged.
template<floating_point T>
inline constexpr T cos(T x)
{
if (abs(x) >= (T)9223372036854775808.0)
x = fmod<T>(x, (T)2.0 * numbers::pi_v<T>);
asm("fcos" : "+t"(x));
return x;
}
// Tangent of x (radians) using the x87 `fptan` instruction.
//
// `fptan` replaces st(0) with the tangent and then pushes 1.0, so after the
// instruction st(0) holds 1.0 and st(1) holds the result. For operands with
// |x| >= 2^63 the instruction does nothing (no push), which would leave the
// declared outputs invalid; such arguments are therefore reduced modulo 2*pi
// first, the same way cos() does.
template<floating_point T>
inline constexpr T tan(T x)
{
	// 9223372036854775808 == 2^63
	if (abs(x) >= (T)9223372036854775808.0)
		x = fmod<T>(x, (T)2.0 * numbers::pi_v<T>);

	T one, ret;
	asm(
		"fptan"
		: "=t"(one), "=u"(ret)
		: "0"(x)
	);
	return ret;
}
// Two-argument arctangent of y / x, computed with the x87 `fpatan`
// instruction (arctan of st(1) / st(0)).
// x is placed in st(0) and receives the result; y is placed in st(1).
// `fpatan` pops the register stack, hence the st(1) clobber.
template<floating_point T>
inline constexpr T atan2(T y, T x)
{
asm(
"fpatan"
: "+t"(x)
: "u"(y)
: "st(1)"
);
return x;
}
// Arctangent of x, expressed as atan2(x, 1).
template<floating_point T>
inline constexpr T atan(T x)
{
	const T unit = 1.0;
	return atan2<T>(x, unit);
}
// Arcsine of x.
// The exact inputs 0, 1 and -1 return 0, pi/2 and -pi/2 directly; every other
// input uses asin(x) = 2 * atan(x / (1 + sqrt(1 - x^2))).
template<floating_point T>
inline constexpr T asin(T x)
{
	const T zero = static_cast<T>(0.0);
	const T one = static_cast<T>(1.0);
	const T two = static_cast<T>(2.0);

	if (x == zero)
		return zero;
	if (x == one)
		return numbers::pi_v<T> / two;
	if (x == -one)
		return -numbers::pi_v<T> / two;

	const T root = sqrt<T>(one - x * x);
	return two * atan<T>(x / (one + root));
}
// Arccosine of x.
// The exact inputs 0, 1 and -1 return pi/2, 0 and pi directly; every other
// input uses acos(x) = 2 * atan(sqrt(1 - x^2) / (1 + x)).
template<floating_point T>
inline constexpr T acos(T x)
{
	const T zero = static_cast<T>(0.0);
	const T one = static_cast<T>(1.0);
	const T two = static_cast<T>(2.0);

	if (x == zero)
		return numbers::pi_v<T> / two;
	if (x == one)
		return zero;
	if (x == -one)
		return numbers::pi_v<T>;

	const T root = sqrt<T>(one - x * x);
	return two * atan<T>(root / (one + x));
}
// Hyperbolic sine: (e^x - e^-x) / 2.
template<floating_point T>
inline constexpr T sinh(T x)
{
	const T growing = exp<T>(x);
	const T decaying = exp<T>(-x);
	return (growing - decaying) / static_cast<T>(2.0);
}
// Hyperbolic cosine: (e^x + e^-x) / 2.
template<floating_point T>
inline constexpr T cosh(T x)
{
	const T growing = exp<T>(x);
	const T decaying = exp<T>(-x);
	return (growing + decaying) / static_cast<T>(2.0);
}
// Hyperbolic tangent of x.
//
// Evaluated as sign(x) * (1 - e^(-2|x|)) / (1 + e^(-2|x|)). The exponential
// argument is never positive, so it cannot overflow; the direct form
// (e^x - e^-x) / (e^x + e^-x) turns into inf / inf = NaN for large |x|,
// where the correct result is +-1.
template<floating_point T>
inline constexpr T tanh(T x)
{
	const T decay = exp<T>((T)-2.0 * abs(x));
	const T magnitude = ((T)1.0 - decay) / ((T)1.0 + decay);
	return x < (T)0.0 ? -magnitude : magnitude;
}
// Inverse hyperbolic sine of x.
//
// asinh is odd, so it is evaluated on |x| as log(|x| + sqrt(x^2 + 1)) and the
// sign is restored afterwards. Applying the formula directly to a large
// negative x makes x + sqrt(x^2 + 1) cancel to zero, giving log(0) = -inf.
template<floating_point T>
inline constexpr T asinh(T x)
{
	const T magnitude = abs(x);
	const T result = log<T>(magnitude + sqrt<T>(magnitude * magnitude + (T)1.0));
	return x < (T)0.0 ? -result : result;
}
// Inverse hyperbolic cosine: log(x + sqrt(x^2 - 1)).
template<floating_point T>
inline constexpr T acosh(T x)
{
	const T root = sqrt<T>(x * x - static_cast<T>(1.0));
	return log<T>(x + root);
}
// Inverse hyperbolic tangent: 0.5 * log((1 + x) / (1 - x)).
template<floating_point T>
inline constexpr T atanh(T x)
{
	const T one = static_cast<T>(1.0);
	const T ratio = (one + x) / (one - x);
	return static_cast<T>(0.5) * log<T>(ratio);
}
// Euclidean length sqrt(x^2 + y^2).
//
// The larger magnitude is factored out before squaring, i.e.
// hi * sqrt(1 + (lo / hi)^2), so the intermediate cannot overflow or underflow
// when the result itself is representable (squaring the inputs directly
// overflows to infinity for large operands).
template<floating_point T>
inline constexpr T hypot(T x, T y)
{
	const T ax = abs(x);
	const T ay = abs(y);
	const T hi = ax > ay ? ax : ay;
	const T lo = ax > ay ? ay : ax;

	// both operands are zero: avoid 0 / 0 below
	if (hi == (T)0.0)
		return (T)0.0;
	// hi is infinite (the only non-zero value unchanged by doubling): avoid inf / inf
	if (hi + hi == hi)
		return hi;

	const T ratio = lo / hi;
	return hi * sqrt<T>((T)1.0 + ratio * ratio);
}
}