Kernel/userspace: rework floating point math
SSE is now unconditionally enabled any where and most of math.h is now actually implemented. using __builtin_<func> lead to many hangs where the builtin function would just call itself.
This commit is contained in:
@@ -21,10 +21,5 @@ foreach(library ${USERSPACE_LIBRARIES})
|
||||
target_link_options(${library_lower} PRIVATE -nolibc)
|
||||
# Default compile options
|
||||
target_compile_options(${library_lower} PRIVATE -g -O2 -Wall -Wextra -Werror)
|
||||
|
||||
target_compile_definitions(${library_lower} PRIVATE __enable_sse=${BANAN_ENABLE_SSE})
|
||||
if (NOT BANAN_ENABLE_SSE)
|
||||
target_compile_options(${library_lower} PRIVATE -mno-sse -mno-sse2)
|
||||
endif ()
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
@@ -6,6 +6,7 @@ set(LIBC_SOURCES
|
||||
dlfcn.cpp
|
||||
errno.cpp
|
||||
fcntl.cpp
|
||||
fenv.cpp
|
||||
ftw.cpp
|
||||
grp.cpp
|
||||
inttypes.cpp
|
||||
@@ -48,10 +49,6 @@ set(LIBC_SOURCES
|
||||
|
||||
add_library(objlibc OBJECT ${LIBC_SOURCES})
|
||||
target_compile_definitions(objlibc PRIVATE __arch=${BANAN_ARCH})
|
||||
target_compile_definitions(objlibc PRIVATE __enable_sse=${BANAN_ENABLE_SSE})
|
||||
if (NOT BANAN_ENABLE_SSE)
|
||||
target_compile_options(objlibc PRIVATE -mno-sse -mno-sse2)
|
||||
endif ()
|
||||
|
||||
target_compile_options(objlibc PRIVATE -O2 -g -Wstack-usage=512 -fno-tree-loop-distribute-patterns -fno-exceptions -fpic -nolibc)
|
||||
target_compile_options(objlibc PUBLIC -Wall -Wextra -Werror -Wno-error=stack-usage=)
|
||||
|
||||
134
userspace/libraries/LibC/fenv.cpp
Normal file
134
userspace/libraries/LibC/fenv.cpp
Normal file
@@ -0,0 +1,134 @@
|
||||
#include <fenv.h>
|
||||
|
||||
static uint16_t read_fpu_control_word()
|
||||
{
|
||||
uint16_t control;
|
||||
asm volatile("fstcw %0" : "=m"(control));
|
||||
return control;
|
||||
}
|
||||
|
||||
static uint16_t read_fpu_status_word()
|
||||
{
|
||||
uint16_t status;
|
||||
asm volatile("fstsw %0" : "=m"(status));
|
||||
return status;
|
||||
}
|
||||
|
||||
static uint32_t read_mxcsr()
|
||||
{
|
||||
uint32_t mxcsr;
|
||||
asm volatile("stmxcsr %0" : "=m"(mxcsr));
|
||||
return mxcsr;
|
||||
}
|
||||
|
||||
static void write_fpu_control_word(uint16_t control)
|
||||
{
|
||||
asm volatile("fldcw %0" :: "m"(control));
|
||||
}
|
||||
|
||||
static void write_mxcsr(uint32_t mxcsr)
|
||||
{
|
||||
asm volatile("ldmxcsr %0" :: "m"(mxcsr));
|
||||
}
|
||||
|
||||
int fegetenv(fenv_t* envp)
|
||||
{
|
||||
asm volatile("fstenv %0" : "=m"(envp->x87_fpu));
|
||||
envp->mxcsr = read_mxcsr();
|
||||
return 0;
|
||||
}
|
||||
|
||||
int fesetenv(const fenv_t* envp)
|
||||
{
|
||||
if (envp == FE_DFL_ENV)
|
||||
{
|
||||
asm volatile("finit");
|
||||
write_mxcsr(0x1F80);
|
||||
return 0;
|
||||
}
|
||||
|
||||
asm volatile("fldenv %0" :: "m"(envp->x87_fpu));
|
||||
write_mxcsr(envp->mxcsr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int fegetround(void)
|
||||
{
|
||||
return (read_fpu_control_word() >> 10) & 0x03;
|
||||
}
|
||||
|
||||
int fesetround(int round)
|
||||
{
|
||||
uint16_t control = read_fpu_control_word();
|
||||
control &= ~(3 << 10);
|
||||
control |= round << 10;
|
||||
write_fpu_control_word(control);
|
||||
|
||||
uint32_t mxcsr = read_mxcsr();
|
||||
mxcsr &= ~(3 << 13);
|
||||
mxcsr |= round << 13;
|
||||
write_mxcsr(mxcsr);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int feclearexcept(int excepts)
|
||||
{
|
||||
excepts &= FE_ALL_EXCEPT;
|
||||
|
||||
fenv_t temp_env;
|
||||
fegetenv(&temp_env);
|
||||
temp_env.x87_fpu.status &= ~(excepts | (1 << 7));
|
||||
temp_env.mxcsr &= ~excepts;
|
||||
fesetenv(&temp_env);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int fetestexcept(int excepts)
|
||||
{
|
||||
excepts &= FE_ALL_EXCEPT;
|
||||
|
||||
const uint16_t status = read_fpu_status_word();
|
||||
const uint32_t mxcsr = read_mxcsr();
|
||||
return (status | mxcsr) & excepts;
|
||||
}
|
||||
|
||||
int feholdexcept(fenv_t*);
|
||||
|
||||
int feraiseexcept(int excepts)
|
||||
{
|
||||
excepts &= FE_ALL_EXCEPT;
|
||||
|
||||
fenv_t temp_env;
|
||||
fegetenv(&temp_env);
|
||||
temp_env.x87_fpu.status |= excepts;
|
||||
fesetenv(&temp_env);
|
||||
asm volatile("fwait");
|
||||
return 0;
|
||||
}
|
||||
|
||||
int fegetexceptflag(fexcept_t* flagp, int excepts)
|
||||
{
|
||||
*flagp = fetestexcept(excepts);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int fesetexceptflag(const fexcept_t* flagp, int excepts)
|
||||
{
|
||||
excepts &= FE_ALL_EXCEPT;
|
||||
|
||||
fenv_t temp_env;
|
||||
fegetenv(&temp_env);
|
||||
temp_env.x87_fpu.status &= ~(FE_ALL_EXCEPT | (1 << 7));
|
||||
temp_env.x87_fpu.status |= *flagp & excepts;
|
||||
fesetenv(&temp_env);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int feupdateenv(const fenv_t* envp)
|
||||
{
|
||||
int excepts = fetestexcept(FE_ALL_EXCEPT);
|
||||
fesetenv(envp);
|
||||
feraiseexcept(excepts);
|
||||
return 0;
|
||||
}
|
||||
@@ -7,7 +7,46 @@
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
// FIXME
|
||||
#include <stdint.h>
|
||||
|
||||
#define FE_INVALID (1 << 0)
|
||||
#define FE_DIVBYZERO (1 << 2)
|
||||
#define FE_OVERFLOW (1 << 3)
|
||||
#define FE_UNDERFLOW (1 << 4)
|
||||
#define FE_INEXACT (1 << 5)
|
||||
#define FE_ALL_EXCEPT (FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW | FE_INEXACT)
|
||||
|
||||
#define FE_TONEAREST 0
|
||||
#define FE_DOWNWARD 1
|
||||
#define FE_UPWARD 2
|
||||
#define FE_TOWARDZERO 3
|
||||
|
||||
typedef struct {
|
||||
uint32_t control;
|
||||
uint32_t status;
|
||||
uint32_t __unused[5];
|
||||
} __x87_fpu_t;
|
||||
|
||||
typedef struct {
|
||||
__x87_fpu_t x87_fpu;
|
||||
uint32_t mxcsr;
|
||||
} fenv_t;
|
||||
|
||||
typedef uint8_t fexcept_t;
|
||||
|
||||
#define FE_DFL_ENV ((const fenv_t*)0x1)
|
||||
|
||||
int feclearexcept(int);
|
||||
int fegetenv(fenv_t*);
|
||||
int fegetexceptflag(fexcept_t*, int);
|
||||
int fegetround(void);
|
||||
int feholdexcept(fenv_t*);
|
||||
int feraiseexcept(int);
|
||||
int fesetenv(const fenv_t*);
|
||||
int fesetexceptflag(const fexcept_t*, int);
|
||||
int fesetround(int);
|
||||
int fetestexcept(int);
|
||||
int feupdateenv(const fenv_t*);
|
||||
|
||||
__END_DECLS
|
||||
|
||||
|
||||
@@ -5,6 +5,8 @@
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
|
||||
#include <float.h>
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
#ifndef FLT_EVAL_METHOD
|
||||
@@ -22,52 +24,50 @@ __BEGIN_DECLS
|
||||
typedef long double double_t;
|
||||
#endif
|
||||
|
||||
// FIXME: define this
|
||||
// int fpclassify(real-floating x);
|
||||
#define fpclassify(x) __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO, x)
|
||||
#define isfinite(x) __builtin_isfinite(x)
|
||||
#define isgreater(x, y) __builtin_isgreater(x, y)
|
||||
#define isgreaterequal(x, y) __builtin_isgreaterequal(x, y)
|
||||
#define isinf(x) __builtin_isinf_sign(x)
|
||||
#define isless(x, y) __builtin_isless(x, y)
|
||||
#define islessequal(x, y) __builtin_islessequal(x, y)
|
||||
#define islessgreater(x, y) __builtin_islessgreater(x, y)
|
||||
#define isnan(x) __builtin_isnan(x)
|
||||
#define isnormal(x) __builtin_isnormal(x)
|
||||
#define isunordered(x, y) __builtin_isunordered(x, y)
|
||||
#define signbit(x) __builtin_signbit(x)
|
||||
|
||||
#define isfinite(x) __builtin_isfinite(x)
|
||||
#define isgreater(x, y) __builtin_isgreater(x, y)
|
||||
#define isgreaterequal(x, y) __builtin_isgreaterequal(x, y)
|
||||
#define isinf(x) __builtin_isinf_sign(x)
|
||||
#define isless(x, y) __builtin_isless(x, y)
|
||||
#define islessequal(x, y) __builtin_islessequal(x, y)
|
||||
#define islessgreater(x, y) __builtin_islessgreater(x, y)
|
||||
#define isnan(x) __builtin_isnan(x)
|
||||
#define isnormal(x) __builtin_isnormal(x)
|
||||
#define isunordered(x, y) __builtin_isunordered(x, y)
|
||||
#define signbit(x) __builtin_signbit(x)
|
||||
#define M_E 2.7182818284590452354
|
||||
#define M_LOG2E 1.4426950408889634074
|
||||
#define M_LOG10E 0.43429448190325182765
|
||||
#define M_LN2 0.69314718055994530942
|
||||
#define M_LN10 2.30258509299404568402
|
||||
#define M_PI 3.14159265358979323846
|
||||
#define M_PI_2 1.57079632679489661923
|
||||
#define M_PI_4 0.78539816339744830962
|
||||
#define M_1_PI 0.31830988618379067154
|
||||
#define M_2_PI 0.63661977236758134308
|
||||
#define M_2_SQRTPI 1.12837916709551257390
|
||||
#define M_SQRT2 1.41421356237309504880
|
||||
#define M_SQRT1_2 0.70710678118654752440
|
||||
|
||||
#define M_E 2.7182818284590452354
|
||||
#define M_LOG2E 1.4426950408889634074
|
||||
#define M_LOG10E 0.43429448190325182765
|
||||
#define M_LN2 0.69314718055994530942
|
||||
#define M_LN10 2.30258509299404568402
|
||||
#define M_PI 3.14159265358979323846
|
||||
#define M_PI_2 1.57079632679489661923
|
||||
#define M_PI_4 0.78539816339744830962
|
||||
#define M_1_PI 0.31830988618379067154
|
||||
#define M_2_PI 0.63661977236758134308
|
||||
#define M_2_SQRTPI 1.12837916709551257390
|
||||
#define M_SQRT2 1.41421356237309504880
|
||||
#define M_SQRT1_2 0.70710678118654752440
|
||||
#define HUGE_VAL __builtin_huge_val()
|
||||
#define HUGE_VALF __builtin_huge_valf()
|
||||
#define HUGE_VALL __builtin_huge_vall()
|
||||
#define INFINITY __builtin_inff()
|
||||
#define NAN __builtin_nanf("")
|
||||
|
||||
#define HUGE_VAL __builtin_huge_val()
|
||||
#define HUGE_VALF __builtin_huge_valf()
|
||||
#define HUGE_VALL __builtin_huge_vall()
|
||||
#define INFINITY __builtin_inff()
|
||||
#define NAN __builtin_nanf("")
|
||||
#define FP_INFINITE 0
|
||||
#define FP_NAN 1
|
||||
#define FP_NORMAL 2
|
||||
#define FP_SUBNORMAL 3
|
||||
#define FP_ZERO 4
|
||||
|
||||
#define FP_INFINITE 0
|
||||
#define FP_NAN 1
|
||||
#define FP_NORMAL 2
|
||||
#define FP_SUBNORMAL 3
|
||||
#define FP_ZERO 4
|
||||
#define FP_ILOGB0 -2147483647
|
||||
#define FP_ILOGBNAN +2147483647
|
||||
|
||||
#define FP_ILOGB0 -2147483647
|
||||
#define FP_ILOGBNAN +2147483647
|
||||
|
||||
#define MATH_ERRNO 1
|
||||
#define MATH_ERREXCEPT 2
|
||||
#define MATH_ERRNO 1
|
||||
#define MATH_ERREXCEPT 2
|
||||
|
||||
#define math_errhandling (MATH_ERRNO | MATH_ERREXCEPT)
|
||||
|
||||
@@ -191,9 +191,9 @@ long lroundl(long double);
|
||||
double modf(double, double*);
|
||||
float modff(float, float*);
|
||||
long double modfl(long double, long double*);
|
||||
double nan(const char*);
|
||||
float nanf(const char*);
|
||||
long double nanl(const char*);
|
||||
#define nan(tagp) __builtin_nan(tagp)
|
||||
#define nanf(tagp) __builtin_nanf(tagp)
|
||||
#define nanl(tagp) __builtin_nanl(tagp)
|
||||
double nearbyint(double);
|
||||
float nearbyintf(float);
|
||||
long double nearbyintl(long double);
|
||||
|
||||
@@ -1,71 +1,285 @@
|
||||
#include <BAN/Math.h>
|
||||
|
||||
#include <math.h>
|
||||
#include <stddef.h>
|
||||
|
||||
#define BUILTINS1(func) \
|
||||
float func##f(float a) { return __builtin_##func##f(a); } \
|
||||
double func(double a) { return __builtin_##func(a); } \
|
||||
long double func##l(long double a) { return __builtin_##func##l(a); }
|
||||
#define BAN_FUNC1(func) \
|
||||
float func##f(float a) { return BAN::Math::func<float>(a); } \
|
||||
double func(double a) { return BAN::Math::func<double>(a); } \
|
||||
long double func##l(long double a) { return BAN::Math::func<long double>(a); }
|
||||
|
||||
#define BUILTINS2(func) \
|
||||
float func##f(float a, float b) { return __builtin_##func##f(a, b); } \
|
||||
double func(double a, double b) { return __builtin_##func(a, b); } \
|
||||
long double func##l(long double a, long double b) { return __builtin_##func##l(a, b); }
|
||||
#define BAN_FUNC2(func) \
|
||||
float func##f(float a, float b) { return BAN::Math::func<float>(a, b); } \
|
||||
double func(double a, double b) { return BAN::Math::func<double>(a, b); } \
|
||||
long double func##l(long double a, long double b) { return BAN::Math::func<long double>(a, b); }
|
||||
|
||||
#define BUILTINS2_TYPE(func, type) \
|
||||
float func##f(float a, type b) { return __builtin_##func##f(a, b); } \
|
||||
double func(double a, type b) { return __builtin_##func(a, b); } \
|
||||
long double func##l(long double a, type b) { return __builtin_##func##l(a, b); }
|
||||
#define BAN_FUNC2_PTR(func) \
|
||||
float func##f(float a, float* b) { return BAN::Math::func<float>(a, b); } \
|
||||
double func(double a, double* b) { return BAN::Math::func<double>(a, b); } \
|
||||
long double func##l(long double a, long double* b) { return BAN::Math::func<long double>(a, b); }
|
||||
|
||||
#define BAN_FUNC2_TYPE(func, type) \
|
||||
float func##f(float a, type b) { return BAN::Math::func<float>(a, b); } \
|
||||
double func(double a, type b) { return BAN::Math::func<double>(a, b); } \
|
||||
long double func##l(long double a, type b) { return BAN::Math::func<long double>(a, b); }
|
||||
|
||||
#define FUNC_EXPR1(func, ...) \
|
||||
float func##f(float a) { return __VA_ARGS__; } \
|
||||
double func(double a) { return __VA_ARGS__; } \
|
||||
long double func##l(long double a) { return __VA_ARGS__; }
|
||||
|
||||
#define FUNC_EXPR2(func, ...) \
|
||||
float func##f(float a, float b) { return __VA_ARGS__; } \
|
||||
double func(double a, double b) { return __VA_ARGS__; } \
|
||||
long double func##l(long double a, long double b) { return __VA_ARGS__; }
|
||||
|
||||
#define FUNC_EXPR3(func, ...) \
|
||||
float func##f(float a, float b, float c) { return __VA_ARGS__; } \
|
||||
double func(double a, double b, double c) { return __VA_ARGS__; } \
|
||||
long double func##l(long double a, long double b, long double c) { return __VA_ARGS__; }
|
||||
|
||||
#define FUNC_EXPR1_RET(func, ret, ...) \
|
||||
ret func##f(float a) { return __VA_ARGS__; } \
|
||||
ret func(double a) { return __VA_ARGS__; } \
|
||||
ret func##l(long double a) { return __VA_ARGS__; }
|
||||
|
||||
#define FUNC_EXPR2_TYPE(func, type, ...) \
|
||||
float func##f(float a, type b) { return __VA_ARGS__; } \
|
||||
double func(double a, type b) { return __VA_ARGS__; } \
|
||||
long double func##l(long double a, type b) { return __VA_ARGS__; }
|
||||
|
||||
template<BAN::floating_point T>
|
||||
struct integral_atleast;
|
||||
|
||||
template<BAN::floating_point T> requires(sizeof(T) <= sizeof(uint8_t))
|
||||
struct integral_atleast<T> { using type = uint8_t; };
|
||||
|
||||
template<BAN::floating_point T> requires(sizeof(T) > sizeof(uint8_t) && sizeof(T) <= sizeof(uint16_t))
|
||||
struct integral_atleast<T> { using type = uint16_t; };
|
||||
|
||||
template<BAN::floating_point T> requires(sizeof(T) > sizeof(uint16_t) && sizeof(T) <= sizeof(uint32_t))
|
||||
struct integral_atleast<T> { using type = uint32_t; };
|
||||
|
||||
template<BAN::floating_point T> requires(sizeof(T) > sizeof(uint32_t) && sizeof(T) <= sizeof(uint64_t))
|
||||
struct integral_atleast<T> { using type = uint64_t; };
|
||||
|
||||
template<BAN::floating_point T> requires(sizeof(T) > sizeof(uint64_t) && sizeof(T) <= sizeof(__uint128_t))
|
||||
struct integral_atleast<T> { using type = __uint128_t; };
|
||||
|
||||
template<BAN::integral T, size_t mantissa_bits, size_t exponent_bits, bool integral>
|
||||
struct __FloatDecompose;
|
||||
|
||||
template<BAN::integral T, size_t mantissa_bits, size_t exponent_bits>
|
||||
struct __FloatDecompose<T, mantissa_bits, exponent_bits, true>
|
||||
{
|
||||
T mantissa : mantissa_bits;
|
||||
T : 1;
|
||||
T exponent : exponent_bits;
|
||||
T sign : 1;
|
||||
};
|
||||
|
||||
template<BAN::integral T, size_t mantissa_bits, size_t exponent_bits>
|
||||
struct __FloatDecompose<T, mantissa_bits, exponent_bits, false>
|
||||
{
|
||||
T mantissa : mantissa_bits;
|
||||
T exponent : exponent_bits;
|
||||
T sign : 1;
|
||||
};
|
||||
|
||||
template<BAN::floating_point T>
|
||||
struct FloatDecompose
|
||||
{
|
||||
using value_type = integral_atleast<T>::type;
|
||||
|
||||
static constexpr size_t mantissa_bits
|
||||
= BAN::is_same_v<T, float> ? FLT_MANT_DIG - 1
|
||||
: BAN::is_same_v<T, double> ? DBL_MANT_DIG - 1
|
||||
: BAN::is_same_v<T, long double> ? LDBL_MANT_DIG - 1
|
||||
: 0;
|
||||
|
||||
static constexpr size_t exponent_bits
|
||||
= BAN::is_same_v<T, float> ? 8
|
||||
: BAN::is_same_v<T, double> ? 11
|
||||
: BAN::is_same_v<T, long double> ? 15
|
||||
: 0;
|
||||
|
||||
static constexpr value_type mantissa_max
|
||||
= (static_cast<value_type>(1) << mantissa_bits) - 1;
|
||||
|
||||
static constexpr value_type exponent_max
|
||||
= (static_cast<value_type>(1) << exponent_bits) - 1;
|
||||
|
||||
FloatDecompose(T x)
|
||||
: raw(x)
|
||||
{}
|
||||
|
||||
union
|
||||
{
|
||||
T raw;
|
||||
__FloatDecompose<
|
||||
value_type,
|
||||
mantissa_bits,
|
||||
exponent_bits,
|
||||
BAN::is_same_v<T, long double>
|
||||
> bits;
|
||||
};
|
||||
};
|
||||
|
||||
template<BAN::floating_point T1, BAN::floating_point T2>
|
||||
static T1 nextafter_impl(T1 x, T2 y)
|
||||
{
|
||||
if (isnan(x) || isnan(y))
|
||||
return BAN::numeric_limits<T1>::quiet_NaN();
|
||||
|
||||
if (!isfinite(x) || x == y)
|
||||
return x;
|
||||
|
||||
FloatDecompose<T1> decompose(x);
|
||||
|
||||
// at zero
|
||||
if ((T2)x == (T2)0.0)
|
||||
{
|
||||
decompose.bits.mantissa = 1;
|
||||
decompose.bits.sign = (x > y);
|
||||
return decompose.raw;
|
||||
}
|
||||
|
||||
// away from zero
|
||||
if ((x > y) == decompose.bits.sign)
|
||||
{
|
||||
decompose.bits.mantissa++;
|
||||
if (decompose.bits.mantissa == 0)
|
||||
{
|
||||
decompose.bits.exponent++;
|
||||
if (decompose.bits.exponent == decompose.exponent_max)
|
||||
decompose.bits.mantissa = 0;
|
||||
}
|
||||
return decompose.raw;
|
||||
}
|
||||
|
||||
// towards zero
|
||||
decompose.bits.mantissa--;
|
||||
if (decompose.bits.mantissa == decompose.mantissa_max)
|
||||
{
|
||||
if (decompose.bits.exponent == 0)
|
||||
return 0.0;
|
||||
decompose.bits.exponent--;
|
||||
}
|
||||
|
||||
return decompose.raw;
|
||||
}
|
||||
|
||||
static long double tgamma_impl(long double x)
|
||||
{
|
||||
constexpr long double pi = BAN::numbers::pi_v<long double>;
|
||||
|
||||
if (x == 0.0L)
|
||||
return BAN::numeric_limits<long double>::infinity();
|
||||
|
||||
// reflection formula
|
||||
if (x < 0.5L)
|
||||
return pi / (BAN::Math::sin(pi * x) * tgamma_impl(1.0L - x));
|
||||
x -= 1.0L;
|
||||
|
||||
// Lanczos approximation
|
||||
|
||||
constexpr long double g = 8.0L;
|
||||
constexpr long double p[] {
|
||||
0.9999999999999999298e+0L,
|
||||
1.9753739023578852322e+3L,
|
||||
-4.3973823927922428918e+3L,
|
||||
3.4626328459862717019e+3L,
|
||||
-1.1569851431631167820e+3L,
|
||||
1.5453815050252775060e+2L,
|
||||
-6.2536716123689161798e+0L,
|
||||
3.4642762454736807441e-2L,
|
||||
-7.4776171974442977377e-7L,
|
||||
6.3041253821852264261e-8L,
|
||||
-2.7405717035683877489e-8L,
|
||||
4.0486948817567609101e-9L
|
||||
};
|
||||
constexpr long double sqrt_2pi = 2.5066282746310005024L;
|
||||
|
||||
long double A = p[0];
|
||||
for (size_t i = 1; i < sizeof(p) / sizeof(*p); i++)
|
||||
A += p[i] / (x + i);
|
||||
|
||||
const long double t = x + g + 0.5L;
|
||||
return sqrt_2pi * BAN::Math::pow(t, x + 0.5L) * BAN::Math::exp(-t) * A;
|
||||
}
|
||||
|
||||
static long double erf_impl(long double x)
|
||||
{
|
||||
long double sum = 0.0L;
|
||||
for (size_t n = 0; n < 100; n++)
|
||||
sum += x / (2.0L * n + 1.0L) * BAN::Math::ldexp(-x * x, n) / tgamma_impl(n + 1.0L);
|
||||
|
||||
constexpr long double sqrt_pi = 1.77245385090551602729L;
|
||||
return 2.0L / sqrt_pi * sum;
|
||||
}
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
#if __enable_sse
|
||||
BUILTINS1(acos)
|
||||
BUILTINS1(acosh)
|
||||
BUILTINS1(asin)
|
||||
BUILTINS1(asinh)
|
||||
BUILTINS1(atan)
|
||||
BUILTINS2(atan2)
|
||||
BUILTINS1(atanh)
|
||||
BUILTINS1(cbrt)
|
||||
BUILTINS1(ceil)
|
||||
BUILTINS2(copysign)
|
||||
BUILTINS1(cos)
|
||||
BUILTINS1(cosh)
|
||||
BUILTINS1(erf)
|
||||
BUILTINS1(erfc)
|
||||
BUILTINS1(exp)
|
||||
BUILTINS1(exp2)
|
||||
BUILTINS1(expm1)
|
||||
BUILTINS1(fabs)
|
||||
BUILTINS2(fdim)
|
||||
BUILTINS1(floor)
|
||||
BUILTINS2(fmax)
|
||||
BUILTINS2(fmin)
|
||||
BUILTINS2(fmod)
|
||||
BUILTINS2(hypot)
|
||||
BUILTINS1(j0)
|
||||
BUILTINS1(j1)
|
||||
BUILTINS2_TYPE(ldexp, int)
|
||||
BUILTINS1(lgamma)
|
||||
BUILTINS1(log)
|
||||
BUILTINS1(log10)
|
||||
BUILTINS1(log1p)
|
||||
BUILTINS1(log2)
|
||||
BUILTINS1(logb)
|
||||
BUILTINS1(nearbyint)
|
||||
BUILTINS2(nextafter)
|
||||
BUILTINS2(pow)
|
||||
BUILTINS2(remainder)
|
||||
BUILTINS1(rint)
|
||||
BUILTINS1(round)
|
||||
BUILTINS1(sin)
|
||||
BUILTINS1(sinh)
|
||||
BUILTINS1(sqrt)
|
||||
BUILTINS1(tan)
|
||||
BUILTINS1(tanh)
|
||||
BUILTINS1(tgamma)
|
||||
BUILTINS1(trunc)
|
||||
BUILTINS1(y0)
|
||||
BUILTINS1(y1)
|
||||
#endif
|
||||
// FIXME: add handling for nan and infinity values
|
||||
|
||||
BAN_FUNC1(acos)
|
||||
BAN_FUNC1(acosh)
|
||||
BAN_FUNC1(asin)
|
||||
BAN_FUNC1(asinh)
|
||||
BAN_FUNC1(atan)
|
||||
BAN_FUNC2(atan2)
|
||||
BAN_FUNC1(atanh)
|
||||
BAN_FUNC1(cbrt)
|
||||
BAN_FUNC1(ceil)
|
||||
BAN_FUNC2(copysign)
|
||||
BAN_FUNC1(cos)
|
||||
BAN_FUNC1(cosh)
|
||||
FUNC_EXPR1(erf, erf_impl(a))
|
||||
FUNC_EXPR1(erfc, 1.0 - erf_impl(a))
|
||||
BAN_FUNC1(exp)
|
||||
BAN_FUNC1(exp2)
|
||||
FUNC_EXPR1(expm1, ({ a -= 1.0; BAN::Math::exp(a); }))
|
||||
FUNC_EXPR1(fabs, BAN::Math::abs(a))
|
||||
FUNC_EXPR2(fdim, a > b ? a - b : 0.0)
|
||||
BAN_FUNC1(floor)
|
||||
FUNC_EXPR3(fma, (a * b) + c)
|
||||
FUNC_EXPR2(fmax, a < b ? b : a)
|
||||
FUNC_EXPR2(fmin, a < b ? a : b)
|
||||
BAN_FUNC2(fmod)
|
||||
BAN_FUNC2_TYPE(frexp, int*)
|
||||
BAN_FUNC2(hypot)
|
||||
FUNC_EXPR1_RET(ilogb, int, BAN::Math::logb(a))
|
||||
// j0, j1, jn
|
||||
BAN_FUNC2_TYPE(ldexp, int)
|
||||
FUNC_EXPR1(lgamma, BAN::Math::log(BAN::Math::abs(tgamma_impl(a))))
|
||||
FUNC_EXPR1_RET(llrint, long long, BAN::Math::rint(a))
|
||||
FUNC_EXPR1_RET(llround, long long, BAN::Math::round(a))
|
||||
BAN_FUNC1(log)
|
||||
BAN_FUNC1(log10)
|
||||
FUNC_EXPR1(log1p, ({ a += 1.0; BAN::Math::log(a); }));
|
||||
BAN_FUNC1(log2)
|
||||
BAN_FUNC1(logb)
|
||||
FUNC_EXPR1_RET(lrint, long, BAN::Math::rint(a))
|
||||
FUNC_EXPR1_RET(lround, long, BAN::Math::round(a))
|
||||
BAN_FUNC2_PTR(modf)
|
||||
FUNC_EXPR1(nearbyint, BAN::Math::rint(a))
|
||||
FUNC_EXPR2(nextafter, nextafter_impl(a, b))
|
||||
FUNC_EXPR2_TYPE(nexttoward, long double, nextafter_impl(a, b))
|
||||
FUNC_EXPR2(pow, ({ if (isnan(a)) return a; if (isnan(b)) return b; BAN::Math::pow(a, b); }))
|
||||
// remainder
|
||||
// remquo
|
||||
BAN_FUNC1(rint)
|
||||
FUNC_EXPR1(round, ({ if (!isfinite(a)) return a; BAN::Math::round(a); }))
|
||||
FUNC_EXPR2_TYPE(scalbln, long, BAN::Math::scalbn(a, b))
|
||||
FUNC_EXPR2_TYPE(scalbn, int, BAN::Math::scalbn(a, b))
|
||||
BAN_FUNC1(sin)
|
||||
BAN_FUNC1(sinh)
|
||||
BAN_FUNC1(sqrt)
|
||||
BAN_FUNC1(tan)
|
||||
BAN_FUNC1(tanh)
|
||||
FUNC_EXPR1(tgamma, tgamma_impl(a))
|
||||
BAN_FUNC1(trunc)
|
||||
// y0, y1, yn
|
||||
|
||||
|
||||
__END_DECLS
|
||||
|
||||
@@ -129,7 +129,6 @@ static void integer_to_string(char* buffer, T value, int base, bool upper, forma
|
||||
buffer[offset++] = '\0';
|
||||
}
|
||||
|
||||
#if __enable_sse
|
||||
template<BAN::floating_point T>
|
||||
static void floating_point_to_string(char* buffer, T value, bool upper, const format_options_t options)
|
||||
{
|
||||
@@ -259,7 +258,6 @@ static void floating_point_to_exponent_string(char* buffer, T value, bool upper,
|
||||
exponent_options.width = 3;
|
||||
integer_to_string<int>(buffer + offset, exponent, 10, upper, exponent_options);
|
||||
}
|
||||
#endif
|
||||
|
||||
extern "C" int printf_impl(const char* format, va_list arguments, int (*putc_fun)(int, void*), void* data)
|
||||
{
|
||||
@@ -496,7 +494,6 @@ extern "C" int printf_impl(const char* format, va_list arguments, int (*putc_fun
|
||||
format++;
|
||||
break;
|
||||
}
|
||||
#if __enable_sse
|
||||
case 'e':
|
||||
case 'E':
|
||||
{
|
||||
@@ -529,7 +526,6 @@ extern "C" int printf_impl(const char* format, va_list arguments, int (*putc_fun
|
||||
case 'A':
|
||||
// TODO
|
||||
break;
|
||||
#endif
|
||||
case 'c':
|
||||
{
|
||||
conversion[0] = va_arg(arguments, int);
|
||||
|
||||
@@ -260,7 +260,6 @@ int scanf_impl(const char* format, va_list arguments, int (*__getc_fun)(bool adv
|
||||
}
|
||||
};
|
||||
|
||||
#if __enable_sse
|
||||
auto parse_floating_point_internal =
|
||||
[&parse_integer_internal, &get_input, &in]<int BASE, typename T>(BASE_TYPE<BASE>, bool negative, int width, T* out, bool require_start = true) -> ConversionResult
|
||||
{
|
||||
@@ -435,7 +434,6 @@ int scanf_impl(const char* format, va_list arguments, int (*__getc_fun)(bool adv
|
||||
return ConversionResult::MATCH_FAILURE;
|
||||
}
|
||||
};
|
||||
#endif
|
||||
|
||||
auto parse_string =
|
||||
[&arguments, &get_input, &in](uint8_t* mask, bool exclude, bool suppress, bool allocate, int min_len, int max_len, bool terminate) -> ConversionResult
|
||||
@@ -520,11 +518,9 @@ int scanf_impl(const char* format, va_list arguments, int (*__getc_fun)(bool adv
|
||||
case 'x': result = parse_integer(BASE_TYPE<16>{}, IS_UNSIGNED<true> {}, conversion.suppress, conversion.field_width, conversion.length); break;
|
||||
case 'X': result = parse_integer(BASE_TYPE<16>{}, IS_UNSIGNED<true> {}, conversion.suppress, conversion.field_width, conversion.length); break;
|
||||
case 'p': result = parse_integer(BASE_TYPE<16>{}, IS_UNSIGNED<true> {}, conversion.suppress, conversion.field_width, LengthModifier::j); break;
|
||||
#if __enable_sse
|
||||
case 'a': case 'e': case 'f': case 'g':
|
||||
result = parse_floating_point(conversion.suppress, conversion.field_width, conversion.length);
|
||||
break;
|
||||
#endif
|
||||
case 'S':
|
||||
conversion.length = LengthModifier::l;
|
||||
// fall through
|
||||
|
||||
@@ -162,7 +162,6 @@ static T strtoT(const char* str, char** endp, int base, int& error)
|
||||
return result;
|
||||
}
|
||||
|
||||
#if __enable_sse
|
||||
template<BAN::floating_point T>
|
||||
static T strtoT(const char* str, char** endp, int& error)
|
||||
{
|
||||
@@ -306,16 +305,13 @@ static T strtoT(const char* str, char** endp, int& error)
|
||||
|
||||
if (exponent)
|
||||
result *= BAN::Math::pow<T>((base == 10) ? 10 : 2, exponent);
|
||||
return result;
|
||||
return negative ? -result : result;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if __enable_sse
|
||||
double atof(const char* str)
|
||||
{
|
||||
return strtod(str, nullptr);
|
||||
}
|
||||
#endif
|
||||
|
||||
int atoi(const char* str)
|
||||
{
|
||||
@@ -332,7 +328,6 @@ long long atoll(const char* str)
|
||||
return strtoll(str, nullptr, 10);
|
||||
}
|
||||
|
||||
#if __enable_sse
|
||||
float strtof(const char* __restrict str, char** __restrict endp)
|
||||
{
|
||||
return strtoT<float>(str, endp, errno);
|
||||
@@ -347,7 +342,6 @@ long double strtold(const char* __restrict str, char** __restrict endp)
|
||||
{
|
||||
return strtoT<long double>(str, endp, errno);
|
||||
}
|
||||
#endif
|
||||
|
||||
long strtol(const char* __restrict str, char** __restrict endp, int base)
|
||||
{
|
||||
|
||||
@@ -6,7 +6,6 @@
|
||||
#include <LibImage/PNG.h>
|
||||
|
||||
#include <fcntl.h>
|
||||
#include <math.h>
|
||||
#include <sys/mman.h>
|
||||
|
||||
namespace LibImage
|
||||
@@ -132,13 +131,8 @@ namespace LibImage
|
||||
{
|
||||
const double src_x = x * ratio_x;
|
||||
const double src_y = y * ratio_y;
|
||||
#if __enable_sse
|
||||
const double weight_x = src_x - floor(src_x);
|
||||
const double weight_y = src_y - floor(src_y);
|
||||
#else
|
||||
const double weight_x = src_x - (uint64_t)src_x;
|
||||
const double weight_y = src_y - (uint64_t)src_y;
|
||||
#endif
|
||||
const double weight_x = src_x - BAN::Math::floor(src_x);
|
||||
const double weight_y = src_y - BAN::Math::floor(src_y);
|
||||
|
||||
const Color avg_t = Color::average(
|
||||
get_clamped_color(src_x + 0.0, src_y),
|
||||
@@ -177,13 +171,8 @@ namespace LibImage
|
||||
{
|
||||
const double src_x = x * ratio_x;
|
||||
const double src_y = y * ratio_y;
|
||||
#if __enable_sse
|
||||
const double weight_x = src_x - floor(src_x);
|
||||
const double weight_y = src_y - floor(src_y);
|
||||
#else
|
||||
const double weight_x = src_x - (uint64_t)src_x;
|
||||
const double weight_y = src_y - (uint64_t)src_y;
|
||||
#endif
|
||||
const double weight_x = src_x - BAN::Math::floor(src_x);
|
||||
const double weight_y = src_y - BAN::Math::floor(src_y);
|
||||
|
||||
FloatingColor values[4];
|
||||
for (int64_t m = -1; m <= 2; m++)
|
||||
|
||||
Reference in New Issue
Block a user