diff --git a/BAN/include/BAN/Math.h b/BAN/include/BAN/Math.h index 0a663471..2f5a0745 100644 --- a/BAN/include/BAN/Math.h +++ b/BAN/include/BAN/Math.h @@ -6,6 +6,19 @@ #include +// This is ugly but my clangd does not like including +// intrinsic headers at all +#if !defined(__SSE__) || !defined(__SSE2__) +#pragma GCC push_options +#ifndef __SSE__ +#pragma GCC target("sse") +#endif +#ifndef __SSE2__ +#pragma GCC target("sse2") +#endif +#define BAN_MATH_POP_OPTIONS +#endif + namespace BAN::Math { @@ -167,7 +180,7 @@ namespace BAN::Math template static T modf(T x, T* iptr) { - const T frac = BAN::Math::fmod(x, 1); + const T frac = BAN::Math::fmod(x, (T)1.0); *iptr = x - frac; return frac; } @@ -175,15 +188,15 @@ namespace BAN::Math template inline constexpr T frexp(T num, int* exp) { - if (num == 0.0) + if (num == (T)0.0) { *exp = 0; - return 0.0; + return (T)0.0; } - T _exp; - asm("fxtract" : "+t"(num), "=u"(_exp)); - *exp = (int)_exp + 1; + T e; + asm("fxtract" : "+t"(num), "=u"(e)); + *exp = (int)e + 1; return num / (T)2.0; } @@ -251,6 +264,7 @@ namespace BAN::Math "fstp %%st(1);" : "+t"(x) ); + return x; } @@ -263,18 +277,9 @@ namespace BAN::Math template inline constexpr T pow(T x, T y) { - asm( - "fyl2x;" - "fld1;" - "fld %%st(1);" - "fprem;" - "f2xm1;" - "faddp;" - "fscale;" - : "+t"(x), "+u"(y) - ); - - return x; + if (x == (T)0.0) + return (T)0.0; + return exp2(y * log2(x)); } template @@ -310,16 +315,27 @@ namespace BAN::Math template inline constexpr T sqrt(T x) { - asm("fsqrt" : "+t"(x)); - return x; + if constexpr(BAN::is_same_v) + { + using v4sf = float __attribute__((vector_size(16))); + return __builtin_ia32_sqrtss((v4sf) { x, 0.0f, 0.0f, 0.0f })[0]; + } + else if constexpr(BAN::is_same_v) + { + using v2df = double __attribute__((vector_size(16))); + return __builtin_ia32_sqrtsd((v2df) { x, 0.0 })[0]; + } + else if constexpr(BAN::is_same_v) + { + asm("fsqrt" : "+t"(x)); + return x; + } } template inline constexpr T cbrt(T value) { - if (value == 0.0) - return 0.0; - return pow(value, 1.0 / 3.0); + return pow(value, (T)1.0 / (T)3.0); } template @@ -346,30 +362,21 @@ namespace BAN::Math inline constexpr T tan(T x) { T one, ret; - asm( - "fptan" - : "=t"(one), "=u"(ret) - : "0"(x) - ); + asm("fptan" : "=t"(one), "=u"(ret) : "0"(x)); return ret; } template inline constexpr T atan2(T y, T x) { - asm( - "fpatan" - : "+t"(x) - : "u"(y) - : "st(1)" - ); + asm("fpatan" : "+t"(x) : "u"(y) : "st(1)"); return x; } template inline constexpr T atan(T x) { - return atan2(x, 1.0); + return atan2(x, (T)1.0); } template @@ -378,10 +385,10 @@ namespace BAN::Math if (x == (T)0.0) return (T)0.0; if (x == (T)1.0) - return numbers::pi_v / (T)2.0; + return +numbers::pi_v / (T)2.0; if (x == (T)-1.0) return -numbers::pi_v / (T)2.0; - return (T)2.0 * atan(x / (T(1.0) + sqrt((T)1.0 - x * x))); + return (T)2.0 * atan(x / ((T)1.0 + sqrt((T)1.0 - x * x))); } template @@ -411,7 +418,7 @@ namespace BAN::Math template inline constexpr T tanh(T x) { - const T exp_px = exp(x); + const T exp_px = exp(+x); const T exp_nx = exp(-x); return (exp_px - exp_nx) / (exp_px + exp_nx); } @@ -441,3 +448,8 @@ namespace BAN::Math } } + +#ifdef BAN_MATH_POP_OPTIONS +#undef BAN_MATH_POP_OPTIONS +#pragma GCC pop_options +#endif