Kernel/userspace: rework floating point math

SSE is now unconditionally enabled any where and most of math.h is now actually implemented. using __builtin_<func> lead to many hangs where the builtin function would just call itself.
2024-11-03 20:25:35 +02:00 · 2024-11-03 20:25:35 +02:00 · f4be37700f
parent ed19bb11fe
commit f4be37700f
18 changed files with 827 additions and 210 deletions
--- a/BAN/include/BAN/Math.h
+++ b/BAN/include/BAN/Math.h
@ -1,18 +1,18 @@
 #pragma once

 #include <BAN/Limits.h>
+#include <BAN/Numbers.h>
 #include <BAN/Traits.h>

-#include <stddef.h>
-#include <stdint.h>
+#include <float.h>

 namespace BAN::Math
 {

 	template<typename T>
-	inline constexpr T abs(T val)
+	inline constexpr T abs(T x)
 	{
-		return val < 0 ? -val : val;
+		return x < 0 ? -x : x;
 	}

 	template<typename T>
@ -59,11 +59,11 @@ namespace BAN::Math
 	}

 	template<integral T>
-	inline constexpr bool is_power_of_two(T value)
+	inline constexpr bool is_power_of_two(T x)
 	{
-		if (value == 0)
+		if (x == 0)
 			return false;
-		return (value & (value - 1)) == 0;
+		return (x & (x - 1)) == 0;
 	}

 	template<BAN::integral T>
@ -89,43 +89,181 @@ namespace BAN::Math

 	template<typename T>
 	requires is_same_v<T, unsigned int> || is_same_v<T, unsigned long> || is_same_v<T, unsigned long long>
-	inline constexpr T ilog2(T value)
+	inline constexpr T ilog2(T x)
 	{
 		if constexpr(is_same_v<T, unsigned int>)
-			return sizeof(T) * 8 - __builtin_clz(value) - 1;
+			return sizeof(T) * 8 - __builtin_clz(x) - 1;
 		if constexpr(is_same_v<T, unsigned long>)
-			return sizeof(T) * 8 - __builtin_clzl(value) - 1;
-		return sizeof(T) * 8 - __builtin_clzll(value) - 1;
+			return sizeof(T) * 8 - __builtin_clzl(x) - 1;
+		return sizeof(T) * 8 - __builtin_clzll(x) - 1;
 	}

 	template<floating_point T>
-	inline constexpr T log2(T value)
+	inline constexpr T floor(T x)
 	{
-		T result;
-		asm volatile("fyl2x" : "=t"(result) : "0"(value), "u"((T)1.0) : "st(1)");
-		return result;
+		if constexpr(is_same_v<T, float>)
+			return __builtin_floorf(x);
+		if constexpr(is_same_v<T, double>)
+			return __builtin_floor(x);
+		if constexpr(is_same_v<T, long double>)
+			return __builtin_floorl(x);
 	}

 	template<floating_point T>
-	inline constexpr T log10(T value)
+	inline constexpr T ceil(T x)
 	{
-		constexpr T INV_LOG_2_10 = 0.3010299956639811952137388947244930267681898814621085413104274611;
-		T result;
-		asm volatile("fyl2x" : "=t"(result) : "0"(value), "u"(INV_LOG_2_10) : "st(1)");
-		return result;
+		if constexpr(is_same_v<T, float>)
+			return __builtin_ceilf(x);
+		if constexpr(is_same_v<T, double>)
+			return __builtin_ceil(x);
+		if constexpr(is_same_v<T, long double>)
+			return __builtin_ceill(x);
 	}

 	template<floating_point T>
-	inline constexpr T log(T value, T base)
+	inline constexpr T round(T x)
 	{
-		return log2(value) / log2(base);
+		if (x == (T)0.0)
+			return x;
+		if (x > (T)0.0)
+			return floor<T>(x + (T)0.5);
+		return ceil<T>(x - (T)0.5);
 	}

 	template<floating_point T>
-	inline constexpr T pow(T base, T exp)
+	inline constexpr T trunc(T x)
 	{
-		T result;
-		asm volatile(
+		if constexpr(is_same_v<T, float>)
+			return __builtin_truncf(x);
+		if constexpr(is_same_v<T, double>)
+			return __builtin_trunc(x);
+		if constexpr(is_same_v<T, long double>)
+			return __builtin_truncl(x);
+	}
+
+	template<floating_point T>
+	inline constexpr T rint(T x)
+	{
+		asm("frndint" : "+t"(x));
+		return x;
+	}
+
+	template<floating_point T>
+	inline constexpr T fmod(T a, T b)
+	{
+		asm(
+			"1:"
+			"fprem;"
+			"fnstsw %%ax;"
+			"testb $4, %%ah;"
+			"jne 1b;"
+			: "+t"(a)
+			: "u"(b)
+			: "ax"
+		);
+		return a;
+	}
+
+	template<floating_point T>
+	static T modf(T x, T* iptr)
+	{
+		const T frac = BAN::Math::fmod<T>(x, 1);
+		*iptr = x - frac;
+		return frac;
+	}
+
+	template<floating_point T>
+	inline constexpr T frexp(T num, int* exp)
+	{
+		if (num == 0.0)
+		{
+			*exp = 0;
+			return 0.0;
+		}
+
+		T _exp;
+		asm("fxtract" : "+t"(num), "=u"(_exp));
+		*exp = (int)_exp + 1;
+		return num / (T)2.0;
+	}
+
+	template<floating_point T>
+	inline constexpr T copysign(T x, T y)
+	{
+		if ((x < (T)0.0) != (y < (T)0.0))
+			x = -x;
+		return x;
+	}
+
+	namespace detail
+	{
+
+		template<floating_point T>
+		inline constexpr T fyl2x(T x, T y)
+		{
+			asm("fyl2x" : "+t"(x) : "u"(y) : "st(1)");
+			return x;
+		}
+
+	}
+
+	template<floating_point T>
+	inline constexpr T log(T x)
+	{
+		return detail::fyl2x<T>(x, numbers::ln2_v<T>);
+	}
+
+	template<floating_point T>
+	inline constexpr T log2(T x)
+	{
+		return detail::fyl2x<T>(x, 1.0);
+	}
+
+	template<floating_point T>
+	inline constexpr T log10(T x)
+	{
+		return detail::fyl2x<T>(x, numbers::lg2_v<T>);
+	}
+
+	template<floating_point T>
+	inline constexpr T logb(T x)
+	{
+		static_assert(FLT_RADIX == 2);
+		return log2<T>(x);
+	}
+
+	template<floating_point T>
+	inline constexpr T exp2(T x)
+	{
+		if (abs(x) <= (T)1.0)
+		{
+			asm("f2xm1" : "+t"(x));
+			return x + (T)1.0;
+		}
+
+		asm(
+			"fld1;"
+			"fld %%st(1);"
+			"fprem;"
+			"f2xm1;"
+			"faddp;"
+			"fscale;"
+			"fstp %%st(1);"
+			: "+t"(x)
+		);
+		return x;
+	}
+
+	template<floating_point T>
+	inline constexpr T exp(T x)
+	{
+		return exp2<T>(x * numbers::log2e_v<T>);
+	}
+
+	template<floating_point T>
+	inline constexpr T pow(T x, T y)
+	{
+		asm(
 			"fyl2x;"
 			"fld1;"
 			"fld %%st(1);"
@ -133,12 +271,170 @@ namespace BAN::Math
 			"f2xm1;"
 			"faddp;"
 			"fscale;"
-			"fxch %%st(1);"
-			"fstp %%st;"
-			: "=t"(result)
-			: "0"(base), "u"(exp)
+			: "+t"(x), "+u"(y)
 		);
-		return result;
+
+		return x;
+	}
+
+	template<floating_point T>
+	inline constexpr T scalbn(T x, int n)
+	{
+		asm("fscale" : "+t"(x) : "u"(static_cast<T>(n)));
+		return x;
+	}
+
+	template<floating_point T>
+	inline constexpr T ldexp(T x, int y)
+	{
+		const bool exp_sign = y < 0;
+		if (exp_sign)
+			y = -y;
+
+		T exp = (T)1.0;
+		T mult = (T)2.0;
+		while (y)
+		{
+			if (y & 1)
+				exp *= mult;
+			mult *= mult;
+			y >>= 1;
+		}
+
+		if (exp_sign)
+			exp = (T)1.0 / exp;
+
+		return x * exp;
+	}
+
+	template<floating_point T>
+	inline constexpr T sqrt(T x)
+	{
+		asm("fsqrt" : "+t"(x));
+		return x;
+	}
+
+	template<floating_point T>
+	inline constexpr T cbrt(T value)
+	{
+		if (value == 0.0)
+			return 0.0;
+		return pow<T>(value, 1.0 / 3.0);
+	}
+
+	template<floating_point T>
+	inline constexpr T sin(T x)
+	{
+		x = fmod<T>(x, (T)2.0 * numbers::pi_v<T>);
+		asm("fsin" : "+t"(x));
+		return x;
+	}
+
+	template<floating_point T>
+	inline constexpr T cos(T x)
+	{
+		if (abs(x) >= (T)9223372036854775808.0)
+			x = fmod<T>(x, (T)2.0 * numbers::pi_v<T>);
+		asm("fcos" : "+t"(x));
+		return x;
+	}
+
+	template<floating_point T>
+	inline constexpr T tan(T x)
+	{
+		T one, ret;
+		asm(
+			"fptan"
+			: "=t"(one), "=u"(ret)
+			: "0"(x)
+		);
+		return ret;
+	}
+
+	template<floating_point T>
+	inline constexpr T atan2(T y, T x)
+	{
+		asm(
+			"fpatan"
+			: "+t"(x)
+			: "u"(y)
+			: "st(1)"
+		);
+		return x;
+	}
+
+	template<floating_point T>
+	inline constexpr T atan(T x)
+	{
+		return atan2<T>(x, 1.0);
+	}
+
+	template<floating_point T>
+	inline constexpr T asin(T x)
+	{
+		if (x == (T)0.0)
+			return (T)0.0;
+		if (x == (T)1.0)
+			return  numbers::pi_v<T> / (T)2.0;
+		if (x == (T)-1.0)
+			return -numbers::pi_v<T> / (T)2.0;
+		return (T)2.0 * atan<T>(x / (T(1.0) + sqrt<T>((T)1.0 - x * x)));
+	}
+
+	template<floating_point T>
+	inline constexpr T acos(T x)
+	{
+		if (x == (T)0.0)
+			return numbers::pi_v<T> / (T)2.0;
+		if (x == (T)1.0)
+			return (T)0.0;
+		if (x == (T)-1.0)
+			return numbers::pi_v<T>;
+		return (T)2.0 * atan<T>(sqrt<T>((T)1.0 - x * x) / ((T)1.0 + x));
+	}
+
+	template<floating_point T>
+	inline constexpr T sinh(T x)
+	{
+		return (exp<T>(x) - exp<T>(-x)) / (T)2.0;
+	}
+
+	template<floating_point T>
+	inline constexpr T cosh(T x)
+	{
+		return (exp<T>(x) + exp<T>(-x)) / (T)2.0;
+	}
+
+	template<floating_point T>
+	inline constexpr T tanh(T x)
+	{
+		const T exp_px = exp<T>(x);
+		const T exp_nx = exp<T>(-x);
+		return (exp_px - exp_nx) / (exp_px + exp_nx);
+	}
+
+	template<floating_point T>
+	inline constexpr T asinh(T x)
+	{
+		return log<T>(x + sqrt<T>(x * x + (T)1.0));
+	}
+
+	template<floating_point T>
+	inline constexpr T acosh(T x)
+	{
+		return log<T>(x + sqrt<T>(x * x - (T)1.0));
+	}
+
+	template<floating_point T>
+	inline constexpr T atanh(T x)
+	{
+		return (T)0.5 * log<T>(((T)1.0 + x) / ((T)1.0 - x));
+	}
+
+	template<floating_point T>
+	inline constexpr T hypot(T x, T y)
+	{
+		return sqrt<T>(x * x + y * y);
 	}

 }
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -4,8 +4,6 @@ if (NOT ${CMAKE_SYSTEM_NAME} STREQUAL "banan-os")
 	message(FATAL_ERROR "CMAKE_SYSTEM_NAME is not banan-os")
 endif ()

-set(BANAN_ENABLE_SSE 1)
-
 project(banan-os CXX C ASM)

 set(BANAN_BASE_SYSROOT ${CMAKE_SOURCE_DIR}/base-sysroot.tar.gz)
--- a/kernel/CMakeLists.txt
+++ b/kernel/CMakeLists.txt
@ -175,7 +175,6 @@ add_executable(kernel ${KERNEL_SOURCES})

 target_compile_definitions(kernel PRIVATE __is_kernel)
 target_compile_definitions(kernel PRIVATE __arch=${BANAN_ARCH})
-target_compile_definitions(kernel PRIVATE __enable_sse=${BANAN_ENABLE_SSE})

 target_compile_options(kernel PRIVATE
 	-O2 -g
--- a/kernel/include/kernel/Thread.h
+++ b/kernel/include/kernel/Thread.h
@ -86,10 +86,8 @@ namespace Kernel
 		InterruptStack& interrupt_stack() { return m_interrupt_stack; }
 		InterruptRegisters& interrupt_registers() { return m_interrupt_registers; }

-#if __enable_sse
 		void save_sse();
 		void load_sse();
-#endif

 		void add_mutex() { m_mutex_count++; }
 		void remove_mutex() { m_mutex_count--; }
@ -127,9 +125,7 @@ namespace Kernel

 		BAN::Atomic<uint32_t>      m_mutex_count          { 0 };

-#if __enable_sse
 		alignas(16) uint8_t m_sse_storage[512] {};
-#endif

 		friend class Process;
 		friend class Scheduler;
--- a/kernel/kernel/IDT.cpp
+++ b/kernel/kernel/IDT.cpp
@ -182,9 +182,7 @@ namespace Kernel
 		if (tid)
 		{
 			auto& thread = Thread::current();
-#if __enable_sse
 			thread.save_sse();
-#endif

 			if (isr == ISR::PageFault && Thread::current().is_userspace())
 			{
@ -318,12 +316,8 @@ namespace Kernel

 		ASSERT(Thread::current().state() != Thread::State::Terminated);

-done:
-#if __enable_sse
+	done:
 		Thread::current().load_sse();
-#else
-		return;
-#endif
 	}

 	extern "C" void cpp_yield_handler(InterruptStack* interrupt_stack, InterruptRegisters* interrupt_registers)
@ -376,9 +370,7 @@ done:
 			asm volatile("cli; 1: hlt; jmp 1b");
 		}

-#if __enable_sse
 		Thread::current().save_sse();
-#endif

 		if (!InterruptController::get().is_in_service(irq))
 			dprintln("spurious irq 0x{2H}", irq);
@ -399,9 +391,7 @@ done:

 		ASSERT(Thread::current().state() != Thread::State::Terminated);

-#if __enable_sse
 		Thread::current().load_sse();
-#endif
 	}

 	void IDT::register_interrupt_handler(uint8_t index, void (*handler)())
--- a/kernel/kernel/Thread.cpp
+++ b/kernel/kernel/Thread.cpp
@ -36,7 +36,6 @@ namespace Kernel

 	static pid_t s_next_tid = 1;

-#if __enable_sse
 	alignas(16) static uint8_t s_default_sse_storage[512];
 	static bool s_default_sse_storage_initialized = false;

@ -51,7 +50,6 @@ namespace Kernel
 			: [mxcsr]"m"(mxcsr)
 		);
 	}
-#endif

 	BAN::ErrorOr<Thread*> Thread::create_kernel(entry_t entry, void* data, Process* process)
 	{
@ -129,14 +127,12 @@ namespace Kernel
 	Thread::Thread(pid_t tid, Process* process)
 		: m_tid(tid), m_process(process)
 	{
-#if __enable_sse
 		if (!s_default_sse_storage_initialized)
 		{
 			initialize_default_sse_storage();
 			s_default_sse_storage_initialized = true;
 		}
 		memcpy(m_sse_storage, s_default_sse_storage, sizeof(m_sse_storage));
-#endif
 	}

 	Thread& Thread::current()
@ -506,7 +502,6 @@ namespace Kernel
 		ASSERT_NOT_REACHED();
 	}

-#if __enable_sse
 	void Thread::save_sse()
 	{
 		asm volatile("fxsave %0" :: "m"(m_sse_storage));
@ -516,6 +511,5 @@ namespace Kernel
 	{
 		asm volatile("fxrstor %0" :: "m"(m_sse_storage));
 	}
-#endif

 }
--- a/userspace/libraries/CMakeLists.txt
+++ b/userspace/libraries/CMakeLists.txt
@ -21,10 +21,5 @@ foreach(library ${USERSPACE_LIBRARIES})
 		target_link_options(${library_lower} PRIVATE -nolibc)
 		# Default compile options
 		target_compile_options(${library_lower} PRIVATE -g -O2 -Wall -Wextra -Werror)
-
-		target_compile_definitions(${library_lower} PRIVATE __enable_sse=${BANAN_ENABLE_SSE})
-		if (NOT BANAN_ENABLE_SSE)
-			target_compile_options(${library_lower} PRIVATE -mno-sse -mno-sse2)
-		endif ()
 	endif()
 endforeach()
--- a/userspace/libraries/LibC/CMakeLists.txt
+++ b/userspace/libraries/LibC/CMakeLists.txt
@ -6,6 +6,7 @@ set(LIBC_SOURCES
 	dlfcn.cpp
 	errno.cpp
 	fcntl.cpp
+	fenv.cpp
 	ftw.cpp
 	grp.cpp
 	inttypes.cpp
@ -48,10 +49,6 @@ set(LIBC_SOURCES

 add_library(objlibc OBJECT ${LIBC_SOURCES})
 target_compile_definitions(objlibc PRIVATE __arch=${BANAN_ARCH})
-target_compile_definitions(objlibc PRIVATE __enable_sse=${BANAN_ENABLE_SSE})
-if (NOT BANAN_ENABLE_SSE)
-	target_compile_options(objlibc PRIVATE -mno-sse -mno-sse2)
-endif ()

 target_compile_options(objlibc PRIVATE -O2 -g -Wstack-usage=512 -fno-tree-loop-distribute-patterns -fno-exceptions -fpic -nolibc)
 target_compile_options(objlibc PUBLIC -Wall -Wextra -Werror -Wno-error=stack-usage=)
--- a/userspace/libraries/LibC/fenv.cpp
+++ b/userspace/libraries/LibC/fenv.cpp
@ -0,0 +1,134 @@
+#include <fenv.h>
+
+static uint16_t read_fpu_control_word()
+{
+	uint16_t control;
+	asm volatile("fstcw %0" : "=m"(control));
+	return control;
+}
+
+static uint16_t read_fpu_status_word()
+{
+	uint16_t status;
+	asm volatile("fstsw %0" : "=m"(status));
+	return status;
+}
+
+static uint32_t read_mxcsr()
+{
+	uint32_t mxcsr;
+	asm volatile("stmxcsr %0" : "=m"(mxcsr));
+	return mxcsr;
+}
+
+static void write_fpu_control_word(uint16_t control)
+{
+	asm volatile("fldcw %0" :: "m"(control));
+}
+
+static void write_mxcsr(uint32_t mxcsr)
+{
+	asm volatile("ldmxcsr %0" :: "m"(mxcsr));
+}
+
+int fegetenv(fenv_t* envp)
+{
+	asm volatile("fstenv %0" : "=m"(envp->x87_fpu));
+	envp->mxcsr = read_mxcsr();
+	return 0;
+}
+
+int fesetenv(const fenv_t* envp)
+{
+	if (envp == FE_DFL_ENV)
+	{
+		asm volatile("finit");
+		write_mxcsr(0x1F80);
+		return 0;
+	}
+
+	asm volatile("fldenv %0" :: "m"(envp->x87_fpu));
+	write_mxcsr(envp->mxcsr);
+	return 0;
+}
+
+int fegetround(void)
+{
+	return (read_fpu_control_word() >> 10) & 0x03;
+}
+
+int fesetround(int round)
+{
+	uint16_t control = read_fpu_control_word();
+	control &= ~(3 << 10);
+	control |= round << 10;
+	write_fpu_control_word(control);
+
+	uint32_t mxcsr = read_mxcsr();
+	mxcsr &= ~(3 << 13);
+	mxcsr |= round << 13;
+	write_mxcsr(mxcsr);
+
+	return 0;
+}
+
+int feclearexcept(int excepts)
+{
+	excepts &= FE_ALL_EXCEPT;
+
+	fenv_t temp_env;
+	fegetenv(&temp_env);
+	temp_env.x87_fpu.status &= ~(excepts | (1 << 7));
+	temp_env.mxcsr &= ~excepts;
+	fesetenv(&temp_env);
+	return 0;
+}
+
+int fetestexcept(int excepts)
+{
+	excepts &= FE_ALL_EXCEPT;
+
+	const uint16_t status = read_fpu_status_word();
+	const uint32_t mxcsr = read_mxcsr();
+	return (status | mxcsr) & excepts;
+}
+
+int feholdexcept(fenv_t*);
+
+int feraiseexcept(int excepts)
+{
+	excepts &= FE_ALL_EXCEPT;
+
+	fenv_t temp_env;
+	fegetenv(&temp_env);
+	temp_env.x87_fpu.status |= excepts;
+	fesetenv(&temp_env);
+	asm volatile("fwait");
+	return 0;
+}
+
+int fegetexceptflag(fexcept_t* flagp, int excepts)
+{
+	*flagp = fetestexcept(excepts);
+	return 0;
+}
+
+int fesetexceptflag(const fexcept_t* flagp, int excepts)
+{
+	excepts &= FE_ALL_EXCEPT;
+
+	fenv_t temp_env;
+	fegetenv(&temp_env);
+	temp_env.x87_fpu.status &= ~(FE_ALL_EXCEPT | (1 << 7));
+	temp_env.x87_fpu.status |= *flagp & excepts;
+	fesetenv(&temp_env);
+	return 0;
+}
+
+int feupdateenv(const fenv_t* envp)
+{
+	int excepts = fetestexcept(FE_ALL_EXCEPT);
+	fesetenv(envp);
+	feraiseexcept(excepts);
+	return 0;
+}
--- a/userspace/libraries/LibC/include/fenv.h
+++ b/userspace/libraries/LibC/include/fenv.h
@ -7,7 +7,46 @@

 __BEGIN_DECLS

-// FIXME
+#include <stdint.h>
+
+#define FE_INVALID   (1 << 0)
+#define FE_DIVBYZERO (1 << 2)
+#define FE_OVERFLOW  (1 << 3)
+#define FE_UNDERFLOW (1 << 4)
+#define FE_INEXACT   (1 << 5)
+#define FE_ALL_EXCEPT (FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW | FE_INEXACT)
+
+#define FE_TONEAREST  0
+#define FE_DOWNWARD   1
+#define FE_UPWARD     2
+#define FE_TOWARDZERO 3
+
+typedef struct {
+	uint32_t control;
+	uint32_t status;
+	uint32_t __unused[5];
+} __x87_fpu_t;
+
+typedef struct {
+	__x87_fpu_t x87_fpu;
+	uint32_t mxcsr;
+} fenv_t;
+
+typedef uint8_t fexcept_t;
+
+#define FE_DFL_ENV ((const fenv_t*)0x1)
+
+int feclearexcept(int);
+int fegetenv(fenv_t*);
+int fegetexceptflag(fexcept_t*, int);
+int fegetround(void);
+int feholdexcept(fenv_t*);
+int feraiseexcept(int);
+int fesetenv(const fenv_t*);
+int fesetexceptflag(const fexcept_t*, int);
+int fesetround(int);
+int fetestexcept(int);
+int feupdateenv(const fenv_t*);

 __END_DECLS

--- a/userspace/libraries/LibC/include/math.h
+++ b/userspace/libraries/LibC/include/math.h
@ -5,6 +5,8 @@

 #include <sys/cdefs.h>

+#include <float.h>
+
 __BEGIN_DECLS

 #ifndef FLT_EVAL_METHOD
@ -22,52 +24,50 @@ __BEGIN_DECLS
 	typedef long double double_t;
 #endif

-// FIXME: define this
-// int fpclassify(real-floating x);
+#define fpclassify(x)        __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO, x)
+#define isfinite(x)          __builtin_isfinite(x)
+#define isgreater(x, y)      __builtin_isgreater(x, y)
+#define isgreaterequal(x, y) __builtin_isgreaterequal(x, y)
+#define isinf(x)             __builtin_isinf_sign(x)
+#define isless(x, y)         __builtin_isless(x, y)
+#define islessequal(x, y)    __builtin_islessequal(x, y)
+#define islessgreater(x, y)  __builtin_islessgreater(x, y)
+#define isnan(x)             __builtin_isnan(x)
+#define isnormal(x)          __builtin_isnormal(x)
+#define isunordered(x, y)    __builtin_isunordered(x, y)
+#define signbit(x)           __builtin_signbit(x)

-#define isfinite(x)				__builtin_isfinite(x)
-#define isgreater(x, y)			__builtin_isgreater(x, y)
-#define isgreaterequal(x, y)	__builtin_isgreaterequal(x, y)
-#define isinf(x)				__builtin_isinf_sign(x)
-#define isless(x, y)			__builtin_isless(x, y)
-#define islessequal(x, y)		__builtin_islessequal(x, y)
-#define islessgreater(x, y)		__builtin_islessgreater(x, y)
-#define isnan(x)				__builtin_isnan(x)
-#define isnormal(x)				__builtin_isnormal(x)
-#define isunordered(x, y)		__builtin_isunordered(x, y)
-#define signbit(x)				__builtin_signbit(x)
+#define M_E        2.7182818284590452354
+#define M_LOG2E    1.4426950408889634074
+#define M_LOG10E   0.43429448190325182765
+#define M_LN2      0.69314718055994530942
+#define M_LN10     2.30258509299404568402
+#define M_PI       3.14159265358979323846
+#define M_PI_2     1.57079632679489661923
+#define M_PI_4     0.78539816339744830962
+#define M_1_PI     0.31830988618379067154
+#define M_2_PI     0.63661977236758134308
+#define M_2_SQRTPI 1.12837916709551257390
+#define M_SQRT2    1.41421356237309504880
+#define M_SQRT1_2  0.70710678118654752440

-#define M_E			2.7182818284590452354
-#define M_LOG2E		1.4426950408889634074
-#define M_LOG10E	0.43429448190325182765
-#define M_LN2		0.69314718055994530942
-#define M_LN10		2.30258509299404568402
-#define M_PI		3.14159265358979323846
-#define M_PI_2		1.57079632679489661923
-#define M_PI_4		0.78539816339744830962
-#define M_1_PI		0.31830988618379067154
-#define M_2_PI		0.63661977236758134308
-#define M_2_SQRTPI	1.12837916709551257390
-#define M_SQRT2		1.41421356237309504880
-#define M_SQRT1_2	0.70710678118654752440
+#define HUGE_VAL  __builtin_huge_val()
+#define HUGE_VALF __builtin_huge_valf()
+#define HUGE_VALL __builtin_huge_vall()
+#define INFINITY  __builtin_inff()
+#define NAN       __builtin_nanf("")

-#define HUGE_VAL	__builtin_huge_val()
-#define HUGE_VALF	__builtin_huge_valf()
-#define HUGE_VALL	__builtin_huge_vall()
-#define INFINITY	__builtin_inff()
-#define NAN			__builtin_nanf("")
+#define FP_INFINITE  0
+#define FP_NAN       1
+#define FP_NORMAL    2
+#define FP_SUBNORMAL 3
+#define FP_ZERO      4

-#define FP_INFINITE		0
-#define FP_NAN			1
-#define FP_NORMAL		2
-#define FP_SUBNORMAL	3
-#define FP_ZERO			4
+#define FP_ILOGB0   -2147483647
+#define FP_ILOGBNAN +2147483647

-#define FP_ILOGB0	-2147483647
-#define FP_ILOGBNAN	+2147483647
-
-#define MATH_ERRNO		1
-#define MATH_ERREXCEPT	2
+#define MATH_ERRNO     1
+#define MATH_ERREXCEPT 2

 #define math_errhandling (MATH_ERRNO | MATH_ERREXCEPT)

@ -191,9 +191,9 @@ long		lroundl(long double);
 double		modf(double, double*);
 float		modff(float, float*);
 long double modfl(long double, long double*);
-double		nan(const char*);
-float		nanf(const char*);
-long double nanl(const char*);
+#define nan(tagp) __builtin_nan(tagp)
+#define nanf(tagp) __builtin_nanf(tagp)
+#define nanl(tagp) __builtin_nanl(tagp)
 double		nearbyint(double);
 float		nearbyintf(float);
 long double nearbyintl(long double);
--- a/userspace/libraries/LibC/math.cpp
+++ b/userspace/libraries/LibC/math.cpp
@ -1,71 +1,285 @@
+#include <BAN/Math.h>
+
 #include <math.h>
+#include <stddef.h>

-#define BUILTINS1(func) \
-	float func##f(float a) { return __builtin_##func##f(a); } \
-	double func(double a) { return __builtin_##func(a); } \
-	long double func##l(long double a) { return __builtin_##func##l(a); }
+#define BAN_FUNC1(func) \
+	float func##f(float a) { return BAN::Math::func<float>(a); } \
+	double func(double a) { return BAN::Math::func<double>(a); } \
+	long double func##l(long double a) { return BAN::Math::func<long double>(a); }

-#define BUILTINS2(func) \
-	float func##f(float a, float b) { return __builtin_##func##f(a, b); } \
-	double func(double a, double b) { return __builtin_##func(a, b); } \
-	long double func##l(long double a, long double b) { return __builtin_##func##l(a, b); }
+#define BAN_FUNC2(func) \
+	float func##f(float a, float b) { return BAN::Math::func<float>(a, b); } \
+	double func(double a, double b) { return BAN::Math::func<double>(a, b); } \
+	long double func##l(long double a, long double b) { return BAN::Math::func<long double>(a, b); }

-#define BUILTINS2_TYPE(func, type) \
-	float func##f(float a, type b) { return __builtin_##func##f(a, b); } \
-	double func(double a, type b) { return __builtin_##func(a, b); } \
-	long double func##l(long double a, type b) { return __builtin_##func##l(a, b); }
+#define BAN_FUNC2_PTR(func) \
+	float func##f(float a, float* b) { return BAN::Math::func<float>(a, b); } \
+	double func(double a, double* b) { return BAN::Math::func<double>(a, b); } \
+	long double func##l(long double a, long double* b) { return BAN::Math::func<long double>(a, b); }
+
+#define BAN_FUNC2_TYPE(func, type) \
+	float func##f(float a, type b) { return BAN::Math::func<float>(a, b); } \
+	double func(double a, type b) { return BAN::Math::func<double>(a, b); } \
+	long double func##l(long double a, type b) { return BAN::Math::func<long double>(a, b); }
+
+#define FUNC_EXPR1(func, ...) \
+	float func##f(float a) { return __VA_ARGS__; } \
+	double func(double a) { return __VA_ARGS__; } \
+	long double func##l(long double a) { return __VA_ARGS__; }
+
+#define FUNC_EXPR2(func, ...) \
+	float func##f(float a, float b) { return __VA_ARGS__; } \
+	double func(double a, double b) { return __VA_ARGS__; } \
+	long double func##l(long double a, long double b) { return __VA_ARGS__; }
+
+#define FUNC_EXPR3(func, ...) \
+	float func##f(float a, float b, float c) { return __VA_ARGS__; } \
+	double func(double a, double b, double c) { return __VA_ARGS__; } \
+	long double func##l(long double a, long double b, long double c) { return __VA_ARGS__; }
+
+#define FUNC_EXPR1_RET(func, ret, ...) \
+	ret func##f(float a) { return __VA_ARGS__; } \
+	ret func(double a) { return __VA_ARGS__; } \
+	ret func##l(long double a) { return __VA_ARGS__; }
+
+#define FUNC_EXPR2_TYPE(func, type, ...) \
+	float func##f(float a, type b) { return __VA_ARGS__; } \
+	double func(double a, type b) { return __VA_ARGS__; } \
+	long double func##l(long double a, type b) { return __VA_ARGS__; }
+
+template<BAN::floating_point T>
+struct integral_atleast;
+
+template<BAN::floating_point T> requires(sizeof(T) <= sizeof(uint8_t))
+struct integral_atleast<T> { using type = uint8_t; };
+
+template<BAN::floating_point T> requires(sizeof(T) > sizeof(uint8_t) && sizeof(T) <= sizeof(uint16_t))
+struct integral_atleast<T> { using type = uint16_t; };
+
+template<BAN::floating_point T> requires(sizeof(T) > sizeof(uint16_t) && sizeof(T) <= sizeof(uint32_t))
+struct integral_atleast<T> { using type = uint32_t; };
+
+template<BAN::floating_point T> requires(sizeof(T) > sizeof(uint32_t) && sizeof(T) <= sizeof(uint64_t))
+struct integral_atleast<T> { using type = uint64_t; };
+
+template<BAN::floating_point T> requires(sizeof(T) > sizeof(uint64_t) && sizeof(T) <= sizeof(__uint128_t))
+struct integral_atleast<T> { using type = __uint128_t; };
+
+template<BAN::integral T, size_t mantissa_bits, size_t exponent_bits, bool integral>
+struct __FloatDecompose;
+
+template<BAN::integral T, size_t mantissa_bits, size_t exponent_bits>
+struct __FloatDecompose<T, mantissa_bits, exponent_bits, true>
+{
+	T mantissa : mantissa_bits;
+	T          : 1;
+	T exponent : exponent_bits;
+	T sign     : 1;
+};
+
+template<BAN::integral T, size_t mantissa_bits, size_t exponent_bits>
+struct __FloatDecompose<T, mantissa_bits, exponent_bits, false>
+{
+	T mantissa : mantissa_bits;
+	T exponent : exponent_bits;
+	T sign     : 1;
+};
+
+template<BAN::floating_point T>
+struct FloatDecompose
+{
+	using value_type = integral_atleast<T>::type;
+
+	static constexpr size_t mantissa_bits
+		= BAN::is_same_v<T, float>       ?  FLT_MANT_DIG - 1
+		: BAN::is_same_v<T, double>      ?  DBL_MANT_DIG - 1
+		: BAN::is_same_v<T, long double> ? LDBL_MANT_DIG - 1
+		: 0;
+
+	static constexpr size_t exponent_bits
+		= BAN::is_same_v<T, float>       ? 8
+		: BAN::is_same_v<T, double>      ? 11
+		: BAN::is_same_v<T, long double> ? 15
+		: 0;
+
+	static constexpr value_type mantissa_max
+		= (static_cast<value_type>(1) << mantissa_bits) - 1;
+
+	static constexpr value_type exponent_max
+		= (static_cast<value_type>(1) << exponent_bits) - 1;
+
+	FloatDecompose(T x)
+		: raw(x)
+	{}
+
+	union
+	{
+		T raw;
+		__FloatDecompose<
+			value_type,
+			mantissa_bits,
+			exponent_bits,
+			BAN::is_same_v<T, long double>
+		> bits;
+	};
+};
+
+template<BAN::floating_point T1, BAN::floating_point T2>
+static T1 nextafter_impl(T1 x, T2 y)
+{
+	if (isnan(x) || isnan(y))
+		return BAN::numeric_limits<T1>::quiet_NaN();
+
+	if (!isfinite(x) || x == y)
+		return x;
+
+	FloatDecompose<T1> decompose(x);
+
+	// at zero
+	if ((T2)x == (T2)0.0)
+	{
+		decompose.bits.mantissa = 1;
+		decompose.bits.sign = (x > y);
+		return decompose.raw;
+	}
+
+	// away from zero
+	if ((x > y) == decompose.bits.sign)
+	{
+		decompose.bits.mantissa++;
+		if (decompose.bits.mantissa == 0)
+		{
+			decompose.bits.exponent++;
+			if (decompose.bits.exponent == decompose.exponent_max)
+				decompose.bits.mantissa = 0;
+		}
+		return decompose.raw;
+	}
+
+	// towards zero
+	decompose.bits.mantissa--;
+	if (decompose.bits.mantissa == decompose.mantissa_max)
+	{
+		if (decompose.bits.exponent == 0)
+			return 0.0;
+		decompose.bits.exponent--;
+	}
+
+	return decompose.raw;
+}
+
+static long double tgamma_impl(long double x)
+{
+	constexpr long double pi = BAN::numbers::pi_v<long double>;
+
+	if (x == 0.0L)
+		return BAN::numeric_limits<long double>::infinity();
+
+	// reflection formula
+	if (x < 0.5L)
+		return pi / (BAN::Math::sin(pi * x) * tgamma_impl(1.0L - x));
+	x -= 1.0L;
+
+	// Lanczos approximation
+
+	constexpr long double g = 8.0L;
+	constexpr long double p[] {
+		 0.9999999999999999298e+0L,
+		 1.9753739023578852322e+3L,
+		-4.3973823927922428918e+3L,
+		 3.4626328459862717019e+3L,
+		-1.1569851431631167820e+3L,
+		 1.5453815050252775060e+2L,
+		-6.2536716123689161798e+0L,
+		 3.4642762454736807441e-2L,
+		-7.4776171974442977377e-7L,
+		 6.3041253821852264261e-8L,
+		-2.7405717035683877489e-8L,
+		 4.0486948817567609101e-9L
+	};
+	constexpr long double sqrt_2pi = 2.5066282746310005024L;
+
+	long double A = p[0];
+	for (size_t i = 1; i < sizeof(p) / sizeof(*p); i++)
+		A += p[i] / (x + i);
+
+	const long double t = x + g + 0.5L;
+	return sqrt_2pi * BAN::Math::pow(t, x + 0.5L) * BAN::Math::exp(-t) * A;
+}
+
+static long double erf_impl(long double x)
+{
+	long double sum = 0.0L;
+	for (size_t n = 0; n < 100; n++)
+		sum += x / (2.0L * n + 1.0L) * BAN::Math::ldexp(-x * x, n) / tgamma_impl(n + 1.0L);
+
+	constexpr long double sqrt_pi = 1.77245385090551602729L;
+	return 2.0L / sqrt_pi * sum;
+}

 __BEGIN_DECLS

-#if __enable_sse
-BUILTINS1(acos)
-BUILTINS1(acosh)
-BUILTINS1(asin)
-BUILTINS1(asinh)
-BUILTINS1(atan)
-BUILTINS2(atan2)
-BUILTINS1(atanh)
-BUILTINS1(cbrt)
-BUILTINS1(ceil)
-BUILTINS2(copysign)
-BUILTINS1(cos)
-BUILTINS1(cosh)
-BUILTINS1(erf)
-BUILTINS1(erfc)
-BUILTINS1(exp)
-BUILTINS1(exp2)
-BUILTINS1(expm1)
-BUILTINS1(fabs)
-BUILTINS2(fdim)
-BUILTINS1(floor)
-BUILTINS2(fmax)
-BUILTINS2(fmin)
-BUILTINS2(fmod)
-BUILTINS2(hypot)
-BUILTINS1(j0)
-BUILTINS1(j1)
-BUILTINS2_TYPE(ldexp, int)
-BUILTINS1(lgamma)
-BUILTINS1(log)
-BUILTINS1(log10)
-BUILTINS1(log1p)
-BUILTINS1(log2)
-BUILTINS1(logb)
-BUILTINS1(nearbyint)
-BUILTINS2(nextafter)
-BUILTINS2(pow)
-BUILTINS2(remainder)
-BUILTINS1(rint)
-BUILTINS1(round)
-BUILTINS1(sin)
-BUILTINS1(sinh)
-BUILTINS1(sqrt)
-BUILTINS1(tan)
-BUILTINS1(tanh)
-BUILTINS1(tgamma)
-BUILTINS1(trunc)
-BUILTINS1(y0)
-BUILTINS1(y1)
-#endif
+// FIXME: add handling for nan and infinity values
+
+BAN_FUNC1(acos)
+BAN_FUNC1(acosh)
+BAN_FUNC1(asin)
+BAN_FUNC1(asinh)
+BAN_FUNC1(atan)
+BAN_FUNC2(atan2)
+BAN_FUNC1(atanh)
+BAN_FUNC1(cbrt)
+BAN_FUNC1(ceil)
+BAN_FUNC2(copysign)
+BAN_FUNC1(cos)
+BAN_FUNC1(cosh)
+FUNC_EXPR1(erf, erf_impl(a))
+FUNC_EXPR1(erfc, 1.0 - erf_impl(a))
+BAN_FUNC1(exp)
+BAN_FUNC1(exp2)
+FUNC_EXPR1(expm1, ({ a -= 1.0; BAN::Math::exp(a); }))
+FUNC_EXPR1(fabs, BAN::Math::abs(a))
+FUNC_EXPR2(fdim, a > b ? a - b : 0.0)
+BAN_FUNC1(floor)
+FUNC_EXPR3(fma, (a * b) + c)
+FUNC_EXPR2(fmax, a < b ? b : a)
+FUNC_EXPR2(fmin, a < b ? a : b)
+BAN_FUNC2(fmod)
+BAN_FUNC2_TYPE(frexp, int*)
+BAN_FUNC2(hypot)
+FUNC_EXPR1_RET(ilogb, int, BAN::Math::logb(a))
+// j0, j1, jn
+BAN_FUNC2_TYPE(ldexp, int)
+FUNC_EXPR1(lgamma, BAN::Math::log(BAN::Math::abs(tgamma_impl(a))))
+FUNC_EXPR1_RET(llrint,  long long, BAN::Math::rint(a))
+FUNC_EXPR1_RET(llround, long long, BAN::Math::round(a))
+BAN_FUNC1(log)
+BAN_FUNC1(log10)
+FUNC_EXPR1(log1p, ({ a += 1.0; BAN::Math::log(a); }));
+BAN_FUNC1(log2)
+BAN_FUNC1(logb)
+FUNC_EXPR1_RET(lrint,  long, BAN::Math::rint(a))
+FUNC_EXPR1_RET(lround, long, BAN::Math::round(a))
+BAN_FUNC2_PTR(modf)
+FUNC_EXPR1(nearbyint, BAN::Math::rint(a))
+FUNC_EXPR2(nextafter, nextafter_impl(a, b))
+FUNC_EXPR2_TYPE(nexttoward, long double, nextafter_impl(a, b))
+FUNC_EXPR2(pow, ({ if (isnan(a)) return a; if (isnan(b)) return b; BAN::Math::pow(a, b); }))
+// remainder
+// remquo
+BAN_FUNC1(rint)
+FUNC_EXPR1(round, ({ if (!isfinite(a)) return a; BAN::Math::round(a); }))
+FUNC_EXPR2_TYPE(scalbln, long, BAN::Math::scalbn(a, b))
+FUNC_EXPR2_TYPE(scalbn,  int,  BAN::Math::scalbn(a, b))
+BAN_FUNC1(sin)
+BAN_FUNC1(sinh)
+BAN_FUNC1(sqrt)
+BAN_FUNC1(tan)
+BAN_FUNC1(tanh)
+FUNC_EXPR1(tgamma, tgamma_impl(a))
+BAN_FUNC1(trunc)
+// y0, y1, yn
+

 __END_DECLS
--- a/userspace/libraries/LibC/printf_impl.cpp
+++ b/userspace/libraries/LibC/printf_impl.cpp
@ -129,7 +129,6 @@ static void integer_to_string(char* buffer, T value, int base, bool upper, forma
 	buffer[offset++] = '\0';
 }

-#if __enable_sse
 template<BAN::floating_point T>
 static void floating_point_to_string(char* buffer, T value, bool upper, const format_options_t options)
 {
@ -259,7 +258,6 @@ static void floating_point_to_exponent_string(char* buffer, T value, bool upper,
 	exponent_options.width = 3;
 	integer_to_string<int>(buffer + offset, exponent, 10, upper, exponent_options);
 }
-#endif

 extern "C" int printf_impl(const char* format, va_list arguments, int (*putc_fun)(int, void*), void* data)
 {
@ -496,7 +494,6 @@ extern "C" int printf_impl(const char* format, va_list arguments, int (*putc_fun
 				format++;
 				break;
 			}
-#if __enable_sse
 			case 'e':
 			case 'E':
 			{
@ -529,7 +526,6 @@ extern "C" int printf_impl(const char* format, va_list arguments, int (*putc_fun
 			case 'A':
 				// TODO
 				break;
-#endif
 			case 'c':
 			{
 				conversion[0] = va_arg(arguments, int);
--- a/userspace/libraries/LibC/scanf_impl.cpp
+++ b/userspace/libraries/LibC/scanf_impl.cpp
@ -260,7 +260,6 @@ int scanf_impl(const char* format, va_list arguments, int (*__getc_fun)(bool adv
 			}
 		};

-#if __enable_sse
 	auto parse_floating_point_internal =
 		[&parse_integer_internal, &get_input, &in]<int BASE, typename T>(BASE_TYPE<BASE>, bool negative, int width, T* out, bool require_start = true) -> ConversionResult
 		{
@ -435,7 +434,6 @@ int scanf_impl(const char* format, va_list arguments, int (*__getc_fun)(bool adv
 					return ConversionResult::MATCH_FAILURE;
 			}
 		};
-#endif

 	auto parse_string =
 		[&arguments, &get_input, &in](uint8_t* mask, bool exclude, bool suppress, bool allocate, int min_len, int max_len, bool terminate) -> ConversionResult
@ -520,11 +518,9 @@ int scanf_impl(const char* format, va_list arguments, int (*__getc_fun)(bool adv
 				case 'x': result = parse_integer(BASE_TYPE<16>{}, IS_UNSIGNED<true> {}, conversion.suppress, conversion.field_width, conversion.length); break;
 				case 'X': result = parse_integer(BASE_TYPE<16>{}, IS_UNSIGNED<true> {}, conversion.suppress, conversion.field_width, conversion.length); break;
 				case 'p': result = parse_integer(BASE_TYPE<16>{}, IS_UNSIGNED<true> {}, conversion.suppress, conversion.field_width, LengthModifier::j); break;
-#if __enable_sse
 				case 'a': case 'e': case 'f': case 'g':
 					result = parse_floating_point(conversion.suppress, conversion.field_width, conversion.length);
 					break;
-#endif
 				case 'S':
 					conversion.length = LengthModifier::l;
 					// fall through
--- a/userspace/libraries/LibC/stdlib.cpp
+++ b/userspace/libraries/LibC/stdlib.cpp
@ -162,7 +162,6 @@ static T strtoT(const char* str, char** endp, int base, int& error)
 	return result;
 }

-#if __enable_sse
 template<BAN::floating_point T>
 static T strtoT(const char* str, char** endp, int& error)
 {
@ -306,16 +305,13 @@ static T strtoT(const char* str, char** endp, int& error)

 	if (exponent)
 		result *= BAN::Math::pow<T>((base == 10) ? 10 : 2, exponent);
-	return result;
+	return negative ? -result : result;
 }
-#endif

-#if __enable_sse
 double atof(const char* str)
 {
 	return strtod(str, nullptr);
 }
-#endif

 int atoi(const char* str)
 {
@ -332,7 +328,6 @@ long long atoll(const char* str)
 	return strtoll(str, nullptr, 10);
 }

-#if __enable_sse
 float strtof(const char* __restrict str, char** __restrict endp)
 {
 	return strtoT<float>(str, endp, errno);
@ -347,7 +342,6 @@ long double strtold(const char* __restrict str, char** __restrict endp)
 {
 	return strtoT<long double>(str, endp, errno);
 }
-#endif

 long strtol(const char* __restrict str, char** __restrict endp, int base)
 {
--- a/userspace/libraries/LibImage/Image.cpp
+++ b/userspace/libraries/LibImage/Image.cpp
@ -6,7 +6,6 @@
 #include <LibImage/PNG.h>

 #include <fcntl.h>
-#include <math.h>
 #include <sys/mman.h>

 namespace LibImage
@ -132,13 +131,8 @@ namespace LibImage
 					{
 						const double src_x = x * ratio_x;
 						const double src_y = y * ratio_y;
-#if __enable_sse
-						const double weight_x = src_x - floor(src_x);
-						const double weight_y = src_y - floor(src_y);
-#else
-						const double weight_x = src_x - (uint64_t)src_x;
-						const double weight_y = src_y - (uint64_t)src_y;
-#endif
+						const double weight_x = src_x - BAN::Math::floor(src_x);
+						const double weight_y = src_y - BAN::Math::floor(src_y);

 						const Color avg_t = Color::average(
 							get_clamped_color(src_x + 0.0, src_y),
@ -177,13 +171,8 @@ namespace LibImage
 					{
 						const double src_x = x * ratio_x;
 						const double src_y = y * ratio_y;
-#if __enable_sse
-						const double weight_x = src_x - floor(src_x);
-						const double weight_y = src_y - floor(src_y);
-#else
-						const double weight_x = src_x - (uint64_t)src_x;
-						const double weight_y = src_y - (uint64_t)src_y;
-#endif
+						const double weight_x = src_x - BAN::Math::floor(src_x);
+						const double weight_y = src_y - BAN::Math::floor(src_y);

 						FloatingColor values[4];
 						for (int64_t m = -1; m <= 2; m++)
--- a/userspace/programs/CMakeLists.txt
+++ b/userspace/programs/CMakeLists.txt
@ -46,9 +46,4 @@ foreach(project ${USERSPACE_PROGRAMS})
 	target_link_options(${project} PRIVATE -nolibc)
 	# Default compile options
 	target_compile_options(${project} PRIVATE -g -O2 -Wall -Wextra -Werror)
-
-	target_compile_definitions(${project} PRIVATE __enable_sse=${BANAN_ENABLE_SSE})
-	if (NOT BANAN_ENABLE_SSE)
-		target_compile_options(${project} PRIVATE -mno-sse -mno-sse2)
-	endif ()
 endforeach()
--- a/userspace/tests/CMakeLists.txt
+++ b/userspace/tests/CMakeLists.txt
@ -21,9 +21,4 @@ foreach(project ${USERSPACE_TESTS})
 	target_link_options(${project} PRIVATE -nolibc)
 	# Default compile options
 	target_compile_options(${project} PRIVATE -g -O2)
-
-	target_compile_definitions(${project} PRIVATE __enable_sse=${BANAN_ENABLE_SSE})
-	if (NOT BANAN_ENABLE_SSE)
-		target_compile_options(${project} PRIVATE -mno-sse -mno-sse2)
-	endif ()
 endforeach()