diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt
index 5ff5f71c65..22604b41e0 100644
--- a/kernel/CMakeLists.txt
+++ b/kernel/CMakeLists.txt
@@ -66,6 +66,7 @@ set(KERNEL_SOURCES
 	kernel/PCI.cpp
 	kernel/PIC.cpp
 	kernel/Process.cpp
+	kernel/Processor.cpp
 	kernel/Random.cpp
 	kernel/Scheduler.cpp
 	kernel/Semaphore.cpp
diff --git a/kernel/arch/x86_64/boot.S b/kernel/arch/x86_64/boot.S
index 0861baca44..253c9fc2a8 100644
--- a/kernel/arch/x86_64/boot.S
+++ b/kernel/arch/x86_64/boot.S
@@ -53,9 +53,9 @@ bananboot_start:
 bananboot_end:
 
 .section .bss, "aw", @nobits
-	# reserve 4096 bytes of initial stack for each processor
-	g_processor_stacks:
-		.skip 4096 * 64
+	boot_stack_bottom:
+		.skip 4096 * 4
+	boot_stack_top:
 
 	.global g_kernel_cmdline
 	g_kernel_cmdline:
@@ -108,6 +108,9 @@ g_ap_startup_done:
 .global g_ap_running_count
 g_ap_running_count:
 	.byte 0
+.global g_ap_stack_loaded
+g_ap_stack_loaded:
+	.byte 0
 
 .section .text
 
@@ -155,19 +158,12 @@ enable_sse:
 	movl %eax, %cr4
 	ret
 
-# NOTE: return address in argument %edi
-initialize_pmode_stack:
+initialize_lapic_id:
 	movl $1, %eax
 	cpuid
 	shrl $24, %ebx
-
 	movw %bx, %gs
-
-	shll $12, %ebx
-	addl $V2P(g_processor_stacks) + 4096, %ebx
-	movl %ebx, %esp
-
-	jmp *%edi
+	ret
 
 initialize_paging:
 	# enable PAE
@@ -195,13 +191,14 @@ initialize_paging:
 .global _start
 .type _start, @function
 _start:
+	cli; cld
+
 	# Initialize stack and multiboot info
 	movl %eax, V2P(bootloader_magic)
 	movl %ebx, V2P(bootloader_info)
 
-	movl $V2P(1f), %edi
-	jmp initialize_pmode_stack
-1:
+	movl $V2P(boot_stack_top), %esp
+	call initialize_lapic_id
 
 	call check_requirements
 	call enable_sse
@@ -253,7 +250,12 @@ system_halt:
 .code16
 .global ap_trampoline
 ap_trampoline:
-	cli
+	jmp 1f                  # step over the data slot that sits inside the trampoline code
+.align 8
+ap_stack_ptr:           # stack top for the AP being started; APIC.cpp stores it as uint32_t index 2 of the page at g_ap_init_addr (presumably byte offset 8, matching .align 8 - confirm)
+	.skip 4                 # 32-bit value, loaded into %esp in ap_protected_mode
+1:
+	cli; cld
 	ljmpl $0x00, $ap_cs_clear
 
 ap_cs_clear:
@@ -274,12 +276,15 @@ ap_protected_mode:
 	movw %ax, %ss
 	movw %ax, %es
 
-	movl $1f, %edi
-	jmp V2P(initialize_pmode_stack)
-1:
+	movl ap_stack_ptr, %esp
+	movb $1, V2P(g_ap_stack_loaded)
+	call V2P(initialize_lapic_id)
+
+	call V2P(enable_sse)
 
-	# load boot gdt, load boot page table and enter long mode
 	call V2P(initialize_paging)
+
+	# load boot gdt and enter long mode
 	lgdt V2P(boot_gdtr)
 	ljmpl $0x08, $ap_long_mode
 
diff --git a/kernel/include/kernel/Lock/SpinLock.h b/kernel/include/kernel/Lock/SpinLock.h
index 5e91e704ee..e9e822e12f 100644
--- a/kernel/include/kernel/Lock/SpinLock.h
+++ b/kernel/include/kernel/Lock/SpinLock.h
@@ -50,10 +50,10 @@ namespace Kernel
 
 		InterruptState lock()
 		{
-			auto id = get_processor_id();
+			auto id = Processor::current_id();
 
-			auto state = get_interrupt_state();
-			set_interrupt_state(InterruptState::Disabled);
+			auto state = Processor::get_interrupt_state();
+			Processor::set_interrupt_state(InterruptState::Disabled);
 
 			while (!m_locker.compare_exchange(PROCESSOR_NONE, id, BAN::MemoryOrder::memory_order_acquire))
 				__builtin_ia32_pause();
@@ -64,7 +64,7 @@ namespace Kernel
 		void unlock(InterruptState state)
 		{
 			m_locker.store(PROCESSOR_NONE, BAN::MemoryOrder::memory_order_release);
-			set_interrupt_state(state);
+			Processor::set_interrupt_state(state);
 		}
 
 		bool is_locked() const { return m_locker != PROCESSOR_NONE; }
diff --git a/kernel/include/kernel/Processor.h b/kernel/include/kernel/Processor.h
index 6a9fa62067..d3f78d7710 100644
--- a/kernel/include/kernel/Processor.h
+++ b/kernel/include/kernel/Processor.h
@@ -1,5 +1,6 @@
 #pragma once
 
+#include <BAN/ForwardList.h>
 #include <kernel/Arch.h>
 
 namespace Kernel
@@ -11,36 +12,65 @@ namespace Kernel
 		Enabled,
 	};
 
-#if ARCH(x86_64) || ARCH(i386)
-
-	inline void set_interrupt_state(InterruptState state)
-	{
-		if (state == InterruptState::Enabled)
-			asm volatile("sti");
-		else
-			asm volatile("cli");
-	}
-
-	inline InterruptState get_interrupt_state()
-	{
-		uintptr_t flags;
-		asm volatile("pushf; pop %0" : "=rm"(flags));
-		if (flags & (1 << 9))
-			return InterruptState::Enabled;
-		return InterruptState::Disabled;
-	}
-
 	using ProcessorID = uint8_t;
 	constexpr ProcessorID PROCESSOR_NONE = 0xFF;
-	inline ProcessorID get_processor_id()
-	{
-		uint16_t id;
-		asm volatile("movw %%gs, %0" : "=rm"(id));
-		return id;
-	}
 
+#if ARCH(x86_64)
+	class Processor // per-processor state; each entry owns one stack of m_stack_size bytes
+	{
+		BAN_NON_COPYABLE(Processor);
+
+	public:
+		static Processor& create(ProcessorID id); // make sure the entry for id exists and has a stack
+
+		static ProcessorID current_id() // read from %gs (loaded with the local APIC id by initialize_lapic_id in boot.S)
+		{
+			uint16_t id;
+			asm volatile("movw %%gs, %0" : "=rm"(id));
+			return id;
+		}
+
+		static Processor& get(ProcessorID); // entry for the given id from the table create() fills
+
+		static Processor& current() { return get(current_id()); }
+
+		static void set_interrupt_state(InterruptState state) // sti for Enabled, cli for anything else
+		{
+			if (state == InterruptState::Enabled)
+				asm volatile("sti");
+			else
+				asm volatile("cli");
+		}
+
+		static InterruptState get_interrupt_state() // Enabled iff bit 9 (IF) of the flags register is set
+		{
+			uintptr_t flags;
+			asm volatile("pushf; pop %0" : "=rm"(flags));
+			if (flags & (1 << 9))
+				return InterruptState::Enabled;
+			return InterruptState::Disabled;
+		}
+
+		uintptr_t stack_bottom() const { return reinterpret_cast<uintptr_t>(m_stack); } // lowest address of the stack allocation
+		uintptr_t stack_top() const { return stack_bottom() + m_stack_size; } // one past the highest byte of the stack
+
+	private:
+		Processor() = default; // private: instances are created through the friend BAN::Vector<Processor>
+		Processor(Processor&& other) // take over the stack so the moved-from destructor frees nothing
+		{
+			m_stack = other.m_stack;
+			other.m_stack = nullptr;
+		}
+		~Processor();
+
+	private:
+		void* m_stack { nullptr }; // allocated in create(), released in the destructor
+		static constexpr size_t m_stack_size { 4096 }; // in bytes
+
+		friend class BAN::Vector<Processor>;
+	};
 #else
-#error "Unknown architecure"
+	#error "Unsupported architecture"
 #endif
 
 }
diff --git a/kernel/kernel/APIC.cpp b/kernel/kernel/APIC.cpp
index 77e22a1852..503cf6514a 100644
--- a/kernel/kernel/APIC.cpp
+++ b/kernel/kernel/APIC.cpp
@@ -26,6 +26,7 @@ extern uint8_t g_ap_init_addr[];
 
 extern volatile uint8_t g_ap_startup_done[];
 extern volatile uint8_t g_ap_running_count[];
+extern volatile uint8_t g_ap_stack_loaded[];
 
 namespace Kernel
 {
@@ -215,7 +216,7 @@ namespace Kernel
 
 		dprintln("System has {} processors", m_processors.size());
 
-		uint8_t bsp_id = get_processor_id();
+		uint8_t bsp_id = Kernel::Processor::current_id();
 		dprintln("BSP lapic id: {}", bsp_id);
 
 		for (auto& processor : m_processors)
@@ -231,6 +232,13 @@ namespace Kernel
 
 			dprintln("Trying to enable processor (lapic id {})", processor.apic_id);
 
+			Kernel::Processor::create(processor.processor_id);
+
+			PageTable::with_fast_page((paddr_t)g_ap_init_addr, [&] {
+				PageTable::fast_page_as_sized<uint32_t>(2) = V2P(Kernel::Processor::get(processor.processor_id).stack_top());
+			});
+			*g_ap_stack_loaded = 0;
+
 			write_to_local_apic(LAPIC_ERROR_REG, 0x00);
 			send_ipi(processor.processor_id, (read_from_local_apic(LAPIC_ICR_LO_REG) & 0xFFF00000) | 0x0000C500, 0);
 			send_ipi(processor.processor_id, (read_from_local_apic(LAPIC_ICR_LO_REG) & 0xFFF0F800) | 0x00008500, 0);
@@ -242,12 +250,13 @@ namespace Kernel
 				write_to_local_apic(LAPIC_ERROR_REG, 0x00);
 				send_ipi(processor.processor_id, (read_from_local_apic(LAPIC_ICR_LO_REG) & 0xFFF0F800) | 0x00000600 | ap_init_page, 200);
 			}
+
+			// give processor up to 100 * 100 us (10 ms) to boot
+			for (int i = 0; *g_ap_stack_loaded == 0 && i < 100; i++)
+				udelay(100);
 		}
 
 		*g_ap_startup_done = 1;
-
-		udelay(100);
-
 		dprintln("{} processors started", *g_ap_running_count);
 	}
 
diff --git a/kernel/kernel/Lock/SpinLock.cpp b/kernel/kernel/Lock/SpinLock.cpp
index 8b613a63de..d2a8b9dd6b 100644
--- a/kernel/kernel/Lock/SpinLock.cpp
+++ b/kernel/kernel/Lock/SpinLock.cpp
@@ -9,11 +9,11 @@ namespace Kernel
 
 	InterruptState SpinLock::lock()
 	{
-		auto id = get_processor_id();
+		auto id = Processor::current_id();
 		ASSERT_NEQ(m_locker.load(), id);
 
-		auto state = get_interrupt_state();
-		set_interrupt_state(InterruptState::Disabled);
+		auto state = Processor::get_interrupt_state();
+		Processor::set_interrupt_state(InterruptState::Disabled);
 
 		while (!m_locker.compare_exchange(PROCESSOR_NONE, id, BAN::MemoryOrder::memory_order_acquire))
 			__builtin_ia32_pause();
@@ -23,17 +23,17 @@ namespace Kernel
 
 	void SpinLock::unlock(InterruptState state)
 	{
-		ASSERT_EQ(m_locker.load(), get_processor_id());
+		ASSERT_EQ(m_locker.load(), Processor::current_id());
 		m_locker.store(PROCESSOR_NONE, BAN::MemoryOrder::memory_order_release);
-		set_interrupt_state(state);
+		Processor::set_interrupt_state(state);
 	}
 
 	InterruptState RecursiveSpinLock::lock()
 	{
-		auto id = get_processor_id();
+		auto id = Processor::current_id();
 
-		auto state = get_interrupt_state();
-		set_interrupt_state(InterruptState::Disabled);
+		auto state = Processor::get_interrupt_state();
+		Processor::set_interrupt_state(InterruptState::Disabled);
 
 		if (id == m_locker)
 			ASSERT_GT(m_lock_depth, 0);
@@ -51,11 +51,11 @@ namespace Kernel
 
 	void RecursiveSpinLock::unlock(InterruptState state)
 	{
-		ASSERT_EQ(m_locker.load(), get_processor_id());
+		ASSERT_EQ(m_locker.load(), Processor::current_id());
 		ASSERT_GT(m_lock_depth, 0);
 		if (--m_lock_depth == 0)
 			m_locker.store(PROCESSOR_NONE, BAN::MemoryOrder::memory_order_release);
-		set_interrupt_state(state);
+		Processor::set_interrupt_state(state);
 	}
 
 }
diff --git a/kernel/kernel/Memory/kmalloc.cpp b/kernel/kernel/Memory/kmalloc.cpp
index 62d11f2178..7f7e645119 100644
--- a/kernel/kernel/Memory/kmalloc.cpp
+++ b/kernel/kernel/Memory/kmalloc.cpp
@@ -75,6 +75,11 @@ struct kmalloc_info
 		return nullptr;
 	}
 
+	bool contains(uintptr_t addr) const // half-open range test: base <= addr < end
+	{
+		return addr >= base && addr < end;
+	}
+
 	size_t used = 0;
 	size_t free = size;
 };
@@ -161,8 +166,17 @@ static bool is_corrupted()
 	Kernel::SpinLockGuard _(s_kmalloc_lock);
 	auto& info = s_kmalloc_info;
 	auto* temp = info.first();
-	for (; temp->end() <= info.end; temp = temp->after());
-	return (uintptr_t)temp != info.end;
+	while (reinterpret_cast<uintptr_t>(temp) != info.end)
+	{
+		if (!info.contains(reinterpret_cast<uintptr_t>(temp)))
+			return true;
+		if (!info.contains(temp->end() - 1))
+			return true;
+		if (temp->after() <= temp)
+			return true;
+		temp = temp->after();
+	}
+	return false;
 }
 
 [[maybe_unused]] static void debug_dump()
@@ -235,12 +249,12 @@ static void* kmalloc_impl(size_t size, size_t align)
 
 	auto& info = s_kmalloc_info;
 
-	for (auto* node = info.first(); node->end() <= info.end; node = node->after())
+	for (auto* node = info.first(); info.contains(reinterpret_cast<uintptr_t>(node)); node = node->after())
 	{
 		if (node->used())
 			continue;
 
-		if (auto* next = node->after(); next->end() <= info.end)
+		if (auto* next = node->after(); info.contains(reinterpret_cast<uintptr_t>(next)))
 			if (!next->used())
 				node->set_end(next->end());
 
diff --git a/kernel/kernel/Process.cpp b/kernel/kernel/Process.cpp
index fd03ad0c17..adde8e70b1 100644
--- a/kernel/kernel/Process.cpp
+++ b/kernel/kernel/Process.cpp
@@ -220,7 +220,7 @@ namespace Kernel
 
 	void Process::on_thread_exit(Thread& thread)
 	{
-		ASSERT(get_interrupt_state() == InterruptState::Disabled);
+		ASSERT(Processor::get_interrupt_state() == InterruptState::Disabled);
 
 		ASSERT(m_threads.size() > 0);
 
diff --git a/kernel/kernel/Processor.cpp b/kernel/kernel/Processor.cpp
new file mode 100644
index 0000000000..2ff0678331
--- /dev/null
+++ b/kernel/kernel/Processor.cpp
@@ -0,0 +1,41 @@
+#include <BAN/Vector.h>
+#include <kernel/Processor.h>
+
+namespace Kernel
+{
+
+	// Table of per-processor state, indexed by ProcessorID; slots are added on demand by create().
+	static BAN::Vector<Processor> s_processors;
+
+	// Ensure the slot for `id` exists and owns a stack, then return it.
+	// Calling this again for an id that already has a stack allocates nothing.
+	Processor& Processor::create(ProcessorID id)
+	{
+		while (id >= s_processors.size())
+			MUST(s_processors.emplace_back());
+
+		auto& slot = s_processors[id];
+		if (slot.m_stack != nullptr)
+			return slot;
+
+		// NOTE(review): arguments are presumably (size, alignment, some placement flag) - confirm against kmalloc's declaration
+		slot.m_stack = kmalloc(m_stack_size, 4096, true);
+		ASSERT(slot.m_stack);
+		return slot;
+	}
+
+	// Hand the stack back with kfree if this object still owns one.
+	Processor::~Processor()
+	{
+		if (m_stack != nullptr)
+			kfree(m_stack);
+		m_stack = nullptr;
+	}
+
+	// Return the slot for `id` from the table that create() fills.
+	Processor& Processor::get(ProcessorID id)
+	{
+		return s_processors[id];
+	}
+
+}
diff --git a/kernel/kernel/Scheduler.cpp b/kernel/kernel/Scheduler.cpp
index 205f49e8d0..cda145869c 100644
--- a/kernel/kernel/Scheduler.cpp
+++ b/kernel/kernel/Scheduler.cpp
@@ -16,10 +16,9 @@ namespace Kernel
 
 	static Scheduler* s_instance = nullptr;
 
-	static uint8_t s_temp_stack[1024];
 	ALWAYS_INLINE static void load_temp_stack()
 	{
-		asm volatile("movq %0, %%rsp" :: "r"(s_temp_stack + sizeof(s_temp_stack)));
+		asm volatile("movq %0, %%rsp" :: "rm"(Processor::current().stack_top()));
 	}
 
 	BAN::ErrorOr<void> Scheduler::initialize()
@@ -40,7 +39,7 @@ namespace Kernel
 
 	void Scheduler::start()
 	{
-		ASSERT(get_interrupt_state() == InterruptState::Disabled);
+		ASSERT(Processor::get_interrupt_state() == InterruptState::Disabled);
 		m_lock.lock();
 		ASSERT(!m_active_threads.empty());
 		m_current_thread = m_active_threads.begin();
@@ -75,7 +74,7 @@ namespace Kernel
 	{
 		auto state = m_lock.lock();
 		if (save_current_thread())
-			return set_interrupt_state(state);
+			return Processor::set_interrupt_state(state);
 		advance_current_thread();
 		execute_current_thread_locked();
 		ASSERT_NOT_REACHED();
@@ -210,6 +209,7 @@ namespace Kernel
 
 	void Scheduler::execute_current_thread_locked()
 	{
+		ASSERT(m_lock.is_locked());
 		load_temp_stack();
 		PageTable::kernel().load();
 		execute_current_thread_stack_loaded();
@@ -220,13 +220,10 @@ namespace Kernel
 	{
 		ASSERT(m_lock.is_locked());
 
-		load_temp_stack();
-		PageTable::kernel().load();
-
 #if SCHEDULER_VERIFY_STACK
 		vaddr_t rsp;
 		read_rsp(rsp);
-		ASSERT((vaddr_t)s_temp_stack <= rsp && rsp <= (vaddr_t)s_temp_stack + sizeof(s_temp_stack));
+		ASSERT(Processor::current().stack_bottom() <= rsp && rsp <= Processor::current().stack_top());
 		ASSERT(&PageTable::current() == &PageTable::kernel());
 #endif
 
@@ -287,10 +284,7 @@ namespace Kernel
 		ASSERT(m_lock.is_locked());
 
 		if (save_current_thread())
-		{
-			set_interrupt_state(InterruptState::Enabled);
 			return;
-		}
 
 		auto it = m_sleeping_threads.begin();
 		for (; it != m_sleeping_threads.end(); it++)
diff --git a/kernel/kernel/kernel.cpp b/kernel/kernel/kernel.cpp
index c5bfecc289..7821769691 100644
--- a/kernel/kernel/kernel.cpp
+++ b/kernel/kernel/kernel.cpp
@@ -83,8 +83,6 @@ extern "C" void kernel_main(uint32_t boot_magic, uint32_t boot_info)
 {
 	using namespace Kernel;
 
-	set_interrupt_state(InterruptState::Disabled);
-
 	if (!validate_boot_magic(boot_magic))
 	{
 		Serial::initialize();
@@ -104,6 +102,9 @@ extern "C" void kernel_main(uint32_t boot_magic, uint32_t boot_info)
 	parse_boot_info(boot_magic, boot_info);
 	dprintln("boot info parsed");
 
+	Processor::create(Processor::current_id());
+	dprintln("BSP initialized");
+
 	GDT::initialize();
 	dprintln("GDT initialized");
 
@@ -214,7 +215,7 @@ extern "C" void ap_main()
 {
 	using namespace Kernel;
 
-	dprintln("hello from processor {}", get_processor_id());
+	dprintln("hello from processor {}", Processor::current_id());
 
 	for (;;)
 		asm volatile("");