diff --git a/kernel/arch/i686/Syscall.S b/kernel/arch/i686/Syscall.S index b668722a..a7e0ccd6 100644 --- a/kernel/arch/i686/Syscall.S +++ b/kernel/arch/i686/Syscall.S @@ -15,9 +15,7 @@ asm_syscall_handler: andl $-16, %esp # push arguments - subl $4, %esp - pushl %ebp - addl $24, (%esp) + subl $8, %esp pushl %edi pushl %esi pushl %edx @@ -65,7 +63,7 @@ sys_fork_trampoline: call read_ip testl %eax, %eax - jz .reload_stack + jz .done movl %esp, %ebx @@ -81,9 +79,3 @@ sys_fork_trampoline: popl %ebx popl %ebp ret - - .reload_stack: - call get_thread_start_sp - movl %eax, %esp - xorl %eax, %eax - jmp .done diff --git a/kernel/arch/i686/interrupts.S b/kernel/arch/i686/interrupts.S index 24b82711..28e5d4d1 100644 --- a/kernel/arch/i686/interrupts.S +++ b/kernel/arch/i686/interrupts.S @@ -1,6 +1,6 @@ .macro maybe_load_kernel_segments, n - cmpb $0x08, \n(%esp) - je 1f + testb $3, \n(%esp) + jz 1f; jnp 1f movw $0x10, %ax movw %ax, %ds @@ -13,8 +13,8 @@ .endm .macro maybe_load_userspace_segments, n - cmpb $0x08, \n(%esp) - je 1f + testb $3, \n(%esp) + jz 1f; jnp 1f movw $(0x20 | 3), %bx movw %bx, %ds diff --git a/kernel/arch/x86_64/Syscall.S b/kernel/arch/x86_64/Syscall.S index 15757dfb..92d58c85 100644 --- a/kernel/arch/x86_64/Syscall.S +++ b/kernel/arch/x86_64/Syscall.S @@ -1,50 +1,26 @@ -// arguments in RAX, RBX, RCX, RDX, RSI, RDI -// System V ABI: RDI, RSI, RDX, RCX, R8, R9 .global asm_syscall_handler asm_syscall_handler: swapgs - pushq %rbx - pushq %rcx - pushq %rdx - pushq %rdi - pushq %rsi - pushq %rbp - pushq %r8 - pushq %r9 - pushq %r10 + + movq %rsp, %rax + movq %gs:8, %rsp + + pushq $(0x20 | 3) + pushq %rax pushq %r11 - pushq %r12 - pushq %r13 - pushq %r14 - pushq %r15 - cld + pushq $(0x28 | 3) + pushq %rcx + subq $8, %rsp - movq %rsi, %r8 - movq %rdi, %r9 - movq %rax, %rdi - movq %rbx, %rsi - xchgq %rcx, %rdx - leaq 112(%rsp), %rbx - pushq %rbx + movq %r10, %rcx call cpp_syscall_handler - addq $8, %rsp - popq %r15 - popq %r14 - popq %r13 - popq %r12 - popq %r11 - popq %r10 - popq %r9 - popq %r8 - popq %rbp - popq %rsi - popq %rdi - popq %rdx - popq %rcx - popq %rbx + movq 8(%rsp), %rcx + movq 24(%rsp), %r11 + movq 32(%rsp), %rsp + swapgs - iretq + sysretq .global sys_fork_trampoline sys_fork_trampoline: @@ -57,7 +33,7 @@ sys_fork_trampoline: call read_ip testq %rax, %rax - je .reload_stack + je .done movq %rax, %rsi movq %rsp, %rdi @@ -71,9 +47,3 @@ sys_fork_trampoline: popq %rbp popq %rbx ret - - .reload_stack: - call get_thread_start_sp - movq %rax, %rsp - xorq %rax, %rax - jmp .done diff --git a/kernel/arch/x86_64/interrupts.S b/kernel/arch/x86_64/interrupts.S index 32e4a41c..0995b353 100644 --- a/kernel/arch/x86_64/interrupts.S +++ b/kernel/arch/x86_64/interrupts.S @@ -1,6 +1,6 @@ .macro swapgs_if_necessary, n - cmpb $0x08, \n(%rsp) - je 1f + testb $3, \n(%rsp) + jz 1f; jnp 1f swapgs 1: .endm diff --git a/kernel/include/kernel/API/Syscall.h b/kernel/include/kernel/API/Syscall.h new file mode 100644 index 00000000..ba12a3bf --- /dev/null +++ b/kernel/include/kernel/API/Syscall.h @@ -0,0 +1,44 @@ +#pragma once + +#include +#include +#include +#include + +namespace Kernel +{ + + ALWAYS_INLINE long syscall(int syscall, uintptr_t arg1 = 0, uintptr_t arg2 = 0, uintptr_t arg3 = 0, uintptr_t arg4 = 0, uintptr_t arg5 = 0) + { + long ret; +#if ARCH(x86_64) + register uintptr_t r10 asm("r10") = arg3; + register uintptr_t r8 asm( "r8") = arg4; + register uintptr_t r9 asm( "r9") = arg5; + asm volatile( + "syscall" + : "=a"(ret) + , "+D"(syscall) + , "+S"(arg1) + , "+d"(arg2) + , "+r"(r10) + , "+r"(r8) + , "+r"(r9) + :: "rcx", "r11", "memory"); +#elif ARCH(i686) + asm volatile( + "int %[irq]" + : "=a"(ret) + : [irq]"i"(static_cast(IRQ_SYSCALL)) // WTF GCC 15 + , "a"(syscall) + , "b"(arg1) + , "c"(arg2) + , "d"(arg3) + , "S"(arg4) + , "D"(arg5) + : "memory"); +#endif + return ret; + } + +} diff --git a/kernel/include/kernel/GDT.h b/kernel/include/kernel/GDT.h index 4e19d10c..bdb42c4f 100644 --- a/kernel/include/kernel/GDT.h +++ b/kernel/include/kernel/GDT.h @@ -151,8 +151,8 @@ namespace Kernel private: #if ARCH(x86_64) - BAN::Array m_gdt; // null, kernel code, kernel data, user code, user data, tss low, tss high - static constexpr uint16_t m_tss_offset = 0x28; + BAN::Array m_gdt; // null, kernel code, kernel data, user code (32 bit), user data, user code (64 bit), tss low, tss high + static constexpr uint16_t m_tss_offset = 0x30; #elif ARCH(i686) BAN::Array m_gdt; // null, kernel code, kernel data, user code, user data, processor data, fsbase, gsbase, tss static constexpr uint16_t m_tss_offset = 0x40; diff --git a/kernel/include/kernel/IDT.h b/kernel/include/kernel/IDT.h index a7649b2a..f051dfcb 100644 --- a/kernel/include/kernel/IDT.h +++ b/kernel/include/kernel/IDT.h @@ -18,7 +18,10 @@ namespace Kernel constexpr uint8_t IRQ_VECTOR_BASE = 0x20; constexpr uint8_t IRQ_MSI_BASE = 0x80; + constexpr uint8_t IRQ_MSI_END = 0xF0; +#if ARCH(i686) constexpr uint8_t IRQ_SYSCALL = 0xF0; +#endif constexpr uint8_t IRQ_YIELD = 0xF1; constexpr uint8_t IRQ_IPI = 0xF2; constexpr uint8_t IRQ_TIMER = 0xF3; diff --git a/kernel/include/kernel/PCI.h b/kernel/include/kernel/PCI.h index fb7d64b5..d8b2322b 100644 --- a/kernel/include/kernel/PCI.h +++ b/kernel/include/kernel/PCI.h @@ -187,7 +187,7 @@ namespace Kernel::PCI void initialize_impl(); private: - static constexpr uint8_t m_msi_count = IRQ_SYSCALL - IRQ_MSI_BASE; + static constexpr uint8_t m_msi_count = IRQ_MSI_END - IRQ_MSI_BASE; using PCIBus = BAN::Array, 32>; BAN::Array m_buses; BAN::Array m_bus_pcie_paddr; diff --git a/kernel/include/kernel/Processor.h b/kernel/include/kernel/Processor.h index d7e7fbc9..6dd92a9f 100644 --- a/kernel/include/kernel/Processor.h +++ b/kernel/include/kernel/Processor.h @@ -102,6 +102,8 @@ namespace Kernel uintptr_t stack_bottom() const { return reinterpret_cast(m_stack); } uintptr_t stack_top() const { return stack_bottom() + s_stack_size; } + static void set_thread_syscall_stack(vaddr_t vaddr) { write_gs_sized(offsetof(Processor, m_thread_syscall_stack), vaddr); } + static GDT& gdt() { return *read_gs_sized(offsetof(Processor, m_gdt)); } static IDT& idt() { return *read_gs_sized(offsetof(Processor, m_idt)); } @@ -137,6 +139,13 @@ namespace Kernel static void initialize_smp(); static void initialize_shared_page(); + static void dummy() + { +#if ARCH(x86_64) + static_assert(offsetof(Processor, m_thread_syscall_stack) == 8, "This is hardcoded in Syscall.S"); +#endif + } + template static T read_gs_sized(uintptr_t offset) requires(sizeof(T) <= 8) { @@ -180,6 +189,8 @@ namespace Kernel ProcessorID m_id { 0 }; uint8_t m_index { 0xFF }; + vaddr_t m_thread_syscall_stack; + static constexpr size_t s_stack_size { 4096 }; void* m_stack { nullptr }; diff --git a/kernel/include/kernel/Syscall.h b/kernel/include/kernel/Syscall.h deleted file mode 100644 index 7922fdd9..00000000 --- a/kernel/include/kernel/Syscall.h +++ /dev/null @@ -1,27 +0,0 @@ -#pragma once - -#include -#include -#include -#include - -namespace Kernel -{ - - ALWAYS_INLINE long syscall(int syscall, uintptr_t arg1 = 0, uintptr_t arg2 = 0, uintptr_t arg3 = 0, uintptr_t arg4 = 0, uintptr_t arg5 = 0) - { - long ret; - asm volatile("int %[irq]" - : "=a"(ret) - : [irq]"i"(static_cast(IRQ_SYSCALL)) // WTF GCC 15 - , "a"(syscall) - , "b"((uintptr_t)arg1) - , "c"((uintptr_t)arg2) - , "d"((uintptr_t)arg3) - , "S"((uintptr_t)arg4) - , "D"((uintptr_t)arg5) - : "memory"); - return ret; - } - -} diff --git a/kernel/kernel/GDT.cpp b/kernel/kernel/GDT.cpp index 16f5b204..a83f1980 100644 --- a/kernel/kernel/GDT.cpp +++ b/kernel/kernel/GDT.cpp @@ -15,23 +15,23 @@ namespace Kernel ASSERT(gdt); #if ARCH(x86_64) - constexpr uint8_t code_flags = 0xA; - constexpr uint8_t data_flags = 0xC; + gdt->write_entry(0x00, 0x00000000, 0x00000, 0x00, 0x0); // null + gdt->write_entry(0x08, 0x00000000, 0xFFFFF, 0x9A, 0xA); // kernel code + gdt->write_entry(0x10, 0x00000000, 0xFFFFF, 0x92, 0xC); // kernel data + gdt->write_entry(0x18, 0x00000000, 0xFFFFF, 0xFA, 0xC); // user code (32 bit) + gdt->write_entry(0x20, 0x00000000, 0xFFFFF, 0xF2, 0xC); // user data + gdt->write_entry(0x28, 0x00000000, 0xFFFFF, 0xFA, 0xA); // user code (64 bit) #elif ARCH(i686) - constexpr uint8_t code_flags = 0xC; - constexpr uint8_t data_flags = 0xC; + gdt->write_entry(0x00, 0x00000000, 0x00000, 0x00, 0x0); // null + gdt->write_entry(0x08, 0x00000000, 0xFFFFF, 0x9A, 0xC); // kernel code + gdt->write_entry(0x10, 0x00000000, 0xFFFFF, 0x92, 0xC); // kernel data + gdt->write_entry(0x18, 0x00000000, 0xFFFFF, 0xFA, 0xC); // user code + gdt->write_entry(0x20, 0x00000000, 0xFFFFF, 0xF2, 0xC); // user data + gdt->write_entry(0x28, reinterpret_cast(processor), sizeof(Processor), 0x92, 0x4); // processor data + gdt->write_entry(0x30, 0x00000000, 0x00000, 0xF2, 0xC); // fsbase + gdt->write_entry(0x38, 0x00000000, 0x00000, 0xF2, 0xC); // gsbase #endif - gdt->write_entry(0x00, 0x00000000, 0x00000, 0x00, 0x0); // null - gdt->write_entry(0x08, 0x00000000, 0xFFFFF, 0x9A, code_flags); // kernel code - gdt->write_entry(0x10, 0x00000000, 0xFFFFF, 0x92, data_flags); // kernel data - gdt->write_entry(0x18, 0x00000000, 0xFFFFF, 0xFA, code_flags); // user code - gdt->write_entry(0x20, 0x00000000, 0xFFFFF, 0xF2, data_flags); // user data -#if ARCH(i686) - gdt->write_entry(0x28, reinterpret_cast(processor), sizeof(Processor), 0x92, 0x4); // processor data - gdt->write_entry(0x30, 0x00000000, 0x00000, 0xF2, data_flags); // fsbase - gdt->write_entry(0x38, 0x00000000, 0x00000, 0xF2, data_flags); // gsbase -#endif gdt->write_tss(); return gdt; diff --git a/kernel/kernel/IDT.cpp b/kernel/kernel/IDT.cpp index 164a9c24..ebdc5090 100644 --- a/kernel/kernel/IDT.cpp +++ b/kernel/kernel/IDT.cpp @@ -18,8 +18,6 @@ X(160) X(161) X(162) X(163) X(164) X(165) X(166) X(167) X(168) X(169) X(170) X(171) X(172) X(173) X(174) X(175) X(176) X(177) X(178) X(179) X(180) X(181) X(182) X(183) X(184) X(185) X(186) X(187) X(188) X(189) X(190) X(191) \ X(192) X(193) X(194) X(195) X(196) X(197) X(198) X(199) X(200) X(201) X(202) X(203) X(204) X(205) X(206) X(207) -static_assert(Kernel::IRQ_SYSCALL == Kernel::IRQ_VECTOR_BASE + 208); - namespace Kernel { @@ -446,7 +444,9 @@ namespace Kernel extern "C" void asm_yield_handler(); extern "C" void asm_ipi_handler(); extern "C" void asm_timer_handler(); +#if ARCH(i686) extern "C" void asm_syscall_handler(); +#endif IDT* IDT::create() { @@ -480,7 +480,9 @@ namespace Kernel idt->register_interrupt_handler(IRQ_YIELD, asm_yield_handler); idt->register_interrupt_handler(IRQ_IPI, asm_ipi_handler); idt->register_interrupt_handler(IRQ_TIMER, asm_timer_handler); +#if ARCH(i686) idt->register_syscall_handler(IRQ_SYSCALL, asm_syscall_handler); +#endif return idt; } diff --git a/kernel/kernel/Processor.cpp b/kernel/kernel/Processor.cpp index db6f31ac..a302f7ef 100644 --- a/kernel/kernel/Processor.cpp +++ b/kernel/kernel/Processor.cpp @@ -13,6 +13,11 @@ namespace Kernel static constexpr uint32_t MSR_IA32_FS_BASE = 0xC0000100; static constexpr uint32_t MSR_IA32_GS_BASE = 0xC0000101; static constexpr uint32_t MSR_IA32_KERNEL_GS_BASE = 0xC0000102; + + static constexpr uint32_t MSR_IA32_EFER = 0xC0000080; + static constexpr uint32_t MSR_IA32_STAR = 0xC0000081; + static constexpr uint32_t MSR_IA32_LSTAR = 0xC0000082; + static constexpr uint32_t MSR_IA32_FMASK = 0xC0000084; #endif ProcessorID Processor::s_bsp_id { PROCESSOR_NONE }; @@ -30,6 +35,8 @@ namespace Kernel static BAN::Array s_processors; static BAN::Array s_processor_ids { PROCESSOR_NONE }; + extern "C" void asm_syscall_handler(); + ProcessorID Processor::read_processor_id() { uint32_t id; @@ -87,13 +94,53 @@ namespace Kernel // initialize GS #if ARCH(x86_64) - // set gs base to pointer to this processor - uint64_t ptr = reinterpret_cast(&processor); - uint32_t ptr_hi = ptr >> 32; - uint32_t ptr_lo = ptr & 0xFFFFFFFF; - asm volatile("wrmsr" :: "d"(ptr_hi), "a"(ptr_lo), "c"(MSR_IA32_GS_BASE)); + { + // set gs base to pointer to this processor + const uint64_t val = reinterpret_cast(&processor); + const uint32_t val_hi = val >> 32; + const uint32_t val_lo = val & 0xFFFFFFFF; + asm volatile("wrmsr" :: "d"(val_hi), "a"(val_lo), "c"(MSR_IA32_GS_BASE)); + } #elif ARCH(i686) - asm volatile("movw $0x28, %%ax; movw %%ax, %%gs" ::: "ax"); + asm volatile("movw %0, %%gs" :: "r"(0x28)); +#endif + +#if ARCH(x86_64) + // enable syscall instruction + asm volatile("rdmsr; orb $1, %%al; wrmsr" :: "c"(MSR_IA32_EFER) : "eax", "edx"); + + { + union STAR + { + struct + { + uint32_t : 32; + uint16_t sel_ring0; + uint16_t sel_ring3; + }; + uint64_t raw; + }; + + // set kernel and user segments + const uint64_t val = STAR { .sel_ring0 = 0x08, .sel_ring3 = 0x18 | 3 }.raw; + const uint32_t val_hi = val >> 32; + const uint32_t val_lo = val & 0xFFFFFFFF; + asm volatile("wrmsr" :: "d"(val_hi), "a"(val_lo), "c"(MSR_IA32_STAR)); + } + { + // set syscall handler address + const uint64_t val = reinterpret_cast(&asm_syscall_handler); + const uint32_t val_hi = val >> 32; + const uint32_t val_lo = val & 0xFFFFFFFF; + asm volatile("wrmsr" :: "d"(val_hi), "a"(val_lo), "c"(MSR_IA32_LSTAR)); + } + { + // mask DF and IF + const uint64_t val = (1 << 10) | (1 << 9); + const uint32_t val_hi = val >> 32; + const uint32_t val_lo = val & 0xFFFFFFFF; + asm volatile("wrmsr" :: "d"(val_hi), "a"(val_lo), "c"(MSR_IA32_FMASK)); + } #endif ASSERT(processor.m_idt); @@ -372,36 +419,17 @@ namespace Kernel void Processor::load_segments() { - { - const auto addr = scheduler().current_thread().get_fsbase(); -#if ARCH(x86_64) - uint32_t ptr_hi = addr >> 32; - uint32_t ptr_lo = addr & 0xFFFFFFFF; - asm volatile("wrmsr" :: "d"(ptr_hi), "a"(ptr_lo), "c"(MSR_IA32_FS_BASE)); -#elif ARCH(i686) - gdt().set_fsbase(addr); -#endif - } - - { - const auto addr = scheduler().current_thread().get_gsbase(); -#if ARCH(x86_64) - uint32_t ptr_hi = addr >> 32; - uint32_t ptr_lo = addr & 0xFFFFFFFF; - asm volatile("wrmsr" :: "d"(ptr_hi), "a"(ptr_lo), "c"(MSR_IA32_KERNEL_GS_BASE)); -#elif ARCH(i686) - gdt().set_gsbase(addr); -#endif - } + load_fsbase(); + load_gsbase(); } void Processor::load_fsbase() { const auto addr = scheduler().current_thread().get_fsbase(); #if ARCH(x86_64) - uint32_t ptr_hi = addr >> 32; - uint32_t ptr_lo = addr & 0xFFFFFFFF; - asm volatile("wrmsr" :: "d"(ptr_hi), "a"(ptr_lo), "c"(MSR_IA32_FS_BASE)); + const uint32_t addr_hi = addr >> 32; + const uint32_t addr_lo = addr & 0xFFFFFFFF; + asm volatile("wrmsr" :: "d"(addr_hi), "a"(addr_lo), "c"(MSR_IA32_FS_BASE)); #elif ARCH(i686) gdt().set_fsbase(addr); #endif @@ -411,9 +439,9 @@ namespace Kernel { const auto addr = scheduler().current_thread().get_gsbase(); #if ARCH(x86_64) - uint32_t ptr_hi = addr >> 32; - uint32_t ptr_lo = addr & 0xFFFFFFFF; - asm volatile("wrmsr" :: "d"(ptr_hi), "a"(ptr_lo), "c"(MSR_IA32_KERNEL_GS_BASE)); + const uint32_t addr_hi = addr >> 32; + const uint32_t addr_lo = addr & 0xFFFFFFFF; + asm volatile("wrmsr" :: "d"(addr_hi), "a"(addr_lo), "c"(MSR_IA32_KERNEL_GS_BASE)); #elif ARCH(i686) gdt().set_gsbase(addr); #endif diff --git a/kernel/kernel/Scheduler.cpp b/kernel/kernel/Scheduler.cpp index f2e3a98b..8d83215d 100644 --- a/kernel/kernel/Scheduler.cpp +++ b/kernel/kernel/Scheduler.cpp @@ -284,9 +284,14 @@ namespace Kernel thread->set_cpu_time_start(); } - Processor::gdt().set_tss_stack(thread->kernel_stack_top()); if (thread->is_userspace()) + { + const vaddr_t kernel_stack_top = thread->kernel_stack_top(); + Processor::gdt().set_tss_stack(kernel_stack_top); + Processor::set_thread_syscall_stack(kernel_stack_top); Processor::load_segments(); + } + *interrupt_stack = thread->interrupt_stack(); *interrupt_registers = thread->interrupt_registers(); diff --git a/kernel/kernel/Syscall.cpp b/kernel/kernel/Syscall.cpp index ff5d8798..0649ecfb 100644 --- a/kernel/kernel/Syscall.cpp +++ b/kernel/kernel/Syscall.cpp @@ -1,9 +1,9 @@ #include +#include #include #include #include #include -#include #include #include @@ -40,10 +40,8 @@ namespace Kernel static bool is_restartable_syscall(int syscall); - extern "C" long cpp_syscall_handler(int syscall, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3, uintptr_t arg4, uintptr_t arg5, InterruptStack* interrupt_stack) + extern "C" long cpp_syscall_handler(int syscall, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3, uintptr_t arg4, uintptr_t arg5) { - ASSERT(GDT::is_user_segment(interrupt_stack->cs)); - Processor::set_interrupt_state(InterruptState::Enabled); Process::current().wait_while_stopped(); diff --git a/kernel/kernel/Thread.cpp b/kernel/kernel/Thread.cpp index 848b6e7d..50031a21 100644 --- a/kernel/kernel/Thread.cpp +++ b/kernel/kernel/Thread.cpp @@ -490,7 +490,11 @@ namespace Kernel write_to_stack(cur_sp, 0x20 | 3); write_to_stack(cur_sp, sp); write_to_stack(cur_sp, 0x202); +#if ARCH(x86_64) + write_to_stack(cur_sp, 0x28 | 3); +#elif ARCH(i686) write_to_stack(cur_sp, 0x18 | 3); +#endif write_to_stack(cur_sp, ip); }); diff --git a/kernel/kernel/kernel.cpp b/kernel/kernel/kernel.cpp index 6b005aac..acab2d65 100644 --- a/kernel/kernel/kernel.cpp +++ b/kernel/kernel/kernel.cpp @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include diff --git a/userspace/libraries/LibC/unistd.cpp b/userspace/libraries/LibC/unistd.cpp index 968adf3b..d268df5c 100644 --- a/userspace/libraries/LibC/unistd.cpp +++ b/userspace/libraries/LibC/unistd.cpp @@ -5,8 +5,8 @@ #include #include +#include #include -#include #include #include diff --git a/userspace/programs/DynamicLoader/utils.h b/userspace/programs/DynamicLoader/utils.h index bd085f34..f07751ad 100644 --- a/userspace/programs/DynamicLoader/utils.h +++ b/userspace/programs/DynamicLoader/utils.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include #include #include