From 969563c06ad401b284c1807e3d87b12f65444a7d Mon Sep 17 00:00:00 2001
From: Bananymous
Date: Wed, 21 Aug 2024 13:37:50 +0300
Subject: [PATCH] Kernel: Don't load AP init code to 0xF000, but relocate it
 later

This cleans up the kernel executable: bootloaders no longer have to
load the AP init code straight to 0xF000; instead, the kernel relocates
it there when it performs the AP initialization.
---
 kernel/arch/i686/Signal.S    |  2 +-
 kernel/arch/i686/boot.S      | 28 ++++++++++++----------
 kernel/arch/i686/linker.ld   | 16 ++++++-------
 kernel/arch/x86_64/Signal.S  |  2 +-
 kernel/arch/x86_64/boot.S    | 46 +++++++++++++++++-------------------
 kernel/arch/x86_64/linker.ld | 16 ++++++-------
 kernel/kernel/APIC.cpp       | 30 +++++++++++++----------
 7 files changed, 71 insertions(+), 69 deletions(-)

diff --git a/kernel/arch/i686/Signal.S b/kernel/arch/i686/Signal.S
index 633f01bc32..9621c2cfdf 100644
--- a/kernel/arch/i686/Signal.S
+++ b/kernel/arch/i686/Signal.S
@@ -1,4 +1,4 @@
-.section .userspace, "aw"
+.section .userspace, "ax"
 
 // stack contains
 //	return address
diff --git a/kernel/arch/i686/boot.S b/kernel/arch/i686/boot.S
index 45bd6b0e75..520cdca416 100644
--- a/kernel/arch/i686/boot.S
+++ b/kernel/arch/i686/boot.S
@@ -235,26 +235,31 @@ system_halt:
 	jmp 1b
 
 
+#define AP_V2P(vaddr) ((vaddr) - ap_trampoline + 0xF000)
+
 .section .ap_init, "ax"
 
 .code16
 .global ap_trampoline
 ap_trampoline:
 	jmp 1f
 
+.align 8
 ap_stack_ptr:
 	.skip 4
-1:
-	cli; cld
-	ljmpl $0x00, $ap_cs_clear
-ap_cs_clear:
+ap_stack_loaded:
+	.skip 1
+1:	cli; cld
+	ljmpl $0x00, $AP_V2P(ap_cs_clear)
 
+ap_cs_clear:
 	# load ap gdt and enter protected mode
-	lgdt ap_gdtr
+	lgdt AP_V2P(ap_gdtr)
 	movl %cr0, %eax
 	orb $1, %al
 	movl %eax, %cr0
-	ljmpl $0x08, $ap_protected_mode
+	ljmpl $0x08, $AP_V2P(ap_protected_mode)
 
 .code32
 ap_protected_mode:
@@ -263,16 +268,15 @@ ap_protected_mode:
 	movw %ax, %ss
 	movw %ax, %es
 
-	movl ap_stack_ptr, %esp
-	movb $1, V2P(g_ap_stack_loaded)
+	movl AP_V2P(ap_stack_ptr), %esp
+	movb $1, AP_V2P(ap_stack_loaded)
 
-	call V2P(enable_sse)
-
-	call V2P(initialize_paging)
+	leal V2P(enable_sse), %ecx; call *%ecx
+	leal V2P(initialize_paging), %ecx; call *%ecx
 
 	# load boot gdt and enter long mode
 	lgdt V2P(boot_gdtr)
-	ljmpl $0x08, $ap_flush_gdt
+	ljmpl $0x08, $AP_V2P(ap_flush_gdt)
 
 ap_flush_gdt:
 	# move stack pointer to higher half
diff --git a/kernel/arch/i686/linker.ld b/kernel/arch/i686/linker.ld
index 767bfb687b..64b107c224 100644
--- a/kernel/arch/i686/linker.ld
+++ b/kernel/arch/i686/linker.ld
@@ -4,13 +4,6 @@ KERNEL_OFFSET = 0xC0000000;
 
 SECTIONS
 {
-	. = 0xF000;
-	.ap_init ALIGN(4K) : AT(ADDR(.ap_init))
-	{
-		g_ap_init_addr = .;
-		*(.ap_init)
-	}
-
 	. = 0x00100000 + KERNEL_OFFSET;
 
 	g_kernel_start = .;
@@ -28,15 +21,20 @@ SECTIONS
 		g_userspace_end = .;
 		g_kernel_execute_end = .;
 	}
-	.rodata ALIGN(4K) : AT(ADDR(.rodata) - KERNEL_OFFSET)
+	.ap_init ALIGN(4K) : AT(ADDR(.ap_init))
 	{
-		*(.rodata.*)
+		g_ap_init_addr = .;
+		*(.ap_init)
 	}
 	.data ALIGN(4K) : AT(ADDR(.data) - KERNEL_OFFSET)
 	{
 		g_kernel_writable_start = .;
 		*(.data)
 	}
+	.rodata ALIGN(4K) : AT(ADDR(.rodata) - KERNEL_OFFSET)
+	{
+		*(.rodata.*)
+	}
 	.bss ALIGN(4K) : AT(ADDR(.bss) - KERNEL_OFFSET)
 	{
 		*(COMMON)
diff --git a/kernel/arch/x86_64/Signal.S b/kernel/arch/x86_64/Signal.S
index 5cf227605a..e11a7adb98 100644
--- a/kernel/arch/x86_64/Signal.S
+++ b/kernel/arch/x86_64/Signal.S
@@ -1,4 +1,4 @@
-.section .userspace, "aw"
+.section .userspace, "ax"
 
 // stack contains
 //	return address
diff --git a/kernel/arch/x86_64/boot.S b/kernel/arch/x86_64/boot.S
index 70f209e889..c3b921312f 100644
--- a/kernel/arch/x86_64/boot.S
+++ b/kernel/arch/x86_64/boot.S
@@ -108,9 +108,6 @@ g_ap_startup_done:
 .global g_ap_running_count
 g_ap_running_count:
 	.byte 0
-.global g_ap_stack_loaded
-g_ap_stack_loaded:
-	.byte 0
 
 .section .text
 
@@ -194,7 +191,6 @@ _start:
 	call check_requirements
 
 	call enable_sse
-
 	call initialize_paging
 
 	# flush gdt and jump to 64 bit
@@ -237,26 +233,31 @@ system_halt:
 	jmp 1b
 
 
+#define AP_V2P(vaddr) ((vaddr) - ap_trampoline + 0xF000)
+
 .section .ap_init, "ax"
 
 .code16
 .global ap_trampoline
 ap_trampoline:
 	jmp 1f
 
+.align 8
 ap_stack_ptr:
 	.skip 4
-1:
-	cli; cld
-	ljmpl $0x00, $ap_cs_clear
-ap_cs_clear:
+ap_stack_loaded:
+	.skip 1
+1:	cli; cld
+	ljmpl $0x00, $AP_V2P(ap_cs_clear)
 
+ap_cs_clear:
 	# load ap gdt and enter protected mode
-	lgdt ap_gdtr
+	lgdt AP_V2P(ap_gdtr)
 	movl %cr0, %eax
 	orb $1, %al
 	movl %eax, %cr0
-	ljmpl $0x08, $ap_protected_mode
+	ljmpl $0x08, $AP_V2P(ap_protected_mode)
 
 .code32
 ap_protected_mode:
@@ -265,16 +266,15 @@ ap_protected_mode:
 	movw %ax, %ss
 	movw %ax, %es
 
-	movl ap_stack_ptr, %esp
-	movb $1, V2P(g_ap_stack_loaded)
+	movl AP_V2P(ap_stack_ptr), %esp
+	movb $1, AP_V2P(ap_stack_loaded)
 
-	call V2P(enable_sse)
-
-	call V2P(initialize_paging)
+	leal V2P(enable_sse), %ecx; call *%ecx
+	leal V2P(initialize_paging), %ecx; call *%ecx
 
 	# load boot gdt and enter long mode
 	lgdt V2P(boot_gdtr)
-	ljmpl $0x08, $ap_long_mode
+	ljmpl $0x08, $AP_V2P(ap_long_mode)
 
 .code64
 ap_long_mode:
@@ -282,22 +282,20 @@ ap_long_mode:
 	movl %esp, %esp
 	addq $KERNEL_OFFSET, %rsp
 
-	# jump to higher half
-	movabsq $ap_higher_half, %rcx
-	jmp *%rcx
-
-ap_higher_half:
 	# clear rbp for stacktrace
 	xorq %rbp, %rbp
 
+	xorb %al, %al
 1:	pause
-	cmpb $0, g_ap_startup_done
+	cmpb %al, g_ap_startup_done
 	jz 1b
 
 	lock incb g_ap_running_count
 
-	call ap_main
-	jmp system_halt
+	# jump to ap_main in higher half
+	movabsq $ap_main, %rcx
+	call *%rcx
+	jmp V2P(system_halt)
 
 ap_gdt:
 	.quad 0x0000000000000000 # null descriptor
diff --git a/kernel/arch/x86_64/linker.ld b/kernel/arch/x86_64/linker.ld
index cb871f463f..d7ccace15c 100644
--- a/kernel/arch/x86_64/linker.ld
+++ b/kernel/arch/x86_64/linker.ld
@@ -4,13 +4,6 @@ KERNEL_OFFSET = 0xFFFFFFFF80000000;
 
 SECTIONS
 {
-	. = 0xF000;
-	.ap_init ALIGN(4K) : AT(ADDR(.ap_init))
-	{
-		g_ap_init_addr = .;
-		*(.ap_init)
-	}
-
 	. = 0x00100000 + KERNEL_OFFSET;
 
 	g_kernel_start = .;
@@ -28,15 +21,20 @@ SECTIONS
 		g_userspace_end = .;
 		g_kernel_execute_end = .;
 	}
-	.rodata ALIGN(4K) : AT(ADDR(.rodata) - KERNEL_OFFSET)
+	.ap_init ALIGN(4K) : AT(ADDR(.ap_init))
 	{
-		*(.rodata.*)
+		g_ap_init_addr = .;
+		*(.ap_init)
 	}
 	.data ALIGN(4K) : AT(ADDR(.data) - KERNEL_OFFSET)
 	{
 		g_kernel_writable_start = .;
 		*(.data)
 	}
+	.rodata ALIGN(4K) : AT(ADDR(.rodata) - KERNEL_OFFSET)
+	{
+		*(.rodata.*)
+	}
 	.bss ALIGN(4K) : AT(ADDR(.bss) - KERNEL_OFFSET)
 	{
 		*(COMMON)
diff --git a/kernel/kernel/APIC.cpp b/kernel/kernel/APIC.cpp
index ac393a1b4c..af07bf7cc6 100644
--- a/kernel/kernel/APIC.cpp
+++ b/kernel/kernel/APIC.cpp
@@ -31,7 +31,6 @@ extern uint8_t g_ap_init_addr[];
 
 extern volatile uint8_t g_ap_startup_done[];
 extern volatile uint8_t g_ap_running_count[];
-extern volatile uint8_t g_ap_stack_loaded[];
 
 namespace Kernel
 {
@@ -261,8 +260,6 @@ namespace Kernel
 				__builtin_ia32_pause();
 		};
 
-		const size_t ap_init_page = reinterpret_cast<size_t>(g_ap_init_addr) / PAGE_SIZE;
-
 		dprintln("System has {} processors", m_processors.size());
 
 		uint8_t bsp_id = Kernel::Processor::current_id().as_u32();
@@ -275,6 +272,11 @@ namespace Kernel
 			return;
 		}
 
+		constexpr paddr_t ap_init_paddr = 0xF000;
+		PageTable::with_fast_page(ap_init_paddr, [&] {
+			memcpy(PageTable::fast_page_as_ptr(), g_ap_init_addr, PAGE_SIZE);
+		});
+
 		for (auto& processor : m_processors)
 		{
 			if (processor.apic_id == bsp_id)
@@ -289,10 +291,10 @@ namespace Kernel
 			dprintln("Trying to enable processor (lapic id {})", processor.apic_id);
 
 			auto& proc = Kernel::Processor::create(ProcessorID(processor.apic_id));
-			PageTable::with_fast_page((paddr_t)g_ap_init_addr, [&] {
-				PageTable::fast_page_as_sized<uint32_t>(2) = V2P(proc.stack_top());
+			PageTable::with_fast_page(ap_init_paddr, [&] {
+				PageTable::fast_page_as_sized<uint32_t>(2) = kmalloc_paddr_of(proc.stack_top()).value();
+				PageTable::fast_page_as_sized<uint8_t>(13) = 0;
 			});
-			*g_ap_stack_loaded = 0;
 
 			write_to_local_apic(LAPIC_ERROR_REG, 0x00);
 
@@ -323,18 +325,20 @@ namespace Kernel
 					| ICR_LO_level_assert
 					| ICR_LO_trigger_mode_edge
 					| ICR_LO_destination_shorthand_none
-					| ap_init_page
+					| (ap_init_paddr / PAGE_SIZE)
 					, 200
 				);
 			}
 
 			// give processor upto 100 * 100 us + 200 us to boot
-			for (int i = 0; i < 100; i++)
-			{
-				if (__atomic_load_n(&g_ap_stack_loaded[0], __ATOMIC_SEQ_CST))
-					break;
-				udelay(100);
-			}
+			PageTable::with_fast_page(ap_init_paddr, [&] {
+				for (int i = 0; i < 100; i++)
+				{
+					if (__atomic_load_n(&PageTable::fast_page_as_sized<uint8_t>(13), __ATOMIC_SEQ_CST))
+						break;
+					udelay(100);
+				}
+			});
 		}
 
 		__atomic_store_n(&g_ap_startup_done[0], 1, __ATOMIC_SEQ_CST);
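
The overall flow the APIC.cpp hunks implement is: copy the linked `.ap_init` image to the fixed page 0xF000 once, then patch the per-CPU stack slot and clear the "stack loaded" flag before each INIT-SIPI-SIPI sequence. The following is a minimal user-space sketch of that pattern, not the kernel's code: `g_ap_init_image` and `low_page` are hypothetical stand-ins for the linker symbol `g_ap_init_addr` and the physical page behind `PageTable::with_fast_page`, and the byte offsets mirror the indices the patch itself uses (`uint32_t` index 2 for `ap_stack_ptr`, `uint8_t` index 13 for `ap_stack_loaded`).

```cpp
#include <cstdint>
#include <cstring>
#include <cstdio>

// Hypothetical stand-ins for the kernel environment (illustration only).
constexpr size_t page_size = 4096;
static uint8_t g_ap_init_image[page_size]; // trampoline as linked into the kernel
static uint8_t low_page[page_size];        // the page the SIPI vector points at

// Byte offsets mirroring the .ap_init layout in the patch.
constexpr size_t ap_stack_ptr_off    = 2 * sizeof(uint32_t); // fast_page_as_sized<uint32_t>(2)
constexpr size_t ap_stack_loaded_off = 13;                   // fast_page_as_sized<uint8_t>(13)

int main()
{
	// 1. Relocate: copy the linked image to the fixed low page once.
	memcpy(low_page, g_ap_init_image, page_size);

	// 2. Per CPU: patch the stack slot and clear the flag before INIT-SIPI-SIPI.
	uint32_t stack_paddr = 0x00200000; // hypothetical AP stack physical address
	memcpy(low_page + ap_stack_ptr_off, &stack_paddr, sizeof(stack_paddr));
	low_page[ap_stack_loaded_off] = 0;

	// 3. The BSP would now poll the flag until the AP sets it to 1 from the
	//    trampoline (movb $1, AP_V2P(ap_stack_loaded)).
	printf("stack slot = %#x, flag = %u\n",
	       *reinterpret_cast<uint32_t*>(low_page + ap_stack_ptr_off),
	       low_page[ap_stack_loaded_off]);
}
```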
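The `AP_V2P` macro added in both boot.S files is plain address arithmetic: subtracting `ap_trampoline` turns a link-time address into an offset within the trampoline image, and adding 0xF000 gives the address that byte has after the kernel copies the image there. A compile-time check of that identity follows; the link-time addresses below are made up purely for illustration.

```cpp
#include <cstdint>

// AP_V2P(vaddr) = (vaddr) - ap_trampoline + 0xF000, as in the boot.S hunks.
constexpr uintptr_t ap_v2p(uintptr_t vaddr, uintptr_t ap_trampoline)
{
	return vaddr - ap_trampoline + 0xF000;
}

// Hypothetical link-time addresses: assume .ap_init is linked at this
// address and ap_cs_clear sits 0x20 bytes into the section.
constexpr uintptr_t trampoline_vaddr = 0xFFFFFFFF80150000;
constexpr uintptr_t ap_cs_clear      = trampoline_vaddr + 0x20;

static_assert(ap_v2p(trampoline_vaddr, trampoline_vaddr) == 0xF000); // start maps to the SIPI page
static_assert(ap_v2p(ap_cs_clear, trampoline_vaddr) == 0xF020);      // intra-section offsets preserved

int main() {}
```

This is also why the trampoline calls `enable_sse` and `initialize_paging` through a register (`leal V2P(...), %ecx; call *%ecx`): a direct near call would encode a link-time-relative displacement, which is wrong once the code runs from 0xF000.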
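The ICR change from `ap_init_page` to `(ap_init_paddr / PAGE_SIZE)` keeps the same encoding: the startup IPI's 8-bit vector field holds a physical page number, and the AP begins real-mode execution at `vector << 12` (CS = vector << 8, IP = 0), so the trampoline must sit page-aligned below 1 MiB. A quick check of the arithmetic used in the APIC.cpp hunk:

```cpp
#include <cstdint>

constexpr uint64_t ap_init_paddr = 0xF000;
constexpr uint64_t PAGE_SIZE = 4096;

// The startup-IPI vector is the physical page number of the boot page.
constexpr uint64_t sipi_vector = ap_init_paddr / PAGE_SIZE;

static_assert(sipi_vector == 0x0F);                  // fits the 8-bit vector field
static_assert((sipi_vector << 12) == ap_init_paddr); // AP starts exactly at 0xF000
static_assert(ap_init_paddr % PAGE_SIZE == 0, "SIPI target must be page-aligned");

int main() {}
```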