From 8fd0162393a6b8b917756aa5a905c5925be4116c Mon Sep 17 00:00:00 2001 From: Bananymous Date: Mon, 14 Oct 2024 11:40:30 +0300 Subject: [PATCH] Kernel: Rewrite x86_64 page tables to use HHDM instead of kmalloc This allows page tables to not crash the kernel once kmalloc runs out of its (limited) static memory. --- kernel/arch/i686/PageTable.cpp | 7 +- kernel/arch/x86_64/PageTable.cpp | 549 +++++++++++++++++------ kernel/include/kernel/Memory/PageTable.h | 3 +- kernel/kernel/kernel.cpp | 7 +- 4 files changed, 423 insertions(+), 143 deletions(-) diff --git a/kernel/arch/i686/PageTable.cpp b/kernel/arch/i686/PageTable.cpp index 90119569..0a9057d1 100644 --- a/kernel/arch/i686/PageTable.cpp +++ b/kernel/arch/i686/PageTable.cpp @@ -46,7 +46,7 @@ namespace Kernel return result; } - void PageTable::initialize() + void PageTable::initialize_pre_heap() { if (CPUID::has_nxe()) s_has_nxe = true; @@ -65,6 +65,11 @@ namespace Kernel s_kernel->initial_load(); } + void PageTable::initialize_post_heap() + { + // NOTE: this is no-op as our 32 bit target does not use hhdm + } + void PageTable::initial_load() { if (s_has_nxe) diff --git a/kernel/arch/x86_64/PageTable.cpp b/kernel/arch/x86_64/PageTable.cpp index 4ceb67d0..08b0c389 100644 --- a/kernel/arch/x86_64/PageTable.cpp +++ b/kernel/arch/x86_64/PageTable.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include @@ -21,12 +22,18 @@ namespace Kernel SpinLock PageTable::s_fast_page_lock; + static constexpr vaddr_t s_hhdm_offset = 0xFFFF800000000000; + static bool s_is_hddm_initialized = false; + + constexpr uint64_t s_page_flag_mask = 0x8000000000000FFF; + constexpr uint64_t s_page_addr_mask = ~s_page_flag_mask; + static PageTable* s_kernel = nullptr; static bool s_has_nxe = false; static bool s_has_pge = false; + static bool s_has_gib = false; - // PML4 entry for kernel memory - static paddr_t s_global_pml4e = 0; + static paddr_t s_global_pml4_entries[512] { 0 }; static constexpr inline bool is_canonical(uintptr_t addr) { @@ -47,6 +54,67 @@ namespace Kernel return addr; } + struct FuncsKmalloc + { + static paddr_t allocate_zeroed_page_aligned_page() + { + void* page = kmalloc(PAGE_SIZE, PAGE_SIZE, true); + ASSERT(page); + memset(page, 0, PAGE_SIZE); + return kmalloc_paddr_of(reinterpret_cast(page)).value(); + } + + static void unallocate_page(paddr_t paddr) + { + kfree(reinterpret_cast(kmalloc_vaddr_of(paddr).value())); + } + + static paddr_t V2P(vaddr_t vaddr) + { + return vaddr - KERNEL_OFFSET + g_boot_info.kernel_paddr; + } + + static uint64_t* P2V(paddr_t paddr) + { + return reinterpret_cast(paddr - g_boot_info.kernel_paddr + KERNEL_OFFSET); + } + }; + + struct FuncsHHDM + { + static paddr_t allocate_zeroed_page_aligned_page() + { + const paddr_t paddr = Heap::get().take_free_page(); + ASSERT(paddr); + memset(reinterpret_cast(paddr + s_hhdm_offset), 0, PAGE_SIZE); + return paddr; + } + + static void unallocate_page(paddr_t paddr) + { + Heap::get().release_page(paddr); + } + + static paddr_t V2P(vaddr_t vaddr) + { + ASSERT(vaddr >= s_hhdm_offset); + ASSERT(vaddr < KERNEL_OFFSET); + return vaddr - s_hhdm_offset; + } + + static uint64_t* P2V(paddr_t paddr) + { + ASSERT(paddr != 0); + ASSERT(!BAN::Math::will_addition_overflow(paddr, s_hhdm_offset)); + return reinterpret_cast(paddr + s_hhdm_offset); + } + }; + + static paddr_t (*allocate_zeroed_page_aligned_page)() = &FuncsKmalloc::allocate_zeroed_page_aligned_page; + static void (*unallocate_page)(paddr_t) = &FuncsKmalloc::unallocate_page; + static paddr_t (*V2P)(vaddr_t) = &FuncsKmalloc::V2P; + static uint64_t* (*P2V)(paddr_t) = &FuncsKmalloc::P2V; + static inline PageTable::flags_t parse_flags(uint64_t entry) { using Flags = PageTable::Flags; @@ -65,7 +133,190 @@ namespace Kernel return result; } - void PageTable::initialize() + // page size: + // 0: 4 KiB + // 1: 2 MiB + // 2: 1 GiB + static void init_map_hhdm_page(paddr_t pml4, paddr_t paddr, uint8_t page_size) + { + ASSERT(0 <= page_size && page_size <= 2); + + const vaddr_t vaddr = paddr + s_hhdm_offset; + ASSERT(vaddr < KERNEL_OFFSET); + + const vaddr_t uc_vaddr = uncanonicalize(vaddr); + const uint16_t pml4e = (uc_vaddr >> 39) & 0x1FF; + const uint16_t pdpte = (uc_vaddr >> 30) & 0x1FF; + const uint16_t pde = (uc_vaddr >> 21) & 0x1FF; + const uint16_t pte = (uc_vaddr >> 12) & 0x1FF; + + static constexpr uint64_t hhdm_flags = (1u << 1) | (1u << 0); + + const auto get_or_allocate_entry = + [](paddr_t table, uint16_t table_entry, uint64_t extra_flags) -> paddr_t + { + paddr_t result = 0; + PageTable::with_fast_page(table, [&] { + const uint64_t entry = PageTable::fast_page_as_sized(table_entry); + if (entry & (1u << 0)) + result = entry & s_page_addr_mask; + }); + if (result != 0) + return result; + + const paddr_t new_paddr = Heap::get().take_free_page(); + ASSERT(new_paddr); + + PageTable::with_fast_page(new_paddr, [] { + memset(reinterpret_cast(PageTable::fast_page_as_ptr()), 0, PAGE_SIZE); + }); + + PageTable::with_fast_page(table, [&] { + uint64_t& entry = PageTable::fast_page_as_sized(table_entry); + entry = new_paddr | hhdm_flags | extra_flags; + }); + + return new_paddr; + }; + + const uint64_t pgsize_flag = page_size ? (static_cast(1) << 7) : 0; + const uint64_t global_flag = s_has_pge ? (static_cast(1) << 8) : 0; + const uint64_t noexec_flag = s_has_nxe ? (static_cast(1) << 63) : 0; + + const paddr_t pdpt = get_or_allocate_entry(pml4, pml4e, noexec_flag); + s_global_pml4_entries[pml4e] = pdpt | hhdm_flags; + + paddr_t lowest_paddr = pdpt; + uint16_t lowest_entry = pdpte; + + if (page_size < 2) + { + lowest_paddr = get_or_allocate_entry(lowest_paddr, lowest_entry, noexec_flag); + lowest_entry = pde; + } + + if (page_size < 1) + { + lowest_paddr = get_or_allocate_entry(lowest_paddr, lowest_entry, noexec_flag); + lowest_entry = pte; + } + + PageTable::with_fast_page(lowest_paddr, [&] { + uint64_t& entry = PageTable::fast_page_as_sized(lowest_entry); + entry = paddr | hhdm_flags | noexec_flag | global_flag | pgsize_flag; + }); + } + + static void init_map_hhdm(paddr_t pml4) + { + for (const auto& entry : g_boot_info.memory_map_entries) + { + bool should_map = false; + switch (entry.type) + { + case MemoryMapEntry::Type::Available: + should_map = true; + break; + case MemoryMapEntry::Type::ACPIReclaim: + case MemoryMapEntry::Type::ACPINVS: + case MemoryMapEntry::Type::Reserved: + should_map = false; + break; + } + if (!should_map) + continue; + + constexpr size_t one_gib = 1024 * 1024 * 1024; + constexpr size_t two_mib = 2 * 1024 * 1024; + + const paddr_t entry_start = (entry.address + PAGE_SIZE - 1) & PAGE_ADDR_MASK; + const paddr_t entry_end = (entry.address + entry.length) & PAGE_ADDR_MASK; + for (paddr_t paddr = entry_start; paddr < entry_end;) + { + if (s_has_gib && paddr % one_gib == 0 && paddr + one_gib <= entry_end) + { + init_map_hhdm_page(pml4, paddr, 2); + paddr += one_gib; + } + else if (paddr % two_mib == 0 && paddr + two_mib <= entry_end) + { + init_map_hhdm_page(pml4, paddr, 1); + paddr += two_mib; + } + else + { + init_map_hhdm_page(pml4, paddr, 0); + paddr += PAGE_SIZE; + } + } + } + } + + static paddr_t copy_page_from_kmalloc_to_heap(paddr_t kmalloc_paddr) + { + const paddr_t heap_paddr = Heap::get().take_free_page(); + ASSERT(heap_paddr); + + const vaddr_t kmalloc_vaddr = kmalloc_vaddr_of(kmalloc_paddr).value(); + + PageTable::with_fast_page(heap_paddr, [kmalloc_vaddr] { + memcpy(PageTable::fast_page_as_ptr(), reinterpret_cast(kmalloc_vaddr), PAGE_SIZE); + }); + + return heap_paddr; + } + + static void copy_paging_structure_to_heap(uint64_t* old_table, uint64_t* new_table, int depth) + { + if (depth == 0) + return; + + constexpr uint64_t page_flag_mask = 0x8000000000000FFF; + constexpr uint64_t page_addr_mask = ~page_flag_mask; + + for (uint16_t index = 0; index < 512; index++) + { + const uint64_t old_entry = old_table[index]; + if (old_entry == 0) + { + new_table[index] = 0; + continue; + } + + const paddr_t old_paddr = old_entry & page_addr_mask; + const paddr_t new_paddr = copy_page_from_kmalloc_to_heap(old_paddr); + new_table[index] = new_paddr | (old_entry & page_flag_mask); + + uint64_t* next_old_table = reinterpret_cast(old_paddr + s_hhdm_offset); + uint64_t* next_new_table = reinterpret_cast(new_paddr + s_hhdm_offset); + copy_paging_structure_to_heap(next_old_table, next_new_table, depth - 1); + } + } + + static void free_kmalloc_paging_structure(uint64_t* table, int depth) + { + if (depth == 0) + return; + + constexpr uint64_t page_flag_mask = 0x8000000000000FFF; + constexpr uint64_t page_addr_mask = ~page_flag_mask; + + for (uint16_t index = 0; index < 512; index++) + { + const uint64_t entry = table[index]; + if (entry == 0) + continue; + + const paddr_t paddr = entry & page_addr_mask; + + uint64_t* next_table = reinterpret_cast(paddr + s_hhdm_offset); + free_kmalloc_paging_structure(next_table, depth - 1); + + kfree(reinterpret_cast(kmalloc_vaddr_of(paddr).value())); + } + } + + void PageTable::initialize_pre_heap() { if (CPUID::has_nxe()) s_has_nxe = true; @@ -73,11 +324,64 @@ namespace Kernel if (CPUID::has_pge()) s_has_pge = true; + if (CPUID::has_1gib_pages()) + s_has_gib = true; + ASSERT(s_kernel == nullptr); s_kernel = new PageTable(); ASSERT(s_kernel); + s_kernel->m_highest_paging_struct = allocate_zeroed_page_aligned_page(); + s_kernel->prepare_fast_page(); s_kernel->initialize_kernel(); + + for (auto pml4e : s_global_pml4_entries) + ASSERT(pml4e == 0); + const uint64_t* pml4 = P2V(s_kernel->m_highest_paging_struct); + s_global_pml4_entries[511] = pml4[511]; + } + + void PageTable::initialize_post_heap() + { + ASSERT(s_kernel); + + init_map_hhdm(s_kernel->m_highest_paging_struct); + + const paddr_t old_pml4_paddr = s_kernel->m_highest_paging_struct; + const paddr_t new_pml4_paddr = copy_page_from_kmalloc_to_heap(old_pml4_paddr); + + uint64_t* old_pml4 = reinterpret_cast(kmalloc_vaddr_of(old_pml4_paddr).value()); + uint64_t* new_pml4 = reinterpret_cast(new_pml4_paddr + s_hhdm_offset); + + const paddr_t old_pdpt_paddr = old_pml4[511] & s_page_addr_mask; + const paddr_t new_pdpt_paddr = Heap::get().take_free_page(); + ASSERT(new_pdpt_paddr); + + uint64_t* old_pdpt = reinterpret_cast(old_pdpt_paddr + s_hhdm_offset); + uint64_t* new_pdpt = reinterpret_cast(new_pdpt_paddr + s_hhdm_offset); + copy_paging_structure_to_heap(old_pdpt, new_pdpt, 2); + + new_pml4[511] = new_pdpt_paddr | (old_pml4[511] & s_page_flag_mask); + s_global_pml4_entries[511] = new_pml4[511]; + + s_kernel->m_highest_paging_struct = new_pml4_paddr; + s_kernel->load(); + + free_kmalloc_paging_structure(old_pdpt, 2); + kfree(reinterpret_cast(kmalloc_vaddr_of(old_pdpt_paddr).value())); + kfree(reinterpret_cast(kmalloc_vaddr_of(old_pml4_paddr).value())); + + allocate_zeroed_page_aligned_page = &FuncsHHDM::allocate_zeroed_page_aligned_page; + unallocate_page = &FuncsHHDM::unallocate_page; + V2P = &FuncsHHDM::V2P; + P2V = &FuncsHHDM::P2V; + + s_is_hddm_initialized = true; + + // This is a hack to unmap fast page. fast page pt is copied + // while it is mapped, so we need to manually unmap it + SpinLockGuard _(s_fast_page_lock); + unmap_fast_page(); } void PageTable::initial_load() @@ -136,67 +440,40 @@ namespace Kernel return true; } - static uint64_t* allocate_zeroed_page_aligned_page() - { - void* page = kmalloc(PAGE_SIZE, PAGE_SIZE, true); - ASSERT(page); - memset(page, 0, PAGE_SIZE); - return (uint64_t*)page; - } - - template - static paddr_t V2P(const T vaddr) - { - return (vaddr_t)vaddr - KERNEL_OFFSET + g_boot_info.kernel_paddr; - } - - template - static vaddr_t P2V(const T paddr) - { - return (paddr_t)paddr - g_boot_info.kernel_paddr + KERNEL_OFFSET; - } - void PageTable::initialize_kernel() { - ASSERT(s_global_pml4e == 0); - s_global_pml4e = V2P(allocate_zeroed_page_aligned_page()); - - m_highest_paging_struct = V2P(allocate_zeroed_page_aligned_page()); - - uint64_t* pml4 = (uint64_t*)P2V(m_highest_paging_struct); - pml4[511] = s_global_pml4e; - - prepare_fast_page(); - // Map (phys_kernel_start -> phys_kernel_end) to (virt_kernel_start -> virt_kernel_end) - ASSERT((vaddr_t)g_kernel_start % PAGE_SIZE == 0); + const vaddr_t kernel_start = reinterpret_cast(g_kernel_start); map_range_at( - V2P(g_kernel_start), - (vaddr_t)g_kernel_start, + V2P(kernel_start), + kernel_start, g_kernel_end - g_kernel_start, Flags::Present ); // Map executable kernel memory as executable + const vaddr_t kernel_execute_start = reinterpret_cast(g_kernel_execute_start); map_range_at( - V2P(g_kernel_execute_start), - (vaddr_t)g_kernel_execute_start, + V2P(kernel_execute_start), + kernel_execute_start, g_kernel_execute_end - g_kernel_execute_start, Flags::Execute | Flags::Present ); // Map writable kernel memory as writable + const vaddr_t kernel_writable_start = reinterpret_cast(g_kernel_writable_start); map_range_at( - V2P(g_kernel_writable_start), - (vaddr_t)g_kernel_writable_start, + V2P(kernel_writable_start), + kernel_writable_start, g_kernel_writable_end - g_kernel_writable_start, Flags::ReadWrite | Flags::Present ); // Map userspace memory + const vaddr_t userspace_start = reinterpret_cast(g_userspace_start); map_range_at( - V2P(g_userspace_start), - (vaddr_t)g_userspace_start, + V2P(userspace_start), + userspace_start, g_userspace_end - g_userspace_start, Flags::Execute | Flags::UserSupervisor | Flags::Present ); @@ -209,17 +486,17 @@ namespace Kernel constexpr uint64_t pdpte = (uc_vaddr >> 30) & 0x1FF; constexpr uint64_t pde = (uc_vaddr >> 21) & 0x1FF; - uint64_t* pml4 = (uint64_t*)P2V(m_highest_paging_struct); + uint64_t* pml4 = P2V(m_highest_paging_struct); ASSERT(!(pml4[pml4e] & Flags::Present)); - pml4[pml4e] = V2P(allocate_zeroed_page_aligned_page()) | Flags::ReadWrite | Flags::Present; + pml4[pml4e] = allocate_zeroed_page_aligned_page() | Flags::ReadWrite | Flags::Present; - uint64_t* pdpt = (uint64_t*)P2V(pml4[pml4e] & PAGE_ADDR_MASK); + uint64_t* pdpt = P2V(pml4[pml4e] & s_page_addr_mask); ASSERT(!(pdpt[pdpte] & Flags::Present)); - pdpt[pdpte] = V2P(allocate_zeroed_page_aligned_page()) | Flags::ReadWrite | Flags::Present; + pdpt[pdpte] = allocate_zeroed_page_aligned_page() | Flags::ReadWrite | Flags::Present; - uint64_t* pd = (uint64_t*)P2V(pdpt[pdpte] & PAGE_ADDR_MASK); + uint64_t* pd = P2V(pdpt[pdpte] & s_page_addr_mask); ASSERT(!(pd[pde] & Flags::Present)); - pd[pde] = V2P(allocate_zeroed_page_aligned_page()) | Flags::ReadWrite | Flags::Present; + pd[pde] = allocate_zeroed_page_aligned_page() | Flags::ReadWrite | Flags::Present; } void PageTable::map_fast_page(paddr_t paddr) @@ -235,10 +512,10 @@ namespace Kernel constexpr uint64_t pde = (uc_vaddr >> 21) & 0x1FF; constexpr uint64_t pte = (uc_vaddr >> 12) & 0x1FF; - uint64_t* pml4 = (uint64_t*)P2V(s_kernel->m_highest_paging_struct); - uint64_t* pdpt = (uint64_t*)P2V(pml4[pml4e] & PAGE_ADDR_MASK); - uint64_t* pd = (uint64_t*)P2V(pdpt[pdpte] & PAGE_ADDR_MASK); - uint64_t* pt = (uint64_t*)P2V(pd[pde] & PAGE_ADDR_MASK); + const uint64_t* pml4 = P2V(s_kernel->m_highest_paging_struct); + const uint64_t* pdpt = P2V(pml4[pml4e] & s_page_addr_mask); + const uint64_t* pd = P2V(pdpt[pdpte] & s_page_addr_mask); + uint64_t* pt = P2V(pd[pde] & s_page_addr_mask); ASSERT(!(pt[pte] & Flags::Present)); pt[pte] = paddr | Flags::ReadWrite | Flags::Present; @@ -258,10 +535,10 @@ namespace Kernel constexpr uint64_t pde = (uc_vaddr >> 21) & 0x1FF; constexpr uint64_t pte = (uc_vaddr >> 12) & 0x1FF; - uint64_t* pml4 = (uint64_t*)P2V(s_kernel->m_highest_paging_struct); - uint64_t* pdpt = (uint64_t*)P2V(pml4[pml4e] & PAGE_ADDR_MASK); - uint64_t* pd = (uint64_t*)P2V(pdpt[pdpte] & PAGE_ADDR_MASK); - uint64_t* pt = (uint64_t*)P2V(pd[pde] & PAGE_ADDR_MASK); + const uint64_t* pml4 = P2V(s_kernel->m_highest_paging_struct); + const uint64_t* pdpt = P2V(pml4[pml4e] & s_page_addr_mask); + const uint64_t* pd = P2V(pdpt[pdpte] & s_page_addr_mask); + uint64_t* pt = P2V(pd[pde] & s_page_addr_mask); ASSERT(pt[pte] & Flags::Present); pt[pte] = 0; @@ -282,43 +559,46 @@ namespace Kernel void PageTable::map_kernel_memory() { ASSERT(s_kernel); - ASSERT(s_global_pml4e); - + ASSERT(s_global_pml4_entries[511]); ASSERT(m_highest_paging_struct == 0); - m_highest_paging_struct = V2P(allocate_zeroed_page_aligned_page()); + m_highest_paging_struct = allocate_zeroed_page_aligned_page(); - uint64_t* kernel_pml4 = (uint64_t*)P2V(s_kernel->m_highest_paging_struct); - - uint64_t* pml4 = (uint64_t*)P2V(m_highest_paging_struct); - pml4[511] = kernel_pml4[511]; + PageTable::with_fast_page(m_highest_paging_struct, [] { + for (size_t i = 0; i < 512; i++) + { + if (s_global_pml4_entries[i] == 0) + continue; + ASSERT(i >= 256); + PageTable::fast_page_as_sized(i) = s_global_pml4_entries[i]; + } + }); } PageTable::~PageTable() { - uint64_t* pml4 = (uint64_t*)P2V(m_highest_paging_struct); - - // NOTE: we only loop until 511 since the last one is the kernel memory - for (uint64_t pml4e = 0; pml4e < 511; pml4e++) + // NOTE: we only loop until 256 since after that is hhdm + const uint64_t* pml4 = P2V(m_highest_paging_struct); + for (uint64_t pml4e = 0; pml4e < 256; pml4e++) { if (!(pml4[pml4e] & Flags::Present)) continue; - uint64_t* pdpt = (uint64_t*)P2V(pml4[pml4e] & PAGE_ADDR_MASK); + const uint64_t* pdpt = P2V(pml4[pml4e] & s_page_addr_mask); for (uint64_t pdpte = 0; pdpte < 512; pdpte++) { if (!(pdpt[pdpte] & Flags::Present)) continue; - uint64_t* pd = (uint64_t*)P2V(pdpt[pdpte] & PAGE_ADDR_MASK); + const uint64_t* pd = P2V(pdpt[pdpte] & s_page_addr_mask); for (uint64_t pde = 0; pde < 512; pde++) { if (!(pd[pde] & Flags::Present)) continue; - kfree((void*)P2V(pd[pde] & PAGE_ADDR_MASK)); + unallocate_page(pd[pde] & s_page_addr_mask); } - kfree(pd); + unallocate_page(pdpt[pdpte] & s_page_addr_mask); } - kfree(pdpt); + unallocate_page(pml4[pml4e] & s_page_addr_mask); } - kfree(pml4); + unallocate_page(m_highest_paging_struct); } void PageTable::load() @@ -355,24 +635,24 @@ namespace Kernel Kernel::panic("unmapping {8H}, kernel: {}", vaddr, this == s_kernel); ASSERT(is_canonical(vaddr)); - vaddr_t uc_vaddr = uncanonicalize(vaddr); + const vaddr_t uc_vaddr = uncanonicalize(vaddr); ASSERT(vaddr % PAGE_SIZE == 0); - uint64_t pml4e = (uc_vaddr >> 39) & 0x1FF; - uint64_t pdpte = (uc_vaddr >> 30) & 0x1FF; - uint64_t pde = (uc_vaddr >> 21) & 0x1FF; - uint64_t pte = (uc_vaddr >> 12) & 0x1FF; + const uint16_t pml4e = (uc_vaddr >> 39) & 0x1FF; + const uint16_t pdpte = (uc_vaddr >> 30) & 0x1FF; + const uint16_t pde = (uc_vaddr >> 21) & 0x1FF; + const uint16_t pte = (uc_vaddr >> 12) & 0x1FF; SpinLockGuard _(m_lock); if (is_page_free(vaddr)) Kernel::panic("trying to unmap unmapped page 0x{H}", vaddr); - uint64_t* pml4 = (uint64_t*)P2V(m_highest_paging_struct); - uint64_t* pdpt = (uint64_t*)P2V(pml4[pml4e] & PAGE_ADDR_MASK); - uint64_t* pd = (uint64_t*)P2V(pdpt[pdpte] & PAGE_ADDR_MASK); - uint64_t* pt = (uint64_t*)P2V(pd[pde] & PAGE_ADDR_MASK); + uint64_t* pml4 = P2V(m_highest_paging_struct); + uint64_t* pdpt = P2V(pml4[pml4e] & s_page_addr_mask); + uint64_t* pd = P2V(pdpt[pdpte] & s_page_addr_mask); + uint64_t* pt = P2V(pd[pde] & s_page_addr_mask); pt[pte] = 0; invalidate(vaddr, send_smp_message); @@ -401,20 +681,22 @@ namespace Kernel { ASSERT(vaddr); ASSERT(vaddr != fast_page()); - if ((vaddr >= KERNEL_OFFSET) != (this == s_kernel)) - Kernel::panic("mapping {8H} to {8H}, kernel: {}", paddr, vaddr, this == s_kernel); + if (vaddr < KERNEL_OFFSET && this == s_kernel) + panic("kernel is mapping below kernel offset"); + if (vaddr >= s_hhdm_offset && this != s_kernel) + panic("user is mapping above hhdm offset"); ASSERT(is_canonical(vaddr)); - vaddr_t uc_vaddr = uncanonicalize(vaddr); + const vaddr_t uc_vaddr = uncanonicalize(vaddr); ASSERT(paddr % PAGE_SIZE == 0); ASSERT(vaddr % PAGE_SIZE == 0); ASSERT(flags & Flags::Used); - uint64_t pml4e = (uc_vaddr >> 39) & 0x1FF; - uint64_t pdpte = (uc_vaddr >> 30) & 0x1FF; - uint64_t pde = (uc_vaddr >> 21) & 0x1FF; - uint64_t pte = (uc_vaddr >> 12) & 0x1FF; + const uint16_t pml4e = (uc_vaddr >> 39) & 0x1FF; + const uint16_t pdpte = (uc_vaddr >> 30) & 0x1FF; + const uint16_t pde = (uc_vaddr >> 21) & 0x1FF; + const uint16_t pte = (uc_vaddr >> 12) & 0x1FF; uint64_t extra_flags = 0; if (s_has_pge && pml4e == 511) // Map kernel memory as global @@ -436,34 +718,26 @@ namespace Kernel SpinLockGuard _(m_lock); - uint64_t* pml4 = (uint64_t*)P2V(m_highest_paging_struct); - if ((pml4[pml4e] & uwr_flags) != uwr_flags) - { - if (!(pml4[pml4e] & Flags::Present)) - pml4[pml4e] = V2P(allocate_zeroed_page_aligned_page()); - pml4[pml4e] |= uwr_flags; - } + const auto allocate_entry_if_needed = + [](uint64_t* table, uint16_t index, flags_t flags) -> uint64_t* + { + uint64_t entry = table[index]; + if ((entry & flags) == flags) + return P2V(entry & s_page_addr_mask); + if (!(entry & Flags::Present)) + entry = allocate_zeroed_page_aligned_page(); + table[index] = entry | flags; + return P2V(entry & s_page_addr_mask); + }; - uint64_t* pdpt = (uint64_t*)P2V(pml4[pml4e] & PAGE_ADDR_MASK); - if ((pdpt[pdpte] & uwr_flags) != uwr_flags) - { - if (!(pdpt[pdpte] & Flags::Present)) - pdpt[pdpte] = V2P(allocate_zeroed_page_aligned_page()); - pdpt[pdpte] |= uwr_flags; - } - - uint64_t* pd = (uint64_t*)P2V(pdpt[pdpte] & PAGE_ADDR_MASK); - if ((pd[pde] & uwr_flags) != uwr_flags) - { - if (!(pd[pde] & Flags::Present)) - pd[pde] = V2P(allocate_zeroed_page_aligned_page()); - pd[pde] |= uwr_flags; - } + uint64_t* pml4 = P2V(m_highest_paging_struct); + uint64_t* pdpt = allocate_entry_if_needed(pml4, pml4e, uwr_flags); + uint64_t* pd = allocate_entry_if_needed(pdpt, pdpte, uwr_flags); + uint64_t* pt = allocate_entry_if_needed(pd, pde, uwr_flags); if (!(flags & Flags::Present)) uwr_flags &= ~Flags::Present; - uint64_t* pt = (uint64_t*)P2V(pd[pde] & PAGE_ADDR_MASK); pt[pte] = paddr | uwr_flags | extra_flags; invalidate(vaddr, send_smp_message); @@ -495,30 +769,30 @@ namespace Kernel uint64_t PageTable::get_page_data(vaddr_t vaddr) const { ASSERT(is_canonical(vaddr)); - vaddr_t uc_vaddr = uncanonicalize(vaddr); + const vaddr_t uc_vaddr = uncanonicalize(vaddr); ASSERT(vaddr % PAGE_SIZE == 0); - uint64_t pml4e = (uc_vaddr >> 39) & 0x1FF; - uint64_t pdpte = (uc_vaddr >> 30) & 0x1FF; - uint64_t pde = (uc_vaddr >> 21) & 0x1FF; - uint64_t pte = (uc_vaddr >> 12) & 0x1FF; + const uint16_t pml4e = (uc_vaddr >> 39) & 0x1FF; + const uint16_t pdpte = (uc_vaddr >> 30) & 0x1FF; + const uint16_t pde = (uc_vaddr >> 21) & 0x1FF; + const uint16_t pte = (uc_vaddr >> 12) & 0x1FF; SpinLockGuard _(m_lock); - uint64_t* pml4 = (uint64_t*)P2V(m_highest_paging_struct); + const uint64_t* pml4 = P2V(m_highest_paging_struct); if (!(pml4[pml4e] & Flags::Present)) return 0; - uint64_t* pdpt = (uint64_t*)P2V(pml4[pml4e] & PAGE_ADDR_MASK); + const uint64_t* pdpt = P2V(pml4[pml4e] & s_page_addr_mask); if (!(pdpt[pdpte] & Flags::Present)) return 0; - uint64_t* pd = (uint64_t*)P2V(pdpt[pdpte] & PAGE_ADDR_MASK); + const uint64_t* pd = P2V(pdpt[pdpte] & s_page_addr_mask); if (!(pd[pde] & Flags::Present)) return 0; - uint64_t* pt = (uint64_t*)P2V(pd[pde] & PAGE_ADDR_MASK); + const uint64_t* pt = P2V(pd[pde] & s_page_addr_mask); if (!(pt[pte] & Flags::Used)) return 0; @@ -533,7 +807,7 @@ namespace Kernel paddr_t PageTable::physical_address_of(vaddr_t addr) const { uint64_t page_data = get_page_data(addr); - return (page_data & PAGE_ADDR_MASK) & ~(1ull << 63); + return page_data & s_page_addr_mask; } bool PageTable::reserve_page(vaddr_t vaddr, bool only_free) @@ -588,28 +862,28 @@ namespace Kernel // Try to find free page that can be mapped without // allocations (page table with unused entries) - uint64_t* pml4 = (uint64_t*)P2V(m_highest_paging_struct); + const uint64_t* pml4 = P2V(m_highest_paging_struct); for (; pml4e < 512; pml4e++) { if (pml4e > e_pml4e) break; if (!(pml4[pml4e] & Flags::Present)) continue; - uint64_t* pdpt = (uint64_t*)P2V(pml4[pml4e] & PAGE_ADDR_MASK); + const uint64_t* pdpt = P2V(pml4[pml4e] & s_page_addr_mask); for (; pdpte < 512; pdpte++) { if (pml4e == e_pml4e && pdpte > e_pdpte) break; if (!(pdpt[pdpte] & Flags::Present)) continue; - uint64_t* pd = (uint64_t*)P2V(pdpt[pdpte] & PAGE_ADDR_MASK); + const uint64_t* pd = P2V(pdpt[pdpte] & s_page_addr_mask); for (; pde < 512; pde++) { if (pml4e == e_pml4e && pdpte == e_pdpte && pde > e_pde) break; if (!(pd[pde] & Flags::Present)) continue; - uint64_t* pt = (uint64_t*)P2V(pd[pde] & PAGE_ADDR_MASK); + const uint64_t* pt = P2V(pd[pde] & s_page_addr_mask); for (; pte < 512; pte++) { if (pml4e == e_pml4e && pdpte == e_pdpte && pde == e_pde && pte >= e_pte) @@ -617,10 +891,10 @@ namespace Kernel if (!(pt[pte] & Flags::Used)) { vaddr_t vaddr = 0; - vaddr |= (uint64_t)pml4e << 39; - vaddr |= (uint64_t)pdpte << 30; - vaddr |= (uint64_t)pde << 21; - vaddr |= (uint64_t)pte << 12; + vaddr |= static_cast(pml4e) << 39; + vaddr |= static_cast(pdpte) << 30; + vaddr |= static_cast(pde) << 21; + vaddr |= static_cast(pte) << 12; vaddr = canonicalize(vaddr); ASSERT(reserve_page(vaddr)); return vaddr; @@ -630,16 +904,13 @@ namespace Kernel } } - // Find any free page - vaddr_t uc_vaddr = uc_vaddr_start; - while (uc_vaddr < uc_vaddr_end) + for (vaddr_t uc_vaddr = uc_vaddr_start; uc_vaddr < uc_vaddr_end; uc_vaddr += PAGE_SIZE) { if (vaddr_t vaddr = canonicalize(uc_vaddr); is_page_free(vaddr)) { ASSERT(reserve_page(vaddr)); return vaddr; } - uc_vaddr += PAGE_SIZE; } ASSERT_NOT_REACHED(); @@ -726,16 +997,16 @@ namespace Kernel flags_t flags = 0; vaddr_t start = 0; - uint64_t* pml4 = (uint64_t*)P2V(m_highest_paging_struct); + const uint64_t* pml4 = P2V(m_highest_paging_struct); for (uint64_t pml4e = 0; pml4e < 512; pml4e++) { - if (!(pml4[pml4e] & Flags::Present)) + if (!(pml4[pml4e] & Flags::Present) || (pml4e >= 256 && pml4e < 511)) { dump_range(start, (pml4e << 39), flags); start = 0; continue; } - uint64_t* pdpt = (uint64_t*)P2V(pml4[pml4e] & PAGE_ADDR_MASK); + const uint64_t* pdpt = P2V(pml4[pml4e] & s_page_addr_mask); for (uint64_t pdpte = 0; pdpte < 512; pdpte++) { if (!(pdpt[pdpte] & Flags::Present)) @@ -744,7 +1015,7 @@ namespace Kernel start = 0; continue; } - uint64_t* pd = (uint64_t*)P2V(pdpt[pdpte] & PAGE_ADDR_MASK); + const uint64_t* pd = P2V(pdpt[pdpte] & s_page_addr_mask); for (uint64_t pde = 0; pde < 512; pde++) { if (!(pd[pde] & Flags::Present)) @@ -753,7 +1024,7 @@ namespace Kernel start = 0; continue; } - uint64_t* pt = (uint64_t*)P2V(pd[pde] & PAGE_ADDR_MASK); + const uint64_t* pt = P2V(pd[pde] & s_page_addr_mask); for (uint64_t pte = 0; pte < 512; pte++) { if (parse_flags(pt[pte]) != flags) diff --git a/kernel/include/kernel/Memory/PageTable.h b/kernel/include/kernel/Memory/PageTable.h index a1cc2b83..12403f5c 100644 --- a/kernel/include/kernel/Memory/PageTable.h +++ b/kernel/include/kernel/Memory/PageTable.h @@ -43,7 +43,8 @@ namespace Kernel }; public: - static void initialize(); + static void initialize_pre_heap(); + static void initialize_post_heap(); static PageTable& kernel(); static PageTable& current() { return *reinterpret_cast(Processor::get_current_page_table()); } diff --git a/kernel/kernel/kernel.cpp b/kernel/kernel/kernel.cpp index 821cf8db..0b54b3d2 100644 --- a/kernel/kernel/kernel.cpp +++ b/kernel/kernel/kernel.cpp @@ -131,13 +131,16 @@ extern "C" void kernel_main(uint32_t boot_magic, uint32_t boot_info) Processor::initialize(); dprintln("BSP initialized"); - PageTable::initialize(); + PageTable::initialize_pre_heap(); PageTable::kernel().initial_load(); - dprintln("PageTable initialized"); + dprintln("PageTable stage1 initialized"); Heap::initialize(); dprintln("Heap initialzed"); + PageTable::initialize_post_heap(); + dprintln("PageTable stage2 initialized"); + parse_command_line(); dprintln("command line parsed, root='{}', console='{}'", cmdline.root, cmdline.console);