diff --git a/kernel/arch/i686/PageTable.cpp b/kernel/arch/i686/PageTable.cpp index fe28366c..a5e60c59 100644 --- a/kernel/arch/i686/PageTable.cpp +++ b/kernel/arch/i686/PageTable.cpp @@ -1,144 +1,630 @@ -#include +#include #include +#include +#include + +extern uint8_t g_kernel_start[]; +extern uint8_t g_kernel_end[]; + +extern uint8_t g_kernel_execute_start[]; +extern uint8_t g_kernel_execute_end[]; + +extern uint8_t g_userspace_start[]; +extern uint8_t g_userspace_end[]; namespace Kernel { RecursiveSpinLock PageTable::s_fast_page_lock; + static PageTable* s_kernel = nullptr; + static bool s_has_nxe = false; + static bool s_has_pge = false; + + static paddr_t s_global_pdpte = 0; + + static inline PageTable::flags_t parse_flags(uint64_t entry) + { + using Flags = PageTable::Flags; + + PageTable::flags_t result = 0; + if (s_has_nxe && !(entry & (1ull << 63))) + result |= Flags::Execute; + if (entry & Flags::Reserved) + result |= Flags::Reserved; + if (entry & Flags::CacheDisable) + result |= Flags::CacheDisable; + if (entry & Flags::UserSupervisor) + result |= Flags::UserSupervisor; + if (entry & Flags::ReadWrite) + result |= Flags::ReadWrite; + if (entry & Flags::Present) + result |= Flags::Present; + return result; + } + void PageTable::initialize() { - ASSERT_NOT_REACHED(); - } + if (CPUID::has_nxe()) + s_has_nxe = true; - PageTable& PageTable::kernel() - { - ASSERT_NOT_REACHED(); - } + if (CPUID::has_pge()) + s_has_pge = true; - bool PageTable::is_valid_pointer(uintptr_t) - { - ASSERT_NOT_REACHED(); - } + ASSERT(s_kernel == nullptr); + s_kernel = new PageTable(); + ASSERT(s_kernel); - BAN::ErrorOr PageTable::create_userspace() - { - ASSERT_NOT_REACHED(); - } - - PageTable::~PageTable() - { - ASSERT_NOT_REACHED(); - } - - void PageTable::unmap_page(vaddr_t) - { - ASSERT_NOT_REACHED(); - } - - void PageTable::unmap_range(vaddr_t, size_t) - { - ASSERT_NOT_REACHED(); - } - - void PageTable::map_range_at(paddr_t, vaddr_t, size_t, flags_t) - { - ASSERT_NOT_REACHED(); - } - - void PageTable::map_page_at(paddr_t, vaddr_t, flags_t) - { - ASSERT_NOT_REACHED(); - } - - paddr_t PageTable::physical_address_of(vaddr_t) const - { - ASSERT_NOT_REACHED(); - } - - PageTable::flags_t PageTable::get_page_flags(vaddr_t) const - { - ASSERT_NOT_REACHED(); - } - - bool PageTable::is_page_free(vaddr_t) const - { - ASSERT_NOT_REACHED(); - } - - bool PageTable::is_range_free(vaddr_t, size_t) const - { - ASSERT_NOT_REACHED(); - } - - bool PageTable::reserve_page(vaddr_t, bool) - { - ASSERT_NOT_REACHED(); - } - - bool PageTable::reserve_range(vaddr_t, size_t, bool) - { - ASSERT_NOT_REACHED(); - } - - vaddr_t PageTable::reserve_free_page(vaddr_t, vaddr_t) - { - ASSERT_NOT_REACHED(); - } - - vaddr_t PageTable::reserve_free_contiguous_pages(size_t, vaddr_t, vaddr_t) - { - ASSERT_NOT_REACHED(); - } - - void PageTable::load() - { - ASSERT_NOT_REACHED(); + s_kernel->initialize_kernel(); + s_kernel->initial_load(); } void PageTable::initial_load() { - ASSERT_NOT_REACHED(); + if (s_has_nxe) + { + asm volatile( + "movl $0xC0000080, %%ecx;" + "rdmsr;" + "orl $0x800, %%eax;" + "wrmsr" + ::: "eax", "ecx", "edx", "memory" + ); + } + + if (s_has_pge) + { + asm volatile( + "movl %%cr4, %%eax;" + "orl $0x80, %%eax;" + "movl %%eax, %%cr4;" + ::: "eax" + ); + } + + // enable write protect + asm volatile( + "movl %%cr0, %%eax;" + "orl $0x10000, %%eax;" + "movl %%eax, %%cr0;" + ::: "rax" + ); + + load(); } - void PageTable::debug_dump() + PageTable& PageTable::kernel() { - ASSERT_NOT_REACHED(); + ASSERT(s_kernel); + return *s_kernel; } - uint64_t PageTable::get_page_data(vaddr_t) const + bool PageTable::is_valid_pointer(uintptr_t) { - ASSERT_NOT_REACHED(); + return true; + } + + static uint64_t* allocate_zeroed_page_aligned_page() + { + void* page = kmalloc(PAGE_SIZE, PAGE_SIZE, true); + ASSERT(page); + memset(page, 0, PAGE_SIZE); + return (uint64_t*)page; } void PageTable::initialize_kernel() { - ASSERT_NOT_REACHED(); - } + ASSERT(s_global_pdpte == 0); + s_global_pdpte = V2P(allocate_zeroed_page_aligned_page()); - void PageTable::map_kernel_memory() - { - ASSERT_NOT_REACHED(); + ASSERT(m_highest_paging_struct == 0); + m_highest_paging_struct = V2P(kmalloc(32, 32, true)); + ASSERT(m_highest_paging_struct); + + uint64_t* pdpt = reinterpret_cast(P2V(m_highest_paging_struct)); + pdpt[0] = 0; + pdpt[1] = 0; + pdpt[2] = 0; + pdpt[3] = s_global_pdpte; + static_assert(KERNEL_OFFSET == 0xC0000000); + + prepare_fast_page(); + + // Map main bios area below 1 MiB + map_range_at( + 0x000E0000, + P2V(0x000E0000), + 0x00100000 - 0x000E0000, + PageTable::Flags::Present + ); + + // Map (phys_kernel_start -> phys_kernel_end) to (virt_kernel_start -> virt_kernel_end) + ASSERT((vaddr_t)g_kernel_start % PAGE_SIZE == 0); + map_range_at( + V2P(g_kernel_start), + (vaddr_t)g_kernel_start, + g_kernel_end - g_kernel_start, + Flags::ReadWrite | Flags::Present + ); + + // Map executable kernel memory as executable + map_range_at( + V2P(g_kernel_execute_start), + (vaddr_t)g_kernel_execute_start, + g_kernel_execute_end - g_kernel_execute_start, + Flags::Execute | Flags::Present + ); + + // Map userspace memory + map_range_at( + V2P(g_userspace_start), + (vaddr_t)g_userspace_start, + g_userspace_end - g_userspace_start, + Flags::Execute | Flags::UserSupervisor | Flags::Present + ); } void PageTable::prepare_fast_page() { - ASSERT_NOT_REACHED(); + constexpr uint64_t pdpte = (fast_page() >> 30) & 0x1FF; + constexpr uint64_t pde = (fast_page() >> 21) & 0x1FF; + constexpr uint64_t pte = (fast_page() >> 12) & 0x1FF; + + uint64_t* pdpt = reinterpret_cast(P2V(m_highest_paging_struct)); + ASSERT(!(pdpt[pdpte] & Flags::Present)); + pdpt[pdpte] = V2P(allocate_zeroed_page_aligned_page()) | Flags::Present; + + uint64_t* pd = reinterpret_cast(P2V(pdpt[pdpte]) & PAGE_ADDR_MASK); + ASSERT(!(pd[pde] & Flags::Present)); + pd[pde] = V2P(allocate_zeroed_page_aligned_page()) | Flags::ReadWrite | Flags::Present; + + uint64_t* pt = reinterpret_cast(P2V(pd[pde]) & PAGE_ADDR_MASK); + ASSERT(!(pt[pte] & Flags::Present)); + pt[pte] = V2P(allocate_zeroed_page_aligned_page()); } - void PageTable::invalidate(vaddr_t) + void PageTable::map_fast_page(paddr_t paddr) { - ASSERT_NOT_REACHED(); - } + ASSERT(s_kernel); + ASSERT(paddr); - void PageTable::map_fast_page(paddr_t) - { - ASSERT_NOT_REACHED(); + SpinLockGuard _(s_fast_page_lock); + + constexpr uint64_t pdpte = (fast_page() >> 30) & 0x1FF; + constexpr uint64_t pde = (fast_page() >> 21) & 0x1FF; + constexpr uint64_t pte = (fast_page() >> 12) & 0x1FF; + + uint64_t* pdpt = reinterpret_cast(P2V(s_kernel->m_highest_paging_struct)); + uint64_t* pd = reinterpret_cast(P2V(pdpt[pdpte] & PAGE_ADDR_MASK)); + uint64_t* pt = reinterpret_cast(P2V(pd[pde] & PAGE_ADDR_MASK)); + + ASSERT(!(pt[pte] & Flags::Present)); + pt[pte] = paddr | Flags::ReadWrite | Flags::Present; + + invalidate(fast_page()); } void PageTable::unmap_fast_page() { + ASSERT(s_kernel); + + SpinLockGuard _(s_fast_page_lock); + + constexpr uint64_t pdpte = (fast_page() >> 30) & 0x1FF; + constexpr uint64_t pde = (fast_page() >> 21) & 0x1FF; + constexpr uint64_t pte = (fast_page() >> 12) & 0x1FF; + + uint64_t* pdpt = reinterpret_cast(P2V(s_kernel->m_highest_paging_struct)); + uint64_t* pd = reinterpret_cast(P2V(pdpt[pdpte] & PAGE_ADDR_MASK)); + uint64_t* pt = reinterpret_cast(P2V(pd[pde] & PAGE_ADDR_MASK)); + + ASSERT(pt[pte] & Flags::Present); + pt[pte] = 0; + + invalidate(fast_page()); + } + + BAN::ErrorOr PageTable::create_userspace() + { + SpinLockGuard _(s_kernel->m_lock); + PageTable* page_table = new PageTable; + if (page_table == nullptr) + return BAN::Error::from_errno(ENOMEM); + page_table->map_kernel_memory(); + return page_table; + } + + void PageTable::map_kernel_memory() + { + ASSERT(s_kernel); + ASSERT(s_global_pdpte); + + ASSERT(m_highest_paging_struct == 0); + m_highest_paging_struct = V2P(kmalloc(32, 32, true)); + ASSERT(m_highest_paging_struct); + + uint64_t* pdpt = reinterpret_cast(P2V(m_highest_paging_struct)); + pdpt[0] = 0; + pdpt[1] = 0; + pdpt[2] = 0; + pdpt[3] = s_global_pdpte; + static_assert(KERNEL_OFFSET == 0xC0000000); + } + + PageTable::~PageTable() + { + uint64_t* pdpt = reinterpret_cast(P2V(m_highest_paging_struct)); + + for (uint32_t pdpte = 0; pdpte < 3; pdpte++) + { + if (!(pdpt[pdpte] & Flags::Present)) + continue; + uint64_t* pd = reinterpret_cast(P2V(pdpt[pdpte] & PAGE_ADDR_MASK)); + for (uint32_t pde = 0; pde < 512; pde++) + { + if (!(pd[pde] & Flags::Present)) + continue; + kfree(reinterpret_cast(P2V(pd[pde] & PAGE_ADDR_MASK))); + } + kfree(pd); + } + kfree(pdpt); + } + + void PageTable::load() + { + SpinLockGuard _(m_lock); + ASSERT(m_highest_paging_struct < 0x100000000); + const uint32_t pdpt_lo = m_highest_paging_struct; + asm volatile("movl %0, %%cr3" :: "r"(pdpt_lo)); + Processor::set_current_page_table(this); + } + + void PageTable::invalidate(vaddr_t vaddr) + { + ASSERT(vaddr % PAGE_SIZE == 0); + asm volatile("invlpg (%0)" :: "r"(vaddr) : "memory"); + } + + void PageTable::unmap_page(vaddr_t vaddr) + { + ASSERT(vaddr); + ASSERT(vaddr % PAGE_SIZE == 0); + ASSERT(vaddr != fast_page()); + if (vaddr >= KERNEL_OFFSET) + ASSERT(vaddr >= (vaddr_t)g_kernel_start); + if ((vaddr >= KERNEL_OFFSET) != (this == s_kernel)) + Kernel::panic("unmapping {8H}, kernel: {}", vaddr, this == s_kernel); + + const uint64_t pdpte = (vaddr >> 30) & 0x1FF; + const uint64_t pde = (vaddr >> 21) & 0x1FF; + const uint64_t pte = (vaddr >> 12) & 0x1FF; + + SpinLockGuard _(m_lock); + + if (is_page_free(vaddr)) + { + dwarnln("unmapping unmapped page {8H}", vaddr); + return; + } + + uint64_t* pdpt = reinterpret_cast(P2V(m_highest_paging_struct)); + uint64_t* pd = reinterpret_cast(P2V(pdpt[pdpte] & PAGE_ADDR_MASK)); + uint64_t* pt = reinterpret_cast(P2V(pd[pde] & PAGE_ADDR_MASK)); + + pt[pte] = 0; + invalidate(vaddr); + } + + void PageTable::unmap_range(vaddr_t vaddr, size_t size) + { + vaddr_t s_page = vaddr / PAGE_SIZE; + vaddr_t e_page = BAN::Math::div_round_up(vaddr + size, PAGE_SIZE); + + SpinLockGuard _(m_lock); + for (vaddr_t page = s_page; page < e_page; page++) + unmap_page(page * PAGE_SIZE); + } + + void PageTable::map_page_at(paddr_t paddr, vaddr_t vaddr, flags_t flags) + { + ASSERT(vaddr); + ASSERT(vaddr != fast_page()); + if ((vaddr >= KERNEL_OFFSET) != (this == s_kernel)) + Kernel::panic("mapping {8H} to {8H}, kernel: {}", paddr, vaddr, this == s_kernel); + + ASSERT(paddr % PAGE_SIZE == 0); + ASSERT(vaddr % PAGE_SIZE == 0); + ASSERT(flags & Flags::Used); + + const uint64_t pdpte = (vaddr >> 30) & 0x1FF; + const uint64_t pde = (vaddr >> 21) & 0x1FF; + const uint64_t pte = (vaddr >> 12) & 0x1FF; + + uint64_t extra_flags = 0; + if (s_has_pge && vaddr >= KERNEL_OFFSET) // Map kernel memory as global + extra_flags |= 1ull << 8; + if (s_has_nxe && !(flags & Flags::Execute)) + extra_flags |= 1ull << 63; + if (flags & Flags::Reserved) + extra_flags |= Flags::Reserved; + if (flags & Flags::CacheDisable) + extra_flags |= Flags::CacheDisable; + + // NOTE: we add present here, since it has to be available in higher level structures + flags_t uwr_flags = (flags & (Flags::UserSupervisor | Flags::ReadWrite)) | Flags::Present; + + SpinLockGuard _(m_lock); + + uint64_t* pdpt = reinterpret_cast(P2V(m_highest_paging_struct)); + if (!(pdpt[pdpte] & Flags::Present)) + pdpt[pdpte] = V2P(allocate_zeroed_page_aligned_page()) | Flags::Present; + + uint64_t* pd = reinterpret_cast(P2V(pdpt[pdpte] & PAGE_ADDR_MASK)); + if ((pd[pde] & uwr_flags) != uwr_flags) + { + if (!(pd[pde] & Flags::Present)) + pd[pde] = V2P(allocate_zeroed_page_aligned_page()); + pd[pde] |= uwr_flags; + } + + if (!(flags & Flags::Present)) + uwr_flags &= ~Flags::Present; + + uint64_t* pt = reinterpret_cast(P2V(pd[pde] & PAGE_ADDR_MASK)); + pt[pte] = paddr | uwr_flags | extra_flags; + + invalidate(vaddr); + } + + void PageTable::map_range_at(paddr_t paddr, vaddr_t vaddr, size_t size, flags_t flags) + { + ASSERT(vaddr); + ASSERT(paddr % PAGE_SIZE == 0); + ASSERT(vaddr % PAGE_SIZE == 0); + + size_t page_count = range_page_count(vaddr, size); + + SpinLockGuard _(m_lock); + for (size_t page = 0; page < page_count; page++) + map_page_at(paddr + page * PAGE_SIZE, vaddr + page * PAGE_SIZE, flags); + } + + uint64_t PageTable::get_page_data(vaddr_t vaddr) const + { + ASSERT(vaddr % PAGE_SIZE == 0); + + const uint64_t pdpte = (vaddr >> 30) & 0x1FF; + const uint64_t pde = (vaddr >> 21) & 0x1FF; + const uint64_t pte = (vaddr >> 12) & 0x1FF; + + SpinLockGuard _(m_lock); + + uint64_t* pdpt = (uint64_t*)P2V(m_highest_paging_struct); + if (!(pdpt[pdpte] & Flags::Present)) + return 0; + + uint64_t* pd = (uint64_t*)P2V(pdpt[pdpte] & PAGE_ADDR_MASK); + if (!(pd[pde] & Flags::Present)) + return 0; + + uint64_t* pt = (uint64_t*)P2V(pd[pde] & PAGE_ADDR_MASK); + if (!(pt[pte] & Flags::Used)) + return 0; + + return pt[pte]; + } + + PageTable::flags_t PageTable::get_page_flags(vaddr_t vaddr) const + { + return parse_flags(get_page_data(vaddr)); + } + + paddr_t PageTable::physical_address_of(vaddr_t vaddr) const + { + uint64_t page_data = get_page_data(vaddr); + return (page_data & PAGE_ADDR_MASK) & ~(1ull << 63); + } + + bool PageTable::is_page_free(vaddr_t vaddr) const + { + ASSERT(vaddr % PAGE_SIZE == 0); + return !(get_page_flags(vaddr) & Flags::Used); + } + + bool PageTable::is_range_free(vaddr_t vaddr, size_t size) const + { + vaddr_t s_page = vaddr / PAGE_SIZE; + vaddr_t e_page = BAN::Math::div_round_up(vaddr + size, PAGE_SIZE); + + SpinLockGuard _(m_lock); + for (vaddr_t page = s_page; page < e_page; page++) + if (!is_page_free(page * PAGE_SIZE)) + return false; + return true; + } + + bool PageTable::reserve_page(vaddr_t vaddr, bool only_free) + { + SpinLockGuard _(m_lock); + ASSERT(vaddr % PAGE_SIZE == 0); + if (only_free && !is_page_free(vaddr)) + return false; + map_page_at(0, vaddr, Flags::Reserved); + return true; + } + + bool PageTable::reserve_range(vaddr_t vaddr, size_t bytes, bool only_free) + { + if (size_t rem = bytes % PAGE_SIZE) + bytes += PAGE_SIZE - rem; + ASSERT(vaddr % PAGE_SIZE == 0); + + SpinLockGuard _(m_lock); + if (only_free && !is_range_free(vaddr, bytes)) + return false; + for (size_t offset = 0; offset < bytes; offset += PAGE_SIZE) + reserve_page(vaddr + offset); + return true; + } + + vaddr_t PageTable::reserve_free_page(vaddr_t first_address, vaddr_t last_address) + { + if (first_address >= KERNEL_OFFSET && first_address < (vaddr_t)g_kernel_end) + first_address = (vaddr_t)g_kernel_end; + if (size_t rem = first_address % PAGE_SIZE) + first_address += PAGE_SIZE - rem; + if (size_t rem = last_address % PAGE_SIZE) + last_address -= rem; + + const uint32_t s_pdpte = (first_address >> 30) & 0x1FF; + const uint32_t s_pde = (first_address >> 21) & 0x1FF; + const uint32_t s_pte = (first_address >> 12) & 0x1FF; + + const uint32_t e_pdpte = (last_address >> 30) & 0x1FF; + const uint32_t e_pde = (last_address >> 21) & 0x1FF; + const uint32_t e_pte = (last_address >> 12) & 0x1FF; + + SpinLockGuard _(m_lock); + + // Try to find free page that can be mapped without + // allocations (page table with unused entries) + uint64_t* pdpt = reinterpret_cast(P2V(m_highest_paging_struct)); + for (uint32_t pdpte = s_pdpte; pdpte < 4; pdpte++) + { + if (pdpte > e_pdpte) + break; + if (!(pdpt[pdpte] & Flags::Present)) + continue; + uint64_t* pd = reinterpret_cast(P2V(pdpt[pdpte] & PAGE_ADDR_MASK)); + for (uint32_t pde = s_pde; pde < 512; pde++) + { + if (pdpte == e_pdpte && pde > e_pde) + break; + if (!(pd[pde] & Flags::Present)) + continue; + uint64_t* pt = (uint64_t*)P2V(pd[pde] & PAGE_ADDR_MASK); + for (uint32_t pte = s_pte; pte < 512; pte++) + { + if (pdpte == e_pdpte && pde == e_pde && pte >= e_pte) + break; + if (!(pt[pte] & Flags::Used)) + { + vaddr_t vaddr = 0; + vaddr |= (vaddr_t)pdpte << 30; + vaddr |= (vaddr_t)pde << 21; + vaddr |= (vaddr_t)pte << 12; + ASSERT(reserve_page(vaddr)); + return vaddr; + } + } + } + } + + // Find any free page + for (vaddr_t vaddr = first_address; vaddr < last_address; vaddr += PAGE_SIZE) + { + if (is_page_free(vaddr)) + { + ASSERT(reserve_page(vaddr)); + return vaddr; + } + } + ASSERT_NOT_REACHED(); } + vaddr_t PageTable::reserve_free_contiguous_pages(size_t page_count, vaddr_t first_address, vaddr_t last_address) + { + if (first_address >= KERNEL_OFFSET && first_address < (vaddr_t)g_kernel_start) + first_address = (vaddr_t)g_kernel_start; + if (size_t rem = first_address % PAGE_SIZE) + first_address += PAGE_SIZE - rem; + if (size_t rem = last_address % PAGE_SIZE) + last_address -= rem; + + SpinLockGuard _(m_lock); + + for (vaddr_t vaddr = first_address; vaddr < last_address;) + { + bool valid { true }; + for (size_t page = 0; page < page_count; page++) + { + if (!is_page_free(vaddr + page * PAGE_SIZE)) + { + vaddr += (page + 1) * PAGE_SIZE; + valid = false; + break; + } + } + if (valid) + { + ASSERT(reserve_range(vaddr, page_count * PAGE_SIZE)); + return vaddr; + } + } + + ASSERT_NOT_REACHED(); + } + + static void dump_range(vaddr_t start, vaddr_t end, PageTable::flags_t flags) + { + if (start == 0) + return; + dprintln("{}-{}: {}{}{}{}", + (void*)(start), (void*)(end - 1), + flags & PageTable::Flags::Execute ? 'x' : '-', + flags & PageTable::Flags::UserSupervisor ? 'u' : '-', + flags & PageTable::Flags::ReadWrite ? 'w' : '-', + flags & PageTable::Flags::Present ? 'r' : '-' + ); + } + + void PageTable::debug_dump() + { + SpinLockGuard _(m_lock); + + flags_t flags = 0; + vaddr_t start = 0; + + uint64_t* pdpt = reinterpret_cast(P2V(m_highest_paging_struct)); + for (uint32_t pdpte = 0; pdpte < 4; pdpte++) + { + if (!(pdpt[pdpte] & Flags::Present)) + { + dump_range(start, (pdpte << 30), flags); + start = 0; + continue; + } + uint64_t* pd = (uint64_t*)P2V(pdpt[pdpte] & PAGE_ADDR_MASK); + for (uint64_t pde = 0; pde < 512; pde++) + { + if (!(pd[pde] & Flags::Present)) + { + dump_range(start, (pdpte << 30) | (pde << 21), flags); + start = 0; + continue; + } + uint64_t* pt = (uint64_t*)P2V(pd[pde] & PAGE_ADDR_MASK); + for (uint64_t pte = 0; pte < 512; pte++) + { + if (parse_flags(pt[pte]) != flags) + { + dump_range(start, (pdpte << 30) | (pde << 21) | (pte << 12), flags); + start = 0; + } + + if (!(pt[pte] & Flags::Used)) + continue; + + if (start == 0) + { + flags = parse_flags(pt[pte]); + start = (pdpte << 30) | (pde << 21) | (pte << 12); + } + } + } + } + } + }