Compare commits

...

11 Commits

Author SHA1 Message Date
DcraftBg
b8dc199738 Kernel: Fix ByteRingBuffer->back() 2026-05-04 20:26:02 +03:00
74127c0f45 Kernel: Cleanup inline assembly accessing cpu specific data 2026-05-04 20:26:02 +03:00
28499b890c LibC: Mark exit as noreturn 2026-05-04 20:26:02 +03:00
2f45349658 Kernel: Calculate internet checksum in host endian
No need to swap bytes of every 16 bit word in the packet, we can just do
one swap at the return
2026-05-04 20:26:02 +03:00
fde085e04b Kernel: Pass current cpu index as a GDT limit
I had no idea LSL was an instruction. This cleans up code to get the
current cpu by a lot and does not require extra segment usage :D
2026-05-04 20:26:02 +03:00
77ca525552 BAN: Fix HashSet 2026-05-04 20:26:02 +03:00
3b83daef17 Kernel: Use empty string instead of nullptr for non existing proc name 2026-05-04 20:26:01 +03:00
f37d9dbdb1 Kernel: Remove kmalloc_vaddr_of
This is no longer needed. It was only used for x86_64 paging and AP
stack initialization
2026-05-04 20:26:01 +03:00
b7cedad891 Kernel: Wrap syscall macro value in parentheses 2026-05-04 20:26:01 +03:00
cdf0de34fb Kernel: Stop stacktrace dump on null bp
This makes stack traces not crash before IDT is initialized
2026-05-04 20:26:01 +03:00
03fccdffe1 Kernel: Rewrite paging and AP initialization
Initial step of paging now just prepares fast page for heap, actual page
table initialization happens after heap is initialized which allows
x86_64 to never depend on kmalloc for pages.

Processor's stacks are now also spawned with PMM/VMM allocated stacks
instead of kmalloc identity mapped.
2026-05-04 20:26:01 +03:00
29 changed files with 482 additions and 627 deletions

View File

@@ -60,7 +60,7 @@ namespace BAN
void skip_to_valid_bucket()
{
while (!m_bucket->used && !m_bucket->end)
while (m_bucket->state != Bucket::USED && !m_bucket->end)
m_bucket++;
if (m_bucket->end)
m_bucket = nullptr;
@@ -83,10 +83,13 @@ namespace BAN
private:
struct Bucket
{
static constexpr uint8_t UNUSED = 0;
static constexpr uint8_t USED = 1;
static constexpr uint8_t REMOVED = 2;
alignas(T) uint8_t storage[sizeof(T)];
hash_t hash;
uint8_t used : 1;
uint8_t removed : 1;
uint8_t state : 2;
uint8_t chain_start : 1;
uint8_t end : 1;
@@ -148,38 +151,7 @@ namespace BAN
{
if (should_rehash_with_size(m_size + 1))
TRY(rehash(m_size * 2));
bool first = true;
const hash_t orig_hash = HASH()(value);
for (auto hash = orig_hash;; hash = get_next_hash_in_chain(hash, orig_hash), first = false)
{
auto& bucket = m_buckets[hash & (m_capacity - 1)];
if (!first)
bucket.chain_start = false;
if (bucket.used)
{
if (!COMP()(*bucket.element(), value))
continue;
bucket.element()->~T();
new (bucket.element()) T(BAN::move(value));
}
else
{
m_removed -= bucket.removed;
bucket.used = true;
bucket.removed = false;
new (bucket.element()) T(BAN::move(value));
m_size++;
}
if (first)
bucket.chain_start = true;
bucket.hash = orig_hash;
return iterator(&bucket);
}
return insert_impl(BAN::move(value), HASH()(value));
}
template<detail::HashSetFindable<T, HASH, COMP> U>
@@ -193,8 +165,7 @@ namespace BAN
{
auto& bucket = *it.m_bucket;
bucket.element()->~T();
bucket.used = false;
bucket.removed = true;
bucket.state = Bucket::REMOVED;
m_size--;
m_removed++;
return iterator(&bucket);
@@ -218,11 +189,8 @@ namespace BAN
return;
for (size_type i = 0; i < m_capacity; i++)
{
auto& bucket = m_buckets[i];
if (bucket.used)
bucket.element()->~T();
}
if (m_buckets[i].state == Bucket::USED)
m_buckets[i].element()->~T();
BAN::deallocator(m_buckets);
m_buckets = nullptr;
@@ -281,9 +249,9 @@ namespace BAN
for (size_type i = 0; i < old_capacity; i++)
{
auto& old_bucket = old_buckets[i];
if (!old_bucket.used)
if (old_bucket.state != Bucket::USED)
continue;
MUST(insert(BAN::move(*old_bucket.element())));
insert_impl(BAN::move(*old_bucket.element()), old_bucket.hash);
old_bucket.element()->~T();
}
@@ -305,15 +273,66 @@ namespace BAN
for (auto hash = orig_hash;; hash = get_next_hash_in_chain(hash, orig_hash), first = false)
{
auto& bucket = m_buckets[hash & (m_capacity - 1)];
if (bucket.used && bucket.hash == orig_hash && COMP()(*bucket.element(), value))
if (bucket.state == Bucket::USED && bucket.hash == orig_hash && COMP()(*bucket.element(), value))
return const_iterator(&bucket);
if (!bucket.used && !bucket.removed)
if (bucket.state == Bucket::UNUSED)
return end();
if (!first && bucket.chain_start)
return end();
}
}
// Inserts `value` using the already computed hash `orig_hash` and returns an
// iterator to the bucket that now holds it.
//
// The table is never grown here: the ASSERT below requires that adding one
// more element does not trigger should_rehash_with_size().
//
// Probing follows get_next_hash_in_chain(). If a USED bucket with the same
// hash and a COMP-equal element is found, that element is destroyed and
// replaced. Otherwise the first non-USED bucket seen along the probe sequence
// (REMOVED or UNUSED) is reused.
iterator insert_impl(T&& value, hash_t orig_hash)
{
ASSERT(!should_rehash_with_size(m_size + 1));
// First reusable (non-USED) bucket seen so far, or the matching USED bucket.
Bucket* target = nullptr;
// True only while the bucket being examined is the very first probe.
bool first = true;
for (auto hash = orig_hash;; hash = get_next_hash_in_chain(hash, orig_hash), first = false)
{
auto& bucket = m_buckets[hash & (m_capacity - 1)];
// Every bucket reached after the first probe loses its chain_start mark.
if (!first)
bucket.chain_start = false;
if (bucket.state == Bucket::USED)
{
// Occupied by a different element: keep probing.
if (bucket.hash != orig_hash || !COMP()(*bucket.element(), value))
continue;
// Equal element already stored: overwrite it in place, even if an
// earlier REMOVED bucket had been remembered as target.
target = &bucket;
break;
}
// Remember only the first free slot; later REMOVED buckets are ignored.
if (target == nullptr)
target = &bucket;
// An UNUSED bucket ends the probe sequence; REMOVED ones do not.
if (bucket.state == Bucket::UNUSED)
break;
}
// Fix up counters (or destroy the old element) based on what target held.
switch (target->state)
{
case Bucket::USED:
target->element()->~T();
break;
case Bucket::REMOVED:
m_removed--;
[[fallthrough]];
case Bucket::UNUSED:
m_size++;
break;
}
// chain_start ends up true only when the value landed in an UNUSED bucket on
// the very first probe; for a reused REMOVED bucket or a replaced element it
// is cleared.
target->chain_start = first && target->state == Bucket::UNUSED;
target->hash = orig_hash;
target->state = Bucket::USED;
new (target->element()) T(BAN::move(value));
return iterator(target);
}
bool should_rehash_with_size(size_type size) const
{
if (m_capacity < 16)

View File

@@ -16,6 +16,8 @@ extern uint8_t g_kernel_writable_end[];
extern uint8_t g_userspace_start[];
extern uint8_t g_userspace_end[];
extern uint64_t g_boot_fast_page_pt[];
namespace Kernel
{
@@ -24,7 +26,7 @@ namespace Kernel
constexpr uint64_t s_page_flag_mask = 0x8000000000000FFF;
constexpr uint64_t s_page_addr_mask = ~s_page_flag_mask;
static bool s_is_post_heap_done = false;
static bool s_is_initialized = false;
static PageTable* s_kernel = nullptr;
static bool s_has_nxe = false;
@@ -33,6 +35,28 @@ namespace Kernel
static paddr_t s_global_pdpte = 0;
static uint64_t* s_fast_page_pt { nullptr };
// Requests PAGE_SIZE bytes with PAGE_SIZE alignment from kmalloc, asserts that
// the allocation succeeded, zero-fills it and hands it back as an array of
// 64-bit entries.
static uint64_t* allocate_zeroed_page_aligned_page()
{
void* const page = kmalloc(PAGE_SIZE, PAGE_SIZE, true);
ASSERT(page);
auto* const entries = static_cast<uint64_t*>(page);
memset(entries, 0, PAGE_SIZE);
return entries;
}
// Translates a kernel virtual address to a physical address by removing
// KERNEL_OFFSET and adding the physical load address from g_boot_info.
// `vaddr` is converted to vaddr_t with a C-style cast, so T may be a pointer
// or an integer type.
// NOTE(review): presumably only valid for addresses inside the kernel's
// linear KERNEL_OFFSET mapping — confirm against callers.
template<typename T>
static paddr_t V2P(const T vaddr)
{
return (vaddr_t)vaddr - KERNEL_OFFSET + g_boot_info.kernel_paddr;
}
// Inverse of the virtual-to-physical translation: subtracts the physical load
// address from g_boot_info, adds KERNEL_OFFSET and returns the result as a
// pointer to 64-bit entries.
// NOTE(review): presumably only valid for physical addresses that lie inside
// the kernel's linear KERNEL_OFFSET mapping — confirm against callers.
template<typename T>
static uint64_t* P2V(const T paddr)
{
return reinterpret_cast<uint64_t*>(reinterpret_cast<paddr_t>(paddr) - g_boot_info.kernel_paddr + KERNEL_OFFSET);
}
static inline PageTable::flags_t parse_flags(uint64_t entry)
{
using Flags = PageTable::Flags;
@@ -51,31 +75,22 @@ namespace Kernel
return result;
}
void PageTable::initialize_pre_heap()
// Points s_fast_page_pt at g_boot_fast_page_pt, the table declared extern at
// the top of this file, so that s_fast_page_pt is non-null from here on.
// NOTE(review): presumably called before the kernel page table exists so the
// fast page can be used during early boot — confirm against the call site.
void PageTable::initialize_fast_page()
{
s_fast_page_pt = g_boot_fast_page_pt;
}
static void detect_cpu_features()
{
if (CPUID::has_nxe())
s_has_nxe = true;
if (CPUID::has_pge())
s_has_pge = true;
if (CPUID::has_pat())
s_has_pat = true;
ASSERT(s_kernel == nullptr);
s_kernel = new PageTable();
ASSERT(s_kernel);
s_kernel->initialize_kernel();
s_kernel->initial_load();
}
void PageTable::initialize_post_heap()
{
s_is_post_heap_done = true;
}
void PageTable::initial_load()
void PageTable::enable_cpu_features()
{
if (s_has_nxe)
{
@@ -116,8 +131,56 @@ namespace Kernel
"movl %%eax, %%cr0;"
::: "rax"
);
}
load();
// Builds the kernel page table and switches to it.
//
// Steps, in order:
//  1. detect and enable the CPU paging features,
//  2. allocate the kernel PageTable object and a zeroed page for its
//     top-level structure (PDPT),
//  3. map the kernel image into it via map_kernel_memory(),
//  4. read entry 3 of the top-level structure through the fast page and keep
//     it in s_global_pdpte,
//  5. make sure the tables covering fast_page() exist in the new structure
//     and repoint s_fast_page_pt at the resulting page table,
//  6. load the new page table.
//
// NOTE(review): s_is_initialized is not assigned anywhere in this function;
// confirm where (or whether) it is set to true.
void PageTable::initialize_and_load()
{
detect_cpu_features();
enable_cpu_features();
ASSERT(s_kernel == nullptr);
s_kernel = new PageTable();
ASSERT(s_kernel);
auto* pdpt = allocate_zeroed_page_aligned_page();
ASSERT(pdpt);
s_kernel->m_highest_paging_struct = V2P(pdpt);
s_kernel->map_kernel_memory();
// Read the fourth 64-bit entry of the top-level structure via the fast page.
PageTable::with_fast_page(s_kernel->m_highest_paging_struct, [] {
s_global_pdpte = PageTable::fast_page_as_sized<paddr_t>(3);
});
// update fast page pt
{
constexpr vaddr_t vaddr = fast_page();
constexpr uint16_t pdpte = (vaddr >> 30) & 0x1FF;
constexpr uint16_t pde = (vaddr >> 21) & 0x1FF;
// Returns the physical address stored in table[entry], allocating a zeroed
// page for it first when the entry is not present; `flags` are OR-ed in
// either way.
const auto get_or_allocate_entry =
[](paddr_t table_paddr, uint16_t entry, uint64_t flags)
{
uint64_t* table = P2V(table_paddr);
if (!(table[entry] & Flags::Present))
{
auto* vaddr = allocate_zeroed_page_aligned_page();
ASSERT(vaddr);
table[entry] = V2P(vaddr);
}
table[entry] |= flags;
return table[entry] & s_page_addr_mask;
};
const paddr_t pdpt = s_kernel->m_highest_paging_struct;
const paddr_t pd = get_or_allocate_entry(pdpt, pdpte, Flags::Present);
// s_fast_page_pt now refers to the start of the page table covering
// fast_page().
// NOTE(review): presumably fast_page() maps to entry 0 of that table —
// confirm against the code that writes through s_fast_page_pt.
s_fast_page_pt = P2V(get_or_allocate_entry(pd, pde, Flags::ReadWrite | Flags::Present));
}
s_kernel->load();
}
PageTable& PageTable::kernel()
@@ -131,40 +194,12 @@ namespace Kernel
return true;
}
static uint64_t* allocate_zeroed_page_aligned_page()
void PageTable::map_kernel_memory()
{
void* page = kmalloc(PAGE_SIZE, PAGE_SIZE, true);
ASSERT(page);
memset(page, 0, PAGE_SIZE);
return (uint64_t*)page;
}
template<typename T>
static paddr_t V2P(const T vaddr)
{
return (vaddr_t)vaddr - KERNEL_OFFSET + g_boot_info.kernel_paddr;
}
template<typename T>
static uint64_t* P2V(const T paddr)
{
return reinterpret_cast<uint64_t*>(reinterpret_cast<paddr_t>(paddr) - g_boot_info.kernel_paddr + KERNEL_OFFSET);
}
void PageTable::initialize_kernel()
{
ASSERT(s_global_pdpte == 0);
s_global_pdpte = V2P(allocate_zeroed_page_aligned_page());
map_kernel_memory();
prepare_fast_page();
// Map (phys_kernel_start -> phys_kernel_end) to (virt_kernel_start -> virt_kernel_end)
ASSERT((vaddr_t)g_kernel_start % PAGE_SIZE == 0);
map_range_at(
V2P(g_kernel_start),
(vaddr_t)g_kernel_start,
reinterpret_cast<vaddr_t>(g_kernel_start),
g_kernel_end - g_kernel_start,
Flags::Present
);
@@ -172,7 +207,7 @@ namespace Kernel
// Map executable kernel memory as executable
map_range_at(
V2P(g_kernel_execute_start),
(vaddr_t)g_kernel_execute_start,
reinterpret_cast<vaddr_t>(g_kernel_execute_start),
g_kernel_execute_end - g_kernel_execute_start,
Flags::Execute | Flags::Present
);
@@ -180,7 +215,7 @@ namespace Kernel
// Map writable kernel memory as writable
map_range_at(
V2P(g_kernel_writable_start),
(vaddr_t)g_kernel_writable_start,
reinterpret_cast<vaddr_t>(g_kernel_writable_start),
g_kernel_writable_end - g_kernel_writable_start,
Flags::ReadWrite | Flags::Present
);
@@ -188,70 +223,34 @@ namespace Kernel
// Map userspace memory
map_range_at(
V2P(g_userspace_start),
(vaddr_t)g_userspace_start,
reinterpret_cast<vaddr_t>(g_userspace_start),
g_userspace_end - g_userspace_start,
Flags::Execute | Flags::UserSupervisor | Flags::Present
);
}
void PageTable::prepare_fast_page()
{
constexpr uint64_t pdpte = (fast_page() >> 30) & 0x1FF;
constexpr uint64_t pde = (fast_page() >> 21) & 0x1FF;
constexpr uint64_t pte = (fast_page() >> 12) & 0x1FF;
const uint64_t* pdpt = P2V(m_highest_paging_struct);
ASSERT(pdpt[pdpte] & Flags::Present);
uint64_t* pd = P2V(pdpt[pdpte] & s_page_addr_mask);
ASSERT(!(pd[pde] & Flags::Present));
pd[pde] = V2P(allocate_zeroed_page_aligned_page()) | Flags::ReadWrite | Flags::Present;
uint64_t* pt = P2V(pd[pde] & s_page_addr_mask);
ASSERT(pt[pte] == 0);
pt[pte] = Flags::Reserved;
}
void PageTable::map_fast_page(paddr_t paddr)
{
ASSERT(s_kernel);
ASSERT(paddr);
ASSERT(paddr % PAGE_SIZE == 0);
ASSERT(paddr && paddr % PAGE_SIZE == 0);
ASSERT(s_fast_page_pt);
ASSERT(s_fast_page_lock.current_processor_has_lock());
constexpr uint64_t pdpte = (fast_page() >> 30) & 0x1FF;
constexpr uint64_t pde = (fast_page() >> 21) & 0x1FF;
constexpr uint64_t pte = (fast_page() >> 12) & 0x1FF;
ASSERT(!(*s_fast_page_pt & Flags::Present));
s_fast_page_pt[0] = paddr | Flags::ReadWrite | Flags::Present;
uint64_t* pdpt = P2V(s_kernel->m_highest_paging_struct);
uint64_t* pd = P2V(pdpt[pdpte] & s_page_addr_mask);
uint64_t* pt = P2V(pd[pde] & s_page_addr_mask);
ASSERT(!(pt[pte] & Flags::Present));
pt[pte] = paddr | Flags::ReadWrite | Flags::Present;
asm volatile("invlpg (%0)" :: "r"(fast_page()) : "memory");
asm volatile("invlpg (%0)" :: "r"(fast_page()));
}
void PageTable::unmap_fast_page()
{
ASSERT(s_kernel);
ASSERT(s_fast_page_pt);
ASSERT(s_fast_page_lock.current_processor_has_lock());
constexpr uint64_t pdpte = (fast_page() >> 30) & 0x1FF;
constexpr uint64_t pde = (fast_page() >> 21) & 0x1FF;
constexpr uint64_t pte = (fast_page() >> 12) & 0x1FF;
ASSERT((*s_fast_page_pt & Flags::Present));
s_fast_page_pt[0] = 0;
uint64_t* pdpt = P2V(s_kernel->m_highest_paging_struct);
uint64_t* pd = P2V(pdpt[pdpte] & s_page_addr_mask);
uint64_t* pt = P2V(pd[pde] & s_page_addr_mask);
ASSERT(pt[pte] & Flags::Present);
pt[pte] = Flags::Reserved;
asm volatile("invlpg (%0)" :: "r"(fast_page()) : "memory");
asm volatile("invlpg (%0)" :: "r"(fast_page()));
}
BAN::ErrorOr<PageTable*> PageTable::create_userspace()
@@ -260,25 +259,23 @@ namespace Kernel
PageTable* page_table = new PageTable;
if (page_table == nullptr)
return BAN::Error::from_errno(ENOMEM);
page_table->map_kernel_memory();
return page_table;
uint64_t* pdpt = allocate_zeroed_page_aligned_page();
if (pdpt == nullptr)
{
delete page_table;
return BAN::Error::from_errno(ENOMEM);
}
void PageTable::map_kernel_memory()
{
ASSERT(s_kernel);
ASSERT(s_global_pdpte);
page_table->m_highest_paging_struct = V2P(pdpt);
ASSERT(m_highest_paging_struct == 0);
m_highest_paging_struct = V2P(kmalloc(32, 32, true));
ASSERT(m_highest_paging_struct);
uint64_t* pdpt = P2V(m_highest_paging_struct);
pdpt[0] = 0;
pdpt[1] = 0;
pdpt[2] = 0;
pdpt[3] = s_global_pdpte | Flags::Present;
static_assert(KERNEL_OFFSET == 0xC0000000);
return page_table;
}
PageTable::~PageTable()
@@ -318,7 +315,7 @@ namespace Kernel
const bool is_userspace = (vaddr < KERNEL_OFFSET);
if (is_userspace && this != &PageTable::current())
;
else if (pages <= 32 || !s_is_post_heap_done)
else if (pages <= 32 || !s_is_initialized)
{
for (size_t i = 0; i < pages; i++, vaddr += PAGE_SIZE)
asm volatile("invlpg (%0)" :: "r"(vaddr));

View File

@@ -98,8 +98,7 @@ bananboot_end:
boot_pdpt:
.long V2P(boot_pd) + (PG_PRESENT)
.long 0
.quad 0
.quad 0
.skip 2 * 8
.long V2P(boot_pd) + (PG_PRESENT)
.long 0
.align 4096
@@ -112,13 +111,16 @@ boot_pd:
.endr
boot_pts:
.set i, 0
.rept 512
.rept 511
.rept 512
.long i + (PG_READ_WRITE | PG_PRESENT)
.long 0
.set i, i + 0x1000
.endr
.endr
.global g_boot_fast_page_pt
g_boot_fast_page_pt:
.skip 512 * 8
boot_gdt:
.quad 0x0000000000000000 # null descriptor
@@ -274,7 +276,7 @@ system_halt:
jmp 1b
#define AP_V2P(vaddr) ((vaddr) - ap_trampoline + 0xF000)
#define AP_REL(vaddr) ((vaddr) - ap_trampoline + 0xF000)
.section .ap_init, "ax"
@@ -284,21 +286,27 @@ ap_trampoline:
jmp 1f
.align 8
ap_stack_ptr:
ap_stack_paddr:
.skip 4
ap_stack_vaddr:
.skip 4
ap_prepare_paging:
.skip 4
ap_page_table:
.skip 4
ap_ready:
.skip 4
ap_stack_loaded:
.skip 1
1: cli; cld
ljmpl $0x00, $AP_V2P(ap_cs_clear)
ljmpl $0x00, $AP_REL(ap_cs_clear)
ap_cs_clear:
# load ap gdt and enter protected mode
lgdt AP_V2P(ap_gdtr)
lgdt AP_REL(ap_gdtr)
movl %cr0, %eax
orb $1, %al
movl %eax, %cr0
ljmpl $0x08, $AP_V2P(ap_protected_mode)
ljmpl $0x08, $AP_REL(ap_protected_mode)
.code32
ap_protected_mode:
@@ -307,8 +315,7 @@ ap_protected_mode:
movw %ax, %ss
movw %ax, %es
movl AP_V2P(ap_stack_ptr), %esp
movb $1, AP_V2P(ap_stack_loaded)
movl AP_REL(ap_stack_paddr), %esp
leal V2P(enable_sse), %ecx; call *%ecx
leal V2P(enable_tsc), %ecx; call *%ecx
@@ -316,24 +323,28 @@ ap_protected_mode:
# load boot gdt and enter long mode
lgdt V2P(boot_gdtr)
ljmpl $0x08, $AP_V2P(ap_flush_gdt)
ljmpl $0x08, $AP_REL(ap_flush_gdt)
ap_flush_gdt:
# move stack pointer to higher half
movl %esp, %esp
addl $KERNEL_OFFSET, %esp
# jump to higher half
leal ap_higher_half, %ecx
movl $ap_higher_half, %ecx
jmp *%ecx
ap_higher_half:
movl AP_REL(ap_prepare_paging), %eax
call *%eax
# load AP's initial values
movl AP_REL(ap_stack_vaddr), %esp
movl AP_REL(ap_page_table), %eax
movl $1, AP_REL(ap_ready)
movl %eax, %cr3
# clear rbp for stacktrace
xorl %ebp, %ebp
1: pause
cmpb $0, g_ap_startup_done
jz 1b
je 1b
lock incb g_ap_running_count

View File

@@ -15,17 +15,17 @@ SECTIONS
*(.bananboot)
*(.text.*)
}
.ap_init ALIGN(4K) : AT(ADDR(.ap_init) - KERNEL_OFFSET)
{
g_ap_init_addr = .;
*(.ap_init)
g_kernel_execute_end = .;
}
.userspace ALIGN(4K) : AT(ADDR(.userspace) - KERNEL_OFFSET)
{
g_userspace_start = .;
*(.userspace)
g_userspace_end = .;
g_kernel_execute_end = .;
}
.ap_init ALIGN(4K) : AT(ADDR(.ap_init) - KERNEL_OFFSET)
{
g_ap_init_addr = .;
*(.ap_init)
}
.rodata ALIGN(4K) : AT(ADDR(.rodata) - KERNEL_OFFSET)
{

View File

@@ -2,7 +2,6 @@
#include <kernel/CPUID.h>
#include <kernel/Lock/SpinLock.h>
#include <kernel/Memory/Heap.h>
#include <kernel/Memory/kmalloc.h>
#include <kernel/Memory/PageTable.h>
extern uint8_t g_kernel_start[];
@@ -17,13 +16,15 @@ extern uint8_t g_kernel_writable_end[];
extern uint8_t g_userspace_start[];
extern uint8_t g_userspace_end[];
extern uint64_t g_boot_fast_page_pt[];
namespace Kernel
{
SpinLock PageTable::s_fast_page_lock;
static constexpr vaddr_t s_hhdm_offset = 0xFFFF800000000000;
static bool s_is_post_heap_done = false;
static bool s_is_initialized = false;
constexpr uint64_t s_page_flag_mask = 0x8000000000000FFF;
constexpr uint64_t s_page_addr_mask = ~s_page_flag_mask;
@@ -35,6 +36,8 @@ namespace Kernel
static paddr_t s_global_pml4_entries[512] { 0 };
static uint64_t* s_fast_page_pt { nullptr };
static constexpr inline bool is_canonical(uintptr_t addr)
{
constexpr uintptr_t mask = 0xFFFF800000000000;
@@ -54,34 +57,6 @@ namespace Kernel
return addr;
}
struct FuncsKmalloc
{
static paddr_t allocate_zeroed_page_aligned_page()
{
void* page = kmalloc(PAGE_SIZE, PAGE_SIZE, true);
ASSERT(page);
memset(page, 0, PAGE_SIZE);
return kmalloc_paddr_of(reinterpret_cast<vaddr_t>(page)).value();
}
static void unallocate_page(paddr_t paddr)
{
kfree(reinterpret_cast<void*>(kmalloc_vaddr_of(paddr).value()));
}
static paddr_t V2P(vaddr_t vaddr)
{
return vaddr - KERNEL_OFFSET + g_boot_info.kernel_paddr;
}
static uint64_t* P2V(paddr_t paddr)
{
return reinterpret_cast<uint64_t*>(paddr - g_boot_info.kernel_paddr + KERNEL_OFFSET);
}
};
struct FuncsHHDM
{
static paddr_t allocate_zeroed_page_aligned_page()
{
const paddr_t paddr = Heap::get().take_free_page();
@@ -95,27 +70,14 @@ namespace Kernel
Heap::get().release_page(paddr);
}
static paddr_t V2P(vaddr_t vaddr)
{
ASSERT(vaddr >= s_hhdm_offset);
ASSERT(vaddr < KERNEL_OFFSET);
return vaddr - s_hhdm_offset;
}
static uint64_t* P2V(paddr_t paddr)
{
ASSERT(paddr != 0);
ASSERT(!BAN::Math::will_addition_overflow(paddr, s_hhdm_offset));
return reinterpret_cast<uint64_t*>(paddr + s_hhdm_offset);
}
};
static paddr_t (*allocate_zeroed_page_aligned_page)() = &FuncsKmalloc::allocate_zeroed_page_aligned_page;
static void (*unallocate_page)(paddr_t) = &FuncsKmalloc::unallocate_page;
static paddr_t (*V2P)(vaddr_t) = &FuncsKmalloc::V2P;
static uint64_t* (*P2V)(paddr_t) = &FuncsKmalloc::P2V;
static inline PageTable::flags_t parse_flags(uint64_t entry)
static PageTable::flags_t parse_flags(uint64_t entry)
{
using Flags = PageTable::Flags;
@@ -137,7 +99,7 @@ namespace Kernel
// 0: 4 KiB
// 1: 2 MiB
// 2: 1 GiB
static void init_map_hhdm_page(paddr_t pml4, paddr_t paddr, uint8_t page_size)
static void map_hhdm_page(paddr_t pml4, paddr_t paddr, uint8_t page_size)
{
ASSERT(0 <= page_size && page_size <= 2);
@@ -184,7 +146,7 @@ namespace Kernel
const uint64_t noexec_flag = s_has_nxe ? (static_cast<uint64_t>(1) << 63) : 0;
const paddr_t pdpt = get_or_allocate_entry(pml4, pml4e, noexec_flag);
s_global_pml4_entries[pml4e] = pdpt | hhdm_flags;
s_global_pml4_entries[pml4e] = pdpt | hhdm_flags | noexec_flag;
paddr_t lowest_paddr = pdpt;
uint16_t lowest_entry = pdpte;
@@ -207,23 +169,11 @@ namespace Kernel
});
}
static void init_map_hhdm(paddr_t pml4)
static void initialize_hhdm(paddr_t pml4)
{
for (const auto& entry : g_boot_info.memory_map_entries)
{
bool should_map = false;
switch (entry.type)
{
case MemoryMapEntry::Type::Available:
should_map = true;
break;
case MemoryMapEntry::Type::ACPIReclaim:
case MemoryMapEntry::Type::ACPINVS:
case MemoryMapEntry::Type::Reserved:
should_map = false;
break;
}
if (!should_map)
if (entry.type != MemoryMapEntry::Type::Available)
continue;
constexpr size_t one_gib = 1024 * 1024 * 1024;
@@ -235,156 +185,39 @@ namespace Kernel
{
if (s_has_gib && paddr % one_gib == 0 && paddr + one_gib <= entry_end)
{
init_map_hhdm_page(pml4, paddr, 2);
map_hhdm_page(pml4, paddr, 2);
paddr += one_gib;
}
else if (paddr % two_mib == 0 && paddr + two_mib <= entry_end)
{
init_map_hhdm_page(pml4, paddr, 1);
map_hhdm_page(pml4, paddr, 1);
paddr += two_mib;
}
else
{
init_map_hhdm_page(pml4, paddr, 0);
map_hhdm_page(pml4, paddr, 0);
paddr += PAGE_SIZE;
}
}
}
}
static paddr_t copy_page_from_kmalloc_to_heap(paddr_t kmalloc_paddr)
void PageTable::initialize_fast_page()
{
const paddr_t heap_paddr = Heap::get().take_free_page();
ASSERT(heap_paddr);
const vaddr_t kmalloc_vaddr = kmalloc_vaddr_of(kmalloc_paddr).value();
PageTable::with_fast_page(heap_paddr, [kmalloc_vaddr] {
memcpy(PageTable::fast_page_as_ptr(), reinterpret_cast<void*>(kmalloc_vaddr), PAGE_SIZE);
});
return heap_paddr;
s_fast_page_pt = g_boot_fast_page_pt;
}
static void copy_paging_structure_to_heap(uint64_t* old_table, uint64_t* new_table, int depth)
{
if (depth == 0)
return;
constexpr uint64_t page_flag_mask = 0x8000000000000FFF;
constexpr uint64_t page_addr_mask = ~page_flag_mask;
for (uint16_t index = 0; index < 512; index++)
{
const uint64_t old_entry = old_table[index];
if (old_entry == 0)
{
new_table[index] = 0;
continue;
}
const paddr_t old_paddr = old_entry & page_addr_mask;
const paddr_t new_paddr = copy_page_from_kmalloc_to_heap(old_paddr);
new_table[index] = new_paddr | (old_entry & page_flag_mask);
uint64_t* next_old_table = reinterpret_cast<uint64_t*>(old_paddr + s_hhdm_offset);
uint64_t* next_new_table = reinterpret_cast<uint64_t*>(new_paddr + s_hhdm_offset);
copy_paging_structure_to_heap(next_old_table, next_new_table, depth - 1);
}
}
static void free_kmalloc_paging_structure(uint64_t* table, int depth)
{
if (depth == 0)
return;
constexpr uint64_t page_flag_mask = 0x8000000000000FFF;
constexpr uint64_t page_addr_mask = ~page_flag_mask;
for (uint16_t index = 0; index < 512; index++)
{
const uint64_t entry = table[index];
if (entry == 0)
continue;
const paddr_t paddr = entry & page_addr_mask;
uint64_t* next_table = reinterpret_cast<uint64_t*>(paddr + s_hhdm_offset);
free_kmalloc_paging_structure(next_table, depth - 1);
kfree(reinterpret_cast<void*>(kmalloc_vaddr_of(paddr).value()));
}
}
void PageTable::initialize_pre_heap()
static void detect_cpu_features()
{
if (CPUID::has_nxe())
s_has_nxe = true;
if (CPUID::has_pge())
s_has_pge = true;
if (CPUID::has_1gib_pages())
s_has_gib = true;
ASSERT(s_kernel == nullptr);
s_kernel = new PageTable();
ASSERT(s_kernel);
s_kernel->m_highest_paging_struct = allocate_zeroed_page_aligned_page();
s_kernel->prepare_fast_page();
s_kernel->initialize_kernel();
for (auto pml4e : s_global_pml4_entries)
ASSERT(pml4e == 0);
const uint64_t* pml4 = P2V(s_kernel->m_highest_paging_struct);
s_global_pml4_entries[511] = pml4[511];
}
void PageTable::initialize_post_heap()
{
ASSERT(s_kernel);
init_map_hhdm(s_kernel->m_highest_paging_struct);
const paddr_t old_pml4_paddr = s_kernel->m_highest_paging_struct;
const paddr_t new_pml4_paddr = copy_page_from_kmalloc_to_heap(old_pml4_paddr);
uint64_t* old_pml4 = reinterpret_cast<uint64_t*>(kmalloc_vaddr_of(old_pml4_paddr).value());
uint64_t* new_pml4 = reinterpret_cast<uint64_t*>(new_pml4_paddr + s_hhdm_offset);
const paddr_t old_pdpt_paddr = old_pml4[511] & s_page_addr_mask;
const paddr_t new_pdpt_paddr = Heap::get().take_free_page();
ASSERT(new_pdpt_paddr);
uint64_t* old_pdpt = reinterpret_cast<uint64_t*>(old_pdpt_paddr + s_hhdm_offset);
uint64_t* new_pdpt = reinterpret_cast<uint64_t*>(new_pdpt_paddr + s_hhdm_offset);
copy_paging_structure_to_heap(old_pdpt, new_pdpt, 2);
new_pml4[511] = new_pdpt_paddr | (old_pml4[511] & s_page_flag_mask);
s_global_pml4_entries[511] = new_pml4[511];
s_kernel->m_highest_paging_struct = new_pml4_paddr;
s_kernel->load();
free_kmalloc_paging_structure(old_pdpt, 2);
kfree(reinterpret_cast<void*>(kmalloc_vaddr_of(old_pdpt_paddr).value()));
kfree(reinterpret_cast<void*>(kmalloc_vaddr_of(old_pml4_paddr).value()));
allocate_zeroed_page_aligned_page = &FuncsHHDM::allocate_zeroed_page_aligned_page;
unallocate_page = &FuncsHHDM::unallocate_page;
V2P = &FuncsHHDM::V2P;
P2V = &FuncsHHDM::P2V;
s_is_post_heap_done = true;
// This is a hack to unmap fast page. fast page pt is copied
// while it is mapped, so we need to manually unmap it
SpinLockGuard _(s_fast_page_lock);
unmap_fast_page();
}
void PageTable::initial_load()
void PageTable::enable_cpu_features()
{
if (s_has_nxe)
{
@@ -423,8 +256,63 @@ namespace Kernel
"movq %%rax, %%cr0;"
::: "rax"
);
}
load();
// Builds the kernel page table and switches to it.
//
// Steps, in order:
//  1. detect and enable the CPU paging features,
//  2. read CR3 to obtain the physical address of the page table currently in
//     use and run initialize_hhdm() on it,
//  3. allocate the kernel PageTable object and its top-level page (PML4),
//  4. copy s_global_pml4_entries into the new PML4,
//  5. map the kernel image via map_kernel_memory() and record the resulting
//     PML4 entry 511 in s_global_pml4_entries,
//  6. make sure the tables covering fast_page() exist in the new structure
//     and repoint s_fast_page_pt at the resulting page table,
//  7. load the new page table.
//
// NOTE(review): s_is_initialized is not assigned anywhere in this function;
// confirm where (or whether) it is set to true.
void PageTable::initialize_and_load()
{
detect_cpu_features();
enable_cpu_features();
// Physical address of the page table that is active right now (from CR3).
const paddr_t boot_pml4_paddr = ({
paddr_t paddr;
asm volatile("movq %%cr3, %0" : "=r"(paddr));
paddr;
});
// NOTE(review): presumably this must run before the P2V() calls below so that
// the addresses they return are already mapped — confirm.
initialize_hhdm(boot_pml4_paddr);
ASSERT(s_kernel == nullptr);
s_kernel = new PageTable();
ASSERT(s_kernel != nullptr);
s_kernel->m_highest_paging_struct = allocate_zeroed_page_aligned_page();
ASSERT(s_kernel->m_highest_paging_struct);
uint64_t* pml4 = P2V(s_kernel->m_highest_paging_struct);
// Seed the new PML4 with the global entries collected so far.
memcpy(pml4, s_global_pml4_entries, sizeof(s_global_pml4_entries));
s_kernel->map_kernel_memory();
// Remember the PML4 entry 511 produced by map_kernel_memory() as a global entry.
s_global_pml4_entries[511] = pml4[511];
// update fast page pt
{
constexpr vaddr_t uc_vaddr = uncanonicalize(fast_page());
constexpr uint16_t pml4e = (uc_vaddr >> 39) & 0x1FF;
constexpr uint16_t pdpte = (uc_vaddr >> 30) & 0x1FF;
constexpr uint16_t pde = (uc_vaddr >> 21) & 0x1FF;
// Returns the physical address stored in table[entry], allocating a zeroed
// page for it first when the entry is not present; `flags` are OR-ed in
// either way.
const auto get_or_allocate_entry =
[](paddr_t table_paddr, uint16_t entry, uint64_t flags)
{
uint64_t* table = P2V(table_paddr);
if (!(table[entry] & Flags::Present))
{
table[entry] = allocate_zeroed_page_aligned_page();
ASSERT(table[entry]);
}
table[entry] |= flags;
return table[entry] & s_page_addr_mask;
};
// Shadows the outer `pml4` pointer with the physical address for the walk.
const paddr_t pml4 = s_kernel->m_highest_paging_struct;
const paddr_t pdpt = get_or_allocate_entry(pml4, pml4e, Flags::ReadWrite | Flags::Present);
const paddr_t pd = get_or_allocate_entry(pdpt, pdpte, Flags::ReadWrite | Flags::Present);
// s_fast_page_pt now refers to the start of the page table covering
// fast_page().
// NOTE(review): presumably fast_page() maps to entry 0 of that table —
// confirm against the code that writes through s_fast_page_pt.
s_fast_page_pt = P2V(get_or_allocate_entry(pd, pde, Flags::ReadWrite | Flags::Present));
}
s_kernel->load();
}
PageTable& PageTable::kernel()
@@ -440,12 +328,12 @@ namespace Kernel
return true;
}
void PageTable::initialize_kernel()
void PageTable::map_kernel_memory()
{
// Map (phys_kernel_start -> phys_kernel_end) to (virt_kernel_start -> virt_kernel_end)
const vaddr_t kernel_start = reinterpret_cast<vaddr_t>(g_kernel_start);
map_range_at(
V2P(kernel_start),
kernel_start - KERNEL_OFFSET + g_boot_info.kernel_paddr,
kernel_start,
g_kernel_end - g_kernel_start,
Flags::Present
@@ -454,7 +342,7 @@ namespace Kernel
// Map executable kernel memory as executable
const vaddr_t kernel_execute_start = reinterpret_cast<vaddr_t>(g_kernel_execute_start);
map_range_at(
V2P(kernel_execute_start),
kernel_execute_start - KERNEL_OFFSET + g_boot_info.kernel_paddr,
kernel_execute_start,
g_kernel_execute_end - g_kernel_execute_start,
Flags::Execute | Flags::Present
@@ -463,7 +351,7 @@ namespace Kernel
// Map writable kernel memory as writable
const vaddr_t kernel_writable_start = reinterpret_cast<vaddr_t>(g_kernel_writable_start);
map_range_at(
V2P(kernel_writable_start),
kernel_writable_start - KERNEL_OFFSET + g_boot_info.kernel_paddr,
kernel_writable_start,
g_kernel_writable_end - g_kernel_writable_start,
Flags::ReadWrite | Flags::Present
@@ -472,114 +360,58 @@ namespace Kernel
// Map userspace memory
const vaddr_t userspace_start = reinterpret_cast<vaddr_t>(g_userspace_start);
map_range_at(
V2P(userspace_start),
userspace_start - KERNEL_OFFSET + g_boot_info.kernel_paddr,
userspace_start,
g_userspace_end - g_userspace_start,
Flags::Execute | Flags::UserSupervisor | Flags::Present
);
}
void PageTable::prepare_fast_page()
{
constexpr vaddr_t uc_vaddr = uncanonicalize(fast_page());
constexpr uint64_t pml4e = (uc_vaddr >> 39) & 0x1FF;
constexpr uint64_t pdpte = (uc_vaddr >> 30) & 0x1FF;
constexpr uint64_t pde = (uc_vaddr >> 21) & 0x1FF;
constexpr uint64_t pte = (uc_vaddr >> 12) & 0x1FF;
uint64_t* pml4 = P2V(m_highest_paging_struct);
ASSERT(!(pml4[pml4e] & Flags::Present));
pml4[pml4e] = allocate_zeroed_page_aligned_page() | Flags::ReadWrite | Flags::Present;
uint64_t* pdpt = P2V(pml4[pml4e] & s_page_addr_mask);
ASSERT(!(pdpt[pdpte] & Flags::Present));
pdpt[pdpte] = allocate_zeroed_page_aligned_page() | Flags::ReadWrite | Flags::Present;
uint64_t* pd = P2V(pdpt[pdpte] & s_page_addr_mask);
ASSERT(!(pd[pde] & Flags::Present));
pd[pde] = allocate_zeroed_page_aligned_page() | Flags::ReadWrite | Flags::Present;
uint64_t* pt = P2V(pd[pde] & s_page_addr_mask);
ASSERT(pt[pte] == 0);
pt[pte] = Flags::Reserved;
}
void PageTable::map_fast_page(paddr_t paddr)
{
ASSERT(s_kernel);
ASSERT(paddr);
ASSERT(paddr % PAGE_SIZE == 0);
ASSERT(paddr && paddr % PAGE_SIZE == 0);
ASSERT(s_fast_page_pt);
ASSERT(s_fast_page_lock.current_processor_has_lock());
constexpr vaddr_t uc_vaddr = uncanonicalize(fast_page());
constexpr uint64_t pml4e = (uc_vaddr >> 39) & 0x1FF;
constexpr uint64_t pdpte = (uc_vaddr >> 30) & 0x1FF;
constexpr uint64_t pde = (uc_vaddr >> 21) & 0x1FF;
constexpr uint64_t pte = (uc_vaddr >> 12) & 0x1FF;
ASSERT(!(*s_fast_page_pt & Flags::Present));
s_fast_page_pt[0] = paddr | Flags::ReadWrite | Flags::Present;
const uint64_t* pml4 = P2V(s_kernel->m_highest_paging_struct);
const uint64_t* pdpt = P2V(pml4[pml4e] & s_page_addr_mask);
const uint64_t* pd = P2V(pdpt[pdpte] & s_page_addr_mask);
uint64_t* pt = P2V(pd[pde] & s_page_addr_mask);
ASSERT(!(pt[pte] & Flags::Present));
pt[pte] = paddr | Flags::ReadWrite | Flags::Present;
asm volatile("invlpg (%0)" :: "r"(fast_page()) : "memory");
asm volatile("invlpg (%0)" :: "r"(fast_page()));
}
void PageTable::unmap_fast_page()
{
ASSERT(s_kernel);
ASSERT(s_fast_page_pt);
ASSERT(s_fast_page_lock.current_processor_has_lock());
constexpr vaddr_t uc_vaddr = uncanonicalize(fast_page());
constexpr uint64_t pml4e = (uc_vaddr >> 39) & 0x1FF;
constexpr uint64_t pdpte = (uc_vaddr >> 30) & 0x1FF;
constexpr uint64_t pde = (uc_vaddr >> 21) & 0x1FF;
constexpr uint64_t pte = (uc_vaddr >> 12) & 0x1FF;
ASSERT((*s_fast_page_pt & Flags::Present));
s_fast_page_pt[0] = 0;
const uint64_t* pml4 = P2V(s_kernel->m_highest_paging_struct);
const uint64_t* pdpt = P2V(pml4[pml4e] & s_page_addr_mask);
const uint64_t* pd = P2V(pdpt[pdpte] & s_page_addr_mask);
uint64_t* pt = P2V(pd[pde] & s_page_addr_mask);
ASSERT(pt[pte] & Flags::Present);
pt[pte] = Flags::Reserved;
asm volatile("invlpg (%0)" :: "r"(fast_page()) : "memory");
asm volatile("invlpg (%0)" :: "r"(fast_page()));
}
BAN::ErrorOr<PageTable*> PageTable::create_userspace()
{
SpinLockGuard _(s_kernel->m_lock);
PageTable* page_table = new PageTable;
if (page_table == nullptr)
return BAN::Error::from_errno(ENOMEM);
page_table->map_kernel_memory();
page_table->m_highest_paging_struct = allocate_zeroed_page_aligned_page();
if (page_table->m_highest_paging_struct == 0)
{
delete page_table;
return BAN::Error::from_errno(ENOMEM);
}
uint64_t* pml4 = P2V(page_table->m_highest_paging_struct);
memcpy(pml4, s_global_pml4_entries, sizeof(s_global_pml4_entries));
return page_table;
}
void PageTable::map_kernel_memory()
{
ASSERT(s_kernel);
ASSERT(s_global_pml4_entries[511]);
ASSERT(m_highest_paging_struct == 0);
m_highest_paging_struct = allocate_zeroed_page_aligned_page();
PageTable::with_fast_page(m_highest_paging_struct, [] {
for (size_t i = 0; i < 512; i++)
{
if (s_global_pml4_entries[i] == 0)
continue;
ASSERT(i >= 256);
PageTable::fast_page_as_sized<uint64_t>(i) = s_global_pml4_entries[i];
}
});
}
PageTable::~PageTable()
{
if (m_highest_paging_struct == 0)
@@ -624,7 +456,7 @@ namespace Kernel
const bool is_userspace = (vaddr < KERNEL_OFFSET);
if (is_userspace && this != &PageTable::current())
;
else if (pages <= 32 || !s_is_post_heap_done)
else if (pages <= 32 || !s_is_initialized)
{
for (size_t i = 0; i < pages; i++, vaddr += PAGE_SIZE)
asm volatile("invlpg (%0)" :: "r"(vaddr));

View File

@@ -97,27 +97,25 @@ bananboot_end:
.align 4096
boot_pml4:
.quad V2P(boot_pdpt_lo) + (PG_READ_WRITE | PG_PRESENT)
.rept 510
.quad 0
.endr
.skip 510 * 8
.quad V2P(boot_pdpt_hi) + (PG_READ_WRITE | PG_PRESENT)
boot_pdpt_lo:
.quad V2P(boot_pd) + (PG_READ_WRITE | PG_PRESENT)
.rept 511
.quad 0
.endr
.skip 511 * 8
boot_pdpt_hi:
.rept 510
.quad 0
.endr
.skip 510 * 8
.quad V2P(boot_pd) + (PG_READ_WRITE | PG_PRESENT)
.quad 0
.skip 8
boot_pd:
.set i, 0
.rept 512
.rept 511
.quad i + (PG_PAGE_SIZE | PG_READ_WRITE | PG_PRESENT)
.set i, i + 0x200000
.endr
.quad V2P(g_boot_fast_page_pt) + (PG_READ_WRITE | PG_PRESENT)
.global g_boot_fast_page_pt
g_boot_fast_page_pt:
.skip 512 * 8
boot_gdt:
.quad 0x0000000000000000 # null descriptor
@@ -273,7 +271,7 @@ system_halt:
jmp 1b
#define AP_V2P(vaddr) ((vaddr) - ap_trampoline + 0xF000)
#define AP_REL(vaddr) ((vaddr) - ap_trampoline + 0xF000)
.section .ap_init, "ax"
@@ -283,21 +281,27 @@ ap_trampoline:
jmp 1f
.align 8
ap_stack_ptr:
.skip 4
ap_stack_loaded:
.skip 1
ap_stack_paddr:
.skip 8
ap_stack_vaddr:
.skip 8
ap_prepare_paging:
.skip 8
ap_page_table:
.skip 8
ap_ready:
.skip 8
1: cli; cld
ljmpl $0x00, $AP_V2P(ap_cs_clear)
ljmpl $0x00, $AP_REL(ap_cs_clear)
ap_cs_clear:
# load ap gdt and enter protected mode
lgdt AP_V2P(ap_gdtr)
lgdt AP_REL(ap_gdtr)
movl %cr0, %eax
orb $1, %al
movl %eax, %cr0
ljmpl $0x08, $AP_V2P(ap_protected_mode)
ljmpl $0x08, $AP_REL(ap_protected_mode)
.code32
ap_protected_mode:
@@ -306,8 +310,7 @@ ap_protected_mode:
movw %ax, %ss
movw %ax, %es
movl AP_V2P(ap_stack_ptr), %esp
movb $1, AP_V2P(ap_stack_loaded)
movl AP_REL(ap_stack_paddr), %esp
leal V2P(enable_sse), %ecx; call *%ecx
leal V2P(enable_tsc), %ecx; call *%ecx
@@ -315,28 +318,34 @@ ap_protected_mode:
# load boot gdt and enter long mode
lgdt V2P(boot_gdtr)
ljmpl $0x08, $AP_V2P(ap_long_mode)
ljmpl $0x08, $AP_REL(ap_long_mode)
.code64
ap_long_mode:
# move stack pointer to higher half
movl %esp, %esp
addq $KERNEL_OFFSET, %rsp
movq $ap_higher_half, %rax
jmp *%rax
ap_higher_half:
movq AP_REL(ap_prepare_paging), %rax
call *%rax
# load AP's initial values
movq AP_REL(ap_stack_vaddr), %rsp
movq AP_REL(ap_page_table), %rax
movq $1, AP_REL(ap_ready)
movq %rax, %cr3
# clear rbp for stacktrace
xorq %rbp, %rbp
xorb %al, %al
1: pause
cmpb %al, g_ap_startup_done
jz 1b
cmpb $0, g_ap_startup_done
je 1b
lock incb g_ap_running_count
# jump to ap_main in higher half
movabsq $ap_main, %rcx
call *%rcx
jmp V2P(system_halt)
call ap_main
jmp system_halt
ap_gdt:
.quad 0x0000000000000000 # null descriptor

View File

@@ -15,17 +15,17 @@ SECTIONS
*(.bananboot)
*(.text.*)
}
.ap_init ALIGN(4K) : AT(ADDR(.ap_init) - KERNEL_OFFSET)
{
g_ap_init_addr = .;
*(.ap_init)
g_kernel_execute_end = .;
}
.userspace ALIGN(4K) : AT(ADDR(.userspace) - KERNEL_OFFSET)
{
g_userspace_start = .;
*(.userspace)
g_userspace_end = .;
g_kernel_execute_end = .;
}
.ap_init ALIGN(4K) : AT(ADDR(.ap_init) - KERNEL_OFFSET)
{
g_ap_init_addr = .;
*(.ap_init)
}
.rodata ALIGN(4K) : AT(ADDR(.rodata) - KERNEL_OFFSET)
{

View File

@@ -12,7 +12,7 @@ namespace Kernel::API
struct SharedPage
{
uint8_t __sequence[0x100];
uint16_t gdt_cpu_offset;
uint32_t features;

View File

@@ -14,7 +14,7 @@
#define _kas_globbers
#endif
#define _kas_argument_var(index, value) register long _kas_a##index asm(_ban_stringify(_ban_get(index, _kas_arguments))) = (long)value;
#define _kas_argument_var(index, value) register long _kas_a##index asm(_ban_stringify(_ban_get(index, _kas_arguments))) = (long)(value);
#define _kas_dummy_var(index, value) register long _kas_d##index asm(#value);
#define _kas_input(index, _) "r"(_kas_a##index)
#define _kas_output(index, _) , "=r"(_kas_d##index)

View File

@@ -133,6 +133,12 @@ namespace Kernel
void set_gsbase(uintptr_t addr);
#endif
// Returns the byte offset of the cpu-index descriptor inside the GDT
// (m_cpu_index_offset: 0x30 on x86_64, 0x40 on i686).
static uint16_t cpu_index_offset() { return m_cpu_index_offset; }
// Rewrites the cpu-index descriptor so that it carries `index`.
// NOTE(review): write_entry's arguments look like (offset, base, limit, access, flags),
// i.e. the index is stored in the segment limit with base 0, access byte 0xF2 and
// flags 0x4 -- confirm against write_entry's declaration.
void set_cpu_index(uint8_t index)
{
write_entry(m_cpu_index_offset, 0, index, 0xF2, 0x4);
}
private:
GDT() = default;
@@ -151,11 +157,13 @@ namespace Kernel
private:
#if ARCH(x86_64)
BAN::Array<SegmentDescriptor, 8> m_gdt; // null, kernel code, kernel data, user code (32 bit), user data, user code (64 bit), tss low, tss high
static constexpr uint16_t m_tss_offset = 0x30;
BAN::Array<SegmentDescriptor, 9> m_gdt; // null, kernel code, kernel data, user code (32 bit), user data, user code (64 bit), cpu-index, tss low, tss high
static constexpr uint16_t m_cpu_index_offset = 0x30;
static constexpr uint16_t m_tss_offset = 0x38;
#elif ARCH(i686)
BAN::Array<SegmentDescriptor, 9> m_gdt; // null, kernel code, kernel data, user code, user data, processor data, fsbase, gsbase, tss
static constexpr uint16_t m_tss_offset = 0x40;
BAN::Array<SegmentDescriptor, 10> m_gdt; // null, kernel code, kernel data, user code, user data, processor data, fsbase, gsbase, cpu-index, tss
static constexpr uint16_t m_cpu_index_offset = 0x40;
static constexpr uint16_t m_tss_offset = 0x48;
#endif
TaskStateSegment m_tss;
const GDTR m_gdtr {

View File

@@ -51,7 +51,7 @@ namespace Kernel
uint8_t back() const
{
ASSERT(!empty());
return reinterpret_cast<const uint8_t*>(m_vaddr)[m_tail + m_size];
return reinterpret_cast<const uint8_t*>(m_vaddr)[m_tail + m_size - 1];
}
bool empty() const { return m_size == 0; }

View File

@@ -46,13 +46,22 @@ namespace Kernel
};
public:
static void initialize_pre_heap();
static void initialize_post_heap();
static void initialize_fast_page();
static void initialize_and_load();
static void enable_cpu_features();
static PageTable& kernel();
static PageTable& current() { return *reinterpret_cast<PageTable*>(Processor::get_current_page_table()); }
static constexpr vaddr_t fast_page() { return KERNEL_OFFSET; }
// Fixed, architecture-specific virtual address of the fast page.
static constexpr vaddr_t fast_page()
{
#if ARCH(x86_64)
// Decodes to PML4 index 511, PDPT index 510, PD index 511, PT index 0.
// NOTE(review): this appears to be the slot that the boot page tables route to
// g_boot_fast_page_pt (last entry of boot_pd) -- keep both in sync.
return 0xffffffffbfe00000;
#elif ARCH(i686)
// NOTE(review): presumably the i686 counterpart of the slot above; the i686 boot
// tables are not visible here -- confirm.
return 0xffe00000;
#endif
}
template<with_fast_page_callback F>
static void with_fast_page(paddr_t paddr, F callback)
@@ -121,7 +130,6 @@ namespace Kernel
vaddr_t reserve_free_contiguous_pages(size_t page_count, vaddr_t first_address, vaddr_t last_address = UINTPTR_MAX);
void load();
void initial_load();
void invalidate_page(vaddr_t addr, bool send_smp_message) { invalidate_range(addr, 1, send_smp_message); }
void invalidate_range(vaddr_t addr, size_t pages, bool send_smp_message);
@@ -129,14 +137,14 @@ namespace Kernel
InterruptState lock() const { return m_lock.lock(); }
void unlock(InterruptState state) const { m_lock.unlock(state); }
paddr_t paddr() const { return m_highest_paging_struct; }
void debug_dump();
private:
PageTable() = default;
uint64_t get_page_data(vaddr_t) const;
void initialize_kernel();
void map_kernel_memory();
void prepare_fast_page();
static void map_fast_page(paddr_t);
static void unmap_fast_page();

View File

@@ -13,4 +13,3 @@ void* kmalloc(size_t size, size_t align, bool force_identity_map = false);
void kfree(void*);
BAN::Optional<Kernel::paddr_t> kmalloc_paddr_of(Kernel::vaddr_t);
BAN::Optional<Kernel::vaddr_t> kmalloc_vaddr_of(Kernel::paddr_t);

View File

@@ -2,6 +2,7 @@
#include <BAN/Atomic.h>
#include <BAN/ForwardList.h>
#include <BAN/Math.h>
#include <kernel/API/SharedPage.h>
#include <kernel/Arch.h>
@@ -58,6 +59,8 @@ namespace Kernel
static Processor& create(ProcessorID id);
static Processor& initialize();
void allocate_stack();
static ProcessorID current_id() { return read_gs_sized<ProcessorID>(offsetof(Processor, m_id)); }
static uint8_t current_index() { return read_gs_sized<uint8_t>(offsetof(Processor, m_index)); }
static ProcessorID id_from_index(size_t index);
@@ -100,11 +103,8 @@ namespace Kernel
handle_smp_messages();
}
static uintptr_t current_stack_bottom() { return read_gs_sized<uintptr_t>(offsetof(Processor, m_stack)); }
static uintptr_t current_stack_top() { return current_stack_bottom() + s_stack_size; }
uintptr_t stack_bottom() const { return reinterpret_cast<uintptr_t>(m_stack); }
uintptr_t stack_top() const { return stack_bottom() + s_stack_size; }
vaddr_t stack_top_vaddr() const { return m_stack_vaddr + s_stack_size; }
paddr_t stack_top_paddr() const { return m_stack_paddr + s_stack_size; }
static void set_thread_syscall_stack(vaddr_t vaddr) { write_gs_sized<vaddr_t>(offsetof(Processor, m_thread_syscall_stack), vaddr); }
@@ -140,11 +140,7 @@ namespace Kernel
static void disable_sse()
{
uintptr_t dummy;
#if ARCH(x86_64)
asm volatile("movq %%cr0, %0; orq $0x08, %0; movq %0, %%cr0" : "=r"(dummy));
#elif ARCH(i686)
asm volatile("movl %%cr0, %0; orl $0x08, %0; movl %0, %%cr0" : "=r"(dummy));
#endif
asm volatile("mov %%cr0, %0; or $0x08, %0; mov %0, %%cr0" : "=r"(dummy));
}
static void enable_sse()
@@ -169,35 +165,17 @@ namespace Kernel
}
template<typename T>
static T read_gs_sized(uintptr_t offset) requires(sizeof(T) <= 8)
static T read_gs_sized(uintptr_t offset) requires(sizeof(T) <= 8 && BAN::Math::is_power_of_two(sizeof(T)))
{
#define __ASM_INPUT(operation) asm volatile(operation " %%gs:%a[offset], %[result]" : [result]"=r"(result) : [offset]"ir"(offset))
T result;
if constexpr(sizeof(T) == 8)
__ASM_INPUT("movq");
if constexpr(sizeof(T) == 4)
__ASM_INPUT("movl");
if constexpr(sizeof(T) == 2)
__ASM_INPUT("movw");
if constexpr(sizeof(T) == 1)
__ASM_INPUT("movb");
return result;
#undef __ASM_INPUT
T value;
asm volatile("mov %%gs:%a[offset], %[value]" : [value]"=r"(value) : [offset]"ir"(offset));
return value;
}
template<typename T>
static void write_gs_sized(uintptr_t offset, T value) requires(sizeof(T) <= 8)
static void write_gs_sized(uintptr_t offset, T value) requires(sizeof(T) <= 8 && BAN::Math::is_power_of_two(sizeof(T)))
{
#define __ASM_INPUT(operation) asm volatile(operation " %[value], %%gs:%a[offset]" :: [value]"r"(value), [offset]"ir"(offset) : "memory")
if constexpr(sizeof(T) == 8)
__ASM_INPUT("movq");
if constexpr(sizeof(T) == 4)
__ASM_INPUT("movl");
if constexpr(sizeof(T) == 2)
__ASM_INPUT("movw");
if constexpr(sizeof(T) == 1)
__ASM_INPUT("movb");
#undef __ASM_INPUT
asm volatile("mov %[value], %%gs:%a[offset]" :: [value]"r"(value), [offset]"ir"(offset) : "memory");
}
private:
@@ -215,8 +193,9 @@ namespace Kernel
Thread* m_sse_thread { nullptr };
static constexpr size_t s_stack_size { 4096 };
void* m_stack { nullptr };
static constexpr size_t s_stack_size { PAGE_SIZE };
vaddr_t m_stack_vaddr { 0 };
paddr_t m_stack_paddr { 0 };
GDT* m_gdt { nullptr };
IDT* m_idt { nullptr };

View File

@@ -178,9 +178,7 @@ namespace Kernel
bool m_is_userspace { false };
bool m_delete_process { false };
bool m_has_custom_fsbase { false };
vaddr_t m_fsbase { 0 };
bool m_has_custom_gsbase { false };
vaddr_t m_gsbase { 0 };
SchedulerQueue::Node* m_scheduler_node { nullptr };

View File

@@ -293,9 +293,25 @@ namespace Kernel
dprintln("Trying to enable processor (lapic id {})", processor.apic_id);
auto& proc = Kernel::Processor::create(ProcessorID(processor.apic_id));
proc.allocate_stack();
// Startup parameters handed to an application processor through the AP init page.
// The struct is written at byte offset 8 of that page (fast_page_as<ap_init_info_t>(8)).
// NOTE(review): the field order appears to mirror the 8-byte slots ap_stack_paddr,
// ap_stack_vaddr, ap_prepare_paging, ap_page_table and ap_ready in the AP trampoline;
// this assumes uintptr_t is 8 bytes wide -- confirm for every supported architecture.
struct ap_init_info_t
{
uintptr_t stack_paddr; // physical address of the top of the processor's stack
uintptr_t stack_vaddr; // virtual address of the top of the processor's stack
uintptr_t prepare_paging; // address of the function stored for the AP (PageTable::enable_cpu_features)
uintptr_t page_table; // physical address of the kernel page table
uintptr_t ready; // initialised to 0; polled afterwards until it becomes non-zero
};
PageTable::with_fast_page(ap_init_paddr, [&] {
PageTable::fast_page_as_sized<uint32_t>(2) = kmalloc_paddr_of(proc.stack_top()).value();
PageTable::fast_page_as_sized<uint8_t>(13) = 0;
PageTable::fast_page_as<ap_init_info_t>(8) = {
.stack_paddr = static_cast<uintptr_t>(proc.stack_top_paddr()),
.stack_vaddr = proc.stack_top_vaddr(),
.prepare_paging = reinterpret_cast<uintptr_t>(&PageTable::enable_cpu_features),
.page_table = static_cast<uintptr_t>(PageTable::kernel().paddr()),
.ready = 0,
};
});
write_to_local_apic(LAPIC_ERROR_REG, 0x00);
@@ -334,12 +350,9 @@ namespace Kernel
// give processor upto 100 * 100 us + 200 us to boot
PageTable::with_fast_page(ap_init_paddr, [&] {
for (int i = 0; i < 100; i++)
{
if (__atomic_load_n(&PageTable::fast_page_as_sized<uint8_t>(13), __ATOMIC_SEQ_CST))
for (int i = 0; i < 100; i++, udelay(100))
if (__atomic_load_n(&PageTable::fast_page_as<ap_init_info_t>(8).ready, __ATOMIC_SEQ_CST))
break;
udelay(100);
}
});
initialized_aps++;

View File

@@ -69,7 +69,7 @@ namespace Debug
else for (size_t depth = 0; depth < 64; depth++)
{
BAN::Formatter::print(Debug::putchar, " {}\r\n", reinterpret_cast<void*>(frame.ip));
if (!safe_user_memcpy(&frame, frame.bp, sizeof(stackframe)))
if (frame.bp == nullptr || !safe_user_memcpy(&frame, frame.bp, sizeof(stackframe)))
break;
}

View File

@@ -32,6 +32,8 @@ namespace Kernel
gdt->write_entry(0x38, 0x00000000, 0x00000, 0xF2, 0xC); // gsbase
#endif
gdt->write_entry(m_cpu_index_offset, 0, 0, 0, 0);
gdt->write_tss();
return gdt;

View File

@@ -293,7 +293,7 @@ namespace Kernel
const char* process_name = (tid && Thread::current().has_process())
? Process::current().name()
: nullptr;
: "";
#if ARCH(x86_64)
dwarnln(

View File

@@ -436,12 +436,3 @@ BAN::Optional<Kernel::paddr_t> kmalloc_paddr_of(Kernel::vaddr_t vaddr)
return {};
return vaddr - KERNEL_OFFSET + g_boot_info.kernel_paddr;
}
BAN::Optional<Kernel::vaddr_t> kmalloc_vaddr_of(Kernel::paddr_t paddr)
{
using namespace Kernel;
const vaddr_t vaddr = paddr + KERNEL_OFFSET - g_boot_info.kernel_paddr;
if (!is_kmalloc_vaddr(vaddr))
return {};
return vaddr;
}

View File

@@ -18,16 +18,20 @@ namespace Kernel
const uint16_t* buffer_u16 = reinterpret_cast<const uint16_t*>(buffer.data());
for (size_t j = 0; j < buffer.size() / 2; j++)
checksum += BAN::host_to_network_endian(buffer_u16[j]);
if (buffer.size() % 2 == 0)
continue;
checksum += buffer_u16[j];
if (buffer.size() % 2)
{
// NOTE: we only allow last buffer to be odd-length
ASSERT(i == buffers.size() - 1);
checksum += buffer[buffer.size() - 1] << 8;
checksum += buffer[buffer.size() - 1];
}
}
while (checksum >> 16)
checksum = (checksum >> 16) + (checksum & 0xFFFF);
return ~(uint16_t)checksum;
checksum = (checksum & 0xFFFF) + (checksum >> 16);
return BAN::host_to_network_endian<uint16_t>(~checksum);
}
}

View File

@@ -3314,7 +3314,6 @@ namespace Kernel
BAN::ErrorOr<long> Process::sys_set_fsbase(void* addr)
{
auto& thread = Thread::current();
thread.m_has_custom_fsbase = true;
thread.set_fsbase(reinterpret_cast<vaddr_t>(addr));
Processor::load_fsbase();
return 0;
@@ -3328,7 +3327,6 @@ namespace Kernel
BAN::ErrorOr<long> Process::sys_set_gsbase(void* addr)
{
auto& thread = Thread::current();
thread.m_has_custom_gsbase = true;
thread.set_gsbase(reinterpret_cast<vaddr_t>(addr));
Processor::load_gsbase();
return 0;

View File

@@ -73,9 +73,6 @@ namespace Kernel
ASSERT(processor.m_id == PROCESSOR_NONE);
processor.m_id = id;
processor.m_stack = kmalloc(s_stack_size, 4096, true);
ASSERT(processor.m_stack);
processor.m_gdt = GDT::create(&processor);
ASSERT(processor.m_gdt);
@@ -157,6 +154,21 @@ namespace Kernel
return processor;
}
// Allocates and maps this processor's stack: one physical page taken from the heap,
// mapped read-write into the kernel page table. Must be called at most once per
// processor; every failure is treated as fatal via ASSERT.
// NOTE: I don't like this being a separate function but we need heap and page tables for this :)
void Processor::allocate_stack()
{
// The stack must not have been allocated already.
ASSERT(m_stack_paddr == 0);
ASSERT(m_stack_vaddr == 0);
// Backing physical page for the stack.
m_stack_paddr = Heap::get().take_free_page();
ASSERT(m_stack_paddr);
// Virtual page for the stack in the kernel page table.
// NOTE(review): presumably KERNEL_OFFSET is the lowest address considered -- confirm.
m_stack_vaddr = PageTable::kernel().reserve_free_page(KERNEL_OFFSET);
ASSERT(m_stack_vaddr);
// Map the physical page at the reserved virtual address as present and writable.
PageTable::kernel().map_page_at(m_stack_paddr, m_stack_vaddr, PageTable::ReadWrite | PageTable::Present);
}
void Processor::initialize_smp()
{
const auto processor_id = current_id();
@@ -205,8 +217,7 @@ namespace Kernel
memset(reinterpret_cast<void*>(s_shared_page_vaddr), 0, PAGE_SIZE);
auto& shared_page = *reinterpret_cast<volatile API::SharedPage*>(s_shared_page_vaddr);
for (size_t i = 0; i <= 0xFF; i++)
shared_page.__sequence[i] = i;
shared_page.gdt_cpu_offset = GDT::cpu_index_offset();
shared_page.features = 0;
ASSERT(Processor::count() + sizeof(Kernel::API::SharedPage) <= PAGE_SIZE);
@@ -565,7 +576,7 @@ namespace Kernel
if (!scheduler().is_idle())
Thread::current().set_cpu_time_stop();
asm_yield_trampoline(Processor::current_stack_top());
asm_yield_trampoline(processor_info.stack_top_vaddr());
processor_info.m_start_ns = SystemTimer::get().ns_since_boot();

View File

@@ -303,22 +303,7 @@ namespace Kernel
{
if (!is_userspace() || !has_process())
return;
#if ARCH(x86_64)
if (m_has_custom_gsbase)
return;
#elif ARCH(i686)
if (m_has_custom_fsbase)
return;
#endif
const vaddr_t vaddr = process().shared_page_vaddr() + Processor::current_index();
#if ARCH(x86_64)
set_gsbase(vaddr);
#elif ARCH(i686)
set_fsbase(vaddr);
#endif
Processor::gdt().set_cpu_index(Processor::current_index());
}
BAN::ErrorOr<Thread*> Thread::pthread_create(entry_t entry, void* arg)

View File

@@ -126,19 +126,20 @@ extern "C" void kernel_main(uint32_t boot_magic, uint32_t boot_info)
parse_boot_info(boot_magic, boot_info);
dprintln("boot info parsed");
Processor::create(PROCESSOR_NONE);
auto& processor = Processor::create(PROCESSOR_NONE);
Processor::initialize();
dprintln("BSP initialized");
PageTable::initialize_pre_heap();
PageTable::kernel().initial_load();
dprintln("PageTable stage1 initialized");
PageTable::initialize_fast_page();
dprintln("fast page initialized");
Heap::initialize();
dprintln("Heap initialzed");
dprintln("Heap initialized");
PageTable::initialize_post_heap();
dprintln("PageTable stage2 initialized");
PageTable::initialize_and_load();
dprintln("PageTable initialized");
processor.allocate_stack();
parse_command_line();
dprintln("command line parsed, root='{}', console='{}'", cmdline.root, cmdline.console);
@@ -270,7 +271,7 @@ extern "C" void ap_main()
using namespace Kernel;
Processor::initialize();
PageTable::kernel().initial_load();
InterruptController::get().enable();
Processor::wait_until_processors_ready();

View File

@@ -56,7 +56,7 @@ int clearenv(void);
div_t div(int numer, int denom);
double drand48(void);
double erand48(unsigned short xsubi[3]);
void exit(int status);
void exit(int status) __attribute__((__noreturn__));
void free(void* ptr);
char* getenv(const char* name);
int getsubopt(char** optionp, char* const* keylistp, char** valuep);

View File

@@ -27,12 +27,7 @@ int sched_getcpu(void)
{
if (g_shared_page == nullptr)
return -1;
uint8_t cpu;
#if defined(__x86_64__)
asm volatile("movb %%gs:0, %0" : "=r"(cpu));
#elif defined(__i686__)
asm volatile("movb %%fs:0, %0" : "=q"(cpu));
#endif
return cpu;
uint16_t limit;
asm volatile("lsl %1, %0" : "=r"(limit) : "r"(g_shared_page->gdt_cpu_offset));
return limit;
}

View File

@@ -37,7 +37,6 @@ void exit(int status)
__cxa_finalize(nullptr);
fflush(nullptr);
_exit(status);
ASSERT_NOT_REACHED();
}
void _Exit(int status)

View File

@@ -30,13 +30,9 @@ int clock_gettime(clockid_t clock_id, struct timespec* tp)
const auto get_cpu =
[]() -> uint8_t {
uint8_t cpu;
#if defined(__x86_64__)
asm volatile("movb %%gs:0, %0" : "=r"(cpu));
#elif defined(__i686__)
asm volatile("movb %%fs:0, %0" : "=q"(cpu));
#endif
return cpu;
uint16_t limit;
asm volatile("lsl %1, %0" : "=r"(limit) : "r"(g_shared_page->gdt_cpu_offset));
return limit;
};
for (;;)