Compare commits

...

12 Commits

Author SHA1 Message Date
Bananymous 5050047cef Kernel: Rewrite whole scheduler
Current context saving was very hacky and dependent on compiler
behaviour that was not consistent. Now we always use iret for
context saving. This makes everything more clean.
2024-03-29 18:02:12 +02:00
Bananymous 1b65f850ee Kernel: Rename thread stacks to more appropriate names 2024-03-27 15:06:24 +02:00
Bananymous 7c2933aae1 Kernel: Fix ISR error code formatting to 32 bit 2024-03-26 21:01:18 +02:00
Bananymous 96babec22a Kernel: Implement Thread trampolines for x86_32 2024-03-26 21:01:18 +02:00
Bananymous c12d1e9bd9 Kernel: Implement PageTable for x86_32
This is mostly copied from x86_64 with necessary modifications
2024-03-26 20:16:20 +02:00
Bananymous 4d1f0e77f2 Kernel: Fix physical address size for x86_32
Having 32 bit address space does not mean physical address space
is also only 32 bits...
2024-03-26 20:16:20 +02:00
Bananymous d7bf34ecd0 Kernel: Write isr handler for x86_32 and cleanup x86_64 2024-03-26 20:16:20 +02:00
Bananymous 1943c3e7a1 Kernel: Unify IDT and GDT code between x86_64 and x86_32
The code is pretty much the same, so there are just a couple of macros
differentiating initialization.
2024-03-26 16:42:02 +02:00
Bananymous af050cc729 Kernel: Fix boot code for x86_32
Boot assembly now initializes processor and jumps to kernel
2024-03-26 13:25:22 +02:00
Bananymous 84ef2161a1 BuildSystem: Allow running qemu with i686 target 2024-03-26 03:18:54 +02:00
Bananymous ca23360d07 Bootloader: Fix GDTR pointer size to 32 bits 2024-03-26 03:04:57 +02:00
Bananymous 5dbe51a52e Userspace: Update printf formats to compile on 32 bit 2024-03-26 03:03:33 +02:00
38 changed files with 1494 additions and 707 deletions

View File

@ -161,7 +161,7 @@ gdt:
.quad 0x00CF9A000000FFFF # 32-bit code
gdtr:
.short . - gdt - 1
.quad gdt
.long gdt
banan_boot_info:
boot_command_line:

View File

@ -33,6 +33,8 @@ set(KERNEL_SOURCES
kernel/FS/TmpFS/FileSystem.cpp
kernel/FS/TmpFS/Inode.cpp
kernel/FS/VirtualFileSystem.cpp
kernel/GDT.cpp
kernel/IDT.cpp
kernel/Input/KeyboardLayout.cpp
kernel/Input/KeyEvent.cpp
kernel/Input/PS2/Controller.cpp
@ -105,8 +107,6 @@ if("${BANAN_ARCH}" STREQUAL "x86_64")
set(KERNEL_SOURCES
${KERNEL_SOURCES}
arch/x86_64/boot.S
arch/x86_64/GDT.cpp
arch/x86_64/IDT.cpp
arch/x86_64/interrupts.S
arch/x86_64/PageTable.cpp
arch/x86_64/Signal.S
@ -124,8 +124,7 @@ elseif("${BANAN_ARCH}" STREQUAL "i686")
set(KERNEL_SOURCES
${KERNEL_SOURCES}
arch/i686/boot.S
arch/i686/GDT.cpp
arch/i686/IDT.cpp
arch/i686/interrupts.S
arch/i686/PageTable.cpp
arch/i686/Signal.S
arch/i686/Syscall.S

View File

@ -1,21 +0,0 @@
#include <kernel/GDT.h>
namespace Kernel
{
GDT* GDT::create()
{
ASSERT_NOT_REACHED();
}
void GDT::write_entry(uint8_t, uint32_t, uint32_t, uint8_t, uint8_t)
{
ASSERT_NOT_REACHED();
}
void GDT::write_tss()
{
ASSERT_NOT_REACHED();
}
}

View File

@ -1,31 +0,0 @@
#include <kernel/IDT.h>
namespace Kernel
{
IDT* IDT::create()
{
ASSERT_NOT_REACHED();
}
[[noreturn]] void IDT::force_triple_fault()
{
ASSERT_NOT_REACHED();
}
void IDT::register_irq_handler(uint8_t, Interruptable*)
{
ASSERT_NOT_REACHED();
}
void IDT::register_interrupt_handler(uint8_t, void (*)())
{
ASSERT_NOT_REACHED();
}
void IDT::register_syscall_handler(uint8_t, void (*)())
{
ASSERT_NOT_REACHED();
}
}

View File

@ -1,144 +1,630 @@
#include <kernel/Memory/PageTable.h>
#include <kernel/CPUID.h>
#include <kernel/Lock/SpinLock.h>
#include <kernel/Memory/kmalloc.h>
#include <kernel/Memory/PageTable.h>
extern uint8_t g_kernel_start[];
extern uint8_t g_kernel_end[];
extern uint8_t g_kernel_execute_start[];
extern uint8_t g_kernel_execute_end[];
extern uint8_t g_userspace_start[];
extern uint8_t g_userspace_end[];
namespace Kernel
{
RecursiveSpinLock PageTable::s_fast_page_lock;
static PageTable* s_kernel = nullptr;
static bool s_has_nxe = false;
static bool s_has_pge = false;
static paddr_t s_global_pdpte = 0;
// Converts a raw 64-bit paging entry into the PageTable::Flags bits it encodes.
// Execute is reported only when NXE is in use and bit 63 of the entry is clear;
// every other flag is carried over when it is set in the entry.
static inline PageTable::flags_t parse_flags(uint64_t entry)
{
	using Flags = PageTable::Flags;

	PageTable::flags_t parsed = 0;

	// Adds `flag` to the result when the entry has it set.
	const auto carry_over = [&](auto flag)
	{
		if (entry & flag)
			parsed |= flag;
	};

	const bool no_execute = entry & (1ull << 63);
	if (s_has_nxe && !no_execute)
		parsed |= Flags::Execute;

	carry_over(Flags::Reserved);
	carry_over(Flags::CacheDisable);
	carry_over(Flags::UserSupervisor);
	carry_over(Flags::ReadWrite);
	carry_over(Flags::Present);

	return parsed;
}
void PageTable::initialize()
{
ASSERT_NOT_REACHED();
}
if (CPUID::has_nxe())
s_has_nxe = true;
PageTable& PageTable::kernel()
{
ASSERT_NOT_REACHED();
}
if (CPUID::has_pge())
s_has_pge = true;
bool PageTable::is_valid_pointer(uintptr_t)
{
ASSERT_NOT_REACHED();
}
ASSERT(s_kernel == nullptr);
s_kernel = new PageTable();
ASSERT(s_kernel);
BAN::ErrorOr<PageTable*> PageTable::create_userspace()
{
ASSERT_NOT_REACHED();
}
PageTable::~PageTable()
{
ASSERT_NOT_REACHED();
}
void PageTable::unmap_page(vaddr_t)
{
ASSERT_NOT_REACHED();
}
void PageTable::unmap_range(vaddr_t, size_t)
{
ASSERT_NOT_REACHED();
}
void PageTable::map_range_at(paddr_t, vaddr_t, size_t, flags_t)
{
ASSERT_NOT_REACHED();
}
void PageTable::map_page_at(paddr_t, vaddr_t, flags_t)
{
ASSERT_NOT_REACHED();
}
paddr_t PageTable::physical_address_of(vaddr_t) const
{
ASSERT_NOT_REACHED();
}
PageTable::flags_t PageTable::get_page_flags(vaddr_t) const
{
ASSERT_NOT_REACHED();
}
bool PageTable::is_page_free(vaddr_t) const
{
ASSERT_NOT_REACHED();
}
bool PageTable::is_range_free(vaddr_t, size_t) const
{
ASSERT_NOT_REACHED();
}
bool PageTable::reserve_page(vaddr_t, bool)
{
ASSERT_NOT_REACHED();
}
bool PageTable::reserve_range(vaddr_t, size_t, bool)
{
ASSERT_NOT_REACHED();
}
vaddr_t PageTable::reserve_free_page(vaddr_t, vaddr_t)
{
ASSERT_NOT_REACHED();
}
vaddr_t PageTable::reserve_free_contiguous_pages(size_t, vaddr_t, vaddr_t)
{
ASSERT_NOT_REACHED();
}
void PageTable::load()
{
ASSERT_NOT_REACHED();
s_kernel->initialize_kernel();
s_kernel->initial_load();
}
void PageTable::initial_load()
{
ASSERT_NOT_REACHED();
if (s_has_nxe)
{
asm volatile(
"movl $0xC0000080, %%ecx;"
"rdmsr;"
"orl $0x800, %%eax;"
"wrmsr"
::: "eax", "ecx", "edx", "memory"
);
}
if (s_has_pge)
{
asm volatile(
"movl %%cr4, %%eax;"
"orl $0x80, %%eax;"
"movl %%eax, %%cr4;"
::: "eax"
);
}
// enable write protect
asm volatile(
"movl %%cr0, %%eax;"
"orl $0x10000, %%eax;"
"movl %%eax, %%cr0;"
::: "rax"
);
load();
}
void PageTable::debug_dump()
PageTable& PageTable::kernel()
{
ASSERT_NOT_REACHED();
ASSERT(s_kernel);
return *s_kernel;
}
uint64_t PageTable::get_page_data(vaddr_t) const
bool PageTable::is_valid_pointer(uintptr_t)
{
ASSERT_NOT_REACHED();
return true;
}
// Allocates PAGE_SIZE bytes aligned to PAGE_SIZE through kmalloc, asserts that
// the allocation succeeded and returns the block with every byte set to zero.
static uint64_t* allocate_zeroed_page_aligned_page()
{
	uint64_t* const table = static_cast<uint64_t*>(kmalloc(PAGE_SIZE, PAGE_SIZE, true));
	ASSERT(table);
	memset(table, 0, PAGE_SIZE);
	return table;
}
void PageTable::initialize_kernel()
{
ASSERT_NOT_REACHED();
}
ASSERT(s_global_pdpte == 0);
s_global_pdpte = V2P(allocate_zeroed_page_aligned_page());
void PageTable::map_kernel_memory()
{
ASSERT_NOT_REACHED();
ASSERT(m_highest_paging_struct == 0);
m_highest_paging_struct = V2P(kmalloc(32, 32, true));
ASSERT(m_highest_paging_struct);
uint64_t* pdpt = reinterpret_cast<uint64_t*>(P2V(m_highest_paging_struct));
pdpt[0] = 0;
pdpt[1] = 0;
pdpt[2] = 0;
pdpt[3] = s_global_pdpte;
static_assert(KERNEL_OFFSET == 0xC0000000);
prepare_fast_page();
// Map main bios area below 1 MiB
map_range_at(
0x000E0000,
P2V(0x000E0000),
0x00100000 - 0x000E0000,
PageTable::Flags::Present
);
// Map (phys_kernel_start -> phys_kernel_end) to (virt_kernel_start -> virt_kernel_end)
ASSERT((vaddr_t)g_kernel_start % PAGE_SIZE == 0);
map_range_at(
V2P(g_kernel_start),
(vaddr_t)g_kernel_start,
g_kernel_end - g_kernel_start,
Flags::ReadWrite | Flags::Present
);
// Map executable kernel memory as executable
map_range_at(
V2P(g_kernel_execute_start),
(vaddr_t)g_kernel_execute_start,
g_kernel_execute_end - g_kernel_execute_start,
Flags::Execute | Flags::Present
);
// Map userspace memory
map_range_at(
V2P(g_userspace_start),
(vaddr_t)g_userspace_start,
g_userspace_end - g_userspace_start,
Flags::Execute | Flags::UserSupervisor | Flags::Present
);
}
void PageTable::prepare_fast_page()
{
ASSERT_NOT_REACHED();
constexpr uint64_t pdpte = (fast_page() >> 30) & 0x1FF;
constexpr uint64_t pde = (fast_page() >> 21) & 0x1FF;
constexpr uint64_t pte = (fast_page() >> 12) & 0x1FF;
uint64_t* pdpt = reinterpret_cast<uint64_t*>(P2V(m_highest_paging_struct));
ASSERT(!(pdpt[pdpte] & Flags::Present));
pdpt[pdpte] = V2P(allocate_zeroed_page_aligned_page()) | Flags::Present;
uint64_t* pd = reinterpret_cast<uint64_t*>(P2V(pdpt[pdpte]) & PAGE_ADDR_MASK);
ASSERT(!(pd[pde] & Flags::Present));
pd[pde] = V2P(allocate_zeroed_page_aligned_page()) | Flags::ReadWrite | Flags::Present;
uint64_t* pt = reinterpret_cast<uint64_t*>(P2V(pd[pde]) & PAGE_ADDR_MASK);
ASSERT(!(pt[pte] & Flags::Present));
pt[pte] = V2P(allocate_zeroed_page_aligned_page());
}
void PageTable::invalidate(vaddr_t)
void PageTable::map_fast_page(paddr_t paddr)
{
ASSERT_NOT_REACHED();
}
ASSERT(s_kernel);
ASSERT(paddr);
void PageTable::map_fast_page(paddr_t)
{
ASSERT_NOT_REACHED();
SpinLockGuard _(s_fast_page_lock);
constexpr uint64_t pdpte = (fast_page() >> 30) & 0x1FF;
constexpr uint64_t pde = (fast_page() >> 21) & 0x1FF;
constexpr uint64_t pte = (fast_page() >> 12) & 0x1FF;
uint64_t* pdpt = reinterpret_cast<uint64_t*>(P2V(s_kernel->m_highest_paging_struct));
uint64_t* pd = reinterpret_cast<uint64_t*>(P2V(pdpt[pdpte] & PAGE_ADDR_MASK));
uint64_t* pt = reinterpret_cast<uint64_t*>(P2V(pd[pde] & PAGE_ADDR_MASK));
ASSERT(!(pt[pte] & Flags::Present));
pt[pte] = paddr | Flags::ReadWrite | Flags::Present;
invalidate(fast_page());
}
// Removes the mapping currently installed at fast_page() in the kernel page table.
// Asserts that the kernel page table exists and that the slot is present.
void PageTable::unmap_fast_page()
{
ASSERT(s_kernel);
SpinLockGuard _(s_fast_page_lock);
// Split fast_page() into its PDPT / page directory / page table indices.
constexpr uint64_t pdpte = (fast_page() >> 30) & 0x1FF;
constexpr uint64_t pde = (fast_page() >> 21) & 0x1FF;
constexpr uint64_t pte = (fast_page() >> 12) & 0x1FF;
// Walk the kernel's paging structures; each level stores a physical address,
// so it is converted with P2V before being dereferenced.
uint64_t* pdpt = reinterpret_cast<uint64_t*>(P2V(s_kernel->m_highest_paging_struct));
uint64_t* pd = reinterpret_cast<uint64_t*>(P2V(pdpt[pdpte] & PAGE_ADDR_MASK));
uint64_t* pt = reinterpret_cast<uint64_t*>(P2V(pd[pde] & PAGE_ADDR_MASK));
ASSERT(pt[pte] & Flags::Present);
// Clear the entry first, then invalidate the address so the old translation is not reused.
pt[pte] = 0;
invalidate(fast_page());
}
// Creates a new page table for a userspace address space while holding the
// kernel page table's lock. Returns ENOMEM if the PageTable object cannot be
// allocated; otherwise the new table has map_kernel_memory() applied to it.
BAN::ErrorOr<PageTable*> PageTable::create_userspace()
{
	SpinLockGuard _(s_kernel->m_lock);

	PageTable* userspace_table = new PageTable;
	if (!userspace_table)
		return BAN::Error::from_errno(ENOMEM);

	userspace_table->map_kernel_memory();
	return userspace_table;
}
// Allocates this table's 4-entry PDPT and points its last entry at the global
// kernel page directory (s_global_pdpte). Requires that the kernel page table
// and s_global_pdpte already exist and that this table has no PDPT yet.
void PageTable::map_kernel_memory()
{
ASSERT(s_kernel);
ASSERT(s_global_pdpte);
ASSERT(m_highest_paging_struct == 0);
// 4 entries * 8 bytes = 32 bytes, allocated with 32 byte alignment
m_highest_paging_struct = V2P(kmalloc(32, 32, true));
ASSERT(m_highest_paging_struct);
uint64_t* pdpt = reinterpret_cast<uint64_t*>(P2V(m_highest_paging_struct));
pdpt[0] = 0;
pdpt[1] = 0;
pdpt[2] = 0;
// NOTE(review): this entry is stored without Flags::Present, while map_page_at
// writes PDPT entries with `| Flags::Present` and replaces entries that lack it.
// Confirm whether the missing Present bit here is intentional.
pdpt[3] = s_global_pdpte;
// Entry 3 covers 0xC0000000-0xFFFFFFFF, which is only the kernel half for this offset.
static_assert(KERNEL_OFFSET == 0xC0000000);
}
// Releases the paging structures owned by this table: every present page table,
// every present page directory referenced by PDPT entries 0-2, and finally the
// PDPT itself. PDPT entry 3 is not visited; map_kernel_memory() sets it to the
// global s_global_pdpte. The pages that the tables map are not freed here.
PageTable::~PageTable()
{
uint64_t* pdpt = reinterpret_cast<uint64_t*>(P2V(m_highest_paging_struct));
for (uint32_t pdpte = 0; pdpte < 3; pdpte++)
{
if (!(pdpt[pdpte] & Flags::Present))
continue;
uint64_t* pd = reinterpret_cast<uint64_t*>(P2V(pdpt[pdpte] & PAGE_ADDR_MASK));
for (uint32_t pde = 0; pde < 512; pde++)
{
if (!(pd[pde] & Flags::Present))
continue;
// free the page table referenced by this directory entry
kfree(reinterpret_cast<uint64_t*>(P2V(pd[pde] & PAGE_ADDR_MASK)));
}
kfree(pd);
}
kfree(pdpt);
}
// Makes this page table the active one by writing its PDPT's physical address
// to CR3, then records it as the current page table of the processor.
void PageTable::load()
{
SpinLockGuard _(m_lock);
// Only the low 32 bits are written to CR3 below, so the PDPT must lie below 4 GiB.
ASSERT(m_highest_paging_struct < 0x100000000);
const uint32_t pdpt_lo = m_highest_paging_struct;
asm volatile("movl %0, %%cr3" :: "r"(pdpt_lo));
Processor::set_current_page_table(this);
}
// Executes invlpg for the page at vaddr so a cached translation for it is dropped.
// vaddr must be page aligned.
void PageTable::invalidate(vaddr_t vaddr)
{
ASSERT(vaddr % PAGE_SIZE == 0);
asm volatile("invlpg (%0)" :: "r"(vaddr) : "memory");
}
// Clears the page table entry for vaddr and invalidates the address.
// vaddr must be non-zero, page aligned and different from fast_page().
// Kernel addresses may only be unmapped through the kernel page table and
// userspace addresses only through a non-kernel one; a mismatch panics.
// Unmapping a page that is already free only logs a warning.
void PageTable::unmap_page(vaddr_t vaddr)
{
ASSERT(vaddr);
ASSERT(vaddr % PAGE_SIZE == 0);
ASSERT(vaddr != fast_page());
if (vaddr >= KERNEL_OFFSET)
ASSERT(vaddr >= (vaddr_t)g_kernel_start);
if ((vaddr >= KERNEL_OFFSET) != (this == s_kernel))
Kernel::panic("unmapping {8H}, kernel: {}", vaddr, this == s_kernel);
// indices into the PDPT, page directory and page table
const uint64_t pdpte = (vaddr >> 30) & 0x1FF;
const uint64_t pde = (vaddr >> 21) & 0x1FF;
const uint64_t pte = (vaddr >> 12) & 0x1FF;
SpinLockGuard _(m_lock);
// is_page_free() also guarantees that the upper level entries are present
// before they are dereferenced below
if (is_page_free(vaddr))
{
dwarnln("unmapping unmapped page {8H}", vaddr);
return;
}
uint64_t* pdpt = reinterpret_cast<uint64_t*>(P2V(m_highest_paging_struct));
uint64_t* pd = reinterpret_cast<uint64_t*>(P2V(pdpt[pdpte] & PAGE_ADDR_MASK));
uint64_t* pt = reinterpret_cast<uint64_t*>(P2V(pd[pde] & PAGE_ADDR_MASK));
pt[pte] = 0;
invalidate(vaddr);
}
// Unmaps every page that overlaps [vaddr, vaddr + size) by calling unmap_page()
// for each of them while holding this table's lock.
void PageTable::unmap_range(vaddr_t vaddr, size_t size)
{
	const vaddr_t first_page = vaddr / PAGE_SIZE;
	const vaddr_t page_limit = BAN::Math::div_round_up<vaddr_t>(vaddr + size, PAGE_SIZE);

	SpinLockGuard _(m_lock);

	vaddr_t page = first_page;
	while (page < page_limit)
	{
		unmap_page(page * PAGE_SIZE);
		page++;
	}
}
// Maps the page at vaddr to the physical page paddr with the given flags,
// allocating the page directory and page table on the way if they are missing.
// paddr and vaddr must be page aligned, vaddr must be non-zero and different
// from fast_page(), and flags must contain at least one bit of Flags::Used.
// Kernel addresses may only be mapped through the kernel page table and
// userspace addresses only through a non-kernel one; a mismatch panics.
void PageTable::map_page_at(paddr_t paddr, vaddr_t vaddr, flags_t flags)
{
ASSERT(vaddr);
ASSERT(vaddr != fast_page());
if ((vaddr >= KERNEL_OFFSET) != (this == s_kernel))
Kernel::panic("mapping {8H} to {8H}, kernel: {}", paddr, vaddr, this == s_kernel);
ASSERT(paddr % PAGE_SIZE == 0);
ASSERT(vaddr % PAGE_SIZE == 0);
ASSERT(flags & Flags::Used);
// indices into the PDPT, page directory and page table
const uint64_t pdpte = (vaddr >> 30) & 0x1FF;
const uint64_t pde = (vaddr >> 21) & 0x1FF;
const uint64_t pte = (vaddr >> 12) & 0x1FF;
// bits that are only applied to the final page table entry
uint64_t extra_flags = 0;
if (s_has_pge && vaddr >= KERNEL_OFFSET) // Map kernel memory as global
extra_flags |= 1ull << 8;
// bit 63 marks the page non-executable; only used when NXE is available
if (s_has_nxe && !(flags & Flags::Execute))
extra_flags |= 1ull << 63;
if (flags & Flags::Reserved)
extra_flags |= Flags::Reserved;
if (flags & Flags::CacheDisable)
extra_flags |= Flags::CacheDisable;
// NOTE: we add present here, since it has to be available in higher level structures
flags_t uwr_flags = (flags & (Flags::UserSupervisor | Flags::ReadWrite)) | Flags::Present;
SpinLockGuard _(m_lock);
uint64_t* pdpt = reinterpret_cast<uint64_t*>(P2V(m_highest_paging_struct));
// PDPT entries only ever receive the Present flag here
if (!(pdpt[pdpte] & Flags::Present))
pdpt[pdpte] = V2P(allocate_zeroed_page_aligned_page()) | Flags::Present;
uint64_t* pd = reinterpret_cast<uint64_t*>(P2V(pdpt[pdpte] & PAGE_ADDR_MASK));
// widen the directory entry's permissions if it lacks any that this mapping needs
if ((pd[pde] & uwr_flags) != uwr_flags)
{
if (!(pd[pde] & Flags::Present))
pd[pde] = V2P(allocate_zeroed_page_aligned_page());
pd[pde] |= uwr_flags;
}
// the leaf entry is only present if the caller asked for it (e.g. not for reservations)
if (!(flags & Flags::Present))
uwr_flags &= ~Flags::Present;
uint64_t* pt = reinterpret_cast<uint64_t*>(P2V(pd[pde] & PAGE_ADDR_MASK));
pt[pte] = paddr | uwr_flags | extra_flags;
invalidate(vaddr);
}
// Maps range_page_count(vaddr, size) consecutive pages starting at vaddr to the
// consecutive physical pages starting at paddr, all with the same flags.
// vaddr must be non-zero and both addresses must be page aligned.
void PageTable::map_range_at(paddr_t paddr, vaddr_t vaddr, size_t size, flags_t flags)
{
	ASSERT(vaddr);
	ASSERT(paddr % PAGE_SIZE == 0);
	ASSERT(vaddr % PAGE_SIZE == 0);

	const size_t total_pages = range_page_count(vaddr, size);

	SpinLockGuard _(m_lock);
	for (size_t index = 0; index < total_pages; index++)
	{
		const size_t byte_offset = index * PAGE_SIZE;
		map_page_at(paddr + byte_offset, vaddr + byte_offset, flags);
	}
}
// Returns the raw page table entry for the page aligned address vaddr.
// Returns 0 when the PDPT entry or the page directory entry is not present,
// or when the page table entry has none of the Flags::Used bits set.
uint64_t PageTable::get_page_data(vaddr_t vaddr) const
{
ASSERT(vaddr % PAGE_SIZE == 0);
// indices into the PDPT, page directory and page table
const uint64_t pdpte = (vaddr >> 30) & 0x1FF;
const uint64_t pde = (vaddr >> 21) & 0x1FF;
const uint64_t pte = (vaddr >> 12) & 0x1FF;
SpinLockGuard _(m_lock);
uint64_t* pdpt = (uint64_t*)P2V(m_highest_paging_struct);
if (!(pdpt[pdpte] & Flags::Present))
return 0;
uint64_t* pd = (uint64_t*)P2V(pdpt[pdpte] & PAGE_ADDR_MASK);
if (!(pd[pde] & Flags::Present))
return 0;
uint64_t* pt = (uint64_t*)P2V(pd[pde] & PAGE_ADDR_MASK);
if (!(pt[pte] & Flags::Used))
return 0;
return pt[pte];
}
// Returns the flags of the page at vaddr, decoded from its raw page table entry.
PageTable::flags_t PageTable::get_page_flags(vaddr_t vaddr) const
{
	const uint64_t raw_entry = get_page_data(vaddr);
	return parse_flags(raw_entry);
}
// Returns the physical address stored in the page table entry for vaddr:
// the entry masked with PAGE_ADDR_MASK and with bit 63 cleared.
paddr_t PageTable::physical_address_of(vaddr_t vaddr) const
{
	const uint64_t bit_63 = 1ull << 63;
	const uint64_t raw_entry = get_page_data(vaddr);
	return (raw_entry & PAGE_ADDR_MASK) & ~bit_63;
}
// Tells whether the page at the page aligned address vaddr is unused,
// i.e. its flags contain none of the Flags::Used bits.
bool PageTable::is_page_free(vaddr_t vaddr) const
{
	ASSERT(vaddr % PAGE_SIZE == 0);

	const auto current_flags = get_page_flags(vaddr);
	return (current_flags & Flags::Used) == 0;
}
// Tells whether every page overlapping [vaddr, vaddr + size) is free.
// The scan stops at the first page that is in use.
bool PageTable::is_range_free(vaddr_t vaddr, size_t size) const
{
	const vaddr_t first_page = vaddr / PAGE_SIZE;
	const vaddr_t page_limit = BAN::Math::div_round_up<vaddr_t>(vaddr + size, PAGE_SIZE);

	SpinLockGuard _(m_lock);

	vaddr_t page = first_page;
	while (page < page_limit && is_page_free(page * PAGE_SIZE))
		page++;

	// reaching the limit means no used page was encountered
	return page >= page_limit;
}
// Marks the page at the page aligned address vaddr as reserved by mapping it
// to physical address 0 with Flags::Reserved. When only_free is set and the
// page is already in use, nothing is changed and false is returned.
bool PageTable::reserve_page(vaddr_t vaddr, bool only_free)
{
	SpinLockGuard _(m_lock);
	ASSERT(vaddr % PAGE_SIZE == 0);

	const bool may_reserve = !only_free || is_page_free(vaddr);
	if (may_reserve)
		map_page_at(0, vaddr, Flags::Reserved);
	return may_reserve;
}
// Reserves every page of the range starting at the page aligned address vaddr.
// bytes is rounded up to a whole number of pages. When only_free is set and
// any page of the range is in use, nothing is reserved and false is returned.
bool PageTable::reserve_range(vaddr_t vaddr, size_t bytes, bool only_free)
{
	const size_t remainder = bytes % PAGE_SIZE;
	if (remainder != 0)
		bytes += PAGE_SIZE - remainder;

	ASSERT(vaddr % PAGE_SIZE == 0);

	SpinLockGuard _(m_lock);

	if (only_free)
	{
		if (!is_range_free(vaddr, bytes))
			return false;
	}

	size_t offset = 0;
	while (offset < bytes)
	{
		reserve_page(vaddr + offset);
		offset += PAGE_SIZE;
	}
	return true;
}
// Finds a free page in [first_address, last_address), reserves it and returns
// its address.
//
// A first_address inside the kernel image is moved up to g_kernel_end;
// first_address is then rounded up and last_address rounded down to a page
// boundary.
//
// The search has two phases:
//   1. look for an unused entry in page tables that already exist, so the
//      reservation needs no new paging structures
//   2. fall back to a linear scan over every page of the range
//
// Asserts if the range contains no free page.
vaddr_t PageTable::reserve_free_page(vaddr_t first_address, vaddr_t last_address)
{
	if (first_address >= KERNEL_OFFSET && first_address < (vaddr_t)g_kernel_end)
		first_address = (vaddr_t)g_kernel_end;
	if (size_t rem = first_address % PAGE_SIZE)
		first_address += PAGE_SIZE - rem;
	if (size_t rem = last_address % PAGE_SIZE)
		last_address -= rem;

	const uint32_t s_pdpte = (first_address >> 30) & 0x1FF;
	const uint32_t s_pde   = (first_address >> 21) & 0x1FF;
	const uint32_t s_pte   = (first_address >> 12) & 0x1FF;

	const uint32_t e_pdpte = (last_address >> 30) & 0x1FF;
	const uint32_t e_pde   = (last_address >> 21) & 0x1FF;
	const uint32_t e_pte   = (last_address >> 12) & 0x1FF;

	SpinLockGuard _(m_lock);

	// Try to find free page that can be mapped without
	// allocations (page table with unused entries)
	uint64_t* pdpt = reinterpret_cast<uint64_t*>(P2V(m_highest_paging_struct));
	for (uint32_t pdpte = s_pdpte; pdpte < 4; pdpte++)
	{
		if (pdpte > e_pdpte)
			break;
		if (!(pdpt[pdpte] & Flags::Present))
			continue;
		uint64_t* pd = reinterpret_cast<uint64_t*>(P2V(pdpt[pdpte] & PAGE_ADDR_MASK));

		// Only the first page directory of the range is entered part-way
		// through; every later directory is searched from its first entry.
		const uint32_t first_pde = (pdpte == s_pdpte) ? s_pde : 0;
		for (uint32_t pde = first_pde; pde < 512; pde++)
		{
			if (pdpte == e_pdpte && pde > e_pde)
				break;
			if (!(pd[pde] & Flags::Present))
				continue;
			uint64_t* pt = (uint64_t*)P2V(pd[pde] & PAGE_ADDR_MASK);

			// Likewise only the first page table of the range starts at s_pte.
			const uint32_t first_pte = (pdpte == s_pdpte && pde == s_pde) ? s_pte : 0;
			for (uint32_t pte = first_pte; pte < 512; pte++)
			{
				if (pdpte == e_pdpte && pde == e_pde && pte >= e_pte)
					break;
				if (!(pt[pte] & Flags::Used))
				{
					vaddr_t vaddr = 0;
					vaddr |= (vaddr_t)pdpte << 30;
					vaddr |= (vaddr_t)pde   << 21;
					vaddr |= (vaddr_t)pte   << 12;
					ASSERT(reserve_page(vaddr));
					return vaddr;
				}
			}
		}
	}

	// Find any free page
	for (vaddr_t vaddr = first_address; vaddr < last_address; vaddr += PAGE_SIZE)
	{
		if (is_page_free(vaddr))
		{
			ASSERT(reserve_page(vaddr));
			return vaddr;
		}
	}

	ASSERT_NOT_REACHED();
}
// Finds page_count consecutive free pages starting somewhere in
// [first_address, last_address), reserves them and returns the address of the
// first one. first_address is rounded up and last_address rounded down to a
// page boundary. Asserts if no suitable run of pages is found.
vaddr_t PageTable::reserve_free_contiguous_pages(size_t page_count, vaddr_t first_address, vaddr_t last_address)
{
// NOTE(review): reserve_free_page() clamps against g_kernel_end here, while this
// function clamps against g_kernel_start — confirm the difference is intended.
if (first_address >= KERNEL_OFFSET && first_address < (vaddr_t)g_kernel_start)
first_address = (vaddr_t)g_kernel_start;
if (size_t rem = first_address % PAGE_SIZE)
first_address += PAGE_SIZE - rem;
if (size_t rem = last_address % PAGE_SIZE)
last_address -= rem;
SpinLockGuard _(m_lock);
for (vaddr_t vaddr = first_address; vaddr < last_address;)
{
bool valid { true };
// NOTE(review): only the start of the candidate run is compared against
// last_address; the pages checked below may extend past it — verify.
for (size_t page = 0; page < page_count; page++)
{
if (!is_page_free(vaddr + page * PAGE_SIZE))
{
// restart the search right after the page that is in use
vaddr += (page + 1) * PAGE_SIZE;
valid = false;
break;
}
}
if (valid)
{
ASSERT(reserve_range(vaddr, page_count * PAGE_SIZE));
return vaddr;
}
}
ASSERT_NOT_REACHED();
}
// Prints one line describing the address range [start, end) and its flags as
// four characters: x (execute), u (user), w (writable), r (present); a flag
// that is not set is shown as '-'. Ranges with start == 0 are not printed.
static void dump_range(vaddr_t start, vaddr_t end, PageTable::flags_t flags)
{
	using Flags = PageTable::Flags;

	if (start != 0)
	{
		char execute  = (flags & Flags::Execute)        ? 'x' : '-';
		char user     = (flags & Flags::UserSupervisor) ? 'u' : '-';
		char writable = (flags & Flags::ReadWrite)      ? 'w' : '-';
		char readable = (flags & Flags::Present)        ? 'r' : '-';

		dprintln("{}-{}: {}{}{}{}",
			(void*)(start), (void*)(end - 1),
			execute, user, writable, readable
		);
	}
}
// Walks all four PDPT entries of this table and prints, through dump_range(),
// the runs of consecutive used pages that share the same flags.
// `start` is the first address of the run being collected (0 = no open run)
// and `flags` are the flags of that run.
void PageTable::debug_dump()
{
SpinLockGuard _(m_lock);
flags_t flags = 0;
vaddr_t start = 0;
uint64_t* pdpt = reinterpret_cast<uint64_t*>(P2V(m_highest_paging_struct));
for (uint32_t pdpte = 0; pdpte < 4; pdpte++)
{
// a missing page directory ends the open run at the start of its region
if (!(pdpt[pdpte] & Flags::Present))
{
dump_range(start, (pdpte << 30), flags);
start = 0;
continue;
}
uint64_t* pd = (uint64_t*)P2V(pdpt[pdpte] & PAGE_ADDR_MASK);
for (uint64_t pde = 0; pde < 512; pde++)
{
// a missing page table ends the open run as well
if (!(pd[pde] & Flags::Present))
{
dump_range(start, (pdpte << 30) | (pde << 21), flags);
start = 0;
continue;
}
uint64_t* pt = (uint64_t*)P2V(pd[pde] & PAGE_ADDR_MASK);
for (uint64_t pte = 0; pte < 512; pte++)
{
// a change of flags ends the open run at this page
if (parse_flags(pt[pte]) != flags)
{
dump_range(start, (pdpte << 30) | (pde << 21) | (pte << 12), flags);
start = 0;
}
if (!(pt[pte] & Flags::Used))
continue;
// open a new run at the first used page after a gap or a flag change
if (start == 0)
{
flags = parse_flags(pt[pte]);
start = (pdpte << 30) | (pde << 21) | (pte << 12);
}
}
}
}
}
}

View File

@ -7,12 +7,35 @@ read_ip:
# void start_thread(uint32_t sp, uint32_t ip)
.global start_thread
start_thread:
ud2
movl 8(%esp), %edi # ip
movl 4(%esp), %esp # sp
# STACK LAYOUT
# NULL
# thread ptr
# &Thread::on_exit
# data
xorl %ebp, %ebp
sti
call *%edi
movl 4(%esp), %edi # &Thread::on_exit
movl 8(%esp), %eax # thread ptr
movl %eax, (%esp)
call *%edi
# void continue_thread(uint32_t sp, uint32_t ip)
.global continue_thread
continue_thread:
ud2
movl 8(%esp), %edi # ip
movl 4(%esp), %esp # sp
xorl %eax, %eax
jmp *%edi
# void thread_userspace_trampoline(uint32_t sp, uint32_t ip, int argc, char** argv, char** envp)
.global thread_userspace_trampoline

View File

@ -53,6 +53,7 @@ bananboot_start:
bananboot_end:
.section .bss, "aw", @nobits
.align 4096
boot_stack_bottom:
.skip 4096 * 4
boot_stack_top:
@ -68,11 +69,23 @@ bananboot_end:
.section .data
# Map first GiB to 0x00000000 and 0xC0000000
.align 32
boot_pdpt:
.long V2P(boot_pd) + (PG_PRESENT)
.long 0
.quad 0
.quad 0
.long V2P(boot_pd) + (PG_PRESENT)
.long 0
.align 4096
boot_pml4:
boot_pdpt_lo:
boot_pdpt_hi:
boot_pd:
.set i, 0
.rept 512
.long i + (PG_PAGE_SIZE | PG_READ_WRITE | PG_PRESENT)
.long 0
.set i, i + 0x200000
.endr
boot_gdt:
.quad 0x0000000000000000 # null descriptor
@ -106,9 +119,25 @@ has_cpuid:
testl $0x00200000, %eax
ret
# Checks whether the processor supports PAE.
# Returns with ZF set when PAE is NOT available (callers test it with jz).
# Must only be called after has_cpuid has confirmed that cpuid exists.
# Clobbers eax, ebx, ecx and edx (written by cpuid).
has_pae:
# feature flags are reported by CPUID leaf 1; leaf 0 only returns the
# maximum leaf and the vendor string, so EDX would hold vendor characters
movl $1, %eax
cpuid
# leaf 1 EDX bit 6 = PAE
testl $(1 << 6), %edx
ret
# Checks bit 25 of EDX from CPUID leaf 1 (the SSE check, per the routine's name).
# Returns with ZF set when that bit is clear (callers test it with jz).
# eax, ebx, ecx and edx are overwritten by cpuid.
has_sse:
movl $1, %eax
cpuid
testl $(1 << 25), %edx
ret
# Verifies that the processor provides everything the kernel needs: cpuid, PAE and SSE.
# Each check returns with ZF set on failure, in which case the system is halted.
check_requirements:
call has_cpuid
jz .exit
call has_pae
jz .exit
call has_sse
jz .exit
ret
# a required feature is missing
.exit:
jmp system_halt
@ -126,16 +155,16 @@ enable_sse:
initialize_paging:
# enable PAE
movl %cr4, %ecx
orl $0x20, %ecx
orl $(1 << 5), %ecx
movl %ecx, %cr4
# set address of paging structures
movl $V2P(boot_pml4), %ecx
# load page tables
movl $V2P(boot_pdpt), %ecx
movl %ecx, %cr3
# enable paging
movl %cr0, %ecx
orl $0x80000000, %ecx
orl $(1 << 31), %ecx
movl %ecx, %cr0
ret
@ -145,30 +174,30 @@ initialize_paging:
_start:
cli; cld
# Initialize stack and multiboot info
# save bootloader magic and info
movl %eax, V2P(bootloader_magic)
movl %ebx, V2P(bootloader_info)
# load boot stack
movl $V2P(boot_stack_top), %esp
call check_requirements
call enable_sse
call initialize_paging
# flush gdt
# load boot GDT
lgdt V2P(boot_gdtr)
ljmpl $0x08, $V2P(gdt_flush)
gdt_flush:
# set correct segment registers
movw $0x10, %ax
movw %ax, %ds
movw %ax, %ss
movw %ax, %es
# move stack pointer to higher half
movl %esp, %esp
addl $KERNEL_OFFSET, %esp
# do processor initialization
call check_requirements
call enable_sse
call initialize_paging
# load higher half stack pointer
movl $boot_stack_top, %esp
# jump to higher half
leal higher_half, %ecx
@ -181,9 +210,11 @@ higher_half:
# call to the kernel itself (clear ebp for stacktrace)
xorl %ebp, %ebp
movl V2P(bootloader_magic), %edi
movl V2P(bootloader_info), %esi
subl $8, %esp
pushl bootloader_info
pushl bootloader_magic
call kernel_main
addl $16, %esp
# call global destructors
call _fini
@ -200,6 +231,7 @@ system_halt:
.code16
.global ap_trampoline
ap_trampoline:
ud2
jmp 1f
.align 8
ap_stack_ptr:

View File

@ -0,0 +1,161 @@
// Common handler for the entry points generated by the isr / isr_err macros.
// On entry the stack holds, from the top: isr number, error code and then
// the interrupt frame. Calls cpp_isr_handler and returns with iret.
isr_stub:
pusha
// save the control registers below the general purpose registers
movl %cr0, %eax; pushl %eax
movl %cr2, %eax; pushl %eax
movl %cr3, %eax; pushl %eax
movl %cr4, %eax; pushl %eax
// pusha (32 bytes) + 4 control registers (16 bytes) = 48 bytes pushed so far
movl %esp, %eax // register ptr
leal 56(%esp), %ebx // interrupt stack ptr
movl 52(%esp), %ecx // error code
movl 48(%esp), %edx // isr number
// 12 bytes of padding before the arguments (presumably for stack alignment — confirm)
subl $12, %esp
// arguments are pushed last to first:
// (isr number, error code, interrupt stack ptr, register ptr)
pushl %eax
pushl %ebx
pushl %ecx
pushl %edx
call cpp_isr_handler
// release padding + arguments (28 bytes) and the control registers (16 bytes)
addl $44, %esp
popa
// skip isr number and error code
addl $8, %esp
iret
// Common handler for the entry points generated by the irq macro.
// On entry the stack holds, from the top: irq number, a zero placeholder and
// then the interrupt frame. Calls cpp_irq_handler and returns with iret.
irq_stub:
pusha
// pusha pushed 32 bytes, so the values pushed by the irq macro start at 32(%esp)
leal 40(%esp), %eax // interrupt stack ptr
movl 32(%esp), %ebx // irq number
// 12 bytes of padding before the arguments (presumably for stack alignment — confirm)
subl $12, %esp
// arguments are pushed last to first: (irq number, interrupt stack ptr)
pushl %eax
pushl %ebx
call cpp_irq_handler
// release padding + arguments (20 bytes)
addl $20, %esp
popa
// skip irq number and the zero placeholder
addl $8, %esp
iret
// Entry point for system calls; forwards the register arguments to cpp_syscall_handler.
// arguments in EAX, EBX, ECX, EDX, ESI, EDI
.global syscall_asm
syscall_asm:
// ud2 raises an invalid opcode exception, so the code below is not
// reached as written (presumably a not-yet-implemented marker — confirm)
ud2
pusha
// push a pointer derived from the stack pointer as the last argument
pushl %esp
addl $36, (%esp)
// arguments are pushed last to first, so EAX becomes the first argument
pushl %edi
pushl %esi
pushl %edx
pushl %ecx
pushl %ebx
pushl %eax
call cpp_syscall_handler
// NOTE(review): 7 values (28 bytes) were pushed after pusha, but 60 bytes are
// released here, which also skips the 32 byte pusha area; the pops below would
// then read above the saved registers. Verify before removing the ud2.
addl $60, %esp
// restore the registers in pusha order; the two `addl $4` skip the saved
// ESP and EAX slots, so EAX is left as set by cpp_syscall_handler
popl %edi
popl %esi
popl %ebp
addl $4, %esp
popl %ebx
popl %edx
popl %ecx
addl $4, %esp
iret
// Defines the global entry point isr<n>.
// Pushes a zero in place of an error code, then the isr number,
// so isr_stub sees the same stack layout as with isr_err.
.macro isr n
.global isr\n
isr\n:
pushl $0
pushl $\n
jmp isr_stub
.endm
// Defines the global entry point isr<n> without pushing a placeholder;
// only the isr number is pushed. Presumably used for exceptions where the
// processor has already pushed an error code — confirm against the vector list.
.macro isr_err n
.global isr\n
isr\n:
pushl $\n
jmp isr_stub
.endm
// Defines the global entry point irq<n>.
// Pushes a zero placeholder and the irq number, then jumps to irq_stub,
// which removes both values again before iret.
.macro irq n
.global irq\n
irq\n:
pushl $0
pushl $\n
jmp irq_stub
.endm
isr 0
isr 1
isr 2
isr 3
isr 4
isr 5
isr 6
isr 7
isr_err 8
isr 9
isr_err 10
isr_err 11
isr_err 12
isr_err 13
isr_err 14
isr 15
isr 16
isr_err 17
isr 18
isr 19
isr 20
isr 21
isr 22
isr 23
isr 24
isr 25
isr 26
isr 27
isr 28
isr 29
isr 30
isr 31
irq 0
irq 1
irq 2
irq 3
irq 4
irq 5
irq 6
irq 7
irq 8
irq 9
irq 10
irq 11
irq 12
irq 13
irq 14
irq 15
irq 16
irq 17
irq 18
irq 19
irq 20
irq 21
irq 22
irq 23
irq 24
irq 25
irq 26
irq 27
irq 28
irq 29
irq 30
irq 31
irq 32

View File

@ -1,56 +0,0 @@
#include <kernel/GDT.h>
#include <kernel/Debug.h>
#include <string.h>
namespace Kernel
{
GDT* GDT::create()
{
auto* gdt = new GDT();
ASSERT(gdt);
gdt->write_entry(0x00, 0x00000000, 0x00000, 0x00, 0x0); // null
gdt->write_entry(0x08, 0x00000000, 0xFFFFF, 0x9A, 0xA); // kernel code
gdt->write_entry(0x10, 0x00000000, 0xFFFFF, 0x92, 0xC); // kernel data
gdt->write_entry(0x18, 0x00000000, 0xFFFFF, 0xFA, 0xA); // user code
gdt->write_entry(0x20, 0x00000000, 0xFFFFF, 0xF2, 0xC); // user data
gdt->write_tss();
return gdt;
}
void GDT::write_entry(uint8_t offset, uint32_t base, uint32_t limit, uint8_t access, uint8_t flags)
{
ASSERT(offset % sizeof(SegmentDescriptor) == 0);
uint8_t idx = offset / sizeof(SegmentDescriptor);
auto& desc = m_gdt[idx];
desc.base1 = (base >> 0) & 0xFFFF;
desc.base2 = (base >> 16) & 0xFF;
desc.base3 = (base >> 24) & 0xFF;
desc.limit1 = (limit >> 0) & 0xFFFF;
desc.limit2 = (limit >> 16) & 0x0F;
desc.access = access & 0xFF;
desc.flags = flags & 0x0F;
}
void GDT::write_tss()
{
memset(&m_tss, 0x00, sizeof(TaskStateSegment));
m_tss.iopb = sizeof(TaskStateSegment);
uint64_t base = reinterpret_cast<uint64_t>(&m_tss);
write_entry(0x28, (uint32_t)base, sizeof(TaskStateSegment), 0x89, 0x0);
auto& desc = m_gdt[0x30 / sizeof(SegmentDescriptor)];
desc.low = base >> 32;
desc.high = 0;
}
}

View File

@ -4,36 +4,19 @@ read_ip:
popq %rax
jmp *%rax
exit_thread_trampoline:
# void start_thread()
.global start_kernel_thread
start_kernel_thread:
# STACK LAYOUT
# on_exit arg
# on_exit func
# entry arg
# entry func
movq 8(%rsp), %rdi
ret
movq 0(%rsp), %rsi
call *%rsi
# void start_thread(uint64_t sp, uint64_t ip)
.global start_thread
start_thread:
movq %rdi, %rsp
popq %rdi
movq $0, %rbp
pushq $exit_thread_trampoline
sti
jmp *%rsi
# void continue_thread(uint64_t sp, uint64_t ip)
.global continue_thread
continue_thread:
movq %rdi, %rsp
movq $0, %rax
jmp *%rsi
# void thread_userspace_trampoline(uint64_t sp, uint64_t ip, int argc, char** argv, char** envp)
.global thread_userspace_trampoline
thread_userspace_trampoline:
pushq $0x23
pushq %rdi
pushfq
pushq $0x1B
pushq %rsi
movq %rdx, %rdi
movq %rcx, %rsi
movq %r8, %rdx
iretq
movq 24(%rsp), %rdi
movq 16(%rsp), %rsi
call *%rsi

View File

@ -1,11 +1,11 @@
.macro pushaq
pushq %rax
pushq %rbx
pushq %rcx
pushq %rdx
pushq %rbx
pushq %rbp
pushq %rdi
pushq %rsi
pushq %rdi
pushq %r8
pushq %r9
pushq %r10
@ -25,12 +25,12 @@
popq %r10
popq %r9
popq %r8
popq %rsi
popq %rdi
popq %rsi
popq %rbp
popq %rbx
popq %rdx
popq %rcx
popq %rbx
popq %rax
.endm
@ -43,44 +43,36 @@
popq %r10
popq %r9
popq %r8
popq %rsi
popq %rdi
popq %rsi
popq %rbp
popq %rbx
popq %rdx
popq %rcx
popq %rbx
.endm
isr_stub:
pushaq
movq %cr0, %rax; pushq %rax
movq %cr2, %rax; pushq %rax
movq %cr3, %rax; pushq %rax
movq %cr4, %rax; pushq %rax
movq 184(%rsp), %rax; pushq %rax
movq 176(%rsp), %rax; pushq %rax
movq 208(%rsp), %rax; pushq %rax
movq 176(%rsp), %rdi
movq 184(%rsp), %rsi
movq %rsp, %rdx
addq $192, %rdx
movq %rsp, %rcx
movq %cr0, %rax; pushq %rax
movq %cr2, %rax; pushq %rax
movq %cr3, %rax; pushq %rax
movq %cr4, %rax; pushq %rax
movq 152(%rsp), %rdi // isr number
movq 160(%rsp), %rsi // error code
leaq 168(%rsp), %rdx // interrupt stack ptr
movq %rsp, %rcx // register ptr
call cpp_isr_handler
addq $56, %rsp
addq $32, %rsp
popaq
addq $16, %rsp
iretq
irq_stub:
pushaq
movq 0x78(%rsp), %rdi # irq number
movq %rsp, %rsi
addq $136, %rsi
movq 120(%rsp), %rdi # irq number
call cpp_irq_handler
popaq
addq $16, %rsp
@ -174,7 +166,15 @@ irq 28
irq 29
irq 30
irq 31
irq 32
# Interrupt entry point that saves the general purpose registers with pushaq,
# calls cpp_reschedule_handler(interrupt stack ptr, interrupt register ptr)
# and returns with iretq.
# The 120 byte offset presumably equals the size of the pushaq block
# (15 registers * 8 bytes) — confirm against the pushaq macro.
.global asm_reschedule_handler
asm_reschedule_handler:
pushaq
leaq 120(%rsp), %rdi # interrupt stack ptr
movq %rsp, %rsi # interrupt register ptr
call cpp_reschedule_handler
popaq
iretq
// arguments in RAX, RBX, RCX, RDX, RSI, RDI
// System V ABI: RDI, RSI, RDX, RCX, R8, R9

View File

@ -29,12 +29,13 @@ namespace Kernel
BAN::Array<paddr_t, 5> block;
static constexpr size_t direct_block_count = 2;
#elif ARCH(i686)
// 14x direct blocks
uint32_t __padding;
// 5x direct blocks
// 1x singly indirect
// 1x doubly indirect
// 1x triply indirect
BAN::Array<paddr_t, 17> block;
static constexpr size_t direct_block_count = 14;
BAN::Array<paddr_t, 8> block;
static constexpr size_t direct_block_count = 5;
#else
#error
#endif

View File

@ -2,12 +2,14 @@
#include <BAN/Array.h>
#include <BAN/NoCopyMove.h>
#include <kernel/Arch.h>
#include <stdint.h>
namespace Kernel
{
#if ARCH(x86_64)
struct TaskStateSegment
{
uint32_t reserved1;
@ -26,6 +28,54 @@ namespace Kernel
uint16_t reserved4;
uint16_t iopb;
} __attribute__((packed));
static_assert(sizeof(TaskStateSegment) == 104);
#elif ARCH(i686)
// Task state segment for i686. The field order and widths define the in-memory
// layout (presumably the hardware defined 32 bit TSS format — confirm), so the
// fields must not be reordered. The __reservedN members pad each 16 bit field
// to 32 bits.
struct TaskStateSegment
{
uint16_t link;
uint16_t __reserved0;
// esp0 is the field written by GDT::set_tss_stack() on i686
uint32_t esp0;
uint16_t ss0;
uint16_t __reserved1;
uint32_t esp1;
uint16_t ss1;
uint16_t __reserved2;
uint32_t esp2;
uint16_t ss2;
uint16_t __reserved3;
uint32_t cr3;
uint32_t eip;
uint32_t eflags;
uint32_t eax;
uint32_t ecx;
uint32_t edx;
uint32_t ebx;
uint32_t esp;
uint32_t ebp;
uint32_t esi;
uint32_t edi;
uint16_t es;
uint16_t __reserved4;
uint16_t cs;
uint16_t __reserved5;
uint16_t ss;
uint16_t __reserved6;
uint16_t ds;
uint16_t __reserved7;
uint16_t fs;
uint16_t __reserved8;
uint16_t gs;
uint16_t __reserved9;
uint16_t ldtr;
uint16_t __reserved10;
uint16_t __reserved11;
uint16_t iopb;
uint32_t ssp;
};
static_assert(sizeof(TaskStateSegment) == 108);
#else
#error
#endif
union SegmentDescriptor
{
@ -38,20 +88,20 @@ namespace Kernel
uint8_t limit2 : 4;
uint8_t flags : 4;
uint8_t base3;
} __attribute__((packed));
};
struct
{
uint32_t low;
uint32_t high;
} __attribute__((packed));
} __attribute__((packed));
};
};
static_assert(sizeof(SegmentDescriptor) == 8);
struct GDTR
{
uint16_t size;
uint64_t address;
uintptr_t address;
} __attribute__((packed));
class GDT
@ -60,7 +110,7 @@ namespace Kernel
BAN_NON_MOVABLE(GDT);
public:
static GDT* create();
static GDT* create(void* processor);
void load() { flush_gdt(); flush_tss(); }
static constexpr inline bool is_user_segment(uint8_t segment)
@ -68,9 +118,13 @@ namespace Kernel
return (segment & 3) == 3;
}
void set_tss_stack(uintptr_t rsp)
void set_tss_stack(uintptr_t sp)
{
m_tss.rsp0 = rsp;
#if ARCH(x86_64)
m_tss.rsp0 = sp;
#elif ARCH(i686)
m_tss.esp0 = sp;
#endif
}
private:
@ -86,15 +140,21 @@ namespace Kernel
void flush_tss()
{
asm volatile("ltr %0" :: "rm"((uint16_t)0x28) : "memory");
asm volatile("ltr %0" :: "rm"(m_tss_offset) : "memory");
}
private:
#if ARCH(x86_64)
BAN::Array<SegmentDescriptor, 7> m_gdt; // null, kernel code, kernel data, user code, user data, tss low, tss high
static constexpr uint16_t m_tss_offset = 0x28;
#elif ARCH(i686)
BAN::Array<SegmentDescriptor, 7> m_gdt; // null, kernel code, kernel data, user code, user data, processor data, tss
static constexpr uint16_t m_tss_offset = 0x30;
#endif
TaskStateSegment m_tss;
const GDTR m_gdtr {
.size = m_gdt.size() * sizeof(SegmentDescriptor) - 1,
.address = reinterpret_cast<uint64_t>(m_gdt.data())
.address = reinterpret_cast<uintptr_t>(m_gdt.data())
};
};

View File

@ -2,6 +2,7 @@
#include <BAN/Array.h>
#include <BAN/NoCopyMove.h>
#include <kernel/Arch.h>
#include <kernel/Interruptable.h>
#include <stdint.h>
@ -12,21 +13,36 @@ constexpr uint8_t IRQ_IPI = 32;
namespace Kernel
{
#if ARCH(x86_64)
struct GateDescriptor
{
uint16_t offset1;
uint16_t offset0;
uint16_t selector;
uint8_t IST;
uint8_t flags;
uint16_t offset2;
uint32_t offset3;
uint16_t offset1;
uint32_t offset2;
uint32_t reserved;
} __attribute__((packed));
};
static_assert(sizeof(GateDescriptor) == 16);
#elif ARCH(i686)
// IDT gate descriptor layout for the i686 build.
// IDT::register_interrupt_handler fills it as follows: offset0/offset1 receive the
// low and high 16 bits of the handler address, selector is set to 0x08 and flags
// to 0x8E. The field order and widths are part of the hardware-visible layout, so
// do not reorder them.
struct GateDescriptor
{
uint16_t offset0; // handler address bits 0..15
uint16_t selector; // code segment selector used when entering the handler
uint8_t reserved; // zeroed by the memset in register_interrupt_handler
uint8_t flags; // gate type / attribute byte
uint16_t offset1; // handler address bits 16..31
};
static_assert(sizeof(GateDescriptor) == 8);
#else
#error
#endif
struct IDTR
{
uint16_t size;
uint64_t offset;
uintptr_t offset;
} __attribute__((packed));
class IDT

View File

@ -18,6 +18,7 @@ namespace Kernel
virtual bool is_in_service(uint8_t) = 0;
static void initialize(bool force_pic);
static bool is_initialized();
static InterruptController& get();
virtual void initialize_multiprocessor() = 0;

View File

@ -14,4 +14,24 @@ namespace Kernel
uintptr_t ss;
};
// Snapshot of the general purpose registers belonging to an interrupted context.
// Each Thread keeps one copy (Thread::interrupt_registers()) and the Processor
// holds a pointer to the one that is live while an interrupt is being handled.
// NOTE(review): the field order presumably mirrors the push order of the assembly
// interrupt entry stub -- confirm against that stub before reordering anything.
// NOTE(review): only x86_64 register names are listed here; no i686 variant is
// visible in this header.
struct InterruptRegisters
{
uintptr_t r15;
uintptr_t r14;
uintptr_t r13;
uintptr_t r12;
uintptr_t r11;
uintptr_t r10;
uintptr_t r9;
uintptr_t r8;
uintptr_t rdi;
uintptr_t rsi;
uintptr_t rbp;
uintptr_t rbx;
uintptr_t rdx;
uintptr_t rcx;
uintptr_t rax;
};
}

View File

@ -25,7 +25,7 @@ namespace Kernel
else
{
while (!m_locker.compare_exchange(-1, tid))
Scheduler::get().reschedule();
Scheduler::get().yield();
ASSERT(m_lock_depth == 0);
}
m_lock_depth++;
@ -81,7 +81,7 @@ namespace Kernel
if (has_priority)
m_queue_length++;
while (!(has_priority || m_queue_length == 0) || !m_locker.compare_exchange(-1, tid))
Scheduler::get().reschedule();
Scheduler::get().yield();
ASSERT(m_lock_depth == 0);
}
m_lock_depth++;

View File

@ -21,6 +21,6 @@ namespace Kernel
{
using vaddr_t = uintptr_t;
using paddr_t = uintptr_t;
using paddr_t = uint64_t;
}

View File

@ -5,6 +5,7 @@
#include <kernel/Arch.h>
#include <kernel/GDT.h>
#include <kernel/IDT.h>
#include <kernel/InterruptStack.h>
#include <kernel/SchedulerQueue.h>
namespace Kernel
@ -68,6 +69,11 @@ namespace Kernel
static SchedulerQueue::Node* get_current_thread() { return reinterpret_cast<SchedulerQueue::Node*>(read_gs_ptr(offsetof(Processor, m_current_thread))); }
static void set_current_thread(SchedulerQueue::Node* thread) { write_gs_ptr(offsetof(Processor, m_current_thread), thread); }
static void enter_interrupt(InterruptStack*, InterruptRegisters*);
static void leave_interrupt();
static InterruptStack& get_interrupt_stack();
static InterruptRegisters& get_interrupt_registers();
private:
Processor() = default;
~Processor() { ASSERT_NOT_REACHED(); }
@ -121,6 +127,9 @@ namespace Kernel
Thread* m_idle_thread { nullptr };
SchedulerQueue::Node* m_current_thread { nullptr };
InterruptStack* m_interrupt_stack { nullptr };
InterruptRegisters* m_interrupt_registers { nullptr };
void* m_current_page_table { nullptr };
friend class BAN::Array<Processor, 0xFF>;

View File

@ -16,8 +16,10 @@ namespace Kernel
[[noreturn]] void start();
void yield();
void timer_reschedule();
void reschedule();
void irq_reschedule();
void reschedule_if_idling();
void set_current_thread_sleeping(uint64_t wake_time);
@ -30,9 +32,6 @@ namespace Kernel
Thread& current_thread();
static pid_t current_tid();
[[noreturn]] void execute_current_thread();
[[noreturn]] void delete_current_process_and_thread();
// This is no return if called on current thread
void terminate_thread(Thread*);
@ -41,11 +40,7 @@ namespace Kernel
void set_current_thread_sleeping_impl(Semaphore* semaphore, uint64_t wake_time);
[[nodiscard]] bool save_current_thread();
void advance_current_thread();
[[noreturn]] void execute_current_thread_locked();
[[noreturn]] void execute_current_thread_stack_loaded();
void setup_next_thread();
BAN::ErrorOr<void> add_thread(Thread*);

View File

@ -26,6 +26,7 @@ namespace Kernel
Thread* thread;
uint64_t wake_time { 0 };
Semaphore* semaphore { nullptr };
bool should_block { false };
private:
Node* next { nullptr };

View File

@ -4,6 +4,7 @@
#include <BAN/RefPtr.h>
#include <BAN/UniqPtr.h>
#include <kernel/Memory/VirtualRange.h>
#include <kernel/InterruptStack.h>
#include <signal.h>
#include <sys/types.h>
@ -25,7 +26,7 @@ namespace Kernel
{
NotStarted,
Executing,
Terminated
Terminated,
};
public:
@ -52,28 +53,17 @@ namespace Kernel
BAN::ErrorOr<void> block_or_eintr_or_timeout(Semaphore& semaphore, uint64_t timeout_ms, bool etimedout);
BAN::ErrorOr<void> block_or_eintr_or_waketime(Semaphore& semaphore, uint64_t wake_time_ms, bool etimedout);
void set_return_sp(uintptr_t& sp) { m_return_sp = &sp; }
void set_return_ip(uintptr_t& ip) { m_return_ip = &ip; }
uintptr_t return_sp() { ASSERT(m_return_sp); return *m_return_sp; }
uintptr_t return_ip() { ASSERT(m_return_ip); return *m_return_ip; }
pid_t tid() const { return m_tid; }
void set_sp(uintptr_t sp) { m_sp = sp; validate_stack(); }
void set_ip(uintptr_t ip) { m_ip = ip; }
uintptr_t sp() const { return m_sp; }
uintptr_t ip() const { return m_ip; }
void set_started() { ASSERT(m_state == State::NotStarted); m_state = State::Executing; }
State state() const { return m_state; }
vaddr_t stack_base() const { return m_stack->vaddr(); }
size_t stack_size() const { return m_stack->size(); }
VirtualRange& stack() { return *m_stack; }
VirtualRange& interrupt_stack() { return *m_interrupt_stack; }
vaddr_t kernel_stack_bottom() const { return m_kernel_stack->vaddr(); }
vaddr_t kernel_stack_top() const { return m_kernel_stack->vaddr() + m_kernel_stack->size(); }
VirtualRange& kernel_stack() { return *m_kernel_stack; }
vaddr_t interrupt_stack_base() const { return m_interrupt_stack ? m_interrupt_stack->vaddr() : 0; }
size_t interrupt_stack_size() const { return m_interrupt_stack ? m_interrupt_stack->size() : 0; }
vaddr_t userspace_stack_bottom() const { return is_userspace() ? m_userspace_stack->vaddr() : 0; }
vaddr_t userspace_stack_top() const { return is_userspace() ? m_userspace_stack->vaddr() + m_userspace_stack->size() : 0; }
VirtualRange& userspace_stack() { ASSERT(is_userspace()); return *m_userspace_stack; }
static Thread& current();
static pid_t current_tid();
@ -84,9 +74,13 @@ namespace Kernel
bool is_userspace() const { return m_is_userspace; }
size_t virtual_page_count() const { return m_stack->size() / PAGE_SIZE; }
size_t virtual_page_count() const { return (m_kernel_stack->size() / PAGE_SIZE) + (m_userspace_stack->size() / PAGE_SIZE); }
size_t physical_page_count() const { return virtual_page_count(); }
uintptr_t& interrupt_sp() { return m_interrupt_sp; }
InterruptStack& interrupt_stack() { return m_interrupt_stack; }
InterruptRegisters& interrupt_registers() { return m_interrupt_registers; }
#if __enable_sse
void save_sse();
void load_sse();
@ -97,23 +91,20 @@ namespace Kernel
Thread(pid_t tid, Process*);
void on_exit();
void validate_stack() const;
private:
static constexpr size_t m_kernel_stack_size = PAGE_SIZE * 4;
static constexpr size_t m_userspace_stack_size = PAGE_SIZE * 2;
static constexpr size_t m_interrupt_stack_size = PAGE_SIZE * 2;
BAN::UniqPtr<VirtualRange> m_interrupt_stack;
BAN::UniqPtr<VirtualRange> m_stack;
uintptr_t m_ip { 0 };
uintptr_t m_sp { 0 };
static constexpr size_t m_userspace_stack_size = PAGE_SIZE * 4;
BAN::UniqPtr<VirtualRange> m_kernel_stack;
BAN::UniqPtr<VirtualRange> m_userspace_stack;
const pid_t m_tid { 0 };
State m_state { State::NotStarted };
Process* m_process { nullptr };
bool m_is_userspace { false };
bool m_delete_process { false };
uintptr_t* m_return_sp { nullptr };
uintptr_t* m_return_ip { nullptr };
InterruptStack m_interrupt_stack { };
InterruptRegisters m_interrupt_registers { };
uintptr_t m_interrupt_sp { };
uint64_t m_signal_pending_mask { 0 };
uint64_t m_signal_block_mask { 0 };
@ -124,6 +115,7 @@ namespace Kernel
alignas(16) uint8_t m_sse_storage[512] {};
#endif
friend class Process;
friend class Scheduler;
};

69
kernel/kernel/GDT.cpp Normal file
View File

@ -0,0 +1,69 @@
#include <kernel/GDT.h>
#include <kernel/Processor.h>
#include <string.h>
namespace Kernel
{
// Allocates a GDT and populates the flat kernel/user code and data segments,
// the per-processor data segment (i686 only) and the TSS descriptor.
//
// processor: address of the Processor object this GDT belongs to. It is only
//            used on i686, where selector 0x28 describes that object so it can
//            be reached through a segment register; it is ignored on x86_64.
// Returns the newly allocated GDT (allocation failure trips the ASSERT).
GDT* GDT::create([[maybe_unused]] void* processor)
{
	auto* gdt = new GDT();
	ASSERT(gdt);

	// Flags nibble differs between the architectures only for code segments.
#if ARCH(x86_64)
	constexpr uint8_t code_flags = 0xA;
	constexpr uint8_t data_flags = 0xC;
#elif ARCH(i686)
	constexpr uint8_t code_flags = 0xC;
	constexpr uint8_t data_flags = 0xC;
#endif

	gdt->write_entry(0x00, 0x00000000, 0x00000, 0x00, 0x0); // null
	gdt->write_entry(0x08, 0x00000000, 0xFFFFF, 0x9A, code_flags); // kernel code
	gdt->write_entry(0x10, 0x00000000, 0xFFFFF, 0x92, data_flags); // kernel data
	gdt->write_entry(0x18, 0x00000000, 0xFFFFF, 0xFA, code_flags); // user code
	gdt->write_entry(0x20, 0x00000000, 0xFFFFF, 0xF2, data_flags); // user data
#if ARCH(i686)
	// A segment limit is the last valid offset, not the size, so the segment
	// covering the Processor object ends at sizeof(Processor) - 1. This is the
	// same convention the GDTR size field uses.
	gdt->write_entry(0x28, reinterpret_cast<uint32_t>(processor), sizeof(Processor) - 1, 0x92, 0x4); // processor data
#endif
	gdt->write_tss();

	return gdt;
}
// Encodes one segment descriptor into the table.
//
// offset: byte offset (selector value) of the descriptor inside the GDT; must
//         be a multiple of sizeof(SegmentDescriptor).
// base:   32 bit segment base, scattered over base1/base2/base3.
// limit:  20 bit segment limit, scattered over limit1/limit2.
// access: access byte, stored as is.
// flags:  low nibble is stored in the descriptor's flags field.
void GDT::write_entry(uint8_t offset, uint32_t base, uint32_t limit, uint8_t access, uint8_t flags)
{
	ASSERT(offset % sizeof(SegmentDescriptor) == 0);

	auto& entry = m_gdt[static_cast<uint8_t>(offset / sizeof(SegmentDescriptor))];

	// limit: bits 0..15 and 16..19
	entry.limit1 = limit & 0xFFFF;
	entry.limit2 = (limit >> 16) & 0x0F;

	// base: bits 0..15, 16..23 and 24..31
	entry.base1 = base & 0xFFFF;
	entry.base2 = (base >> 16) & 0xFF;
	entry.base3 = (base >> 24) & 0xFF;

	entry.access = access & 0xFF;
	entry.flags = flags & 0x0F;
}
// Zero-initializes the task state segment and installs its descriptor at
// m_tss_offset. On x86_64 the descriptor is 16 bytes wide, so the following
// GDT slot is filled with the upper 32 bits of the base address.
void GDT::write_tss()
{
	memset(&m_tss, 0x00, sizeof(TaskStateSegment));
	// Pointing the I/O map base at (or past) the end of the segment means that
	// no I/O permission bitmap is present.
	m_tss.iopb = sizeof(TaskStateSegment);

	uintptr_t base = reinterpret_cast<uintptr_t>(&m_tss);

	// The descriptor limit is the last valid byte offset inside the segment,
	// i.e. size - 1 (the same convention the GDTR size field uses). Passing the
	// plain size would extend the segment one byte past m_tss.
	write_entry(m_tss_offset, (uint32_t)base, sizeof(TaskStateSegment) - 1, 0x89, 0x0);

#if ARCH(x86_64)
	// Second half of the 16 byte system descriptor: base bits 32..63, rest zero.
	auto& desc = m_gdt[(m_tss_offset + 8) / sizeof(SegmentDescriptor)];
	desc.low = base >> 32;
	desc.high = 0;
#endif
}
}

View File

@ -10,16 +10,14 @@
#include <kernel/Timer/PIT.h>
#define ISR_LIST_X X(0) X(1) X(2) X(3) X(4) X(5) X(6) X(7) X(8) X(9) X(10) X(11) X(12) X(13) X(14) X(15) X(16) X(17) X(18) X(19) X(20) X(21) X(22) X(23) X(24) X(25) X(26) X(27) X(28) X(29) X(30) X(31)
#define IRQ_LIST_X X(0) X(1) X(2) X(3) X(4) X(5) X(6) X(7) X(8) X(9) X(10) X(11) X(12) X(13) X(14) X(15) X(16) X(17) X(18) X(19) X(20) X(21) X(22) X(23) X(24) X(25) X(26) X(27) X(28) X(29) X(30) X(31) X(32)
#define IRQ_LIST_X X(0) X(1) X(2) X(3) X(4) X(5) X(6) X(7) X(8) X(9) X(10) X(11) X(12) X(13) X(14) X(15) X(16) X(17) X(18) X(19) X(20) X(21) X(22) X(23) X(24) X(25) X(26) X(27) X(28) X(29) X(30) X(31)
namespace Kernel
{
#if ARCH(x86_64)
struct Registers
{
uint64_t rsp;
uint64_t rip;
uint64_t rflags;
uint64_t cr4;
uint64_t cr3;
uint64_t cr2;
@ -33,14 +31,33 @@ namespace Kernel
uint64_t r10;
uint64_t r9;
uint64_t r8;
uint64_t rsi;
uint64_t rdi;
uint64_t rsi;
uint64_t rbp;
uint64_t rbx;
uint64_t rdx;
uint64_t rcx;
uint64_t rbx;
uint64_t rax;
};
#elif ARCH(i686)
// Register block handed to cpp_isr_handler on i686; it is only read there to
// print the register dump (and cr2 for demand paging).
// NOTE(review): the edi..eax part matches the memory layout left by a pushal
// instruction, with `unused` sitting in the slot where pushal stores esp --
// presumably the ISR stub builds it that way; confirm against the assembly.
struct Registers
{
uint32_t cr4;
uint32_t cr3;
uint32_t cr2;
uint32_t cr0;
uint32_t edi;
uint32_t esi;
uint32_t ebp;
uint32_t unused; // never read by cpp_isr_handler; the stack pointer is taken from InterruptStack instead
uint32_t ebx;
uint32_t edx;
uint32_t ecx;
uint32_t eax;
};
#endif
#define X(num) 1 +
static BAN::Array<Interruptable*, IRQ_LIST_X 0> s_interruptables;
@ -141,46 +158,37 @@ namespace Kernel
"Unkown Exception 0x1F",
};
extern "C" void cpp_isr_handler(uint64_t isr, uint64_t error, InterruptStack& interrupt_stack, const Registers* regs)
extern "C" void cpp_isr_handler(uint32_t isr, uint32_t error, InterruptStack* interrupt_stack, const Registers* regs)
{
if (g_paniced)
{
dprintln("Processor {} halted", Processor::current_id());
InterruptController::get().broadcast_ipi();
if (InterruptController::is_initialized())
InterruptController::get().broadcast_ipi();
asm volatile("cli; 1: hlt; jmp 1b");
}
#if __enable_sse
bool from_userspace = (interrupt_stack.cs & 0b11) == 0b11;
if (from_userspace)
Thread::current().save_sse();
#endif
pid_t tid = Scheduler::current_tid();
pid_t pid = tid ? Process::current().pid() : 0;
if (tid)
{
Thread::current().set_return_sp(interrupt_stack.sp);
Thread::current().set_return_ip(interrupt_stack.ip);
if (isr == ISR::PageFault)
{
// Check if stack is OOB
auto& stack = Thread::current().stack();
auto& istack = Thread::current().interrupt_stack();
if (stack.vaddr() < interrupt_stack.sp && interrupt_stack.sp <= stack.vaddr() + stack.size())
; // using normal stack
else if (istack.vaddr() < interrupt_stack.sp && interrupt_stack.sp <= istack.vaddr() + istack.size())
; // using interrupt stack
auto& thread = Thread::current();
if (thread.userspace_stack_bottom() < interrupt_stack->sp && interrupt_stack->sp <= thread.userspace_stack_top())
; // using userspace stack
else if (thread.kernel_stack_bottom() < interrupt_stack->sp && interrupt_stack->sp <= thread.kernel_stack_top())
; // using kernel stack
else
{
derrorln("Stack pointer out of bounds!");
derrorln("rip {H}", interrupt_stack.ip);
derrorln("rsp {H}, stack {H}->{H}, istack {H}->{H}",
interrupt_stack.sp,
stack.vaddr(), stack.vaddr() + stack.size(),
istack.vaddr(), istack.vaddr() + istack.size()
derrorln("rip {H}", interrupt_stack->ip);
derrorln("rsp {H}, userspace stack {H}->{H}, kernel stack {H}->{H}",
interrupt_stack->sp,
thread.userspace_stack_bottom(), thread.userspace_stack_top(),
thread.kernel_stack_bottom(), thread.kernel_stack_top()
);
Thread::current().handle_signal(SIGKILL);
goto done;
@ -191,9 +199,9 @@ namespace Kernel
page_fault_error.raw = error;
if (!page_fault_error.present)
{
asm volatile("sti");
Processor::set_interrupt_state(InterruptState::Enabled);
auto result = Process::current().allocate_page_for_demand_paging(regs->cr2);
asm volatile("cli");
Processor::set_interrupt_state(InterruptState::Disabled);
if (!result.is_error() && result.value())
goto done;
@ -209,11 +217,19 @@ namespace Kernel
#if __enable_sse
else if (isr == ISR::DeviceNotAvailable)
{
#if ARCH(x86_64)
asm volatile(
"movq %cr0, %rax;"
"andq $~(1 << 3), %rax;"
"movq %rax, %cr0;"
);
#elif ARCH(i686)
asm volatile(
"movl %cr0, %eax;"
"andl $~(1 << 3), %eax;"
"movl %eax, %cr0;"
);
#endif
if (auto* current = &Thread::current(); current != Thread::sse_thread())
{
if (auto* sse = Thread::sse_thread())
@ -225,9 +241,9 @@ namespace Kernel
#endif
}
if (PageTable::current().get_page_flags(interrupt_stack.ip & PAGE_ADDR_MASK) & PageTable::Flags::Present)
if (PageTable::current().get_page_flags(interrupt_stack->ip & PAGE_ADDR_MASK) & PageTable::Flags::Present)
{
auto* machine_code = (const uint8_t*)interrupt_stack.ip;
auto* machine_code = (const uint8_t*)interrupt_stack->ip;
dwarnln("While executing: {2H}{2H}{2H}{2H}{2H}{2H}{2H}{2H}",
machine_code[0],
machine_code[1],
@ -240,8 +256,9 @@ namespace Kernel
);
}
#if ARCH(x86_64)
dwarnln(
"{} (error code: 0x{16H}), pid {}, tid {}\r\n"
"{} (error code: 0x{8H}), pid {}, tid {}\r\n"
"Register dump\r\n"
"rax=0x{16H}, rbx=0x{16H}, rcx=0x{16H}, rdx=0x{16H}\r\n"
"rsp=0x{16H}, rbp=0x{16H}, rdi=0x{16H}, rsi=0x{16H}\r\n"
@ -249,10 +266,25 @@ namespace Kernel
"cr0=0x{16H}, cr2=0x{16H}, cr3=0x{16H}, cr4=0x{16H}",
isr_exceptions[isr], error, pid, tid,
regs->rax, regs->rbx, regs->rcx, regs->rdx,
regs->rsp, regs->rbp, regs->rdi, regs->rsi,
regs->rip, regs->rflags,
interrupt_stack->sp, regs->rbp, regs->rdi, regs->rsi,
interrupt_stack->ip, interrupt_stack->flags,
regs->cr0, regs->cr2, regs->cr3, regs->cr4
);
#elif ARCH(i686)
dwarnln(
"{} (error code: 0x{8H}), pid {}, tid {}\r\n"
"Register dump\r\n"
"eax=0x{8H}, ebx=0x{8H}, ecx=0x{8H}, edx=0x{8H}\r\n"
"esp=0x{8H}, ebp=0x{8H}, edi=0x{8H}, esi=0x{8H}\r\n"
"eip=0x{8H}, eflags=0x{8H}\r\n"
"cr0=0x{8H}, cr2=0x{8H}, cr3=0x{8H}, cr4=0x{8H}",
isr_exceptions[isr], error, pid, tid,
regs->eax, regs->ebx, regs->ecx, regs->edx,
interrupt_stack->sp, regs->ebp, regs->edi, regs->esi,
interrupt_stack->ip, interrupt_stack->flags,
regs->cr0, regs->cr2, regs->cr3, regs->cr4
);
#endif
if (isr == ISR::PageFault)
PageTable::current().debug_dump();
Debug::dump_stack_trace();
@ -297,29 +329,31 @@ done:
return;
}
extern "C" void cpp_irq_handler(uint64_t irq, InterruptStack& interrupt_stack)
// C++ side of the reschedule interrupt.
// Publishes the interrupted context (stack frame + general purpose registers) on
// the current Processor, lets the scheduler pick the next thread, and then clears
// the published pointers again. Scheduler::setup_next_thread reads and overwrites
// the frame through Processor::get_interrupt_stack()/get_interrupt_registers().
// NOTE(review): presumably called from asm_reschedule_handler, which IDT::create
// registers on vector IRQ_VECTOR_BASE + IRQ_IPI -- confirm in the assembly stub.
extern "C" void cpp_reschedule_handler(InterruptStack* interrupt_stack, InterruptRegisters* interrupt_registers)
{
Processor::enter_interrupt(interrupt_stack, interrupt_registers);
Scheduler::get().irq_reschedule();
Processor::leave_interrupt();
}
extern "C" void cpp_irq_handler(uint32_t irq)
{
if (g_paniced)
{
dprintln("Processor {} halted", Processor::current_id());
InterruptController::get().broadcast_ipi();
if (InterruptController::is_initialized())
InterruptController::get().broadcast_ipi();
asm volatile("cli; 1: hlt; jmp 1b");
}
if (Scheduler::current_tid())
{
Thread::current().set_return_sp(interrupt_stack.sp);
Thread::current().set_return_ip(interrupt_stack.ip);
}
ASSERT(irq != IRQ_IPI);
if (!InterruptController::get().is_in_service(irq))
dprintln("spurious irq 0x{2H}", irq);
else
{
InterruptController::get().eoi(irq);
if (irq == IRQ_IPI)
Scheduler::get().reschedule();
else if (auto* handler = s_interruptables[irq])
if (auto* handler = s_interruptables[irq])
handler->handle_irq();
else
dprintln("no handler for irq 0x{2H}", irq);
@ -332,14 +366,17 @@ done:
void IDT::register_interrupt_handler(uint8_t index, void (*handler)())
{
auto& descriptor = m_idt[index];
descriptor.offset1 = (uint16_t)((uint64_t)handler >> 0);
descriptor.offset2 = (uint16_t)((uint64_t)handler >> 16);
descriptor.offset3 = (uint32_t)((uint64_t)handler >> 32);
auto& desc = m_idt[index];
memset(&desc, 0, sizeof(GateDescriptor));
descriptor.selector = 0x08;
descriptor.IST = 0;
descriptor.flags = 0x8E;
desc.offset0 = (uint16_t)((uintptr_t)handler >> 0);
desc.offset1 = (uint16_t)((uintptr_t)handler >> 16);
#if ARCH(x86_64)
desc.offset2 = (uint32_t)((uintptr_t)handler >> 32);
#endif
desc.selector = 0x08;
desc.flags = 0x8E;
}
void IDT::register_syscall_handler(uint8_t index, void (*handler)())
@ -363,6 +400,7 @@ done:
IRQ_LIST_X
#undef X
extern "C" void asm_reschedule_handler();
extern "C" void syscall_asm();
IDT* IDT::create()
@ -380,6 +418,8 @@ done:
IRQ_LIST_X
#undef X
idt->register_interrupt_handler(IRQ_VECTOR_BASE + IRQ_IPI, asm_reschedule_handler);
idt->register_syscall_handler(0x80, syscall_asm);
return idt;

View File

@ -40,6 +40,11 @@ namespace Kernel
s_instance->m_using_apic = false;
}
bool InterruptController::is_initialized()
{
return s_instance;
}
void InterruptController::enter_acpi_mode()
{
#if ARCH(x86_64)

View File

@ -79,7 +79,7 @@ namespace Kernel
if (it != m_arp_table.end())
return it->value;
}
Scheduler::get().reschedule();
Scheduler::get().yield();
}
return BAN::Error::from_errno(ETIMEDOUT);

View File

@ -164,7 +164,7 @@ namespace Kernel
}
while (!connection_info.connection_done)
Scheduler::get().reschedule();
Scheduler::get().yield();
return {};
}

View File

@ -127,9 +127,6 @@ namespace Kernel
}
process->m_loadable_elf->reserve_address_space();
process->m_is_userspace = true;
process->m_userspace_info.entry = process->m_loadable_elf->entry_point();
char** argv = nullptr;
{
size_t needed_bytes = sizeof(char*) * 2 + path.size() + 1;
@ -155,6 +152,8 @@ namespace Kernel
MUST(process->m_mapped_regions.push_back(BAN::move(argv_region)));
}
process->m_is_userspace = true;
process->m_userspace_info.entry = process->m_loadable_elf->entry_point();
process->m_userspace_info.argc = 1;
process->m_userspace_info.argv = argv;
process->m_userspace_info.envp = nullptr;
@ -207,7 +206,7 @@ namespace Kernel
m_exit_status.semaphore.unblock();
while (m_exit_status.waiting > 0)
Scheduler::get().reschedule();
Scheduler::get().yield();
m_process_lock.lock();
@ -220,7 +219,7 @@ namespace Kernel
bool Process::on_thread_exit(Thread& thread)
{
ASSERT(Processor::get_interrupt_state() == InterruptState::Disabled);
LockGuard _(m_process_lock);
ASSERT(m_threads.size() > 0);
@ -228,8 +227,6 @@ namespace Kernel
{
ASSERT(m_threads.front() == &thread);
m_threads.clear();
thread.setup_process_cleanup();
return true;
}
@ -248,11 +245,18 @@ namespace Kernel
void Process::exit(int status, int signal)
{
m_exit_status.exit_code = __WGENEXITCODE(status, signal);
for (auto* thread : m_threads)
if (thread != &Thread::current())
Scheduler::get().terminate_thread(thread);
if (this == &Process::current())
Scheduler::get().terminate_thread(&Thread::current());
while (!m_threads.empty())
m_threads.front()->on_exit();
//for (auto* thread : m_threads)
// if (thread != &Thread::current())
// Scheduler::get().terminate_thread(thread);
//if (this == &Process::current())
//{
// m_threads.clear();
// Processor::set_interrupt_state(InterruptState::Disabled);
// Thread::current().setup_process_cleanup();
// Scheduler::get().yield();
//}
}
size_t Process::proc_meminfo(off_t offset, BAN::ByteSpan buffer) const
@ -533,7 +537,7 @@ namespace Kernel
m_has_called_exec = true;
m_threads.front()->setup_exec();
Scheduler::get().execute_current_thread();
Scheduler::get().yield();
ASSERT_NOT_REACHED();
}
@ -676,9 +680,9 @@ namespace Kernel
LockGuard _(m_process_lock);
if (Thread::current().stack().contains(address))
if (Thread::current().userspace_stack().contains(address))
{
TRY(Thread::current().stack().allocate_page_for_demand_paging(address));
TRY(Thread::current().userspace_stack().allocate_page_for_demand_paging(address));
return true;
}
@ -1879,7 +1883,7 @@ namespace Kernel
if (vaddr == 0)
return {};
if (vaddr >= thread.stack_base() && vaddr + size <= thread.stack_base() + thread.stack_size())
if (vaddr >= thread.userspace_stack_bottom() && vaddr + size <= thread.userspace_stack_top())
return {};
// FIXME: should we allow cross mapping access?

View File

@ -38,7 +38,7 @@ namespace Kernel
processor.m_stack = kmalloc(s_stack_size, 4096, true);
ASSERT(processor.m_stack);
processor.m_gdt = GDT::create();
processor.m_gdt = GDT::create(&processor);
ASSERT(processor.m_gdt);
processor.m_idt = IDT::create();
@ -52,14 +52,19 @@ namespace Kernel
auto id = read_processor_id();
auto& processor = s_processors[id];
ASSERT(processor.m_gdt);
processor.m_gdt->load();
// initialize GS
#if ARCH(x86_64)
// set gs base to pointer to this processor
uint64_t ptr = reinterpret_cast<uint64_t>(&processor);
uint32_t ptr_hi = ptr >> 32;
uint32_t ptr_lo = ptr & 0xFFFFFFFF;
asm volatile("wrmsr" :: "d"(ptr_hi), "a"(ptr_lo), "c"(MSR_IA32_GS_BASE));
ASSERT(processor.m_gdt);
processor.gdt().load();
#elif ARCH(i686)
asm volatile("movw $0x28, %%ax; movw %%ax, %%gs" ::: "ax");
#endif
ASSERT(processor.m_idt);
processor.idt().load();
@ -74,4 +79,34 @@ namespace Kernel
write_gs_ptr(offsetof(Processor, m_idle_thread), idle_thread);
}
// Publishes the interrupted context on the current processor so that it can be
// fetched with get_interrupt_stack()/get_interrupt_registers().
// Must be called with interrupts disabled and without another context already
// being published (no nesting).
void Processor::enter_interrupt(InterruptStack* interrupt_stack, InterruptRegisters* interrupt_registers)
{
	constexpr auto stack_slot = offsetof(Processor, m_interrupt_stack);
	constexpr auto registers_slot = offsetof(Processor, m_interrupt_registers);

	ASSERT(get_interrupt_state() == InterruptState::Disabled);
	ASSERT(read_gs_ptr(stack_slot) == nullptr);

	write_gs_ptr(stack_slot, interrupt_stack);
	write_gs_ptr(registers_slot, interrupt_registers);
}
// Counterpart of enter_interrupt(): forgets the published interrupt context.
// Must be called with interrupts disabled and only after enter_interrupt().
void Processor::leave_interrupt()
{
	constexpr auto stack_slot = offsetof(Processor, m_interrupt_stack);
	constexpr auto registers_slot = offsetof(Processor, m_interrupt_registers);

	ASSERT(get_interrupt_state() == InterruptState::Disabled);
	ASSERT(read_gs_ptr(stack_slot) != nullptr);

	write_gs_ptr(stack_slot, nullptr);
	write_gs_ptr(registers_slot, nullptr);
}
// Returns the interrupt stack frame published by enter_interrupt().
// Asserts that interrupts are disabled and that a frame is currently published.
InterruptStack& Processor::get_interrupt_stack()
{
	ASSERT(get_interrupt_state() == InterruptState::Disabled);

	auto* const frame = read_gs_sized<InterruptStack*>(offsetof(Processor, m_interrupt_stack));
	ASSERT(frame);
	return *frame;
}
// Returns the saved general purpose registers published by enter_interrupt().
// Asserts that interrupts are disabled and that a register block is published.
InterruptRegisters& Processor::get_interrupt_registers()
{
	ASSERT(get_interrupt_state() == InterruptState::Disabled);

	auto* const registers = read_gs_sized<InterruptRegisters*>(offsetof(Processor, m_interrupt_registers));
	ASSERT(registers);
	return *registers;
}
}

View File

@ -11,9 +11,6 @@
namespace Kernel
{
extern "C" [[noreturn]] void start_thread(uintptr_t sp, uintptr_t ip);
extern "C" [[noreturn]] void continue_thread(uintptr_t sp, uintptr_t ip);
static Scheduler* s_instance = nullptr;
static BAN::Atomic<bool> s_started { false };
@ -46,10 +43,8 @@ namespace Kernel
void Scheduler::start()
{
ASSERT(Processor::get_interrupt_state() == InterruptState::Disabled);
m_lock.lock();
s_started = true;
advance_current_thread();
execute_current_thread_locked();
ASSERT(!m_active_threads.empty());
yield();
ASSERT_NOT_REACHED();
}
@ -71,41 +66,125 @@ namespace Kernel
return Scheduler::get().current_thread().tid();
}
// Performs the actual thread switch while a reschedule interrupt is being handled.
// 1. Disposes of, or saves and requeues, the thread that was running.
// 2. Picks the next runnable thread (the idle thread if none is queued).
// 3. Loads that thread's page table and TSS stack and copies its saved frame into
//    the interrupt context published on the Processor.
// The caller must hold m_lock, and Processor::enter_interrupt must have published
// a context (get_interrupt_stack/get_interrupt_registers assert on that).
void Scheduler::setup_next_thread()
{
ASSERT(m_lock.current_processor_has_lock());
if (auto* current = Processor::get_current_thread())
{
auto* thread = current->thread;
if (thread->state() == Thread::State::Terminated)
{
// The kernel page table is loaded before the thread and its queue node are freed.
PageTable::kernel().load();
delete thread;
delete current;
}
else
{
// thread->state() can be NotStarted when calling exec or cleaning up process
if (thread->state() != Thread::State::NotStarted)
{
// Save the interrupted context into the thread. The frame's sp is replaced by
// interrupt_sp(), the stack pointer Scheduler::yield stored before it switched
// to the processor stack and raised the interrupt.
thread->interrupt_stack() = Processor::get_interrupt_stack();
thread->interrupt_stack().sp = thread->interrupt_sp();
thread->interrupt_registers() = Processor::get_interrupt_registers();
}
// should_block is a one-shot request to park the thread in the blocking queue
// instead of putting it back into the run queue.
if (current->should_block)
{
current->should_block = false;
m_blocking_threads.add_with_wake_time(current);
}
else
{
m_active_threads.push_back(current);
}
}
}
// Pop run queue entries until a non-terminated thread is found; terminated ones
// are freed on the way. node stays nullptr if the queue runs dry.
SchedulerQueue::Node* node = nullptr;
while (!m_active_threads.empty())
{
node = m_active_threads.pop_front();
if (node->thread->state() != Thread::State::Terminated)
break;
PageTable::kernel().load();
delete node->thread;
delete node;
node = nullptr;
}
Processor::set_current_thread(node);
// Without a runnable thread the processor's idle thread is used.
auto* thread = node ? node->thread : Processor::idle_thread();
if (thread->has_process())
thread->process().page_table().load();
else
PageTable::kernel().load();
if (thread->state() == Thread::State::NotStarted)
thread->m_state = Thread::State::Executing;
// A thread must have a valid saved instruction and stack pointer to be resumed.
ASSERT(thread->interrupt_stack().ip);
ASSERT(thread->interrupt_stack().sp);
Processor::gdt().set_tss_stack(thread->kernel_stack_top());
// Overwrite the published interrupt context with the chosen thread's saved one.
// NOTE(review): presumably the assembly handler then returns into this frame
// (iret) -- confirm in the stub.
Processor::get_interrupt_stack() = thread->interrupt_stack();
Processor::get_interrupt_registers() = thread->interrupt_registers();
}
void Scheduler::timer_reschedule()
{
// Broadcast IPI to all other processors for them
// to perform reschedule
InterruptController::get().broadcast_ipi();
auto state = m_lock.lock();
m_blocking_threads.remove_with_wake_time(m_active_threads, SystemTimer::get().ms_since_boot());
if (save_current_thread())
return Processor::set_interrupt_state(state);
advance_current_thread();
execute_current_thread_locked();
ASSERT_NOT_REACHED();
{
SpinLockGuard _(m_lock);
m_blocking_threads.remove_with_wake_time(m_active_threads, SystemTimer::get().ms_since_boot());
}
yield();
}
void Scheduler::reschedule()
void Scheduler::yield()
{
auto state = m_lock.lock();
if (save_current_thread())
return Processor::set_interrupt_state(state);
advance_current_thread();
execute_current_thread_locked();
ASSERT_NOT_REACHED();
auto state = Processor::get_interrupt_state();
Processor::set_interrupt_state(InterruptState::Disabled);
asm volatile(
"movq %%rsp, %[save_sp];"
"movq %[load_sp], %%rsp;"
"int %[ipi];"
: [save_sp]"=m"(Thread::current().interrupt_sp())
: [load_sp]"r"(Processor::current_stack_top()),
[ipi]"i"(IRQ_VECTOR_BASE + IRQ_IPI)
: "memory"
);
Processor::set_interrupt_state(state);
}
// Entry point used by cpp_reschedule_handler: takes the scheduler lock for the
// duration of the call and switches to the next thread.
void Scheduler::irq_reschedule()
{
SpinLockGuard _(m_lock);
setup_next_thread();
}
void Scheduler::reschedule_if_idling()
{
auto state = m_lock.lock();
if (m_active_threads.empty() || Processor::get_current_thread())
return m_lock.unlock(state);
if (save_current_thread())
return Processor::set_interrupt_state(state);
advance_current_thread();
execute_current_thread_locked();
ASSERT_NOT_REACHED();
{
SpinLockGuard _(m_lock);
if (Processor::get_current_thread())
return;
if (m_active_threads.empty())
return;
}
yield();
}
BAN::ErrorOr<void> Scheduler::add_thread(Thread* thread)
@ -120,190 +199,49 @@ namespace Kernel
void Scheduler::terminate_thread(Thread* thread)
{
SpinLockGuard _(m_lock);
auto state = m_lock.lock();
ASSERT(thread->state() == Thread::State::Executing);
thread->m_state = Thread::State::Terminated;
if (thread == &current_thread())
execute_current_thread_locked();
}
thread->interrupt_stack().sp = Processor::current_stack_top();
void Scheduler::advance_current_thread()
{
ASSERT(m_lock.current_processor_has_lock());
m_lock.unlock(InterruptState::Disabled);
if (auto* current = Processor::get_current_thread())
m_active_threads.push_back(current);
Processor::set_current_thread(nullptr);
// actual deletion will be done while rescheduling
if (!m_active_threads.empty())
Processor::set_current_thread(m_active_threads.pop_front());
}
// NOTE: this is declared always inline, so we don't corrupt the stack
// after getting the rsp
ALWAYS_INLINE bool Scheduler::save_current_thread()
{
ASSERT(m_lock.current_processor_has_lock());
uintptr_t sp, ip;
push_callee_saved();
if (!(ip = read_ip()))
if (&current_thread() == thread)
{
pop_callee_saved();
return true;
}
read_rsp(sp);
Thread& current = current_thread();
current.set_ip(ip);
current.set_sp(sp);
load_temp_stack();
return false;
}
void Scheduler::delete_current_process_and_thread()
{
m_lock.lock();
load_temp_stack();
PageTable::kernel().load();
auto* current = Processor::get_current_thread();
ASSERT(current);
delete &current->thread->process();
delete current->thread;
delete current;
Processor::set_current_thread(nullptr);
advance_current_thread();
execute_current_thread_locked();
ASSERT_NOT_REACHED();
}
void Scheduler::execute_current_thread()
{
m_lock.lock();
load_temp_stack();
PageTable::kernel().load();
execute_current_thread_stack_loaded();
ASSERT_NOT_REACHED();
}
void Scheduler::execute_current_thread_locked()
{
ASSERT(m_lock.current_processor_has_lock());
load_temp_stack();
PageTable::kernel().load();
execute_current_thread_stack_loaded();
ASSERT_NOT_REACHED();
}
NEVER_INLINE void Scheduler::execute_current_thread_stack_loaded()
{
ASSERT(m_lock.current_processor_has_lock());
#if SCHEDULER_VERIFY_STACK
vaddr_t rsp;
read_rsp(rsp);
ASSERT(Processor::current_stack_bottom() <= rsp && rsp <= Processor::current_stack_top());
ASSERT(&PageTable::current() == &PageTable::kernel());
#endif
Thread* current = &current_thread();
#if __enable_sse
if (current != Thread::sse_thread())
{
#if ARCH(x86_64)
asm volatile(
"movq %cr0, %rax;"
"orq $(1 << 3), %rax;"
"movq %rax, %cr0"
);
#elif ARCH(i686)
asm volatile(
"movl %cr0, %eax;"
"orl $(1 << 3), %eax;"
"movl %eax, %cr0"
);
#else
#error
#endif
}
#endif
while (current->state() == Thread::State::Terminated)
{
auto* node = Processor::get_current_thread();
if (node->thread->has_process())
if (node->thread->process().on_thread_exit(*node->thread))
break;
delete node->thread;
delete node;
Processor::set_current_thread(nullptr);
advance_current_thread();
current = &current_thread();
yield();
ASSERT_NOT_REACHED();
}
if (current->has_process())
{
current->process().page_table().load();
Processor::gdt().set_tss_stack(current->interrupt_stack_base() + current->interrupt_stack_size());
}
else
PageTable::kernel().load();
switch (current->state())
{
case Thread::State::NotStarted:
current->set_started();
m_lock.unlock(InterruptState::Disabled);
start_thread(current->sp(), current->ip());
case Thread::State::Executing:
m_lock.unlock(InterruptState::Disabled);
while (current->can_add_signal_to_execute())
current->handle_signal();
continue_thread(current->sp(), current->ip());
case Thread::State::Terminated:
ASSERT_NOT_REACHED();
}
ASSERT_NOT_REACHED();
Processor::set_interrupt_state(state);
}
void Scheduler::set_current_thread_sleeping_impl(Semaphore* semaphore, uint64_t wake_time)
{
ASSERT(m_lock.current_processor_has_lock());
if (save_current_thread())
return;
auto state = m_lock.lock();
auto* current = Processor::get_current_thread();
current->semaphore = semaphore;
current->wake_time = wake_time;
m_blocking_threads.add_with_wake_time(current);
Processor::set_current_thread(nullptr);
current->should_block = true;
advance_current_thread();
execute_current_thread_locked();
ASSERT_NOT_REACHED();
m_lock.unlock(InterruptState::Disabled);
yield();
Processor::set_interrupt_state(state);
}
// Sleeps the current thread with no semaphore attached: forwards to
// set_current_thread_sleeping_impl() with a null semaphore and the given `wake_time`.
// NOTE(review): the lock/restore lines around the call may be leftovers of the pre-change
// version shown in this diff listing -- confirm against the real file.
void Scheduler::set_current_thread_sleeping(uint64_t wake_time)
{
auto state = m_lock.lock();
set_current_thread_sleeping_impl(nullptr, wake_time);
Processor::set_interrupt_state(state);
}
// Blocks the current thread on `semaphore` with the given `wake_time` by forwarding both
// to set_current_thread_sleeping_impl().
// NOTE(review): the lock/restore lines around the call may be leftovers of the pre-change
// version shown in this diff listing -- confirm against the real file.
void Scheduler::block_current_thread(Semaphore* semaphore, uint64_t wake_time)
{
auto state = m_lock.lock();
set_current_thread_sleeping_impl(semaphore, wake_time);
Processor::set_interrupt_state(state);
}
void Scheduler::unblock_threads(Semaphore* semaphore)

View File

@ -168,7 +168,7 @@ namespace Kernel
// This doesn't allow scheduler to go properly idle.
while (SystemTimer::get().ms_since_boot() < start_time + s_ata_timeout)
{
Scheduler::get().reschedule();
Scheduler::get().yield();
if (!(m_port->ci & (1 << command_slot)))
return {};
}

View File

@ -32,9 +32,6 @@ namespace Kernel
{
ASSERT((interrupt_stack.cs & 0b11) == 0b11);
Thread::current().set_return_sp(interrupt_stack.sp);
Thread::current().set_return_ip(interrupt_stack.ip);
asm volatile("sti");
BAN::ErrorOr<long> ret = BAN::Error::from_errno(ENOSYS);

View File

@ -12,8 +12,8 @@
namespace Kernel
{
extern "C" void thread_userspace_trampoline(uint64_t sp, uint64_t ip, int argc, char** argv, char** envp);
extern "C" uintptr_t read_ip();
extern "C" [[noreturn]] void start_userspace_thread();
extern "C" [[noreturn]] void start_kernel_thread();
extern "C" void signal_trampoline();
@ -38,7 +38,7 @@ namespace Kernel
BAN::ScopeGuard thread_deleter([thread] { delete thread; });
// Initialize stack and registers
thread->m_stack = TRY(VirtualRange::create_to_vaddr_range(
thread->m_kernel_stack = TRY(VirtualRange::create_to_vaddr_range(
PageTable::kernel(),
KERNEL_OFFSET,
~(uintptr_t)0,
@ -46,14 +46,21 @@ namespace Kernel
PageTable::Flags::ReadWrite | PageTable::Flags::Present,
true
));
thread->m_sp = thread->stack_base() + thread->stack_size();
thread->m_ip = (uintptr_t)entry;
// Initialize stack for returning
write_to_stack(thread->m_sp, nullptr); // alignment
write_to_stack(thread->m_sp, thread);
write_to_stack(thread->m_sp, &Thread::on_exit);
write_to_stack(thread->m_sp, data);
uintptr_t sp = thread->kernel_stack_top();
write_to_stack(sp, thread);
write_to_stack(sp, &Thread::on_exit);
write_to_stack(sp, data);
write_to_stack(sp, entry);
thread->m_interrupt_stack.ip = reinterpret_cast<vaddr_t>(start_kernel_thread);
thread->m_interrupt_stack.cs = 0x08;
thread->m_interrupt_stack.flags = 0x202;
thread->m_interrupt_stack.sp = sp;
thread->m_interrupt_stack.ss = 0x10;
memset(&thread->m_interrupt_registers, 0, sizeof(InterruptRegisters));
thread_deleter.disable();
@ -72,7 +79,15 @@ namespace Kernel
thread->m_is_userspace = true;
thread->m_stack = TRY(VirtualRange::create_to_vaddr_range(
thread->m_kernel_stack = TRY(VirtualRange::create_to_vaddr_range(
process->page_table(),
0x300000, KERNEL_OFFSET,
m_kernel_stack_size,
PageTable::Flags::ReadWrite | PageTable::Flags::Present,
true
));
thread->m_userspace_stack = TRY(VirtualRange::create_to_vaddr_range(
process->page_table(),
0x300000, KERNEL_OFFSET,
m_userspace_stack_size,
@ -80,14 +95,6 @@ namespace Kernel
true
));
thread->m_interrupt_stack = TRY(VirtualRange::create_to_vaddr_range(
process->page_table(),
0x300000, KERNEL_OFFSET,
m_interrupt_stack_size,
PageTable::Flags::ReadWrite | PageTable::Flags::Present,
true
));
thread->setup_exec();
thread_deleter.disable();
@ -148,6 +155,11 @@ namespace Kernel
// Destroys the thread. When m_delete_process is set, the thread also deletes its
// process (m_process), which must be non-null at that point.
Thread::~Thread()
{
if (m_delete_process)
{
ASSERT(m_process);
delete m_process;
}
}
BAN::ErrorOr<Thread*> Thread::clone(Process* new_process, uintptr_t sp, uintptr_t ip)
@ -162,13 +174,16 @@ namespace Kernel
thread->m_is_userspace = true;
thread->m_interrupt_stack = TRY(m_interrupt_stack->clone(new_process->page_table()));
thread->m_stack = TRY(m_stack->clone(new_process->page_table()));
thread->m_kernel_stack = TRY(m_kernel_stack->clone(new_process->page_table()));
thread->m_userspace_stack = TRY(m_userspace_stack->clone(new_process->page_table()));
thread->m_state = State::Executing;
thread->m_state = State::NotStarted;
thread->m_ip = ip;
thread->m_sp = sp;
thread->m_interrupt_stack.ip = ip;
thread->m_interrupt_stack.cs = 0x08;
thread->m_interrupt_stack.flags = 0x002;
thread->m_interrupt_stack.sp = sp;
thread->m_interrupt_stack.ss = 0x10;
thread_deleter.disable();
@ -179,58 +194,69 @@ namespace Kernel
{
ASSERT(is_userspace());
m_state = State::NotStarted;
static entry_t entry_trampoline(
[](void*)
{
const auto& info = Process::current().userspace_info();
thread_userspace_trampoline(Thread::current().sp(), info.entry, info.argc, info.argv, info.envp);
ASSERT_NOT_REACHED();
}
);
m_sp = stack_base() + stack_size();
m_ip = (uintptr_t)entry_trampoline;
// Signal mask is inherited
// Setup stack for returning
ASSERT(m_sp % PAGE_SIZE == 0);
PageTable::with_fast_page(process().page_table().physical_address_of(m_sp - PAGE_SIZE), [&] {
auto& userspace_info = process().userspace_info();
ASSERT(userspace_info.entry);
// Initialize stack for returning
PageTable::with_fast_page(process().page_table().physical_address_of(userspace_stack_top() - PAGE_SIZE), [&] {
uintptr_t sp = PageTable::fast_page() + PAGE_SIZE;
write_to_stack(sp, nullptr); // alignment
write_to_stack(sp, this);
write_to_stack(sp, &Thread::on_exit);
write_to_stack(sp, nullptr);
m_sp -= 4 * sizeof(uintptr_t);
write_to_stack(sp, userspace_info.argc);
write_to_stack(sp, userspace_info.argv);
write_to_stack(sp, userspace_info.envp);
});
m_interrupt_stack.ip = userspace_info.entry;
m_interrupt_stack.cs = 0x18 | 3;
m_interrupt_stack.flags = 0x202;
m_interrupt_stack.sp = userspace_stack_top() - 4 * sizeof(uintptr_t);
m_interrupt_stack.ss = 0x20 | 3;
memset(&m_interrupt_registers, 0, sizeof(InterruptRegisters));
}
// Re-initialises this thread so that, when it is next started, it runs the owning
// process's cleanup_function(). Must be called with interrupts disabled (asserted).
//
// NOTE(review): this listing appears to be a rendered diff with the +/- markers stripped,
// so old and new statements are interleaved (two lambda bodies, two with_fast_page calls,
// both m_sp/m_ip and m_interrupt_stack setup). Confirm against the real file.
void Thread::setup_process_cleanup()
{
ASSERT(Processor::get_interrupt_state() == InterruptState::Disabled);
m_state = State::NotStarted;
// Entry routine for the cleanup run; it receives the process pointer written to the stack below.
static entry_t entry(
[](void* process_ptr)
{
auto& process = *reinterpret_cast<Process*>(process_ptr);
process.cleanup_function();
Scheduler::get().delete_current_process_and_thread();
ASSERT_NOT_REACHED();
auto* thread = &Thread::current();
auto* process = static_cast<Process*>(process_ptr);
ASSERT(thread->m_process == process);
process->cleanup_function();
// Flag checked by the destructor and by on_exit().
thread->m_delete_process = true;
// will call on thread exit after return
}
);
m_sp = stack_base() + stack_size();
m_ip = (uintptr_t)entry;
// Drop all pending signals and set every bit of the block mask.
m_signal_pending_mask = 0;
m_signal_block_mask = ~0ull;
ASSERT(m_sp % PAGE_SIZE == 0);
// The topmost page of the stack is accessed through the fast-page mapping, using the
// physical address looked up in the process's page table; values are written downwards
// from the end of that page.
PageTable::with_fast_page(process().page_table().physical_address_of(m_sp - PAGE_SIZE), [&] {
PageTable::with_fast_page(process().page_table().physical_address_of(kernel_stack_top() - PAGE_SIZE), [&] {
uintptr_t sp = PageTable::fast_page() + PAGE_SIZE;
write_to_stack(sp, nullptr); // alignment
write_to_stack(sp, this);
write_to_stack(sp, &Thread::on_exit);
write_to_stack(sp, m_process);
m_sp -= 4 * sizeof(uintptr_t);
write_to_stack(sp, entry);
});
// Initial interrupt frame: execution begins at start_kernel_thread with the stack
// pointer four machine words below the top of the kernel stack.
// flags 0x202 = x86 EFLAGS with IF (bit 9) and the always-set bit 1.
// NOTE(review): 0x08 / 0x10 are presumably the kernel code / data selectors -- confirm against the GDT.
m_interrupt_stack.ip = reinterpret_cast<vaddr_t>(start_kernel_thread);
m_interrupt_stack.cs = 0x08;
m_interrupt_stack.flags = 0x202;
m_interrupt_stack.sp = kernel_stack_top() - 4 * sizeof(uintptr_t);
m_interrupt_stack.ss = 0x10;
memset(&m_interrupt_registers, 0, sizeof(InterruptRegisters));
}
bool Thread::is_interrupted_by_signal()
@ -244,7 +270,7 @@ namespace Kernel
{
if (!is_userspace() || m_state != State::Executing)
return false;
auto& interrupt_stack = *reinterpret_cast<InterruptStack*>(interrupt_stack_base() + interrupt_stack_size() - sizeof(InterruptStack));
auto& interrupt_stack = *reinterpret_cast<InterruptStack*>(kernel_stack_top() - sizeof(InterruptStack));
if (!GDT::is_user_segment(interrupt_stack.cs))
return false;
uint64_t full_pending_mask = m_signal_pending_mask | process().signal_pending_mask();;
@ -255,7 +281,7 @@ namespace Kernel
{
if (!is_userspace() || m_state != State::Executing)
return false;
auto& interrupt_stack = *reinterpret_cast<InterruptStack*>(interrupt_stack_base() + interrupt_stack_size() - sizeof(InterruptStack));
auto& interrupt_stack = *reinterpret_cast<InterruptStack*>(kernel_stack_top() - sizeof(InterruptStack));
return interrupt_stack.ip == (uintptr_t)signal_trampoline;
}
@ -266,7 +292,7 @@ namespace Kernel
SpinLockGuard _(m_signal_lock);
auto& interrupt_stack = *reinterpret_cast<InterruptStack*>(interrupt_stack_base() + interrupt_stack_size() - sizeof(InterruptStack));
auto& interrupt_stack = *reinterpret_cast<InterruptStack*>(kernel_stack_top() - sizeof(InterruptStack));
ASSERT(GDT::is_user_segment(interrupt_stack.cs));
if (signal == 0)
@ -396,22 +422,24 @@ namespace Kernel
return {};
}
// Sanity check for the saved stack pointer: returns normally when m_sp lies within the
// thread stack or the interrupt stack (both bounds inclusive), and panics with both
// ranges printed otherwise.
// NOTE(review): in this diff listing the function looks like it is being removed -- confirm.
void Thread::validate_stack() const
{
if (stack_base() <= m_sp && m_sp <= stack_base() + stack_size())
return;
if (interrupt_stack_base() <= m_sp && m_sp <= interrupt_stack_base() + interrupt_stack_size())
return;
Kernel::panic("sp {8H}, stack {8H}->{8H}, interrupt_stack {8H}->{8H}", m_sp,
stack_base(), stack_base() + stack_size(),
interrupt_stack_base(), interrupt_stack_base() + interrupt_stack_size()
);
}
// Exit path of a thread; must run on the thread that is exiting (asserted) and is not
// expected to return (ASSERT_NOT_REACHED at the end).
//
// If the thread has a process and is not already the process-cleanup run
// (m_delete_process unset), the process is notified through on_thread_exit(). A true
// result turns this thread into the process cleanup thread (setup_process_cleanup(),
// then yield); in every other case the scheduler is asked to terminate this thread.
//
// NOTE(review): this listing appears to be a rendered diff with the +/- markers stripped;
// the unconditional terminate_thread() call right after the ASSERT is presumably the
// removed old body -- confirm against the real file.
void Thread::on_exit()
{
ASSERT(this == &Thread::current());
Scheduler::get().terminate_thread(this);
if (!m_delete_process && has_process())
{
if (process().on_thread_exit(*this))
{
// setup_process_cleanup() asserts that interrupts are disabled.
Processor::set_interrupt_state(InterruptState::Disabled);
setup_process_cleanup();
Scheduler::get().yield();
}
else
Scheduler::get().terminate_thread(this);
}
else
{
Scheduler::get().terminate_thread(this);
}
ASSERT_NOT_REACHED();
}

View File

@ -2,34 +2,29 @@
# Program entry point: initialises libc, runs the global constructors, calls main and
# passes main's return value to exit.
#
# NOTE(review): this listing appears to be a rendered diff with the +/- markers stripped,
# so the register-based (old) and stack-based (new) argument handling are interleaved
# below. Confirm against the real file before relying on the instruction order.
.global _start
_start:
# Set up end of the stack frame linked list.
movq $0, %rbp
pushq %rbp # rip=0
pushq %rbp # rbp=0
movq %rsp, %rbp
# STACK LAYOUT
# null
# argc
# argv
# envp
# Save argc, argv, environ
pushq %rdx
pushq %rsi
pushq %rdi
xorq %rbp, %rbp
# Prepare malloc, environment
movq %rdx, %rdi
# init libc
# First argument is read from the top of the stack
# (presumably envp, per the layout comment above -- confirm).
movq 0(%rsp), %rdi
call _init_libc
# Call global constructors
# call global constructors
call _init
# Restore argc, argv, environ
popq %rdi
popq %rsi
popq %rdx
# Run main
# call main
# The three arguments of main are loaded from 16(%rsp), 8(%rsp) and 0(%rsp)
# (presumably argc, argv, envp per the layout comment above -- confirm).
movq 16(%rsp), %rdi
movq 8(%rsp), %rsi
movq 0(%rsp), %rdx
call main
# Cleanly exit the process
movl %eax, %edi
# call exit
movq %rax, %rdi
call exit
.size _start, . - _start

View File

@ -19,7 +19,12 @@ else
DISK_ARGS="-device ahci,id=ahci -device ide-hd,drive=disk,bus=ahci.0"
fi
qemu-system-$BANAN_ARCH \
QEMU_ARCH=$BANAN_ARCH
if [ $BANAN_ARCH = "i686" ]; then
QEMU_ARCH=i386
fi
qemu-system-$QEMU_ARCH \
-m 1G \
-smp 4 \
$BIOS_ARGS \

View File

@ -260,7 +260,7 @@ i64 puzzle2(FILE* fp)
clock_gettime(CLOCK_MONOTONIC, &time_stop);
u64 duration_us = (time_stop.tv_sec * 1'000'000 + time_stop.tv_nsec / 1'000) - (time_start.tv_sec * 1'000'000 + time_start.tv_nsec / 1'000);
printf("took %lu.%03lu ms, estimate %lu s\n", duration_us / 1000, duration_us % 1000, (values_sorted[0].size() - xi - 2) * duration_us / 1'000'000);
printf("step took %" PRIu64 ".%03" PRIu64 " ms, estimate %" PRIu64 " s\n", duration_us / 1000, duration_us % 1000, (values_sorted[0].size() - xi - 2) * duration_us / 1'000'000);
}
return result;

View File

@ -87,7 +87,7 @@ BAN::ErrorOr<BAN::UniqPtr<Image>> load_netbpm(const void* mmap_addr, size_t size
return BAN::Error::from_errno(EINVAL);
}
printf("Netbpm image %" PRIuPTR "x%" PRIuPTR "\n", *width, *height);
printf("Netbpm image %" PRIu64 "x%" PRIu64 "\n", *width, *height);
BAN::Vector<Image::Color> bitmap;
TRY(bitmap.resize(*width * *height));

View File

@ -152,7 +152,7 @@ int test2_job2()
return 1;
}
*(size_t*)addr = 0x123456789;
*(size_t*)addr = 0x12345678;
if (msync(addr, sizeof(size_t), MS_SYNC) == -1)
{