Kernel/LibC: Implement basic epoll

This implementation tracks interest per inode, whereas Linux tracks it per
file descriptor. If I start finding ports/software that rely on epoll
allowing duplicate inodes, I will do what Linux does.

I'm probably missing several epoll_notify calls, which may cause hangs, but
the system seems to work fine :dd:
This commit is contained in:
Bananymous 2025-05-13 10:10:35 +03:00
parent 143a00626b
commit 1bcd1edbf5
43 changed files with 627 additions and 119 deletions

View File

@ -17,6 +17,7 @@ set(KERNEL_SOURCES
kernel/Device/RandomDevice.cpp
kernel/Device/ZeroDevice.cpp
kernel/ELF.cpp
kernel/Epoll.cpp
kernel/Errors.cpp
kernel/FS/DevFS/FileSystem.cpp
kernel/FS/Ext2/FileSystem.cpp

View File

@ -3,7 +3,7 @@
namespace Kernel
{
class DebugDevice : public CharacterDevice
class DebugDevice final : public CharacterDevice
{
public:
static BAN::ErrorOr<BAN::RefPtr<DebugDevice>> create(mode_t, uid_t, gid_t);
@ -24,6 +24,7 @@ namespace Kernel
virtual bool can_read_impl() const override { return false; }
virtual bool can_write_impl() const override { return true; }
virtual bool has_error_impl() const override { return false; }
virtual bool has_hangup_impl() const override { return false; }
private:
const dev_t m_rdev;

View File

@ -38,6 +38,7 @@ namespace Kernel
virtual bool can_read_impl() const override { return true; }
virtual bool can_write_impl() const override { return true; }
virtual bool has_error_impl() const override { return false; }
virtual bool has_hangup_impl() const override { return false; }
private:
FramebufferDevice(mode_t mode, uid_t uid, gid_t gid, dev_t rdev, paddr_t paddr, uint32_t width, uint32_t height, uint32_t pitch, uint8_t bpp);

View File

@ -5,7 +5,7 @@
namespace Kernel
{
class NullDevice : public CharacterDevice
class NullDevice final : public CharacterDevice
{
public:
static BAN::ErrorOr<BAN::RefPtr<NullDevice>> create(mode_t, uid_t, gid_t);
@ -26,6 +26,7 @@ namespace Kernel
virtual bool can_read_impl() const override { return false; }
virtual bool can_write_impl() const override { return true; }
virtual bool has_error_impl() const override { return false; }
virtual bool has_hangup_impl() const override { return false; }
private:
const dev_t m_rdev;

View File

@ -3,7 +3,7 @@
namespace Kernel
{
class RandomDevice : public CharacterDevice
class RandomDevice final : public CharacterDevice
{
public:
static BAN::ErrorOr<BAN::RefPtr<RandomDevice>> create(mode_t, uid_t, gid_t);
@ -24,6 +24,7 @@ namespace Kernel
virtual bool can_read_impl() const override { return true; }
virtual bool can_write_impl() const override { return false; }
virtual bool has_error_impl() const override { return false; }
virtual bool has_hangup_impl() const override { return false; }
private:
const dev_t m_rdev;

View File

@ -3,7 +3,7 @@
namespace Kernel
{
class ZeroDevice : public CharacterDevice
class ZeroDevice final : public CharacterDevice
{
public:
static BAN::ErrorOr<BAN::RefPtr<ZeroDevice>> create(mode_t, uid_t, gid_t);
@ -24,6 +24,7 @@ namespace Kernel
virtual bool can_read_impl() const override { return true; }
virtual bool can_write_impl() const override { return false; }
virtual bool has_error_impl() const override { return false; }
virtual bool has_hangup_impl() const override { return false; }
private:
const dev_t m_rdev;

View File

@ -0,0 +1,68 @@
#pragma once
#include <BAN/CircularQueue.h>
#include <BAN/HashMap.h>
#include <BAN/HashSet.h>
#include <kernel/FS/Inode.h>
#include <sys/epoll.h>
namespace Kernel
{
// Kernel epoll instance, exposed as an inode so it can live behind a file
// descriptor. Interest is tracked per inode: ctl() rejects a second
// EPOLL_CTL_ADD for an inode that is already registered.
class Epoll final : public Inode
{
public:
	// Allocates a new, empty epoll instance (ENOMEM if allocation fails).
	static BAN::ErrorOr<BAN::RefPtr<Epoll>> create();
	// Detaches this epoll from every inode it is still listening on.
	~Epoll();

	// Applies EPOLL_CTL_ADD / EPOLL_CTL_MOD / EPOLL_CTL_DEL for `inode`.
	// Unknown ops yield EINVAL.
	BAN::ErrorOr<void> ctl(int op, BAN::RefPtr<Inode> inode, epoll_event event);
	// Writes ready events into `events` and returns how many were written.
	// Blocks until at least one event is ready or until `waketime_ns`
	// (an absolute time compared against SystemTimer's ns_since_boot) passes.
	BAN::ErrorOr<size_t> wait(BAN::Span<epoll_event> events, uint64_t waketime_ns);
	// Marks `event` bits as pending for `inode` (if it is listened on and the
	// bits are of interest) and wakes threads blocked in wait().
	void notify(BAN::RefPtr<Inode> inode, uint32_t event);

private:
	Epoll() = default;

public:
	// The inode metadata getters below return fixed placeholder values; this
	// inode reports no filesystem (filesystem() returns nullptr).
	ino_t ino() const override { return 0; }
	Mode mode() const override { return { Mode::IRUSR | Mode::IWUSR }; }
	nlink_t nlink() const override { return 0; }
	uid_t uid() const override { return 0; }
	gid_t gid() const override { return 0; }
	off_t size() const override { return 0; }
	timespec atime() const override { return {}; }
	timespec mtime() const override { return {}; }
	timespec ctime() const override { return {}; }
	blksize_t blksize() const override { return PAGE_SIZE; }
	blkcnt_t blocks() const override { return 0; }
	dev_t dev() const override { return 0; }
	dev_t rdev() const override { return 0; }

	bool is_epoll() const override { return true; }

	const FileSystem* filesystem() const override { return nullptr; }

	// The epoll inode itself never reports readiness, errors or hangup.
	bool can_read_impl() const override { return false; }
	bool can_write_impl() const override { return false; }
	bool has_error_impl() const override { return false; }
	bool has_hangup_impl() const override { return false; }

	BAN::ErrorOr<void> fsync_impl() override { return {}; }

private:
	// Hashes a RefPtr<Inode> by the address of the inode it points to.
	struct InodeRefPtrHash
	{
		// const-qualified so the functor can also be invoked through a const object
		BAN::hash_t operator()(const BAN::RefPtr<Inode>& inode) const
		{
			return BAN::hash<const Inode*>()(inode.ptr());
		}
	};

private:
	// Blocker that wait() sleeps on and notify() unblocks.
	ThreadBlocker m_thread_blocker;
	// Pending event bits per inode; wait() drops entries whose bits reach zero.
	BAN::HashMap<BAN::RefPtr<Inode>, uint32_t, InodeRefPtrHash> m_ready_events;
	// Registered interest (event mask + user data) per inode.
	BAN::HashMap<BAN::RefPtr<Inode>, epoll_event, InodeRefPtrHash> m_listening_events;
};
}

View File

@ -51,6 +51,7 @@ namespace Kernel
virtual bool can_read_impl() const override { return true; }
virtual bool can_write_impl() const override { return true; }
virtual bool has_error_impl() const override { return false; }
virtual bool has_hangup_impl() const override { return false; }
private:
// Returns maximum number of data blocks in use

View File

@ -49,6 +49,7 @@ namespace Kernel
virtual bool can_read_impl() const override { return true; }
virtual bool can_write_impl() const override { return true; }
virtual bool has_error_impl() const override { return false; }
virtual bool has_hangup_impl() const override { return false; }
private:
FATInode(FATFS& fs, const FAT::DirectoryEntry& entry, ino_t ino, uint32_t block_count)

View File

@ -1,6 +1,7 @@
#pragma once
#include <BAN/ByteSpan.h>
#include <BAN/LinkedList.h>
#include <BAN/RefPtr.h>
#include <BAN/String.h>
#include <BAN/StringView.h>
@ -19,9 +20,8 @@
namespace Kernel
{
class FileSystem;
class FileBackedRegion;
class FileSystem;
class SharedFileData;
class Inode : public BAN::RefCounted<Inode>
@ -85,6 +85,7 @@ namespace Kernel
virtual dev_t rdev() const = 0;
virtual bool is_device() const { return false; }
virtual bool is_epoll() const { return false; }
virtual bool is_pipe() const { return false; }
virtual bool is_tty() const { return false; }
@ -123,9 +124,14 @@ namespace Kernel
bool can_read() const;
bool can_write() const;
bool has_error() const;
bool has_hangup() const;
BAN::ErrorOr<long> ioctl(int request, void* arg);
BAN::ErrorOr<void> add_epoll(class Epoll*);
void del_epoll(class Epoll*);
void epoll_notify(uint32_t event);
protected:
// Directory API
virtual BAN::ErrorOr<BAN::RefPtr<Inode>> find_inode_impl(BAN::StringView) { return BAN::Error::from_errno(ENOTSUP); }
@ -160,6 +166,7 @@ namespace Kernel
virtual bool can_read_impl() const = 0;
virtual bool can_write_impl() const = 0;
virtual bool has_error_impl() const = 0;
virtual bool has_hangup_impl() const = 0;
virtual BAN::ErrorOr<long> ioctl_impl(int, void*) { return BAN::Error::from_errno(ENOTSUP); }
@ -168,6 +175,7 @@ namespace Kernel
private:
BAN::WeakPtr<SharedFileData> m_shared_region;
BAN::LinkedList<class Epoll*> m_epolls;
friend class FileBackedRegion;
friend class OpenFileDescriptorSet;
friend class SharedFileData;

View File

@ -40,6 +40,7 @@ namespace Kernel
virtual bool can_read_impl() const override { return m_buffer_size > 0; }
virtual bool can_write_impl() const override { return true; }
virtual bool has_error_impl() const override { return false; }
virtual bool has_hangup_impl() const override { return m_writing_count == 0; }
private:
Pipe(const Credentials&);

View File

@ -47,6 +47,7 @@ namespace Kernel
virtual bool can_read_impl() const override { return true; }
virtual bool can_write_impl() const override { return false; }
virtual bool has_error_impl() const override { return false; }
virtual bool has_hangup_impl() const override { return false; }
private:
ProcROProcessInode(Process&, size_t (Process::*)(off_t, BAN::ByteSpan) const, TmpFileSystem&, const TmpInodeInfo&);
@ -72,6 +73,7 @@ namespace Kernel
virtual bool can_read_impl() const override { return true; }
virtual bool can_write_impl() const override { return false; }
virtual bool has_error_impl() const override { return false; }
virtual bool has_hangup_impl() const override { return false; }
private:
ProcROInode(size_t (*callback)(off_t, BAN::ByteSpan), TmpFileSystem&, const TmpInodeInfo&);

View File

@ -80,6 +80,7 @@ namespace Kernel
virtual bool can_read_impl() const override { return true; }
virtual bool can_write_impl() const override { return true; }
virtual bool has_error_impl() const override { return false; }
virtual bool has_hangup_impl() const override { return false; }
private:
TmpFileInode(TmpFileSystem&, ino_t, const TmpInodeInfo&);
@ -102,6 +103,7 @@ namespace Kernel
virtual bool can_read_impl() const override { return false; }
virtual bool can_write_impl() const override { return false; }
virtual bool has_error_impl() const override { return false; }
virtual bool has_hangup_impl() const override { return false; }
private:
TmpSocketInode(TmpFileSystem&, ino_t, const TmpInodeInfo&);
@ -123,6 +125,7 @@ namespace Kernel
virtual bool can_read_impl() const override { return false; }
virtual bool can_write_impl() const override { return false; }
virtual bool has_error_impl() const override { return false; }
virtual bool has_hangup_impl() const override { return false; }
private:
TmpSymlinkInode(TmpFileSystem&, ino_t, const TmpInodeInfo&);
@ -153,6 +156,7 @@ namespace Kernel
virtual bool can_read_impl() const override { return false; }
virtual bool can_write_impl() const override { return false; }
virtual bool has_error_impl() const override { return false; }
virtual bool has_hangup_impl() const override { return false; }
private:
template<TmpFuncs::for_each_valid_entry_callback F>

View File

@ -31,7 +31,7 @@ namespace Kernel
bool can_read_impl() const override { SpinLockGuard _(m_event_lock); return m_event_count > 0; }
bool can_write_impl() const override { return false; }
bool has_error_impl() const override { return false; }
bool has_hangup_impl() const override { return false; }
private:
BAN::ErrorOr<size_t> read_non_block(BAN::ByteSpan);
@ -64,7 +64,7 @@ namespace Kernel
public:
static BAN::ErrorOr<BAN::RefPtr<KeyboardDevice>> create(mode_t mode, uid_t uid, gid_t gid);
void notify() { m_thread_blocker.unblock(); }
void notify();
private:
KeyboardDevice(mode_t mode, uid_t uid, gid_t gid);
@ -73,6 +73,7 @@ namespace Kernel
bool can_read_impl() const override;
bool can_write_impl() const override { return false; }
bool has_error_impl() const override { return false; }
bool has_hangup_impl() const override { return false; }
BAN::StringView name() const final override { return m_name; }
dev_t rdev() const final override { return m_rdev; }
@ -90,7 +91,7 @@ namespace Kernel
public:
static BAN::ErrorOr<BAN::RefPtr<MouseDevice>> create(mode_t mode, uid_t uid, gid_t gid);
void notify() { m_thread_blocker.unblock(); }
void notify();
private:
MouseDevice(mode_t mode, uid_t uid, gid_t gid);
@ -99,6 +100,7 @@ namespace Kernel
bool can_read_impl() const override;
bool can_write_impl() const override { return false; }
bool has_error_impl() const override { return false; }
bool has_hangup_impl() const override { return false; }
BAN::StringView name() const final override { return m_name; }
dev_t rdev() const final override { return m_rdev; }

View File

@ -28,7 +28,7 @@ namespace Kernel
virtual bool link_up() override { return m_link_up; }
virtual int link_speed() override;
virtual size_t payload_mtu() const { return E1000_RX_BUFFER_SIZE - sizeof(EthernetHeader); }
virtual size_t payload_mtu() const override { return E1000_RX_BUFFER_SIZE - sizeof(EthernetHeader); }
virtual void handle_irq() final override;
@ -50,6 +50,7 @@ namespace Kernel
virtual bool can_read_impl() const override { return false; }
virtual bool can_write_impl() const override { return false; }
virtual bool has_error_impl() const override { return false; }
virtual bool has_hangup_impl() const override { return false; }
private:
BAN::ErrorOr<void> read_mac_address();

View File

@ -30,6 +30,7 @@ namespace Kernel
bool can_read_impl() const override { return false; }
bool can_write_impl() const override { return false; }
bool has_error_impl() const override { return false; }
bool has_hangup_impl() const override { return false; }
private:
SpinLock m_buffer_lock;

View File

@ -36,6 +36,7 @@ namespace Kernel
virtual bool can_read_impl() const override { return false; }
virtual bool can_write_impl() const override { return false; }
virtual bool has_error_impl() const override { return false; }
virtual bool has_hangup_impl() const override { return false; }
private:
BAN::ErrorOr<void> reset();

View File

@ -67,6 +67,7 @@ namespace Kernel
virtual bool can_read_impl() const override;
virtual bool can_write_impl() const override;
virtual bool has_error_impl() const override { return false; }
virtual bool has_hangup_impl() const override;
private:
enum class State

View File

@ -40,6 +40,7 @@ namespace Kernel
virtual bool can_read_impl() const override { return !m_packets.empty(); }
virtual bool can_write_impl() const override { return true; }
virtual bool has_error_impl() const override { return false; }
virtual bool has_hangup_impl() const override { return false; }
private:
UDPSocket(NetworkLayer&, const Socket::Info&);

View File

@ -1,5 +1,6 @@
#pragma once
#include <BAN/CircularQueue.h>
#include <BAN/Queue.h>
#include <BAN/WeakPtr.h>
#include <kernel/FS/Socket.h>
@ -28,6 +29,7 @@ namespace Kernel
virtual bool can_read_impl() const override;
virtual bool can_write_impl() const override;
virtual bool has_error_impl() const override { return false; }
virtual bool has_hangup_impl() const override;
private:
UnixDomainSocket(Socket::Type, const Socket::Info&);
@ -48,7 +50,7 @@ namespace Kernel
mutable BAN::Atomic<bool> target_closed { false };
BAN::WeakPtr<UnixDomainSocket> connection;
BAN::Queue<BAN::RefPtr<UnixDomainSocket>> pending_connections;
ThreadBlocker pending_thread_blocker;
ThreadBlocker pending_thread_blocker;
SpinLock pending_lock;
};
@ -67,7 +69,7 @@ namespace Kernel
size_t m_packet_size_total { 0 };
BAN::UniqPtr<VirtualRange> m_packet_buffer;
SpinLock m_packet_lock;
ThreadBlocker m_packet_thread_blocker;
ThreadBlocker m_packet_thread_blocker;
friend class BAN::RefPtr<UnixDomainSocket>;
};

View File

@ -24,6 +24,8 @@
#include <sys/time.h>
#include <termios.h>
struct epoll_event;
namespace Kernel
{
@ -130,6 +132,10 @@ namespace Kernel
BAN::ErrorOr<long> sys_pselect(sys_pselect_t* arguments);
BAN::ErrorOr<long> sys_epoll_create1(int flags);
BAN::ErrorOr<long> sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event* event);
BAN::ErrorOr<long> sys_epoll_pwait2(int epfd, struct epoll_event* events, int maxevents, const struct timespec* timeout, const sigset_t* sigmask);
BAN::ErrorOr<long> sys_pipe(int fildes[2]);
BAN::ErrorOr<long> sys_dup2(int fildes, int fildes2);

View File

@ -27,6 +27,7 @@ namespace Kernel
virtual bool can_read_impl() const override { return false; }
virtual bool can_write_impl() const override { return false; }
virtual bool has_error_impl() const override { return false; }
virtual bool has_hangup_impl() const override { return false; }
private:
NVMeController(PCI::Device& pci_device);

View File

@ -17,7 +17,7 @@ namespace Kernel
virtual uint64_t total_size() const override { return m_block_size * m_block_count; }
virtual dev_t rdev() const override { return m_rdev; }
virtual BAN::StringView name() const { return m_name; }
virtual BAN::StringView name() const override { return m_name; }
private:
NVMeNamespace(NVMeController&, uint32_t ns_index, uint32_t nsid, uint64_t block_count, uint32_t block_size);

View File

@ -53,6 +53,7 @@ namespace Kernel
virtual bool can_read_impl() const override { return true; }
virtual bool can_write_impl() const override { return true; }
virtual bool has_error_impl() const override { return false; }
virtual bool has_hangup_impl() const override { return false; }
private:
const dev_t m_rdev;

View File

@ -47,6 +47,7 @@ namespace Kernel
virtual bool can_read_impl() const override { return true; }
virtual bool can_write_impl() const override { return true; }
virtual bool has_error_impl() const override { return false; }
virtual bool has_hangup_impl() const override { return false; }
private:
Mutex m_mutex;

View File

@ -29,6 +29,7 @@ namespace Kernel
bool can_read_impl() const override { SpinLockGuard _(m_buffer_lock); return m_buffer_size > 0; }
bool can_write_impl() const override { SpinLockGuard _(m_buffer_lock); return m_buffer_size < m_buffer->size(); }
bool has_error_impl() const override { return false; }
bool has_hangup_impl() const override { return !m_slave.valid(); }
BAN::ErrorOr<long> ioctl_impl(int, void*) override;
@ -63,6 +64,8 @@ namespace Kernel
protected:
bool putchar_impl(uint8_t ch) override;
bool has_hangup_impl() const override { return !m_master.valid(); }
BAN::ErrorOr<long> ioctl_impl(int, void*) override;
private:

View File

@ -54,6 +54,7 @@ namespace Kernel
virtual bool can_read_impl() const override { return m_output.flush; }
virtual bool can_write_impl() const override { return true; }
virtual bool has_error_impl() const override { return false; }
virtual bool has_hangup_impl() const override { return false; }
protected:
TTY(mode_t mode, uid_t uid, gid_t gid);

View File

@ -73,6 +73,7 @@ namespace Kernel::ACPI
bool can_read_impl() const override { return true; }
bool can_write_impl() const override { return false; }
bool has_error_impl() const override { return false; }
bool has_hangup_impl() const override { return false; }
private:
BatteryInfoInode(AML::Namespace& acpi_namespace, AML::Scope&& battery_path, AML::NameString&& method, size_t index, ino_t ino, const TmpInodeInfo& info)

137
kernel/kernel/Epoll.cpp Normal file
View File

@ -0,0 +1,137 @@
#include <kernel/Epoll.h>
#include <kernel/Lock/LockGuard.h>
#include <kernel/Timer/Timer.h>
namespace Kernel
{
// Allocates an Epoll and hands the raw pointer over to a RefPtr via adopt().
// Returns ENOMEM when the allocation yields a null pointer.
BAN::ErrorOr<BAN::RefPtr<Epoll>> Epoll::create()
{
	if (auto* instance = new Epoll())
		return BAN::RefPtr<Epoll>::adopt(instance);
	return BAN::Error::from_errno(ENOMEM);
}
// On destruction, unregister this epoll from every inode that still has it
// in its listener list, so no inode keeps a pointer to the destroyed object.
Epoll::~Epoll()
{
	for (auto it = m_listening_events.begin(); it != m_listening_events.end(); ++it)
	{
		// copy the entry; only the inode is needed here
		auto [inode, unused_event] = *it;
		inode->del_epoll(this);
	}
}
// Adds, modifies or removes the interest entry of `inode`.
//   EPOLL_CTL_ADD: fails with EEXIST if the inode is already registered.
//   EPOLL_CTL_MOD: fails with ENOENT if the inode is not registered.
//   EPOLL_CTL_DEL: fails with ENOENT if the inode is not registered.
//   any other op:  fails with EINVAL.
// Allocation failures are returned to the caller as errors.
BAN::ErrorOr<void> Epoll::ctl(int op, BAN::RefPtr<Inode> inode, epoll_event event)
{
	LockGuard _(m_mutex);

	auto it = m_listening_events.find(inode);

	switch (op)
	{
		case EPOLL_CTL_ADD:
			if (it != m_listening_events.end())
				return BAN::Error::from_errno(EEXIST);
			// Do every fallible step before touching the maps.
			// NOTE(review): the MUST inserts below rely on reserve() making
			// them infallible -- confirm against BAN::HashMap.
			TRY(m_listening_events.reserve(m_listening_events.size() + 1));
			TRY(m_ready_events.reserve(m_listening_events.size() + 1));
			TRY(inode->add_epoll(this));
			MUST(m_listening_events.insert(inode, event));
			// Start with all requested bits pending; wait() filters out the
			// ones that are not actually ready.
			MUST(m_ready_events.insert(inode, event.events));
			return {};
		case EPOLL_CTL_MOD:
			if (it == m_listening_events.end())
				return BAN::Error::from_errno(ENOENT);
			// The inode may no longer have a ready entry, so this can insert
			// and therefore fail; propagate the error instead of panicking,
			// and only update the interest entry once it has succeeded.
			TRY(m_ready_events.emplace_or_assign(inode, event.events));
			it->value = event;
			return {};
		case EPOLL_CTL_DEL:
			if (it == m_listening_events.end())
				return BAN::Error::from_errno(ENOENT);
			m_listening_events.remove(it);
			m_ready_events.remove(inode);
			inode->del_epoll(this);
			return {};
	}

	return BAN::Error::from_errno(EINVAL);
}
// Collects ready events into `event_span` and returns the number written.
//
// Each pass re-validates the pending bits of every ready entry against the
// inode's current state. If nothing is ready, the thread blocks on
// m_thread_blocker and retries until `waketime_ns` (absolute, compared
// against SystemTimer ns_since_boot) has passed. Returns 0 on timeout and
// propagates the error of an interrupted block.
BAN::ErrorOr<size_t> Epoll::wait(BAN::Span<epoll_event> event_span, uint64_t waketime_ns)
{
	size_t count = 0;

	for (;;)
	{
		{
			LockGuard _(m_mutex);

			// Stale entries are only flagged during the reporting pass and
			// purged afterwards. Removing while reporting would force a
			// restart from begin() and report earlier entries a second time.
			bool has_stale_entries = false;

			for (auto it = m_ready_events.begin(); it != m_ready_events.end() && count < event_span.size(); it++)
			{
				auto& [inode, events] = *it;

				auto& listen = m_listening_events[inode];
				// EPOLLERR and EPOLLHUP are always reported, even if not requested
				const uint32_t listen_mask = (listen.events & (EPOLLIN | EPOLLOUT)) | EPOLLERR | EPOLLHUP;

				events &= listen_mask;

				// Drop pending bits that the inode no longer backs up
#define CHECK_EVENT_BIT(mask, func) \
				if ((events & mask) && !inode->func()) \
					events &= ~mask;
				CHECK_EVENT_BIT(EPOLLIN, can_read);
				CHECK_EVENT_BIT(EPOLLOUT, can_write);
				CHECK_EVENT_BIT(EPOLLERR, has_error);
				CHECK_EVENT_BIT(EPOLLHUP, has_hangup);
#undef CHECK_EVENT_BIT

				if (events == 0)
				{
					has_stale_entries = true;
					continue;
				}

				event_span[count++] = {
					.events = events,
					.data = listen.data,
				};

				// One-shot: disarm the entry after its first report
				if (listen.events & EPOLLONESHOT)
					listen.events = 0;
				// Edge-triggered: consume the reported bits until the next notify()
				if (listen.events & EPOLLET)
					events &= ~listen_mask;
			}

			if (has_stale_entries)
			{
				for (auto it = m_ready_events.begin(); it != m_ready_events.end();)
				{
					if (it->value != 0)
					{
						it++;
						continue;
					}
					// restart after a removal instead of reusing the removed iterator
					m_ready_events.remove(it);
					it = m_ready_events.begin();
				}
			}
		}

		if (count)
			break;

		const uint64_t current_ns = SystemTimer::get().ns_since_boot();
		if (current_ns >= waketime_ns)
			break;
		// Sleep at most 100 ms per iteration so readiness is re-polled
		// periodically (presumably to cover inodes that never call
		// epoll_notify -- confirm).
		const uint64_t timeout_ns = BAN::Math::min<uint64_t>(100'000'000, waketime_ns - current_ns);
		TRY(Thread::current().block_or_eintr_or_timeout_ns(m_thread_blocker, timeout_ns, false));
	}

	return count;
}
// Called when `inode` has new activity. Records the event bits this epoll is
// interested in as pending and wakes up threads blocked in wait().
// Does nothing if the inode is not registered or no bit is of interest.
void Epoll::notify(BAN::RefPtr<Inode> inode, uint32_t event)
{
	LockGuard _(m_mutex);

	auto listening = m_listening_events.find(inode);
	if (listening == m_listening_events.end())
		return;

	// EPOLLERR and EPOLLHUP always pass; EPOLLIN/EPOLLOUT only when requested
	const uint32_t interest_mask = (listening->value.events & (EPOLLIN | EPOLLOUT)) | EPOLLERR | EPOLLHUP;
	uint32_t pending = event & interest_mask;
	if (pending == 0)
		return;

	auto ready = m_ready_events.find(inode);
	if (ready == m_ready_events.end())
		MUST(m_ready_events.insert(inode, pending));
	else
		ready->value |= pending;

	m_thread_blocker.unblock();
}
}

View File

@ -1,3 +1,4 @@
#include <kernel/Epoll.h>
#include <kernel/FS/Inode.h>
#include <kernel/Lock/LockGuard.h>
#include <kernel/Memory/FileBackedRegion.h>
@ -249,10 +250,39 @@ namespace Kernel
return has_error_impl();
}
// Locked public wrapper: queries the inode-specific hangup state while
// holding the inode mutex.
bool Inode::has_hangup() const
{
	LockGuard guard(m_mutex);
	const bool hung_up = has_hangup_impl();
	return hung_up;
}
// Locked public wrapper: forwards the request to the inode-specific
// ioctl_impl() while holding the inode mutex and returns its result.
BAN::ErrorOr<long> Inode::ioctl(int request, void* arg)
{
	LockGuard guard(m_mutex);
	auto result = ioctl_impl(request, arg);
	return result;
}
// Registers `epoll` as a listener of this inode by appending it to m_epolls.
// Propagates the error if the list cannot grow.
// NOTE(review): m_epolls is modified here without taking m_mutex -- confirm
// that callers serialize access.
BAN::ErrorOr<void> Inode::add_epoll(Epoll* epoll)
{
	TRY(m_epolls.push_back(epoll));

	return {};
}
void Inode::del_epoll(class Epoll* epoll)
{
for (auto it = m_epolls.begin(); it != m_epolls.end(); it++)
{
if (*it != epoll)
continue;
m_epolls.remove(it);
break;
}
}
// Forwards `event` to every epoll instance registered on this inode.
void Inode::epoll_notify(uint32_t event)
{
	for (auto it = m_epolls.begin(); it != m_epolls.end(); ++it)
	{
		auto* listener = *it;
		listener->notify(this, event);
	}
}
}

View File

@ -3,7 +3,7 @@
#include <kernel/Thread.h>
#include <kernel/Timer/Timer.h>
#include <kernel/Process.h>
#include <sys/epoll.h>
namespace Kernel
{
@ -36,8 +36,10 @@ namespace Kernel
{
auto old_writing_count = m_writing_count.fetch_sub(1);
ASSERT(old_writing_count > 0);
if (old_writing_count == 1)
m_thread_blocker.unblock();
if (old_writing_count != 1)
return;
epoll_notify(EPOLLHUP);
m_thread_blocker.unblock();
}
BAN::ErrorOr<size_t> Pipe::read_impl(off_t, BAN::ByteSpan buffer)
@ -69,6 +71,8 @@ namespace Kernel
m_atime = SystemTimer::get().real_time();
epoll_notify(EPOLLOUT);
m_thread_blocker.unblock();
return to_copy;
@ -103,6 +107,8 @@ namespace Kernel
m_mtime = current_time;
m_ctime = current_time;
epoll_notify(EPOLLIN);
m_thread_blocker.unblock();
return to_copy;

View File

@ -226,7 +226,6 @@ namespace Kernel
}
/* SOCKET INODE */
BAN::ErrorOr<BAN::RefPtr<TmpSocketInode>> TmpSocketInode::create_new(TmpFileSystem& fs, mode_t mode, uid_t uid, gid_t gid)
{
auto info = create_inode_info(Mode::IFSOCK | mode, uid, gid);

View File

@ -6,6 +6,7 @@
#include <LibInput/KeyEvent.h>
#include <LibInput/MouseEvent.h>
#include <sys/epoll.h>
#include <sys/sysmacros.h>
namespace Kernel
@ -85,54 +86,58 @@ namespace Kernel
void InputDevice::add_event(BAN::ConstByteSpan event)
{
SpinLockGuard _(m_event_lock);
ASSERT(event.size() == m_event_size);
if (m_type == Type::Mouse && m_event_count > 0)
{
const size_t last_index = (m_event_head + m_max_event_count - 1) % m_max_event_count;
SpinLockGuard _(m_event_lock);
ASSERT(event.size() == m_event_size);
auto& last_event = *reinterpret_cast<LibInput::MouseEvent*>(&m_event_buffer[last_index * m_event_size]);
auto& curr_event = event.as<const LibInput::MouseEvent>();
if (last_event.type == LibInput::MouseEventType::MouseMoveEvent && curr_event.type == LibInput::MouseEventType::MouseMoveEvent)
if (m_type == Type::Mouse && m_event_count > 0)
{
last_event.move_event.rel_x += curr_event.move_event.rel_x;
last_event.move_event.rel_y += curr_event.move_event.rel_y;
return;
}
if (last_event.type == LibInput::MouseEventType::MouseScrollEvent && curr_event.type == LibInput::MouseEventType::MouseScrollEvent)
{
last_event.scroll_event.scroll += curr_event.scroll_event.scroll;
return;
}
}
const size_t last_index = (m_event_head + m_max_event_count - 1) % m_max_event_count;
if (m_type == Type::Keyboard)
{
auto& key_event = event.as<const LibInput::RawKeyEvent>();
if (key_event.modifier & LibInput::KeyEvent::Modifier::Pressed)
{
switch (key_event.keycode)
auto& last_event = *reinterpret_cast<LibInput::MouseEvent*>(&m_event_buffer[last_index * m_event_size]);
auto& curr_event = event.as<const LibInput::MouseEvent>();
if (last_event.type == LibInput::MouseEventType::MouseMoveEvent && curr_event.type == LibInput::MouseEventType::MouseMoveEvent)
{
case LibInput::keycode_function(1):
Processor::toggle_should_print_cpu_load();
break;
case LibInput::keycode_function(12):
Kernel::panic("Keyboard kernel panic :)");
break;
last_event.move_event.rel_x += curr_event.move_event.rel_x;
last_event.move_event.rel_y += curr_event.move_event.rel_y;
return;
}
if (last_event.type == LibInput::MouseEventType::MouseScrollEvent && curr_event.type == LibInput::MouseEventType::MouseScrollEvent)
{
last_event.scroll_event.scroll += curr_event.scroll_event.scroll;
return;
}
}
if (m_type == Type::Keyboard)
{
auto& key_event = event.as<const LibInput::RawKeyEvent>();
if (key_event.modifier & LibInput::KeyEvent::Modifier::Pressed)
{
switch (key_event.keycode)
{
case LibInput::keycode_function(1):
Processor::toggle_should_print_cpu_load();
break;
case LibInput::keycode_function(12):
Kernel::panic("Keyboard kernel panic :)");
break;
}
}
}
if (m_event_count == m_max_event_count)
{
m_event_tail = (m_event_tail + 1) % m_max_event_count;
m_event_count--;
}
memcpy(&m_event_buffer[m_event_head * m_event_size], event.data(), m_event_size);
m_event_head = (m_event_head + 1) % m_max_event_count;
m_event_count++;
}
if (m_event_count == m_max_event_count)
{
m_event_tail = (m_event_tail + 1) % m_max_event_count;
m_event_count--;
}
memcpy(&m_event_buffer[m_event_head * m_event_size], event.data(), m_event_size);
m_event_head = (m_event_head + 1) % m_max_event_count;
m_event_count++;
epoll_notify(EPOLLIN);
m_event_thread_blocker.unblock();
if (m_type == Type::Keyboard && s_keyboard_device)
@ -197,6 +202,12 @@ namespace Kernel
, m_name("keyboard"_sv)
{}
// Announces new keyboard input: first marks EPOLLIN as pending on every epoll
// listening on this device, then unblocks m_thread_blocker (presumably
// waking threads blocked reading the device -- confirm).
void KeyboardDevice::notify()
{
epoll_notify(EPOLLIN);
m_thread_blocker.unblock();
}
BAN::ErrorOr<size_t> KeyboardDevice::read_impl(off_t, BAN::ByteSpan buffer)
{
if (buffer.size() < sizeof(LibInput::RawKeyEvent))
@ -243,6 +254,12 @@ namespace Kernel
, m_name("mouse"_sv)
{}
// Announces new mouse input: first marks EPOLLIN as pending on every epoll
// listening on this device, then unblocks m_thread_blocker (presumably
// waking threads blocked reading the device -- confirm).
void MouseDevice::notify()
{
epoll_notify(EPOLLIN);
m_thread_blocker.unblock();
}
BAN::ErrorOr<size_t> MouseDevice::read_impl(off_t, BAN::ByteSpan buffer)
{
if (buffer.size() < sizeof(LibInput::MouseEvent))

View File

@ -6,6 +6,7 @@
#include <fcntl.h>
#include <netinet/in.h>
#include <sys/epoll.h>
namespace Kernel
{
@ -271,6 +272,11 @@ namespace Kernel
return m_send_window.data_size < m_send_window.buffer->size();
}
// A TCP socket counts as hung up once it has been connected at some point
// and is no longer in the Established state.
bool TCPSocket::has_hangup_impl() const
{
	if (!m_has_connected)
		return false;
	return m_state != State::Established;
}
BAN::ErrorOr<size_t> TCPSocket::return_with_maybe_zero()
{
ASSERT(m_state != State::Established);
@ -577,6 +583,8 @@ namespace Kernel
memcpy(buffer + m_recv_window.data_size, payload.data(), payload.size());
m_recv_window.data_size += payload.size();
epoll_notify(EPOLLIN);
dprintln_if(DEBUG_TCP, "Received {} bytes", payload.size());
if (m_next_flags == 0)
@ -726,6 +734,8 @@ namespace Kernel
m_send_window.current_seq += to_send;
i += to_send;
epoll_notify(EPOLLOUT);
}
m_send_window.last_send_ms = current_ms;

View File

@ -2,6 +2,8 @@
#include <kernel/Networking/UDPSocket.h>
#include <kernel/Thread.h>
#include <sys/epoll.h>
namespace Kernel
{
@ -70,6 +72,8 @@ namespace Kernel
m_packets.emplace(packet_info);
m_packet_total_size += payload.size();
epoll_notify(EPOLLIN);
m_packet_thread_blocker.unblock();
}

View File

@ -5,6 +5,7 @@
#include <kernel/Scheduler.h>
#include <fcntl.h>
#include <sys/epoll.h>
#include <sys/un.h>
namespace Kernel
@ -62,6 +63,7 @@ namespace Kernel
if (auto connection = connection_info.connection.lock(); connection && connection->m_info.has<ConnectionInfo>())
{
connection->m_info.get<ConnectionInfo>().target_closed = true;
connection->epoll_notify(EPOLLHUP);
connection->m_packet_thread_blocker.unblock();
}
}
@ -172,6 +174,8 @@ namespace Kernel
TRY(Thread::current().block_or_eintr_indefinite(target_info.pending_thread_blocker));
}
target->epoll_notify(EPOLLIN);
while (!connection_info.connection_done)
Processor::yield();
@ -263,6 +267,8 @@ namespace Kernel
if (!is_streaming())
m_packet_sizes.push(packet.size());
epoll_notify(EPOLLIN);
m_packet_thread_blocker.unblock();
m_packet_lock.unlock(state);
return {};
@ -295,6 +301,17 @@ namespace Kernel
return true;
}
// Reports hangup for connection-type sockets whose target_closed flag is set.
// Sockets without ConnectionInfo never report hangup.
bool UnixDomainSocket::has_hangup_impl() const
{
	if (!m_info.has<ConnectionInfo>())
		return false;
	return m_info.get<ConnectionInfo>().target_closed;
}
BAN::ErrorOr<size_t> UnixDomainSocket::sendto_impl(BAN::ConstByteSpan message, const sockaddr* address, socklen_t address_len)
{
if (message.size() > s_packet_buffer_size)
@ -390,6 +407,8 @@ namespace Kernel
m_packet_thread_blocker.unblock();
m_packet_lock.unlock(state);
epoll_notify(EPOLLOUT);
return nread;
}

View File

@ -2,6 +2,7 @@
#include <BAN/StringView.h>
#include <kernel/ACPI/ACPI.h>
#include <kernel/ELF.h>
#include <kernel/Epoll.h>
#include <kernel/FS/DevFS/FileSystem.h>
#include <kernel/FS/ProcFS/FileSystem.h>
#include <kernel/FS/VirtualFileSystem.h>
@ -1452,21 +1453,19 @@ namespace Kernel
return TRY(inode->ioctl(request, arg));
}
BAN::ErrorOr<long> Process::sys_pselect(sys_pselect_t* _arguments)
BAN::ErrorOr<long> Process::sys_pselect(sys_pselect_t* user_arguments)
{
sys_pselect_t arguments;
{
LockGuard _(m_process_lock);
TRY(validate_pointer_access(_arguments, sizeof(sys_pselect_t), false));
arguments = *_arguments;
TRY(validate_pointer_access(user_arguments, sizeof(sys_pselect_t), false));
arguments = *user_arguments;
}
MemoryRegion* readfd_region = nullptr;
MemoryRegion* writefd_region = nullptr;
MemoryRegion* errorfd_region = nullptr;
MemoryRegion* timeout_region = nullptr;
MemoryRegion* sigmask_region = nullptr;
BAN::ScopeGuard _([&] {
if (readfd_region)
@ -1475,75 +1474,57 @@ namespace Kernel
writefd_region->unpin();
if (errorfd_region)
errorfd_region->unpin();
if (timeout_region)
timeout_region->unpin();
if (sigmask_region)
sigmask_region->unpin();
});
readfd_region = TRY(validate_and_pin_pointer_access(arguments.readfds, sizeof(fd_set), true));
writefd_region = TRY(validate_and_pin_pointer_access(arguments.writefds, sizeof(fd_set), true));
errorfd_region = TRY(validate_and_pin_pointer_access(arguments.errorfds, sizeof(fd_set), true));
timeout_region = TRY(validate_and_pin_pointer_access(arguments.timeout, sizeof(timespec), false));
sigmask_region = TRY(validate_and_pin_pointer_access(arguments.sigmask, sizeof(sigset_t), false));
const auto old_sigmask = Thread::current().m_signal_block_mask;
if (arguments.sigmask)
{
LockGuard _(m_process_lock);
TRY(validate_pointer_access(arguments.sigmask, sizeof(sigset_t), false));
Thread::current().m_signal_block_mask = *arguments.sigmask;
}
BAN::ScopeGuard sigmask_restore([old_sigmask] { Thread::current().m_signal_block_mask = old_sigmask; });
uint64_t timedout_ns = SystemTimer::get().ns_since_boot();
uint64_t waketime_ns = BAN::numeric_limits<uint64_t>::max();
if (arguments.timeout)
{
timedout_ns += arguments.timeout->tv_sec * 1'000'000'000;
timedout_ns += arguments.timeout->tv_nsec;
LockGuard _(m_process_lock);
TRY(validate_pointer_access(arguments.timeout, sizeof(timespec), false));
waketime_ns =
SystemTimer::get().ns_since_boot() +
(arguments.timeout->tv_sec * 1'000'000'000) +
arguments.timeout->tv_nsec;
}
fd_set readfds; FD_ZERO(&readfds);
fd_set writefds; FD_ZERO(&writefds);
fd_set errorfds; FD_ZERO(&errorfds);
int set_bits = 0;
for (;;)
auto epoll = TRY(Epoll::create());
for (int fd = 0; fd < user_arguments->nfds; fd++)
{
auto update_fds =
[&](int fd, fd_set* source, fd_set* dest, bool (Inode::*func)() const)
{
if (source == nullptr)
return;
uint32_t events = 0;
if (arguments.readfds && FD_ISSET(fd, arguments.readfds))
events |= EPOLLIN;
if (arguments.writefds && FD_ISSET(fd, arguments.writefds))
events |= EPOLLOUT;
if (arguments.errorfds && FD_ISSET(fd, arguments.errorfds))
events |= EPOLLERR;
if (events == 0)
continue;
if (!FD_ISSET(fd, source))
return;
auto inode_or_error = m_open_file_descriptors.inode_of(fd);
if (inode_or_error.is_error())
continue;
auto inode_or_error = m_open_file_descriptors.inode_of(fd);
if (inode_or_error.is_error())
return;
auto inode = inode_or_error.release_value();
if ((inode.ptr()->*func)())
{
FD_SET(fd, dest);
set_bits++;
}
};
for (int i = 0; i < arguments.nfds; i++)
{
update_fds(i, arguments.readfds, &readfds, &Inode::can_read);
update_fds(i, arguments.writefds, &writefds, &Inode::can_write);
update_fds(i, arguments.errorfds, &errorfds, &Inode::has_error);
}
if (set_bits > 0)
break;
if (arguments.timeout && SystemTimer::get().ns_since_boot() >= timedout_ns)
break;
// FIXME: implement some multi thread blocker system?
TRY(Thread::current().sleep_or_eintr_ms(1));
TRY(epoll->ctl(EPOLL_CTL_ADD, inode_or_error.release_value(), { .events = events, .data = { .fd = fd }}));
}
BAN::Vector<epoll_event> event_buffer;
TRY(event_buffer.resize(user_arguments->nfds));
const size_t waited_events = TRY(epoll->wait(event_buffer.span(), waketime_ns));
if (arguments.readfds)
FD_ZERO(arguments.readfds);
if (arguments.writefds)
@ -1551,17 +1532,98 @@ namespace Kernel
if (arguments.errorfds)
FD_ZERO(arguments.errorfds);
for (int i = 0; i < arguments.nfds; i++)
for (size_t i = 0; i < waited_events; i++)
{
if (arguments.readfds && FD_ISSET(i, &readfds))
FD_SET(i, arguments.readfds);
if (arguments.writefds && FD_ISSET(i, &writefds))
FD_SET(i, arguments.writefds);
if (arguments.errorfds && FD_ISSET(i, &errorfds))
FD_SET(i, arguments.errorfds);
const int fd = event_buffer[i].data.fd;
if (arguments.readfds && event_buffer[i].events & (EPOLLIN | EPOLLHUP))
FD_SET(fd, arguments.readfds);
if (arguments.writefds && event_buffer[i].events & (EPOLLOUT))
FD_SET(fd, arguments.writefds);
if (arguments.errorfds && event_buffer[i].events & (EPOLLERR))
FD_SET(fd, arguments.errorfds);
}
return set_bits;
return waited_events;
}
// Creates a new epoll object and opens it in this process' file descriptor table.
//
// flags: 0 or EPOLL_CLOEXEC. Any other bit is rejected with EINVAL.
// Returns whatever m_open_file_descriptors.open() yields for the new file;
// errors from Epoll::create(), the path append and open() are propagated.
BAN::ErrorOr<long> Process::sys_epoll_create1(int flags)
{
	// EPOLL_CLOEXEC is the only flag understood here.
	if ((flags & ~EPOLL_CLOEXEC) != 0)
		return BAN::Error::from_errno(EINVAL);

	// Translate the epoll flag into the matching open flag; the file is always O_RDWR.
	const int open_flags = O_RDWR | ((flags & EPOLL_CLOEXEC) ? O_CLOEXEC : 0);

	VirtualFileSystem::File file;
	file.inode = TRY(Epoll::create());
	TRY(file.canonical_path.append("<epoll>"_sv));

	return TRY(m_open_file_descriptors.open(BAN::move(file), open_flags));
}
// Adds, modifies or removes the inode behind `fd` in the epoll object behind `epfd`.
//
// epfd:       descriptor that must refer to an epoll inode.
// op:         EPOLL_CTL_ADD / EPOLL_CTL_MOD / EPOLL_CTL_DEL; forwarded to Epoll::ctl().
// fd:         descriptor whose inode is the target of the operation.
// user_event: userspace event description. Required for every operation except
//             EPOLL_CTL_DEL, where it is ignored entirely.
//
// Returns 0 on success. Fails with EINVAL when epfd == fd, when a non-DEL
// operation is given a null event, or when epfd is not an epoll inode. Errors
// from descriptor lookup, user pointer validation and Epoll::ctl() are propagated.
BAN::ErrorOr<long> Process::sys_epoll_ctl(int epfd, int op, int fd, epoll_event* user_event)
{
	if (epfd == fd)
		return BAN::Error::from_errno(EINVAL);

	// Only ADD/MOD carry an event. For DEL the pointer must not be touched, so a
	// stale or garbage pointer passed by userspace cannot fail the removal.
	const bool op_has_event = (op != EPOLL_CTL_DEL);
	if (op_has_event && user_event == nullptr)
		return BAN::Error::from_errno(EINVAL);

	auto epoll_inode = TRY(m_open_file_descriptors.inode_of(epfd));
	if (!epoll_inode->is_epoll())
		return BAN::Error::from_errno(EINVAL);

	auto inode = TRY(m_open_file_descriptors.inode_of(fd));

	// Copy the event into kernel memory while holding the process lock so the
	// validated mapping is still the one being read.
	epoll_event event {};
	if (op_has_event)
	{
		LockGuard _(m_process_lock);
		TRY(validate_pointer_access(user_event, sizeof(epoll_event), false));
		event = *user_event;
	}

	// is_epoll() was checked above, so the downcast is to the inode's real type.
	TRY(static_cast<Epoll*>(epoll_inode.ptr())->ctl(op, inode, event));
	return 0;
}
// Waits for events on the epoll object behind `epfd`.
//
// epfd:      descriptor that must refer to an epoll inode.
// events:    userspace buffer with room for `maxevents` entries; pinned for the
//            duration of the wait and filled by Epoll::wait().
// maxevents: capacity of `events`, must be positive.
// timeout:   optional relative timeout; null means no deadline.
// sigmask:   optional signal mask installed for the duration of the wait and
//            restored on return.
//
// Returns the value produced by Epoll::wait(). Fails with EINVAL for a
// non-positive maxevents, a non-epoll epfd or an out-of-range timespec, and with
// EFAULT for a null events buffer. Other errors are propagated.
BAN::ErrorOr<long> Process::sys_epoll_pwait2(int epfd, epoll_event* events, int maxevents, const timespec* timeout, const sigset_t* sigmask)
{
	if (maxevents <= 0)
		return BAN::Error::from_errno(EINVAL);

	auto epoll_inode = TRY(m_open_file_descriptors.inode_of(epfd));
	if (!epoll_inode->is_epoll())
		return BAN::Error::from_errno(EINVAL);

	// Absolute wake-up time in nanoseconds since boot; max() means "no deadline".
	uint64_t waketime_ns = BAN::numeric_limits<uint64_t>::max();
	if (timeout)
	{
		LockGuard _(m_process_lock);
		TRY(validate_pointer_access(timeout, sizeof(timespec), false));

		// Read the user value once so validation and use see the same data.
		const timespec ts = *timeout;
		if (ts.tv_sec < 0 || ts.tv_nsec < 0 || ts.tv_nsec >= 1'000'000'000)
			return BAN::Error::from_errno(EINVAL);

		// Saturate instead of overflowing: a timeout too large to represent
		// keeps waketime_ns at max(), i.e. it behaves like no deadline.
		const uint64_t now_ns      = SystemTimer::get().ns_since_boot();
		const uint64_t headroom_ns = BAN::numeric_limits<uint64_t>::max() - now_ns;
		const uint64_t sec         = static_cast<uint64_t>(ts.tv_sec);
		const uint64_t nsec        = static_cast<uint64_t>(ts.tv_nsec);
		if (nsec <= headroom_ns && sec <= (headroom_ns - nsec) / 1'000'000'000)
			waketime_ns = now_ns + sec * 1'000'000'000 + nsec;
	}

	// NOTE(review): the unpin guard below tolerates a null region, which suggests
	// validate_and_pin_pointer_access() may accept a null pointer. Reject it here
	// so Epoll::wait() is never handed a null buffer -- confirm against the helper.
	if (events == nullptr)
		return BAN::Error::from_errno(EFAULT);

	auto* events_region = TRY(validate_and_pin_pointer_access(events, maxevents * sizeof(epoll_event), true));
	BAN::ScopeGuard events_unpin([events_region] {
		if (events_region)
			events_region->unpin();
	});

	// Temporarily replace the thread's blocked-signal mask; the guard restores
	// the previous mask on every exit path after this point.
	const auto old_sigmask = Thread::current().m_signal_block_mask;
	if (sigmask)
	{
		LockGuard _(m_process_lock);
		TRY(validate_pointer_access(sigmask, sizeof(sigset_t), false));
		Thread::current().m_signal_block_mask = *sigmask;
	}
	BAN::ScopeGuard sigmask_restore([old_sigmask] { Thread::current().m_signal_block_mask = old_sigmask; });

	// is_epoll() was checked above, so the downcast is to the inode's real type.
	return TRY(static_cast<Epoll*>(epoll_inode.ptr())->wait(BAN::Span<epoll_event>(events, maxevents), waketime_ns));
}
BAN::ErrorOr<long> Process::sys_pipe(int fildes[2])

View File

@ -4,6 +4,7 @@
#include <BAN/ScopeGuard.h>
#include <sys/epoll.h>
#include <sys/ioctl.h>
#include <sys/sysmacros.h>
@ -95,6 +96,8 @@ namespace Kernel
reinterpret_cast<uint8_t*>(m_buffer->vaddr())[(m_buffer_tail + m_buffer_size) % m_buffer->size()] = ch;
m_buffer_size++;
epoll_notify(EPOLLIN);
m_buffer_blocker.unblock();
return true;
@ -127,6 +130,8 @@ namespace Kernel
m_buffer_size -= to_copy;
m_buffer_tail = (m_buffer_tail + to_copy) % m_buffer->size();
epoll_notify(EPOLLOUT);
m_buffer_lock.unlock(state);
return to_copy;
@ -137,7 +142,6 @@ namespace Kernel
auto slave = m_slave.lock();
if (!slave)
return BAN::Error::from_errno(ENODEV);
for (size_t i = 0; i < buffer.size(); i++)
slave->handle_input_byte(buffer[i]);
return buffer.size();

View File

@ -15,6 +15,7 @@
#include <string.h>
#include <stropts.h>
#include <sys/banan-os.h>
#include <sys/epoll.h>
#include <sys/sysmacros.h>
namespace Kernel
@ -40,6 +41,7 @@ namespace Kernel
bool can_read_impl() const override { return false; }
bool can_write_impl() const override { return false; }
bool has_error_impl() const override { return false; }
bool has_hangup_impl() const override { return false; }
private:
DevTTY(mode_t mode, uid_t uid, gid_t gid)
@ -238,6 +240,7 @@ namespace Kernel
if (ch == '\x04' && (m_termios.c_lflag & ICANON))
{
m_output.flush = true;
epoll_notify(EPOLLIN);
m_output.thread_blocker.unblock();
return;
}
@ -280,6 +283,7 @@ namespace Kernel
if (ch == '\n' || !(m_termios.c_lflag & ICANON))
{
m_output.flush = true;
epoll_notify(EPOLLIN);
m_output.thread_blocker.unblock();
}
}

View File

@ -30,6 +30,7 @@ set(LIBC_SOURCES
string.cpp
strings.cpp
sys/banan-os.cpp
sys/epoll.cpp
sys/ioctl.cpp
sys/mman.cpp
sys/resource.cpp

View File

@ -0,0 +1,49 @@
#ifndef _SYS_EPOLL_H
#define _SYS_EPOLL_H 1

#include <sys/cdefs.h>

__BEGIN_DECLS

#include <stdint.h>
#include <signal.h>

/* User data attached to a registered descriptor; handed back unchanged with each event. */
typedef union epoll_data
{
	void* ptr;
	int fd;
	uint32_t u32;
	uint64_t u64;
} epoll_data_t;

/* One registration (epoll_ctl) or one reported event (epoll_wait and friends). */
struct epoll_event
{
	uint32_t events;   /* bitmask of EPOLL* event flags */
	epoll_data_t data; /* caller-defined payload */
};

/* Operations accepted by epoll_ctl(). */
#define EPOLL_CTL_ADD 0
#define EPOLL_CTL_MOD 1
#define EPOLL_CTL_DEL 2

/* Event flags stored in epoll_event::events. */
#define EPOLLIN      (1 << 0)
#define EPOLLOUT     (1 << 1)
#define EPOLLPRI     (1 << 2)
#define EPOLLERR     (1 << 3)
#define EPOLLHUP     (1 << 4)
#define EPOLLET      (1 << 5)
#define EPOLLONESHOT (1 << 6)

/* Flags accepted by epoll_create1(). */
#define EPOLL_CLOEXEC 1

int epoll_create(int size);
int epoll_create1(int flags);
int epoll_ctl(int epfd, int op, int fd, struct epoll_event* event);
int epoll_wait(int epfd, struct epoll_event* events, int maxevents, int timeout);
int epoll_pwait(int epfd, struct epoll_event* events, int maxevents, int timeout, const sigset_t* sigmask);
int epoll_pwait2(int epfd, struct epoll_event* events, int maxevents, const struct timespec* timeout, const sigset_t* sigmask);

__END_DECLS

#endif

View File

@ -98,6 +98,9 @@ __BEGIN_DECLS
O(SYS_PTHREAD_EXIT, pthread_exit) \
O(SYS_PTHREAD_JOIN, pthread_join) \
O(SYS_PTHREAD_SELF, pthread_self) \
O(SYS_EPOLL_CREATE1, epoll_create1) \
O(SYS_EPOLL_CTL, epoll_ctl) \
O(SYS_EPOLL_PWAIT2, epoll_pwait2) \
enum Syscall
{

View File

@ -0,0 +1,49 @@
#include <errno.h>
#include <unistd.h>
#include <sys/epoll.h>
#include <sys/syscall.h>
// Legacy entry point: `size` is only checked for being positive and is
// otherwise unused; the work is delegated to epoll_create1(0).
// Returns -1 with errno set to EINVAL when size <= 0.
int epoll_create(int size)
{
	if (size > 0)
		return epoll_create1(0);

	errno = EINVAL;
	return -1;
}
// Forwards `flags` to the SYS_EPOLL_CREATE1 syscall and returns its result.
int epoll_create1(int flags)
{
	const auto result = syscall(SYS_EPOLL_CREATE1, flags);
	return static_cast<int>(result);
}
// Forwards all arguments to the SYS_EPOLL_CTL syscall and returns its result.
int epoll_ctl(int epfd, int op, int fd, struct epoll_event* event)
{
	const auto result = syscall(SYS_EPOLL_CTL, epfd, op, fd, event);
	return static_cast<int>(result);
}
// Same as epoll_pwait() without a signal mask.
int epoll_wait(int epfd, struct epoll_event* events, int maxevents, int timeout)
{
	const sigset_t* const no_sigmask = nullptr;
	return epoll_pwait(epfd, events, maxevents, timeout, no_sigmask);
}
// Millisecond-timeout front end for epoll_pwait2().
//
// timeout: milliseconds to wait. A negative value is passed on as a null
//          timespec pointer (no timeout); otherwise it is converted to a
//          timespec of whole seconds plus the remaining nanoseconds.
// Returns the result of epoll_pwait2().
int epoll_pwait(int epfd, struct epoll_event* events, int maxevents, int timeout, const sigset_t* sigmask)
{
	timespec ts;
	timespec* ts_ptr = nullptr;

	if (timeout >= 0)
	{
		// Each assignment is its own statement; the original chained them
		// with comma operators, which worked only by accident.
		ts.tv_sec = static_cast<time_t>(timeout / 1000);
		ts.tv_nsec = (timeout % 1000) * 1'000'000;
		ts_ptr = &ts;
	}

	return epoll_pwait2(epfd, events, maxevents, ts_ptr, sigmask);
}
// Forwards all arguments to the SYS_EPOLL_PWAIT2 syscall and returns its result.
int epoll_pwait2(int epfd, struct epoll_event* events, int maxevents, const struct timespec* timeout, const sigset_t* sigmask)
{
	const auto result = syscall(SYS_EPOLL_PWAIT2, epfd, events, maxevents, timeout, sigmask);
	return static_cast<int>(result);
}