From 1bcd1edbf5c08a184ed1e77deb259be1124d56c3 Mon Sep 17 00:00:00 2001 From: Bananymous Date: Tue, 13 May 2025 10:10:35 +0300 Subject: [PATCH] Kernel/LibC: Implement basic epoll This implementation is on top of inodes instead of fds as linux does it. If I start finding ports/software that relies on epoll allowing duplicate inodes, I will do what linux does. I'm probably missing multiple epoll_notify's which may cause hangs but the system seems to work fine :dd: --- kernel/CMakeLists.txt | 1 + kernel/include/kernel/Device/DebugDevice.h | 3 +- .../include/kernel/Device/FramebufferDevice.h | 1 + kernel/include/kernel/Device/NullDevice.h | 3 +- kernel/include/kernel/Device/RandomDevice.h | 3 +- kernel/include/kernel/Device/ZeroDevice.h | 3 +- kernel/include/kernel/Epoll.h | 68 +++++++ kernel/include/kernel/FS/Ext2/Inode.h | 1 + kernel/include/kernel/FS/FAT/Inode.h | 1 + kernel/include/kernel/FS/Inode.h | 12 +- kernel/include/kernel/FS/Pipe.h | 1 + kernel/include/kernel/FS/ProcFS/Inode.h | 2 + kernel/include/kernel/FS/TmpFS/Inode.h | 4 + kernel/include/kernel/Input/InputDevice.h | 8 +- .../include/kernel/Networking/E1000/E1000.h | 3 +- kernel/include/kernel/Networking/Loopback.h | 1 + .../kernel/Networking/RTL8169/RTL8169.h | 1 + kernel/include/kernel/Networking/TCPSocket.h | 1 + kernel/include/kernel/Networking/UDPSocket.h | 1 + .../include/kernel/Networking/UNIX/Socket.h | 6 +- kernel/include/kernel/Process.h | 6 + .../include/kernel/Storage/NVMe/Controller.h | 1 + .../include/kernel/Storage/NVMe/Namespace.h | 2 +- kernel/include/kernel/Storage/Partition.h | 1 + kernel/include/kernel/Storage/StorageDevice.h | 1 + .../include/kernel/Terminal/PseudoTerminal.h | 3 + kernel/include/kernel/Terminal/TTY.h | 1 + kernel/kernel/ACPI/BatterySystem.cpp | 1 + kernel/kernel/Epoll.cpp | 137 +++++++++++++ kernel/kernel/FS/Inode.cpp | 30 +++ kernel/kernel/FS/Pipe.cpp | 12 +- kernel/kernel/FS/TmpFS/Inode.cpp | 1 - kernel/kernel/Input/InputDevice.cpp | 95 +++++---- 
kernel/kernel/Networking/TCPSocket.cpp | 10 + kernel/kernel/Networking/UDPSocket.cpp | 4 + kernel/kernel/Networking/UNIX/Socket.cpp | 19 ++ kernel/kernel/Process.cpp | 186 ++++++++++++------ kernel/kernel/Terminal/PseudoTerminal.cpp | 6 +- kernel/kernel/Terminal/TTY.cpp | 4 + userspace/libraries/LibC/CMakeLists.txt | 1 + userspace/libraries/LibC/include/sys/epoll.h | 49 +++++ .../libraries/LibC/include/sys/syscall.h | 3 + userspace/libraries/LibC/sys/epoll.cpp | 49 +++++ 43 files changed, 627 insertions(+), 119 deletions(-) create mode 100644 kernel/include/kernel/Epoll.h create mode 100644 kernel/kernel/Epoll.cpp create mode 100644 userspace/libraries/LibC/include/sys/epoll.h create mode 100644 userspace/libraries/LibC/sys/epoll.cpp diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt index eb470b67..2f4e3bf4 100644 --- a/kernel/CMakeLists.txt +++ b/kernel/CMakeLists.txt @@ -17,6 +17,7 @@ set(KERNEL_SOURCES kernel/Device/RandomDevice.cpp kernel/Device/ZeroDevice.cpp kernel/ELF.cpp + kernel/Epoll.cpp kernel/Errors.cpp kernel/FS/DevFS/FileSystem.cpp kernel/FS/Ext2/FileSystem.cpp diff --git a/kernel/include/kernel/Device/DebugDevice.h b/kernel/include/kernel/Device/DebugDevice.h index 7403be70..074222c1 100644 --- a/kernel/include/kernel/Device/DebugDevice.h +++ b/kernel/include/kernel/Device/DebugDevice.h @@ -3,7 +3,7 @@ namespace Kernel { - class DebugDevice : public CharacterDevice + class DebugDevice final : public CharacterDevice { public: static BAN::ErrorOr> create(mode_t, uid_t, gid_t); @@ -24,6 +24,7 @@ namespace Kernel virtual bool can_read_impl() const override { return false; } virtual bool can_write_impl() const override { return true; } virtual bool has_error_impl() const override { return false; } + virtual bool has_hangup_impl() const override { return false; } private: const dev_t m_rdev; diff --git a/kernel/include/kernel/Device/FramebufferDevice.h b/kernel/include/kernel/Device/FramebufferDevice.h index a9093406..7c66c5fa 100644 --- 
a/kernel/include/kernel/Device/FramebufferDevice.h +++ b/kernel/include/kernel/Device/FramebufferDevice.h @@ -38,6 +38,7 @@ namespace Kernel virtual bool can_read_impl() const override { return true; } virtual bool can_write_impl() const override { return true; } virtual bool has_error_impl() const override { return false; } + virtual bool has_hangup_impl() const override { return false; } private: FramebufferDevice(mode_t mode, uid_t uid, gid_t gid, dev_t rdev, paddr_t paddr, uint32_t width, uint32_t height, uint32_t pitch, uint8_t bpp); diff --git a/kernel/include/kernel/Device/NullDevice.h b/kernel/include/kernel/Device/NullDevice.h index 2114aa0e..e305909a 100644 --- a/kernel/include/kernel/Device/NullDevice.h +++ b/kernel/include/kernel/Device/NullDevice.h @@ -5,7 +5,7 @@ namespace Kernel { - class NullDevice : public CharacterDevice + class NullDevice final : public CharacterDevice { public: static BAN::ErrorOr> create(mode_t, uid_t, gid_t); @@ -26,6 +26,7 @@ namespace Kernel virtual bool can_read_impl() const override { return false; } virtual bool can_write_impl() const override { return true; } virtual bool has_error_impl() const override { return false; } + virtual bool has_hangup_impl() const override { return false; } private: const dev_t m_rdev; diff --git a/kernel/include/kernel/Device/RandomDevice.h b/kernel/include/kernel/Device/RandomDevice.h index 4016908c..9254884d 100644 --- a/kernel/include/kernel/Device/RandomDevice.h +++ b/kernel/include/kernel/Device/RandomDevice.h @@ -3,7 +3,7 @@ namespace Kernel { - class RandomDevice : public CharacterDevice + class RandomDevice final : public CharacterDevice { public: static BAN::ErrorOr> create(mode_t, uid_t, gid_t); @@ -24,6 +24,7 @@ namespace Kernel virtual bool can_read_impl() const override { return true; } virtual bool can_write_impl() const override { return false; } virtual bool has_error_impl() const override { return false; } + virtual bool has_hangup_impl() const override { return false; } 
private: const dev_t m_rdev; diff --git a/kernel/include/kernel/Device/ZeroDevice.h b/kernel/include/kernel/Device/ZeroDevice.h index cf3b7339..bd9e3447 100644 --- a/kernel/include/kernel/Device/ZeroDevice.h +++ b/kernel/include/kernel/Device/ZeroDevice.h @@ -3,7 +3,7 @@ namespace Kernel { - class ZeroDevice : public CharacterDevice + class ZeroDevice final : public CharacterDevice { public: static BAN::ErrorOr> create(mode_t, uid_t, gid_t); @@ -24,6 +24,7 @@ namespace Kernel virtual bool can_read_impl() const override { return true; } virtual bool can_write_impl() const override { return false; } virtual bool has_error_impl() const override { return false; } + virtual bool has_hangup_impl() const override { return false; } private: const dev_t m_rdev; diff --git a/kernel/include/kernel/Epoll.h b/kernel/include/kernel/Epoll.h new file mode 100644 index 00000000..3ca50374 --- /dev/null +++ b/kernel/include/kernel/Epoll.h @@ -0,0 +1,68 @@ +#pragma once + +#include +#include +#include +#include + +#include + +namespace Kernel +{ + + class Epoll final : public Inode + { + public: + static BAN::ErrorOr> create(); + ~Epoll(); + + BAN::ErrorOr ctl(int op, BAN::RefPtr inode, epoll_event event); + BAN::ErrorOr wait(BAN::Span events, uint64_t waketime_ns); + + void notify(BAN::RefPtr inode, uint32_t event); + + private: + Epoll() = default; + + public: + ino_t ino() const override { return 0; } + Mode mode() const override { return { Mode::IRUSR | Mode::IWUSR }; } + nlink_t nlink() const override { return 0; } + uid_t uid() const override { return 0; } + gid_t gid() const override { return 0; } + off_t size() const override { return 0; } + timespec atime() const override { return {}; } + timespec mtime() const override { return {}; } + timespec ctime() const override { return {}; } + blksize_t blksize() const override { return PAGE_SIZE; } + blkcnt_t blocks() const override { return 0; } + dev_t dev() const override { return 0; } + dev_t rdev() const override { return 0; } + 
+ bool is_epoll() const override { return true; } + + const FileSystem* filesystem() const override { return nullptr; } + + bool can_read_impl() const override { return false; } + bool can_write_impl() const override { return false; } + bool has_error_impl() const override { return false; } + bool has_hangup_impl() const override { return false; } + + BAN::ErrorOr fsync_impl() override { return {}; } + + private: + struct InodeRefPtrHash + { + BAN::hash_t operator()(const BAN::RefPtr& inode) + { + return BAN::hash()(inode.ptr()); + } + }; + + private: + ThreadBlocker m_thread_blocker; + BAN::HashMap, uint32_t, InodeRefPtrHash> m_ready_events; + BAN::HashMap, epoll_event, InodeRefPtrHash> m_listening_events; + }; + +} diff --git a/kernel/include/kernel/FS/Ext2/Inode.h b/kernel/include/kernel/FS/Ext2/Inode.h index 8ca37125..274e9c1f 100644 --- a/kernel/include/kernel/FS/Ext2/Inode.h +++ b/kernel/include/kernel/FS/Ext2/Inode.h @@ -51,6 +51,7 @@ namespace Kernel virtual bool can_read_impl() const override { return true; } virtual bool can_write_impl() const override { return true; } virtual bool has_error_impl() const override { return false; } + virtual bool has_hangup_impl() const override { return false; } private: // Returns maximum number of data blocks in use diff --git a/kernel/include/kernel/FS/FAT/Inode.h b/kernel/include/kernel/FS/FAT/Inode.h index 8d248a9d..486e7502 100644 --- a/kernel/include/kernel/FS/FAT/Inode.h +++ b/kernel/include/kernel/FS/FAT/Inode.h @@ -49,6 +49,7 @@ namespace Kernel virtual bool can_read_impl() const override { return true; } virtual bool can_write_impl() const override { return true; } virtual bool has_error_impl() const override { return false; } + virtual bool has_hangup_impl() const override { return false; } private: FATInode(FATFS& fs, const FAT::DirectoryEntry& entry, ino_t ino, uint32_t block_count) diff --git a/kernel/include/kernel/FS/Inode.h b/kernel/include/kernel/FS/Inode.h index 0504334d..35edb475 100644 --- 
a/kernel/include/kernel/FS/Inode.h +++ b/kernel/include/kernel/FS/Inode.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include @@ -19,9 +20,8 @@ namespace Kernel { - class FileSystem; - class FileBackedRegion; + class FileSystem; class SharedFileData; class Inode : public BAN::RefCounted @@ -85,6 +85,7 @@ namespace Kernel virtual dev_t rdev() const = 0; virtual bool is_device() const { return false; } + virtual bool is_epoll() const { return false; } virtual bool is_pipe() const { return false; } virtual bool is_tty() const { return false; } @@ -123,9 +124,14 @@ namespace Kernel bool can_read() const; bool can_write() const; bool has_error() const; + bool has_hangup() const; BAN::ErrorOr ioctl(int request, void* arg); + BAN::ErrorOr add_epoll(class Epoll*); + void del_epoll(class Epoll*); + void epoll_notify(uint32_t event); + protected: // Directory API virtual BAN::ErrorOr> find_inode_impl(BAN::StringView) { return BAN::Error::from_errno(ENOTSUP); } @@ -160,6 +166,7 @@ namespace Kernel virtual bool can_read_impl() const = 0; virtual bool can_write_impl() const = 0; virtual bool has_error_impl() const = 0; + virtual bool has_hangup_impl() const = 0; virtual BAN::ErrorOr ioctl_impl(int, void*) { return BAN::Error::from_errno(ENOTSUP); } @@ -168,6 +175,7 @@ namespace Kernel private: BAN::WeakPtr m_shared_region; + BAN::LinkedList m_epolls; friend class FileBackedRegion; friend class OpenFileDescriptorSet; friend class SharedFileData; diff --git a/kernel/include/kernel/FS/Pipe.h b/kernel/include/kernel/FS/Pipe.h index ea464253..3058f9c0 100644 --- a/kernel/include/kernel/FS/Pipe.h +++ b/kernel/include/kernel/FS/Pipe.h @@ -40,6 +40,7 @@ namespace Kernel virtual bool can_read_impl() const override { return m_buffer_size > 0; } virtual bool can_write_impl() const override { return true; } virtual bool has_error_impl() const override { return false; } + virtual bool has_hangup_impl() const override { return m_writing_count == 0; } private: 
Pipe(const Credentials&); diff --git a/kernel/include/kernel/FS/ProcFS/Inode.h b/kernel/include/kernel/FS/ProcFS/Inode.h index 145f6593..c3f5f766 100644 --- a/kernel/include/kernel/FS/ProcFS/Inode.h +++ b/kernel/include/kernel/FS/ProcFS/Inode.h @@ -47,6 +47,7 @@ namespace Kernel virtual bool can_read_impl() const override { return true; } virtual bool can_write_impl() const override { return false; } virtual bool has_error_impl() const override { return false; } + virtual bool has_hangup_impl() const override { return false; } private: ProcROProcessInode(Process&, size_t (Process::*)(off_t, BAN::ByteSpan) const, TmpFileSystem&, const TmpInodeInfo&); @@ -72,6 +73,7 @@ namespace Kernel virtual bool can_read_impl() const override { return true; } virtual bool can_write_impl() const override { return false; } virtual bool has_error_impl() const override { return false; } + virtual bool has_hangup_impl() const override { return false; } private: ProcROInode(size_t (*callback)(off_t, BAN::ByteSpan), TmpFileSystem&, const TmpInodeInfo&); diff --git a/kernel/include/kernel/FS/TmpFS/Inode.h b/kernel/include/kernel/FS/TmpFS/Inode.h index 79bd9dd8..2131f2b4 100644 --- a/kernel/include/kernel/FS/TmpFS/Inode.h +++ b/kernel/include/kernel/FS/TmpFS/Inode.h @@ -80,6 +80,7 @@ namespace Kernel virtual bool can_read_impl() const override { return true; } virtual bool can_write_impl() const override { return true; } virtual bool has_error_impl() const override { return false; } + virtual bool has_hangup_impl() const override { return false; } private: TmpFileInode(TmpFileSystem&, ino_t, const TmpInodeInfo&); @@ -102,6 +103,7 @@ namespace Kernel virtual bool can_read_impl() const override { return false; } virtual bool can_write_impl() const override { return false; } virtual bool has_error_impl() const override { return false; } + virtual bool has_hangup_impl() const override { return false; } private: TmpSocketInode(TmpFileSystem&, ino_t, const TmpInodeInfo&); @@ -123,6 +125,7 @@ 
namespace Kernel virtual bool can_read_impl() const override { return false; } virtual bool can_write_impl() const override { return false; } virtual bool has_error_impl() const override { return false; } + virtual bool has_hangup_impl() const override { return false; } private: TmpSymlinkInode(TmpFileSystem&, ino_t, const TmpInodeInfo&); @@ -153,6 +156,7 @@ namespace Kernel virtual bool can_read_impl() const override { return false; } virtual bool can_write_impl() const override { return false; } virtual bool has_error_impl() const override { return false; } + virtual bool has_hangup_impl() const override { return false; } private: template diff --git a/kernel/include/kernel/Input/InputDevice.h b/kernel/include/kernel/Input/InputDevice.h index 5adf0b89..8c8fa5c5 100644 --- a/kernel/include/kernel/Input/InputDevice.h +++ b/kernel/include/kernel/Input/InputDevice.h @@ -31,7 +31,7 @@ namespace Kernel bool can_read_impl() const override { SpinLockGuard _(m_event_lock); return m_event_count > 0; } bool can_write_impl() const override { return false; } bool has_error_impl() const override { return false; } - + bool has_hangup_impl() const override { return false; } private: BAN::ErrorOr read_non_block(BAN::ByteSpan); @@ -64,7 +64,7 @@ namespace Kernel public: static BAN::ErrorOr> create(mode_t mode, uid_t uid, gid_t gid); - void notify() { m_thread_blocker.unblock(); } + void notify(); private: KeyboardDevice(mode_t mode, uid_t uid, gid_t gid); @@ -73,6 +73,7 @@ namespace Kernel bool can_read_impl() const override; bool can_write_impl() const override { return false; } bool has_error_impl() const override { return false; } + bool has_hangup_impl() const override { return false; } BAN::StringView name() const final override { return m_name; } dev_t rdev() const final override { return m_rdev; } @@ -90,7 +91,7 @@ namespace Kernel public: static BAN::ErrorOr> create(mode_t mode, uid_t uid, gid_t gid); - void notify() { m_thread_blocker.unblock(); } + void notify(); 
private: MouseDevice(mode_t mode, uid_t uid, gid_t gid); @@ -99,6 +100,7 @@ namespace Kernel bool can_read_impl() const override; bool can_write_impl() const override { return false; } bool has_error_impl() const override { return false; } + bool has_hangup_impl() const override { return false; } BAN::StringView name() const final override { return m_name; } dev_t rdev() const final override { return m_rdev; } diff --git a/kernel/include/kernel/Networking/E1000/E1000.h b/kernel/include/kernel/Networking/E1000/E1000.h index 34d3c720..f63d1410 100644 --- a/kernel/include/kernel/Networking/E1000/E1000.h +++ b/kernel/include/kernel/Networking/E1000/E1000.h @@ -28,7 +28,7 @@ namespace Kernel virtual bool link_up() override { return m_link_up; } virtual int link_speed() override; - virtual size_t payload_mtu() const { return E1000_RX_BUFFER_SIZE - sizeof(EthernetHeader); } + virtual size_t payload_mtu() const override { return E1000_RX_BUFFER_SIZE - sizeof(EthernetHeader); } virtual void handle_irq() final override; @@ -50,6 +50,7 @@ namespace Kernel virtual bool can_read_impl() const override { return false; } virtual bool can_write_impl() const override { return false; } virtual bool has_error_impl() const override { return false; } + virtual bool has_hangup_impl() const override { return false; } private: BAN::ErrorOr read_mac_address(); diff --git a/kernel/include/kernel/Networking/Loopback.h b/kernel/include/kernel/Networking/Loopback.h index e7875544..a7205cef 100644 --- a/kernel/include/kernel/Networking/Loopback.h +++ b/kernel/include/kernel/Networking/Loopback.h @@ -30,6 +30,7 @@ namespace Kernel bool can_read_impl() const override { return false; } bool can_write_impl() const override { return false; } bool has_error_impl() const override { return false; } + bool has_hangup_impl() const override { return false; } private: SpinLock m_buffer_lock; diff --git a/kernel/include/kernel/Networking/RTL8169/RTL8169.h b/kernel/include/kernel/Networking/RTL8169/RTL8169.h 
index 0d041084..d3d967be 100644 --- a/kernel/include/kernel/Networking/RTL8169/RTL8169.h +++ b/kernel/include/kernel/Networking/RTL8169/RTL8169.h @@ -36,6 +36,7 @@ namespace Kernel virtual bool can_read_impl() const override { return false; } virtual bool can_write_impl() const override { return false; } virtual bool has_error_impl() const override { return false; } + virtual bool has_hangup_impl() const override { return false; } private: BAN::ErrorOr reset(); diff --git a/kernel/include/kernel/Networking/TCPSocket.h b/kernel/include/kernel/Networking/TCPSocket.h index cf6b2e9c..c6d88a7a 100644 --- a/kernel/include/kernel/Networking/TCPSocket.h +++ b/kernel/include/kernel/Networking/TCPSocket.h @@ -67,6 +67,7 @@ namespace Kernel virtual bool can_read_impl() const override; virtual bool can_write_impl() const override; virtual bool has_error_impl() const override { return false; } + virtual bool has_hangup_impl() const override; private: enum class State diff --git a/kernel/include/kernel/Networking/UDPSocket.h b/kernel/include/kernel/Networking/UDPSocket.h index 0cbda27d..256ceb1d 100644 --- a/kernel/include/kernel/Networking/UDPSocket.h +++ b/kernel/include/kernel/Networking/UDPSocket.h @@ -40,6 +40,7 @@ namespace Kernel virtual bool can_read_impl() const override { return !m_packets.empty(); } virtual bool can_write_impl() const override { return true; } virtual bool has_error_impl() const override { return false; } + virtual bool has_hangup_impl() const override { return false; } private: UDPSocket(NetworkLayer&, const Socket::Info&); diff --git a/kernel/include/kernel/Networking/UNIX/Socket.h b/kernel/include/kernel/Networking/UNIX/Socket.h index cfbff8e1..6946ba33 100644 --- a/kernel/include/kernel/Networking/UNIX/Socket.h +++ b/kernel/include/kernel/Networking/UNIX/Socket.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -28,6 +29,7 @@ namespace Kernel virtual bool can_read_impl() const override; virtual bool can_write_impl() const 
override; virtual bool has_error_impl() const override { return false; } + virtual bool has_hangup_impl() const override; private: UnixDomainSocket(Socket::Type, const Socket::Info&); @@ -48,7 +50,7 @@ namespace Kernel mutable BAN::Atomic target_closed { false }; BAN::WeakPtr connection; BAN::Queue> pending_connections; - ThreadBlocker pending_thread_blocker; + ThreadBlocker pending_thread_blocker; SpinLock pending_lock; }; @@ -67,7 +69,7 @@ namespace Kernel size_t m_packet_size_total { 0 }; BAN::UniqPtr m_packet_buffer; SpinLock m_packet_lock; - ThreadBlocker m_packet_thread_blocker; + ThreadBlocker m_packet_thread_blocker; friend class BAN::RefPtr; }; diff --git a/kernel/include/kernel/Process.h b/kernel/include/kernel/Process.h index 7eec9f75..31e3e282 100644 --- a/kernel/include/kernel/Process.h +++ b/kernel/include/kernel/Process.h @@ -24,6 +24,8 @@ #include #include +struct epoll_event; + namespace Kernel { @@ -130,6 +132,10 @@ namespace Kernel BAN::ErrorOr sys_pselect(sys_pselect_t* arguments); + BAN::ErrorOr sys_epoll_create1(int flags); + BAN::ErrorOr sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event* event); + BAN::ErrorOr sys_epoll_pwait2(int epfd, struct epoll_event* events, int maxevents, const struct timespec* timeout, const sigset_t* sigmask); + BAN::ErrorOr sys_pipe(int fildes[2]); BAN::ErrorOr sys_dup2(int fildes, int fildes2); diff --git a/kernel/include/kernel/Storage/NVMe/Controller.h b/kernel/include/kernel/Storage/NVMe/Controller.h index ba509349..d3862db7 100644 --- a/kernel/include/kernel/Storage/NVMe/Controller.h +++ b/kernel/include/kernel/Storage/NVMe/Controller.h @@ -27,6 +27,7 @@ namespace Kernel virtual bool can_read_impl() const override { return false; } virtual bool can_write_impl() const override { return false; } virtual bool has_error_impl() const override { return false; } + virtual bool has_hangup_impl() const override { return false; } private: NVMeController(PCI::Device& pci_device); diff --git 
a/kernel/include/kernel/Storage/NVMe/Namespace.h b/kernel/include/kernel/Storage/NVMe/Namespace.h index d4085276..a25b2079 100644 --- a/kernel/include/kernel/Storage/NVMe/Namespace.h +++ b/kernel/include/kernel/Storage/NVMe/Namespace.h @@ -17,7 +17,7 @@ namespace Kernel virtual uint64_t total_size() const override { return m_block_size * m_block_count; } virtual dev_t rdev() const override { return m_rdev; } - virtual BAN::StringView name() const { return m_name; } + virtual BAN::StringView name() const override { return m_name; } private: NVMeNamespace(NVMeController&, uint32_t ns_index, uint32_t nsid, uint64_t block_count, uint32_t block_size); diff --git a/kernel/include/kernel/Storage/Partition.h b/kernel/include/kernel/Storage/Partition.h index 0e167a22..511e9c49 100644 --- a/kernel/include/kernel/Storage/Partition.h +++ b/kernel/include/kernel/Storage/Partition.h @@ -53,6 +53,7 @@ namespace Kernel virtual bool can_read_impl() const override { return true; } virtual bool can_write_impl() const override { return true; } virtual bool has_error_impl() const override { return false; } + virtual bool has_hangup_impl() const override { return false; } private: const dev_t m_rdev; diff --git a/kernel/include/kernel/Storage/StorageDevice.h b/kernel/include/kernel/Storage/StorageDevice.h index 3b909500..83ae8edd 100644 --- a/kernel/include/kernel/Storage/StorageDevice.h +++ b/kernel/include/kernel/Storage/StorageDevice.h @@ -47,6 +47,7 @@ namespace Kernel virtual bool can_read_impl() const override { return true; } virtual bool can_write_impl() const override { return true; } virtual bool has_error_impl() const override { return false; } + virtual bool has_hangup_impl() const override { return false; } private: Mutex m_mutex; diff --git a/kernel/include/kernel/Terminal/PseudoTerminal.h b/kernel/include/kernel/Terminal/PseudoTerminal.h index 29d80e82..0fd1db7e 100644 --- a/kernel/include/kernel/Terminal/PseudoTerminal.h +++ 
b/kernel/include/kernel/Terminal/PseudoTerminal.h @@ -29,6 +29,7 @@ namespace Kernel bool can_read_impl() const override { SpinLockGuard _(m_buffer_lock); return m_buffer_size > 0; } bool can_write_impl() const override { SpinLockGuard _(m_buffer_lock); return m_buffer_size < m_buffer->size(); } bool has_error_impl() const override { return false; } + bool has_hangup_impl() const override { return !m_slave.valid(); } BAN::ErrorOr ioctl_impl(int, void*) override; @@ -63,6 +64,8 @@ namespace Kernel protected: bool putchar_impl(uint8_t ch) override; + bool has_hangup_impl() const override { return !m_master.valid(); } + BAN::ErrorOr ioctl_impl(int, void*) override; private: diff --git a/kernel/include/kernel/Terminal/TTY.h b/kernel/include/kernel/Terminal/TTY.h index d2be6e63..5a6e9b7c 100644 --- a/kernel/include/kernel/Terminal/TTY.h +++ b/kernel/include/kernel/Terminal/TTY.h @@ -54,6 +54,7 @@ namespace Kernel virtual bool can_read_impl() const override { return m_output.flush; } virtual bool can_write_impl() const override { return true; } virtual bool has_error_impl() const override { return false; } + virtual bool has_hangup_impl() const override { return false; } protected: TTY(mode_t mode, uid_t uid, gid_t gid); diff --git a/kernel/kernel/ACPI/BatterySystem.cpp b/kernel/kernel/ACPI/BatterySystem.cpp index 8feb299b..2816a688 100644 --- a/kernel/kernel/ACPI/BatterySystem.cpp +++ b/kernel/kernel/ACPI/BatterySystem.cpp @@ -73,6 +73,7 @@ namespace Kernel::ACPI bool can_read_impl() const override { return true; } bool can_write_impl() const override { return false; } bool has_error_impl() const override { return false; } + bool has_hangup_impl() const override { return false; } private: BatteryInfoInode(AML::Namespace& acpi_namespace, AML::Scope&& battery_path, AML::NameString&& method, size_t index, ino_t ino, const TmpInodeInfo& info) diff --git a/kernel/kernel/Epoll.cpp b/kernel/kernel/Epoll.cpp new file mode 100644 index 00000000..7ba689f7 --- /dev/null +++ 
b/kernel/kernel/Epoll.cpp @@ -0,0 +1,137 @@ +#include +#include +#include + +namespace Kernel +{ + + BAN::ErrorOr> Epoll::create() + { + auto* epoll_ptr = new Epoll(); + if (epoll_ptr == nullptr) + return BAN::Error::from_errno(ENOMEM); + return BAN::RefPtr::adopt(epoll_ptr); + } + + Epoll::~Epoll() + { + for (auto [inode, _] : m_listening_events) + inode->del_epoll(this); + } + + BAN::ErrorOr Epoll::ctl(int op, BAN::RefPtr inode, epoll_event event) + { + LockGuard _(m_mutex); + + auto it = m_listening_events.find(inode); + + switch (op) + { + case EPOLL_CTL_ADD: + if (it != m_listening_events.end()) + return BAN::Error::from_errno(EEXIST); + TRY(m_listening_events.reserve(m_listening_events.size() + 1)); + TRY(m_ready_events.reserve(m_listening_events.size() + 1)); + TRY(inode->add_epoll(this)); + MUST(m_listening_events.insert(inode, event)); + MUST(m_ready_events.insert(inode, event.events)); + return {}; + case EPOLL_CTL_MOD: + if (it == m_listening_events.end()) + return BAN::Error::from_errno(ENOENT); + MUST(m_ready_events.emplace_or_assign(inode, event.events)); + it->value = event; + return {}; + case EPOLL_CTL_DEL: + if (it == m_listening_events.end()) + return BAN::Error::from_errno(ENOENT); + m_listening_events.remove(it); + m_ready_events.remove(inode); + inode->del_epoll(this); + return {}; + } + + return BAN::Error::from_errno(EINVAL); + } + + BAN::ErrorOr Epoll::wait(BAN::Span event_span, uint64_t waketime_ns) + { + size_t count = 0; + + for (;;) + { + { + LockGuard _(m_mutex); + for (auto it = m_ready_events.begin(); it != m_ready_events.end() && count < event_span.size();) + { + auto& [inode, events] = *it; + + auto& listen = m_listening_events[inode]; + const uint32_t listen_mask = (listen.events & (EPOLLIN | EPOLLOUT)) | EPOLLERR | EPOLLHUP; + + events &= listen_mask; +#define CHECK_EVENT_BIT(mask, func) \ + if ((events & mask) && !inode->func()) \ + events &= ~mask; + CHECK_EVENT_BIT(EPOLLIN, can_read); + CHECK_EVENT_BIT(EPOLLOUT, can_write); + 
CHECK_EVENT_BIT(EPOLLERR, has_error); + CHECK_EVENT_BIT(EPOLLHUP, has_hangup); +#undef CHECK_EVENT_BIT + + if (events == 0) + { + m_ready_events.remove(it); + it = m_ready_events.begin(); + continue; + } + + event_span[count++] = { + .events = events, + .data = listen.data, + }; + + if (listen.events & EPOLLONESHOT) + listen.events = 0; + + if (listen.events & EPOLLET) + events &= ~listen_mask; + + it++; + } + } + + if (count) + break; + + const uint64_t current_ns = SystemTimer::get().ns_since_boot(); + if (current_ns >= waketime_ns) + break; + const uint64_t timeout_ns = BAN::Math::min(100'000'000, waketime_ns - current_ns); + TRY(Thread::current().block_or_eintr_or_timeout_ns(m_thread_blocker, timeout_ns, false)); + } + + return count; + } + + void Epoll::notify(BAN::RefPtr inode, uint32_t event) + { + LockGuard _(m_mutex); + + auto listen_it = m_listening_events.find(inode); + if (listen_it == m_listening_events.end()) + return; + + event &= (listen_it->value.events & (EPOLLIN | EPOLLOUT)) | EPOLLERR | EPOLLHUP; + if (event == 0) + return; + + if (auto ready_it = m_ready_events.find(inode); ready_it != m_ready_events.end()) + ready_it->value |= event; + else + MUST(m_ready_events.insert(inode, event)); + + m_thread_blocker.unblock(); + } + +} diff --git a/kernel/kernel/FS/Inode.cpp b/kernel/kernel/FS/Inode.cpp index 68de32e6..69f795ba 100644 --- a/kernel/kernel/FS/Inode.cpp +++ b/kernel/kernel/FS/Inode.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -249,10 +250,39 @@ namespace Kernel return has_error_impl(); } + bool Inode::has_hangup() const + { + LockGuard _(m_mutex); + return has_hangup_impl(); + } + BAN::ErrorOr Inode::ioctl(int request, void* arg) { LockGuard _(m_mutex); return ioctl_impl(request, arg); } + BAN::ErrorOr Inode::add_epoll(class Epoll* epoll) + { + TRY(m_epolls.push_back(epoll)); + return {}; + } + + void Inode::del_epoll(class Epoll* epoll) + { + for (auto it = m_epolls.begin(); it != m_epolls.end(); it++) + { + if (*it != 
epoll) + continue; + m_epolls.remove(it); + break; + } + } + + void Inode::epoll_notify(uint32_t event) + { + for (auto* epoll : m_epolls) + epoll->notify(this, event); + } + } diff --git a/kernel/kernel/FS/Pipe.cpp b/kernel/kernel/FS/Pipe.cpp index f8d0b9bb..30199fbc 100644 --- a/kernel/kernel/FS/Pipe.cpp +++ b/kernel/kernel/FS/Pipe.cpp @@ -3,7 +3,7 @@ #include #include -#include +#include namespace Kernel { @@ -36,8 +36,10 @@ namespace Kernel { auto old_writing_count = m_writing_count.fetch_sub(1); ASSERT(old_writing_count > 0); - if (old_writing_count == 1) - m_thread_blocker.unblock(); + if (old_writing_count != 1) + return; + epoll_notify(EPOLLHUP); + m_thread_blocker.unblock(); } BAN::ErrorOr Pipe::read_impl(off_t, BAN::ByteSpan buffer) @@ -69,6 +71,8 @@ namespace Kernel m_atime = SystemTimer::get().real_time(); + epoll_notify(EPOLLOUT); + m_thread_blocker.unblock(); return to_copy; @@ -103,6 +107,8 @@ namespace Kernel m_mtime = current_time; m_ctime = current_time; + epoll_notify(EPOLLIN); + m_thread_blocker.unblock(); return to_copy; diff --git a/kernel/kernel/FS/TmpFS/Inode.cpp b/kernel/kernel/FS/TmpFS/Inode.cpp index a7d3d979..4e3eb048 100644 --- a/kernel/kernel/FS/TmpFS/Inode.cpp +++ b/kernel/kernel/FS/TmpFS/Inode.cpp @@ -226,7 +226,6 @@ namespace Kernel } /* SOCKET INODE */ - BAN::ErrorOr> TmpSocketInode::create_new(TmpFileSystem& fs, mode_t mode, uid_t uid, gid_t gid) { auto info = create_inode_info(Mode::IFSOCK | mode, uid, gid); diff --git a/kernel/kernel/Input/InputDevice.cpp b/kernel/kernel/Input/InputDevice.cpp index 8790c5bc..281e03ec 100644 --- a/kernel/kernel/Input/InputDevice.cpp +++ b/kernel/kernel/Input/InputDevice.cpp @@ -6,6 +6,7 @@ #include #include +#include #include namespace Kernel @@ -85,54 +86,58 @@ namespace Kernel void InputDevice::add_event(BAN::ConstByteSpan event) { - SpinLockGuard _(m_event_lock); - ASSERT(event.size() == m_event_size); - - if (m_type == Type::Mouse && m_event_count > 0) { - const size_t last_index = 
(m_event_head + m_max_event_count - 1) % m_max_event_count; + SpinLockGuard _(m_event_lock); + ASSERT(event.size() == m_event_size); - auto& last_event = *reinterpret_cast(&m_event_buffer[last_index * m_event_size]); - auto& curr_event = event.as(); - if (last_event.type == LibInput::MouseEventType::MouseMoveEvent && curr_event.type == LibInput::MouseEventType::MouseMoveEvent) + if (m_type == Type::Mouse && m_event_count > 0) { - last_event.move_event.rel_x += curr_event.move_event.rel_x; - last_event.move_event.rel_y += curr_event.move_event.rel_y; - return; - } - if (last_event.type == LibInput::MouseEventType::MouseScrollEvent && curr_event.type == LibInput::MouseEventType::MouseScrollEvent) - { - last_event.scroll_event.scroll += curr_event.scroll_event.scroll; - return; - } - } + const size_t last_index = (m_event_head + m_max_event_count - 1) % m_max_event_count; - if (m_type == Type::Keyboard) - { - auto& key_event = event.as(); - if (key_event.modifier & LibInput::KeyEvent::Modifier::Pressed) - { - switch (key_event.keycode) + auto& last_event = *reinterpret_cast(&m_event_buffer[last_index * m_event_size]); + auto& curr_event = event.as(); + if (last_event.type == LibInput::MouseEventType::MouseMoveEvent && curr_event.type == LibInput::MouseEventType::MouseMoveEvent) { - case LibInput::keycode_function(1): - Processor::toggle_should_print_cpu_load(); - break; - case LibInput::keycode_function(12): - Kernel::panic("Keyboard kernel panic :)"); - break; + last_event.move_event.rel_x += curr_event.move_event.rel_x; + last_event.move_event.rel_y += curr_event.move_event.rel_y; + return; + } + if (last_event.type == LibInput::MouseEventType::MouseScrollEvent && curr_event.type == LibInput::MouseEventType::MouseScrollEvent) + { + last_event.scroll_event.scroll += curr_event.scroll_event.scroll; + return; } } + + if (m_type == Type::Keyboard) + { + auto& key_event = event.as(); + if (key_event.modifier & LibInput::KeyEvent::Modifier::Pressed) + { + switch 
(key_event.keycode) + { + case LibInput::keycode_function(1): + Processor::toggle_should_print_cpu_load(); + break; + case LibInput::keycode_function(12): + Kernel::panic("Keyboard kernel panic :)"); + break; + } + } + } + + if (m_event_count == m_max_event_count) + { + m_event_tail = (m_event_tail + 1) % m_max_event_count; + m_event_count--; + } + + memcpy(&m_event_buffer[m_event_head * m_event_size], event.data(), m_event_size); + m_event_head = (m_event_head + 1) % m_max_event_count; + m_event_count++; } - if (m_event_count == m_max_event_count) - { - m_event_tail = (m_event_tail + 1) % m_max_event_count; - m_event_count--; - } - - memcpy(&m_event_buffer[m_event_head * m_event_size], event.data(), m_event_size); - m_event_head = (m_event_head + 1) % m_max_event_count; - m_event_count++; + epoll_notify(EPOLLIN); m_event_thread_blocker.unblock(); if (m_type == Type::Keyboard && s_keyboard_device) @@ -197,6 +202,12 @@ namespace Kernel , m_name("keyboard"_sv) {} + void KeyboardDevice::notify() + { + epoll_notify(EPOLLIN); + m_thread_blocker.unblock(); + } + BAN::ErrorOr KeyboardDevice::read_impl(off_t, BAN::ByteSpan buffer) { if (buffer.size() < sizeof(LibInput::RawKeyEvent)) @@ -243,6 +254,12 @@ namespace Kernel , m_name("mouse"_sv) {} + void MouseDevice::notify() + { + epoll_notify(EPOLLIN); + m_thread_blocker.unblock(); + } + BAN::ErrorOr MouseDevice::read_impl(off_t, BAN::ByteSpan buffer) { if (buffer.size() < sizeof(LibInput::MouseEvent)) diff --git a/kernel/kernel/Networking/TCPSocket.cpp b/kernel/kernel/Networking/TCPSocket.cpp index 459d7020..666e71b9 100644 --- a/kernel/kernel/Networking/TCPSocket.cpp +++ b/kernel/kernel/Networking/TCPSocket.cpp @@ -6,6 +6,7 @@ #include #include +#include namespace Kernel { @@ -271,6 +272,11 @@ namespace Kernel return m_send_window.data_size < m_send_window.buffer->size(); } + bool TCPSocket::has_hangup_impl() const + { + return m_has_connected && m_state != State::Established; + } + BAN::ErrorOr 
TCPSocket::return_with_maybe_zero() { ASSERT(m_state != State::Established); @@ -577,6 +583,8 @@ namespace Kernel memcpy(buffer + m_recv_window.data_size, payload.data(), payload.size()); m_recv_window.data_size += payload.size(); + epoll_notify(EPOLLIN); + dprintln_if(DEBUG_TCP, "Received {} bytes", payload.size()); if (m_next_flags == 0) @@ -726,6 +734,8 @@ namespace Kernel m_send_window.current_seq += to_send; i += to_send; + + epoll_notify(EPOLLOUT); } m_send_window.last_send_ms = current_ms; diff --git a/kernel/kernel/Networking/UDPSocket.cpp b/kernel/kernel/Networking/UDPSocket.cpp index 1db1666c..93e06912 100644 --- a/kernel/kernel/Networking/UDPSocket.cpp +++ b/kernel/kernel/Networking/UDPSocket.cpp @@ -2,6 +2,8 @@ #include #include +#include + namespace Kernel { @@ -70,6 +72,8 @@ namespace Kernel m_packets.emplace(packet_info); m_packet_total_size += payload.size(); + epoll_notify(EPOLLIN); + m_packet_thread_blocker.unblock(); } diff --git a/kernel/kernel/Networking/UNIX/Socket.cpp b/kernel/kernel/Networking/UNIX/Socket.cpp index d6cc91d4..aad5e70c 100644 --- a/kernel/kernel/Networking/UNIX/Socket.cpp +++ b/kernel/kernel/Networking/UNIX/Socket.cpp @@ -5,6 +5,7 @@ #include #include +#include #include namespace Kernel @@ -62,6 +63,7 @@ namespace Kernel if (auto connection = connection_info.connection.lock(); connection && connection->m_info.has()) { connection->m_info.get().target_closed = true; + connection->epoll_notify(EPOLLHUP); connection->m_packet_thread_blocker.unblock(); } } @@ -172,6 +174,8 @@ namespace Kernel TRY(Thread::current().block_or_eintr_indefinite(target_info.pending_thread_blocker)); } + target->epoll_notify(EPOLLIN); + while (!connection_info.connection_done) Processor::yield(); @@ -263,6 +267,8 @@ namespace Kernel if (!is_streaming()) m_packet_sizes.push(packet.size()); + epoll_notify(EPOLLIN); + m_packet_thread_blocker.unblock(); m_packet_lock.unlock(state); return {}; @@ -295,6 +301,17 @@ namespace Kernel return true; } + bool 
UnixDomainSocket::has_hangup_impl() const + { + if (m_info.has()) + { + auto& connection_info = m_info.get(); + return connection_info.target_closed; + } + + return false; + } + BAN::ErrorOr UnixDomainSocket::sendto_impl(BAN::ConstByteSpan message, const sockaddr* address, socklen_t address_len) { if (message.size() > s_packet_buffer_size) @@ -390,6 +407,8 @@ namespace Kernel m_packet_thread_blocker.unblock(); m_packet_lock.unlock(state); + epoll_notify(EPOLLOUT); + return nread; } diff --git a/kernel/kernel/Process.cpp b/kernel/kernel/Process.cpp index ee507037..fd2b7359 100644 --- a/kernel/kernel/Process.cpp +++ b/kernel/kernel/Process.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -1452,21 +1453,19 @@ namespace Kernel return TRY(inode->ioctl(request, arg)); } - BAN::ErrorOr Process::sys_pselect(sys_pselect_t* _arguments) + BAN::ErrorOr Process::sys_pselect(sys_pselect_t* user_arguments) { sys_pselect_t arguments; { LockGuard _(m_process_lock); - TRY(validate_pointer_access(_arguments, sizeof(sys_pselect_t), false)); - arguments = *_arguments; + TRY(validate_pointer_access(user_arguments, sizeof(sys_pselect_t), false)); + arguments = *user_arguments; } MemoryRegion* readfd_region = nullptr; MemoryRegion* writefd_region = nullptr; MemoryRegion* errorfd_region = nullptr; - MemoryRegion* timeout_region = nullptr; - MemoryRegion* sigmask_region = nullptr; BAN::ScopeGuard _([&] { if (readfd_region) @@ -1475,75 +1474,57 @@ namespace Kernel writefd_region->unpin(); if (errorfd_region) errorfd_region->unpin(); - if (timeout_region) - timeout_region->unpin(); - if (sigmask_region) - sigmask_region->unpin(); }); readfd_region = TRY(validate_and_pin_pointer_access(arguments.readfds, sizeof(fd_set), true)); writefd_region = TRY(validate_and_pin_pointer_access(arguments.writefds, sizeof(fd_set), true)); errorfd_region = TRY(validate_and_pin_pointer_access(arguments.errorfds, sizeof(fd_set), true)); - timeout_region = 
TRY(validate_and_pin_pointer_access(arguments.timeout, sizeof(timespec), false)); - sigmask_region = TRY(validate_and_pin_pointer_access(arguments.sigmask, sizeof(sigset_t), false)); const auto old_sigmask = Thread::current().m_signal_block_mask; if (arguments.sigmask) + { + LockGuard _(m_process_lock); + TRY(validate_pointer_access(arguments.sigmask, sizeof(sigset_t), false)); Thread::current().m_signal_block_mask = *arguments.sigmask; + } BAN::ScopeGuard sigmask_restore([old_sigmask] { Thread::current().m_signal_block_mask = old_sigmask; }); - uint64_t timedout_ns = SystemTimer::get().ns_since_boot(); + uint64_t waketime_ns = BAN::numeric_limits::max(); if (arguments.timeout) { - timedout_ns += arguments.timeout->tv_sec * 1'000'000'000; - timedout_ns += arguments.timeout->tv_nsec; + LockGuard _(m_process_lock); + TRY(validate_pointer_access(arguments.timeout, sizeof(timespec), false)); + waketime_ns = + SystemTimer::get().ns_since_boot() + + (arguments.timeout->tv_sec * 1'000'000'000) + + arguments.timeout->tv_nsec; } - fd_set readfds; FD_ZERO(&readfds); - fd_set writefds; FD_ZERO(&writefds); - fd_set errorfds; FD_ZERO(&errorfds); - - int set_bits = 0; - for (;;) + auto epoll = TRY(Epoll::create()); + for (int fd = 0; fd < arguments.nfds; fd++) /* bound comes from the validated kernel-side copy, never re-read through the user pointer */ { - auto update_fds = - [&](int fd, fd_set* source, fd_set* dest, bool (Inode::*func)() const) - { - if (source == nullptr) - return; + uint32_t events = 0; + if (arguments.readfds && FD_ISSET(fd, arguments.readfds)) + events |= EPOLLIN; + if (arguments.writefds && FD_ISSET(fd, arguments.writefds)) + events |= EPOLLOUT; + if (arguments.errorfds && FD_ISSET(fd, arguments.errorfds)) + events |= EPOLLERR; + if (events == 0) + continue; - if (!FD_ISSET(fd, source)) - return; + auto inode_or_error = m_open_file_descriptors.inode_of(fd); + if (inode_or_error.is_error()) + continue; - auto inode_or_error = m_open_file_descriptors.inode_of(fd); - if (inode_or_error.is_error()) - return; - - auto inode = 
inode_or_error.release_value(); - if ((inode.ptr()->*func)()) - { - FD_SET(fd, dest); - set_bits++; - } - }; - - for (int i = 0; i < arguments.nfds; i++) - { - update_fds(i, arguments.readfds, &readfds, &Inode::can_read); - update_fds(i, arguments.writefds, &writefds, &Inode::can_write); - update_fds(i, arguments.errorfds, &errorfds, &Inode::has_error); - } - - if (set_bits > 0) - break; - - if (arguments.timeout && SystemTimer::get().ns_since_boot() >= timedout_ns) - break; - - // FIXME: implement some multi thread blocker system? - TRY(Thread::current().sleep_or_eintr_ms(1)); + TRY(epoll->ctl(EPOLL_CTL_ADD, inode_or_error.release_value(), { .events = events, .data = { .fd = fd }})); } + BAN::Vector event_buffer; + TRY(event_buffer.resize(arguments.nfds)); /* one slot per candidate descriptor, sized from the same copied nfds as the loop above */ + + const size_t waited_events = TRY(epoll->wait(event_buffer.span(), waketime_ns)); + if (arguments.readfds) FD_ZERO(arguments.readfds); if (arguments.writefds) @@ -1551,17 +1532,98 @@ namespace Kernel if (arguments.errorfds) FD_ZERO(arguments.errorfds); - for (int i = 0; i < arguments.nfds; i++) + for (size_t i = 0; i < waited_events; i++) { - if (arguments.readfds && FD_ISSET(i, &readfds)) - FD_SET(i, arguments.readfds); - if (arguments.writefds && FD_ISSET(i, &writefds)) - FD_SET(i, arguments.writefds); - if (arguments.errorfds && FD_ISSET(i, &errorfds)) - FD_SET(i, arguments.errorfds); + const int fd = event_buffer[i].data.fd; + if (arguments.readfds && event_buffer[i].events & (EPOLLIN | EPOLLHUP)) + FD_SET(fd, arguments.readfds); + if (arguments.writefds && event_buffer[i].events & (EPOLLOUT)) + FD_SET(fd, arguments.writefds); + if (arguments.errorfds && event_buffer[i].events & (EPOLLERR)) + FD_SET(fd, arguments.errorfds); } - return set_bits; + return waited_events; + } + + BAN::ErrorOr Process::sys_epoll_create1(int flags) + { + if (flags && (flags & ~EPOLL_CLOEXEC)) + return BAN::Error::from_errno(EINVAL); + if (flags & EPOLL_CLOEXEC) + flags = O_CLOEXEC; + + VirtualFileSystem::File epoll_file; 
+ epoll_file.inode = TRY(Epoll::create()); + TRY(epoll_file.canonical_path.append(""_sv)); + + return TRY(m_open_file_descriptors.open(BAN::move(epoll_file), flags | O_RDWR)); + } + + BAN::ErrorOr Process::sys_epoll_ctl(int epfd, int op, int fd, epoll_event* user_event) + { + if (epfd == fd) + return BAN::Error::from_errno(EINVAL); + if (op != EPOLL_CTL_DEL && user_event == nullptr) + return BAN::Error::from_errno(EINVAL); + + auto epoll_inode = TRY(m_open_file_descriptors.inode_of(epfd)); + if (!epoll_inode->is_epoll()) + return BAN::Error::from_errno(EINVAL); + + auto inode = TRY(m_open_file_descriptors.inode_of(fd)); + + epoll_event event {}; + if (user_event) + { + LockGuard _(m_process_lock); + TRY(validate_pointer_access(user_event, sizeof(epoll_event), false)); + event = *user_event; + } + + TRY(static_cast(epoll_inode.ptr())->ctl(op, inode, event)); + + return 0; + } + + BAN::ErrorOr Process::sys_epoll_pwait2(int epfd, epoll_event* events, int maxevents, const timespec* timeout, const sigset_t* sigmask) + { + (void)sigmask; + + if (maxevents <= 0) + return BAN::Error::from_errno(EINVAL); + + auto epoll_inode = TRY(m_open_file_descriptors.inode_of(epfd)); + if (!epoll_inode->is_epoll()) + return BAN::Error::from_errno(EINVAL); + + uint64_t waketime_ns = BAN::numeric_limits::max(); + if (timeout) + { + LockGuard _(m_process_lock); + TRY(validate_pointer_access(timeout, sizeof(timespec), false)); + waketime_ns = + SystemTimer::get().ns_since_boot() + + (timeout->tv_sec * 1'000'000'000) + + timeout->tv_nsec; + } + + auto* events_region = TRY(validate_and_pin_pointer_access(events, maxevents * sizeof(epoll_event), true)); + BAN::ScopeGuard _([events_region] { + if (events_region) + events_region->unpin(); + }); + + const auto old_sigmask = Thread::current().m_signal_block_mask; + if (sigmask) + { + LockGuard _(m_process_lock); + TRY(validate_pointer_access(sigmask, sizeof(sigset_t), false)); + Thread::current().m_signal_block_mask = *sigmask; + } + 
BAN::ScopeGuard sigmask_restore([old_sigmask] { Thread::current().m_signal_block_mask = old_sigmask; }); + + return TRY(static_cast(epoll_inode.ptr())->wait(BAN::Span(events, maxevents), waketime_ns)); } BAN::ErrorOr Process::sys_pipe(int fildes[2]) diff --git a/kernel/kernel/Terminal/PseudoTerminal.cpp b/kernel/kernel/Terminal/PseudoTerminal.cpp index a6b78ecb..04c2b15e 100644 --- a/kernel/kernel/Terminal/PseudoTerminal.cpp +++ b/kernel/kernel/Terminal/PseudoTerminal.cpp @@ -4,6 +4,7 @@ #include +#include #include #include @@ -95,6 +96,8 @@ namespace Kernel reinterpret_cast(m_buffer->vaddr())[(m_buffer_tail + m_buffer_size) % m_buffer->size()] = ch; m_buffer_size++; + epoll_notify(EPOLLIN); + m_buffer_blocker.unblock(); return true; @@ -127,6 +130,8 @@ namespace Kernel m_buffer_size -= to_copy; m_buffer_tail = (m_buffer_tail + to_copy) % m_buffer->size(); + epoll_notify(EPOLLOUT); + m_buffer_lock.unlock(state); return to_copy; @@ -137,7 +142,6 @@ namespace Kernel auto slave = m_slave.lock(); if (!slave) return BAN::Error::from_errno(ENODEV); - for (size_t i = 0; i < buffer.size(); i++) slave->handle_input_byte(buffer[i]); return buffer.size(); diff --git a/kernel/kernel/Terminal/TTY.cpp b/kernel/kernel/Terminal/TTY.cpp index 4f3bcb1b..f75f8781 100644 --- a/kernel/kernel/Terminal/TTY.cpp +++ b/kernel/kernel/Terminal/TTY.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include namespace Kernel @@ -40,6 +41,7 @@ namespace Kernel bool can_read_impl() const override { return false; } bool can_write_impl() const override { return false; } bool has_error_impl() const override { return false; } + bool has_hangup_impl() const override { return false; } private: DevTTY(mode_t mode, uid_t uid, gid_t gid) @@ -238,6 +240,7 @@ namespace Kernel if (ch == '\x04' && (m_termios.c_lflag & ICANON)) { m_output.flush = true; + epoll_notify(EPOLLIN); m_output.thread_blocker.unblock(); return; } @@ -280,6 +283,7 @@ namespace Kernel if (ch == '\n' || !(m_termios.c_lflag & 
ICANON)) { m_output.flush = true; + epoll_notify(EPOLLIN); m_output.thread_blocker.unblock(); } } diff --git a/userspace/libraries/LibC/CMakeLists.txt b/userspace/libraries/LibC/CMakeLists.txt index ba65d7eb..01a20abb 100644 --- a/userspace/libraries/LibC/CMakeLists.txt +++ b/userspace/libraries/LibC/CMakeLists.txt @@ -30,6 +30,7 @@ set(LIBC_SOURCES string.cpp strings.cpp sys/banan-os.cpp + sys/epoll.cpp sys/ioctl.cpp sys/mman.cpp sys/resource.cpp diff --git a/userspace/libraries/LibC/include/sys/epoll.h b/userspace/libraries/LibC/include/sys/epoll.h new file mode 100644 index 00000000..2ae26d0d --- /dev/null +++ b/userspace/libraries/LibC/include/sys/epoll.h @@ -0,0 +1,49 @@ +#ifndef _SYS_EPOLL_H +#define _SYS_EPOLL_H 1 + +#include + +__BEGIN_DECLS + +#include +#include + +union epoll_data +{ + void* ptr; + int fd; + uint32_t u32; + uint64_t u64; +}; +typedef union epoll_data epoll_data_t; + +struct epoll_event +{ + uint32_t events; + epoll_data_t data; +}; + +#define EPOLL_CTL_ADD 0 +#define EPOLL_CTL_MOD 1 +#define EPOLL_CTL_DEL 2 + +#define EPOLLIN 0x01 +#define EPOLLOUT 0x02 +#define EPOLLPRI 0x04 +#define EPOLLERR 0x08 +#define EPOLLHUP 0x10 +#define EPOLLET 0x20 +#define EPOLLONESHOT 0x40 + +#define EPOLL_CLOEXEC 1 + +int epoll_create(int size); +int epoll_create1(int flags); +int epoll_ctl(int epfd, int op, int fd, struct epoll_event* event); +int epoll_wait(int epfd, struct epoll_event* events, int maxevents, int timeout); +int epoll_pwait(int epfd, struct epoll_event* events, int maxevents, int timeout, const sigset_t* sigmask); +int epoll_pwait2(int epfd, struct epoll_event* events, int maxevents, const struct timespec* timeout, const sigset_t* sigmask); + +__END_DECLS + +#endif diff --git a/userspace/libraries/LibC/include/sys/syscall.h b/userspace/libraries/LibC/include/sys/syscall.h index 517b45da..21688f16 100644 --- a/userspace/libraries/LibC/include/sys/syscall.h +++ b/userspace/libraries/LibC/include/sys/syscall.h @@ -98,6 +98,9 @@ __BEGIN_DECLS 
O(SYS_PTHREAD_EXIT, pthread_exit) \ O(SYS_PTHREAD_JOIN, pthread_join) \ O(SYS_PTHREAD_SELF, pthread_self) \ + O(SYS_EPOLL_CREATE1, epoll_create1) \ + O(SYS_EPOLL_CTL, epoll_ctl) \ + O(SYS_EPOLL_PWAIT2, epoll_pwait2) \ enum Syscall { diff --git a/userspace/libraries/LibC/sys/epoll.cpp b/userspace/libraries/LibC/sys/epoll.cpp new file mode 100644 index 00000000..39a56b45 --- /dev/null +++ b/userspace/libraries/LibC/sys/epoll.cpp @@ -0,0 +1,49 @@ +#include +#include +#include +#include + +int epoll_create(int size) +{ + if (size <= 0) + { + errno = EINVAL; + return -1; + } + + return epoll_create1(0); +} + +int epoll_create1(int flags) +{ + return syscall(SYS_EPOLL_CREATE1, flags); +} + +int epoll_ctl(int epfd, int op, int fd, struct epoll_event* event) +{ + return syscall(SYS_EPOLL_CTL, epfd, op, fd, event); +} + +int epoll_wait(int epfd, struct epoll_event* events, int maxevents, int timeout) +{ + return epoll_pwait(epfd, events, maxevents, timeout, nullptr); +} + +int epoll_pwait(int epfd, struct epoll_event* events, int maxevents, int timeout, const sigset_t* sigmask) +{ + timespec ts; + timespec* ts_ptr = nullptr; /* stays null for a negative timeout, so no timespec is handed to epoll_pwait2 */ + if (timeout >= 0) /* timeout is in milliseconds: split into whole seconds and the nanosecond remainder */ + { + ts.tv_sec = static_cast(timeout / 1000); + ts.tv_nsec = (timeout % 1000) * 1'000'000; + ts_ptr = &ts; + } + + return epoll_pwait2(epfd, events, maxevents, ts_ptr, sigmask); +} + +int epoll_pwait2(int epfd, struct epoll_event* events, int maxevents, const struct timespec* timeout, const sigset_t* sigmask) +{ + return syscall(SYS_EPOLL_PWAIT2, epfd, events, maxevents, timeout, sigmask); +}