Kernel/LibC: Implement basic epoll

This implementation is built on top of inodes rather than fds, which is what Linux uses.
If I start finding ports/software that rely on epoll allowing
duplicate inodes, I will do what Linux does.

I'm probably missing multiple epoll_notify's which may cause hangs but
the system seems to work fine :dd:
This commit is contained in:
Bananymous 2025-05-13 10:10:35 +03:00
parent 143a00626b
commit 1bcd1edbf5
43 changed files with 627 additions and 119 deletions

View File

@ -17,6 +17,7 @@ set(KERNEL_SOURCES
kernel/Device/RandomDevice.cpp kernel/Device/RandomDevice.cpp
kernel/Device/ZeroDevice.cpp kernel/Device/ZeroDevice.cpp
kernel/ELF.cpp kernel/ELF.cpp
kernel/Epoll.cpp
kernel/Errors.cpp kernel/Errors.cpp
kernel/FS/DevFS/FileSystem.cpp kernel/FS/DevFS/FileSystem.cpp
kernel/FS/Ext2/FileSystem.cpp kernel/FS/Ext2/FileSystem.cpp

View File

@ -3,7 +3,7 @@
namespace Kernel namespace Kernel
{ {
class DebugDevice : public CharacterDevice class DebugDevice final : public CharacterDevice
{ {
public: public:
static BAN::ErrorOr<BAN::RefPtr<DebugDevice>> create(mode_t, uid_t, gid_t); static BAN::ErrorOr<BAN::RefPtr<DebugDevice>> create(mode_t, uid_t, gid_t);
@ -24,6 +24,7 @@ namespace Kernel
virtual bool can_read_impl() const override { return false; } virtual bool can_read_impl() const override { return false; }
virtual bool can_write_impl() const override { return true; } virtual bool can_write_impl() const override { return true; }
virtual bool has_error_impl() const override { return false; } virtual bool has_error_impl() const override { return false; }
virtual bool has_hangup_impl() const override { return false; }
private: private:
const dev_t m_rdev; const dev_t m_rdev;

View File

@ -38,6 +38,7 @@ namespace Kernel
virtual bool can_read_impl() const override { return true; } virtual bool can_read_impl() const override { return true; }
virtual bool can_write_impl() const override { return true; } virtual bool can_write_impl() const override { return true; }
virtual bool has_error_impl() const override { return false; } virtual bool has_error_impl() const override { return false; }
virtual bool has_hangup_impl() const override { return false; }
private: private:
FramebufferDevice(mode_t mode, uid_t uid, gid_t gid, dev_t rdev, paddr_t paddr, uint32_t width, uint32_t height, uint32_t pitch, uint8_t bpp); FramebufferDevice(mode_t mode, uid_t uid, gid_t gid, dev_t rdev, paddr_t paddr, uint32_t width, uint32_t height, uint32_t pitch, uint8_t bpp);

View File

@ -5,7 +5,7 @@
namespace Kernel namespace Kernel
{ {
class NullDevice : public CharacterDevice class NullDevice final : public CharacterDevice
{ {
public: public:
static BAN::ErrorOr<BAN::RefPtr<NullDevice>> create(mode_t, uid_t, gid_t); static BAN::ErrorOr<BAN::RefPtr<NullDevice>> create(mode_t, uid_t, gid_t);
@ -26,6 +26,7 @@ namespace Kernel
virtual bool can_read_impl() const override { return false; } virtual bool can_read_impl() const override { return false; }
virtual bool can_write_impl() const override { return true; } virtual bool can_write_impl() const override { return true; }
virtual bool has_error_impl() const override { return false; } virtual bool has_error_impl() const override { return false; }
virtual bool has_hangup_impl() const override { return false; }
private: private:
const dev_t m_rdev; const dev_t m_rdev;

View File

@ -3,7 +3,7 @@
namespace Kernel namespace Kernel
{ {
class RandomDevice : public CharacterDevice class RandomDevice final : public CharacterDevice
{ {
public: public:
static BAN::ErrorOr<BAN::RefPtr<RandomDevice>> create(mode_t, uid_t, gid_t); static BAN::ErrorOr<BAN::RefPtr<RandomDevice>> create(mode_t, uid_t, gid_t);
@ -24,6 +24,7 @@ namespace Kernel
virtual bool can_read_impl() const override { return true; } virtual bool can_read_impl() const override { return true; }
virtual bool can_write_impl() const override { return false; } virtual bool can_write_impl() const override { return false; }
virtual bool has_error_impl() const override { return false; } virtual bool has_error_impl() const override { return false; }
virtual bool has_hangup_impl() const override { return false; }
private: private:
const dev_t m_rdev; const dev_t m_rdev;

View File

@ -3,7 +3,7 @@
namespace Kernel namespace Kernel
{ {
class ZeroDevice : public CharacterDevice class ZeroDevice final : public CharacterDevice
{ {
public: public:
static BAN::ErrorOr<BAN::RefPtr<ZeroDevice>> create(mode_t, uid_t, gid_t); static BAN::ErrorOr<BAN::RefPtr<ZeroDevice>> create(mode_t, uid_t, gid_t);
@ -24,6 +24,7 @@ namespace Kernel
virtual bool can_read_impl() const override { return true; } virtual bool can_read_impl() const override { return true; }
virtual bool can_write_impl() const override { return false; } virtual bool can_write_impl() const override { return false; }
virtual bool has_error_impl() const override { return false; } virtual bool has_error_impl() const override { return false; }
virtual bool has_hangup_impl() const override { return false; }
private: private:
const dev_t m_rdev; const dev_t m_rdev;

View File

@ -0,0 +1,68 @@
#pragma once
#include <BAN/CircularQueue.h>
#include <BAN/HashMap.h>
#include <BAN/HashSet.h>
#include <kernel/FS/Inode.h>
#include <sys/epoll.h>
namespace Kernel
{
// An epoll instance, modelled as an Inode (is_epoll() returns true).
//
// Interest is tracked per inode rather than per file descriptor: every
// registered inode has exactly one entry in m_listening_events, and inodes
// that may currently have something to report also have an entry in
// m_ready_events.
class Epoll final : public Inode
{
public:
	// Allocates a new instance; fails with ENOMEM if the allocation fails.
	static BAN::ErrorOr<BAN::RefPtr<Epoll>> create();
	// Unregisters this instance from every inode it is still listening on.
	~Epoll();

	// Applies EPOLL_CTL_ADD / EPOLL_CTL_MOD / EPOLL_CTL_DEL for `inode`.
	BAN::ErrorOr<void> ctl(int op, BAN::RefPtr<Inode> inode, epoll_event event);
	// Fills `events` with ready events and returns how many were written.
	// `waketime_ns` is compared against SystemTimer::ns_since_boot(), i.e. it
	// is an absolute deadline, not a duration.
	BAN::ErrorOr<size_t> wait(BAN::Span<epoll_event> events, uint64_t waketime_ns);

	// Called on behalf of an inode to report that `event` may have become
	// ready; wakes threads blocked in wait() if the event is of interest.
	void notify(BAN::RefPtr<Inode> inode, uint32_t event);

private:
	Epoll() = default;

public:
	// Inode metadata: an epoll instance has no backing file, so these are
	// all fixed placeholder values.
	ino_t ino() const override { return 0; }
	Mode mode() const override { return { Mode::IRUSR | Mode::IWUSR }; }
	nlink_t nlink() const override { return 0; }
	uid_t uid() const override { return 0; }
	gid_t gid() const override { return 0; }
	off_t size() const override { return 0; }
	timespec atime() const override { return {}; }
	timespec mtime() const override { return {}; }
	timespec ctime() const override { return {}; }
	blksize_t blksize() const override { return PAGE_SIZE; }
	blkcnt_t blocks() const override { return 0; }
	dev_t dev() const override { return 0; }
	dev_t rdev() const override { return 0; }

	bool is_epoll() const override { return true; }

	const FileSystem* filesystem() const override { return nullptr; }

	// The epoll inode itself never reports readiness of its own.
	bool can_read_impl() const override { return false; }
	bool can_write_impl() const override { return false; }
	bool has_error_impl() const override { return false; }
	bool has_hangup_impl() const override { return false; }

	BAN::ErrorOr<void> fsync_impl() override { return {}; }

private:
	// Hashes a RefPtr<Inode> by the address of the inode it points to.
	struct InodeRefPtrHash
	{
		// const-qualified: the functor is stateless and must be usable
		// through a const hasher object.
		BAN::hash_t operator()(const BAN::RefPtr<Inode>& inode) const
		{
			return BAN::hash<const Inode*>()(inode.ptr());
		}
	};

private:
	// Blocked on by wait(), unblocked by notify().
	ThreadBlocker m_thread_blocker;
	// inode -> bitmask of events that may be ready; re-validated in wait().
	BAN::HashMap<BAN::RefPtr<Inode>, uint32_t, InodeRefPtrHash> m_ready_events;
	// inode -> the epoll_event registered through ctl().
	BAN::HashMap<BAN::RefPtr<Inode>, epoll_event, InodeRefPtrHash> m_listening_events;
};
}

View File

@ -51,6 +51,7 @@ namespace Kernel
virtual bool can_read_impl() const override { return true; } virtual bool can_read_impl() const override { return true; }
virtual bool can_write_impl() const override { return true; } virtual bool can_write_impl() const override { return true; }
virtual bool has_error_impl() const override { return false; } virtual bool has_error_impl() const override { return false; }
virtual bool has_hangup_impl() const override { return false; }
private: private:
// Returns maximum number of data blocks in use // Returns maximum number of data blocks in use

View File

@ -49,6 +49,7 @@ namespace Kernel
virtual bool can_read_impl() const override { return true; } virtual bool can_read_impl() const override { return true; }
virtual bool can_write_impl() const override { return true; } virtual bool can_write_impl() const override { return true; }
virtual bool has_error_impl() const override { return false; } virtual bool has_error_impl() const override { return false; }
virtual bool has_hangup_impl() const override { return false; }
private: private:
FATInode(FATFS& fs, const FAT::DirectoryEntry& entry, ino_t ino, uint32_t block_count) FATInode(FATFS& fs, const FAT::DirectoryEntry& entry, ino_t ino, uint32_t block_count)

View File

@ -1,6 +1,7 @@
#pragma once #pragma once
#include <BAN/ByteSpan.h> #include <BAN/ByteSpan.h>
#include <BAN/LinkedList.h>
#include <BAN/RefPtr.h> #include <BAN/RefPtr.h>
#include <BAN/String.h> #include <BAN/String.h>
#include <BAN/StringView.h> #include <BAN/StringView.h>
@ -19,9 +20,8 @@
namespace Kernel namespace Kernel
{ {
class FileSystem;
class FileBackedRegion; class FileBackedRegion;
class FileSystem;
class SharedFileData; class SharedFileData;
class Inode : public BAN::RefCounted<Inode> class Inode : public BAN::RefCounted<Inode>
@ -85,6 +85,7 @@ namespace Kernel
virtual dev_t rdev() const = 0; virtual dev_t rdev() const = 0;
virtual bool is_device() const { return false; } virtual bool is_device() const { return false; }
virtual bool is_epoll() const { return false; }
virtual bool is_pipe() const { return false; } virtual bool is_pipe() const { return false; }
virtual bool is_tty() const { return false; } virtual bool is_tty() const { return false; }
@ -123,9 +124,14 @@ namespace Kernel
bool can_read() const; bool can_read() const;
bool can_write() const; bool can_write() const;
bool has_error() const; bool has_error() const;
bool has_hangup() const;
BAN::ErrorOr<long> ioctl(int request, void* arg); BAN::ErrorOr<long> ioctl(int request, void* arg);
BAN::ErrorOr<void> add_epoll(class Epoll*);
void del_epoll(class Epoll*);
void epoll_notify(uint32_t event);
protected: protected:
// Directory API // Directory API
virtual BAN::ErrorOr<BAN::RefPtr<Inode>> find_inode_impl(BAN::StringView) { return BAN::Error::from_errno(ENOTSUP); } virtual BAN::ErrorOr<BAN::RefPtr<Inode>> find_inode_impl(BAN::StringView) { return BAN::Error::from_errno(ENOTSUP); }
@ -160,6 +166,7 @@ namespace Kernel
virtual bool can_read_impl() const = 0; virtual bool can_read_impl() const = 0;
virtual bool can_write_impl() const = 0; virtual bool can_write_impl() const = 0;
virtual bool has_error_impl() const = 0; virtual bool has_error_impl() const = 0;
virtual bool has_hangup_impl() const = 0;
virtual BAN::ErrorOr<long> ioctl_impl(int, void*) { return BAN::Error::from_errno(ENOTSUP); } virtual BAN::ErrorOr<long> ioctl_impl(int, void*) { return BAN::Error::from_errno(ENOTSUP); }
@ -168,6 +175,7 @@ namespace Kernel
private: private:
BAN::WeakPtr<SharedFileData> m_shared_region; BAN::WeakPtr<SharedFileData> m_shared_region;
BAN::LinkedList<class Epoll*> m_epolls;
friend class FileBackedRegion; friend class FileBackedRegion;
friend class OpenFileDescriptorSet; friend class OpenFileDescriptorSet;
friend class SharedFileData; friend class SharedFileData;

View File

@ -40,6 +40,7 @@ namespace Kernel
virtual bool can_read_impl() const override { return m_buffer_size > 0; } virtual bool can_read_impl() const override { return m_buffer_size > 0; }
virtual bool can_write_impl() const override { return true; } virtual bool can_write_impl() const override { return true; }
virtual bool has_error_impl() const override { return false; } virtual bool has_error_impl() const override { return false; }
virtual bool has_hangup_impl() const override { return m_writing_count == 0; }
private: private:
Pipe(const Credentials&); Pipe(const Credentials&);

View File

@ -47,6 +47,7 @@ namespace Kernel
virtual bool can_read_impl() const override { return true; } virtual bool can_read_impl() const override { return true; }
virtual bool can_write_impl() const override { return false; } virtual bool can_write_impl() const override { return false; }
virtual bool has_error_impl() const override { return false; } virtual bool has_error_impl() const override { return false; }
virtual bool has_hangup_impl() const override { return false; }
private: private:
ProcROProcessInode(Process&, size_t (Process::*)(off_t, BAN::ByteSpan) const, TmpFileSystem&, const TmpInodeInfo&); ProcROProcessInode(Process&, size_t (Process::*)(off_t, BAN::ByteSpan) const, TmpFileSystem&, const TmpInodeInfo&);
@ -72,6 +73,7 @@ namespace Kernel
virtual bool can_read_impl() const override { return true; } virtual bool can_read_impl() const override { return true; }
virtual bool can_write_impl() const override { return false; } virtual bool can_write_impl() const override { return false; }
virtual bool has_error_impl() const override { return false; } virtual bool has_error_impl() const override { return false; }
virtual bool has_hangup_impl() const override { return false; }
private: private:
ProcROInode(size_t (*callback)(off_t, BAN::ByteSpan), TmpFileSystem&, const TmpInodeInfo&); ProcROInode(size_t (*callback)(off_t, BAN::ByteSpan), TmpFileSystem&, const TmpInodeInfo&);

View File

@ -80,6 +80,7 @@ namespace Kernel
virtual bool can_read_impl() const override { return true; } virtual bool can_read_impl() const override { return true; }
virtual bool can_write_impl() const override { return true; } virtual bool can_write_impl() const override { return true; }
virtual bool has_error_impl() const override { return false; } virtual bool has_error_impl() const override { return false; }
virtual bool has_hangup_impl() const override { return false; }
private: private:
TmpFileInode(TmpFileSystem&, ino_t, const TmpInodeInfo&); TmpFileInode(TmpFileSystem&, ino_t, const TmpInodeInfo&);
@ -102,6 +103,7 @@ namespace Kernel
virtual bool can_read_impl() const override { return false; } virtual bool can_read_impl() const override { return false; }
virtual bool can_write_impl() const override { return false; } virtual bool can_write_impl() const override { return false; }
virtual bool has_error_impl() const override { return false; } virtual bool has_error_impl() const override { return false; }
virtual bool has_hangup_impl() const override { return false; }
private: private:
TmpSocketInode(TmpFileSystem&, ino_t, const TmpInodeInfo&); TmpSocketInode(TmpFileSystem&, ino_t, const TmpInodeInfo&);
@ -123,6 +125,7 @@ namespace Kernel
virtual bool can_read_impl() const override { return false; } virtual bool can_read_impl() const override { return false; }
virtual bool can_write_impl() const override { return false; } virtual bool can_write_impl() const override { return false; }
virtual bool has_error_impl() const override { return false; } virtual bool has_error_impl() const override { return false; }
virtual bool has_hangup_impl() const override { return false; }
private: private:
TmpSymlinkInode(TmpFileSystem&, ino_t, const TmpInodeInfo&); TmpSymlinkInode(TmpFileSystem&, ino_t, const TmpInodeInfo&);
@ -153,6 +156,7 @@ namespace Kernel
virtual bool can_read_impl() const override { return false; } virtual bool can_read_impl() const override { return false; }
virtual bool can_write_impl() const override { return false; } virtual bool can_write_impl() const override { return false; }
virtual bool has_error_impl() const override { return false; } virtual bool has_error_impl() const override { return false; }
virtual bool has_hangup_impl() const override { return false; }
private: private:
template<TmpFuncs::for_each_valid_entry_callback F> template<TmpFuncs::for_each_valid_entry_callback F>

View File

@ -31,7 +31,7 @@ namespace Kernel
bool can_read_impl() const override { SpinLockGuard _(m_event_lock); return m_event_count > 0; } bool can_read_impl() const override { SpinLockGuard _(m_event_lock); return m_event_count > 0; }
bool can_write_impl() const override { return false; } bool can_write_impl() const override { return false; }
bool has_error_impl() const override { return false; } bool has_error_impl() const override { return false; }
bool has_hangup_impl() const override { return false; }
private: private:
BAN::ErrorOr<size_t> read_non_block(BAN::ByteSpan); BAN::ErrorOr<size_t> read_non_block(BAN::ByteSpan);
@ -64,7 +64,7 @@ namespace Kernel
public: public:
static BAN::ErrorOr<BAN::RefPtr<KeyboardDevice>> create(mode_t mode, uid_t uid, gid_t gid); static BAN::ErrorOr<BAN::RefPtr<KeyboardDevice>> create(mode_t mode, uid_t uid, gid_t gid);
void notify() { m_thread_blocker.unblock(); } void notify();
private: private:
KeyboardDevice(mode_t mode, uid_t uid, gid_t gid); KeyboardDevice(mode_t mode, uid_t uid, gid_t gid);
@ -73,6 +73,7 @@ namespace Kernel
bool can_read_impl() const override; bool can_read_impl() const override;
bool can_write_impl() const override { return false; } bool can_write_impl() const override { return false; }
bool has_error_impl() const override { return false; } bool has_error_impl() const override { return false; }
bool has_hangup_impl() const override { return false; }
BAN::StringView name() const final override { return m_name; } BAN::StringView name() const final override { return m_name; }
dev_t rdev() const final override { return m_rdev; } dev_t rdev() const final override { return m_rdev; }
@ -90,7 +91,7 @@ namespace Kernel
public: public:
static BAN::ErrorOr<BAN::RefPtr<MouseDevice>> create(mode_t mode, uid_t uid, gid_t gid); static BAN::ErrorOr<BAN::RefPtr<MouseDevice>> create(mode_t mode, uid_t uid, gid_t gid);
void notify() { m_thread_blocker.unblock(); } void notify();
private: private:
MouseDevice(mode_t mode, uid_t uid, gid_t gid); MouseDevice(mode_t mode, uid_t uid, gid_t gid);
@ -99,6 +100,7 @@ namespace Kernel
bool can_read_impl() const override; bool can_read_impl() const override;
bool can_write_impl() const override { return false; } bool can_write_impl() const override { return false; }
bool has_error_impl() const override { return false; } bool has_error_impl() const override { return false; }
bool has_hangup_impl() const override { return false; }
BAN::StringView name() const final override { return m_name; } BAN::StringView name() const final override { return m_name; }
dev_t rdev() const final override { return m_rdev; } dev_t rdev() const final override { return m_rdev; }

View File

@ -28,7 +28,7 @@ namespace Kernel
virtual bool link_up() override { return m_link_up; } virtual bool link_up() override { return m_link_up; }
virtual int link_speed() override; virtual int link_speed() override;
virtual size_t payload_mtu() const { return E1000_RX_BUFFER_SIZE - sizeof(EthernetHeader); } virtual size_t payload_mtu() const override { return E1000_RX_BUFFER_SIZE - sizeof(EthernetHeader); }
virtual void handle_irq() final override; virtual void handle_irq() final override;
@ -50,6 +50,7 @@ namespace Kernel
virtual bool can_read_impl() const override { return false; } virtual bool can_read_impl() const override { return false; }
virtual bool can_write_impl() const override { return false; } virtual bool can_write_impl() const override { return false; }
virtual bool has_error_impl() const override { return false; } virtual bool has_error_impl() const override { return false; }
virtual bool has_hangup_impl() const override { return false; }
private: private:
BAN::ErrorOr<void> read_mac_address(); BAN::ErrorOr<void> read_mac_address();

View File

@ -30,6 +30,7 @@ namespace Kernel
bool can_read_impl() const override { return false; } bool can_read_impl() const override { return false; }
bool can_write_impl() const override { return false; } bool can_write_impl() const override { return false; }
bool has_error_impl() const override { return false; } bool has_error_impl() const override { return false; }
bool has_hangup_impl() const override { return false; }
private: private:
SpinLock m_buffer_lock; SpinLock m_buffer_lock;

View File

@ -36,6 +36,7 @@ namespace Kernel
virtual bool can_read_impl() const override { return false; } virtual bool can_read_impl() const override { return false; }
virtual bool can_write_impl() const override { return false; } virtual bool can_write_impl() const override { return false; }
virtual bool has_error_impl() const override { return false; } virtual bool has_error_impl() const override { return false; }
virtual bool has_hangup_impl() const override { return false; }
private: private:
BAN::ErrorOr<void> reset(); BAN::ErrorOr<void> reset();

View File

@ -67,6 +67,7 @@ namespace Kernel
virtual bool can_read_impl() const override; virtual bool can_read_impl() const override;
virtual bool can_write_impl() const override; virtual bool can_write_impl() const override;
virtual bool has_error_impl() const override { return false; } virtual bool has_error_impl() const override { return false; }
virtual bool has_hangup_impl() const override;
private: private:
enum class State enum class State

View File

@ -40,6 +40,7 @@ namespace Kernel
virtual bool can_read_impl() const override { return !m_packets.empty(); } virtual bool can_read_impl() const override { return !m_packets.empty(); }
virtual bool can_write_impl() const override { return true; } virtual bool can_write_impl() const override { return true; }
virtual bool has_error_impl() const override { return false; } virtual bool has_error_impl() const override { return false; }
virtual bool has_hangup_impl() const override { return false; }
private: private:
UDPSocket(NetworkLayer&, const Socket::Info&); UDPSocket(NetworkLayer&, const Socket::Info&);

View File

@ -1,5 +1,6 @@
#pragma once #pragma once
#include <BAN/CircularQueue.h>
#include <BAN/Queue.h> #include <BAN/Queue.h>
#include <BAN/WeakPtr.h> #include <BAN/WeakPtr.h>
#include <kernel/FS/Socket.h> #include <kernel/FS/Socket.h>
@ -28,6 +29,7 @@ namespace Kernel
virtual bool can_read_impl() const override; virtual bool can_read_impl() const override;
virtual bool can_write_impl() const override; virtual bool can_write_impl() const override;
virtual bool has_error_impl() const override { return false; } virtual bool has_error_impl() const override { return false; }
virtual bool has_hangup_impl() const override;
private: private:
UnixDomainSocket(Socket::Type, const Socket::Info&); UnixDomainSocket(Socket::Type, const Socket::Info&);
@ -48,7 +50,7 @@ namespace Kernel
mutable BAN::Atomic<bool> target_closed { false }; mutable BAN::Atomic<bool> target_closed { false };
BAN::WeakPtr<UnixDomainSocket> connection; BAN::WeakPtr<UnixDomainSocket> connection;
BAN::Queue<BAN::RefPtr<UnixDomainSocket>> pending_connections; BAN::Queue<BAN::RefPtr<UnixDomainSocket>> pending_connections;
ThreadBlocker pending_thread_blocker; ThreadBlocker pending_thread_blocker;
SpinLock pending_lock; SpinLock pending_lock;
}; };
@ -67,7 +69,7 @@ namespace Kernel
size_t m_packet_size_total { 0 }; size_t m_packet_size_total { 0 };
BAN::UniqPtr<VirtualRange> m_packet_buffer; BAN::UniqPtr<VirtualRange> m_packet_buffer;
SpinLock m_packet_lock; SpinLock m_packet_lock;
ThreadBlocker m_packet_thread_blocker; ThreadBlocker m_packet_thread_blocker;
friend class BAN::RefPtr<UnixDomainSocket>; friend class BAN::RefPtr<UnixDomainSocket>;
}; };

View File

@ -24,6 +24,8 @@
#include <sys/time.h> #include <sys/time.h>
#include <termios.h> #include <termios.h>
struct epoll_event;
namespace Kernel namespace Kernel
{ {
@ -130,6 +132,10 @@ namespace Kernel
BAN::ErrorOr<long> sys_pselect(sys_pselect_t* arguments); BAN::ErrorOr<long> sys_pselect(sys_pselect_t* arguments);
BAN::ErrorOr<long> sys_epoll_create1(int flags);
BAN::ErrorOr<long> sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event* event);
BAN::ErrorOr<long> sys_epoll_pwait2(int epfd, struct epoll_event* events, int maxevents, const struct timespec* timeout, const sigset_t* sigmask);
BAN::ErrorOr<long> sys_pipe(int fildes[2]); BAN::ErrorOr<long> sys_pipe(int fildes[2]);
BAN::ErrorOr<long> sys_dup2(int fildes, int fildes2); BAN::ErrorOr<long> sys_dup2(int fildes, int fildes2);

View File

@ -27,6 +27,7 @@ namespace Kernel
virtual bool can_read_impl() const override { return false; } virtual bool can_read_impl() const override { return false; }
virtual bool can_write_impl() const override { return false; } virtual bool can_write_impl() const override { return false; }
virtual bool has_error_impl() const override { return false; } virtual bool has_error_impl() const override { return false; }
virtual bool has_hangup_impl() const override { return false; }
private: private:
NVMeController(PCI::Device& pci_device); NVMeController(PCI::Device& pci_device);

View File

@ -17,7 +17,7 @@ namespace Kernel
virtual uint64_t total_size() const override { return m_block_size * m_block_count; } virtual uint64_t total_size() const override { return m_block_size * m_block_count; }
virtual dev_t rdev() const override { return m_rdev; } virtual dev_t rdev() const override { return m_rdev; }
virtual BAN::StringView name() const { return m_name; } virtual BAN::StringView name() const override { return m_name; }
private: private:
NVMeNamespace(NVMeController&, uint32_t ns_index, uint32_t nsid, uint64_t block_count, uint32_t block_size); NVMeNamespace(NVMeController&, uint32_t ns_index, uint32_t nsid, uint64_t block_count, uint32_t block_size);

View File

@ -53,6 +53,7 @@ namespace Kernel
virtual bool can_read_impl() const override { return true; } virtual bool can_read_impl() const override { return true; }
virtual bool can_write_impl() const override { return true; } virtual bool can_write_impl() const override { return true; }
virtual bool has_error_impl() const override { return false; } virtual bool has_error_impl() const override { return false; }
virtual bool has_hangup_impl() const override { return false; }
private: private:
const dev_t m_rdev; const dev_t m_rdev;

View File

@ -47,6 +47,7 @@ namespace Kernel
virtual bool can_read_impl() const override { return true; } virtual bool can_read_impl() const override { return true; }
virtual bool can_write_impl() const override { return true; } virtual bool can_write_impl() const override { return true; }
virtual bool has_error_impl() const override { return false; } virtual bool has_error_impl() const override { return false; }
virtual bool has_hangup_impl() const override { return false; }
private: private:
Mutex m_mutex; Mutex m_mutex;

View File

@ -29,6 +29,7 @@ namespace Kernel
bool can_read_impl() const override { SpinLockGuard _(m_buffer_lock); return m_buffer_size > 0; } bool can_read_impl() const override { SpinLockGuard _(m_buffer_lock); return m_buffer_size > 0; }
bool can_write_impl() const override { SpinLockGuard _(m_buffer_lock); return m_buffer_size < m_buffer->size(); } bool can_write_impl() const override { SpinLockGuard _(m_buffer_lock); return m_buffer_size < m_buffer->size(); }
bool has_error_impl() const override { return false; } bool has_error_impl() const override { return false; }
bool has_hangup_impl() const override { return !m_slave.valid(); }
BAN::ErrorOr<long> ioctl_impl(int, void*) override; BAN::ErrorOr<long> ioctl_impl(int, void*) override;
@ -63,6 +64,8 @@ namespace Kernel
protected: protected:
bool putchar_impl(uint8_t ch) override; bool putchar_impl(uint8_t ch) override;
bool has_hangup_impl() const override { return !m_master.valid(); }
BAN::ErrorOr<long> ioctl_impl(int, void*) override; BAN::ErrorOr<long> ioctl_impl(int, void*) override;
private: private:

View File

@ -54,6 +54,7 @@ namespace Kernel
virtual bool can_read_impl() const override { return m_output.flush; } virtual bool can_read_impl() const override { return m_output.flush; }
virtual bool can_write_impl() const override { return true; } virtual bool can_write_impl() const override { return true; }
virtual bool has_error_impl() const override { return false; } virtual bool has_error_impl() const override { return false; }
virtual bool has_hangup_impl() const override { return false; }
protected: protected:
TTY(mode_t mode, uid_t uid, gid_t gid); TTY(mode_t mode, uid_t uid, gid_t gid);

View File

@ -73,6 +73,7 @@ namespace Kernel::ACPI
bool can_read_impl() const override { return true; } bool can_read_impl() const override { return true; }
bool can_write_impl() const override { return false; } bool can_write_impl() const override { return false; }
bool has_error_impl() const override { return false; } bool has_error_impl() const override { return false; }
bool has_hangup_impl() const override { return false; }
private: private:
BatteryInfoInode(AML::Namespace& acpi_namespace, AML::Scope&& battery_path, AML::NameString&& method, size_t index, ino_t ino, const TmpInodeInfo& info) BatteryInfoInode(AML::Namespace& acpi_namespace, AML::Scope&& battery_path, AML::NameString&& method, size_t index, ino_t ino, const TmpInodeInfo& info)

137
kernel/kernel/Epoll.cpp Normal file
View File

@ -0,0 +1,137 @@
#include <kernel/Epoll.h>
#include <kernel/Lock/LockGuard.h>
#include <kernel/Timer/Timer.h>
namespace Kernel
{
// Creates a new, empty epoll instance.
// Returns ENOMEM if the instance cannot be allocated.
BAN::ErrorOr<BAN::RefPtr<Epoll>> Epoll::create()
{
	Epoll* instance = new Epoll();
	if (!instance)
		return BAN::Error::from_errno(ENOMEM);

	// Hand ownership of the fresh allocation over to the RefPtr.
	auto result = BAN::RefPtr<Epoll>::adopt(instance);
	return result;
}
// Detaches this epoll instance from every inode it is still registered on,
// so that no inode keeps a pointer to the destroyed object.
Epoll::~Epoll()
{
	// Bind by reference: copying each entry would bump the inode's refcount
	// and copy the epoll_event for no benefit.
	for (auto& [inode, _] : m_listening_events)
		inode->del_epoll(this);
}
// Adds, modifies or removes the registration of `inode` on this epoll.
//
// op == EPOLL_CTL_ADD: fails with EEXIST if the inode is already registered.
// op == EPOLL_CTL_MOD: fails with ENOENT if the inode is not registered.
// op == EPOLL_CTL_DEL: fails with ENOENT if the inode is not registered.
// Any other op fails with EINVAL. Errors from reserving map space or from
// Inode::add_epoll() are propagated.
BAN::ErrorOr<void> Epoll::ctl(int op, BAN::RefPtr<Inode> inode, epoll_event event)
{
	LockGuard _(m_mutex);

	auto listen_it = m_listening_events.find(inode);
	const bool is_registered = (listen_it != m_listening_events.end());

	if (op == EPOLL_CTL_ADD)
	{
		if (is_registered)
			return BAN::Error::from_errno(EEXIST);

		// Reserve space in both maps first so the inserts below cannot fail
		// after the inode has already been told about this epoll.
		TRY(m_listening_events.reserve(m_listening_events.size() + 1));
		TRY(m_ready_events.reserve(m_listening_events.size() + 1));
		TRY(inode->add_epoll(this));
		MUST(m_listening_events.insert(inode, event));
		// Start out as "possibly ready"; wait() checks the actual state.
		MUST(m_ready_events.insert(inode, event.events));
		return {};
	}

	if (op == EPOLL_CTL_MOD)
	{
		if (!is_registered)
			return BAN::Error::from_errno(ENOENT);

		// Re-arm the ready mask so the new interest set is evaluated by wait().
		MUST(m_ready_events.emplace_or_assign(inode, event.events));
		listen_it->value = event;
		return {};
	}

	if (op == EPOLL_CTL_DEL)
	{
		if (!is_registered)
			return BAN::Error::from_errno(ENOENT);

		m_listening_events.remove(listen_it);
		m_ready_events.remove(inode);
		inode->del_epoll(this);
		return {};
	}

	return BAN::Error::from_errno(EINVAL);
}
// Collects ready events into `event_span` and returns how many were written.
//
// Blocks until at least one event is ready or the absolute deadline
// `waketime_ns` (compared against SystemTimer::ns_since_boot()) has passed;
// returns 0 on timeout. Errors from block_or_eintr_or_timeout_ns() are
// propagated.
BAN::ErrorOr<size_t> Epoll::wait(BAN::Span<epoll_event> event_span, uint64_t waketime_ns)
{
	size_t count = 0;

	for (;;)
	{
		{
			LockGuard _(m_mutex);

			bool has_stale_entries = false;

			// Single forward pass. Entries are never removed while scanning:
			// restarting the scan after a removal would visit entries that
			// were already written to event_span and report them twice.
			for (auto it = m_ready_events.begin(); it != m_ready_events.end() && count < event_span.size(); it++)
			{
				auto& [inode, events] = *it;

				auto& listen = m_listening_events[inode];

				// Snapshot the registered flags; EPOLLONESHOT handling below
				// clears listen.events and must not hide the EPOLLET flag.
				const uint32_t listen_flags = listen.events;
				// Errors and hang-ups are always of interest.
				const uint32_t listen_mask = (listen_flags & (EPOLLIN | EPOLLOUT)) | EPOLLERR | EPOLLHUP;

				events &= listen_mask;

				// Drop bits whose condition no longer holds on the inode.
				if ((events & EPOLLIN) && !inode->can_read())
					events &= ~EPOLLIN;
				if ((events & EPOLLOUT) && !inode->can_write())
					events &= ~EPOLLOUT;
				if ((events & EPOLLERR) && !inode->has_error())
					events &= ~EPOLLERR;
				if ((events & EPOLLHUP) && !inode->has_hangup())
					events &= ~EPOLLHUP;

				if (events == 0)
				{
					has_stale_entries = true;
					continue;
				}

				event_span[count++] = {
					.events = events,
					.data = listen.data,
				};

				// One-shot: disable the registration until it is re-armed
				// through EPOLL_CTL_MOD.
				if (listen_flags & EPOLLONESHOT)
					listen.events = 0;

				// Edge-triggered: consume the reported events so they are
				// only reported again after a new notify().
				if (listen_flags & EPOLLET)
				{
					events &= ~listen_mask;
					has_stale_entries = true;
				}
			}

			if (has_stale_entries)
			{
				// Remove entries with nothing left to report; notify()
				// re-inserts them when needed. The scan restarts after each
				// removal (the iterator is presumably not reusable after
				// remove()); that is harmless here because this pass only
				// erases.
				for (auto it = m_ready_events.begin(); it != m_ready_events.end();)
				{
					if (it->value != 0)
					{
						it++;
						continue;
					}
					m_ready_events.remove(it);
					it = m_ready_events.begin();
				}
			}
		}

		if (count)
			break;

		const uint64_t current_ns = SystemTimer::get().ns_since_boot();
		if (current_ns >= waketime_ns)
			break;

		// Sleep in slices of at most 100 ms so readiness is re-checked
		// periodically even if a notify() was missed.
		const uint64_t timeout_ns = BAN::Math::min<uint64_t>(100'000'000, waketime_ns - current_ns);
		TRY(Thread::current().block_or_eintr_or_timeout_ns(m_thread_blocker, timeout_ns, false));
	}

	return count;
}
// Records that `event` may have become ready on `inode` and wakes threads
// blocked in wait(). Does nothing if the inode is not registered or none of
// the reported bits are of interest.
void Epoll::notify(BAN::RefPtr<Inode> inode, uint32_t event)
{
	LockGuard _(m_mutex);

	auto registration = m_listening_events.find(inode);
	if (registration == m_listening_events.end())
		return;

	// Errors and hang-ups are always of interest; IN/OUT only when requested.
	const uint32_t interest_mask = (registration->value.events & (EPOLLIN | EPOLLOUT)) | EPOLLERR | EPOLLHUP;
	uint32_t pending = event & interest_mask;
	if (pending == 0)
		return;

	auto ready_entry = m_ready_events.find(inode);
	if (ready_entry == m_ready_events.end())
		MUST(m_ready_events.insert(inode, pending));
	else
		ready_entry->value |= pending;

	m_thread_blocker.unblock();
}
}

View File

@ -1,3 +1,4 @@
#include <kernel/Epoll.h>
#include <kernel/FS/Inode.h> #include <kernel/FS/Inode.h>
#include <kernel/Lock/LockGuard.h> #include <kernel/Lock/LockGuard.h>
#include <kernel/Memory/FileBackedRegion.h> #include <kernel/Memory/FileBackedRegion.h>
@ -249,10 +250,39 @@ namespace Kernel
return has_error_impl(); return has_error_impl();
} }
// Returns the result of has_hangup_impl(), evaluated while m_mutex is held.
bool Inode::has_hangup() const
{
	LockGuard guard(m_mutex);
	const bool hung_up = has_hangup_impl();
	return hung_up;
}
BAN::ErrorOr<long> Inode::ioctl(int request, void* arg) BAN::ErrorOr<long> Inode::ioctl(int request, void* arg)
{ {
LockGuard _(m_mutex); LockGuard _(m_mutex);
return ioctl_impl(request, arg); return ioctl_impl(request, arg);
} }
// Appends `epoll` to m_epolls, the list that epoll_notify() walks when an
// event is reported on this inode. Only the raw pointer is stored.
// NOTE(review): no lock is taken here and duplicates are not rejected --
// presumably the caller guarantees both; confirm against Epoll::ctl.
BAN::ErrorOr<void> Inode::add_epoll(class Epoll* epoll)
{
// Propagate the error if the list could not grow.
TRY(m_epolls.push_back(epoll));
return {};
}
// Removes the first entry in m_epolls that equals `epoll`.
// Does nothing if `epoll` is not in the list.
void Inode::del_epoll(class Epoll* epoll)
{
	auto it = m_epolls.begin();
	while (it != m_epolls.end() && *it != epoll)
		it++;

	if (it != m_epolls.end())
		m_epolls.remove(it);
}
void Inode::epoll_notify(uint32_t event)
{
for (auto* epoll : m_epolls)
epoll->notify(this, event);
}
} }

View File

@ -3,7 +3,7 @@
#include <kernel/Thread.h> #include <kernel/Thread.h>
#include <kernel/Timer/Timer.h> #include <kernel/Timer/Timer.h>
#include <kernel/Process.h> #include <sys/epoll.h>
namespace Kernel namespace Kernel
{ {
@ -36,8 +36,10 @@ namespace Kernel
{ {
auto old_writing_count = m_writing_count.fetch_sub(1); auto old_writing_count = m_writing_count.fetch_sub(1);
ASSERT(old_writing_count > 0); ASSERT(old_writing_count > 0);
if (old_writing_count == 1) if (old_writing_count != 1)
m_thread_blocker.unblock(); return;
epoll_notify(EPOLLHUP);
m_thread_blocker.unblock();
} }
BAN::ErrorOr<size_t> Pipe::read_impl(off_t, BAN::ByteSpan buffer) BAN::ErrorOr<size_t> Pipe::read_impl(off_t, BAN::ByteSpan buffer)
@ -69,6 +71,8 @@ namespace Kernel
m_atime = SystemTimer::get().real_time(); m_atime = SystemTimer::get().real_time();
epoll_notify(EPOLLOUT);
m_thread_blocker.unblock(); m_thread_blocker.unblock();
return to_copy; return to_copy;
@ -103,6 +107,8 @@ namespace Kernel
m_mtime = current_time; m_mtime = current_time;
m_ctime = current_time; m_ctime = current_time;
epoll_notify(EPOLLIN);
m_thread_blocker.unblock(); m_thread_blocker.unblock();
return to_copy; return to_copy;

View File

@ -226,7 +226,6 @@ namespace Kernel
} }
/* SOCKET INODE */ /* SOCKET INODE */
BAN::ErrorOr<BAN::RefPtr<TmpSocketInode>> TmpSocketInode::create_new(TmpFileSystem& fs, mode_t mode, uid_t uid, gid_t gid) BAN::ErrorOr<BAN::RefPtr<TmpSocketInode>> TmpSocketInode::create_new(TmpFileSystem& fs, mode_t mode, uid_t uid, gid_t gid)
{ {
auto info = create_inode_info(Mode::IFSOCK | mode, uid, gid); auto info = create_inode_info(Mode::IFSOCK | mode, uid, gid);

View File

@ -6,6 +6,7 @@
#include <LibInput/KeyEvent.h> #include <LibInput/KeyEvent.h>
#include <LibInput/MouseEvent.h> #include <LibInput/MouseEvent.h>
#include <sys/epoll.h>
#include <sys/sysmacros.h> #include <sys/sysmacros.h>
namespace Kernel namespace Kernel
@ -85,54 +86,58 @@ namespace Kernel
void InputDevice::add_event(BAN::ConstByteSpan event) void InputDevice::add_event(BAN::ConstByteSpan event)
{ {
SpinLockGuard _(m_event_lock);
ASSERT(event.size() == m_event_size);
if (m_type == Type::Mouse && m_event_count > 0)
{ {
const size_t last_index = (m_event_head + m_max_event_count - 1) % m_max_event_count; SpinLockGuard _(m_event_lock);
ASSERT(event.size() == m_event_size);
auto& last_event = *reinterpret_cast<LibInput::MouseEvent*>(&m_event_buffer[last_index * m_event_size]); if (m_type == Type::Mouse && m_event_count > 0)
auto& curr_event = event.as<const LibInput::MouseEvent>();
if (last_event.type == LibInput::MouseEventType::MouseMoveEvent && curr_event.type == LibInput::MouseEventType::MouseMoveEvent)
{ {
last_event.move_event.rel_x += curr_event.move_event.rel_x; const size_t last_index = (m_event_head + m_max_event_count - 1) % m_max_event_count;
last_event.move_event.rel_y += curr_event.move_event.rel_y;
return;
}
if (last_event.type == LibInput::MouseEventType::MouseScrollEvent && curr_event.type == LibInput::MouseEventType::MouseScrollEvent)
{
last_event.scroll_event.scroll += curr_event.scroll_event.scroll;
return;
}
}
if (m_type == Type::Keyboard) auto& last_event = *reinterpret_cast<LibInput::MouseEvent*>(&m_event_buffer[last_index * m_event_size]);
{ auto& curr_event = event.as<const LibInput::MouseEvent>();
auto& key_event = event.as<const LibInput::RawKeyEvent>(); if (last_event.type == LibInput::MouseEventType::MouseMoveEvent && curr_event.type == LibInput::MouseEventType::MouseMoveEvent)
if (key_event.modifier & LibInput::KeyEvent::Modifier::Pressed)
{
switch (key_event.keycode)
{ {
case LibInput::keycode_function(1): last_event.move_event.rel_x += curr_event.move_event.rel_x;
Processor::toggle_should_print_cpu_load(); last_event.move_event.rel_y += curr_event.move_event.rel_y;
break; return;
case LibInput::keycode_function(12): }
Kernel::panic("Keyboard kernel panic :)"); if (last_event.type == LibInput::MouseEventType::MouseScrollEvent && curr_event.type == LibInput::MouseEventType::MouseScrollEvent)
break; {
last_event.scroll_event.scroll += curr_event.scroll_event.scroll;
return;
} }
} }
if (m_type == Type::Keyboard)
{
auto& key_event = event.as<const LibInput::RawKeyEvent>();
if (key_event.modifier & LibInput::KeyEvent::Modifier::Pressed)
{
switch (key_event.keycode)
{
case LibInput::keycode_function(1):
Processor::toggle_should_print_cpu_load();
break;
case LibInput::keycode_function(12):
Kernel::panic("Keyboard kernel panic :)");
break;
}
}
}
if (m_event_count == m_max_event_count)
{
m_event_tail = (m_event_tail + 1) % m_max_event_count;
m_event_count--;
}
memcpy(&m_event_buffer[m_event_head * m_event_size], event.data(), m_event_size);
m_event_head = (m_event_head + 1) % m_max_event_count;
m_event_count++;
} }
if (m_event_count == m_max_event_count) epoll_notify(EPOLLIN);
{
m_event_tail = (m_event_tail + 1) % m_max_event_count;
m_event_count--;
}
memcpy(&m_event_buffer[m_event_head * m_event_size], event.data(), m_event_size);
m_event_head = (m_event_head + 1) % m_max_event_count;
m_event_count++;
m_event_thread_blocker.unblock(); m_event_thread_blocker.unblock();
if (m_type == Type::Keyboard && s_keyboard_device) if (m_type == Type::Keyboard && s_keyboard_device)
@ -197,6 +202,12 @@ namespace Kernel
, m_name("keyboard"_sv) , m_name("keyboard"_sv)
{} {}
// Signals readiness on the keyboard device: reports EPOLLIN through
// epoll_notify() and then unblocks threads waiting on m_thread_blocker.
// NOTE(review): presumably called when a new key event has been queued --
// the caller is not visible here.
void KeyboardDevice::notify()
{
epoll_notify(EPOLLIN);
m_thread_blocker.unblock();
}
BAN::ErrorOr<size_t> KeyboardDevice::read_impl(off_t, BAN::ByteSpan buffer) BAN::ErrorOr<size_t> KeyboardDevice::read_impl(off_t, BAN::ByteSpan buffer)
{ {
if (buffer.size() < sizeof(LibInput::RawKeyEvent)) if (buffer.size() < sizeof(LibInput::RawKeyEvent))
@ -243,6 +254,12 @@ namespace Kernel
, m_name("mouse"_sv) , m_name("mouse"_sv)
{} {}
// Signals readiness on the mouse device: reports EPOLLIN through
// epoll_notify() and then unblocks threads waiting on m_thread_blocker.
// NOTE(review): presumably called when a new mouse event has been queued --
// the caller is not visible here.
void MouseDevice::notify()
{
epoll_notify(EPOLLIN);
m_thread_blocker.unblock();
}
BAN::ErrorOr<size_t> MouseDevice::read_impl(off_t, BAN::ByteSpan buffer) BAN::ErrorOr<size_t> MouseDevice::read_impl(off_t, BAN::ByteSpan buffer)
{ {
if (buffer.size() < sizeof(LibInput::MouseEvent)) if (buffer.size() < sizeof(LibInput::MouseEvent))

View File

@ -6,6 +6,7 @@
#include <fcntl.h> #include <fcntl.h>
#include <netinet/in.h> #include <netinet/in.h>
#include <sys/epoll.h>
namespace Kernel namespace Kernel
{ {
@ -271,6 +272,11 @@ namespace Kernel
return m_send_window.data_size < m_send_window.buffer->size(); return m_send_window.data_size < m_send_window.buffer->size();
} }
// A TCP socket reports hangup once m_has_connected is set and the state
// is anything other than State::Established.
bool TCPSocket::has_hangup_impl() const
{
	// Without m_has_connected set, hangup is never reported.
	if (!m_has_connected)
		return false;
	return m_state != State::Established;
}
BAN::ErrorOr<size_t> TCPSocket::return_with_maybe_zero() BAN::ErrorOr<size_t> TCPSocket::return_with_maybe_zero()
{ {
ASSERT(m_state != State::Established); ASSERT(m_state != State::Established);
@ -577,6 +583,8 @@ namespace Kernel
memcpy(buffer + m_recv_window.data_size, payload.data(), payload.size()); memcpy(buffer + m_recv_window.data_size, payload.data(), payload.size());
m_recv_window.data_size += payload.size(); m_recv_window.data_size += payload.size();
epoll_notify(EPOLLIN);
dprintln_if(DEBUG_TCP, "Received {} bytes", payload.size()); dprintln_if(DEBUG_TCP, "Received {} bytes", payload.size());
if (m_next_flags == 0) if (m_next_flags == 0)
@ -726,6 +734,8 @@ namespace Kernel
m_send_window.current_seq += to_send; m_send_window.current_seq += to_send;
i += to_send; i += to_send;
epoll_notify(EPOLLOUT);
} }
m_send_window.last_send_ms = current_ms; m_send_window.last_send_ms = current_ms;

View File

@ -2,6 +2,8 @@
#include <kernel/Networking/UDPSocket.h> #include <kernel/Networking/UDPSocket.h>
#include <kernel/Thread.h> #include <kernel/Thread.h>
#include <sys/epoll.h>
namespace Kernel namespace Kernel
{ {
@ -70,6 +72,8 @@ namespace Kernel
m_packets.emplace(packet_info); m_packets.emplace(packet_info);
m_packet_total_size += payload.size(); m_packet_total_size += payload.size();
epoll_notify(EPOLLIN);
m_packet_thread_blocker.unblock(); m_packet_thread_blocker.unblock();
} }

View File

@ -5,6 +5,7 @@
#include <kernel/Scheduler.h> #include <kernel/Scheduler.h>
#include <fcntl.h> #include <fcntl.h>
#include <sys/epoll.h>
#include <sys/un.h> #include <sys/un.h>
namespace Kernel namespace Kernel
@ -62,6 +63,7 @@ namespace Kernel
if (auto connection = connection_info.connection.lock(); connection && connection->m_info.has<ConnectionInfo>()) if (auto connection = connection_info.connection.lock(); connection && connection->m_info.has<ConnectionInfo>())
{ {
connection->m_info.get<ConnectionInfo>().target_closed = true; connection->m_info.get<ConnectionInfo>().target_closed = true;
connection->epoll_notify(EPOLLHUP);
connection->m_packet_thread_blocker.unblock(); connection->m_packet_thread_blocker.unblock();
} }
} }
@ -172,6 +174,8 @@ namespace Kernel
TRY(Thread::current().block_or_eintr_indefinite(target_info.pending_thread_blocker)); TRY(Thread::current().block_or_eintr_indefinite(target_info.pending_thread_blocker));
} }
target->epoll_notify(EPOLLIN);
while (!connection_info.connection_done) while (!connection_info.connection_done)
Processor::yield(); Processor::yield();
@ -263,6 +267,8 @@ namespace Kernel
if (!is_streaming()) if (!is_streaming())
m_packet_sizes.push(packet.size()); m_packet_sizes.push(packet.size());
epoll_notify(EPOLLIN);
m_packet_thread_blocker.unblock(); m_packet_thread_blocker.unblock();
m_packet_lock.unlock(state); m_packet_lock.unlock(state);
return {}; return {};
@ -295,6 +301,17 @@ namespace Kernel
return true; return true;
} }
// Reports hangup only when m_info holds a ConnectionInfo whose
// target_closed flag has been set.
bool UnixDomainSocket::has_hangup_impl() const
{
	// Without a ConnectionInfo there is no target_closed flag to report.
	if (!m_info.has<ConnectionInfo>())
		return false;
	return m_info.get<ConnectionInfo>().target_closed;
}
BAN::ErrorOr<size_t> UnixDomainSocket::sendto_impl(BAN::ConstByteSpan message, const sockaddr* address, socklen_t address_len) BAN::ErrorOr<size_t> UnixDomainSocket::sendto_impl(BAN::ConstByteSpan message, const sockaddr* address, socklen_t address_len)
{ {
if (message.size() > s_packet_buffer_size) if (message.size() > s_packet_buffer_size)
@ -390,6 +407,8 @@ namespace Kernel
m_packet_thread_blocker.unblock(); m_packet_thread_blocker.unblock();
m_packet_lock.unlock(state); m_packet_lock.unlock(state);
epoll_notify(EPOLLOUT);
return nread; return nread;
} }

View File

@ -2,6 +2,7 @@
#include <BAN/StringView.h> #include <BAN/StringView.h>
#include <kernel/ACPI/ACPI.h> #include <kernel/ACPI/ACPI.h>
#include <kernel/ELF.h> #include <kernel/ELF.h>
#include <kernel/Epoll.h>
#include <kernel/FS/DevFS/FileSystem.h> #include <kernel/FS/DevFS/FileSystem.h>
#include <kernel/FS/ProcFS/FileSystem.h> #include <kernel/FS/ProcFS/FileSystem.h>
#include <kernel/FS/VirtualFileSystem.h> #include <kernel/FS/VirtualFileSystem.h>
@ -1452,21 +1453,19 @@ namespace Kernel
return TRY(inode->ioctl(request, arg)); return TRY(inode->ioctl(request, arg));
} }
BAN::ErrorOr<long> Process::sys_pselect(sys_pselect_t* _arguments) BAN::ErrorOr<long> Process::sys_pselect(sys_pselect_t* user_arguments)
{ {
sys_pselect_t arguments; sys_pselect_t arguments;
{ {
LockGuard _(m_process_lock); LockGuard _(m_process_lock);
TRY(validate_pointer_access(_arguments, sizeof(sys_pselect_t), false)); TRY(validate_pointer_access(user_arguments, sizeof(sys_pselect_t), false));
arguments = *_arguments; arguments = *user_arguments;
} }
MemoryRegion* readfd_region = nullptr; MemoryRegion* readfd_region = nullptr;
MemoryRegion* writefd_region = nullptr; MemoryRegion* writefd_region = nullptr;
MemoryRegion* errorfd_region = nullptr; MemoryRegion* errorfd_region = nullptr;
MemoryRegion* timeout_region = nullptr;
MemoryRegion* sigmask_region = nullptr;
BAN::ScopeGuard _([&] { BAN::ScopeGuard _([&] {
if (readfd_region) if (readfd_region)
@ -1475,75 +1474,57 @@ namespace Kernel
writefd_region->unpin(); writefd_region->unpin();
if (errorfd_region) if (errorfd_region)
errorfd_region->unpin(); errorfd_region->unpin();
if (timeout_region)
timeout_region->unpin();
if (sigmask_region)
sigmask_region->unpin();
}); });
readfd_region = TRY(validate_and_pin_pointer_access(arguments.readfds, sizeof(fd_set), true)); readfd_region = TRY(validate_and_pin_pointer_access(arguments.readfds, sizeof(fd_set), true));
writefd_region = TRY(validate_and_pin_pointer_access(arguments.writefds, sizeof(fd_set), true)); writefd_region = TRY(validate_and_pin_pointer_access(arguments.writefds, sizeof(fd_set), true));
errorfd_region = TRY(validate_and_pin_pointer_access(arguments.errorfds, sizeof(fd_set), true)); errorfd_region = TRY(validate_and_pin_pointer_access(arguments.errorfds, sizeof(fd_set), true));
timeout_region = TRY(validate_and_pin_pointer_access(arguments.timeout, sizeof(timespec), false));
sigmask_region = TRY(validate_and_pin_pointer_access(arguments.sigmask, sizeof(sigset_t), false));
const auto old_sigmask = Thread::current().m_signal_block_mask; const auto old_sigmask = Thread::current().m_signal_block_mask;
if (arguments.sigmask) if (arguments.sigmask)
{
LockGuard _(m_process_lock);
TRY(validate_pointer_access(arguments.sigmask, sizeof(sigset_t), false));
Thread::current().m_signal_block_mask = *arguments.sigmask; Thread::current().m_signal_block_mask = *arguments.sigmask;
}
BAN::ScopeGuard sigmask_restore([old_sigmask] { Thread::current().m_signal_block_mask = old_sigmask; }); BAN::ScopeGuard sigmask_restore([old_sigmask] { Thread::current().m_signal_block_mask = old_sigmask; });
uint64_t timedout_ns = SystemTimer::get().ns_since_boot(); uint64_t waketime_ns = BAN::numeric_limits<uint64_t>::max();
if (arguments.timeout) if (arguments.timeout)
{ {
timedout_ns += arguments.timeout->tv_sec * 1'000'000'000; LockGuard _(m_process_lock);
timedout_ns += arguments.timeout->tv_nsec; TRY(validate_pointer_access(arguments.timeout, sizeof(timespec), false));
waketime_ns =
SystemTimer::get().ns_since_boot() +
(arguments.timeout->tv_sec * 1'000'000'000) +
arguments.timeout->tv_nsec;
} }
fd_set readfds; FD_ZERO(&readfds); auto epoll = TRY(Epoll::create());
fd_set writefds; FD_ZERO(&writefds); for (int fd = 0; fd < user_arguments->nfds; fd++)
fd_set errorfds; FD_ZERO(&errorfds);
int set_bits = 0;
for (;;)
{ {
auto update_fds = uint32_t events = 0;
[&](int fd, fd_set* source, fd_set* dest, bool (Inode::*func)() const) if (arguments.readfds && FD_ISSET(fd, arguments.readfds))
{ events |= EPOLLIN;
if (source == nullptr) if (arguments.writefds && FD_ISSET(fd, arguments.writefds))
return; events |= EPOLLOUT;
if (arguments.errorfds && FD_ISSET(fd, arguments.errorfds))
events |= EPOLLERR;
if (events == 0)
continue;
if (!FD_ISSET(fd, source)) auto inode_or_error = m_open_file_descriptors.inode_of(fd);
return; if (inode_or_error.is_error())
continue;
auto inode_or_error = m_open_file_descriptors.inode_of(fd); TRY(epoll->ctl(EPOLL_CTL_ADD, inode_or_error.release_value(), { .events = events, .data = { .fd = fd }}));
if (inode_or_error.is_error())
return;
auto inode = inode_or_error.release_value();
if ((inode.ptr()->*func)())
{
FD_SET(fd, dest);
set_bits++;
}
};
for (int i = 0; i < arguments.nfds; i++)
{
update_fds(i, arguments.readfds, &readfds, &Inode::can_read);
update_fds(i, arguments.writefds, &writefds, &Inode::can_write);
update_fds(i, arguments.errorfds, &errorfds, &Inode::has_error);
}
if (set_bits > 0)
break;
if (arguments.timeout && SystemTimer::get().ns_since_boot() >= timedout_ns)
break;
// FIXME: implement some multi thread blocker system?
TRY(Thread::current().sleep_or_eintr_ms(1));
} }
BAN::Vector<epoll_event> event_buffer;
TRY(event_buffer.resize(user_arguments->nfds));
const size_t waited_events = TRY(epoll->wait(event_buffer.span(), waketime_ns));
if (arguments.readfds) if (arguments.readfds)
FD_ZERO(arguments.readfds); FD_ZERO(arguments.readfds);
if (arguments.writefds) if (arguments.writefds)
@ -1551,17 +1532,98 @@ namespace Kernel
if (arguments.errorfds) if (arguments.errorfds)
FD_ZERO(arguments.errorfds); FD_ZERO(arguments.errorfds);
for (int i = 0; i < arguments.nfds; i++) for (size_t i = 0; i < waited_events; i++)
{ {
if (arguments.readfds && FD_ISSET(i, &readfds)) const int fd = event_buffer[i].data.fd;
FD_SET(i, arguments.readfds); if (arguments.readfds && event_buffer[i].events & (EPOLLIN | EPOLLHUP))
if (arguments.writefds && FD_ISSET(i, &writefds)) FD_SET(fd, arguments.readfds);
FD_SET(i, arguments.writefds); if (arguments.writefds && event_buffer[i].events & (EPOLLOUT))
if (arguments.errorfds && FD_ISSET(i, &errorfds)) FD_SET(fd, arguments.writefds);
FD_SET(i, arguments.errorfds); if (arguments.errorfds && event_buffer[i].events & (EPOLLERR))
FD_SET(fd, arguments.errorfds);
} }
return set_bits; return waited_events;
}
// Creates a new epoll instance and returns a file descriptor for it.
//
// flags: 0 or EPOLL_CLOEXEC; any other bit yields EINVAL.
// The descriptor is opened O_RDWR, plus O_CLOEXEC when EPOLL_CLOEXEC is set.
BAN::ErrorOr<long> Process::sys_epoll_create1(int flags)
{
	if ((flags & ~EPOLL_CLOEXEC) != 0)
		return BAN::Error::from_errno(EINVAL);

	// Translate the epoll-specific flag into the generic open flag.
	const int open_flags = O_RDWR | ((flags & EPOLL_CLOEXEC) ? O_CLOEXEC : 0);

	VirtualFileSystem::File file;
	file.inode = TRY(Epoll::create());
	TRY(file.canonical_path.append("<epoll>"_sv));

	return TRY(m_open_file_descriptors.open(BAN::move(file), open_flags));
}
// Adds, modifies or removes the registration of `fd` on the epoll instance
// referred to by `epfd`.
//
// Returns EINVAL when epfd == fd, when `user_event` is null for any op other
// than EPOLL_CTL_DEL, or when `epfd` does not refer to an epoll inode.
// Errors from descriptor lookup, user pointer validation and Epoll::ctl are
// propagated. Returns 0 on success.
BAN::ErrorOr<long> Process::sys_epoll_ctl(int epfd, int op, int fd, epoll_event* user_event)
{
	const bool event_required = (op != EPOLL_CTL_DEL);

	if (epfd == fd)
		return BAN::Error::from_errno(EINVAL);
	if (event_required && user_event == nullptr)
		return BAN::Error::from_errno(EINVAL);

	auto epoll_inode = TRY(m_open_file_descriptors.inode_of(epfd));
	if (!epoll_inode->is_epoll())
		return BAN::Error::from_errno(EINVAL);
	auto* epoll = static_cast<Epoll*>(epoll_inode.ptr());

	auto target_inode = TRY(m_open_file_descriptors.inode_of(fd));

	// Copy the event out of user memory while the process lock is held;
	// it stays zero-initialized when no event was supplied.
	epoll_event kernel_event {};
	if (user_event != nullptr)
	{
		LockGuard _(m_process_lock);
		TRY(validate_pointer_access(user_event, sizeof(epoll_event), false));
		kernel_event = *user_event;
	}

	TRY(epoll->ctl(op, target_inode, kernel_event));
	return 0;
}
// Waits for events on the epoll instance referred to by `epfd`.
//
// events/maxevents: user buffer that receives up to `maxevents` events;
//                   maxevents must be positive, otherwise EINVAL.
// timeout:          null to wait without a deadline, otherwise a relative
//                   timeout. A negative tv_sec or a tv_nsec outside
//                   [0, 1'000'000'000) yields EINVAL. A timeout too large to
//                   represent as an absolute nanosecond deadline is treated
//                   as "no deadline" instead of wrapping around.
// sigmask:          when non-null, replaces the calling thread's signal block
//                   mask for the duration of the call; the old mask is
//                   restored on every return path.
//
// Returns the value produced by Epoll::wait, or EINVAL if `epfd` does not
// refer to an epoll inode. Errors from descriptor lookup and user pointer
// validation are propagated.
BAN::ErrorOr<long> Process::sys_epoll_pwait2(int epfd, epoll_event* events, int maxevents, const timespec* timeout, const sigset_t* sigmask)
{
	if (maxevents <= 0)
		return BAN::Error::from_errno(EINVAL);

	auto epoll_inode = TRY(m_open_file_descriptors.inode_of(epfd));
	if (!epoll_inode->is_epoll())
		return BAN::Error::from_errno(EINVAL);

	uint64_t waketime_ns = BAN::numeric_limits<uint64_t>::max();
	if (timeout)
	{
		// Snapshot the user timespec once so that validation and use see
		// the same values.
		timespec user_timeout;
		{
			LockGuard _(m_process_lock);
			TRY(validate_pointer_access(timeout, sizeof(timespec), false));
			user_timeout = *timeout;
		}

		if (user_timeout.tv_sec < 0 || user_timeout.tv_nsec < 0 || user_timeout.tv_nsec >= 1'000'000'000)
			return BAN::Error::from_errno(EINVAL);

		constexpr uint64_t ns_per_sec = 1'000'000'000;
		const uint64_t sec = static_cast<uint64_t>(user_timeout.tv_sec);
		const uint64_t nsec = static_cast<uint64_t>(user_timeout.tv_nsec);

		// Only compute the deadline when it fits in 64 bits; otherwise
		// keep the "no deadline" value assigned above.
		const uint64_t current_ns = SystemTimer::get().ns_since_boot();
		const uint64_t headroom_ns = BAN::numeric_limits<uint64_t>::max() - current_ns;
		if (nsec <= headroom_ns && sec <= (headroom_ns - nsec) / ns_per_sec)
			waketime_ns = current_ns + sec * ns_per_sec + nsec;
	}

	// Keep the output buffer pinned while Epoll::wait writes to it.
	auto* events_region = TRY(validate_and_pin_pointer_access(events, maxevents * sizeof(epoll_event), true));
	BAN::ScopeGuard _([events_region] {
		if (events_region)
			events_region->unpin();
	});

	const auto old_sigmask = Thread::current().m_signal_block_mask;
	if (sigmask)
	{
		LockGuard _(m_process_lock);
		TRY(validate_pointer_access(sigmask, sizeof(sigset_t), false));
		Thread::current().m_signal_block_mask = *sigmask;
	}
	BAN::ScopeGuard sigmask_restore([old_sigmask] { Thread::current().m_signal_block_mask = old_sigmask; });

	return TRY(static_cast<Epoll*>(epoll_inode.ptr())->wait(BAN::Span<epoll_event>(events, maxevents), waketime_ns));
}
BAN::ErrorOr<long> Process::sys_pipe(int fildes[2]) BAN::ErrorOr<long> Process::sys_pipe(int fildes[2])

View File

@ -4,6 +4,7 @@
#include <BAN/ScopeGuard.h> #include <BAN/ScopeGuard.h>
#include <sys/epoll.h>
#include <sys/ioctl.h> #include <sys/ioctl.h>
#include <sys/sysmacros.h> #include <sys/sysmacros.h>
@ -95,6 +96,8 @@ namespace Kernel
reinterpret_cast<uint8_t*>(m_buffer->vaddr())[(m_buffer_tail + m_buffer_size) % m_buffer->size()] = ch; reinterpret_cast<uint8_t*>(m_buffer->vaddr())[(m_buffer_tail + m_buffer_size) % m_buffer->size()] = ch;
m_buffer_size++; m_buffer_size++;
epoll_notify(EPOLLIN);
m_buffer_blocker.unblock(); m_buffer_blocker.unblock();
return true; return true;
@ -127,6 +130,8 @@ namespace Kernel
m_buffer_size -= to_copy; m_buffer_size -= to_copy;
m_buffer_tail = (m_buffer_tail + to_copy) % m_buffer->size(); m_buffer_tail = (m_buffer_tail + to_copy) % m_buffer->size();
epoll_notify(EPOLLOUT);
m_buffer_lock.unlock(state); m_buffer_lock.unlock(state);
return to_copy; return to_copy;
@ -137,7 +142,6 @@ namespace Kernel
auto slave = m_slave.lock(); auto slave = m_slave.lock();
if (!slave) if (!slave)
return BAN::Error::from_errno(ENODEV); return BAN::Error::from_errno(ENODEV);
for (size_t i = 0; i < buffer.size(); i++) for (size_t i = 0; i < buffer.size(); i++)
slave->handle_input_byte(buffer[i]); slave->handle_input_byte(buffer[i]);
return buffer.size(); return buffer.size();

View File

@ -15,6 +15,7 @@
#include <string.h> #include <string.h>
#include <stropts.h> #include <stropts.h>
#include <sys/banan-os.h> #include <sys/banan-os.h>
#include <sys/epoll.h>
#include <sys/sysmacros.h> #include <sys/sysmacros.h>
namespace Kernel namespace Kernel
@ -40,6 +41,7 @@ namespace Kernel
bool can_read_impl() const override { return false; } bool can_read_impl() const override { return false; }
bool can_write_impl() const override { return false; } bool can_write_impl() const override { return false; }
bool has_error_impl() const override { return false; } bool has_error_impl() const override { return false; }
bool has_hangup_impl() const override { return false; }
private: private:
DevTTY(mode_t mode, uid_t uid, gid_t gid) DevTTY(mode_t mode, uid_t uid, gid_t gid)
@ -238,6 +240,7 @@ namespace Kernel
if (ch == '\x04' && (m_termios.c_lflag & ICANON)) if (ch == '\x04' && (m_termios.c_lflag & ICANON))
{ {
m_output.flush = true; m_output.flush = true;
epoll_notify(EPOLLIN);
m_output.thread_blocker.unblock(); m_output.thread_blocker.unblock();
return; return;
} }
@ -280,6 +283,7 @@ namespace Kernel
if (ch == '\n' || !(m_termios.c_lflag & ICANON)) if (ch == '\n' || !(m_termios.c_lflag & ICANON))
{ {
m_output.flush = true; m_output.flush = true;
epoll_notify(EPOLLIN);
m_output.thread_blocker.unblock(); m_output.thread_blocker.unblock();
} }
} }

View File

@ -30,6 +30,7 @@ set(LIBC_SOURCES
string.cpp string.cpp
strings.cpp strings.cpp
sys/banan-os.cpp sys/banan-os.cpp
sys/epoll.cpp
sys/ioctl.cpp sys/ioctl.cpp
sys/mman.cpp sys/mman.cpp
sys/resource.cpp sys/resource.cpp

View File

@ -0,0 +1,49 @@
/* Declarations for the epoll interface: event structures, control
   operations, event flag bits and the userspace entry points. */
#ifndef _SYS_EPOLL_H
#define _SYS_EPOLL_H 1
#include <sys/cdefs.h>
__BEGIN_DECLS
#include <stdint.h>
#include <signal.h>
/* Caller-chosen value attached to a registration; the kernel copies it
   into each event it reports for that registration. */
union epoll_data
{
void* ptr;
int fd;
uint32_t u32;
uint64_t u64;
};
typedef union epoll_data epoll_data_t;
/* One registration request (epoll_ctl) or one reported event (epoll_wait). */
struct epoll_event
{
/* Bitmask of EPOLL* flags defined below. */
uint32_t events;
/* User data, see union epoll_data. */
epoll_data_t data;
};
/* Values for the `op` argument of epoll_ctl(). */
#define EPOLL_CTL_ADD 0
#define EPOLL_CTL_MOD 1
#define EPOLL_CTL_DEL 2
/* Event bits: readable, writable, priority data, error, hangup. */
#define EPOLLIN 0x01
#define EPOLLOUT 0x02
/* NOTE(review): the kernel code visible alongside this header only masks
   in EPOLLIN/EPOLLOUT/EPOLLERR/EPOLLHUP -- confirm whether EPOLLPRI is
   ever reported. */
#define EPOLLPRI 0x04
#define EPOLLERR 0x08
#define EPOLLHUP 0x10
/* Behavior modifiers set in epoll_event.events at registration time. */
#define EPOLLET 0x20
#define EPOLLONESHOT 0x40
/* Flag for epoll_create1(). */
#define EPOLL_CLOEXEC 1
int epoll_create(int size);
int epoll_create1(int flags);
int epoll_ctl(int epfd, int op, int fd, struct epoll_event* event);
/* For epoll_wait/epoll_pwait, `timeout` is in milliseconds and a negative
   value means no timeout; epoll_pwait2 takes a timespec (null for none). */
int epoll_wait(int epfd, struct epoll_event* events, int maxevents, int timeout);
int epoll_pwait(int epfd, struct epoll_event* events, int maxevents, int timeout, const sigset_t* sigmask);
int epoll_pwait2(int epfd, struct epoll_event* events, int maxevents, const struct timespec* timeout, const sigset_t* sigmask);
__END_DECLS
#endif

View File

@ -98,6 +98,9 @@ __BEGIN_DECLS
O(SYS_PTHREAD_EXIT, pthread_exit) \ O(SYS_PTHREAD_EXIT, pthread_exit) \
O(SYS_PTHREAD_JOIN, pthread_join) \ O(SYS_PTHREAD_JOIN, pthread_join) \
O(SYS_PTHREAD_SELF, pthread_self) \ O(SYS_PTHREAD_SELF, pthread_self) \
O(SYS_EPOLL_CREATE1, epoll_create1) \
O(SYS_EPOLL_CTL, epoll_ctl) \
O(SYS_EPOLL_PWAIT2, epoll_pwait2) \
enum Syscall enum Syscall
{ {

View File

@ -0,0 +1,49 @@
#include <errno.h>
#include <unistd.h>
#include <sys/epoll.h>
#include <sys/syscall.h>
// Legacy entry point: `size` is only checked for being positive and is
// otherwise unused. Fails with errno = EINVAL and returns -1 when
// size <= 0; otherwise behaves like epoll_create1(0).
int epoll_create(int size)
{
	if (size > 0)
		return epoll_create1(0);

	errno = EINVAL;
	return -1;
}
// Thin wrapper: passes `flags` unchanged to the SYS_EPOLL_CREATE1 syscall
// and returns its result.
int epoll_create1(int flags)
{
return syscall(SYS_EPOLL_CREATE1, flags);
}
// Thin wrapper: passes all arguments unchanged to the SYS_EPOLL_CTL syscall
// and returns its result.
int epoll_ctl(int epfd, int op, int fd, struct epoll_event* event)
{
return syscall(SYS_EPOLL_CTL, epfd, op, fd, event);
}
// Same as epoll_pwait() with a null signal mask.
int epoll_wait(int epfd, struct epoll_event* events, int maxevents, int timeout)
{
return epoll_pwait(epfd, events, maxevents, timeout, nullptr);
}
// Millisecond-timeout front end for epoll_pwait2().
//
// timeout: milliseconds to wait; a negative value means "no timeout" and is
//          forwarded to epoll_pwait2() as a null timespec pointer.
// All other arguments and the return value are those of epoll_pwait2().
int epoll_pwait(int epfd, struct epoll_event* events, int maxevents, int timeout, const sigset_t* sigmask)
{
	if (timeout < 0)
		return epoll_pwait2(epfd, events, maxevents, nullptr, sigmask);

	// Split the millisecond count into whole seconds and nanoseconds.
	timespec ts;
	ts.tv_sec = static_cast<time_t>(timeout / 1000);
	ts.tv_nsec = (timeout % 1000) * 1'000'000;

	return epoll_pwait2(epfd, events, maxevents, &ts, sigmask);
}
// Thin wrapper: passes all arguments unchanged to the SYS_EPOLL_PWAIT2
// syscall and returns its result.
int epoll_pwait2(int epfd, struct epoll_event* events, int maxevents, const struct timespec* timeout, const sigset_t* sigmask)
{
return syscall(SYS_EPOLL_PWAIT2, epfd, events, maxevents, timeout, sigmask);
}