From b1065fa01d2c534c174a58ce9dc14031e25d5cd4 Mon Sep 17 00:00:00 2001
From: Bananymous
Date: Fri, 30 May 2025 22:13:01 +0300
Subject: [PATCH] Kernel/LibC: Add ppoll syscall and fix poll

poll is now using its own syscall instead of wrapping around pselect.
This adds less overhead on top of poll and adds support for POLLHUP
---
Review notes (this section is ignored by git am):
- poll() scales the millisecond remainder to nanoseconds before storing it
  in tv_nsec.
- The sys_ppoll declaration uses the same parameter name as its definition.
- Angle-bracket contents were missing from this copy of the patch. Template
  arguments and include targets that the code determines are restored; the
  Process.h context includes are left as found.

 kernel/include/kernel/Process.h               |   2 +
 kernel/kernel/Process.cpp                     | 111 ++++++++++++++++++
 userspace/libraries/LibC/include/poll.h       |   3 +
 .../libraries/LibC/include/sys/syscall.h      |   1 +
 userspace/libraries/LibC/poll.cpp             |  67 +++--------
 5 files changed, 133 insertions(+), 51 deletions(-)

diff --git a/kernel/include/kernel/Process.h b/kernel/include/kernel/Process.h
index c6baf7cd..66ce6972 100644
--- a/kernel/include/kernel/Process.h
+++ b/kernel/include/kernel/Process.h
@@ -16,6 +16,7 @@
 #include
 #include
 
+#include <poll.h>
 #include
 #include
 #include
@@ -135,6 +136,7 @@ namespace Kernel
 		BAN::ErrorOr<long> sys_ioctl(int fildes, int request, void* arg);
 
 		BAN::ErrorOr<long> sys_pselect(sys_pselect_t* arguments);
+		BAN::ErrorOr<long> sys_ppoll(pollfd* fds, nfds_t nfds, const timespec* timeout, const sigset_t* sigmask);
 
 		BAN::ErrorOr<long> sys_epoll_create1(int flags);
 		BAN::ErrorOr<long> sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event* event);
diff --git a/kernel/kernel/Process.cpp b/kernel/kernel/Process.cpp
index e4d37016..f2d00581 100644
--- a/kernel/kernel/Process.cpp
+++ b/kernel/kernel/Process.cpp
@@ -1628,6 +1628,117 @@ namespace Kernel
 		return waited_events;
 	}
 
+	// Polls the nfds entries of fds through a temporary epoll instance.
+	// A null timeout leaves the wake time at its maximum value; a non-null sigmask
+	// replaces the thread's signal block mask until sigmask_restore puts the old one back.
+	// Returns the number of entries whose revents ended up non-zero.
+	BAN::ErrorOr<long> Process::sys_ppoll(pollfd* fds, nfds_t nfds, const timespec* timeout, const sigset_t* sigmask)
+	{
+		auto* fds_region = TRY(validate_and_pin_pointer_access(fds, nfds * sizeof(pollfd), true));
+		BAN::ScopeGuard _([fds_region] { if (fds_region) fds_region->unpin(); });
+
+		const auto old_sigmask = Thread::current().m_signal_block_mask;
+		if (sigmask)
+		{
+			LockGuard _(m_process_lock);
+			TRY(validate_pointer_access(sigmask, sizeof(sigset_t), false));
+			Thread::current().m_signal_block_mask = *sigmask;
+		}
+		BAN::ScopeGuard sigmask_restore([old_sigmask] { Thread::current().m_signal_block_mask = old_sigmask; });
+
+		uint64_t waketime_ns = BAN::numeric_limits<uint64_t>::max();
+		if (timeout)
+		{
+			LockGuard _(m_process_lock);
+			TRY(validate_pointer_access(timeout, sizeof(timespec), false));
+			waketime_ns =
+				SystemTimer::get().ns_since_boot() +
+				(timeout->tv_sec * 1'000'000'000) +
+				timeout->tv_nsec;
+		}
+
+		uint32_t events_per_fd[OPEN_MAX] {};
+		for (nfds_t i = 0; i < nfds; i++)
+		{
+			if (fds[i].fd < 0 || fds[i].fd >= OPEN_MAX)
+				continue;
+			events_per_fd[fds[i].fd] |= fds[i].events;
+		}
+
+		size_t fd_count = 0;
+
+		auto epoll = TRY(Epoll::create());
+		for (int fd = 0; fd < OPEN_MAX; fd++)
+		{
+			if (events_per_fd[fd] == 0)
+				continue;
+
+			auto inode_or_error = m_open_file_descriptors.inode_of(fd);
+			if (inode_or_error.is_error())
+				continue;
+
+			uint32_t events = 0;
+			if (events_per_fd[fd] & (POLLIN | POLLRDNORM))
+				events |= EPOLLIN;
+			if (events_per_fd[fd] & (POLLOUT | POLLWRNORM))
+				events |= EPOLLOUT;
+			if (events_per_fd[fd] & POLLPRI)
+				events |= EPOLLPRI;
+			// POLLRDBAND
+			// POLLWRBAND
+
+			TRY(epoll->ctl(EPOLL_CTL_ADD, fd, inode_or_error.release_value(), { .events = events, .data = { .fd = fd }}));
+
+			fd_count++;
+		}
+
+		BAN::Vector<epoll_event> event_buffer;
+		TRY(event_buffer.resize(fd_count));
+
+		const size_t waited_events = TRY(epoll->wait(event_buffer.span(), waketime_ns));
+
+		size_t return_value = 0;
+		for (size_t i = 0; i < nfds; i++)
+		{
+			fds[i].revents = 0;
+
+			if (fds[i].fd < 0)
+				continue;
+
+			if (m_open_file_descriptors.inode_of(fds[i].fd).is_error())
+			{
+				fds[i].revents = POLLNVAL;
+				return_value++;
+				continue;
+			}
+
+			for (size_t j = 0; j < waited_events; j++)
+			{
+				if (fds[i].fd != event_buffer[j].data.fd)
+					continue;
+				const uint32_t wanted = fds[i].events;
+				const uint32_t got = event_buffer[j].events;
+				if (got & EPOLLIN)
+					fds[i].revents |= wanted & (POLLIN | POLLRDNORM);
+				if (got & EPOLLOUT)
+					fds[i].revents |= wanted & (POLLOUT | POLLWRNORM);
+				if (got & EPOLLPRI)
+					fds[i].revents |= wanted & POLLPRI;
+				if (got & EPOLLERR)
+					fds[i].revents |= POLLERR;
+				if (got & EPOLLHUP)
+					fds[i].revents |= POLLHUP;
+				// POLLRDBAND
+				// POLLWRBAND
+				if (fds[i].revents)
+					return_value++;
+				break;
+			}
+		}
+
+		return return_value;
+	}
+
 	BAN::ErrorOr<long> Process::sys_epoll_create1(int flags)
 	{
 		if (flags && (flags & ~EPOLL_CLOEXEC))
diff --git a/userspace/libraries/LibC/include/poll.h b/userspace/libraries/LibC/include/poll.h
index 5bd35706..5e4fd35a 100644
--- a/userspace/libraries/LibC/include/poll.h
+++ b/userspace/libraries/LibC/include/poll.h
@@ -7,6 +7,8 @@
 
 __BEGIN_DECLS
 
+#include <signal.h>
+
 struct pollfd
 {
 	int fd; /* The following descriptor being polled. */
@@ -28,6 +30,7 @@ typedef unsigned long nfds_t;
 #define POLLNVAL 0x200
 
 int poll(struct pollfd fds[], nfds_t nfds, int timeout);
+int ppoll(struct pollfd fds[], nfds_t nfds, const struct timespec* timeout, const sigset_t* sigmask);
 
 __END_DECLS
 
diff --git a/userspace/libraries/LibC/include/sys/syscall.h b/userspace/libraries/LibC/include/sys/syscall.h
index df7ad078..97216532 100644
--- a/userspace/libraries/LibC/include/sys/syscall.h
+++ b/userspace/libraries/LibC/include/sys/syscall.h
@@ -73,6 +73,7 @@ __BEGIN_DECLS
 	O(SYS_CONNECT, connect) \
 	O(SYS_LISTEN, listen) \
 	O(SYS_PSELECT, pselect) \
+	O(SYS_PPOLL, ppoll) \
 	O(SYS_TRUNCATE, truncate) \
 	O(SYS_SMO_CREATE, smo_create) \
 	O(SYS_SMO_DELETE, smo_delete) \
diff --git a/userspace/libraries/LibC/poll.cpp b/userspace/libraries/LibC/poll.cpp
index db9d8a28..290978d6 100644
--- a/userspace/libraries/LibC/poll.cpp
+++ b/userspace/libraries/LibC/poll.cpp
@@ -1,56 +1,21 @@
 #include <poll.h>
-#include <sys/select.h>
+#include <sys/syscall.h>
+#include <unistd.h>
 
 int poll(struct pollfd fds[], nfds_t nfds, int timeout)
 {
-	fd_set rfds, wfds, efds;
-	FD_ZERO(&rfds);
-	FD_ZERO(&wfds);
-	FD_ZERO(&efds);
-
-	for (nfds_t i = 0; i < nfds; i++)
-		fds[i].revents = 0;
-
-	constexpr short rmask = POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI | POLLHUP;
-	constexpr short wmask = POLLOUT | POLLWRNORM | POLLWRBAND;
-	constexpr short emask = POLLERR;
-
-	int max_fd = 0;
-	for (nfds_t i = 0; i < nfds; i++)
-	{
-		if (fds[i].fd < 0)
-			continue;
-
-		if (fds[i].events & rmask)
-			FD_SET(fds[i].fd, &rfds);
-		if (fds[i].events & wmask)
-			FD_SET(fds[i].fd, &wfds);
-		if (fds[i].events & emask)
-			FD_SET(fds[i].fd, &efds);
-
-		if (fds[i].fd > max_fd)
-			max_fd = fds[i].fd;
-	}
-
-	timeval tv;
-	tv.tv_sec = timeout / 1000;
-	tv.tv_usec = timeout % 1000 * 1000;
-	int nselect = select(max_fd + 1, &rfds, &wfds, &efds, &tv);
-	if (nselect == -1)
-		return -1;
-
-	for (nfds_t i = 0; i < nfds; i++)
-	{
-		if (fds[i].fd < 0)
-			continue;
-
-		if (FD_ISSET(fds[i].fd, &rfds))
-			fds[i].revents |= fds[i].events & rmask;
-		if (FD_ISSET(fds[i].fd, &wfds))
-			fds[i].revents |= fds[i].events & wmask;
-		if (FD_ISSET(fds[i].fd, &efds))
-			fds[i].revents |= fds[i].events & emask;
-	}
-
-	return nselect;
+	// A negative timeout waits without a deadline, which ppoll takes as a null timespec.
+	if (timeout < 0)
+		return ppoll(fds, nfds, nullptr, nullptr);
+	// timeout is in milliseconds: whole seconds go to tv_sec, the remainder to tv_nsec as nanoseconds.
+	const timespec timeout_ts {
+		.tv_sec = static_cast<time_t>(timeout / 1000),
+		.tv_nsec = static_cast<long>(timeout % 1000) * 1'000'000,
+	};
+	return ppoll(fds, nfds, &timeout_ts, nullptr);
+}
+
+int ppoll(struct pollfd fds[], nfds_t nfds, const struct timespec* timeout, const sigset_t* sigmask)
+{
+	return syscall(SYS_PPOLL, fds, nfds, timeout, sigmask);
 }