Kernel/LibC: Add ppoll syscall and fix poll

poll now uses its own syscall instead of wrapping around pselect.
This adds less overhead on top of poll and adds support for POLLHUP.
This commit is contained in:
Bananymous 2025-05-30 22:13:01 +03:00
parent 8ff9c030bf
commit b1065fa01d
5 changed files with 127 additions and 51 deletions

View File

@ -16,6 +16,7 @@
#include <kernel/Terminal/TTY.h>
#include <kernel/Thread.h>
#include <poll.h>
#include <sys/banan-os.h>
#include <sys/mman.h>
#include <sys/select.h>
@ -135,6 +136,7 @@ namespace Kernel
BAN::ErrorOr<long> sys_ioctl(int fildes, int request, void* arg);
BAN::ErrorOr<long> sys_pselect(sys_pselect_t* arguments);
// Waits for events on the `nfds` pollfd entries in `fds`. `tmp_p` is an optional
// relative timeout and `sigmask` an optional signal block mask that is installed
// for the duration of the call. Returns the number of entries with non-zero revents.
BAN::ErrorOr<long> sys_ppoll(pollfd* fds, nfds_t nfds, const timespec* tmp_p, const sigset_t* sigmask);
BAN::ErrorOr<long> sys_epoll_create1(int flags);
BAN::ErrorOr<long> sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event* event);

View File

@ -1628,6 +1628,113 @@ namespace Kernel
return waited_events;
}
// Wait for events on a set of file descriptors.
//
// fds, nfds - user array of pollfd entries. `revents` of every entry is overwritten.
//             Entries with a negative fd are ignored (revents = 0).
// timeout   - relative timeout, or null to wait without a deadline.
// sigmask   - if non-null, signal block mask installed for the duration of the call;
//             the previous mask is restored when the function returns.
//
// Returns the number of entries whose `revents` ended up non-zero.
// Fails with EINVAL if the timeout is negative or tv_nsec is not in [0, 1e9).
// Errors from pointer validation, epoll setup and the wait itself are propagated.
BAN::ErrorOr<long> Process::sys_ppoll(pollfd* fds, nfds_t nfds, const timespec* timeout, const sigset_t* sigmask)
{
	// Keep the user supplied array pinned for as long as we read and write it.
	auto* fds_region = TRY(validate_and_pin_pointer_access(fds, nfds * sizeof(pollfd), true));
	BAN::ScopeGuard _([fds_region] { if (fds_region) fds_region->unpin(); });

	const auto old_sigmask = Thread::current().m_signal_block_mask;
	if (sigmask)
	{
		LockGuard _(m_process_lock);
		TRY(validate_pointer_access(sigmask, sizeof(sigset_t), false));
		Thread::current().m_signal_block_mask = *sigmask;
	}
	BAN::ScopeGuard sigmask_restore([old_sigmask] { Thread::current().m_signal_block_mask = old_sigmask; });

	// No timeout means "no deadline", represented by the largest possible wake time.
	uint64_t waketime_ns = BAN::numeric_limits<uint64_t>::max();
	if (timeout)
	{
		LockGuard _(m_process_lock);
		TRY(validate_pointer_access(timeout, sizeof(timespec), false));

		constexpr uint64_t ns_per_sec = 1'000'000'000;

		// Reject negative or non-normalized timeouts instead of turning them
		// into a bogus (wrapped) deadline.
		if (timeout->tv_sec < 0 || timeout->tv_nsec < 0 || static_cast<uint64_t>(timeout->tv_nsec) >= ns_per_sec)
			return BAN::Error::from_errno(EINVAL);

		const uint64_t now_ns = SystemTimer::get().ns_since_boot();
		const uint64_t sec    = static_cast<uint64_t>(timeout->tv_sec);
		const uint64_t nsec   = static_cast<uint64_t>(timeout->tv_nsec);

		// Do the arithmetic in 64 bits and saturate: a deadline that does not fit
		// in uint64_t is treated like "no deadline" (waketime_ns stays at max).
		constexpr uint64_t max_ns = BAN::numeric_limits<uint64_t>::max();
		if (now_ns <= max_ns - nsec && sec <= (max_ns - nsec - now_ns) / ns_per_sec)
			waketime_ns = now_ns + sec * ns_per_sec + nsec;
	}

	// Merge the requested events of all entries per descriptor, so that every
	// descriptor is registered to the epoll instance at most once.
	bool has_invalid_fd = false;
	uint32_t events_per_fd[OPEN_MAX] {};
	for (nfds_t i = 0; i < nfds; i++)
	{
		if (fds[i].fd < 0)
			continue;
		if (fds[i].fd >= OPEN_MAX || m_open_file_descriptors.inode_of(fds[i].fd).is_error())
		{
			// This entry will be reported as POLLNVAL below, so the call must not block.
			has_invalid_fd = true;
			continue;
		}
		events_per_fd[fds[i].fd] |= fds[i].events;
	}

	size_t fd_count = 0;

	auto epoll = TRY(Epoll::create());
	for (int fd = 0; fd < OPEN_MAX; fd++)
	{
		if (events_per_fd[fd] == 0)
			continue;

		auto inode_or_error = m_open_file_descriptors.inode_of(fd);
		if (inode_or_error.is_error())
			continue;

		// Translate poll event bits to their epoll counterparts.
		uint32_t events = 0;
		if (events_per_fd[fd] & (POLLIN | POLLRDNORM))
			events |= EPOLLIN;
		if (events_per_fd[fd] & (POLLOUT | POLLWRNORM))
			events |= EPOLLOUT;
		if (events_per_fd[fd] & POLLPRI)
			events |= EPOLLPRI;
		// POLLRDBAND
		// POLLWRBAND

		TRY(epoll->ctl(EPOLL_CTL_ADD, fd, inode_or_error.release_value(), { .events = events, .data = { .fd = fd }}));

		fd_count++;
	}

	BAN::Vector<epoll_event> event_buffer;
	TRY(event_buffer.resize(fd_count));

	// An invalid descriptor is already a reportable result (POLLNVAL). Use a deadline
	// of "now", exactly like a zero timeout, so we only collect what is ready.
	if (has_invalid_fd)
		waketime_ns = SystemTimer::get().ns_since_boot();

	const size_t waited_events = TRY(epoll->wait(event_buffer.span(), waketime_ns));

	// Translate the epoll results back into the caller's pollfd entries.
	size_t return_value = 0;
	for (size_t i = 0; i < nfds; i++)
	{
		fds[i].revents = 0;

		if (fds[i].fd < 0)
			continue;

		if (m_open_file_descriptors.inode_of(fds[i].fd).is_error())
		{
			fds[i].revents = POLLNVAL;
			return_value++;
			continue;
		}

		for (size_t j = 0; j < waited_events; j++)
		{
			if (fds[i].fd != event_buffer[j].data.fd)
				continue;

			const uint32_t wanted = fds[i].events;
			const uint32_t got = event_buffer[j].events;

			// Readiness bits are only reported if this entry asked for them ...
			if (got & EPOLLIN)
				fds[i].revents |= wanted & (POLLIN | POLLRDNORM);
			if (got & EPOLLOUT)
				fds[i].revents |= wanted & (POLLOUT | POLLWRNORM);
			if (got & EPOLLPRI)
				fds[i].revents |= wanted & POLLPRI;

			// ... while error and hang-up are reported unconditionally.
			if (got & EPOLLERR)
				fds[i].revents |= POLLERR;
			if (got & EPOLLHUP)
				fds[i].revents |= POLLHUP;
			// POLLRDBAND
			// POLLWRBAND

			if (fds[i].revents)
				return_value++;
			break;
		}
	}

	return return_value;
}
BAN::ErrorOr<long> Process::sys_epoll_create1(int flags)
{
if (flags && (flags & ~EPOLL_CLOEXEC))

View File

@ -7,6 +7,8 @@
__BEGIN_DECLS
#include <signal.h>
struct pollfd
{
int fd; /* The following descriptor being polled. */
@ -28,6 +30,7 @@ typedef unsigned long nfds_t;
#define POLLNVAL 0x200
/* Wait for events on the nfds entries of fds. timeout is given in milliseconds; a negative value waits without a timeout. */
int poll(struct pollfd fds[], nfds_t nfds, int timeout);
/* Like poll(), but the timeout is a timespec (NULL waits without a timeout) and sigmask, if not NULL, is passed to the kernel as the signal mask for the call. */
int ppoll(struct pollfd fds[], nfds_t nfds, const struct timespec* timeout, const sigset_t* sigmask);
__END_DECLS

View File

@ -73,6 +73,7 @@ __BEGIN_DECLS
O(SYS_CONNECT, connect) \
O(SYS_LISTEN, listen) \
O(SYS_PSELECT, pselect) \
O(SYS_PPOLL, ppoll) \
O(SYS_TRUNCATE, truncate) \
O(SYS_SMO_CREATE, smo_create) \
O(SYS_SMO_DELETE, smo_delete) \

View File

@ -1,56 +1,19 @@
#include <poll.h>
#include <sys/select.h>
#include <sys/syscall.h>
#include <unistd.h>
// Wait for events on the `nfds` entries of `fds`.
//
// `timeout` is given in milliseconds. A negative value is forwarded to ppoll()
// as "no timeout"; any other value is converted to a timespec. The signal mask
// is left untouched (null sigmask).
//
// Returns whatever ppoll() returns.
int poll(struct pollfd fds[], nfds_t nfds, int timeout)
{
	if (timeout < 0)
		return ppoll(fds, nfds, nullptr, nullptr);

	// Split milliseconds into whole seconds and the remainder, and scale the
	// remainder from milliseconds to the nanoseconds that timespec expects.
	const timespec timeout_ts {
		.tv_sec = static_cast<time_t>(timeout / 1000),
		.tv_nsec = static_cast<long>(timeout % 1000) * 1'000'000,
	};

	return ppoll(fds, nfds, &timeout_ts, nullptr);
}
// libc entry point for ppoll: forwards all arguments unchanged to the kernel
// through the SYS_PPOLL syscall and hands back its result.
int ppoll(struct pollfd fds[], nfds_t nfds, const struct timespec* timeout, const sigset_t* sigmask)
{
	const auto result = syscall(SYS_PPOLL, fds, nfds, timeout, sigmask);
	return result;
}