Kernel: Make epoll work with different fds pointing to same inode
This commit is contained in:
parent
857bac4b78
commit
9b875fb930
|
@ -1,10 +1,12 @@
|
|||
#pragma once
|
||||
|
||||
#include <BAN/Array.h>
|
||||
#include <BAN/CircularQueue.h>
|
||||
#include <BAN/HashMap.h>
|
||||
#include <BAN/HashSet.h>
|
||||
#include <kernel/FS/Inode.h>
|
||||
|
||||
#include <limits.h>
|
||||
#include <sys/epoll.h>
|
||||
|
||||
namespace Kernel
|
||||
|
@ -16,7 +18,7 @@ namespace Kernel
|
|||
static BAN::ErrorOr<BAN::RefPtr<Epoll>> create();
|
||||
~Epoll();
|
||||
|
||||
BAN::ErrorOr<void> ctl(int op, BAN::RefPtr<Inode> inode, epoll_event event);
|
||||
BAN::ErrorOr<void> ctl(int op, int fd, BAN::RefPtr<Inode> inode, epoll_event event);
|
||||
BAN::ErrorOr<size_t> wait(BAN::Span<epoll_event> events, uint64_t waketime_ns);
|
||||
|
||||
void notify(BAN::RefPtr<Inode> inode, uint32_t event);
|
||||
|
@ -59,10 +61,45 @@ namespace Kernel
|
|||
}
|
||||
};
|
||||
|
||||
struct ListenEventList
|
||||
{
|
||||
BAN::Array<epoll_event, OPEN_MAX> events;
|
||||
uint32_t bitmap[(OPEN_MAX + 31) / 32] {};
|
||||
|
||||
bool has_fd(int fd) const
|
||||
{
|
||||
if (fd < 0 || static_cast<size_t>(fd) >= events.size())
|
||||
return false;
|
||||
return bitmap[fd / 32] & (1u << (fd % 32));
|
||||
}
|
||||
|
||||
bool empty() const
|
||||
{
|
||||
for (auto val : bitmap)
|
||||
if (val != 0)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
void add_fd(int fd, epoll_event event)
|
||||
{
|
||||
ASSERT(!has_fd(fd));
|
||||
bitmap[fd / 32] |= (1u << (fd % 32));
|
||||
events[fd] = event;
|
||||
}
|
||||
|
||||
void remove_fd(int fd)
|
||||
{
|
||||
ASSERT(has_fd(fd));
|
||||
bitmap[fd / 32] &= ~(1u << (fd % 32));
|
||||
events[fd] = {};
|
||||
}
|
||||
};
|
||||
|
||||
private:
|
||||
ThreadBlocker m_thread_blocker;
|
||||
BAN::HashMap<BAN::RefPtr<Inode>, uint32_t, InodeRefPtrHash> m_ready_events;
|
||||
BAN::HashMap<BAN::RefPtr<Inode>, epoll_event, InodeRefPtrHash> m_listening_events;
|
||||
BAN::HashMap<BAN::RefPtr<Inode>, uint32_t, InodeRefPtrHash> m_ready_events;
|
||||
BAN::HashMap<BAN::RefPtr<Inode>, ListenEventList, InodeRefPtrHash> m_listening_events;
|
||||
};
|
||||
|
||||
}
|
||||
|
|
|
@ -19,7 +19,7 @@ namespace Kernel
|
|||
inode->del_epoll(this);
|
||||
}
|
||||
|
||||
BAN::ErrorOr<void> Epoll::ctl(int op, BAN::RefPtr<Inode> inode, epoll_event event)
|
||||
BAN::ErrorOr<void> Epoll::ctl(int op, int fd, BAN::RefPtr<Inode> inode, epoll_event event)
|
||||
{
|
||||
LockGuard _(m_mutex);
|
||||
|
||||
|
@ -28,27 +28,51 @@ namespace Kernel
|
|||
switch (op)
|
||||
{
|
||||
case EPOLL_CTL_ADD:
|
||||
if (it != m_listening_events.end())
|
||||
{
|
||||
if (it == m_listening_events.end())
|
||||
it = TRY(m_listening_events.emplace(inode));
|
||||
if (it->value.has_fd(fd))
|
||||
return BAN::Error::from_errno(EEXIST);
|
||||
TRY(m_listening_events.reserve(m_listening_events.size() + 1));
|
||||
TRY(m_ready_events.reserve(m_listening_events.size() + 1));
|
||||
TRY(m_ready_events.reserve(m_listening_events.size()));
|
||||
TRY(inode->add_epoll(this));
|
||||
MUST(m_listening_events.insert(inode, event));
|
||||
MUST(m_ready_events.insert(inode, event.events));
|
||||
it->value.add_fd(fd, event);
|
||||
|
||||
auto ready_it = m_ready_events.find(inode);
|
||||
if (ready_it == m_ready_events.end())
|
||||
ready_it = MUST(m_ready_events.insert(inode, 0));
|
||||
ready_it->value |= event.events;
|
||||
|
||||
return {};
|
||||
}
|
||||
case EPOLL_CTL_MOD:
|
||||
{
|
||||
if (it == m_listening_events.end())
|
||||
return BAN::Error::from_errno(ENOENT);
|
||||
MUST(m_ready_events.emplace_or_assign(inode, event.events));
|
||||
it->value = event;
|
||||
if (!it->value.has_fd(fd))
|
||||
return BAN::Error::from_errno(ENOENT);
|
||||
it->value.events[fd] = event;
|
||||
|
||||
auto ready_it = m_ready_events.find(inode);
|
||||
if (ready_it == m_ready_events.end())
|
||||
ready_it = MUST(m_ready_events.insert(inode, 0));
|
||||
ready_it->value |= event.events;
|
||||
|
||||
return {};
|
||||
}
|
||||
case EPOLL_CTL_DEL:
|
||||
{
|
||||
if (it == m_listening_events.end())
|
||||
return BAN::Error::from_errno(ENOENT);
|
||||
m_listening_events.remove(it);
|
||||
m_ready_events.remove(inode);
|
||||
inode->del_epoll(this);
|
||||
if (!it->value.has_fd(fd))
|
||||
return BAN::Error::from_errno(ENOENT);
|
||||
it->value.remove_fd(fd);
|
||||
if (it->value.empty())
|
||||
{
|
||||
m_listening_events.remove(it);
|
||||
m_ready_events.remove(inode);
|
||||
}
|
||||
return {};
|
||||
}
|
||||
}
|
||||
|
||||
return BAN::Error::from_errno(EINVAL);
|
||||
|
@ -56,6 +80,9 @@ namespace Kernel
|
|||
|
||||
BAN::ErrorOr<size_t> Epoll::wait(BAN::Span<epoll_event> event_span, uint64_t waketime_ns)
|
||||
{
|
||||
if (event_span.empty())
|
||||
return BAN::Error::from_errno(EINVAL);
|
||||
|
||||
size_t count = 0;
|
||||
|
||||
for (;;)
|
||||
|
@ -64,13 +91,17 @@ namespace Kernel
|
|||
|
||||
{
|
||||
LockGuard _(m_mutex);
|
||||
|
||||
for (auto it = m_ready_events.begin(); it != m_ready_events.end() && count < event_span.size();)
|
||||
{
|
||||
auto& [inode, events] = *it;
|
||||
|
||||
auto& listen = m_listening_events[inode];
|
||||
const uint32_t listen_mask = (listen.events & (EPOLLIN | EPOLLOUT)) | EPOLLERR | EPOLLHUP;
|
||||
|
||||
uint32_t listen_mask = EPOLLERR | EPOLLHUP;
|
||||
for (int fd = 0; fd < OPEN_MAX; fd++)
|
||||
if (listen.has_fd(fd))
|
||||
listen_mask |= listen.events[fd].events;
|
||||
events &= listen_mask;
|
||||
|
||||
// This prevents a possible deadlock
|
||||
|
@ -98,16 +129,27 @@ namespace Kernel
|
|||
continue;
|
||||
}
|
||||
|
||||
event_span[count++] = {
|
||||
.events = events,
|
||||
.data = listen.data,
|
||||
};
|
||||
for (int fd = 0; fd < OPEN_MAX && count < event_span.size(); fd++)
|
||||
{
|
||||
if (!listen.has_fd(fd))
|
||||
continue;
|
||||
auto& listen_event = listen.events[fd];
|
||||
|
||||
if (listen.events & EPOLLONESHOT)
|
||||
listen.events = 0;
|
||||
const auto new_events = listen_event.events & events;
|
||||
if (new_events == 0)
|
||||
continue;
|
||||
|
||||
if (listen.events & EPOLLET)
|
||||
events &= ~listen_mask;
|
||||
event_span[count++] = {
|
||||
.events = new_events,
|
||||
.data = listen_event.data,
|
||||
};
|
||||
|
||||
if (listen_event.events & EPOLLONESHOT)
|
||||
listen_event.events = 0;
|
||||
// this doesn't work with multiple of the same inode
|
||||
if (listen_event.events & EPOLLET)
|
||||
events &= ~new_events;
|
||||
}
|
||||
|
||||
it++;
|
||||
}
|
||||
|
@ -132,18 +174,13 @@ namespace Kernel
|
|||
{
|
||||
LockGuard _(m_mutex);
|
||||
|
||||
auto listen_it = m_listening_events.find(inode);
|
||||
if (listen_it == m_listening_events.end())
|
||||
if (!m_listening_events.contains(inode))
|
||||
return;
|
||||
|
||||
event &= (listen_it->value.events & (EPOLLIN | EPOLLOUT)) | EPOLLERR | EPOLLHUP;
|
||||
if (event == 0)
|
||||
return;
|
||||
|
||||
if (auto ready_it = m_ready_events.find(inode); ready_it != m_ready_events.end())
|
||||
ready_it->value |= event;
|
||||
else
|
||||
MUST(m_ready_events.insert(inode, event));
|
||||
auto ready_it = m_ready_events.find(inode);
|
||||
if (ready_it == m_ready_events.end())
|
||||
ready_it = MUST(m_ready_events.insert(inode, 0));
|
||||
ready_it->value |= event;
|
||||
|
||||
m_thread_blocker.unblock();
|
||||
}
|
||||
|
|
|
@ -1583,7 +1583,7 @@ namespace Kernel
|
|||
}
|
||||
|
||||
auto epoll = TRY(Epoll::create());
|
||||
for (int fd = 0; fd < user_arguments->nfds; fd++)
|
||||
for (int fd = 0; fd < arguments.nfds; fd++)
|
||||
{
|
||||
uint32_t events = 0;
|
||||
if (arguments.readfds && FD_ISSET(fd, arguments.readfds))
|
||||
|
@ -1599,11 +1599,11 @@ namespace Kernel
|
|||
if (inode_or_error.is_error())
|
||||
continue;
|
||||
|
||||
TRY(epoll->ctl(EPOLL_CTL_ADD, inode_or_error.release_value(), { .events = events, .data = { .fd = fd }}));
|
||||
TRY(epoll->ctl(EPOLL_CTL_ADD, fd, inode_or_error.release_value(), { .events = events, .data = { .fd = fd }}));
|
||||
}
|
||||
|
||||
BAN::Vector<epoll_event> event_buffer;
|
||||
TRY(event_buffer.resize(user_arguments->nfds));
|
||||
TRY(event_buffer.resize(arguments.nfds));
|
||||
|
||||
const size_t waited_events = TRY(epoll->wait(event_buffer.span(), waketime_ns));
|
||||
|
||||
|
@ -1663,7 +1663,7 @@ namespace Kernel
|
|||
event = *user_event;
|
||||
}
|
||||
|
||||
TRY(static_cast<Epoll*>(epoll_inode.ptr())->ctl(op, inode, event));
|
||||
TRY(static_cast<Epoll*>(epoll_inode.ptr())->ctl(op, fd, inode, event));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue