forked from Bananymous/banan-os
				
			Kernel: Make epoll work with different fds pointing to same inode
This commit is contained in:
		
							parent
							
								
									857bac4b78
								
							
						
					
					
						commit
						9b875fb930
					
				|  | @ -1,10 +1,12 @@ | ||||||
| #pragma once | #pragma once | ||||||
| 
 | 
 | ||||||
|  | #include <BAN/Array.h> | ||||||
| #include <BAN/CircularQueue.h> | #include <BAN/CircularQueue.h> | ||||||
| #include <BAN/HashMap.h> | #include <BAN/HashMap.h> | ||||||
| #include <BAN/HashSet.h> | #include <BAN/HashSet.h> | ||||||
| #include <kernel/FS/Inode.h> | #include <kernel/FS/Inode.h> | ||||||
| 
 | 
 | ||||||
|  | #include <limits.h> | ||||||
| #include <sys/epoll.h> | #include <sys/epoll.h> | ||||||
| 
 | 
 | ||||||
| namespace Kernel | namespace Kernel | ||||||
|  | @ -16,7 +18,7 @@ namespace Kernel | ||||||
| 		static BAN::ErrorOr<BAN::RefPtr<Epoll>> create(); | 		static BAN::ErrorOr<BAN::RefPtr<Epoll>> create(); | ||||||
| 		~Epoll(); | 		~Epoll(); | ||||||
| 
 | 
 | ||||||
| 		BAN::ErrorOr<void> ctl(int op, BAN::RefPtr<Inode> inode, epoll_event event); | 		BAN::ErrorOr<void> ctl(int op, int fd, BAN::RefPtr<Inode> inode, epoll_event event); | ||||||
| 		BAN::ErrorOr<size_t> wait(BAN::Span<epoll_event> events, uint64_t waketime_ns); | 		BAN::ErrorOr<size_t> wait(BAN::Span<epoll_event> events, uint64_t waketime_ns); | ||||||
| 
 | 
 | ||||||
| 		void notify(BAN::RefPtr<Inode> inode, uint32_t event); | 		void notify(BAN::RefPtr<Inode> inode, uint32_t event); | ||||||
|  | @ -59,10 +61,45 @@ namespace Kernel | ||||||
| 			} | 			} | ||||||
| 		}; | 		}; | ||||||
| 
 | 
 | ||||||
|  | 		struct ListenEventList | ||||||
|  | 		{ | ||||||
|  | 			BAN::Array<epoll_event, OPEN_MAX> events; | ||||||
|  | 			uint32_t bitmap[(OPEN_MAX + 31) / 32] {}; | ||||||
|  | 
 | ||||||
|  | 			bool has_fd(int fd) const | ||||||
|  | 			{ | ||||||
|  | 				if (fd < 0 || static_cast<size_t>(fd) >= events.size()) | ||||||
|  | 					return false; | ||||||
|  | 				return bitmap[fd / 32] & (1u << (fd % 32)); | ||||||
|  | 			} | ||||||
|  | 
 | ||||||
|  | 			bool empty() const | ||||||
|  | 			{ | ||||||
|  | 				for (auto val : bitmap) | ||||||
|  | 					if (val != 0) | ||||||
|  | 						return false; | ||||||
|  | 				return true; | ||||||
|  | 			} | ||||||
|  | 
 | ||||||
|  | 			void add_fd(int fd, epoll_event event) | ||||||
|  | 			{ | ||||||
|  | 				ASSERT(!has_fd(fd)); | ||||||
|  | 				bitmap[fd / 32] |= (1u << (fd % 32)); | ||||||
|  | 				events[fd] = event; | ||||||
|  | 			} | ||||||
|  | 
 | ||||||
|  | 			void remove_fd(int fd) | ||||||
|  | 			{ | ||||||
|  | 				ASSERT(has_fd(fd)); | ||||||
|  | 				bitmap[fd / 32] &= ~(1u << (fd % 32)); | ||||||
|  | 				events[fd] = {}; | ||||||
|  | 			} | ||||||
|  | 		}; | ||||||
|  | 
 | ||||||
| 	private: | 	private: | ||||||
| 		ThreadBlocker m_thread_blocker; | 		ThreadBlocker m_thread_blocker; | ||||||
| 		BAN::HashMap<BAN::RefPtr<Inode>, uint32_t,    InodeRefPtrHash> m_ready_events; | 		BAN::HashMap<BAN::RefPtr<Inode>, uint32_t,        InodeRefPtrHash> m_ready_events; | ||||||
| 		BAN::HashMap<BAN::RefPtr<Inode>, epoll_event, InodeRefPtrHash> m_listening_events; | 		BAN::HashMap<BAN::RefPtr<Inode>, ListenEventList, InodeRefPtrHash> m_listening_events; | ||||||
| 	}; | 	}; | ||||||
| 
 | 
 | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -19,7 +19,7 @@ namespace Kernel | ||||||
| 			inode->del_epoll(this); | 			inode->del_epoll(this); | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	BAN::ErrorOr<void> Epoll::ctl(int op, BAN::RefPtr<Inode> inode, epoll_event event) | 	BAN::ErrorOr<void> Epoll::ctl(int op, int fd, BAN::RefPtr<Inode> inode, epoll_event event) | ||||||
| 	{ | 	{ | ||||||
| 		LockGuard _(m_mutex); | 		LockGuard _(m_mutex); | ||||||
| 
 | 
 | ||||||
|  | @ -28,27 +28,51 @@ namespace Kernel | ||||||
| 		switch (op) | 		switch (op) | ||||||
| 		{ | 		{ | ||||||
| 			case EPOLL_CTL_ADD: | 			case EPOLL_CTL_ADD: | ||||||
| 				if (it != m_listening_events.end()) | 			{ | ||||||
|  | 				if (it == m_listening_events.end()) | ||||||
|  | 					it = TRY(m_listening_events.emplace(inode)); | ||||||
|  | 				if (it->value.has_fd(fd)) | ||||||
| 					return BAN::Error::from_errno(EEXIST); | 					return BAN::Error::from_errno(EEXIST); | ||||||
| 				TRY(m_listening_events.reserve(m_listening_events.size() + 1)); | 				TRY(m_ready_events.reserve(m_listening_events.size())); | ||||||
| 				TRY(m_ready_events.reserve(m_listening_events.size() + 1)); |  | ||||||
| 				TRY(inode->add_epoll(this)); | 				TRY(inode->add_epoll(this)); | ||||||
| 				MUST(m_listening_events.insert(inode, event)); | 				it->value.add_fd(fd, event); | ||||||
| 				MUST(m_ready_events.insert(inode, event.events)); | 
 | ||||||
|  | 				auto ready_it = m_ready_events.find(inode); | ||||||
|  | 				if (ready_it == m_ready_events.end()) | ||||||
|  | 					ready_it = MUST(m_ready_events.insert(inode, 0)); | ||||||
|  | 				ready_it->value |= event.events; | ||||||
|  | 
 | ||||||
| 				return {}; | 				return {}; | ||||||
|  | 			} | ||||||
| 			case EPOLL_CTL_MOD: | 			case EPOLL_CTL_MOD: | ||||||
|  | 			{ | ||||||
| 				if (it == m_listening_events.end()) | 				if (it == m_listening_events.end()) | ||||||
| 					return BAN::Error::from_errno(ENOENT); | 					return BAN::Error::from_errno(ENOENT); | ||||||
| 				MUST(m_ready_events.emplace_or_assign(inode, event.events)); | 				if (!it->value.has_fd(fd)) | ||||||
| 				it->value = event; | 					return BAN::Error::from_errno(ENOENT); | ||||||
|  | 				it->value.events[fd] = event; | ||||||
|  | 
 | ||||||
|  | 				auto ready_it = m_ready_events.find(inode); | ||||||
|  | 				if (ready_it == m_ready_events.end()) | ||||||
|  | 					ready_it = MUST(m_ready_events.insert(inode, 0)); | ||||||
|  | 				ready_it->value |= event.events; | ||||||
|  | 
 | ||||||
| 				return {}; | 				return {}; | ||||||
|  | 			} | ||||||
| 			case EPOLL_CTL_DEL: | 			case EPOLL_CTL_DEL: | ||||||
|  | 			{ | ||||||
| 				if (it == m_listening_events.end()) | 				if (it == m_listening_events.end()) | ||||||
| 					return BAN::Error::from_errno(ENOENT); | 					return BAN::Error::from_errno(ENOENT); | ||||||
| 				m_listening_events.remove(it); | 				if (!it->value.has_fd(fd)) | ||||||
| 				m_ready_events.remove(inode); | 					return BAN::Error::from_errno(ENOENT); | ||||||
| 				inode->del_epoll(this); | 				it->value.remove_fd(fd); | ||||||
|  | 				if (it->value.empty()) | ||||||
|  | 				{ | ||||||
|  | 					m_listening_events.remove(it); | ||||||
|  | 					m_ready_events.remove(inode); | ||||||
|  | 				} | ||||||
| 				return {}; | 				return {}; | ||||||
|  | 			} | ||||||
| 		} | 		} | ||||||
| 
 | 
 | ||||||
| 		return BAN::Error::from_errno(EINVAL); | 		return BAN::Error::from_errno(EINVAL); | ||||||
|  | @ -56,6 +80,9 @@ namespace Kernel | ||||||
| 
 | 
 | ||||||
| 	BAN::ErrorOr<size_t> Epoll::wait(BAN::Span<epoll_event> event_span, uint64_t waketime_ns) | 	BAN::ErrorOr<size_t> Epoll::wait(BAN::Span<epoll_event> event_span, uint64_t waketime_ns) | ||||||
| 	{ | 	{ | ||||||
|  | 		if (event_span.empty()) | ||||||
|  | 			return BAN::Error::from_errno(EINVAL); | ||||||
|  | 
 | ||||||
| 		size_t count = 0; | 		size_t count = 0; | ||||||
| 
 | 
 | ||||||
| 		for (;;) | 		for (;;) | ||||||
|  | @ -64,13 +91,17 @@ namespace Kernel | ||||||
| 
 | 
 | ||||||
| 			{ | 			{ | ||||||
| 				LockGuard _(m_mutex); | 				LockGuard _(m_mutex); | ||||||
|  | 
 | ||||||
| 				for (auto it = m_ready_events.begin(); it != m_ready_events.end() && count < event_span.size();) | 				for (auto it = m_ready_events.begin(); it != m_ready_events.end() && count < event_span.size();) | ||||||
| 				{ | 				{ | ||||||
| 					auto& [inode, events] = *it; | 					auto& [inode, events] = *it; | ||||||
| 
 | 
 | ||||||
| 					auto& listen = m_listening_events[inode]; | 					auto& listen = m_listening_events[inode]; | ||||||
| 					const uint32_t listen_mask = (listen.events & (EPOLLIN | EPOLLOUT)) | EPOLLERR | EPOLLHUP; |  | ||||||
| 
 | 
 | ||||||
|  | 					uint32_t listen_mask = EPOLLERR | EPOLLHUP; | ||||||
|  | 					for (int fd = 0; fd < OPEN_MAX; fd++) | ||||||
|  | 						if (listen.has_fd(fd)) | ||||||
|  | 							listen_mask |= listen.events[fd].events; | ||||||
| 					events &= listen_mask; | 					events &= listen_mask; | ||||||
| 
 | 
 | ||||||
| 					// This prevents a possible deadlock
 | 					// This prevents a possible deadlock
 | ||||||
|  | @ -98,16 +129,27 @@ namespace Kernel | ||||||
| 						continue; | 						continue; | ||||||
| 					} | 					} | ||||||
| 
 | 
 | ||||||
| 					event_span[count++] = { | 					for (int fd = 0; fd < OPEN_MAX && count < event_span.size(); fd++) | ||||||
| 						.events = events, | 					{ | ||||||
| 						.data = listen.data, | 						if (!listen.has_fd(fd)) | ||||||
| 					}; | 							continue; | ||||||
|  | 						auto& listen_event = listen.events[fd]; | ||||||
| 
 | 
 | ||||||
| 					if (listen.events & EPOLLONESHOT) | 						const auto new_events = listen_event.events & events; | ||||||
| 						listen.events = 0; | 						if (new_events == 0) | ||||||
|  | 							continue; | ||||||
| 
 | 
 | ||||||
| 					if (listen.events & EPOLLET) | 						event_span[count++] = { | ||||||
| 						events &= ~listen_mask; | 							.events = new_events, | ||||||
|  | 							.data = listen_event.data, | ||||||
|  | 						}; | ||||||
|  | 
 | ||||||
|  | 						if (listen_event.events & EPOLLONESHOT) | ||||||
|  | 							listen_event.events = 0; | ||||||
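|  | 						// NOTE: this doesn't work when multiple fds refer to the same inode: the edge-triggered clear below removes bits from the ready mask shared by every fd registered on this inode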
 | ||||||
|  | 						if (listen_event.events & EPOLLET) | ||||||
|  | 							events &= ~new_events; | ||||||
|  | 					} | ||||||
| 
 | 
 | ||||||
| 					it++; | 					it++; | ||||||
| 				} | 				} | ||||||
|  | @ -132,18 +174,13 @@ namespace Kernel | ||||||
| 	{ | 	{ | ||||||
| 		LockGuard _(m_mutex); | 		LockGuard _(m_mutex); | ||||||
| 
 | 
 | ||||||
| 		auto listen_it = m_listening_events.find(inode); | 		if (!m_listening_events.contains(inode)) | ||||||
| 		if (listen_it == m_listening_events.end()) |  | ||||||
| 			return; | 			return; | ||||||
| 
 | 
 | ||||||
| 		event &= (listen_it->value.events & (EPOLLIN | EPOLLOUT)) | EPOLLERR | EPOLLHUP; | 		auto ready_it = m_ready_events.find(inode); | ||||||
| 		if (event == 0) | 		if (ready_it == m_ready_events.end()) | ||||||
| 			return; | 			ready_it = MUST(m_ready_events.insert(inode, 0)); | ||||||
| 
 | 		ready_it->value |= event; | ||||||
| 		if (auto ready_it = m_ready_events.find(inode); ready_it != m_ready_events.end()) |  | ||||||
| 			ready_it->value |= event; |  | ||||||
| 		else |  | ||||||
| 			MUST(m_ready_events.insert(inode, event)); |  | ||||||
| 
 | 
 | ||||||
| 		m_thread_blocker.unblock(); | 		m_thread_blocker.unblock(); | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|  | @ -1583,7 +1583,7 @@ namespace Kernel | ||||||
| 		} | 		} | ||||||
| 
 | 
 | ||||||
| 		auto epoll = TRY(Epoll::create()); | 		auto epoll = TRY(Epoll::create()); | ||||||
| 		for (int fd = 0; fd < user_arguments->nfds; fd++) | 		for (int fd = 0; fd < arguments.nfds; fd++) | ||||||
| 		{ | 		{ | ||||||
| 			uint32_t events = 0; | 			uint32_t events = 0; | ||||||
| 			if (arguments.readfds && FD_ISSET(fd, arguments.readfds)) | 			if (arguments.readfds && FD_ISSET(fd, arguments.readfds)) | ||||||
|  | @ -1599,11 +1599,11 @@ namespace Kernel | ||||||
| 			if (inode_or_error.is_error()) | 			if (inode_or_error.is_error()) | ||||||
| 				continue; | 				continue; | ||||||
| 
 | 
 | ||||||
| 			TRY(epoll->ctl(EPOLL_CTL_ADD, inode_or_error.release_value(), { .events = events, .data = { .fd = fd }})); | 			TRY(epoll->ctl(EPOLL_CTL_ADD, fd, inode_or_error.release_value(), { .events = events, .data = { .fd = fd }})); | ||||||
| 		} | 		} | ||||||
| 
 | 
 | ||||||
| 		BAN::Vector<epoll_event> event_buffer; | 		BAN::Vector<epoll_event> event_buffer; | ||||||
| 		TRY(event_buffer.resize(user_arguments->nfds)); | 		TRY(event_buffer.resize(arguments.nfds)); | ||||||
| 
 | 
 | ||||||
| 		const size_t waited_events = TRY(epoll->wait(event_buffer.span(), waketime_ns)); | 		const size_t waited_events = TRY(epoll->wait(event_buffer.span(), waketime_ns)); | ||||||
| 
 | 
 | ||||||
|  | @ -1663,7 +1663,7 @@ namespace Kernel | ||||||
| 			event = *user_event; | 			event = *user_event; | ||||||
| 		} | 		} | ||||||
| 
 | 
 | ||||||
| 		TRY(static_cast<Epoll*>(epoll_inode.ptr())->ctl(op, inode, event)); | 		TRY(static_cast<Epoll*>(epoll_inode.ptr())->ctl(op, fd, inode, event)); | ||||||
| 
 | 
 | ||||||
| 		return 0; | 		return 0; | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue