From 36d07065fb1fc07abc09d6a38aa99c558c5365ac Mon Sep 17 00:00:00 2001 From: Bananymous Date: Sun, 17 Nov 2024 04:35:03 +0200 Subject: [PATCH] WindowServer: Optimize msync calls to synchronize less bytes This *probably* makes framebuffer much faster :D Window server does not have to do page aligned and page sized syncs which used way too many pixels. --- userspace/programs/WindowServer/Utils.h | 29 ++++ .../programs/WindowServer/WindowServer.cpp | 130 +++++++++++++----- .../programs/WindowServer/WindowServer.h | 11 +- 3 files changed, 134 insertions(+), 36 deletions(-) diff --git a/userspace/programs/WindowServer/Utils.h b/userspace/programs/WindowServer/Utils.h index 914caf32..d9c51f96 100644 --- a/userspace/programs/WindowServer/Utils.h +++ b/userspace/programs/WindowServer/Utils.h @@ -80,3 +80,32 @@ struct Circle } }; + +struct Range +{ + uint32_t start { 0 }; + uint32_t count { 0 }; + + bool is_continuous_with(const Range& range) const + { + return start <= range.start + range.count && range.start <= start + count; + } + + uint32_t distance_between(const Range& range) const + { + if (is_continuous_with(range)) + return 0; + if (start < range.start) + return range.start - (start + count); + return start - (range.start + range.count); + } + + void merge_with(const Range& range) + { + const uint32_t new_start = BAN::Math::min(start, range.start); + const uint32_t new_end = BAN::Math::max(start + count, range.start + range.count); + + start = new_start; + count = new_end - new_start; + } +}; diff --git a/userspace/programs/WindowServer/WindowServer.cpp b/userspace/programs/WindowServer/WindowServer.cpp index abee0a2d..19f9d269 100644 --- a/userspace/programs/WindowServer/WindowServer.cpp +++ b/userspace/programs/WindowServer/WindowServer.cpp @@ -24,6 +24,8 @@ WindowServer::WindowServer(Framebuffer& framebuffer, int32_t corner_radius) MUST(bitmap.resize(m_framebuffer.width * m_framebuffer.height, { 0x10, 0x10, 0x10, 0xFF })); m_background_image = MUST(BAN::UniqPtr::create(m_framebuffer.width, m_framebuffer.height, BAN::move(bitmap))); + MUST(m_pending_syncs.resize(m_framebuffer.height)); + invalidate(m_framebuffer.area()); } @@ -910,24 +912,71 @@ void WindowServer::invalidate(Rectangle area) mark_pending_sync(area); } -void WindowServer::mark_pending_sync(Rectangle area) +void WindowServer::RangeList::add_range(const Range& range) { - // FIXME: this marks too many pages - - const uintptr_t mmap_start = reinterpret_cast(m_framebuffer.mmap) + area.y * m_framebuffer.width * 4; - const uintptr_t mmap_end = mmap_start + (area.height + 1) * m_framebuffer.width * 4; - - uintptr_t mmap_addr = mmap_start & ~(uintptr_t)0xFFF; - while (mmap_addr < mmap_end) + if (range_count == 0) { - size_t index = (mmap_addr - reinterpret_cast(m_framebuffer.mmap)) / 4096; - size_t byte = index / 8; - size_t bit = index % 8; - //dprintln("{}/{}", byte, m_pages_to_sync_bitmap.size()); - if (byte < m_pages_to_sync_bitmap.size()) - m_pages_to_sync_bitmap[byte] |= 1 << bit; - mmap_addr += 4096; + ranges[0] = range; + range_count++; + return; } + + size_t min_distance_value = SIZE_MAX; + size_t min_distance_index = 0; + for (size_t i = 0; i < range_count; i++) + { + if (ranges[i].is_continuous_with(range)) + { + ranges[i].merge_with(range); + + size_t last_continuous = i; + for (size_t j = i + 1; j < range_count; j++) + { + if (!ranges[i].is_continuous_with(ranges[j])) + break; + last_continuous = j; + } + + if (last_continuous != i) + { + ranges[i].merge_with(ranges[last_continuous]); + for (size_t j = 1; last_continuous + j < range_count; j++) + ranges[i + j] = ranges[last_continuous + j]; + range_count -= last_continuous - i; + } + + return; + } + + const auto distance = ranges[i].distance_between(range); + if (distance < min_distance_value) + { + min_distance_value = distance; + min_distance_index = i; + } + } + + if (range_count >= ranges.size()) + { + ranges[min_distance_index].merge_with(range); + return; + } + + size_t insert_idx = 0; + for (; insert_idx < range_count; insert_idx++) + if (range.start < ranges[insert_idx].start) + break; + for (size_t i = range_count; i > insert_idx; i--) + ranges[i] = ranges[i - 1]; + ranges[insert_idx] = range; + range_count++; +} + +void WindowServer::mark_pending_sync(Rectangle to_sync) +{ + ASSERT(to_sync == to_sync.get_overlap(m_framebuffer.area()).value()); + for (int32_t y_off = 0; y_off < to_sync.height; y_off++) + m_pending_syncs[to_sync.y + y_off].add_range({ static_cast(to_sync.x), static_cast(to_sync.width) }); } void WindowServer::sync() @@ -951,33 +1000,44 @@ void WindowServer::sync() dir_y = -dir_y; } - for (size_t i = 0; i < m_pages_to_sync_bitmap.size() * 8; i++) + size_t range_start = 0; + size_t range_count = 0; + for (int32_t y = 0; y < m_framebuffer.height; y++) { - size_t byte = i / 8; - size_t bit = i % 8; - if (!(m_pages_to_sync_bitmap[byte] & (1 << bit))) - continue; + auto& range_list = m_pending_syncs[y]; - size_t len = 1; - while (i + len < m_pages_to_sync_bitmap.size() * 8) + for (size_t i = 0; i < range_list.range_count; i++) { - size_t byte = (i + len) / 8; - size_t bit = (i + len) % 8; - if (!(m_pages_to_sync_bitmap[byte] & (1 << bit))) - break; - len++; + const size_t cur_start = y * m_framebuffer.width + range_list.ranges[i].start; + const size_t cur_count = range_list.ranges[i].count; + + if (range_count == 0) + { + range_start = cur_start; + range_count = cur_count; + } + else + { + const size_t distance = cur_start - (range_start + range_count); + + // combine nearby ranges to reduce msync calls + // NOTE: value of 128 is an arbitary constant that *just* felt nice + if (distance <= 128) + range_count = (cur_start + cur_count) - range_start; + else + { + msync(m_framebuffer.mmap + range_start, range_count * 4, MS_SYNC); + range_start = cur_start; + range_count = cur_count; + } + } } - msync( - reinterpret_cast(m_framebuffer.mmap) + i * 4096, - len * 4096, - MS_SYNC - ); - - i += len; + range_list.range_count = 0; } - memset(m_pages_to_sync_bitmap.data(), 0, m_pages_to_sync_bitmap.size()); + if (range_count) + msync(m_framebuffer.mmap + range_start, range_count * 4, MS_SYNC); } Rectangle WindowServer::cursor_area() const diff --git a/userspace/programs/WindowServer/WindowServer.h b/userspace/programs/WindowServer/WindowServer.h index 5c3b7ca5..c768c289 100644 --- a/userspace/programs/WindowServer/WindowServer.h +++ b/userspace/programs/WindowServer/WindowServer.h @@ -3,6 +3,7 @@ #include "Framebuffer.h" #include "Window.h" +#include #include #include #include @@ -59,6 +60,14 @@ public: private: void mark_pending_sync(Rectangle area); +private: + struct RangeList + { + size_t range_count { 0 }; + BAN::Array ranges; + void add_range(const Range& range); + }; + private: Framebuffer& m_framebuffer; BAN::Vector> m_client_windows; @@ -67,7 +76,7 @@ private: const int32_t m_corner_radius; - BAN::Vector m_pages_to_sync_bitmap; + BAN::Vector m_pending_syncs; BAN::UniqPtr m_background_image;