From acf439e96d3950eb166f67fdac98eaacc90bfe70 Mon Sep 17 00:00:00 2001 From: "Matias N. Goldberg" <dark_sylinc@yahoo.com.ar> Date: Tue, 10 Dec 2024 19:25:46 -0300 Subject: [PATCH] Keep processing Graphics if there are pending operations Fixes #90017 Fixes #90030 Fixes #98044 This PR makes the following changes: # Force processing of GPU commands for frame_count frames The variable `frames_pending_resources_for_processing` is added to track this. The ticket #98044 suggested to use `_flush_and_stall_for_all_frames()` while minimized. Technically this works and is a viable solution. However I noticed that this issue was happening because Logic/Physics continue to work "business as usual" while minimized(\*). Only Graphics was being deactivated (which caused commands to accumulate until window is restored). To continue this behavior of "business as usual", I decided that GPU work should also "continue as usual" by buffering commands in a double or triple buffer scheme until all commands are done processing (if they ever stop coming). This is specially important if the app specifically intends to keep processing while minimized. Calling `_flush_and_stall_for_all_frames()` would fix the leak, but it would make Godot's behavior different while minimized vs while the window is presenting. \* `OS::add_frame_delay` _does_ consider being minimized, but it just throttles CPU usage. Some platforms such as Android completely disable processing because the higher level code stops being called when the app goes into background. But this seems like an implementation-detail that diverges from the rest of the platforms (e.g. Windows, Linux & macOS continue to process while minimized). # Rename p_swap_buffers for p_present **This is potentially a breaking change** (if it actually breaks anything, I ignore. But I strongly suspect it doesn't break anything). "Swap Buffers" is a concept carried from OpenGL, where a frame is "done" when `glSwapBuffers()` is called, which basically means "present to the screen". However it _also_ means that OpenGL internally swaps its internal buffers in a double/triple buffer scheme (in Vulkan, we do that ourselves and is tracked by `RenderingDevice::frame`). Modern APIs like Vulkan differentiate between "submitting GPU work" and "presenting". Before this PR, calling `RendererCompositorRD::end_frame(false)` would literally do nothing. This is often undesired and the cause of the leak. After this PR, calling `RendererCompositorRD::end_frame(false)` will now process commands, swap our internal buffers in a double/triple buffer scheme **but avoid presenting to the screen**. Hence the rename of the variable from `p_swap_buffers` to `p_present` (which slightly alters its behavior). If we want `RendererCompositorRD::end_frame(false)` to do nothing, then we should not call it at all. This PR reflects such change: When we're minimized **_and_** `has_pending_resources_for_processing()` returns false, we don't call `RendererCompositorRD::end_frame()` at all. But if `has_pending_resources_for_processing()` returns true, we will call it, but with `p_present = false` because we're minimized. There's still the issue that Godot keeps processing work (logic, scripts, physics) while minimized, which we shouldn't do by default. But that's work for follow up PR. --- main/main.cpp | 13 +++++++++---- servers/rendering/dummy/rasterizer_dummy.h | 4 ++-- servers/rendering/renderer_compositor.h | 2 +- .../renderer_rd/renderer_compositor_rd.cpp | 8 +++----- .../rendering/renderer_rd/renderer_compositor_rd.h | 2 +- servers/rendering/rendering_device.cpp | 10 ++++++++-- servers/rendering/rendering_device.h | 13 ++++++++++++- servers/rendering/rendering_server_default.cpp | 6 +++--- servers/rendering/rendering_server_default.h | 2 +- 9 files changed, 40 insertions(+), 20 deletions(-) diff --git a/main/main.cpp b/main/main.cpp index 2c2ef94e0eaa..51b26a63f2c4 100644 --- a/main/main.cpp +++ b/main/main.cpp @@ -4441,15 +4441,20 @@ bool Main::iteration() { RenderingServer::get_singleton()->sync(); //sync if still drawing from previous frames. - if ((DisplayServer::get_singleton()->can_any_window_draw() || DisplayServer::get_singleton()->has_additional_outputs()) && - RenderingServer::get_singleton()->is_render_loop_enabled()) { + const bool has_pending_resources_for_processing = RD::get_singleton() && RD::get_singleton()->has_pending_resources_for_processing(); + bool wants_present = (DisplayServer::get_singleton()->can_any_window_draw() || + DisplayServer::get_singleton()->has_additional_outputs()) && + RenderingServer::get_singleton()->is_render_loop_enabled(); + + if (wants_present || has_pending_resources_for_processing) { + wants_present |= force_redraw_requested; if ((!force_redraw_requested) && OS::get_singleton()->is_in_low_processor_usage_mode()) { if (RenderingServer::get_singleton()->has_changed()) { - RenderingServer::get_singleton()->draw(true, scaled_step); // flush visual commands + RenderingServer::get_singleton()->draw(wants_present, scaled_step); // flush visual commands Engine::get_singleton()->increment_frames_drawn(); } } else { - RenderingServer::get_singleton()->draw(true, scaled_step); // flush visual commands + RenderingServer::get_singleton()->draw(wants_present, scaled_step); // flush visual commands Engine::get_singleton()->increment_frames_drawn(); force_redraw_requested = false; } diff --git a/servers/rendering/dummy/rasterizer_dummy.h b/servers/rendering/dummy/rasterizer_dummy.h index 6205193d9a99..33b4c38018b4 100644 --- a/servers/rendering/dummy/rasterizer_dummy.h +++ b/servers/rendering/dummy/rasterizer_dummy.h @@ -90,8 +90,8 @@ class RasterizerDummy : public RendererCompositor { void gl_end_frame(bool p_swap_buffers) override {} - void end_frame(bool p_swap_buffers) override { - if (p_swap_buffers) { + void end_frame(bool p_present) override { + if (p_present) { DisplayServer::get_singleton()->swap_buffers(); } } diff --git a/servers/rendering/renderer_compositor.h b/servers/rendering/renderer_compositor.h index dbc8f155bfbd..e52cab740e01 100644 --- a/servers/rendering/renderer_compositor.h +++ b/servers/rendering/renderer_compositor.h @@ -99,7 +99,7 @@ class RendererCompositor { virtual void blit_render_targets_to_screen(DisplayServer::WindowID p_screen, const BlitToScreen *p_render_targets, int p_amount) = 0; virtual void gl_end_frame(bool p_swap_buffers) = 0; - virtual void end_frame(bool p_swap_buffers) = 0; + virtual void end_frame(bool p_present) = 0; virtual void finalize() = 0; virtual uint64_t get_frame_number() const = 0; virtual double get_frame_delta_time() const = 0; diff --git a/servers/rendering/renderer_rd/renderer_compositor_rd.cpp b/servers/rendering/renderer_rd/renderer_compositor_rd.cpp index ba47508700fa..05dda5e74c55 100644 --- a/servers/rendering/renderer_rd/renderer_compositor_rd.cpp +++ b/servers/rendering/renderer_rd/renderer_compositor_rd.cpp @@ -112,10 +112,8 @@ void RendererCompositorRD::begin_frame(double frame_step) { scene->set_time(time, frame_step); } -void RendererCompositorRD::end_frame(bool p_swap_buffers) { - if (p_swap_buffers) { - RD::get_singleton()->swap_buffers(); - } +void RendererCompositorRD::end_frame(bool p_present) { + RD::get_singleton()->swap_buffers(p_present); } void RendererCompositorRD::initialize() { @@ -264,7 +262,7 @@ void RendererCompositorRD::set_boot_image(const Ref<Image> &p_image, const Color RD::get_singleton()->draw_list_end(); - RD::get_singleton()->swap_buffers(); + RD::get_singleton()->swap_buffers(true); texture_storage->texture_free(texture); RD::get_singleton()->free(sampler); diff --git a/servers/rendering/renderer_rd/renderer_compositor_rd.h b/servers/rendering/renderer_rd/renderer_compositor_rd.h index 6821fa737e4e..41c11113fefa 100644 --- a/servers/rendering/renderer_rd/renderer_compositor_rd.h +++ b/servers/rendering/renderer_rd/renderer_compositor_rd.h @@ -128,7 +128,7 @@ class RendererCompositorRD : public RendererCompositor { void blit_render_targets_to_screen(DisplayServer::WindowID p_screen, const BlitToScreen *p_render_targets, int p_amount); void gl_end_frame(bool p_swap_buffers) {} - void end_frame(bool p_swap_buffers); + void end_frame(bool p_present); void finalize(); _ALWAYS_INLINE_ uint64_t get_frame_number() const { return frame; } diff --git a/servers/rendering/rendering_device.cpp b/servers/rendering/rendering_device.cpp index 72984e5abf8b..85b5759d1246 100644 --- a/servers/rendering/rendering_device.cpp +++ b/servers/rendering/rendering_device.cpp @@ -5660,6 +5660,8 @@ void RenderingDevice::_free_internal(RID p_id) { ERR_PRINT("Attempted to free invalid ID: " + itos(p_id.get_id())); #endif } + + frames_pending_resources_for_processing = uint32_t(frames.size()); } // The full list of resources that can be named is in the VkObjectType enum. @@ -5762,11 +5764,11 @@ String RenderingDevice::get_device_pipeline_cache_uuid() const { return driver->get_pipeline_cache_uuid(); } -void RenderingDevice::swap_buffers() { +void RenderingDevice::swap_buffers(bool p_present) { ERR_RENDER_THREAD_GUARD(); _end_frame(); - _execute_frame(true); + _execute_frame(p_present); // Advance to the next frame and begin recording again. frame = (frame + 1) % frames.size(); @@ -5868,6 +5870,10 @@ void RenderingDevice::_free_pending_resources(int p_frame) { frames[p_frame].buffers_to_dispose_of.pop_front(); } + + if (frames_pending_resources_for_processing > 0u) { + --frames_pending_resources_for_processing; + } } uint32_t RenderingDevice::get_frame_delay() const { diff --git a/servers/rendering/rendering_device.h b/servers/rendering/rendering_device.h index 6b19d05fca48..2595aed62d7b 100644 --- a/servers/rendering/rendering_device.h +++ b/servers/rendering/rendering_device.h @@ -1400,6 +1400,17 @@ class RenderingDevice : public RenderingDeviceCommons { TightLocalVector<Frame> frames; uint64_t frames_drawn = 0; + // Whenever logic/physics request a graphics operation (not just deleting a resource) that requires + // us to flush all graphics commands, we must set frames_pending_resources_for_processing = frames.size(). + // This is important for when the user requested for the logic loop to still be updated while + // graphics should not (e.g. headless Multiplayer servers, minimized windows that need to still + // process something on the background). + uint32_t frames_pending_resources_for_processing = 0u; + +public: + bool has_pending_resources_for_processing() const { return frames_pending_resources_for_processing != 0u; } + +private: void _free_pending_resources(int p_frame); uint64_t texture_memory = 0; @@ -1444,7 +1455,7 @@ class RenderingDevice : public RenderingDeviceCommons { uint64_t limit_get(Limit p_limit) const; - void swap_buffers(); + void swap_buffers(bool p_present); uint32_t get_frame_delay() const; diff --git a/servers/rendering/rendering_server_default.cpp b/servers/rendering/rendering_server_default.cpp index 2ec693cbbfe7..b5d0d014791f 100644 --- a/servers/rendering/rendering_server_default.cpp +++ b/servers/rendering/rendering_server_default.cpp @@ -406,15 +406,15 @@ void RenderingServerDefault::sync() { } } -void RenderingServerDefault::draw(bool p_swap_buffers, double frame_step) { +void RenderingServerDefault::draw(bool p_present, double frame_step) { ERR_FAIL_COND_MSG(!Thread::is_main_thread(), "Manually triggering the draw function from the RenderingServer can only be done on the main thread. Call this function from the main thread or use call_deferred()."); // Needs to be done before changes is reset to 0, to not force the editor to redraw. RS::get_singleton()->emit_signal(SNAME("frame_pre_draw")); changes = 0; if (create_thread) { - command_queue.push(this, &RenderingServerDefault::_draw, p_swap_buffers, frame_step); + command_queue.push(this, &RenderingServerDefault::_draw, p_present, frame_step); } else { - _draw(p_swap_buffers, frame_step); + _draw(p_present, frame_step); } } diff --git a/servers/rendering/rendering_server_default.h b/servers/rendering/rendering_server_default.h index 29b1e163c78a..4a0df4a0cf3b 100644 --- a/servers/rendering/rendering_server_default.h +++ b/servers/rendering/rendering_server_default.h @@ -1124,7 +1124,7 @@ class RenderingServerDefault : public RenderingServer { virtual void request_frame_drawn_callback(const Callable &p_callable) override; - virtual void draw(bool p_swap_buffers, double frame_step) override; + virtual void draw(bool p_present, double frame_step) override; virtual void sync() override; virtual bool has_changed() const override; virtual void init() override;