Skip to content

Commit 5216ef5

Browse files
committed
Add dependency detection improvements to the render graph.
- Buffers changing their usage are no longer treated as write usage unless the API requires it.
- Draw lists are not treated as being dependent on each other if their regions do not intersect, despite both being write commands.
- Particles were tweaked to use different unused buffers to reduce dependencies.
1 parent c6c464c commit 5216ef5

10 files changed

+79
-35
lines changed

drivers/d3d12/rendering_device_driver_d3d12.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -6177,6 +6177,8 @@ uint64_t RenderingDeviceDriverD3D12::api_trait_get(ApiTrait p_trait) {
61776177
return false;
61786178
case API_TRAIT_USE_GENERAL_IN_COPY_QUEUES:
61796179
return true;
6180+
case API_TRAIT_BUFFERS_REQUIRE_TRANSITIONS:
6181+
return !barrier_capabilities.enhanced_barriers_supported;
61806182
default:
61816183
return RenderingDeviceDriver::api_trait_get(p_trait);
61826184
}

servers/rendering/renderer_rd/effects/copy_effects.cpp

+11-7
Original file line numberDiff line numberDiff line change
@@ -1002,15 +1002,19 @@ void CopyEffects::copy_cubemap_to_dp(RID p_source_rd_texture, RID p_dst_framebuf
10021002
MaterialStorage *material_storage = MaterialStorage::get_singleton();
10031003
ERR_FAIL_NULL(material_storage);
10041004

1005+
Rect2i screen_rect;
1006+
float atlas_width = p_dst_size.width / p_rect.size.width;
1007+
float atlas_height = p_dst_size.height / p_rect.size.height;
1008+
screen_rect.position.x = (int32_t)(Math::round(p_rect.position.x * atlas_width));
1009+
screen_rect.position.y = (int32_t)(Math::round(p_rect.position.y * atlas_height));
1010+
screen_rect.size.width = (int32_t)(Math::round(p_dst_size.width));
1011+
screen_rect.size.height = (int32_t)(Math::round(p_dst_size.height));
1012+
10051013
CopyToDPPushConstant push_constant;
1006-
push_constant.screen_rect[0] = p_rect.position.x;
1007-
push_constant.screen_rect[1] = p_rect.position.y;
1008-
push_constant.screen_rect[2] = p_rect.size.width;
1009-
push_constant.screen_rect[3] = p_rect.size.height;
10101014
push_constant.z_far = p_z_far;
10111015
push_constant.z_near = p_z_near;
1012-
push_constant.texel_size[0] = 1.0f / p_dst_size.x;
1013-
push_constant.texel_size[1] = 1.0f / p_dst_size.y;
1016+
push_constant.texel_size[0] = 1.0f / p_dst_size.width;
1017+
push_constant.texel_size[1] = 1.0f / p_dst_size.height;
10141018
push_constant.texel_size[0] *= p_dp_flip ? -1.0f : 1.0f; // Encode dp flip as x size sign
10151019

10161020
// setup our uniforms
@@ -1021,7 +1025,7 @@ void CopyEffects::copy_cubemap_to_dp(RID p_source_rd_texture, RID p_dst_framebuf
10211025
RID shader = cube_to_dp.shader.version_get_shader(cube_to_dp.shader_version, 0);
10221026
ERR_FAIL_COND(shader.is_null());
10231027

1024-
RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_dst_framebuffer, RD::INITIAL_ACTION_DISCARD, RD::FINAL_ACTION_DISCARD, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_STORE);
1028+
RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_dst_framebuffer, RD::INITIAL_ACTION_DISCARD, RD::FINAL_ACTION_DISCARD, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_STORE, Vector<Color>(), 1.0f, 0, screen_rect);
10251029
RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, cube_to_dp.pipeline.get_render_pipeline(RD::INVALID_ID, RD::get_singleton()->framebuffer_get_format(p_dst_framebuffer)));
10261030
RD::get_singleton()->draw_list_bind_uniform_set(draw_list, uniform_set_cache->get_cache(shader, 0, u_source_rd_texture), 0);
10271031
RD::get_singleton()->draw_list_bind_index_array(draw_list, material_storage->get_quad_index_array());

servers/rendering/renderer_rd/effects/copy_effects.h

-1
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,6 @@ class CopyEffects {
217217
float z_far;
218218
float z_near;
219219
float texel_size[2];
220-
float screen_rect[4];
221220
};
222221

223222
struct CopyToDP {

servers/rendering/renderer_rd/shaders/effects/cube_to_dp.glsl

+1-4
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ layout(push_constant, std430) uniform Params {
88
float z_far;
99
float z_near;
1010
vec2 texel_size;
11-
vec4 screen_rect;
1211
}
1312
params;
1413

@@ -17,8 +16,7 @@ layout(location = 0) out vec2 uv_interp;
1716
void main() {
1817
vec2 base_arr[4] = vec2[](vec2(0.0, 0.0), vec2(0.0, 1.0), vec2(1.0, 1.0), vec2(1.0, 0.0));
1918
uv_interp = base_arr[gl_VertexIndex];
20-
vec2 screen_pos = uv_interp * params.screen_rect.zw + params.screen_rect.xy;
21-
gl_Position = vec4(screen_pos * 2.0 - 1.0, 0.0, 1.0);
19+
gl_Position = vec4(uv_interp * 2.0 - 1.0, 0.0, 1.0);
2220
}
2321

2422
#[fragment]
@@ -35,7 +33,6 @@ layout(push_constant, std430) uniform Params {
3533
float z_far;
3634
float z_near;
3735
vec2 texel_size;
38-
vec4 screen_rect;
3936
}
4037
params;
4138

servers/rendering/renderer_rd/storage_rd/particles_storage.cpp

+23-12
Original file line numberDiff line numberDiff line change
@@ -306,9 +306,14 @@ void ParticlesStorage::_particles_free_data(Particles *particles) {
306306
particles->emission_storage_buffer = RID();
307307
}
308308

309-
if (particles->unused_storage_buffer.is_valid()) {
310-
RD::get_singleton()->free(particles->unused_storage_buffer);
311-
particles->unused_storage_buffer = RID();
309+
if (particles->unused_emission_storage_buffer.is_valid()) {
310+
RD::get_singleton()->free(particles->unused_emission_storage_buffer);
311+
particles->unused_emission_storage_buffer = RID();
312+
}
313+
314+
if (particles->unused_trail_storage_buffer.is_valid()) {
315+
RD::get_singleton()->free(particles->unused_trail_storage_buffer);
316+
particles->unused_trail_storage_buffer = RID();
312317
}
313318

314319
if (RD::get_singleton()->uniform_set_is_valid(particles->particles_material_uniform_set)) {
@@ -534,9 +539,15 @@ void ParticlesStorage::_particles_allocate_emission_buffer(Particles *particles)
534539
}
535540
}
536541

537-
void ParticlesStorage::_particles_ensure_unused_buffer(Particles *particles) {
538-
if (particles->unused_storage_buffer.is_null()) {
539-
particles->unused_storage_buffer = RD::get_singleton()->storage_buffer_create(sizeof(uint32_t) * 4);
542+
void ParticlesStorage::_particles_ensure_unused_emission_buffer(Particles *particles) {
543+
if (particles->unused_emission_storage_buffer.is_null()) {
544+
particles->unused_emission_storage_buffer = RD::get_singleton()->storage_buffer_create(sizeof(uint32_t) * 4);
545+
}
546+
}
547+
548+
void ParticlesStorage::_particles_ensure_unused_trail_buffer(Particles *particles) {
549+
if (particles->unused_trail_storage_buffer.is_null()) {
550+
particles->unused_trail_storage_buffer = RD::get_singleton()->storage_buffer_create(sizeof(uint32_t) * 4);
540551
}
541552
}
542553

@@ -763,8 +774,8 @@ void ParticlesStorage::_particles_process(Particles *p_particles, double p_delta
763774
if (p_particles->emission_storage_buffer.is_valid()) {
764775
u.append_id(p_particles->emission_storage_buffer);
765776
} else {
766-
_particles_ensure_unused_buffer(p_particles);
767-
u.append_id(p_particles->unused_storage_buffer);
777+
_particles_ensure_unused_emission_buffer(p_particles);
778+
u.append_id(p_particles->unused_emission_storage_buffer);
768779
}
769780
uniforms.push_back(u);
770781
}
@@ -779,8 +790,8 @@ void ParticlesStorage::_particles_process(Particles *p_particles, double p_delta
779790
}
780791
u.append_id(sub_emitter->emission_storage_buffer);
781792
} else {
782-
_particles_ensure_unused_buffer(p_particles);
783-
u.append_id(p_particles->unused_storage_buffer);
793+
_particles_ensure_unused_emission_buffer(p_particles);
794+
u.append_id(p_particles->unused_emission_storage_buffer);
784795
}
785796
uniforms.push_back(u);
786797
}
@@ -1481,8 +1492,8 @@ void ParticlesStorage::update_particles() {
14811492
if (particles->trail_bind_pose_buffer.is_valid()) {
14821493
u.append_id(particles->trail_bind_pose_buffer);
14831494
} else {
1484-
_particles_ensure_unused_buffer(particles);
1485-
u.append_id(particles->unused_storage_buffer);
1495+
_particles_ensure_unused_trail_buffer(particles);
1496+
u.append_id(particles->unused_trail_storage_buffer);
14861497
}
14871498
uniforms.push_back(u);
14881499
}

servers/rendering/renderer_rd/storage_rd/particles_storage.h

+4-2
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,8 @@ class ParticlesStorage : public RendererParticlesStorage {
247247
ParticleEmissionBuffer *emission_buffer = nullptr;
248248
RID emission_storage_buffer;
249249

250-
RID unused_storage_buffer;
250+
RID unused_emission_storage_buffer;
251+
RID unused_trail_storage_buffer;
251252

252253
HashSet<RID> collisions;
253254

@@ -265,7 +266,8 @@ class ParticlesStorage : public RendererParticlesStorage {
265266

266267
void _particles_process(Particles *p_particles, double p_delta);
267268
void _particles_allocate_emission_buffer(Particles *particles);
268-
void _particles_ensure_unused_buffer(Particles *particles);
269+
void _particles_ensure_unused_emission_buffer(Particles *particles);
270+
void _particles_ensure_unused_trail_buffer(Particles *particles);
269271
void _particles_free_data(Particles *particles);
270272
void _particles_update_buffers(Particles *particles);
271273

servers/rendering/rendering_device_driver.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -376,6 +376,8 @@ uint64_t RenderingDeviceDriver::api_trait_get(ApiTrait p_trait) {
376376
return true;
377377
case API_TRAIT_USE_GENERAL_IN_COPY_QUEUES:
378378
return false;
379+
case API_TRAIT_BUFFERS_REQUIRE_TRANSITIONS:
380+
return false;
379381
default:
380382
ERR_FAIL_V(0);
381383
}

servers/rendering/rendering_device_driver.h

+1
Original file line numberDiff line numberDiff line change
@@ -756,6 +756,7 @@ class RenderingDeviceDriver : public RenderingDeviceCommons {
756756
API_TRAIT_SECONDARY_VIEWPORT_SCISSOR,
757757
API_TRAIT_CLEARS_WITH_COPY_ENGINE,
758758
API_TRAIT_USE_GENERAL_IN_COPY_QUEUES,
759+
API_TRAIT_BUFFERS_REQUIRE_TRANSITIONS,
759760
};
760761

761762
enum ShaderChangeInvalidation {

servers/rendering/rendering_device_graph.cpp

+33-9
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,25 @@ RDD::BarrierAccessBits RenderingDeviceGraph::_usage_to_access_bits(ResourceUsage
140140
#endif
141141
}
142142

143+
bool RenderingDeviceGraph::_check_command_intersection(ResourceTracker *p_resource_tracker, int32_t p_previous_command_index, int32_t p_command_index) const {
144+
if (p_resource_tracker->usage != RESOURCE_USAGE_ATTACHMENT_COLOR_READ_WRITE && p_resource_tracker->usage != RESOURCE_USAGE_ATTACHMENT_DEPTH_STENCIL_READ_WRITE) {
145+
// We don't check possible intersections for usages that aren't consecutive color or depth writes.
146+
return true;
147+
}
148+
149+
const uint32_t previous_command_data_offset = command_data_offsets[p_previous_command_index];
150+
const uint32_t current_command_data_offset = command_data_offsets[p_command_index];
151+
const RecordedDrawListCommand &previous_draw_list_command = *reinterpret_cast<const RecordedDrawListCommand *>(&command_data[previous_command_data_offset]);
152+
const RecordedDrawListCommand &current_draw_list_command = *reinterpret_cast<const RecordedDrawListCommand *>(&command_data[current_command_data_offset]);
153+
if (previous_draw_list_command.type != RecordedCommand::TYPE_DRAW_LIST || current_draw_list_command.type != RecordedCommand::TYPE_DRAW_LIST) {
154+
// We don't check possible intersections if both commands aren't draw lists.
155+
return true;
156+
}
157+
158+
// We check if the region used by both draw lists have an intersection.
159+
return previous_draw_list_command.region.intersects(current_draw_list_command.region);
160+
}
161+
143162
int32_t RenderingDeviceGraph::_add_to_command_list(int32_t p_command_index, int32_t p_list_index) {
144163
DEV_ASSERT(p_command_index < int32_t(command_count));
145164
DEV_ASSERT(p_list_index < int32_t(command_list_nodes.size()));
@@ -425,11 +444,9 @@ void RenderingDeviceGraph::_add_command_to_graph(ResourceTracker **p_resource_tr
425444
#if USE_BUFFER_BARRIERS
426445
_add_buffer_barrier_to_command(resource_tracker->buffer_driver_id, resource_tracker->usage_access, new_usage_access, r_command->buffer_barrier_index, r_command->buffer_barrier_count);
427446
#endif
428-
// FIXME: Memory barriers are currently pushed regardless of whether buffer barriers are being used or not. Refer to the comment on the
429-
// definition of USE_BUFFER_BARRIERS for the reason behind this. This can be fixed to be one case or the other once it's been confirmed
430-
// the buffer and memory barrier behavior discrepancy has been solved.
431-
r_command->memory_barrier.src_access = resource_tracker->usage_access;
432-
r_command->memory_barrier.dst_access = new_usage_access;
447+
// Memory barriers are pushed regardless of buffer barriers being used or not.
448+
r_command->memory_barrier.src_access = r_command->memory_barrier.src_access | resource_tracker->usage_access;
449+
r_command->memory_barrier.dst_access = r_command->memory_barrier.dst_access | new_usage_access;
433450
} else {
434451
DEV_ASSERT(false && "Resource tracker does not contain a valid buffer or texture ID.");
435452
}
@@ -449,10 +466,12 @@ void RenderingDeviceGraph::_add_command_to_graph(ResourceTracker **p_resource_tr
449466

450467
if (different_usage) {
451468
// Even if the usage of the resource isn't a write usage explicitly, a different usage implies a transition and it should therefore be considered a write.
452-
write_usage = true;
469+
// In the case of buffers however, this is not exactly necessary if the driver does not consider different buffer usages as different states.
470+
write_usage = write_usage || bool(resource_tracker->texture_driver_id) || driver_buffers_require_transitions;
453471
resource_tracker->usage = new_resource_usage;
454472
}
455473

474+
bool command_intersection_failed = false;
456475
if (search_tracker->write_command_or_list_index >= 0) {
457476
if (search_tracker->write_command_list_enabled) {
458477
// Make this command adjacent to any commands that wrote to this resource and intersect with the slice if it applies.
@@ -464,7 +483,7 @@ void RenderingDeviceGraph::_add_command_to_graph(ResourceTracker **p_resource_tr
464483
if (!resource_has_parent || search_tracker_rect.intersects(write_list_node.subresources)) {
465484
if (write_list_node.command_index == p_command_index) {
466485
ERR_FAIL_COND_MSG(!resource_has_parent, "Command can't have itself as a dependency.");
467-
} else {
486+
} else if (_check_command_intersection(resource_tracker, write_list_node.command_index, p_command_index)) {
468487
// Command is dependent on this command. Add this command to the adjacency list of the write command.
469488
_add_adjacent_command(write_list_node.command_index, p_command_index, r_command);
470489

@@ -480,6 +499,8 @@ void RenderingDeviceGraph::_add_command_to_graph(ResourceTracker **p_resource_tr
480499
write_list_index = write_list_node.next_list_index;
481500
continue;
482501
}
502+
} else {
503+
command_intersection_failed = true;
483504
}
484505
}
485506

@@ -490,14 +511,16 @@ void RenderingDeviceGraph::_add_command_to_graph(ResourceTracker **p_resource_tr
490511
// The index is just the latest command index that wrote to the resource.
491512
if (search_tracker->write_command_or_list_index == p_command_index) {
492513
ERR_FAIL_MSG("Command can't have itself as a dependency.");
493-
} else {
514+
} else if (_check_command_intersection(resource_tracker, search_tracker->write_command_or_list_index, p_command_index)) {
494515
_add_adjacent_command(search_tracker->write_command_or_list_index, p_command_index, r_command);
516+
} else {
517+
command_intersection_failed = true;
495518
}
496519
}
497520
}
498521

499522
if (write_usage) {
500-
if (resource_has_parent) {
523+
if (resource_has_parent || command_intersection_failed) {
501524
if (!search_tracker->write_command_list_enabled && search_tracker->write_command_or_list_index >= 0) {
502525
// Write command list was not being used but there was a write command recorded. Add a new node with the entire parent resource's subresources and the recorded command index to the list.
503526
const RDD::TextureSubresourceRange &tracker_subresources = search_tracker->texture_subresources;
@@ -1318,6 +1341,7 @@ void RenderingDeviceGraph::initialize(RDD *p_driver, RenderingContextDriver::Dev
13181341

13191342
driver_honors_barriers = driver->api_trait_get(RDD::API_TRAIT_HONORS_PIPELINE_BARRIERS);
13201343
driver_clears_with_copy_engine = driver->api_trait_get(RDD::API_TRAIT_CLEARS_WITH_COPY_ENGINE);
1344+
driver_buffers_require_transitions = driver->api_trait_get(RDD::API_TRAIT_BUFFERS_REQUIRE_TRANSITIONS);
13211345
}
13221346

13231347
void RenderingDeviceGraph::finalize() {

servers/rendering/rendering_device_graph.h

+2
Original file line numberDiff line numberDiff line change
@@ -637,6 +637,7 @@ class RenderingDeviceGraph {
637637
BarrierGroup barrier_group;
638638
bool driver_honors_barriers : 1;
639639
bool driver_clears_with_copy_engine : 1;
640+
bool driver_buffers_require_transitions : 1;
640641
WorkaroundsState workarounds_state;
641642
TightLocalVector<Frame> frames;
642643
uint32_t frame = 0;
@@ -648,6 +649,7 @@ class RenderingDeviceGraph {
648649
static bool _is_write_usage(ResourceUsage p_usage);
649650
static RDD::TextureLayout _usage_to_image_layout(ResourceUsage p_usage);
650651
static RDD::BarrierAccessBits _usage_to_access_bits(ResourceUsage p_usage);
652+
bool _check_command_intersection(ResourceTracker *p_resource_tracker, int32_t p_previous_command_index, int32_t p_command_index) const;
651653
int32_t _add_to_command_list(int32_t p_command_index, int32_t p_list_index);
652654
void _add_adjacent_command(int32_t p_previous_command_index, int32_t p_command_index, RecordedCommand *r_command);
653655
int32_t _add_to_slice_read_list(int32_t p_command_index, Rect2i p_subresources, int32_t p_list_index);

0 commit comments

Comments
 (0)