Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improvements from TheForge #99257

Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions doc/classes/ProjectSettings.xml
Original file line number Diff line number Diff line change
@@ -2861,6 +2861,9 @@
[b]Note:[/b] Some platforms may restrict the actual value.
</member>
<member name="rendering/rendering_device/vulkan/max_descriptors_per_pool" type="int" setter="" getter="" default="64">
The number of descriptors per pool. Godot's Vulkan backend uses linear pools for descriptors that will be created and destroyed within a single frame. Instead of destroying every single descriptor every frame, they all can be destroyed at once by resetting the pool they belong to.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
The number of descriptors per pool. Godot's Vulkan backend uses linear pools for descriptors that will be created and destroyed within a single frame. Instead of destroying every single descriptor every frame, they all can be destroyed at once by resetting the pool they belong to.
The number of descriptors per pool. The Vulkan rendering driver uses linear pools for descriptors that will be created and destroyed within a single frame. Instead of destroying every single descriptor every frame, they all can be destroyed at once by resetting the pool they belong to.

Try to avoid ambiguous "backend", prefer explicit "renderer/rendering method" or "rendering driver", see #98744.

A larger number is more efficient up to a limit, after that it will only waste RAM (maximum efficiency is achieved when there is no more than 1 pool per frame). A small number could end up with one pool per descriptor, which negatively impacts performance.
[b]Note:[/b] Changing this property requires a restart to take effect.
</member>
<member name="rendering/scaling_3d/fsr_sharpness" type="float" setter="" getter="" default="0.2">
Determines how sharp the upscaled image will be when using the FSR upscaling mode. Sharpness halves with every whole number. Values go from 0.0 (sharpest) to 2.0. Values above 2.0 won't make a visible difference.
28 changes: 26 additions & 2 deletions drivers/d3d12/rendering_device_driver_d3d12.cpp
Original file line number Diff line number Diff line change
@@ -2286,6 +2286,10 @@ RDD::CommandPoolID RenderingDeviceDriverD3D12::command_pool_create(CommandQueueF
return CommandPoolID(command_pool);
}

bool RenderingDeviceDriverD3D12::command_pool_reset(CommandPoolID p_cmd_pool) {
return true;
}

void RenderingDeviceDriverD3D12::command_pool_free(CommandPoolID p_cmd_pool) {
CommandPoolInfo *command_pool = (CommandPoolInfo *)(p_cmd_pool.id);
memdelete(command_pool);
@@ -3616,7 +3620,7 @@ Vector<uint8_t> RenderingDeviceDriverD3D12::shader_compile_binary_from_spirv(Vec
return ret;
}

RDD::ShaderID RenderingDeviceDriverD3D12::shader_create_from_bytecode(const Vector<uint8_t> &p_shader_binary, ShaderDescription &r_shader_desc, String &r_name) {
RDD::ShaderID RenderingDeviceDriverD3D12::shader_create_from_bytecode(const Vector<uint8_t> &p_shader_binary, ShaderDescription &r_shader_desc, String &r_name, const Vector<ImmutableSampler> &p_immutable_samplers) {
r_shader_desc = {}; // Driver-agnostic.
ShaderInfo shader_info_in; // Driver-specific.

@@ -3825,7 +3829,9 @@ static void _add_descriptor_count_for_uniform(RenderingDevice::UniformType p_typ
}
}

RDD::UniformSetID RenderingDeviceDriverD3D12::uniform_set_create(VectorView<BoundUniform> p_uniforms, ShaderID p_shader, uint32_t p_set_index) {
RDD::UniformSetID RenderingDeviceDriverD3D12::uniform_set_create(VectorView<BoundUniform> p_uniforms, ShaderID p_shader, uint32_t p_set_index, int p_linear_pool_index) {
// p_linear_pool_index = -1; // TODO:? Linear pools not implemented or not supported by API backend.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// p_linear_pool_index = -1; // TODO:? Linear pools not implemented or not supported by API backend.
//p_linear_pool_index = -1; // TODO:? Linear pools not implemented or not supported by API backend.

Commented out code should have no space


// Pre-bookkeep.
UniformSetInfo *uniform_set_info = VersatileResource::allocate<UniformSetInfo>(resources_allocator);

@@ -5352,6 +5358,13 @@ void RenderingDeviceDriverD3D12::command_bind_render_uniform_set(CommandBufferID
_command_bind_uniform_set(p_cmd_buffer, p_uniform_set, p_shader, p_set_index, false);
}

void RenderingDeviceDriverD3D12::command_bind_render_uniform_sets(CommandBufferID p_cmd_buffer, VectorView<UniformSetID> p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count) {
for (uint32_t i = 0u; i < p_set_count; ++i) {
// TODO: _command_bind_uniform_set() does WAAAAY too much stuff. A lot of it should be already cached in UniformSetID when uniform_set_create() was called. Binding is supposed to be a cheap operation, ideally a memcpy.
_command_bind_uniform_set(p_cmd_buffer, p_uniform_sets[i], p_shader, p_first_set_index + i, false);
}
}

void RenderingDeviceDriverD3D12::command_render_draw(CommandBufferID p_cmd_buffer, uint32_t p_vertex_count, uint32_t p_instance_count, uint32_t p_base_vertex, uint32_t p_first_instance) {
CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id;
_bind_vertex_buffers(cmd_buf_info);
@@ -5856,6 +5869,13 @@ void RenderingDeviceDriverD3D12::command_bind_compute_uniform_set(CommandBufferI
_command_bind_uniform_set(p_cmd_buffer, p_uniform_set, p_shader, p_set_index, true);
}

void RenderingDeviceDriverD3D12::command_bind_compute_uniform_sets(CommandBufferID p_cmd_buffer, VectorView<UniformSetID> p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count) {
for (uint32_t i = 0u; i < p_set_count; ++i) {
// TODO: _command_bind_uniform_set() does WAAAAY too much stuff. A lot of it should be already cached in UniformSetID when uniform_set_create() was called. Binding is supposed to be a cheap operation, ideally a memcpy.
_command_bind_uniform_set(p_cmd_buffer, p_uniform_sets[i], p_shader, p_first_set_index + i, true);
}
}

void RenderingDeviceDriverD3D12::command_compute_dispatch(CommandBufferID p_cmd_buffer, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) {
CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id;
if (!barrier_capabilities.enhanced_barriers_supported) {
@@ -6139,6 +6159,10 @@ uint64_t RenderingDeviceDriverD3D12::get_total_memory_used() {
return stats.Total.Stats.BlockBytes;
}

uint64_t RenderingDeviceDriverD3D12::get_lazily_memory_used() {
return 0;
}

uint64_t RenderingDeviceDriverD3D12::limit_get(Limit p_limit) {
uint64_t safe_unbounded = ((uint64_t)1 << 30);
switch (p_limit) {
9 changes: 7 additions & 2 deletions drivers/d3d12/rendering_device_driver_d3d12.h
Original file line number Diff line number Diff line change
@@ -434,6 +434,7 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver {

public:
virtual CommandPoolID command_pool_create(CommandQueueFamilyID p_cmd_queue_family, CommandBufferType p_cmd_buffer_type) override final;
virtual bool command_pool_reset(CommandPoolID p_cmd_pool) override final;
virtual void command_pool_free(CommandPoolID p_cmd_pool) override final;

// ----- BUFFER -----
@@ -697,7 +698,7 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver {
public:
virtual String shader_get_binary_cache_key() override final;
virtual Vector<uint8_t> shader_compile_binary_from_spirv(VectorView<ShaderStageSPIRVData> p_spirv, const String &p_shader_name) override final;
virtual ShaderID shader_create_from_bytecode(const Vector<uint8_t> &p_shader_binary, ShaderDescription &r_shader_desc, String &r_name) override final;
virtual ShaderID shader_create_from_bytecode(const Vector<uint8_t> &p_shader_binary, ShaderDescription &r_shader_desc, String &r_name, const Vector<ImmutableSampler> &p_immutable_samplers) override final;
virtual uint32_t shader_get_layout_hash(ShaderID p_shader) override final;
virtual void shader_free(ShaderID p_shader) override final;
virtual void shader_destroy_modules(ShaderID p_shader) override final;
@@ -747,7 +748,7 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver {
};

public:
virtual UniformSetID uniform_set_create(VectorView<BoundUniform> p_uniforms, ShaderID p_shader, uint32_t p_set_index) override final;
virtual UniformSetID uniform_set_create(VectorView<BoundUniform> p_uniforms, ShaderID p_shader, uint32_t p_set_index, int p_linear_pool_index) override final;
virtual void uniform_set_free(UniformSetID p_uniform_set) override final;

// ----- COMMANDS -----
@@ -757,6 +758,7 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver {
private:
void _command_check_descriptor_sets(CommandBufferID p_cmd_buffer);
void _command_bind_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index, bool p_for_compute);
void _command_bind_uniform_sets(CommandBufferID p_cmd_buffer, VectorView<UniformSetID> p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count, bool p_for_compute);

public:
/******************/
@@ -846,6 +848,7 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver {
// Binding.
virtual void command_bind_render_pipeline(CommandBufferID p_cmd_buffer, PipelineID p_pipeline) override final;
virtual void command_bind_render_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) override final;
virtual void command_bind_render_uniform_sets(CommandBufferID p_cmd_buffer, VectorView<UniformSetID> p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count) override final;

// Drawing.
virtual void command_render_draw(CommandBufferID p_cmd_buffer, uint32_t p_vertex_count, uint32_t p_instance_count, uint32_t p_base_vertex, uint32_t p_first_instance) override final;
@@ -893,6 +896,7 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver {
// Binding.
virtual void command_bind_compute_pipeline(CommandBufferID p_cmd_buffer, PipelineID p_pipeline) override final;
virtual void command_bind_compute_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) override final;
virtual void command_bind_compute_uniform_sets(CommandBufferID p_cmd_buffer, VectorView<UniformSetID> p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count) override final;

// Dispatching.
virtual void command_compute_dispatch(CommandBufferID p_cmd_buffer, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) override final;
@@ -986,6 +990,7 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver {
virtual void set_object_name(ObjectType p_type, ID p_driver_id, const String &p_name) override final;
virtual uint64_t get_resource_native_handle(DriverResource p_type, ID p_driver_id) override final;
virtual uint64_t get_total_memory_used() override final;
virtual uint64_t get_lazily_memory_used() override final;
virtual uint64_t limit_get(Limit p_limit) override final;
virtual uint64_t api_trait_get(ApiTrait p_trait) override final;
virtual bool has_feature(Features p_feature) override final;
2 changes: 2 additions & 0 deletions drivers/metal/metal_objects.h
Original file line number Diff line number Diff line change
@@ -502,6 +502,7 @@ class API_AVAILABLE(macos(11.0), ios(14.0)) MDCommandBuffer {
#pragma mark - Render Commands

void render_bind_uniform_set(RDD::UniformSetID p_uniform_set, RDD::ShaderID p_shader, uint32_t p_set_index);
void render_bind_uniform_sets(VectorView<RDD::UniformSetID> p_uniform_sets, RDD::ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count);
void render_clear_attachments(VectorView<RDD::AttachmentClear> p_attachment_clears, VectorView<Rect2i> p_rects);
void render_set_viewport(VectorView<Rect2i> p_viewports);
void render_set_scissor(VectorView<Rect2i> p_scissors);
@@ -535,6 +536,7 @@ class API_AVAILABLE(macos(11.0), ios(14.0)) MDCommandBuffer {
#pragma mark - Compute Commands

void compute_bind_uniform_set(RDD::UniformSetID p_uniform_set, RDD::ShaderID p_shader, uint32_t p_set_index);
void compute_bind_uniform_sets(VectorView<RDD::UniformSetID> p_uniform_sets, RDD::ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count);
void compute_dispatch(uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups);
void compute_dispatch_indirect(RDD::BufferID p_indirect_buffer, uint64_t p_offset);

54 changes: 54 additions & 0 deletions drivers/metal/metal_objects.mm
Original file line number Diff line number Diff line change
@@ -223,6 +223,26 @@
}
}

void MDCommandBuffer::render_bind_uniform_sets(VectorView<RDD::UniformSetID> p_uniform_sets, RDD::ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count) {
DEV_ASSERT(type == MDCommandBufferStateType::Render);

for (size_t i = 0u; i < p_set_count; ++i) {
MDUniformSet *set = (MDUniformSet *)(p_uniform_sets[i].id);
if (render.uniform_sets.size() <= set->index) {
uint32_t s = render.uniform_sets.size();
render.uniform_sets.resize(set->index + 1);
// Set intermediate values to null.
std::fill(&render.uniform_sets[s], &render.uniform_sets[set->index] + 1, nullptr);
}

if (render.uniform_sets[set->index] != set) {
render.dirty.set_flag(RenderState::DIRTY_UNIFORMS);
render.uniform_set_mask |= 1ULL << set->index;
render.uniform_sets[set->index] = set;
}
}
}

void MDCommandBuffer::render_clear_attachments(VectorView<RDD::AttachmentClear> p_attachment_clears, VectorView<Rect2i> p_rects) {
DEV_ASSERT(type == MDCommandBufferStateType::Render);

@@ -964,6 +984,40 @@
}
}

void MDCommandBuffer::compute_bind_uniform_sets(VectorView<RDD::UniformSetID> p_uniform_sets, RDD::ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count) {
DEV_ASSERT(type == MDCommandBufferStateType::Compute);

id<MTLComputeCommandEncoder> enc = compute.encoder;
id<MTLDevice> device = enc.device;

MDShader *shader = (MDShader *)(p_shader.id);

thread_local LocalVector<__unsafe_unretained id<MTLBuffer>> buffers;
thread_local LocalVector<NSUInteger> offsets;

buffers.resize(p_set_count);
offsets.resize(p_set_count);

for (size_t i = 0u; i < p_set_count; ++i) {
UniformSet const &set_info = shader->sets[p_first_set_index + i];

MDUniformSet *set = (MDUniformSet *)(p_uniform_sets[i].id);
BoundUniformSet &bus = set->boundUniformSetForShader(shader, device);
bus.merge_into(compute.resource_usage);

uint32_t const *offset = set_info.offsets.getptr(RDD::SHADER_STAGE_COMPUTE);
if (offset) {
buffers[i] = bus.buffer;
offsets[i] = *offset;
} else {
buffers[i] = nullptr;
offsets[i] = 0u;
}
}

[enc setBuffers:buffers.ptr() offsets:offsets.ptr() withRange:NSMakeRange(p_first_set_index, p_set_count)];
}

void MDCommandBuffer::compute_dispatch(uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) {
DEV_ASSERT(type == MDCommandBufferStateType::Compute);

8 changes: 6 additions & 2 deletions drivers/metal/rendering_device_driver_metal.h
Original file line number Diff line number Diff line change
@@ -185,6 +185,7 @@ class API_AVAILABLE(macos(11.0), ios(14.0)) RenderingDeviceDriverMetal : public
// ----- POOL -----

virtual CommandPoolID command_pool_create(CommandQueueFamilyID p_cmd_queue_family, CommandBufferType p_cmd_buffer_type) override final;
virtual bool command_pool_reset(CommandPoolID p_cmd_pool) override final;
virtual void command_pool_free(CommandPoolID p_cmd_pool) override final;

// ----- BUFFER -----
@@ -251,14 +252,14 @@ class API_AVAILABLE(macos(11.0), ios(14.0)) RenderingDeviceDriverMetal : public
public:
virtual String shader_get_binary_cache_key() override final;
virtual Vector<uint8_t> shader_compile_binary_from_spirv(VectorView<ShaderStageSPIRVData> p_spirv, const String &p_shader_name) override final;
virtual ShaderID shader_create_from_bytecode(const Vector<uint8_t> &p_shader_binary, ShaderDescription &r_shader_desc, String &r_name) override final;
virtual ShaderID shader_create_from_bytecode(const Vector<uint8_t> &p_shader_binary, ShaderDescription &r_shader_desc, String &r_name, const Vector<ImmutableSampler> &p_immutable_samplers) override final;
virtual void shader_free(ShaderID p_shader) override final;
virtual void shader_destroy_modules(ShaderID p_shader) override final;

#pragma mark - Uniform Set

public:
virtual UniformSetID uniform_set_create(VectorView<BoundUniform> p_uniforms, ShaderID p_shader, uint32_t p_set_index) override final;
virtual UniformSetID uniform_set_create(VectorView<BoundUniform> p_uniforms, ShaderID p_shader, uint32_t p_set_index, int p_linear_pool_index) override final;
virtual void uniform_set_free(UniformSetID p_uniform_set) override final;

#pragma mark - Commands
@@ -331,6 +332,7 @@ class API_AVAILABLE(macos(11.0), ios(14.0)) RenderingDeviceDriverMetal : public
// Binding.
virtual void command_bind_render_pipeline(CommandBufferID p_cmd_buffer, PipelineID p_pipeline) override final;
virtual void command_bind_render_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) override final;
virtual void command_bind_render_uniform_sets(CommandBufferID p_cmd_buffer, VectorView<UniformSetID> p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count) override final;

// Drawing.
virtual void command_render_draw(CommandBufferID p_cmd_buffer, uint32_t p_vertex_count, uint32_t p_instance_count, uint32_t p_base_vertex, uint32_t p_first_instance) override final;
@@ -371,6 +373,7 @@ class API_AVAILABLE(macos(11.0), ios(14.0)) RenderingDeviceDriverMetal : public
// Binding.
virtual void command_bind_compute_pipeline(CommandBufferID p_cmd_buffer, PipelineID p_pipeline) override final;
virtual void command_bind_compute_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) override final;
virtual void command_bind_compute_uniform_sets(CommandBufferID p_cmd_buffer, VectorView<UniformSetID> p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count) override final;

// Dispatching.
virtual void command_compute_dispatch(CommandBufferID p_cmd_buffer, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) override final;
@@ -413,6 +416,7 @@ class API_AVAILABLE(macos(11.0), ios(14.0)) RenderingDeviceDriverMetal : public
virtual void set_object_name(ObjectType p_type, ID p_driver_id, const String &p_name) override final;
virtual uint64_t get_resource_native_handle(DriverResource p_type, ID p_driver_id) override final;
virtual uint64_t get_total_memory_used() override final;
virtual uint64_t get_lazily_memory_used() override final;
virtual uint64_t limit_get(Limit p_limit) override final;
virtual uint64_t api_trait_get(ApiTrait p_trait) override final;
virtual bool has_feature(Features p_feature) override final;
Loading