Skip to content

Commit eb669f5

Browse files
authored
Fix storage padding and re-enable buffer clean-up (#324)
Fix the padding of all storage buffers to comply with the GPU device's alignment requirement, which on most desktop platforms is 32 bytes, but can be different (for example, on macOS M1 and later it's generally 256 bytes). This fixes a number of issues where effects were not rendering at all, or displayed strong artifacts. The change is likely overdoing it by padding all structures used in arrays; however, this can be optimized in a later pass. Fix a bug in the `CloneModifier` where the shader code was allocating cloned particles without checking the buffer capacity, leading to an underflow error and stomping over other parts of the buffer. Re-enable buffer clean-up on effect deletion, which was commented out during the ribbon change but not fixed, and left disabled. This caused the buffers to fill, leaving no space for new effects even after some effects were deleted, which resulted in newly spawned effects not updating (the code was dispatching 0 workgroups in the update pass). Fixes #322
1 parent 2da6d8d commit eb669f5

File tree

7 files changed

+356
-96
lines changed

7 files changed

+356
-96
lines changed

.github/workflows/ci.yaml

+4-4
Original file line numberDiff line numberDiff line change
@@ -147,19 +147,19 @@ jobs:
147147
for example in .github/example-run/3d/*.ron; do
148148
example_name=`basename $example .ron`
149149
echo "running $example_name - "`date`
150-
time WGPU_BACKEND=dx12 CI_TESTING_CONFIG=$example cargo run --example $example_name --no-default-features --features="bevy/bevy_winit bevy/bevy_pbr 3d bevy/bevy_ci_testing"
150+
time WGPU_BACKEND=dx12 CI_TESTING_CONFIG=$example cargo run --example $example_name --no-default-features --features="bevy/bevy_winit bevy/bevy_pbr bevy/bevy_ui bevy/default_font 3d bevy/bevy_ci_testing"
151151
sleep 10
152152
done
153153
for example in .github/example-run/3dpng/*.ron; do
154154
example_name=`basename $example .ron`
155155
echo "running $example_name - "`date`
156-
time WGPU_BACKEND=dx12 CI_TESTING_CONFIG=$example cargo run --example $example_name --no-default-features --features="bevy/bevy_winit bevy/bevy_pbr bevy/png 3d bevy/bevy_ci_testing"
156+
time WGPU_BACKEND=dx12 CI_TESTING_CONFIG=$example cargo run --example $example_name --no-default-features --features="bevy/bevy_winit bevy/bevy_pbr bevy/bevy_ui bevy/default_font bevy/png 3d bevy/bevy_ci_testing"
157157
sleep 10
158158
done
159159
for example in .github/example-run/2d/*.ron; do
160160
example_name=`basename $example .ron`
161161
echo "running $example_name - "`date`
162-
time WGPU_BACKEND=dx12 CI_TESTING_CONFIG=$example cargo run --example $example_name --no-default-features --features="bevy/bevy_winit bevy/bevy_sprite 2d bevy/bevy_ci_testing"
162+
time WGPU_BACKEND=dx12 CI_TESTING_CONFIG=$example cargo run --example $example_name --no-default-features --features="bevy/bevy_winit bevy/bevy_sprite bevy/bevy_ui bevy/default_font 2d bevy/bevy_ci_testing"
163163
sleep 10
164164
done
165165
env:
@@ -234,7 +234,7 @@ jobs:
234234
# if: runner.os == 'linux'
235235
- name: Install cargo-tarpaulin
236236
run: |
237-
RUST_BACKTRACE=1 cargo install --version 0.22.0 cargo-tarpaulin
237+
RUST_BACKTRACE=1 cargo install --version 0.30.0 cargo-tarpaulin
238238
- name: Generate code coverage
239239
run: |
240240
RUST_BACKTRACE=1 cargo tarpaulin --engine llvm --verbose --timeout 120 --out Lcov --workspace --all-features

src/modifier/clone.rs

+6
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,12 @@ impl Modifier for CloneModifier {
100100
101101
// Recycle a dead particle.
102102
let dead_index = atomicSub(&render_group_indirect[{dest}u].dead_count, 1u) - 1u;
103+
// HACK - we have no limiter for dead_count, so could go negative (wrap around).
104+
// Assume that any value above 2^31 is a wrap around, undo the atomic op and return.
105+
if (dead_index >= 0xF0000000) {{
106+
atomicAdd(&render_group_indirect[{dest}u].dead_count, 1u);
107+
return;
108+
}}
103109
let new_index = indirect_buffer.indices[3u * (base_index + dead_index) + 2u];
104110
105111
// Initialize the new particle.

src/plugin.rs

+24-2
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@ use crate::{
2222
render::{
2323
extract_effect_events, extract_effects, prepare_bind_groups, prepare_effects,
2424
prepare_resources, queue_effects, DispatchIndirectPipeline, DrawEffects, EffectAssetEvents,
25-
EffectBindGroups, EffectCache, EffectsMeta, ExtractedEffects, GpuSpawnerParams,
25+
EffectBindGroups, EffectCache, EffectsMeta, ExtractedEffects, GpuDispatchIndirect,
26+
GpuParticleGroup, GpuRenderEffectMetadata, GpuRenderGroupIndirect, GpuSpawnerParams,
2627
ParticlesInitPipeline, ParticlesRenderPipeline, ParticlesUpdatePipeline, ShaderCache,
2728
SimParams, VfxSimulateDriverNode, VfxSimulateNode,
2829
},
@@ -129,8 +130,29 @@ impl HanabiPlugin {
129130
pub(crate) fn make_common_shader(min_storage_buffer_offset_alignment: usize) -> Shader {
130131
let spawner_padding_code =
131132
GpuSpawnerParams::padding_code(min_storage_buffer_offset_alignment);
133+
let dispatch_indirect_padding_code =
134+
GpuDispatchIndirect::padding_code(min_storage_buffer_offset_alignment);
135+
let render_effect_indirect_padding_code =
136+
GpuRenderEffectMetadata::padding_code(min_storage_buffer_offset_alignment);
137+
let render_group_indirect_padding_code =
138+
GpuRenderGroupIndirect::padding_code(min_storage_buffer_offset_alignment);
139+
let particle_group_padding_code =
140+
GpuParticleGroup::padding_code(min_storage_buffer_offset_alignment);
132141
let common_code = include_str!("render/vfx_common.wgsl")
133-
.replace("{{SPAWNER_PADDING}}", &spawner_padding_code);
142+
.replace("{{SPAWNER_PADDING}}", &spawner_padding_code)
143+
.replace(
144+
"{{DISPATCH_INDIRECT_PADDING}}",
145+
&dispatch_indirect_padding_code,
146+
)
147+
.replace(
148+
"{{RENDER_EFFECT_INDIRECT_PADDING}}",
149+
&render_effect_indirect_padding_code,
150+
)
151+
.replace(
152+
"{{RENDER_GROUP_INDIRECT_PADDING}}",
153+
&render_group_indirect_padding_code,
154+
)
155+
.replace("{{PARTICLE_GROUP_PADDING}}", &particle_group_padding_code);
134156
Shader::from_wgsl(
135157
common_code,
136158
std::path::Path::new(file!())

src/render/buffer_table.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,8 @@ impl<T: Pod + ShaderSize> BufferTable<T> {
167167
item_size
168168
};
169169
trace!(
170-
"BufferTable: item_size={} aligned_size={}",
170+
"BufferTable[\"{}\"]: item_size={} aligned_size={}",
171+
label.as_ref().unwrap_or(&String::new()),
171172
item_size,
172173
aligned_size
173174
);

src/render/effect_cache.rs

+26-10
Original file line numberDiff line numberDiff line change
@@ -57,12 +57,12 @@ impl PartialOrd for EffectSlices {
5757
/// Describes all particle groups' slices of particles in the particle buffer
5858
/// for a single effect, as well as the [`DispatchBufferIndices`].
5959
pub struct SlicesRef {
60-
ranges: Vec<u32>,
60+
pub ranges: Vec<u32>,
6161
/// Size of a single item in the slice. Currently equal to the unique size
6262
/// of all items in an [`EffectBuffer`] (no mixed size supported in same
6363
/// buffer), so cached only for convenience.
6464
particle_layout: ParticleLayout,
65-
dispatch_buffer_indices: DispatchBufferIndices,
65+
pub dispatch_buffer_indices: DispatchBufferIndices,
6666
}
6767

6868
/// A reference to a slice allocated inside an [`EffectBuffer`].
@@ -239,7 +239,12 @@ impl EffectBuffer {
239239

240240
// TODO - Cache particle_layout and associated bind group layout, instead of
241241
// creating one bind group layout per buffer using that layout...
242+
let particle_group_size = NonZeroU64::new(GpuParticleGroup::aligned_size(
243+
render_device.limits().min_storage_buffer_offset_alignment as usize,
244+
) as u64)
245+
.unwrap();
242246
let mut entries = vec![
247+
// @binding(0) var<storage, read_write> particle_buffer : ParticleBuffer
243248
BindGroupLayoutEntry {
244249
binding: 0,
245250
visibility: ShaderStages::COMPUTE,
@@ -250,6 +255,7 @@ impl EffectBuffer {
250255
},
251256
count: None,
252257
},
258+
// @binding(1) var<storage, read_write> indirect_buffer : IndirectBuffer
253259
BindGroupLayoutEntry {
254260
binding: 1,
255261
visibility: ShaderStages::COMPUTE,
@@ -260,13 +266,16 @@ impl EffectBuffer {
260266
},
261267
count: None,
262268
},
269+
// @binding(2) var<storage, read> particle_groups : array<ParticleGroup>
263270
BindGroupLayoutEntry {
264271
binding: 2,
265272
visibility: ShaderStages::COMPUTE,
266273
ty: BindingType::Buffer {
267274
ty: BufferBindingType::Storage { read_only: true },
268275
has_dynamic_offset: false,
269-
min_binding_size: Some(GpuParticleGroup::min_size()),
276+
// Despite no dynamic offset, we do bind a non-zero offset sometimes,
277+
// so keep this aligned
278+
min_binding_size: Some(particle_group_size),
270279
},
271280
count: None,
272281
},
@@ -292,6 +301,10 @@ impl EffectBuffer {
292301
let particles_buffer_layout_sim = render_device.create_bind_group_layout(label, &entries);
293302

294303
// Create the render layout.
304+
let dispatch_indirect_size = NonZeroU64::new(GpuDispatchIndirect::aligned_size(
305+
render_device.limits().min_storage_buffer_offset_alignment as usize,
306+
) as u64)
307+
.unwrap();
295308
let mut entries = vec![
296309
BindGroupLayoutEntry {
297310
binding: 0,
@@ -319,7 +332,7 @@ impl EffectBuffer {
319332
ty: BindingType::Buffer {
320333
ty: BufferBindingType::Storage { read_only: true },
321334
has_dynamic_offset: true,
322-
min_binding_size: Some(GpuDispatchIndirect::min_size()),
335+
min_binding_size: Some(dispatch_indirect_size),
323336
},
324337
count: None,
325338
},
@@ -785,10 +798,12 @@ impl EffectCache {
785798
let id = EffectCacheId::new();
786799

787800
let mut ranges = vec![slice.range.start];
801+
let group_count = capacities.len();
788802
for capacity in capacities {
789803
let start_index = ranges.last().unwrap();
790804
ranges.push(start_index + capacity);
791805
}
806+
debug_assert_eq!(ranges.len(), group_count + 1);
792807

793808
let slices = SlicesRef {
794809
ranges,
@@ -858,20 +873,22 @@ impl EffectCache {
858873

859874
/// Remove an effect from the cache. If this was the last effect, drop the
860875
/// underlying buffer and return the index of the dropped buffer.
861-
pub fn remove(&mut self, id: EffectCacheId) -> Option<u32> {
876+
pub fn remove(&mut self, id: EffectCacheId) -> Option<CachedEffectIndices> {
862877
let indices = self.effects.remove(&id)?;
863878
let &mut Some(ref mut buffer) = &mut self.buffers[indices.buffer_index as usize] else {
864879
return None;
865880
};
866881

867882
let slice = SliceRef {
868883
range: indices.slices.ranges[0]..*indices.slices.ranges.last().unwrap(),
869-
particle_layout: indices.slices.particle_layout,
884+
// FIXME: clone() needed to return CachedEffectIndices, but really we don't care about
885+
// returning the ParticleLayout, so should split...
886+
particle_layout: indices.slices.particle_layout.clone(),
870887
};
871888

872889
if buffer.free_slice(slice) == BufferState::Free {
873890
self.buffers[indices.buffer_index as usize] = None;
874-
return Some(indices.buffer_index);
891+
return Some(indices);
875892
}
876893

877894
None
@@ -1166,9 +1183,8 @@ mod gpu_tests {
11661183
assert_eq!(slice2.slices, vec![0, capacity]);
11671184
assert_eq!(effect_cache.buffers().len(), 2);
11681185

1169-
let buffer_index = effect_cache.remove(id1);
1170-
assert!(buffer_index.is_some());
1171-
assert_eq!(buffer_index.unwrap(), 0);
1186+
let cached_effect_indices = effect_cache.remove(id1).unwrap();
1187+
assert_eq!(cached_effect_indices.buffer_index, 0);
11721188
assert_eq!(effect_cache.buffers().len(), 2);
11731189
{
11741190
let buffers = effect_cache.buffers();

0 commit comments

Comments
 (0)