Skip to content

Commit e55ceeb

Browse files
Add some more comments to AudioServer and remove playback fade-in
Co-authored-by: Ellen Poe <ellen@ellenhp.me>
1 parent 92e51fc commit e55ceeb

File tree

2 files changed

+54
-27
lines changed

2 files changed

+54
-27
lines changed

servers/audio_server.cpp

+46-27
Original file line numberDiff line numberDiff line change
@@ -371,10 +371,14 @@ void AudioServer::_mix_step() {
371371
bus->soloed = false;
372372
}
373373
}
374+
// This is legacy code from 3.x that allows video players and other audio sources that do not implement AudioStreamPlayback to output audio.
374375
for (CallbackItem *ci : mix_callback_list) {
375376
ci->callback(ci->userdata);
376377
}
377378

379+
// Main mixing loop for audio streams.
380+
// The basic idea here is to copy the samples returned by the AudioStreamPlayback's mix function into the audio buffers,
381+
// while always maintaining a lookahead buffer of size LOOKAHEAD_BUFFER_SIZE to allow fade-outs for sudden stoppages.
378382
for (AudioStreamPlaybackListNode *playback : playback_list) {
379383
// Paused streams are no-ops. Don't even mix audio from the stream playback.
380384
if (playback->state.load() == AudioStreamPlaybackListNode::PAUSED) {
@@ -385,22 +389,26 @@ void AudioServer::_mix_step() {
385389
continue;
386390
}
387391

392+
// If `fading_out` is true, we're in the process of fading out the stream playback.
393+
// TODO: Currently this sets the volume of the stream to 0 which creates a linear interpolation between its previous volume and silence.
394+
// A more punchy option for fading out could be to just use the lookahead buffer.
388395
bool fading_out = playback->state.load() == AudioStreamPlaybackListNode::FADE_OUT_TO_DELETION || playback->state.load() == AudioStreamPlaybackListNode::FADE_OUT_TO_PAUSE;
389396

390397
AudioFrame *buf = mix_buffer.ptrw();
391398

392-
// Copy the lookeahead buffer into the mix buffer.
399+
// Copy the old contents of the lookahead buffer into the beginning of the mix buffer.
393400
for (int i = 0; i < LOOKAHEAD_BUFFER_SIZE; i++) {
394401
buf[i] = playback->lookahead[i];
395402
}
396403

397-
// Mix the audio stream
404+
// Mix the audio stream.
398405
unsigned int mixed_frames = playback->stream_playback->mix(&buf[LOOKAHEAD_BUFFER_SIZE], playback->pitch_scale.get(), buffer_size);
399406

400407
if (tag_used_audio_streams && playback->stream_playback->is_playing()) {
401408
playback->stream_playback->tag_used_streams();
402409
}
403410

411+
// Check to see if the stream has run out of samples.
404412
if (mixed_frames != buffer_size) {
405413
// We know we have at least the size of our lookahead buffer for fade-out purposes.
406414

@@ -416,42 +424,52 @@ void AudioServer::_mix_step() {
416424
new_state = AudioStreamPlaybackListNode::AWAITING_DELETION;
417425
playback->state.store(new_state);
418426
} else {
419-
// Move the last little bit of what we just mixed into our lookahead buffer.
427+
// Move the last little bit of what we just mixed into our lookahead buffer for the next call to _mix_step.
420428
for (int i = 0; i < LOOKAHEAD_BUFFER_SIZE; i++) {
421429
playback->lookahead[i] = buf[buffer_size + i];
422430
}
423431
}
424432

425-
AudioStreamPlaybackBusDetails *ptr = playback->bus_details.load();
426-
ERR_FAIL_NULL(ptr);
427-
// By putting null into the bus details pointers, we're taking ownership of their memory for the duration of this mix.
428-
AudioStreamPlaybackBusDetails bus_details = *ptr;
433+
// Get the bus details for this playback. This contains information about which buses the playback is assigned to and the volume of the playback on each bus.
434+
AudioStreamPlaybackBusDetails *bus_details_ptr = playback->bus_details.load();
435+
ERR_FAIL_NULL(bus_details_ptr);
436+
// Make a copy of the bus details so we can modify it without worrying about other threads.
437+
AudioStreamPlaybackBusDetails bus_details = *bus_details_ptr;
429438

430439
// Mix to any active buses.
431440
for (int idx = 0; idx < MAX_BUSES_PER_PLAYBACK; idx++) {
432441
if (!bus_details.bus_active[idx]) {
433442
continue;
434443
}
444+
// This is the AudioServer-internal index of the bus we're mixing to in this step of the loop. Not to be confused with `idx` which is an index into `AudioStreamPlaybackBusDetails` member var arrays.
435445
int bus_idx = thread_find_bus_index(bus_details.bus[idx]);
436446

447+
// It's important to know whether or not this bus was active in the previous mix step of this stream. If it was, we need to perform volume interpolation to avoid pops.
437448
int prev_bus_idx = -1;
438449
for (int search_idx = 0; search_idx < MAX_BUSES_PER_PLAYBACK; search_idx++) {
439450
if (!playback->prev_bus_details->bus_active[search_idx]) {
440451
continue;
441452
}
453+
// If the StringNames of the buses match, we've found the previous bus index. This indicates that this playback mixed to `prev_bus_details->bus[prev_bus_idx]` in the previous mix step, which gives us a way to look up the playback's previous volume.
442454
if (playback->prev_bus_details->bus[search_idx].hash() == bus_details.bus[idx].hash()) {
443455
prev_bus_idx = search_idx;
456+
break;
444457
}
445458
}
446459

460+
// It's now time to mix to the bus. We do this by going through each channel of the bus and mixing to it.
461+
// The channels correspond to output channels of the audio device, e.g. stereo or 5.1. To reduce needless nesting, this is done with a helper method named `_mix_step_for_channel`.
447462
for (int channel_idx = 0; channel_idx < channel_count; channel_idx++) {
448463
AudioFrame *channel_buf = thread_get_channel_mix_buffer(bus_idx, channel_idx);
464+
// TODO: This `fading_out` check could be replaced with an exponential fadeout of the samples from the lookahead buffer for more punchy results.
449465
if (fading_out) {
450466
bus_details.volume[idx][channel_idx] = AudioFrame(0, 0);
451467
}
452468
AudioFrame channel_vol = bus_details.volume[idx][channel_idx];
453469

454-
AudioFrame prev_channel_vol = AudioFrame(0, 0);
470+
// If this bus was not active in the previous mix step, we want to start playback at the full volume to avoid crushing transients.
471+
AudioFrame prev_channel_vol = channel_vol;
472+
// If this bus was active in the previous mix step, we need to interpolate between the previous volume and the current volume to avoid pops. Set `prev_channel_vol` accordingly.
455473
if (prev_bus_idx != -1) {
456474
prev_channel_vol = playback->prev_bus_details->volume[prev_bus_idx][channel_idx];
457475
}
@@ -480,7 +498,7 @@ void AudioServer::_mix_step() {
480498
for (int channel_idx = 0; channel_idx < channel_count; channel_idx++) {
481499
AudioFrame *channel_buf = thread_get_channel_mix_buffer(bus_idx, channel_idx);
482500
AudioFrame prev_channel_vol = playback->prev_bus_details->volume[idx][channel_idx];
483-
// Fade out to silence
501+
// Fade out to silence. This could be replaced with an exponential fadeout of the samples from the lookahead buffer for more punchy results.
484502
_mix_step_for_channel(channel_buf, buf, prev_channel_vol, AudioFrame(0, 0), playback->attenuation_filter_cutoff_hz.get(), playback->highshelf_gain.get(), &playback->filter_process[channel_idx * 2], &playback->filter_process[channel_idx * 2 + 1]);
485503
}
486504
}
@@ -501,15 +519,12 @@ void AudioServer::_mix_step() {
501519
switch (playback->state.load()) {
502520
case AudioStreamPlaybackListNode::AWAITING_DELETION:
503521
case AudioStreamPlaybackListNode::FADE_OUT_TO_DELETION:
522+
// Remove the playback from the list.
504523
_delete_stream_playback_list_node(playback);
505524
break;
506525
case AudioStreamPlaybackListNode::FADE_OUT_TO_PAUSE: {
507526
// Pause the stream.
508-
AudioStreamPlaybackListNode::PlaybackState old_state, new_state;
509-
do {
510-
old_state = playback->state.load();
511-
new_state = AudioStreamPlaybackListNode::PAUSED;
512-
} while (!playback->state.compare_exchange_strong(/* expected= */ old_state, new_state));
527+
playback->state.store(AudioStreamPlaybackListNode::PAUSED);
513528
} break;
514529
case AudioStreamPlaybackListNode::PLAYING:
515530
case AudioStreamPlaybackListNode::PAUSED:
@@ -518,13 +533,13 @@ void AudioServer::_mix_step() {
518533
}
519534
}
520535

536+
// Now that all of the buses have their audio sources mixed into them, we can process the effects and bus sends.
521537
for (int i = buses.size() - 1; i >= 0; i--) {
522-
//go bus by bus
523538
Bus *bus = buses[i];
524539

525540
for (int k = 0; k < bus->channels.size(); k++) {
526541
if (bus->channels[k].active && !bus->channels[k].used) {
527-
//buffer was not used, but it's still active, so it must be cleaned
542+
// Buffer was not used, but it's still active, so it must be cleaned.
528543
AudioFrame *buf = bus->channels.write[k].buffer.ptrw();
529544

530545
for (uint32_t j = 0; j < buffer_size; j++) {
@@ -533,7 +548,7 @@ void AudioServer::_mix_step() {
533548
}
534549
}
535550

536-
//process effects
551+
// Process effects.
537552
if (!bus->bypass) {
538553
for (int j = 0; j < bus->effects.size(); j++) {
539554
if (!bus->effects[j].enabled) {
@@ -551,7 +566,7 @@ void AudioServer::_mix_step() {
551566
bus->channels.write[k].effect_instances.write[j]->process(bus->channels[k].buffer.ptr(), temp_buffer.write[k].ptrw(), buffer_size);
552567
}
553568

554-
//swap buffers, so internal buffer always has the right data
569+
// Swap buffers, so internal buffer always has the right data.
555570
for (int k = 0; k < bus->channels.size(); k++) {
556571
if (!(buses[i]->channels[k].active || bus->channels[k].effect_instances[j]->process_silence())) {
557572
continue;
@@ -565,17 +580,17 @@ void AudioServer::_mix_step() {
565580
}
566581
}
567582

568-
//process send
583+
// Process send.
569584

570585
Bus *send = nullptr;
571586

572587
if (i > 0) {
573-
//everything has a send save for master bus
588+
// Everything has a send except for the master bus.
574589
if (!bus_map.has(bus->send)) {
575590
send = buses[0];
576591
} else {
577592
send = bus_map[bus->send];
578-
if (send->index_cache >= bus->index_cache) { //invalid, send to master
593+
if (send->index_cache >= bus->index_cache) { // Invalid, send to master.
579594
send = buses[0];
580595
}
581596
}
@@ -603,7 +618,7 @@ void AudioServer::_mix_step() {
603618
}
604619
}
605620

606-
//apply volume and compute peak
621+
// Apply volume and compute peak.
607622
for (uint32_t j = 0; j < buffer_size; j++) {
608623
buf[j] *= volume;
609624

@@ -620,7 +635,7 @@ void AudioServer::_mix_step() {
620635
bus->channels.write[k].peak_volume = AudioFrame(Math::linear_to_db(peak.left + AUDIO_PEAK_OFFSET), Math::linear_to_db(peak.right + AUDIO_PEAK_OFFSET));
621636

622637
if (!bus->channels[k].used) {
623-
//see if any audio is contained, because channel was not used
638+
// See if any audio is contained, because channel was not used.
624639

625640
if (MAX(peak.right, peak.left) > Math::db_to_linear(channel_disable_threshold_db)) {
626641
bus->channels.write[k].last_mix_with_audio = mix_frames;
@@ -631,7 +646,7 @@ void AudioServer::_mix_step() {
631646
}
632647

633648
if (send) {
634-
//if not master bus, send
649+
// If not master bus, send.
635650
AudioFrame *target_buf = thread_get_channel_mix_buffer(send->index_cache, k);
636651

637652
for (uint32_t j = 0; j < buffer_size; j++) {
@@ -646,6 +661,7 @@ void AudioServer::_mix_step() {
646661
}
647662

648663
void AudioServer::_mix_step_for_channel(AudioFrame *p_out_buf, AudioFrame *p_source_buf, AudioFrame p_vol_start, AudioFrame p_vol_final, float p_attenuation_filter_cutoff_hz, float p_highshelf_gain, AudioFilterSW::Processor *p_processor_l, AudioFilterSW::Processor *p_processor_r) {
664+
// TODO: In the future it could be nice to replace all of these hardcoded effects with something a bit cleaner and more flexible, but for now this is what we do to support 3D audio players.
649665
if (p_highshelf_gain != 0) {
650666
AudioFilterSW filter;
651667
filter.set_mode(AudioFilterSW::HIGHSHELF);
@@ -665,7 +681,7 @@ void AudioServer::_mix_step_for_channel(AudioFrame *p_out_buf, AudioFrame *p_sou
665681
p_processor_r->update_coeffs(buffer_size);
666682

667683
for (unsigned int frame_idx = 0; frame_idx < buffer_size; frame_idx++) {
668-
// Make this buffer size invariant if buffer_size ever becomes a project setting.
684+
// TODO: Make lerp speed buffer-size-invariant if buffer_size ever becomes a project setting to avoid very small buffer sizes causing pops due to too-fast lerps.
669685
float lerp_param = (float)frame_idx / buffer_size;
670686
AudioFrame vol = p_vol_final * lerp_param + (1 - lerp_param) * p_vol_start;
671687
AudioFrame mixed = vol * p_source_buf[frame_idx];
@@ -676,7 +692,7 @@ void AudioServer::_mix_step_for_channel(AudioFrame *p_out_buf, AudioFrame *p_sou
676692

677693
} else {
678694
for (unsigned int frame_idx = 0; frame_idx < buffer_size; frame_idx++) {
679-
// Make this buffer size invariant if buffer_size ever becomes a project setting.
695+
// TODO: Make lerp speed buffer-size-invariant if buffer_size ever becomes a project setting to avoid very small buffer sizes causing pops due to too-fast lerps.
680696
float lerp_param = (float)frame_idx / buffer_size;
681697
p_out_buf[frame_idx] += (p_vol_final * lerp_param + (1 - lerp_param) * p_vol_start) * p_source_buf[frame_idx];
682698
}
@@ -701,6 +717,7 @@ void AudioServer::_delete_stream_playback(Ref<AudioStreamPlayback> p_playback) {
701717
}
702718

703719
void AudioServer::_delete_stream_playback_list_node(AudioStreamPlaybackListNode *p_playback_node) {
720+
// Remove the playback from the list, registering a destructor to be run on the main thread.
704721
playback_list.erase(p_playback_node, [](AudioStreamPlaybackListNode *p) {
705722
delete p->prev_bus_details;
706723
delete p->bus_details.load();
@@ -1467,7 +1484,9 @@ void AudioServer::init_channels_and_buffers() {
14671484
void AudioServer::init() {
14681485
channel_disable_threshold_db = GLOBAL_DEF_RST("audio/buses/channel_disable_threshold_db", -60.0);
14691486
channel_disable_frames = float(GLOBAL_DEF_RST(PropertyInfo(Variant::FLOAT, "audio/buses/channel_disable_time", PROPERTY_HINT_RANGE, "0,5,0.01,or_greater"), 2.0)) * get_mix_rate();
1470-
buffer_size = 512; //hardcoded for now
1487+
// TODO: Buffer size is hardcoded for now. This would be really nice to have as a project setting because currently it limits audio latency to an absolute minimum of 11ms with default mix rate, but there's some additional work required to make that happen. See TODOs in `_mix_step_for_channel`.
1488+
// When this becomes a project setting, it should be specified in milliseconds rather than raw sample count, because 512 samples at 192 kHz is shorter than it is at 48 kHz, for example.
1489+
buffer_size = 512;
14711490

14721491
init_channels_and_buffers();
14731492

servers/audio_server.h

+8
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,14 @@ class AudioServer : public Object {
270270
};
271271

272272
struct AudioStreamPlaybackListNode {
273+
// The state machine for audio stream playbacks is as follows:
274+
// 1. The playback is created and added to the playback list in the playing state.
275+
// 2. The playback is (maybe) paused, and the state is set to FADE_OUT_TO_PAUSE.
276+
// 2.1. The playback is mixed after being paused, and the audio server thread atomically sets the state to PAUSED after performing a brief fade-out.
277+
// 3. The playback is (maybe) deleted, and the state is set to FADE_OUT_TO_DELETION.
278+
// 3.1. The playback is mixed after being deleted, and the audio server thread atomically sets the state to AWAITING_DELETION after performing a brief fade-out.
279+
// NOTE: The playback is not deallocated at this time because allocation and deallocation are not realtime-safe.
280+
// 4. The playback is removed and deallocated on the main thread using the SafeList maybe_cleanup method.
273281
enum PlaybackState {
274282
PAUSED = 0, // Paused. Keep this stream playback around though so it can be restarted.
275283
PLAYING = 1, // Playing. Fading may still be necessary if volume changes!

0 commit comments

Comments
 (0)