From 6dce0bf202267616fc63c3b818fc9439def925a5 Mon Sep 17 00:00:00 2001 From: Bad Diode Date: Fri, 28 May 2021 17:17:01 +0200 Subject: Vectorize downsample for audio mixdown This results in 3K fewer cycles per sound_mix call --- src/uxn/devices/apu.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/uxn/devices/apu.c b/src/uxn/devices/apu.c index d968840..0b502f8 100644 --- a/src/uxn/devices/apu.c +++ b/src/uxn/devices/apu.c @@ -183,7 +183,7 @@ void sound_mix() { u32 fill = 0; dma_fill(mix_buffer, fill, sizeof(mix_buffer), 3); - // Mix channels into the temporary buffer. + // Mix channels into the temporary buffer. for (size_t j = 0; j < POLYPHONY; ++j) { AudioChannel *ch = &channels[j]; // Check if channel is active. @@ -203,7 +203,7 @@ void sound_mix() { // stopping. for(size_t i = 0; i < AUDIO_BUF_LEN; i++) { // Remember we are using fixed point values. - mix_buffer[i] += (0x80 + (u8)ch->data[ch->pos >> 12]) * vol; + mix_buffer[i] += (0x80 ^ ch->data[ch->pos >> 12]) * vol; ch->pos += ch->inc; if (ch->pos >= ch->length) { @@ -223,16 +223,20 @@ void sound_mix() { // Sample still have room to go, no need to check for looping or // end of sample. for(size_t i = 0; i < AUDIO_BUF_LEN; i++) { - mix_buffer[i] += (0x80 + (u8)ch->data[ch->pos>>12]) * ch->vol; + mix_buffer[i] += (0x80 ^ ch->data[ch->pos>>12]) * vol; ch->pos += ch->inc; } } } - // Downsample and copy to the playing buffer. - for (size_t i = 0; i < AUDIO_BUF_LEN; ++i) { - // >> 6 to divide off the volume, >> 2 to divide by 4 channels to - // prevent overflow. - audio.current_buffer[i] = mix_buffer[i] >> 8; + // Downsample and copy to the playing buffer (Vectorized). + u64 *mix = mix_buffer; + u32 *buf = audio.current_buffer; + for (size_t i = 0, k = 0; i < AUDIO_BUF_LEN; i += 4, k++) { + u64 x = mix[k]; + buf[k] = (x >> 8) & 0xFF + | (x >> 16) & 0xFF00 + | (x >> 24) & 0xFF0000 + | (x >> 32) & 0xFF000000; } } -- cgit v1.2.1