Vectorize downsample for audio mixdown

This results in 3K fewer cycles per sound_mix call
author: Bad Diode <bd@badd10de.dev> 2021-05-28 17:17:01 +0200
committer: Bad Diode <bd@badd10de.dev> 2021-05-28 17:17:01 +0200
commit: 6dce0bf202267616fc63c3b818fc9439def925a5 (patch)
tree: 432fea18e1352d4e97de0b7e87c24364addfb549 /src
parent: bc7f470714032c9e97798a5e532517c3d01adeef (diff)
download: uxngba-6dce0bf202267616fc63c3b818fc9439def925a5.tar.gz
uxngba-6dce0bf202267616fc63c3b818fc9439def925a5.zip
1 file changed, 12 insertions, 8 deletions
diff --git a/src/uxn/devices/apu.c b/src/uxn/devices/apu.c
index d968840..0b502f8 100644
--- a/src/uxn/devices/apu.c
+++ b/src/uxn/devices/apu.c
@@ -183,7 +183,7 @@ void sound_mix() {
    u32 fill = 0;
    dma_fill(mix_buffer, fill, sizeof(mix_buffer), 3);
-    //  Mix channels into the temporary buffer.
+    // Mix channels into the temporary buffer.
    for (size_t j = 0; j < POLYPHONY; ++j) {
        AudioChannel *ch = &channels[j];
        // Check if channel is active.
@@ -203,7 +203,7 @@ void sound_mix() {
            // stopping.
            for(size_t i = 0; i < AUDIO_BUF_LEN; i++) {
                // Remember we are using fixed point values.
-                mix_buffer[i] += (0x80 + (u8)ch->data[ch->pos >> 12]) * vol;
+                mix_buffer[i] += (0x80 ^ ch->data[ch->pos >> 12]) * vol;
                ch->pos += ch->inc;
                if (ch->pos >= ch->length) {
@@ -223,16 +223,20 @@ void sound_mix() {
            // Sample still have room to go, no need to check for looping or
            // end of sample.
            for(size_t i = 0; i < AUDIO_BUF_LEN; i++) {
-                mix_buffer[i] +=  (0x80 + (u8)ch->data[ch->pos>>12]) * ch->vol;
+                mix_buffer[i] +=  (0x80 ^ ch->data[ch->pos>>12]) * vol;
                ch->pos += ch->inc;
            }
        }
    }
-    // Downsample and copy to the playing buffer.
+    // Downsample and copy to the playing buffer (Vectorized).
-    for (size_t i = 0; i < AUDIO_BUF_LEN; ++i) {
+    u64 *mix = mix_buffer;
-        // >> 6 to divide off the volume, >> 2 to divide by 4 channels to
+    u32 *buf = audio.current_buffer;
-        // prevent overflow.
+    for (size_t i = 0, k = 0; i < AUDIO_BUF_LEN; i += 4, k++) {
-        audio.current_buffer[i] = mix_buffer[i] >> 8;
+        u64 x = mix[k];
+        buf[k] = (x >> 8) & 0xFF
+            | (x >> 16) & 0xFF00
+            | (x >> 24) & 0xFF0000
+            | (x >> 32) & 0xFF000000;
    }
 }
author	Bad Diode <bd@badd10de.dev>	2021-05-28 17:17:01 +0200
committer	Bad Diode <bd@badd10de.dev>	2021-05-28 17:17:01 +0200
commit	6dce0bf202267616fc63c3b818fc9439def925a5 (patch)
tree	432fea18e1352d4e97de0b7e87c24364addfb549 /src
parent	bc7f470714032c9e97798a5e532517c3d01adeef (diff)
download	uxngba-6dce0bf202267616fc63c3b818fc9439def925a5.tar.gz uxngba-6dce0bf202267616fc63c3b818fc9439def925a5.zip