瀏覽代碼

Experimental SP/DIF output on expansion pins of RP2040 boards

saybur 2 年之前
父節點
當前提交
2b6a100fca

+ 74 - 0
lib/BlueSCSI_platform_RP2040/BlueSCSI_platform.cpp

@@ -6,6 +6,8 @@
 #include <assert.h>
 #include <hardware/gpio.h>
 #include <hardware/uart.h>
+#include <hardware/pll.h>
+#include <hardware/clocks.h>
 #include <hardware/spi.h>
 #include <hardware/adc.h>
 #include <hardware/flash.h>
@@ -14,6 +16,7 @@
 #include <platform/mbed_error.h>
 #include <multicore.h>
 #include <USB/PluggableUSBSerial.h>
+#include "audio.h"
 #include "scsi_accel_rp2040.h"
 
 extern "C" {
@@ -43,6 +46,43 @@ static void gpio_conf(uint gpio, enum gpio_function fn, bool pullup, bool pulldo
     }
 }
 
+#ifdef ENABLE_AUDIO_OUTPUT
+/*
+ * Raises clk_sys and clk_peri to 135.428571MHz at runtime so that they can be
+ * divided down to audio output rates. Must be invoked before anything other
+ * than the logging UART depends on clk_peri; the UART itself is drained first
+ * and re-initialized at the end.
+ */
+static void reclock_for_audio() {
+    const uint32_t usb_pll_hz = 48 * MHZ;
+    const uint32_t audio_sys_hz = 135428571;
+
+    // let pending log output leave the UART while its clock is still valid
+    uart_tx_wait_blocking(uart0);
+
+    // park clk_sys and clk_peri on pll_usb while pll_sys is reprogrammed
+    // (section 2.15.6.1 of the datasheet has commented reference code)
+    clock_configure(clk_sys,
+            CLOCKS_CLK_SYS_CTRL_SRC_VALUE_CLKSRC_CLK_SYS_AUX,
+            CLOCKS_CLK_SYS_CTRL_AUXSRC_VALUE_CLKSRC_PLL_USB,
+            usb_pll_hz,
+            usb_pll_hz);
+    clock_configure(clk_peri,
+            0,
+            CLOCKS_CLK_PERI_CTRL_AUXSRC_VALUE_CLKSRC_PLL_USB,
+            usb_pll_hz,
+            usb_pll_hz);
+
+    // restart pll_sys: 948MHz divided by 7 and then 1 gives 135.428571MHz
+    pll_init(pll_sys, 1, 948000000, 7, 1);
+
+    // move both clocks back onto the retuned pll_sys
+    clock_configure(clk_sys,
+            CLOCKS_CLK_SYS_CTRL_SRC_VALUE_CLKSRC_CLK_SYS_AUX,
+            CLOCKS_CLK_SYS_CTRL_AUXSRC_VALUE_CLKSRC_PLL_SYS,
+            audio_sys_hz,
+            audio_sys_hz);
+    clock_configure(clk_peri,
+            0,
+            CLOCKS_CLK_PERI_CTRL_AUXSRC_VALUE_CLKSRC_PLL_SYS,
+            audio_sys_hz,
+            audio_sys_hz);
+
+    // the UART divisors depend on clk_peri, so set the UART up again
+    uart_init(uart0, 1000000);
+}
+#endif
+
 void platform_init()
 {
     // Make sure second core is stopped
@@ -87,6 +127,12 @@ void platform_init()
     //     log("NOTE: SCSI termination is disabled");
     // }
 
+#ifdef ENABLE_AUDIO_OUTPUT
+    log("SP/DIF audio to expansion header enabled");
+    log("-- Overclocking to 135.428571MHz");
+    reclock_for_audio();
+#endif
+
     // Get flash chip size
     uint8_t cmd_read_jedec_id[4] = {0x9f, 0, 0, 0};
     uint8_t response_jedec[4] = {0};
@@ -109,10 +155,18 @@ void platform_init()
     // LED pin
     gpio_conf(LED_PIN,        GPIO_FUNC_SIO, false,false, true,  false, false);
 
+#ifndef ENABLE_AUDIO_OUTPUT
+#ifdef GPIO_I2C_SDA
     // I2C pins
     //        pin             function       pup   pdown  out    state fast
     //gpio_conf(GPIO_I2C_SCL,   GPIO_FUNC_I2C, true,false, false,  true, true);
     //gpio_conf(GPIO_I2C_SDA,   GPIO_FUNC_I2C, true,false, false,  true, true);
+#endif
+#else
+    //        pin             function       pup   pdown  out    state fast
+    //gpio_conf(GPIO_EXP_AUDIO, GPIO_FUNC_SPI, true,false, false,  true, true);
+    // configuration of corresponding SPI unit occurs in audio_setup()
+#endif
 }
 
 static bool read_initiator_dip_switch()
@@ -213,6 +267,11 @@ void platform_late_init()
         gpio_conf(SCSI_IN_ACK,    GPIO_FUNC_SIO, false, false, false, true, false);
         gpio_conf(SCSI_IN_ATN,    GPIO_FUNC_SIO, false, false, false, true, false);
         gpio_conf(SCSI_IN_RST,    GPIO_FUNC_SIO, true, false, false, true, false);
+
+#ifdef ENABLE_AUDIO_OUTPUT
+        // one-time control setup for DMA channels and second core
+        audio_setup();
+#endif
     }
     else
     {
@@ -374,6 +433,17 @@ static void adc_poll()
         initialized = true;
     }
 
+#ifdef ENABLE_AUDIO_OUTPUT
+    /*
+    * If ADC sample reads are done, either via direct reading, FIFO, or DMA,
+    * at the same time a SPI DMA write begins, it appears that the first
+    * 16-bit word of the DMA data is lost. This causes the bitstream to glitch
+    * and audio to 'pop' noticeably. For now, just disable ADC reads when audio
+    * is playing.
+    */
+   if (audio_is_active()) return;
+#endif
+
     int adc_value_max = 0;
     while (!adc_fifo_is_empty())
     {
@@ -507,6 +577,10 @@ void platform_poll()
 {
     usb_log_poll();
     adc_poll();
+    
+#ifdef ENABLE_AUDIO_OUTPUT
+    audio_poll();
+#endif
 }
 
 /*****************************************/

+ 500 - 0
lib/BlueSCSI_platform_RP2040/audio.cpp

@@ -0,0 +1,500 @@
+/** 
+ * Copyright (C) 2023 saybur
+ * 
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version. 
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details. 
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+**/
+
+#ifdef ENABLE_AUDIO_OUTPUT
+
+#include <SdFat.h>
+#include <stdbool.h>
+#include <hardware/dma.h>
+#include <hardware/irq.h>
+#include <hardware/spi.h>
+#include <pico/multicore.h>
+#include "audio.h"
+#include "BlueSCSI_config.h"
+#include "BlueSCSI_log.h"
+#include "BlueSCSI_platform.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern SdFs SD;
+
+// Lookup table giving the number of '1' bits in each 8-bit index.
+// Used for SP/DIF parity calculations; only the low bit of the count matters.
+// Placed in SRAM5 for the second core to use with reduced contention.
+const uint8_t snd_parity[256] __attribute__((aligned(256), section(".scratch_y.snd_parity"))) = {
+    0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 
+    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 
+    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 
+    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 
+    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 
+    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 
+    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 
+    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 
+    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 
+    4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8, };
+
+/*
+ * Precomputed biphase-mark patterns for data. Each entry holds the 16 wire
+ * bits, in MSB-first order, that represent one 8-bit value when the output is
+ * clocked at twice the bitrate (so the wire pairs '11' or '00' reflect a zero
+ * and '10' or '01' represent a one). Every entry starts with a '1' and must
+ * be inverted if the last wire bit of the previous pattern was also a '1'.
+ * These values can be written to an appropriately configured SPI peripheral
+ * to blast biphase data at a receiver.
+ *
+ * To facilitate fast lookups this table should be put in SRAM with low
+ * contention, aligned to an appropriate boundary.
+ */
+const uint16_t biphase[256] __attribute__((aligned(512), section(".scratch_y.biphase"))) = {
+    0xCCCC, 0xB333, 0xD333, 0xACCC, 0xCB33, 0xB4CC, 0xD4CC, 0xAB33,
+    0xCD33, 0xB2CC, 0xD2CC, 0xAD33, 0xCACC, 0xB533, 0xD533, 0xAACC,
+    0xCCB3, 0xB34C, 0xD34C, 0xACB3, 0xCB4C, 0xB4B3, 0xD4B3, 0xAB4C,
+    0xCD4C, 0xB2B3, 0xD2B3, 0xAD4C, 0xCAB3, 0xB54C, 0xD54C, 0xAAB3,
+    0xCCD3, 0xB32C, 0xD32C, 0xACD3, 0xCB2C, 0xB4D3, 0xD4D3, 0xAB2C,
+    0xCD2C, 0xB2D3, 0xD2D3, 0xAD2C, 0xCAD3, 0xB52C, 0xD52C, 0xAAD3,
+    0xCCAC, 0xB353, 0xD353, 0xACAC, 0xCB53, 0xB4AC, 0xD4AC, 0xAB53,
+    0xCD53, 0xB2AC, 0xD2AC, 0xAD53, 0xCAAC, 0xB553, 0xD553, 0xAAAC,
+    0xCCCB, 0xB334, 0xD334, 0xACCB, 0xCB34, 0xB4CB, 0xD4CB, 0xAB34,
+    0xCD34, 0xB2CB, 0xD2CB, 0xAD34, 0xCACB, 0xB534, 0xD534, 0xAACB,
+    0xCCB4, 0xB34B, 0xD34B, 0xACB4, 0xCB4B, 0xB4B4, 0xD4B4, 0xAB4B,
+    0xCD4B, 0xB2B4, 0xD2B4, 0xAD4B, 0xCAB4, 0xB54B, 0xD54B, 0xAAB4,
+    0xCCD4, 0xB32B, 0xD32B, 0xACD4, 0xCB2B, 0xB4D4, 0xD4D4, 0xAB2B,
+    0xCD2B, 0xB2D4, 0xD2D4, 0xAD2B, 0xCAD4, 0xB52B, 0xD52B, 0xAAD4,
+    0xCCAB, 0xB354, 0xD354, 0xACAB, 0xCB54, 0xB4AB, 0xD4AB, 0xAB54,
+    0xCD54, 0xB2AB, 0xD2AB, 0xAD54, 0xCAAB, 0xB554, 0xD554, 0xAAAB,
+    0xCCCD, 0xB332, 0xD332, 0xACCD, 0xCB32, 0xB4CD, 0xD4CD, 0xAB32,
+    0xCD32, 0xB2CD, 0xD2CD, 0xAD32, 0xCACD, 0xB532, 0xD532, 0xAACD,
+    0xCCB2, 0xB34D, 0xD34D, 0xACB2, 0xCB4D, 0xB4B2, 0xD4B2, 0xAB4D,
+    0xCD4D, 0xB2B2, 0xD2B2, 0xAD4D, 0xCAB2, 0xB54D, 0xD54D, 0xAAB2,
+    0xCCD2, 0xB32D, 0xD32D, 0xACD2, 0xCB2D, 0xB4D2, 0xD4D2, 0xAB2D,
+    0xCD2D, 0xB2D2, 0xD2D2, 0xAD2D, 0xCAD2, 0xB52D, 0xD52D, 0xAAD2,
+    0xCCAD, 0xB352, 0xD352, 0xACAD, 0xCB52, 0xB4AD, 0xD4AD, 0xAB52,
+    0xCD52, 0xB2AD, 0xD2AD, 0xAD52, 0xCAAD, 0xB552, 0xD552, 0xAAAD,
+    0xCCCA, 0xB335, 0xD335, 0xACCA, 0xCB35, 0xB4CA, 0xD4CA, 0xAB35,
+    0xCD35, 0xB2CA, 0xD2CA, 0xAD35, 0xCACA, 0xB535, 0xD535, 0xAACA,
+    0xCCB5, 0xB34A, 0xD34A, 0xACB5, 0xCB4A, 0xB4B5, 0xD4B5, 0xAB4A,
+    0xCD4A, 0xB2B5, 0xD2B5, 0xAD4A, 0xCAB5, 0xB54A, 0xD54A, 0xAAB5,
+    0xCCD5, 0xB32A, 0xD32A, 0xACD5, 0xCB2A, 0xB4D5, 0xD4D5, 0xAB2A,
+    0xCD2A, 0xB2D5, 0xD2D5, 0xAD2A, 0xCAD5, 0xB52A, 0xD52A, 0xAAD5,
+    0xCCAA, 0xB355, 0xD355, 0xACAA, 0xCB55, 0xB4AA, 0xD4AA, 0xAB55,
+    0xCD55, 0xB2AA, 0xD2AA, 0xAD55, 0xCAAA, 0xB555, 0xD555, 0xAAAA };
+/*
+ * Biphase patterns for the first 16 wire bits of each SP/DIF sub-frame: the
+ * preamble, whose deliberate biphase coding violations let a receiver detect
+ * (sub)frame starts, followed by four '0' data bits. snd_encode() sends Z for
+ * a left-channel sub-frame that starts a block, X for any other left-channel
+ * sub-frame and Y for right-channel sub-frames. Like the entries of the
+ * biphase table, each value starts with a '1' and is inverted by the encoder
+ * when the previous wire bit was also a '1'.
+ */
+const uint16_t x_preamble = 0xE2CC;
+const uint16_t y_preamble = 0xE4CC;
+const uint16_t z_preamble = 0xE8CC;
+
+// DMA channel configurations; rebuilt from defaults on every audio_play()
+static dma_channel_config snd_dma_a_cfg;
+static dma_channel_config snd_dma_b_cfg;
+
+// raw PCM sample buffers, filled from the audio file by audio_play()/audio_poll()
+static uint8_t sample_buf_a[AUDIO_BUFFER_SIZE];
+static uint8_t sample_buf_b[AUDIO_BUFFER_SIZE];
+
+// sample buffer state: audio_poll() moves STALE -> FILLING -> READY, snd_process_*() marks a fully encoded buffer STALE again
+enum bufstate { STALE, FILLING, READY };
+static volatile bufstate sbufst_a = STALE;
+static volatile bufstate sbufst_b = STALE;
+enum bufselect { A, B };
+static bufselect sbufsel = A; // sample buffer the encoder is currently consuming
+static uint16_t sbufpos = 0; // byte offset of the next chunk within that buffer
+static uint8_t sbufswap = 0; // nonzero when the samples are stored big-endian
+
+// buffers of biphase patterns that DMA streams to the SPI peripheral
+#define SAMPLE_CHUNK_SIZE 1024 // sample bytes encoded per pass, ~5.8ms
+#define WIRE_BUFFER_SIZE (SAMPLE_CHUNK_SIZE * 2) // in 16-bit patterns: 4 per 2-byte sample
+static uint16_t wire_buf_a[WIRE_BUFFER_SIZE];
+static uint16_t wire_buf_b[WIRE_BUFFER_SIZE];
+
+// tracking for audio playback
+static bool audio_active = false; // set by audio_play(), cleared by audio_stop()
+static volatile bool audio_stopping = false; // tells audio_dma_irq() to stop chaining the channels
+static FsFile audio_file;
+static uint32_t fleft; // bytes of the requested range not yet read from audio_file
+
+// encoder state carried across snd_encode() calls
+static uint16_t sfcnt = 0; // sub-frame count; 2 per frame, 192 frames/block
+static uint8_t invert = 0; // biphase encode help: set if last wire bit was '1'
+
+/*
+ * Appends one biphase pattern to the output. The pattern is inverted first
+ * when the previously emitted wire bit was '1', and the final wire bit of the
+ * emitted pattern is remembered for the next call.
+ */
+static inline void snd_emit(uint16_t* wire_patterns, uint16_t* widx, uint16_t wp) {
+    if (invert) wp = ~wp;
+    invert = wp & 1;
+    wire_patterns[(*widx)++] = wp;
+}
+
+/*
+ * Converts 16-bit stereo sound samples into the biphase wire patterns sent by
+ * the SPI peripheral. Every 2-byte sample becomes one SP/DIF sub-frame of 4
+ * patterns, so a left/right pair yields 8 patterns (128 wire bits, 1 frame).
+ *
+ * samples        sample bytes, or NULL to emit all-zero (silent) sub-frames
+ * wire_patterns  output, must have room for len * 2 patterns
+ * len            number of sample BYTES, i.e. twice the number of samples
+ * swap           zero for little-endian samples, nonzero for big-endian
+ *
+ * The sub-frame counter and the wire polarity are kept in file-scope state,
+ * so this is not safe to call from both cores and calls must happen in the
+ * same order the data is meant to be output.
+ */
+static void snd_encode(uint8_t* samples, uint16_t* wire_patterns, uint16_t len, uint8_t swap) {
+    uint16_t out = 0;
+    for (uint16_t pos = 0; pos < len; pos += 2) {
+        uint32_t subframe = 0;
+        uint8_t ones = 0;
+        if (samples != NULL) {
+            const uint8_t first = samples[pos];
+            const uint8_t second = samples[pos + 1];
+            subframe = swap ? (second + (first << 8)) : (first + (second << 8));
+
+            // fold the two bytes together so a single lookup yields the parity
+            ones = (subframe >> 8) ^ subframe;
+            ones = snd_parity[ones];
+
+            /*
+             * Move the sample into its sub-frame bit positions. This would
+             * normally be << 12, but with my DACs I've had persistent issues
+             * with signal clipping when sending data in the highest bit
+             * position, so it sits one bit lower and the sign bit is copied
+             * upward to keep the two's complement value intact.
+             */
+            subframe <<= 11;
+            if (subframe & 0x04000000) {
+                subframe |= 0x08000000;
+                ones++;
+            }
+        }
+
+        // P bit: set when needed to make the overall parity even
+        if (ones & 1) subframe |= 0x80000000;
+
+        // first pattern: preamble plus the 4 least-significant bits of
+        // 24-bit audio, pre-encoded as all '0' due to 16-bit samples
+        uint16_t preamble;
+        if (sfcnt == 0) {
+            preamble = z_preamble; // left channel, block start
+        } else if (sfcnt & 1) {
+            preamble = y_preamble; // right channel
+        } else {
+            preamble = x_preamble; // left channel, not block start
+        }
+        snd_emit(wire_patterns, &out, preamble);
+
+        // second pattern: next 8 bits (only high 4 have data)
+        snd_emit(wire_patterns, &out, biphase[(uint8_t) (subframe >> 8)]);
+        // third pattern: all audio data
+        snd_emit(wire_patterns, &out, biphase[(uint8_t) (subframe >> 16)]);
+        // fourth pattern: low 4 audio data and high 4 control bits
+        snd_emit(wire_patterns, &out, biphase[(uint8_t) (subframe >> 24)]);
+
+        // advance the sub-frame counter; 384 sub-frames complete a block
+        sfcnt++;
+        if (sfcnt == 384) sfcnt = 0;
+    }
+}
+
+/*
+ * Encodes the next SAMPLE_CHUNK_SIZE bytes of the currently selected sample
+ * buffer into wire_buf. If that buffer is not READY (sample underrun, or
+ * playback is being stopped) a chunk of silence is encoded instead and the
+ * read position is left untouched. Once the selected buffer has been fully
+ * consumed it is marked STALE so audio_poll() can refill it, and encoding
+ * moves on to the other buffer.
+ */
+static void snd_process(uint16_t* wire_buf) {
+    uint8_t* samples = (sbufsel == A) ? sample_buf_a : sample_buf_b;
+    volatile bufstate* state = (sbufsel == A) ? &sbufst_a : &sbufst_b;
+
+    if (*state != READY) {
+        snd_encode(NULL, wire_buf, SAMPLE_CHUNK_SIZE, sbufswap);
+        return;
+    }
+
+    snd_encode(samples + sbufpos, wire_buf, SAMPLE_CHUNK_SIZE, sbufswap);
+    sbufpos += SAMPLE_CHUNK_SIZE;
+    if (sbufpos >= AUDIO_BUFFER_SIZE) {
+        sbufsel = (sbufsel == A) ? B : A;
+        sbufpos = 0;
+        *state = STALE;
+    }
+}
+
+// functions for passing to Core1: each refills one wire buffer
+static void snd_process_a() {
+    snd_process(wire_buf_a);
+}
+static void snd_process_b() {
+    snd_process(wire_buf_b);
+}
+
+/*
+ * Main loop of Core1. Blocks on the inter-core FIFO, treats every word popped
+ * from it as the address of a function that takes no parameters and returns
+ * nothing, and calls that function. Never returns.
+ */
+static void core1_handler() {
+    for (;;) {
+        uintptr_t word = multicore_fifo_pop_blocking();
+        void (*task)() = (void (*)()) word;
+        task();
+    }
+}
+
+/* ------------------------------------------------------------------------ */
+/* ---------- VISIBLE FUNCTIONS ------------------------------------------- */
+/* ------------------------------------------------------------------------ */
+
+/*
+ * Services a finished wire-buffer transfer on one sound DMA channel:
+ * acknowledges the interrupt, asks Core1 to refill the buffer that was just
+ * sent, and re-arms the channel (without starting it) for its next turn.
+ * While playback is stopping, the channel is re-armed chained to itself,
+ * which turns chaining off so the stream runs out.
+ */
+static void snd_dma_rearm(unsigned int channel, dma_channel_config* cfg,
+        uint16_t* wire_buf, void (*refill)()) {
+    dma_hw->ints0 = (1 << channel);
+    multicore_fifo_push_blocking((uintptr_t) refill);
+    if (audio_stopping) {
+        channel_config_set_chain_to(cfg, channel);
+    }
+    dma_channel_configure(channel,
+            cfg,
+            &(spi_get_hw(AUDIO_SPI)->dr),
+            wire_buf,
+            WIRE_BUFFER_SIZE,
+            false);
+}
+
+// DMA interrupt entry point for the audio channels. Handles at most one
+// channel per call, checking channel A before channel B.
+void audio_dma_irq() {
+    if (dma_hw->intr & (1 << SOUND_DMA_CHA)) {
+        snd_dma_rearm(SOUND_DMA_CHA, &snd_dma_a_cfg, wire_buf_a, snd_process_a);
+        return;
+    }
+    if (dma_hw->intr & (1 << SOUND_DMA_CHB)) {
+        snd_dma_rearm(SOUND_DMA_CHB, &snd_dma_b_cfg, wire_buf_b, snd_process_b);
+    }
+}
+
+// Reports the playback flag: true from a successful audio_play() until
+// audio_stop() has finished shutting the stream down.
+bool audio_is_active() {
+    return audio_active;
+}
+
+/*
+ * One-time bring-up of the audio subsystem: configures the SPI peripheral
+ * used as the SP/DIF bit source, reserves the two sound DMA channels, and
+ * launches the Core1 worker loop. DMA transfers themselves are set up later
+ * by audio_play().
+ */
+void audio_setup() {
+    // CR0 field values for TI mode with 16 bits
+    const uint32_t cr0_ti_16bit = 0x1F;
+
+    // setup SPI to blast SP/DIF data over the TX pin
+    // the resulting rate will be slightly wrong, ~0.03% slow
+    spi_set_baudrate(AUDIO_SPI, 5644800);
+    hw_write_masked(&spi_get_hw(AUDIO_SPI)->cr0,
+            cr0_ti_16bit,
+            SPI_SSPCR0_DSS_BITS | SPI_SSPCR0_FRF_BITS);
+    // let the TX FIFO raise DMA requests, then enable the peripheral
+    spi_get_hw(AUDIO_SPI)->dmacr = SPI_SSPDMACR_TXDMAE_BITS;
+    hw_set_bits(&spi_get_hw(AUDIO_SPI)->cr1, SPI_SSPCR1_SSE_BITS);
+
+    // reserve both channels for audio use
+    dma_channel_claim(SOUND_DMA_CHA);
+    dma_channel_claim(SOUND_DMA_CHB);
+
+    log("Starting Core1 for audio");
+    multicore_launch_core1(core1_handler);
+}
+
+/*
+ * Keeps the sample buffers topped up; called from platform_poll().
+ *
+ * Does nothing unless playback is active. Once the requested range has been
+ * read completely, playback is stopped as soon as both sample buffers have
+ * been consumed. Otherwise at most one STALE buffer (A is checked first) is
+ * refilled from the audio file per call.
+ *
+ * Any part of the buffer that the read did not overwrite (the short final
+ * read of a range, a short read, or a failed read) is cleared to zero so the
+ * encoder outputs silence there instead of replaying samples left over from
+ * an earlier fill.
+ */
+void audio_poll() {
+    if (!audio_active) return;
+    if (fleft == 0) {
+        // out of data to read: stop once the remainder has been played
+        if (sbufst_a == STALE && sbufst_b == STALE) {
+            audio_stop();
+        }
+        return;
+    }
+
+    // are new audio samples needed from the memory card?
+    uint8_t* audiobuf;
+    volatile bufstate* state;
+    if (sbufst_a == STALE) {
+        state = &sbufst_a;
+        audiobuf = sample_buf_a;
+    } else if (sbufst_b == STALE) {
+        state = &sbufst_b;
+        audiobuf = sample_buf_b;
+    } else {
+        // no data needed this time
+        return;
+    }
+    *state = FILLING;
+
+    platform_set_sd_callback(NULL, NULL);
+    uint16_t toRead = AUDIO_BUFFER_SIZE;
+    if (fleft < toRead) toRead = fleft;
+    int got = audio_file.read(audiobuf, toRead);
+    if (got != toRead) {
+        log("Audio sample data underrun");
+    }
+    fleft -= toRead;
+
+    // silence everything past the bytes that were actually read
+    uint32_t valid = (got > 0) ? (uint32_t) got : 0;
+    for (uint32_t i = valid; i < AUDIO_BUFFER_SIZE; i++) {
+        audiobuf[i] = 0;
+    }
+
+    *state = READY;
+}
+
+/*
+ * Starts streaming the byte range [start, end) of a PCM file as SP/DIF.
+ *
+ * Any playback already in progress is stopped first. The request is rejected
+ * (with a log message, returning false) when the range is empty or inverted,
+ * the file cannot be opened, the range lies outside the file, the range does
+ * not exceed two sample buffers, the range does not fit the 32-bit
+ * remaining-bytes counter, or the initial seek/reads fail. On success both
+ * sample buffers are preloaded, the encoder state is reset, the two DMA
+ * channels are configured to chain to each other and channel A is started.
+ *
+ * file   path of the file containing PCM samples
+ * start  byte offset where playback begins, inclusive
+ * end    byte offset where playback ends, exclusive
+ * swap   false for little-endian samples, true for big-endian
+ * return true if playback was started, false otherwise
+ */
+bool audio_play(const char* file, uint64_t start, uint64_t end, bool swap) {
+    // stop any existing playback first
+    if (audio_active) audio_stop();
+
+    // debuglog("Request to play ('", file, "':", start, ":", end, ")");
+
+    // verify audio file is present and inputs are (somewhat) sane
+    if (start >= end) {
+        log("Invalid range for audio (", start, ":", end, ")");
+        return false;
+    }
+    platform_set_sd_callback(NULL, NULL);
+    audio_file = SD.open(file, O_RDONLY);
+    if (!audio_file.isOpen()) {
+        log("Unable to open file for audio playback: ", file);
+        return false;
+    }
+    uint64_t len = audio_file.size();
+    if (start > len || end > len) {
+        log("File '", file, "' playback request (",
+                start, ":", end, ":", len, ") outside bounds");
+        audio_file.close();
+        return false;
+    }
+    // fleft is only 32 bits wide: refuse ranges that would silently wrap
+    uint64_t span = end - start;
+    if (span > 0xFFFFFFFFull) {
+        log("File '", file, "' playback request (",
+                start, ":", end, ") too long");
+        audio_file.close();
+        return false;
+    }
+    fleft = (uint32_t) span;
+    if (fleft <= 2 * AUDIO_BUFFER_SIZE) {
+        log("File '", file, "' playback request (",
+                start, ":", end, ") too short");
+        audio_file.close();
+        return false;
+    }
+
+    // read in initial sample buffers
+    if (!audio_file.seek(start)) {
+        log("Sample file (", file, ") failed start seek to ", start);
+        audio_file.close();
+        return false;
+    }
+    if (audio_file.read(sample_buf_a, AUDIO_BUFFER_SIZE) != AUDIO_BUFFER_SIZE) {
+        log("File '", file, "' playback start returned fewer bytes than allowed");
+        audio_file.close();
+        return false;
+    }
+    if (audio_file.read(sample_buf_b, AUDIO_BUFFER_SIZE) != AUDIO_BUFFER_SIZE) {
+        log("File '", file, "' playback start returned fewer bytes than allowed");
+        audio_file.close();
+        return false;
+    }
+
+    // prepare initial tracking state
+    fleft -= AUDIO_BUFFER_SIZE * 2;
+    sbufsel = A;
+    sbufpos = 0;
+    sbufswap = swap;
+    sbufst_a = READY;
+    sbufst_b = READY;
+
+    // prepare the wire buffers; the first pass of each channel sends these
+    // zeroed patterns while Core1 encodes the first real chunks
+    for (uint16_t i = 0; i < WIRE_BUFFER_SIZE; i++) {
+        wire_buf_a[i] = 0;
+        wire_buf_b[i] = 0;
+    }
+    sfcnt = 0;
+    invert = 0;
+
+    // setup the two DMA units to hand-off to each other
+    // to maintain a stable bitstream these need to run without interruption
+    snd_dma_a_cfg = dma_channel_get_default_config(SOUND_DMA_CHA);
+    channel_config_set_transfer_data_size(&snd_dma_a_cfg, DMA_SIZE_16);
+    channel_config_set_dreq(&snd_dma_a_cfg, spi_get_dreq(AUDIO_SPI, true));
+    channel_config_set_read_increment(&snd_dma_a_cfg, true);
+    channel_config_set_chain_to(&snd_dma_a_cfg, SOUND_DMA_CHB);
+    // version of pico-sdk lacks channel_config_set_high_priority()
+    snd_dma_a_cfg.ctrl |= DMA_CH0_CTRL_TRIG_HIGH_PRIORITY_BITS;
+    dma_channel_configure(SOUND_DMA_CHA, &snd_dma_a_cfg, &(spi_get_hw(AUDIO_SPI)->dr),
+            &wire_buf_a, WIRE_BUFFER_SIZE, false);
+    dma_channel_set_irq0_enabled(SOUND_DMA_CHA, true);
+    snd_dma_b_cfg = dma_channel_get_default_config(SOUND_DMA_CHB);
+    channel_config_set_transfer_data_size(&snd_dma_b_cfg, DMA_SIZE_16);
+    channel_config_set_dreq(&snd_dma_b_cfg, spi_get_dreq(AUDIO_SPI, true));
+    channel_config_set_read_increment(&snd_dma_b_cfg, true);
+    channel_config_set_chain_to(&snd_dma_b_cfg, SOUND_DMA_CHA);
+    snd_dma_b_cfg.ctrl |= DMA_CH0_CTRL_TRIG_HIGH_PRIORITY_BITS;
+    dma_channel_configure(SOUND_DMA_CHB, &snd_dma_b_cfg, &(spi_get_hw(AUDIO_SPI)->dr),
+            &wire_buf_b, WIRE_BUFFER_SIZE, false);
+    dma_channel_set_irq0_enabled(SOUND_DMA_CHB, true);
+
+    // ready to go
+    dma_channel_start(SOUND_DMA_CHA);
+    audio_active = true;
+    return true;
+}
+
+/*
+ * Ends playback; a no-op when nothing is playing. Blocks until both sound
+ * DMA channels and the SPI peripheral have gone idle, then closes the audio
+ * file and clears the active flag.
+ */
+void audio_stop() {
+    if (!audio_active) return;
+
+    // Marking both sample buffers stale makes the encoder produce '0'
+    // samples, exactly as during a sample data underrun; sending a run of
+    // those before the datastream halts helps external hardware mute.
+    sbufst_a = STALE;
+    sbufst_b = STALE;
+
+    // ask the interrupt handler to stop chaining the channels to one
+    // another, then wait for the transfers to wind down on their own
+    audio_stopping = true;
+    while (dma_channel_is_busy(SOUND_DMA_CHA)) {
+        tight_loop_contents();
+    }
+    while (dma_channel_is_busy(SOUND_DMA_CHB)) {
+        tight_loop_contents();
+    }
+    while (spi_is_busy(AUDIO_SPI)) {
+        tight_loop_contents();
+    }
+    audio_stopping = false;
+
+    // idle the subsystem
+    if (audio_file.isOpen()) audio_file.close();
+    audio_active = false;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // ENABLE_AUDIO_OUTPUT

+ 86 - 0
lib/BlueSCSI_platform_RP2040/audio.h

@@ -0,0 +1,86 @@
+/** 
+ * Copyright (C) 2023 saybur
+ * 
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version. 
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details. 
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+**/
+
+#pragma once
+#ifdef ENABLE_AUDIO_OUTPUT
+
+#include <Arduino.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// audio subsystem DMA channels
+#define SOUND_DMA_CHA 6
+#define SOUND_DMA_CHB 7
+
+// size of the two audio sample buffers, in bytes
+// these must be divisible by 1024
+#define AUDIO_BUFFER_SIZE 8192 // ~46.44ms
+
+/**
+ * Handler for DMA interrupts
+ *
+ * This is called from scsi_dma_irq() in scsi_accel_rp2040.cpp. That is
+ * obviously a silly way to handle things. However, using
+ * irq_add_shared_handler() causes a lockup, likely due to pico-sdk issue #724
+ * fixed in 1.3.1. Current builds use pico-sdk 1.3.0 and are affected by
+ * the bug. To work around the problem the above exclusive handler
+ * delegates to this function if its normal mask is not matched.
+ */
+void audio_dma_irq();
+
+/**
+ * Indicates if the audio subsystem is actively streaming, including if it is
+ * sending silent data during sample stall events.
+ *
+ * \return true if audio streaming is active, false otherwise.
+ */
+bool audio_is_active();
+
+/**
+ * Initializes the audio subsystem. Should be called only once, toward the end
+ * of platform_late_init().
+ */
+void audio_setup();
+
+/**
+ * Called from platform_poll() to fill sample buffer(s) if needed.
+ */
+void audio_poll();
+
+/**
+ * Begins audio playback for a file.
+ *
+ * \param file   Path of a file containing PCM samples to play.
+ * \param start  Byte offset within file where playback will begin, inclusive.
+ * \param end    Byte offset within file where playback will end, exclusive.
+ * \param swap   If false, little-endian sample order, otherwise big-endian.
+ * \return       True if successful, false otherwise.
+ */
+bool audio_play(const char* file, uint64_t start, uint64_t end, bool swap);
+
+/**
+ * Stops audio playback.
+ */
+void audio_stop();
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // ENABLE_AUDIO_OUTPUT

+ 10 - 0
lib/BlueSCSI_platform_RP2040/rp2040_sdio.cpp

@@ -804,4 +804,14 @@ void rp2040_sdio_init(int clock_divider)
     // Set up IRQ handler when DMA completes.
     irq_set_exclusive_handler(DMA_IRQ_1, rp2040_sdio_tx_irq);
     irq_set_enabled(DMA_IRQ_1, true);
+#if 0
+#ifndef ENABLE_AUDIO_OUTPUT
+    irq_set_exclusive_handler(DMA_IRQ_1, rp2040_sdio_tx_irq);
+#else
+    // seem to hit assertion in _exclusive_handler call due to DMA_IRQ_0 being shared?
+    // slightly less efficient to do it this way, so investigate further at some point
+    irq_add_shared_handler(DMA_IRQ_1, rp2040_sdio_tx_irq, 0xFF);
+#endif
+    irq_set_enabled(DMA_IRQ_1, true);
+#endif
 }

+ 11 - 0
lib/BlueSCSI_platform_RP2040/scsi_accel_rp2040.cpp

@@ -15,6 +15,7 @@
 #include <hardware/irq.h>
 #include <hardware/structs/iobank0.h>
 #include <hardware/sync.h>
+#include <audio.h>
 #include <multicore.h>
 
 // SCSI bus write acceleration uses up to 3 PIO state machines:
@@ -722,7 +723,17 @@ void scsi_accel_rp2040_finishRead(const uint8_t *data, uint32_t count, int *pari
 
 static void scsi_dma_irq()
 {
+#ifndef ENABLE_AUDIO_OUTPUT
     dma_hw->ints0 = (1 << SCSI_DMA_CH_A);
+#else
+    // see audio.h for what's going on here
+    if (dma_hw->intr & (1 << SCSI_DMA_CH_A)) {
+        dma_hw->ints0 = (1 << SCSI_DMA_CH_A);
+    } else {
+        audio_dma_irq();
+        return;
+    }
+#endif
 
     scsidma_state_t state = g_scsi_dma_state;
     if (state == SCSIDMA_WRITE)