Browse Source

Attempting to get ZuluSCSI to work with a Pico 2

Morio 1 year ago
parent
commit
6f69dfb7b5
45 changed files with 9466 additions and 1 deletions
  1. 57 0
      boards/rpipico2.json
  2. 921 0
      lib/ZuluSCSI_platform_RP2350/ZuluSCSI_platform.cpp
  3. 190 0
      lib/ZuluSCSI_platform_RP2350/ZuluSCSI_platform.h
  4. 136 0
      lib/ZuluSCSI_platform_RP2350/ZuluSCSI_platform_gpio_BS2.h
  5. 172 0
      lib/ZuluSCSI_platform_RP2350/ZuluSCSI_platform_gpio_Pico.h
  6. 170 0
      lib/ZuluSCSI_platform_RP2350/ZuluSCSI_platform_gpio_RP2040.h
  7. 227 0
      lib/ZuluSCSI_platform_RP2350/ZuluSCSI_platform_msc.cpp
  8. 40 0
      lib/ZuluSCSI_platform_RP2350/ZuluSCSI_platform_msc.h
  9. 349 0
      lib/ZuluSCSI_platform_RP2350/ZuluSCSI_platform_network.cpp
  10. 45 0
      lib/ZuluSCSI_platform_RP2350/ZuluSCSI_platform_network.h
  11. 598 0
      lib/ZuluSCSI_platform_RP2350/audio.cpp
  12. 63 0
      lib/ZuluSCSI_platform_RP2350/audio.h
  13. 28 0
      lib/ZuluSCSI_platform_RP2350/bsp.h
  14. 41 0
      lib/ZuluSCSI_platform_RP2350/process-linker-script.py
  15. 145 0
      lib/ZuluSCSI_platform_RP2350/program_flash.cpp
  16. 222 0
      lib/ZuluSCSI_platform_RP2350/rp2040-template.ld
  17. 352 0
      lib/ZuluSCSI_platform_RP2350/rp2350-template.ld
  18. 319 0
      lib/ZuluSCSI_platform_RP2350/rp2350_btldr.ld
  19. 14 0
      lib/ZuluSCSI_platform_RP2350/run_pioasm.sh
  20. 36 0
      lib/ZuluSCSI_platform_RP2350/scsi2sd_time.h
  21. 303 0
      lib/ZuluSCSI_platform_RP2350/scsiHostPhy.cpp
  22. 55 0
      lib/ZuluSCSI_platform_RP2350/scsiHostPhy.h
  23. 418 0
      lib/ZuluSCSI_platform_RP2350/scsiPhy.cpp
  24. 97 0
      lib/ZuluSCSI_platform_RP2350/scsiPhy.h
  25. 173 0
      lib/ZuluSCSI_platform_RP2350/scsi_accel_host.cpp
  26. 32 0
      lib/ZuluSCSI_platform_RP2350/scsi_accel_host.h
  27. 46 0
      lib/ZuluSCSI_platform_RP2350/scsi_accel_host_Pico.pio
  28. 43 0
      lib/ZuluSCSI_platform_RP2350/scsi_accel_host_Pico.pio.h
  29. 46 0
      lib/ZuluSCSI_platform_RP2350/scsi_accel_host_RP2040.pio
  30. 44 0
      lib/ZuluSCSI_platform_RP2350/scsi_accel_host_RP2040.pio.h
  31. 1139 0
      lib/ZuluSCSI_platform_RP2350/scsi_accel_target.cpp
  32. 69 0
      lib/ZuluSCSI_platform_RP2350/scsi_accel_target.h
  33. 124 0
      lib/ZuluSCSI_platform_RP2350/scsi_accel_target_Pico.pio
  34. 225 0
      lib/ZuluSCSI_platform_RP2350/scsi_accel_target_Pico.pio.h
  35. 124 0
      lib/ZuluSCSI_platform_RP2350/scsi_accel_target_RP2040.pio
  36. 225 0
      lib/ZuluSCSI_platform_RP2350/scsi_accel_target_RP2040.pio.h
  37. 535 0
      lib/ZuluSCSI_platform_RP2350/sd_card_sdio.cpp
  38. 103 0
      lib/ZuluSCSI_platform_RP2350/sd_card_spi.cpp
  39. 848 0
      lib/ZuluSCSI_platform_RP2350/sdio.cpp
  40. 73 0
      lib/ZuluSCSI_platform_RP2350/sdio.h
  41. 164 0
      lib/ZuluSCSI_platform_RP2350/sdio_Pico.pio
  42. 121 0
      lib/ZuluSCSI_platform_RP2350/sdio_Pico.pio.h
  43. 164 0
      lib/ZuluSCSI_platform_RP2350/sdio_RP2040.pio
  44. 121 0
      lib/ZuluSCSI_platform_RP2350/sdio_RP2040.pio.h
  45. 49 1
      platformio.ini

+ 57 - 0
boards/rpipico2.json

@@ -0,0 +1,57 @@
+{
+    "build": {
+        "arduino": {
+            "earlephilhower": {
+                "boot2_source": "boot2_generic_03h_2_padded_checksum.S",
+                "usb_vid": "0x2E8A",
+                "usb_pid": "0x000F"
+            }
+        },
+        "core": "earlephilhower",
+        "cpu": "cortex-m33",
+        "extra_flags": "-DARDUINO_RASPBERRY_PI_PICO_2 -DARDUINO_ARCH_RP2040 -DUSBD_MAX_POWER_MA=250 ",
+        "f_cpu": "150000000L",
+        "hwids": [
+            [
+                "0x2E8A",
+                "0x00C0"
+            ],
+            [
+                "0x2E8A",
+                "0x000F"
+            ]
+        ],
+        "mcu": "rp2350",
+        "variant": "rpipico2"
+    },
+    "debug": {
+        "jlink_device": "RP2350_0",
+        "openocd_target": "rp2350.cfg",
+        "svd_path": "rp2350.svd"
+    },
+    "frameworks": [
+        "arduino"
+    ],
+    "name": "Pico 2",
+    "upload": {
+        "psram_length": 0,
+        "maximum_ram_size": 524288,
+        "maximum_size": 4194304,
+        "require_upload_port": true,
+        "native_usb": true,
+        "use_1200bps_touch": true,
+        "wait_for_upload_port": false,
+        "protocol": "cmsis-dap",
+        "protocols": [
+            "blackmagic",
+            "cmsis-dap",
+            "jlink",
+            "raspberrypi-swd",
+            "picotool",
+            "picoprobe",
+            "pico-debug"
+        ]
+    },
+    "url": "https://www.raspberrypi.org/products/raspberry-pi-pico/",
+    "vendor": "Raspberry Pi"
+}

+ 921 - 0
lib/ZuluSCSI_platform_RP2350/ZuluSCSI_platform.cpp

@@ -0,0 +1,921 @@
+/** 
+ * ZuluSCSI™ - Copyright (c) 2022 Rabbit Hole Computing™
+ * 
+ * ZuluSCSI™ firmware is licensed under the GPL version 3 or any later version. 
+ * 
+ * https://www.gnu.org/licenses/gpl-3.0.html
+ * ----
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version. 
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details. 
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+**/
+
+#include "ZuluSCSI_platform.h"
+#include "ZuluSCSI_log.h"
+#include "ZuluSCSI_config.h"
+#include <SdFat.h>
+#include <scsi.h>
+#include <assert.h>
+#include <hardware/gpio.h>
+#include <hardware/pio.h>
+#include <hardware/uart.h>
+#include <hardware/pll.h>
+#include <hardware/clocks.h>
+#include <hardware/spi.h>
+#include <hardware/adc.h>
+#include <hardware/flash.h>
+#include <hardware/structs/xip_ctrl.h>
+#include <hardware/structs/usb.h>
+#include <hardware/sync.h>
+#include "scsi_accel_target.h"
+
+
+#ifndef PIO_FRAMEWORK_ARDUINO_NO_USB
+# include <SerialUSB.h>
+# include <class/cdc/cdc_device.h>
+#endif
+
+#include <pico/multicore.h>
+
+#ifdef ZULUSCSI_NETWORK
+extern "C" {
+#  include <pico/cyw43_arch.h>
+} 
+#endif // ZULUSCSI_NETWORK
+
+#ifdef ENABLE_AUDIO_OUTPUT
+#  include "audio.h"
+#endif // ENABLE_AUDIO_OUTPUT
+
+extern bool g_rawdrive_active;
+
+extern "C" {
+
+// Platform name string used in log output; PLATFORM_NAME is defined outside this file.
+const char *g_platform_name = PLATFORM_NAME;
+// True when the board operates as SCSI initiator; written by platform_init() / platform_late_init().
+static bool g_scsi_initiator = false;
+// Flash size in bytes derived from the JEDEC ID response in platform_init(); 0 until then.
+static uint32_t g_flash_chip_size = 0;
+// Set after uart0 has been initialized; platform_log() only writes to the UART when this is true.
+static bool g_uart_initialized = false;
+
+/***************/
+/* GPIO init   */
+/***************/
+
+// Configure every aspect of a single GPIO in one call: output level, direction,
+// pull resistors, pin function and (optionally) fast slew rate.
+// Statement order is kept: level, direction, pulls, function, slew.
+static void gpio_conf(uint gpio, gpio_function_t fn, bool pullup, bool pulldown, bool output, bool initial_state, bool fast_slew)
+{
+    gpio_put(gpio, initial_state);
+    gpio_set_dir(gpio, output);
+    gpio_set_pulls(gpio, pullup, pulldown);
+    gpio_set_function(gpio, fn);
+
+    if (!fast_slew)
+    {
+        // Slew rate setting of the pad is left untouched
+        return;
+    }
+
+    // Set the SLEWFAST bit in this pad's control register
+    pads_bank0_hw->io[gpio] = pads_bank0_hw->io[gpio] | PADS_BANK0_GPIO0_SLEWFAST_BITS;
+}
+
+#ifdef ENABLE_AUDIO_OUTPUT
+// Increases clk_sys and clk_peri to 135.428571MHz at runtime to support
+// division to audio output rates. Invoke before anything is using clk_peri
+// except for the logging UART, which is handled below.
+//
+// Sequence: drain UART -> park clk_sys/clk_peri on pll_usb (48 MHz) ->
+// reprogram pll_sys -> switch both clocks back to pll_sys -> re-init UART.
+// NOTE(review): "Increases" and the datasheet section number below appear to be
+// carried over from the RP2040 port; the board file in this commit sets f_cpu to
+// 150 MHz, in which case this would lower the clock - confirm for RP2350.
+static void reclock_for_audio() {
+    // ensure UART is fully drained before we mess up its clock
+    uart_tx_wait_blocking(uart0);
+    // switch clk_sys and clk_peri to pll_usb
+    // see code in 2.15.6.1 of the datasheet for useful comments
+    clock_configure(clk_sys,
+            CLOCKS_CLK_SYS_CTRL_SRC_VALUE_CLKSRC_CLK_SYS_AUX,
+            CLOCKS_CLK_SYS_CTRL_AUXSRC_VALUE_CLKSRC_PLL_USB,
+            48 * MHZ,
+            48 * MHZ);
+    clock_configure(clk_peri,
+            0,
+            CLOCKS_CLK_PERI_CTRL_AUXSRC_VALUE_CLKSRC_PLL_USB,
+            48 * MHZ,
+            48 * MHZ);
+    // reset PLL for 135.428571MHz
+    // (VCO 948 MHz divided by post dividers 7 and 1 = 135.428571 MHz)
+    pll_init(pll_sys, 1, 948000000, 7, 1);
+    // switch clocks back to pll_sys
+    clock_configure(clk_sys,
+            CLOCKS_CLK_SYS_CTRL_SRC_VALUE_CLKSRC_CLK_SYS_AUX,
+            CLOCKS_CLK_SYS_CTRL_AUXSRC_VALUE_CLKSRC_PLL_SYS,
+            135428571,
+            135428571);
+    clock_configure(clk_peri,
+            0,
+            CLOCKS_CLK_PERI_CTRL_AUXSRC_VALUE_CLKSRC_PLL_SYS,
+            135428571,
+            135428571);
+    // reset UART for the new clock speed
+    uart_init(uart0, 1000000);
+}
+#endif  // ENABLE_AUDIO_OUTPUT
+
+#ifdef HAS_DIP_SWITCHES
+// Result of probing the setup strap on the ACK pin.
+enum pin_setup_state_t  {SETUP_FALSE, SETUP_TRUE, SETUP_UNDETERMINED};
+
+// Probe SCSI_IN_ACK by driving it high then low and reading it back through the
+// opposite weak pull each time.
+// Returns SETUP_TRUE when both reads are low, SETUP_FALSE when both reads are high,
+// and SETUP_UNDETERMINED when the pin simply kept the level that was last driven.
+static pin_setup_state_t read_setup_ack_pin()
+{
+    /* Revision 2022d of the RP2040 hardware has problems reading initiator DIP switch setting.
+     * The 74LVT245 hold current is keeping the GPIO_ACK state too strongly.
+     * Detect this condition by toggling the pin up and down and seeing if it sticks.
+     * 
+     * Revision 2023b and 2023c of the Pico boards have issues reading TERM and DEBUG DIP switch
+     * settings. GPIO_ACK is externally pulled down to ground for later revisions.
+     * If the state is detected as undetermined then the board is the 2023b or 2023c revision.
+     */
+
+    // Strong output high, then pulldown
+    //        pin             function       pup   pdown   out    state  fast
+    gpio_conf(SCSI_IN_ACK,  GPIO_FUNC_SIO, false, false, true,  true,  false);
+    gpio_conf(SCSI_IN_ACK,  GPIO_FUNC_SIO, false, true,  false, true,  false);
+    delay(1);
+    bool ack_state1 = gpio_get(SCSI_IN_ACK);
+    
+    // Strong output low, then pullup
+    //        pin             function       pup   pdown   out    state  fast
+    gpio_conf(SCSI_IN_ACK,  GPIO_FUNC_SIO, false, false, true,  false, false);
+    gpio_conf(SCSI_IN_ACK,  GPIO_FUNC_SIO, true,  false, false, false, false);
+    delay(1);
+    bool ack_state2 = gpio_get(SCSI_IN_ACK);
+
+    // Same level after both opposite drives means something external defines the pin
+    if (ack_state1 == ack_state2)
+    {
+        // Ok, was able to read the state directly
+        return !ack_state1 ? SETUP_TRUE : SETUP_FALSE;
+    }
+
+    // Enable OUT_BSY for a short time.
+    // If in target mode, this will force GPIO_ACK high.
+    gpio_put(SCSI_OUT_BSY, 0);
+    delay_100ns();
+    gpio_put(SCSI_OUT_BSY, 1);
+
+    return SETUP_UNDETERMINED;
+}
+#endif
+
+// Early platform initialization.
+// Stops core 1, clears PIO program memory, sets the external buffer control pins,
+// reads the DIP switches (when present), starts UART logging, detects the flash chip
+// size and configures SD card, LED and expansion header pins.
+void platform_init()
+{
+    // Make sure second core is stopped
+    multicore_reset_core1();
+
+    pio_clear_instruction_memory(pio0);
+    pio_clear_instruction_memory(pio1);
+    
+    /* First configure the pins that affect external buffer directions.
+     * RP2040 defaults to pulldowns, while these pins have external pull-ups.
+     */
+    //        pin             function       pup   pdown  out    state fast
+    gpio_conf(SCSI_DATA_DIR,  GPIO_FUNC_SIO, false,false, true,  true, true);
+    gpio_conf(SCSI_OUT_RST,   GPIO_FUNC_SIO, false,false, true,  true, true);
+    gpio_conf(SCSI_OUT_BSY,   GPIO_FUNC_SIO, false,false, true,  true, true);
+    gpio_conf(SCSI_OUT_SEL,   GPIO_FUNC_SIO, false,false, true,  true, true);
+
+    /* Check dip switch settings */
+#ifdef HAS_DIP_SWITCHES
+    gpio_conf(DIP_INITIATOR,  GPIO_FUNC_SIO, false, false, false, false, false);
+    gpio_conf(DIP_DBGLOG,     GPIO_FUNC_SIO, false, false, false, false, false);
+    gpio_conf(DIP_TERM,       GPIO_FUNC_SIO, false, false, false, false, false);
+    delay(10); // 10 ms delay to let pull-ups do their work
+    bool working_dip = true;
+    bool dbglog = false;
+    bool termination = false;
+# ifdef ZULUSCSI_PICO
+    // Initiator dip setting works on all rev 2023b, 2023c, and newer rev Pico boards
+    g_scsi_initiator = !gpio_get(DIP_INITIATOR);
+    
+    // DBGLOG and TERM switches are only readable when the ACK probe gives a definite answer
+    working_dip = SETUP_UNDETERMINED != read_setup_ack_pin();    
+    if (working_dip)
+    {
+        dbglog = !gpio_get(DIP_DBGLOG);
+        termination = !gpio_get(DIP_TERM);
+        
+    }
+# else
+    g_scsi_initiator = SETUP_TRUE == read_setup_ack_pin();
+    dbglog = !gpio_get(DIP_DBGLOG);
+    termination = !gpio_get(DIP_TERM);
+# endif
+#else
+    delay(10);
+#endif // HAS_DIP_SWITCHES
+
+#ifndef DISABLE_SWO
+    /* Initialize logging to SWO pin (UART0) */
+    gpio_conf(SWO_PIN,        GPIO_FUNC_UART,false,false, true,  false, true);
+    uart_init(uart0, 1000000);
+    g_uart_initialized = true;
+#endif // DISABLE_SWO
+
+    logmsg("Platform: ", g_platform_name);
+    logmsg("FW Version: ", g_log_firmwareversion);
+
+#ifdef HAS_DIP_SWITCHES
+    if (working_dip)
+    {       
+        logmsg("DIP switch settings: debug log ", (int)dbglog, ", termination ", (int)termination);
+        g_log_debug = dbglog;
+
+        if (termination)
+        {
+            logmsg("SCSI termination is enabled");
+        }
+        else
+        {
+            logmsg("NOTE: SCSI termination is disabled");
+        }
+    }
+    else
+    {
+        logmsg("SCSI termination is determined by the DIP switch labeled \"TERM\"");
+        logmsg("Debug logging can only be enabled via INI file \"DEBUG=1\" under [SCSI] in zuluscsi.ini");
+        logmsg("-- DEBUG DIP switch setting is ignored on ZuluSCSI Pico FS Rev. 2023b and 2023c boards");
+        g_log_debug = false;
+    }
+#else
+    g_log_debug = false;
+    logmsg ("SCSI termination is handled by a hardware jumper");
+#endif  // HAS_DIP_SWITCHES
+
+#ifdef ENABLE_AUDIO_OUTPUT
+    logmsg("SP/DIF audio to expansion header enabled");
+    logmsg("-- Overclocking to 135.428571MHz");
+    reclock_for_audio();
+#endif // ENABLE_AUDIO_OUTPUT
+
+    // Get flash chip size
+    // 0x9F is the JEDEC "read ID" command; the last response byte is used as log2(size in bytes).
+    uint8_t cmd_read_jedec_id[4] = {0x9f, 0, 0, 0};
+    uint8_t response_jedec[4] = {0};
+    uint32_t saved_irq = save_and_disable_interrupts();
+    flash_do_cmd(cmd_read_jedec_id, response_jedec, 4);
+    restore_interrupts(saved_irq);
+    // A capacity byte of 32 or more (e.g. 0xFF from a failed read) would make the
+    // shift undefined; report 0 so that later code treats the size as unknown.
+    g_flash_chip_size = (response_jedec[3] < 32) ? ((uint32_t)1 << response_jedec[3]) : 0;
+    logmsg("Flash chip size: ", (int)(g_flash_chip_size / 1024), " kB");
+
+    // SD card pins
+    // Card is used in SDIO mode for main program, and in SPI mode for crash handler & bootloader.
+    //        pin             function       pup   pdown  out    state fast
+    gpio_conf(SD_SPI_SCK,     GPIO_FUNC_SPI, true, false, true,  true, true);
+    gpio_conf(SD_SPI_MOSI,    GPIO_FUNC_SPI, true, false, true,  true, true);
+    gpio_conf(SD_SPI_MISO,    GPIO_FUNC_SPI, true, false, false, true, true);
+    gpio_conf(SD_SPI_CS,      GPIO_FUNC_SIO, true, false, true,  true, true);
+    gpio_conf(SDIO_D1,        GPIO_FUNC_SIO, true, false, false, true, true);
+    gpio_conf(SDIO_D2,        GPIO_FUNC_SIO, true, false, false, true, true);
+
+    // LED pin
+    gpio_conf(LED_PIN,        GPIO_FUNC_SIO, false,false, true,  false, false);
+
+#ifndef ENABLE_AUDIO_OUTPUT
+#ifdef GPIO_I2C_SDA
+    // I2C pins
+    //        pin             function       pup   pdown  out    state fast
+    gpio_conf(GPIO_I2C_SCL,   GPIO_FUNC_I2C, true,false, false,  true, true);
+    gpio_conf(GPIO_I2C_SDA,   GPIO_FUNC_I2C, true,false, false,  true, true);
+#endif  // GPIO_I2C_SDA
+#else
+    //        pin             function       pup   pdown  out    state fast
+    gpio_conf(GPIO_EXP_AUDIO, GPIO_FUNC_SPI, true,false, false,  true, true);
+    gpio_conf(GPIO_EXP_SPARE, GPIO_FUNC_SIO, true,false, false,  true, false);
+    // configuration of corresponding SPI unit occurs in audio_setup()
+#endif  // ENABLE_AUDIO_OUTPUT
+}
+
+// late_init() only runs in main application, SCSI not needed in bootloader
+// Logs the selected operating mode and configures the SCSI data and control pins
+// for either target mode or (when compiled in) initiator mode.
+void platform_late_init()
+{
+#if defined(HAS_DIP_SWITCHES) && defined(PLATFORM_HAS_INITIATOR_MODE)
+    if (g_scsi_initiator == true)
+    {
+        logmsg("SCSI initiator mode selected by DIP switch, expecting SCSI disks on the bus");
+    }
+    else
+    {
+        logmsg("SCSI target/disk mode selected by DIP switch, acting as a SCSI disk");
+    }
+#else
+    // Without DIP switches or initiator support the board is always a target
+    g_scsi_initiator = false;
+    logmsg("SCSI target/disk mode, acting as a SCSI disk");
+#endif // defined(HAS_DIP_SWITCHES) && defined(PLATFORM_HAS_INITIATOR_MODE)
+
+    /* Initialize SCSI pins to required modes.
+     * SCSI pins should be inactive / input at this point.
+     */
+
+    // SCSI data bus direction is switched by DATA_DIR signal.
+    // Pullups make sure that no glitches occur when switching direction.
+    //        pin             function       pup   pdown  out    state fast
+    gpio_conf(SCSI_IO_DB0,    GPIO_FUNC_SIO, true, false, false, true, true);
+    gpio_conf(SCSI_IO_DB1,    GPIO_FUNC_SIO, true, false, false, true, true);
+    gpio_conf(SCSI_IO_DB2,    GPIO_FUNC_SIO, true, false, false, true, true);
+    gpio_conf(SCSI_IO_DB3,    GPIO_FUNC_SIO, true, false, false, true, true);
+    gpio_conf(SCSI_IO_DB4,    GPIO_FUNC_SIO, true, false, false, true, true);
+    gpio_conf(SCSI_IO_DB5,    GPIO_FUNC_SIO, true, false, false, true, true);
+    gpio_conf(SCSI_IO_DB6,    GPIO_FUNC_SIO, true, false, false, true, true);
+    gpio_conf(SCSI_IO_DB7,    GPIO_FUNC_SIO, true, false, false, true, true);
+    gpio_conf(SCSI_IO_DBP,    GPIO_FUNC_SIO, true, false, false, true, true);
+
+    if (!g_scsi_initiator)
+    {
+        // Act as SCSI device / target
+
+        // SCSI control outputs
+        //        pin             function       pup   pdown  out    state fast
+        gpio_conf(SCSI_OUT_IO,    GPIO_FUNC_SIO, false,false, true,  true, true);
+        gpio_conf(SCSI_OUT_MSG,   GPIO_FUNC_SIO, false,false, true,  true, true);
+
+        // REQ pin is switched between PIO and SIO, pull-up makes sure no glitches
+        gpio_conf(SCSI_OUT_REQ,   GPIO_FUNC_SIO, true ,false, true,  true, true);
+
+        // Shared pins are changed to input / output depending on communication phase
+        gpio_conf(SCSI_IN_SEL,    GPIO_FUNC_SIO, true, false, false, true, true);
+        if (SCSI_OUT_CD != SCSI_IN_SEL)
+        {
+            gpio_conf(SCSI_OUT_CD,    GPIO_FUNC_SIO, false,false, true,  true, true);
+        }
+
+        gpio_conf(SCSI_IN_BSY,    GPIO_FUNC_SIO, true, false, false, true, true);
+        // When MSG and BSY are separate pins, MSG is configured as output again here
+        // (it was already configured above; the pin numbers are defined outside this file).
+        if (SCSI_OUT_MSG != SCSI_IN_BSY)
+        {
+            gpio_conf(SCSI_OUT_MSG,    GPIO_FUNC_SIO, false,false, true,  true, true);
+        }
+
+        // SCSI control inputs
+        //        pin             function       pup   pdown  out    state fast
+        gpio_conf(SCSI_IN_ACK,    GPIO_FUNC_SIO, true, false, false, true, false);
+        gpio_conf(SCSI_IN_ATN,    GPIO_FUNC_SIO, true, false, false, true, false);
+        gpio_conf(SCSI_IN_RST,    GPIO_FUNC_SIO, true, false, false, true, false);
+
+        // NOTE(review): USB serial is started only in this target-mode branch - confirm
+        // whether initiator mode is meant to run without it.
+#ifndef PIO_FRAMEWORK_ARDUINO_NO_USB
+    Serial.begin();
+#endif
+
+
+#ifdef ENABLE_AUDIO_OUTPUT
+        // one-time control setup for DMA channels and second core
+        audio_setup();
+#endif // ENABLE_AUDIO_OUTPUT
+    }
+    else
+    {
+#ifndef PLATFORM_HAS_INITIATOR_MODE
+        // Initiator flag set on a build without initiator support
+        assert(false);
+#else
+        // Act as SCSI initiator
+
+        //        pin             function       pup   pdown  out    state fast
+        gpio_conf(SCSI_IN_IO,     GPIO_FUNC_SIO, true ,false, false, true, false);
+        gpio_conf(SCSI_IN_MSG,    GPIO_FUNC_SIO, true ,false, false, true, false);
+        gpio_conf(SCSI_IN_CD,     GPIO_FUNC_SIO, true ,false, false, true, false);
+        gpio_conf(SCSI_IN_REQ,    GPIO_FUNC_SIO, true ,false, false, true, false);
+        gpio_conf(SCSI_IN_BSY,    GPIO_FUNC_SIO, true, false, false, true, false);
+        gpio_conf(SCSI_IN_RST,    GPIO_FUNC_SIO, true, false, false, true, false);
+        // Reinitialize OUT_RST to output mode. On RP Pico variant the pin is shared with IN_RST.
+        gpio_conf(SCSI_OUT_RST,   GPIO_FUNC_SIO, false, false, true,  true, true);
+        gpio_conf(SCSI_OUT_SEL,   GPIO_FUNC_SIO, false,false, true,  true, true);
+        gpio_conf(SCSI_OUT_ACK,   GPIO_FUNC_SIO, false,false, true,  true, true);
+        gpio_conf(SCSI_OUT_ATN,   GPIO_FUNC_SIO, false,false, true,  true, true);
+#endif  // PLATFORM_HAS_INITIATOR_MODE
+    }
+}
+
+// Platform hook with an intentionally empty body on this platform.
+void platform_post_sd_card_init() {}
+
+// Returns the current value of g_scsi_initiator (true = initiator mode, false = target mode).
+bool platform_is_initiator_mode_enabled()
+{
+    return g_scsi_initiator;
+}
+
+// Reconfigure the status LED pin as an SIO input without pulls so that it is
+// no longer driven, then note the change in the log.
+void platform_disable_led(void)
+{
+    const bool no_pull   = false;
+    const bool as_input  = false;
+    const bool level_low = false;
+    const bool slow_slew = false;
+
+    gpio_conf(LED_PIN, GPIO_FUNC_SIO, no_pull, no_pull, as_input, level_low, slow_slew);
+    logmsg("Disabling status LED");
+}
+
+/*****************************************/
+/* Crash handlers                        */
+/*****************************************/
+
+extern SdFs SD;
+extern uint32_t __StackTop;
+
+// Dump the log buffer into CRASHFILE on the SD card.
+// Does nothing while raw drive mode is active. The SD callback is cleared and the
+// card is re-initialized with SD_CONFIG_CRASH before the file is opened.
+void platform_emergency_log_save()
+{
+    if (g_rawdrive_active)
+    {
+        return;
+    }
+
+    platform_set_sd_callback(NULL, NULL);
+    SD.begin(SD_CONFIG_CRASH);
+
+    FsFile crashfile = SD.open(CRASHFILE, O_WRONLY | O_CREAT | O_TRUNC);
+    if (!crashfile.isOpen())
+    {
+        // Opening failed: re-initialize the card, giving up after 10 attempts,
+        // and then try to open the file once more.
+        for (int attempt = 0; attempt < 10; attempt++)
+        {
+            if (SD.begin(SD_CONFIG_CRASH))
+            {
+                break;
+            }
+        }
+
+        crashfile = SD.open(CRASHFILE, O_WRONLY | O_CREAT | O_TRUNC);
+    }
+
+    // The buffer is fetched and written twice with the same position variable
+    // (presumably to cover both parts of a wrapped ring buffer - see log_get_buffer()).
+    uint32_t startpos = 0;
+    crashfile.write(log_get_buffer(&startpos));
+    crashfile.write(log_get_buffer(&startpos));
+
+    crashfile.flush();
+    crashfile.close();
+}
+
+
+static void usb_log_poll();
+
+// Hard fault reporter: logs the crash context, saves the log to the SD card and then
+// blinks the faulting PC on the LED forever. Never returns.
+// sp is the stack pointer value passed by isr_hardfault(); it is read as the stacked
+// exception frame (words 0-3 = R0-R3, word 5 = LR, word 6 = PC).
+__attribute__((noinline))
+void show_hardfault(uint32_t *sp)
+{
+    uint32_t pc = sp[6];
+    uint32_t lr = sp[5];
+
+    logmsg("--------------");
+    logmsg("CRASH!");
+    logmsg("Platform: ", g_platform_name);
+    logmsg("FW Version: ", g_log_firmwareversion);
+    logmsg("scsiDev.cdb: ", bytearray(scsiDev.cdb, 12));
+    logmsg("scsiDev.phase: ", (int)scsiDev.phase);
+    logmsg("SP: ", (uint32_t)sp);
+    logmsg("PC: ", pc);
+    logmsg("LR: ", lr);
+    logmsg("R0: ", sp[0]);
+    logmsg("R1: ", sp[1]);
+    logmsg("R2: ", sp[2]);
+    logmsg("R3: ", sp[3]);
+
+    // Word-align the pointer before dumping raw stack contents
+    uint32_t *p = (uint32_t*)((uint32_t)sp & ~3);
+
+    // Dump at most 8 rows of 4 words each
+    for (int i = 0; i < 8; i++)
+    {
+        if (p == &__StackTop) break; // End of stack
+
+        logmsg("STACK ", (uint32_t)p, ":    ", p[0], " ", p[1], " ", p[2], " ", p[3]);
+        p += 4;
+    }
+
+    platform_emergency_log_save();
+
+    while (1)
+    {
+        usb_log_poll();
+        // Flash the crash address on the LED
+        // Short pulse means 0, long pulse means 1
+        int base_delay = 500;
+        // Most significant bit of the PC is sent first
+        for (int i = 31; i >= 0; i--)
+        {
+            LED_OFF();
+            for (int j = 0; j < base_delay; j++) busy_wait_ms(1);
+
+            int delay = (pc & (1 << i)) ? (3 * base_delay) : base_delay;
+            LED_ON();
+            for (int j = 0; j < delay; j++) busy_wait_ms(1);
+            LED_OFF();
+        }
+
+        // Long pause before the address is repeated
+        for (int j = 0; j < base_delay * 10; j++) busy_wait_ms(1);
+    }
+}
+
+// Hard fault entry point. Declared naked, so the body consists only of the inline
+// assembly below: it loads MSP into r0 and branches to show_hardfault(), which
+// loops forever and therefore never returns here.
+__attribute__((naked, interrupt))
+void isr_hardfault(void)
+{
+    // Copies stack pointer into first argument
+    asm("mrs r0, msp\n"
+        "bl show_hardfault": : : "r0");
+}
+
+
+/*****************************************/
+/* Debug logging and watchdog            */
+/*****************************************/
+
+// Forward pending log data to the USB serial port.
+// Bytes are taken from the shared log buffer, at most one CDC endpoint buffer
+// (CFG_TUD_CDC_EP_BUFSIZE) per call, and only when the port accepts data.
+//
+// Called from platform_reset_watchdog() and platform_poll() during normal
+// operation, from watchdog_callback() once the main loop has been stuck for a
+// while, and from the crash handler loop, so that log messages still reach
+// the host when the firmware hangs.
+static void usb_log_poll()
+{
+#ifndef PIO_FRAMEWORK_ARDUINO_NO_USB
+    // Read position in the log, kept across calls
+    static uint32_t logpos = 0;
+
+    if (!Serial.availableForWrite())
+    {
+        return;
+    }
+
+    // log_get_buffer() advances logpos and reports how many bytes it handed out
+    uint32_t pending = 0;
+    const char *chunk = log_get_buffer(&logpos, &pending);
+    if (pending == 0)
+    {
+        return;
+    }
+
+    // Limit to CDC packet size
+    uint32_t to_send = (pending > CFG_TUD_CDC_EP_BUFSIZE) ? CFG_TUD_CDC_EP_BUFSIZE : pending;
+
+    // Rewind the position by whatever was not actually written.
+    // If the USB CDC buffer is full, nothing may have been written at all.
+    uint32_t sent = Serial.write(chunk, to_send);
+    logpos -= pending - sent;
+#endif // PIO_FRAMEWORK_ARDUINO_NO_USB
+}
+
+
+// Supply voltage monitor built on the ADC.
+// The temperature sensor channel (input 4) is sampled continuously into the ADC FIFO;
+// its voltage is taken as 0.7 V, so a higher reading means a lower VDD.
+// Compiled to an empty function when PLATFORM_VDD_WARNING_LIMIT_mV is not positive.
+static void adc_poll()
+{
+#if PLATFORM_VDD_WARNING_LIMIT_mV > 0
+    static bool adc_ready = false;
+    static int warn_below_mV = PLATFORM_VDD_WARNING_LIMIT_mV;
+
+    // One-time ADC setup on first call
+    if (!adc_ready)
+    {
+        adc_init();
+        adc_set_temp_sensor_enabled(true);
+        adc_set_clkdiv(65535); // Lowest samplerate, about 2 kHz
+        adc_select_input(4);
+        adc_fifo_setup(true, false, 0, false, false);
+        adc_run(true);
+        adc_ready = true;
+    }
+
+#ifdef ENABLE_AUDIO_OUTPUT
+    /*
+     * Reading ADC samples (directly, through the FIFO or by DMA) at the moment a
+     * SPI DMA write starts appears to drop the first 16-bit word of the DMA data,
+     * which glitches the bitstream and makes the audio pop. Until that is solved,
+     * ADC reads are skipped while audio is playing.
+     */
+    if (audio_is_active())
+    {
+        return;
+    }
+#endif  // ENABLE_AUDIO_OUTPUT
+
+    // Drain the FIFO, remembering the largest sample
+    int peak = 0;
+    for (; !adc_fifo_is_empty(); )
+    {
+        int sample = adc_fifo_get();
+        peak = (sample > peak) ? sample : peak;
+    }
+
+    // sample = 700mV * 4096 / Vdd   =>   Vdd = 700mV * 4096 / sample
+    // The division is avoided in the common case by comparing the raw
+    // sample against the precomputed limit.
+    const int threshold = (700 * 4096) / PLATFORM_VDD_WARNING_LIMIT_mV;
+    if (peak <= threshold)
+    {
+        return;
+    }
+
+    // Warn once, and afterwards only when an even lower voltage is seen
+    int vdd_mV = (700 * 4096) / peak;
+    if (vdd_mV < warn_below_mV)
+    {
+        logmsg("WARNING: Detected supply voltage drop to ", vdd_mV, "mV. Verify power supply is adequate.");
+        warn_below_mV = vdd_mV - 50; // Small hysteresis to avoid excessive warnings
+    }
+#endif // PLATFORM_VDD_WARNING_LIMIT_mV > 0
+}
+
+// Log sink invoked for every log message: writes the string to uart0,
+// or drops it while the UART has not been initialized.
+void platform_log(const char *s)
+{
+    if (!g_uart_initialized)
+    {
+        return;
+    }
+
+    uart_puts(uart0, s);
+}
+
+static int g_watchdog_timeout;
+static bool g_watchdog_initialized;
+
+// Logs the state block shared by both watchdog reports, starting with the given headline.
+static void watchdog_log_state(const char *headline)
+{
+    logmsg("--------------");
+    logmsg(headline);
+    logmsg("Platform: ", g_platform_name);
+    logmsg("FW Version: ", g_log_firmwareversion);
+    logmsg("GPIO states: out ", sio_hw->gpio_out, " oe ", sio_hw->gpio_oe, " in ", sio_hw->gpio_in);
+    logmsg("scsiDev.cdb: ", bytearray(scsiDev.cdb, 12));
+    logmsg("scsiDev.phase: ", (int)scsiDev.phase);
+}
+
+// Logs up to 8 rows of 4 stack words starting at p, stopping early at __StackTop.
+static void watchdog_log_stack(uint32_t *p)
+{
+    for (int i = 0; i < 8; i++)
+    {
+        if (p == &__StackTop) break; // End of stack
+
+        logmsg("STACK ", (uint32_t)p, ":    ", p[0], " ", p[1], " ", p[2], " ", p[3]);
+        p += 4;
+    }
+}
+
+// Software watchdog, run from a hardware alarm once per second.
+// Each run subtracts 1000 ms from g_watchdog_timeout (reloaded by platform_reset_watchdog()).
+//  - more than one second without reload: start pushing the log out over USB
+//  - WATCHDOG_BUS_RESET_TIMEOUT elapsed: log state and request a SCSI bus reset
+//  - timeout fully expired: log state, save the log to SD card and reboot
+// alarm_num is the hardware alarm that fired; it is re-armed at the end.
+static void watchdog_callback(unsigned alarm_num)
+{
+    g_watchdog_timeout -= 1000;
+
+    if (g_watchdog_timeout < WATCHDOG_CRASH_TIMEOUT - 1000)
+    {
+        // Been stuck for at least a second, start dumping USB log
+        usb_log_poll();
+    }
+
+    if (g_watchdog_timeout <= WATCHDOG_CRASH_TIMEOUT - WATCHDOG_BUS_RESET_TIMEOUT)
+    {
+        // Report and request the reset unless both reset flags are already set
+        if (!scsiDev.resetFlag || !g_scsiHostPhyReset)
+        {
+            watchdog_log_state("WATCHDOG TIMEOUT, attempting bus reset");
+            scsi_accel_log_state();
+
+            // Read the main stack pointer directly (equivalent of __get_MSP())
+            uint32_t msp;
+            asm volatile ("MRS %0, msp" : "=r" (msp) );
+            watchdog_log_stack((uint32_t*)msp);
+
+            scsiDev.resetFlag = 1;
+            g_scsiHostPhyReset = true;
+        }
+
+        if (g_watchdog_timeout <= 0)
+        {
+            watchdog_log_state("WATCHDOG TIMEOUT, already attempted bus reset, rebooting");
+
+            // Read the main stack pointer directly (equivalent of __get_MSP())
+            uint32_t msp;
+            asm volatile ("MRS %0, msp" : "=r" (msp) );
+            watchdog_log_stack((uint32_t*)msp);
+
+            usb_log_poll();
+
+            platform_emergency_log_save();
+
+            platform_boot_to_main_firmware();
+        }
+    }
+
+    // Re-arm the alarm for the next one-second tick
+    hardware_alarm_set_target(alarm_num, delayed_by_ms(get_absolute_time(), 1000));
+}
+
+// Reload the software watchdog; meant to be called periodically from the main loop.
+// The first successful call claims a free hardware alarm and arms it so that
+// watchdog_callback() runs once per second. Every completed call also forwards
+// pending log data to USB.
+void platform_reset_watchdog()
+{
+    g_watchdog_timeout = WATCHDOG_CRASH_TIMEOUT;
+
+    if (!g_watchdog_initialized)
+    {
+        // Find the first hardware alarm nobody has claimed yet
+        int alarm_num = 0;
+        while (alarm_num < NUM_GENERIC_TIMERS && hardware_alarm_is_claimed(alarm_num))
+        {
+            alarm_num++;
+        }
+
+        if (alarm_num >= NUM_GENERIC_TIMERS)
+        {
+            logmsg("No free watchdog hardware alarms to claim");
+            return;
+        }
+
+        hardware_alarm_claim(alarm_num);
+        hardware_alarm_set_callback(alarm_num, &watchdog_callback);
+        hardware_alarm_set_target(alarm_num, delayed_by_ms(get_absolute_time(), 1000));
+        g_watchdog_initialized = true;
+    }
+
+    // Polling the USB log here as well makes sure that messages written in
+    // fault states still get delivered.
+    usb_log_poll();
+}
+
+// Poll function that is called every few milliseconds.
+// Can be left empty or used for platform-specific processing.
+// Here it forwards log data to USB, runs the supply voltage check and,
+// when audio output is compiled in, services the audio code.
+void platform_poll()
+{
+    usb_log_poll();
+    adc_poll();
+    
+#ifdef ENABLE_AUDIO_OUTPUT
+    audio_poll();
+#endif // ENABLE_AUDIO_OUTPUT
+}
+
+// Returns a bitmask of pressed buttons (bit 0 = button 1, bit 1 = button 2).
+// Inputs read low when pressed. A press is reported immediately; a release is
+// reported only after the inputs have been idle for more than 100 ms.
+uint8_t platform_get_buttons()
+{
+    uint8_t raw = 0;
+
+#if defined(ENABLE_AUDIO_OUTPUT)
+    // pulled to VCC via resistor, sinking when pressed
+    if (!gpio_get(GPIO_EXP_SPARE)) raw |= 1;
+#elif defined(GPIO_I2C_SDA)
+    // SDA = button 1, SCL = button 2
+    if (!gpio_get(GPIO_I2C_SDA)) raw |= 1;
+    if (!gpio_get(GPIO_I2C_SCL)) raw |= 2;
+#endif // defined(ENABLE_AUDIO_OUTPUT)
+
+    // State kept between calls for the release debounce
+    static uint32_t last_press_ms;
+    static uint8_t reported = 0;
+
+    if (raw == 0)
+    {
+        // Nothing pressed now: clear the report once the hold-off time has passed
+        if ((uint32_t)(millis() - last_press_ms) > 100)
+        {
+            reported = 0;
+        }
+    }
+    else
+    {
+        reported = raw;
+        last_press_ms = millis();
+    }
+
+    return reported;
+}
+
+
+
+/************************************/
+/* ROM drive in extra flash space   */
+/************************************/
+
+#ifdef PLATFORM_HAS_ROM_DRIVE
+
+# ifndef ROMDRIVE_OFFSET
+    // Reserve up to 352 kB for firmware by default.
+    #define ROMDRIVE_OFFSET (352 * 1024)
+# endif
+
+// Number of flash bytes available to the ROM drive: everything from
+// ROMDRIVE_OFFSET up to the end of the flash chip.
+uint32_t platform_get_romdrive_maxsize()
+{
+    // A detected size below the offset means the flash size read failed;
+    // a 2 MB chip is assumed in that case.
+    const uint32_t flash_bytes = (g_flash_chip_size >= ROMDRIVE_OFFSET) ? g_flash_chip_size : 2048 * 1024;
+
+    return flash_bytes - ROMDRIVE_OFFSET;
+}
+
+// Read count bytes of the ROM drive, starting at byte offset start, into dest
+// using the XIP streaming FIFO.
+//   dest  - destination buffer, must be 4-byte aligned
+//   start - offset inside the ROM drive area, must be below platform_get_romdrive_maxsize()
+//   count - number of bytes, must be a multiple of 4
+// Always returns true; the function busy-waits until all words have been received.
+bool platform_read_romdrive(uint8_t *dest, uint32_t start, uint32_t count)
+{
+    // Check the preconditions before any hardware register is touched.
+    // Transfer happens in multiples of 4 bytes
+    assert(start < platform_get_romdrive_maxsize());
+    assert((count & 3) == 0);
+    assert((((uint32_t)dest) & 3) == 0);
+
+    // Stop any stream still in progress and discard stale words from the FIFO
+    xip_ctrl_hw->stream_ctr = 0;
+
+    while (!(xip_ctrl_hw->stat & XIP_STAT_FIFO_EMPTY))
+    {
+        (void) xip_ctrl_hw->stream_fifo;
+    }
+
+    // Start streaming: address first, then the word count
+    xip_ctrl_hw->stream_addr = start + ROMDRIVE_OFFSET;
+    xip_ctrl_hw->stream_ctr = count / 4;
+
+    // Copy words out of the FIFO as they arrive
+    uint32_t *dest32 = (uint32_t*)dest;
+    uint32_t words_remain = count / 4;
+    while (words_remain > 0)
+    {
+        if (!(xip_ctrl_hw->stat & XIP_STAT_FIFO_EMPTY))
+        {
+            *dest32++ = xip_ctrl_hw->stream_fifo;
+            words_remain--;
+        }
+    }
+
+    return true;
+}
+
+// Erase and reprogram part of the ROM drive area in flash.
+// `start` is the byte offset within the ROM drive and `count` the number of
+// bytes to write from `data`; both must be multiples of
+// PLATFORM_ROMDRIVE_PAGE_SIZE.
+// Interrupts are disabled around the erase and program calls.
+// Always returns true.
+bool platform_write_romdrive(const uint8_t *data, uint32_t start, uint32_t count)
+{
+    assert(start < platform_get_romdrive_maxsize());
+    // The erase works on whole flash sectors, so the start offset has to be
+    // aligned as well as the length.
+    assert((start % PLATFORM_ROMDRIVE_PAGE_SIZE) == 0);
+    assert((count % PLATFORM_ROMDRIVE_PAGE_SIZE) == 0);
+
+    uint32_t saved_irq = save_and_disable_interrupts();
+    flash_range_erase(start + ROMDRIVE_OFFSET, count);
+    flash_range_program(start + ROMDRIVE_OFFSET, data, count);
+    restore_interrupts(saved_irq);
+    return true;
+}
+
+#endif // PLATFORM_HAS_ROM_DRIVE
+
+/**********************************************/
+/* Mapping from data bytes to GPIO BOP values */
+/**********************************************/
+
+/* A lookup table is the fastest way to calculate parity and convert the IO pin mapping for data bus.
+ * For RP2040 we expect that the bits are consecutive and in order.
+ * The PIO-based parity scheme also requires that the lookup table is aligned to 512-byte increment.
+ * The parity table is placed into SRAM4 area to reduce bus contention.
+ */
+
+// PARITY(n): 1 when the low 8 bits of n contain an even number of set bits,
+// otherwise 0 (the odd-parity bit for data byte n).
+#define PARITY(n) ((1 ^ (n) ^ ((n)>>1) ^ ((n)>>2) ^ ((n)>>3) ^ ((n)>>4) ^ ((n)>>5) ^ ((n)>>6) ^ ((n)>>7)) & 1)
+// X(n): GPIO word for data byte n. A GPIO bit is set where the corresponding
+// data or parity bit is 0 and cleared where it is 1.
+// The argument is parenthesized so that expression arguments expand safely.
+#define X(n) (\
+    (((n) & 0x01) ? 0 : (1 << SCSI_IO_DB0)) | \
+    (((n) & 0x02) ? 0 : (1 << SCSI_IO_DB1)) | \
+    (((n) & 0x04) ? 0 : (1 << SCSI_IO_DB2)) | \
+    (((n) & 0x08) ? 0 : (1 << SCSI_IO_DB3)) | \
+    (((n) & 0x10) ? 0 : (1 << SCSI_IO_DB4)) | \
+    (((n) & 0x20) ? 0 : (1 << SCSI_IO_DB5)) | \
+    (((n) & 0x40) ? 0 : (1 << SCSI_IO_DB6)) | \
+    (((n) & 0x80) ? 0 : (1 << SCSI_IO_DB7)) | \
+    (PARITY(n)  ? 0 : (1 << SCSI_IO_DBP)) \
+)
+
+// Write-side lookup table: indexed by the data byte (0x00-0xff), each entry is
+// the GPIO word produced by X() for that byte (DB0-DB7 and DBP bits).
+// 256 entries of 16 bits, aligned to 512 bytes and placed in the
+// ".scratch_x.parity" section.
+const uint16_t g_scsi_parity_lookup[256] __attribute__((aligned(512), section(".scratch_x.parity"))) =
+{
+    X(0x00), X(0x01), X(0x02), X(0x03), X(0x04), X(0x05), X(0x06), X(0x07), X(0x08), X(0x09), X(0x0a), X(0x0b), X(0x0c), X(0x0d), X(0x0e), X(0x0f),
+    X(0x10), X(0x11), X(0x12), X(0x13), X(0x14), X(0x15), X(0x16), X(0x17), X(0x18), X(0x19), X(0x1a), X(0x1b), X(0x1c), X(0x1d), X(0x1e), X(0x1f),
+    X(0x20), X(0x21), X(0x22), X(0x23), X(0x24), X(0x25), X(0x26), X(0x27), X(0x28), X(0x29), X(0x2a), X(0x2b), X(0x2c), X(0x2d), X(0x2e), X(0x2f),
+    X(0x30), X(0x31), X(0x32), X(0x33), X(0x34), X(0x35), X(0x36), X(0x37), X(0x38), X(0x39), X(0x3a), X(0x3b), X(0x3c), X(0x3d), X(0x3e), X(0x3f),
+    X(0x40), X(0x41), X(0x42), X(0x43), X(0x44), X(0x45), X(0x46), X(0x47), X(0x48), X(0x49), X(0x4a), X(0x4b), X(0x4c), X(0x4d), X(0x4e), X(0x4f),
+    X(0x50), X(0x51), X(0x52), X(0x53), X(0x54), X(0x55), X(0x56), X(0x57), X(0x58), X(0x59), X(0x5a), X(0x5b), X(0x5c), X(0x5d), X(0x5e), X(0x5f),
+    X(0x60), X(0x61), X(0x62), X(0x63), X(0x64), X(0x65), X(0x66), X(0x67), X(0x68), X(0x69), X(0x6a), X(0x6b), X(0x6c), X(0x6d), X(0x6e), X(0x6f),
+    X(0x70), X(0x71), X(0x72), X(0x73), X(0x74), X(0x75), X(0x76), X(0x77), X(0x78), X(0x79), X(0x7a), X(0x7b), X(0x7c), X(0x7d), X(0x7e), X(0x7f),
+    X(0x80), X(0x81), X(0x82), X(0x83), X(0x84), X(0x85), X(0x86), X(0x87), X(0x88), X(0x89), X(0x8a), X(0x8b), X(0x8c), X(0x8d), X(0x8e), X(0x8f),
+    X(0x90), X(0x91), X(0x92), X(0x93), X(0x94), X(0x95), X(0x96), X(0x97), X(0x98), X(0x99), X(0x9a), X(0x9b), X(0x9c), X(0x9d), X(0x9e), X(0x9f),
+    X(0xa0), X(0xa1), X(0xa2), X(0xa3), X(0xa4), X(0xa5), X(0xa6), X(0xa7), X(0xa8), X(0xa9), X(0xaa), X(0xab), X(0xac), X(0xad), X(0xae), X(0xaf),
+    X(0xb0), X(0xb1), X(0xb2), X(0xb3), X(0xb4), X(0xb5), X(0xb6), X(0xb7), X(0xb8), X(0xb9), X(0xba), X(0xbb), X(0xbc), X(0xbd), X(0xbe), X(0xbf),
+    X(0xc0), X(0xc1), X(0xc2), X(0xc3), X(0xc4), X(0xc5), X(0xc6), X(0xc7), X(0xc8), X(0xc9), X(0xca), X(0xcb), X(0xcc), X(0xcd), X(0xce), X(0xcf),
+    X(0xd0), X(0xd1), X(0xd2), X(0xd3), X(0xd4), X(0xd5), X(0xd6), X(0xd7), X(0xd8), X(0xd9), X(0xda), X(0xdb), X(0xdc), X(0xdd), X(0xde), X(0xdf),
+    X(0xe0), X(0xe1), X(0xe2), X(0xe3), X(0xe4), X(0xe5), X(0xe6), X(0xe7), X(0xe8), X(0xe9), X(0xea), X(0xeb), X(0xec), X(0xed), X(0xee), X(0xef),
+    X(0xf0), X(0xf1), X(0xf2), X(0xf3), X(0xf4), X(0xf5), X(0xf6), X(0xf7), X(0xf8), X(0xf9), X(0xfa), X(0xfb), X(0xfc), X(0xfd), X(0xfe), X(0xff)
+};
+
+#undef X
+
+/* Similarly, another lookup table is used to verify parity of received data.
+ * This table is indexed by the 8 data bits + 1 parity bit from SCSI bus (active low)
+ * Each word contains the data byte (inverted to active-high) and a bit indicating whether parity is valid.
+ */
+// X(n): bits 0-7 of the result are the low byte of n inverted; bit 8 is 1
+// when PARITY() of the low byte differs from bit 8 of n.
+// The argument is parenthesized so that expression arguments expand safely.
+#define X(n) (\
+    (((n) & 0xFF) ^ 0xFF) | \
+    (((PARITY((n) & 0xFF) ^ ((n) >> 8)) & 1) << 8) \
+)
+
+// Read-side lookup table: indexed by a 9-bit value (0x000-0x1ff), each entry
+// is the word produced by X() for that index.
+// 512 entries of 16 bits, aligned to 1024 bytes and placed in the
+// ".scratch_x.parity" section.
+const uint16_t g_scsi_parity_check_lookup[512] __attribute__((aligned(1024), section(".scratch_x.parity"))) =
+{
+    X(0x000), X(0x001), X(0x002), X(0x003), X(0x004), X(0x005), X(0x006), X(0x007), X(0x008), X(0x009), X(0x00a), X(0x00b), X(0x00c), X(0x00d), X(0x00e), X(0x00f),
+    X(0x010), X(0x011), X(0x012), X(0x013), X(0x014), X(0x015), X(0x016), X(0x017), X(0x018), X(0x019), X(0x01a), X(0x01b), X(0x01c), X(0x01d), X(0x01e), X(0x01f),
+    X(0x020), X(0x021), X(0x022), X(0x023), X(0x024), X(0x025), X(0x026), X(0x027), X(0x028), X(0x029), X(0x02a), X(0x02b), X(0x02c), X(0x02d), X(0x02e), X(0x02f),
+    X(0x030), X(0x031), X(0x032), X(0x033), X(0x034), X(0x035), X(0x036), X(0x037), X(0x038), X(0x039), X(0x03a), X(0x03b), X(0x03c), X(0x03d), X(0x03e), X(0x03f),
+    X(0x040), X(0x041), X(0x042), X(0x043), X(0x044), X(0x045), X(0x046), X(0x047), X(0x048), X(0x049), X(0x04a), X(0x04b), X(0x04c), X(0x04d), X(0x04e), X(0x04f),
+    X(0x050), X(0x051), X(0x052), X(0x053), X(0x054), X(0x055), X(0x056), X(0x057), X(0x058), X(0x059), X(0x05a), X(0x05b), X(0x05c), X(0x05d), X(0x05e), X(0x05f),
+    X(0x060), X(0x061), X(0x062), X(0x063), X(0x064), X(0x065), X(0x066), X(0x067), X(0x068), X(0x069), X(0x06a), X(0x06b), X(0x06c), X(0x06d), X(0x06e), X(0x06f),
+    X(0x070), X(0x071), X(0x072), X(0x073), X(0x074), X(0x075), X(0x076), X(0x077), X(0x078), X(0x079), X(0x07a), X(0x07b), X(0x07c), X(0x07d), X(0x07e), X(0x07f),
+    X(0x080), X(0x081), X(0x082), X(0x083), X(0x084), X(0x085), X(0x086), X(0x087), X(0x088), X(0x089), X(0x08a), X(0x08b), X(0x08c), X(0x08d), X(0x08e), X(0x08f),
+    X(0x090), X(0x091), X(0x092), X(0x093), X(0x094), X(0x095), X(0x096), X(0x097), X(0x098), X(0x099), X(0x09a), X(0x09b), X(0x09c), X(0x09d), X(0x09e), X(0x09f),
+    X(0x0a0), X(0x0a1), X(0x0a2), X(0x0a3), X(0x0a4), X(0x0a5), X(0x0a6), X(0x0a7), X(0x0a8), X(0x0a9), X(0x0aa), X(0x0ab), X(0x0ac), X(0x0ad), X(0x0ae), X(0x0af),
+    X(0x0b0), X(0x0b1), X(0x0b2), X(0x0b3), X(0x0b4), X(0x0b5), X(0x0b6), X(0x0b7), X(0x0b8), X(0x0b9), X(0x0ba), X(0x0bb), X(0x0bc), X(0x0bd), X(0x0be), X(0x0bf),
+    X(0x0c0), X(0x0c1), X(0x0c2), X(0x0c3), X(0x0c4), X(0x0c5), X(0x0c6), X(0x0c7), X(0x0c8), X(0x0c9), X(0x0ca), X(0x0cb), X(0x0cc), X(0x0cd), X(0x0ce), X(0x0cf),
+    X(0x0d0), X(0x0d1), X(0x0d2), X(0x0d3), X(0x0d4), X(0x0d5), X(0x0d6), X(0x0d7), X(0x0d8), X(0x0d9), X(0x0da), X(0x0db), X(0x0dc), X(0x0dd), X(0x0de), X(0x0df),
+    X(0x0e0), X(0x0e1), X(0x0e2), X(0x0e3), X(0x0e4), X(0x0e5), X(0x0e6), X(0x0e7), X(0x0e8), X(0x0e9), X(0x0ea), X(0x0eb), X(0x0ec), X(0x0ed), X(0x0ee), X(0x0ef),
+    X(0x0f0), X(0x0f1), X(0x0f2), X(0x0f3), X(0x0f4), X(0x0f5), X(0x0f6), X(0x0f7), X(0x0f8), X(0x0f9), X(0x0fa), X(0x0fb), X(0x0fc), X(0x0fd), X(0x0fe), X(0x0ff),
+    X(0x100), X(0x101), X(0x102), X(0x103), X(0x104), X(0x105), X(0x106), X(0x107), X(0x108), X(0x109), X(0x10a), X(0x10b), X(0x10c), X(0x10d), X(0x10e), X(0x10f),
+    X(0x110), X(0x111), X(0x112), X(0x113), X(0x114), X(0x115), X(0x116), X(0x117), X(0x118), X(0x119), X(0x11a), X(0x11b), X(0x11c), X(0x11d), X(0x11e), X(0x11f),
+    X(0x120), X(0x121), X(0x122), X(0x123), X(0x124), X(0x125), X(0x126), X(0x127), X(0x128), X(0x129), X(0x12a), X(0x12b), X(0x12c), X(0x12d), X(0x12e), X(0x12f),
+    X(0x130), X(0x131), X(0x132), X(0x133), X(0x134), X(0x135), X(0x136), X(0x137), X(0x138), X(0x139), X(0x13a), X(0x13b), X(0x13c), X(0x13d), X(0x13e), X(0x13f),
+    X(0x140), X(0x141), X(0x142), X(0x143), X(0x144), X(0x145), X(0x146), X(0x147), X(0x148), X(0x149), X(0x14a), X(0x14b), X(0x14c), X(0x14d), X(0x14e), X(0x14f),
+    X(0x150), X(0x151), X(0x152), X(0x153), X(0x154), X(0x155), X(0x156), X(0x157), X(0x158), X(0x159), X(0x15a), X(0x15b), X(0x15c), X(0x15d), X(0x15e), X(0x15f),
+    X(0x160), X(0x161), X(0x162), X(0x163), X(0x164), X(0x165), X(0x166), X(0x167), X(0x168), X(0x169), X(0x16a), X(0x16b), X(0x16c), X(0x16d), X(0x16e), X(0x16f),
+    X(0x170), X(0x171), X(0x172), X(0x173), X(0x174), X(0x175), X(0x176), X(0x177), X(0x178), X(0x179), X(0x17a), X(0x17b), X(0x17c), X(0x17d), X(0x17e), X(0x17f),
+    X(0x180), X(0x181), X(0x182), X(0x183), X(0x184), X(0x185), X(0x186), X(0x187), X(0x188), X(0x189), X(0x18a), X(0x18b), X(0x18c), X(0x18d), X(0x18e), X(0x18f),
+    X(0x190), X(0x191), X(0x192), X(0x193), X(0x194), X(0x195), X(0x196), X(0x197), X(0x198), X(0x199), X(0x19a), X(0x19b), X(0x19c), X(0x19d), X(0x19e), X(0x19f),
+    X(0x1a0), X(0x1a1), X(0x1a2), X(0x1a3), X(0x1a4), X(0x1a5), X(0x1a6), X(0x1a7), X(0x1a8), X(0x1a9), X(0x1aa), X(0x1ab), X(0x1ac), X(0x1ad), X(0x1ae), X(0x1af),
+    X(0x1b0), X(0x1b1), X(0x1b2), X(0x1b3), X(0x1b4), X(0x1b5), X(0x1b6), X(0x1b7), X(0x1b8), X(0x1b9), X(0x1ba), X(0x1bb), X(0x1bc), X(0x1bd), X(0x1be), X(0x1bf),
+    X(0x1c0), X(0x1c1), X(0x1c2), X(0x1c3), X(0x1c4), X(0x1c5), X(0x1c6), X(0x1c7), X(0x1c8), X(0x1c9), X(0x1ca), X(0x1cb), X(0x1cc), X(0x1cd), X(0x1ce), X(0x1cf),
+    X(0x1d0), X(0x1d1), X(0x1d2), X(0x1d3), X(0x1d4), X(0x1d5), X(0x1d6), X(0x1d7), X(0x1d8), X(0x1d9), X(0x1da), X(0x1db), X(0x1dc), X(0x1dd), X(0x1de), X(0x1df),
+    X(0x1e0), X(0x1e1), X(0x1e2), X(0x1e3), X(0x1e4), X(0x1e5), X(0x1e6), X(0x1e7), X(0x1e8), X(0x1e9), X(0x1ea), X(0x1eb), X(0x1ec), X(0x1ed), X(0x1ee), X(0x1ef),
+    X(0x1f0), X(0x1f1), X(0x1f2), X(0x1f3), X(0x1f4), X(0x1f5), X(0x1f6), X(0x1f7), X(0x1f8), X(0x1f9), X(0x1fa), X(0x1fb), X(0x1fc), X(0x1fd), X(0x1fe), X(0x1ff),
+};
+
+#undef X
+
+} /* extern "C" */

+ 190 - 0
lib/ZuluSCSI_platform_RP2350/ZuluSCSI_platform.h

@@ -0,0 +1,190 @@
+/** 
+ * ZuluSCSI™ - Copyright (c) 2022 Rabbit Hole Computing™
+ * 
+ * ZuluSCSI™ firmware is licensed under the GPL version 3 or any later version. 
+ * 
+ * https://www.gnu.org/licenses/gpl-3.0.html
+ * ----
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version. 
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details. 
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+**/
+
+// Platform-specific definitions for ZuluSCSI RP2040 hardware.
+
+#pragma once
+
+#include <stdint.h>
+#include <Arduino.h>
+#include "ZuluSCSI_platform_network.h"
+
+#ifdef ZULUSCSI_PICO
+// ZuluSCSI Pico carrier board variant
+#include "ZuluSCSI_platform_gpio_Pico.h"
+#elif defined(ZULUSCSI_BS2)
+// BS2 hardware variant, using Raspberry Pico board on a carrier PCB
+#include "ZuluSCSI_platform_gpio_BS2.h"
+#else
+// Normal RP2040 variant, using RP2040 chip directly
+#include "ZuluSCSI_platform_gpio_RP2040.h"
+#endif
+
+#include "scsiHostPhy.h"
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* These are used in debug output and default SCSI strings */
+extern const char *g_platform_name;
+
+#ifdef ZULUSCSI_PICO
+# ifdef ZULUSCSI_DAYNAPORT
+#   define PLATFORM_NAME "ZuluSCSI Pico DaynaPORT"
+# else
+#   define PLATFORM_NAME "ZuluSCSI Pico"
+# endif
+# define PLATFORM_REVISION "2.0"
+# define PLATFORM_HAS_INITIATOR_MODE 1
+# define DISABLE_SWO
+#elif defined(ZULUSCSI_BS2)
+# define PLATFORM_NAME "ZuluSCSI BS2"
+# define PLATFORM_REVISION "1.0"
+#else
+# define PLATFORM_NAME "ZuluSCSI RP2040"
+# define PLATFORM_REVISION "2.0"
+# define PLATFORM_HAS_INITIATOR_MODE 1
+#endif
+
+#define PLATFORM_MAX_SCSI_SPEED S2S_CFG_SPEED_SYNC_10
+#define PLATFORM_OPTIMAL_MIN_SD_WRITE_SIZE 32768
+#define PLATFORM_OPTIMAL_MAX_SD_WRITE_SIZE 65536
+#define PLATFORM_OPTIMAL_LAST_SD_WRITE_SIZE 8192
+#define SD_USE_SDIO 1
+#define PLATFORM_HAS_PARITY_CHECK 1
+
+#ifndef PLATFORM_VDD_WARNING_LIMIT_mV
+#define PLATFORM_VDD_WARNING_LIMIT_mV 2800
+#endif
+
+// NOTE: The driver supports synchronous speeds higher than 10MB/s, but this
+// has not been tested due to lack of fast enough SCSI adapter.
+// #define PLATFORM_MAX_SCSI_SPEED S2S_CFG_SPEED_TURBO
+
+// Debug logging function, can be used to print to e.g. serial port.
+// May get called from interrupt handlers.
+void platform_log(const char *s);
+void platform_emergency_log_save();
+
+// Timing and delay functions.
+// Arduino platform already provides these
+unsigned long millis(void);
+void delay(unsigned long ms);
+
+// Short delay given in nanoseconds; can be called from interrupt mode.
+// The request is rounded up to whole microseconds before being handed to
+// delayMicroseconds().
+static inline void delay_ns(unsigned long ns)
+{
+    const unsigned long whole_us = (ns + 999) / 1000;
+    delayMicroseconds(whole_us);
+}
+
+// Approximate fast delay
+// Executes 11 consecutive NOP instructions, so the real duration depends on
+// the CPU clock frequency.
+// NOTE(review): presumably tuned for a specific core clock - confirm that the
+// timing is still close to 100 ns on this platform.
+static inline void delay_100ns()
+{
+    asm volatile ("nop \n nop \n nop \n nop \n nop \n nop \n nop \n nop \n nop \n nop \n nop");
+}
+
+// Initialize SD card and GPIO configuration
+void platform_init();
+
+// Initialization for main application, not used for bootloader
+void platform_late_init();
+
+// Initialization after the SD Card has been found
+void platform_post_sd_card_init();
+
+// Disable the status LED
+void platform_disable_led(void);
+
+// Query whether initiator mode is enabled on targets with PLATFORM_HAS_INITIATOR_MODE
+bool platform_is_initiator_mode_enabled();
+
+// Setup soft watchdog if supported
+void platform_reset_watchdog();
+
+// Poll function that is called every few milliseconds.
+// The SD card is free to access during this time, and pauses of up to
+// a few milliseconds shouldn't disturb SCSI communication.
+void platform_poll();
+
+// Returns the state of any platform-specific buttons.
+// The returned value should be a mask for buttons 1-8 in bits 0-7 respectively,
+// where '1' is a button pressed and '0' is a button released.
+// Debouncing logic is left up to the specific implementation.
+// This function should return without significant delay.
+uint8_t platform_get_buttons();
+
+// Set callback that will be called during data transfer to/from SD card.
+// This can be used to implement simultaneous transfer to SCSI bus.
+typedef void (*sd_callback_t)(uint32_t bytes_complete);
+void platform_set_sd_callback(sd_callback_t func, const uint8_t *buffer);
+
+// Reprogram firmware in main program area.
+#ifndef RP2040_DISABLE_BOOTLOADER
+#define PLATFORM_BOOTLOADER_SIZE (128 * 1024)
+#define PLATFORM_FLASH_TOTAL_SIZE (1024 * 1024)
+#define PLATFORM_FLASH_PAGE_SIZE 4096
+bool platform_rewrite_flash_page(uint32_t offset, uint8_t buffer[PLATFORM_FLASH_PAGE_SIZE]);
+void platform_boot_to_main_firmware();
+#endif
+
+// ROM drive in the unused external flash area
+#ifndef RP2040_DISABLE_ROMDRIVE
+#define PLATFORM_HAS_ROM_DRIVE 1
+// Check maximum available space for ROM drive in bytes
+uint32_t platform_get_romdrive_maxsize();
+
+// Read ROM drive area
+bool platform_read_romdrive(uint8_t *dest, uint32_t start, uint32_t count);
+
+// Reprogram ROM drive area
+#define PLATFORM_ROMDRIVE_PAGE_SIZE 4096
+bool platform_write_romdrive(const uint8_t *data, uint32_t start, uint32_t count);
+#endif
+
+// Parity lookup tables for write and read from SCSI bus.
+// These are used by macros below and the code in scsi_accel_rp2040.cpp
+extern const uint16_t g_scsi_parity_lookup[256];
+extern const uint16_t g_scsi_parity_check_lookup[512];
+
+
+
+#ifdef __cplusplus
+}
+
+
+
+// SD card driver for SdFat
+
+#ifdef SD_USE_SDIO
+class SdioConfig;
+extern SdioConfig g_sd_sdio_config;
+#define SD_CONFIG g_sd_sdio_config
+#define SD_CONFIG_CRASH g_sd_sdio_config
+#else
+class SdSpiConfig;
+extern SdSpiConfig g_sd_spi_config;
+#define SD_CONFIG g_sd_spi_config
+#define SD_CONFIG_CRASH g_sd_spi_config
+#endif
+
+#endif

+ 136 - 0
lib/ZuluSCSI_platform_RP2350/ZuluSCSI_platform_gpio_BS2.h

@@ -0,0 +1,136 @@
+/**
+ * ZuluSCSI™ - Copyright (c) 2022 Rabbit Hole Computing™
+ *
+ * ZuluSCSI™ firmware is licensed under the GPL version 3 or any later version. 
+ *
+ * https://www.gnu.org/licenses/gpl-3.0.html
+ * ----
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version. 
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+**/
+
+// GPIO definitions for BSv2-based hardware
+#pragma once
+
+#include <hardware/gpio.h>
+
+// SCSI data input/output port.
+// The data bus uses external bidirectional buffer, with
+// direction controlled by DATA_DIR pin.
+#define SCSI_IO_DB0  0
+#define SCSI_IO_DB1  1
+#define SCSI_IO_DB2  2
+#define SCSI_IO_DB3  3
+#define SCSI_IO_DB4  4
+#define SCSI_IO_DB5  5
+#define SCSI_IO_DB6  6
+#define SCSI_IO_DB7  7
+#define SCSI_IO_DBP  8
+#define SCSI_IO_DATA_MASK 0x1FF
+#define SCSI_IO_SHIFT 0
+
+// Data direction control
+#define SCSI_DATA_DIR 9
+
+// SCSI output status lines
+// NOTE(review): SCSI_OUT_RST has the same pin number as SCSI_OUT_IO (22),
+// while SCSI_IN_RST below is 21 - confirm the shared pin is intentional for
+// this board.
+#define SCSI_OUT_IO   22
+#define SCSI_OUT_CD   18
+#define SCSI_OUT_MSG  20
+#define SCSI_OUT_RST  22
+#define SCSI_OUT_BSY  27
+#define SCSI_OUT_REQ  17
+#define SCSI_OUT_SEL  19
+
+// SCSI input status signals
+// SEL and BSY use the same pin numbers as the CD and MSG outputs above.
+#define SCSI_IN_SEL  18
+#define SCSI_IN_ACK  26
+#define SCSI_IN_ATN  28
+#define SCSI_IN_BSY  20
+#define SCSI_IN_RST  21
+
+// Status LED pins
+#define LED_PIN      25
+#define LED_ON()     sio_hw->gpio_set = 1 << LED_PIN
+#define LED_OFF()    sio_hw->gpio_clr = 1 << LED_PIN
+
+// SD card pins in SDIO mode
+#define SDIO_CLK 10
+#define SDIO_CMD 11
+#define SDIO_D0  12
+#define SDIO_D1  13
+#define SDIO_D2  14
+#define SDIO_D3  15
+
+// SD card pins in SPI mode
+#define SD_SPI       spi0
+#define SD_SPI_SCK   10
+#define SD_SPI_MOSI  11
+#define SD_SPI_MISO  12
+#define SD_SPI_CS    15
+
+
+// Other pins
+#define SWO_PIN 16
+
+
+// Below are GPIO access definitions that are used from scsiPhy.cpp.
+// Each macro expands to one fully parenthesized expression, so it can be
+// combined with other operators without precedence surprises.
+
+// Write a single SCSI pin.
+// Example use: SCSI_OUT(BSY, 1) sets SCSI_BSY to low (active) state.
+#define SCSI_OUT(pin, state) \
+    (*((state) ? &sio_hw->gpio_clr : &sio_hw->gpio_set) = 1 << (SCSI_OUT_ ## pin))
+
+// Read a single SCSI pin.
+// Example use: SCSI_IN(ATN), returns 1 for active low state.
+#define SCSI_IN(pin) \
+    ((sio_hw->gpio_in & (1 << (SCSI_IN_ ## pin))) ? 0 : 1)
+
+// Enable driving of shared control pins
+#define SCSI_ENABLE_CONTROL_OUT() \
+    (sio_hw->gpio_oe_set = (1 << SCSI_OUT_CD) | \
+                           (1 << SCSI_OUT_MSG))
+
+// Set SCSI data bus to output
+// (sets the DATA_DIR pin high, then enables the data pin output drivers)
+#define SCSI_ENABLE_DATA_OUT() \
+    (sio_hw->gpio_set = (1 << SCSI_DATA_DIR), \
+     sio_hw->gpio_oe_set = SCSI_IO_DATA_MASK)
+
+// Write SCSI data bus, also sets REQ to inactive.
+#define SCSI_OUT_DATA(data) \
+    (gpio_put_masked(SCSI_IO_DATA_MASK | (1 << SCSI_OUT_REQ), \
+                     g_scsi_parity_lookup[(uint8_t)(data)] | (1 << SCSI_OUT_REQ)), \
+     SCSI_ENABLE_DATA_OUT())
+
+// Release SCSI data bus and REQ signal
+#define SCSI_RELEASE_DATA_REQ() \
+    (sio_hw->gpio_oe_clr = SCSI_IO_DATA_MASK, \
+     sio_hw->gpio_clr = (1 << SCSI_DATA_DIR), \
+     sio_hw->gpio_set = (1 << SCSI_OUT_REQ))
+
+// Release all SCSI outputs
+// (drives every output high, waits 1 ms via delay(1), then stops driving the
+// shared CD and MSG pins)
+#define SCSI_RELEASE_OUTPUTS() \
+    (SCSI_RELEASE_DATA_REQ(), \
+     sio_hw->gpio_set = (1 << SCSI_OUT_IO) | \
+                        (1 << SCSI_OUT_CD) | \
+                        (1 << SCSI_OUT_MSG) | \
+                        (1 << SCSI_OUT_RST) | \
+                        (1 << SCSI_OUT_BSY) | \
+                        (1 << SCSI_OUT_REQ) | \
+                        (1 << SCSI_OUT_SEL), \
+     delay(1), \
+     sio_hw->gpio_oe_clr = (1 << SCSI_OUT_CD) | \
+                           (1 << SCSI_OUT_MSG))
+
+// Read SCSI data bus
+// (returns the data and parity pins inverted, so an active-low line reads as 1)
+#define SCSI_IN_DATA() \
+    ((~sio_hw->gpio_in & SCSI_IO_DATA_MASK) >> SCSI_IO_SHIFT)

+ 172 - 0
lib/ZuluSCSI_platform_RP2350/ZuluSCSI_platform_gpio_Pico.h

@@ -0,0 +1,172 @@
+/** 
+ * ZuluSCSI™ - Copyright (c) 2022 Rabbit Hole Computing™
+ * 
+ * ZuluSCSI™ firmware is licensed under the GPL version 3 or any later version. 
+ * 
+ * https://www.gnu.org/licenses/gpl-3.0.html
+ * ----
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version. 
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details. 
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+**/
+
+// GPIO definitions for ZuluSCSI RP2040-based hardware
+
+#pragma once
+
+#include <hardware/gpio.h>
+
+// SCSI data input/output port.
+// The data bus uses external bidirectional buffer, with
+// direction controlled by DATA_DIR pin.
+#define SCSI_IO_DB0  0
+#define SCSI_IO_DB1  1
+#define SCSI_IO_DB2  2
+#define SCSI_IO_DB3  3
+#define SCSI_IO_DB4  4
+#define SCSI_IO_DB5  5
+#define SCSI_IO_DB6  6
+#define SCSI_IO_DB7  7
+#define SCSI_IO_DBP  8
+#define SCSI_IO_DATA_MASK 0x1FF
+#define SCSI_IO_SHIFT 0
+
+// Data direction control
+#define SCSI_DATA_DIR 9
+
+// SCSI output status lines
+#define SCSI_OUT_IO   22
+#define SCSI_OUT_CD   18
+#define SCSI_OUT_MSG  20
+#define SCSI_OUT_RST  21
+#define SCSI_OUT_BSY  27
+#define SCSI_OUT_REQ  17
+#define SCSI_OUT_SEL  19
+
+// SCSI input status signals
+#define SCSI_IN_SEL  18
+#define SCSI_IN_ACK  26
+#define SCSI_IN_ATN  28
+#define SCSI_IN_BSY  20
+#define SCSI_IN_RST  21
+
+// Status line outputs for initiator mode
+#define SCSI_OUT_ACK  26
+#define SCSI_OUT_ATN  28
+
+// Status line inputs for initiator mode
+#define SCSI_IN_IO    22
+#define SCSI_IN_CD    18
+#define SCSI_IN_MSG   20
+#define SCSI_IN_REQ   17
+
+// Status LED pins
+#define LED_PIN      16
+#define LED_ON()    sio_hw->gpio_set = 1 << LED_PIN
+#define LED_OFF()   sio_hw->gpio_clr = 1 << LED_PIN
+
+
+// SD card pins in SDIO mode
+#define SDIO_CLK 10
+#define SDIO_CMD 11
+#define SDIO_D0  12
+#define SDIO_D1  13
+#define SDIO_D2  14
+#define SDIO_D3  15
+
+// SD card pins in SPI mode
+#define SD_SPI       spi0
+#define SD_SPI_SCK   10
+#define SD_SPI_MOSI  11
+#define SD_SPI_MISO  12
+#define SD_SPI_CS    15
+
+#ifndef ENABLE_AUDIO_OUTPUT
+    // No spare pins for I2C
+    // IO expander I2C
+    // #define GPIO_I2C_SDA 14
+    // #define GPIO_I2C_SCL 15
+#else
+    // IO expander I2C pins being used as SPI for audio
+    #define AUDIO_SPI      spi1
+    #define GPIO_EXP_SPARE 14
+    #define GPIO_EXP_AUDIO 15
+#endif
+
+// DIP switch pins
+#define HAS_DIP_SWITCHES
+#define DIP_INITIATOR 28
+#define DIP_DBGLOG 17
+#define DIP_TERM 22
+
+// Other pins
+#define SWO_PIN 16
+
+// Below are GPIO access definitions that are used from scsiPhy.cpp.
+// Each macro expands to one fully parenthesized expression, so it can be
+// combined with other operators without precedence surprises.
+
+// Write a single SCSI pin.
+// Example use: SCSI_OUT(ATN, 1) sets SCSI_ATN to low (active) state.
+#define SCSI_OUT(pin, state) \
+    (*((state) ? &sio_hw->gpio_clr : &sio_hw->gpio_set) = 1 << (SCSI_OUT_ ## pin))
+
+// Read a single SCSI pin.
+// Example use: SCSI_IN(ATN), returns 1 for active low state.
+#define SCSI_IN(pin) \
+    ((sio_hw->gpio_in & (1 << (SCSI_IN_ ## pin))) ? 0 : 1)
+
+// Set pin directions for initiator vs. target mode
+// (ACK and ATN become outputs; IO, CD, MSG and REQ become inputs)
+#define SCSI_ENABLE_INITIATOR() \
+    ((sio_hw->gpio_oe_set = (1 << SCSI_OUT_ACK) | \
+                            (1 << SCSI_OUT_ATN)), \
+     (sio_hw->gpio_oe_clr = (1 << SCSI_IN_IO) | \
+                            (1 << SCSI_IN_CD) | \
+                            (1 << SCSI_IN_MSG) | \
+                            (1 << SCSI_IN_REQ)))
+
+// Enable driving of shared control pins
+#define SCSI_ENABLE_CONTROL_OUT() \
+    (sio_hw->gpio_oe_set = (1 << SCSI_OUT_CD) | \
+                           (1 << SCSI_OUT_MSG))
+
+// Set SCSI data bus to output
+// (sets the DATA_DIR pin low, then enables the data pin output drivers)
+#define SCSI_ENABLE_DATA_OUT() \
+    (sio_hw->gpio_clr = (1 << SCSI_DATA_DIR), \
+     sio_hw->gpio_oe_set = SCSI_IO_DATA_MASK)
+
+// Write SCSI data bus, also sets REQ to inactive.
+#define SCSI_OUT_DATA(data) \
+    (gpio_put_masked(SCSI_IO_DATA_MASK | (1 << SCSI_OUT_REQ), \
+                     g_scsi_parity_lookup[(uint8_t)(data)] | (1 << SCSI_OUT_REQ)), \
+     SCSI_ENABLE_DATA_OUT())
+
+// Release SCSI data bus and REQ signal
+#define SCSI_RELEASE_DATA_REQ() \
+    (sio_hw->gpio_oe_clr = SCSI_IO_DATA_MASK, \
+     sio_hw->gpio_set = (1 << SCSI_DATA_DIR) | (1 << SCSI_OUT_REQ))
+
+// Release all SCSI outputs
+// (stops driving the shared CD and MSG pins, then sets every output high)
+#define SCSI_RELEASE_OUTPUTS() \
+    (SCSI_RELEASE_DATA_REQ(), \
+     sio_hw->gpio_oe_clr = (1 << SCSI_OUT_CD) | \
+                           (1 << SCSI_OUT_MSG), \
+     sio_hw->gpio_set = (1 << SCSI_OUT_IO) | \
+                        (1 << SCSI_OUT_CD) | \
+                        (1 << SCSI_OUT_MSG) | \
+                        (1 << SCSI_OUT_RST) | \
+                        (1 << SCSI_OUT_BSY) | \
+                        (1 << SCSI_OUT_REQ) | \
+                        (1 << SCSI_OUT_SEL))
+
+// Read SCSI data bus
+// (returns the data and parity pins inverted, so an active-low line reads as 1)
+#define SCSI_IN_DATA() \
+    ((~sio_hw->gpio_in & SCSI_IO_DATA_MASK) >> SCSI_IO_SHIFT)
+

+ 170 - 0
lib/ZuluSCSI_platform_RP2350/ZuluSCSI_platform_gpio_RP2040.h

@@ -0,0 +1,170 @@
+/** 
+ * ZuluSCSI™ - Copyright (c) 2022 Rabbit Hole Computing™
+ * 
+ * ZuluSCSI™ firmware is licensed under the GPL version 3 or any later version. 
+ * 
+ * https://www.gnu.org/licenses/gpl-3.0.html
+ * ----
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version. 
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details. 
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+**/
+
+// GPIO definitions for ZuluSCSI RP2040-based hardware
+
+#pragma once
+
+#include <hardware/gpio.h>
+
+// SCSI data input/output port.
+// The data bus uses external bidirectional buffer, with
+// direction controlled by DATA_DIR pin.
+#define SCSI_IO_DB0  0
+#define SCSI_IO_DB1  1
+#define SCSI_IO_DB2  2
+#define SCSI_IO_DB3  3
+#define SCSI_IO_DB4  4
+#define SCSI_IO_DB5  5
+#define SCSI_IO_DB6  6
+#define SCSI_IO_DB7  7
+#define SCSI_IO_DBP  8
+#define SCSI_IO_DATA_MASK 0x1FF
+#define SCSI_IO_SHIFT 0
+
+// Data direction control
+#define SCSI_DATA_DIR 17
+
+// SCSI output status lines
+#define SCSI_OUT_IO   12
+#define SCSI_OUT_CD   11
+#define SCSI_OUT_MSG  13
+#define SCSI_OUT_RST  28
+#define SCSI_OUT_BSY  26
+#define SCSI_OUT_REQ  9
+#define SCSI_OUT_SEL  24
+
+// SCSI input status signals
+#define SCSI_IN_SEL  11
+#define SCSI_IN_ACK  10
+#define SCSI_IN_ATN  29
+#define SCSI_IN_BSY  13
+#define SCSI_IN_RST  27
+
+// Status line outputs for initiator mode
+#define SCSI_OUT_ACK  10
+#define SCSI_OUT_ATN  29
+
+// Status line inputs for initiator mode
+#define SCSI_IN_IO    12
+#define SCSI_IN_CD    11
+#define SCSI_IN_MSG   13
+#define SCSI_IN_REQ   9
+
+// Status LED pins
+#define LED_PIN      25
+#define LED_ON()     sio_hw->gpio_set = 1 << LED_PIN
+#define LED_OFF()    sio_hw->gpio_clr = 1 << LED_PIN
+
+// SD card pins in SDIO mode
+#define SDIO_CLK 18
+#define SDIO_CMD 19
+#define SDIO_D0  20
+#define SDIO_D1  21
+#define SDIO_D2  22
+#define SDIO_D3  23
+
+// SD card pins in SPI mode
+#define SD_SPI       spi0
+#define SD_SPI_SCK   18
+#define SD_SPI_MOSI  19
+#define SD_SPI_MISO  20
+#define SD_SPI_CS    23
+
+#ifndef ENABLE_AUDIO_OUTPUT
+    // IO expander I2C
+    #define GPIO_I2C_SDA 14
+    #define GPIO_I2C_SCL 15
+#else
+    // IO expander I2C pins being used as SPI for audio
+    #define AUDIO_SPI      spi1
+    #define GPIO_EXP_SPARE 14
+    #define GPIO_EXP_AUDIO 15
+#endif
+
+// DIP switch pins
+#define HAS_DIP_SWITCHES
+#define DIP_INITIATOR 10
+#define DIP_DBGLOG 16
+#define DIP_TERM 9
+
+// Other pins
+#define SWO_PIN 16
+
+// Below are GPIO access definitions that are used from scsiPhy.cpp.
+// Each macro expands to one fully parenthesized expression, so it can be
+// combined with other operators without precedence surprises.
+
+// Write a single SCSI pin.
+// Example use: SCSI_OUT(ATN, 1) sets SCSI_ATN to low (active) state.
+#define SCSI_OUT(pin, state) \
+    (*((state) ? &sio_hw->gpio_clr : &sio_hw->gpio_set) = 1 << (SCSI_OUT_ ## pin))
+
+// Read a single SCSI pin.
+// Example use: SCSI_IN(ATN), returns 1 for active low state.
+#define SCSI_IN(pin) \
+    ((sio_hw->gpio_in & (1 << (SCSI_IN_ ## pin))) ? 0 : 1)
+
+// Set pin directions for initiator vs. target mode
+// (ACK and ATN become outputs; IO, CD, MSG and REQ become inputs)
+#define SCSI_ENABLE_INITIATOR() \
+    ((sio_hw->gpio_oe_set = (1 << SCSI_OUT_ACK) | \
+                            (1 << SCSI_OUT_ATN)), \
+     (sio_hw->gpio_oe_clr = (1 << SCSI_IN_IO) | \
+                            (1 << SCSI_IN_CD) | \
+                            (1 << SCSI_IN_MSG) | \
+                            (1 << SCSI_IN_REQ)))
+
+// Enable driving of shared control pins
+#define SCSI_ENABLE_CONTROL_OUT() \
+    (sio_hw->gpio_oe_set = (1 << SCSI_OUT_CD) | \
+                           (1 << SCSI_OUT_MSG))
+
+// Set SCSI data bus to output
+// (sets the DATA_DIR pin low, then enables the data pin output drivers)
+#define SCSI_ENABLE_DATA_OUT() \
+    (sio_hw->gpio_clr = (1 << SCSI_DATA_DIR), \
+     sio_hw->gpio_oe_set = SCSI_IO_DATA_MASK)
+
+// Write SCSI data bus, also sets REQ to inactive.
+#define SCSI_OUT_DATA(data) \
+    (gpio_put_masked(SCSI_IO_DATA_MASK | (1 << SCSI_OUT_REQ), \
+                     g_scsi_parity_lookup[(uint8_t)(data)] | (1 << SCSI_OUT_REQ)), \
+     SCSI_ENABLE_DATA_OUT())
+
+// Release SCSI data bus and REQ signal
+#define SCSI_RELEASE_DATA_REQ() \
+    (sio_hw->gpio_oe_clr = SCSI_IO_DATA_MASK, \
+     sio_hw->gpio_set = (1 << SCSI_DATA_DIR) | (1 << SCSI_OUT_REQ))
+
+// Release all SCSI outputs
+// (stops driving the shared CD and MSG pins, then sets every output high)
+#define SCSI_RELEASE_OUTPUTS() \
+    (SCSI_RELEASE_DATA_REQ(), \
+     sio_hw->gpio_oe_clr = (1 << SCSI_OUT_CD) | \
+                           (1 << SCSI_OUT_MSG), \
+     sio_hw->gpio_set = (1 << SCSI_OUT_IO) | \
+                        (1 << SCSI_OUT_CD) | \
+                        (1 << SCSI_OUT_MSG) | \
+                        (1 << SCSI_OUT_RST) | \
+                        (1 << SCSI_OUT_BSY) | \
+                        (1 << SCSI_OUT_REQ) | \
+                        (1 << SCSI_OUT_SEL))
+
+// Read SCSI data bus
+// (returns the data and parity pins inverted, so an active-low line reads as 1)
+#define SCSI_IN_DATA() \
+    ((~sio_hw->gpio_in & SCSI_IO_DATA_MASK) >> SCSI_IO_SHIFT)
+

+ 227 - 0
lib/ZuluSCSI_platform_RP2350/ZuluSCSI_platform_msc.cpp

@@ -0,0 +1,227 @@
+/**
+ * Copyright (c) 2023-2024 zigzagjoe
+ * 
+ * ZuluSCSI™ firmware is licensed under the GPL version 3 or any later version. 
+ * 
+ * https://www.gnu.org/licenses/gpl-3.0.html
+ * ----
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version. 
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details. 
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+**/
+
+/* platform specific MSC routines */
+#ifdef PLATFORM_MASS_STORAGE
+
+#include <SdFat.h>
+#include <device/usbd.h>
+#include <hardware/gpio.h>
+#include "ZuluSCSI_platform.h"
+#include "ZuluSCSI_log.h"
+#include "ZuluSCSI_msc.h"
+
+#include <class/msc/msc.h>
+#include <class/msc/msc_device.h>
+
+#if CFG_TUD_MSC_EP_BUFSIZE < SD_SECTOR_SIZE
+  #error "CFG_TUD_MSC_EP_BUFSIZE is too small! It needs to be at least 512 (SD_SECTOR_SIZE)"
+#endif
+
+// external global SD variable
+extern SdFs SD;
+static bool unitReady = false;
+
+/* return true if USB presence detected / eligible to enter CR mode */
+bool platform_sense_msc() {
+
+#ifdef ZULUSCSI_PICO
+  // Leave immediately when the board is not powered from USB.
+  // The sense pin lives on the wireless module for the Pico W, see
+  // https://github.com/earlephilhower/arduino-pico/discussions/835
+  const int usbSensePin = rp2040.isPicoW() ? 34 : 24;
+  if (!digitalRead(usbSensePin)) {
+    return false;
+  }
+#endif
+
+  logmsg("Waiting for USB enumeration to enter Card Reader mode.");
+
+  // Poll every 100 ms until the host talks to us or CR_ENUM_TIMEOUT elapses.
+  const uint32_t waitStarted = millis();
+  for (;;) {
+    if (tud_connected()) {
+      break;
+    }
+    if ((uint32_t)(millis() - waitStarted) >= CR_ENUM_TIMEOUT) {
+      break;
+    }
+    delay(100);
+  }
+
+  // tud_connected returns True if just got out of Bus Reset and received the very first data from host
+  // https://github.com/hathach/tinyusb/blob/master/src/device/usbd.h#L63
+  return tud_connected();
+}
+
+/* return true if we should remain in card reader mode and perform periodic tasks */
+/* Simply reports the unitReady flag, which platform_enter_msc() sets and which
+ * platform_exit_msc() or an eject request in tud_msc_start_stop_cb() clears. */
+bool platform_run_msc() {
+  return unitReady;
+}
+
+/* perform MSC class preinit tasks */
+/* Logs the configured endpoint buffer size and marks the unit as ready. */
+void platform_enter_msc() {
+  dbgmsg("USB MSC buffer size: ", CFG_TUD_MSC_EP_BUFSIZE);
+  // MSC is ready for read/write
+  // we don't need any prep, but the var is required as the MSC callbacks are always active
+  unitReady = true;
+}
+
+/* perform any cleanup tasks for the MSC-specific functionality */
+/* Clears unitReady, so the MSC callbacks that consult it report "not ready". */
+void platform_exit_msc() {
+  unitReady = false;
+}
+
+/* TinyUSB mass storage callbacks follow */
+
+// usb framework checks this func exists for mass storage config. no code needed.
+// Intentionally empty: only the presence of the symbol matters.
+void __USBInstallMassStorage() { }
+
+// Invoked when received SCSI_CMD_INQUIRY
+// Copies the vendor id, product id and revision strings into the caller's
+// fields, limited to 8, 16 and 4 characters respectively. No terminating NUL
+// is copied.
+extern "C" void tud_msc_inquiry_cb(uint8_t lun, uint8_t vendor_id[8],
+                        uint8_t product_id[16], uint8_t product_rev[4]) {
+
+  // TODO: We could/should use strings from the platform, but they are too long
+  const char vendorText[] = "ZuluSCSI";
+  const char productText[] = "Pico";
+  const char revisionText[] = "1.0";
+
+  // sizeof() - 1 is the string length without the trailing NUL
+  const uint32_t vendorLen = tu_min32(sizeof(vendorText) - 1, 8);
+  const uint32_t productLen = tu_min32(sizeof(productText) - 1, 16);
+  const uint32_t revisionLen = tu_min32(sizeof(revisionText) - 1, 4);
+
+  memcpy(vendor_id, vendorText, vendorLen);
+  memcpy(product_id, productText, productLen);
+  memcpy(product_rev, revisionText, revisionLen);
+}
+
+// Reports how many logical units this device exposes.
+// we only have the one SD card
+extern "C" uint8_t tud_msc_get_maxlun_cb(void) {
+  return 1; // number of LUNs supported
+}
+
+// return writable status
+// on platform supporting write protect switch, could do that here.
+// otherwise this is not actually needed
+// As written, the medium is reported writable exactly when unitReady is set;
+// the lun argument is ignored.
+extern "C" bool tud_msc_is_writable_cb (uint8_t lun)
+{
+  (void) lun;
+  return unitReady;
+}
+
+// Handles the START STOP UNIT command.
+// see https://www.seagate.com/files/staticfiles/support/docs/manual/Interface%20manuals/100293068j.pdf pg 221
+extern "C" bool tud_msc_start_stop_cb(uint8_t lun, uint8_t power_condition, bool start, bool load_eject)
+{
+  (void) lun;
+  (void) power_condition;
+
+  // Only an eject request (load_eject set, start clear) changes anything.
+  // A load request needs no action because we started "loaded".
+  const bool ejectRequested = load_eject && !start;
+  if (ejectRequested) {
+    unitReady = false;
+  }
+
+  // every request is reported as accepted
+  return true;
+}
+
+// return true if we are ready to service reads/writes
+// Reports the unitReady flag; the lun argument is ignored.
+extern "C" bool tud_msc_test_unit_ready_cb(uint8_t lun) {
+  (void) lun;
+
+  return unitReady;
+}
+
+// Reports the medium geometry: number of blocks and bytes per block.
+// While the unit is not ready a block count of 0 is reported; the block
+// size is always SD_SECTOR_SIZE.
+extern "C" void tud_msc_capacity_cb(uint8_t lun, uint32_t *block_count,
+                         uint16_t *block_size) {
+  (void) lun;
+
+  if (unitReady) {
+    *block_count = SD.card()->sectorCount();
+  } else {
+    *block_count = 0;
+  }
+
+  *block_size = SD_SECTOR_SIZE;
+}
+
+// Callback invoked when received an SCSI command not in built-in list (below) which have their own callbacks
+// - READ_CAPACITY10, READ_FORMAT_CAPACITY, INQUIRY, MODE_SENSE6, REQUEST_SENSE, READ10 and WRITE10
+//
+// lun      - logical unit the command addresses (used only for sense data)
+// scsi_cmd - command block; byte 0 is the opcode
+// buffer   - destination for response data, holds up to bufsize bytes
+// bufsize  - capacity of buffer
+// Returns the number of response bytes placed in buffer (0 when the command
+// has no data phase), or a negative value when the command is rejected.
+extern "C" int32_t tud_msc_scsi_cb(uint8_t lun, const uint8_t scsi_cmd[16], void *buffer,
+                        uint16_t bufsize) {
+
+  const void *response = NULL;
+  // Must be signed: a negative value is the error indication. With an unsigned
+  // 16-bit variable, -1 wrapped to 0xFFFF, was clamped to bufsize below and
+  // the rejection was reported to the stack as a successful data length.
+  int32_t resplen = 0;
+
+  switch (scsi_cmd[0]) {
+  case SCSI_CMD_PREVENT_ALLOW_MEDIUM_REMOVAL:
+    // Host is about to read/write etc ... better not to disconnect disk
+    resplen = 0;
+    break;
+
+  default:
+    // Set Sense = Invalid Command Operation
+    tud_msc_set_sense(lun, SCSI_SENSE_ILLEGAL_REQUEST, 0x20, 0x00);
+
+    // negative means error -> tinyusb could stall and/or response with failed status
+    resplen = -1;
+    break;
+  }
+
+  // returned length must not be larger than bufsize
+  if (resplen > (int32_t) bufsize) {
+    resplen = bufsize;
+  }
+
+  // copy response to stack's buffer if any
+  if (response && resplen > 0) {
+    memcpy(buffer, response, (size_t) resplen);
+  }
+
+  return resplen;
+}
+
+// Callback invoked when received READ10 command.
+// Reads bufsize / SD_SECTOR_SIZE whole sectors starting at lba from the SD
+// card into buffer. Returns bufsize on success and -1 when the card read
+// fails. The lun and offset arguments are not used.
+extern "C" int32_t tud_msc_read10_cb(uint8_t lun, uint32_t lba, uint32_t offset, 
+                            void* buffer, uint32_t bufsize)
+{
+  (void) lun;
+
+  uint8_t *destination = (uint8_t*) buffer;
+  const bool readOk = SD.card()->readSectors(lba, destination, bufsize/SD_SECTOR_SIZE);
+
+  // only blink fast on reads; writes will override this
+  if (MSC_LEDMode == LED_SOLIDON) {
+    MSC_LEDMode = LED_BLINK_FAST;
+  }
+
+  if (!readOk) {
+    return -1;
+  }
+  return bufsize;
+}
+
+// Callback invoked when receive WRITE10 command.
+// Writes bufsize / SD_SECTOR_SIZE whole sectors from buffer to the SD card
+// starting at lba. Returns bufsize on success and -1 when the card write
+// fails. The lun and offset arguments are not used.
+extern "C" int32_t tud_msc_write10_cb(uint8_t lun, uint32_t lba, uint32_t offset,
+                           uint8_t *buffer, uint32_t bufsize) {
+  (void) lun;
+
+  const bool writeOk = SD.card()->writeSectors(lba, buffer, bufsize/SD_SECTOR_SIZE);
+
+  // always slow blink
+  MSC_LEDMode = LED_BLINK_SLOW;
+
+  if (!writeOk) {
+    return -1;
+  }
+  return bufsize;
+}
+
+// Callback invoked when WRITE10 command is completed (status received and accepted by host).
+// used to flush any pending cache to storage
+// Currently a no-op: nothing is flushed here and lun is ignored.
+extern "C" void tud_msc_write10_complete_cb(uint8_t lun) {
+  (void) lun;
+}
+
+#endif

+ 40 - 0
lib/ZuluSCSI_platform_RP2350/ZuluSCSI_platform_msc.h

@@ -0,0 +1,40 @@
+/**
+ * Copyright (c) 2023-2024 zigzagjoe
+ * 
+ * ZuluSCSI™ firmware is licensed under the GPL version 3 or any later version. 
+ * 
+ * https://www.gnu.org/licenses/gpl-3.0.html
+ * ----
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version. 
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details. 
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+**/
+
+#ifdef PLATFORM_MASS_STORAGE
+#pragma once
+
+// private constants/enums
+#define SD_SECTOR_SIZE 512
+
+/* return true if USB presence detected / eligible to enter CR mode */
+bool platform_sense_msc();
+
+/* perform MSC-specific init tasks */
+void platform_enter_msc();
+
+/* return true if we should remain in card reader mode. called in a loop. */
+bool platform_run_msc();
+
+/* perform any cleanup tasks for the MSC-specific functionality */
+void platform_exit_msc();
+
+#endif

+ 349 - 0
lib/ZuluSCSI_platform_RP2350/ZuluSCSI_platform_network.cpp

@@ -0,0 +1,349 @@
+/*
+ * Copyright (c) 2023 joshua stein <jcs@jcs.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifdef ZULUSCSI_NETWORK
+#include "ZuluSCSI_platform_network.h"
+#include "ZuluSCSI_log.h"
+#include "ZuluSCSI_config.h"
+#include <scsi.h>
+#include <network.h>
+
+extern "C" {
+
+#include <cyw43.h>
+#include <pico/cyw43_arch.h>
+
+#ifndef CYW43_IOCTL_GET_RSSI
+#define CYW43_IOCTL_GET_RSSI (0xfe)
+#endif
+
+#define PICO_W_GPIO_LED_PIN 0
+#define PICO_W_LED_ON() cyw43_arch_gpio_put(PICO_W_GPIO_LED_PIN, 1)
+#define PICO_W_LED_OFF() cyw43_arch_gpio_put(PICO_W_GPIO_LED_PIN, 0)
+#define PICO_W_LONG_BLINK_DELAY 200
+#define PICO_W_SHORT_BLINK_DELAY 75
+
+// A default DaynaPort-compatible MAC
+static const char defaultMAC[] = { 0x00, 0x80, 0x19, 0xc0, 0xff, 0xee };
+
+static bool network_in_use = false;
+
+// Reports whether networking is available on this board.
+// Builds without ARDUINO_RASPBERRY_PI_PICO_W always answer false; otherwise
+// the answer is the value of the externally defined __isPicoW flag.
+bool platform_network_supported()
+{
+	/* from cores/rp2040/RP2040Support.h */
+#if !defined(ARDUINO_RASPBERRY_PI_PICO_W)
+	return false;
+#else
+	extern bool __isPicoW;
+	return __isPicoW;
+#endif
+}
+
+// Initializes the Wi-Fi chip in station mode and applies a MAC address.
+//
+// mac - 6-byte MAC address to use. When NULL or all zeroes, a default is
+//       built from defaultMAC with the last three bytes taken from the
+//       unique board id (if none of those three id bytes is zero), and the
+//       result is also stored in scsiDev.boardCfg.wifiMACAddress.
+// Returns 0 on success, -1 when networking is not supported on this board.
+int platform_network_init(char *mac)
+{
+	pico_unique_board_id_t board_id;
+	uint8_t set_mac[6], read_mac[6];
+
+	if (!platform_network_supported())
+		return -1;
+
+	// long signal blink at network initialization
+	PICO_W_LED_OFF();
+	PICO_W_LED_ON();
+	delay(PICO_W_LONG_BLINK_DELAY);
+	PICO_W_LED_OFF();
+
+
+	logmsg(" ");
+	logmsg("=== Network Initialization ===");
+
+	// forget results of any earlier scan
+	memset(wifi_network_list, 0, sizeof(wifi_network_list));
+
+	// restart the driver from a clean state
+	cyw43_deinit(&cyw43_state);
+	cyw43_init(&cyw43_state);
+
+	if (mac == NULL || (mac[0] == 0 && mac[1] == 0 && mac[2] == 0 && mac[3] == 0 && mac[4] == 0 && mac[5] == 0))
+	{
+		// no usable address supplied: build one in the local set_mac buffer
+		mac = (char *)&set_mac;
+		memcpy(mac, defaultMAC, sizeof(set_mac));
+
+		// retain Dayna vendor but use a device id specific to this board
+		pico_get_unique_board_id(&board_id);
+		if (g_log_debug)
+			logmsg("Unique board id: ", board_id.id[0], " ", board_id.id[1], " ", board_id.id[2], " ", board_id.id[3], " ", 
+										board_id.id[4], " ", board_id.id[5], " ", board_id.id[6], " ", board_id.id[7]);
+
+		// the id bytes are only used when all three are non-zero
+		if (board_id.id[3] != 0 && board_id.id[4] != 0 && board_id.id[5] != 0)
+		{
+			mac[3] = board_id.id[3];
+			mac[4] = board_id.id[4];
+			mac[5] = board_id.id[5];
+		}
+
+		memcpy(scsiDev.boardCfg.wifiMACAddress, mac, sizeof(scsiDev.boardCfg.wifiMACAddress));
+	}
+
+	// setting the MAC requires libpico to be compiled with CYW43_USE_OTP_MAC=0
+	memcpy(cyw43_state.mac, mac, sizeof(cyw43_state.mac));
+	cyw43_arch_enable_sta_mode();
+
+	// read the address back and warn when it differs from the requested one
+	cyw43_wifi_get_mac(&cyw43_state, CYW43_ITF_STA, read_mac);
+	logmsg("Wi-Fi MAC: ", read_mac[0],":",read_mac[1], ":", read_mac[2], ":", read_mac[3], ":", read_mac[4], ":", read_mac[5]);
+	if (memcmp(mac, read_mac, sizeof(read_mac)) != 0)
+		logmsg("WARNING: Wi-Fi MAC is not what was requested (", 
+				(uint8_t)mac[0], ":", (uint8_t)mac[1], ":", (uint8_t)mac[2], ":", (uint8_t)mac[3], ":", (uint8_t)mac[4], ":", (uint8_t)mac[5],
+				"), is libpico not compiled with CYW43_USE_OTP_MAC=0?");
+
+	// from now on platform_network_poll() services the driver
+	network_in_use = true;
+
+	return 0;
+}
+
+// Adds the given MAC address to the Wi-Fi multicast filter.
+// A non-zero driver result is logged; nothing is returned to the caller.
+void platform_network_add_multicast_address(uint8_t *mac)
+{
+	const int status = cyw43_wifi_update_multicast_filter(&cyw43_state, mac, true);
+
+	if (status == 0)
+		return;
+
+	logmsg( __func__, ": cyw43_wifi_update_multicast_filter: ", status);
+}
+
+// Starts an asynchronous connection attempt to the given Wi-Fi network.
+//
+// ssid     - network name
+// password - passphrase; NULL or an empty string selects open authentication,
+//            anything else selects WPA/WPA2 PSK
+// Returns true when the attempt was started, false when networking is not
+// supported or the driver refused the request.
+bool platform_network_wifi_join(char *ssid, char *password)
+{
+	if (!platform_network_supported())
+		return false;
+
+	const bool open_network = (password == NULL || password[0] == 0);
+	int status;
+
+	if (open_network)
+	{
+		logmsg("Connecting to Wi-Fi SSID \"", ssid, "\" with no authentication");
+		status = cyw43_arch_wifi_connect_async(ssid, NULL, CYW43_AUTH_OPEN);
+	}
+	else
+	{
+		logmsg("Connecting to Wi-Fi SSID \"", ssid, "\" with WPA/WPA2 PSK");
+		status = cyw43_arch_wifi_connect_async(ssid, password, CYW43_AUTH_WPA2_MIXED_PSK);
+	}
+
+	if (status != 0)
+	{
+		logmsg("Wi-Fi connection failed: ", status);
+		return false;
+	}
+
+	// Short single blink at start of connection sequence
+	PICO_W_LED_OFF();
+	delay(PICO_W_SHORT_BLINK_DELAY);
+	PICO_W_LED_ON();
+	delay(PICO_W_SHORT_BLINK_DELAY);
+	PICO_W_LED_OFF();
+
+	return true;
+}
+
+// Periodic network service routine.
+// Does nothing until platform_network_init() has set network_in_use; after
+// that it purges the SCSI network queue and then polls the Wi-Fi driver.
+void platform_network_poll()
+{
+	if (network_in_use)
+	{
+		scsiNetworkPurge();
+		cyw43_arch_poll();
+	}
+}
+
+// Hands one frame of len bytes at buf to cyw43_send_ethernet().
+// Returns the driver's result unchanged; a non-zero result is also logged.
+int platform_network_send(uint8_t *buf, size_t len)
+{
+	int ret = cyw43_send_ethernet(&cyw43_state, 0, len, buf, 0);
+	if (ret != 0)
+		logmsg("cyw43_send_ethernet failed: ", ret);
+
+	return ret;
+}
+
+// Scan callback: records one scan result in wifi_network_list.
+//
+// env    - unused
+// result - scan result from the driver; results without an SSID are ignored
+// Always returns 0.
+//
+// Slot selection: the first empty slot, or the slot that already holds the
+// same SSID. When the list is full, the result is inserted ahead of the first
+// entry with a weaker RSSI (later entries shift down, the last one is
+// dropped); if no entry is weaker the result is discarded.
+static int platform_network_wifi_scan_result(void *env, const cyw43_ev_scan_result_t *result)
+{
+	struct wifi_network_entry *entry = NULL;
+	// result->ssid is a fixed-size byte array whose length is given by
+	// ssid_len; it is not guaranteed to be NUL-terminated. Work on a
+	// terminated local copy so strcmp/strncpy never read past the array.
+	char ssid[sizeof(result->ssid) + 1];
+	size_t ssid_len;
+
+	if (!result || !result->ssid_len || !result->ssid[0])
+		return 0;
+
+	ssid_len = result->ssid_len;
+	if (ssid_len > sizeof(result->ssid))
+		ssid_len = sizeof(result->ssid);
+	memcpy(ssid, result->ssid, ssid_len);
+	ssid[ssid_len] = '\0';
+
+	for (int i = 0; i < WIFI_NETWORK_LIST_ENTRY_COUNT; i++)
+	{
+		// take first available
+		if (wifi_network_list[i].ssid[0] == '\0')
+		{
+			entry = &wifi_network_list[i];
+			break;
+		}
+		// or if we've seen this network before, use this slot
+		else if (strcmp(ssid, wifi_network_list[i].ssid) == 0)
+		{
+			entry = &wifi_network_list[i];
+			break;
+		}
+	}
+
+	if (!entry)
+	{
+		// no available slots, insert according to our RSSI
+		for (int i = 0; i < WIFI_NETWORK_LIST_ENTRY_COUNT; i++)
+		{
+			if (result->rssi > wifi_network_list[i].rssi)
+			{
+				// shift everything else down
+				for (int j = WIFI_NETWORK_LIST_ENTRY_COUNT - 1; j > i; j--)
+					wifi_network_list[j] = wifi_network_list[j - 1];
+
+				entry = &wifi_network_list[i];
+				memset(entry, 0, sizeof(struct wifi_network_entry));
+				break;
+			}
+		}
+	}
+
+	if (entry == NULL)
+		return 0;
+
+	// keep the strongest reading seen for this network
+	if (entry->rssi == 0 || result->rssi > entry->rssi)
+	{
+		entry->channel = result->channel;
+		entry->rssi = result->rssi;
+	}
+	if (result->auth_mode & 7)
+		entry->flags = WIFI_NETWORK_FLAG_AUTH;
+	strncpy(entry->ssid, ssid, sizeof(entry->ssid));
+	entry->ssid[sizeof(entry->ssid) - 1] = '\0';
+	memcpy(entry->bssid, result->bssid, sizeof(entry->bssid));
+
+	return 0;
+}
+
+// Starts a Wi-Fi scan with default (zeroed) scan options.
+// Returns -1 without doing anything when a scan is already active; otherwise
+// clears wifi_network_list and returns the result of cyw43_wifi_scan().
+// Results are delivered to platform_network_wifi_scan_result().
+int platform_network_wifi_start_scan()
+{
+	if (cyw43_wifi_scan_active(&cyw43_state))
+		return -1;
+
+	cyw43_wifi_scan_options_t scan_options = { 0 };
+	memset(wifi_network_list, 0, sizeof(wifi_network_list));
+	return cyw43_wifi_scan(&cyw43_state, &scan_options, NULL, platform_network_wifi_scan_result);
+}
+
+// Returns non-zero when no scan is active according to
+// cyw43_wifi_scan_active(), and 0 while a scan is still running.
+int platform_network_wifi_scan_finished()
+{
+	return !cyw43_wifi_scan_active(&cyw43_state);
+}
+
+// Logs every populated entry of wifi_network_list, one line per network.
+// Stops at the first entry whose SSID is empty.
+void platform_network_wifi_dump_scan_list()
+{
+	for (int i = 0; i < WIFI_NETWORK_LIST_ENTRY_COUNT; i++)
+	{
+		struct wifi_network_entry *net = &wifi_network_list[i];
+
+		// an empty SSID marks the end of the used part of the list
+		if (net->ssid[0] == '\0')
+			return;
+
+		logmsg("wifi[",i,"] = ",net->ssid,", channel ",(int)net->channel,", rssi ",(int)net->rssi,
+				", bssid ",(uint8_t) net->bssid[0],":",(uint8_t) net->bssid[1],":",(uint8_t) net->bssid[2],":",
+				(uint8_t) net->bssid[3],":",(uint8_t) net->bssid[4],":",(uint8_t) net->bssid[5],", flags ", net->flags);
+	}
+}
+
+// Queries the driver for the current RSSI via CYW43_IOCTL_GET_RSSI.
+// The ioctl's own return code is not checked; the value stays 0 if the
+// ioctl does not fill it in.
+int platform_network_wifi_rssi()
+{
+	int32_t rssi = 0;
+
+    cyw43_ioctl(&cyw43_state, CYW43_IOCTL_GET_RSSI, sizeof(rssi), (uint8_t *)&rssi, CYW43_ITF_STA);
+	return rssi;
+}
+
+// Queries the driver for the SSID via CYW43_IOCTL_GET_SSID.
+// Returns a pointer to a static, NUL-terminated buffer that is overwritten
+// by the next call, or NULL (after logging) when the ioctl fails.
+char * platform_network_wifi_ssid()
+{
+	// layout handed to the ioctl: length word followed by the name bytes
+	struct ssid_t {
+		uint32_t ssid_len;
+		uint8_t ssid[32 + 1];
+	} ssid;
+	static char cur_ssid[32 + 1];
+
+	memset(cur_ssid, 0, sizeof(cur_ssid));
+
+	int ret = cyw43_ioctl(&cyw43_state, CYW43_IOCTL_GET_SSID, sizeof(ssid), (uint8_t *)&ssid, CYW43_ITF_STA);
+	if (ret)
+	{
+		logmsg("Failed getting Wi-Fi SSID: ", ret);
+		return NULL;
+	}
+
+	// force termination at the end of the array and, when the reported
+	// length is in range, directly after the name
+	ssid.ssid[sizeof(ssid.ssid) - 1] = '\0';
+	if (ssid.ssid_len < sizeof(ssid.ssid))
+		ssid.ssid[ssid.ssid_len] = '\0';
+	
+	strlcpy(cur_ssid, (char *)ssid.ssid, sizeof(cur_ssid));
+	return cur_ssid;
+}
+
+// Not implemented yet: always returns a pointer to a static 6-byte buffer
+// filled with zeroes.
+char * platform_network_wifi_bssid()
+{
+	static char bssid[6];
+
+	memset(bssid, 0, sizeof(bssid));
+
+	/* TODO */
+
+	return bssid;
+}
+
+// Queries the driver for the current channel via CYW43_IOCTL_GET_CHANNEL.
+// The ioctl's own return code is not checked; the value stays 0 if the
+// ioctl does not fill it in.
+int platform_network_wifi_channel()
+{
+	int32_t channel = 0;
+
+    cyw43_ioctl(&cyw43_state, CYW43_IOCTL_GET_CHANNEL, sizeof(channel), (uint8_t *)&channel, CYW43_ITF_STA);
+	return channel;
+}
+
+// these override weakly-defined functions in pico-sdk
+
+// Receive hook: forwards the received frame (buf, len) to
+// scsiNetworkEnqueue(). cb_data and itf are not used.
+void cyw43_cb_process_ethernet(void *cb_data, int itf, size_t len, const uint8_t *buf)
+{
+	scsiNetworkEnqueue(buf, len);
+}
+
+// Link-down hook: logs the SSID stored in self->ap_ssid. itf is not used.
+void cyw43_cb_tcpip_set_link_down(cyw43_t *self, int itf)
+{
+	logmsg("Disassociated from Wi-Fi SSID \"",  (char *)self->ap_ssid,"\"");
+}
+
+// Link-up hook: when the current SSID can be read, logs the connection and
+// blinks the LED three times. Does nothing when the SSID query fails.
+// self and itf are not used.
+void cyw43_cb_tcpip_set_link_up(cyw43_t *self, int itf)
+{
+	char *connected_ssid = platform_network_wifi_ssid();
+
+	if (connected_ssid == NULL)
+		return;
+
+	logmsg("Successfully connected to Wi-Fi SSID \"",connected_ssid,"\"");
+
+	// blink LED 3 times when connected
+	PICO_W_LED_OFF();
+	uint8_t blinks_left = 3;
+	while (blinks_left > 0)
+	{
+		delay(PICO_W_SHORT_BLINK_DELAY);
+		PICO_W_LED_ON();
+		delay(PICO_W_SHORT_BLINK_DELAY);
+		PICO_W_LED_OFF();
+		blinks_left--;
+	}
+}
+
+}
+#endif // ZULUSCSI_NETWORK

+ 45 - 0
lib/ZuluSCSI_platform_RP2350/ZuluSCSI_platform_network.h

@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2023 joshua stein <jcs@jcs.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#pragma once
+
+#ifdef ZULUSCSI_NETWORK
+
+#include <stdint.h>
+#include <stddef.h>
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+bool platform_network_supported();
+void platform_network_poll();
+int platform_network_init(char *mac);
+void platform_network_add_multicast_address(uint8_t *mac);
+bool platform_network_wifi_join(char *ssid, char *password);
+int platform_network_wifi_start_scan();
+int platform_network_wifi_scan_finished();
+void platform_network_wifi_dump_scan_list();
+int platform_network_wifi_rssi();
+char * platform_network_wifi_ssid();
+char * platform_network_wifi_bssid();
+int platform_network_wifi_channel();
+int platform_network_send(uint8_t *buf, size_t len);
+
+# ifdef __cplusplus
+}
+# endif
+
+#endif // ZULUSCSI_NETWORK

+ 598 - 0
lib/ZuluSCSI_platform_RP2350/audio.cpp

@@ -0,0 +1,598 @@
+/** 
+ * Copyright (C) 2023 saybur
+ * 
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version. 
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details. 
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+**/
+
+#ifdef ENABLE_AUDIO_OUTPUT
+
+#include <SdFat.h>
+#include <stdbool.h>
+#include <hardware/dma.h>
+#include <hardware/irq.h>
+#include <hardware/spi.h>
+#include <pico/multicore.h>
+#include "audio.h"
+#include "ZuluSCSI_audio.h"
+#include "ZuluSCSI_config.h"
+#include "ZuluSCSI_log.h"
+#include "ZuluSCSI_platform.h"
+
+extern SdFs SD;
+
+// Table with the number of '1' bits for each index.
+// Used for SP/DIF parity calculations.
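+// Placed in the .scratch_y section (SRAM5 on RP2040) for the second core to
+// use with reduced contention.
+// NOTE(review): confirm which SRAM bank .scratch_y maps to on RP2350.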
+const uint8_t snd_parity[256] __attribute__((aligned(256), section(".scratch_y.snd_parity"))) = {
+    0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 
+    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 
+    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 
+    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 
+    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 
+    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 
+    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 
+    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 
+    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 
+    4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8, };
+
+/*
+ * Precomputed biphase-mark patterns for data. For an 8-bit value this has
+ * 16-bits in MSB-first order for the correct high/low transitions to
+ * represent the data, given an output clocking rate twice the bitrate (so the
+ * bits '11' or '00' reflect a zero and '10' or '01' represent a one). Each
+ * value below starts with a '1' and will need to be inverted if the last bit
+ * of the previous mask was also a '1'. These values can be written to an
+ * appropriately configured SPI peripheral to blast biphase data at a
+ * receiver.
+ * 
+ * To facilitate fast lookups this table should be put in SRAM with low
+ * contention, aligned to an appropriate boundary.
+ */
+const uint16_t biphase[256] __attribute__((aligned(512), section(".scratch_y.biphase"))) = {
+    0xCCCC, 0xB333, 0xD333, 0xACCC, 0xCB33, 0xB4CC, 0xD4CC, 0xAB33,
+    0xCD33, 0xB2CC, 0xD2CC, 0xAD33, 0xCACC, 0xB533, 0xD533, 0xAACC,
+    0xCCB3, 0xB34C, 0xD34C, 0xACB3, 0xCB4C, 0xB4B3, 0xD4B3, 0xAB4C,
+    0xCD4C, 0xB2B3, 0xD2B3, 0xAD4C, 0xCAB3, 0xB54C, 0xD54C, 0xAAB3,
+    0xCCD3, 0xB32C, 0xD32C, 0xACD3, 0xCB2C, 0xB4D3, 0xD4D3, 0xAB2C,
+    0xCD2C, 0xB2D3, 0xD2D3, 0xAD2C, 0xCAD3, 0xB52C, 0xD52C, 0xAAD3,
+    0xCCAC, 0xB353, 0xD353, 0xACAC, 0xCB53, 0xB4AC, 0xD4AC, 0xAB53,
+    0xCD53, 0xB2AC, 0xD2AC, 0xAD53, 0xCAAC, 0xB553, 0xD553, 0xAAAC,
+    0xCCCB, 0xB334, 0xD334, 0xACCB, 0xCB34, 0xB4CB, 0xD4CB, 0xAB34,
+    0xCD34, 0xB2CB, 0xD2CB, 0xAD34, 0xCACB, 0xB534, 0xD534, 0xAACB,
+    0xCCB4, 0xB34B, 0xD34B, 0xACB4, 0xCB4B, 0xB4B4, 0xD4B4, 0xAB4B,
+    0xCD4B, 0xB2B4, 0xD2B4, 0xAD4B, 0xCAB4, 0xB54B, 0xD54B, 0xAAB4,
+    0xCCD4, 0xB32B, 0xD32B, 0xACD4, 0xCB2B, 0xB4D4, 0xD4D4, 0xAB2B,
+    0xCD2B, 0xB2D4, 0xD2D4, 0xAD2B, 0xCAD4, 0xB52B, 0xD52B, 0xAAD4,
+    0xCCAB, 0xB354, 0xD354, 0xACAB, 0xCB54, 0xB4AB, 0xD4AB, 0xAB54,
+    0xCD54, 0xB2AB, 0xD2AB, 0xAD54, 0xCAAB, 0xB554, 0xD554, 0xAAAB,
+    0xCCCD, 0xB332, 0xD332, 0xACCD, 0xCB32, 0xB4CD, 0xD4CD, 0xAB32,
+    0xCD32, 0xB2CD, 0xD2CD, 0xAD32, 0xCACD, 0xB532, 0xD532, 0xAACD,
+    0xCCB2, 0xB34D, 0xD34D, 0xACB2, 0xCB4D, 0xB4B2, 0xD4B2, 0xAB4D,
+    0xCD4D, 0xB2B2, 0xD2B2, 0xAD4D, 0xCAB2, 0xB54D, 0xD54D, 0xAAB2,
+    0xCCD2, 0xB32D, 0xD32D, 0xACD2, 0xCB2D, 0xB4D2, 0xD4D2, 0xAB2D,
+    0xCD2D, 0xB2D2, 0xD2D2, 0xAD2D, 0xCAD2, 0xB52D, 0xD52D, 0xAAD2,
+    0xCCAD, 0xB352, 0xD352, 0xACAD, 0xCB52, 0xB4AD, 0xD4AD, 0xAB52,
+    0xCD52, 0xB2AD, 0xD2AD, 0xAD52, 0xCAAD, 0xB552, 0xD552, 0xAAAD,
+    0xCCCA, 0xB335, 0xD335, 0xACCA, 0xCB35, 0xB4CA, 0xD4CA, 0xAB35,
+    0xCD35, 0xB2CA, 0xD2CA, 0xAD35, 0xCACA, 0xB535, 0xD535, 0xAACA,
+    0xCCB5, 0xB34A, 0xD34A, 0xACB5, 0xCB4A, 0xB4B5, 0xD4B5, 0xAB4A,
+    0xCD4A, 0xB2B5, 0xD2B5, 0xAD4A, 0xCAB5, 0xB54A, 0xD54A, 0xAAB5,
+    0xCCD5, 0xB32A, 0xD32A, 0xACD5, 0xCB2A, 0xB4D5, 0xD4D5, 0xAB2A,
+    0xCD2A, 0xB2D5, 0xD2D5, 0xAD2A, 0xCAD5, 0xB52A, 0xD52A, 0xAAD5,
+    0xCCAA, 0xB355, 0xD355, 0xACAA, 0xCB55, 0xB4AA, 0xD4AA, 0xAB55,
+    0xCD55, 0xB2AA, 0xD2AA, 0xAD55, 0xCAAA, 0xB555, 0xD555, 0xAAAA };
+/*
+ * Biphase frame headers for SP/DIF, including the special bit framing
+ * errors used to detect (sub)frame start conditions. See above table
+ * for details.
+ */
+const uint16_t x_preamble = 0xE2CC;
+const uint16_t y_preamble = 0xE4CC;
+const uint16_t z_preamble = 0xE8CC;
+
+// DMA configuration info
+static dma_channel_config snd_dma_a_cfg;
+static dma_channel_config snd_dma_b_cfg;
+
+// some chonky buffers to store audio samples
+static uint8_t sample_buf_a[AUDIO_BUFFER_SIZE];
+static uint8_t sample_buf_b[AUDIO_BUFFER_SIZE];
+
+// tracking for the state of the above buffers
+enum bufstate { STALE, FILLING, READY };
+static volatile bufstate sbufst_a = STALE;
+static volatile bufstate sbufst_b = STALE;
+enum bufselect { A, B };
+static bufselect sbufsel = A;
+static uint16_t sbufpos = 0;
+static uint8_t sbufswap = 0;
+
+// buffers for storing biphase patterns
+#define SAMPLE_CHUNK_SIZE 1024 // ~5.8ms
+#define WIRE_BUFFER_SIZE (SAMPLE_CHUNK_SIZE * 2)
+static uint16_t wire_buf_a[WIRE_BUFFER_SIZE];
+static uint16_t wire_buf_b[WIRE_BUFFER_SIZE];
+
+// tracking for audio playback
+static uint8_t audio_owner; // SCSI ID or 0xFF when idle
+static volatile bool audio_paused = false;
+static ImageBackingStore* audio_file;
+static uint64_t fpos;
+static uint32_t fleft;
+
+// historical playback status information
+static audio_status_code audio_last_status[8] = {ASC_NO_STATUS, ASC_NO_STATUS, ASC_NO_STATUS, ASC_NO_STATUS,
+                                                 ASC_NO_STATUS, ASC_NO_STATUS, ASC_NO_STATUS, ASC_NO_STATUS};
+// volume information for targets
+static volatile uint16_t volumes[8] = {
+    DEFAULT_VOLUME_LEVEL_2CH, DEFAULT_VOLUME_LEVEL_2CH, DEFAULT_VOLUME_LEVEL_2CH, DEFAULT_VOLUME_LEVEL_2CH,
+    DEFAULT_VOLUME_LEVEL_2CH, DEFAULT_VOLUME_LEVEL_2CH, DEFAULT_VOLUME_LEVEL_2CH, DEFAULT_VOLUME_LEVEL_2CH
+};
+static volatile uint16_t channels[8] = {
+    AUDIO_CHANNEL_ENABLE_MASK, AUDIO_CHANNEL_ENABLE_MASK, AUDIO_CHANNEL_ENABLE_MASK, AUDIO_CHANNEL_ENABLE_MASK,
+    AUDIO_CHANNEL_ENABLE_MASK, AUDIO_CHANNEL_ENABLE_MASK, AUDIO_CHANNEL_ENABLE_MASK, AUDIO_CHANNEL_ENABLE_MASK
+};
+
+// mechanism for cleanly stopping DMA units
+static volatile bool audio_stopping = false;
+
+// trackers for the below function call
+static uint16_t sfcnt = 0; // sub-frame count; 2 per frame, 192 frames/block
+static uint8_t invert = 0; // biphase encode help: set if last wire bit was '1'
+
+/*
+ * Translates 16-bit stereo sound samples to biphase wire patterns for the
+ * SPI peripheral. Produces 8 patterns (128 bits, or 1 SP/DIF frame) per pair
+ * of input samples. Provided length is the total number of sample bytes present,
+ * _twice_ the number of samples (little-endian order assumed)
+ * 
+ * Parameters:
+ *   samples       - input sample bytes, or NULL to encode zero-valued samples
+ *   wire_patterns - output; 4 entries are written per 2 input bytes, so room
+ *                   for 2 * len entries is needed
+ *   len           - number of input bytes to consume
+ *   swap          - when non-zero, the first byte of each sample is taken as
+ *                   the high byte instead of the low byte
+ *
+ * Volume and channel enable are read from volumes[] / channels[] for the
+ * current audio_owner at the start of each call.
+ *
+ * This function operates with side-effects and is not safe to call from both
+ * cores. It must also be called in the same order data is intended to be
+ * output.
+ */
+static void snd_encode(uint8_t* samples, uint16_t* wire_patterns, uint16_t len, uint8_t swap) {
+    uint16_t wvol = volumes[audio_owner & 7];
+    uint8_t lvol = ((wvol >> 8) + (wvol & 0xFF)) >> 1; // average of both values
+    // limit maximum volume; with my DACs I've had persistent issues
+    // with signal clipping when sending data in the highest bit position
+    lvol = lvol >> 2;
+    uint8_t rvol = lvol;
+    // enable or disable based on the channel information for both output
+    // ports, where the high byte and mask control the right channel, and
+    // the low control the left channel
+    uint16_t chn = channels[audio_owner & 7] & AUDIO_CHANNEL_ENABLE_MASK;
+    if (!(chn >> 8)) rvol = 0;
+    if (!(chn & 0xFF)) lvol = 0;
+
+    uint16_t widx = 0; // next free slot in wire_patterns
+    for (uint16_t i = 0; i < len; i += 2) {
+        uint32_t sample = 0;
+        uint8_t parity = 0;
+        if (samples != NULL) {
+            int32_t rsamp;
+            if (swap) {
+                rsamp = (int16_t)(samples[i + 1] + (samples[i] << 8));
+            } else {
+                rsamp = (int16_t)(samples[i] + (samples[i + 1] << 8));
+            }
+            // linear scale to requested audio value
+            // (bit 1 of the byte index alternates per sample: clear selects
+            // the left volume, set selects the right volume)
+            if (i & 2) {
+                rsamp *= rvol;
+            } else {
+                rsamp *= lvol;
+            }
+            // use 20 bits of value only, which allows ignoring the lowest 8
+            // bits during biphase conversion (after including sample shift)
+            sample = ((uint32_t)rsamp) & 0xFFFFF0;
+
+            // determine parity, simplified to one lookup via XOR
+            parity = ((sample >> 16) ^ (sample >> 8)) ^ sample;
+            parity = snd_parity[parity];
+
+            // shift sample into the correct bit positions of the sub-frame.
+            sample = sample << 4;
+        }
+
+        // if needed, establish even parity with P bit
+        if (parity % 2) sample |= 0x80000000;
+
+        // translate sample into biphase encoding
+        // first is low 8 bits: preamble and 4 least-significant bits of 
+        // 24-bit audio, pre-encoded as all '0' due to 16-bit samples
+        uint16_t wp;
+        if (sfcnt == 0) {
+            wp = z_preamble; // left channel, block start
+        } else if (sfcnt % 2) {
+            wp = y_preamble; // right channel
+        } else {
+            wp = x_preamble; // left channel, not block start
+        }
+        // each pattern is inverted when the previous pattern ended on a '1';
+        // 'invert' then records the final bit of the pattern just emitted
+        if (invert) wp = ~wp;
+        invert = wp & 1;
+        wire_patterns[widx++] = wp;
+        // next 8 bits
+        wp = biphase[(uint8_t) (sample >> 8)];
+        if (invert) wp = ~wp;
+        invert = wp & 1;
+        wire_patterns[widx++] = wp;
+        // next 8 again, all audio data
+        wp = biphase[(uint8_t) (sample >> 16)];
+        if (invert) wp = ~wp;
+        invert = wp & 1;
+        wire_patterns[widx++] = wp;
+        // final 8, low 4 audio data and high 4 control bits
+        wp = biphase[(uint8_t) (sample >> 24)];
+        if (invert) wp = ~wp;
+        invert = wp & 1;
+        wire_patterns[widx++] = wp;
+        // increment subframe counter for next pass
+        sfcnt++;
+        if (sfcnt == 384) sfcnt = 0; // if true, block complete
+    }
+}
+
+// functions for passing to Core1
+/**
+ * Core1 work item: encodes the next chunk of samples into wire buffer A.
+ *
+ * Samples come from whichever sample buffer is currently selected. If that
+ * buffer is not READY the encoder is called with NULL in place of sample
+ * data. Once the selected buffer has been consumed to its end, the other
+ * buffer becomes selected and the consumed one is marked STALE.
+ */
+static void snd_process_a() {
+    const bool use_a = (sbufsel == A);
+    const bool have_data = use_a ? (sbufst_a == READY) : (sbufst_b == READY);
+    if (!have_data) {
+        // selected sample buffer not ready: encode without source data
+        snd_encode(NULL, wire_buf_a, SAMPLE_CHUNK_SIZE, sbufswap);
+        return;
+    }
+
+    snd_encode((use_a ? sample_buf_a : sample_buf_b) + sbufpos,
+            wire_buf_a, SAMPLE_CHUNK_SIZE, sbufswap);
+    sbufpos += SAMPLE_CHUNK_SIZE;
+    if (sbufpos < AUDIO_BUFFER_SIZE) return;
+
+    // buffer exhausted: switch to the other one, then release this one
+    if (use_a) {
+        sbufsel = B;
+        sbufpos = 0;
+        sbufst_a = STALE;
+    } else {
+        sbufsel = A;
+        sbufpos = 0;
+        sbufst_b = STALE;
+    }
+}
+/**
+ * Core1 work item: encodes the next chunk of samples into wire buffer B.
+ *
+ * Identical to snd_process_a() apart from the destination wire buffer:
+ * reads from the currently selected sample buffer (or passes NULL to the
+ * encoder when that buffer is not READY) and swaps sample buffers once the
+ * selected one has been fully consumed.
+ */
+static void snd_process_b() {
+    const bool use_a = (sbufsel == A);
+    const bool have_data = use_a ? (sbufst_a == READY) : (sbufst_b == READY);
+    if (!have_data) {
+        // selected sample buffer not ready: encode without source data
+        snd_encode(NULL, wire_buf_b, SAMPLE_CHUNK_SIZE, sbufswap);
+        return;
+    }
+
+    snd_encode((use_a ? sample_buf_a : sample_buf_b) + sbufpos,
+            wire_buf_b, SAMPLE_CHUNK_SIZE, sbufswap);
+    sbufpos += SAMPLE_CHUNK_SIZE;
+    if (sbufpos < AUDIO_BUFFER_SIZE) return;
+
+    // buffer exhausted: switch to the other one, then release this one
+    if (use_a) {
+        sbufsel = B;
+        sbufpos = 0;
+        sbufst_a = STALE;
+    } else {
+        sbufsel = A;
+        sbufpos = 0;
+        sbufst_b = STALE;
+    }
+}
+
+// Allows execution on Core1 via function pointers. Each function can take
+// no parameters and should return nothing, operating via side-effects only.
+// Core1 entry point: blocks on the inter-core FIFO, treats each received
+// word as the address of a void(void) function, and calls it. Never returns.
+static void core1_handler() {
+    typedef void (*core1_task_t)();
+    for (;;) {
+        core1_task_t task = (core1_task_t) multicore_fifo_pop_blocking();
+        task();
+    }
+}
+
+/* ------------------------------------------------------------------------ */
+/* ---------- VISIBLE FUNCTIONS ------------------------------------------- */
+/* ------------------------------------------------------------------------ */
+
+/**
+ * DMA completion handler for the two audio channels.
+ *
+ * For whichever channel has its bit set in the DMA interrupt status, this
+ * acknowledges the interrupt, queues the matching snd_process_x() function
+ * for Core1 so the just-sent wire buffer gets re-encoded, and re-arms the
+ * channel (without triggering it) to send the same wire buffer again.
+ *
+ * Only one channel is serviced per call (CHA is checked first).
+ */
+void audio_dma_irq() {
+    if (dma_hw->intr & (1 << SOUND_DMA_CHA)) {
+        // acknowledge by writing the channel bit to INTS0
+        dma_hw->ints0 = (1 << SOUND_DMA_CHA);
+        // hand the refill of wire buffer A to Core1
+        // NOTE(review): this is the blocking push variant, called from IRQ
+        // context - confirm the FIFO cannot be full here
+        multicore_fifo_push_blocking((uintptr_t) &snd_process_a);
+        if (audio_stopping) {
+            // during shutdown point the chain target at the channel itself
+            // so it no longer hands off to the other channel
+            channel_config_set_chain_to(&snd_dma_a_cfg, SOUND_DMA_CHA);
+        }
+        // reload addresses and count; 'false' = do not start now
+        dma_channel_configure(SOUND_DMA_CHA,
+                &snd_dma_a_cfg,
+                &(spi_get_hw(AUDIO_SPI)->dr),
+                &wire_buf_a,
+                WIRE_BUFFER_SIZE,
+                false);
+    } else if (dma_hw->intr & (1 << SOUND_DMA_CHB)) {
+        // same sequence for channel B / wire buffer B
+        dma_hw->ints0 = (1 << SOUND_DMA_CHB);
+        multicore_fifo_push_blocking((uintptr_t) &snd_process_b);
+        if (audio_stopping) {
+            channel_config_set_chain_to(&snd_dma_b_cfg, SOUND_DMA_CHB);
+        }
+        dma_channel_configure(SOUND_DMA_CHB,
+                &snd_dma_b_cfg,
+                &(spi_get_hw(AUDIO_SPI)->dr),
+                &wire_buf_b,
+                WIRE_BUFFER_SIZE,
+                false);
+    }
+}
+
+// True while some device owns the audio stream; an owner value of 0xFF
+// is used to mean "no owner".
+bool audio_is_active() {
+    const bool idle = (audio_owner == 0xFF);
+    return !idle;
+}
+
+// True when the given ID (only its low three bits are considered) is the
+// current owner of the audio stream.
+bool audio_is_playing(uint8_t id) {
+    const uint8_t masked_id = id & 7;
+    return masked_id == audio_owner;
+}
+
+/**
+ * One-time initialisation of the audio output path: configures the audio
+ * SPI peripheral for 16-bit frames with TX DMA requests enabled, claims the
+ * two audio DMA channels, and launches the Core1 work loop.
+ */
+void audio_setup() {
+    // setup SPI to blast SP/DIF data over the TX pin
+    spi_set_baudrate(AUDIO_SPI, 5644800); // will be slightly wrong, ~0.03% slow
+    // replace only the data-size and frame-format fields of CR0
+    hw_write_masked(&spi_get_hw(AUDIO_SPI)->cr0,
+            0x1F, // TI mode with 16 bits
+            SPI_SSPCR0_DSS_BITS | SPI_SSPCR0_FRF_BITS);
+    // let the SPI transmit FIFO raise DMA requests
+    spi_get_hw(AUDIO_SPI)->dmacr = SPI_SSPDMACR_TXDMAE_BITS;
+    // enable the SPI port
+    hw_set_bits(&spi_get_hw(AUDIO_SPI)->cr1, SPI_SSPCR1_SSE_BITS);
+
+    // reserve the fixed DMA channels used for audio
+    dma_channel_claim(SOUND_DMA_CHA);
+	dma_channel_claim(SOUND_DMA_CHB);
+
+    logmsg("Starting Core1 for audio");
+    multicore_launch_core1(core1_handler);
+}
+
+/**
+ * Periodic service routine: tops up one STALE sample buffer from the audio
+ * file, or stops playback once all data has been read and consumed.
+ *
+ * Does nothing when audio is inactive or paused. At most one sample buffer
+ * is filled per call. When the remaining data is shorter than a full
+ * buffer, the unread tail of the buffer is zeroed so that samples left over
+ * from an earlier fill are not played again at the end of the track.
+ */
+void audio_poll() {
+    if (!audio_is_active()) return;
+    if (audio_paused) return;
+    if (fleft == 0 && sbufst_a == STALE && sbufst_b == STALE) {
+        // out of data and ready to stop
+        audio_stop(audio_owner);
+        return;
+    } else if (fleft == 0) {
+        // out of data to read but still working on remainder
+        return;
+    } else if (!audio_file->isOpen()) {
+        // closed elsewhere, maybe disk ejected?
+        dbgmsg("------ Playback stop due to closed file");
+        audio_stop(audio_owner);
+        return;
+    }
+
+    // are new audio samples needed from the memory card?
+    uint8_t* audiobuf;
+    if (sbufst_a == STALE) {
+        sbufst_a = FILLING;
+        audiobuf = sample_buf_a;
+    } else if (sbufst_b == STALE) {
+        sbufst_b = FILLING;
+        audiobuf = sample_buf_b;
+    } else {
+        // no data needed this time
+        return;
+    }
+
+    platform_set_sd_callback(NULL, NULL);
+    uint16_t toRead = AUDIO_BUFFER_SIZE;
+    if (fleft < toRead) toRead = fleft;
+
+    // The consumer always walks the entire sample buffer. On the last,
+    // partial read the bytes past toRead would otherwise still hold samples
+    // from a previous fill, so blank them (zero bytes) before reading.
+    for (uint32_t i = toRead; i < AUDIO_BUFFER_SIZE; i++) {
+        audiobuf[i] = 0;
+    }
+
+    if (audio_file->position() != fpos) {
+        // should be uncommon due to SCSI command restrictions on devices
+        // playing audio; if this is showing up in logs a different approach
+        // will be needed to avoid seek performance issues on FAT32 vols
+        dbgmsg("------ Audio seek required on ", audio_owner);
+        if (!audio_file->seek(fpos)) {
+            logmsg("Audio error, unable to seek to ", fpos, ", ID:", audio_owner);
+        }
+    }
+    if (audio_file->read(audiobuf, toRead) != toRead) {
+        logmsg("Audio sample data underrun");
+    }
+    fpos += toRead;
+    fleft -= toRead;
+
+    // publish the buffer that was just filled
+    if (sbufst_a == FILLING) {
+        sbufst_a = READY;
+    } else if (sbufst_b == FILLING) {
+        sbufst_b = READY;
+    }
+}
+
+/**
+ * Starts audio playback of a byte range of an image file.
+ *
+ * Any playback already in progress is stopped first. The request is
+ * rejected (returning false) when the owner is 0xFF, no image is supplied,
+ * the image is not open, the range is empty or starts past the end of the
+ * file, the range is not longer than two sample buffers, or the initial
+ * seek/reads fail. An end beyond the file size is truncated to the file
+ * size rather than treated as an error.
+ *
+ * \param owner  ID taking ownership of the stream (low three bits are used).
+ * \param img    backing store to read sample data from.
+ * \param start  byte offset of the first sample data to play.
+ * \param end    byte offset at which playback ends.
+ * \param swap   stored and later passed through to the sample encoder.
+ * \return true if playback was started, false otherwise.
+ */
+bool audio_play(uint8_t owner, ImageBackingStore* img, uint64_t start, uint64_t end, bool swap) {
+    // stop any existing playback first
+    if (audio_is_active()) audio_stop(audio_owner);
+
+    // dbgmsg("Request to play ('", file, "':", start, ":", end, ")");
+
+    // verify audio file is present and inputs are (somewhat) sane
+    if (owner == 0xFF) {
+        logmsg("Illegal audio owner");
+        return false;
+    }
+    if (start >= end) {
+        logmsg("Invalid range for audio (", start, ":", end, ")");
+        return false;
+    }
+    if (img == NULL) {
+        // guard the dereferences below against a missing backing store
+        logmsg("No image provided for audio playback, ", owner);
+        return false;
+    }
+    platform_set_sd_callback(NULL, NULL);
+    audio_file = img;
+    if (!audio_file->isOpen()) {
+        logmsg("File not open for audio playback, ", owner);
+        return false;
+    }
+    uint64_t len = audio_file->size();
+    if (start > len) {
+        logmsg("File playback request start (", start, ":", len, ") outside file bounds");
+        return false;
+    }
+    // truncate playback end to end of file
+    // we will not consider this to be an error at the moment
+    if (end > len) {
+        dbgmsg("------ Truncate audio play request end ", end, " to file size ", len);
+        end = len;
+    }
+    fleft = end - start;
+    if (fleft <= 2 * AUDIO_BUFFER_SIZE) {
+        logmsg("File playback request (", start, ":", end, ") too short");
+        return false;
+    }
+
+    // read in initial sample buffers
+    if (!audio_file->seek(start)) {
+        logmsg("Sample file failed start seek to ", start);
+        return false;
+    }
+    if (audio_file->read(sample_buf_a, AUDIO_BUFFER_SIZE) != AUDIO_BUFFER_SIZE) {
+        logmsg("File playback start returned fewer bytes than allowed");
+        return false;
+    }
+    if (audio_file->read(sample_buf_b, AUDIO_BUFFER_SIZE) != AUDIO_BUFFER_SIZE) {
+        logmsg("File playback start returned fewer bytes than allowed");
+        return false;
+    }
+
+    // prepare initial tracking state
+    fpos = audio_file->position();
+    fleft -= AUDIO_BUFFER_SIZE * 2;
+    sbufsel = A;
+    sbufpos = 0;
+    sbufswap = swap;
+    sbufst_a = READY;
+    sbufst_b = READY;
+    audio_owner = owner & 7;
+    audio_last_status[audio_owner] = ASC_PLAYING;
+    audio_paused = false;
+
+    // prepare the wire buffers
+    for (uint16_t i = 0; i < WIRE_BUFFER_SIZE; i++) {
+        wire_buf_a[i] = 0;
+        wire_buf_b[i] = 0;
+    }
+    sfcnt = 0;
+    invert = 0;
+
+    // setup the two DMA units to hand-off to each other
+    // to maintain a stable bitstream these need to run without interruption
+    snd_dma_a_cfg = dma_channel_get_default_config(SOUND_DMA_CHA);
+    channel_config_set_transfer_data_size(&snd_dma_a_cfg, DMA_SIZE_16);
+    channel_config_set_dreq(&snd_dma_a_cfg, spi_get_dreq(AUDIO_SPI, true));
+    channel_config_set_read_increment(&snd_dma_a_cfg, true);
+    channel_config_set_chain_to(&snd_dma_a_cfg, SOUND_DMA_CHB);
+    // version of pico-sdk lacks channel_config_set_high_priority()
+    snd_dma_a_cfg.ctrl |= DMA_CH0_CTRL_TRIG_HIGH_PRIORITY_BITS;
+    dma_channel_configure(SOUND_DMA_CHA, &snd_dma_a_cfg, &(spi_get_hw(AUDIO_SPI)->dr),
+            &wire_buf_a, WIRE_BUFFER_SIZE, false);
+    dma_channel_set_irq0_enabled(SOUND_DMA_CHA, true);
+    snd_dma_b_cfg = dma_channel_get_default_config(SOUND_DMA_CHB);
+    channel_config_set_transfer_data_size(&snd_dma_b_cfg, DMA_SIZE_16);
+    channel_config_set_dreq(&snd_dma_b_cfg, spi_get_dreq(AUDIO_SPI, true));
+    channel_config_set_read_increment(&snd_dma_b_cfg, true);
+    channel_config_set_chain_to(&snd_dma_b_cfg, SOUND_DMA_CHA);
+    snd_dma_b_cfg.ctrl |= DMA_CH0_CTRL_TRIG_HIGH_PRIORITY_BITS;
+    dma_channel_configure(SOUND_DMA_CHB, &snd_dma_b_cfg, &(spi_get_hw(AUDIO_SPI)->dr),
+            &wire_buf_b, WIRE_BUFFER_SIZE, false);
+    dma_channel_set_irq0_enabled(SOUND_DMA_CHB, true);
+
+    // ready to go
+    dma_channel_start(SOUND_DMA_CHA);
+    return true;
+}
+
+/**
+ * Pauses or resumes playback on behalf of the owning ID.
+ *
+ * \param id      requesting ID; must match the current owner (low 3 bits).
+ * \param paused  true to pause, false to resume.
+ * \return true if the pause state changed; false when the caller is not
+ *         the owner or the stream is already in the requested state.
+ */
+bool audio_set_paused(uint8_t id, bool paused) {
+    // only the current owner may change the pause state
+    if ((id & 7) != audio_owner) return false;
+    // nothing to do when the requested state equals the current one
+    if (!audio_paused == !paused) return false;
+
+    audio_paused = paused;
+    audio_last_status[audio_owner] = paused ? ASC_PAUSED : ASC_PLAYING;
+    return true;
+}
+
+/**
+ * Stops playback owned by the given ID and returns the subsystem to idle.
+ *
+ * Ignored when the ID (low three bits) is not the current owner. Blocks,
+ * spinning, until both audio DMA channels and the audio SPI report idle.
+ * On return the owner's last status is ASC_COMPLETED and there is no owner.
+ */
+void audio_stop(uint8_t id) {
+    if (audio_owner != (id & 7)) return;
+
+    // to help mute external hardware, send a bunch of '0' samples prior to
+    // halting the datastream; easiest way to do this is invalidating the
+    // sample buffers, same as if there was a sample data underrun
+    sbufst_a = STALE;
+    sbufst_b = STALE;
+
+    // then indicate that the streams should no longer chain to one another
+    // and wait for them to shut down naturally
+    // (audio_dma_irq() reads audio_stopping when it re-arms each channel)
+    audio_stopping = true;
+    while (dma_channel_is_busy(SOUND_DMA_CHA)) tight_loop_contents();
+    while (dma_channel_is_busy(SOUND_DMA_CHB)) tight_loop_contents();
+    while (spi_is_busy(AUDIO_SPI)) tight_loop_contents();
+    audio_stopping = false;
+
+    // idle the subsystem
+    audio_last_status[audio_owner] = ASC_COMPLETED;
+    audio_paused = false;
+    audio_owner = 0xFF; // 0xFF = no owner, see audio_is_active()
+}
+
+/**
+ * Returns the last recorded audio status for an ID (low three bits used).
+ *
+ * ASC_COMPLETED and ASC_ERRORED are reported once: reading either of them
+ * resets the stored status to ASC_NO_STATUS. Other values are left as-is.
+ */
+audio_status_code audio_get_status_code(uint8_t id) {
+    const uint8_t slot = id & 7;
+    const audio_status_code reported = audio_last_status[slot];
+    switch (reported) {
+        case ASC_COMPLETED:
+        case ASC_ERRORED:
+            // terminal states are cleared after being read
+            audio_last_status[slot] = ASC_NO_STATUS;
+            break;
+        default:
+            break;
+    }
+    return reported;
+}
+
+// Returns the stored volume word for the given ID (low three bits used).
+uint16_t audio_get_volume(uint8_t id) {
+    const uint8_t slot = id & 7;
+    return volumes[slot];
+}
+
+// Stores the volume word for the given ID (low three bits used).
+void audio_set_volume(uint8_t id, uint16_t vol) {
+    const uint8_t slot = id & 7;
+    volumes[slot] = vol;
+}
+
+// Returns the stored channel word for the given ID (low three bits used).
+uint16_t audio_get_channel(uint8_t id) {
+    const uint8_t slot = id & 7;
+    return channels[slot];
+}
+
+// Stores the channel word for the given ID (low three bits used).
+void audio_set_channel(uint8_t id, uint16_t chn) {
+    const uint8_t slot = id & 7;
+    channels[slot] = chn;
+}
+
+// Returns the byte offset in the audio file that the next read will use.
+uint64_t audio_get_file_position()
+{
+    const uint64_t current = fpos;
+    return current;
+}
+
+// Sets the audio file byte offset from a logical block address, using
+// 2352 bytes per block. The multiplication is done in 64 bits.
+void audio_set_file_position(uint32_t lba)
+{
+    const uint64_t bytes_per_block = 2352;
+    fpos = bytes_per_block * lba;
+}
+#endif // ENABLE_AUDIO_OUTPUT

+ 63 - 0
lib/ZuluSCSI_platform_RP2350/audio.h

@@ -0,0 +1,63 @@
+/** 
+ * Copyright (C) 2023 saybur
+ * 
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version. 
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details. 
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+**/
+
+#pragma once
+#ifdef ENABLE_AUDIO_OUTPUT
+
+#include <stdint.h>
+
+// audio subsystem DMA channels
+// fixed channel numbers; both are claimed in audio_setup() and are
+// configured to chain to one another during playback
+#define SOUND_DMA_CHA 6
+#define SOUND_DMA_CHB 7
+
+// size of the two audio sample buffers, in bytes
+// these must be divisible by 1024
+// (each buffer therefore holds half the duration of the 8192-byte option)
+// #define AUDIO_BUFFER_SIZE 8192 // ~46.44ms
+#define AUDIO_BUFFER_SIZE 4096 // reduce memory usage
+
+/**
+ * Handler for DMA interrupts
+ *
+ * This is called from scsi_dma_irq() in scsi_accel_rp2040.cpp. That is
+ * obviously a silly way to handle things. However, using
+ * irq_add_shared_handler() causes a lockup, likely due to pico-sdk issue #724
+ * fixed in 1.3.1. Current builds use pico-sdk 1.3.0 and are affected by
+ * the bug. To work around the problem the above exclusive handler
+ * delegates to this function if its normal mask is not matched.
+ */
+void audio_dma_irq();
+
+/**
+ * Indicates if the audio subsystem is actively streaming, including if it is
+ * sending silent data during sample stall events.
+ *
+ * \return true if audio streaming is active, false otherwise.
+ */
+bool audio_is_active();
+
+/**
+ * Initializes the audio subsystem. Should be called only once, toward the end
+ * of platform_late_init().
+ */
+void audio_setup();
+
+/**
+ * Called from platform_poll() to fill sample buffer(s) if needed.
+ */
+void audio_poll();
+
+#endif // ENABLE_AUDIO_OUTPUT

+ 28 - 0
lib/ZuluSCSI_platform_RP2350/bsp.h

@@ -0,0 +1,28 @@
+/** 
+ * SCSI2SD V6 - Copyright (C) 2016 Michael McMaster <michael@codesrc.com>
+ * ZuluSCSI™ - Copyright (c) 2022 Rabbit Hole Computing™
+ * 
+ * This file is licensed under the GPL version 3 or any later version.  
+ * It is derived from bsp.h in SCSI2SD V6.
+ *  
+ * https://www.gnu.org/licenses/gpl-3.0.html
+ * ----
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version. 
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details. 
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+**/
+
+// Dummy file for SCSI2SD.
+
+#pragma once
+
+#define S2S_DMA_ALIGN

+ 41 - 0
lib/ZuluSCSI_platform_RP2350/process-linker-script.py

@@ -0,0 +1,41 @@
+# ZuluSCSI™ - Copyright (c) 2024 Rabbit Hole Computing™
+#
+# ZuluSCSI™ file is licensed under the GPL version 3 or any later version. 
+#
+# https://www.gnu.org/licenses/gpl-3.0.html
+# ----
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version. 
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details. 
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+from string import Template 
+Import ("env")
+
+template_file = 'lib/ZuluSCSI_platform_RP2350/rp2350-template.ld'
+linker_file = env.subst('$BUILD_DIR') + '/rp2350.ld'
+
+def process_template(source, target, env):
+    values = {
+        'program_size': env.GetProjectOption('program_flash_allocation'),
+        'project_name': env.subst('$PIOENV')
+        }
+    with open(template_file, 'r') as t:
+        src = Template(t.read())
+        result = src.substitute(values)
+
+    with open(linker_file, 'w') as linker_script:
+        linker_script.write(result)
+
+# Register process_template as a pre-action on the firmware ELF target so the
+# linker script is generated before that target is built. VerboseAction
+# attaches the message printed when the action runs.
+env.AddPreAction("${BUILD_DIR}/${PROGNAME}.elf",
+        env.VerboseAction(process_template, 
+        'Generating linker script: "' + linker_file + '" from : "' + template_file + '"'
+        )
+)

+ 145 - 0
lib/ZuluSCSI_platform_RP2350/program_flash.cpp

@@ -0,0 +1,145 @@
+/** 
+ * ZuluSCSI™ - Copyright (c) 2022 Rabbit Hole Computing™
+ * 
+ * ZuluSCSI™ firmware is licensed under the GPL version 3 or any later version. 
+ * 
+ * https://www.gnu.org/licenses/gpl-3.0.html
+ * ----
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version. 
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details. 
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+**/
+
+/**
+ * This has been removed from ZuluSCSI_platform.cpp so this code can be
+ * placed in SRAM while ZuluSCSI_platform.cpp.o can be placed in flash
+*/
+
+#include "ZuluSCSI_platform.h"
+#include "ZuluSCSI_log.h"
+#include <hardware/flash.h>
+#include <hardware/structs/xip_ctrl.h>
+#include <hardware/structs/usb.h>
+#include <hardware/structs/nvic.h>
+#include <hardware/structs/scb.h>
+#include <hardware/sync.h>
+
+#ifndef PIO_FRAMEWORK_ARDUINO_NO_USB
+#include <SerialUSB.h>
+#include <class/cdc/cdc_device.h>
+#endif
+
+
+/*****************************************/
+/* Flash reprogramming from bootloader   */
+/*****************************************/
+
+#ifdef PLATFORM_BOOTLOADER_SIZE
+
+extern uint32_t __real_vectors_start;
+extern uint32_t __StackTop;
+static volatile void *g_bootloader_exit_req;
+
+/**
+ * Erases and reprograms one flash page, then verifies it by reading back.
+ *
+ * Placed in a .time_critical section (see the file header: this code is
+ * meant to run from SRAM).
+ *
+ * \param offset  flash offset of the page; asserted to be page-aligned and
+ *                not below PLATFORM_BOOTLOADER_SIZE.
+ * \param buffer  PLATFORM_FLASH_PAGE_SIZE bytes to write.
+ * \return true if the page was written and verified; false if the first
+ *         application page fails the sanity check or verification fails.
+ */
+__attribute__((section(".time_critical.platform_rewrite_flash_page")))
+bool platform_rewrite_flash_page(uint32_t offset, uint8_t buffer[PLATFORM_FLASH_PAGE_SIZE])
+{
+    // For the first application page, sanity-check the top bytes of the
+    // first two 32-bit words of the image before touching flash.
+    // (presumably initial SP in RAM at 0x20xxxxxx and reset vector in
+    // flash at 0x10xxxxxx - confirm against the vector table layout)
+    if (offset == PLATFORM_BOOTLOADER_SIZE)
+    {
+        if (buffer[3] != 0x20 || buffer[7] != 0x10)
+        {
+            logmsg("Invalid firmware file, starts with: ", bytearray(buffer, 16));
+            return false;
+        }
+    }
+
+
+//    if (NVIC_GetEnableIRQ(USBCTRL_IRQ_IRQn))
+    // Tests bit 14 of NVIC ISER0 ('<<' binds tighter than '&').
+    // NOTE(review): 14 is a hard-coded IRQ number standing in for the
+    // USBCTRL IRQ named in the commented-out line - confirm for this chip.
+    if (nvic_hw->iser[0] & 1 << 14)
+    {
+        logmsg("Disabling USB during firmware flashing");
+        //NVIC_DisableIRQ(USBCTRL_IRQ_IRQn);
+        nvic_hw->icer[0] = 1 << 14;
+        usb_hw->main_ctrl = 0;
+    }
+
+    dbgmsg("Writing flash at offset ", offset, " data ", bytearray(buffer, 4));
+    assert(offset % PLATFORM_FLASH_PAGE_SIZE == 0);
+    assert(offset >= PLATFORM_BOOTLOADER_SIZE);
+
+    // Avoid any mbed timer interrupts triggering during the flashing.
+    uint32_t saved_irq = save_and_disable_interrupts();
+
+    // For some reason any code executed after flashing crashes
+    // unless we disable the XIP cache.
+    // Not sure why this happens, as flash_range_program() is flushing
+    // the cache correctly.
+    // The cache is now enabled from bootloader start until it starts
+    // flashing, and again after reset to main firmware.
+    xip_ctrl_hw->ctrl = 0;
+
+    flash_range_erase(offset, PLATFORM_FLASH_PAGE_SIZE);
+    flash_range_program(offset, buffer, PLATFORM_FLASH_PAGE_SIZE);
+
+    // Read the page back word by word through the uncached XIP alias and
+    // compare with what was requested.
+    uint32_t *buf32 = (uint32_t*)buffer;
+    uint32_t num_words = PLATFORM_FLASH_PAGE_SIZE / 4;
+    for (int i = 0; i < num_words; i++)
+    {
+        uint32_t expected = buf32[i];
+        uint32_t actual = *(volatile uint32_t*)(XIP_NOCACHE_NOALLOC_BASE + offset + i * 4);
+
+        if (actual != expected)
+        {
+            // note: logged while interrupts are still disabled
+            logmsg("Flash verify failed at offset ", offset + i * 4, " got ", actual, " expected ", expected);
+            restore_interrupts(saved_irq);
+            return false;
+        }
+    }
+
+    restore_interrupts(saved_irq);
+
+    return true;
+}
+
+
+/**
+ * Leaves the bootloader by requesting a system reset.
+ *
+ * Before resetting, g_bootloader_exit_req is set to its own address; that
+ * is the marker btldr_reset_handler() checks to decide to start the main
+ * application instead of the bootloader. Does not return.
+ */
+void platform_boot_to_main_firmware()
+{
+    // To ensure that the system state is reset properly, we perform
+    // a SYSRESETREQ and jump straight from the reset vector to main application.
+    g_bootloader_exit_req = &g_bootloader_exit_req;
+
+    // SCB->AIRCR = 0x05FA0004;
+    const uint32_t aircr_reset_request = 0x05FA0004;
+    scb_hw->aircr = aircr_reset_request;
+
+    // spin until the reset takes effect
+    while (true) { }
+}
+
+/**
+ * Reset entry used in bootloader builds (installed via btldr_vectors).
+ *
+ * Picks a vector table - the bootloader's real one by default, or the main
+ * application's at XIP_BASE + PLATFORM_BOOTLOADER_SIZE when
+ * g_bootloader_exit_req holds its own address (the marker written by
+ * platform_boot_to_main_firmware()) - then points VTOR at it, loads the
+ * main stack pointer from entry 0 and branches to entry 1.
+ */
+void btldr_reset_handler()
+{
+    uint32_t* application_base = &__real_vectors_start;
+    if (g_bootloader_exit_req == &g_bootloader_exit_req)
+    {
+        // Boot to main application
+        application_base = (uint32_t*)(XIP_BASE + PLATFORM_BOOTLOADER_SIZE);
+    }
+
+    // SCB->VTOR = (uint32_t)application_base;
+    scb_hw->vtor = (uint32_t)application_base;
+    // entry [0] -> MSP, entry [1] -> branch target; control does not
+    // come back here after the bx
+    __asm__(
+        "msr msp, %0\n\t"
+        "bx %1" : : "r" (application_base[0]),
+                    "r" (application_base[1]) : "memory");
+}
+
+// Replace the reset handler when building the bootloader.
+// The bootloader linker script (rp2350_btldr.ld for this platform) places
+// the real vector table at an offset.
+// Entry 0 is the initial stack pointer, entry 1 the reset handler.
+__attribute__((section(".btldr_vectors")))
+const void * btldr_vectors[2] = {&__StackTop, (void*)&btldr_reset_handler};
+
+#endif // PLATFORM_BOOTLOADER_SIZE

+ 222 - 0
lib/ZuluSCSI_platform_RP2350/rp2040-template.ld

@@ -0,0 +1,222 @@
+/** 
+ * ZuluSCSI™ - Copyright (c) 2022 Rabbit Hole Computing™
+ * 
+ * ZuluSCSI™ firmware is licensed under the GPL version 3 or any later version. 
+ * 
+ * https://www.gnu.org/licenses/gpl-3.0.html
+ * ----
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version. 
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details. 
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+**/
+
+/* Memory regions available to the linker.
+ * $program_size is a template placeholder, not ld syntax; presumably it is
+ * substituted by a build script before this file is handed to the linker.
+ */
+MEMORY
+{
+    FLASH(rx) : ORIGIN = 0x10000000, LENGTH = $program_size
+    RAM(rwx) : ORIGIN = 0x20000000, LENGTH = 256k  /* Leave space for pico-debug */
+    SCRATCH_X(rwx) : ORIGIN = 0x20040000, LENGTH = 4k
+    SCRATCH_Y(rwx) : ORIGIN = 0x20041000, LENGTH = 4k
+}
+ENTRY(_entry_point)
+SECTIONS
+{
+    .flash_begin : {
+        __flash_binary_start = .;
+    } > FLASH
+    .boot2 : {
+        __boot2_start__ = .;
+        KEEP (*(.boot2))
+        __boot2_end__ = .;
+    } > FLASH
+    ASSERT(__boot2_end__ - __boot2_start__ == 256,
+        "ERROR: Pico second stage bootloader must be 256 bytes in size")
+
+    /* If ZuluSCSI SD card bootloader is included, it goes in first 128 kB */
+    .text.bootloader : ALIGN(16) SUBALIGN(16)
+    {
+        KEEP(*(.text.btldr*))
+        /* 131072 = 128 * 1024: pad the section out to the 128 kB boundary */
+        . = ALIGN(131072);
+        /* Size guard: the comparison yields 0 when false, turning this
+         * into a division by zero, i.e. a link-time error. */
+        CHECK_BOOTLOADER_SIZE = 1 / (. <= 131072);
+    } > FLASH
+
+    .text : {
+        __logical_binary_start = .;
+        __real_vectors_start = .;
+        KEEP (*(.vectors))
+        KEEP (*(.binary_info_header))
+        __binary_info_header_end = .;
+        KEEP (*(.reset))
+        KEEP (*(.init))
+        *(.fini)
+        *crtbegin.o(.ctors)
+        *crtbegin?.o(.ctors)
+        *(EXCLUDE_FILE(*crtend?.o *crtend.o) .ctors)
+        *(SORT(.ctors.*))
+        *(.ctors)
+        *crtbegin.o(.dtors)
+        *crtbegin?.o(.dtors)
+        *(EXCLUDE_FILE(*crtend?.o *crtend.o) .dtors)
+        *(SORT(.dtors.*))
+        *(.dtors)
+        *(.eh_frame*)
+        . = ALIGN(4);
+
+        /* Put only non-timecritical code in flash
+         * This includes e.g. floating point math routines.
+         */
+        .pio/build/$project_name/src/ZuluSCSI_log.cpp.o(.text .text*)
+        .pio/build/$project_name/src/ZuluSCSI_log_trace.cpp.o(.text .text*)
+        .pio/build/$project_name/src/ZuluSCSI_settings.cpp.o(.text .text*)
+        .pio/build/$project_name/src/QuirksCheck.cpp.o(.text .text*)
+        *libZuluSCSI_platform_RP2040.a:ZuluSCSI_platform.cpp.o(.text .text*)
+        *libm*:(.text .text*)
+        *libc*:(.text .text*)
+        *libgcc*:*df*(.text .text*)
+        *USB*(.text .text*)
+        *SPI*(.text .text*)
+        *Spi*(.text .text*)
+        *spi*(.text .text*)
+        *stdc*:(.text .text*)
+        *supc*:(.text .text*)
+        *nosys*:(.text .text*)
+        *libc*:*printf*(.text .text*)
+        *libc*:*toa*(.text .text*)
+        *libminIni.a:(.text .text*)
+        *libCUEParser.a:(.text .text*)
+
+        /* RP2040 breakpoints in RAM code don't always work very well
+         * because the boot routine tends to overwrite them.
+         * Uncommenting this line puts all code in flash.
+         */
+        /* *(.text .text*) */
+    } > FLASH
+    .rodata : {
+        . = ALIGN(4);
+        *(SORT_BY_ALIGNMENT(SORT_BY_NAME(.flashdata*)))
+        *(.rodata)
+        *(.rodata*)
+        . = ALIGN(4);
+    } > FLASH
+    .ARM.extab :
+    {
+        *(.ARM.extab* .gnu.linkonce.armextab.*)
+    } > FLASH
+    __exidx_start = .;
+    .ARM.exidx :
+    {
+        *(.ARM.exidx* .gnu.linkonce.armexidx.*)
+    } > FLASH
+    __exidx_end = .;
+    . = ALIGN(4);
+    __binary_info_start = .;
+    .binary_info :
+    {
+        KEEP(*(.binary_info.keep.*))
+        *(.binary_info.*)
+    } > FLASH
+    __binary_info_end = .;
+    . = ALIGN(4);
+    __etext = .;
+   .ram_vector_table (COPY): {
+        *(.ram_vector_table)
+    } > RAM
+    .data : {
+        __data_start__ = .;
+        *(vtable)
+
+        /* Time critical code will go here to avoid external flash latency */
+        *(.time_critical*)
+        . = ALIGN(4);
+        *(.text)
+        *(.text*)
+        . = ALIGN(4);
+        *(.data*)
+        . = ALIGN(4);
+        *(.after_data.*)
+        . = ALIGN(4);
+        PROVIDE_HIDDEN (__mutex_array_start = .);
+        KEEP(*(SORT(.mutex_array.*)))
+        KEEP(*(.mutex_array))
+        PROVIDE_HIDDEN (__mutex_array_end = .);
+        . = ALIGN(4);
+        PROVIDE_HIDDEN (__preinit_array_start = .);
+        KEEP(*(SORT(.preinit_array.*)))
+        KEEP(*(.preinit_array))
+        PROVIDE_HIDDEN (__preinit_array_end = .);
+        . = ALIGN(4);
+        PROVIDE_HIDDEN (__init_array_start = .);
+        KEEP(*(SORT(.init_array.*)))
+        KEEP(*(.init_array))
+        PROVIDE_HIDDEN (__init_array_end = .);
+        . = ALIGN(4);
+        PROVIDE_HIDDEN (__fini_array_start = .);
+        *(SORT(.fini_array.*))
+        *(.fini_array)
+        PROVIDE_HIDDEN (__fini_array_end = .);
+        *(.jcr)
+        . = ALIGN(4);
+        __data_end__ = .;
+    } > RAM AT> FLASH
+    .uninitialized_data (COPY): {
+        . = ALIGN(4);
+        *(.uninitialized_data*)
+    } > RAM
+    .scratch_x : {
+        __scratch_x_start__ = .;
+        *(.scratch_x.*)
+        . = ALIGN(4);
+        __scratch_x_end__ = .;
+    } > SCRATCH_X AT > FLASH
+    __scratch_x_source__ = LOADADDR(.scratch_x);
+    .scratch_y : {
+        __scratch_y_start__ = .;
+        *(.scratch_y.*)
+        . = ALIGN(4);
+        __scratch_y_end__ = .;
+    } > SCRATCH_Y AT > FLASH
+    __scratch_y_source__ = LOADADDR(.scratch_y);
+    .bss : {
+        . = ALIGN(4);
+        __bss_start__ = .;
+        *(SORT_BY_ALIGNMENT(SORT_BY_NAME(.bss*)))
+        *(COMMON)
+        . = ALIGN(4);
+        __bss_end__ = .;
+    } > RAM
+    .heap (COPY):
+    {
+        __end__ = .;
+        PROVIDE(end = .);
+        *(.heap*)
+        . = ORIGIN(RAM) + LENGTH(RAM) - 0x400;
+        __HeapLimit = .;
+    } > RAM
+    .stack1_dummy (COPY):
+    {
+        *(.stack1*)
+    } > SCRATCH_X
+    .stack_dummy (COPY):
+    {
+        *(.stack*)
+    } > RAM
+    .flash_end : {
+        __flash_binary_end = .;
+    } > FLASH
+    __StackTop = ORIGIN(RAM) + LENGTH(RAM);
+    __StackLimit = __StackTop - 0x400;
+    __StackOneTop = ORIGIN(SCRATCH_X) + LENGTH(SCRATCH_X);
+    __StackOneBottom = __StackOneTop - SIZEOF(.stack1_dummy);
+    __StackBottom = __StackTop - SIZEOF(.stack_dummy);
+    PROVIDE(__stack = __StackTop);
+    ASSERT(__StackLimit >= __HeapLimit, "region RAM overflowed")
+    ASSERT( __binary_info_header_end - __logical_binary_start <= 256, "Binary info must be in first 256 bytes of the binary")
+}

+ 352 - 0
lib/ZuluSCSI_platform_RP2350/rp2350-template.ld

@@ -0,0 +1,352 @@
+/* Based on GCC ARM embedded samples.
+   Defines the following symbols for use by code:
+    __exidx_start
+    __exidx_end
+    __etext
+    __data_start__
+    __preinit_array_start
+    __preinit_array_end
+    __init_array_start
+    __init_array_end
+    __fini_array_start
+    __fini_array_end
+    __data_end__
+    __bss_start__
+    __bss_end__
+    __end__
+    end
+    __HeapLimit
+    __StackLimit
+    __StackTop
+    __stack (== StackTop)
+*/
+
+/* Memory regions for the RP2350 main firmware image.
+ *
+ * FLASH length is a placeholder that is filled in when this template is
+ * processed. PSRAM has zero length, so nothing can actually be placed in it.
+ *
+ * RAM and the two scratch banks use the same layout as rp2350_btldr.ld:
+ * 512k of main SRAM at 0x20000000 followed by two 4k scratch banks. The
+ * previous values (256k RAM, scratch at 0x20040000) were the RP2040 layout.
+ */
+MEMORY
+{
+    FLASH(rx) : ORIGIN = 0x10000000, LENGTH = $program_size
+    PSRAM(rwx) : ORIGIN = 0x11000000, LENGTH = 0
+    RAM(rwx) : ORIGIN = 0x20000000, LENGTH = 512k
+    SCRATCH_X(rwx) : ORIGIN = 0x20080000, LENGTH = 4k
+    SCRATCH_Y(rwx) : ORIGIN = 0x20081000, LENGTH = 4k
+}
+
+PROVIDE ( _EEPROM_start = __EEPROM_START__ );
+PROVIDE ( _FS_start     = __FS_START__ );
+PROVIDE ( _FS_end       = __FS_END__ );
+
+ENTRY(_entry_point)
+
+SECTIONS
+{
+    .flash_begin : {
+        __flash_binary_start = .;
+    } > FLASH
+
+    /* The bootrom will enter the image at the point indicated in your
+       IMAGE_DEF, which is usually the reset handler of your vector table.
+
+       The debugger will use the ELF entry point, which is the _entry_point
+       symbol, and in our case is *different from the bootrom's entry point.*
+       This is used to go back through the bootrom on debugger launches only,
+       to perform the same initial flash setup that would be performed on a
+       cold boot.
+    */
+
+    /* If ZuluSCSI SD card bootloader is included, it goes in first 128 kB.
+     * All input sections named .text.btldr* are collected here and kept even
+     * when nothing references them. The location counter is then rounded up
+     * to a 131072 byte (128 kB) boundary, so the .text section that follows
+     * starts on that boundary.
+     */
+    .text.bootloader : ALIGN(16) SUBALIGN(16)
+    {
+        KEEP(*(.text.btldr*))
+        . = ALIGN(131072);
+        /* Size guard: the comparison yields 0 when it does not hold, which
+         * turns the division into a division by zero and is intended to make
+         * the link fail when the bootloader does not fit. */
+        CHECK_BOOTLOADER_SIZE = 1 / (. <= 131072);
+    } > FLASH
+
+    .text : {
+        __logical_binary_start = .;
+        KEEP (*(.vectors))
+        KEEP (*(.binary_info_header))
+        __binary_info_header_end = .;
+        KEEP (*(.embedded_block))
+        __embedded_block_end = .;
+        KEEP (*(.reset))
+        /* TODO revisit this now memset/memcpy/float in ROM */
+        /* bit of a hack right now to exclude all floating point and time critical (e.g. memset, memcpy) code from
+         * FLASH ... we will include any thing excluded here in .data below by default */
+        *(.init)
+        *libgcc.a:cmse_nonsecure_call.o
+        *(EXCLUDE_FILE(*libgcc.a: *libc.a:*lib_a-mem*.o *libm.a:) .text*)
+        *(.fini)
+        /* Pull all c'tors into .text */
+        *crtbegin.o(.ctors)
+        *crtbegin?.o(.ctors)
+        *(EXCLUDE_FILE(*crtend?.o *crtend.o) .ctors)
+        *(SORT(.ctors.*))
+        *(.ctors)
+        /* Followed by destructors */
+        *crtbegin.o(.dtors)
+        *crtbegin?.o(.dtors)
+        *(EXCLUDE_FILE(*crtend?.o *crtend.o) .dtors)
+        *(SORT(.dtors.*))
+        *(.dtors)
+
+        . = ALIGN(4);
+        /* preinit data */
+        PROVIDE_HIDDEN (__preinit_array_start = .);
+        KEEP(*(SORT(.preinit_array.*)))
+        KEEP(*(.preinit_array))
+        PROVIDE_HIDDEN (__preinit_array_end = .);
+
+        . = ALIGN(4);
+        /* init data */
+        PROVIDE_HIDDEN (__init_array_start = .);
+        KEEP(*(SORT(.init_array.*)))
+        KEEP(*(.init_array))
+        PROVIDE_HIDDEN (__init_array_end = .);
+
+        . = ALIGN(4);
+        /* finit data */
+        PROVIDE_HIDDEN (__fini_array_start = .);
+        *(SORT(.fini_array.*))
+        *(.fini_array)
+        PROVIDE_HIDDEN (__fini_array_end = .);
+
+        *(.eh_frame*)
+        . = ALIGN(4);
+
+        /* Put only non-timecritical code in flash.
+         * This includes e.g. floating point math routines.
+         */
+        .pio/build/$project_name/src/ZuluSCSI_log.cpp.o(.text .text*)
+        .pio/build/$project_name/src/ZuluSCSI_log_trace.cpp.o(.text .text*)
+        .pio/build/$project_name/src/ZuluSCSI_settings.cpp.o(.text .text*)
+        .pio/build/$project_name/src/QuirksCheck.cpp.o(.text .text*)
+        *libZuluSCSI_platform_RP2350.a:ZuluSCSI_platform.cpp.o(.text .text*)
+        *libm*:(.text .text*)
+        *libc*:(.text .text*)
+        *libgcc*:*df*(.text .text*)
+        *USB*(.text .text*)
+        *SPI*(.text .text*)
+        *Spi*(.text .text*)
+        *spi*(.text .text*)
+        *stdc*:(.text .text*)
+        *supc*:(.text .text*)
+        *nosys*:(.text .text*)
+        *libc*:*printf*(.text .text*)
+        *libc*:*toa*(.text .text*)
+        *libminIni.a:(.text .text*)
+        *libCUEParser.a:(.text .text*)
+    } > FLASH
+
+    /* Note the boot2 section is optional, and should be discarded if there is
+       no reference to it *inside* the binary, as it is not called by the
+       bootrom. (The bootrom performs a simple best-effort XIP setup and
+       leaves it to the binary to do anything more sophisticated.) However
+       there is still a size limit of 256 bytes, to ensure the boot2 can be
+       stored in boot RAM.
+
+       Really this is a "XIP setup function" -- the name boot2 is historic and
+       refers to its dual-purpose on RP2040, where it also handled vectoring
+       from the bootrom into the user image.
+    */
+
+    .boot2 : {
+        __boot2_start__ = .;
+        *(.boot2)
+        __boot2_end__ = .;
+    } > FLASH
+
+    ASSERT(__boot2_end__ - __boot2_start__ <= 256,
+        "ERROR: Pico second stage bootloader must be no more than 256 bytes in size")
+
+    .rodata : {
+        *(EXCLUDE_FILE(*libgcc.a: *libc.a:*lib_a-mem*.o *libm.a:) .rodata*)
+        *(.srodata*)
+        . = ALIGN(4);
+        *(SORT_BY_ALIGNMENT(SORT_BY_NAME(.flashdata*)))
+        . = ALIGN(4);
+    } > FLASH
+
+    .ARM.extab :
+    {
+        *(.ARM.extab* .gnu.linkonce.armextab.*)
+    } > FLASH
+
+    __exidx_start = .;
+    .ARM.exidx :
+    {
+        *(.ARM.exidx* .gnu.linkonce.armexidx.*)
+    } > FLASH
+    __exidx_end = .;
+
+    /* Machine inspectable binary information */
+    . = ALIGN(4);
+    __binary_info_start = .;
+    .binary_info :
+    {
+        KEEP(*(.binary_info.keep.*))
+        *(.binary_info.*)
+    } > FLASH
+    __binary_info_end = .;
+    . = ALIGN(4);
+
+    .ram_vector_table (NOLOAD): {
+        *(.ram_vector_table)
+    } > RAM
+
+    .uninitialized_data (NOLOAD): {
+        . = ALIGN(4);
+        *(.uninitialized_data*)
+    } > RAM
+
+    .data : {
+        __data_start__ = .;
+        *(vtable)
+
+        *(.time_critical*)
+
+        /* remaining .text and .rodata; i.e. stuff we exclude above because we want it in RAM */
+        *(.text)
+        *(.text*)
+        . = ALIGN(4);
+        *(.rodata*)
+        . = ALIGN(4);
+
+        *(.data*)
+        *(.sdata*)
+
+        . = ALIGN(4);
+        *(.after_data.*)
+        . = ALIGN(4);
+        /* preinit data */
+        PROVIDE_HIDDEN (__mutex_array_start = .);
+        KEEP(*(SORT(.mutex_array.*)))
+        KEEP(*(.mutex_array))
+        PROVIDE_HIDDEN (__mutex_array_end = .);
+
+        *(.jcr)
+        . = ALIGN(4);
+    } > RAM AT> FLASH
+
+    .tdata : {
+        . = ALIGN(4);
+		*(.tdata .tdata.* .gnu.linkonce.td.*)
+        /* All data end */
+        __tdata_end = .;
+    } > RAM AT> FLASH
+    PROVIDE(__data_end__ = .);
+
+    /* __etext is (for backwards compatibility) the name of the .data init source pointer (...) */
+    __etext = LOADADDR(.data);
+
+    .tbss (NOLOAD) : {
+        . = ALIGN(4);
+        __bss_start__ = .;
+        __tls_base = .;
+        *(.tbss .tbss.* .gnu.linkonce.tb.*)
+        *(.tcommon)
+
+        __tls_end = .;
+    } > RAM
+
+    .bss (NOLOAD) : {
+        . = ALIGN(4);
+        __tbss_end = .;
+
+        *(SORT_BY_ALIGNMENT(SORT_BY_NAME(.bss*)))
+        *(COMMON)
+        /* Python template escaping dollar sign with two dollar signs */
+        PROVIDE(__global_pointer$$ = . + 2K);
+        *(.sbss*)
+        . = ALIGN(4);
+        __bss_end__ = .;
+    } > RAM
+
+    .heap (NOLOAD):
+    {
+        __end__ = .;
+        end = __end__;
+        KEEP(*(.heap*))
+        /* historically on GCC sbrk was growing past __HeapLimit to __StackLimit, however
+           to be more compatible, we now set __HeapLimit explicitly to where the end of the heap is */
+        . = ORIGIN(RAM) + LENGTH(RAM);
+        __HeapLimit = .;
+    } > RAM
+
+    /* Start and end symbols must be word-aligned */
+    .scratch_x : {
+        __scratch_x_start__ = .;
+        *(.scratch_x.*)
+        . = ALIGN(4);
+        __scratch_x_end__ = .;
+    } > SCRATCH_X AT > FLASH
+    __scratch_x_source__ = LOADADDR(.scratch_x);
+
+    .scratch_y : {
+        __scratch_y_start__ = .;
+        *(.scratch_y.*)
+        . = ALIGN(4);
+        __scratch_y_end__ = .;
+    } > SCRATCH_Y AT > FLASH
+    __scratch_y_source__ = LOADADDR(.scratch_y);
+
+    /* .stack*_dummy section doesn't contains any symbols. It is only
+     * used for linker to calculate size of stack sections, and assign
+     * values to stack symbols later
+     *
+     * stack1 section may be empty/missing if platform_launch_core1 is not used */
+
+    /* by default we put core 0 stack at the end of scratch Y, so that if core 1
+     * stack is not used then all of SCRATCH_X is free.
+     */
+    .stack1_dummy (NOLOAD):
+    {
+        *(.stack1*)
+    } > SCRATCH_X
+    .stack_dummy (NOLOAD):
+    {
+        KEEP(*(.stack*))
+    } > SCRATCH_Y
+
+    .flash_end : {
+        KEEP(*(.embedded_end_block*))
+        PROVIDE(__flash_binary_end = .);
+    } > FLASH =0xaa
+
+    .psram (NOLOAD) : {
+        __psram_start__ = .;
+        *(.psram*)
+        . = ALIGN(4096);
+        __psram_heap_start__ = .;
+    } > PSRAM
+
+    /* stack limit is poorly named, but historically is maximum heap ptr */
+    __StackLimit = ORIGIN(RAM) + LENGTH(RAM);
+    __StackOneTop = ORIGIN(SCRATCH_X) + LENGTH(SCRATCH_X);
+    __StackTop = ORIGIN(SCRATCH_Y) + LENGTH(SCRATCH_Y);
+    __StackOneBottom = __StackOneTop - SIZEOF(.stack1_dummy);
+    __StackBottom = __StackTop - SIZEOF(.stack_dummy);
+    PROVIDE(__stack = __StackTop);
+
+    /* picolibc and LLVM */
+    PROVIDE (__heap_start = __end__);
+    PROVIDE (__heap_end = __HeapLimit);
+    PROVIDE( __tls_align = MAX(ALIGNOF(.tdata), ALIGNOF(.tbss)) );
+    PROVIDE( __tls_size_align = (__tls_size + __tls_align - 1) & ~(__tls_align - 1));
+    PROVIDE( __arm32_tls_tcb_offset = MAX(8, __tls_align) );
+
+    /* TLSF */
+    PROVIDE (__psram_start = __psram_start__);
+    PROVIDE (__psram_heap_start = __psram_heap_start__);
+
+    /* llvm-libc */
+    PROVIDE (_end = __end__);
+    PROVIDE (__llvm_libc_heap_limit = __HeapLimit);
+
+    /* Check if data + heap + stack exceeds RAM limit */
+    ASSERT(__StackLimit >= __HeapLimit, "region RAM overflowed")
+
+    ASSERT( __binary_info_header_end - __logical_binary_start <= 1024, "Binary info must be in first 1024 bytes of the binary")
+    ASSERT( __embedded_block_end - __logical_binary_start <= 4096, "Embedded block must be in first 4096 bytes of the binary")
+
+    /* todo assert on extra code */
+}
+

+ 319 - 0
lib/ZuluSCSI_platform_RP2350/rp2350_btldr.ld

@@ -0,0 +1,319 @@
+/** 
+ * ZuluSCSI™ - Copyright (c) 2022 Rabbit Hole Computing™
+ * 
+ * ZuluSCSI™ firmware is licensed under the GPL version 3 or any later version. 
+ * 
+ * https://www.gnu.org/licenses/gpl-3.0.html
+ * ----
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version. 
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details. 
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+**/
+
+
+/*
+ *
+ * Customized linker script for building bootloader
+ *
+ */
+
+ MEMORY
+{
+    /* The bootloader is linked to begin at 0x10000100 (the FLASH ORIGIN below).
+     * First 256 bytes are reserved for RP2040 second stage bootloader,
+     * which comes as part of the main firmware.elf and is never overwritten.
+     * NOTE(review): the sentence above is inherited from the RP2040 script;
+     * confirm what occupies the first 256 bytes of flash on RP2350.
+     */
+    FLASH(rx) : ORIGIN = 0x10000100, LENGTH = 128k-256
+    PSRAM(rwx) : ORIGIN = 0x11000000, LENGTH = 0
+    RAM(rwx) : ORIGIN = 0x20000000, LENGTH = 512k  /* Leave space for pico-debug */
+    SCRATCH_X(rwx) : ORIGIN = 0x20080000, LENGTH = 4k
+    SCRATCH_Y(rwx) : ORIGIN = 0x20081000, LENGTH = 4k
+}
+PROVIDE ( _EEPROM_start = __EEPROM_START__ );
+PROVIDE ( _FS_start     = __FS_START__ );
+PROVIDE ( _FS_end       = __FS_END__ );
+
+ENTRY(_entry_point)
+SECTIONS
+{
+    .flash_begin : {
+        __flash_binary_start = .;
+    } > FLASH
+
+    .text : {
+        __logical_binary_start = .;
+        KEEP (*(.btldr_vectors))
+        KEEP (*(.binary_info_header))
+        __binary_info_header_end = .;
+        . = ALIGN(256);
+        __real_vectors_start = .;
+        KEEP (*(.vectors))
+        KEEP (*(.embedded_block))
+        __embedded_block_end = .;
+        KEEP (*(.reset))
+        /* TODO revisit this now memset/memcpy/float in ROM */
+        /* bit of a hack right now to exclude all floating point and time critical (e.g. memset, memcpy) code from
+         * FLASH ... we will include any thing excluded here in .data below by default */
+        *(.init)
+        *libgcc.a:cmse_nonsecure_call.o
+        *(EXCLUDE_FILE(*libgcc.a: *libc.a:*lib_a-mem*.o *libm.a:) .text*)
+        *(.fini)
+        /* Pull all c'tors into .text */
+        *crtbegin.o(.ctors)
+        *crtbegin?.o(.ctors)
+        *(EXCLUDE_FILE(*crtend?.o *crtend.o) .ctors)
+        *(SORT(.ctors.*))
+        *(.ctors)
+        /* Followed by destructors */
+        *crtbegin.o(.dtors)
+        *crtbegin?.o(.dtors)
+        *(EXCLUDE_FILE(*crtend?.o *crtend.o) .dtors)
+        *(SORT(.dtors.*))
+        *(.dtors)
+
+        . = ALIGN(4);
+        /* preinit data */
+        PROVIDE_HIDDEN (__preinit_array_start = .);
+        KEEP(*(SORT(.preinit_array.*)))
+        KEEP(*(.preinit_array))
+        PROVIDE_HIDDEN (__preinit_array_end = .);
+
+        . = ALIGN(4);
+        /* init data */
+        PROVIDE_HIDDEN (__init_array_start = .);
+        KEEP(*(SORT(.init_array.*)))
+        KEEP(*(.init_array))
+        PROVIDE_HIDDEN (__init_array_end = .);
+
+        . = ALIGN(4);
+        /* finit data */
+        PROVIDE_HIDDEN (__fini_array_start = .);
+        *(SORT(.fini_array.*))
+        *(.fini_array)
+        PROVIDE_HIDDEN (__fini_array_end = .);
+
+        *(.eh_frame*)
+        . = ALIGN(4);
+    } > FLASH
+
+    /* Note the boot2 section is optional, and should be discarded if there is
+       no reference to it *inside* the binary, as it is not called by the
+       bootrom. (The bootrom performs a simple best-effort XIP setup and
+       leaves it to the binary to do anything more sophisticated.) However
+       there is still a size limit of 256 bytes, to ensure the boot2 can be
+       stored in boot RAM.
+
+       Really this is a "XIP setup function" -- the name boot2 is historic and
+       refers to its dual-purpose on RP2040, where it also handled vectoring
+       from the bootrom into the user image.
+    */
+
+    .boot2 : {
+        __boot2_start__ = .;
+        *(.boot2)
+        __boot2_end__ = .;
+    } > FLASH
+
+    ASSERT(__boot2_end__ - __boot2_start__ <= 256,
+        "ERROR: Pico second stage bootloader must be no more than 256 bytes in size")
+
+    .rodata : {
+        *(EXCLUDE_FILE(*libgcc.a: *libc.a:*lib_a-mem*.o *libm.a:) .rodata*)
+        *(.srodata*)
+        . = ALIGN(4);
+        *(SORT_BY_ALIGNMENT(SORT_BY_NAME(.flashdata*)))
+        . = ALIGN(4);
+    } > FLASH
+
+    .ARM.extab :
+    {
+        *(.ARM.extab* .gnu.linkonce.armextab.*)
+    } > FLASH
+
+    __exidx_start = .;
+    .ARM.exidx :
+    {
+        *(.ARM.exidx* .gnu.linkonce.armexidx.*)
+    } > FLASH
+    __exidx_end = .;
+
+    /* Machine inspectable binary information */
+    . = ALIGN(4);
+    __binary_info_start = .;
+    .binary_info :
+    {
+        KEEP(*(.binary_info.keep.*))
+        *(.binary_info.*)
+    } > FLASH
+    __binary_info_end = .;
+    . = ALIGN(4);
+
+    .ram_vector_table (NOLOAD): {
+        *(.ram_vector_table)
+    } > RAM
+
+    .uninitialized_data (NOLOAD): {
+        . = ALIGN(4);
+        *(.uninitialized_data*)
+    } > RAM
+
+    .data : {
+        __data_start__ = .;
+        *(vtable)
+
+        *(.time_critical*)
+
+        /* remaining .text and .rodata; i.e. stuff we exclude above because we want it in RAM */
+        *(.text*)
+        . = ALIGN(4);
+        *(.rodata*)
+        . = ALIGN(4);
+
+        *(.data*)
+        *(.sdata*)
+
+        . = ALIGN(4);
+        *(.after_data.*)
+        . = ALIGN(4);
+        /* preinit data */
+        PROVIDE_HIDDEN (__mutex_array_start = .);
+        KEEP(*(SORT(.mutex_array.*)))
+        KEEP(*(.mutex_array))
+        PROVIDE_HIDDEN (__mutex_array_end = .);
+
+        *(.jcr)
+        . = ALIGN(4);
+    } > RAM AT> FLASH
+
+    .tdata : {
+        . = ALIGN(4);
+		*(.tdata .tdata.* .gnu.linkonce.td.*)
+        /* All data end */
+        __tdata_end = .;
+    } > RAM AT> FLASH
+    PROVIDE(__data_end__ = .);
+
+    /* __etext is (for backwards compatibility) the name of the .data init source pointer (...) */
+    __etext = LOADADDR(.data);
+
+    .tbss (NOLOAD) : {
+        . = ALIGN(4);
+        __bss_start__ = .;
+        __tls_base = .;
+        *(.tbss .tbss.* .gnu.linkonce.tb.*)
+        *(.tcommon)
+
+        __tls_end = .;
+    } > RAM
+
+    .bss (NOLOAD) : {
+        . = ALIGN(4);
+        __tbss_end = .;
+
+        *(SORT_BY_ALIGNMENT(SORT_BY_NAME(.bss*)))
+        *(COMMON)
+        PROVIDE(__global_pointer$ = . + 2K);
+        *(.sbss*)
+        . = ALIGN(4);
+        __bss_end__ = .;
+    } > RAM
+
+    .heap (NOLOAD):
+    {
+        __end__ = .;
+        end = __end__;
+        KEEP(*(.heap*))
+        /* historically on GCC sbrk was growing past __HeapLimit to __StackLimit, however
+           to be more compatible, we now set __HeapLimit explicitly to where the end of the heap is */
+        . = ORIGIN(RAM) + LENGTH(RAM);
+        __HeapLimit = .;
+    } > RAM
+
+    /* Start and end symbols must be word-aligned */
+    .scratch_x : {
+        __scratch_x_start__ = .;
+        *(.scratch_x.*)
+        . = ALIGN(4);
+        __scratch_x_end__ = .;
+    } > SCRATCH_X AT > FLASH
+    __scratch_x_source__ = LOADADDR(.scratch_x);
+
+    .scratch_y : {
+        __scratch_y_start__ = .;
+        *(.scratch_y.*)
+        . = ALIGN(4);
+        __scratch_y_end__ = .;
+    } > SCRATCH_Y AT > FLASH
+    __scratch_y_source__ = LOADADDR(.scratch_y);
+
+    /* .stack*_dummy section doesn't contains any symbols. It is only
+     * used for linker to calculate size of stack sections, and assign
+     * values to stack symbols later
+     *
+     * stack1 section may be empty/missing if platform_launch_core1 is not used */
+
+    /* by default we put core 0 stack at the end of scratch Y, so that if core 1
+     * stack is not used then all of SCRATCH_X is free.
+     */
+    .stack1_dummy (NOLOAD):
+    {
+        *(.stack1*)
+    } > SCRATCH_X
+    .stack_dummy (NOLOAD):
+    {
+        KEEP(*(.stack*))
+    } > SCRATCH_Y
+
+    .flash_end : {
+        KEEP(*(.embedded_end_block*))
+        PROVIDE(__flash_binary_end = .);
+    } > FLASH =0xaa
+
+    .psram (NOLOAD) : {
+        __psram_start__ = .;
+        *(.psram*)
+        . = ALIGN(4096);
+        __psram_heap_start__ = .;
+    } > PSRAM
+
+    /* stack limit is poorly named, but historically is maximum heap ptr */
+    __StackLimit = ORIGIN(RAM) + LENGTH(RAM);
+    __StackOneTop = ORIGIN(SCRATCH_X) + LENGTH(SCRATCH_X);
+    __StackTop = ORIGIN(SCRATCH_Y) + LENGTH(SCRATCH_Y);
+    __StackOneBottom = __StackOneTop - SIZEOF(.stack1_dummy);
+    __StackBottom = __StackTop - SIZEOF(.stack_dummy);
+    PROVIDE(__stack = __StackTop);
+
+    /* picolibc and LLVM */
+    PROVIDE (__heap_start = __end__);
+    PROVIDE (__heap_end = __HeapLimit);
+    PROVIDE( __tls_align = MAX(ALIGNOF(.tdata), ALIGNOF(.tbss)) );
+    PROVIDE( __tls_size_align = (__tls_size + __tls_align - 1) & ~(__tls_align - 1));
+    PROVIDE( __arm32_tls_tcb_offset = MAX(8, __tls_align) );
+
+    /* TLSF */
+    PROVIDE (__psram_start = __psram_start__);
+    PROVIDE (__psram_heap_start = __psram_heap_start__);
+
+    /* llvm-libc */
+    PROVIDE (_end = __end__);
+    PROVIDE (__llvm_libc_heap_limit = __HeapLimit);
+
+    /* Check if data + heap + stack exceeds RAM limit */
+    ASSERT(__StackLimit >= __HeapLimit, "region RAM overflowed")
+
+    ASSERT( __binary_info_header_end - __logical_binary_start <= 1024, "Binary info must be in first 1024 bytes of the binary")
+    ASSERT( __embedded_block_end - __logical_binary_start <= 4096, "Embedded block must be in first 4096 bytes of the binary")
+
+    /* todo assert on extra code */
+}
+

+ 14 - 0
lib/ZuluSCSI_platform_RP2350/run_pioasm.sh

@@ -0,0 +1,14 @@
+#!/bin/bash
+
+# This script regenerates the .pio.h files from .pio
+
+pioasm sdio_RP2040.pio sdio_RP2040.pio.h
+pioasm sdio_Pico.pio sdio_Pico.pio.h
+pioasm sdio_BS2.pio sdio_BS2.pio.h
+
+pioasm scsi_accel_target_RP2040.pio scsi_accel_target_RP2040.pio.h
+pioasm scsi_accel_target_BS2.pio scsi_accel_target_BS2.pio.h
+pioasm scsi_accel_target_Pico.pio scsi_accel_target_Pico.pio.h
+
+pioasm scsi_accel_host_RP2040.pio scsi_accel_host_RP2040.pio.h
+pioasm scsi_accel_host_Pico.pio scsi_accel_host_Pico.pio.h

+ 36 - 0
lib/ZuluSCSI_platform_RP2350/scsi2sd_time.h

@@ -0,0 +1,36 @@
+/** 
+ * SCSI2SD V6 - Copyright (C) 2014 Michael McMaster <michael@codesrc.com>
+ * ZuluSCSI™ - Copyright (c) 2022 Rabbit Hole Computing™
+ * 
+ * This file is licensed under the GPL version 3 or any later version.  
+ * It is derived from time.h in SCSI2SD V6.
+ *  
+ * https://www.gnu.org/licenses/gpl-3.0.html
+ * ----
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version. 
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details. 
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+**/
+
+// Timing functions for SCSI2SD.
+// This file is derived from time.h in SCSI2SD-V6.
+
+#pragma once
+
+#include <stdint.h>
+#include "ZuluSCSI_platform.h"
+
+// Thin wrappers that map the SCSI2SD timing API onto millis() and delay_ns().
+//
+// Macro arguments are parenthesized so that an expression argument such as
+// s2s_delay_ms(a + b) is scaled as a whole instead of only its last operand.
+//
+// NOTE(review): the scaling multiplication is performed in the argument's own
+// type, so large values can overflow before they reach delay_ns() - confirm
+// against the delay_ns() parameter type in ZuluSCSI_platform.h.
+#define s2s_getTime_ms() millis()
+#define s2s_elapsedTime_ms(since) ((uint32_t)(millis() - (since)))
+#define s2s_delay_ms(x) delay_ns((x) * 1000000)
+#define s2s_delay_us(x) delay_ns((x) * 1000)
+#define s2s_delay_ns(x) delay_ns(x)

+ 303 - 0
lib/ZuluSCSI_platform_RP2350/scsiHostPhy.cpp

@@ -0,0 +1,303 @@
+/** 
+ * ZuluSCSI™ - Copyright (c) 2022 Rabbit Hole Computing™
+ * 
+ * ZuluSCSI™ firmware is licensed under the GPL version 3 or any later version. 
+ * 
+ * https://www.gnu.org/licenses/gpl-3.0.html
+ * ----
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version. 
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details. 
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+**/
+
+#include "scsiHostPhy.h"
+#include "ZuluSCSI_platform.h"
+#include "ZuluSCSI_log.h"
+#include "ZuluSCSI_log_trace.h"
+#include "scsi_accel_host.h"
+#include <assert.h>
+
+#include <scsi2sd.h>
+extern "C" {
+#include <scsi.h>
+}
+
+volatile int g_scsiHostPhyReset;
+
+#ifndef PLATFORM_HAS_INITIATOR_MODE
+
+// Dummy functions for platforms without hardware support for
+// SCSI initiator mode.
+//
+// Every function declared in scsiHostPhy.h needs a definition here with
+// exactly the declared signature; a stub with a different parameter list
+// is a separate overload and leaves the declared function undefined.
+void scsiHostPhyReset(void) {}
+bool scsiHostPhySelect(int target_id, uint8_t initiator_id) { return false; }
+int scsiHostPhyGetPhase() { return 0; }
+bool scsiHostRequestWaiting() { return false; }
+uint32_t scsiHostWrite(const uint8_t *data, uint32_t count) { return 0; }
+uint32_t scsiHostRead(uint8_t *data, uint32_t count) { return 0; }
+void scsiHostPhyRelease() {}
+
+#else
+
+// Release bus and pulse RST signal, initialize PHY to host mode.
+//
+// Sequence: release all outputs, switch the PHY to initiator mode, initialize
+// the accelerated host transfer code, then assert RST, wait, deassert RST and
+// wait again. Finally clears any pending g_scsiHostPhyReset request.
+void scsiHostPhyReset(void)
+{
+    SCSI_RELEASE_OUTPUTS();
+    SCSI_ENABLE_INITIATOR();
+
+    scsi_accel_host_init();
+
+    // RST pulse followed by a settling pause.
+    // NOTE(review): delay() is presumably in milliseconds - confirm.
+    SCSI_OUT(RST, 1);
+    delay(2);
+    SCSI_OUT(RST, 0);
+    delay(250);
+    g_scsiHostPhyReset = false;
+}
+
+// Select a device and an initiator, ids 0-7.
+// Returns true if the target answers to selection request.
+//
+// target_id    - bit position driven on the data bus for the target
+// initiator_id - bit position driven on the data bus for this initiator
+//
+// On success the function returns with OUT_BSY asserted and SEL released.
+// On failure (bus in use or no response) all outputs are released.
+bool scsiHostPhySelect(int target_id, uint8_t initiator_id)
+{
+    SCSI_RELEASE_OUTPUTS();
+
+    // We can't write individual data bus bits, so use a bit modified
+    // arbitration scheme. We always yield to any other initiator on
+    // the bus.
+    scsiLogInitiatorPhaseChange(BUS_BUSY);
+    SCSI_OUT(BSY, 1);
+    // Sample the data bus 10 times, one microsecond apart; any non-zero
+    // reading is treated as another device using the bus.
+    for (int wait = 0; wait < 10; wait++)
+    {
+        delayMicroseconds(1);
+
+        if (SCSI_IN_DATA() != 0)
+        {
+            dbgmsg("scsiHostPhySelect: bus is busy");
+            scsiLogInitiatorPhaseChange(BUS_FREE);
+            SCSI_RELEASE_OUTPUTS();
+            return false;
+        }
+    }
+
+    // Selection phase
+    scsiLogInitiatorPhaseChange(SELECTION);
+    dbgmsg("------ SELECTING ", target_id, " with initiator ID ", (int)initiator_id);
+    SCSI_OUT(SEL, 1);
+    delayMicroseconds(5);
+    // Drive both ID bits on the data bus, then release BSY while SEL stays asserted.
+    SCSI_OUT_DATA((1 << target_id) | (1 << initiator_id));
+    delayMicroseconds(5);
+    SCSI_OUT(BSY, 0);
+
+    // Wait for target to respond
+    // Polls BSY every 100 us, up to 2500 times (about 250 ms in total).
+    for (int wait = 0; wait < 2500; wait++)
+    {
+        delayMicroseconds(100);
+        if (SCSI_IN(BSY))
+        {
+            break;
+        }
+    }
+
+    if (!SCSI_IN(BSY))
+    {
+        // No response
+        SCSI_RELEASE_OUTPUTS();
+        return false;
+    }
+
+    // We need to assert OUT_BSY to enable IO buffer U105 to read status signals.
+    SCSI_RELEASE_DATA_REQ();
+    SCSI_OUT(BSY, 1);
+    SCSI_OUT(SEL, 0);
+    return true;
+}
+
+// Read the current communication phase as signaled by the target
+//
+// Returns:
+//  - BUS_FREE if a reset was requested through g_scsiHostPhyReset (the bus
+//    is released first) or if the target no longer asserts BSY,
+//  - BUS_BUSY while the target has not asserted REQ,
+//  - otherwise the combination of the CD, IO and MSG phase bits.
+int scsiHostPhyGetPhase()
+{
+    // Time of the last point at which the target was known to be connected:
+    // either a non-zero phase was seen or the BSY probe below succeeded.
+    static absolute_time_t last_online_time;
+
+    if (g_scsiHostPhyReset)
+    {
+        // Reset request from watchdog timer
+        scsiHostPhyRelease();
+        return BUS_FREE;
+    }
+
+    int phase = 0;
+    // REQ is sampled before the phase lines and only acted upon at the end.
+    bool req_in = SCSI_IN(REQ);
+    if (SCSI_IN(CD)) phase |= __scsiphase_cd;
+    if (SCSI_IN(IO)) phase |= __scsiphase_io;
+    if (SCSI_IN(MSG)) phase |= __scsiphase_msg;
+
+    // With all phase lines low, probe BSY at most once per 100 us.
+    if (phase == 0 && absolute_time_diff_us(last_online_time, get_absolute_time()) > 100)
+    {
+        // Disable OUT_BSY for a short time to see if the target is still on line
+        SCSI_OUT(BSY, 0);
+        delayMicroseconds(1);
+
+        if (!SCSI_IN(BSY))
+        {
+            // Target dropped BSY; OUT_BSY is left deasserted on this path.
+            scsiLogInitiatorPhaseChange(BUS_FREE);
+            return BUS_FREE;
+        }
+
+        // Still online, re-enable OUT_BSY to enable IO buffers
+        SCSI_OUT(BSY, 1);
+        last_online_time = get_absolute_time();
+    }
+    else if (phase != 0)
+    {
+        last_online_time = get_absolute_time();
+    }
+
+    if (!req_in)
+    {
+        // Don't act on phase changes until target asserts request signal.
+        // This filters out any spurious changes on control signals.
+        return BUS_BUSY;
+    }
+    else
+    {
+        scsiLogInitiatorPhaseChange(phase);
+        return phase;
+    }
+}
+
+// Reports the current state of the REQ input as a boolean.
+bool scsiHostRequestWaiting()
+{
+    const bool req_asserted = SCSI_IN(REQ);
+    return req_asserted;
+}
+
+// Blocking data transfer
+#define SCSIHOST_WAIT_ACTIVE(pin) \
+  if (!SCSI_IN(pin)) { \
+    if (!SCSI_IN(pin)) { \
+      while(!SCSI_IN(pin) && !g_scsiHostPhyReset); \
+    } \
+  }
+
+#define SCSIHOST_WAIT_INACTIVE(pin) \
+  if (SCSI_IN(pin)) { \
+    if (SCSI_IN(pin)) { \
+      while(SCSI_IN(pin) && !g_scsiHostPhyReset); \
+    } \
+  }
+
+// Write one byte to SCSI target using the handshake mechanism
+//
+// Waits for REQ, drives the value on the data bus, asserts ACK, waits for
+// REQ to go away, then releases the data lines and deasserts ACK.
+// The waits also end when g_scsiHostPhyReset is set, in which case the
+// remaining steps still run.
+static inline void scsiHostWriteOneByte(uint8_t value)
+{
+    SCSIHOST_WAIT_ACTIVE(REQ);
+    SCSI_OUT_DATA(value);
+    delay_100ns(); // DB setup time before ACK
+    SCSI_OUT(ACK, 1);
+    SCSIHOST_WAIT_INACTIVE(REQ);
+    SCSI_RELEASE_DATA_REQ();
+    SCSI_OUT(ACK, 0);
+}
+
+// Read one byte from SCSI target using the handshake mechanism.
+//
+// parityError - optional; when non-null, the parity of the sampled value is
+//               checked and *parityError is set to 1 on mismatch. It is never
+//               cleared here, so an error flag accumulates across calls.
+// Returns the low 8 bits of the value sampled from the data bus.
+static inline uint8_t scsiHostReadOneByte(int* parityError)
+{
+    SCSIHOST_WAIT_ACTIVE(REQ);
+    // Sample the bus while REQ is asserted, before acknowledging.
+    uint16_t r = SCSI_IN_DATA();
+    SCSI_OUT(ACK, 1);
+    SCSIHOST_WAIT_INACTIVE(REQ);
+    SCSI_OUT(ACK, 0);
+
+    // The sampled word must equal the lookup table entry for its low byte
+    // XORed with SCSI_IO_DATA_MASK; anything else is reported as a parity error.
+    if (parityError && r != (g_scsi_parity_lookup[r & 0xFF] ^ SCSI_IO_DATA_MASK))
+    {
+        logmsg("Parity error in scsiReadOneByte(): ", (uint32_t)r);
+        *parityError = 1;
+    }
+
+    return (uint8_t)r;
+}
+
+// Blocking write of `count` bytes from `data` to the target.
+// Returns the number of bytes actually sent: `count` on success, or fewer
+// if a reset was requested or the IO/CD/MSG lines changed while waiting
+// for REQ.
+uint32_t scsiHostWrite(const uint8_t *data, uint32_t count)
+{
+    scsiLogDataOut(data, count);
+
+    // Phase lines as they were when the transfer started
+    int cd_at_entry = SCSI_IN(CD);
+    int msg_at_entry = SCSI_IN(MSG);
+
+    uint32_t sent = 0;
+    while (sent < count)
+    {
+        // Poll until the target requests the next byte
+        for (;;)
+        {
+            if (SCSI_IN(REQ))
+            {
+                break;
+            }
+
+            if (g_scsiHostPhyReset || SCSI_IN(IO) || SCSI_IN(CD) != cd_at_entry || SCSI_IN(MSG) != msg_at_entry)
+            {
+                // Target switched out of DATA_OUT mode
+                logmsg("scsiHostWrite: sent ", (int)sent, " bytes, expected ", (int)count);
+                return sent;
+            }
+        }
+
+        scsiHostWriteOneByte(data[sent]);
+        sent++;
+    }
+
+    return count;
+}
+
+// Blocking read of up to `count` bytes from the target into `data`.
+//
+// An even byte count with an even buffer address is handed to
+// scsi_accel_host_read(); anything else is read byte by byte with the
+// REQ/ACK handshake.
+//
+// Returns the number of bytes received, which is less than `count` if the
+// target left the data phase early. Returns 0 if a reset was requested or
+// a parity error was detected.
+uint32_t scsiHostRead(uint8_t *data, uint32_t count)
+{
+    int parityError = 0;
+    uint32_t fullcount = count;
+
+    // Phase lines as they were when the transfer started
+    int cd_start = SCSI_IN(CD);
+    int msg_start = SCSI_IN(MSG);
+
+    if ((count & 1) == 0 && ((uint32_t)data & 1) == 0)
+    {
+        // Even number of bytes, use accelerated routine
+        count = scsi_accel_host_read(data, count, &parityError, &g_scsiHostPhyReset);
+    }
+    else
+    {
+        for (uint32_t i = 0; i < count; i++)
+        {
+            bool phase_changed = false;
+
+            while (!SCSI_IN(REQ))
+            {
+                if (g_scsiHostPhyReset || !SCSI_IN(IO) || SCSI_IN(CD) != cd_start || SCSI_IN(MSG) != msg_start)
+                {
+                    // Target switched out of DATA_IN mode
+                    phase_changed = true;
+                    break;
+                }
+            }
+
+            if (phase_changed)
+            {
+                // Stop here without acknowledging anything further: the next
+                // REQ belongs to a different phase, and on a reset request
+                // REQ may never arrive at all.
+                count = i;
+                break;
+            }
+
+            data[i] = scsiHostReadOneByte(&parityError);
+        }
+    }
+
+    scsiLogDataIn(data, count);
+
+    if (g_scsiHostPhyReset || parityError)
+    {
+        return 0;
+    }
+    else
+    {
+        if (count < fullcount)
+        {
+            logmsg("scsiHostRead: received ", (int)count, " bytes, expected ", (int)fullcount);
+        }
+
+        return count;
+    }
+}
+
+// Release all bus signals
+// Records the transition to BUS_FREE in the initiator phase log first,
+// then releases every output.
+void scsiHostPhyRelease()
+{
+    scsiLogInitiatorPhaseChange(BUS_FREE);
+    SCSI_RELEASE_OUTPUTS();
+}
+
+#endif

+ 55 - 0
lib/ZuluSCSI_platform_RP2350/scsiHostPhy.h

@@ -0,0 +1,55 @@
+/** 
+ * ZuluSCSI™ - Copyright (c) 2022 Rabbit Hole Computing™
+ * 
+ * ZuluSCSI™ firmware is licensed under the GPL version 3 or any later version. 
+ * 
+ * https://www.gnu.org/licenses/gpl-3.0.html
+ * ----
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version. 
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details. 
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+**/
+
+// Host side SCSI physical interface.
+// Used in initiator to interface to an SCSI drive.
+
+#pragma once
+
+#include <stdint.h>
+#include <stdbool.h>
+
+// Request to stop activity and reset the bus.
+// The blocking transfer functions below poll this flag while waiting for the target.
+extern volatile int g_scsiHostPhyReset;
+
+// Release bus and pulse RST signal, initialize PHY to host mode.
+void scsiHostPhyReset(void);
+
+// Select a device, id 0-7.
+// target_id - target device id 0-7
+// initiator_id - host device id 0-7
+// Returns true if the target answers to selection request.
+bool scsiHostPhySelect(int target_id, uint8_t initiator_id);
+
+// Read the current communication phase as signaled by the target
+// Matches SCSI_PHASE enumeration from scsi.h.
+int scsiHostPhyGetPhase();
+
+// Returns true if the device has asserted REQ signal, i.e. data waiting
+bool scsiHostRequestWaiting();
+
+// Blocking data transfer
+// These return the actual number of bytes transferred, which can be less than
+// `count` when the target changes bus phase or a reset is requested.
+// scsiHostRead() additionally returns 0 when a reset was requested or a parity
+// error was detected.
+uint32_t scsiHostWrite(const uint8_t *data, uint32_t count);
+uint32_t scsiHostRead(uint8_t *data, uint32_t count);
+
+// Release all bus signals
+void scsiHostPhyRelease();

+ 418 - 0
lib/ZuluSCSI_platform_RP2350/scsiPhy.cpp

@@ -0,0 +1,418 @@
+/** 
+ * SCSI2SD V6 - Copyright (C) 2013 Michael McMaster <michael@codesrc.com>
+ * ZuluSCSI™ - Copyright (c) 2022 Rabbit Hole Computing™
+ * 
+ * This file is licensed under the GPL version 3 or any later version.  
+ * It is derived from scsiPhy.c in SCSI2SD V6.
+ *  
+ * https://www.gnu.org/licenses/gpl-3.0.html
+ * ----
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version. 
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details. 
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+**/
+
+// Implements the low level interface to SCSI bus
+// Partially derived from scsiPhy.c from SCSI2SD-V6
+
+#include "scsiPhy.h"
+#include "ZuluSCSI_platform.h"
+#include "ZuluSCSI_log.h"
+#include "ZuluSCSI_log_trace.h"
+#include "ZuluSCSI_config.h"
+#include "scsi_accel_target.h"
+#include "hardware/structs/iobank0.h"
+
+#include <scsi2sd.h>
+extern "C" {
+#include <scsi.h>
+#include <scsi2sd_time.h>
+}
+
+/***********************/
+/* SCSI status signals */
+/***********************/
+
+// Returns the current state of the ATN input as reported by SCSI_IN(ATN).
+extern "C" bool scsiStatusATN()
+{
+    return SCSI_IN(ATN);
+}
+
+// Returns the current state of the BSY input as reported by SCSI_IN(BSY).
+extern "C" bool scsiStatusBSY()
+{
+    return SCSI_IN(BSY);
+}
+
+/************************/
+/* SCSI selection logic */
+/************************/
+
+// Latched selection status: SCSI_STS_SELECTION_* flags OR'ed with the selected
+// target id. Written by scsi_bsy_deassert_interrupt(); cleared by scsiPhyReset()
+// and scsiEnterBusFree().
+volatile uint8_t g_scsi_sts_selection;
+// When non-zero, the next scsiStatusSEL() call clears it and asserts the BSY output.
+// Also cleared by scsiPhyReset() and scsiEnterBusFree().
+volatile uint8_t g_scsi_ctrl_bsy;
+
+// Selection detector, called from the GPIO interrupt handler.
+// When SEL is asserted while BSY is released, the ID bits on the data bus are
+// compared against the configured targets and a match is latched into
+// g_scsi_sts_selection.
+void scsi_bsy_deassert_interrupt()
+{
+    // Anything other than "SEL asserted, BSY released" is not a selection attempt.
+    if (!SCSI_IN(SEL) || SCSI_IN(BSY))
+    {
+        return;
+    }
+
+    // ID bits currently present on the data bus
+    const uint8_t bus_ids = SCSI_IN_DATA();
+
+    // Look for the first simulated target whose ID bit is set.
+    int selected = -1;
+    for (int idx = 0; idx < S2S_MAX_TARGETS && selected < 0; idx++)
+    {
+        const auto &candidate = scsiDev.targets[idx];
+        const bool usable = candidate.targetId <= 7 && candidate.cfg;
+        if (usable && (bus_ids & (1 << candidate.targetId)))
+        {
+            selected = candidate.targetId;
+        }
+    }
+
+    if (selected >= 0)
+    {
+        // The ATN flag is set unconditionally at this point; its real value is
+        // only known after OUT_BSY is enabled in scsiStatusSEL().
+        g_scsi_sts_selection = SCSI_STS_SELECTION_SUCCEEDED | SCSI_STS_SELECTION_ATN | selected;
+    }
+
+    // selFlag is needed for the Philips P2000C, which releases SEL after 600ns
+    // without waiting for BSY, and for some early Mac Plus ROMs.
+    scsiDev.selFlag = *SCSI_STS_SELECTED;
+}
+
+// Returns the current state of the SEL input.
+// As a side effect, a pending BSY request (g_scsi_ctrl_bsy != 0) is serviced
+// first: BSY is asserted and ATN is sampled to detect SCSI-1 hosts.
+extern "C" bool scsiStatusSEL()
+{
+    if (g_scsi_ctrl_bsy)
+    {
+        // We don't have direct register access to BSY bit like SCSI2SD scsi.c expects.
+        // Instead update the state here.
+        // Releasing happens with bus release.
+        g_scsi_ctrl_bsy = 0;
+
+#ifdef ZULUSCSI_BS2
+        // From BS2 repository commit 8971584485c42, not sure of purpose.
+        SCSI_OUT(CD, 0);
+        SCSI_OUT(MSG, 0);
+        SCSI_ENABLE_CONTROL_OUT();
+#endif
+
+        SCSI_OUT(BSY, 1);
+
+        // On RP2040 hardware the ATN signal is only available after OUT_BSY enables
+        // the IO buffer U105, so check the signal status here.
+        delay_100ns();
+        if (!scsiStatusATN())
+        {
+            // ATN is not asserted: this is a SCSI1 host that does not send
+            // an IDENTIFY message, so undo the ATN flag that the selection
+            // interrupt set unconditionally.
+            scsiDev.atnFlag = 0;
+            scsiDev.target->unitAttention = 0;
+            scsiDev.compatMode = COMPAT_SCSI1;
+        }
+    }
+
+    return SCSI_IN(SEL);
+}
+
+/************************/
+/* SCSI bus reset logic */
+/************************/
+
+// RST edge handler: flags a bus reset when RST reads asserted on two samples
+// taken 500 ns apart, so that short glitches are ignored.
+static void scsi_rst_assert_interrupt()
+{
+    const bool first_sample = SCSI_IN(RST);
+    delay_ns(500);
+    const bool second_sample = SCSI_IN(RST);
+
+    if (!first_sample || !second_sample)
+    {
+        // Line did not stay asserted, treat as a glitch
+        return;
+    }
+
+    dbgmsg("BUS RESET");
+    scsiDev.resetFlag = 1;
+}
+
+// Shared GPIO interrupt callback for the BSY, SEL and RST inputs.
+// `events` is not inspected; dispatch is based on the pin number only.
+static void scsiPhyIRQ(uint gpio, uint32_t events)
+{
+    const bool bsy_or_sel = (gpio == SCSI_IN_BSY) || (gpio == SCSI_IN_SEL);
+
+    if (bsy_or_sel)
+    {
+        // BSY / SEL edges are only meaningful while we are not driving OUT_BSY
+        // low ourselves. The BSY input pin may be shared with other signals.
+        const bool out_bsy_high = (sio_hw->gpio_out & (1 << SCSI_OUT_BSY)) != 0;
+        if (out_bsy_high)
+        {
+            scsi_bsy_deassert_interrupt();
+        }
+        return;
+    }
+
+    if (gpio == SCSI_IN_RST)
+    {
+        scsi_rst_assert_interrupt();
+    }
+}
+
+// This function is called to initialize the phy code.
+// It is called after power-on and after SCSI bus reset.
+// Releases all outputs, clears the latched selection / BSY state, re-initializes
+// the target-side accelerator and (re-)arms the GPIO edge interrupts.
+extern "C" void scsiPhyReset(void)
+{
+    SCSI_RELEASE_OUTPUTS();
+    g_scsi_sts_selection = 0;
+    g_scsi_ctrl_bsy = 0;
+
+    scsi_accel_rp2040_init();
+
+    // Enable BSY, RST and SEL interrupts
+    // Note: RP2040 library currently supports only one callback,
+    // so it has to be same for both pins.
+    // The callback (scsiPhyIRQ) is registered together with the BSY rising edge;
+    // the RST and SEL falling edges below reuse it.
+    gpio_set_irq_enabled_with_callback(SCSI_IN_BSY, GPIO_IRQ_EDGE_RISE, true, scsiPhyIRQ);
+    gpio_set_irq_enabled(SCSI_IN_RST, GPIO_IRQ_EDGE_FALL, true);
+
+    // Check BSY line status when SEL goes active.
+    // This is needed to handle SCSI-1 hosts that use the single initiator mode.
+    // The host will just assert the SEL directly, without asserting BSY first.
+    gpio_set_irq_enabled(SCSI_IN_SEL, GPIO_IRQ_EDGE_FALL, true);
+}
+
+/************************/
+/* SCSI bus phase logic */
+/************************/
+
+// Bus phase most recently entered through the functions in this file.
+static SCSI_PHASE g_scsi_phase;
+
+// Switch to `phase` and block for the settle time reported by
+// scsiEnterPhaseImmediate(), if any.
+extern "C" void scsiEnterPhase(int phase)
+{
+    const int settle_ns = scsiEnterPhaseImmediate(phase);
+    if (settle_ns <= 0)
+    {
+        return;
+    }
+
+    s2s_delay_ns(settle_ns);
+}
+
+// Change state and return nanosecond delay to wait.
+//
+// phase: SCSI_PHASE value to enter; a negative value releases all outputs.
+// Returns the number of nanoseconds the caller should wait for the bus to settle.
+// Returns 0 when the phase is unchanged, when the outputs were released, or when
+// the sync-mode switch failed (in which case scsiDev.resetFlag is also set).
+extern "C" uint32_t scsiEnterPhaseImmediate(int phase)
+{
+    if (phase != g_scsi_phase)
+    {
+        // ANSI INCITS 362-2002 SPI-3 10.7.1:
+        // Phase changes are not allowed while REQ or ACK is asserted.
+        while (likely(!scsiDev.resetFlag) && SCSI_IN(ACK)) {}
+
+        if (scsiDev.compatMode < COMPAT_SCSI2 && (phase == DATA_IN || phase == DATA_OUT))
+        {
+            // Akai S1000/S3000 seems to need extra delay before changing to data phase
+            // after a command. The code in ZuluSCSI_disk.cpp tries to do this while waiting
+            // for SD card, to avoid any extra latency.
+            s2s_delay_ns(400000);
+        }
+
+        // Remember the previous phase: the settle delay below depends on whether
+        // the I/O direction changes.
+        int oldphase = g_scsi_phase;
+        g_scsi_phase = (SCSI_PHASE)phase;
+        scsiLogPhaseChange(phase);
+
+        // Select between synchronous vs. asynchronous SCSI writes
+        bool syncstatus = false;
+        if (scsiDev.target->syncOffset > 0 && (g_scsi_phase == DATA_IN || g_scsi_phase == DATA_OUT))
+        {
+            syncstatus = scsi_accel_rp2040_setSyncMode(scsiDev.target->syncOffset, scsiDev.target->syncPeriod);
+        }
+        else
+        {
+            syncstatus = scsi_accel_rp2040_setSyncMode(0, 0);
+        }
+
+        if (!syncstatus)
+        {
+            // SCSI DMA was not idle, we are in some kind of error state, force bus reset
+            scsiDev.resetFlag = 1;
+            return 0;
+        }
+
+        if (phase < 0)
+        {
+            // Other communication on bus or reset state
+            SCSI_RELEASE_OUTPUTS();
+            return 0;
+        }
+        else
+        {
+            // The phase control signals should be changed close to simultaneously.
+            // The SCSI spec allows 400 ns for this, but some hosts do not seem to be that
+            // tolerant. The Cortex-M0 is also quite slow in bit twiddling.
+            //
+            // To avoid unnecessary delays, precalculate an XOR mask and then apply it
+            // simultaneously to all three signals.
+            // A GPIO output bit is set (1) when the corresponding phase bit is clear.
+            uint32_t gpio_new = 0;
+            if (!(phase & __scsiphase_msg)) { gpio_new |= (1 << SCSI_OUT_MSG); }
+            if (!(phase & __scsiphase_cd)) { gpio_new |= (1 << SCSI_OUT_CD); }
+            if (!(phase & __scsiphase_io)) { gpio_new |= (1 << SCSI_OUT_IO); }
+
+            // Toggle only those of the three outputs whose current level differs
+            // from the desired one.
+            uint32_t mask = (1 << SCSI_OUT_MSG) | (1 << SCSI_OUT_CD) | (1 << SCSI_OUT_IO);
+            uint32_t gpio_xor = (sio_hw->gpio_out ^ gpio_new) & mask;
+            sio_hw->gpio_togl = gpio_xor;
+            SCSI_ENABLE_CONTROL_OUT();
+
+            int delayNs = 400; // Bus settle delay
+            if ((oldphase & __scsiphase_io) != (phase & __scsiphase_io))
+            {
+                delayNs += 400; // Data release delay
+            }
+
+            if (scsiDev.compatMode < COMPAT_SCSI2)
+            {
+                // EMU EMAX needs 100uS ! 10uS is not enough.
+                delayNs += 100000;
+            }
+
+            return delayNs;
+        }
+    }
+    else
+    {
+        return 0;
+    }
+}
+
+// Release all signals.
+// Records BUS_FREE as the current phase, clears the latched selection status,
+// the pending BSY request and the stored CDB length, then releases the outputs.
+void scsiEnterBusFree(void)
+{
+    g_scsi_phase = BUS_FREE;
+    g_scsi_sts_selection = 0;
+    g_scsi_ctrl_bsy = 0;
+    scsiDev.cdbLen = 0;
+
+    SCSI_RELEASE_OUTPUTS();
+}
+
+/********************/
+/* Transmit to host */
+/********************/
+
+#define SCSI_WAIT_ACTIVE(pin) \
+  if (!SCSI_IN(pin)) { \
+    if (!SCSI_IN(pin)) { \
+      while(!SCSI_IN(pin) && !scsiDev.resetFlag); \
+    } \
+  }
+
+// In synchronous mode the ACK pulse can be very short, so use edge IRQ to detect it.
+#define CHECK_EDGE(pin) \
+    ((iobank0_hw->intr[pin / 8] >> (4 * (pin % 8))) & GPIO_IRQ_EDGE_FALL)
+
+#define SCSI_WAIT_ACTIVE_EDGE(pin) \
+  if (!CHECK_EDGE(SCSI_IN_ ## pin)) { \
+    while(!SCSI_IN(pin) && !CHECK_EDGE(SCSI_IN_ ## pin) && !scsiDev.resetFlag); \
+  }
+
+#define SCSI_WAIT_INACTIVE(pin) \
+  if (SCSI_IN(pin)) { \
+    if (SCSI_IN(pin)) { \
+      while(SCSI_IN(pin) && !scsiDev.resetFlag); \
+    } \
+  }
+
+// Write one byte to SCSI host using the handshake mechanism
+// This is suitable for both asynchronous and synchronous communication.
+static inline void scsiWriteOneByte(uint8_t value)
+{
+    SCSI_OUT_DATA(value);
+    delay_100ns(); // DB setup time before REQ
+    // Clear any stale ACK falling-edge flag so that the edge check below only
+    // sees an ACK belonging to this byte.
+    gpio_acknowledge_irq(SCSI_IN_ACK, GPIO_IRQ_EDGE_FALL);
+    SCSI_OUT(REQ, 1);
+    SCSI_WAIT_ACTIVE_EDGE(ACK);
+    SCSI_RELEASE_DATA_REQ();
+    SCSI_WAIT_INACTIVE(ACK);
+}
+
+// Blocking write of a single byte to the host; the byte is passed to the
+// data-in log before it is sent.
+extern "C" void scsiWriteByte(uint8_t value)
+{
+    scsiLogDataIn(&value, 1);
+    scsiWriteOneByte(value);
+}
+
+// Blocking write of `count` bytes to the host: starts the transfer and then
+// waits for it to finish.
+extern "C" void scsiWrite(const uint8_t* data, uint32_t count)
+{
+    scsiStartWrite(data, count);
+    scsiFinishWrite();
+}
+
+// Log the outgoing data and hand it to the accelerated write routine.
+// scsiDev.resetFlag is passed along so the transfer can observe a bus reset.
+extern "C" void scsiStartWrite(const uint8_t* data, uint32_t count)
+{
+    scsiLogDataIn(data, count);
+    scsi_accel_rp2040_startWrite(data, count, &scsiDev.resetFlag);
+}
+
+// Forwards to scsi_accel_rp2040_isWriteFinished() for the given buffer pointer.
+extern "C" bool scsiIsWriteFinished(const uint8_t *data)
+{
+    return scsi_accel_rp2040_isWriteFinished(data);
+}
+
+// Forwards to scsi_accel_rp2040_finishWrite(), passing scsiDev.resetFlag so the
+// routine can observe a bus reset.
+extern "C" void scsiFinishWrite()
+{
+    scsi_accel_rp2040_finishWrite(&scsiDev.resetFlag);
+}
+
+/*********************/
+/* Receive from host */
+/*********************/
+
+// Read one byte from SCSI host using the handshake mechanism.
+// parityError: optional; when non-NULL the received value is checked against
+//              g_scsi_parity_lookup and *parityError is set to 1 on mismatch.
+//              It is never cleared here. Pass NULL to skip the parity check.
+// Returns the low 8 bits of the sampled data bus value.
+static inline uint8_t scsiReadOneByte(int* parityError)
+{
+    SCSI_OUT(REQ, 1);
+    SCSI_WAIT_ACTIVE(ACK);
+    delay_100ns();
+    // Sample the bus while ACK is active; r holds more than the 8 data bits
+    // (it is compared as a whole against the parity lookup below).
+    uint16_t r = SCSI_IN_DATA();
+    SCSI_OUT(REQ, 0);
+    SCSI_WAIT_INACTIVE(ACK);
+
+    if (parityError && r != (g_scsi_parity_lookup[r & 0xFF] ^ SCSI_IO_DATA_MASK))
+    {
+        logmsg("Parity error in scsiReadOneByte(): ", (uint32_t)r);
+        *parityError = 1;
+    }
+
+    return (uint8_t)r;
+}
+
+// Blocking read of a single byte from the host without parity checking;
+// the byte is passed to the data-out log before it is returned.
+extern "C" uint8_t scsiReadByte(void)
+{
+    uint8_t r = scsiReadOneByte(NULL);
+    scsiLogDataOut(&r, 1);
+    return r;
+}
+
+// Blocking read of `count` bytes from the host into `data`.
+// parityError: optional output. When non-NULL it is cleared on entry; it is only
+//              forwarded to the transfer routines if S2S_CFG_ENABLE_PARITY is set
+//              in the board configuration, otherwise parity is not reported.
+extern "C" void scsiRead(uint8_t* data, uint32_t count, int* parityError)
+{
+    // NULL is the "no parity reporting" value used by the other read functions
+    // in this file, so accept it here as well instead of dereferencing it.
+    if (parityError) { *parityError = 0; }
+    if (!(scsiDev.boardCfg.flags & S2S_CFG_ENABLE_PARITY)) { parityError = NULL; }
+
+    scsiStartRead(data, count, parityError);
+    scsiFinishRead(data, count, parityError);
+}
+
+// Start an accelerated read of `count` bytes into `data`.
+// parityError is replaced by NULL when S2S_CFG_ENABLE_PARITY is not set in the
+// board configuration.
+extern "C" void scsiStartRead(uint8_t* data, uint32_t count, int *parityError)
+{
+    if (!(scsiDev.boardCfg.flags & S2S_CFG_ENABLE_PARITY)) { parityError = NULL; }
+    scsi_accel_rp2040_startRead(data, count, parityError, &scsiDev.resetFlag);
+}
+
+// Finish a read started with scsiStartRead() and pass the buffer to the
+// data-out log.
+// parityError is replaced by NULL when S2S_CFG_ENABLE_PARITY is not set in the
+// board configuration.
+extern "C" void scsiFinishRead(uint8_t* data, uint32_t count, int *parityError)
+{
+    if (!(scsiDev.boardCfg.flags & S2S_CFG_ENABLE_PARITY)) { parityError = NULL; }
+    scsi_accel_rp2040_finishRead(data, count, parityError, &scsiDev.resetFlag);
+    scsiLogDataOut(data, count);
+}
+
+// Forwards to scsi_accel_rp2040_isReadFinished() for the given buffer pointer.
+extern "C" bool scsiIsReadFinished(const uint8_t *data)
+{
+    return scsi_accel_rp2040_isReadFinished(data);
+}

+ 97 - 0
lib/ZuluSCSI_platform_RP2350/scsiPhy.h

@@ -0,0 +1,97 @@
+/** 
+ * SCSI2SD V6 - Copyright (C) 2013 Michael McMaster <michael@codesrc.com>
+ * ZuluSCSI™ - Copyright (c) 2022 Rabbit Hole Computing™
+ * 
+ * This file is licensed under the GPL version 3 or any later version.  
+ * It is derived from scsiPhy.h in SCSI2SD V6.
+ * 
+ * https://www.gnu.org/licenses/gpl-3.0.html
+ * ----
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version. 
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details. 
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+**/
+
+// Interface to SCSI physical interface.
+// This file is derived from scsiPhy.h in SCSI2SD-V6.
+
+#pragma once
+
+#include <stdint.h>
+#include <stdbool.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Read SCSI status signals
+// Note: scsiStatusSEL() also services a pending BSY request (see SCSI_CTRL_BSY).
+bool scsiStatusATN();
+bool scsiStatusBSY();
+bool scsiStatusSEL();
+
+// Parity not yet implemented: this status query always evaluates to 0.
+#define scsiParityError() 0
+
+// Get SCSI selection status.
+// This is latched by interrupt when BSY is deasserted while SEL is asserted.
+// Lowest 3 bits are the selected target id.
+// Highest bits are status information.
+#define SCSI_STS_SELECTION_SUCCEEDED 0x40
+#define SCSI_STS_SELECTION_ATN 0x80
+extern volatile uint8_t g_scsi_sts_selection;
+#define SCSI_STS_SELECTED (&g_scsi_sts_selection)
+// Pending request to assert BSY; consumed by scsiStatusSEL().
+extern volatile uint8_t g_scsi_ctrl_bsy;
+#define SCSI_CTRL_BSY (&g_scsi_ctrl_bsy)
+
+// Called when SCSI RST signal has been asserted, should release bus.
+void scsiPhyReset(void);
+
+// Change MSG / CD / IO signal states and wait for necessary transition time.
+// Phase argument is one of SCSI_PHASE enum values.
+void scsiEnterPhase(int phase);
+
+// Change state and return nanosecond delay to wait
+uint32_t scsiEnterPhaseImmediate(int phase);
+
+// Release all signals
+void scsiEnterBusFree(void);
+
+// Blocking data transfer
+void scsiWrite(const uint8_t* data, uint32_t count);
+void scsiRead(uint8_t* data, uint32_t count, int* parityError);
+void scsiWriteByte(uint8_t value);
+uint8_t scsiReadByte(void);
+
+// Non-blocking data transfer.
+// Depending on platform support the start() function may block.
+// The start function can be called multiple times, it may internally
+// either combine transfers or block until previous transfer completes.
+void scsiStartWrite(const uint8_t* data, uint32_t count);
+void scsiFinishWrite();
+void scsiStartRead(uint8_t* data, uint32_t count, int *parityError);
+void scsiFinishRead(uint8_t* data, uint32_t count, int *parityError);
+
+// Query whether the data at pointer has already been read, i.e. buffer can be reused.
+// ("read" here refers to the write transfer having consumed the source buffer.)
+// If data is NULL, checks if all writes have completed.
+bool scsiIsWriteFinished(const uint8_t *data);
+
+// Query whether the data at pointer has already been written, i.e. can be processed.
+// ("written" here refers to the read transfer having filled the destination buffer.)
+// If data is NULL, checks if all reads have completed.
+bool scsiIsReadFinished(const uint8_t *data);
+
+#define PLATFORM_SCSIPHY_HAS_NONBLOCKING_READ 1
+
+// Transfer rate reporting is not provided; always evaluates to 0.
+#define s2s_getScsiRateKBs() 0
+
+#ifdef __cplusplus
+}
+#endif

+ 173 - 0
lib/ZuluSCSI_platform_RP2350/scsi_accel_host.cpp

@@ -0,0 +1,173 @@
+/** 
+ * ZuluSCSI™ - Copyright (c) 2022 Rabbit Hole Computing™
+ * 
+ * ZuluSCSI™ firmware is licensed under the GPL version 3 or any later version. 
+ * 
+ * https://www.gnu.org/licenses/gpl-3.0.html
+ * ----
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version. 
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details. 
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+**/
+
+// Accelerated SCSI subroutines for SCSI initiator/host side communication
+
+#include "scsi_accel_host.h"
+#include "ZuluSCSI_platform.h"
+#include "ZuluSCSI_log.h"
+#include <hardware/pio.h>
+#include <hardware/dma.h>
+#include <hardware/irq.h>
+#include <hardware/structs/iobank0.h>
+#include <hardware/sync.h>
+
+#ifdef PLATFORM_HAS_INITIATOR_MODE
+#ifdef ZULUSCSI_PICO
+#include "scsi_accel_host_Pico.pio.h"
+#else
+#include "scsi_accel_host_RP2040.pio.h"
+#endif
+
+// PIO block and state machine index used for initiator-side transfers
+#define SCSI_PIO pio0
+#define SCSI_SM 0
+
+static struct {
+    // PIO configurations
+    // Load offset and state machine configuration of the scsi_host_async_read
+    // program, filled in by scsi_accel_host_init().
+    uint32_t pio_offset_async_read;
+    pio_sm_config pio_cfg_async_read;
+} g_scsi_host;
+
+// Current accelerator state; selects the pin configuration applied by
+// scsi_accel_host_config_gpio().
+enum scsidma_state_t { SCSIHOST_IDLE = 0,
+                       SCSIHOST_READ };
+static volatile scsidma_state_t g_scsi_host_state;
+
+// Apply the pin function selection that matches g_scsi_host_state.
+// The data bus, parity and REQ pins are always routed to SIO; the ACK output is
+// routed to PIO0 while reading and to SIO when idle. For a read, the state
+// machine's pin values and directions are programmed first.
+static void scsi_accel_host_config_gpio()
+{
+    // Pins that use the SIO function in every state, in register-write order.
+    static const unsigned int sio_pins[] = {
+        SCSI_IO_DB0, SCSI_IO_DB1, SCSI_IO_DB2, SCSI_IO_DB3,
+        SCSI_IO_DB4, SCSI_IO_DB5, SCSI_IO_DB6, SCSI_IO_DB7,
+        SCSI_IO_DBP, SCSI_IN_REQ
+    };
+
+    const scsidma_state_t state = g_scsi_host_state;
+    const bool reading = (state == SCSIHOST_READ);
+    if (!reading && state != SCSIHOST_IDLE)
+    {
+        // Neither of the known states: leave the pin setup untouched
+        return;
+    }
+
+    if (reading)
+    {
+        // Data bus and REQ become inputs, ACK becomes an output
+        pio_sm_set_pins(SCSI_PIO, SCSI_SM, SCSI_IO_DATA_MASK | 1 << SCSI_IN_REQ | 1 << SCSI_OUT_ACK);
+        pio_sm_set_consecutive_pindirs(SCSI_PIO, SCSI_SM, SCSI_IO_DB0, 9, false);
+        pio_sm_set_consecutive_pindirs(SCSI_PIO, SCSI_SM, SCSI_IN_REQ, 1, false);
+        pio_sm_set_consecutive_pindirs(SCSI_PIO, SCSI_SM, SCSI_OUT_ACK, 1, true);
+    }
+
+    for (unsigned int pin : sio_pins)
+    {
+        iobank0_hw->io[pin].ctrl = GPIO_FUNC_SIO;
+    }
+
+    // Only the ACK pin is handed to the PIO, and only during a read.
+    iobank0_hw->io[SCSI_OUT_ACK].ctrl = reading ? GPIO_FUNC_PIO0 : GPIO_FUNC_SIO;
+}
+
+// Read `count` bytes from the SCSI bus into `buf` using the PIO state machine.
+//
+// buf:         destination buffer, filled two bytes per received FIFO word
+// count:       number of bytes to read, must be divisible by 2 (0 is a no-op)
+// parityError: set to 1 when the block-wide parity check fails; may be NULL
+// resetFlag:   polled while waiting; a non-zero value aborts the transfer
+//
+// Returns the number of bytes actually stored in `buf`. This is less than
+// `count` when the target left the phase it was in at entry (I/O released,
+// C/D or MSG changed) or when *resetFlag became set.
+uint32_t scsi_accel_host_read(uint8_t *buf, uint32_t count, int *parityError, volatile int *resetFlag)
+{
+    // The PIO program receives bytes in pairs. Check this before any hardware
+    // is touched.
+    assert((count & 1) == 0);
+
+    if (count == 0)
+    {
+        // Nothing to transfer. Without this guard (count - 1) below would wrap
+        // around to 0xFFFFFFFF and arm the state machine for a transfer that
+        // nobody is going to read.
+        return 0;
+    }
+
+    // Currently this method just reads from the PIO RX fifo directly in software loop.
+    // The SD card access is parallelized using DMA, so there is limited benefit from using DMA here.
+    g_scsi_host_state = SCSIHOST_READ;
+
+    int cd_start = SCSI_IN(CD);
+    int msg_start = SCSI_IN(MSG);
+
+    pio_sm_init(SCSI_PIO, SCSI_SM, g_scsi_host.pio_offset_async_read, &g_scsi_host.pio_cfg_async_read);
+    scsi_accel_host_config_gpio();
+    pio_sm_set_enabled(SCSI_PIO, SCSI_SM, true);
+
+    // Set the number of bytes to read (the PIO program expects count minus one).
+    pio_sm_put(SCSI_PIO, SCSI_SM, count - 1);
+
+    // Read results from PIO RX FIFO
+    uint8_t *dst = buf;
+    uint8_t *end = buf + count;
+    uint32_t paritycheck = 0;
+    while (dst < end)
+    {
+        uint32_t available = pio_sm_get_rx_fifo_level(SCSI_PIO, SCSI_SM);
+
+        if (available == 0)
+        {
+            if (*resetFlag || !SCSI_IN(IO) || SCSI_IN(CD) != cd_start || SCSI_IN(MSG) != msg_start)
+            {
+                // Target switched out of DATA_IN mode.
+                // A word may have been pushed between the FIFO level read above
+                // and this check, so look once more before giving up; otherwise
+                // bytes that were already acknowledged on the bus would be lost.
+                available = pio_sm_get_rx_fifo_level(SCSI_PIO, SCSI_SM);
+                if (available == 0)
+                {
+                    count = dst - buf;
+                    break;
+                }
+            }
+        }
+
+        while (available > 0)
+        {
+            available--;
+            uint32_t word = pio_sm_get(SCSI_PIO, SCSI_SM);
+            // Accumulate the raw words for the block-wide parity check below
+            paritycheck ^= word;
+            // Bus values are stored inverted; the first byte of each pair is in
+            // the low half-word, the second one in the high half-word.
+            word = ~word;
+            *dst++ = word & 0xFF;
+            *dst++ = word >> 16;
+        }
+    }
+
+    // Check parity errors in whole block
+    // This doesn't detect if there is even number of parity errors in block.
+    uint8_t byte0 = ~(paritycheck & 0xFF);
+    uint8_t byte1 = ~(paritycheck >> 16);
+    if (paritycheck != ((g_scsi_parity_lookup[byte1] << 16) | g_scsi_parity_lookup[byte0]))
+    {
+        logmsg("Parity error in scsi_accel_host_read(): ", paritycheck);
+        if (parityError)
+        {
+            *parityError = 1;
+        }
+    }
+
+    // Return the pins to software control and stop the state machine
+    g_scsi_host_state = SCSIHOST_IDLE;
+    SCSI_RELEASE_DATA_REQ();
+    scsi_accel_host_config_gpio();
+    pio_sm_set_enabled(SCSI_PIO, SCSI_SM, false);
+
+    return count;
+}
+
+
+// Initialize the initiator-side accelerator: put the pins into the idle
+// configuration, reload the PIO instruction memory and prepare the state
+// machine configuration used by scsi_accel_host_read().
+void scsi_accel_host_init()
+{
+    g_scsi_host_state = SCSIHOST_IDLE;
+    scsi_accel_host_config_gpio();
+
+    // Load PIO programs
+    // Note: this clears the whole instruction memory of SCSI_PIO first.
+    pio_clear_instruction_memory(SCSI_PIO);
+
+    // Asynchronous / synchronous SCSI read
+    g_scsi_host.pio_offset_async_read = pio_add_program(SCSI_PIO, &scsi_host_async_read_program);
+    g_scsi_host.pio_cfg_async_read = scsi_host_async_read_program_get_default_config(g_scsi_host.pio_offset_async_read);
+    // IN reads start at DB0; the single side-set bit drives the ACK pin
+    sm_config_set_in_pins(&g_scsi_host.pio_cfg_async_read, SCSI_IO_DB0);
+    sm_config_set_sideset_pins(&g_scsi_host.pio_cfg_async_read, SCSI_OUT_ACK);
+    // OSR: shift right, no autopull. ISR: shift right, autopush after 32 bits.
+    sm_config_set_out_shift(&g_scsi_host.pio_cfg_async_read, true, false, 32);
+    sm_config_set_in_shift(&g_scsi_host.pio_cfg_async_read, true, true, 32);
+}
+
+#endif

+ 32 - 0
lib/ZuluSCSI_platform_RP2350/scsi_accel_host.h

@@ -0,0 +1,32 @@
+/** 
+ * ZuluSCSI™ - Copyright (c) 2022 Rabbit Hole Computing™
+ * 
+ * ZuluSCSI™ firmware is licensed under the GPL version 3 or any later version. 
+ * 
+ * https://www.gnu.org/licenses/gpl-3.0.html
+ * ----
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version. 
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details. 
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+**/
+
+// Accelerated SCSI subroutines for SCSI initiator/host side communication
+
+#pragma once
+
+#include <stdint.h>
+
+// Load the PIO program and prepare the state machine configuration.
+// Must be called before scsi_accel_host_read().
+void scsi_accel_host_init();
+
+// Read data from SCSI bus.
+// Number of bytes to read must be divisible by two.
+// Returns the number of bytes actually read, which is less than count when the
+// target changes bus phase or *resetFlag becomes set during the transfer.
+// *parityError is set to 1 when the parity check over the received block fails.
+uint32_t scsi_accel_host_read(uint8_t *buf, uint32_t count, int *parityError, volatile int *resetFlag);

+ 46 - 0
lib/ZuluSCSI_platform_RP2350/scsi_accel_host_Pico.pio

@@ -0,0 +1,46 @@
+; ZuluSCSI™ - Copyright (c) 2022 Rabbit Hole Computing™
+; 
+; ZuluSCSI™ firmware is licensed under the GPL version 3 or any later version. 
+; 
+; https://www.gnu.org/licenses/gpl-3.0.html
+; ----
+; This program is free software: you can redistribute it and/or modify
+; it under the terms of the GNU General Public License as published by
+; the Free Software Foundation, either version 3 of the License, or
+; (at your option) any later version. 
+; 
+; This program is distributed in the hope that it will be useful,
+; but WITHOUT ANY WARRANTY; without even the implied warranty of
+; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+; GNU General Public License for more details. 
+; 
+; You should have received a copy of the GNU General Public License
+; along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+
+; RP2040 PIO program for accelerating SCSI initiator / host function
+; Run "pioasm scsi_accel_host_Pico.pio scsi_accel_host_Pico.pio.h" to regenerate the C header from this.
+; GPIO mapping:
+; - 0-7: DB0-DB7
+; -   8: DBP
+; Side set is ACK pin
+
+; GPIO numbers for this board variant.
+; Only REQ is referenced by the instructions below; the ACK pin is driven
+; through side-set, whose base pin is configured from C code.
+.define REQ 17
+.define ACK 26
+
+; Read from SCSI bus using asynchronous handshake.
+; Data is returned as 16-bit words that contain the 8 data bits + 1 parity bit.
+; Each byte contributes 9 sampled bits plus 7 padding bits to the input shift
+; register.
+; Number of bytes to receive minus 1 should be written to TX fifo.
+; Number of bytes to receive must be divisible by 2.
+; Side-set value 0 asserts ACK, value 1 deasserts it.
+.program scsi_host_async_read
+    .side_set 1
+
+    pull block                  side 1  ; Get number of bytes to receive
+    mov x, osr                  side 1  ; Store to counter X
+
+start:
+    wait 0 gpio REQ             side 1  ; Wait for REQ low
+    in pins, 9                  side 0  ; Assert ACK, read GPIO
+    in null, 7                  side 0  ; Padding bits
+    wait 1 gpio REQ             side 0  ; Wait for REQ high
+    jmp x-- start               side 1  ; Deassert ACK, decrement byte count and jump to start

+ 43 - 0
lib/ZuluSCSI_platform_RP2350/scsi_accel_host_Pico.pio.h

@@ -0,0 +1,43 @@
+// -------------------------------------------------- //
+// This file is autogenerated by pioasm; do not edit! //
+// -------------------------------------------------- //
+
+#pragma once
+
+#if !PICO_NO_HARDWARE
+#include "hardware/pio.h"
+#endif
+
+// -------------------- //
+// scsi_host_async_read //
+// -------------------- //
+
+#define scsi_host_async_read_wrap_target 0
+#define scsi_host_async_read_wrap 6
+
+static const uint16_t scsi_host_async_read_program_instructions[] = {
+            //     .wrap_target
+    0x90a0, //  0: pull   block           side 1     
+    0xb027, //  1: mov    x, osr          side 1     
+    0x3011, //  2: wait   0 gpio, 17      side 1     
+    0x4009, //  3: in     pins, 9         side 0     
+    0x4067, //  4: in     null, 7         side 0     
+    0x2091, //  5: wait   1 gpio, 17      side 0     
+    0x1042, //  6: jmp    x--, 2          side 1     
+            //     .wrap
+};
+
+#if !PICO_NO_HARDWARE
+static const struct pio_program scsi_host_async_read_program = {
+    .instructions = scsi_host_async_read_program_instructions,
+    .length = 7,
+    .origin = -1,
+};
+
+static inline pio_sm_config scsi_host_async_read_program_get_default_config(uint offset) {
+    pio_sm_config c = pio_get_default_sm_config();
+    sm_config_set_wrap(&c, offset + scsi_host_async_read_wrap_target, offset + scsi_host_async_read_wrap);
+    sm_config_set_sideset(&c, 1, false, false);
+    return c;
+}
+#endif

+ 46 - 0
lib/ZuluSCSI_platform_RP2350/scsi_accel_host_RP2040.pio

@@ -0,0 +1,46 @@
+; ZuluSCSI™ - Copyright (c) 2022 Rabbit Hole Computing™
+; 
+; ZuluSCSI™ firmware is licensed under the GPL version 3 or any later version. 
+; 
+; https://www.gnu.org/licenses/gpl-3.0.html
+; ----
+; This program is free software: you can redistribute it and/or modify
+; it under the terms of the GNU General Public License as published by
+; the Free Software Foundation, either version 3 of the License, or
+; (at your option) any later version. 
+; 
+; This program is distributed in the hope that it will be useful,
+; but WITHOUT ANY WARRANTY; without even the implied warranty of
+; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+; GNU General Public License for more details. 
+; 
+; You should have received a copy of the GNU General Public License
+; along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+
+; RP2040 PIO program for accelerating SCSI initiator / host function
+; Run "pioasm scsi_accel_host_RP2040.pio scsi_accel_host_RP2040.pio.h" to regenerate the C header from this.
+; GPIO mapping:
+; - 0-7: DB0-DB7
+; -   8: DBP
+; Side set is ACK pin
+
+; GPIO numbers for this board variant.
+; Only REQ is referenced by the instructions below; the ACK pin is driven
+; through side-set, whose base pin is configured from C code.
+.define REQ 9
+.define ACK 10
+
+; Read from SCSI bus using asynchronous handshake.
+; Data is returned as 16-bit words that contain the 8 data bits + 1 parity bit.
+; Each byte contributes 9 sampled bits plus 7 padding bits to the input shift
+; register.
+; Number of bytes to receive minus 1 should be written to TX fifo.
+; Number of bytes to receive must be divisible by 2.
+; Side-set value 0 asserts ACK, value 1 deasserts it.
+.program scsi_host_async_read
+    .side_set 1
+
+    pull block                  side 1  ; Get number of bytes to receive
+    mov x, osr                  side 1  ; Store to counter X
+
+start:
+    wait 0 gpio REQ             side 1  ; Wait for REQ low
+    in pins, 9                  side 0  ; Assert ACK, read GPIO
+    in null, 7                  side 0  ; Padding bits
+    wait 1 gpio REQ             side 0  ; Wait for REQ high
+    jmp x-- start               side 1  ; Deassert ACK, decrement byte count and jump to start

+ 44 - 0
lib/ZuluSCSI_platform_RP2350/scsi_accel_host_RP2040.pio.h

@@ -0,0 +1,44 @@
+// -------------------------------------------------- //
+// This file is autogenerated by pioasm; do not edit! //
+// -------------------------------------------------- //
+
+#pragma once
+
+#if !PICO_NO_HARDWARE
+#include "hardware/pio.h"
+#endif
+
+// -------------------- //
+// scsi_host_async_read //
+// -------------------- //
+
+// Wrap bounds, relative to the address the program is loaded at:
+// after instruction 6 execution continues at instruction 0.
+#define scsi_host_async_read_wrap_target 0
+#define scsi_host_async_read_wrap 6
+
+// Encoded PIO instructions; the trailing comments are pioasm's disassembly.
+static const uint16_t scsi_host_async_read_program_instructions[] = {
+            //     .wrap_target
+    0x90a0, //  0: pull   block           side 1     
+    0xb027, //  1: mov    x, osr          side 1     
+    0x3009, //  2: wait   0 gpio, 9       side 1     
+    0x4009, //  3: in     pins, 9         side 0     
+    0x4067, //  4: in     null, 7         side 0     
+    0x2089, //  5: wait   1 gpio, 9       side 0     
+    0x1042, //  6: jmp    x--, 2          side 1     
+            //     .wrap
+};
+
+#if !PICO_NO_HARDWARE
+// Program descriptor for the pico-sdk loader.
+// origin = -1: no fixed load address is requested (pico-sdk convention).
+static const struct pio_program scsi_host_async_read_program = {
+    .instructions = scsi_host_async_read_program_instructions,
+    .length = 7,
+    .origin = -1,
+};
+
+// Returns a default state machine config for this program when loaded at `offset`:
+// the wrap range is shifted by `offset`, and one side-set bit is enabled
+// (sm_config_set_sideset arguments: bit_count = 1, optional = false, pindirs = false).
+static inline pio_sm_config scsi_host_async_read_program_get_default_config(uint offset) {
+    pio_sm_config c = pio_get_default_sm_config();
+    sm_config_set_wrap(&c, offset + scsi_host_async_read_wrap_target, offset + scsi_host_async_read_wrap);
+    sm_config_set_sideset(&c, 1, false, false);
+    return c;
+}
+#endif
+

+ 1139 - 0
lib/ZuluSCSI_platform_RP2350/scsi_accel_target.cpp

@@ -0,0 +1,1139 @@
+/** 
+ * ZuluSCSI™ - Copyright (c) 2022 Rabbit Hole Computing™
+ * 
+ * This work incorporates work from the following
+ *  Copyright (c) 2023 joshua stein <jcs@jcs.org>
+ * 
+ * ZuluSCSI™ firmware is licensed under the GPL version 3 or any later version. 
+ * 
+ * https://www.gnu.org/licenses/gpl-3.0.html
+ * ----
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version. 
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details. 
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+**/
+
+/* Data flow in SCSI acceleration:
+ *
+ * 1. Application provides a buffer of bytes to send.
+ * 2. Code in this module adds parity bit to the bytes and packs two bytes into 32 bit words.
+ * 3. DMA controller copies the words to PIO peripheral FIFO.
+ * 4. PIO peripheral handles low-level SCSI handshake and writes bytes and parity to GPIO.
+ */
+
+#include "ZuluSCSI_platform.h"
+#include "ZuluSCSI_log.h"
+#include "scsi_accel_target.h"
+#include <hardware/pio.h>
+#include <hardware/dma.h>
+#include <hardware/irq.h>
+#include <hardware/structs/iobank0.h>
+#include <hardware/sync.h>
+#include <pico/multicore.h>
+
+#ifdef ENABLE_AUDIO_OUTPUT
+#include <audio.h>
+#endif // ENABLE_AUDIO_OUTPUT
+
+#if defined(ZULUSCSI_PICO) || defined(ZULUSCSI_BS2)
+#include "scsi_accel_target_Pico.pio.h"
+#else
+#include "scsi_accel_target_RP2040.pio.h"
+#endif // ZULUSCSI_PICO
+
+// SCSI bus write acceleration uses up to 3 PIO state machines:
+// SM0: Convert data bytes to lookup addresses to add parity
+// SM1: Write data to SCSI bus
+// SM2: For synchronous mode only, count ACK pulses
+// Actual PIO block and state machine indices for the three roles
+// (parity / data / sync). Both variants use pio0.
+// NOTE(review): network builds use state machines 1-3 instead of 0-2,
+// presumably because state machine 0 is needed elsewhere -- confirm.
+#ifdef ZULUSCSI_NETWORK
+#  define SCSI_DMA_PIO pio0
+#  define SCSI_PARITY_SM 1
+#  define SCSI_DATA_SM 2
+#  define SCSI_SYNC_SM 3
+#else
+#  define SCSI_DMA_PIO pio0
+#  define SCSI_PARITY_SM 0
+#  define SCSI_DATA_SM 1
+#  define SCSI_SYNC_SM 2
+#endif // ZULUSCSI_NETWORK
+
+
+// SCSI bus write acceleration uses 3 or 4 DMA channels (data flow A->B->C->D):
+// A: Bytes from RAM to scsi_parity PIO
+// B: Addresses from scsi_parity PIO to lookup DMA READ_ADDR register
+// C: Lookup from g_scsi_parity_lookup and copy to scsi_accel_async_write or scsi_sync_write PIO
+// D: For sync transfers, scsi_sync_write to scsi_sync_write_pacer PIO
+//
+// SCSI bus read acceleration uses 4 DMA channels (data flow D->C->B->A):
+// A: Bytes from scsi_read_parity PIO to memory buffer
+// B: Lookup from g_scsi_parity_check_lookup and copy to scsi_read_parity PIO
+// C: Addresses from scsi_accel_read PIO to lookup DMA READ_ADDR register
+// D: From pacer to data state machine to trigger transfers
+// Fixed DMA channel numbers for roles A-D.
+// NOTE(review): network builds use channels 6-9 instead of 0-3,
+// presumably to stay clear of channels used by other code -- confirm.
+#ifdef ZULUSCSI_NETWORK
+#  define SCSI_DMA_CH_A 6
+#  define SCSI_DMA_CH_B 7
+#  define SCSI_DMA_CH_C 8
+#  define SCSI_DMA_CH_D 9
+#else
+#  define SCSI_DMA_CH_A 0
+#  define SCSI_DMA_CH_B 1
+#  define SCSI_DMA_CH_C 2
+#  define SCSI_DMA_CH_D 3
+#endif
+
+// Bookkeeping for the accelerated transfer in progress, together with the
+// PIO program offsets/configs and DMA channel configs used by the functions below.
+// The buffer fields are also updated in interrupt context: scsi_dma_irq()
+// calls start_dma_write() / start_dma_read(), which advance or swap the buffers.
+static struct {
+    uint8_t *app_buf; // Buffer provided by application
+    uint32_t app_bytes; // Bytes available in application buffer
+    uint32_t dma_bytes; // Bytes that have been scheduled for DMA so far
+    
+    // Single-entry queue: promoted to app_buf/app_bytes once the current buffer is fully scheduled
+    uint8_t *next_app_buf; // Next buffer from application after current one finishes
+    uint32_t next_app_bytes; // Bytes in next buffer
+
+    // Synchronous mode?
+    // syncOffset == 0 selects asynchronous transfers, > 0 selects synchronous transfers.
+    int syncOffset;
+    int syncPeriod;
+    int syncOffsetDivider; // Autopush/autopull threshold for the write pacer state machine
+    int syncOffsetPreload; // Number of items to preload in the RX fifo of scsi_sync_write
+
+    // PIO configurations
+    // pio_offset_*: instruction memory offset of each loaded program
+    // pio_cfg_*:    state machine configuration passed to pio_sm_init() for that program
+    uint32_t pio_offset_parity;
+    uint32_t pio_offset_async_write;
+    uint32_t pio_offset_sync_write_pacer;
+    uint32_t pio_offset_sync_write;
+    uint32_t pio_offset_read;
+    uint32_t pio_offset_read_parity;
+    uint32_t pio_offset_sync_read_pacer;
+    pio_sm_config pio_cfg_parity;
+    pio_sm_config pio_cfg_async_write;
+    pio_sm_config pio_cfg_sync_write_pacer;
+    pio_sm_config pio_cfg_sync_write;
+    pio_sm_config pio_cfg_read;
+    pio_sm_config pio_cfg_read_parity;
+    pio_sm_config pio_cfg_sync_read_pacer;
+    
+    // DMA configurations for write
+    dma_channel_config dmacfg_write_chA; // Data from RAM to scsi_parity PIO
+    dma_channel_config dmacfg_write_chB; // Addresses from scsi_parity PIO to lookup DMA
+    dma_channel_config dmacfg_write_chC; // Data from g_scsi_parity_lookup to scsi write PIO
+    dma_channel_config dmacfg_write_chD; // In synchronous mode only, transfer between state machines
+
+    // DMA configurations for read
+    dma_channel_config dmacfg_read_chA; // Data to destination memory buffer
+    dma_channel_config dmacfg_read_chB; // From lookup table to scsi_read_parity PIO
+    dma_channel_config dmacfg_read_chC; // From scsi_accel_read to channel B READ_ADDR
+    dma_channel_config dmacfg_read_chD; // From pacer to data state machine
+} g_scsi_dma;
+
+// Transfer state machine.
+// WRITE/READ:            a transfer is running.
+// WRITE_DONE/READ_DONE:  set by start_dma_write()/start_dma_read() when no bytes remain to schedule.
+// IDLE:                  restored by scsi_accel_rp2040_stopWrite()/stopRead().
+enum scsidma_state_t { SCSIDMA_IDLE = 0,
+                       SCSIDMA_WRITE, SCSIDMA_WRITE_DONE,
+                       SCSIDMA_READ, SCSIDMA_READ_DONE };
+// State names for logging; indexed by scsidma_state_t, so the order must match the enum.
+static const char* scsidma_states[5] = {"IDLE", "WRITE", "WRITE_DONE", "READ", "READ_DONE"};
+// volatile: modified from scsi_dma_irq() (through start_dma_write/start_dma_read)
+// while the finish functions poll it.
+static volatile scsidma_state_t g_scsi_dma_state;
+// Not referenced in this part of the file; presumably set by the init code
+// once the DMA channels have been claimed -- TODO confirm.
+static bool g_channels_claimed = false;
+// Forward declaration; selects PIO or SIO pin functions based on g_scsi_dma_state.
+static void scsidma_config_gpio();
+
+// Dump a snapshot of the acceleration state to the log for debugging:
+// software state and buffer progress, sync parameters, FIFO levels / program
+// counter / current instruction of the three PIO state machines, control and
+// transfer count registers of DMA channels A-D, and the raw GPIO input word.
+// Called by the stop/finish functions when a 5 second timeout expires.
+void scsi_accel_log_state()
+{
+    logmsg("SCSI DMA state: ", scsidma_states[g_scsi_dma_state]);
+    logmsg("Current buffer: ", g_scsi_dma.dma_bytes, "/", g_scsi_dma.app_bytes, ", next ", g_scsi_dma.next_app_bytes, " bytes");
+    logmsg("SyncOffset: ", g_scsi_dma.syncOffset, " SyncPeriod ", g_scsi_dma.syncPeriod);
+    logmsg("PIO Parity SM:",
+        " tx_fifo ", (int)pio_sm_get_tx_fifo_level(SCSI_DMA_PIO, SCSI_PARITY_SM),
+        ", rx_fifo ", (int)pio_sm_get_rx_fifo_level(SCSI_DMA_PIO, SCSI_PARITY_SM),
+        ", pc ", (int)pio_sm_get_pc(SCSI_DMA_PIO, SCSI_PARITY_SM),
+        ", instr ", SCSI_DMA_PIO->sm[SCSI_PARITY_SM].instr);
+    logmsg("PIO Data SM:",
+        " tx_fifo ", (int)pio_sm_get_tx_fifo_level(SCSI_DMA_PIO, SCSI_DATA_SM),
+        ", rx_fifo ", (int)pio_sm_get_rx_fifo_level(SCSI_DMA_PIO, SCSI_DATA_SM),
+        ", pc ", (int)pio_sm_get_pc(SCSI_DMA_PIO, SCSI_DATA_SM),
+        ", instr ", SCSI_DMA_PIO->sm[SCSI_DATA_SM].instr);
+    logmsg("PIO Sync SM:",
+        " tx_fifo ", (int)pio_sm_get_tx_fifo_level(SCSI_DMA_PIO, SCSI_SYNC_SM),
+        ", rx_fifo ", (int)pio_sm_get_rx_fifo_level(SCSI_DMA_PIO, SCSI_SYNC_SM),
+        ", pc ", (int)pio_sm_get_pc(SCSI_DMA_PIO, SCSI_SYNC_SM),
+        ", instr ", SCSI_DMA_PIO->sm[SCSI_SYNC_SM].instr);
+    logmsg("DMA CH A:",
+        " ctrl: ", dma_hw->ch[SCSI_DMA_CH_A].ctrl_trig,
+        " count: ", dma_hw->ch[SCSI_DMA_CH_A].transfer_count);
+    logmsg("DMA CH B:",
+        " ctrl: ", dma_hw->ch[SCSI_DMA_CH_B].ctrl_trig,
+        " count: ", dma_hw->ch[SCSI_DMA_CH_B].transfer_count);
+    logmsg("DMA CH C:",
+        " ctrl: ", dma_hw->ch[SCSI_DMA_CH_C].ctrl_trig,
+        " count: ", dma_hw->ch[SCSI_DMA_CH_C].transfer_count);
+    logmsg("DMA CH D:",
+        " ctrl: ", dma_hw->ch[SCSI_DMA_CH_D].ctrl_trig,
+        " count: ", dma_hw->ch[SCSI_DMA_CH_D].transfer_count);
+    logmsg("GPIO states: ", sio_hw->gpio_in);
+}
+
+/****************************************/
+/* Accelerated writes to SCSI bus       */
+/****************************************/
+
+// Load the SCSI parity state machine with the address of the parity lookup table.
+// Also sets up DMA channels B and C
+//
+// After this call channel B is armed (started with trigger = true) and channel C
+// is configured but idle; neither state machine has been enabled yet.
+static void config_parity_sm_for_write()
+{
+    // Load base address to state machine register X
+    uint32_t addrbase = (uint32_t)&g_scsi_parity_lookup[0];
+    // Table must be 512-byte aligned so that dropping the low 9 bits below loses nothing.
+    assert((addrbase & 0x1FF) == 0);
+    pio_sm_init(SCSI_DMA_PIO, SCSI_PARITY_SM, g_scsi_dma.pio_offset_parity, &g_scsi_dma.pio_cfg_parity);
+    // Put the upper address bits in the TX fifo, then execute "pull" (non-blocking)
+    // and "mov x, osr" directly on the stopped state machine to move them into X.
+    pio_sm_put(SCSI_DMA_PIO, SCSI_PARITY_SM, addrbase >> 9);
+    pio_sm_exec(SCSI_DMA_PIO, SCSI_PARITY_SM, pio_encode_pull(false, false));
+    pio_sm_exec(SCSI_DMA_PIO, SCSI_PARITY_SM, pio_encode_mov(pio_x, pio_osr));
+    
+    // DMA channel B will copy addresses from parity PIO to DMA channel C read address register.
+    // It is triggered by the parity SM RX FIFO request
+    // Writing to channel C's al3_read_addr_trig alias both sets the read address and starts channel C.
+    dma_channel_configure(SCSI_DMA_CH_B,
+        &g_scsi_dma.dmacfg_write_chB,
+        &dma_hw->ch[SCSI_DMA_CH_C].al3_read_addr_trig,
+        &SCSI_DMA_PIO->rxf[SCSI_PARITY_SM],
+        1, true);
+    
+    // DMA channel C will read g_scsi_parity_lookup to copy data + parity to SCSI write state machine.
+    // It is triggered by SCSI write machine TX FIFO request and chains to re-enable channel B.
+    // The read address is left NULL here because channel B supplies it for every transfer.
+    dma_channel_configure(SCSI_DMA_CH_C,
+        &g_scsi_dma.dmacfg_write_chC,
+        &SCSI_DMA_PIO->txf[SCSI_DATA_SM],
+        NULL,
+        1, false);
+}
+
+// Schedule the next chunk of outgoing data on DMA channel A.
+// Promotes the queued buffer once the current one has been fully scheduled;
+// when nothing is left to send, moves the state to SCSIDMA_WRITE_DONE instead.
+// Called from scsi_accel_rp2040_startWrite() and from scsi_dma_irq().
+static void start_dma_write()
+{
+    const bool current_exhausted = (g_scsi_dma.dma_bytes >= g_scsi_dma.app_bytes);
+    if (current_exhausted)
+    {
+        // Current buffer is used up: the queued buffer (possibly empty) becomes current
+        g_scsi_dma.app_buf        = g_scsi_dma.next_app_buf;
+        g_scsi_dma.app_bytes      = g_scsi_dma.next_app_bytes;
+        g_scsi_dma.dma_bytes      = 0;
+        g_scsi_dma.next_app_buf   = 0;
+        g_scsi_dma.next_app_bytes = 0;
+    }
+
+    const uint32_t remaining = g_scsi_dma.app_bytes - g_scsi_dma.dma_bytes;
+    if (remaining != 0)
+    {
+        // Account for the bytes before triggering the channel, then
+        // start DMA from the current buffer position to the parity generator
+        uint8_t *chunk = g_scsi_dma.app_buf + g_scsi_dma.dma_bytes;
+        g_scsi_dma.dma_bytes += remaining;
+
+        dma_channel_configure(SCSI_DMA_CH_A,
+                              &g_scsi_dma.dmacfg_write_chA,
+                              &SCSI_DMA_PIO->txf[SCSI_PARITY_SM],
+                              chunk,
+                              remaining,
+                              true);
+    }
+    else
+    {
+        // All done. From SCSIDMA_WRITE_DONE the state goes either to IDLE
+        // in stopWrite() or back to WRITE in startWrite().
+        g_scsi_dma_state = SCSIDMA_WRITE_DONE;
+    }
+}
+
+// Start (or extend) an accelerated write of `count` bytes from `data` to the SCSI bus.
+// Returns without waiting for the transfer to complete; the DMA keeps reading from
+// `data` afterwards, so the buffer must stay valid until the write has finished.
+//
+// - While a write is running, the request is merged into the current buffer (if
+//   contiguous and nothing is queued), merged into the queued buffer (if contiguous),
+//   or stored as the queued buffer (if the queue slot is free).
+// - Otherwise the previous request is waited for; if *resetFlag is set after that
+//   wait, the function returns without starting the new transfer.
+// - PIO state machines, GPIO functions and the channel A interrupt are only set up
+//   when starting from SCSIDMA_IDLE; from SCSIDMA_WRITE_DONE only the DMA is restarted.
+void scsi_accel_rp2040_startWrite(const uint8_t* data, uint32_t count, volatile int *resetFlag)
+{
+    // Any read requests should be matched with a stopRead()
+    assert(g_scsi_dma_state != SCSIDMA_READ && g_scsi_dma_state != SCSIDMA_READ_DONE);
+
+    // Interrupts are disabled while inspecting the queue because scsi_dma_irq()
+    // swaps buffers through start_dma_write().
+    uint32_t saved_irq = save_and_disable_interrupts();
+    if (g_scsi_dma_state == SCSIDMA_WRITE)
+    {
+        if (!g_scsi_dma.next_app_buf && data == g_scsi_dma.app_buf + g_scsi_dma.app_bytes)
+        {
+            // Combine with currently running request
+            g_scsi_dma.app_bytes += count;
+            count = 0;
+        }
+        else if (data == g_scsi_dma.next_app_buf + g_scsi_dma.next_app_bytes)
+        {
+            // Combine with queued request
+            g_scsi_dma.next_app_bytes += count;
+            count = 0;
+        }
+        else if (!g_scsi_dma.next_app_buf)
+        {
+            // Add as queued request
+            g_scsi_dma.next_app_buf = (uint8_t*)data;
+            g_scsi_dma.next_app_bytes = count;
+            count = 0;
+        }
+    }
+    restore_interrupts(saved_irq);
+
+    // Check if the request was combined
+    if (count == 0) return;
+
+    if (g_scsi_dma_state != SCSIDMA_IDLE && g_scsi_dma_state != SCSIDMA_WRITE_DONE)
+    {
+        // Wait for previous request to finish
+        scsi_accel_rp2040_finishWrite(resetFlag);
+        if (*resetFlag)
+        {
+            return;
+        }
+    }
+
+    // Coming from WRITE_DONE the state machines are still configured for writing
+    // (stopWrite() has not run), so the hardware setup below is skipped.
+    bool must_reconfig_gpio = (g_scsi_dma_state == SCSIDMA_IDLE);
+    g_scsi_dma_state = SCSIDMA_WRITE;
+    g_scsi_dma.app_buf = (uint8_t*)data;
+    g_scsi_dma.app_bytes = count;
+    g_scsi_dma.dma_bytes = 0;
+    g_scsi_dma.next_app_buf = 0;
+    g_scsi_dma.next_app_bytes = 0;
+    
+    if (must_reconfig_gpio)
+    {
+        SCSI_ENABLE_DATA_OUT();
+
+        if (g_scsi_dma.syncOffset == 0)
+        {
+            // Asynchronous write
+            config_parity_sm_for_write();
+            pio_sm_init(SCSI_DMA_PIO, SCSI_DATA_SM, g_scsi_dma.pio_offset_async_write, &g_scsi_dma.pio_cfg_async_write);
+            // State is already SCSIDMA_WRITE, so this hands the data and REQ pins to the PIO
+            scsidma_config_gpio();
+
+            pio_sm_set_enabled(SCSI_DMA_PIO, SCSI_DATA_SM, true);
+            pio_sm_set_enabled(SCSI_DMA_PIO, SCSI_PARITY_SM, true);
+        }
+        else
+        {
+            // Synchronous write
+            // Data state machine writes data to SCSI bus and dummy bits to its RX fifo.
+            // Sync state machine empties the dummy bits every time ACK is received, to control the transmit pace.
+            config_parity_sm_for_write();
+            pio_sm_init(SCSI_DMA_PIO, SCSI_DATA_SM, g_scsi_dma.pio_offset_sync_write, &g_scsi_dma.pio_cfg_sync_write);
+            pio_sm_init(SCSI_DMA_PIO, SCSI_SYNC_SM, g_scsi_dma.pio_offset_sync_write_pacer, &g_scsi_dma.pio_cfg_sync_write_pacer);
+            scsidma_config_gpio();
+
+            // Prefill RX fifo to set the syncOffset
+            // Each executed non-blocking "push" (with side-set value 1) adds one entry
+            // to the data state machine's RX fifo.
+            for (int i = 0; i < g_scsi_dma.syncOffsetPreload; i++)
+            {
+                pio_sm_exec(SCSI_DMA_PIO, SCSI_DATA_SM,
+                    pio_encode_push(false, false) | pio_encode_sideset(1, 1));
+            }
+
+            // Fill the pacer TX fifo
+            // DMA should start transferring only after ACK pulses are received
+            for (int i = 0; i < 4; i++)
+            {
+                pio_sm_put(SCSI_DMA_PIO, SCSI_SYNC_SM, 0);
+            }
+
+            // Fill the pacer OSR
+            pio_sm_exec(SCSI_DMA_PIO, SCSI_SYNC_SM,
+                pio_encode_mov(pio_osr, pio_null));
+
+            // Start DMA transfer to move dummy bits to write pacer
+            // The transfer count is the maximum value so the channel keeps running
+            // for the whole write; it is aborted in stopWrite().
+            dma_channel_configure(SCSI_DMA_CH_D,
+                &g_scsi_dma.dmacfg_write_chD,
+                &SCSI_DMA_PIO->txf[SCSI_SYNC_SM],
+                &SCSI_DMA_PIO->rxf[SCSI_DATA_SM],
+                0xFFFFFFFF,
+                true
+            );
+
+            // Enable state machines
+            pio_sm_set_enabled(SCSI_DMA_PIO, SCSI_SYNC_SM, true);
+            pio_sm_set_enabled(SCSI_DMA_PIO, SCSI_DATA_SM, true);
+            pio_sm_set_enabled(SCSI_DMA_PIO, SCSI_PARITY_SM, true);
+        }
+        
+        // Channel A completion drives scsi_dma_irq(), which schedules the next buffer
+        dma_channel_set_irq0_enabled(SCSI_DMA_CH_A, true);
+    }
+
+    start_dma_write();
+}
+
+// Query write progress.
+// Returns true when no write is pending at all (state IDLE or WRITE_DONE).
+// With data == NULL, returns false while any write is still active.
+// Otherwise returns false if the byte at `data` is still waiting, either ahead of
+// DMA channel A's read pointer in the current buffer or anywhere in the queued buffer.
+bool scsi_accel_rp2040_isWriteFinished(const uint8_t* data)
+{
+    // Nothing in flight at all?
+    if (g_scsi_dma_state == SCSIDMA_IDLE) return true;
+    if (g_scsi_dma_state == SCSIDMA_WRITE_DONE) return true;
+
+    // Caller asked about the whole transfer, which is still active
+    if (data == NULL)
+    {
+        return false;
+    }
+
+    // Inspect the queue with interrupts off so the IRQ handler cannot swap buffers meanwhile
+    bool still_pending = false;
+    uint32_t irq_state = save_and_disable_interrupts();
+
+    const bool in_current =
+        g_scsi_dma.app_buf <= data &&
+        data < g_scsi_dma.app_buf + g_scsi_dma.app_bytes &&
+        dma_hw->ch[SCSI_DMA_CH_A].al1_read_addr <= (uint32_t)data;
+
+    if (in_current)
+    {
+        // Within current transfer and not yet read by DMA
+        still_pending = true;
+    }
+    else
+    {
+        const bool in_queued =
+            g_scsi_dma.next_app_buf <= data &&
+            data < g_scsi_dma.next_app_buf + g_scsi_dma.next_app_bytes;
+
+        if (in_queued)
+        {
+            // Within queued transfer
+            still_pending = true;
+        }
+    }
+
+    restore_interrupts(irq_state);
+
+    return !still_pending;
+}
+
+// After the DMA has completed, tells whether the PIO side has drained as well:
+// all relevant FIFOs empty, the data state machine finished with its last word,
+// and the ACK for the final byte no longer active.
+static bool scsi_accel_rp2040_isWriteDone()
+{
+    // Data must not be waiting in any of the PIO FIFOs on the write path
+    const bool fifos_drained =
+        pio_sm_is_tx_fifo_empty(SCSI_DMA_PIO, SCSI_PARITY_SM) &&
+        pio_sm_is_rx_fifo_empty(SCSI_DMA_PIO, SCSI_PARITY_SM) &&
+        pio_sm_is_tx_fifo_empty(SCSI_DMA_PIO, SCSI_DATA_SM);
+    if (!fifos_drained)
+    {
+        return false;
+    }
+
+    bool data_sm_finished;
+    if (g_scsi_dma.syncOffset > 0)
+    {
+        // Synchronous: every byte is acknowledged once the RX fifo
+        // is back down to its preload level
+        data_sm_finished =
+            (pio_sm_get_rx_fifo_level(SCSI_DMA_PIO, SCSI_DATA_SM) <= g_scsi_dma.syncOffsetPreload);
+    }
+    else
+    {
+        // Asynchronous: the state machine has written out its OSR
+        // when it is back at the first instruction of the program
+        data_sm_finished =
+            (pio_sm_get_pc(SCSI_DMA_PIO, SCSI_DATA_SM) == g_scsi_dma.pio_offset_async_write);
+    }
+
+    if (!data_sm_finished)
+    {
+        return false;
+    }
+
+    // Finally, the ACK of the last byte must have finished
+    return !(SCSI_IN(ACK));
+}
+
+// Wait for the PIO to finish sending (see scsi_accel_rp2040_isWriteDone()), then
+// tear down the write path and return to SCSIDMA_IDLE.
+// The wait ends early when *resetFlag is set; after 5 seconds the state is logged
+// and *resetFlag is set to 1. The teardown below runs in every case.
+static void scsi_accel_rp2040_stopWrite(volatile int *resetFlag)
+{
+    // Wait for TX fifo to be empty and ACK to go high
+    // For synchronous writes wait for all ACKs to be received also
+    uint32_t start = millis();
+    while (!scsi_accel_rp2040_isWriteDone() && !*resetFlag)
+    {
+        // Unsigned subtraction keeps the elapsed-time check valid across millis() wrap-around
+        if ((uint32_t)(millis() - start) > 5000)
+        {
+            logmsg("scsi_accel_rp2040_stopWrite() timeout");
+            scsi_accel_log_state();
+            *resetFlag = 1;
+            break;
+        }
+    }
+
+    dma_channel_abort(SCSI_DMA_CH_A);
+    dma_channel_abort(SCSI_DMA_CH_B);
+    dma_channel_abort(SCSI_DMA_CH_C);
+    dma_channel_abort(SCSI_DMA_CH_D);
+    dma_channel_set_irq0_enabled(SCSI_DMA_CH_A, false);
+    // State must be IDLE before scsidma_config_gpio(): that function picks the
+    // pin function (SIO vs PIO) from g_scsi_dma_state.
+    g_scsi_dma_state = SCSIDMA_IDLE;
+    SCSI_RELEASE_DATA_REQ();
+    scsidma_config_gpio();
+    pio_sm_set_enabled(SCSI_DMA_PIO, SCSI_PARITY_SM, false);
+    pio_sm_set_enabled(SCSI_DMA_PIO, SCSI_DATA_SM, false);
+    pio_sm_set_enabled(SCSI_DMA_PIO, SCSI_SYNC_SM, false);
+}
+
+// Block until the running write has completed and the bus has been released
+// (state SCSIDMA_IDLE), or until *resetFlag is set.
+// After 5 seconds without completion the state is logged and *resetFlag is set to 1.
+//
+// NOTE(review): when the wait ends because of *resetFlag (including the timeout),
+// this function returns without calling scsi_accel_rp2040_stopWrite(), so DMA/PIO
+// are left as they were. scsi_accel_rp2040_finishRead() does stop the transfer in
+// the equivalent case. Presumably the reset path reinitializes the hardware
+// elsewhere -- confirm.
+void scsi_accel_rp2040_finishWrite(volatile int *resetFlag)
+{
+    uint32_t start = millis();
+    while (g_scsi_dma_state != SCSIDMA_IDLE && !*resetFlag)
+    {
+        if ((uint32_t)(millis() - start) > 5000)
+        {
+            logmsg("scsi_accel_rp2040_finishWrite() timeout");
+            scsi_accel_log_state();
+            *resetFlag = 1;
+            break;
+        }
+
+        // While the state is still SCSIDMA_WRITE this loop just spins; the state
+        // becomes WRITE_DONE from scsi_dma_irq() once all data has been scheduled and sent to the parity SM.
+        if (g_scsi_dma_state == SCSIDMA_WRITE_DONE || *resetFlag)
+        {
+            // DMA done, wait for PIO to finish also and reconfig GPIO.
+            scsi_accel_rp2040_stopWrite(resetFlag);
+        }
+    }
+}
+
+/****************************************/
+/* Accelerated reads from SCSI bus      */
+/****************************************/
+
+// Load the SCSI read state machine with the address of the parity lookup table.
+// Also sets up DMA channels B, C and D
+//
+// After this call channel C is armed (trigger = true); channels B and D are
+// configured but not started. No state machine is enabled here.
+static void config_parity_sm_for_read()
+{
+    // Configure parity check state machine
+    pio_sm_init(SCSI_DMA_PIO, SCSI_PARITY_SM, g_scsi_dma.pio_offset_read_parity, &g_scsi_dma.pio_cfg_read_parity);
+
+    // Load base address to state machine register X
+    // NOTE(review): the code below actually moves the value into register Y of the
+    // data state machine (pio_encode_mov(pio_y, ...)), not X.
+    uint32_t addrbase = (uint32_t)&g_scsi_parity_check_lookup[0];
+    // Table must be 1024-byte aligned so that dropping the low 10 bits below loses nothing.
+    assert((addrbase & 0x3FF) == 0);
+    pio_sm_init(SCSI_DMA_PIO, SCSI_DATA_SM, g_scsi_dma.pio_offset_read, &g_scsi_dma.pio_cfg_read);
+    pio_sm_put(SCSI_DMA_PIO, SCSI_DATA_SM, addrbase >> 10);
+    pio_sm_exec(SCSI_DMA_PIO, SCSI_DATA_SM, pio_encode_pull(false, false) | pio_encode_sideset(1, 1));
+    pio_sm_exec(SCSI_DMA_PIO, SCSI_DATA_SM, pio_encode_mov(pio_y, pio_osr) | pio_encode_sideset(1, 1));
+    
+    // For synchronous mode, the REQ pin is driven by SCSI_SYNC_SM, so disable it in SCSI_DATA_SM
+    if (g_scsi_dma.syncOffset > 0)
+    {
+        pio_sm_set_sideset_pins(SCSI_DMA_PIO, SCSI_DATA_SM, 0);
+    }
+
+    // DMA channel B will read g_scsi_parity_check_lookup and write to scsi_read_parity PIO.
+    // Its read address is left NULL because channel C supplies it for every transfer.
+    dma_channel_configure(SCSI_DMA_CH_B,
+        &g_scsi_dma.dmacfg_read_chB,
+        &SCSI_DMA_PIO->txf[SCSI_PARITY_SM],
+        NULL,
+        1, false);
+    
+    // DMA channel C will copy addresses from data PIO to DMA channel B read address register.
+    // It is triggered by the data SM RX FIFO request.
+    // This triggers channel B by writing to READ_ADDR_TRIG
+    // Channel B chaining re-enables this channel.
+    dma_channel_configure(SCSI_DMA_CH_C,
+        &g_scsi_dma.dmacfg_read_chC,
+        &dma_hw->ch[SCSI_DMA_CH_B].al3_read_addr_trig,
+        &SCSI_DMA_PIO->rxf[SCSI_DATA_SM],
+        1, true);
+
+    if (g_scsi_dma.syncOffset == 0)
+    {
+        // DMA channel D will copy dummy words to scsi_accel_read PIO to set the number
+        // of bytes to transfer.
+        // "dummy" is static so its address stays valid after this function returns;
+        // the channel is only started later, in start_dma_read().
+        static const uint32_t dummy = 0;
+        dma_channel_configure(SCSI_DMA_CH_D,
+            &g_scsi_dma.dmacfg_read_chD,
+            &SCSI_DMA_PIO->txf[SCSI_DATA_SM],
+            &dummy,
+            0, false);
+    }
+    else
+    {
+        pio_sm_init(SCSI_DMA_PIO, SCSI_SYNC_SM, g_scsi_dma.pio_offset_sync_read_pacer, &g_scsi_dma.pio_cfg_sync_read_pacer);
+
+        // DMA channel D will copy words from scsi_sync_read_pacer to scsi_accel_read PIO
+        // to control the offset between REQ pulses sent and ACK pulses received.
+        // Transfer count 0 / no trigger: start_dma_read() sets the count and starts it.
+        dma_channel_configure(SCSI_DMA_CH_D,
+            &g_scsi_dma.dmacfg_read_chD,
+            &SCSI_DMA_PIO->txf[SCSI_DATA_SM],
+            &SCSI_DMA_PIO->rxf[SCSI_SYNC_SM],
+            0, false);
+    }
+
+    // Clear PIO IRQ flag that is used to detect parity error
+    // (flag 0; scsi_accel_rp2040_finishRead() tests the same bit)
+    SCSI_DMA_PIO->irq = 1;
+}
+
+// Schedule reception of the next chunk into the application buffer.
+// Stops the parity/data state machines and clears their FIFOs, promotes the queued
+// buffer if the current one is fully scheduled, and then either sets
+// SCSIDMA_READ_DONE (nothing left) or starts DMA channels D and A and re-enables
+// the state machines. Called from scsi_accel_rp2040_startRead() and scsi_dma_irq().
+static void start_dma_read()
+{
+    pio_sm_set_enabled(SCSI_DMA_PIO, SCSI_PARITY_SM, false);
+    pio_sm_set_enabled(SCSI_DMA_PIO, SCSI_DATA_SM, false);
+    pio_sm_clear_fifos(SCSI_DMA_PIO, SCSI_PARITY_SM);
+    pio_sm_clear_fifos(SCSI_DMA_PIO, SCSI_DATA_SM);
+    
+    if (g_scsi_dma.app_bytes <= g_scsi_dma.dma_bytes)
+    {
+        // Buffer has been fully processed, swap it
+        g_scsi_dma.dma_bytes = 0;
+        g_scsi_dma.app_buf = g_scsi_dma.next_app_buf;
+        g_scsi_dma.app_bytes = g_scsi_dma.next_app_bytes;
+        g_scsi_dma.next_app_buf = 0;
+        g_scsi_dma.next_app_bytes = 0;
+    }
+    
+    // Check if we are all done.
+    // From SCSIDMA_READ_DONE state we can either go to IDLE in stopRead()
+    // or back to READ in startRead().
+    uint32_t bytes_to_read = g_scsi_dma.app_bytes - g_scsi_dma.dma_bytes;
+    if (bytes_to_read == 0)
+    {
+        g_scsi_dma_state = SCSIDMA_READ_DONE;
+        return;
+    }
+
+    if (g_scsi_dma.syncOffset == 0)
+    {
+        // Start sending dummy words to scsi_accel_read state machine
+        // (one word per byte to receive)
+        dma_channel_set_trans_count(SCSI_DMA_CH_D, bytes_to_read, true);
+    }
+    else
+    {
+        // Set number of bytes to receive to the scsi_sync_read_pacer state machine register X
+        // FJOIN_RX is cleared around the put/pull so the count can pass through the
+        // TX fifo, and set again afterwards.
+        pio_sm_set_enabled(SCSI_DMA_PIO, SCSI_SYNC_SM, false);
+        hw_clear_bits(&SCSI_DMA_PIO->sm[SCSI_SYNC_SM].shiftctrl, PIO_SM0_SHIFTCTRL_FJOIN_RX_BITS);
+        pio_sm_put(SCSI_DMA_PIO, SCSI_SYNC_SM, bytes_to_read - 1);
+        pio_sm_exec(SCSI_DMA_PIO, SCSI_SYNC_SM, pio_encode_pull(false, false) | pio_encode_sideset(1, 1));
+        pio_sm_exec(SCSI_DMA_PIO, SCSI_SYNC_SM, pio_encode_mov(pio_x, pio_osr) | pio_encode_sideset(1, 1));
+        hw_set_bits(&SCSI_DMA_PIO->sm[SCSI_SYNC_SM].shiftctrl, PIO_SM0_SHIFTCTRL_FJOIN_RX_BITS);
+        
+        // Prefill FIFOs to get correct syncOffset
+        // 12 = 4 words of SCSI_DATA_SM TX fifo + 8 words of SCSI_SYNC_SM RX fifo (filled below)
+        int prefill = 12 - g_scsi_dma.syncOffset;
+        
+        // Always at least 1 word to avoid race condition between REQ and ACK pulses
+        if (prefill < 1) prefill = 1;
+
+        // Up to 4 words in SCSI_DATA_SM TX fifo
+        for (int i = 0; i < 4 && prefill > 0; i++)
+        {
+            pio_sm_put(SCSI_DMA_PIO, SCSI_DATA_SM, 0);
+            prefill--;
+        }
+
+        // Up to 8 words in SCSI_SYNC_SM RX fifo
+        for (int i = 0; i < 8 && prefill > 0; i++)
+        {
+            pio_sm_exec(SCSI_DMA_PIO, SCSI_SYNC_SM, pio_encode_push(false, false) | pio_encode_sideset(1, 1));
+            prefill--;
+        }
+        
+        // Move the pacer's program counter back to the start of its program
+        pio_sm_exec(SCSI_DMA_PIO, SCSI_SYNC_SM, pio_encode_jmp(g_scsi_dma.pio_offset_sync_read_pacer) | pio_encode_sideset(1, 1));
+
+        // Start transfers
+        dma_channel_set_trans_count(SCSI_DMA_CH_D, bytes_to_read, true);
+    }
+
+    // Start DMA to fill the destination buffer
+    // dma_bytes is advanced before the channel is triggered
+    uint8_t *dest_buf = &g_scsi_dma.app_buf[g_scsi_dma.dma_bytes];
+    g_scsi_dma.dma_bytes += bytes_to_read;
+    dma_channel_configure(SCSI_DMA_CH_A,
+        &g_scsi_dma.dmacfg_read_chA,
+        dest_buf,
+        &SCSI_DMA_PIO->rxf[SCSI_PARITY_SM],
+        bytes_to_read,
+        true
+    );
+
+    // Ready to start the data and parity check state machines
+    pio_sm_set_enabled(SCSI_DMA_PIO, SCSI_PARITY_SM, true);
+    pio_sm_set_enabled(SCSI_DMA_PIO, SCSI_DATA_SM, true);
+
+    if (g_scsi_dma.syncOffset > 0)
+    {
+        // Start sending REQ pulses
+        pio_sm_set_enabled(SCSI_DMA_PIO, SCSI_SYNC_SM, true);
+    }
+}
+
+// Start (or extend) an accelerated read of `count` bytes from the SCSI bus into `data`.
+// Returns without waiting for completion; the DMA keeps writing into `data`
+// afterwards, so the buffer must stay valid until the read has finished.
+//
+// - While a read is running, the request is merged into the current buffer (if
+//   contiguous and nothing is queued), merged into the queued buffer (if contiguous),
+//   or stored as the queued buffer (if the queue slot is free).
+// - Otherwise the previous request is waited for; `parityError` is passed on to that
+//   wait, and if *resetFlag is set afterwards the new transfer is not started.
+// - PIO/DMA/GPIO setup is only done when starting from SCSIDMA_IDLE.
+void scsi_accel_rp2040_startRead(uint8_t *data, uint32_t count, int *parityError, volatile int *resetFlag)
+{
+    // Any write requests should be matched with a stopWrite()
+    assert(g_scsi_dma_state != SCSIDMA_WRITE && g_scsi_dma_state != SCSIDMA_WRITE_DONE);
+
+    // Interrupts are disabled while inspecting the queue because scsi_dma_irq()
+    // swaps buffers through start_dma_read().
+    uint32_t saved_irq = save_and_disable_interrupts();
+    if (g_scsi_dma_state == SCSIDMA_READ)
+    {
+        if (!g_scsi_dma.next_app_buf && data == g_scsi_dma.app_buf + g_scsi_dma.app_bytes)
+        {
+            // Combine with currently running request
+            g_scsi_dma.app_bytes += count;
+            count = 0;
+        }
+        else if (data == g_scsi_dma.next_app_buf + g_scsi_dma.next_app_bytes)
+        {
+            // Combine with queued request
+            g_scsi_dma.next_app_bytes += count;
+            count = 0;
+        }
+        else if (!g_scsi_dma.next_app_buf)
+        {
+            // Add as queued request
+            g_scsi_dma.next_app_buf = (uint8_t*)data;
+            g_scsi_dma.next_app_bytes = count;
+            count = 0;
+        }
+    }
+    restore_interrupts(saved_irq);
+
+    // Check if the request was combined
+    if (count == 0) return;
+
+    if (g_scsi_dma_state != SCSIDMA_IDLE && g_scsi_dma_state != SCSIDMA_READ_DONE)
+    {
+        // Wait for previous request to finish
+        // (data == NULL makes finishRead wait for the whole transfer)
+        scsi_accel_rp2040_finishRead(NULL, 0, parityError, resetFlag);
+        if (*resetFlag)
+        {
+            return;
+        }
+    }
+
+    // Coming from READ_DONE the hardware is still configured for reading
+    // (stopRead() has not run), so the setup below is skipped.
+    bool must_reconfig_gpio = (g_scsi_dma_state == SCSIDMA_IDLE);
+    g_scsi_dma_state = SCSIDMA_READ;
+    g_scsi_dma.app_buf = (uint8_t*)data;
+    g_scsi_dma.app_bytes = count;
+    g_scsi_dma.dma_bytes = 0;
+    g_scsi_dma.next_app_buf = 0;
+    g_scsi_dma.next_app_bytes = 0;
+
+    if (must_reconfig_gpio)
+    {
+        config_parity_sm_for_read();
+        scsidma_config_gpio();
+        // Channel A completion drives scsi_dma_irq(), which schedules the next buffer
+        dma_channel_set_irq0_enabled(SCSI_DMA_CH_A, true);
+    }
+
+    start_dma_read();
+}
+
+// Query read progress.
+// Returns true when no read is pending at all (state IDLE or READ_DONE).
+// With data == NULL, returns false while any read is still active.
+// Otherwise returns false if the byte at `data` has not been received yet, either at
+// or beyond DMA channel A's write pointer in the current buffer, or anywhere in the
+// queued buffer.
+bool scsi_accel_rp2040_isReadFinished(const uint8_t* data)
+{
+    // Nothing in flight at all?
+    if (g_scsi_dma_state == SCSIDMA_IDLE) return true;
+    if (g_scsi_dma_state == SCSIDMA_READ_DONE) return true;
+
+    // Caller asked about the whole transfer, which is still active
+    if (data == NULL)
+    {
+        return false;
+    }
+
+    // Inspect the queue with interrupts off so the IRQ handler cannot swap buffers meanwhile
+    bool still_pending = false;
+    uint32_t irq_state = save_and_disable_interrupts();
+
+    const bool in_current =
+        g_scsi_dma.app_buf <= data &&
+        data < g_scsi_dma.app_buf + g_scsi_dma.app_bytes &&
+        dma_hw->ch[SCSI_DMA_CH_A].write_addr <= (uint32_t)data;
+
+    if (in_current)
+    {
+        // Within current transfer and not yet written by DMA
+        still_pending = true;
+    }
+    else
+    {
+        const bool in_queued =
+            g_scsi_dma.next_app_buf <= data &&
+            data < g_scsi_dma.next_app_buf + g_scsi_dma.next_app_bytes;
+
+        if (in_queued)
+        {
+            // Within queued transfer
+            still_pending = true;
+        }
+    }
+
+    restore_interrupts(irq_state);
+
+    return !still_pending;
+}
+
+// Tear down the read path immediately (no waiting) and return to SCSIDMA_IDLE:
+// abort DMA channels A-D, disable the channel A interrupt, release the bus,
+// give the pins back to software control and disable the state machines.
+static void scsi_accel_rp2040_stopRead()
+{
+    dma_channel_abort(SCSI_DMA_CH_A);
+    dma_channel_abort(SCSI_DMA_CH_B);
+    dma_channel_abort(SCSI_DMA_CH_C);
+    dma_channel_abort(SCSI_DMA_CH_D);
+    dma_channel_set_irq0_enabled(SCSI_DMA_CH_A, false);
+    // State must be IDLE before scsidma_config_gpio(): that function picks the
+    // pin function (SIO vs PIO) from g_scsi_dma_state.
+    g_scsi_dma_state = SCSIDMA_IDLE;
+    SCSI_RELEASE_DATA_REQ();
+    scsidma_config_gpio();
+    pio_sm_set_enabled(SCSI_DMA_PIO, SCSI_PARITY_SM, false);
+    pio_sm_set_enabled(SCSI_DMA_PIO, SCSI_DATA_SM, false);
+    pio_sm_set_enabled(SCSI_DMA_PIO, SCSI_SYNC_SM, false);
+}
+
+// Wait until the bytes data[0 .. count-1] have been received, or, when data is
+// NULL, until the whole read has completed.
+// The wait ends early when *resetFlag is set; after 5 seconds the state is logged
+// and *resetFlag is set to 1.
+// If that was the last buffer (state READ_DONE) or a reset is flagged, the read
+// path is shut down. When parityError is non-NULL, it is set if the PIO has
+// flagged a parity error during the transfer so far.
+void scsi_accel_rp2040_finishRead(const uint8_t *data, uint32_t count, int *parityError, volatile int *resetFlag)
+{
+    const uint32_t wait_began = millis();
+
+    // Progress is tracked through the last byte of the requested range
+    const uint8_t *last_byte = NULL;
+    if (data)
+    {
+        last_byte = data + count - 1;
+    }
+
+    for (;;)
+    {
+        if (scsi_accel_rp2040_isReadFinished(last_byte) || *resetFlag)
+        {
+            break;
+        }
+
+        const uint32_t elapsed = (uint32_t)(millis() - wait_began);
+        if (elapsed > 5000)
+        {
+            logmsg("scsi_accel_rp2040_finishRead timeout");
+            scsi_accel_log_state();
+            *resetFlag = 1;
+            break;
+        }
+    }
+
+    if (g_scsi_dma_state == SCSIDMA_READ_DONE || *resetFlag)
+    {
+        // Last buffer has been handled (or we are resetting): release the bus
+        scsi_accel_rp2040_stopRead();
+    }
+
+    // Report any parity error the PIO has detected during the transfer so far
+    if (parityError != NULL)
+    {
+        if (SCSI_DMA_PIO->irq & 1)
+        {
+            dbgmsg("scsi_accel_rp2040_finishRead(", bytearray(data, count), ") detected parity error");
+            *parityError = true;
+        }
+    }
+}
+
+/*******************************************************/
+/* Initialization functions common to read/write       */
+/*******************************************************/
+
+// DMA interrupt handler for channel A completion.
+// Acknowledges the interrupt and schedules the next buffer of the active transfer.
+// With ENABLE_AUDIO_OUTPUT, interrupts not raised by channel A are handed to
+// audio_dma_irq() instead (see audio.h).
+static void scsi_dma_irq()
+{
+#ifdef ENABLE_AUDIO_OUTPUT
+    // see audio.h for whats going on here
+    if ((dma_hw->intr & (1 << SCSI_DMA_CH_A)) == 0)
+    {
+        audio_dma_irq();
+        return;
+    }
+#endif
+
+    // Acknowledge the channel A interrupt
+    dma_hw->ints0 = (1 << SCSI_DMA_CH_A);
+
+    switch (g_scsi_dma_state)
+    {
+        case SCSIDMA_WRITE:
+            // Continue from the next buffer, if any, or move to SCSIDMA_WRITE_DONE
+            start_dma_write();
+            break;
+
+        case SCSIDMA_READ:
+            // Continue into the next buffer, if any, or move to SCSIDMA_READ_DONE
+            start_dma_read();
+            break;
+
+        default:
+            // No transfer is active; nothing to schedule
+            break;
+    }
+}
+
+// Hand the SCSI data bus (DB0..DB7 + DBP) and the REQ pin either to the PIO
+// peripheral or to the software controlled SIO, based on g_scsi_dma_state:
+//   SCSIDMA_IDLE:  data bus and REQ controlled by SIO
+//   SCSIDMA_WRITE: data bus and REQ controlled by PIO
+//   SCSIDMA_READ:  data bus stays on SIO, REQ controlled by PIO
+// Any other state leaves the pin configuration untouched.
+static void scsidma_config_gpio()
+{
+    // Writes the function select of the nine data pins (DB0 first, DBP last)
+    // and then of the REQ pin.
+    auto route_pins = [](uint32_t data_func, uint32_t req_func)
+    {
+        const unsigned int data_pins[] = {
+            SCSI_IO_DB0, SCSI_IO_DB1, SCSI_IO_DB2, SCSI_IO_DB3, SCSI_IO_DB4,
+            SCSI_IO_DB5, SCSI_IO_DB6, SCSI_IO_DB7, SCSI_IO_DBP
+        };
+        for (unsigned int pin : data_pins)
+        {
+            iobank0_hw->io[pin].ctrl = data_func;
+        }
+        iobank0_hw->io[SCSI_OUT_REQ].ctrl = req_func;
+    };
+
+    if (g_scsi_dma_state == SCSIDMA_IDLE)
+    {
+        route_pins(GPIO_FUNC_SIO, GPIO_FUNC_SIO);
+        return;
+    }
+
+    if (g_scsi_dma_state == SCSIDMA_WRITE)
+    {
+        // Initial PIO pin state: every data line and REQ high, all as outputs
+        pio_sm_set_pins(SCSI_DMA_PIO, SCSI_DATA_SM, SCSI_IO_DATA_MASK | (1 << SCSI_OUT_REQ));
+        pio_sm_set_consecutive_pindirs(SCSI_DMA_PIO, SCSI_DATA_SM, SCSI_IO_DB0, 9, true);
+        pio_sm_set_consecutive_pindirs(SCSI_DMA_PIO, SCSI_DATA_SM, SCSI_OUT_REQ, 1, true);
+
+        route_pins(GPIO_FUNC_PIO0, GPIO_FUNC_PIO0);
+        return;
+    }
+
+    if (g_scsi_dma_state != SCSIDMA_READ)
+    {
+        return;
+    }
+
+    // Read: data bus is an input and REQ an output. In asynchronous mode
+    // (syncOffset == 0) REQ belongs to the data state machine, in synchronous
+    // mode it is written by SYNC_SM.
+    const unsigned int req_sm = (g_scsi_dma.syncOffset == 0) ? SCSI_DATA_SM : SCSI_SYNC_SM;
+    pio_sm_set_pins(SCSI_DMA_PIO, req_sm, SCSI_IO_DATA_MASK | (1 << SCSI_OUT_REQ));
+    pio_sm_set_consecutive_pindirs(SCSI_DMA_PIO, SCSI_DATA_SM, SCSI_IO_DB0, 9, false);
+    pio_sm_set_consecutive_pindirs(SCSI_DMA_PIO, req_sm, SCSI_OUT_REQ, 1, true);
+
+    route_pins(GPIO_FUNC_SIO, GPIO_FUNC_PIO0);
+}
+
+// Initialize (or re-initialize) the SCSI acceleration hardware.
+// Steps, in order:
+//  1. Set the DMA state to idle and return the SCSI pins to SIO control.
+//  2. If resources were claimed by an earlier call, release the state machines,
+//     remove the loaded PIO programs and release the DMA channels.
+//  3. Claim the three state machines and four DMA channels.
+//  4. Load the seven PIO programs and prepare a state machine config for each.
+//  5. Prepare DMA channel configs for both transfer directions.
+//  6. Install scsi_dma_irq() as the exclusive DMA_IRQ_0 handler and enable it.
+// The load offsets and configs are stored in g_scsi_dma for later use.
+void scsi_accel_rp2040_init()
+{
+    g_scsi_dma_state = SCSIDMA_IDLE;
+    scsidma_config_gpio();
+    
+    if (g_channels_claimed) {
+        // Un-claim all SCSI state machines
+        pio_sm_unclaim(SCSI_DMA_PIO, SCSI_PARITY_SM);
+        pio_sm_unclaim(SCSI_DMA_PIO, SCSI_DATA_SM);
+        pio_sm_unclaim(SCSI_DMA_PIO, SCSI_SYNC_SM);
+
+        // Remove all SCSI programs
+        pio_remove_program(SCSI_DMA_PIO, &scsi_parity_program, g_scsi_dma.pio_offset_parity);
+        pio_remove_program(SCSI_DMA_PIO, &scsi_accel_async_write_program, g_scsi_dma.pio_offset_async_write);
+        pio_remove_program(SCSI_DMA_PIO, &scsi_sync_write_pacer_program, g_scsi_dma.pio_offset_sync_write_pacer);
+        pio_remove_program(SCSI_DMA_PIO, &scsi_sync_write_program, g_scsi_dma.pio_offset_sync_write);
+        pio_remove_program(SCSI_DMA_PIO, &scsi_accel_read_program, g_scsi_dma.pio_offset_read);
+        pio_remove_program(SCSI_DMA_PIO, &scsi_sync_read_pacer_program, g_scsi_dma.pio_offset_sync_read_pacer);
+        pio_remove_program(SCSI_DMA_PIO, &scsi_read_parity_program, g_scsi_dma.pio_offset_read_parity);
+
+        // Un-claim all SCSI DMA channels
+        dma_channel_unclaim(SCSI_DMA_CH_A);
+        dma_channel_unclaim(SCSI_DMA_CH_B);
+        dma_channel_unclaim(SCSI_DMA_CH_C);
+        dma_channel_unclaim(SCSI_DMA_CH_D);
+
+        // Set flag to re-initialize SCSI PIO system
+        g_channels_claimed = false;
+    }
+    
+    // NOTE(review): the block above always clears g_channels_claimed, so this
+    // condition is always true here and the claims below run on every call.
+    if (!g_channels_claimed)
+    {
+        // Mark channels as being in use, unless it has been done already
+        pio_sm_claim(SCSI_DMA_PIO, SCSI_PARITY_SM);
+        pio_sm_claim(SCSI_DMA_PIO, SCSI_DATA_SM);
+        pio_sm_claim(SCSI_DMA_PIO, SCSI_SYNC_SM);
+        dma_channel_claim(SCSI_DMA_CH_A);
+        dma_channel_claim(SCSI_DMA_CH_B);
+        dma_channel_claim(SCSI_DMA_CH_C);
+        dma_channel_claim(SCSI_DMA_CH_D);
+        g_channels_claimed = true;
+    }
+    
+    // Each section below loads one PIO program, stores its load offset and
+    // derives the state machine config from the program's default config.
+
+    // Parity lookup generator
+    g_scsi_dma.pio_offset_parity = pio_add_program(SCSI_DMA_PIO, &scsi_parity_program);
+    g_scsi_dma.pio_cfg_parity = scsi_parity_program_get_default_config(g_scsi_dma.pio_offset_parity);
+    sm_config_set_out_shift(&g_scsi_dma.pio_cfg_parity, true, false, 32);
+    sm_config_set_in_shift(&g_scsi_dma.pio_cfg_parity, true, true, 32);
+
+    // Asynchronous SCSI write
+    g_scsi_dma.pio_offset_async_write = pio_add_program(SCSI_DMA_PIO, &scsi_accel_async_write_program);
+    g_scsi_dma.pio_cfg_async_write = scsi_accel_async_write_program_get_default_config(g_scsi_dma.pio_offset_async_write);
+    sm_config_set_out_pins(&g_scsi_dma.pio_cfg_async_write, SCSI_IO_DB0, 9);
+    sm_config_set_sideset_pins(&g_scsi_dma.pio_cfg_async_write, SCSI_OUT_REQ);
+    sm_config_set_fifo_join(&g_scsi_dma.pio_cfg_async_write, PIO_FIFO_JOIN_TX);
+    sm_config_set_out_shift(&g_scsi_dma.pio_cfg_async_write, true, false, 32);
+
+    // Synchronous SCSI write pacer / ACK handler
+    // The autopull threshold set here (1) is overwritten by scsi_accel_rp2040_setSyncMode()
+    g_scsi_dma.pio_offset_sync_write_pacer = pio_add_program(SCSI_DMA_PIO, &scsi_sync_write_pacer_program);
+    g_scsi_dma.pio_cfg_sync_write_pacer = scsi_sync_write_pacer_program_get_default_config(g_scsi_dma.pio_offset_sync_write_pacer);
+    sm_config_set_out_shift(&g_scsi_dma.pio_cfg_sync_write_pacer, true, true, 1);
+
+    // Synchronous SCSI data writer
+    // The autopush threshold set here (1) is overwritten by scsi_accel_rp2040_setSyncMode()
+    g_scsi_dma.pio_offset_sync_write = pio_add_program(SCSI_DMA_PIO, &scsi_sync_write_program);
+    g_scsi_dma.pio_cfg_sync_write = scsi_sync_write_program_get_default_config(g_scsi_dma.pio_offset_sync_write);
+    sm_config_set_out_pins(&g_scsi_dma.pio_cfg_sync_write, SCSI_IO_DB0, 9);
+    sm_config_set_sideset_pins(&g_scsi_dma.pio_cfg_sync_write, SCSI_OUT_REQ);
+    sm_config_set_out_shift(&g_scsi_dma.pio_cfg_sync_write, true, true, 32);
+    sm_config_set_in_shift(&g_scsi_dma.pio_cfg_sync_write, true, true, 1);
+
+    // Asynchronous / synchronous SCSI read
+    g_scsi_dma.pio_offset_read = pio_add_program(SCSI_DMA_PIO, &scsi_accel_read_program);
+    g_scsi_dma.pio_cfg_read = scsi_accel_read_program_get_default_config(g_scsi_dma.pio_offset_read);
+    sm_config_set_in_pins(&g_scsi_dma.pio_cfg_read, SCSI_IO_DB0);
+    sm_config_set_sideset_pins(&g_scsi_dma.pio_cfg_read, SCSI_OUT_REQ);
+    sm_config_set_out_shift(&g_scsi_dma.pio_cfg_read, true, false, 32);
+    sm_config_set_in_shift(&g_scsi_dma.pio_cfg_read, true, true, 32);
+
+    // Synchronous SCSI read pacer
+    g_scsi_dma.pio_offset_sync_read_pacer = pio_add_program(SCSI_DMA_PIO, &scsi_sync_read_pacer_program);
+    g_scsi_dma.pio_cfg_sync_read_pacer = scsi_sync_read_pacer_program_get_default_config(g_scsi_dma.pio_offset_sync_read_pacer);
+    sm_config_set_sideset_pins(&g_scsi_dma.pio_cfg_sync_read_pacer, SCSI_OUT_REQ);
+
+    // Read parity check
+    g_scsi_dma.pio_offset_read_parity = pio_add_program(SCSI_DMA_PIO, &scsi_read_parity_program);
+    g_scsi_dma.pio_cfg_read_parity = scsi_read_parity_program_get_default_config(g_scsi_dma.pio_offset_read_parity);
+    sm_config_set_out_shift(&g_scsi_dma.pio_cfg_read_parity, true, true, 32);
+    sm_config_set_in_shift(&g_scsi_dma.pio_cfg_read_parity, true, false, 32);
+
+    // Create DMA channel configurations so they can be applied quickly later
+    // The same four channels get one config for writes and one for reads;
+    // nothing is started here, the configs are only stored in g_scsi_dma.
+    
+    // For write to SCSI BUS:
+    // Channel A: Bytes from RAM to scsi_parity PIO
+    dma_channel_config cfg = dma_channel_get_default_config(SCSI_DMA_CH_A);
+    channel_config_set_transfer_data_size(&cfg, DMA_SIZE_8);
+    channel_config_set_read_increment(&cfg, true);
+    channel_config_set_write_increment(&cfg, false);
+    channel_config_set_dreq(&cfg, pio_get_dreq(SCSI_DMA_PIO, SCSI_PARITY_SM, true));
+    g_scsi_dma.dmacfg_write_chA = cfg;
+
+    // Channel B: Addresses from scsi_parity PIO to lookup DMA READ_ADDR register
+    cfg = dma_channel_get_default_config(SCSI_DMA_CH_B);
+    channel_config_set_transfer_data_size(&cfg, DMA_SIZE_32);
+    channel_config_set_read_increment(&cfg, false);
+    channel_config_set_write_increment(&cfg, false);
+    channel_config_set_dreq(&cfg, pio_get_dreq(SCSI_DMA_PIO, SCSI_PARITY_SM, false));
+    g_scsi_dma.dmacfg_write_chB = cfg;
+
+    // Channel C: Lookup from g_scsi_parity_lookup and copy to scsi_accel_async_write or scsi_sync_write PIO
+    // When done, chain to channel B
+    cfg = dma_channel_get_default_config(SCSI_DMA_CH_C);
+    channel_config_set_transfer_data_size(&cfg, DMA_SIZE_16);
+    channel_config_set_read_increment(&cfg, false);
+    channel_config_set_write_increment(&cfg, false);
+    channel_config_set_dreq(&cfg, pio_get_dreq(SCSI_DMA_PIO, SCSI_DATA_SM, true));
+    channel_config_set_chain_to(&cfg, SCSI_DMA_CH_B);
+    g_scsi_dma.dmacfg_write_chC = cfg;
+
+    // Channel D: In synchronous mode a second DMA channel is used to transfer dummy bits
+    // from first state machine to second one.
+    cfg = dma_channel_get_default_config(SCSI_DMA_CH_D);
+    channel_config_set_transfer_data_size(&cfg, DMA_SIZE_32);
+    channel_config_set_read_increment(&cfg, false);
+    channel_config_set_write_increment(&cfg, false);
+    channel_config_set_dreq(&cfg, pio_get_dreq(SCSI_DMA_PIO, SCSI_SYNC_SM, true));
+    g_scsi_dma.dmacfg_write_chD = cfg;
+
+    // For read from SCSI BUS:
+    // Channel A: Bytes from scsi_read_parity PIO to destination memory buffer
+    // This takes the bottom 8 bits which is the data without parity bit.
+    // Triggered by scsi_read_parity RX FIFO.
+    cfg = dma_channel_get_default_config(SCSI_DMA_CH_A);
+    channel_config_set_transfer_data_size(&cfg, DMA_SIZE_8);
+    channel_config_set_read_increment(&cfg, false);
+    channel_config_set_write_increment(&cfg, true);
+    channel_config_set_dreq(&cfg, pio_get_dreq(SCSI_DMA_PIO, SCSI_PARITY_SM, false));
+    g_scsi_dma.dmacfg_read_chA = cfg;
+
+    // Channel B: Lookup from g_scsi_parity_check_lookup and copy to scsi_read_parity PIO
+    // Triggered by channel C writing to READ_ADDR_TRIG
+    // Re-enables channel C by chaining after done.
+    cfg = dma_channel_get_default_config(SCSI_DMA_CH_B);
+    channel_config_set_transfer_data_size(&cfg, DMA_SIZE_16);
+    channel_config_set_read_increment(&cfg, false);
+    channel_config_set_write_increment(&cfg, false);
+    channel_config_set_dreq(&cfg, DREQ_FORCE);
+    channel_config_set_chain_to(&cfg, SCSI_DMA_CH_C);
+    cfg.ctrl |= DMA_CH0_CTRL_TRIG_HIGH_PRIORITY_BITS;
+    g_scsi_dma.dmacfg_read_chB = cfg;
+
+    // Channel C: Addresses from scsi_read PIO to channel B READ_ADDR register
+    // A single transfer starts when PIO RX FIFO has data.
+    // The DMA channel is re-enabled by channel B chaining.
+    cfg = dma_channel_get_default_config(SCSI_DMA_CH_C);
+    channel_config_set_transfer_data_size(&cfg, DMA_SIZE_32);
+    channel_config_set_read_increment(&cfg, false);
+    channel_config_set_write_increment(&cfg, false);
+    channel_config_set_dreq(&cfg, pio_get_dreq(SCSI_DMA_PIO, SCSI_DATA_SM, false));
+    g_scsi_dma.dmacfg_read_chC = cfg;
+
+    // Channel D: In synchronous mode a second DMA channel is used to transfer dummy words
+    // from first state machine to second one to control the pace of data transfer.
+    // In asynchronous mode this just transfers words to control the number of bytes.
+    cfg = dma_channel_get_default_config(SCSI_DMA_CH_D);
+    channel_config_set_transfer_data_size(&cfg, DMA_SIZE_32);
+    channel_config_set_read_increment(&cfg, false);
+    channel_config_set_write_increment(&cfg, false);
+    channel_config_set_dreq(&cfg, pio_get_dreq(SCSI_DMA_PIO, SCSI_DATA_SM, true));
+    g_scsi_dma.dmacfg_read_chD = cfg;
+    
+    // Interrupts are used for data buffer swapping
+    irq_set_exclusive_handler(DMA_IRQ_0, scsi_dma_irq);
+    irq_set_enabled(DMA_IRQ_0, true);
+}
+
+// Apply the negotiated SCSI transfer mode to the PIO configuration.
+//   syncOffset: 0 selects asynchronous transfers, > 0 is the synchronous offset.
+//   syncPeriod: negotiated synchronous period; multiplied by 4 below to get
+//               nanoseconds.
+// Returns false (after logging) when a DMA transfer is still active, in which
+// case the caller is expected to reset the bus. Returns true otherwise.
+// Nothing is changed when both parameters already match the stored values.
+bool scsi_accel_rp2040_setSyncMode(int syncOffset, int syncPeriod)
+{
+    if (g_scsi_dma_state != SCSIDMA_IDLE)
+    {
+        logmsg("ERROR: SCSI DMA was in state ", (int)g_scsi_dma_state, " when changing sync mode, forcing bus reset");
+        scsi_accel_log_state();
+        return false;
+    }
+
+    const bool unchanged = (syncOffset == g_scsi_dma.syncOffset) && (syncPeriod == g_scsi_dma.syncPeriod);
+    if (unchanged)
+    {
+        return true;
+    }
+
+    g_scsi_dma.syncOffset = syncOffset;
+    g_scsi_dma.syncPeriod = syncPeriod;
+
+    if (syncOffset <= 0)
+    {
+        // Asynchronous mode needs no PIO patching
+        return true;
+    }
+
+    // The sync offset is implemented with the 4-slot RX fifo of scsi_sync_write:
+    // it is preloaded with 0-3 items and the autopush threshold (1, 2, 4 ... 32)
+    // acts as a divider, which allows offsets from 1 to 128 bytes.
+    // SCSI2SD code currently only uses offsets up to 15.
+    int divider;
+    int preload;
+    if (syncOffset > 16)
+    {
+        divider = 4;
+        preload = 0;
+    }
+    else if (syncOffset > 8)
+    {
+        divider = 4;
+        preload = 5 - syncOffset / 4;
+    }
+    else if (syncOffset > 4)
+    {
+        divider = 2;
+        preload = 5 - syncOffset / 2;
+    }
+    else
+    {
+        divider = 1;
+        preload = 5 - syncOffset;
+    }
+
+    // At least one FIFO slot has to stay vacant, otherwise it cannot be
+    // detected reliably that every byte has been ACKed.
+    if (preload > 3)
+    {
+        preload = 3;
+    }
+
+    g_scsi_dma.syncOffsetDivider = divider;
+    g_scsi_dma.syncOffsetPreload = preload;
+
+    sm_config_set_out_shift(&g_scsi_dma.pio_cfg_sync_write_pacer, true, true, divider);
+    sm_config_set_in_shift(&g_scsi_dma.pio_cfg_sync_write, true, true, divider);
+
+    // Timing of the three-instruction scsi_sync_write program, in clock cycles:
+    //   delay0: data write      -> REQ assertion
+    //   delay1: REQ assertion   -> REQ deassertion
+    //   delay2: REQ deassertion -> next data write
+    // The conversion assumes 8 ns per clock cycle.
+    // NOTE(review): confirm the 8 ns cycle time for the RP2350 system clock.
+    //
+    // Fast SCSI (period <= 25): 30 ns assertion period, 25 ns skew delay.
+    // Hardware rise and fall times need some extra delay, the values are tuned
+    // based on oscilloscope measurements.
+    // Slow SCSI: 90 ns assertion period, 55 ns skew delay.
+    const bool fastTiming = (syncPeriod <= 25);
+    const int totalDelay = syncPeriod * 4 / 8;
+    const int delay0 = fastTiming ? 3 : 6;
+    const int delay1 = fastTiming ? 5 : 12;
+
+    int delay2 = totalDelay - delay0 - delay1 - 3;
+    if (delay2 < 0)
+    {
+        delay2 = 0;
+    }
+    else if (delay2 > 15)
+    {
+        delay2 = 15;
+    }
+
+    // Patch the delays into the loaded scsi_sync_write instructions.
+    // This only works when the .pio source leaves these delays at 0.
+    const int writeDelays[3] = { delay0, delay1, delay2 };
+    for (int i = 0; i < 3; i++)
+    {
+        const uint16_t patched = scsi_sync_write_program_instructions[i] | pio_encode_delay(writeDelays[i]);
+        SCSI_DMA_PIO->instr_mem[g_scsi_dma.pio_offset_sync_write + i] = patched;
+    }
+
+    // Same for scsi_sync_read_pacer. Its second instruction is a jump, so the
+    // program load offset is added to it as well.
+    int rdelay2 = totalDelay - delay1 - 2;
+    if (rdelay2 > 15)
+    {
+        rdelay2 = 15;
+    }
+    else if (rdelay2 < 5)
+    {
+        rdelay2 = 5;
+    }
+
+    const uint16_t pacerPush = scsi_sync_read_pacer_program_instructions[0] | pio_encode_delay(rdelay2);
+    const uint16_t pacerJump = (scsi_sync_read_pacer_program_instructions[1] + g_scsi_dma.pio_offset_sync_read_pacer) | pio_encode_delay(delay1);
+    SCSI_DMA_PIO->instr_mem[g_scsi_dma.pio_offset_sync_read_pacer + 0] = pacerPush;
+    SCSI_DMA_PIO->instr_mem[g_scsi_dma.pio_offset_sync_read_pacer + 1] = pacerJump;
+
+    return true;
+}

+ 69 - 0
lib/ZuluSCSI_platform_RP2350/scsi_accel_target.h

@@ -0,0 +1,69 @@
+/** 
+ * ZuluSCSI™ - Copyright (c) 2022 Rabbit Hole Computing™
+ * 
+ * ZuluSCSI™ firmware is licensed under the GPL version 3 or any later version. 
+ * 
+ * https://www.gnu.org/licenses/gpl-3.0.html
+ * ----
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version. 
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details. 
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+**/
+
+// Accelerated SCSI subroutines using RP2040 hardware PIO peripheral.
+
+#pragma once
+
+#include <stdint.h>
+
+// Initialize the PIO state machines, PIO programs and DMA channel
+// configurations used for SCSI transfers. Can be called again to re-initialize.
+void scsi_accel_rp2040_init();
+
+// Log current state of DMA & PIO hardware for debugging
+void scsi_accel_log_state();
+
+// Set SCSI access mode for synchronous transfers
+// Setting syncOffset = 0 enables asynchronous SCSI.
+// Setting syncOffset > 0 enables synchronous SCSI.
+// Returns false if busy, caller should issue bus reset to recover.
+bool scsi_accel_rp2040_setSyncMode(int syncOffset, int syncPeriod);
+
+// Queue a request to write data from the buffer to SCSI bus.
+// This function typically returns immediately and the request will complete in background.
+// If there are too many queued requests, this function will block until previous request finishes.
+void scsi_accel_rp2040_startWrite(const uint8_t* data, uint32_t count, volatile int *resetFlag);
+
+// Query whether the data at pointer has already been read, i.e. buffer can be reused.
+// If data is NULL, checks if all writes have completed.
+bool scsi_accel_rp2040_isWriteFinished(const uint8_t* data);
+
+// Wait for all write requests to finish and release the bus.
+// If resetFlag is non-zero, aborts write immediately.
+void scsi_accel_rp2040_finishWrite(volatile int *resetFlag);
+
+// Queue a request to read data from SCSI bus to the buffer.
+// This function typically returns immediately and the request will complete in background.
+// If there are too many queued requests, this function will block until previous request finishes.
+void scsi_accel_rp2040_startRead(uint8_t *data, uint32_t count, int *parityError, volatile int *resetFlag);
+
+// Query whether data at address is part of a queued read request.
+// Returns true if there is no outstanding request.
+// If data is NULL, checks if all reads have completed.
+bool scsi_accel_rp2040_isReadFinished(const uint8_t* data);
+
+// Wait for a read request to complete.
+// If data is not NULL, waits only until the data at data[0] .. data[count-1] is valid.
+// If data is NULL, waits for all read requests to complete.
+// If there are no further read requests, releases the bus.
+// If resetFlag is non-zero, aborts read immediately.
+// If a parity error has been noticed in any buffer since starting the read, parityError is set to 1.
+// parityError may be NULL, in which case parity errors are not reported.
+void scsi_accel_rp2040_finishRead(const uint8_t *data, uint32_t count, int *parityError, volatile int *resetFlag);
+

+ 124 - 0
lib/ZuluSCSI_platform_RP2350/scsi_accel_target_Pico.pio

@@ -0,0 +1,124 @@
+; ZuluSCSI™ - Copyright (c) 2022 Rabbit Hole Computing™
+; 
+; ZuluSCSI™ firmware is licensed under the GPL version 3 or any later version. 
+; 
+; https://www.gnu.org/licenses/gpl-3.0.html
+; ----
+; This program is free software: you can redistribute it and/or modify
+; it under the terms of the GNU General Public License as published by
+; the Free Software Foundation, either version 3 of the License, or
+; (at your option) any later version. 
+; 
+; This program is distributed in the hope that it will be useful,
+; but WITHOUT ANY WARRANTY; without even the implied warranty of
+; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+; GNU General Public License for more details. 
+; 
+; You should have received a copy of the GNU General Public License
+; along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+
+; RP2040 PIO program for accelerating SCSI communication
+; Run "pioasm scsi_accel_target_Pico.pio scsi_accel_target_Pico.pio.h" to regenerate the C header from this.
+; GPIO mapping:
+; - 0-7: DB0-DB7
+; -   8: DBP
+; Side set is REQ pin
+
+; GPIO numbers of the SCSI handshake signals.
+; ACK is used by the "wait ... gpio ACK" instructions below.
+; REQ is not referenced by any instruction in this file: REQ is driven through
+; the side-set pin, which the C code assigns with sm_config_set_sideset_pins().
+.define REQ 17
+.define ACK 26
+
+; Delay from data setup to REQ assertion.
+; deskew delay + cable skew delay = 55 ns minimum
+; One clock cycle is 8 ns => delay 7 clocks
+; NOTE(review): 8 ns per cycle assumes a 125 MHz PIO clock - confirm for the RP2350 build.
+.define REQ_DLY 7
+
+; Adds parity to data that is to be written to SCSI
+; This works by generating addresses for DMA to fetch data from.
+; Register X should be initialized to the base address of the lookup table.
+; Every pass consumes one TX FIFO word and assembles a 32-bit word out of
+; 1 zero bit + 8 data bits + 23 bits of X. The C code configures autopush at
+; 32 bits for this program, so the word is pushed without an explicit push.
+.program scsi_parity
+    pull block                ; Wait for the next data byte in TX FIFO
+    in NULL, 1                ; One zero bit
+    in OSR, 8                 ; The lowest 8 bits of the pulled word
+    in X, 23                  ; The lowest 23 bits of X complete the 32-bit word
+
+; Write to SCSI bus using asynchronous handshake.
+; Data is written as 32-bit words that contain the 8 data bits + 1 parity bit.
+; 23 bits in each word are discarded.
+; Number of bytes to send must be multiple of 2.
+; The side-set pin is REQ: "side 1" keeps REQ deasserted, "side 0" asserts it.
+; REQ is asserted only in the last instruction and is released again when the
+; program wraps around to the first instruction.
+.program scsi_accel_async_write
+    .side_set 1
+
+    pull ifempty block          side 1  ; Get data from TX FIFO
+    out pins, 9                 side 1  ; Write data and parity bit
+    out null, 23 [REQ_DLY-2]    side 1  ; Discard unused bits, wait for data preset time
+    wait 1 gpio ACK             side 1  ; Wait for ACK to be inactive
+    wait 0 gpio ACK             side 0  ; Assert REQ, wait for ACK low
+
+; Read from SCSI bus using sync or async handshake.
+; Data is returned as 32-bit words:
+; - bit  0: always zero
+; - bits 1-8: data byte
+; - bit  9: parity bit
+; - bits 10-31: lookup table address
+; Lookup table address should be loaded into register Y.
+; One dummy word should be written to TX fifo for every byte to receive.
+; The three "in" instructions add up to 32 bits; the C code configures autopush
+; at 32 bits for this program, so each completed word goes to the RX fifo.
+; The side-set pin is REQ: "side 1" is deasserted, "side 0" is asserted.
+.program scsi_accel_read
+    .side_set 1
+
+    pull block                  side 1  ; Pull from TX fifo for counting bytes and pacing sync mode
+    wait 1 gpio ACK             side 1  ; Wait for ACK high
+    in null, 1                  side 0  ; Zero bit because lookup table entries are 16-bit
+    wait 0 gpio ACK             side 0  ; Assert REQ, wait for ACK low
+    in pins, 9                  side 1  ; Deassert REQ, read GPIO
+    in y, 22                    side 1  ; Copy parity lookup table address
+
+; Data state machine for synchronous writes.
+; Takes the lowest 9 bits of each 32 bit word and writes them to bus with REQ pulse.
+; The delay times will be rewritten by C code to match the negotiated SCSI sync speed.
+; The [0] delays below are placeholders: scsi_accel_rp2040_setSyncMode() ORs the
+; real delay into each of the three instructions, so they must stay at 0 here.
+;
+; Shifts one bit to ISR per every byte transmitted. This is used to control the transfer
+; pace, the RX fifo acts as a counter to keep track of unacknowledged bytes. The C code
+; can set the syncOffset by changing autopush threshold, e.g. threshold 3 = 12 bytes offset.
+.program scsi_sync_write
+    .side_set 1
+
+    out pins, 9      [0]        side 1  ; Write data and parity bit, wait for deskew delay
+    out null, 23     [0]        side 0  ; Assert REQ, wait for assert time
+    in null, 1       [0]        side 1  ; Deassert REQ, wait for transfer period, wait for space in ACK buffer
+
+; Data pacing state machine for synchronous writes.
+; Takes one bit from OSR on every falling edge of ACK.
+; The C code should set autopull threshold to match scsi_sync_write autopush threshold.
+; System DMA will then move words from scsi_sync_write RX fifo to scsi_sync_write_pacer TX fifo.
+.program scsi_sync_write_pacer
+    wait 1 gpio ACK
+    wait 0 gpio ACK   ; Wait for falling edge on ACK
+    out null, 1       ; Let scsi_sync_write send one more byte
+
+; Data pacing state machine for synchronous reads.
+; The delay times will be rewritten by C code to match the negotiated SCSI sync speed.
+; Number of bytes to receive minus one should be loaded into register X.
+; In synchronous mode this generates the REQ pulses and dummy words.
+; In asynchronous mode it just generates dummy words to feed to scsi_accel_read.
+; scsi_accel_rp2040_setSyncMode() rewrites the first two instructions: it ORs in
+; the delays and adds the program load offset to the jmp, so the [0] delays
+; must stay at 0 here.
+.program scsi_sync_read_pacer
+    .side_set 1
+
+start:
+    push block      [0]      side 1  ; Send dummy word to scsi_accel_read, wait for transfer period
+    jmp x-- start   [0]      side 0  ; Assert REQ, wait for assert time
+
+; Reached once X has counted down to zero: stay here with REQ deasserted.
+finish:
+    jmp finish      [0]      side 1
+
+; Parity checker for reads from SCSI bus.
+; Receives 16-bit words from g_scsi_parity_check_lookup
+; Bottom 8 bits are the data byte, which is passed to output FIFO
+; The 9th bit is parity valid bit, which is 1 for valid and 0 for parity error.
+; A parity error raises PIO interrupt flag 0, which the C code polls to report
+; the error. The data byte is pushed to the RX fifo in either case, and after
+; raising the flag the program wraps around and continues with the next word.
+.program scsi_read_parity
+parity_valid:
+    out isr, 8                ; Take the 8 data bits for passing to RX fifo
+    push block                ; Push the data to RX fifo
+    out x, 24                 ; Take the parity valid bit, and the rest of 32-bit word
+    jmp x-- parity_valid      ; If parity valid bit is 1, repeat from start
+    irq set 0                 ; Parity error, set interrupt flag

+ 225 - 0
lib/ZuluSCSI_platform_RP2350/scsi_accel_target_Pico.pio.h

@@ -0,0 +1,225 @@
+// -------------------------------------------------- //
+// This file is autogenerated by pioasm; do not edit! //
+// -------------------------------------------------- //
+
+#pragma once
+
+#if !PICO_NO_HARDWARE
+#include "hardware/pio.h"
+#endif
+
+// ----------- //
+// scsi_parity //
+// ----------- //
+
+#define scsi_parity_wrap_target 0
+#define scsi_parity_wrap 3
+
+static const uint16_t scsi_parity_program_instructions[] = {
+            //     .wrap_target
+    0x80a0, //  0: pull   block                      
+    0x4061, //  1: in     null, 1                    
+    0x40e8, //  2: in     osr, 8                     
+    0x4037, //  3: in     x, 23                      
+            //     .wrap
+};
+
+#if !PICO_NO_HARDWARE
+static const struct pio_program scsi_parity_program = {
+    .instructions = scsi_parity_program_instructions,
+    .length = 4,
+    .origin = -1,
+};
+
+static inline pio_sm_config scsi_parity_program_get_default_config(uint offset) {
+    pio_sm_config c = pio_get_default_sm_config();
+    sm_config_set_wrap(&c, offset + scsi_parity_wrap_target, offset + scsi_parity_wrap);
+    return c;
+}
+#endif
+
+// ---------------------- //
+// scsi_accel_async_write //
+// ---------------------- //
+
+#define scsi_accel_async_write_wrap_target 0
+#define scsi_accel_async_write_wrap 4
+
+static const uint16_t scsi_accel_async_write_program_instructions[] = {
+            //     .wrap_target
+    0x90e0, //  0: pull   ifempty block   side 1     
+    0x7009, //  1: out    pins, 9         side 1     
+    0x7577, //  2: out    null, 23        side 1 [5] 
+    0x309a, //  3: wait   1 gpio, 26      side 1     
+    0x201a, //  4: wait   0 gpio, 26      side 0     
+            //     .wrap
+};
+
+#if !PICO_NO_HARDWARE
+static const struct pio_program scsi_accel_async_write_program = {
+    .instructions = scsi_accel_async_write_program_instructions,
+    .length = 5,
+    .origin = -1,
+};
+
+static inline pio_sm_config scsi_accel_async_write_program_get_default_config(uint offset) {
+    pio_sm_config c = pio_get_default_sm_config();
+    sm_config_set_wrap(&c, offset + scsi_accel_async_write_wrap_target, offset + scsi_accel_async_write_wrap);
+    sm_config_set_sideset(&c, 1, false, false);
+    return c;
+}
+#endif
+
+// --------------- //
+// scsi_accel_read //
+// --------------- //
+
+#define scsi_accel_read_wrap_target 0
+#define scsi_accel_read_wrap 5
+
+static const uint16_t scsi_accel_read_program_instructions[] = {
+            //     .wrap_target
+    0x90a0, //  0: pull   block           side 1     
+    0x309a, //  1: wait   1 gpio, 26      side 1     
+    0x4061, //  2: in     null, 1         side 0     
+    0x201a, //  3: wait   0 gpio, 26      side 0     
+    0x5009, //  4: in     pins, 9         side 1     
+    0x5056, //  5: in     y, 22           side 1     
+            //     .wrap
+};
+
+#if !PICO_NO_HARDWARE
+static const struct pio_program scsi_accel_read_program = {
+    .instructions = scsi_accel_read_program_instructions,
+    .length = 6,
+    .origin = -1,
+};
+
+static inline pio_sm_config scsi_accel_read_program_get_default_config(uint offset) {
+    pio_sm_config c = pio_get_default_sm_config();
+    sm_config_set_wrap(&c, offset + scsi_accel_read_wrap_target, offset + scsi_accel_read_wrap);
+    sm_config_set_sideset(&c, 1, false, false);
+    return c;
+}
+#endif
+
+// --------------- //
+// scsi_sync_write //
+// --------------- //
+
+#define scsi_sync_write_wrap_target 0
+#define scsi_sync_write_wrap 2
+
+static const uint16_t scsi_sync_write_program_instructions[] = {
+            //     .wrap_target
+    0x7009, //  0: out    pins, 9         side 1     
+    0x6077, //  1: out    null, 23        side 0     
+    0x5061, //  2: in     null, 1         side 1     
+            //     .wrap
+};
+
+#if !PICO_NO_HARDWARE
+static const struct pio_program scsi_sync_write_program = {
+    .instructions = scsi_sync_write_program_instructions,
+    .length = 3,
+    .origin = -1,
+};
+
+static inline pio_sm_config scsi_sync_write_program_get_default_config(uint offset) {
+    pio_sm_config c = pio_get_default_sm_config();
+    sm_config_set_wrap(&c, offset + scsi_sync_write_wrap_target, offset + scsi_sync_write_wrap);
+    sm_config_set_sideset(&c, 1, false, false);
+    return c;
+}
+#endif
+
+// --------------------- //
+// scsi_sync_write_pacer //
+// --------------------- //
+
+#define scsi_sync_write_pacer_wrap_target 0
+#define scsi_sync_write_pacer_wrap 2
+
+static const uint16_t scsi_sync_write_pacer_program_instructions[] = {
+            //     .wrap_target
+    0x209a, //  0: wait   1 gpio, 26                 
+    0x201a, //  1: wait   0 gpio, 26                 
+    0x6061, //  2: out    null, 1                    
+            //     .wrap
+};
+
+#if !PICO_NO_HARDWARE
+static const struct pio_program scsi_sync_write_pacer_program = {
+    .instructions = scsi_sync_write_pacer_program_instructions,
+    .length = 3,
+    .origin = -1,
+};
+
+static inline pio_sm_config scsi_sync_write_pacer_program_get_default_config(uint offset) {
+    pio_sm_config c = pio_get_default_sm_config();
+    sm_config_set_wrap(&c, offset + scsi_sync_write_pacer_wrap_target, offset + scsi_sync_write_pacer_wrap);
+    return c;
+}
+#endif
+
+// -------------------- //
+// scsi_sync_read_pacer //
+// -------------------- //
+
+#define scsi_sync_read_pacer_wrap_target 0
+#define scsi_sync_read_pacer_wrap 2
+
+static const uint16_t scsi_sync_read_pacer_program_instructions[] = {
+            //     .wrap_target
+    0x9020, //  0: push   block           side 1     
+    0x0040, //  1: jmp    x--, 0          side 0     
+    0x1002, //  2: jmp    2               side 1     
+            //     .wrap
+};
+
+#if !PICO_NO_HARDWARE
+static const struct pio_program scsi_sync_read_pacer_program = {
+    .instructions = scsi_sync_read_pacer_program_instructions,
+    .length = 3,
+    .origin = -1,
+};
+
+static inline pio_sm_config scsi_sync_read_pacer_program_get_default_config(uint offset) {
+    pio_sm_config c = pio_get_default_sm_config();
+    sm_config_set_wrap(&c, offset + scsi_sync_read_pacer_wrap_target, offset + scsi_sync_read_pacer_wrap);
+    sm_config_set_sideset(&c, 1, false, false);
+    return c;
+}
+#endif
+
+// ---------------- //
+// scsi_read_parity //
+// ---------------- //
+
+#define scsi_read_parity_wrap_target 0
+#define scsi_read_parity_wrap 4
+
+static const uint16_t scsi_read_parity_program_instructions[] = {
+            //     .wrap_target
+    0x60c8, //  0: out    isr, 8                     
+    0x8020, //  1: push   block                      
+    0x6038, //  2: out    x, 24                      
+    0x0040, //  3: jmp    x--, 0                     
+    0xc000, //  4: irq    nowait 0                   
+            //     .wrap
+};
+
+#if !PICO_NO_HARDWARE
+static const struct pio_program scsi_read_parity_program = {
+    .instructions = scsi_read_parity_program_instructions,
+    .length = 5,
+    .origin = -1,
+};
+
+static inline pio_sm_config scsi_read_parity_program_get_default_config(uint offset) {
+    pio_sm_config c = pio_get_default_sm_config();
+    sm_config_set_wrap(&c, offset + scsi_read_parity_wrap_target, offset + scsi_read_parity_wrap);
+    return c;
+}
+#endif
+

+ 124 - 0
lib/ZuluSCSI_platform_RP2350/scsi_accel_target_RP2040.pio

@@ -0,0 +1,124 @@
+; ZuluSCSI™ - Copyright (c) 2022 Rabbit Hole Computing™
+; 
+; ZuluSCSI™ firmware is licensed under the GPL version 3 or any later version. 
+; 
+; https://www.gnu.org/licenses/gpl-3.0.html
+; ----
+; This program is free software: you can redistribute it and/or modify
+; it under the terms of the GNU General Public License as published by
+; the Free Software Foundation, either version 3 of the License, or
+; (at your option) any later version. 
+; 
+; This program is distributed in the hope that it will be useful,
+; but WITHOUT ANY WARRANTY; without even the implied warranty of
+; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+; GNU General Public License for more details. 
+; 
+; You should have received a copy of the GNU General Public License
+; along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+
+; RP2040 PIO program for accelerating SCSI communication
+; Run "pioasm scsi_accel_target_RP2040.pio scsi_accel_target_RP2040.pio.h" to regenerate the C header from this.
+; GPIO mapping:
+; - 0-7: DB0-DB7
+; -   8: DBP
+; Side set is REQ pin
+
+; GPIO numbers of the SCSI handshake signals.
+; ACK is sampled by the "wait ... gpio ACK" instructions below; the REQ symbol
+; is not referenced by the programs in this file (REQ is driven via side-set).
+.define REQ 9
+.define ACK 10
+
+; Delay from data setup to REQ assertion.
+; deskew delay + cable skew delay = 55 ns minimum
+; One clock cycle is 8 ns => delay 7 clocks
+; NOTE(review): 8 ns per cycle corresponds to a 125 MHz PIO clock. This copy
+; lives in the RP2350 platform directory - confirm the actual clock and
+; re-derive REQ_DLY (and regenerate the header) if it differs.
+.define REQ_DLY 7
+
+; Adds parity to data that is to be written to SCSI
+; This works by generating addresses for DMA to fetch data from.
+; Register X should be initialized to the base address of the lookup table.
+; Each result word is assembled in the ISR from 1 zero bit, the 8 data bits
+; and 23 bits of X (32 bits in total).
+; NOTE(review): there is no explicit push, so this presumably relies on
+; autopush being configured by the C code - confirm.
+.program scsi_parity
+    pull block      ; Wait for the next data byte in the TX FIFO
+    in NULL, 1      ; One zero bit
+    in OSR, 8       ; The 8 data bits
+    in X, 23        ; 23 bits taken from X (lookup table base address)
+
+; Write to SCSI bus using asynchronous handshake.
+; Data is written as 32-bit words that contain the 8 data bits + 1 parity bit.
+; 23 bits in each word are discarded.
+; Number of bytes to send must be multiple of 2.
+; The single side-set bit drives REQ: "side 1" leaves it released and
+; "side 0" asserts it, as the per-instruction comments below describe.
+; The [REQ_DLY-2] delay on the third instruction adds REQ_DLY-2 extra cycles
+; before the handshake starts.
+.program scsi_accel_async_write
+    .side_set 1
+
+    pull ifempty block          side 1  ; Get data from TX FIFO
+    out pins, 9                 side 1  ; Write data and parity bit
+    out null, 23 [REQ_DLY-2]    side 1  ; Discard unused bits, wait for data preset time
+    wait 1 gpio ACK             side 1  ; Wait for ACK to be inactive
+    wait 0 gpio ACK             side 0  ; Assert REQ, wait for ACK low
+
+; Read from SCSI bus using sync or async handshake.
+; Data is returned as 32-bit words:
+; - bit  0: always zero
+; - bits 1-8: data byte
+; - bit  9: parity bit
+; - bits 10-31: lookup table address
+; Lookup table address should be loaded into register Y.
+; One dummy word should be written to TX fifo for every byte to receive.
+; The three "in" instructions shift 1 + 9 + 22 = 32 bits into the ISR per byte.
+; NOTE(review): there is no explicit push, so this presumably relies on
+; autopush being configured by the C code - confirm.
+.program scsi_accel_read
+    .side_set 1
+
+    pull block                  side 1  ; Pull from TX fifo for counting bytes and pacing sync mode
+    wait 1 gpio ACK             side 1  ; Wait for ACK high
+    in null, 1                  side 0  ; Zero bit because lookup table entries are 16-bit
+    wait 0 gpio ACK             side 0  ; Assert REQ, wait for ACK low
+    in pins, 9                  side 1  ; Deassert REQ, read GPIO
+    in y, 22                    side 1  ; Copy parity lookup table address
+
+; Data state machine for synchronous writes.
+; Takes the lowest 9 bits of each 32 bit word and writes them to bus with REQ pulse.
+; The delay times will be rewritten by C code to match the negotiated SCSI sync speed.
+;
+; Shifts one bit to ISR per every byte transmitted. This is used to control the transfer
+; pace, the RX fifo acts as a counter to keep track of unacknowledged bytes. The C code
+; can set the syncOffset by changing autopush threshold, e.g. threshold 3 = 12 bytes offset.
+;
+; The explicit [0] delays are placeholders: they mark the delay fields that the
+; C code is expected to patch, so they must stay on every instruction.
+.program scsi_sync_write
+    .side_set 1
+
+    out pins, 9      [0]        side 1  ; Write data and parity bit, wait for deskew delay
+    out null, 23     [0]        side 0  ; Assert REQ, wait for assert time
+    in null, 1       [0]        side 1  ; Deassert REQ, wait for transfer period, wait for space in ACK buffer
+
+; Data pacing state machine for synchronous writes.
+; Takes one bit from ISR on every falling edge of ACK.
+; The C code should set autopull threshold to match scsi_sync_write autopush threshold.
+; System DMA will then move words from scsi_sync_write RX fifo to scsi_sync_write_pacer TX fifo.
+; This program uses no side-set, so it never drives REQ itself.
+.program scsi_sync_write_pacer
+    wait 1 gpio ACK   ; Make sure ACK is high first, so the next wait sees an edge
+    wait 0 gpio ACK   ; Wait for falling edge on ACK
+    out null, 1       ; Let scsi_sync_write send one more byte
+
+; Data pacing state machine for synchronous reads.
+; The delay times will be rewritten by C code to match the negotiated SCSI sync speed.
+; Number of bytes to receive minus one should be loaded into register X.
+; In synchronous mode this generates the REQ pulses and dummy words.
+; In asynchronous mode it just generates dummy words to feed to scsi_accel_read.
+;
+; "jmp x--" branches while X is non-zero and decrements X afterwards, so the
+; push/jmp pair executes X+1 times before falling through to "finish".
+.program scsi_sync_read_pacer
+    .side_set 1
+
+start:
+    push block      [0]      side 1  ; Send dummy word to scsi_accel_read, wait for transfer period
+    jmp x-- start   [0]      side 0  ; Assert REQ, wait for assert time
+
+finish:
+    jmp finish      [0]      side 1  ; All words sent: spin here with side-set held at 1
+
+; Parity checker for reads from SCSI bus.
+; Receives 16-bit words from g_scsi_parity_check_lookup
+; Bottom 8 bits are the data byte, which is passed to output FIFO
+; The 9th bit is parity valid bit, which is 1 for valid and 0 for parity error.
+;
+; "jmp x--" branches whenever X is non-zero; the decrement it also performs is
+; irrelevant here because X is reloaded by "out x, 24" for every word.
+; After "irq set 0" execution continues from the top of the program again.
+.program scsi_read_parity
+parity_valid:
+    out isr, 8                ; Take the 8 data bits for passing to RX fifo
+    push block                ; Push the data to RX fifo
+    out x, 24                 ; Take the parity valid bit, and the rest of 32-bit word
+    jmp x-- parity_valid      ; If parity valid bit is 1, repeat from start
+    irq set 0                 ; Parity error, set interrupt flag

+ 225 - 0
lib/ZuluSCSI_platform_RP2350/scsi_accel_target_RP2040.pio.h

@@ -0,0 +1,225 @@
+// -------------------------------------------------- //
+// This file is autogenerated by pioasm; do not edit! //
+// -------------------------------------------------- //
+
+#pragma once
+
+#if !PICO_NO_HARDWARE
+#include "hardware/pio.h"
+#endif
+
+// ----------- //
+// scsi_parity //
+// ----------- //
+
+#define scsi_parity_wrap_target 0
+#define scsi_parity_wrap 3
+
+static const uint16_t scsi_parity_program_instructions[] = {
+            //     .wrap_target
+    0x80a0, //  0: pull   block                      
+    0x4061, //  1: in     null, 1                    
+    0x40e8, //  2: in     osr, 8                     
+    0x4037, //  3: in     x, 23                      
+            //     .wrap
+};
+
+#if !PICO_NO_HARDWARE
+static const struct pio_program scsi_parity_program = {
+    .instructions = scsi_parity_program_instructions,
+    .length = 4,
+    .origin = -1,
+};
+
+// Default state machine config for scsi_parity loaded at `offset`:
+// the wrap range covers program instructions 0..3; no side-set is configured.
+static inline pio_sm_config scsi_parity_program_get_default_config(uint offset) {
+    pio_sm_config c = pio_get_default_sm_config();
+    sm_config_set_wrap(&c, offset + scsi_parity_wrap_target, offset + scsi_parity_wrap);
+    return c;
+}
+#endif
+
+// ---------------------- //
+// scsi_accel_async_write //
+// ---------------------- //
+
+#define scsi_accel_async_write_wrap_target 0
+#define scsi_accel_async_write_wrap 4
+
+static const uint16_t scsi_accel_async_write_program_instructions[] = {
+            //     .wrap_target
+    0x90e0, //  0: pull   ifempty block   side 1     
+    0x7009, //  1: out    pins, 9         side 1     
+    0x7577, //  2: out    null, 23        side 1 [5] 
+    0x308a, //  3: wait   1 gpio, 10      side 1     
+    0x200a, //  4: wait   0 gpio, 10      side 0     
+            //     .wrap
+};
+
+#if !PICO_NO_HARDWARE
+static const struct pio_program scsi_accel_async_write_program = {
+    .instructions = scsi_accel_async_write_program_instructions,
+    .length = 5,
+    .origin = -1,
+};
+
+// Default state machine config for scsi_accel_async_write loaded at `offset`:
+// the wrap range covers program instructions 0..4 and one side-set bit is
+// configured (not optional, not applied to pindirs).
+static inline pio_sm_config scsi_accel_async_write_program_get_default_config(uint offset) {
+    pio_sm_config c = pio_get_default_sm_config();
+    sm_config_set_wrap(&c, offset + scsi_accel_async_write_wrap_target, offset + scsi_accel_async_write_wrap);
+    sm_config_set_sideset(&c, 1, false, false);
+    return c;
+}
+#endif
+
+// --------------- //
+// scsi_accel_read //
+// --------------- //
+
+#define scsi_accel_read_wrap_target 0
+#define scsi_accel_read_wrap 5
+
+static const uint16_t scsi_accel_read_program_instructions[] = {
+            //     .wrap_target
+    0x90a0, //  0: pull   block           side 1     
+    0x308a, //  1: wait   1 gpio, 10      side 1     
+    0x4061, //  2: in     null, 1         side 0     
+    0x200a, //  3: wait   0 gpio, 10      side 0     
+    0x5009, //  4: in     pins, 9         side 1     
+    0x5056, //  5: in     y, 22           side 1     
+            //     .wrap
+};
+
+#if !PICO_NO_HARDWARE
+static const struct pio_program scsi_accel_read_program = {
+    .instructions = scsi_accel_read_program_instructions,
+    .length = 6,
+    .origin = -1,
+};
+
+// Default state machine config for scsi_accel_read loaded at `offset`:
+// the wrap range covers program instructions 0..5 and one side-set bit is
+// configured (not optional, not applied to pindirs).
+static inline pio_sm_config scsi_accel_read_program_get_default_config(uint offset) {
+    pio_sm_config c = pio_get_default_sm_config();
+    sm_config_set_wrap(&c, offset + scsi_accel_read_wrap_target, offset + scsi_accel_read_wrap);
+    sm_config_set_sideset(&c, 1, false, false);
+    return c;
+}
+#endif
+
+// --------------- //
+// scsi_sync_write //
+// --------------- //
+
+#define scsi_sync_write_wrap_target 0
+#define scsi_sync_write_wrap 2
+
+static const uint16_t scsi_sync_write_program_instructions[] = {
+            //     .wrap_target
+    0x7009, //  0: out    pins, 9         side 1     
+    0x6077, //  1: out    null, 23        side 0     
+    0x5061, //  2: in     null, 1         side 1     
+            //     .wrap
+};
+
+#if !PICO_NO_HARDWARE
+static const struct pio_program scsi_sync_write_program = {
+    .instructions = scsi_sync_write_program_instructions,
+    .length = 3,
+    .origin = -1,
+};
+
+// Default state machine config for scsi_sync_write loaded at `offset`:
+// the wrap range covers program instructions 0..2 and one side-set bit is
+// configured (not optional, not applied to pindirs).
+static inline pio_sm_config scsi_sync_write_program_get_default_config(uint offset) {
+    pio_sm_config c = pio_get_default_sm_config();
+    sm_config_set_wrap(&c, offset + scsi_sync_write_wrap_target, offset + scsi_sync_write_wrap);
+    sm_config_set_sideset(&c, 1, false, false);
+    return c;
+}
+#endif
+
+// --------------------- //
+// scsi_sync_write_pacer //
+// --------------------- //
+
+#define scsi_sync_write_pacer_wrap_target 0
+#define scsi_sync_write_pacer_wrap 2
+
+static const uint16_t scsi_sync_write_pacer_program_instructions[] = {
+            //     .wrap_target
+    0x208a, //  0: wait   1 gpio, 10                 
+    0x200a, //  1: wait   0 gpio, 10                 
+    0x6061, //  2: out    null, 1                    
+            //     .wrap
+};
+
+#if !PICO_NO_HARDWARE
+static const struct pio_program scsi_sync_write_pacer_program = {
+    .instructions = scsi_sync_write_pacer_program_instructions,
+    .length = 3,
+    .origin = -1,
+};
+
+// Default state machine config for scsi_sync_write_pacer loaded at `offset`:
+// the wrap range covers program instructions 0..2; no side-set is configured.
+static inline pio_sm_config scsi_sync_write_pacer_program_get_default_config(uint offset) {
+    pio_sm_config c = pio_get_default_sm_config();
+    sm_config_set_wrap(&c, offset + scsi_sync_write_pacer_wrap_target, offset + scsi_sync_write_pacer_wrap);
+    return c;
+}
+#endif
+
+// -------------------- //
+// scsi_sync_read_pacer //
+// -------------------- //
+
+#define scsi_sync_read_pacer_wrap_target 0
+#define scsi_sync_read_pacer_wrap 2
+
+static const uint16_t scsi_sync_read_pacer_program_instructions[] = {
+            //     .wrap_target
+    0x9020, //  0: push   block           side 1     
+    0x0040, //  1: jmp    x--, 0          side 0     
+    0x1002, //  2: jmp    2               side 1     
+            //     .wrap
+};
+
+#if !PICO_NO_HARDWARE
+static const struct pio_program scsi_sync_read_pacer_program = {
+    .instructions = scsi_sync_read_pacer_program_instructions,
+    .length = 3,
+    .origin = -1,
+};
+
+// Default state machine config for scsi_sync_read_pacer loaded at `offset`:
+// the wrap range covers program instructions 0..2 and one side-set bit is
+// configured (not optional, not applied to pindirs).
+static inline pio_sm_config scsi_sync_read_pacer_program_get_default_config(uint offset) {
+    pio_sm_config c = pio_get_default_sm_config();
+    sm_config_set_wrap(&c, offset + scsi_sync_read_pacer_wrap_target, offset + scsi_sync_read_pacer_wrap);
+    sm_config_set_sideset(&c, 1, false, false);
+    return c;
+}
+#endif
+
+// ---------------- //
+// scsi_read_parity //
+// ---------------- //
+
+#define scsi_read_parity_wrap_target 0
+#define scsi_read_parity_wrap 4
+
+static const uint16_t scsi_read_parity_program_instructions[] = {
+            //     .wrap_target
+    0x60c8, //  0: out    isr, 8                     
+    0x8020, //  1: push   block                      
+    0x6038, //  2: out    x, 24                      
+    0x0040, //  3: jmp    x--, 0                     
+    0xc000, //  4: irq    nowait 0                   
+            //     .wrap
+};
+
+#if !PICO_NO_HARDWARE
+static const struct pio_program scsi_read_parity_program = {
+    .instructions = scsi_read_parity_program_instructions,
+    .length = 5,
+    .origin = -1,
+};
+
+// Default state machine config for scsi_read_parity loaded at `offset`:
+// the wrap range covers program instructions 0..4; no side-set is configured.
+static inline pio_sm_config scsi_read_parity_program_get_default_config(uint offset) {
+    pio_sm_config c = pio_get_default_sm_config();
+    sm_config_set_wrap(&c, offset + scsi_read_parity_wrap_target, offset + scsi_read_parity_wrap);
+    return c;
+}
+#endif
+

+ 535 - 0
lib/ZuluSCSI_platform_RP2350/sd_card_sdio.cpp

@@ -0,0 +1,535 @@
+/** 
+ * ZuluSCSI™ - Copyright (c) 2022 Rabbit Hole Computing™
+ * 
+ * ZuluSCSI™ firmware is licensed under the GPL version 3 or any later version. 
+ * 
+ * https://www.gnu.org/licenses/gpl-3.0.html
+ * ----
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version. 
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details. 
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+**/
+
+// Driver for accessing SD card in SDIO mode on RP2040.
+
+#include "ZuluSCSI_platform.h"
+
+#ifdef SD_USE_SDIO
+
+#include "ZuluSCSI_log.h"
+#include "sdio.h"
+#include <hardware/gpio.h>
+#include <SdFat.h>
+#include <SdCard/SdCardInfo.h>
+
+static uint32_t g_sdio_ocr; // Operating condition register from card
+static uint32_t g_sdio_rca; // Relative card address
+static cid_t g_sdio_cid;
+static csd_t g_sdio_csd;
+static int g_sdio_error_line;
+static sdio_status_t g_sdio_error;
+static uint32_t g_sdio_dma_buf[128];
+static uint32_t g_sdio_sector_count;
+
+// Evaluates `call`, stores its result in g_sdio_error and yields true when it
+// equals SDIO_OK. Otherwise the source line of the invocation is recorded and
+// logged through logSDError(), and the expression yields false.
+#define checkReturnOk(call) ((g_sdio_error = (call)) == SDIO_OK ? true : logSDError(__LINE__))
+// Records `line` in g_sdio_error_line, logs it together with the current
+// g_sdio_error code, and always returns false so it can terminate a
+// checkReturnOk() expression.
+static bool logSDError(int line)
+{
+    g_sdio_error_line = line;
+    logmsg("SDIO SD card error on line ", line, ", error code ", (int)g_sdio_error);
+    return false;
+}
+
+// Callback used by SCSI code for simultaneous processing
+static sd_callback_t m_stream_callback;
+static const uint8_t *m_stream_buffer;
+static uint32_t m_stream_count;
+static uint32_t m_stream_count_start;
+
+// Install (or clear, when func is null) the progress callback for SD
+// transfers and the buffer those transfers are expected to walk through.
+// Byte accounting for the stream restarts from zero.
+void platform_set_sd_callback(sd_callback_t func, const uint8_t *buffer)
+{
+    // Restart the byte counters for the new stream.
+    m_stream_count_start = 0;
+    m_stream_count = 0;
+
+    // Remember the expected buffer and whom to notify.
+    m_stream_buffer = buffer;
+    m_stream_callback = func;
+}
+
+// Decide whether a transfer of `count` bytes at `buf` continues the registered
+// stream. If it does, the stream position advances by `count` and the
+// registered callback is returned; otherwise NULL is returned (with a debug
+// message when a callback is registered but the buffer does not match).
+// m_stream_count_start always receives the stream position before the call.
+// `accesstype` and `sector` are used for the debug message only.
+static sd_callback_t get_stream_callback(const uint8_t *buf, uint32_t count, const char *accesstype, uint32_t sector)
+{
+    m_stream_count_start = m_stream_count;
+
+    if (!m_stream_callback)
+    {
+        return NULL;
+    }
+
+    const uint8_t *expected = m_stream_buffer + m_stream_count;
+    if (buf != expected)
+    {
+        dbgmsg("SD card ", accesstype, "(", (int)sector,
+              ") slow transfer, buffer", (uint32_t)buf, " vs. ", (uint32_t)expected);
+        return NULL;
+    }
+
+    m_stream_count += count;
+    return m_stream_callback;
+}
+
+// Initialize the SD card over SDIO.
+//
+// Sequence: CMD0/CMD8 probe (up to 5 attempts), ACMD41 until the card reports
+// ready (1 second timeout), CMD2 (CID), CMD3 (RCA), CMD9 (CSD), CMD7 (select)
+// and ACMD6 (4-bit bus); finally the interface is re-initialized for normal
+// operation.
+//
+// sdioConfig is not used by this implementation.
+// Returns true when the card is ready for transfers and false on any failure.
+// Failures detected through checkReturnOk() are recorded in g_sdio_error and
+// g_sdio_error_line.
+bool SdioCard::begin(SdioConfig sdioConfig)
+{
+    // OCR bit 31: ACMD41 is repeated below until the card sets it.
+    // Built from an unsigned value because a signed 1 shifted into bit 31
+    // does not fit in int.
+    const uint32_t ocr_ready_bit = (uint32_t)1 << 31;
+
+    uint32_t reply;
+    sdio_status_t status;
+
+    // Start with the slow initialization clock.
+    // NOTE(review): the argument looks like a clock divider (25 here, 1 once
+    // the card is initialized) - confirm against rp2040_sdio_init().
+    rp2040_sdio_init(25);
+
+    // Establish initial connection with the card
+    for (int retries = 0; retries < 5; retries++)
+    {
+        // After a hard fault crash, delayMicroseconds hangs
+        // using busy_wait_us_32 instead
+        // delayMicroseconds(1000);
+        busy_wait_us_32(1000);
+        reply = 0;
+        rp2040_sdio_command_R1(CMD0, 0, NULL); // GO_IDLE_STATE
+        status = rp2040_sdio_command_R1(CMD8, 0x1AA, &reply); // SEND_IF_COND
+
+        if (status == SDIO_OK && reply == 0x1AA)
+        {
+            break;
+        }
+    }
+
+    if (reply != 0x1AA || status != SDIO_OK)
+    {
+        // dbgmsg("SDIO not responding to CMD8 SEND_IF_COND, status ", (int)status, " reply ", reply);
+        return false;
+    }
+
+    // Send ACMD41 to begin card initialization and wait for it to complete
+    uint32_t start = millis();
+    do {
+        if (!checkReturnOk(rp2040_sdio_command_R1(CMD55, 0, &reply)) || // APP_CMD
+            !checkReturnOk(rp2040_sdio_command_R3(ACMD41, 0xD0040000, &g_sdio_ocr))) // 3.0V voltage
+            // !checkReturnOk(rp2040_sdio_command_R1(ACMD41, 0xC0100000, &g_sdio_ocr)))
+        {
+            return false;
+        }
+
+        if ((uint32_t)(millis() - start) > 1000)
+        {
+            logmsg("SDIO card initialization timeout");
+            return false;
+        }
+    } while (!(g_sdio_ocr & ocr_ready_bit));
+
+    // Get CID
+    if (!checkReturnOk(rp2040_sdio_command_R2(CMD2, 0, (uint8_t*)&g_sdio_cid)))
+    {
+        dbgmsg("SDIO failed to read CID");
+        return false;
+    }
+
+    // Get relative card address
+    if (!checkReturnOk(rp2040_sdio_command_R1(CMD3, 0, &g_sdio_rca)))
+    {
+        dbgmsg("SDIO failed to get RCA");
+        return false;
+    }
+
+    // Get CSD
+    if (!checkReturnOk(rp2040_sdio_command_R2(CMD9, g_sdio_rca, (uint8_t*)&g_sdio_csd)))
+    {
+        dbgmsg("SDIO failed to read CSD");
+        return false;
+    }
+
+    // Cache the capacity; readSectors() uses it to detect end-of-drive reads.
+    g_sdio_sector_count = sectorCount();
+
+    // Select card
+    if (!checkReturnOk(rp2040_sdio_command_R1(CMD7, g_sdio_rca, &reply)))
+    {
+        dbgmsg("SDIO failed to select card");
+        return false;
+    }
+
+    // Set 4-bit bus mode
+    if (!checkReturnOk(rp2040_sdio_command_R1(CMD55, g_sdio_rca, &reply)) ||
+        !checkReturnOk(rp2040_sdio_command_R1(ACMD6, 2, &reply)))
+    {
+        dbgmsg("SDIO failed to set bus width");
+        return false;
+    }
+
+    // Switch to the full-speed clock for data transfers (argument 1, see the
+    // review note at the first rp2040_sdio_init() call).
+    rp2040_sdio_init(1);
+
+    return true;
+}
+
+// Returns the most recently stored SDIO status (g_sdio_error), converted to
+// uint8_t.
+uint8_t SdioCard::errorCode() const
+{
+    return g_sdio_error;
+}
+
+// No additional error data is tracked by this driver; always returns 0.
+uint32_t SdioCard::errorData() const
+{
+    return 0;
+}
+
+// Returns the source line recorded by the last logSDError() call.
+uint32_t SdioCard::errorLine() const
+{
+    return g_sdio_error_line;
+}
+
+// The card is reported busy while the SDIO_D0 input reads low.
+bool SdioCard::isBusy()
+{
+    const uint32_t d0_mask = (1 << SDIO_D0);
+    return !(sio_hw->gpio_in & d0_mask);
+}
+
+// The SD clock rate is not reported by this driver; always returns 0.
+uint32_t SdioCard::kHzSdClk()
+{
+    return 0;
+}
+
+// Copies the cached CID (g_sdio_cid) into *cid without talking to the card.
+// Always returns true.
+bool SdioCard::readCID(cid_t* cid)
+{
+    *cid = g_sdio_cid;
+    return true;
+}
+
+// Copies the cached CSD (g_sdio_csd) into *csd without talking to the card.
+// Always returns true.
+bool SdioCard::readCSD(csd_t* csd)
+{
+    *csd = g_sdio_csd;
+    return true;
+}
+
+// Card presence poll. Note that *ocr receives the CMD13 reply, not the OCR
+// register. Returns true when the command succeeded.
+bool SdioCard::readOCR(uint32_t* ocr)
+{
+    // SDIO mode does not have CMD58, but main program uses this to
+    // poll for card presence. Return status register instead.
+    return checkReturnOk(rp2040_sdio_command_R1(CMD13, g_sdio_rca, ocr));
+}
+
+// Not implemented in this driver: logs a message and returns false.
+bool SdioCard::readData(uint8_t* dst)
+{
+    logmsg("SdioCard::readData() called but not implemented!");
+    return false;
+}
+
+// Not implemented in this driver: logs a message and returns false.
+bool SdioCard::readStart(uint32_t sector)
+{
+    logmsg("SdioCard::readStart() called but not implemented!");
+    return false;
+}
+
+// Not implemented in this driver: logs a message and returns false.
+bool SdioCard::readStop()
+{
+    logmsg("SdioCard::readStop() called but not implemented!");
+    return false;
+}
+
+// Returns the capacity computed from the cached CSD (g_sdio_csd).
+uint32_t SdioCard::sectorCount()
+{
+    return g_sdio_csd.capacity();
+}
+
+// Query the card with CMD13 and return its reply, or 0 when the command
+// fails (the failure is recorded by checkReturnOk()).
+uint32_t SdioCard::status()
+{
+    uint32_t card_status;
+    const bool ok = checkReturnOk(rp2040_sdio_command_R1(CMD13, g_sdio_rca, &card_status));
+    return ok ? card_status : 0;
+}
+
+// Send CMD12 to end a multi-block transfer.
+// With blocking == false the function returns as soon as the command has been
+// accepted. With blocking == true it additionally waits up to 5000 ms for the
+// card to stop signalling busy, invoking the stream callback (if any) while
+// waiting. Returns false when CMD12 fails or the busy wait times out.
+bool SdioCard::stopTransmission(bool blocking)
+{
+    uint32_t response;
+    if (!checkReturnOk(rp2040_sdio_command_R1(CMD12, 0, &response)))
+    {
+        return false;
+    }
+
+    if (!blocking)
+    {
+        return true;
+    }
+
+    const uint32_t wait_begin = millis();
+    while ((uint32_t)(millis() - wait_begin) < 5000 && isBusy())
+    {
+        // Keep the application informed while the card finishes.
+        if (m_stream_callback)
+        {
+            m_stream_callback(m_stream_count);
+        }
+    }
+
+    if (!isBusy())
+    {
+        return true;
+    }
+
+    logmsg("SdioCard::stopTransmission() timeout");
+    return false;
+}
+
+// Nothing to flush in this driver; always returns true.
+bool SdioCard::syncDevice()
+{
+    return true;
+}
+
+// Card type derived from bit 30 of the OCR value cached in g_sdio_ocr:
+// set means SD_CARD_TYPE_SDHC, clear means SD_CARD_TYPE_SD2.
+uint8_t SdioCard::type() const
+{
+    const bool ocr_bit30_set = (g_sdio_ocr & (1 << 30)) != 0;
+    return ocr_bit30_set ? SD_CARD_TYPE_SDHC : SD_CARD_TYPE_SD2;
+}
+
+// Not implemented in this driver: logs a message and returns false.
+bool SdioCard::writeData(const uint8_t* src)
+{
+    logmsg("SdioCard::writeData() called but not implemented!");
+    return false;
+}
+
+// Not implemented in this driver: logs a message and returns false.
+bool SdioCard::writeStart(uint32_t sector)
+{
+    logmsg("SdioCard::writeStart() called but not implemented!");
+    return false;
+}
+
+// Not implemented in this driver: logs a message and returns false.
+bool SdioCard::writeStop()
+{
+    logmsg("SdioCard::writeStop() called but not implemented!");
+    return false;
+}
+
+// Not implemented in this driver: logs a message and returns false.
+bool SdioCard::erase(uint32_t firstSector, uint32_t lastSector)
+{
+    logmsg("SdioCard::erase() not implemented");
+    return false;
+}
+
+// Not implemented in this driver: logs a message and returns false.
+bool SdioCard::cardCMD6(uint32_t arg, uint8_t* status) {
+    logmsg("SdioCard::cardCMD6() not implemented");
+    return false;
+}
+
+// Not implemented in this driver: logs a message and returns false.
+bool SdioCard::readSCR(scr_t* scr) {
+    logmsg("SdioCard::readSCR() not implemented");
+    return false;
+}
+
+/* Writing and reading, with progress callback */
+
+// Write one 512-byte sector from `src` to `sector`.
+// An unaligned source is first copied into g_sdio_dma_buf. Progress is
+// reported through the stream callback when the buffer continues the
+// registered stream. Returns true when the transfer finished with SDIO_OK;
+// the final status is left in g_sdio_error.
+bool SdioCard::writeSector(uint32_t sector, const uint8_t* src)
+{
+    if (((uint32_t)src & 3) != 0)
+    {
+        // Buffer is not aligned, need to memcpy() the data to a temporary buffer.
+        memcpy(g_sdio_dma_buf, src, sizeof(g_sdio_dma_buf));
+        src = (uint8_t*)g_sdio_dma_buf;
+    }
+
+    // If possible, report transfer status to application through callback.
+    // (After the copy above `src` points at the temporary buffer, so an
+    // unaligned write is looked up with that address.)
+    sd_callback_t callback = get_stream_callback(src, 512, "writeSector", sector);
+
+    // Cards up to 2GB use byte addressing, SDHC cards use sector addressing
+    uint32_t address = (type() == SD_CARD_TYPE_SDHC) ? sector : (sector * 512);
+
+    uint32_t reply;
+    if (!checkReturnOk(rp2040_sdio_command_R1(16, 512, &reply)) || // SET_BLOCKLEN
+        !checkReturnOk(rp2040_sdio_command_R1(CMD24, address, &reply)) || // WRITE_BLOCK
+        !checkReturnOk(rp2040_sdio_tx_start(src, 1))) // Start transmission
+    {
+        return false;
+    }
+
+    // Poll until the transfer leaves the busy state, forwarding progress.
+    do {
+        uint32_t bytes_done;
+        g_sdio_error = rp2040_sdio_tx_poll(&bytes_done);
+
+        if (callback)
+        {
+            callback(m_stream_count_start + bytes_done);
+        }
+    } while (g_sdio_error == SDIO_BUSY);
+
+    if (g_sdio_error != SDIO_OK)
+    {
+        logmsg("SdioCard::writeSector(", sector, ") failed: ", (int)g_sdio_error);
+    }
+
+    return g_sdio_error == SDIO_OK;
+}
+
+// Write `n` consecutive 512-byte sectors starting at `sector` from `src`.
+// Unaligned sources are written one sector at a time through writeSector();
+// aligned sources use a single multi-block write followed by
+// stopTransmission(true). Returns true on success.
+bool SdioCard::writeSectors(uint32_t sector, const uint8_t* src, size_t n)
+{
+    if (((uint32_t)src & 3) != 0)
+    {
+        // Unaligned write, execute sector-by-sector
+        for (size_t i = 0; i < n; i++)
+        {
+            if (!writeSector(sector + i, src + 512 * i))
+            {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    // If possible, report transfer status to application through callback.
+    sd_callback_t callback = get_stream_callback(src, n * 512, "writeSectors", sector);
+
+    // Cards up to 2GB use byte addressing, SDHC cards use sector addressing
+    uint32_t address = (type() == SD_CARD_TYPE_SDHC) ? sector : (sector * 512);
+
+    uint32_t reply;
+    if (!checkReturnOk(rp2040_sdio_command_R1(16, 512, &reply)) || // SET_BLOCKLEN
+        !checkReturnOk(rp2040_sdio_command_R1(CMD55, g_sdio_rca, &reply)) || // APP_CMD
+        !checkReturnOk(rp2040_sdio_command_R1(ACMD23, n, &reply)) || // SET_WR_CLK_ERASE_COUNT
+        !checkReturnOk(rp2040_sdio_command_R1(CMD25, address, &reply)) || // WRITE_MULTIPLE_BLOCK
+        !checkReturnOk(rp2040_sdio_tx_start(src, n))) // Start transmission
+    {
+        return false;
+    }
+
+    // Poll until the transfer leaves the busy state, forwarding progress.
+    do {
+        uint32_t bytes_done;
+        g_sdio_error = rp2040_sdio_tx_poll(&bytes_done);
+
+        if (callback)
+        {
+            callback(m_stream_count_start + bytes_done);
+        }
+    } while (g_sdio_error == SDIO_BUSY);
+
+    if (g_sdio_error != SDIO_OK)
+    {
+        logmsg("SdioCard::writeSectors(", sector, ",...,", (int)n, ") failed: ", (int)g_sdio_error);
+        // Still end the multi-block write, but report the failure.
+        stopTransmission(true);
+        return false;
+    }
+    else
+    {
+        // TODO: Instead of CMD12 stopTransmission command, according to SD spec we should send stopTran token.
+        // stopTransmission seems to work in practice.
+        return stopTransmission(true);
+    }
+}
+
+// Read one 512-byte sector from `sector` into `dst`.
+// An unaligned destination is served through g_sdio_dma_buf and copied to
+// `dst` afterwards. Progress is reported through the stream callback when the
+// buffer continues the registered stream. Returns true when the transfer
+// finished with SDIO_OK; the final status is left in g_sdio_error.
+bool SdioCard::readSector(uint32_t sector, uint8_t* dst)
+{
+    uint8_t *real_dst = dst;
+    if (((uint32_t)dst & 3) != 0)
+    {
+        // Buffer is not aligned, need to memcpy() the data from a temporary buffer.
+        dst = (uint8_t*)g_sdio_dma_buf;
+    }
+
+    // If possible, report transfer status to application through callback.
+    sd_callback_t callback = get_stream_callback(dst, 512, "readSector", sector);
+
+    // Cards up to 2GB use byte addressing, SDHC cards use sector addressing
+    uint32_t address = (type() == SD_CARD_TYPE_SDHC) ? sector : (sector * 512);
+
+    uint32_t reply;
+    if (!checkReturnOk(rp2040_sdio_command_R1(16, 512, &reply)) || // SET_BLOCKLEN
+        !checkReturnOk(rp2040_sdio_rx_start(dst, 1)) || // Prepare for reception
+        !checkReturnOk(rp2040_sdio_command_R1(CMD17, address, &reply))) // READ_SINGLE_BLOCK
+    {
+        return false;
+    }
+
+    // Poll until the transfer leaves the busy state, forwarding progress.
+    do {
+        uint32_t bytes_done;
+        g_sdio_error = rp2040_sdio_rx_poll(&bytes_done);
+
+        if (callback)
+        {
+            callback(m_stream_count_start + bytes_done);
+        }
+    } while (g_sdio_error == SDIO_BUSY);
+
+    if (g_sdio_error != SDIO_OK)
+    {
+        logmsg("SdioCard::readSector(", sector, ") failed: ", (int)g_sdio_error);
+    }
+
+    // The copy back to the caller's buffer happens regardless of the result.
+    if (dst != real_dst)
+    {
+        memcpy(real_dst, g_sdio_dma_buf, sizeof(g_sdio_dma_buf));
+    }
+
+    return g_sdio_error == SDIO_OK;
+}
+
+// Read `n` consecutive 512-byte sectors starting at `sector` into `dst`.
+// Unaligned destinations and reads that reach the cached sector count are
+// served one sector at a time through readSector(); everything else uses a
+// single multi-block read followed by stopTransmission(true).
+// Returns true on success.
+bool SdioCard::readSectors(uint32_t sector, uint8_t* dst, size_t n)
+{
+    if (((uint32_t)dst & 3) != 0 || sector + n >= g_sdio_sector_count)
+    {
+        // Unaligned read or end-of-drive read, execute sector-by-sector
+        for (size_t i = 0; i < n; i++)
+        {
+            if (!readSector(sector + i, dst + 512 * i))
+            {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    // If possible, report transfer status to application through callback.
+    sd_callback_t callback = get_stream_callback(dst, n * 512, "readSectors", sector);
+
+    // Cards up to 2GB use byte addressing, SDHC cards use sector addressing
+    uint32_t address = (type() == SD_CARD_TYPE_SDHC) ? sector : (sector * 512);
+
+    uint32_t reply;
+    if (!checkReturnOk(rp2040_sdio_command_R1(16, 512, &reply)) || // SET_BLOCKLEN
+        !checkReturnOk(rp2040_sdio_rx_start(dst, n)) || // Prepare for reception
+        !checkReturnOk(rp2040_sdio_command_R1(CMD18, address, &reply))) // READ_MULTIPLE_BLOCK
+    {
+        return false;
+    }
+
+    // Poll until the transfer leaves the busy state, forwarding progress.
+    do {
+        uint32_t bytes_done;
+        g_sdio_error = rp2040_sdio_rx_poll(&bytes_done);
+
+        if (callback)
+        {
+            callback(m_stream_count_start + bytes_done);
+        }
+    } while (g_sdio_error == SDIO_BUSY);
+
+    if (g_sdio_error != SDIO_OK)
+    {
+        logmsg("SdioCard::readSectors(", sector, ",...,", (int)n, ") failed: ", (int)g_sdio_error);
+        // Still end the multi-block read, but report the failure.
+        stopTransmission(true);
+        return false;
+    }
+    else
+    {
+        return stopTransmission(true);
+    }
+}
+
+// These functions are not used for SDIO mode but are needed to avoid build error.
+// Chip-select setup hook: intentionally empty in the SDIO build.
+void sdCsInit(SdCsPin_t pin) {}
+// Chip-select write hook: intentionally empty in the SDIO build.
+void sdCsWrite(SdCsPin_t pin, bool level) {}
+
+// SDIO configuration for main program
+SdioConfig g_sd_sdio_config(DMA_SDIO);
+
+#endif

+ 103 - 0
lib/ZuluSCSI_platform_RP2350/sd_card_spi.cpp

@@ -0,0 +1,103 @@
+/** 
+ * ZuluSCSI™ - Copyright (c) 2022 Rabbit Hole Computing™
+ * 
+ * ZuluSCSI™ firmware is licensed under the GPL version 3 or any later version. 
+ * 
+ * https://www.gnu.org/licenses/gpl-3.0.html
+ * ----
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version. 
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details. 
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+**/
+
+// Driver and interface for accessing SD card in SPI mode
+
+#include "ZuluSCSI_platform.h"
+#include "ZuluSCSI_log.h"
+#include <hardware/spi.h>
+#include <SdFat.h>
+
+#ifndef SD_USE_SDIO
+
+// SPI port driver handed to SdFat (see g_sd_spi_config); all transfers go
+// through the SD_SPI peripheral using the blocking SDK calls.
+class RP2040SPIDriver : public SdSpiBaseClass
+{
+public:
+    // Nothing to prepare here; the peripheral is configured in activate().
+    void begin(SdSpiConfig config) {
+    }
+
+    // Initialise SD_SPI at the clock stored by setSckSpeed() and select
+    // 8-bit frames, CPOL 0 / CPHA 0, MSB first.
+    void activate() {
+        _spi_init(SD_SPI, m_sckfreq);
+        spi_set_format(SD_SPI, 8, SPI_CPOL_0, SPI_CPHA_0, SPI_MSB_FIRST);
+    }
+
+    // Nothing to release.
+    void deactivate() {
+    }
+
+    // Spin until the transmit FIFO is empty and the peripheral is no longer busy.
+    void wait_idle() {
+        while ((spi_get_hw(SD_SPI)->sr & SPI_SSPSR_TFE_BITS) == 0) {
+        }
+        while ((spi_get_hw(SD_SPI)->sr & SPI_SSPSR_BSY_BITS) != 0) {
+        }
+    }
+
+    // Receive one byte while clocking out 0xFF.
+    uint8_t receive() {
+        uint8_t fill = 0xFF;
+        uint8_t received;
+        spi_write_read_blocking(SD_SPI, &fill, &received, 1);
+        return received;
+    }
+
+    // Send one byte and wait for the bus to go idle.
+    void send(uint8_t data) {
+        spi_write_blocking(SD_SPI, &data, 1);
+        wait_idle();
+    }
+
+    // Receive `count` bytes into `buf` while clocking out 0xFF; always returns 0.
+    uint8_t receive(uint8_t* buf, size_t count)
+    {
+        spi_read_blocking(SD_SPI, 0xFF, buf, count);
+        return 0;
+    }
+
+    // Send `count` bytes from `buf`.
+    void send(const uint8_t* buf, size_t count) {
+        spi_write_blocking(SD_SPI, buf, count);
+    }
+
+    // Remember the clock rate to apply on the next activate().
+    void setSckSpeed(uint32_t maxSck) {
+        m_sckfreq = maxSck;
+    }
+
+private:
+    uint32_t m_sckfreq; // Clock rate passed to _spi_init() in activate()
+};
+
+// Chip-select setup hook: nothing is done here in the SPI build.
+void sdCsInit(SdCsPin_t pin)
+{
+}
+
+// Drive the SD card chip select: `level` true sets the SD_SPI_CS GPIO,
+// false clears it. The `pin` argument is ignored.
+void sdCsWrite(SdCsPin_t pin, bool level)
+{
+    const uint32_t cs_mask = (1 << SD_SPI_CS);
+
+    if (!level)
+    {
+        sio_hw->gpio_clr = cs_mask;
+        return;
+    }
+
+    sio_hw->gpio_set = cs_mask;
+}
+
+RP2040SPIDriver g_sd_spi_port;
+SdSpiConfig g_sd_spi_config(0, DEDICATED_SPI, SD_SCK_MHZ(25), &g_sd_spi_port);
+
+// Progress callbacks are not supported in the SPI build: both arguments are
+// ignored.
+void platform_set_sd_callback(sd_callback_t func, const uint8_t *buffer)
+{
+}
+
+#endif

+ 848 - 0
lib/ZuluSCSI_platform_RP2350/sdio.cpp

@@ -0,0 +1,848 @@
+/** 
+ * ZuluSCSI™ - Copyright (c) 2022 Rabbit Hole Computing™
+ * 
+ * ZuluSCSI™ firmware is licensed under the GPL version 3 or any later version. 
+ * 
+ * https://www.gnu.org/licenses/gpl-3.0.html
+ * ----
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version. 
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details. 
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+**/
+
+// Implementation of SDIO communication for RP2040
+//
+// The official RP2040 work-in-progress code at
+// https://github.com/raspberrypi/pico-extras/tree/master/src/rp2_common/pico_sd_card
+// may be a useful reference, but this is an independent implementation.
+//
+// For official SDIO specifications, refer to:
+// https://www.sdcard.org/downloads/pls/
+// "SDIO Physical Layer Simplified Specification Version 8.00"
+
+#include "sdio.h"
+#include <hardware/pio.h>
+#include <hardware/dma.h>
+#include <hardware/gpio.h>
+#include <ZuluSCSI_platform.h>
+#include <ZuluSCSI_log.h>
+
+// \todo find a better way to read the SCB ICSR register (used in rp2040_sdio_tx_poll())
+#include <hardware/structs/scb.h>
+
+#if defined(ZULUSCSI_PICO) || defined(ZULUSCSI_BS2)
+#include "sdio_Pico.pio.h"
+#else
+#include "sdio_RP2040.pio.h"
+#endif
+
+// Hardware resources used by this driver.
+// PIO block that hosts all SDIO programs.
+#define SDIO_PIO pio1
+// State machine running the command & clock program.
+#define SDIO_CMD_SM 0
+// State machine running the data reception / transmission programs.
+#define SDIO_DATA_SM 1
+// DMA channel that moves data between memory and the PIO FIFOs.
+#define SDIO_DMA_CH 4
+// Second DMA channel, chained after SDIO_DMA_CH.
+#define SDIO_DMA_CHB 5
+
+// Maximum number of 512 byte blocks to transfer in one request
+#define SDIO_MAX_BLOCKS 256
+
+// Data transfer state:
+//   SDIO_IDLE         - no transfer in progress
+//   SDIO_RX           - receiving blocks from the card
+//   SDIO_TX           - a block is being sent to the card
+//   SDIO_TX_WAIT_IDLE - block sent, waiting for the card's response word
+enum sdio_transfer_state_t { SDIO_IDLE, SDIO_RX, SDIO_TX, SDIO_TX_WAIT_IDLE};
+
+// Driver state shared between the polling functions and the DMA IRQ handler.
+// Cleared by rp2040_sdio_init().
+static struct {
+    // Load offsets of the PIO programs and the prepared state machine
+    // configurations for the data RX and TX programs.
+    uint32_t pio_cmd_clk_offset;
+    uint32_t pio_data_rx_offset;
+    pio_sm_config pio_cfg_data_rx;
+    uint32_t pio_data_tx_offset;
+    pio_sm_config pio_cfg_data_tx;
+
+    sdio_transfer_state_t transfer_state;
+    uint32_t transfer_start_time; // millis() value when the transfer was started
+    uint32_t *data_buf; // Caller's data buffer for the current transfer
+    uint32_t blocks_done; // Number of blocks transferred so far
+    uint32_t total_blocks; // Total number of blocks to transfer
+    uint32_t blocks_checksumed; // Number of blocks that have had CRC calculated
+    uint32_t checksum_errors; // Number of checksum errors detected
+
+    // Variables for block writes
+    uint64_t next_wr_block_checksum; // CRC of the next block to be sent
+    uint32_t end_token_buf[3]; // CRC and end token for write block
+    sdio_status_t wr_status; // Result of the most recent write response check
+    uint32_t card_response; // Raw response word read from the data state machine
+
+    // Variables for block reads
+    // This is used to perform DMA into data buffers and checksum buffers separately.
+    // Each block needs two descriptors (payload and checksum), and
+    // rp2040_sdio_rx_start() appends one all-zero terminator descriptor at
+    // index num_blocks * 2. The extra entry keeps that terminator inside the
+    // array when num_blocks == SDIO_MAX_BLOCKS; without it the terminator
+    // would land in received_checksums[0] and be overwritten during reception.
+    struct {
+        void * write_addr;
+        uint32_t transfer_count;
+    } dma_blocks[SDIO_MAX_BLOCKS * 2 + 1];
+    // Checksum words stored by DMA after each received block.
+    struct {
+        uint32_t top;
+        uint32_t bottom;
+    } received_checksums[SDIO_MAX_BLOCKS];
+} g_sdio;
+
+// NOTE(review): forward declaration only; no definition of this function
+// appears in this file (the IRQ handler installed by rp2040_sdio_init() is
+// rp2040_sdio_tx_irq). Confirm whether it is still needed.
+void rp2040_sdio_dma_irq();
+
+/*******************************************************
+ * Checksum algorithms
+ *******************************************************/
+
+// Table lookup for calculating CRC-7 checksum that is used in SDIO command packets.
+// Usage:
+//    uint8_t crc = 0;
+//    crc = crc7_table[crc ^ byte];
+//    .. repeat for every byte ..
+//
+// Every entry is even: the 7-bit CRC is kept in bits 7..1 of the byte, which
+// lets the running value be XORed directly with the next data byte. Callers
+// in this file compare against the received byte masked with 0xFE.
+static const uint8_t crc7_table[256] = {
+	0x00, 0x12, 0x24, 0x36, 0x48, 0x5a, 0x6c, 0x7e,	0x90, 0x82, 0xb4, 0xa6, 0xd8, 0xca, 0xfc, 0xee,
+	0x32, 0x20, 0x16, 0x04, 0x7a, 0x68, 0x5e, 0x4c,	0xa2, 0xb0, 0x86, 0x94, 0xea, 0xf8, 0xce, 0xdc,
+	0x64, 0x76, 0x40, 0x52, 0x2c, 0x3e, 0x08, 0x1a,	0xf4, 0xe6, 0xd0, 0xc2, 0xbc, 0xae, 0x98, 0x8a,
+	0x56, 0x44, 0x72, 0x60, 0x1e, 0x0c, 0x3a, 0x28,	0xc6, 0xd4, 0xe2, 0xf0, 0x8e, 0x9c, 0xaa, 0xb8,
+	0xc8, 0xda, 0xec, 0xfe, 0x80, 0x92, 0xa4, 0xb6,	0x58, 0x4a, 0x7c, 0x6e, 0x10, 0x02, 0x34, 0x26,
+	0xfa, 0xe8, 0xde, 0xcc, 0xb2, 0xa0, 0x96, 0x84,	0x6a, 0x78, 0x4e, 0x5c, 0x22, 0x30, 0x06, 0x14,
+	0xac, 0xbe, 0x88, 0x9a, 0xe4, 0xf6, 0xc0, 0xd2,	0x3c, 0x2e, 0x18, 0x0a, 0x74, 0x66, 0x50, 0x42,
+	0x9e, 0x8c, 0xba, 0xa8, 0xd6, 0xc4, 0xf2, 0xe0,	0x0e, 0x1c, 0x2a, 0x38, 0x46, 0x54, 0x62, 0x70,
+	0x82, 0x90, 0xa6, 0xb4, 0xca, 0xd8, 0xee, 0xfc,	0x12, 0x00, 0x36, 0x24, 0x5a, 0x48, 0x7e, 0x6c,
+	0xb0, 0xa2, 0x94, 0x86, 0xf8, 0xea, 0xdc, 0xce,	0x20, 0x32, 0x04, 0x16, 0x68, 0x7a, 0x4c, 0x5e,
+	0xe6, 0xf4, 0xc2, 0xd0, 0xae, 0xbc, 0x8a, 0x98,	0x76, 0x64, 0x52, 0x40, 0x3e, 0x2c, 0x1a, 0x08,
+	0xd4, 0xc6, 0xf0, 0xe2, 0x9c, 0x8e, 0xb8, 0xaa,	0x44, 0x56, 0x60, 0x72, 0x0c, 0x1e, 0x28, 0x3a,
+	0x4a, 0x58, 0x6e, 0x7c, 0x02, 0x10, 0x26, 0x34,	0xda, 0xc8, 0xfe, 0xec, 0x92, 0x80, 0xb6, 0xa4,
+	0x78, 0x6a, 0x5c, 0x4e, 0x30, 0x22, 0x14, 0x06,	0xe8, 0xfa, 0xcc, 0xde, 0xa0, 0xb2, 0x84, 0x96,
+	0x2e, 0x3c, 0x0a, 0x18, 0x66, 0x74, 0x42, 0x50,	0xbe, 0xac, 0x9a, 0x88, 0xf6, 0xe4, 0xd2, 0xc0,
+	0x1c, 0x0e, 0x38, 0x2a, 0x54, 0x46, 0x70, 0x62,	0x8c, 0x9e, 0xa8, 0xba, 0xc4, 0xd6, 0xe0, 0xf2
+};
+
+// Compute the CRC16 of a data block for all four SDIO data lines at once.
+// In 4-bit bus mode every line carries its own CRC16, so the result packs
+// 4 x 16 = 64 checksum bits, interleaved one bit per line in each nibble.
+//
+// data:      words to checksum, in memory (little-endian) order
+// num_words: number of 32-bit words; words are consumed in groups of four,
+//            so this is expected to be a multiple of 4
+// Returns the combined 64-bit checksum.
+__attribute__((optimize("O3")))
+uint64_t sdio_crc16_4bit_checksum(uint32_t *data, uint32_t num_words)
+{
+    uint64_t acc = 0;
+    uint32_t *const stop = data + num_words;
+
+    while (data < stop)
+    {
+        // Manually grouped by four words per outer iteration.
+        for (int remaining = 4; remaining > 0; remaining--)
+        {
+            // A word holds 8 bits for each line; swap to the big-endian
+            // order used on the SDIO bus.
+            const uint32_t incoming = __builtin_bswap32(*data);
+            data++;
+
+            // The top 32 bits of the accumulator (8 bits per line) leave
+            // the register on this step.
+            uint32_t outgoing = (uint32_t)(acc >> 32);
+            acc <<= 32;
+
+            // Feed the outgoing bits back into themselves, and the incoming
+            // bits into the outgoing ones, each with a 4 bit delay.
+            outgoing ^= (outgoing >> 16);
+            outgoing ^= (incoming >> 16);
+
+            // Apply the feedback at each polynomial tap (bit 0, 5 and 12,
+            // scaled by four because the lines are interleaved).
+            const uint64_t feedback = outgoing ^ incoming;
+            acc ^= feedback;
+            acc ^= feedback << (5 * 4);
+            acc ^= feedback << (12 * 4);
+        }
+    }
+
+    return acc;
+}
+
+/*******************************************************
+ * Basic SDIO command execution
+ *******************************************************/
+
+// Build an SD command packet and queue it to the command state machine.
+//
+// command:       command index placed in the command byte of the packet
+// arg:           32-bit argument, transmitted most significant byte first
+// response_bits: length of the expected response in bits, or 0 if no
+//                response is expected
+//
+// The packet is handed to the PIO program as two 32-bit words pushed to the
+// TX FIFO of SDIO_CMD_SM. This function does not wait for the response.
+static void sdio_send_command(uint8_t command, uint32_t arg, uint8_t response_bits)
+{
+    // dbgmsg("SDIO Command: ", (int)command, " arg ", arg);
+
+    // Format the arguments in the way expected by the PIO code.
+    uint32_t word0 =
+        (47 << 24) | // Number of bits in command minus one
+        ( 1 << 22) | // Transfer direction from host to card
+        (command << 16) | // Command byte
+        (((arg >> 24) & 0xFF) << 8) | // MSB byte of argument
+        (((arg >> 16) & 0xFF) << 0);
+    
+    uint32_t word1 =
+        (((arg >> 8) & 0xFF) << 24) |
+        (((arg >> 0) & 0xFF) << 16) | // LSB byte of argument
+        ( 1 << 8); // End bit
+
+    // Set number of bits in response minus one, or leave at 0 if no response expected
+    if (response_bits)
+    {
+        word1 |= ((response_bits - 1) << 0);
+    }
+
+    // Calculate checksum in the order that the bytes will be transmitted (big-endian)
+    // The five bytes covered are the command byte (including the direction
+    // bit) followed by the four argument bytes.
+    uint8_t crc = 0;
+    crc = crc7_table[crc ^ ((word0 >> 16) & 0xFF)];
+    crc = crc7_table[crc ^ ((word0 >>  8) & 0xFF)];
+    crc = crc7_table[crc ^ ((word0 >>  0) & 0xFF)];
+    crc = crc7_table[crc ^ ((word1 >> 24) & 0xFF)];
+    crc = crc7_table[crc ^ ((word1 >> 16) & 0xFF)];
+    // Table values hold the CRC in bits 7..1, so after the shift it sits in
+    // bits 15..9 of word1, directly above the end bit in bit 8.
+    word1 |= crc << 8;
+    
+    // Transmit command
+    pio_sm_clear_fifos(SDIO_PIO, SDIO_CMD_SM);
+    pio_sm_put(SDIO_PIO, SDIO_CMD_SM, word0);
+    pio_sm_put(SDIO_PIO, SDIO_CMD_SM, word1);
+}
+
+// Send a command and optionally receive a 48-bit response.
+//
+// command:  command index to send
+// arg:      32-bit command argument
+// response: if non-NULL, receives the 32-bit payload of the response;
+//           if NULL, no response is requested and only a single marker word
+//           is awaited from the state machine
+//
+// Returns SDIO_OK on success, SDIO_ERR_RESPONSE_TIMEOUT if the expected FIFO
+// words do not arrive in time (the state machine is then restarted),
+// SDIO_ERR_RESPONSE_CRC on a CRC-7 mismatch, or SDIO_ERR_RESPONSE_CODE if the
+// response carries a different command index.
+sdio_status_t rp2040_sdio_command_R1(uint8_t command, uint32_t arg, uint32_t *response)
+{
+    sdio_send_command(command, arg, response ? 48 : 0);
+
+    // Wait for response
+    uint32_t start = millis();
+    uint32_t wait_words = response ? 2 : 1;
+    while (pio_sm_get_rx_fifo_level(SDIO_PIO, SDIO_CMD_SM) < wait_words)
+    {
+        // Give up once more than 2 ms have elapsed (unsigned subtraction
+        // keeps the comparison valid across millis() wrap-around).
+        if ((uint32_t)(millis() - start) > 2)
+        {
+            if (command != 8) // Don't log for missing SD card
+            {
+                dbgmsg("Timeout waiting for response in rp2040_sdio_command_R1(", (int)command, "), ",
+                    "PIO PC: ", (int)pio_sm_get_pc(SDIO_PIO, SDIO_CMD_SM) - (int)g_sdio.pio_cmd_clk_offset,
+                    " RXF: ", (int)pio_sm_get_rx_fifo_level(SDIO_PIO, SDIO_CMD_SM),
+                    " TXF: ", (int)pio_sm_get_tx_fifo_level(SDIO_PIO, SDIO_CMD_SM));
+            }
+
+            // Reset the state machine program
+            pio_sm_clear_fifos(SDIO_PIO, SDIO_CMD_SM);
+            pio_sm_exec(SDIO_PIO, SDIO_CMD_SM, pio_encode_jmp(g_sdio.pio_cmd_clk_offset));
+            return SDIO_ERR_RESPONSE_TIMEOUT;
+        }
+    }
+
+    if (response)
+    {
+        // Read out response packet
+        uint32_t resp0 = pio_sm_get(SDIO_PIO, SDIO_CMD_SM);
+        uint32_t resp1 = pio_sm_get(SDIO_PIO, SDIO_CMD_SM);
+        // dbgmsg("SDIO R1 response: ", resp0, " ", resp1);
+
+        // Calculate response checksum
+        // Covers the four bytes of resp0 and bits 15..8 of resp1.
+        uint8_t crc = 0;
+        crc = crc7_table[crc ^ ((resp0 >> 24) & 0xFF)];
+        crc = crc7_table[crc ^ ((resp0 >> 16) & 0xFF)];
+        crc = crc7_table[crc ^ ((resp0 >>  8) & 0xFF)];
+        crc = crc7_table[crc ^ ((resp0 >>  0) & 0xFF)];
+        crc = crc7_table[crc ^ ((resp1 >>  8) & 0xFF)];
+
+        // The lowest byte of resp1 carries the CRC in bits 7..1; bit 0 is
+        // masked off before comparing.
+        uint8_t actual_crc = ((resp1 >> 0) & 0xFE);
+        if (crc != actual_crc)
+        {
+            dbgmsg("rp2040_sdio_command_R1(", (int)command, "): CRC error, calculated ", crc, " packet has ", actual_crc);
+            return SDIO_ERR_RESPONSE_CRC;
+        }
+
+        // Command 41 is exempt from the echo check.
+        // NOTE(review): presumably because its reply does not echo the
+        // command index - confirm against the SD specification.
+        uint8_t response_cmd = ((resp0 >> 24) & 0xFF);
+        if (response_cmd != command && command != 41)
+        {
+            dbgmsg("rp2040_sdio_command_R1(", (int)command, "): received reply for ", (int)response_cmd);
+            return SDIO_ERR_RESPONSE_CODE;
+        }
+
+        // Payload is the low 24 bits of resp0 followed by bits 15..8 of resp1.
+        *response = ((resp0 & 0xFFFFFF) << 8) | ((resp1 >> 8) & 0xFF);
+    }
+    else
+    {
+        // Read out dummy marker
+        pio_sm_get(SDIO_PIO, SDIO_CMD_SM);
+    }
+
+    return SDIO_OK;
+}
+
+// Send a command and receive a 136-bit response.
+//
+// command:  command index to send
+// arg:      32-bit command argument
+// response: receives the 16 bytes that follow the first byte of the response
+//
+// Returns SDIO_OK on success, SDIO_ERR_RESPONSE_TIMEOUT if the response does
+// not arrive in time, SDIO_ERR_RESPONSE_CRC on a CRC-7 mismatch, or
+// SDIO_ERR_RESPONSE_CODE if the first response byte is not 0x3F.
+sdio_status_t rp2040_sdio_command_R2(uint8_t command, uint32_t arg, uint8_t response[16])
+{
+    // Five words do not fit in the PIO FIFO, so DMA collects them.
+    pio_sm_clear_fifos(SDIO_PIO, SDIO_CMD_SM);
+
+    uint32_t raw_words[5];
+    dma_channel_config rx_cfg = dma_channel_get_default_config(SDIO_DMA_CH);
+    channel_config_set_transfer_data_size(&rx_cfg, DMA_SIZE_32);
+    channel_config_set_read_increment(&rx_cfg, false);
+    channel_config_set_write_increment(&rx_cfg, true);
+    channel_config_set_dreq(&rx_cfg, pio_get_dreq(SDIO_PIO, SDIO_CMD_SM, false));
+    dma_channel_configure(SDIO_DMA_CH, &rx_cfg, &raw_words, &SDIO_PIO->rxf[SDIO_CMD_SM], 5, true);
+
+    sdio_send_command(command, arg, 136);
+
+    const uint32_t wait_began = millis();
+    while (dma_channel_is_busy(SDIO_DMA_CH))
+    {
+        if ((uint32_t)(millis() - wait_began) <= 2)
+        {
+            continue;
+        }
+
+        dbgmsg("Timeout waiting for response in rp2040_sdio_command_R2(", (int)command, "), ",
+              "PIO PC: ", (int)pio_sm_get_pc(SDIO_PIO, SDIO_CMD_SM) - (int)g_sdio.pio_cmd_clk_offset,
+              " RXF: ", (int)pio_sm_get_rx_fifo_level(SDIO_PIO, SDIO_CMD_SM),
+              " TXF: ", (int)pio_sm_get_tx_fifo_level(SDIO_PIO, SDIO_CMD_SM));
+
+        // Stop the transfer and restart the state machine program
+        dma_channel_abort(SDIO_DMA_CH);
+        pio_sm_clear_fifos(SDIO_PIO, SDIO_CMD_SM);
+        pio_sm_exec(SDIO_PIO, SDIO_CMD_SM, pio_encode_jmp(g_sdio.pio_cmd_clk_offset));
+        return SDIO_ERR_RESPONSE_TIMEOUT;
+    }
+
+    dma_channel_abort(SDIO_DMA_CH);
+
+    // Unpack the payload. Byte 0 of the received stream (top byte of
+    // raw_words[0]) is the response code, so output byte i is stream byte
+    // i + 1, taken most significant byte first from each word. The CRC over
+    // the first 15 output bytes is accumulated along the way.
+    uint8_t computed_crc = 0;
+    for (int i = 0; i < 15; i++)
+    {
+        const int stream_pos = i + 1;
+        const int shift = 24 - 8 * (stream_pos % 4);
+        response[i] = ((raw_words[stream_pos / 4] >> shift) & 0xFF);
+        computed_crc = crc7_table[computed_crc ^ response[i]];
+    }
+
+    // The final byte is delivered in the low byte of the fifth word.
+    response[15] = ((raw_words[4] >> 0) & 0xFF);
+
+    const uint8_t received_crc = response[15] & 0xFE;
+    if (computed_crc != received_crc)
+    {
+        dbgmsg("rp2040_sdio_command_R2(", (int)command, "): CRC error, calculated ", computed_crc, " packet has ", received_crc);
+        return SDIO_ERR_RESPONSE_CRC;
+    }
+
+    const uint8_t reply_code = ((raw_words[0] >> 24) & 0xFF);
+    if (reply_code != 0x3F)
+    {
+        dbgmsg("rp2040_sdio_command_R2(", (int)command, "): Expected reply code 0x3F");
+        return SDIO_ERR_RESPONSE_CODE;
+    }
+
+    return SDIO_OK;
+}
+
+
+// Send a command and receive a 48-bit response without validating it:
+// neither the CRC nor the command index of the reply is checked.
+//
+// command:  command index to send
+// arg:      32-bit command argument
+// response: receives the 32-bit payload of the response (must not be NULL)
+//
+// Returns SDIO_OK, or SDIO_ERR_RESPONSE_TIMEOUT if the two response words do
+// not arrive in time (the state machine is then restarted).
+sdio_status_t rp2040_sdio_command_R3(uint8_t command, uint32_t arg, uint32_t *response)
+{
+    sdio_send_command(command, arg, 48);
+
+    // Poll until both response words are in the RX FIFO
+    const uint32_t wait_began = millis();
+    while (pio_sm_get_rx_fifo_level(SDIO_PIO, SDIO_CMD_SM) < 2)
+    {
+        if ((uint32_t)(millis() - wait_began) <= 2)
+        {
+            continue;
+        }
+
+        dbgmsg("Timeout waiting for response in rp2040_sdio_command_R3(", (int)command, "), ",
+              "PIO PC: ", (int)pio_sm_get_pc(SDIO_PIO, SDIO_CMD_SM) - (int)g_sdio.pio_cmd_clk_offset,
+              " RXF: ", (int)pio_sm_get_rx_fifo_level(SDIO_PIO, SDIO_CMD_SM),
+              " TXF: ", (int)pio_sm_get_tx_fifo_level(SDIO_PIO, SDIO_CMD_SM));
+
+        // Restart the state machine program
+        pio_sm_clear_fifos(SDIO_PIO, SDIO_CMD_SM);
+        pio_sm_exec(SDIO_PIO, SDIO_CMD_SM, pio_encode_jmp(g_sdio.pio_cmd_clk_offset));
+        return SDIO_ERR_RESPONSE_TIMEOUT;
+    }
+
+    // Payload is the low 24 bits of the first word followed by
+    // bits 15..8 of the second word.
+    const uint32_t first_word = pio_sm_get(SDIO_PIO, SDIO_CMD_SM);
+    const uint32_t second_word = pio_sm_get(SDIO_PIO, SDIO_CMD_SM);
+    const uint32_t payload_high = (first_word & 0xFFFFFF) << 8;
+    const uint32_t payload_low = (second_word >> 8) & 0xFF;
+    *response = payload_high | payload_low;
+
+    return SDIO_OK;
+}
+
+/*******************************************************
+ * Data reception from SD card
+ *******************************************************/
+
+// Start receiving num_blocks blocks of SDIO_BLOCK_SIZE bytes into buffer.
+//
+// buffer:     destination, must be 4-byte aligned
+// num_blocks: number of blocks to receive, at most SDIO_MAX_BLOCKS
+//
+// Sets up a chain of two DMA channels and the data state machine, then
+// returns SDIO_OK immediately; progress is reported by rp2040_sdio_rx_poll().
+sdio_status_t rp2040_sdio_rx_start(uint8_t *buffer, uint32_t num_blocks)
+{
+    // Buffer must be aligned
+    assert(((uint32_t)buffer & 3) == 0 && num_blocks <= SDIO_MAX_BLOCKS);
+
+    g_sdio.transfer_state = SDIO_RX;
+    g_sdio.transfer_start_time = millis();
+    g_sdio.data_buf = (uint32_t*)buffer;
+    g_sdio.blocks_done = 0;
+    g_sdio.total_blocks = num_blocks;
+    g_sdio.blocks_checksumed = 0;
+    g_sdio.checksum_errors = 0;
+
+    // Create DMA block descriptors to store each block of 512 bytes of data to buffer
+    // and then 8 bytes to g_sdio.received_checksums.
+    for (int i = 0; i < num_blocks; i++)
+    {
+        g_sdio.dma_blocks[i * 2].write_addr = buffer + i * SDIO_BLOCK_SIZE;
+        g_sdio.dma_blocks[i * 2].transfer_count = SDIO_BLOCK_SIZE / sizeof(uint32_t);
+
+        g_sdio.dma_blocks[i * 2 + 1].write_addr = &g_sdio.received_checksums[i];
+        g_sdio.dma_blocks[i * 2 + 1].transfer_count = 2;
+    }
+    // All-zero descriptor terminates the chain.
+    // NOTE(review): this writes index num_blocks * 2, so with
+    // num_blocks == SDIO_MAX_BLOCKS the array must hold at least
+    // SDIO_MAX_BLOCKS * 2 + 1 entries - confirm against its declaration.
+    g_sdio.dma_blocks[num_blocks * 2].write_addr = 0;
+    g_sdio.dma_blocks[num_blocks * 2].transfer_count = 0;
+
+    // Configure first DMA channel for reading from the PIO RX fifo
+    // Its write address and transfer count are left at 0 here; they are
+    // supplied per descriptor by the second channel.
+    dma_channel_config dmacfg = dma_channel_get_default_config(SDIO_DMA_CH);
+    channel_config_set_transfer_data_size(&dmacfg, DMA_SIZE_32);
+    channel_config_set_read_increment(&dmacfg, false);
+    channel_config_set_write_increment(&dmacfg, true);
+    channel_config_set_dreq(&dmacfg, pio_get_dreq(SDIO_PIO, SDIO_DATA_SM, false));
+    channel_config_set_bswap(&dmacfg, true);
+    channel_config_set_chain_to(&dmacfg, SDIO_DMA_CHB);
+    dma_channel_configure(SDIO_DMA_CH, &dmacfg, 0, &SDIO_PIO->rxf[SDIO_DATA_SM], 0, false);
+
+    // Configure second DMA channel for reconfiguring the first one
+    // Each trigger copies one two-word descriptor from g_sdio.dma_blocks to
+    // the first channel's registers starting at al1_write_addr. The write
+    // ring of size 2^3 bytes makes the destination wrap back after every
+    // descriptor while the read address keeps advancing through the list.
+    dmacfg = dma_channel_get_default_config(SDIO_DMA_CHB);
+    channel_config_set_transfer_data_size(&dmacfg, DMA_SIZE_32);
+    channel_config_set_read_increment(&dmacfg, true);
+    channel_config_set_write_increment(&dmacfg, true);
+    channel_config_set_ring(&dmacfg, true, 3);
+    dma_channel_configure(SDIO_DMA_CHB, &dmacfg, &dma_hw->ch[SDIO_DMA_CH].al1_write_addr,
+        g_sdio.dma_blocks, 2, false);
+
+    // Initialize PIO state machine
+    pio_sm_init(SDIO_PIO, SDIO_DATA_SM, g_sdio.pio_data_rx_offset, &g_sdio.pio_cfg_data_rx);
+    pio_sm_set_consecutive_pindirs(SDIO_PIO, SDIO_DATA_SM, SDIO_D0, 4, false);
+
+    // Write number of nibbles to receive to Y register
+    // (two nibbles per data byte plus 16 nibbles of checksum, minus one)
+    pio_sm_put(SDIO_PIO, SDIO_DATA_SM, SDIO_BLOCK_SIZE * 2 + 16 - 1);
+    pio_sm_exec(SDIO_PIO, SDIO_DATA_SM, pio_encode_out(pio_y, 32));
+
+    // Enable RX FIFO join because we don't need the TX FIFO during transfer.
+    // This gives more leeway for the DMA block switching
+    SDIO_PIO->sm[SDIO_DATA_SM].shiftctrl |= PIO_SM0_SHIFTCTRL_FJOIN_RX_BITS;
+
+    // Start PIO and DMA
+    dma_channel_start(SDIO_DMA_CHB);
+    pio_sm_set_enabled(SDIO_PIO, SDIO_DATA_SM, true);
+
+    return SDIO_OK;
+}
+
+// Verify the checksums of blocks that have been received but not yet checked.
+//
+// maxcount: upper limit on the number of blocks verified in this call
+//
+// Each mismatch increments g_sdio.checksum_errors; only the first mismatch
+// of a transfer is logged.
+static void sdio_verify_rx_checksums(uint32_t maxcount)
+{
+    for (; g_sdio.blocks_checksumed < g_sdio.blocks_done && maxcount > 0; maxcount--)
+    {
+        // Checksum computed over the data that was stored in the buffer
+        int blockidx = g_sdio.blocks_checksumed++;
+        uint32_t *block_words = g_sdio.data_buf + blockidx * SDIO_WORDS_PER_BLOCK;
+        uint64_t checksum = sdio_crc16_4bit_checksum(block_words, SDIO_WORDS_PER_BLOCK);
+
+        // Checksum sent by the card, byte-swapped into little-endian format
+        uint64_t expected = __builtin_bswap32(g_sdio.received_checksums[blockidx].top);
+        expected = (expected << 32) | __builtin_bswap32(g_sdio.received_checksums[blockidx].bottom);
+
+        if (checksum == expected)
+        {
+            continue;
+        }
+
+        if (++g_sdio.checksum_errors == 1)
+        {
+            logmsg("SDIO checksum error in reception: block ", blockidx,
+                  " calculated ", checksum, " expected ", expected);
+        }
+    }
+}
+
+// Poll the progress of a reception started with rp2040_sdio_rx_start().
+//
+// bytes_complete: if non-NULL, receives the number of bytes received so far
+//
+// Returns SDIO_BUSY while the transfer is running, SDIO_OK when all blocks
+// were received with matching checksums, SDIO_ERR_DATA_CRC if any checksum
+// mismatched, or SDIO_ERR_DATA_TIMEOUT if the transfer has been running for
+// more than 1000 ms (the transfer is then stopped).
+sdio_status_t rp2040_sdio_rx_poll(uint32_t *bytes_complete)
+{
+    // Was everything done when the previous rx_poll() finished?
+    if (g_sdio.blocks_done >= g_sdio.total_blocks)
+    {
+        g_sdio.transfer_state = SDIO_IDLE;
+    }
+    else
+    {
+        // Use the idle time to calculate checksums
+        sdio_verify_rx_checksums(4);
+
+        // Check how many DMA control blocks have been consumed
+        uint32_t dma_ctrl_block_count = (dma_hw->ch[SDIO_DMA_CHB].read_addr - (uint32_t)&g_sdio.dma_blocks);
+        dma_ctrl_block_count /= sizeof(g_sdio.dma_blocks[0]);
+
+        // Compute how many complete 512 byte SDIO blocks have been transferred
+        // When transfer ends, dma_ctrl_block_count == g_sdio.total_blocks * 2 + 1
+        // NOTE(review): if dma_ctrl_block_count were ever 0 here, the unsigned
+        // subtraction would wrap to a very large value - this assumes the
+        // first descriptor has always been fetched by now; confirm.
+        g_sdio.blocks_done = (dma_ctrl_block_count - 1) / 2;
+
+        // NOTE: When all blocks are done, rx_poll() still returns SDIO_BUSY once.
+        // This provides a chance to start the SCSI transfer before the last checksums
+        // are computed. Any checksum failures can be indicated in SCSI status after
+        // the data transfer has finished.
+    }
+
+    if (bytes_complete)
+    {
+        *bytes_complete = g_sdio.blocks_done * SDIO_BLOCK_SIZE;
+    }
+
+    if (g_sdio.transfer_state == SDIO_IDLE)
+    {
+        // Verify all remaining checksums.
+        sdio_verify_rx_checksums(g_sdio.total_blocks);
+
+        if (g_sdio.checksum_errors == 0)
+            return SDIO_OK;
+        else
+            return SDIO_ERR_DATA_CRC;
+    }
+    else if ((uint32_t)(millis() - g_sdio.transfer_start_time) > 1000)
+    {
+        dbgmsg("rp2040_sdio_rx_poll() timeout, "
+            "PIO PC: ", (int)pio_sm_get_pc(SDIO_PIO, SDIO_DATA_SM) - (int)g_sdio.pio_data_rx_offset,
+            " RXF: ", (int)pio_sm_get_rx_fifo_level(SDIO_PIO, SDIO_DATA_SM),
+            " TXF: ", (int)pio_sm_get_tx_fifo_level(SDIO_PIO, SDIO_DATA_SM),
+            " DMA CNT: ", dma_hw->ch[SDIO_DMA_CH].al2_transfer_count);
+        rp2040_sdio_stop();
+        return SDIO_ERR_DATA_TIMEOUT;
+    }
+
+    return SDIO_BUSY;
+}
+
+
+/*******************************************************
+ * Data transmission to SD card
+ *******************************************************/
+
+// Begin transmitting the block at index g_sdio.blocks_done of the current
+// write transfer.
+//
+// The first DMA channel feeds the 512 byte payload to the data state machine
+// and chains to the second channel, which sends the checksum held in
+// g_sdio.next_wr_block_checksum followed by the end token. Completion of the
+// second channel raises DMA IRQ 1.
+static void sdio_start_next_block_tx()
+{
+    // Initialize PIO
+    pio_sm_init(SDIO_PIO, SDIO_DATA_SM, g_sdio.pio_data_tx_offset, &g_sdio.pio_cfg_data_tx);
+    
+    // Configure DMA to send the data block payload (512 bytes)
+    dma_channel_config dmacfg = dma_channel_get_default_config(SDIO_DMA_CH);
+    channel_config_set_transfer_data_size(&dmacfg, DMA_SIZE_32);
+    channel_config_set_read_increment(&dmacfg, true);
+    channel_config_set_write_increment(&dmacfg, false);
+    channel_config_set_dreq(&dmacfg, pio_get_dreq(SDIO_PIO, SDIO_DATA_SM, true));
+    channel_config_set_bswap(&dmacfg, true);
+    channel_config_set_chain_to(&dmacfg, SDIO_DMA_CHB);
+    dma_channel_configure(SDIO_DMA_CH, &dmacfg,
+        &SDIO_PIO->txf[SDIO_DATA_SM], g_sdio.data_buf + g_sdio.blocks_done * SDIO_WORDS_PER_BLOCK,
+        SDIO_WORDS_PER_BLOCK, false);
+
+    // Prepare second DMA channel to send the CRC and block end marker
+    // The payload channel's configuration is reused with byte swapping
+    // turned off; the remaining settings carry over unchanged.
+    uint64_t crc = g_sdio.next_wr_block_checksum;
+    g_sdio.end_token_buf[0] = (uint32_t)(crc >> 32);
+    g_sdio.end_token_buf[1] = (uint32_t)(crc >>  0);
+    g_sdio.end_token_buf[2] = 0xFFFFFFFF;
+    channel_config_set_bswap(&dmacfg, false);
+    dma_channel_configure(SDIO_DMA_CHB, &dmacfg,
+        &SDIO_PIO->txf[SDIO_DATA_SM], g_sdio.end_token_buf, 3, false);
+    
+    // Enable IRQ to trigger when block is done
+    // (any pending flag for the channel is cleared first)
+    dma_hw->ints1 = 1 << SDIO_DMA_CHB;
+    dma_set_irq1_channel_mask_enabled(1 << SDIO_DMA_CHB, 1);
+
+    // Initialize register X with nibble count and register Y with response bit count
+    // NOTE(review): the values 1048 and 31 must match what the TX PIO
+    // program expects; that program is not part of this file.
+    pio_sm_put(SDIO_PIO, SDIO_DATA_SM, 1048);
+    pio_sm_exec(SDIO_PIO, SDIO_DATA_SM, pio_encode_out(pio_x, 32));
+    pio_sm_put(SDIO_PIO, SDIO_DATA_SM, 31);
+    pio_sm_exec(SDIO_PIO, SDIO_DATA_SM, pio_encode_out(pio_y, 32));
+    
+    // Initialize pins to output and high
+    pio_sm_exec(SDIO_PIO, SDIO_DATA_SM, pio_encode_set(pio_pins, 15));
+    pio_sm_exec(SDIO_PIO, SDIO_DATA_SM, pio_encode_set(pio_pindirs, 15));
+
+    // Write start token and start the DMA transfer.
+    pio_sm_put(SDIO_PIO, SDIO_DATA_SM, 0xFFFFFFF0);
+    dma_channel_start(SDIO_DMA_CH);
+    
+    // Start state machine
+    pio_sm_set_enabled(SDIO_PIO, SDIO_DATA_SM, true);
+}
+
+// Compute the checksum of the next not-yet-checksummed block of the current
+// write transfer and store it in g_sdio.next_wr_block_checksum.
+// Advances g_sdio.blocks_checksumed by one.
+static void sdio_compute_next_tx_checksum()
+{
+    assert (g_sdio.blocks_done < g_sdio.total_blocks && g_sdio.blocks_checksumed < g_sdio.total_blocks);
+
+    const int next_block = g_sdio.blocks_checksumed;
+    g_sdio.blocks_checksumed++;
+
+    uint32_t *block_words = g_sdio.data_buf + next_block * SDIO_WORDS_PER_BLOCK;
+    g_sdio.next_wr_block_checksum = sdio_crc16_4bit_checksum(block_words, SDIO_WORDS_PER_BLOCK);
+}
+
+// Start transferring data from memory to SD card
+//
+// buffer:     source data, must be 4-byte aligned
+// num_blocks: number of SDIO_BLOCK_SIZE byte blocks to send, at most
+//             SDIO_MAX_BLOCKS; the checksum of the first block is computed
+//             unconditionally, so at least one block is expected
+//
+// Returns SDIO_OK once the first block has been started. The remaining blocks
+// are started from the DMA IRQ handler; progress is reported by
+// rp2040_sdio_tx_poll().
+sdio_status_t rp2040_sdio_tx_start(const uint8_t *buffer, uint32_t num_blocks)
+{
+    // Buffer must be aligned
+    assert(((uint32_t)buffer & 3) == 0 && num_blocks <= SDIO_MAX_BLOCKS);
+
+    g_sdio.transfer_state = SDIO_TX;
+    g_sdio.transfer_start_time = millis();
+    g_sdio.data_buf = (uint32_t*)buffer;
+    g_sdio.blocks_done = 0;
+    g_sdio.total_blocks = num_blocks;
+    g_sdio.blocks_checksumed = 0;
+    g_sdio.checksum_errors = 0;
+
+    // Compute first block checksum
+    sdio_compute_next_tx_checksum();
+
+    // Start first DMA transfer and PIO
+    sdio_start_next_block_tx();
+
+    if (g_sdio.blocks_checksumed < g_sdio.total_blocks)
+    {
+        // Precompute second block checksum
+        // (done here while the first block is being transmitted)
+        sdio_compute_next_tx_checksum();
+    }
+
+    return SDIO_OK;
+}
+
+// Decode the status word that the card returns after a data block write.
+//
+// card_response: raw word read from the data state machine
+//
+// Returns SDIO_OK if the card accepted the block, SDIO_ERR_WRITE_CRC if it
+// reported a CRC error, and SDIO_ERR_WRITE_FAIL for a reported write failure
+// or any unrecognised status. All failures are logged.
+sdio_status_t check_sdio_write_response(uint32_t card_response)
+{
+    // Discard leading 1 bits so that the start bit (0) ends up in the top bit.
+    // The format of response is poorly documented in SDIO spec but refer to e.g.
+    // http://my-cool-projects.blogspot.com/2013/02/the-mysterious-sd-card-crc-status.html
+    uint32_t aligned = card_response;
+    if ((aligned & 0xFFFF0000) == 0xFFFF0000) aligned <<= 16;
+    if ((aligned & 0xFF000000) == 0xFF000000) aligned <<= 8;
+    if ((aligned & 0xF0000000) == 0xF0000000) aligned <<= 4;
+    if ((aligned & 0xC0000000) == 0xC0000000) aligned <<= 2;
+    if ((aligned & 0x80000000) == 0x80000000) aligned <<= 1;
+
+    // The three bits after the start bit carry the status code.
+    switch ((aligned >> 28) & 7)
+    {
+        case 2:
+            return SDIO_OK;
+
+        case 5:
+            logmsg("SDIO card reports write CRC error, status ", card_response);
+            return SDIO_ERR_WRITE_CRC;
+
+        case 6:
+            logmsg("SDIO card reports write failure, status ", card_response);
+            return SDIO_ERR_WRITE_FAIL;
+
+        default:
+            logmsg("SDIO card reports unknown write status ", card_response);
+            return SDIO_ERR_WRITE_FAIL;
+    }
+}
+
+// When a block finishes, this IRQ handler starts the next one
+//
+// Handler for DMA IRQ 1; also called directly from rp2040_sdio_tx_poll().
+// It steps the write state machine:
+//   SDIO_TX           -> SDIO_TX_WAIT_IDLE once both DMA channels are idle
+//   SDIO_TX_WAIT_IDLE -> SDIO_TX (next block) or idle (all done / error)
+//                        once the card's response word is available
+static void rp2040_sdio_tx_irq()
+{
+    // Acknowledge the interrupt for the second DMA channel
+    dma_hw->ints1 = 1 << SDIO_DMA_CHB;
+
+    if (g_sdio.transfer_state == SDIO_TX)
+    {
+        if (!dma_channel_is_busy(SDIO_DMA_CH) && !dma_channel_is_busy(SDIO_DMA_CHB))
+        {
+            // Main data transfer is finished now.
+            // When card is ready, PIO will put card response on RX fifo
+            g_sdio.transfer_state = SDIO_TX_WAIT_IDLE;
+            if (!pio_sm_is_rx_fifo_empty(SDIO_PIO, SDIO_DATA_SM))
+            {
+                // Card is already idle
+                g_sdio.card_response = pio_sm_get(SDIO_PIO, SDIO_DATA_SM);
+            }
+            else
+            {
+                // Use DMA to wait for the response
+                // A single-word transfer into g_sdio.card_response; its
+                // completion invokes this handler again.
+                dma_channel_config dmacfg = dma_channel_get_default_config(SDIO_DMA_CHB);
+                channel_config_set_transfer_data_size(&dmacfg, DMA_SIZE_32);
+                channel_config_set_read_increment(&dmacfg, false);
+                channel_config_set_write_increment(&dmacfg, false);
+                channel_config_set_dreq(&dmacfg, pio_get_dreq(SDIO_PIO, SDIO_DATA_SM, false));
+                dma_channel_configure(SDIO_DMA_CHB, &dmacfg,
+                    &g_sdio.card_response, &SDIO_PIO->rxf[SDIO_DATA_SM], 1, true);
+            }
+        }
+    }
+    
+    // Deliberately a separate "if" rather than "else if": when the response
+    // was already available above, it is processed in this same invocation.
+    if (g_sdio.transfer_state == SDIO_TX_WAIT_IDLE)
+    {
+        if (!dma_channel_is_busy(SDIO_DMA_CHB))
+        {
+            g_sdio.wr_status = check_sdio_write_response(g_sdio.card_response);
+
+            if (g_sdio.wr_status != SDIO_OK)
+            {
+                // Abort the whole transfer; the error is reported through
+                // g_sdio.wr_status by rp2040_sdio_tx_poll().
+                rp2040_sdio_stop();
+                return;
+            }
+
+            g_sdio.blocks_done++;
+            if (g_sdio.blocks_done < g_sdio.total_blocks)
+            {
+                sdio_start_next_block_tx();
+                g_sdio.transfer_state = SDIO_TX;
+
+                if (g_sdio.blocks_checksumed < g_sdio.total_blocks)
+                {
+                    // Precompute the CRC for next block so that it is ready when
+                    // we want to send it.
+                    sdio_compute_next_tx_checksum();
+                }
+            }
+            else
+            {
+                // All blocks written
+                rp2040_sdio_stop();
+            }
+        }    
+    }
+}
+
+// Check if transmission is complete
+//
+// bytes_complete: if non-NULL, receives the number of bytes written so far
+//
+// Returns SDIO_BUSY while the transfer is running, the status of the last
+// write response check (g_sdio.wr_status) once the driver is idle, or
+// SDIO_ERR_DATA_TIMEOUT if the transfer has been running for more than
+// 1000 ms (the transfer is then stopped).
+sdio_status_t rp2040_sdio_tx_poll(uint32_t *bytes_complete)
+{
+    // if (SCB->ICSR & SCB_ICSR_VECTACTIVE_Msk)
+    // #define SCB_ICSR_VECTACTIVE_Msk            (0x1FFUL /*<< SCB_ICSR_VECTACTIVE_Pos*/)       /*!< SCB ICSR: VECTACTIVE Mask */
+    // The low 9 bits of ICSR (the VECTACTIVE field named in the commented-out
+    // line above) are non-zero here when an exception is active.
+    if (scb_hw->icsr & (0x1FFUL))
+    {
+        // Verify that IRQ handler gets called even if we are in hardfault handler
+        rp2040_sdio_tx_irq();
+    }
+
+    if (bytes_complete)
+    {
+        *bytes_complete = g_sdio.blocks_done * SDIO_BLOCK_SIZE;
+    }
+
+    if (g_sdio.transfer_state == SDIO_IDLE)
+    {
+        rp2040_sdio_stop();
+        return g_sdio.wr_status;
+    }
+    else if ((uint32_t)(millis() - g_sdio.transfer_start_time) > 1000)
+    {
+        dbgmsg("rp2040_sdio_tx_poll() timeout, "
+            "PIO PC: ", (int)pio_sm_get_pc(SDIO_PIO, SDIO_DATA_SM) - (int)g_sdio.pio_data_tx_offset,
+            " RXF: ", (int)pio_sm_get_rx_fifo_level(SDIO_PIO, SDIO_DATA_SM),
+            " TXF: ", (int)pio_sm_get_tx_fifo_level(SDIO_PIO, SDIO_DATA_SM),
+            " DMA CNT: ", dma_hw->ch[SDIO_DMA_CH].al2_transfer_count);
+        rp2040_sdio_stop();
+        return SDIO_ERR_DATA_TIMEOUT;
+    }
+
+    return SDIO_BUSY;
+}
+
+// Force everything to idle state
+//
+// Aborts both DMA channels, disables the DMA IRQ 1 source for the second
+// channel, stops the data state machine, switches the four data pins back to
+// inputs and marks the driver idle. The command state machine is left
+// untouched. Always returns SDIO_OK.
+sdio_status_t rp2040_sdio_stop()
+{
+    dma_channel_abort(SDIO_DMA_CH);
+    dma_channel_abort(SDIO_DMA_CHB);
+    dma_set_irq1_channel_mask_enabled(1 << SDIO_DMA_CHB, 0);
+    pio_sm_set_enabled(SDIO_PIO, SDIO_DATA_SM, false);
+    pio_sm_set_consecutive_pindirs(SDIO_PIO, SDIO_DATA_SM, SDIO_D0, 4, false);
+    g_sdio.transfer_state = SDIO_IDLE;
+    return SDIO_OK;
+}
+
+// Initialise (or re-initialise) the SDIO driver.
+//
+// clock_divider: integer PIO clock divider applied to all three programs
+//
+// Claims the state machines and DMA channels on the first call, clears the
+// driver state, reloads the PIO programs, starts the command & clock state
+// machine, routes the SDIO pins to the PIO and installs the DMA IRQ handler.
+// Note that all instruction memory of SDIO_PIO is cleared, not only the
+// programs owned by this driver.
+void rp2040_sdio_init(int clock_divider)
+{
+    // Mark resources as being in use, unless it has been done already.
+    static bool resources_claimed = false;
+    if (!resources_claimed)
+    {
+        pio_sm_claim(SDIO_PIO, SDIO_CMD_SM);
+        pio_sm_claim(SDIO_PIO, SDIO_DATA_SM);
+        dma_channel_claim(SDIO_DMA_CH);
+        dma_channel_claim(SDIO_DMA_CHB);
+        resources_claimed = true;
+    }
+
+    memset(&g_sdio, 0, sizeof(g_sdio));
+
+    // Stop any activity left over from a previous initialisation
+    dma_channel_abort(SDIO_DMA_CH);
+    dma_channel_abort(SDIO_DMA_CHB);
+    pio_sm_set_enabled(SDIO_PIO, SDIO_CMD_SM, false);
+    pio_sm_set_enabled(SDIO_PIO, SDIO_DATA_SM, false);
+
+    // Load PIO programs
+    pio_clear_instruction_memory(SDIO_PIO);
+
+    // Command & clock state machine
+    // The CMD pin serves as out, in, set and jmp pin; CLK is the side-set pin.
+    g_sdio.pio_cmd_clk_offset = pio_add_program(SDIO_PIO, &sdio_cmd_clk_program);
+    pio_sm_config cfg = sdio_cmd_clk_program_get_default_config(g_sdio.pio_cmd_clk_offset);
+    sm_config_set_out_pins(&cfg, SDIO_CMD, 1);
+    sm_config_set_in_pins(&cfg, SDIO_CMD);
+    sm_config_set_set_pins(&cfg, SDIO_CMD, 1);
+    sm_config_set_jmp_pin(&cfg, SDIO_CMD);
+    sm_config_set_sideset_pins(&cfg, SDIO_CLK);
+    sm_config_set_out_shift(&cfg, false, true, 32);
+    sm_config_set_in_shift(&cfg, false, true, 32);
+    sm_config_set_clkdiv_int_frac(&cfg, clock_divider, 0);
+    sm_config_set_mov_status(&cfg, STATUS_TX_LESSTHAN, 2);
+
+    pio_sm_init(SDIO_PIO, SDIO_CMD_SM, g_sdio.pio_cmd_clk_offset, &cfg);
+    pio_sm_set_consecutive_pindirs(SDIO_PIO, SDIO_CMD_SM, SDIO_CLK, 1, true);
+    pio_sm_set_enabled(SDIO_PIO, SDIO_CMD_SM, true);
+
+    // Data reception program
+    // Only the configuration is prepared here; the state machine is
+    // initialised with it in rp2040_sdio_rx_start().
+    g_sdio.pio_data_rx_offset = pio_add_program(SDIO_PIO, &sdio_data_rx_program);
+    g_sdio.pio_cfg_data_rx = sdio_data_rx_program_get_default_config(g_sdio.pio_data_rx_offset);
+    sm_config_set_in_pins(&g_sdio.pio_cfg_data_rx, SDIO_D0);
+    sm_config_set_in_shift(&g_sdio.pio_cfg_data_rx, false, true, 32);
+    sm_config_set_out_shift(&g_sdio.pio_cfg_data_rx, false, true, 32);
+    sm_config_set_clkdiv_int_frac(&g_sdio.pio_cfg_data_rx, clock_divider, 0);
+
+    // Data transmission program
+    // Likewise only prepared here and applied in sdio_start_next_block_tx().
+    g_sdio.pio_data_tx_offset = pio_add_program(SDIO_PIO, &sdio_data_tx_program);
+    g_sdio.pio_cfg_data_tx = sdio_data_tx_program_get_default_config(g_sdio.pio_data_tx_offset);
+    sm_config_set_in_pins(&g_sdio.pio_cfg_data_tx, SDIO_D0);
+    sm_config_set_set_pins(&g_sdio.pio_cfg_data_tx, SDIO_D0, 4);
+    sm_config_set_out_pins(&g_sdio.pio_cfg_data_tx, SDIO_D0, 4);
+    sm_config_set_in_shift(&g_sdio.pio_cfg_data_tx, false, false, 32);
+    sm_config_set_out_shift(&g_sdio.pio_cfg_data_tx, false, true, 32);
+    sm_config_set_clkdiv_int_frac(&g_sdio.pio_cfg_data_tx, clock_divider, 0);
+
+    // Disable SDIO pins input synchronizer.
+    // This reduces input delay.
+    // Because the CLK is driven synchronously to CPU clock,
+    // there should be no metastability problems.
+    SDIO_PIO->input_sync_bypass |= (1 << SDIO_CLK) | (1 << SDIO_CMD)
+                                 | (1 << SDIO_D0) | (1 << SDIO_D1) | (1 << SDIO_D2) | (1 << SDIO_D3);
+
+    // Redirect GPIOs to PIO
+    gpio_set_function(SDIO_CMD, GPIO_FUNC_PIO1);
+    gpio_set_function(SDIO_CLK, GPIO_FUNC_PIO1);
+    gpio_set_function(SDIO_D0, GPIO_FUNC_PIO1);
+    gpio_set_function(SDIO_D1, GPIO_FUNC_PIO1);
+    gpio_set_function(SDIO_D2, GPIO_FUNC_PIO1);
+    gpio_set_function(SDIO_D3, GPIO_FUNC_PIO1);
+
+    // Set up IRQ handler when DMA completes.
+    irq_set_exclusive_handler(DMA_IRQ_1, rp2040_sdio_tx_irq);
+    irq_set_enabled(DMA_IRQ_1, true);
+    // The block below is compiled out; it preserves an alternative that
+    // registers a shared handler when ENABLE_AUDIO_OUTPUT is defined.
+#if 0
+#ifndef ENABLE_AUDIO_OUTPUT
+    irq_set_exclusive_handler(DMA_IRQ_1, rp2040_sdio_tx_irq);
+#else
+    // seem to hit assertion in _exclusive_handler call due to DMA_IRQ_0 being shared?
+    // slightly less efficient to do it this way, so investigate further at some point
+    irq_add_shared_handler(DMA_IRQ_1, rp2040_sdio_tx_irq, 0xFF);
+#endif
+    irq_set_enabled(DMA_IRQ_1, true);
+#endif
+}

+ 73 - 0
lib/ZuluSCSI_platform_RP2350/sdio.h

@@ -0,0 +1,73 @@
+/** 
+ * ZuluSCSI™ - Copyright (c) 2022 Rabbit Hole Computing™
+ * 
+ * ZuluSCSI™ firmware is licensed under the GPL version 3 or any later version. 
+ * 
+ * https://www.gnu.org/licenses/gpl-3.0.html
+ * ----
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version. 
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details. 
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+**/
+
+// SD card access using SDIO for RP2040 platform.
+// This module contains the low-level SDIO bus implementation using
+// the PIO peripheral. The high-level commands are in sd_card_sdio.cpp.
+
+#pragma once
+#include <stdint.h>
+
+// Result codes returned by the rp2040_sdio_* functions declared in this header.
+// SDIO_OK and SDIO_BUSY are non-error states; every SDIO_ERR_* value is a failure.
+enum sdio_status_t {
+    SDIO_OK = 0,                   // Operation completed successfully
+    SDIO_BUSY = 1,                 // Transfer still in progress (see rp2040_sdio_rx_poll)
+    SDIO_ERR_RESPONSE_TIMEOUT = 2, // Timed out waiting for response from card
+    SDIO_ERR_RESPONSE_CRC = 3,     // Response CRC is wrong
+    SDIO_ERR_RESPONSE_CODE = 4,    // Response command code does not match what was sent
+    SDIO_ERR_DATA_TIMEOUT = 5,     // Timed out waiting for data block
+    SDIO_ERR_DATA_CRC = 6,         // CRC for data packet is wrong
+    SDIO_ERR_WRITE_CRC = 7,        // Card reports bad CRC for write
+    SDIO_ERR_WRITE_FAIL = 8,       // Card reports write failure
+};
+
+// Size of one SD data block in bytes; all transfers use this block size.
+#define SDIO_BLOCK_SIZE 512
+// The same block expressed in 32-bit words (512 / 4).
+#define SDIO_WORDS_PER_BLOCK 128
+
+// Execute a command that has 48-bit reply (response types R1, R6, R7)
+// If response is NULL, does not wait for reply.
+// Returns SDIO_OK on success or one of the SDIO_ERR_RESPONSE_* codes.
+// NOTE(review): the exact set of error codes is decided in the implementation file - confirm there.
+sdio_status_t rp2040_sdio_command_R1(uint8_t command, uint32_t arg, uint32_t *response);
+
+// Execute a command that has 136-bit reply (response type R2)
+// Response buffer should have space for 16 bytes (the 128 bit payload)
+sdio_status_t rp2040_sdio_command_R2(uint8_t command, uint32_t arg, uint8_t *response);
+
+// Execute a command that has 48-bit reply but without CRC (response R3)
+sdio_status_t rp2040_sdio_command_R3(uint8_t command, uint32_t arg, uint32_t *response);
+
+// Start transferring data from SD card to memory buffer
+// Transfer block size is always 512 bytes.
+// buffer must therefore have room for num_blocks * SDIO_BLOCK_SIZE bytes.
+// Completion is reported through rp2040_sdio_rx_poll().
+sdio_status_t rp2040_sdio_rx_start(uint8_t *buffer, uint32_t num_blocks);
+
+// Check if reception is complete
+// Returns SDIO_BUSY while transferring, SDIO_OK when done and error on failure.
+// bytes_complete is optional (defaults to nullptr).
+// NOTE(review): presumably receives the number of bytes transferred so far - confirm in the implementation.
+sdio_status_t rp2040_sdio_rx_poll(uint32_t *bytes_complete = nullptr);
+
+// Start transferring data from memory to SD card
+// NOTE(review): presumably also uses 512-byte blocks, so buffer holds
+// num_blocks * SDIO_BLOCK_SIZE bytes - confirm in the implementation.
+sdio_status_t rp2040_sdio_tx_start(const uint8_t *buffer, uint32_t num_blocks);
+
+// Check if transmission is complete
+// NOTE(review): return values presumably mirror rp2040_sdio_rx_poll()
+// (SDIO_BUSY / SDIO_OK / error) - confirm in the implementation.
+sdio_status_t rp2040_sdio_tx_poll(uint32_t *bytes_complete = nullptr);
+
+// Force everything to idle state
+sdio_status_t rp2040_sdio_stop();
+
+// (Re)initialize the SDIO interface
+// clock_divider defaults to 1.
+// NOTE(review): it appears to be applied as the integer clock divider of the
+// PIO state machines - confirm in the implementation.
+void rp2040_sdio_init(int clock_divider = 1);

+ 164 - 0
lib/ZuluSCSI_platform_RP2350/sdio_Pico.pio

@@ -0,0 +1,164 @@
+; ZuluSCSI™ - Copyright (c) 2022 Rabbit Hole Computing™
+; 
+; ZuluSCSI™ firmware is licensed under the GPL version 3 or any later version. 
+; 
+; https://www.gnu.org/licenses/gpl-3.0.html
+; ----
+; This program is free software: you can redistribute it and/or modify
+; it under the terms of the GNU General Public License as published by
+; the Free Software Foundation, either version 3 of the License, or
+; (at your option) any later version. 
+; 
+; This program is distributed in the hope that it will be useful,
+; but WITHOUT ANY WARRANTY; without even the implied warranty of
+; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+; GNU General Public License for more details. 
+; 
+; You should have received a copy of the GNU General Public License
+; along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+; RP2040 PIO program for implementing SD card access in SDIO mode
+; Run "pioasm sdio_Pico.pio sdio_Pico.pio.h" to regenerate the C header from this.
+
+; The RP2040 official work-in-progress code at
+; https://github.com/raspberrypi/pico-extras/tree/master/src/rp2_common/pico_sd_card
+; may be useful reference, but this is independent implementation.
+;
+; For official SDIO specifications, refer to:
+; https://www.sdcard.org/downloads/pls/
+; "SDIO Physical Layer Simplified Specification Version 8.00"
+
+; Clock settings
+; For 3.3V communication the available speeds are:
+; - Default speed: max. 25 MHz clock
+; - High speed:    max. 50 MHz clock
+;
+; From the default RP2040 clock speed of 125 MHz, the closest dividers
+; are 3 for 41.7 MHz and 5 for 25 MHz. The CPU can apply further divider
+; through state machine registers for the initial handshake.
+;
+; Because data is written on the falling edge and read on the rising
+; edge, it is preferable to have a long 0 state and short 1 state.
+;.define CLKDIV 3
+; CLKDIV: number of PIO cycles in one SDIO clock period.
+.define CLKDIV 5
+; D0: extra delay cycles on instructions that hold CLK low (side 0),
+;     so the low phase lasts D0 + 1 cycles.
+; D1: extra delay cycles on instructions that hold CLK high (side 1),
+;     so the high phase lasts D1 + 1 cycles.
+; With integer division (D0 + 1) + (D1 + 1) == CLKDIV; for CLKDIV = 5 this
+; gives D0 = 2, D1 = 1, i.e. 3 cycles low and 2 cycles high.
+.define D0 ((CLKDIV + 1) / 2 - 1)
+.define D1 (CLKDIV/2 - 1)
+; Absolute GPIO number that the data programs watch with "wait ... gpio".
+; NOTE(review): must presumably equal the SDIO_CLK pin of this board's GPIO header - confirm.
+.define SDIO_CLK_GPIO 10
+
+; State machine 0 is used to:
+; - generate continuous clock on SDIO_CLK
+; - send CMD packets
+; - receive response packets
+;
+; Pin mapping for this state machine:
+; - Sideset    : CLK
+; - IN/OUT/SET : CMD
+; - JMP_PIN    : CMD
+;
+; The commands to send are put on TX fifo and must have two words:
+; Word 0 bits 31-24: Number of bits in command minus one (usually 47)
+; Word 0 bits 23-00: First 24 bits of the command packet, shifted out MSB first
+; Word 1 bits 31-08: Last 24 bits of the command packet, shifted out MSB first
+; Word 1 bits 07-00: Number of bits in response minus one (usually 47), or 0 if no response
+;
+; The response is put on RX fifo, starting with the MSB.
+; Partial last word will be padded with zero bits at the top.
+;
+; The state machine EXECCTRL should be set so that STATUS indicates TX FIFO < 2
+; and that AUTOPULL and AUTOPUSH are enabled.
+
+.program sdio_cmd_clk
+    ; One mandatory side-set bit drives CLK on every instruction. Instructions
+    ; alternate "side 0 [D0]" and "side 1 [D1]", so each pair of instructions
+    ; produces one clock period of (D0 + 1) + (D1 + 1) = CLKDIV PIO cycles.
+    ; Any instruction added or removed must keep this alternation intact.
+    .side_set 1
+
+    mov OSR, NULL       side 1 [D1]    ; Make sure OSR is full of zeros to prevent autopull
+
+wait_cmd:
+    ; Idle loop: the clock keeps toggling while polling for a queued command.
+    mov Y, !STATUS      side 0 [D0]    ; Check if TX FIFO has data
+    jmp !Y wait_cmd     side 1 [D1]    ; Y == 0: nothing to send yet, keep polling
+
+load_cmd:
+    out NULL, 32        side 0 [D0]    ; Load first word (trigger autopull)
+    out X, 8            side 1 [D1]    ; Number of bits to send
+    set pins, 1         side 0 [D0]    ; Initial state of CMD is high
+    set pindirs, 1      side 1 [D1]    ; Set SDIO_CMD as output
+
+send_cmd:
+    ; Runs X + 1 times, which is why X is loaded with the bit count minus one.
+    out pins, 1         side 0 [D0]    ; Write output on falling edge of CLK
+    jmp X-- send_cmd    side 1 [D1]
+
+prep_resp:
+    set pindirs, 0      side 0 [D0]    ; Set SDIO_CMD as input
+    out X, 8            side 1 [D1]    ; Get number of bits in response
+    nop                 side 0 [D0]    ; For clock alignment
+    jmp !X resp_done    side 1 [D1]    ; Check if we expect a response
+
+wait_resp:
+    nop                  side 0 [D0]
+    jmp PIN wait_resp    side 1 [D1]    ; Loop until SDIO_CMD = 0
+
+    ; Note: input bits are read at the same time as we write CLK=0.
+    ; Because the host controls the clock, the read happens before
+    ; the card sees the falling clock edge. This gives maximum time
+    ; for the data bit to settle.
+read_resp:
+    ; Runs X + 1 times, one bit per clock period.
+    in PINS, 1          side 0 [D0]    ; Read input data bit
+    jmp X-- read_resp   side 1 [D1]    ; Loop to receive all data bits
+
+resp_done:
+    ; No explicit .wrap is given, so after this instruction execution wraps
+    ; back to the first instruction of the program.
+    push                side 0 [D0]    ; Push the remaining part of response
+
+; State machine 1 is used to send and receive data blocks.
+; Pin mapping for this state machine:
+; - IN / OUT: SDIO_D0-D3
+; - GPIO defined at beginning of this file: SDIO_CLK
+
+; Data reception program
+; This program will wait for initial start of block token and then
+; receive a data block. The application must set number of nibbles
+; to receive minus 1 to Y register before running this program.
+.program sdio_data_rx
+; Timing: the rx_data loop takes (1 + CLKDIV-2) + 1 = CLKDIV PIO cycles per
+; iteration, i.e. exactly one nibble per SDIO clock period. The loop runs
+; X + 1 times, then execution wraps back to wait_start for the next block.
+; NOTE(review): no explicit push is issued here, so the state machine is
+; presumably configured with autopush - confirm in the C setup code.
+
+wait_start:
+    mov X, Y                               ; Reinitialize number of nibbles to receive
+    wait 0 pin 0                           ; Wait for zero state on D0
+    wait 1 gpio SDIO_CLK_GPIO  [CLKDIV-1]  ; Wait for rising edge and then whole clock cycle
+
+rx_data:
+    in PINS, 4                 [CLKDIV-2]  ; Read nibble
+    jmp X--, rx_data                       ; Next nibble until X underflows
+
+; Data transmission program
+;
+; Before running this program, pindirs should be set as output
+; and register X should be initialized with the number of nibbles
+; to send minus 1 (typically 8 + 1024 + 16 + 1 - 1 = 1048)
+; and register Y with the number of response bits minus 1 (typically 31).
+;
+; Words written to TX FIFO must be:
+; - Word 0: start token 0xFFFFFFF0
+; - Word 1-128: transmitted data (512 bytes)
+; - Word 129-130: CRC checksum
+; - Word 131: end token 0xFFFFFFFF
+;
+; After the card reports idle status, RX FIFO will get a word that
+; contains the D0 line response from card.
+
+.program sdio_data_tx
+    ; Wait for CLK low first so that the following wait catches a rising edge.
+    wait 0 gpio SDIO_CLK_GPIO  
+    wait 1 gpio SDIO_CLK_GPIO  [CLKDIV + D1 - 1]; Synchronize so that write occurs on falling edge
+
+tx_loop:
+    ; One iteration takes (D0 + 1) + (D1 + 1) = CLKDIV PIO cycles, i.e. one
+    ; nibble per SDIO clock period. Runs X + 1 times.
+    out PINS, 4                [D0]    ; Write nibble and wait for whole clock cycle
+    jmp X-- tx_loop            [D1]
+
+    set pindirs, 0x00          [D0]    ; Set data bus as input
+
+; Only the response/idle part below is inside the wrap range; the
+; synchronization and transmit code above runs once per program start.
+.wrap_target
+response_loop:
+    ; Collects Y + 1 bits of the card's response, one per clock period.
+    in PINS, 1                 [D1]    ; Read D0 on rising edge
+    jmp Y--, response_loop     [D0]
+
+wait_idle:
+    wait 1 pin 0               [D1]    ; Wait for card to indicate idle condition
+    push                       [D0]    ; Push the response token
+.wrap

+ 121 - 0
lib/ZuluSCSI_platform_RP2350/sdio_Pico.pio.h

@@ -0,0 +1,121 @@
+// -------------------------------------------------- //
+// This file is autogenerated by pioasm; do not edit! //
+// -------------------------------------------------- //
+
+#pragma once
+
+#if !PICO_NO_HARDWARE
+#include "hardware/pio.h"
+#endif
+
+// ------------ //
+// sdio_cmd_clk //
+// ------------ //
+
+#define sdio_cmd_clk_wrap_target 0
+#define sdio_cmd_clk_wrap 17
+
+static const uint16_t sdio_cmd_clk_program_instructions[] = {
+            //     .wrap_target
+    0xb1e3, //  0: mov    osr, null       side 1 [1] 
+    0xa24d, //  1: mov    y, !status      side 0 [2] 
+    0x1161, //  2: jmp    !y, 1           side 1 [1] 
+    0x6260, //  3: out    null, 32        side 0 [2] 
+    0x7128, //  4: out    x, 8            side 1 [1] 
+    0xe201, //  5: set    pins, 1         side 0 [2] 
+    0xf181, //  6: set    pindirs, 1      side 1 [1] 
+    0x6201, //  7: out    pins, 1         side 0 [2] 
+    0x1147, //  8: jmp    x--, 7          side 1 [1] 
+    0xe280, //  9: set    pindirs, 0      side 0 [2] 
+    0x7128, // 10: out    x, 8            side 1 [1] 
+    0xa242, // 11: nop                    side 0 [2] 
+    0x1131, // 12: jmp    !x, 17          side 1 [1] 
+    0xa242, // 13: nop                    side 0 [2] 
+    0x11cd, // 14: jmp    pin, 13         side 1 [1] 
+    0x4201, // 15: in     pins, 1         side 0 [2] 
+    0x114f, // 16: jmp    x--, 15         side 1 [1] 
+    0x8220, // 17: push   block           side 0 [2] 
+            //     .wrap
+};
+
+#if !PICO_NO_HARDWARE
+static const struct pio_program sdio_cmd_clk_program = {
+    .instructions = sdio_cmd_clk_program_instructions,
+    .length = 18,
+    .origin = -1,
+};
+
+static inline pio_sm_config sdio_cmd_clk_program_get_default_config(uint offset) {
+    pio_sm_config c = pio_get_default_sm_config();
+    sm_config_set_wrap(&c, offset + sdio_cmd_clk_wrap_target, offset + sdio_cmd_clk_wrap);
+    sm_config_set_sideset(&c, 1, false, false);
+    return c;
+}
+#endif
+
+// ------------ //
+// sdio_data_rx //
+// ------------ //
+
+#define sdio_data_rx_wrap_target 0
+#define sdio_data_rx_wrap 4
+
+static const uint16_t sdio_data_rx_program_instructions[] = {
+            //     .wrap_target
+    0xa022, //  0: mov    x, y                       
+    0x2020, //  1: wait   0 pin, 0                   
+    0x248A, //  2: wait   1 gpio, 10             [4] 
+    0x4304, //  3: in     pins, 4                [3] 
+    0x0043, //  4: jmp    x--, 3                     
+            //     .wrap
+};
+
+#if !PICO_NO_HARDWARE
+static const struct pio_program sdio_data_rx_program = {
+    .instructions = sdio_data_rx_program_instructions,
+    .length = 5,
+    .origin = -1,
+};
+
+static inline pio_sm_config sdio_data_rx_program_get_default_config(uint offset) {
+    pio_sm_config c = pio_get_default_sm_config();
+    sm_config_set_wrap(&c, offset + sdio_data_rx_wrap_target, offset + sdio_data_rx_wrap);
+    return c;
+}
+#endif
+
+// ------------ //
+// sdio_data_tx //
+// ------------ //
+
+#define sdio_data_tx_wrap_target 5
+#define sdio_data_tx_wrap 8
+
+static const uint16_t sdio_data_tx_program_instructions[] = {
+    0x200A, //  0: wait   0 gpio, 10                 
+    0x258A, //  1: wait   1 gpio, 10             [5] 
+    0x6204, //  2: out    pins, 4                [2] 
+    0x0142, //  3: jmp    x--, 2                 [1] 
+    0xe280, //  4: set    pindirs, 0             [2] 
+            //     .wrap_target
+    0x4101, //  5: in     pins, 1                [1] 
+    0x0285, //  6: jmp    y--, 5                 [2] 
+    0x21a0, //  7: wait   1 pin, 0               [1] 
+    0x8220, //  8: push   block                  [2] 
+            //     .wrap
+};
+
+#if !PICO_NO_HARDWARE
+static const struct pio_program sdio_data_tx_program = {
+    .instructions = sdio_data_tx_program_instructions,
+    .length = 9,
+    .origin = -1,
+};
+
+static inline pio_sm_config sdio_data_tx_program_get_default_config(uint offset) {
+    pio_sm_config c = pio_get_default_sm_config();
+    sm_config_set_wrap(&c, offset + sdio_data_tx_wrap_target, offset + sdio_data_tx_wrap);
+    return c;
+}
+#endif
+

+ 164 - 0
lib/ZuluSCSI_platform_RP2350/sdio_RP2040.pio

@@ -0,0 +1,164 @@
+; ZuluSCSI™ - Copyright (c) 2022 Rabbit Hole Computing™
+; 
+; ZuluSCSI™ firmware is licensed under the GPL version 3 or any later version. 
+; 
+; https://www.gnu.org/licenses/gpl-3.0.html
+; ----
+; This program is free software: you can redistribute it and/or modify
+; it under the terms of the GNU General Public License as published by
+; the Free Software Foundation, either version 3 of the License, or
+; (at your option) any later version. 
+; 
+; This program is distributed in the hope that it will be useful,
+; but WITHOUT ANY WARRANTY; without even the implied warranty of
+; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+; GNU General Public License for more details. 
+; 
+; You should have received a copy of the GNU General Public License
+; along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+; RP2040 PIO program for implementing SD card access in SDIO mode
+; Run "pioasm sdio_RP2040.pio sdio_RP2040.pio.h" to regenerate the C header from this.
+
+; The RP2040 official work-in-progress code at
+; https://github.com/raspberrypi/pico-extras/tree/master/src/rp2_common/pico_sd_card
+; may be useful reference, but this is independent implementation.
+;
+; For official SDIO specifications, refer to:
+; https://www.sdcard.org/downloads/pls/
+; "SDIO Physical Layer Simplified Specification Version 8.00"
+
+; Clock settings
+; For 3.3V communication the available speeds are:
+; - Default speed: max. 25 MHz clock
+; - High speed:    max. 50 MHz clock
+;
+; From the default RP2040 clock speed of 125 MHz, the closest dividers
+; are 3 for 41.7 MHz and 5 for 25 MHz. The CPU can apply further divider
+; through state machine registers for the initial handshake.
+;
+; Because data is written on the falling edge and read on the rising
+; edge, it is preferable to have a long 0 state and short 1 state.
+;.define CLKDIV 3
+; CLKDIV: number of PIO cycles in one SDIO clock period.
+.define CLKDIV 5
+; D0: extra delay cycles on instructions that hold CLK low (side 0),
+;     so the low phase lasts D0 + 1 cycles.
+; D1: extra delay cycles on instructions that hold CLK high (side 1),
+;     so the high phase lasts D1 + 1 cycles.
+; With integer division (D0 + 1) + (D1 + 1) == CLKDIV; for CLKDIV = 5 this
+; gives D0 = 2, D1 = 1, i.e. 3 cycles low and 2 cycles high.
+.define D0 ((CLKDIV + 1) / 2 - 1)
+.define D1 (CLKDIV/2 - 1)
+; Absolute GPIO number that the data programs watch with "wait ... gpio".
+; NOTE(review): must presumably equal the SDIO_CLK pin of this board's GPIO header - confirm.
+.define SDIO_CLK_GPIO 18
+
+; State machine 0 is used to:
+; - generate continuous clock on SDIO_CLK
+; - send CMD packets
+; - receive response packets
+;
+; Pin mapping for this state machine:
+; - Sideset    : CLK
+; - IN/OUT/SET : CMD
+; - JMP_PIN    : CMD
+;
+; The commands to send are put on TX fifo and must have two words:
+; Word 0 bits 31-24: Number of bits in command minus one (usually 47)
+; Word 0 bits 23-00: First 24 bits of the command packet, shifted out MSB first
+; Word 1 bits 31-08: Last 24 bits of the command packet, shifted out MSB first
+; Word 1 bits 07-00: Number of bits in response minus one (usually 47), or 0 if no response
+;
+; The response is put on RX fifo, starting with the MSB.
+; Partial last word will be padded with zero bits at the top.
+;
+; The state machine EXECCTRL should be set so that STATUS indicates TX FIFO < 2
+; and that AUTOPULL and AUTOPUSH are enabled.
+
+.program sdio_cmd_clk
+    ; One mandatory side-set bit drives CLK on every instruction. Instructions
+    ; alternate "side 0 [D0]" and "side 1 [D1]", so each pair of instructions
+    ; produces one clock period of (D0 + 1) + (D1 + 1) = CLKDIV PIO cycles.
+    ; Any instruction added or removed must keep this alternation intact.
+    .side_set 1
+
+    mov OSR, NULL       side 1 [D1]    ; Make sure OSR is full of zeros to prevent autopull
+
+wait_cmd:
+    ; Idle loop: the clock keeps toggling while polling for a queued command.
+    mov Y, !STATUS      side 0 [D0]    ; Check if TX FIFO has data
+    jmp !Y wait_cmd     side 1 [D1]    ; Y == 0: nothing to send yet, keep polling
+
+load_cmd:
+    out NULL, 32        side 0 [D0]    ; Load first word (trigger autopull)
+    out X, 8            side 1 [D1]    ; Number of bits to send
+    set pins, 1         side 0 [D0]    ; Initial state of CMD is high
+    set pindirs, 1      side 1 [D1]    ; Set SDIO_CMD as output
+
+send_cmd:
+    ; Runs X + 1 times, which is why X is loaded with the bit count minus one.
+    out pins, 1         side 0 [D0]    ; Write output on falling edge of CLK
+    jmp X-- send_cmd    side 1 [D1]
+
+prep_resp:
+    set pindirs, 0      side 0 [D0]    ; Set SDIO_CMD as input
+    out X, 8            side 1 [D1]    ; Get number of bits in response
+    nop                 side 0 [D0]    ; For clock alignment
+    jmp !X resp_done    side 1 [D1]    ; Check if we expect a response
+
+wait_resp:
+    nop                  side 0 [D0]
+    jmp PIN wait_resp    side 1 [D1]    ; Loop until SDIO_CMD = 0
+
+    ; Note: input bits are read at the same time as we write CLK=0.
+    ; Because the host controls the clock, the read happens before
+    ; the card sees the falling clock edge. This gives maximum time
+    ; for the data bit to settle.
+read_resp:
+    ; Runs X + 1 times, one bit per clock period.
+    in PINS, 1          side 0 [D0]    ; Read input data bit
+    jmp X-- read_resp   side 1 [D1]    ; Loop to receive all data bits
+
+resp_done:
+    ; No explicit .wrap is given, so after this instruction execution wraps
+    ; back to the first instruction of the program.
+    push                side 0 [D0]    ; Push the remaining part of response
+
+; State machine 1 is used to send and receive data blocks.
+; Pin mapping for this state machine:
+; - IN / OUT: SDIO_D0-D3
+; - GPIO defined at beginning of this file: SDIO_CLK
+
+; Data reception program
+; This program will wait for initial start of block token and then
+; receive a data block. The application must set number of nibbles
+; to receive minus 1 to Y register before running this program.
+.program sdio_data_rx
+; Timing: the rx_data loop takes (1 + CLKDIV-2) + 1 = CLKDIV PIO cycles per
+; iteration, i.e. exactly one nibble per SDIO clock period. The loop runs
+; X + 1 times, then execution wraps back to wait_start for the next block.
+; NOTE(review): no explicit push is issued here, so the state machine is
+; presumably configured with autopush - confirm in the C setup code.
+
+wait_start:
+    mov X, Y                               ; Reinitialize number of nibbles to receive
+    wait 0 pin 0                           ; Wait for zero state on D0
+    wait 1 gpio SDIO_CLK_GPIO  [CLKDIV-1]  ; Wait for rising edge and then whole clock cycle
+
+rx_data:
+    in PINS, 4                 [CLKDIV-2]  ; Read nibble
+    jmp X--, rx_data                       ; Next nibble until X underflows
+
+; Data transmission program
+;
+; Before running this program, pindirs should be set as output
+; and register X should be initialized with the number of nibbles
+; to send minus 1 (typically 8 + 1024 + 16 + 1 - 1 = 1048)
+; and register Y with the number of response bits minus 1 (typically 31).
+;
+; Words written to TX FIFO must be:
+; - Word 0: start token 0xFFFFFFF0
+; - Word 1-128: transmitted data (512 bytes)
+; - Word 129-130: CRC checksum
+; - Word 131: end token 0xFFFFFFFF
+;
+; After the card reports idle status, RX FIFO will get a word that
+; contains the D0 line response from card.
+
+.program sdio_data_tx
+    ; Wait for CLK low first so that the following wait catches a rising edge.
+    wait 0 gpio SDIO_CLK_GPIO  
+    wait 1 gpio SDIO_CLK_GPIO  [CLKDIV + D1 - 1]; Synchronize so that write occurs on falling edge
+
+tx_loop:
+    ; One iteration takes (D0 + 1) + (D1 + 1) = CLKDIV PIO cycles, i.e. one
+    ; nibble per SDIO clock period. Runs X + 1 times.
+    out PINS, 4                [D0]    ; Write nibble and wait for whole clock cycle
+    jmp X-- tx_loop            [D1]
+
+    set pindirs, 0x00          [D0]    ; Set data bus as input
+
+; Only the response/idle part below is inside the wrap range; the
+; synchronization and transmit code above runs once per program start.
+.wrap_target
+response_loop:
+    ; Collects Y + 1 bits of the card's response, one per clock period.
+    in PINS, 1                 [D1]    ; Read D0 on rising edge
+    jmp Y--, response_loop     [D0]
+
+wait_idle:
+    wait 1 pin 0               [D1]    ; Wait for card to indicate idle condition
+    push                       [D0]    ; Push the response token
+.wrap

+ 121 - 0
lib/ZuluSCSI_platform_RP2350/sdio_RP2040.pio.h

@@ -0,0 +1,121 @@
+// -------------------------------------------------- //
+// This file is autogenerated by pioasm; do not edit! //
+// -------------------------------------------------- //
+
+#pragma once
+
+#if !PICO_NO_HARDWARE
+#include "hardware/pio.h"
+#endif
+
+// ------------ //
+// sdio_cmd_clk //
+// ------------ //
+
+#define sdio_cmd_clk_wrap_target 0
+#define sdio_cmd_clk_wrap 17
+
+static const uint16_t sdio_cmd_clk_program_instructions[] = {
+            //     .wrap_target
+    0xb1e3, //  0: mov    osr, null       side 1 [1] 
+    0xa24d, //  1: mov    y, !status      side 0 [2] 
+    0x1161, //  2: jmp    !y, 1           side 1 [1] 
+    0x6260, //  3: out    null, 32        side 0 [2] 
+    0x7128, //  4: out    x, 8            side 1 [1] 
+    0xe201, //  5: set    pins, 1         side 0 [2] 
+    0xf181, //  6: set    pindirs, 1      side 1 [1] 
+    0x6201, //  7: out    pins, 1         side 0 [2] 
+    0x1147, //  8: jmp    x--, 7          side 1 [1] 
+    0xe280, //  9: set    pindirs, 0      side 0 [2] 
+    0x7128, // 10: out    x, 8            side 1 [1] 
+    0xa242, // 11: nop                    side 0 [2] 
+    0x1131, // 12: jmp    !x, 17          side 1 [1] 
+    0xa242, // 13: nop                    side 0 [2] 
+    0x11cd, // 14: jmp    pin, 13         side 1 [1] 
+    0x4201, // 15: in     pins, 1         side 0 [2] 
+    0x114f, // 16: jmp    x--, 15         side 1 [1] 
+    0x8220, // 17: push   block           side 0 [2] 
+            //     .wrap
+};
+
+#if !PICO_NO_HARDWARE
+static const struct pio_program sdio_cmd_clk_program = {
+    .instructions = sdio_cmd_clk_program_instructions,
+    .length = 18,
+    .origin = -1,
+};
+
+static inline pio_sm_config sdio_cmd_clk_program_get_default_config(uint offset) {
+    pio_sm_config c = pio_get_default_sm_config();
+    sm_config_set_wrap(&c, offset + sdio_cmd_clk_wrap_target, offset + sdio_cmd_clk_wrap);
+    sm_config_set_sideset(&c, 1, false, false);
+    return c;
+}
+#endif
+
+// ------------ //
+// sdio_data_rx //
+// ------------ //
+
+#define sdio_data_rx_wrap_target 0
+#define sdio_data_rx_wrap 4
+
+static const uint16_t sdio_data_rx_program_instructions[] = {
+            //     .wrap_target
+    0xa022, //  0: mov    x, y                       
+    0x2020, //  1: wait   0 pin, 0                   
+    0x2492, //  2: wait   1 gpio, 18             [4] 
+    0x4304, //  3: in     pins, 4                [3] 
+    0x0043, //  4: jmp    x--, 3                     
+            //     .wrap
+};
+
+#if !PICO_NO_HARDWARE
+static const struct pio_program sdio_data_rx_program = {
+    .instructions = sdio_data_rx_program_instructions,
+    .length = 5,
+    .origin = -1,
+};
+
+static inline pio_sm_config sdio_data_rx_program_get_default_config(uint offset) {
+    pio_sm_config c = pio_get_default_sm_config();
+    sm_config_set_wrap(&c, offset + sdio_data_rx_wrap_target, offset + sdio_data_rx_wrap);
+    return c;
+}
+#endif
+
+// ------------ //
+// sdio_data_tx //
+// ------------ //
+
+#define sdio_data_tx_wrap_target 5
+#define sdio_data_tx_wrap 8
+
+static const uint16_t sdio_data_tx_program_instructions[] = {
+    0x2012, //  0: wait   0 gpio, 18                 
+    0x2592, //  1: wait   1 gpio, 18             [5] 
+    0x6204, //  2: out    pins, 4                [2] 
+    0x0142, //  3: jmp    x--, 2                 [1] 
+    0xe280, //  4: set    pindirs, 0             [2] 
+            //     .wrap_target
+    0x4101, //  5: in     pins, 1                [1] 
+    0x0285, //  6: jmp    y--, 5                 [2] 
+    0x21a0, //  7: wait   1 pin, 0               [1] 
+    0x8220, //  8: push   block                  [2] 
+            //     .wrap
+};
+
+#if !PICO_NO_HARDWARE
+static const struct pio_program sdio_data_tx_program = {
+    .instructions = sdio_data_tx_program_instructions,
+    .length = 9,
+    .origin = -1,
+};
+
+static inline pio_sm_config sdio_data_tx_program_get_default_config(uint offset) {
+    pio_sm_config c = pio_get_default_sm_config();
+    sm_config_set_wrap(&c, offset + sdio_data_tx_wrap_target, offset + sdio_data_tx_wrap);
+    return c;
+}
+#endif
+

+ 49 - 1
platformio.ini

@@ -1,7 +1,7 @@
 ; PlatformIO Project Configuration File https://docs.platformio.org/page/projectconf.html
 
 [platformio]
-default_envs = ZuluSCSIv1_0, ZuluSCSIv1_0_mini, ZuluSCSIv1_1_plus, ZuluSCSI_RP2040, ZuluSCSI_RP2040_Audio, ZuluSCSI_Pico, ZuluSCSI_Pico_DaynaPORT, ZuluSCSI_BS2
+default_envs = ZuluSCSIv1_0, ZuluSCSIv1_0_mini, ZuluSCSIv1_1_plus, ZuluSCSI_RP2040, ZuluSCSI_RP2040_Audio, ZuluSCSI_Pico, ZuluSCSI_Pico_DaynaPORT, ZuluSCSI_BS2, ZuluSCSI_Pico_2
 
 ; Example platform to serve as a base for porting efforts
 [env:template]
@@ -286,3 +286,51 @@ build_flags =
      -DZULUSCSI_V1_4
 ;     -DPIO_USBFS_DEVICE_MSC
      -DPLATFORM_MASS_STORAGE
+
+
+;========================================
+; ZuluSCSI RP2350 hardware platform, based on the Raspberry Pi foundation RP2350 microcontroller
+[env:ZuluSCSI_Pico_2]
+platform = https://github.com/maxgerhardt/platform-raspberrypi.git#rp2350_picosdk
+platform_packages =
+    framework-arduinopico@https://github.com/earlephilhower/arduino-pico.git
+board_build.core = earlephilhower
+board = rpipico2
+framework = arduino
+; How much flash in bytes the bootloader and main app will be allocated
+; It is used as the starting point for a ROM image saved in flash
+; Changing this will cause issues with boards that already have a ROM drive in flash
+program_flash_allocation = 360448
+extra_scripts =
+    src/build_bootloader.py
+    lib/ZuluSCSI_platform_RP2350/process-linker-script.py
+board_build.ldscript = ${BUILD_DIR}/rp2350.ld
+ldscript_bootloader = lib/ZuluSCSI_platform_RP2350/rp2350_btldr.ld
+lib_deps =
+    SdFat=https://github.com/rabbitholecomputing/SdFat#2.2.0-gpt
+    minIni
+    ZuluSCSI_platform_RP2350
+    SCSI2SD
+    CUEParser
+upload_protocol = cmsis-dap
+debug_tool = cmsis-dap
+debug_build_flags =
+    -O2 -ggdb -g3
+; The values can be adjusted down to get a debug build to fit in to SRAM
+    -DLOGBUFSIZE=4096
+build_flags =
+    -O2 -Isrc -ggdb -g3
+    -Wall -Wno-sign-compare -Wno-ignored-qualifiers
+    -DSPI_DRIVER_SELECT=3
+    -DSD_CHIP_SELECT_MODE=2
+    -DENABLE_DEDICATED_SPI=1
+    -DHAS_SDIO_CLASS
+    -DUSE_ARDUINO=1
+    -DZULUSCSI_PICO
+; The ROM drive starts right after this environment's own program allocation.
+; Reference this section's program_flash_allocation (not the RP2040 one) so the
+; offset always follows the value declared above.
+    -DROMDRIVE_OFFSET=${env:ZuluSCSI_Pico_2.program_flash_allocation}
+; build flags mirroring the framework-arduinopico#v3.6.0-DaynaPORT static library build
+    ; -DPICO_CYW43_ARCH_POLL=1
+    ; -DCYW43_LWIP=0
+    ; -DCYW43_USE_OTP_MAC=0
+    ; -DPLATFORM_MASS_STORAGE
+