فهرست منبع

Add platform support for upcoming RP2040-based ZuluSCSI

Add platform support for upcoming RP2040-based ZuluSCSI
Alex Perez 3 سال پیش
والد
کامیت
2164dfa715

+ 42 - 0
boards/ZuluSCSI_RP2040.json

@@ -0,0 +1,42 @@
+{
+    "name": "ZuluSCSI RP2040",
+    "url": "https://github.com/ZuluSCSI/ZuluSCSI-firmware",
+    "vendor": "ZuluSCSI",
+    "build": {
+        "core": "arduino",
+        "cpu": "cortex-m0plus",
+        "extra_flags": "-DARDUINO_ARCH_RP2040",
+        "f_cpu": "133000000L",
+        "hwids": [
+        [
+            "0x2E8A",
+            "0x00C0"
+        ]
+        ],
+        "mcu": "rp2040",
+        "variant": "RASPBERRY_PI_PICO"
+    },
+    "debug": {
+        "jlink_device": "RP2040_M0_0",
+        "openocd_target": "rp2040.cfg",
+        "svd_path": "rp2040.svd"
+    },
+    "frameworks": [
+        "arduino"
+    ],
+    "upload": {
+        "maximum_ram_size": 270336,
+        "maximum_size": 2097152,
+        "require_upload_port": true,
+        "native_usb": true,
+        "use_1200bps_touch": true,
+        "wait_for_upload_port": false,
+        "protocol": "picotool",
+        "protocols": [
+        "cmsis-dap",
+        "jlink",
+        "raspberrypi-swd",
+        "picotool"
+        ]
+    }
+}

+ 308 - 0
lib/ZuluSCSI_platform_RP2040/ZuluSCSI_platform.cpp

@@ -0,0 +1,308 @@
+#include "ZuluSCSI_platform.h"
+#include "ZuluSCSI_log.h"
+#include "ZuluSCSI_config.h"
+#include <SdFat.h>
+#include <scsi.h>
+#include <assert.h>
+#include <hardware/gpio.h>
+#include <hardware/uart.h>
+#include <hardware/spi.h>
+#include <platform/mbed_error.h>
+
+extern "C" {
+
+const char *g_azplatform_name = PLATFORM_NAME;
+
+void mbed_error_hook(const mbed_error_ctx * error_context);
+
+/***************/
+/* GPIO init   */
+/***************/
+
+// Configure every aspect of a single GPIO pad with one call.
+// The output level and direction are written before the pin function is
+// selected. The fast slew rate bit is only ever set here, never cleared.
+static void gpio_conf(uint gpio, enum gpio_function fn, bool pullup, bool pulldown, bool output, bool initial_state, bool fast_slew)
+{
+    gpio_put(gpio, initial_state);
+    gpio_set_dir(gpio, output);
+    gpio_set_pulls(gpio, pullup, pulldown);
+    gpio_set_function(gpio, fn);
+
+    if (!fast_slew)
+    {
+        return;
+    }
+
+    padsbank0_hw->io[gpio] = padsbank0_hw->io[gpio] | PADS_BANK0_GPIO0_SLEWFAST_BITS;
+}
+
+// Platform bring-up: configures all GPIO pins, reads the DIP switches,
+// starts UART logging on uart0 and installs the mbed error hook.
+// The order of the steps matters because several GPIOs are shared between
+// signals (see ZuluSCSI_platform_gpio.h).
+void azplatform_init()
+{
+    /* First configure the pins that affect external buffer directions.
+     * RP2040 defaults to pulldowns, while these pins have external pull-ups.
+     */
+    //        pin             function       pup   pdown  out    state fast
+    gpio_conf(SCSI_DATA_DIR,  GPIO_FUNC_SIO, false,false, true,  true, true);
+    gpio_conf(SCSI_OUT_RST,   GPIO_FUNC_SIO, false,false, true,  true, true);
+    gpio_conf(SCSI_OUT_BSY,   GPIO_FUNC_SIO, false,false, true,  true, true);
+    gpio_conf(SCSI_OUT_SEL,   GPIO_FUNC_SIO, false,false, true,  true, true);
+
+    /* Check dip switch settings */
+    // The DIP pins are plain inputs with no internal pulls here, and they are
+    // sampled before the same GPIOs are reconfigured for their other roles below.
+    gpio_conf(DIP_INITIATOR,  GPIO_FUNC_SIO, false, false, false, false, false);
+    gpio_conf(DIP_DBGLOG,     GPIO_FUNC_SIO, false, false, false, false, false);
+    gpio_conf(DIP_TERM,       GPIO_FUNC_SIO, false, false, false, false, false);
+
+    // NOTE(review): internal pull-ups are disabled above, so this presumably
+    // refers to external pull-ups on the board - confirm against the schematic.
+    delay(10); // 10 ms delay to let pull-ups do their work
+
+    // A low level on the pin is treated as "switch on"
+    bool initiator = !gpio_get(DIP_INITIATOR);
+    bool dbglog = !gpio_get(DIP_DBGLOG);
+    bool termination = !gpio_get(DIP_TERM);
+
+    /* Initialize logging to SWO pin (UART0) */
+    gpio_conf(SWO_PIN,        GPIO_FUNC_UART,false,false, true,  false, true);
+    uart_init(uart0, 1000000);
+    mbed_set_error_hook(mbed_error_hook);
+
+    azlog("DIP switch settings: initiator ", (int)initiator, ", debug log ", (int)dbglog, ", termination ", (int)termination);
+
+    if (initiator)
+    {
+        azlog("ERROR: SCSI initiator mode is not implemented yet, turn DIP switch off for proper operation!");
+    }
+
+    g_azlog_debug = dbglog;
+    
+    // The termination switch is only reported here; this function does not act on it
+    if (termination)
+    {
+        azlog("SCSI termination is enabled");
+    }
+    else
+    {
+        azlog("NOTE: SCSI termination is disabled");
+    }
+
+    /* Initialize SCSI and SD card pins to required modes.
+     * SCSI pins should be inactive / input at this point.
+     */
+
+    // SCSI data bus direction is switched by DATA_DIR signal.
+    // Pullups make sure that no glitches occur when switching direction.
+    //        pin             function       pup   pdown  out    state fast
+    gpio_conf(SCSI_IO_DB0,    GPIO_FUNC_SIO, true, false, false, true, true);
+    gpio_conf(SCSI_IO_DB1,    GPIO_FUNC_SIO, true, false, false, true, true);
+    gpio_conf(SCSI_IO_DB2,    GPIO_FUNC_SIO, true, false, false, true, true);
+    gpio_conf(SCSI_IO_DB3,    GPIO_FUNC_SIO, true, false, false, true, true);
+    gpio_conf(SCSI_IO_DB4,    GPIO_FUNC_SIO, true, false, false, true, true);
+    gpio_conf(SCSI_IO_DB5,    GPIO_FUNC_SIO, true, false, false, true, true);
+    gpio_conf(SCSI_IO_DB6,    GPIO_FUNC_SIO, true, false, false, true, true);
+    gpio_conf(SCSI_IO_DB7,    GPIO_FUNC_SIO, true, false, false, true, true);
+    gpio_conf(SCSI_IO_DBP,    GPIO_FUNC_SIO, true, false, false, true, true);
+
+    // SCSI control outputs
+    //        pin             function       pup   pdown  out    state fast
+    gpio_conf(SCSI_OUT_IO,    GPIO_FUNC_SIO, false,false, true,  true, true);
+    gpio_conf(SCSI_OUT_MSG,   GPIO_FUNC_SIO, false,false, true,  true, true);
+
+    // REQ pin is switched between PIO and SIO, pull-up makes sure no glitches
+    gpio_conf(SCSI_OUT_REQ,   GPIO_FUNC_SIO, true ,false, true,  true, true);
+
+    // Shared pins are changed to input / output depending on communication phase
+    // When the IN and OUT signal use the same GPIO, the pin is left as an input
+    // with pull-up and the output configuration is skipped.
+    gpio_conf(SCSI_IN_SEL,    GPIO_FUNC_SIO, true, false, false, true, true);
+    if (SCSI_OUT_CD != SCSI_IN_SEL)
+    {
+        gpio_conf(SCSI_OUT_CD,    GPIO_FUNC_SIO, false,false, true,  true, true);
+    }
+
+    // SCSI_OUT_MSG was already configured as an output above; if it shares its
+    // GPIO with SCSI_IN_BSY, the input configuration on the next line wins.
+    gpio_conf(SCSI_IN_BSY,    GPIO_FUNC_SIO, true, false, false, true, true);
+    if (SCSI_OUT_MSG != SCSI_IN_BSY)
+    {
+        gpio_conf(SCSI_OUT_MSG,    GPIO_FUNC_SIO, false,false, true,  true, true);
+    }
+
+    // SCSI control inputs
+    //        pin             function       pup   pdown  out    state fast
+    gpio_conf(SCSI_IN_ACK,    GPIO_FUNC_SIO, true, false, false, true, false);
+    gpio_conf(SCSI_IN_ATN,    GPIO_FUNC_SIO, true, false, false, true, false);
+    gpio_conf(SCSI_IN_RST,    GPIO_FUNC_SIO, true, false, false, true, false);
+
+    // SD card pins
+    // Card is used in SDIO mode for main program, and in SPI mode for crash handler & bootloader.
+    //        pin             function       pup   pdown  out    state fast
+    gpio_conf(SD_SPI_SCK,     GPIO_FUNC_SPI, true, false, true,  true, true);
+    gpio_conf(SD_SPI_MOSI,    GPIO_FUNC_SPI, true, false, true,  true, true);
+    gpio_conf(SD_SPI_MISO,    GPIO_FUNC_SPI, true, false, false, true, true);
+    gpio_conf(SD_SPI_CS,      GPIO_FUNC_SIO, true, false, true,  true, true);
+    gpio_conf(SDIO_D1,        GPIO_FUNC_SIO, true, false, false, true, true);
+    gpio_conf(SDIO_D2,        GPIO_FUNC_SIO, true, false, false, true, true);
+
+    // LED pin
+    gpio_conf(LED_PIN,        GPIO_FUNC_SIO, false,false, true,  false, false);
+
+    // I2C pins
+    //        pin             function       pup   pdown  out    state fast
+    gpio_conf(GPIO_I2C_SCL,   GPIO_FUNC_I2C, true,false, false,  true, true);
+    gpio_conf(GPIO_I2C_SDA,   GPIO_FUNC_I2C, true,false, false,  true, true);
+}
+
+// Late initialization hook of the platform interface.
+// Intentionally empty in this platform implementation.
+void azplatform_late_init()
+{
+    /* This function can usually be left empty.
+     * It can be used for initialization code that should not run in bootloader.
+     */
+}
+
+/*****************************************/
+/* Crash handlers                        */
+/*****************************************/
+
+extern SdFs SD;
+extern uint32_t __StackTop;
+
+// Save the in-memory log to CRASHFILE on the SD card.
+// Used by the crash handler: SD transfer callbacks are disabled first, the
+// card is (re)initialized with SD_CONFIG_CRASH, and the function gives up
+// silently if the file still cannot be opened after the retries.
+void azplatform_emergency_log_save()
+{
+    azplatform_set_sd_callback(NULL, NULL);
+
+    SD.begin(SD_CONFIG_CRASH);
+    FsFile crashfile = SD.open(CRASHFILE, O_WRONLY | O_CREAT | O_TRUNC);
+
+    if (!crashfile.isOpen())
+    {
+        // Try to reinitialize the card a bounded number of times
+        int max_retry = 10;
+        while (max_retry-- > 0 && !SD.begin(SD_CONFIG_CRASH))
+        {
+            // Retry until the card responds or the attempts run out
+        }
+
+        crashfile = SD.open(CRASHFILE, O_WRONLY | O_CREAT | O_TRUNC);
+    }
+
+    if (!crashfile.isOpen())
+    {
+        // Card is unusable, there is nowhere to store the log
+        return;
+    }
+
+    // The buffer is fetched twice with the same position variable.
+    // NOTE(review): presumably each call returns the next contiguous piece of
+    // the log buffer - confirm against azlog_get_buffer() in ZuluSCSI_log.
+    uint32_t startpos = 0;
+    crashfile.write(azlog_get_buffer(&startpos));
+    crashfile.write(azlog_get_buffer(&startpos));
+    crashfile.flush();
+    crashfile.close();
+}
+
+void mbed_error_hook(const mbed_error_ctx * error_context)
+{
+    azlog("--------------");
+    azlog("CRASH!");
+    azlog("Platform: ", g_azplatform_name);
+    azlog("FW Version: ", g_azlog_firmwareversion);
+    azlog("error_status: ", (uint32_t)error_context->error_status);
+    azlog("error_address: ", error_context->error_address);
+    azlog("error_value: ", error_context->error_value);
+
+    uint32_t *p = (uint32_t*)((uint32_t)error_context->thread_current_sp & ~3);
+    for (int i = 0; i < 8; i++)
+    {
+        if (p == &__StackTop) break; // End of stack
+
+        azlog("STACK ", (uint32_t)p, ":    ", p[0], " ", p[1], " ", p[2], " ", p[3]);
+        p += 4;
+    }
+
+    azplatform_emergency_log_save();
+
+    while (1)
+    {
+        // Flash the crash address on the LED
+        // Short pulse means 0, long pulse means 1
+        int base_delay = 1000;
+        for (int i = 31; i >= 0; i--)
+        {
+            LED_OFF();
+            for (int j = 0; j < base_delay; j++) delay_ns(100000);
+
+            int delay = (error_context->error_address & (1 << i)) ? (3 * base_delay) : base_delay;
+            LED_ON();
+            for (int j = 0; j < delay; j++) delay_ns(100000);
+            LED_OFF();
+        }
+
+        for (int j = 0; j < base_delay * 10; j++) delay_ns(100000);
+    }
+}
+
+/*****************************************/
+/* Debug logging and watchdog            */
+/*****************************************/
+
+// This function is called for every log message.
+// The text is written to uart0, which azplatform_init() initializes and
+// routes to SWO_PIN.
+void azplatform_log(const char *s)
+{
+    uart_puts(uart0, s);
+}
+
+// This function can be used to periodically reset watchdog timer for crash handling.
+// It can also be left empty if the platform does not use a watchdog timer.
+void azplatform_reset_watchdog()
+{
+    // Intentionally empty: no watchdog is configured in this file.
+}
+
+/**********************************************/
+/* Mapping from data bytes to GPIO BOP values */
+/**********************************************/
+
+/* A lookup table is the fastest way to calculate parity and convert the IO pin mapping for data bus.
+ * For RP2040 we expect that the bits are consecutive and in order.
+ */
+
+// PARITY(n) is 1 when n has an even number of set bits in its low 8 bits.
+#define PARITY(n) ((1 ^ (n) ^ ((n)>>1) ^ ((n)>>2) ^ ((n)>>3) ^ ((n)>>4) ^ ((n)>>5) ^ ((n)>>6) ^ ((n)>>7)) & 1)
+
+// X(n) builds the GPIO word for data byte n. The levels are inverted: a data
+// or parity bit that is 1 leaves the corresponding GPIO bit at 0.
+#define X(n) (\
+    (((n) & 0x01) ? 0 : (1 << SCSI_IO_DB0)) | \
+    (((n) & 0x02) ? 0 : (1 << SCSI_IO_DB1)) | \
+    (((n) & 0x04) ? 0 : (1 << SCSI_IO_DB2)) | \
+    (((n) & 0x08) ? 0 : (1 << SCSI_IO_DB3)) | \
+    (((n) & 0x10) ? 0 : (1 << SCSI_IO_DB4)) | \
+    (((n) & 0x20) ? 0 : (1 << SCSI_IO_DB5)) | \
+    (((n) & 0x40) ? 0 : (1 << SCSI_IO_DB6)) | \
+    (((n) & 0x80) ? 0 : (1 << SCSI_IO_DB7)) | \
+    (PARITY(n)    ? 0 : (1 << SCSI_IO_DBP)) \
+)
+
+// Indexed by data byte; each entry is the GPIO output word including parity.
+const uint32_t g_scsi_parity_lookup[256] =
+{
+    X(0x00), X(0x01), X(0x02), X(0x03), X(0x04), X(0x05), X(0x06), X(0x07), X(0x08), X(0x09), X(0x0a), X(0x0b), X(0x0c), X(0x0d), X(0x0e), X(0x0f),
+    X(0x10), X(0x11), X(0x12), X(0x13), X(0x14), X(0x15), X(0x16), X(0x17), X(0x18), X(0x19), X(0x1a), X(0x1b), X(0x1c), X(0x1d), X(0x1e), X(0x1f),
+    X(0x20), X(0x21), X(0x22), X(0x23), X(0x24), X(0x25), X(0x26), X(0x27), X(0x28), X(0x29), X(0x2a), X(0x2b), X(0x2c), X(0x2d), X(0x2e), X(0x2f),
+    X(0x30), X(0x31), X(0x32), X(0x33), X(0x34), X(0x35), X(0x36), X(0x37), X(0x38), X(0x39), X(0x3a), X(0x3b), X(0x3c), X(0x3d), X(0x3e), X(0x3f),
+    X(0x40), X(0x41), X(0x42), X(0x43), X(0x44), X(0x45), X(0x46), X(0x47), X(0x48), X(0x49), X(0x4a), X(0x4b), X(0x4c), X(0x4d), X(0x4e), X(0x4f),
+    X(0x50), X(0x51), X(0x52), X(0x53), X(0x54), X(0x55), X(0x56), X(0x57), X(0x58), X(0x59), X(0x5a), X(0x5b), X(0x5c), X(0x5d), X(0x5e), X(0x5f),
+    X(0x60), X(0x61), X(0x62), X(0x63), X(0x64), X(0x65), X(0x66), X(0x67), X(0x68), X(0x69), X(0x6a), X(0x6b), X(0x6c), X(0x6d), X(0x6e), X(0x6f),
+    X(0x70), X(0x71), X(0x72), X(0x73), X(0x74), X(0x75), X(0x76), X(0x77), X(0x78), X(0x79), X(0x7a), X(0x7b), X(0x7c), X(0x7d), X(0x7e), X(0x7f),
+    X(0x80), X(0x81), X(0x82), X(0x83), X(0x84), X(0x85), X(0x86), X(0x87), X(0x88), X(0x89), X(0x8a), X(0x8b), X(0x8c), X(0x8d), X(0x8e), X(0x8f),
+    X(0x90), X(0x91), X(0x92), X(0x93), X(0x94), X(0x95), X(0x96), X(0x97), X(0x98), X(0x99), X(0x9a), X(0x9b), X(0x9c), X(0x9d), X(0x9e), X(0x9f),
+    X(0xa0), X(0xa1), X(0xa2), X(0xa3), X(0xa4), X(0xa5), X(0xa6), X(0xa7), X(0xa8), X(0xa9), X(0xaa), X(0xab), X(0xac), X(0xad), X(0xae), X(0xaf),
+    X(0xb0), X(0xb1), X(0xb2), X(0xb3), X(0xb4), X(0xb5), X(0xb6), X(0xb7), X(0xb8), X(0xb9), X(0xba), X(0xbb), X(0xbc), X(0xbd), X(0xbe), X(0xbf),
+    X(0xc0), X(0xc1), X(0xc2), X(0xc3), X(0xc4), X(0xc5), X(0xc6), X(0xc7), X(0xc8), X(0xc9), X(0xca), X(0xcb), X(0xcc), X(0xcd), X(0xce), X(0xcf),
+    X(0xd0), X(0xd1), X(0xd2), X(0xd3), X(0xd4), X(0xd5), X(0xd6), X(0xd7), X(0xd8), X(0xd9), X(0xda), X(0xdb), X(0xdc), X(0xdd), X(0xde), X(0xdf),
+    X(0xe0), X(0xe1), X(0xe2), X(0xe3), X(0xe4), X(0xe5), X(0xe6), X(0xe7), X(0xe8), X(0xe9), X(0xea), X(0xeb), X(0xec), X(0xed), X(0xee), X(0xef),
+    X(0xf0), X(0xf1), X(0xf2), X(0xf3), X(0xf4), X(0xf5), X(0xf6), X(0xf7), X(0xf8), X(0xf9), X(0xfa), X(0xfb), X(0xfc), X(0xfd), X(0xfe), X(0xff)
+};
+
+// Both helper macros are only needed to build the table
+#undef X
+#undef PARITY
+
+} /* extern "C" */
+
+/* Logging from mbed */
+
+// mbed::FileHandle implementation that forwards written bytes to azlog_raw().
+// Reading always returns 0 bytes; seek, close and size do nothing useful.
+static class LogTarget: public mbed::FileHandle {
+public:
+    // Nothing can be read back from the log target
+    virtual ssize_t read(void *buffer, size_t size) { return 0; }
+
+    // Pass each byte to azlog_raw() as a one-character string.
+    // Always reports the full size as written.
+    virtual ssize_t write(const void *buffer, size_t size)
+    {
+        // A bit inefficient but mbed seems to write() one character
+        // at a time anyways.
+        const char *chars = (const char*)buffer;
+        for (size_t i = 0; i < size; i++) // size_t index matches the type of size
+        {
+            char buf[2] = {chars[i], 0};
+            azlog_raw(buf);
+        }
+        return size;
+    }
+
+    // Seeking is not supported; the requested offset is simply echoed back
+    virtual off_t seek(off_t offset, int whence = SEEK_SET) { return offset; }
+    virtual int close() { return 0; }
+    virtual off_t size() { return 0; }
+} g_LogTarget;
+
+// Console override in the mbed namespace: hands out g_LogTarget regardless of
+// the fd argument, so whatever is written through it goes to azlog_raw().
+mbed::FileHandle *mbed::mbed_override_console(int fd)
+{
+    return &g_LogTarget;
+}

+ 122 - 0
lib/ZuluSCSI_platform_RP2040/ZuluSCSI_platform.h

@@ -0,0 +1,122 @@
+// Platform-specific definitions for ZuluSCSI RP2040 hardware.
+
+#pragma once
+
+#include <stdint.h>
+#include <Arduino.h>
+#include "ZuluSCSI_platform_gpio.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* These are used in debug output and default SCSI strings */
+extern const char *g_azplatform_name;
+#define PLATFORM_NAME "ZuluSCSI RP2040"
+#define PLATFORM_REVISION "2.0"
+#define SD_USE_SDIO 1
+
+// Debug logging function, can be used to print to e.g. serial port.
+// May get called from interrupt handlers.
+void azplatform_log(const char *s);
+
+// Timing and delay functions.
+// Arduino platform already provides these
+unsigned long millis(void);
+void delay(unsigned long ms);
+
+// Short delay that can be called from interrupt mode.
+// The nanosecond count is rounded up to whole microseconds before waiting.
+static inline void delay_ns(unsigned long ns)
+{
+    unsigned long whole_us = (ns + 999) / 1000;
+    delayMicroseconds(whole_us);
+}
+
+// Approximate fast delay
+// Executes five NOP instructions, so the real duration depends on the CPU clock.
+// NOTE(review): the name suggests about 100 ns - confirm at the configured f_cpu.
+static inline void delay_100ns()
+{
+    asm volatile ("nop \n nop \n nop \n nop \n nop");
+}
+
+// Initialize SD card and GPIO configuration
+void azplatform_init();
+
+// Initialization for main application, not used for bootloader
+void azplatform_late_init();
+
+// Reset the watchdog timer, if the platform uses one
+void azplatform_reset_watchdog();
+
+// Set callback that will be called during data transfer to/from SD card.
+// This can be used to implement simultaneous transfer to SCSI bus.
+typedef void (*sd_callback_t)(uint32_t bytes_complete);
+void azplatform_set_sd_callback(sd_callback_t func, const uint8_t *buffer);
+
+// Below are GPIO access definitions that are used from scsiPhy.cpp.
+
+// Write a single SCSI pin.
+// Example use: SCSI_OUT(BSY, 1) sets SCSI_OUT_BSY to low (active) state.
+// The pin argument is pasted onto SCSI_OUT_, so it must name one of the
+// SCSI_OUT_* definitions.
+#define SCSI_OUT(pin, state) \
+    (*((state) ? &sio_hw->gpio_clr : &sio_hw->gpio_set) = 1 << (SCSI_OUT_ ## pin))
+
+// Read a single SCSI pin.
+// Example use: SCSI_IN(ATN), returns 1 for active low state.
+#define SCSI_IN(pin) \
+    ((sio_hw->gpio_in & (1 << (SCSI_IN_ ## pin))) ? 0 : 1)
+
+// Enable driving of shared control pins
+#define SCSI_ENABLE_CONTROL_OUT() \
+    (sio_hw->gpio_oe_set = (1 << SCSI_OUT_CD) | \
+                           (1 << SCSI_OUT_MSG))
+
+// Set SCSI data bus to output
+#define SCSI_ENABLE_DATA_OUT() \
+    (sio_hw->gpio_clr = (1 << SCSI_DATA_DIR), \
+     sio_hw->gpio_oe_set = SCSI_IO_DATA_MASK)
+
+// Write SCSI data bus, also sets REQ to inactive.
+// The whole expansion is parenthesized so it behaves as a single expression.
+extern const uint32_t g_scsi_parity_lookup[256];
+#define SCSI_OUT_DATA(data) \
+    (gpio_put_masked(SCSI_IO_DATA_MASK | (1 << SCSI_OUT_REQ), \
+                     g_scsi_parity_lookup[(uint8_t)(data)] | (1 << SCSI_OUT_REQ)), \
+     SCSI_ENABLE_DATA_OUT())
+
+// Release SCSI data bus and REQ signal
+#define SCSI_RELEASE_DATA_REQ() \
+    (sio_hw->gpio_oe_clr = SCSI_IO_DATA_MASK, \
+     sio_hw->gpio_set = (1 << SCSI_DATA_DIR) | (1 << SCSI_OUT_REQ))
+
+// Release all SCSI outputs
+// The shared CD and MSG pins stop being driven, and every output is set high.
+#define SCSI_RELEASE_OUTPUTS() \
+    (SCSI_RELEASE_DATA_REQ(), \
+     sio_hw->gpio_oe_clr = (1 << SCSI_OUT_CD) | \
+                           (1 << SCSI_OUT_MSG), \
+     sio_hw->gpio_set = (1 << SCSI_OUT_IO) | \
+                        (1 << SCSI_OUT_CD) | \
+                        (1 << SCSI_OUT_MSG) | \
+                        (1 << SCSI_OUT_RST) | \
+                        (1 << SCSI_OUT_BSY) | \
+                        (1 << SCSI_OUT_REQ) | \
+                        (1 << SCSI_OUT_SEL))
+
+// Read SCSI data bus
+// Returns the inverted pin levels of all bits in SCSI_IO_DATA_MASK, so the
+// parity bit is included. The data argument is not used by the expansion.
+#define SCSI_IN_DATA(data) \
+    ((~sio_hw->gpio_in & SCSI_IO_DATA_MASK) >> SCSI_IO_SHIFT)
+
+#ifdef __cplusplus
+}
+
+// SD card driver for SdFat
+
+#ifdef SD_USE_SDIO
+class SdioConfig;
+extern SdioConfig g_sd_sdio_config;
+#define SD_CONFIG g_sd_sdio_config
+#define SD_CONFIG_CRASH g_sd_sdio_config
+#else
+class SdSpiConfig;
+extern SdSpiConfig g_sd_spi_config;
+#define SD_CONFIG g_sd_spi_config
+#define SD_CONFIG_CRASH g_sd_spi_config
+#endif
+
+#endif

+ 71 - 0
lib/ZuluSCSI_platform_RP2040/ZuluSCSI_platform_gpio.h

@@ -0,0 +1,71 @@
+// GPIO definitions for ZuluSCSI RP2040-based hardware
+
+#pragma once
+
+#include <hardware/gpio.h>
+
+// SCSI data input/output port.
+// The data bus uses external bidirectional buffer, with
+// direction controlled by DATA_DIR pin.
+#define SCSI_IO_DB0  0
+#define SCSI_IO_DB1  1
+#define SCSI_IO_DB2  2
+#define SCSI_IO_DB3  3
+#define SCSI_IO_DB4  4
+#define SCSI_IO_DB5  5
+#define SCSI_IO_DB6  6
+#define SCSI_IO_DB7  7
+#define SCSI_IO_DBP  8
+#define SCSI_IO_DATA_MASK 0x1FF
+#define SCSI_IO_SHIFT 0
+
+// Data direction control
+#define SCSI_DATA_DIR 17
+
+// SCSI output status lines
+// CD uses the same GPIO as SCSI_IN_SEL (11) and MSG the same GPIO as
+// SCSI_IN_BSY (13).
+#define SCSI_OUT_IO   12
+#define SCSI_OUT_CD   11
+#define SCSI_OUT_MSG  13
+#define SCSI_OUT_RST  28
+#define SCSI_OUT_BSY  26
+#define SCSI_OUT_REQ  9
+#define SCSI_OUT_SEL  24
+
+// SCSI input status signals
+// SEL and BSY are on the GPIOs also used for SCSI_OUT_CD and SCSI_OUT_MSG.
+#define SCSI_IN_SEL  11
+#define SCSI_IN_ACK  10
+#define SCSI_IN_ATN  29
+#define SCSI_IN_BSY  13
+#define SCSI_IN_RST  27
+
+// Status LED pins
+// LED_ON() drives the pin high and LED_OFF() drives it low. Both expansions
+// are parenthesized so they act as a single expression.
+#define LED_PIN      25
+#define LED_ON()     (sio_hw->gpio_set = 1 << LED_PIN)
+#define LED_OFF()    (sio_hw->gpio_clr = 1 << LED_PIN)
+
+// SD card pins in SDIO mode
+#define SDIO_CLK 18
+#define SDIO_CMD 19
+#define SDIO_D0  20
+#define SDIO_D1  21
+#define SDIO_D2  22
+#define SDIO_D3  23
+
+// SD card pins in SPI mode
+#define SD_SPI       spi0
+#define SD_SPI_SCK   18
+#define SD_SPI_MOSI  19
+#define SD_SPI_MISO  20
+#define SD_SPI_CS    23
+
+// IO expander I2C
+#define GPIO_I2C_SDA 14
+#define GPIO_I2C_SCL 15
+
+// DIP switch pins
+// Each switch is on a GPIO that another signal in this file also uses:
+// DIP_INITIATOR with SCSI_IN_ACK (10), DIP_DBGLOG with SWO_PIN (16) and
+// DIP_TERM with SCSI_OUT_REQ (9).
+#define DIP_INITIATOR 10
+#define DIP_DBGLOG 16
+#define DIP_TERM 9
+
+// Other pins
+// SWO_PIN is on the same GPIO as DIP_DBGLOG.
+#define SWO_PIN 16

+ 5 - 0
lib/ZuluSCSI_platform_RP2040/bsp.h

@@ -0,0 +1,5 @@
+// Dummy file for SCSI2SD.
+
+#pragma once
+
+#define S2S_DMA_ALIGN

+ 159 - 0
lib/ZuluSCSI_platform_RP2040/rp2040.ld

@@ -0,0 +1,159 @@
+/* Memory regions available to the sections of this script.
+ * RAM ends at 0x2003C000, so the 16k between it and SCRATCH_X is not
+ * assigned to any region.
+ */
+MEMORY
+{
+    FLASH(rx) : ORIGIN = 0x10000000, LENGTH = 2048k
+    RAM(rwx) : ORIGIN = 0x20000000, LENGTH = 240k  /* Leave space for pico-debug */
+    SCRATCH_X(rwx) : ORIGIN = 0x20040000, LENGTH = 4k
+    SCRATCH_Y(rwx) : ORIGIN = 0x20041000, LENGTH = 4k
+}
+ENTRY(_entry_point)
+SECTIONS
+{
+    .flash_begin : {
+        __flash_binary_start = .;
+    } > FLASH
+    .boot2 : {
+        __boot2_start__ = .;
+        KEEP (*(.boot2))
+        __boot2_end__ = .;
+    } > FLASH
+    ASSERT(__boot2_end__ - __boot2_start__ == 256,
+        "ERROR: Pico second stage bootloader must be 256 bytes in size")
+    .text : {
+        __logical_binary_start = .;
+        KEEP (*(.vectors))
+        KEEP (*(.binary_info_header))
+        __binary_info_header_end = .;
+        KEEP (*(.reset))
+        KEEP (*(.init))
+        *(.fini)
+        *crtbegin.o(.ctors)
+        *crtbegin?.o(.ctors)
+        *(EXCLUDE_FILE(*crtend?.o *crtend.o) .ctors)
+        *(SORT(.ctors.*))
+        *(.ctors)
+        *crtbegin.o(.dtors)
+        *crtbegin?.o(.dtors)
+        *(EXCLUDE_FILE(*crtend?.o *crtend.o) .dtors)
+        *(SORT(.dtors.*))
+        *(.dtors)
+        *(.eh_frame*)
+        . = ALIGN(4);
+        *(.text)
+        *(.text*)
+    } > FLASH
+    .rodata : {
+        . = ALIGN(4);
+        *(SORT_BY_ALIGNMENT(SORT_BY_NAME(.flashdata*)))
+        *(.rodata)
+        *(.rodata*)
+        . = ALIGN(4);
+    } > FLASH
+    .ARM.extab :
+    {
+        *(.ARM.extab* .gnu.linkonce.armextab.*)
+    } > FLASH
+    __exidx_start = .;
+    .ARM.exidx :
+    {
+        *(.ARM.exidx* .gnu.linkonce.armexidx.*)
+    } > FLASH
+    __exidx_end = .;
+    . = ALIGN(4);
+    __binary_info_start = .;
+    .binary_info :
+    {
+        KEEP(*(.binary_info.keep.*))
+        *(.binary_info.*)
+    } > FLASH
+    __binary_info_end = .;
+    . = ALIGN(4);
+    __etext = .;
+   .ram_vector_table (COPY): {
+        *(.ram_vector_table)
+    } > RAM
+    .data : {
+        __data_start__ = .;
+        *(vtable)
+        *(.time_critical*)
+        . = ALIGN(4);
+        *(.data*)
+        . = ALIGN(4);
+        *(.after_data.*)
+        . = ALIGN(4);
+        PROVIDE_HIDDEN (__mutex_array_start = .);
+        KEEP(*(SORT(.mutex_array.*)))
+        KEEP(*(.mutex_array))
+        PROVIDE_HIDDEN (__mutex_array_end = .);
+        . = ALIGN(4);
+        PROVIDE_HIDDEN (__preinit_array_start = .);
+        KEEP(*(SORT(.preinit_array.*)))
+        KEEP(*(.preinit_array))
+        PROVIDE_HIDDEN (__preinit_array_end = .);
+        . = ALIGN(4);
+        PROVIDE_HIDDEN (__init_array_start = .);
+        KEEP(*(SORT(.init_array.*)))
+        KEEP(*(.init_array))
+        PROVIDE_HIDDEN (__init_array_end = .);
+        . = ALIGN(4);
+        PROVIDE_HIDDEN (__fini_array_start = .);
+        *(SORT(.fini_array.*))
+        *(.fini_array)
+        PROVIDE_HIDDEN (__fini_array_end = .);
+        *(.jcr)
+        . = ALIGN(4);
+        __data_end__ = .;
+    } > RAM AT> FLASH
+    .uninitialized_data (COPY): {
+        . = ALIGN(4);
+        *(.uninitialized_data*)
+    } > RAM
+    .scratch_x : {
+        __scratch_x_start__ = .;
+        *(.scratch_x.*)
+        . = ALIGN(4);
+        __scratch_x_end__ = .;
+    } > SCRATCH_X AT > FLASH
+    __scratch_x_source__ = LOADADDR(.scratch_x);
+    .scratch_y : {
+        __scratch_y_start__ = .;
+        *(.scratch_y.*)
+        . = ALIGN(4);
+        __scratch_y_end__ = .;
+    } > SCRATCH_Y AT > FLASH
+    __scratch_y_source__ = LOADADDR(.scratch_y);
+    .bss : {
+        . = ALIGN(4);
+        __bss_start__ = .;
+        *(SORT_BY_ALIGNMENT(SORT_BY_NAME(.bss*)))
+        *(COMMON)
+        . = ALIGN(4);
+        __bss_end__ = .;
+    } > RAM
+    .heap (COPY):
+    {
+        __end__ = .;
+        PROVIDE(end = .);
+        *(.heap*)
+        . = ORIGIN(RAM) + LENGTH(RAM) - 0x400;
+        __HeapLimit = .;
+    } > RAM
+    .stack1_dummy (COPY):
+    {
+        *(.stack1*)
+    } > SCRATCH_X
+    .stack_dummy (COPY):
+    {
+        *(.stack*)
+    } > RAM
+    .flash_end : {
+        __flash_binary_end = .;
+    } > FLASH
+    __StackTop = ORIGIN(RAM) + LENGTH(RAM);
+    __StackLimit = __StackTop - 0x400;
+    __StackOneTop = ORIGIN(SCRATCH_X) + LENGTH(SCRATCH_X);
+    __StackOneBottom = __StackOneTop - SIZEOF(.stack1_dummy);
+    __StackBottom = __StackTop - SIZEOF(.stack_dummy);
+    PROVIDE(__stack = __StackTop);
+    ASSERT(__StackLimit >= __HeapLimit, "region RAM overflowed")
+    ASSERT( __binary_info_header_end - __logical_binary_start <= 256, "Binary info must be in first 256 bytes of the binary")
+}

+ 794 - 0
lib/ZuluSCSI_platform_RP2040/rp2040_sdio.cpp

@@ -0,0 +1,794 @@
+// Implementation of SDIO communication for RP2040
+//
+// The official RP2040 work-in-progress code at
+// https://github.com/raspberrypi/pico-extras/tree/master/src/rp2_common/pico_sd_card
+// may be a useful reference, but this is an independent implementation.
+//
+// For official SDIO specifications, refer to:
+// https://www.sdcard.org/downloads/pls/
+// "SDIO Physical Layer Simplified Specification Version 8.00"
+
+#include "rp2040_sdio.h"
+#include "rp2040_sdio.pio.h"
+#include <hardware/pio.h>
+#include <hardware/dma.h>
+#include <hardware/gpio.h>
+#include <ZuluSCSI_platform.h>
+#include <ZuluSCSI_log.h>
+
+#define SDIO_PIO pio1 // PIO instance used for both SDIO state machines
+#define SDIO_CMD_SM 0 // State machine index that runs the command & clock program
+#define SDIO_DATA_SM 1 // State machine index that runs the data RX / TX programs
+#define SDIO_DMA_CH 1 // DMA channel that moves words to / from the PIO FIFOs
+#define SDIO_DMA_CHB 2 // Secondary DMA channel that SDIO_DMA_CH chains to
+
+// Maximum number of 512 byte blocks to transfer in one request
+#define SDIO_MAX_BLOCKS 256
+
+enum sdio_transfer_state_t { SDIO_IDLE, SDIO_RX, SDIO_TX, SDIO_TX_WAIT_IDLE}; // State of the data transfer in progress
+
+// Driver state shared by the command, reception and transmission functions
+// and by the DMA interrupt handler.
+static struct {
+    // Load offsets and state machine configurations of the PIO programs
+    uint32_t pio_cmd_clk_offset;
+    uint32_t pio_data_rx_offset;
+    pio_sm_config pio_cfg_data_rx;
+    uint32_t pio_data_tx_offset;
+    pio_sm_config pio_cfg_data_tx;
+
+    // State of the data transfer that is in progress
+    sdio_transfer_state_t transfer_state;
+    uint32_t transfer_start_time; // millis() value taken when the transfer was started
+    uint32_t *data_buf; // Caller's data buffer, accessed as 32-bit words
+    uint32_t blocks_done; // Number of blocks transferred so far
+    uint32_t total_blocks; // Total number of blocks to transfer
+    uint32_t blocks_checksumed; // Number of blocks that have had CRC calculated
+    uint32_t checksum_errors; // Number of checksum errors detected
+
+    // Variables for block writes
+    uint64_t next_wr_block_checksum; // CRC of the next block to be sent
+    uint32_t end_token_buf[3]; // CRC and end token for write block
+    sdio_status_t wr_status; // Result of the latest card write response check
+    uint32_t card_response; // Raw write response word read from the PIO RX FIFO
+
+    // Variables for block reads
+    // The descriptors are used to perform DMA into data buffers and checksum buffers separately.
+    // Every block takes two entries (data, then checksum) and the list is closed by an
+    // all-zero terminator entry. A request may be SDIO_MAX_BLOCKS long, so the array needs
+    // SDIO_MAX_BLOCKS * 2 + 1 entries; without the extra entry the terminator would be
+    // written past the end of the array and into received_checksums.
+    struct {
+        void * write_addr;
+        uint32_t transfer_count;
+    } dma_blocks[SDIO_MAX_BLOCKS * 2 + 1];
+    struct {
+        uint32_t top;
+        uint32_t bottom;
+    } received_checksums[SDIO_MAX_BLOCKS];
+} g_sdio;
+
+void rp2040_sdio_dma_irq(); // NOTE(review): no definition is visible in this part of the file; the handler installed on DMA_IRQ_1 is rp2040_sdio_tx_irq - confirm this declaration is still needed
+
+/*******************************************************
+ * Checksum algorithms
+ *******************************************************/
+
+// Lookup table for calculating the CRC-7 checksum that is used in SDIO command and response packets.
+// Usage (every entry is even, so the running value always has bit 0 clear):
+//    uint8_t crc = 0;
+//    crc = crc7_table[crc ^ byte];
+//    .. repeat for every byte ..
+static const uint8_t crc7_table[256] = {
+	0x00, 0x12, 0x24, 0x36, 0x48, 0x5a, 0x6c, 0x7e,	0x90, 0x82, 0xb4, 0xa6, 0xd8, 0xca, 0xfc, 0xee,
+	0x32, 0x20, 0x16, 0x04, 0x7a, 0x68, 0x5e, 0x4c,	0xa2, 0xb0, 0x86, 0x94, 0xea, 0xf8, 0xce, 0xdc,
+	0x64, 0x76, 0x40, 0x52, 0x2c, 0x3e, 0x08, 0x1a,	0xf4, 0xe6, 0xd0, 0xc2, 0xbc, 0xae, 0x98, 0x8a,
+	0x56, 0x44, 0x72, 0x60, 0x1e, 0x0c, 0x3a, 0x28,	0xc6, 0xd4, 0xe2, 0xf0, 0x8e, 0x9c, 0xaa, 0xb8,
+	0xc8, 0xda, 0xec, 0xfe, 0x80, 0x92, 0xa4, 0xb6,	0x58, 0x4a, 0x7c, 0x6e, 0x10, 0x02, 0x34, 0x26,
+	0xfa, 0xe8, 0xde, 0xcc, 0xb2, 0xa0, 0x96, 0x84,	0x6a, 0x78, 0x4e, 0x5c, 0x22, 0x30, 0x06, 0x14,
+	0xac, 0xbe, 0x88, 0x9a, 0xe4, 0xf6, 0xc0, 0xd2,	0x3c, 0x2e, 0x18, 0x0a, 0x74, 0x66, 0x50, 0x42,
+	0x9e, 0x8c, 0xba, 0xa8, 0xd6, 0xc4, 0xf2, 0xe0,	0x0e, 0x1c, 0x2a, 0x38, 0x46, 0x54, 0x62, 0x70,
+	0x82, 0x90, 0xa6, 0xb4, 0xca, 0xd8, 0xee, 0xfc,	0x12, 0x00, 0x36, 0x24, 0x5a, 0x48, 0x7e, 0x6c,
+	0xb0, 0xa2, 0x94, 0x86, 0xf8, 0xea, 0xdc, 0xce,	0x20, 0x32, 0x04, 0x16, 0x68, 0x7a, 0x4c, 0x5e,
+	0xe6, 0xf4, 0xc2, 0xd0, 0xae, 0xbc, 0x8a, 0x98,	0x76, 0x64, 0x52, 0x40, 0x3e, 0x2c, 0x1a, 0x08,
+	0xd4, 0xc6, 0xf0, 0xe2, 0x9c, 0x8e, 0xb8, 0xaa,	0x44, 0x56, 0x60, 0x72, 0x0c, 0x1e, 0x28, 0x3a,
+	0x4a, 0x58, 0x6e, 0x7c, 0x02, 0x10, 0x26, 0x34,	0xda, 0xc8, 0xfe, 0xec, 0x92, 0x80, 0xb6, 0xa4,
+	0x78, 0x6a, 0x5c, 0x4e, 0x30, 0x22, 0x14, 0x06,	0xe8, 0xfa, 0xcc, 0xde, 0xa0, 0xb2, 0x84, 0x96,
+	0x2e, 0x3c, 0x0a, 0x18, 0x66, 0x74, 0x42, 0x50,	0xbe, 0xac, 0x9a, 0x88, 0xf6, 0xe4, 0xd2, 0xc0,
+	0x1c, 0x0e, 0x38, 0x2a, 0x54, 0x46, 0x70, 0x62,	0x8c, 0x9e, 0xa8, 0xba, 0xc4, 0xd6, 0xe0, 0xf2
+};
+
+// Calculate the CRC16 checksum for 4 parallel data lines, each line separately.
+// In 4-bit SDIO bus mode the CRC16 is applied per line, giving 4 x 16 = 64 bits of checksum.
+// data: pointer to num_words 32-bit words; each word is byte-swapped before it is processed.
+// Returns the 64-bit accumulator, which starts at 0, after all words have been processed.
+uint64_t sdio_crc16_4bit_checksum(uint32_t *data, uint32_t num_words)
+{
+    uint64_t crc = 0;
+    uint32_t *end = data + num_words;
+    while (data < end)
+    {
+        // Each 32-bit word contains 8 bits per line.
+        // Reverse the bytes because SDIO protocol is big-endian.
+        uint32_t data_in = __builtin_bswap32(*data++);
+
+        // Shift out 8 bits for each line (4 lines x 8 bits = the top 32 bits of the accumulator)
+        uint32_t data_out = crc >> 32;
+        crc <<= 32;
+
+        // XOR outgoing data to itself with 4 bit delay (16 bit positions = 4 bits on each of 4 lines)
+        data_out ^= (data_out >> 16);
+
+        // XOR incoming data to outgoing data with 4 bit delay
+        data_out ^= (data_in >> 16);
+
+        // XOR outgoing and incoming data to accumulator at each tap.
+        // The taps sit at bit offsets 0, 5 and 12 per line, multiplied by 4 interleaved lines.
+        uint64_t xorred = data_out ^ data_in;
+        crc ^= xorred;
+        crc ^= xorred << (5 * 4);
+        crc ^= xorred << (12 * 4);
+    }
+
+    return crc;
+}
+
+/*******************************************************
+ * Basic SDIO command execution
+ *******************************************************/
+
+// Build a command packet and hand it to the command state machine as two FIFO words.
+//   command:       command index, placed in the command byte together with the direction bit
+//   arg:           32-bit command argument, sent most significant byte first
+//   response_bits: number of response bits to receive, or 0 if no response is expected
+static void sdio_send_command(uint8_t command, uint32_t arg, uint8_t response_bits)
+{
+    // Bytes covered by the checksum, in the order they are transmitted (big-endian).
+    // The first byte carries the host-to-card direction bit (bit 22 of the first word).
+    const uint8_t packet[5] = {
+        (uint8_t)(command | 0x40),
+        (uint8_t)((arg >> 24) & 0xFF),
+        (uint8_t)((arg >> 16) & 0xFF),
+        (uint8_t)((arg >>  8) & 0xFF),
+        (uint8_t)((arg >>  0) & 0xFF)
+    };
+
+    uint8_t crc = 0;
+    for (int i = 0; i < 5; i++)
+    {
+        crc = crc7_table[crc ^ packet[i]];
+    }
+
+    // First word: bit count minus one, command byte and the two top argument bytes
+    uint32_t first_word = (uint32_t)47 << 24;
+    first_word |= (uint32_t)packet[0] << 16;
+    first_word |= (uint32_t)packet[1] << 8;
+    first_word |= (uint32_t)packet[2];
+
+    // Second word: two low argument bytes, checksum with end bit, response length
+    uint32_t second_word = (uint32_t)packet[3] << 24;
+    second_word |= (uint32_t)packet[4] << 16;
+    second_word |= ((uint32_t)crc << 8) | (1 << 8);
+    if (response_bits != 0)
+    {
+        // Number of bits in response minus one; stays 0 when no response is expected
+        second_word |= (uint32_t)(response_bits - 1);
+    }
+
+    // Transmit command
+    pio_sm_clear_fifos(SDIO_PIO, SDIO_CMD_SM);
+    pio_sm_put(SDIO_PIO, SDIO_CMD_SM, first_word);
+    pio_sm_put(SDIO_PIO, SDIO_CMD_SM, second_word);
+}
+
+// Send a command and, when response is not NULL, read back and validate its 48-bit reply.
+// Returns SDIO_OK on success, SDIO_ERR_RESPONSE_TIMEOUT if the PIO has not delivered the
+// expected FIFO words within about 2 ms, SDIO_ERR_RESPONSE_CRC on checksum mismatch and
+// SDIO_ERR_RESPONSE_CODE if the reply carries a different command index (not checked for command 41).
+sdio_status_t rp2040_sdio_command_R1(uint8_t command, uint32_t arg, uint32_t *response)
+{
+    const bool want_reply = (response != nullptr);
+    sdio_send_command(command, arg, want_reply ? 48 : 0);
+
+    // A reply takes two FIFO words; otherwise only a single dummy marker word is awaited.
+    const uint32_t words_needed = want_reply ? 2 : 1;
+    const uint32_t wait_began = millis();
+    for (;;)
+    {
+        if (pio_sm_get_rx_fifo_level(SDIO_PIO, SDIO_CMD_SM) >= words_needed)
+        {
+            break;
+        }
+
+        if ((uint32_t)(millis() - wait_began) > 2)
+        {
+            azdbg("Timeout waiting for response in rp2040_sdio_command_R1(", (int)command, "), ",
+                  "PIO PC: ", (int)pio_sm_get_pc(SDIO_PIO, SDIO_CMD_SM) - (int)g_sdio.pio_cmd_clk_offset,
+                  " RXF: ", (int)pio_sm_get_rx_fifo_level(SDIO_PIO, SDIO_CMD_SM),
+                  " TXF: ", (int)pio_sm_get_tx_fifo_level(SDIO_PIO, SDIO_CMD_SM));
+
+            // Put the state machine back to the start of its program
+            pio_sm_clear_fifos(SDIO_PIO, SDIO_CMD_SM);
+            pio_sm_exec(SDIO_PIO, SDIO_CMD_SM, pio_encode_jmp(g_sdio.pio_cmd_clk_offset));
+            return SDIO_ERR_RESPONSE_TIMEOUT;
+        }
+    }
+
+    if (!want_reply)
+    {
+        // Discard the dummy marker
+        pio_sm_get(SDIO_PIO, SDIO_CMD_SM);
+        return SDIO_OK;
+    }
+
+    // Fetch the two words of the response packet
+    const uint32_t head = pio_sm_get(SDIO_PIO, SDIO_CMD_SM);
+    const uint32_t tail = pio_sm_get(SDIO_PIO, SDIO_CMD_SM);
+
+    // Checksum covers the four bytes of the first word and the next byte of the second word
+    uint8_t crc = 0;
+    for (int shift = 24; shift >= 0; shift -= 8)
+    {
+        crc = crc7_table[crc ^ ((head >> shift) & 0xFF)];
+    }
+    crc = crc7_table[crc ^ ((tail >> 8) & 0xFF)];
+
+    uint8_t actual_crc = (tail & 0xFE);
+    if (crc != actual_crc)
+    {
+        azdbg("rp2040_sdio_command_R1(", (int)command, "): CRC error, calculated ", crc, " packet has ", actual_crc);
+        return SDIO_ERR_RESPONSE_CRC;
+    }
+
+    uint8_t response_cmd = ((head >> 24) & 0xFF);
+    if (command != 41 && response_cmd != command)
+    {
+        azdbg("rp2040_sdio_command_R1(", (int)command, "): received reply for ", (int)response_cmd);
+        return SDIO_ERR_RESPONSE_CODE;
+    }
+
+    // The 32-bit payload follows the command byte
+    *response = ((head & 0xFFFFFF) << 8) | ((tail >> 8) & 0xFF);
+    return SDIO_OK;
+}
+
+// Execute a command that has a 136-bit reply and store its 16 payload bytes in response.
+// The payload is written to response before it is validated, so it is also filled in when
+// SDIO_ERR_RESPONSE_CRC or SDIO_ERR_RESPONSE_CODE is returned. On SDIO_ERR_RESPONSE_TIMEOUT
+// (no complete reply within about 2 ms) response is left untouched.
+sdio_status_t rp2040_sdio_command_R2(uint8_t command, uint32_t arg, uint8_t response[16])
+{
+    // The response is too long to fit in the PIO FIFO, so a DMA channel collects
+    // its five words into a local buffer.
+    pio_sm_clear_fifos(SDIO_PIO, SDIO_CMD_SM);
+    uint32_t raw_words[5];
+    dma_channel_config rx_cfg = dma_channel_get_default_config(SDIO_DMA_CH);
+    channel_config_set_transfer_data_size(&rx_cfg, DMA_SIZE_32);
+    channel_config_set_read_increment(&rx_cfg, false);
+    channel_config_set_write_increment(&rx_cfg, true);
+    channel_config_set_dreq(&rx_cfg, pio_get_dreq(SDIO_PIO, SDIO_CMD_SM, false));
+    dma_channel_configure(SDIO_DMA_CH, &rx_cfg, raw_words, &SDIO_PIO->rxf[SDIO_CMD_SM], 5, true);
+
+    sdio_send_command(command, arg, 136);
+
+    const uint32_t wait_began = millis();
+    for (;;)
+    {
+        if (!dma_channel_is_busy(SDIO_DMA_CH))
+        {
+            break;
+        }
+
+        if ((uint32_t)(millis() - wait_began) > 2)
+        {
+            azdbg("Timeout waiting for response in rp2040_sdio_command_R2(", (int)command, "), ",
+                  "PIO PC: ", (int)pio_sm_get_pc(SDIO_PIO, SDIO_CMD_SM) - (int)g_sdio.pio_cmd_clk_offset,
+                  " RXF: ", (int)pio_sm_get_rx_fifo_level(SDIO_PIO, SDIO_CMD_SM),
+                  " TXF: ", (int)pio_sm_get_tx_fifo_level(SDIO_PIO, SDIO_CMD_SM));
+
+            // Stop the DMA and put the state machine back to the start of its program
+            dma_channel_abort(SDIO_DMA_CH);
+            pio_sm_clear_fifos(SDIO_PIO, SDIO_CMD_SM);
+            pio_sm_exec(SDIO_PIO, SDIO_CMD_SM, pio_encode_jmp(g_sdio.pio_cmd_clk_offset));
+            return SDIO_ERR_RESPONSE_TIMEOUT;
+        }
+    }
+
+    dma_channel_abort(SDIO_DMA_CH);
+
+    // Copy the response payload to the output buffer.
+    // Byte 0 of the stream is the reply code; bytes 1..15 are stored most significant
+    // byte first in the first four words, and the final byte is the low byte of the fifth word.
+    for (int stream_pos = 1; stream_pos <= 15; stream_pos++)
+    {
+        const uint32_t word = raw_words[stream_pos / 4];
+        const int shift = 24 - 8 * (stream_pos % 4);
+        response[stream_pos - 1] = ((word >> shift) & 0xFF);
+    }
+    response[15] = (raw_words[4] & 0xFF);
+
+    // Checksum is calculated over the first 15 payload bytes
+    uint8_t crc = 0;
+    for (int i = 0; i < 15; i++)
+    {
+        crc = crc7_table[crc ^ response[i]];
+    }
+
+    uint8_t actual_crc = response[15] & 0xFE;
+    if (crc != actual_crc)
+    {
+        azdbg("rp2040_sdio_command_R2(", (int)command, "): CRC error, calculated ", crc, " packet has ", actual_crc);
+        return SDIO_ERR_RESPONSE_CRC;
+    }
+
+    uint8_t response_cmd = ((raw_words[0] >> 24) & 0xFF);
+    if (response_cmd != 0x3F)
+    {
+        azdbg("rp2040_sdio_command_R2(", (int)command, "): Expected reply code 0x3F");
+        return SDIO_ERR_RESPONSE_CODE;
+    }
+
+    return SDIO_OK;
+}
+
+
+// Execute a command that has a 48-bit reply and return its 32-bit payload in *response.
+// Neither the checksum nor the command index of the reply is checked here.
+// Returns SDIO_ERR_RESPONSE_TIMEOUT if no complete reply arrives within about 2 ms.
+sdio_status_t rp2040_sdio_command_R3(uint8_t command, uint32_t arg, uint32_t *response)
+{
+    sdio_send_command(command, arg, 48);
+
+    // The reply arrives as two words in the RX FIFO
+    const uint32_t wait_began = millis();
+    for (;;)
+    {
+        if (pio_sm_get_rx_fifo_level(SDIO_PIO, SDIO_CMD_SM) >= 2)
+        {
+            break;
+        }
+
+        if ((uint32_t)(millis() - wait_began) > 2)
+        {
+            azdbg("Timeout waiting for response in rp2040_sdio_command_R3(", (int)command, "), ",
+                  "PIO PC: ", (int)pio_sm_get_pc(SDIO_PIO, SDIO_CMD_SM) - (int)g_sdio.pio_cmd_clk_offset,
+                  " RXF: ", (int)pio_sm_get_rx_fifo_level(SDIO_PIO, SDIO_CMD_SM),
+                  " TXF: ", (int)pio_sm_get_tx_fifo_level(SDIO_PIO, SDIO_CMD_SM));
+
+            // Put the state machine back to the start of its program
+            pio_sm_clear_fifos(SDIO_PIO, SDIO_CMD_SM);
+            pio_sm_exec(SDIO_PIO, SDIO_CMD_SM, pio_encode_jmp(g_sdio.pio_cmd_clk_offset));
+            return SDIO_ERR_RESPONSE_TIMEOUT;
+        }
+    }
+
+    // The payload is the low 24 bits of the first word followed by one byte of the second
+    const uint32_t head = pio_sm_get(SDIO_PIO, SDIO_CMD_SM);
+    const uint32_t tail = pio_sm_get(SDIO_PIO, SDIO_CMD_SM);
+    const uint32_t payload_high = (head & 0xFFFFFF) << 8;
+    const uint32_t payload_low = (tail >> 8) & 0xFF;
+    *response = payload_high | payload_low;
+
+    return SDIO_OK;
+}
+
+/*******************************************************
+ * Data reception from SD card
+ *******************************************************/
+
+sdio_status_t rp2040_sdio_rx_start(uint8_t *buffer, uint32_t num_blocks)
+{
+    /* Start receiving num_blocks blocks of SDIO_BLOCK_SIZE bytes from the card into buffer.
+     * This only sets up and starts the PIO state machine and the DMA channels and always
+     * returns SDIO_OK; progress is checked with rp2040_sdio_rx_poll().
+     * Buffer must be aligned to 4 bytes and num_blocks must not exceed SDIO_MAX_BLOCKS. */
+    assert(((uint32_t)buffer & 3) == 0 && num_blocks <= SDIO_MAX_BLOCKS);
+
+    g_sdio.transfer_state = SDIO_RX;
+    g_sdio.transfer_start_time = millis();
+    g_sdio.data_buf = (uint32_t*)buffer;
+    g_sdio.blocks_done = 0;
+    g_sdio.total_blocks = num_blocks;
+    g_sdio.blocks_checksumed = 0;
+    g_sdio.checksum_errors = 0;
+
+    // Create DMA block descriptors to store each block of 512 bytes of data to buffer
+    // and then 8 bytes to g_sdio.received_checksums.
+    for (int i = 0; i < num_blocks; i++)
+    {
+        g_sdio.dma_blocks[i * 2].write_addr = buffer + i * SDIO_BLOCK_SIZE;
+        g_sdio.dma_blocks[i * 2].transfer_count = SDIO_BLOCK_SIZE / sizeof(uint32_t);
+
+        g_sdio.dma_blocks[i * 2 + 1].write_addr = &g_sdio.received_checksums[i];
+        g_sdio.dma_blocks[i * 2 + 1].transfer_count = 2;
+    }
+    g_sdio.dma_blocks[num_blocks * 2].write_addr = 0; // All-zero entry after the last block. NOTE(review): the index reaches SDIO_MAX_BLOCKS * 2 when num_blocks == SDIO_MAX_BLOCKS - confirm dma_blocks has room for it
+    g_sdio.dma_blocks[num_blocks * 2].transfer_count = 0;
+
+    // Configure first DMA channel for reading from the PIO RX fifo.
+    // Its write address and transfer count are left at 0 here; they are supplied by the second channel.
+    dma_channel_config dmacfg = dma_channel_get_default_config(SDIO_DMA_CH);
+    channel_config_set_transfer_data_size(&dmacfg, DMA_SIZE_32);
+    channel_config_set_read_increment(&dmacfg, false);
+    channel_config_set_write_increment(&dmacfg, true);
+    channel_config_set_dreq(&dmacfg, pio_get_dreq(SDIO_PIO, SDIO_DATA_SM, false));
+    channel_config_set_bswap(&dmacfg, true);
+    channel_config_set_chain_to(&dmacfg, SDIO_DMA_CHB);
+    dma_channel_configure(SDIO_DMA_CH, &dmacfg, 0, &SDIO_PIO->rxf[SDIO_DATA_SM], 0, false);
+
+    // Configure second DMA channel for reconfiguring the first one.
+    // Each run copies one two-word descriptor from g_sdio.dma_blocks to the first channel's registers.
+    dmacfg = dma_channel_get_default_config(SDIO_DMA_CHB);
+    channel_config_set_transfer_data_size(&dmacfg, DMA_SIZE_32);
+    channel_config_set_read_increment(&dmacfg, true);
+    channel_config_set_write_increment(&dmacfg, true);
+    channel_config_set_ring(&dmacfg, true, 3); // Presumably wraps the write address every 1 << 3 = 8 bytes, i.e. one descriptor - TODO confirm against the SDK documentation
+    dma_channel_configure(SDIO_DMA_CHB, &dmacfg, &dma_hw->ch[SDIO_DMA_CH].al1_write_addr,
+        g_sdio.dma_blocks, 2, false);
+
+    // Initialize PIO state machine
+    pio_sm_init(SDIO_PIO, SDIO_DATA_SM, g_sdio.pio_data_rx_offset, &g_sdio.pio_cfg_data_rx);
+    pio_sm_set_consecutive_pindirs(SDIO_PIO, SDIO_DATA_SM, SDIO_D0, 4, false);
+
+    // Write number of nibbles to receive to Y register:
+    // two nibbles per data byte plus 16 nibbles (8 bytes) of checksum, minus one.
+    pio_sm_put(SDIO_PIO, SDIO_DATA_SM, SDIO_BLOCK_SIZE * 2 + 16 - 1);
+    pio_sm_exec(SDIO_PIO, SDIO_DATA_SM, pio_encode_out(pio_y, 32));
+
+    // Enable RX FIFO join because we don't need the TX FIFO during transfer.
+    // This gives more leeway for the DMA block switching
+    SDIO_PIO->sm[SDIO_DATA_SM].shiftctrl |= PIO_SM0_SHIFTCTRL_FJOIN_RX_BITS;
+
+    // Start PIO and DMA. Starting the second channel loads the first descriptor.
+    dma_channel_start(SDIO_DMA_CHB);
+    pio_sm_set_enabled(SDIO_PIO, SDIO_DATA_SM, true);
+
+    return SDIO_OK;
+}
+
+// Verify the checksums of blocks that have been received but not yet checked.
+// At most maxcount blocks are processed per call. Every mismatch increments
+// g_sdio.checksum_errors, but only the first one is written to the log.
+static void sdio_verify_rx_checksums(uint32_t maxcount)
+{
+    for (uint32_t budget = maxcount;
+         g_sdio.blocks_checksumed < g_sdio.blocks_done && budget > 0;
+         budget--)
+    {
+        int blockidx = g_sdio.blocks_checksumed++;
+
+        // Checksum as calculated from the received data
+        uint32_t *block_words = g_sdio.data_buf + blockidx * SDIO_WORDS_PER_BLOCK;
+        uint64_t checksum = sdio_crc16_4bit_checksum(block_words, SDIO_WORDS_PER_BLOCK);
+
+        // Checksum as sent by the card, converted to little-endian format
+        uint64_t expected = __builtin_bswap32(g_sdio.received_checksums[blockidx].top);
+        expected <<= 32;
+        expected |= __builtin_bswap32(g_sdio.received_checksums[blockidx].bottom);
+
+        if (checksum == expected)
+        {
+            continue;
+        }
+
+        g_sdio.checksum_errors++;
+        if (g_sdio.checksum_errors == 1)
+        {
+            azlog("SDIO checksum error in reception: block ", blockidx,
+                  " calculated ", checksum, " expected ", expected);
+        }
+    }
+}
+
+sdio_status_t rp2040_sdio_rx_poll(uint32_t *bytes_complete)
+{
+    /* Poll the reception that was started with rp2040_sdio_rx_start().
+     * If bytes_complete is not NULL, it receives the byte count of the completely received blocks.
+     * Returns SDIO_BUSY while the transfer is running, SDIO_OK or SDIO_ERR_DATA_CRC once all
+     * blocks are in, and SDIO_ERR_DATA_TIMEOUT if an unfinished transfer is more than 1000 ms old.
+     *
+     * Check how many DMA control blocks have been consumed */
+    uint32_t dma_ctrl_block_count = (dma_hw->ch[SDIO_DMA_CHB].read_addr - (uint32_t)&g_sdio.dma_blocks);
+    dma_ctrl_block_count /= sizeof(g_sdio.dma_blocks[0]);
+
+    // Compute how many complete 512 byte SDIO blocks have been transferred
+    // When transfer ends, dma_ctrl_block_count == g_sdio.total_blocks * 2 + 1
+    g_sdio.blocks_done = (dma_ctrl_block_count - 1) / 2; // NOTE(review): wraps around if dma_ctrl_block_count is 0 - confirm that cannot happen here
+
+    // Is it all done?
+    if (g_sdio.blocks_done >= g_sdio.total_blocks)
+    {
+        g_sdio.transfer_state = SDIO_IDLE;
+    }
+
+    if (bytes_complete)
+    {
+        *bytes_complete = g_sdio.blocks_done * SDIO_BLOCK_SIZE;
+    }
+
+    if (g_sdio.transfer_state == SDIO_IDLE)
+    {
+        sdio_verify_rx_checksums(g_sdio.total_blocks); // Finish all checksums that are still pending
+
+        if (g_sdio.checksum_errors == 0)
+            return SDIO_OK;
+        else
+            return SDIO_ERR_DATA_CRC;
+    }
+    else if ((uint32_t)(millis() - g_sdio.transfer_start_time) > 1000)
+    {
+        azdbg("rp2040_sdio_rx_poll() timeout, "
+            "PIO PC: ", (int)pio_sm_get_pc(SDIO_PIO, SDIO_DATA_SM) - (int)g_sdio.pio_data_rx_offset,
+            " RXF: ", (int)pio_sm_get_rx_fifo_level(SDIO_PIO, SDIO_DATA_SM),
+            " TXF: ", (int)pio_sm_get_tx_fifo_level(SDIO_PIO, SDIO_DATA_SM),
+            " DMA CNT: ", dma_hw->ch[SDIO_DMA_CH].al2_transfer_count);
+        rp2040_sdio_stop();
+        return SDIO_ERR_DATA_TIMEOUT;
+    }
+    else
+    {
+        // Use the idle time to calculate checksums, one block per poll
+        sdio_verify_rx_checksums(1);
+    }
+
+    return SDIO_BUSY;
+}
+
+
+/*******************************************************
+ * Data transmission to SD card
+ *******************************************************/
+
+static void sdio_start_next_block_tx()
+{
+    /* Set up the PIO state machine and both DMA channels to transmit block number
+     * g_sdio.blocks_done of g_sdio.data_buf, followed by g_sdio.next_wr_block_checksum
+     * and an end token, and then start the transmission.
+     *
+     * Initialize PIO */
+    pio_sm_init(SDIO_PIO, SDIO_DATA_SM, g_sdio.pio_data_tx_offset, &g_sdio.pio_cfg_data_tx);
+    
+    // Configure DMA to send the data block payload (512 bytes)
+    dma_channel_config dmacfg = dma_channel_get_default_config(SDIO_DMA_CH);
+    channel_config_set_transfer_data_size(&dmacfg, DMA_SIZE_32);
+    channel_config_set_read_increment(&dmacfg, true);
+    channel_config_set_write_increment(&dmacfg, false);
+    channel_config_set_dreq(&dmacfg, pio_get_dreq(SDIO_PIO, SDIO_DATA_SM, true));
+    channel_config_set_bswap(&dmacfg, true);
+    channel_config_set_chain_to(&dmacfg, SDIO_DMA_CHB);
+    dma_channel_configure(SDIO_DMA_CH, &dmacfg,
+        &SDIO_PIO->txf[SDIO_DATA_SM], g_sdio.data_buf + g_sdio.blocks_done * SDIO_WORDS_PER_BLOCK,
+        SDIO_WORDS_PER_BLOCK, false);
+
+    // Prepare second DMA channel to send the CRC and block end marker
+    uint64_t crc = g_sdio.next_wr_block_checksum;
+    g_sdio.end_token_buf[0] = (uint32_t)(crc >> 32);
+    g_sdio.end_token_buf[1] = (uint32_t)(crc >>  0);
+    g_sdio.end_token_buf[2] = 0xFFFFFFFF;
+    channel_config_set_bswap(&dmacfg, false); // Same configuration is reused for the second channel, only without byte swapping
+    dma_channel_configure(SDIO_DMA_CHB, &dmacfg,
+        &SDIO_PIO->txf[SDIO_DATA_SM], g_sdio.end_token_buf, 3, false);
+    
+    // Enable IRQ to trigger when block is done.
+    // The write to ints1 presumably clears a stale pending flag first - TODO confirm against the datasheet.
+    dma_hw->ints1 = 1 << SDIO_DMA_CHB;
+    dma_set_irq1_channel_mask_enabled(1 << SDIO_DMA_CHB, 1);
+
+    // Initialize register X with nibble count and register Y with response bit count.
+    // NOTE(review): 1048 is presumably 8 (start token) + 1024 (data) + 16 (CRC) nibbles - confirm against the PIO program.
+    pio_sm_put(SDIO_PIO, SDIO_DATA_SM, 1048);
+    pio_sm_exec(SDIO_PIO, SDIO_DATA_SM, pio_encode_out(pio_x, 32));
+    pio_sm_put(SDIO_PIO, SDIO_DATA_SM, 31);
+    pio_sm_exec(SDIO_PIO, SDIO_DATA_SM, pio_encode_out(pio_y, 32));
+    
+    // Initialize pins to output and high
+    pio_sm_exec(SDIO_PIO, SDIO_DATA_SM, pio_encode_set(pio_pins, 15));
+    pio_sm_exec(SDIO_PIO, SDIO_DATA_SM, pio_encode_set(pio_pindirs, 15));
+
+    // Write start token and start the DMA transfer.
+    pio_sm_put(SDIO_PIO, SDIO_DATA_SM, 0xFFFFFFF0);
+    dma_channel_start(SDIO_DMA_CH);
+    
+    // Start state machine
+    pio_sm_set_enabled(SDIO_PIO, SDIO_DATA_SM, true);
+}
+
+// Calculate the checksum of the next block that has not been checksummed yet and
+// store it in g_sdio.next_wr_block_checksum. The block counter is advanced before
+// the checksum is calculated.
+static void sdio_compute_next_tx_checksum()
+{
+    assert (g_sdio.blocks_done < g_sdio.total_blocks && g_sdio.blocks_checksumed < g_sdio.total_blocks);
+
+    int blockidx = g_sdio.blocks_checksumed;
+    g_sdio.blocks_checksumed++;
+
+    uint32_t *block_words = g_sdio.data_buf + blockidx * SDIO_WORDS_PER_BLOCK;
+    g_sdio.next_wr_block_checksum = sdio_crc16_4bit_checksum(block_words, SDIO_WORDS_PER_BLOCK);
+}
+
+// Start transferring num_blocks blocks of SDIO_BLOCK_SIZE bytes from buffer to SD card. Always returns SDIO_OK.
+sdio_status_t rp2040_sdio_tx_start(const uint8_t *buffer, uint32_t num_blocks)
+{
+    // Buffer must be aligned to 4 bytes and num_blocks must not exceed SDIO_MAX_BLOCKS
+    assert(((uint32_t)buffer & 3) == 0 && num_blocks <= SDIO_MAX_BLOCKS);
+
+    g_sdio.transfer_state = SDIO_TX;
+    g_sdio.transfer_start_time = millis();
+    g_sdio.data_buf = (uint32_t*)buffer;
+    g_sdio.blocks_done = 0;
+    g_sdio.total_blocks = num_blocks;
+    g_sdio.blocks_checksumed = 0;
+    g_sdio.checksum_errors = 0;
+
+    // Compute first block checksum (it is needed before the block can be started)
+    sdio_compute_next_tx_checksum();
+
+    // Start first DMA transfer and PIO
+    sdio_start_next_block_tx();
+
+    if (g_sdio.blocks_checksumed < g_sdio.total_blocks)
+    {
+        // Precompute second block checksum while the first block is being sent
+        sdio_compute_next_tx_checksum();
+    }
+
+    return SDIO_OK;
+}
+
+// Decode the write status that the card sends after a data block.
+// Returns SDIO_OK for status 2, SDIO_ERR_WRITE_CRC for status 5 and
+// SDIO_ERR_WRITE_FAIL for status 6 or any other value. Failures are logged
+// together with the raw response word.
+sdio_status_t check_sdio_write_response(uint32_t card_response)
+{
+    // Drop leading 1 bits so that the top bit is 0 (the start bit).
+    // The format of response is poorly documented in SDIO spec but refer to e.g.
+    // http://my-cool-projects.blogspot.com/2013/02/the-mysterious-sd-card-crc-status.html
+    uint32_t aligned = card_response;
+    for (int step = 16; step >= 1; step /= 2)
+    {
+        // Shift by 'step' only when all of the top 'step' bits are set
+        const uint32_t top_bits = 0xFFFFFFFF << (32 - step);
+        if ((aligned & top_bits) == top_bits)
+        {
+            aligned <<= step;
+        }
+    }
+
+    // The three bits following the start bit carry the status
+    switch ((aligned >> 28) & 7)
+    {
+        case 2:
+            return SDIO_OK;
+
+        case 5:
+            azlog("SDIO card reports write CRC error, status ", card_response);
+            return SDIO_ERR_WRITE_CRC;
+
+        case 6:
+            azlog("SDIO card reports write failure, status ", card_response);
+            return SDIO_ERR_WRITE_FAIL;
+
+        default:
+            azlog("SDIO card reports unknown write status ", card_response);
+            return SDIO_ERR_WRITE_FAIL;
+    }
+}
+
+// When a block finishes, this IRQ handler checks the card's write response and starts the next block
+static void rp2040_sdio_tx_irq()
+{
+    dma_hw->ints1 = 1 << SDIO_DMA_CHB; // Presumably acknowledges the pending interrupt of SDIO_DMA_CHB - TODO confirm against the datasheet
+
+    if (g_sdio.transfer_state == SDIO_TX)
+    {
+        if (!dma_channel_is_busy(SDIO_DMA_CH) && !dma_channel_is_busy(SDIO_DMA_CHB))
+        {
+            // Main data transfer is finished now.
+            // When card is ready, PIO will put card response on RX fifo
+            g_sdio.transfer_state = SDIO_TX_WAIT_IDLE;
+            if (!pio_sm_is_rx_fifo_empty(SDIO_PIO, SDIO_DATA_SM))
+            {
+                // Card is already idle
+                g_sdio.card_response = pio_sm_get(SDIO_PIO, SDIO_DATA_SM);
+            }
+            else
+            {
+                // Use DMA to wait for the response: one word is copied from the
+                // RX fifo to g_sdio.card_response as soon as it becomes available.
+                dma_channel_config dmacfg = dma_channel_get_default_config(SDIO_DMA_CHB);
+                channel_config_set_transfer_data_size(&dmacfg, DMA_SIZE_32);
+                channel_config_set_read_increment(&dmacfg, false);
+                channel_config_set_write_increment(&dmacfg, false);
+                channel_config_set_dreq(&dmacfg, pio_get_dreq(SDIO_PIO, SDIO_DATA_SM, false));
+                dma_channel_configure(SDIO_DMA_CHB, &dmacfg,
+                    &g_sdio.card_response, &SDIO_PIO->rxf[SDIO_DATA_SM], 1, true);
+            }
+        }
+    }
+    
+    if (g_sdio.transfer_state == SDIO_TX_WAIT_IDLE)
+    {
+        if (!dma_channel_is_busy(SDIO_DMA_CHB)) // Response word is available in g_sdio.card_response
+        {
+            g_sdio.wr_status = check_sdio_write_response(g_sdio.card_response);
+
+            if (g_sdio.wr_status != SDIO_OK)
+            {
+                rp2040_sdio_stop(); // Abort the remaining blocks; wr_status keeps the error code
+                return;
+            }
+
+            g_sdio.blocks_done++;
+            if (g_sdio.blocks_done < g_sdio.total_blocks)
+            {
+                sdio_start_next_block_tx();
+                g_sdio.transfer_state = SDIO_TX;
+
+                if (g_sdio.blocks_checksumed < g_sdio.total_blocks)
+                {
+                    // Precompute the CRC for next block so that it is ready when
+                    // we want to send it.
+                    sdio_compute_next_tx_checksum();
+                }
+            }
+            else
+            {
+                rp2040_sdio_stop(); // All blocks have been sent; this sets the state to SDIO_IDLE
+            }
+        }    
+    }
+}
+
+// Check if transmission is complete. Returns SDIO_BUSY while running, the latest write status once idle, or SDIO_ERR_DATA_TIMEOUT after 1000 ms.
+sdio_status_t rp2040_sdio_tx_poll(uint32_t *bytes_complete)
+{
+    if (SCB->ICSR & SCB_ICSR_VECTACTIVE_Msk)
+    {
+        // VECTACTIVE is non-zero, so an exception handler is executing: call the handler by hand.
+        // Verify that IRQ handler gets called even if we are in hardfault handler
+        rp2040_sdio_tx_irq();
+    }
+
+    if (bytes_complete)
+    {
+        *bytes_complete = g_sdio.blocks_done * SDIO_BLOCK_SIZE;
+    }
+
+    if (g_sdio.transfer_state == SDIO_IDLE)
+    {
+        rp2040_sdio_stop();
+        return g_sdio.wr_status;
+    }
+    else if ((uint32_t)(millis() - g_sdio.transfer_start_time) > 1000)
+    {
+        azdbg("rp2040_sdio_tx_poll() timeout, "
+            "PIO PC: ", (int)pio_sm_get_pc(SDIO_PIO, SDIO_DATA_SM) - (int)g_sdio.pio_data_tx_offset,
+            " RXF: ", (int)pio_sm_get_rx_fifo_level(SDIO_PIO, SDIO_DATA_SM),
+            " TXF: ", (int)pio_sm_get_tx_fifo_level(SDIO_PIO, SDIO_DATA_SM),
+            " DMA CNT: ", dma_hw->ch[SDIO_DMA_CH].al2_transfer_count);
+        rp2040_sdio_stop();
+        return SDIO_ERR_DATA_TIMEOUT;
+    }
+
+    return SDIO_BUSY;
+}
+
+/* Force everything to idle state: abort both DMA channels, disable the DMA interrupt
+ * of SDIO_DMA_CHB, disable the data state machine and reset the direction of the four
+ * pins starting at SDIO_D0. Sets the transfer state to SDIO_IDLE and always returns SDIO_OK. */
+sdio_status_t rp2040_sdio_stop()
+{
+    dma_channel_abort(SDIO_DMA_CH);
+    dma_channel_abort(SDIO_DMA_CHB);
+    dma_set_irq1_channel_mask_enabled(1 << SDIO_DMA_CHB, 0);
+    pio_sm_set_enabled(SDIO_PIO, SDIO_DATA_SM, false);
+    pio_sm_set_consecutive_pindirs(SDIO_PIO, SDIO_DATA_SM, SDIO_D0, 4, false);
+    g_sdio.transfer_state = SDIO_IDLE;
+    return SDIO_OK;
+}
+
+void rp2040_sdio_init(int clock_divider)
+{
+    /* Initialize the SDIO driver: reset g_sdio, load the three PIO programs, start the
+     * command & clock state machine, route the SDIO pins to the PIO and install the DMA
+     * interrupt handler. clock_divider is used as the integer clock divider of all three
+     * state machine configurations. The function may be called more than once.
+     *
+     * Mark resources as being in use, unless it has been done already. */
+    static bool resources_claimed = false;
+    if (!resources_claimed)
+    {
+        pio_sm_claim(SDIO_PIO, SDIO_CMD_SM);
+        pio_sm_claim(SDIO_PIO, SDIO_DATA_SM);
+        dma_channel_claim(SDIO_DMA_CH);
+        dma_channel_claim(SDIO_DMA_CHB);
+        resources_claimed = true;
+    }
+
+    memset(&g_sdio, 0, sizeof(g_sdio));
+
+    dma_channel_abort(SDIO_DMA_CH);
+    dma_channel_abort(SDIO_DMA_CHB);
+    pio_sm_set_enabled(SDIO_PIO, SDIO_CMD_SM, false);
+    pio_sm_set_enabled(SDIO_PIO, SDIO_DATA_SM, false);
+
+    // Load PIO programs. The whole instruction memory of SDIO_PIO is cleared first.
+    pio_clear_instruction_memory(SDIO_PIO);
+
+    // Command & clock state machine
+    g_sdio.pio_cmd_clk_offset = pio_add_program(SDIO_PIO, &sdio_cmd_clk_program);
+    pio_sm_config cfg = sdio_cmd_clk_program_get_default_config(g_sdio.pio_cmd_clk_offset);
+    sm_config_set_out_pins(&cfg, SDIO_CMD, 1);
+    sm_config_set_in_pins(&cfg, SDIO_CMD);
+    sm_config_set_set_pins(&cfg, SDIO_CMD, 1);
+    sm_config_set_jmp_pin(&cfg, SDIO_CMD);
+    sm_config_set_sideset_pins(&cfg, SDIO_CLK);
+    sm_config_set_out_shift(&cfg, false, true, 32);
+    sm_config_set_in_shift(&cfg, false, true, 32);
+    sm_config_set_clkdiv_int_frac(&cfg, clock_divider, 0);
+    sm_config_set_mov_status(&cfg, STATUS_TX_LESSTHAN, 2);
+
+    pio_sm_init(SDIO_PIO, SDIO_CMD_SM, g_sdio.pio_cmd_clk_offset, &cfg);
+    pio_sm_set_consecutive_pindirs(SDIO_PIO, SDIO_CMD_SM, SDIO_CLK, 1, true);
+    pio_sm_set_enabled(SDIO_PIO, SDIO_CMD_SM, true);
+
+    // Data reception program. Only the configuration is stored here; it is applied in rp2040_sdio_rx_start().
+    g_sdio.pio_data_rx_offset = pio_add_program(SDIO_PIO, &sdio_data_rx_program);
+    g_sdio.pio_cfg_data_rx = sdio_data_rx_program_get_default_config(g_sdio.pio_data_rx_offset);
+    sm_config_set_in_pins(&g_sdio.pio_cfg_data_rx, SDIO_D0);
+    sm_config_set_in_shift(&g_sdio.pio_cfg_data_rx, false, true, 32);
+    sm_config_set_out_shift(&g_sdio.pio_cfg_data_rx, false, true, 32);
+    sm_config_set_clkdiv_int_frac(&g_sdio.pio_cfg_data_rx, clock_divider, 0);
+
+    // Data transmission program. Only the configuration is stored here; it is applied in sdio_start_next_block_tx().
+    g_sdio.pio_data_tx_offset = pio_add_program(SDIO_PIO, &sdio_data_tx_program);
+    g_sdio.pio_cfg_data_tx = sdio_data_tx_program_get_default_config(g_sdio.pio_data_tx_offset);
+    sm_config_set_in_pins(&g_sdio.pio_cfg_data_tx, SDIO_D0);
+    sm_config_set_set_pins(&g_sdio.pio_cfg_data_tx, SDIO_D0, 4);
+    sm_config_set_out_pins(&g_sdio.pio_cfg_data_tx, SDIO_D0, 4);
+    sm_config_set_in_shift(&g_sdio.pio_cfg_data_tx, false, false, 32);
+    sm_config_set_out_shift(&g_sdio.pio_cfg_data_tx, false, true, 32);
+    sm_config_set_clkdiv_int_frac(&g_sdio.pio_cfg_data_tx, clock_divider, 0);
+
+    // Disable SDIO pins input synchronizer.
+    // This reduces input delay.
+    // Because the CLK is driven synchronously to CPU clock,
+    // there should be no metastability problems.
+    SDIO_PIO->input_sync_bypass |= (1 << SDIO_CLK) | (1 << SDIO_CMD)
+                                 | (1 << SDIO_D0) | (1 << SDIO_D1) | (1 << SDIO_D2) | (1 << SDIO_D3);
+
+    // Redirect GPIOs to PIO
+    gpio_set_function(SDIO_CMD, GPIO_FUNC_PIO1);
+    gpio_set_function(SDIO_CLK, GPIO_FUNC_PIO1);
+    gpio_set_function(SDIO_D0, GPIO_FUNC_PIO1);
+    gpio_set_function(SDIO_D1, GPIO_FUNC_PIO1);
+    gpio_set_function(SDIO_D2, GPIO_FUNC_PIO1);
+    gpio_set_function(SDIO_D3, GPIO_FUNC_PIO1);
+
+    // Set up IRQ handler when DMA completes.
+    irq_set_exclusive_handler(DMA_IRQ_1, rp2040_sdio_tx_irq);
+    irq_set_enabled(DMA_IRQ_1, true);
+}

+ 52 - 0
lib/ZuluSCSI_platform_RP2040/rp2040_sdio.h

@@ -0,0 +1,52 @@
+// SD card access using SDIO for RP2040 platform.
+// This module contains the low-level SDIO bus implementation using
+// the PIO peripheral. The high-level commands are in sd_card_sdio.cpp.
+
+#pragma once
+#include <stdint.h>
+
+enum sdio_status_t {
+    SDIO_OK = 0,
+    SDIO_BUSY = 1,
+    SDIO_ERR_RESPONSE_TIMEOUT = 2, // Timed out waiting for response from card
+    SDIO_ERR_RESPONSE_CRC = 3,     // Response CRC is wrong
+    SDIO_ERR_RESPONSE_CODE = 4,    // Response command code does not match what was sent
+    SDIO_ERR_DATA_TIMEOUT = 5,     // Timed out waiting for data block
+    SDIO_ERR_DATA_CRC = 6,         // CRC for data packet is wrong
+    SDIO_ERR_WRITE_CRC = 7,        // Card reports bad CRC for write
+    SDIO_ERR_WRITE_FAIL = 8,       // Card reports write failure
+};
+
+#define SDIO_BLOCK_SIZE 512
+#define SDIO_WORDS_PER_BLOCK 128
+
+// Execute a command that has 48-bit reply (response types R1, R6, R7)
+// If response is NULL, does not wait for reply.
+sdio_status_t rp2040_sdio_command_R1(uint8_t command, uint32_t arg, uint32_t *response);
+
+// Execute a command that has 136-bit reply (response type R2)
+// Response buffer should have space for 16 bytes (the 128 bit payload)
+sdio_status_t rp2040_sdio_command_R2(uint8_t command, uint32_t arg, uint8_t *response);
+
+// Execute a command that has 48-bit reply but without CRC (response R3)
+sdio_status_t rp2040_sdio_command_R3(uint8_t command, uint32_t arg, uint32_t *response);
+
+// Start transferring data from SD card to memory buffer
+// Transfer block size is always 512 bytes.
+sdio_status_t rp2040_sdio_rx_start(uint8_t *buffer, uint32_t num_blocks);
+
+// Check if reception is complete
+// Returns SDIO_BUSY while transferring, SDIO_OK when done and error on failure.
+sdio_status_t rp2040_sdio_rx_poll(uint32_t *bytes_complete = nullptr);
+
+// Start transferring data from memory to SD card
+sdio_status_t rp2040_sdio_tx_start(const uint8_t *buffer, uint32_t num_blocks);
+
+// Check if transmission is complete
+sdio_status_t rp2040_sdio_tx_poll(uint32_t *bytes_complete = nullptr);
+
+// Force everything to idle state
+sdio_status_t rp2040_sdio_stop();
+
+// (Re)initialize the SDIO interface
+void rp2040_sdio_init(int clock_divider = 1);

+ 145 - 0
lib/ZuluSCSI_platform_RP2040/rp2040_sdio.pio

@@ -0,0 +1,145 @@
+; RP2040 PIO program for implementing SD card access in SDIO mode
+; Run "pioasm rp2040_sdio.pio rp2040_sdio.pio.h" to regenerate the C header from this.
+
+; The RP2040 official work-in-progress code at
+; https://github.com/raspberrypi/pico-extras/tree/master/src/rp2_common/pico_sd_card
+; may be a useful reference, but this is an independent implementation.
+;
+; For official SDIO specifications, refer to:
+; https://www.sdcard.org/downloads/pls/
+; "SDIO Physical Layer Simplified Specification Version 8.00"
+
+; Clock settings
+; For 3.3V communication the available speeds are:
+; - Default speed: max. 25 MHz clock
+; - High speed:    max. 50 MHz clock
+;
+; From the default RP2040 clock speed of 125 MHz, the closest dividers
+; are 3 for 41.7 MHz and 5 for 25 MHz. The CPU can apply further divider
+; through state machine registers for the initial handshake.
+;
+; Because data is written on the falling edge and read on the rising
+; edge, it is preferable to have a long 0 state and short 1 state.
+;.define CLKDIV 3
+.define CLKDIV 5
+.define D0 ((CLKDIV + 1) / 2 - 1)
+.define D1 (CLKDIV/2 - 1)
+.define SDIO_CLK_GPIO 18
+
+; State machine 0 is used to:
+; - generate continuous clock on SDIO_CLK
+; - send CMD packets
+; - receive response packets
+;
+; Pin mapping for this state machine:
+; - Sideset    : CLK
+; - IN/OUT/SET : CMD
+; - JMP_PIN    : CMD
+;
+; The commands to send are put on TX fifo and must have two words:
+; Word 0 bits 31-24: Number of bits in command minus one (usually 47)
+; Word 0 bits 23-00: First 24 bits of the command packet, shifted out MSB first
+; Word 1 bits 31-08: Last 24 bits of the command packet, shifted out MSB first
+; Word 1 bits 07-00: Number of bits in response minus one (usually 47), or 0 if no response
+;
+; The response is put on RX fifo, starting with the MSB.
+; Partial last word will be padded with zero bits at the top.
+;
+; The state machine EXECCTRL should be set so that STATUS indicates TX FIFO < 2
+; and that AUTOPULL and AUTOPUSH are enabled.
+
+.program sdio_cmd_clk
+    .side_set 1
+
+    mov OSR, NULL       side 1 [D1]    ; Make sure OSR is full of zeros to prevent autopull
+
+wait_cmd:
+    mov Y, !STATUS      side 0 [D0]    ; Check if TX FIFO has data
+    jmp !Y wait_cmd     side 1 [D1]
+
+load_cmd:
+    out NULL, 32        side 0 [D0]    ; Load first word (trigger autopull)
+    out X, 8            side 1 [D1]    ; Number of bits to send
+    set pins, 1         side 0 [D0]    ; Initial state of CMD is high
+    set pindirs, 1      side 1 [D1]    ; Set SDIO_CMD as output
+
+send_cmd:
+    out pins, 1         side 0 [D0]    ; Write output on falling edge of CLK
+    jmp X-- send_cmd    side 1 [D1]
+
+prep_resp:
+    set pindirs, 0      side 0 [D0]    ; Set SDIO_CMD as input
+    out X, 8            side 1 [D1]    ; Get number of bits in response
+    nop                 side 0 [D0]    ; For clock alignment
+    jmp !X resp_done    side 1 [D1]    ; Check if we expect a response
+
+wait_resp:
+    nop                  side 0 [D0]
+    jmp PIN wait_resp    side 1 [D1]    ; Loop until SDIO_CMD = 0
+
+    ; Note: input bits are read at the same time as we write CLK=0.
+    ; Because the host controls the clock, the read happens before
+    ; the card sees the falling clock edge. This gives maximum time
+    ; for the data bit to settle.
+read_resp:
+    in PINS, 1          side 0 [D0]    ; Read input data bit
+    jmp X-- read_resp   side 1 [D1]    ; Loop to receive all data bits
+
+resp_done:
+    push                side 0 [D0]    ; Push the remaining part of response
+
+; State machine 1 is used to send and receive data blocks.
+; Pin mapping for this state machine:
+; - IN / OUT: SDIO_D0-D3
+; - GPIO defined at beginning of this file: SDIO_CLK
+
+; Data reception program
+; This program will wait for initial start of block token and then
+; receive a data block. The application must set number of nibbles
+; to receive minus 1 to Y register before running this program.
+.program sdio_data_rx
+
+wait_start:
+    mov X, Y                               ; Reinitialize number of nibbles to receive
+    wait 0 pin 0                           ; Wait for zero state on D0
+    wait 1 gpio SDIO_CLK_GPIO  [CLKDIV-1]  ; Wait for rising edge and then whole clock cycle
+
+rx_data:
+    in PINS, 4                 [CLKDIV-2]  ; Read nibble
+    jmp X--, rx_data
+
+; Data transmission program
+;
+; Before running this program, pindirs should be set as output
+; and register X should be initialized with the number of nibbles
+; to send minus 1 (typically 8 + 1024 + 16 + 1 - 1 = 1048)
+; and register Y with the number of response bits minus 1 (typically 31).
+;
+; Words written to TX FIFO must be:
+; - Word 0: start token 0xFFFFFFF0
+; - Word 1-128: transmitted data (512 bytes)
+; - Word 129-130: CRC checksum
+; - Word 131: end token 0xFFFFFFFF
+;
+; After the card reports idle status, RX FIFO will get a word that
+; contains the D0 line response from card.
+
+.program sdio_data_tx
+    wait 0 gpio SDIO_CLK_GPIO  
+    wait 1 gpio SDIO_CLK_GPIO  [CLKDIV + D1 - 1]; Synchronize so that write occurs on falling edge
+
+tx_loop:
+    out PINS, 4                [D0]    ; Write nibble and wait for whole clock cycle
+    jmp X-- tx_loop            [D1]
+
+    set pindirs, 0x00          [D0]    ; Set data bus as input
+
+.wrap_target
+response_loop:
+    in PINS, 1                 [D1]    ; Read D0 on rising edge
+    jmp Y--, response_loop     [D0]
+
+wait_idle:
+    wait 1 pin 0               [D1]    ; Wait for card to indicate idle condition
+    push                       [D0]    ; Push the response token
+.wrap

+ 121 - 0
lib/ZuluSCSI_platform_RP2040/rp2040_sdio.pio.h

@@ -0,0 +1,121 @@
+// -------------------------------------------------- //
+// This file is autogenerated by pioasm; do not edit! //
+// -------------------------------------------------- //
+
+#pragma once
+
+#if !PICO_NO_HARDWARE
+#include "hardware/pio.h"
+#endif
+
+// ------------ //
+// sdio_cmd_clk //
+// ------------ //
+
+#define sdio_cmd_clk_wrap_target 0
+#define sdio_cmd_clk_wrap 17
+
+static const uint16_t sdio_cmd_clk_program_instructions[] = {
+            //     .wrap_target
+    0xb1e3, //  0: mov    osr, null       side 1 [1] 
+    0xa24d, //  1: mov    y, !status      side 0 [2] 
+    0x1161, //  2: jmp    !y, 1           side 1 [1] 
+    0x6260, //  3: out    null, 32        side 0 [2] 
+    0x7128, //  4: out    x, 8            side 1 [1] 
+    0xe201, //  5: set    pins, 1         side 0 [2] 
+    0xf181, //  6: set    pindirs, 1      side 1 [1] 
+    0x6201, //  7: out    pins, 1         side 0 [2] 
+    0x1147, //  8: jmp    x--, 7          side 1 [1] 
+    0xe280, //  9: set    pindirs, 0      side 0 [2] 
+    0x7128, // 10: out    x, 8            side 1 [1] 
+    0xa242, // 11: nop                    side 0 [2] 
+    0x1131, // 12: jmp    !x, 17          side 1 [1] 
+    0xa242, // 13: nop                    side 0 [2] 
+    0x11cd, // 14: jmp    pin, 13         side 1 [1] 
+    0x4201, // 15: in     pins, 1         side 0 [2] 
+    0x114f, // 16: jmp    x--, 15         side 1 [1] 
+    0x8220, // 17: push   block           side 0 [2] 
+            //     .wrap
+};
+
+#if !PICO_NO_HARDWARE
+static const struct pio_program sdio_cmd_clk_program = {
+    .instructions = sdio_cmd_clk_program_instructions,
+    .length = 18,
+    .origin = -1,
+};
+
+static inline pio_sm_config sdio_cmd_clk_program_get_default_config(uint offset) {
+    pio_sm_config c = pio_get_default_sm_config();
+    sm_config_set_wrap(&c, offset + sdio_cmd_clk_wrap_target, offset + sdio_cmd_clk_wrap);
+    sm_config_set_sideset(&c, 1, false, false);
+    return c;
+}
+#endif
+
+// ------------ //
+// sdio_data_rx //
+// ------------ //
+
+#define sdio_data_rx_wrap_target 0
+#define sdio_data_rx_wrap 4
+
+static const uint16_t sdio_data_rx_program_instructions[] = {
+            //     .wrap_target
+    0xa022, //  0: mov    x, y                       
+    0x2020, //  1: wait   0 pin, 0                   
+    0x2492, //  2: wait   1 gpio, 18             [4] 
+    0x4304, //  3: in     pins, 4                [3] 
+    0x0043, //  4: jmp    x--, 3                     
+            //     .wrap
+};
+
+#if !PICO_NO_HARDWARE
+static const struct pio_program sdio_data_rx_program = {
+    .instructions = sdio_data_rx_program_instructions,
+    .length = 5,
+    .origin = -1,
+};
+
+static inline pio_sm_config sdio_data_rx_program_get_default_config(uint offset) {
+    pio_sm_config c = pio_get_default_sm_config();
+    sm_config_set_wrap(&c, offset + sdio_data_rx_wrap_target, offset + sdio_data_rx_wrap);
+    return c;
+}
+#endif
+
+// ------------ //
+// sdio_data_tx //
+// ------------ //
+
+#define sdio_data_tx_wrap_target 5
+#define sdio_data_tx_wrap 8
+
+static const uint16_t sdio_data_tx_program_instructions[] = {
+    0x2012, //  0: wait   0 gpio, 18                 
+    0x2592, //  1: wait   1 gpio, 18             [5] 
+    0x6204, //  2: out    pins, 4                [2] 
+    0x0142, //  3: jmp    x--, 2                 [1] 
+    0xe280, //  4: set    pindirs, 0             [2] 
+            //     .wrap_target
+    0x4101, //  5: in     pins, 1                [1] 
+    0x0285, //  6: jmp    y--, 5                 [2] 
+    0x21a0, //  7: wait   1 pin, 0               [1] 
+    0x8220, //  8: push   block                  [2] 
+            //     .wrap
+};
+
+#if !PICO_NO_HARDWARE
+static const struct pio_program sdio_data_tx_program = {
+    .instructions = sdio_data_tx_program_instructions,
+    .length = 9,
+    .origin = -1,
+};
+
+static inline pio_sm_config sdio_data_tx_program_get_default_config(uint offset) {
+    pio_sm_config c = pio_get_default_sm_config();
+    sm_config_set_wrap(&c, offset + sdio_data_tx_wrap_target, offset + sdio_data_tx_wrap);
+    return c;
+}
+#endif
+

+ 13 - 0
lib/ZuluSCSI_platform_RP2040/scsi2sd_time.h

@@ -0,0 +1,13 @@
+// Timing functions for SCSI2SD.
+// This file is derived from time.h in SCSI2SD-V6.
+
+#pragma once
+
+#include <stdint.h>
+#include "ZuluSCSI_platform.h"
+
+// Time helpers mapped onto the platform's millis() and delay_ns().
+// Macro arguments are parenthesized so that an expression argument such as
+// s2s_delay_ms(a + b) scales the whole expression, not only its last term.
+// NOTE(review): the multiplication is carried out in the argument's own type,
+// so very large delays can overflow - confirm callers stay within range.
+#define s2s_getTime_ms() millis()
+#define s2s_elapsedTime_ms(since) ((uint32_t)(millis() - (since)))
+#define s2s_delay_ms(x) delay_ns((x) * 1000000)
+#define s2s_delay_us(x) delay_ns((x) * 1000)
+#define s2s_delay_ns(x) delay_ns(x)

+ 336 - 0
lib/ZuluSCSI_platform_RP2040/scsiPhy.cpp

@@ -0,0 +1,336 @@
+// Implements the low level interface to SCSI bus
+// Partially derived from scsiPhy.c from SCSI2SD-V6
+
+#include "scsiPhy.h"
+#include "ZuluSCSI_platform.h"
+#include "ZuluSCSI_log.h"
+#include "ZuluSCSI_log_trace.h"
+#include "ZuluSCSI_config.h"
+#include "scsi_accel_rp2040.h"
+
+#include <scsi2sd.h>
+extern "C" {
+#include <scsi.h>
+#include <scsi2sd_time.h>
+}
+
+/***********************/
+/* SCSI status signals */
+/***********************/
+
+extern "C" bool scsiStatusATN()
+{
+    return SCSI_IN(ATN);
+}
+
+extern "C" bool scsiStatusBSY()
+{
+    return SCSI_IN(BSY);
+}
+
+/************************/
+/* SCSI selection logic */
+/************************/
+
+volatile uint8_t g_scsi_sts_selection;
+volatile uint8_t g_scsi_ctrl_bsy;
+
+void scsi_bsy_deassert_interrupt()
+{
+    if (SCSI_IN(SEL) && !SCSI_IN(BSY))
+    {
+        // Check if any of the targets we simulate is selected
+        uint8_t sel_bits = SCSI_IN_DATA();
+        int sel_id = -1;
+        for (int i = 0; i < S2S_MAX_TARGETS; i++)
+        {
+            if (scsiDev.targets[i].targetId <= 7 && scsiDev.targets[i].cfg)
+            {
+                if (sel_bits & (1 << scsiDev.targets[i].targetId))
+                {
+                    sel_id = scsiDev.targets[i].targetId;
+                    break;
+                }
+            }
+        }
+
+        if (sel_id >= 0)
+        {
+            uint8_t atn_flag = SCSI_IN(ATN) ? SCSI_STS_SELECTION_ATN : 0;
+            g_scsi_sts_selection = SCSI_STS_SELECTION_SUCCEEDED | atn_flag | sel_id;
+        }
+
+        // selFlag is required for Philips P2000C which releases it after 600ns
+        // without waiting for BSY.
+        // Also required for some early Mac Plus roms
+        scsiDev.selFlag = *SCSI_STS_SELECTED;
+    }
+}
+
+extern "C" bool scsiStatusSEL()
+{
+    if (g_scsi_ctrl_bsy)
+    {
+        // We don't have direct register access to BSY bit like SCSI2SD scsi.c expects.
+        // Instead update the state here.
+        // Releasing happens with bus release.
+        g_scsi_ctrl_bsy = 0;
+        SCSI_OUT(BSY, 1);
+
+        // On RP2040 hardware the ATN signal is only available after OUT_BSY enables
+        // the IO buffer U105, so check the signal status here.
+        delay_100ns();
+        scsiDev.atnFlag |= scsiStatusATN();
+    }
+
+    return SCSI_IN(SEL);
+}
+
+/************************/
+/* SCSI bus reset logic */
+/************************/
+
+static void scsi_rst_assert_interrupt()
+{
+    // Glitch filtering
+    bool rst1 = SCSI_IN(RST);
+    delay_ns(500);
+    bool rst2 = SCSI_IN(RST);
+
+    if (rst1 && rst2)
+    {
+        azdbg("BUS RESET");
+        scsiDev.resetFlag = 1;
+    }
+}
+
+static void scsiPhyIRQ(uint gpio, uint32_t events)
+{
+    if (gpio == SCSI_IN_BSY)
+    {
+        // Note BSY interrupts only when we are not driving OUT_BSY low ourselves.
+        // The BSY input pin may be shared with other signals.
+        if (sio_hw->gpio_out & (1 << SCSI_OUT_BSY))
+        {
+            scsi_bsy_deassert_interrupt();
+        }
+    }
+    else if (gpio == SCSI_IN_RST)
+    {
+        scsi_rst_assert_interrupt();
+    }
+}
+
+// This function is called to initialize the phy code.
+// It is called after power-on and after SCSI bus reset.
+extern "C" void scsiPhyReset(void)
+{
+    SCSI_RELEASE_OUTPUTS();
+    g_scsi_sts_selection = 0;
+    g_scsi_ctrl_bsy = 0;
+
+    scsi_accel_rp2040_init();
+
+    // Enable BSY and RST interrupts
+    // Note: RP2040 library currently supports only one callback,
+    // so it has to be same for both pins.
+    gpio_set_irq_enabled_with_callback(SCSI_IN_BSY, GPIO_IRQ_EDGE_RISE, true, scsiPhyIRQ);
+    gpio_set_irq_enabled_with_callback(SCSI_IN_RST, GPIO_IRQ_EDGE_FALL, true, scsiPhyIRQ);
+}
+
+/************************/
+/* SCSI bus phase logic */
+/************************/
+
+static SCSI_PHASE g_scsi_phase;
+
+// Switch the bus to the given phase and then block for the delay
+// (in nanoseconds) that scsiEnterPhaseImmediate() reports.
+extern "C" void scsiEnterPhase(int phase)
+{
+    // Keep the delay in the unsigned type the callee returns instead of
+    // narrowing it to a signed int.
+    uint32_t settle_ns = scsiEnterPhaseImmediate(phase);
+    if (settle_ns > 0)
+    {
+        s2s_delay_ns(settle_ns);
+    }
+}
+
+// Change state and return nanosecond delay to wait
+extern "C" uint32_t scsiEnterPhaseImmediate(int phase)
+{
+    // ANSI INCITS 362-2002 SPI-3 10.7.1:
+    // Phase changes are not allowed while REQ or ACK is asserted.
+    while (likely(!scsiDev.resetFlag) && SCSI_IN(ACK)) {}
+
+    if (phase != g_scsi_phase)
+    {
+        int oldphase = g_scsi_phase;
+        g_scsi_phase = (SCSI_PHASE)phase;
+        scsiLogPhaseChange(phase);
+
+        if (phase < 0)
+        {
+            // Other communication on bus or reset state
+            SCSI_RELEASE_OUTPUTS();
+            return 0;
+        }
+        else
+        {
+            SCSI_OUT(MSG, phase & __scsiphase_msg);
+            SCSI_OUT(CD,  phase & __scsiphase_cd);
+            SCSI_OUT(IO,  phase & __scsiphase_io);
+            SCSI_ENABLE_CONTROL_OUT();
+
+            int delayNs = 400; // Bus settle delay
+            if ((oldphase & __scsiphase_io) != (phase & __scsiphase_io))
+            {
+                delayNs += 400; // Data release delay
+            }
+
+            if (scsiDev.compatMode < COMPAT_SCSI2)
+            {
+                // EMU EMAX needs 100uS ! 10uS is not enough.
+                delayNs += 100000;
+            }
+
+            return delayNs;
+        }
+    }
+    else
+    {
+        return 0;
+    }
+}
+
+// Release all signals
+void scsiEnterBusFree(void)
+{
+    g_scsi_phase = BUS_FREE;
+    g_scsi_sts_selection = 0;
+    g_scsi_ctrl_bsy = 0;
+    scsiDev.cdbLen = 0;
+
+    SCSI_RELEASE_OUTPUTS();
+}
+
+/********************/
+/* Transmit to host */
+/********************/
+
+#define SCSI_WAIT_ACTIVE(pin) \
+  if (!SCSI_IN(pin)) { \
+    if (!SCSI_IN(pin)) { \
+      while(!SCSI_IN(pin) && !scsiDev.resetFlag); \
+    } \
+  }
+
+#define SCSI_WAIT_INACTIVE(pin) \
+  if (SCSI_IN(pin)) { \
+    if (SCSI_IN(pin)) { \
+      while(SCSI_IN(pin) && !scsiDev.resetFlag); \
+    } \
+  }
+
+// Write one byte to SCSI host using the handshake mechanism
+static inline void scsiWriteOneByte(uint8_t value)
+{
+    SCSI_OUT_DATA(value);
+    delay_100ns(); // DB setup time before REQ
+    SCSI_OUT(REQ, 1);
+    SCSI_WAIT_ACTIVE(ACK);
+    SCSI_RELEASE_DATA_REQ();
+    SCSI_WAIT_INACTIVE(ACK);
+}
+
+extern "C" void scsiWriteByte(uint8_t value)
+{
+    scsiLogDataIn(&value, 1);
+    scsiWriteOneByte(value);
+}
+
+extern "C" void scsiWrite(const uint8_t* data, uint32_t count)
+{
+    scsiStartWrite(data, count);
+    scsiFinishWrite();
+}
+
+extern "C" void scsiStartWrite(const uint8_t* data, uint32_t count)
+{
+    scsiLogDataIn(data, count);
+
+    if ((count & 1) != 0)
+    {
+        // Unaligned write, do it byte-by-byte
+        scsiFinishWrite();
+        for (uint32_t i = 0; i < count; i++)
+        {
+            if (scsiDev.resetFlag) break;
+            scsiWriteOneByte(data[i]);
+        }
+    }
+    else
+    {
+        // Use accelerated routine
+        scsi_accel_rp2040_startWrite(data, count, &scsiDev.resetFlag);
+    }
+}
+
+extern "C" bool scsiIsWriteFinished(const uint8_t *data)
+{
+    return scsi_accel_rp2040_isWriteFinished(data);
+}
+
+extern "C" void scsiFinishWrite()
+{
+    scsi_accel_rp2040_finishWrite(&scsiDev.resetFlag);
+}
+
+/*********************/
+/* Receive from host */
+/*********************/
+
+// Read one byte from SCSI host using the handshake mechanism.
+static inline uint8_t scsiReadOneByte(int* parityError)
+{
+    SCSI_OUT(REQ, 1);
+    SCSI_WAIT_ACTIVE(ACK);
+    delay_100ns();
+    uint16_t r = SCSI_IN_DATA();
+    SCSI_OUT(REQ, 0);
+    SCSI_WAIT_INACTIVE(ACK);
+
+    if (parityError && r != (g_scsi_parity_lookup[r & 0xFF] ^ SCSI_IO_DATA_MASK))
+    {
+        azlog("Parity error in scsiReadOneByte(): ", (uint32_t)r);
+        *parityError = 1;
+    }
+
+    return (uint8_t)r;
+}
+
+extern "C" uint8_t scsiReadByte(void)
+{
+    uint8_t r = scsiReadOneByte(NULL);
+    scsiLogDataOut(&r, 1);
+    return r;
+}
+
+extern "C" void scsiRead(uint8_t* data, uint32_t count, int* parityError)
+{
+    *parityError = 0;
+
+    if ((count & 1) != 0)
+    {
+        // Unaligned transfer, do byte by byte
+        for (uint32_t i = 0; i < count; i++)
+        {
+            if (scsiDev.resetFlag) break;
+            data[i] = scsiReadOneByte(parityError);
+        }
+    }
+    else
+    {
+        // Use accelerated routine
+        scsi_accel_rp2040_read(data, count, parityError, &scsiDev.resetFlag);
+    }
+
+    scsiLogDataOut(data, count);
+}

+ 67 - 0
lib/ZuluSCSI_platform_RP2040/scsiPhy.h

@@ -0,0 +1,67 @@
+// Interface to SCSI physical interface.
+// This file is derived from scsiPhy.h in SCSI2SD-V6.
+
+#pragma once
+
+#include <stdint.h>
+#include <stdbool.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Read SCSI status signals
+bool scsiStatusATN();
+bool scsiStatusBSY();
+bool scsiStatusSEL();
+
+// Parity not yet implemented
+#define scsiParityError() 0
+
+// Get SCSI selection status.
+// This is latched by interrupt when BSY is deasserted while SEL is asserted.
+// Lowest 3 bits are the selected target id.
+// Highest bits are status information.
+#define SCSI_STS_SELECTION_SUCCEEDED 0x40
+#define SCSI_STS_SELECTION_ATN 0x80
+extern volatile uint8_t g_scsi_sts_selection;
+#define SCSI_STS_SELECTED (&g_scsi_sts_selection)
+extern volatile uint8_t g_scsi_ctrl_bsy;
+#define SCSI_CTRL_BSY (&g_scsi_ctrl_bsy)
+
+// Called when SCSI RST signal has been asserted, should release bus.
+void scsiPhyReset(void);
+
+// Change MSG / CD / IO signal states and wait for necessary transition time.
+// Phase argument is one of SCSI_PHASE enum values.
+void scsiEnterPhase(int phase);
+
+// Change state and return nanosecond delay to wait
+uint32_t scsiEnterPhaseImmediate(int phase);
+
+// Release all signals
+void scsiEnterBusFree(void);
+
+// Blocking data transfer
+void scsiWrite(const uint8_t* data, uint32_t count);
+void scsiRead(uint8_t* data, uint32_t count, int* parityError);
+void scsiWriteByte(uint8_t value);
+uint8_t scsiReadByte(void);
+
+// Non-blocking data transfer.
+// Depending on platform support the start() function may block.
+// The start function can be called multiple times, it may internally
+// either combine transfers or block until previous transfer completes.
+void scsiStartWrite(const uint8_t* data, uint32_t count);
+void scsiFinishWrite();
+
+// Query whether the data at pointer has already been read, i.e. buffer can be reused.
+// If data is NULL, checks if all writes have completed.
+bool scsiIsWriteFinished(const uint8_t *data);
+
+
+#define s2s_getScsiRateKBs() 0
+
+#ifdef __cplusplus
+}
+#endif

+ 46 - 0
lib/ZuluSCSI_platform_RP2040/scsi_accel.pio

@@ -0,0 +1,46 @@
+; RP2040 PIO program for accelerating SCSI communication
+; Run "pioasm scsi_accel.pio scsi_accel.pio.h" to regenerate the C header from this.
+; GPIO mapping:
+; - 0-7: DB0-DB7
+; -   8: DBP
+; Side set is REQ pin
+
+.define REQ 9
+.define ACK 10
+
+; Delay from data setup to REQ assertion.
+; deskew delay + cable skew delay = 55 ns minimum
+; One clock cycle is 8 ns => delay 7 clocks
+.define REQ_DLY 7
+
+; Write to SCSI bus using asynchronous handshake.
+; Data is written as 16-bit words that contain the 8 data bits + 1 parity bit.
+; 7 bits in each word are discarded.
+; Number of bytes to send must be a multiple of 2.
+.program scsi_accel_async_write
+    .side_set 1
+
+    pull ifempty block          side 1  ; Get data from TX FIFO
+    out pins, 9                 side 1  ; Write data and parity bit
+    out null, 7 [REQ_DLY-2]     side 1  ; Discard unused bits, wait for data preset time
+    wait 1 gpio ACK             side 1  ; Wait for ACK to be inactive
+    wait 0 gpio ACK             side 0  ; Assert REQ, wait for ACK low
+
+; Read from SCSI bus using asynchronous handshake.
+; Data is returned as 16-bit words that contain the 8 data bits + 1 parity bit.
+; Number of bytes to receive minus 1 should be written to TX fifo.
+; Number of bytes to receive must be divisible by 2.
+.program scsi_accel_async_read
+    .side_set 1
+
+    pull block                  side 1  ; Get number of bytes to receive
+    mov x, osr                  side 1  ; Store to counter X
+
+start:
+    wait 1 gpio ACK             side 1  ; Wait for ACK high
+    wait 0 gpio ACK             side 0  ; Assert REQ, wait for ACK low
+    in pins, 9                  side 1  ; Deassert REQ, read GPIO
+    in null, 7                  side 1  ; Padding bits
+    push iffull block           side 1  ; Put data to RX FIFO
+    jmp x-- start               side 1  ; Decrement byte count and jump to start
+

+ 77 - 0
lib/ZuluSCSI_platform_RP2040/scsi_accel.pio.h

@@ -0,0 +1,77 @@
+// -------------------------------------------------- //
+// This file is autogenerated by pioasm; do not edit! //
+// -------------------------------------------------- //
+
+#pragma once
+
+#if !PICO_NO_HARDWARE
+#include "hardware/pio.h"
+#endif
+
+// ---------------------- //
+// scsi_accel_async_write //
+// ---------------------- //
+
+#define scsi_accel_async_write_wrap_target 0
+#define scsi_accel_async_write_wrap 4
+
+static const uint16_t scsi_accel_async_write_program_instructions[] = {
+            //     .wrap_target
+    0x90e0, //  0: pull   ifempty block   side 1     
+    0x7009, //  1: out    pins, 9         side 1     
+    0x7567, //  2: out    null, 7         side 1 [5] 
+    0x308a, //  3: wait   1 gpio, 10      side 1     
+    0x200a, //  4: wait   0 gpio, 10      side 0     
+            //     .wrap
+};
+
+#if !PICO_NO_HARDWARE
+static const struct pio_program scsi_accel_async_write_program = {
+    .instructions = scsi_accel_async_write_program_instructions,
+    .length = 5,
+    .origin = -1,
+};
+
+static inline pio_sm_config scsi_accel_async_write_program_get_default_config(uint offset) {
+    pio_sm_config c = pio_get_default_sm_config();
+    sm_config_set_wrap(&c, offset + scsi_accel_async_write_wrap_target, offset + scsi_accel_async_write_wrap);
+    sm_config_set_sideset(&c, 1, false, false);
+    return c;
+}
+#endif
+
+// --------------------- //
+// scsi_accel_async_read //
+// --------------------- //
+
+#define scsi_accel_async_read_wrap_target 0
+#define scsi_accel_async_read_wrap 7
+
+static const uint16_t scsi_accel_async_read_program_instructions[] = {
+            //     .wrap_target
+    0x90a0, //  0: pull   block           side 1     
+    0xb027, //  1: mov    x, osr          side 1     
+    0x308a, //  2: wait   1 gpio, 10      side 1     
+    0x200a, //  3: wait   0 gpio, 10      side 0     
+    0x5009, //  4: in     pins, 9         side 1     
+    0x5067, //  5: in     null, 7         side 1     
+    0x9060, //  6: push   iffull block    side 1     
+    0x1042, //  7: jmp    x--, 2          side 1     
+            //     .wrap
+};
+
+#if !PICO_NO_HARDWARE
+static const struct pio_program scsi_accel_async_read_program = {
+    .instructions = scsi_accel_async_read_program_instructions,
+    .length = 8,
+    .origin = -1,
+};
+
+static inline pio_sm_config scsi_accel_async_read_program_get_default_config(uint offset) {
+    pio_sm_config c = pio_get_default_sm_config();
+    sm_config_set_wrap(&c, offset + scsi_accel_async_read_wrap_target, offset + scsi_accel_async_read_wrap);
+    sm_config_set_sideset(&c, 1, false, false);
+    return c;
+}
+#endif
+

+ 464 - 0
lib/ZuluSCSI_platform_RP2040/scsi_accel_rp2040.cpp

@@ -0,0 +1,464 @@
+/* Data flow in SCSI acceleration:
+ *
+ * 1. Application provides a buffer of bytes to send.
+ * 2. Code in this module adds parity bit to the bytes and packs two bytes into 32 bit words.
+ * 3. DMA controller copies the words to PIO peripheral FIFO.
+ * 4. PIO peripheral handles low-level SCSI handshake and writes bytes and parity to GPIO.
+ */
+
+#include "ZuluSCSI_platform.h"
+#include "ZuluSCSI_log.h"
+#include "scsi_accel_rp2040.h"
+#include "scsi_accel.pio.h"
+#include <hardware/pio.h>
+#include <hardware/dma.h>
+#include <hardware/irq.h>
+#include <hardware/structs/iobank0.h>
+
+#define SCSI_DMA_PIO pio0
+#define SCSI_DMA_SM 0
+#define SCSI_DMA_CH 0
+
+// Selects which of the two DMA staging buffers (A or B) the DMA channel is
+// currently transferring from; SCSIBUF_NONE means neither.
+enum scsidma_buf_sel_t { SCSIBUF_NONE = 0, SCSIBUF_A = 1, SCSIBUF_B = 2 };
+
+// Size of each DMA staging buffer in 32-bit words (refill_dmabuf() packs two
+// data bytes into every word).
+#define DMA_BUF_SIZE 128
+// Shared state of the accelerated write path. It is updated both by the
+// foreground code and by the DMA interrupt handler (scsi_dma_write_irq).
+static struct {
+    uint8_t *app_buf; // Buffer provided by application
+    uint32_t app_bytes; // Bytes available in application buffer
+    uint32_t dma_bytes; // Bytes that have been converted to DMA buffer so far
+    
+    uint8_t *next_app_buf; // Next buffer from application after current one finishes
+    uint32_t next_app_bytes; // Bytes in next buffer
+
+    // PIO configurations
+    // Instruction memory offsets returned by pio_add_program() and the state
+    // machine configurations prepared in scsi_accel_rp2040_init().
+    uint32_t pio_offset_async_write;
+    uint32_t pio_offset_async_read;
+    pio_sm_config pio_cfg_async_write;
+    pio_sm_config pio_cfg_async_read;
+
+    // DMA configurations
+    dma_channel_config dma_write_config;
+
+    // We use two DMA buffers alternately.
+    // The buffer contains the data bytes with parity added.
+    scsidma_buf_sel_t dma_current_buf;
+    uint32_t dma_countA; // Words ready in dma_bufA (0 = empty)
+    uint32_t dma_countB; // Words ready in dma_bufB (0 = empty)
+    uint32_t dma_bufA[DMA_BUF_SIZE];
+    uint32_t dma_bufB[DMA_BUF_SIZE];
+} g_scsi_dma;
+
+// State of the accelerated transfer engine.
+// SCSIDMA_WRITE_DONE is set by scsi_dma_write_irq() when the DMA has run out of
+// data; scsi_accel_rp2040_stopWrite() then returns the state to SCSIDMA_IDLE.
+enum scsidma_state_t { SCSIDMA_IDLE = 0,
+                       SCSIDMA_WRITE, SCSIDMA_WRITE_DONE,
+                       SCSIDMA_READ };
+// volatile: written from the DMA interrupt handler and polled by foreground code.
+static volatile scsidma_state_t g_scsi_dma_state;
+// True once scsi_accel_rp2040_init() has claimed the PIO state machine and DMA channel.
+static bool g_channels_claimed = false;
+
+// Make the queued application buffer (possibly none) the current one and
+// clear the queue slot. Conversion restarts from the beginning of the buffer.
+static void scsidma_promote_queued_buf()
+{
+    g_scsi_dma.dma_bytes = 0;
+    g_scsi_dma.app_buf = g_scsi_dma.next_app_buf;
+    g_scsi_dma.app_bytes = g_scsi_dma.next_app_bytes;
+    g_scsi_dma.next_app_buf = 0;
+    g_scsi_dma.next_app_bytes = 0;
+}
+
+// Convert the next chunk of application data into `buf` (which must hold
+// DMA_BUF_SIZE words) and return the number of 32-bit words written.
+// Each word holds two consecutive data bytes after translation through
+// g_scsi_parity_lookup: the first byte in the low half, the second byte
+// shifted into the high half. Returns 0 when there is no more data.
+static uint32_t refill_dmabuf(uint32_t *buf)
+{
+    // Nothing left in the current request but another one is queued: switch over.
+    if (g_scsi_dma.app_bytes == 0 && g_scsi_dma.next_app_bytes > 0)
+    {
+        scsidma_promote_queued_buf();
+    }
+
+    // Two input bytes per output word, limited to one staging buffer.
+    uint32_t words = (g_scsi_dma.app_bytes - g_scsi_dma.dma_bytes) / 2;
+    if (words > DMA_BUF_SIZE)
+    {
+        words = DMA_BUF_SIZE;
+    }
+
+    const uint16_t *pairs = (uint16_t*)&g_scsi_dma.app_buf[g_scsi_dma.dma_bytes];
+    for (uint32_t i = 0; i < words; i++)
+    {
+        uint16_t input = pairs[i];
+        buf[i] = (g_scsi_parity_lookup[input & 0xFF])
+               | ((g_scsi_parity_lookup[input >> 8]) << 16);
+    }
+
+    g_scsi_dma.dma_bytes += words * 2;
+
+    // Whole request converted: move on to the queued request, if any.
+    if (g_scsi_dma.dma_bytes >= g_scsi_dma.app_bytes)
+    {
+        assert(g_scsi_dma.dma_bytes == g_scsi_dma.app_bytes);
+        scsidma_promote_queued_buf();
+    }
+
+    return words;
+}
+
+// Route the nine data bus pins (DB0..DB7 and parity) to the given GPIO function.
+static void scsidma_set_databus_func(uint32_t func)
+{
+    iobank0_hw->io[SCSI_IO_DB0].ctrl  = func;
+    iobank0_hw->io[SCSI_IO_DB1].ctrl  = func;
+    iobank0_hw->io[SCSI_IO_DB2].ctrl  = func;
+    iobank0_hw->io[SCSI_IO_DB3].ctrl  = func;
+    iobank0_hw->io[SCSI_IO_DB4].ctrl  = func;
+    iobank0_hw->io[SCSI_IO_DB5].ctrl  = func;
+    iobank0_hw->io[SCSI_IO_DB6].ctrl  = func;
+    iobank0_hw->io[SCSI_IO_DB7].ctrl  = func;
+    iobank0_hw->io[SCSI_IO_DBP].ctrl  = func;
+}
+
+// Hand the data bus and REQ pins either to the PIO peripheral or to the
+// software controlled SIO, depending on g_scsi_dma_state:
+//  - IDLE:  everything under SIO control
+//  - WRITE: data bus and REQ driven by the PIO
+//  - READ:  data bus stays on SIO, only REQ is driven by the PIO
+// Other states leave the pin routing untouched.
+static void scsidma_config_gpio()
+{
+    if (g_scsi_dma_state == SCSIDMA_IDLE)
+    {
+        scsidma_set_databus_func(GPIO_FUNC_SIO);
+        iobank0_hw->io[SCSI_OUT_REQ].ctrl = GPIO_FUNC_SIO;
+        return;
+    }
+
+    if (g_scsi_dma_state == SCSIDMA_WRITE)
+    {
+        // Preset all ten pins high and as outputs before the PIO takes them over
+        pio_sm_set_pins(SCSI_DMA_PIO, SCSI_DMA_SM, 0x3FF);
+        pio_sm_set_consecutive_pindirs(SCSI_DMA_PIO, SCSI_DMA_SM, 0, 10, true);
+
+        scsidma_set_databus_func(GPIO_FUNC_PIO0);
+        iobank0_hw->io[SCSI_OUT_REQ].ctrl = GPIO_FUNC_PIO0;
+        return;
+    }
+
+    if (g_scsi_dma_state == SCSIDMA_READ)
+    {
+        // Nine data pins become inputs, the REQ pin remains an output
+        pio_sm_set_pins(SCSI_DMA_PIO, SCSI_DMA_SM, 0x3FF);
+        pio_sm_set_consecutive_pindirs(SCSI_DMA_PIO, SCSI_DMA_SM, 0, 9, false);
+        pio_sm_set_consecutive_pindirs(SCSI_DMA_PIO, SCSI_DMA_SM, 9, 1, true);
+
+        scsidma_set_databus_func(GPIO_FUNC_SIO);
+        iobank0_hw->io[SCSI_OUT_REQ].ctrl = GPIO_FUNC_PIO0;
+    }
+}
+
+// Begin a DMA write transfer for the request currently described by g_scsi_dma.
+// Both staging buffers are filled up front; scsi_dma_write_irq() then
+// alternates between them as each DMA transfer completes.
+static void start_dma_write()
+{
+    // Prefill both DMA buffers
+    g_scsi_dma.dma_countA = refill_dmabuf(g_scsi_dma.dma_bufA);
+    g_scsi_dma.dma_countB = refill_dmabuf(g_scsi_dma.dma_bufB);
+    
+    // Start DMA from buffer A
+    g_scsi_dma.dma_current_buf = SCSIBUF_A;
+    dma_channel_configure(SCSI_DMA_CH,
+        &g_scsi_dma.dma_write_config,
+        &SCSI_DMA_PIO->txf[SCSI_DMA_SM], // write address: PIO TX FIFO
+        g_scsi_dma.dma_bufA,             // read address: staging buffer A
+        g_scsi_dma.dma_countA,           // transfer count in 32-bit words
+        true                             // trigger immediately
+    );
+}
+
+// DMA interrupt handler for the write path (installed on DMA_IRQ_0 by
+// scsi_accel_rp2040_startWrite()).
+// When the transfer from one staging buffer finishes, the other buffer is
+// started first (if it has data) and only then is the finished one refilled.
+// When no data remains, the state changes to SCSIDMA_WRITE_DONE.
+static void scsi_dma_write_irq()
+{
+    // Acknowledge the interrupt for our channel
+    dma_hw->ints0 = 1 << SCSI_DMA_CH;
+
+    if (g_scsi_dma.dma_current_buf == SCSIBUF_A)
+    {
+        // Transfer from buffer A finished
+        g_scsi_dma.dma_countA = 0;
+        g_scsi_dma.dma_current_buf = SCSIBUF_NONE;
+
+        if (g_scsi_dma.dma_countB != 0)
+        {
+            // Start transferring buffer B immediately
+            // (count is set without triggering; writing the read address triggers the channel)
+            dma_channel_set_trans_count(SCSI_DMA_CH, g_scsi_dma.dma_countB, false);
+            dma_channel_set_read_addr(SCSI_DMA_CH, g_scsi_dma.dma_bufB, true);
+            g_scsi_dma.dma_current_buf = SCSIBUF_B;
+
+            // Refill buffer A for next time
+            g_scsi_dma.dma_countA = refill_dmabuf(g_scsi_dma.dma_bufA);
+        }
+    }
+    else
+    {
+        // Transfer from buffer B finished
+        g_scsi_dma.dma_countB = 0;
+        g_scsi_dma.dma_current_buf = SCSIBUF_NONE;
+
+        if (g_scsi_dma.dma_countA != 0)
+        {
+            // Start transferring buffer A immediately
+            dma_channel_set_trans_count(SCSI_DMA_CH, g_scsi_dma.dma_countA, false);
+            dma_channel_set_read_addr(SCSI_DMA_CH, g_scsi_dma.dma_bufA, true);
+            g_scsi_dma.dma_current_buf = SCSIBUF_A;
+
+            // Refill buffer B for next time
+            g_scsi_dma.dma_countB = refill_dmabuf(g_scsi_dma.dma_bufB);
+        }
+    }
+
+    if (g_scsi_dma.dma_current_buf == SCSIBUF_NONE)
+    {
+        // Both buffers are empty, check if we have more data
+        // (new data may have been queued by startWrite() since the last refill)
+        g_scsi_dma.dma_countA = refill_dmabuf(g_scsi_dma.dma_bufA);
+
+        if (g_scsi_dma.dma_countA == 0)
+        {
+            // End of data for DMA, but PIO may still have bytes in its buffer
+            g_scsi_dma_state = SCSIDMA_WRITE_DONE;
+        }
+        else
+        {
+            // Start transfer from buffer A
+            dma_channel_set_trans_count(SCSI_DMA_CH, g_scsi_dma.dma_countA, false);
+            dma_channel_set_read_addr(SCSI_DMA_CH, g_scsi_dma.dma_bufA, true);
+            g_scsi_dma.dma_current_buf = SCSIBUF_A;
+
+            // Refill B for the next interrupt
+            g_scsi_dma.dma_countB = refill_dmabuf(g_scsi_dma.dma_bufB);
+        }
+    }
+}
+
+// Start (or queue) an asynchronous write of `count` bytes from `data` to the SCSI bus.
+//
+// If a write is already running, the new data is merged into the current or
+// the queued request when it directly follows it in memory, or it is stored in
+// the single queue slot if that is free. Otherwise this waits for the previous
+// request to finish and then starts a new transfer.
+//
+// data:      bytes to send. Only the pointer is stored, so the buffer must stay
+//            valid until the data has been consumed.
+// count:     number of bytes, must be even.
+// resetFlag: checked while waiting for a previous request; if it is set the
+//            function returns without starting the new transfer.
+void scsi_accel_rp2040_startWrite(const uint8_t* data, uint32_t count, volatile int *resetFlag)
+{
+    // Number of bytes should always be divisible by 2.
+    assert((count & 1) == 0);
+
+    // The queue fields are also modified by the DMA interrupt handler,
+    // so interrupts stay disabled while they are examined and updated.
+    __disable_irq();
+    if (g_scsi_dma_state == SCSIDMA_WRITE)
+    {
+        if (!g_scsi_dma.next_app_buf && data == g_scsi_dma.app_buf + g_scsi_dma.app_bytes)
+        {
+            // Combine with currently running request
+            g_scsi_dma.app_bytes += count;
+            count = 0;
+        }
+        else if (data == g_scsi_dma.next_app_buf + g_scsi_dma.next_app_bytes)
+        {
+            // Combine with queued request
+            g_scsi_dma.next_app_bytes += count;
+            count = 0;
+        }
+        else if (!g_scsi_dma.next_app_buf)
+        {
+            // Add as queued request
+            g_scsi_dma.next_app_buf = (uint8_t*)data;
+            g_scsi_dma.next_app_bytes = count;
+            count = 0;
+        }
+    }
+    __enable_irq();
+
+    // Check if the request was combined
+    if (count == 0) return;
+
+    if (g_scsi_dma_state != SCSIDMA_IDLE && g_scsi_dma_state != SCSIDMA_WRITE_DONE)
+    {
+        // Wait for previous request to finish
+        scsi_accel_rp2040_finishWrite(resetFlag);
+        if (*resetFlag)
+        {
+            return;
+        }
+    }
+
+    // Coming from IDLE the pins still belong to SIO and the state machine is
+    // stopped; coming from WRITE_DONE the PIO setup is still in place.
+    bool must_reconfig_gpio = (g_scsi_dma_state == SCSIDMA_IDLE);
+    g_scsi_dma_state = SCSIDMA_WRITE;
+    g_scsi_dma.app_buf = (uint8_t*)data;
+    g_scsi_dma.app_bytes = count;
+    g_scsi_dma.dma_bytes = 0;
+    g_scsi_dma.next_app_buf = 0;
+    g_scsi_dma.next_app_bytes = 0;
+    g_scsi_dma.dma_current_buf = SCSIBUF_NONE;
+    
+    if (must_reconfig_gpio)
+    {
+        SCSI_ENABLE_DATA_OUT();
+        pio_sm_init(SCSI_DMA_PIO, SCSI_DMA_SM, g_scsi_dma.pio_offset_async_write, &g_scsi_dma.pio_cfg_async_write);
+        scsidma_config_gpio();
+        pio_sm_set_enabled(SCSI_DMA_PIO, SCSI_DMA_SM, true);
+        
+        dma_channel_set_irq0_enabled(SCSI_DMA_CH, true);
+        irq_set_exclusive_handler(DMA_IRQ_0, scsi_dma_write_irq);
+        irq_set_enabled(DMA_IRQ_0, true);
+    }
+
+    start_dma_write();
+}
+
+// Report whether the byte at `data` has already been consumed by the write
+// path, i.e. whether the application may reuse that memory.
+// Returns true whenever no write is in progress. With data == NULL the result
+// only says whether all writes have completed.
+bool scsi_accel_rp2040_isWriteFinished(const uint8_t* data)
+{
+    // Nothing pending at all
+    if (g_scsi_dma_state == SCSIDMA_IDLE)
+    {
+        return true;
+    }
+    if (g_scsi_dma_state == SCSIDMA_WRITE_DONE)
+    {
+        return true;
+    }
+
+    // A write is running and the caller asked about "everything"
+    if (!data)
+    {
+        return false;
+    }
+
+    // Take a consistent look at the queue while the DMA interrupt is held off.
+    __disable_irq();
+    const uint8_t *current_first = g_scsi_dma.app_buf + g_scsi_dma.dma_bytes;
+    const uint8_t *current_end = g_scsi_dma.app_buf + g_scsi_dma.app_bytes;
+    const uint8_t *queued_first = g_scsi_dma.next_app_buf;
+    const uint8_t *queued_end = g_scsi_dma.next_app_buf + g_scsi_dma.next_app_bytes;
+
+    // Not yet converted part of the current transfer
+    bool in_current = (data >= current_first && data < current_end);
+    // Anywhere inside the queued transfer
+    bool in_queued = (data >= queued_first && data < queued_end);
+    __enable_irq();
+
+    return !(in_current || in_queued);
+}
+
+// Shut down the write path and give the pins back to software control.
+// First waits (at most 5 seconds) until the PIO TX FIFO is empty and
+// SCSI_IN(ACK) reads false. On timeout a message is logged and *resetFlag is
+// set. The wait also ends as soon as *resetFlag is set by someone else.
+void scsi_accel_rp2040_stopWrite(volatile int *resetFlag)
+{
+    const uint32_t wait_began = millis();
+    for (;;)
+    {
+        bool pending = !pio_sm_is_tx_fifo_empty(SCSI_DMA_PIO, SCSI_DMA_SM) || SCSI_IN(ACK);
+        if (!pending || *resetFlag)
+        {
+            break;
+        }
+
+        if ((uint32_t)(millis() - wait_began) > 5000)
+        {
+            azlog("scsi_accel_rp2040_stopWrite() timeout");
+            *resetFlag = 1;
+            break;
+        }
+    }
+
+    // Stop the DMA, then return to idle state and SIO controlled pins
+    dma_channel_abort(SCSI_DMA_CH);
+    dma_channel_set_irq0_enabled(SCSI_DMA_CH, false);
+    g_scsi_dma_state = SCSIDMA_IDLE;
+    SCSI_RELEASE_DATA_REQ();
+    scsidma_config_gpio();
+    pio_sm_set_enabled(SCSI_DMA_PIO, SCSI_DMA_SM, false);
+}
+
+// Block until the write path has returned to idle.
+// Once the DMA side reports SCSIDMA_WRITE_DONE, scsi_accel_rp2040_stopWrite()
+// is called to drain the PIO and release the pins. Gives up after 5 seconds
+// (logs a message and sets *resetFlag) or as soon as *resetFlag is set.
+void scsi_accel_rp2040_finishWrite(volatile int *resetFlag)
+{
+    const uint32_t wait_began = millis();
+    for (;;)
+    {
+        if (g_scsi_dma_state == SCSIDMA_IDLE)
+        {
+            break;
+        }
+        if (*resetFlag)
+        {
+            break;
+        }
+
+        if ((uint32_t)(millis() - wait_began) > 5000)
+        {
+            azlog("scsi_accel_rp2040_finishWrite() timeout");
+            *resetFlag = 1;
+            break;
+        }
+
+        if (g_scsi_dma_state == SCSIDMA_WRITE_DONE)
+        {
+            // DMA has nothing more to send; let the PIO finish and reconfigure GPIO.
+            scsi_accel_rp2040_stopWrite(resetFlag);
+        }
+    }
+}
+
+// Read `count` bytes from the SCSI bus into `buf` using the PIO read program.
+//
+// buf:         destination buffer of at least `count` bytes.
+// count:       number of bytes to read, must be even.
+// parityError: set to 1 if the block-wide parity check fails (never cleared here).
+// resetFlag:   polled while waiting for data; reading stops early when it is set.
+//
+// DMA is not used for reading: the rest of the software cannot take advantage
+// of it, and there is little else to do while waiting for the data. The RX
+// FIFO of the PIO is simply drained in a software loop.
+void scsi_accel_rp2040_read(uint8_t *buf, uint32_t count, int *parityError, volatile int *resetFlag)
+{
+    g_scsi_dma_state = SCSIDMA_READ;
+    pio_sm_init(SCSI_DMA_PIO, SCSI_DMA_SM, g_scsi_dma.pio_offset_async_read, &g_scsi_dma.pio_cfg_async_read);
+    scsidma_config_gpio();
+    pio_sm_set_enabled(SCSI_DMA_PIO, SCSI_DMA_SM, true);
+
+    // The PIO program takes its loop counter (count - 1) from the TX FIFO.
+    assert((count & 1) == 0);
+    pio_sm_put(SCSI_DMA_PIO, SCSI_DMA_SM, count - 1);
+
+    // Every FIFO word carries two bus samples: one in the low half, one in the high half.
+    uint32_t parity_accum = 0;
+    uint8_t *out = buf;
+    uint8_t *const out_end = buf + count;
+    while (out < out_end && !*resetFlag)
+    {
+        for (uint32_t pending = pio_sm_get_rx_fifo_level(SCSI_DMA_PIO, SCSI_DMA_SM); pending > 0; pending--)
+        {
+            uint32_t raw = pio_sm_get(SCSI_DMA_PIO, SCSI_DMA_SM);
+            parity_accum ^= raw;
+
+            // The stored bytes are the bitwise inverse of the sampled pin values
+            uint32_t inverted = ~raw;
+            *out++ = inverted & 0xFF;
+            *out++ = inverted >> 16;
+        }
+    }
+
+    // Parity is verified once over the XOR of all received words.
+    // An even number of parity errors in the block goes undetected.
+    uint8_t byte0 = ~(parity_accum & 0xFF);
+    uint8_t byte1 = ~(parity_accum >> 16);
+    uint32_t expected = (g_scsi_parity_lookup[byte1] << 16) | g_scsi_parity_lookup[byte0];
+    if (parity_accum != expected)
+    {
+        azlog("Parity error in scsi_accel_rp2040_read(): ", parity_accum);
+        *parityError = 1;
+    }
+
+    // Back to idle: release REQ and return the pins to software control
+    g_scsi_dma_state = SCSIDMA_IDLE;
+    SCSI_RELEASE_DATA_REQ();
+    scsidma_config_gpio();
+    pio_sm_set_enabled(SCSI_DMA_PIO, SCSI_DMA_SM, false);
+}
+
+// Initialize the SCSI acceleration module.
+// Puts the pins under software control, claims the PIO state machine and DMA
+// channel (only on the first call), loads both PIO programs and prepares the
+// state machine and DMA configurations that the transfer functions apply later.
+// The function may be called again; the PIO instruction memory is cleared and
+// the programs are reloaded each time.
+void scsi_accel_rp2040_init()
+{
+    g_scsi_dma_state = SCSIDMA_IDLE;
+    scsidma_config_gpio();
+
+    // Mark channels as being in use, unless it has been done already
+    if (!g_channels_claimed)
+    {
+        pio_sm_claim(SCSI_DMA_PIO, SCSI_DMA_SM);
+        dma_channel_claim(SCSI_DMA_CH);
+        g_channels_claimed = true;
+    }
+
+    // Load PIO programs
+    pio_clear_instruction_memory(SCSI_DMA_PIO);
+
+    // Asynchronous SCSI write:
+    // 9 output pins starting at DB0, REQ as side-set pin, FIFOs joined for TX,
+    // shift right without autopull.
+    g_scsi_dma.pio_offset_async_write = pio_add_program(SCSI_DMA_PIO, &scsi_accel_async_write_program);
+    g_scsi_dma.pio_cfg_async_write = scsi_accel_async_write_program_get_default_config(g_scsi_dma.pio_offset_async_write);
+    sm_config_set_out_pins(&g_scsi_dma.pio_cfg_async_write, SCSI_IO_DB0, 9);
+    sm_config_set_sideset_pins(&g_scsi_dma.pio_cfg_async_write, SCSI_OUT_REQ);
+    sm_config_set_fifo_join(&g_scsi_dma.pio_cfg_async_write, PIO_FIFO_JOIN_TX);
+    sm_config_set_out_shift(&g_scsi_dma.pio_cfg_async_write, true, false, 32);
+
+    // Asynchronous SCSI read:
+    // input pins starting at DB0, REQ as side-set pin, shift right with
+    // autopush after 32 bits.
+    g_scsi_dma.pio_offset_async_read = pio_add_program(SCSI_DMA_PIO, &scsi_accel_async_read_program);
+    g_scsi_dma.pio_cfg_async_read = scsi_accel_async_read_program_get_default_config(g_scsi_dma.pio_offset_async_read);
+    sm_config_set_in_pins(&g_scsi_dma.pio_cfg_async_read, SCSI_IO_DB0);
+    sm_config_set_sideset_pins(&g_scsi_dma.pio_cfg_async_read, SCSI_OUT_REQ);
+    // Apply the out-shift settings to the READ configuration. This call used to
+    // target pio_cfg_async_write, a copy-paste slip that merely repeated the
+    // write setup above and left the read configuration untouched.
+    sm_config_set_out_shift(&g_scsi_dma.pio_cfg_async_read, true, false, 32);
+    sm_config_set_in_shift(&g_scsi_dma.pio_cfg_async_read, true, true, 32);
+
+    // Create DMA channel configuration so it can be applied quickly later:
+    // 32-bit transfers from an incrementing memory address to the fixed PIO
+    // TX FIFO address, paced by the state machine's TX data request.
+    dma_channel_config cfg = dma_channel_get_default_config(SCSI_DMA_CH);
+    channel_config_set_transfer_data_size(&cfg, DMA_SIZE_32);
+    channel_config_set_read_increment(&cfg, true);
+    channel_config_set_write_increment(&cfg, false);
+    channel_config_set_dreq(&cfg, pio_get_dreq(SCSI_DMA_PIO, SCSI_DMA_SM, true));
+    g_scsi_dma.dma_write_config = cfg;
+}

+ 17 - 0
lib/ZuluSCSI_platform_RP2040/scsi_accel_rp2040.h

@@ -0,0 +1,17 @@
+// Accelerated SCSI subroutines using RP2040 hardware PIO peripheral.
+
+#pragma once
+
+#include <stdint.h>
+
+void scsi_accel_rp2040_init();
+
+void scsi_accel_rp2040_startWrite(const uint8_t* data, uint32_t count, volatile int *resetFlag);
+void scsi_accel_rp2040_stopWrite(volatile int *resetFlag);
+void scsi_accel_rp2040_finishWrite(volatile int *resetFlag);
+
+// Query whether the data at pointer has already been read, i.e. buffer can be reused.
+// If data is NULL, checks if all writes have completed.
+bool scsi_accel_rp2040_isWriteFinished(const uint8_t* data);
+
+void scsi_accel_rp2040_read(uint8_t *buf, uint32_t count, int *parityError, volatile int *resetFlag);

+ 476 - 0
lib/ZuluSCSI_platform_RP2040/sd_card_sdio.cpp

@@ -0,0 +1,476 @@
+// Driver for accessing SD card in SDIO mode on RP2040.
+
+#include "ZuluSCSI_platform.h"
+
+#ifdef SD_USE_SDIO
+
+#include "ZuluSCSI_log.h"
+#include "rp2040_sdio.h"
+#include <hardware/gpio.h>
+#include <SdFat.h>
+#include <SdCard/SdCardInfo.h>
+
+// Card state collected by SdioCard::begin() and status of the latest operation.
+static uint32_t g_sdio_ocr; // Operating condition register from card
+static uint32_t g_sdio_rca; // Relative card address
+static cid_t g_sdio_cid; // Card identification register, read with CMD2
+static int g_sdio_error_line; // Source line of the latest failed checkReturnOk()
+static sdio_status_t g_sdio_error; // Status of the latest SDIO operation
+static uint32_t g_sdio_dma_buf[128]; // One-sector (512 byte) bounce buffer for unaligned transfers
+
+// Evaluate an SDIO call, store its status in g_sdio_error and yield true on
+// SDIO_OK. On any other status the failure is logged together with the source
+// line of the macro invocation and the expression yields false.
+#define checkReturnOk(call) ((g_sdio_error = (call)) == SDIO_OK ? true : logSDError(__LINE__))
+// Record the failing source line, log the current g_sdio_error code and
+// return false (so that it can be used as the failure value of checkReturnOk).
+static bool logSDError(int line)
+{
+    g_sdio_error_line = line;
+    azlog("SDIO SD card error on line ", line, ", error code ", (int)g_sdio_error);
+    return false;
+}
+
+// Callback used by SCSI code for simultaneous processing
+static sd_callback_t m_stream_callback; // Progress callback, or NULL when none is registered
+static const uint8_t *m_stream_buffer; // Start of the buffer the callback refers to
+static uint32_t m_stream_count; // Bytes of m_stream_buffer handed to SD transfers so far
+static uint32_t m_stream_count_start; // Value of m_stream_count when the latest transfer began
+
+// Register (or, with func == NULL, remove) the progress callback for SD card
+// transfers that use `buffer`. The byte counters restart from zero.
+void azplatform_set_sd_callback(sd_callback_t func, const uint8_t *buffer)
+{
+    m_stream_callback = func;
+    m_stream_buffer = buffer;
+    m_stream_count = 0;
+    m_stream_count_start = 0;
+}
+
+// Decide whether the registered progress callback applies to a transfer of
+// `count` bytes at `buf`.
+// The callback is returned only if `buf` continues exactly where the previous
+// transfer on the registered buffer ended; the running byte count is then
+// advanced. Otherwise NULL is returned (with a debug message on a mismatch).
+// m_stream_count_start always receives the byte count from before this call.
+static sd_callback_t get_stream_callback(const uint8_t *buf, uint32_t count)
+{
+    m_stream_count_start = m_stream_count;
+
+    // No callback registered
+    if (!m_stream_callback)
+    {
+        return NULL;
+    }
+
+    const uint8_t *expected = m_stream_buffer + m_stream_count;
+    if (buf != expected)
+    {
+        azdbg("Stream buffer mismatch: ", (uint32_t)buf, " vs. ", (uint32_t)expected);
+        return NULL;
+    }
+
+    m_stream_count += count;
+    return m_stream_callback;
+}
+
+// Initialize the SD card in SDIO mode.
+// Sequence: CMD0/CMD8 handshake (up to 5 attempts), ACMD41 until the card
+// reports ready (1 second timeout), read CID, get the relative card address,
+// select the card, switch to 4-bit bus and finally raise the clock rate.
+// Returns true on success. On failure the reason is logged and, for steps
+// wrapped in checkReturnOk(), recorded in g_sdio_error / g_sdio_error_line.
+// The sdioConfig argument is not used by this implementation.
+bool SdioCard::begin(SdioConfig sdioConfig)
+{
+    uint32_t reply;
+    sdio_status_t status;
+
+    // Initialize at 1 MHz clock speed
+    // NOTE(review): the argument is presumably a clock divider (25 -> slow
+    // clock, 1 -> full speed) - confirm against rp2040_sdio_init().
+    rp2040_sdio_init(25);
+
+    // Establish initial connection with the card
+    for (int retries = 0; retries < 5; retries++)
+    {
+        delayMicroseconds(1000);
+        reply = 0;
+        rp2040_sdio_command_R1(CMD0, 0, NULL); // GO_IDLE_STATE
+        status = rp2040_sdio_command_R1(CMD8, 0x1AA, &reply); // SEND_IF_COND
+
+        if (status == SDIO_OK && reply == 0x1AA)
+        {
+            break;
+        }
+    }
+
+    if (reply != 0x1AA || status != SDIO_OK)
+    {
+        azdbg("SDIO not responding to CMD8 SEND_IF_COND, status ", (int)status, " reply ", reply);
+        return false;
+    }
+
+    // Send ACMD41 to begin card initialization and wait for it to complete
+    uint32_t start = millis();
+    do {
+        if (!checkReturnOk(rp2040_sdio_command_R1(CMD55, 0, &reply)) || // APP_CMD
+            !checkReturnOk(rp2040_sdio_command_R3(ACMD41, 0xD0040000, &g_sdio_ocr))) // 3.0V voltage
+            // !checkReturnOk(rp2040_sdio_command_R1(ACMD41, 0xC0100000, &g_sdio_ocr)))
+        {
+            return false;
+        }
+
+        if ((uint32_t)(millis() - start) > 1000)
+        {
+            azlog("SDIO card initialization timeout");
+            return false;
+        }
+    // OCR bit 31 is tested as the "initialization complete" flag. The mask is
+    // built from an unsigned constant: (1 << 31) would shift into the sign bit
+    // of a signed int.
+    } while (!(g_sdio_ocr & ((uint32_t)1 << 31)));
+
+    // Get CID
+    if (!checkReturnOk(rp2040_sdio_command_R2(CMD2, 0, (uint8_t*)&g_sdio_cid)))
+    {
+        azdbg("SDIO failed to read CID");
+        return false;
+    }
+
+    // Get relative card address
+    if (!checkReturnOk(rp2040_sdio_command_R1(CMD3, 0, &g_sdio_rca)))
+    {
+        azdbg("SDIO failed to get RCA");
+        return false;
+    }
+
+    // Select card
+    if (!checkReturnOk(rp2040_sdio_command_R1(CMD7, g_sdio_rca, &reply)))
+    {
+        azdbg("SDIO failed to select card");
+        return false;
+    }
+
+    // Set 4-bit bus mode
+    if (!checkReturnOk(rp2040_sdio_command_R1(CMD55, g_sdio_rca, &reply)) ||
+        !checkReturnOk(rp2040_sdio_command_R1(ACMD6, 2, &reply)))
+    {
+        azdbg("SDIO failed to set bus width");
+        return false;
+    }
+
+    // Increase to 25 MHz clock rate
+    rp2040_sdio_init(1);
+
+    return true;
+}
+
+// Status code of the latest SDIO operation (g_sdio_error), truncated to 8 bits.
+uint8_t SdioCard::errorCode() const
+{
+    return g_sdio_error;
+}
+
+// No additional error data is tracked by this driver; always returns 0.
+uint32_t SdioCard::errorData() const
+{
+    return 0;
+}
+
+// Source line in this file where checkReturnOk() last reported a failure.
+uint32_t SdioCard::errorLine() const
+{
+    return g_sdio_error_line;
+}
+
+// The card is treated as busy while the SDIO_D0 GPIO input reads low.
+bool SdioCard::isBusy() 
+{
+    return (sio_hw->gpio_in & (1 << SDIO_D0)) == 0;
+}
+
+// The SD clock frequency is not reported by this driver; always returns 0.
+uint32_t SdioCard::kHzSdClk()
+{
+    return 0;
+}
+
+// Copy the CID that was read during begin() into *cid. No command is sent to
+// the card; always returns true.
+bool SdioCard::readCID(cid_t* cid)
+{
+    *cid = g_sdio_cid;
+    return true;
+}
+
+// Read the card specific data register into *csd using CMD9.
+// Returns false (and logs the error) if the command fails.
+bool SdioCard::readCSD(csd_t* csd)
+{
+    return checkReturnOk(rp2040_sdio_command_R2(CMD9, g_sdio_rca, (uint8_t*)csd)); // SEND_CSD
+}
+
+// Card presence check for the main program.
+// Note that *ocr receives the reply to CMD13 (card status), not the OCR register.
+bool SdioCard::readOCR(uint32_t* ocr)
+{
+    // SDIO mode does not have CMD58, but main program uses this to
+    // poll for card presence. Return status register instead.
+    return checkReturnOk(rp2040_sdio_command_R1(CMD13, g_sdio_rca, ocr));
+}
+
+// Streaming read interface is not implemented in this driver:
+// logs a message and returns false.
+bool SdioCard::readData(uint8_t* dst)
+{
+    azlog("SdioCard::readData() called but not implemented!");
+    return false;
+}
+
+// Streaming read interface is not implemented in this driver:
+// logs a message and returns false.
+bool SdioCard::readStart(uint32_t sector)
+{
+    azlog("SdioCard::readStart() called but not implemented!");
+    return false;
+}
+
+// Streaming read interface is not implemented in this driver:
+// logs a message and returns false.
+bool SdioCard::readStop()
+{
+    azlog("SdioCard::readStop() called but not implemented!");
+    return false;
+}
+
+// Number of 512-byte sectors on the card, computed from the CSD register.
+// Returns 0 if the CSD cannot be read. Previously a failed read was ignored
+// and the capacity was computed from an uninitialized csd_t.
+uint32_t SdioCard::sectorCount()
+{
+    csd_t csd;
+    if (!readCSD(&csd))
+    {
+        return 0;
+    }
+    return sdCardCapacity(&csd);
+}
+
+// Query the card status with CMD13.
+// Returns the reply word, or 0 if the command fails (the failure is logged).
+uint32_t SdioCard::status()
+{
+    uint32_t card_status = 0;
+    const bool ok = checkReturnOk(rp2040_sdio_command_R1(CMD13, g_sdio_rca, &card_status));
+    return ok ? card_status : 0;
+}
+
+// Send CMD12 (STOP_TRANSMISSION) to end a multi-block transfer.
+//
+// blocking: when true, additionally wait up to 100 ms for the card to stop
+//           signalling busy. The registered stream callback (if any) keeps
+//           being invoked during the wait.
+// Returns false if the command fails or the card is still busy after the wait.
+bool SdioCard::stopTransmission(bool blocking)
+{
+    uint32_t reply;
+    if (!checkReturnOk(rp2040_sdio_command_R1(CMD12, 0, &reply)))
+    {
+        return false;
+    }
+
+    if (!blocking)
+    {
+        return true;
+    }
+
+    // Measure elapsed time by subtraction so that the timeout stays correct
+    // when millis() wraps around. Comparing against a precomputed
+    // "millis() + 100" end time would end the wait immediately near the wrap.
+    uint32_t start = millis();
+    while ((uint32_t)(millis() - start) < 100 && isBusy())
+    {
+        if (m_stream_callback)
+        {
+            m_stream_callback(m_stream_count);
+        }
+    }
+
+    if (isBusy())
+    {
+        azlog("SdioCard::stopTransmission() timeout");
+        return false;
+    }
+
+    return true;
+}
+
+// Nothing to do here in this driver; always reports success.
+bool SdioCard::syncDevice()
+{
+    return true;
+}
+
+// Card type derived from bit 30 of the OCR value obtained during begin():
+// SD_CARD_TYPE_SDHC when the bit is set, SD_CARD_TYPE_SD2 otherwise.
+uint8_t SdioCard::type() const
+{
+    const bool ocr_bit30_set = (g_sdio_ocr & (1 << 30)) != 0;
+    return ocr_bit30_set ? SD_CARD_TYPE_SDHC : SD_CARD_TYPE_SD2;
+}
+
+// Streaming write interface is not implemented in this driver:
+// logs a message and returns false.
+bool SdioCard::writeData(const uint8_t* src)
+{
+    azlog("SdioCard::writeData() called but not implemented!");
+    return false;
+}
+
+// Streaming write interface is not implemented in this driver:
+// logs a message and returns false.
+bool SdioCard::writeStart(uint32_t sector)
+{
+    azlog("SdioCard::writeStart() called but not implemented!");
+    return false;
+}
+
+// Streaming write interface is not implemented in this driver:
+// logs a message and returns false.
+bool SdioCard::writeStop()
+{
+    azlog("SdioCard::writeStop() called but not implemented!");
+    return false;
+}
+
+// Sector erase is not supported by this driver; always returns false
+// without touching the card.
+bool SdioCard::erase(uint32_t firstSector, uint32_t lastSector)
+{
+    return false;
+    // return checkReturnOk(sd_erase(firstSector * 512, lastSector * 512));
+}
+
+/* Writing and reading, with progress callback */
+
+// Write one 512-byte sector from `src` to the card.
+// A source that is not 32-bit aligned is first copied to the bounce buffer.
+// While the transfer is polled, the stream callback (when it applies to this
+// buffer) is told how many bytes have been processed.
+// Returns true on success; failures are logged and left in g_sdio_error.
+bool SdioCard::writeSector(uint32_t sector, const uint8_t* src)
+{
+    const bool unaligned = (((uint32_t)src & 3) != 0);
+    if (unaligned)
+    {
+        // The transfer needs an aligned buffer, so send a copy instead.
+        memcpy(g_sdio_dma_buf, src, sizeof(g_sdio_dma_buf));
+        src = (uint8_t*)g_sdio_dma_buf;
+    }
+
+    // Progress reporting is only possible if the buffer matches the registered one.
+    sd_callback_t progress_cb = get_stream_callback(src, 512);
+
+    uint32_t response;
+    if (!checkReturnOk(rp2040_sdio_command_R1(16, 512, &response))) // SET_BLOCKLEN
+    {
+        return false;
+    }
+    if (!checkReturnOk(rp2040_sdio_command_R1(CMD24, sector, &response))) // WRITE_BLOCK
+    {
+        return false;
+    }
+    if (!checkReturnOk(rp2040_sdio_tx_start(src, 1))) // Start transmission
+    {
+        return false;
+    }
+
+    // Poll until the transfer is no longer busy
+    for (;;)
+    {
+        uint32_t bytes_done;
+        g_sdio_error = rp2040_sdio_tx_poll(&bytes_done);
+
+        if (progress_cb)
+        {
+            progress_cb(m_stream_count_start + bytes_done);
+        }
+
+        if (g_sdio_error != SDIO_BUSY)
+        {
+            break;
+        }
+    }
+
+    const bool ok = (g_sdio_error == SDIO_OK);
+    if (!ok)
+    {
+        azlog("SdioCard::writeSector(", sector, ") failed: ", (int)g_sdio_error);
+    }
+
+    return ok;
+}
+
+// Write `n` consecutive 512-byte sectors starting at `sector` from `src`.
+// An unaligned source is written one sector at a time through writeSector().
+// Otherwise a single multi-block write is issued and then terminated with
+// stopTransmission(). While the transfer is polled, the stream callback (when
+// it applies to this buffer) is told how many bytes have been processed.
+// Returns true only if the transfer and the final stop both succeed.
+bool SdioCard::writeSectors(uint32_t sector, const uint8_t* src, size_t n)
+{
+    const bool unaligned = (((uint32_t)src & 3) != 0);
+    if (unaligned)
+    {
+        // Fall back to sector-by-sector writes, which handle the alignment.
+        size_t done = 0;
+        while (done < n)
+        {
+            if (!writeSector(sector + done, src + 512 * done))
+            {
+                return false;
+            }
+            done++;
+        }
+        return true;
+    }
+
+    sd_callback_t progress_cb = get_stream_callback(src, n * 512);
+
+    uint32_t response;
+    if (!checkReturnOk(rp2040_sdio_command_R1(16, 512, &response))) // SET_BLOCKLEN
+    {
+        return false;
+    }
+    if (!checkReturnOk(rp2040_sdio_command_R1(CMD55, g_sdio_rca, &response))) // APP_CMD
+    {
+        return false;
+    }
+    if (!checkReturnOk(rp2040_sdio_command_R1(ACMD23, n, &response))) // SET_WR_CLK_ERASE_COUNT
+    {
+        return false;
+    }
+    if (!checkReturnOk(rp2040_sdio_command_R1(CMD25, sector, &response))) // WRITE_MULTIPLE_BLOCK
+    {
+        return false;
+    }
+    if (!checkReturnOk(rp2040_sdio_tx_start(src, n))) // Start transmission
+    {
+        return false;
+    }
+
+    // Poll until the transfer is no longer busy
+    for (;;)
+    {
+        uint32_t bytes_done;
+        g_sdio_error = rp2040_sdio_tx_poll(&bytes_done);
+
+        if (progress_cb)
+        {
+            progress_cb(m_stream_count_start + bytes_done);
+        }
+
+        if (g_sdio_error != SDIO_BUSY)
+        {
+            break;
+        }
+    }
+
+    if (g_sdio_error == SDIO_OK)
+    {
+        return stopTransmission(true);
+    }
+
+    // The transfer failed: report it, still end the multi-block command.
+    azlog("SdioCard::writeSectors(", sector, ",...,", (int)n, ") failed: ", (int)g_sdio_error);
+    stopTransmission(true);
+    return false;
+}
+
+// Read one 512-byte sector from the card into `dst`.
+// If `dst` is not 32-bit aligned the data is received into the bounce buffer
+// and copied to `dst` afterwards. While the transfer is polled, the stream
+// callback (when it applies to the receive buffer) is told how many bytes
+// have been processed.
+// Returns true on success; failures are logged and left in g_sdio_error.
+bool SdioCard::readSector(uint32_t sector, uint8_t* dst)
+{
+    // Receive directly into dst when possible, otherwise into the aligned bounce buffer.
+    uint8_t *rx_buf = dst;
+    if (((uint32_t)dst & 3) != 0)
+    {
+        rx_buf = (uint8_t*)g_sdio_dma_buf;
+    }
+
+    sd_callback_t progress_cb = get_stream_callback(rx_buf, 512);
+
+    uint32_t response;
+    if (!checkReturnOk(rp2040_sdio_command_R1(16, 512, &response))) // SET_BLOCKLEN
+    {
+        return false;
+    }
+    if (!checkReturnOk(rp2040_sdio_rx_start(rx_buf, 1))) // Prepare for reception
+    {
+        return false;
+    }
+    if (!checkReturnOk(rp2040_sdio_command_R1(CMD17, sector, &response))) // READ_SINGLE_BLOCK
+    {
+        return false;
+    }
+
+    // Poll until the transfer is no longer busy
+    for (;;)
+    {
+        uint32_t bytes_done;
+        g_sdio_error = rp2040_sdio_rx_poll(&bytes_done);
+
+        if (progress_cb)
+        {
+            progress_cb(m_stream_count_start + bytes_done);
+        }
+
+        if (g_sdio_error != SDIO_BUSY)
+        {
+            break;
+        }
+    }
+
+    if (g_sdio_error != SDIO_OK)
+    {
+        azlog("SdioCard::readSector(", sector, ") failed: ", (int)g_sdio_error);
+    }
+
+    // Deliver the data to the caller's unaligned buffer
+    if (rx_buf != dst)
+    {
+        memcpy(dst, g_sdio_dma_buf, sizeof(g_sdio_dma_buf));
+    }
+
+    return g_sdio_error == SDIO_OK;
+}
+
+// Read `n` consecutive 512-byte sectors starting at `sector` into `dst`.
+// An unaligned destination is filled one sector at a time through
+// readSector(). Otherwise a single multi-block read is issued and then
+// terminated with stopTransmission(). While the transfer is polled, the stream
+// callback (when it applies to this buffer) is told how many bytes have been
+// processed.
+// Returns true only if the transfer and the final stop both succeed.
+bool SdioCard::readSectors(uint32_t sector, uint8_t* dst, size_t n)
+{
+    const bool unaligned = (((uint32_t)dst & 3) != 0);
+    if (unaligned)
+    {
+        // Fall back to sector-by-sector reads, which handle the alignment.
+        size_t done = 0;
+        while (done < n)
+        {
+            if (!readSector(sector + done, dst + 512 * done))
+            {
+                return false;
+            }
+            done++;
+        }
+        return true;
+    }
+
+    sd_callback_t progress_cb = get_stream_callback(dst, n * 512);
+
+    uint32_t response;
+    if (!checkReturnOk(rp2040_sdio_command_R1(16, 512, &response))) // SET_BLOCKLEN
+    {
+        return false;
+    }
+    if (!checkReturnOk(rp2040_sdio_rx_start(dst, n))) // Prepare for reception
+    {
+        return false;
+    }
+    if (!checkReturnOk(rp2040_sdio_command_R1(CMD18, sector, &response))) // READ_MULTIPLE_BLOCK
+    {
+        return false;
+    }
+
+    // Poll until the transfer is no longer busy
+    for (;;)
+    {
+        uint32_t bytes_done;
+        g_sdio_error = rp2040_sdio_rx_poll(&bytes_done);
+
+        if (progress_cb)
+        {
+            progress_cb(m_stream_count_start + bytes_done);
+        }
+
+        if (g_sdio_error != SDIO_BUSY)
+        {
+            break;
+        }
+    }
+
+    if (g_sdio_error == SDIO_OK)
+    {
+        return stopTransmission(true);
+    }
+
+    // The transfer failed: report it, still end the multi-block command.
+    azlog("SdioCard::readSectors(", sector, ",...,", (int)n, ") failed: ", (int)g_sdio_error);
+    stopTransmission(true);
+    return false;
+}
+
+// These functions are not used for SDIO mode but are needed to avoid build error.
+// Both are intentionally empty: there is no chip select pin to drive here.
+void sdCsInit(SdCsPin_t pin) {}
+void sdCsWrite(SdCsPin_t pin, bool level) {}
+
+// SDIO configuration for main program
+SdioConfig g_sd_sdio_config(DMA_SDIO);
+
+#endif

+ 82 - 0
lib/ZuluSCSI_platform_RP2040/sd_card_spi.cpp

@@ -0,0 +1,82 @@
+// Driver and interface for accessing SD card in SPI mode
+
+#include "ZuluSCSI_platform.h"
+#include "ZuluSCSI_log.h"
+#include <hardware/spi.h>
+#include <SdFat.h>
+
+#ifndef SD_USE_SDIO
+
+// SPI driver that lets SdFat talk to the SD card through the RP2040 SPI
+// peripheral selected by SD_SPI.
+class RP2040SPIDriver : public SdSpiBaseClass
+{
+public:
+    // Nothing to prepare here; the peripheral is set up in activate().
+    void begin(SdSpiConfig config) {
+    }
+
+    // (Re)initialize the peripheral at the most recently requested clock
+    // frequency: 8-bit frames, CPOL = 0, CPHA = 0, MSB first.
+    void activate() {
+        _spi_init(SD_SPI, m_sckfreq);
+        spi_set_format(SD_SPI, 8, SPI_CPOL_0, SPI_CPHA_0, SPI_MSB_FIRST);
+    }
+
+    // The peripheral is left configured between transactions.
+    void deactivate() {
+    }
+
+    // Block until the transmit FIFO is empty and the peripheral is no longer busy.
+    void wait_idle() {
+        while (!(spi_get_hw(SD_SPI)->sr & SPI_SSPSR_TFE_BITS));
+        while (spi_get_hw(SD_SPI)->sr & SPI_SSPSR_BSY_BITS);
+    }
+
+    // Single byte receive (0xFF is sent while reading)
+    uint8_t receive() {
+        uint8_t tx = 0xFF;
+        uint8_t rx;
+        spi_write_read_blocking(SD_SPI, &tx, &rx, 1);
+        return rx;
+    }
+
+    // Single byte send
+    void send(uint8_t data) {
+        spi_write_blocking(SD_SPI, &data, 1);
+        wait_idle();
+    }
+
+    // Multiple byte receive (0xFF is sent for every byte read); always returns 0
+    uint8_t receive(uint8_t* buf, size_t count)
+    {
+        spi_read_blocking(SD_SPI, 0xFF, buf, count);
+        return 0;
+    }
+
+    // Multiple byte send
+    void send(const uint8_t* buf, size_t count) {
+        spi_write_blocking(SD_SPI, buf, count);
+    }
+
+    // Remember the clock frequency; it takes effect on the next activate().
+    void setSckSpeed(uint32_t maxSck) {
+        m_sckfreq = maxSck;
+    }
+
+private:
+    // Requested SPI clock frequency.
+    // Explicitly initialized so that activate() never reads an indeterminate
+    // value when an instance is not statically allocated and setSckSpeed() has
+    // not been called yet. Zero matches what the static instance started with.
+    uint32_t m_sckfreq = 0;
+};
+
+// Chip select initialization hook required by SdFat. Intentionally empty:
+// nothing is configured here.
+void sdCsInit(SdCsPin_t pin)
+{
+}
+
+// Drive the SD card chip select line to `level`.
+// The `pin` argument is ignored; the line is always SD_SPI_CS.
+void sdCsWrite(SdCsPin_t pin, bool level)
+{
+    const uint32_t cs_mask = (1 << SD_SPI_CS);
+
+    if (!level)
+    {
+        sio_hw->gpio_clr = cs_mask;
+        return;
+    }
+
+    sio_hw->gpio_set = cs_mask;
+}
+
+// SPI driver instance and the SdFat configuration that refers to it
+// (dedicated SPI bus, 25 MHz maximum clock).
+RP2040SPIDriver g_sd_spi_port;
+SdSpiConfig g_sd_spi_config(0, DEDICATED_SPI, SD_SCK_MHZ(25), &g_sd_spi_port);
+
+// Progress callbacks are not supported in SPI mode; the request is ignored.
+void azplatform_set_sd_callback(sd_callback_t func, const uint8_t *buffer)
+{
+}
+
+#endif

+ 21 - 1
platformio.ini

@@ -1,7 +1,7 @@
 ; PlatformIO Project Configuration File https://docs.platformio.org/page/projectconf.html
 
 [platformio]
-default_envs = ZuluSCSIv1_0, ZuluSCSIv1_1
+default_envs = ZuluSCSIv1_0, ZuluSCSIv1_1, ZuluSCSI_RP2040_v2_0
 
 ; Example platform to serve as a base for porting efforts
 [env:template]
@@ -61,3 +61,23 @@ build_flags =
      -DENABLE_DEDICATED_SPI=1
      -DHAS_SDIO_CLASS
      -DZULUSCSI_V1_1
+
+; ZuluSCSI v2.0 hardware platform, based on RP2040
+[env:ZuluSCSI_RP2040_v2_0]
+platform = raspberrypi
+framework = arduino
+board = ZuluSCSI_RP2040
+board_build.ldscript = lib/ZuluSCSI_platform_RP2040/rp2040.ld
+lib_deps =
+    SdFat=https://github.com/greiman/SdFat#2.1.2
+    minIni
+    ZuluSCSI_platform_RP2040
+    SCSI2SD
+build_flags =
+    -Os -Isrc -ggdb -g3
+    -Wall -Wno-sign-compare -Wno-ignored-qualifiers
+    -DSPI_DRIVER_SELECT=3
+    -DSD_CHIP_SELECT_MODE=2
+    -DENABLE_DEDICATED_SPI=1
+    -DHAS_SDIO_CLASS
+    -DUSE_ARDUINO=1

+ 2 - 2
src/ZuluSCSI_config.h

@@ -4,8 +4,8 @@
 #pragma once
 
 // Use variables for version number
-#define FW_VER_NUM      "1.0.7"
-#define FW_VER_SUFFIX   "release"
+#define FW_VER_NUM      "1.0.8"
+#define FW_VER_SUFFIX   "rc1"
 #define ZULU_FW_VERSION FW_VER_NUM "-" FW_VER_SUFFIX
 
 // Configuration and log file paths

+ 87 - 0
utils/random_tester.py

@@ -0,0 +1,87 @@
+#!/usr/bin/python3
+
+'''This script executes random-sized reads and writes to one or more block devices to test them.
+It will destroy the contents of the block device.'''
+
+import sys
+import os
+import mmap
+import random
+import time
+
+class BlockDevice:
+    def __init__(self, path, sectorsize = 512):
+        self.path = path
+        self.dev = os.fdopen(os.open(path, os.O_RDWR | os.O_DIRECT | os.O_SYNC), "rb+", 0)
+        self.sectorsize = sectorsize
+
+    def write_block(self, first_sector, sector_count, seed):
+        rnd = random.Random(seed)
+        buffer = mmap.mmap(-1, sector_count * self.sectorsize)
+        buffer.write(rnd.randbytes(sector_count * self.sectorsize))
+        
+        start = time.time()
+        self.dev.seek(first_sector * self.sectorsize)
+        self.dev.write(buffer)
+        elapsed = time.time() - start
+        speed = sector_count * self.sectorsize / elapsed / 1e6
+
+        print("Wrote  %16s, %8d, %8d, %8d, %8.3f MB/s" % (self.path, first_sector, sector_count, seed, speed))
+
+    def verify_block(self, first_sector, sector_count, seed):
+        rnd = random.Random(seed)
+        buffer = mmap.mmap(-1, sector_count * self.sectorsize)
+
+        start = time.time()
+        self.dev.seek(first_sector * self.sectorsize)
+        self.dev.readinto(buffer)
+        elapsed = time.time() - start
+        speed = sector_count * self.sectorsize / elapsed / 1e6
+
+        print("Verify %16s, %8d, %8d, %8d, %8.3f MB/s" % (self.path, first_sector, sector_count, seed, speed))
+
+        buffer.seek(0)
+        actual = buffer.read(sector_count * self.sectorsize)
+        expected = rnd.randbytes(sector_count * self.sectorsize)
+        if expected != actual:
+            print("Compare error, device = %s, sectorsize = %d, first_sector = %d, sector_count = %d, seed = %d"
+                % (self.path, self.sectorsize, first_sector, sector_count, seed))
+            fname = "%d" % time.time()
+            open(fname + ".expected", "wb").write(expected)
+            open(fname + ".actual", "wb").write(actual)
+            print("Saved data to %s.expected/actual" % fname)
+            raise Exception("Compare error")
+
+if __name__ == "__main__":
+    blockdevs = []
+    for path in sys.argv[1:]:
+        sectorsize = 512
+        if ':' in path:
+            path, sectorsize = path.split(':')
+            sectorsize = int(sectorsize)
+        blockdevs.append(BlockDevice(path, sectorsize=sectorsize))
+    
+    maxsectors = 100000
+    rnd = random.Random()
+    while True:
+        blocks = []
+        start = 0
+        while start + 256 < maxsectors:
+            start = min(maxsectors, start + rnd.randint(0, 10000))
+            dev = rnd.choice(blockdevs)
+            count = rnd.randint(1, 256)
+            seed = rnd.randint(1, 10000000)
+            blocks.append((dev, start, count, seed))
+            start += count
+        
+        print("Write / verify set size: %d" % len(blocks))
+
+        random.shuffle(blocks)
+        for dev, start, count, seed in blocks:
+            dev.write_block(start, count, seed)
+        
+        random.shuffle(blocks)
+        for dev, start, count, seed in blocks:
+            dev.verify_block(start, count, seed)
+
+

+ 1 - 1
utils/rename_binaries.sh

@@ -8,7 +8,7 @@ mkdir -p distrib
 DATE=$(date +%Y-%m-%d)
 VERSION=$(git describe --always)
 
-for file in $(ls .pio/build/*/*.bin .pio/build/*/*.elf)
+for file in $(ls .pio/build/*/*.bin .pio/build/*/*.elf .pio/build/*/*.uf2)
 do
     NEWNAME=$(echo $file | sed 's|.pio/build/\([^/]*\)/\(.*\)\.\(.*\)|\1_'$DATE'_'$VERSION'.\3|')
     echo $file to distrib/$NEWNAME

+ 96 - 0
utils/rp2040_gdb_macros

@@ -0,0 +1,96 @@
+# Macros to help debugging on RP2040 and other mbed-os / RTX targets using Black Magic Probe
+# Loosely based upon https://github.com/pyocd/pyOCD/pull/392/files
+
+# Get a thread backtrace from hardfault context: points $sp at the process stack pointer ($psp) and runs bt
+define hardfault_backtrace
+    set $sp = $psp
+    bt
+end
+
+# List current threads by printing the run, ready, delay and wait chains found in osRtxInfo.thread
+define threads
+    set $t = &osRtxInfo.thread
+    thread_print_chain $t->run.curr "RUN  "
+    thread_print_chain $t->ready.thread_list "READY"
+    thread_print_chain $t->delay_list "DELAY"
+    thread_print_chain $t->wait_list "WAIT "
+
+    printf "Use thread_bt 0x... to show thread backtrace\n"
+end
+
+# Print name, state label (second argument) and saved sp for each thread reachable from the first argument via thread_next
+define thread_print_chain
+    set $p = $arg0
+    while ($p != 0)
+        printf "%-16s ", $p->name
+        printf $arg1
+        printf " 0x%08x\n", $p->sp
+        set $p = $p->thread_next
+    end
+end
+
+# Switch to different thread. The argument is the thread's saved stack pointer (the value printed by "threads").
+# Current registers are first copied to $n_* variables, then r4-r11, r0-r3, r12, lr, pc and xPSR are loaded from the 16 words at that address.
+define thread_switch
+    set $tsp=(uint32_t*)$arg0
+    set $n_r0  =$r0  
+    set $n_r1  =$r1  
+    set $n_r2  =$r2  
+    set $n_r3  =$r3  
+    set $n_r4  =$r4  
+    set $n_r5  =$r5  
+    set $n_r6  =$r6  
+    set $n_r7  =$r7  
+    set $n_r8  =$r8  
+    set $n_r9  =$r9  
+    set $n_r10 =$r10 
+    set $n_r11 =$r11 
+    set $n_r12 =$r12 
+    set $n_sp  =$sp  
+    set $n_lr  =$lr  
+    set $n_pc  =$pc  
+    set $n_xPSR=$xPSR
+    set $r4   = $tsp[0]
+    set $r5   = $tsp[1]
+    set $r6   = $tsp[2]
+    set $r7   = $tsp[3]
+    set $r8   = $tsp[4]
+    set $r9   = $tsp[5]
+    set $r10  = $tsp[6]
+    set $r11  = $tsp[7]
+    set $r0   = $tsp[8]
+    set $r1   = $tsp[9]
+    set $r2   = $tsp[10]
+    set $r3   = $tsp[11]
+    set $r12  = $tsp[12]
+    set $lr   = $tsp[13]
+    set $pc   = $tsp[14]
+    set $xPSR = $tsp[15]
+    set $sp   = &$tsp[16]
+
+    echo Switched task, use thread_restore to return before continuing execution.\n
+end
+
+# Restore the registers that thread_switch saved in the $n_* variables
+define thread_restore
+    set $r0  =$n_r0  
+    set $r1  =$n_r1  
+    set $r2  =$n_r2  
+    set $r3  =$n_r3  
+    set $r4  =$n_r4  
+    set $r5  =$n_r5  
+    set $r6  =$n_r6  
+    set $r7  =$n_r7  
+    set $r8  =$n_r8  
+    set $r9  =$n_r9  
+    set $r10 =$n_r10 
+    set $r11 =$n_r11 
+    set $r12 =$n_r12 
+    set $sp  =$n_sp  
+    set $lr  =$n_lr  
+    set $pc  =$n_pc  
+    set $xPSR=$n_xPSR
+end
+
+# Show the backtrace of the thread whose saved stack pointer is given, then restore the original registers
+define thread_bt
+    thread_switch $arg0
+    bt
+    thread_restore
+end

+ 9 - 0
utils/run_gdb_rp2040.sh

@@ -0,0 +1,9 @@
+#!/bin/bash
+
+# Debug the ZuluSCSI_RP2040_v2_0 firmware ELF with arm-none-eabi-gdb via the GDB server on /dev/ttyACM0 and load utils/rp2040_gdb_macros. Paths are relative, so run from the repository root.
+arm-none-eabi-gdb \
+       -iex 'target extended /dev/ttyACM0' \
+       -iex 'mon s' -iex 'att 1' \
+       -iex 'set mem inaccessible-by-default off' \
+       -iex 'source utils/rp2040_gdb_macros' \
+       .pio/build/ZuluSCSI_RP2040_v2_0/firmware.elf

+ 0 - 0
utils/run_gdb.sh → utils/run_gdb_v1_0.sh