Sfoglia il codice sorgente

Add BS2 platform files as a copy of old version of ZuluSCSI_RP2040 platform

Corresponds to BS2 repo commit:
commit cc2c12834cc15459ea38e43fe68b86aac65dcd4d
Author: Eric Helgeson <erichelgeson@gmail.com>
Date:   Wed Oct 26 09:18:51 2022 -0500

    Rebrand
Petteri Aimonen 2 anni fa
parent
commit
d4c9a944c4

+ 530 - 0
lib/BlueSCSI_platform_RP2040/BlueSCSI_platform.cpp

@@ -0,0 +1,530 @@
+#include "BlueSCSI_platform.h"
+#include "BlueSCSI_log.h"
+#include "BlueSCSI_config.h"
+#include <SdFat.h>
+#include <scsi.h>
+#include <assert.h>
+#include <hardware/gpio.h>
+#include <hardware/uart.h>
+#include <hardware/spi.h>
+#include <hardware/structs/xip_ctrl.h>
+#include <platform/mbed_error.h>
+
+extern "C" {
+
+// As of 2022-09-13, the platformio RP2040 core is missing cplusplus guard on flash.h
+// For that reason this has to be inside the extern "C" here.
+#include <hardware/flash.h>
+
+const char *g_bluescsiplatform_name = PLATFORM_NAME;
+static bool g_scsi_initiator = false;
+
+void mbed_error_hook(const mbed_error_ctx * error_context);
+
+/***************/
+/* GPIO init   */
+/***************/
+
+// Helper function to configure whole GPIO in one line
+//
+//   gpio           pin number
+//   fn             peripheral function applied with gpio_set_function()
+//   pullup/pulldown  pad pulls applied with gpio_set_pulls()
+//   output         direction applied with gpio_set_dir()
+//   initial_state  level written with gpio_put() before the direction is applied
+//   fast_slew      when true, the SLEWFAST bit of the pad register is set
+static void gpio_conf(uint gpio, enum gpio_function fn, bool pullup, bool pulldown, bool output, bool initial_state, bool fast_slew)
+{
+    // Level first, then direction, pulls and finally the function select.
+    gpio_put(gpio, initial_state);
+    gpio_set_dir(gpio, output);
+    gpio_set_pulls(gpio, pullup, pulldown);
+    gpio_set_function(gpio, fn);
+
+    if (fast_slew)
+    {
+        // The bit is only ever set here: calling with fast_slew == false
+        // leaves whatever slew setting the pad already has.
+        padsbank0_hw->io[gpio] |= PADS_BANK0_GPIO0_SLEWFAST_BITS;
+    }
+}
+
+// Early platform initialization.
+// Drives the external buffer control pins to their initial levels, samples the
+// debug-log and termination DIP switches, starts UART logging on SWO_PIN,
+// installs the mbed error hook and configures the SD card, LED and I2C pins.
+void bluescsiplatform_init()
+{
+    /* First configure the pins that affect external buffer directions.
+     * RP2040 defaults to pulldowns, while these pins have external pull-ups.
+     */
+    //        pin             function       pup   pdown  out    state fast
+    gpio_conf(SCSI_DATA_DIR,  GPIO_FUNC_SIO, false,false, true,  true, true);
+    gpio_conf(SCSI_OUT_RST,   GPIO_FUNC_SIO, false,false, true,  true, true);
+    gpio_conf(SCSI_OUT_BSY,   GPIO_FUNC_SIO, false,false, true,  true, true);
+    gpio_conf(SCSI_OUT_SEL,   GPIO_FUNC_SIO, false,false, true,  true, true);
+
+    /* Check dip switch settings */
+    // All three switch pins become plain inputs with the internal pulls off.
+    gpio_conf(DIP_INITIATOR,  GPIO_FUNC_SIO, false, false, false, false, false);
+    gpio_conf(DIP_DBGLOG,     GPIO_FUNC_SIO, false, false, false, false, false);
+    gpio_conf(DIP_TERM,       GPIO_FUNC_SIO, false, false, false, false, false);
+
+    delay(10); // 10 ms delay to let pull-ups do their work
+
+    // A low level on the pin is reported as "enabled".
+    // DIP_INITIATOR is not read here; bluescsiplatform_late_init() handles it.
+    bool dbglog = !gpio_get(DIP_DBGLOG);
+    bool termination = !gpio_get(DIP_TERM);
+
+    /* Initialize logging to SWO pin (UART0) */
+    // NOTE: BlueSCSI_platform_gpio.h assigns GPIO 16 to both SWO_PIN and
+    // DIP_DBGLOG, so the switch has to be sampled before this point.
+    gpio_conf(SWO_PIN,        GPIO_FUNC_UART,false,false, true,  false, true);
+    uart_init(uart0, 1000000);
+    mbed_set_error_hook(mbed_error_hook);
+
+    bluelog("DIP switch settings: debug log ", (int)dbglog, ", termination ", (int)termination);
+
+    g_bluelog_debug = dbglog;
+
+    // The termination switch is only reported in the log by this function.
+    if (termination)
+    {
+        bluelog("SCSI termination is enabled");
+    }
+    else
+    {
+        bluelog("NOTE: SCSI termination is disabled");
+    }
+
+    // SD card pins
+    // Card is used in SDIO mode for main program, and in SPI mode for crash handler & bootloader.
+    //        pin             function       pup   pdown  out    state fast
+    gpio_conf(SD_SPI_SCK,     GPIO_FUNC_SPI, true, false, true,  true, true);
+    gpio_conf(SD_SPI_MOSI,    GPIO_FUNC_SPI, true, false, true,  true, true);
+    gpio_conf(SD_SPI_MISO,    GPIO_FUNC_SPI, true, false, false, true, true);
+    gpio_conf(SD_SPI_CS,      GPIO_FUNC_SIO, true, false, true,  true, true);
+    gpio_conf(SDIO_D1,        GPIO_FUNC_SIO, true, false, false, true, true);
+    gpio_conf(SDIO_D2,        GPIO_FUNC_SIO, true, false, false, true, true);
+
+    // LED pin
+    gpio_conf(LED_PIN,        GPIO_FUNC_SIO, false,false, true,  false, false);
+
+    // I2C pins
+    //        pin             function       pup   pdown  out    state fast
+    gpio_conf(GPIO_I2C_SCL,   GPIO_FUNC_I2C, true,false, false,  true, true);
+    gpio_conf(GPIO_I2C_SDA,   GPIO_FUNC_I2C, true,false, false,  true, true);
+}
+
+// Read the initiator-mode DIP switch.
+// Returns true when the DIP_INITIATOR pin reads low; the caller treats true
+// as "initiator mode selected".
+static bool read_initiator_dip_switch()
+{
+    /* Revision 2022d hardware has problems reading initiator DIP switch setting.
+     * The 74LVT245 hold current is keeping the GPIO_ACK state too strongly.
+     * Detect this condition by toggling the pin up and down and seeing if it sticks.
+     */
+
+    // Strong output high, then pulldown
+    //        pin             function       pup   pdown   out    state  fast
+    gpio_conf(DIP_INITIATOR,  GPIO_FUNC_SIO, false, false, true,  true,  false);
+    gpio_conf(DIP_INITIATOR,  GPIO_FUNC_SIO, false, true,  false, true,  false);
+    delay(1);
+    bool initiator_state1 = gpio_get(DIP_INITIATOR);
+
+    // Strong output low, then pullup
+    //        pin             function       pup   pdown   out    state  fast
+    gpio_conf(DIP_INITIATOR,  GPIO_FUNC_SIO, false, false, true,  false, false);
+    gpio_conf(DIP_INITIATOR,  GPIO_FUNC_SIO, true,  false, false, false, false);
+    delay(1);
+    bool initiator_state2 = gpio_get(DIP_INITIATOR);
+
+    // Same reading after both precharge directions: the level did not simply
+    // follow the last driven value, so it is taken as the switch state.
+    if (initiator_state1 == initiator_state2)
+    {
+        // Ok, was able to read the state directly
+        return !initiator_state1;
+    }
+
+    // Enable OUT_BSY for a short time.
+    // If in target mode, this will force GPIO_ACK high.
+    gpio_put(SCSI_OUT_BSY, 0);
+    delay_100ns();
+    gpio_put(SCSI_OUT_BSY, 1);
+
+    // The pin is still an input with pull-up at this point (last gpio_conf above).
+    return !gpio_get(DIP_INITIATOR);
+}
+
+// late_init() only runs in main application, SCSI not needed in bootloader
+//
+// Reads the initiator DIP switch, stores the result in g_scsi_initiator and
+// configures the SCSI data and control pins for the selected role.
+void bluescsiplatform_late_init()
+{
+    if (read_initiator_dip_switch())
+    {
+        g_scsi_initiator = true;
+        bluelog("SCSI initiator mode selected by DIP switch, expecting SCSI disks on the bus");
+    }
+    else
+    {
+        g_scsi_initiator = false;
+        bluelog("SCSI target mode selected by DIP switch, acting as an SCSI disk");
+    }
+
+    /* Initialize SCSI pins to required modes.
+     * SCSI pins should be inactive / input at this point.
+     */
+
+    // SCSI data bus direction is switched by DATA_DIR signal.
+    // Pullups make sure that no glitches occur when switching direction.
+    //        pin             function       pup   pdown  out    state fast
+    gpio_conf(SCSI_IO_DB0,    GPIO_FUNC_SIO, true, false, false, true, true);
+    gpio_conf(SCSI_IO_DB1,    GPIO_FUNC_SIO, true, false, false, true, true);
+    gpio_conf(SCSI_IO_DB2,    GPIO_FUNC_SIO, true, false, false, true, true);
+    gpio_conf(SCSI_IO_DB3,    GPIO_FUNC_SIO, true, false, false, true, true);
+    gpio_conf(SCSI_IO_DB4,    GPIO_FUNC_SIO, true, false, false, true, true);
+    gpio_conf(SCSI_IO_DB5,    GPIO_FUNC_SIO, true, false, false, true, true);
+    gpio_conf(SCSI_IO_DB6,    GPIO_FUNC_SIO, true, false, false, true, true);
+    gpio_conf(SCSI_IO_DB7,    GPIO_FUNC_SIO, true, false, false, true, true);
+    gpio_conf(SCSI_IO_DBP,    GPIO_FUNC_SIO, true, false, false, true, true);
+
+    if (!g_scsi_initiator)
+    {
+        // Act as SCSI device / target
+
+        // SCSI control outputs
+        //        pin             function       pup   pdown  out    state fast
+        gpio_conf(SCSI_OUT_IO,    GPIO_FUNC_SIO, false,false, true,  true, true);
+        gpio_conf(SCSI_OUT_MSG,   GPIO_FUNC_SIO, false,false, true,  true, true);
+
+        // REQ pin is switched between PIO and SIO, pull-up makes sure no glitches
+        gpio_conf(SCSI_OUT_REQ,   GPIO_FUNC_SIO, true ,false, true,  true, true);
+
+        // Shared pins are changed to input / output depending on communication phase
+        // When the OUT and IN names map to the same GPIO, only the input
+        // configuration is applied here and the output configuration is skipped.
+        gpio_conf(SCSI_IN_SEL,    GPIO_FUNC_SIO, true, false, false, true, true);
+        if (SCSI_OUT_CD != SCSI_IN_SEL)
+        {
+            gpio_conf(SCSI_OUT_CD,    GPIO_FUNC_SIO, false,false, true,  true, true);
+        }
+
+        // NOTE(review): SCSI_OUT_MSG was already configured as an output above;
+        // when it shares a GPIO with SCSI_IN_BSY this input configuration
+        // replaces that earlier one - confirm this is the intended end state.
+        gpio_conf(SCSI_IN_BSY,    GPIO_FUNC_SIO, true, false, false, true, true);
+        if (SCSI_OUT_MSG != SCSI_IN_BSY)
+        {
+            gpio_conf(SCSI_OUT_MSG,    GPIO_FUNC_SIO, false,false, true,  true, true);
+        }
+
+        // SCSI control inputs
+        //        pin             function       pup   pdown  out    state fast
+        gpio_conf(SCSI_IN_ACK,    GPIO_FUNC_SIO, true, false, false, true, false);
+        gpio_conf(SCSI_IN_ATN,    GPIO_FUNC_SIO, true, false, false, true, false);
+        gpio_conf(SCSI_IN_RST,    GPIO_FUNC_SIO, true, false, false, true, false);
+    }
+    else
+    {
+        // Act as SCSI initiator
+
+        // Phase and handshake signals become inputs with pull-ups;
+        // SEL, ACK and ATN become outputs with an initial high level.
+        //        pin             function       pup   pdown  out    state fast
+        gpio_conf(SCSI_IN_IO,     GPIO_FUNC_SIO, true ,false, false, true, false);
+        gpio_conf(SCSI_IN_MSG,    GPIO_FUNC_SIO, true ,false, false, true, false);
+        gpio_conf(SCSI_IN_CD,     GPIO_FUNC_SIO, true ,false, false, true, false);
+        gpio_conf(SCSI_IN_REQ,    GPIO_FUNC_SIO, true ,false, false, true, false);
+        gpio_conf(SCSI_IN_BSY,    GPIO_FUNC_SIO, true, false, false, true, false);
+        gpio_conf(SCSI_IN_RST,    GPIO_FUNC_SIO, true, false, false, true, false);
+        gpio_conf(SCSI_OUT_SEL,   GPIO_FUNC_SIO, false,false, true,  true, true);
+        gpio_conf(SCSI_OUT_ACK,   GPIO_FUNC_SIO, false,false, true,  true, true);
+        gpio_conf(SCSI_OUT_ATN,   GPIO_FUNC_SIO, false,false, true,  true, true);
+    }
+}
+
+// Returns the mode flag stored by bluescsiplatform_late_init().
+// The flag is false until that function has run.
+bool bluescsiplatform_is_initiator_mode_enabled()
+{
+    return g_scsi_initiator;
+}
+
+/*****************************************/
+/* Crash handlers                        */
+/*****************************************/
+
+extern SdFs SD;
+extern uint32_t __StackTop;
+
+// Write the in-memory log buffer to CRASHFILE on the SD card.
+// Used by the crash hook and by the watchdog timeout path.
+void bluescsiplatform_emergency_log_save()
+{
+    // Clear any SD transfer callback before touching the card.
+    bluescsiplatform_set_sd_callback(NULL, NULL);
+
+    SD.begin(SD_CONFIG_CRASH);
+    FsFile logfile = SD.open(CRASHFILE, O_WRONLY | O_CREAT | O_TRUNC);
+
+    if (!logfile.isOpen())
+    {
+        // Try to reinitialize the card, at most 10 times, stopping at the first success.
+        for (int attempts_left = 10; attempts_left > 0; attempts_left--)
+        {
+            if (SD.begin(SD_CONFIG_CRASH))
+            {
+                break;
+            }
+        }
+
+        logfile = SD.open(CRASHFILE, O_WRONLY | O_CREAT | O_TRUNC);
+    }
+
+    // The log buffer is fetched in two pieces; the read position is carried
+    // from the first call to the second through log_pos.
+    uint32_t log_pos = 0;
+    logfile.write(bluelog_get_buffer(&log_pos));
+    logfile.write(bluelog_get_buffer(&log_pos));
+    logfile.flush();
+    logfile.close();
+}
+
+void mbed_error_hook(const mbed_error_ctx * error_context)
+{
+    bluelog("--------------");
+    bluelog("CRASH!");
+    bluelog("Platform: ", g_bluescsiplatform_name);
+    bluelog("FW Version: ", g_bluelog_firmwareversion);
+    bluelog("error_status: ", (uint32_t)error_context->error_status);
+    bluelog("error_address: ", error_context->error_address);
+    bluelog("error_value: ", error_context->error_value);
+
+    uint32_t *p = (uint32_t*)((uint32_t)error_context->thread_current_sp & ~3);
+    for (int i = 0; i < 8; i++)
+    {
+        if (p == &__StackTop) break; // End of stack
+
+        bluelog("STACK ", (uint32_t)p, ":    ", p[0], " ", p[1], " ", p[2], " ", p[3]);
+        p += 4;
+    }
+
+    bluescsiplatform_emergency_log_save();
+
+    while (1)
+    {
+        // Flash the crash address on the LED
+        // Short pulse means 0, long pulse means 1
+        int base_delay = 1000;
+        for (int i = 31; i >= 0; i--)
+        {
+            LED_OFF();
+            for (int j = 0; j < base_delay; j++) delay_ns(100000);
+
+            int delay = (error_context->error_address & (1 << i)) ? (3 * base_delay) : base_delay;
+            LED_ON();
+            for (int j = 0; j < delay; j++) delay_ns(100000);
+            LED_OFF();
+        }
+
+        for (int j = 0; j < base_delay * 10; j++) delay_ns(100000);
+    }
+}
+
+/*****************************************/
+/* Debug logging and watchdog            */
+/*****************************************/
+
+// This function is called for every log message.
+// The text is written to uart0 with uart_puts().
+void bluescsiplatform_log(const char *s)
+{
+    uart_puts(uart0, s);
+}
+
+static int g_watchdog_timeout;
+static bool g_watchdog_initialized;
+
+// Log up to 8 rows of 4 words starting at the process stack pointer,
+// stopping early if the dump position reaches __StackTop.
+static void watchdog_log_stack()
+{
+    uint32_t *p = (uint32_t*)__get_PSP();
+    for (int i = 0; i < 8; i++)
+    {
+        if (p == &__StackTop) break; // End of stack
+
+        bluelog("STACK ", (uint32_t)p, ":    ", p[0], " ", p[1], " ", p[2], " ", p[3]);
+        p += 4;
+    }
+}
+
+// Callback of hardware alarm 3, re-armed for one second later on every call.
+// Each call takes 1000 off g_watchdog_timeout. Once the remaining time drops
+// to WATCHDOG_CRASH_TIMEOUT - WATCHDOG_BUS_RESET_TIMEOUT a SCSI bus reset is
+// requested; once it reaches zero the log is saved and the system is reset
+// through bluescsiplatform_boot_to_main_firmware().
+static void watchdog_callback(unsigned alarm_num)
+{
+    g_watchdog_timeout -= 1000;
+
+    if (g_watchdog_timeout <= WATCHDOG_CRASH_TIMEOUT - WATCHDOG_BUS_RESET_TIMEOUT)
+    {
+        // Request the reset unless both flags are already set.
+        if (!scsiDev.resetFlag || !g_scsiHostPhyReset)
+        {
+            bluelog("--------------");
+            bluelog("WATCHDOG TIMEOUT, attempting bus reset");
+            bluelog("GPIO states: out ", sio_hw->gpio_out, " oe ", sio_hw->gpio_oe, " in ", sio_hw->gpio_in);
+
+            watchdog_log_stack();
+
+            scsiDev.resetFlag = 1;
+            g_scsiHostPhyReset = true;
+        }
+
+        if (g_watchdog_timeout <= 0)
+        {
+            bluelog("--------------");
+            bluelog("WATCHDOG TIMEOUT!");
+            bluelog("Platform: ", g_bluescsiplatform_name);
+            bluelog("FW Version: ", g_bluelog_firmwareversion);
+            bluelog("GPIO states: out ", sio_hw->gpio_out, " oe ", sio_hw->gpio_oe, " in ", sio_hw->gpio_in);
+
+            watchdog_log_stack();
+
+            bluescsiplatform_emergency_log_save();
+
+            bluescsiplatform_boot_to_main_firmware();
+        }
+    }
+
+    hardware_alarm_set_target(3, delayed_by_ms(get_absolute_time(), 1000));
+}
+
+// Reload the soft watchdog countdown; call periodically from the main program.
+// The first call also claims hardware alarm 3 and schedules watchdog_callback
+// to run one second later.
+// It can also be left empty if the platform does not use a watchdog timer.
+void bluescsiplatform_reset_watchdog()
+{
+    g_watchdog_timeout = WATCHDOG_CRASH_TIMEOUT;
+
+    if (g_watchdog_initialized)
+    {
+        // Alarm already running, only the reload above was needed.
+        return;
+    }
+
+    hardware_alarm_claim(3);
+    hardware_alarm_set_callback(3, &watchdog_callback);
+    hardware_alarm_set_target(3, delayed_by_ms(get_absolute_time(), 1000));
+    g_watchdog_initialized = true;
+}
+
+/*****************************************/
+/* Flash reprogramming from bootloader   */
+/*****************************************/
+
+#ifdef BLUESCSIPLATFORM_BOOTLOADER_SIZE
+
+extern uint32_t __real_vectors_start;
+extern uint32_t __StackTop;
+static volatile void *g_bootloader_exit_req;
+
+// Erase and reprogram one flash page of the main firmware area, then read it
+// back through the uncached XIP window to verify it.
+//
+//   offset  byte offset into flash; must be a multiple of
+//           BLUESCSIPLATFORM_FLASH_PAGE_SIZE and not below
+//           BLUESCSIPLATFORM_BOOTLOADER_SIZE (both asserted)
+//   buffer  BLUESCSIPLATFORM_FLASH_PAGE_SIZE bytes of new page contents
+//
+// Returns true when the page reads back identical to buffer. Returns false
+// when the header check on the first page fails (nothing is erased in that
+// case) or when verification fails. Interrupts are enabled again on every
+// path that disabled them.
+bool bluescsiplatform_rewrite_flash_page(uint32_t offset, uint8_t buffer[BLUESCSIPLATFORM_FLASH_PAGE_SIZE])
+{
+    if (offset == BLUESCSIPLATFORM_BOOTLOADER_SIZE)
+    {
+        // Sanity check on the first page of the image.
+        // NOTE(review): presumably these are the top bytes of the initial stack
+        // pointer (RAM, 0x20......) and the reset vector (flash, 0x10......) - confirm.
+        if (buffer[3] != 0x20 || buffer[7] != 0x10)
+        {
+            bluelog("Invalid firmware file, starts with: ", bytearray(buffer, 16));
+            return false;
+        }
+    }
+
+    bluedbg("Writing flash at offset ", offset, " data ", bytearray(buffer, 4));
+    assert(offset % BLUESCSIPLATFORM_FLASH_PAGE_SIZE == 0);
+    assert(offset >= BLUESCSIPLATFORM_BOOTLOADER_SIZE);
+
+    // Avoid any mbed timer interrupts triggering during the flashing.
+    __disable_irq();
+
+    // For some reason any code executed after flashing crashes
+    // unless we disable the XIP cache.
+    // Not sure why this happens, as flash_range_program() is flushing
+    // the cache correctly.
+    // The cache is now enabled from bootloader start until it starts
+    // flashing, and again after reset to main firmware.
+    xip_ctrl_hw->ctrl = 0;
+
+    flash_range_erase(offset, BLUESCSIPLATFORM_FLASH_PAGE_SIZE);
+    flash_range_program(offset, buffer, BLUESCSIPLATFORM_FLASH_PAGE_SIZE);
+
+    // Compare word by word and remember the first mismatch; reporting is
+    // deferred until interrupts have been enabled again.
+    uint32_t *buf32 = (uint32_t*)buffer;
+    uint32_t num_words = BLUESCSIPLATFORM_FLASH_PAGE_SIZE / 4;
+    bool verify_ok = true;
+    uint32_t bad_offset = 0;
+    uint32_t bad_actual = 0;
+    uint32_t bad_expected = 0;
+    for (uint32_t i = 0; i < num_words; i++)
+    {
+        uint32_t expected = buf32[i];
+        uint32_t actual = *(volatile uint32_t*)(XIP_NOCACHE_BASE + offset + i * 4);
+
+        if (actual != expected)
+        {
+            verify_ok = false;
+            bad_offset = offset + i * 4;
+            bad_actual = actual;
+            bad_expected = expected;
+            break;
+        }
+    }
+
+    __enable_irq();
+
+    if (!verify_ok)
+    {
+        bluelog("Flash verify failed at offset ", bad_offset, " got ", bad_actual, " expected ", bad_expected);
+        return false;
+    }
+
+    return true;
+}
+
+// Request a system reset that ends in the main application; never returns.
+void bluescsiplatform_boot_to_main_firmware()
+{
+    // To ensure that the system state is reset properly, we perform
+    // a SYSRESETREQ and jump straight from the reset vector to main application.
+    // The variable is set to its own address as a marker; btldr_reset_handler()
+    // checks for exactly this value after the reset.
+    g_bootloader_exit_req = &g_bootloader_exit_req;
+    SCB->AIRCR = 0x05FA0004;
+    // Spin until the reset takes effect.
+    while(1);
+}
+
+// Reset entry used when building the bootloader (installed through btldr_vectors).
+// Chooses which vector table to start: the bootloader's own table at
+// __real_vectors_start, or the main application's table at
+// XIP_BASE + BLUESCSIPLATFORM_BOOTLOADER_SIZE when the exit marker written by
+// bluescsiplatform_boot_to_main_firmware() is present.
+void btldr_reset_handler()
+{
+    uint32_t* application_base = &__real_vectors_start;
+    if (g_bootloader_exit_req == &g_bootloader_exit_req)
+    {
+        // Boot to main application
+        application_base = (uint32_t*)(XIP_BASE + BLUESCSIPLATFORM_BOOTLOADER_SIZE);
+    }
+
+    // Point VTOR at the chosen table, load its first word into MSP and
+    // branch to the address held in its second word.
+    SCB->VTOR = (uint32_t)application_base;
+    __asm__(
+        "msr msp, %0\n\t"
+        "bx %1" : : "r" (application_base[0]),
+                    "r" (application_base[1]) : "memory");
+}
+
+// Replace the reset handler when building the bootloader
+// The rp2040_btldr.ld places real vector table at an offset.
+// Two entries only: the stack top address and btldr_reset_handler.
+__attribute__((section(".btldr_vectors")))
+const void * btldr_vectors[2] = {&__StackTop, (void*)&btldr_reset_handler};
+
+#endif
+
+/**********************************************/
+/* Mapping from data bytes to GPIO BOP values */
+/**********************************************/
+
+/* A lookup table is the fastest way to calculate parity and convert the IO pin mapping for data bus.
+ * For RP2040 we expect that the bits are consecutive and in order.
+ */
+
+// PARITY(n) evaluates to 1 when the low 8 bits of n contain an even number of
+// set bits, and to 0 otherwise.
+#define PARITY(n) ((1 ^ (n) ^ ((n)>>1) ^ ((n)>>2) ^ ((n)>>3) ^ ((n)>>4) ^ ((n)>>5) ^ ((n)>>6) ^ ((n)>>7)) & 1)
+// X(n) builds the GPIO word for data byte n: the GPIO bit of a data line is
+// set when the corresponding bit of n is clear, and the DBP bit is set when
+// PARITY(n) is 0. The argument is parenthesized so that expressions can be
+// passed as well as plain literals.
+#define X(n) (\
+    (((n) & 0x01) ? 0 : (1 << SCSI_IO_DB0)) | \
+    (((n) & 0x02) ? 0 : (1 << SCSI_IO_DB1)) | \
+    (((n) & 0x04) ? 0 : (1 << SCSI_IO_DB2)) | \
+    (((n) & 0x08) ? 0 : (1 << SCSI_IO_DB3)) | \
+    (((n) & 0x10) ? 0 : (1 << SCSI_IO_DB4)) | \
+    (((n) & 0x20) ? 0 : (1 << SCSI_IO_DB5)) | \
+    (((n) & 0x40) ? 0 : (1 << SCSI_IO_DB6)) | \
+    (((n) & 0x80) ? 0 : (1 << SCSI_IO_DB7)) | \
+    (PARITY(n)    ? 0 : (1 << SCSI_IO_DBP)) \
+)
+
+const uint32_t g_scsi_parity_lookup[256] =
+{
+    X(0x00), X(0x01), X(0x02), X(0x03), X(0x04), X(0x05), X(0x06), X(0x07), X(0x08), X(0x09), X(0x0a), X(0x0b), X(0x0c), X(0x0d), X(0x0e), X(0x0f),
+    X(0x10), X(0x11), X(0x12), X(0x13), X(0x14), X(0x15), X(0x16), X(0x17), X(0x18), X(0x19), X(0x1a), X(0x1b), X(0x1c), X(0x1d), X(0x1e), X(0x1f),
+    X(0x20), X(0x21), X(0x22), X(0x23), X(0x24), X(0x25), X(0x26), X(0x27), X(0x28), X(0x29), X(0x2a), X(0x2b), X(0x2c), X(0x2d), X(0x2e), X(0x2f),
+    X(0x30), X(0x31), X(0x32), X(0x33), X(0x34), X(0x35), X(0x36), X(0x37), X(0x38), X(0x39), X(0x3a), X(0x3b), X(0x3c), X(0x3d), X(0x3e), X(0x3f),
+    X(0x40), X(0x41), X(0x42), X(0x43), X(0x44), X(0x45), X(0x46), X(0x47), X(0x48), X(0x49), X(0x4a), X(0x4b), X(0x4c), X(0x4d), X(0x4e), X(0x4f),
+    X(0x50), X(0x51), X(0x52), X(0x53), X(0x54), X(0x55), X(0x56), X(0x57), X(0x58), X(0x59), X(0x5a), X(0x5b), X(0x5c), X(0x5d), X(0x5e), X(0x5f),
+    X(0x60), X(0x61), X(0x62), X(0x63), X(0x64), X(0x65), X(0x66), X(0x67), X(0x68), X(0x69), X(0x6a), X(0x6b), X(0x6c), X(0x6d), X(0x6e), X(0x6f),
+    X(0x70), X(0x71), X(0x72), X(0x73), X(0x74), X(0x75), X(0x76), X(0x77), X(0x78), X(0x79), X(0x7a), X(0x7b), X(0x7c), X(0x7d), X(0x7e), X(0x7f),
+    X(0x80), X(0x81), X(0x82), X(0x83), X(0x84), X(0x85), X(0x86), X(0x87), X(0x88), X(0x89), X(0x8a), X(0x8b), X(0x8c), X(0x8d), X(0x8e), X(0x8f),
+    X(0x90), X(0x91), X(0x92), X(0x93), X(0x94), X(0x95), X(0x96), X(0x97), X(0x98), X(0x99), X(0x9a), X(0x9b), X(0x9c), X(0x9d), X(0x9e), X(0x9f),
+    X(0xa0), X(0xa1), X(0xa2), X(0xa3), X(0xa4), X(0xa5), X(0xa6), X(0xa7), X(0xa8), X(0xa9), X(0xaa), X(0xab), X(0xac), X(0xad), X(0xae), X(0xaf),
+    X(0xb0), X(0xb1), X(0xb2), X(0xb3), X(0xb4), X(0xb5), X(0xb6), X(0xb7), X(0xb8), X(0xb9), X(0xba), X(0xbb), X(0xbc), X(0xbd), X(0xbe), X(0xbf),
+    X(0xc0), X(0xc1), X(0xc2), X(0xc3), X(0xc4), X(0xc5), X(0xc6), X(0xc7), X(0xc8), X(0xc9), X(0xca), X(0xcb), X(0xcc), X(0xcd), X(0xce), X(0xcf),
+    X(0xd0), X(0xd1), X(0xd2), X(0xd3), X(0xd4), X(0xd5), X(0xd6), X(0xd7), X(0xd8), X(0xd9), X(0xda), X(0xdb), X(0xdc), X(0xdd), X(0xde), X(0xdf),
+    X(0xe0), X(0xe1), X(0xe2), X(0xe3), X(0xe4), X(0xe5), X(0xe6), X(0xe7), X(0xe8), X(0xe9), X(0xea), X(0xeb), X(0xec), X(0xed), X(0xee), X(0xef),
+    X(0xf0), X(0xf1), X(0xf2), X(0xf3), X(0xf4), X(0xf5), X(0xf6), X(0xf7), X(0xf8), X(0xf9), X(0xfa), X(0xfb), X(0xfc), X(0xfd), X(0xfe), X(0xff)
+};
+
+#undef X
+
+} /* extern "C" */
+
+/* Logging from mbed */
+
+// mbed::FileHandle that forwards everything written to it to bluelog_raw().
+// Reading returns nothing; seek, close and size are stubs.
+static class LogTarget: public mbed::FileHandle {
+public:
+    // Nothing can be read from the log target; always reports 0 bytes.
+    virtual ssize_t read(void *buffer, size_t size) { return 0; }
+
+    // Forward `size` bytes to the log and report all of them as written.
+    virtual ssize_t write(const void *buffer, size_t size)
+    {
+        // A bit inefficient but mbed seems to write() one character
+        // at a time anyways.
+        const char *chars = (const char*)buffer;
+        // size_t index so the comparison against `size` is unsigned on both sides.
+        for (size_t i = 0; i < size; i++)
+        {
+            // bluelog_raw() is handed a NUL-terminated one-character string.
+            char buf[2] = {chars[i], 0};
+            bluelog_raw(buf);
+        }
+        return size;
+    }
+
+    // Stubs: seek returns the requested offset, close returns 0, size returns 0.
+    virtual off_t seek(off_t offset, int whence = SEEK_SET) { return offset; }
+    virtual int close() { return 0; }
+    virtual off_t size() { return 0; }
+} g_LogTarget;
+
+// Returns g_LogTarget for every fd, so text written to the handle returned
+// here goes to bluelog_raw().
+mbed::FileHandle *mbed::mbed_override_console(int fd)
+{
+    return &g_LogTarget;
+}

+ 154 - 0
lib/BlueSCSI_platform_RP2040/BlueSCSI_platform.h

@@ -0,0 +1,154 @@
+// Platform-specific definitions for BlueSCSI RP2040 hardware.
+
+#pragma once
+
+#include <stdint.h>
+#include <Arduino.h>
+#include "BlueSCSI_platform_gpio.h"
+#include "scsiHostPhy.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* These are used in debug output and default SCSI strings */
+extern const char *g_bluescsiplatform_name;
+#define PLATFORM_NAME "BlueSCSI RP2040"
+#define PLATFORM_REVISION "2.0"
+#define PLATFORM_MAX_SCSI_SPEED S2S_CFG_SPEED_SYNC_10
+#define PLATFORM_OPTIMAL_MIN_SD_WRITE_SIZE 4096
+#define PLATFORM_OPTIMAL_MAX_SD_WRITE_SIZE 32768
+#define PLATFORM_OPTIMAL_LAST_SD_WRITE_SIZE 8192
+#define SD_USE_SDIO 1
+#define PLATFORM_HAS_INITIATOR_MODE 1
+
+// NOTE: The driver supports synchronous speeds higher than 10MB/s, but this
+// has not been tested due to lack of fast enough SCSI adapter.
+// #define PLATFORM_MAX_SCSI_SPEED S2S_CFG_SPEED_TURBO
+
+// Debug logging function, can be used to print to e.g. serial port.
+// May get called from interrupt handlers.
+void bluescsiplatform_log(const char *s);
+void bluescsiplatform_emergency_log_save();
+
+// Timing and delay functions.
+// Arduino platform already provides these
+unsigned long millis(void);
+void delay(unsigned long ms);
+
+// Short delays, can be called from interrupt mode.
+// The request is rounded up to whole microseconds and passed to delayMicroseconds().
+static inline void delay_ns(unsigned long ns)
+{
+    const unsigned long whole_us = (ns + 999) / 1000;
+    delayMicroseconds(whole_us);
+}
+
+// Approximate fast delay
+// Implemented as eleven NOP instructions; the real duration depends on the CPU clock.
+static inline void delay_100ns()
+{
+    asm volatile ("nop \n nop \n nop \n nop \n nop \n nop \n nop \n nop \n nop \n nop \n nop");
+}
+
+// Initialize SD card and GPIO configuration
+void bluescsiplatform_init();
+
+// Initialization for main application, not used for bootloader
+void bluescsiplatform_late_init();
+
+// Query whether initiator mode is enabled on targets with PLATFORM_HAS_INITIATOR_MODE
+bool bluescsiplatform_is_initiator_mode_enabled();
+
+// Setup soft watchdog if supported
+void bluescsiplatform_reset_watchdog();
+
+// Set callback that will be called during data transfer to/from SD card.
+// This can be used to implement simultaneous transfer to SCSI bus.
+typedef void (*sd_callback_t)(uint32_t bytes_complete);
+void bluescsiplatform_set_sd_callback(sd_callback_t func, const uint8_t *buffer);
+
+// Reprogram firmware in main program area.
+#ifndef RP2040_DISABLE_BOOTLOADER
+#define BLUESCSIPLATFORM_BOOTLOADER_SIZE (128 * 1024)
+#define BLUESCSIPLATFORM_FLASH_TOTAL_SIZE (1024 * 1024)
+#define BLUESCSIPLATFORM_FLASH_PAGE_SIZE 4096
+bool bluescsiplatform_rewrite_flash_page(uint32_t offset, uint8_t buffer[BLUESCSIPLATFORM_FLASH_PAGE_SIZE]);
+void bluescsiplatform_boot_to_main_firmware();
+#endif
+
+// Below are GPIO access definitions that are used from scsiPhy.cpp.
+
+// Write a single SCSI pin.
+// Example use: SCSI_OUT(ATN, 1) sets SCSI_ATN to low (active) state.
+// A non-zero state writes the pin's bit to gpio_clr, zero writes it to gpio_set.
+// The state argument and the whole expansion are parenthesized so the macro
+// stays correct when given an expression or used inside a larger one.
+#define SCSI_OUT(pin, state) \
+    (*((state) ? &sio_hw->gpio_clr : &sio_hw->gpio_set) = 1 << (SCSI_OUT_ ## pin))
+
+// Read a single SCSI pin.
+// Example use: SCSI_IN(ATN), returns 1 for active low state.
+// Evaluates to 0 when the pin's bit in gpio_in is set and to 1 when it is clear.
+#define SCSI_IN(pin) \
+    ((sio_hw->gpio_in & (1 << (SCSI_IN_ ## pin))) ? 0 : 1)
+
+// Set pin directions for initiator vs. target mode
+// Enables output drive on ACK and ATN, and disables it on IO, CD, MSG and REQ.
+// The two assignments are wrapped in one pair of parentheses so the macro
+// expands to a single expression wherever it is used.
+#define SCSI_ENABLE_INITIATOR() \
+    ((sio_hw->gpio_oe_set = (1 << SCSI_OUT_ACK) | \
+                            (1 << SCSI_OUT_ATN)), \
+     (sio_hw->gpio_oe_clr = (1 << SCSI_IN_IO) | \
+                            (1 << SCSI_IN_CD) | \
+                            (1 << SCSI_IN_MSG) | \
+                            (1 << SCSI_IN_REQ)))
+
+// Enable driving of shared control pins
+// Sets the output-enable bits of SCSI_OUT_CD and SCSI_OUT_MSG.
+#define SCSI_ENABLE_CONTROL_OUT() \
+    (sio_hw->gpio_oe_set = (1 << SCSI_OUT_CD) | \
+                           (1 << SCSI_OUT_MSG))
+
+// Set SCSI data bus to output
+// Clears the SCSI_DATA_DIR pin first, then sets the output-enable bits of
+// all pins in SCSI_IO_DATA_MASK.
+#define SCSI_ENABLE_DATA_OUT() \
+    (sio_hw->gpio_clr = (1 << SCSI_DATA_DIR), \
+     sio_hw->gpio_oe_set = SCSI_IO_DATA_MASK)
+
+// Write SCSI data bus, also sets REQ to inactive.
+// The GPIO pattern for the byte comes from g_scsi_parity_lookup; the REQ bit
+// is written high in the same masked write, after which the data bus is
+// switched to output. The expansion is parenthesized so that it forms a
+// single expression.
+extern const uint32_t g_scsi_parity_lookup[256];
+#define SCSI_OUT_DATA(data) \
+    (gpio_put_masked(SCSI_IO_DATA_MASK | (1 << SCSI_OUT_REQ), \
+                     g_scsi_parity_lookup[(uint8_t)(data)] | (1 << SCSI_OUT_REQ)), \
+     SCSI_ENABLE_DATA_OUT())
+
+// Release SCSI data bus and REQ signal
+// Clears the output-enable bits of the data pins, then sets SCSI_DATA_DIR and
+// SCSI_OUT_REQ high.
+#define SCSI_RELEASE_DATA_REQ() \
+    (sio_hw->gpio_oe_clr = SCSI_IO_DATA_MASK, \
+     sio_hw->gpio_set = (1 << SCSI_DATA_DIR) | (1 << SCSI_OUT_REQ))
+
+// Release all SCSI outputs
+// Releases the data bus and REQ, disables output drive on CD and MSG, and
+// sets every SCSI output pin high. The expansion is parenthesized so that it
+// forms a single expression.
+#define SCSI_RELEASE_OUTPUTS() \
+    (SCSI_RELEASE_DATA_REQ(), \
+     sio_hw->gpio_oe_clr = (1 << SCSI_OUT_CD) | \
+                           (1 << SCSI_OUT_MSG), \
+     sio_hw->gpio_set = (1 << SCSI_OUT_IO) | \
+                        (1 << SCSI_OUT_CD) | \
+                        (1 << SCSI_OUT_MSG) | \
+                        (1 << SCSI_OUT_RST) | \
+                        (1 << SCSI_OUT_BSY) | \
+                        (1 << SCSI_OUT_REQ) | \
+                        (1 << SCSI_OUT_SEL))
+
+// Read SCSI data bus
+// Inverts gpio_in, keeps the bits in SCSI_IO_DATA_MASK and shifts them down by
+// SCSI_IO_SHIFT. The whole expansion is parenthesized so that a cast or
+// operator written next to the macro applies to the final value rather than
+// to part of the expression.
+#define SCSI_IN_DATA() \
+    ((~sio_hw->gpio_in & SCSI_IO_DATA_MASK) >> SCSI_IO_SHIFT)
+
+#ifdef __cplusplus
+}
+
+// SD card driver for SdFat
+
+#ifdef SD_USE_SDIO
+class SdioConfig;
+extern SdioConfig g_sd_sdio_config;
+#define SD_CONFIG g_sd_sdio_config
+#define SD_CONFIG_CRASH g_sd_sdio_config
+#else
+class SdSpiConfig;
+extern SdSpiConfig g_sd_spi_config;
+#define SD_CONFIG g_sd_spi_config
+#define SD_CONFIG_CRASH g_sd_spi_config
+#endif
+
+#endif

+ 81 - 0
lib/BlueSCSI_platform_RP2040/BlueSCSI_platform_gpio.h

@@ -0,0 +1,81 @@
+// GPIO definitions for BlueSCSI RP2040-based hardware
+
+#pragma once
+
+#include <hardware/gpio.h>
+
+// SCSI data input/output port.
+// The data bus uses external bidirectional buffer, with
+// direction controlled by DATA_DIR pin.
+#define SCSI_IO_DB0  0
+#define SCSI_IO_DB1  1
+#define SCSI_IO_DB2  2
+#define SCSI_IO_DB3  3
+#define SCSI_IO_DB4  4
+#define SCSI_IO_DB5  5
+#define SCSI_IO_DB6  6
+#define SCSI_IO_DB7  7
+#define SCSI_IO_DBP  8
+#define SCSI_IO_DATA_MASK 0x1FF
+#define SCSI_IO_SHIFT 0
+
+// Data direction control
+#define SCSI_DATA_DIR 17
+
+// SCSI output status lines
+#define SCSI_OUT_IO   12
+#define SCSI_OUT_CD   11
+#define SCSI_OUT_MSG  13
+#define SCSI_OUT_RST  28
+#define SCSI_OUT_BSY  26
+#define SCSI_OUT_REQ  9
+#define SCSI_OUT_SEL  24
+
+// SCSI input status signals
+#define SCSI_IN_SEL  11
+#define SCSI_IN_ACK  10
+#define SCSI_IN_ATN  29
+#define SCSI_IN_BSY  13
+#define SCSI_IN_RST  27
+
+// Status line outputs for initiator mode
+#define SCSI_OUT_ACK  10
+#define SCSI_OUT_ATN  29
+
+// Status line inputs for initiator mode
+#define SCSI_IN_IO    12
+#define SCSI_IN_CD    11
+#define SCSI_IN_MSG   13
+#define SCSI_IN_REQ   9
+
+// Status LED pins
+// LED_ON() sets the LED pin high, LED_OFF() sets it low.
+// Both expansions are parenthesized so that each forms a single expression.
+#define LED_PIN      25
+#define LED_ON()     (sio_hw->gpio_set = 1 << LED_PIN)
+#define LED_OFF()    (sio_hw->gpio_clr = 1 << LED_PIN)
+
+// SD card pins in SDIO mode
+#define SDIO_CLK 18
+#define SDIO_CMD 19
+#define SDIO_D0  20
+#define SDIO_D1  21
+#define SDIO_D2  22
+#define SDIO_D3  23
+
+// SD card pins in SPI mode
+#define SD_SPI       spi0
+#define SD_SPI_SCK   18
+#define SD_SPI_MOSI  19
+#define SD_SPI_MISO  20
+#define SD_SPI_CS    23
+
+// IO expander I2C
+#define GPIO_I2C_SDA 14
+#define GPIO_I2C_SCL 15
+
+// DIP switch pins
+// These share GPIO numbers with other signals defined in this file:
+// DIP_INITIATOR with SCSI_IN_ACK / SCSI_OUT_ACK (10), DIP_DBGLOG with
+// SWO_PIN (16) and DIP_TERM with SCSI_OUT_REQ / SCSI_IN_REQ (9).
+#define DIP_INITIATOR 10
+#define DIP_DBGLOG 16
+#define DIP_TERM 9
+
+// Other pins
+#define SWO_PIN 16

+ 5 - 0
lib/BlueSCSI_platform_RP2040/bsp.h

@@ -0,0 +1,5 @@
+// Dummy file for SCSI2SD.
+
+#pragma once
+
+#define S2S_DMA_ALIGN

+ 196 - 0
lib/BlueSCSI_platform_RP2040/rp2040.ld

@@ -0,0 +1,196 @@
+/* Memory regions used by the main firmware link. */
+MEMORY
+{
+    /* Flash region: code, read-only data and the load images of initialized RAM sections. */
+    FLASH(rx) : ORIGIN = 0x10000000, LENGTH = 2048k
+    RAM(rwx) : ORIGIN = 0x20000000, LENGTH = 240k  /* Leave space for pico-debug */
+    /* Two 4k scratch regions; the .scratch_x / .scratch_y sections are placed in them. */
+    SCRATCH_X(rwx) : ORIGIN = 0x20040000, LENGTH = 4k
+    SCRATCH_Y(rwx) : ORIGIN = 0x20041000, LENGTH = 4k
+}
+ENTRY(_entry_point)
+SECTIONS
+{
+    .flash_begin : {
+        __flash_binary_start = .;
+    } > FLASH
+    .boot2 : {
+        __boot2_start__ = .;
+        KEEP (*(.boot2))
+        __boot2_end__ = .;
+    } > FLASH
+    ASSERT(__boot2_end__ - __boot2_start__ == 256,
+        "ERROR: Pico second stage bootloader must be 256 bytes in size")
+
+    /* If BlueSCSI SD card bootloader is included, it goes in first 128 kB */
+    .text.bootloader : ALIGN(16) SUBALIGN(16)
+    {
+        KEEP(*(.text.btldr*))
+        /* Advance the location counter to a 131072-byte (128 kB) alignment boundary. */
+        . = ALIGN(131072);
+        /* NOTE(review): presumably a link-time size guard - the comparison
+         * yields 0 when the location counter has passed 131072, which makes
+         * this a division by zero. Confirm that the linker reports that as an error.
+         */
+        CHECK_BOOTLOADER_SIZE = 1 / (. <= 131072);
+    } > FLASH
+
+    .text : {
+        __logical_binary_start = .;
+        __real_vectors_start = .;
+        KEEP (*(.vectors))
+        KEEP (*(.binary_info_header))
+        __binary_info_header_end = .;
+        KEEP (*(.reset))
+        KEEP (*(.init))
+        *(.fini)
+        *crtbegin.o(.ctors)
+        *crtbegin?.o(.ctors)
+        *(EXCLUDE_FILE(*crtend?.o *crtend.o) .ctors)
+        *(SORT(.ctors.*))
+        *(.ctors)
+        *crtbegin.o(.dtors)
+        *crtbegin?.o(.dtors)
+        *(EXCLUDE_FILE(*crtend?.o *crtend.o) .dtors)
+        *(SORT(.dtors.*))
+        *(.dtors)
+        *(.eh_frame*)
+        . = ALIGN(4);
+
+        /* Put only non-timecritical code in flash
+         * This includes e.g. floating point math routines.
+         */
+        *libm*:(.text .text*)
+        *libc*:(.text .text*)
+        *libgcc*:*df*(.text .text*)
+        *USB*(.text .text*)
+        *SPI*(.text .text*)
+        *Spi*(.text .text*)
+        *spi*(.text .text*)
+        *stdc*:(.text .text*)
+        *supc*:(.text .text*)
+        *nosys*:(.text .text*)
+        *libc*:*printf*(.text .text*)
+        *libc*:*toa*(.text .text*)
+        *libminIni.a:(.text .text*)
+
+        /* RP2040 breakpoints in RAM code don't always work very well
+         * because the boot routine tends to overwrite them.
+         * Uncommenting this line puts all code in flash.
+         */
+        /* *(.text .text*) */
+    } > FLASH
+    .rodata : {
+        . = ALIGN(4);
+        *(SORT_BY_ALIGNMENT(SORT_BY_NAME(.flashdata*)))
+        *(.rodata)
+        *(.rodata*)
+        . = ALIGN(4);
+    } > FLASH
+    .ARM.extab :
+    {
+        *(.ARM.extab* .gnu.linkonce.armextab.*)
+    } > FLASH
+    __exidx_start = .;
+    .ARM.exidx :
+    {
+        *(.ARM.exidx* .gnu.linkonce.armexidx.*)
+    } > FLASH
+    __exidx_end = .;
+    . = ALIGN(4);
+    __binary_info_start = .;
+    .binary_info :
+    {
+        KEEP(*(.binary_info.keep.*))
+        *(.binary_info.*)
+    } > FLASH
+    __binary_info_end = .;
+    . = ALIGN(4);
+    __etext = .;
+   .ram_vector_table (COPY): {
+        *(.ram_vector_table)
+    } > RAM
+    .data : {
+        __data_start__ = .;
+        *(vtable)
+
+        /* Time critical code will go here to avoid external flash latency */
+        *(.time_critical*)
+        . = ALIGN(4);
+        *(.text)
+        *(.text*)
+
+        . = ALIGN(4);
+        *(.data*)
+        . = ALIGN(4);
+        *(.after_data.*)
+        . = ALIGN(4);
+        PROVIDE_HIDDEN (__mutex_array_start = .);
+        KEEP(*(SORT(.mutex_array.*)))
+        KEEP(*(.mutex_array))
+        PROVIDE_HIDDEN (__mutex_array_end = .);
+        . = ALIGN(4);
+        PROVIDE_HIDDEN (__preinit_array_start = .);
+        KEEP(*(SORT(.preinit_array.*)))
+        KEEP(*(.preinit_array))
+        PROVIDE_HIDDEN (__preinit_array_end = .);
+        . = ALIGN(4);
+        PROVIDE_HIDDEN (__init_array_start = .);
+        KEEP(*(SORT(.init_array.*)))
+        KEEP(*(.init_array))
+        PROVIDE_HIDDEN (__init_array_end = .);
+        . = ALIGN(4);
+        PROVIDE_HIDDEN (__fini_array_start = .);
+        *(SORT(.fini_array.*))
+        *(.fini_array)
+        PROVIDE_HIDDEN (__fini_array_end = .);
+        *(.jcr)
+        . = ALIGN(4);
+        __data_end__ = .;
+    } > RAM AT> FLASH
+    .uninitialized_data (COPY): {
+        . = ALIGN(4);
+        *(.uninitialized_data*)
+    } > RAM
+    .scratch_x : {
+        __scratch_x_start__ = .;
+        *(.scratch_x.*)
+        . = ALIGN(4);
+        __scratch_x_end__ = .;
+    } > SCRATCH_X AT > FLASH
+    __scratch_x_source__ = LOADADDR(.scratch_x);
+    .scratch_y : {
+        __scratch_y_start__ = .;
+        *(.scratch_y.*)
+        . = ALIGN(4);
+        __scratch_y_end__ = .;
+    } > SCRATCH_Y AT > FLASH
+    __scratch_y_source__ = LOADADDR(.scratch_y);
+    .bss : {
+        . = ALIGN(4);
+        __bss_start__ = .;
+        *(SORT_BY_ALIGNMENT(SORT_BY_NAME(.bss*)))
+        *(COMMON)
+        . = ALIGN(4);
+        __bss_end__ = .;
+    } > RAM
+    .heap (COPY):
+    {
+        __end__ = .;
+        PROVIDE(end = .);
+        *(.heap*)
+        . = ORIGIN(RAM) + LENGTH(RAM) - 0x400;
+        __HeapLimit = .;
+    } > RAM
+    .stack1_dummy (COPY):
+    {
+        *(.stack1*)
+    } > SCRATCH_X
+    .stack_dummy (COPY):
+    {
+        *(.stack*)
+    } > RAM
+    .flash_end : {
+        __flash_binary_end = .;
+    } > FLASH
+    __StackTop = ORIGIN(RAM) + LENGTH(RAM);
+    __StackLimit = __StackTop - 0x400;
+    __StackOneTop = ORIGIN(SCRATCH_X) + LENGTH(SCRATCH_X);
+    __StackOneBottom = __StackOneTop - SIZEOF(.stack1_dummy);
+    __StackBottom = __StackTop - SIZEOF(.stack_dummy);
+    PROVIDE(__stack = __StackTop);
+    ASSERT(__StackLimit >= __HeapLimit, "region RAM overflowed")
+    ASSERT( __binary_info_header_end - __logical_binary_start <= 256, "Binary info must be in first 256 bytes of the binary")
+}

+ 168 - 0
lib/BlueSCSI_platform_RP2040/rp2040_btldr.ld

@@ -0,0 +1,168 @@
+/*
+ *
+ * Customized linker script for building bootloader
+ *
+ */
+
+ MEMORY
+{
+    /* The bootloader is linked to begin at 0x10000100 (the FLASH ORIGIN below).
+     * First 256 bytes are reserved for RP2040 second stage bootloader,
+     * which comes as part of the main firmware.elf and is never overwritten.
+     * The FLASH length is therefore 128k minus those 256 bytes.
+     */
+    FLASH(rx) : ORIGIN = 0x10000100, LENGTH = 128k-256
+    RAM(rwx) : ORIGIN = 0x20000000, LENGTH = 240k  /* Leave space for pico-debug */
+    SCRATCH_X(rwx) : ORIGIN = 0x20040000, LENGTH = 4k
+    SCRATCH_Y(rwx) : ORIGIN = 0x20041000, LENGTH = 4k
+}
+ENTRY(_entry_point)
+SECTIONS
+{
+    /* Marks the first address of the image in FLASH. */
+    .flash_begin : {
+        __flash_binary_start = .;
+    } > FLASH
+
+    /* Code. The bootloader vectors and the binary info header come first;
+     * the real vector table follows at the next 256-byte boundary.
+     */
+    .text : {
+        __logical_binary_start = .;
+        KEEP (*(.btldr_vectors))
+        KEEP (*(.binary_info_header))
+        __binary_info_header_end = .;
+        . = ALIGN(256);
+        __real_vectors_start = .;
+        KEEP (*(.vectors))
+        KEEP (*(.reset))
+        KEEP (*(.init))
+        *(.fini)
+        *crtbegin.o(.ctors)
+        *crtbegin?.o(.ctors)
+        *(EXCLUDE_FILE(*crtend?.o *crtend.o) .ctors)
+        *(SORT(.ctors.*))
+        *(.ctors)
+        *crtbegin.o(.dtors)
+        *crtbegin?.o(.dtors)
+        *(EXCLUDE_FILE(*crtend?.o *crtend.o) .dtors)
+        *(SORT(.dtors.*))
+        *(.dtors)
+        *(.eh_frame*)
+        *(.text .text*)
+        . = ALIGN(4);
+    } > FLASH
+    /* Read-only data stays in FLASH. */
+    .rodata : {
+        . = ALIGN(4);
+        *(SORT_BY_ALIGNMENT(SORT_BY_NAME(.flashdata*)))
+        *(.rodata)
+        *(.rodata*)
+        . = ALIGN(4);
+    } > FLASH
+    .ARM.extab :
+    {
+        *(.ARM.extab* .gnu.linkonce.armextab.*)
+    } > FLASH
+    __exidx_start = .;
+    .ARM.exidx :
+    {
+        *(.ARM.exidx* .gnu.linkonce.armexidx.*)
+    } > FLASH
+    __exidx_end = .;
+    . = ALIGN(4);
+    __binary_info_start = .;
+    .binary_info :
+    {
+        KEEP(*(.binary_info.keep.*))
+        *(.binary_info.*)
+    } > FLASH
+    __binary_info_end = .;
+    . = ALIGN(4);
+    __etext = .;
+   .ram_vector_table (COPY): {
+        *(.ram_vector_table)
+    } > RAM
+    /* Initialized data: runs from RAM, load image is stored in FLASH (AT> FLASH).
+     * __data_start__ / __data_end__ delimit the RAM range.
+     */
+    .data : {
+        __data_start__ = .;
+        *(vtable)
+
+        /* Time critical code will go here to avoid external flash latency */
+        *(.time_critical*)
+
+        . = ALIGN(4);
+        *(.data*)
+        . = ALIGN(4);
+        *(.after_data.*)
+        . = ALIGN(4);
+        PROVIDE_HIDDEN (__mutex_array_start = .);
+        KEEP(*(SORT(.mutex_array.*)))
+        KEEP(*(.mutex_array))
+        PROVIDE_HIDDEN (__mutex_array_end = .);
+        . = ALIGN(4);
+        PROVIDE_HIDDEN (__preinit_array_start = .);
+        KEEP(*(SORT(.preinit_array.*)))
+        KEEP(*(.preinit_array))
+        PROVIDE_HIDDEN (__preinit_array_end = .);
+        . = ALIGN(4);
+        PROVIDE_HIDDEN (__init_array_start = .);
+        KEEP(*(SORT(.init_array.*)))
+        KEEP(*(.init_array))
+        PROVIDE_HIDDEN (__init_array_end = .);
+        . = ALIGN(4);
+        PROVIDE_HIDDEN (__fini_array_start = .);
+        *(SORT(.fini_array.*))
+        *(.fini_array)
+        PROVIDE_HIDDEN (__fini_array_end = .);
+        *(.jcr)
+        . = ALIGN(4);
+        __data_end__ = .;
+    } > RAM AT> FLASH
+    .uninitialized_data (COPY): {
+        . = ALIGN(4);
+        *(.uninitialized_data*)
+    } > RAM
+    /* Scratch bank contents: run from SCRATCH_X / SCRATCH_Y, load images in FLASH.
+     * The __scratch_*_source__ symbols give the FLASH load addresses.
+     */
+    .scratch_x : {
+        __scratch_x_start__ = .;
+        *(.scratch_x.*)
+        . = ALIGN(4);
+        __scratch_x_end__ = .;
+    } > SCRATCH_X AT > FLASH
+    __scratch_x_source__ = LOADADDR(.scratch_x);
+    .scratch_y : {
+        __scratch_y_start__ = .;
+        *(.scratch_y.*)
+        . = ALIGN(4);
+        __scratch_y_end__ = .;
+    } > SCRATCH_Y AT > FLASH
+    __scratch_y_source__ = LOADADDR(.scratch_y);
+    /* Zero-initialized data, delimited by __bss_start__ / __bss_end__. */
+    .bss : {
+        . = ALIGN(4);
+        __bss_start__ = .;
+        *(SORT_BY_ALIGNMENT(SORT_BY_NAME(.bss*)))
+        *(COMMON)
+        . = ALIGN(4);
+        __bss_end__ = .;
+    } > RAM
+    /* Heap starts after .bss and its limit is fixed 0x400 bytes below the end of RAM. */
+    .heap (COPY):
+    {
+        __end__ = .;
+        PROVIDE(end = .);
+        *(.heap*)
+        . = ORIGIN(RAM) + LENGTH(RAM) - 0x400;
+        __HeapLimit = .;
+    } > RAM
+    /* The stack sections below are only used to obtain their sizes via SIZEOF(). */
+    .stack1_dummy (COPY):
+    {
+        *(.stack1*)
+    } > SCRATCH_X
+    .stack_dummy (COPY):
+    {
+        *(.stack*)
+    } > RAM
+    /* Marks the end of the image in FLASH. */
+    .flash_end : {
+        __flash_binary_end = .;
+    } > FLASH
+    /* Main stack grows down from the end of RAM; __StackLimit is 0x400 bytes below the top. */
+    __StackTop = ORIGIN(RAM) + LENGTH(RAM);
+    __StackLimit = __StackTop - 0x400;
+    __StackOneTop = ORIGIN(SCRATCH_X) + LENGTH(SCRATCH_X);
+    __StackOneBottom = __StackOneTop - SIZEOF(.stack1_dummy);
+    __StackBottom = __StackTop - SIZEOF(.stack_dummy);
+    PROVIDE(__stack = __StackTop);
+    /* Link-time sanity checks: heap must not run into the stack area,
+     * and the binary info header must end within 256 bytes of the start of .text.
+     */
+    ASSERT(__StackLimit >= __HeapLimit, "region RAM overflowed")
+    ASSERT( __binary_info_header_end - __logical_binary_start <= 256, "Binary info must be in first 256 bytes of the binary")
+}

+ 804 - 0
lib/BlueSCSI_platform_RP2040/rp2040_sdio.cpp

@@ -0,0 +1,804 @@
+// Implementation of SDIO communication for RP2040
+//
+// The RP2040 official work-in-progress code at
+// https://github.com/raspberrypi/pico-extras/tree/master/src/rp2_common/pico_sd_card
+// may be a useful reference, but this is an independent implementation.
+//
+// For official SDIO specifications, refer to:
+// https://www.sdcard.org/downloads/pls/
+// "SDIO Physical Layer Simplified Specification Version 8.00"
+
+#include "rp2040_sdio.h"
+#include "rp2040_sdio.pio.h"
+#include <hardware/pio.h>
+#include <hardware/dma.h>
+#include <hardware/gpio.h>
+#include <BlueSCSI_platform.h>
+#include <BlueSCSI_log.h>
+
+// PIO block that runs both SDIO state machines
+#define SDIO_PIO pio1
+// State machine index used for sending commands and receiving responses
+#define SDIO_CMD_SM 0
+// State machine index used for block data reception and transmission
+#define SDIO_DATA_SM 1
+// Primary DMA channel: moves data between memory and the PIO FIFOs
+#define SDIO_DMA_CH 2
+// Secondary DMA channel: reprograms SDIO_DMA_CH during reads; during writes it
+// sends the CRC and end token and then waits for the card response
+#define SDIO_DMA_CHB 3
+
+// Maximum number of 512 byte blocks to transfer in one request
+#define SDIO_MAX_BLOCKS 256
+
+// State of the data state machine:
+//   SDIO_IDLE          no transfer in progress (set by rp2040_sdio_stop())
+//   SDIO_RX            block read in progress (set by rp2040_sdio_rx_start())
+//   SDIO_TX            block write payload is being sent (set by rp2040_sdio_tx_start())
+//   SDIO_TX_WAIT_IDLE  block payload sent, waiting for the card's write response
+enum sdio_transfer_state_t { SDIO_IDLE, SDIO_RX, SDIO_TX, SDIO_TX_WAIT_IDLE};
+
+// Driver state for the SDIO interface. It is accessed both from the polling
+// functions and from the DMA interrupt handler used for block writes.
+static struct {
+    // Load offsets of the PIO programs and the prebuilt state machine configs
+    uint32_t pio_cmd_clk_offset;
+    uint32_t pio_data_rx_offset;
+    pio_sm_config pio_cfg_data_rx;
+    uint32_t pio_data_tx_offset;
+    pio_sm_config pio_cfg_data_tx;
+
+    sdio_transfer_state_t transfer_state;
+    uint32_t transfer_start_time; // millis() timestamp taken when the transfer was started
+    uint32_t *data_buf; // Caller's buffer for the current transfer
+    uint32_t blocks_done; // Number of blocks transferred so far
+    uint32_t total_blocks; // Total number of blocks to transfer
+    uint32_t blocks_checksumed; // Number of blocks that have had CRC calculated
+    uint32_t checksum_errors; // Number of checksum errors detected
+
+    // Variables for block writes
+    uint64_t next_wr_block_checksum;
+    uint32_t end_token_buf[3]; // CRC and end token for write block
+    sdio_status_t wr_status;
+    uint32_t card_response;
+
+    // Variables for block reads
+    // This is used to perform DMA into data buffers and checksum buffers separately.
+    // Each block needs two descriptors (payload + checksum) and the list is
+    // closed by one all-zero terminator entry, hence the "+ 1". Without it a
+    // transfer of SDIO_MAX_BLOCKS blocks would place the terminator past the
+    // end of the array, on top of received_checksums[0].
+    struct {
+        void * write_addr;
+        uint32_t transfer_count;
+    } dma_blocks[SDIO_MAX_BLOCKS * 2 + 1];
+    // Raw 64-bit checksum received after each block, stored as two 32-bit words
+    struct {
+        uint32_t top;
+        uint32_t bottom;
+    } received_checksums[SDIO_MAX_BLOCKS];
+} g_sdio;
+
+void rp2040_sdio_dma_irq();
+
+/*******************************************************
+ * Checksum algorithms
+ *******************************************************/
+
+// Table lookup for calculating CRC-7 checksum that is used in SDIO command packets.
+// Every entry is an even number: the 7-bit CRC is kept in bits 7..1 and bit 0
+// is always zero, which is why callers mask received CRC bytes with 0xFE
+// before comparing.
+// Usage:
+//    uint8_t crc = 0;
+//    crc = crc7_table[crc ^ byte];
+//    .. repeat for every byte ..
+static const uint8_t crc7_table[256] = {
+	0x00, 0x12, 0x24, 0x36, 0x48, 0x5a, 0x6c, 0x7e,	0x90, 0x82, 0xb4, 0xa6, 0xd8, 0xca, 0xfc, 0xee,
+	0x32, 0x20, 0x16, 0x04, 0x7a, 0x68, 0x5e, 0x4c,	0xa2, 0xb0, 0x86, 0x94, 0xea, 0xf8, 0xce, 0xdc,
+	0x64, 0x76, 0x40, 0x52, 0x2c, 0x3e, 0x08, 0x1a,	0xf4, 0xe6, 0xd0, 0xc2, 0xbc, 0xae, 0x98, 0x8a,
+	0x56, 0x44, 0x72, 0x60, 0x1e, 0x0c, 0x3a, 0x28,	0xc6, 0xd4, 0xe2, 0xf0, 0x8e, 0x9c, 0xaa, 0xb8,
+	0xc8, 0xda, 0xec, 0xfe, 0x80, 0x92, 0xa4, 0xb6,	0x58, 0x4a, 0x7c, 0x6e, 0x10, 0x02, 0x34, 0x26,
+	0xfa, 0xe8, 0xde, 0xcc, 0xb2, 0xa0, 0x96, 0x84,	0x6a, 0x78, 0x4e, 0x5c, 0x22, 0x30, 0x06, 0x14,
+	0xac, 0xbe, 0x88, 0x9a, 0xe4, 0xf6, 0xc0, 0xd2,	0x3c, 0x2e, 0x18, 0x0a, 0x74, 0x66, 0x50, 0x42,
+	0x9e, 0x8c, 0xba, 0xa8, 0xd6, 0xc4, 0xf2, 0xe0,	0x0e, 0x1c, 0x2a, 0x38, 0x46, 0x54, 0x62, 0x70,
+	0x82, 0x90, 0xa6, 0xb4, 0xca, 0xd8, 0xee, 0xfc,	0x12, 0x00, 0x36, 0x24, 0x5a, 0x48, 0x7e, 0x6c,
+	0xb0, 0xa2, 0x94, 0x86, 0xf8, 0xea, 0xdc, 0xce,	0x20, 0x32, 0x04, 0x16, 0x68, 0x7a, 0x4c, 0x5e,
+	0xe6, 0xf4, 0xc2, 0xd0, 0xae, 0xbc, 0x8a, 0x98,	0x76, 0x64, 0x52, 0x40, 0x3e, 0x2c, 0x1a, 0x08,
+	0xd4, 0xc6, 0xf0, 0xe2, 0x9c, 0x8e, 0xb8, 0xaa,	0x44, 0x56, 0x60, 0x72, 0x0c, 0x1e, 0x28, 0x3a,
+	0x4a, 0x58, 0x6e, 0x7c, 0x02, 0x10, 0x26, 0x34,	0xda, 0xc8, 0xfe, 0xec, 0x92, 0x80, 0xb6, 0xa4,
+	0x78, 0x6a, 0x5c, 0x4e, 0x30, 0x22, 0x14, 0x06,	0xe8, 0xfa, 0xcc, 0xde, 0xa0, 0xb2, 0x84, 0x96,
+	0x2e, 0x3c, 0x0a, 0x18, 0x66, 0x74, 0x42, 0x50,	0xbe, 0xac, 0x9a, 0x88, 0xf6, 0xe4, 0xd2, 0xc0,
+	0x1c, 0x0e, 0x38, 0x2a, 0x54, 0x46, 0x70, 0x62,	0x8c, 0x9e, 0xa8, 0xba, 0xc4, 0xd6, 0xe0, 0xf2
+};
+
+// Calculate the CRC16 checksum for parallel 4 bit lines separately.
+// When the SDIO bus operates in 4-bit mode, the CRC16 algorithm
+// is applied to each line separately and generates total of
+// 4 x 16 = 64 bits of checksum.
+//
+// data:      pointer to the 32-bit words to checksum
+// num_words: number of 32-bit words in data
+// returns:   the combined 64-bit checksum
+//
+// NOTE: the inner loop always consumes four words without re-checking the
+// end pointer, so num_words must be a multiple of 4; otherwise the function
+// reads past the end of the buffer.
+__attribute__((optimize("O3")))
+uint64_t sdio_crc16_4bit_checksum(uint32_t *data, uint32_t num_words)
+{
+    uint64_t crc = 0;
+    uint32_t *end = data + num_words;
+    while (data < end)
+    {
+        // Manually unrolled: four words are processed per outer iteration
+        for (int unroll = 0; unroll < 4; unroll++)
+        {
+            // Each 32-bit word contains 8 bits per line.
+            // Reverse the bytes because SDIO protocol is big-endian.
+            uint32_t data_in = __builtin_bswap32(*data++);
+
+            // Shift out 8 bits for each line
+            uint32_t data_out = crc >> 32;
+            crc <<= 32;
+
+            // XOR outgoing data to itself with 4 bit delay
+            data_out ^= (data_out >> 16);
+
+            // XOR incoming data to outgoing data with 4 bit delay
+            data_out ^= (data_in >> 16);
+
+            // XOR outgoing and incoming data to accumulator at each tap
+            // (shift amounts are multiplied by 4 because the four lines are interleaved)
+            uint64_t xorred = data_out ^ data_in;
+            crc ^= xorred;
+            crc ^= xorred << (5 * 4);
+            crc ^= xorred << (12 * 4);
+        }
+    }
+
+    return crc;
+}
+
+/*******************************************************
+ * Basic SDIO command execution
+ *******************************************************/
+
+// Build a 48-bit command packet and hand it to the command state machine.
+//
+// command:       command index
+// arg:           32-bit command argument
+// response_bits: length of the expected response in bits, or 0 for none
+static void sdio_send_command(uint8_t command, uint32_t arg, uint8_t response_bits)
+{
+    // bluedbg("SDIO Command: ", (int)command, " arg ", arg);
+
+    // Argument bytes, most significant first
+    const uint32_t arg3 = (arg >> 24) & 0xFF;
+    const uint32_t arg2 = (arg >> 16) & 0xFF;
+    const uint32_t arg1 = (arg >>  8) & 0xFF;
+    const uint32_t arg0 = (arg >>  0) & 0xFF;
+
+    // First FIFO word, in the layout expected by the PIO code:
+    // bit count minus one, host-to-card direction bit, command byte, upper argument half
+    uint32_t word0 = (47 << 24) | (1 << 22) | (command << 16) | (arg3 << 8) | (arg2 << 0);
+
+    // Second FIFO word: lower argument half and the end bit
+    uint32_t word1 = (arg1 << 24) | (arg0 << 16) | (1 << 8);
+
+    // Low byte carries the response length minus one; it stays 0 when no response is expected
+    if (response_bits != 0)
+    {
+        word1 |= (response_bits - 1);
+    }
+
+    // CRC-7 covers the five packet bytes in the order they go out on the wire (big-endian)
+    const uint8_t wire_bytes[5] = {
+        (uint8_t)(word0 >> 16), (uint8_t)(word0 >> 8), (uint8_t)(word0 >> 0),
+        (uint8_t)(word1 >> 24), (uint8_t)(word1 >> 16)
+    };
+    uint8_t crc = 0;
+    for (int i = 0; i < 5; i++)
+    {
+        crc = crc7_table[crc ^ wire_bytes[i]];
+    }
+    word1 |= crc << 8;
+
+    // Queue the packet for transmission
+    pio_sm_clear_fifos(SDIO_PIO, SDIO_CMD_SM);
+    pio_sm_put(SDIO_PIO, SDIO_CMD_SM, word0);
+    pio_sm_put(SDIO_PIO, SDIO_CMD_SM, word1);
+}
+
+// Send a command and, if requested, receive and validate a 48-bit R1 response.
+//
+// command:  command index
+// arg:      32-bit command argument
+// response: receives the 32-bit response payload; pass a null pointer when
+//           no response is expected
+// returns:  SDIO_OK, SDIO_ERR_RESPONSE_TIMEOUT, SDIO_ERR_RESPONSE_CRC or
+//           SDIO_ERR_RESPONSE_CODE
+sdio_status_t rp2040_sdio_command_R1(uint8_t command, uint32_t arg, uint32_t *response)
+{
+    sdio_send_command(command, arg, response ? 48 : 0);
+
+    // Two FIFO words make up a response; without one, only a single marker word arrives
+    const uint32_t words_needed = response ? 2 : 1;
+    const uint32_t t0 = millis();
+    for (;;)
+    {
+        if (pio_sm_get_rx_fifo_level(SDIO_PIO, SDIO_CMD_SM) >= words_needed)
+        {
+            break;
+        }
+
+        if ((uint32_t)(millis() - t0) > 2)
+        {
+            bluedbg("Timeout waiting for response in rp2040_sdio_command_R1(", (int)command, "), ",
+                  "PIO PC: ", (int)pio_sm_get_pc(SDIO_PIO, SDIO_CMD_SM) - (int)g_sdio.pio_cmd_clk_offset,
+                  " RXF: ", (int)pio_sm_get_rx_fifo_level(SDIO_PIO, SDIO_CMD_SM),
+                  " TXF: ", (int)pio_sm_get_tx_fifo_level(SDIO_PIO, SDIO_CMD_SM));
+
+            // Put the state machine back to the start of its program
+            pio_sm_clear_fifos(SDIO_PIO, SDIO_CMD_SM);
+            pio_sm_exec(SDIO_PIO, SDIO_CMD_SM, pio_encode_jmp(g_sdio.pio_cmd_clk_offset));
+            return SDIO_ERR_RESPONSE_TIMEOUT;
+        }
+    }
+
+    if (!response)
+    {
+        // Nothing to decode, just discard the dummy marker
+        pio_sm_get(SDIO_PIO, SDIO_CMD_SM);
+        return SDIO_OK;
+    }
+
+    // Fetch the two words of the response packet
+    uint32_t resp0 = pio_sm_get(SDIO_PIO, SDIO_CMD_SM);
+    uint32_t resp1 = pio_sm_get(SDIO_PIO, SDIO_CMD_SM);
+    // bluedbg("SDIO R1 response: ", resp0, " ", resp1);
+
+    // CRC-7 over the four bytes of resp0 (MSB first) followed by bits 15..8 of resp1
+    uint8_t computed_crc = 0;
+    for (int shift = 24; shift >= 0; shift -= 8)
+    {
+        computed_crc = crc7_table[computed_crc ^ ((resp0 >> shift) & 0xFF)];
+    }
+    computed_crc = crc7_table[computed_crc ^ ((resp1 >> 8) & 0xFF)];
+
+    uint8_t packet_crc = ((resp1 >> 0) & 0xFE);
+    if (computed_crc != packet_crc)
+    {
+        bluedbg("rp2040_sdio_command_R1(", (int)command, "): CRC error, calculated ", computed_crc, " packet has ", packet_crc);
+        return SDIO_ERR_RESPONSE_CRC;
+    }
+
+    // The response must echo the command index; command 41 is exempt from this check
+    uint8_t echoed_cmd = ((resp0 >> 24) & 0xFF);
+    if (command != 41 && echoed_cmd != command)
+    {
+        bluedbg("rp2040_sdio_command_R1(", (int)command, "): received reply for ", (int)echoed_cmd);
+        return SDIO_ERR_RESPONSE_CODE;
+    }
+
+    *response = ((resp0 & 0xFFFFFF) << 8) | ((resp1 >> 8) & 0xFF);
+    return SDIO_OK;
+}
+
+// Send a command and receive a 136-bit R2 response.
+//
+// command:  command index
+// arg:      32-bit command argument
+// response: receives 16 bytes; bytes 0..14 are the payload and byte 15
+//           is the byte that carries the CRC
+// returns:  SDIO_OK, SDIO_ERR_RESPONSE_TIMEOUT, SDIO_ERR_RESPONSE_CRC or
+//           SDIO_ERR_RESPONSE_CODE
+sdio_status_t rp2040_sdio_command_R2(uint8_t command, uint32_t arg, uint8_t response[16])
+{
+    // The response is too long to fit in the PIO FIFO, so use DMA to receive it.
+    // The DMA channel is armed before the command is sent.
+    pio_sm_clear_fifos(SDIO_PIO, SDIO_CMD_SM);
+    uint32_t response_buf[5];
+    dma_channel_config dmacfg = dma_channel_get_default_config(SDIO_DMA_CH);
+    channel_config_set_transfer_data_size(&dmacfg, DMA_SIZE_32);
+    channel_config_set_read_increment(&dmacfg, false);
+    channel_config_set_write_increment(&dmacfg, true);
+    channel_config_set_dreq(&dmacfg, pio_get_dreq(SDIO_PIO, SDIO_CMD_SM, false));
+    dma_channel_configure(SDIO_DMA_CH, &dmacfg, &response_buf, &SDIO_PIO->rxf[SDIO_CMD_SM], 5, true);
+
+    sdio_send_command(command, arg, 136);
+
+    // Wait up to 2 ms for all five words to arrive
+    uint32_t start = millis();
+    while (dma_channel_is_busy(SDIO_DMA_CH))
+    {
+        if ((uint32_t)(millis() - start) > 2)
+        {
+            bluedbg("Timeout waiting for response in rp2040_sdio_command_R2(", (int)command, "), ",
+                  "PIO PC: ", (int)pio_sm_get_pc(SDIO_PIO, SDIO_CMD_SM) - (int)g_sdio.pio_cmd_clk_offset,
+                  " RXF: ", (int)pio_sm_get_rx_fifo_level(SDIO_PIO, SDIO_CMD_SM),
+                  " TXF: ", (int)pio_sm_get_tx_fifo_level(SDIO_PIO, SDIO_CMD_SM));
+
+            // Reset the state machine program
+            dma_channel_abort(SDIO_DMA_CH);
+            pio_sm_clear_fifos(SDIO_PIO, SDIO_CMD_SM);
+            pio_sm_exec(SDIO_PIO, SDIO_CMD_SM, pio_encode_jmp(g_sdio.pio_cmd_clk_offset));
+            return SDIO_ERR_RESPONSE_TIMEOUT;
+        }
+    }
+
+    dma_channel_abort(SDIO_DMA_CH);
+
+    // Copy the response payload to output buffer
+    // The top byte of response_buf[0] is the response code and is checked separately below.
+    response[0]  = ((response_buf[0] >> 16) & 0xFF);
+    response[1]  = ((response_buf[0] >>  8) & 0xFF);
+    response[2]  = ((response_buf[0] >>  0) & 0xFF);
+    response[3]  = ((response_buf[1] >> 24) & 0xFF);
+    response[4]  = ((response_buf[1] >> 16) & 0xFF);
+    response[5]  = ((response_buf[1] >>  8) & 0xFF);
+    response[6]  = ((response_buf[1] >>  0) & 0xFF);
+    response[7]  = ((response_buf[2] >> 24) & 0xFF);
+    response[8]  = ((response_buf[2] >> 16) & 0xFF);
+    response[9]  = ((response_buf[2] >>  8) & 0xFF);
+    response[10] = ((response_buf[2] >>  0) & 0xFF);
+    response[11] = ((response_buf[3] >> 24) & 0xFF);
+    response[12] = ((response_buf[3] >> 16) & 0xFF);
+    response[13] = ((response_buf[3] >>  8) & 0xFF);
+    response[14] = ((response_buf[3] >>  0) & 0xFF);
+    response[15] = ((response_buf[4] >>  0) & 0xFF);
+
+    // Calculate checksum of the payload
+    // (bytes 0..14 only; the response code byte is not included)
+    uint8_t crc = 0;
+    for (int i = 0; i < 15; i++)
+    {
+        crc = crc7_table[crc ^ response[i]];
+    }
+
+    // Bit 0 of the last byte is not part of the CRC
+    uint8_t actual_crc = response[15] & 0xFE;
+    if (crc != actual_crc)
+    {
+        bluedbg("rp2040_sdio_command_R2(", (int)command, "): CRC error, calculated ", crc, " packet has ", actual_crc);
+        return SDIO_ERR_RESPONSE_CRC;
+    }
+
+    // The first byte of an R2 response is expected to be 0x3F rather than the command index
+    uint8_t response_cmd = ((response_buf[0] >> 24) & 0xFF);
+    if (response_cmd != 0x3F)
+    {
+        bluedbg("rp2040_sdio_command_R2(", (int)command, "): Expected reply code 0x3F");
+        return SDIO_ERR_RESPONSE_CODE;
+    }
+
+    return SDIO_OK;
+}
+
+
+// Send a command and receive a 48-bit R3 response.
+// Unlike the R1 variant, neither the CRC nor the response code is checked.
+//
+// command:  command index
+// arg:      32-bit command argument
+// response: receives the 32-bit response payload (must not be null)
+// returns:  SDIO_OK or SDIO_ERR_RESPONSE_TIMEOUT
+sdio_status_t rp2040_sdio_command_R3(uint8_t command, uint32_t arg, uint32_t *response)
+{
+    sdio_send_command(command, arg, 48);
+
+    // Poll until both response words are in the FIFO, giving up after 2 ms
+    const uint32_t t0 = millis();
+    for (;;)
+    {
+        if (pio_sm_get_rx_fifo_level(SDIO_PIO, SDIO_CMD_SM) >= 2)
+        {
+            break;
+        }
+
+        if ((uint32_t)(millis() - t0) > 2)
+        {
+            bluedbg("Timeout waiting for response in rp2040_sdio_command_R3(", (int)command, "), ",
+                  "PIO PC: ", (int)pio_sm_get_pc(SDIO_PIO, SDIO_CMD_SM) - (int)g_sdio.pio_cmd_clk_offset,
+                  " RXF: ", (int)pio_sm_get_rx_fifo_level(SDIO_PIO, SDIO_CMD_SM),
+                  " TXF: ", (int)pio_sm_get_tx_fifo_level(SDIO_PIO, SDIO_CMD_SM));
+
+            // Put the state machine back to the start of its program
+            pio_sm_clear_fifos(SDIO_PIO, SDIO_CMD_SM);
+            pio_sm_exec(SDIO_PIO, SDIO_CMD_SM, pio_encode_jmp(g_sdio.pio_cmd_clk_offset));
+            return SDIO_ERR_RESPONSE_TIMEOUT;
+        }
+    }
+
+    // Payload is the low 24 bits of the first word followed by bits 15..8 of the second
+    const uint32_t first_word = pio_sm_get(SDIO_PIO, SDIO_CMD_SM);
+    const uint32_t second_word = pio_sm_get(SDIO_PIO, SDIO_CMD_SM);
+    const uint32_t payload_high = (first_word & 0xFFFFFF) << 8;
+    const uint32_t payload_low = (second_word >> 8) & 0xFF;
+    *response = payload_high | payload_low;
+    // bluedbg("SDIO R3 response: ", first_word, " ", second_word);
+
+    return SDIO_OK;
+}
+
+/*******************************************************
+ * Data reception from SD card
+ *******************************************************/
+
+// Start receiving num_blocks blocks of SDIO_BLOCK_SIZE bytes into buffer.
+// The transfer runs in the background on PIO + DMA; progress and completion
+// are reported by rp2040_sdio_rx_poll().
+//
+// buffer:     destination, must be 4-byte aligned
+// num_blocks: number of blocks to receive, at most SDIO_MAX_BLOCKS
+// returns:    always SDIO_OK
+sdio_status_t rp2040_sdio_rx_start(uint8_t *buffer, uint32_t num_blocks)
+{
+    // Buffer must be aligned
+    assert(((uint32_t)buffer & 3) == 0 && num_blocks <= SDIO_MAX_BLOCKS);
+
+    g_sdio.transfer_state = SDIO_RX;
+    g_sdio.transfer_start_time = millis();
+    g_sdio.data_buf = (uint32_t*)buffer;
+    g_sdio.blocks_done = 0;
+    g_sdio.total_blocks = num_blocks;
+    g_sdio.blocks_checksumed = 0;
+    g_sdio.checksum_errors = 0;
+
+    // Create DMA block descriptors to store each block of 512 bytes of data to buffer
+    // and then 8 bytes to g_sdio.received_checksums.
+    for (int i = 0; i < num_blocks; i++)
+    {
+        g_sdio.dma_blocks[i * 2].write_addr = buffer + i * SDIO_BLOCK_SIZE;
+        g_sdio.dma_blocks[i * 2].transfer_count = SDIO_BLOCK_SIZE / sizeof(uint32_t);
+
+        g_sdio.dma_blocks[i * 2 + 1].write_addr = &g_sdio.received_checksums[i];
+        g_sdio.dma_blocks[i * 2 + 1].transfer_count = 2;
+    }
+    // All-zero terminator entry. It lives at index num_blocks * 2, so
+    // g_sdio.dma_blocks must have at least num_blocks * 2 + 1 entries.
+    g_sdio.dma_blocks[num_blocks * 2].write_addr = 0;
+    g_sdio.dma_blocks[num_blocks * 2].transfer_count = 0;
+
+    // Configure first DMA channel for reading from the PIO RX fifo
+    // It is left without destination or count here; those are supplied by the second channel.
+    dma_channel_config dmacfg = dma_channel_get_default_config(SDIO_DMA_CH);
+    channel_config_set_transfer_data_size(&dmacfg, DMA_SIZE_32);
+    channel_config_set_read_increment(&dmacfg, false);
+    channel_config_set_write_increment(&dmacfg, true);
+    channel_config_set_dreq(&dmacfg, pio_get_dreq(SDIO_PIO, SDIO_DATA_SM, false));
+    channel_config_set_bswap(&dmacfg, true);
+    channel_config_set_chain_to(&dmacfg, SDIO_DMA_CHB);
+    dma_channel_configure(SDIO_DMA_CH, &dmacfg, 0, &SDIO_PIO->rxf[SDIO_DATA_SM], 0, false);
+
+    // Configure second DMA channel for reconfiguring the first one
+    // Each run copies one two-word descriptor into the first channel's registers,
+    // starting at al1_write_addr. The write address wraps on an 8-byte ring so
+    // every descriptor hits the same two registers.
+    // NOTE(review): presumably the second register is the transfer-count trigger
+    // alias, which is what restarts the first channel - confirm against the datasheet.
+    dmacfg = dma_channel_get_default_config(SDIO_DMA_CHB);
+    channel_config_set_transfer_data_size(&dmacfg, DMA_SIZE_32);
+    channel_config_set_read_increment(&dmacfg, true);
+    channel_config_set_write_increment(&dmacfg, true);
+    channel_config_set_ring(&dmacfg, true, 3);
+    dma_channel_configure(SDIO_DMA_CHB, &dmacfg, &dma_hw->ch[SDIO_DMA_CH].al1_write_addr,
+        g_sdio.dma_blocks, 2, false);
+
+    // Initialize PIO state machine
+    // and make the four data pins inputs
+    pio_sm_init(SDIO_PIO, SDIO_DATA_SM, g_sdio.pio_data_rx_offset, &g_sdio.pio_cfg_data_rx);
+    pio_sm_set_consecutive_pindirs(SDIO_PIO, SDIO_DATA_SM, SDIO_D0, 4, false);
+
+    // Write number of nibbles to receive to Y register
+    // (two nibbles per data byte plus 16 nibbles of checksum, minus one)
+    pio_sm_put(SDIO_PIO, SDIO_DATA_SM, SDIO_BLOCK_SIZE * 2 + 16 - 1);
+    pio_sm_exec(SDIO_PIO, SDIO_DATA_SM, pio_encode_out(pio_y, 32));
+
+    // Enable RX FIFO join because we don't need the TX FIFO during transfer.
+    // This gives more leeway for the DMA block switching
+    SDIO_PIO->sm[SDIO_DATA_SM].shiftctrl |= PIO_SM0_SHIFTCTRL_FJOIN_RX_BITS;
+
+    // Start PIO and DMA
+    // The control channel goes first so that the data channel is armed
+    // when the state machine is enabled.
+    dma_channel_start(SDIO_DMA_CHB);
+    pio_sm_set_enabled(SDIO_PIO, SDIO_DATA_SM, true);
+
+    return SDIO_OK;
+}
+
+// Verify the checksums of blocks that have been received but not yet checked.
+// At most maxcount blocks are processed per call. Mismatches are counted in
+// g_sdio.checksum_errors and only the first one is logged.
+static void sdio_verify_rx_checksums(uint32_t maxcount)
+{
+    for (uint32_t budget = maxcount; budget > 0; budget--)
+    {
+        if (g_sdio.blocks_checksumed >= g_sdio.blocks_done)
+        {
+            // Everything received so far has been verified
+            break;
+        }
+
+        int blockidx = g_sdio.blocks_checksumed++;
+
+        // Checksum as computed from the data sitting in the buffer
+        uint64_t checksum = sdio_crc16_4bit_checksum(g_sdio.data_buf + blockidx * SDIO_WORDS_PER_BLOCK,
+                                                     SDIO_WORDS_PER_BLOCK);
+
+        // Checksum as sent by the card; swap each half to little-endian before joining them
+        uint64_t expected = (uint64_t)__builtin_bswap32(g_sdio.received_checksums[blockidx].top) << 32;
+        expected |= __builtin_bswap32(g_sdio.received_checksums[blockidx].bottom);
+
+        if (checksum == expected)
+        {
+            continue;
+        }
+
+        if (++g_sdio.checksum_errors == 1)
+        {
+            bluelog("SDIO checksum error in reception: block ", blockidx,
+                  " calculated ", checksum, " expected ", expected);
+        }
+    }
+}
+
+// Check the progress of a reception started with rp2040_sdio_rx_start().
+//
+// bytes_complete: if not null, receives the number of bytes received so far
+// returns: SDIO_BUSY while the transfer is in progress,
+//          SDIO_OK when all blocks were received with valid checksums,
+//          SDIO_ERR_DATA_CRC when at least one block failed its checksum,
+//          SDIO_ERR_DATA_TIMEOUT when the transfer has run for more than 1000 ms
+sdio_status_t rp2040_sdio_rx_poll(uint32_t *bytes_complete)
+{
+    // Was everything done when the previous rx_poll() finished?
+    if (g_sdio.blocks_done >= g_sdio.total_blocks)
+    {
+        g_sdio.transfer_state = SDIO_IDLE;
+    }
+    else
+    {
+        // Use the idle time to calculate checksums
+        sdio_verify_rx_checksums(4);
+
+        // Check how many DMA control blocks have been consumed
+        uint32_t dma_ctrl_block_count = (dma_hw->ch[SDIO_DMA_CHB].read_addr - (uint32_t)&g_sdio.dma_blocks);
+        dma_ctrl_block_count /= sizeof(g_sdio.dma_blocks[0]);
+
+        // Compute how many complete 512 byte SDIO blocks have been transferred
+        // When transfer ends, dma_ctrl_block_count == g_sdio.total_blocks * 2 + 1
+        //
+        // If the control channel has not yet fetched its first descriptor the
+        // count is 0. The subtraction is unsigned, so without this guard it
+        // would wrap around and report the whole transfer as complete.
+        if (dma_ctrl_block_count > 0)
+        {
+            g_sdio.blocks_done = (dma_ctrl_block_count - 1) / 2;
+        }
+        else
+        {
+            g_sdio.blocks_done = 0;
+        }
+
+        // NOTE: When all blocks are done, rx_poll() still returns SDIO_BUSY once.
+        // This provides a chance to start the SCSI transfer before the last checksums
+        // are computed. Any checksum failures can be indicated in SCSI status after
+        // the data transfer has finished.
+    }
+
+    if (bytes_complete)
+    {
+        *bytes_complete = g_sdio.blocks_done * SDIO_BLOCK_SIZE;
+    }
+
+    if (g_sdio.transfer_state == SDIO_IDLE)
+    {
+        // Verify all remaining checksums.
+        sdio_verify_rx_checksums(g_sdio.total_blocks);
+
+        if (g_sdio.checksum_errors == 0)
+            return SDIO_OK;
+        else
+            return SDIO_ERR_DATA_CRC;
+    }
+    else if ((uint32_t)(millis() - g_sdio.transfer_start_time) > 1000)
+    {
+        bluedbg("rp2040_sdio_rx_poll() timeout, "
+            "PIO PC: ", (int)pio_sm_get_pc(SDIO_PIO, SDIO_DATA_SM) - (int)g_sdio.pio_data_rx_offset,
+            " RXF: ", (int)pio_sm_get_rx_fifo_level(SDIO_PIO, SDIO_DATA_SM),
+            " TXF: ", (int)pio_sm_get_tx_fifo_level(SDIO_PIO, SDIO_DATA_SM),
+            " DMA CNT: ", dma_hw->ch[SDIO_DMA_CH].al2_transfer_count);
+        rp2040_sdio_stop();
+        return SDIO_ERR_DATA_TIMEOUT;
+    }
+
+    return SDIO_BUSY;
+}
+
+
+/*******************************************************
+ * Data transmission to SD card
+ *******************************************************/
+
+// Set up PIO and DMA to transmit the block at index g_sdio.blocks_done,
+// followed by the checksum in g_sdio.next_wr_block_checksum and an end token.
+// Completion is signalled through the DMA IRQ of SDIO_DMA_CHB.
+static void sdio_start_next_block_tx()
+{
+    // Initialize PIO
+    pio_sm_init(SDIO_PIO, SDIO_DATA_SM, g_sdio.pio_data_tx_offset, &g_sdio.pio_cfg_data_tx);
+
+    // Configure DMA to send the data block payload (512 bytes)
+    // The channel chains to SDIO_DMA_CHB, which is set up below.
+    dma_channel_config dmacfg = dma_channel_get_default_config(SDIO_DMA_CH);
+    channel_config_set_transfer_data_size(&dmacfg, DMA_SIZE_32);
+    channel_config_set_read_increment(&dmacfg, true);
+    channel_config_set_write_increment(&dmacfg, false);
+    channel_config_set_dreq(&dmacfg, pio_get_dreq(SDIO_PIO, SDIO_DATA_SM, true));
+    channel_config_set_bswap(&dmacfg, true);
+    channel_config_set_chain_to(&dmacfg, SDIO_DMA_CHB);
+    dma_channel_configure(SDIO_DMA_CH, &dmacfg,
+        &SDIO_PIO->txf[SDIO_DATA_SM], g_sdio.data_buf + g_sdio.blocks_done * SDIO_WORDS_PER_BLOCK,
+        SDIO_WORDS_PER_BLOCK, false);
+
+    // Prepare second DMA channel to send the CRC and block end marker
+    // It reuses the configuration above, except that byte swapping is turned off.
+    uint64_t crc = g_sdio.next_wr_block_checksum;
+    g_sdio.end_token_buf[0] = (uint32_t)(crc >> 32);
+    g_sdio.end_token_buf[1] = (uint32_t)(crc >>  0);
+    g_sdio.end_token_buf[2] = 0xFFFFFFFF;
+    channel_config_set_bswap(&dmacfg, false);
+    dma_channel_configure(SDIO_DMA_CHB, &dmacfg,
+        &SDIO_PIO->txf[SDIO_DATA_SM], g_sdio.end_token_buf, 3, false);
+
+    // Enable IRQ to trigger when block is done
+    // Any stale pending flag for the channel is cleared first.
+    dma_hw->ints1 = 1 << SDIO_DMA_CHB;
+    dma_set_irq1_channel_mask_enabled(1 << SDIO_DMA_CHB, 1);
+
+    // Initialize register X with nibble count and register Y with response bit count
+    // NOTE(review): 1048 presumably is 8 start token nibbles + 1024 data nibbles
+    // + 16 checksum nibbles - confirm against the PIO program.
+    pio_sm_put(SDIO_PIO, SDIO_DATA_SM, 1048);
+    pio_sm_exec(SDIO_PIO, SDIO_DATA_SM, pio_encode_out(pio_x, 32));
+    pio_sm_put(SDIO_PIO, SDIO_DATA_SM, 31);
+    pio_sm_exec(SDIO_PIO, SDIO_DATA_SM, pio_encode_out(pio_y, 32));
+
+    // Initialize pins to output and high
+    pio_sm_exec(SDIO_PIO, SDIO_DATA_SM, pio_encode_set(pio_pins, 15));
+    pio_sm_exec(SDIO_PIO, SDIO_DATA_SM, pio_encode_set(pio_pindirs, 15));
+
+    // Write start token and start the DMA transfer.
+    pio_sm_put(SDIO_PIO, SDIO_DATA_SM, 0xFFFFFFF0);
+    dma_channel_start(SDIO_DMA_CH);
+
+    // Start state machine
+    pio_sm_set_enabled(SDIO_PIO, SDIO_DATA_SM, true);
+}
+
+// Calculate the checksum of the next block that has not been checksummed yet
+// and store it in g_sdio.next_wr_block_checksum for the transmit code.
+static void sdio_compute_next_tx_checksum()
+{
+    assert (g_sdio.blocks_done < g_sdio.total_blocks && g_sdio.blocks_checksumed < g_sdio.total_blocks);
+
+    // Locate the block, then mark it as checksummed before doing the work
+    uint32_t *block_start = g_sdio.data_buf + g_sdio.blocks_checksumed * SDIO_WORDS_PER_BLOCK;
+    g_sdio.blocks_checksumed++;
+
+    g_sdio.next_wr_block_checksum = sdio_crc16_4bit_checksum(block_start, SDIO_WORDS_PER_BLOCK);
+}
+
+// Begin writing num_blocks blocks from buffer to the SD card.
+// The first block is started here; the remaining ones are started from the
+// DMA interrupt handler. Use rp2040_sdio_tx_poll() to follow the progress.
+//
+// buffer:     source data, must be 4-byte aligned
+// num_blocks: number of blocks to send, at most SDIO_MAX_BLOCKS
+// returns:    always SDIO_OK
+sdio_status_t rp2040_sdio_tx_start(const uint8_t *buffer, uint32_t num_blocks)
+{
+    // Alignment and size limits
+    assert(((uint32_t)buffer & 3) == 0 && num_blocks <= SDIO_MAX_BLOCKS);
+
+    // Reset the bookkeeping for a fresh transfer
+    g_sdio.transfer_state = SDIO_TX;
+    g_sdio.transfer_start_time = millis();
+    g_sdio.data_buf = (uint32_t*)buffer;
+    g_sdio.blocks_done = 0;
+    g_sdio.total_blocks = num_blocks;
+    g_sdio.blocks_checksumed = 0;
+    g_sdio.checksum_errors = 0;
+
+    // The checksum of block 0 has to exist before the block can be sent
+    sdio_compute_next_tx_checksum();
+    sdio_start_next_block_tx();
+
+    // While block 0 is going out, get the checksum of block 1 ready (if there is one)
+    if (g_sdio.total_blocks > g_sdio.blocks_checksumed)
+    {
+        sdio_compute_next_tx_checksum();
+    }
+
+    return SDIO_OK;
+}
+
+// Decode the status token the card sends after a written data block.
+//
+// card_response: raw word captured by the PIO program
+// returns: SDIO_OK when the card accepted the block, SDIO_ERR_WRITE_CRC for a
+//          CRC error, SDIO_ERR_WRITE_FAIL for a write failure or an
+//          unrecognized status
+sdio_status_t check_sdio_write_response(uint32_t card_response)
+{
+    // Shift card response until top bit is 0 (the start bit)
+    // The format of response is poorly documented in SDIO spec but refer to e.g.
+    // http://my-cool-projects.blogspot.com/2013/02/the-mysterious-sd-card-crc-status.html
+    //
+    // Leading one bits are dropped in steps of 16, 8, 4, 2 and 1.
+    uint32_t shifted = card_response;
+    for (uint32_t width = 16; width >= 1; width >>= 1)
+    {
+        const uint32_t top_mask = 0xFFFFFFFFu << (32 - width);
+        if ((shifted & top_mask) == top_mask)
+        {
+            shifted <<= width;
+        }
+    }
+
+    // The three status bits directly follow the start bit
+    switch ((shifted >> 28) & 7)
+    {
+        case 2:
+            return SDIO_OK;
+
+        case 5:
+            bluelog("SDIO card reports write CRC error, status ", card_response);
+            return SDIO_ERR_WRITE_CRC;
+
+        case 6:
+            bluelog("SDIO card reports write failure, status ", card_response);
+            return SDIO_ERR_WRITE_FAIL;
+
+        default:
+            bluelog("SDIO card reports unknown write status ", card_response);
+            return SDIO_ERR_WRITE_FAIL;
+    }
+}
+
+// When a block finishes, this IRQ handler starts the next one
+//
+// The handler runs in two steps, both of which may execute in one invocation:
+//  1. SDIO_TX: once both DMA channels are idle the block has been handed to
+//     the PIO, and the card response is collected (directly or through DMA).
+//  2. SDIO_TX_WAIT_IDLE: once the response is available it is checked and
+//     either the next block is started or the transfer is stopped.
+static void rp2040_sdio_tx_irq()
+{
+    // Acknowledge the interrupt
+    dma_hw->ints1 = 1 << SDIO_DMA_CHB;
+
+    if (g_sdio.transfer_state == SDIO_TX)
+    {
+        if (!dma_channel_is_busy(SDIO_DMA_CH) && !dma_channel_is_busy(SDIO_DMA_CHB))
+        {
+            // Main data transfer is finished now.
+            // When card is ready, PIO will put card response on RX fifo
+            g_sdio.transfer_state = SDIO_TX_WAIT_IDLE;
+            if (!pio_sm_is_rx_fifo_empty(SDIO_PIO, SDIO_DATA_SM))
+            {
+                // Card is already idle
+                g_sdio.card_response = pio_sm_get(SDIO_PIO, SDIO_DATA_SM);
+            }
+            else
+            {
+                // Use DMA to wait for the response
+                // The single word goes to g_sdio.card_response; the transfer is
+                // started immediately and its completion re-enters this handler.
+                dma_channel_config dmacfg = dma_channel_get_default_config(SDIO_DMA_CHB);
+                channel_config_set_transfer_data_size(&dmacfg, DMA_SIZE_32);
+                channel_config_set_read_increment(&dmacfg, false);
+                channel_config_set_write_increment(&dmacfg, false);
+                channel_config_set_dreq(&dmacfg, pio_get_dreq(SDIO_PIO, SDIO_DATA_SM, false));
+                dma_channel_configure(SDIO_DMA_CHB, &dmacfg,
+                    &g_sdio.card_response, &SDIO_PIO->rxf[SDIO_DATA_SM], 1, true);
+            }
+        }
+    }
+
+    if (g_sdio.transfer_state == SDIO_TX_WAIT_IDLE)
+    {
+        // An idle channel here means the response is in g_sdio.card_response
+        if (!dma_channel_is_busy(SDIO_DMA_CHB))
+        {
+            g_sdio.wr_status = check_sdio_write_response(g_sdio.card_response);
+
+            if (g_sdio.wr_status != SDIO_OK)
+            {
+                // Abort the transfer; the error stays in g_sdio.wr_status
+                rp2040_sdio_stop();
+                return;
+            }
+
+            g_sdio.blocks_done++;
+            if (g_sdio.blocks_done < g_sdio.total_blocks)
+            {
+                sdio_start_next_block_tx();
+                g_sdio.transfer_state = SDIO_TX;
+
+                if (g_sdio.blocks_checksumed < g_sdio.total_blocks)
+                {
+                    // Precompute the CRC for next block so that it is ready when
+                    // we want to send it.
+                    sdio_compute_next_tx_checksum();
+                }
+            }
+            else
+            {
+                // All blocks written
+                rp2040_sdio_stop();
+            }
+        }
+    }
+}
+
+// Poll the state of an ongoing block write.
+//
+// bytes_complete: optional, receives the number of bytes in fully written
+//                 blocks so far (blocks_done * SDIO_BLOCK_SIZE).
+// Returns SDIO_BUSY while the transfer is running, the stored write status
+// once the state machine has gone idle, or SDIO_ERR_DATA_TIMEOUT if more
+// than 1000 ms have passed since the transfer start time.
+sdio_status_t rp2040_sdio_tx_poll(uint32_t *bytes_complete)
+{
+    const bool in_exception = (SCB->ICSR & SCB_ICSR_VECTACTIVE_Msk) != 0;
+    if (in_exception)
+    {
+        // Make sure the IRQ handler still runs when we are called from
+        // inside an exception handler such as the hardfault handler.
+        rp2040_sdio_tx_irq();
+    }
+
+    if (bytes_complete != nullptr)
+    {
+        *bytes_complete = g_sdio.blocks_done * SDIO_BLOCK_SIZE;
+    }
+
+    if (g_sdio.transfer_state == SDIO_IDLE)
+    {
+        // Transfer has ended (successfully or not), report the final status
+        rp2040_sdio_stop();
+        return g_sdio.wr_status;
+    }
+
+    const uint32_t elapsed_ms = (uint32_t)(millis() - g_sdio.transfer_start_time);
+    if (elapsed_ms <= 1000)
+    {
+        return SDIO_BUSY;
+    }
+
+    // Transfer got stuck: dump PIO / DMA state before forcing idle
+    bluedbg("rp2040_sdio_tx_poll() timeout, "
+        "PIO PC: ", (int)pio_sm_get_pc(SDIO_PIO, SDIO_DATA_SM) - (int)g_sdio.pio_data_tx_offset,
+        " RXF: ", (int)pio_sm_get_rx_fifo_level(SDIO_PIO, SDIO_DATA_SM),
+        " TXF: ", (int)pio_sm_get_tx_fifo_level(SDIO_PIO, SDIO_DATA_SM),
+        " DMA CNT: ", dma_hw->ch[SDIO_DMA_CH].al2_transfer_count);
+    rp2040_sdio_stop();
+    return SDIO_ERR_DATA_TIMEOUT;
+}
+
+// Force everything to idle state
+//
+// Aborts both SDIO DMA channels, disables the DMA_IRQ_1 source for
+// SDIO_DMA_CHB, stops the data state machine and turns the four data pins
+// starting at SDIO_D0 back into inputs. The command/clock state machine is
+// not touched here.
+// Always returns SDIO_OK.
+sdio_status_t rp2040_sdio_stop()
+{
+    dma_channel_abort(SDIO_DMA_CH);
+    dma_channel_abort(SDIO_DMA_CHB);
+    dma_set_irq1_channel_mask_enabled(1 << SDIO_DMA_CHB, 0);
+    pio_sm_set_enabled(SDIO_PIO, SDIO_DATA_SM, false);
+    pio_sm_set_consecutive_pindirs(SDIO_PIO, SDIO_DATA_SM, SDIO_D0, 4, false);
+    g_sdio.transfer_state = SDIO_IDLE;
+    return SDIO_OK;
+}
+
+// (Re)initialize the SDIO interface.
+//
+// clock_divider: integer divider applied to all three state machine
+//                configurations (fractional part is always 0).
+//
+// The function can be called repeatedly: PIO state machines and DMA channels
+// are claimed only on the first call, everything else is set up again from
+// scratch, including the global g_sdio state which is zeroed.
+void rp2040_sdio_init(int clock_divider)
+{
+    // Mark resources as being in use, unless it has been done already.
+    static bool resources_claimed = false;
+    if (!resources_claimed)
+    {
+        pio_sm_claim(SDIO_PIO, SDIO_CMD_SM);
+        pio_sm_claim(SDIO_PIO, SDIO_DATA_SM);
+        dma_channel_claim(SDIO_DMA_CH);
+        dma_channel_claim(SDIO_DMA_CHB);
+        resources_claimed = true;
+    }
+
+    memset(&g_sdio, 0, sizeof(g_sdio));
+
+    // Stop any transfer that might still be running from a previous session
+    dma_channel_abort(SDIO_DMA_CH);
+    dma_channel_abort(SDIO_DMA_CHB);
+    pio_sm_set_enabled(SDIO_PIO, SDIO_CMD_SM, false);
+    pio_sm_set_enabled(SDIO_PIO, SDIO_DATA_SM, false);
+
+    // Load PIO programs
+    // NOTE(review): this clears the whole instruction memory of SDIO_PIO,
+    // presumably no other code shares this PIO block - confirm.
+    pio_clear_instruction_memory(SDIO_PIO);
+
+    // Command & clock state machine
+    // CMD pin is used as OUT, IN, SET and JMP pin; CLK is driven through side-set.
+    g_sdio.pio_cmd_clk_offset = pio_add_program(SDIO_PIO, &sdio_cmd_clk_program);
+    pio_sm_config cfg = sdio_cmd_clk_program_get_default_config(g_sdio.pio_cmd_clk_offset);
+    sm_config_set_out_pins(&cfg, SDIO_CMD, 1);
+    sm_config_set_in_pins(&cfg, SDIO_CMD);
+    sm_config_set_set_pins(&cfg, SDIO_CMD, 1);
+    sm_config_set_jmp_pin(&cfg, SDIO_CMD);
+    sm_config_set_sideset_pins(&cfg, SDIO_CLK);
+    sm_config_set_out_shift(&cfg, false, true, 32);
+    sm_config_set_in_shift(&cfg, false, true, 32);
+    sm_config_set_clkdiv_int_frac(&cfg, clock_divider, 0);
+    // STATUS reflects "TX FIFO level < 2", which the PIO program polls
+    // to detect that a complete two-word command is available.
+    sm_config_set_mov_status(&cfg, STATUS_TX_LESSTHAN, 2);
+
+    // The command state machine is started right away so that the clock runs
+    pio_sm_init(SDIO_PIO, SDIO_CMD_SM, g_sdio.pio_cmd_clk_offset, &cfg);
+    pio_sm_set_consecutive_pindirs(SDIO_PIO, SDIO_CMD_SM, SDIO_CLK, 1, true);
+    pio_sm_set_enabled(SDIO_PIO, SDIO_CMD_SM, true);
+
+    // Data reception program
+    // Only the configuration is stored here; the data state machine is not started.
+    g_sdio.pio_data_rx_offset = pio_add_program(SDIO_PIO, &sdio_data_rx_program);
+    g_sdio.pio_cfg_data_rx = sdio_data_rx_program_get_default_config(g_sdio.pio_data_rx_offset);
+    sm_config_set_in_pins(&g_sdio.pio_cfg_data_rx, SDIO_D0);
+    sm_config_set_in_shift(&g_sdio.pio_cfg_data_rx, false, true, 32);
+    sm_config_set_out_shift(&g_sdio.pio_cfg_data_rx, false, true, 32);
+    sm_config_set_clkdiv_int_frac(&g_sdio.pio_cfg_data_rx, clock_divider, 0);
+
+    // Data transmission program
+    // Unlike the other configurations, autopush is disabled for the input
+    // shift register here; the program pushes the response token explicitly.
+    g_sdio.pio_data_tx_offset = pio_add_program(SDIO_PIO, &sdio_data_tx_program);
+    g_sdio.pio_cfg_data_tx = sdio_data_tx_program_get_default_config(g_sdio.pio_data_tx_offset);
+    sm_config_set_in_pins(&g_sdio.pio_cfg_data_tx, SDIO_D0);
+    sm_config_set_set_pins(&g_sdio.pio_cfg_data_tx, SDIO_D0, 4);
+    sm_config_set_out_pins(&g_sdio.pio_cfg_data_tx, SDIO_D0, 4);
+    sm_config_set_in_shift(&g_sdio.pio_cfg_data_tx, false, false, 32);
+    sm_config_set_out_shift(&g_sdio.pio_cfg_data_tx, false, true, 32);
+    sm_config_set_clkdiv_int_frac(&g_sdio.pio_cfg_data_tx, clock_divider, 0);
+
+    // Disable SDIO pins input synchronizer.
+    // This reduces input delay.
+    // Because the CLK is driven synchronously to CPU clock,
+    // there should be no metastability problems.
+    SDIO_PIO->input_sync_bypass |= (1 << SDIO_CLK) | (1 << SDIO_CMD)
+                                 | (1 << SDIO_D0) | (1 << SDIO_D1) | (1 << SDIO_D2) | (1 << SDIO_D3);
+
+    // Redirect GPIOs to PIO
+    // NOTE(review): the GPIO function is hard-coded to PIO1 while the rest of
+    // the code goes through SDIO_PIO - presumably SDIO_PIO is pio1, confirm.
+    gpio_set_function(SDIO_CMD, GPIO_FUNC_PIO1);
+    gpio_set_function(SDIO_CLK, GPIO_FUNC_PIO1);
+    gpio_set_function(SDIO_D0, GPIO_FUNC_PIO1);
+    gpio_set_function(SDIO_D1, GPIO_FUNC_PIO1);
+    gpio_set_function(SDIO_D2, GPIO_FUNC_PIO1);
+    gpio_set_function(SDIO_D3, GPIO_FUNC_PIO1);
+
+    // Set up IRQ handler when DMA completes.
+    irq_set_exclusive_handler(DMA_IRQ_1, rp2040_sdio_tx_irq);
+    irq_set_enabled(DMA_IRQ_1, true);
+}

+ 52 - 0
lib/BlueSCSI_platform_RP2040/rp2040_sdio.h

@@ -0,0 +1,52 @@
+// SD card access using SDIO for RP2040 platform.
+// This module contains the low-level SDIO bus implementation using
+// the PIO peripheral. The high-level commands are in sd_card_sdio.cpp.
+
+#pragma once
+#include <stdint.h>
+
+// Result codes returned by the rp2040_sdio_* functions.
+// SDIO_OK is zero; SDIO_BUSY is returned by the poll functions while a
+// transfer is still in progress, all larger values are errors.
+enum sdio_status_t {
+    SDIO_OK = 0,
+    SDIO_BUSY = 1,
+    SDIO_ERR_RESPONSE_TIMEOUT = 2, // Timed out waiting for response from card
+    SDIO_ERR_RESPONSE_CRC = 3,     // Response CRC is wrong
+    SDIO_ERR_RESPONSE_CODE = 4,    // Response command code does not match what was sent
+    SDIO_ERR_DATA_TIMEOUT = 5,     // Timed out waiting for data block
+    SDIO_ERR_DATA_CRC = 6,         // CRC for data packet is wrong
+    SDIO_ERR_WRITE_CRC = 7,        // Card reports bad CRC for write
+    SDIO_ERR_WRITE_FAIL = 8,       // Card reports write failure
+};
+
+// Size of one data block in bytes and in 32-bit words (512 / 4 = 128).
+#define SDIO_BLOCK_SIZE 512
+#define SDIO_WORDS_PER_BLOCK 128
+
+// Execute a command that has 48-bit reply (response types R1, R6, R7)
+// If response is NULL, does not wait for reply.
+sdio_status_t rp2040_sdio_command_R1(uint8_t command, uint32_t arg, uint32_t *response);
+
+// Execute a command that has 136-bit reply (response type R2)
+// Response buffer should have space for 16 bytes (the 128 bit payload)
+sdio_status_t rp2040_sdio_command_R2(uint8_t command, uint32_t arg, uint8_t *response);
+
+// Execute a command that has 48-bit reply but without CRC (response R3)
+sdio_status_t rp2040_sdio_command_R3(uint8_t command, uint32_t arg, uint32_t *response);
+
+// Start transferring data from SD card to memory buffer
+// Transfer block size is always 512 bytes.
+sdio_status_t rp2040_sdio_rx_start(uint8_t *buffer, uint32_t num_blocks);
+
+// Check if reception is complete
+// Returns SDIO_BUSY while transferring, SDIO_OK when done and error on failure.
+sdio_status_t rp2040_sdio_rx_poll(uint32_t *bytes_complete = nullptr);
+
+// Start transferring data from memory to SD card
+sdio_status_t rp2040_sdio_tx_start(const uint8_t *buffer, uint32_t num_blocks);
+
+// Check if transmission is complete
+// Returns SDIO_BUSY while transferring, the write status once the transfer
+// has ended, or SDIO_ERR_DATA_TIMEOUT if it takes longer than 1000 ms.
+// If bytes_complete is not NULL, it receives the number of bytes in the
+// blocks completed so far.
+sdio_status_t rp2040_sdio_tx_poll(uint32_t *bytes_complete = nullptr);
+
+// Force everything to idle state
+// Aborts DMA and stops the data state machine. Always returns SDIO_OK.
+sdio_status_t rp2040_sdio_stop();
+
+// (Re)initialize the SDIO interface
+// clock_divider is applied as integer divider to the PIO state machines.
+void rp2040_sdio_init(int clock_divider = 1);

+ 145 - 0
lib/BlueSCSI_platform_RP2040/rp2040_sdio.pio

@@ -0,0 +1,145 @@
+; RP2040 PIO program for implementing SD card access in SDIO mode
+; Run "pioasm rp2040_sdio.pio rp2040_sdio.pio.h" to regenerate the C header from this.
+
+; The RP2040 official work-in-progress code at
+; https://github.com/raspberrypi/pico-extras/tree/master/src/rp2_common/pico_sd_card
+; may be a useful reference, but this is an independent implementation.
+;
+; For official SDIO specifications, refer to:
+; https://www.sdcard.org/downloads/pls/
+; "SDIO Physical Layer Simplified Specification Version 8.00"
+
+; Clock settings
+; For 3.3V communication the available speeds are:
+; - Default speed: max. 25 MHz clock
+; - High speed:    max. 50 MHz clock
+;
+; From the default RP2040 clock speed of 125 MHz, the closest dividers
+; are 3 for 41.7 MHz and 5 for 25 MHz. The CPU can apply further divider
+; through state machine registers for the initial handshake.
+;
+; Because data is written on the falling edge and read on the rising
+; edge, it is preferable to have a long 0 state and a short 1 state.
+;.define CLKDIV 3
+.define CLKDIV 5
+.define D0 ((CLKDIV + 1) / 2 - 1)
+.define D1 (CLKDIV/2 - 1)
+.define SDIO_CLK_GPIO 18
+
+; State machine 0 is used to:
+; - generate continuous clock on SDIO_CLK
+; - send CMD packets
+; - receive response packets
+;
+; Pin mapping for this state machine:
+; - Sideset    : CLK
+; - IN/OUT/SET : CMD
+; - JMP_PIN    : CMD
+;
+; The commands to send are put on TX fifo and must have two words:
+; Word 0 bits 31-24: Number of bits in command minus one (usually 47)
+; Word 0 bits 23-00: First 24 bits of the command packet, shifted out MSB first
+; Word 1 bits 31-08: Last 24 bits of the command packet, shifted out MSB first
+; Word 1 bits 07-00: Number of bits in response minus one (usually 47), or 0 if no response
+;
+; The response is put on RX fifo, starting with the MSB.
+; Partial last word will be padded with zero bits at the top.
+;
+; The state machine EXECCTRL should be set so that STATUS indicates TX FIFO < 2
+; and that AUTOPULL and AUTOPUSH are enabled.
+
+; Clock and command program.
+; Every instruction carries a side-set value that alternates between 0 and 1,
+; so SDIO_CLK keeps toggling regardless of which part of the program runs.
+; The low phase lasts 1 + D0 cycles and the high phase 1 + D1 cycles, which
+; adds up to CLKDIV cycles per clock period.
+.program sdio_cmd_clk
+    .side_set 1
+
+    mov OSR, NULL       side 1 [D1]    ; Make sure OSR is full of zeros to prevent autopull
+
+; Idle loop: keep clocking until the CPU has written a command to TX FIFO
+wait_cmd:
+    mov Y, !STATUS      side 0 [D0]    ; Check if TX FIFO has data
+    jmp !Y wait_cmd     side 1 [D1]
+
+load_cmd:
+    out NULL, 32        side 0 [D0]    ; Load first word (trigger autopull)
+    out X, 8            side 1 [D1]    ; Number of bits to send
+    set pins, 1         side 0 [D0]    ; Initial state of CMD is high
+    set pindirs, 1      side 1 [D1]    ; Set SDIO_CMD as output
+
+; Shift out X + 1 command bits, one per clock period
+send_cmd:
+    out pins, 1         side 0 [D0]    ; Write output on falling edge of CLK
+    jmp X-- send_cmd    side 1 [D1]
+
+prep_resp:
+    set pindirs, 0      side 0 [D0]    ; Set SDIO_CMD as input
+    out X, 8            side 1 [D1]    ; Get number of bits in response
+    nop                 side 0 [D0]    ; For clock alignment
+    jmp !X resp_done    side 1 [D1]    ; Check if we expect a response
+
+; Wait for the start bit of the response (CMD pulled low by the card)
+wait_resp:
+    nop                  side 0 [D0]
+    jmp PIN wait_resp    side 1 [D1]    ; Loop until SDIO_CMD = 0
+
+    ; Note: input bits are read at the same time as we write CLK=0.
+    ; Because the host controls the clock, the read happens before
+    ; the card sees the falling clock edge. This gives maximum time
+    ; for the data bit to settle.
+read_resp:
+    in PINS, 1          side 0 [D0]    ; Read input data bit
+    jmp X-- read_resp   side 1 [D1]    ; Loop to receive all data bits
+
+; A word is pushed here even when no response was requested
+resp_done:
+    push                side 0 [D0]    ; Push the remaining part of response
+
+; State machine 1 is used to send and receive data blocks.
+; Pin mapping for this state machine:
+; - IN / OUT: SDIO_D0-D3
+; - GPIO defined at beginning of this file: SDIO_CLK
+
+; Data reception program
+; This program will wait for initial start of block token and then
+; receive a data block. The application must set number of nibbles
+; to receive minus 1 to Y register before running this program.
+; Receives X + 1 nibbles from the four data pins after seeing the start bit
+; on D0. One loop iteration takes CLKDIV cycles (in + delay + jmp), i.e.
+; exactly one SDIO clock period. The program wraps back to wait_start after
+; the block, where X is reloaded from Y for the next block.
+.program sdio_data_rx
+
+wait_start:
+    mov X, Y                               ; Reinitialize number of nibbles to receive
+    wait 0 pin 0                           ; Wait for zero state on D0
+    wait 1 gpio SDIO_CLK_GPIO  [CLKDIV-1]  ; Wait for rising edge and then whole clock cycle
+
+rx_data:
+    in PINS, 4                 [CLKDIV-2]  ; Read nibble
+    jmp X--, rx_data
+
+; Data transmission program
+;
+; Before running this program, pindirs should be set as output
+; and register X should be initialized with the number of nibbles
+; to send minus 1 (typically 8 + 1024 + 16 + 1 - 1 = 1048)
+; and register Y with the number of response bits minus 1 (typically 31).
+;
+; Words written to TX FIFO must be:
+; - Word 0: start token 0xFFFFFFF0
+; - Word 1-128: transmitted data (512 bytes)
+; - Word 129-130: CRC checksum
+; - Word 131: end token 0xFFFFFFFF
+;
+; After the card reports idle status, RX FIFO will get a word that
+; contains the D0 line response from card.
+
+; Sends X + 1 nibbles, then reads Y + 1 response bits from D0 and finally
+; waits for D0 to return high before pushing the response word.
+; Both loops take CLKDIV cycles per iteration (1 + D0 + 1 + D1).
+.program sdio_data_tx
+    wait 0 gpio SDIO_CLK_GPIO  
+    wait 1 gpio SDIO_CLK_GPIO  [CLKDIV + D1 - 1]; Synchronize so that write occurs on falling edge
+
+tx_loop:
+    out PINS, 4                [D0]    ; Write nibble and wait for whole clock cycle
+    jmp X-- tx_loop            [D1]
+
+    set pindirs, 0x00          [D0]    ; Set data bus as input
+
+; Only the response part below is inside the wrap range; the transmit part
+; above runs once each time the program is started from its first instruction.
+.wrap_target
+response_loop:
+    in PINS, 1                 [D1]    ; Read D0 on rising edge
+    jmp Y--, response_loop     [D0]
+
+wait_idle:
+    wait 1 pin 0               [D1]    ; Wait for card to indicate idle condition
+    push                       [D0]    ; Push the response token
+.wrap

+ 121 - 0
lib/BlueSCSI_platform_RP2040/rp2040_sdio.pio.h

@@ -0,0 +1,121 @@
+// -------------------------------------------------- //
+// This file is autogenerated by pioasm; do not edit! //
+// -------------------------------------------------- //
+
+#pragma once
+
+#if !PICO_NO_HARDWARE
+#include "hardware/pio.h"
+#endif
+
+// ------------ //
+// sdio_cmd_clk //
+// ------------ //
+
+// Wrap range of the program, as offsets from its load address.
+// The whole program (instructions 0-17) is inside the wrap range.
+#define sdio_cmd_clk_wrap_target 0
+#define sdio_cmd_clk_wrap 17
+
+// Encoded instructions of the sdio_cmd_clk program.
+// Generated output: change rp2040_sdio.pio and rerun pioasm instead of
+// editing these values by hand.
+static const uint16_t sdio_cmd_clk_program_instructions[] = {
+            //     .wrap_target
+    0xb1e3, //  0: mov    osr, null       side 1 [1] 
+    0xa24d, //  1: mov    y, !status      side 0 [2] 
+    0x1161, //  2: jmp    !y, 1           side 1 [1] 
+    0x6260, //  3: out    null, 32        side 0 [2] 
+    0x7128, //  4: out    x, 8            side 1 [1] 
+    0xe201, //  5: set    pins, 1         side 0 [2] 
+    0xf181, //  6: set    pindirs, 1      side 1 [1] 
+    0x6201, //  7: out    pins, 1         side 0 [2] 
+    0x1147, //  8: jmp    x--, 7          side 1 [1] 
+    0xe280, //  9: set    pindirs, 0      side 0 [2] 
+    0x7128, // 10: out    x, 8            side 1 [1] 
+    0xa242, // 11: nop                    side 0 [2] 
+    0x1131, // 12: jmp    !x, 17          side 1 [1] 
+    0xa242, // 13: nop                    side 0 [2] 
+    0x11cd, // 14: jmp    pin, 13         side 1 [1] 
+    0x4201, // 15: in     pins, 1         side 0 [2] 
+    0x114f, // 16: jmp    x--, 15         side 1 [1] 
+    0x8220, // 17: push   block           side 0 [2] 
+            //     .wrap
+};
+
+#if !PICO_NO_HARDWARE
+// Program descriptor; origin -1 leaves the load address to be chosen at load time.
+static const struct pio_program sdio_cmd_clk_program = {
+    .instructions = sdio_cmd_clk_program_instructions,
+    .length = 18,
+    .origin = -1,
+};
+
+// Returns a default state machine configuration with the wrap range adjusted
+// for the given load offset and one side-set bit enabled.
+static inline pio_sm_config sdio_cmd_clk_program_get_default_config(uint offset) {
+    pio_sm_config c = pio_get_default_sm_config();
+    sm_config_set_wrap(&c, offset + sdio_cmd_clk_wrap_target, offset + sdio_cmd_clk_wrap);
+    sm_config_set_sideset(&c, 1, false, false);
+    return c;
+}
+#endif
+
+// ------------ //
+// sdio_data_rx //
+// ------------ //
+
+// Wrap range of the program, as offsets from its load address.
+// The whole program (instructions 0-4) is inside the wrap range.
+#define sdio_data_rx_wrap_target 0
+#define sdio_data_rx_wrap 4
+
+// Encoded instructions of the sdio_data_rx program.
+// Generated output: change rp2040_sdio.pio and rerun pioasm instead of
+// editing these values by hand.
+static const uint16_t sdio_data_rx_program_instructions[] = {
+            //     .wrap_target
+    0xa022, //  0: mov    x, y                       
+    0x2020, //  1: wait   0 pin, 0                   
+    0x2492, //  2: wait   1 gpio, 18             [4] 
+    0x4304, //  3: in     pins, 4                [3] 
+    0x0043, //  4: jmp    x--, 3                     
+            //     .wrap
+};
+
+#if !PICO_NO_HARDWARE
+// Program descriptor; origin -1 leaves the load address to be chosen at load time.
+static const struct pio_program sdio_data_rx_program = {
+    .instructions = sdio_data_rx_program_instructions,
+    .length = 5,
+    .origin = -1,
+};
+
+// Returns a default state machine configuration with the wrap range adjusted
+// for the given load offset.
+static inline pio_sm_config sdio_data_rx_program_get_default_config(uint offset) {
+    pio_sm_config c = pio_get_default_sm_config();
+    sm_config_set_wrap(&c, offset + sdio_data_rx_wrap_target, offset + sdio_data_rx_wrap);
+    return c;
+}
+#endif
+
+// ------------ //
+// sdio_data_tx //
+// ------------ //
+
+// Wrap range of the program, as offsets from its load address.
+// Only the response part (instructions 5-8) is inside the wrap range.
+#define sdio_data_tx_wrap_target 5
+#define sdio_data_tx_wrap 8
+
+// Encoded instructions of the sdio_data_tx program.
+// Generated output: change rp2040_sdio.pio and rerun pioasm instead of
+// editing these values by hand.
+static const uint16_t sdio_data_tx_program_instructions[] = {
+    0x2012, //  0: wait   0 gpio, 18                 
+    0x2592, //  1: wait   1 gpio, 18             [5] 
+    0x6204, //  2: out    pins, 4                [2] 
+    0x0142, //  3: jmp    x--, 2                 [1] 
+    0xe280, //  4: set    pindirs, 0             [2] 
+            //     .wrap_target
+    0x4101, //  5: in     pins, 1                [1] 
+    0x0285, //  6: jmp    y--, 5                 [2] 
+    0x21a0, //  7: wait   1 pin, 0               [1] 
+    0x8220, //  8: push   block                  [2] 
+            //     .wrap
+};
+
+#if !PICO_NO_HARDWARE
+// Program descriptor; origin -1 leaves the load address to be chosen at load time.
+static const struct pio_program sdio_data_tx_program = {
+    .instructions = sdio_data_tx_program_instructions,
+    .length = 9,
+    .origin = -1,
+};
+
+// Returns a default state machine configuration with the wrap range adjusted
+// for the given load offset.
+static inline pio_sm_config sdio_data_tx_program_get_default_config(uint offset) {
+    pio_sm_config c = pio_get_default_sm_config();
+    sm_config_set_wrap(&c, offset + sdio_data_tx_wrap_target, offset + sdio_data_tx_wrap);
+    return c;
+}
+#endif
+

+ 13 - 0
lib/BlueSCSI_platform_RP2040/scsi2sd_time.h

@@ -0,0 +1,13 @@
+// Timing functions for SCSI2SD.
+// This file is derived from time.h in SCSI2SD-V6.
+
+#pragma once
+
+#include <stdint.h>
+#include "BlueSCSI_platform.h"
+
+// Timing helpers used by the SCSI2SD-derived code, mapped to the platform
+// functions millis() and delay_ns().
+//
+// The macro argument is parenthesized before scaling so that an expression
+// argument such as s2s_delay_ms(a + b) is multiplied as a whole.
+// NOTE(review): the multiplication is done in the type of the argument, so
+// large millisecond values may overflow before reaching delay_ns() - confirm
+// against the callers.
+#define s2s_getTime_ms() millis()
+#define s2s_elapsedTime_ms(since) ((uint32_t)(millis() - (since)))
+#define s2s_delay_ms(x) delay_ns((x) * 1000000)
+#define s2s_delay_us(x) delay_ns((x) * 1000)
+#define s2s_delay_ns(x) delay_ns(x)

+ 266 - 0
lib/BlueSCSI_platform_RP2040/scsiHostPhy.cpp

@@ -0,0 +1,266 @@
+#include "scsiHostPhy.h"
+#include "BlueSCSI_platform.h"
+#include "BlueSCSI_log.h"
+#include "BlueSCSI_log_trace.h"
+#include "scsi_accel_host.h"
+#include <assert.h>
+
+#include <scsi2sd.h>
+extern "C" {
+#include <scsi.h>
+}
+
+volatile int g_scsiHostPhyReset;
+
+// Release bus and pulse RST signal, initialize PHY to host mode.
+// Blocks for the duration of the reset pulse and the following recovery
+// delay (delay(2) + delay(250), presumably milliseconds), and clears the
+// pending reset request flag when done.
+void scsiHostPhyReset(void)
+{
+    SCSI_RELEASE_OUTPUTS();
+    SCSI_ENABLE_INITIATOR();
+
+    scsi_accel_host_init();
+
+    // Pulse RST and then give the devices on the bus time to recover
+    SCSI_OUT(RST, 1);
+    delay(2);
+    SCSI_OUT(RST, 0);
+    delay(250);
+    g_scsiHostPhyReset = false;
+}
+
+// Select a device, id 0-7.
+// Returns true if the target answers to selection request.
+// Returns false if the data bus shows activity during the arbitration wait,
+// or if the target does not assert BSY within the selection timeout
+// (2500 polls at 100 us intervals). In both failure cases all outputs are
+// released before returning.
+bool scsiHostPhySelect(int target_id)
+{
+    SCSI_RELEASE_OUTPUTS();
+
+    // We can't write individual data bus bits, so use a bit modified
+    // arbitration scheme. We always yield to any other initiator on
+    // the bus.
+    scsiLogInitiatorPhaseChange(BUS_BUSY);
+    SCSI_OUT(BSY, 1);
+    // Watch the data bus for 10 x 1 us; any nonzero value is treated as
+    // another device using the bus.
+    for (int wait = 0; wait < 10; wait++)
+    {
+        delayMicroseconds(1);
+
+        if (SCSI_IN_DATA() != 0)
+        {
+            bluedbg("scsiHostPhySelect: bus is busy");
+            scsiLogInitiatorPhaseChange(BUS_FREE);
+            SCSI_RELEASE_OUTPUTS();
+            return false;
+        }
+    }
+
+    // Selection phase
+    // SEL is asserted first, then the target id bit is put on the data bus
+    // and finally our own BSY is released so that the target can answer.
+    scsiLogInitiatorPhaseChange(SELECTION);
+    bluedbg("------ SELECTING ", target_id);
+    SCSI_OUT(SEL, 1);
+    delayMicroseconds(5);
+    SCSI_OUT_DATA(1 << target_id);
+    delayMicroseconds(5);
+    SCSI_OUT(BSY, 0);
+
+    // Wait for target to respond
+    for (int wait = 0; wait < 2500; wait++)
+    {
+        delayMicroseconds(100);
+        if (SCSI_IN(BSY))
+        {
+            break;
+        }
+    }
+
+    if (!SCSI_IN(BSY))
+    {
+        // No response
+        SCSI_RELEASE_OUTPUTS();
+        return false;
+    }
+
+    // We need to assert OUT_BSY to enable IO buffer U105 to read status signals.
+    SCSI_RELEASE_DATA_REQ();
+    SCSI_OUT(BSY, 1);
+    SCSI_OUT(SEL, 0);
+    return true;
+}
+
+// Read the current communication phase as signaled by the target
+//
+// Returns:
+// - BUS_FREE if a reset has been requested or the target has dropped BSY
+// - BUS_BUSY if the target has not asserted REQ yet
+// - otherwise the phase built from the CD / IO / MSG signal states
+int scsiHostPhyGetPhase()
+{
+    // Last time when the target was known to be on the bus; used to limit
+    // how often the BSY line is probed below.
+    static absolute_time_t last_online_time;
+
+    if (g_scsiHostPhyReset)
+    {
+        // Reset request from watchdog timer
+        scsiHostPhyRelease();
+        return BUS_FREE;
+    }
+
+    // REQ is sampled before the control signals
+    int phase = 0;
+    bool req_in = SCSI_IN(REQ);
+    if (SCSI_IN(CD)) phase |= __scsiphase_cd;
+    if (SCSI_IN(IO)) phase |= __scsiphase_io;
+    if (SCSI_IN(MSG)) phase |= __scsiphase_msg;
+
+    // With all control signals inactive we cannot tell whether the target is
+    // still there, so probe BSY if more than 100 us have passed since the
+    // last check.
+    if (phase == 0 && absolute_time_diff_us(last_online_time, get_absolute_time()) > 100)
+    {
+        // Disable OUT_BSY for a short time to see if the target is still on line
+        SCSI_OUT(BSY, 0);
+        delayMicroseconds(1);
+
+        if (!SCSI_IN(BSY))
+        {
+            scsiLogInitiatorPhaseChange(BUS_FREE);
+            return BUS_FREE;
+        }
+
+        // Still online, re-enable OUT_BSY to enable IO buffers
+        SCSI_OUT(BSY, 1);
+        last_online_time = get_absolute_time();
+    }
+    else if (phase != 0)
+    {
+        last_online_time = get_absolute_time();
+    }
+
+    if (!req_in)
+    {
+        // Don't act on phase changes until target asserts request signal.
+        // This filters out any spurious changes on control signals.
+        return BUS_BUSY;
+    }
+    else
+    {
+        scsiLogInitiatorPhaseChange(phase);
+        return phase;
+    }
+}
+
+// Returns true if the target currently asserts the REQ signal.
+bool scsiHostRequestWaiting()
+{
+    const bool req_active = SCSI_IN(REQ);
+    return req_active;
+}
+
+// Blocking data transfer
+#define SCSIHOST_WAIT_ACTIVE(pin) \
+  if (!SCSI_IN(pin)) { \
+    if (!SCSI_IN(pin)) { \
+      while(!SCSI_IN(pin) && !g_scsiHostPhyReset); \
+    } \
+  }
+
+#define SCSIHOST_WAIT_INACTIVE(pin) \
+  if (SCSI_IN(pin)) { \
+    if (SCSI_IN(pin)) { \
+      while(SCSI_IN(pin) && !g_scsiHostPhyReset); \
+    } \
+  }
+
+// Write one byte to SCSI target using the handshake mechanism
+// Sequence: wait for REQ, put data on bus, assert ACK, wait for the target
+// to release REQ, then release the data bus and ACK.
+// The waits end early if a reset is requested.
+static inline void scsiHostWriteOneByte(uint8_t value)
+{
+    SCSIHOST_WAIT_ACTIVE(REQ);
+    SCSI_OUT_DATA(value);
+    delay_100ns(); // DB setup time before ACK
+    SCSI_OUT(ACK, 1);
+    SCSIHOST_WAIT_INACTIVE(REQ);
+    SCSI_RELEASE_DATA_REQ();
+    SCSI_OUT(ACK, 0);
+}
+
+// Read one byte from SCSI target using the handshake mechanism.
+// Sequence: wait for REQ, sample the data bus, assert ACK, wait for the
+// target to release REQ, release ACK.
+//
+// parityError: optional; set to 1 when the sampled bus value does not match
+//              the parity lookup table. It is never cleared here, so the
+//              caller can accumulate errors over several bytes.
+// Returns the low 8 bits of the sampled bus value.
+static inline uint8_t scsiHostReadOneByte(int* parityError)
+{
+    SCSIHOST_WAIT_ACTIVE(REQ);
+    uint16_t r = SCSI_IN_DATA();
+    SCSI_OUT(ACK, 1);
+    SCSIHOST_WAIT_INACTIVE(REQ);
+    SCSI_OUT(ACK, 0);
+
+    if (parityError && r != (g_scsi_parity_lookup[r & 0xFF] ^ SCSI_IO_DATA_MASK))
+    {
+        // Use this function's own name in the message so that initiator side
+        // parity errors can be told apart from the target side ones.
+        bluelog("Parity error in scsiHostReadOneByte(): ", (uint32_t)r);
+        *parityError = 1;
+    }
+
+    return (uint8_t)r;
+}
+
+// Send data to the target byte by byte.
+// Stops early if a reset is requested or if the target changes the IO, CD or
+// MSG signals while we are waiting for its next REQ.
+// Returns the number of bytes actually written.
+uint32_t scsiHostWrite(const uint8_t *data, uint32_t count)
+{
+    scsiLogDataOut(data, count);
+
+    // Control signal states at the start of the transfer
+    const int initial_cd = SCSI_IN(CD);
+    const int initial_msg = SCSI_IN(MSG);
+
+    uint32_t sent = 0;
+    while (sent < count)
+    {
+        // Wait for the target to request the next byte
+        while (!SCSI_IN(REQ))
+        {
+            const bool phase_lost = g_scsiHostPhyReset
+                                 || SCSI_IN(IO)
+                                 || SCSI_IN(CD) != initial_cd
+                                 || SCSI_IN(MSG) != initial_msg;
+            if (phase_lost)
+            {
+                // Target switched out of DATA_OUT mode
+                bluelog("scsiHostWrite: sent ", (int)sent, " bytes, expected ", (int)count);
+                return sent;
+            }
+        }
+
+        scsiHostWriteOneByte(data[sent]);
+        sent++;
+    }
+
+    return count;
+}
+
+// Receive data from the target.
+//
+// Uses the accelerated routine when both the byte count and the buffer
+// address are even, otherwise reads byte by byte.
+// Returns the number of bytes received, which can be less than requested if
+// the target leaves the data phase early. Returns 0 if a reset was requested
+// or a parity error was detected.
+uint32_t scsiHostRead(uint8_t *data, uint32_t count)
+{
+    int parityError = 0;
+    uint32_t fullcount = count;
+
+    int cd_start = SCSI_IN(CD);
+    int msg_start = SCSI_IN(MSG);
+
+    if ((count & 1) == 0 && ((uint32_t)data & 1) == 0)
+    {
+        // Even number of bytes, use accelerated routine
+        count = scsi_accel_host_read(data, count, &parityError, &g_scsiHostPhyReset);
+    }
+    else
+    {
+        for (uint32_t i = 0; i < count; i++)
+        {
+            bool phase_ended = false;
+            while (!SCSI_IN(REQ))
+            {
+                if (g_scsiHostPhyReset || !SCSI_IN(IO) || SCSI_IN(CD) != cd_start || SCSI_IN(MSG) != msg_start)
+                {
+                    // Target switched out of DATA_IN mode
+                    count = i;
+                    phase_ended = true;
+                    break;
+                }
+            }
+
+            if (phase_ended)
+            {
+                // Stop here instead of waiting for the next REQ: that REQ
+                // belongs to another phase (or never comes after a reset),
+                // so reading it would hang or steal a byte from that phase.
+                break;
+            }
+
+            data[i] = scsiHostReadOneByte(&parityError);
+        }
+    }
+
+    scsiLogDataIn(data, count);
+
+    if (g_scsiHostPhyReset || parityError)
+    {
+        return 0;
+    }
+    else
+    {
+        if (count < fullcount)
+        {
+            bluelog("scsiHostRead: received ", (int)count, " bytes, expected ", (int)fullcount);
+        }
+
+        return count;
+    }
+}
+
+// Release all bus signals
+// Also records the transition to BUS_FREE in the initiator phase log.
+void scsiHostPhyRelease()
+{
+    scsiLogInitiatorPhaseChange(BUS_FREE);
+    SCSI_RELEASE_OUTPUTS();
+}

+ 32 - 0
lib/BlueSCSI_platform_RP2040/scsiHostPhy.h

@@ -0,0 +1,32 @@
+// Host side SCSI physical interface.
+// Used in initiator to interface to an SCSI drive.
+
+#pragma once
+
+#include <stdint.h>
+#include <stdbool.h>
+
+// Request to stop activity and reset the bus
+// When nonzero, the blocking waits and transfers in the host PHY end early.
+// Cleared by scsiHostPhyReset().
+extern volatile int g_scsiHostPhyReset;
+
+// Release bus and pulse RST signal, initialize PHY to host mode.
+void scsiHostPhyReset(void);
+
+// Select a device, id 0-7.
+// Returns true if the target answers to selection request.
+bool scsiHostPhySelect(int target_id);
+
+// Read the current communication phase as signaled by the target
+// Matches SCSI_PHASE enumeration from scsi.h.
+// Returns BUS_BUSY while the target has not asserted REQ, and BUS_FREE when
+// the target is no longer on the bus or a reset has been requested.
+int scsiHostPhyGetPhase();
+
+// Returns true if the device has asserted REQ signal, i.e. data waiting
+bool scsiHostRequestWaiting();
+
+// Blocking data transfer
+// These return the actual number of bytes transferred.
+// scsiHostRead() returns 0 if a reset was requested or a parity error occurred.
+uint32_t scsiHostWrite(const uint8_t *data, uint32_t count);
+uint32_t scsiHostRead(uint8_t *data, uint32_t count);
+
+// Release all bus signals
+void scsiHostPhyRelease();

+ 366 - 0
lib/BlueSCSI_platform_RP2040/scsiPhy.cpp

@@ -0,0 +1,366 @@
+// Implements the low level interface to SCSI bus
+// Partially derived from scsiPhy.c from SCSI2SD-V6
+
+#include "scsiPhy.h"
+#include "BlueSCSI_platform.h"
+#include "BlueSCSI_log.h"
+#include "BlueSCSI_log_trace.h"
+#include "BlueSCSI_config.h"
+#include "scsi_accel_rp2040.h"
+
+#include <scsi2sd.h>
+extern "C" {
+#include <scsi.h>
+#include <scsi2sd_time.h>
+}
+
+/***********************/
+/* SCSI status signals */
+/***********************/
+
+// Returns true when the ATN input signal is active.
+extern "C" bool scsiStatusATN()
+{
+    const bool atn_active = SCSI_IN(ATN);
+    return atn_active;
+}
+
+// Returns true when the BSY input signal is active.
+extern "C" bool scsiStatusBSY()
+{
+    const bool bsy_active = SCSI_IN(BSY);
+    return bsy_active;
+}
+
+/************************/
+/* SCSI selection logic */
+/************************/
+
+volatile uint8_t g_scsi_sts_selection;
+volatile uint8_t g_scsi_ctrl_bsy;
+
+// Selection detection, called from the GPIO interrupt handler on BSY and
+// SEL pin events.
+// If SEL is active while BSY is not, the data bus is compared against the
+// ids of the configured targets and the result is latched into
+// g_scsi_sts_selection and scsiDev.selFlag.
+void scsi_bsy_deassert_interrupt()
+{
+    if (SCSI_IN(SEL) && !SCSI_IN(BSY))
+    {
+        // Check if any of the targets we simulate is selected
+        uint8_t sel_bits = SCSI_IN_DATA();
+        int sel_id = -1;
+        for (int i = 0; i < S2S_MAX_TARGETS; i++)
+        {
+            // Only targets with a valid id (0-7) and a configuration are considered
+            if (scsiDev.targets[i].targetId <= 7 && scsiDev.targets[i].cfg)
+            {
+                if (sel_bits & (1 << scsiDev.targets[i].targetId))
+                {
+                    sel_id = scsiDev.targets[i].targetId;
+                    break;
+                }
+            }
+        }
+
+        if (sel_id >= 0)
+        {
+            // Set ATN flag here unconditionally, real value is only known after
+            // OUT_BSY is enabled in scsiStatusSEL() below.
+            g_scsi_sts_selection = SCSI_STS_SELECTION_SUCCEEDED | SCSI_STS_SELECTION_ATN | sel_id;
+        }
+
+        // selFlag is required for Philips P2000C which releases it after 600ns
+        // without waiting for BSY.
+        // Also required for some early Mac Plus roms
+        scsiDev.selFlag = *SCSI_STS_SELECTED;
+    }
+}
+
+// Returns true when the SEL input signal is active.
+// As a side effect, a pending BSY request written through SCSI_CTRL_BSY is
+// applied to the hardware here, after which the ATN signal is checked.
+extern "C" bool scsiStatusSEL()
+{
+    if (g_scsi_ctrl_bsy)
+    {
+        // We don't have direct register access to BSY bit like SCSI2SD scsi.c expects.
+        // Instead update the state here.
+        // Releasing happens with bus release.
+        g_scsi_ctrl_bsy = 0;
+        SCSI_OUT(BSY, 1);
+
+        // On RP2040 hardware the ATN signal is only available after OUT_BSY enables
+        // the IO buffer U105, so check the signal status here.
+        delay_100ns();
+        if (!scsiStatusATN())
+        {
+            // ATN is not asserted: this is presumably a SCSI1 host that does
+            // not send an IDENTIFY message, so fall back to SCSI1
+            // compatibility mode and clear the ATN flag that the selection
+            // interrupt set unconditionally.
+            scsiDev.atnFlag = 0;
+            scsiDev.target->unitAttention = 0;
+            scsiDev.compatMode = COMPAT_SCSI1;
+        }
+    }
+
+    return SCSI_IN(SEL);
+}
+
+/************************/
+/* SCSI bus reset logic */
+/************************/
+
+// Handles an RST pin event: sets scsiDev.resetFlag if the RST signal is
+// seen active on two samples taken 500 ns apart.
+static void scsi_rst_assert_interrupt()
+{
+    // Glitch filtering
+    bool rst1 = SCSI_IN(RST);
+    delay_ns(500);
+    bool rst2 = SCSI_IN(RST);
+
+    if (rst1 && rst2)
+    {
+        bluedbg("BUS RESET");
+        scsiDev.resetFlag = 1;
+    }
+}
+
+// Shared GPIO interrupt callback for the SCSI BSY, SEL and RST inputs.
+// Dispatches to the selection or reset handler based on the pin number;
+// the events argument is not used.
+static void scsiPhyIRQ(uint gpio, uint32_t events)
+{
+    const bool is_selection_pin = (gpio == SCSI_IN_BSY || gpio == SCSI_IN_SEL);
+    if (is_selection_pin)
+    {
+        // Note BSY / SEL interrupts only when we are not driving OUT_BSY low ourselves.
+        // The BSY input pin may be shared with other signals.
+        const bool out_bsy_high = (sio_hw->gpio_out & (1 << SCSI_OUT_BSY)) != 0;
+        if (out_bsy_high)
+        {
+            scsi_bsy_deassert_interrupt();
+        }
+        return;
+    }
+
+    if (gpio == SCSI_IN_RST)
+    {
+        scsi_rst_assert_interrupt();
+    }
+}
+
+// This function is called to initialize the phy code.
+// It is called after power-on and after SCSI bus reset.
+// Releases all outputs, clears the latched selection state, reinitializes
+// the accelerated transfer code and enables the GPIO interrupts used for
+// selection and reset detection.
+extern "C" void scsiPhyReset(void)
+{
+    SCSI_RELEASE_OUTPUTS();
+    g_scsi_sts_selection = 0;
+    g_scsi_ctrl_bsy = 0;
+
+    scsi_accel_rp2040_init();
+
+    // Enable BSY, RST and SEL interrupts
+    // Note: RP2040 library currently supports only one callback,
+    // so it has to be same for both pins.
+    // BSY triggers on the rising edge, RST and SEL on the falling edge
+    // (presumably the bus signals are active low at the pins - confirm).
+    gpio_set_irq_enabled_with_callback(SCSI_IN_BSY, GPIO_IRQ_EDGE_RISE, true, scsiPhyIRQ);
+    gpio_set_irq_enabled(SCSI_IN_RST, GPIO_IRQ_EDGE_FALL, true);
+
+    // Check BSY line status when SEL goes active.
+    // This is needed to handle SCSI-1 hosts that use the single initiator mode.
+    // The host will just assert the SEL directly, without asserting BSY first.
+    gpio_set_irq_enabled(SCSI_IN_SEL, GPIO_IRQ_EDGE_FALL, true);
+}
+
+/************************/
+/* SCSI bus phase logic */
+/************************/
+
+static SCSI_PHASE g_scsi_phase;
+
+// Change the bus phase and block for the settle time that the phase
+// change requires, if any.
+extern "C" void scsiEnterPhase(int phase)
+{
+    const int settle_ns = scsiEnterPhaseImmediate(phase);
+    if (settle_ns <= 0)
+    {
+        // No phase change happened or no waiting is needed
+        return;
+    }
+
+    s2s_delay_ns(settle_ns);
+}
+
+// Switch the bus to the requested phase without waiting for it to settle.
+// Returns the number of nanoseconds the caller should wait before using the
+// bus. Returns 0 if the phase did not change or if the outputs were released
+// because of a negative phase value.
+extern "C" uint32_t scsiEnterPhaseImmediate(int phase)
+{
+    if (phase == g_scsi_phase)
+    {
+        // Already in the requested phase
+        return 0;
+    }
+
+    // ANSI INCITS 362-2002 SPI-3 10.7.1:
+    // Phase changes are not allowed while REQ or ACK is asserted.
+    while (likely(!scsiDev.resetFlag) && SCSI_IN(ACK)) {}
+
+    const bool legacy_host = scsiDev.compatMode < COMPAT_SCSI2;
+    if (legacy_host && (phase == DATA_IN || phase == DATA_OUT))
+    {
+        // Akai S1000/S3000 seems to need extra delay before changing to data phase
+        // after a command. The code in BlueSCSI_disk.cpp tries to do this while waiting
+        // for SD card, to avoid any extra latency.
+        s2s_delay_ns(400000);
+    }
+
+    const int previous_phase = g_scsi_phase;
+    g_scsi_phase = (SCSI_PHASE)phase;
+    scsiLogPhaseChange(phase);
+
+    // Select between synchronous vs. asynchronous SCSI writes
+    if (g_scsi_phase == DATA_IN && scsiDev.target->syncOffset > 0)
+    {
+        scsi_accel_rp2040_setWriteMode(scsiDev.target->syncOffset, scsiDev.target->syncPeriod);
+    }
+    else
+    {
+        scsi_accel_rp2040_setWriteMode(0, 0);
+    }
+
+    if (phase < 0)
+    {
+        // Other communication on bus or reset state
+        SCSI_RELEASE_OUTPUTS();
+        return 0;
+    }
+
+    // Drive the phase signals
+    SCSI_OUT(MSG, phase & __scsiphase_msg);
+    SCSI_OUT(CD,  phase & __scsiphase_cd);
+    SCSI_OUT(IO,  phase & __scsiphase_io);
+    SCSI_ENABLE_CONTROL_OUT();
+
+    // Bus settle delay
+    int settle_ns = 400;
+
+    const bool io_changed = (previous_phase & __scsiphase_io) != (phase & __scsiphase_io);
+    if (io_changed)
+    {
+        // Data release delay
+        settle_ns += 400;
+    }
+
+    if (scsiDev.compatMode < COMPAT_SCSI2)
+    {
+        // EMU EMAX needs 100uS ! 10uS is not enough.
+        settle_ns += 100000;
+    }
+
+    return settle_ns;
+}
+
+// Release all signals
+// Also resets the tracked bus phase to BUS_FREE and clears the latched
+// selection state, the pending BSY request and the command length.
+void scsiEnterBusFree(void)
+{
+    g_scsi_phase = BUS_FREE;
+    g_scsi_sts_selection = 0;
+    g_scsi_ctrl_bsy = 0;
+    scsiDev.cdbLen = 0;
+
+    SCSI_RELEASE_OUTPUTS();
+}
+
+/********************/
+/* Transmit to host */
+/********************/
+
+#define SCSI_WAIT_ACTIVE(pin) \
+  if (!SCSI_IN(pin)) { \
+    if (!SCSI_IN(pin)) { \
+      while(!SCSI_IN(pin) && !scsiDev.resetFlag); \
+    } \
+  }
+
+#define SCSI_WAIT_INACTIVE(pin) \
+  if (SCSI_IN(pin)) { \
+    if (SCSI_IN(pin)) { \
+      while(SCSI_IN(pin) && !scsiDev.resetFlag); \
+    } \
+  }
+
+// Write one byte to SCSI host using the handshake mechanism
+// Sequence: put data on bus, assert REQ, wait for ACK, release data and REQ,
+// wait for ACK to be released. The waits end early on bus reset.
+static inline void scsiWriteOneByte(uint8_t value)
+{
+    SCSI_OUT_DATA(value);
+    delay_100ns(); // DB setup time before REQ
+    SCSI_OUT(REQ, 1);
+    SCSI_WAIT_ACTIVE(ACK);
+    SCSI_RELEASE_DATA_REQ();
+    SCSI_WAIT_INACTIVE(ACK);
+}
+
+// Send a single byte to the host (blocking) and record it in the data log.
+extern "C" void scsiWriteByte(uint8_t value)
+{
+    scsiLogDataIn(&value, 1);
+    scsiWriteOneByte(value);
+}
+
+// Blocking write to the host: starts the transfer and waits until it has finished.
+extern "C" void scsiWrite(const uint8_t* data, uint32_t count)
+{
+    scsiStartWrite(data, count);
+    scsiFinishWrite();
+}
+
+// Start sending data to the host.
+// Transfers with an even byte count and an even buffer address are handed to
+// the accelerated routine. Anything else is sent byte by byte right here,
+// after the previously started write has been finished.
+extern "C" void scsiStartWrite(const uint8_t* data, uint32_t count)
+{
+    scsiLogDataIn(data, count);
+
+    const bool even_count = (count & 1) == 0;
+    const bool even_address = ((uint32_t)data & 1) == 0;
+
+    if (even_count && even_address)
+    {
+        // Use accelerated routine
+        scsi_accel_rp2040_startWrite(data, count, &scsiDev.resetFlag);
+        return;
+    }
+
+    // Unaligned write, do it byte-by-byte
+    scsiFinishWrite();
+    for (uint32_t pos = 0; pos < count && !scsiDev.resetFlag; pos++)
+    {
+        scsiWriteOneByte(data[pos]);
+    }
+}
+
+// Query the write status for the given data pointer.
+// Thin wrapper that forwards the query to the accelerated write code.
+extern "C" bool scsiIsWriteFinished(const uint8_t *data)
+{
+    return scsi_accel_rp2040_isWriteFinished(data);
+}
+
+// Finish the write started with scsiStartWrite().
+// Forwards to the accelerated write code, passing the bus reset flag to it.
+extern "C" void scsiFinishWrite()
+{
+    scsi_accel_rp2040_finishWrite(&scsiDev.resetFlag);
+}
+
+/*********************/
+/* Receive from host */
+/*********************/
+
+// Read one byte from SCSI host using the handshake mechanism.
+// Sequence: assert REQ, wait for ACK, sample the data bus, release REQ,
+// wait for ACK to be released. The waits end early on bus reset.
+//
+// parityError: optional; set to 1 when the sampled bus value does not match
+//              the parity lookup table. It is never cleared here.
+// Returns the low 8 bits of the sampled bus value.
+static inline uint8_t scsiReadOneByte(int* parityError)
+{
+    SCSI_OUT(REQ, 1);
+    SCSI_WAIT_ACTIVE(ACK);
+    delay_100ns();
+    uint16_t r = SCSI_IN_DATA();
+    SCSI_OUT(REQ, 0);
+    SCSI_WAIT_INACTIVE(ACK);
+
+    if (parityError && r != (g_scsi_parity_lookup[r & 0xFF] ^ SCSI_IO_DATA_MASK))
+    {
+        bluelog("Parity error in scsiReadOneByte(): ", (uint32_t)r);
+        *parityError = 1;
+    }
+
+    return (uint8_t)r;
+}
+
+// Read a single byte from the host (blocking) and record it in the data log.
+// Parity is not checked on this path (NULL is passed as the error pointer).
+extern "C" uint8_t scsiReadByte(void)
+{
+    uint8_t r = scsiReadOneByte(NULL);
+    scsiLogDataOut(&r, 1);
+    return r;
+}
+
+// Blocking read from the host.
+// Transfers with an even byte count and an even buffer address use the
+// accelerated routine, anything else is read byte by byte.
+// *parityError is cleared at the start and set nonzero by the read routines
+// if a parity error is detected.
+extern "C" void scsiRead(uint8_t* data, uint32_t count, int* parityError)
+{
+    *parityError = 0;
+
+    const bool even_count = (count & 1) == 0;
+    const bool even_address = ((uint32_t)data & 1) == 0;
+
+    if (even_count && even_address)
+    {
+        // Use accelerated routine
+        scsi_accel_rp2040_read(data, count, parityError, &scsiDev.resetFlag);
+    }
+    else
+    {
+        // Unaligned transfer, do byte by byte
+        for (uint32_t pos = 0; pos < count && !scsiDev.resetFlag; pos++)
+        {
+            data[pos] = scsiReadOneByte(parityError);
+        }
+    }
+
+    scsiLogDataOut(data, count);
+}

+ 67 - 0
lib/BlueSCSI_platform_RP2040/scsiPhy.h

@@ -0,0 +1,67 @@
+// Interface to SCSI physical interface.
+// This file is derived from scsiPhy.h in SCSI2SD-V6.
+//
+// All functions are declared with C linkage and explicit (void) parameter
+// lists so that the header gives full prototypes to both C and C++ callers.
+
+#pragma once
+
+#include <stdint.h>
+#include <stdbool.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Read SCSI status signals
+bool scsiStatusATN(void);
+bool scsiStatusBSY(void);
+bool scsiStatusSEL(void);
+
+// Parity not yet implemented through this macro; it always evaluates to 0.
+// Parity errors during scsiRead() are reported through its parityError argument.
+#define scsiParityError() 0
+
+// Get SCSI selection status.
+// This is latched by interrupt when BSY is deasserted while SEL is asserted.
+// Lowest 3 bits are the selected target id.
+// Highest bits are status information.
+#define SCSI_STS_SELECTION_SUCCEEDED 0x40
+#define SCSI_STS_SELECTION_ATN 0x80
+extern volatile uint8_t g_scsi_sts_selection;
+#define SCSI_STS_SELECTED (&g_scsi_sts_selection)
+extern volatile uint8_t g_scsi_ctrl_bsy;
+#define SCSI_CTRL_BSY (&g_scsi_ctrl_bsy)
+
+// Called when SCSI RST signal has been asserted, should release bus.
+void scsiPhyReset(void);
+
+// Change MSG / CD / IO signal states and wait for necessary transition time.
+// Phase argument is one of SCSI_PHASE enum values.
+void scsiEnterPhase(int phase);
+
+// Change state and return nanosecond delay to wait
+uint32_t scsiEnterPhaseImmediate(int phase);
+
+// Release all signals
+void scsiEnterBusFree(void);
+
+// Blocking data transfer.
+// scsiRead() clears *parityError on entry and sets it to 1 if a parity
+// mismatch is detected, so parityError must point to a valid int.
+void scsiWrite(const uint8_t* data, uint32_t count);
+void scsiRead(uint8_t* data, uint32_t count, int* parityError);
+void scsiWriteByte(uint8_t value);
+uint8_t scsiReadByte(void);
+
+// Non-blocking data transfer.
+// Depending on platform support the start() function may block.
+// The start function can be called multiple times, it may internally
+// either combine transfers or block until previous transfer completes.
+void scsiStartWrite(const uint8_t* data, uint32_t count);
+void scsiFinishWrite(void);
+
+// Query whether the data at pointer has already been read, i.e. buffer can be reused.
+// If data is NULL, checks if all writes have completed.
+bool scsiIsWriteFinished(const uint8_t *data);
+
+
+#define s2s_getScsiRateKBs() 0
+
+#ifdef __cplusplus
+}
+#endif

+ 68 - 0
lib/BlueSCSI_platform_RP2040/scsi_accel.pio

@@ -0,0 +1,68 @@
+; RP2040 PIO program for accelerating SCSI communication
+; Run "pioasm scsi_accel.pio scsi_accel.pio.h" to regenerate the C header from this.
+; GPIO mapping:
+; - 0-7: DB0-DB7
+; -   8: DBP
+; Side set is REQ pin
+
+.define REQ 9
+.define ACK 10
+
+; Delay from data setup to REQ assertion.
+; deskew delay + cable skew delay = 55 ns minimum
+; One clock cycle is 8 ns => delay 7 clocks
+.define REQ_DLY 7
+
+; Write to SCSI bus using asynchronous handshake.
+; Data is written as 16-bit words that contain the 8 data bits + 1 parity bit.
+; The remaining 7 bits of each 16-bit word are discarded.
+; Number of bytes to send must be a multiple of 2.
+; The side-set bit drives REQ: "side 1" keeps it released, "side 0" asserts it.
+.program scsi_accel_async_write
+    .side_set 1
+
+    pull ifempty block          side 1  ; Get data from TX FIFO
+    out pins, 9                 side 1  ; Write data and parity bit
+    out null, 7 [REQ_DLY-2]     side 1  ; Discard unused bits, wait for data preset time
+                                        ; (this instruction and the next each add one cycle, hence -2)
+    wait 1 gpio ACK             side 1  ; Wait for ACK to be inactive
+    wait 0 gpio ACK             side 0  ; Assert REQ, wait for ACK low
+
+; Read from SCSI bus using asynchronous handshake.
+; Also works for synchronous mode down to 50 ns transfer period.
+; Data is returned as 16-bit words that contain the 8 data bits + 1 parity bit.
+; Number of bytes to receive minus 1 should be written to TX fifo.
+; Number of bytes to receive must be divisible by 2.
+; The side-set bit drives REQ: "side 1" keeps it released, "side 0" asserts it.
+.program scsi_accel_async_read
+    .side_set 1
+
+    pull block                  side 1  ; Get number of bytes to receive
+    mov x, osr                  side 1  ; Store to counter X
+
+start:
+    wait 1 gpio ACK             side 1  ; Wait for ACK high
+    wait 0 gpio ACK             side 0  ; Assert REQ, wait for ACK low
+    in pins, 9                  side 1  ; Deassert REQ, read GPIO
+    in null, 7                  side 1  ; Padding bits
+    jmp x-- start               side 1  ; Loop while X is non-zero, then decrement; falls out after the last byte
+
+; Data state machine for synchronous writes.
+; Takes the lowest 9 bits of each 16 bit word and writes them to bus with REQ pulse.
+; The delay times will be rewritten by C code to match the negotiated SCSI sync speed.
+;
+; Shifts one bit into the ISR for every byte transmitted. This is used to control the
+; transfer pace: the RX fifo acts as a counter of unacknowledged bytes, and the "in"
+; instruction stalls when the fifo is full. The C code can set the syncOffset by changing
+; the autopush threshold, e.g. threshold 3 = 12 bytes offset.
+.program scsi_sync_write
+    .side_set 1
+
+    out pins, 9     [0]         side 1  ; Write data and parity bit, wait for deskew delay
+    out null, 7     [0]         side 0  ; Assert REQ, wait for assert time
+    in null, 1      [0]         side 1  ; Deassert REQ, wait for transfer period, wait for space in ACK buffer
+
+; Data pacing state machine for synchronous writes.
+; Takes one bit from the OSR on every falling edge of ACK.
+; The C code should set autopull threshold to match scsi_sync_write autopush threshold.
+; System DMA will then move words from scsi_sync_write RX fifo to scsi_sync_write_pacer TX fifo.
+.program scsi_sync_write_pacer
+    wait 1 gpio ACK   ; Make sure ACK is high first, so the next wait sees an edge
+    wait 0 gpio ACK   ; Wait for falling edge on ACK
+    out null, 1       ; Let scsi_sync_write send one more byte

+ 134 - 0
lib/BlueSCSI_platform_RP2040/scsi_accel.pio.h

@@ -0,0 +1,134 @@
+// -------------------------------------------------- //
+// This file is autogenerated by pioasm; do not edit! //
+// -------------------------------------------------- //
+
+#pragma once
+
+#if !PICO_NO_HARDWARE
+#include "hardware/pio.h"
+#endif
+
+// ---------------------- //
+// scsi_accel_async_write //
+// ---------------------- //
+
+#define scsi_accel_async_write_wrap_target 0
+#define scsi_accel_async_write_wrap 4
+
+static const uint16_t scsi_accel_async_write_program_instructions[] = {
+            //     .wrap_target
+    0x90e0, //  0: pull   ifempty block   side 1     
+    0x7009, //  1: out    pins, 9         side 1     
+    0x7567, //  2: out    null, 7         side 1 [5] 
+    0x308a, //  3: wait   1 gpio, 10      side 1     
+    0x200a, //  4: wait   0 gpio, 10      side 0     
+            //     .wrap
+};
+
+#if !PICO_NO_HARDWARE
+static const struct pio_program scsi_accel_async_write_program = {
+    .instructions = scsi_accel_async_write_program_instructions,
+    .length = 5,
+    .origin = -1,
+};
+
+// Build the default state machine config for scsi_accel_async_write loaded at
+// instruction offset `offset`: the wrap range is relocated by the offset and one
+// mandatory side-set bit is enabled.
+// NOTE: this header is generated by pioasm; comments are lost on regeneration.
+static inline pio_sm_config scsi_accel_async_write_program_get_default_config(uint offset) {
+    pio_sm_config c = pio_get_default_sm_config();
+    sm_config_set_wrap(&c, offset + scsi_accel_async_write_wrap_target, offset + scsi_accel_async_write_wrap);
+    sm_config_set_sideset(&c, 1, false, false);
+    return c;
+}
+#endif
+
+// --------------------- //
+// scsi_accel_async_read //
+// --------------------- //
+
+#define scsi_accel_async_read_wrap_target 0
+#define scsi_accel_async_read_wrap 6
+
+static const uint16_t scsi_accel_async_read_program_instructions[] = {
+            //     .wrap_target
+    0x90a0, //  0: pull   block           side 1     
+    0xb027, //  1: mov    x, osr          side 1     
+    0x308a, //  2: wait   1 gpio, 10      side 1     
+    0x200a, //  3: wait   0 gpio, 10      side 0     
+    0x5009, //  4: in     pins, 9         side 1     
+    0x5067, //  5: in     null, 7         side 1     
+    0x1042, //  6: jmp    x--, 2          side 1
+            //     .wrap
+};
+
+#if !PICO_NO_HARDWARE
+static const struct pio_program scsi_accel_async_read_program = {
+    .instructions = scsi_accel_async_read_program_instructions,
+    .length = 7,
+    .origin = -1,
+};
+
+// Build the default state machine config for scsi_accel_async_read loaded at
+// instruction offset `offset`: the wrap range is relocated by the offset and one
+// mandatory side-set bit is enabled.
+// NOTE: this header is generated by pioasm; comments are lost on regeneration.
+static inline pio_sm_config scsi_accel_async_read_program_get_default_config(uint offset) {
+    pio_sm_config c = pio_get_default_sm_config();
+    sm_config_set_wrap(&c, offset + scsi_accel_async_read_wrap_target, offset + scsi_accel_async_read_wrap);
+    sm_config_set_sideset(&c, 1, false, false);
+    return c;
+}
+#endif
+
+// --------------- //
+// scsi_sync_write //
+// --------------- //
+
+#define scsi_sync_write_wrap_target 0
+#define scsi_sync_write_wrap 2
+
+static const uint16_t scsi_sync_write_program_instructions[] = {
+            //     .wrap_target
+    0x7009, //  0: out    pins, 9         side 1
+    0x6067, //  1: out    null, 7         side 0
+    0x5061, //  2: in     null, 1         side 1
+            //     .wrap
+};
+
+#if !PICO_NO_HARDWARE
+static const struct pio_program scsi_sync_write_program = {
+    .instructions = scsi_sync_write_program_instructions,
+    .length = 3,
+    .origin = -1,
+};
+
+// Build the default state machine config for scsi_sync_write loaded at
+// instruction offset `offset`: the wrap range is relocated by the offset and one
+// mandatory side-set bit is enabled.
+// NOTE: this header is generated by pioasm; comments are lost on regeneration.
+static inline pio_sm_config scsi_sync_write_program_get_default_config(uint offset) {
+    pio_sm_config c = pio_get_default_sm_config();
+    sm_config_set_wrap(&c, offset + scsi_sync_write_wrap_target, offset + scsi_sync_write_wrap);
+    sm_config_set_sideset(&c, 1, false, false);
+    return c;
+}
+#endif
+
+// --------------------- //
+// scsi_sync_write_pacer //
+// --------------------- //
+
+#define scsi_sync_write_pacer_wrap_target 0
+#define scsi_sync_write_pacer_wrap 2
+
+static const uint16_t scsi_sync_write_pacer_program_instructions[] = {
+            //     .wrap_target
+    0x208a, //  0: wait   1 gpio, 10
+    0x200a, //  1: wait   0 gpio, 10
+    0x6061, //  2: out    null, 1
+            //     .wrap
+};
+
+#if !PICO_NO_HARDWARE
+static const struct pio_program scsi_sync_write_pacer_program = {
+    .instructions = scsi_sync_write_pacer_program_instructions,
+    .length = 3,
+    .origin = -1,
+};
+
+// Build the default state machine config for scsi_sync_write_pacer loaded at
+// instruction offset `offset`. Only the wrap range is set; this program does
+// not use side-set.
+// NOTE: this header is generated by pioasm; comments are lost on regeneration.
+static inline pio_sm_config scsi_sync_write_pacer_program_get_default_config(uint offset) {
+    pio_sm_config c = pio_get_default_sm_config();
+    sm_config_set_wrap(&c, offset + scsi_sync_write_pacer_wrap_target, offset + scsi_sync_write_pacer_wrap);
+    return c;
+}
+#endif

+ 141 - 0
lib/BlueSCSI_platform_RP2040/scsi_accel_host.cpp

@@ -0,0 +1,141 @@
+// Accelerated SCSI subroutines for SCSI initiator/host side communication
+
+#include "scsi_accel_host.h"
+#include "BlueSCSI_platform.h"
+#include "BlueSCSI_log.h"
+#include "scsi_accel_host.pio.h"
+#include <hardware/pio.h>
+#include <hardware/dma.h>
+#include <hardware/irq.h>
+#include <hardware/structs/iobank0.h>
+#include <hardware/sync.h>
+
+#define SCSI_PIO pio0
+#define SCSI_SM 0
+
+static struct {
+    // PIO configurations
+    uint32_t pio_offset_async_read;
+    pio_sm_config pio_cfg_async_read;
+} g_scsi_host;
+
+// State of the initiator-side accelerator.
+// The type is deliberately not called scsidma_state_t: scsi_accel_rp2040.cpp
+// defines an enum of that name with different enumerators, and two differing
+// definitions of one enum name in the same program violate the C++ one
+// definition rule.
+enum scsihost_state_t { SCSIHOST_IDLE = 0,
+                        SCSIHOST_READ };
+static volatile scsihost_state_t g_scsi_host_state;
+
+// Select the pin functions that match g_scsi_host_state.
+// The data bus pins are always routed to SIO; only the ACK pin is switched
+// between SIO (idle) and PIO0 (accelerated read). Any other state leaves the
+// pins untouched.
+static void scsi_accel_host_config_gpio()
+{
+    static const unsigned data_pins[] = {
+        SCSI_IO_DB0, SCSI_IO_DB1, SCSI_IO_DB2, SCSI_IO_DB3,
+        SCSI_IO_DB4, SCSI_IO_DB5, SCSI_IO_DB6, SCSI_IO_DB7,
+        SCSI_IO_DBP
+    };
+
+    uint32_t ack_function;
+
+    if (g_scsi_host_state == SCSIHOST_IDLE)
+    {
+        ack_function = GPIO_FUNC_SIO;
+    }
+    else if (g_scsi_host_state == SCSIHOST_READ)
+    {
+        // Data bus and REQ as input, ACK pin as output
+        pio_sm_set_pins(SCSI_PIO, SCSI_SM, 0x7FF);
+        pio_sm_set_consecutive_pindirs(SCSI_PIO, SCSI_SM, 0, 10, false);
+        pio_sm_set_consecutive_pindirs(SCSI_PIO, SCSI_SM, 10, 1, true);
+
+        ack_function = GPIO_FUNC_PIO0;
+    }
+    else
+    {
+        return;
+    }
+
+    for (unsigned i = 0; i < sizeof(data_pins) / sizeof(data_pins[0]); i++)
+    {
+        iobank0_hw->io[data_pins[i]].ctrl = GPIO_FUNC_SIO;
+    }
+    iobank0_hw->io[SCSI_OUT_ACK].ctrl = ack_function;
+}
+
+// Read up to `count` bytes from the SCSI bus into `buf` using the PIO
+// handshake program.
+//
+// buf         Destination buffer, at least `count` bytes.
+// count       Number of bytes requested; must be even. Zero is accepted and
+//             returns immediately without touching the bus.
+// parityError Set to 1 if the block-wide parity check fails; never cleared here.
+// resetFlag   Polled while waiting for data; a non-zero value aborts the read.
+//
+// Returns the number of bytes actually stored, which is less than `count` when
+// the target left the data phase or a reset was flagged.
+uint32_t scsi_accel_host_read(uint8_t *buf, uint32_t count, int *parityError, volatile int *resetFlag)
+{
+    // The PIO program produces two bytes per FIFO word, so the count must be even.
+    // Checked before any hardware state is changed.
+    assert((count & 1) == 0);
+
+    // Empty transfer: writing count - 1 to the PIO would wrap to 0xFFFFFFFF and
+    // the state machine could acknowledge a byte that nobody asked for.
+    if (count == 0)
+    {
+        return 0;
+    }
+
+    // Currently this method just reads from the PIO RX fifo directly in software loop.
+    // The SD card access is parallelized using DMA, so there is limited benefit from using DMA here.
+    g_scsi_host_state = SCSIHOST_READ;
+
+    // Remember the phase signals so that a phase change by the target can be detected
+    int cd_start = SCSI_IN(CD);
+    int msg_start = SCSI_IN(MSG);
+
+    pio_sm_init(SCSI_PIO, SCSI_SM, g_scsi_host.pio_offset_async_read, &g_scsi_host.pio_cfg_async_read);
+    scsi_accel_host_config_gpio();
+    pio_sm_set_enabled(SCSI_PIO, SCSI_SM, true);
+
+    // Set the number of bytes to read (the PIO program expects count minus one)
+    pio_sm_put(SCSI_PIO, SCSI_SM, count - 1);
+
+    // Read results from PIO RX FIFO
+    uint8_t *dst = buf;
+    uint8_t *end = buf + count;
+    uint32_t paritycheck = 0;
+    while (dst < end)
+    {
+        uint32_t available = pio_sm_get_rx_fifo_level(SCSI_PIO, SCSI_SM);
+
+        if (available == 0)
+        {
+            if (*resetFlag || !SCSI_IN(IO) || SCSI_IN(CD) != cd_start || SCSI_IN(MSG) != msg_start)
+            {
+                // Target switched out of DATA_IN mode
+                count = dst - buf;
+                break;
+            }
+        }
+
+        while (available > 0)
+        {
+            available--;
+            uint32_t word = pio_sm_get(SCSI_PIO, SCSI_SM);
+            paritycheck ^= word;
+            // Each FIFO word carries one byte in its low half and one in its high half.
+            // The raw value is inverted before the data bits are stored.
+            word = ~word;
+            *dst++ = word & 0xFF;
+            *dst++ = word >> 16;
+        }
+    }
+
+    // Check parity errors in whole block
+    // This doesn't detect if there is even number of parity errors in block.
+    uint8_t byte0 = ~(paritycheck & 0xFF);
+    uint8_t byte1 = ~(paritycheck >> 16);
+    if (paritycheck != ((g_scsi_parity_lookup[byte1] << 16) | g_scsi_parity_lookup[byte0]))
+    {
+        bluelog("Parity error in scsi_accel_host_read(): ", paritycheck);
+        *parityError = 1;
+    }
+
+    // Hand the pins back to software control and stop the state machine
+    g_scsi_host_state = SCSIHOST_IDLE;
+    SCSI_RELEASE_DATA_REQ();
+    scsi_accel_host_config_gpio();
+    pio_sm_set_enabled(SCSI_PIO, SCSI_SM, false);
+
+    return count;
+}
+
+
+// Prepare the initiator-side accelerator: put the pins in the idle routing,
+// wipe the PIO instruction memory, load the read program and store its load
+// offset and state machine configuration in g_scsi_host.
+void scsi_accel_host_init()
+{
+    g_scsi_host_state = SCSIHOST_IDLE;
+    scsi_accel_host_config_gpio();
+
+    // Start from empty PIO instruction memory
+    pio_clear_instruction_memory(SCSI_PIO);
+
+    // Asynchronous / synchronous SCSI read program
+    g_scsi_host.pio_offset_async_read = pio_add_program(SCSI_PIO, &scsi_host_async_read_program);
+
+    pio_sm_config *cfg = &g_scsi_host.pio_cfg_async_read;
+    *cfg = scsi_host_async_read_program_get_default_config(g_scsi_host.pio_offset_async_read);
+    sm_config_set_in_pins(cfg, SCSI_IO_DB0);           // "in pins" starts at DB0
+    sm_config_set_sideset_pins(cfg, SCSI_OUT_ACK);     // side-set bit drives ACK
+    sm_config_set_out_shift(cfg, true, false, 32);
+    sm_config_set_in_shift(cfg, true, true, 32);
+}

+ 11 - 0
lib/BlueSCSI_platform_RP2040/scsi_accel_host.h

@@ -0,0 +1,11 @@
+// Accelerated SCSI subroutines for SCSI initiator/host side communication
+
+#pragma once
+
+#include <stdint.h>
+
+// Load the initiator-side PIO program and prepare its configuration.
+// This clears the instruction memory of the PIO block it uses.
+void scsi_accel_host_init();
+
+// Read data from SCSI bus.
+// Number of bytes to read must be divisible by two.
+// Sets *parityError to 1 if a parity mismatch is detected in the block.
+// Returns the number of bytes stored in buf, which is less than count if
+// *resetFlag became non-zero or the target changed bus phase during the read.
+uint32_t scsi_accel_host_read(uint8_t *buf, uint32_t count, int *parityError, volatile int *resetFlag);

+ 26 - 0
lib/BlueSCSI_platform_RP2040/scsi_accel_host.pio

@@ -0,0 +1,26 @@
+; RP2040 PIO program for accelerating SCSI initiator / host function
+; Run "pioasm scsi_accel_host.pio scsi_accel_host.pio.h" to regenerate the C header from this.
+; GPIO mapping:
+; - 0-7: DB0-DB7
+; -   8: DBP
+; Side set is ACK pin
+
+.define REQ 9
+.define ACK 10
+
+; Read from SCSI bus using asynchronous handshake.
+; Data is returned as 16-bit words that contain the 8 data bits + 1 parity bit.
+; Number of bytes to receive minus 1 should be written to TX fifo.
+; Number of bytes to receive must be divisible by 2.
+; The side-set bit drives ACK: "side 1" keeps it released, "side 0" asserts it.
+.program scsi_host_async_read
+    .side_set 1
+
+    pull block                  side 1  ; Get number of bytes to receive
+    mov x, osr                  side 1  ; Store to counter X
+
+start:
+    wait 0 gpio REQ             side 1  ; Wait for REQ low
+    in pins, 9                  side 0  ; Assert ACK, read GPIO
+    in null, 7                  side 0  ; Padding bits
+    wait 1 gpio REQ             side 0  ; Wait for REQ high
+    jmp x-- start               side 1  ; Deassert ACK, decrement byte count and jump to start

+ 44 - 0
lib/BlueSCSI_platform_RP2040/scsi_accel_host.pio.h

@@ -0,0 +1,44 @@
+// -------------------------------------------------- //
+// This file is autogenerated by pioasm; do not edit! //
+// -------------------------------------------------- //
+
+#pragma once
+
+#if !PICO_NO_HARDWARE
+#include "hardware/pio.h"
+#endif
+
+// -------------------- //
+// scsi_host_async_read //
+// -------------------- //
+
+#define scsi_host_async_read_wrap_target 0
+#define scsi_host_async_read_wrap 6
+
+static const uint16_t scsi_host_async_read_program_instructions[] = {
+            //     .wrap_target
+    0x90a0, //  0: pull   block           side 1     
+    0xb027, //  1: mov    x, osr          side 1     
+    0x3009, //  2: wait   0 gpio, 9       side 1     
+    0x4009, //  3: in     pins, 9         side 0     
+    0x4067, //  4: in     null, 7         side 0     
+    0x2089, //  5: wait   1 gpio, 9       side 0     
+    0x1042, //  6: jmp    x--, 2          side 1     
+            //     .wrap
+};
+
+#if !PICO_NO_HARDWARE
+static const struct pio_program scsi_host_async_read_program = {
+    .instructions = scsi_host_async_read_program_instructions,
+    .length = 7,
+    .origin = -1,
+};
+
+// Build the default state machine config for scsi_host_async_read loaded at
+// instruction offset `offset`: the wrap range is relocated by the offset and one
+// mandatory side-set bit is enabled.
+// NOTE: this header is generated by pioasm; comments are lost on regeneration.
+static inline pio_sm_config scsi_host_async_read_program_get_default_config(uint offset) {
+    pio_sm_config c = pio_get_default_sm_config();
+    sm_config_set_wrap(&c, offset + scsi_host_async_read_wrap_target, offset + scsi_host_async_read_wrap);
+    sm_config_set_sideset(&c, 1, false, false);
+    return c;
+}
+#endif
+

+ 734 - 0
lib/BlueSCSI_platform_RP2040/scsi_accel_rp2040.cpp

@@ -0,0 +1,734 @@
+/* Data flow in SCSI acceleration:
+ *
+ * 1. Application provides a buffer of bytes to send.
+ * 2. Code in this module adds parity bit to the bytes and packs two bytes into 32 bit words.
+ * 3. DMA controller copies the words to PIO peripheral FIFO.
+ * 4. PIO peripheral handles low-level SCSI handshake and writes bytes and parity to GPIO.
+ */
+
+#include "BlueSCSI_platform.h"
+#include "BlueSCSI_log.h"
+#include "scsi_accel_rp2040.h"
+#include "scsi_accel.pio.h"
+#include <hardware/pio.h>
+#include <hardware/dma.h>
+#include <hardware/irq.h>
+#include <hardware/structs/iobank0.h>
+#include <hardware/sync.h>
+#include <multicore.h>
+
+#define SCSI_DMA_PIO pio0
+#define SCSI_DMA_SM 0
+#define SCSI_DMA_CH 0
+#define SCSI_DMA_SYNC_SM 1
+#define SCSI_DMA_SYNC_CH 1
+
+enum scsidma_buf_sel_t { SCSIBUF_NONE = 0, SCSIBUF_A = 1, SCSIBUF_B = 2 };
+
+#define DMA_BUF_SIZE 128
+// All state of the target-side accelerator. The buffer bookkeeping fields are
+// modified both by the application-facing functions and by the DMA interrupt
+// handler; `mutex` is taken around those accesses.
+static struct {
+    uint8_t *app_buf; // Buffer provided by application
+    uint32_t app_bytes; // Bytes available in application buffer
+    uint32_t dma_bytes; // Bytes that have been converted to DMA buffer so far
+
+    uint8_t *next_app_buf; // Next buffer from application after current one finishes
+    uint32_t next_app_bytes; // Bytes in next buffer
+
+    // Synchronous transfer parameters. syncOffset == 0 selects asynchronous mode.
+    int syncOffset;
+    int syncPeriod;
+    int syncOffsetDivider; // Autopush/autopull threshold for the write pacer state machine
+    int syncOffsetPreload; // Number of items to preload in the RX fifo of scsi_sync_write
+
+    // PIO configurations: load offset and state machine config of each program
+    uint32_t pio_offset_async_write;
+    uint32_t pio_offset_async_read;
+    uint32_t pio_offset_sync_write_pacer;
+    uint32_t pio_offset_sync_write;
+    pio_sm_config pio_cfg_async_write;
+    pio_sm_config pio_cfg_async_read;
+    pio_sm_config pio_cfg_sync_write_pacer;
+    pio_sm_config pio_cfg_sync_write;
+
+    // DMA configurations
+    dma_channel_config dma_write_config; // Data from RAM to first state machine
+    dma_channel_config dma_write_pacer_config; // In synchronous mode only, transfer between state machines
+
+    // We use two DMA buffers alternatively
+    // The buffer contains the data bytes with parity added.
+    // Each 32-bit entry holds two bytes, so a full buffer covers 2 * DMA_BUF_SIZE bytes.
+    scsidma_buf_sel_t dma_current_buf; // Buffer the DMA channel is currently sending, or SCSIBUF_NONE
+    uint32_t dma_countA; // Number of valid words in dma_bufA
+    uint32_t dma_countB; // Number of valid words in dma_bufB
+    uint32_t dma_bufA[DMA_BUF_SIZE];
+    uint32_t dma_bufB[DMA_BUF_SIZE];
+
+    // Try to offload SCSI DMA interrupts to second core if possible
+    volatile bool core1_active;
+    mutex_t mutex;
+} g_scsi_dma;
+
+enum scsidma_state_t { SCSIDMA_IDLE = 0,
+                       SCSIDMA_WRITE, SCSIDMA_WRITE_DONE,
+                       SCSIDMA_READ };
+static volatile scsidma_state_t g_scsi_dma_state;
+static bool g_channels_claimed = false;
+
+// Make the queued application buffer (possibly none) the current one and
+// empty the queue slot. Conversion restarts from the beginning of the buffer.
+static void scsidma_promote_next_buf()
+{
+    g_scsi_dma.dma_bytes = 0;
+    g_scsi_dma.app_buf = g_scsi_dma.next_app_buf;
+    g_scsi_dma.app_bytes = g_scsi_dma.next_app_bytes;
+    g_scsi_dma.next_app_buf = 0;
+    g_scsi_dma.next_app_bytes = 0;
+}
+
+// Convert the next part of the application buffer into `buf`, which must hold
+// DMA_BUF_SIZE words. Each output word carries two input bytes, each replaced
+// by its entry from the parity lookup table.
+// Returns the number of words written; 0 means there is nothing left to send.
+static uint32_t refill_dmabuf(uint32_t *buf)
+{
+    // Current buffer is empty but another one is waiting: switch to it first
+    if (g_scsi_dma.app_bytes == 0 && g_scsi_dma.next_app_bytes > 0)
+    {
+        scsidma_promote_next_buf();
+    }
+
+    uint32_t words = (g_scsi_dma.app_bytes - g_scsi_dma.dma_bytes) / 2;
+    if (words > DMA_BUF_SIZE) words = DMA_BUF_SIZE;
+
+    // Input is consumed two bytes at a time
+    uint16_t *pairs = (uint16_t*)&g_scsi_dma.app_buf[g_scsi_dma.dma_bytes];
+    for (uint32_t i = 0; i < words; i++)
+    {
+        uint16_t pair = pairs[i];
+        buf[i] = (g_scsi_parity_lookup[pair & 0xFF])
+               | ((g_scsi_parity_lookup[pair >> 8]) << 16);
+    }
+
+    g_scsi_dma.dma_bytes += words * 2;
+
+    // Once the whole buffer has been converted, move on to the queued one
+    if (g_scsi_dma.dma_bytes >= g_scsi_dma.app_bytes)
+    {
+        assert(g_scsi_dma.dma_bytes == g_scsi_dma.app_bytes);
+        scsidma_promote_next_buf();
+    }
+
+    return words;
+}
+
+// Route the data bus and REQ pins to either the PIO peripheral or to software
+// controlled SIO, depending on g_scsi_dma_state:
+//   idle  - everything on SIO
+//   write - data bus and REQ on PIO0, all driven as outputs
+//   read  - data bus on SIO as inputs, REQ on PIO0 as output
+// Other states leave the pins untouched.
+static void scsidma_config_gpio()
+{
+    static const unsigned data_pins[] = {
+        SCSI_IO_DB0, SCSI_IO_DB1, SCSI_IO_DB2, SCSI_IO_DB3,
+        SCSI_IO_DB4, SCSI_IO_DB5, SCSI_IO_DB6, SCSI_IO_DB7,
+        SCSI_IO_DBP
+    };
+
+    uint32_t data_function;
+    uint32_t req_function;
+
+    if (g_scsi_dma_state == SCSIDMA_IDLE)
+    {
+        data_function = GPIO_FUNC_SIO;
+        req_function = GPIO_FUNC_SIO;
+    }
+    else if (g_scsi_dma_state == SCSIDMA_WRITE)
+    {
+        // Make sure the initial state of all pins is high and output
+        pio_sm_set_pins(SCSI_DMA_PIO, SCSI_DMA_SM, 0x3FF);
+        pio_sm_set_consecutive_pindirs(SCSI_DMA_PIO, SCSI_DMA_SM, 0, 10, true);
+
+        data_function = GPIO_FUNC_PIO0;
+        req_function = GPIO_FUNC_PIO0;
+    }
+    else if (g_scsi_dma_state == SCSIDMA_READ)
+    {
+        // Data bus as input, REQ pin as output
+        pio_sm_set_pins(SCSI_DMA_PIO, SCSI_DMA_SM, 0x3FF);
+        pio_sm_set_consecutive_pindirs(SCSI_DMA_PIO, SCSI_DMA_SM, 0, 9, false);
+        pio_sm_set_consecutive_pindirs(SCSI_DMA_PIO, SCSI_DMA_SM, 9, 1, true);
+
+        data_function = GPIO_FUNC_SIO;
+        req_function = GPIO_FUNC_PIO0;
+    }
+    else
+    {
+        return;
+    }
+
+    for (unsigned i = 0; i < sizeof(data_pins) / sizeof(data_pins[0]); i++)
+    {
+        iobank0_hw->io[data_pins[i]].ctrl = data_function;
+    }
+    iobank0_hw->io[SCSI_OUT_REQ].ctrl = req_function;
+}
+
+// Convert the first data into both DMA buffers, start the DMA channel on
+// buffer A and enable the state machine(s).
+// In synchronous mode (syncOffset != 0) a second DMA channel is also started
+// that copies words from the data state machine's RX fifo to the pacer state
+// machine's TX fifo.
+static void start_dma_write()
+{
+    // Prefill both DMA buffers
+    g_scsi_dma.dma_countA = refill_dmabuf(g_scsi_dma.dma_bufA);
+    g_scsi_dma.dma_countB = refill_dmabuf(g_scsi_dma.dma_bufB);
+
+    if (g_scsi_dma.syncOffset == 0)
+    {
+        // Asynchronous mode
+        // Start DMA from buffer A
+        g_scsi_dma.dma_current_buf = SCSIBUF_A;
+        dma_channel_configure(SCSI_DMA_CH,
+            &g_scsi_dma.dma_write_config,
+            &SCSI_DMA_PIO->txf[SCSI_DMA_SM],
+            g_scsi_dma.dma_bufA,
+            g_scsi_dma.dma_countA,
+            true
+        );
+
+        // Enable state machine
+        pio_sm_set_enabled(SCSI_DMA_PIO, SCSI_DMA_SM, true);
+    }
+    else
+    {
+        // Synchronous mode
+
+        // Start DMA transfer to move dummy bits to write pacer.
+        // The FIFO registers are indexed by state machine number, so the
+        // destination must use SCSI_DMA_SYNC_SM rather than the DMA channel number.
+        dma_channel_configure(SCSI_DMA_SYNC_CH,
+            &g_scsi_dma.dma_write_pacer_config,
+            &SCSI_DMA_PIO->txf[SCSI_DMA_SYNC_SM],
+            &SCSI_DMA_PIO->rxf[SCSI_DMA_SM],
+            0xFFFFFFFF,
+            true
+        );
+
+        // Start DMA transfer to move data from buffer A to data writer
+        g_scsi_dma.dma_current_buf = SCSIBUF_A;
+        dma_channel_configure(SCSI_DMA_CH,
+            &g_scsi_dma.dma_write_config,
+            &SCSI_DMA_PIO->txf[SCSI_DMA_SM],
+            g_scsi_dma.dma_bufA,
+            g_scsi_dma.dma_countA,
+            true
+        );
+
+        // Enable state machines
+        pio_sm_set_enabled(SCSI_DMA_PIO, SCSI_DMA_SYNC_SM, true);
+        pio_sm_set_enabled(SCSI_DMA_PIO, SCSI_DMA_SM, true);
+    }
+
+}
+
+// DMA completion interrupt for the write channel.
+// Called each time one DMA buffer has been fully transferred. It restarts the
+// channel on the other buffer if that one holds data, then refills the buffer
+// that just finished. When neither buffer has data left and no more
+// application data is available, the state is set to SCSIDMA_WRITE_DONE.
+static void scsi_dma_write_irq()
+{
+    // Acknowledge the interrupt for our channel
+    dma_hw->ints0 = 1 << SCSI_DMA_CH;
+
+    // Buffer bookkeeping is shared with the application-facing functions
+    mutex_enter_blocking(&g_scsi_dma.mutex);
+
+    if (g_scsi_dma.dma_current_buf == SCSIBUF_A)
+    {
+        // Transfer from buffer A finished
+        g_scsi_dma.dma_countA = 0;
+        g_scsi_dma.dma_current_buf = SCSIBUF_NONE;
+
+        if (g_scsi_dma.dma_countB != 0)
+        {
+            // Start transferring buffer B immediately
+            // (writing the read address with trigger=true starts the channel)
+            dma_channel_set_trans_count(SCSI_DMA_CH, g_scsi_dma.dma_countB, false);
+            dma_channel_set_read_addr(SCSI_DMA_CH, g_scsi_dma.dma_bufB, true);
+            g_scsi_dma.dma_current_buf = SCSIBUF_B;
+
+            // Refill buffer A for next time
+            g_scsi_dma.dma_countA = refill_dmabuf(g_scsi_dma.dma_bufA);
+        }
+    }
+    else
+    {
+        // Transfer from buffer B finished
+        g_scsi_dma.dma_countB = 0;
+        g_scsi_dma.dma_current_buf = SCSIBUF_NONE;
+
+        if (g_scsi_dma.dma_countA != 0)
+        {
+            // Start transferring buffer A immediately
+            dma_channel_set_trans_count(SCSI_DMA_CH, g_scsi_dma.dma_countA, false);
+            dma_channel_set_read_addr(SCSI_DMA_CH, g_scsi_dma.dma_bufA, true);
+            g_scsi_dma.dma_current_buf = SCSIBUF_A;
+
+            // Refill buffer B for next time
+            g_scsi_dma.dma_countB = refill_dmabuf(g_scsi_dma.dma_bufB);
+        }
+    }
+
+    if (g_scsi_dma.dma_current_buf == SCSIBUF_NONE)
+    {
+        // Both buffers are empty, check if we have more data
+        // (the application may have queued more since the last refill)
+        g_scsi_dma.dma_countA = refill_dmabuf(g_scsi_dma.dma_bufA);
+
+        if (g_scsi_dma.dma_countA == 0)
+        {
+            // End of data for DMA, but PIO may still have bytes in its buffer
+            g_scsi_dma_state = SCSIDMA_WRITE_DONE;
+        }
+        else
+        {
+            // Start transfer from buffer A
+            dma_channel_set_trans_count(SCSI_DMA_CH, g_scsi_dma.dma_countA, false);
+            dma_channel_set_read_addr(SCSI_DMA_CH, g_scsi_dma.dma_bufA, true);
+            g_scsi_dma.dma_current_buf = SCSIBUF_A;
+
+            // Refill B for the next interrupt
+            g_scsi_dma.dma_countB = refill_dmabuf(g_scsi_dma.dma_bufB);
+        }
+    }
+
+    mutex_exit(&g_scsi_dma.mutex);
+}
+
+// SCSI DMA interrupts are offloaded to the second core if possible.
+// Installs scsi_dma_write_irq as the exclusive DMA_IRQ_0 handler, enables the
+// interrupt and records that in core1_active.
+// NOTE(review): presumably this is meant to be executed on core 1 so that the
+// handler runs there; the caller is outside this section, confirm.
+static void enable_irq_second_core()
+{
+    irq_set_exclusive_handler(DMA_IRQ_0, scsi_dma_write_irq);
+    irq_set_enabled(DMA_IRQ_0, true);
+    g_scsi_dma.core1_active = true;
+}
+
+// Block the SCSI DMA interrupt from executing on either core.
+// Used during setting of the buffer pointers.
+// Interrupts are disabled on the calling core and the mutex that
+// scsi_dma_write_irq takes is acquired. Must be paired with
+// scsi_dma_unblock_irqs().
+static void scsi_dma_block_irqs()
+{
+    __disable_irq();
+    mutex_enter_blocking(&g_scsi_dma.mutex);
+}
+
+// Undo scsi_dma_block_irqs(): release the mutex, then re-enable interrupts on
+// the calling core. Interrupts are enabled unconditionally, not restored to a
+// saved state.
+static void scsi_dma_unblock_irqs()
+{
+    mutex_exit(&g_scsi_dma.mutex);
+    __enable_irq();
+}
+
+// Start sending `count` bytes from `data` to the SCSI bus using PIO and DMA.
+//
+// If a write is already running, the new data is appended to the running or
+// queued request when it is contiguous with it, or stored as the single queued
+// request when that slot is free. Otherwise the function waits for the previous
+// transfer to finish and then starts a new one.
+//
+// data       Bytes to send; must stay valid until the transfer has consumed them.
+// count      Number of bytes; must be even.
+// resetFlag  Checked after waiting for a previous transfer; if set, the new
+//            transfer is not started.
+void scsi_accel_rp2040_startWrite(const uint8_t* data, uint32_t count, volatile int *resetFlag)
+{
+    // Number of bytes should always be divisible by 2.
+    assert((count & 1) == 0);
+
+    // Try to attach the data to a transfer that is already in progress.
+    // count is set to 0 when the request has been absorbed.
+    scsi_dma_block_irqs();
+    if (g_scsi_dma_state == SCSIDMA_WRITE)
+    {
+        if (!g_scsi_dma.next_app_buf && data == g_scsi_dma.app_buf + g_scsi_dma.app_bytes)
+        {
+            // Combine with currently running request
+            g_scsi_dma.app_bytes += count;
+            count = 0;
+        }
+        else if (data == g_scsi_dma.next_app_buf + g_scsi_dma.next_app_bytes)
+        {
+            // Combine with queued request
+            g_scsi_dma.next_app_bytes += count;
+            count = 0;
+        }
+        else if (!g_scsi_dma.next_app_buf)
+        {
+            // Add as queued request
+            g_scsi_dma.next_app_buf = (uint8_t*)data;
+            g_scsi_dma.next_app_bytes = count;
+            count = 0;
+        }
+    }
+    scsi_dma_unblock_irqs();
+
+    // Check if the request was combined
+    // (this also returns immediately for an empty request)
+    if (count == 0) return;
+
+    if (g_scsi_dma_state != SCSIDMA_IDLE && g_scsi_dma_state != SCSIDMA_WRITE_DONE)
+    {
+        // Wait for previous request to finish
+        scsi_accel_rp2040_finishWrite(resetFlag);
+        if (*resetFlag)
+        {
+            return;
+        }
+    }
+
+    // The state machines and pins only need to be set up when coming from idle;
+    // after SCSIDMA_WRITE_DONE they are still configured for writing.
+    bool must_reconfig_gpio = (g_scsi_dma_state == SCSIDMA_IDLE);
+    g_scsi_dma_state = SCSIDMA_WRITE;
+    g_scsi_dma.app_buf = (uint8_t*)data;
+    g_scsi_dma.app_bytes = count;
+    g_scsi_dma.dma_bytes = 0;
+    g_scsi_dma.next_app_buf = 0;
+    g_scsi_dma.next_app_bytes = 0;
+    g_scsi_dma.dma_current_buf = SCSIBUF_NONE;
+
+    if (must_reconfig_gpio)
+    {
+        SCSI_ENABLE_DATA_OUT();
+
+        if (g_scsi_dma.syncOffset == 0)
+        {
+            // Asynchronous write
+            pio_sm_init(SCSI_DMA_PIO, SCSI_DMA_SM, g_scsi_dma.pio_offset_async_write, &g_scsi_dma.pio_cfg_async_write);
+            scsidma_config_gpio();
+        }
+        else
+        {
+            // Synchronous write
+            // First state machine writes data to SCSI bus and dummy bits to its RX fifo.
+            // Second state machine empties the dummy bits every time ACK is received, to control the transmit pace.
+            pio_sm_init(SCSI_DMA_PIO, SCSI_DMA_SM, g_scsi_dma.pio_offset_sync_write, &g_scsi_dma.pio_cfg_sync_write);
+            pio_sm_init(SCSI_DMA_PIO, SCSI_DMA_SYNC_SM, g_scsi_dma.pio_offset_sync_write_pacer, &g_scsi_dma.pio_cfg_sync_write_pacer);
+            scsidma_config_gpio();
+
+            // Prefill RX fifo to set the syncOffset
+            for (int i = 0; i < g_scsi_dma.syncOffsetPreload; i++)
+            {
+                pio_sm_exec(SCSI_DMA_PIO, SCSI_DMA_SM,
+                    pio_encode_push(false, false) | pio_encode_sideset(1, 1));
+            }
+
+            // Fill the pacer TX fifo
+            // DMA should start transferring only after ACK pulses are received
+            for (int i = 0; i < 4; i++)
+            {
+                pio_sm_put(SCSI_DMA_PIO, SCSI_DMA_SYNC_SM, 0);
+            }
+
+            // Fill the pacer OSR
+            pio_sm_exec(SCSI_DMA_PIO, SCSI_DMA_SYNC_SM,
+                pio_encode_mov(pio_osr, pio_null));
+        }
+
+        dma_channel_set_irq0_enabled(SCSI_DMA_CH, true);
+    }
+
+    start_dma_write();
+}
+
+// Tell whether the byte at `data` is no longer referenced by a pending write,
+// so that the caller may reuse that part of its buffer.
+// With data == NULL the answer only reflects whether all writes have completed.
+bool scsi_accel_rp2040_isWriteFinished(const uint8_t* data)
+{
+    // In the idle and write-done states nothing is pending at all
+    const bool all_done = (g_scsi_dma_state == SCSIDMA_IDLE || g_scsi_dma_state == SCSIDMA_WRITE_DONE);
+    if (all_done || !data)
+    {
+        return all_done;
+    }
+
+    // Snapshot the current and queued buffer ranges with DMA interrupts blocked
+    scsi_dma_block_irqs();
+    const uint8_t *active_begin = g_scsi_dma.app_buf + g_scsi_dma.dma_bytes;
+    const uint8_t *active_end = g_scsi_dma.app_buf + g_scsi_dma.app_bytes;
+    const uint8_t *queued_begin = g_scsi_dma.next_app_buf;
+    const uint8_t *queued_end = queued_begin + g_scsi_dma.next_app_bytes;
+    scsi_dma_unblock_irqs();
+
+    const bool in_active = (data >= active_begin && data < active_end); // In current transfer
+    const bool in_queued = (data >= queued_begin && data < queued_end); // In queued transfer
+    return !in_active && !in_queued;
+}
+
+// Check whether the PIO side of a write has fully completed:
+// TX FIFO drained, state machine finished its last word, and ACK released.
+static bool scsi_accel_rp2040_isWriteDone()
+{
+    // Data still waiting in the PIO TX FIFO means we are not done
+    if (!pio_sm_is_tx_fifo_empty(SCSI_DMA_PIO, SCSI_DMA_SM))
+    {
+        return false;
+    }
+
+    bool sm_finished;
+    if (g_scsi_dma.syncOffset > 0)
+    {
+        // Synchronous: every byte is acknowledged once the RX FIFO is back at the preload level
+        sm_finished = (pio_sm_get_rx_fifo_level(SCSI_DMA_PIO, SCSI_DMA_SM) <= g_scsi_dma.syncOffsetPreload);
+    }
+    else
+    {
+        // Asynchronous: the state machine has written out its OSR when its PC is back at the program start
+        sm_finished = (pio_sm_get_pc(SCSI_DMA_PIO, SCSI_DMA_SM) == g_scsi_dma.pio_offset_async_write);
+    }
+
+    if (!sm_finished)
+    {
+        return false;
+    }
+
+    // Finally the ACK of the last byte must have ended
+    return !SCSI_IN(ACK);
+}
+
+// Wait for the PIO to finish the write in progress, then abort the DMA channels,
+// disable the state machines and return to the idle state.
+// If the wait exceeds 5 seconds, a message is logged and *resetFlag is set.
+void scsi_accel_rp2040_stopWrite(volatile int *resetFlag)
+{
+    // Wait for TX fifo to drain and ACK to be released;
+    // in synchronous mode this also covers all outstanding ACKs.
+    const uint32_t wait_begin = millis();
+    for (;;)
+    {
+        if (scsi_accel_rp2040_isWriteDone() || *resetFlag)
+        {
+            break;
+        }
+
+        if ((uint32_t)(millis() - wait_begin) > 5000)
+        {
+            bluelog("scsi_accel_rp2040_stopWrite() timeout, FIFO levels ",
+                (int)pio_sm_get_tx_fifo_level(SCSI_DMA_PIO, SCSI_DMA_SM), " ",
+                (int)pio_sm_get_rx_fifo_level(SCSI_DMA_PIO, SCSI_DMA_SM), " PC ",
+                (int)pio_sm_get_pc(SCSI_DMA_PIO, SCSI_DMA_SM));
+            *resetFlag = 1;
+            break;
+        }
+    }
+
+    // Tear down DMA first, then release the bus and stop both state machines
+    dma_channel_abort(SCSI_DMA_CH);
+    dma_channel_abort(SCSI_DMA_SYNC_CH);
+    dma_channel_set_irq0_enabled(SCSI_DMA_CH, false);
+    g_scsi_dma_state = SCSIDMA_IDLE;
+    SCSI_RELEASE_DATA_REQ();
+    scsidma_config_gpio();
+    pio_sm_set_enabled(SCSI_DMA_PIO, SCSI_DMA_SM, false);
+    pio_sm_set_enabled(SCSI_DMA_PIO, SCSI_DMA_SYNC_SM, false);
+}
+
+// Block until the write state machine has returned to idle.
+// When the DMA part is done, scsi_accel_rp2040_stopWrite() is called to wait for the PIO.
+// If this takes longer than 5 seconds, diagnostics are logged and *resetFlag is set.
+void scsi_accel_rp2040_finishWrite(volatile int *resetFlag)
+{
+    const uint32_t wait_begin = millis();
+    for (;;)
+    {
+        if (g_scsi_dma_state == SCSIDMA_IDLE || *resetFlag)
+        {
+            break;
+        }
+
+        if ((uint32_t)(millis() - wait_begin) > 5000)
+        {
+            bluelog("scsi_accel_rp2040_finishWrite() timeout,"
+             " state: ", (int)g_scsi_dma_state, " ", (int)g_scsi_dma.dma_current_buf, " ", (int)g_scsi_dma.dma_countA, " ", (int)g_scsi_dma.dma_countB,
+             " PIO PC: ", (int)pio_sm_get_pc(SCSI_DMA_PIO, SCSI_DMA_SM), " ", (int)pio_sm_get_pc(SCSI_DMA_PIO, SCSI_DMA_SYNC_SM),
+             " PIO FIFO: ", (int)pio_sm_get_tx_fifo_level(SCSI_DMA_PIO, SCSI_DMA_SM), " ", (int)pio_sm_get_tx_fifo_level(SCSI_DMA_PIO, SCSI_DMA_SYNC_SM),
+             " DMA counts: ", dma_hw->ch[SCSI_DMA_CH].al2_transfer_count, " ", dma_hw->ch[SCSI_DMA_SYNC_CH].al2_transfer_count);
+            *resetFlag = 1;
+            break;
+        }
+
+        // DMA done: let stopWrite() wait for the PIO and reconfigure the GPIO
+        if (g_scsi_dma_state == SCSIDMA_WRITE_DONE)
+        {
+            scsi_accel_rp2040_stopWrite(resetFlag);
+        }
+    }
+}
+
+// Read `count` bytes from the SCSI bus into `buf` using the PIO read program.
+//   buf         - destination buffer, must hold at least `count` bytes
+//   count       - number of bytes to read, must be even (asserted)
+//   parityError - set to 1 if the block-level parity check fails; not written otherwise
+//   resetFlag   - polled during the transfer; a non-zero value aborts the read loop
+void scsi_accel_rp2040_read(uint8_t *buf, uint32_t count, int *parityError, volatile int *resetFlag)
+{
+    // The hardware would support DMA for reading from SCSI bus also, but currently
+    // the rest of the software architecture does not. There is not much benefit
+    // because there isn't much else to do before we get the data from the SCSI bus.
+    //
+    // Currently this method just reads from the PIO RX fifo directly in software loop.
+
+    g_scsi_dma_state = SCSIDMA_READ;
+    pio_sm_init(SCSI_DMA_PIO, SCSI_DMA_SM, g_scsi_dma.pio_offset_async_read, &g_scsi_dma.pio_cfg_async_read);
+    scsidma_config_gpio();
+    pio_sm_set_enabled(SCSI_DMA_PIO, SCSI_DMA_SM, true);
+
+    // Set the number of bytes to read, must be divisible by 2.
+    assert((count & 1) == 0);
+    pio_sm_put(SCSI_DMA_PIO, SCSI_DMA_SM, count - 1);
+
+    // Read results from PIO RX FIFO
+    uint8_t *dst = buf;
+    uint8_t *end = buf + count;
+    uint32_t paritycheck = 0;
+    while (dst < end)
+    {
+        if (*resetFlag)
+        {
+            break;
+        }
+
+        uint32_t available = pio_sm_get_rx_fifo_level(SCSI_DMA_PIO, SCSI_DMA_SM);
+
+        while (available > 0)
+        {
+            available--;
+            uint32_t word = pio_sm_get(SCSI_DMA_PIO, SCSI_DMA_SM);
+            // Accumulate the raw words so parity can be checked once for the whole block
+            paritycheck ^= word;
+            // The FIFO word is stored inverted; each word yields two data bytes,
+            // taken from bits 0-7 and from bit 16 upwards.
+            word = ~word;
+            *dst++ = word & 0xFF;
+            *dst++ = word >> 16;
+        }
+    }
+
+    // Check parity errors in whole block
+    // This doesn't detect if there is even number of parity errors in block.
+    uint8_t byte0 = ~(paritycheck & 0xFF);
+    uint8_t byte1 = ~(paritycheck >> 16);
+    if (paritycheck != ((g_scsi_parity_lookup[byte1] << 16) | g_scsi_parity_lookup[byte0]))
+    {
+        bluelog("Parity error in scsi_accel_rp2040_read(): ", paritycheck);
+        *parityError = 1;
+    }
+
+    // Return to idle: release REQ, restore GPIO configuration and stop the state machine
+    g_scsi_dma_state = SCSIDMA_IDLE;
+    SCSI_RELEASE_DATA_REQ();
+    scsidma_config_gpio();
+    pio_sm_set_enabled(SCSI_DMA_PIO, SCSI_DMA_SM, false);
+}
+
+// Initialize the SCSI accelerator: claim the PIO state machine and DMA channel
+// (first call only), load the PIO programs, prepare the PIO and DMA configurations,
+// and set up DMA interrupt handling, preferably on the second core.
+void scsi_accel_rp2040_init()
+{
+    g_scsi_dma_state = SCSIDMA_IDLE;
+    scsidma_config_gpio();
+
+    // Mark channels as being in use, unless it has been done already
+    if (!g_channels_claimed)
+    {
+        pio_sm_claim(SCSI_DMA_PIO, SCSI_DMA_SM);
+        dma_channel_claim(SCSI_DMA_CH);
+        mutex_init(&g_scsi_dma.mutex);
+        g_channels_claimed = true;
+    }
+
+    // Load PIO programs
+    // Instruction memory is cleared first, so the offsets below are reassigned on every call.
+    pio_clear_instruction_memory(SCSI_DMA_PIO);
+
+    // Asynchronous SCSI write
+    g_scsi_dma.pio_offset_async_write = pio_add_program(SCSI_DMA_PIO, &scsi_accel_async_write_program);
+    g_scsi_dma.pio_cfg_async_write = scsi_accel_async_write_program_get_default_config(g_scsi_dma.pio_offset_async_write);
+    sm_config_set_out_pins(&g_scsi_dma.pio_cfg_async_write, SCSI_IO_DB0, 9);
+    sm_config_set_sideset_pins(&g_scsi_dma.pio_cfg_async_write, SCSI_OUT_REQ);
+    sm_config_set_fifo_join(&g_scsi_dma.pio_cfg_async_write, PIO_FIFO_JOIN_TX);
+    sm_config_set_out_shift(&g_scsi_dma.pio_cfg_async_write, true, false, 32);
+
+    // Asynchronous / synchronous SCSI read
+    g_scsi_dma.pio_offset_async_read = pio_add_program(SCSI_DMA_PIO, &scsi_accel_async_read_program);
+    g_scsi_dma.pio_cfg_async_read = scsi_accel_async_read_program_get_default_config(g_scsi_dma.pio_offset_async_read);
+    sm_config_set_in_pins(&g_scsi_dma.pio_cfg_async_read, SCSI_IO_DB0);
+    sm_config_set_sideset_pins(&g_scsi_dma.pio_cfg_async_read, SCSI_OUT_REQ);
+    sm_config_set_out_shift(&g_scsi_dma.pio_cfg_async_read, true, false, 32);
+    sm_config_set_in_shift(&g_scsi_dma.pio_cfg_async_read, true, true, 32);
+
+    // Synchronous SCSI write pacer / ACK handler
+    g_scsi_dma.pio_offset_sync_write_pacer = pio_add_program(SCSI_DMA_PIO, &scsi_sync_write_pacer_program);
+    g_scsi_dma.pio_cfg_sync_write_pacer = scsi_sync_write_pacer_program_get_default_config(g_scsi_dma.pio_offset_sync_write_pacer);
+    sm_config_set_out_shift(&g_scsi_dma.pio_cfg_sync_write_pacer, true, true, 1);
+
+    // Synchronous SCSI data writer
+    g_scsi_dma.pio_offset_sync_write = pio_add_program(SCSI_DMA_PIO, &scsi_sync_write_program);
+    g_scsi_dma.pio_cfg_sync_write = scsi_sync_write_program_get_default_config(g_scsi_dma.pio_offset_sync_write);
+    sm_config_set_out_pins(&g_scsi_dma.pio_cfg_sync_write, SCSI_IO_DB0, 9);
+    sm_config_set_sideset_pins(&g_scsi_dma.pio_cfg_sync_write, SCSI_OUT_REQ);
+    sm_config_set_out_shift(&g_scsi_dma.pio_cfg_sync_write, true, true, 32);
+    sm_config_set_in_shift(&g_scsi_dma.pio_cfg_sync_write, true, true, 1);
+
+    // Create DMA channel configuration so it can be applied quickly later
+    dma_channel_config cfg = dma_channel_get_default_config(SCSI_DMA_CH);
+    channel_config_set_transfer_data_size(&cfg, DMA_SIZE_32);
+    channel_config_set_read_increment(&cfg, true);
+    channel_config_set_write_increment(&cfg, false);
+    channel_config_set_dreq(&cfg, pio_get_dreq(SCSI_DMA_PIO, SCSI_DMA_SM, true));
+    g_scsi_dma.dma_write_config = cfg;
+
+    // In synchronous mode a second DMA channel is used to transfer dummy bits
+    // from first state machine to second one.
+    cfg = dma_channel_get_default_config(SCSI_DMA_SYNC_CH);
+    channel_config_set_transfer_data_size(&cfg, DMA_SIZE_32);
+    channel_config_set_read_increment(&cfg, false);
+    channel_config_set_write_increment(&cfg, false);
+    channel_config_set_dreq(&cfg, pio_get_dreq(SCSI_DMA_PIO, SCSI_DMA_SYNC_SM, true));
+    g_scsi_dma.dma_write_pacer_config = cfg;
+
+    // Try to enable interrupt handling on second core
+    // core1_active is cleared here and checked again after a 5 ms delay;
+    // presumably enable_irq_second_core() sets it once running - TODO confirm.
+    irq_set_enabled(DMA_IRQ_0, false);
+    g_scsi_dma.core1_active = false;
+    multicore_reset_core1();
+    multicore_launch_core1(&enable_irq_second_core);
+    delay(5);
+
+    if (!g_scsi_dma.core1_active)
+    {
+        // Fallback: handle the DMA interrupt on the current core instead
+        bluelog("Failed to offload SCSI DMA interrupts to second core, using first core");
+        multicore_reset_core1();
+        irq_set_exclusive_handler(DMA_IRQ_0, scsi_dma_write_irq);
+        irq_set_enabled(DMA_IRQ_0, true);
+    }
+}
+
+// Select the SCSI write mode.
+//   syncOffset - 0 selects asynchronous transfers, > 0 selects synchronous transfers
+//   syncPeriod - synchronous transfer period, used to derive the PIO delay values
+// For synchronous mode the RX FIFO preload/divider and the per-instruction delays
+// of the scsi_sync_write PIO program are recalculated and patched into PIO memory.
+void scsi_accel_rp2040_setWriteMode(int syncOffset, int syncPeriod)
+{
+    // Nothing to do when the requested mode is already in effect
+    if (syncOffset == g_scsi_dma.syncOffset && syncPeriod == g_scsi_dma.syncPeriod)
+    {
+        return;
+    }
+
+    g_scsi_dma.syncOffset = syncOffset;
+    g_scsi_dma.syncPeriod = syncPeriod;
+
+    // Asynchronous mode needs no further configuration
+    if (syncOffset <= 0)
+    {
+        return;
+    }
+
+    // Set up offset amount to PIO state machine configs.
+    // The RX fifo of scsi_sync_write has 4 slots.
+    // We can preload it with 0-3 items and set the autopush threshold 1, 2, 4 ... 32
+    // to act as a divider. This allows offsets 1 to 128 bytes.
+    // SCSI2SD code currently only uses offsets up to 15.
+    int divider;
+    int preload;
+    if (syncOffset <= 4)
+    {
+        divider = 1;
+        preload = 5 - syncOffset;
+    }
+    else if (syncOffset <= 8)
+    {
+        divider = 2;
+        preload = 5 - syncOffset / 2;
+    }
+    else if (syncOffset <= 16)
+    {
+        divider = 4;
+        preload = 5 - syncOffset / 4;
+    }
+    else
+    {
+        divider = 4;
+        preload = 0;
+    }
+
+    // To properly detect when all bytes have been ACKed,
+    // we need at least one vacant slot in the FIFO.
+    if (preload > 3)
+    {
+        preload = 3;
+    }
+
+    g_scsi_dma.syncOffsetDivider = divider;
+    g_scsi_dma.syncOffsetPreload = preload;
+
+    sm_config_set_out_shift(&g_scsi_dma.pio_cfg_sync_write_pacer, true, true, g_scsi_dma.syncOffsetDivider);
+    sm_config_set_in_shift(&g_scsi_dma.pio_cfg_sync_write, true, true, g_scsi_dma.syncOffsetDivider);
+
+    // Set up the timing parameters to PIO program
+    // The scsi_sync_write PIO program consists of three instructions.
+    // The delays are in clock cycles, each taking 8 ns.
+    // delay0: Delay from data write to REQ assertion
+    // delay1: Delay from REQ assert to REQ deassert
+    // delay2: Delay from REQ deassert to data write
+    const int totalDelay = syncPeriod * 4 / 8;
+
+    // Fast SCSI timing: 30 ns assertion period, 25 ns skew delay.
+    // The hardware rise and fall time require some extra delay,
+    // the values are tuned based on oscilloscope measurements.
+    // Slow SCSI timing: 90 ns assertion period, 55 ns skew delay.
+    const bool fast_timing = (syncPeriod <= 25);
+    const int delay0 = fast_timing ? 3 : 6;
+    const int delay1 = fast_timing ? 5 : 12;
+
+    // Remaining cycles go to delay2, limited to the range 0..15
+    int delay2 = totalDelay - delay0 - delay1 - 3;
+    if (delay2 < 0) delay2 = 0;
+    if (delay2 > 15) delay2 = 15;
+
+    // Patch the delay values into the instructions.
+    // The code in scsi_accel.pio must have delay set to 0 for this to work correctly.
+    const int delays[3] = {delay0, delay1, delay2};
+    for (int i = 0; i < 3; i++)
+    {
+        uint16_t patched = scsi_sync_write_program_instructions[i] | pio_encode_delay(delays[i]);
+        SCSI_DMA_PIO->instr_mem[g_scsi_dma.pio_offset_sync_write + i] = patched;
+    }
+}

+ 24 - 0
lib/BlueSCSI_platform_RP2040/scsi_accel_rp2040.h

@@ -0,0 +1,24 @@
+// Accelerated SCSI subroutines using RP2040 hardware PIO peripheral.
+
+#pragma once
+
+#include <stdint.h>
+
+// Initialize the accelerator: loads the PIO programs, prepares the DMA
+// configurations and sets up DMA interrupt handling.
+void scsi_accel_rp2040_init();
+
+// Set SCSI access mode for write requests.
+// Setting syncOffset = 0 enables asynchronous SCSI.
+// Setting syncOffset > 0 enables synchronous SCSI.
+// syncPeriod is used to derive the timing of synchronous transfers.
+void scsi_accel_rp2040_setWriteMode(int syncOffset, int syncPeriod);
+
+// startWrite: begin writing `count` bytes from `data` to the SCSI bus.
+// stopWrite: wait for the PIO to finish, then abort DMA and return to idle.
+// finishWrite: wait until the write state has returned to idle.
+// stopWrite and finishWrite set *resetFlag if they time out after 5 seconds.
+void scsi_accel_rp2040_startWrite(const uint8_t* data, uint32_t count, volatile int *resetFlag);
+void scsi_accel_rp2040_stopWrite(volatile int *resetFlag);
+void scsi_accel_rp2040_finishWrite(volatile int *resetFlag);
+
+// Query whether the data at pointer has already been read, i.e. buffer can be reused.
+// If data is NULL, checks if all writes have completed.
+bool scsi_accel_rp2040_isWriteFinished(const uint8_t* data);
+
+// Read data from SCSI bus.
+// Works for both asynchronous and synchronous modes.
+// count must be even. *parityError is set to 1 when a parity error is detected.
+void scsi_accel_rp2040_read(uint8_t *buf, uint32_t count, int *parityError, volatile int *resetFlag);

+ 487 - 0
lib/BlueSCSI_platform_RP2040/sd_card_sdio.cpp

@@ -0,0 +1,487 @@
+// Driver for accessing SD card in SDIO mode on RP2040.
+
+#include "BlueSCSI_platform.h"
+
+#ifdef SD_USE_SDIO
+
+#include "BlueSCSI_log.h"
+#include "rp2040_sdio.h"
+#include <hardware/gpio.h>
+#include <SdFat.h>
+#include <SdCard/SdCardInfo.h>
+
+static uint32_t g_sdio_ocr; // Operating condition register from card
+static uint32_t g_sdio_rca; // Relative card address
+static cid_t g_sdio_cid; // Card identification, read in begin()
+static csd_t g_sdio_csd; // Card-specific data, read in begin()
+static int g_sdio_error_line; // Source line of the most recent failed SDIO call
+static sdio_status_t g_sdio_error; // Status of the most recent SDIO call
+static uint32_t g_sdio_dma_buf[128]; // 512-byte word-aligned bounce buffer for unaligned single-sector transfers
+static uint32_t g_sdio_sector_count; // Sector count, set in begin()
+
+// Evaluate an SDIO call, store its status in g_sdio_error and yield true on SDIO_OK.
+// On any other status the failure is logged with the calling line and the result is false.
+#define checkReturnOk(call) ((g_sdio_error = (call)) == SDIO_OK ? true : logSDError(__LINE__))
+
+// Record the failing source line, log it together with g_sdio_error, and return false.
+static bool logSDError(int line)
+{
+    g_sdio_error_line = line;
+    bluelog("SDIO SD card error on line ", line, ", error code ", (int)g_sdio_error);
+    return false;
+}
+
+// Callback used by SCSI code for simultaneous processing
+static sd_callback_t m_stream_callback; // Progress callback, NULL when none is registered
+static const uint8_t *m_stream_buffer; // Start of the buffer the callback refers to
+static uint32_t m_stream_count; // Bytes of m_stream_buffer already assigned to SD transfers
+static uint32_t m_stream_count_start; // Value of m_stream_count at the start of the latest transfer
+
+// Register (or clear, with func == NULL) the progress callback and its buffer,
+// and reset the stream byte counters.
+void bluescsiplatform_set_sd_callback(sd_callback_t func, const uint8_t *buffer)
+{
+    m_stream_callback = func;
+    m_stream_buffer = buffer;
+    m_stream_count = 0;
+    m_stream_count_start = 0;
+}
+
+// Decide whether the registered progress callback applies to the transfer of
+// `count` bytes at `buf`. It applies only when `buf` continues exactly where the
+// previous transfer in the stream buffer ended; the stream position then advances.
+// Returns the callback, or NULL when none is registered or the buffer does not match.
+// accesstype and sector are used only for the debug message.
+static sd_callback_t get_stream_callback(const uint8_t *buf, uint32_t count, const char *accesstype, uint32_t sector)
+{
+    // Remember where this transfer starts within the stream
+    m_stream_count_start = m_stream_count;
+
+    if (!m_stream_callback)
+    {
+        return NULL;
+    }
+
+    const uint8_t *expected = m_stream_buffer + m_stream_count;
+    if (buf != expected)
+    {
+        bluedbg("SD card ", accesstype, "(", (int)sector,
+              ") slow transfer, buffer", (uint32_t)buf, " vs. ", (uint32_t)expected);
+        return NULL;
+    }
+
+    m_stream_count += count;
+    return m_stream_callback;
+}
+
+// Bring up the SD card in SDIO mode.
+// Sequence: CMD0/CMD8 handshake (up to 5 attempts), ACMD41 initialization with a
+// 1 second timeout, then CID, RCA and CSD are read, the card is selected and
+// switched to 4-bit bus mode, and finally the clock rate is raised.
+// Returns true on success, false if any step fails.
+bool SdioCard::begin(SdioConfig sdioConfig)
+{
+    uint32_t reply;
+    sdio_status_t status;
+
+    // Initialize at 1 MHz clock speed
+    rp2040_sdio_init(25);
+
+    // Establish initial connection with the card
+    for (int retries = 0; retries < 5; retries++)
+    {
+        delayMicroseconds(1000);
+        reply = 0;
+        rp2040_sdio_command_R1(CMD0, 0, NULL); // GO_IDLE_STATE
+        status = rp2040_sdio_command_R1(CMD8, 0x1AA, &reply); // SEND_IF_COND
+
+        if (status == SDIO_OK && reply == 0x1AA)
+        {
+            break;
+        }
+    }
+
+    if (reply != 0x1AA || status != SDIO_OK)
+    {
+        bluedbg("SDIO not responding to CMD8 SEND_IF_COND, status ", (int)status, " reply ", reply);
+        return false;
+    }
+
+    // Send ACMD41 to begin card initialization and wait for it to complete
+    uint32_t start = millis();
+    do {
+        if (!checkReturnOk(rp2040_sdio_command_R1(CMD55, 0, &reply)) || // APP_CMD
+            !checkReturnOk(rp2040_sdio_command_R3(ACMD41, 0xD0040000, &g_sdio_ocr))) // 3.0V voltage
+        {
+            return false;
+        }
+
+        if ((uint32_t)(millis() - start) > 1000)
+        {
+            bluelog("SDIO card initialization timeout");
+            return false;
+        }
+    // Bit 31 of the OCR is tested with an unsigned constant; shifting a signed 1
+    // into the sign bit is not portable.
+    } while (!(g_sdio_ocr & (1UL << 31)));
+
+    // Get CID
+    if (!checkReturnOk(rp2040_sdio_command_R2(CMD2, 0, (uint8_t*)&g_sdio_cid)))
+    {
+        bluedbg("SDIO failed to read CID");
+        return false;
+    }
+
+    // Get relative card address
+    if (!checkReturnOk(rp2040_sdio_command_R1(CMD3, 0, &g_sdio_rca)))
+    {
+        bluedbg("SDIO failed to get RCA");
+        return false;
+    }
+
+    // Get CSD
+    if (!checkReturnOk(rp2040_sdio_command_R2(CMD9, g_sdio_rca, (uint8_t*)&g_sdio_csd)))
+    {
+        bluedbg("SDIO failed to read CSD");
+        return false;
+    }
+
+    // Store the sector count for the end-of-drive check in readSectors()
+    g_sdio_sector_count = sectorCount();
+
+    // Select card
+    if (!checkReturnOk(rp2040_sdio_command_R1(CMD7, g_sdio_rca, &reply)))
+    {
+        bluedbg("SDIO failed to select card");
+        return false;
+    }
+
+    // Set 4-bit bus mode
+    if (!checkReturnOk(rp2040_sdio_command_R1(CMD55, g_sdio_rca, &reply)) ||
+        !checkReturnOk(rp2040_sdio_command_R1(ACMD6, 2, &reply)))
+    {
+        bluedbg("SDIO failed to set bus width");
+        return false;
+    }
+
+    // Increase to 25 MHz clock rate
+    rp2040_sdio_init(1);
+
+    return true;
+}
+
+// Return the status of the most recent SDIO call, converted to uint8_t.
+uint8_t SdioCard::errorCode() const
+{
+    return g_sdio_error;
+}
+
+// No additional error data is tracked by this driver; always returns 0.
+uint32_t SdioCard::errorData() const
+{
+    return 0;
+}
+
+// Return the source line recorded by the most recent failed checkReturnOk() call.
+uint32_t SdioCard::errorLine() const
+{
+    return g_sdio_error_line;
+}
+
+// The card is reported busy while the SDIO_D0 input pin reads low.
+bool SdioCard::isBusy()
+{
+    return (sio_hw->gpio_in & (1 << SDIO_D0)) == 0;
+}
+
+// The clock rate is not reported by this driver; always returns 0.
+uint32_t SdioCard::kHzSdClk()
+{
+    return 0;
+}
+
+// Copy the CID stored by begin() into *cid. No card access is made; always returns true.
+bool SdioCard::readCID(cid_t* cid)
+{
+    *cid = g_sdio_cid;
+    return true;
+}
+
+// Copy the CSD stored by begin() into *csd. No card access is made; always returns true.
+bool SdioCard::readCSD(csd_t* csd)
+{
+    *csd = g_sdio_csd;
+    return true;
+}
+
+// Note: *ocr receives the CMD13 reply (card status), not the OCR register.
+// Returns true when the command succeeded.
+bool SdioCard::readOCR(uint32_t* ocr)
+{
+    // SDIO mode does not have CMD58, but main program uses this to
+    // poll for card presence. Return status register instead.
+    return checkReturnOk(rp2040_sdio_command_R1(CMD13, g_sdio_rca, ocr));
+}
+
+// Not implemented for this driver: logs a message and returns false.
+bool SdioCard::readData(uint8_t* dst)
+{
+    bluelog("SdioCard::readData() called but not implemented!");
+    return false;
+}
+
+// Not implemented for this driver: logs a message and returns false.
+bool SdioCard::readStart(uint32_t sector)
+{
+    bluelog("SdioCard::readStart() called but not implemented!");
+    return false;
+}
+
+// Not implemented for this driver: logs a message and returns false.
+bool SdioCard::readStop()
+{
+    bluelog("SdioCard::readStop() called but not implemented!");
+    return false;
+}
+
+// Return the card capacity as computed by sdCardCapacity() from the stored CSD.
+uint32_t SdioCard::sectorCount()
+{
+    return sdCardCapacity(&g_sdio_csd);
+}
+
+// Query the card status with CMD13. Returns the reply, or 0 if the command failed.
+uint32_t SdioCard::status()
+{
+    uint32_t card_status;
+    const bool ok = checkReturnOk(rp2040_sdio_command_R1(CMD13, g_sdio_rca, &card_status));
+    return ok ? card_status : 0;
+}
+
+// Send CMD12 (STOP_TRANSMISSION) to end a multi-block transfer.
+//   blocking - when true, additionally wait up to 100 ms for the card to leave
+//              the busy state, invoking the stream callback while waiting.
+// Returns false if the command fails or the card is still busy after the wait.
+bool SdioCard::stopTransmission(bool blocking)
+{
+    uint32_t reply;
+    if (!checkReturnOk(rp2040_sdio_command_R1(CMD12, 0, &reply)))
+    {
+        return false;
+    }
+
+    if (!blocking)
+    {
+        return true;
+    }
+
+    // Compare elapsed time instead of an absolute deadline, so that the wait
+    // stays correct when millis() wraps around during it.
+    uint32_t start = millis();
+    while ((uint32_t)(millis() - start) < 100 && isBusy())
+    {
+        if (m_stream_callback)
+        {
+            m_stream_callback(m_stream_count);
+        }
+    }
+
+    if (isBusy())
+    {
+        bluelog("SdioCard::stopTransmission() timeout");
+        return false;
+    }
+
+    return true;
+}
+
+// Nothing to synchronize in this driver; always returns true.
+bool SdioCard::syncDevice()
+{
+    return true;
+}
+
+// Report the card type based on bit 30 of the stored OCR:
+// SD_CARD_TYPE_SDHC when set, SD_CARD_TYPE_SD2 otherwise.
+uint8_t SdioCard::type() const
+{
+    const bool high_capacity = (g_sdio_ocr & (1 << 30)) != 0;
+    return high_capacity ? SD_CARD_TYPE_SDHC : SD_CARD_TYPE_SD2;
+}
+
+// Not implemented for this driver: logs a message and returns false.
+bool SdioCard::writeData(const uint8_t* src)
+{
+    bluelog("SdioCard::writeData() called but not implemented!");
+    return false;
+}
+
+// Not implemented for this driver: logs a message and returns false.
+bool SdioCard::writeStart(uint32_t sector)
+{
+    bluelog("SdioCard::writeStart() called but not implemented!");
+    return false;
+}
+
+// Not implemented for this driver: logs a message and returns false.
+bool SdioCard::writeStop()
+{
+    bluelog("SdioCard::writeStop() called but not implemented!");
+    return false;
+}
+
+// Erase is not implemented for this driver; always returns false.
+bool SdioCard::erase(uint32_t firstSector, uint32_t lastSector)
+{
+    return false;
+    // return checkReturnOk(sd_erase(firstSector * 512, lastSector * 512));
+}
+
+/* Writing and reading, with progress callback */
+
+// Write one 512-byte sector to the card.
+// A source pointer that is not 4-byte aligned is first copied to the bounce buffer.
+// While the transfer runs, progress is reported through the stream callback when
+// one applies. Returns true when the transfer completed with SDIO_OK.
+bool SdioCard::writeSector(uint32_t sector, const uint8_t* src)
+{
+    const bool unaligned = (((uint32_t)src & 3) != 0);
+    if (unaligned)
+    {
+        // The transfer needs a word-aligned buffer, so go through the temporary one
+        memcpy(g_sdio_dma_buf, src, sizeof(g_sdio_dma_buf));
+        src = (uint8_t*)g_sdio_dma_buf;
+    }
+
+    // If possible, report transfer status to application through callback.
+    sd_callback_t progress_cb = get_stream_callback(src, 512, "writeSector", sector);
+
+    uint32_t response;
+    if (!checkReturnOk(rp2040_sdio_command_R1(16, 512, &response))) // SET_BLOCKLEN
+    {
+        return false;
+    }
+    if (!checkReturnOk(rp2040_sdio_command_R1(CMD24, sector, &response))) // WRITE_BLOCK
+    {
+        return false;
+    }
+    if (!checkReturnOk(rp2040_sdio_tx_start(src, 1))) // Start transmission
+    {
+        return false;
+    }
+
+    // Poll until the transfer is no longer busy
+    for (;;)
+    {
+        uint32_t transferred;
+        g_sdio_error = rp2040_sdio_tx_poll(&transferred);
+
+        if (progress_cb)
+        {
+            progress_cb(m_stream_count_start + transferred);
+        }
+
+        if (g_sdio_error != SDIO_BUSY)
+        {
+            break;
+        }
+    }
+
+    if (g_sdio_error != SDIO_OK)
+    {
+        bluelog("SdioCard::writeSector(", sector, ") failed: ", (int)g_sdio_error);
+    }
+
+    return g_sdio_error == SDIO_OK;
+}
+
+// Write `n` consecutive 512-byte sectors starting at `sector`.
+// An unaligned source falls back to sector-by-sector writes; otherwise a
+// multi-block write is issued and ended with stopTransmission().
+// Returns true when every step succeeded.
+bool SdioCard::writeSectors(uint32_t sector, const uint8_t* src, size_t n)
+{
+    const bool unaligned = (((uint32_t)src & 3) != 0);
+    if (unaligned)
+    {
+        // Unaligned write, execute sector-by-sector
+        size_t idx = 0;
+        while (idx < n)
+        {
+            if (!writeSector(sector + idx, src + 512 * idx))
+            {
+                return false;
+            }
+            idx++;
+        }
+        return true;
+    }
+
+    sd_callback_t progress_cb = get_stream_callback(src, n * 512, "writeSectors", sector);
+
+    uint32_t response;
+    if (!checkReturnOk(rp2040_sdio_command_R1(16, 512, &response))) // SET_BLOCKLEN
+    {
+        return false;
+    }
+    if (!checkReturnOk(rp2040_sdio_command_R1(CMD55, g_sdio_rca, &response))) // APP_CMD
+    {
+        return false;
+    }
+    if (!checkReturnOk(rp2040_sdio_command_R1(ACMD23, n, &response))) // SET_WR_CLK_ERASE_COUNT
+    {
+        return false;
+    }
+    if (!checkReturnOk(rp2040_sdio_command_R1(CMD25, sector, &response))) // WRITE_MULTIPLE_BLOCK
+    {
+        return false;
+    }
+    if (!checkReturnOk(rp2040_sdio_tx_start(src, n))) // Start transmission
+    {
+        return false;
+    }
+
+    // Poll until the transfer is no longer busy
+    for (;;)
+    {
+        uint32_t transferred;
+        g_sdio_error = rp2040_sdio_tx_poll(&transferred);
+
+        if (progress_cb)
+        {
+            progress_cb(m_stream_count_start + transferred);
+        }
+
+        if (g_sdio_error != SDIO_BUSY)
+        {
+            break;
+        }
+    }
+
+    if (g_sdio_error == SDIO_OK)
+    {
+        return stopTransmission(true);
+    }
+
+    // The transfer failed: still end the multi-block write, but report failure
+    bluelog("SdioCard::writeSectors(", sector, ",...,", (int)n, ") failed: ", (int)g_sdio_error);
+    stopTransmission(true);
+    return false;
+}
+
+// Read one 512-byte sector from the card into `dst`.
+// A destination that is not 4-byte aligned is served through the bounce buffer
+// and copied to the caller's buffer afterwards.
+// Returns true when the transfer completed with SDIO_OK.
+bool SdioCard::readSector(uint32_t sector, uint8_t* dst)
+{
+    uint8_t *caller_dst = dst;
+    const bool use_bounce = (((uint32_t)dst & 3) != 0);
+    if (use_bounce)
+    {
+        // Receive into the aligned temporary buffer and copy out at the end
+        dst = (uint8_t*)g_sdio_dma_buf;
+    }
+
+    sd_callback_t progress_cb = get_stream_callback(dst, 512, "readSector", sector);
+
+    uint32_t response;
+    if (!checkReturnOk(rp2040_sdio_command_R1(16, 512, &response))) // SET_BLOCKLEN
+    {
+        return false;
+    }
+    if (!checkReturnOk(rp2040_sdio_rx_start(dst, 1))) // Prepare for reception
+    {
+        return false;
+    }
+    if (!checkReturnOk(rp2040_sdio_command_R1(CMD17, sector, &response))) // READ_SINGLE_BLOCK
+    {
+        return false;
+    }
+
+    // Poll until the transfer is no longer busy
+    for (;;)
+    {
+        uint32_t received;
+        g_sdio_error = rp2040_sdio_rx_poll(&received);
+
+        if (progress_cb)
+        {
+            progress_cb(m_stream_count_start + received);
+        }
+
+        if (g_sdio_error != SDIO_BUSY)
+        {
+            break;
+        }
+    }
+
+    if (g_sdio_error != SDIO_OK)
+    {
+        bluelog("SdioCard::readSector(", sector, ") failed: ", (int)g_sdio_error);
+    }
+
+    if (use_bounce)
+    {
+        memcpy(caller_dst, g_sdio_dma_buf, sizeof(g_sdio_dma_buf));
+    }
+
+    return g_sdio_error == SDIO_OK;
+}
+
+// Read `n` consecutive 512-byte sectors starting at `sector` into `dst`.
+// Unaligned destinations and reads reaching the end of the card are done
+// sector-by-sector; otherwise a multi-block read is issued and ended with
+// stopTransmission(). Returns true when every step succeeded.
+bool SdioCard::readSectors(uint32_t sector, uint8_t* dst, size_t n)
+{
+    const bool single_sector_mode = (((uint32_t)dst & 3) != 0 || sector + n >= g_sdio_sector_count);
+    if (single_sector_mode)
+    {
+        // Unaligned read or end-of-drive read, execute sector-by-sector
+        size_t idx = 0;
+        while (idx < n)
+        {
+            if (!readSector(sector + idx, dst + 512 * idx))
+            {
+                return false;
+            }
+            idx++;
+        }
+        return true;
+    }
+
+    sd_callback_t progress_cb = get_stream_callback(dst, n * 512, "readSectors", sector);
+
+    uint32_t response;
+    if (!checkReturnOk(rp2040_sdio_command_R1(16, 512, &response))) // SET_BLOCKLEN
+    {
+        return false;
+    }
+    if (!checkReturnOk(rp2040_sdio_rx_start(dst, n))) // Prepare for reception
+    {
+        return false;
+    }
+    if (!checkReturnOk(rp2040_sdio_command_R1(CMD18, sector, &response))) // READ_MULTIPLE_BLOCK
+    {
+        return false;
+    }
+
+    // Poll until the transfer is no longer busy
+    for (;;)
+    {
+        uint32_t received;
+        g_sdio_error = rp2040_sdio_rx_poll(&received);
+
+        if (progress_cb)
+        {
+            progress_cb(m_stream_count_start + received);
+        }
+
+        if (g_sdio_error != SDIO_BUSY)
+        {
+            break;
+        }
+    }
+
+    if (g_sdio_error == SDIO_OK)
+    {
+        return stopTransmission(true);
+    }
+
+    // The transfer failed: still end the multi-block read, but report failure
+    bluelog("SdioCard::readSectors(", sector, ",...,", (int)n, ") failed: ", (int)g_sdio_error);
+    stopTransmission(true);
+    return false;
+}
+
+// Chip-select hooks: not used in SDIO mode, but empty definitions are needed
+// to avoid a build error.
+void sdCsInit(SdCsPin_t pin) {}
+void sdCsWrite(SdCsPin_t pin, bool level) {}
+
+// SDIO configuration object for the main program
+SdioConfig g_sd_sdio_config(DMA_SDIO);
+
+#endif

+ 82 - 0
lib/BlueSCSI_platform_RP2040/sd_card_spi.cpp

@@ -0,0 +1,82 @@
+// Driver and interface for accessing SD card in SPI mode
+
+#include "BlueSCSI_platform.h"
+#include "BlueSCSI_log.h"
+#include <hardware/spi.h>
+#include <SdFat.h>
+
+#ifndef SD_USE_SDIO
+
+// SPI driver for SD card access, implemented on top of the RP2040 hardware SPI functions.
+class RP2040SPIDriver : public SdSpiBaseClass
+{
+public:
+    // Nothing to do here; the peripheral is configured in activate()
+    void begin(SdSpiConfig config)
+    {
+    }
+
+    // Configure the SPI peripheral with the stored clock rate and 8-bit, mode 0, MSB-first format
+    void activate()
+    {
+        _spi_init(SD_SPI, m_sckfreq);
+        spi_set_format(SD_SPI, 8, SPI_CPOL_0, SPI_CPHA_0, SPI_MSB_FIRST);
+    }
+
+    // Nothing to release
+    void deactivate()
+    {
+    }
+
+    // Spin until the transmit FIFO is empty and the peripheral is no longer busy
+    void wait_idle()
+    {
+        while ((spi_get_hw(SD_SPI)->sr & SPI_SSPSR_TFE_BITS) == 0)
+        {
+        }
+        while ((spi_get_hw(SD_SPI)->sr & SPI_SSPSR_BSY_BITS) != 0)
+        {
+        }
+    }
+
+    // Single byte receive: clocks out 0xFF and returns the byte read back
+    uint8_t receive()
+    {
+        uint8_t filler = 0xFF;
+        uint8_t received;
+        spi_write_read_blocking(SD_SPI, &filler, &received, 1);
+        return received;
+    }
+
+    // Single byte send, waits until the byte has left the peripheral
+    void send(uint8_t data)
+    {
+        spi_write_blocking(SD_SPI, &data, 1);
+        wait_idle();
+    }
+
+    // Multiple byte receive: clocks out 0xFF for every byte; always returns 0
+    uint8_t receive(uint8_t* buf, size_t count)
+    {
+        spi_read_blocking(SD_SPI, 0xFF, buf, count);
+        return 0;
+    }
+
+    // Multiple byte send
+    void send(const uint8_t* buf, size_t count)
+    {
+        spi_write_blocking(SD_SPI, buf, count);
+    }
+
+    // Store the clock rate; it takes effect at the next activate()
+    void setSckSpeed(uint32_t maxSck)
+    {
+        m_sckfreq = maxSck;
+    }
+
+private:
+    uint32_t m_sckfreq; // Clock rate passed to _spi_init() in activate()
+};
+
+// Chip-select initialization hook; intentionally empty in this driver.
+void sdCsInit(SdCsPin_t pin)
+{
+}
+
+// Drive the SD card chip-select line. The `pin` argument is ignored;
+// the SD_SPI_CS GPIO is always the one that is set or cleared.
+void sdCsWrite(SdCsPin_t pin, bool level)
+{
+    if (!level)
+    {
+        sio_hw->gpio_clr = (1 << SD_SPI_CS);
+    }
+    else
+    {
+        sio_hw->gpio_set = (1 << SD_SPI_CS);
+    }
+}
+
+// SPI driver instance and SdFat configuration that uses it
+RP2040SPIDriver g_sd_spi_port;
+SdSpiConfig g_sd_spi_config(0, DEDICATED_SPI, SD_SCK_MHZ(25), &g_sd_spi_port);
+
+// Progress callbacks are not supported in SPI mode; the arguments are ignored.
+void bluescsiplatform_set_sd_callback(sd_callback_t func, const uint8_t *buffer)
+{
+}
+
+#endif