Browse Source

RP2040 SDIO read access working.

Write does not work yet.

(cherry picked from commit 6c4b4b2beee28b832b37bf31757eab9b49f8d108)
Petteri Aimonen, 3 years ago
parent
commit
b6202b3beb

+ 5 - 5
lib/ZuluSCSI_platform_RP2040/ZuluSCSI_platform.h

@@ -106,15 +106,15 @@ extern const uint32_t g_scsi_parity_lookup[256];
 }
 
 // SD card driver for SdFat
-class SdSpiConfig;
-class SdioConfig;
-extern SdSpiConfig g_sd_spi_config;
-extern SdioConfig g_sd_sdio_config;
 
 #ifdef SD_USE_SDIO
+class SdioConfig;
+extern SdioConfig g_sd_sdio_config;
 #define SD_CONFIG g_sd_sdio_config
-#define SD_CONFIG_CRASH g_sd_spi_config
+#define SD_CONFIG_CRASH g_sd_sdio_config
 #else
+class SdSpiConfig;
+extern SdSpiConfig g_sd_spi_config;
 #define SD_CONFIG g_sd_spi_config
 #define SD_CONFIG_CRASH g_sd_spi_config
 #endif

+ 524 - 16
lib/ZuluSCSI_platform_RP2040/rp2040_sdio.cpp

@@ -11,6 +11,7 @@
 #include "rp2040_sdio.h"
 #include "rp2040_sdio.pio.h"
 #include <hardware/pio.h>
+#include <hardware/dma.h>
 #include <hardware/gpio.h>
 #include <ZuluSCSI_platform.h>
 #include <ZuluSCSI_log.h>
@@ -18,11 +19,35 @@
 #define SDIO_PIO pio1
 #define SDIO_CMD_SM 0
 #define SDIO_DATA_SM 1
+#define SDIO_DMA_CH 1
+
+// Maximum number of 512 byte blocks to transfer in one request
+#define SDIO_MAX_BLOCKS 256
+
+enum sdio_transfer_state_t { SDIO_IDLE, SDIO_RX, SDIO_TX };
 
 static struct {
     uint32_t pio_cmd_clk_offset;
+    uint32_t pio_data_rx_offset;
+    pio_sm_config pio_cfg_data_rx;
+    uint32_t pio_data_tx_offset;
+    pio_sm_config pio_cfg_data_tx;
+
+    sdio_transfer_state_t transfer_state;
+    bool inside_irq_handler; // True if we are inside crash handler code
+    uint32_t transfer_start_time;
+    uint32_t *data_buf;
+    uint32_t blocks_done; // Number of blocks transferred so far
+    uint32_t total_blocks; // Total number of blocks to transfer
+    uint32_t blocks_checksumed; // Number of blocks that have had CRC calculated
+    uint32_t checksum_errors; // Number of checksum errors detected
+    uint64_t block_checksums[SDIO_MAX_BLOCKS];
 } g_sdio;
 
+/*******************************************************
+ * Checksum algorithms
+ *******************************************************/
+
 // Table lookup for calculating CRC-7 checksum that is used in SDIO command packets.
 // Usage:
 //    uint8_t crc = 0;
@@ -47,9 +72,47 @@ static const uint8_t crc7_table[256] = {
 	0x1c, 0x0e, 0x38, 0x2a, 0x54, 0x46, 0x70, 0x62,	0x8c, 0x9e, 0xa8, 0xba, 0xc4, 0xd6, 0xe0, 0xf2
 };
 
-sdio_status_t rp2040_sdio_command_R1(uint8_t command, uint32_t arg, uint32_t *response)
+// Compute the SDIO data CRC16 for all four parallel data lines in one pass.
+// In 4-bit bus mode the CRC16 algorithm runs separately on each line,
+// which gives a total of 4 x 16 = 64 bits of checksum.
+//
+// data:      payload as 32-bit words
+// num_words: number of 32-bit words to process
+// Returns the combined 64-bit checksum, computed from an all-zero initial state.
+uint64_t sdio_crc16_4bit_checksum(uint32_t *data, uint32_t num_words)
+{
+    uint64_t accum = 0;
+
+    for (uint32_t idx = 0; idx < num_words; idx++)
+    {
+        // One 32-bit word carries 8 bits for each line.
+        // SDIO is big-endian on the wire, so swap the byte order first.
+        uint32_t incoming = __builtin_bswap32(data[idx]);
+
+        // The upper 32 bits (8 bits per line) are shifted out of the accumulator
+        uint32_t outgoing = accum >> 32;
+        accum <<= 32;
+
+        // Fold the outgoing bits onto themselves with a 4 bit delay
+        outgoing ^= (outgoing >> 16);
+
+        // Fold the incoming bits onto the outgoing bits with a 4 bit delay
+        outgoing ^= (incoming >> 16);
+
+        // Apply the combined outgoing and incoming bits at every tap
+        uint64_t feedback = outgoing ^ incoming;
+        accum ^= feedback;
+        accum ^= feedback << (5 * 4);
+        accum ^= feedback << (12 * 4);
+    }
+
+    return accum;
+}
+
+/*******************************************************
+ * Basic SDIO command execution
+ *******************************************************/
+
+static void sdio_send_command(uint8_t command, uint32_t arg, uint8_t response_bits)
 {
-    azdbg("Command: ", command, " arg ", arg);
+    // azdbg("SDIO Command: ", (int)command, " arg ", arg);
 
     // Format the arguments in the way expected by the PIO code.
     uint32_t word0 =
@@ -65,9 +128,9 @@ sdio_status_t rp2040_sdio_command_R1(uint8_t command, uint32_t arg, uint32_t *re
         ( 1 << 8); // End bit
 
     // Set number of bits in response minus one, or leave at 0 if no response expected
-    if (response)
+    if (response_bits)
     {
-        word1 |= (47 << 0);
+        word1 |= ((response_bits - 1) << 0);
     }
 
     // Calculate checksum in the order that the bytes will be transmitted (big-endian)
@@ -83,6 +146,11 @@ sdio_status_t rp2040_sdio_command_R1(uint8_t command, uint32_t arg, uint32_t *re
     pio_sm_clear_fifos(SDIO_PIO, SDIO_CMD_SM);
     pio_sm_put(SDIO_PIO, SDIO_CMD_SM, word0);
     pio_sm_put(SDIO_PIO, SDIO_CMD_SM, word1);
+}
+
+sdio_status_t rp2040_sdio_command_R1(uint8_t command, uint32_t arg, uint32_t *response)
+{
+    sdio_send_command(command, arg, response ? 48 : 0);
 
     // Wait for response
     uint32_t start = millis();
@@ -91,7 +159,7 @@ sdio_status_t rp2040_sdio_command_R1(uint8_t command, uint32_t arg, uint32_t *re
     {
         if ((uint32_t)(millis() - start) > 2)
         {
-            azdbg("Timeout waiting for response in rp2040_sdio_command_R1(), ",
+            azdbg("Timeout waiting for response in rp2040_sdio_command_R1(", (int)command, "), ",
                   "PIO PC: ", (int)pio_sm_get_pc(SDIO_PIO, SDIO_CMD_SM) - (int)g_sdio.pio_cmd_clk_offset,
                   " RXF: ", (int)pio_sm_get_rx_fifo_level(SDIO_PIO, SDIO_CMD_SM),
                   " TXF: ", (int)pio_sm_get_tx_fifo_level(SDIO_PIO, SDIO_CMD_SM));
@@ -103,20 +171,15 @@ sdio_status_t rp2040_sdio_command_R1(uint8_t command, uint32_t arg, uint32_t *re
         }
     }
 
-    delay(1);
-    azdbg("PIO PC: ", (int)pio_sm_get_pc(SDIO_PIO, SDIO_CMD_SM) - (int)g_sdio.pio_cmd_clk_offset,
-                  " RXF: ", (int)pio_sm_get_rx_fifo_level(SDIO_PIO, SDIO_CMD_SM),
-                  " TXF: ", (int)pio_sm_get_tx_fifo_level(SDIO_PIO, SDIO_CMD_SM));
-
     if (response)
     {
         // Read out response packet
         uint32_t resp0 = pio_sm_get(SDIO_PIO, SDIO_CMD_SM);
         uint32_t resp1 = pio_sm_get(SDIO_PIO, SDIO_CMD_SM);
-        azdbg(resp0, " ", resp1);
+        // azdbg("SDIO R1 response: ", resp0, " ", resp1);
 
         // Calculate response checksum
-        crc = 0;
+        uint8_t crc = 0;
         crc = crc7_table[crc ^ ((resp0 >> 24) & 0xFF)];
         crc = crc7_table[crc ^ ((resp0 >> 16) & 0xFF)];
         crc = crc7_table[crc ^ ((resp0 >>  8) & 0xFF)];
@@ -126,8 +189,15 @@ sdio_status_t rp2040_sdio_command_R1(uint8_t command, uint32_t arg, uint32_t *re
         uint8_t actual_crc = ((resp1 >> 0) & 0xFE);
         if (crc != actual_crc)
         {
-            azdbg("CRC error in rp2040_sdio_command_R1(): calculated ", crc, " packet has ", actual_crc);
-            return SDIO_ERR_CRC;
+            azdbg("rp2040_sdio_command_R1(", (int)command, "): CRC error, calculated ", crc, " packet has ", actual_crc);
+            return SDIO_ERR_RESPONSE_CRC;
+        }
+
+        uint8_t response_cmd = ((resp0 >> 24) & 0xFF);
+        if (response_cmd != command && command != 41)
+        {
+            azdbg("rp2040_sdio_command_R1(", (int)command, "): received reply for ", (int)response_cmd);
+            return SDIO_ERR_RESPONSE_CODE;
         }
 
         *response = ((resp0 & 0xFFFFFF) << 8) | ((resp1 >> 8) & 0xFF);
@@ -141,19 +211,425 @@ sdio_status_t rp2040_sdio_command_R1(uint8_t command, uint32_t arg, uint32_t *re
     return SDIO_OK;
 }
 
-void rp2040_sdio_init()
+// Execute a command that has a 136-bit reply (response type R2).
+// response[0..14] receive the payload bytes and response[15] the final
+// byte of the reply, which holds the CRC7 in its upper 7 bits.
+// Returns SDIO_OK, or a timeout / CRC / response code error.
+sdio_status_t rp2040_sdio_command_R2(uint8_t command, uint32_t arg, uint8_t response[16])
+{
+    // The reply is longer than the PIO FIFO, so a DMA channel drains it
+    // into a local buffer. The channel is armed before the command is sent.
+    uint32_t rx_words[5];
+    pio_sm_clear_fifos(SDIO_PIO, SDIO_CMD_SM);
+
+    dma_channel_config cfg = dma_channel_get_default_config(SDIO_DMA_CH);
+    channel_config_set_transfer_data_size(&cfg, DMA_SIZE_32);
+    channel_config_set_read_increment(&cfg, false);
+    channel_config_set_write_increment(&cfg, true);
+    channel_config_set_dreq(&cfg, pio_get_dreq(SDIO_PIO, SDIO_CMD_SM, false));
+    dma_channel_configure(SDIO_DMA_CH, &cfg, &rx_words, &SDIO_PIO->rxf[SDIO_CMD_SM], 5, true);
+
+    sdio_send_command(command, arg, 136);
+
+    // Poll until all five words have arrived, giving up after 2 ms
+    const uint32_t t0 = millis();
+    while (dma_channel_is_busy(SDIO_DMA_CH))
+    {
+        uint32_t elapsed = (uint32_t)(millis() - t0);
+        if (elapsed <= 2)
+        {
+            continue;
+        }
+
+        azdbg("Timeout waiting for response in rp2040_sdio_command_R2(", (int)command, "), ",
+              "PIO PC: ", (int)pio_sm_get_pc(SDIO_PIO, SDIO_CMD_SM) - (int)g_sdio.pio_cmd_clk_offset,
+              " RXF: ", (int)pio_sm_get_rx_fifo_level(SDIO_PIO, SDIO_CMD_SM),
+              " TXF: ", (int)pio_sm_get_tx_fifo_level(SDIO_PIO, SDIO_CMD_SM));
+
+        // Return the state machine program to its starting point
+        dma_channel_abort(SDIO_DMA_CH);
+        pio_sm_clear_fifos(SDIO_PIO, SDIO_CMD_SM);
+        pio_sm_exec(SDIO_PIO, SDIO_CMD_SM, pio_encode_jmp(g_sdio.pio_cmd_clk_offset));
+        return SDIO_ERR_RESPONSE_TIMEOUT;
+    }
+
+    dma_channel_abort(SDIO_DMA_CH);
+
+    // Unpack the payload and accumulate its CRC7 in the same pass.
+    // Treating rx_words[0..3] as a big-endian byte stream, the payload is
+    // stream bytes 1..15; stream byte 0 is the reply code checked below.
+    uint8_t crc = 0;
+    for (int i = 0; i < 15; i++)
+    {
+        int pos = i + 1;
+        response[i] = ((rx_words[pos / 4] >> (24 - 8 * (pos % 4))) & 0xFF);
+        crc = crc7_table[crc ^ response[i]];
+    }
+    response[15] = ((rx_words[4] >>  0) & 0xFF);
+
+    // Bit 0 of the last byte is not part of the CRC
+    uint8_t actual_crc = response[15] & 0xFE;
+    if (crc != actual_crc)
+    {
+        azdbg("rp2040_sdio_command_R2(", (int)command, "): CRC error, calculated ", crc, " packet has ", actual_crc);
+        return SDIO_ERR_RESPONSE_CRC;
+    }
+
+    uint8_t response_cmd = ((rx_words[0] >> 24) & 0xFF);
+    if (response_cmd != 0x3F)
+    {
+        azdbg("rp2040_sdio_command_R2(", (int)command, "): Expected reply code 0x3F");
+        return SDIO_ERR_RESPONSE_CODE;
+    }
+
+    return SDIO_OK;
+}
+
+
+// Execute a command that has a 48-bit reply and store its 32-bit payload
+// to *response. Neither the CRC nor the command code of the reply is
+// checked here (response type R3).
+// Returns SDIO_OK, or SDIO_ERR_RESPONSE_TIMEOUT if no reply arrives in time.
+sdio_status_t rp2040_sdio_command_R3(uint8_t command, uint32_t arg, uint32_t *response)
+{
+    sdio_send_command(command, arg, 48);
+
+    // The reply occupies two FIFO words; give up after 2 ms
+    const uint32_t t0 = millis();
+    for (;;)
+    {
+        if (pio_sm_get_rx_fifo_level(SDIO_PIO, SDIO_CMD_SM) >= 2)
+        {
+            break;
+        }
+
+        if ((uint32_t)(millis() - t0) <= 2)
+        {
+            continue;
+        }
+
+        azdbg("Timeout waiting for response in rp2040_sdio_command_R3(", (int)command, "), ",
+              "PIO PC: ", (int)pio_sm_get_pc(SDIO_PIO, SDIO_CMD_SM) - (int)g_sdio.pio_cmd_clk_offset,
+              " RXF: ", (int)pio_sm_get_rx_fifo_level(SDIO_PIO, SDIO_CMD_SM),
+              " TXF: ", (int)pio_sm_get_tx_fifo_level(SDIO_PIO, SDIO_CMD_SM));
+
+        // Return the state machine program to its starting point
+        pio_sm_clear_fifos(SDIO_PIO, SDIO_CMD_SM);
+        pio_sm_exec(SDIO_PIO, SDIO_CMD_SM, pio_encode_jmp(g_sdio.pio_cmd_clk_offset));
+        return SDIO_ERR_RESPONSE_TIMEOUT;
+    }
+
+    // Payload = low 24 bits of the first word followed by bits 15..8 of the second
+    uint32_t word_hi = pio_sm_get(SDIO_PIO, SDIO_CMD_SM);
+    uint32_t word_lo = pio_sm_get(SDIO_PIO, SDIO_CMD_SM);
+    uint32_t upper = (word_hi & 0xFFFFFF) << 8;
+    uint32_t lower = (word_lo >> 8) & 0xFF;
+    *response = upper | lower;
+
+    return SDIO_OK;
+}
+
+/*******************************************************
+ * Data reception from SD card
+ *******************************************************/
+
+// Arm the data state machine and the DMA channel for receiving one more block.
+// The block is written to g_sdio.data_buf at block index g_sdio.blocks_done.
+// Must only be called while there are blocks left to receive.
+static void sdio_start_next_block_rx()
+{
+    assert (g_sdio.blocks_done < g_sdio.total_blocks);
+
+    // Disable and reset PIO from previous block
+    pio_sm_set_enabled(SDIO_PIO, SDIO_DATA_SM, false);
+    pio_sm_restart(SDIO_PIO, SDIO_DATA_SM);
+    pio_sm_exec(SDIO_PIO, SDIO_DATA_SM, pio_encode_jmp(g_sdio.pio_data_rx_offset));
+
+    // Start new DMA transfer
+    // (128 words of 32 bits = one 512 byte block; data_buf is a uint32_t pointer)
+    dma_channel_transfer_to_buffer_now(SDIO_DMA_CH, g_sdio.data_buf + 128 * g_sdio.blocks_done, 128);
+
+    // Enable PIO
+    pio_sm_set_enabled(SDIO_PIO, SDIO_DATA_SM, true);
+}
+
+// Check checksums for received blocks
+// Verifies at most maxcount blocks that have been received but not yet
+// checked, comparing the locally calculated checksum against the value
+// stored in g_sdio.block_checksums[]. Mismatches are counted in
+// g_sdio.checksum_errors; only the first one is logged.
+static void sdio_verify_rx_checksums(uint32_t maxcount)
 {
-    azdbg("rp2040_sdio_init()");
+    while (g_sdio.blocks_checksumed < g_sdio.blocks_done && maxcount-- > 0)
+    {
+        // Claim the block index before doing the calculation
+        int blockidx = g_sdio.blocks_checksumed++;
+        uint64_t checksum = sdio_crc16_4bit_checksum(g_sdio.data_buf + blockidx * 128, 128);
+        uint64_t expected = g_sdio.block_checksums[blockidx];
+
+        if (checksum != expected)
+        {
+            g_sdio.checksum_errors++;
+            if (g_sdio.checksum_errors == 1)
+            {
+                azlog("SDIO checksum error in reception: calculated ", checksum, " expected ", expected);
+            }
+        }
+    }
+}
 
+// Handle completion of one received data block.
+// Acknowledges the DMA interrupt, reads the two checksum words that follow
+// the payload from the PIO RX FIFO, stores them for later verification and
+// then either starts reception of the next block or marks the transfer idle.
+static void rp2040_sdio_rx_irq()
+{
+    // Writing the channel bit to ints1 clears the pending interrupt flag
+    dma_hw->ints1 = 1 << SDIO_DMA_CH;
+
+    // Wait for CRC to be received
+    // (bounded by an iteration count rather than by elapsed time)
+    int maxwait = 1000;
+    while (pio_sm_get_rx_fifo_level(SDIO_PIO, SDIO_DATA_SM) < 2)
+    {
+        if (maxwait-- < 0)
+        {
+            azlog("rp2040_sdio_rx_irq(): timeout waiting for CRC reception");
+            break;
+        }
+    }
+    // NOTE(review): after a timeout the FIFO is still read below, so the
+    // stored checksum for this block may not be valid data.
+    uint32_t crc0 = pio_sm_get(SDIO_PIO, SDIO_DATA_SM);
+    uint32_t crc1 = pio_sm_get(SDIO_PIO, SDIO_DATA_SM);
+    g_sdio.block_checksums[g_sdio.blocks_done] = ((uint64_t)crc0 << 32) | crc1;
+    g_sdio.blocks_done++;
+
+    if (g_sdio.blocks_done < g_sdio.total_blocks)
+    {
+        sdio_start_next_block_rx();
+    }
+    else
+    {
+        g_sdio.transfer_state = SDIO_IDLE;
+    }
+}
+
+// Start receiving num_blocks blocks of 512 bytes from the card into buffer.
+// buffer must be 4-byte aligned and num_blocks at most SDIO_MAX_BLOCKS.
+// Returns SDIO_OK once the first block reception has been started;
+// progress and completion are reported by rp2040_sdio_rx_poll().
+sdio_status_t rp2040_sdio_rx_start(uint8_t *buffer, uint32_t num_blocks)
+{
+    // Buffer must be aligned
+    assert(((uint32_t)buffer & 3) == 0 && num_blocks <= SDIO_MAX_BLOCKS);
+
+    // Reset the bookkeeping for a new transfer
+    g_sdio.transfer_state = SDIO_RX;
+    g_sdio.transfer_start_time = millis();
+    g_sdio.data_buf = (uint32_t*)buffer;
+    g_sdio.blocks_done = 0;
+    g_sdio.total_blocks = num_blocks;
+    g_sdio.blocks_checksumed = 0;
+    g_sdio.checksum_errors = 0;
+
+    // Check if we are inside interrupt handler.
+    // This happens when saving crash log from hardfault.
+    // If true, must use polling mode instead of interrupts.
+    g_sdio.inside_irq_handler = (SCB->ICSR & SCB_ICSR_VECTACTIVE_Msk);
+
+    // Load the reception program configuration and make D0..D3 inputs
+    pio_sm_init(SDIO_PIO, SDIO_DATA_SM, g_sdio.pio_data_rx_offset, &g_sdio.pio_cfg_data_rx);
+    pio_sm_set_consecutive_pindirs(SDIO_PIO, SDIO_DATA_SM, SDIO_D0, 4, false);
+
+    // Configure DMA to receive the data block payload (512 bytes).
+    // Address and count are left at zero here; they are set per block
+    // by sdio_start_next_block_rx().
+    dma_channel_config dmacfg = dma_channel_get_default_config(SDIO_DMA_CH);
+    channel_config_set_transfer_data_size(&dmacfg, DMA_SIZE_32);
+    channel_config_set_read_increment(&dmacfg, false);
+    channel_config_set_write_increment(&dmacfg, true);
+    channel_config_set_dreq(&dmacfg, pio_get_dreq(SDIO_PIO, SDIO_DATA_SM, false));
+    channel_config_set_bswap(&dmacfg, true);
+    dma_channel_configure(SDIO_DMA_CH, &dmacfg, 0, &SDIO_PIO->rxf[SDIO_DATA_SM], 0, false);
+
+    sdio_start_next_block_rx();
+
+    return SDIO_OK;
+}
+
+// Check if reception is complete.
+// bytes_complete: if not NULL, receives the number of bytes received so far.
+// Returns SDIO_BUSY while the transfer is running, SDIO_OK when all blocks
+// arrived with correct checksums, SDIO_ERR_DATA_CRC if any checksum
+// mismatched, or SDIO_ERR_DATA_TIMEOUT after 1000 ms without completion.
+sdio_status_t rp2040_sdio_rx_poll(uint32_t *bytes_complete)
+{
+    // The channel's completion interrupt is routed to DMA_IRQ_1 and is
+    // acknowledged through ints1, so the pending flag must be read from
+    // ints1 as well. ints0 only reflects channels enabled for DMA_IRQ_0.
+    if (g_sdio.inside_irq_handler && (dma_hw->ints1 & (1 << SDIO_DMA_CH)))
+    {
+        // Make sure DMA interrupt handler gets called even from inside hardfault handler.
+        rp2040_sdio_rx_irq();
+    }
+
+    if (bytes_complete)
+    {
+        *bytes_complete = g_sdio.blocks_done * 512;
+    }
+
+    if (g_sdio.transfer_state == SDIO_IDLE)
+    {
+        // Transfer finished: verify every block that has not been checked yet
+        sdio_verify_rx_checksums(g_sdio.total_blocks);
+
+        if (g_sdio.checksum_errors == 0)
+            return SDIO_OK;
+        else
+            return SDIO_ERR_DATA_CRC;
+    }
+    else if ((uint32_t)(millis() - g_sdio.transfer_start_time) > 1000)
+    {
+        azdbg("rp2040_sdio_rx_poll() timeout, "
+            "PIO PC: ", (int)pio_sm_get_pc(SDIO_PIO, SDIO_DATA_SM) - (int)g_sdio.pio_data_rx_offset,
+            " RXF: ", (int)pio_sm_get_rx_fifo_level(SDIO_PIO, SDIO_DATA_SM),
+            " TXF: ", (int)pio_sm_get_tx_fifo_level(SDIO_PIO, SDIO_DATA_SM),
+            " DMA CNT: ", dma_hw->ch[SDIO_DMA_CH].al2_transfer_count);
+        rp2040_sdio_stop();
+        return SDIO_ERR_DATA_TIMEOUT;
+    }
+    else
+    {
+        // Use the idle time to calculate checksums
+        sdio_verify_rx_checksums(1);
+    }
+
+    return SDIO_BUSY;
+}
+
+
+/*******************************************************
+ * Data transmission to SD card
+ *******************************************************/
+
+// Start the DMA transfer that feeds the next block to the data state machine.
+// The block at index g_sdio.blocks_done must still be pending and its
+// checksum must already have been computed.
+static void sdio_start_next_block_tx()
+{
+    assert (g_sdio.blocks_done < g_sdio.total_blocks && g_sdio.blocks_checksumed > g_sdio.blocks_done);
+
+    // Start new DMA transfer
+    // (128 words of 32 bits = one 512 byte block; data_buf is a uint32_t pointer)
+    dma_channel_transfer_from_buffer_now(SDIO_DMA_CH, g_sdio.data_buf + 128 * g_sdio.blocks_done, 128);
+}
+
+// Compute checksums for outgoing blocks ahead of their transmission.
+// Processes at most maxcount blocks, continuing from the first block
+// whose checksum has not been computed yet, and stores the results in
+// g_sdio.block_checksums[].
+static void sdio_compute_tx_checksums(uint32_t maxcount)
+{
+    // The upper bound is the total block count: checksums must exist
+    // before a block is sent, so they run ahead of blocks_done.
+    // (Bounding by blocks_done would never compute anything, because
+    // blocks_done is still 0 when the transfer is started.)
+    while (g_sdio.blocks_checksumed < g_sdio.total_blocks && maxcount-- > 0)
+    {
+        int blockidx = g_sdio.blocks_checksumed++;
+        g_sdio.block_checksums[blockidx] = sdio_crc16_4bit_checksum(g_sdio.data_buf + blockidx * 128, 128);
+    }
+}
+
+// Handle completion of the DMA transfer of one outgoing data block.
+// Appends the block checksum and the end marker to the PIO TX FIFO and
+// then either starts the next block or marks the transfer idle.
+static void rp2040_sdio_tx_irq()
+{
+    // Acknowledge the DMA interrupt, the same way the reception handler does.
+    // This matters when the handler is called from polling code, where
+    // nothing else clears the flag.
+    dma_hw->ints1 = 1 << SDIO_DMA_CH;
+
+    // Wait for there to be enough space for checksum.
+    // Three words are written below and the joined TX FIFO holds 8 words
+    // on RP2040, so the fill level must have dropped to 5 or less.
+    int maxwait = 1000;
+    while (pio_sm_get_tx_fifo_level(SDIO_PIO, SDIO_DATA_SM) > 5)
+    {
+        if (maxwait-- < 0)
+        {
+            azlog("rp2040_sdio_tx_irq(): timeout waiting for space in TX buffer for CRC");
+            break;
+        }
+    }
+
+    // Send the checksum and block end marker
+    uint64_t crc = g_sdio.block_checksums[g_sdio.blocks_done];
+    pio_sm_put(SDIO_PIO, SDIO_DATA_SM, (uint32_t)(crc >> 32));
+    pio_sm_put(SDIO_PIO, SDIO_DATA_SM, (uint32_t)(crc >>  0));
+    pio_sm_put(SDIO_PIO, SDIO_DATA_SM, 0xFFFFFFFF);
+
+    g_sdio.blocks_done++;
+    if (g_sdio.blocks_done < g_sdio.total_blocks)
+    {
+        sdio_start_next_block_tx();
+    }
+    else
+    {
+        g_sdio.transfer_state = SDIO_IDLE;
+    }
+}
+
+
+// Start transferring data from memory to SD card
+// buffer must be 4-byte aligned and num_blocks at most SDIO_MAX_BLOCKS;
+// each block is 512 bytes. Returns SDIO_OK once the first block has been
+// started; progress and completion are reported by rp2040_sdio_tx_poll().
+sdio_status_t rp2040_sdio_tx_start(const uint8_t *buffer, uint32_t num_blocks)
+{
+    // Buffer must be aligned
+    assert(((uint32_t)buffer & 3) == 0 && num_blocks <= SDIO_MAX_BLOCKS);
+
+    // Reset the bookkeeping for a new transfer
+    g_sdio.transfer_state = SDIO_TX;
+    g_sdio.transfer_start_time = millis();
+    g_sdio.data_buf = (uint32_t*)buffer;
+    g_sdio.blocks_done = 0;
+    g_sdio.total_blocks = num_blocks;
+    g_sdio.blocks_checksumed = 0;
+    g_sdio.checksum_errors = 0;
+
+    // Check if we are inside interrupt handler.
+    // This happens when saving crash log from hardfault.
+    // If true, must use polling mode instead of interrupts.
+    g_sdio.inside_irq_handler = (SCB->ICSR & SCB_ICSR_VECTACTIVE_Msk);
+
+    // Compute first block checksum
+    sdio_compute_tx_checksums(1);
+
+    // Initialize PIO
+    pio_sm_init(SDIO_PIO, SDIO_DATA_SM, g_sdio.pio_data_tx_offset, &g_sdio.pio_cfg_data_tx);
+    pio_sm_set_consecutive_pindirs(SDIO_PIO, SDIO_DATA_SM, SDIO_D0, 4, true);
+
+    // Configure DMA to send the data block payload (512 bytes)
+    // The channel writes to the TX FIFO, so it must be paced by the state
+    // machine's TX data request (is_tx = true). The RX request would never
+    // be raised by the transmit program and the transfer would stall.
+    dma_channel_config dmacfg = dma_channel_get_default_config(SDIO_DMA_CH);
+    channel_config_set_transfer_data_size(&dmacfg, DMA_SIZE_32);
+    channel_config_set_read_increment(&dmacfg, true);
+    channel_config_set_write_increment(&dmacfg, false);
+    channel_config_set_dreq(&dmacfg, pio_get_dreq(SDIO_PIO, SDIO_DATA_SM, true));
+    channel_config_set_bswap(&dmacfg, true);
+    dma_channel_configure(SDIO_DMA_CH, &dmacfg, 0, &SDIO_PIO->txf[SDIO_DATA_SM], 0, false);
+
+    // Start first DMA transfer and PIO
+    sdio_start_next_block_tx();
+    pio_sm_set_enabled(SDIO_PIO, SDIO_DATA_SM, true);
+
+    // Compute rest of the block checksums so that they are ready when needed
+    sdio_compute_tx_checksums(g_sdio.total_blocks);
+
+    return SDIO_OK;
+}
+
+// Check if transmission is complete
+// bytes_complete: if not NULL, receives the number of bytes handed to the
+// state machine so far.
+// Returns SDIO_BUSY while the transfer is running, SDIO_OK when it has
+// finished, or SDIO_ERR_DATA_TIMEOUT after 1000 ms without completion.
+sdio_status_t rp2040_sdio_tx_poll(uint32_t *bytes_complete)
+{
+    // The channel's completion interrupt is routed to DMA_IRQ_1, so the
+    // pending flag is visible in ints1. ints0 only reflects channels
+    // enabled for DMA_IRQ_0.
+    if (g_sdio.inside_irq_handler && (dma_hw->ints1 & (1 << SDIO_DMA_CH)))
+    {
+        // Make sure DMA interrupt handler gets called even from inside hardfault handler.
+        rp2040_sdio_tx_irq();
+    }
+
+    if (bytes_complete)
+    {
+        *bytes_complete = g_sdio.blocks_done * 512;
+    }
+
+    if (g_sdio.transfer_state == SDIO_IDLE)
+    {
+        // Done: stop the state machine and release the data lines
+        pio_sm_set_enabled(SDIO_PIO, SDIO_DATA_SM, false);
+        pio_sm_set_consecutive_pindirs(SDIO_PIO, SDIO_DATA_SM, SDIO_D0, 4, false);
+        return SDIO_OK;
+    }
+    else if ((uint32_t)(millis() - g_sdio.transfer_start_time) > 1000)
+    {
+        // The state machine runs the transmit program here, so report the
+        // program counter relative to the transmit program's load offset.
+        azdbg("rp2040_sdio_tx_poll() timeout, "
+            "PIO PC: ", (int)pio_sm_get_pc(SDIO_PIO, SDIO_DATA_SM) - (int)g_sdio.pio_data_tx_offset,
+            " RXF: ", (int)pio_sm_get_rx_fifo_level(SDIO_PIO, SDIO_DATA_SM),
+            " TXF: ", (int)pio_sm_get_tx_fifo_level(SDIO_PIO, SDIO_DATA_SM),
+            " DMA CNT: ", dma_hw->ch[SDIO_DMA_CH].al2_transfer_count);
+        rp2040_sdio_stop();
+        return SDIO_ERR_DATA_TIMEOUT;
+    }
+
+    return SDIO_BUSY;
+}
+
+// Force everything to idle state
+// Aborts the DMA channel, disables the data state machine, turns the four
+// data pins back into inputs and marks the transfer as idle.
+// Always returns SDIO_OK.
+sdio_status_t rp2040_sdio_stop()
+{
+    dma_channel_abort(SDIO_DMA_CH);
+    pio_sm_set_enabled(SDIO_PIO, SDIO_DATA_SM, false);
+    pio_sm_set_consecutive_pindirs(SDIO_PIO, SDIO_DATA_SM, SDIO_D0, 4, false);
+    g_sdio.transfer_state = SDIO_IDLE;
+    return SDIO_OK;
+}
+
+// DMA completion interrupt entry point.
+// Acknowledges the channel's interrupt flag and forwards the event to the
+// handler of whichever transfer direction is currently active, if any.
+void rp2040_sdio_dma_irq()
+{
+    dma_hw->ints1 = 1 << SDIO_DMA_CH;
+
+    switch (g_sdio.transfer_state)
+    {
+        case SDIO_TX:
+            rp2040_sdio_tx_irq();
+            break;
+
+        case SDIO_RX:
+            rp2040_sdio_rx_irq();
+            break;
+
+        default:
+            // No transfer in progress, nothing to do
+            break;
+    }
+}
+
+void rp2040_sdio_init()
+{
     // Mark resources as being in use, unless it has been done already.
     static bool resources_claimed = false;
     if (!resources_claimed)
     {
         pio_sm_claim(SDIO_PIO, SDIO_CMD_SM);
         pio_sm_claim(SDIO_PIO, SDIO_DATA_SM);
+        dma_channel_claim(SDIO_DMA_CH);
         resources_claimed = true;
     }
 
+    memset(&g_sdio, 0, sizeof(g_sdio));
+
     // Load PIO programs
     pio_clear_instruction_memory(SDIO_PIO);
 
@@ -174,7 +650,39 @@ void rp2040_sdio_init()
     pio_sm_set_consecutive_pindirs(SDIO_PIO, SDIO_CMD_SM, SDIO_CLK, 1, true);
     pio_sm_set_enabled(SDIO_PIO, SDIO_CMD_SM, true);
 
+    // Data reception program
+    g_sdio.pio_data_rx_offset = pio_add_program(SDIO_PIO, &sdio_data_rx_program);
+    g_sdio.pio_cfg_data_rx = sdio_data_rx_program_get_default_config(g_sdio.pio_data_rx_offset);
+    sm_config_set_in_pins(&g_sdio.pio_cfg_data_rx, SDIO_D0);
+    sm_config_set_in_shift(&g_sdio.pio_cfg_data_rx, false, true, 32);
+    sm_config_set_fifo_join(&g_sdio.pio_cfg_data_rx, PIO_FIFO_JOIN_RX);
+
+    // Data transmission program
+    // The default config carries the program's wrap addresses, so it must
+    // come from the transmit program's own generated helper. The reception
+    // program has a different length and wrap range.
+    g_sdio.pio_data_tx_offset = pio_add_program(SDIO_PIO, &sdio_data_tx_program);
+    g_sdio.pio_cfg_data_tx = sdio_data_tx_program_get_default_config(g_sdio.pio_data_tx_offset);
+    sm_config_set_out_pins(&g_sdio.pio_cfg_data_tx, SDIO_D0, 4);
+    sm_config_set_out_shift(&g_sdio.pio_cfg_data_tx, false, true, 32);
+    sm_config_set_fifo_join(&g_sdio.pio_cfg_data_tx, PIO_FIFO_JOIN_TX);
+
+    // Disable CLK pin input synchronizer.
+    // This reduces delay from clk state machine to data state machine.
+    // Because the CLK pin is output and driven synchronously to CPU clock,
+    // there is no metastability problems.
+    SDIO_PIO->input_sync_bypass |= (1 << SDIO_CLK);
+
     // Redirect GPIOs to PIO
     gpio_set_function(SDIO_CMD, GPIO_FUNC_PIO1);
     gpio_set_function(SDIO_CLK, GPIO_FUNC_PIO1);
+    gpio_set_function(SDIO_D0, GPIO_FUNC_PIO1);
+    gpio_set_function(SDIO_D1, GPIO_FUNC_PIO1);
+    gpio_set_function(SDIO_D2, GPIO_FUNC_PIO1);
+    gpio_set_function(SDIO_D3, GPIO_FUNC_PIO1);
+
+    // Set up IRQ handler when DMA completes.
+    // This is time-critical because the CRC must be written / read before PIO FIFO runs out.
+    dma_hw->ints1 = 1 << SDIO_DMA_CH;
+    dma_channel_set_irq1_enabled(SDIO_DMA_CH, true);
+    irq_set_exclusive_handler(DMA_IRQ_1, rp2040_sdio_dma_irq);
+    irq_set_enabled(DMA_IRQ_1, true);
+    irq_set_priority(DMA_IRQ_1, 255);
 }

+ 21 - 8
lib/ZuluSCSI_platform_RP2040/rp2040_sdio.h

@@ -7,28 +7,41 @@
 
 enum sdio_status_t {
     SDIO_OK = 0,
-    SDIO_ERR_RESPONSE_TIMEOUT = 1, // Timed out waiting for response from card
-    SDIO_ERR_CRC = 2,              // Response CRC is wrong
+    SDIO_BUSY = 1,
+    SDIO_ERR_RESPONSE_TIMEOUT = 2, // Timed out waiting for response from card
+    SDIO_ERR_RESPONSE_CRC = 3,     // Response CRC is wrong
+    SDIO_ERR_RESPONSE_CODE = 4,    // Response command code does not match what was sent
+    SDIO_ERR_DATA_TIMEOUT = 5,     // Timed out waiting for data block
+    SDIO_ERR_DATA_CRC = 6,         // CRC for data packet is wrong
 };
 
-// Execute a command that has 48-bit reply (response types R1, R3, R6 and R7)
+// Execute a command that has 48-bit reply (response types R1, R6, R7)
 // If response is NULL, does not wait for reply.
 sdio_status_t rp2040_sdio_command_R1(uint8_t command, uint32_t arg, uint32_t *response);
 
 // Execute a command that has 136-bit reply (response type R2)
-sdio_status_t rp2040_sdio_command_R2(uint8_t command, uint32_t arg, uint32_t response[4]);
+// Response buffer should have space for 16 bytes (the 128 bit payload)
+sdio_status_t rp2040_sdio_command_R2(uint8_t command, uint32_t arg, uint8_t *response);
+
+// Execute a command that has 48-bit reply but without CRC (response R3)
+sdio_status_t rp2040_sdio_command_R3(uint8_t command, uint32_t arg, uint32_t *response);
 
 // Start transferring data from SD card to memory buffer
-sdio_status_t rp2040_sdio_rx_start(uint8_t *buffer, uint32_t num_bytes);
+// Transfer block size is always 512 bytes.
+sdio_status_t rp2040_sdio_rx_start(uint8_t *buffer, uint32_t num_blocks);
 
 // Check if reception is complete
-bool rp2040_sdio_rx_poll();
+// Returns SDIO_BUSY while transferring, SDIO_OK when done and error on failure.
+sdio_status_t rp2040_sdio_rx_poll(uint32_t *bytes_complete = nullptr);
 
 // Start transferring data from memory to SD card
-sdio_status_t rp2040_sdio_tx_start(const uint8_t *buffer, uint32_t num_bytes);
+sdio_status_t rp2040_sdio_tx_start(const uint8_t *buffer, uint32_t num_blocks);
 
 // Check if transmission is complete
-bool rp2040_sdio_tx_poll();
+sdio_status_t rp2040_sdio_tx_poll(uint32_t *bytes_complete = nullptr);
+
+// Force everything to idle state
+sdio_status_t rp2040_sdio_stop();
 
 // (Re)initialize the SDIO interface
 void rp2040_sdio_init();

+ 4 - 2
lib/ZuluSCSI_platform_RP2040/rp2040_sdio.pio

@@ -101,10 +101,11 @@ resp_done:
 .program sdio_data_rx
 
 wait_start:
-    mov Y, !PINS                ; Read GPIOs (currently doesn't check for clock edge)
-    jmp !Y wait_start           ; Keep looping until we see all zeros start token
+    wait 0 pin 0                ; Wait for zero state on D0
+    wait 1 gpio SDIO_CLK_GPIO   ; Wait for rising edge
 
 .wrap_target
+    wait 0 gpio SDIO_CLK_GPIO
     wait 1 gpio SDIO_CLK_GPIO   ; Wait for rising clock edge
     in PINS, 4                  ; Read nibble
 .wrap
@@ -115,5 +116,6 @@ wait_start:
 ; for the start of block token, and append the checksum.
 ; The data should be padded to full 32 bits by 0xFF bytes.
 .program sdio_data_tx
+    wait 1 gpio SDIO_CLK_GPIO
     wait 0 gpio SDIO_CLK_GPIO   ; Wait for falling clock edge
     out PINS, 4                 ; Write nibble

+ 12 - 10
lib/ZuluSCSI_platform_RP2040/rp2040_sdio.pio.h

@@ -58,21 +58,22 @@ static inline pio_sm_config sdio_cmd_clk_program_get_default_config(uint offset)
 // ------------ //
 
 #define sdio_data_rx_wrap_target 2
-#define sdio_data_rx_wrap 3
+#define sdio_data_rx_wrap 4
 
 static const uint16_t sdio_data_rx_program_instructions[] = {
-    0xa048, //  0: mov    y, !pins                   
-    0x0060, //  1: jmp    !y, 0                      
+    0x2020, //  0: wait   0 pin, 0
+    0x2092, //  1: wait   1 gpio, 18
             //     .wrap_target
-    0x2092, //  2: wait   1 gpio, 18                 
-    0x4004, //  3: in     pins, 4                    
+    0x2012, //  2: wait   0 gpio, 18
+    0x2092, //  3: wait   1 gpio, 18
+    0x4004, //  4: in     pins, 4
             //     .wrap
 };
 
 #if !PICO_NO_HARDWARE
 static const struct pio_program sdio_data_rx_program = {
     .instructions = sdio_data_rx_program_instructions,
-    .length = 4,
+    .length = 5,
     .origin = -1,
 };
 
@@ -88,19 +89,20 @@ static inline pio_sm_config sdio_data_rx_program_get_default_config(uint offset)
 // ------------ //
 
 #define sdio_data_tx_wrap_target 0
-#define sdio_data_tx_wrap 1
+#define sdio_data_tx_wrap 2
 
 static const uint16_t sdio_data_tx_program_instructions[] = {
             //     .wrap_target
-    0x2012, //  0: wait   0 gpio, 18                 
-    0x6004, //  1: out    pins, 4                    
+    0x2092, //  0: wait   1 gpio, 18
+    0x2012, //  1: wait   0 gpio, 18
+    0x6004, //  2: out    pins, 4
             //     .wrap
 };
 
 #if !PICO_NO_HARDWARE
 static const struct pio_program sdio_data_tx_program = {
     .instructions = sdio_data_tx_program_instructions,
-    .length = 2,
+    .length = 3,
     .origin = -1,
 };
 

+ 274 - 106
lib/ZuluSCSI_platform_RP2040/sd_card_sdio.cpp

@@ -10,27 +10,99 @@
 #include <SdFat.h>
 #include <SdCard/SdCardInfo.h>
 
+static uint32_t g_sdio_ocr; // Operating condition register from card
+static uint32_t g_sdio_rca; // Relative card address
+static cid_t g_sdio_cid;
+static int g_sdio_error_line;
+static sdio_status_t g_sdio_error;
+
+// Evaluate an SDIO call, store its status in g_sdio_error and yield true
+// when the status is SDIO_OK. On any other status the source line is
+// recorded and logged through logSDError(), and the result is false.
+#define checkReturnOk(call) ((g_sdio_error = (call)) == SDIO_OK ? true : logSDError(__LINE__))
+
+// Record the source line of a failed SDIO call and log it together with
+// the status currently stored in g_sdio_error. Always returns false so
+// that it can be used as the failure value of checkReturnOk().
+static bool logSDError(int line)
+{
+    g_sdio_error_line = line;
+    azlog("SDIO SD card error on line ", line, ", error code ", (int)g_sdio_error);
+    return false;
+}
 
 bool SdioCard::begin(SdioConfig sdioConfig)
 {
     uint32_t reply;
+    sdio_status_t status;
     
     rp2040_sdio_init();
-    delay(1);
-    rp2040_sdio_command_R1(CMD0, 0, NULL); // GO_IDLE_STATE
-    rp2040_sdio_command_R1(CMD8, 0x1AA, &reply); // SEND_IF_COND
-    azdbg("Reply ", reply);
-    rp2040_sdio_command_R1(CMD0, 0, NULL); // GO_IDLE_STATE
-    rp2040_sdio_command_R1(CMD8, 0x1AA, &reply); // SEND_IF_COND
-    azdbg("Reply ", reply);
-
-    delay(100);
-    return false;
+
+    // Establish initial connection with the card
+    for (int retries = 0; retries < 5; retries++)
+    {
+        delayMicroseconds(1000);
+        reply = 0;
+        rp2040_sdio_command_R1(CMD0, 0, NULL); // GO_IDLE_STATE
+        status = rp2040_sdio_command_R1(CMD8, 0x1AA, &reply); // SEND_IF_COND
+
+        if (status == SDIO_OK && reply == 0x1AA)
+        {
+            break;
+        }
+    }
+
+    if (reply != 0x1AA || status != SDIO_OK)
+    {
+        azdbg("SDIO not responding to CMD8 SEND_IF_COND, status ", (int)status, " reply ", reply);
+        return false;
+    }
+
+    // Send ACMD41 to begin card initialization and wait for it to complete
+    uint32_t start = millis();
+    do {
+        if (!checkReturnOk(rp2040_sdio_command_R1(CMD55, 0, &reply)) || // APP_CMD
+            !checkReturnOk(rp2040_sdio_command_R3(ACMD41, 0xD0040000, &g_sdio_ocr))) // 3.0V voltage
+            // !checkReturnOk(rp2040_sdio_command_R1(ACMD41, 0xC0100000, &g_sdio_ocr)))
+        {
+            return false;
+        }
+
+        if ((uint32_t)(millis() - start) > 1000)
+        {
+            azlog("SDIO card initialization timeout");
+            return false;
+        }
+    } while (!(g_sdio_ocr & (1 << 31)));
+
+    // Get CID
+    if (!checkReturnOk(rp2040_sdio_command_R2(CMD2, 0, (uint8_t*)&g_sdio_cid)))
+    {
+        azdbg("SDIO failed to read CID");
+        return false;
+    }
+
+    // Get relative card address
+    if (!checkReturnOk(rp2040_sdio_command_R1(CMD3, 0, &g_sdio_rca)))
+    {
+        azdbg("SDIO failed to get RCA");
+        return false;
+    }
+
+    // Select card
+    if (!checkReturnOk(rp2040_sdio_command_R1(CMD7, g_sdio_rca, &reply)))
+    {
+        azdbg("SDIO failed to select card");
+        return false;
+    }
+
+    // Set 4-bit bus mode
+    if (!checkReturnOk(rp2040_sdio_command_R1(CMD55, g_sdio_rca, &reply)) ||
+        !checkReturnOk(rp2040_sdio_command_R1(ACMD6, 2, &reply)))
+    {
+        azdbg("SDIO failed to set bus width");
+        return false;
+    }
+
+    return true;
 }
 
 uint8_t SdioCard::errorCode() const
 {
-    return SD_CARD_ERROR_NONE;
+    return g_sdio_error;
 }
 
 uint32_t SdioCard::errorData() const
@@ -40,7 +112,7 @@ uint32_t SdioCard::errorData() const
 
 uint32_t SdioCard::errorLine() const
 {
-    return 0;
+    return g_sdio_error_line;
 }
 
 bool SdioCard::isBusy() 
@@ -55,17 +127,20 @@ uint32_t SdioCard::kHzSdClk()
 
 bool SdioCard::readCID(cid_t* cid)
 {
+    *cid = g_sdio_cid;
     return true;
 }
 
 bool SdioCard::readCSD(csd_t* csd)
 {
-    return true;
+    return checkReturnOk(rp2040_sdio_command_R2(CMD9, g_sdio_rca, (uint8_t*)csd)); // SEND_CSD
 }
 
 bool SdioCard::readOCR(uint32_t* ocr)
 {
-    return true;
+    // SDIO mode does not have CMD58, but main program uses this to
+    // poll for card presence. Return status register instead.
+    return checkReturnOk(rp2040_sdio_command_R1(CMD13, g_sdio_rca, ocr));
 }
 
 bool SdioCard::readData(uint8_t* dst)
@@ -88,67 +163,61 @@ bool SdioCard::readStop()
 
 uint32_t SdioCard::sectorCount()
 {
-    // csd_t csd;
-    // sd_csd_get((uint8_t*)&csd);
-    // return sdCardCapacity(&csd);
-    return 0;
+    // Sector count derived from the card's CSD register, which is read
+    // with readCSD(). Returns 0 if the CSD cannot be read, instead of
+    // computing a capacity from an uninitialized structure.
+    csd_t csd;
+    if (!readCSD(&csd))
+    {
+        return 0;
+    }
+    return sdCardCapacity(&csd);
 }
 
 uint32_t SdioCard::status()
 {
-    // uint32_t status = 0;
-    // if (!checkReturnOk(sd_cardstatus_get(&status)))
-    //     return 0;
-    // else
-    //     return status;
-    return 0;
+    uint32_t reply;
+    if (checkReturnOk(rp2040_sdio_command_R1(CMD13, g_sdio_rca, &reply)))
+        return reply;
+    else
+        return 0;
 }
 
 bool SdioCard::stopTransmission(bool blocking)
 {
-    return false;
-    // if (!checkReturnOk(sd_transfer_stop()))
-    //     return false;
-
-    // if (!blocking)
-    // {
-    //     return true;
-    // }
-    // else
-    // {
-    //     uint32_t end = millis() + 100;
-    //     while (millis() < end && isBusy())
-    //     {
-    //     }
-    //     if (isBusy())
-    //     {
-    //         azlog("SdioCard::stopTransmission() timeout");
-    //         return false;
-    //     }
-    //     else
-    //     {
-    //         return true;
-    //     }
-    // }
+    // Sends CMD12 (STOP_TRANSMISSION). When 'blocking' is true, additionally
+    // waits up to 100 ms for isBusy() to clear. Returns false if the command
+    // fails or the card is still busy after the wait.
+    uint32_t reply;
+    if (!checkReturnOk(rp2040_sdio_command_R1(CMD12, 0, &reply)))
+    {
+        return false;
+    }
+
+    if (!blocking)
+    {
+        return true;
+    }
+
+    // Compare elapsed time instead of an absolute deadline so that the wait
+    // stays correct when the millis() counter wraps around.
+    uint32_t start = millis();
+    while ((uint32_t)(millis() - start) < 100 && isBusy())
+    {
+    }
+
+    if (isBusy())
+    {
+        azlog("SdioCard::stopTransmission() timeout");
+        return false;
+    }
+
+    return true;
 }
 
 bool SdioCard::syncDevice()
 {
-    // if (sd_transfer_state_get() != SD_NO_TRANSFER)
-    // {
-    //     return stopTransmission(true);
-    // }
     return true;
 }
 
 uint8_t SdioCard::type() const
 {
-    // if (g_sdio_card_type == SDIO_HIGH_CAPACITY_SD_CARD)
-    //     return SD_CARD_TYPE_SDHC;
-    // else if (g_sdio_card_type == SDIO_STD_CAPACITY_SD_CARD_V2_0)
-    //     return SD_CARD_TYPE_SD2;
-    // else
-    //     return SD_CARD_TYPE_SD1;
+    if (g_sdio_ocr & (1 << 30))
+        return SD_CARD_TYPE_SDHC;
+    else
+        return SD_CARD_TYPE_SD2;
 }
 
 bool SdioCard::writeData(const uint8_t* src)
@@ -171,78 +240,177 @@ bool SdioCard::writeStop()
 
 bool SdioCard::erase(uint32_t firstSector, uint32_t lastSector)
 {
+    return false;
     // return checkReturnOk(sd_erase(firstSector * 512, lastSector * 512));
 }
 
 /* Writing and reading, with progress callback */
 
-// static sd_callback_t m_stream_callback;
-// static const uint8_t *m_stream_buffer;
-// static uint32_t m_stream_count;
-// static uint32_t m_stream_count_start;
-
-// void azplatform_set_sd_callback(sd_callback_t func, const uint8_t *buffer)
-// {
-//     m_stream_callback = func;
-//     m_stream_buffer = buffer;
-//     m_stream_count = 0;
-//     m_stream_count_start = 0;
-// }
-
-// static void sdio_callback(uint32_t complete)
-// {
-//     if (m_stream_callback)
-//     {
-//         m_stream_callback(m_stream_count_start + complete);
-//     }
-// }
-
-// static sdio_callback_t get_stream_callback(const uint8_t *buf, uint32_t count)
-// {
-//     m_stream_count_start = m_stream_count;
-
-//     if (m_stream_callback)
-//     {
-//         if (buf == m_stream_buffer + m_stream_count)
-//         {
-//             m_stream_count += count;
-//             return &sdio_callback;
-//         }
-//         else
-//         {
-//             azdbg("Stream buffer mismatch: ", (uint32_t)buf, " vs. ", (uint32_t)(m_stream_buffer + m_stream_count));
-//             return NULL;
-//         }
-//     }
+static sd_callback_t m_stream_callback;
+static const uint8_t *m_stream_buffer;
+static uint32_t m_stream_count;
+static uint32_t m_stream_count_start;
+
+void azplatform_set_sd_callback(sd_callback_t func, const uint8_t *buffer)
+{
+    m_stream_callback = func;
+    m_stream_buffer = buffer;
+    m_stream_count = 0;
+    m_stream_count_start = 0;
+}
+
+static sd_callback_t get_stream_callback(const uint8_t *buf, uint32_t count)
+{
+    m_stream_count_start = m_stream_count;
+
+    if (m_stream_callback)
+    {
+        if (buf == m_stream_buffer + m_stream_count)
+        {
+            m_stream_count += count;
+            return m_stream_callback;
+        }
+        else
+        {
+            azdbg("Stream buffer mismatch: ", (uint32_t)buf, " vs. ", (uint32_t)(m_stream_buffer + m_stream_count));
+            return NULL;
+        }
+    }
     
-//     return NULL;
-// }
+    return NULL;
+}
 
 
 bool SdioCard::writeSector(uint32_t sector, const uint8_t* src)
 {
-    // return checkReturnOk(sd_block_write((uint32_t*)src, (uint64_t)sector * 512, 512,
-    //     get_stream_callback(src, 512)));
+    sd_callback_t callback = get_stream_callback(src, 512);
+
+    uint32_t reply;
+    if (!checkReturnOk(rp2040_sdio_command_R1(16, 512, &reply)) || // SET_BLOCKLEN
+        !checkReturnOk(rp2040_sdio_command_R1(CMD24, sector, &reply)) || // WRITE_BLOCK
+        !checkReturnOk(rp2040_sdio_tx_start(src, 1))) // Start transmission
+    {
+        return false;
+    }
+
+    do {
+        uint32_t bytes_done;
+        g_sdio_error = rp2040_sdio_tx_poll(&bytes_done);
+
+        if (callback)
+        {
+            callback(m_stream_count_start + bytes_done);
+        }
+    } while (g_sdio_error == SDIO_BUSY);
+
+    if (g_sdio_error != SDIO_OK)
+    {
+        azdbg("SdioCard::writeSector(", sector, ") failed: ", (int)g_sdio_error);
+    }
+
+    return g_sdio_error == SDIO_OK;
 }
 
 bool SdioCard::writeSectors(uint32_t sector, const uint8_t* src, size_t n)
 {
-    // return checkReturnOk(sd_multiblocks_write((uint32_t*)src, (uint64_t)sector * 512, 512, n,
-    //     get_stream_callback(src, n * 512)));
+    // Writes 'n' consecutive 512-byte blocks from 'src' starting at 'sector'
+    // using CMD25, invoking the stream callback (if one is registered for
+    // this buffer) with the running byte count while the transfer is busy.
+    // Returns true if the final transfer status is SDIO_OK.
+    //
+    // The stream position has to advance by the whole transfer length
+    // (n * 512), as in readSectors(); advancing by a single block would make
+    // the next chunk fail the buffer position check in get_stream_callback().
+    sd_callback_t callback = get_stream_callback(src, n * 512);
+
+    uint32_t reply;
+    if (!checkReturnOk(rp2040_sdio_command_R1(16, 512, &reply)) || // SET_BLOCKLEN
+        !checkReturnOk(rp2040_sdio_command_R1(CMD55, g_sdio_rca, &reply)) || // APP_CMD
+        !checkReturnOk(rp2040_sdio_command_R1(ACMD23, n, &reply)) || // SET_WR_CLK_ERASE_COUNT
+        !checkReturnOk(rp2040_sdio_command_R1(CMD25, sector, &reply)) || // WRITE_MULTIPLE_BLOCK
+        !checkReturnOk(rp2040_sdio_tx_start(src, n))) // Start transmission
+    {
+        return false;
+    }
+
+    do {
+        uint32_t bytes_done;
+        g_sdio_error = rp2040_sdio_tx_poll(&bytes_done);
+
+        if (callback)
+        {
+            callback(m_stream_count_start + bytes_done);
+        }
+    } while (g_sdio_error == SDIO_BUSY);
+
+    checkReturnOk(rp2040_sdio_command_R1(CMD12, 0, &reply)); // STOP_TRANSMISSION
+
+    if (g_sdio_error != SDIO_OK)
+    {
+        azdbg("SdioCard::writeSectors(", sector, ",...,", (int)n, ") failed: ", (int)g_sdio_error);
+    }
+
+    return g_sdio_error == SDIO_OK;
 }
 
 bool SdioCard::readSector(uint32_t sector, uint8_t* dst)
 {
-    // return checkReturnOk(sd_block_read((uint32_t*)dst, (uint64_t)sector * 512, 512,
-    //     get_stream_callback(dst, 512)));
+    sd_callback_t callback = get_stream_callback(dst, 512);
+
+    uint32_t reply;
+    if (!checkReturnOk(rp2040_sdio_command_R1(16, 512, &reply)) || // SET_BLOCKLEN
+        !checkReturnOk(rp2040_sdio_rx_start(dst, 1)) || // Prepare for reception
+        !checkReturnOk(rp2040_sdio_command_R1(CMD17, sector, &reply))) // READ_SINGLE_BLOCK
+    {
+        return false;
+    }
+
+    do {
+        uint32_t bytes_done;
+        g_sdio_error = rp2040_sdio_rx_poll(&bytes_done);
+
+        if (callback)
+        {
+            callback(m_stream_count_start + bytes_done);
+        }
+    } while (g_sdio_error == SDIO_BUSY);
+
+    if (g_sdio_error != SDIO_OK)
+    {
+        azdbg("SdioCard::readSector(", sector, ") failed: ", (int)g_sdio_error);
+    }
+
+    return g_sdio_error == SDIO_OK;
 }
 
 bool SdioCard::readSectors(uint32_t sector, uint8_t* dst, size_t n)
 {
-    // return checkReturnOk(sd_multiblocks_read((uint32_t*)dst, (uint64_t)sector * 512, 512, n,
-    //     get_stream_callback(dst, n * 512)));
+    sd_callback_t callback = get_stream_callback(dst, n * 512);
+
+    uint32_t reply;
+    if (!checkReturnOk(rp2040_sdio_command_R1(16, 512, &reply)) || // SET_BLOCKLEN
+        !checkReturnOk(rp2040_sdio_rx_start(dst, n)) || // Prepare for reception
+        !checkReturnOk(rp2040_sdio_command_R1(CMD18, sector, &reply))) // READ_MULTIPLE_BLOCK
+    {
+        return false;
+    }
+
+    do {
+        uint32_t bytes_done;
+        g_sdio_error = rp2040_sdio_rx_poll(&bytes_done);
+
+        if (callback)
+        {
+            callback(m_stream_count_start + bytes_done);
+        }
+    } while (g_sdio_error == SDIO_BUSY);
+
+    checkReturnOk(rp2040_sdio_command_R1(CMD12, 0, &reply)); // STOP_TRANSMISSION
+
+    if (g_sdio_error != SDIO_OK)
+    {
+        azdbg("SdioCard::readSectors(", sector, ",...,", (int)n, ") failed: ", (int)g_sdio_error);
+    }
+
+    return g_sdio_error == SDIO_OK;
 }
 
+// These functions are not used for SDIO mode but are needed to avoid build error.
+void sdCsInit(SdCsPin_t pin) {}
+void sdCsWrite(SdCsPin_t pin, bool level) {}
+
 // SDIO configuration for main program
 SdioConfig g_sd_sdio_config(DMA_SDIO);
 

+ 3 - 30
lib/ZuluSCSI_platform_RP2040/sd_card_spi.cpp

@@ -1,21 +1,16 @@
 // Driver and interface for accessing SD card in SPI mode
-// Normally this is only used for saving crash log in interrupt mode
 
 #include "ZuluSCSI_platform.h"
 #include "ZuluSCSI_log.h"
 #include <hardware/spi.h>
 #include <SdFat.h>
 
+#ifndef SD_USE_SDIO
 
 class RP2040SPIDriver : public SdSpiBaseClass
 {
 public:
     void begin(SdSpiConfig config) {
-        // Make sure pins are routed to SPI
-        gpio_set_function(SD_SPI_SCK,  GPIO_FUNC_SPI);
-        gpio_set_function(SD_SPI_MOSI, GPIO_FUNC_SPI);
-        gpio_set_function(SD_SPI_MISO, GPIO_FUNC_SPI);
-        gpio_set_function(SD_SPI_CS,   GPIO_FUNC_SIO);
     }
 
     void activate() {
@@ -49,43 +44,20 @@ public:
     uint8_t receive(uint8_t* buf, size_t count)
     {
         spi_read_blocking(SD_SPI, 0xFF, buf, count);
-
-        if (m_stream_callback && buf == m_stream_buffer + m_stream_count)
-        {
-            m_stream_count += count;
-            m_stream_callback(m_stream_count);
-        }
-
         return 0;
     }
 
     // Multiple byte send
     void send(const uint8_t* buf, size_t count) {
         spi_write_blocking(SD_SPI, buf, count);
-
-        if (m_stream_callback && buf == m_stream_buffer + m_stream_count)
-        {
-            m_stream_count += count;
-            m_stream_callback(m_stream_count);
-        }
     }
 
     void setSckSpeed(uint32_t maxSck) {
         m_sckfreq = maxSck;
     }
 
-    void set_sd_callback(sd_callback_t func, const uint8_t *buffer)
-    {
-        m_stream_buffer = buffer;
-        m_stream_count = 0;
-        m_stream_callback = func;
-    }
-
 private:
     uint32_t m_sckfreq;
-    const uint8_t *m_stream_buffer;
-    uint32_t m_stream_count;
-    sd_callback_t m_stream_callback;
 };
 
 void sdCsInit(SdCsPin_t pin)
@@ -105,5 +77,6 @@ SdSpiConfig g_sd_spi_config(0, DEDICATED_SPI, SD_SCK_MHZ(25), &g_sd_spi_port);
 
 void azplatform_set_sd_callback(sd_callback_t func, const uint8_t *buffer)
 {
-    g_sd_spi_port.set_sd_callback(func, buffer);
 }
+
+#endif