Ver código fonte

SDIO Edge Case Fix

androda 1 ano atrás
pai
commit
9bb8ad79f3

+ 254 - 172
lib/BlueSCSI_platform_RP2040/rp2040_sdio.cpp

@@ -1,5 +1,6 @@
 // Implementation of SDIO communication for RP2040
 // Copyright (c) 2022 Rabbit Hole Computing™
+// Copyright (c) 2024 Tech by Androda, LLC
 //
 // The RP2040 official work-in-progress code at
 // https://github.com/raspberrypi/pico-extras/tree/master/src/rp2_common/pico_sd_card
@@ -13,7 +14,7 @@
 #include "rp2040_sdio.pio.h"
 #include <hardware/pio.h>
 #include <hardware/dma.h>
-#include <hardware/gpio.h>
+//#include <hardware/gpio.h>
 #include <BlueSCSI_platform.h>
 #include <BlueSCSI_log.h>
 
@@ -29,7 +30,8 @@
 enum sdio_transfer_state_t { SDIO_IDLE, SDIO_RX, SDIO_TX, SDIO_TX_WAIT_IDLE};
 
 static struct {
-    uint32_t pio_cmd_clk_offset;
+    uint32_t pio_cmd_rsp_clk_offset;
+    pio_sm_config pio_cfg_cmd_rsp;
     uint32_t pio_data_rx_offset;
     pio_sm_config pio_cfg_data_rx;
     uint32_t pio_data_tx_offset;
@@ -42,7 +44,7 @@ static struct {
     uint32_t total_blocks; // Total number of blocks to transfer
     uint32_t blocks_checksumed; // Number of blocks that have had CRC calculated
     uint32_t checksum_errors; // Number of checksum errors detected
-
+    uint8_t cmdBuf[6];
     // Variables for block writes
     uint64_t next_wr_block_checksum;
     uint32_t end_token_buf[3]; // CRC and end token for write block
@@ -73,22 +75,38 @@ void rp2040_sdio_dma_irq();
 //    crc = crc7_table[crc ^ byte];
 //    .. repeat for every byte ..
 static const uint8_t crc7_table[256] = {
-	0x00, 0x12, 0x24, 0x36, 0x48, 0x5a, 0x6c, 0x7e,	0x90, 0x82, 0xb4, 0xa6, 0xd8, 0xca, 0xfc, 0xee,
-	0x32, 0x20, 0x16, 0x04, 0x7a, 0x68, 0x5e, 0x4c,	0xa2, 0xb0, 0x86, 0x94, 0xea, 0xf8, 0xce, 0xdc,
-	0x64, 0x76, 0x40, 0x52, 0x2c, 0x3e, 0x08, 0x1a,	0xf4, 0xe6, 0xd0, 0xc2, 0xbc, 0xae, 0x98, 0x8a,
-	0x56, 0x44, 0x72, 0x60, 0x1e, 0x0c, 0x3a, 0x28,	0xc6, 0xd4, 0xe2, 0xf0, 0x8e, 0x9c, 0xaa, 0xb8,
-	0xc8, 0xda, 0xec, 0xfe, 0x80, 0x92, 0xa4, 0xb6,	0x58, 0x4a, 0x7c, 0x6e, 0x10, 0x02, 0x34, 0x26,
-	0xfa, 0xe8, 0xde, 0xcc, 0xb2, 0xa0, 0x96, 0x84,	0x6a, 0x78, 0x4e, 0x5c, 0x22, 0x30, 0x06, 0x14,
-	0xac, 0xbe, 0x88, 0x9a, 0xe4, 0xf6, 0xc0, 0xd2,	0x3c, 0x2e, 0x18, 0x0a, 0x74, 0x66, 0x50, 0x42,
-	0x9e, 0x8c, 0xba, 0xa8, 0xd6, 0xc4, 0xf2, 0xe0,	0x0e, 0x1c, 0x2a, 0x38, 0x46, 0x54, 0x62, 0x70,
-	0x82, 0x90, 0xa6, 0xb4, 0xca, 0xd8, 0xee, 0xfc,	0x12, 0x00, 0x36, 0x24, 0x5a, 0x48, 0x7e, 0x6c,
-	0xb0, 0xa2, 0x94, 0x86, 0xf8, 0xea, 0xdc, 0xce,	0x20, 0x32, 0x04, 0x16, 0x68, 0x7a, 0x4c, 0x5e,
-	0xe6, 0xf4, 0xc2, 0xd0, 0xae, 0xbc, 0x8a, 0x98,	0x76, 0x64, 0x52, 0x40, 0x3e, 0x2c, 0x1a, 0x08,
-	0xd4, 0xc6, 0xf0, 0xe2, 0x9c, 0x8e, 0xb8, 0xaa,	0x44, 0x56, 0x60, 0x72, 0x0c, 0x1e, 0x28, 0x3a,
-	0x4a, 0x58, 0x6e, 0x7c, 0x02, 0x10, 0x26, 0x34,	0xda, 0xc8, 0xfe, 0xec, 0x92, 0x80, 0xb6, 0xa4,
-	0x78, 0x6a, 0x5c, 0x4e, 0x30, 0x22, 0x14, 0x06,	0xe8, 0xfa, 0xcc, 0xde, 0xa0, 0xb2, 0x84, 0x96,
-	0x2e, 0x3c, 0x0a, 0x18, 0x66, 0x74, 0x42, 0x50,	0xbe, 0xac, 0x9a, 0x88, 0xf6, 0xe4, 0xd2, 0xc0,
-	0x1c, 0x0e, 0x38, 0x2a, 0x54, 0x46, 0x70, 0x62,	0x8c, 0x9e, 0xa8, 0xba, 0xc4, 0xd6, 0xe0, 0xf2
+	0x00, 0x12, 0x24, 0x36, 0x48, 0x5a, 0x6c, 0x7e,
+    0x90, 0x82, 0xb4, 0xa6, 0xd8, 0xca, 0xfc, 0xee,
+	0x32, 0x20, 0x16, 0x04, 0x7a, 0x68, 0x5e, 0x4c,
+    0xa2, 0xb0, 0x86, 0x94, 0xea, 0xf8, 0xce, 0xdc,
+	0x64, 0x76, 0x40, 0x52, 0x2c, 0x3e, 0x08, 0x1a,
+    0xf4, 0xe6, 0xd0, 0xc2, 0xbc, 0xae, 0x98, 0x8a,
+	0x56, 0x44, 0x72, 0x60, 0x1e, 0x0c, 0x3a, 0x28,
+    0xc6, 0xd4, 0xe2, 0xf0, 0x8e, 0x9c, 0xaa, 0xb8,
+	0xc8, 0xda, 0xec, 0xfe, 0x80, 0x92, 0xa4, 0xb6,
+    0x58, 0x4a, 0x7c, 0x6e, 0x10, 0x02, 0x34, 0x26,
+	0xfa, 0xe8, 0xde, 0xcc, 0xb2, 0xa0, 0x96, 0x84,
+    0x6a, 0x78, 0x4e, 0x5c, 0x22, 0x30, 0x06, 0x14,
+	0xac, 0xbe, 0x88, 0x9a, 0xe4, 0xf6, 0xc0, 0xd2,
+    0x3c, 0x2e, 0x18, 0x0a, 0x74, 0x66, 0x50, 0x42,
+	0x9e, 0x8c, 0xba, 0xa8, 0xd6, 0xc4, 0xf2, 0xe0,
+    0x0e, 0x1c, 0x2a, 0x38, 0x46, 0x54, 0x62, 0x70,
+	0x82, 0x90, 0xa6, 0xb4, 0xca, 0xd8, 0xee, 0xfc,
+    0x12, 0x00, 0x36, 0x24, 0x5a, 0x48, 0x7e, 0x6c,
+	0xb0, 0xa2, 0x94, 0x86, 0xf8, 0xea, 0xdc, 0xce,
+    0x20, 0x32, 0x04, 0x16, 0x68, 0x7a, 0x4c, 0x5e,
+	0xe6, 0xf4, 0xc2, 0xd0, 0xae, 0xbc, 0x8a, 0x98,
+    0x76, 0x64, 0x52, 0x40, 0x3e, 0x2c, 0x1a, 0x08,
+	0xd4, 0xc6, 0xf0, 0xe2, 0x9c, 0x8e, 0xb8, 0xaa,
+    0x44, 0x56, 0x60, 0x72, 0x0c, 0x1e, 0x28, 0x3a,
+	0x4a, 0x58, 0x6e, 0x7c, 0x02, 0x10, 0x26, 0x34,
+    0xda, 0xc8, 0xfe, 0xec, 0x92, 0x80, 0xb6, 0xa4,
+	0x78, 0x6a, 0x5c, 0x4e, 0x30, 0x22, 0x14, 0x06,
+    0xe8, 0xfa, 0xcc, 0xde, 0xa0, 0xb2, 0x84, 0x96,
+	0x2e, 0x3c, 0x0a, 0x18, 0x66, 0x74, 0x42, 0x50,
+    0xbe, 0xac, 0x9a, 0x88, 0xf6, 0xe4, 0xd2, 0xc0,
+	0x1c, 0x0e, 0x38, 0x2a, 0x54, 0x46, 0x70, 0x62,
+    0x8c, 0x9e, 0xa8, 0xba, 0xc4, 0xd6, 0xe0, 0xf2
 };
 
 // Calculate the CRC16 checksum for parallel 4 bit lines separately.
@@ -129,147 +147,204 @@ uint64_t sdio_crc16_4bit_checksum(uint32_t *data, uint32_t num_words)
     return crc;
 }
 
+
+/*******************************************************
+ * Clock Runner
+ *******************************************************/
+void cycleSdClock() {
+    pio_sm_exec(SDIO_PIO, SDIO_CMD_SM, pio_encode_nop() | pio_encode_sideset_opt(1, 1) | pio_encode_delay(1));
+    pio_sm_exec(SDIO_PIO, SDIO_CMD_SM, pio_encode_nop() | pio_encode_sideset_opt(1, 0) | pio_encode_delay(1));
+}
+
 /*******************************************************
  * Basic SDIO command execution
  *******************************************************/
 
 static void sdio_send_command(uint8_t command, uint32_t arg, uint8_t response_bits)
 {
-    // debuglog("SDIO Command: ", (int)command, " arg ", arg);
-
-    // Format the arguments in the way expected by the PIO code.
-    uint32_t word0 =
-        (47 << 24) | // Number of bits in command minus one
-        ( 1 << 22) | // Transfer direction from host to card
-        (command << 16) | // Command byte
-        (((arg >> 24) & 0xFF) << 8) | // MSB byte of argument
-        (((arg >> 16) & 0xFF) << 0);
-
-    uint32_t word1 =
-        (((arg >> 8) & 0xFF) << 24) |
-        (((arg >> 0) & 0xFF) << 16) | // LSB byte of argument
-        ( 1 << 8); // End bit
-
-    // Set number of bits in response minus one, or leave at 0 if no response expected
-    if (response_bits)
-    {
-        word1 |= ((response_bits - 1) << 0);
-    }
+    // if (command != 41 && command != 55) {
+    //     log("C: ", (int)command, " A: ", arg);
+    // }
+    io_wo_8* txFifo = reinterpret_cast<io_wo_8*>(&SDIO_PIO->txf[SDIO_CMD_SM]);
+
+    // Reinitialize the CMD SM
+    pio_sm_init(SDIO_PIO, SDIO_CMD_SM, g_sdio.pio_cmd_rsp_clk_offset, &g_sdio.pio_cfg_cmd_rsp);
+    pio_sm_set_consecutive_pindirs(SDIO_PIO, SDIO_CMD_SM, SDIO_CLK, 1, true);
+    pio_sm_set_consecutive_pindirs(SDIO_PIO, SDIO_CMD_SM, SDIO_CMD, 1, true);
+    pio_sm_set_consecutive_pindirs(SDIO_PIO, SDIO_CMD_SM, SDIO_D0, 4, false);
+
+    // Pin direction: output, initial state should be high
+    pio_sm_exec(SDIO_PIO, SDIO_CMD_SM, pio_encode_set(pio_pins, 1));
+    pio_sm_exec(SDIO_PIO, SDIO_CMD_SM, pio_encode_set(pio_pindirs, 1));
+
+    // Write the number of tx / rx bits to the SM
+    *txFifo = 55;  // Write 56 bits total
+    pio_sm_exec(SDIO_PIO, SDIO_CMD_SM, pio_encode_out(pio_x, 8));
+    *txFifo = response_bits ? response_bits - 1 : 0;    // Bit count to receive
+    pio_sm_exec(SDIO_PIO, SDIO_CMD_SM, pio_encode_out(pio_y, 8));
+    pio_sm_set_enabled(SDIO_PIO, SDIO_CMD_SM, true);
+
+    // Build the command bytes (commands are 48 bits long)
+    g_sdio.cmdBuf[0] = command | 0x40;
+    g_sdio.cmdBuf[1] = (uint8_t)(arg >> 24U);
+    g_sdio.cmdBuf[2] = (uint8_t)(arg >> 16U);
+    g_sdio.cmdBuf[3] = (uint8_t)(arg >> 8U);
+    g_sdio.cmdBuf[4] = (uint8_t)arg;
 
-    // Calculate checksum in the order that the bytes will be transmitted (big-endian)
+    // Get the SM clocking while we calculate CRCs
+    *txFifo = 0XFF;
+
+    // CRC calculation
     uint8_t crc = 0;
-    crc = crc7_table[crc ^ ((word0 >> 16) & 0xFF)];
-    crc = crc7_table[crc ^ ((word0 >>  8) & 0xFF)];
-    crc = crc7_table[crc ^ ((word0 >>  0) & 0xFF)];
-    crc = crc7_table[crc ^ ((word1 >> 24) & 0xFF)];
-    crc = crc7_table[crc ^ ((word1 >> 16) & 0xFF)];
-    word1 |= crc << 8;
-
-    // Transmit command
-    pio_sm_clear_fifos(SDIO_PIO, SDIO_CMD_SM);
-    pio_sm_put(SDIO_PIO, SDIO_CMD_SM, word0);
-    pio_sm_put(SDIO_PIO, SDIO_CMD_SM, word1);
+    for(uint8_t i = 0; i < 5; i++) {
+        crc = crc7_table[crc ^ g_sdio.cmdBuf[i]];
+    }
+    crc = crc | 0x1;
+    g_sdio.cmdBuf[5] = crc;
+
+    dma_channel_config dmacfg = dma_channel_get_default_config(SDIO_DMA_CH);
+    channel_config_set_transfer_data_size(&dmacfg, DMA_SIZE_8);
+    channel_config_set_read_increment(&dmacfg, true);
+    channel_config_set_write_increment(&dmacfg, false);
+    channel_config_set_dreq(&dmacfg, pio_get_dreq(SDIO_PIO, SDIO_CMD_SM, true));
+    dma_channel_configure(SDIO_DMA_CH, &dmacfg, &SDIO_PIO->txf[SDIO_CMD_SM], &g_sdio.cmdBuf, 6, true);
 }
 
 sdio_status_t rp2040_sdio_command_R1(uint8_t command, uint32_t arg, uint32_t *response)
 {
+    uint32_t resp[2];
+    if (response) {
+        dma_channel_config dmacfg = dma_channel_get_default_config(SDIO_DMA_CHB);
+        channel_config_set_transfer_data_size(&dmacfg, DMA_SIZE_8);
+        channel_config_set_read_increment(&dmacfg, false);
+        channel_config_set_write_increment(&dmacfg, true);
+        channel_config_set_dreq(&dmacfg, pio_get_dreq(SDIO_PIO, SDIO_CMD_SM, false));  //6 * 8 = 48 bits
+        dma_channel_configure(SDIO_DMA_CHB, &dmacfg, &resp, &SDIO_PIO->rxf[SDIO_CMD_SM], 6, true);
+    }
+
     sdio_send_command(command, arg, response ? 48 : 0);
 
-    // Wait for response
     uint32_t start = millis();
-    uint32_t wait_words = response ? 2 : 1;
-    while (pio_sm_get_rx_fifo_level(SDIO_PIO, SDIO_CMD_SM) < wait_words)
+    if (response)
     {
-        if ((uint32_t)(millis() - start) > 2)
+        // Wait for DMA channel to receive response
+        while (dma_channel_is_busy(SDIO_DMA_CHB))
         {
-            if (command != 8) // Don't log for missing SD card
+            if ((uint32_t)(millis() - start) > 2)
             {
-                debuglog("Timeout waiting for response in rp2040_sdio_command_R1(", (int)command, "), ",
-                    "PIO PC: ", (int)pio_sm_get_pc(SDIO_PIO, SDIO_CMD_SM) - (int)g_sdio.pio_cmd_clk_offset,
-                    " RXF: ", (int)pio_sm_get_rx_fifo_level(SDIO_PIO, SDIO_CMD_SM),
-                    " TXF: ", (int)pio_sm_get_tx_fifo_level(SDIO_PIO, SDIO_CMD_SM));
-            }
+                if (command != 8) {
+                    /*debug*/log("Timeout waiting for response in rp2040_sdio_command_R1(", (int)command, "), ",
+                        "PIO PC: ", (int)pio_sm_get_pc(SDIO_PIO, SDIO_CMD_SM) - (int)g_sdio.pio_cmd_rsp_clk_offset,
+                        " RXF: ", (int)pio_sm_get_rx_fifo_level(SDIO_PIO, SDIO_CMD_SM),
+                        " TXF: ", (int)pio_sm_get_tx_fifo_level(SDIO_PIO, SDIO_CMD_SM));
+                }
 
-            // Reset the state machine program
-            pio_sm_clear_fifos(SDIO_PIO, SDIO_CMD_SM);
-            pio_sm_exec(SDIO_PIO, SDIO_CMD_SM, pio_encode_jmp(g_sdio.pio_cmd_clk_offset));
-            return SDIO_ERR_RESPONSE_TIMEOUT;
+                // Reset the state machine program
+                dma_channel_abort(SDIO_DMA_CHB);
+                pio_sm_set_enabled(SDIO_PIO, SDIO_CMD_SM, false);  // Turn off the CMD SM, there was an error
+                pio_sm_clear_fifos(SDIO_PIO, SDIO_CMD_SM);
+                return SDIO_ERR_RESPONSE_TIMEOUT;
+            }
         }
-    }
-
-    if (response)
-    {
-        // Read out response packet
-        uint32_t resp0 = pio_sm_get(SDIO_PIO, SDIO_CMD_SM);
-        uint32_t resp1 = pio_sm_get(SDIO_PIO, SDIO_CMD_SM);
+        // Must bswap due to 8 bit segmentation
+        resp[0] = __builtin_bswap32(resp[0]);
+        resp[1] = __builtin_bswap32(resp[1]) >> 16;
         // debuglog("SDIO R1 response: ", resp0, " ", resp1);
 
         // Calculate response checksum
         uint8_t crc = 0;
-        crc = crc7_table[crc ^ ((resp0 >> 24) & 0xFF)];
-        crc = crc7_table[crc ^ ((resp0 >> 16) & 0xFF)];
-        crc = crc7_table[crc ^ ((resp0 >>  8) & 0xFF)];
-        crc = crc7_table[crc ^ ((resp0 >>  0) & 0xFF)];
-        crc = crc7_table[crc ^ ((resp1 >>  8) & 0xFF)];
+        crc = crc7_table[crc ^ ((resp[0] >> 24) & 0xFF)];
+        crc = crc7_table[crc ^ ((resp[0] >> 16) & 0xFF)];
+        crc = crc7_table[crc ^ ((resp[0] >>  8) & 0xFF)];
+        crc = crc7_table[crc ^ ((resp[0] >>  0) & 0xFF)];
+        crc = crc7_table[crc ^ ((resp[1] >>  8) & 0xFF)];
 
-        uint8_t actual_crc = ((resp1 >> 0) & 0xFE);
+        uint8_t actual_crc = ((resp[1] >> 0) & 0xFE);
         if (crc != actual_crc)
         {
             debuglog("rp2040_sdio_command_R1(", (int)command, "): CRC error, calculated ", crc, " packet has ", actual_crc);
+            debuglog("resp[0]:", resp[0], "resp[1]:", resp[1]);
             return SDIO_ERR_RESPONSE_CRC;
         }
 
-        uint8_t response_cmd = ((resp0 >> 24) & 0xFF);
+        uint8_t response_cmd = ((resp[0] >> 24) & 0xFF);
         if (response_cmd != command && command != 41)
         {
             debuglog("rp2040_sdio_command_R1(", (int)command, "): received reply for ", (int)response_cmd);
             return SDIO_ERR_RESPONSE_CODE;
         }
 
-        *response = ((resp0 & 0xFFFFFF) << 8) | ((resp1 >> 8) & 0xFF);
-    }
-    else
-    {
-        // Read out dummy marker
-        pio_sm_get(SDIO_PIO, SDIO_CMD_SM);
+        *response = ((resp[0] & 0xFFFFFF) << 8) | ((resp[1] >> 8) & 0xFF);
+    } else {
+        // Wait for CMD SM TX FIFO Stall (all command bits were sent)
+        uint32_t tx_stall_flag = 1u << (PIO_FDEBUG_TXSTALL_LSB + SDIO_CMD_SM);
+        // Clear the stall marker
+        SDIO_PIO->fdebug = tx_stall_flag;
+        // Wait for the stall
+        while (!(SDIO_PIO->fdebug & tx_stall_flag)) {
+            if ((uint32_t)(millis() - start) > 2)
+            {
+                if (command != 8) {
+                    /*debug*/log("Timeout waiting for CMD TX in rp2040_sdio_command_R1(", (int)command, "), ",
+                        "PIO PC: ", (int)pio_sm_get_pc(SDIO_PIO, SDIO_CMD_SM) - (int)g_sdio.pio_cmd_rsp_clk_offset,
+                        " RXF: ", (int)pio_sm_get_rx_fifo_level(SDIO_PIO, SDIO_CMD_SM),
+                        " TXF: ", (int)pio_sm_get_tx_fifo_level(SDIO_PIO, SDIO_CMD_SM));
+                }
+
+                // Reset the state machine program
+                pio_sm_set_enabled(SDIO_PIO, SDIO_CMD_SM, false);  // Turn off the CMD SM, there was an error
+                pio_sm_clear_fifos(SDIO_PIO, SDIO_CMD_SM);
+                return SDIO_ERR_RESPONSE_TIMEOUT;
+            }
+        }
     }
 
+    pio_sm_set_enabled(SDIO_PIO, SDIO_CMD_SM, false);
+    pio_sm_clear_fifos(SDIO_PIO, SDIO_CMD_SM);
     return SDIO_OK;
 }
 
 sdio_status_t rp2040_sdio_command_R2(uint8_t command, uint32_t arg, uint8_t response[16])
 {
     // The response is too long to fit in the PIO FIFO, so use DMA to receive it.
-    pio_sm_clear_fifos(SDIO_PIO, SDIO_CMD_SM);
     uint32_t response_buf[5];
-    dma_channel_config dmacfg = dma_channel_get_default_config(SDIO_DMA_CH);
-    channel_config_set_transfer_data_size(&dmacfg, DMA_SIZE_32);
+    dma_channel_config dmacfg = dma_channel_get_default_config(SDIO_DMA_CHB);
+    channel_config_set_transfer_data_size(&dmacfg, DMA_SIZE_8);
     channel_config_set_read_increment(&dmacfg, false);
     channel_config_set_write_increment(&dmacfg, true);
-    channel_config_set_dreq(&dmacfg, pio_get_dreq(SDIO_PIO, SDIO_CMD_SM, false));
-    dma_channel_configure(SDIO_DMA_CH, &dmacfg, &response_buf, &SDIO_PIO->rxf[SDIO_CMD_SM], 5, true);
+    channel_config_set_dreq(&dmacfg, pio_get_dreq(SDIO_PIO, SDIO_CMD_SM, false));          //17 * 8 = 136
+    dma_channel_configure(SDIO_DMA_CHB, &dmacfg, &response_buf, &SDIO_PIO->rxf[SDIO_CMD_SM], 17, true);
 
     sdio_send_command(command, arg, 136);
 
     uint32_t start = millis();
-    while (dma_channel_is_busy(SDIO_DMA_CH))
+    while (dma_channel_is_busy(SDIO_DMA_CHB))
     {
         if ((uint32_t)(millis() - start) > 2)
         {
             debuglog("Timeout waiting for response in rp2040_sdio_command_R2(", (int)command, "), ",
-                  "PIO PC: ", (int)pio_sm_get_pc(SDIO_PIO, SDIO_CMD_SM) - (int)g_sdio.pio_cmd_clk_offset,
+                  "PIO PC: ", (int)pio_sm_get_pc(SDIO_PIO, SDIO_CMD_SM) - (int)g_sdio.pio_cmd_rsp_clk_offset,
                   " RXF: ", (int)pio_sm_get_rx_fifo_level(SDIO_PIO, SDIO_CMD_SM),
                   " TXF: ", (int)pio_sm_get_tx_fifo_level(SDIO_PIO, SDIO_CMD_SM));
 
             // Reset the state machine program
-            dma_channel_abort(SDIO_DMA_CH);
+            dma_channel_abort(SDIO_DMA_CHB);
+            pio_sm_set_enabled(SDIO_PIO, SDIO_CMD_SM, false);  // Turn off the CMD SM, there was an error
             pio_sm_clear_fifos(SDIO_PIO, SDIO_CMD_SM);
-            pio_sm_exec(SDIO_PIO, SDIO_CMD_SM, pio_encode_jmp(g_sdio.pio_cmd_clk_offset));
             return SDIO_ERR_RESPONSE_TIMEOUT;
         }
     }
 
-    dma_channel_abort(SDIO_DMA_CH);
+    pio_sm_set_enabled(SDIO_PIO, SDIO_CMD_SM, false);  // Turn off the CMD SM, its job is done
+    pio_sm_clear_fifos(SDIO_PIO, SDIO_CMD_SM);
+    dma_channel_abort(SDIO_DMA_CHB);
+
+    // Must byte swap because receiving 8-bit chunks instead of 32 bit
+    response_buf[0] = __builtin_bswap32(response_buf[0]);
+    response_buf[1] = __builtin_bswap32(response_buf[1]);
+    response_buf[2] = __builtin_bswap32(response_buf[2]);
+    response_buf[3] = __builtin_bswap32(response_buf[3]);
+    response_buf[4] = __builtin_bswap32(response_buf[4]) >> 24;
 
     // Copy the response payload to output buffer
     response[0]  = ((response_buf[0] >> 16) & 0xFF);
@@ -316,30 +391,43 @@ sdio_status_t rp2040_sdio_command_R2(uint8_t command, uint32_t arg, uint8_t resp
 
 sdio_status_t rp2040_sdio_command_R3(uint8_t command, uint32_t arg, uint32_t *response)
 {
+    uint32_t resp[2];
+    dma_channel_config dmacfg = dma_channel_get_default_config(SDIO_DMA_CHB);
+    channel_config_set_transfer_data_size(&dmacfg, DMA_SIZE_8);
+    channel_config_set_read_increment(&dmacfg, false);
+    channel_config_set_write_increment(&dmacfg, true);
+    channel_config_set_dreq(&dmacfg, pio_get_dreq(SDIO_PIO, SDIO_CMD_SM, false));  //6 * 8 = 48 bits
+    dma_channel_configure(SDIO_DMA_CHB, &dmacfg, &resp, &SDIO_PIO->rxf[SDIO_CMD_SM], 6, true);
+        
     sdio_send_command(command, arg, 48);
 
     // Wait for response
     uint32_t start = millis();
-    while (pio_sm_get_rx_fifo_level(SDIO_PIO, SDIO_CMD_SM) < 2)
+    while (dma_channel_is_busy(SDIO_DMA_CHB))
     {
         if ((uint32_t)(millis() - start) > 2)
         {
             debuglog("Timeout waiting for response in rp2040_sdio_command_R3(", (int)command, "), ",
-                  "PIO PC: ", (int)pio_sm_get_pc(SDIO_PIO, SDIO_CMD_SM) - (int)g_sdio.pio_cmd_clk_offset,
+                  "PIO PC: ", (int)pio_sm_get_pc(SDIO_PIO, SDIO_CMD_SM) - (int)g_sdio.pio_cmd_rsp_clk_offset,
                   " RXF: ", (int)pio_sm_get_rx_fifo_level(SDIO_PIO, SDIO_CMD_SM),
                   " TXF: ", (int)pio_sm_get_tx_fifo_level(SDIO_PIO, SDIO_CMD_SM));
 
             // Reset the state machine program
+            dma_channel_abort(SDIO_DMA_CHB);
+            pio_sm_set_enabled(SDIO_PIO, SDIO_CMD_SM, false);  // Turn off the CMD SM, there was an error
             pio_sm_clear_fifos(SDIO_PIO, SDIO_CMD_SM);
-            pio_sm_exec(SDIO_PIO, SDIO_CMD_SM, pio_encode_jmp(g_sdio.pio_cmd_clk_offset));
             return SDIO_ERR_RESPONSE_TIMEOUT;
         }
     }
 
-    // Read out response packet
-    uint32_t resp0 = pio_sm_get(SDIO_PIO, SDIO_CMD_SM);
-    uint32_t resp1 = pio_sm_get(SDIO_PIO, SDIO_CMD_SM);
-    *response = ((resp0 & 0xFFFFFF) << 8) | ((resp1 >> 8) & 0xFF);
+    pio_sm_set_enabled(SDIO_PIO, SDIO_CMD_SM, false);  // Turn off the CMD SM, its job is done
+    pio_sm_clear_fifos(SDIO_PIO, SDIO_CMD_SM);
+
+    // Must bswap due to 8 bit transfer
+    resp[0] = __builtin_bswap32(resp[0]);
+    resp[1] = __builtin_bswap32(resp[1]) >> 16;
+    
+    *response = ((resp[0] & 0xFFFFFF) << 8) | ((resp[1] >> 8) & 0xFF);
     // debuglog("SDIO R3 response: ", resp0, " ", resp1);
 
     return SDIO_OK;
@@ -396,6 +484,7 @@ sdio_status_t rp2040_sdio_rx_start(uint8_t *buffer, uint32_t num_blocks)
 
     // Initialize PIO state machine
     pio_sm_init(SDIO_PIO, SDIO_DATA_SM, g_sdio.pio_data_rx_offset, &g_sdio.pio_cfg_data_rx);
+    pio_sm_set_consecutive_pindirs(SDIO_PIO, SDIO_DATA_SM, SDIO_CLK, 1, true);
     pio_sm_set_consecutive_pindirs(SDIO_PIO, SDIO_DATA_SM, SDIO_D0, 4, false);
 
     // Write number of nibbles to receive to Y register
@@ -473,6 +562,7 @@ sdio_status_t rp2040_sdio_rx_poll(uint32_t *bytes_complete)
 
     if (g_sdio.transfer_state == SDIO_IDLE)
     {
+        pio_sm_set_enabled(SDIO_PIO, SDIO_DATA_SM, false);
         // Verify all remaining checksums.
         sdio_verify_rx_checksums(g_sdio.total_blocks);
 
@@ -487,7 +577,8 @@ sdio_status_t rp2040_sdio_rx_poll(uint32_t *bytes_complete)
             "PIO PC: ", (int)pio_sm_get_pc(SDIO_PIO, SDIO_DATA_SM) - (int)g_sdio.pio_data_rx_offset,
             " RXF: ", (int)pio_sm_get_rx_fifo_level(SDIO_PIO, SDIO_DATA_SM),
             " TXF: ", (int)pio_sm_get_tx_fifo_level(SDIO_PIO, SDIO_DATA_SM),
-            " DMA CNT: ", dma_hw->ch[SDIO_DMA_CH].al2_transfer_count);
+            " DMA CNT: ", dma_hw->ch[SDIO_DMA_CH].al2_transfer_count,
+            " BD: ", g_sdio.blocks_done);
         rp2040_sdio_stop();
         return SDIO_ERR_DATA_TIMEOUT;
     }
@@ -502,19 +593,24 @@ sdio_status_t rp2040_sdio_rx_poll(uint32_t *bytes_complete)
 
 static void sdio_start_next_block_tx()
 {
-    // Initialize PIO
-    pio_sm_init(SDIO_PIO, SDIO_DATA_SM, g_sdio.pio_data_tx_offset, &g_sdio.pio_cfg_data_tx);
+    // Initialize PIOs
+    pio_sm_init(SDIO_PIO, SDIO_CMD_SM, g_sdio.pio_data_tx_offset, &g_sdio.pio_cfg_data_tx);
+
+    // Re-apply the pin output values and pin directions after re-initializing the state machine
+    pio_sm_set_pins(SDIO_PIO, SDIO_CMD_SM, 0xF);
+    pio_sm_set_consecutive_pindirs(SDIO_PIO, SDIO_CMD_SM, SDIO_CLK, 1, true);
+    pio_sm_set_consecutive_pindirs(SDIO_PIO, SDIO_CMD_SM, SDIO_D0, 4, true);
 
     // Configure DMA to send the data block payload (512 bytes)
     dma_channel_config dmacfg = dma_channel_get_default_config(SDIO_DMA_CH);
     channel_config_set_transfer_data_size(&dmacfg, DMA_SIZE_32);
     channel_config_set_read_increment(&dmacfg, true);
     channel_config_set_write_increment(&dmacfg, false);
-    channel_config_set_dreq(&dmacfg, pio_get_dreq(SDIO_PIO, SDIO_DATA_SM, true));
+    channel_config_set_dreq(&dmacfg, pio_get_dreq(SDIO_PIO, SDIO_CMD_SM, true));
     channel_config_set_bswap(&dmacfg, true);
     channel_config_set_chain_to(&dmacfg, SDIO_DMA_CHB);
     dma_channel_configure(SDIO_DMA_CH, &dmacfg,
-        &SDIO_PIO->txf[SDIO_DATA_SM], g_sdio.data_buf + g_sdio.blocks_done * SDIO_WORDS_PER_BLOCK,
+        &SDIO_PIO->txf[SDIO_CMD_SM], g_sdio.data_buf + g_sdio.blocks_done * SDIO_WORDS_PER_BLOCK,
         SDIO_WORDS_PER_BLOCK, false);
 
     // Prepare second DMA channel to send the CRC and block end marker
@@ -524,28 +620,30 @@ static void sdio_start_next_block_tx()
     g_sdio.end_token_buf[2] = 0xFFFFFFFF;
     channel_config_set_bswap(&dmacfg, false);
     dma_channel_configure(SDIO_DMA_CHB, &dmacfg,
-        &SDIO_PIO->txf[SDIO_DATA_SM], g_sdio.end_token_buf, 3, false);
+        &SDIO_PIO->txf[SDIO_CMD_SM], g_sdio.end_token_buf, 3, false);
 
     // Enable IRQ to trigger when block is done
     dma_hw->ints1 = 1 << SDIO_DMA_CHB;
     dma_set_irq1_channel_mask_enabled(1 << SDIO_DMA_CHB, 1);
 
-    // Initialize register X with nibble count and register Y with response bit count
-    pio_sm_put(SDIO_PIO, SDIO_DATA_SM, 1048);
-    pio_sm_exec(SDIO_PIO, SDIO_DATA_SM, pio_encode_out(pio_x, 32));
-    pio_sm_put(SDIO_PIO, SDIO_DATA_SM, 31);
-    pio_sm_exec(SDIO_PIO, SDIO_DATA_SM, pio_encode_out(pio_y, 32));
+    // Initialize register X with nibble count
+    pio_sm_put(SDIO_PIO, SDIO_CMD_SM, 1048);
+    pio_sm_exec(SDIO_PIO, SDIO_CMD_SM, pio_encode_out(pio_x, 32));
+
+    // Initialize CRC receiver Y bit count
+    pio_sm_put(SDIO_PIO, SDIO_CMD_SM, 7);
+    pio_sm_exec(SDIO_PIO, SDIO_CMD_SM, pio_encode_out(pio_y, 32));
 
     // Initialize pins to output and high
-    pio_sm_exec(SDIO_PIO, SDIO_DATA_SM, pio_encode_set(pio_pins, 15));
-    pio_sm_exec(SDIO_PIO, SDIO_DATA_SM, pio_encode_set(pio_pindirs, 15));
+    pio_sm_exec(SDIO_PIO, SDIO_CMD_SM, pio_encode_set(pio_pins, 15));
+    pio_sm_exec(SDIO_PIO, SDIO_CMD_SM, pio_encode_set(pio_pindirs, 15));
 
     // Write start token and start the DMA transfer.
-    pio_sm_put(SDIO_PIO, SDIO_DATA_SM, 0xFFFFFFF0);
+    pio_sm_put(SDIO_PIO, SDIO_CMD_SM, 0xFFFFFFF0);
     dma_channel_start(SDIO_DMA_CH);
 
     // Start state machine
-    pio_sm_set_enabled(SDIO_PIO, SDIO_DATA_SM, true);
+    pio_set_sm_mask_enabled(SDIO_PIO, (1ul << SDIO_CMD_SM)/* | (1ul << SDIO_DATA_SM)*/, true);
 }
 
 static void sdio_compute_next_tx_checksum()
@@ -587,28 +685,21 @@ sdio_status_t rp2040_sdio_tx_start(const uint8_t *buffer, uint32_t num_blocks)
 
 sdio_status_t check_sdio_write_response(uint32_t card_response)
 {
-    // Shift card response until top bit is 0 (the start bit)
-    // The format of response is poorly documented in SDIO spec but refer to e.g.
-    // http://my-cool-projects.blogspot.com/2013/02/the-mysterious-sd-card-crc-status.html
-    uint32_t resp = card_response;
-    if (!(~resp & 0xFFFF0000)) resp <<= 16;
-    if (!(~resp & 0xFF000000)) resp <<= 8;
-    if (!(~resp & 0xF0000000)) resp <<= 4;
-    if (!(~resp & 0xC0000000)) resp <<= 2;
-    if (!(~resp & 0x80000000)) resp <<= 1;
-
-    uint32_t wr_status = (resp >> 28) & 7;
-
-    if (wr_status == 2)
+    uint8_t wr_status = card_response & 0x1F;
+    //  5 = 0b0101 = data accepted  (11100101)
+    // 11 = 0b1011 = CRC error      (11101011)
+    // 13 = 0b1101 = Write Error    (11101101)
+
+    if (wr_status == 0b101)
     {
         return SDIO_OK;
     }
-    else if (wr_status == 5)
+    else if (wr_status == 0b1011)
     {
         log("SDIO card reports write CRC error, status ", card_response);
         return SDIO_ERR_WRITE_CRC;
     }
-    else if (wr_status == 6)
+    else if (wr_status == 0b1101)
     {
         log("SDIO card reports write failure, status ", card_response);
         return SDIO_ERR_WRITE_FAIL;
@@ -632,21 +723,21 @@ static void rp2040_sdio_tx_irq()
             // Main data transfer is finished now.
             // When card is ready, PIO will put card response on RX fifo
             g_sdio.transfer_state = SDIO_TX_WAIT_IDLE;
-            if (!pio_sm_is_rx_fifo_empty(SDIO_PIO, SDIO_DATA_SM))
+            if (!pio_sm_is_rx_fifo_empty(SDIO_PIO, SDIO_CMD_SM))
             {
                 // Card is already idle
-                g_sdio.card_response = pio_sm_get(SDIO_PIO, SDIO_DATA_SM);
+                g_sdio.card_response = pio_sm_get(SDIO_PIO, SDIO_CMD_SM);
             }
             else
             {
                 // Use DMA to wait for the response
                 dma_channel_config dmacfg = dma_channel_get_default_config(SDIO_DMA_CHB);
-                channel_config_set_transfer_data_size(&dmacfg, DMA_SIZE_32);
+                channel_config_set_transfer_data_size(&dmacfg, DMA_SIZE_8);
                 channel_config_set_read_increment(&dmacfg, false);
                 channel_config_set_write_increment(&dmacfg, false);
-                channel_config_set_dreq(&dmacfg, pio_get_dreq(SDIO_PIO, SDIO_DATA_SM, false));
+                channel_config_set_dreq(&dmacfg, pio_get_dreq(SDIO_PIO, SDIO_CMD_SM, false));
                 dma_channel_configure(SDIO_DMA_CHB, &dmacfg,
-                    &g_sdio.card_response, &SDIO_PIO->rxf[SDIO_DATA_SM], 1, true);
+                    &g_sdio.card_response, &SDIO_PIO->rxf[SDIO_CMD_SM], 1, true);
             }
         }
     }
@@ -706,10 +797,11 @@ sdio_status_t rp2040_sdio_tx_poll(uint32_t *bytes_complete)
     else if ((uint32_t)(millis() - g_sdio.transfer_start_time) > 1000)
     {
         debuglog("rp2040_sdio_tx_poll() timeout, "
-            "PIO PC: ", (int)pio_sm_get_pc(SDIO_PIO, SDIO_DATA_SM) - (int)g_sdio.pio_data_tx_offset,
-            " RXF: ", (int)pio_sm_get_rx_fifo_level(SDIO_PIO, SDIO_DATA_SM),
-            " TXF: ", (int)pio_sm_get_tx_fifo_level(SDIO_PIO, SDIO_DATA_SM),
+            "PIO PC: ", (int)pio_sm_get_pc(SDIO_PIO, SDIO_CMD_SM) - (int)g_sdio.pio_data_tx_offset,
+            " RXF: ", (int)pio_sm_get_rx_fifo_level(SDIO_PIO, SDIO_CMD_SM),
+            " TXF: ", (int)pio_sm_get_tx_fifo_level(SDIO_PIO, SDIO_CMD_SM),
             " DMA CNT: ", dma_hw->ch[SDIO_DMA_CH].al2_transfer_count);
+
         rp2040_sdio_stop();
         return SDIO_ERR_DATA_TIMEOUT;
     }
@@ -723,8 +815,7 @@ sdio_status_t rp2040_sdio_stop()
     dma_channel_abort(SDIO_DMA_CH);
     dma_channel_abort(SDIO_DMA_CHB);
     dma_set_irq1_channel_mask_enabled(1 << SDIO_DMA_CHB, 0);
-    pio_sm_set_enabled(SDIO_PIO, SDIO_DATA_SM, false);
-    pio_sm_set_consecutive_pindirs(SDIO_PIO, SDIO_DATA_SM, SDIO_D0, 4, false);
+    pio_set_sm_mask_enabled(SDIO_PIO, (1ul << SDIO_CMD_SM) | (1ul << SDIO_DATA_SM), false);
     g_sdio.transfer_state = SDIO_IDLE;
     return SDIO_OK;
 }
@@ -751,41 +842,32 @@ void rp2040_sdio_init(int clock_divider)
 
     // Load PIO programs
     pio_clear_instruction_memory(SDIO_PIO);
-
-    // Command & clock state machine
-    g_sdio.pio_cmd_clk_offset = pio_add_program(SDIO_PIO, &sdio_cmd_clk_program);
-    pio_sm_config cfg = sdio_cmd_clk_program_get_default_config(g_sdio.pio_cmd_clk_offset);
-    sm_config_set_out_pins(&cfg, SDIO_CMD, 1);
-    sm_config_set_in_pins(&cfg, SDIO_CMD);
-    sm_config_set_set_pins(&cfg, SDIO_CMD, 1);
-    sm_config_set_jmp_pin(&cfg, SDIO_CMD);
-    sm_config_set_sideset_pins(&cfg, SDIO_CLK);
-    sm_config_set_out_shift(&cfg, false, true, 32);
-    sm_config_set_in_shift(&cfg, false, true, 32);
-    sm_config_set_clkdiv_int_frac(&cfg, clock_divider, 0);
-    sm_config_set_mov_status(&cfg, STATUS_TX_LESSTHAN, 2);
-
-    pio_sm_init(SDIO_PIO, SDIO_CMD_SM, g_sdio.pio_cmd_clk_offset, &cfg);
+    
+    // Enable pull-ups (pull-downs off) on every SDIO line: CLK, CMD and D0-D3
+    gpio_set_pulls(SDIO_CLK, true, false);
+    gpio_set_pulls(SDIO_CMD, true, false);
+    gpio_set_pulls(SDIO_D0, true, false);
+    gpio_set_pulls(SDIO_D1, true, false);
+    gpio_set_pulls(SDIO_D2, true, false);
+    gpio_set_pulls(SDIO_D3, true, false);
+
+    // Command state machine
+    g_sdio.pio_cmd_rsp_clk_offset = pio_add_program(SDIO_PIO, &cmd_rsp_program);
+    g_sdio.pio_cfg_cmd_rsp = pio_cmd_rsp_program_config(g_sdio.pio_cmd_rsp_clk_offset, SDIO_CMD, SDIO_CLK, clock_divider, 0);
+
+    pio_sm_init(SDIO_PIO, SDIO_CMD_SM, g_sdio.pio_cmd_rsp_clk_offset, &g_sdio.pio_cfg_cmd_rsp);
+    pio_sm_set_pins(SDIO_PIO, SDIO_CMD_SM, 1);
     pio_sm_set_consecutive_pindirs(SDIO_PIO, SDIO_CMD_SM, SDIO_CLK, 1, true);
-    pio_sm_set_enabled(SDIO_PIO, SDIO_CMD_SM, true);
+    pio_sm_set_consecutive_pindirs(SDIO_PIO, SDIO_CMD_SM, SDIO_CMD, 1, true);
+    pio_sm_set_consecutive_pindirs(SDIO_PIO, SDIO_CMD_SM, SDIO_D0, 4, false);
 
     // Data reception program
-    g_sdio.pio_data_rx_offset = pio_add_program(SDIO_PIO, &sdio_data_rx_program);
-    g_sdio.pio_cfg_data_rx = sdio_data_rx_program_get_default_config(g_sdio.pio_data_rx_offset);
-    sm_config_set_in_pins(&g_sdio.pio_cfg_data_rx, SDIO_D0);
-    sm_config_set_in_shift(&g_sdio.pio_cfg_data_rx, false, true, 32);
-    sm_config_set_out_shift(&g_sdio.pio_cfg_data_rx, false, true, 32);
-    sm_config_set_clkdiv_int_frac(&g_sdio.pio_cfg_data_rx, clock_divider, 0);
+    g_sdio.pio_data_rx_offset = pio_add_program(SDIO_PIO, &rd_data_w_clock_program);
+    g_sdio.pio_cfg_data_rx = pio_rd_data_w_clock_program_config(g_sdio.pio_data_rx_offset, SDIO_D0, SDIO_CLK, clock_divider);
 
     // Data transmission program
-    g_sdio.pio_data_tx_offset = pio_add_program(SDIO_PIO, &sdio_data_tx_program);
-    g_sdio.pio_cfg_data_tx = sdio_data_tx_program_get_default_config(g_sdio.pio_data_tx_offset);
-    sm_config_set_in_pins(&g_sdio.pio_cfg_data_tx, SDIO_D0);
-    sm_config_set_set_pins(&g_sdio.pio_cfg_data_tx, SDIO_D0, 4);
-    sm_config_set_out_pins(&g_sdio.pio_cfg_data_tx, SDIO_D0, 4);
-    sm_config_set_in_shift(&g_sdio.pio_cfg_data_tx, false, false, 32);
-    sm_config_set_out_shift(&g_sdio.pio_cfg_data_tx, false, true, 32);
-    sm_config_set_clkdiv_int_frac(&g_sdio.pio_cfg_data_tx, clock_divider, 0);
+    g_sdio.pio_data_tx_offset = pio_add_program(SDIO_PIO, &sdio_tx_w_clock_program);
+    g_sdio.pio_cfg_data_tx = pio_sdio_tx_w_clock_program_config(g_sdio.pio_data_tx_offset, SDIO_D0, SDIO_CLK, clock_divider);
 
     // Disable SDIO pins input synchronizer.
     // This reduces input delay.

+ 3 - 0
lib/BlueSCSI_platform_RP2040/rp2040_sdio.h

@@ -48,5 +48,8 @@ sdio_status_t rp2040_sdio_tx_poll(uint32_t *bytes_complete = nullptr);
 // Force everything to idle state
 sdio_status_t rp2040_sdio_stop();
 
+// Performs one full CLK line cycle
+void cycleSdClock();
+
 // (Re)initialize the SDIO interface
 void rp2040_sdio_init(int clock_divider = 1);

+ 124 - 97
lib/BlueSCSI_platform_RP2040/rp2040_sdio.pio

@@ -2,7 +2,31 @@
 ; Run "pioasm rp2040_sdio.pio rp2040_sdio.pio.h" to regenerate the C header from this.
 ;
 ; Copyright (c) 2022 Rabbit Hole Computing™
+; Copyright (c) 2011-2024 Bill Greiman
+; Copyright (c) 2024 Tech by Androda, LLC
+; This file is part of the SdFat library for SD memory cards.
+
+; Portions from Bill Greiman use the MIT License:
+; MIT License
+;
+; Permission is hereby granted, free of charge, to any person obtaining a
+; copy of this software and associated documentation files (the "Software"),
+; to deal in the Software without restriction, including without limitation
+; the rights to use, copy, modify, merge, publish, distribute, sublicense,
+; and/or sell copies of the Software, and to permit persons to whom the
+; Software is furnished to do so, subject to the following conditions:
 ;
+; The above copyright notice and this permission notice shall be included
+; in all copies or substantial portions of the Software.
+;
+; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+; OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+; DEALINGS IN THE SOFTWARE.
+
 ; The RP2040 official work-in-progress code at
 ; https://github.com/raspberrypi/pico-extras/tree/master/src/rp2_common/pico_sd_card
 ; may be useful reference, but this is independent implementation.
@@ -27,121 +51,124 @@
 .define D0 (((CLKDIV + 1) /2) - 1)
 .define D1 ((CLKDIV/2) - 1)
 .define SDIO_CLK_GPIO 10
+.define public SDIO_IRQ 7
 
-; State machine 0 is used to:
-; - generate continuous clock on SDIO_CLK
-; - send CMD packets
-; - receive response packets
-;
-; Pin mapping for this state machine:
-; - Sideset    : CLK
-; - IN/OUT/SET : CMD
-; - JMP_PIN    : CMD
-;
-; The commands to send are put on TX fifo and must have two words:
-; Word 0 bits 31-24: Number of bits in command minus one (usually 47)
-; Word 0 bits 23-00: First 24 bits of the command packet, shifted out MSB first
-; Word 1 bits 31-08: Last 24 bits of the command packet, shifted out MSB first
-; Word 1 bits 07-00: Number of bits in response minus one (usually 47), or 0 if no response
-;
-; The response is put on RX fifo, starting with the MSB.
-; Partial last word will be padded with zero bits at the top.
-;
-; The state machine EXECCTRL should be set so that STATUS indicates TX FIFO < 2
-; and that AUTOPULL and AUTOPUSH are enabled.
-
-.program sdio_cmd_clk
-    .side_set 1
-
-    mov OSR, NULL       side 1 [D1]    ; Make sure OSR is full of zeros to prevent autopull
-
-wait_cmd:
-    mov Y, !STATUS      side 0 [D0]    ; Check if TX FIFO has data
-    jmp !Y wait_cmd     side 1 [D1]
-
-load_cmd:
-    out NULL, 32        side 0 [D0]    ; Load first word (trigger autopull)
-    out X, 8            side 1 [D1]    ; Number of bits to send
-    set pins, 1         side 0 [D0]    ; Initial state of CMD is high
-    set pindirs, 1      side 1 [D1]    ; Set SDIO_CMD as output
-
+; State Machine 0 is for the Command / Response
+; This State Machine will stall with clock low after sending a command and receiving the response
+; Note that the FIFOs are set to 8 bit mode here, because 8 bits evenly divides all command and response sizes
+.program cmd_rsp
+.side_set 1 opt
+.wrap_target
+cmd_begin:
 send_cmd:
-    out pins, 1         side 0 [D0]    ; Write output on falling edge of CLK
-    jmp X-- send_cmd    side 1 [D1]
-
-prep_resp:
-    set pindirs, 0      side 0 [D0]    ; Set SDIO_CMD as input
-    out X, 8            side 1 [D1]    ; Get number of bits in response
-    nop                 side 0 [D0]    ; For clock alignment
-    jmp !X resp_done    side 1 [D1]    ; Check if we expect a response
+    out pins, 1         side 0 [1]  ; When TX FIFO is empty, this command will stall with clock low
+    jmp X-- send_cmd    side 1 [1]
 
+    jmp !Y cmd_begin    side 0 [1]  ; If no response, go back to the beginning and stall
+    set pindirs, 0      side 1 [3]
 wait_resp:
-    nop                  side 0 [D0]
-    jmp PIN wait_resp    side 1 [D1]    ; Loop until SDIO_CMD = 0
-
-    ; Note: input bits are read at the same time as we write CLK=0.
-    ; Because the host controls the clock, the read happens before
-    ; the card sees the falling clock edge. This gives maximum time
-    ; for the data bit to settle.
+    nop                 side 0 [3]
+    nop                 side 1 [2]
+    jmp PIN wait_resp               ; Run the SD clock until CMD pin goes low (First bit of response)
+    
 read_resp:
-    in PINS, 1          side 0 [D0]    ; Read input data bit
-    jmp X-- read_resp   side 1 [D1]    ; Loop to receive all data bits
-
-resp_done:
-    push                side 0 [D0]    ; Push the remaining part of response
-
-; State machine 1 is used to send and receive data blocks.
-; Pin mapping for this state machine:
-; - IN / OUT: SDIO_D0-D3
-; - GPIO defined at beginning of this file: SDIO_CLK
-
-; Data reception program
-; This program will wait for initial start of block token and then
-; receive a data block. The application must set number of nibbles
-; to receive minus 1 to Y register before running this program.
-.program sdio_data_rx
-
-wait_start:
-    mov X, Y                               ; Reinitialize number of nibbles to receive
-    wait 0 pin 0                           ; Wait for zero state on D0
-    wait 1 gpio SDIO_CLK_GPIO  [CLKDIV-1]  ; Wait for rising edge and then whole clock cycle
-
-rx_data:
-    in PINS, 4                 [CLKDIV-2]  ; Read nibble
-    jmp X--, rx_data
+    in pins, 1          
+    push iffull block   side 0 [2]  ; Read command response
+    jmp Y-- read_resp   side 1 [1]
+.wrap
+
+% c-sdk {
+static inline pio_sm_config pio_cmd_rsp_program_config(uint offset, uint cmd_pin, uint clk_pin, uint16_t div_int, uint8_t div_frac) {
+    pio_sm_config c = cmd_rsp_program_get_default_config(offset);
+    sm_config_set_sideset_pins(&c, clk_pin);
+    sm_config_set_out_pins(&c, cmd_pin, 1);
+    sm_config_set_in_pins(&c, cmd_pin);
+    sm_config_set_set_pins(&c, cmd_pin, 1);
+    sm_config_set_jmp_pin(&c, cmd_pin);
+    sm_config_set_in_shift(&c, false, false, 8);
+    sm_config_set_out_shift(&c, false, true, 8);
+    sm_config_set_clkdiv_int_frac(&c, div_int, div_frac);
+    return c;
+}
+%}
+
+; Program which reads data and provides its own clock signal
+; Use direct-execute PIO instructions to place the number of 4-bit nibbles to receive, minus 1,
+; into the Y register before enabling the state machine (the first instruction copies Y into X)
+.program rd_data_w_clock
+.side_set 1
+mov X, Y                side 0      ; Reinitialize number of nibbles to receive
+wait_d0:
+    nop                 side 0 [3]  ; Run the clock...
+    jmp PIN wait_d0     side 1 [3]  ; Until the first response nibble (all zeroes)
+    nop                 side 0 [2]  ; Clock transition low to make the SD card write out the first actual data nibble
+    nop                 side 1 [1]  ; Transition clock high to stick data value
+read_loop:
+    in pins, 4          side 0 [2]  ; Read in the nibble and transition the clock low
+    push iffull block   side 1      ; Transition the clock high and block execution if rx fifo is full
+    jmp X--, read_loop  side 1      ; No delays here or previous instruction, because instr [1] = two instr execution time
+
+% c-sdk {
+
+static inline pio_sm_config pio_rd_data_w_clock_program_config(uint offset, uint d0_pin, uint clk_pin, float clk_div) {
+  pio_sm_config c = rd_data_w_clock_program_get_default_config(offset);
+  sm_config_set_sideset_pins(&c, clk_pin);
+  sm_config_set_in_pins(&c, d0_pin);
+  sm_config_set_jmp_pin(&c, d0_pin);
+  sm_config_set_in_shift(&c, false, false, 32);
+  sm_config_set_out_shift(&c, false, true, 32);
+  sm_config_set_clkdiv(&c, clk_div);
+  return c;
+}
+%}
 
 ; Data transmission program
 ;
 ; Before running this program, pindirs should be set as output
 ; and register X should be initialized with the number of nibbles
 ; to send minus 1 (typically 8 + 1024 + 16 + 1 - 1 = 1048)
-; and register Y with the number of response bits minus 1 (typically 31).
+;
+; Register Y must be set to the number of CRC bits to receive (8/32). NOTE(review): crc_get runs Y+1 times - confirm whether the caller should load the count minus 1
 ;
 ; Words written to TX FIFO must be:
 ; - Word 0: start token 0xFFFFFFF0
 ; - Word 1-128: transmitted data (512 bytes)
 ; - Word 129-130: CRC checksum
 ; - Word 131: end token 0xFFFFFFFF
-;
-; After the card reports idle status, RX FIFO will get a word that
-; contains the D0 line response from card.
+.program sdio_tx_w_clock
+.side_set 1 opt
+tx_loop:
+    out PINS, 4             side 0 [2]      ; Write nibble value and transition clock low
+    jmp X-- tx_loop         side 1 [1]      ; Transition clock high, and check if more data needs to be sent
+
+    set pindirs, 0          side 1 [2]      ; Set input mode to receive CRC token, without changing clock phase
+
+crc_get:
+    in pins, 1              side 1 [4]      ; Input the first bit of CRC response
+    jmp Y-- crc_get         side 0 [4]      ; Read the CRC bits
+bsy_wait:
+    jmp PIN done            side 1 [4]
+    jmp bsy_wait            side 0 [4]      ; Clock until no longer BSY
+done:
+.wrap_target
+    push iffull noblock     side 0         ; Push the response token once the ISR has reached its threshold; noblock, so this never stalls
+.wrap
+
+% c-sdk {
+static inline pio_sm_config pio_sdio_tx_w_clock_program_config(uint offset, uint data_pin, uint clk_pin, int clk_div) {
+    pio_sm_config c = sdio_tx_w_clock_program_get_default_config(offset);
+    sm_config_set_sideset_pins(&c, clk_pin);
+    sm_config_set_out_pins(&c, data_pin, 4);
+    sm_config_set_in_pins(&c, data_pin);
+    sm_config_set_set_pins(&c, data_pin, 4);
+    sm_config_set_in_shift(&c, false, false, 8);
+    sm_config_set_out_shift(&c, false, true, 32);
+    sm_config_set_jmp_pin(&c, data_pin);
+    sm_config_set_clkdiv_int_frac(&c, clk_div, 0);
+    return c;
+}
+%}
 
-.program sdio_data_tx
-    wait 0 gpio SDIO_CLK_GPIO  
-    wait 1 gpio SDIO_CLK_GPIO  [CLKDIV + D1 - 1]; Synchronize so that write occurs on falling edge
 
-tx_loop:
-    out PINS, 4                [D0]    ; Write nibble and wait for whole clock cycle
-    jmp X-- tx_loop            [D1]
 
-    set pindirs, 0x00          [D0]    ; Set data bus as input
 
-.wrap_target
-response_loop:
-    in PINS, 1                 [D1]    ; Read D0 on rising edge
-    jmp Y--, response_loop     [D0]
-
-wait_idle:
-    wait 1 pin 0               [D1]    ; Wait for card to indicate idle condition
-    push                       [D0]    ; Push the response token
-.wrap

+ 105 - 68
lib/BlueSCSI_platform_RP2040/rp2040_sdio.pio.h

@@ -8,114 +8,151 @@
 #include "hardware/pio.h"
 #endif
 
-// ------------ //
-// sdio_cmd_clk //
-// ------------ //
+#define SDIO_IRQ 7
 
-#define sdio_cmd_clk_wrap_target 0
-#define sdio_cmd_clk_wrap 17
+// ------- //
+// cmd_rsp //
+// ------- //
 
-static const uint16_t sdio_cmd_clk_program_instructions[] = {
+#define cmd_rsp_wrap_target 0
+#define cmd_rsp_wrap 9
+
+static const uint16_t cmd_rsp_program_instructions[] = {
             //     .wrap_target
-    0xb1e3, //  0: mov    osr, null       side 1 [1] 
-    0xa24d, //  1: mov    y, !status      side 0 [2] 
-    0x1161, //  2: jmp    !y, 1           side 1 [1] 
-    0x6260, //  3: out    null, 32        side 0 [2] 
-    0x7128, //  4: out    x, 8            side 1 [1] 
-    0xe201, //  5: set    pins, 1         side 0 [2] 
-    0xf181, //  6: set    pindirs, 1      side 1 [1] 
-    0x6201, //  7: out    pins, 1         side 0 [2] 
-    0x1147, //  8: jmp    x--, 7          side 1 [1] 
-    0xe280, //  9: set    pindirs, 0      side 0 [2] 
-    0x7128, // 10: out    x, 8            side 1 [1] 
-    0xa242, // 11: nop                    side 0 [2] 
-    0x1131, // 12: jmp    !x, 17          side 1 [1] 
-    0xa242, // 13: nop                    side 0 [2] 
-    0x11cd, // 14: jmp    pin, 13         side 1 [1] 
-    0x4201, // 15: in     pins, 1         side 0 [2] 
-    0x114f, // 16: jmp    x--, 15         side 1 [1] 
-    0x8220, // 17: push   block           side 0 [2] 
+    0x7101, //  0: out    pins, 1         side 0 [1] 
+    0x1940, //  1: jmp    x--, 0          side 1 [1] 
+    0x1160, //  2: jmp    !y, 0           side 0 [1] 
+    0xfb80, //  3: set    pindirs, 0      side 1 [3] 
+    0xb342, //  4: nop                    side 0 [3] 
+    0xba42, //  5: nop                    side 1 [2] 
+    0x00c4, //  6: jmp    pin, 4                     
+    0x4001, //  7: in     pins, 1                    
+    0x9260, //  8: push   iffull block    side 0 [2] 
+    0x1987, //  9: jmp    y--, 7          side 1 [1] 
             //     .wrap
 };
 
 #if !PICO_NO_HARDWARE
-static const struct pio_program sdio_cmd_clk_program = {
-    .instructions = sdio_cmd_clk_program_instructions,
-    .length = 18,
+static const struct pio_program cmd_rsp_program = {
+    .instructions = cmd_rsp_program_instructions,
+    .length = 10,
     .origin = -1,
 };
 
-static inline pio_sm_config sdio_cmd_clk_program_get_default_config(uint offset) {
+static inline pio_sm_config cmd_rsp_program_get_default_config(uint offset) {
     pio_sm_config c = pio_get_default_sm_config();
-    sm_config_set_wrap(&c, offset + sdio_cmd_clk_wrap_target, offset + sdio_cmd_clk_wrap);
-    sm_config_set_sideset(&c, 1, false, false);
+    sm_config_set_wrap(&c, offset + cmd_rsp_wrap_target, offset + cmd_rsp_wrap);
+    sm_config_set_sideset(&c, 2, true, false);
+    return c;
+}
+
+static inline pio_sm_config pio_cmd_rsp_program_config(uint offset, uint cmd_pin, uint clk_pin, uint16_t div_int, uint8_t div_frac) {
+    pio_sm_config c = cmd_rsp_program_get_default_config(offset);
+    sm_config_set_sideset_pins(&c, clk_pin);
+    sm_config_set_out_pins(&c, cmd_pin, 1);
+    sm_config_set_in_pins(&c, cmd_pin);
+    sm_config_set_set_pins(&c, cmd_pin, 1);
+    sm_config_set_jmp_pin(&c, cmd_pin);
+    sm_config_set_in_shift(&c, false, false, 8);
+    sm_config_set_out_shift(&c, false, true, 8);
+    sm_config_set_clkdiv_int_frac(&c, div_int, div_frac);
     return c;
 }
+
 #endif
 
-// ------------ //
-// sdio_data_rx //
-// ------------ //
+// --------------- //
+// rd_data_w_clock //
+// --------------- //
 
-#define sdio_data_rx_wrap_target 0
-#define sdio_data_rx_wrap 4
+#define rd_data_w_clock_wrap_target 0
+#define rd_data_w_clock_wrap 7
 
-static const uint16_t sdio_data_rx_program_instructions[] = {
+static const uint16_t rd_data_w_clock_program_instructions[] = {
             //     .wrap_target
-    0xa022, //  0: mov    x, y                       
-    0x2020, //  1: wait   0 pin, 0                   
-    0x248a, //  2: wait   1 gpio, 10             [4] 
-    0x4304, //  3: in     pins, 4                [3] 
-    0x0043, //  4: jmp    x--, 3                     
+    0xa022, //  0: mov    x, y            side 0     
+    0xa342, //  1: nop                    side 0 [3] 
+    0x13c1, //  2: jmp    pin, 1          side 1 [3] 
+    0xa242, //  3: nop                    side 0 [2] 
+    0xb142, //  4: nop                    side 1 [1] 
+    0x4204, //  5: in     pins, 4         side 0 [2] 
+    0x9060, //  6: push   iffull block    side 1     
+    0x1045, //  7: jmp    x--, 5          side 1     
             //     .wrap
 };
 
 #if !PICO_NO_HARDWARE
-static const struct pio_program sdio_data_rx_program = {
-    .instructions = sdio_data_rx_program_instructions,
-    .length = 5,
+static const struct pio_program rd_data_w_clock_program = {
+    .instructions = rd_data_w_clock_program_instructions,
+    .length = 8,
     .origin = -1,
 };
 
-static inline pio_sm_config sdio_data_rx_program_get_default_config(uint offset) {
+static inline pio_sm_config rd_data_w_clock_program_get_default_config(uint offset) {
     pio_sm_config c = pio_get_default_sm_config();
-    sm_config_set_wrap(&c, offset + sdio_data_rx_wrap_target, offset + sdio_data_rx_wrap);
+    sm_config_set_wrap(&c, offset + rd_data_w_clock_wrap_target, offset + rd_data_w_clock_wrap);
+    sm_config_set_sideset(&c, 1, false, false);
     return c;
 }
-#endif
 
-// ------------ //
-// sdio_data_tx //
-// ------------ //
+static inline pio_sm_config pio_rd_data_w_clock_program_config(uint offset, uint d0_pin, uint clk_pin, float clk_div) {
+  pio_sm_config c = rd_data_w_clock_program_get_default_config(offset);
+  sm_config_set_sideset_pins(&c, clk_pin);
+  sm_config_set_in_pins(&c, d0_pin);
+  sm_config_set_jmp_pin(&c, d0_pin);
+  sm_config_set_in_shift(&c, false, false, 32);
+  sm_config_set_out_shift(&c, false, true, 32);
+  sm_config_set_clkdiv(&c, clk_div);
+  return c;
+}
 
-#define sdio_data_tx_wrap_target 5
-#define sdio_data_tx_wrap 8
+#endif
 
-static const uint16_t sdio_data_tx_program_instructions[] = {
-    0x200a, //  0: wait   0 gpio, 10                 
-    0x258a, //  1: wait   1 gpio, 10             [5] 
-    0x6204, //  2: out    pins, 4                [2] 
-    0x0142, //  3: jmp    x--, 2                 [1] 
-    0xe280, //  4: set    pindirs, 0             [2] 
+// --------------- //
+// sdio_tx_w_clock //
+// --------------- //
+
+#define sdio_tx_w_clock_wrap_target 7
+#define sdio_tx_w_clock_wrap 7
+
+static const uint16_t sdio_tx_w_clock_program_instructions[] = {
+    0x7204, //  0: out    pins, 4         side 0 [2] 
+    0x1940, //  1: jmp    x--, 0          side 1 [1] 
+    0xfa80, //  2: set    pindirs, 0      side 1 [2] 
+    0x5c01, //  3: in     pins, 1         side 1 [4] 
+    0x1483, //  4: jmp    y--, 3          side 0 [4] 
+    0x1cc7, //  5: jmp    pin, 7          side 1 [4] 
+    0x1405, //  6: jmp    5               side 0 [4] 
             //     .wrap_target
-    0x4101, //  5: in     pins, 1                [1] 
-    0x0285, //  6: jmp    y--, 5                 [2] 
-    0x21a0, //  7: wait   1 pin, 0               [1] 
-    0x8220, //  8: push   block                  [2] 
+    0x9040, //  7: push   iffull noblock  side 0     
             //     .wrap
 };
 
 #if !PICO_NO_HARDWARE
-static const struct pio_program sdio_data_tx_program = {
-    .instructions = sdio_data_tx_program_instructions,
-    .length = 9,
+static const struct pio_program sdio_tx_w_clock_program = {
+    .instructions = sdio_tx_w_clock_program_instructions,
+    .length = 8,
     .origin = -1,
 };
 
-static inline pio_sm_config sdio_data_tx_program_get_default_config(uint offset) {
+static inline pio_sm_config sdio_tx_w_clock_program_get_default_config(uint offset) {
     pio_sm_config c = pio_get_default_sm_config();
-    sm_config_set_wrap(&c, offset + sdio_data_tx_wrap_target, offset + sdio_data_tx_wrap);
+    sm_config_set_wrap(&c, offset + sdio_tx_w_clock_wrap_target, offset + sdio_tx_w_clock_wrap);
+    sm_config_set_sideset(&c, 2, true, false);
     return c;
 }
-#endif
 
+static inline pio_sm_config pio_sdio_tx_w_clock_program_config(uint offset, uint data_pin, uint clk_pin, int clk_div) {
+    pio_sm_config c = sdio_tx_w_clock_program_get_default_config(offset);
+    sm_config_set_sideset_pins(&c, clk_pin);
+    sm_config_set_out_pins(&c, data_pin, 4);
+    sm_config_set_in_pins(&c, data_pin);
+    sm_config_set_set_pins(&c, data_pin, 4);
+    sm_config_set_in_shift(&c, false, false, 8);
+    sm_config_set_out_shift(&c, false, true, 32);
+    sm_config_set_jmp_pin(&c, data_pin);
+    sm_config_set_clkdiv_int_frac(&c, clk_div, 0);
+    return c;
+}
+
+#endif

+ 16 - 7
lib/BlueSCSI_platform_RP2040/sd_card_sdio.cpp

@@ -1,6 +1,7 @@
 // Driver for accessing SD card in SDIO mode on RP2040.
 //
 // Copyright (c) 2022 Rabbit Hole Computing™
+// Copyright (c) 2024 Tech by Androda, LLC
 
 #include "BlueSCSI_platform.h"
 
@@ -20,6 +21,7 @@ static int g_sdio_error_line;
 static sdio_status_t g_sdio_error;
 static uint32_t g_sdio_dma_buf[128];
 static uint32_t g_sdio_sector_count;
+static uint8_t cardType;
 
 #define checkReturnOk(call) ((g_sdio_error = (call)) == SDIO_OK ? true : logSDError(__LINE__))
 static bool logSDError(int line)
@@ -80,7 +82,6 @@ bool SdioCard::begin(SdioConfig sdioConfig)
         reply = 0;
         rp2040_sdio_command_R1(CMD0, 0, NULL); // GO_IDLE_STATE
         status = rp2040_sdio_command_R1(CMD8, 0x1AA, &reply); // SEND_IF_COND
-
         if (status == SDIO_OK && reply == 0x1AA)
         {
             break;
@@ -247,6 +248,7 @@ bool SdioCard::stopTransmission(bool blocking)
         uint32_t start = millis();
         while ((uint32_t)(millis() - start) < 5000 && isBusy())
         {
+            cycleSdClock();
             if (m_stream_callback)
             {
                 m_stream_callback(m_stream_count);
@@ -423,9 +425,14 @@ bool SdioCard::readSector(uint32_t sector, uint8_t* dst)
     uint32_t address = (type() == SD_CARD_TYPE_SDHC) ? sector : (sector * 512);
 
     uint32_t reply;
-    if (!checkReturnOk(rp2040_sdio_command_R1(16, 512, &reply)) || // SET_BLOCKLEN
-        !checkReturnOk(rp2040_sdio_rx_start(dst, 1)) || // Prepare for reception
-        !checkReturnOk(rp2040_sdio_command_R1(CMD17, address, &reply))) // READ_SINGLE_BLOCK
+    // CMD16 (SET_BLOCKLEN) is largely unnecessary here: the default block length is 512, and SDHC, SDXC and SDUC cards *always* use 512, so for them this does nothing.
+    // The set length is valid for memory access commands only if partial block read operations are allowed in the CSD.
+    // TODO: we do have the CSD, so CMD16 should only be run when actually necessary.
+    if (
+        !checkReturnOk(rp2040_sdio_command_R1(16, 512, &reply)) || // SET_BLOCKLEN
+        !checkReturnOk(rp2040_sdio_command_R1(CMD17, address, &reply)) || // READ_SINGLE_BLOCK
+        !checkReturnOk(rp2040_sdio_rx_start(dst, 1)) // Prepare for reception
+        )
     {
         return false;
     }
@@ -474,9 +481,11 @@ bool SdioCard::readSectors(uint32_t sector, uint8_t* dst, size_t n)
     uint32_t address = (type() == SD_CARD_TYPE_SDHC) ? sector : (sector * 512);
 
     uint32_t reply;
-    if (!checkReturnOk(rp2040_sdio_command_R1(16, 512, &reply)) || // SET_BLOCKLEN
-        !checkReturnOk(rp2040_sdio_rx_start(dst, n)) || // Prepare for reception
-        !checkReturnOk(rp2040_sdio_command_R1(CMD18, address, &reply))) // READ_MULTIPLE_BLOCK
+    if (
+        !checkReturnOk(rp2040_sdio_command_R1(16, 512, &reply)) || // SET_BLOCKLEN
+        !checkReturnOk(rp2040_sdio_command_R1(CMD18, address, &reply)) || // READ_MULTIPLE_BLOCK
+        !checkReturnOk(rp2040_sdio_rx_start(dst, n)) // Prepare for reception
+        )
     {
         return false;
     }