// rp2040_sdio.cpp
// Implementation of SDIO communication for RP2040
//
// The RP2040 official work-in-progress code at
// https://github.com/raspberrypi/pico-extras/tree/master/src/rp2_common/pico_sd_card
// may be a useful reference, but this is an independent implementation.
//
// For official SDIO specifications, refer to:
// https://www.sdcard.org/downloads/pls/
// "SDIO Physical Layer Simplified Specification Version 8.00"
  10. #include "rp2040_sdio.h"
  11. #include "rp2040_sdio.pio.h"
  12. #include <hardware/pio.h>
  13. #include <hardware/dma.h>
  14. #include <hardware/gpio.h>
  15. #include <BlueSCSI_platform.h>
  16. #include <BlueSCSI_log.h>
  17. #define SDIO_PIO pio1
  18. #define SDIO_CMD_SM 0
  19. #define SDIO_DATA_SM 1
  20. #define SDIO_DMA_CH 4
  21. #define SDIO_DMA_CHB 5
  22. // Maximum number of 512 byte blocks to transfer in one request
  23. #define SDIO_MAX_BLOCKS 256
  24. enum sdio_transfer_state_t { SDIO_IDLE, SDIO_RX, SDIO_TX, SDIO_TX_WAIT_IDLE};
  25. static struct {
  26. uint32_t pio_cmd_clk_offset;
  27. uint32_t pio_data_rx_offset;
  28. pio_sm_config pio_cfg_data_rx;
  29. uint32_t pio_data_tx_offset;
  30. pio_sm_config pio_cfg_data_tx;
  31. sdio_transfer_state_t transfer_state;
  32. uint32_t transfer_start_time;
  33. uint32_t *data_buf;
  34. uint32_t blocks_done; // Number of blocks transferred so far
  35. uint32_t total_blocks; // Total number of blocks to transfer
  36. uint32_t blocks_checksumed; // Number of blocks that have had CRC calculated
  37. uint32_t checksum_errors; // Number of checksum errors detected
  38. // Variables for block writes
  39. uint64_t next_wr_block_checksum;
  40. uint32_t end_token_buf[3]; // CRC and end token for write block
  41. sdio_status_t wr_status;
  42. uint32_t card_response;
  43. // Variables for block reads
  44. // This is used to perform DMA into data buffers and checksum buffers separately.
  45. struct {
  46. void * write_addr;
  47. uint32_t transfer_count;
  48. } dma_blocks[SDIO_MAX_BLOCKS * 2];
  49. struct {
  50. uint32_t top;
  51. uint32_t bottom;
  52. } received_checksums[SDIO_MAX_BLOCKS];
  53. } g_sdio;
  54. void rp2040_sdio_dma_irq();
  55. /*******************************************************
  56. * Checksum algorithms
  57. *******************************************************/
  58. // Table lookup for calculating CRC-7 checksum that is used in SDIO command packets.
  59. // Usage:
  60. // uint8_t crc = 0;
  61. // crc = crc7_table[crc ^ byte];
  62. // .. repeat for every byte ..
  63. static const uint8_t crc7_table[256] = {
  64. 0x00, 0x12, 0x24, 0x36, 0x48, 0x5a, 0x6c, 0x7e, 0x90, 0x82, 0xb4, 0xa6, 0xd8, 0xca, 0xfc, 0xee,
  65. 0x32, 0x20, 0x16, 0x04, 0x7a, 0x68, 0x5e, 0x4c, 0xa2, 0xb0, 0x86, 0x94, 0xea, 0xf8, 0xce, 0xdc,
  66. 0x64, 0x76, 0x40, 0x52, 0x2c, 0x3e, 0x08, 0x1a, 0xf4, 0xe6, 0xd0, 0xc2, 0xbc, 0xae, 0x98, 0x8a,
  67. 0x56, 0x44, 0x72, 0x60, 0x1e, 0x0c, 0x3a, 0x28, 0xc6, 0xd4, 0xe2, 0xf0, 0x8e, 0x9c, 0xaa, 0xb8,
  68. 0xc8, 0xda, 0xec, 0xfe, 0x80, 0x92, 0xa4, 0xb6, 0x58, 0x4a, 0x7c, 0x6e, 0x10, 0x02, 0x34, 0x26,
  69. 0xfa, 0xe8, 0xde, 0xcc, 0xb2, 0xa0, 0x96, 0x84, 0x6a, 0x78, 0x4e, 0x5c, 0x22, 0x30, 0x06, 0x14,
  70. 0xac, 0xbe, 0x88, 0x9a, 0xe4, 0xf6, 0xc0, 0xd2, 0x3c, 0x2e, 0x18, 0x0a, 0x74, 0x66, 0x50, 0x42,
  71. 0x9e, 0x8c, 0xba, 0xa8, 0xd6, 0xc4, 0xf2, 0xe0, 0x0e, 0x1c, 0x2a, 0x38, 0x46, 0x54, 0x62, 0x70,
  72. 0x82, 0x90, 0xa6, 0xb4, 0xca, 0xd8, 0xee, 0xfc, 0x12, 0x00, 0x36, 0x24, 0x5a, 0x48, 0x7e, 0x6c,
  73. 0xb0, 0xa2, 0x94, 0x86, 0xf8, 0xea, 0xdc, 0xce, 0x20, 0x32, 0x04, 0x16, 0x68, 0x7a, 0x4c, 0x5e,
  74. 0xe6, 0xf4, 0xc2, 0xd0, 0xae, 0xbc, 0x8a, 0x98, 0x76, 0x64, 0x52, 0x40, 0x3e, 0x2c, 0x1a, 0x08,
  75. 0xd4, 0xc6, 0xf0, 0xe2, 0x9c, 0x8e, 0xb8, 0xaa, 0x44, 0x56, 0x60, 0x72, 0x0c, 0x1e, 0x28, 0x3a,
  76. 0x4a, 0x58, 0x6e, 0x7c, 0x02, 0x10, 0x26, 0x34, 0xda, 0xc8, 0xfe, 0xec, 0x92, 0x80, 0xb6, 0xa4,
  77. 0x78, 0x6a, 0x5c, 0x4e, 0x30, 0x22, 0x14, 0x06, 0xe8, 0xfa, 0xcc, 0xde, 0xa0, 0xb2, 0x84, 0x96,
  78. 0x2e, 0x3c, 0x0a, 0x18, 0x66, 0x74, 0x42, 0x50, 0xbe, 0xac, 0x9a, 0x88, 0xf6, 0xe4, 0xd2, 0xc0,
  79. 0x1c, 0x0e, 0x38, 0x2a, 0x54, 0x46, 0x70, 0x62, 0x8c, 0x9e, 0xa8, 0xba, 0xc4, 0xd6, 0xe0, 0xf2
  80. };
  81. // Calculate the CRC16 checksum for parallel 4 bit lines separately.
  82. // When the SDIO bus operates in 4-bit mode, the CRC16 algorithm
  83. // is applied to each line separately and generates total of
  84. // 4 x 16 = 64 bits of checksum.
  85. __attribute__((optimize("O3")))
  86. uint64_t sdio_crc16_4bit_checksum(uint32_t *data, uint32_t num_words)
  87. {
  88. uint64_t crc = 0;
  89. uint32_t *end = data + num_words;
  90. while (data < end)
  91. {
  92. for (int unroll = 0; unroll < 4; unroll++)
  93. {
  94. // Each 32-bit word contains 8 bits per line.
  95. // Reverse the bytes because SDIO protocol is big-endian.
  96. uint32_t data_in = __builtin_bswap32(*data++);
  97. // Shift out 8 bits for each line
  98. uint32_t data_out = crc >> 32;
  99. crc <<= 32;
  100. // XOR outgoing data to itself with 4 bit delay
  101. data_out ^= (data_out >> 16);
  102. // XOR incoming data to outgoing data with 4 bit delay
  103. data_out ^= (data_in >> 16);
  104. // XOR outgoing and incoming data to accumulator at each tap
  105. uint64_t xorred = data_out ^ data_in;
  106. crc ^= xorred;
  107. crc ^= xorred << (5 * 4);
  108. crc ^= xorred << (12 * 4);
  109. }
  110. }
  111. return crc;
  112. }
  113. /*******************************************************
  114. * Basic SDIO command execution
  115. *******************************************************/
  116. static void sdio_send_command(uint8_t command, uint32_t arg, uint8_t response_bits)
  117. {
  118. // debuglog("SDIO Command: ", (int)command, " arg ", arg);
  119. // Format the arguments in the way expected by the PIO code.
  120. uint32_t word0 =
  121. (47 << 24) | // Number of bits in command minus one
  122. ( 1 << 22) | // Transfer direction from host to card
  123. (command << 16) | // Command byte
  124. (((arg >> 24) & 0xFF) << 8) | // MSB byte of argument
  125. (((arg >> 16) & 0xFF) << 0);
  126. uint32_t word1 =
  127. (((arg >> 8) & 0xFF) << 24) |
  128. (((arg >> 0) & 0xFF) << 16) | // LSB byte of argument
  129. ( 1 << 8); // End bit
  130. // Set number of bits in response minus one, or leave at 0 if no response expected
  131. if (response_bits)
  132. {
  133. word1 |= ((response_bits - 1) << 0);
  134. }
  135. // Calculate checksum in the order that the bytes will be transmitted (big-endian)
  136. uint8_t crc = 0;
  137. crc = crc7_table[crc ^ ((word0 >> 16) & 0xFF)];
  138. crc = crc7_table[crc ^ ((word0 >> 8) & 0xFF)];
  139. crc = crc7_table[crc ^ ((word0 >> 0) & 0xFF)];
  140. crc = crc7_table[crc ^ ((word1 >> 24) & 0xFF)];
  141. crc = crc7_table[crc ^ ((word1 >> 16) & 0xFF)];
  142. word1 |= crc << 8;
  143. // Transmit command
  144. pio_sm_clear_fifos(SDIO_PIO, SDIO_CMD_SM);
  145. pio_sm_put(SDIO_PIO, SDIO_CMD_SM, word0);
  146. pio_sm_put(SDIO_PIO, SDIO_CMD_SM, word1);
  147. }
  148. sdio_status_t rp2040_sdio_command_R1(uint8_t command, uint32_t arg, uint32_t *response)
  149. {
  150. sdio_send_command(command, arg, response ? 48 : 0);
  151. // Wait for response
  152. uint32_t start = millis();
  153. uint32_t wait_words = response ? 2 : 1;
  154. while (pio_sm_get_rx_fifo_level(SDIO_PIO, SDIO_CMD_SM) < wait_words)
  155. {
  156. if ((uint32_t)(millis() - start) > 2)
  157. {
  158. if (command != 8) // Don't log for missing SD card
  159. {
  160. debuglog("Timeout waiting for response in rp2040_sdio_command_R1(", (int)command, "), ",
  161. "PIO PC: ", (int)pio_sm_get_pc(SDIO_PIO, SDIO_CMD_SM) - (int)g_sdio.pio_cmd_clk_offset,
  162. " RXF: ", (int)pio_sm_get_rx_fifo_level(SDIO_PIO, SDIO_CMD_SM),
  163. " TXF: ", (int)pio_sm_get_tx_fifo_level(SDIO_PIO, SDIO_CMD_SM));
  164. }
  165. // Reset the state machine program
  166. pio_sm_clear_fifos(SDIO_PIO, SDIO_CMD_SM);
  167. pio_sm_exec(SDIO_PIO, SDIO_CMD_SM, pio_encode_jmp(g_sdio.pio_cmd_clk_offset));
  168. return SDIO_ERR_RESPONSE_TIMEOUT;
  169. }
  170. }
  171. if (response)
  172. {
  173. // Read out response packet
  174. uint32_t resp0 = pio_sm_get(SDIO_PIO, SDIO_CMD_SM);
  175. uint32_t resp1 = pio_sm_get(SDIO_PIO, SDIO_CMD_SM);
  176. // debuglog("SDIO R1 response: ", resp0, " ", resp1);
  177. // Calculate response checksum
  178. uint8_t crc = 0;
  179. crc = crc7_table[crc ^ ((resp0 >> 24) & 0xFF)];
  180. crc = crc7_table[crc ^ ((resp0 >> 16) & 0xFF)];
  181. crc = crc7_table[crc ^ ((resp0 >> 8) & 0xFF)];
  182. crc = crc7_table[crc ^ ((resp0 >> 0) & 0xFF)];
  183. crc = crc7_table[crc ^ ((resp1 >> 8) & 0xFF)];
  184. uint8_t actual_crc = ((resp1 >> 0) & 0xFE);
  185. if (crc != actual_crc)
  186. {
  187. debuglog("rp2040_sdio_command_R1(", (int)command, "): CRC error, calculated ", crc, " packet has ", actual_crc);
  188. return SDIO_ERR_RESPONSE_CRC;
  189. }
  190. uint8_t response_cmd = ((resp0 >> 24) & 0xFF);
  191. if (response_cmd != command && command != 41)
  192. {
  193. debuglog("rp2040_sdio_command_R1(", (int)command, "): received reply for ", (int)response_cmd);
  194. return SDIO_ERR_RESPONSE_CODE;
  195. }
  196. *response = ((resp0 & 0xFFFFFF) << 8) | ((resp1 >> 8) & 0xFF);
  197. }
  198. else
  199. {
  200. // Read out dummy marker
  201. pio_sm_get(SDIO_PIO, SDIO_CMD_SM);
  202. }
  203. return SDIO_OK;
  204. }
  205. sdio_status_t rp2040_sdio_command_R2(uint8_t command, uint32_t arg, uint8_t response[16])
  206. {
  207. // The response is too long to fit in the PIO FIFO, so use DMA to receive it.
  208. pio_sm_clear_fifos(SDIO_PIO, SDIO_CMD_SM);
  209. uint32_t response_buf[5];
  210. dma_channel_config dmacfg = dma_channel_get_default_config(SDIO_DMA_CH);
  211. channel_config_set_transfer_data_size(&dmacfg, DMA_SIZE_32);
  212. channel_config_set_read_increment(&dmacfg, false);
  213. channel_config_set_write_increment(&dmacfg, true);
  214. channel_config_set_dreq(&dmacfg, pio_get_dreq(SDIO_PIO, SDIO_CMD_SM, false));
  215. dma_channel_configure(SDIO_DMA_CH, &dmacfg, &response_buf, &SDIO_PIO->rxf[SDIO_CMD_SM], 5, true);
  216. sdio_send_command(command, arg, 136);
  217. uint32_t start = millis();
  218. while (dma_channel_is_busy(SDIO_DMA_CH))
  219. {
  220. if ((uint32_t)(millis() - start) > 2)
  221. {
  222. debuglog("Timeout waiting for response in rp2040_sdio_command_R2(", (int)command, "), ",
  223. "PIO PC: ", (int)pio_sm_get_pc(SDIO_PIO, SDIO_CMD_SM) - (int)g_sdio.pio_cmd_clk_offset,
  224. " RXF: ", (int)pio_sm_get_rx_fifo_level(SDIO_PIO, SDIO_CMD_SM),
  225. " TXF: ", (int)pio_sm_get_tx_fifo_level(SDIO_PIO, SDIO_CMD_SM));
  226. // Reset the state machine program
  227. dma_channel_abort(SDIO_DMA_CH);
  228. pio_sm_clear_fifos(SDIO_PIO, SDIO_CMD_SM);
  229. pio_sm_exec(SDIO_PIO, SDIO_CMD_SM, pio_encode_jmp(g_sdio.pio_cmd_clk_offset));
  230. return SDIO_ERR_RESPONSE_TIMEOUT;
  231. }
  232. }
  233. dma_channel_abort(SDIO_DMA_CH);
  234. // Copy the response payload to output buffer
  235. response[0] = ((response_buf[0] >> 16) & 0xFF);
  236. response[1] = ((response_buf[0] >> 8) & 0xFF);
  237. response[2] = ((response_buf[0] >> 0) & 0xFF);
  238. response[3] = ((response_buf[1] >> 24) & 0xFF);
  239. response[4] = ((response_buf[1] >> 16) & 0xFF);
  240. response[5] = ((response_buf[1] >> 8) & 0xFF);
  241. response[6] = ((response_buf[1] >> 0) & 0xFF);
  242. response[7] = ((response_buf[2] >> 24) & 0xFF);
  243. response[8] = ((response_buf[2] >> 16) & 0xFF);
  244. response[9] = ((response_buf[2] >> 8) & 0xFF);
  245. response[10] = ((response_buf[2] >> 0) & 0xFF);
  246. response[11] = ((response_buf[3] >> 24) & 0xFF);
  247. response[12] = ((response_buf[3] >> 16) & 0xFF);
  248. response[13] = ((response_buf[3] >> 8) & 0xFF);
  249. response[14] = ((response_buf[3] >> 0) & 0xFF);
  250. response[15] = ((response_buf[4] >> 0) & 0xFF);
  251. // Calculate checksum of the payload
  252. uint8_t crc = 0;
  253. for (int i = 0; i < 15; i++)
  254. {
  255. crc = crc7_table[crc ^ response[i]];
  256. }
  257. uint8_t actual_crc = response[15] & 0xFE;
  258. if (crc != actual_crc)
  259. {
  260. debuglog("rp2040_sdio_command_R2(", (int)command, "): CRC error, calculated ", crc, " packet has ", actual_crc);
  261. return SDIO_ERR_RESPONSE_CRC;
  262. }
  263. uint8_t response_cmd = ((response_buf[0] >> 24) & 0xFF);
  264. if (response_cmd != 0x3F)
  265. {
  266. debuglog("rp2040_sdio_command_R2(", (int)command, "): Expected reply code 0x3F");
  267. return SDIO_ERR_RESPONSE_CODE;
  268. }
  269. return SDIO_OK;
  270. }
  271. sdio_status_t rp2040_sdio_command_R3(uint8_t command, uint32_t arg, uint32_t *response)
  272. {
  273. sdio_send_command(command, arg, 48);
  274. // Wait for response
  275. uint32_t start = millis();
  276. while (pio_sm_get_rx_fifo_level(SDIO_PIO, SDIO_CMD_SM) < 2)
  277. {
  278. if ((uint32_t)(millis() - start) > 2)
  279. {
  280. debuglog("Timeout waiting for response in rp2040_sdio_command_R3(", (int)command, "), ",
  281. "PIO PC: ", (int)pio_sm_get_pc(SDIO_PIO, SDIO_CMD_SM) - (int)g_sdio.pio_cmd_clk_offset,
  282. " RXF: ", (int)pio_sm_get_rx_fifo_level(SDIO_PIO, SDIO_CMD_SM),
  283. " TXF: ", (int)pio_sm_get_tx_fifo_level(SDIO_PIO, SDIO_CMD_SM));
  284. // Reset the state machine program
  285. pio_sm_clear_fifos(SDIO_PIO, SDIO_CMD_SM);
  286. pio_sm_exec(SDIO_PIO, SDIO_CMD_SM, pio_encode_jmp(g_sdio.pio_cmd_clk_offset));
  287. return SDIO_ERR_RESPONSE_TIMEOUT;
  288. }
  289. }
  290. // Read out response packet
  291. uint32_t resp0 = pio_sm_get(SDIO_PIO, SDIO_CMD_SM);
  292. uint32_t resp1 = pio_sm_get(SDIO_PIO, SDIO_CMD_SM);
  293. *response = ((resp0 & 0xFFFFFF) << 8) | ((resp1 >> 8) & 0xFF);
  294. // debuglog("SDIO R3 response: ", resp0, " ", resp1);
  295. return SDIO_OK;
  296. }
  297. /*******************************************************
  298. * Data reception from SD card
  299. *******************************************************/
  300. sdio_status_t rp2040_sdio_rx_start(uint8_t *buffer, uint32_t num_blocks)
  301. {
  302. // Buffer must be aligned
  303. assert(((uint32_t)buffer & 3) == 0 && num_blocks <= SDIO_MAX_BLOCKS);
  304. g_sdio.transfer_state = SDIO_RX;
  305. g_sdio.transfer_start_time = millis();
  306. g_sdio.data_buf = (uint32_t*)buffer;
  307. g_sdio.blocks_done = 0;
  308. g_sdio.total_blocks = num_blocks;
  309. g_sdio.blocks_checksumed = 0;
  310. g_sdio.checksum_errors = 0;
  311. // Create DMA block descriptors to store each block of 512 bytes of data to buffer
  312. // and then 8 bytes to g_sdio.received_checksums.
  313. for (int i = 0; i < num_blocks; i++)
  314. {
  315. g_sdio.dma_blocks[i * 2].write_addr = buffer + i * SDIO_BLOCK_SIZE;
  316. g_sdio.dma_blocks[i * 2].transfer_count = SDIO_BLOCK_SIZE / sizeof(uint32_t);
  317. g_sdio.dma_blocks[i * 2 + 1].write_addr = &g_sdio.received_checksums[i];
  318. g_sdio.dma_blocks[i * 2 + 1].transfer_count = 2;
  319. }
  320. g_sdio.dma_blocks[num_blocks * 2].write_addr = 0;
  321. g_sdio.dma_blocks[num_blocks * 2].transfer_count = 0;
  322. // Configure first DMA channel for reading from the PIO RX fifo
  323. dma_channel_config dmacfg = dma_channel_get_default_config(SDIO_DMA_CH);
  324. channel_config_set_transfer_data_size(&dmacfg, DMA_SIZE_32);
  325. channel_config_set_read_increment(&dmacfg, false);
  326. channel_config_set_write_increment(&dmacfg, true);
  327. channel_config_set_dreq(&dmacfg, pio_get_dreq(SDIO_PIO, SDIO_DATA_SM, false));
  328. channel_config_set_bswap(&dmacfg, true);
  329. channel_config_set_chain_to(&dmacfg, SDIO_DMA_CHB);
  330. dma_channel_configure(SDIO_DMA_CH, &dmacfg, 0, &SDIO_PIO->rxf[SDIO_DATA_SM], 0, false);
  331. // Configure second DMA channel for reconfiguring the first one
  332. dmacfg = dma_channel_get_default_config(SDIO_DMA_CHB);
  333. channel_config_set_transfer_data_size(&dmacfg, DMA_SIZE_32);
  334. channel_config_set_read_increment(&dmacfg, true);
  335. channel_config_set_write_increment(&dmacfg, true);
  336. channel_config_set_ring(&dmacfg, true, 3);
  337. dma_channel_configure(SDIO_DMA_CHB, &dmacfg, &dma_hw->ch[SDIO_DMA_CH].al1_write_addr,
  338. g_sdio.dma_blocks, 2, false);
  339. // Initialize PIO state machine
  340. pio_sm_init(SDIO_PIO, SDIO_DATA_SM, g_sdio.pio_data_rx_offset, &g_sdio.pio_cfg_data_rx);
  341. pio_sm_set_consecutive_pindirs(SDIO_PIO, SDIO_DATA_SM, SDIO_D0, 4, false);
  342. // Write number of nibbles to receive to Y register
  343. pio_sm_put(SDIO_PIO, SDIO_DATA_SM, SDIO_BLOCK_SIZE * 2 + 16 - 1);
  344. pio_sm_exec(SDIO_PIO, SDIO_DATA_SM, pio_encode_out(pio_y, 32));
  345. // Enable RX FIFO join because we don't need the TX FIFO during transfer.
  346. // This gives more leeway for the DMA block switching
  347. SDIO_PIO->sm[SDIO_DATA_SM].shiftctrl |= PIO_SM0_SHIFTCTRL_FJOIN_RX_BITS;
  348. // Start PIO and DMA
  349. dma_channel_start(SDIO_DMA_CHB);
  350. pio_sm_set_enabled(SDIO_PIO, SDIO_DATA_SM, true);
  351. return SDIO_OK;
  352. }
  353. // Check checksums for received blocks
  354. static void sdio_verify_rx_checksums(uint32_t maxcount)
  355. {
  356. while (g_sdio.blocks_checksumed < g_sdio.blocks_done && maxcount-- > 0)
  357. {
  358. // Calculate checksum from received data
  359. int blockidx = g_sdio.blocks_checksumed++;
  360. uint64_t checksum = sdio_crc16_4bit_checksum(g_sdio.data_buf + blockidx * SDIO_WORDS_PER_BLOCK,
  361. SDIO_WORDS_PER_BLOCK);
  362. // Convert received checksum to little-endian format
  363. uint32_t top = __builtin_bswap32(g_sdio.received_checksums[blockidx].top);
  364. uint32_t bottom = __builtin_bswap32(g_sdio.received_checksums[blockidx].bottom);
  365. uint64_t expected = ((uint64_t)top << 32) | bottom;
  366. if (checksum != expected)
  367. {
  368. g_sdio.checksum_errors++;
  369. if (g_sdio.checksum_errors == 1)
  370. {
  371. log("SDIO checksum error in reception: block ", blockidx,
  372. " calculated ", checksum, " expected ", expected);
  373. }
  374. }
  375. }
  376. }
  377. sdio_status_t rp2040_sdio_rx_poll(uint32_t *bytes_complete)
  378. {
  379. // Was everything done when the previous rx_poll() finished?
  380. if (g_sdio.blocks_done >= g_sdio.total_blocks)
  381. {
  382. g_sdio.transfer_state = SDIO_IDLE;
  383. }
  384. else
  385. {
  386. // Use the idle time to calculate checksums
  387. sdio_verify_rx_checksums(4);
  388. // Check how many DMA control blocks have been consumed
  389. uint32_t dma_ctrl_block_count = (dma_hw->ch[SDIO_DMA_CHB].read_addr - (uint32_t)&g_sdio.dma_blocks);
  390. dma_ctrl_block_count /= sizeof(g_sdio.dma_blocks[0]);
  391. // Compute how many complete 512 byte SDIO blocks have been transferred
  392. // When transfer ends, dma_ctrl_block_count == g_sdio.total_blocks * 2 + 1
  393. g_sdio.blocks_done = (dma_ctrl_block_count - 1) / 2;
  394. // NOTE: When all blocks are done, rx_poll() still returns SDIO_BUSY once.
  395. // This provides a chance to start the SCSI transfer before the last checksums
  396. // are computed. Any checksum failures can be indicated in SCSI status after
  397. // the data transfer has finished.
  398. }
  399. if (bytes_complete)
  400. {
  401. *bytes_complete = g_sdio.blocks_done * SDIO_BLOCK_SIZE;
  402. }
  403. if (g_sdio.transfer_state == SDIO_IDLE)
  404. {
  405. // Verify all remaining checksums.
  406. sdio_verify_rx_checksums(g_sdio.total_blocks);
  407. if (g_sdio.checksum_errors == 0)
  408. return SDIO_OK;
  409. else
  410. return SDIO_ERR_DATA_CRC;
  411. }
  412. else if ((uint32_t)(millis() - g_sdio.transfer_start_time) > 1000)
  413. {
  414. debuglog("rp2040_sdio_rx_poll() timeout, "
  415. "PIO PC: ", (int)pio_sm_get_pc(SDIO_PIO, SDIO_DATA_SM) - (int)g_sdio.pio_data_rx_offset,
  416. " RXF: ", (int)pio_sm_get_rx_fifo_level(SDIO_PIO, SDIO_DATA_SM),
  417. " TXF: ", (int)pio_sm_get_tx_fifo_level(SDIO_PIO, SDIO_DATA_SM),
  418. " DMA CNT: ", dma_hw->ch[SDIO_DMA_CH].al2_transfer_count);
  419. rp2040_sdio_stop();
  420. return SDIO_ERR_DATA_TIMEOUT;
  421. }
  422. return SDIO_BUSY;
  423. }
  424. /*******************************************************
  425. * Data transmission to SD card
  426. *******************************************************/
  427. static void sdio_start_next_block_tx()
  428. {
  429. // Initialize PIO
  430. pio_sm_init(SDIO_PIO, SDIO_DATA_SM, g_sdio.pio_data_tx_offset, &g_sdio.pio_cfg_data_tx);
  431. // Configure DMA to send the data block payload (512 bytes)
  432. dma_channel_config dmacfg = dma_channel_get_default_config(SDIO_DMA_CH);
  433. channel_config_set_transfer_data_size(&dmacfg, DMA_SIZE_32);
  434. channel_config_set_read_increment(&dmacfg, true);
  435. channel_config_set_write_increment(&dmacfg, false);
  436. channel_config_set_dreq(&dmacfg, pio_get_dreq(SDIO_PIO, SDIO_DATA_SM, true));
  437. channel_config_set_bswap(&dmacfg, true);
  438. channel_config_set_chain_to(&dmacfg, SDIO_DMA_CHB);
  439. dma_channel_configure(SDIO_DMA_CH, &dmacfg,
  440. &SDIO_PIO->txf[SDIO_DATA_SM], g_sdio.data_buf + g_sdio.blocks_done * SDIO_WORDS_PER_BLOCK,
  441. SDIO_WORDS_PER_BLOCK, false);
  442. // Prepare second DMA channel to send the CRC and block end marker
  443. uint64_t crc = g_sdio.next_wr_block_checksum;
  444. g_sdio.end_token_buf[0] = (uint32_t)(crc >> 32);
  445. g_sdio.end_token_buf[1] = (uint32_t)(crc >> 0);
  446. g_sdio.end_token_buf[2] = 0xFFFFFFFF;
  447. channel_config_set_bswap(&dmacfg, false);
  448. dma_channel_configure(SDIO_DMA_CHB, &dmacfg,
  449. &SDIO_PIO->txf[SDIO_DATA_SM], g_sdio.end_token_buf, 3, false);
  450. // Enable IRQ to trigger when block is done
  451. dma_hw->ints1 = 1 << SDIO_DMA_CHB;
  452. dma_set_irq1_channel_mask_enabled(1 << SDIO_DMA_CHB, 1);
  453. // Initialize register X with nibble count and register Y with response bit count
  454. pio_sm_put(SDIO_PIO, SDIO_DATA_SM, 1048);
  455. pio_sm_exec(SDIO_PIO, SDIO_DATA_SM, pio_encode_out(pio_x, 32));
  456. pio_sm_put(SDIO_PIO, SDIO_DATA_SM, 31);
  457. pio_sm_exec(SDIO_PIO, SDIO_DATA_SM, pio_encode_out(pio_y, 32));
  458. // Initialize pins to output and high
  459. pio_sm_exec(SDIO_PIO, SDIO_DATA_SM, pio_encode_set(pio_pins, 15));
  460. pio_sm_exec(SDIO_PIO, SDIO_DATA_SM, pio_encode_set(pio_pindirs, 15));
  461. // Write start token and start the DMA transfer.
  462. pio_sm_put(SDIO_PIO, SDIO_DATA_SM, 0xFFFFFFF0);
  463. dma_channel_start(SDIO_DMA_CH);
  464. // Start state machine
  465. pio_sm_set_enabled(SDIO_PIO, SDIO_DATA_SM, true);
  466. }
  467. static void sdio_compute_next_tx_checksum()
  468. {
  469. assert (g_sdio.blocks_done < g_sdio.total_blocks && g_sdio.blocks_checksumed < g_sdio.total_blocks);
  470. int blockidx = g_sdio.blocks_checksumed++;
  471. g_sdio.next_wr_block_checksum = sdio_crc16_4bit_checksum(g_sdio.data_buf + blockidx * SDIO_WORDS_PER_BLOCK,
  472. SDIO_WORDS_PER_BLOCK);
  473. }
  474. // Start transferring data from memory to SD card
  475. sdio_status_t rp2040_sdio_tx_start(const uint8_t *buffer, uint32_t num_blocks)
  476. {
  477. // Buffer must be aligned
  478. assert(((uint32_t)buffer & 3) == 0 && num_blocks <= SDIO_MAX_BLOCKS);
  479. g_sdio.transfer_state = SDIO_TX;
  480. g_sdio.transfer_start_time = millis();
  481. g_sdio.data_buf = (uint32_t*)buffer;
  482. g_sdio.blocks_done = 0;
  483. g_sdio.total_blocks = num_blocks;
  484. g_sdio.blocks_checksumed = 0;
  485. g_sdio.checksum_errors = 0;
  486. // Compute first block checksum
  487. sdio_compute_next_tx_checksum();
  488. // Start first DMA transfer and PIO
  489. sdio_start_next_block_tx();
  490. if (g_sdio.blocks_checksumed < g_sdio.total_blocks)
  491. {
  492. // Precompute second block checksum
  493. sdio_compute_next_tx_checksum();
  494. }
  495. return SDIO_OK;
  496. }
  497. sdio_status_t check_sdio_write_response(uint32_t card_response)
  498. {
  499. // Shift card response until top bit is 0 (the start bit)
  500. // The format of response is poorly documented in SDIO spec but refer to e.g.
  501. // http://my-cool-projects.blogspot.com/2013/02/the-mysterious-sd-card-crc-status.html
  502. uint32_t resp = card_response;
  503. if (!(~resp & 0xFFFF0000)) resp <<= 16;
  504. if (!(~resp & 0xFF000000)) resp <<= 8;
  505. if (!(~resp & 0xF0000000)) resp <<= 4;
  506. if (!(~resp & 0xC0000000)) resp <<= 2;
  507. if (!(~resp & 0x80000000)) resp <<= 1;
  508. uint32_t wr_status = (resp >> 28) & 7;
  509. if (wr_status == 2)
  510. {
  511. return SDIO_OK;
  512. }
  513. else if (wr_status == 5)
  514. {
  515. log("SDIO card reports write CRC error, status ", card_response);
  516. return SDIO_ERR_WRITE_CRC;
  517. }
  518. else if (wr_status == 6)
  519. {
  520. log("SDIO card reports write failure, status ", card_response);
  521. return SDIO_ERR_WRITE_FAIL;
  522. }
  523. else
  524. {
  525. log("SDIO card reports unknown write status ", card_response);
  526. return SDIO_ERR_WRITE_FAIL;
  527. }
  528. }
  529. // When a block finishes, this IRQ handler starts the next one
  530. static void rp2040_sdio_tx_irq()
  531. {
  532. dma_hw->ints1 = 1 << SDIO_DMA_CHB;
  533. if (g_sdio.transfer_state == SDIO_TX)
  534. {
  535. if (!dma_channel_is_busy(SDIO_DMA_CH) && !dma_channel_is_busy(SDIO_DMA_CHB))
  536. {
  537. // Main data transfer is finished now.
  538. // When card is ready, PIO will put card response on RX fifo
  539. g_sdio.transfer_state = SDIO_TX_WAIT_IDLE;
  540. if (!pio_sm_is_rx_fifo_empty(SDIO_PIO, SDIO_DATA_SM))
  541. {
  542. // Card is already idle
  543. g_sdio.card_response = pio_sm_get(SDIO_PIO, SDIO_DATA_SM);
  544. }
  545. else
  546. {
  547. // Use DMA to wait for the response
  548. dma_channel_config dmacfg = dma_channel_get_default_config(SDIO_DMA_CHB);
  549. channel_config_set_transfer_data_size(&dmacfg, DMA_SIZE_32);
  550. channel_config_set_read_increment(&dmacfg, false);
  551. channel_config_set_write_increment(&dmacfg, false);
  552. channel_config_set_dreq(&dmacfg, pio_get_dreq(SDIO_PIO, SDIO_DATA_SM, false));
  553. dma_channel_configure(SDIO_DMA_CHB, &dmacfg,
  554. &g_sdio.card_response, &SDIO_PIO->rxf[SDIO_DATA_SM], 1, true);
  555. }
  556. }
  557. }
  558. if (g_sdio.transfer_state == SDIO_TX_WAIT_IDLE)
  559. {
  560. if (!dma_channel_is_busy(SDIO_DMA_CHB))
  561. {
  562. g_sdio.wr_status = check_sdio_write_response(g_sdio.card_response);
  563. if (g_sdio.wr_status != SDIO_OK)
  564. {
  565. rp2040_sdio_stop();
  566. return;
  567. }
  568. g_sdio.blocks_done++;
  569. if (g_sdio.blocks_done < g_sdio.total_blocks)
  570. {
  571. sdio_start_next_block_tx();
  572. g_sdio.transfer_state = SDIO_TX;
  573. if (g_sdio.blocks_checksumed < g_sdio.total_blocks)
  574. {
  575. // Precompute the CRC for next block so that it is ready when
  576. // we want to send it.
  577. sdio_compute_next_tx_checksum();
  578. }
  579. }
  580. else
  581. {
  582. rp2040_sdio_stop();
  583. }
  584. }
  585. }
  586. }
  587. // Check if transmission is complete
  588. sdio_status_t rp2040_sdio_tx_poll(uint32_t *bytes_complete)
  589. {
  590. if (SCB->ICSR & SCB_ICSR_VECTACTIVE_Msk)
  591. {
  592. // Verify that IRQ handler gets called even if we are in hardfault handler
  593. rp2040_sdio_tx_irq();
  594. }
  595. if (bytes_complete)
  596. {
  597. *bytes_complete = g_sdio.blocks_done * SDIO_BLOCK_SIZE;
  598. }
  599. if (g_sdio.transfer_state == SDIO_IDLE)
  600. {
  601. rp2040_sdio_stop();
  602. return g_sdio.wr_status;
  603. }
  604. else if ((uint32_t)(millis() - g_sdio.transfer_start_time) > 1000)
  605. {
  606. debuglog("rp2040_sdio_tx_poll() timeout, "
  607. "PIO PC: ", (int)pio_sm_get_pc(SDIO_PIO, SDIO_DATA_SM) - (int)g_sdio.pio_data_tx_offset,
  608. " RXF: ", (int)pio_sm_get_rx_fifo_level(SDIO_PIO, SDIO_DATA_SM),
  609. " TXF: ", (int)pio_sm_get_tx_fifo_level(SDIO_PIO, SDIO_DATA_SM),
  610. " DMA CNT: ", dma_hw->ch[SDIO_DMA_CH].al2_transfer_count);
  611. rp2040_sdio_stop();
  612. return SDIO_ERR_DATA_TIMEOUT;
  613. }
  614. return SDIO_BUSY;
  615. }
  616. // Force everything to idle state
  617. sdio_status_t rp2040_sdio_stop()
  618. {
  619. dma_channel_abort(SDIO_DMA_CH);
  620. dma_channel_abort(SDIO_DMA_CHB);
  621. dma_set_irq1_channel_mask_enabled(1 << SDIO_DMA_CHB, 0);
  622. pio_sm_set_enabled(SDIO_PIO, SDIO_DATA_SM, false);
  623. pio_sm_set_consecutive_pindirs(SDIO_PIO, SDIO_DATA_SM, SDIO_D0, 4, false);
  624. g_sdio.transfer_state = SDIO_IDLE;
  625. return SDIO_OK;
  626. }
  627. void rp2040_sdio_init(int clock_divider)
  628. {
  629. // Mark resources as being in use, unless it has been done already.
  630. static bool resources_claimed = false;
  631. if (!resources_claimed)
  632. {
  633. pio_sm_claim(SDIO_PIO, SDIO_CMD_SM);
  634. pio_sm_claim(SDIO_PIO, SDIO_DATA_SM);
  635. dma_channel_claim(SDIO_DMA_CH);
  636. dma_channel_claim(SDIO_DMA_CHB);
  637. resources_claimed = true;
  638. }
  639. memset(&g_sdio, 0, sizeof(g_sdio));
  640. dma_channel_abort(SDIO_DMA_CH);
  641. dma_channel_abort(SDIO_DMA_CHB);
  642. pio_sm_set_enabled(SDIO_PIO, SDIO_CMD_SM, false);
  643. pio_sm_set_enabled(SDIO_PIO, SDIO_DATA_SM, false);
  644. // Load PIO programs
  645. pio_clear_instruction_memory(SDIO_PIO);
  646. // Command & clock state machine
  647. g_sdio.pio_cmd_clk_offset = pio_add_program(SDIO_PIO, &sdio_cmd_clk_program);
  648. pio_sm_config cfg = sdio_cmd_clk_program_get_default_config(g_sdio.pio_cmd_clk_offset);
  649. sm_config_set_out_pins(&cfg, SDIO_CMD, 1);
  650. sm_config_set_in_pins(&cfg, SDIO_CMD);
  651. sm_config_set_set_pins(&cfg, SDIO_CMD, 1);
  652. sm_config_set_jmp_pin(&cfg, SDIO_CMD);
  653. sm_config_set_sideset_pins(&cfg, SDIO_CLK);
  654. sm_config_set_out_shift(&cfg, false, true, 32);
  655. sm_config_set_in_shift(&cfg, false, true, 32);
  656. sm_config_set_clkdiv_int_frac(&cfg, clock_divider, 0);
  657. sm_config_set_mov_status(&cfg, STATUS_TX_LESSTHAN, 2);
  658. pio_sm_init(SDIO_PIO, SDIO_CMD_SM, g_sdio.pio_cmd_clk_offset, &cfg);
  659. pio_sm_set_consecutive_pindirs(SDIO_PIO, SDIO_CMD_SM, SDIO_CLK, 1, true);
  660. pio_sm_set_enabled(SDIO_PIO, SDIO_CMD_SM, true);
  661. // Data reception program
  662. g_sdio.pio_data_rx_offset = pio_add_program(SDIO_PIO, &sdio_data_rx_program);
  663. g_sdio.pio_cfg_data_rx = sdio_data_rx_program_get_default_config(g_sdio.pio_data_rx_offset);
  664. sm_config_set_in_pins(&g_sdio.pio_cfg_data_rx, SDIO_D0);
  665. sm_config_set_in_shift(&g_sdio.pio_cfg_data_rx, false, true, 32);
  666. sm_config_set_out_shift(&g_sdio.pio_cfg_data_rx, false, true, 32);
  667. sm_config_set_clkdiv_int_frac(&g_sdio.pio_cfg_data_rx, clock_divider, 0);
  668. // Data transmission program
  669. g_sdio.pio_data_tx_offset = pio_add_program(SDIO_PIO, &sdio_data_tx_program);
  670. g_sdio.pio_cfg_data_tx = sdio_data_tx_program_get_default_config(g_sdio.pio_data_tx_offset);
  671. sm_config_set_in_pins(&g_sdio.pio_cfg_data_tx, SDIO_D0);
  672. sm_config_set_set_pins(&g_sdio.pio_cfg_data_tx, SDIO_D0, 4);
  673. sm_config_set_out_pins(&g_sdio.pio_cfg_data_tx, SDIO_D0, 4);
  674. sm_config_set_in_shift(&g_sdio.pio_cfg_data_tx, false, false, 32);
  675. sm_config_set_out_shift(&g_sdio.pio_cfg_data_tx, false, true, 32);
  676. sm_config_set_clkdiv_int_frac(&g_sdio.pio_cfg_data_tx, clock_divider, 0);
  677. // Disable SDIO pins input synchronizer.
  678. // This reduces input delay.
  679. // Because the CLK is driven synchronously to CPU clock,
  680. // there should be no metastability problems.
  681. SDIO_PIO->input_sync_bypass |= (1 << SDIO_CLK) | (1 << SDIO_CMD)
  682. | (1 << SDIO_D0) | (1 << SDIO_D1) | (1 << SDIO_D2) | (1 << SDIO_D3);
  683. // Redirect GPIOs to PIO
  684. gpio_set_function(SDIO_CMD, GPIO_FUNC_PIO1);
  685. gpio_set_function(SDIO_CLK, GPIO_FUNC_PIO1);
  686. gpio_set_function(SDIO_D0, GPIO_FUNC_PIO1);
  687. gpio_set_function(SDIO_D1, GPIO_FUNC_PIO1);
  688. gpio_set_function(SDIO_D2, GPIO_FUNC_PIO1);
  689. gpio_set_function(SDIO_D3, GPIO_FUNC_PIO1);
  690. // Set up IRQ handler when DMA completes.
  691. irq_set_exclusive_handler(DMA_IRQ_1, rp2040_sdio_tx_irq);
  692. irq_set_enabled(DMA_IRQ_1, true);
  693. }