rp2040_sdio.cpp 42 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040
  1. // Implementation of SDIO communication for RP2040
  2. // Copyright (c) 2022 Rabbit Hole Computing™
  3. // Copyright (c) 2024 Tech by Androda, LLC
  4. //
  5. // The RP2040 official work-in-progress code at
  6. // https://github.com/raspberrypi/pico-extras/tree/master/src/rp2_common/pico_sd_card
  7. // may be a useful reference, but this is an independent implementation.
  8. //
  9. // For official SDIO specifications, refer to:
  10. // https://www.sdcard.org/downloads/pls/
  11. // "SDIO Physical Layer Simplified Specification Version 8.00"
  12. #include "rp2040_sdio.h"
  13. #include "rp2040_sdio.pio.h"
  14. #include <hardware/pio.h>
  15. #include <hardware/dma.h>
  16. //#include <hardware/gpio.h>
  17. #include <BlueSCSI_platform.h>
  18. #include <BlueSCSI_log.h>
  19. #include "timings_RP2MCU.h"
  // PIO instance and state machine indices used by this driver.
  #define SDIO_PIO      pio1
  #define SDIO_CMD_SM   0
  #define SDIO_DATA_SM  1

  // DMA channels used by this driver.
  #define SDIO_DMA_CH   4
  #define SDIO_DMA_CHB  5

  // Masks for PIO instruction words. They are not referenced in the part of
  // the file reviewed here; judging by the names they isolate / strip the
  // delay field and identify JMP instructions and their target address
  // (TODO confirm against the RP2040 PIO instruction encoding).
  #define PIO_INSTR_MASK_REMOVE_DELAY  0xF8FF
  #define PIO_INSTR_MASK_GET_DELAY     0x700
  #define PIO_INSTR_JMP_MASK           0xE000
  #define PIO_INSTR_JMP_ADDR           0x1F

  // Maximum number of 512 byte blocks to transfer in one request
  #define SDIO_MAX_BLOCKS 256
  31. enum sdio_transfer_state_t { SDIO_IDLE, SDIO_RX, SDIO_TX, SDIO_TX_WAIT_IDLE};
  32. static struct {
  33. uint32_t pio_cmd_rsp_clk_offset;
  34. pio_sm_config pio_cfg_cmd_rsp;
  35. uint32_t pio_data_rx_offset;
  36. pio_sm_config pio_cfg_data_rx;
  37. uint32_t pio_data_tx_offset;
  38. pio_sm_config pio_cfg_data_tx;
  39. sdio_transfer_state_t transfer_state;
  40. uint32_t transfer_start_time;
  41. uint32_t *data_buf;
  42. uint32_t blocks_done; // Number of blocks transferred so far
  43. uint32_t total_blocks; // Total number of blocks to transfer
  44. uint32_t blocks_checksumed; // Number of blocks that have had CRC calculated
  45. uint32_t checksum_errors; // Number of checksum errors detected
  46. uint8_t cmdBuf[6];
  47. // Variables for block writes
  48. uint64_t next_wr_block_checksum;
  49. uint32_t end_token_buf[3]; // CRC and end token for write block
  50. sdio_status_t wr_status;
  51. uint32_t card_response;
  52. // Variables for block reads
  53. // This is used to perform DMA into data buffers and checksum buffers separately.
  54. struct {
  55. void * write_addr;
  56. uint32_t transfer_count;
  57. } dma_blocks[SDIO_MAX_BLOCKS * 2];
  58. struct {
  59. uint32_t top;
  60. uint32_t bottom;
  61. } received_checksums[SDIO_MAX_BLOCKS];
  62. } g_sdio;
  63. void rp2040_sdio_dma_irq();
  64. /*******************************************************
  65. * Checksum algorithms
  66. *******************************************************/
  // Byte-wise lookup table for the CRC-7 checksum carried by SDIO command and
  // response packets. Entries hold the CRC left-aligned in bits 7..1, so the
  // running value can be XORed directly with the next message byte.
  // Usage:
  //    uint8_t crc = 0;
  //    crc = crc7_table[crc ^ byte];
  //    .. repeat for every byte ..
  static const uint8_t crc7_table[256] = {
      0x00, 0x12, 0x24, 0x36, 0x48, 0x5A, 0x6C, 0x7E, 0x90, 0x82, 0xB4, 0xA6, 0xD8, 0xCA, 0xFC, 0xEE,
      0x32, 0x20, 0x16, 0x04, 0x7A, 0x68, 0x5E, 0x4C, 0xA2, 0xB0, 0x86, 0x94, 0xEA, 0xF8, 0xCE, 0xDC,
      0x64, 0x76, 0x40, 0x52, 0x2C, 0x3E, 0x08, 0x1A, 0xF4, 0xE6, 0xD0, 0xC2, 0xBC, 0xAE, 0x98, 0x8A,
      0x56, 0x44, 0x72, 0x60, 0x1E, 0x0C, 0x3A, 0x28, 0xC6, 0xD4, 0xE2, 0xF0, 0x8E, 0x9C, 0xAA, 0xB8,
      0xC8, 0xDA, 0xEC, 0xFE, 0x80, 0x92, 0xA4, 0xB6, 0x58, 0x4A, 0x7C, 0x6E, 0x10, 0x02, 0x34, 0x26,
      0xFA, 0xE8, 0xDE, 0xCC, 0xB2, 0xA0, 0x96, 0x84, 0x6A, 0x78, 0x4E, 0x5C, 0x22, 0x30, 0x06, 0x14,
      0xAC, 0xBE, 0x88, 0x9A, 0xE4, 0xF6, 0xC0, 0xD2, 0x3C, 0x2E, 0x18, 0x0A, 0x74, 0x66, 0x50, 0x42,
      0x9E, 0x8C, 0xBA, 0xA8, 0xD6, 0xC4, 0xF2, 0xE0, 0x0E, 0x1C, 0x2A, 0x38, 0x46, 0x54, 0x62, 0x70,
      0x82, 0x90, 0xA6, 0xB4, 0xCA, 0xD8, 0xEE, 0xFC, 0x12, 0x00, 0x36, 0x24, 0x5A, 0x48, 0x7E, 0x6C,
      0xB0, 0xA2, 0x94, 0x86, 0xF8, 0xEA, 0xDC, 0xCE, 0x20, 0x32, 0x04, 0x16, 0x68, 0x7A, 0x4C, 0x5E,
      0xE6, 0xF4, 0xC2, 0xD0, 0xAE, 0xBC, 0x8A, 0x98, 0x76, 0x64, 0x52, 0x40, 0x3E, 0x2C, 0x1A, 0x08,
      0xD4, 0xC6, 0xF0, 0xE2, 0x9C, 0x8E, 0xB8, 0xAA, 0x44, 0x56, 0x60, 0x72, 0x0C, 0x1E, 0x28, 0x3A,
      0x4A, 0x58, 0x6E, 0x7C, 0x02, 0x10, 0x26, 0x34, 0xDA, 0xC8, 0xFE, 0xEC, 0x92, 0x80, 0xB6, 0xA4,
      0x78, 0x6A, 0x5C, 0x4E, 0x30, 0x22, 0x14, 0x06, 0xE8, 0xFA, 0xCC, 0xDE, 0xA0, 0xB2, 0x84, 0x96,
      0x2E, 0x3C, 0x0A, 0x18, 0x66, 0x74, 0x42, 0x50, 0xBE, 0xAC, 0x9A, 0x88, 0xF6, 0xE4, 0xD2, 0xC0,
      0x1C, 0x0E, 0x38, 0x2A, 0x54, 0x46, 0x70, 0x62, 0x8C, 0x9E, 0xA8, 0xBA, 0xC4, 0xD6, 0xE0, 0xF2
  };
  106. // Calculate the CRC16 checksum for parallel 4 bit lines separately.
  107. // When the SDIO bus operates in 4-bit mode, the CRC16 algorithm
  108. // is applied to each line separately and generates total of
  109. // 4 x 16 = 64 bits of checksum.
  110. __attribute__((optimize("O3")))
  111. uint64_t __not_in_flash_func(sdio_crc16_4bit_checksum)(uint32_t *data, uint32_t num_words)
  112. {
  113. uint64_t crc = 0;
  114. uint32_t *end = data + num_words;
  115. while (data < end)
  116. {
  117. for (int unroll = 0; unroll < 4; unroll++)
  118. {
  119. // Each 32-bit word contains 8 bits per line.
  120. // Reverse the bytes because SDIO protocol is big-endian.
  121. uint32_t data_in = __builtin_bswap32(*data++);
  122. // Shift out 8 bits for each line
  123. uint32_t data_out = crc >> 32;
  124. crc <<= 32;
  125. // XOR outgoing data to itself with 4 bit delay
  126. data_out ^= (data_out >> 16);
  127. // XOR incoming data to outgoing data with 4 bit delay
  128. data_out ^= (data_in >> 16);
  129. // XOR outgoing and incoming data to accumulator at each tap
  130. uint64_t xorred = data_out ^ data_in;
  131. crc ^= xorred;
  132. crc ^= xorred << (5 * 4);
  133. crc ^= xorred << (12 * 4);
  134. }
  135. }
  136. return crc;
  137. }
  138. /*******************************************************
  139. * Clock Runner
  140. *******************************************************/
  141. void __not_in_flash_func(cycleSdClock)() {
  142. pio_sm_exec(SDIO_PIO, SDIO_CMD_SM, pio_encode_nop() | pio_encode_sideset_opt(1, 1) | pio_encode_delay(1));
  143. pio_sm_exec(SDIO_PIO, SDIO_CMD_SM, pio_encode_nop() | pio_encode_sideset_opt(1, 0) | pio_encode_delay(1));
  144. }
  145. /*******************************************************
  146. * Status Register Receiver
  147. *******************************************************/
  148. sdio_status_t __not_in_flash_func(receive_status_register)(uint8_t* sds) {
  149. rp2040_sdio_rx_start(sds, 1, 64);
  150. // Wait for the DMA operation to complete, or fail if it took too long
  151. waitagain:
  152. while (dma_channel_is_busy(SDIO_DMA_CHB) || dma_channel_is_busy(SDIO_DMA_CH))
  153. {
  154. if ((uint32_t)(millis() - g_sdio.transfer_start_time) > 2)
  155. {
  156. // Reset the state machine program
  157. dma_channel_abort(SDIO_DMA_CHB);
  158. pio_sm_set_enabled(SDIO_PIO, SDIO_CMD_SM, false);
  159. pio_sm_clear_fifos(SDIO_PIO, SDIO_CMD_SM);
  160. return SDIO_ERR_RESPONSE_TIMEOUT;
  161. }
  162. }
  163. // Assert that both DMA channels are complete
  164. if(dma_channel_is_busy(SDIO_DMA_CHB) || dma_channel_is_busy(SDIO_DMA_CH)) {
  165. // Wait failure, go back.
  166. goto waitagain;
  167. }
  168. pio_sm_set_enabled(SDIO_PIO, SDIO_DATA_SM, false);
  169. g_sdio.transfer_state = SDIO_IDLE;
  170. return SDIO_OK;
  171. }
  172. /*******************************************************
  173. * Basic SDIO command execution
  174. *******************************************************/
  175. static void __not_in_flash_func(sdio_send_command)(uint8_t command, uint32_t arg, uint8_t response_bits)
  176. {
  177. // if (command != 41 && command != 55) {
  178. // log("C: ", (int)command, " A: ", arg);
  179. // }
  180. io_wo_8* txFifo = reinterpret_cast<io_wo_8*>(&SDIO_PIO->txf[SDIO_CMD_SM]);
  181. // Reinitialize the CMD SM
  182. pio_sm_init(SDIO_PIO, SDIO_CMD_SM, g_sdio.pio_cmd_rsp_clk_offset, &g_sdio.pio_cfg_cmd_rsp);
  183. pio_sm_set_consecutive_pindirs(SDIO_PIO, SDIO_CMD_SM, SDIO_CLK, 1, true);
  184. pio_sm_set_consecutive_pindirs(SDIO_PIO, SDIO_CMD_SM, SDIO_CMD, 1, true);
  185. pio_sm_set_consecutive_pindirs(SDIO_PIO, SDIO_CMD_SM, SDIO_D0, 4, false);
  186. // Pin direction: output, initial state should be high
  187. pio_sm_exec(SDIO_PIO, SDIO_CMD_SM, pio_encode_set(pio_pins, 1));
  188. pio_sm_exec(SDIO_PIO, SDIO_CMD_SM, pio_encode_set(pio_pindirs, 1));
  189. // Write the number of tx / rx bits to the SM
  190. *txFifo = 55; // Write 56 bits total
  191. pio_sm_exec(SDIO_PIO, SDIO_CMD_SM, pio_encode_out(pio_x, 8));
  192. *txFifo = response_bits ? response_bits - 1 : 0; // Bit count to receive
  193. pio_sm_exec(SDIO_PIO, SDIO_CMD_SM, pio_encode_out(pio_y, 8));
  194. pio_sm_set_enabled(SDIO_PIO, SDIO_CMD_SM, true);
  195. // Build the command bytes (commands are 48 bits long)
  196. g_sdio.cmdBuf[0] = command | 0x40;
  197. g_sdio.cmdBuf[1] = (uint8_t)(arg >> 24U);
  198. g_sdio.cmdBuf[2] = (uint8_t)(arg >> 16U);
  199. g_sdio.cmdBuf[3] = (uint8_t)(arg >> 8U);
  200. g_sdio.cmdBuf[4] = (uint8_t)arg;
  201. // Get the SM clocking while we calculate CRCs
  202. *txFifo = 0XFF;
  203. // CRC calculation
  204. uint8_t crc = 0;
  205. for(uint8_t i = 0; i < 5; i++) {
  206. crc = crc7_table[crc ^ g_sdio.cmdBuf[i]];
  207. }
  208. crc = crc | 0x1;
  209. g_sdio.cmdBuf[5] = crc;
  210. dma_channel_config dmacfg = dma_channel_get_default_config(SDIO_DMA_CH);
  211. channel_config_set_transfer_data_size(&dmacfg, DMA_SIZE_8);
  212. channel_config_set_read_increment(&dmacfg, true);
  213. channel_config_set_write_increment(&dmacfg, false);
  214. channel_config_set_dreq(&dmacfg, pio_get_dreq(SDIO_PIO, SDIO_CMD_SM, true));
  215. dma_channel_configure(SDIO_DMA_CH, &dmacfg, &SDIO_PIO->txf[SDIO_CMD_SM], &g_sdio.cmdBuf, 6, true);
  216. }
  217. sdio_status_t __not_in_flash_func(rp2040_sdio_command_R1)(uint8_t command, uint32_t arg, uint32_t *response)
  218. {
  219. uint32_t resp[2];
  220. if (response) {
  221. dma_channel_config dmacfg = dma_channel_get_default_config(SDIO_DMA_CHB);
  222. channel_config_set_transfer_data_size(&dmacfg, DMA_SIZE_8);
  223. channel_config_set_read_increment(&dmacfg, false);
  224. channel_config_set_write_increment(&dmacfg, true);
  225. channel_config_set_dreq(&dmacfg, pio_get_dreq(SDIO_PIO, SDIO_CMD_SM, false)); //6 * 8 = 48 bits
  226. dma_channel_configure(SDIO_DMA_CHB, &dmacfg, &resp, &SDIO_PIO->rxf[SDIO_CMD_SM], 6, true);
  227. }
  228. sdio_send_command(command, arg, response ? 48 : 0);
  229. uint32_t start = millis();
  230. if (response)
  231. {
  232. // Wait for DMA channel to receive response
  233. while (dma_channel_is_busy(SDIO_DMA_CHB))
  234. {
  235. if ((uint32_t)(millis() - start) > 2)
  236. {
  237. if (command != 8) {
  238. /*debug*/log("Timeout waiting for response in rp2040_sdio_command_R1(", (int)command, "), ",
  239. "PIO PC: ", (int)pio_sm_get_pc(SDIO_PIO, SDIO_CMD_SM) - (int)g_sdio.pio_cmd_rsp_clk_offset,
  240. " RXF: ", (int)pio_sm_get_rx_fifo_level(SDIO_PIO, SDIO_CMD_SM),
  241. " TXF: ", (int)pio_sm_get_tx_fifo_level(SDIO_PIO, SDIO_CMD_SM));
  242. }
  243. // Reset the state machine program
  244. dma_channel_abort(SDIO_DMA_CHB);
  245. pio_sm_set_enabled(SDIO_PIO, SDIO_CMD_SM, false); // Turn off the CMD SM, there was an error
  246. pio_sm_clear_fifos(SDIO_PIO, SDIO_CMD_SM);
  247. return SDIO_ERR_RESPONSE_TIMEOUT;
  248. }
  249. }
  250. // Must bswap due to 8 bit segmentation
  251. resp[0] = __builtin_bswap32(resp[0]);
  252. resp[1] = __builtin_bswap32(resp[1]) >> 16;
  253. // debuglog("SDIO R1 response: ", resp0, " ", resp1);
  254. // Calculate response checksum
  255. uint8_t crc = 0;
  256. crc = crc7_table[crc ^ ((resp[0] >> 24) & 0xFF)];
  257. crc = crc7_table[crc ^ ((resp[0] >> 16) & 0xFF)];
  258. crc = crc7_table[crc ^ ((resp[0] >> 8) & 0xFF)];
  259. crc = crc7_table[crc ^ ((resp[0] >> 0) & 0xFF)];
  260. crc = crc7_table[crc ^ ((resp[1] >> 8) & 0xFF)];
  261. uint8_t actual_crc = ((resp[1] >> 0) & 0xFE);
  262. if (crc != actual_crc)
  263. {
  264. debuglog("rp2040_sdio_command_R1(", (int)command, "): CRC error, calculated ", crc, " packet has ", actual_crc);
  265. debuglog("resp[0]:", resp[0], "resp[1]:", resp[1]);
  266. return SDIO_ERR_RESPONSE_CRC;
  267. }
  268. uint8_t response_cmd = ((resp[0] >> 24) & 0xFF);
  269. if (response_cmd != command && command != 41)
  270. {
  271. debuglog("rp2040_sdio_command_R1(", (int)command, "): received reply for ", (int)response_cmd);
  272. return SDIO_ERR_RESPONSE_CODE;
  273. }
  274. *response = ((resp[0] & 0xFFFFFF) << 8) | ((resp[1] >> 8) & 0xFF);
  275. } else {
  276. // Wait for CMD SM TX FIFO Stall (all command bits were sent)
  277. uint32_t tx_stall_flag = 1u << (PIO_FDEBUG_TXSTALL_LSB + SDIO_CMD_SM);
  278. // Clear the stall marker
  279. SDIO_PIO->fdebug = tx_stall_flag;
  280. // Wait for the stall
  281. while (!(SDIO_PIO->fdebug & tx_stall_flag)) {
  282. if ((uint32_t)(millis() - start) > 2)
  283. {
  284. if (command != 8) {
  285. /*debug*/log("Timeout waiting for CMD TX in rp2040_sdio_command_R1(", (int)command, "), ",
  286. "PIO PC: ", (int)pio_sm_get_pc(SDIO_PIO, SDIO_CMD_SM) - (int)g_sdio.pio_cmd_rsp_clk_offset,
  287. " RXF: ", (int)pio_sm_get_rx_fifo_level(SDIO_PIO, SDIO_CMD_SM),
  288. " TXF: ", (int)pio_sm_get_tx_fifo_level(SDIO_PIO, SDIO_CMD_SM));
  289. }
  290. // Reset the state machine program
  291. pio_sm_set_enabled(SDIO_PIO, SDIO_CMD_SM, false); // Turn off the CMD SM, there was an error
  292. pio_sm_clear_fifos(SDIO_PIO, SDIO_CMD_SM);
  293. return SDIO_ERR_RESPONSE_TIMEOUT;
  294. }
  295. }
  296. }
  297. pio_sm_set_enabled(SDIO_PIO, SDIO_CMD_SM, false);
  298. pio_sm_clear_fifos(SDIO_PIO, SDIO_CMD_SM);
  299. return SDIO_OK;
  300. }
  301. sdio_status_t __not_in_flash_func(rp2040_sdio_command_R2)(uint8_t command, uint32_t arg, uint8_t response[16])
  302. {
  303. // The response is too long to fit in the PIO FIFO, so use DMA to receive it.
  304. uint32_t response_buf[5];
  305. dma_channel_config dmacfg = dma_channel_get_default_config(SDIO_DMA_CHB);
  306. channel_config_set_transfer_data_size(&dmacfg, DMA_SIZE_8);
  307. channel_config_set_read_increment(&dmacfg, false);
  308. channel_config_set_write_increment(&dmacfg, true);
  309. channel_config_set_dreq(&dmacfg, pio_get_dreq(SDIO_PIO, SDIO_CMD_SM, false)); //17 * 8 = 136
  310. dma_channel_configure(SDIO_DMA_CHB, &dmacfg, &response_buf, &SDIO_PIO->rxf[SDIO_CMD_SM], 17, true);
  311. sdio_send_command(command, arg, 136);
  312. uint32_t start = millis();
  313. while (dma_channel_is_busy(SDIO_DMA_CHB))
  314. {
  315. if ((uint32_t)(millis() - start) > 2)
  316. {
  317. debuglog("Timeout waiting for response in rp2040_sdio_command_R2(", (int)command, "), ",
  318. "PIO PC: ", (int)pio_sm_get_pc(SDIO_PIO, SDIO_CMD_SM) - (int)g_sdio.pio_cmd_rsp_clk_offset,
  319. " RXF: ", (int)pio_sm_get_rx_fifo_level(SDIO_PIO, SDIO_CMD_SM),
  320. " TXF: ", (int)pio_sm_get_tx_fifo_level(SDIO_PIO, SDIO_CMD_SM));
  321. // Reset the state machine program
  322. dma_channel_abort(SDIO_DMA_CHB);
  323. pio_sm_set_enabled(SDIO_PIO, SDIO_CMD_SM, false); // Turn off the CMD SM, there was an error
  324. pio_sm_clear_fifos(SDIO_PIO, SDIO_CMD_SM);
  325. return SDIO_ERR_RESPONSE_TIMEOUT;
  326. }
  327. }
  328. pio_sm_set_enabled(SDIO_PIO, SDIO_CMD_SM, false); // Turn off the CMD SM, its job is done
  329. pio_sm_clear_fifos(SDIO_PIO, SDIO_CMD_SM);
  330. dma_channel_abort(SDIO_DMA_CHB);
  331. // Must byte swap because receiving 8-bit chunks instead of 32 bit
  332. response_buf[0] = __builtin_bswap32(response_buf[0]);
  333. response_buf[1] = __builtin_bswap32(response_buf[1]);
  334. response_buf[2] = __builtin_bswap32(response_buf[2]);
  335. response_buf[3] = __builtin_bswap32(response_buf[3]);
  336. response_buf[4] = __builtin_bswap32(response_buf[4]) >> 24;
  337. // Copy the response payload to output buffer
  338. response[0] = ((response_buf[0] >> 16) & 0xFF);
  339. response[1] = ((response_buf[0] >> 8) & 0xFF);
  340. response[2] = ((response_buf[0] >> 0) & 0xFF);
  341. response[3] = ((response_buf[1] >> 24) & 0xFF);
  342. response[4] = ((response_buf[1] >> 16) & 0xFF);
  343. response[5] = ((response_buf[1] >> 8) & 0xFF);
  344. response[6] = ((response_buf[1] >> 0) & 0xFF);
  345. response[7] = ((response_buf[2] >> 24) & 0xFF);
  346. response[8] = ((response_buf[2] >> 16) & 0xFF);
  347. response[9] = ((response_buf[2] >> 8) & 0xFF);
  348. response[10] = ((response_buf[2] >> 0) & 0xFF);
  349. response[11] = ((response_buf[3] >> 24) & 0xFF);
  350. response[12] = ((response_buf[3] >> 16) & 0xFF);
  351. response[13] = ((response_buf[3] >> 8) & 0xFF);
  352. response[14] = ((response_buf[3] >> 0) & 0xFF);
  353. response[15] = ((response_buf[4] >> 0) & 0xFF);
  354. // Calculate checksum of the payload
  355. uint8_t crc = 0;
  356. for (int i = 0; i < 15; i++)
  357. {
  358. crc = crc7_table[crc ^ response[i]];
  359. }
  360. uint8_t actual_crc = response[15] & 0xFE;
  361. if (crc != actual_crc)
  362. {
  363. debuglog("rp2040_sdio_command_R2(", (int)command, "): CRC error, calculated ", crc, " packet has ", actual_crc);
  364. return SDIO_ERR_RESPONSE_CRC;
  365. }
  366. uint8_t response_cmd = ((response_buf[0] >> 24) & 0xFF);
  367. if (response_cmd != 0x3F)
  368. {
  369. debuglog("rp2040_sdio_command_R2(", (int)command, "): Expected reply code 0x3F");
  370. return SDIO_ERR_RESPONSE_CODE;
  371. }
  372. return SDIO_OK;
  373. }
  374. sdio_status_t __not_in_flash_func(rp2040_sdio_command_R3)(uint8_t command, uint32_t arg, uint32_t *response)
  375. {
  376. uint32_t resp[2];
  377. dma_channel_config dmacfg = dma_channel_get_default_config(SDIO_DMA_CHB);
  378. channel_config_set_transfer_data_size(&dmacfg, DMA_SIZE_8);
  379. channel_config_set_read_increment(&dmacfg, false);
  380. channel_config_set_write_increment(&dmacfg, true);
  381. channel_config_set_dreq(&dmacfg, pio_get_dreq(SDIO_PIO, SDIO_CMD_SM, false)); //6 * 8 = 48 bits
  382. dma_channel_configure(SDIO_DMA_CHB, &dmacfg, &resp, &SDIO_PIO->rxf[SDIO_CMD_SM], 6, true);
  383. sdio_send_command(command, arg, 48);
  384. // Wait for response
  385. uint32_t start = millis();
  386. while (dma_channel_is_busy(SDIO_DMA_CHB))
  387. {
  388. if ((uint32_t)(millis() - start) > 2)
  389. {
  390. debuglog("Timeout waiting for response in rp2040_sdio_command_R3(", (int)command, "), ",
  391. "PIO PC: ", (int)pio_sm_get_pc(SDIO_PIO, SDIO_CMD_SM) - (int)g_sdio.pio_cmd_rsp_clk_offset,
  392. " RXF: ", (int)pio_sm_get_rx_fifo_level(SDIO_PIO, SDIO_CMD_SM),
  393. " TXF: ", (int)pio_sm_get_tx_fifo_level(SDIO_PIO, SDIO_CMD_SM));
  394. // Reset the state machine program
  395. dma_channel_abort(SDIO_DMA_CHB);
  396. pio_sm_set_enabled(SDIO_PIO, SDIO_CMD_SM, false); // Turn off the CMD SM, there was an error
  397. pio_sm_clear_fifos(SDIO_PIO, SDIO_CMD_SM);
  398. return SDIO_ERR_RESPONSE_TIMEOUT;
  399. }
  400. }
  401. pio_sm_set_enabled(SDIO_PIO, SDIO_CMD_SM, false); // Turn off the CMD SM, its job is done
  402. pio_sm_clear_fifos(SDIO_PIO, SDIO_CMD_SM);
  403. // Must bswap due to 8 bit transfer
  404. resp[0] = __builtin_bswap32(resp[0]);
  405. resp[1] = __builtin_bswap32(resp[1]) >> 16;
  406. *response = ((resp[0] & 0xFFFFFF) << 8) | ((resp[1] >> 8) & 0xFF);
  407. // debuglog("SDIO R3 response: ", resp0, " ", resp1);
  408. return SDIO_OK;
  409. }
  410. /*******************************************************
  411. * Data reception from SD card
  412. *******************************************************/
  413. sdio_status_t __not_in_flash_func(rp2040_sdio_rx_start)(uint8_t *buffer, uint32_t num_blocks, uint32_t block_size)
  414. {
  415. // Buffer must be aligned
  416. assert(((uint32_t)buffer & 3) == 0 && num_blocks <= SDIO_MAX_BLOCKS);
  417. g_sdio.transfer_state = SDIO_RX;
  418. g_sdio.transfer_start_time = millis();
  419. g_sdio.data_buf = (uint32_t*)buffer;
  420. g_sdio.blocks_done = 0;
  421. g_sdio.total_blocks = num_blocks;
  422. g_sdio.blocks_checksumed = 0;
  423. g_sdio.checksum_errors = 0;
  424. // Create DMA block descriptors to store each block of block_size bytes of data to buffer
  425. // and then 8 bytes to g_sdio.received_checksums.
  426. for (int i = 0; i < num_blocks; i++)
  427. {
  428. g_sdio.dma_blocks[i * 2].write_addr = buffer + (i * block_size);
  429. g_sdio.dma_blocks[i * 2].transfer_count = block_size / sizeof(uint32_t);
  430. g_sdio.dma_blocks[i * 2 + 1].write_addr = &g_sdio.received_checksums[i];
  431. g_sdio.dma_blocks[i * 2 + 1].transfer_count = 2;
  432. }
  433. g_sdio.dma_blocks[num_blocks * 2].write_addr = 0;
  434. g_sdio.dma_blocks[num_blocks * 2].transfer_count = 0;
  435. // Configure first DMA channel for reading from the PIO RX fifo
  436. dma_channel_config dmacfg = dma_channel_get_default_config(SDIO_DMA_CH);
  437. channel_config_set_transfer_data_size(&dmacfg, DMA_SIZE_32);
  438. channel_config_set_read_increment(&dmacfg, false);
  439. channel_config_set_write_increment(&dmacfg, true);
  440. channel_config_set_dreq(&dmacfg, pio_get_dreq(SDIO_PIO, SDIO_DATA_SM, false));
  441. channel_config_set_bswap(&dmacfg, true);
  442. channel_config_set_chain_to(&dmacfg, SDIO_DMA_CHB);
  443. dma_channel_configure(SDIO_DMA_CH, &dmacfg, 0, &SDIO_PIO->rxf[SDIO_DATA_SM], 0, false);
  444. // Configure second DMA channel for reconfiguring the first one
  445. dmacfg = dma_channel_get_default_config(SDIO_DMA_CHB);
  446. channel_config_set_transfer_data_size(&dmacfg, DMA_SIZE_32);
  447. channel_config_set_read_increment(&dmacfg, true);
  448. channel_config_set_write_increment(&dmacfg, true);
  449. channel_config_set_ring(&dmacfg, true, 3);
  450. dma_channel_configure(SDIO_DMA_CHB, &dmacfg, &dma_hw->ch[SDIO_DMA_CH].al1_write_addr,
  451. g_sdio.dma_blocks, 2, false);
  452. // Initialize PIO state machine
  453. pio_sm_init(SDIO_PIO, SDIO_DATA_SM, g_sdio.pio_data_rx_offset, &g_sdio.pio_cfg_data_rx);
  454. pio_sm_set_consecutive_pindirs(SDIO_PIO, SDIO_DATA_SM, SDIO_CLK, 1, true);
  455. pio_sm_set_consecutive_pindirs(SDIO_PIO, SDIO_DATA_SM, SDIO_D0, 4, false);
  456. // Write number of nibbles to receive to Y register
  457. pio_sm_put(SDIO_PIO, SDIO_DATA_SM, (block_size * 2) + 16 - 1);
  458. pio_sm_exec(SDIO_PIO, SDIO_DATA_SM, pio_encode_out(pio_y, 32));
  459. // Enable RX FIFO join because we don't need the TX FIFO during transfer.
  460. // This gives more leeway for the DMA block switching
  461. SDIO_PIO->sm[SDIO_DATA_SM].shiftctrl |= PIO_SM0_SHIFTCTRL_FJOIN_RX_BITS;
  462. // Start PIO and DMA
  463. dma_channel_start(SDIO_DMA_CHB);
  464. pio_sm_set_enabled(SDIO_PIO, SDIO_DATA_SM, true);
  465. return SDIO_OK;
  466. }
  467. // Check checksums for received blocks
  468. static void __not_in_flash_func(sdio_verify_rx_checksums)(uint32_t maxcount)
  469. {
  470. while (g_sdio.blocks_checksumed < g_sdio.blocks_done && maxcount-- > 0)
  471. {
  472. // Calculate checksum from received data
  473. int blockidx = g_sdio.blocks_checksumed++;
  474. uint64_t checksum = sdio_crc16_4bit_checksum(g_sdio.data_buf + blockidx * SDIO_WORDS_PER_BLOCK,
  475. SDIO_WORDS_PER_BLOCK);
  476. // Convert received checksum to little-endian format
  477. uint32_t top = __builtin_bswap32(g_sdio.received_checksums[blockidx].top);
  478. uint32_t bottom = __builtin_bswap32(g_sdio.received_checksums[blockidx].bottom);
  479. uint64_t expected = ((uint64_t)top << 32) | bottom;
  480. if (checksum != expected)
  481. {
  482. g_sdio.checksum_errors++;
  483. if (g_sdio.checksum_errors == 1)
  484. {
  485. log("SDIO checksum error in reception: block ", blockidx,
  486. " calculated ", checksum, " expected ", expected);
  487. }
  488. }
  489. }
  490. }
  491. sdio_status_t __not_in_flash_func(rp2040_sdio_rx_poll)(uint32_t *bytes_complete)
  492. {
  493. // Was everything done when the previous rx_poll() finished?
  494. if (g_sdio.blocks_done >= g_sdio.total_blocks)
  495. {
  496. g_sdio.transfer_state = SDIO_IDLE;
  497. }
  498. else
  499. {
  500. // Use the idle time to calculate checksums
  501. sdio_verify_rx_checksums(4);
  502. // Check how many DMA control blocks have been consumed
  503. uint32_t dma_ctrl_block_count = (dma_hw->ch[SDIO_DMA_CHB].read_addr - (uint32_t)&g_sdio.dma_blocks);
  504. dma_ctrl_block_count /= sizeof(g_sdio.dma_blocks[0]);
  505. // Compute how many complete 512 byte SDIO blocks have been transferred
  506. // When transfer ends, dma_ctrl_block_count == g_sdio.total_blocks * 2 + 1
  507. g_sdio.blocks_done = (dma_ctrl_block_count - 1) / 2;
  508. // NOTE: When all blocks are done, rx_poll() still returns SDIO_BUSY once.
  509. // This provides a chance to start the SCSI transfer before the last checksums
  510. // are computed. Any checksum failures can be indicated in SCSI status after
  511. // the data transfer has finished.
  512. }
  513. if (bytes_complete)
  514. {
  515. *bytes_complete = g_sdio.blocks_done * SDIO_BLOCK_SIZE;
  516. }
  517. if (g_sdio.transfer_state == SDIO_IDLE)
  518. {
  519. pio_sm_set_enabled(SDIO_PIO, SDIO_DATA_SM, false);
  520. // Verify all remaining checksums.
  521. sdio_verify_rx_checksums(g_sdio.total_blocks);
  522. if (g_sdio.checksum_errors == 0)
  523. return SDIO_OK;
  524. else
  525. return SDIO_ERR_DATA_CRC;
  526. }
  527. else if ((uint32_t)(millis() - g_sdio.transfer_start_time) > 1000)
  528. {
  529. debuglog("rp2040_sdio_rx_poll() timeout, "
  530. "PIO PC: ", (int)pio_sm_get_pc(SDIO_PIO, SDIO_DATA_SM) - (int)g_sdio.pio_data_rx_offset,
  531. " RXF: ", (int)pio_sm_get_rx_fifo_level(SDIO_PIO, SDIO_DATA_SM),
  532. " TXF: ", (int)pio_sm_get_tx_fifo_level(SDIO_PIO, SDIO_DATA_SM),
  533. " DMA CNT: ", dma_hw->ch[SDIO_DMA_CH].al2_transfer_count,
  534. " BD: ", g_sdio.blocks_done);
  535. rp2040_sdio_stop();
  536. return SDIO_ERR_DATA_TIMEOUT;
  537. }
  538. return SDIO_BUSY;
  539. }
  540. /*******************************************************
  541. * Data transmission to SD card
  542. *******************************************************/
  543. static void __not_in_flash_func(sdio_start_next_block_tx)()
  544. {
  545. // Initialize PIOs
  546. pio_sm_init(SDIO_PIO, SDIO_CMD_SM, g_sdio.pio_data_tx_offset, &g_sdio.pio_cfg_data_tx);
  547. // Re-set the pin direction things here
  548. pio_sm_set_pins(SDIO_PIO, SDIO_CMD_SM, 0xF);
  549. pio_sm_set_consecutive_pindirs(SDIO_PIO, SDIO_CMD_SM, SDIO_CLK, 1, true);
  550. pio_sm_set_consecutive_pindirs(SDIO_PIO, SDIO_CMD_SM, SDIO_D0, 4, true);
  551. // Configure DMA to send the data block payload (512 bytes)
  552. dma_channel_config dmacfg = dma_channel_get_default_config(SDIO_DMA_CH);
  553. channel_config_set_transfer_data_size(&dmacfg, DMA_SIZE_32);
  554. channel_config_set_read_increment(&dmacfg, true);
  555. channel_config_set_write_increment(&dmacfg, false);
  556. channel_config_set_dreq(&dmacfg, pio_get_dreq(SDIO_PIO, SDIO_CMD_SM, true));
  557. channel_config_set_bswap(&dmacfg, true);
  558. channel_config_set_chain_to(&dmacfg, SDIO_DMA_CHB);
  559. dma_channel_configure(SDIO_DMA_CH, &dmacfg,
  560. &SDIO_PIO->txf[SDIO_CMD_SM], g_sdio.data_buf + g_sdio.blocks_done * SDIO_WORDS_PER_BLOCK,
  561. SDIO_WORDS_PER_BLOCK, false);
  562. // Prepare second DMA channel to send the CRC and block end marker
  563. uint64_t crc = g_sdio.next_wr_block_checksum;
  564. g_sdio.end_token_buf[0] = (uint32_t)(crc >> 32);
  565. g_sdio.end_token_buf[1] = (uint32_t)(crc >> 0);
  566. g_sdio.end_token_buf[2] = 0xFFFFFFFF;
  567. channel_config_set_bswap(&dmacfg, false);
  568. dma_channel_configure(SDIO_DMA_CHB, &dmacfg,
  569. &SDIO_PIO->txf[SDIO_CMD_SM], g_sdio.end_token_buf, 3, false);
  570. // Enable IRQ to trigger when block is done
  571. dma_hw->ints1 = 1 << SDIO_DMA_CHB;
  572. dma_set_irq1_channel_mask_enabled(1 << SDIO_DMA_CHB, 1);
  573. // Initialize register X with nibble count
  574. pio_sm_put(SDIO_PIO, SDIO_CMD_SM, 1048);
  575. pio_sm_exec(SDIO_PIO, SDIO_CMD_SM, pio_encode_out(pio_x, 32));
  576. // Initialize CRC receiver Y bit count
  577. pio_sm_put(SDIO_PIO, SDIO_CMD_SM, 7);
  578. pio_sm_exec(SDIO_PIO, SDIO_CMD_SM, pio_encode_out(pio_y, 32));
  579. // Initialize pins to output and high
  580. pio_sm_exec(SDIO_PIO, SDIO_CMD_SM, pio_encode_set(pio_pins, 15));
  581. pio_sm_exec(SDIO_PIO, SDIO_CMD_SM, pio_encode_set(pio_pindirs, 15));
  582. // Write start token and start the DMA transfer.
  583. pio_sm_put(SDIO_PIO, SDIO_CMD_SM, 0xFFFFFFF0);
  584. dma_channel_start(SDIO_DMA_CH);
  585. // Start state machine
  586. pio_set_sm_mask_enabled(SDIO_PIO, (1ul << SDIO_CMD_SM)/* | (1ul << SDIO_DATA_SM)*/, true);
  587. }
  588. static void __not_in_flash_func(sdio_compute_next_tx_checksum)()
  589. {
  590. assert (g_sdio.blocks_done < g_sdio.total_blocks && g_sdio.blocks_checksumed < g_sdio.total_blocks);
  591. int blockidx = g_sdio.blocks_checksumed++;
  592. g_sdio.next_wr_block_checksum = sdio_crc16_4bit_checksum(g_sdio.data_buf + blockidx * SDIO_WORDS_PER_BLOCK,
  593. SDIO_WORDS_PER_BLOCK);
  594. }
  595. // Start transferring data from memory to SD card
  596. sdio_status_t __not_in_flash_func(rp2040_sdio_tx_start)(const uint8_t *buffer, uint32_t num_blocks)
  597. {
  598. // Buffer must be aligned
  599. assert(((uint32_t)buffer & 3) == 0 && num_blocks <= SDIO_MAX_BLOCKS);
  600. g_sdio.transfer_state = SDIO_TX;
  601. g_sdio.transfer_start_time = millis();
  602. g_sdio.data_buf = (uint32_t*)buffer;
  603. g_sdio.blocks_done = 0;
  604. g_sdio.total_blocks = num_blocks;
  605. g_sdio.blocks_checksumed = 0;
  606. g_sdio.checksum_errors = 0;
  607. // Compute first block checksum
  608. sdio_compute_next_tx_checksum();
  609. // Start first DMA transfer and PIO
  610. sdio_start_next_block_tx();
  611. if (g_sdio.blocks_checksumed < g_sdio.total_blocks)
  612. {
  613. // Precompute second block checksum
  614. sdio_compute_next_tx_checksum();
  615. }
  616. return SDIO_OK;
  617. }
  618. sdio_status_t __not_in_flash_func(check_sdio_write_response)(uint32_t card_response)
  619. {
  620. uint8_t wr_status = card_response & 0x1F;
  621. // 5 = 0b0101 = data accepted (11100101)
  622. // 11 = 0b1011 = CRC error (11101011)
  623. // 13 = 0b1101 = Write Error (11101101)
  624. if (wr_status == 0b101)
  625. {
  626. return SDIO_OK;
  627. }
  628. else if (wr_status == 0b1011)
  629. {
  630. log("SDIO card reports write CRC error, status ", card_response);
  631. return SDIO_ERR_WRITE_CRC;
  632. }
  633. else if (wr_status == 0b1101)
  634. {
  635. log("SDIO card reports write failure, status ", card_response);
  636. return SDIO_ERR_WRITE_FAIL;
  637. }
  638. else
  639. {
  640. log("SDIO card reports unknown write status ", card_response);
  641. return SDIO_ERR_WRITE_FAIL;
  642. }
  643. }
  644. // When a block finishes, this IRQ handler starts the next one
  645. static void __not_in_flash_func(rp2040_sdio_tx_irq)()
  646. {
  647. dma_hw->ints1 = 1 << SDIO_DMA_CHB;
  648. if (g_sdio.transfer_state == SDIO_TX)
  649. {
  650. if (!dma_channel_is_busy(SDIO_DMA_CH) && !dma_channel_is_busy(SDIO_DMA_CHB))
  651. {
  652. // Main data transfer is finished now.
  653. // When card is ready, PIO will put card response on RX fifo
  654. g_sdio.transfer_state = SDIO_TX_WAIT_IDLE;
  655. if (!pio_sm_is_rx_fifo_empty(SDIO_PIO, SDIO_CMD_SM))
  656. {
  657. // Card is already idle
  658. g_sdio.card_response = pio_sm_get(SDIO_PIO, SDIO_CMD_SM);
  659. }
  660. else
  661. {
  662. // Use DMA to wait for the response
  663. dma_channel_config dmacfg = dma_channel_get_default_config(SDIO_DMA_CHB);
  664. channel_config_set_transfer_data_size(&dmacfg, DMA_SIZE_8);
  665. channel_config_set_read_increment(&dmacfg, false);
  666. channel_config_set_write_increment(&dmacfg, false);
  667. channel_config_set_dreq(&dmacfg, pio_get_dreq(SDIO_PIO, SDIO_CMD_SM, false));
  668. dma_channel_configure(SDIO_DMA_CHB, &dmacfg,
  669. &g_sdio.card_response, &SDIO_PIO->rxf[SDIO_CMD_SM], 1, true);
  670. }
  671. }
  672. }
  673. if (g_sdio.transfer_state == SDIO_TX_WAIT_IDLE)
  674. {
  675. if (!dma_channel_is_busy(SDIO_DMA_CHB))
  676. {
  677. g_sdio.wr_status = check_sdio_write_response(g_sdio.card_response);
  678. if (g_sdio.wr_status != SDIO_OK)
  679. {
  680. rp2040_sdio_stop();
  681. return;
  682. }
  683. g_sdio.blocks_done++;
  684. if (g_sdio.blocks_done < g_sdio.total_blocks)
  685. {
  686. sdio_start_next_block_tx();
  687. g_sdio.transfer_state = SDIO_TX;
  688. if (g_sdio.blocks_checksumed < g_sdio.total_blocks)
  689. {
  690. // Precompute the CRC for next block so that it is ready when
  691. // we want to send it.
  692. sdio_compute_next_tx_checksum();
  693. }
  694. }
  695. else
  696. {
  697. rp2040_sdio_stop();
  698. }
  699. }
  700. }
  701. }
  702. // Check if transmission is complete
  703. sdio_status_t __not_in_flash_func(rp2040_sdio_tx_poll)(uint32_t *bytes_complete)
  704. {
  705. if (scb_hw->icsr & (0x1FFUL))
  706. {
  707. // Verify that IRQ handler gets called even if we are in hardfault handler
  708. rp2040_sdio_tx_irq();
  709. }
  710. if (bytes_complete)
  711. {
  712. *bytes_complete = g_sdio.blocks_done * SDIO_BLOCK_SIZE;
  713. }
  714. if (g_sdio.transfer_state == SDIO_IDLE)
  715. {
  716. rp2040_sdio_stop();
  717. return g_sdio.wr_status;
  718. }
  719. else if ((uint32_t)(millis() - g_sdio.transfer_start_time) > 1000)
  720. {
  721. debuglog("rp2040_sdio_tx_poll() timeout, "
  722. "PIO PC: ", (int)pio_sm_get_pc(SDIO_PIO, SDIO_CMD_SM) - (int)g_sdio.pio_data_tx_offset,
  723. " RXF: ", (int)pio_sm_get_rx_fifo_level(SDIO_PIO, SDIO_CMD_SM),
  724. " TXF: ", (int)pio_sm_get_tx_fifo_level(SDIO_PIO, SDIO_CMD_SM),
  725. " DMA CNT: ", dma_hw->ch[SDIO_DMA_CH].al2_transfer_count);
  726. rp2040_sdio_stop();
  727. return SDIO_ERR_DATA_TIMEOUT;
  728. }
  729. return SDIO_BUSY;
  730. }
  731. // Force everything to idle state
  732. sdio_status_t __not_in_flash_func(rp2040_sdio_stop)()
  733. {
  734. dma_channel_abort(SDIO_DMA_CH);
  735. dma_channel_abort(SDIO_DMA_CHB);
  736. dma_set_irq1_channel_mask_enabled(1 << SDIO_DMA_CHB, 0);
  737. pio_set_sm_mask_enabled(SDIO_PIO, (1ul << SDIO_CMD_SM) | (1ul << SDIO_DATA_SM), false);
  738. g_sdio.transfer_state = SDIO_IDLE;
  739. return SDIO_OK;
  740. }
  741. void __not_in_flash_func(rp2040_sdio_init)(int clock_divider)
  742. {
  743. // Mark resources as being in use, unless it has been done already.
  744. static bool resources_claimed = false;
  745. if (!resources_claimed)
  746. {
  747. pio_sm_claim(SDIO_PIO, SDIO_CMD_SM);
  748. pio_sm_claim(SDIO_PIO, SDIO_DATA_SM);
  749. dma_channel_claim(SDIO_DMA_CH);
  750. dma_channel_claim(SDIO_DMA_CHB);
  751. resources_claimed = true;
  752. }
  753. memset(&g_sdio, 0, sizeof(g_sdio));
  754. dma_channel_abort(SDIO_DMA_CH);
  755. dma_channel_abort(SDIO_DMA_CHB);
  756. pio_sm_set_enabled(SDIO_PIO, SDIO_CMD_SM, false);
  757. pio_sm_set_enabled(SDIO_PIO, SDIO_DATA_SM, false);
  758. // Load PIO programs
  759. pio_clear_instruction_memory(SDIO_PIO);
  760. // Set pull resistors for all SD data lines
  761. gpio_set_pulls(SDIO_CLK, true, false);
  762. gpio_set_pulls(SDIO_CMD, true, false);
  763. gpio_set_pulls(SDIO_D0, true, false);
  764. gpio_set_pulls(SDIO_D1, true, false);
  765. gpio_set_pulls(SDIO_D2, true, false);
  766. gpio_set_pulls(SDIO_D3, true, false);
  767. //
  768. // Command state machine
  769. //
  770. uint16_t rw_prg_instr[cmd_rsp_program.length];
  771. memcpy(rw_prg_instr, cmd_rsp_program_instructions, sizeof(cmd_rsp_program_instructions));
  772. // If the instruction is a side-set 0 use delay0
  773. // If the instruction is a side-set 1 use delay1
  774. // cmd_rsp does not follow a certain pattern, so bluntly set delay values here.
  775. // NOTE: Any changes to cmd_rsp will require updates here as well
  776. rw_prg_instr[0] = cmd_rsp_program_instructions[0] | pio_encode_delay(g_bluescsi_timings->sdio.delay0);
  777. rw_prg_instr[1] = cmd_rsp_program_instructions[1] | pio_encode_delay(g_bluescsi_timings->sdio.delay1);
  778. rw_prg_instr[2] = cmd_rsp_program_instructions[2] | pio_encode_delay(g_bluescsi_timings->sdio.delay0);
  779. rw_prg_instr[3] = cmd_rsp_program_instructions[3] | pio_encode_delay(g_bluescsi_timings->sdio.delay1 + 1);
  780. rw_prg_instr[4] = cmd_rsp_program_instructions[4] | pio_encode_delay(g_bluescsi_timings->sdio.delay0);
  781. rw_prg_instr[5] = cmd_rsp_program_instructions[5] | pio_encode_delay(g_bluescsi_timings->sdio.delay1);
  782. rw_prg_instr[8] = cmd_rsp_program_instructions[8] | pio_encode_delay(g_bluescsi_timings->sdio.delay0);
  783. rw_prg_instr[9] = cmd_rsp_program_instructions[9] | pio_encode_delay(g_bluescsi_timings->sdio.delay1);
  784. pio_program cmd_rsp_prg_w_delays = {
  785. rw_prg_instr,
  786. cmd_rsp_program.length,
  787. cmd_rsp_program.origin,
  788. cmd_rsp_program.pio_version
  789. };
  790. g_sdio.pio_cmd_rsp_clk_offset = pio_add_program(SDIO_PIO, &cmd_rsp_prg_w_delays);
  791. g_sdio.pio_cfg_cmd_rsp = pio_cmd_rsp_program_config(g_sdio.pio_cmd_rsp_clk_offset, SDIO_CMD, SDIO_CLK, clock_divider, 0);
  792. pio_sm_init(SDIO_PIO, SDIO_CMD_SM, g_sdio.pio_cmd_rsp_clk_offset, &g_sdio.pio_cfg_cmd_rsp);
  793. pio_sm_set_pins(SDIO_PIO, SDIO_CMD_SM, 1);
  794. pio_sm_set_consecutive_pindirs(SDIO_PIO, SDIO_CMD_SM, SDIO_CLK, 1, true);
  795. pio_sm_set_consecutive_pindirs(SDIO_PIO, SDIO_CMD_SM, SDIO_CMD, 1, true);
  796. pio_sm_set_consecutive_pindirs(SDIO_PIO, SDIO_CMD_SM, SDIO_D0, 4, false);
  797. //
  798. // Data reception program
  799. //
  800. // Clear the temp memory array
  801. memset(rw_prg_instr, cmd_rsp_program.length, sizeof(rw_prg_instr[0]));
  802. memcpy(rw_prg_instr, rd_data_w_clock_program_instructions, sizeof(rd_data_w_clock_program_instructions));
  803. rw_prg_instr[1] = rd_data_w_clock_program_instructions[1] | pio_encode_delay(g_bluescsi_timings->sdio.delay0 + 1);
  804. rw_prg_instr[2] = rd_data_w_clock_program_instructions[2] | pio_encode_delay(g_bluescsi_timings->sdio.delay1 + 1);
  805. rw_prg_instr[3] = rd_data_w_clock_program_instructions[3] | pio_encode_delay(g_bluescsi_timings->sdio.delay0);
  806. rw_prg_instr[4] = rd_data_w_clock_program_instructions[4] | pio_encode_delay(g_bluescsi_timings->sdio.delay1);
  807. rw_prg_instr[5] = rd_data_w_clock_program_instructions[5] | pio_encode_delay(g_bluescsi_timings->sdio.delay0);
  808. pio_program rd_data_w_delays = {
  809. rw_prg_instr,
  810. rd_data_w_clock_program.length,
  811. rd_data_w_clock_program.origin,
  812. rd_data_w_clock_program.pio_version
  813. };
  814. g_sdio.pio_data_rx_offset = pio_add_program(SDIO_PIO, &rd_data_w_delays);
  815. g_sdio.pio_cfg_data_rx = pio_rd_data_w_clock_program_config(g_sdio.pio_data_rx_offset, SDIO_D0, SDIO_CLK, clock_divider);
  816. //
  817. // Data transmission program
  818. //
  819. // Clear the temp memory array
  820. memset(rw_prg_instr, cmd_rsp_program.length, sizeof(rw_prg_instr[0]));
  821. memcpy(rw_prg_instr, tx_data_w_clock_program_instructions, sizeof(tx_data_w_clock_program_instructions));
  822. rw_prg_instr[0] = tx_data_w_clock_program_instructions[0] | pio_encode_delay(g_bluescsi_timings->sdio.delay0);
  823. rw_prg_instr[1] = tx_data_w_clock_program_instructions[1] | pio_encode_delay(g_bluescsi_timings->sdio.delay1);
  824. rw_prg_instr[2] = tx_data_w_clock_program_instructions[2] | pio_encode_delay(g_bluescsi_timings->sdio.delay1);
  825. rw_prg_instr[3] = tx_data_w_clock_program_instructions[3] | pio_encode_delay(g_bluescsi_timings->sdio.delay1 + 1);
  826. rw_prg_instr[4] = tx_data_w_clock_program_instructions[4] | pio_encode_delay(g_bluescsi_timings->sdio.delay0 + 1);
  827. rw_prg_instr[5] = tx_data_w_clock_program_instructions[5] | pio_encode_delay(g_bluescsi_timings->sdio.delay1 + 1);
  828. rw_prg_instr[6] = tx_data_w_clock_program_instructions[6] | pio_encode_delay(g_bluescsi_timings->sdio.delay0 + 1);
  829. pio_program tx_data_w_delays = {
  830. rw_prg_instr,
  831. tx_data_w_clock_program.length,
  832. tx_data_w_clock_program.origin,
  833. tx_data_w_clock_program.pio_version
  834. };
  835. g_sdio.pio_data_tx_offset = pio_add_program(SDIO_PIO, &tx_data_w_delays);
  836. g_sdio.pio_cfg_data_tx = pio_tx_w_clock_program_config(g_sdio.pio_data_tx_offset, SDIO_D0, SDIO_CLK, clock_divider);
  837. // Disable SDIO pins input synchronizer.
  838. // This reduces input delay.
  839. // Because the CLK is driven synchronously to CPU clock,
  840. // there should be no metastability problems.
  841. SDIO_PIO->input_sync_bypass |= (1 << SDIO_CLK) | (1 << SDIO_CMD)
  842. | (1 << SDIO_D0) | (1 << SDIO_D1) | (1 << SDIO_D2) | (1 << SDIO_D3);
  843. // Redirect GPIOs to PIO
  844. gpio_set_function(SDIO_CMD, GPIO_FUNC_PIO1);
  845. gpio_set_function(SDIO_CLK, GPIO_FUNC_PIO1);
  846. gpio_set_function(SDIO_D0, GPIO_FUNC_PIO1);
  847. gpio_set_function(SDIO_D1, GPIO_FUNC_PIO1);
  848. gpio_set_function(SDIO_D2, GPIO_FUNC_PIO1);
  849. gpio_set_function(SDIO_D3, GPIO_FUNC_PIO1);
  850. // Set up IRQ handler when DMA completes.
  851. irq_set_exclusive_handler(DMA_IRQ_1, rp2040_sdio_tx_irq);
  852. irq_set_enabled(DMA_IRQ_1, true);
  853. #if 0
  854. #ifndef ENABLE_AUDIO_OUTPUT
  855. irq_set_exclusive_handler(DMA_IRQ_1, rp2040_sdio_tx_irq);
  856. #else
  857. // seem to hit assertion in _exclusive_handler call due to DMA_IRQ_0 being shared?
  858. // slightly less efficient to do it this way, so investigate further at some point
  859. irq_add_shared_handler(DMA_IRQ_1, rp2040_sdio_tx_irq, 0xFF);
  860. #endif
  861. irq_set_enabled(DMA_IRQ_1, true);
  862. #endif
  863. }
  864. void __not_in_flash_func(rp2040_sdio_update_delays)(pio_program program, uint32_t offset, uint16_t additional_delay) {
  865. //log("Offset:", offset);
  866. uint16_t instr_to_rewrite;
  867. uint16_t existing_delay;
  868. for (int i = 0; i < program.length; i++) {
  869. instr_to_rewrite = program.instructions[i];
  870. //log("Old Instr:", i, ":", (uint32_t)instr_to_rewrite);
  871. if (instr_to_rewrite & PIO_INSTR_MASK_GET_DELAY) { // If there's a delay, increment it. Otherwise, leave it alone.
  872. existing_delay = (instr_to_rewrite & PIO_INSTR_MASK_GET_DELAY) >> 8;
  873. existing_delay += additional_delay;
  874. instr_to_rewrite = (instr_to_rewrite & PIO_INSTR_MASK_REMOVE_DELAY) | (existing_delay << 8);
  875. // Canonicalize JMP addresses
  876. if ((instr_to_rewrite & PIO_INSTR_JMP_MASK) == 0) { // Highest three bits are zero on a JMP
  877. uint32_t jmp_address = instr_to_rewrite & PIO_INSTR_JMP_ADDR;
  878. jmp_address += offset;
  879. instr_to_rewrite = (instr_to_rewrite & (~ PIO_INSTR_JMP_ADDR)) | jmp_address;
  880. }
  881. //log("New Instr:", i, ":", (uint32_t)instr_to_rewrite);
  882. SDIO_PIO->instr_mem[offset + i] = instr_to_rewrite;
  883. }
  884. }
  885. }
  886. void __not_in_flash_func(rp2040_sdio_delay_increment)(uint16_t additional_delay) {
  887. /*
  888. Rewrite in-place every SDIO instruction for all the SDIO programs.
  889. These additional delay cycles effectively decrease the SDIO clock rate, which can be helpful in electrically noisy environments.
  890. */
  891. rp2040_sdio_update_delays(cmd_rsp_program, g_sdio.pio_cmd_rsp_clk_offset, additional_delay);
  892. rp2040_sdio_update_delays(rd_data_w_clock_program, g_sdio.pio_data_rx_offset, additional_delay);
  893. rp2040_sdio_update_delays(tx_data_w_clock_program, g_sdio.pio_data_tx_offset, additional_delay);
  894. }