scsi_accel_sync.cpp 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479
/* Synchronous mode SCSI implementation.
 *
 * In synchronous mode, the handshake mechanism is not used. Instead,
 * either end of the communication just sends a bunch of bytes and
 * only afterwards checks that the number of acknowledgement pulses
 * matches.
 *
 * The receiving end should latch in the data at the falling edge of
 * the request pulse (on either REQ or ACK pin). When reading data
 * from the host, we use the GD32 EXMC peripheral to implement this
 * latching with the NWAIT pin. NOE is used to generate the REQ pulses.
 *
 * Writing data to the host is simpler, as we can just write it out
 * from the GPIO port at our own pace. The REQ pulses are produced by
 * the same GPIO writes, and a timer is used for counting the ACK
 * pulses that come back.
 */
  17. #include "scsi_accel_sync.h"
  18. #include <BlueSCSI_log.h>
  19. #include <gd32f20x_exmc.h>
  20. #include <scsi.h>
  21. #ifndef SCSI_SYNC_MODE_AVAILABLE
  22. void scsi_accel_sync_init() {}
  23. void scsi_accel_sync_recv(uint8_t *data, uint32_t count, int* parityError, volatile int *resetFlag) {}
  24. void scsi_accel_sync_send(const uint8_t* data, uint32_t count, volatile int *resetFlag) {}
  25. #else
  26. /********************************/
  27. /* Transfer from host to device */
  28. /********************************/
  29. #define SYNC_DMA_BUFSIZE 512
  30. static uint32_t g_sync_dma_buf[SYNC_DMA_BUFSIZE];
  31. void scsi_accel_sync_init()
  32. {
  33. rcu_periph_clock_enable(RCU_EXMC);
  34. rcu_periph_clock_enable(SCSI_EXMC_DMA_RCU);
  35. rcu_periph_clock_enable(SCSI_SYNC_TIMER_RCU);
  36. exmc_norsram_timing_parameter_struct timing_param = {
  37. .asyn_access_mode = EXMC_ACCESS_MODE_A,
  38. .syn_data_latency = EXMC_DATALAT_2_CLK,
  39. .syn_clk_division = EXMC_SYN_CLOCK_RATIO_2_CLK,
  40. .bus_latency = 1,
  41. .asyn_data_setuptime = 2,
  42. .asyn_address_holdtime = 2,
  43. .asyn_address_setuptime = 16
  44. };
  45. exmc_norsram_parameter_struct sram_param = {
  46. .norsram_region = EXMC_BANK0_NORSRAM_REGION0,
  47. .write_mode = EXMC_ASYN_WRITE,
  48. .extended_mode = DISABLE,
  49. .asyn_wait = ENABLE,
  50. .nwait_signal = ENABLE,
  51. .memory_write = DISABLE,
  52. .nwait_config = EXMC_NWAIT_CONFIG_DURING,
  53. .wrap_burst_mode = DISABLE,
  54. .nwait_polarity = EXMC_NWAIT_POLARITY_HIGH,
  55. .burst_mode = DISABLE,
  56. .databus_width = EXMC_NOR_DATABUS_WIDTH_16B,
  57. .memory_type = EXMC_MEMORY_TYPE_SRAM,
  58. .address_data_mux = DISABLE,
  59. .read_write_timing = &timing_param
  60. };
  61. EXMC_SNCTL(EXMC_BANK0_NORSRAM_REGION0) &= ~EXMC_SNCTL_NRBKEN;
  62. exmc_norsram_init(&sram_param);
  63. // DMA used to transfer data from EXMC to RAM
  64. // DMA is used so that if data transfer fails, we can at least abort by resetting CPU.
  65. // Accessing EXMC from the CPU directly hangs it totally if ACK pulses are not received.
  66. dma_parameter_struct exmc_dma_config =
  67. {
  68. .periph_addr = EXMC_NOR_PSRAM,
  69. .periph_width = DMA_PERIPHERAL_WIDTH_16BIT,
  70. .memory_addr = (uint32_t)g_sync_dma_buf,
  71. .memory_width = DMA_MEMORY_WIDTH_16BIT,
  72. .number = 0, // Filled before transfer
  73. .priority = DMA_PRIORITY_MEDIUM,
  74. .periph_inc = DMA_PERIPH_INCREASE_DISABLE,
  75. .memory_inc = DMA_MEMORY_INCREASE_ENABLE,
  76. .direction = DMA_PERIPHERAL_TO_MEMORY
  77. };
  78. dma_init(SCSI_EXMC_DMA, SCSI_EXMC_DMACH, &exmc_dma_config);
  79. dma_memory_to_memory_enable(SCSI_EXMC_DMA, SCSI_EXMC_DMACH);
  80. gpio_init(SCSI_IN_ACK_EXMC_NWAIT_PORT, GPIO_MODE_IN_FLOATING, 0, SCSI_IN_ACK_EXMC_NWAIT_PIN);
  81. gpio_init(SCSI_TIMER_IN_PORT, GPIO_MODE_IN_FLOATING, 0, SCSI_TIMER_IN_PIN);
  82. // TIMER1 is used to count ACK pulses
  83. TIMER_CTL0(SCSI_SYNC_TIMER) = 0;
  84. TIMER_SMCFG(SCSI_SYNC_TIMER) = TIMER_SLAVE_MODE_EXTERNAL0 | TIMER_SMCFG_TRGSEL_CI0FE0;
  85. TIMER_CAR(SCSI_SYNC_TIMER) = 65535;
  86. TIMER_PSC(SCSI_SYNC_TIMER) = 0;
  87. TIMER_CHCTL0(SCSI_SYNC_TIMER) = 0x0001; // CH0 as input
  88. }
  89. void scsi_accel_sync_recv(uint8_t *data, uint32_t count, int* parityError, volatile int *resetFlag)
  90. {
  91. // Enable EXMC to drive REQ from EXMC_NOE pin
  92. EXMC_SNCTL(EXMC_BANK0_NORSRAM_REGION0) |= EXMC_SNCTL_NRBKEN;
  93. uint32_t oldmode = GPIO_CTL0(SCSI_OUT_REQ_EXMC_NOE_PORT);
  94. uint32_t newmode = oldmode & ~(0xF << (SCSI_OUT_REQ_EXMC_NOE_IDX * 4));
  95. newmode |= 0xB << (SCSI_OUT_REQ_EXMC_NOE_IDX * 4);
  96. GPIO_CTL0(SCSI_OUT_REQ_EXMC_NOE_PORT) = newmode;
  97. while (count > 0)
  98. {
  99. uint32_t blocksize = (count > SYNC_DMA_BUFSIZE * 2) ? (SYNC_DMA_BUFSIZE * 2) : count;
  100. count -= blocksize;
  101. DMA_CHCNT(SCSI_EXMC_DMA, SCSI_EXMC_DMACH) = blocksize;
  102. DMA_CHCTL(SCSI_EXMC_DMA, SCSI_EXMC_DMACH) |= DMA_CHXCTL_CHEN;
  103. uint16_t *src = (uint16_t*)g_sync_dma_buf;
  104. uint8_t *dst = data;
  105. uint8_t *end = data + blocksize;
  106. uint32_t start = millis();
  107. while (dst < end)
  108. {
  109. uint32_t remain = DMA_CHCNT(SCSI_EXMC_DMA, SCSI_EXMC_DMACH);
  110. while (dst < end - remain)
  111. {
  112. *dst++ = ~(*src++) >> SCSI_EXMC_DATA_SHIFT;
  113. }
  114. if ((uint32_t)(millis() - start) > 500 || *resetFlag)
  115. {
  116. // We are in a pinch here: without ACK pulses coming, the EXMC and DMA peripherals
  117. // are locked up. The only way out is a whole system reset.
  118. bluelog("SCSI Synchronous read timeout: resetting system");
  119. NVIC_SystemReset();
  120. }
  121. }
  122. DMA_CHCTL(SCSI_EXMC_DMA, SCSI_EXMC_DMACH) &= ~DMA_CHXCTL_CHEN;
  123. data = end;
  124. }
  125. GPIO_CTL0(SCSI_OUT_REQ_EXMC_NOE_PORT) = oldmode;
  126. EXMC_SNCTL(EXMC_BANK0_NORSRAM_REGION0) &= ~EXMC_SNCTL_NRBKEN;
  127. }
  128. /********************************/
  129. /* Transfer from device to host */
  130. /********************************/
  131. // Simple delay, about 20 ns.
  132. // This is less likely to get optimized away by CPU pipeline than nop
  133. #define ASM_DELAY() \
  134. " ldr %[tmp2], [%[reset_flag]] \n"
  135. // Take 8 bits from d and format them for writing
  136. // d is name of data operand, b is bit offset
  137. #define ASM_LOAD_DATA(b) \
  138. " ubfx %[tmp1], %[data], #" b ", #8 \n" \
  139. " ldr %[tmp1], [%[byte_lookup], %[tmp1], lsl #2] \n"
  140. // Write data to SCSI port and set REQ high
  141. #define ASM_SEND_DATA() \
  142. " str %[tmp1], [%[out_port_bop]] \n"
  143. // Set REQ low
  144. #define ASM_SET_REQ_LOW() \
  145. " mov %[tmp2], %[bop_req_low] \n" \
  146. " str %[tmp2], [%[out_port_bop]] \n"
  147. // Wait for ACK_TIMER - n to be less than num_bytes
  148. #define ASM_WAIT_ACK_TIMER(n) \
  149. "wait_acks_" n "_%=: \n" \
  150. " ldr %[tmp2], [%[ack_timer]] \n" \
  151. " sub %[tmp2], # " n " \n" \
  152. " cmp %[tmp2], %[num_bytes] \n" \
  153. " ble got_acks_" n "_%= \n" \
  154. " ldr %[tmp2], [%[reset_flag]] \n" \
  155. " cmp %[tmp2], #0 \n" \
  156. " bne all_done_%= \n" \
  157. " b wait_acks_" n "_%= \n" \
  158. "got_acks_" n "_%=: \n"
  159. // Send 4 bytes
  160. #define ASM_SEND_4BYTES() \
  161. ASM_LOAD_DATA("0") \
  162. ASM_SEND_DATA() \
  163. ASM_DELAY1() \
  164. ASM_SET_REQ_LOW() \
  165. ASM_DELAY2() \
  166. ASM_LOAD_DATA("8") \
  167. ASM_SEND_DATA() \
  168. ASM_DELAY1() \
  169. ASM_SET_REQ_LOW() \
  170. ASM_DELAY2() \
  171. ASM_LOAD_DATA("16") \
  172. ASM_SEND_DATA() \
  173. ASM_DELAY1() \
  174. ASM_SET_REQ_LOW() \
  175. ASM_DELAY2() \
  176. ASM_LOAD_DATA("24") \
  177. ASM_SEND_DATA() \
  178. ASM_DELAY1() \
  179. ASM_SET_REQ_LOW()
  180. // Send 1 byte, wait for ACK_TIMER to be less than num_bytes + n and send 3 bytes more
  181. // This interleaving minimizes the delay caused by WAIT_ACK_TIMER.
  182. #define ASM_SEND_4BYTES_WAIT(n) \
  183. ASM_LOAD_DATA("0") \
  184. ASM_SEND_DATA() \
  185. ASM_DELAY2() \
  186. ASM_LOAD_DATA("8") \
  187. ASM_SET_REQ_LOW() \
  188. ASM_DELAY2() \
  189. " ldr %[tmp2], [%[ack_timer]] \n" \
  190. " sub %[tmp2], # " n " \n" \
  191. ASM_SEND_DATA() \
  192. " cmp %[tmp2], %[num_bytes] \n" \
  193. " ble got_acks_" n "_%= \n" \
  194. ASM_WAIT_ACK_TIMER(n) \
  195. ASM_DELAY2() \
  196. ASM_SET_REQ_LOW() \
  197. ASM_DELAY2() \
  198. ASM_LOAD_DATA("16") \
  199. ASM_SEND_DATA() \
  200. ASM_DELAY1() \
  201. ASM_SET_REQ_LOW() \
  202. ASM_DELAY2() \
  203. ASM_LOAD_DATA("24") \
  204. ASM_SEND_DATA() \
  205. ASM_DELAY1() \
  206. ASM_SET_REQ_LOW() \
  207. // Specialized routine for settings:
  208. // <=100 ns period, >=15 outstanding REQs
  209. static void sync_send_100ns_15off(const uint8_t *buf, uint32_t num_bytes, volatile int *resetFlag)
  210. {
  211. volatile uint32_t *out_port_bop = (volatile uint32_t*)&GPIO_BOP(SCSI_OUT_PORT);
  212. volatile uint32_t *ack_timer = &TIMER_CNT(SCSI_SYNC_TIMER);
  213. const uint32_t *byte_lookup = g_scsi_out_byte_to_bop;
  214. register uint32_t tmp1 = 0;
  215. register uint32_t tmp2 = 0;
  216. register uint32_t data = 0;
  217. // Delay 1 is typically longest and delay 2 shortest.
  218. // Tuning these is just trial and error.
  219. #define ASM_DELAY1() " nop\n nop\n nop\n"
  220. #define ASM_DELAY2() " nop\n nop\n"
  221. asm volatile (
  222. "main_loop_%=: \n"
  223. " subs %[num_bytes], %[num_bytes], #16 \n"
  224. " bmi last_bytes_%= \n"
  225. /* At each point make sure there is at most 15 bytes in flight */
  226. " ldr %[data], [%[buf]], #4 \n"
  227. ASM_SEND_4BYTES_WAIT("22")
  228. " ldr %[data], [%[buf]], #4 \n"
  229. ASM_SEND_4BYTES()
  230. " ldr %[data], [%[buf]], #4 \n"
  231. ASM_SEND_4BYTES_WAIT("14")
  232. " ldr %[data], [%[buf]], #4 \n"
  233. ASM_SEND_4BYTES()
  234. " cbz %[num_bytes], all_done_%= \n"
  235. " b main_loop_%= \n"
  236. "last_bytes_%=: \n"
  237. " add %[num_bytes], %[num_bytes], #16 \n"
  238. "last_bytes_loop_%=: \n"
  239. " ldrb %[data], [%[buf]], #1 \n"
  240. ASM_LOAD_DATA("0")
  241. ASM_WAIT_ACK_TIMER("15")
  242. ASM_SEND_DATA()
  243. ASM_DELAY1()
  244. ASM_SET_REQ_LOW()
  245. " subs %[num_bytes], %[num_bytes], #1 \n"
  246. " bne last_bytes_loop_%= \n"
  247. "all_done_%=: \n"
  248. ASM_DELAY()
  249. : /* Output */ [tmp1] "+l" (tmp1), [tmp2] "+l" (tmp2), [data] "+r" (data),
  250. [buf] "+r" (buf), [num_bytes] "+r" (num_bytes)
  251. : /* Input */ [ack_timer] "r" (ack_timer),
  252. [bop_req_low] "I" (SCSI_OUT_REQ << 16),
  253. [out_port_bop] "r"(out_port_bop),
  254. [byte_lookup] "r" (byte_lookup),
  255. [reset_flag] "r" (resetFlag)
  256. : /* Clobber */);
  257. #undef ASM_DELAY1
  258. #undef ASM_DELAY2
  259. SCSI_RELEASE_DATA_REQ();
  260. }
  261. // Specialized routine for settings:
  262. // <=200 ns period, >=15 outstanding REQs
  263. static void sync_send_200ns_15off(const uint8_t *buf, uint32_t num_bytes, volatile int *resetFlag)
  264. {
  265. volatile uint32_t *out_port_bop = (volatile uint32_t*)&GPIO_BOP(SCSI_OUT_PORT);
  266. volatile uint32_t *ack_timer = &TIMER_CNT(SCSI_SYNC_TIMER);
  267. const uint32_t *byte_lookup = g_scsi_out_byte_to_bop;
  268. register uint32_t tmp1 = 0;
  269. register uint32_t tmp2 = 0;
  270. register uint32_t data = 0;
  271. #define ASM_DELAY1() ASM_DELAY() ASM_DELAY() ASM_DELAY() ASM_DELAY()
  272. #define ASM_DELAY2() ASM_DELAY() ASM_DELAY() ASM_DELAY()
  273. asm volatile (
  274. "main_loop_%=: \n"
  275. " subs %[num_bytes], %[num_bytes], #16 \n"
  276. " bmi last_bytes_%= \n"
  277. /* At each point make sure there is at most 15 bytes in flight */
  278. " ldr %[data], [%[buf]], #4 \n"
  279. ASM_SEND_4BYTES_WAIT("22")
  280. ASM_DELAY2()
  281. " ldr %[data], [%[buf]], #4 \n"
  282. ASM_SEND_4BYTES()
  283. ASM_DELAY2()
  284. " ldr %[data], [%[buf]], #4 \n"
  285. ASM_SEND_4BYTES_WAIT("14")
  286. ASM_DELAY2()
  287. " ldr %[data], [%[buf]], #4 \n"
  288. ASM_SEND_4BYTES()
  289. " cbz %[num_bytes], all_done_%= \n"
  290. " b main_loop_%= \n"
  291. "last_bytes_%=: \n"
  292. " add %[num_bytes], %[num_bytes], #16 \n"
  293. "last_bytes_loop_%=: \n"
  294. " ldrb %[data], [%[buf]], #1 \n"
  295. ASM_LOAD_DATA("0")
  296. ASM_WAIT_ACK_TIMER("15")
  297. ASM_SEND_DATA()
  298. ASM_DELAY1()
  299. ASM_SET_REQ_LOW()
  300. ASM_DELAY2()
  301. " subs %[num_bytes], %[num_bytes], #1 \n"
  302. " bne last_bytes_loop_%= \n"
  303. "all_done_%=: \n"
  304. ASM_DELAY1()
  305. : /* Output */ [tmp1] "+l" (tmp1), [tmp2] "+l" (tmp2), [data] "+r" (data),
  306. [buf] "+r" (buf), [num_bytes] "+r" (num_bytes)
  307. : /* Input */ [ack_timer] "r" (ack_timer),
  308. [bop_req_low] "I" (SCSI_OUT_REQ << 16),
  309. [out_port_bop] "r"(out_port_bop),
  310. [byte_lookup] "r" (byte_lookup),
  311. [reset_flag] "r" (resetFlag)
  312. : /* Clobber */);
  313. #undef ASM_DELAY1
  314. #undef ASM_DELAY2
  315. SCSI_RELEASE_DATA_REQ();
  316. }
  317. // Specialized routine for settings:
  318. // <=260 ns period, >=7 outstanding REQs
  319. static void sync_send_260ns_7off(const uint8_t *buf, uint32_t num_bytes, volatile int *resetFlag)
  320. {
  321. volatile uint32_t *out_port_bop = (volatile uint32_t*)&GPIO_BOP(SCSI_OUT_PORT);
  322. volatile uint32_t *ack_timer = &TIMER_CNT(SCSI_SYNC_TIMER);
  323. const uint32_t *byte_lookup = g_scsi_out_byte_to_bop;
  324. register uint32_t tmp1 = 0;
  325. register uint32_t tmp2 = 0;
  326. register uint32_t data = 0;
  327. #define ASM_DELAY1() ASM_DELAY() ASM_DELAY() ASM_DELAY() ASM_DELAY() \
  328. ASM_DELAY() ASM_DELAY()
  329. #define ASM_DELAY2() ASM_DELAY() ASM_DELAY() ASM_DELAY() ASM_DELAY() \
  330. ASM_DELAY() ASM_DELAY() ASM_DELAY() ASM_DELAY()
  331. asm volatile (
  332. "main_loop_%=: \n"
  333. " subs %[num_bytes], %[num_bytes], #4 \n"
  334. " bmi last_bytes_%= \n"
  335. /* At each point make sure there is at most 3 bytes in flight */
  336. " ldr %[data], [%[buf]], #4 \n"
  337. ASM_SEND_4BYTES_WAIT("7")
  338. " cbz %[num_bytes], all_done_%= \n"
  339. " b main_loop_%= \n"
  340. "last_bytes_%=: \n"
  341. " add %[num_bytes], %[num_bytes], #4 \n"
  342. "last_bytes_loop_%=: \n"
  343. " ldrb %[data], [%[buf]], #1 \n"
  344. ASM_LOAD_DATA("0")
  345. ASM_WAIT_ACK_TIMER("5")
  346. ASM_SEND_DATA()
  347. ASM_DELAY1()
  348. ASM_SET_REQ_LOW()
  349. ASM_DELAY2()
  350. " subs %[num_bytes], %[num_bytes], #1 \n"
  351. " bne last_bytes_loop_%= \n"
  352. "all_done_%=: \n"
  353. ASM_DELAY1()
  354. : /* Output */ [tmp1] "+l" (tmp1), [tmp2] "+l" (tmp2), [data] "+r" (data),
  355. [buf] "+r" (buf), [num_bytes] "+r" (num_bytes)
  356. : /* Input */ [ack_timer] "r" (ack_timer),
  357. [bop_req_low] "I" (SCSI_OUT_REQ << 16),
  358. [out_port_bop] "r"(out_port_bop),
  359. [byte_lookup] "r" (byte_lookup),
  360. [reset_flag] "r" (resetFlag)
  361. : /* Clobber */);
  362. #undef ASM_DELAY1
  363. #undef ASM_DELAY2
  364. SCSI_RELEASE_DATA_REQ();
  365. }
  366. void scsi_accel_sync_send(const uint8_t* data, uint32_t count, volatile int *resetFlag)
  367. {
  368. // Timer counts down from the initial number of bytes.
  369. TIMER_CNT(SCSI_SYNC_TIMER) = count;
  370. TIMER_CTL0(SCSI_SYNC_TIMER) = TIMER_CTL0_CEN | TIMER_CTL0_DIR;
  371. int syncOffset = scsiDev.target->syncOffset;
  372. int syncPeriod = scsiDev.target->syncPeriod;
  373. if (syncOffset >= 15 && syncPeriod <= 25)
  374. {
  375. sync_send_100ns_15off(data, count, resetFlag);
  376. }
  377. else if (syncOffset >= 15 && syncPeriod <= 50)
  378. {
  379. sync_send_200ns_15off(data, count, resetFlag);
  380. }
  381. else if (syncOffset >= 7 && syncPeriod <= 65)
  382. {
  383. sync_send_260ns_7off(data, count, resetFlag);
  384. }
  385. else
  386. {
  387. bluedbg("No optimized routine for syncOffset=", syncOffset, " syndPeriod=", syncPeriod, ", using fallback");
  388. while (count-- > 0)
  389. {
  390. while (TIMER_CNT(SCSI_SYNC_TIMER) > count + syncOffset && !*resetFlag);
  391. SCSI_OUT_DATA(*data++);
  392. delay_ns(syncPeriod * 2);
  393. SCSI_OUT(REQ, 0);
  394. delay_ns(syncPeriod * 2);
  395. }
  396. delay_ns(syncPeriod * 2);
  397. SCSI_RELEASE_DATA_REQ();
  398. }
  399. while (TIMER_CNT(SCSI_SYNC_TIMER) > 0 && !*resetFlag);
  400. if (*resetFlag)
  401. {
  402. bluedbg("Bus reset during sync transfer, total ", (int)count,
  403. " bytes, remaining ACK count ", (int)TIMER_CNT(SCSI_SYNC_TIMER));
  404. }
  405. TIMER_CTL0(SCSI_SYNC_TIMER) = 0;
  406. }
  407. #endif