spirom.sv 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397
  //
  // Fast data download from 2-bit SPI flash, or zero SDRAM.
  //
  // Incoming data feeds a FIFO, which in turn writes to SDRAM.
  // Requires writes in aligned 8-byte chunks.
  //
  // This unit does *not* require a 2x SPI clock;
  // it uses a DDR buffer for clock out.
  //
  //
  // spirom: SPI flash reader / SDRAM zeroing engine.
  //
  // The CPU programs three registers (RAM start address, SPI command
  // word, length/control word).  Writing the length/control word starts
  // the operation and drops irq; irq returns high once both the SPI
  // side and the RAM side are idle again.
  //
  // CPU register map (selected by cpu_addr):
  //   0: RAM start address, bits [24:2]                         (R/W)
  //   1: SPI command word                                       (R/W)
  //   2: [23:2] data length, [26:24] command length,
  //      [27] dual-bit SPI input, [28] keep CS# asserted after
  //      the command, [29] write to RAM; writing starts the op  (R/W)
  //   3: bit 0 = irq (1 = idle)                                 (RO)
  //   4: SPI single-bit input shift register                    (RO)
  //
  module spirom (
      input             rst_n,      // Asynchronous reset, active low
      input             rom_clk,    // Clock for the SPI state machine and FIFO write side
      input             ram_clk,    // Clock for the control registers and SDRAM write side
      input             sys_clk,    // Not referenced inside this module

      /* SPI ROM interface */
      output            spi_sck,
      inout [1:0]       spi_io,
      output reg        spi_cs_n,

      /* SDRAM interface */
      output [15:0]     wd,         // Data to RAM
      (* syn_preserve = 1 *)        // Don't merge into FIFO
      output [24:1]     waddr,      // RAM address
      output reg [1:0]  wrq,        // Write request (min 4/8 bytes)
      input             wacc,       // Data accepted (ready for next data)

      /* CPU control interface */
      output reg [31:0] cpu_rdata,
      input [31:0]      cpu_wdata,
      input             cpu_valid,
      input [3:0]       cpu_wstrb,
      input [2:0]       cpu_addr,
      output reg        irq
  );

      //
      // Control/status registers (ram_clk domain)
      //
      reg [24:2]  ramstart;     // RAM start address
      reg [31:0]  romcmd;       // Command word, loaded into spi_out_shr at start
      reg [23:2]  datalen;      // Data length
      reg [2:0]   cmdlen;       // Command length; nonzero starts an SPI transaction
      reg         go_spi;       // Start request to the SPI side (cleared once it is active)
      reg         is_spi;       // Current operation sources data from SPI
      reg         go_ram;       // Start request to the RAM side (cleared once it is busy)
      reg         is_ram;       // Current operation writes to RAM
      reg         spi_dual;     // Take two input bits per SPI clock
      reg         spi_more;     // Do not raise CS# after command done
      reg         ram_done;
      reg         ram_done_q;
      reg [1:0]   cpu_wr_q;     // Delay line used to edge-detect CPU writes
      reg [31:0]  spi_in_shr;   // Input shift register for one-bit input
      wire        spi_active_s; // spi_active synchronized into ram_clk

      wire        cpu_wr_w = cpu_valid & cpu_wstrb[0];

      always @(negedge rst_n or posedge ram_clk)
        if (~rst_n)
          begin
              ramstart   <= 23'b0;
              romcmd     <= 32'b0;
              datalen    <= 22'b0;
              cmdlen     <= 3'b0;
              go_spi     <= 1'b0;
              is_spi     <= 1'b0;
              go_ram     <= 1'b0;
              is_ram     <= 1'b0;
              ram_done_q <= 1'b1;
              irq        <= 1'b1;
              spi_dual   <= 1'b0;
              spi_more   <= 1'b0;
              cpu_wr_q   <= 2'b0;
          end
        else
          begin
              ram_done_q <= ram_done;

              // Drop each start request once the corresponding side
              // has visibly picked it up.
              if (~ram_done_q)
                go_ram <= 1'b0;
              if (spi_active_s)
                go_spi <= 1'b0;

              // Everything idle and nothing pending: signal completion.
              // A register-2 write in this same cycle overrides this
              // (the later nonblocking assignment wins).
              if (ram_done_q & ~go_ram & ~spi_active_s & ~go_spi)
                irq <= 1'b1;

              // Don't allow writing unless the unit is idle (IRQ = 1)
              // Delay the recognition of the write by one ram_clk
              // cycle (so it is recognized on the second half of the
              // corresponding sys_clk cycle) to relax timings; this
              // is not performance critical at all.
              cpu_wr_q <= { cpu_wr_q[0], cpu_wr_w & irq };

              if (cpu_wr_q == 2'b01)
                begin
                    // Only full word accesses supported via DMA!!
                    case (cpu_addr)
                      3'b000: begin
                          ramstart <= cpu_wdata[24:2];
                      end
                      3'b001: begin
                          romcmd <= cpu_wdata[31:0];
                      end
                      3'b010: begin
                          // Writing this register kicks off the operation
                          datalen  <= cpu_wdata[23:2];
                          cmdlen   <= cpu_wdata[26:24];
                          go_spi   <= cpu_wdata[26:24] != 3'd0;
                          is_spi   <= cpu_wdata[26:24] != 3'd0;
                          spi_dual <= cpu_wdata[27];
                          spi_more <= cpu_wdata[28];
                          is_ram   <= cpu_wdata[29];
                          go_ram   <= cpu_wdata[29];
                          irq      <= 1'b0;
                      end
                      default: begin
                          // Do nothing
                      end
                    endcase // case (cpu_addr)
                end // if (cpu_wr_q == 2'b01)
          end // else: !if(~rst_n)

      // CPU register readback
      always_comb
        case (cpu_addr)
          3'b000:  cpu_rdata = { 7'b0, ramstart, 2'b0 };
          3'b001:  cpu_rdata = romcmd;
          3'b010:  cpu_rdata = { 2'b0, is_ram, spi_more, spi_dual,
                                 cmdlen, datalen, 2'b0 };
          3'b011:  cpu_rdata = { 31'b0, irq };
          3'b100:  cpu_rdata = spi_in_shr;
          default: cpu_rdata = 32'bx;
        endcase // case (cpu_addr)

      //
      // FIFO and input latches
      //
      reg [1:0]   spi_in_q;         // spi_io sampled on negedge rom_clk
      reg         spi_ram_in_req;   // FIFO write request
      wire [11:0] wrusedw;
      wire [8:0]  rdusedw;
      wire [15:0] fifo_out;
      wire [1:0]  spi_in_data;

      // The FIFO always takes two bits per write.  In dual mode both come
      // straight from the input latch; in single-bit mode the pair is the
      // newest bit (spi_in_q[1]) plus the previously shifted-in bit
      // (spi_in_shr[0]).
      assign spi_in_data[0] = spi_dual ? spi_in_q[0] : spi_in_q[1];
      assign spi_in_data[1] = spi_dual ? spi_in_q[1] : spi_in_shr[0];

      ddufifo spirom_fifo (
          .aclr    ( ~rst_n ),

          .wrclk   ( rom_clk ),
          .data    ( spi_in_data ),
          .wrreq   ( spi_ram_in_req ),
          .wrusedw ( wrusedw ),

          .rdclk   ( ram_clk ),
          .q       ( fifo_out ),
          .rdreq   ( wacc & is_spi ),
          .rdusedw ( rdusedw )
      );

      //
      // Interfacing between FIFO and input signals
      //
      // Shuffle fifo_out because SPI brings in data in bigendian bit
      // order within bytes, but the FIFO IP assumes littleendian
      //
      wire [15:0] spi_wd;

      assign spi_wd[ 7: 6] = fifo_out[ 1: 0];
      assign spi_wd[ 5: 4] = fifo_out[ 3: 2];
      assign spi_wd[ 3: 2] = fifo_out[ 5: 4];
      assign spi_wd[ 1: 0] = fifo_out[ 7: 6];
      assign spi_wd[15:14] = fifo_out[ 9: 8];
      assign spi_wd[13:12] = fifo_out[11:10];
      assign spi_wd[11:10] = fifo_out[13:12];
      assign spi_wd[ 9: 8] = fifo_out[15:14];

      reg [24:1]  waddr_q;          // Current RAM write address
      reg [23:1]  ram_data_ctr;     // Remaining 16-bit words to write
      reg         wacc_q;           // wacc delayed by one ram_clk

      assign waddr = waddr_q;
      assign wd    = is_spi ? spi_wd : 16'h0000;    // Zeros when only clearing RAM

      //
      // RAM write side (ram_clk domain)
      //
      always @(negedge rst_n or posedge ram_clk)
        if (~rst_n)
          begin
              waddr_q      <= 24'bx;
              ram_data_ctr <= 23'b0;
              wacc_q       <= 1'b0;
              wrq          <= 2'b00;
              ram_done     <= 1'b1;
          end
        else
          begin
              wacc_q <= wacc;

              if (|ram_data_ctr)
                begin
                    ram_done <= 1'b0;

                    if (is_spi)
                      begin
                          // Reading from SPI ROM
                          wrq[0] <= rdusedw >= 9'd4; // 4*2 = 8 bytes min available
                          wrq[1] <= rdusedw >= 9'd8; // 8*2 = 16 bytes min available
                      end
                    else
                      begin
                          // Zeroing memory
                          wrq[0] <= |ram_data_ctr[23:3];
                          wrq[1] <= |ram_data_ctr[23:4];
                      end

                    // Advance by one word for each accepted word
                    waddr_q      <= waddr_q + wacc_q;
                    ram_data_ctr <= ram_data_ctr - wacc_q;
                end // if (|ram_data_ctr)
              else
                begin
                    wrq      <= 2'b00;
                    ram_done <= 1'b1;

                    if (go_ram)
                      begin
                          waddr_q      <= { ramstart, 1'b0 };
                          ram_data_ctr <= { datalen, 1'b0 };
                          ram_done     <= 1'b0;     // Overrides the default above
                      end
                end
          end // else: !if(~rst_n)

      //
      // SPI side (rom_clk domain)
      //
      // Negative indices refer to fractional bytes
      reg [2:-3]  spi_cmd_ctr;      // Remaining command bits
      reg [23:-3] spi_data_ctr;     // Remaining data bits
      reg         spi_clk_en;       // Request one SCK pulse
      reg [1:0]   spi_clk_en_q;     // spi_clk_en delayed by 1 ([0]) and 2 ([1]) cycles
      reg         spi_mosi_en;      // Drive spi_io[0]
      reg [1:0]   go_spi_q;
      wire        go_spi_s;         // go_spi synchronized into rom_clk
      reg         spi_more_q;
      reg         spi_active;
      reg         spi_active_q;
      reg [31:0]  spi_out_shr;      // Output shift register, MSB goes out first
      reg         spi_in_req;
      reg         spi_in_req_q;

      // Wait these many bit times between CS# high and the next CS# low
      // (tSHSL). The worst of these is tSHSL2 = 50 ns = 8 cycles @ 134 MHz.
      localparam spi_cs_wait_lg2 = 3;
      reg [spi_cs_wait_lg2:0] spi_cs_ctr;
      wire spi_cs_ready = spi_cs_ctr[spi_cs_wait_lg2];

      // Explicit synchronizers for handshake signals
      synchronizer #(.width(1)) go_spi_synchro
        (
         .rst_n ( rst_n ),
         .clk   ( rom_clk ),
         .d     ( go_spi ),
         .q     ( go_spi_s )
         );

      synchronizer #(.width(1)) spi_active_synchro
        (
         .rst_n ( rst_n ),
         .clk   ( ram_clk ),
         .d     ( spi_active ),
         .q     ( spi_active_s )
         );

      // Only clock the SPI bus while the FIFO write side has headroom:
      // (~wrusedw) must be at least 128 entries.
      // NOTE(review): the previous comment here read "64/4 = 16 bytes min
      // space", which does not match the 128 in the expression (128 2-bit
      // entries would be 32 bytes) -- confirm which value is intended.
      wire dma_queue_space = (~wrusedw) >= 12'd128;

      always @(negedge rst_n or posedge rom_clk)
        if (~rst_n)
          begin
              spi_cmd_ctr    <= 6'b0;
              spi_clk_en     <= 1'b0;
              spi_clk_en_q   <= 'b0;
              spi_data_ctr   <= 27'b0;
              spi_cs_n       <= 1'b1;
              spi_cs_ctr     <= 'b0;
              spi_in_req     <= 1'b0;
              spi_in_req_q   <= 1'b0;
              spi_ram_in_req <= 1'b0;
              spi_mosi_en    <= 1'b1;
              spi_in_shr     <= 32'b0;
              spi_active     <= 1'b0;
              spi_active_q   <= 1'b0;
              spi_more_q     <= 1'b0;
              spi_out_shr    <= 32'b0;
          end
        else
          begin
              // Fun with long pipelined chains of registers...
              spi_in_req     <= 1'b0;
              spi_in_req_q   <= spi_in_req;
              spi_ram_in_req <= spi_in_req_q & is_ram;

              spi_clk_en     <= 1'b0;
              spi_clk_en_q   <= (spi_clk_en_q << 1'b1) | spi_clk_en;

              spi_active_q   <= spi_active;

              if ( spi_clk_en_q[1] )
                spi_in_shr <= { spi_in_shr[30:0], spi_in_q[1] };

              // Bit to start transmitting on the next clock down transition
              // This needs to be delayed by one cycle in order to match
              // the one-cycle delay imposed by the DDR output buffer
              // when spi_clk_en goes high.
              //
              // Only stage [0] is the one-cycle-delayed enable.  Testing
              // the whole 2-bit vector would also be true on the following
              // cycle, shifting the output twice for a single clock pulse
              // whenever spi_clk_en is not continuously asserted.
              if ( spi_clk_en_q[0] )
                spi_out_shr <= { spi_out_shr[30:0], 1'b1 };

              // tSHSL: make sure we get 8 bit times of CS# deselect between
              // commands.
              if ( ~spi_cs_n )
                spi_cs_ctr <= 'b0;
              else
                spi_cs_ctr <= spi_cs_ctr + !spi_cs_ready;   // Saturate once ready

              // Note: datalen <- spi_data_ctr is a 2-cycle multipath
              if (~spi_active)
                begin
                    // Idle: CS# stays low only if the previous command
                    // asked for it (spi_more)
                    spi_cs_n <= ~spi_more_q;

                    if ( go_spi_s & (spi_more_q | spi_cs_ready) )
                      begin
                          // Starting new transaction
                          spi_cmd_ctr  <= { cmdlen, 3'b0 };
                          spi_data_ctr <= { datalen, 5'b0 };
                          spi_active   <= 1'b1;
                          spi_cs_n     <= 1'b0;
                          spi_more_q   <= spi_more;
                          spi_out_shr  <= romcmd;
                      end
                end // if (~spi_active)
              else
                begin
                    spi_cs_n <= 1'b0;

                    if ( ~|{spi_data_ctr, spi_cmd_ctr} )
                      begin
                          // Transaction completed. Note: CS# needs to remain
                          // asserted for at least one more cycle in case of read.
                          spi_clk_en  <= 1'b0;
                          spi_mosi_en <= 1'b1;
                          spi_active  <= 1'b0;
                      end
                    else
                      begin
                          // This will block unnecessarily if the DMA queue
                          // is full from a previous transaction, but that doesn't
                          // matter in practice... just let it drain.
                          spi_clk_en  <= dma_queue_space;

                          // In dual mode, release spi_io[0] once the command
                          // has been sent so the flash can drive it
                          spi_mosi_en <= ~spi_dual | |spi_cmd_ctr;

                          // Data phase: request a 2-bit FIFO write on every
                          // clock in dual mode, or when spi_data_ctr[-3] is
                          // set in single-bit mode
                          if ( spi_clk_en & ~|spi_cmd_ctr )
                            spi_in_req <= spi_data_ctr[-3] | spi_dual;

                          if ( spi_clk_en | ~spi_active_q )
                            begin
                                // This is either the kickoff cycle or advancing
                                if ( ~|spi_cmd_ctr )
                                  spi_data_ctr <= spi_data_ctr - (1'b1 << spi_dual);
                                else
                                  spi_cmd_ctr <= spi_cmd_ctr - 1'b1;
                            end // if ( spi_clk_en | ~spi_active_q )
                      end // else: !if( ~|{spi_data_ctr, spi_cmd_ctr} )
                end // else: !if(~spi_active)
          end // else: !if(~rst_n)

      //
      // Input I/O: latch on the positive spi_clk, which is the
      // negative rom_clk.
      //
      always @(negedge rom_clk)
        spi_in_q <= spi_io;

      //
      // Output I/O: changed on the negative spi_clk, which is the
      // positive rom_clk (thus matching when these registers are set.)
      //
      assign spi_io[0] = spi_mosi_en ? spi_out_shr[31] : 1'bz;
      assign spi_io[1] = 1'bz;

      //
      // SPI_SCK output buffer: emit a spi_clk clock pulse if spi_clk_en
      // is high; note that this is phase-inverted versus the internal
      // rom_clk, and that this is sampled one full rom_clk before
      // output, so the pulse appears on spi_sck one rom_clk cycle after
      // spi_clk_en was asserted.
      //
      ddio_out spi_clk_buf (
          .aclr     ( ~rst_n ),
          .datain_h ( 1'b0 ),
          .datain_l ( spi_clk_en ),
          .outclock ( rom_clk ),
          .dataout  ( spi_sck )
      );
  endmodule // spirom