//
// Fast data download from 2-bit SPI flash, or zero SDRAM.
//
// Feed a FIFO that then writes to SDRAM.
// Requires writes in aligned 8-byte chunks.
//
// This unit does *not* require a 2x SPI clock;
// it uses a DDR buffer for clock out.
//
// Clock domains used below:
//   ram_clk - CPU register interface and the SDRAM write engine
//   rom_clk - SPI state machine and the FIFO write side
// go_spi and spi_active cross between the two through explicit
// synchronizers; the remaining configuration registers (romcmd, datalen,
// cmdlen, spi_dual, spi_more, is_ram, is_spi) are read directly across
// domains and are only written while the unit is idle (irq = 1).
//
// CPU register map (index = cpu_addr):
//   0: ramstart   [24:2]  SDRAM start address
//   1: romcmd     [31:0]  SPI command, shifted out MSB first
//   2: control            [23:2] datalen, [26:24] cmdlen (bytes),
//                         [27] spi_dual, [28] spi_more, [29] is_ram;
//                         writing this register starts the operation
//   3: status     [0]     irq (1 = idle/done)
//   4: spi_in_shr [31:0]  last bits received on spi_io[1]
//
module spirom (
    input             rst_n,
    input             rom_clk,
    input             ram_clk,
    input             sys_clk,

    /* SPI ROM interface */
    output            spi_sck,
    inout [1:0]       spi_io,
    output reg        spi_cs_n,

    /* SDRAM interface */
    output [15:0]     wd,        // Data to RAM
    (* syn_preserve = 1 *)       // Don't merge into FIFO
    output [24:1]     waddr,     // RAM address
    output reg [1:0]  wrq,       // Write request: [0] >= 8 bytes, [1] >= 16 bytes
    input             wacc,      // Data accepted (ready for next data)

    /* CPU control interface */
    output reg [31:0] cpu_rdata,
    input [31:0]      cpu_wdata,
    input             cpu_valid,
    input [3:0]       cpu_wstrb,
    input [2:0]       cpu_addr,
    output reg        irq
);

    reg [24:2]  ramstart;
    reg [31:0]  romcmd;
    reg [23:2]  datalen;
    reg [2:0]   cmdlen;
    reg         go_spi;
    reg         is_spi;
    reg         go_ram;
    reg         is_ram;
    reg         spi_dual;
    reg         spi_more;       // Do not raise CS# after command done
    reg         ram_done;
    reg         ram_done_q;
    reg [1:0]   cpu_wr_q;
    reg [31:0]  spi_in_shr;     // Input shift register for one-bit input
    wire        spi_active_s;

    wire        cpu_wr_w = cpu_valid & cpu_wstrb[0];

    //
    // CPU-visible control registers (ram_clk domain)
    //
    always @(negedge rst_n or posedge ram_clk)
        if (~rst_n) begin
            ramstart   <= 23'b0;
            romcmd     <= 32'b0;
            datalen    <= 22'b0;
            cmdlen     <= 3'b0;
            go_spi     <= 1'b0;
            is_spi     <= 1'b0;
            go_ram     <= 1'b0;
            is_ram     <= 1'b0;
            ram_done_q <= 1'b1;
            irq        <= 1'b1;
            spi_dual   <= 1'b0;
            spi_more   <= 1'b0;
            cpu_wr_q   <= 2'b0;
        end else begin
            ram_done_q <= ram_done;

            // The go_* requests are dropped once the corresponding
            // engine has visibly started.
            if (~ram_done_q)
                go_ram <= 1'b0;

            if (spi_active_s)
                go_spi <= 1'b0;

            // Everything idle and nothing pending: signal completion
            if (ram_done_q & ~go_ram & ~spi_active_s & ~go_spi)
                irq <= 1'b1;

            // Don't allow writing unless the unit is idle (IRQ = 1)
            // Delay the recognition of the write by one ram_clk
            // cycle (so it is recognized on the second half of the
            // corresponding sys_clk cycle) to relax timings; this
            // is not performance critical at all.
            cpu_wr_q <= { cpu_wr_q[0], cpu_wr_w & irq };

            if (cpu_wr_q == 2'b01) begin
                // Only full word accesses supported via DMA!!
                // Case labels are full width to match the 3-bit cpu_addr
                // and the read decoder below.
                case (cpu_addr)
                    3'b000: begin
                        ramstart <= cpu_wdata[24:2];
                    end
                    3'b001: begin
                        romcmd   <= cpu_wdata[31:0];
                    end
                    3'b010: begin
                        datalen  <= cpu_wdata[23:2];
                        cmdlen   <= cpu_wdata[26:24];
                        // A nonzero command length means an SPI transaction
                        go_spi   <= cpu_wdata[26:24] != 3'd0;
                        is_spi   <= cpu_wdata[26:24] != 3'd0;
                        spi_dual <= cpu_wdata[27];
                        spi_more <= cpu_wdata[28];
                        is_ram   <= cpu_wdata[29];
                        go_ram   <= cpu_wdata[29];
                        irq      <= 1'b0;
                    end
                    default: begin
                        // Do nothing
                    end
                endcase // case (cpu_addr)
            end // if (cpu_wr_q == 2'b01)
        end // else: !if(~rst_n)

    //
    // CPU register readback
    //
    always_comb
        case (cpu_addr)
            3'b000:  cpu_rdata = { 7'b0, ramstart, 2'b0 };
            3'b001:  cpu_rdata = romcmd;
            3'b010:  cpu_rdata = { 2'b0, is_ram, spi_more, spi_dual,
                                   cmdlen, datalen, 2'b0 };
            3'b011:  cpu_rdata = { 31'b0, irq };
            3'b100:  cpu_rdata = spi_in_shr;
            default: cpu_rdata = 32'bx;
        endcase // case (cpu_addr)

    //
    // FIFO and input latches
    //
    reg [1:0]   spi_in_q;
    reg         spi_ram_in_req;
    wire [11:0] wrusedw;
    wire [8:0]  rdusedw;
    wire [15:0] fifo_out;
    wire [1:0]  spi_in_data;

    // In dual mode both I/O lines carry data.  In single-bit mode the
    // FIFO entry is built from the newest bit on spi_io[1] plus the
    // previous bit, which is already in spi_in_shr[0].
    assign spi_in_data[0] = spi_dual ? spi_in_q[0] : spi_in_q[1];
    assign spi_in_data[1] = spi_dual ? spi_in_q[1] : spi_in_shr[0];

    ddufifo spirom_fifo (
        .aclr    ( ~rst_n ),

        .wrclk   ( rom_clk ),
        .data    ( spi_in_data ),
        .wrreq   ( spi_ram_in_req ),
        .wrusedw ( wrusedw ),

        .rdclk   ( ram_clk ),
        .q       ( fifo_out ),
        .rdreq   ( wacc & is_spi ),
        .rdusedw ( rdusedw )
    );

    //
    // Interfacing between FIFO and input signals
    //
    // Shuffle fifo_out because SPI brings in data in bigendian bit
    // order within bytes, but the FIFO IP assumes littleendian
    //
    wire [15:0] spi_wd;

    assign spi_wd[ 7: 6] = fifo_out[ 1: 0];
    assign spi_wd[ 5: 4] = fifo_out[ 3: 2];
    assign spi_wd[ 3: 2] = fifo_out[ 5: 4];
    assign spi_wd[ 1: 0] = fifo_out[ 7: 6];
    assign spi_wd[15:14] = fifo_out[ 9: 8];
    assign spi_wd[13:12] = fifo_out[11:10];
    assign spi_wd[11:10] = fifo_out[13:12];
    assign spi_wd[ 9: 8] = fifo_out[15:14];

    reg [24:1]  waddr_q;
    reg [23:1]  ram_data_ctr;   // Remaining 16-bit words to write
    reg         wacc_q;

    assign waddr = waddr_q;
    assign wd    = is_spi ? spi_wd : 16'h0000;  // Zeroes when not reading SPI

    //
    // SDRAM write engine (ram_clk domain)
    //
    always @(negedge rst_n or posedge ram_clk)
        if (~rst_n) begin
            waddr_q      <= 24'bx;
            ram_data_ctr <= 23'b0;
            wacc_q       <= 1'b0;
            wrq          <= 2'b00;
            ram_done     <= 1'b1;
        end else begin
            wacc_q <= wacc;

            if (|ram_data_ctr) begin
                ram_done <= 1'b0;

                if (is_spi) begin
                    // Reading from SPI ROM
                    wrq[0] <= rdusedw >= 9'd4;  // 4*2 =  8 bytes min available
                    wrq[1] <= rdusedw >= 9'd8;  // 8*2 = 16 bytes min available
                end else begin
                    // Zeroing memory
                    wrq[0] <= |ram_data_ctr[23:3];  // >=  8 bytes left
                    wrq[1] <= |ram_data_ctr[23:4];  // >= 16 bytes left
                end

                // One 16-bit word is consumed per (delayed) accept cycle
                waddr_q      <= waddr_q + wacc_q;
                ram_data_ctr <= ram_data_ctr - wacc_q;
            end // if (|ram_data_ctr)
            else begin
                wrq      <= 2'b00;
                ram_done <= 1'b1;

                if (go_ram) begin
                    waddr_q      <= { ramstart, 1'b0 };
                    ram_data_ctr <= { datalen, 1'b0 };
                    ram_done     <= 1'b0;
                end
            end
        end // else: !if(~rst_n)

    // Negative indices refer to fractional bytes
    reg [2:-3]  spi_cmd_ctr;
    reg [23:-3] spi_data_ctr;
    reg         spi_clk_en;
    reg [1:0]   spi_clk_en_q;   // [0] = spi_clk_en 1 cycle ago, [1] = 2 cycles ago
    reg         spi_mosi_en;
    reg [1:0]   go_spi_q;
    wire        go_spi_s;
    reg         spi_more_q;
    reg         spi_active;
    reg         spi_active_q;
    reg [31:0]  spi_out_shr;
    reg         spi_in_req;
    reg         spi_in_req_q;

    // Wait this many bit times between CS# high and the next CS# low
    // (tSHSL).  The worst of these is tSHSL2 = 50 ns = 8 cycles @ 134 MHz.
    localparam spi_cs_wait_lg2 = 3;
    reg [spi_cs_wait_lg2:0] spi_cs_ctr;
    wire spi_cs_ready = spi_cs_ctr[spi_cs_wait_lg2];

    // Explicit synchronizers for handshake signals
    synchronizer #(.width(1)) go_spi_synchro (
        .rst_n ( rst_n ),
        .clk   ( rom_clk ),
        .d     ( go_spi ),
        .q     ( go_spi_s )
    );

    synchronizer #(.width(1)) spi_active_synchro (
        .rst_n ( rst_n ),
        .clk   ( ram_clk ),
        .d     ( spi_active ),
        .q     ( spi_active_s )
    );

    // Only clock the SPI bus while the FIFO write side has headroom:
    // ~wrusedw must be at least 128.  The FIFO write port is 2 bits wide,
    // so this is presumably 128 two-bit entries (32 bytes) of slack for
    // data already in the input pipeline.
    // NOTE(review): an earlier comment here read "64/4 = 16 bytes min
    // space"; confirm the intended threshold against the ddufifo depth.
    wire dma_queue_space = (~wrusedw) >= 12'd128;

    //
    // SPI state machine (rom_clk domain)
    //
    always @(negedge rst_n or posedge rom_clk)
        if (~rst_n) begin
            spi_cmd_ctr    <= 6'b0;
            spi_clk_en     <= 1'b0;
            spi_clk_en_q   <= 'b0;
            spi_data_ctr   <= 27'b0;
            spi_cs_n       <= 1'b1;
            spi_cs_ctr     <= 'b0;
            spi_in_req     <= 1'b0;
            spi_in_req_q   <= 1'b0;
            spi_ram_in_req <= 1'b0;
            spi_mosi_en    <= 1'b1;
            spi_in_shr     <= 32'b0;
            spi_active     <= 1'b0;
            spi_active_q   <= 1'b0;
            spi_more_q     <= 1'b0;
            spi_out_shr    <= 32'b0;
        end else begin
            // Fun with long pipelined chains of registers...
            spi_in_req     <= 1'b0;
            spi_in_req_q   <= spi_in_req;
            spi_ram_in_req <= spi_in_req_q & is_ram;

            spi_clk_en     <= 1'b0;
            spi_clk_en_q   <= { spi_clk_en_q[0], spi_clk_en };
            spi_active_q   <= spi_active;

            // Capture the single-bit input two cycles after the clock
            // enable that produced it.
            if ( spi_clk_en_q[1] )
                spi_in_shr <= { spi_in_shr[30:0], spi_in_q[1] };

            // Bit to start transmitting on the next clock down transition
            // This needs to be delayed by one cycle in order to match
            // the one-cycle delay imposed by the DDR output buffer
            // when spi_clk_en goes high.
            //
            // Only the one-cycle-delayed enable, spi_clk_en_q[0], may be
            // tested here so that the register advances exactly once per
            // emitted SCK pulse.  Testing the whole 2-bit vector would
            // shift twice after an isolated pulse and drop a command bit
            // whenever the clock is stalled by dma_queue_space.
            if ( spi_clk_en_q[0] )
                spi_out_shr <= { spi_out_shr[30:0], 1'b1 };

            // tSHSL: make sure we get 8 bit times of CS# deselect between
            // commands.  The counter saturates once its top bit is set.
            if ( ~spi_cs_n )
                spi_cs_ctr <= 'b0;
            else
                spi_cs_ctr <= spi_cs_ctr + !spi_cs_ready;

            // Note: datalen <- spi_data_ctr is a 2-cycle multipath
            if (~spi_active) begin
                // Idle: hold CS# asserted only if the previous command
                // asked for it (spi_more).
                spi_cs_n <= ~spi_more_q;

                if ( go_spi_s & (spi_more_q | spi_cs_ready) ) begin
                    // Starting new transaction
                    spi_cmd_ctr  <= { cmdlen, 3'b0 };
                    spi_data_ctr <= { datalen, 5'b0 };
                    spi_active   <= 1'b1;
                    spi_cs_n     <= 1'b0;
                    spi_more_q   <= spi_more;
                    spi_out_shr  <= romcmd;     // Overrides the shift above
                end
            end // if (~spi_active)
            else begin
                spi_cs_n <= 1'b0;

                if ( ~|{spi_data_ctr, spi_cmd_ctr} ) begin
                    // Transaction completed.  Note: CS# needs to remain
                    // asserted for at least one more cycle in case of read.
                    spi_clk_en  <= 1'b0;
                    spi_mosi_en <= 1'b1;
                    spi_active  <= 1'b0;
                end else begin
                    // This will block unnecessarily if the DMA queue
                    // is full from a previous transaction, but that doesn't
                    // matter in practice... just let it drain.
                    spi_clk_en  <= dma_queue_space;

                    // In dual mode, release spi_io[0] once the command
                    // has been sent so the flash can drive it.
                    spi_mosi_en <= ~spi_dual | |spi_cmd_ctr;

                    // Data phase: request a FIFO write for every clock in
                    // dual mode, every second clock in single-bit mode.
                    if ( spi_clk_en & ~|spi_cmd_ctr )
                        spi_in_req <= spi_data_ctr[-3] | spi_dual;

                    if ( spi_clk_en | ~spi_active_q ) begin
                        // This is either the kickoff cycle or advancing
                        if ( ~|spi_cmd_ctr )
                            spi_data_ctr <= spi_data_ctr - (1'b1 << spi_dual);
                        else
                            spi_cmd_ctr  <= spi_cmd_ctr - 1'b1;
                    end // if ( spi_clk_en | ~spi_active_q )
                end // else: !if( ~|{spi_data_ctr, spi_cmd_ctr} )
            end // else: !if(~spi_active)
        end // else: !if(~rst_n)

    //
    // Input I/O: latch on the positive spi_clk, which is the
    // negative rom_clk.
    //
    always @(negedge rom_clk)
        spi_in_q <= spi_io;

    //
    // Output I/O: changed on the negative spi_clk, which is the
    // positive rom_clk (thus matching when these registers are set.)
    //
    assign spi_io[0] = spi_mosi_en ? spi_out_shr[31] : 1'bz;
    assign spi_io[1] = 1'bz;

    //
    // SPI_SCK output buffer: emit a spi_clk clock pulse if spi_clk_en
    // is high; note that this is phase-inverted versus the internal
    // rom_clk, and that this is sampled one full rom_clk before
    // output, so the output shift register is advanced from the
    // one-cycle-delayed enable (spi_clk_en_q[0]) rather than from
    // spi_clk_en itself.
    //
    ddio_out spi_clk_buf (
        .aclr     ( ~rst_n ),
        .datain_h ( 1'b0 ),
        .datain_l ( spi_clk_en ),
        .outclock ( rom_clk ),
        .dataout  ( spi_sck )
    );

endmodule // spirom