//
// Fast data download from 2-bit SPI flash, or zero SDRAM.
//
// Feed a FIFO that then writes to SDRAM.
// Requires writes in aligned 8-byte chunks.
//
// This unit does *not* require a 2x SPI clock;
// it uses a DDR buffer for clock out.
//
//
// spirom: SPI flash -> FIFO -> SDRAM downloader / SDRAM zeroing engine.
//
// CPU register map (cpu_addr selects a 32-bit word):
//   0 (R/W)  [24:2]  ramstart  - SDRAM start address
//   1 (R/W)  [31:0]  romcmd    - SPI command word, shifted out MSB first
//   2 (R/W)  [23:2]  datalen   - data length
//            [26:24] cmdlen    - command length in bytes (0 = no SPI phase)
//            [27]    spi_dual  - read data on both spi_io lines
//            [28]    spi_more  - keep CS# asserted after this command
//            [29]    is_ram    - write to SDRAM
//            Writing this register starts the operation and clears irq.
//   3 (RO)   [0]     irq       - 1 when the unit is idle
//   4 (RO)   [31:0]  spi_in_shr - one-bit SPI input shift register
//
// CPU writes are only accepted while irq = 1, and only when
// cpu_wstrb[0] is set.
//
module spirom (
    input             rst_n,
    input             rom_clk,
    input             ram_clk,
    input             sys_clk,

    /* SPI ROM interface */
    output            spi_sck,
    inout  [1:0]      spi_io,
    output reg        spi_cs_n,

    /* SDRAM interface */
    output [15:0]     wd,        // Data to RAM
    (* syn_preserve = 1 *)       // Don't merge into FIFO
    output [24:1]     waddr,     // RAM address
    output reg [1:0]  wrq,       // Write request (min 4/8 bytes)
    input             wacc,      // Data accepted (ready for next data)

    /* CPU control interface */
    output reg [31:0] cpu_rdata,
    input  [31:0]     cpu_wdata,
    input             cpu_valid,
    input  [3:0]      cpu_wstrb,
    input  [2:0]      cpu_addr,
    output reg        irq
);

    //
    // CPU-visible configuration and handshake state (ram_clk domain)
    //
    reg [24:2]  ramstart;
    reg [31:0]  romcmd;
    reg [23:2]  datalen;
    reg [2:0]   cmdlen;
    reg         go_spi;          // Start request to the SPI engine
    reg         is_spi;          // Current operation has an SPI phase
    reg         go_ram;          // Start request to the RAM writer
    reg         is_ram;          // Current operation writes to RAM
    reg         spi_dual;
    reg         spi_more;        // Do not raise CS# after command done
    reg         ram_done;
    reg         ram_done_q;
    reg [1:0]   cpu_wr_q;

    reg [31:0]  spi_in_shr;      // Input shift register for one-bit input
    wire        spi_active_s;    // spi_active synchronized into ram_clk

    wire        cpu_wr_w = cpu_valid & cpu_wstrb[0];

    always @(negedge rst_n or posedge ram_clk)
        if (~rst_n)
        begin
            ramstart   <= 23'b0;
            romcmd     <= 32'b0;
            datalen    <= 22'b0;
            cmdlen     <= 3'b0;
            go_spi     <= 1'b0;
            is_spi     <= 1'b0;
            go_ram     <= 1'b0;
            is_ram     <= 1'b0;
            ram_done_q <= 1'b1;
            irq        <= 1'b1;
            spi_dual   <= 1'b0;
            spi_more   <= 1'b0;
            cpu_wr_q   <= 2'b0;
        end
        else
        begin
            ram_done_q <= ram_done;

            // Drop each start request once the corresponding engine
            // has visibly picked it up.
            if (~ram_done_q)
                go_ram <= 1'b0;
            if (spi_active_s)
                go_spi <= 1'b0;

            // Both engines idle and no request pending: signal done.
            // A register write below overrides this in the same cycle.
            if (ram_done_q & ~go_ram & ~spi_active_s & ~go_spi)
                irq <= 1'b1;

            // Don't allow writing unless the unit is idle (IRQ = 1)
            // Delay the recognition of the write by one ram_clk
            // cycle (so it is recognized on the second half of the
            // corresponding sys_clk cycle) to relax timings; this
            // is not performance critical at all.
            cpu_wr_q <= { cpu_wr_q[0], cpu_wr_w & irq };

            // 2'b01 = first cycle in which the qualified write is seen
            if (cpu_wr_q == 2'b01)
            begin
                // Only full word accesses supported via DMA!!
                // Case items are full 3-bit literals to match cpu_addr
                // and the read mux below.
                case (cpu_addr)
                    3'b000: begin
                        ramstart <= cpu_wdata[24:2];
                    end
                    3'b001: begin
                        romcmd <= cpu_wdata[31:0];
                    end
                    3'b010: begin
                        datalen  <= cpu_wdata[23:2];
                        cmdlen   <= cpu_wdata[26:24];
                        go_spi   <= cpu_wdata[26:24] != 3'd0;
                        is_spi   <= cpu_wdata[26:24] != 3'd0;
                        spi_dual <= cpu_wdata[27];
                        spi_more <= cpu_wdata[28];
                        is_ram   <= cpu_wdata[29];
                        go_ram   <= cpu_wdata[29];
                        irq      <= 1'b0;
                    end
                    default: begin
                        // Do nothing
                    end
                endcase // case (cpu_addr)
            end // if (cpu_wr_q == 2'b01)
        end // else: !if(~rst_n)

    // CPU read mux
    always_comb
        case (cpu_addr)
            3'b000:  cpu_rdata = { 7'b0, ramstart, 2'b0 };
            3'b001:  cpu_rdata = romcmd;
            3'b010:  cpu_rdata = { 2'b0, is_ram, spi_more, spi_dual,
                                   cmdlen, datalen, 2'b0 };
            3'b011:  cpu_rdata = { 31'b0, irq };
            3'b100:  cpu_rdata = spi_in_shr;
            default: cpu_rdata = 32'bx;
        endcase // case (cpu_addr)

    //
    // FIFO and input latches
    //
    reg  [1:0]  spi_in_q;
    reg         spi_ram_in_req;
    wire [11:0] wrusedw;
    wire [8:0]  rdusedw;
    wire [15:0] fifo_out;
    wire [1:0]  spi_in_data;

    // Dual mode: both I/O lines carry data.  Single mode: pair the
    // current bit on spi_io[1] with the previously shifted-in bit.
    assign spi_in_data[0] = spi_dual ? spi_in_q[0] : spi_in_q[1];
    assign spi_in_data[1] = spi_dual ? spi_in_q[1] : spi_in_shr[0];

    // 2-bit write side (rom_clk), 16-bit read side (ram_clk)
    ddufifo spirom_fifo (
        .aclr    ( ~rst_n ),

        .wrclk   ( rom_clk ),
        .data    ( spi_in_data ),
        .wrreq   ( spi_ram_in_req ),
        .wrusedw ( wrusedw ),

        .rdclk   ( ram_clk ),
        .q       ( fifo_out ),
        .rdreq   ( wacc & is_spi ),
        .rdusedw ( rdusedw )
    );

    //
    // Interfacing between FIFO and input signals
    //
    // Shuffle fifo_out because SPI brings in data in bigendian bit
    // order within bytes, but the FIFO IP assumes littleendian
    //
    wire [15:0] spi_wd;

    assign spi_wd[ 7: 6] = fifo_out[ 1: 0];
    assign spi_wd[ 5: 4] = fifo_out[ 3: 2];
    assign spi_wd[ 3: 2] = fifo_out[ 5: 4];
    assign spi_wd[ 1: 0] = fifo_out[ 7: 6];
    assign spi_wd[15:14] = fifo_out[ 9: 8];
    assign spi_wd[13:12] = fifo_out[11:10];
    assign spi_wd[11:10] = fifo_out[13:12];
    assign spi_wd[ 9: 8] = fifo_out[15:14];

    reg [24:1] waddr_q;
    reg [23:1] ram_data_ctr;     // Remaining 16-bit words to write
    reg        wacc_q;

    assign waddr = waddr_q;
    assign wd    = is_spi ? spi_wd : 16'h0000;   // Zero fill when no SPI phase

    //
    // RAM write engine (ram_clk domain)
    //
    always @(negedge rst_n or posedge ram_clk)
        if (~rst_n)
        begin
            waddr_q      <= 24'bx;
            ram_data_ctr <= 23'b0;
            wacc_q       <= 1'b0;
            wrq          <= 2'b00;
            ram_done     <= 1'b1;
        end
        else
        begin
            wacc_q <= wacc;

            if (|ram_data_ctr)
            begin
                ram_done <= 1'b0;

                if (is_spi)
                begin
                    // Reading from SPI ROM
                    wrq[0] <= rdusedw >= 9'd4; // 4*2 =  8 bytes min available
                    wrq[1] <= rdusedw >= 9'd8; // 8*2 = 16 bytes min available
                end
                else
                begin
                    // Zeroing memory
                    wrq[0] <= |ram_data_ctr[23:3];
                    wrq[1] <= |ram_data_ctr[23:4];
                end

                // Advance by one word per (delayed) accept
                waddr_q      <= waddr_q + wacc_q;
                ram_data_ctr <= ram_data_ctr - wacc_q;
            end // if (|ram_data_ctr)
            else
            begin
                wrq      <= 2'b00;
                ram_done <= 1'b1;

                if (go_ram)
                begin
                    waddr_q      <= { ramstart, 1'b0 };
                    ram_data_ctr <= { datalen, 1'b0 };
                    ram_done     <= 1'b0;
                end
            end
        end // else: !if(~rst_n)

    //
    // SPI engine (rom_clk domain)
    //
    // Negative indices refer to fractional bytes
    reg [2:-3]  spi_cmd_ctr;
    reg [23:-3] spi_data_ctr;
    reg         spi_clk_en;
    reg [1:0]   spi_clk_en_q;    // spi_clk_en delayed by 1 ([0]) and 2 ([1]) cycles
    reg         spi_mosi_en;
    reg [1:0]   go_spi_q;
    wire        go_spi_s;        // go_spi synchronized into rom_clk
    reg         spi_more_q;
    reg         spi_active;
    reg         spi_active_q;
    reg [31:0]  spi_out_shr;
    reg         spi_in_req;
    reg         spi_in_req_q;

    // Wait these many bit times between CS# high and the next CS# low
    // (tSHSL). The worst of these is tSHSL2 = 50 ns = 8 cycles @ 134 MHz.
    localparam spi_cs_wait_lg2 = 3;
    reg [spi_cs_wait_lg2:0] spi_cs_ctr;
    wire spi_cs_ready = spi_cs_ctr[spi_cs_wait_lg2];

    // Explicit synchronizers for handshake signals
    synchronizer #(.width(1)) go_spi_synchro
    (
        .rst_n ( rst_n ),
        .clk   ( rom_clk ),
        .d     ( go_spi ),
        .q     ( go_spi_s )
    );

    synchronizer #(.width(1)) spi_active_synchro
    (
        .rst_n ( rst_n ),
        .clk   ( ram_clk ),
        .d     ( spi_active ),
        .q     ( spi_active_s )
    );

    // Only clock the SPI bus while at least 128 write-side FIFO
    // entries remain free.
    // NOTE(review): an earlier comment here read "64/4 = 16 bytes min
    // space", which does not match the constant 128; with 2-bit
    // write-side entries 128 entries would be 32 bytes - confirm
    // against the ddufifo configuration.
    wire dma_queue_space = (~wrusedw) >= 12'd128;

    always @(negedge rst_n or posedge rom_clk)
        if (~rst_n)
        begin
            spi_cmd_ctr    <= 6'b0;
            spi_clk_en     <= 1'b0;
            spi_clk_en_q   <= 'b0;
            spi_data_ctr   <= 27'b0;
            spi_cs_n       <= 1'b1;
            spi_cs_ctr     <= 'b0;
            spi_in_req     <= 1'b0;
            spi_in_req_q   <= 1'b0;
            spi_ram_in_req <= 1'b0;
            spi_mosi_en    <= 1'b1;
            spi_in_shr     <= 32'b0;
            spi_active     <= 1'b0;
            spi_active_q   <= 1'b0;
            spi_more_q     <= 1'b0;
            spi_out_shr    <= 32'b0;
        end
        else
        begin
            // Fun with long pipelined chains of registers...
            spi_in_req     <= 1'b0;
            spi_in_req_q   <= spi_in_req;
            spi_ram_in_req <= spi_in_req_q & is_ram;

            spi_clk_en     <= 1'b0;
            spi_clk_en_q   <= (spi_clk_en_q << 1'b1) | spi_clk_en;
            spi_active_q   <= spi_active;

            // Input side: two cycles after the clock enable
            if ( spi_clk_en_q[1] )
                spi_in_shr <= { spi_in_shr[30:0], spi_in_q[1] };

            // Bit to start transmitting on the next clock down transition
            // This needs to be delayed by one cycle in order to match
            // the one-cycle delay imposed by the DDR output buffer
            // when spi_clk_en goes high.
            //
            // Test only the one-cycle-delayed stage.  Using the whole
            // 2-bit vector as the condition would OR both stages, which
            // shifts once too often after every clock burst and skips
            // an output bit across a one-cycle gap in spi_clk_en.
            if ( spi_clk_en_q[0] )
                spi_out_shr <= { spi_out_shr[30:0], 1'b1 };

            // tSHSL: make sure we get 8 bit times of CS# deselect between
            // commands.
            if ( ~spi_cs_n )
                spi_cs_ctr <= 'b0;
            else
                spi_cs_ctr <= spi_cs_ctr + !spi_cs_ready;

            // Note: datalen <- spi_data_ctr is a 2-cycle multipath
            if (~spi_active)
            begin
                // Idle: CS# stays low only if the last command asked
                // for it (spi_more).
                spi_cs_n <= ~spi_more_q;

                if ( go_spi_s & (spi_more_q | spi_cs_ready) )
                begin
                    // Starting new transaction
                    spi_cmd_ctr  <= { cmdlen, 3'b0 };
                    spi_data_ctr <= { datalen, 5'b0 };
                    spi_active   <= 1'b1;
                    spi_cs_n     <= 1'b0;
                    spi_more_q   <= spi_more;
                    // Overrides the shift above in this cycle
                    spi_out_shr  <= romcmd;
                end
            end // if (~spi_active)
            else
            begin
                spi_cs_n <= 1'b0;

                if ( ~|{spi_data_ctr, spi_cmd_ctr} )
                begin
                    // Transaction completed. Note: CS# needs to remain
                    // asserted for at least one more cycle in case of read.
                    spi_clk_en  <= 1'b0;
                    spi_mosi_en <= 1'b1;
                    spi_active  <= 1'b0;
                end
                else
                begin
                    // This will block unnecessarily if the DMA queue
                    // is full from a previous transaction, but that doesn't
                    // matter in practice... just let it drain.
                    spi_clk_en  <= dma_queue_space;

                    // In dual mode, release MOSI once the command is out
                    spi_mosi_en <= ~spi_dual | |spi_cmd_ctr;

                    // Data phase: request a FIFO write every clock in
                    // dual mode, every other clock in single mode.
                    if ( spi_clk_en & ~|spi_cmd_ctr )
                        spi_in_req <= spi_data_ctr[-3] | spi_dual;

                    if ( spi_clk_en | ~spi_active_q )
                    begin
                        // This is either the kickoff cycle or advancing
                        if ( ~|spi_cmd_ctr )
                            spi_data_ctr <= spi_data_ctr - (1'b1 << spi_dual);
                        else
                            spi_cmd_ctr  <= spi_cmd_ctr - 1'b1;
                    end // if ( spi_clk_en | ~spi_active_q )
                end // else: !if( ~|{spi_data_ctr, spi_cmd_ctr} )
            end // else: !if(~spi_active)
        end // else: !if(~rst_n)

    //
    // Input I/O: latch on the positive spi_clk, which is the
    // negative rom_clk.
    //
    always @(negedge rom_clk)
        spi_in_q <= spi_io;

    //
    // Output I/O: changed on the negative spi_clk, which is the
    // positive rom_clk (thus matching when these registers are set.)
    //
    assign spi_io[0] = spi_mosi_en ? spi_out_shr[31] : 1'bz;
    assign spi_io[1] = 1'bz;

    //
    // SPI_SCK output buffer: emit a spi_clk clock pulse if spi_clk_en
    // is high; note that this is phase-inverted versus the internal
    // rom_clk, and that this is sampled one full rom_clk before
    // output, so the pulse on the pin trails spi_clk_en by one cycle
    // (hence the delayed copies in spi_clk_en_q above).
    //
    ddio_out spi_clk_buf (
        .aclr     ( ~rst_n ),
        .datain_h ( 1'b0 ),
        .datain_l ( spi_clk_en ),
        .outclock ( rom_clk ),
        .dataout  ( spi_sck )
    );

endmodule // spirom
|