//
// Fast data download from 2-bit SPI flash, or zero SDRAM.
//
// Feed a FIFO that then writes to SDRAM.
// Requires writes in aligned 8-byte chunks.
//
// This unit does *not* require a 2x SPI clock;
// it uses a DDR buffer for clock out.
//
// CPU register map, as decoded from cpu_addr below:
//
//   0  R/W  ramstart   [24:2]  RAM start address
//   1  R/W  romcmd     [31:0]  command bits, shifted out MSB first
//   2  R/W  datalen    [23:2]  transfer length
//           cmdlen     [26:24] command length in bytes; nonzero starts SPI
//           spi_dual   [27]    two-bit data phase
//           spi_more   [28]    keep CS# asserted after this command
//           is_ram     [29]    run the RAM write engine
//           Writing this register starts the operation and clears irq.
//   3  R    irq        [0]     1 when the unit is idle
//   4  R    spi_in_shr [31:0]  one-bit input shift register
//
// Writes are only accepted while irq = 1 (unit idle).
//
// Three clocks are involved: the CPU registers and the RAM write engine
// run on ram_clk, the SPI engine runs on rom_clk. sys_clk is a port but
// is not referenced inside this module.
//

module spirom (
               input             rst_n,
               input             rom_clk,
               input             ram_clk,
               input             sys_clk,

               /* SPI ROM interface */
               output            spi_sck,
               inout [1:0]       spi_io,
               output reg        spi_cs_n,

               /* SDRAM interface */
               output [15:0]     wd,    // Data to RAM
               (* syn_preserve = 1 *)   // Don't merge into FIFO
               output [24:1]     waddr, // RAM address
               output reg [1:0]  wrq,   // Write request (min 4/8 bytes)
               input             wacc,  // Data accepted (ready for next data)

               /* CPU control interface */
               output reg [31:0] cpu_rdata,
               input [31:0]      cpu_wdata,
               input             cpu_valid,
               input [3:0]       cpu_wstrb,
               input [2:0]       cpu_addr,
               output reg        irq
               );

   reg [24:2] ramstart;
   reg [31:0] romcmd;
   reg [23:2] datalen;
   reg [2:0]  cmdlen;
   reg        go_spi;           // Start request to the SPI engine
   reg        is_spi;           // Current operation involves the SPI ROM
   reg        go_ram;           // Start request to the RAM write engine
   reg        is_ram;           // Current operation writes to RAM
   reg        spi_dual;
   reg        spi_more;         // Do not raise CS# after command done
   reg        ram_done;
   reg        ram_done_q;
   reg [1:0]  cpu_wr_q;
   reg [31:0] spi_in_shr;       // Input shift register for one-bit input
   wire       spi_active_s;     // spi_active synchronized into ram_clk

   wire       cpu_wr_w = cpu_valid & cpu_wstrb[0];

   //
   // CPU-visible control registers and start/done handshake (ram_clk)
   //
   always @(negedge rst_n or posedge ram_clk)
     if (~rst_n)
       begin
          ramstart     <= 23'b0;
          romcmd       <= 32'b0;
          datalen      <= 22'b0;
          cmdlen       <= 3'b0;
          go_spi       <= 1'b0;
          is_spi       <= 1'b0;
          go_ram       <= 1'b0;
          is_ram       <= 1'b0;
          ram_done_q   <= 1'b1;
          irq          <= 1'b1;
          spi_dual     <= 1'b0;
          spi_more     <= 1'b0;
          cpu_wr_q     <= 2'b0;
       end
     else
       begin
          ram_done_q <= ram_done;

          // Drop each start request once the corresponding engine
          // has been seen to be busy.
          if (~ram_done_q)
            go_ram <= 1'b0;
          if (spi_active_s)
            go_spi <= 1'b0;

          // Idle again: both engines finished and no request pending.
          // A register write below takes precedence over this
          // assignment because it comes later in the block.
          if (ram_done_q & ~go_ram & ~spi_active_s & ~go_spi)
            irq     <= 1'b1;

          // Don't allow writing unless the unit is idle (IRQ = 1)
          // Delay the recognition of the write by one ram_clk
          // cycle (so it is recognized on the second half of the
          // corresponding sys_clk cycle) to relax timings; this
          // is not performance critical at all.
          cpu_wr_q <= { cpu_wr_q[0], cpu_wr_w & irq };

          if (cpu_wr_q == 2'b01)
            begin
               // Only full word accesses supported via DMA!!
               // Labels are written at the full 3-bit width of
               // cpu_addr, matching the read decoder below.
               case (cpu_addr)
                 3'b000: begin
                    ramstart <= cpu_wdata[24:2];
                 end
                 3'b001: begin
                    romcmd   <= cpu_wdata[31:0];
                 end
                 3'b010: begin
                    datalen     <= cpu_wdata[23:2];
                    cmdlen      <= cpu_wdata[26:24];
                    go_spi      <= cpu_wdata[26:24] != 3'd0;
                    is_spi      <= cpu_wdata[26:24] != 3'd0;
                    spi_dual    <= cpu_wdata[27];
                    spi_more    <= cpu_wdata[28];
                    is_ram      <= cpu_wdata[29];
                    go_ram      <= cpu_wdata[29];
                    irq         <= 1'b0;
                 end
                 default: begin
                    // Do nothing
                 end
               endcase // case (cpu_addr)
            end // if (cpu_wr_q == 2'b01)
       end // else: !if(~rst_n)

   //
   // CPU register readback
   //
   always_comb
     case (cpu_addr)
       3'b000:  cpu_rdata = { 7'b0, ramstart, 2'b0 };
       3'b001:  cpu_rdata = romcmd;
       3'b010:  cpu_rdata = { 2'b0, is_ram, spi_more, spi_dual,
                              cmdlen, datalen,  2'b0 };
       3'b011:  cpu_rdata = { 31'b0, irq };
       3'b100:  cpu_rdata = spi_in_shr;
       default: cpu_rdata = 32'bx;
     endcase // case (cpu_addr)

   //
   // FIFO and input latches
   //
   reg [1:0]              spi_in_q;
   reg                    spi_ram_in_req;
   wire [11:0]            wrusedw;
   wire [8:0]             rdusedw;
   wire [15:0]            fifo_out;
   wire [1:0]             spi_in_data;

   // In dual mode both input pins are written to the FIFO as-is.
   // In one-bit mode two consecutive samples of spi_in_q[1] are
   // paired up: the previous one (already in spi_in_shr[0]) and the
   // current one.
   assign spi_in_data[0] = spi_dual ? spi_in_q[0] : spi_in_q[1];
   assign spi_in_data[1] = spi_dual ? spi_in_q[1] : spi_in_shr[0];

   // Written 2 bits at a time on rom_clk, read 16 bits at a time
   // on ram_clk.
   ddufifo spirom_fifo (
                        .aclr ( ~rst_n ),

                        .wrclk ( rom_clk ),
                        .data ( spi_in_data ),
                        .wrreq ( spi_ram_in_req ),
                        .wrusedw ( wrusedw ),

                        .rdclk ( ram_clk ),
                        .q ( fifo_out ),
                        .rdreq ( wacc & is_spi ),
                        .rdusedw ( rdusedw )
                        );

   //
   // Interfacing between FIFO and input signals
   //
   // Shuffle fifo_out because SPI brings in data in bigendian bit
   // order within bytes, but the FIFO IP assumes littleendian
   //
   wire [15:0]            spi_wd;

   assign spi_wd[ 7: 6] = fifo_out[ 1: 0];
   assign spi_wd[ 5: 4] = fifo_out[ 3: 2];
   assign spi_wd[ 3: 2] = fifo_out[ 5: 4];
   assign spi_wd[ 1: 0] = fifo_out[ 7: 6];
   assign spi_wd[15:14] = fifo_out[ 9: 8];
   assign spi_wd[13:12] = fifo_out[11:10];
   assign spi_wd[11:10] = fifo_out[13:12];
   assign spi_wd[ 9: 8] = fifo_out[15:14];

   reg [24:1] waddr_q;
   reg [23:1] ram_data_ctr;     // Remaining 16-bit words to write
   reg        wacc_q;

   assign waddr = waddr_q;
   // When no SPI transfer is involved the engine writes zeroes.
   assign wd = is_spi ? spi_wd : 16'h0000;

   //
   // RAM write engine (ram_clk): walks waddr_q forward and
   // ram_data_ctr down by one for every accepted word.
   //
   always @(negedge rst_n or posedge ram_clk)
     if (~rst_n)
       begin
          waddr_q      <= 24'bx;
          ram_data_ctr <= 23'b0;
          wacc_q       <= 1'b0;
          wrq          <= 2'b00;
          ram_done     <= 1'b1;
       end
     else
       begin
          wacc_q       <= wacc;

          if (|ram_data_ctr)
            begin
               ram_done <= 1'b0;

               if (is_spi)
                 begin
                    // Reading from SPI ROM
                    wrq[0] <= rdusedw >=  9'd4; // 4*2 =  8 bytes min available
                    wrq[1] <= rdusedw >=  9'd8; // 8*2 = 16 bytes min available
                 end
               else
                 begin
                    // Zeroing memory
                    wrq[0] <= |ram_data_ctr[23:3];
                    wrq[1] <= |ram_data_ctr[23:4];
                 end

               waddr_q      <= waddr_q      + wacc_q;
               ram_data_ctr <= ram_data_ctr - wacc_q;
            end // if (|ram_data_ctr)
          else
            begin
               wrq      <= 2'b00;
               ram_done <= 1'b1;

               if (go_ram)
                 begin
                    // Load a new job; this overrides the
                    // ram_done <= 1 just above.
                    waddr_q      <= { ramstart, 1'b0 };
                    ram_data_ctr <= { datalen,  1'b0 };
                    ram_done     <= 1'b0;
                 end
            end
       end // else: !if(~rst_n)

   // Negative indices refer to fractional bytes
   reg [2:-3]  spi_cmd_ctr;
   reg [23:-3] spi_data_ctr;
   reg         spi_clk_en;
   reg [1:0]   spi_clk_en_q;
   reg         spi_mosi_en;
   reg [1:0]   go_spi_q;
   wire        go_spi_s;        // go_spi synchronized into rom_clk
   reg         spi_more_q;
   reg         spi_active;
   reg         spi_active_q;
   reg [31:0]  spi_out_shr;
   reg         spi_in_req;
   reg         spi_in_req_q;

   // Wait these many bit times between CS# high and the next CS# low
   // (tSHSL). The worst of these is tSHSL2 = 50 ns = 8 cycles @ 134 MHz.
   localparam spi_cs_wait_lg2 = 3;
   reg [spi_cs_wait_lg2:0] spi_cs_ctr;
   wire                    spi_cs_ready = spi_cs_ctr[spi_cs_wait_lg2];

   // Explicit synchronizers for handshake signals
   synchronizer #(.width(1)) go_spi_synchro
     (
      .rst_n ( rst_n ),
      .clk ( rom_clk ),
      .d ( go_spi ),
      .q ( go_spi_s )
      );

   synchronizer #(.width(1)) spi_active_synchro
     (
      .rst_n ( rst_n ),
      .clk ( ram_clk ),
      .d ( spi_active ),
      .q ( spi_active_s )
      );

   // Only clock the SPI bus while the bitwise complement of the FIFO
   // write-side fill level is at least 128.
   // NOTE(review): the original comment here read "64/4 = 16 bytes
   // min space", which does not match the 12'd128 threshold; confirm
   // which of the two is intended.
   wire dma_queue_space = (~wrusedw) >= 12'd128;

   //
   // SPI engine (rom_clk)
   //
   always @(negedge rst_n or posedge rom_clk)
     if (~rst_n)
       begin
          spi_cmd_ctr  <= 6'b0;
          spi_clk_en   <= 1'b0;
          spi_clk_en_q <= 'b0;
          spi_data_ctr <= 27'b0;
          spi_cs_n     <= 1'b1;
          spi_cs_ctr   <= 'b0;
          spi_in_req   <= 1'b0;
          spi_in_req_q <= 1'b0;
          spi_ram_in_req <= 1'b0;
          spi_mosi_en  <= 1'b1;
          spi_in_shr   <= 32'b0;
          spi_active   <= 1'b0;
          spi_active_q <= 1'b0;
          spi_more_q   <= 1'b0;
          spi_out_shr  <= 32'b0;
       end
     else
       begin
          // Fun with long pipelined chains of registers...
          // spi_in_req and spi_clk_en default to 0 here and are
          // overridden further down while a transaction is running.
          spi_in_req     <= 1'b0;
          spi_in_req_q   <= spi_in_req;
          spi_ram_in_req <= spi_in_req_q & is_ram;
          spi_clk_en     <= 1'b0;
          spi_clk_en_q   <= (spi_clk_en_q << 1'b1) | spi_clk_en;
          spi_active_q   <= spi_active;

          // Capture one-bit input two cycles after the clock enable
          if ( spi_clk_en_q[1] )
            spi_in_shr <= { spi_in_shr[30:0], spi_in_q[1] };

          // Bit to start transmitting on the next clock down transition
          // This needs to be delayed by one cycle in order to match
          // the one-cycle delay imposed by the DDR output buffer
          // when spi_clk_en goes high.
          // NOTE(review): the condition tests the whole 2-bit vector
          // (true when either bit is set), while the comment above
          // describes a one-cycle delay, i.e. spi_clk_en_q[0] only;
          // confirm which is intended. Behaviour is left unchanged.
          if ( spi_clk_en_q )
            spi_out_shr <= { spi_out_shr[30:0], 1'b1 };

          // tSHSL: make sure we get 8 bit times of CS# deselect between
          // commands. The counter saturates once its top bit is set.
          if ( ~spi_cs_n )
            spi_cs_ctr <= 'b0;
          else
            spi_cs_ctr <= spi_cs_ctr + !spi_cs_ready;

          // Note: datalen <- spi_data_ctr is a 2-cycle multipath
          if (~spi_active)
            begin
               // Idle: CS# stays low only if the previous command
               // asked for more.
               spi_cs_n <= ~spi_more_q;

               if ( go_spi_s & (spi_more_q | spi_cs_ready) )
                 begin
                    // Starting new transaction
                    spi_cmd_ctr  <= { cmdlen,  3'b0 };
                    spi_data_ctr <= { datalen, 5'b0 };
                    spi_active   <= 1'b1;
                    spi_cs_n     <= 1'b0;
                    spi_more_q   <= spi_more;
                    spi_out_shr  <= romcmd;
                 end
            end // if (~spi_active)
          else
            begin
               spi_cs_n <= 1'b0;

               if ( ~|{spi_data_ctr, spi_cmd_ctr} )
                 begin
                    // Transaction completed. Note: CS# needs to remain
                    // asserted for at least one more cycle in case of read.
                    spi_clk_en  <= 1'b0;
                    spi_mosi_en <= 1'b1;
                    spi_active  <= 1'b0;
                 end
               else
                 begin
                    // This will block unnecessarily if the DMA queue
                    // is full from a previous transaction, but that doesn't
                    // matter in practice... just let it drain.
                    spi_clk_en <= dma_queue_space;
                    // Drive IO0 during the command phase, and during
                    // the data phase too unless in dual mode.
                    spi_mosi_en <= ~spi_dual | |spi_cmd_ctr;

                    // Data phase: request a FIFO write on every clock
                    // in dual mode, on every other clock in one-bit mode.
                    if ( spi_clk_en & ~|spi_cmd_ctr )
                      spi_in_req <= spi_data_ctr[-3] | spi_dual;

                    if ( spi_clk_en | ~spi_active_q )
                      begin
                         // This is either the kickoff cycle or advancing
                         if ( ~|spi_cmd_ctr )
                           spi_data_ctr <= spi_data_ctr - (1'b1 << spi_dual);
                         else
                           spi_cmd_ctr <= spi_cmd_ctr - 1'b1;
                      end // if ( spi_clk_en | ~spi_active_q )
                 end // else: !if( ~|{spi_data_ctr, spi_cmd_ctr} )
            end // else: !if(~spi_active)
       end // else: !if(~rst_n)

   //
   // Input I/O: latch on the positive spi_clk, which is the
   // negative rom_clk.
   //
   always @(negedge rom_clk)
     spi_in_q <= spi_io;

   //
   // Output I/O: changed on the negative spi_clk, which is the
   // positive rom_clk (thus matching when these registers are set.)
   // IO1 is never driven by this module.
   //
   assign spi_io[0] = spi_mosi_en ? spi_out_shr[31] : 1'bz;
   assign spi_io[1] = 1'bz;

   //
   // SPI_SCK output buffer: emit a spi_clk clock pulse if spi_clk_en
   // is high; note that this is phase-inverted versus the internal
   // rom_clk, and that this is sampled one full rom_clk before
   // output.
   //
   ddio_out spi_clk_buf (
                         .aclr ( ~rst_n ),
                         .datain_h ( 1'b0 ),
                         .datain_l ( spi_clk_en ),
                         .outclock ( rom_clk ),
                         .dataout ( spi_sck )
                         );

endmodule // spirom