Преглед на файлове

vjtag: allow both SRAM and DRAM to be accessed over VJTAG

Both SRAM and DRAM can now be accessed over VJTAG. This also fixes
further phase problems in the VJTAG state machine.
H. Peter Anvin преди 3 години
родител
ревизия
e1c53dfb56

+ 80 - 18
fpga/fast_mem.sv

@@ -4,25 +4,87 @@
 //
 
 module fast_mem
+  #(
+    parameter integer words_lg2,
+    parameter data_file
+    )
    (
-    input	  rst_n,
-    input	  clk,
-    input	  write,
-    input	  read,
-    input [3:0]   wstrb,
-    input [12:0]  addr,
-    input [31:0]  wdata,
-    output [31:0] rdata
+    input 		  rst_n,
+    input 		  clk,
+
+    input 		  write0,
+    input 		  read0,
+    input [3:0] 	  wstrb0,
+    input [words_lg2-1:0] addr0,
+    input [31:0] 	  wdata0,
+    output [31:0] 	  rdata0,
+
+    input 		  write1,
+    input 		  read1,
+    input [3:0] 	  wstrb1,
+    input [words_lg2-1:0] addr1,
+    input [31:0] 	  wdata1,
+    output [31:0] 	  rdata1
     );
 
-   fastmem_ip ip (
-		  .aclr ( ~rst_n ),
-		  .address ( addr ),
-		  .byteena ( wstrb ),
-		  .clock ( clk ),
-		  .data ( wdata ),
-		  .rden ( 1'b1 ), // Slows down too much to modulate
-		  .wren ( write ),
-		  .q ( rdata )
-		  );
+   altsyncram ip (
+		  .aclr0 ( 1'b0 ),
+		  .clock0 ( clk ),
+
+		  .address_a ( addr0 ),
+		  .byteena_a ( wstrb0 ),
+		  .data_a ( wdata0 ),
+		  .rden_a  ( read0 ),
+		  .wren_a  ( write0 ),
+		  .q_a ( rdata0 ),
+
+		  .address_b ( addr1 ),
+		  .byteena_b ( wstrb1 ),
+		  .data_b ( wdata1 ),
+		  .rden_b  ( read1 ),
+		  .wren_b  ( write1 ),
+		  .q_b ( rdata1 ),
+
+		  // Unused signals
+		  .aclr1 (1'b0),
+		  .addressstall_a (1'b0),
+		  .addressstall_b (1'b0),
+		  .clock1 (1'b1),
+		  .clocken0 (1'b1),
+		  .clocken1 (1'b1),
+		  .clocken2 (1'b1),
+		  .clocken3 (1'b1),
+		  .eccstatus ());
+
+	defparam
+		ip.address_reg_b = "CLOCK0",
+		ip.byteena_reg_b = "CLOCK0",
+		ip.byte_size = 8,
+		ip.clock_enable_input_a = "BYPASS",
+		ip.clock_enable_input_b = "BYPASS",
+		ip.clock_enable_output_a = "BYPASS",
+		ip.clock_enable_output_b = "BYPASS",
+		ip.indata_reg_b = "CLOCK0",
+		ip.init_file = data_file,
+		ip.intended_device_family = "Cyclone IV E",
+		ip.lpm_type = "altsyncram",
+		ip.numwords_a = 1 << words_lg2,
+		ip.numwords_b = 1 << words_lg2,
+		ip.operation_mode = "BIDIR_DUAL_PORT",
+		ip.outdata_aclr_a = "CLEAR0",
+		ip.outdata_aclr_b = "CLEAR0",
+		ip.outdata_reg_a = "UNREGISTERED",
+		ip.outdata_reg_b = "UNREGISTERED",
+		ip.power_up_uninitialized = "FALSE",
+		ip.read_during_write_mode_mixed_ports = "OLD_DATA",
+		ip.read_during_write_mode_port_a = "OLD_DATA",
+		ip.read_during_write_mode_port_b = "OLD_DATA",
+		ip.widthad_a = words_lg2,
+		ip.widthad_b = words_lg2,
+		ip.width_a = 32,
+		ip.width_b = 32,
+		ip.width_byteena_a = 4,
+		ip.width_byteena_b = 4,
+		ip.wrcontrol_wraddress_reg_b = "CLOCK0";
+
 endmodule // fast_mem

+ 0 - 6
fpga/ip/fastmem_ip.qip

@@ -1,6 +0,0 @@
-set_global_assignment -name IP_TOOL_NAME "RAM: 1-PORT"
-set_global_assignment -name IP_TOOL_VERSION "21.1"
-set_global_assignment -name IP_GENERATED_DEVICE_FAMILY "{Cyclone IV E}"
-set_global_assignment -name VERILOG_FILE [file join $::quartus(qip_path) "fastmem_ip.v"]
-set_global_assignment -name MISC_FILE [file join $::quartus(qip_path) "fastmem_ip_inst.v"]
-set_global_assignment -name MISC_FILE [file join $::quartus(qip_path) "fastmem_ip_bb.v"]

+ 0 - 192
fpga/ip/fastmem_ip.v

@@ -1,192 +0,0 @@
-// megafunction wizard: %RAM: 1-PORT%
-// GENERATION: STANDARD
-// VERSION: WM1.0
-// MODULE: altsyncram 
-
-// ============================================================
-// File Name: fastmem_ip.v
-// Megafunction Name(s):
-// 			altsyncram
-//
-// Simulation Library Files(s):
-// 			altera_mf
-// ============================================================
-// ************************************************************
-// THIS IS A WIZARD-GENERATED FILE. DO NOT EDIT THIS FILE!
-//
-// 21.1.0 Build 842 10/21/2021 SJ Lite Edition
-// ************************************************************
-
-
-//Copyright (C) 2021  Intel Corporation. All rights reserved.
-//Your use of Intel Corporation's design tools, logic functions 
-//and other software and tools, and any partner logic 
-//functions, and any output files from any of the foregoing 
-//(including device programming or simulation files), and any 
-//associated documentation or information are expressly subject 
-//to the terms and conditions of the Intel Program License 
-//Subscription Agreement, the Intel Quartus Prime License Agreement,
-//the Intel FPGA IP License Agreement, or other applicable license
-//agreement, including, without limitation, that your use is for
-//the sole purpose of programming logic devices manufactured by
-//Intel and sold by Intel or its authorized distributors.  Please
-//refer to the applicable agreement for further details, at
-//https://fpgasoftware.intel.com/eula.
-
-
-// synopsys translate_off
-`timescale 1 ps / 1 ps
-// synopsys translate_on
-module fastmem_ip (
-	aclr,
-	address,
-	byteena,
-	clock,
-	data,
-	rden,
-	wren,
-	q);
-
-	input	  aclr;
-	input	[12:0]  address;
-	input	[3:0]  byteena;
-	input	  clock;
-	input	[31:0]  data;
-	input	  rden;
-	input	  wren;
-	output	[31:0]  q;
-`ifndef ALTERA_RESERVED_QIS
-// synopsys translate_off
-`endif
-	tri0	  aclr;
-	tri1	[3:0]  byteena;
-	tri1	  clock;
-	tri1	  rden;
-`ifndef ALTERA_RESERVED_QIS
-// synopsys translate_on
-`endif
-
-	wire [31:0] sub_wire0;
-	wire [31:0] q = sub_wire0[31:0];
-
-	altsyncram	altsyncram_component (
-				.aclr0 (aclr),
-				.address_a (address),
-				.byteena_a (byteena),
-				.clock0 (clock),
-				.data_a (data),
-				.rden_a (rden),
-				.wren_a (wren),
-				.q_a (sub_wire0),
-				.aclr1 (1'b0),
-				.address_b (1'b1),
-				.addressstall_a (1'b0),
-				.addressstall_b (1'b0),
-				.byteena_b (1'b1),
-				.clock1 (1'b1),
-				.clocken0 (1'b1),
-				.clocken1 (1'b1),
-				.clocken2 (1'b1),
-				.clocken3 (1'b1),
-				.data_b (1'b1),
-				.eccstatus (),
-				.q_b (),
-				.rden_b (1'b1),
-				.wren_b (1'b0));
-	defparam
-		altsyncram_component.byte_size = 8,
-		altsyncram_component.clock_enable_input_a = "BYPASS",
-		altsyncram_component.clock_enable_output_a = "BYPASS",
-		altsyncram_component.init_file = "output/sram.mif",
-		altsyncram_component.intended_device_family = "Cyclone IV E",
-		altsyncram_component.lpm_hint = "ENABLE_RUNTIME_MOD=YES,INSTANCE_NAME=SRAM",
-		altsyncram_component.lpm_type = "altsyncram",
-		altsyncram_component.numwords_a = 8192,
-		altsyncram_component.operation_mode = "SINGLE_PORT",
-		altsyncram_component.outdata_aclr_a = "CLEAR0",
-		altsyncram_component.outdata_reg_a = "UNREGISTERED",
-		altsyncram_component.power_up_uninitialized = "FALSE",
-		altsyncram_component.read_during_write_mode_port_a = "DONT_CARE",
-		altsyncram_component.widthad_a = 13,
-		altsyncram_component.width_a = 32,
-		altsyncram_component.width_byteena_a = 4;
-
-
-endmodule
-
-// ============================================================
-// CNX file retrieval info
-// ============================================================
-// Retrieval info: PRIVATE: ADDRESSSTALL_A NUMERIC "0"
-// Retrieval info: PRIVATE: AclrAddr NUMERIC "0"
-// Retrieval info: PRIVATE: AclrByte NUMERIC "0"
-// Retrieval info: PRIVATE: AclrData NUMERIC "0"
-// Retrieval info: PRIVATE: AclrOutput NUMERIC "1"
-// Retrieval info: PRIVATE: BYTE_ENABLE NUMERIC "1"
-// Retrieval info: PRIVATE: BYTE_SIZE NUMERIC "8"
-// Retrieval info: PRIVATE: BlankMemory NUMERIC "0"
-// Retrieval info: PRIVATE: CLOCK_ENABLE_INPUT_A NUMERIC "0"
-// Retrieval info: PRIVATE: CLOCK_ENABLE_OUTPUT_A NUMERIC "0"
-// Retrieval info: PRIVATE: Clken NUMERIC "0"
-// Retrieval info: PRIVATE: DataBusSeparated NUMERIC "1"
-// Retrieval info: PRIVATE: IMPLEMENT_IN_LES NUMERIC "0"
-// Retrieval info: PRIVATE: INIT_FILE_LAYOUT STRING "PORT_A"
-// Retrieval info: PRIVATE: INIT_TO_SIM_X NUMERIC "0"
-// Retrieval info: PRIVATE: INTENDED_DEVICE_FAMILY STRING "Cyclone IV E"
-// Retrieval info: PRIVATE: JTAG_ENABLED NUMERIC "1"
-// Retrieval info: PRIVATE: JTAG_ID STRING "SRAM"
-// Retrieval info: PRIVATE: MAXIMUM_DEPTH NUMERIC "0"
-// Retrieval info: PRIVATE: MIFfilename STRING "output/sram.mif"
-// Retrieval info: PRIVATE: NUMWORDS_A NUMERIC "8192"
-// Retrieval info: PRIVATE: RAM_BLOCK_TYPE NUMERIC "0"
-// Retrieval info: PRIVATE: READ_DURING_WRITE_MODE_PORT_A NUMERIC "2"
-// Retrieval info: PRIVATE: RegAddr NUMERIC "1"
-// Retrieval info: PRIVATE: RegData NUMERIC "1"
-// Retrieval info: PRIVATE: RegOutput NUMERIC "0"
-// Retrieval info: PRIVATE: SYNTH_WRAPPER_GEN_POSTFIX STRING "0"
-// Retrieval info: PRIVATE: SingleClock NUMERIC "1"
-// Retrieval info: PRIVATE: UseDQRAM NUMERIC "1"
-// Retrieval info: PRIVATE: WRCONTROL_ACLR_A NUMERIC "0"
-// Retrieval info: PRIVATE: WidthAddr NUMERIC "13"
-// Retrieval info: PRIVATE: WidthData NUMERIC "32"
-// Retrieval info: PRIVATE: rden NUMERIC "1"
-// Retrieval info: LIBRARY: altera_mf altera_mf.altera_mf_components.all
-// Retrieval info: CONSTANT: BYTE_SIZE NUMERIC "8"
-// Retrieval info: CONSTANT: CLOCK_ENABLE_INPUT_A STRING "BYPASS"
-// Retrieval info: CONSTANT: CLOCK_ENABLE_OUTPUT_A STRING "BYPASS"
-// Retrieval info: CONSTANT: INIT_FILE STRING "output/sram.mif"
-// Retrieval info: CONSTANT: INTENDED_DEVICE_FAMILY STRING "Cyclone IV E"
-// Retrieval info: CONSTANT: LPM_HINT STRING "ENABLE_RUNTIME_MOD=YES,INSTANCE_NAME=SRAM"
-// Retrieval info: CONSTANT: LPM_TYPE STRING "altsyncram"
-// Retrieval info: CONSTANT: NUMWORDS_A NUMERIC "8192"
-// Retrieval info: CONSTANT: OPERATION_MODE STRING "SINGLE_PORT"
-// Retrieval info: CONSTANT: OUTDATA_ACLR_A STRING "CLEAR0"
-// Retrieval info: CONSTANT: OUTDATA_REG_A STRING "UNREGISTERED"
-// Retrieval info: CONSTANT: POWER_UP_UNINITIALIZED STRING "FALSE"
-// Retrieval info: CONSTANT: READ_DURING_WRITE_MODE_PORT_A STRING "DONT_CARE"
-// Retrieval info: CONSTANT: WIDTHAD_A NUMERIC "13"
-// Retrieval info: CONSTANT: WIDTH_A NUMERIC "32"
-// Retrieval info: CONSTANT: WIDTH_BYTEENA_A NUMERIC "4"
-// Retrieval info: USED_PORT: aclr 0 0 0 0 INPUT GND "aclr"
-// Retrieval info: USED_PORT: address 0 0 13 0 INPUT NODEFVAL "address[12..0]"
-// Retrieval info: USED_PORT: byteena 0 0 4 0 INPUT VCC "byteena[3..0]"
-// Retrieval info: USED_PORT: clock 0 0 0 0 INPUT VCC "clock"
-// Retrieval info: USED_PORT: data 0 0 32 0 INPUT NODEFVAL "data[31..0]"
-// Retrieval info: USED_PORT: q 0 0 32 0 OUTPUT NODEFVAL "q[31..0]"
-// Retrieval info: USED_PORT: rden 0 0 0 0 INPUT VCC "rden"
-// Retrieval info: USED_PORT: wren 0 0 0 0 INPUT NODEFVAL "wren"
-// Retrieval info: CONNECT: @aclr0 0 0 0 0 aclr 0 0 0 0
-// Retrieval info: CONNECT: @address_a 0 0 13 0 address 0 0 13 0
-// Retrieval info: CONNECT: @byteena_a 0 0 4 0 byteena 0 0 4 0
-// Retrieval info: CONNECT: @clock0 0 0 0 0 clock 0 0 0 0
-// Retrieval info: CONNECT: @data_a 0 0 32 0 data 0 0 32 0
-// Retrieval info: CONNECT: @rden_a 0 0 0 0 rden 0 0 0 0
-// Retrieval info: CONNECT: @wren_a 0 0 0 0 wren 0 0 0 0
-// Retrieval info: CONNECT: q 0 0 32 0 @q_a 0 0 32 0
-// Retrieval info: GEN_FILE: TYPE_NORMAL fastmem_ip.v TRUE
-// Retrieval info: GEN_FILE: TYPE_NORMAL fastmem_ip.inc FALSE
-// Retrieval info: GEN_FILE: TYPE_NORMAL fastmem_ip.cmp FALSE
-// Retrieval info: GEN_FILE: TYPE_NORMAL fastmem_ip.bsf FALSE
-// Retrieval info: GEN_FILE: TYPE_NORMAL fastmem_ip_inst.v TRUE
-// Retrieval info: GEN_FILE: TYPE_NORMAL fastmem_ip_bb.v TRUE
-// Retrieval info: LIB_FILE: altera_mf

+ 3 - 3
fpga/max80.qpf

@@ -19,16 +19,16 @@
 #
 # Quartus Prime
 # Version 21.1.0 Build 842 10/21/2021 SJ Lite Edition
-# Date created = 18:47:04  February 09, 2022
+# Date created = 00:52:55  February 10, 2022
 #
 # -------------------------------------------------------------------------- #
 
 QUARTUS_VERSION = "21.1"
-DATE = "18:47:04  February 09, 2022"
+DATE = "00:52:55  February 10, 2022"
 
 # Revisions
 
 PROJECT_REVISION = "v1"
-PROJECT_REVISION = "v2"
 PROJECT_REVISION = "v2boot"
+PROJECT_REVISION = "v2"
 PROJECT_REVISION = "v2alt"

+ 31 - 11
fpga/max80.sv

@@ -460,7 +460,6 @@ module max80
 	   );
 
    // Embedded RISC-V CPU
-   localparam cpu_fast_mem_bits = SRAM_BITS-2; /* 2^[this] * 4 bytes */
 
    // Edge-triggered IRQs. picorv32 latches interrupts
    // but doesn't edge detect for a slow signal, so do it
@@ -516,7 +515,7 @@ module max80
 	      .MASKED_IRQ ( irq_masked ),
 	      .LATCHED_IRQ ( 32'h0000_0007 ),
 	      .REGS_INIT_ZERO ( 1 ),
-	      .STACKADDR ( 32'h4 << cpu_fast_mem_bits )
+	      .STACKADDR ( 1'b1 << SRAM_BITS )
    ) cpu (
 	.clk ( sys_clk ),
 	.resetn ( rst_n ),
@@ -561,16 +560,31 @@ module max80
    //
    wire [31:0] fast_mem_rdata;
 
-   fast_mem // #(.bits(cpu_fast_mem_bits), .mif("../rv32/boot"))
+   wire [SRAM_BITS-1:2] vjtag_sram_addr;
+   wire 		vjtag_sram_read;
+   wire 		vjtag_sram_write;
+   wire [31:0] 		vjtag_sram_rdata;
+   wire [31:0] 		vjtag_sram_wdata;
+
+   fast_mem #(.words_lg2(SRAM_BITS-2),
+	      .data_file("output/sram.mif"))
    fast_mem(
 	    .rst_n ( rst_n ),
 	    .clk   ( sys_clk ),
-	    .read  ( cpu_la_read  & cpu_la_addr[31:30] == 2'b00 ),
-	    .write ( cpu_la_write & cpu_la_addr[31:30] == 2'b00 ),
-	    .wstrb ( cpu_la_wstrb ),
-	    .addr  ( cpu_la_addr[14:2] ),
-	    .wdata ( cpu_la_wdata ),
-	    .rdata ( fast_mem_rdata )
+
+	    .read0  ( 1'b1 ), // cpu_la_read  & cpu_la_addr[31:30] == 2'b00
+	    .write0 ( cpu_la_write & cpu_la_addr[31:30] == 2'b00 ),
+	    .wstrb0 ( cpu_la_wstrb ),
+	    .addr0  ( cpu_la_addr[SRAM_BITS-1:2] ),
+	    .wdata0 ( cpu_la_wdata ),
+	    .rdata0 ( fast_mem_rdata ),
+
+	    .read1  ( 1'b1 ), // vjtag_sram_read
+	    .write1 ( vjtag_sram_write ),
+	    .wstrb1 ( 4'b1111 ),
+	    .addr1  ( vjtag_sram_addr ),
+	    .wdata1 ( vjtag_sram_wdata ),
+	    .rdata1 ( vjtag_sram_rdata )
 	    );
 
    // Register I/O data to reduce the size of the read data MUX
@@ -891,10 +905,12 @@ module max80
    wire        vjtag_cpu_halt;
 
    vjtag_max80 #(.sdram_base_addr(SDRAM_ADDR),
-		 .sdram_bits(SDRAM_BITS))
+		 .sdram_bits(SDRAM_BITS),
+		 .sram_bits(SRAM_BITS))
    vjtag (
 	  .rst_n	( rst_n ),
 	  .sys_clk      ( sys_clk ),
+	  .reset_cmd    ( vjtag_reset_cmd ),
 
 	  .sdram	( sr_bus[2].dstr ),
 
@@ -906,7 +922,11 @@ module max80
 	  .cpu_irq      ( iodev_irq_vjtag ),
 	  .cpu_halt     ( vjtag_cpu_halt ),
 
-	  .reset_cmd    ( vjtag_reset_cmd )
+	  .sram_addr    ( vjtag_sram_addr ),
+	  .sram_rdata   ( vjtag_sram_rdata ),
+	  .sram_wdata   ( vjtag_sram_wdata ),
+	  .sram_read    ( vjtag_sram_read ),
+	  .sram_write   ( vjtag_sram_write )
 	  );
 
    assign cpu_halt = vjtag_cpu_halt;

+ 6 - 6
fpga/output/sram.mif

@@ -1747,12 +1747,12 @@ CONTENT BEGIN
 06CC : 6362612F;
 06CD : 6B736964;
 06CE : 3030382E;
-06CF : 6557002F;
-06D0 : 65462064;
-06D1 : 39202062;
-06D2 : 3A383120;
-06D3 : 333A3434;
-06D4 : 53502030;
+06CF : 6854002F;
+06D0 : 65462075;
+06D1 : 30312062;
+06D2 : 3A303020;
+06D3 : 343A3934;
+06D4 : 53502037;
 06D5 : 30322054;
 06D6 : 003232;
 [06D7..1FFF] : 00;

BIN
fpga/output/v1.jic


BIN
fpga/output/v1.rbf.gz


BIN
fpga/output/v1.rpd.gz


BIN
fpga/output/v1.sof


BIN
fpga/output/v1.svf.gz


BIN
fpga/output/v1.xsvf.gz


BIN
fpga/output/v2.jic


BIN
fpga/output/v2.rbf.gz


BIN
fpga/output/v2.rpd.gz


BIN
fpga/output/v2.sof


BIN
fpga/output/v2.svf.gz


BIN
fpga/output/v2.xsvf.gz


BIN
fpga/output/v2boot.rbf.gz


BIN
fpga/output/v2boot.sof


BIN
fpga/output/v2boot.svf.gz


BIN
fpga/output/v2boot.xsvf.gz


+ 1 - 1
fpga/v2.qsf

@@ -1,7 +1,7 @@
 # -*- tcl -*-
 
 set_global_assignment -name TOP_LEVEL_ENTITY v2
-set_global_assignment -name SOURCE_FILE "output/v2.jic.cof"
+set_global_assignment -name SOURCE_FILE output/v2.jic.cof
 set_global_assignment -name SYSTEMVERILOG_FILE v2.sv
 
 set_global_assignment -name SOURCE_TCL_SCRIPT_FILE v2_common.qsf

+ 136 - 98
fpga/vjtag_max80.sv

@@ -38,28 +38,36 @@
 //
 module vjtag_max80
 #(
+  parameter        sram_bits,
   parameter [31:0] sdram_base_addr,
   parameter        sdram_bits,
   parameter [31:0] VJTAG_WRITE_PREFIX = 32'hABC80FED
 ) (
-   input	 rst_n,
-   input	 sys_clk,
+   input		  rst_n,
+   input		  sys_clk,
 
-   dram_bus.dstr sdram,
+   output		  reset_cmd,
 
-   input	 cpu_valid,
-   input   [6:2] cpu_addr,
-   input  [31:0] cpu_wdata,
-   input  [ 3:0] cpu_wstrb,
-   output [31:0] cpu_rdata,
-   output	 cpu_irq,
-   output	 cpu_halt,
+   input		  cpu_valid,
+   input [6:2]		  cpu_addr,
+   input [31:0]		  cpu_wdata,
+   input [ 3:0]		  cpu_wstrb,
+   output [31:0]	  cpu_rdata,
+   output		  cpu_irq,
+   output		  cpu_halt,
 
-   output reg	 reset_cmd
-		);
+   dram_bus.dstr          sdram,
+
+   // SRAM interface
+   output [sram_bits-1:2] sram_addr,
+   input [31:0]		  sram_rdata,
+   output [31:0]	  sram_wdata,
+   output		  sram_read,
+   output		  sram_write
+   );
 
    wire		  v_tdi;
-   wire		  v_tdo;
+   reg		  v_tdo;
    wire [4:0]	  v_ir;
    wire		  v_tck;
    wire		  v_st_cdr;
@@ -104,17 +112,13 @@ module vjtag_max80
    localparam cmd_cpustatus  = 4'b1111;
 
    reg	       jtag_bypass;
-   reg  [31:0] jtag_shr;
+   wire        jtag_out;
    reg	       tdi_s;
+   wire	       tdo_s;
 
    // Latched information for use in the synchronous state machine
    reg [3:0]   ir_cmd;		// Command part of IR
    reg	       ir_ro;		// Readonly (update suppress)
-   reg	       st_cdr_s;
-   reg	       st_sdr_s;
-   reg	       st_xdr_s;	// Any state between CDR and UDR, exclusively
-   reg	       st_udr_s;
-   reg	       st_uir_s;
 
    always @(posedge v_tck)
      begin
@@ -122,20 +126,15 @@ module vjtag_max80
 	tdi_s       <= v_tdi;
 	ir_cmd      <= v_ir[4:1];
 	ir_ro       <= v_ir[0];
-	st_cdr_s    <= v_st_cdr;
-	st_sdr_s    <= v_st_sdr;
-	st_xdr_s    <= v_st_sdr|v_st_e1dr|v_st_e2dr|v_st_pdr;
-	st_udr_s    <= v_st_udr;
-	st_uir_s    <= v_st_uir;
      end
 
-   assign v_tdo = jtag_bypass ? tdi_s : jtag_shr[0];
+   assign v_tdo = jtag_bypass ? tdi_s : tdo_s;
 
    // Sync incoming JTAG signals. Only tck needs an actual
    // synchronizer; the rest just need holding registers (see above)
    // as the delay of tck will guarantee the others are stable.
    wire       tck_s;
-   synchronizer #(.width(1)) tck_sync
+   synchronizer #(.width(1), .stages(3)) tck_sync
      (
       .rst_n ( rst_n ),
       .clk   ( sys_clk ),
@@ -144,13 +143,17 @@ module vjtag_max80
       );
 
    // Mask of memaddr bits that are not settable: the top bits
-   // and the bottom two bits (byte within dword)
-   localparam [31:0] memaddr_mask = (1'b1 << sdram_bits) - 3'b100;
+   // and the bottom two bits (byte within dword). The sram/dram
+   // select bit is settable: it is ORed into memaddr_mask below.
+   localparam [31:0] memaddr_mask = ((1'b1 << sdram_bits) - 3'b100)
+     | sdram_base_addr;
 
    function logic [31:0] maskaddr (input [31:0] addr);
-      maskaddr = (addr & memaddr_mask) | sdram_base_addr;
+      maskaddr = addr & memaddr_mask;
    endfunction
 
+   wire is_dram = |(mem_addr & sdram_base_addr); // Really just one bit
+
    reg	      jtag_cpu_irq   = 1'b0;
    reg	      jtag_cpu_halt  = 1'b0;
    reg	      jtag_reset_cmd = 1'b0;
@@ -168,13 +171,15 @@ module vjtag_max80
    reg	       mem_write;
    reg  [31:0] mem_addr;
    wire [31:0] mem_addr_next = maskaddr(mem_addr + 3'h4);
-   wire [31:0] mem_rdata;
+   wire [31:0] sdram_rdata;
    reg  [31:0] mem_wdata;
-   wire        mem_ready;
+   wire        sdram_ready;
+   reg	       sram_ready;
    reg	       mem_done;
    reg	       mem_error;	// Memory underrun
    reg	       advance_mem_addr;
    reg	       mem_header_done;
+   reg 	       mem_do_write;
 
    reg	       tck_q;
    reg	       tck_stb;
@@ -184,14 +189,18 @@ module vjtag_max80
 	tck_stb <= tck_s & ~tck_q;
      end
 
+
    // Keep a counter to keep track of SDRAM data bit count; this is to
    // allow streaming of data to/from SDRAM without leaving the
    // SDR state.
    reg [4:0] sdr_ctr;
 
-   wire [31:0] jtag_shr_in =
-	       ir_cmd[3] ? { tdi_s : jtag_shr[31:1] } :
+   // Main data shift register.
+   reg  [31:0] jtag_shr;
+   wire [31:0] jtag_shr_in = ir_cmd[3]
+	       ? { tdi_s, jtag_shr[31:1] } :
 	       { 30'bx, tdi_s, jtag_shr[1] };
+   assign tdo_s = jtag_shr[0];
 
    always @(posedge sys_clk)
      begin
@@ -202,22 +211,30 @@ module vjtag_max80
 
 	if ( tck_stb )
 	  begin
-	     if ( st_sdr_s )
+	     if ( v_st_sdr )
 	       jtag_shr <= jtag_shr_in;
 
-	     if ( st_cdr_s )
+	     if ( v_st_cdr )
 	       case ( ir_cmd )
-		 cmd_halt:      jtag_shr[0] <= jtag_cpu_halt;
+		 cmd_halt: begin
+		    jtag_shr[0] <= jtag_cpu_halt;
+		 end
 		 cmd_memerr: begin
 		    jtag_shr[0] <= mem_error;
 		    if ( ~ir_ro ) mem_error <= 1'b0;
 		 end
-		 cmd_mem0, cmd_memaddr, cmd_memread, cmd_memwrite:
-		   jtag_shr <= jtag_memaddr;
-		 cmd_cpucmd, cmd_cpucmd_irq:
-		   jtag_shr  <= jtag_cpucmd;
-		 cmd_cpuinfo:    jtag_shr    <= jtag_cpuinfo;
-		 cmd_cpustatus:  jtag_shr    <= jtag_cpustatus;
+		 cmd_mem0, cmd_memaddr, cmd_memread, cmd_memwrite: begin
+		    jtag_shr <= jtag_memaddr;
+		 end
+		 cmd_cpucmd, cmd_cpucmd_irq: begin
+		    jtag_shr  <= jtag_cpucmd;
+		 end
+		 cmd_cpuinfo: begin
+		    jtag_shr <= jtag_cpuinfo;
+		 end
+		 cmd_cpustatus: begin
+		    jtag_shr <= jtag_cpustatus;
+		 end
 		 default:       ;
 	       endcase // case ( ir_cmd )
 
@@ -233,24 +250,23 @@ module vjtag_max80
 	     // suppress the update until we know that the user will
 	     // be reading the fetched data.
 
+	     mem_do_write <= 1'b0;
+	     
 	     if ( ir_cmd[3:1] == cmd_memwr )
 	       begin
-		  if ( st_cdr_s )
+		  if ( v_st_cdr )
 		    begin
 		       mem_done           <= 1'b0;
 		       mem_valid          <= 1'b0;
 		       mem_write          <= ir_cmd[0];
 		       advance_mem_addr   <= 1'b0;
-		       mem_addr           <= jtag_memaddr;
-		    end
-
-		  if ( ~st_xdr_s )
-		    begin
 		       mem_header_done    <= 1'b0;
+		       mem_addr           <= jtag_memaddr;
 		       sdr_ctr            <= 5'b0;
+		       mem_do_write       <= 1'b0;
 		    end
 
-		  if ( st_sdr_s )
+		  if ( v_st_sdr )
 		    begin
 		       sdr_ctr <= sdr_ctr + 1'b1;
 
@@ -266,74 +282,91 @@ module vjtag_max80
 				 // Write
 				 if ( jtag_shr_in == VJTAG_WRITE_PREFIX )
 				   mem_header_done <= 1'b1;
-				 sdr_ctr <= 5'b0;
+				 else
+				   sdr_ctr <= 5'b0;
 			      end
 			 end
 
 		       // Memory access underrun?
-		       if ( &sdr_ctr )
+		       if ( sdr_ctr == 5'd31 )
 			 mem_error <= mem_error | mem_valid;
 
 		       if ( ~mem_write )
-			 // Read
-			 case ( sdr_ctr )
-			   5'd2: begin
-			      // For a read, make sure we are committed
-			      // to reading the new word
-			      if ( ~ir_ro )
-				jtag_memaddr     <= mem_addr;
-
-			      advance_mem_addr   <= mem_header_done;
-			   end
-			   5'd3:
-			     begin
-				// After mem_addr advanced
-				mem_valid          <= 1'b1;
-				mem_done           <= 1'b0;
-			     end
-			   5'd31: begin
-			      jtag_shr <= mem_rdata;
-			   end
-			   default: ;
-			 endcase // case ( sdr_ctr )
+			 begin
+			    // Read
+			    case ( sdr_ctr )
+			      5'd2: begin
+				 // For a read, make sure we are committed
+				 // to reading the new word
+				 if ( ~ir_ro )
+				   jtag_memaddr     <= mem_addr;
+				 
+				 advance_mem_addr   <= mem_header_done;
+			      end
+			      5'd3:
+				begin
+				   // After mem_addr advanced
+				   mem_valid          <= 1'b1;
+				   mem_done           <= 1'b0;
+				end
+			      5'd31: begin
+				 jtag_shr <= is_dram ? sdram_rdata : sram_rdata;
+			      end
+			      default: ;
+			    endcase // case ( sdr_ctr )
+			 end // if ( ~mem_write )
 		       else
-			 // Write
-			 if ( &sdr_ctr )
-			   begin
-			      mem_wdata          <= jtag_shr_in;
-			      mem_valid          <= 1'b1;
-			      mem_done           <= 1'b0;
-			      advance_mem_addr   <= 1'b1;
-			      if ( ~ir_ro )
-				jtag_memaddr     <= mem_addr_next;
-			   end
+			 begin
+			    // Write
+			    mem_do_write <= &sdr_ctr;
+			 end // else: !if( ~mem_write )
 		    end // if ( st_sdr_s )
 	       end // if ( ir_cmd[3:1] == cmd_memwr )
-
-	     if ( st_uir_s )
+	     
+	     if ( mem_do_write )
+	       begin
+		  mem_wdata          <= jtag_shr_in;
+		  mem_valid          <= 1'b1;
+		  mem_done           <= 1'b0;
+		  advance_mem_addr   <= 1'b1;
+		  if ( ~ir_ro )
+		    jtag_memaddr     <= mem_addr_next;
+	       end
+
+	     if ( v_st_uir )
 	       jtag_cpu_irq <= ir_cmd == cmd_irq;
-	     else if ( st_udr_s )
+	     else if ( v_st_udr )
 	       jtag_cpu_irq <= ir_cmd == cmd_cpucmd_irq;
 
-	     if ( st_uir_s )
+	     if ( v_st_uir )
 	       jtag_reset_cmd <= jtag_reset_cmd | (ir_cmd == cmd_reset);
 
-	     if ( st_udr_s & ~ir_ro )
+	     if ( v_st_udr & ~ir_ro )
 	       case ( ir_cmd )
-		 cmd_halt:    jtag_cpu_halt  <= jtag_shr[0];
-		 cmd_memaddr: jtag_memaddr   <= maskaddr(jtag_shr);
-		 cmd_cpucmd, cmd_cpucmd_irq:
-		   jtag_cpucmd    <= jtag_shr;
+		 cmd_halt: begin
+		    jtag_cpu_halt  <= jtag_shr_in[0];
+		 end
+		 cmd_memaddr: begin
+		    jtag_memaddr   <= maskaddr(jtag_shr_in);
+		 end
+		 cmd_cpucmd, cmd_cpucmd_irq: begin
+		    jtag_cpucmd    <= jtag_shr_in;
+		 end
 		 default:     /* nothing */ ;
 	       endcase // case ( ir_cmd )
 	  end // if ( tck_stb )
 
 	// Increment the temporary address register if applicable,
 	// but only after the previous transaction is done...
-	if ( mem_valid & mem_ready )
+	if ( mem_valid )
 	  begin
-	     mem_valid <= 1'b0;
-	     mem_done  <= 1'b1;
+	     if (is_dram ? sdram_ready : sram_ready)
+	       begin
+		  mem_valid <= 1'b0;
+		  mem_done  <= 1'b1;
+	       end
+	     else
+	       sram_ready <= ~is_dram;
 	  end
 	if ( advance_mem_addr & ~mem_valid )
 	  begin
@@ -347,15 +380,20 @@ module vjtag_max80
       .bus   ( sdram ),
       .prio  ( 2'd2 ),
       .addr  ( mem_addr ),
-      .valid ( mem_valid ),
+      .valid ( mem_valid & is_dram ),
       .wd    ( mem_wdata ),
       .wstrb ( {4{mem_write}} ),
-      .ready ( mem_ready ),
-      .rd    ( mem_rdata )
+      .ready ( sdram_ready ),
+      .rd    ( sdram_rdata )
       );
 
+   assign sram_addr  = mem_addr[sram_bits-1:2];
+   assign sram_wdata = mem_wdata;
+   assign sram_read  = mem_valid & ~is_dram & ~mem_write;
+   assign sram_write = mem_valid & ~is_dram &  mem_write;
+
    wire [7:0] cpustatus_new =
-	      ( tck_stb & st_cdr_s & ~ir_ro & (ir_cmd == cmd_cpustatus) )
+	      ( tck_stb & v_st_cdr & ~ir_ro & (ir_cmd == cmd_cpustatus) )
 	      ? 'b0 : jtag_cpustatus;
 
    always @(negedge rst_n or posedge sys_clk)