Browse Source

picorv32: revamp the Q registers to be a full bank switch

There really is no more cost to do a full register bank switch than
the qregs as implemented, and it really speeds up interrupts.

Something isn't right yet, though; this is a checkpoint.
H. Peter Anvin 3 years ago
parent
commit
cd15d8fd49
12 changed files with 3682 additions and 3693 deletions
  1. 3 2
      fpga/max80.sv
  2. BIN
      fpga/output_files/max80.jbc
  3. BIN
      fpga/output_files/max80.jic
  4. BIN
      fpga/output_files/max80.pof
  5. BIN
      fpga/output_files/max80.sof
  6. 177 147
      fpga/picorv32.v
  7. 2 1
      fw/Makefile
  8. 3470 3470
      fw/boot.mif
  9. 2 1
      fw/fw.h
  10. 1 3
      fw/hello.c
  11. 20 46
      fw/irq.c
  12. 7 23
      fw/picorv32.h

+ 3 - 2
fpga/max80.sv

@@ -526,8 +526,6 @@ module max80 (
 	      .ENABLE_IRQ_QREGS ( 1 ),
 	      .ENABLE_IRQ_TIMER ( 1 ),
 	      .LATCHED_IRQ ( 32'h0000_0007 ),
-	      .PROGADDR_RESET ( 32'h0000_0000 ),
-	      .PROGADDR_IRQ ( 32'h0000_0020 ),
 	      .REGS_INIT_ZERO ( 1 ),
 	      .STACKADDR ( 32'h4 << cpu_fast_mem_bits )
 	      )
@@ -537,6 +535,9 @@ module max80 (
 	.resetn ( rst_n ),
 	.trap ( ),
 
+	.progaddr_reset ( 32'h0000_0000 ),
+	.progaddr_irq   ( 32'h0000_0020 ),
+
 	.mem_instr ( cpu_mem_instr ),
 	.mem_ready ( cpu_mem_ready ),
 	.mem_valid ( cpu_mem_valid ),

BIN
fpga/output_files/max80.jbc


BIN
fpga/output_files/max80.jic


BIN
fpga/output_files/max80.pof


BIN
fpga/output_files/max80.sof


+ 177 - 147
fpga/picorv32.v

@@ -15,6 +15,18 @@
  *  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  *  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  *
+ *  Changes by hpa 2021:
+ *  - maskirq instruction takes a mask in rs2.
+ *  - retirq opcode changed to mret; no functional change.
+ *  - qregs replaced with a full register bank switch. In general,
+ *    non-power-of-two register files don't save anything, especially in
+ *    FPGAs.
+ *  - getq and setq replaced with new instructions addqxi and addxqi
+ *    for cross-bank register accesses if needed,
+ *    e.g. for stack setup (addqxi sp,sp,frame_size).
+ *  - PROGADDR_RESET and PROGADDR_IRQ changed to ports (allows external
+ *    implementation of vectorized interrupts or fallback reset.)
+ *  - maskirq, waitirq and timer require func3 == 3'b000.
  */
 
 /* verilator lint_off WIDTH */
@@ -83,50 +95,53 @@ module picorv32 #(
 	parameter [ 0:0] REGS_INIT_ZERO = 0,
 	parameter [31:0] MASKED_IRQ = 32'h 0000_0000,
 	parameter [31:0] LATCHED_IRQ = 32'h ffff_ffff,
-	parameter [31:0] PROGADDR_RESET = 32'h 0000_0000,
-	parameter [31:0] PROGADDR_IRQ = 32'h 0000_0010,
-	parameter [31:0] STACKADDR = 32'h ffff_ffff
+	parameter [31:0] STACKADDR = 32'h ffff_ffff,
+	parameter [ 4:0] RA_IRQ_REG   = ENABLE_IRQ_QREGS ? 26 : 3,
+	parameter [ 4:0] MASK_IRQ_REG = ENABLE_IRQ_QREGS ? 27 : 4
 ) (
-	input clk, resetn,
-	output reg trap,
+	input		  clk, resetn,
+	output reg	  trap,
 
-	output reg        mem_valid,
-	output reg        mem_instr,
-	input             mem_ready,
+	input [31:0]	  progaddr_reset,
+        input [31:0]	  progaddr_irq,
+
+	output reg	  mem_valid,
+	output reg	  mem_instr,
+	input		  mem_ready,
 
 	output reg [31:0] mem_addr,
 	output reg [31:0] mem_wdata,
 	output reg [ 3:0] mem_wstrb,
-	input      [31:0] mem_rdata,
+	input [31:0]	  mem_rdata,
 
 	// Look-Ahead Interface
-	output            mem_la_read,
-	output            mem_la_write,
-	output     [31:0] mem_la_addr,
+	output		  mem_la_read,
+	output		  mem_la_write,
+	output [31:0]	  mem_la_addr,
 	output reg [31:0] mem_la_wdata,
 	output reg [ 3:0] mem_la_wstrb,
 
 	// Pico Co-Processor Interface (PCPI)
-	output reg        pcpi_valid,
+	output reg	  pcpi_valid,
 	output reg [31:0] pcpi_insn,
-	output     [31:0] pcpi_rs1,
-	output     [31:0] pcpi_rs2,
-	input             pcpi_wr,
-	input      [31:0] pcpi_rd,
-	input             pcpi_wait,
-	input             pcpi_ready,
+	output [31:0]	  pcpi_rs1,
+	output [31:0]	  pcpi_rs2,
+	input		  pcpi_wr,
+	input [31:0]	  pcpi_rd,
+	input		  pcpi_wait,
+	input		  pcpi_ready,
 
 	// IRQ Interface
-	input      [31:0] irq,
+	input [31:0]	  irq,
 	output reg [31:0] eoi,
 
 `ifdef RISCV_FORMAL
-	output reg        rvfi_valid,
+	output reg	  rvfi_valid,
 	output reg [63:0] rvfi_order,
 	output reg [31:0] rvfi_insn,
-	output reg        rvfi_trap,
-	output reg        rvfi_halt,
-	output reg        rvfi_intr,
+	output reg	  rvfi_trap,
+	output reg	  rvfi_halt,
+	output reg	  rvfi_intr,
 	output reg [ 1:0] rvfi_mode,
 	output reg [ 1:0] rvfi_ixl,
 	output reg [ 4:0] rvfi_rs1_addr,
@@ -155,16 +170,18 @@ module picorv32 #(
 `endif
 
 	// Trace Interface
-	output reg        trace_valid,
+	output reg	  trace_valid,
 	output reg [35:0] trace_data
 );
 	localparam integer irq_timer = 0;
 	localparam integer irq_ebreak = 1;
 	localparam integer irq_buserror = 2;
 
-	localparam integer irqregs_offset = ENABLE_REGS_16_31 ? 32 : 16;
-	localparam integer regfile_size = (ENABLE_REGS_16_31 ? 32 : 16) + 4*ENABLE_IRQ*ENABLE_IRQ_QREGS;
-	localparam integer regindex_bits = (ENABLE_REGS_16_31 ? 5 : 4) + ENABLE_IRQ*ENABLE_IRQ_QREGS;
+	localparam integer xreg_count   = ENABLE_REGS_16_31 ? 32 : 16;
+        localparam integer qreg_count   = (ENABLE_IRQ && ENABLE_IRQ_QREGS) ? xreg_count : 0;
+        localparam integer qreg_offset  = qreg_count; // 0 for no qregs
+        localparam integer regfile_size = xreg_count + qreg_count;
+        localparam integer regindex_bits = $clog2(regfile_size);
 
 	localparam WITH_PCPI = ENABLE_PCPI || ENABLE_MUL || ENABLE_FAST_MUL || ENABLE_DIV;
 
@@ -218,38 +235,40 @@ module picorv32 #(
 	endtask
 
 `ifdef DEBUGREGS
+`define dr_reg(x) cpuregs[x | (irq_active ? qreg_offset : 0)]
+
 	wire [31:0] dbg_reg_x0  = 0;
-	wire [31:0] dbg_reg_x1  = cpuregs[1];
-	wire [31:0] dbg_reg_x2  = cpuregs[2];
-	wire [31:0] dbg_reg_x3  = cpuregs[3];
-	wire [31:0] dbg_reg_x4  = cpuregs[4];
-	wire [31:0] dbg_reg_x5  = cpuregs[5];
-	wire [31:0] dbg_reg_x6  = cpuregs[6];
-	wire [31:0] dbg_reg_x7  = cpuregs[7];
-	wire [31:0] dbg_reg_x8  = cpuregs[8];
-	wire [31:0] dbg_reg_x9  = cpuregs[9];
-	wire [31:0] dbg_reg_x10 = cpuregs[10];
-	wire [31:0] dbg_reg_x11 = cpuregs[11];
-	wire [31:0] dbg_reg_x12 = cpuregs[12];
-	wire [31:0] dbg_reg_x13 = cpuregs[13];
-	wire [31:0] dbg_reg_x14 = cpuregs[14];
-	wire [31:0] dbg_reg_x15 = cpuregs[15];
-	wire [31:0] dbg_reg_x16 = cpuregs[16];
-	wire [31:0] dbg_reg_x17 = cpuregs[17];
-	wire [31:0] dbg_reg_x18 = cpuregs[18];
-	wire [31:0] dbg_reg_x19 = cpuregs[19];
-	wire [31:0] dbg_reg_x20 = cpuregs[20];
-	wire [31:0] dbg_reg_x21 = cpuregs[21];
-	wire [31:0] dbg_reg_x22 = cpuregs[22];
-	wire [31:0] dbg_reg_x23 = cpuregs[23];
-	wire [31:0] dbg_reg_x24 = cpuregs[24];
-	wire [31:0] dbg_reg_x25 = cpuregs[25];
-	wire [31:0] dbg_reg_x26 = cpuregs[26];
-	wire [31:0] dbg_reg_x27 = cpuregs[27];
-	wire [31:0] dbg_reg_x28 = cpuregs[28];
-	wire [31:0] dbg_reg_x29 = cpuregs[29];
-	wire [31:0] dbg_reg_x30 = cpuregs[30];
-	wire [31:0] dbg_reg_x31 = cpuregs[31];
+	wire [31:0] dbg_reg_x1  = `dr_reg(1);
+	wire [31:0] dbg_reg_x2  = `dr_reg(2);
+	wire [31:0] dbg_reg_x3  = `dr_reg(3);
+	wire [31:0] dbg_reg_x4  = `dr_reg(4);
+	wire [31:0] dbg_reg_x5  = `dr_reg(5);
+	wire [31:0] dbg_reg_x6  = `dr_reg(6);
+	wire [31:0] dbg_reg_x7  = `dr_reg(7);
+	wire [31:0] dbg_reg_x8  = `dr_reg(8);
+	wire [31:0] dbg_reg_x9  = `dr_reg(9);
+	wire [31:0] dbg_reg_x10 = `dr_reg(10);
+	wire [31:0] dbg_reg_x11 = `dr_reg(11);
+	wire [31:0] dbg_reg_x12 = `dr_reg(12);
+	wire [31:0] dbg_reg_x13 = `dr_reg(13);
+	wire [31:0] dbg_reg_x14 = `dr_reg(14);
+	wire [31:0] dbg_reg_x15 = `dr_reg(15);
+	wire [31:0] dbg_reg_x16 = `dr_reg(16);
+	wire [31:0] dbg_reg_x17 = `dr_reg(17);
+	wire [31:0] dbg_reg_x18 = `dr_reg(18);
+	wire [31:0] dbg_reg_x19 = `dr_reg(19);
+	wire [31:0] dbg_reg_x20 = `dr_reg(20);
+	wire [31:0] dbg_reg_x21 = `dr_reg(21);
+	wire [31:0] dbg_reg_x22 = `dr_reg(22);
+	wire [31:0] dbg_reg_x23 = `dr_reg(23);
+	wire [31:0] dbg_reg_x24 = `dr_reg(24);
+	wire [31:0] dbg_reg_x25 = `dr_reg(25);
+	wire [31:0] dbg_reg_x26 = `dr_reg(26);
+	wire [31:0] dbg_reg_x27 = `dr_reg(27);
+	wire [31:0] dbg_reg_x28 = `dr_reg(28);
+	wire [31:0] dbg_reg_x29 = `dr_reg(29);
+	wire [31:0] dbg_reg_x30 = `dr_reg(30);
+	wire [31:0] dbg_reg_x31 = `dr_reg(31);
 `endif
 
 	// Internal PCPI Cores
@@ -649,7 +668,8 @@ module picorv32 #(
 	reg instr_addi, instr_slti, instr_sltiu, instr_xori, instr_ori, instr_andi, instr_slli, instr_srli, instr_srai;
 	reg instr_add, instr_sub, instr_sll, instr_slt, instr_sltu, instr_xor, instr_srl, instr_sra, instr_or, instr_and;
 	reg instr_rdcycle, instr_rdcycleh, instr_rdinstr, instr_rdinstrh, instr_ecall_ebreak;
-	reg instr_getq, instr_setq, instr_retirq, instr_maskirq, instr_waitirq, instr_timer;
+	reg instr_addqxi, instr_addxqi, instr_retirq, instr_maskirq, instr_waitirq, instr_timer;
+
 	wire instr_trap;
 
 	reg [regindex_bits-1:0] decoded_rd, decoded_rs1, decoded_rs2;
@@ -663,10 +683,10 @@ module picorv32 #(
 	reg is_lui_auipc_jal;
 	reg is_lb_lh_lw_lbu_lhu;
 	reg is_slli_srli_srai;
-	reg is_jalr_addi_slti_sltiu_xori_ori_andi;
+	reg is_jalr_addi_slti_sltiu_xori_ori_andi_addqxi;
 	reg is_sb_sh_sw;
 	reg is_sll_srl_sra;
-	reg is_lui_auipc_jal_jalr_addi_add_sub;
+	reg is_lui_auipc_jal_jalr_addi_add_sub_addqxi;
 	reg is_slti_blt_slt;
 	reg is_sltiu_bltu_sltu;
 	reg is_beq_bne_blt_bge_bltu_bgeu;
@@ -674,6 +694,7 @@ module picorv32 #(
 	reg is_alu_reg_imm;
 	reg is_alu_reg_reg;
 	reg is_compare;
+        reg is_addqxi;
 
 	assign instr_trap = (CATCH_ILLINSN || WITH_PCPI) && !{instr_lui, instr_auipc, instr_jal, instr_jalr,
 			instr_beq, instr_bne, instr_blt, instr_bge, instr_bltu, instr_bgeu,
@@ -681,7 +702,7 @@ module picorv32 #(
 			instr_addi, instr_slti, instr_sltiu, instr_xori, instr_ori, instr_andi, instr_slli, instr_srli, instr_srai,
 			instr_add, instr_sub, instr_sll, instr_slt, instr_sltu, instr_xor, instr_srl, instr_sra, instr_or, instr_and,
 			instr_rdcycle, instr_rdcycleh, instr_rdinstr, instr_rdinstrh,
-			instr_getq, instr_setq, instr_retirq, instr_maskirq, instr_waitirq, instr_timer};
+			instr_addqxi, instr_retirq, instr_maskirq, instr_waitirq, instr_timer};
 
 	wire is_rdcycle_rdcycleh_rdinstr_rdinstrh;
 	assign is_rdcycle_rdcycleh_rdinstr_rdinstrh = |{instr_rdcycle, instr_rdcycleh, instr_rdinstr, instr_rdinstrh};
@@ -747,8 +768,8 @@ module picorv32 #(
 		if (instr_rdinstr)  new_ascii_instr = "rdinstr";
 		if (instr_rdinstrh) new_ascii_instr = "rdinstrh";
 
-		if (instr_getq)     new_ascii_instr = "getq";
-		if (instr_setq)     new_ascii_instr = "setq";
+	        if (instr_addqxi)   new_ascii_instr = "addqxi";
+	        if (instr_addxqi)   new_ascii_instr = "addxqi";
 		if (instr_retirq)   new_ascii_instr = "retirq";
 		if (instr_maskirq)  new_ascii_instr = "maskirq";
 		if (instr_waitirq)  new_ascii_instr = "waitirq";
@@ -853,15 +874,14 @@ module picorv32 #(
 	end
 `endif
 
-	// hpa: allow mret as an alias for retirq, so that
+	// hpa: retirq opcode changed to mret, so
 	// __attribute__((interrupt)) works in gcc
 	wire instr_la_retirq = ENABLE_IRQ &&
-			 ((mem_rdata_latched[6:0] == 7'b0001011 && mem_rdata_latched[31:25] == 7'b0000010) ||
-			  (mem_rdata_latched[6:0] == 7'b1110011 && mem_rdata_latched[31:25] == 7'b0011000));
+			  (mem_rdata_latched[6:0] == 7'b1110011 && mem_rdata_latched[31:25] == 7'b0011000);
 
 	always @(posedge clk) begin
 		is_lui_auipc_jal <= |{instr_lui, instr_auipc, instr_jal};
-		is_lui_auipc_jal_jalr_addi_add_sub <= |{instr_lui, instr_auipc, instr_jal, instr_jalr, instr_addi, instr_add, instr_sub};
+		is_lui_auipc_jal_jalr_addi_add_sub_addqxi <= |{instr_lui, instr_auipc, instr_jal, instr_jalr, instr_addi, instr_add, instr_sub, instr_addqxi};
 		is_slti_blt_slt <= |{instr_slti, instr_blt, instr_slt};
 		is_sltiu_bltu_sltu <= |{instr_sltiu, instr_bltu, instr_sltu};
 		is_lbu_lhu_lw <= |{instr_lbu, instr_lhu, instr_lw};
@@ -873,7 +893,7 @@ module picorv32 #(
 			instr_jal     <= mem_rdata_latched[6:0] == 7'b1101111;
 			instr_jalr    <= mem_rdata_latched[6:0] == 7'b1100111 && mem_rdata_latched[14:12] == 3'b000;
 			instr_retirq  <= instr_la_retirq;
-			instr_waitirq <= mem_rdata_latched[6:0] == 7'b0001011 && mem_rdata_latched[31:25] == 7'b0000100 && ENABLE_IRQ;
+			instr_waitirq <= mem_rdata_latched[6:0] == 7'b0001011 && mem_rdata_latched[14:12] == 3'b000 && mem_rdata_latched[31:25] == 7'b0000100 && ENABLE_IRQ;
 
 			is_beq_bne_blt_bge_bltu_bgeu <= mem_rdata_latched[6:0] == 7'b1100011;
 			is_lb_lh_lw_lbu_lhu          <= mem_rdata_latched[6:0] == 7'b0000011;
@@ -883,15 +903,12 @@ module picorv32 #(
 
 			{ decoded_imm_j[31:20], decoded_imm_j[10:1], decoded_imm_j[11], decoded_imm_j[19:12], decoded_imm_j[0] } <= $signed({mem_rdata_latched[31:12], 1'b0});
 
-			decoded_rd <= mem_rdata_latched[11:7];
-			decoded_rs1 <= mem_rdata_latched[19:15];
-			decoded_rs2 <= mem_rdata_latched[24:20];
+			decoded_rd    <= mem_rdata_latched[11:7];
+			decoded_rs1   <= mem_rdata_latched[19:15];
+			decoded_rs2   <= mem_rdata_latched[24:20];
 
-			if (mem_rdata_latched[6:0] == 7'b0001011 && mem_rdata_latched[31:25] == 7'b0000000 && ENABLE_IRQ && ENABLE_IRQ_QREGS)
-				decoded_rs1[regindex_bits-1] <= 1; // instr_getq
-
-			if (instr_la_retirq)
-				decoded_rs1 <= ENABLE_IRQ_QREGS ? irqregs_offset : 3; // instr_retirq
+		        if (instr_la_retirq)
+				decoded_rs1 <= RA_IRQ_REG;
 
 			compressed_instr <= 0;
 			if (COMPRESSED_ISA && mem_rdata_latched[1:0] != 2'b11) begin
@@ -1036,7 +1053,25 @@ module picorv32 #(
 					end
 				endcase
 			end
-		end
+
+		        // hpa: IRQ bank switch support
+		        is_addqxi <= 0;
+
+		        if (ENABLE_IRQ && ENABLE_IRQ_QREGS)
+			  begin
+			     decoded_rd [regindex_bits-1] <= irq_active;
+			     decoded_rs1[regindex_bits-1] <= irq_active;
+			     decoded_rs2[regindex_bits-1] <= irq_active;
+
+			     // addqxi, addxqi
+			     if (mem_rdata_latched[6:0] == 7'b0001011 && mem_rdata_latched[14:13] == 2'b01) begin
+				is_addqxi <= 1; // True for both addqxi and addxqi
+
+				decoded_rd [regindex_bits-1] <= ~mem_rdata_latched[12]; // addxqi
+				decoded_rs1[regindex_bits-1] <=  mem_rdata_latched[12]; // addqxi
+			     end
+			  end
+		end // if (mem_do_rinst && mem_done)
 
 		if (decoder_trigger && !decoder_pseudo_trigger) begin
 			pcpi_insn <= WITH_PCPI ? mem_rdata_q : 'bx;
@@ -1090,10 +1125,11 @@ module picorv32 #(
 			instr_ecall_ebreak <= ((mem_rdata_q[6:0] == 7'b1110011 && !mem_rdata_q[31:21] && !mem_rdata_q[19:7]) ||
 					(COMPRESSED_ISA && mem_rdata_q[15:0] == 16'h9002));
 
-			instr_getq    <= mem_rdata_q[6:0] == 7'b0001011 && mem_rdata_q[31:25] == 7'b0000000 && ENABLE_IRQ && ENABLE_IRQ_QREGS;
-			instr_setq    <= mem_rdata_q[6:0] == 7'b0001011 && mem_rdata_q[31:25] == 7'b0000001 && ENABLE_IRQ && ENABLE_IRQ_QREGS;
-			instr_maskirq <= mem_rdata_q[6:0] == 7'b0001011 && mem_rdata_q[31:25] == 7'b0000011 && ENABLE_IRQ;
-			instr_timer   <= mem_rdata_q[6:0] == 7'b0001011 && mem_rdata_q[31:25] == 7'b0000101 && ENABLE_IRQ && ENABLE_IRQ_TIMER;
+			instr_maskirq <= mem_rdata_q[6:0] == 7'b0001011 && mem_rdata_q[14:12] == 3'b000 && mem_rdata_q[31:25] == 7'b0000011 && ENABLE_IRQ;
+			instr_timer   <= mem_rdata_q[6:0] == 7'b0001011 && mem_rdata_q[14:12] == 3'b000 && mem_rdata_q[31:25] == 7'b0000101 && ENABLE_IRQ && ENABLE_IRQ_TIMER;
+			// instr_addqxi includes addxqi; instr_addxqi is only used for debug
+		        instr_addqxi  <= mem_rdata_q[6:0] == 7'b0001011 && mem_rdata_q[14:13] == 2'b01 && ENABLE_IRQ && ENABLE_IRQ_QREGS;
+		        instr_addxqi  <= mem_rdata_q[6:0] == 7'b0001011 && mem_rdata_q[14:12] == 3'b011 && ENABLE_IRQ && ENABLE_IRQ_QREGS;
 
 			is_slli_srli_srai <= is_alu_reg_imm && |{
 				mem_rdata_q[14:12] == 3'b001 && mem_rdata_q[31:25] == 7'b0000000,
@@ -1101,7 +1137,7 @@ module picorv32 #(
 				mem_rdata_q[14:12] == 3'b101 && mem_rdata_q[31:25] == 7'b0100000
 			};
 
-			is_jalr_addi_slti_sltiu_xori_ori_andi <= instr_jalr || is_alu_reg_imm && |{
+			is_jalr_addi_slti_sltiu_xori_ori_andi_addqxi <= instr_jalr || is_addqxi || is_alu_reg_imm && |{
 				mem_rdata_q[14:12] == 3'b000,
 				mem_rdata_q[14:12] == 3'b010,
 				mem_rdata_q[14:12] == 3'b011,
@@ -1116,7 +1152,7 @@ module picorv32 #(
 				mem_rdata_q[14:12] == 3'b101 && mem_rdata_q[31:25] == 7'b0100000
 			};
 
-			is_lui_auipc_jal_jalr_addi_add_sub <= 0;
+			is_lui_auipc_jal_jalr_addi_add_sub_addqxi <= 0;
 			is_compare <= 0;
 
 			(* parallel_case *)
@@ -1125,7 +1161,7 @@ module picorv32 #(
 					decoded_imm <= decoded_imm_j;
 				|{instr_lui, instr_auipc}:
 					decoded_imm <= mem_rdata_q[31:12] << 12;
-				|{instr_jalr, is_lb_lh_lw_lbu_lhu, is_alu_reg_imm}:
+				|{instr_jalr, is_lb_lh_lw_lbu_lhu, is_alu_reg_imm, is_addqxi}:
 					decoded_imm <= $signed(mem_rdata_q[31:20]);
 				is_beq_bne_blt_bge_bltu_bgeu:
 					decoded_imm <= $signed({mem_rdata_q[31], mem_rdata_q[7], mem_rdata_q[30:25], mem_rdata_q[11:8], 1'b0});
@@ -1140,30 +1176,31 @@ module picorv32 #(
 			is_beq_bne_blt_bge_bltu_bgeu <= 0;
 			is_compare <= 0;
 
-			instr_beq   <= 0;
-			instr_bne   <= 0;
-			instr_blt   <= 0;
-			instr_bge   <= 0;
-			instr_bltu  <= 0;
-			instr_bgeu  <= 0;
-
-			instr_addi  <= 0;
-			instr_slti  <= 0;
-			instr_sltiu <= 0;
-			instr_xori  <= 0;
-			instr_ori   <= 0;
-			instr_andi  <= 0;
-
-			instr_add   <= 0;
-			instr_sub   <= 0;
-			instr_sll   <= 0;
-			instr_slt   <= 0;
-			instr_sltu  <= 0;
-			instr_xor   <= 0;
-			instr_srl   <= 0;
-			instr_sra   <= 0;
-			instr_or    <= 0;
-			instr_and   <= 0;
+			instr_beq    <= 0;
+			instr_bne    <= 0;
+			instr_blt    <= 0;
+			instr_bge    <= 0;
+			instr_bltu   <= 0;
+			instr_bgeu   <= 0;
+
+			instr_addi   <= 0;
+			instr_slti   <= 0;
+			instr_sltiu  <= 0;
+			instr_xori   <= 0;
+			instr_ori    <= 0;
+			instr_andi   <= 0;
+
+			instr_add    <= 0;
+			instr_sub    <= 0;
+			instr_sll    <= 0;
+			instr_slt    <= 0;
+			instr_sltu   <= 0;
+			instr_xor    <= 0;
+			instr_srl    <= 0;
+			instr_sra    <= 0;
+			instr_or     <= 0;
+			instr_and    <= 0;
+		        instr_addqxi <= 0;
 		end
 	end
 
@@ -1268,7 +1305,7 @@ module picorv32 #(
 		alu_out = 'bx;
 		(* parallel_case, full_case *)
 		case (1'b1)
-			is_lui_auipc_jal_jalr_addi_add_sub:
+			is_lui_auipc_jal_jalr_addi_add_sub_addqxi:
 				alu_out = alu_add_sub;
 			is_compare:
 				alu_out = alu_out_0;
@@ -1336,7 +1373,7 @@ module picorv32 #(
 
 `ifndef PICORV32_REGS
 	always @(posedge clk) begin
-		if (resetn && cpuregs_write && latched_rd)
+		if (resetn && cpuregs_write && (latched_rd & 5'h1f))
 `ifdef PICORV32_TESTBUG_001
 			cpuregs[latched_rd ^ 1] <= cpuregs_wrdata;
 `elsif PICORV32_TESTBUG_002
@@ -1346,22 +1383,32 @@ module picorv32 #(
 `endif
 	end
 
+	// hpa: if REGS_INIT_ZERO, then there is no reason not to simply
+	// read from the register file even for x0; the above code
+	// ensures that we never *write* to x0, which is a simple
+	// write enable thing.
 	always @* begin
 		decoded_rs = 'bx;
 		if (ENABLE_REGS_DUALPORT) begin
 `ifndef RISCV_FORMAL_BLACKBOX_REGS
-			cpuregs_rs1 = decoded_rs1 ? cpuregs[decoded_rs1] : 0;
-			cpuregs_rs2 = decoded_rs2 ? cpuregs[decoded_rs2] : 0;
+		        cpuregs_rs1 = cpuregs[decoded_rs1];
+			cpuregs_rs2 = cpuregs[decoded_rs2];
+		        if (!REGS_INIT_ZERO) begin
+				if (!(decoded_rs1 & 5'h1f)) cpuregs_rs1 = 32'h0;
+				if (!(decoded_rs2 & 5'h1f)) cpuregs_rs2 = 32'h0;
+			end
 `else
-			cpuregs_rs1 = decoded_rs1 ? $anyseq : 0;
-			cpuregs_rs2 = decoded_rs2 ? $anyseq : 0;
+		        cpuregs_rs1 = (decoded_rs1 & 5'h1f) ? $anyseq : 32'h0;
+		        cpuregs_rs2 = (decoded_rs2 & 5'h1f) ? $anyseq : 32'h0;
 `endif
 		end else begin
 			decoded_rs = (cpu_state == cpu_state_ld_rs2) ? decoded_rs2 : decoded_rs1;
 `ifndef RISCV_FORMAL_BLACKBOX_REGS
-			cpuregs_rs1 = decoded_rs ? cpuregs[decoded_rs] : 0;
+			cpuregs_rs1 = cpuregs[decoded_rs];
+			if (!REGS_INIT_ZERO)
+				if (!(decoded_rs & 5'h1f)) cpuregs_rs1 = 32'h0;
 `else
-			cpuregs_rs1 = decoded_rs ? $anyseq : 0;
+			cpuregs_rs1 = decoded_rs & 5'h1f ? $anyseq : 0;
 `endif
 			cpuregs_rs2 = cpuregs_rs1;
 		end
@@ -1388,11 +1435,11 @@ module picorv32 #(
 	always @* begin
 		decoded_rs = 'bx;
 		if (ENABLE_REGS_DUALPORT) begin
-			cpuregs_rs1 = decoded_rs1 ? cpuregs_rdata1 : 0;
-			cpuregs_rs2 = decoded_rs2 ? cpuregs_rdata2 : 0;
+			cpuregs_rs1 = (decoded_rs1 & 5'h1f) ? cpuregs_rdata1 : 0; // 5-bit mask; 4'h1f would truncate to 4'hf
+			cpuregs_rs2 = (decoded_rs2 & 5'h1f) ? cpuregs_rdata2 : 0; // same zero-register test as the write enable
 		end else begin
 			decoded_rs = (cpu_state == cpu_state_ld_rs2) ? decoded_rs2 : decoded_rs1;
-			cpuregs_rs1 = decoded_rs ? cpuregs_rdata1 : 0;
+			cpuregs_rs1 = (decoded_rs & 5'h1f) ? cpuregs_rdata1 : 0; // low 5 index bits all zero -> reads as 0
 			cpuregs_rs2 = cpuregs_rs1;
 		end
 	end
@@ -1456,8 +1503,8 @@ module picorv32 #(
 			trace_data <= 'bx;
 
 		if (!resetn) begin
-			reg_pc <= PROGADDR_RESET;
-			reg_next_pc <= PROGADDR_RESET;
+			reg_pc <= progaddr_reset;
+			reg_next_pc <= progaddr_reset;
 			if (ENABLE_COUNTERS)
 				count_instr <= 0;
 			latched_store <= 0;
@@ -1505,7 +1552,7 @@ module picorv32 #(
 						`debug($display("ST_RD:  %2d 0x%08x", latched_rd, latched_stalu ? alu_out_q : reg_out);)
 					end
 					ENABLE_IRQ && irq_state[0]: begin
-						current_pc = PROGADDR_IRQ;
+						current_pc = progaddr_irq;
 						irq_active <= 1;
 						mem_do_rinst <= 1;
 					end
@@ -1541,10 +1588,8 @@ module picorv32 #(
 						irq_state == 2'b00 ? 2'b01 :
 						irq_state == 2'b01 ? 2'b10 : 2'b00;
 					latched_compr <= latched_compr;
-					if (ENABLE_IRQ_QREGS)
-						latched_rd <= irqregs_offset | irq_state[0];
-					else
-						latched_rd <= irq_state[0] ? 4 : 3;
+				        latched_rd <= qreg_offset |
+						      (irq_state[0] ? MASK_IRQ_REG : RA_IRQ_REG);
 				end else
 				if (ENABLE_IRQ && (decoder_trigger || do_waitirq) && instr_waitirq) begin
 					if (irq_pending) begin
@@ -1556,7 +1601,7 @@ module picorv32 #(
 						do_waitirq <= 1;
 				end else
 				if (decoder_trigger) begin
-					`debug($display("-- %-0t", $time);)
+					`debug($display("-- %-0t pc: 0x%08x irq: %x", $time, current_pc, irq_active);)
 					irq_delay <= irq_active;
 					reg_next_pc <= current_pc + (compressed_instr ? 2 : 4);
 					if (ENABLE_TRACE)
@@ -1648,23 +1693,6 @@ module picorv32 #(
 							mem_do_rinst <= mem_do_prefetch;
 						cpu_state <= cpu_state_exec;
 					end
-					ENABLE_IRQ && ENABLE_IRQ_QREGS && instr_getq: begin
-						`debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, cpuregs_rs1);)
-						reg_out <= cpuregs_rs1;
-						dbg_rs1val <= cpuregs_rs1;
-						dbg_rs1val_valid <= 1;
-						latched_store <= 1;
-						cpu_state <= cpu_state_fetch;
-					end
-					ENABLE_IRQ && ENABLE_IRQ_QREGS && instr_setq: begin
-						`debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, cpuregs_rs1);)
-						reg_out <= cpuregs_rs1;
-						dbg_rs1val <= cpuregs_rs1;
-						dbg_rs1val_valid <= 1;
-						latched_rd <= latched_rd | irqregs_offset;
-						latched_store <= 1;
-						cpu_state <= cpu_state_fetch;
-					end
 					ENABLE_IRQ && instr_retirq: begin
 						eoi <= 0;
 						irq_active <= 0;
@@ -1681,6 +1709,7 @@ module picorv32 #(
 						reg_out <= irq_mask;
 						`debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, cpuregs_rs1);)
 						// hpa: allow rs2 to specify bits to be preserved
+						// XXX: support !ENABLE_REGS_DUALPORT
 					        `debug($display("LD_RS2: %2d 0x%08x", decoded_rs2, cpuregs_rs2);)
 						irq_mask <= ((irq_mask & cpuregs_rs2) ^ cpuregs_rs1) | MASKED_IRQ;
 						dbg_rs1val <= cpuregs_rs1;
@@ -1712,7 +1741,7 @@ module picorv32 #(
 						reg_sh <= decoded_rs2;
 						cpu_state <= cpu_state_shift;
 					end
-					is_jalr_addi_slti_sltiu_xori_ori_andi, is_slli_srli_srai && BARREL_SHIFTER: begin
+					is_jalr_addi_slti_sltiu_xori_ori_andi_addqxi, is_slli_srli_srai && BARREL_SHIFTER: begin
 						`debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, cpuregs_rs1);)
 						reg_op1 <= cpuregs_rs1;
 						dbg_rs1val <= cpuregs_rs1;
@@ -2032,6 +2061,7 @@ module picorv32 #(
 		end
 
 		casez (dbg_insn_opcode)
+			/* hpa: XXX: update this */
 			32'b 0000000_?????_000??_???_?????_0001011: begin // getq
 				rvfi_rs1_addr <= 0;
 				rvfi_rs1_rdata <= 0;

+ 2 - 1
fw/Makefile

@@ -33,7 +33,8 @@ boot_depth  := 8192
 boot_width  := 32
 boot_stride := 1
 
-boot.elf: head.o die.o dummy.o irq.o sbrk.o hello.o console.o sdcard.o fatfs.a
+boot.elf: head.o die.o dummy.o irq.o irqasm.o sbrk.o hello.o \
+	  console.o sdcard.o fatfs.a
 
 FATFS_C = $(wildcard fatfs/source/*.c)
 FATFS_O = $(FATFS_C:.c=.o)

File diff suppressed because it is too large
+ 3470 - 3470
fw/boot.mif


+ 2 - 1
fw/fw.h

@@ -21,9 +21,10 @@ extern int disk_init(void);
 
 #define IRQ_VECTORS	32
 
-typedef bool (*irq_handler_t)(unsigned int vector);
+typedef void (*irq_handler_t)(unsigned int vector);
 extern irq_handler_t
 register_irq(unsigned int vector, irq_handler_t handler, bool enable);
+extern void null_irq_handler(unsigned int vector);
 
 static inline unsigned int mask_irq(unsigned int vector)
 {

+ 1 - 3
fw/hello.c

@@ -135,7 +135,7 @@ static void scrub_sdram(void)
 }
 
 static volatile uint32_t timer_irq_count;
-static bool periodic_irq(unsigned int vector)
+static void periodic_irq(unsigned int vector)
 {
     uint32_t count = timer_irq_count;
     (void)vector;
@@ -143,8 +143,6 @@ static bool periodic_irq(unsigned int vector)
     count++;
     timer_irq_count = count;
     set_led(count >> 3); /* 4 Hz */
-
-    return true;		/* Handled */
 }
 
 static void init(void)

+ 20 - 46
fw/irq.c

@@ -2,54 +2,20 @@
 #include "fw.h"
 #include "console.h"
 
-static irq_handler_t irq_handlers[IRQ_VECTORS];
-
-/* Main IRQ dispatch; the .init.irq section puts it at the IRQ vector */
-void __attribute__((interrupt,section(".init.irq"))) _irq(void)
+/* Invalid interrupt */
+static void spurious_irq_handler(unsigned int vector)
 {
-    unsigned int mask = p_getq(1);
-    unsigned int nirq = 0;
-
-    while (mask) {
-	bool handled;
-	irq_handler_t handler;
-
-	if (!(uint16_t)mask) {
-	    mask >>= 16;
-	    nirq += 16;
-	}
-	if (!(uint8_t)mask) {
-	    mask >>= 8;
-	    nirq += 8;
-	}
-	if (!(mask & 15)) {
-	    mask >>= 4;
-	    nirq += 4;
-	}
-	if (!(mask & 3)) {
-	    mask >>= 2;
-	    nirq += 2;
-	}
-	if (!(mask & 1)) {
-	    mask >>= 1;
-	    nirq += 1;
-	}
-
-	/* Now mask[0] is known to be 1 and nirq contains an active irq */
-	handled = false;
-	handler = irq_handlers[nirq];
-
-	if (likely(handler))
-	    handled = handler(nirq);
-
-	if (unlikely(!handled))
-	    mask_irq(nirq);	/* Spurious interrupt; mask it */
+    mask_irq(vector);
+}
 
-	mask >>= 1;
-	nirq++;
-    }
+/* Valid edge-triggered interrupt just to wake up waitirq */
+void null_irq_handler(unsigned int vector)
+{
 }
 
+irq_handler_t __irq_handler_table[IRQ_VECTORS] =
+  { [0 ... IRQ_VECTORS-1] = spurious_irq_handler };
+
 irq_handler_t register_irq(unsigned int vector, irq_handler_t handler,
 			   bool enable)
 {
@@ -60,12 +26,20 @@ irq_handler_t register_irq(unsigned int vector, irq_handler_t handler,
 
     mask_irq(vector);
 
-    old_handler = irq_handlers[vector];
-    irq_handlers[vector] = handler;
+    if (!handler) {
+	enable = false;
+	handler = spurious_irq_handler;
+    }
+    
+    old_handler = __irq_handler_table[vector];
+    __irq_handler_table[vector] = handler;
 
     if (enable && handler)
 	unmask_irq(vector);
 
+    if (old_handler == spurious_irq_handler)
+	handler = NULL;
+
     con_printf("irq: register vector %u, mask = %08x\n",
 	       vector, irqmask());
 

+ 7 - 23
fw/picorv32.h

@@ -3,20 +3,6 @@
 
 #ifndef __ASSEMBLY__
 
-static inline unsigned int p_getq(unsigned int qr)
-{
-    unsigned int rd;
-    asm volatile(".insn r 0x0b, 0, 0, %0, x%1, zero"
-		 : "=r" (rd) : "K" (qr));
-    return rd;
-}
-
-static inline void p_setq(unsigned int qr, unsigned int val)
-{
-    asm volatile(".insn r 0x0b, 0, 1, x%1, %0, zero"
-		 : : "r" (val), "K" (qr));
-}
-
 static inline void p_retirq(void)
 {
     asm volatile(".insn r 0x0b, 0, 2, zero, zero, zero");
@@ -64,20 +50,18 @@ static inline unsigned int p_timer(unsigned int newval)
 #define q2 x2
 #define q3 x3
 
-.macro getq rd, qs
-	.insn r 0x0b, 0, 0, \rd, \qs, zero
-.endm
-
-.macro setq qd, rs
-	.insn r 0x0b, 0, 1, \qd, \rs, zero
+.macro addqxi qd, rs, imm
+	.insn i 0x0b, 0x02, \qd, \rs, \imm
+.endm  
+.macro addxqi rd, qs, imm
+	.insn i 0x0b, 0x03, \rd, \qs, \imm
 .endm
-
 .macro retirq
 	.insn r 0x0b, 0, 2, zero, zero, zero
 .endm
 
-.macro maskirq rd, rs
-	.insn r 0x0b, 0, 3, \rd, \rs, zero
+.macro maskirq rd, rs1, rs2
+	.insn r 0x0b, 0, 3, \rd, \rs1, \rs2
 .endm
 
 .macro waitirq rd

Some files were not shown because too many files changed in this diff