Răsfoiți Sursa

fpga, rv32: Add custom atomic instructions lw.l/sw.u

Add custom atomic instructions lw.l/sw.u to support multithreading.

Add a nonstandard (custom) MEINFO register to go with MEPC.
H. Peter Anvin 1 an în urmă
părinte
comite
78fb11f073
7 a modificat fișierele cu 420 adăugiri și 288 ștergeri
  1. 2 0
      fpga/iodevs.vh
  2. 324 284
      fpga/picorv32.v
  3. 3 1
      iodevs.conf
  4. 80 0
      rv32/atomic.h
  5. 4 0
      rv32/head.S
  6. 6 2
      rv32/killed.c
  7. 1 1
      tools/gnusrc/binutils

+ 2 - 0
fpga/iodevs.vh

@@ -1,5 +1,6 @@
 	localparam IODEV_ADDR_BITS         = 'h00000004; // 4
 	localparam IODEV_ADDR_SHIFT        = 'h00000007; // 7
+	localparam MEINFO_CSR              = 'h000007f1; // 2033
 	localparam SDRAM_ADDR              = 'h40000000; // 1073741824
 	localparam SDRAM_BITS              = 'h00000019; // 25
 	localparam SRAM_ADDR               = 'h00000000; // 0
@@ -7,6 +8,7 @@
 	localparam SYS_MAGIC_MAX80         = 'h3858414d; // 945307981
 	localparam TIMER_SHIFT             = 'h00000005; // 5
 	localparam TTY_CHANNELS            = 'h00000002; // 2
+	localparam USER_CONTEXT_CSR        = 'h000007f0; // 2032
 	localparam XDEV_ADDR_BITS          = 'h00000002; // 2
 	localparam XDEV_ADDR_SHIFT         = 'h0000001c; // 28
 	localparam _PC_IRQ                 = 'h00000020; // 32

+ 324 - 284
fpga/picorv32.v

@@ -56,6 +56,19 @@
  *  - Separately parameterize the width of the cycle and instruction counters;
  *    they can be independently set to any value from 0 to 64 bits.
  *  - The user context number (user_context CSR) is exported to a port.
+ *  - Add "lw.l" and "sw.u" (load and lock/store and unlock) instructions;
+ *    same opcodes as lw and sw except funct3 == 3 instead of 2.
+ *    These serve the same function as a "constrained lr/sc loop" in that
+ *    the atomicity is guaranteed, and thus the result doesn't need to be
+ *    checked; they simply lock out interrupts for the duration of the
+ *    execution. (When encoded as 16-bit instructions these overlay c.flw
+ *    and c.fsw, which aren't supported anyway.)
+ *  - Exception status bit (instruction length) and prior load_lock
+ *    status moved to a new "meinfo" register rather than spare bits in
+ *    mepc.
+ *    meinfo[1:0]: (instruction length/2)-1
+ *    meinfo[2]:   load lock active
+ *    meinfo[3]:   trap taken in IRQ context (usually fatal)
  */
 
 /* verilator lint_off WIDTH */
@@ -132,15 +145,15 @@ module picorv32 #(
 	parameter [ 0:0]  ENABLE_IRQ_TIMER = 1,
 	parameter [ 0:0]  ENABLE_TRACE = 0,
 	parameter [ 0:0]  REGS_INIT_ZERO = 0,
-	parameter [31:0]  MASKED_IRQ = 32'h 0000_0000,
-	parameter [31:0]  LATCHED_IRQ = 32'h ffff_ffff,
-	parameter [31:0]  STACKADDR = 32'h ffff_ffff,
-	parameter [ 4:0]  MASK_IRQ_REG = ENABLE_IRQ_QREGS ? 27 : 4,
-	parameter integer USER_CONTEXTS = 1,
-	parameter [ 0:0]  ENABLE_IRQ_QREGS = USER_CONTEXTS > 0,
-
-	parameter integer context_bits = $clog2(USER_CONTEXTS + 1),
-	parameter integer context_max_bit = context_bits ? context_bits-1 : 0
+        parameter [31:0]  MASKED_IRQ = 32'h 0000_0000,
+        parameter [31:0]  LATCHED_IRQ = 32'h ffff_ffff,
+        parameter [31:0]  STACKADDR = 32'h ffff_ffff,
+        parameter [ 4:0]  MASK_IRQ_REG = ENABLE_IRQ_QREGS ? 27 : 4,
+        parameter integer USER_CONTEXTS = 1,
+        parameter [ 0:0]  ENABLE_IRQ_QREGS = USER_CONTEXTS > 0,
+
+        parameter integer context_bits = $clog2(USER_CONTEXTS + 1),
+        parameter integer context_max_bit = context_bits ? context_bits-1 : 0
 ) (
 	input			       clk, resetn,
 	input			       halt,
@@ -217,14 +230,14 @@ module picorv32 #(
 `endif
 
 	// Trace Interface
-	output reg		       trace_valid,
-	output reg [35:0]	       trace_data
+        output reg                     trace_valid,
+        output reg [35:0]              trace_data
 );
-	localparam integer irq_timer = 0;
-	localparam integer irq_ebreak = 1;
-	localparam integer irq_buserror = 2;
+        localparam integer irq_timer = 0;
+        localparam integer irq_ebreak = 1;
+        localparam integer irq_buserror = 2;
 
-	localparam integer xreg_count   = ENABLE_REGS_16_31 ? 32 : 16;
+        localparam integer xreg_count   = ENABLE_REGS_16_31 ? 32 : 16;
         localparam integer xreg_bits    = $clog2(xreg_count);
         localparam integer xreg_banks   = USER_CONTEXTS + 1;
         localparam integer regfile_size = xreg_count * xreg_banks;
@@ -247,7 +260,10 @@ module picorv32 #(
         localparam [63:0] count_instr_mask = (1'b1 << COUNTER_INSTR_WIDTH) - 1'b1;
 
 	reg [31:0] reg_pc, reg_next_pc, reg_mepc, reg_op1, reg_op2, reg_out;
-	reg [4:0] reg_sh;
+	reg [4:0]  reg_sh;
+
+	reg  [3:0] reg_meinfo;                // meinfo CSR: [1:0] (insn length/2)-1, [2] load lock active, [3] trap taken in IRQ context
+	wire [3:0] reg_meinfo_mask = 4'b1101; // bits of meinfo writable through a CSR write; must be as wide as reg_meinfo or bit 3 is truncated away
 
 	reg [31:0] next_insn_opcode;
 	reg [31:0] dbg_insn_opcode;
@@ -267,15 +283,16 @@ module picorv32 #(
 	wire [31:0] next_pc;
 
 	reg irq_delay;
-	reg irq_active;
-	reg [31:0] irq_mask;
-	reg [31:0] irq_pending;
-	reg [31:0] timer;
-	reg [31:0] buserr_address;
-        wire [31:0] active_irqs = irq_pending & ~irq_mask;
+        reg irq_active;
+        reg load_lock;
+        reg [31:0]  irq_mask;
+        reg [31:0] irq_pending;
+        reg [31:0] timer;
+        reg [31:0] buserr_address;
+	wire [31:0] active_irqs = irq_pending & (~irq_mask | 32'h6);
 
 `ifndef PICORV32_REGS
-	reg [31:0] cpuregs [0:regfile_size-1];
+        reg [31:0] cpuregs [0:regfile_size-1];
 
 	integer i;
 	initial begin
@@ -518,13 +535,13 @@ module picorv32 #(
 							mem_rdata_q[14:12] <= 3'b000;
 							mem_rdata_q[31:20] <= {2'b0, mem_rdata_latched[10:7], mem_rdata_latched[12:11], mem_rdata_latched[5], mem_rdata_latched[6], 2'b00};
 						end
-						3'b010: begin // C.LW
+						3'b010, 3'b011: begin // C.LW, C.LW.L
 							mem_rdata_q[31:20] <= {5'b0, mem_rdata_latched[5], mem_rdata_latched[12:10], mem_rdata_latched[6], 2'b00};
-							mem_rdata_q[14:12] <= 3'b 010;
+						   mem_rdata_q[14:12] <= { 2'b01, mem_rdata_latched[13] };
 						end
-						3'b 110: begin // C.SW
+						3'b 110, 3'b111: begin // C.SW, C.SW.U
 							{mem_rdata_q[31:25], mem_rdata_q[11:7]} <= {5'b0, mem_rdata_latched[5], mem_rdata_latched[12:10], mem_rdata_latched[6], 2'b00};
-							mem_rdata_q[14:12] <= 3'b 010;
+						   mem_rdata_q[14:12] <= { 2'b01, mem_rdata_latched[13] };
 						end
 					endcase
 				end
@@ -588,9 +605,9 @@ module picorv32 #(
 							mem_rdata_q[31:25] <= 7'b0000000;
 							mem_rdata_q[14:12] <= 3'b 001;
 						end
-						3'b010: begin // C.LWSP
+						3'b010, 3'b011: begin // C.LWSP, C.LWSP.L
 							mem_rdata_q[31:20] <= {4'b0, mem_rdata_latched[3:2], mem_rdata_latched[12], mem_rdata_latched[6:4], 2'b00};
-							mem_rdata_q[14:12] <= 3'b 010;
+							mem_rdata_q[14:12] <= { 2'b01, mem_rdata_latched[13] };
 						end
 						3'b100: begin
 							if (mem_rdata_latched[12] == 0 && mem_rdata_latched[6:2] == 0) begin // C.JR
@@ -610,9 +627,9 @@ module picorv32 #(
 								mem_rdata_q[31:25] <= 7'b0000000;
 							end
 						end
-						3'b110: begin // C.SWSP
+						3'b110, 3'b111: begin // C.SWSP, C.SWSP.U
 							{mem_rdata_q[31:25], mem_rdata_q[11:7]} <= {4'b0, mem_rdata_latched[8:7], mem_rdata_latched[12:9], 2'b00};
-							mem_rdata_q[14:12] <= 3'b 010;
+							mem_rdata_q[14:12] <= { 2'b01, mem_rdata_latched[13] };
 						end
 					endcase
 				end
@@ -728,7 +745,7 @@ module picorv32 #(
 	reg instr_csrr, instr_ecall_ebreak;
 	reg instr_addqxi, instr_addxqi, instr_retirq, instr_maskirq, instr_waitirq, instr_timer, instr_pollirq;
 	reg instr_ctz;
-        reg [2:0] instr_funct2;
+        reg [2:0] instr_funct3;
 
 	wire instr_trap;
 
@@ -1105,23 +1122,22 @@ module picorv32 #(
 				endcase
 			end
 
-		        // hpa: IRQ bank switch support
+		        // hpa: user context support
 		        is_addqxi <= 0;
 
-		        if (ENABLE_IRQ && ENABLE_IRQ_QREGS)
-			  begin
-			     decoded_rd [regfile_bits-1:xreg_bits] <= irq_active ? 0 : user_context;
-			     decoded_rs1[regfile_bits-1:xreg_bits] <= irq_active ? 0 : user_context;
-			     decoded_rs2[regfile_bits-1:xreg_bits] <= irq_active ? 0 : user_context;
+		        if (USER_CONTEXTS > 0) begin
+			   decoded_rd [regfile_bits-1:xreg_bits] <= irq_active ? 0 : user_context;
+			   decoded_rs1[regfile_bits-1:xreg_bits] <= irq_active ? 0 : user_context;
+			   decoded_rs2[regfile_bits-1:xreg_bits] <= irq_active ? 0 : user_context;
 
-			     // addqxi, addxqi
-			     if (mem_rdata_latched[6:0] == 7'b0001011 && mem_rdata_latched[14:13] == 2'b01) begin
-				is_addqxi <= 1; // True for both addqxi and addxqi
+			   // addqxi, addxqi
+			   if (mem_rdata_latched[6:0] == 7'b0001011 && mem_rdata_latched[14:13] == 2'b01) begin
+			      is_addqxi <= 1; // True for both addqxi and addxqi
 
-				decoded_rd [regfile_bits-1:xreg_bits] <= ~mem_rdata_latched[12] ? 0 : user_context;
-				decoded_rs1[regfile_bits-1:xreg_bits] <=  mem_rdata_latched[12] ? 0 : user_context;
-			     end
-			  end
+			      decoded_rd [regfile_bits-1:xreg_bits] <= ~mem_rdata_latched[12] ? 0 : user_context;
+			      decoded_rs1[regfile_bits-1:xreg_bits] <=  mem_rdata_latched[12] ? 0 : user_context;
+			   end
+			end
 		end // if (mem_do_rinst && mem_done)
 
 		if (decoder_trigger && !decoder_pseudo_trigger) begin
@@ -1136,14 +1152,13 @@ module picorv32 #(
 
 			instr_lb    <= is_lb_lh_lw_lbu_lhu && mem_rdata_q[14:12] == 3'b000;
 			instr_lh    <= is_lb_lh_lw_lbu_lhu && mem_rdata_q[14:12] == 3'b001;
-			instr_lw    <= is_lb_lh_lw_lbu_lhu && mem_rdata_q[14:12] == 3'b010;
+			instr_lw    <= is_lb_lh_lw_lbu_lhu && mem_rdata_q[14:13] == 2'b01; // Includes lw.l
 			instr_lbu   <= is_lb_lh_lw_lbu_lhu && mem_rdata_q[14:12] == 3'b100;
 			instr_lhu   <= is_lb_lh_lw_lbu_lhu && mem_rdata_q[14:12] == 3'b101;
 
 			instr_sb    <= is_sb_sh_sw && mem_rdata_q[14:12] == 3'b000;
 			instr_sh    <= is_sb_sh_sw && mem_rdata_q[14:12] == 3'b001;
-			instr_sw    <= is_sb_sh_sw && mem_rdata_q[14:12] == 3'b010;
-
+			instr_sw    <= is_sb_sh_sw && mem_rdata_q[14:13] == 2'b01; // funct3 010 (sw) or 011 (sw.u); compare only the two upper bits, as instr_lw does
 			instr_addi  <= is_alu_reg_imm && mem_rdata_q[14:12] == 3'b000;
 			instr_slti  <= is_alu_reg_imm && mem_rdata_q[14:12] == 3'b010;
 			instr_sltiu <= is_alu_reg_imm && mem_rdata_q[14:12] == 3'b011;
@@ -1181,8 +1196,8 @@ module picorv32 #(
 
 
 			// instr_addqxi includes addxqi; instr_addxqi is only used for debug
-		        instr_addqxi  <= mem_rdata_q[6:0] == 7'b0001011 && mem_rdata_q[14:13] == 2'b01  && ENABLE_IRQ && ENABLE_IRQ_QREGS;
-		        instr_addxqi  <= mem_rdata_q[6:0] == 7'b0001011 && mem_rdata_q[14:12] == 3'b011 && ENABLE_IRQ && ENABLE_IRQ_QREGS;
+		   instr_addqxi  <= mem_rdata_q[6:0] == 7'b0001011 && mem_rdata_q[14:13] == 2'b01  && (USER_CONTEXTS > 0);
+		   instr_addxqi  <= mem_rdata_q[6:0] == 7'b0001011 && mem_rdata_q[14:12] == 3'b011 && (USER_CONTEXTS > 0);
 
 			is_slli_srli_srai <= is_alu_reg_imm && |{
 				mem_rdata_q[14:12] == 3'b001 && mem_rdata_q[31:25] == 7'b0000000,
@@ -1222,7 +1237,7 @@ module picorv32 #(
 					decoded_imm <= $signed(mem_rdata_q[31:20]);
 			endcase // case (1'b1)
 
-		        instr_funct2 <= mem_rdata_q[14:12];
+		        instr_funct3 <= mem_rdata_q[14:12];
 		end
 
 		if (!resetn) begin
@@ -1310,13 +1325,13 @@ module picorv32 #(
 	reg latched_is_lu;
 	reg latched_is_lh;
 	reg latched_is_lb;
-	reg [regfile_bits-1:0] latched_rd;
+        reg [regfile_bits-1:0] latched_rd;
 
-	reg [31:0] current_pc;
-	assign next_pc = latched_store && latched_branch ? reg_out & ~1 : reg_next_pc;
+        reg [31:0] current_pc;
+        assign next_pc = latched_store && latched_branch ? reg_out & ~1 : reg_next_pc;
 
-	reg [3:0] pcpi_timeout_counter;
-	reg pcpi_timeout;
+        reg [3:0] pcpi_timeout_counter;
+        reg pcpi_timeout;
 
 	reg [31:0] next_irq_pending;
 	reg do_waitirq;
@@ -1501,17 +1516,17 @@ module picorv32 #(
 			cpuregs_rs1 = decoded_rs & xreg_mask ? cpuregs_rdata1 : 0;
 			cpuregs_rs2 = cpuregs_rs1;
 		end
-	end
+        end
 `endif
 
-	assign launch_next_insn = cpu_state == cpu_state_fetch &&
-				  decoder_trigger &&
-				  (!ENABLE_IRQ || irq_delay || irq_active || !active_irqs);
+        assign launch_next_insn = cpu_state == cpu_state_fetch &&
+                                  decoder_trigger &&
+                                  (!ENABLE_IRQ || irq_delay || load_lock || irq_active || !active_irqs);
 
-        wire [31:0] csrr_src = instr_funct2[2] ? { 29'b0, decoded_rs1[4:0] } : cpuregs_rs1;
+        wire [31:0] csrr_src = instr_funct3[2] ? { 29'b0, decoded_rs1[4:0] } : cpuregs_rs1;
 
-	always @(posedge clk) begin
-		trap <= 0;
+        always @(posedge clk) begin
+                trap <= 0;
 		reg_sh <= 'bx;
 		reg_out <= 'bx;
 		set_mem_do_rinst = 0;
@@ -1537,13 +1552,13 @@ module picorv32 #(
 					pcpi_timeout_counter <= pcpi_timeout_counter - 1;
 			end else
 				pcpi_timeout_counter <= ~0;
-			pcpi_timeout <= !pcpi_timeout_counter;
-		end
+                        pcpi_timeout <= !pcpi_timeout_counter;
+                end
 
-		next_irq_pending = ENABLE_IRQ ? (irq_pending & LATCHED_IRQ & ~MASKED_IRQ) : 'bx;
+                next_irq_pending = ENABLE_IRQ ? (irq_pending & LATCHED_IRQ & ~MASKED_IRQ) : 'bx;
 
-		if (ENABLE_IRQ && ENABLE_IRQ_TIMER && timer) begin
-			timer <= timer - 1;
+                if (ENABLE_IRQ && ENABLE_IRQ_TIMER && timer) begin
+                        timer <= timer - 1;
 		end
 
 		decoder_trigger <= mem_do_rinst && mem_done;
@@ -1559,13 +1574,14 @@ module picorv32 #(
 	        if (!resetn)
 			count_cycle <= 0;
 	        else
-			count_cycle <= (count_cycle + 1'b1) & count_cycle_mask;
-
-		if (!resetn) begin
-			reg_pc <= progaddr_reset;
-			reg_next_pc <= progaddr_reset;
-		        reg_mepc <= 0;
-		        count_instr <= 0;
+                        count_cycle <= (count_cycle + 1'b1) & count_cycle_mask;
+
+                if (!resetn) begin
+                        reg_pc <= progaddr_reset;
+                        reg_next_pc <= progaddr_reset;
+                        reg_mepc <= 0;
+                        reg_meinfo <= 0;
+                        count_instr <= 0;
 			latched_store <= 0;
 			latched_stalu <= 0;
 			latched_branch <= 0;
@@ -1574,13 +1590,14 @@ module picorv32 #(
 			latched_is_lu <= 0;
 			latched_is_lh <= 0;
 			latched_is_lb <= 0;
-		        user_context <= USER_CONTEXTS; // On reset highest supported context
-			pcpi_valid <= 0;
-			pcpi_timeout <= 0;
-			irq_active <= 0;
-			irq_delay <= 0;
-			irq_mask <= ~0;
-			next_irq_pending = 0;
+                        user_context <= USER_CONTEXTS; // On reset highest supported context
+                        pcpi_valid <= 0;
+                        pcpi_timeout <= 0;
+                        irq_active <= 0; // XXX: really should change this (reset into IRQ context)
+                        load_lock <= 0;
+                        irq_delay <= 0;
+                        irq_mask <= ~0;
+                        next_irq_pending = 0;
 			eoi <= 0;
 		        timer <= 0;
 	                do_waitirq <= 0;
@@ -1599,34 +1616,34 @@ module picorv32 #(
 
 			cpu_state_fetch: begin
 			        eoi <= 0;
-			        mem_do_rinst <= !decoder_trigger && !do_waitirq && !halt;
-				mem_wordsize <= 0;
-
-				current_pc = reg_next_pc;
-
-				(* parallel_case *)
-				case (1'b1)
-					latched_branch: begin
-						current_pc = latched_store ? (latched_stalu ? alu_out_q : reg_out) & ~1 : reg_next_pc;
-						`debug($display("ST_RD:  %2d 0x%08x, BRANCH 0x%08x", latched_rd, reg_pc + (latched_compr ? 2 : 4), current_pc);)
+                                mem_do_rinst <= !decoder_trigger && !do_waitirq && !halt;
+                                mem_wordsize <= 0;
+
+                                current_pc = reg_next_pc;
+
+                                (* parallel_case *)
+                                case (1'b1)
+                                        latched_branch: begin
+                                                current_pc = latched_store ? (latched_stalu ? alu_out_q : reg_out) & ~1 : reg_next_pc;
+                                                `debug($display("ST_RD:  %2d 0x%08x, BRANCH 0x%08x", latched_rd, reg_pc + (latched_compr ? 2 : 4), current_pc);)
+                                        end
+                                        latched_store && !latched_branch && !latched_irq: begin
+                                                `debug($display("ST_RD:  %2d 0x%08x", latched_rd, latched_stalu ? alu_out_q : reg_out);)
 					end
-					latched_store && !latched_branch && !latched_irq: begin
-						`debug($display("ST_RD:  %2d 0x%08x", latched_rd, latched_stalu ? alu_out_q : reg_out);)
-					end
-				endcase
-
-			        if (latched_irq) begin
-					current_pc = progaddr_irq & ~1;
-				        mem_do_rinst  <= 1'b1;
-				end
-				if (ENABLE_TRACE && latched_trace) begin
-					latched_trace <= 0;
-					trace_valid <= 1;
-					if (latched_branch)
-						trace_data <= (irq_active ? TRACE_IRQ : 0) | TRACE_BRANCH | (current_pc & 32'hfffffffe);
-					else
-						trace_data <= (irq_active ? TRACE_IRQ : 0) | (latched_stalu ? alu_out_q : reg_out);
-				end
+                                endcase
+
+                                if (latched_irq) begin
+                                        current_pc = progaddr_irq & ~1;
+                                        mem_do_rinst  <= 1'b1;
+                                end
+                                if (ENABLE_TRACE && latched_trace) begin
+                                        latched_trace <= 0;
+                                        trace_valid <= 1;
+                                        if (latched_branch)
+                                                trace_data <= (irq_active ? TRACE_IRQ : 0) | TRACE_BRANCH | (current_pc & 32'hfffffffe);
+                                        else
+                                                trace_data <= (irq_active ? TRACE_IRQ : 0) | (latched_stalu ? alu_out_q : reg_out);
+                                end
 
 				reg_pc <= current_pc;
 				reg_next_pc <= current_pc;
@@ -1641,47 +1658,50 @@ module picorv32 #(
 				latched_rd <= decoded_rd;
 				latched_compr <= compressed_instr;
 
-			        if (halt && !latched_irq) begin
-				        // Do nothing, but allow an already started instruction or IRQ to complete
-				end else
-				if (ENABLE_IRQ && do_waitirq &&
-				    (&(irq_pending | ~reg_op1) || |(irq_pending & reg_op2))) begin
-				      // Waited-for interrupt: wake up and exit waitirq
-				      // If this interrupt is enabled, it will be taken on the next cycle
-				      latched_store   <= 1;
-				      reg_out         <= irq_pending;
-				      reg_next_pc     <= current_pc + (compressed_instr ? 2 : 4);
-				      do_waitirq      <= 0;
-				 end else
-				 if (ENABLE_IRQ && decoder_trigger && !irq_active && !irq_delay && |active_irqs) begin
-				        irq_active    <= 1'b1;
-				        latched_irq   <= 1'b1;
-				        latched_rd    <= MASK_IRQ_REG;
-				        reg_out       <= active_irqs;
-				        latched_store <= 1'b1;
-				        eoi           <= active_irqs;
-					next_irq_pending = next_irq_pending & irq_mask;
-				        reg_mepc      <= reg_next_pc | latched_compr;
-				        do_waitirq    <= 0; // An unwaited-for interrupt can break waitirq
-				end else
-				if (ENABLE_IRQ && do_waitirq) begin
-					// Actually waiting for an IRQ...
-					do_waitirq    <= 1; // Keep waiting...
-				end else
-				if (decoder_trigger) begin
-					`debug($display("-- %-0t pc: 0x%08x irq: %x", $time, current_pc, irq_active);)
-					irq_delay <= irq_active;
-					reg_next_pc <= current_pc + (compressed_instr ? 2 : 4);
-					if (ENABLE_TRACE)
-						latched_trace <= 1;
-				        count_instr <= (count_instr + 1'b1) & count_instr_mask;
-					if (instr_jal) begin
-						mem_do_rinst <= 1;
-						reg_next_pc <= current_pc + decoded_imm_j;
-						latched_branch <= 1;
-					end else begin
-						mem_do_rinst <= 0;
-						mem_do_prefetch <= !instr_jalr && !instr_retirq;
+                                if (halt && !latched_irq) begin
+                                        // Do nothing, but allow an already started instruction or IRQ to complete
+                                end else
+                                if (ENABLE_IRQ && do_waitirq &&
+                                    (&(irq_pending | ~reg_op1) || |(irq_pending & reg_op2))) begin
+                                      // Waited-for interrupt: wake up and exit waitirq
+                                      // If this interrupt is enabled, it will be taken on the next cycle
+                                      latched_store   <= 1;
+                                      reg_out         <= irq_pending;
+                                      reg_next_pc     <= current_pc + (compressed_instr ? 2 : 4);
+                                      do_waitirq      <= 0;
+                                 end else
+                                 if (ENABLE_IRQ && decoder_trigger && !irq_delay &&
+				     |(active_irqs & (!irq_active && !load_lock ? ~32'b0 : 32'h6))) begin
+                                        irq_active    <= 1'b1;
+                                        latched_irq   <= 1'b1;
+                                        latched_rd    <= MASK_IRQ_REG;
+                                        reg_out       <= active_irqs;
+                                        latched_store <= 1'b1;
+                                        eoi           <= active_irqs;
+                                        next_irq_pending = next_irq_pending & irq_mask;
+                                        reg_mepc      <= reg_next_pc;
+                                        reg_meinfo    <= { irq_active, load_lock, 1'b0, ~latched_compr };
+                                        load_lock     <= 0;
+                                        do_waitirq    <= 0; // An unwaited-for interrupt can break waitirq
+                                end else
+                                if (ENABLE_IRQ && do_waitirq) begin
+                                        // Actually waiting for an IRQ...
+                                        do_waitirq    <= 1; // Keep waiting...
+                                end else
+                                if (decoder_trigger) begin
+                                        `debug($display("-- %-0t pc: 0x%08x irq: %x", $time, current_pc, irq_active);)
+                                        irq_delay <= irq_active;
+                                        reg_next_pc <= current_pc + (compressed_instr ? 2 : 4);
+                                        if (ENABLE_TRACE)
+                                                latched_trace <= 1;
+                                        count_instr <= (count_instr + 1'b1) & count_instr_mask;
+                                        if (instr_jal) begin
+                                                mem_do_rinst <= 1;
+                                                reg_next_pc <= current_pc + decoded_imm_j;
+                                                latched_branch <= 1;
+                                        end else begin
+                                                mem_do_rinst <= 0;
+                                                mem_do_prefetch <= !instr_jalr && !instr_retirq;
 						cpu_state <= cpu_state_ld_rs1;
 					end
 				end
@@ -1709,28 +1729,28 @@ module picorv32 #(
 								if (pcpi_int_ready) begin
 									mem_do_rinst <= 1;
 									pcpi_valid <= 0;
-									reg_out <= pcpi_int_rd;
-									latched_store <= pcpi_int_wr;
-									cpu_state <= cpu_state_fetch;
-								end else
-								if (CATCH_ILLINSN && (pcpi_timeout || instr_ecall_ebreak)) begin
-									pcpi_valid <= 0;
-									`debug($display("EBREAK OR UNSUPPORTED INSN AT 0x%08x", reg_pc);)
-									if (ENABLE_IRQ && !irq_mask[irq_ebreak] && !irq_active) begin
-										next_irq_pending[irq_ebreak] = 1;
-										cpu_state <= cpu_state_fetch;
-									end else
+                                                                        reg_out <= pcpi_int_rd;
+                                                                        latched_store <= pcpi_int_wr;
+                                                                        cpu_state <= cpu_state_fetch;
+                                                                end else
+                                                                if (CATCH_ILLINSN && (pcpi_timeout || instr_ecall_ebreak)) begin
+                                                                        pcpi_valid <= 0;
+                                                                        `debug($display("EBREAK OR UNSUPPORTED INSN AT 0x%08x", reg_pc);)
+                                                                        if (ENABLE_IRQ && !irq_mask[irq_ebreak] && !irq_active) begin
+                                                                                next_irq_pending[irq_ebreak] = 1;
+                                                                                cpu_state <= cpu_state_fetch;
+                                                                        end else
 										cpu_state <= cpu_state_trap;
 								end
 							end else begin
 								cpu_state <= cpu_state_ld_rs2;
-							end
-						end else begin
-							`debug($display("EBREAK OR UNSUPPORTED INSN AT 0x%08x", reg_pc);)
-							if (ENABLE_IRQ && !irq_mask[irq_ebreak] && !irq_active) begin
-								next_irq_pending[irq_ebreak] = 1;
-								cpu_state <= cpu_state_fetch;
-							end else
+                                                        end
+                                                end else begin
+                                                        `debug($display("EBREAK OR UNSUPPORTED INSN AT 0x%08x", reg_pc);)
+                                                        if (ENABLE_IRQ && !irq_mask[irq_ebreak] && !irq_active) begin
+                                                                next_irq_pending[irq_ebreak] = 1;
+                                                                cpu_state <= cpu_state_fetch;
+                                                        end else
 								cpu_state <= cpu_state_trap;
 						end
 					end
@@ -1749,26 +1769,40 @@ module picorv32 #(
 						        12'h341:		 // mepc
 							  if (ENABLE_IRQ) reg_out <= reg_mepc;
 							12'h343:		 // mtval
-							  if (CATCH_MISALIGN)    reg_out <= buserr_address;
-						        12'h7f0:                 // user_context
-							  if (USER_CONTEXTS > 0) reg_out <= user_context;
-							default:
-							  reg_out <= 32'bx;
-						endcase // case (decoded_imm[11:0])
-
-					        // Bitops not supported ATM, treat as readonly
-					        if (~instr_funct2[1])
-						  case (decoded_imm[11:0])
-						    12'h341: if (ENABLE_IRQ) begin // mepc
-						       reg_mepc <= csrr_src;
-						    end
-						    12'h7f0:
-						      if (USER_CONTEXTS > 0) begin
-							 user_context <= csrr_src;
-							 irq_active   <= 1'b1;
-						      end
-						    default: begin
-						       // Do nothing
+                                                          if (CATCH_MISALIGN)    reg_out <= buserr_address;
+                                                        12'h7f0:                 // user_context
+                                                          if (USER_CONTEXTS > 0) reg_out <= user_context;
+                                                        12'h7f1:                 // meinfo
+                                                          if (ENABLE_IRQ) reg_out <= reg_meinfo;
+                                                        default:
+                                                          reg_out <= 32'bx;
+                                                endcase // case (decoded_imm[11:0])
+
+                                                // Bitops not supported ATM, treat as readonly
+                                                if (~instr_funct3[1])
+                                                  case (decoded_imm[11:0])
+                                                    12'h341: // mepc
+                                                      if (ENABLE_IRQ) begin
+                                                         reg_mepc <= csrr_src & ~1;
+                                                         irq_active <= 1'b1;
+                                                         load_lock  <= 1'b0;
+                                                         reg_meinfo <= { irq_active, load_lock, 1'b0, ~latched_compr };
+                                                      end
+                                                    12'h7f0: // user_context
+                                                      if (USER_CONTEXTS > 0) begin
+                                                         user_context <= csrr_src;
+                                                         if (ENABLE_IRQ) begin
+                                                            irq_active   <= 1'b1;
+                                                            load_lock    <= 1'b0;
+                                                            reg_meinfo <= { irq_active, load_lock, 1'b0, ~latched_compr };
+                                                         end
+                                                      end
+                                                    12'h7f1: // meinfo
+                                                      if (ENABLE_IRQ) begin
+                                                         reg_meinfo <= csrr_src & reg_meinfo_mask;
+                                                      end
+                                                    default: begin
+                                                       // Do nothing
 						    end
 						  endcase // case (decoded_imm[11:0])
 
@@ -1785,26 +1819,29 @@ module picorv32 #(
 						cpu_state <= cpu_state_exec;
 					end
 					ENABLE_IRQ && instr_retirq: begin
-						irq_active <= 0;
-						latched_branch <= 1;
-						latched_store <= 1;
-					        `debug($display("MRET: 0x%08x", reg_mepc);)
-					        reg_out <= reg_mepc & ~1;
-					        dbg_rs1val <= reg_mepc;
-						dbg_rs1val_valid <= 1;
-						cpu_state <= cpu_state_fetch;
-					end
-					ENABLE_IRQ && instr_maskirq: begin
-						latched_store <= 1;
-						reg_out <= irq_mask;
-						`debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, cpuregs_rs1);)
-						// hpa: allow rs2 to specify bits to be preserved
-						// XXX: support !ENABLE REGS_DUALPORT
-					        `debug($display("LD_RS2: %2d 0x%08x", decoded_rs2, cpuregs_rs2);)
-						irq_mask <= ((irq_mask & cpuregs_rs2) ^ cpuregs_rs1) | MASKED_IRQ;
-						dbg_rs1val <= cpuregs_rs1;
-						dbg_rs1val_valid <= 1;
-						dbg_rs2val <= cpuregs_rs2;
+                                                latched_branch <= 1;
+                                                latched_store <= 1;
+                                                `debug($display("MRET: 0x%08x %x", reg_mepc, reg_meinfo);)
+                                                reg_out <= reg_mepc;
+                                                load_lock <= reg_meinfo[2]; // Restore load lock
+						irq_active <= reg_meinfo[3];
+                                                dbg_rs1val <= reg_mepc;
+                                                dbg_rs1val_valid <= 1;
+						dbg_rs2val <= reg_meinfo;
+					        dbg_rs2val_valid <= 1;
+                                                cpu_state <= cpu_state_fetch;
+                                        end
+                                        ENABLE_IRQ && instr_maskirq: begin
+                                                latched_store <= 1;
+                                                reg_out <= irq_mask;
+                                                `debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, cpuregs_rs1);)
+                                                // hpa: allow rs2 to specify bits to be preserved
+                                                // XXX: support !ENABLE REGS_DUALPORT
+                                                `debug($display("LD_RS2: %2d 0x%08x", decoded_rs2, cpuregs_rs2);)
+                                                irq_mask <= ((irq_mask & cpuregs_rs2) ^ cpuregs_rs1) | MASKED_IRQ;
+                                                dbg_rs1val <= cpuregs_rs1;
+                                                dbg_rs1val_valid <= 1;
+                                                dbg_rs2val <= cpuregs_rs2;
 						dbg_rs2val_valid <= 1;
 						cpu_state <= cpu_state_fetch;
 					end // case: ENABLE_IRQ && instr_maskirq
@@ -1829,13 +1866,13 @@ module picorv32 #(
 						cpu_state <= cpu_state_fetch;
 					end
 				        ENABLE_IRQ && instr_pollirq: begin
-						latched_store <= 1;
-					        reg_out <= (active_irqs & ~cpuregs_rs1) | cpuregs_rs2;
-					        eoi <= active_irqs & ~cpuregs_rs1;
-					        next_irq_pending = next_irq_pending & (irq_mask | cpuregs_rs1);
-						dbg_rs1val <= cpuregs_rs1;
-						dbg_rs1val_valid <= 1;
-						dbg_rs2val <= cpuregs_rs2;
+                                                latched_store <= 1;
+                                                reg_out <= (active_irqs & ~cpuregs_rs1) | cpuregs_rs2;
+                                                eoi <= active_irqs & ~cpuregs_rs1;
+                                                next_irq_pending = next_irq_pending & (irq_mask | cpuregs_rs1);
+                                                dbg_rs1val <= cpuregs_rs1;
+                                                dbg_rs1val_valid <= 1;
+                                                dbg_rs2val <= cpuregs_rs2;
 						dbg_rs2val_valid <= 1;
 						cpu_state <= cpu_state_fetch;
 					end
@@ -1843,12 +1880,13 @@ module picorv32 #(
 						`debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, cpuregs_rs1);)
 						reg_op1 <= cpuregs_rs1;
 						dbg_rs1val <= cpuregs_rs1;
-						dbg_rs1val_valid <= 1;
-						cpu_state <= cpu_state_ldmem;
-						mem_do_rinst <= 1;
-					end
-					is_slli_srli_srai && !BARREL_SHIFTER: begin
-						`debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, cpuregs_rs1);)
+                                                dbg_rs1val_valid <= 1;
+                                                cpu_state <= cpu_state_ldmem;
+                                                mem_do_rinst <= 1;
+                                                load_lock <= load_lock | (instr_lw & instr_funct3[0]);
+                                        end
+                                        is_slli_srli_srai && !BARREL_SHIFTER: begin
+                                                `debug($display("LD_RS1: %2d 0x%08x", decoded_rs1, cpuregs_rs1);)
 						reg_op1 <= cpuregs_rs1;
 						dbg_rs1val <= cpuregs_rs1;
 						dbg_rs1val_valid <= 1;
@@ -1880,12 +1918,13 @@ module picorv32 #(
 							dbg_rs2val_valid <= 1;
 							(* parallel_case *)
 							case (1'b1)
-								is_sb_sh_sw: begin
-									cpu_state <= cpu_state_stmem;
-									mem_do_rinst <= 1;
-								end
-								is_sll_srl_sra && !BARREL_SHIFTER: begin
-									cpu_state <= cpu_state_shift;
+                                                                is_sb_sh_sw: begin
+                                                                        cpu_state <= cpu_state_stmem;
+                                                                        mem_do_rinst <= 1;
+                                                                        load_lock <= load_lock & ~(instr_sw & instr_funct3[0]);
+                                                                end
+                                                                is_sll_srl_sra && !BARREL_SHIFTER: begin
+                                                                        cpu_state <= cpu_state_shift;
 								end
 								default: begin
 									if (TWO_CYCLE_ALU || (TWO_CYCLE_COMPARE && is_beq_bne_blt_bge_bltu_bgeu)) begin
@@ -1920,22 +1959,23 @@ module picorv32 #(
 							latched_store <= pcpi_int_wr;
 							cpu_state <= cpu_state_fetch;
 						end else
-						if (CATCH_ILLINSN && (pcpi_timeout || instr_ecall_ebreak)) begin
-							pcpi_valid <= 0;
-							`debug($display("EBREAK OR UNSUPPORTED INSN AT 0x%08x", reg_pc);)
-							if (ENABLE_IRQ && !irq_mask[irq_ebreak] && !irq_active) begin
-								next_irq_pending[irq_ebreak] = 1;
-								cpu_state <= cpu_state_fetch;
-							end else
+                                                if (CATCH_ILLINSN && (pcpi_timeout || instr_ecall_ebreak)) begin
+                                                        pcpi_valid <= 0;
+                                                        `debug($display("EBREAK OR UNSUPPORTED INSN AT 0x%08x", reg_pc);)
+                                                        if (ENABLE_IRQ && !irq_mask[irq_ebreak] && !irq_active) begin
+                                                                next_irq_pending[irq_ebreak] = 1;
+                                                                cpu_state <= cpu_state_fetch;
+                                                        end else
 								cpu_state <= cpu_state_trap;
 						end
 					end
-					is_sb_sh_sw: begin
-						cpu_state <= cpu_state_stmem;
-						mem_do_rinst <= 1;
-					end
-					is_sll_srl_sra && !BARREL_SHIFTER: begin
-						cpu_state <= cpu_state_shift;
+                                        is_sb_sh_sw: begin
+                                                cpu_state <= cpu_state_stmem;
+                                                mem_do_rinst <= 1;
+                                                load_lock <= load_lock & ~(instr_sw & instr_funct3[0]);
+                                        end
+                                        is_sll_srl_sra && !BARREL_SHIFTER: begin
+                                                cpu_state <= cpu_state_shift;
 					end
 					default: begin
 						if (TWO_CYCLE_ALU || (TWO_CYCLE_COMPARE && is_beq_bne_blt_bge_bltu_bgeu)) begin
@@ -2065,35 +2105,35 @@ module picorv32 #(
 					next_irq_pending[irq_timer] = 1;
 		end
 
-		if (CATCH_MISALIGN && resetn && (mem_do_rdata || mem_do_wdata)) begin
-			if (mem_wordsize == 0 && reg_op1[1:0] != 0) begin
-				`debug($display("MISALIGNED WORD: 0x%08x", reg_op1);)
-				if (ENABLE_IRQ && !irq_mask[irq_buserror] && !irq_active) begin
-					buserr_address <= reg_op1;
-					next_irq_pending[irq_buserror] = 1;
-				end else
-					cpu_state <= cpu_state_trap;
-			end
-			if (mem_wordsize == 1 && reg_op1[0] != 0) begin
-				`debug($display("MISALIGNED HALFWORD: 0x%08x", reg_op1);)
-				if (ENABLE_IRQ && !irq_mask[irq_buserror] && !irq_active) begin
-					buserr_address <= reg_op1;
-					next_irq_pending[irq_buserror] = 1;
-				end else
+                if (CATCH_MISALIGN && resetn && (mem_do_rdata || mem_do_wdata)) begin
+                        if (mem_wordsize == 0 && reg_op1[1:0] != 0) begin
+                                `debug($display("MISALIGNED WORD: 0x%08x", reg_op1);)
+                                if (ENABLE_IRQ && !irq_mask[irq_buserror] && !irq_active) begin
+                                        buserr_address <= reg_op1;
+                                        next_irq_pending[irq_buserror] = 1;
+                                end else
 					cpu_state <= cpu_state_trap;
-			end
-		end
-		if (CATCH_MISALIGN && resetn && mem_do_rinst && (COMPRESSED_ISA ? reg_pc[0] : |reg_pc[1:0])) begin
-			`debug($display("MISALIGNED INSTRUCTION: 0x%08x", reg_pc);)
-			if (ENABLE_IRQ && !irq_mask[irq_buserror] && !irq_active) begin
-				buserr_address <= reg_pc;
-				next_irq_pending[irq_buserror] = 1;
-			end else
-				cpu_state <= cpu_state_trap;
-		end
-		if (!CATCH_ILLINSN && decoder_trigger_q && !decoder_pseudo_trigger_q && instr_ecall_ebreak) begin
-			cpu_state <= cpu_state_trap;
-		end
+                        end
+                        if (mem_wordsize == 1 && reg_op1[0] != 0) begin
+                                `debug($display("MISALIGNED HALFWORD: 0x%08x", reg_op1);)
+                                if (ENABLE_IRQ && !irq_mask[irq_buserror] && !irq_active) begin
+                                        buserr_address <= reg_op1;
+                                        next_irq_pending[irq_buserror] = 1;
+                                end else
+                                        cpu_state <= cpu_state_trap;
+                        end
+                end
+                if (CATCH_MISALIGN && resetn && mem_do_rinst && (COMPRESSED_ISA ? reg_pc[0] : |reg_pc[1:0])) begin
+                        `debug($display("MISALIGNED INSTRUCTION: 0x%08x", reg_pc);)
+                        if (ENABLE_IRQ && !irq_mask[irq_buserror] && !irq_active) begin
+                                buserr_address <= reg_pc;
+                                next_irq_pending[irq_buserror] = 1;
+                        end else
+                                cpu_state <= cpu_state_trap;
+                end
+                if (!CATCH_ILLINSN && decoder_trigger_q && !decoder_pseudo_trigger_q && instr_ecall_ebreak) begin
+                        cpu_state <= cpu_state_trap;
+                end
 
 		if (!resetn || mem_done) begin
 			mem_do_prefetch <= 0;
@@ -2106,22 +2146,22 @@ module picorv32 #(
 			mem_do_rinst <= 1;
 		if (set_mem_do_rdata)
 			mem_do_rdata <= 1;
-		if (set_mem_do_wdata)
-			mem_do_wdata <= 1;
-
-		irq_pending <= next_irq_pending & ~MASKED_IRQ;
-
-		if (!CATCH_MISALIGN) begin
-			if (COMPRESSED_ISA) begin
-				reg_pc[0] <= 0;
-				reg_next_pc[0] <= 0;
-			end else begin
-				reg_pc[1:0] <= 0;
-				reg_next_pc[1:0] <= 0;
-			end
-		end
-		current_pc = 'bx;
-	end
+                if (set_mem_do_wdata)
+                        mem_do_wdata <= 1;
+
+                irq_pending <= next_irq_pending & ~MASKED_IRQ;
+
+                if (!CATCH_MISALIGN) begin
+                        if (COMPRESSED_ISA) begin
+                                reg_pc[0] <= 0;
+                                reg_next_pc[0] <= 0;
+                        end else begin
+                                reg_pc[1:0] <= 0;
+                                reg_next_pc[1:0] <= 0;
+                     end
+                end
+                current_pc = 'bx;
+        end
 
 `ifdef RISCV_FORMAL
 	reg dbg_irq_call;

+ 3 - 1
iodevs.conf

@@ -13,7 +13,9 @@ our %consts = (
     'SDRAM_BITS' => 25,
     'SYS_MAGIC_MAX80' => unpack('V', 'MAX8'),
     'TIMER_SHIFT' => 5,		# 32 Hz
-    'TTY_CHANNELS' => 2
+    'TTY_CHANNELS' => 2,
+    'USER_CONTEXT_CSR' => 0x7f0,
+    'MEINFO_CSR' => 0x7f1
     );
 
 # I/O address definitions

+ 80 - 0
rv32/atomic.h

@@ -0,0 +1,80 @@
+/*
+ * Hacked atomics with hpa-specific lw.l/sw.u instructions
+ */
+
+#ifndef ATOMIC_H
+#define ATOMIC_H
+
+#include "compiler.h"
+
+typedef uint32_t atomic_t;
+
+/*
+ * lw_l(x): read the atomic_t-sized word that x points to using the
+ * custom "lw.l" (load and lock) instruction, and yield it cast to the
+ * pointee type of x.  Pair it with sw_u(), which issues the matching
+ * "sw.u" (store and unlock).  The "memory" clobber also makes this a
+ * compiler barrier.
+ */
+#define lw_l(x) ({							\
+	    volatile atomic_t *__lwl_ptr = (volatile atomic_t *)(x);	\
+	    atomic_t __lwl_val;						\
+	    asm inline volatile("lw.l %[val],%[mem]"			\
+				: [val] "=r" (__lwl_val),		\
+				  [mem] "+m" (*__lwl_ptr)		\
+				: /* no pure inputs */			\
+				: "memory");				\
+	    (typeof(*(x))) __lwl_val;					\
+	})
+
+/*
+ * Store __v to *__p with the custom "sw.u" (store and unlock)
+ * instruction.  The "memory" clobber makes this a compiler barrier.
+ */
+static inline void _sw_u(volatile atomic_t *__p, atomic_t __v)
+{
+    asm inline volatile("sw.u %[val],%[mem]"
+			: [mem] "+m" (*__p)
+			: [val] "r" (__v)
+			: "memory");
+}
+
+/* sw_u(x,v): type-agnostic front end; casts x to an atomic_t pointer */
+#define sw_u(x,v) _sw_u((volatile atomic_t *)(x), (v))
+
+/* Simple barrier-enforcing accessors */
+
+/* Read *_p with a single plain "lw", acting as a compiler barrier */
+static inline atomic_t atomic_get(const volatile atomic_t *_p)
+{
+    atomic_t word;
+
+    asm inline volatile("lw %[val],%[mem]"
+			: [val] "=r" (word)
+			: [mem] "m" (*_p)
+			: "memory");
+    return word;
+}
+
+/* Write _v to *_p with a single plain "sw", acting as a compiler barrier */
+static inline void atomic_set(volatile atomic_t *_p, atomic_t _v)
+{
+    asm inline volatile("sw %[val],%[mem]"
+			: [mem] "=m" (*_p)
+			: [val] "r" (_v)
+			: "memory");
+}
+
+/*
+ * Exchange: replace *_p with _v inside an lw.l/sw.u pair and return
+ * the value that was there before.
+ */
+static inline atomic_t atomic_swap(volatile atomic_t *_p, atomic_t _v)
+{
+    const atomic_t previous = lw_l(_p);
+
+    sw_u(_p, _v);
+    return previous;
+}
+
+/*
+ * These functions all do read-modify-write returning the old or new values.
+ *
+ * ATOMIC_OP2_RET(name, op, ret) defines atomic_<name>(_p, _v), which
+ * loads *_p with lw_l() into _o, computes _n = _o op _v, stores _n back
+ * with sw_u(), and returns the expression "ret".  "ret" is evaluated
+ * inside the function body, so it may name the locals _o (old value)
+ * and _n (new value).
+ *
+ * ATOMIC_OP2(name, op) instantiates both flavors:
+ *   atomic_<name>()       returns the new value (_n)
+ *   atomic_xchg_<name>()  returns the old value (_o)
+ */
+#define ATOMIC_OP2_RET(name, op, ret)					\
+    static inline atomic_t						\
+    atomic_ ## name (volatile atomic_t *_p, atomic_t _v)		\
+{									\
+    atomic_t _o = lw_l(_p);						\
+    atomic_t _n = _o op _v;						\
+    sw_u(_p, _n);							\
+    return ret;								\
+}
+
+#define ATOMIC_OP2(name, op)			\
+    ATOMIC_OP2_RET(name, op, _n)		\
+    ATOMIC_OP2_RET(xchg_ ## name, op, _o)
+
+ATOMIC_OP2(add, +)
+ATOMIC_OP2(sub, -)
+ATOMIC_OP2(and, &)
+ATOMIC_OP2(or, |)
+ATOMIC_OP2(xor, ^)
+
+/*
+ * Read-modify-write *_p = (*_p & _va) ^ _vx inside an lw.l/sw.u pair;
+ * returns the value *_p held before the update.
+ */
+static inline atomic_t atomic_xchg_and_xor(volatile atomic_t *_p, atomic_t _va, atomic_t _vx)
+{
+    const atomic_t previous = lw_l(_p);
+
+    sw_u(_p, (previous & _va) ^ _vx);
+    return previous;
+}
+
+#endif /* ATOMIC_H */

+ 4 - 0
rv32/head.S

@@ -74,6 +74,10 @@ __start:
 	.option pop
 	addqxi gp,gp,0		// Set gp for interrupt code too
 
+	// Unblock only the fatal exceptions (clear IRQ mask bits 1 and 2)
+	li a0,~6
+	maskirq zero,a0,zero
+
 	// Clear esplink_head.magic as quickly as possible
 	sw zero,esplink_head,a0
 

+ 6 - 2
rv32/killed.c

@@ -11,12 +11,13 @@ static void __hot __text_hot killed(const char *how)
     /* Cannot use con_printf() here */
     const uint16_t *pcp;
     size_t mtval;
-    size_t mepc;
+    size_t mepc, meinfo, task;
     size_t pc;
 
     /* Try to move back to the previous instruction (wrong for jumps...) */
     asm volatile("csrr %0,mepc" : "=r" (mepc));
-    pc = mepc-4 + (mepc & 1);
+    asm volatile("csrr %0,%1" : "=r" (meinfo) : "i" (MEINFO_CSR));
+    pc = mepc - (((meinfo & 3) + 1) << 4);
     pcp = (const uint16_t *)pc;
 
     con_puts(hotstr("ERROR: "));
@@ -28,6 +29,9 @@ static void __hot __text_hot killed(const char *how)
     con_puts(hotstr(")\nBad address: 0x"));
     asm volatile("csrr %0,mtval" : "=r" (mtval));
     con_print_hex(mtval);
+    con_puts(hotstr(" task "));
+    asm volatile("csrr %0,user_context" : "=r" (task));
+    con_putc('0' + task);
     con_putc('\n');
 
     for (int i = 0; i < 32; i += 8) {

+ 1 - 1
tools/gnusrc/binutils

@@ -1 +1 @@
-Subproject commit 2c90c941d59acdc6d9948e4bdbfed6e9ef4fec3a
+Subproject commit 6a3e883fcf5ea9f42a349d9ffc7feafcb7c20faa