Browse Source

picorv32: add support for multiple user CPU contexts

Add a custom extension for multiple user CPU contexts, as a way to
leverage the fact that FPGA SRAM arrays are much bigger than they need
to be to hold even two copies of the register file (they are, in
fact, in increments of 8.)

Not yet used, but this can allow an RTOS context switch to be done in
as little as 4 instructions, or 3 if from within an interrupt handler.
H. Peter Anvin 2 years ago
parent
commit
1917765d2f

BIN
esp32/output/max80.ino.bin


+ 3 - 3
fpga/max80.qpf

@@ -19,15 +19,15 @@
 #
 # Quartus Prime
 # Version 21.1.0 Build 842 10/21/2021 SJ Lite Edition
-# Date created = 09:33:44  June 06, 2022
+# Date created = 11:53:07  June 06, 2022
 #
 # -------------------------------------------------------------------------- #
 
 QUARTUS_VERSION = "21.1"
-DATE = "09:33:44  June 06, 2022"
+DATE = "11:53:07  June 06, 2022"
 
 # Revisions
 
-PROJECT_REVISION = "v2"
 PROJECT_REVISION = "v1"
+PROJECT_REVISION = "v2"
 PROJECT_REVISION = "bypass"

+ 2 - 1
fpga/max80.sv

@@ -527,7 +527,8 @@ module max80
 	      .MASKED_IRQ ( irq_masked ),
 	      .LATCHED_IRQ ( 32'h0000_0007 ),
 	      .REGS_INIT_ZERO ( 1 ),
-	      .STACKADDR ( 1'b1 << SRAM_BITS )
+	      .STACKADDR ( 1'b1 << SRAM_BITS ),
+	      .USER_CONTEXTS ( 7 )
    ) cpu (
 	.clk ( sys_clk ),
 	.resetn ( rst_n ),

BIN
fpga/output/v1.fw


BIN
fpga/output/v1.jic


BIN
fpga/output/v1.rbf.gz


BIN
fpga/output/v1.rpd.gz


BIN
fpga/output/v1.sof


BIN
fpga/output/v1.svf.gz


BIN
fpga/output/v1.xsvf.gz


BIN
fpga/output/v2.fw


BIN
fpga/output/v2.jic


BIN
fpga/output/v2.rbf.gz


BIN
fpga/output/v2.rpd.gz


BIN
fpga/output/v2.sof


BIN
fpga/output/v2.svf.gz


BIN
fpga/output/v2.xsvf.gz


+ 84 - 54
fpga/picorv32.v

@@ -15,7 +15,7 @@
  *  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  *  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  *
- *  Changes by hpa 2021:
+ *  Changes by hpa 2021-2022:
  *  - maskirq instruction takes a mask in rs2.
  *  - retirq opcode changed to mret; no functional change.
  *  - qregs replaced with a full register bank switch. In general,
@@ -27,9 +27,11 @@
  *  - PROGADDR_RESET and PROGADDR_IRQ changed to ports (allows external
  *    implementation of vectorized interrupts or fallback reset.)
  *  - maskirq, waitirq and timer require func3 == 3'b000.
- * -  add two masks to waitirq: an AND mask and an OR mask.
+ *  - add two masks to waitirq: an AND mask and an OR mask.
  *    waitirq exits if either all interrupts in the AND
  *    mask are pending or any interrupt in the OR mask is pending.
+ *  - multiple user (non-interrupt) register banks (tasks) now supported;
+ *    the bank used outside interrupts is selected by the user_context CSR (0x7f0).
  */
 
 /* verilator lint_off WIDTH */
@@ -92,60 +94,61 @@ module picorv32 #(
 	parameter [ 0:0] ENABLE_FAST_MUL = 0,
 	parameter [ 0:0] ENABLE_DIV = 0,
 	parameter [ 0:0] ENABLE_IRQ = 0,
-	parameter [ 0:0] ENABLE_IRQ_QREGS = 1,
 	parameter [ 0:0] ENABLE_IRQ_TIMER = 1,
 	parameter [ 0:0] ENABLE_TRACE = 0,
 	parameter [ 0:0] REGS_INIT_ZERO = 0,
 	parameter [31:0] MASKED_IRQ = 32'h 0000_0000,
 	parameter [31:0] LATCHED_IRQ = 32'h ffff_ffff,
 	parameter [31:0] STACKADDR = 32'h ffff_ffff,
-	parameter [ 4:0] RA_IRQ_REG   = ENABLE_IRQ_QREGS ? 26 : 3,
-	parameter [ 4:0] MASK_IRQ_REG = ENABLE_IRQ_QREGS ? 27 : 4
+	parameter [ 4:0] RA_IRQ_REG    = ENABLE_IRQ_QREGS ? 26 : 3,
+	parameter [ 4:0] MASK_IRQ_REG  = ENABLE_IRQ_QREGS ? 27 : 4,
+        parameter        USER_CONTEXTS = 1,
+	parameter [ 0:0] ENABLE_IRQ_QREGS = USER_CONTEXTS > 0
 ) (
-	input 		  clk, resetn,
-	input 		  halt,
-	output reg 	  trap,
+	input		  clk, resetn,
+	input		  halt,
+	output reg	  trap,
 
-	input [31:0] 	  progaddr_reset,
-        input [31:0] 	  progaddr_irq,
+	input [31:0]	  progaddr_reset,
+        input [31:0]	  progaddr_irq,
 
-	output reg 	  mem_valid,
-	output reg 	  mem_instr,
-	input 		  mem_ready,
+	output reg	  mem_valid,
+	output reg	  mem_instr,
+	input		  mem_ready,
 
 	output reg [31:0] mem_addr,
 	output reg [31:0] mem_wdata,
 	output reg [ 3:0] mem_wstrb,
-	input [31:0] 	  mem_rdata,
+	input [31:0]	  mem_rdata,
 
 	// Look-Ahead Interface
-	output 		  mem_la_read,
-	output 		  mem_la_write,
-	output [31:0] 	  mem_la_addr,
+	output		  mem_la_read,
+	output		  mem_la_write,
+	output [31:0]	  mem_la_addr,
 	output reg [31:0] mem_la_wdata,
 	output reg [ 3:0] mem_la_wstrb,
 
 	// Pico Co-Processor Interface (PCPI)
-	output reg 	  pcpi_valid,
+	output reg	  pcpi_valid,
 	output reg [31:0] pcpi_insn,
-	output [31:0] 	  pcpi_rs1,
-	output [31:0] 	  pcpi_rs2,
-	input 		  pcpi_wr,
-	input [31:0] 	  pcpi_rd,
-	input 		  pcpi_wait,
-	input 		  pcpi_ready,
+	output [31:0]	  pcpi_rs1,
+	output [31:0]	  pcpi_rs2,
+	input		  pcpi_wr,
+	input [31:0]	  pcpi_rd,
+	input		  pcpi_wait,
+	input		  pcpi_ready,
 
 	// IRQ Interface
-	input [31:0] 	  irq,
+	input [31:0]	  irq,
 	output reg [31:0] eoi,
 
 `ifdef RISCV_FORMAL
-	output reg 	  rvfi_valid,
+	output reg	  rvfi_valid,
 	output reg [63:0] rvfi_order,
 	output reg [31:0] rvfi_insn,
-	output reg 	  rvfi_trap,
-	output reg 	  rvfi_halt,
-	output reg 	  rvfi_intr,
+	output reg	  rvfi_trap,
+	output reg	  rvfi_halt,
+	output reg	  rvfi_intr,
 	output reg [ 1:0] rvfi_mode,
 	output reg [ 1:0] rvfi_ixl,
 	output reg [ 4:0] rvfi_rs1_addr,
@@ -174,7 +177,7 @@ module picorv32 #(
 `endif
 
 	// Trace Interface
-	output reg 	  trace_valid,
+	output reg	  trace_valid,
 	output reg [35:0] trace_data
 );
 	localparam integer irq_timer = 0;
@@ -182,11 +185,18 @@ module picorv32 #(
 	localparam integer irq_buserror = 2;
 
 	localparam integer xreg_count   = ENABLE_REGS_16_31 ? 32 : 16;
-        localparam integer qreg_count   = (ENABLE_IRQ && ENABLE_IRQ_QREGS) ? xreg_count : 0;
-        localparam integer qreg_offset  = qreg_count; // 0 for no qregs
-        localparam integer regfile_size = xreg_count + qreg_count;
-        localparam integer regindex_bits = $clog2(regfile_size);
-	wire [regindex_bits-1:0] xreg_mask = xreg_count - 1;
+        localparam integer xreg_bits    = $clog2(xreg_count);
+        localparam integer xreg_banks   = USER_CONTEXTS + 1;
+        localparam integer context_bits = $clog2(xreg_banks);
+        localparam integer regfile_size = xreg_count * xreg_banks;
+        localparam integer regfile_bits = $clog2(regfile_size);
+	wire [regfile_bits-1:0] xreg_mask = xreg_count - 1;
+
+        reg [context_bits-1:0]	user_context;
+
+        wire [regfile_bits-1:0]		xreg_offset;
+        assign xreg_offset[regfile_bits-1:xreg_bits] = irq_active ? 0 : user_context;
+	assign xreg_offset[xreg_bits-1:0] = 0;
 
 	localparam WITH_PCPI = ENABLE_PCPI || ENABLE_MUL || ENABLE_FAST_MUL || ENABLE_DIV;
 
@@ -241,7 +251,7 @@ module picorv32 #(
 	endtask
 
 `ifdef DEBUGREGS
-`define dr_reg(x) cpuregs[x | (irq_active ? qreg_offset : 0)]
+`define dr_reg(x) cpuregs[x | xreg_offset]
 
 	wire [31:0] dbg_reg_x0  = 0;
 	wire [31:0] dbg_reg_x1  = `dr_reg(1);
@@ -675,10 +685,11 @@ module picorv32 #(
 	reg instr_add, instr_sub, instr_sll, instr_slt, instr_sltu, instr_xor, instr_srl, instr_sra, instr_or, instr_and;
 	reg instr_csrr, instr_ecall_ebreak;
 	reg instr_addqxi, instr_addxqi, instr_retirq, instr_maskirq, instr_waitirq, instr_timer;
+        reg [2:0] instr_funct2;
 
 	wire instr_trap;
 
-	reg [regindex_bits-1:0] decoded_rd, decoded_rs1, decoded_rs2;
+	reg [regfile_bits-1:0] decoded_rd, decoded_rs1, decoded_rs2;
 	reg [31:0] decoded_imm, decoded_imm_j;
 	reg decoder_trigger;
 	reg decoder_trigger_q;
@@ -1057,16 +1068,16 @@ module picorv32 #(
 
 		        if (ENABLE_IRQ && ENABLE_IRQ_QREGS)
 			  begin
-			     decoded_rd [regindex_bits-1] <= irq_active;
-			     decoded_rs1[regindex_bits-1] <= irq_active;
-			     decoded_rs2[regindex_bits-1] <= irq_active;
+			     decoded_rd [regfile_bits-1:xreg_bits] <= irq_active ? 0 : user_context;
+			     decoded_rs1[regfile_bits-1:xreg_bits] <= irq_active ? 0 : user_context;
+			     decoded_rs2[regfile_bits-1:xreg_bits] <= irq_active ? 0 : user_context;
 
 			     // addqxi, addxqi
 			     if (mem_rdata_latched[6:0] == 7'b0001011 && mem_rdata_latched[14:13] == 2'b01) begin
 				is_addqxi <= 1; // True for both addqxi and addxqi
 
-				decoded_rd [regindex_bits-1] <= ~mem_rdata_latched[12]; // addxqi
-				decoded_rs1[regindex_bits-1] <=  mem_rdata_latched[12]; // addqxi
+				decoded_rd [regfile_bits-1:xreg_bits] <= ~mem_rdata_latched[12] ? 0 : user_context;
+				decoded_rs1[regfile_bits-1:xreg_bits] <=  mem_rdata_latched[12] ? 0 : user_context;
 			     end
 			  end
 		end // if (mem_do_rinst && mem_done)
@@ -1113,10 +1124,9 @@ module picorv32 #(
 			instr_or    <= is_alu_reg_reg && mem_rdata_q[14:12] == 3'b110 && mem_rdata_q[31:25] == 7'b0000000;
 			instr_and   <= is_alu_reg_reg && mem_rdata_q[14:12] == 3'b111 && mem_rdata_q[31:25] == 7'b0000000;
 
-			// The only CSR reference supported is CSRR
-			instr_csrr     <= (mem_rdata_q[6:0] == 7'b1110011 && mem_rdata_q[19:12] == 'b00000010);
+		        instr_csrr     <= (mem_rdata_q[6:0] == 7'b1110011 && mem_rdata_q[13:12] != 2'b00);
 
-			instr_ecall_ebreak <= ((mem_rdata_q[6:0] == 7'b1110011 && !mem_rdata_q[31:21] && !mem_rdata_q[19:7]) ||
+			instr_ecall_ebreak <= ((mem_rdata_q[6:0] == 7'b1110011 && !mem_rdata_q[13:12]) ||
 					(COMPRESSED_ISA && mem_rdata_q[15:0] == 16'h9002));
 
 			instr_maskirq <= mem_rdata_q[6:0] == 7'b0001011 && mem_rdata_q[14:12] == 3'b000 && mem_rdata_q[31:25] == 7'b0000011 && ENABLE_IRQ;
@@ -1163,7 +1173,9 @@ module picorv32 #(
 					decoded_imm <= $signed({mem_rdata_q[31:25], mem_rdata_q[11:7]});
 				default:
 					decoded_imm <= $signed(mem_rdata_q[31:20]);
-			endcase
+			endcase // case (1'b1)
+
+		        instr_funct2 <= mem_rdata_q[14:12];
 		end
 
 		if (!resetn) begin
@@ -1239,7 +1251,7 @@ module picorv32 #(
 	reg latched_is_lu;
 	reg latched_is_lh;
 	reg latched_is_lb;
-	reg [regindex_bits-1:0] latched_rd;
+	reg [regfile_bits-1:0] latched_rd;
 
 	reg [31:0] current_pc;
 	assign next_pc = latched_store && latched_branch ? reg_out & ~1 : reg_next_pc;
@@ -1336,7 +1348,7 @@ module picorv32 #(
 	reg [31:0] cpuregs_wrdata;
 	reg [31:0] cpuregs_rs1;
 	reg [31:0] cpuregs_rs2;
-	reg [regindex_bits-1:0] decoded_rs;
+	reg [regfile_bits-1:0] decoded_rs;
 
 	always @* begin
 		cpuregs_write = 0;
@@ -1411,9 +1423,9 @@ module picorv32 #(
 	wire[31:0] cpuregs_rdata1;
 	wire[31:0] cpuregs_rdata2;
 
-	wire [5:0] cpuregs_waddr = latched_rd;
-	wire [5:0] cpuregs_raddr1 = ENABLE_REGS_DUALPORT ? decoded_rs1 : decoded_rs;
-	wire [5:0] cpuregs_raddr2 = ENABLE_REGS_DUALPORT ? decoded_rs2 : 0;
+	wire [regfile_bits-1:0] cpuregs_waddr = latched_rd;
+	wire [regfile_bits-1:0] cpuregs_raddr1 = ENABLE_REGS_DUALPORT ? decoded_rs1 : decoded_rs;
+	wire [regfile_bits-1:0] cpuregs_raddr2 = ENABLE_REGS_DUALPORT ? decoded_rs2 : 0;
 
 	`PICORV32_REGS cpuregs (
 		.clk(clk),
@@ -1443,6 +1455,8 @@ module picorv32 #(
 				  decoder_trigger &&
 				  (!ENABLE_IRQ || irq_delay || irq_active || !(irq_pending & ~irq_mask));
 
+        wire [31:0] csrr_src = instr_funct2[2] ? { 29'b0, decoded_rs1[4:0] } : cpuregs_rs1;
+
 	always @(posedge clk) begin
 		trap <= 0;
 		reg_sh <= 'bx;
@@ -1510,6 +1524,7 @@ module picorv32 #(
 			latched_is_lu <= 0;
 			latched_is_lh <= 0;
 			latched_is_lb <= 0;
+		        user_context <= USER_CONTEXTS; // On reset, select the highest supported context
 			pcpi_valid <= 0;
 			pcpi_timeout <= 0;
 			irq_active <= 0;
@@ -1521,7 +1536,7 @@ module picorv32 #(
 			timer <= 0;
 			if (~STACKADDR) begin
 				latched_store <= 1;
-				latched_rd <= 2;
+				latched_rd <= (USER_CONTEXTS << xreg_bits) | 2;
 				reg_out <= STACKADDR;
 			end
 			cpu_state <= cpu_state_fetch;
@@ -1587,8 +1602,7 @@ module picorv32 #(
 						irq_state == 2'b00 ? 2'b01 :
 						irq_state == 2'b01 ? 2'b10 : 2'b00;
 					latched_compr <= latched_compr;
-				        latched_rd <= qreg_offset |
-						      (irq_state[0] ? MASK_IRQ_REG : RA_IRQ_REG);
+				        latched_rd <= irq_state[0] ? MASK_IRQ_REG : RA_IRQ_REG;
 				end else
 				if (ENABLE_IRQ && do_waitirq) begin
 					if (&(irq_pending | ~reg_op1) || |(irq_pending & reg_op2)) begin
@@ -1675,6 +1689,7 @@ module picorv32 #(
 						end
 					end
 					instr_csrr: begin
+						// Always read (suppress iff rd == 0 and side effects)
 						reg_out <= 32'bx;
 						case (decoded_imm[11:0])
 							12'hc00, 12'hc01:	 // cycle, time
@@ -1687,9 +1702,24 @@ module picorv32 #(
 							  if (ENABLE_COUNTERS64) reg_out <= count_instr[63:32];
 							12'h343:		 // mtval
 							  if (CATCH_MISALIGN)    reg_out <= buserr_address;
+						        12'h7f0:                 // user_context
+							  if (USER_CONTEXTS > 0) reg_out <= user_context;
 							default:
 							  reg_out <= 32'bx;
 						endcase // case (decoded_imm[11:0])
+
+					        // CSR bit set/clear forms (csrrs/csrrc and immediates) are not supported at the moment; treat them as read-only
+					        if (~instr_funct2[1])
+						  case (decoded_imm[11:0])
+						    12'h7f0: begin			// user_context
+						       user_context <= csrr_src;
+						       irq_active   <= 1'b1;
+						    end
+						    default: begin
+						       // Do nothing
+						    end
+						  endcase // case (decoded_imm[11:0])
+
 						latched_store <= 1;
 						cpu_state <= cpu_state_fetch;
 					end