Skip to content

A-extension updates #780

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions bhv/cv32e40x_wrapper.sv
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,7 @@ module cv32e40x_wrapper
.tselect_q (core_i.cs_registers_i.debug_triggers_i.gen_triggers.tselect_q),
.tdata1_q (core_i.cs_registers_i.debug_triggers_i.gen_triggers.tdata1_q),
.tdata2_q (core_i.cs_registers_i.debug_triggers_i.gen_triggers.tdata2_q),
.lsu_addr_match_en (core_i.cs_registers_i.debug_triggers_i.gen_triggers.lsu_addr_match_en),
.*);
end
endgenerate
Expand Down Expand Up @@ -347,6 +348,9 @@ module cv32e40x_wrapper
.irq_ack (core_i.irq_ack),
.mie_n (core_i.cs_registers_i.mie_n),
.mie_we (core_i.cs_registers_i.mie_we),
.lsu_exception_wb (core_i.wb_stage_i.lsu_exception),
.lsu_wpt_match_wb (core_i.wb_stage_i.lsu_wpt_match),
.lsu_exokay_wb (core_i.data_exokay_i), // todo: Could poke into LSU, but this signal is fed directly through the LSU
.*);
generate
if (SMCLIC) begin : clic_asserts
Expand Down
2 changes: 1 addition & 1 deletion rtl/cv32e40x_a_decoder.sv
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ module cv32e40x_a_decoder import cv32e40x_pkg::*;
decoder_ctrl_o.rf_we = 1'b1;
decoder_ctrl_o.alu_op_a_mux_sel = OP_A_REGA_OR_FWD;
decoder_ctrl_o.alu_op_b_mux_sel = OP_B_NONE;
decoder_ctrl_o.lsu_size = 2'b00;
decoder_ctrl_o.lsu_size = 2'b10; // All atomics are 32-bit word accesses
decoder_ctrl_o.lsu_sext = 1'b1;
decoder_ctrl_o.lsu_atop = {1'b1, instr_rdata_i[31:27]};

Expand Down
14 changes: 12 additions & 2 deletions rtl/cv32e40x_controller.sv
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
module cv32e40x_controller import cv32e40x_pkg::*;
#(
parameter bit X_EXT = 0,
parameter bit A_EXT = 0,
parameter int unsigned REGFILE_NUM_READ_PORTS = 2,
parameter bit SMCLIC = 0,
parameter int SMCLIC_ID_WIDTH = 5
Expand Down Expand Up @@ -77,9 +78,12 @@ module cv32e40x_controller import cv32e40x_pkg::*;
// LSU
input logic data_stall_wb_i, // WB stalled by LSU
input logic [1:0] lsu_err_wb_i, // LSU bus error in WB stage
input logic lsu_busy_i, // LSU is busy with outstanding transfers
input logic lsu_busy_i, // LSU is busy with outstanding transfers or is initiating a new transfer
input logic lsu_bus_busy_i, // LSU is busy with outstanding transfers
input logic lsu_interruptible_i, // LSU may be interrupted
input logic lsu_valid_wb_i, // LSU is valid in WB (factors in rvalid from either OBI bus or write buffer)
input lsu_atomic_e lsu_atomic_ex_i,
input lsu_atomic_e lsu_atomic_wb_i,

// jump/branch signals
input logic branch_decision_ex_i, // branch decision signal from EX ALU
Expand Down Expand Up @@ -236,7 +240,8 @@ module cv32e40x_controller import cv32e40x_pkg::*;
// Hazard/bypass/stall control instance
cv32e40x_controller_bypass
#(
.REGFILE_NUM_READ_PORTS ( REGFILE_NUM_READ_PORTS )
.REGFILE_NUM_READ_PORTS ( REGFILE_NUM_READ_PORTS ),
.A_EXT ( A_EXT )
)
bypass_i
(
Expand All @@ -261,6 +266,11 @@ module cv32e40x_controller import cv32e40x_pkg::*;
.wb_ready_i ( wb_ready_i ),
.csr_irq_enable_write_i ( csr_irq_enable_write_i ),

// From LSU
.lsu_atomic_ex_i ( lsu_atomic_ex_i ),
.lsu_atomic_wb_i ( lsu_atomic_wb_i ),
.lsu_bus_busy_i ( lsu_bus_busy_i ),

// Outputs
.ctrl_byp_o ( ctrl_byp_o )
);
Expand Down
26 changes: 25 additions & 1 deletion rtl/cv32e40x_controller_bypass.sv
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@

module cv32e40x_controller_bypass import cv32e40x_pkg::*;
#(
parameter int unsigned REGFILE_NUM_READ_PORTS = 2
parameter int unsigned REGFILE_NUM_READ_PORTS = 2,
parameter bit A_EXT = 1'b0
)
(
// From decoder
Expand All @@ -58,6 +59,11 @@ module cv32e40x_controller_bypass import cv32e40x_pkg::*;
input logic wb_ready_i, // WB stage is ready
input logic csr_irq_enable_write_i, // WB is writing to a CSR that may enable an interrupt.

// From LSU
input lsu_atomic_e lsu_atomic_ex_i,
input lsu_atomic_e lsu_atomic_wb_i,
input logic lsu_bus_busy_i,

// Controller Bypass outputs
output ctrl_byp_t ctrl_byp_o
);
Expand Down Expand Up @@ -232,6 +238,24 @@ module cv32e40x_controller_bypass import cv32e40x_pkg::*;
end
end

generate
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Indent everything 2 positions to the right

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Indented

if (A_EXT) begin : atomic_stall
always_comb begin
ctrl_byp_o.atomic_stall = 1'b0;

// Stall EX for the following two scenarios
// 1: There is an atomic instruction in EX while we have outstanding transactions on the bus
// 2: There is any LSU instruction in EX while there is an outstanding atomic transfer in progress
if ((id_ex_pipe_i.lsu_en && (lsu_atomic_ex_i != AT_NONE) && id_ex_pipe_i.instr_valid) ||
(id_ex_pipe_i.lsu_en && ex_wb_pipe_i.lsu_en && (lsu_atomic_wb_i != AT_NONE) && ex_wb_pipe_i.instr_valid)) begin
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(id_ex_pipe_i.lsu_en && ex_wb_pipe_i.lsu_en && (lsu_atomic_wb_i != AT_NONE) && ex_wb_pipe_i.instr_valid) ->
(id_ex_pipe_i.lsu_en && id_ex_pipe_i.instr_valid && ex_wb_pipe_i.lsu_en && (lsu_atomic_wb_i != AT_NONE) && ex_wb_pipe_i.instr_valid)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated

ctrl_byp_o.atomic_stall = lsu_bus_busy_i;
end
end
end else begin : no_atomic_stall
assign ctrl_byp_o.atomic_stall = 1'b0;
end
endgenerate

assign ctrl_byp_o.id_stage_abort = ctrl_byp_o.deassert_we;

// Forwarding control unit
Expand Down
2 changes: 1 addition & 1 deletion rtl/cv32e40x_controller_fsm.sv
Original file line number Diff line number Diff line change
Expand Up @@ -643,7 +643,7 @@ module cv32e40x_controller_fsm import cv32e40x_pkg::*;
// Also halting EX if an offloaded instruction in WB may cause an exception, such that a following offloaded
// instruction can correctly receive commit_kill.
// Halting EX when an instruction in WB may cause an interrupt to become pending.
ctrl_fsm_o.halt_ex = ctrl_byp_i.minstret_stall || ctrl_byp_i.xif_exception_stall || ctrl_byp_i.irq_enable_stall || ctrl_byp_i.mnxti_ex_stall;
ctrl_fsm_o.halt_ex = ctrl_byp_i.minstret_stall || ctrl_byp_i.xif_exception_stall || ctrl_byp_i.irq_enable_stall || ctrl_byp_i.mnxti_ex_stall || ctrl_byp_i.atomic_stall;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Update above comments with reason why atomic stall is needed

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added comment

ctrl_fsm_o.halt_wb = 1'b0;
ctrl_fsm_o.halt_limited_wb = 1'b0;

Expand Down
15 changes: 13 additions & 2 deletions rtl/cv32e40x_core.sv
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,8 @@ module cv32e40x_core import cv32e40x_pkg::*;

// Busy signals
logic if_busy;
logic lsu_busy;
logic lsu_busy; // LSU is busy, outstanding OBI or new transaction being initiated
logic lsu_bus_busy; // LSU has outstanding transactions on the OBI bus
logic lsu_interruptible;

// ID/EX pipeline
Expand Down Expand Up @@ -246,10 +247,12 @@ module cv32e40x_core import cv32e40x_pkg::*;
logic lsu_split_ex;
logic lsu_first_op_ex;
logic lsu_last_op_ex;
lsu_atomic_e lsu_atomic_ex;
mpu_status_e lsu_mpu_status_wb;
logic lsu_wpt_match_wb;
logic [31:0] lsu_rdata_wb;
logic [1:0] lsu_err_wb;
lsu_atomic_e lsu_atomic_wb;

logic lsu_valid_0; // Handshake with EX
logic lsu_ready_ex;
Expand Down Expand Up @@ -656,6 +659,7 @@ module cv32e40x_core import cv32e40x_pkg::*;

// Control signals
.busy_o ( lsu_busy ),
.bus_busy_o ( lsu_bus_busy ),
.interruptible_o ( lsu_interruptible ),

// Trigger match
Expand All @@ -665,6 +669,7 @@ module cv32e40x_core import cv32e40x_pkg::*;
.lsu_split_0_o ( lsu_split_ex ),
.lsu_first_op_0_o ( lsu_first_op_ex ),
.lsu_last_op_0_o ( lsu_last_op_ex ),
.lsu_atomic_0_o ( lsu_atomic_ex ),

// Outputs to trigger module
.lsu_addr_o ( lsu_addr_ex ),
Expand All @@ -680,6 +685,7 @@ module cv32e40x_core import cv32e40x_pkg::*;
.lsu_rdata_1_o ( lsu_rdata_wb ),
.lsu_mpu_status_1_o ( lsu_mpu_status_wb ),
.lsu_wpt_match_1_o ( lsu_wpt_match_wb ),
.lsu_atomic_1_o ( lsu_atomic_wb ),

// Valid/ready
.valid_0_i ( lsu_valid_ex ), // First LSU stage (EX)
Expand Down Expand Up @@ -843,7 +849,8 @@ module cv32e40x_core import cv32e40x_pkg::*;
.lsu_valid_ex_i ( lsu_valid_ex ),
.lsu_addr_ex_i ( lsu_addr_ex ),
.lsu_we_ex_i ( lsu_we_ex ),
.lsu_be_ex_i ( lsu_be_ex )
.lsu_be_ex_i ( lsu_be_ex ),
.lsu_atomic_ex_i ( lsu_atomic_ex )
);

////////////////////////////////////////////////////////////////////
Expand All @@ -858,6 +865,7 @@ module cv32e40x_core import cv32e40x_pkg::*;
cv32e40x_controller
#(
.X_EXT ( X_EXT ),
.A_EXT ( A_EXT ),
.REGFILE_NUM_READ_PORTS ( REGFILE_NUM_READ_PORTS ),
.SMCLIC ( SMCLIC ),
.SMCLIC_ID_WIDTH ( SMCLIC_ID_WIDTH )
Expand Down Expand Up @@ -910,8 +918,11 @@ module cv32e40x_core import cv32e40x_pkg::*;
.data_stall_wb_i ( data_stall_wb ),
.lsu_err_wb_i ( lsu_err_wb ),
.lsu_busy_i ( lsu_busy ),
.lsu_bus_busy_i ( lsu_bus_busy ),
.lsu_interruptible_i ( lsu_interruptible ),
.lsu_valid_wb_i ( lsu_valid_wb ),
.lsu_atomic_ex_i ( lsu_atomic_ex ),
.lsu_atomic_wb_i ( lsu_atomic_wb ),

// jump/branch control
.branch_decision_ex_i ( branch_decision_ex ),
Expand Down
4 changes: 3 additions & 1 deletion rtl/cv32e40x_cs_registers.sv
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,8 @@ module cv32e40x_cs_registers import cv32e40x_pkg::*;
input logic lsu_valid_ex_i,
input logic [31:0] lsu_addr_ex_i,
input logic lsu_we_ex_i,
input logic [3:0] lsu_be_ex_i
input logic [3:0] lsu_be_ex_i,
input lsu_atomic_e lsu_atomic_ex_i
);

localparam logic [31:0] CORE_MISA =
Expand Down Expand Up @@ -1582,6 +1583,7 @@ module cv32e40x_cs_registers import cv32e40x_pkg::*;
.lsu_we_ex_i ( lsu_we_ex_i ),
.lsu_be_ex_i ( lsu_be_ex_i ),
.priv_lvl_ex_i ( id_ex_pipe_i.priv_lvl ),
.lsu_atomic_ex_i ( lsu_atomic_ex_i ),

// WB inputs
.priv_lvl_wb_i ( ex_wb_pipe_i.priv_lvl ),
Expand Down
4 changes: 3 additions & 1 deletion rtl/cv32e40x_debug_triggers.sv
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ import cv32e40x_pkg::*;
input logic lsu_we_ex_i,
input logic [3:0] lsu_be_ex_i,
input privlvl_t priv_lvl_ex_i,
input lsu_atomic_e lsu_atomic_ex_i,

// WB stage inputs
input privlvl_t priv_lvl_wb_i,
Expand Down Expand Up @@ -331,7 +332,8 @@ import cv32e40x_pkg::*;
(tdata1_rdata[idx][MCONTROL2_6_U] && (priv_lvl_ex_i == PRIV_LVL_U));

// Enable LSU address matching
assign lsu_addr_match_en[idx] = lsu_valid_ex_i && ((tdata1_rdata[idx][MCONTROL2_6_LOAD] && !lsu_we_ex_i) || (tdata1_rdata[idx][MCONTROL2_6_STORE] && lsu_we_ex_i));
// AMO transactions have lsu_we_ex_i == 1'b1, but also perform a read. Thus AMOs will also match loads regardless of the rf_we bit.
assign lsu_addr_match_en[idx] = lsu_valid_ex_i && ((tdata1_rdata[idx][MCONTROL2_6_LOAD] && (!lsu_we_ex_i || (lsu_atomic_ex_i == AT_AMO))) || (tdata1_rdata[idx][MCONTROL2_6_STORE] && lsu_we_ex_i));

// Signal trigger match for LSU address
assign trigger_match_ex[idx] = ((tdata1_rdata[idx][TDATA1_TTYPE_HIGH:TDATA1_TTYPE_LOW] == TTYPE_MCONTROL) ||
Expand Down
51 changes: 46 additions & 5 deletions rtl/cv32e40x_load_store_unit.sv
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ module cv32e40x_load_store_unit import cv32e40x_pkg::*;

// Control outputs
output logic busy_o,
output logic bus_busy_o, // There are outstanding OBI transactions
output logic interruptible_o,

// Trigger match input
Expand All @@ -60,6 +61,7 @@ module cv32e40x_load_store_unit import cv32e40x_pkg::*;
output logic lsu_split_0_o, // Misaligned access is split in two transactions (to controller)
output logic lsu_first_op_0_o, // First operation is active in EX
output logic lsu_last_op_0_o, // Last operation is active in EX
output lsu_atomic_e lsu_atomic_0_o, // Is there an atomic in EX, and of which type
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IS -> Is

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed


// outputs to trigger module
output logic [31:0] lsu_addr_o,
Expand All @@ -71,6 +73,7 @@ module cv32e40x_load_store_unit import cv32e40x_pkg::*;
output logic [31:0] lsu_rdata_1_o, // LSU read data
output mpu_status_e lsu_mpu_status_1_o, // MPU (PMA) status, response/WB timing. To controller and wb_stage
output logic lsu_wpt_match_1_o, // Address match trigger, WB timing.
output lsu_atomic_e lsu_atomic_1_o, // Is there an atomic in WB, and of which type.

// Handshakes
input logic valid_0_i, // Handshakes for first LSU stage (EX)
Expand Down Expand Up @@ -105,6 +108,7 @@ module cv32e40x_load_store_unit import cv32e40x_pkg::*;
logic mpu_trans_valid;
logic mpu_trans_ready;
logic mpu_trans_pushpop;
logic mpu_trans_atomic;
obi_data_req_t mpu_trans;

// Transaction response
Expand Down Expand Up @@ -392,10 +396,45 @@ module cv32e40x_load_store_unit import cv32e40x_pkg::*;
end
end

// output to register file
// Always rdata_ext regardless of split accesses
// Output will be valid (valid_1_o) only for the last phase of split access.
assign lsu_rdata_1_o = rdata_ext;
// Set rdata output and atomic type output depending on A_EXT
generate
// Elaboration-time selection on A_EXT:
//  - a_ext    : decodes the atomic type of the current transaction, registers it for the
//               response (WB) side, and overrides the read data for SC.W.
//  - no_a_ext : ties both atomic type outputs to AT_NONE and passes rdata_ext through.
if (A_EXT) begin : a_ext
// Registered atomic type. Drives lsu_atomic_1_o and selects the SC.W result below.
lsu_atomic_e lsu_atomic_q;

// lsu_atomic_q is only written when ctrl_update is high; otherwise it holds its value.
always_ff @(posedge clk, negedge rst_n)
begin
if (rst_n == 1'b0) begin
lsu_atomic_q <= AT_NONE;
end else if (ctrl_update) begin // request was granted, we wait for rvalid and can continue to WB
if (xif_req) begin
// NOTE(review): xif_req presumably marks a request coming from the eXtension interface,
// which is never tracked as atomic here - confirm against the xif_req definition.
lsu_atomic_q <= AT_NONE;
end else begin
// Set type of atomic instruction in WB, if any.
lsu_atomic_q <= lsu_atomic_0_o;
end
end
end

// Decode the atomic type from trans.atop:
//   atop[5] clear                 -> AT_NONE
//   atop[5] set, atop[4:0] == 'h2 -> AT_LR
//   atop[5] set, atop[4:0] == 'h3 -> AT_SC
//   atop[5] set, any other value  -> AT_AMO
assign lsu_atomic_0_o = !trans.atop[5] ? AT_NONE :
(trans.atop[4:0] == 5'h2) ? AT_LR :
(trans.atop[4:0] == 5'h3) ? AT_SC :
AT_AMO;
assign lsu_atomic_1_o = lsu_atomic_q;

// SC.W must write 0 to rd on success, and 1 on failure. All other instructions including AMO write the response data.
// The result is 31 zero bits with !exokay in bit 0, i.e. exokay high gives 0 and exokay low gives 1.
assign lsu_rdata_1_o = (lsu_atomic_q == AT_SC) ? {{31{1'b0}}, !resp.bus_resp.exokay} : rdata_ext;

end else begin : no_a_ext
// A_EXT not enabled, tie off outputs.
assign lsu_atomic_0_o = AT_NONE;
assign lsu_atomic_1_o = AT_NONE;

// output to register file
// Always rdata_ext regardless of split accesses
// Output will be valid (valid_1_o) only for the last phase of split access.
assign lsu_rdata_1_o = rdata_ext;
end
endgenerate

// misaligned_access is high for both transfers of a misaligned transfer
// TODO: Give MPU a separate modified_access_i input
Expand Down Expand Up @@ -717,6 +756,7 @@ module cv32e40x_load_store_unit import cv32e40x_pkg::*;
//////////////////////////////////////////////////////////////////////////////
// MPU
//////////////////////////////////////////////////////////////////////////////
// Flag the MPU transaction as atomic, for the MPU's atomic_access_i input.
// Only atop[5] is used: the A decoder forms atop as {1'b1, instr_rdata_i[31:27]}, and the
// atomic type decode in this file also treats atop[5] as the sole "is atomic" indicator.
// The lower five bits only select the operation, so OR-reducing the whole field is unnecessary.
assign mpu_trans_atomic = mpu_trans.atop[5];
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Only use bit 5

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed


cv32e40x_mpu
#(
Expand All @@ -734,7 +774,7 @@ module cv32e40x_load_store_unit import cv32e40x_pkg::*;
(
.clk ( clk ),
.rst_n ( rst_n ),
.atomic_access_i ( 1'b0 ), // TODO:OE update to support atomic PMA checks
.atomic_access_i ( mpu_trans_atomic ),
.misaligned_access_i ( misaligned_access ),

.core_one_txn_pend_n ( cnt_is_one_next ),
Expand Down Expand Up @@ -764,6 +804,7 @@ module cv32e40x_load_store_unit import cv32e40x_pkg::*;
(.clk ( clk ),
.rst_n ( rst_n ),
.busy_o ( filter_resp_busy ),
.bus_busy_o ( bus_busy_o ),

.valid_i ( filter_trans_valid ),
.ready_o ( filter_trans_ready ),
Expand Down
2 changes: 2 additions & 0 deletions rtl/cv32e40x_lsu_response_filter.sv
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ module cv32e40x_lsu_response_filter
output logic ready_o,

output logic busy_o,
output logic bus_busy_o, // There are outstanding transactions on the bus side.
output logic resp_valid_o,
output obi_data_resp_t resp_o, // Todo: This also carries the obi error field. Could replace by data_resp_t

Expand Down Expand Up @@ -85,6 +86,7 @@ module cv32e40x_lsu_response_filter
outstanding_t [DEPTH:0] outstanding_next;

assign busy_o = ( bus_cnt_q != '0) || valid_i;
assign bus_busy_o = ( bus_cnt_q != '0);

// The two trans valid signals will always have the same value as they are gated with the same condition
assign core_trans_accepted = ready_o && valid_i; // Transfer accepted on the core side of the response filter
Expand Down
8 changes: 8 additions & 0 deletions rtl/cv32e40x_pma.sv
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,14 @@ module cv32e40x_pma import cv32e40x_pkg::*;
pma_err_o = 1'b1;
end

// Check that atomic accesses are not misaligned
// Not strictly a part of the PMA, but reusing the PMA logic for flagging errors
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

at -> a

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed

// and consume transactions rather than making separate logic in the LSU. Uses the same exception
// codes as PMA errors.
if (atomic_access_i && misaligned_access_i) begin
pma_err_o = 1'b1;
end

// Instruction fetches only allowed in main memory
if (instr_fetch_access_i && !pma_cfg.main) begin
pma_err_o = 1'b1;
Expand Down
Loading