diff --git a/Bender.yml b/Bender.yml
index bf14f6f2..ea8f550b 100644
--- a/Bender.yml
+++ b/Bender.yml
@@ -49,6 +49,7 @@ sources:
       - rtl/ecc_wrap/ecc_scrubber.sv
       - rtl/rel_fifo.sv
       - rtl/rel_rr_arb_tree.sv
+      - rtl/rel_spill_register.sv
 
   - target: any(deprecated, axi_ecc, hci_ecc, pulp_ecc, test)
     files:
diff --git a/rtl/hsiao_ecc/hsiao_ecc_pkg.sv b/rtl/hsiao_ecc/hsiao_ecc_pkg.sv
index b2efdfac..96acdcf5 100644
--- a/rtl/hsiao_ecc/hsiao_ecc_pkg.sv
+++ b/rtl/hsiao_ecc/hsiao_ecc_pkg.sv
@@ -14,7 +14,16 @@
 package hsiao_ecc_pkg;
 
   function automatic int unsigned min_ecc(int unsigned data_width);
-    min_ecc = $clog2(data_width)+2;
+    // Returns p + 1, where p is the smallest value with 2**p >= data_width + p + 1
+    // (the Hamming bound for single-error correction); the extra bit is presumably
+    // the overall parity that makes the code SEC-DED.
+    // The search is unbounded and uses 64-bit arithmetic, so it terminates without
+    // overflow for every 32-bit data_width instead of returning 0 past a fixed cap.
+    int unsigned parity_bits = 0;
+    while ((64'd1 << parity_bits) < 64'(data_width) + 64'(parity_bits) + 64'd1) begin
+      parity_bits++;
+    end
+    return parity_bits + 1;
   endfunction
 
   /// Static parameters for synthesizability (avoiding dynamic arrays)
diff --git a/rtl/rel_fifo.sv b/rtl/rel_fifo.sv
index c3637c08..a116570d 100644
--- a/rtl/rel_fifo.sv
+++ b/rtl/rel_fifo.sv
@@ -19,16 +19,12 @@ module rel_fifo #(
   parameter int unsigned DataWidth = 32,
   /// depth can be arbitrary from 0 to 2**32
   parameter int unsigned Depth = 8,
-  /// custom data type, overrides DataWidth parameter
-  parameter type data_t = logic [DataWidth-1:0],
   /// Status and control signals are triplicated
   parameter bit TmrStatus = 1'b0,
   /// Input data has ECC (setting to 0 will add ecc encoders and decoders and assume simple logic vector, currently unimplemented)
   parameter bit DataHasEcc = 1'b1,
   /// Use dedicated registers for status_cnt_q (better timing, higher area, likely to be optimized away, currently unimplemented)
   parameter bit StatusFF = 1'b0,
-  /// Have the TMR before the register, otherwise after.
-  parameter bit TmrBeforeReg = 1'b1,
   // DO NOT OVERWRITE THESE PARAMETERS
   parameter int unsigned AddrDepth = cf_math_pkg::idx_width(Depth),
   parameter int unsigned HsWidth = TmrStatus ?
3 : 1 @@ -50,12 +46,12 @@ module rel_fifo #( output logic [AddrDepth-1:0] usage_o, /// as long as the queue is not full we can push new data /// data to push into the queue - input data_t data_i, + input logic [DataWidth-1:0] data_i, /// data is valid and can be pushed to the queue input logic [ HsWidth-1:0] push_i, /// as long as the queue is not empty we can pop new elements /// output data - output data_t data_o, + output logic [DataWidth-1:0] data_o, /// pop head from queue input logic [ HsWidth-1:0] pop_i, /// tmr fault output signal @@ -66,14 +62,14 @@ module rel_fifo #( localparam int unsigned FifoDepth = (Depth > 0) ? Depth : 1; // TODO: DataHasEcc ? data_t : logic [hsiao_pkg::min_ecc(DataWidth)+DataWidth-1:0]; - localparam type ecc_data_t = data_t; + localparam int unsigned EccDataWidth = DataWidth; - logic [5:0] tmr_faults; - logic [FifoDepth-1:0][$bits(ecc_data_t)-1:0] data_tmr_faults; + logic [9:0] tmr_faults; + logic [FifoDepth-1:0][EccDataWidth-1:0] data_tmr_faults; assign fault_o = |tmr_faults; // clock gating control - logic [FifoDepth-1:0][2:0] gate_clock; + logic [2:0][FifoDepth-1:0][EccDataWidth-1:0] gate_clock; // pointer to the read and write section of the queue logic [2:0][AddrDepth:0] read_pointer_n, read_pointer_q, @@ -81,13 +77,13 @@ module rel_fifo #( write_pointer_q, status_cnt_n, status_cnt_q; - logic [2:0] full, empty; + logic [2:0] full, empty, push, pop, flush; - ecc_data_t data_in; - ecc_data_t [2:0] data_out; + logic [EccDataWidth-1:0] data_in; + logic [EccDataWidth-1:0] data_out; // actual memory - ecc_data_t [FifoDepth-1:0] mem_q; + logic [FifoDepth-1:0][EccDataWidth-1:0] mem_q; if (!DataHasEcc) begin : gen_ecc_encode $error("unimplemented"); @@ -95,199 +91,132 @@ module rel_fifo #( // TODO ecc decoding of data_out into data_o end else begin : gen_ecc_passthrough assign data_in = data_i; - `VOTE31F(data_out, data_o, tmr_faults[0]) + assign data_o = data_out; end - if (StatusFF) begin : gen_status_ff - $error("unimplemented"); - 
// logic [2:0][AddrDepth:0] status_cnt_d; - - // always_comb begin - // if () - // end - - // always_ff @(posedge clk_i or negedge rst_ni) begin : proc_status_cnt - // if(!rst_ni) begin - // status_cnt_q <= '0; - // end else begin - // status_cnt_q <= status_cnt_d; - // end - // end - end else begin : gen_status_calc - for (genvar i = 0; i < 3; i++) begin : gen_tmr_status - assign status_cnt_q[i] = write_pointer_q[i] - read_pointer_q[i]; - end + logic [2:0][AddrDepth:0] read_pointer_n_sync, + write_pointer_n_sync; + logic [2:0][1:0][AddrDepth:0] alt_read_pointer_n_sync, + alt_write_pointer_n_sync; + + logic [2:0][EccDataWidth-1:0][AddrDepth:0] read_pointer_next; + logic [2:0][EccDataWidth-1:0] use_fallthrough; + + logic [EccDataWidth-1:0] data_out_faults; + assign tmr_faults[0] = |data_out_faults; + + for (genvar i = 0; i < EccDataWidth; i++) begin : gen_data_out_mux + logic [AddrDepth:0] read_pointer_next_local; + logic use_fallthrough_local; + logic [1:0] local_faults; + assign data_out_faults[i] = |local_faults; + bitwise_TMR_voter_fail #( + .DataWidth(AddrDepth+1) + ) i_read_pointer_next_vote ( + .a_i(read_pointer_next[0][i]), + .b_i(read_pointer_next[1][i]), + .c_i(read_pointer_next[2][i]), + .majority_o(read_pointer_next_local), + .fault_detected_o(local_faults[0]) + ); + TMR_voter_fail #( + .VoterType(1) + ) i_use_fallthrough_vote ( + .a_i(use_fallthrough[0][i]), + .b_i(use_fallthrough[1][i]), + .c_i(use_fallthrough[2][i]), + .majority_o(use_fallthrough_local), + .fault_detected_o(local_faults[1]) + ); + assign data_out[i] = use_fallthrough_local ? 
+ data_in[i] : mem_q[read_pointer_next_local[AddrDepth-1:0]][i]; end - assign usage_o = status_cnt_q[0][AddrDepth-1:0]; - - // status flags - if (Depth == 0) begin : gen_pass_through - assign empty_o = ~push_i; - assign full_o = ~pop_i; - end else begin : gen_fifo - for (genvar i = 0; i < 3; i++) begin : gen_tmr_full_empty - if (StatusFF) begin : gen_full_empty_from_status - assign full[i] = (status_cnt_q[i] == FifoDepth[AddrDepth:0]); - assign empty[i] = (status_cnt_q[i] == 0) & ~(FallThrough & push_i[i]); - end else begin : gen_full_empty_calc - assign full[i] = (write_pointer_q[i][AddrDepth-1:0] == read_pointer_q[i][AddrDepth-1:0] & - write_pointer_q[i][AddrDepth] != read_pointer_q[i][AddrDepth]); - assign empty[i] = (write_pointer_q[i] == read_pointer_q[i]) & - ~(FallThrough & push_i[i]); - end - end - if (TmrStatus) begin : gen_tmr_status - assign full_o = full; - assign empty_o = empty; - assign tmr_faults[1] = '0; - assign tmr_faults[2] = '0; - end else begin : gen_voted_status - TMR_voter_fail #( - .VoterType(1) - ) i_full_tmr_vote ( - .a_i(full[0]), - .b_i(full[1]), - .c_i(full[2]), - .majority_o(full_o), - .fault_detected_o(tmr_faults[1]) - ); - TMR_voter_fail #( - .VoterType(1) - ) i_empty_tmr_vote ( - .a_i(empty[0]), - .b_i(empty[1]), - .c_i(empty[2]), - .majority_o(empty_o), - .fault_detected_o(tmr_faults[2]) - ); - end - + if (TmrStatus) begin : gen_tmr_status + assign full_o = full; + assign empty_o = empty; + assign push = push_i; + assign pop = pop_i; + assign flush = flush_i; + assign tmr_faults[1] = '0; + assign tmr_faults[2] = '0; + end else begin : gen_voted_status + assign push = {push_i, push_i, push_i}; + assign pop = {pop_i, pop_i, pop_i}; + assign flush = {flush_i, flush_i, flush_i}; + TMR_voter_fail #( + .VoterType(1) + ) i_full_tmr_vote ( + .a_i(full[0]), + .b_i(full[1]), + .c_i(full[2]), + .majority_o(full_o), + .fault_detected_o(tmr_faults[1]) + ); + TMR_voter_fail #( + .VoterType(1) + ) i_empty_tmr_vote ( + .a_i(empty[0]), + 
.b_i(empty[1]), + .c_i(empty[2]), + .majority_o(empty_o), + .fault_detected_o(tmr_faults[2]) + ); end - // read and write queue logic - always_comb begin : read_write_comb - // default assignment - read_pointer_n = read_pointer_q; - write_pointer_n = write_pointer_q; - status_cnt_n = status_cnt_q; - data_out [0] = (Depth == 0) ? data_in : mem_q[read_pointer_q[0][AddrDepth-1:0]]; - data_out [1] = (Depth == 0) ? data_in : mem_q[read_pointer_q[1][AddrDepth-1:0]]; - data_out [2] = (Depth == 0) ? data_in : mem_q[read_pointer_q[2][AddrDepth-1:0]]; - gate_clock = {FifoDepth{3'b111}}; - - // For reliability we vote at the end and handle streams individually - for (int i = 0; i < 3; i++) begin - - // For reliability each data bit handled individually - for (int j = 0; j < $bits(ecc_data_t); j++) begin - - // push a new element to the queue - if (push_i[i] && ~full[i]) begin - // un-gate the clock, we want to write something - gate_clock[write_pointer_q[i][AddrDepth-1:0]][i] = 1'b0; - // increment the write counter - // this is dead code when DEPTH is a power of two - if (write_pointer_q[i][AddrDepth-1:0] == FifoDepth[AddrDepth-1:0] - 1) begin - write_pointer_n[i][AddrDepth-1:0] = '0; - write_pointer_n[i][AddrDepth] = ~write_pointer_q[i][AddrDepth]; - end else begin - write_pointer_n[i] = write_pointer_q[i] + 1; - end - if (StatusFF) begin - // increment the overall counter - status_cnt_n[i] = status_cnt_q[i] + 1; - end - end - - // pop an element from the queue - if (pop_i[i] && ~empty[i]) begin - // read from the queue is a default assignment - // but increment the read pointer... - // this is dead code when DEPTH is a power of two - if (read_pointer_q[i][AddrDepth-1:0] == FifoDepth[AddrDepth-1:0] - 1) begin - read_pointer_n[i][AddrDepth-1:0] = '0; - read_pointer_n[i][AddrDepth] = ~read_pointer_q[i][AddrDepth]; - end else begin - read_pointer_n[i] = read_pointer_q[i] + 1; - end - // ... 
and decrement the overall count - if (StatusFF) begin - status_cnt_n[i] = status_cnt_q[i] - 1; - end - end - - // keep the count pointer stable if we push and pop at the same time - if (StatusFF) begin - if (push_i[i] && pop_i[i] && ~full[i] && ~empty[i]) - status_cnt_n = status_cnt_q; - end - - // FIFO is in pass through mode -> do not change the pointers - if (FallThrough && (write_pointer_q[i] == read_pointer_q[i]) && push_i[i]) begin - data_out[i] = data_in; - if (pop_i[i]) begin - status_cnt_n[i] = status_cnt_q[i]; - read_pointer_n[i] = read_pointer_q[i]; - write_pointer_n[i] = write_pointer_q[i]; - end - end - end + for (genvar i = 0; i < 3; i++) begin : gen_tmr_parts + for (genvar j = 0; j < 2; j++) begin : gen_alt_sync + assign alt_read_pointer_n_sync[i][j] = read_pointer_n_sync[(i+j+1) % 3]; + assign alt_write_pointer_n_sync[i][j] = write_pointer_n_sync[(i+j+1) % 3]; end + rel_fifo_tmr_part #( + .FallThrough(FallThrough), + .EccDataWidth(EccDataWidth), + .Depth(Depth), + .FifoDepth(FifoDepth), + .AddrDepth(AddrDepth), + .StatusFF(StatusFF) + ) i_tmr_part ( + .clk_i(clk_i), + .rst_ni(rst_ni), + .flush_i(flush[i]), + .full_o(full[i]), + .empty_o(empty[i]), + .push_i(push[i]), + .pop_i(pop[i]), + .gate_clock_o(gate_clock[i]), + .read_pointer_next_multi_o(read_pointer_next[i]), + .use_fallthrough_o(use_fallthrough[i]), + .status_cnt_q_o(status_cnt_q[i]), + .read_pointer_q_o(read_pointer_q[i]), + .write_pointer_q_o(write_pointer_q[i]), + .status_cnt_n_o(status_cnt_n[i]), + .read_pointer_n_o(read_pointer_n[i]), + .write_pointer_n_o(write_pointer_n[i]), + .alt_read_pointer_n_sync_i(alt_read_pointer_n_sync[i]), + .read_pointer_n_sync_o(read_pointer_n_sync[i]), + .alt_write_pointer_n_sync_i(alt_write_pointer_n_sync[i]), + .write_pointer_n_sync_o(write_pointer_n_sync[i]), + .tmr_faults_o(tmr_faults[5+(2*i):4+(2*i)]) + ); end - if (TmrBeforeReg) begin : gen_tmr_before_reg - logic [2:0][AddrDepth:0] read_pointer_voted, write_pointer_voted; - - `VOTE33F(read_pointer_n, 
read_pointer_voted, tmr_faults[4]) - `VOTE33F(write_pointer_n, write_pointer_voted, tmr_faults[5]) - - for (genvar i = 0; i < 3; i++) begin : gen_pointer_ffs - always_ff @(posedge clk_i or negedge rst_ni) begin - if(!rst_ni) begin - read_pointer_q[i] <= '0; - write_pointer_q[i] <= '0; - end else begin - if (flush_i[i]) begin - read_pointer_q[i] <= '0; - write_pointer_q[i] <= '0; - end else begin - read_pointer_q[i] <= read_pointer_voted[i]; - write_pointer_q[i] <= write_pointer_voted[i]; - end - end - end - end - end else begin : gen_tmr_after_reg - logic [2:0][AddrDepth:0] read_pointer_next, write_pointer_next; - - `VOTE33F(read_pointer_next, read_pointer_q, tmr_faults[4]) - `VOTE33F(write_pointer_next, write_pointer_q, tmr_faults[5]) - - for (genvar i = 0; i < 3; i++) begin : gen_pointer_ffs - always_ff @(posedge clk_i or negedge rst_ni) begin - if(!rst_ni) begin - read_pointer_next[i] <= '0; - write_pointer_next[i] <= '0; - end else begin - if (flush_i) begin - read_pointer_next[i] <= '0; - write_pointer_next[i] <= '0; - end else begin - read_pointer_next[i] <= read_pointer_n[i]; - write_pointer_next[i] <= write_pointer_n[i]; - end - end - end - end - end + assign usage_o = status_cnt_q[0][AddrDepth-1:0]; assign tmr_faults[3] = |data_tmr_faults; for (genvar i = 0; i < FifoDepth; i++) begin : gen_mem_ffs_depth - for (genvar j = 0; j < $bits(ecc_data_t); j++) begin : gen_mem_ffs + for (genvar j = 0; j < EccDataWidth; j++) begin : gen_mem_ffs logic gate_clock_local; - `VOTE31F(gate_clock[i], gate_clock_local, data_tmr_faults[i][j]) + TMR_voter_fail #( + .VoterType(1) + ) i_gate_clock_vote ( + .a_i(gate_clock[0][i][j]), + .b_i(gate_clock[1][i][j]), + .c_i(gate_clock[2][i][j]), + .majority_o(gate_clock_local), + .fault_detected_o(data_tmr_faults[i][j]) + ); always_ff @(posedge clk_i or negedge rst_ni) begin if(~rst_ni) begin mem_q[i][j] <= '0; @@ -300,6 +229,7 @@ module rel_fifo #( // pragma translate_off `ifndef VERILATOR +`ifndef RED_CELLS_ASSERTS_OFF initial begin 
assert (Depth > 0) else $error("Depth must be greater than 0.");
   end
@@ -312,6 +242,190 @@ module rel_fifo #(
     @(posedge clk_i) disable iff (~rst_ni) (empty_o |-> ~pop_i)) else
       $fatal (1, "Trying to pop data although the FIFO is empty.");
 `endif
+`endif
 // pragma translate_on
 
 endmodule
+
+/// One of the three redundant control replicas instantiated by rel_fifo.
+/// Holds this replica's read/write pointer registers, votes them against the two
+/// sibling replicas' registers (alt_*_pointer_n_sync_i) and derives full/empty, the
+/// next pointer values and the per-entry clock-gate mask.
+(* no_ungroup *)
+(* no_boundary_optimization *)
+module rel_fifo_tmr_part #(
+  parameter bit          FallThrough  = 1'b0,
+  parameter int unsigned EccDataWidth = 39,
+  parameter int unsigned Depth        = 8,
+  parameter int unsigned FifoDepth    = 8,
+  parameter int unsigned AddrDepth    = 8,
+  parameter bit          StatusFF     = 1'b0
+) (
+  input  logic clk_i,
+  input  logic rst_ni,
+  input  logic flush_i,
+  output logic full_o,
+  output logic empty_o,
+  input  logic push_i,
+  input  logic pop_i,
+  output logic [FifoDepth-1:0][EccDataWidth-1:0] gate_clock_o,
+  output logic [EccDataWidth-1:0][AddrDepth:0] read_pointer_next_multi_o,
+  output logic [EccDataWidth-1:0] use_fallthrough_o,
+  output logic [AddrDepth:0] status_cnt_q_o,
+  output logic [AddrDepth:0] read_pointer_q_o,
+  output logic [AddrDepth:0] write_pointer_q_o,
+  output logic [AddrDepth:0] status_cnt_n_o,
+  output logic [AddrDepth:0] read_pointer_n_o,
+  output logic [AddrDepth:0] write_pointer_n_o,
+  input  logic [1:0][AddrDepth:0] alt_read_pointer_n_sync_i,
+  output logic [AddrDepth:0] read_pointer_n_sync_o,
+  input  logic [1:0][AddrDepth:0] alt_write_pointer_n_sync_i,
+  output logic [AddrDepth:0] write_pointer_n_sync_o,
+  output logic [1:0] tmr_faults_o
+);
+
+  logic [AddrDepth:0] read_pointer_next, write_pointer_next;
+
+  // Replicate the registered read pointer and the fall-through select per data bit.
+  for (genvar i = 0; i < EccDataWidth; i++) begin : gen_read_write_next
+    assign read_pointer_next_multi_o[i] = read_pointer_next;
+    assign use_fallthrough_o[i] = FallThrough &&
+                                  (read_pointer_next == write_pointer_next) &&
+                                  push_i;
+  end
+
+  if (StatusFF) begin : gen_status_ff
+    $error("unimplemented");
+    // logic [2:0][AddrDepth:0] status_cnt_d;
+
+    // always_comb begin
+    //   if ()
+    // end
+
+    // always_ff @(posedge clk_i or negedge rst_ni) begin : proc_status_cnt
+    //   if(!rst_ni) begin
+    //     status_cnt_q_o <= '0;
+    //   end else begin
+    //     status_cnt_q_o <= status_cnt_d;
+    //   end
+    // end
+  end else begin : gen_status_calc
+    assign status_cnt_q_o = write_pointer_q_o - read_pointer_q_o;
+  end
+
+  if (Depth == 0) begin : gen_pass_through
+    // Zero-depth pass-through: data is only present while a push is offered and there
+    // is only room while the consumer pops, so both flags invert the handshake inputs.
+    assign empty_o = ~push_i;
+    assign full_o = ~pop_i;
+  end else begin : gen_fifo
+    if (StatusFF) begin : gen_full_empty_from_status
+      assign full_o = (status_cnt_q_o == FifoDepth[AddrDepth:0]);
+      assign empty_o = (status_cnt_q_o == 0) & ~(FallThrough & push_i);
+    end else begin : gen_full_empty_calc
+      assign full_o = (write_pointer_q_o[AddrDepth-1:0] == read_pointer_q_o[AddrDepth-1:0] &
+                       write_pointer_q_o[AddrDepth] != read_pointer_q_o[AddrDepth]);
+      assign empty_o = (write_pointer_q_o == read_pointer_q_o) &
+                       ~(FallThrough & push_i);
+    end
+  end
+
+  // read and write queue logic
+  always_comb begin : read_write_comb
+    // default assignment
+    read_pointer_n_o = read_pointer_q_o;
+    write_pointer_n_o = write_pointer_q_o;
+    status_cnt_n_o = status_cnt_q_o;
+    gate_clock_o = {FifoDepth{{EccDataWidth{1'b1}}}};
+
+    // push a new element to the queue
+    if (push_i && ~full_o) begin
+      // un-gate the clock, we want to write something
+      gate_clock_o[write_pointer_q_o[AddrDepth-1:0]] = {EccDataWidth{1'b0}};
+      // increment the write counter
+      // this is dead code when DEPTH is a power of two
+      if (write_pointer_q_o[AddrDepth-1:0] == FifoDepth[AddrDepth-1:0] - 1) begin
+        write_pointer_n_o[AddrDepth-1:0] = '0;
+        write_pointer_n_o[AddrDepth] = ~write_pointer_q_o[AddrDepth];
+      end else begin
+        write_pointer_n_o = write_pointer_q_o + 1;
+      end
+      if (StatusFF) begin
+        // increment the overall counter
+        status_cnt_n_o = status_cnt_q_o + 1;
+      end
+    end
+
+    // pop an element from the queue
+    if (pop_i && ~empty_o) begin
+      // read from the queue is a default assignment
+      // but increment the read pointer...
+      // this is dead code when DEPTH is a power of two
+      if (read_pointer_q_o[AddrDepth-1:0] == FifoDepth[AddrDepth-1:0] - 1) begin
+        read_pointer_n_o[AddrDepth-1:0] = '0;
+        read_pointer_n_o[AddrDepth] = ~read_pointer_q_o[AddrDepth];
+      end else begin
+        read_pointer_n_o = read_pointer_q_o + 1;
+      end
+      // ... and decrement the overall count
+      if (StatusFF) begin
+        status_cnt_n_o = status_cnt_q_o - 1;
+      end
+    end
+
+    // keep the count pointer stable if we push and pop at the same time
+    if (StatusFF) begin
+      if (push_i && pop_i && ~full_o && ~empty_o)
+        status_cnt_n_o = status_cnt_q_o;
+    end
+
+    // FIFO is in pass through mode -> do not change the pointers
+    if (FallThrough && (write_pointer_next == read_pointer_next) && push_i) begin
+      if (pop_i) begin
+        status_cnt_n_o = status_cnt_q_o;
+        read_pointer_n_o = read_pointer_q_o;
+        write_pointer_n_o = write_pointer_q_o;
+      end
+    end
+  end
+
+  // Export the raw registers to the sibling replicas and vote them against the
+  // siblings' registers; the voted values are the pointers used by the logic above,
+  // and the voters' fault_detected_o outputs drive tmr_faults_o.
+  assign read_pointer_n_sync_o = read_pointer_next;
+  assign write_pointer_n_sync_o = write_pointer_next;
+  bitwise_TMR_voter_fail #(
+    .DataWidth(AddrDepth+1)
+  ) i_read_pointer_vote (
+    .a_i(read_pointer_next),
+    .b_i(alt_read_pointer_n_sync_i[0]),
+    .c_i(alt_read_pointer_n_sync_i[1]),
+    .majority_o(read_pointer_q_o),
+    .fault_detected_o(tmr_faults_o[0])
+  );
+  bitwise_TMR_voter_fail #(
+    .DataWidth(AddrDepth+1)
+  ) i_write_pointer_vote (
+    .a_i(write_pointer_next),
+    .b_i(alt_write_pointer_n_sync_i[0]),
+    .c_i(alt_write_pointer_n_sync_i[1]),
+    .majority_o(write_pointer_q_o),
+    .fault_detected_o(tmr_faults_o[1])
+  );
+
+  // Pointer registers: asynchronous active-low reset, synchronous flush.
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if(!rst_ni) begin
+      read_pointer_next <= '0;
+      write_pointer_next <= '0;
+    end else begin
+      if (flush_i) begin
+        read_pointer_next <= '0;
+        write_pointer_next <= '0;
+      end else begin
+        read_pointer_next <= read_pointer_n_o;
+        write_pointer_next <= write_pointer_n_o;
+      end
+    end
+  end
+endmodule
diff --git a/rtl/rel_rr_arb_tree.sv b/rtl/rel_rr_arb_tree.sv
index 9a79f556..d1f20e53 100644
--- a/rtl/rel_rr_arb_tree.sv
+++
b/rtl/rel_rr_arb_tree.sv @@ -82,7 +82,7 @@ module rel_rr_arb_tree #( /// IO handshake signal triplicated or not parameter bit TmrStatus = 1'b0, /// Have the TMR before the register, otherwise after. - parameter bit TmrBeforeReg = 1'b1, + parameter bit TmrBeforeReg = 1'b0, /// Dependent parameter, do **not** overwrite. /// Width of the arbitration priority signal and the arbitrated index. parameter int unsigned IdxWidth = (NumIn > 32'd1) ? unsigned'($clog2(NumIn)) : 32'd1, @@ -131,7 +131,7 @@ module rel_rr_arb_tree #( `endif `endif - logic [8+NumIn-1:0] tmr_errors; + logic [8:0] tmr_errors; assign fault_o = |tmr_errors; // just pass through in this corner case @@ -143,25 +143,41 @@ module rel_rr_arb_tree #( assign tmr_errors = '0; // non-degenerate cases end else begin : gen_arbiter - logic [NumIn-1:0][2:0] req_in, gnt_out; - logic [2:0] req_out, gnt_in; - idx_t [2:0] idx_out; + logic [2:0][NumIn-1:0] req_in, gnt_out; + logic [2:0] req_out, gnt_in; + idx_t [2:0] idx_out; + idx_t [2:0][$bits(DataType)-1:0] idx_data_out; + DataType [2**IdxWidth-1:0] data_in; + always_comb begin + data_in = '0; + for (int i = 0; i < NumIn; i++) begin : gen_data_in + data_in[i] = data_i[i]; + end + end if (TmrStatus) begin : gen_req_in - assign req_in = req_i; + for (genvar i = 0; i < NumIn; i++) begin : gen_req_in_single + assign req_in[0][i] = req_i[i][0]; + assign req_in[1][i] = req_i[i][1]; + assign req_in[2][i] = req_i[i][2]; + assign gnt_o[i][0] = gnt_out[0][i]; + assign gnt_o[i][1] = gnt_out[1][i]; + assign gnt_o[i][2] = gnt_out[2][i]; + end assign req_o = req_out; - assign gnt_o = gnt_out; assign gnt_in = gnt_i; assign idx_o = idx_out; assign tmr_errors[4:0] = '0; end else begin : gen_req_in_triplicate for (genvar i = 0; i < NumIn; i++) begin : gen_req_in - assign req_in[i] = {3{req_i[i]}}; + assign req_in[0][i] = req_i[i]; + assign req_in[1][i] = req_i[i]; + assign req_in[2][i] = req_i[i]; TMR_voter_fail #( .VoterType(1) ) i_gnt_o_vote ( - .a_i(gnt_out[i][0]), - 
.b_i(gnt_out[i][1]), - .c_i(gnt_out[i][2]), + .a_i(gnt_out[0][i]), + .b_i(gnt_out[1][i]), + .c_i(gnt_out[2][i]), .majority_o(gnt_o[i]), .fault_detected_o(tmr_errors[i]) ); @@ -176,7 +192,7 @@ module rel_rr_arb_tree #( .fault_detected_o(tmr_errors[3]) ); assign gnt_in = gnt_i; - TMR_voter_fail #( + bitwise_TMR_voter_fail #( .VoterType(1) ) i_idx_o_vote ( .a_i(idx_out[0]), @@ -188,374 +204,27 @@ module rel_rr_arb_tree #( end localparam int unsigned NumLevels = unsigned'($clog2(NumIn)); - - /* verilator lint_off UNOPTFLAT */ - idx_t [2**NumLevels-2:0][2:0] index_nodes; // used to propagate the indices - DataType [2**NumLevels-2:0][2:0] data_nodes; // used to propagate the data - logic [2**NumLevels-2:0][2:0] gnt_nodes; // used to propagate the grant to masters - logic [2**NumLevels-2:0][2:0] req_nodes; // used to propagate the requests to slave - /* lint_off */ - idx_t [2:0] rr_q; - logic [NumIn-1:0][2:0] req_d; - - // the final arbitration decision can be taken from the root of the tree - assign req_out = req_nodes[0]; - assign idx_out = index_nodes[0]; - // assign data_o = data_nodes[0]; - bitwise_TMR_voter_fail #( - .DataWidth($bits(DataType)), - .VoterType(1) - ) i_data_vote ( - .a_i (data_nodes[0][0]), - .b_i (data_nodes[0][1]), - .c_i (data_nodes[0][2]), - .majority_o (data_o), - .fault_detected_o(tmr_errors[5]) - ); - - if (ExtPrio) begin : gen_ext_rr - assign rr_q = {3{rr_i}}; - assign req_d = req_in; - end else begin : gen_int_rr - idx_t [2:0] rr_d; - - // lock arbiter decision in case we got at least one req and no acknowledge - if (LockIn) begin : gen_lock - logic [2:0] lock_d, lock_q; - logic [NumIn-1:0][2:0] req_q; - - assign lock_d = req_out & ~gnt_in; - for (genvar i = 0; i < NumIn; i++) begin : gen_req_d - for (genvar j = 0; j < 3; j++) begin : gen_req_d_tmr - assign req_d[i][j] = (lock_q[j]) ? 
req_q[i][j] : req_in[i][j]; - end - end - - if (TmrBeforeReg) begin : gen_lock_tmr_before_reg - logic [2:0] lock_voted; - TMR_voter_fail #( - .VoterType(1) - ) i_lock0_vote ( - .a_i(lock_d[0]), - .b_i(lock_d[1]), - .c_i(lock_d[2]), - .majority_o(lock_voted[0]), - .fault_detected_o(tmr_errors[6]) - ); - TMR_voter_fail #( - .VoterType(1) - ) i_lock1_vote ( - .a_i(lock_d[0]), - .b_i(lock_d[1]), - .c_i(lock_d[2]), - .majority_o(lock_voted[1]), - .fault_detected_o() - ); - TMR_voter_fail #( - .VoterType(1) - ) i_lock2_vote ( - .a_i(lock_d[0]), - .b_i(lock_d[1]), - .c_i(lock_d[2]), - .majority_o(lock_voted[2]), - .fault_detected_o() - ); - always_ff @(posedge clk_i or negedge rst_ni) begin : p_lock_reg - if (!rst_ni) begin - lock_q <= '0; - end else begin - if (flush_i) begin - lock_q <= '0; - end else begin - lock_q <= lock_voted; - end - end - end - end else begin : gen_lock_tmr_after_reg - logic [2:0] lock_next; - TMR_voter_fail #( - .VoterType(1) - ) i_lock0_vote ( - .a_i(lock_next[0]), - .b_i(lock_next[1]), - .c_i(lock_next[2]), - .majority_o(lock_q[0]), - .fault_detected_o(tmr_errors[6]) - ); - TMR_voter_fail #( - .VoterType(1) - ) i_lock1_vote ( - .a_i(lock_next[0]), - .b_i(lock_next[1]), - .c_i(lock_next[2]), - .majority_o(lock_q[1]), - .fault_detected_o() - ); - TMR_voter_fail #( - .VoterType(1) - ) i_lock2_vote ( - .a_i(lock_next[0]), - .b_i(lock_next[1]), - .c_i(lock_next[2]), - .majority_o(lock_q[2]), - .fault_detected_o() - ); - always_ff @(posedge clk_i or negedge rst_ni) begin : p_lock_reg - if (!rst_ni) begin - lock_next <= '0; - end else begin - if (flush_i) begin - lock_next <= '0; - end else begin - lock_next <= lock_d; - end - end - end - end - - - `ifndef SYNTHESIS - `ifndef COMMON_CELLS_ASSERTS_OFF - lock: assert property( - @(posedge clk_i) disable iff (!rst_ni || flush_i) - LockIn |-> req_o[0] && (!gnt_i[0] && !flush_i) |=> idx_o[0] == $past(idx_o[0])) else - $fatal (1, {"Lock implies same arbiter decision in next cycle if output is not ", - 
"ready."}); - - logic [NumIn-1:0][2:0] req_tmp; - assign req_tmp = req_q & req_in; - lock_req: assume property( - @(posedge clk_i) disable iff (!rst_ni || flush_i) - LockIn |-> lock_d[0] |=> req_tmp == req_q) else - $fatal (1, {"It is disallowed to deassert unserved request signals when LockIn is ", - "enabled."}); - `endif - `endif - - if (TmrBeforeReg) begin : gen_req_tmr_before_reg - logic [NumIn-1:0][2:0] req_voted; - for (genvar i = 0; i < NumIn; i++) begin : gen_vote_req - TMR_voter_fail #( - .VoterType(1) - ) i_req_d0_vote ( - .a_i(req_d[i][0]), - .b_i(req_d[i][1]), - .c_i(req_d[i][2]), - .majority_o(req_voted[i][0]), - .fault_detected_o(tmr_errors[8+i]) - ); - TMR_voter_fail #( - .VoterType(1) - ) i_req_d1_vote ( - .a_i(req_d[i][0]), - .b_i(req_d[i][1]), - .c_i(req_d[i][2]), - .majority_o(req_voted[i][1]), - .fault_detected_o() - ); - TMR_voter_fail #( - .VoterType(1) - ) i_req_d2_vote ( - .a_i(req_d[i][0]), - .b_i(req_d[i][1]), - .c_i(req_d[i][2]), - .majority_o(req_voted[i][2]), - .fault_detected_o() - ); - end - always_ff @(posedge clk_i or negedge rst_ni) begin : p_lock_reg - if (!rst_ni) begin - req_q <= '0; - end else begin - if (flush_i) begin - req_q <= '0; - end else begin - req_q <= req_voted; - end - end - end - end else begin : gen_req_tmr_after_reg - logic [NumIn-1:0][2:0] req_next; - for (genvar i = 0; i < NumIn; i++) begin : gen_vote_req - TMR_voter_fail #( - .VoterType(1) - ) i_req_next0_vote ( - .a_i(req_next[i][0]), - .b_i(req_next[i][1]), - .c_i(req_next[i][2]), - .majority_o(req_q[i][0]), - .fault_detected_o(tmr_errors[8+i]) - ); - TMR_voter_fail #( - .VoterType(1) - ) i_req_next1_vote ( - .a_i(req_next[i][0]), - .b_i(req_next[i][1]), - .c_i(req_next[i][2]), - .majority_o(req_q[i][1]), - .fault_detected_o() - ); - TMR_voter_fail #( - .VoterType(1) - ) i_req_next2_vote ( - .a_i(req_next[i][0]), - .b_i(req_next[i][1]), - .c_i(req_next[i][2]), - .majority_o(req_q[i][2]), - .fault_detected_o() - ); - end - always_ff @(posedge clk_i or 
negedge rst_ni) begin : p_lock_reg - if (!rst_ni) begin - req_next <= '0; - end else begin - if (flush_i) begin - req_next <= '0; - end else begin - req_next <= req_d; - end - end - end - end - end else begin : gen_no_lock - assign req_d = req_in; - assign tmr_errors[7:6] = '0; - end - - if (FairArb) begin : gen_fair_arb - for (genvar j = 0; j < 3; j++) begin : gen_tmr_fair_arb - logic [NumIn-1:0] upper_mask, lower_mask; - idx_t upper_idx, lower_idx, next_idx; - logic upper_empty, lower_empty; - - for (genvar i = 0; i < NumIn; i++) begin : gen_mask - assign upper_mask[i] = (i > rr_q[j]) ? req_d[i][j] : 1'b0; - assign lower_mask[i] = (i <= rr_q[j]) ? req_d[i][j] : 1'b0; - end - - lzc #( - .WIDTH ( NumIn ), - .MODE ( 1'b0 ) - ) i_lzc_upper ( - .in_i ( upper_mask ), - .cnt_o ( upper_idx ), - .empty_o ( upper_empty ) - ); - - lzc #( - .WIDTH ( NumIn ), - .MODE ( 1'b0 ) - ) i_lzc_lower ( - .in_i ( lower_mask ), - .cnt_o ( lower_idx ), - .empty_o ( /*unused*/ ) - ); - - assign next_idx = upper_empty ? lower_idx : upper_idx; - assign rr_d[j] = (gnt_in[j] && req_out[j]) ? next_idx : rr_q[j]; - end - end else begin : gen_unfair_arb - for (genvar j = 0; j < 3; j++) begin : gen_tmr_unfair_arb - assign rr_d[j] = (gnt_in[j] && req_out[j]) ? - ((rr_q[j] == idx_t'(NumIn-1)) ? 
'0 : rr_q[j] + 1'b1) : rr_q[j]; - end - end - - // this holds the highest priority - if (TmrBeforeReg) begin : gen_rr_tmr_before_reg - idx_t [2:0] rr_voted; - bitwise_TMR_voter_fail #( - .DataWidth(IdxWidth), - .VoterType(1) - ) i_rr_d0_vote ( - .a_i(rr_d[0]), - .b_i(rr_d[1]), - .c_i(rr_d[2]), - .majority_o(rr_voted[0]), - .fault_detected_o(tmr_errors[7]) - ); - bitwise_TMR_voter_fail #( - .DataWidth(IdxWidth), - .VoterType(1) - ) i_rr_d1_vote ( - .a_i(rr_d[0]), - .b_i(rr_d[1]), - .c_i(rr_d[2]), - .majority_o(rr_voted[1]), - .fault_detected_o() - ); - bitwise_TMR_voter_fail #( - .DataWidth(IdxWidth), - .VoterType(1) - ) i_rr_d2_vote ( - .a_i(rr_d[0]), - .b_i(rr_d[1]), - .c_i(rr_d[2]), - .majority_o(rr_voted[2]), - .fault_detected_o() - ); - always_ff @(posedge clk_i or negedge rst_ni) begin : p_rr_regs - if (!rst_ni) begin - rr_q <= '0; - end else begin - if (flush_i) begin - rr_q <= '0; - end else begin - rr_q <= rr_voted; - end - end - end - end else begin : gen_rr_tmr_after_reg - logic [2:0] rr_next; - TMR_voter_fail #( - .VoterType(1) - ) i_rr_next0_vote ( - .a_i(rr_next[0]), - .b_i(rr_next[1]), - .c_i(rr_next[2]), - .majority_o(rr_q[0]), - .fault_detected_o(tmr_errors[7]) - ); - TMR_voter_fail #( - .VoterType(1) - ) i_rr_next1_vote ( - .a_i(rr_next[0]), - .b_i(rr_next[1]), - .c_i(rr_next[2]), - .majority_o(rr_q[1]), - .fault_detected_o() - ); - TMR_voter_fail #( - .VoterType(1) - ) i_rr_next2_vote ( - .a_i(rr_next[0]), - .b_i(rr_next[1]), - .c_i(rr_next[2]), - .majority_o(rr_q[2]), - .fault_detected_o() - ); - always_ff @(posedge clk_i or negedge rst_ni) begin : p_rr_regs - if (!rst_ni) begin - rr_next <= '0; - end else begin - if (flush_i) begin - rr_next <= '0; - end else begin - rr_next <= rr_d; - end - end - end - end - end - - assign gnt_nodes[0] = gnt_in; - - // arbiter tree - for (genvar i = 0; i < 3; i++) begin : gen_tmr_arb_tree + logic [2:0][$bits(DataType)-1:0][2**NumLevels-2:0] data_nodes_sel; + + for (genvar i = 0; i < $bits(DataType); i++) begin 
: gen_data_out_mux + logic [2**NumLevels-2:0] local_sel; + logic [2**NumLevels-2:0] data_nodes; + logic local_fault; + if (i == 0) assign tmr_errors[5] = local_fault; + bitwise_TMR_voter_fail #( + .DataWidth(2**NumLevels-1), + .VoterType(1) + ) i_idx_vote ( + .a_i(data_nodes_sel[0][i]), + .b_i(data_nodes_sel[1][i]), + .c_i(data_nodes_sel[2][i]), + .majority_o(local_sel), + .fault_detected_o(local_fault) + ); + assign data_o[i] = data_nodes[0]; + // assign data_o[i] = data_in[local_index][i]; for (genvar level = 0; unsigned'(level) < NumLevels; level++) begin : gen_levels for (genvar l = 0; l < 2**level; l++) begin : gen_level - // local select signal - logic sel; // index calcs localparam int unsigned Idx0 = 2**level-1+l;// current node localparam int unsigned Idx1 = 2**(level+1)-1+l*2; @@ -564,52 +233,91 @@ module rel_rr_arb_tree #( if (unsigned'(level) == NumLevels-1) begin : gen_first_level // if two successive indices are still in the vector... if (unsigned'(l) * 2 < NumIn-1) begin : gen_reduce - assign req_nodes[Idx0][i] = req_d[l*2][i] | req_d[l*2+1][i]; - - // arbitration: round robin - assign sel = ~req_d[l*2][i] | req_d[l*2+1][i] & rr_q[i][NumLevels-1-level]; - - assign index_nodes[Idx0][i] = idx_t'(sel); - assign data_nodes[Idx0][i] = (sel) ? data_i[l*2+1] : data_i[l*2]; - assign gnt_out[l*2][i] = gnt_nodes[Idx0][i] & (AxiVldRdy | req_d[l*2][i]) & ~sel; - assign gnt_out[l*2+1][i] = gnt_nodes[Idx0][i] & (AxiVldRdy | req_d[l*2+1][i]) & sel; + assign data_nodes[Idx0] = (local_sel[Idx0]) ? data_in[l*2+1][i] : data_in[l*2][i]; end // if only the first index is still in the vector... 
if (unsigned'(l) * 2 == NumIn-1) begin : gen_first - assign req_nodes[Idx0][i] = req_d[l*2][i]; - assign index_nodes[Idx0][i] = '0;// always zero in this case - assign data_nodes[Idx0][i] = data_i[l*2]; - assign gnt_out[l*2][i] = gnt_nodes[Idx0][i] & (AxiVldRdy | req_d[l*2][i]); + assign data_nodes[Idx0] = data_in[l*2][i]; end // if index is out of range, fill up with zeros (will get pruned) if (unsigned'(l) * 2 > NumIn-1) begin : gen_out_of_range - assign req_nodes[Idx0][i] = 1'b0; - assign index_nodes[Idx0][i] = idx_t'('0); - assign data_nodes[Idx0][i] = DataType'('0); + assign data_nodes[Idx0] = '0; end ////////////////////////////////////////////////////////////// // general case for other levels within the tree end else begin : gen_other_levels - assign req_nodes[Idx0][i] = req_nodes[Idx1][i] | req_nodes[Idx1+1][i]; - - // arbitration: round robin - assign sel = ~req_nodes[Idx1][i] | req_nodes[Idx1+1][i] & rr_q[i][NumLevels-1-level]; - - assign index_nodes[Idx0][i] = (sel) ? - idx_t'({1'b1, index_nodes[Idx1+1][i][NumLevels-unsigned'(level)-2:0]}) : - idx_t'({1'b0, index_nodes[Idx1][i][NumLevels-unsigned'(level)-2:0]}); - - assign data_nodes[Idx0][i] = (sel) ? data_nodes[Idx1+1][i] : data_nodes[Idx1][i]; - assign gnt_nodes[Idx1][i] = gnt_nodes[Idx0][i] & ~sel; - assign gnt_nodes[Idx1+1][i] = gnt_nodes[Idx0][i] & sel; + assign data_nodes[Idx0] = (local_sel[Idx0]) ? 
data_nodes[Idx1+1] : data_nodes[Idx1]; end - ////////////////////////////////////////////////////////////// end end end + /* verilator lint_off UNOPTFLAT */ + // DataType [2:0] data_nodes_0; // used to propagate the data + /* lint_off */ + // idx_t [2:0] rr_q; + // logic [NumIn-1:0][2:0] req_d; + + // the final arbitration decision can be taken from the root of the tree + // bitwise_TMR_voter_fail #( + // .DataWidth($bits(DataType)), + // .VoterType(1) + // ) i_data_vote ( + // .a_i (data_nodes_0[0]), + // .b_i (data_nodes_0[1]), + // .c_i (data_nodes_0[2]), + // .majority_o (data_o), + // .fault_detected_o(tmr_errors[5]) + // ); + + logic [2:0] lock_sync; + logic [2:0][1:0] alt_lock_sync; + logic [2:0][NumIn-1:0] req_d_sync; + logic [2:0][1:0][NumIn-1:0] alt_req_d_sync; + idx_t [2:0] rr_d_sync; + idx_t [2:0][1:0] alt_rr_d_sync; + + for (genvar i = 0; i < 3; i++) begin : gen_in_rr_tmr + for (genvar j = 0; j < 2; j++) begin: gen_sync + assign alt_lock_sync[i][j] = lock_sync[(i+j+1) % 3]; + assign alt_req_d_sync[i][j] = req_d_sync[(i+j+1) % 3]; + assign alt_rr_d_sync[i][j] = rr_d_sync[(i+j+1) % 3]; + end + rel_rr_arb_tree_tmr_part #( + .NumIn ( NumIn ), + .DataWidth ( DataWidth ), + .DataType ( DataType ), + .ExtPrio ( ExtPrio ), + .AxiVldRdy ( AxiVldRdy ), + .LockIn ( LockIn ), + .FairArb ( FairArb ), + .TmrBeforeReg ( TmrBeforeReg ), + .IdxWidth ( IdxWidth ), + .idx_t ( idx_t ) + ) i_tmr_part ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .flush_i ( flush_i ), + .rr_i ( rr_i ), + .req_in_i ( req_in[i] ), + .gnt_out_o ( gnt_out[i] ), + .req_out_o ( req_out[i] ), + .gnt_in_i ( gnt_in[i] ), + .idx_out_o ( idx_out[i] ), + .idx_data_out_o ( idx_data_out[i] ), + .alt_lock_sync_i ( alt_lock_sync[i] ), + .lock_sync_o ( lock_sync[i] ), + .alt_req_d_sync_i ( alt_req_d_sync[i] ), + .req_d_sync_o ( req_d_sync[i] ), + .alt_rr_d_sync_i ( alt_rr_d_sync[i] ), + .rr_d_sync_o ( rr_d_sync[i] ), + .data_nodes_sel_o ( data_nodes_sel[i] ), + .tmr_error_o ( tmr_errors[6+i] ) + ); + end + 
`ifndef SYNTHESIS - `ifndef COMMON_CELLS_ASSERTS_OFF + `ifndef RED_CELLS_ASSERTS_OFF `ifndef XSIM initial begin : p_assert assert(NumIn) @@ -618,28 +326,21 @@ module rel_rr_arb_tree #( else $fatal(1,"Cannot use LockIn feature together with external ExtPrio."); end - logic [2:0][NumIn-1:0] gnt_o_trsp; - for (genvar i = 0; i < NumIn; i++) begin : gen_numin_trsps - for (genvar j = 0; j < 3; j++) begin : gen_tmr_trsp - assign gnt_o_trsp[j][i] = gnt_o[i][j]; - end - end - hot_one : assert property( - @(posedge clk_i) disable iff (!rst_ni || flush_i) $onehot0(gnt_o_trsp[0])) + @(posedge clk_i) disable iff (!rst_ni || flush_i) $onehot0(gnt_out[0])) else $fatal (1, "Grant signal must be hot1 or zero."); gnt0 : assert property( - @(posedge clk_i) disable iff (!rst_ni || flush_i) |gnt_o_trsp[0] |-> gnt_i[0]) + @(posedge clk_i) disable iff (!rst_ni || flush_i) |gnt_out[0] |-> gnt_i[0]) else $fatal (1, "Grant out implies grant in."); gnt1 : assert property( - @(posedge clk_i) disable iff (!rst_ni || flush_i) req_o[0] |-> gnt_i[0] |-> |gnt_o_trsp[0]) + @(posedge clk_i) disable iff (!rst_ni || flush_i) req_o[0] |-> gnt_i[0] |-> |gnt_out[0]) else $fatal (1, "Req out and grant in implies grant out."); gnt_idx : assert property( @(posedge clk_i) disable iff (!rst_ni || flush_i) req_o[0] |-> - gnt_i[0] |-> gnt_o[idx_o[0]][0]) + gnt_i[0] |-> gnt_out[0][idx_o[0]]) else $fatal (1, "Idx_o / gnt_o do not match."); req0 : assert property( @@ -655,3 +356,354 @@ module rel_rr_arb_tree #( end endmodule + +(* no_ungroup *) +(* no_boundary_optimization *) +module rel_rr_arb_tree_tmr_part #( + parameter int unsigned NumIn = 64, + parameter int unsigned DataWidth = 32, + parameter type DataType = logic [DataWidth-1:0], + parameter bit ExtPrio = 1'b0, + parameter bit AxiVldRdy = 1'b0, + parameter bit LockIn = 1'b0, + parameter bit FairArb = 1'b1, + parameter bit TmrBeforeReg = 1'b0, + parameter int unsigned IdxWidth = (NumIn > 32'd1) ? 
unsigned'($clog2(NumIn)) : 32'd1, + parameter type idx_t = logic [IdxWidth-1:0], + localparam int unsigned NumLevels = unsigned'($clog2(NumIn)) +) ( + input logic clk_i, + input logic rst_ni, + input logic flush_i, + input idx_t rr_i, + input logic [NumIn-1:0] req_in_i, + output logic [NumIn-1:0] gnt_out_o, + output logic req_out_o, + input logic gnt_in_i, + output idx_t idx_out_o, + output idx_t [$bits(DataType)-1:0] idx_data_out_o, + input logic [1:0] alt_lock_sync_i, + output logic lock_sync_o, + input logic [1:0][NumIn-1:0] alt_req_d_sync_i, + output logic [NumIn-1:0] req_d_sync_o, + input idx_t [1:0] alt_rr_d_sync_i, + output idx_t rr_d_sync_o, + output logic [$bits(DataType)-1:0][2**NumLevels-2:0] data_nodes_sel_o, + output logic tmr_error_o +); + + idx_t rr_q; + logic [NumIn-1:0] req_d; + + logic [2+NumIn-1:0] tmr_errors; + assign tmr_error_o = |tmr_errors; + + for (genvar i = 0; i < $bits(DataType); i++) begin : gen_idx_data_out + assign idx_data_out_o[i] = idx_out_o; + if (i != 0) + assign data_nodes_sel_o[i] = data_nodes_sel_o[0]; + end + + if (ExtPrio) begin : gen_ext_rr + assign rr_q = rr_i; + assign req_d = req_in_i; + assign tmr_errors = '0; + assign lock_sync_o = '0; + assign req_d_sync_o = '0; + assign rr_d_sync_o = '0; + end else begin : gen_int_rr + idx_t rr_d; + + if (LockIn) begin : gen_lock + logic lock_d, lock_q; + logic [NumIn-1:0] req_q; + + assign lock_d = req_out_o & ~gnt_in_i; + assign req_d = (lock_q) ? 
req_q : req_in_i; + + if (TmrBeforeReg) begin : gen_lock_tmr_before_reg + logic lock_voted; + assign lock_sync_o = lock_d; + TMR_voter_fail #( + .VoterType(1) + ) i_lock_vote ( + .a_i(lock_d), + .b_i(alt_lock_sync_i[0]), + .c_i(alt_lock_sync_i[1]), + .majority_o(lock_voted), + .fault_detected_o(tmr_errors[0]) + ); + always_ff @(posedge clk_i or negedge rst_ni) begin : p_lock_reg + if (!rst_ni) begin + lock_q <= '0; + end else begin + if (flush_i) begin + lock_q <= '0; + end else begin + lock_q <= lock_voted; + end + end + end + end else begin : gen_lock_tmr_after_reg + logic lock_next; + assign lock_sync_o = lock_next; + TMR_voter_fail #( + .VoterType(1) + ) i_lock_vote ( + .a_i(lock_next), + .b_i(alt_lock_sync_i[0]), + .c_i(alt_lock_sync_i[1]), + .majority_o(lock_q), + .fault_detected_o(tmr_errors[0]) + ); + always_ff @(posedge clk_i or negedge rst_ni) begin : p_lock_reg + if (!rst_ni) begin + lock_next <= '0; + end else begin + if (flush_i) begin + lock_next <= '0; + end else begin + lock_next <= lock_d; + end + end + end + end + + `ifndef SYNTHESIS + `ifndef RED_CELLS_ASSERTS_OFF + lock: assert property( + @(posedge clk_i) disable iff (!rst_ni || flush_i) + LockIn |-> req_out_o && (!gnt_in_i && !flush_i) |=> idx_out_o == $past(idx_out_o)) else + $fatal (1, {"Lock implies same arbiter decision in next cycle if output is not ", + "ready."}); + + logic [NumIn-1:0] req_tmp; + assign req_tmp = req_q & req_in_i; + lock_req: assume property( + @(posedge clk_i) disable iff (!rst_ni || flush_i) + LockIn |-> lock_d |=> req_tmp == req_q) else + $fatal (1, {"It is disallowed to deassert unserved request signals when LockIn is ", + "enabled."}); + `endif + `endif + + if (TmrBeforeReg) begin : gen_req_tmr_before_reg + logic [NumIn-1:0] req_voted; + assign req_d_sync_o = req_d; + for (genvar i = 0; i < NumIn; i++) begin : gen_vote_req + TMR_voter_fail #( + .VoterType(1) + ) i_req_d_vote ( + .a_i(req_d[i]), + .b_i(alt_req_d_sync_i[0][i]), + .c_i(alt_req_d_sync_i[1][i]), + 
.majority_o(req_voted[i]), + .fault_detected_o(tmr_errors[2+i]) + ); + end + always_ff @(posedge clk_i or negedge rst_ni) begin : p_lock_reg + if (!rst_ni) begin + req_q <= '0; + end else begin + if (flush_i) begin + req_q <= '0; + end else begin + req_q <= req_voted; + end + end + end + end else begin : gen_req_tmr_after_reg + logic [NumIn-1:0] req_next; + assign req_d_sync_o = req_next; + for (genvar i = 0; i < NumIn; i++) begin : gen_vote_req + TMR_voter_fail #( + .VoterType(1) + ) i_req_next_vote ( + .a_i(req_next[i]), + .b_i(alt_req_d_sync_i[0][i]), + .c_i(alt_req_d_sync_i[1][i]), + .majority_o(req_q[i]), + .fault_detected_o(tmr_errors[2+i]) + ); + end + always_ff @(posedge clk_i or negedge rst_ni) begin : p_lock_reg + if (!rst_ni) begin + req_next <= '0; + end else begin + if (flush_i) begin + req_next <= '0; + end else begin + req_next <= req_d; + end + end + end + end + end else begin : gen_no_lock + assign req_d = req_in_i; + assign tmr_errors[0] = '0; + assign tmr_errors[NumIn+1:2] = '0; /* descending part-select, matching the [NumIn+1:0] declaration of tmr_errors */ + assign lock_sync_o = '0; + assign req_d_sync_o = '0; + end + + + if (FairArb) begin : gen_fair_arb + logic [NumIn-1:0] upper_mask, lower_mask; + idx_t upper_idx, lower_idx, next_idx; + logic upper_empty, lower_empty; + + for (genvar i = 0; i < NumIn; i++) begin : gen_mask + assign upper_mask[i] = (i > rr_q) ? req_d[i] : 1'b0; + assign lower_mask[i] = (i <= rr_q) ? req_d[i] : 1'b0; + end + + lzc #( + .WIDTH ( NumIn ), + .MODE ( 1'b0 ) + ) i_lzc_upper ( + .in_i ( upper_mask ), + .cnt_o ( upper_idx ), + .empty_o ( upper_empty ) + ); + + lzc #( + .WIDTH ( NumIn ), + .MODE ( 1'b0 ) + ) i_lzc_lower ( + .in_i ( lower_mask ), + .cnt_o ( lower_idx ), + .empty_o ( /*unused*/ ) + ); + + assign next_idx = upper_empty ? lower_idx : upper_idx; + assign rr_d = (gnt_in_i && req_out_o) ? next_idx : rr_q; + + end else begin : gen_unfair_arb + assign rr_d = (gnt_in_i && req_out_o) ? + ((rr_q == idx_t'(NumIn-1)) ? 
'0 : rr_q + 1'b1) : rr_q; + end + + if (TmrBeforeReg) begin : gen_rr_tmr_before_reg + idx_t rr_voted; + assign rr_d_sync_o = rr_d; + bitwise_TMR_voter_fail #( + .DataWidth(IdxWidth), + .VoterType(1) + ) i_rr_d_vote ( + .a_i(rr_d), + .b_i(alt_rr_d_sync_i[0]), + .c_i(alt_rr_d_sync_i[1]), + .majority_o(rr_voted), + .fault_detected_o(tmr_errors[1]) + ); + always_ff @(posedge clk_i or negedge rst_ni) begin : p_rr_regs + if (!rst_ni) begin + rr_q <= '0; + end else begin + if (flush_i) begin + rr_q <= '0; + end else begin + rr_q <= rr_voted; + end + end + end + end else begin : gen_rr_tmr_after_reg + idx_t rr_next; + assign rr_d_sync_o = rr_next; + bitwise_TMR_voter_fail #( + .DataWidth(IdxWidth), + .VoterType(1) + ) i_rr_next_vote ( + .a_i(rr_next), + .b_i(alt_rr_d_sync_i[0]), + .c_i(alt_rr_d_sync_i[1]), + .majority_o(rr_q), + .fault_detected_o(tmr_errors[1]) + ); + always_ff @(posedge clk_i or negedge rst_ni) begin : p_rr_regs + if (!rst_ni) begin + rr_next <= '0; + end else begin + if (flush_i) begin + rr_next <= '0; + end else begin + rr_next <= rr_d; + end + end + end + end + end + + /* verilator lint_off UNOPTFLAT */ + logic [2**NumLevels-2:0] req_nodes; + logic [2**NumLevels-2:0] gnt_nodes; + // DataType [2**NumLevels-2:0] data_nodes; // used to propagate the data + idx_t [2**NumLevels-2:0] index_nodes; // used to propagate the indices + /* lint_off */ + + assign req_out_o = req_nodes[0]; + assign gnt_nodes[0] = gnt_in_i; + // assign data_nodes_0 = data_nodes[0]; + assign idx_out_o = index_nodes[0]; + + for (genvar level = 0; unsigned'(level) < NumLevels; level++) begin : gen_levels + for (genvar l = 0; l < 2**level; l++) begin : gen_level + // local select signal + logic sel; + // index calcs + localparam int unsigned Idx0 = 2**level-1+l;// current node + localparam int unsigned Idx1 = 2**(level+1)-1+l*2; + ////////////////////////////////////////////////////////////// + // uppermost level where data is fed in from the inputs + if (unsigned'(level) == NumLevels-1) 
begin : gen_first_level + // if two successive indices are still in the vector... + if (unsigned'(l) * 2 < NumIn-1) begin : gen_reduce + assign req_nodes[Idx0] = req_d[l*2] | req_d[l*2+1]; + + // arbitration: round robin + assign sel = ~req_d[l*2] | req_d[l*2+1] & rr_q[NumLevels-1-level]; + + assign index_nodes[Idx0] = idx_t'(sel); + assign data_nodes_sel_o[0][Idx0] = sel; + // assign data_nodes[Idx0] = (sel) ? data_i[l*2+1] : data_i[l*2]; + assign gnt_out_o[l*2] = gnt_nodes[Idx0] & (AxiVldRdy | req_d[l*2]) & ~sel; + assign gnt_out_o[l*2+1] = gnt_nodes[Idx0] & (AxiVldRdy | req_d[l*2+1]) & sel; + end + // if only the first index is still in the vector... + if (unsigned'(l) * 2 == NumIn-1) begin : gen_first + assign req_nodes[Idx0] = req_d[l*2]; + assign index_nodes[Idx0] = '0;// always zero in this case + assign data_nodes_sel_o[0][Idx0] = 1'b0; + // assign data_nodes[Idx0] = data_i[l*2]; + assign gnt_out_o[l*2] = gnt_nodes[Idx0] & (AxiVldRdy | req_d[l*2]); + end + // if index is out of range, fill up with zeros (will get pruned) + if (unsigned'(l) * 2 > NumIn-1) begin : gen_out_of_range + assign req_nodes[Idx0] = 1'b0; + assign index_nodes[Idx0] = idx_t'('0); + assign data_nodes_sel_o[0][Idx0] = '0; + // assign data_nodes[Idx0] = DataType'('0); + end + ////////////////////////////////////////////////////////////// + // general case for other levels within the tree + end else begin : gen_other_levels + assign req_nodes[Idx0] = req_nodes[Idx1] | req_nodes[Idx1+1]; + + // arbitration: round robin + assign sel = ~req_nodes[Idx1] | req_nodes[Idx1+1] & rr_q[NumLevels-1-level]; + + assign index_nodes[Idx0] = (sel) ? + idx_t'({1'b1, index_nodes[Idx1+1][NumLevels-unsigned'(level)-2:0]}) : + idx_t'({1'b0, index_nodes[Idx1][NumLevels-unsigned'(level)-2:0]}); + + assign data_nodes_sel_o[0][Idx0] = sel; + // assign data_nodes[Idx0] = (sel) ? 
data_nodes[Idx1+1] : data_nodes[Idx1]; + assign gnt_nodes[Idx1] = gnt_nodes[Idx0] & ~sel; + assign gnt_nodes[Idx1+1] = gnt_nodes[Idx0] & sel; + end + ////////////////////////////////////////////////////////////// + end + end + + +endmodule diff --git a/rtl/rel_spill_register.sv b/rtl/rel_spill_register.sv new file mode 100644 index 00000000..9b954fa5 --- /dev/null +++ b/rtl/rel_spill_register.sv @@ -0,0 +1,261 @@ +// Copyright 2025 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +// Author: Michael Rogenmoser + +module rel_spill_register #( + parameter type T = logic, // Assumed ECC protected + parameter bit Bypass = 1'b0, // make this spill register transparent + parameter bit TmrHandshake = 1'b1, // use TMR handshake + parameter bit DataCorrector = 1'b0, // use 0-cycle data corrector signals + parameter int unsigned HsWidth = TmrHandshake ? 
3 : 1 // width of the handshake signals +) ( + input logic clk_i , + input logic rst_ni , + input logic [HsWidth-1:0] valid_i , + output logic [HsWidth-1:0] ready_o , + input T data_i , + output logic [HsWidth-1:0] valid_o , + input logic [HsWidth-1:0] ready_i , + output T data_o, + output logic fault_o, + output T data_corrector_o, + input T data_corrected_i +); + + typedef logic [$bits(T)-1:0] T_vec_t; + + if (Bypass) begin : gen_bypass + assign valid_o = valid_i; + assign ready_o = ready_i; + assign data_o = data_i; + assign fault_o = 1'b0; + assign data_corrector_o = '0; + end else begin : gen_spill_reg + logic [7+$bits(T):0] faults; + assign fault_o = |faults; + + logic [2:0] valid_in, ready_out, valid_out, ready_in; + if (TmrHandshake) begin : gen_tmr_handshake + assign valid_in = valid_i; + assign ready_o = ready_out; + assign valid_o = valid_out; + assign ready_in = ready_i; + assign faults[1:0] = '0; + end else begin : gen_non_tmr_handshake + assign valid_in = {3{valid_i}}; + assign ready_in = {3{ready_i}}; + TMR_voter_fail #( + .VoterType ( 0 ) // Classical_MV + ) i_ready_tmr ( + .a_i ( ready_out[0] ), + .b_i ( ready_out[1] ), + .c_i ( ready_out[2] ), + .majority_o ( ready_o ), + .fault_detected_o ( faults[0] ) + ); + TMR_voter_fail #( + .VoterType ( 0 ) // Classical_MV + ) i_valid_tmr ( + .a_i ( valid_out[0] ), + .b_i ( valid_out[1] ), + .c_i ( valid_out[2] ), + .majority_o ( valid_o ), + .fault_detected_o ( faults[1] ) + ); + end + + // The A register. + T_vec_t a_data_d, a_data_q; + + // The B register. 
+ T_vec_t b_data_d, b_data_q; + + T_vec_t [2:0] a_fill_tmr, b_fill_tmr, b_full_q_tmr; + + logic [2:0] a_full_q_sync, b_full_q_sync; + logic [2:0][1:0] alt_a_full_q_sync, alt_b_full_q_sync; + + for (genvar i = 0; i < 3; i++) begin : gen_tmr_part + for (genvar j = 0; j < 2; j++) begin : gen_sync + assign alt_a_full_q_sync[i][j] = a_full_q_sync[(i+j+1) % 3]; + assign alt_b_full_q_sync[i][j] = b_full_q_sync[(i+j+1) % 3]; + end + rel_spill_reg_tmr_part #( + .T ( T ), + .Bypass ( Bypass ) + ) i_tmr_part ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .alt_a_full_q_sync_i ( alt_a_full_q_sync[i] ), + .a_full_q_sync_o ( a_full_q_sync[i] ), + .alt_b_full_q_sync_i ( alt_b_full_q_sync[i] ), + .b_full_q_sync_o ( b_full_q_sync[i] ), + .a_fill_tmr_o ( a_fill_tmr[i] ), + .b_fill_tmr_o ( b_fill_tmr[i] ), + .b_full_q_tmr_o ( b_full_q_tmr[i] ), + .valid_i ( valid_in[i] ), + .valid_o ( valid_out[i] ), + .ready_i ( ready_in[i] ), + .ready_o ( ready_out[i] ), + .faults_o ( faults[3+2*i:2+2*i] ) + ); + end + + for (genvar i = 0; i < $bits(T); i++) begin : gen_muxes + logic a_fill, b_fill, b_full_q; + logic [2:0] faults_here; + assign faults[8+i] = |faults_here; + TMR_voter_fail #( + .VoterType ( 1 ) // KP_MV + ) i_a_data_tmr ( + .a_i ( a_fill_tmr[0][i] ), + .b_i ( a_fill_tmr[1][i] ), + .c_i ( a_fill_tmr[2][i] ), + .majority_o ( a_fill ), + .fault_detected_o ( faults_here[0] ) + ); + assign a_data_d[i] = a_fill ? data_i[i] : a_data_q[i]; + + TMR_voter_fail #( + .VoterType ( 1 ) // KP_MV + ) i_b_data_tmr ( + .a_i ( b_fill_tmr[0][i] ), + .b_i ( b_fill_tmr[1][i] ), + .c_i ( b_fill_tmr[2][i] ), + .majority_o ( b_fill ), + .fault_detected_o ( faults_here[1] ) + ); + if (DataCorrector) begin : gen_data_corrector_connect + assign data_corrector_o[i] = b_data_q[i]; + assign b_data_d[i] = b_fill ? a_data_q[i] : data_corrected_i[i]; + end else begin : gen_no_data_corrector + assign data_corrector_o[i] = '0; + assign b_data_d[i] = b_fill ? 
a_data_q[i] : b_data_q[i]; + end + + TMR_voter_fail #( + .VoterType ( 1 ) // KP_MV + ) i_b_full_q_tmr ( + .a_i ( b_full_q_tmr[0][i] ), + .b_i ( b_full_q_tmr[1][i] ), + .c_i ( b_full_q_tmr[2][i] ), + .majority_o ( b_full_q ), + .fault_detected_o ( faults_here[2] ) + ); + assign data_o[i] = b_full_q ? b_data_q[i] : a_data_q[i]; + end + + always_ff @(posedge clk_i or negedge rst_ni) begin : ps_a_data + if (!rst_ni) + a_data_q <= '0; + else + a_data_q <= a_data_d; + end + + always_ff @(posedge clk_i or negedge rst_ni) begin : ps_b_data + if (!rst_ni) + b_data_q <= '0; + else + b_data_q <= b_data_d; + end + end + +endmodule + +(* no_ungroup *) +(* no_boundary_optimization *) +module rel_spill_reg_tmr_part #( + parameter type T = logic, // Assumed ECC protected + parameter bit Bypass = 1'b0 +) ( + input logic clk_i, + input logic rst_ni, + input logic [1:0] alt_a_full_q_sync_i, + output logic a_full_q_sync_o, + input logic [1:0] alt_b_full_q_sync_i, + output logic b_full_q_sync_o, + output logic [$bits(T)-1:0] a_fill_tmr_o, + output logic [$bits(T)-1:0] b_fill_tmr_o, + output logic [$bits(T)-1:0] b_full_q_tmr_o, + input logic valid_i, + output logic valid_o, + input logic ready_i, + output logic ready_o, + output logic [1:0] faults_o +); + + logic a_full_q; + logic a_fill, a_drain; + logic b_full_q; + logic b_fill, b_drain; + + for (genvar i = 0; i < $bits(T); i++) begin : gen_tmr_fill + assign a_fill_tmr_o[i] = a_fill; + assign b_fill_tmr_o[i] = b_fill; + assign b_full_q_tmr_o[i] = b_full_q_sync_o; + end + + + + always_ff @(posedge clk_i or negedge rst_ni) begin : ps_a_data + if (!rst_ni) + a_full_q_sync_o <= '0; + else if (a_fill || a_drain) + a_full_q_sync_o <= a_fill; + end + + TMR_voter_fail #( + .VoterType ( 1 ) // KP_MV + ) i_a_full_tmr ( + .a_i ( a_full_q_sync_o ), + .b_i ( alt_a_full_q_sync_i[0] ), + .c_i ( alt_a_full_q_sync_i[1] ), + .majority_o ( a_full_q ), + .fault_detected_o ( faults_o[0] ) + ); + + always_ff @(posedge clk_i or negedge rst_ni) begin : 
ps_b_data + if (!rst_ni) + b_full_q_sync_o <= '0; + else if (b_fill || b_drain) + b_full_q_sync_o <= b_fill; + end + + TMR_voter_fail #( + .VoterType ( 0 ) // Classical_MV + ) i_b_full_tmr ( + .a_i ( b_full_q_sync_o ), + .b_i ( alt_b_full_q_sync_i[0] ), + .c_i ( alt_b_full_q_sync_i[1] ), + .majority_o ( b_full_q ), + .fault_detected_o ( faults_o[1] ) + ); + + // Fill the A register when the A or B register is empty. Drain the A register + // whenever it is full and being filled, or if a flush is requested. + assign a_fill = valid_i && ready_o; + assign a_drain = (a_full_q && !b_full_q); + + // Fill the B register whenever the A register is drained, but the downstream + // circuit is not ready. Drain the B register whenever it is full and the + // downstream circuit is ready, or if a flush is requested. + assign b_fill = a_drain && (!ready_i); + assign b_drain = (b_full_q && ready_i); + + // We can accept input as long as register B is not full. + // Note: flush_i and valid_i must not be high at the same time, + // otherwise an invalid handshake may occur + assign ready_o = !a_full_q || !b_full_q; + + // The unit provides output as long as one of the registers is filled. + assign valid_o = a_full_q | b_full_q; + +endmodule diff --git a/test/tb_rel_fifo.sv b/test/tb_rel_fifo.sv index 825a4854..2b0fdc55 100644 --- a/test/tb_rel_fifo.sv +++ b/test/tb_rel_fifo.sv @@ -53,8 +53,7 @@ module tb_rel_fifo_inst #( .Depth ( DEPTH ), .TmrStatus ( 1'b1 ), .DataHasEcc ( 1'b1 ), - .StatusFF ( 1'b0 ), - .TmrBeforeReg ( 1'b0 ) + .StatusFF ( 1'b0 ) ) dut ( .clk_i, .rst_ni,