diff --git a/hw/snitch/src/snitch.sv b/hw/snitch/src/snitch.sv index 2c879614f9..9e0143ace7 100644 --- a/hw/snitch/src/snitch.sv +++ b/hw/snitch/src/snitch.sv @@ -26,6 +26,8 @@ module snitch import snitch_pkg::*; import riscv_instr::*; #( /// Enable FP in general parameter bit FP_EN = 1, + /// Enable Zfinx Extension. + parameter bit ZFINX_EN = 0, /// Enable F Extension. parameter bit RVF = 0, /// Enable D Extension. parameter bit RVD = 0, @@ -366,11 +368,14 @@ module snitch import snitch_pkg::*; import riscv_instr::*; #( // accelerator offloading interface // register int destination in scoreboard logic acc_register_rd; + assign acc_qreq_o.id = rd; assign acc_qreq_o.data_op = inst_data_i; - assign acc_qreq_o.data_arga = {{32{opa[31]}}, opa}; - assign acc_qreq_o.data_argb = {{32{opb[31]}}, opb}; + // With ZFINX_EN the upper word is all ones instead of the sign extension. NOTE(review): this applies to every offloaded instruction, not only FP ones - confirm the other accelerators ignore bits [63:32]. + assign acc_qreq_o.data_arga = ZFINX_EN ? {{32{1'b1}}, opa} : {{32{opa[31]}}, opa}; + assign acc_qreq_o.data_argb = ZFINX_EN ? {{32{1'b1}}, opb} : {{32{opb[31]}}, opb}; + // operand C is currently only used for load/store instructions assign acc_qreq_o.data_argc = ls_paddr; @@ -511,6 +516,7 @@ module snitch import snitch_pkg::*; import riscv_instr::*; #( assign rs1 = inst_data_i[15 + RegWidth - 1:15]; assign rs2 = inst_data_i[20 + RegWidth - 1:20]; + always_comb begin illegal_inst = 1'b0; ecall = 1'b0; @@ -1111,7 +1117,32 @@ module snitch import snitch_pkg::*; import riscv_instr::*; #( // Offload FP-FP Instructions - fire and forget // TODO (smach): Check legal rounding modes and issue illegal isn if needed // Single Precision Floating-Point - FADD_S, + FADD_S: begin + if (FP_EN && RVF + && (!(inst_data_i inside {FDIV_S, FSQRT_S}) || XDivSqrt)) begin + if (ZFINX_EN) begin + //acc_qreq_o.id is assigned rd which comes from outside + //acc_qreq_o.data_op is by default FP_SS + opa_select = Reg; + opb_select = Reg; + //acc_qreq_o.data_argc only for store and loads + acc_qvalid_o = valid_instr; + write_rd = 1'b0; + uses_rd = 1'b1; // rd is a GPR here, as in the other acc_register_rd cases (e.g. FCVT_W_S) + acc_register_rd = 1'b1; // mark the sb as busy for rd entry +
end else begin + write_rd = 1'b0; + acc_qvalid_o = valid_instr; + end + + end else begin + illegal_inst = 1'b1; + end + end + //FMADD_S: begin + // + //end + FSUB_S, FMUL_S, FDIV_S, @@ -1766,15 +1797,17 @@ module snitch import snitch_pkg::*; import riscv_instr::*; #( FCVT_W_S, FCVT_WU_S, FMV_X_W: begin - if (FP_EN && RVF) begin + if ((ZFINX_EN && (inst_data_i inside {FMV_X_W})) || !(FP_EN && RVF)) begin + illegal_inst = 1'b1; + end else begin write_rd = 1'b0; uses_rd = 1'b1; acc_qvalid_o = valid_instr; acc_register_rd = 1'b1; // No RS in GPR but RD in GPR, register in int scoreboard - end else begin - illegal_inst = 1'b1; + end - end + end + // Vectors VFEQ_S, VFEQ_R_S, @@ -1963,17 +1996,19 @@ module snitch import snitch_pkg::*; import riscv_instr::*; #( end end // Single Precision Floating-Point + FMV_W_X, FCVT_S_W, FCVT_S_WU: begin - if (FP_EN && RVF) begin + if ((ZFINX_EN && (inst_data_i inside {FMV_W_X})) || !(FP_EN && RVF)) begin + illegal_inst = 1'b1; + end else begin opa_select = Reg; write_rd = 1'b0; acc_qvalid_o = valid_instr; - end else begin - illegal_inst = 1'b1; end end + // [Alternate] Half Precision Floating-Point FMV_H_X, FCVT_H_W, @@ -2051,54 +2086,75 @@ module snitch import snitch_pkg::*; import riscv_instr::*; #( // Floating-Point Load/Store // Single Precision Floating-Point FLW: begin - if (FP_EN && RVF) begin - opa_select = Reg; - opb_select = IImmediate; - write_rd = 1'b0; - acc_qvalid_o = valid_instr & trans_ready & caq_qready; - ls_size = Word; - is_fp_load = 1'b1; - end else begin - illegal_inst = 1'b1; + if (FP_EN && RVF) begin + if(ZFINX_EN) begin // NOTE(review): Zfinx defines no FP load/store encodings; here FLW/FSW are decoded as integer word load/store - confirm intended + write_rd = 1'b0; + uses_rd = 1'b1; + is_load = 1'b1; + is_signed = 1'b1; + ls_size = Word; + opa_select = Reg; + opb_select = IImmediate; + end + else begin + opa_select = Reg; + opb_select = IImmediate; + write_rd = 1'b0; + acc_qvalid_o = valid_instr & trans_ready & caq_qready; + ls_size = Word; + is_fp_load = 1'b1; + end + end else begin + illegal_inst = 1'b1; + end end - end FSW: begin if (FP_EN && 
RVF) begin - opa_select = Reg; - opb_select = SFImmediate; - write_rd = 1'b0; - acc_qvalid_o = valid_instr & trans_ready & caq_qready; - ls_size = Word; - is_fp_store = 1'b1; + if(ZFINX_EN) begin + write_rd = 1'b0; + is_store = 1'b1; + ls_size = Word; + opa_select = Reg; + opb_select = SImmediate; + end else begin + opa_select = Reg; + opb_select = SFImmediate; + write_rd = 1'b0; + acc_qvalid_o = valid_instr & trans_ready & caq_qready; + ls_size = Word; + is_fp_store = 1'b1; + end end else begin illegal_inst = 1'b1; end end // Double Precision Floating-Point FLD: begin if (FP_EN && (RVD || XFVEC)) begin opa_select = Reg; opb_select = IImmediate; write_rd = 1'b0; acc_qvalid_o = valid_instr & trans_ready & caq_qready; ls_size = Double; is_fp_load = 1'b1; end else begin illegal_inst = 1'b1; end end FSD: begin if (FP_EN && (RVD || XFVEC)) begin opa_select = Reg; opb_select = SFImmediate; write_rd = 1'b0; acc_qvalid_o = valid_instr & trans_ready & caq_qready; ls_size = Double; is_fp_store = 1'b1; end else begin illegal_inst = 1'b1; end end + + // Half Precision Floating-Point FLH: begin if (FP_EN && (XF16 || XF16ALT)) begin diff --git a/hw/snitch_cluster/src/snitch_cc.sv b/hw/snitch_cluster/src/snitch_cc.sv index 31584cd1cd..731be681d4 100644 --- a/hw/snitch_cluster/src/snitch_cc.sv +++ b/hw/snitch_cluster/src/snitch_cc.sv @@ -104,6 +104,8 @@ module snitch_cc #( parameter bit RegisterFPUIn = 0, /// Insert Pipeline registers immediately after FPU datapath parameter bit RegisterFPUOut = 0, + /// Enable Zfinx ISA extension (off by default, matching snitch and snitch_fp_ss) + parameter bit ZFINX_EN = 0, parameter snitch_pma_pkg::snitch_pma_t SnitchPMACfg = '{default: 0}, /// Consistency Address Queue (CAQ) parameters. 
parameter int unsigned CaqDepth = 0, @@ -231,6 +233,7 @@ module snitch_cc #( .NumITLBEntries (NumITLBEntries), .RVE (RVE), .FP_EN (FPEn), + .ZFINX_EN (ZFINX_EN), .Xdma (Xdma), .Xssr (Xssr), .Xcopift (Xcopift), @@ -509,6 +512,7 @@ module snitch_cc #( .Xssr (Xssr), .Xcopift (Xcopift), .RVF (RVF), + .ZFINX_EN (ZFINX_EN), .RVD (RVD), .XF16 (XF16), .XF16ALT (XF16ALT), diff --git a/hw/snitch_cluster/src/snitch_fp_ss.sv b/hw/snitch_cluster/src/snitch_fp_ss.sv index d0e49cc1d0..f170012b60 100644 --- a/hw/snitch_cluster/src/snitch_fp_ss.sv +++ b/hw/snitch_cluster/src/snitch_fp_ss.sv @@ -27,6 +27,7 @@ module snitch_fp_ss import snitch_pkg::*; #( parameter type acc_req_t = logic, parameter type acc_resp_t = logic, parameter bit RVF = 1, + parameter bit ZFINX_EN = 0, parameter bit RVD = 1, parameter bit XF16 = 0, parameter bit XF16ALT = 0, @@ -92,6 +93,7 @@ module snitch_fp_ss import snitch_pkg::*; #( logic [2:0][4:0] fpr_raddr; logic [2:0][FLEN-1:0] fpr_rdata; + logic [2:0][FLEN-1:0] fpr_rdata_plch; logic [0:0] fpr_we; logic [0:0][4:0] fpr_waddr; @@ -99,8 +101,21 @@ module snitch_fp_ss import snitch_pkg::*; #( logic [0:0] fpr_wvalid; logic [0:0] fpr_wready; - logic ssr_active_d, ssr_active_q, ssr_active_ena; - `FFLAR(ssr_active_q, Xssr & ssr_active_d, ssr_active_ena, 1'b0, clk_i, rst_i) + logic [6:0] op_code; + logic [1:0] prec; + logic is_single_prec, is_vector_instr, zfinx_s_nv; + //boolean for precision & vec operation + assign op_code = acc_req_q.data_op[6:0]; + assign prec = acc_req_q.data_op[26:25]; + // bits [26:25] are the fmt field only for OP-FP and F[N]MADD/F[N]MSUB opcodes; for FP loads/stores and CSR accesses they are immediate/address bits + + assign is_vector_instr = (op_code == 7'b0110011); + assign is_single_prec = (prec == 2'b00) & (op_code inside {7'b1010011, 7'b1000011, 7'b1000111, 7'b1001011, 7'b1001111}); + assign zfinx_s_nv = (ZFINX_EN & is_single_prec & !(is_vector_instr)) ; + + logic ssr_active_d, ssr_active_q, ssr_active_ena, Xssr_dmx; + assign Xssr_dmx = zfinx_s_nv ? 
1'b0 : Xssr; + `FFLAR(ssr_active_q, Xssr_dmx & ssr_active_d, ssr_active_ena, 1'b0, clk_i, rst_i) typedef struct packed { logic ssr; // write-back to SSR at rd @@ -111,6 +126,7 @@ module snitch_fp_ss import snitch_pkg::*; #( logic use_fpu; logic [2:0][FLEN-1:0] op; + logic [2:0][63:0] op_z; logic [2:0] op_ready; // operand is ready logic lsu_qready; @@ -139,14 +155,19 @@ module snitch_fp_ss import snitch_pkg::*; #( logic fpu_out_valid, fpu_out_ready; logic fpu_in_valid, fpu_in_ready; - typedef enum logic [2:0] { + + typedef enum logic [3:0] { None, AccBus, + AccBus_A, + AccBus_B, + AccBus_C, RegA, RegB, RegC, RegBRep, // Replication for vectors RegDest } op_select_e; - op_select_e [2:0] op_select; + op_select_e [2:0] op_select; // one selector per operand (op/op_z/op_ready are [2:0]); only the enum itself needed a 4th bit + typedef enum logic [1:0] { ResNone, ResAccBus @@ -157,7 +178,7 @@ module snitch_fp_ss import snitch_pkg::*; #( logic op_mode; logic [4:0] rs1, rs2, rs3, rd; - + // LSU typedef enum logic [1:0] { Byte = 2'b00, @@ -269,13 +290,15 @@ module snitch_fp_ss import snitch_pkg::*; #( assign fpu_out_ready = ((fpu_tag_out.acc & acc_resp_ready_i) | (~fpu_tag_out.acc & fpr_wready)); // FPU Result - logic [FLEN-1:0] fpu_result; + logic [63:0] fpu_result; + logic [FLEN-1:0] fpu_32_result; + assign fpu_32_result = fpu_result[31:0]; // FPU Tag assign acc_resp_o.id = fpu_tag_out.rd; // accelerator bus write-port - assign acc_resp_o.data = fpu_result; - + assign acc_resp_o.data = fpu_32_result; + assign rd = acc_req_q.data_op[11:7]; assign rs1 = acc_req_q.data_op[19:15]; assign rs2 = acc_req_q.data_op[24:20]; @@ -300,16 +323,18 @@ module snitch_fp_ss import snitch_pkg::*; #( sc_valid_d = sc_valid_q; // If an instruction is reading a chaining-enabled register, // clear the register's valid bit - if (acc_req_valid_q & acc_req_ready_q) begin - for (int i = 0; i < 3; i++) begin - if (sc_mask_q[fpr_raddr[i]]) sc_valid_d[fpr_raddr[i]] = 1'b0; + if(!zfinx_s_nv) begin + if (acc_req_valid_q & acc_req_ready_q) begin + for (int i = 0; i < 3; i++) begin + if 
(sc_mask_q[fpr_raddr[i]]) sc_valid_d[fpr_raddr[i]] = 1'b0; + end end end // If there is a writeback to a chaining-enabled register, // set the register's valid bit if (fpr_we) begin if (sc_mask_q[fpr_waddr]) begin sc_valid_d[fpr_waddr] = 1'b1; end end end @@ -354,7 +379,11 @@ module snitch_fp_ss import snitch_pkg::*; #( ls_size = Word; // Destination register is in FPR - rd_is_fp = 1'b1; + if(ZFINX_EN) begin // NOTE(review): this default covers every instruction, but snitch.sv in this patch only registers FADD_S in the integer scoreboard - confirm + rd_is_fp = 1'b0; + end else begin + rd_is_fp = 1'b1; + end csr_instr = 1'b0; // is a csr instruction // SSR register ssr_active_d = ssr_active_q; @@ -363,11 +392,18 @@ module snitch_fp_ss import snitch_pkg::*; #( unique casez (acc_req_q.data_op) // FP - FP Operations // Single Precision - riscv_instr::FADD_S: begin + riscv_instr::FADD_S: begin fpu_op = fpnew_pkg::ADD; - op_select[1] = RegA; - op_select[2] = RegB; + if (ZFINX_EN) begin // same operand slots (1 and 2) as the register-file path below + op_select[1] = AccBus_A; + op_select[2] = AccBus_B; + end else begin + op_select[1] = RegA; + op_select[2] = RegB; + end end + + riscv_instr::FSUB_S: begin fpu_op = fpnew_pkg::ADD; op_select[1] = RegA; @@ -2490,7 +2526,7 @@ module snitch_fp_ss import snitch_pkg::*; #( if (src_fmt == fpnew_pkg::FP8 && fpu_fmt_mode_i.src == 1'b1) src_fmt = fpnew_pkg::FP8ALT; if (dst_fmt == fpnew_pkg::FP8 && fpu_fmt_mode_i.dst == 1'b1) dst_fmt = fpnew_pkg::FP8ALT; end - + snitch_regfile #( .DataWidth ( FLEN ), .NrReadPorts ( 3 ), @@ -2500,18 +2536,24 @@ module snitch_fp_ss import snitch_pkg::*; #( ) i_ff_regfile ( .clk_i, .rst_ni ( ~rst_i ), - .raddr_i ( fpr_raddr ), - .rdata_o ( fpr_rdata ), - .waddr_i ( fpr_waddr ), - .wdata_i ( fpr_wdata ), - .we_i ( fpr_we ) + .raddr_i (fpr_raddr), + .rdata_o (fpr_rdata_plch), + .waddr_i (fpr_waddr), + .wdata_i (fpr_wdata), + .we_i (fpr_we) ); + + + // ---------------------- // Operand Select // 
---------------------- logic [2:0][FLEN-1:0] acc_qdata; + logic [2:0][63:0] acc_qdata_z; assign acc_qdata = {acc_req_q.data_argc, acc_req_q.data_argb, acc_req_q.data_arga}; + assign acc_qdata_z = {acc_req_q.data_argc, acc_req_q.data_argb, acc_req_q.data_arga}; + assign fpr_rdata = zfinx_s_nv ? '0 : fpr_rdata_plch; // Mux address lines as operands for the FPU can be mangled always_comb begin @@ -2549,11 +2591,24 @@ module snitch_fp_ss import snitch_pkg::*; #( ssr_rvalid_o[i] = 1'b0; + op[i] = '1; // default, so that branches which only drive op_z do not latch op unique case (op_select[i]) None: begin - op[i] = '1; + op_z[i] = '1; op_ready[i] = 1'b1; end AccBus: begin - op[i] = acc_qdata[i]; + op[i] = acc_qdata[i]; // copied into op_z after the case + op_ready[i] = acc_req_valid_q; + end + AccBus_A: begin + op_z[i] = acc_qdata_z[0][63:0]; + op_ready[i] = acc_req_valid_q; + end + AccBus_B: begin + op_z[i] = acc_qdata_z[1][63:0]; + op_ready[i] = acc_req_valid_q; + end + AccBus_C: begin + op_z[i] = acc_qdata_z[2][63:0]; op_ready[i] = acc_req_valid_q; end // Scoreboard or SSR @@ -2580,7 +2635,8 @@ module snitch_fp_ss import snitch_pkg::*; #( end end default: begin - op[i] = '0; + op_z[i] = '0; op_ready[i] = 1'b1; end endcase + if (op_select[i] inside {AccBus, RegA, RegB, RegC, RegBRep, RegDest}) op_z[i] = 64'(op[i]); // the FPU reads op_z: forward operands that were only written to op @@ -2606,7 +2662,7 @@ module snitch_fp_ss import snitch_pkg::*; #( .clk_i , .rst_ni ( ~rst_i ), .hart_id_i ( hart_id_i ), - .operands_i ( op ), + .operands_i ( op_z ), .rnd_mode_i ( fpu_rnd_mode ), .op_i ( fpu_op ), .op_mod_i ( op_mode ), // Sign of operand? 
@@ -2630,6 +2686,7 @@ module snitch_fp_ss import snitch_pkg::*; #( assign nan_boxed_arga = {{32{1'b1}}, acc_req_q.data_arga[31:0]}; // Arbitrate Register File Write Port + always_comb begin fpr_we = 1'b0; fpr_waddr = '0; @@ -2665,7 +2722,7 @@ module snitch_fp_ss import snitch_pkg::*; #( fpr_we = ~sc_valid_q[fpu_tag_out.rd]; fpr_wready = ~sc_valid_q[fpu_tag_out.rd]; if (((fpu_tag_out.rd == fpr_raddr[0]) | (fpu_tag_out.rd == fpr_raddr[1]) | - (fpu_tag_out.rd == fpr_raddr[2])) & + (fpu_tag_out.rd == fpr_raddr[2])) & (fpu_in_valid | (lsu_qvalid & lsu_qready))) begin fpr_wready = 1'b1; fpr_we = 1'b1; @@ -2683,12 +2740,13 @@ module snitch_fp_ss import snitch_pkg::*; #( fpr_wready = 1'b0; end end - + // ---------------------- // Load/Store Unit // ---------------------- - assign lsu_qvalid = acc_req_valid_q & (&op_ready) & (is_load | is_store) & dst_ready; - + assign lsu_qvalid = zfinx_s_nv ? 1'b0 : (acc_req_valid_q & (&op_ready) & (is_load | is_store) & dst_ready); + // lsu_qvalid is forced low while zfinx_s_nv is set; otherwise the request condition is unchanged + snitch_lsu #( .AddrWidth (AddrWidth), .DataWidth (DataWidth), @@ -2731,8 +2789,10 @@ module snitch_fp_ss import snitch_pkg::*; #( .data_req_o, .data_rsp_i ); + // SSRs + for (genvar i = 0; i < 3; i++) assign ssr_rdone_o[i] = ssr_rvalid_o[i] & acc_req_ready_q; assign ssr_raddr_o = fpr_raddr; @@ -2766,12 +2826,12 @@ module snitch_fp_ss import snitch_pkg::*; #( assign trace_port_o.src_fmt = src_fmt; assign trace_port_o.dst_fmt = dst_fmt; assign trace_port_o.int_fmt = int_fmt; - assign trace_port_o.acc_qdata_0 = acc_qdata[0]; - assign trace_port_o.acc_qdata_1 = acc_qdata[1]; - assign trace_port_o.acc_qdata_2 = acc_qdata[2]; - assign trace_port_o.op_0 = op[0]; - assign trace_port_o.op_1 = op[1]; - assign trace_port_o.op_2 = op[2]; + assign trace_port_o.acc_qdata_0 = acc_qdata_z[0]; + assign trace_port_o.acc_qdata_1 = acc_qdata_z[1]; + assign trace_port_o.acc_qdata_2 = acc_qdata_z[2]; + assign trace_port_o.op_0 = op_z[0]; + 
assign trace_port_o.op_1 = op_z[1]; + assign trace_port_o.op_2 = op_z[2]; assign trace_port_o.use_fpu = use_fpu; assign trace_port_o.fpu_in_rd = fpu_tag_in.rd; assign trace_port_o.fpu_in_acc = fpu_tag_in.acc; diff --git a/hw/snitch_cluster/src/snitch_fpu.sv b/hw/snitch_cluster/src/snitch_fpu.sv index ed7958edca..ba5ccbc60b 100644 --- a/hw/snitch_cluster/src/snitch_fpu.sv +++ b/hw/snitch_cluster/src/snitch_fpu.sv @@ -20,7 +20,7 @@ module snitch_fpu import snitch_pkg::*; #( input logic rst_ni, // Input signals input logic [31:0] hart_id_i, - input logic [2:0][FLEN-1:0] operands_i, + input logic [2:0][63:0] operands_i, // fixed 64-bit operand lanes (previously FLEN wide) input fpnew_pkg::roundmode_e rnd_mode_i, input fpnew_pkg::operation_e op_i, input logic op_mod_i, @@ -33,7 +33,7 @@ module snitch_fpu import snitch_pkg::*; #( input logic in_valid_i, output logic in_ready_o, // Output signals - output logic [FLEN-1:0] result_o, + output logic [63:0] result_o, // fixed 64-bit result (previously FLEN wide) output logic [4:0] status_o, output logic [6:0] tag_o, // Output handshake @@ -50,6 +50,19 @@ module snitch_fpu import snitch_pkg::*; #( }; typedef struct packed { + logic [2:0][63:0] operands; + fpnew_pkg::roundmode_e rnd_mode; + fpnew_pkg::operation_e op; + logic op_mod; + fpnew_pkg::fp_format_e src_fmt; + fpnew_pkg::fp_format_e dst_fmt; + fpnew_pkg::int_format_e int_fmt; + logic vectorial_op; + logic [6:0] tag; + } fpu_inz_t; // request struct with 64-bit operand lanes + + +typedef struct packed { logic [2:0][FLEN-1:0] operands; fpnew_pkg::roundmode_e rnd_mode; fpnew_pkg::operation_e op; @@ -62,13 +75,24 @@ module snitch_fpu import snitch_pkg::*; #( } fpu_in_t; typedef struct packed { - logic [FLEN-1:0] result; + logic [63:0] result; + logic [4:0] status; + logic [6:0] tag; + } fpu_outz_t; // response struct with a 64-bit result + + typedef struct packed { + logic [FLEN-1:0] result; logic [4:0] status; logic [6:0] tag; } fpu_out_t; - fpu_in_t fpu_in_q, fpu_in; - fpu_out_t fpu_out_q, fpu_out; + + + fpu_inz_t fpu_in_q, fpu_in; + fpu_in_t fpu_in_tr_q, fpu_tr_in; // NOTE(review): no use of these FLEN-wide signals is visible in this patch - confirm they are needed + fpu_outz_t fpu_out_q, fpu_out; + fpu_out_t fpu_out_tr_q, fpu_tr_out; // NOTE(review): no use of these FLEN-wide signals is visible in this patch - confirm they are needed + //handle the 
32bit integer case logic in_valid_q, in_ready_q; logic out_valid, out_ready; @@ -85,7 +109,7 @@ module snitch_fpu import snitch_pkg::*; #( }; spill_register #( - .T ( fpu_in_t ), + .T ( fpu_inz_t ), // request struct with 64-bit operand lanes .Bypass ( ~RegisterFPUIn ) ) i_spill_register_fpu_in ( .clk_i , @@ -131,7 +155,7 @@ module snitch_fpu import snitch_pkg::*; #( ); spill_register #( - .T ( fpu_outz_t ), // response struct with a 64-bit result .Bypass ( ~RegisterFPUOut ) ) i_spill_register_fpu_out ( .clk_i ,