diff --git a/Bender.lock b/Bender.lock index ba510b5a53..03dee2d38f 100644 --- a/Bender.lock +++ b/Bender.lock @@ -10,7 +10,7 @@ packages: revision: bd1abffc0812f8170902e5fb93142c7785c0b8c1 version: null source: - Git: https://github.com/colluca/axi.git + Git: https://github.com/pulp-platform/axi.git dependencies: - common_cells - common_verification @@ -71,7 +71,7 @@ packages: dependencies: - common_cells idma: - revision: 7829f71691a62c1e2e5e3230f370f222c7a83087 + revision: 9e352db1b70bc87e88670e1ef359aa3fd8bad5f6 version: null source: Git: https://github.com/pulp-platform/iDMA.git diff --git a/Bender.yml b/Bender.yml index 4280d80990..a6f23f2718 100644 --- a/Bender.yml +++ b/Bender.yml @@ -19,15 +19,16 @@ package: - Matheus Cavalcante dependencies: - axi: { git: https://github.com/colluca/axi, rev: multicast } + axi: { git: https://github.com/pulp-platform/axi, rev: multicast } axi_riscv_atomics: { git: https://github.com/pulp-platform/axi_riscv_atomics, version: 0.6.0 } common_cells: { git: https://github.com/pulp-platform/common_cells, rev: snitch } apb: { git: https://github.com/pulp-platform/apb.git, version: 0.2.2 } FPnew: { git: https://github.com/pulp-platform/cvfpu.git, rev: pulp-v0.1.3 } tech_cells_generic: { git: https://github.com/pulp-platform/tech_cells_generic, version: 0.2.13 } riscv-dbg: { git: https://github.com/pulp-platform/riscv-dbg, version: 0.8.0 } + obi: { git: "https://github.com/pulp-platform/obi.git", version: 0.1.2 } cluster_icache: { git: https://github.com/pulp-platform/cluster_icache.git, rev: 64e21ae455bbdde850c4df13bef86ea55ac42537 } - idma: { git: https://github.com/pulp-platform/iDMA.git, rev: __deploy__bebefa3__master } + idma: { git: https://github.com/pulp-platform/iDMA, rev: 9e352db1b70bc87e88670e1ef359aa3fd8bad5f6 } export_include_dirs: - hw/reqrsp_interface/include @@ -79,6 +80,7 @@ sources: - hw/tcdm_interface/src/tcdm_interface.sv # Level 1 - hw/tcdm_interface/src/axi_to_tcdm.sv + - hw/tcdm_interface/src/obi_to_tcdm.sv - 
hw/tcdm_interface/src/reqrsp_to_tcdm.sv - hw/tcdm_interface/src/tcdm_mux.sv - target: simulation diff --git a/hw/reqrsp_interface/include/reqrsp_interface/typedef.svh b/hw/reqrsp_interface/include/reqrsp_interface/typedef.svh index 07a4179cc2..19f2e794dc 100644 --- a/hw/reqrsp_interface/include/reqrsp_interface/typedef.svh +++ b/hw/reqrsp_interface/include/reqrsp_interface/typedef.svh @@ -8,15 +8,15 @@ `ifndef REQRSP_INTERFACE_TYPEDEF_SVH_ `define REQRSP_INTERFACE_TYPEDEF_SVH_ -`define REQRSP_TYPEDEF_REQ_CHAN_T(__req_chan_t, __addr_t, __data_t, __strb_t) \ +`define REQRSP_TYPEDEF_REQ_CHAN_T(__req_chan_t, __addr_t, __data_t, __strb_t, __user_t) \ typedef struct packed { \ - __addr_t addr; \ - __addr_t mask; \ - logic write; \ - reqrsp_pkg::amo_op_e amo; \ - __data_t data; \ - __strb_t strb; \ - reqrsp_pkg::size_t size; \ + __addr_t addr; \ + logic write; \ + reqrsp_pkg::amo_op_e amo; \ + __data_t data; \ + __strb_t strb; \ + __user_t user; \ + reqrsp_pkg::size_t size; \ } __req_chan_t; `define REQRSP_TYPEDEF_RSP_CHAN_T(__rsp_chan_t, __data_t) \ @@ -39,8 +39,8 @@ logic q_ready; \ } __rsp_t; -`define REQRSP_TYPEDEF_ALL(__name, __addr_t, __data_t, __strb_t) \ - `REQRSP_TYPEDEF_REQ_CHAN_T(__name``_req_chan_t, __addr_t, __data_t, __strb_t) \ +`define REQRSP_TYPEDEF_ALL(__name, __addr_t, __data_t, __strb_t, __user_t) \ + `REQRSP_TYPEDEF_REQ_CHAN_T(__name``_req_chan_t, __addr_t, __data_t, __strb_t, __user_t) \ `REQRSP_TYPEDEF_RSP_CHAN_T(__name``_rsp_chan_t, __data_t) \ `REQRSP_TYPEDEF_REQ_T(__name``_req_t, __name``_req_chan_t) \ `REQRSP_TYPEDEF_RSP_T(__name``_rsp_t, __name``_rsp_chan_t) diff --git a/hw/reqrsp_interface/src/axi_to_reqrsp.sv b/hw/reqrsp_interface/src/axi_to_reqrsp.sv index d96cca556d..cdafa9d8fb 100644 --- a/hw/reqrsp_interface/src/axi_to_reqrsp.sv +++ b/hw/reqrsp_interface/src/axi_to_reqrsp.sv @@ -448,7 +448,7 @@ module axi_to_reqrsp_intf #( typedef logic [IdWidth-1:0] id_t; typedef logic [UserWidth-1:0] user_t; - `REQRSP_TYPEDEF_ALL(reqrsp, addr_t, data_t, 
strb_t) + `REQRSP_TYPEDEF_ALL(reqrsp, addr_t, data_t, strb_t, user_t) `AXI_TYPEDEF_AW_CHAN_T(aw_chan_t, addr_t, id_t, user_t) `AXI_TYPEDEF_W_CHAN_T(w_chan_t, data_t, strb_t, user_t) diff --git a/hw/reqrsp_interface/src/reqrsp_cut.sv b/hw/reqrsp_interface/src/reqrsp_cut.sv index 2e804dec84..e221f8906f 100644 --- a/hw/reqrsp_interface/src/reqrsp_cut.sv +++ b/hw/reqrsp_interface/src/reqrsp_cut.sv @@ -12,6 +12,8 @@ module reqrsp_cut #( parameter int unsigned AddrWidth = 0, /// Data width of the interface. parameter int unsigned DataWidth = 0, + /// User width of the interface. + parameter int unsigned UserWidth = 0, /// Request type. parameter type req_t = logic, /// Response type. @@ -32,8 +34,9 @@ module reqrsp_cut #( typedef logic [AddrWidth-1:0] addr_t; typedef logic [DataWidth-1:0] data_t; typedef logic [DataWidth/8-1:0] strb_t; + typedef logic [UserWidth-1:0] user_t; - `REQRSP_TYPEDEF_ALL(reqrsp, addr_t, data_t, strb_t) + `REQRSP_TYPEDEF_ALL(reqrsp, addr_t, data_t, strb_t, user_t) spill_register #( .T (reqrsp_req_chan_t), @@ -74,6 +77,8 @@ module reqrsp_cut_intf #( parameter int unsigned AddrWidth = 0, /// Data width of the interface. parameter int unsigned DataWidth = 0, + /// User width of the interface. + parameter int unsigned UserWidth = 0, /// Bypass request channel. parameter bit BypassReq = 0, /// Bypass Response channel. 
@@ -88,8 +93,9 @@ module reqrsp_cut_intf #( typedef logic [AddrWidth-1:0] addr_t; typedef logic [DataWidth-1:0] data_t; typedef logic [DataWidth/8-1:0] strb_t; + typedef logic [UserWidth-1:0] user_t; - `REQRSP_TYPEDEF_ALL(reqrsp, addr_t, data_t, strb_t) + `REQRSP_TYPEDEF_ALL(reqrsp, addr_t, data_t, strb_t, user_t) reqrsp_req_t reqrsp_slv_req, reqrsp_mst_req; reqrsp_rsp_t reqrsp_slv_rsp, reqrsp_mst_rsp; @@ -97,6 +103,7 @@ module reqrsp_cut_intf #( reqrsp_cut #( .AddrWidth (AddrWidth), .DataWidth (DataWidth), + .UserWidth (UserWidth), .req_t (reqrsp_req_t), .rsp_t (reqrsp_rsp_t), .BypassReq (BypassReq), diff --git a/hw/reqrsp_interface/src/reqrsp_demux.sv b/hw/reqrsp_interface/src/reqrsp_demux.sv index ce14f98627..2d4fb0cf69 100644 --- a/hw/reqrsp_interface/src/reqrsp_demux.sv +++ b/hw/reqrsp_interface/src/reqrsp_demux.sv @@ -104,6 +104,8 @@ module reqrsp_demux_intf #( parameter int unsigned AddrWidth = 0, /// Data width of the interface. parameter int unsigned DataWidth = 0, + /// User width of the interface. + parameter int unsigned UserWidth = 0, /// Amount of outstanding responses. Determines the FIFO size. parameter int unsigned RespDepth = 8, // Dependent parameters, DO NOT OVERRIDE! @@ -120,8 +122,9 @@ module reqrsp_demux_intf #( typedef logic [AddrWidth-1:0] addr_t; typedef logic [DataWidth-1:0] data_t; typedef logic [DataWidth/8-1:0] strb_t; + typedef logic [UserWidth-1:0] user_t; - `REQRSP_TYPEDEF_ALL(reqrsp, addr_t, data_t, strb_t) + `REQRSP_TYPEDEF_ALL(reqrsp, addr_t, data_t, strb_t, user_t) reqrsp_req_t reqrsp_slv_req; reqrsp_rsp_t reqrsp_slv_rsp; diff --git a/hw/reqrsp_interface/src/reqrsp_iso.sv b/hw/reqrsp_interface/src/reqrsp_iso.sv index 997591615f..82a456b88c 100644 --- a/hw/reqrsp_interface/src/reqrsp_iso.sv +++ b/hw/reqrsp_interface/src/reqrsp_iso.sv @@ -14,6 +14,8 @@ module reqrsp_iso #( parameter int unsigned AddrWidth = 0, /// Data width of the interface. parameter int unsigned DataWidth = 0, + /// User width of the interface. 
+ parameter int unsigned UserWidth = 0, /// Request type. parameter type req_t = logic, /// Response type. @@ -43,8 +45,9 @@ module reqrsp_iso #( typedef logic [AddrWidth-1:0] addr_t; typedef logic [DataWidth-1:0] data_t; typedef logic [DataWidth/8-1:0] strb_t; + typedef logic [UserWidth-1:0] user_t; - `REQRSP_TYPEDEF_ALL(reqrsp, addr_t, data_t, strb_t) + `REQRSP_TYPEDEF_ALL(reqrsp, addr_t, data_t, strb_t, user_t) isochronous_spill_register #( .T (reqrsp_req_chan_t), @@ -89,6 +92,8 @@ module reqrsp_iso_intf #( parameter int unsigned AddrWidth = 0, /// Data width of the interface. parameter int unsigned DataWidth = 0, + /// User width of the interface. + parameter int unsigned UserWidth = 0, /// Bypass. parameter bit BypassReq = 0, parameter bit BypassRsp = 0 @@ -110,8 +115,9 @@ module reqrsp_iso_intf #( typedef logic [AddrWidth-1:0] addr_t; typedef logic [DataWidth-1:0] data_t; typedef logic [DataWidth/8-1:0] strb_t; + typedef logic [UserWidth-1:0] user_t; - `REQRSP_TYPEDEF_ALL(reqrsp, addr_t, data_t, strb_t) + `REQRSP_TYPEDEF_ALL(reqrsp, addr_t, data_t, strb_t, user_t) reqrsp_req_t reqrsp_src_req, reqrsp_dst_req; reqrsp_rsp_t reqrsp_src_rsp, reqrsp_dst_rsp; @@ -119,6 +125,7 @@ module reqrsp_iso_intf #( reqrsp_iso #( .AddrWidth (AddrWidth), .DataWidth (DataWidth), + .UserWidth (UserWidth), .req_t (reqrsp_req_t), .rsp_t (reqrsp_rsp_t), .BypassReq (BypassReq), diff --git a/hw/reqrsp_interface/src/reqrsp_mux.sv b/hw/reqrsp_interface/src/reqrsp_mux.sv index afab89bbb0..9a828e7dd2 100644 --- a/hw/reqrsp_interface/src/reqrsp_mux.sv +++ b/hw/reqrsp_interface/src/reqrsp_mux.sv @@ -14,6 +14,8 @@ module reqrsp_mux #( parameter int unsigned AddrWidth = 0, /// Data width of the interface. parameter int unsigned DataWidth = 0, + /// User width of the interface. + parameter int unsigned UserWidth = 0, /// Request type. parameter type req_t = logic, /// Response type. 
@@ -39,8 +41,9 @@ module reqrsp_mux #( typedef logic [AddrWidth-1:0] addr_t; typedef logic [DataWidth-1:0] data_t; typedef logic [DataWidth/8-1:0] strb_t; + typedef logic [UserWidth-1:0] user_t; - `REQRSP_TYPEDEF_REQ_CHAN_T(req_chan_t, addr_t, data_t, strb_t) + `REQRSP_TYPEDEF_REQ_CHAN_T(req_chan_t, addr_t, data_t, strb_t, user_t) localparam int unsigned LogNrPorts = cf_math_pkg::idx_width(NrPorts); @@ -159,6 +162,8 @@ module reqrsp_mux_intf #( parameter int unsigned AddrWidth = 0, /// Data width of the interface. parameter int unsigned DataWidth = 0, + /// User width of the interface. + parameter int unsigned UserWidth = 0, /// Amount of outstanding responses. Determines the FIFO size. parameter int unsigned RespDepth = 8, /// Cut timing paths on the request path. Incurs a cycle additional latency. @@ -175,8 +180,9 @@ module reqrsp_mux_intf #( typedef logic [AddrWidth-1:0] addr_t; typedef logic [DataWidth-1:0] data_t; typedef logic [DataWidth/8-1:0] strb_t; + typedef logic [UserWidth-1:0] user_t; - `REQRSP_TYPEDEF_ALL(reqrsp, addr_t, data_t, strb_t) + `REQRSP_TYPEDEF_ALL(reqrsp, addr_t, data_t, strb_t, user_t) reqrsp_req_t [NrPorts-1:0] reqrsp_slv_req; reqrsp_rsp_t [NrPorts-1:0] reqrsp_slv_rsp; @@ -188,6 +194,7 @@ module reqrsp_mux_intf #( .NrPorts (NrPorts), .AddrWidth (AddrWidth), .DataWidth (DataWidth), + .UserWidth (UserWidth), .req_t (reqrsp_req_t), .rsp_t (reqrsp_rsp_t), .RespDepth (RespDepth), diff --git a/hw/reqrsp_interface/src/reqrsp_to_axi.sv b/hw/reqrsp_interface/src/reqrsp_to_axi.sv index 55ad93c549..d3fa511508 100644 --- a/hw/reqrsp_interface/src/reqrsp_to_axi.sv +++ b/hw/reqrsp_interface/src/reqrsp_to_axi.sv @@ -48,7 +48,6 @@ module reqrsp_to_axi import reqrsp_pkg::*; #( parameter int unsigned ID = 0, /// Data width of bus, must be 32 or 64. 
parameter int unsigned DataWidth = 32'b0, - parameter int unsigned UserWidth = 32'b0, parameter type reqrsp_req_t = logic, parameter type reqrsp_rsp_t = logic, parameter type axi_req_t = logic, @@ -56,7 +55,6 @@ module reqrsp_to_axi import reqrsp_pkg::*; #( ) ( input logic clk_i, input logic rst_ni, - input logic [UserWidth-1:0] user_i, input reqrsp_req_t reqrsp_req_i, output reqrsp_rsp_t reqrsp_rsp_o, output axi_req_t axi_req_o, @@ -175,7 +173,7 @@ module reqrsp_to_axi import reqrsp_pkg::*; #( assign axi_req_o.ar.lock = (reqrsp_req_i.q.amo == AMOLR); assign axi_req_o.ar.cache = axi_pkg::CACHE_MODIFIABLE; assign axi_req_o.ar.id = $unsigned(ID); - assign axi_req_o.ar.user = user_i; + assign axi_req_o.ar.user = reqrsp_req_i.q.user; assign axi_req_o.ar_valid = q_valid_read; assign q_ready_read = axi_rsp_i.ar_ready; @@ -190,11 +188,11 @@ module reqrsp_to_axi import reqrsp_pkg::*; #( assign axi_req_o.aw.lock = (reqrsp_req_i.q.amo == AMOSC); assign axi_req_o.aw.cache = axi_pkg::CACHE_MODIFIABLE; assign axi_req_o.aw.id = $unsigned(ID); - assign axi_req_o.aw.user = user_i; + assign axi_req_o.aw.user = reqrsp_req_i.q.user; assign axi_req_o.w.data = write_data; assign axi_req_o.w.strb = reqrsp_req_i.q.strb; assign axi_req_o.w.last = 1'b1; - assign axi_req_o.w.user = user_i; + assign axi_req_o.w.user = reqrsp_req_i.q.user; // Both channels need to handshake (independently). stream_fork #( @@ -305,12 +303,11 @@ module reqrsp_to_axi_intf #( parameter int unsigned AddrWidth = 32'd0, /// AXI and REQRSP data width. parameter int unsigned DataWidth = 32'd0, - /// AXI user width. - parameter int unsigned AxiUserWidth = 32'd0 + /// AXI and REQRSP user width. 
+ parameter int unsigned UserWidth = 32'd0 ) ( input logic clk_i, input logic rst_ni, - input logic [AxiUserWidth-1:0] user_i, REQRSP_BUS reqrsp, AXI_BUS axi ); @@ -319,9 +316,9 @@ module reqrsp_to_axi_intf #( typedef logic [DataWidth-1:0] data_t; typedef logic [DataWidth/8-1:0] strb_t; typedef logic [AxiIdWidth-1:0] id_t; - typedef logic [AxiUserWidth-1:0] user_t; + typedef logic [UserWidth-1:0] user_t; - `REQRSP_TYPEDEF_ALL(reqrsp, addr_t, data_t, strb_t) + `REQRSP_TYPEDEF_ALL(reqrsp, addr_t, data_t, strb_t, user_t) `AXI_TYPEDEF_AW_CHAN_T(aw_chan_t, addr_t, id_t, user_t) `AXI_TYPEDEF_W_CHAN_T(w_chan_t, data_t, strb_t, user_t) @@ -347,7 +344,6 @@ module reqrsp_to_axi_intf #( ) i_reqrsp_to_axi ( .clk_i, .rst_ni, - .user_i, .reqrsp_req_i (reqrsp_req), .reqrsp_rsp_o (reqrsp_rsp), .axi_req_o (axi_req), diff --git a/hw/reqrsp_interface/test/reqrsp_to_axi_tb.sv b/hw/reqrsp_interface/test/reqrsp_to_axi_tb.sv index ebc509a2ba..4a1cb85c74 100644 --- a/hw/reqrsp_interface/test/reqrsp_to_axi_tb.sv +++ b/hw/reqrsp_interface/test/reqrsp_to_axi_tb.sv @@ -58,11 +58,10 @@ module reqrsp_to_axi_tb import reqrsp_pkg::*; #( .AxiIdWidth (IW), .AddrWidth (AW), .DataWidth (DW), - .AxiUserWidth (UW) + .UserWidth (UW) ) i_reqrsp_to_axi ( .clk_i (clk), .rst_ni (rst_n), - .user_i ('0), .reqrsp (master), .axi (slave) ); diff --git a/hw/snitch/src/riscv_instr.sv b/hw/snitch/src/riscv_instr.sv index c76795b711..d055b1bd02 100644 --- a/hw/snitch/src/riscv_instr.sv +++ b/hw/snitch/src/riscv_instr.sv @@ -327,7 +327,8 @@ package riscv_instr; localparam logic [31:0] DMSTAT = 32'b0000101?????00000000?????0101011; localparam logic [31:0] DMSTR = 32'b0000110??????????000000000101011; localparam logic [31:0] DMREP = 32'b000011100000?????000000000101011; - localparam logic [31:0] DMMCAST = 32'b000100000000?????000000000101011; + localparam logic [31:0] DMUSER = 32'b0001000??????????000000000101011; + localparam logic [31:0] DMINIT = 32'b0001001??????????000?????0101011; localparam logic [31:0] FREP_O = 
32'b????????????????????????10001011; localparam logic [31:0] IREP = 32'b?????????????????????????0111111; localparam logic [31:0] SCFGRI = 32'b????????????00000001?????0101011; @@ -1140,7 +1141,8 @@ package riscv_instr; localparam logic [11:0] CSR_FPMODE = 12'h7c1; localparam logic [11:0] CSR_BARRIER = 12'h7c2; localparam logic [11:0] CSR_SC = 12'h7c3; - localparam logic [11:0] CSR_MCAST = 12'h7c4; + localparam logic [11:0] CSR_USER_LOW = 12'h7c4; + localparam logic [11:0] CSR_USER_HIGH = 12'h7c5; localparam logic [11:0] CSR_HTIMEDELTAH = 12'h615; localparam logic [11:0] CSR_CYCLEH = 12'hc80; localparam logic [11:0] CSR_TIMEH = 12'hc81; diff --git a/hw/snitch/src/snitch.sv b/hw/snitch/src/snitch.sv index 9687bab7c0..83a4435c2c 100644 --- a/hw/snitch/src/snitch.sv +++ b/hw/snitch/src/snitch.sv @@ -247,8 +247,10 @@ module snitch import snitch_pkg::*; import riscv_instr::*; #( logic csr_en; logic csr_dump; logic csr_stall_d, csr_stall_q; - // Multicast mask - logic [31:0] csr_mcast_d, csr_mcast_q; + + // User Field + logic [31:0] csr_user_high_d, csr_user_high_q; + logic [31:0] csr_user_low_d, csr_user_low_q; localparam logic M = 0; localparam logic S = 1; @@ -320,7 +322,8 @@ module snitch import snitch_pkg::*; import riscv_instr::*; #( end `FFAR(csr_stall_q, csr_stall_d, '0, clk_i, rst_i) - `FFAR(csr_mcast_q, csr_mcast_d, '0, clk_i, rst_i) + `FFAR(csr_user_high_q, csr_user_high_d, '0, clk_i, rst_i) + `FFAR(csr_user_low_q, csr_user_low_d, '0, clk_i, rst_i) typedef struct packed { fpnew_pkg::fmt_mode_t fmode; @@ -2152,6 +2155,7 @@ module snitch import snitch_pkg::*; import riscv_instr::*; #( // DMA instructions DMSRC, DMDST, + DMUSER, DMSTR: begin if (Xdma) begin acc_qreq_o.addr = DMA_SS; @@ -2163,7 +2167,8 @@ module snitch import snitch_pkg::*; import riscv_instr::*; #( illegal_inst = 1'b1; end end - DMCPYI: begin + DMCPYI, + DMINIT: begin if (Xdma) begin acc_qreq_o.addr = DMA_SS; opa_select = Reg; @@ -2221,16 +2226,6 @@ module snitch import snitch_pkg::*; import 
riscv_instr::*; #( illegal_inst = 1'b1; end end - DMMCAST: begin - if (Xdma) begin - acc_qreq_o.addr = DMA_SS; - opa_select = Reg; - acc_qvalid_o = valid_instr; - write_rd = 1'b0; - end else begin - illegal_inst = 1'b1; - end - end SCFGRI: begin if (Xssr) begin write_rd = 1'b0; @@ -2358,7 +2353,8 @@ module snitch import snitch_pkg::*; import riscv_instr::*; #( dscratch_d = dscratch_q; csr_stall_d = csr_stall_q; - csr_mcast_d = csr_mcast_q; + csr_user_high_d = csr_user_high_q; + csr_user_low_d = csr_user_low_q; if (barrier_i) csr_stall_d = 1'b0; barrier_o = 1'b0; @@ -2585,10 +2581,15 @@ module snitch import snitch_pkg::*; import riscv_instr::*; #( barrier_o = 1'b1; csr_stall_d = 1'b1; end - // Multicast mask - CSR_MCAST: begin - csr_rvalue = csr_mcast_q; - csr_mcast_d = alu_result[31:0]; + // User field high + CSR_USER_HIGH: begin + csr_rvalue = csr_user_high_q; + csr_user_high_d = alu_result[31:0]; + end + // User field low + CSR_USER_LOW: begin + csr_rvalue = csr_user_low_q; + csr_user_low_d = alu_result[31:0]; end default: begin csr_rvalue = '0; @@ -2890,6 +2891,7 @@ module snitch import snitch_pkg::*; import riscv_instr::*; #( snitch_lsu #( .AddrWidth (AddrWidth), .DataWidth (DataWidth), + .UserWidth (64), .dreq_t (dreq_t), .drsp_t (drsp_t), .tag_t (logic[RegWidth-1:0]), @@ -2910,7 +2912,7 @@ module snitch import snitch_pkg::*; import riscv_instr::*; #( .lsu_qsize_i (ls_size), .lsu_qamo_i (ls_amo), .lsu_qrepd_i (1'b0), - .lsu_qmcast_i (addr_t'(csr_mcast_q)), + .lsu_quser_i ({csr_user_high_q, csr_user_low_q}), .lsu_qvalid_i (lsu_qvalid), .lsu_qready_o (lsu_qready), .lsu_pdata_o (ld_result), diff --git a/hw/snitch/src/snitch_lsu.sv b/hw/snitch/src/snitch_lsu.sv index 4ae44ad049..8a0efa9882 100644 --- a/hw/snitch/src/snitch_lsu.sv +++ b/hw/snitch/src/snitch_lsu.sv @@ -11,6 +11,7 @@ module snitch_lsu #( parameter int unsigned AddrWidth = 32, parameter int unsigned DataWidth = 32, + parameter int unsigned UserWidth = 0, /// Tag passed from input to output. 
All transactions are in-order. parameter type tag_t = logic [4:0], /// Number of outstanding memory transactions. @@ -37,7 +38,8 @@ module snitch_lsu #( parameter type drsp_t = logic, /// Derived parameter *Do not override* parameter type addr_t = logic [AddrWidth-1:0], - parameter type data_t = logic [DataWidth-1:0] + parameter type data_t = logic [DataWidth-1:0], + parameter type user_t = logic [UserWidth-1:0] ) ( input logic clk_i, input logic rst_i, @@ -50,7 +52,7 @@ module snitch_lsu #( input logic [1:0] lsu_qsize_i, input reqrsp_pkg::amo_op_e lsu_qamo_i, input logic lsu_qrepd_i, // Whether this is a sequencer repetition - input addr_t lsu_qmcast_i, // Multicast mask + input user_t lsu_quser_i, // User field for the axi transmission input logic lsu_qvalid_i, output logic lsu_qready_o, // response channel @@ -254,7 +256,7 @@ module snitch_lsu #( assign data_req_o.q_valid = lsu_postcaq_qvalid & (lsu_qwrite_i | ~laq_full) & ~mem_full; assign data_req_o.q.write = lsu_qwrite_i; assign data_req_o.q.addr = lsu_qaddr_i; - assign data_req_o.q.mask = lsu_qmcast_i; + assign data_req_o.q.user = lsu_quser_i; assign data_req_o.q.amo = lsu_qamo_i; assign data_req_o.q.size = lsu_qsize_i; diff --git a/hw/snitch/src/snitch_pkg.sv b/hw/snitch/src/snitch_pkg.sv index 5bc04f7909..d1e9397ee5 100644 --- a/hw/snitch/src/snitch_pkg.sv +++ b/hw/snitch/src/snitch_pkg.sv @@ -141,16 +141,18 @@ package snitch_pkg; // Slaves on Cluster DMA AXI Bus typedef enum int unsigned { - TCDMDMA = 0, - SoCDMAOut = 1, - ZeroMemory = 2, - BootRom = 3 + SoCDMAOut = 0, + BootRom = 1 } cluster_slave_dma_e; + typedef enum logic { + TCDMDMA = 0, + ToSoC = 1 + } dma_e; + typedef enum int unsigned { - SoCDMAIn = 32'd0, - SDMAMst = 32'd1, - ICache = 32'd2 + SDMAMst = 32'd0, + ICache = 32'd1 } cluster_master_dma_e; /// Possible interconnect implementations. 
diff --git a/hw/snitch_cluster/src/snitch_cc.sv b/hw/snitch_cluster/src/snitch_cc.sv index 31584cd1cd..98fe73771b 100644 --- a/hw/snitch_cluster/src/snitch_cc.sv +++ b/hw/snitch_cluster/src/snitch_cc.sv @@ -40,6 +40,14 @@ module snitch_cc #( parameter type axi_aw_chan_t = logic, parameter type axi_req_t = logic, parameter type axi_rsp_t = logic, + parameter type init_req_chan_t = logic, + parameter type init_rsp_chan_t = logic, + parameter type init_req_t = logic, + parameter type init_rsp_t = logic, + parameter type obi_a_chan_t = logic, + parameter type obi_r_chan_t = logic, + parameter type obi_req_t = logic, + parameter type obi_rsp_t = logic, parameter type hive_req_t = logic, parameter type hive_rsp_t = logic, parameter type acc_req_t = logic, @@ -116,7 +124,8 @@ module snitch_cc #( /// Derived parameter *Do not override* parameter int unsigned TCDMPorts = (NumSsrs > 1 ? NumSsrs : 1), parameter type addr_t = logic [AddrWidth-1:0], - parameter type data_t = logic [DataWidth-1:0] + parameter type data_t = logic [DataWidth-1:0], + parameter type addr_rule_t = axi_pkg::xbar_rule_64_t ) ( input logic clk_i, input logic clk_d2_i, @@ -137,6 +146,8 @@ module snitch_cc #( // DMA ports output axi_req_t [DMANumChannels-1:0] axi_dma_req_o, input axi_rsp_t [DMANumChannels-1:0] axi_dma_res_i, + output obi_req_t [DMANumChannels-1:0] obi_dma_req_o, + input obi_rsp_t [DMANumChannels-1:0] obi_dma_res_i, output logic [DMANumChannels-1:0] axi_dma_busy_o, output dma_events_t [DMANumChannels-1:0] axi_dma_events_o, // Core event strobes @@ -144,7 +155,9 @@ module snitch_cc #( input addr_t tcdm_addr_base_i, // Cluster HW barrier output logic barrier_o, - input logic barrier_i + input logic barrier_i, + // address decode map + input addr_rule_t [TCDMAliasEnable:0] dma_addr_rule_i ); // FMA architecture is "merged" -> mulexp and macexp instructions are supported @@ -286,6 +299,7 @@ module snitch_cc #( reqrsp_iso #( .AddrWidth (AddrWidth), .DataWidth (DataWidth), + .UserWidth (64), 
.req_t (dreq_t), .rsp_t (drsp_t), .BypassReq (!RegisterCoreReq), @@ -391,20 +405,32 @@ module snitch_cc #( .NumAxInFlight (DMANumAxInFlight), .DMAReqFifoDepth (DMAReqFifoDepth), .NumChannels (DMANumChannels), + .TCDMAliasEnable (TCDMAliasEnable), .DMATracing (1), .axi_ar_chan_t (axi_ar_chan_t), .axi_aw_chan_t (axi_aw_chan_t), .axi_req_t (axi_req_t), .axi_res_t (axi_rsp_t), + .init_req_chan_t (init_req_chan_t), + .init_rsp_chan_t (init_rsp_chan_t), + .init_req_t (init_req_t), + .init_rsp_t (init_rsp_t), + .obi_a_chan_t (obi_a_chan_t), + .obi_r_chan_t (obi_r_chan_t), + .obi_req_t (obi_req_t), + .obi_res_t (obi_rsp_t), .acc_req_t (acc_req_t), .acc_res_t (acc_resp_t), - .dma_events_t (dma_events_t) + .dma_events_t (dma_events_t), + .addr_rule_t (addr_rule_t) ) i_idma_inst64_top ( .clk_i, .rst_ni, .testmode_i ( 1'b0 ), .axi_req_o ( axi_dma_req_o ), .axi_res_i ( axi_dma_res_i ), + .obi_req_o ( obi_dma_req_o ), + .obi_res_i ( obi_dma_res_i ), .busy_o ( axi_dma_busy_o ), .acc_req_i ( acc_snitch_req ), .acc_req_valid_i ( dma_qvalid ), @@ -413,7 +439,8 @@ module snitch_cc #( .acc_res_valid_o ( dma_pvalid ), .acc_res_ready_i ( dma_pready ), .hart_id_i ( hart_id_i ), - .events_o ( axi_dma_events_o ) + .events_o ( axi_dma_events_o ), + .addr_map_i ( dma_addr_rule_i ) ); // no DMA instanciated @@ -556,6 +583,7 @@ module snitch_cc #( .NrPorts (2), .AddrWidth (AddrWidth), .DataWidth (DataWidth), + .UserWidth (64), .req_t (dreq_t), .rsp_t (drsp_t), // TODO(zarubaf): Wire-up to top-level. @@ -665,6 +693,7 @@ module snitch_cc #( reqrsp_to_tcdm #( .AddrWidth (AddrWidth), .DataWidth (DataWidth), + .UserWidth (64), // TODO(zarubaf): Make a parameter. 
.BufDepth (4), .reqrsp_req_t (dreq_t), diff --git a/hw/snitch_cluster/src/snitch_cluster.sv b/hw/snitch_cluster/src/snitch_cluster.sv index 4188beab48..e4fc1af931 100644 --- a/hw/snitch_cluster/src/snitch_cluster.sv +++ b/hw/snitch_cluster/src/snitch_cluster.sv @@ -18,6 +18,8 @@ `include "snitch_vm/typedef.svh" +`include "obi/typedef.svh" + /// Snitch many-core cluster with improved TCDM interconnect. /// Snitch Cluster Top-Level. module snitch_cluster @@ -309,6 +311,9 @@ module snitch_cluster localparam int unsigned NumTCDMIn = NrTCDMPortsCores + 1; localparam logic [PhysicalAddrWidth-1:0] TCDMMask = ~(TCDMSizeNapotRounded - 1); + // User widths + localparam int unsigned CoreUserWidth = 64; + // Core Requests, SoC Request, PTW. localparam int unsigned NrNarrowMasters = 3; localparam int unsigned NarrowIdWidthOut = $clog2(NrNarrowMasters) + NarrowIdWidthIn; @@ -318,11 +323,11 @@ module snitch_cluster localparam int unsigned NrRules = (1 + AliasRegionEnable) * NrRuleIdcs; // DMA X-BAR configuration - // SoC in Request, DMA Channels, `n` instruction caches. - localparam int unsigned NrWideMasters = 1 + DMANumChannels + NrHives; + // DMA Channels, `n` instruction caches. 
+ localparam int unsigned NrWideMasters = DMANumChannels + NrHives; localparam int unsigned WideIdWidthOut = $clog2(NrWideMasters) + WideIdWidthIn; - // TCDM, SoC out, ZeroMemory, (Bootrom) - localparam int unsigned NrWideSlaves = 3 + IntBootromEnable; + // SoC out, (Bootrom) + localparam int unsigned NrWideSlaves = 1 + IntBootromEnable; localparam int unsigned NrWideRuleIdcs = NrWideSlaves - 1; localparam int unsigned NrWideRules = (1 + AliasRegionEnable) * NrWideRuleIdcs; @@ -403,13 +408,14 @@ module snitch_cluster typedef logic [PhysicalAddrWidth-1:0] addr_t; typedef logic [NarrowDataWidth-1:0] data_t; typedef logic [NarrowDataWidth/8-1:0] strb_t; + typedef logic [CoreUserWidth-1:0] user_t; typedef logic [WideDataWidth-1:0] data_dma_t; typedef logic [WideDataWidth/8-1:0] strb_dma_t; typedef logic [NarrowIdWidthIn-1:0] id_mst_t; typedef logic [NarrowIdWidthOut-1:0] id_slv_t; typedef logic [WideIdWidthIn-1:0] id_dma_mst_t; typedef logic [WideIdWidthOut-1:0] id_dma_slv_t; - typedef logic [NarrowUserWidth-1:0] user_t; + typedef logic [NarrowUserWidth-1:0] user_narrow_t; typedef struct packed { logic [WideUserWidth-1:0] collective_mask; } user_dma_t; @@ -425,8 +431,8 @@ module snitch_cluster typedef logic [CoreIDWidth:0] tcdm_user_t; // Regbus peripherals. 
- `AXI_TYPEDEF_ALL(axi_mst, addr_t, id_mst_t, data_t, strb_t, user_t) - `AXI_TYPEDEF_ALL(axi_slv, addr_t, id_slv_t, data_t, strb_t, user_t) + `AXI_TYPEDEF_ALL(axi_mst, addr_t, id_mst_t, data_t, strb_t, user_narrow_t) + `AXI_TYPEDEF_ALL(axi_slv, addr_t, id_slv_t, data_t, strb_t, user_narrow_t) `AXI_TYPEDEF_ALL(axi_mst_dma, addr_t, id_dma_mst_t, data_dma_t, strb_dma_t, user_dma_t) `AXI_TYPEDEF_ALL(axi_slv_dma, addr_t, id_dma_slv_t, data_dma_t, strb_dma_t, user_dma_t) @@ -434,13 +440,51 @@ module snitch_cluster `APB_TYPEDEF_ALL(apb, addr_t, data_t, strb_t) - `REQRSP_TYPEDEF_ALL(reqrsp, addr_t, data_t, strb_t) + // Reqrsp interface of the core has a 64b user field + `REQRSP_TYPEDEF_ALL(reqrsp, addr_t, data_t, strb_t, user_t) + // Reqrsp interface in the cluster additionally contains the cluster ID + // (used for atomic operations) in the user field + `REQRSP_TYPEDEF_ALL(reqrsp_amo, addr_t, data_t, strb_t, user_narrow_t) `MEM_TYPEDEF_ALL(mem, tcdm_mem_addr_t, data_t, strb_t, tcdm_user_t) `MEM_TYPEDEF_ALL(mem_dma, tcdm_mem_addr_t, data_dma_t, strb_dma_t, logic) `TCDM_TYPEDEF_ALL(tcdm, tcdm_addr_t, data_t, strb_t, tcdm_user_t) + // Memory Init typedefs + typedef struct packed { + logic [PhysicalAddrWidth-1:0] cfg; + logic [WideDataWidth-1:0] term; + logic [WideDataWidth/8-1:0] strb; + logic [WideIdWidthOut-1:0] id; + } init_req_chan_t; + + typedef struct packed { + init_req_chan_t req_chan; + logic req_valid; + logic rsp_ready; + } init_req_t; + + typedef struct packed { + logic [WideDataWidth-1:0] init; + } init_rsp_chan_t; + + typedef struct packed { + init_rsp_chan_t rsp_chan; + logic rsp_valid; + logic req_ready; + } init_rsp_t; + + // OBI typedefs + `OBI_TYPEDEF_MINIMAL_A_OPTIONAL(a_opt_t) + `OBI_TYPEDEF_MINIMAL_R_OPTIONAL(r_opt_t) + + `OBI_TYPEDEF_TYPE_A_CHAN_T(obi_a_chan_t, addr_t, data_dma_t, strb_dma_t, id_dma_mst_t, a_opt_t) + `OBI_TYPEDEF_TYPE_R_CHAN_T(obi_r_chan_t, data_dma_t, id_dma_slv_t, r_opt_t) + + `OBI_TYPEDEF_REQ_T(obi_dma_req_t, obi_a_chan_t) + 
`OBI_TYPEDEF_RSP_T(obi_dma_rsp_t, obi_r_chan_t) + // Event counter increments for the TCDM. typedef struct packed { /// Number requests going in @@ -535,6 +579,7 @@ module snitch_cluster assign cluster_periph_start_address = IntBootromEnable ? bootrom_end_address : tcdm_end_address; assign cluster_periph_end_address = cluster_periph_start_address + ClusterPeriphSize * 1024; + // Zero mem is no longer needed, should we get rid of this address range? addr_t zero_mem_start_address, zero_mem_end_address; assign zero_mem_start_address = cluster_periph_end_address; assign zero_mem_end_address = cluster_periph_end_address + ZeroMemorySize * 1024; @@ -568,6 +613,9 @@ module snitch_cluster axi_mst_req_t [NrNarrowMasters-1:0] narrow_axi_mst_req; axi_mst_resp_t [NrNarrowMasters-1:0] narrow_axi_mst_rsp; + axi_mst_dma_req_t soc_in_axi_req; + axi_mst_dma_resp_t soc_in_axi_rsp; + // DMA AXI buses axi_mst_dma_req_t [NrWideMasters-1:0] wide_axi_mst_req; axi_mst_dma_resp_t [NrWideMasters-1:0] wide_axi_mst_rsp; @@ -600,6 +648,9 @@ module snitch_cluster dma_events_t [DMANumChannels-1:0] dma_events; icache_l0_events_t [NrCores-1:0] icache_events; + tcdm_dma_req_t [DMANumChannels-1:0] tcdm_dma_req; + tcdm_dma_rsp_t [DMANumChannels-1:0] tcdm_dma_rsp; + // 4. Memory Subsystem (Core side). 
reqrsp_req_t [NrCores-1:0] core_req; reqrsp_rsp_t [NrCores-1:0] core_rsp; @@ -618,6 +669,11 @@ module snitch_cluster logic [NrCores-1:0] barrier_in; logic barrier_out; + // OBI + obi_dma_req_t [NrCores-1:0][DMANumChannels-1:0] obi_dma_req; + obi_dma_rsp_t [NrCores-1:0][DMANumChannels-1:0] obi_dma_res; + + // ------------- // DMA Subsystem // ------------- @@ -654,11 +710,10 @@ module snitch_cluster .rst_ni (rst_ni), .slv_req_i (wide_in_req_i), .slv_resp_o (wide_in_resp_o), - .mst_req_o (wide_axi_mst_req[SoCDMAIn]), - .mst_resp_i (wide_axi_mst_rsp[SoCDMAIn]) + .mst_req_o (soc_in_axi_req), + .mst_resp_i (soc_in_axi_rsp) ); - logic [WideSlaveIdxBits-1:0] dma_xbar_default_port; assign dma_xbar_default_port = SoCDMAOut; @@ -669,33 +724,42 @@ module snitch_cluster end_addr: zero_mem_end_address }; - xbar_rule_t [5:0] dma_xbar_rules; + xbar_rule_t [1:0] dma_xbar_rules; xbar_rule_t [DmaXbarCfg.NoAddrRules-1:0] enabled_dma_xbar_rule; assign dma_xbar_rules = '{ '{idx: BootRom, start_addr: BootRomAliasStart, end_addr: BootRomAliasEnd}, - '{idx: ZeroMemory, start_addr: ZeroMemAliasStart, end_addr: ZeroMemAliasEnd}, + '{idx: BootRom, start_addr: bootrom_start_address, end_addr: bootrom_end_address} + }; + + always_comb begin + automatic int unsigned i = 0; + if (IntBootromEnable) enabled_dma_xbar_rule[i] = dma_xbar_rules[0]; i++; // Bootrom + if (AliasRegionEnable) begin + if (IntBootromEnable) enabled_dma_xbar_rule[i] = dma_xbar_rules[1]; // Bootrom Alias + end + end + + // dma address rules + xbar_rule_t [1:0] dma_addr_rule; + xbar_rule_t [AliasRegionEnable:0] enabled_dma_addr_rule; + + assign dma_addr_rule = '{ '{idx: TCDMDMA, start_addr: TCDMAliasStart, end_addr: TCDMAliasEnd}, - '{idx: BootRom, start_addr: bootrom_start_address, end_addr: bootrom_end_address}, - '{idx: ZeroMemory, start_addr: zero_mem_start_address, end_addr: zero_mem_end_address}, '{idx: TCDMDMA, start_addr: tcdm_start_address, end_addr: tcdm_end_address} }; always_comb begin automatic int unsigned i = 0; 
- enabled_dma_xbar_rule[i] = dma_xbar_rules[0]; i++; // TCDM - enabled_dma_xbar_rule[i] = dma_xbar_rules[1]; i++; // ZeroMemory - if (IntBootromEnable) enabled_dma_xbar_rule[i] = dma_xbar_rules[2]; i++; // Bootrom + enabled_dma_addr_rule[i] = dma_addr_rule[0]; i++; // TCDM if (AliasRegionEnable) begin - enabled_dma_xbar_rule[i] = dma_xbar_rules[3]; i++; // TCDM Alias - enabled_dma_xbar_rule[i] = dma_xbar_rules[4]; i++; // ZeroMemory Alias - if (IntBootromEnable) enabled_dma_xbar_rule[i] = dma_xbar_rules[5]; // Bootrom Alias + enabled_dma_addr_rule[i] = dma_addr_rule[1]; i++; // TCDM Alias end end localparam bit [DmaXbarCfg.NoSlvPorts-1:0] DMAEnableDefaultMstPort = '1; if (EnableDMAMulticast) begin : gen_mcast_dma_xbar - axi_mcast_xbar #( + axi_mcast_xbar #( .Cfg (DmaMcastXbarCfg), .ATOPs (0), .slv_aw_chan_t (axi_mst_dma_aw_chan_t), @@ -712,7 +776,7 @@ module snitch_cluster .mst_req_t (axi_slv_dma_req_t), .mst_resp_t (axi_slv_dma_resp_t), .rule_t (xbar_rule_t) - ) i_axi_dma_xbar ( + ) i_axi_dma_xbar ( .clk_i (clk_i), .rst_ni (rst_ni), .test_i (1'b0), @@ -753,42 +817,25 @@ module snitch_cluster .addr_map_i (enabled_dma_xbar_rule), .en_default_mst_port_i (DMAEnableDefaultMstPort), .default_mst_port_i ({DmaXbarCfg.NoSlvPorts{dma_xbar_default_port}}) - ); - end - - axi_zero_mem #( - .axi_req_t (axi_slv_dma_req_t), - .axi_resp_t (axi_slv_dma_resp_t), - .AddrWidth (PhysicalAddrWidth), - .DataWidth (WideDataWidth), - .IdWidth (WideIdWidthOut), - .NumBanks (1), - .BufDepth (1) - ) i_axi_zeromem ( - .clk_i, - .rst_ni, - .busy_o (), - .axi_req_i (wide_axi_slv_req[ZeroMemory]), - .axi_resp_o (wide_axi_slv_rsp[ZeroMemory]) ); + end addr_t ext_dma_req_q_addr_nontrunc; axi_to_mem_interleaved #( - .axi_req_t (axi_slv_dma_req_t), - .axi_resp_t (axi_slv_dma_resp_t), + .axi_req_t (axi_mst_dma_req_t), + .axi_resp_t (axi_mst_dma_resp_t), .AddrWidth (PhysicalAddrWidth), .DataWidth (WideDataWidth), .IdWidth (WideIdWidthOut), .NumBanks (1), .BufDepth (MemoryMacroLatency + 1) - ) 
i_axi_to_mem_dma ( + ) i_axi_to_mem_soc_in ( .clk_i, .rst_ni, .busy_o (), - .test_i (1'b0), - .axi_req_i (wide_axi_slv_req[TCDMDMA]), - .axi_resp_o (wide_axi_slv_rsp[TCDMDMA]), + .axi_req_i ( soc_in_axi_req ), + .axi_resp_o ( soc_in_axi_rsp ), .mem_req_o (ext_dma_req.q_valid), .mem_gnt_i (ext_dma_rsp.q_ready), .mem_addr_o (ext_dma_req_q_addr_nontrunc), @@ -800,12 +847,46 @@ module snitch_cluster .mem_rdata_i (ext_dma_rsp.p.data) ); + + // ------------ + // TCDM Arbiter + // ------------ + for (genvar i = 0; i < NrCores; i++) begin : gen_core_obi_to_tcdm + // This currently assumes only one DMA core is present in the system. However this limitation + // could easily be overcome by adapting the number of inputs to the i_dma_interconnect + // according to the number of DMA cores present. + if (Xdma[i]) begin : gen_dma_obi_to_tcdm + obi_to_tcdm #( + .obi_req_t (obi_dma_req_t), + .obi_rsp_t (obi_dma_rsp_t), + .tcdm_req_t (tcdm_dma_req_t), + .tcdm_rsp_t (tcdm_dma_rsp_t), + .AddrWidth (PhysicalAddrWidth), + .DataWidth (WideDataWidth), + .IdWidth (WideIdWidthOut), + .UserWidth (CoreUserWidth), + .BufDepth (MemoryMacroLatency + 1), + .NumChannels (DMANumChannels) + ) i_obi_to_tcdm ( + .clk_i, + .rst_ni, + .obi_req_i (obi_dma_req[i]), + .obi_rsp_o (obi_dma_res[i]), + .tcdm_req_o (tcdm_dma_req), + .tcdm_rsp_i (tcdm_dma_rsp) + ); + end else begin : gen_dma_obi_to_tcdm_stub + assign obi_dma_res[i] = '0; + end + end + assign ext_dma_req.q.addr = tcdm_addr_t'(ext_dma_req_q_addr_nontrunc); assign ext_dma_req.q.amo = reqrsp_pkg::AMONone; assign ext_dma_req.q.user = '0; + localparam int unsigned NumDMAIcoInputs = DMANumChannels + 1; snitch_tcdm_interconnect #( - .NumInp (1), + .NumInp (NumDMAIcoInputs), .NumOut (NrSuperBanks), .NumHyperBanks (NrHyperBanks), .tcdm_req_t (tcdm_dma_req_t), @@ -820,8 +901,8 @@ module snitch_cluster ) i_dma_interconnect ( .clk_i, .rst_ni, - .req_i (ext_dma_req), - .rsp_o (ext_dma_rsp), + .req_i ({ext_dma_req, tcdm_dma_req}), + .rsp_o ({ext_dma_rsp, 
tcdm_dma_rsp}), .mem_req_o (sb_dma_req), .mem_rsp_i (sb_dma_rsp) ); @@ -1030,6 +1111,14 @@ module snitch_cluster .axi_aw_chan_t (axi_mst_dma_aw_chan_t), .axi_req_t (axi_mst_dma_req_t), .axi_rsp_t (axi_mst_dma_resp_t), + .init_req_chan_t (init_req_chan_t), + .init_rsp_chan_t (init_rsp_chan_t), + .init_req_t (init_req_t), + .init_rsp_t (init_rsp_t), + .obi_a_chan_t (obi_a_chan_t), + .obi_r_chan_t (obi_r_chan_t), + .obi_req_t (obi_dma_req_t), + .obi_rsp_t (obi_dma_rsp_t), .hive_req_t (hive_req_t), .hive_rsp_t (hive_rsp_t), .acc_req_t (acc_req_t), @@ -1079,7 +1168,8 @@ module snitch_cluster .CaqTagWidth (CaqTagWidth), .DebugSupport (DebugSupport), .TCDMAliasEnable (AliasRegionEnable), - .TCDMAliasStart (TCDMAliasStart) + .TCDMAliasStart (TCDMAliasStart), + .addr_rule_t (xbar_rule_t) ) i_snitch_cc ( .clk_i, .clk_d2_i (clk_d2), @@ -1096,12 +1186,15 @@ module snitch_cluster .tcdm_rsp_i (tcdm_rsp[TcdmPortsOffs+:TcdmPorts]), .axi_dma_req_o (axi_dma_req), .axi_dma_res_i (axi_dma_res), + .obi_dma_req_o (obi_dma_req[i]), + .obi_dma_res_i (obi_dma_res[i]), .axi_dma_busy_o (), .axi_dma_events_o (dma_core_events), .core_events_o (core_events[i]), .tcdm_addr_base_i (tcdm_start_address), .barrier_o (barrier_in[i]), - .barrier_i (barrier_out) + .barrier_i (barrier_out), + .dma_addr_rule_i (enabled_dma_addr_rule) ); for (genvar j = 0; j < TcdmPorts; j++) begin : gen_tcdm_user always_comb begin @@ -1181,6 +1274,7 @@ module snitch_cluster .NrPorts (NrHives), .AddrWidth (PhysicalAddrWidth), .DataWidth (NarrowDataWidth), + .UserWidth (CoreUserWidth), .req_t (reqrsp_req_t), .rsp_t (reqrsp_rsp_t), .RespDepth (2) @@ -1196,7 +1290,6 @@ module snitch_cluster reqrsp_to_axi #( .DataWidth (NarrowDataWidth), - .UserWidth (NarrowUserWidth), .reqrsp_req_t (reqrsp_req_t), .reqrsp_rsp_t (reqrsp_rsp_t), .axi_req_t (axi_mst_req_t), @@ -1204,7 +1297,6 @@ module snitch_cluster ) i_reqrsp_to_axi_ptw ( .clk_i, .rst_ni, - .user_i ('0), .reqrsp_req_i (ptw_to_axi_req), .reqrsp_rsp_o (ptw_to_axi_rsp), 
.axi_req_o (narrow_axi_mst_req[PTW]), @@ -1229,13 +1321,14 @@ module snitch_cluster user_t cluster_user; // Atomic ID, needs to be unique ID of cluster // cluster_id + HartIdOffset + 1 (because 0 is for non-atomic masters) - assign cluster_user = (core_to_axi_req.q.mask << AtomicIdWidth) | + assign cluster_user = (core_to_axi_req.q.user << AtomicIdWidth) | ((hart_base_id_i / NrCores) + (hart_base_id_i % NrCores) + 1'b1); reqrsp_mux #( .NrPorts (NrCores), .AddrWidth (PhysicalAddrWidth), .DataWidth (NarrowDataWidth), + .UserWidth (CoreUserWidth), .req_t (reqrsp_req_t), .rsp_t (reqrsp_rsp_t), .RespDepth (2) @@ -1249,19 +1342,34 @@ module snitch_cluster .idx_o (/*unused*/) ); + + reqrsp_amo_req_t core_to_axi_amo_req; + reqrsp_amo_rsp_t core_to_axi_amo_rsp; + + always_comb begin + core_to_axi_amo_req.q.addr = core_to_axi_req.q.addr; + core_to_axi_amo_req.q.write = core_to_axi_req.q.write; + core_to_axi_amo_req.q.amo = core_to_axi_req.q.amo; + core_to_axi_amo_req.q.data = core_to_axi_req.q.data; + core_to_axi_amo_req.q.strb = core_to_axi_req.q.strb; + core_to_axi_amo_req.q.user = cluster_user; + core_to_axi_amo_req.q.size = core_to_axi_req.q.size; + core_to_axi_amo_req.q_valid = core_to_axi_req.q_valid; + core_to_axi_amo_req.p_ready = core_to_axi_req.p_ready; + core_to_axi_rsp = core_to_axi_amo_rsp; + end + reqrsp_to_axi #( .DataWidth (NarrowDataWidth), - .UserWidth (NarrowUserWidth), - .reqrsp_req_t (reqrsp_req_t), - .reqrsp_rsp_t (reqrsp_rsp_t), + .reqrsp_req_t (reqrsp_amo_req_t), + .reqrsp_rsp_t (reqrsp_amo_rsp_t), .axi_req_t (axi_mst_req_t), .axi_rsp_t (axi_mst_resp_t) ) i_reqrsp_to_axi_core ( .clk_i, .rst_ni, - .user_i (cluster_user), - .reqrsp_req_i (core_to_axi_req), - .reqrsp_rsp_o (core_to_axi_rsp), + .reqrsp_req_i (core_to_axi_amo_req), + .reqrsp_rsp_o (core_to_axi_amo_rsp), .axi_req_o (narrow_axi_mst_req[CoreReq]), .axi_rsp_i (narrow_axi_mst_rsp[CoreReq]) ); diff --git a/hw/snitch_cluster/src/snitch_cluster_pkg.sv.tpl 
b/hw/snitch_cluster/src/snitch_cluster_pkg.sv.tpl index 04fbfe890a..d9ed748e76 100644 --- a/hw/snitch_cluster/src/snitch_cluster_pkg.sv.tpl +++ b/hw/snitch_cluster/src/snitch_cluster_pkg.sv.tpl @@ -55,7 +55,7 @@ package ${cfg['cluster']['name']}_pkg; localparam int unsigned NrNarrowMasters = 3; localparam int unsigned NarrowIdWidthOut = $clog2(NrNarrowMasters) + NarrowIdWidthIn; - localparam int unsigned NrWideMasters = 1 + ${cfg['cluster']['dma_nr_channels']} + ${cfg['cluster']['nr_hives']}; + localparam int unsigned NrWideMasters = ${cfg['cluster']['dma_nr_channels']} + ${cfg['cluster']['nr_hives']}; localparam int unsigned WideIdWidthIn = ${cfg['cluster']['dma_id_width_in']}; localparam int unsigned WideIdWidthOut = $clog2(NrWideMasters) + WideIdWidthIn; diff --git a/hw/snitch_cluster/src/snitch_fp_ss.sv b/hw/snitch_cluster/src/snitch_fp_ss.sv index d0e49cc1d0..345e1eb3ad 100644 --- a/hw/snitch_cluster/src/snitch_fp_ss.sv +++ b/hw/snitch_cluster/src/snitch_fp_ss.sv @@ -2711,7 +2711,7 @@ module snitch_fp_ss import snitch_pkg::*; #( .lsu_qsize_i (ls_size), .lsu_qamo_i (reqrsp_pkg::AMONone), .lsu_qrepd_i (acc_req_repd_q), - .lsu_qmcast_i ('0), + .lsu_quser_i ('0), .lsu_qvalid_i (lsu_qvalid), .lsu_qready_o (lsu_qready), .lsu_pdata_o (ld_result), diff --git a/hw/tcdm_interface/src/axi_to_tcdm.sv b/hw/tcdm_interface/src/axi_to_tcdm.sv index c7502019ad..4eeb18dbfe 100644 --- a/hw/tcdm_interface/src/axi_to_tcdm.sv +++ b/hw/tcdm_interface/src/axi_to_tcdm.sv @@ -13,6 +13,7 @@ module axi_to_tcdm #( parameter type tcdm_rsp_t = logic, parameter int unsigned AddrWidth = 0, parameter int unsigned DataWidth = 0, + parameter int unsigned UserWidth = 0, parameter int unsigned IdWidth = 0, parameter int unsigned BufDepth = 1 ) ( @@ -27,8 +28,9 @@ module axi_to_tcdm #( typedef logic [AddrWidth-1:0] addr_t; typedef logic [DataWidth-1:0] data_t; typedef logic [DataWidth/8-1:0] strb_t; + typedef logic [UserWidth-1:0] user_t; - `REQRSP_TYPEDEF_ALL(reqrsp, addr_t, data_t, strb_t) + 
`REQRSP_TYPEDEF_ALL(reqrsp, addr_t, data_t, strb_t, user_t) reqrsp_req_t reqrsp_req; reqrsp_rsp_t reqrsp_rsp; @@ -55,6 +57,7 @@ module axi_to_tcdm #( reqrsp_to_tcdm #( .AddrWidth (AddrWidth), .DataWidth (DataWidth), + .UserWidth (UserWidth), .BufDepth (BufDepth), .reqrsp_req_t (reqrsp_req_t), .reqrsp_rsp_t (reqrsp_rsp_t), diff --git a/hw/tcdm_interface/src/obi_to_tcdm.sv b/hw/tcdm_interface/src/obi_to_tcdm.sv new file mode 100644 index 0000000000..ef92ff3af8 --- /dev/null +++ b/hw/tcdm_interface/src/obi_to_tcdm.sv @@ -0,0 +1,58 @@ +// Copyright 2025 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 + +// Author: Lucia Luzi + +`include "reqrsp_interface/typedef.svh" + +/// Convert OBI to TCDM protocol. +module obi_to_tcdm #( + parameter type obi_req_t = logic, + parameter type obi_rsp_t = logic, + parameter type tcdm_req_t = logic, + parameter type tcdm_rsp_t = logic, + parameter int unsigned AddrWidth = 0, + parameter int unsigned DataWidth = 0, + parameter int unsigned IdWidth = 0, + parameter int unsigned UserWidth = 0, + parameter int unsigned BufDepth = 1, + parameter int unsigned NumChannels = 1 +) ( + input logic clk_i, + input logic rst_ni, + input obi_req_t [NumChannels-1:0] obi_req_i, + output obi_rsp_t [NumChannels-1:0] obi_rsp_o, + output tcdm_req_t [NumChannels-1:0] tcdm_req_o, + input tcdm_rsp_t [NumChannels-1:0] tcdm_rsp_i +); + + typedef logic [AddrWidth-1:0] addr_t; + typedef logic [DataWidth-1:0] data_t; + typedef logic [DataWidth/8-1:0] strb_t; + typedef logic [UserWidth-1:0] user_t; + + `REQRSP_TYPEDEF_ALL(reqrsp, addr_t, data_t, strb_t, user_t) + + for (genvar i = 0; i < NumChannels; i++) begin : gen_tcdm_obi_adapt + assign tcdm_req_o[i].q_valid = obi_req_i[i].req; + assign tcdm_req_o[i].q = '{ + addr: obi_req_i[i].a.addr, + write: obi_req_i[i].a.we, + amo: reqrsp_pkg::AMONone, + data: obi_req_i[i].a.wdata, + strb: obi_req_i[i].a.be, + user: '0 + }; 
+ + assign obi_rsp_o[i].r = '{ + rdata: tcdm_rsp_i[i].p.data, + rid: '0, + err: 1'b0, + r_optional: '0 + }; + assign obi_rsp_o[i].gnt = tcdm_rsp_i[i].q_ready; + assign obi_rsp_o[i].rvalid = tcdm_rsp_i[i].p_valid; + end + +endmodule diff --git a/hw/tcdm_interface/src/reqrsp_to_tcdm.sv b/hw/tcdm_interface/src/reqrsp_to_tcdm.sv index 0e2c476faa..ca73c40dd1 100644 --- a/hw/tcdm_interface/src/reqrsp_to_tcdm.sv +++ b/hw/tcdm_interface/src/reqrsp_to_tcdm.sv @@ -10,6 +10,7 @@ module reqrsp_to_tcdm #( parameter int unsigned AddrWidth = 0, parameter int unsigned DataWidth = 0, + parameter int unsigned UserWidth = 0, parameter int unsigned BufDepth = 2, parameter type reqrsp_req_t = logic, parameter type reqrsp_rsp_t = logic, @@ -27,8 +28,9 @@ module reqrsp_to_tcdm #( typedef logic [AddrWidth-1:0] addr_t; typedef logic [DataWidth-1:0] data_t; typedef logic [DataWidth/8-1:0] strb_t; + typedef logic [UserWidth-1:0] user_t; - `REQRSP_TYPEDEF_ALL(rr, addr_t, data_t, strb_t) + `REQRSP_TYPEDEF_ALL(rr, addr_t, data_t, strb_t, user_t) rr_req_chan_t req; rr_rsp_chan_t rsp; @@ -58,7 +60,7 @@ module reqrsp_to_tcdm #( amo: req.amo, data: req.data, strb: req.strb, - user: '0 + user: req.user }; assign rsp = '{ @@ -77,7 +79,7 @@ endmodule module reqrsp_to_tcdm_intf #( parameter int unsigned AddrWidth = 0, parameter int unsigned DataWidth = 0, - parameter type user_t = logic, + parameter int unsigned UserWidth = 0, parameter int unsigned BufDepth = 2 ) ( input logic clk_i, @@ -89,8 +91,9 @@ module reqrsp_to_tcdm_intf #( typedef logic [AddrWidth-1:0] addr_t; typedef logic [DataWidth-1:0] data_t; typedef logic [DataWidth/8-1:0] strb_t; + typedef logic [UserWidth-1:0] user_t; - `REQRSP_TYPEDEF_ALL(reqrsp, addr_t, data_t, strb_t) + `REQRSP_TYPEDEF_ALL(reqrsp, addr_t, data_t, strb_t, user_t) `TCDM_TYPEDEF_ALL(tcdm, addr_t, data_t, strb_t, user_t) reqrsp_req_t reqrsp_req; @@ -102,6 +105,7 @@ module reqrsp_to_tcdm_intf #( reqrsp_to_tcdm #( .AddrWidth (AddrWidth), .DataWidth (DataWidth), + 
.UserWidth (UserWidth), .BufDepth (BufDepth), .reqrsp_req_t (reqrsp_req_t), .reqrsp_rsp_t (reqrsp_rsp_t), diff --git a/hw/tcdm_interface/test/reqrsp_to_tcdm_tb.sv b/hw/tcdm_interface/test/reqrsp_to_tcdm_tb.sv index d7b6608cbf..fdd68fc83c 100644 --- a/hw/tcdm_interface/test/reqrsp_to_tcdm_tb.sv +++ b/hw/tcdm_interface/test/reqrsp_to_tcdm_tb.sv @@ -50,8 +50,7 @@ module reqrsp_to_tcdm_tb import reqrsp_pkg::*; #( reqrsp_to_tcdm_intf #( .AddrWidth (AW), .DataWidth (DW), - .BufDepth (BufDepth), - .user_t (logic) + .BufDepth (BufDepth) ) i_dut ( .clk_i (clk), .rst_ni (rst_n), diff --git a/iis-setup.sh b/iis-setup.sh index 189060618a..cf847b8a9e 100755 --- a/iis-setup.sh +++ b/iis-setup.sh @@ -8,7 +8,7 @@ export CC=gcc-9.2.0 export CXX=g++-9.2.0 export SN_BENDER=bender-0.28.1 export SN_VCS_SEPP=vcs-2024.09 -export SN_VERILATOR_SEPP=oseda +export SN_VERILATOR_SEPP="oseda -2025.03" export SN_QUESTA_SEPP=questa-2023.4 export SN_LLVM_BINROOT=/usr/scratch2/vulcano/colluca/tools/riscv32-snitch-llvm-almalinux8-15.0.0-snitch-0.2.0/bin diff --git a/sw/deps/riscv-opcodes b/sw/deps/riscv-opcodes index 8874980e33..676282b811 160000 --- a/sw/deps/riscv-opcodes +++ b/sw/deps/riscv-opcodes @@ -1 +1 @@ -Subproject commit 8874980e337ad757e983ab90622bb29b2fbb87b4 +Subproject commit 676282b8112f746945e81554d796793637d43bfa diff --git a/sw/runtime/src/dma.h b/sw/runtime/src/dma.h index 163402d2ae..62f7e253e3 100644 --- a/sw/runtime/src/dma.h +++ b/sw/runtime/src/dma.h @@ -9,6 +9,22 @@ #pragma once +#define OP_CUSTOM1 0b0101011 +#define XDMA_FUNCT3 0b000 +#define DMSRC_FUNCT7 0b0000000 +#define DMDST_FUNCT7 0b0000001 +#define DMCPYI_FUNCT7 0b0000010 +#define DMCPY_FUNCT7 0b0000011 +#define DMSTATI_FUNCT7 0b0000100 +#define DMSTAT_FUNCT7 0b0000101 +#define DMSTR_FUNCT7 0b0000110 +#define DMREP_FUNCT7 0b0000111 +#define DMINIT_FUNCT7 0b0001000 + +#define R_TYPE_ENCODE(funct7, rs2, rs1, funct3, rd, opcode) \ + ((funct7 << 25) | (rs2 << 20) | (rs1 << 15) | (funct3 << 12) | (rd << 7) | \ + 
(opcode)) + #include /// A DMA transfer identifier. @@ -298,6 +314,72 @@ inline void snrt_dma_memset(void *ptr, uint8_t value, uint32_t len) { } }
+
+/**
+ * @brief Fast memset function performed by DMA with the dminit instruction.
+ * @details The destination is programmed with `dmdst`, then the transfer is
+ *          launched with `dminit`. The two low bits of the `dminit`
+ *          configuration are 0b00 when @p value is 0x00, 0b01 when it is
+ *          0xff and 0b10 otherwise; in the last case @p value is first
+ *          handed to the DMA with `dmsrc`.
+ * @param ptr Pointer to the start of the region.
+ * @param value Value to set.
+ * @param size The size of the transfer in bytes.
+ * @param channel The index of the channel.
+ * @return The value `dminit` writes to its destination register (the
+ *         transfer ID).
+ * @note The instruction words are emitted through "i" (immediate) asm
+ *       operands, so @p channel has to be a compile-time constant once the
+ *       function is inlined. `channel << 2 | mode` is placed in the 5-bit
+ *       rs2 field of the encoding, so it must not exceed 31.
+ */
+inline uint32_t snrt_dma_memset_init_1d(uint64_t ptr, uint8_t value, uint32_t size, uint32_t channel) {
+    // The instructions below are hand-encoded, so every operand is pinned to
+    // the register whose number appears in the corresponding encoding.
+    register uint32_t reg_dst_low asm("a0") = ptr >> 0;    // 10
+    register uint32_t reg_dst_high asm("a1") = ptr >> 32;  // 11
+    register uint32_t reg_value asm("a2") = value;         // 12
+    register uint32_t reg_txid asm("a3");                  // 13
+    register uint32_t reg_size asm("a4") = size;           // 14
+
+    // dmdst a0, a1
+    asm volatile(".word %0\n" ::"i"(R_TYPE_ENCODE(DMDST_FUNCT7, 11, 10,
+                                                  XDMA_FUNCT3, 0, OP_CUSTOM1)),
+                 "r"(reg_dst_high), "r"(reg_dst_low));
+
+    // In every dminit below the rd field is 13 (a3) so that the result lands
+    // in the register reg_txid is bound to.
+    if (value == 0x00) {
+        uint32_t cfg = channel << 2;
+        // dminit a3, a4, channel | 0b00
+        asm volatile(".word %1\n"
+                     : "=r"(reg_txid)
+                     : "i"(R_TYPE_ENCODE(DMINIT_FUNCT7, cfg, 14, XDMA_FUNCT3,
+                                         13, OP_CUSTOM1)),
+                       "r"(reg_size));
+    }
+    else if (value == 0xff) {
+        uint32_t cfg = channel << 2 | 1;
+        // dminit a3, a4, channel | 0b01
+        asm volatile(".word %1\n"
+                     : "=r"(reg_txid)
+                     : "i"(R_TYPE_ENCODE(DMINIT_FUNCT7, cfg, 14, XDMA_FUNCT3,
+                                         13, OP_CUSTOM1)),
+                       "r"(reg_size));
+    }
+    else {
+        uint32_t cfg = channel << 2 | 2;
+        // dmsrc value, 0
+        asm volatile(".word %0\n" ::"i"(R_TYPE_ENCODE(DMSRC_FUNCT7, 0, 12,
+                                                      XDMA_FUNCT3, 0, OP_CUSTOM1)),
+                     "r"(reg_value));
+        // dminit a3, a4, channel | 0b10
+        asm volatile(".word %1\n"
+                     : "=r"(reg_txid)
+                     : "i"(R_TYPE_ENCODE(DMINIT_FUNCT7, cfg, 14, XDMA_FUNCT3,
+                                         13, OP_CUSTOM1)),
+                       "r"(reg_size));
+    }
+
+    return reg_txid;
+}
+
+
/** * @brief Load a tile of a 1D array.
* @param dst Pointer to the tile destination. diff --git a/sw/runtime/src/dump.h b/sw/runtime/src/dump.h index 0de36219ea..4786bf0362 100644 --- a/sw/runtime/src/dump.h +++ b/sw/runtime/src/dump.h @@ -24,4 +24,4 @@ asm volatile("csrw " #reg ", %0" ::"rK"(val)); \ } -#define DUMP(val) ({ asm volatile("csrw 0x7C4, %0" ::"rK"(val)); }) +#define DUMP(val) ({ asm volatile("csrw 0x7C6, %0" ::"rK"(val)); }) diff --git a/sw/tests/src/dma_dminit.c b/sw/tests/src/dma_dminit.c new file mode 100644 index 0000000000..8d67121895 --- /dev/null +++ b/sw/tests/src/dma_dminit.c @@ -0,0 +1,44 @@ +// Copyright 2025 ETH Zurich and University of Bologna. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +#include + +// Allocate a buffer in the main memory which we will use to copy data around +// with the DMA. +uint32_t buffer[32]; + +int main() { + if (snrt_global_core_idx() != 8) return 0; // only DMA core + uint32_t errors = 0; + + // Populate buffers. + uint32_t buffer_src[32], buffer_dst[32]; + for (uint32_t i = 0; i < 32; i++) { + buffer[i] = 0xAAAAAAAA; + buffer_dst[i] = 0x55555555; + buffer_src[i] = 0x55555555; + } + + uint8_t byte = 0x11; + // Write data to main memory. + snrt_dma_txid_t id = + snrt_dma_memset_init_1d((uint64_t)buffer, 0x55, sizeof(buffer), 0); + snrt_dma_wait_all_channels(0); + + // Check that the main memory buffer contains the correct data. + for (uint32_t i = 0; i < 32; i++) { + errors += (buffer[i] != buffer_src[i]); + } + + // Write data to L1. + id = snrt_dma_memset_init_1d((uint64_t)buffer_dst, 0xff, sizeof(buffer), 0); + snrt_dma_wait_all_channels(0); + + // Check that the L1 buffer contains the correct data. 
+ for (uint32_t i = 0; i < 32; i++) { + errors += (buffer_dst[i] != 0xffffffff); + } + + return errors; +} diff --git a/test/run.yaml b/test/run.yaml index f990baa54d..35f1209087 100644 --- a/test/run.yaml +++ b/test/run.yaml @@ -10,6 +10,7 @@ runs: - elf: ../sw/tests/build/communicator.elf - elf: ../sw/tests/build/data_mover.elf simulators: [vsim, vcs, verilator] + - elf: ../sw/tests/build/dma_dminit.elf - elf: ../sw/tests/build/dma_empty_transfer.elf - elf: ../sw/tests/build/dma_1d.elf - elf: ../sw/tests/build/dma_2d.elf diff --git a/util/trace/gen_trace.py b/util/trace/gen_trace.py index ff23ba224d..335e4d2340 100755 --- a/util/trace/gen_trace.py +++ b/util/trace/gen_trace.py @@ -559,7 +559,7 @@ def update_dma(insn, extras, dma_trans): pass elif mnemonic == 'dmrep': dma_trans[-1]['rep'] = extras['opa'] - elif mnemonic in ['dmcpy', 'dmcpyi']: + elif mnemonic in ['dmcpy', 'dmcpyi', 'dminit']: # Create new placeholder transaction to inherit current DMA settings dma_trans.append(dma_trans[-1].copy()) # Set size of the transaction