diff --git a/CMakeLists.txt b/CMakeLists.txt
index d3fdd7d36..d588730c4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -742,6 +742,7 @@ add_subdirectory(providers/mthca)
 add_subdirectory(providers/ocrdma)
 add_subdirectory(providers/qedr)
 add_subdirectory(providers/vmw_pvrdma)
+add_subdirectory(providers/zrdma)
 endif()
 
 add_subdirectory(providers/hfi1verbs)
diff --git a/debian/ibverbs-providers.install b/debian/ibverbs-providers.install
index 42e939aed..b091668e4 100644
--- a/debian/ibverbs-providers.install
+++ b/debian/ibverbs-providers.install
@@ -5,3 +5,4 @@ usr/lib/*/libibverbs/lib*-rdmav*.so
 usr/lib/*/libmana.so.*
 usr/lib/*/libmlx4.so.*
 usr/lib/*/libmlx5.so.*
+usr/lib/*/libzrdma.so.*
diff --git a/debian/ibverbs-providers.symbols b/debian/ibverbs-providers.symbols
index d0da2e11f..103dd0ca5 100644
--- a/debian/ibverbs-providers.symbols
+++ b/debian/ibverbs-providers.symbols
@@ -188,3 +188,13 @@ libmana.so.1 ibverbs-providers #MINVER#
 MANA_1.0@MANA_1.0 41
 manadv_init_obj@MANA_1.0 41
 manadv_set_context_attr@MANA_1.0 41
+libzrdma.so.1 ibverbs-providers #MINVER#
+* Build-Depends-Package: libibverbs-dev
+ ZRDMA_1.0@ZRDMA_1.0 57
+ zxdh_get_log_trace_switch@ZRDMA_1.0 57
+ ZRDMA_1.1@ZRDMA_1.1 57
+ zxdh_set_log_trace_switch@ZRDMA_1.1 57
+ zxdh_modify_qp_udp_sport@ZRDMA_1.1 57
+ zxdh_query_qpc@ZRDMA_1.1 57
+ zxdh_modify_qpc@ZRDMA_1.1 57
+ zxdh_reset_qp@ZRDMA_1.1 57
diff --git a/debian/libibverbs-dev.install b/debian/libibverbs-dev.install
index e2009f5d0..1d62b8d9a 100644
--- a/debian/libibverbs-dev.install
+++ b/debian/libibverbs-dev.install
@@ -13,6 +13,8 @@ usr/include/infiniband/sa.h
 usr/include/infiniband/tm_types.h
 usr/include/infiniband/verbs.h
 usr/include/infiniband/verbs_api.h
+usr/include/infiniband/zxdh_devids.h
+usr/include/infiniband/zxdh_dv.h
 usr/lib/*/lib*-rdmav*.a
 usr/lib/*/libefa.a
 usr/lib/*/libefa.so
@@ -26,12 +28,15 @@ usr/lib/*/libmlx4.a
 usr/lib/*/libmlx4.so
 usr/lib/*/libmlx5.a
 usr/lib/*/libmlx5.so
+usr/lib/*/libzrdma.a
+usr/lib/*/libzrdma.so
 usr/lib/*/pkgconfig/libefa.pc
 usr/lib/*/pkgconfig/libhns.pc
 usr/lib/*/pkgconfig/libibverbs.pc
 usr/lib/*/pkgconfig/libmana.pc
 usr/lib/*/pkgconfig/libmlx4.pc
 usr/lib/*/pkgconfig/libmlx5.pc
+usr/lib/*/pkgconfig/libzrdma.pc
 usr/share/man/man3/efadv_*.3
 usr/share/man/man3/hnsdv_*.3
 usr/share/man/man3/ibv_*
diff --git a/debian/libibverbs1.install b/debian/libibverbs1.install
index 83bdd802b..1544c43ff 100644
--- a/debian/libibverbs1.install
+++ b/debian/libibverbs1.install
@@ -1,2 +1,4 @@
 usr/lib/*/libibverbs*.so.*
+usr/lib/*/libzrdma.so.*
+usr/lib/*/libzrdma.a
 usr/share/doc/rdma-core/libibverbs.md usr/share/doc/libibverbs1/
diff --git a/kernel-headers/CMakeLists.txt b/kernel-headers/CMakeLists.txt
index 82c191cad..9ceac3169 100644
--- a/kernel-headers/CMakeLists.txt
+++ b/kernel-headers/CMakeLists.txt
@@ -26,6 +26,9 @@ publish_internal_headers(rdma
   rdma/rvt-abi.h
   rdma/siw-abi.h
   rdma/vmw_pvrdma-abi.h
+  rdma/zxdh-abi.h
+  rdma/zxdh_user_ioctl_cmds.h
+  rdma/zxdh_user_ioctl_verbs.h
   )
 
 publish_internal_headers(rdma/hfi
@@ -80,6 +83,7 @@ rdma_kernel_provider_abi(
   rdma/rdma_user_rxe.h
   rdma/siw-abi.h
   rdma/vmw_pvrdma-abi.h
+  rdma/zxdh-abi.h
   )
 
 publish_headers(infiniband
diff --git a/kernel-headers/rdma/ib_user_ioctl_verbs.h b/kernel-headers/rdma/ib_user_ioctl_verbs.h
index fe15bc7e9..17e63269f 100644
--- a/kernel-headers/rdma/ib_user_ioctl_verbs.h
+++ b/kernel-headers/rdma/ib_user_ioctl_verbs.h
@@ -255,6 +255,7 @@ enum rdma_driver_id {
 	RDMA_DRIVER_SIW,
 	RDMA_DRIVER_ERDMA,
 	RDMA_DRIVER_MANA,
+	RDMA_DRIVER_ZXDH,
 };
 
 enum ib_uverbs_gid_type {
diff --git a/kernel-headers/rdma/zxdh-abi.h
b/kernel-headers/rdma/zxdh-abi.h new file mode 100644 index 000000000..36c3343c9 --- /dev/null +++ b/kernel-headers/rdma/zxdh-abi.h @@ -0,0 +1,172 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */ +/* + * Copyright (c) 2024 ZTE Corporation. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ZXDH_ABI_H +#define ZXDH_ABI_H + +#include <linux/types.h> + +/* user-space whose last ABI ver is 5 */ +#define ZXDH_ABI_VER 5 +#define ZXDH_CONTEXT_VER_V1 5 + +enum zxdh_memreg_type { + ZXDH_MEMREG_TYPE_MEM = 0, + ZXDH_MEMREG_TYPE_QP = 1, + ZXDH_MEMREG_TYPE_CQ = 2, + ZXDH_MEMREG_TYPE_SRQ = 3, +}; + +enum zxdh_db_addr_type { + ZXDH_DB_ADDR_PHY = 0, + ZXDH_DB_ADDR_BAR = 1, +}; + +struct zxdh_alloc_ucontext_req { + __u32 rsvd32; + __u8 userspace_ver; + __u8 rsvd8[3]; +}; + +struct zxdh_alloc_ucontext_resp { + __u32 max_pds; + __u32 max_qps; + __u32 wq_size; /* size of the WQs (SQ+RQ) in the mmaped area */ + __u8 kernel_ver; + __u8 db_addr_type; + __u16 rdma_tool_flags; + __aligned_u64 feature_flags; + __aligned_u64 sq_db_mmap_key; + __aligned_u64 cq_db_mmap_key; + __aligned_u64 sq_db_pa; + __aligned_u64 cq_db_pa; + __u32 max_hw_wq_frags; + __u32 max_hw_read_sges; + __u32 max_hw_inline; + __u32 max_hw_rq_quanta; + __u32 max_hw_srq_quanta; + __u32 max_hw_wq_quanta; + __u32 max_hw_srq_wr; + __u32 min_hw_cq_size; + __u32 max_hw_cq_size; + __u16 max_hw_sq_chunk; + __u8 rsvd; + __u8 chip_rev; +}; + +struct zxdh_alloc_pd_resp { + __u32 pd_id; + __u8 rsvd[4]; +}; + +struct zxdh_resize_cq_req { + __aligned_u64 user_cq_buffer; +}; + +struct zxdh_create_cq_req { + __aligned_u64 user_cq_buf; + __aligned_u64 user_shadow_area; +}; + +struct zxdh_create_qp_req { + __aligned_u64 user_wqe_bufs; + __aligned_u64 user_compl_ctx; +}; + +struct zxdh_create_srq_req { + __aligned_u64 user_wqe_bufs; + __aligned_u64 user_compl_ctx; + __aligned_u64 user_wqe_list; + __aligned_u64 user_wqe_db; +}; + +struct zxdh_mem_reg_req { + __u32 reg_type; /* enum zxdh_memreg_type */ + __u32 cq_pages; + __u32 rq_pages; + __u32 sq_pages; + __u32 srq_pages; + __u16 srq_list_pages; + __u8 rsvd[2]; +}; + +struct zxdh_reg_mr_resp { + __u32 mr_pa_low; + __u32 mr_pa_hig; + __u16 host_page_size; + __u16 leaf_pbl_size; + __u8 rsvd[4]; +}; + +struct 
zxdh_modify_qp_req { + __u8 sq_flush; + __u8 rq_flush; + __u8 rsvd[6]; +}; + +struct zxdh_create_cq_resp { + __u32 cq_id; + __u32 cq_size; +}; + +struct zxdh_create_qp_resp { + __u32 qp_id; + __u32 actual_sq_size; + __u32 actual_rq_size; + __u32 zxdh_drv_opt; + __u16 push_idx; + __u8 lsmm; + __u8 rsvd; + __u32 qp_caps; +}; + +struct zxdh_create_srq_resp { + __u32 srq_id; + __u32 actual_srq_size; + __u32 actual_srq_list_size; + __u8 rsvd[4]; +}; + +struct zxdh_modify_qp_resp { + __aligned_u64 push_wqe_mmap_key; + __aligned_u64 push_db_mmap_key; + __u16 push_offset; + __u8 push_valid; + __u8 rsvd[5]; +}; + +struct zxdh_create_ah_resp { + __u32 ah_id; + __u8 rsvd[4]; +}; +#endif /* ZXDH_ABI_H */ diff --git a/kernel-headers/rdma/zxdh_user_ioctl_cmds.h b/kernel-headers/rdma/zxdh_user_ioctl_cmds.h new file mode 100644 index 000000000..d8525e0d3 --- /dev/null +++ b/kernel-headers/rdma/zxdh_user_ioctl_cmds.h @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */ +/* + * Copyright (c) 2024 ZTE Corporation. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef ZXDH_USER_IOCTL_CMDS_H +#define ZXDH_USER_IOCTL_CMDS_H + +#include <linux/types.h> +#include <rdma/ib_user_ioctl_cmds.h> + +enum zxdh_ib_dev_get_log_trace_attrs { + ZXDH_IB_ATTR_DEV_GET_LOG_TARCE_SWITCH = (1U << UVERBS_ID_NS_SHIFT), +}; + +enum zxdh_ib_dev_set_log_trace_attrs { + ZXDH_IB_ATTR_DEV_SET_LOG_TARCE_SWITCH = (1U << UVERBS_ID_NS_SHIFT), +}; + +enum zxdh_ib_dev_methods { + ZXDH_IB_METHOD_DEV_GET_LOG_TRACE = (1U << UVERBS_ID_NS_SHIFT), + ZXDH_IB_METHOD_DEV_SET_LOG_TRACE, +}; + +enum zxdh_ib_qp_modify_udp_sport_attrs { + ZXDH_IB_ATTR_QP_UDP_PORT = (1U << UVERBS_ID_NS_SHIFT), + ZXDH_IB_ATTR_QP_QPN, +}; + +enum zxdh_ib_qp_query_qpc_attrs { + ZXDH_IB_ATTR_QP_QUERY_HANDLE = (1U << UVERBS_ID_NS_SHIFT), + ZXDH_IB_ATTR_QP_QUERY_RESP, +}; + +enum zxdh_ib_qp_modify_qpc_attrs { + ZXDH_IB_ATTR_QP_MODIFY_QPC_HANDLE = (1U << UVERBS_ID_NS_SHIFT), + ZXDH_IB_ATTR_QP_MODIFY_QPC_REQ, + ZXDH_IB_ATTR_QP_MODIFY_QPC_MASK, +}; + +enum zxdh_ib_qp_reset_qp_attrs { + ZXDH_IB_ATTR_QP_RESET_QP_HANDLE = (1U << UVERBS_ID_NS_SHIFT), + ZXDH_IB_ATTR_QP_RESET_OP_CODE, +}; + +enum zxdh_ib_qp_methods { + ZXDH_IB_METHOD_QP_MODIFY_UDP_SPORT = (1U << UVERBS_ID_NS_SHIFT), + ZXDH_IB_METHOD_QP_QUERY_QPC, + ZXDH_IB_METHOD_QP_MODIFY_QPC, + ZXDH_IB_METHOD_QP_RESET_QP, +}; + +enum zxdh_ib_objects { + ZXDH_IB_OBJECT_DEV = (1U << UVERBS_ID_NS_SHIFT), + ZXDH_IB_OBJECT_QP_OBJ, + ZXDH_IB_OBJECT_DEVICE_EX, +}; + +#endif diff --git a/kernel-headers/rdma/zxdh_user_ioctl_verbs.h b/kernel-headers/rdma/zxdh_user_ioctl_verbs.h new file mode 100644 index 000000000..dde545825 --- /dev/null +++ b/kernel-headers/rdma/zxdh_user_ioctl_verbs.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */ +/* + * Copyright (c) 2024 ZTE Corporation. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+#ifndef ZXDH_USER_IOCTL_VERBS_H
+#define ZXDH_USER_IOCTL_VERBS_H
+
+#include <linux/types.h>
+#include <stdbool.h>
+
+/* TODO: align */
+struct zxdh_query_qpc_resp {
+	__u8 retry_flag;
+	__u8 rnr_retry_flag;
+	__u8 read_retry_flag;
+	__u8 cur_retry_count;
+	__u8 retry_cqe_sq_opcode;
+	__u8 err_flag;
+	__u8 ack_err_flag;
+	__u8 package_err_flag;
+	__u8 recv_err_flag;
+	__u8 retry_count;
+	__u32 tx_last_ack_psn;
+};
+
+struct zxdh_modify_qpc_req {
+	__u8 retry_flag;
+	__u8 rnr_retry_flag;
+	__u8 read_retry_flag;
+	__u8 cur_retry_count;
+	__u8 retry_cqe_sq_opcode;
+	__u8 err_flag;
+	__u8 ack_err_flag;
+	__u8 package_err_flag;
+};
+
+#endif
diff --git a/libibverbs/verbs.h b/libibverbs/verbs.h
index 47cdc067e..3d8db6dc6 100644
--- a/libibverbs/verbs.h
+++ b/libibverbs/verbs.h
@@ -2267,6 +2267,7 @@ struct ibv_device **ibv_get_device_list(int *num_devices);
 			      none, none, none)
 
 struct verbs_devices_ops;
+extern const struct verbs_device_ops verbs_provider_zrdma;
 extern const struct verbs_device_ops verbs_provider_bnxt_re;
 extern const struct verbs_device_ops verbs_provider_cxgb4;
 extern const struct verbs_device_ops verbs_provider_efa;
diff --git a/providers/zrdma/CMakeLists.txt b/providers/zrdma/CMakeLists.txt
new file mode 100644
index 000000000..18f9bf663
--- /dev/null
+++ b/providers/zrdma/CMakeLists.txt
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB)
+# Copyright (c) 2024 ZTE Corporation.
+
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Werror")
+rdma_shared_provider(zrdma libzrdma.map
+  1 1.1.${PACKAGE_VERSION}
+  zxdh_hw.c
+  zxdh_zrdma.c
+  zxdh_verbs.c
+  private_verbs_cmd.c
+)
+
+include_directories(${NL_INCLUDE_DIRS})
+
+publish_headers(infiniband
+  zxdh_dv.h
+  zxdh_devids.h
+)
+
+rdma_pkg_config("zrdma" "libibverbs" "${CMAKE_THREAD_LIBS_INIT}")
+
diff --git a/providers/zrdma/libzrdma.map b/providers/zrdma/libzrdma.map
new file mode 100644
index 000000000..f95de4b3b
--- /dev/null
+++ b/providers/zrdma/libzrdma.map
@@ -0,0 +1,16 @@
+/* Export symbols should be added below according to
+   Documentation/versioning.md document. */
+ZRDMA_1.0 {
+	global:
+		zxdh_get_log_trace_switch;
+	local: *;
+};
+
+ZRDMA_1.1 {
+	global:
+		zxdh_set_log_trace_switch;
+		zxdh_modify_qp_udp_sport;
+		zxdh_query_qpc;
+		zxdh_modify_qpc;
+		zxdh_reset_qp;
+} ZRDMA_1.0;
diff --git a/providers/zrdma/private_verbs_cmd.c b/providers/zrdma/private_verbs_cmd.c
new file mode 100644
index 000000000..361471b63
--- /dev/null
+++ b/providers/zrdma/private_verbs_cmd.c
@@ -0,0 +1,197 @@
+// SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB)
+/*
+ * Copyright (c) 2024 ZTE Corporation.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ *   - Redistributions of source code must retain the above
+ *     copyright notice, this list of conditions and the following
+ *     disclaimer.
+ *
+ *   - Redistributions in binary form must reproduce the above
+ *     copyright notice, this list of conditions and the following
+ *     disclaimer in the documentation and/or other materials
+ *     provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <rdma/zxdh_user_ioctl_cmds.h>
+#include <rdma/zxdh_user_ioctl_verbs.h>
+#include "private_verbs_cmd.h"
+#include "zxdh_dv.h"
+
+static void copy_query_qpc(struct zxdh_query_qpc_resp *resp,
+			   struct zxdh_rdma_qpc *qpc)
+{
+	qpc->ack_err_flag = resp->ack_err_flag;
+	qpc->retry_flag = resp->retry_flag;
+	qpc->rnr_retry_flag = resp->rnr_retry_flag;
+	qpc->cur_retry_count = resp->cur_retry_count;
+	qpc->retry_cqe_sq_opcode = resp->retry_cqe_sq_opcode;
+	qpc->err_flag = resp->err_flag;
+	qpc->package_err_flag = resp->package_err_flag;
+	qpc->recv_err_flag = resp->recv_err_flag;
+	qpc->tx_last_ack_psn = resp->tx_last_ack_psn;
+	qpc->retry_count = resp->retry_count;
+	qpc->read_retry_flag = resp->read_retry_flag;
+}
+
+static int _zxdh_query_qpc(struct ibv_qp *qp, struct zxdh_rdma_qpc *qpc)
+{
+	DECLARE_COMMAND_BUFFER(cmd, ZXDH_IB_OBJECT_QP_OBJ,
+			       ZXDH_IB_METHOD_QP_QUERY_QPC, 2);
+	int ret;
+	struct zxdh_query_qpc_resp resp_ex = { 0 };
+
+	fill_attr_in_obj(cmd, ZXDH_IB_ATTR_QP_QUERY_HANDLE, qp->handle);
+	fill_attr_out_ptr(cmd, ZXDH_IB_ATTR_QP_QUERY_RESP, &resp_ex);
+
+	ret = execute_ioctl(qp->context, cmd);
+	if (ret)
+		return ret;
+
+	copy_query_qpc(&resp_ex, qpc);
+	return 0;
+}
+
+static void copy_modify_qpc_fields(struct zxdh_modify_qpc_req *req_cmd,
+				   uint64_t attr_mask,
+				   struct zxdh_rdma_qpc *qpc)
+{
+	if (attr_mask & ZXDH_TX_READ_RETRY_FLAG_SET) {
+		req_cmd->retry_flag = qpc->retry_flag;
+		req_cmd->rnr_retry_flag = qpc->rnr_retry_flag;
+		req_cmd->read_retry_flag = qpc->read_retry_flag;
+		req_cmd->cur_retry_count = qpc->cur_retry_count;
+	}
+	if (attr_mask & ZXDH_RETRY_CQE_SQ_OPCODE)
+		req_cmd->retry_cqe_sq_opcode = qpc->retry_cqe_sq_opcode;
+
+	if (attr_mask & ZXDH_ERR_FLAG_SET) {
+		req_cmd->err_flag = qpc->err_flag;
+		req_cmd->ack_err_flag = qpc->ack_err_flag;
+	}
+	if (attr_mask & ZXDH_PACKAGE_ERR_FLAG)
+		req_cmd->package_err_flag = qpc->package_err_flag;
+}
+
+static int _zxdh_reset_qp(struct ibv_qp *qp, uint64_t opcode)
+{
+	DECLARE_COMMAND_BUFFER(cmd, ZXDH_IB_OBJECT_QP_OBJ,
+			       ZXDH_IB_METHOD_QP_RESET_QP, 2);
+	fill_attr_in_obj(cmd, ZXDH_IB_ATTR_QP_RESET_QP_HANDLE, qp->handle);
+	fill_attr_in_uint64(cmd, ZXDH_IB_ATTR_QP_RESET_OP_CODE, opcode);
+	return execute_ioctl(qp->context, cmd);
+}
+
+static int _zxdh_modify_qpc(struct ibv_qp *qp, struct zxdh_rdma_qpc *qpc,
+			    uint64_t qpc_mask)
+{
+	DECLARE_COMMAND_BUFFER(cmd, ZXDH_IB_OBJECT_QP_OBJ,
+			       ZXDH_IB_METHOD_QP_MODIFY_QPC, 3);
+	struct zxdh_modify_qpc_req req = { 0 };
+
+	copy_modify_qpc_fields(&req, qpc_mask, qpc);
+	fill_attr_in_obj(cmd, ZXDH_IB_ATTR_QP_MODIFY_QPC_HANDLE, qp->handle);
+	fill_attr_in_uint64(cmd, ZXDH_IB_ATTR_QP_MODIFY_QPC_MASK, qpc_mask);
+	fill_attr_in_ptr(cmd, ZXDH_IB_ATTR_QP_MODIFY_QPC_REQ, &req);
+	return execute_ioctl(qp->context, cmd);
+}
+
+static int _zxdh_modify_qp_udp_sport(struct ibv_context *ibctx,
+				     uint16_t udp_sport, uint32_t qpn)
+{
+	if (udp_sport <= MIN_UDP_SPORT || qpn <= MIN_QP_QPN)
+		return -EINVAL;
+
+	DECLARE_COMMAND_BUFFER(cmd, ZXDH_IB_OBJECT_QP_OBJ,
+			       ZXDH_IB_METHOD_QP_MODIFY_UDP_SPORT, 2);
fill_attr_in(cmd, ZXDH_IB_ATTR_QP_UDP_PORT, &udp_sport, + sizeof(udp_sport)); + fill_attr_in_uint32(cmd, ZXDH_IB_ATTR_QP_QPN, qpn); + return execute_ioctl(ibctx, cmd); +} + +static int _zxdh_get_log_trace_switch(struct ibv_context *ibctx, + uint8_t *switch_status) +{ + DECLARE_COMMAND_BUFFER(cmd, ZXDH_IB_OBJECT_DEV, + ZXDH_IB_METHOD_DEV_GET_LOG_TRACE, 1); + + fill_attr_out_ptr(cmd, ZXDH_IB_ATTR_DEV_GET_LOG_TARCE_SWITCH, + switch_status); + return execute_ioctl(ibctx, cmd); +} + +static int _zxdh_set_log_trace_switch(struct ibv_context *ibctx, + uint8_t switch_status) +{ + DECLARE_COMMAND_BUFFER(cmd, ZXDH_IB_OBJECT_DEV, + ZXDH_IB_METHOD_DEV_SET_LOG_TRACE, 1); + fill_attr_in(cmd, ZXDH_IB_ATTR_DEV_SET_LOG_TARCE_SWITCH, &switch_status, + sizeof(switch_status)); + return execute_ioctl(ibctx, cmd); +} + +static struct zxdh_uvcontext_ops zxdh_ctx_ops = { + .modify_qp_udp_sport = _zxdh_modify_qp_udp_sport, + .get_log_trace_switch = _zxdh_get_log_trace_switch, + .set_log_trace_switch = _zxdh_set_log_trace_switch, + .query_qpc = _zxdh_query_qpc, + .modify_qpc = _zxdh_modify_qpc, + .reset_qp = _zxdh_reset_qp, +}; + +static inline struct zxdh_uvcontext *to_zxdhtx(struct ibv_context *ibctx) +{ + return container_of(ibctx, struct zxdh_uvcontext, ibv_ctx.context); +} + +int zxdh_reset_qp(struct ibv_qp *qp, uint64_t opcode) +{ + struct zxdh_uvcontext_ops *dvops = to_zxdhtx(qp->context)->cxt_ops; + + if (!dvops || !dvops->reset_qp) + return -EOPNOTSUPP; + return dvops->reset_qp(qp, opcode); +} + +int zxdh_modify_qpc(struct ibv_qp *qp, struct zxdh_rdma_qpc *qpc, + uint64_t qpc_mask) +{ + struct zxdh_uvcontext_ops *dvops = to_zxdhtx(qp->context)->cxt_ops; + + if (!dvops || !dvops->modify_qpc) + return -EOPNOTSUPP; + return dvops->modify_qpc(qp, qpc, qpc_mask); +} + +int zxdh_query_qpc(struct ibv_qp *qp, struct zxdh_rdma_qpc *qpc) +{ + struct zxdh_uvcontext_ops *dvops = to_zxdhtx(qp->context)->cxt_ops; + + if (!dvops || !dvops->query_qpc) + return -EOPNOTSUPP; + + return dvops->query_qpc(qp, qpc); +} + +void add_private_ops(struct zxdh_uvcontext *iwvctx) +{ + iwvctx->cxt_ops = &zxdh_ctx_ops; +} diff --git a/providers/zrdma/private_verbs_cmd.h b/providers/zrdma/private_verbs_cmd.h new file mode 100644 index 000000000..d1c20dd9f --- /dev/null +++ b/providers/zrdma/private_verbs_cmd.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */ +/* + * Copyright (c) 2024 ZTE Corporation. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ZXDH_RDMA_PRIVATE_VERBS_CMD_H +#define ZXDH_RDMA_PRIVATE_VERBS_CMD_H + +#include "zxdh_zrdma.h" +#include "zxdh_dv.h" + +struct zxdh_uvcontext_ops { + int (*modify_qp_udp_sport)(struct ibv_context *ibctx, + uint16_t udp_sport, uint32_t qpn); + int (*set_log_trace_switch)(struct ibv_context *ibctx, + uint8_t switch_status); + int (*get_log_trace_switch)(struct ibv_context *ibctx, + uint8_t *switch_status); + int (*query_qpc)(struct ibv_qp *qp, struct zxdh_rdma_qpc *qpc); + int (*modify_qpc)(struct ibv_qp *qp, struct zxdh_rdma_qpc *qpc, + uint64_t qpc_mask); + int (*reset_qp)(struct ibv_qp *qp, uint64_t opcode); +}; + +void add_private_ops(struct zxdh_uvcontext *iwvctx); + +#endif diff --git a/providers/zrdma/zxdh_abi.h b/providers/zrdma/zxdh_abi.h new file mode 100644 index 000000000..81bf384e1 --- /dev/null +++ b/providers/zrdma/zxdh_abi.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */ +/* + * Copyright (c) 2024 ZTE Corporation. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ZXDH_ABI_H__ +#define __ZXDH_ABI_H__ + +#include <infiniband/kern-abi.h> +#include <rdma/zxdh-abi.h> +#include <kernel-abi/zxdh-abi.h> +#include "zxdh_verbs.h" + +#define ZXDH_MIN_ABI_VERSION 0 +#define ZXDH_MAX_ABI_VERSION 5 + +DECLARE_DRV_CMD(zxdh_ualloc_pd, IB_USER_VERBS_CMD_ALLOC_PD, empty, + zxdh_alloc_pd_resp); +DECLARE_DRV_CMD(zxdh_ucreate_cq, IB_USER_VERBS_CMD_CREATE_CQ, + zxdh_create_cq_req, zxdh_create_cq_resp); +DECLARE_DRV_CMD(zxdh_ucreate_cq_ex, IB_USER_VERBS_EX_CMD_CREATE_CQ, + zxdh_create_cq_req, zxdh_create_cq_resp); +DECLARE_DRV_CMD(zxdh_uresize_cq, IB_USER_VERBS_CMD_RESIZE_CQ, + zxdh_resize_cq_req, empty); +DECLARE_DRV_CMD(zxdh_ucreate_qp, IB_USER_VERBS_CMD_CREATE_QP, + zxdh_create_qp_req, zxdh_create_qp_resp); +DECLARE_DRV_CMD(zxdh_umodify_qp, IB_USER_VERBS_EX_CMD_MODIFY_QP, + zxdh_modify_qp_req, zxdh_modify_qp_resp); +DECLARE_DRV_CMD(zxdh_get_context, IB_USER_VERBS_CMD_GET_CONTEXT, + zxdh_alloc_ucontext_req, zxdh_alloc_ucontext_resp); +DECLARE_DRV_CMD(zxdh_ureg_mr, IB_USER_VERBS_CMD_REG_MR, zxdh_mem_reg_req, + zxdh_reg_mr_resp); +DECLARE_DRV_CMD(zxdh_urereg_mr, IB_USER_VERBS_CMD_REREG_MR, zxdh_mem_reg_req, + empty); +DECLARE_DRV_CMD(zxdh_ucreate_ah, IB_USER_VERBS_CMD_CREATE_AH, empty, + zxdh_create_ah_resp); +DECLARE_DRV_CMD(zxdh_ucreate_srq, IB_USER_VERBS_CMD_CREATE_SRQ, + zxdh_create_srq_req, zxdh_create_srq_resp); +#endif /* __ZXDH_ABI_H__ */ diff --git a/providers/zrdma/zxdh_defs.h b/providers/zrdma/zxdh_defs.h new file mode 100644 index 000000000..c43c2ac8c --- /dev/null +++ b/providers/zrdma/zxdh_defs.h @@ -0,0 +1,396 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */ +/* + * Copyright (c) 2024 ZTE Corporation. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ZXDH_DEFS_H +#define ZXDH_DEFS_H +#include <stdbool.h> +#include <stdio.h> +#include <string.h> +#include <util/udma_barrier.h> +#include <util/util.h> +#include <linux/types.h> +#include <inttypes.h> +#include <pthread.h> +#include <endian.h> +#define ZXDH_RECV_ERR_FLAG_NAK_RNR_NAK 1 +#define ZXDH_RECV_ERR_FLAG_READ_RESP 2 +#define ZXDH_RETRY_CQE_SQ_OPCODE_ERR 32 +#define ZXDH_QP_RETRY_COUNT 2 +#define ZXDH_RESET_RETRY_CQE_SQ_OPCODE_ERR 0x1f + +#define ZXDH_QP_TYPE_ROCE_RC 1 +#define ZXDH_QP_TYPE_ROCE_UD 2 + +#define ZXDH_HW_PAGE_SIZE 4096 +#define ZXDH_HW_PAGE_SHIFT 12 +#define ZXDH_CQE_QTYPE_RQ 0 +#define ZXDH_CQE_QTYPE_SQ 1 + +#define ZXDH_MAX_SQ_WQES_PER_PAGE 128 +#define ZXDH_MAX_SQ_DEPTH 32768 + +#define ZXDH_QP_SW_MIN_WQSIZE 64u /* in WRs*/ +#define ZXDH_QP_WQE_MIN_SIZE 32 +#define ZXDH_QP_SQE_MIN_SIZE 32 +#define ZXDH_QP_RQE_MIN_SIZE 16 +#define ZXDH_QP_WQE_MAX_SIZE 256 +#define ZXDH_QP_WQE_MIN_QUANTA 1 +#define ZXDH_MAX_RQ_WQE_SHIFT_GEN1 2 +#define ZXDH_MAX_RQ_WQE_SHIFT_GEN2 3 +#define ZXDH_SRQ_FRAG_BYTESIZE 16 +#define ZXDH_QP_FRAG_BYTESIZE 16 +#define ZXDH_SQ_WQE_BYTESIZE 32 +#define ZXDH_SRQ_WQE_MIN_SIZE 16 + +#define ZXDH_SQ_RSVD 1 +#define ZXDH_RQ_RSVD 1 +#define ZXDH_SRQ_RSVD 1 + +#define ZXDH_FEATURE_RTS_AE 1ULL +#define ZXDH_FEATURE_CQ_RESIZE 2ULL +#define ZXDHQP_OP_RDMA_WRITE 0x00 +#define ZXDHQP_OP_RDMA_READ 0x01 +#define ZXDHQP_OP_RDMA_SEND 0x03 +#define ZXDHQP_OP_RDMA_SEND_INV 0x04 +#define ZXDHQP_OP_RDMA_SEND_SOL_EVENT 0x05 +#define ZXDHQP_OP_RDMA_SEND_SOL_EVENT_INV 0x06 +#define ZXDHQP_OP_BIND_MW 0x08 +#define ZXDHQP_OP_FAST_REGISTER 0x09 +#define ZXDHQP_OP_LOCAL_INVALIDATE 0x0a +#define ZXDHQP_OP_RDMA_READ_LOC_INV 0x0b +#define ZXDHQP_OP_NOP 0x0c + +#define ZXDH_CQPHC_QPCTX GENMASK_ULL(63, 0) +#define ZXDH_QP_DBSA_HW_SQ_TAIL GENMASK_ULL(14, 0) +#define ZXDH_CQ_DBSA_CQEIDX GENMASK_ULL(22, 0) +#define ZXDH_CQ_DBSA_SW_CQ_SELECT GENMASK_ULL(28, 23) +#define ZXDH_CQ_DBSA_ARM_NEXT BIT_ULL(31) +#define ZXDH_CQ_DBSA_ARM_SEQ_NUM GENMASK_ULL(30, 29) +#define ZXDH_CQ_ARM_CQ_ID_S 10 +#define ZXDH_CQ_ARM_CQ_ID GENMASK_ULL(29, 10) +#define ZXDH_CQ_ARM_DBSA_VLD_S 30 +#define ZXDH_CQ_ARM_DBSA_VLD BIT_ULL(30) + +/* CQP and iWARP Completion Queue */ +#define ZXDH_CQ_QPCTX ZXDH_CQPHC_QPCTX + +#define ZXDH_CQ_MINERR GENMASK_ULL(22, 7) +#define ZXDH_CQ_MAJERR GENMASK_ULL(38, 23) +#define ZXDH_CQ_WQEIDX GENMASK_ULL(54, 40) +#define ZXDH_CQ_EXTCQE BIT_ULL(50) +#define ZXDH_OOO_CMPL BIT_ULL(54) +#define ZXDH_CQ_ERROR BIT_ULL(39) +#define ZXDH_CQ_SQ BIT_ULL(4) + +#define ZXDH_CQ_VALID BIT_ULL(5) +#define ZXDH_CQ_IMMVALID BIT_ULL(0) +#define ZXDH_CQ_UDSMACVALID BIT_ULL(26) +#define ZXDH_CQ_UDVLANVALID BIT_ULL(27) +#define ZXDH_CQ_IMMDATA GENMASK_ULL(31, 0) +#define ZXDH_CQ_UDSMAC GENMASK_ULL(47, 0) +#define ZXDH_CQ_UDVLAN GENMASK_ULL(63, 48) + +#define ZXDH_CQ_IMMDATA_S 0 +#define ZXDH_CQ_IMMDATA_M (0xffffffffffffffffULL << ZXDH_CQ_IMMVALID_S) +#define ZXDH_CQ_IMMDATALOW32 GENMASK_ULL(31, 0) +#define ZXDH_CQ_IMMDATAUP32 GENMASK_ULL(63, 32) +#define ZXDHCQ_PAYLDLEN GENMASK_ULL(63, 32) +#define ZXDHCQ_TCPSEQNUMRTT GENMASK_ULL(63, 32) +#define ZXDHCQ_INVSTAG_S 11 +#define ZXDHCQ_INVSTAG GENMASK_ULL(42, 11) +#define ZXDHCQ_QPID GENMASK_ULL(63, 44) + +#define ZXDHCQ_UDSRCQPN GENMASK_ULL(24, 1) +#define ZXDHCQ_PSHDROP BIT_ULL(51) +#define ZXDHCQ_STAG_S 43 +#define ZXDHCQ_STAG BIT_ULL(43) +#define ZXDHCQ_IPV4 BIT_ULL(25) +#define ZXDHCQ_SOEVENT BIT_ULL(6) +#define ZXDHCQ_OP GENMASK_ULL(63, 58) + +/* Manage Push Page - MPP */ +#define ZXDH_INVALID_PUSH_PAGE_INDEX_GEN_1 0xffff +#define 
ZXDH_INVALID_PUSH_PAGE_INDEX 0xffffffff + +#define ZXDHQPSQ_OPCODE GENMASK_ULL(62, 57) +#define ZXDHQPSQ_COPY_HOST_PBL BIT_ULL(43) +#define ZXDHQPSQ_ADDFRAGCNT GENMASK_ULL(39, 32) +#define ZXDHQPSQ_PUSHWQE BIT_ULL(56) +#define ZXDHQPSQ_STREAMMODE BIT_ULL(58) +#define ZXDHQPSQ_WAITFORRCVPDU BIT_ULL(59) +#define ZXDHQPSQ_READFENCE BIT_ULL(54) +#define ZXDHQPSQ_LOCALFENCE BIT_ULL(55) +#define ZXDHQPSQ_UDPHEADER BIT_ULL(61) +#define ZXDHQPSQ_L4LEN GENMASK_ULL(45, 42) +#define ZXDHQPSQ_SIGCOMPL BIT_ULL(56) +#define ZXDHQPSQ_SOLICITED BIT_ULL(53) +#define ZXDHQPSQ_VALID BIT_ULL(63) + +#define ZXDHQPSQ_FIRST_FRAG_VALID BIT_ULL(0) +#define ZXDHQPSQ_FIRST_FRAG_LEN GENMASK_ULL(31, 1) +#define ZXDHQPSQ_FIRST_FRAG_STAG GENMASK_ULL(63, 32) +#define ZXDHQPSQ_FRAG_TO ZXDH_CQPHC_QPCTX +#define ZXDHQPSQ_FRAG_VALID BIT_ULL(63) +#define ZXDHQPSQ_FRAG_LEN GENMASK_ULL(62, 32) +#define ZXDHQPSQ_FRAG_STAG GENMASK_ULL(31, 0) +#define ZXDHQPSQ_GEN1_FRAG_LEN GENMASK_ULL(31, 0) +#define ZXDHQPSQ_GEN1_FRAG_STAG GENMASK_ULL(63, 32) +#define ZXDHQPSQ_REMSTAGINV GENMASK_ULL(31, 0) +#define ZXDHQPSQ_DESTQKEY GENMASK_ULL(31, 0) +#define ZXDHQPSQ_DESTQPN GENMASK_ULL(55, 32) +#define ZXDHQPSQ_AHID GENMASK_ULL(18, 0) +#define ZXDHQPSQ_INLINEDATAFLAG BIT_ULL(63) +#define ZXDHQPSQ_UD_INLINEDATAFLAG BIT_ULL(50) +#define ZXDHQPSQ_UD_INLINEDATALEN GENMASK_ULL(49, 42) +#define ZXDHQPSQ_UD_ADDFRAGCNT GENMASK_ULL(36, 29) +#define ZXDHQPSQ_WRITE_INLINEDATAFLAG BIT_ULL(48) +#define ZXDHQPSQ_WRITE_INLINEDATALEN GENMASK_ULL(47, 40) + +#define ZXDH_INLINE_VALID_S 7 +#define ZXDHQPSQ_INLINE_VALID BIT_ULL(63) +#define ZXDHQPSQ_INLINEDATALEN GENMASK_ULL(62, 55) +#define ZXDHQPSQ_IMMDATAFLAG BIT_ULL(52) +#define ZXDHQPSQ_REPORTRTT BIT_ULL(46) + +#define ZXDHQPSQ_IMMDATA GENMASK_ULL(31, 0) +#define ZXDHQPSQ_REMSTAG_S 0 +#define ZXDHQPSQ_REMSTAG GENMASK_ULL(31, 0) + +#define ZXDHQPSQ_REMTO ZXDH_CQPHC_QPCTX + +#define ZXDHQPSQ_IMMDATA_VALID BIT_ULL(63) +#define ZXDHQPSQ_STAGRIGHTS GENMASK_ULL(50, 46) +#define ZXDHQPSQ_VABASEDTO BIT_ULL(51) +#define ZXDHQPSQ_MEMWINDOWTYPE BIT_ULL(52) + +#define ZXDHQPSQ_MWLEN ZXDH_CQPHC_QPCTX +#define ZXDHQPSQ_PARENTMRSTAG GENMASK_ULL(31, 0) +#define ZXDHQPSQ_MWSTAG GENMASK_ULL(31, 0) +#define ZXDHQPSQ_MW_PA_PBLE_ONE GENMASK_ULL(63, 46) +#define ZXDHQPSQ_MW_PA_PBLE_TWO GENMASK_ULL(63, 32) +#define ZXDHQPSQ_MW_PA_PBLE_THREE GENMASK_ULL(33, 32) +#define ZXDHQPSQ_MW_HOST_PAGE_SIZE GENMASK_ULL(40, 36) +#define ZXDHQPSQ_MW_LEAF_PBL_SIZE GENMASK_ULL(35, 34) +#define ZXDHQPSQ_MW_LEVLE2_FIRST_PBLE_INDEX GENMASK_ULL(41, 32) +#define ZXDHQPSQ_MW_LEVLE2_ROOT_PBLE_INDEX GENMASK_ULL(50, 42) + +#define ZXDHQPSQ_BASEVA_TO_FBO ZXDH_CQPHC_QPCTX + +#define ZXDHQPSQ_LOCSTAG GENMASK_ULL(31, 0) + +#define ZXDHQPSRQ_RSV GENMASK_ULL(63, 40) +#define ZXDHQPSRQ_VALID_SGE_NUM GENMASK_ULL(39, 32) +#define ZXDHQPSRQ_SIGNATURE GENMASK_ULL(31, 24) +#define ZXDHQPSRQ_NEXT_WQE_INDEX GENMASK_ULL(15, 0) +#define ZXDHQPSRQ_START_PADDING BIT_ULL(63) +#define ZXDHQPSRQ_FRAG_LEN GENMASK_ULL(62, 32) +#define ZXDHQPSRQ_FRAG_STAG GENMASK_ULL(31, 0) + +/* QP RQ WQE common fields */ +#define ZXDHQPRQ_SIGNATURE GENMASK_ULL(31, 16) +#define ZXDHQPRQ_ADDFRAGCNT ZXDHQPSQ_ADDFRAGCNT +#define ZXDHQPRQ_VALID ZXDHQPSQ_VALID +#define ZXDHQPRQ_COMPLCTX ZXDH_CQPHC_QPCTX +#define ZXDHQPRQ_FRAG_LEN ZXDHQPSQ_FRAG_LEN +#define ZXDHQPRQ_STAG ZXDHQPSQ_FRAG_STAG +#define ZXDHQPRQ_TO ZXDHQPSQ_FRAG_TO + +//QP RQ DBSA fields +#define ZXDHQPDBSA_RQ_POLARITY_S 15 +#define ZXDHQPDBSA_RQ_POLARITY BIT_ULL(15) +#define ZXDHQPDBSA_RQ_SW_HEAD_S 0 +#define ZXDHQPDBSA_RQ_SW_HEAD 
GENMASK_ULL(14, 0) + +#define ZXDHPFINT_OICR_HMC_ERR_M BIT(26) +#define ZXDHPFINT_OICR_PE_PUSH_M BIT(27) +#define ZXDHPFINT_OICR_PE_CRITERR_M BIT(28) + +#define ZXDH_SRQ_PARITY_SIGN_S 15 +#define ZXDH_SRQ_PARITY_SIGN BIT_ULL(15) +#define ZXDH_SRQ_SW_SRQ_HEAD_S 0 +#define ZXDH_SRQ_SW_SRQ_HEAD GENMASK_ULL(14, 0) +#define ZXDH_CQE_SQ_OPCODE_RESET BIT(5) + +#define ZXDH_CQP_INIT_WQE(wqe) memset(wqe, 0, 64) + +#define ZXDH_GET_CURRENT_CQ_ELEM(_cq) \ + ((_cq)->cq_base[ZXDH_RING_CURRENT_HEAD((_cq)->cq_ring)].buf) +#define ZXDH_GET_CURRENT_EXTENDED_CQ_ELEM(_cq) \ + (((struct zxdh_extended_cqe \ + *)((_cq)->cq_base))[ZXDH_RING_CURRENT_HEAD((_cq)->cq_ring)] \ + .buf) + +#define ZXDH_RING_INIT(_ring, _size) \ + { \ + (_ring).head = 0; \ + (_ring).tail = 0; \ + (_ring).size = (_size); \ + } +#define ZXDH_RING_SIZE(_ring) ((_ring).size) +#define ZXDH_RING_CURRENT_HEAD(_ring) ((_ring).head) +#define ZXDH_RING_CURRENT_TAIL(_ring) ((_ring).tail) + +#define ZXDH_RING_MOVE_HEAD(_ring, _retcode) \ + { \ + register __u32 size; \ + size = (_ring).size; \ + if (!ZXDH_RING_FULL_ERR(_ring)) { \ + (_ring).head = ((_ring).head + 1) % size; \ + (_retcode) = 0; \ + } else { \ + (_retcode) = ZXDH_ERR_RING_FULL; \ + } \ + } +#define ZXDH_RING_MOVE_HEAD_BY_COUNT(_ring, _count, _retcode) \ + { \ + register __u32 size; \ + size = (_ring).size; \ + if ((ZXDH_RING_USED_QUANTA(_ring) + (_count)) < size) { \ + (_ring).head = ((_ring).head + (_count)) % size; \ + (_retcode) = 0; \ + } else { \ + (_retcode) = ZXDH_ERR_RING_FULL; \ + } \ + } + +#define ZXDH_RING_MOVE_HEAD_BY_COUNT_NOCHECK(_ring, _count) \ + (_ring).head = ((_ring).head + (_count)) % (_ring).size + +#define ZXDH_RING_MOVE_TAIL(_ring) \ + (_ring).tail = ((_ring).tail + 1) % (_ring).size + +#define ZXDH_RING_MOVE_HEAD_NOCHECK(_ring) \ + (_ring).head = ((_ring).head + 1) % (_ring).size + +#define ZXDH_RING_MOVE_TAIL_BY_COUNT(_ring, _count) \ + (_ring).tail = ((_ring).tail + (_count)) % (_ring).size + +#define ZXDH_RING_SET_TAIL(_ring, _pos) (_ring).tail = (_pos) % (_ring).size + +#define ZXDH_RING_FULL_ERR(_ring) \ + ((ZXDH_RING_USED_QUANTA(_ring) == ((_ring).size - 1))) + +#define ZXDH_ERR_RING_FULL2(_ring) \ + ((ZXDH_RING_USED_QUANTA(_ring) == ((_ring).size - 2))) + +#define ZXDH_ERR_RING_FULL3(_ring) \ + ((ZXDH_RING_USED_QUANTA(_ring) == ((_ring).size - 3))) + +#define ZXDH_RING_MORE_WORK(_ring) ((ZXDH_RING_USED_QUANTA(_ring) != 0)) + +#define ZXDH_RING_USED_QUANTA(_ring) \ + ((((_ring).head + (_ring).size - (_ring).tail) % (_ring).size)) + +#define ZXDH_RING_FREE_QUANTA(_ring) \ + (((_ring).size - ZXDH_RING_USED_QUANTA(_ring) - 1)) + +#define ZXDH_ATOMIC_RING_MOVE_HEAD(_ring, index, _retcode) \ + { \ + index = ZXDH_RING_CURRENT_HEAD(_ring); \ + ZXDH_RING_MOVE_HEAD(_ring, _retcode); \ + } + +enum zxdh_qp_wqe_size { + ZXDH_WQE_SIZE_32 = 32, + ZXDH_WQE_SIZE_64 = 64, + ZXDH_WQE_SIZE_96 = 96, + ZXDH_WQE_SIZE_128 = 128, + ZXDH_WQE_SIZE_256 = 256, +}; + +/** + * set_64bit_val - set 64 bit value to hw wqe + * @wqe_words: wqe addr to write + * @byte_index: index in wqe + * @val: value to write + **/ +static inline void set_64bit_val(__le64 *wqe_words, __u32 byte_index, __u64 val) +{ + wqe_words[byte_index >> 3] = htole64(val); +} + +/** + * set_32bit_val - set 32 bit value to hw wqe + * @wqe_words: wqe addr to write + * @byte_index: index in wqe + * @val: value to write + **/ +static inline void set_32bit_val(__le32 *wqe_words, __u32 byte_index, __u32 val) +{ + wqe_words[byte_index >> 2] = htole32(val); +} + +/** + * set_16bit_val - set 16 bit value to hw wqe + * 
@wqe_words: wqe addr to write
+ * @byte_index: index in wqe
+ * @val: value to write
+ **/
+static inline void set_16bit_val(__le16 *wqe_words, __u32 byte_index, __u16 val)
+{
+	wqe_words[byte_index >> 1] = htole16(val);
+}
+
+/**
+ * get_64bit_val - read 64 bit value from wqe
+ * @wqe_words: wqe addr
+ * @byte_index: index to read from
+ * @val: read value
+ **/
+static inline void get_64bit_val(__le64 *wqe_words, __u32 byte_index,
+				 __u64 *val)
+{
+	*val = le64toh(wqe_words[byte_index >> 3]);
+}
+
+/**
+ * get_32bit_val - read 32 bit value from wqe
+ * @wqe_words: wqe addr
+ * @byte_index: index to read from
+ * @val: return 32 bit value
+ **/
+static inline void get_32bit_val(__le32 *wqe_words, __u32 byte_index,
+				 __u32 *val)
+{
+	*val = le32toh(wqe_words[byte_index >> 2]);
+}
+
+static inline void db_wr32(__u32 val, __u32 *wqe_word)
+{
+	*wqe_word = val;
+}
+
+#define read_wqe_need_split(pre_cal_psn, next_psn, chip_rev)                  \
+	(!(chip_rev == 2) &&                                                   \
+	 (((pre_cal_psn < next_psn) && (pre_cal_psn != 0)) ||                  \
+	  ((next_psn <= 0x7FFFFF) && (pre_cal_psn > 0x800000))))
+#endif /* ZXDH_DEFS_H */
diff --git a/providers/zrdma/zxdh_devids.h b/providers/zrdma/zxdh_devids.h
new file mode 100644
index 000000000..abd88b479
--- /dev/null
+++ b/providers/zrdma/zxdh_devids.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */
+/*
+ * Copyright (c) 2024 ZTE Corporation.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ *   - Redistributions of source code must retain the above
+ *     copyright notice, this list of conditions and the following
+ *     disclaimer.
+ *
+ *   - Redistributions in binary form must reproduce the above
+ *     copyright notice, this list of conditions and the following
+ *     disclaimer in the documentation and/or other materials
+ *     provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ZXDH_DEVIDS_H__
+#define __ZXDH_DEVIDS_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* ZXDH VENDOR ID */
+#define PCI_VENDOR_ID_ZXDH_EVB 0x16c3
+#define PCI_VENDOR_ID_ZXDH_E312 0x1cf2
+#define PCI_VENDOR_ID_ZXDH_E312S_D 0x1cf2
+#define PCI_VENDOR_ID_ZXDH_E310 0x1cf2
+#define PCI_VENDOR_ID_ZXDH_E310_RDMA 0x1cf2
+#define PCI_VENDOR_ID_ZXDH_E316 0x1cf2
+#define PCI_VENDOR_ID_ZXDH_X512 0x1cf2
+#define PCI_VENDOR_ID_ZXDH_E312_TY_CLOUD 0x1b18
+#define PCI_VENDOR_ID_ZXDH_E310_TY_CLOUD 0x1b18
+/* ZXDH DEVICE IDs */
+#define ZXDH_DEV_ID_ADAPTIVE_EVB_PF 0x8040 /* ZXDH EVB PF DEVICE ID*/
+#define ZXDH_DEV_ID_ADAPTIVE_EVB_VF 0x8041 /* ZXDH EVB VF DEVICE ID*/
+#define ZXDH_DEV_ID_ADAPTIVE_E312_PF 0x8049 /* ZXDH E312 PF DEVICE ID*/
+#define ZXDH_DEV_ID_ADAPTIVE_E312_VF 0x8060 /* ZXDH E312 VF DEVICE ID*/
+#define ZXDH_DEV_ID_ADAPTIVE_E310_PF 0x8061 /* ZXDH E310 PF DEVICE ID*/
+#define ZXDH_DEV_ID_ADAPTIVE_E310_VF 0x8062 /* ZXDH E310 VF DEVICE ID*/
+#define ZXDH_DEV_ID_ADAPTIVE_E310_RDMA_PF 0x8084 /* ZXDH E310_RDMA PF DEVICE ID*/
+#define ZXDH_DEV_ID_ADAPTIVE_E310_RDMA_VF 0x8085 /* ZXDH E310_RDMA VF DEVICE ID*/
+#define ZXDH_DEV_ID_ADAPTIVE_E316_PF 0x807e /* ZXDH E316 PF DEVICE ID*/
+#define ZXDH_DEV_ID_ADAPTIVE_E316_VF 0x807f /* ZXDH E316 VF DEVICE ID*/
+#define ZXDH_DEV_ID_ADAPTIVE_X512_PF 0x806B /* ZXDH X512 PF DEVICE ID*/
+#define ZXDH_DEV_ID_ADAPTIVE_X512_VF 0x806C /* ZXDH X512 VF DEVICE ID*/
+#define ZXDH_DEV_ID_ADAPTIVE_E312_TY_CLOUD_PF 0x1110 /* ZXDH E312 TY CLOUD PF DEVICE ID*/
+#define ZXDH_DEV_ID_ADAPTIVE_E312_TY_CLOUD_VF 0x1111 /* ZXDH E312 TY CLOUD VF DEVICE ID*/
+#define ZXDH_DEV_ID_ADAPTIVE_E310_TY_CLOUD_PF 0x1100 /* ZXDH E310 TY CLOUD PF DEVICE ID*/
+#define ZXDH_DEV_ID_ADAPTIVE_E310_TY_CLOUD_VF 0x1101 /* ZXDH E310 TY CLOUD VF DEVICE ID*/
+#define ZXDH_DEV_ID_ADAPTIVE_E312S_D_PF 0x80a2 /* ZXDH E312S_D PF DEVICE ID*/
+#define ZXDH_DEV_ID_ADAPTIVE_E312S_D_VF 0x80a3 /* ZXDH E312S_D VF DEVICE ID*/
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __ZXDH_DEVIDS_H__ */
diff --git a/providers/zrdma/zxdh_dv.h b/providers/zrdma/zxdh_dv.h
new file mode 100644
index 000000000..18f612a0b
--- /dev/null
+++ b/providers/zrdma/zxdh_dv.h
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */
+/*
+ * Copyright (c) 2024 ZTE Corporation.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ *   - Redistributions of source code must retain the above
+ *     copyright notice, this list of conditions and the following
+ *     disclaimer.
+ *
+ *   - Redistributions in binary form must reproduce the above
+ *     copyright notice, this list of conditions and the following
+ *     disclaimer in the documentation and/or other materials
+ *     provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _ZXDH_DV_H_ +#define _ZXDH_DV_H_ + +#include <stdio.h> +#include <stdbool.h> +#include <linux/types.h> /* For the __be64 type */ +#include <sys/types.h> +#include <endian.h> +#if defined(__SSE3__) +#include <limits.h> +#include <emmintrin.h> +#include <tmmintrin.h> +#endif /* defined(__SSE3__) */ + +#include <infiniband/verbs.h> +#include <infiniband/tm_types.h> + +#ifdef __cplusplus +extern "C" { +#endif + +enum switch_status { + SWITCH_CLOSE = 0, + SWITCH_OPEN = 1, + SWITCH_ERROR, +}; + +enum zxdh_qp_reset_qp_code { + ZXDH_RESET_RETRY_TX_ITEM_FLAG = 1, +}; + +enum zxdh_qp_modify_qpc_mask { + ZXDH_RETRY_CQE_SQ_OPCODE = 1 << 0, + ZXDH_ERR_FLAG_SET = 1 << 1, + ZXDH_PACKAGE_ERR_FLAG = 1 << 2, + ZXDH_TX_LAST_ACK_PSN = 1 << 3, + ZXDH_TX_LAST_ACK_WQE_OFFSET_SET = 1 << 4, + ZXDH_TX_READ_RETRY_FLAG_SET = 1 << 5, + ZXDH_TX_RDWQE_PYLD_LENGTH = 1 << 6, + ZXDH_TX_RECV_READ_FLAG_SET = 1 << 7, + ZXDH_TX_RD_MSG_LOSS_ERR_FLAG_SET = 1 << 8, +}; + +struct zxdh_rdma_qpc { + uint8_t retry_flag; + uint8_t rnr_retry_flag; + uint8_t read_retry_flag; + uint8_t cur_retry_count; + uint8_t retry_cqe_sq_opcode; + uint8_t err_flag; + uint8_t ack_err_flag; + uint8_t package_err_flag; + uint8_t recv_err_flag; + uint32_t tx_last_ack_psn; + uint8_t retry_count; +}; + +int zxdh_get_log_trace_switch(struct ibv_context *context, + enum switch_status *status); +int zxdh_set_log_trace_switch(struct ibv_context *context, + enum switch_status status); +int zxdh_modify_qp_udp_sport(struct ibv_context *context, uint16_t udp_sport, + uint32_t qpn); +int zxdh_query_qpc(struct ibv_qp *qp, struct zxdh_rdma_qpc *qpc); +int zxdh_modify_qpc(struct ibv_qp *qp, struct zxdh_rdma_qpc *qpc, + uint64_t qpc_mask); +int zxdh_reset_qp(struct ibv_qp *qp, uint64_t opcode); +#ifdef __cplusplus +} +#endif + +#endif diff --git a/providers/zrdma/zxdh_hw.c b/providers/zrdma/zxdh_hw.c new file mode 100644 index 000000000..1eac64b0b --- /dev/null +++ b/providers/zrdma/zxdh_hw.c @@ -0,0 +1,2565 @@ +// SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB) +/* + * Copyright (c) 2024 ZTE Corporation. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "zxdh_status.h" +#include "zxdh_defs.h" +#include "zxdh_verbs.h" +#include "zxdh_zrdma.h" +#include <errno.h> +#include <ccan/container_of.h> +#include "private_verbs_cmd.h" +#include <stdlib.h> +#include <stdio.h> +#include <unistd.h> +#define ERROR_CODE_VALUE 65 + +static inline void qp_tx_psn_add(__u32 *x, __u32 y, __u16 mtu) +{ + if (y == 0) { + *x = (*x + 1) & 0xffffff; + return; + } + __u32 chunks = (y + mtu - 1) / mtu; + *x = (*x + chunks) & 0xffffff; +} + +/** + * zxdh_fragcnt_to_quanta_sq - calculate quanta based on fragment count for SQ + * @frag_cnt: number of fragments + * @quanta: quanta for frag_cnt + */ +static inline enum zxdh_status_code zxdh_fragcnt_to_quanta_sq(__u32 frag_cnt, + __u16 *quanta) +{ + if (unlikely(frag_cnt > ZXDH_MAX_SQ_FRAG)) + return ZXDH_ERR_INVALID_FRAG_COUNT; + *quanta = (frag_cnt >> 1) + 1; + return 0; +} + +/** + * zxdh_set_fragment - set fragment in wqe + * @wqe: wqe for setting fragment + * @offset: offset value + * @sge: sge length and stag + * @valid: The wqe valid + */ +static void zxdh_set_fragment(__le64 *wqe, __u32 offset, struct zxdh_sge *sge, + __u8 valid) +{ + if (sge) { + set_64bit_val(wqe, offset + 8, + FIELD_PREP(ZXDHQPSQ_FRAG_TO, sge->tag_off)); + set_64bit_val(wqe, offset, + FIELD_PREP(ZXDHQPSQ_VALID, valid) | + FIELD_PREP(ZXDHQPSQ_FRAG_LEN, sge->len) | + FIELD_PREP(ZXDHQPSQ_FRAG_STAG, + sge->stag)); + } else { + set_64bit_val(wqe, offset + 8, 0); + set_64bit_val(wqe, offset, FIELD_PREP(ZXDHQPSQ_VALID, valid)); + } +} + +/** + * zxdh_nop_1 - insert a NOP wqe + * @qp: hw qp ptr + */ +static enum zxdh_status_code zxdh_nop_1(struct zxdh_qp *qp) +{ + __u64 hdr; + __le64 *wqe; + __u32 wqe_idx; + bool signaled = false; + + if (!qp->sq_ring.head) + return ZXDH_ERR_PARAM; + + wqe_idx = ZXDH_RING_CURRENT_HEAD(qp->sq_ring); + wqe = qp->sq_base[wqe_idx].elem; + + qp->sq_wrtrk_array[wqe_idx].quanta = ZXDH_QP_WQE_MIN_QUANTA; + + set_64bit_val(wqe, 8, 0); + set_64bit_val(wqe, 16, 0); + set_64bit_val(wqe, 24, 0); + + hdr = FIELD_PREP(ZXDHQPSQ_OPCODE, ZXDH_OP_TYPE_NOP) | + FIELD_PREP(ZXDHQPSQ_SIGCOMPL, signaled) | + FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity); + + /* make sure WQE is written before valid bit is set */ + udma_to_device_barrier(); + + set_64bit_val(wqe, 0, hdr); + + return 0; +} + +/** + * zxdh_qp_post_wr - ring doorbell + * @qp: hw qp ptr + */ +void zxdh_qp_post_wr(struct zxdh_qp *qp) +{ + /* valid bit is written before ringing doorbell */ + udma_to_device_barrier(); + + db_wr32(qp->qp_id, qp->wqe_alloc_db); + qp->initial_ring.head = qp->sq_ring.head; +} + +/** + * zxdh_qp_set_shadow_area - fill SW_RQ_Head + * @qp: hw qp ptr + */ +void zxdh_qp_set_shadow_area(struct zxdh_qp *qp) +{ + __u8 polarity = 0; + + polarity = ((ZXDH_RING_CURRENT_HEAD(qp->rq_ring) == 0) ? 
+ !qp->rwqe_polarity : + qp->rwqe_polarity); + set_64bit_val(qp->shadow_area, 0, + FIELD_PREP(ZXDHQPDBSA_RQ_POLARITY, polarity) | + FIELD_PREP(ZXDHQPDBSA_RQ_SW_HEAD, + ZXDH_RING_CURRENT_HEAD(qp->rq_ring))); +} + +/** + * zxdh_qp_ring_push_db - ring qp doorbell + * @qp: hw qp ptr + * @wqe_idx: wqe index + */ +static void zxdh_qp_ring_push_db(struct zxdh_qp *qp, __u32 wqe_idx) +{ + set_32bit_val(qp->push_db, 0, + FIELD_PREP(ZXDH_WQEALLOC_WQE_DESC_INDEX, wqe_idx >> 3) | + qp->qp_id); + qp->initial_ring.head = qp->sq_ring.head; + qp->push_mode = true; + qp->push_dropped = false; +} + +void zxdh_qp_push_wqe(struct zxdh_qp *qp, __le64 *wqe, __u16 quanta, + __u32 wqe_idx, bool post_sq) +{ + __le64 *push; + + if (ZXDH_RING_CURRENT_HEAD(qp->initial_ring) != + ZXDH_RING_CURRENT_TAIL(qp->sq_ring) && + !qp->push_mode) { + if (post_sq) + zxdh_qp_post_wr(qp); + } else { + push = (__le64 *)((uintptr_t)qp->push_wqe + + (wqe_idx & 0x7) * 0x20); + memcpy(push, wqe, quanta * ZXDH_QP_WQE_MIN_SIZE); + zxdh_qp_ring_push_db(qp, wqe_idx); + } +} + +/** + * zxdh_qp_get_next_send_wqe - pad with NOP if needed, return where next WR should go + * @qp: hw qp ptr + * @wqe_idx: return wqe index + * @quanta: size of WR in quanta + * @total_size: size of WR in bytes + * @info: info on WR + */ +__le64 *zxdh_qp_get_next_send_wqe(struct zxdh_qp *qp, __u32 *wqe_idx, + __u16 quanta, __u32 total_size, + struct zxdh_post_sq_info *info) +{ + __le64 *wqe; + __u16 avail_quanta; + __u16 i; + + avail_quanta = ZXDH_MAX_SQ_WQES_PER_PAGE - + (ZXDH_RING_CURRENT_HEAD(qp->sq_ring) % + ZXDH_MAX_SQ_WQES_PER_PAGE); + if (likely(quanta <= avail_quanta)) { + /* WR fits in current chunk */ + if (unlikely(quanta > ZXDH_RING_FREE_QUANTA(qp->sq_ring))) + return NULL; + } else { + /* Need to pad with NOP */ + if (quanta + avail_quanta > ZXDH_RING_FREE_QUANTA(qp->sq_ring)) + return NULL; + + for (i = 0; i < avail_quanta; i++) { + zxdh_nop_1(qp); + ZXDH_RING_MOVE_HEAD_NOCHECK(qp->sq_ring); + } + } + + *wqe_idx = ZXDH_RING_CURRENT_HEAD(qp->sq_ring); + if (!*wqe_idx) + qp->swqe_polarity = !qp->swqe_polarity; + + ZXDH_RING_MOVE_HEAD_BY_COUNT_NOCHECK(qp->sq_ring, quanta); + + wqe = qp->sq_base[*wqe_idx].elem; + qp->sq_wrtrk_array[*wqe_idx].wrid = info->wr_id; + qp->sq_wrtrk_array[*wqe_idx].wr_len = total_size; + qp->sq_wrtrk_array[*wqe_idx].quanta = quanta; + + return wqe; +} + +/** + * zxdh_qp_get_next_recv_wqe - get next qp's rcv wqe + * @qp: hw qp ptr + * @wqe_idx: return wqe index + */ +__le64 *zxdh_qp_get_next_recv_wqe(struct zxdh_qp *qp, __u32 *wqe_idx) +{ + __le64 *wqe; + enum zxdh_status_code ret_code; + + if (ZXDH_RING_FULL_ERR(qp->rq_ring)) + return NULL; + + ZXDH_ATOMIC_RING_MOVE_HEAD(qp->rq_ring, *wqe_idx, ret_code); + if (ret_code) + return NULL; + + if (!*wqe_idx) + qp->rwqe_polarity = !qp->rwqe_polarity; + /* rq_wqe_size_multiplier is no of 16 byte quanta in one rq wqe */ + wqe = qp->rq_base[*wqe_idx * qp->rq_wqe_size_multiplier].elem; + + return wqe; +} + +static enum zxdh_status_code +zxdh_post_rdma_write(struct zxdh_qp *qp, struct zxdh_post_sq_info *info, + bool post_sq, __u32 total_size) +{ + enum zxdh_status_code ret_code; + struct zxdh_rdma_write *op_info; + __u32 i, byte_off = 0; + __u32 frag_cnt, addl_frag_cnt; + __le64 *wqe; + __u32 wqe_idx; + __u16 quanta; + __u64 hdr; + bool imm_data_flag; + + op_info = &info->op.rdma_write; + imm_data_flag = info->imm_data_valid ? 1 : 0; + + if (imm_data_flag) + frag_cnt = + op_info->num_lo_sges ? 
(op_info->num_lo_sges + 1) : 2; + else + frag_cnt = op_info->num_lo_sges; + addl_frag_cnt = + op_info->num_lo_sges > 1 ? (op_info->num_lo_sges - 1) : 0; + + ret_code = zxdh_fragcnt_to_quanta_sq(frag_cnt, &quanta); + if (ret_code) + return ret_code; + + wqe = zxdh_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size, info); + if (!wqe) + return ZXDH_ERR_QP_TOOMANY_WRS_POSTED; + + if (op_info->num_lo_sges) { + set_64bit_val( + wqe, 16, + FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_VALID, + op_info->lo_sg_list->len == + ZXDH_MAX_SQ_PAYLOAD_SIZE ? + 1 : + 0) | + FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_LEN, + op_info->lo_sg_list->len) | + FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_STAG, + op_info->lo_sg_list->stag)); + set_64bit_val(wqe, 8, + FIELD_PREP(ZXDHQPSQ_FRAG_TO, + op_info->lo_sg_list->tag_off)); + } else { + /*if zero sge,post a special sge with zero length*/ + set_64bit_val(wqe, 16, + FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_VALID, 0) | + FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_LEN, 0) | + FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_STAG, + 0x100)); + set_64bit_val(wqe, 8, FIELD_PREP(ZXDHQPSQ_FRAG_TO, 0)); + } + + if (imm_data_flag) { + byte_off = ZXDH_SQ_WQE_BYTESIZE + ZXDH_QP_FRAG_BYTESIZE; + if (op_info->num_lo_sges > 1) { + qp->wqe_ops.iw_set_fragment(wqe, byte_off, + &op_info->lo_sg_list[1], + qp->swqe_polarity); + byte_off += ZXDH_QP_FRAG_BYTESIZE; + } + set_64bit_val( + wqe, ZXDH_SQ_WQE_BYTESIZE, + FIELD_PREP(ZXDHQPSQ_IMMDATA_VALID, qp->swqe_polarity) | + FIELD_PREP(ZXDHQPSQ_IMMDATA, info->imm_data)); + i = 2; + if (i < op_info->num_lo_sges) { + for (byte_off = ZXDH_SQ_WQE_BYTESIZE + + 2 * ZXDH_QP_FRAG_BYTESIZE; + i < op_info->num_lo_sges; i += 2) { + if (i == addl_frag_cnt) { + qp->wqe_ops.iw_set_fragment( + wqe, byte_off, + &op_info->lo_sg_list[i], + qp->swqe_polarity); + byte_off += ZXDH_QP_FRAG_BYTESIZE; + break; + } + byte_off += ZXDH_QP_FRAG_BYTESIZE; + qp->wqe_ops.iw_set_fragment( + wqe, byte_off, + &op_info->lo_sg_list[i + 1], + qp->swqe_polarity); + byte_off -= ZXDH_QP_FRAG_BYTESIZE; + qp->wqe_ops.iw_set_fragment( + wqe, byte_off, &op_info->lo_sg_list[i], + qp->swqe_polarity); + byte_off += 2 * ZXDH_QP_FRAG_BYTESIZE; + } + } + } else { + i = 1; + for (byte_off = ZXDH_SQ_WQE_BYTESIZE; i < op_info->num_lo_sges; + i += 2) { + if (i == addl_frag_cnt) { + qp->wqe_ops.iw_set_fragment( + wqe, byte_off, &op_info->lo_sg_list[i], + qp->swqe_polarity); + byte_off += ZXDH_QP_FRAG_BYTESIZE; + break; + } + byte_off += ZXDH_QP_FRAG_BYTESIZE; + qp->wqe_ops.iw_set_fragment(wqe, byte_off, + &op_info->lo_sg_list[i + 1], + qp->swqe_polarity); + byte_off -= ZXDH_QP_FRAG_BYTESIZE; + qp->wqe_ops.iw_set_fragment(wqe, byte_off, + &op_info->lo_sg_list[i], + qp->swqe_polarity); + byte_off += 2 * ZXDH_QP_FRAG_BYTESIZE; + } + } + /* if not an odd number set valid bit in next fragment */ + if (!(frag_cnt & 0x01) && frag_cnt) { + qp->wqe_ops.iw_set_fragment(wqe, byte_off, NULL, + qp->swqe_polarity); + } + + hdr = FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity) | + FIELD_PREP(ZXDHQPSQ_OPCODE, info->op_type) | + FIELD_PREP(ZXDHQPSQ_SIGCOMPL, info->signaled) | + FIELD_PREP(ZXDHQPSQ_LOCALFENCE, info->local_fence) | + FIELD_PREP(ZXDHQPSQ_READFENCE, info->read_fence) | + FIELD_PREP(ZXDHQPSQ_SOLICITED, info->solicited) | + FIELD_PREP(ZXDHQPSQ_IMMDATAFLAG, imm_data_flag) | + FIELD_PREP(ZXDHQPSQ_ADDFRAGCNT, addl_frag_cnt) | + FIELD_PREP(ZXDHQPSQ_REMSTAG, op_info->rem_addr.stag); + set_64bit_val(wqe, 24, + FIELD_PREP(ZXDHQPSQ_FRAG_TO, op_info->rem_addr.tag_off)); + + udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ + + set_64bit_val(wqe, 0, 
hdr); + if (post_sq) + zxdh_qp_post_wr(qp); + qp_tx_psn_add(&qp->next_psn, total_size, qp->mtu); + return 0; +} + +static void split_write_imm_wqe(struct zxdh_qp *qp, + struct zxdh_post_sq_info *info, + struct zxdh_post_sq_info *split_part1_info, + struct zxdh_post_sq_info *split_part2_info) +{ + __u32 total_size = 0; + struct zxdh_rdma_write *op_info; + + op_info = &info->op.rdma_write; + total_size = op_info->rem_addr.len; + split_part1_info->op.rdma_write.lo_sg_list = + info->op.rdma_write.lo_sg_list; + split_part2_info->op.rdma_write.lo_sg_list = NULL; + + split_part1_info->op_type = ZXDH_OP_TYPE_WRITE; + split_part1_info->signaled = false; + split_part1_info->local_fence = info->local_fence; + split_part1_info->read_fence = info->read_fence; + split_part1_info->solicited = info->solicited; + split_part1_info->imm_data_valid = false; + split_part1_info->wr_id = info->wr_id; + split_part1_info->op.rdma_write.num_lo_sges = + info->op.rdma_write.num_lo_sges; + split_part1_info->op.rdma_write.rem_addr.stag = op_info->rem_addr.stag; + split_part1_info->op.rdma_write.rem_addr.tag_off = + op_info->rem_addr.tag_off; + + split_part2_info->op_type = info->op_type; + split_part2_info->signaled = info->signaled; + split_part2_info->local_fence = info->local_fence; + split_part2_info->read_fence = info->read_fence; + split_part2_info->solicited = info->solicited; + split_part2_info->imm_data_valid = info->imm_data_valid; + split_part2_info->wr_id = info->wr_id; + split_part2_info->imm_data = info->imm_data; + split_part2_info->op.rdma_write.num_lo_sges = 0; + split_part2_info->op.rdma_write.rem_addr.stag = op_info->rem_addr.stag; + split_part2_info->op.rdma_write.rem_addr.tag_off = + op_info->rem_addr.tag_off + total_size; +} + +/** + * zxdh_rdma_write - rdma write operation + * @qp: hw qp ptr + * @info: post sq information + * @post_sq: flag to post sq + */ +enum zxdh_status_code zxdh_rdma_write(struct zxdh_qp *qp, + struct zxdh_post_sq_info *info, + bool post_sq) +{ + struct zxdh_post_sq_info split_part1_info = { 0 }; + struct zxdh_post_sq_info split_part2_info = { 0 }; + struct zxdh_rdma_write *op_info; + struct zxdh_uqp *iwuqp; + struct zxdh_uvcontext *iwvctx; + __u32 i; + __u32 total_size = 0; + enum zxdh_status_code ret_code; + bool imm_data_flag = info->imm_data_valid ? 
1 : 0; + + iwuqp = container_of(qp, struct zxdh_uqp, qp); + iwvctx = container_of(iwuqp->vqp.qp.context, struct zxdh_uvcontext, + ibv_ctx.context); + op_info = &info->op.rdma_write; + if (op_info->num_lo_sges > qp->max_sq_frag_cnt) + return ZXDH_ERR_INVALID_FRAG_COUNT; + + for (i = 0; i < op_info->num_lo_sges; i++) { + total_size += op_info->lo_sg_list[i].len; + if (0 != i && 0 == op_info->lo_sg_list[i].len) + return ZXDH_ERR_INVALID_FRAG_LEN; + } + + if (total_size > ZXDH_MAX_SQ_PAYLOAD_SIZE) + return ZXDH_ERR_QP_INVALID_MSG_SIZE; + + op_info->rem_addr.len = total_size; + if (iwvctx->zxdh_write_imm_split_switch == 0) { + ret_code = zxdh_post_rdma_write(qp, info, post_sq, total_size); + if (ret_code) + return ret_code; + } else { + if (imm_data_flag && total_size > qp->mtu) { + split_write_imm_wqe(qp, info, &split_part1_info, + &split_part2_info); + + ret_code = zxdh_post_rdma_write(qp, &split_part1_info, + post_sq, total_size); + if (ret_code) + return ret_code; + ret_code = zxdh_post_rdma_write(qp, &split_part2_info, + post_sq, 0); + if (ret_code) + return ret_code; + } else { + ret_code = zxdh_post_rdma_write(qp, info, post_sq, + total_size); + if (ret_code) + return ret_code; + } + } + + return 0; +} + +static void split_two_part_info(struct zxdh_qp *qp, + struct zxdh_post_sq_info *info, __u32 ori_psn, + __u32 pre_cal_psn, + struct zxdh_post_sq_info *split_part1_info, + struct zxdh_post_sq_info *split_part2_info) +{ + __u32 total_size = 0; + __u32 remain_size = 0; + __u32 split_size = 0; + struct zxdh_rdma_read *op_info; + + op_info = &info->op.rdma_read; + total_size = op_info->rem_addr.len; + split_part1_info->op.rdma_read.lo_sg_list = qp->split_sg_list; + split_part2_info->op.rdma_read.lo_sg_list = + qp->split_sg_list + op_info->num_lo_sges; + + memset(split_part1_info->op.rdma_read.lo_sg_list, 0, + 2 * op_info->num_lo_sges * sizeof(struct zxdh_sge)); + if (pre_cal_psn < ori_psn && pre_cal_psn != 0) + remain_size = (0xffffff - ori_psn + 1) * qp->mtu; + else + remain_size = (0x800000 - ori_psn) * qp->mtu; + + split_size = total_size - remain_size; + + split_part1_info->signaled = false; + split_part1_info->local_fence = info->local_fence; + split_part1_info->read_fence = info->read_fence; + split_part1_info->solicited = false; + split_part1_info->wr_id = info->wr_id; + split_part1_info->op.rdma_read.rem_addr.stag = op_info->rem_addr.stag; + split_part1_info->op.rdma_read.rem_addr.tag_off = + op_info->rem_addr.tag_off; + + split_part2_info->signaled = info->signaled; + split_part2_info->local_fence = info->local_fence; + split_part2_info->read_fence = info->read_fence; + split_part2_info->solicited = info->solicited; + split_part2_info->wr_id = info->wr_id; + split_part2_info->op.rdma_read.rem_addr.stag = op_info->rem_addr.stag; + split_part2_info->op.rdma_read.rem_addr.tag_off = + op_info->rem_addr.tag_off + remain_size; + + for (int i = 0; i < op_info->num_lo_sges; i++) { + if (op_info->lo_sg_list[i].len + + split_part1_info->op.rdma_read.rem_addr.len < + remain_size) { + split_part1_info->op.rdma_read.rem_addr.len += + op_info->lo_sg_list[i].len; + split_part1_info->op.rdma_read.num_lo_sges += 1; + memcpy(split_part1_info->op.rdma_read.lo_sg_list + i, + op_info->lo_sg_list + i, + sizeof(struct zxdh_sge)); + continue; + } else if (op_info->lo_sg_list[i].len + + split_part1_info->op.rdma_read.rem_addr.len == + remain_size) { + split_part1_info->op.rdma_read.rem_addr.len += + op_info->lo_sg_list[i].len; + split_part1_info->op.rdma_read.num_lo_sges += 1; + 
memcpy(split_part1_info->op.rdma_read.lo_sg_list + i, + op_info->lo_sg_list + i, + sizeof(struct zxdh_sge)); + split_part2_info->op.rdma_read.rem_addr.len = + split_size; + split_part2_info->op.rdma_read.num_lo_sges = + op_info->num_lo_sges - + split_part1_info->op.rdma_read.num_lo_sges; + memcpy(split_part2_info->op.rdma_read.lo_sg_list, + op_info->lo_sg_list + i + 1, + split_part2_info->op.rdma_read.num_lo_sges * + sizeof(struct zxdh_sge)); + break; + } + + split_part1_info->op.rdma_read.lo_sg_list[i].len = + remain_size - + split_part1_info->op.rdma_read.rem_addr.len; + split_part1_info->op.rdma_read.lo_sg_list[i].tag_off = + op_info->lo_sg_list[i].tag_off; + split_part1_info->op.rdma_read.lo_sg_list[i].stag = + op_info->lo_sg_list[i].stag; + split_part1_info->op.rdma_read.rem_addr.len = remain_size; + split_part1_info->op.rdma_read.num_lo_sges += 1; + split_part2_info->op.rdma_read.lo_sg_list[0].len = + op_info->lo_sg_list[i].len - + split_part1_info->op.rdma_read.lo_sg_list[i].len; + split_part2_info->op.rdma_read.lo_sg_list[0].tag_off = + op_info->lo_sg_list[i].tag_off + + split_part1_info->op.rdma_read.lo_sg_list[i].len; + split_part2_info->op.rdma_read.lo_sg_list[0].stag = + op_info->lo_sg_list[i].stag; + split_part2_info->op.rdma_read.rem_addr.len = split_size; + split_part2_info->op.rdma_read.num_lo_sges = + op_info->num_lo_sges - + split_part1_info->op.rdma_read.num_lo_sges + 1; + if (split_part2_info->op.rdma_read.num_lo_sges - 1 > 0) { + memcpy(split_part2_info->op.rdma_read.lo_sg_list + 1, + op_info->lo_sg_list + i + 1, + (split_part2_info->op.rdma_read.num_lo_sges - + 1) * sizeof(struct zxdh_sge)); + } + break; + } +} + +static enum zxdh_status_code zxdh_post_rdma_read(struct zxdh_qp *qp, + struct zxdh_post_sq_info *info, + bool post_sq, __u32 total_size) +{ + enum zxdh_status_code ret_code; + struct zxdh_rdma_read *op_info; + __u32 i, byte_off = 0; + __u32 addl_frag_cnt; + __le64 *wqe; + __u32 wqe_idx; + __u16 quanta; + __u64 hdr; + + op_info = &info->op.rdma_read; + ret_code = zxdh_fragcnt_to_quanta_sq(op_info->num_lo_sges, &quanta); + if (ret_code) + return ret_code; + + wqe = zxdh_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size, info); + if (!wqe) + return ZXDH_ERR_QP_TOOMANY_WRS_POSTED; + + addl_frag_cnt = + op_info->num_lo_sges > 1 ? (op_info->num_lo_sges - 1) : 0; + + if (op_info->num_lo_sges) { + set_64bit_val( + wqe, 16, + FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_VALID, + op_info->lo_sg_list->len == + ZXDH_MAX_SQ_PAYLOAD_SIZE ? 
+ 1 : + 0) | + FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_LEN, + op_info->lo_sg_list->len) | + FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_STAG, + op_info->lo_sg_list->stag)); + set_64bit_val(wqe, 8, + FIELD_PREP(ZXDHQPSQ_FRAG_TO, + op_info->lo_sg_list->tag_off)); + } else { + /*if zero sge,post a special sge with zero length*/ + set_64bit_val(wqe, 16, + FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_VALID, 0) | + FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_LEN, 0) | + FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_STAG, + 0x100)); + set_64bit_val(wqe, 8, FIELD_PREP(ZXDHQPSQ_FRAG_TO, 0)); + } + + i = 1; + for (byte_off = ZXDH_SQ_WQE_BYTESIZE; i < op_info->num_lo_sges; + i += 2) { + if (i == addl_frag_cnt) { + qp->wqe_ops.iw_set_fragment(wqe, byte_off, + &op_info->lo_sg_list[i], + qp->swqe_polarity); + byte_off += ZXDH_QP_FRAG_BYTESIZE; + break; + } + byte_off += ZXDH_QP_FRAG_BYTESIZE; + qp->wqe_ops.iw_set_fragment(wqe, byte_off, + &op_info->lo_sg_list[i + 1], + qp->swqe_polarity); + byte_off -= ZXDH_QP_FRAG_BYTESIZE; + qp->wqe_ops.iw_set_fragment(wqe, byte_off, + &op_info->lo_sg_list[i], + qp->swqe_polarity); + byte_off += 2 * ZXDH_QP_FRAG_BYTESIZE; + } + + /* if not an odd number set valid bit in next fragment */ + if (!(op_info->num_lo_sges & 0x01) && op_info->num_lo_sges) { + qp->wqe_ops.iw_set_fragment(wqe, byte_off, NULL, + qp->swqe_polarity); + } + + hdr = FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity) | + FIELD_PREP(ZXDHQPSQ_OPCODE, ZXDH_OP_TYPE_READ) | + FIELD_PREP(ZXDHQPSQ_SIGCOMPL, info->signaled) | + FIELD_PREP(ZXDHQPSQ_LOCALFENCE, info->local_fence) | + FIELD_PREP(ZXDHQPSQ_READFENCE, info->read_fence) | + FIELD_PREP(ZXDHQPSQ_SOLICITED, info->solicited) | + FIELD_PREP(ZXDHQPSQ_ADDFRAGCNT, addl_frag_cnt) | + FIELD_PREP(ZXDHQPSQ_REMSTAG, op_info->rem_addr.stag); + set_64bit_val(wqe, 24, + FIELD_PREP(ZXDHQPSQ_FRAG_TO, op_info->rem_addr.tag_off)); + + udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ + + set_64bit_val(wqe, 0, hdr); + if (post_sq) + zxdh_qp_post_wr(qp); + return 0; +} + +/** + * zxdh_rdma_read - rdma read command + * @qp: hw qp ptr + * @info: post sq information + * @inv_stag: flag for inv_stag + * @post_sq: flag to post sq + */ +enum zxdh_status_code zxdh_rdma_read(struct zxdh_qp *qp, + struct zxdh_post_sq_info *info, + bool inv_stag, bool post_sq) +{ + struct zxdh_post_sq_info split_part1_info = { 0 }; + struct zxdh_post_sq_info split_part2_info = { 0 }; + struct zxdh_rdma_read *op_info; + enum zxdh_status_code ret_code; + struct zxdh_uqp *iwuqp; + struct zxdh_uvcontext *iwvctx; + + __u32 i, total_size = 0, pre_cal_psn = 0; + + iwuqp = container_of(qp, struct zxdh_uqp, qp); + iwvctx = container_of(iwuqp->vqp.qp.context, struct zxdh_uvcontext, + ibv_ctx.context); + op_info = &info->op.rdma_read; + if (qp->max_sq_frag_cnt < op_info->num_lo_sges) + return ZXDH_ERR_INVALID_FRAG_COUNT; + + for (i = 0; i < op_info->num_lo_sges; i++) { + total_size += op_info->lo_sg_list[i].len; + if (0 != i && 0 == op_info->lo_sg_list[i].len) + return ZXDH_ERR_INVALID_FRAG_LEN; + } + + if (total_size > ZXDH_MAX_SQ_PAYLOAD_SIZE) + return ZXDH_ERR_QP_INVALID_MSG_SIZE; + op_info->rem_addr.len = total_size; + pre_cal_psn = qp->next_psn; + qp_tx_psn_add(&pre_cal_psn, total_size, qp->mtu); + if (read_wqe_need_split(pre_cal_psn, qp->next_psn, + iwvctx->dev_attrs.chip_rev)) { + split_two_part_info(qp, info, qp->next_psn, pre_cal_psn, + &split_part1_info, &split_part2_info); + ret_code = zxdh_post_rdma_read(qp, &split_part1_info, post_sq, + total_size); + if (ret_code) + return ret_code; + + qp_tx_psn_add(&qp->next_psn, + 
split_part1_info.op.rdma_read.rem_addr.len, + qp->mtu); + ret_code = zxdh_post_rdma_read(qp, &split_part2_info, post_sq, + total_size); + if (ret_code) + return ret_code; + + qp_tx_psn_add(&qp->next_psn, + split_part2_info.op.rdma_read.rem_addr.len, + qp->mtu); + } else { + ret_code = zxdh_post_rdma_read(qp, info, post_sq, total_size); + if (ret_code) + return ret_code; + + qp_tx_psn_add(&qp->next_psn, total_size, qp->mtu); + } + return 0; +} + +/** + * zxdh_rc_send - rdma send command + * @qp: hw qp ptr + * @info: post sq information + * @post_sq: flag to post sq + */ +enum zxdh_status_code zxdh_rc_send(struct zxdh_qp *qp, + struct zxdh_post_sq_info *info, bool post_sq) +{ + __le64 *wqe; + struct zxdh_post_send *op_info; + __u64 hdr; + __u32 i, wqe_idx, total_size = 0, byte_off; + enum zxdh_status_code ret_code; + __u32 frag_cnt, addl_frag_cnt; + __u16 quanta; + bool imm_data_flag = info->imm_data_valid ? 1 : 0; + + op_info = &info->op.send; + if (qp->max_sq_frag_cnt < op_info->num_sges) + return ZXDH_ERR_INVALID_FRAG_COUNT; + + for (i = 0; i < op_info->num_sges; i++) { + total_size += op_info->sg_list[i].len; + if (0 != i && 0 == op_info->sg_list[i].len) + return ZXDH_ERR_INVALID_FRAG_LEN; + } + + if (total_size > ZXDH_MAX_SQ_PAYLOAD_SIZE) + return ZXDH_ERR_QP_INVALID_MSG_SIZE; + + if (imm_data_flag) + frag_cnt = op_info->num_sges ? (op_info->num_sges + 1) : 2; + else + frag_cnt = op_info->num_sges; + ret_code = zxdh_fragcnt_to_quanta_sq(frag_cnt, &quanta); + if (ret_code) + return ret_code; + + wqe = zxdh_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size, info); + if (!wqe) + return ZXDH_ERR_QP_TOOMANY_WRS_POSTED; + + addl_frag_cnt = op_info->num_sges > 1 ? (op_info->num_sges - 1) : 0; + if (op_info->num_sges) { + set_64bit_val( + wqe, 16, + FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_VALID, + op_info->sg_list->len == + ZXDH_MAX_SQ_PAYLOAD_SIZE ? 
+ 1 : + 0) | + FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_LEN, + op_info->sg_list->len) | + FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_STAG, + op_info->sg_list->stag)); + set_64bit_val(wqe, 8, + FIELD_PREP(ZXDHQPSQ_FRAG_TO, + op_info->sg_list->tag_off)); + } else { + /*if zero sge,post a special sge with zero length*/ + set_64bit_val(wqe, 16, + FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_VALID, 0) | + FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_LEN, 0) | + FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_STAG, + 0x100)); + set_64bit_val(wqe, 8, FIELD_PREP(ZXDHQPSQ_FRAG_TO, 0)); + } + + if (imm_data_flag) { + byte_off = ZXDH_SQ_WQE_BYTESIZE + ZXDH_QP_FRAG_BYTESIZE; + if (op_info->num_sges > 1) { + qp->wqe_ops.iw_set_fragment(wqe, byte_off, + &op_info->sg_list[1], + qp->swqe_polarity); + byte_off += ZXDH_QP_FRAG_BYTESIZE; + } + set_64bit_val( + wqe, ZXDH_SQ_WQE_BYTESIZE, + FIELD_PREP(ZXDHQPSQ_IMMDATA_VALID, qp->swqe_polarity) | + FIELD_PREP(ZXDHQPSQ_IMMDATA, info->imm_data)); + i = 2; + if (i < op_info->num_sges) { + for (byte_off = ZXDH_SQ_WQE_BYTESIZE + + 2 * ZXDH_QP_FRAG_BYTESIZE; + i < op_info->num_sges; i += 2) { + if (i == addl_frag_cnt) { + qp->wqe_ops.iw_set_fragment( + wqe, byte_off, + &op_info->sg_list[i], + qp->swqe_polarity); + byte_off += ZXDH_QP_FRAG_BYTESIZE; + break; + } + byte_off += ZXDH_QP_FRAG_BYTESIZE; + qp->wqe_ops.iw_set_fragment( + wqe, byte_off, &op_info->sg_list[i + 1], + qp->swqe_polarity); + byte_off -= ZXDH_QP_FRAG_BYTESIZE; + qp->wqe_ops.iw_set_fragment( + wqe, byte_off, &op_info->sg_list[i], + qp->swqe_polarity); + byte_off += 2 * ZXDH_QP_FRAG_BYTESIZE; + } + } + } else { + i = 1; + for (byte_off = ZXDH_SQ_WQE_BYTESIZE; i < op_info->num_sges; + i += 2) { + if (i == addl_frag_cnt) { + qp->wqe_ops.iw_set_fragment( + wqe, byte_off, &op_info->sg_list[i], + qp->swqe_polarity); + byte_off += ZXDH_QP_FRAG_BYTESIZE; + break; + } + byte_off += ZXDH_QP_FRAG_BYTESIZE; + qp->wqe_ops.iw_set_fragment(wqe, byte_off, + &op_info->sg_list[i + 1], + qp->swqe_polarity); + byte_off -= ZXDH_QP_FRAG_BYTESIZE; + qp->wqe_ops.iw_set_fragment(wqe, byte_off, + &op_info->sg_list[i], + qp->swqe_polarity); + byte_off += 2 * ZXDH_QP_FRAG_BYTESIZE; + } + } + + /* if not an odd number set valid bit in next fragment */ + if (!(frag_cnt & 0x01) && frag_cnt) { + qp->wqe_ops.iw_set_fragment(wqe, byte_off, NULL, + qp->swqe_polarity); + } + + hdr = FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity) | + FIELD_PREP(ZXDHQPSQ_OPCODE, info->op_type) | + FIELD_PREP(ZXDHQPSQ_SIGCOMPL, info->signaled) | + FIELD_PREP(ZXDHQPSQ_LOCALFENCE, info->local_fence) | + FIELD_PREP(ZXDHQPSQ_READFENCE, info->read_fence) | + FIELD_PREP(ZXDHQPSQ_SOLICITED, info->solicited) | + FIELD_PREP(ZXDHQPSQ_IMMDATAFLAG, imm_data_flag) | + FIELD_PREP(ZXDHQPSQ_ADDFRAGCNT, addl_frag_cnt) | + FIELD_PREP(ZXDHQPSQ_REMSTAG, info->stag_to_inv); + set_64bit_val(wqe, 24, + FIELD_PREP(ZXDHQPSQ_INLINEDATAFLAG, 0) | + FIELD_PREP(ZXDHQPSQ_INLINEDATALEN, 0)); + + udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ + + set_64bit_val(wqe, 0, hdr); + if (post_sq) + zxdh_qp_post_wr(qp); + qp_tx_psn_add(&qp->next_psn, total_size, qp->mtu); + + return 0; +} + +/** + * zxdh_ud_send - rdma send command + * @qp: hw qp ptr + * @info: post sq information + * @post_sq: flag to post sq + */ +enum zxdh_status_code zxdh_ud_send(struct zxdh_qp *qp, + struct zxdh_post_sq_info *info, bool post_sq) +{ + __le64 *wqe_base; + __le64 *wqe_ex = NULL; + struct zxdh_post_send *op_info; + __u64 hdr; + __u32 i, wqe_idx, total_size = 0, byte_off; + enum zxdh_status_code ret_code; + __u32 frag_cnt, addl_frag_cnt; + 
__u16 quanta; + bool imm_data_flag = info->imm_data_valid ? 1 : 0; + + op_info = &info->op.send; + if (qp->max_sq_frag_cnt < op_info->num_sges) + return ZXDH_ERR_INVALID_FRAG_COUNT; + + for (i = 0; i < op_info->num_sges; i++) { + total_size += op_info->sg_list[i].len; + if (0 != i && 0 == op_info->sg_list[i].len) + return ZXDH_ERR_INVALID_FRAG_LEN; + } + + if (total_size > ZXDH_MAX_SQ_PAYLOAD_SIZE) + return ZXDH_ERR_QP_INVALID_MSG_SIZE; + + if (imm_data_flag) + frag_cnt = op_info->num_sges ? (op_info->num_sges + 1) : 2; + else + frag_cnt = op_info->num_sges; + ret_code = zxdh_fragcnt_to_quanta_sq(frag_cnt, &quanta); + if (ret_code) + return ret_code; + + if (quanta > ZXDH_RING_FREE_QUANTA(qp->sq_ring)) + return ZXDH_ERR_QP_TOOMANY_WRS_POSTED; + + wqe_idx = ZXDH_RING_CURRENT_HEAD(qp->sq_ring); + if (!wqe_idx) + qp->swqe_polarity = !qp->swqe_polarity; + + ZXDH_RING_MOVE_HEAD_BY_COUNT_NOCHECK(qp->sq_ring, quanta); + + wqe_base = qp->sq_base[wqe_idx].elem; + qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id; + qp->sq_wrtrk_array[wqe_idx].wr_len = total_size; + qp->sq_wrtrk_array[wqe_idx].quanta = quanta; + + addl_frag_cnt = op_info->num_sges > 1 ? (op_info->num_sges - 1) : 0; + hdr = FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity) | + FIELD_PREP(ZXDHQPSQ_OPCODE, info->op_type) | + FIELD_PREP(ZXDHQPSQ_SIGCOMPL, info->signaled) | + FIELD_PREP(ZXDHQPSQ_SOLICITED, info->solicited) | + FIELD_PREP(ZXDHQPSQ_IMMDATAFLAG, imm_data_flag) | + FIELD_PREP(ZXDHQPSQ_UD_INLINEDATAFLAG, 0) | + FIELD_PREP(ZXDHQPSQ_UD_INLINEDATALEN, 0) | + FIELD_PREP(ZXDHQPSQ_UD_ADDFRAGCNT, addl_frag_cnt) | + FIELD_PREP(ZXDHQPSQ_AHID, op_info->ah_id); + + if (op_info->num_sges) { + set_64bit_val( + wqe_base, 16, + FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_VALID, + op_info->sg_list->len == + ZXDH_MAX_SQ_PAYLOAD_SIZE ? 
+ 1 : + 0) | + FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_LEN, + op_info->sg_list->len) | + FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_STAG, + op_info->sg_list->stag)); + set_64bit_val(wqe_base, 8, + FIELD_PREP(ZXDHQPSQ_FRAG_TO, + op_info->sg_list->tag_off)); + } else { + /*if zero sge,post a special sge with zero length*/ + set_64bit_val(wqe_base, 16, + FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_VALID, 0) | + FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_LEN, 0) | + FIELD_PREP(ZXDHQPSQ_FIRST_FRAG_STAG, + 0x100)); + set_64bit_val(wqe_base, 8, FIELD_PREP(ZXDHQPSQ_FRAG_TO, 0)); + } + + if (imm_data_flag) { + wqe_idx = (wqe_idx + 1) % qp->sq_ring.size; + if (!wqe_idx) + qp->swqe_polarity = !qp->swqe_polarity; + wqe_ex = qp->sq_base[wqe_idx].elem; + if (op_info->num_sges > 1) { + qp->wqe_ops.iw_set_fragment(wqe_ex, + ZXDH_QP_FRAG_BYTESIZE, + &op_info->sg_list[1], + qp->swqe_polarity); + } + set_64bit_val( + wqe_ex, 0, + FIELD_PREP(ZXDHQPSQ_IMMDATA_VALID, qp->swqe_polarity) | + FIELD_PREP(ZXDHQPSQ_IMMDATA, info->imm_data)); + i = 2; + for (byte_off = ZXDH_QP_FRAG_BYTESIZE; i < op_info->num_sges; + i += 2) { + if (!(i & 0x1)) { + wqe_idx = (wqe_idx + 1) % qp->sq_ring.size; + if (!wqe_idx) + qp->swqe_polarity = !qp->swqe_polarity; + wqe_ex = qp->sq_base[wqe_idx].elem; + } + if (i == addl_frag_cnt) { + qp->wqe_ops.iw_set_fragment( + wqe_ex, 0, &op_info->sg_list[i], + qp->swqe_polarity); + break; + } + qp->wqe_ops.iw_set_fragment( + wqe_ex, byte_off % ZXDH_SQ_WQE_BYTESIZE, + &op_info->sg_list[i + 1], qp->swqe_polarity); + byte_off -= ZXDH_QP_FRAG_BYTESIZE; + qp->wqe_ops.iw_set_fragment( + wqe_ex, byte_off % ZXDH_SQ_WQE_BYTESIZE, + &op_info->sg_list[i], qp->swqe_polarity); + byte_off += ZXDH_QP_FRAG_BYTESIZE; + } + } else { + i = 1; + for (byte_off = 0; i < op_info->num_sges; i += 2) { + if (i & 0x1) { + wqe_idx = (wqe_idx + 1) % qp->sq_ring.size; + if (!wqe_idx) + qp->swqe_polarity = !qp->swqe_polarity; + wqe_ex = qp->sq_base[wqe_idx].elem; + } + if (i == addl_frag_cnt) { + qp->wqe_ops.iw_set_fragment( + wqe_ex, 0, &op_info->sg_list[i], + qp->swqe_polarity); + break; + } + byte_off += ZXDH_QP_FRAG_BYTESIZE; + qp->wqe_ops.iw_set_fragment( + wqe_ex, byte_off % ZXDH_SQ_WQE_BYTESIZE, + &op_info->sg_list[i + 1], qp->swqe_polarity); + byte_off -= ZXDH_QP_FRAG_BYTESIZE; + qp->wqe_ops.iw_set_fragment( + wqe_ex, byte_off % ZXDH_SQ_WQE_BYTESIZE, + &op_info->sg_list[i], qp->swqe_polarity); + } + } + + /* if not an odd number set valid bit in next fragment */ + if (!(frag_cnt & 0x01) && frag_cnt && wqe_ex) { + qp->wqe_ops.iw_set_fragment(wqe_ex, ZXDH_QP_FRAG_BYTESIZE, NULL, + qp->swqe_polarity); + } + + set_64bit_val(wqe_base, 24, + FIELD_PREP(ZXDHQPSQ_DESTQPN, op_info->dest_qp) | + FIELD_PREP(ZXDHQPSQ_DESTQKEY, op_info->qkey)); + + udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ + + set_64bit_val(wqe_base, 0, hdr); + if (post_sq) + zxdh_qp_post_wr(qp); + + return 0; +} + +/** + * zxdh_set_mw_bind_wqe - set mw bind in wqe + * @wqe: wqe for setting mw bind + * @op_info: info for setting wqe values + */ +static void zxdh_set_mw_bind_wqe(__le64 *wqe, struct zxdh_bind_window *op_info) +{ + __u32 value = 0; + __u8 leaf_pbl_size = op_info->leaf_pbl_size; + + set_64bit_val(wqe, 8, (uintptr_t)op_info->va); + + if (leaf_pbl_size == 0) { + value = (__u32)(op_info->mw_pa_pble_index >> 12); + value = (value & 0x03FFFFFFFC0000) >> 18; + set_64bit_val( + wqe, 16, + FIELD_PREP(ZXDHQPSQ_PARENTMRSTAG, op_info->mr_stag) | + FIELD_PREP(ZXDHQPSQ_MW_PA_PBLE_TWO, value)); + } else if (leaf_pbl_size == 1) { + value = 
(__u32)((op_info->mw_pa_pble_index & 0x0FFC0000) >> 18); + set_64bit_val( + wqe, 16, + FIELD_PREP(ZXDHQPSQ_PARENTMRSTAG, op_info->mr_stag) | + FIELD_PREP(ZXDHQPSQ_MW_PA_PBLE_TWO, value)); + } else { + value = (__u32)((op_info->mw_pa_pble_index & 0x0FFC0000) >> 18); + set_64bit_val( + wqe, 16, + FIELD_PREP(ZXDHQPSQ_PARENTMRSTAG, op_info->mr_stag) | + FIELD_PREP(ZXDHQPSQ_MW_LEVLE2_FIRST_PBLE_INDEX, + value) | + FIELD_PREP(ZXDHQPSQ_MW_LEVLE2_ROOT_PBLE_INDEX, + op_info->root_leaf_offset)); + } + + if (leaf_pbl_size == 0) { + value = (__u32)(op_info->mw_pa_pble_index >> 12); + value = value & 0x3FFFF; + } else { + value = (__u32)(op_info->mw_pa_pble_index & 0x3FFFF); + } + + set_64bit_val(wqe, 24, + op_info->bind_len | + FIELD_PREP(ZXDHQPSQ_MW_PA_PBLE_ONE, value)); +} + +/** + * zxdh_copy_inline_data - Copy inline data to wqe + * @dest: pointer to wqe + * @src: pointer to inline data + * @len: length of inline data to copy + * @polarity: polarity of wqe valid bit + */ +static void zxdh_copy_inline_data(__u8 *dest, __u8 *src, __u32 len, + __u8 polarity, bool imm_data_flag) +{ + __u8 inline_valid = polarity << ZXDH_INLINE_VALID_S; + __u32 copy_size; + __u8 *inline_valid_addr; + + dest += ZXDH_WQE_SIZE_32; /* point to additional 32 byte quanta */ + if (len) { + inline_valid_addr = dest + WQE_OFFSET_7BYTES; + if (imm_data_flag) { + copy_size = len < INLINE_DATASIZE_24BYTES ? + len : + INLINE_DATASIZE_24BYTES; + dest += WQE_OFFSET_8BYTES; + memcpy(dest, src, copy_size); + len -= copy_size; + dest += WQE_OFFSET_24BYTES; + src += copy_size; + } else { + if (len <= INLINE_DATASIZE_7BYTES) { + copy_size = len; + memcpy(dest, src, copy_size); + *inline_valid_addr = inline_valid; + return; + } + memcpy(dest, src, INLINE_DATASIZE_7BYTES); + len -= INLINE_DATASIZE_7BYTES; + dest += WQE_OFFSET_8BYTES; + src += INLINE_DATA_OFFSET_7BYTES; + copy_size = len < INLINE_DATASIZE_24BYTES ? + len : + INLINE_DATASIZE_24BYTES; + memcpy(dest, src, copy_size); + len -= copy_size; + dest += WQE_OFFSET_24BYTES; + src += copy_size; + } + *inline_valid_addr = inline_valid; + } + + while (len) { + inline_valid_addr = dest + WQE_OFFSET_7BYTES; + if (len <= INLINE_DATASIZE_7BYTES) { + copy_size = len; + memcpy(dest, src, copy_size); + *inline_valid_addr = inline_valid; + return; + } + memcpy(dest, src, INLINE_DATASIZE_7BYTES); + len -= INLINE_DATASIZE_7BYTES; + dest += WQE_OFFSET_8BYTES; + src += INLINE_DATA_OFFSET_7BYTES; + copy_size = len < INLINE_DATASIZE_24BYTES ? + len : + INLINE_DATASIZE_24BYTES; + memcpy(dest, src, copy_size); + len -= copy_size; + dest += WQE_OFFSET_24BYTES; + src += copy_size; + + *inline_valid_addr = inline_valid; + } +} + +/** + * zxdh_inline_data_size_to_quanta - based on inline data, quanta + * @data_size: data size for inline + * @imm_data_flag: flag for immediate data + * + * Gets the quanta based on inline and immediate data. + */ +static __u16 zxdh_inline_data_size_to_quanta(__u32 data_size, + bool imm_data_flag) +{ + if (imm_data_flag) + data_size += INLINE_DATASIZE_7BYTES; + + return data_size % 31 ? 
data_size / 31 + 2 : data_size / 31 + 1; +} + +/** + * zxdh_inline_rdma_write - inline rdma write operation + * @qp: hw qp ptr + * @info: post sq information + * @post_sq: flag to post sq + */ +enum zxdh_status_code zxdh_inline_rdma_write(struct zxdh_qp *qp, + struct zxdh_post_sq_info *info, + bool post_sq) +{ + __le64 *wqe; + __u8 imm_valid; + struct zxdh_inline_rdma_write *op_info; + __u64 hdr = 0; + __u32 wqe_idx; + bool read_fence = false; + __u16 quanta; + bool imm_data_flag = info->imm_data_valid ? 1 : 0; + + op_info = &info->op.inline_rdma_write; + + if (op_info->len > qp->max_inline_data) + return ZXDH_ERR_INVALID_INLINE_DATA_SIZE; + if (imm_data_flag && op_info->len > ZXDH_MAX_SQ_INLINE_DATELEN_WITH_IMM) + return ZXDH_ERR_INVALID_INLINE_DATA_SIZE; + + quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(op_info->len, + imm_data_flag); + wqe = zxdh_qp_get_next_send_wqe(qp, &wqe_idx, quanta, op_info->len, + info); + if (!wqe) + return ZXDH_ERR_QP_TOOMANY_WRS_POSTED; + + read_fence |= info->read_fence; + hdr = FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity) | + FIELD_PREP(ZXDHQPSQ_OPCODE, info->op_type) | + FIELD_PREP(ZXDHQPSQ_SIGCOMPL, info->signaled) | + FIELD_PREP(ZXDHQPSQ_LOCALFENCE, info->local_fence) | + FIELD_PREP(ZXDHQPSQ_READFENCE, read_fence) | + FIELD_PREP(ZXDHQPSQ_SOLICITED, info->solicited) | + FIELD_PREP(ZXDHQPSQ_IMMDATAFLAG, imm_data_flag) | + FIELD_PREP(ZXDHQPSQ_WRITE_INLINEDATAFLAG, 1) | + FIELD_PREP(ZXDHQPSQ_WRITE_INLINEDATALEN, op_info->len) | + FIELD_PREP(ZXDHQPSQ_ADDFRAGCNT, (__u16)(quanta - 1)) | + FIELD_PREP(ZXDHQPSQ_REMSTAG, op_info->rem_addr.stag); + set_64bit_val(wqe, 24, + FIELD_PREP(ZXDHQPSQ_FRAG_TO, op_info->rem_addr.tag_off)); + + if (imm_data_flag) { + /* if inline exist, not update imm valid */ + imm_valid = (op_info->len == 0) ? qp->swqe_polarity : + (!qp->swqe_polarity); + + set_64bit_val(wqe, 32, + FIELD_PREP(ZXDHQPSQ_IMMDATA_VALID, imm_valid) | + FIELD_PREP(ZXDHQPSQ_IMMDATA, + info->imm_data)); + } + qp->wqe_ops.iw_copy_inline_data((__u8 *)wqe, op_info->data, + op_info->len, qp->swqe_polarity, + imm_data_flag); + + udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ + + set_64bit_val(wqe, 0, hdr); + + if (post_sq) + zxdh_qp_post_wr(qp); + qp_tx_psn_add(&qp->next_psn, op_info->len, qp->mtu); + return 0; +} + +/** + * zxdh_rc_inline_send - inline send operation + * @qp: hw qp ptr + * @info: post sq information + * @post_sq: flag to post sq + */ +enum zxdh_status_code zxdh_rc_inline_send(struct zxdh_qp *qp, + struct zxdh_post_sq_info *info, + bool post_sq) +{ + __le64 *wqe; + __u8 imm_valid; + struct zxdh_inline_rdma_send *op_info; + __u64 hdr; + __u32 wqe_idx; + __u16 quanta; + bool imm_data_flag = info->imm_data_valid ? 
1 : 0; + + op_info = &info->op.inline_rdma_send; + + if (op_info->len > qp->max_inline_data) + return ZXDH_ERR_INVALID_INLINE_DATA_SIZE; + if (imm_data_flag && op_info->len > ZXDH_MAX_SQ_INLINE_DATELEN_WITH_IMM) + return ZXDH_ERR_INVALID_INLINE_DATA_SIZE; + + quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(op_info->len, + imm_data_flag); + wqe = zxdh_qp_get_next_send_wqe(qp, &wqe_idx, quanta, op_info->len, + info); + if (!wqe) + return ZXDH_ERR_QP_TOOMANY_WRS_POSTED; + + hdr = FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity) | + FIELD_PREP(ZXDHQPSQ_OPCODE, info->op_type) | + FIELD_PREP(ZXDHQPSQ_SIGCOMPL, info->signaled) | + FIELD_PREP(ZXDHQPSQ_LOCALFENCE, info->local_fence) | + FIELD_PREP(ZXDHQPSQ_READFENCE, info->read_fence) | + FIELD_PREP(ZXDHQPSQ_SOLICITED, info->solicited) | + FIELD_PREP(ZXDHQPSQ_ADDFRAGCNT, (__u16)(quanta - 1)) | + FIELD_PREP(ZXDHQPSQ_IMMDATAFLAG, imm_data_flag) | + FIELD_PREP(ZXDHQPSQ_REMSTAG, info->stag_to_inv); + set_64bit_val(wqe, 24, + FIELD_PREP(ZXDHQPSQ_INLINEDATAFLAG, 1) | + FIELD_PREP(ZXDHQPSQ_INLINEDATALEN, op_info->len)); + + if (imm_data_flag) { + /* if inline exist, not update imm valid */ + imm_valid = (op_info->len == 0) ? qp->swqe_polarity : + (!qp->swqe_polarity); + set_64bit_val(wqe, 32, + FIELD_PREP(ZXDHQPSQ_IMMDATA_VALID, imm_valid) | + FIELD_PREP(ZXDHQPSQ_IMMDATA, + info->imm_data)); + } + + qp->wqe_ops.iw_copy_inline_data((__u8 *)wqe, op_info->data, + op_info->len, qp->swqe_polarity, + imm_data_flag); + + udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ + + set_64bit_val(wqe, 0, hdr); + + if (post_sq) + zxdh_qp_post_wr(qp); + + qp_tx_psn_add(&qp->next_psn, op_info->len, qp->mtu); + return 0; +} + +/** + * zxdh_ud_inline_send - inline send operation + * @qp: hw qp ptr + * @info: post sq information + * @post_sq: flag to post sq + */ +enum zxdh_status_code zxdh_ud_inline_send(struct zxdh_qp *qp, + struct zxdh_post_sq_info *info, + bool post_sq) +{ + __le64 *wqe_base; + __le64 *wqe_ex; + struct zxdh_inline_rdma_send *op_info; + __u64 hdr; + __u32 wqe_idx; + __u16 quanta; + bool imm_data_flag = info->imm_data_valid ? 
1 : 0; + __u8 *inline_dest; + __u8 *inline_src; + __u32 inline_len; + __u32 copy_size; + __u8 *inline_valid_addr; + + op_info = &info->op.inline_rdma_send; + inline_len = op_info->len; + + if (op_info->len > qp->max_inline_data) + return ZXDH_ERR_INVALID_INLINE_DATA_SIZE; + if (imm_data_flag && op_info->len > ZXDH_MAX_SQ_INLINE_DATELEN_WITH_IMM) + return ZXDH_ERR_INVALID_INLINE_DATA_SIZE; + + quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(op_info->len, + imm_data_flag); + if (quanta > ZXDH_RING_FREE_QUANTA(qp->sq_ring)) + return ZXDH_ERR_QP_TOOMANY_WRS_POSTED; + + wqe_idx = ZXDH_RING_CURRENT_HEAD(qp->sq_ring); + if (!wqe_idx) + qp->swqe_polarity = !qp->swqe_polarity; + + ZXDH_RING_MOVE_HEAD_BY_COUNT_NOCHECK(qp->sq_ring, quanta); + + wqe_base = qp->sq_base[wqe_idx].elem; + qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id; + qp->sq_wrtrk_array[wqe_idx].wr_len = op_info->len; + qp->sq_wrtrk_array[wqe_idx].quanta = quanta; + + hdr = FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity) | + FIELD_PREP(ZXDHQPSQ_OPCODE, info->op_type) | + FIELD_PREP(ZXDHQPSQ_SIGCOMPL, info->signaled) | + FIELD_PREP(ZXDHQPSQ_SOLICITED, info->solicited) | + FIELD_PREP(ZXDHQPSQ_IMMDATAFLAG, imm_data_flag) | + FIELD_PREP(ZXDHQPSQ_UD_INLINEDATAFLAG, 1) | + FIELD_PREP(ZXDHQPSQ_UD_INLINEDATALEN, op_info->len) | + FIELD_PREP(ZXDHQPSQ_UD_ADDFRAGCNT, (__u16)(quanta - 1)) | + FIELD_PREP(ZXDHQPSQ_AHID, op_info->ah_id); + set_64bit_val(wqe_base, 24, + FIELD_PREP(ZXDHQPSQ_DESTQPN, op_info->dest_qp) | + FIELD_PREP(ZXDHQPSQ_DESTQKEY, op_info->qkey)); + + if (imm_data_flag) { + wqe_idx = (wqe_idx + 1) % qp->sq_ring.size; + if (!wqe_idx) + qp->swqe_polarity = !qp->swqe_polarity; + wqe_ex = qp->sq_base[wqe_idx].elem; + + if (inline_len) { + /* imm and inline use the same valid, valid set after inline data updated*/ + copy_size = inline_len < INLINE_DATASIZE_24BYTES ? + inline_len : + INLINE_DATASIZE_24BYTES; + inline_dest = (__u8 *)wqe_ex + WQE_OFFSET_8BYTES; + inline_src = (__u8 *)op_info->data; + memcpy(inline_dest, inline_src, copy_size); + inline_len -= copy_size; + inline_src += copy_size; + } + set_64bit_val( + wqe_ex, 0, + FIELD_PREP(ZXDHQPSQ_IMMDATA_VALID, qp->swqe_polarity) | + FIELD_PREP(ZXDHQPSQ_IMMDATA, info->imm_data)); + + } else if (inline_len) { + wqe_idx = (wqe_idx + 1) % qp->sq_ring.size; + if (!wqe_idx) + qp->swqe_polarity = !qp->swqe_polarity; + wqe_ex = qp->sq_base[wqe_idx].elem; + inline_dest = (__u8 *)wqe_ex; + inline_src = (__u8 *)op_info->data; + + if (inline_len <= INLINE_DATASIZE_7BYTES) { + copy_size = inline_len; + memcpy(inline_dest, inline_src, copy_size); + inline_len = 0; + } else { + copy_size = INLINE_DATASIZE_7BYTES; + memcpy(inline_dest, inline_src, copy_size); + inline_len -= copy_size; + inline_src += copy_size; + inline_dest += WQE_OFFSET_8BYTES; + copy_size = inline_len < INLINE_DATASIZE_24BYTES ? 
+ inline_len : + INLINE_DATASIZE_24BYTES; + memcpy(inline_dest, inline_src, copy_size); + inline_len -= copy_size; + inline_src += copy_size; + } + inline_valid_addr = (__u8 *)wqe_ex + WQE_OFFSET_7BYTES; + *inline_valid_addr = qp->swqe_polarity << ZXDH_INLINE_VALID_S; + } + + while (inline_len) { + wqe_idx = (wqe_idx + 1) % qp->sq_ring.size; + if (!wqe_idx) + qp->swqe_polarity = !qp->swqe_polarity; + wqe_ex = qp->sq_base[wqe_idx].elem; + inline_dest = (__u8 *)wqe_ex; + + if (inline_len <= INLINE_DATASIZE_7BYTES) { + copy_size = inline_len; + memcpy(inline_dest, inline_src, copy_size); + inline_len = 0; + } else { + copy_size = INLINE_DATASIZE_7BYTES; + memcpy(inline_dest, inline_src, copy_size); + inline_len -= copy_size; + inline_src += copy_size; + inline_dest += WQE_OFFSET_8BYTES; + copy_size = inline_len < INLINE_DATASIZE_24BYTES ? + inline_len : + INLINE_DATASIZE_24BYTES; + memcpy(inline_dest, inline_src, copy_size); + inline_len -= copy_size; + inline_src += copy_size; + } + inline_valid_addr = (__u8 *)wqe_ex + WQE_OFFSET_7BYTES; + *inline_valid_addr = qp->swqe_polarity << ZXDH_INLINE_VALID_S; + } + + udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ + + set_64bit_val(wqe_base, 0, hdr); + + if (post_sq) + zxdh_qp_post_wr(qp); + + return 0; +} + +/** + * zxdh_stag_local_invalidate - stag invalidate operation + * @qp: hw qp ptr + * @info: post sq information + * @post_sq: flag to post sq + */ +enum zxdh_status_code zxdh_stag_local_invalidate(struct zxdh_qp *qp, + struct zxdh_post_sq_info *info, + bool post_sq) +{ + __le64 *wqe; + struct zxdh_inv_local_stag *op_info; + __u64 hdr; + __u32 wqe_idx; + bool local_fence = true; + + op_info = &info->op.inv_local_stag; + + wqe = zxdh_qp_get_next_send_wqe(qp, &wqe_idx, ZXDH_QP_WQE_MIN_QUANTA, 0, + info); + if (!wqe) + return ZXDH_ERR_QP_TOOMANY_WRS_POSTED; + + set_64bit_val(wqe, 16, 0); + + hdr = FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity) | + FIELD_PREP(ZXDHQPSQ_OPCODE, ZXDH_OP_TYPE_LOCAL_INV) | + FIELD_PREP(ZXDHQPSQ_SIGCOMPL, info->signaled) | + FIELD_PREP(ZXDHQPSQ_LOCALFENCE, local_fence) | + FIELD_PREP(ZXDHQPSQ_READFENCE, info->read_fence) | + FIELD_PREP(ZXDHQPSQ_REMSTAG, op_info->target_stag); + + udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ + + set_64bit_val(wqe, 0, hdr); + + if (post_sq) + zxdh_qp_post_wr(qp); + + return 0; +} + +/** + * zxdh_mw_bind - bind Memory Window + * @qp: hw qp ptr + * @info: post sq information + * @post_sq: flag to post sq + */ +enum zxdh_status_code zxdh_mw_bind(struct zxdh_qp *qp, + struct zxdh_post_sq_info *info, bool post_sq) +{ + __le64 *wqe; + struct zxdh_bind_window *op_info; + __u64 hdr; + __u32 wqe_idx; + __u8 access = 1; + __u16 value = 0; + + op_info = &info->op.bind_window; + wqe = zxdh_qp_get_next_send_wqe(qp, &wqe_idx, ZXDH_QP_WQE_MIN_QUANTA, 0, + info); + if (!wqe) + return ZXDH_ERR_QP_TOOMANY_WRS_POSTED; + + if (op_info->ena_writes) { + access = (op_info->ena_reads << 2) | + (op_info->ena_writes << 3) | (1 << 1) | access; + } else { + access = (op_info->ena_reads << 2) | + (op_info->ena_writes << 3) | access; + } + + qp->wqe_ops.iw_set_mw_bind_wqe(wqe, op_info); + + value = (__u16)((op_info->mw_pa_pble_index >> 12) & 0xC000000000000); + + hdr = FIELD_PREP(ZXDHQPSQ_OPCODE, ZXDH_OP_TYPE_BIND_MW) | + FIELD_PREP(ZXDHQPSQ_MWSTAG, op_info->mw_stag) | + FIELD_PREP(ZXDHQPSQ_STAGRIGHTS, access) | + FIELD_PREP(ZXDHQPSQ_VABASEDTO, + (op_info->addressing_type == ZXDH_ADDR_TYPE_VA_BASED ? 
+ 1 : + 0)) | + FIELD_PREP(ZXDHQPSQ_MEMWINDOWTYPE, + (op_info->mem_window_type_1 ? 1 : 0)) | + FIELD_PREP(ZXDHQPSQ_READFENCE, info->read_fence) | + FIELD_PREP(ZXDHQPSQ_LOCALFENCE, info->local_fence) | + FIELD_PREP(ZXDHQPSQ_SIGCOMPL, info->signaled) | + FIELD_PREP(ZXDHQPSQ_MW_HOST_PAGE_SIZE, op_info->host_page_size) | + FIELD_PREP(ZXDHQPSQ_MW_LEAF_PBL_SIZE, op_info->leaf_pbl_size) | + FIELD_PREP(ZXDHQPSQ_MW_PA_PBLE_THREE, value) | + FIELD_PREP(ZXDHQPSQ_VALID, qp->swqe_polarity); + + udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ + + set_64bit_val(wqe, 0, hdr); + + if (post_sq) + zxdh_qp_post_wr(qp); + + return 0; +} + +static void zxdh_sleep_ns(unsigned int nanoseconds) +{ + struct timespec req; + + req.tv_sec = 0; + req.tv_nsec = nanoseconds; + nanosleep(&req, NULL); +} + +/** + * zxdh_post_receive - post receive wqe + * @qp: hw qp ptr + * @info: post rq information + */ +enum zxdh_status_code zxdh_post_receive(struct zxdh_qp *qp, + struct zxdh_post_rq_info *info) +{ + __u32 wqe_idx, i, byte_off; + __le64 *wqe; + struct zxdh_sge *sge; + + if (qp->max_rq_frag_cnt < info->num_sges) + return ZXDH_ERR_INVALID_FRAG_COUNT; + + wqe = zxdh_qp_get_next_recv_wqe(qp, &wqe_idx); + if (unlikely(!wqe)) + return ZXDH_ERR_QP_TOOMANY_WRS_POSTED; + + qp->rq_wrid_array[wqe_idx] = info->wr_id; + + for (i = 0, byte_off = ZXDH_QP_FRAG_BYTESIZE; i < info->num_sges; i++) { + sge = &info->sg_list[i]; + set_64bit_val(wqe, byte_off, sge->tag_off); + set_64bit_val(wqe, byte_off + 8, + FIELD_PREP(ZXDHQPRQ_FRAG_LEN, sge->len) | + FIELD_PREP(ZXDHQPRQ_STAG, sge->stag)); + byte_off += ZXDH_QP_FRAG_BYTESIZE; + } + + /** + * while info->num_sges < qp->max_rq_frag_cnt, or 0 == info->num_sges, + * fill next fragment with FRAG_LEN=0, FRAG_STAG=0x00000100, + * witch indicates a invalid fragment + */ + if (info->num_sges < qp->max_rq_frag_cnt || 0 == info->num_sges) { + set_64bit_val(wqe, byte_off, 0); + set_64bit_val(wqe, byte_off + 8, + FIELD_PREP(ZXDHQPRQ_FRAG_LEN, 0) | + FIELD_PREP(ZXDHQPRQ_STAG, 0x00000100)); + } + + set_64bit_val(wqe, 0, + FIELD_PREP(ZXDHQPRQ_ADDFRAGCNT, info->num_sges) | + FIELD_PREP(ZXDHQPRQ_SIGNATURE, + qp->rwqe_signature)); + + udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ + if (info->num_sges > 3) + zxdh_sleep_ns(1000); + + set_64bit_val(wqe, 8, FIELD_PREP(ZXDHQPRQ_VALID, qp->rwqe_polarity)); + + return 0; +} + +/** + * zxdh_cq_resize - reset the cq buffer info + * @cq: cq to resize + * @cq_base: new cq buffer addr + * @cq_size: number of cqes + */ +void zxdh_cq_resize(struct zxdh_cq *cq, void *cq_base, int cq_size) +{ + cq->cq_base = cq_base; + cq->cq_size = cq_size; + ZXDH_RING_INIT(cq->cq_ring, cq->cq_size); + cq->polarity = 1; +} + +/** + * zxdh_cq_set_resized_cnt - record the count of the resized buffers + * @cq: cq to resize + * @cq_cnt: the count of the resized cq buffers + */ +void zxdh_cq_set_resized_cnt(struct zxdh_cq *cq, __u16 cq_cnt) +{ + __u64 temp_val; + __u16 sw_cq_sel; + __u8 arm_next; + __u8 arm_seq_num; + + get_64bit_val(cq->shadow_area, 0, &temp_val); + + sw_cq_sel = (__u16)FIELD_GET(ZXDH_CQ_DBSA_SW_CQ_SELECT, temp_val); + sw_cq_sel += cq_cnt; + + arm_seq_num = (__u8)FIELD_GET(ZXDH_CQ_DBSA_ARM_SEQ_NUM, temp_val); + arm_next = (__u8)FIELD_GET(ZXDH_CQ_DBSA_ARM_NEXT, temp_val); + cq->cqe_rd_cnt = 0; + + temp_val = FIELD_PREP(ZXDH_CQ_DBSA_ARM_SEQ_NUM, arm_seq_num) | + FIELD_PREP(ZXDH_CQ_DBSA_SW_CQ_SELECT, sw_cq_sel) | + FIELD_PREP(ZXDH_CQ_DBSA_ARM_NEXT, arm_next) | + FIELD_PREP(ZXDH_CQ_DBSA_CQEIDX, 
cq->cqe_rd_cnt); + + set_64bit_val(cq->shadow_area, 0, temp_val); +} + +/** + * zxdh_cq_request_notification - cq notification request (door bell) + * @cq: hw cq + * @cq_notify: notification type + */ +void zxdh_cq_request_notification(struct zxdh_cq *cq, + enum zxdh_cmpl_notify cq_notify) +{ + __u64 temp_val; + __u16 sw_cq_sel; + __u8 arm_next = 0; + __u8 arm_seq_num; + __u32 cqe_index; + __u32 hdr; + + get_64bit_val(cq->shadow_area, 0, &temp_val); + arm_seq_num = (__u8)FIELD_GET(ZXDH_CQ_DBSA_ARM_SEQ_NUM, temp_val); + arm_seq_num++; + sw_cq_sel = (__u16)FIELD_GET(ZXDH_CQ_DBSA_SW_CQ_SELECT, temp_val); + cqe_index = (__u32)FIELD_GET(ZXDH_CQ_DBSA_CQEIDX, temp_val); + + if (cq_notify == ZXDH_CQ_COMPL_SOLICITED) + arm_next = 1; + temp_val = FIELD_PREP(ZXDH_CQ_DBSA_ARM_SEQ_NUM, arm_seq_num) | + FIELD_PREP(ZXDH_CQ_DBSA_SW_CQ_SELECT, sw_cq_sel) | + FIELD_PREP(ZXDH_CQ_DBSA_ARM_NEXT, arm_next) | + FIELD_PREP(ZXDH_CQ_DBSA_CQEIDX, cqe_index); + + set_64bit_val(cq->shadow_area, 0, temp_val); + + hdr = FIELD_PREP(ZXDH_CQ_ARM_DBSA_VLD, 0) | + FIELD_PREP(ZXDH_CQ_ARM_CQ_ID, cq->cq_id); + + udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ + + db_wr32(hdr, cq->cqe_alloc_db); +} + +static inline void build_comp_status(__u32 cq_type, + struct zxdh_cq_poll_info *info) +{ + if (!info->error) { + info->comp_status = ZXDH_COMPL_STATUS_SUCCESS; + if (cq_type == ZXDH_CQE_QTYPE_RQ) { + if (info->major_err != ERROR_CODE_VALUE && + info->minor_err != ERROR_CODE_VALUE) { + info->comp_status = ZXDH_COMPL_STATUS_UNKNOWN; + } + } + return; + } + + switch (info->major_err) { + case ZXDH_RETRY_ACK_MAJOR_ERR: + if (info->minor_err == ZXDH_RETRY_ACK_MINOR_ERR) { + info->comp_status = ZXDH_COMPL_STATUS_RETRY_ACK_ERR; + return; + } + if (info->minor_err == ZXDH_TX_WINDOW_QUERY_ITEM_MINOR_ERR) { + info->comp_status = + ZXDH_COMPL_STATUS_TX_WINDOW_QUERY_ITEM_ERR; + return; + } + break; + case ZXDH_FLUSH_MAJOR_ERR: + info->comp_status = ZXDH_COMPL_STATUS_FLUSHED; + return; + default: + info->comp_status = ZXDH_COMPL_STATUS_UNKNOWN; + return; + } +} + +__le64 *get_current_cqe(struct zxdh_cq *cq) +{ + return ZXDH_GET_CURRENT_EXTENDED_CQ_ELEM(cq); +} + +static inline void zxdh_get_cq_poll_info(struct zxdh_qp *qp, + struct zxdh_cq_poll_info *info, + __u64 qword2, __u64 qword3) +{ + __u8 qp_type; + + qp_type = qp->qp_type; + + info->imm_valid = (bool)FIELD_GET(ZXDH_CQ_IMMVALID, qword2); + if (info->imm_valid) { + info->imm_data = (__u32)FIELD_GET(ZXDH_CQ_IMMDATA, qword3); + info->op_type = ZXDH_OP_TYPE_REC_IMM; + } else { + info->op_type = ZXDH_OP_TYPE_REC; + } + + info->bytes_xfered = (__u32)FIELD_GET(ZXDHCQ_PAYLDLEN, qword3); + + if (likely(qp_type == ZXDH_QP_TYPE_ROCE_RC)) { + if (qword2 & ZXDHCQ_STAG) { + info->stag_invalid_set = true; + info->inv_stag = + (__u32)FIELD_GET(ZXDHCQ_INVSTAG, qword2); + } else { + info->stag_invalid_set = false; + } + } else if (qp_type == ZXDH_QP_TYPE_ROCE_UD) { + info->ipv4 = (bool)FIELD_GET(ZXDHCQ_IPV4, qword2); + info->ud_src_qpn = (__u32)FIELD_GET(ZXDHCQ_UDSRCQPN, qword2); + } +} + +static enum zxdh_status_code update_cq_poll_info(struct zxdh_qp *qp, + struct zxdh_cq_poll_info *info, + __u32 wqe_idx, __u64 qword0) +{ + info->wr_id = qp->sq_wrtrk_array[wqe_idx].wrid; + if (!info->comp_status) + info->bytes_xfered = qp->sq_wrtrk_array[wqe_idx].wr_len; + info->op_type = (__u8)FIELD_GET(ZXDHCQ_OP, qword0); + ZXDH_RING_SET_TAIL(qp->sq_ring, + wqe_idx + qp->sq_wrtrk_array[wqe_idx].quanta); + return ZXDH_SUCCESS; +} + +static enum zxdh_status_code 
+process_tx_window_query_item_err(struct zxdh_qp *qp, + struct zxdh_cq_poll_info *info) +{ + int ret; + struct ibv_qp *ib_qp; + struct zxdh_uqp *iwuqp; + struct zxdh_rdma_qpc qpc = { 0 }; + + iwuqp = container_of(qp, struct zxdh_uqp, qp); + ib_qp = &iwuqp->vqp.qp; + ret = zxdh_query_qpc(ib_qp, &qpc); + if (ret) { + zxdh_dbg(ZXDH_DBG_QP, + "process tx window query item query qpc failed:%d\n", + ret); + return ZXDH_ERR_RETRY_ACK_ERR; + } + if (qpc.tx_last_ack_psn != qp->qp_last_ack_qsn) + qp->qp_reset_cnt = 0; + + qp->qp_last_ack_qsn = qpc.tx_last_ack_psn; + if (qp->qp_reset_cnt >= ZXDH_QP_RETRY_COUNT) + return ZXDH_ERR_RETRY_ACK_ERR; + + ret = zxdh_reset_qp(ib_qp, ZXDH_RESET_RETRY_TX_ITEM_FLAG); + if (ret) { + zxdh_dbg(ZXDH_DBG_QP, + "process tx window query item reset qp failed:%d\n", + ret); + return ZXDH_ERR_RETRY_ACK_ERR; + } + qp->qp_reset_cnt++; + return ZXDH_ERR_RETRY_ACK_NOT_EXCEED_ERR; +} + +static enum zxdh_status_code +process_retry_ack_err(struct zxdh_qp *qp, struct zxdh_cq_poll_info *info) +{ + int ret; + struct ibv_qp *ib_qp; + struct zxdh_uqp *iwuqp; + struct zxdh_rdma_qpc qpc = { 0 }; + struct zxdh_rdma_qpc qpc_req_cmd = { 0 }; + + iwuqp = container_of(qp, struct zxdh_uqp, qp); + + ib_qp = &iwuqp->vqp.qp; + ret = zxdh_query_qpc(ib_qp, &qpc); + if (ret) { + zxdh_dbg(ZXDH_DBG_QP, "process retry ack query qpc failed:%d\n", + ret); + return ZXDH_ERR_RETRY_ACK_ERR; + } + if (!(qpc.retry_cqe_sq_opcode >= ZXDH_RETRY_CQE_SQ_OPCODE_ERR && + (qpc.recv_err_flag == ZXDH_RECV_ERR_FLAG_NAK_RNR_NAK || + qpc.recv_err_flag == ZXDH_RECV_ERR_FLAG_READ_RESP))) { + return ZXDH_ERR_RETRY_ACK_ERR; + } + if (qpc.tx_last_ack_psn != qp->cqe_last_ack_qsn) + qp->cqe_retry_cnt = 0; + + qp->cqe_last_ack_qsn = qpc.tx_last_ack_psn; + if (qp->cqe_retry_cnt >= ZXDH_QP_RETRY_COUNT) + return ZXDH_ERR_RETRY_ACK_ERR; + + memcpy(&qpc_req_cmd, &qpc, sizeof(qpc)); + qpc_req_cmd.package_err_flag = 0; + qpc_req_cmd.ack_err_flag = 0; + qpc_req_cmd.err_flag = 0; + qpc_req_cmd.retry_cqe_sq_opcode &= ZXDH_RESET_RETRY_CQE_SQ_OPCODE_ERR; + qpc_req_cmd.cur_retry_count = qpc.retry_count; + ret = zxdh_modify_qpc(ib_qp, &qpc_req_cmd, + ZXDH_PACKAGE_ERR_FLAG | ZXDH_ERR_FLAG_SET | + ZXDH_RETRY_CQE_SQ_OPCODE | + ZXDH_TX_READ_RETRY_FLAG_SET); + if (ret) { + zxdh_dbg(ZXDH_DBG_QP, + "process retry ack modify qpc failed:%d\n", ret); + return ZXDH_ERR_RETRY_ACK_ERR; + } + qp->cqe_retry_cnt++; + return ZXDH_ERR_RETRY_ACK_NOT_EXCEED_ERR; +} + +static enum zxdh_status_code +zxdh_flush_sq_comp_info(struct zxdh_qp *qp, struct zxdh_cq_poll_info *info, + bool *move_cq_head) +{ + if (!ZXDH_RING_MORE_WORK(qp->sq_ring)) { + ZXDH_RING_INIT(qp->sq_ring, qp->sq_ring.size) + return ZXDH_ERR_Q_EMPTY; + } + do { + __le64 *sw_wqe; + __u64 wqe_qword; + __u64 wqe_idx; + + wqe_idx = qp->sq_ring.tail; + sw_wqe = qp->sq_base[wqe_idx].elem; + get_64bit_val(sw_wqe, 0, &wqe_qword); + info->op_type = (__u8)FIELD_GET(ZXDHQPSQ_OPCODE, wqe_qword); + ZXDH_RING_SET_TAIL(qp->sq_ring, + wqe_idx + + qp->sq_wrtrk_array[wqe_idx].quanta); + + if (info->op_type != ZXDH_OP_TYPE_NOP) { + info->wr_id = qp->sq_wrtrk_array[wqe_idx].wrid; + break; + } + } while (1); + qp->sq_flush_seen = true; + if (!ZXDH_RING_MORE_WORK(qp->sq_ring)) { + qp->sq_flush_complete = true; + ZXDH_RING_INIT(qp->sq_ring, qp->sq_ring.size) + } else + *move_cq_head = false; + return ZXDH_SUCCESS; +} + +static enum zxdh_status_code zxdh_sq_comp_info(struct zxdh_qp *qp, + struct zxdh_cq_poll_info *info, + __u32 wqe_idx, __u64 qword0, + bool *move_cq_head) +{ + enum zxdh_status_code status_code; + + switch 
(info->comp_status) { + case ZXDH_COMPL_STATUS_SUCCESS: + case ZXDH_COMPL_STATUS_UNKNOWN: + break; + case ZXDH_COMPL_STATUS_RETRY_ACK_ERR: + if (qp->qp_type == ZXDH_QP_TYPE_ROCE_RC) { + status_code = process_retry_ack_err(qp, info); + return (status_code == ZXDH_ERR_RETRY_ACK_ERR) ? + update_cq_poll_info(qp, info, wqe_idx, + qword0) : + status_code; + } + break; + case ZXDH_COMPL_STATUS_TX_WINDOW_QUERY_ITEM_ERR: + if (qp->qp_type == ZXDH_QP_TYPE_ROCE_RC) { + status_code = + process_tx_window_query_item_err(qp, info); + return (status_code == ZXDH_ERR_RETRY_ACK_ERR) ? + update_cq_poll_info(qp, info, wqe_idx, + qword0) : + status_code; + } + break; + case ZXDH_COMPL_STATUS_FLUSHED: + return zxdh_flush_sq_comp_info(qp, info, move_cq_head); + default: + break; + } + return update_cq_poll_info(qp, info, wqe_idx, qword0); +} + +static enum zxdh_status_code zxdh_rq_comp_info(struct zxdh_qp *qp, + struct zxdh_cq_poll_info *info, + __u32 wqe_idx, __u64 qword2, + __u64 qword3, bool *move_cq_head) +{ + struct zxdh_uqp *iwuqp = NULL; + struct zxdh_usrq *iwusrq = NULL; + struct zxdh_srq *srq = NULL; + + if (qp->is_srq) { + iwuqp = container_of(qp, struct zxdh_uqp, qp); + iwusrq = iwuqp->srq; + srq = &iwusrq->srq; + zxdh_free_srq_wqe(srq, wqe_idx); + info->wr_id = srq->srq_wrid_array[wqe_idx]; + zxdh_get_cq_poll_info(qp, info, qword2, qword3); + } else { + if (unlikely(info->comp_status == ZXDH_COMPL_STATUS_FLUSHED || + info->comp_status == ZXDH_COMPL_STATUS_UNKNOWN)) { + if (!ZXDH_RING_MORE_WORK(qp->rq_ring)) + return ZXDH_ERR_Q_EMPTY; + + wqe_idx = qp->rq_ring.tail; + } + info->wr_id = qp->rq_wrid_array[wqe_idx]; + zxdh_get_cq_poll_info(qp, info, qword2, qword3); + ZXDH_RING_SET_TAIL(qp->rq_ring, wqe_idx + 1); + if (info->comp_status == ZXDH_COMPL_STATUS_FLUSHED) { + qp->rq_flush_seen = true; + if (!ZXDH_RING_MORE_WORK(qp->rq_ring)) + qp->rq_flush_complete = true; + else + *move_cq_head = false; + } + } + return ZXDH_SUCCESS; +} + +/** + * zxdh_cq_poll_cmpl - get cq completion info + * @cq: hw cq + * @info: cq poll information returned + */ +enum zxdh_status_code zxdh_cq_poll_cmpl(struct zxdh_cq *cq, + struct zxdh_cq_poll_info *info) +{ + __u64 comp_ctx, qword0, qword2, qword3; + __le64 *cqe; + struct zxdh_qp *qp; + struct zxdh_ring *pring = NULL; + __u32 wqe_idx, q_type; + int ret_code; + bool move_cq_head = true; + __u8 polarity; + + cqe = get_current_cqe(cq); + + get_64bit_val(cqe, 0, &qword0); + polarity = (__u8)FIELD_GET(ZXDH_CQ_VALID, qword0); + if (polarity != cq->polarity) + return ZXDH_ERR_Q_EMPTY; + + /* Ensure CQE contents are read after valid bit is checked */ + udma_from_device_barrier(); + get_64bit_val(cqe, 8, &comp_ctx); + get_64bit_val(cqe, 16, &qword2); + get_64bit_val(cqe, 24, &qword3); + + qp = (struct zxdh_qp *)(unsigned long)comp_ctx; + if (unlikely(!qp || qp->destroy_pending)) { + ret_code = ZXDH_ERR_Q_DESTROYED; + goto exit; + } + + info->qp_handle = (zxdh_qp_handle)(unsigned long)qp; + q_type = (__u8)FIELD_GET(ZXDH_CQ_SQ, qword0); + info->solicited_event = (bool)FIELD_GET(ZXDHCQ_SOEVENT, qword0); + wqe_idx = (__u32)FIELD_GET(ZXDH_CQ_WQEIDX, qword0); + info->error = (bool)FIELD_GET(ZXDH_CQ_ERROR, qword0); + info->major_err = FIELD_GET(ZXDH_CQ_MAJERR, qword0); + info->minor_err = FIELD_GET(ZXDH_CQ_MINERR, qword0); + + /* Set the min error to standard flush error code for remaining cqes */ + if (unlikely(info->error && info->major_err == ZXDH_FLUSH_MAJOR_ERR && + info->minor_err != FLUSH_GENERAL_ERR)) { + qword0 &= ~ZXDH_CQ_MINERR; + qword0 |= FIELD_PREP(ZXDH_CQ_MINERR, 
FLUSH_GENERAL_ERR); + set_64bit_val(cqe, 0, qword0); + } + build_comp_status(q_type, info); + + info->qp_id = (__u32)FIELD_GET(ZXDHCQ_QPID, qword2); + info->imm_valid = false; + switch (q_type) { + case ZXDH_CQE_QTYPE_SQ: + ret_code = zxdh_sq_comp_info(qp, info, wqe_idx, qword0, + &move_cq_head); + pring = &qp->sq_ring; + break; + case ZXDH_CQE_QTYPE_RQ: + ret_code = zxdh_rq_comp_info(qp, info, wqe_idx, qword2, qword3, + &move_cq_head); + pring = &qp->rq_ring; + break; + default: + zxdh_dbg(ZXDH_DBG_CQ, "zxdh get cqe type unknown!\n"); + ret_code = ZXDH_ERR_Q_DESTROYED; + break; + } +exit: + if (move_cq_head) { + __u64 cq_shadow_temp; + + ZXDH_RING_MOVE_HEAD_NOCHECK(cq->cq_ring); + if (!ZXDH_RING_CURRENT_HEAD(cq->cq_ring)) + cq->polarity ^= 1; + + ZXDH_RING_MOVE_TAIL(cq->cq_ring); + cq->cqe_rd_cnt++; + get_64bit_val(cq->shadow_area, 0, &cq_shadow_temp); + cq_shadow_temp &= ~ZXDH_CQ_DBSA_CQEIDX; + cq_shadow_temp |= + FIELD_PREP(ZXDH_CQ_DBSA_CQEIDX, cq->cqe_rd_cnt); + set_64bit_val(cq->shadow_area, 0, cq_shadow_temp); + } else { + qword0 &= ~ZXDH_CQ_WQEIDX; + qword0 |= FIELD_PREP(ZXDH_CQ_WQEIDX, pring->tail); + set_64bit_val(cqe, 0, qword0); + } + + return ret_code; +} + +/** + * zxdh_qp_round_up - return round up qp wq depth + * @wqdepth: wq depth in quanta to round up + */ +int zxdh_qp_round_up(__u32 wqdepth) +{ + int scount = 1; + + if (wqdepth == 0) + return 0; + + for (wqdepth--; scount <= 16; scount *= 2) + wqdepth |= wqdepth >> scount; + + return ++wqdepth; +} + +/** + * zxdh_cq_round_up - return round up cq wq depth + * @wqdepth: wq depth in quanta to round up + */ +int zxdh_cq_round_up(__u32 wqdepth) +{ + int scount = 1; + + if (wqdepth == 0) + return 0; + + for (wqdepth--; scount <= 16; scount *= 2) + wqdepth |= wqdepth >> scount; + + return ++wqdepth; +} + +/** + * zxdh_get_rq_wqe_shift - get shift count for maximum rq wqe size + * @sge: Maximum Scatter Gather Elements wqe + * @shift: Returns the shift needed based on sge + * + * Shift can be used to left shift the rq wqe size based on number of SGEs. + * For 1 SGE, shift = 1 (wqe size of 2*16 bytes). + * For 2 or 3 SGEs, shift = 2 (wqe size of 4*16 bytes). + * For 4-7 SGE's Shift of 3. + * For 8-15 SGE's Shift of 4 otherwise (wqe size of 512 bytes). + */ +void zxdh_get_rq_wqe_shift(__u32 sge, __u8 *shift) +{ + *shift = 0; //16bytes RQE, need to confirm configuration + if (sge < 2) + *shift = 1; + else if (sge < 4) + *shift = 2; + else if (sge < 8) + *shift = 3; + else if (sge < 16) + *shift = 4; + else + *shift = 5; +} + +/** + * zxdh_get_sq_wqe_shift - get shift count for maximum wqe size + * @sge: Maximum Scatter Gather Elements wqe + * @inline_data: Maximum inline data size + * @shift: Returns the shift needed based on sge + * + * Shift can be used to left shift the wqe size based on number of SGEs and inlind data size. + * To surport WR with imm_data,shift = 1 (wqe size of 2*32 bytes). + * For 2-7 SGEs or 24 < inline data <= 86, shift = 2 (wqe size of 4*32 bytes). + * Otherwise (wqe size of 256 bytes). 
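+ * + * Worked example (hypothetical request sizes, for illustration only): a caller + * asking for max_sq_frag_cnt = 5 and max_inline_data = 64 falls into the second + * case above, so shift = 2 and each SQ WQE occupies 4 * 32 = 128 bytes.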
+ */ +void zxdh_get_sq_wqe_shift(__u32 sge, __u32 inline_data, __u8 *shift) +{ + *shift = 1; + + if (sge > 1 || inline_data > 24) { + if (sge < 8 && inline_data <= 86) + *shift = 2; + else + *shift = 3; + } +} + +/* + * zxdh_get_sqdepth - get SQ depth (quanta) + * @dev_attrs: qp HW attributes + * @sq_size: SQ size + * @shift: shift which determines size of WQE + * @sqdepth: depth of SQ + * + */ +enum zxdh_status_code zxdh_get_sqdepth(struct zxdh_dev_attrs *dev_attrs, + __u32 sq_size, __u8 shift, + __u32 *sqdepth) +{ + if (sq_size > ZXDH_MAX_SQ_DEPTH) + return ZXDH_ERR_INVALID_SIZE; + + *sqdepth = zxdh_qp_round_up((sq_size << shift) + ZXDH_SQ_RSVD); + + if (*sqdepth < (ZXDH_QP_SW_MIN_WQSIZE << shift)) + *sqdepth = ZXDH_QP_SW_MIN_WQSIZE << shift; + else if (*sqdepth > dev_attrs->max_hw_wq_quanta) + return ZXDH_ERR_INVALID_SIZE; + + return 0; +} + +/* + * zxdh_get_rqdepth - get RQ depth (quanta) + * @dev_attrs: qp HW attributes + * @rq_size: RQ size + * @shift: shift which determines size of WQE + * @rqdepth: depth of RQ + */ +enum zxdh_status_code zxdh_get_rqdepth(struct zxdh_dev_attrs *dev_attrs, + __u32 rq_size, __u8 shift, + __u32 *rqdepth) +{ + *rqdepth = zxdh_qp_round_up((rq_size << shift) + ZXDH_RQ_RSVD); + + if (*rqdepth < (ZXDH_QP_SW_MIN_WQSIZE << shift)) + *rqdepth = ZXDH_QP_SW_MIN_WQSIZE << shift; + else if (*rqdepth > dev_attrs->max_hw_rq_quanta) + return ZXDH_ERR_INVALID_SIZE; + + return 0; +} + +static const struct zxdh_wqe_ops iw_wqe_ops = { + .iw_copy_inline_data = zxdh_copy_inline_data, + .iw_inline_data_size_to_quanta = zxdh_inline_data_size_to_quanta, + .iw_set_fragment = zxdh_set_fragment, + .iw_set_mw_bind_wqe = zxdh_set_mw_bind_wqe, +}; + +/** + * zxdh_qp_init - initialize shared qp + * @qp: hw qp (user and kernel) + * @info: qp initialization info + * + * initializes the vars used in both user and kernel mode. + * size of the wqe depends on numbers of max. fragements + * allowed. Then size of wqe * the number of wqes should be the + * amount of memory allocated for sq and rq. + */ +enum zxdh_status_code zxdh_qp_init(struct zxdh_qp *qp, + struct zxdh_qp_init_info *info) +{ + enum zxdh_status_code ret_code = 0; + __u32 sq_ring_size; + __u8 sqshift, rqshift; + + qp->dev_attrs = info->dev_attrs; + if (info->max_sq_frag_cnt > qp->dev_attrs->max_hw_wq_frags || + info->max_rq_frag_cnt > qp->dev_attrs->max_hw_wq_frags) + return ZXDH_ERR_INVALID_FRAG_COUNT; + + zxdh_get_rq_wqe_shift(info->max_rq_frag_cnt, &rqshift); + zxdh_get_sq_wqe_shift(info->max_sq_frag_cnt, info->max_inline_data, + &sqshift); + + qp->qp_caps = info->qp_caps; + qp->sq_base = info->sq; + qp->rq_base = info->rq; + qp->qp_type = info->type; + qp->shadow_area = info->shadow_area; + set_64bit_val(qp->shadow_area, 0, 0x8000); + qp->sq_wrtrk_array = info->sq_wrtrk_array; + + qp->rq_wrid_array = info->rq_wrid_array; + qp->wqe_alloc_db = info->wqe_alloc_db; + qp->qp_id = info->qp_id; + qp->sq_size = info->sq_size; + qp->push_mode = false; + qp->max_sq_frag_cnt = info->max_sq_frag_cnt; + sq_ring_size = qp->sq_size << sqshift; + ZXDH_RING_INIT(qp->sq_ring, sq_ring_size); + ZXDH_RING_INIT(qp->initial_ring, sq_ring_size); + qp->swqe_polarity = 0; + qp->swqe_polarity_deferred = 1; + qp->rwqe_polarity = 0; + qp->rwqe_signature = 0; + qp->rq_size = info->rq_size; + qp->max_rq_frag_cnt = info->max_rq_frag_cnt; + qp->max_inline_data = (info->max_inline_data == 0) ? 
+ ZXDH_MAX_INLINE_DATA_SIZE : + info->max_inline_data; + qp->rq_wqe_size = rqshift; + ZXDH_RING_INIT(qp->rq_ring, qp->rq_size); + qp->rq_wqe_size_multiplier = 1 << rqshift; + qp->wqe_ops = iw_wqe_ops; + return ret_code; +} + +/** + * zxdh_cq_init - initialize shared cq (user and kernel) + * @cq: hw cq + * @info: hw cq initialization info + */ +enum zxdh_status_code zxdh_cq_init(struct zxdh_cq *cq, + struct zxdh_cq_init_info *info) +{ + cq->cq_base = info->cq_base; + cq->cq_id = info->cq_id; + cq->cq_size = info->cq_size; + cq->cqe_alloc_db = info->cqe_alloc_db; + cq->cq_ack_db = info->cq_ack_db; + cq->shadow_area = info->shadow_area; + cq->cqe_size = info->cqe_size; + ZXDH_RING_INIT(cq->cq_ring, cq->cq_size); + cq->polarity = 1; + cq->cqe_rd_cnt = 0; + + return 0; +} + +/** + * zxdh_clean_cq - clean cq entries + * @q: completion context + * @cq: cq to clean + */ +void zxdh_clean_cq(void *q, struct zxdh_cq *cq) +{ + __le64 *cqe; + __u64 qword0, comp_ctx; + __u32 cq_head; + __u8 polarity, temp; + + cq_head = cq->cq_ring.head; + temp = cq->polarity; + do { + if (cq->cqe_size) + cqe = ((struct zxdh_extended_cqe + *)(cq->cq_base))[cq_head] + .buf; + else + cqe = cq->cq_base[cq_head].buf; + get_64bit_val(cqe, 0, &qword0); + polarity = (__u8)FIELD_GET(ZXDH_CQ_VALID, qword0); + + if (polarity != temp) + break; + + get_64bit_val(cqe, 8, &comp_ctx); + if ((void *)(uintptr_t)comp_ctx == q) + set_64bit_val(cqe, 8, 0); + + cq_head = (cq_head + 1) % cq->cq_ring.size; + if (!cq_head) + temp ^= 1; + } while (true); +} + +/** + * zxdh_get_srq_wqe_shift - get shift count for maximum srq wqe size + * @dev_attrs: srq HW attributes + * @sge: Maximum Scatter Gather Elements wqe + * @shift: Returns the shift needed based on sge + * + * Shift can be used to left shift the srq wqe size based on number of SGEs. + * For 1 SGE, shift = 1 (wqe size of 2*16 bytes). + * For 2 or 3 SGEs, shift = 2 (wqe size of 4*16 bytes). + * For 4-7 SGE's Shift of 3. + * For 8-15 SGE's Shift of 4 otherwise (wqe size of 512 bytes). 
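+ * The same quanta conversion is applied by zxdh_get_srqdepth() when validating the requested SRQ size.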
+ */ +void zxdh_get_srq_wqe_shift(struct zxdh_dev_attrs *dev_attrs, __u32 sge, + __u8 *shift) +{ + *shift = 0; //16bytes RQE, need to confirm configuration + if (sge < 2) + *shift = 1; + else if (sge < 4) + *shift = 2; + else if (sge < 8) + *shift = 3; + else if (sge < 16) + *shift = 4; + else + *shift = 5; +} + +/* + * zxdh_get_srqdepth - get SRQ depth (quanta) + * @max_hw_rq_quanta: HW SRQ size limit + * @srq_size: SRQ size + * @shift: shift which determines size of WQE + * @srqdepth: depth of SRQ + */ +int zxdh_get_srqdepth(__u32 max_hw_srq_quanta, __u32 srq_size, __u8 shift, + __u32 *srqdepth) +{ + *srqdepth = zxdh_qp_round_up((srq_size << shift) + ZXDH_SRQ_RSVD); + + if (*srqdepth < (ZXDH_QP_SW_MIN_WQSIZE << shift)) + *srqdepth = ZXDH_QP_SW_MIN_WQSIZE << shift; + else if ((*srqdepth >> shift) > max_hw_srq_quanta) + return ZXDH_ERR_INVALID_SIZE; + + return 0; +} + +__le64 *zxdh_get_srq_wqe(struct zxdh_srq *srq, int wqe_index) +{ + __le64 *wqe; + + wqe = srq->srq_base[wqe_index * srq->srq_wqe_size_multiplier].elem; + return wqe; +} + +__le16 *zxdh_get_srq_list_wqe(struct zxdh_srq *srq, __u16 *idx) +{ + __le16 *wqe; + __u16 wqe_idx; + + wqe_idx = srq->srq_list_ring.tail; + srq->srq_list_ring.tail++; + srq->srq_list_ring.tail %= srq->srq_list_ring.size; + *idx = srq->srq_list_ring.tail; + + if (!(*idx)) + srq->srq_list_polarity = !srq->srq_list_polarity; + + wqe = &srq->srq_list_base[wqe_idx]; + + return wqe; +} + +/** + * zxdh_srq_init - initialize srq + * @srq: hw srq (user and kernel) + * @info: srq initialization info + * + * initializes the vars used in both user and kernel mode. + * size of the wqe depends on numbers of max. fragements + * allowed. Then size of wqe * the number of wqes should be the + * amount of memory allocated for srq. + */ +enum zxdh_status_code zxdh_srq_init(struct zxdh_srq *srq, + struct zxdh_srq_init_info *info) +{ + __u32 srq_ring_size; + __u8 srqshift; + + srq->dev_attrs = info->dev_attrs; + if (info->max_srq_frag_cnt > srq->dev_attrs->max_hw_wq_frags) + return -ZXDH_ERR_INVALID_FRAG_COUNT; + zxdh_get_srq_wqe_shift(srq->dev_attrs, info->max_srq_frag_cnt, + &srqshift); + srq->srq_base = info->srq_base; + srq->srq_list_base = info->srq_list_base; + srq->srq_db_base = info->srq_db_base; + srq->srq_wrid_array = info->srq_wrid_array; + srq->srq_id = info->srq_id; + srq->srq_size = info->srq_size; + srq->log2_srq_size = info->log2_srq_size; + srq->srq_list_size = info->srq_list_size; + srq->max_srq_frag_cnt = info->max_srq_frag_cnt; + srq_ring_size = srq->srq_size; + srq->srq_wqe_size = srqshift; + srq->srq_wqe_size_multiplier = 1 << srqshift; + ZXDH_RING_INIT(srq->srq_ring, srq_ring_size); + ZXDH_RING_INIT(srq->srq_list_ring, srq->srq_list_size); + srq->srq_ring.tail = srq->srq_size - 1; + srq->srq_list_polarity = 1; + zxdh_dbg(ZXDH_DBG_SRQ, "%s srq_wqe_size_multiplier:%d srqshift:%d\n", + __func__, srq->srq_wqe_size_multiplier, srqshift); + zxdh_dbg( + ZXDH_DBG_SRQ, + "%s srq->srq_id:%d srq_base:0x%p srq_list_base:0x%p srq_db_base:0x%p\n", + __func__, srq->srq_id, srq->srq_base, srq->srq_list_base, + srq->srq_db_base); + zxdh_dbg(ZXDH_DBG_SRQ, + "%s srq->srq_id:%d srq_ring_size:%d srq->srq_list_size:%d\n", + __func__, srq->srq_id, srq_ring_size, srq->srq_list_size); + return 0; +} + +void zxdh_free_srq_wqe(struct zxdh_srq *srq, int wqe_index) +{ + struct zxdh_usrq *iwusrq; + __le64 *wqe; + __u64 hdr; + + iwusrq = container_of(srq, struct zxdh_usrq, srq); + /* always called with interrupts disabled. 
*/ + pthread_spin_lock(&iwusrq->lock); + wqe = zxdh_get_srq_wqe(srq, srq->srq_ring.tail); + srq->srq_ring.tail = wqe_index; + hdr = FIELD_PREP(ZXDHQPSRQ_NEXT_WQE_INDEX, wqe_index); + + udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ + set_64bit_val(wqe, 0, hdr); + + pthread_spin_unlock(&iwusrq->lock); + zxdh_dbg(ZXDH_DBG_SRQ, "%s srq->srq_id:%d wqe_index:%d\n", __func__, + srq->srq_id, wqe_index); +} diff --git a/providers/zrdma/zxdh_status.h b/providers/zrdma/zxdh_status.h new file mode 100644 index 000000000..cb8dbf351 --- /dev/null +++ b/providers/zrdma/zxdh_status.h @@ -0,0 +1,105 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */ +/* + * Copyright (c) 2024 ZTE Corporation. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ZXDH_STATUS_H +#define ZXDH_STATUS_H + +/* Error Codes */ +enum zxdh_status_code { + ZXDH_SUCCESS = 0, + ZXDH_ERR_NVM = -1, + ZXDH_ERR_NVM_CHECKSUM = -2, + ZXDH_ERR_CFG = -4, + ZXDH_ERR_PARAM = -5, + ZXDH_ERR_DEVICE_NOT_SUPPORTED = -6, + ZXDH_ERR_RESET_FAILED = -7, + ZXDH_ERR_SWFW_SYNC = -8, + ZXDH_ERR_NO_MEMORY = -9, + ZXDH_ERR_BAD_PTR = -10, + ZXDH_ERR_INVALID_PD_ID = -11, + ZXDH_ERR_INVALID_QP_ID = -12, + ZXDH_ERR_INVALID_CQ_ID = -13, + ZXDH_ERR_INVALID_CEQ_ID = -14, + ZXDH_ERR_INVALID_AEQ_ID = -15, + ZXDH_ERR_INVALID_SIZE = -16, + ZXDH_ERR_INVALID_ARP_INDEX = -17, + ZXDH_ERR_INVALID_FPM_FUNC_ID = -18, + ZXDH_ERR_QP_INVALID_MSG_SIZE = -19, + ZXDH_ERR_QP_TOOMANY_WRS_POSTED = -20, + ZXDH_ERR_INVALID_FRAG_COUNT = -21, + ZXDH_ERR_Q_EMPTY = -22, + ZXDH_ERR_INVALID_ALIGNMENT = -23, + ZXDH_ERR_FLUSHED_Q = -24, + ZXDH_ERR_INVALID_PUSH_PAGE_INDEX = -25, + ZXDH_ERR_INVALID_INLINE_DATA_SIZE = -26, + ZXDH_ERR_TIMEOUT = -27, + ZXDH_ERR_OPCODE_MISMATCH = -28, + ZXDH_ERR_CQP_COMPL_ERROR = -29, + ZXDH_ERR_INVALID_VF_ID = -30, + ZXDH_ERR_INVALID_HMCFN_ID = -31, + ZXDH_ERR_BACKING_PAGE_ERROR = -32, + ZXDH_ERR_NO_PBLCHUNKS_AVAILABLE = -33, + ZXDH_ERR_INVALID_PBLE_INDEX = -34, + ZXDH_ERR_INVALID_SD_INDEX = -35, + ZXDH_ERR_INVALID_PAGE_DESC_INDEX = -36, + ZXDH_ERR_INVALID_SD_TYPE = -37, + ZXDH_ERR_MEMCPY_FAILED = -38, + ZXDH_ERR_INVALID_HMC_OBJ_INDEX = -39, + ZXDH_ERR_INVALID_HMC_OBJ_COUNT = -40, + ZXDH_ERR_BUF_TOO_SHORT = -43, + ZXDH_ERR_BAD_IWARP_CQE = -44, + ZXDH_ERR_NVM_BLANK_MODE = -45, + ZXDH_ERR_NOT_IMPL = -46, + ZXDH_ERR_PE_DOORBELL_NOT_ENA = -47, + ZXDH_ERR_NOT_READY = -48, + ZXDH_NOT_SUPPORTED = -49, + ZXDH_ERR_FIRMWARE_API_VER = -50, + ZXDH_ERR_RING_FULL = -51, + ZXDH_ERR_MPA_CRC = -61, + ZXDH_ERR_NO_TXBUFS = -62, + ZXDH_ERR_SEQ_NUM = -63, + ZXDH_ERR_LIST_EMPTY = -64, + ZXDH_ERR_INVALID_MAC_ADDR = -65, + ZXDH_ERR_BAD_STAG = -66, + ZXDH_ERR_CQ_COMPL_ERROR = -67, + ZXDH_ERR_Q_DESTROYED = -68, + ZXDH_ERR_INVALID_FEAT_CNT = -69, + ZXDH_ERR_REG_CQ_FULL = -70, + ZXDH_ERR_VF_MSG_ERROR = -71, + ZXDH_ERR_NO_INTR = -72, + ZXDH_ERR_REG_QSET = -73, + ZXDH_ERR_FEATURES_OP = -74, + ZXDH_ERR_INVALID_FRAG_LEN = -75, + ZXDH_ERR_RETRY_ACK_ERR = -76, + ZXDH_ERR_RETRY_ACK_NOT_EXCEED_ERR = -77, +}; +#endif /* ZXDH_STATUS_H */ diff --git a/providers/zrdma/zxdh_verbs.c b/providers/zrdma/zxdh_verbs.c new file mode 100644 index 000000000..924496907 --- /dev/null +++ b/providers/zrdma/zxdh_verbs.c @@ -0,0 +1,3281 @@ +// SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB) +/* + * Copyright (c) 2024 ZTE Corporation. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include <config.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> +#include <signal.h> +#include <errno.h> +#include <sys/param.h> +#include <sys/mman.h> +#include <netinet/in.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <malloc.h> +#include <linux/if_ether.h> +#include <infiniband/driver.h> +#include <ccan/container_of.h> + +#include "zxdh_zrdma.h" +#include "zxdh_abi.h" +#include "zxdh_verbs.h" + +uint32_t zxdh_debug_mask; + +static const unsigned int zxdh_roce_mtu[] = { + [IBV_MTU_256] = 256, [IBV_MTU_512] = 512, [IBV_MTU_1024] = 1024, + [IBV_MTU_2048] = 2048, [IBV_MTU_4096] = 4096, +}; + +static inline unsigned int mtu_enum_to_int(enum ibv_mtu mtu) +{ + return zxdh_roce_mtu[mtu]; +} + +static inline void print_fw_ver(uint64_t fw_ver, char *str, size_t len) +{ + uint16_t major, minor, sub_minor, sub_major; + + major = (fw_ver >> 48) & 0xffff; + sub_major = (fw_ver >> 32) & 0xffff; + minor = (fw_ver >> 16) & 0xffff; + sub_minor = fw_ver & 0xffff; + snprintf(str, len, "%d.%02d.%02d.%02d", major, sub_major, minor, + sub_minor); +} + +/** + * zxdh_get_inline_data - get inline_multi_sge data + * @inline_data: uint8_t* + * @ib_wr: work request ptr + * @len: sge total length + */ +static int zxdh_get_inline_data(uint8_t *inline_data, struct ibv_send_wr *ib_wr, + __u32 *len) +{ + int num = 0; + int offset = 0; + + while (num < ib_wr->num_sge) { + *len += ib_wr->sg_list[num].length; + if (*len > ZXDH_MAX_INLINE_DATA_SIZE) { + printf("err:inline bytes over max inline length\n"); + return -EINVAL; + } + memcpy(inline_data + offset, + (void *)(uintptr_t)ib_wr->sg_list[num].addr, + ib_wr->sg_list[num].length); + offset += ib_wr->sg_list[num].length; + num++; + } + return 0; +} + +/** + * zxdh_uquery_device_ex - query device attributes including extended properties + * @context: user context for the device + * @input: extensible input struct for ibv_query_device_ex verb + * @attr: extended device attribute struct + * @attr_size: size of extended device attribute struct + **/ +int zxdh_uquery_device_ex(struct ibv_context *context, + const struct ibv_query_device_ex_input *input, + struct ibv_device_attr_ex *attr, size_t attr_size) +{ + struct ib_uverbs_ex_query_device_resp resp = {}; + size_t resp_size = sizeof(resp); + int ret; + + ret = ibv_cmd_query_device_any(context, input, attr, attr_size, &resp, + &resp_size); + if (ret) + return ret; + + print_fw_ver(resp.base.fw_ver, attr->orig_attr.fw_ver, + sizeof(attr->orig_attr.fw_ver)); + + return 0; +} + +/** + * zxdh_uquery_port - get port attributes (msg size, lnk, mtu...) 
+ * @context: user context of the device + * @port: port for the attributes + * @attr: to return port attributes + **/ +int zxdh_uquery_port(struct ibv_context *context, uint8_t port, + struct ibv_port_attr *attr) +{ + struct ibv_query_port cmd; + + return ibv_cmd_query_port(context, port, attr, &cmd, sizeof(cmd)); +} + +/** + * zxdh_ualloc_pd - allocate a protection domain and return the pd ptr + * @context: user context of the device + **/ +struct ibv_pd *zxdh_ualloc_pd(struct ibv_context *context) +{ + struct ibv_alloc_pd cmd; + struct zxdh_ualloc_pd_resp resp = {}; + struct zxdh_upd *iwupd; + int err; + + iwupd = malloc(sizeof(*iwupd)); + if (!iwupd) + return NULL; + + err = ibv_cmd_alloc_pd(context, &iwupd->ibv_pd, &cmd, sizeof(cmd), + &resp.ibv_resp, sizeof(resp)); + if (err) + goto err_free; + + iwupd->pd_id = resp.pd_id; + + return &iwupd->ibv_pd; + +err_free: + free(iwupd); + errno = err; + return NULL; +} + +/** + * zxdh_ufree_pd - free pd resources + * @pd: pd to free resources + */ +int zxdh_ufree_pd(struct ibv_pd *pd) +{ + struct zxdh_upd *iwupd; + int ret; + + iwupd = container_of(pd, struct zxdh_upd, ibv_pd); + ret = ibv_cmd_dealloc_pd(pd); + if (ret) + return ret; + + free(iwupd); + + return 0; +} + +/** + * zxdh_ureg_mr - register user memory region + * @pd: pd for the mr + * @addr: user address of the memory region + * @length: length of the memory + * @hca_va: hca_va + * @access: access allowed on this mr + */ +struct ibv_mr *zxdh_ureg_mr(struct ibv_pd *pd, void *addr, size_t length, + uint64_t hca_va, int access) +{ + struct zxdh_umr *umr; + struct zxdh_ureg_mr cmd; + struct zxdh_ureg_mr_resp resp = {}; + int err; + + umr = malloc(sizeof(*umr)); + if (!umr) + return NULL; + + cmd.reg_type = ZXDH_MEMREG_TYPE_MEM; + err = ibv_cmd_reg_mr(pd, addr, length, hca_va, access, &umr->vmr, + &cmd.ibv_cmd, sizeof(cmd), &resp.ibv_resp, + sizeof(resp)); + if (err) { + free(umr); + errno = err; + return NULL; + } + umr->acc_flags = access; + umr->host_page_size = resp.host_page_size; + umr->leaf_pbl_size = resp.leaf_pbl_size; + umr->mr_pa_pble_index = resp.mr_pa_hig; + umr->mr_pa_pble_index = (umr->mr_pa_pble_index << 32) | resp.mr_pa_low; + + return &umr->vmr.ibv_mr; +} + +/* + * zxdh_urereg_mr - re-register memory region + * @vmr: mr that was allocated + * @flags: bit mask to indicate which of the attr's of MR modified + * @pd: pd of the mr + * @addr: user address of the memory region + * @length: length of the memory + * @access: access allowed on this mr + */ +int zxdh_urereg_mr(struct verbs_mr *vmr, int flags, struct ibv_pd *pd, + void *addr, size_t length, int access) +{ + struct zxdh_urereg_mr cmd = {}; + struct ib_uverbs_rereg_mr_resp resp; + + cmd.reg_type = ZXDH_MEMREG_TYPE_MEM; + return ibv_cmd_rereg_mr(vmr, flags, addr, length, (uintptr_t)addr, + access, pd, &cmd.ibv_cmd, sizeof(cmd), &resp, + sizeof(resp)); +} + +/** + * zxdh_udereg_mr - deregister memory region + * @vmr: mr that was allocated + */ +int zxdh_udereg_mr(struct verbs_mr *vmr) +{ + int ret; + + ret = ibv_cmd_dereg_mr(vmr); + if (ret) + return ret; + + free(vmr); + + return 0; +} + +/** + * zxdh_ualloc_mw - allocate memory window + * @pd: protection domain + * @type: memory window type + */ +struct ibv_mw *zxdh_ualloc_mw(struct ibv_pd *pd, enum ibv_mw_type type) +{ + struct ibv_mw *mw; + struct ibv_alloc_mw cmd; + struct ib_uverbs_alloc_mw_resp resp; + + mw = calloc(1, sizeof(*mw)); + if (!mw) + return NULL; + + if (ibv_cmd_alloc_mw(pd, type, mw, &cmd, sizeof(cmd), &resp, + sizeof(resp))) { + free(mw); + return
NULL; + } + + return mw; +} + +/** + * zxdh_ubind_mw - bind a memory window + * @qp: qp to post WR + * @mw: memory window to bind + * @mw_bind: bind info + */ +int zxdh_ubind_mw(struct ibv_qp *qp, struct ibv_mw *mw, + struct ibv_mw_bind *mw_bind) +{ + struct ibv_mw_bind_info *bind_info = &mw_bind->bind_info; + struct verbs_mr *vmr = verbs_get_mr(bind_info->mr); + struct zxdh_umr *umr = container_of(vmr, struct zxdh_umr, vmr); + struct ibv_send_wr wr = {}; + struct ibv_send_wr *bad_wr; + int err; + + if (vmr->mr_type != IBV_MR_TYPE_MR) + return -ENOTSUP; + + if (umr->acc_flags & IBV_ACCESS_ZERO_BASED) + return -EINVAL; + + if (mw->type != IBV_MW_TYPE_1) + return -EINVAL; + + wr.opcode = IBV_WR_BIND_MW; + wr.bind_mw.bind_info = mw_bind->bind_info; + wr.bind_mw.mw = mw; + wr.bind_mw.rkey = ibv_inc_rkey(mw->rkey); + + wr.wr_id = mw_bind->wr_id; + wr.send_flags = mw_bind->send_flags; + + err = zxdh_upost_send(qp, &wr, &bad_wr); + if (!err) + mw->rkey = wr.bind_mw.rkey; + + return err; +} + +/** + * zxdh_udealloc_mw - deallocate memory window + * @mw: memory window to dealloc + */ +int zxdh_udealloc_mw(struct ibv_mw *mw) +{ + int ret; + + ret = ibv_cmd_dealloc_mw(mw); + if (ret) + return ret; + free(mw); + + return 0; +} + +static void *zxdh_alloc_hw_buf(size_t size) +{ + void *buf; + + buf = memalign(ZXDH_HW_PAGE_SIZE, size); + + if (!buf) + return NULL; + if (ibv_dontfork_range(buf, size)) { + free(buf); + return NULL; + } + + return buf; +} + +static void zxdh_free_hw_buf(void *buf, size_t size) +{ + ibv_dofork_range(buf, size); + free(buf); +} + +/** + * get_cq_size - returns actual cqe needed by HW + * @ncqe: minimum cqes requested by application + */ +static inline int get_cq_size(int ncqe) +{ + if (ncqe < ZXDH_U_MINCQ_SIZE) + ncqe = ZXDH_U_MINCQ_SIZE; + return ncqe; +} + +static inline size_t get_cq_total_bytes(__u32 cq_size) +{ + return roundup(cq_size * sizeof(struct zxdh_cqe), ZXDH_HW_PAGE_SIZE); +} + +/** + * ucreate_cq - zxdh util function to create a CQ + * @context: ibv context + * @attr_ex: CQ init attributes + * @ext_cq: flag to create an extendable or normal CQ + */ +static struct ibv_cq_ex *ucreate_cq(struct ibv_context *context, + struct ibv_cq_init_attr_ex *attr_ex, + bool ext_cq) +{ + struct zxdh_cq_init_info info = {}; + struct zxdh_ureg_mr reg_mr_cmd = {}; + struct zxdh_ucreate_cq_ex cmd = {}; + struct zxdh_ucreate_cq_ex_resp resp = {}; + struct ib_uverbs_reg_mr_resp reg_mr_resp = {}; + struct zxdh_ureg_mr reg_mr_shadow_cmd = {}; + struct ib_uverbs_reg_mr_resp reg_mr_shadow_resp = {}; + struct zxdh_dev_attrs *dev_attrs; + struct zxdh_uvcontext *iwvctx; + struct zxdh_ucq *iwucq; + size_t total_size; + __u32 cq_pages; + int ret, ncqe; + __u64 resize_supported; + + iwvctx = container_of(context, struct zxdh_uvcontext, ibv_ctx.context); + dev_attrs = &iwvctx->dev_attrs; + + if (attr_ex->cqe < ZXDH_MIN_CQ_SIZE || + attr_ex->cqe > dev_attrs->max_hw_cq_size) { + errno = EINVAL; + return NULL; + } + + info.cq_size = get_cq_size(attr_ex->cqe); + info.cq_size = zxdh_cq_round_up(info.cq_size); + if (info.cq_size > dev_attrs->max_hw_cq_size) { + errno = EINVAL; + return NULL; + } + + /* save the cqe requested by application */ + ncqe = attr_ex->cqe; + iwucq = calloc(1, sizeof(*iwucq)); + if (!iwucq) + return NULL; + + ret = pthread_spin_init(&iwucq->lock, PTHREAD_PROCESS_PRIVATE); + if (ret) { + errno = ret; + free(iwucq); + return NULL; + } + + iwucq->resize_enable = false; + iwucq->comp_vector = attr_ex->comp_vector; + list_head_init(&iwucq->resize_list); + total_size = 
get_cq_total_bytes(info.cq_size); + cq_pages = total_size >> ZXDH_HW_PAGE_SHIFT; + resize_supported = dev_attrs->feature_flags & ZXDH_FEATURE_CQ_RESIZE; + + if (!resize_supported) + total_size = (cq_pages << ZXDH_HW_PAGE_SHIFT) + + ZXDH_DB_SHADOW_AREA_SIZE; + + iwucq->buf_size = total_size; + info.cq_base = zxdh_alloc_hw_buf(total_size); + if (!info.cq_base) + goto err_cq_base; + + memset(info.cq_base, 0, total_size); + reg_mr_cmd.reg_type = ZXDH_MEMREG_TYPE_CQ; + reg_mr_cmd.cq_pages = cq_pages; + + ret = ibv_cmd_reg_mr(&iwvctx->iwupd->ibv_pd, info.cq_base, total_size, + (uintptr_t)info.cq_base, IBV_ACCESS_LOCAL_WRITE, + &iwucq->vmr, &reg_mr_cmd.ibv_cmd, + sizeof(reg_mr_cmd), &reg_mr_resp, + sizeof(reg_mr_resp)); + if (ret) { + errno = ret; + goto err_dereg_mr; + } + + iwucq->vmr.ibv_mr.pd = &iwvctx->iwupd->ibv_pd; + + if (resize_supported) { + info.shadow_area = zxdh_alloc_hw_buf(ZXDH_DB_SHADOW_AREA_SIZE); + if (!info.shadow_area) + goto err_dereg_mr; + + memset(info.shadow_area, 0, ZXDH_DB_SHADOW_AREA_SIZE); + reg_mr_shadow_cmd.reg_type = ZXDH_MEMREG_TYPE_CQ; + reg_mr_shadow_cmd.cq_pages = 1; + + ret = ibv_cmd_reg_mr( + &iwvctx->iwupd->ibv_pd, info.shadow_area, + ZXDH_DB_SHADOW_AREA_SIZE, (uintptr_t)info.shadow_area, + IBV_ACCESS_LOCAL_WRITE, &iwucq->vmr_shadow_area, + &reg_mr_shadow_cmd.ibv_cmd, sizeof(reg_mr_shadow_cmd), + &reg_mr_shadow_resp, sizeof(reg_mr_shadow_resp)); + if (ret) { + errno = ret; + goto err_dereg_shadow; + } + + iwucq->vmr_shadow_area.ibv_mr.pd = &iwvctx->iwupd->ibv_pd; + } else { + info.shadow_area = (__le64 *)((__u8 *)info.cq_base + + (cq_pages << ZXDH_HW_PAGE_SHIFT)); + } + + attr_ex->cqe = info.cq_size; + cmd.user_cq_buf = (__u64)((uintptr_t)info.cq_base); + cmd.user_shadow_area = (__u64)((uintptr_t)info.shadow_area); + + ret = ibv_cmd_create_cq_ex(context, attr_ex, &iwucq->verbs_cq, + &cmd.ibv_cmd, sizeof(cmd), &resp.ibv_resp, + sizeof(resp), 0); + if (ret) { + errno = ret; + goto err_dereg_shadow; + } + + if (ext_cq) + zxdh_ibvcq_ex_fill_priv_funcs(iwucq, attr_ex); + info.cq_id = resp.cq_id; + /* Do not report the cqe's burned by HW */ + iwucq->verbs_cq.cq.cqe = ncqe; + + info.cqe_alloc_db = + (__u32 *)((__u8 *)iwvctx->cq_db + ZXDH_DB_CQ_OFFSET); + zxdh_cq_init(&iwucq->cq, &info); + + return &iwucq->verbs_cq.cq_ex; + +err_dereg_shadow: + ibv_cmd_dereg_mr(&iwucq->vmr); + if (iwucq->vmr_shadow_area.ibv_mr.handle) { + ibv_cmd_dereg_mr(&iwucq->vmr_shadow_area); + if (resize_supported) + zxdh_free_hw_buf(info.shadow_area, + ZXDH_DB_SHADOW_AREA_SIZE); + } +err_dereg_mr: + zxdh_free_hw_buf(info.cq_base, total_size); +err_cq_base: + pthread_spin_destroy(&iwucq->lock); + + free(iwucq); + + return NULL; +} + +struct ibv_cq *zxdh_ucreate_cq(struct ibv_context *context, int cqe, + struct ibv_comp_channel *channel, + int comp_vector) +{ + struct ibv_cq_init_attr_ex attr_ex = { + .cqe = cqe, + .channel = channel, + .comp_vector = comp_vector, + }; + struct ibv_cq_ex *ibvcq_ex; + + ibvcq_ex = ucreate_cq(context, &attr_ex, false); + + return ibvcq_ex ?
ibv_cq_ex_to_cq(ibvcq_ex) : NULL; +} + +struct ibv_cq_ex *zxdh_ucreate_cq_ex(struct ibv_context *context, + struct ibv_cq_init_attr_ex *attr_ex) +{ + if (attr_ex->wc_flags & ~ZXDH_CQ_SUPPORTED_WC_FLAGS) { + errno = EOPNOTSUPP; + return NULL; + } + + return ucreate_cq(context, attr_ex, true); +} + +/** + * zxdh_free_cq_buf - free memory for cq buffer + * @cq_buf: cq buf to free + */ +static void zxdh_free_cq_buf(struct zxdh_cq_buf *cq_buf) +{ + ibv_cmd_dereg_mr(&cq_buf->vmr); + zxdh_free_hw_buf(cq_buf->cq.cq_base, + get_cq_total_bytes(cq_buf->cq.cq_size)); + free(cq_buf); +} + +/** + * zxdh_process_resize_list - process the cq list to remove buffers + * @iwucq: cq which owns the list + * @lcqe_buf: cq buf where the last cqe is found + */ +static int zxdh_process_resize_list(struct zxdh_ucq *iwucq, + struct zxdh_cq_buf *lcqe_buf) +{ + struct zxdh_cq_buf *cq_buf, *next; + int cq_cnt = 0; + + list_for_each_safe(&iwucq->resize_list, cq_buf, next, list) { + if (cq_buf == lcqe_buf) + return cq_cnt; + + list_del(&cq_buf->list); + zxdh_free_cq_buf(cq_buf); + cq_cnt++; + } + + return cq_cnt; +} + +/** + * zxdh_udestroy_cq - destroys cq + * @cq: ptr to cq to be destroyed + */ +int zxdh_udestroy_cq(struct ibv_cq *cq) +{ + struct zxdh_dev_attrs *dev_attrs; + struct zxdh_uvcontext *iwvctx; + struct zxdh_ucq *iwucq; + __u64 cq_shadow_temp; + int ret; + + iwucq = container_of(cq, struct zxdh_ucq, verbs_cq.cq); + iwvctx = container_of(cq->context, struct zxdh_uvcontext, + ibv_ctx.context); + dev_attrs = &iwvctx->dev_attrs; + + ret = pthread_spin_destroy(&iwucq->lock); + if (ret) + goto err; + + get_64bit_val(iwucq->cq.shadow_area, 0, &cq_shadow_temp); + + zxdh_process_resize_list(iwucq, NULL); + ret = ibv_cmd_destroy_cq(cq); + if (ret) + goto err; + + ibv_cmd_dereg_mr(&iwucq->vmr); + zxdh_free_hw_buf(iwucq->cq.cq_base, iwucq->buf_size); + + if (dev_attrs->feature_flags & ZXDH_FEATURE_CQ_RESIZE) { + ibv_cmd_dereg_mr(&iwucq->vmr_shadow_area); + zxdh_free_hw_buf(iwucq->cq.shadow_area, + ZXDH_DB_SHADOW_AREA_SIZE); + } + free(iwucq); + return 0; + +err: + return ret; +} + +int zxdh_umodify_cq(struct ibv_cq *cq, struct ibv_modify_cq_attr *attr) +{ + struct ibv_modify_cq cmd = {}; + + return ibv_cmd_modify_cq(cq, attr, &cmd, sizeof(cmd)); +} + +static enum ibv_wc_status +zxdh_err_to_ib_wc_status(__u32 opcode) +{ + switch (opcode) { + case ZXDH_RX_WQE_LEN_ERR: + return IBV_WC_LOC_LEN_ERR; + case ZXDH_TX_ACK_SYS_TOP_VADDR_LEN_CHECK_ERR: + case ZXDH_TX_ACK_SYS_TOP_LKEY_CHECK_ERR: + case ZXDH_TX_ACK_SYS_TOP_ACCESS_RIGHT_CHECK_ERR: + case ZXDH_RX_MR_MW_STATE_FREE_ERR: + case ZXDH_RX_MR_MW_STATE_INVALID_ERR: + case ZXDH_RX_MR_MW_PD_CHECK_ERR: + case ZXDH_RX_MR_MW_KEY_CHECK_ERR: + case ZXDH_RX_MR_MW_STAG_INDEX_CHECK_ERR: + case ZXDH_RX_MR_MW_BOUNDARY_CHECK_ERR: + case ZXDH_RX_MR_MW_0STAG_INDEX_CHECK_ERR: + case ZXDH_RX_MW_STATE_INVALID_ERR: + case ZXDH_RX_MW_PD_CHECK_ERR: + case ZXDH_RX_MW_STAG_INDEX_CHECK_ERR: + case ZXDH_RX_MW_SHARE_MR_CHECK_ERR: + case ZXDH_RX_MR_PD_CHECK_ERR: + case ZXDH_RX_MR_SHARE_MR_CHECK_ERR: + case ZXDH_RX_MR_MW_ACCESS_CHECK_ERR: + return IBV_WC_LOC_PROT_ERR; + case ZXDH_TX_PARSE_TOP_WQE_FLUSH: + return IBV_WC_WR_FLUSH_ERR; + case ZXDH_TX_ACK_SYS_TOP_NAK_INVALID_REQ: + return IBV_WC_REM_INV_REQ_ERR; + case ZXDH_TX_ACK_SYS_TOP_NAK_REMOTE_ACCESS_ERR: + case ZXDH_RX_MW_RKEY_CHECK_ERR: + case ZXDH_RX_MR_RKEY_CHECK_ERR: + return IBV_WC_REM_ACCESS_ERR; + case ZXDH_TX_ACK_SYS_TOP_NAK_REMOTE_OPERATIONAL_ERR: + return IBV_WC_REM_OP_ERR; + case ZXDH_TX_ACK_SYS_TOP_NAK_RETRY_LIMIT: + case 
ZXDH_TX_ACK_SYS_TOP_READ_RETRY_LIMIT: + case ZXDH_TX_ACK_SYS_TOP_TIMEOUT_RETRY_LIMIT: + return IBV_WC_RETRY_EXC_ERR; + case ZXDH_TX_ACK_SYS_TOP_RNR_RETRY_LIMIT: + return IBV_WC_RNR_RETRY_EXC_ERR; + case ZXDH_TX_PARSE_TOP_AXI_ERR: + case ZXDH_RX_AXI_RESP_ERR: + return IBV_WC_FATAL_ERR; + default: + return IBV_WC_GENERAL_ERR; + } +} + +/** + * zxdh_process_cqe_ext - process current cqe for extended CQ + * @cur_cqe - current cqe info + */ +static inline void zxdh_process_cqe_ext(struct zxdh_cq_poll_info *cur_cqe) +{ + struct zxdh_ucq *iwucq = + container_of(cur_cqe, struct zxdh_ucq, cur_cqe); + struct ibv_cq_ex *ibvcq_ex = &iwucq->verbs_cq.cq_ex; + + ibvcq_ex->wr_id = cur_cqe->wr_id; + if (cur_cqe->error) + ibvcq_ex->status = + zxdh_err_to_ib_wc_status(cur_cqe->major_err << 16 | + cur_cqe->minor_err); + else + ibvcq_ex->status = IBV_WC_SUCCESS; +} + +/** + * zxdh_process_cqe - process current cqe info + * @entry - ibv_wc object to fill in for non-extended CQ + * @cur_cqe - current cqe info + */ +static inline void zxdh_process_cqe(struct ibv_wc *entry, + struct zxdh_cq_poll_info *cur_cqe) +{ + struct zxdh_qp *qp; + struct ibv_qp *ib_qp; + + entry->wc_flags = 0; + entry->wr_id = cur_cqe->wr_id; + entry->qp_num = cur_cqe->qp_id; + qp = cur_cqe->qp_handle; + ib_qp = qp->back_qp; + + if (cur_cqe->error) { + entry->status = + zxdh_err_to_ib_wc_status(cur_cqe->major_err << 16 | + cur_cqe->minor_err); + entry->vendor_err = + cur_cqe->major_err << 16 | cur_cqe->minor_err; + } else { + entry->status = IBV_WC_SUCCESS; + } + + if (cur_cqe->imm_valid) { + entry->imm_data = htonl(cur_cqe->imm_data); + entry->wc_flags |= IBV_WC_WITH_IMM; + } + + switch (cur_cqe->op_type) { + case ZXDH_OP_TYPE_SEND: + case ZXDH_OP_TYPE_SEND_WITH_IMM: + case ZXDH_OP_TYPE_SEND_INV: + case ZXDH_OP_TYPE_UD_SEND: + case ZXDH_OP_TYPE_UD_SEND_WITH_IMM: + entry->opcode = IBV_WC_SEND; + break; + case ZXDH_OP_TYPE_WRITE: + case ZXDH_OP_TYPE_WRITE_WITH_IMM: + entry->opcode = IBV_WC_RDMA_WRITE; + break; + case ZXDH_OP_TYPE_READ: + entry->opcode = IBV_WC_RDMA_READ; + break; + case ZXDH_OP_TYPE_BIND_MW: + entry->opcode = IBV_WC_BIND_MW; + break; + case ZXDH_OP_TYPE_LOCAL_INV: + entry->opcode = IBV_WC_LOCAL_INV; + break; + case ZXDH_OP_TYPE_REC: + entry->opcode = IBV_WC_RECV; + if (ib_qp->qp_type != IBV_QPT_UD && cur_cqe->stag_invalid_set) { + entry->invalidated_rkey = cur_cqe->inv_stag; + entry->wc_flags |= IBV_WC_WITH_INV; + } + break; + case ZXDH_OP_TYPE_REC_IMM: + entry->opcode = IBV_WC_RECV_RDMA_WITH_IMM; + if (ib_qp->qp_type != IBV_QPT_UD && cur_cqe->stag_invalid_set) { + entry->invalidated_rkey = cur_cqe->inv_stag; + entry->wc_flags |= IBV_WC_WITH_INV; + } + break; + default: + entry->status = IBV_WC_GENERAL_ERR; + return; + } + + if (ib_qp->qp_type == IBV_QPT_UD) { + entry->src_qp = cur_cqe->ud_src_qpn; + entry->wc_flags |= IBV_WC_GRH; + entry->sl = cur_cqe->ipv4 ? 
2 : 1; + } else { + entry->src_qp = cur_cqe->qp_id; + } + entry->byte_len = cur_cqe->bytes_xfered; +} + +/** + * zxdh_poll_one - poll one entry of the CQ + * @cq: cq to poll + * @cur_cqe: current CQE info to be filled in + * @entry: ibv_wc object to be filled for non-extended CQ or NULL for extended CQ + * + * Returns the internal zxdh device error code or 0 on success + */ +static int zxdh_poll_one(struct zxdh_cq *cq, + struct zxdh_cq_poll_info *cur_cqe, + struct ibv_wc *entry) +{ + int ret = zxdh_cq_poll_cmpl(cq, cur_cqe); + + if (ret) + return ret; + + if (entry) + zxdh_process_cqe(entry, cur_cqe); + else + zxdh_process_cqe_ext(cur_cqe); + + return 0; +} + +/** + * __zxdh_upoll_resize_cq - zxdh util function to poll device CQ + * @iwucq: zxdh cq to poll + * @num_entries: max cq entries to poll + * @entry: pointer to array of ibv_wc objects to be filled in for each completion or NULL if ext CQ + * + * Returns non-negative value equal to the number of completions + * found. On failure, -EINVAL + */ +static int __zxdh_upoll_resize_cq(struct zxdh_ucq *iwucq, int num_entries, + struct ibv_wc *entry) +{ + struct zxdh_cq_buf *cq_buf, *next; + struct zxdh_cq_buf *last_buf = NULL; + struct zxdh_cq_poll_info *cur_cqe = &iwucq->cur_cqe; + bool cq_new_cqe = false; + int resized_bufs = 0; + int npolled = 0; + int ret; + + /* go through the list of previously resized CQ buffers */ + list_for_each_safe(&iwucq->resize_list, cq_buf, next, list) { + while (npolled < num_entries) { + ret = zxdh_poll_one(&cq_buf->cq, cur_cqe, + entry ? entry + npolled : NULL); + if (ret == ZXDH_SUCCESS) { + ++npolled; + cq_new_cqe = true; + continue; + } + if (ret == ZXDH_ERR_Q_EMPTY) + break; + if (ret == ZXDH_ERR_RETRY_ACK_NOT_EXCEED_ERR) + break; + /* QP using the CQ is destroyed. Skip reporting this CQE */ + if (ret == ZXDH_ERR_Q_DESTROYED) { + cq_new_cqe = true; + continue; + } + printf("__zrdma_upoll_cq resize goto error failed\n"); + goto error; + } + + /* save the resized CQ buffer which received the last cqe */ + if (cq_new_cqe) + last_buf = cq_buf; + cq_new_cqe = false; + } + + /* check the current CQ for new cqes */ + while (npolled < num_entries) { + ret = zxdh_poll_one(&iwucq->cq, cur_cqe, + entry ? entry + npolled : NULL); + if (ret == ZXDH_SUCCESS) { + ++npolled; + cq_new_cqe = true; + continue; + } + if (ret == ZXDH_ERR_Q_EMPTY) + break; + if (ret == ZXDH_ERR_RETRY_ACK_NOT_EXCEED_ERR) + break; + /* QP using the CQ is destroyed. Skip reporting this CQE */ + if (ret == ZXDH_ERR_Q_DESTROYED) { + cq_new_cqe = true; + continue; + } + printf("__zrdma_upoll_cq goto error failed\n"); + goto error; + } + if (cq_new_cqe) + /* all previous CQ resizes are complete */ + resized_bufs = zxdh_process_resize_list(iwucq, NULL); + else if (last_buf) + /* only CQ resizes up to the last_buf are complete */ + resized_bufs = zxdh_process_resize_list(iwucq, last_buf); + if (resized_bufs) + /* report to the HW the number of complete CQ resizes */ + zxdh_cq_set_resized_cnt(&iwucq->cq, resized_bufs); + + return npolled; + +error: + + return -EINVAL; +} + +/** + * __zxdh_upoll_current_cq - zxdh util function to poll device CQ + * @iwucq: zxdh cq to poll + * @num_entries: max cq entries to poll + * @entry: pointer to array of ibv_wc objects to be filled in for each completion or NULL if ext CQ + * + * Returns non-negative value equal to the number of completions + * found. 
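+ * Polling stops at the first CQE that is empty or reports an error.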
On failure, -EINVAL + */ +static int __zxdh_upoll_curent_cq(struct zxdh_ucq *iwucq, int num_entries, + struct ibv_wc *entry) +{ + struct zxdh_cq_poll_info *cur_cqe = &iwucq->cur_cqe; + int npolled = 0; + int ret; + + /* check the current CQ for new cqes */ + while (npolled < num_entries) { + ret = zxdh_poll_one(&iwucq->cq, cur_cqe, + entry ? entry + npolled : NULL); + if (unlikely(ret != ZXDH_SUCCESS)) + break; + ++npolled; + } + return npolled; +} + +/** + * zxdh_upoll_cq - verb API callback to poll device CQ + * @cq: ibv_cq to poll + * @num_entries: max cq entries to poll + * @entry: pointer to array of ibv_wc objects to be filled in for each completion + * + * Returns non-negative value equal to the number of completions + * found and a negative error code on failure + */ +int zxdh_upoll_cq(struct ibv_cq *cq, int num_entries, struct ibv_wc *entry) +{ + struct zxdh_ucq *iwucq; + int ret; + + iwucq = container_of(cq, struct zxdh_ucq, verbs_cq.cq); + ret = pthread_spin_lock(&iwucq->lock); + if (ret) + return -ret; + + if (likely(!iwucq->resize_enable)) + ret = __zxdh_upoll_curent_cq(iwucq, num_entries, entry); + else + ret = __zxdh_upoll_resize_cq(iwucq, num_entries, entry); + + pthread_spin_unlock(&iwucq->lock); + + return ret; +} + +/** + * zxdh_start_poll - verb_ex API callback to poll batch of WC's + * @ibvcq_ex: ibv extended CQ + * @attr: attributes (not used) + * + * Start polling batch of work completions. Return 0 on success, ENOENT when + * no completions are available on CQ. And an error code on errors + */ +static int zxdh_start_poll(struct ibv_cq_ex *ibvcq_ex, + struct ibv_poll_cq_attr *attr) +{ + struct zxdh_ucq *iwucq; + int ret; + + iwucq = container_of(ibvcq_ex, struct zxdh_ucq, verbs_cq.cq_ex); + ret = pthread_spin_lock(&iwucq->lock); + if (ret) + return ret; + + if (!iwucq->resize_enable) { + ret = __zxdh_upoll_curent_cq(iwucq, 1, NULL); + if (ret == 1) + return 0; + } else { + ret = __zxdh_upoll_resize_cq(iwucq, 1, NULL); + if (ret == 1) + return 0; + } + + /* No Completions on CQ */ + if (!ret) + ret = ENOENT; + + pthread_spin_unlock(&iwucq->lock); + + return ret; +} + +/** + * zxdh_next_poll - verb_ex API callback to get next WC + * @ibvcq_ex: ibv extended CQ + * + * Return 0 on success, ENOENT when no completions are available on CQ.
+ * And an error code on errors + */ +static int zxdh_next_poll(struct ibv_cq_ex *ibvcq_ex) +{ + struct zxdh_ucq *iwucq; + int ret; + + iwucq = container_of(ibvcq_ex, struct zxdh_ucq, verbs_cq.cq_ex); + if (!iwucq->resize_enable) { + ret = __zxdh_upoll_curent_cq(iwucq, 1, NULL); + if (ret == 1) + return 0; + } else { + ret = __zxdh_upoll_resize_cq(iwucq, 1, NULL); + if (ret == 1) + return 0; + } + + /* No Completions on CQ */ + if (!ret) + ret = ENOENT; + + return ret; +} + +/** + * zxdh_end_poll - verb_ex API callback to end polling of WC's + * @ibvcq_ex: ibv extended CQ + */ +static void zxdh_end_poll(struct ibv_cq_ex *ibvcq_ex) +{ + struct zxdh_ucq *iwucq = + container_of(ibvcq_ex, struct zxdh_ucq, verbs_cq.cq_ex); + + pthread_spin_unlock(&iwucq->lock); +} + +/** + * zxdh_wc_read_completion_ts - Get completion timestamp + * @ibvcq_ex: ibv extended CQ + * + * Get completion timestamp in HCA clock units + */ +static uint64_t zxdh_wc_read_completion_ts(struct ibv_cq_ex *ibvcq_ex) +{ + struct zxdh_ucq *iwucq = + container_of(ibvcq_ex, struct zxdh_ucq, verbs_cq.cq_ex); +#define HCA_CORE_CLOCK_800_MHZ 800 + + return iwucq->cur_cqe.tcp_seq_num_rtt / HCA_CORE_CLOCK_800_MHZ; +} + +/** + * zxdh_wc_read_completion_wallclock_ns - Get completion timestamp in ns + * @ibvcq_ex: ibv extended CQ + * + * Get completion timestamp from current completion in wall clock nanoseconds + */ +static uint64_t zxdh_wc_read_completion_wallclock_ns(struct ibv_cq_ex *ibvcq_ex) +{ + struct zxdh_ucq *iwucq = + container_of(ibvcq_ex, struct zxdh_ucq, verbs_cq.cq_ex); + + /* RTT is in usec */ + return (uint64_t)iwucq->cur_cqe.tcp_seq_num_rtt * 1000; +} + +static enum ibv_wc_opcode zxdh_wc_read_opcode(struct ibv_cq_ex *ibvcq_ex) +{ + struct zxdh_ucq *iwucq = + container_of(ibvcq_ex, struct zxdh_ucq, verbs_cq.cq_ex); + + switch (iwucq->cur_cqe.op_type) { + case ZXDH_OP_TYPE_WRITE: + case ZXDH_OP_TYPE_WRITE_WITH_IMM: + return IBV_WC_RDMA_WRITE; + case ZXDH_OP_TYPE_READ: + return IBV_WC_RDMA_READ; + case ZXDH_OP_TYPE_SEND: + case ZXDH_OP_TYPE_SEND_WITH_IMM: + case ZXDH_OP_TYPE_SEND_INV: + case ZXDH_OP_TYPE_UD_SEND: + case ZXDH_OP_TYPE_UD_SEND_WITH_IMM: + return IBV_WC_SEND; + case ZXDH_OP_TYPE_BIND_MW: + return IBV_WC_BIND_MW; + case ZXDH_OP_TYPE_REC: + return IBV_WC_RECV; + case ZXDH_OP_TYPE_REC_IMM: + return IBV_WC_RECV_RDMA_WITH_IMM; + case ZXDH_OP_TYPE_LOCAL_INV: + return IBV_WC_LOCAL_INV; + } + + return 0; +} + +static uint32_t zxdh_wc_read_vendor_err(struct ibv_cq_ex *ibvcq_ex) +{ + struct zxdh_cq_poll_info *cur_cqe; + struct zxdh_ucq *iwucq; + + iwucq = container_of(ibvcq_ex, struct zxdh_ucq, verbs_cq.cq_ex); + cur_cqe = &iwucq->cur_cqe; + + return cur_cqe->error ? 
cur_cqe->major_err << 16 | cur_cqe->minor_err : + 0; +} + +static unsigned int zxdh_wc_read_wc_flags(struct ibv_cq_ex *ibvcq_ex) +{ + struct zxdh_cq_poll_info *cur_cqe; + struct zxdh_ucq *iwucq; + struct zxdh_qp *qp; + struct ibv_qp *ib_qp; + unsigned int wc_flags = 0; + + iwucq = container_of(ibvcq_ex, struct zxdh_ucq, verbs_cq.cq_ex); + cur_cqe = &iwucq->cur_cqe; + qp = cur_cqe->qp_handle; + ib_qp = qp->back_qp; + + if (cur_cqe->imm_valid) + wc_flags |= IBV_WC_WITH_IMM; + + if (ib_qp->qp_type == IBV_QPT_UD) { + wc_flags |= IBV_WC_GRH; + } else { + if (cur_cqe->stag_invalid_set) { + switch (cur_cqe->op_type) { + case ZXDH_OP_TYPE_REC: + wc_flags |= IBV_WC_WITH_INV; + break; + case ZXDH_OP_TYPE_REC_IMM: + wc_flags |= IBV_WC_WITH_INV; + break; + } + } + } + + return wc_flags; +} + +static uint32_t zxdh_wc_read_byte_len(struct ibv_cq_ex *ibvcq_ex) +{ + struct zxdh_ucq *iwucq = + container_of(ibvcq_ex, struct zxdh_ucq, verbs_cq.cq_ex); + + return iwucq->cur_cqe.bytes_xfered; +} + +static __be32 zxdh_wc_read_imm_data(struct ibv_cq_ex *ibvcq_ex) +{ + struct zxdh_cq_poll_info *cur_cqe; + struct zxdh_ucq *iwucq; + + iwucq = container_of(ibvcq_ex, struct zxdh_ucq, verbs_cq.cq_ex); + cur_cqe = &iwucq->cur_cqe; + + return cur_cqe->imm_valid ? htonl(cur_cqe->imm_data) : 0; +} + +static uint32_t zxdh_wc_read_qp_num(struct ibv_cq_ex *ibvcq_ex) +{ + struct zxdh_ucq *iwucq = + container_of(ibvcq_ex, struct zxdh_ucq, verbs_cq.cq_ex); + + return iwucq->cur_cqe.qp_id; +} + +static uint32_t zxdh_wc_read_src_qp(struct ibv_cq_ex *ibvcq_ex) +{ + struct zxdh_cq_poll_info *cur_cqe; + struct zxdh_ucq *iwucq; + struct zxdh_qp *qp; + struct ibv_qp *ib_qp; + + iwucq = container_of(ibvcq_ex, struct zxdh_ucq, verbs_cq.cq_ex); + cur_cqe = &iwucq->cur_cqe; + qp = cur_cqe->qp_handle; + ib_qp = qp->back_qp; + + return ib_qp->qp_type == IBV_QPT_UD ? 
cur_cqe->ud_src_qpn : + cur_cqe->qp_id; +} + +static uint32_t zxdh_wc_read_slid(struct ibv_cq_ex *ibvcq_ex) +{ + return 0; +} + +static uint8_t zxdh_wc_read_sl(struct ibv_cq_ex *ibvcq_ex) +{ + return 0; +} + +static uint8_t zxdh_wc_read_dlid_path_bits(struct ibv_cq_ex *ibvcq_ex) +{ + return 0; +} + +void zxdh_ibvcq_ex_fill_priv_funcs(struct zxdh_ucq *iwucq, + struct ibv_cq_init_attr_ex *attr_ex) +{ + struct ibv_cq_ex *ibvcq_ex = &iwucq->verbs_cq.cq_ex; + + ibvcq_ex->start_poll = zxdh_start_poll; + ibvcq_ex->end_poll = zxdh_end_poll; + ibvcq_ex->next_poll = zxdh_next_poll; + + if (attr_ex->wc_flags & IBV_WC_EX_WITH_COMPLETION_TIMESTAMP) { + ibvcq_ex->read_completion_ts = zxdh_wc_read_completion_ts; + iwucq->report_rtt = true; + } + if (attr_ex->wc_flags & IBV_WC_EX_WITH_COMPLETION_TIMESTAMP_WALLCLOCK) { + ibvcq_ex->read_completion_wallclock_ns = + zxdh_wc_read_completion_wallclock_ns; + iwucq->report_rtt = true; + } + + ibvcq_ex->read_opcode = zxdh_wc_read_opcode; + ibvcq_ex->read_vendor_err = zxdh_wc_read_vendor_err; + ibvcq_ex->read_wc_flags = zxdh_wc_read_wc_flags; + + if (attr_ex->wc_flags & IBV_WC_EX_WITH_BYTE_LEN) + ibvcq_ex->read_byte_len = zxdh_wc_read_byte_len; + if (attr_ex->wc_flags & IBV_WC_EX_WITH_IMM) + ibvcq_ex->read_imm_data = zxdh_wc_read_imm_data; + if (attr_ex->wc_flags & IBV_WC_EX_WITH_QP_NUM) + ibvcq_ex->read_qp_num = zxdh_wc_read_qp_num; + if (attr_ex->wc_flags & IBV_WC_EX_WITH_SRC_QP) + ibvcq_ex->read_src_qp = zxdh_wc_read_src_qp; + if (attr_ex->wc_flags & IBV_WC_EX_WITH_SLID) + ibvcq_ex->read_slid = zxdh_wc_read_slid; + if (attr_ex->wc_flags & IBV_WC_EX_WITH_SL) + ibvcq_ex->read_sl = zxdh_wc_read_sl; + if (attr_ex->wc_flags & IBV_WC_EX_WITH_DLID_PATH_BITS) + ibvcq_ex->read_dlid_path_bits = zxdh_wc_read_dlid_path_bits; +} + +/** + * zxdh_arm_cq - arm of cq + * @iwucq: cq to which arm + * @cq_notify: notification params + */ +static void zxdh_arm_cq(struct zxdh_ucq *iwucq, enum zxdh_cmpl_notify cq_notify) +{ + iwucq->is_armed = true; + iwucq->last_notify = cq_notify; + + zxdh_cq_request_notification(&iwucq->cq, cq_notify); +} + +/** + * zxdh_uarm_cq - callback for arm of cq + * @cq: cq to arm + * @solicited: to get notify params + */ +int zxdh_uarm_cq(struct ibv_cq *cq, int solicited) +{ + struct zxdh_ucq *iwucq; + enum zxdh_cmpl_notify cq_notify = ZXDH_CQ_COMPL_EVENT; + bool promo_event = false; + int ret; + + iwucq = container_of(cq, struct zxdh_ucq, verbs_cq.cq); + if (solicited) { + cq_notify = ZXDH_CQ_COMPL_SOLICITED; + } else { + if (iwucq->last_notify == ZXDH_CQ_COMPL_SOLICITED) + promo_event = true; + } + + ret = pthread_spin_lock(&iwucq->lock); + if (ret) + return ret; + + if (!iwucq->is_armed || promo_event) + zxdh_arm_cq(iwucq, cq_notify); + + pthread_spin_unlock(&iwucq->lock); + + return 0; +} + +/** + * zxdh_cq_event - cq to do completion event + * @cq: cq to arm + */ +void zxdh_cq_event(struct ibv_cq *cq) +{ + struct zxdh_ucq *iwucq; + + iwucq = container_of(cq, struct zxdh_ucq, verbs_cq.cq); + if (pthread_spin_lock(&iwucq->lock)) + return; + + iwucq->is_armed = false; + + pthread_spin_unlock(&iwucq->lock); +} + +void *zxdh_mmap(int fd, off_t offset) +{ + void *map; + + map = mmap(NULL, ZXDH_HW_PAGE_SIZE, PROT_WRITE | PROT_READ, MAP_SHARED, + fd, offset); + if (map == MAP_FAILED) + return map; + + if (ibv_dontfork_range(map, ZXDH_HW_PAGE_SIZE)) { + munmap(map, ZXDH_HW_PAGE_SIZE); + return MAP_FAILED; + } + + return map; +} + +void zxdh_munmap(void *map) +{ + ibv_dofork_range(map, ZXDH_HW_PAGE_SIZE); + munmap(map, ZXDH_HW_PAGE_SIZE); +} + +/** + * 
zxdh_destroy_vmapped_qp - destroy resources for qp + * @iwuqp: qp struct for resources + */ +static int zxdh_destroy_vmapped_qp(struct zxdh_uqp *iwuqp) +{ + int ret; + + ret = ibv_cmd_destroy_qp(&iwuqp->vqp.qp); + if (ret) + return ret; + + ibv_cmd_dereg_mr(&iwuqp->vmr); + + return 0; +} + +/** + * zxdh_vmapped_qp - create resources for qp + * @iwuqp: qp struct for resources + * @pd: pd for the qp + * @attr: attributes of qp passed + * @sqdepth: depth of sq + * @rqdepth: depth of rq + * @info: info for initializing user level qp + * @legacy_mode: legacy mode flag from the user context + */ +static int zxdh_vmapped_qp(struct zxdh_uqp *iwuqp, struct ibv_pd *pd, + struct ibv_qp_init_attr *attr, int sqdepth, + int rqdepth, struct zxdh_qp_init_info *info, + bool legacy_mode) +{ + struct zxdh_ucreate_qp cmd = {}; + size_t sqsize, rqsize, totalqpsize; + struct zxdh_ucreate_qp_resp resp = {}; + struct zxdh_ureg_mr reg_mr_cmd = {}; + struct ib_uverbs_reg_mr_resp reg_mr_resp = {}; + int ret; + + rqsize = 0; + sqsize = roundup(sqdepth * ZXDH_QP_SQE_MIN_SIZE, ZXDH_HW_PAGE_SIZE); + if (iwuqp->is_srq == false) { + rqsize = roundup(rqdepth * ZXDH_QP_RQE_MIN_SIZE, + ZXDH_HW_PAGE_SIZE); + totalqpsize = rqsize + sqsize + ZXDH_DB_SHADOW_AREA_SIZE; + } else { + totalqpsize = sqsize + ZXDH_DB_SHADOW_AREA_SIZE; + } + info->sq = zxdh_alloc_hw_buf(totalqpsize); + iwuqp->buf_size = totalqpsize; + + if (!info->sq) + return -ENOMEM; + + memset(info->sq, 0, totalqpsize); + if (iwuqp->is_srq == false) { + info->rq = (struct zxdh_qp_rq_quanta *)&info + ->sq[sqsize / ZXDH_QP_SQE_MIN_SIZE]; + info->shadow_area = + info->rq[rqsize / ZXDH_QP_RQE_MIN_SIZE].elem; + reg_mr_cmd.rq_pages = rqsize >> ZXDH_HW_PAGE_SHIFT; + } else { + info->shadow_area = + (__le64 *)&info->sq[sqsize / ZXDH_QP_SQE_MIN_SIZE]; + } + reg_mr_cmd.reg_type = ZXDH_MEMREG_TYPE_QP; + reg_mr_cmd.sq_pages = sqsize >> ZXDH_HW_PAGE_SHIFT; + + ret = ibv_cmd_reg_mr(pd, info->sq, totalqpsize, (uintptr_t)info->sq, + IBV_ACCESS_LOCAL_WRITE, &iwuqp->vmr, + &reg_mr_cmd.ibv_cmd, sizeof(reg_mr_cmd), + &reg_mr_resp, sizeof(reg_mr_resp)); + if (ret) + goto err_dereg_mr; + + cmd.user_wqe_bufs = (__u64)((uintptr_t)info->sq); + cmd.user_compl_ctx = (__u64)(uintptr_t)&iwuqp->qp; + ret = ibv_cmd_create_qp(pd, &iwuqp->vqp.qp, attr, &cmd.ibv_cmd, + sizeof(cmd), &resp.ibv_resp, + sizeof(struct zxdh_ucreate_qp_resp)); + if (ret) + goto err_qp; + + info->sq_size = resp.actual_sq_size; + info->rq_size = resp.actual_rq_size; + info->qp_caps = resp.qp_caps; + info->qp_id = resp.qp_id; + iwuqp->zxdh_drv_opt = resp.zxdh_drv_opt; + iwuqp->vqp.qp.qp_num = resp.qp_id; + + iwuqp->send_cq = + container_of(attr->send_cq, struct zxdh_ucq, verbs_cq.cq); + iwuqp->recv_cq = + container_of(attr->recv_cq, struct zxdh_ucq, verbs_cq.cq); + iwuqp->send_cq->uqp = iwuqp; + iwuqp->recv_cq->uqp = iwuqp; + + return 0; +err_qp: + ibv_cmd_dereg_mr(&iwuqp->vmr); +err_dereg_mr: + zxdh_free_hw_buf(info->sq, iwuqp->buf_size); + return ret; +} + +static void zxdh_wr_local_inv(struct ibv_qp_ex *ibqp, uint32_t invalidate_rkey) +{ + struct zxdh_uqp *qp = container_of(ibqp, struct zxdh_uqp, vqp.qp_ex); + struct ibv_send_wr wr = {}; + struct ibv_send_wr *bad_wr = NULL; + + wr.opcode = IBV_WR_LOCAL_INV; + wr.invalidate_rkey = invalidate_rkey; + + zxdh_upost_send(&qp->vqp.qp, &wr, &bad_wr); +} + +static void zxdh_send_wr_send_inv(struct ibv_qp_ex *ibqp, + uint32_t invalidate_rkey) +{ + struct zxdh_uqp *qp = container_of(ibqp, struct zxdh_uqp, vqp.qp_ex); + struct ibv_send_wr wr = {}; + struct
ibv_send_wr *bad_wr = NULL; + + wr.opcode = IBV_WR_SEND_WITH_INV; + wr.invalidate_rkey = invalidate_rkey; + + zxdh_upost_send(&qp->vqp.qp, &wr, &bad_wr); +} + +static void zxdh_wr_bind_mw(struct ibv_qp_ex *ibqp, struct ibv_mw *ibmw, + uint32_t rkey, const struct ibv_mw_bind_info *info) +{ + struct zxdh_uqp *qp = container_of(ibqp, struct zxdh_uqp, vqp.qp_ex); + struct ibv_send_wr wr = {}; + struct ibv_send_wr *bad_wr = NULL; + + if (ibmw->type != IBV_MW_TYPE_2) + return; + + wr.opcode = IBV_WR_BIND_MW; + wr.bind_mw.bind_info = *info; + wr.bind_mw.mw = ibmw; + wr.bind_mw.rkey = rkey; + + zxdh_upost_send(&qp->vqp.qp, &wr, &bad_wr); +} + +static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx, + struct ibv_qp_init_attr_ex *attr_ex) +{ + struct zxdh_qp_init_info info = {}; + struct zxdh_dev_attrs *dev_attrs; + struct zxdh_uvcontext *iwvctx; + struct zxdh_uqp *iwuqp; + struct zxdh_usrq *iwusrq; + struct ibv_pd *pd = attr_ex->pd; + struct ibv_qp_init_attr *attr; + __u32 sqdepth, rqdepth; + __u8 sqshift, rqshift; + int status; + + attr = calloc(1, sizeof(*attr)); + if (!attr) + return NULL; + + memcpy(attr, attr_ex, sizeof(*attr)); + + if (attr->qp_type != IBV_QPT_RC && attr->qp_type != IBV_QPT_UD) { + errno = EOPNOTSUPP; + free(attr); + return NULL; + } + + iwvctx = container_of(ibv_ctx, struct zxdh_uvcontext, ibv_ctx.context); + dev_attrs = &iwvctx->dev_attrs; + + if (attr->cap.max_send_sge > dev_attrs->max_hw_wq_frags || + attr->cap.max_recv_sge > dev_attrs->max_hw_wq_frags) { + errno = EINVAL; + free(attr); + return NULL; + } + + if (attr->cap.max_inline_data > dev_attrs->max_hw_inline) { + zxdh_dbg(ZXDH_DBG_QP, "max_inline_data over max_hw_inline\n"); + attr->cap.max_inline_data = dev_attrs->max_hw_inline; + } + + zxdh_get_sq_wqe_shift(attr->cap.max_send_sge, + attr->cap.max_inline_data, &sqshift); + status = zxdh_get_sqdepth(dev_attrs, attr->cap.max_send_wr, sqshift, + &sqdepth); + if (status) { + errno = EINVAL; + free(attr); + return NULL; + } + + zxdh_get_rq_wqe_shift(attr->cap.max_recv_sge, &rqshift); + status = zxdh_get_rqdepth(dev_attrs, attr->cap.max_recv_wr, rqshift, + &rqdepth); + if (status) { + errno = EINVAL; + free(attr); + return NULL; + } + + iwuqp = memalign(1024, sizeof(*iwuqp)); + if (!iwuqp) { + free(attr); + return NULL; + } + + memset(iwuqp, 0, sizeof(*iwuqp)); + + if (attr_ex->comp_mask & IBV_QP_INIT_ATTR_SEND_OPS_FLAGS) { + if (attr_ex->send_ops_flags & ~IBV_QP_EX_WITH_BIND_MW) { + errno = EOPNOTSUPP; + free(iwuqp); + free(attr); + return NULL; + } + + iwuqp->vqp.comp_mask |= VERBS_QP_EX; + if (attr_ex->send_ops_flags & IBV_QP_EX_WITH_BIND_MW) + iwuqp->vqp.qp_ex.wr_bind_mw = zxdh_wr_bind_mw; + + if (attr_ex->send_ops_flags & IBV_QP_EX_WITH_SEND_WITH_INV) + iwuqp->vqp.qp_ex.wr_send_inv = zxdh_send_wr_send_inv; + + if (attr_ex->send_ops_flags & IBV_QP_EX_WITH_LOCAL_INV) + iwuqp->vqp.qp_ex.wr_local_inv = zxdh_wr_local_inv; + } + + if (pthread_spin_init(&iwuqp->lock, PTHREAD_PROCESS_PRIVATE)) + goto err_free_qp; + + info.sq_size = sqdepth >> sqshift; + info.rq_size = rqdepth >> rqshift; + attr->cap.max_send_wr = info.sq_size; + attr->cap.max_recv_wr = info.rq_size; + + info.dev_attrs = dev_attrs; + info.max_sq_frag_cnt = attr->cap.max_send_sge; + info.max_rq_frag_cnt = attr->cap.max_recv_sge; + + if (attr->srq != NULL) { + iwuqp->is_srq = true; + iwusrq = container_of(attr->srq, struct zxdh_usrq, ibv_srq); + iwuqp->srq = iwusrq; + iwuqp->qp.is_srq = true; + } + + if (iwuqp->is_srq == false) { + iwuqp->recv_sges = calloc(attr->cap.max_recv_sge, + 
sizeof(*iwuqp->recv_sges)); + if (!iwuqp->recv_sges) + goto err_destroy_lock; + } + + info.wqe_alloc_db = + (__u32 *)((__u8 *)iwvctx->sq_db + ZXDH_DB_SQ_OFFSET); + info.abi_ver = iwvctx->abi_ver; + info.legacy_mode = iwvctx->legacy_mode; + info.sq_wrtrk_array = calloc(sqdepth, sizeof(*info.sq_wrtrk_array)); + if (!info.sq_wrtrk_array) + goto err_free_rsges; + + if (iwuqp->is_srq == false) { + info.rq_wrid_array = + calloc(info.rq_size, sizeof(*info.rq_wrid_array)); + if (!info.rq_wrid_array) + goto err_free_sq_wrtrk; + } + + iwuqp->sq_sig_all = attr->sq_sig_all; + iwuqp->qp_type = attr->qp_type; + if (attr->qp_type == IBV_QPT_UD) + info.type = ZXDH_QP_TYPE_ROCE_UD; + else + info.type = ZXDH_QP_TYPE_ROCE_RC; + status = zxdh_vmapped_qp(iwuqp, pd, attr, sqdepth, rqdepth, &info, + iwvctx->legacy_mode); + if (status) { + errno = status; + goto err_free_rq_wrid; + } + + iwuqp->qp.back_qp = iwuqp; + iwuqp->qp.lock = &iwuqp->lock; + info.max_sq_frag_cnt = attr->cap.max_send_sge; + info.max_rq_frag_cnt = attr->cap.max_recv_sge; + info.max_inline_data = attr->cap.max_inline_data; + if (info.type == ZXDH_QP_TYPE_ROCE_RC) { + iwuqp->qp.split_sg_list = + calloc(2 * dev_attrs->max_hw_read_sges, + sizeof(*iwuqp->qp.split_sg_list)); + if (!iwuqp->qp.split_sg_list) + goto err_free_vmap_qp; + } + status = zxdh_qp_init(&iwuqp->qp, &info); + if (status) { + errno = EINVAL; + goto err_free_sg_list; + } + iwuqp->qp.mtu = mtu_enum_to_int(IBV_MTU_1024); + attr->cap.max_send_wr = (sqdepth - ZXDH_SQ_RSVD) >> sqshift; + attr->cap.max_recv_wr = (rqdepth - ZXDH_RQ_RSVD) >> rqshift; + memcpy(attr_ex, attr, sizeof(*attr)); + free(attr); + return &iwuqp->vqp.qp; + +err_free_sg_list: + if (iwuqp->qp.split_sg_list) + free(iwuqp->qp.split_sg_list); +err_free_vmap_qp: + zxdh_destroy_vmapped_qp(iwuqp); + zxdh_free_hw_buf(info.sq, iwuqp->buf_size); +err_free_rq_wrid: + free(info.rq_wrid_array); +err_free_sq_wrtrk: + free(info.sq_wrtrk_array); +err_free_rsges: + free(iwuqp->recv_sges); +err_destroy_lock: + pthread_spin_destroy(&iwuqp->lock); +err_free_qp: + free(iwuqp); + free(attr); + + return NULL; +} + +/** + * zxdh_ucreate_qp - create qp on user app + * @pd: pd for the qp + * @attr: attributes of the qp to be created (sizes, sge, cq) + */ +struct ibv_qp *zxdh_ucreate_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr) +{ + struct ibv_qp_init_attr_ex attrx = {}; + struct ibv_qp *qp; + + memcpy(&attrx, attr, sizeof(*attr)); + attrx.comp_mask = IBV_QP_INIT_ATTR_PD; + attrx.pd = pd; + + qp = create_qp(pd->context, &attrx); + if (qp) + memcpy(attr, &attrx, sizeof(*attr)); + + return qp; +} + +/** + * zxdh_ucreate_qp_ex - create qp_ex on user app + * @context: user context of the device + * @attr: attributes of the qp_ex to be created + */ +struct ibv_qp *zxdh_ucreate_qp_ex(struct ibv_context *context, + struct ibv_qp_init_attr_ex *attr) +{ + return create_qp(context, attr); +} + +/** + * zxdh_uquery_qp - query qp for some attribute + * @qp: qp for the attributes query + * @attr: to return the attributes + * @attr_mask: mask of what is query for + * @init_attr: initial attributes during create_qp + */ +int zxdh_uquery_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask, + struct ibv_qp_init_attr *init_attr) +{ + struct ibv_query_qp cmd; + + return ibv_cmd_query_qp(qp, attr, attr_mask, init_attr, &cmd, + sizeof(cmd)); +} + +/** + * zxdh_clean_cqes - clean cq entries for qp + * @qp: qp for which completions are cleaned + * @iwcq: cq to be cleaned + */ +static void zxdh_clean_cqes(struct zxdh_qp *qp, struct zxdh_ucq 
*iwucq) +{ + struct zxdh_cq *ukcq = &iwucq->cq; + int ret; + + ret = pthread_spin_lock(&iwucq->lock); + if (ret) + return; + + zxdh_clean_cq(qp, ukcq); + pthread_spin_unlock(&iwucq->lock); +} + +static void zxdh_init_qp_indices(struct zxdh_qp *qp) +{ + __u32 sq_ring_size; + + sq_ring_size = ZXDH_RING_SIZE(qp->sq_ring); + ZXDH_RING_INIT(qp->sq_ring, sq_ring_size); + ZXDH_RING_INIT(qp->initial_ring, sq_ring_size); + qp->swqe_polarity = 0; + qp->swqe_polarity_deferred = 1; + qp->rwqe_polarity = 0; + qp->rwqe_signature = 0; + ZXDH_RING_INIT(qp->rq_ring, qp->rq_size); +} + +/** + * zxdh_umodify_qp - send qp modify to driver + * @qp: qp to modify + * @attr: attribute to modify + * @attr_mask: mask of the attribute + */ +int zxdh_umodify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask) +{ + struct zxdh_uqp *iwuqp; + struct zxdh_umodify_qp_resp resp = {}; + struct ibv_modify_qp cmd = {}; + struct zxdh_umodify_qp cmd_ex = {}; + int ret; + __u16 mtu = 0; + + iwuqp = container_of(qp, struct zxdh_uqp, vqp.qp); + if (attr_mask & IBV_QP_STATE || attr_mask & IBV_QP_RATE_LIMIT) { + ret = ibv_cmd_modify_qp_ex(qp, attr, attr_mask, &cmd_ex.ibv_cmd, + sizeof(cmd_ex), &resp.ibv_resp, + sizeof(resp)); + } else { + ret = ibv_cmd_modify_qp(qp, attr, attr_mask, &cmd, sizeof(cmd)); + } + + if (!ret && + (attr_mask & IBV_QP_STATE) && + attr->qp_state == IBV_QPS_RESET) { + if (iwuqp->send_cq) + zxdh_clean_cqes(&iwuqp->qp, iwuqp->send_cq); + + if (iwuqp->recv_cq && iwuqp->recv_cq != iwuqp->send_cq) + zxdh_clean_cqes(&iwuqp->qp, iwuqp->recv_cq); + zxdh_init_qp_indices(&iwuqp->qp); + } + + if (!ret && (attr_mask & IBV_QP_PATH_MTU) && + qp->qp_type == IBV_QPT_RC) { + mtu = mtu_enum_to_int(attr->path_mtu); + if (mtu == 0) + return -EINVAL; + iwuqp->qp.mtu = mtu; + } + if (!ret && (attr_mask & IBV_QP_SQ_PSN) && qp->qp_type == IBV_QPT_RC) { + iwuqp->qp.next_psn = attr->sq_psn; + iwuqp->qp.cqe_last_ack_qsn = attr->sq_psn - 1; + iwuqp->qp.qp_last_ack_qsn = attr->sq_psn - 1; + iwuqp->qp.cqe_retry_cnt = 0; + iwuqp->qp.qp_reset_cnt = 0; + } + return ret; +} + +static void zxdh_issue_flush(struct ibv_qp *qp, bool sq_flush, bool rq_flush) +{ + struct ib_uverbs_ex_modify_qp_resp resp = {}; + struct zxdh_umodify_qp cmd_ex = {}; + struct ibv_qp_attr attr = {}; + + attr.qp_state = IBV_QPS_ERR; + cmd_ex.sq_flush = sq_flush; + cmd_ex.rq_flush = rq_flush; + + ibv_cmd_modify_qp_ex(qp, &attr, IBV_QP_STATE, &cmd_ex.ibv_cmd, + sizeof(cmd_ex), &resp, sizeof(resp)); +} + +/** + * zxdh_udestroy_qp - destroy qp + * @qp: qp to destroy + */ +int zxdh_udestroy_qp(struct ibv_qp *qp) +{ + struct zxdh_uqp *iwuqp; + int ret; + + iwuqp = container_of(qp, struct zxdh_uqp, vqp.qp); + ret = pthread_spin_destroy(&iwuqp->lock); + if (ret) + goto err; + + iwuqp->qp.destroy_pending = true; + + ret = zxdh_destroy_vmapped_qp(iwuqp); + if (ret) + goto err; + + /* Clean any pending completions from the cq(s) */ + if (iwuqp->send_cq) + zxdh_clean_cqes(&iwuqp->qp, iwuqp->send_cq); + + if (iwuqp->recv_cq && iwuqp->recv_cq != iwuqp->send_cq) + zxdh_clean_cqes(&iwuqp->qp, iwuqp->recv_cq); + + if (iwuqp->qp.sq_wrtrk_array) + free(iwuqp->qp.sq_wrtrk_array); + if (iwuqp->qp.rq_wrid_array) + free(iwuqp->qp.rq_wrid_array); + if (iwuqp->qp.split_sg_list) + free(iwuqp->qp.split_sg_list); + + zxdh_free_hw_buf(iwuqp->qp.sq_base, iwuqp->buf_size); + free(iwuqp->recv_sges); + free(iwuqp); + return 0; + +err: + return ret; +} + +/** + * zxdh_copy_sg_list - copy sg list for qp + * @sg_list: copied into sg_list + * @sgl: copy from sgl + * @num_sges: count of sg entries 
+ * @max_sges: count of max supported sg entries + */ +static void zxdh_copy_sg_list(struct zxdh_sge *sg_list, struct ibv_sge *sgl, + int num_sges) +{ + int i; + + for (i = 0; i < num_sges; i++) { + sg_list[i].tag_off = sgl[i].addr; + sg_list[i].len = sgl[i].length; + sg_list[i].stag = sgl[i].lkey; + } +} + +/** + * calc_type2_mw_stag - calculate type 2 MW stag + * @rkey: desired rkey of the MW + * @mw_rkey: type2 memory window rkey + * + * compute type2 memory window stag by taking lower 8 bits + * of the desired rkey and leaving 24 bits if mw->rkey unchanged + */ +static inline __u32 calc_type2_mw_stag(__u32 rkey, __u32 mw_rkey) +{ + const __u32 mask = 0xff; + + return (rkey & mask) | (mw_rkey & ~mask); +} + +/** + * zxdh_post_send - post send wr for user application + * @ib_qp: qp to post wr + * @ib_wr: work request ptr + * @bad_wr: return of bad wr if err + */ +int zxdh_upost_send(struct ibv_qp *ib_qp, struct ibv_send_wr *ib_wr, + struct ibv_send_wr **bad_wr) +{ + struct zxdh_post_sq_info info; + struct zxdh_uvcontext *iwvctx; + struct zxdh_dev_attrs *dev_attrs; + enum zxdh_status_code ret = 0; + struct zxdh_uqp *iwuqp; + bool reflush = false; + int err = 0; + struct verbs_mr *vmr = NULL; + struct zxdh_umr *umr = NULL; + __u64 mr_va = 0, mw_va = 0, value_dffer = 0, mw_pa_pble_index = 0; + __u16 mr_offset = 0; + + iwuqp = container_of(ib_qp, struct zxdh_uqp, vqp.qp); + iwvctx = container_of(ib_qp->context, struct zxdh_uvcontext, + ibv_ctx.context); + dev_attrs = &iwvctx->dev_attrs; + + err = pthread_spin_lock(&iwuqp->lock); + if (err) + return err; + + if (!ZXDH_RING_MORE_WORK(iwuqp->qp.sq_ring) && + ib_qp->state == IBV_QPS_ERR) + reflush = true; + + while (ib_wr) { + memset(&info, 0, sizeof(info)); + info.wr_id = (__u64)(ib_wr->wr_id); + if ((ib_wr->send_flags & IBV_SEND_SIGNALED) || + iwuqp->sq_sig_all) + info.signaled = true; + if (ib_wr->send_flags & IBV_SEND_FENCE) + info.read_fence = true; + + switch (ib_wr->opcode) { + case IBV_WR_SEND_WITH_IMM: + if (iwuqp->qp.qp_caps & ZXDH_SEND_WITH_IMM) { + info.imm_data_valid = true; + info.imm_data = ntohl(ib_wr->imm_data); + } else { + err = EINVAL; + break; + } + SWITCH_FALLTHROUGH; + case IBV_WR_SEND: + case IBV_WR_SEND_WITH_INV: + if (ib_wr->send_flags & IBV_SEND_SOLICITED) + info.solicited = 1; + + if (ib_wr->opcode == IBV_WR_SEND) { + if (ib_qp->qp_type == IBV_QPT_UD) + info.op_type = ZXDH_OP_TYPE_UD_SEND; + else + info.op_type = ZXDH_OP_TYPE_SEND; + } else if (ib_wr->opcode == IBV_WR_SEND_WITH_IMM) { + if (ib_qp->qp_type == IBV_QPT_UD) + info.op_type = + ZXDH_OP_TYPE_UD_SEND_WITH_IMM; + else + info.op_type = + ZXDH_OP_TYPE_SEND_WITH_IMM; + } else { + info.op_type = ZXDH_OP_TYPE_SEND_INV; + info.stag_to_inv = ib_wr->invalidate_rkey; + } + + if ((ib_wr->send_flags & IBV_SEND_INLINE) && + (ib_wr->num_sge != 0)) { + ret = zxdh_get_inline_data( + iwuqp->inline_data, ib_wr, + &info.op.inline_rdma_send.len); + if (ret) { + printf("err:zxdh_get_inline_data fail\n"); + pthread_spin_unlock(&iwuqp->lock); + return -EINVAL; + } + info.op.inline_rdma_send.data = + iwuqp->inline_data; + if (ib_qp->qp_type == IBV_QPT_UD) { + struct zxdh_uah *ah = + container_of(ib_wr->wr.ud.ah, + struct zxdh_uah, + ibv_ah); + info.op.inline_rdma_send.ah_id = + ah->ah_id; + info.op.inline_rdma_send.qkey = + ib_wr->wr.ud.remote_qkey; + info.op.inline_rdma_send.dest_qp = + ib_wr->wr.ud.remote_qpn; + ret = zxdh_ud_inline_send( + &iwuqp->qp, &info, false); + } else { + ret = zxdh_rc_inline_send( + &iwuqp->qp, &info, false); + } + } else { + info.op.send.num_sges = 
ib_wr->num_sge; + info.op.send.sg_list = + (struct zxdh_sge *)ib_wr->sg_list; + if (ib_qp->qp_type == IBV_QPT_UD) { + struct zxdh_uah *ah = + container_of(ib_wr->wr.ud.ah, + struct zxdh_uah, + ibv_ah); + + info.op.inline_rdma_send.ah_id = + ah->ah_id; + info.op.inline_rdma_send.qkey = + ib_wr->wr.ud.remote_qkey; + info.op.inline_rdma_send.dest_qp = + ib_wr->wr.ud.remote_qpn; + ret = zxdh_ud_send(&iwuqp->qp, &info, + false); + } else { + ret = zxdh_rc_send(&iwuqp->qp, &info, + false); + } + } + if (ret) + err = (ret == ZXDH_ERR_QP_TOOMANY_WRS_POSTED) ? + ENOMEM : + EINVAL; + break; + case IBV_WR_RDMA_WRITE_WITH_IMM: + if (iwuqp->qp.qp_caps & ZXDH_WRITE_WITH_IMM) { + info.imm_data_valid = true; + info.imm_data = ntohl(ib_wr->imm_data); + } else { + err = -EINVAL; + break; + } + SWITCH_FALLTHROUGH; + case IBV_WR_RDMA_WRITE: + if (ib_wr->send_flags & IBV_SEND_SOLICITED) + info.solicited = 1; + + if (ib_wr->opcode == IBV_WR_RDMA_WRITE) + info.op_type = ZXDH_OP_TYPE_WRITE; + else + info.op_type = ZXDH_OP_TYPE_WRITE_WITH_IMM; + + if ((ib_wr->send_flags & IBV_SEND_INLINE) && + (ib_wr->num_sge != 0)) { + ret = zxdh_get_inline_data( + iwuqp->inline_data, ib_wr, + &info.op.inline_rdma_write.len); + if (ret) { + printf("err:zxdh_get_inline_data fail\n"); + pthread_spin_unlock(&iwuqp->lock); + return -EINVAL; + } + info.op.inline_rdma_write.data = + iwuqp->inline_data; + info.op.inline_rdma_write.rem_addr.tag_off = + ib_wr->wr.rdma.remote_addr; + info.op.inline_rdma_write.rem_addr.stag = + ib_wr->wr.rdma.rkey; + ret = zxdh_inline_rdma_write(&iwuqp->qp, + &info, false); + } else { + info.op.rdma_write.lo_sg_list = + (void *)ib_wr->sg_list; + info.op.rdma_write.num_lo_sges = ib_wr->num_sge; + info.op.rdma_write.rem_addr.tag_off = + ib_wr->wr.rdma.remote_addr; + info.op.rdma_write.rem_addr.stag = + ib_wr->wr.rdma.rkey; + ret = zxdh_rdma_write(&iwuqp->qp, &info, + false); + } + if (ret) + err = (ret == ZXDH_ERR_QP_TOOMANY_WRS_POSTED) ? + ENOMEM : + EINVAL; + break; + case IBV_WR_RDMA_READ: + if (ib_wr->num_sge > dev_attrs->max_hw_read_sges) { + err = EINVAL; + break; + } + info.op_type = ZXDH_OP_TYPE_READ; + info.op.rdma_read.rem_addr.tag_off = + ib_wr->wr.rdma.remote_addr; + info.op.rdma_read.rem_addr.stag = ib_wr->wr.rdma.rkey; + + info.op.rdma_read.lo_sg_list = (void *)ib_wr->sg_list; + info.op.rdma_read.num_lo_sges = ib_wr->num_sge; + ret = zxdh_rdma_read(&iwuqp->qp, &info, false, + false); + if (ret) + err = (ret == ZXDH_ERR_QP_TOOMANY_WRS_POSTED) ? 
+ ENOMEM : + EINVAL; + break; + case IBV_WR_BIND_MW: + vmr = verbs_get_mr(ib_wr->bind_mw.bind_info.mr); + umr = container_of(vmr, struct zxdh_umr, vmr); + mr_va = (uintptr_t)ib_wr->bind_mw.bind_info.mr->addr; + mw_va = ib_wr->bind_mw.bind_info.addr; + mr_offset = 0; + value_dffer = 0; + mw_pa_pble_index = 0; + + if (ib_qp->qp_type != IBV_QPT_RC) { + err = EINVAL; + break; + } + info.op_type = ZXDH_OP_TYPE_BIND_MW; + info.op.bind_window.mr_stag = + ib_wr->bind_mw.bind_info.mr->rkey; + + if (ib_wr->bind_mw.mw->type == IBV_MW_TYPE_1) { + info.op.bind_window.mem_window_type_1 = true; + info.op.bind_window.mw_stag = + ib_wr->bind_mw.rkey; + } else { + info.op.bind_window.mem_window_type_1 = false; + info.op.bind_window.mw_stag = + calc_type2_mw_stag( + ib_wr->bind_mw.rkey, + ib_wr->bind_mw.mw->rkey); + ib_wr->bind_mw.mw->rkey = + info.op.bind_window.mw_stag; + } + + if (ib_wr->bind_mw.bind_info.mw_access_flags & + IBV_ACCESS_ZERO_BASED) { + info.op.bind_window.addressing_type = + ZXDH_ADDR_TYPE_ZERO_BASED; + if (ib_wr->bind_mw.mw->type == IBV_MW_TYPE_1) { + err = EINVAL; + break; + } + + info.op.bind_window.addressing_type = + ZXDH_ADDR_TYPE_ZERO_BASED; + info.op.bind_window.host_page_size = + umr->host_page_size; + if (umr->host_page_size == ZXDH_PAGE_SIZE_4K) { + mr_offset = mr_va & 0x0fff; + value_dffer = mw_va - mr_va; + if (umr->leaf_pbl_size == 3) { + mw_pa_pble_index = + (mr_offset + + value_dffer) / + (4096 * 512); + info.op.bind_window + .mw_pa_pble_index = + umr->mr_pa_pble_index + + mw_pa_pble_index; + mw_pa_pble_index = + ((mr_offset + + value_dffer) / + 4096) % + 512; + + info.op.bind_window + .root_leaf_offset = + (__u16)mw_pa_pble_index; + info.op.bind_window.va = + (void *)(uintptr_t)(mw_va & + 0x0fff); + info.op.bind_window + .leaf_pbl_size = 3; + + } else if (umr->leaf_pbl_size == 1) { + mw_pa_pble_index = + (mr_offset + + value_dffer) / + 4096; + info.op.bind_window + .mw_pa_pble_index = + umr->mr_pa_pble_index + + mw_pa_pble_index; + info.op.bind_window + .leaf_pbl_size = 1; + info.op.bind_window.va = + (void *)(uintptr_t)(mw_va & + 0x0fff); + info.op.bind_window + .root_leaf_offset = 0; + } else { + mw_pa_pble_index = + umr->mr_pa_pble_index + + mr_offset + value_dffer; + info.op.bind_window.va = + (void *)(uintptr_t)(mw_va & + 0x0fff); + info.op.bind_window + .mw_pa_pble_index = + mw_pa_pble_index; + info.op.bind_window + .leaf_pbl_size = 0; + info.op.bind_window + .root_leaf_offset = 0; + } + + } else if (umr->host_page_size == + ZXDH_PAGE_SIZE_2M) { + mr_offset = mr_va & 0x1FFFFF; + value_dffer = mw_va - mr_va; + if (umr->leaf_pbl_size == 3) { + mw_pa_pble_index = + (mr_offset + + value_dffer) / + ((4096 * 512) * 512); + info.op.bind_window + .mw_pa_pble_index = + umr->mr_pa_pble_index + + mw_pa_pble_index; + mw_pa_pble_index = + ((mr_offset + + value_dffer) / + (4096 * 512)) % + 512; + + info.op.bind_window + .root_leaf_offset = + (__u16)mw_pa_pble_index; + info.op.bind_window.va = + (void *)(uintptr_t)(mw_va & + 0x1FFFFF); + info.op.bind_window + .leaf_pbl_size = 3; + + } else if (umr->leaf_pbl_size == 1) { + mw_pa_pble_index = + (mr_offset + + value_dffer) / + (4096 * 512); + info.op.bind_window + .mw_pa_pble_index = + umr->mr_pa_pble_index + + mw_pa_pble_index; + info.op.bind_window + .leaf_pbl_size = 1; + info.op.bind_window.va = + (void *)(uintptr_t)(mw_va & + 0x1FFFFF); + info.op.bind_window + .root_leaf_offset = 0; + } else { + mw_pa_pble_index = + umr->mr_pa_pble_index + + mr_offset + value_dffer; + info.op.bind_window.va = + (void *)(uintptr_t)(mw_va & + 
0x1FFFFF); + info.op.bind_window + .mw_pa_pble_index = + mw_pa_pble_index; + info.op.bind_window + .leaf_pbl_size = 0; + info.op.bind_window + .root_leaf_offset = 0; + } + } else if (umr->host_page_size == + ZXDH_PAGE_SIZE_1G) { + mr_offset = mr_va & 0x3FFFFFFF; + value_dffer = mw_va - mr_va; + if (umr->leaf_pbl_size == 1) { + mw_pa_pble_index = + (mr_offset + + value_dffer) / + (1024 * 1024 * 1024); + info.op.bind_window + .mw_pa_pble_index = + umr->mr_pa_pble_index + + mw_pa_pble_index; + info.op.bind_window + .leaf_pbl_size = 1; + info.op.bind_window.va = + (void *)(uintptr_t)(mw_va & + 0x3FFFFFFF); + info.op.bind_window + .root_leaf_offset = 0; + } else if (umr->leaf_pbl_size == 0) { + mw_pa_pble_index = + umr->mr_pa_pble_index + + mr_offset + value_dffer; + info.op.bind_window.va = + (void *)(uintptr_t)(mw_va & + 0x3FFFFFFF); + info.op.bind_window + .mw_pa_pble_index = + mw_pa_pble_index; + info.op.bind_window + .leaf_pbl_size = 0; + info.op.bind_window + .root_leaf_offset = 0; + } + } + + } else { + info.op.bind_window.addressing_type = + ZXDH_ADDR_TYPE_VA_BASED; + info.op.bind_window.va = + (void *)(uintptr_t) + ib_wr->bind_mw.bind_info.addr; + info.op.bind_window.host_page_size = + umr->host_page_size; + + if (umr->host_page_size == ZXDH_PAGE_SIZE_4K) { + mr_offset = mr_va & 0x0fff; + value_dffer = mw_va - mr_va; + if (umr->leaf_pbl_size == 3) { + mw_pa_pble_index = + (mr_offset + + value_dffer) / + (4096 * 512); + info.op.bind_window + .mw_pa_pble_index = + umr->mr_pa_pble_index + + mw_pa_pble_index; + mw_pa_pble_index = + ((mr_offset + + value_dffer) / + 4096) % + 512; + info.op.bind_window + .root_leaf_offset = + (__u16)mw_pa_pble_index; + info.op.bind_window + .leaf_pbl_size = 3; + } else if (umr->leaf_pbl_size == 1) { + info.op.bind_window + .mw_pa_pble_index = + umr->mr_pa_pble_index + + ((mr_offset + + value_dffer) / + 4096); + info.op.bind_window + .leaf_pbl_size = 1; + info.op.bind_window + .root_leaf_offset = 0; + } else { + info.op.bind_window + .leaf_pbl_size = 0; + info.op.bind_window + .mw_pa_pble_index = + umr->mr_pa_pble_index + + (mr_va & 0x0fff) + + (mw_va - mr_va); + info.op.bind_window + .root_leaf_offset = 0; + } + } else if (umr->host_page_size == + ZXDH_PAGE_SIZE_2M) { + mr_offset = mr_va & 0x1FFFFF; + value_dffer = mw_va - mr_va; + if (umr->leaf_pbl_size == 3) { + mw_pa_pble_index = + (mr_offset + + value_dffer) / + ((4096 * 512) * 512); + info.op.bind_window + .mw_pa_pble_index = + umr->mr_pa_pble_index + + mw_pa_pble_index; + mw_pa_pble_index = + ((mr_offset + + value_dffer) / + (4096 * 512)) % + 512; + info.op.bind_window + .root_leaf_offset = + (__u16)mw_pa_pble_index; + info.op.bind_window + .leaf_pbl_size = 3; + } else if (umr->leaf_pbl_size == 1) { + info.op.bind_window + .mw_pa_pble_index = + umr->mr_pa_pble_index + + ((mr_offset + + value_dffer) / + (4096 * 512)); + info.op.bind_window + .leaf_pbl_size = 1; + info.op.bind_window + .root_leaf_offset = 0; + } else { + info.op.bind_window + .leaf_pbl_size = 0; + info.op.bind_window + .mw_pa_pble_index = + umr->mr_pa_pble_index + + (mr_va & 0x1FFFFF) + + (mw_va - mr_va); + info.op.bind_window + .root_leaf_offset = 0; + } + } else if (umr->host_page_size == + ZXDH_PAGE_SIZE_1G) { + mr_offset = mr_va & 0x3FFFFFFF; + value_dffer = mw_va - mr_va; + if (umr->leaf_pbl_size == 1) { + info.op.bind_window + .mw_pa_pble_index = + umr->mr_pa_pble_index + + ((mr_offset + + value_dffer) / + (1024 * 1024 * 1024)); + info.op.bind_window + .leaf_pbl_size = 1; + info.op.bind_window + .root_leaf_offset = 0; + } else if 
(umr->leaf_pbl_size == 0) { + info.op.bind_window + .leaf_pbl_size = 0; + info.op.bind_window + .mw_pa_pble_index = + umr->mr_pa_pble_index + + (mr_va & 0x3FFFFFFF) + + (mw_va - mr_va); + info.op.bind_window + .root_leaf_offset = 0; + } + } + } + + info.op.bind_window.bind_len = + ib_wr->bind_mw.bind_info.length; + info.op.bind_window.ena_reads = + (ib_wr->bind_mw.bind_info.mw_access_flags & + IBV_ACCESS_REMOTE_READ) ? + 1 : + 0; + info.op.bind_window.ena_writes = + (ib_wr->bind_mw.bind_info.mw_access_flags & + IBV_ACCESS_REMOTE_WRITE) ? + 1 : + 0; + + ret = zxdh_mw_bind(&iwuqp->qp, &info, false); + if (ret) + err = ENOMEM; + + break; + case IBV_WR_LOCAL_INV: + info.op_type = ZXDH_OP_TYPE_LOCAL_INV; + info.op.inv_local_stag.target_stag = + ib_wr->invalidate_rkey; + ret = zxdh_stag_local_invalidate(&iwuqp->qp, &info, + true); + if (ret) + err = ENOMEM; + break; + default: + /* error */ + err = EINVAL; + break; + } + if (err) + break; + + ib_wr = ib_wr->next; + } + + if (err) + *bad_wr = ib_wr; + + zxdh_qp_post_wr(&iwuqp->qp); + if (reflush) + zxdh_issue_flush(ib_qp, 1, 0); + + pthread_spin_unlock(&iwuqp->lock); + + return err; +} + +/** + * zxdh_post_recv - post receive wr for user application + * @ib_wr: work request for receive + * @bad_wr: bad wr caused an error + */ +int zxdh_upost_recv(struct ibv_qp *ib_qp, struct ibv_recv_wr *ib_wr, + struct ibv_recv_wr **bad_wr) +{ + struct zxdh_post_rq_info post_recv = {}; + enum zxdh_status_code ret = 0; + struct zxdh_sge *sg_list; + struct zxdh_uqp *iwuqp; + bool reflush = false; + int err = 0; + + iwuqp = container_of(ib_qp, struct zxdh_uqp, vqp.qp); + sg_list = iwuqp->recv_sges; + + if (unlikely(ib_qp->state == IBV_QPS_RESET || ib_qp->srq)) { + *bad_wr = ib_wr; + printf("err:post recv at reset or using srq\n"); + return -EINVAL; + } + + err = pthread_spin_lock(&iwuqp->lock); + if (err) + return err; + + if (unlikely(!ZXDH_RING_MORE_WORK(iwuqp->qp.rq_ring)) && + ib_qp->state == IBV_QPS_ERR) + reflush = true; + + while (ib_wr) { + if (unlikely(ib_wr->num_sge > iwuqp->qp.max_rq_frag_cnt)) { + *bad_wr = ib_wr; + err = EINVAL; + goto error; + } + post_recv.num_sges = ib_wr->num_sge; + post_recv.wr_id = ib_wr->wr_id; + zxdh_copy_sg_list(sg_list, ib_wr->sg_list, ib_wr->num_sge); + post_recv.sg_list = sg_list; + ret = zxdh_post_receive(&iwuqp->qp, &post_recv); + if (unlikely(ret)) { + err = (ret == ZXDH_ERR_QP_TOOMANY_WRS_POSTED) ? 
ENOMEM : + EINVAL; + *bad_wr = ib_wr; + goto error; + } + + if (reflush) + zxdh_issue_flush(ib_qp, 0, 1); + + ib_wr = ib_wr->next; + } +error: + zxdh_qp_set_shadow_area(&iwuqp->qp); + pthread_spin_unlock(&iwuqp->lock); + + return err; +} + +/** + * zxdh_ucreate_ah - create address handle associated with a pd + * @ibpd: pd for the address handle + * @attr: attributes of address handle + */ +struct ibv_ah *zxdh_ucreate_ah(struct ibv_pd *ibpd, struct ibv_ah_attr *attr) +{ + struct zxdh_uah *ah; + union ibv_gid sgid; + struct zxdh_ucreate_ah_resp resp; + int err; + + memset(&resp, 0, sizeof(resp)); + err = ibv_query_gid(ibpd->context, attr->port_num, attr->grh.sgid_index, + &sgid); + if (err) { + errno = err; + return NULL; + } + + ah = calloc(1, sizeof(*ah)); + if (!ah) + return NULL; + + err = ibv_cmd_create_ah(ibpd, &ah->ibv_ah, attr, &resp.ibv_resp, + sizeof(resp)); + if (err) { + free(ah); + errno = err; + return NULL; + } + + ah->ah_id = resp.ah_id; + + return &ah->ibv_ah; +} + +/** + * zxdh_udestroy_ah - destroy the address handle + * @ibah: address handle + */ +int zxdh_udestroy_ah(struct ibv_ah *ibah) +{ + struct zxdh_uah *ah; + int ret; + + ah = container_of(ibah, struct zxdh_uah, ibv_ah); + + ret = ibv_cmd_destroy_ah(ibah); + if (ret) + return ret; + + free(ah); + + return 0; +} + +/** + * zxdh_uattach_mcast - Attach qp to multicast group implemented + * @qp: The queue pair + * @gid:The Global ID for multicast group + * @lid: The Local ID + */ +int zxdh_uattach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, + uint16_t lid) +{ + return ibv_cmd_attach_mcast(qp, gid, lid); +} + +/** + * zxdh_udetach_mcast - Detach qp from multicast group + * @qp: The queue pair + * @gid:The Global ID for multicast group + * @lid: The Local ID + */ +int zxdh_udetach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, + uint16_t lid) +{ + return ibv_cmd_detach_mcast(qp, gid, lid); +} + +/** + * zxdh_uresize_cq - resizes a cq + * @cq: cq to resize + * @cqe: the number of cqes of the new cq + */ +int zxdh_uresize_cq(struct ibv_cq *cq, int cqe) +{ + struct zxdh_uvcontext *iwvctx; + struct zxdh_dev_attrs *dev_attrs; + struct zxdh_uresize_cq cmd = {}; + struct ib_uverbs_resize_cq_resp resp = {}; + struct zxdh_ureg_mr reg_mr_cmd = {}; + struct ib_uverbs_reg_mr_resp reg_mr_resp = {}; + struct zxdh_cq_buf *cq_buf = NULL; + struct zxdh_cqe *cq_base = NULL; + struct verbs_mr new_mr = {}; + struct zxdh_ucq *iwucq; + size_t cq_size; + __u32 cq_pages; + int cqe_needed; + int ret = 0; + + iwucq = container_of(cq, struct zxdh_ucq, verbs_cq.cq); + iwvctx = container_of(cq->context, struct zxdh_uvcontext, + ibv_ctx.context); + dev_attrs = &iwvctx->dev_attrs; + + if (!(dev_attrs->feature_flags & ZXDH_FEATURE_CQ_RESIZE)) + return -EOPNOTSUPP; + + if (cqe > dev_attrs->max_hw_cq_size) + return -EINVAL; + + cqe_needed = zxdh_cq_round_up(cqe); + + if (cqe_needed < ZXDH_U_MINCQ_SIZE) + cqe_needed = ZXDH_U_MINCQ_SIZE; + + if (cqe_needed == iwucq->cq.cq_size) + return 0; + + cq_size = get_cq_total_bytes(cqe_needed); + cq_pages = cq_size >> ZXDH_HW_PAGE_SHIFT; + cq_base = zxdh_alloc_hw_buf(cq_size); + if (!cq_base) + return -ENOMEM; + + memset(cq_base, 0, cq_size); + + cq_buf = malloc(sizeof(*cq_buf)); + if (!cq_buf) { + ret = -ENOMEM; + goto err_buf; + } + + ret = pthread_spin_lock(&iwucq->lock); + if (ret) + goto err_lock; + + new_mr.ibv_mr.pd = iwucq->vmr.ibv_mr.pd; + reg_mr_cmd.reg_type = ZXDH_MEMREG_TYPE_CQ; + reg_mr_cmd.cq_pages = cq_pages; + + ret = ibv_cmd_reg_mr(new_mr.ibv_mr.pd, cq_base, cq_size, + (uintptr_t)cq_base, 
IBV_ACCESS_LOCAL_WRITE, + &new_mr, &reg_mr_cmd.ibv_cmd, sizeof(reg_mr_cmd), + &reg_mr_resp, sizeof(reg_mr_resp)); + if (ret) + goto err_dereg_mr; + + cmd.user_cq_buffer = (__u64)((uintptr_t)cq_base); + ret = ibv_cmd_resize_cq(&iwucq->verbs_cq.cq, cqe_needed, &cmd.ibv_cmd, + sizeof(cmd), &resp, sizeof(resp)); + if (ret) + goto err_resize; + + memcpy(&cq_buf->cq, &iwucq->cq, sizeof(cq_buf->cq)); + cq_buf->vmr = iwucq->vmr; + iwucq->vmr = new_mr; + zxdh_cq_resize(&iwucq->cq, cq_base, cqe_needed); + iwucq->verbs_cq.cq.cqe = cqe; + list_add_tail(&iwucq->resize_list, &cq_buf->list); + iwucq->resize_enable = true; + pthread_spin_unlock(&iwucq->lock); + + return ret; + +err_resize: + ibv_cmd_dereg_mr(&new_mr); +err_dereg_mr: + pthread_spin_unlock(&iwucq->lock); +err_lock: + free(cq_buf); +err_buf: + zxdh_free_hw_buf(cq_base, cq_size); + return ret; +} + +static void zxdh_srq_wqe_init(struct zxdh_usrq *iwusrq) +{ + uint32_t i; + struct zxdh_srq *srq; + __le64 *wqe; + __u64 hdr; + + srq = &iwusrq->srq; + + for (i = srq->srq_ring.head; i < srq->srq_ring.tail; i++) { + wqe = zxdh_get_srq_wqe(srq, i); + + hdr = FIELD_PREP(ZXDHQPSRQ_NEXT_WQE_INDEX, (uint32_t)(i + 1)); + + udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ + set_64bit_val(wqe, 0, hdr); + } +} + +static size_t zxdh_get_srq_queue_size(int srqdepth) +{ + return roundup(srqdepth * ZXDH_SRQ_WQE_MIN_SIZE, ZXDH_HW_PAGE_SIZE); +} + +static size_t zxdh_get_srq_list_size(size_t srq_size) +{ + return roundup(srq_size * sizeof(__u16), ZXDH_HW_PAGE_SIZE); +} + +static size_t zxdh_get_srq_db_size(void) +{ + return 8 * sizeof(char); +} + +static size_t zxdh_get_total_srq_size(struct zxdh_usrq *iwusrq, int srqdepth, + size_t srq_size) +{ + size_t total_srq_queue_size; + size_t total_srq_list_size; + size_t total_srq_db_size; + size_t total_srq_size; + + total_srq_queue_size = zxdh_get_srq_queue_size(srqdepth); + iwusrq->buf_size = total_srq_queue_size; + total_srq_list_size = zxdh_get_srq_list_size(srq_size); + iwusrq->list_buf_size = total_srq_list_size; + total_srq_db_size = zxdh_get_srq_db_size(); + iwusrq->db_buf_size = total_srq_db_size; + total_srq_size = + total_srq_queue_size + total_srq_list_size + total_srq_db_size; + iwusrq->total_buf_size = total_srq_size; + zxdh_dbg( + ZXDH_DBG_SRQ, + "%s total_srq_queue_size:%zu total_srq_list_size:%zu total_srq_db_size:%zu srqdepth:%d\n", + __func__, total_srq_queue_size, total_srq_list_size, + total_srq_db_size, srqdepth); + + return total_srq_size; +} + +static int zxdh_alloc_srq_buf(struct zxdh_usrq *iwusrq, + struct zxdh_srq_init_info *info, + size_t total_srq_size) +{ + info->srq_base = zxdh_alloc_hw_buf(total_srq_size); + if (!info->srq_base) + return -ENOMEM; + memset(info->srq_base, 0, total_srq_size); + info->srq_list_base = + (__le16 *)&info + ->srq_base[iwusrq->buf_size / ZXDH_SRQ_WQE_MIN_SIZE]; + info->srq_db_base = + (__le64 *)&info->srq_list_base[iwusrq->list_buf_size / + (sizeof(__u16))]; + *(__le64 *)info->srq_db_base = (__le64)htole64((uint64_t)ZXDH_SRQ_DB_INIT_VALUE); + zxdh_dbg(ZXDH_DBG_SRQ, + "%s srq_base:0x%p srq_list_base:0x%p srq_db_base:0x%p\n", + __func__, info->srq_base, info->srq_list_base, + info->srq_db_base); + return 0; +} + +static int zxdh_reg_srq_mr(struct ibv_pd *pd, + struct zxdh_srq_init_info *info, + size_t total_srq_size, uint16_t srq_pages, + uint16_t srq_list_pages, struct zxdh_usrq *iwusrq) +{ + struct zxdh_ureg_mr reg_mr_cmd = {}; + struct ib_uverbs_reg_mr_resp reg_mr_resp = {}; + int ret; + + reg_mr_cmd.reg_type = ZXDH_MEMREG_TYPE_SRQ;
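	/*
	 * Editorial note, not part of the original patch: the memory region
	 * registered here is the single zxdh_alloc_hw_buf() allocation that
	 * zxdh_alloc_srq_buf() carved into three consecutive pieces, using
	 * the sizes computed by zxdh_get_total_srq_size():
	 *
	 *   srq_base      at offset 0                          (WQE ring, buf_size bytes)
	 *   srq_list_base at offset buf_size                   (__le16 index list ring)
	 *   srq_db_base   at offset buf_size + list_buf_size   (8-byte doorbell/shadow word)
	 *
	 * Registering it as a ZXDH_MEMREG_TYPE_SRQ region hands the kernel
	 * driver the srq_pages/srq_list_pages counts it needs to map those
	 * pieces for hardware before create_srq() is issued below.
	 */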
+ reg_mr_cmd.srq_pages = srq_pages; + reg_mr_cmd.srq_list_pages = srq_list_pages; + ret = ibv_cmd_reg_mr(pd, info->srq_base, total_srq_size, + (uintptr_t)info->srq_base, IBV_ACCESS_LOCAL_WRITE, + &iwusrq->vmr, &reg_mr_cmd.ibv_cmd, + sizeof(reg_mr_cmd), &reg_mr_resp, + sizeof(reg_mr_resp)); + if (ret) + return ret; + + return 0; +} + +static int create_srq(struct ibv_pd *pd, struct zxdh_usrq *iwusrq, + struct ibv_srq_init_attr *attr, + struct zxdh_srq_init_info *info) +{ + struct zxdh_ucreate_srq cmd = {}; + struct zxdh_ucreate_srq_resp resp = {}; + int ret; + + cmd.user_wqe_bufs = (__u64)((uintptr_t)info->srq_base); + cmd.user_compl_ctx = (__u64)(uintptr_t)&iwusrq->srq; + cmd.user_wqe_list = (__u64)((uintptr_t)info->srq_list_base); + cmd.user_wqe_db = (__u64)((uintptr_t)info->srq_db_base); + ret = ibv_cmd_create_srq(pd, &iwusrq->ibv_srq, attr, &cmd.ibv_cmd, + sizeof(cmd), &resp.ibv_resp, + sizeof(struct zxdh_ucreate_srq_resp)); + if (ret) + return ret; + + iwusrq->srq_id = resp.srq_id; + info->srq_id = resp.srq_id; + info->srq_size = resp.actual_srq_size; + info->srq_list_size = resp.actual_srq_list_size; + zxdh_dbg( + ZXDH_DBG_SRQ, + "%s info->srq_id:%d info->srq_size:%d info->srq_list_size:%d\n", + __func__, info->srq_id, info->srq_size, info->srq_list_size); + + return 0; +} + +/** + * zxdh_vmapped_srq - create resources for srq + * @iwusrq: srq struct for resources + * @pd: pd for the srq + * @attr: attributes of srq passed + * @srqdepth: depth of srq + * @info: info for initializing user level srq + */ +static int zxdh_vmapped_srq(struct zxdh_usrq *iwusrq, struct ibv_pd *pd, + struct ibv_srq_init_attr *attr, int srqdepth, + struct zxdh_srq_init_info *info) +{ + size_t total_srq_size; + size_t srq_pages = 0; + size_t srq_list_pages = 0; + int ret; + + total_srq_size = + zxdh_get_total_srq_size(iwusrq, srqdepth, info->srq_size); + srq_pages = iwusrq->buf_size >> ZXDH_HW_PAGE_SHIFT; + srq_list_pages = iwusrq->list_buf_size >> ZXDH_HW_PAGE_SHIFT; + ret = zxdh_alloc_srq_buf(iwusrq, info, total_srq_size); + if (ret) + return -ENOMEM; + zxdh_dbg(ZXDH_DBG_SRQ, "%s srq_pages:%zu srq_list_pages:%zu\n", + __func__, srq_pages, srq_list_pages); + + ret = zxdh_reg_srq_mr(pd, info, total_srq_size, srq_pages, + srq_list_pages, iwusrq); + zxdh_dbg(ZXDH_DBG_SRQ, "%s %d ret:%d\n", __func__, __LINE__, ret); + if (ret) { + errno = ret; + goto err_dereg_srq_mr; + } + ret = create_srq(pd, iwusrq, attr, info); + zxdh_dbg(ZXDH_DBG_SRQ, "%s %d ret:%d\n", __func__, __LINE__, ret); + if (ret) + goto err_srq; + + return 0; +err_srq: + ibv_cmd_dereg_mr(&iwusrq->vmr); +err_dereg_srq_mr: + zxdh_free_hw_buf(info->srq_base, total_srq_size); + + return ret; +} + +/** + * zxdh_destroy_vmapped_srq - destroy resources for srq + * @iwusrq: srq struct for resources + */ +static int zxdh_destroy_vmapped_srq(struct zxdh_usrq *iwusrq) +{ + int ret; + + ret = ibv_cmd_destroy_srq(&iwusrq->ibv_srq); + if (ret) + return ret; + + ibv_cmd_dereg_mr(&iwusrq->vmr); + return 0; +} + +static int zxdh_check_srq_init_attr(struct ibv_srq_init_attr *srq_init_attr, + struct zxdh_dev_attrs *dev_attrs) +{ + if ((srq_init_attr->attr.srq_limit > srq_init_attr->attr.max_wr) || + (srq_init_attr->attr.max_sge > dev_attrs->max_hw_wq_frags) || + (srq_init_attr->attr.max_wr > dev_attrs->max_hw_srq_wr)) { + return 1; + } + return 0; +} + +static int zxdh_init_iwusrq(struct zxdh_usrq *iwusrq, + struct ibv_srq_init_attr *srq_init_attr, + __u32 srqdepth, __u8 srqshift, + struct zxdh_srq_init_info *info, + struct
zxdh_dev_attrs *dev_attrs) +{ + info->srq_size = srqdepth >> srqshift; + iwusrq->max_wr = info->srq_size; + iwusrq->max_sge = srq_init_attr->attr.max_sge; + iwusrq->srq_limit = srq_init_attr->attr.srq_limit; + + srq_init_attr->attr.max_wr = info->srq_size; + info->dev_attrs = dev_attrs; + info->max_srq_frag_cnt = srq_init_attr->attr.max_sge; + info->srq_wrid_array = + calloc(info->srq_size, sizeof(*info->srq_wrid_array)); + if (info->srq_wrid_array == NULL) + return 1; + + return 0; +} + +/** + * zxdh_ucreate_srq - create srq on user app + * @pd: pd for the srq + * @srq_init_attr: attributes of the srq to be created (sizes, sge) + */ +struct ibv_srq *zxdh_ucreate_srq(struct ibv_pd *pd, + struct ibv_srq_init_attr *srq_init_attr) +{ + struct zxdh_srq_init_info info = {}; + struct zxdh_dev_attrs *dev_attrs; + struct zxdh_uvcontext *iwvctx; + __u32 srqdepth; + __u8 srqshift; + int status; + int ret; + struct zxdh_usrq *iwusrq; + + iwvctx = container_of(pd->context, struct zxdh_uvcontext, + ibv_ctx.context); + dev_attrs = &iwvctx->dev_attrs; + + if ((zxdh_check_srq_init_attr(srq_init_attr, dev_attrs)) != 0) { + zxdh_dbg(ZXDH_DBG_SRQ, "zxdh_check_srq_init_attr failed\n"); + errno = EINVAL; + return NULL; + } + + /* get shift count for maximum wqe size */ + zxdh_get_srq_wqe_shift(dev_attrs, srq_init_attr->attr.max_sge, + &srqshift); + + /* get RQ/SRQ depth (quanta),minimum number of units in srq */ + status = zxdh_get_srqdepth(dev_attrs->max_hw_srq_quanta, + srq_init_attr->attr.max_wr, srqshift, + &srqdepth); + zxdh_dbg( + ZXDH_DBG_SRQ, + "%s %d status:%d srqshift:%d srqdepth:%d dev_attrs->max_hw_srq_quanta:%d srq_init_attr->attr.max_wr:%d\n", + __func__, __LINE__, status, srqshift, srqdepth, + dev_attrs->max_hw_srq_quanta, srq_init_attr->attr.max_wr); + if (status != 0) { + zxdh_dbg(ZXDH_DBG_SRQ, "zxdh_get_srqdepth failed\n"); + errno = EINVAL; + return NULL; + } + iwusrq = memalign(1024, sizeof(*iwusrq)); + if (!iwusrq) + return NULL; + memset(iwusrq, 0, sizeof(*iwusrq)); + if (pthread_spin_init(&iwusrq->lock, PTHREAD_PROCESS_PRIVATE) != 0) + goto err_free_srq; + + if (zxdh_init_iwusrq(iwusrq, srq_init_attr, srqdepth, srqshift, &info, + dev_attrs)) { + zxdh_dbg(ZXDH_DBG_SRQ, "calloc srq_wrid_array failed\n"); + goto err_srq_wrid_array; + } + status = zxdh_vmapped_srq(iwusrq, pd, srq_init_attr, srqdepth, &info); + zxdh_dbg(ZXDH_DBG_SRQ, "%s %d status:%d\n", __func__, __LINE__, status); + if (status) { + zxdh_dbg(ZXDH_DBG_SRQ, "zxdh_vmapped_srq failed\n"); + errno = status; + goto err_vmapped_srq; + } + + status = zxdh_srq_init(&iwusrq->srq, &info); + zxdh_dbg(ZXDH_DBG_SRQ, "%s %d status:%d\n", __func__, __LINE__, status); + if (status) { + zxdh_dbg(ZXDH_DBG_SRQ, "zxdh_srq_init failed\n"); + errno = EINVAL; + goto err_free_srq_init; + } + zxdh_srq_wqe_init(iwusrq); + + srq_init_attr->attr.max_wr = (srqdepth - ZXDH_SRQ_RSVD) >> srqshift; + + zxdh_dbg(ZXDH_DBG_SRQ, "%s iwusrq->srq_id:%d info.srq_size:%d\n", + __func__, iwusrq->srq_id, info.srq_size); + return &iwusrq->ibv_srq; + +err_free_srq_init: + zxdh_destroy_vmapped_srq(iwusrq); + zxdh_free_hw_buf(info.srq_base, iwusrq->total_buf_size); +err_vmapped_srq: + free(info.srq_wrid_array); +err_srq_wrid_array: + ret = pthread_spin_destroy(&iwusrq->lock); + if (ret) + errno = EINVAL; +err_free_srq: + free(iwusrq); + zxdh_dbg(ZXDH_DBG_SRQ, "%s %d\n", __func__, __LINE__); + return NULL; +} + +/** + * zxdh_udestroy_srq - destroy srq on user app + * @srq: srq to destroy + */ +int zxdh_udestroy_srq(struct ibv_srq *srq) +{ + struct zxdh_usrq *iwusrq; + int 
ret; + + iwusrq = container_of(srq, struct zxdh_usrq, ibv_srq); + ret = pthread_spin_destroy(&iwusrq->lock); + if (ret) + goto err; + + ret = zxdh_destroy_vmapped_srq(iwusrq); + if (ret) + goto err; + zxdh_dbg(ZXDH_DBG_SRQ, "%s iwusrq->srq_id:%d\n", __func__, + iwusrq->srq_id); + zxdh_free_hw_buf(iwusrq->srq.srq_base, iwusrq->total_buf_size); + free(iwusrq->srq.srq_wrid_array); + free(iwusrq); + + return 0; + +err: + return ret; +} + +/** + * zxdh_umodify_srq - modify srq on user app + * @srq: srq to destroy + */ +int zxdh_umodify_srq(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr, + int srq_attr_mask) +{ + struct ibv_modify_srq cmd; + struct zxdh_usrq *iwusrq; + int ret; + + iwusrq = container_of(srq, struct zxdh_usrq, ibv_srq); + ret = ibv_cmd_modify_srq(srq, srq_attr, srq_attr_mask, &cmd, + sizeof(cmd)); + if (ret == 0) + iwusrq->srq_limit = srq_attr->srq_limit; + zxdh_dbg(ZXDH_DBG_SRQ, "%s iwusrq->srq_id:%d srq_attr->srq_limit:%d\n", + __func__, iwusrq->srq_id, srq_attr->srq_limit); + return ret; +} + +/** + * zxdh_uquery_srq - query srq on user app + * @srq: srq to query + * @srq_attr: attributes of the srq to be query + */ +int zxdh_uquery_srq(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr) +{ + struct ibv_query_srq cmd; + + return ibv_cmd_query_srq(srq, srq_attr, &cmd, sizeof(cmd)); +} + +static int zxdh_check_srq_valid(struct ibv_recv_wr *recv_wr, + struct zxdh_usrq *iwusrq, + struct zxdh_srq *srq) +{ + if (unlikely(recv_wr->num_sge > iwusrq->max_sge)) + return -EINVAL; + + if (unlikely(srq->srq_ring.head == srq->srq_ring.tail)) + return -ENOMEM; + + return 0; +} + +static void zxdh_fill_srq_wqe(struct zxdh_usrq *iwusrq, + struct zxdh_srq *srq, __le64 *wqe_64, + struct ibv_recv_wr *recv_wr) +{ + __u32 byte_off; + int i; + + for (i = 0, byte_off = ZXDH_SRQ_FRAG_BYTESIZE; + i < recv_wr->num_sge && + byte_off + ZXDH_SRQ_FRAG_BYTESIZE < UINT32_MAX; + i++) { + set_64bit_val(wqe_64, byte_off, recv_wr->sg_list[i].addr); + set_64bit_val(wqe_64, byte_off + 8, + FIELD_PREP(ZXDHQPSRQ_FRAG_LEN, + recv_wr->sg_list[i].length) | + FIELD_PREP(ZXDHQPSRQ_FRAG_STAG, + recv_wr->sg_list[i].lkey)); + byte_off += ZXDH_SRQ_FRAG_BYTESIZE; + } + + if ((recv_wr->num_sge < iwusrq->max_sge) || (recv_wr->num_sge == 0)) { + set_64bit_val(wqe_64, byte_off, 0); + set_64bit_val(wqe_64, byte_off + 8, + FIELD_PREP(ZXDHQPSRQ_FRAG_LEN, 0) | + FIELD_PREP(ZXDHQPSRQ_FRAG_STAG, + ZXDH_SRQ_INVALID_LKEY)); + } + + set_64bit_val(wqe_64, 8, ((uint64_t)iwusrq->srq_id) << 32); + + __u64 hdr = FIELD_PREP(ZXDHQPSRQ_RSV, 0) | + FIELD_PREP(ZXDHQPSRQ_VALID_SGE_NUM, recv_wr->num_sge) | + FIELD_PREP(ZXDHQPSRQ_SIGNATURE, 0) | + FIELD_PREP(ZXDHQPSRQ_NEXT_WQE_INDEX, srq->srq_ring.head); + + udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ + set_64bit_val(wqe_64, 0, hdr); + + zxdh_dbg(ZXDH_DBG_SRQ, "%s wqe_64[0]:0x%llx\n", __func__, wqe_64[0]); + zxdh_dbg(ZXDH_DBG_SRQ, "%s wqe_64[1]:0x%llx\n", __func__, wqe_64[1]); + zxdh_dbg(ZXDH_DBG_SRQ, "%s wqe_64[2]:0x%llx\n", __func__, wqe_64[2]); + zxdh_dbg(ZXDH_DBG_SRQ, "%s wqe_64[3]:0x%llx\n", __func__, wqe_64[3]); + zxdh_dbg(ZXDH_DBG_SRQ, "%s wqe_64[4]:0x%llx\n", __func__, wqe_64[4]); + zxdh_dbg(ZXDH_DBG_SRQ, "%s wqe_64[5]:0x%llx\n", __func__, wqe_64[5]); +} + +static void zxdh_get_wqe_index(struct zxdh_srq *srq, __le16 *wqe_16, + __u16 *buf, __u16 nreq, __u16 *idx) +{ + int i; + + for (i = 0; i < nreq; i++) { + wqe_16 = zxdh_get_srq_list_wqe(srq, idx); + udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ + 
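		/*
		 * Editorial note, not part of the original patch: buf[] carries
		 * the free WQE indexes that zxdh_upost_srq_recv() recorded for
		 * each newly posted receive; the 16-bit store below publishes
		 * them, one entry at a time, into the SRQ list ring that the
		 * hardware consumes. The list-ring position tracked in *idx is
		 * then used by the caller to ring the doorbell via
		 * zxdh_update_srq_db_base().
		 */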
set_16bit_val(wqe_16, 0, buf[i]); + } +} + +static void zxdh_update_srq_db_base(struct zxdh_usrq *iwusrq, __u16 idx) +{ + __u64 hdr = FIELD_PREP(ZXDH_SRQ_PARITY_SIGN, + iwusrq->srq.srq_list_polarity) | + FIELD_PREP(ZXDH_SRQ_SW_SRQ_HEAD, idx); + + udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ + set_64bit_val(iwusrq->srq.srq_db_base, 0, hdr); + zxdh_dbg(ZXDH_DBG_SRQ, "%s srq_db_base(hdr):0x%llx\n", __func__, hdr); +} + +/** + * zxdh_upost_srq_recv - post srq recv on user app + * @srq: srq to post recv + * @recv_wr: a list of work requests to post on the receive queue + * @bad_recv_wr: pointer to first rejected wr + */ +int zxdh_upost_srq_recv(struct ibv_srq *srq, struct ibv_recv_wr *recv_wr, + struct ibv_recv_wr **bad_recv_wr) +{ + struct zxdh_usrq *iwusrq; + struct zxdh_srq *hw_srq; + __le16 *wqe_16 = NULL; + __le64 *wqe_64 = NULL; + __u64 temp_val; + int err = 0; + int nreq; + __u16 *buf; + size_t buf_size; + __u16 idx = 0; + + iwusrq = container_of(srq, struct zxdh_usrq, ibv_srq); + hw_srq = &iwusrq->srq; + pthread_spin_lock(&iwusrq->lock); + buf_size = iwusrq->max_wr * sizeof(__u16); + buf = malloc(buf_size); + if (buf == NULL) { + zxdh_dbg(ZXDH_DBG_SRQ, "malloc buf_size failed\n"); + err = -ENOMEM; + goto out; + } + + for (nreq = 0; recv_wr; nreq++, recv_wr = recv_wr->next) { + err = zxdh_check_srq_valid(recv_wr, iwusrq, hw_srq); + if (err) + break; + + iwusrq->srq.srq_wrid_array[hw_srq->srq_ring.head] = + recv_wr->wr_id; + buf[nreq] = hw_srq->srq_ring.head; + wqe_64 = zxdh_get_srq_wqe(hw_srq, hw_srq->srq_ring.head); + get_64bit_val(wqe_64, 0, &temp_val); + hw_srq->srq_ring.head = + (__u16)FIELD_GET(ZXDHQPSRQ_NEXT_WQE_INDEX, temp_val); + zxdh_fill_srq_wqe(iwusrq, hw_srq, wqe_64, recv_wr); + } + + zxdh_dbg(ZXDH_DBG_SRQ, "%s nreq:%d err:%d iwusrq->srq_id:%d\n", + __func__, nreq, err, iwusrq->srq_id); + + if (err == 0) { + zxdh_get_wqe_index(hw_srq, wqe_16, buf, nreq, &idx); + zxdh_update_srq_db_base(iwusrq, idx); + } +out: + pthread_spin_unlock(&iwusrq->lock); + if (err) + *bad_recv_wr = recv_wr; + if (buf) + free(buf); + return err; +} + +/** + * zxdh_uget_srq_num - get srq num on user app + * @srq: srq to get num + * @srq_num: to get srq num + */ +int zxdh_uget_srq_num(struct ibv_srq *srq, uint32_t *srq_num) +{ + struct zxdh_usrq *iwusrq; + + iwusrq = container_of(srq, struct zxdh_usrq, ibv_srq); + + *srq_num = iwusrq->srq_id; + return 0; +} + +void zxdh_set_debug_mask(void) +{ + char *env; + + env = getenv("ZXDH_DEBUG_MASK"); + if (env) + zxdh_debug_mask = strtol(env, NULL, 0); +} + +int zxdh_get_write_imm_split_switch(void) +{ + char *env; + + env = getenv("ZXDH_WRITE_IMM_SPILT_ENABLE"); + return (env != NULL) ? atoi(env) : 0; +} diff --git a/providers/zrdma/zxdh_verbs.h b/providers/zrdma/zxdh_verbs.h new file mode 100644 index 000000000..a04061560 --- /dev/null +++ b/providers/zrdma/zxdh_verbs.h @@ -0,0 +1,720 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */ +/* + * Copyright (c) 2024 ZTE Corporation. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ZXDH_VERBS_H__ +#define __ZXDH_VERBS_H__ +#include "zxdh_defs.h" + +#define zxdh_handle void * +#define zxdh_adapter_handle zxdh_handle +#define zxdh_qp_handle zxdh_handle +#define zxdh_cq_handle zxdh_handle +#define zxdh_pd_id zxdh_handle +#define zxdh_stag_handle zxdh_handle +#define zxdh_stag_index __u32 +#define zxdh_stag __u32 +#define zxdh_stag_key __u8 +#define zxdh_tagged_offset __u64 +#define zxdh_access_privileges __u32 +#define zxdh_physical_fragment __u64 +#define zxdh_address_list __u64 * +#define zxdh_sgl struct zxdh_sge * + +#define ZXDH_MAX_MR_SIZE 0x200000000000ULL + +#define ZXDH_ACCESS_FLAGS_LOCALREAD 0x01 +#define ZXDH_ACCESS_FLAGS_LOCALWRITE 0x02 +#define ZXDH_ACCESS_FLAGS_REMOTEREAD_ONLY 0x04 +#define ZXDH_ACCESS_FLAGS_REMOTEREAD 0x05 +#define ZXDH_ACCESS_FLAGS_REMOTEWRITE_ONLY 0x08 +#define ZXDH_ACCESS_FLAGS_REMOTEWRITE 0x0a +#define ZXDH_ACCESS_FLAGS_BIND_WINDOW 0x10 +#define ZXDH_ACCESS_FLAGS_ZERO_BASED 0x20 +#define ZXDH_ACCESS_FLAGS_ALL 0x3f + +#define ZXDH_OP_TYPE_NOP 0x00 +#define ZXDH_OP_TYPE_SEND 0x01 +#define ZXDH_OP_TYPE_SEND_WITH_IMM 0x02 +#define ZXDH_OP_TYPE_SEND_INV 0x03 +#define ZXDH_OP_TYPE_WRITE 0x04 +#define ZXDH_OP_TYPE_WRITE_WITH_IMM 0x05 +#define ZXDH_OP_TYPE_READ 0x06 +#define ZXDH_OP_TYPE_BIND_MW 0x07 +#define ZXDH_OP_TYPE_FAST_REG_MR 0x08 +#define ZXDH_OP_TYPE_LOCAL_INV 0x09 +#define ZXDH_OP_TYPE_UD_SEND 0x0a +#define ZXDH_OP_TYPE_UD_SEND_WITH_IMM 0x0b +#define ZXDH_OP_TYPE_REC 0x3e +#define ZXDH_OP_TYPE_REC_IMM 0x3f + +#define ZXDH_FLUSH_MAJOR_ERR 1 +#define ZXDH_RETRY_ACK_MAJOR_ERR 0x8 +#define ZXDH_RETRY_ACK_MINOR_ERR 0xf3 +#define ZXDH_TX_WINDOW_QUERY_ITEM_MINOR_ERR 0xf5 + +#define ZXDH_MAX_SQ_FRAG 31 +#define ZXDH_MAX_SQ_INLINE_DATELEN_WITH_IMM 210 + +#define INLINE_DATASIZE_7BYTES 7 +#define INLINE_DATASIZE_24BYTES 24 +#define INLINE_FRAG_DATASIZE_31BYTES 31 + +#define INLINE_DATA_OFFSET_7BYTES 7 +#define WQE_OFFSET_7BYTES 7 +#define WQE_OFFSET_8BYTES 8 +#define WQE_OFFSET_24BYTES 24 + +#define ZXDH_SQE_SIZE 4 +#define ZXDH_RQE_SIZE 2 + +#define ZXDH_SRQ_INVALID_LKEY 0x100 +#define ZXDH_SRQ_DB_INIT_VALUE 0x8000 + +#define ZXDH_WQEALLOC_WQE_DESC_INDEX GENMASK(31, 20) + +#define ZXDH_SRQE_SIZE 2 +#define ZXDH_CQE_SIZE 8 +#define ZXDH_EXTENDED_CQE_SIZE 8 +#define ZXDH_MAX_INLINE_DATA_SIZE 217 +#define 
ZXDH_MAX_SQ_PAYLOAD_SIZE 2147483648 +#define ZXDH_MIN_CQ_SIZE 1 +#define ZXDH_MAX_CQ_SIZE 2097152 + +enum zxdh_addressing_type { + ZXDH_ADDR_TYPE_ZERO_BASED = 0, + ZXDH_ADDR_TYPE_VA_BASED = 1, +}; + +enum zxdh_flush_opcode { + FLUSH_INVALID = 0, + FLUSH_GENERAL_ERR, + FLUSH_PROT_ERR, + FLUSH_REM_ACCESS_ERR, + FLUSH_LOC_QP_OP_ERR, + FLUSH_REM_OP_ERR, + FLUSH_LOC_LEN_ERR, + FLUSH_FATAL_ERR, + FLUSH_RETRY_EXC_ERR, + FLUSH_MW_BIND_ERR, + FLUSH_REM_INV_REQ_ERR, +}; + +enum zxdh_cmpl_status { + ZXDH_COMPL_STATUS_SUCCESS = 0, + ZXDH_COMPL_STATUS_FLUSHED, + ZXDH_COMPL_STATUS_INVALID_WQE, + ZXDH_COMPL_STATUS_QP_CATASTROPHIC, + ZXDH_COMPL_STATUS_REMOTE_TERMINATION, + ZXDH_COMPL_STATUS_INVALID_STAG, + ZXDH_COMPL_STATUS_BASE_BOUND_VIOLATION, + ZXDH_COMPL_STATUS_ACCESS_VIOLATION, + ZXDH_COMPL_STATUS_INVALID_PD_ID, + ZXDH_COMPL_STATUS_WRAP_ERROR, + ZXDH_COMPL_STATUS_STAG_INVALID_PDID, + ZXDH_COMPL_STATUS_RDMA_READ_ZERO_ORD, + ZXDH_COMPL_STATUS_QP_NOT_PRIVLEDGED, + ZXDH_COMPL_STATUS_STAG_NOT_INVALID, + ZXDH_COMPL_STATUS_INVALID_PHYS_BUF_SIZE, + ZXDH_COMPL_STATUS_INVALID_PHYS_BUF_ENTRY, + ZXDH_COMPL_STATUS_INVALID_FBO, + ZXDH_COMPL_STATUS_INVALID_LEN, + ZXDH_COMPL_STATUS_INVALID_ACCESS, + ZXDH_COMPL_STATUS_PHYS_BUF_LIST_TOO_LONG, + ZXDH_COMPL_STATUS_INVALID_VIRT_ADDRESS, + ZXDH_COMPL_STATUS_INVALID_REGION, + ZXDH_COMPL_STATUS_INVALID_WINDOW, + ZXDH_COMPL_STATUS_INVALID_TOTAL_LEN, + ZXDH_COMPL_STATUS_RETRY_ACK_ERR, + ZXDH_COMPL_STATUS_TX_WINDOW_QUERY_ITEM_ERR, + ZXDH_COMPL_STATUS_UNKNOWN, +}; + +enum zxdh_cmpl_notify { + ZXDH_CQ_COMPL_EVENT = 0, + ZXDH_CQ_COMPL_SOLICITED = 1, +}; + +enum zxdh_qp_caps { + ZXDH_WRITE_WITH_IMM = 1, + ZXDH_SEND_WITH_IMM = 2, + ZXDH_ROCE = 4, + ZXDH_PUSH_MODE = 8, +}; + +enum zxdh_page_size { + ZXDH_PAGE_SIZE_4K = 0, + ZXDH_PAGE_SIZE_2M = 9, + ZXDH_PAGE_SIZE_1G = 18, +}; + +enum zxdh_rdmatx_parse_top_err { + ZXDH_TX_PARSE_TOP_AXI_ERR = 0x1, + ZXDH_TX_PARSE_TOP_WQE_FLUSH = 0x10001, + ZXDH_TX_PARSE_TOP_ORD_ERR = 0x20020, + ZXDH_TX_PARSE_TOP_OPCODE_ERR_FLAG = 0x20021, + ZXDH_TX_PARSE_TOP_CQP_STATE_AXI_ERR = 0x20022, + ZXDH_TX_PARSE_TOP_WQE_LEN_ERR = 0x20023, + ZXDH_TX_PARSE_TOP_DATA_LEN_ERR = 0x20024, + ZXDH_TX_PARSE_TOP_AH_VALID0_ERR = 0x20025, + ZXDH_TX_PARSE_TOP_UD_PDINDEX_ERR = 0x20026, + ZXDH_TX_PARSE_TOP_QP_STATE_ERR = 0x20027, + ZXDH_TX_PARSE_TOP_SERVICE_TYPE_ERR = 0x20028, + ZXDH_TX_PARSE_TOP_UD_PAYLOAD_ERR = 0x20029, + ZXDH_TX_PARSE_TOP_WQE_LEN0_ERR = 0x2002a, + ZXDH_TX_PARSE_TOP_WQE_DEFICIENT_CLR_ERR = 0x2002b, + ZXDH_TX_PARSE_TOP_IMMDT_ERR = 0x2002c, + ZXDH_TX_PARSE_TOP_FRAGMENT_LENGTH_ERR = 0x2009f, + ZXDH_TX_PARSE_TOP_MRTE_STATE_ERR = 0x90091, + ZXDH_TX_PARSE_TOP_QP_CHECK_ERR = 0x90092, + ZXDH_TX_PARSE_TOP_PD_CHECK_ERR = 0x90093, + ZXDH_TX_PARSE_TOP_LKEY_CHECK_ERR = 0x90094, + ZXDH_TX_PARSE_TOP_STAG_INDEX_CHECK_ERR = 0x90095, + ZXDH_TX_PARSE_TOP_VADDR_LEN_CHECK_ERR = 0x90096, + ZXDH_TX_PARSE_TOP_ACCESS_RIGHT_CHECK_ERR = 0x90097, + ZXDH_TX_PARSE_TOP_STAG_INDEX_CHECK_ZERO_ERR = 0x90098, +}; + +enum zxdh_rdmatx_ack_sys_top_err { + ZXDH_TX_ACK_SYS_TOP_NVME_INDEX_ERR = 0x30030, + ZXDH_TX_ACK_SYS_TOP_NVME_NOF_QID_ERR = 0x30031, + ZXDH_TX_ACK_SYS_TOP_NVME_NOF_PD_INDEX_ERR = 0x30032, + ZXDH_TX_ACK_SYS_TOP_NVME_LENGTH_ERR = 0x30033, + ZXDH_TX_ACK_SYS_TOP_NVME_KEY_ERR = 0x30034, + ZXDH_TX_ACK_SYS_TOP_NVME_ACCESS_ERR = 0x30035, + ZXDH_TX_ACK_SYS_TOP_MRTE_STATE_ERR = 0x50091, + ZXDH_TX_ACK_SYS_TOP_QP_CHECK_ERR = 0x50092, + ZXDH_TX_ACK_SYS_TOP_PD_CHECK_ERR = 0x50093, + ZXDH_TX_ACK_SYS_TOP_LKEY_CHECK_ERR = 0x50094, + ZXDH_TX_ACK_SYS_TOP_STAG_INDEX_CHECK_ERR = 0x50095, + 
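	/*
	 * Editorial note, not part of the original patch: the values in these
	 * zxdh_rdmatx_* error enums appear to pack the two 16-bit fields
	 * reported in struct zxdh_cq_poll_info as (major_err << 16) | minor_err;
	 * for example 0x800f3 pairs ZXDH_RETRY_ACK_MAJOR_ERR (0x8) with
	 * ZXDH_RETRY_ACK_MINOR_ERR (0xf3). Treat this as an observation, not a
	 * documented encoding.
	 */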
ZXDH_TX_ACK_SYS_TOP_VADDR_LEN_CHECK_ERR = 0x50096, + ZXDH_TX_ACK_SYS_TOP_ACCESS_RIGHT_CHECK_ERR = 0x50097, + ZXDH_TX_ACK_SYS_TOP_STAG_INDEX_CHECK_ZERO_ERR = 0x50098, + ZXDH_TX_ACK_SYS_TOP_LOC_LEN_ERR = 0x600c0, + ZXDH_TX_ACK_SYS_TOP_NAK_INVALID_REQ = 0x700d0, + ZXDH_TX_ACK_SYS_TOP_NAK_REMOTE_ACCESS_ERR = 0x700d1, + ZXDH_TX_ACK_SYS_TOP_NAK_REMOTE_OPERATIONAL_ERR = 0x700d2, + ZXDH_TX_ACK_SYS_TOP_NAK_RETRY_LIMIT = 0x800f1, + ZXDH_TX_ACK_SYS_TOP_READ_RETRY_LIMIT = 0x800f2, + ZXDH_TX_ACK_SYS_TOP_TIMEOUT_RETRY_LIMIT = 0x800f3, + ZXDH_TX_ACK_SYS_TOP_RNR_RETRY_LIMIT = 0x800f4, +}; + +enum zxdh_rdmatx_window_top_err { + ZXDH_TX_WINDOW_TOP_WINDOW_NO_ENTRY = 0x800f5, + ZXDH_TX_WINDOW_TOP_WINDOW_BACK_MSN = 0x800f6, + ZXDH_TX_WINDOW_TOP_WINDOW_SMALL_MSN = 0x800f7, +}; + +enum zxdh_rdmatx_doorbell_mgr_err { + ZXDH_TX_DOORBELL_MGR_INDEX_CHECK_ERROR = 0x30036, + ZXDH_TX_DOORBELL_MGR_QID_CHECK_ERROR = 0x30037, + ZXDH_TX_DOORBELL_MGR_PD_INDEX_CHECK_ERROR = 0x30038, + ZXDH_TX_DOORBELL_MGR_LENGTH_CHECK_ERROR = 0x30039, + ZXDH_TX_DOORBELL_MGR_KEY_CHECK_ERROR = 0x3003a, + ZXDH_TX_DOORBELL_MGR_ACCESS_CHECK_ERROR = 0x3003b, +}; + +enum zxdh_rdmarx_err { + ZXDH_RX_CQP_FLUSH = 0x12, + ZXDH_RX_FIRST_PACKET_ERR = 0x4f, + ZXDH_RX_INVALID_OPCODE = 0x50, + ZXDH_RX_ORDER_ERR = 0x51, + ZXDH_RX_LEN_ERR = 0x52, + ZXDH_RX_SQR_STATE_ERR = 0x53, + ZXDH_RX_WQE_SIGN_ERR = 0x54, + ZXDH_RX_WQE_LEN_ERR = 0x55, + ZXDH_RX_SQR_WATER_LEVEL_ERR = 0x80, + ZXDH_RX_SRQ_AXI_RESP_ERR = 0xb1, + ZXDH_RX_CQ_OVERFLOW_ERR = 0x76, + ZXDH_RX_QP_CQ_OVERFLOW_ERR = 0x78, + ZXDH_RX_CQ_STATE_ERR = 0x7a, + ZXDH_RX_CQ_AXI_ERR = 0x7b, + ZXDH_RX_QP_CQ_AXI_ERR = 0x7c, + ZXDH_RX_NOF_IOQ_ERR = 0x70, + ZXDH_RX_NOF_PDNUM_ERR = 0x71, + ZXDH_RX_NOF_LEN_ERR = 0x72, + ZXDH_RX_NOF_RKEY_ERR = 0x73, + ZXDH_RX_NOF_ACC_ERR = 0x74, + ZXDH_RX_IRD_OVF = 0x77, + ZXDH_RX_MR_MW_STATE_FREE_ERR = 0x90, + ZXDH_RX_MR_MW_STATE_INVALID_ERR = 0x91, + ZXDH_RX_TYPE2B_MW_QPN_CHECK_ERR = 0x92, + ZXDH_RX_MR_MW_PD_CHECK_ERR = 0x93, + ZXDH_RX_MR_MW_KEY_CHECK_ERR = 0x94, + ZXDH_RX_MR_MW_STAG_INDEX_CHECK_ERR = 0x95, + ZXDH_RX_MR_MW_BOUNDARY_CHECK_ERR = 0x96, + ZXDH_RX_MR_MW_ACCESS_CHECK_ERR = 0x97, + ZXDH_RX_MR_MW_0STAG_INDEX_CHECK_ERR = 0x98, + ZXDH_RX_MW_STATE_INVALID_ERR = 0x99, + ZXDH_RX_MW_PD_CHECK_ERR = 0x9a, + ZXDH_RX_MW_RKEY_CHECK_ERR = 0x9b, + ZXDH_RX_TYPE2BMW_QPN_CHECK_ERR = 0x9c, + ZXDH_RX_MW_STAG_INDEX_CHECK_ERR = 0x9d, + ZXDH_RX_MW_SHARE_MR_CHECK_ERR = 0x9e, + ZXDH_RX_MW_TYPE1_CHECK_ERR = 0x9f, + ZXDH_RX_MR_PD_CHECK_ERR = 0xa0, + ZXDH_RX_MR_RKEY_CHECK_ERR = 0xa1, + ZXDH_RX_MR_SHARE_MR_CHECK_ERR = 0xa4, + ZXDH_RX_MR_BOND_MW_NUM_CHECK_ERR = 0xa5, + ZXDH_RX_MR_CANBE_R_INVALID_CHECK_ERR = 0xa6, + ZXDH_RX_AXI_RESP_ERR = 0xb0, +}; + +struct zxdh_qp; +struct zxdh_cq; +struct zxdh_qp_init_info; +struct zxdh_cq_init_info; + +struct zxdh_sge { + zxdh_tagged_offset tag_off; + __u32 len; + zxdh_stag stag; +}; + +struct zxdh_ring { + __u32 head; + __u32 tail; + __u32 size; +}; + +struct zxdh_cqe { + __le64 buf[ZXDH_CQE_SIZE]; +}; + +struct zxdh_extended_cqe { + __le64 buf[ZXDH_EXTENDED_CQE_SIZE]; +}; + +struct zxdh_post_send { + zxdh_sgl sg_list; + __u32 num_sges; + __u32 qkey; + __u32 dest_qp; + __u32 ah_id; +}; + +struct zxdh_inline_rdma_send { + void *data; + __u32 len; + __u32 qkey; + __u32 dest_qp; + __u32 ah_id; +}; + +struct zxdh_post_rq_info { + __u64 wr_id; + zxdh_sgl sg_list; + __u32 num_sges; +}; + +struct zxdh_rdma_write { + zxdh_sgl lo_sg_list; + __u32 num_lo_sges; + struct zxdh_sge rem_addr; +}; + +struct zxdh_inline_rdma_write { + void *data; + __u32 len; + struct zxdh_sge 
rem_addr; +}; + +struct zxdh_rdma_read { + zxdh_sgl lo_sg_list; + __u32 num_lo_sges; + struct zxdh_sge rem_addr; +}; + +struct zxdh_bind_window { + zxdh_stag mr_stag; + __u64 bind_len; + void *va; + enum zxdh_addressing_type addressing_type; + __u8 ena_reads : 1; + __u8 ena_writes : 1; + zxdh_stag mw_stag; + __u8 mem_window_type_1 : 1; + __u8 host_page_size; + __u8 leaf_pbl_size; + __u16 root_leaf_offset; + __u64 mw_pa_pble_index; +}; + +struct zxdh_inv_local_stag { + zxdh_stag target_stag; +}; + +struct zxdh_post_sq_info { + __u64 wr_id; + __u8 op_type; + __u8 l4len; + __u8 signaled : 1; + __u8 read_fence : 1; + __u8 local_fence : 1; + __u8 inline_data : 1; + __u8 imm_data_valid : 1; + __u8 push_wqe : 1; + __u8 report_rtt : 1; + __u8 udp_hdr : 1; + __u8 defer_flag : 1; + __u8 solicited : 1; + __u32 imm_data; + __u32 stag_to_inv; + union { + struct zxdh_post_send send; + struct zxdh_rdma_write rdma_write; + struct zxdh_rdma_read rdma_read; + struct zxdh_bind_window bind_window; + struct zxdh_inv_local_stag inv_local_stag; + struct zxdh_inline_rdma_write inline_rdma_write; + struct zxdh_inline_rdma_send inline_rdma_send; + } op; +}; + +struct zxdh_cq_poll_info { + __u64 wr_id; + zxdh_qp_handle qp_handle; + __u32 bytes_xfered; + __u32 tcp_seq_num_rtt; + __u32 qp_id; + __u32 ud_src_qpn; + __u32 imm_data; + zxdh_stag inv_stag; /* or L_R_Key */ + enum zxdh_cmpl_status comp_status; + __u16 major_err; + __u16 minor_err; + __u8 op_type; + __u8 stag_invalid_set : 1; /* or L_R_Key set */ + __u8 push_dropped : 1; + __u8 error : 1; + __u8 solicited_event : 1; + __u8 ipv4 : 1; + __u8 imm_valid : 1; +}; + +enum zxdh_status_code zxdh_inline_rdma_write(struct zxdh_qp *qp, + struct zxdh_post_sq_info *info, + bool post_sq); +enum zxdh_status_code zxdh_rc_inline_send(struct zxdh_qp *qp, + struct zxdh_post_sq_info *info, + bool post_sq); +enum zxdh_status_code zxdh_ud_inline_send(struct zxdh_qp *qp, + struct zxdh_post_sq_info *info, + bool post_sq); +enum zxdh_status_code +zxdh_mw_bind(struct zxdh_qp *qp, struct zxdh_post_sq_info *info, bool post_sq); +enum zxdh_status_code zxdh_post_nop(struct zxdh_qp *qp, __u64 wr_id, + bool signaled, bool post_sq); +enum zxdh_status_code zxdh_post_receive(struct zxdh_qp *qp, + struct zxdh_post_rq_info *info); +void zxdh_qp_post_wr(struct zxdh_qp *qp); +void zxdh_qp_set_shadow_area(struct zxdh_qp *qp); +enum zxdh_status_code zxdh_rdma_read(struct zxdh_qp *qp, + struct zxdh_post_sq_info *info, + bool inv_stag, bool post_sq); +enum zxdh_status_code zxdh_rdma_write(struct zxdh_qp *qp, + struct zxdh_post_sq_info *info, + bool post_sq); +enum zxdh_status_code +zxdh_rc_send(struct zxdh_qp *qp, struct zxdh_post_sq_info *info, bool post_sq); +enum zxdh_status_code +zxdh_ud_send(struct zxdh_qp *qp, struct zxdh_post_sq_info *info, bool post_sq); +enum zxdh_status_code zxdh_stag_local_invalidate(struct zxdh_qp *qp, + struct zxdh_post_sq_info *info, + bool post_sq); + +struct zxdh_wqe_ops { + void (*iw_copy_inline_data)(__u8 *dest, __u8 *src, __u32 len, + __u8 polarity, bool imm_data_flag); + __u16 (*iw_inline_data_size_to_quanta)(__u32 data_size, + bool imm_data_flag); + void (*iw_set_fragment)(__le64 *wqe, __u32 offset, struct zxdh_sge *sge, + __u8 valid); + void (*iw_set_mw_bind_wqe)(__le64 *wqe, + struct zxdh_bind_window *op_info); +}; + +__le64 *get_current_cqe(struct zxdh_cq *cq); +enum zxdh_status_code zxdh_cq_poll_cmpl(struct zxdh_cq *cq, + struct zxdh_cq_poll_info *info); +void zxdh_cq_request_notification(struct zxdh_cq *cq, + enum zxdh_cmpl_notify cq_notify); +void 
zxdh_cq_resize(struct zxdh_cq *cq, void *cq_base, int size); +void zxdh_cq_set_resized_cnt(struct zxdh_cq *qp, __u16 cnt); +enum zxdh_status_code zxdh_cq_init(struct zxdh_cq *cq, + struct zxdh_cq_init_info *info); +enum zxdh_status_code zxdh_qp_init(struct zxdh_qp *qp, + struct zxdh_qp_init_info *info); +struct zxdh_sq_wr_trk_info { + __u64 wrid; + __u32 wr_len; + __u16 quanta; + __u8 reserved[2]; +}; + +struct zxdh_qp_sq_quanta { + __le64 elem[ZXDH_SQE_SIZE]; +}; + +struct zxdh_qp_rq_quanta { + __le64 elem[ZXDH_RQE_SIZE]; +}; + +struct zxdh_dev_attrs { + __u64 feature_flags; + __aligned_u64 sq_db_pa; + __aligned_u64 cq_db_pa; + __u32 max_hw_wq_frags; + __u32 max_hw_read_sges; + __u32 max_hw_inline; + __u32 max_hw_rq_quanta; + __u32 max_hw_srq_quanta; + __u32 max_hw_wq_quanta; + __u32 min_hw_cq_size; + __u32 max_hw_cq_size; + __u16 max_hw_sq_chunk; + __u32 max_hw_srq_wr; + __u8 db_addr_type; + __u8 chip_rev; + __u16 rdma_tool_flags; +}; + +struct zxdh_hw_attrs { + struct zxdh_dev_attrs dev_attrs; + __u64 max_hw_outbound_msg_size; + __u64 max_hw_inbound_msg_size; + __u64 max_mr_size; + __u32 min_hw_qp_id; + __u32 min_hw_aeq_size; + __u32 max_hw_aeq_size; + __u32 min_hw_ceq_size; + __u32 max_hw_ceq_size; + __u32 max_hw_device_pages; + __u32 max_hw_vf_fpm_id; + __u32 first_hw_vf_fpm_id; + __u32 max_hw_ird; + __u32 max_hw_ord; + __u32 max_hw_wqes; + __u32 max_hw_pds; + __u32 max_hw_ena_vf_count; + __u32 max_qp_wr; + __u32 max_pe_ready_count; + __u32 max_done_count; + __u32 max_sleep_count; + __u32 max_cqp_compl_wait_time_ms; + __u16 max_stat_inst; +}; + +struct zxdh_qp { + struct zxdh_qp_sq_quanta *sq_base; + struct zxdh_qp_rq_quanta *rq_base; + struct zxdh_dev_attrs *dev_attrs; + __u32 *wqe_alloc_db; + struct zxdh_sq_wr_trk_info *sq_wrtrk_array; + __u64 *rq_wrid_array; + __le64 *shadow_area; + __le32 *push_db; + __le64 *push_wqe; + struct zxdh_ring sq_ring; + struct zxdh_ring rq_ring; + struct zxdh_ring initial_ring; + __u32 qp_id; + __u32 qp_caps; + __u32 sq_size; + __u32 rq_size; + __u32 max_sq_frag_cnt; + __u32 max_rq_frag_cnt; + __u32 max_inline_data; + struct zxdh_wqe_ops wqe_ops; + __u16 conn_wqes; + __u8 qp_type; + __u8 swqe_polarity; + __u8 swqe_polarity_deferred; + __u8 rwqe_polarity; + __u8 rq_wqe_size; + __u8 rq_wqe_size_multiplier; + __u8 deferred_flag : 1; + __u8 push_mode : 1; /* whether the last post wqe was pushed */ + __u8 push_dropped : 1; + __u8 sq_flush_complete : 1; /* Indicates flush was seen and SQ was empty after the flush */ + __u8 rq_flush_complete : 1; /* Indicates flush was seen and RQ was empty after the flush */ + __u8 destroy_pending : 1; /* Indicates the QP is being destroyed */ + void *back_qp; + zxdh_sgl split_sg_list; + pthread_spinlock_t *lock; + __u16 rwqe_signature; + __u8 dbg_rq_flushed; + __u8 sq_flush_seen; + __u8 rq_flush_seen; + __u8 is_srq; + __u16 mtu; + __u32 next_psn; + __u32 cqe_last_ack_qsn; + __u32 qp_last_ack_qsn; + __u8 cqe_retry_cnt; + __u8 qp_reset_cnt; +}; + +struct zxdh_cq { + struct zxdh_cqe *cq_base; + __u32 *cqe_alloc_db; + __u32 *cq_ack_db; + __le64 *shadow_area; + __u32 cq_id; + __u32 cq_size; + __u32 cqe_rd_cnt; + struct zxdh_ring cq_ring; + __u8 polarity; + __u8 cqe_size; +}; + +struct zxdh_srq { + struct zxdh_srq_wqe *srq_base; + struct zxdh_dev_attrs *dev_attrs; + __le16 *srq_list_base; + __le64 *srq_db_base; + __u32 srq_id; + __u32 srq_size; + __u32 log2_srq_size; + __u32 srq_list_size; + struct zxdh_ring srq_ring; + struct zxdh_ring srq_list_ring; + __u8 srq_list_polarity; + __u64 *srq_wrid_array; + __u8 srq_wqe_size; + __u8 
srq_wqe_size_multiplier; + __u32 srq_caps; + __u32 max_srq_frag_cnt; + __u32 srq_type; + pthread_spinlock_t *lock; + __u8 srq_flush_complete : 1; /* Indicates flush was seen and SQ was empty after the flush */ + __u8 destroy_pending : 1; /* Indicates the QP is being destroyed */ + __u8 srq_flush_seen; +}; + +struct zxdh_qp_init_info { + struct zxdh_qp_sq_quanta *sq; + struct zxdh_qp_rq_quanta *rq; + struct zxdh_dev_attrs *dev_attrs; + __u32 *wqe_alloc_db; + __le64 *shadow_area; + struct zxdh_sq_wr_trk_info *sq_wrtrk_array; + __u64 *rq_wrid_array; + __u32 qp_id; + __u32 qp_caps; + __u32 sq_size; + __u32 rq_size; + __u32 max_sq_frag_cnt; + __u32 max_rq_frag_cnt; + __u32 max_inline_data; + __u8 type; + int abi_ver; + bool legacy_mode; +}; + +struct zxdh_cq_init_info { + __u32 *cqe_alloc_db; + __u32 *cq_ack_db; + struct zxdh_cqe *cq_base; + __le64 *shadow_area; + __u32 cq_size; + __u32 cq_id; + __u8 cqe_size; +}; + +struct zxdh_srq_init_info { + struct zxdh_srq_wqe *srq_base; + struct zxdh_dev_attrs *dev_attrs; + __le16 *srq_list_base; + __le64 *srq_db_base; + __u64 *srq_wrid_array; + __u32 srq_id; + __u32 srq_caps; + __u32 srq_size; + __u32 log2_srq_size; + __u32 srq_list_size; + __u32 srq_db_size; + __u32 max_srq_frag_cnt; + __u32 srq_limit; +}; + + +struct zxdh_wqe_srq_next_sge { + __le16 next_wqe_index; + __le16 signature; + __u8 valid_sge_num; + __u8 rsvd[11]; +}; + +struct zxdh_srq_sge { + __le64 addr; + __le32 length; + __le32 lkey; +}; + +struct zxdh_srq_wqe { + __le64 elem[ZXDH_SRQE_SIZE]; +}; + +__le64 *zxdh_qp_get_next_send_wqe(struct zxdh_qp *qp, __u32 *wqe_idx, + __u16 quanta, __u32 total_size, + struct zxdh_post_sq_info *info); +__le64 *zxdh_qp_get_next_recv_wqe(struct zxdh_qp *qp, __u32 *wqe_idx); +void zxdh_clean_cq(void *q, struct zxdh_cq *cq); +enum zxdh_status_code zxdh_nop(struct zxdh_qp *qp, __u64 wr_id, bool signaled, + bool post_sq); +enum zxdh_status_code zxdh_fragcnt_to_wqesize_rq(__u32 frag_cnt, + __u16 *wqe_size); +void zxdh_get_sq_wqe_shift(__u32 sge, __u32 inline_data, __u8 *shift); + +void zxdh_get_rq_wqe_shift(__u32 sge, __u8 *shift); +enum zxdh_status_code zxdh_get_sqdepth(struct zxdh_dev_attrs *dev_attrs, + __u32 sq_size, __u8 shift, + __u32 *wqdepth); +enum zxdh_status_code zxdh_get_rqdepth(struct zxdh_dev_attrs *dev_attrs, + __u32 rq_size, __u8 shift, + __u32 *wqdepth); +int zxdh_qp_round_up(__u32 wqdepth); +int zxdh_cq_round_up(__u32 wqdepth); +void zxdh_qp_push_wqe(struct zxdh_qp *qp, __le64 *wqe, __u16 quanta, + __u32 wqe_idx, bool post_sq); +void zxdh_get_srq_wqe_shift(struct zxdh_dev_attrs *dev_attrs, __u32 sge, + __u8 *shift); +int zxdh_get_srqdepth(__u32 max_hw_srq_quanta, __u32 srq_size, __u8 shift, + __u32 *srqdepth); +__le64 *zxdh_get_srq_wqe(struct zxdh_srq *srq, int wqe_index); +__le16 *zxdh_get_srq_list_wqe(struct zxdh_srq *srq, __u16 *idx); + +enum zxdh_status_code zxdh_srq_init(struct zxdh_srq *srq, + struct zxdh_srq_init_info *info); +void zxdh_free_srq_wqe(struct zxdh_srq *srq, int wqe_index); +#endif /* __ZXDH_VERBS_H__ */ diff --git a/providers/zrdma/zxdh_zrdma.c b/providers/zrdma/zxdh_zrdma.c new file mode 100644 index 000000000..d4aefba71 --- /dev/null +++ b/providers/zrdma/zxdh_zrdma.c @@ -0,0 +1,242 @@ +// SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB) +/* + * Copyright (c) 2024 ZTE Corporation. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <errno.h> +#include <sys/mman.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include "zxdh_devids.h" +#include "zxdh_zrdma.h" +#include "zxdh_abi.h" +#include "private_verbs_cmd.h" + +#define ZXDH_HCA(v, d) VERBS_PCI_MATCH(v, d, NULL) +static const struct verbs_match_ent hca_table[] = { + VERBS_DRIVER_ID(RDMA_DRIVER_ZXDH), + ZXDH_HCA(PCI_VENDOR_ID_ZXDH_EVB, ZXDH_DEV_ID_ADAPTIVE_EVB_PF), + ZXDH_HCA(PCI_VENDOR_ID_ZXDH_EVB, ZXDH_DEV_ID_ADAPTIVE_EVB_VF), + ZXDH_HCA(PCI_VENDOR_ID_ZXDH_E312, ZXDH_DEV_ID_ADAPTIVE_E312_PF), + ZXDH_HCA(PCI_VENDOR_ID_ZXDH_E312, ZXDH_DEV_ID_ADAPTIVE_E312_VF), + ZXDH_HCA(PCI_VENDOR_ID_ZXDH_E310, ZXDH_DEV_ID_ADAPTIVE_E310_PF), + ZXDH_HCA(PCI_VENDOR_ID_ZXDH_E310, ZXDH_DEV_ID_ADAPTIVE_E310_VF), + ZXDH_HCA(PCI_VENDOR_ID_ZXDH_E310_RDMA, ZXDH_DEV_ID_ADAPTIVE_E310_RDMA_PF), + ZXDH_HCA(PCI_VENDOR_ID_ZXDH_E310_RDMA, ZXDH_DEV_ID_ADAPTIVE_E310_RDMA_VF), + ZXDH_HCA(PCI_VENDOR_ID_ZXDH_E316, ZXDH_DEV_ID_ADAPTIVE_E316_PF), + ZXDH_HCA(PCI_VENDOR_ID_ZXDH_E316, ZXDH_DEV_ID_ADAPTIVE_E316_VF), + ZXDH_HCA(PCI_VENDOR_ID_ZXDH_X512, ZXDH_DEV_ID_ADAPTIVE_X512_PF), + ZXDH_HCA(PCI_VENDOR_ID_ZXDH_X512, ZXDH_DEV_ID_ADAPTIVE_X512_VF), + ZXDH_HCA(PCI_VENDOR_ID_ZXDH_E312_TY_CLOUD, ZXDH_DEV_ID_ADAPTIVE_E312_TY_CLOUD_PF), + ZXDH_HCA(PCI_VENDOR_ID_ZXDH_E312_TY_CLOUD, ZXDH_DEV_ID_ADAPTIVE_E312_TY_CLOUD_VF), + ZXDH_HCA(PCI_VENDOR_ID_ZXDH_E310_TY_CLOUD, ZXDH_DEV_ID_ADAPTIVE_E310_TY_CLOUD_PF), + ZXDH_HCA(PCI_VENDOR_ID_ZXDH_E310_TY_CLOUD, ZXDH_DEV_ID_ADAPTIVE_E310_TY_CLOUD_VF), + ZXDH_HCA(PCI_VENDOR_ID_ZXDH_E312S_D, ZXDH_DEV_ID_ADAPTIVE_E312S_D_PF), + ZXDH_HCA(PCI_VENDOR_ID_ZXDH_E312S_D, ZXDH_DEV_ID_ADAPTIVE_E312S_D_VF), + {} +}; + +/** + * zxdh_ufree_context - free context that was allocated + * @ibctx: context allocated ptr + */ +static void zxdh_ufree_context(struct ibv_context *ibctx) +{ + struct zxdh_uvcontext *iwvctx; + + iwvctx = container_of(ibctx, struct zxdh_uvcontext, ibv_ctx.context); + + zxdh_ufree_pd(&iwvctx->iwupd->ibv_pd); + zxdh_munmap(iwvctx->sq_db); + zxdh_munmap(iwvctx->cq_db); + verbs_uninit_context(&iwvctx->ibv_ctx); + free(iwvctx); +} + +static const struct verbs_context_ops zxdh_uctx_ops = { + .alloc_mw = zxdh_ualloc_mw, 
+ .alloc_pd = zxdh_ualloc_pd, + .attach_mcast = zxdh_uattach_mcast, + .bind_mw = zxdh_ubind_mw, + .cq_event = zxdh_cq_event, + .create_ah = zxdh_ucreate_ah, + .create_cq = zxdh_ucreate_cq, + .create_cq_ex = zxdh_ucreate_cq_ex, + .create_qp = zxdh_ucreate_qp, + .create_qp_ex = zxdh_ucreate_qp_ex, + .create_srq = zxdh_ucreate_srq, + .dealloc_mw = zxdh_udealloc_mw, + .dealloc_pd = zxdh_ufree_pd, + .dereg_mr = zxdh_udereg_mr, + .destroy_ah = zxdh_udestroy_ah, + .destroy_cq = zxdh_udestroy_cq, + .modify_cq = zxdh_umodify_cq, + .destroy_qp = zxdh_udestroy_qp, + .destroy_srq = zxdh_udestroy_srq, + .detach_mcast = zxdh_udetach_mcast, + .modify_qp = zxdh_umodify_qp, + .modify_srq = zxdh_umodify_srq, + .poll_cq = zxdh_upoll_cq, + .post_recv = zxdh_upost_recv, + .post_send = zxdh_upost_send, + .post_srq_recv = zxdh_upost_srq_recv, + .query_device_ex = zxdh_uquery_device_ex, + .query_port = zxdh_uquery_port, + .query_qp = zxdh_uquery_qp, + .query_srq = zxdh_uquery_srq, + .reg_mr = zxdh_ureg_mr, + .rereg_mr = zxdh_urereg_mr, + .req_notify_cq = zxdh_uarm_cq, + .resize_cq = zxdh_uresize_cq, + .free_context = zxdh_ufree_context, + .get_srq_num = zxdh_uget_srq_num, +}; + +/** + * zxdh_ualloc_context - allocate context for user app + * @ibdev: ib device created during zxdh_driver_init + * @cmd_fd: save fd for the device + * @private_data: device private data + * + * Returns callback routine table and calls driver for allocating + * context and getting back resource information to return as ibv_context. + */ +static struct verbs_context *zxdh_ualloc_context(struct ibv_device *ibdev, + int cmd_fd, void *private_data) +{ + struct ibv_pd *ibv_pd; + struct zxdh_uvcontext *iwvctx; + struct zxdh_get_context cmd; + struct zxdh_get_context_resp resp = {}; + __u64 sq_db_mmap_key, cq_db_mmap_key; + + iwvctx = verbs_init_and_alloc_context(ibdev, cmd_fd, iwvctx, ibv_ctx, + RDMA_DRIVER_ZXDH); + if (!iwvctx) + return NULL; + + zxdh_set_debug_mask(); + iwvctx->zxdh_write_imm_split_switch = zxdh_get_write_imm_split_switch(); + + cmd.userspace_ver = ZXDH_CONTEXT_VER_V1; + if (ibv_cmd_get_context(&iwvctx->ibv_ctx, + (struct ibv_get_context *)&cmd, sizeof(cmd), + NULL, &resp.ibv_resp, sizeof(resp))) + goto err_free; + + verbs_set_ops(&iwvctx->ibv_ctx, &zxdh_uctx_ops); + + iwvctx->dev_attrs.feature_flags = resp.feature_flags; + iwvctx->dev_attrs.max_hw_wq_frags = resp.max_hw_wq_frags; + iwvctx->dev_attrs.max_hw_read_sges = resp.max_hw_read_sges; + iwvctx->dev_attrs.max_hw_inline = resp.max_hw_inline; + iwvctx->dev_attrs.max_hw_rq_quanta = resp.max_hw_rq_quanta; + iwvctx->dev_attrs.max_hw_srq_quanta = resp.max_hw_srq_quanta; + iwvctx->dev_attrs.max_hw_wq_quanta = resp.max_hw_wq_quanta; + iwvctx->dev_attrs.max_hw_srq_wr = resp.max_hw_srq_wr; + iwvctx->dev_attrs.max_hw_sq_chunk = resp.max_hw_sq_chunk; + iwvctx->dev_attrs.max_hw_cq_size = resp.max_hw_cq_size; + iwvctx->dev_attrs.min_hw_cq_size = resp.min_hw_cq_size; + iwvctx->abi_ver = ZXDH_ABI_VER; + iwvctx->dev_attrs.chip_rev = resp.chip_rev; + iwvctx->dev_attrs.rdma_tool_flags = resp.rdma_tool_flags; + + sq_db_mmap_key = resp.sq_db_mmap_key; + cq_db_mmap_key = resp.cq_db_mmap_key; + + iwvctx->dev_attrs.db_addr_type = resp.db_addr_type; + iwvctx->dev_attrs.sq_db_pa = resp.sq_db_pa; + iwvctx->dev_attrs.cq_db_pa = resp.cq_db_pa; + + if (iwvctx->dev_attrs.db_addr_type != ZXDH_DB_ADDR_BAR) + goto err_free; + + iwvctx->sq_db = zxdh_mmap(cmd_fd, sq_db_mmap_key); + if (iwvctx->sq_db == MAP_FAILED) + goto err_free; + + iwvctx->cq_db = zxdh_mmap(cmd_fd, cq_db_mmap_key); + if 
(iwvctx->cq_db == MAP_FAILED) { + zxdh_munmap(iwvctx->sq_db); + goto err_free; + } + ibv_pd = zxdh_ualloc_pd(&iwvctx->ibv_ctx.context); + if (!ibv_pd) { + zxdh_munmap(iwvctx->sq_db); + zxdh_munmap(iwvctx->cq_db); + goto err_free; + } + + ibv_pd->context = &iwvctx->ibv_ctx.context; + iwvctx->iwupd = container_of(ibv_pd, struct zxdh_upd, ibv_pd); + add_private_ops(iwvctx); + return &iwvctx->ibv_ctx; +err_free: + free(iwvctx); + return NULL; +} + +static void zxdh_uninit_device(struct verbs_device *verbs_device) +{ + struct zxdh_udevice *dev; + + dev = container_of(&verbs_device->device, struct zxdh_udevice, + ibv_dev.device); + free(dev); +} + +static struct verbs_device *zxdh_device_alloc(struct verbs_sysfs_dev *sysfs_dev) +{ + struct zxdh_udevice *dev; + + dev = calloc(1, sizeof(*dev)); + if (!dev) + return NULL; + + return &dev->ibv_dev; +} + +static const struct verbs_device_ops zxdh_udev_ops = { + .alloc_context = zxdh_ualloc_context, + .alloc_device = zxdh_device_alloc, + .match_max_abi_version = ZXDH_MAX_ABI_VERSION, + .match_min_abi_version = ZXDH_MIN_ABI_VERSION, + .match_table = hca_table, + .name = "zrdma", + .uninit_device = zxdh_uninit_device, +}; + +PROVIDER_DRIVER(zrdma, zxdh_udev_ops); diff --git a/providers/zrdma/zxdh_zrdma.h b/providers/zrdma/zxdh_zrdma.h new file mode 100644 index 000000000..46ca163b4 --- /dev/null +++ b/providers/zrdma/zxdh_zrdma.h @@ -0,0 +1,253 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */ +/* + * Copyright (c) 2024 ZTE Corporation. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ZXDH_ZRDMA_H__ +#define __ZXDH_ZRDMA_H__ + +#include <inttypes.h> +#include <stddef.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <infiniband/driver.h> + +#include "zxdh_defs.h" +#include "zxdh_status.h" +#include "zxdh_verbs.h" + +#define ZXDH_BASE_PUSH_PAGE 1 +#define ZXDH_U_MINCQ_SIZE 4 +#define ZXDH_DB_SHADOW_AREA_SIZE 8 +#define ZXDH_DB_SQ_OFFSET 0x404 +#define ZXDH_DB_CQ_OFFSET 0x588 + +#define MIN_UDP_SPORT 1024 +#define MIN_QP_QPN 1 + +enum zxdh_supported_wc_flags { + ZXDH_CQ_SUPPORTED_WC_FLAGS = + IBV_WC_EX_WITH_BYTE_LEN | IBV_WC_EX_WITH_IMM | + IBV_WC_EX_WITH_QP_NUM | IBV_WC_EX_WITH_SRC_QP | + IBV_WC_EX_WITH_SLID | IBV_WC_EX_WITH_SL | + IBV_WC_EX_WITH_DLID_PATH_BITS | + IBV_WC_EX_WITH_COMPLETION_TIMESTAMP_WALLCLOCK | + IBV_WC_EX_WITH_COMPLETION_TIMESTAMP, +}; + +enum { + ZXDH_DBG_QP = 1 << 0, + ZXDH_DBG_CQ = 1 << 1, + ZXDH_DBG_SRQ = 1 << 2, +}; +extern uint32_t zxdh_debug_mask; +#define zxdh_dbg(mask, format, arg...) \ + do { \ + if (mask & zxdh_debug_mask) { \ + int tmp = errno; \ + fprintf(stdout, "%s:%d: " format, __func__, __LINE__, \ + ##arg); \ + errno = tmp; \ + } \ + } while (0) + + +struct zxdh_udevice { + struct verbs_device ibv_dev; +}; + +struct zxdh_uah { + struct ibv_ah ibv_ah; + uint32_t ah_id; + struct ibv_global_route grh; +}; + +struct zxdh_upd { + struct ibv_pd ibv_pd; + uint32_t pd_id; +}; + +struct zxdh_uvcontext { + struct verbs_context ibv_ctx; + struct zxdh_upd *iwupd; + struct zxdh_dev_attrs dev_attrs; + void *db; + void *sq_db; + void *cq_db; + int abi_ver; + bool legacy_mode; + uint8_t zxdh_write_imm_split_switch; + struct zxdh_uvcontext_ops *cxt_ops; +}; + +struct zxdh_uqp; + +struct zxdh_cq_buf { + struct list_node list; + struct zxdh_cq cq; + struct verbs_mr vmr; +}; + +struct zxdh_ucq { + struct verbs_cq verbs_cq; + struct verbs_mr vmr; + struct verbs_mr vmr_shadow_area; + pthread_spinlock_t lock; + size_t buf_size; + bool is_armed; + enum zxdh_cmpl_notify last_notify; + int comp_vector; + uint32_t report_rtt; + struct zxdh_uqp *uqp; + struct zxdh_cq cq; + struct list_head resize_list; + /* for extended CQ completion fields */ + struct zxdh_cq_poll_info cur_cqe; + bool resize_enable; +}; + +struct zxdh_usrq { + struct ibv_srq ibv_srq; + struct verbs_mr vmr; + struct verbs_mr list_vmr; + struct verbs_mr db_vmr; + size_t total_buf_size; + size_t buf_size; + size_t list_buf_size; + size_t db_buf_size; + size_t srq_size; + size_t srq_list_size; + uint32_t srq_id; + uint32_t max_wr; + uint32_t max_sge; + uint32_t srq_limit; + pthread_spinlock_t lock; + uint32_t wq_size; + struct ibv_recv_wr *pend_rx_wr; + struct zxdh_srq srq; +}; + +struct zxdh_uqp { + struct verbs_qp vqp; + struct zxdh_ucq *send_cq; + struct zxdh_ucq *recv_cq; + struct zxdh_usrq *srq; + struct verbs_mr vmr; + size_t buf_size; + uint32_t zxdh_drv_opt; + pthread_spinlock_t lock; + uint16_t sq_sig_all; + uint16_t qperr; + uint16_t rsvd; + uint32_t pending_rcvs; + uint32_t wq_size; + struct ibv_recv_wr *pend_rx_wr; + struct zxdh_qp qp; + enum ibv_qp_type qp_type; + struct zxdh_sge *recv_sges; + uint8_t is_srq; + uint8_t inline_data[ZXDH_MAX_INLINE_DATA_SIZE]; +}; + +struct zxdh_umr { + struct verbs_mr vmr; + uint32_t acc_flags; + uint8_t leaf_pbl_size; + uint8_t host_page_size; + uint64_t mr_pa_pble_index; +}; + +/* zxdh_verbs.c */ +int zxdh_uquery_device_ex(struct ibv_context *context, + const struct ibv_query_device_ex_input *input, + struct ibv_device_attr_ex *attr, size_t attr_size); +int zxdh_uquery_port(struct ibv_context *context, uint8_t port, + 
struct ibv_port_attr *attr); +struct ibv_pd *zxdh_ualloc_pd(struct ibv_context *context); +int zxdh_ufree_pd(struct ibv_pd *pd); +struct ibv_mr *zxdh_ureg_mr(struct ibv_pd *pd, void *addr, size_t length, + uint64_t hca_va, int access); +int zxdh_udereg_mr(struct verbs_mr *vmr); + +int zxdh_urereg_mr(struct verbs_mr *mr, int flags, struct ibv_pd *pd, + void *addr, size_t length, int access); + +struct ibv_mw *zxdh_ualloc_mw(struct ibv_pd *pd, enum ibv_mw_type type); +int zxdh_ubind_mw(struct ibv_qp *qp, struct ibv_mw *mw, + struct ibv_mw_bind *mw_bind); +int zxdh_udealloc_mw(struct ibv_mw *mw); +struct ibv_cq *zxdh_ucreate_cq(struct ibv_context *context, int cqe, + struct ibv_comp_channel *channel, + int comp_vector); +struct ibv_cq_ex *zxdh_ucreate_cq_ex(struct ibv_context *context, + struct ibv_cq_init_attr_ex *attr_ex); +void zxdh_ibvcq_ex_fill_priv_funcs(struct zxdh_ucq *iwucq, + struct ibv_cq_init_attr_ex *attr_ex); +int zxdh_uresize_cq(struct ibv_cq *cq, int cqe); +int zxdh_udestroy_cq(struct ibv_cq *cq); +int zxdh_umodify_cq(struct ibv_cq *cq, struct ibv_modify_cq_attr *attr); +int zxdh_upoll_cq(struct ibv_cq *cq, int entries, struct ibv_wc *entry); +int zxdh_uarm_cq(struct ibv_cq *cq, int solicited); +void zxdh_cq_event(struct ibv_cq *cq); +struct ibv_qp *zxdh_ucreate_qp(struct ibv_pd *pd, + struct ibv_qp_init_attr *attr); +struct ibv_qp *zxdh_ucreate_qp_ex(struct ibv_context *context, + struct ibv_qp_init_attr_ex *attr); +int zxdh_uquery_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask, + struct ibv_qp_init_attr *init_attr); +int zxdh_umodify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask); +int zxdh_udestroy_qp(struct ibv_qp *qp); +int zxdh_upost_send(struct ibv_qp *ib_qp, struct ibv_send_wr *ib_wr, + struct ibv_send_wr **bad_wr); +int zxdh_upost_recv(struct ibv_qp *ib_qp, struct ibv_recv_wr *ib_wr, + struct ibv_recv_wr **bad_wr); +struct ibv_srq *zxdh_ucreate_srq(struct ibv_pd *pd, + struct ibv_srq_init_attr *srq_init_attr); +int zxdh_udestroy_srq(struct ibv_srq *srq); +int zxdh_umodify_srq(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr, + int srq_attr_mask); +int zxdh_uquery_srq(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr); +int zxdh_upost_srq_recv(struct ibv_srq *srq, struct ibv_recv_wr *recv_wr, + struct ibv_recv_wr **bad_recv_wr); +int zxdh_uget_srq_num(struct ibv_srq *srq, uint32_t *srq_num); +struct ibv_ah *zxdh_ucreate_ah(struct ibv_pd *ibpd, struct ibv_ah_attr *attr); +int zxdh_udestroy_ah(struct ibv_ah *ibah); +int zxdh_uattach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, + uint16_t lid); +int zxdh_udetach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, + uint16_t lid); +void zxdh_async_event(struct ibv_context *context, + struct ibv_async_event *event); +void zxdh_set_hw_attrs(struct zxdh_hw_attrs *attrs); +void *zxdh_mmap(int fd, off_t offset); +void zxdh_munmap(void *map); +void zxdh_set_debug_mask(void); +int zxdh_get_write_imm_split_switch(void); +#endif /* __ZXDH_ZRDMA_H__ */ diff --git a/redhat/rdma-core.spec b/redhat/rdma-core.spec index 78549f2eb..f5501947e 100644 --- a/redhat/rdma-core.spec +++ b/redhat/rdma-core.spec @@ -176,6 +176,9 @@ Provides: libocrdma = %{version}-%{release} Obsoletes: libocrdma < %{version}-%{release} Provides: librxe = %{version}-%{release} Obsoletes: librxe < %{version}-%{release} +Provides: libzrdma = %{version}-%{release} +Obsoletes: libzrdma < %{version}-%{release} + %description -n libibverbs libibverbs is a library that allows userspace processes to use RDMA @@ -202,6 +205,7 @@ 
Device-specific plug-in ibverbs userspace drivers are included: - librxe: A software implementation of the RoCE protocol - libsiw: A software implementation of the iWarp protocol - libvmw_pvrdma: VMware paravirtual RDMA device +- libzrdma: ZTE NX series RDMA device %package -n libibverbs-utils Summary: Examples for the libibverbs library @@ -597,6 +601,7 @@ fi %{_libdir}/libmana.so.* %{_libdir}/libmlx5.so.* %{_libdir}/libmlx4.so.* +%{_libdir}/libzrdma.so.* %config(noreplace) %{_sysconfdir}/libibverbs.d/*.driver %doc %{_docdir}/%{name}/libibverbs.md diff --git a/suse/rdma-core.spec b/suse/rdma-core.spec index 280db5ac3..876cd129e 100644 --- a/suse/rdma-core.spec +++ b/suse/rdma-core.spec @@ -36,6 +36,7 @@ Group: Productivity/Networking/Other %define efa_so_major 1 %define hns_so_major 1 +%define zrdma_so_major 1 %define verbs_so_major 1 %define rdmacm_so_major 1 %define umad_so_major 3 @@ -47,6 +48,7 @@ Group: Productivity/Networking/Other %define efa_lname libefa%{efa_so_major} %define hns_lname libhns%{hns_so_major} +%define zrdma_lname libzrdma%{zrdma_so_major} %define verbs_lname libibverbs%{verbs_so_major} %define rdmacm_lname librdmacm%{rdmacm_so_major} %define umad_lname libibumad%{umad_so_major} @@ -162,6 +164,7 @@ Requires: %{verbs_lname} = %{version}-%{release} %if 0%{?dma_coherent} Requires: %{efa_lname} = %{version}-%{release} Requires: %{hns_lname} = %{version}-%{release} +Requires: %{zrdma_lname} = %{version}-%{release} Requires: %{mana_lname} = %{version}-%{release} Requires: %{mlx4_lname} = %{version}-%{release} Requires: %{mlx5_lname} = %{version}-%{release} @@ -204,6 +207,7 @@ Obsoletes: libcxgb4-rdmav2 < %{version}-%{release} Obsoletes: libefa-rdmav2 < %{version}-%{release} Obsoletes: libhfi1verbs-rdmav2 < %{version}-%{release} Obsoletes: libhns-rdmav2 < %{version}-%{release} +Obsoletes: libzrdma-rdmav2 < %{version}-%{release} Obsoletes: libipathverbs-rdmav2 < %{version}-%{release} Obsoletes: libmana-rdmav2 < %{version}-%{release} Obsoletes: libmlx4-rdmav2 < %{version}-%{release} @@ -214,6 +218,7 @@ Obsoletes: librxe-rdmav2 < %{version}-%{release} %if 0%{?dma_coherent} Requires: %{efa_lname} = %{version}-%{release} Requires: %{hns_lname} = %{version}-%{release} +Requires: %{zrdma_lname} = %{version}-%{release} Requires: %{mana_lname} = %{version}-%{release} Requires: %{mlx4_lname} = %{version}-%{release} Requires: %{mlx5_lname} = %{version}-%{release} @@ -245,6 +250,7 @@ Device-specific plug-in ibverbs userspace drivers are included: - librxe: A software implementation of the RoCE protocol - libsiw: A software implementation of the iWarp protocol - libvmw_pvrdma: VMware paravirtual RDMA device +- libzrdma: ZTE NX series RDMA device %package -n %verbs_lname Summary: Ibverbs runtime library @@ -268,10 +274,18 @@ Group: System/Libraries %description -n %hns_lname This package contains the hns runtime library. +%package -n %zrdma_lname +Summary: ZRDMA runtime library +Group: System/Libraries + +%description -n %zrdma_lname +This package contains the zrdma runtime library. + %package -n %mana_lname Summary: MANA runtime library Group: System/Libraries + %description -n %mana_lname This package contains the mana runtime library. 
@@ -525,6 +539,9 @@ rm -rf %{buildroot}/%{_sbindir}/srp_daemon.sh %post -n %hns_lname -p /sbin/ldconfig %postun -n %hns_lname -p /sbin/ldconfig +%post -n %zrdma_lname -p /sbin/ldconfig +%postun -n %zrdma_lname -p /sbin/ldconfig + %post -n %mana_lname -p /sbin/ldconfig %postun -n %mana_lname -p /sbin/ldconfig @@ -725,6 +742,10 @@ done %defattr(-,root,root) %{_libdir}/libhns*.so.* +%files -n %zrdma_lname +%defattr(-,root,root) +%{_libdir}/libzrdma*.so.* + %files -n %mana_lname %{_libdir}/libmana*.so.*
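
Usage note: nothing zrdma-specific is required for basic operation. The device registered by PROVIDER_DRIVER(zrdma, zxdh_udev_ops) is matched against hca_table and exported through the verbs_context_ops table above, so the ordinary libibverbs flow is enough to open the device and query its attributes. The snippet below is a minimal illustrative sketch of that flow, assuming a host with a zrdma (or any other RDMA) device present; it uses only the generic libibverbs API with trivial error handling and is not part of this patch.

/* Minimal sketch: open the first RDMA device (e.g. one matched by the
 * zrdma hca_table) through the generic libibverbs API.
 * Build: cc -o open_dev open_dev.c -libverbs
 */
#include <stdio.h>
#include <stdlib.h>
#include <infiniband/verbs.h>

int main(void)
{
	struct ibv_device **dev_list;
	struct ibv_context *ctx;
	struct ibv_device_attr attr;
	int num;

	dev_list = ibv_get_device_list(&num);
	if (!dev_list || !num) {
		fprintf(stderr, "no RDMA devices found\n");
		return EXIT_FAILURE;
	}

	/* dev_list[0] is used for brevity; a real application would pick
	 * the device by name via ibv_get_device_name(). */
	ctx = ibv_open_device(dev_list[0]);
	if (!ctx) {
		fprintf(stderr, "failed to open %s\n",
			ibv_get_device_name(dev_list[0]));
		ibv_free_device_list(dev_list);
		return EXIT_FAILURE;
	}

	/* Standard attribute query; for zrdma the provider serves this
	 * through its .query_device_ex = zxdh_uquery_device_ex hook. */
	if (!ibv_query_device(ctx, &attr))
		printf("%s: max_qp=%d max_cqe=%d\n",
		       ibv_get_device_name(dev_list[0]),
		       attr.max_qp, attr.max_cqe);

	ibv_close_device(ctx);
	ibv_free_device_list(dev_list);
	return EXIT_SUCCESS;
}

Everything beyond this generic flow (QP/CQ/SRQ creation, posting work requests, and so on) goes through the same verbs_context_ops entry points installed in zxdh_ualloc_context(), with provider-private extensions registered separately via add_private_ops().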