Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions contrib/jenkins_tests/gtest.sh
Original file line number Diff line number Diff line change
Expand Up @@ -104,14 +104,18 @@ rc=$(($rc+$?))

# XLIO Ultra API

#IPV4
# IPV4
eval "${sudo_cmd} $timeout_exe env GTEST_TAP=2 LD_PRELOAD=$gtest_lib $gtest_app $gtest_opt --gtest_filter=ultra_api* --gtest_output=xml:${WORKSPACE}/${prefix}/test-xlio_ultra_api.xml"
rc=$(($rc+$?))

#IPV6
# IPV6
eval "${sudo_cmd} $timeout_exe env GTEST_TAP=2 LD_PRELOAD=$gtest_lib $gtest_app $gtest_opt_ipv6 --gtest_filter=ultra_api* --gtest_output=xml:${WORKSPACE}/${prefix}/test-xlio_ultra_api-ipv6.xml"
rc=$(($rc+$?))

# Full SQ completion test - Needs XLIO_TCP_CC_ALGO=2 XLIO_TCP_NODELAY=1 to fill SQ deterministically without handling initially small congestion window and delayed packets.
eval "${sudo_cmd} $timeout_exe env GTEST_TAP=2 LD_PRELOAD=$gtest_lib XLIO_TCP_CC_ALGO=2 XLIO_TCP_NODELAY=1 $gtest_app $gtest_opt --gtest_filter=ultra_api_socket_send_receive_full_sq* --gtest_output=xml:${WORKSPACE}/${prefix}/test-xlio_ultra_api_full_sq_completion.xml"
rc=$(($rc+$?))

# Worker Threads Mode tests

# Worker Threads Mode test filter
Expand Down
6 changes: 4 additions & 2 deletions src/core/dev/cq_mgr_tx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -286,9 +286,11 @@ void cq_mgr_tx::handle_sq_wqe_prop(unsigned index)

prev = p;
p = p->next;
} while (p && m_hqtx_ptr->is_sq_wqe_prop_valid(p, prev));
} while (prev != m_hqtx_ptr->m_last_sq_wqe_prop_to_complete);

m_p_ring->return_tx_pool_to_global_pool();
m_hqtx_ptr->credits_return(credits);
m_hqtx_ptr->m_sq_wqe_prop_last_signalled = index;
m_hqtx_ptr->m_last_sq_wqe_prop_to_complete =
&m_hqtx_ptr->m_sq_wqe_idx_to_prop[(index + m_hqtx_ptr->m_sq_wqe_idx_to_prop[index].wqebbs) %
m_hqtx_ptr->m_tx_num_wr];
}
192 changes: 67 additions & 125 deletions src/core/dev/hw_queue_tx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -440,7 +440,7 @@ void hw_queue_tx::init_queue()
hwqtx_logerr("Failed allocating m_sq_wqe_idx_to_prop (errno=%d %m)", errno);
return;
}
m_sq_wqe_prop_last_signalled = m_tx_num_wr - 1;
m_last_sq_wqe_prop_to_complete = m_sq_wqe_idx_to_prop;
m_sq_wqe_prop_last = nullptr;
}

Expand All @@ -465,21 +465,14 @@ void hw_queue_tx::init_device_memory()
}
}

/**
 * Point the "hot" WQE at the slot that will be filled next.
 *
 * The slot index is the running WQE counter masked with (m_tx_num_wr - 1);
 * both the hot WQE pointer and the hot index are refreshed from that value.
 */
void hw_queue_tx::update_next_wqe_hot()
{
    // Compute the masked slot index once and reuse it for both members.
    const auto next_slot = m_sq_wqe_counter & (m_tx_num_wr - 1);

    m_sq_wqe_hot = &(*m_sq_wqes)[next_slot];
    m_sq_wqe_hot_index = next_slot;
}

/**
 * Create the TX completion-queue manager for this queue.
 *
 * m_tx_num_wr is first replaced by align32pow2(m_tx_num_wr), and the adjusted
 * value is what the new cq_mgr_tx is constructed with.
 * NOTE(review): presumably align32pow2 rounds up to a power of two - confirm.
 *
 * @return Pointer to a newly allocated cq_mgr_tx.
 */
cq_mgr_tx *hw_queue_tx::init_tx_cq_mgr()
{
    // The WR count must be adjusted before it is handed to the CQ manager.
    m_tx_num_wr = align32pow2(m_tx_num_wr);

    cq_mgr_tx *tx_cq_mgr = new cq_mgr_tx(m_p_ring, m_p_ib_ctx_handler, m_tx_num_wr,
                                         m_p_ring->get_tx_comp_event_channel());
    return tx_cq_mgr;
}

inline void hw_queue_tx::ring_doorbell(int num_wqebb, bool skip_comp /*=false*/)
inline void hw_queue_tx::ring_doorbell(uint8_t num_wqebb, bool skip_comp /*=false*/)
{
uint64_t *dst = (uint64_t *)m_mlx5_qp.bf.reg;
uint64_t *src = reinterpret_cast<uint64_t *>(m_sq_wqe_hot);
Expand Down Expand Up @@ -537,7 +530,7 @@ inline int hw_queue_tx::fill_inl_segment(sg_array &sga, uint8_t *cur_seg, uint8_
}

//! Fill WQE dynamically, based on amount of free WQEBB in SQ
inline int hw_queue_tx::fill_wqe(xlio_ibv_send_wr *pswr)
inline uint8_t hw_queue_tx::fill_wqe(xlio_ibv_send_wr *pswr)
{
// control segment is mostly filled by preset after previous packet
// we always inline ETH header
Expand Down Expand Up @@ -587,14 +580,9 @@ inline int hw_queue_tx::fill_wqe(xlio_ibv_send_wr *pswr)
sizeof(struct mlx5_wqe_eth_seg)) = htonl(0x80000000 | inline_len);
rest_space = align_to_octoword_up(inline_len + 4); // align to OCTOWORDs
wqe_size += rest_space / OCTOWORD;
// assert((data_len-inline_len)==0);
// configuring control
m_sq_wqe_hot->ctrl.data[1] = htonl((m_mlx5_qp.qpn << 8) | wqe_size);
rest_space = align_to_WQEBB_up(wqe_size) / 4;
hwqtx_logfunc("data_len: %d inline_len: %d wqe_size: %d wqebbs: %d",
data_len - inline_len, inline_len, wqe_size, rest_space);
ring_doorbell(rest_space);
return rest_space;

hwqtx_logfunc("data_len: %d inline_len: %d wqe_size: %d", data_len - inline_len,
inline_len, wqe_size * OCTOWORD);
} else {
// wrap around case, first filling till the end of m_sq_wqes
int wrap_up_size = max_inline_len - rest_space;
Expand All @@ -608,55 +596,47 @@ inline int hw_queue_tx::fill_wqe(xlio_ibv_send_wr *pswr)
data_len -= inline_len;
rest_space = align_to_octoword_up(inline_len + 4);
wqe_size += rest_space / OCTOWORD;
rest_space =
align_to_WQEBB_up(rest_space / OCTOWORD) / 4; // size of 1st chunk at the end

hwqtx_logfunc(
"END chunk data_addr: %p data_len: %d inline_len: %d wqe_size: %d wqebbs: %d",
data_addr, data_len, inline_len, wqe_size, rest_space);
hwqtx_logfunc("END chunk data_addr: %p data_len: %d inline_len: %d wqe_size: %d",
data_addr, data_len, inline_len, wqe_size * OCTOWORD);

// Wrap around
//
cur_seg = (uint8_t *)m_sq_wqes;
data_addr = sga.get_data(&wrap_up_size);

wrap_up_size = fill_inl_segment(sga, cur_seg, data_addr, data_len, wrap_up_size);
inline_len += wrap_up_size;
max_inline_len = align_to_octoword_up(wrap_up_size);
wqe_size += max_inline_len / OCTOWORD;
max_inline_len = align_to_WQEBB_up(max_inline_len / OCTOWORD) / 4;
// store inline data size

// Store inline data size
*(uint32_t *)((uint8_t *)m_sq_wqe_hot + sizeof(struct mlx5_wqe_ctrl_seg) +
sizeof(struct mlx5_wqe_eth_seg)) = htonl(0x80000000 | inline_len);
hwqtx_logfunc("BEGIN_CHUNK data_addr: %p data_len: %d wqe_size: %d inline_len: %d "
"end_wqebbs: %d wqebbs: %d",
data_addr, data_len - wrap_up_size, wqe_size, inline_len + wrap_up_size,
rest_space, max_inline_len);
// assert((data_len-wrap_up_size)==0);
// configuring control
m_sq_wqe_hot->ctrl.data[1] = htonl((m_mlx5_qp.qpn << 8) | wqe_size);

dbg_dump_wqe((uint32_t *)m_sq_wqe_hot, rest_space * 4 * 16);
dbg_dump_wqe((uint32_t *)m_sq_wqes, max_inline_len * 4 * 16);

ring_doorbell(rest_space + max_inline_len);
return rest_space + max_inline_len;

hwqtx_logfunc("BEGIN_CHUNK data_addr: %p data_len: %d wqe_size: %d inline_len: %d",
data_addr, data_len - wrap_up_size, wqe_size * OCTOWORD,
inline_len + wrap_up_size);

dbg_dump_wqe(
(uint32_t *)m_sq_wqe_hot,
sizeof(struct mlx5_wqe_ctrl_seg) + sizeof(struct mlx5_wqe_eth_seg) + rest_space);
dbg_dump_wqe((uint32_t *)m_sq_wqes, max_inline_len);
}
} else if (xlio_send_wr_opcode(*pswr) == XLIO_IBV_WR_SEND) {
// Data is bigger than the maximum inline size: only the ETH header + uint from IP (18
// bytes) are inlined. The rest goes into data pointer segments, which are added while
// there is still data to transfer.
wqe_size = fill_wqe_send(pswr);
} else {
if (xlio_send_wr_opcode(*pswr) == XLIO_IBV_WR_SEND) {
/* data is bigger than max to inline we inlined only ETH header + uint from IP (18
* bytes) the rest will be in data pointer segment adding data seg with pointer if there
* still data to transfer
*/
wqe_size = fill_wqe_send(pswr);
return wqe_size;
} else {
/* Support XLIO_IBV_WR_SEND_TSO operation
*/
wqe_size = fill_wqe_lso(pswr, data_len);
return wqe_size;
}
// Support XLIO_IBV_WR_SEND_TSO operation
wqe_size = fill_wqe_lso(pswr, data_len);
}
return 1;

m_sq_wqe_hot->ctrl.data[1] = htonl((m_mlx5_qp.qpn << 8) | wqe_size);

uint8_t wqebbs = static_cast<uint8_t>(align_to_WQEBB_up(wqe_size) / 4);

return wqebbs;
}

inline int hw_queue_tx::fill_wqe_send(xlio_ibv_send_wr *pswr)
Expand Down Expand Up @@ -694,11 +674,7 @@ inline int hw_queue_tx::fill_wqe_send(xlio_ibv_send_wr *pswr)
}
}

m_sq_wqe_hot->ctrl.data[1] = htonl((m_mlx5_qp.qpn << 8) | wqe_size);
int wqebbs = align_to_WQEBB_up(wqe_size) / 4;
ring_doorbell(wqebbs);

return wqebbs;
return wqe_size;
}

//! Filling wqe for LSO
Expand Down Expand Up @@ -769,25 +745,8 @@ inline int hw_queue_tx::fill_wqe_lso(xlio_ibv_send_wr *pswr, int data_len)
dpseg++;
wqe_size += sizeof(struct mlx5_wqe_data_seg) / OCTOWORD;
}
m_sq_wqe_hot->ctrl.data[1] = htonl((m_mlx5_qp.qpn << 8) | wqe_size);

int wqebbs = align_to_WQEBB_up(wqe_size) / 4;
ring_doorbell(wqebbs);
return wqebbs;
}

void hw_queue_tx::store_current_wqe_prop(mem_buf_desc_t *buf, unsigned credits, xlio_ti *ti)
{
m_sq_wqe_idx_to_prop[m_sq_wqe_hot_index] = sq_wqe_prop {
.buf = buf,
.credits = credits,
.ti = ti,
.next = m_sq_wqe_prop_last,
};
m_sq_wqe_prop_last = &m_sq_wqe_idx_to_prop[m_sq_wqe_hot_index];
if (ti) {
ti->get();
}
return wqe_size;
}

//! Send one RAW packet
Expand Down Expand Up @@ -820,20 +779,39 @@ void hw_queue_tx::send_to_wire(xlio_ibv_send_wr *p_send_wqe, xlio_wr_tx_packet_a
eseg->rsvd2 = 0;
eseg->cs_flags = (uint8_t)(attr & (XLIO_TX_PACKET_L3_CSUM | XLIO_TX_PACKET_L4_CSUM) & 0xff);

submit_wqe(reinterpret_cast<mem_buf_desc_t *>(p_send_wqe->wr_id), credits, fill_wqe(p_send_wqe),
tis, false);
}

inline void hw_queue_tx::submit_wqe(mem_buf_desc_t *buf, unsigned credits, uint8_t wqebbs,
xlio_ti *ti, bool skip_comp)
{
/* Store buffer descriptor */
store_current_wqe_prop(reinterpret_cast<mem_buf_desc_t *>(p_send_wqe->wr_id), credits, tis);
m_sq_wqe_idx_to_prop[m_sq_wqe_hot_index] = sq_wqe_prop {
.buf = buf,
.credits = credits,
.wqebbs = wqebbs,
.ti = ti,
.next = m_sq_wqe_prop_last,
};

m_sq_wqe_prop_last = &m_sq_wqe_idx_to_prop[m_sq_wqe_hot_index];

/* Complete WQE */
int wqebbs = fill_wqe(p_send_wqe);
assert(wqebbs > 0 && (unsigned)wqebbs <= credits);
NOT_IN_USE(wqebbs);
if (ti) {
ti->get();
}

update_next_wqe_hot();
assert(wqebbs > 0 && wqebbs <= credits);
ring_doorbell(wqebbs, skip_comp);

// Preparing pointer to the next WQE after a doorbell
m_sq_wqe_hot = &(*m_sq_wqes)[m_sq_wqe_counter & (m_tx_num_wr - 1)];
m_sq_wqe_hot_index = m_sq_wqe_counter & (m_tx_num_wr - 1);

hwqtx_logfunc(
"m_sq_wqe_hot: %p m_sq_wqe_hot_index: %d wqe_counter: %d new_hot_index: %d wr_id: %llx",
"m_sq_wqe_hot: %p m_sq_wqe_hot_index: %d wqe_counter: %d new_hot_index: %d buf: %p",
m_sq_wqe_hot, m_sq_wqe_hot_index, m_sq_wqe_counter, (m_sq_wqe_counter & (m_tx_num_wr - 1)),
p_send_wqe->wr_id);
buf);
}

std::unique_ptr<xlio_tis> hw_queue_tx::create_tis(uint32_t flags)
Expand Down Expand Up @@ -1151,12 +1129,10 @@ inline void hw_queue_tx::tls_post_static_params_wqe(xlio_ti *ti, const struct xl
memset(tspseg, 0, sizeof(*tspseg));

tls_fill_static_params_wqe(tspseg, info, key_id, resync_tcp_sn);
store_current_wqe_prop(nullptr, SQ_CREDITS_UMR, ti);

ring_doorbell(TLS_SET_STATIC_PARAMS_WQEBBS, true);
dbg_dump_wqe((uint32_t *)m_sq_wqe_hot, sizeof(mlx5_set_tls_static_params_wqe));

update_next_wqe_hot();
submit_wqe(nullptr, SQ_CREDITS_UMR, TLS_SET_STATIC_PARAMS_WQEBBS, ti, true);
}

inline void hw_queue_tx::tls_fill_progress_params_wqe(
Expand Down Expand Up @@ -1195,12 +1171,10 @@ inline void hw_queue_tx::tls_post_progress_params_wqe(xlio_ti *ti, uint32_t tis_
(fence ? MLX5_FENCE_MODE_INITIATOR_SMALL : 0) | (is_tx ? 0 : MLX5_WQE_CTRL_CQ_UPDATE);

tls_fill_progress_params_wqe(&wqe->params, tis_tir_number, next_record_tcp_sn);
store_current_wqe_prop(nullptr, SQ_CREDITS_SET_PSV, ti);

ring_doorbell(TLS_SET_PROGRESS_PARAMS_WQEBBS);
dbg_dump_wqe((uint32_t *)m_sq_wqe_hot, sizeof(mlx5_set_tls_progress_params_wqe));

update_next_wqe_hot();
submit_wqe(nullptr, SQ_CREDITS_SET_PSV, TLS_SET_PROGRESS_PARAMS_WQEBBS, ti, false);
}

inline void hw_queue_tx::tls_get_progress_params_wqe(xlio_ti *ti, uint32_t tirn, void *buf,
Expand All @@ -1226,11 +1200,7 @@ inline void hw_queue_tx::tls_get_progress_params_wqe(xlio_ti *ti, uint32_t tirn,
psv->psv_index[0] = htobe32(tirn);
psv->va = htobe64((uintptr_t)buf);

store_current_wqe_prop(nullptr, SQ_CREDITS_GET_PSV, ti);

ring_doorbell(TLS_GET_PROGRESS_WQEBBS);

update_next_wqe_hot();
submit_wqe(nullptr, SQ_CREDITS_GET_PSV, TLS_GET_PROGRESS_WQEBBS, ti, false);
}

void hw_queue_tx::tls_tx_post_dump_wqe(xlio_tis *tis, void *addr, uint32_t len, uint32_t lkey,
Expand Down Expand Up @@ -1290,11 +1260,7 @@ void hw_queue_tx::post_nop_fence(void)
cseg->qpn_ds = htobe32((m_mlx5_qp.qpn << MLX5_WQE_CTRL_QPN_SHIFT) | 0x01);
cseg->fm_ce_se = MLX5_FENCE_MODE_INITIATOR_SMALL;

store_current_wqe_prop(nullptr, SQ_CREDITS_NOP, nullptr);

ring_doorbell(1);

update_next_wqe_hot();
submit_wqe(nullptr, SQ_CREDITS_NOP, XLIO_NOP_WQEBBS, nullptr, false);
}

void hw_queue_tx::post_dump_wqe(xlio_tis *tis, void *addr, uint32_t len, uint32_t lkey,
Expand All @@ -1317,11 +1283,7 @@ void hw_queue_tx::post_dump_wqe(xlio_tis *tis, void *addr, uint32_t len, uint32_
dseg->lkey = htobe32(lkey);
dseg->byte_count = htobe32(len);

store_current_wqe_prop(nullptr, SQ_CREDITS_DUMP, tis);

ring_doorbell(XLIO_DUMP_WQEBBS, true);

update_next_wqe_hot();
submit_wqe(nullptr, SQ_CREDITS_DUMP, XLIO_DUMP_WQEBBS, tis, true);
}

//! Handle releasing of Tx buffers
Expand Down Expand Up @@ -1384,26 +1346,6 @@ void hw_queue_tx::trigger_completion_for_all_sent_packets()
}
}

void hw_queue_tx::reset_inflight_zc_buffers_ctx(void *ctx)
{
sq_wqe_prop *p = m_sq_wqe_prop_last;
sq_wqe_prop *prev;
if (p) {
unsigned p_i = p - m_sq_wqe_idx_to_prop;
if (p_i == m_sq_wqe_prop_last_signalled) {
return;
}
do {
mem_buf_desc_t *desc = p->buf;
if (desc && desc->tx.zc.ctx == ctx) {
desc->tx.zc.ctx = nullptr;
}
prev = p;
p = p->next;
} while (p && is_sq_wqe_prop_valid(p, prev));
}
}

uint32_t hw_queue_tx::is_ratelimit_change(struct xlio_rate_limit_t &rate_limit)
{
uint32_t rl_changes = 0;
Expand Down
Loading