Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion README
Original file line number Diff line number Diff line change
Expand Up @@ -1036,7 +1036,14 @@ XLIO_PRINT_REPORT
Print a human readable report of resources usage at exit. The report is printed
during the termination phase. Therefore, it can be missed if the process is killed
with the SIGKILL signal.
Default: 0 (Disabled)
Default value: auto

auto
Print report if an anomaly is detected
on
Print report
off
Disabled

XLIO Monitoring & Performance Counters
=====================================
Expand Down
2 changes: 1 addition & 1 deletion src/core/dev/allocator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ void xlio_allocator::print_hugepages_warning(size_t requested_size)
vlog_printf(VLOG_WARNING, "or switch to a different memory allocation type:\n");
vlog_printf(VLOG_WARNING, " %s=ANON\n", SYS_VAR_MEM_ALLOC_TYPE);

g_hugepage_mgr.print_report(true);
g_hugepage_mgr.print_report(VLOG_INFO, false, true);

vlog_printf(VLOG_WARNING, "************************************************************\n");
} else {
Expand Down
6 changes: 4 additions & 2 deletions src/core/dev/buffer_pool.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ void buffer_pool::print_report(vlog_levels_t log_level /*=VLOG_DEBUG*/)
}

/* static */
void buffer_pool::print_full_report(vlog_levels_t log_level)
void buffer_pool::print_full_report(vlog_levels_t log_level, bool print_only_critical /*=false*/)
{
std::vector<buffer_pool *> pools = {g_buffer_pool_rx_rwqe, g_buffer_pool_rx_stride,
g_buffer_pool_tx, g_buffer_pool_zc};
Expand All @@ -209,7 +209,9 @@ void buffer_pool::print_full_report(vlog_levels_t log_level)
for (auto &pool : pools) {
if (pool != nullptr) {
is_error = is_error || pool->m_p_bpool_stat->n_buffer_pool_no_bufs;
pool->print_report(log_level);
if (!print_only_critical) {
pool->print_report(log_level);
}
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/core/dev/buffer_pool.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ class buffer_pool {
void register_memory(ib_ctx_handler *p_ib_ctx_h);
void print_val_tbl();
void print_report(vlog_levels_t log_level = VLOG_DEBUG);
static void print_full_report(vlog_levels_t log_level);
static void print_full_report(vlog_levels_t log_level, bool print_only_critical = false);

uint32_t find_lkey_by_ib_ctx_thread_safe(ib_ctx_handler *p_ib_ctx_h);

Expand Down
15 changes: 11 additions & 4 deletions src/core/dev/cq_mgr_rx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
#include "hw_queue_rx.h"
#include "ring_simple.h"

#include "core/dev/net_device_table_mgr.h"

#define MODULE_NAME "cq_mgr_rx"

#define cq_logpanic __log_info_panic
Expand Down Expand Up @@ -148,6 +150,8 @@ cq_mgr_rx::~cq_mgr_rx()
ENDIF_VERBS_FAILURE;
VALGRIND_MAKE_MEM_UNDEFINED(m_p_ibv_cq, sizeof(ibv_cq));

g_p_net_device_table_mgr->increase_closed_rings_rx_cq_drop_counter(
m_p_cq_stat->n_rx_hw_pkt_drops);
statistics_print();
xlio_stats_instance_remove_cq_block(m_p_cq_stat);

Expand All @@ -156,10 +160,13 @@ cq_mgr_rx::~cq_mgr_rx()

void cq_mgr_rx::statistics_print()
{
if (m_p_cq_stat->n_rx_pkt_drop || m_p_cq_stat->n_rx_sw_queue_len ||
m_p_cq_stat->n_rx_drained_at_once_max || m_p_cq_stat->n_buffer_pool_len) {
if (m_p_cq_stat->n_rx_sw_pkt_drops || m_p_cq_stat->n_rx_hw_pkt_drops ||
m_p_cq_stat->n_rx_sw_queue_len || m_p_cq_stat->n_rx_drained_at_once_max ||
m_p_cq_stat->n_buffer_pool_len) {
cq_logdbg_no_funcname("Packets dropped: %12llu",
(unsigned long long int)m_p_cq_stat->n_rx_pkt_drop);
(unsigned long long int)m_p_cq_stat->n_rx_sw_pkt_drops);
cq_logdbg_no_funcname("HW RX Packets dropped: %12llu",
(unsigned long long int)m_p_cq_stat->n_rx_hw_pkt_drops);
cq_logdbg_no_funcname("Drained max: %17u", m_p_cq_stat->n_rx_drained_at_once_max);
cq_logdbg_no_funcname("CQE errors: %18llu",
(unsigned long long int)m_p_cq_stat->n_rx_cqe_error);
Expand Down Expand Up @@ -360,7 +367,7 @@ bool cq_mgr_rx::compensate_qp_poll_success(mem_buf_desc_t *buff_cur)
m_debt -= buffers;
m_p_cq_stat->n_buffer_pool_len = m_rx_pool.size();
} else if (m_b_sysvar_cq_keep_qp_full || m_debt >= (int)m_hqrx_ptr->m_rx_num_wr) {
m_p_cq_stat->n_rx_pkt_drop++;
m_p_cq_stat->n_rx_sw_pkt_drops++;
m_hqrx_ptr->post_recv_buffer(buff_cur);
--m_debt;
return true;
Expand Down
1 change: 1 addition & 0 deletions src/core/dev/cq_mgr_rx.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ class cq_mgr_rx {
int reclaim_recv_single_buffer(mem_buf_desc_t *rx_reuse);

void get_cq_event(int count = 1) { xlio_ib_mlx5_get_cq_event(&m_mlx5_cq, count); };
/** Returns the n_rx_hw_pkt_drops value stored in this CQ's statistics block. */
uint64_t get_n_rx_hw_pkt_drops()
{
    return m_p_cq_stat->n_rx_hw_pkt_drops;
}

protected:
/**
Expand Down
8 changes: 5 additions & 3 deletions src/core/dev/cq_mgr_rx_regrq.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,9 @@ void cq_mgr_rx_regrq::cqe_to_mem_buff_desc(struct xlio_mlx5_cqe *cqe,
p_rx_wc_buf_desc->rx.tls_decrypted = (cqe->pkt_info >> 3) & 0x3;
#endif /* DEFINED_UTLS */
p_rx_wc_buf_desc->rx.timestamps.hw_raw = ntohll(cqe->timestamp);
p_rx_wc_buf_desc->rx.flow_tag_id = ntohl((uint32_t)(cqe->sop_drop_qpn));
uint32_t sop_rxdrop_qpn_flowtag_h_byte = ntohl(cqe->sop_rxdrop_qpn_flowtag);
p_rx_wc_buf_desc->rx.flow_tag_id = sop_rxdrop_qpn_flowtag_h_byte & 0x00FFFFFF;
m_p_cq_stat->n_rx_hw_pkt_drops += sop_rxdrop_qpn_flowtag_h_byte >> 24;
p_rx_wc_buf_desc->rx.is_sw_csum_need =
!(m_b_is_rx_hw_csum_on && (cqe->hds_ip_ext & MLX5_CQE_L4_OK) &&
(cqe->hds_ip_ext & MLX5_CQE_L3_OK));
Expand Down Expand Up @@ -204,7 +206,7 @@ int cq_mgr_rx_regrq::drain_and_proccess(uintptr_t *p_recycle_buffers_last_wr_id

if (cqe_process_rx(buff, status)) {
if (p_recycle_buffers_last_wr_id) {
m_p_cq_stat->n_rx_pkt_drop++;
m_p_cq_stat->n_rx_sw_pkt_drops++;
reclaim_recv_buffer_helper(buff);
} else {
bool procces_now = is_eth_tcp_frame(buff);
Expand Down Expand Up @@ -294,7 +296,7 @@ bool cq_mgr_rx_regrq::poll_and_process_element_rx(uint64_t *p_cq_poll_sn, void *
process_recv_buffer(buff, pv_fd_ready_array);
}
} else {
m_p_cq_stat->n_rx_pkt_drop++;
m_p_cq_stat->n_rx_sw_pkt_drops++;
if (++m_debt >= (int)m_n_sysvar_rx_num_wr_to_post_recv) {
compensate_qp_poll_failed();
}
Expand Down
6 changes: 4 additions & 2 deletions src/core/dev/cq_mgr_rx_strq.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,9 @@ inline bool cq_mgr_rx_strq::strq_cqe_to_mem_buff_desc(struct xlio_mlx5_cqe *cqe,
_current_wqe_consumed_bytes += _hot_buffer_stride->sz_buffer;

_hot_buffer_stride->rx.timestamps.hw_raw = ntohll(cqe->timestamp);
_hot_buffer_stride->rx.flow_tag_id = ntohl((uint32_t)(cqe->sop_drop_qpn));
uint32_t sop_rxdrop_qpn_flowtag_h_byte = ntohl(cqe->sop_rxdrop_qpn_flowtag);
_hot_buffer_stride->rx.flow_tag_id = sop_rxdrop_qpn_flowtag_h_byte & 0x00FFFFFF;
m_p_cq_stat->n_rx_hw_pkt_drops += sop_rxdrop_qpn_flowtag_h_byte >> 24;
_hot_buffer_stride->rx.is_sw_csum_need =
!(m_b_is_rx_hw_csum_on && (cqe->hds_ip_ext & MLX5_CQE_L4_OK) &&
(cqe->hds_ip_ext & MLX5_CQE_L3_OK));
Expand Down Expand Up @@ -320,7 +322,7 @@ int cq_mgr_rx_strq::drain_and_proccess_helper(mem_buf_desc_t *buff, mem_buf_desc
++ret_total;
if (process_strq_cq_element_rx(buff, status)) {
if (p_recycle_buffers_last_wr_id) {
m_p_cq_stat->n_rx_pkt_drop++;
m_p_cq_stat->n_rx_sw_pkt_drops++;
reclaim_recv_buffer_helper(buff);
} else {
bool procces_now = is_eth_tcp_frame(buff);
Expand Down
26 changes: 25 additions & 1 deletion src/core/dev/net_device_table_mgr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ net_device_table_mgr::net_device_table_mgr()
m_num_devices = 0;
m_global_ring_epfd = 0;
m_max_mtu = 0;

m_closed_rings_rx_cq_drop_counter = 0;
ndtm_logdbg("");

m_global_ring_epfd = SYSCALL(epoll_create, 48);
Expand Down Expand Up @@ -457,6 +457,30 @@ int net_device_table_mgr::global_ring_epfd_get()
return m_global_ring_epfd;
}

/**
 * Computes the total RX drop counter over all net devices.
 *
 * The sum starts from the drops recorded for rings that were already closed
 * (m_closed_rings_rx_cq_drop_counter, read here without taking its lock) and
 * adds the accumulated counter of every device in the global manager's
 * m_net_device_map_index.
 *
 * @return The accumulated RX drop count.
 */
uint64_t net_device_table_mgr::global_get_rx_drop_counter()
{
    // coverity[missing_lock:FALSE] /*Turn off coverity missing_lock check*/
    uint64_t total_drops = this->m_closed_rings_rx_cq_drop_counter;
    for (const auto &dev_entry : g_p_net_device_table_mgr->m_net_device_map_index) {
        total_drops += dev_entry.second->get_accumulative_rx_cq_drop_counter();
    }
    return total_drops;
}

/**
 * Prints the total HW RX drop counter returned by global_get_rx_drop_counter().
 *
 * @param log_level            Log level passed to vlog_printf for every line.
 * @param print_only_critical  When true, nothing is printed if the total is zero.
 */
void net_device_table_mgr::print_report(vlog_levels_t log_level,
                                        bool print_only_critical /*=false*/)
{
    const uint64_t total_drops = global_get_rx_drop_counter();
    if (print_only_critical && total_drops == 0) {
        return;
    }
    vlog_printf(log_level, "*********************************\n");
    // uint64_t is not 'unsigned long' on every ABI, so "%lu" is not a portable
    // match. Cast explicitly and use "%llu", as the CQ statistics printing does.
    vlog_printf(log_level, "Total HW RX drop counter: %llu\n",
                static_cast<unsigned long long>(total_drops));
    vlog_printf(log_level, "*********************************\n");
}

void net_device_table_mgr::global_ring_wait_for_notification_and_process_element(
uint64_t *p_poll_sn, void *pv_fd_ready_array /*=NULL*/)
{
Expand Down
16 changes: 16 additions & 0 deletions src/core/dev/net_device_table_mgr.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,13 @@ class net_device_table_mgr : public cache_table_mgr<int, net_device_val *>, publ

int global_ring_epfd_get();

/*
* This will get accumulated RX out-of-buffer drops
* for all net devices.
*/
uint64_t global_get_rx_drop_counter(void);
void print_report(vlog_levels_t log_level, bool print_only_critical = false);

void handle_timer_expired(void *user_data);

uint32_t get_max_mtu() const { return m_max_mtu; }
Expand All @@ -80,6 +87,13 @@ class net_device_table_mgr : public cache_table_mgr<int, net_device_val *>, publ

void get_net_devices(local_dev_vector &vec);

/**
 * Adds @p count to m_closed_rings_rx_cq_drop_counter.
 * The addition is done between lock() and unlock() calls on
 * m_closed_rings_rx_cq_drop_counter_lock.
 */
void increase_closed_rings_rx_cq_drop_counter(uint64_t count)
{
    m_closed_rings_rx_cq_drop_counter_lock.lock();
    m_closed_rings_rx_cq_drop_counter = m_closed_rings_rx_cq_drop_counter + count;
    m_closed_rings_rx_cq_drop_counter_lock.unlock();
}

private:
void del_link_event(const netlink_link_info *info);
void new_link_event(const netlink_link_info *info);
Expand All @@ -98,6 +112,8 @@ class net_device_table_mgr : public cache_table_mgr<int, net_device_val *>, publ
int m_global_ring_pipe_fds[2];

uint32_t m_max_mtu;
lock_mutex m_closed_rings_rx_cq_drop_counter_lock;
uint64_t m_closed_rings_rx_cq_drop_counter;
};

extern net_device_table_mgr *g_p_net_device_table_mgr;
Expand Down
10 changes: 9 additions & 1 deletion src/core/dev/net_device_val.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -905,7 +905,15 @@ bool net_device_val::update_active_slaves()
}
return 0;
}

/**
 * Sums get_rx_cq_out_of_buffer_drop() over every ring stored in m_h_ring_map.
 *
 * @return The accumulated drop count of the rings in m_h_ring_map.
 */
uint64_t net_device_val::get_accumulative_rx_cq_drop_counter()
{
    uint64_t accumulator = 0;
    for (const auto &ring_entry : m_h_ring_map) {
        // The ring pointer is the first member of the mapped pair.
        accumulator += ring_entry.second.first->get_rx_cq_out_of_buffer_drop();
    }
    return accumulator;
}
void net_device_val::update_netvsc_slaves(int if_index, int if_flags)
{
slave_data_t *s = nullptr;
Expand Down
1 change: 1 addition & 0 deletions src/core/dev/net_device_val.h
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,7 @@ class net_device_val {
void register_to_ibverbs_events(event_handler_ibverbs *handler);
void unregister_to_ibverbs_events(event_handler_ibverbs *handler);
uint32_t get_priority_by_tc_class(uint32_t tc_class);
uint64_t get_accumulative_rx_cq_drop_counter();

protected:
void set_slave_array();
Expand Down
1 change: 1 addition & 0 deletions src/core/dev/ring.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ class ring {
virtual ~ring();

virtual void print_val();
virtual uint64_t get_rx_cq_out_of_buffer_drop() = 0;

virtual bool attach_flow(flow_tuple &flow_spec_5t, sockinfo *sink, bool force_5t = false) = 0;
virtual bool detach_flow(flow_tuple &flow_spec_5t, sockinfo *sink) = 0;
Expand Down
8 changes: 8 additions & 0 deletions src/core/dev/ring_bond.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -900,6 +900,14 @@ int ring_bond::socketxtreme_poll(struct xlio_socketxtreme_completion_t *, unsign
{
return 0;
}
uint64_t ring_bond::get_rx_cq_out_of_buffer_drop()
{
uint64_t accumulator = 0;
for (uint32_t i = 0; i < m_recv_rings.size(); i++) {
accumulator += m_recv_rings[i]->get_rx_cq_out_of_buffer_drop();
}
return accumulator;
}

void ring_bond::slave_destroy(int if_index)
{
Expand Down
1 change: 1 addition & 0 deletions src/core/dev/ring_bond.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ class ring_bond : public ring {
{
m_xmit_rings[id]->reset_inflight_zc_buffers_ctx(id, ctx);
}
virtual uint64_t get_rx_cq_out_of_buffer_drop();

protected:
void update_cap(ring_slave *slave = nullptr);
Expand Down
5 changes: 5 additions & 0 deletions src/core/dev/ring_simple.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1009,6 +1009,11 @@ void ring_simple::modify_cq_moderation(uint32_t period, uint32_t count)
priv_ibv_modify_cq_moderation(m_p_cq_mgr_rx->get_ibv_cq_hndl(), period, count);
}

/** Returns the HW RX packet drop count reported by this ring's RX CQ manager. */
uint64_t ring_simple::get_rx_cq_out_of_buffer_drop()
{
    const uint64_t hw_drops = m_p_cq_mgr_rx->get_n_rx_hw_pkt_drops();
    return hw_drops;
}

void ring_simple::adapt_cq_moderation()
{
if (m_lock_ring_rx.trylock()) {
Expand Down
2 changes: 2 additions & 0 deletions src/core/dev/ring_simple.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,8 @@ class ring_simple : public ring_slave {
m_p_ring_stat->simple.n_tx_tso_byte_count += bytes;
}

virtual uint64_t get_rx_cq_out_of_buffer_drop() override;

#ifdef DEFINED_UTLS
bool tls_tx_supported(void) override { return m_tls.tls_tx; }
bool tls_rx_supported(void) override { return m_tls.tls_rx; }
Expand Down
1 change: 1 addition & 0 deletions src/core/dev/ring_tap.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ class ring_tap : public ring_slave {
NOT_IN_USE(id);
return 0;
}
virtual uint64_t get_rx_cq_out_of_buffer_drop() { return 0; }
virtual bool is_tso(void) { return false; }

inline void set_tap_data_available() { m_tap_data_available = true; }
Expand Down
2 changes: 1 addition & 1 deletion src/core/ib/mlx5/ib_mlx5.h
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ typedef struct xlio_mlx5_cqe {
uint8_t rsvd4[4];
__be32 byte_cnt;
__be64 timestamp;
__be32 sop_drop_qpn;
__be32 sop_rxdrop_qpn_flowtag;
__be16 wqe_counter;
uint8_t rsvd5;
uint8_t op_own;
Expand Down
13 changes: 8 additions & 5 deletions src/core/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,9 +94,11 @@ static int free_libxlio_resources()

g_b_exit = true;

if (safe_mce_sys().print_report) {
buffer_pool::print_full_report(VLOG_INFO);
g_hugepage_mgr.print_report();
if (safe_mce_sys().print_report != option_3::OFF) {
bool print_only_critical = (safe_mce_sys().print_report == option_3::AUTO);
buffer_pool::print_full_report(VLOG_INFO, print_only_critical);
g_hugepage_mgr.print_report(VLOG_INFO, print_only_critical);
g_p_net_device_table_mgr->print_report(VLOG_INFO, print_only_critical);
}

// Destroy polling groups before fd_collection to clear XLIO sockets from the fd_collection
Expand Down Expand Up @@ -471,8 +473,9 @@ void print_xlio_global_settings()
VLOG_PARAM_STRING("SegFault Backtrace", safe_mce_sys().handle_segfault,
MCE_DEFAULT_HANDLE_SIGFAULT, SYS_VAR_HANDLE_SIGSEGV,
safe_mce_sys().handle_segfault ? "Enabled " : "Disabled");
VLOG_PARAM_STRING("Print a report", safe_mce_sys().print_report, MCE_DEFAULT_PRINT_REPORT,
SYS_VAR_PRINT_REPORT, safe_mce_sys().print_report ? "Enabled " : "Disabled");
VLOG_PARAM_STRING("Print a report", option_3::to_str(safe_mce_sys().print_report),
option_3::to_str(MCE_DEFAULT_PRINT_REPORT), SYS_VAR_PRINT_REPORT,
option_3::to_str(safe_mce_sys().print_report));
VLOG_PARAM_STRING("Quick start", safe_mce_sys().quick_start, MCE_DEFAULT_QUICK_START,
SYS_VAR_QUICK_START, safe_mce_sys().quick_start ? "Enabled " : "Disabled");

Expand Down
Loading