diff --git a/config/m4/verbs.m4 b/config/m4/verbs.m4 index 41567f16f..dea7e878a 100644 --- a/config/m4/verbs.m4 +++ b/config/m4/verbs.m4 @@ -128,7 +128,6 @@ CHECK_VERBS_ATTRIBUTE([IBV_QPT_RAW_PACKET], [infiniband/verbs.h]) CHECK_VERBS_ATTRIBUTE([IBV_WC_WITH_VLAN], [infiniband/verbs.h]) CHECK_VERBS_ATTRIBUTE([IBV_ACCESS_ALLOCATE_MR], [infiniband/verbs.h]) CHECK_VERBS_ATTRIBUTE([IBV_QP_CREATE_SOURCE_QPN], [infiniband/verbs.h], [IBV_QP_INIT_SOURCE_QPN]) -CHECK_VERBS_ATTRIBUTE([IBV_FLOW_SPEC_IB], [infiniband/verbs.h], [IBV_FLOW_SPEC_IB]) CHECK_VERBS_ATTRIBUTE([IBV_DEVICE_RAW_IP_CSUM], [infiniband/verbs.h]) CHECK_VERBS_ATTRIBUTE([IBV_SEND_IP_CSUM], [infiniband/verbs.h]) CHECK_VERBS_ATTRIBUTE([IBV_FLOW_SPEC_ACTION_TAG], [infiniband/verbs.h], [IBV_FLOW_TAG]) @@ -145,7 +144,6 @@ if test "x$vma_cv_verbs" == x2; then CHECK_VERBS_ATTRIBUTE([IBV_EXP_WR_NOP], [infiniband/verbs_exp.h], [IBV_WR_NOP]) CHECK_VERBS_ATTRIBUTE([IBV_EXP_ACCESS_ALLOCATE_MR], [infiniband/verbs_exp.h]) CHECK_VERBS_ATTRIBUTE([IBV_EXP_QP_INIT_ATTR_ASSOCIATED_QPN], [infiniband/verbs_exp.h], [IBV_QP_INIT_SOURCE_QPN]) - CHECK_VERBS_ATTRIBUTE([IBV_EXP_FLOW_SPEC_IB], [infiniband/verbs_exp.h], [IBV_FLOW_SPEC_IB]) CHECK_VERBS_ATTRIBUTE([IBV_EXP_SEND_IP_CSUM], [infiniband/verbs_exp.h]) CHECK_VERBS_ATTRIBUTE([IBV_EXP_DEVICE_ATTR_MAX_DM_SIZE], [infiniband/verbs_exp.h], [IBV_DM]) CHECK_VERBS_ATTRIBUTE([IBV_EXP_QP_RATE_LIMIT], [infiniband/verbs_exp.h], [IBV_PACKET_PACING_CAPS]) diff --git a/src/vma/Makefile.am b/src/vma/Makefile.am index d2963700b..879edab5a 100644 --- a/src/vma/Makefile.am +++ b/src/vma/Makefile.am @@ -83,7 +83,6 @@ libvma_la_SOURCES := \ dev/net_device_entry.cpp \ dev/net_device_table_mgr.cpp \ dev/wqe_send_handler.cpp \ - dev/wqe_send_ib_handler.cpp \ dev/ring.cpp \ dev/ring_bond.cpp \ dev/ring_slave.cpp \ @@ -137,8 +136,6 @@ libvma_la_SOURCES := \ proto/dst_entry_tcp.cpp \ proto/header.cpp \ proto/arp.cpp \ - proto/igmp_mgr.cpp \ - proto/igmp_handler.cpp \ \ sock/sockinfo.cpp \ sock/sockinfo_udp.cpp \ @@ -190,7 +187,6 @@ libvma_la_SOURCES := \ dev/ring_profile.h \ dev/ring_allocation_logic.h \ dev/wqe_send_handler.h \ - dev/wqe_send_ib_handler.h \ \ event/command.h \ event/delta_timer.h \ @@ -245,14 +241,11 @@ libvma_la_SOURCES := \ proto/dst_entry_udp_mc.h \ proto/flow_tuple.h \ proto/header.h \ - proto/igmp_handler.h \ - proto/igmp_mgr.h \ proto/ip_address.h \ proto/ip_frag.h \ proto/L2_address.h \ proto/mem_buf_desc.h \ proto/neighbour.h \ - proto/neighbour_observer.h \ proto/neighbour_table_mgr.h \ proto/netlink_socket_mgr.h \ proto/peer_key.h \ diff --git a/src/vma/dev/cq_mgr.cpp b/src/vma/dev/cq_mgr.cpp index 388c452bb..8c9c10e8d 100644 --- a/src/vma/dev/cq_mgr.cpp +++ b/src/vma/dev/cq_mgr.cpp @@ -60,11 +60,9 @@ cq_mgr::cq_mgr(ring_simple* p_ring, ib_ctx_handler* p_ib_ctx_handler, int cq_siz ,m_n_sysvar_cq_poll_batch_max(safe_mce_sys().cq_poll_batch_max) ,m_n_sysvar_progress_engine_wce_max(safe_mce_sys().progress_engine_wce_max) ,m_p_cq_stat(&m_cq_stat_static) // use local copy of stats by default (on rx cq get shared memory stats) - ,m_transport_type(m_p_ring->get_transport_type()) ,m_p_next_rx_desc_poll(NULL) ,m_n_sysvar_rx_prefetch_bytes_before_poll(safe_mce_sys().rx_prefetch_bytes_before_poll) ,m_n_sysvar_rx_prefetch_bytes(safe_mce_sys().rx_prefetch_bytes) - ,m_sz_transport_header(0) ,m_p_ib_ctx_handler(p_ib_ctx_handler) ,m_n_sysvar_rx_num_wr_to_post_recv(safe_mce_sys().rx_num_wr_to_post_recv) ,m_comp_event_channel(p_comp_event_channel) @@ -106,25 +104,9 @@ void cq_mgr::configure(int cq_size) } BULLSEYE_EXCLUDE_BLOCK_END VALGRIND_MAKE_MEM_DEFINED(m_p_ibv_cq, sizeof(ibv_cq)); - switch (m_transport_type) { - case VMA_TRANSPORT_IB: - m_sz_transport_header = GRH_HDR_LEN; - break; - case VMA_TRANSPORT_ETH: - m_sz_transport_header = ETH_HDR_LEN; - break; - BULLSEYE_EXCLUDE_BLOCK_START - default: - cq_logpanic("Unknown transport type: %d", m_transport_type); - break; - BULLSEYE_EXCLUDE_BLOCK_END - } if (m_b_is_rx) { vma_stats_instance_create_cq_block(m_p_cq_stat); - } - - if (m_b_is_rx) { m_b_is_rx_hw_csum_on = vma_is_rx_hw_csum_supported(m_p_ib_ctx_handler->get_ibv_device_attr()); cq_logdbg("RX CSUM support = %d", m_b_is_rx_hw_csum_on); } @@ -499,9 +481,8 @@ mem_buf_desc_t* cq_mgr::process_cq_element_rx(vma_ibv_wc* p_wce) VALGRIND_MAKE_MEM_DEFINED(p_mem_buf_desc->p_buffer, p_mem_buf_desc->sz_data); - prefetch_range((uint8_t*)p_mem_buf_desc->p_buffer + m_sz_transport_header, - std::min(p_mem_buf_desc->sz_data - m_sz_transport_header, (size_t)m_n_sysvar_rx_prefetch_bytes)); - //prefetch((uint8_t*)p_mem_buf_desc->p_buffer + m_sz_transport_header); + prefetch_range((uint8_t*)p_mem_buf_desc->p_buffer + ETH_HDR_LEN, + std::min(p_mem_buf_desc->sz_data - ETH_HDR_LEN, (size_t)m_n_sysvar_rx_prefetch_bytes)); } return p_mem_buf_desc; @@ -790,29 +771,19 @@ int cq_mgr::drain_and_proccess(uintptr_t* p_recycle_buffers_last_wr_id /*=NULL*/ if (p_recycle_buffers_last_wr_id) { m_p_cq_stat->n_rx_pkt_drop++; reclaim_recv_buffer_helper(buff); - } else { - bool procces_now = false; - if (m_transport_type == VMA_TRANSPORT_ETH) { - procces_now = is_eth_tcp_frame(buff); - } - if (m_transport_type == VMA_TRANSPORT_IB) { - procces_now = is_ib_tcp_frame(buff); - } + } else if (is_eth_tcp_frame(buff)) { // We process immediately all non udp/ip traffic.. - if (procces_now) { - buff->rx.is_vma_thr = true; - if ((++m_qp_rec.debt < (int)m_n_sysvar_rx_num_wr_to_post_recv) || - !compensate_qp_poll_success(buff)) { - process_recv_buffer(buff, NULL); - } + buff->rx.is_vma_thr = true; + if ((++m_qp_rec.debt < (int)m_n_sysvar_rx_num_wr_to_post_recv) || + !compensate_qp_poll_success(buff)) { + process_recv_buffer(buff, NULL); } - else { //udp/ip traffic we just put in the cq's rx queue - m_rx_queue.push_back(buff); - mem_buf_desc_t* buff_cur = m_rx_queue.get_and_pop_front(); - if ((++m_qp_rec.debt < (int)m_n_sysvar_rx_num_wr_to_post_recv) || - !compensate_qp_poll_success(buff_cur)) { - m_rx_queue.push_front(buff_cur); - } + } else { //udp/ip traffic we just put in the cq's rx queue + m_rx_queue.push_back(buff); + mem_buf_desc_t* buff_cur = m_rx_queue.get_and_pop_front(); + if ((++m_qp_rec.debt < (int)m_n_sysvar_rx_num_wr_to_post_recv) || + !compensate_qp_poll_success(buff_cur)) { + m_rx_queue.push_front(buff_cur); } } } diff --git a/src/vma/dev/cq_mgr.h b/src/vma/dev/cq_mgr.h index c6015445e..074cc1fb4 100644 --- a/src/vma/dev/cq_mgr.h +++ b/src/vma/dev/cq_mgr.h @@ -184,11 +184,9 @@ class cq_mgr const uint32_t m_n_sysvar_cq_poll_batch_max; const uint32_t m_n_sysvar_progress_engine_wce_max; cq_stats_t* m_p_cq_stat; - transport_type_t m_transport_type; mem_buf_desc_t* m_p_next_rx_desc_poll; const uint32_t m_n_sysvar_rx_prefetch_bytes_before_poll; const uint32_t m_n_sysvar_rx_prefetch_bytes; - size_t m_sz_transport_header; ib_ctx_handler* m_p_ib_ctx_handler; const uint32_t m_n_sysvar_rx_num_wr_to_post_recv; private: diff --git a/src/vma/dev/cq_mgr.inl b/src/vma/dev/cq_mgr.inl index 744a0eeab..f561c9460 100644 --- a/src/vma/dev/cq_mgr.inl +++ b/src/vma/dev/cq_mgr.inl @@ -62,22 +62,4 @@ inline bool is_eth_tcp_frame(mem_buf_desc_t* buff) return false; } -inline bool is_ib_tcp_frame(mem_buf_desc_t* buff) -{ - struct ipoibhdr* p_ipoib_h = (struct ipoibhdr*)(buff->p_buffer + GRH_HDR_LEN); - - // Validate IPoIB header - if (unlikely(p_ipoib_h->ipoib_header != htonl(IPOIB_HEADER))) { - return false; - } - - size_t transport_header_len = GRH_HDR_LEN + IPOIB_HDR_LEN; - - struct iphdr * p_ip_h = (struct iphdr*)(buff->p_buffer + transport_header_len); - if (likely(p_ip_h->protocol == IPPROTO_TCP)) { - return true; - } - return false; -} - #endif//CQ_MGR_INL_H diff --git a/src/vma/dev/cq_mgr_mlx5.cpp b/src/vma/dev/cq_mgr_mlx5.cpp index d93bdd9df..990fa8da4 100644 --- a/src/vma/dev/cq_mgr_mlx5.cpp +++ b/src/vma/dev/cq_mgr_mlx5.cpp @@ -294,29 +294,19 @@ int cq_mgr_mlx5::drain_and_proccess(uintptr_t* p_recycle_buffers_last_wr_id /*=N if (p_recycle_buffers_last_wr_id) { m_p_cq_stat->n_rx_pkt_drop++; reclaim_recv_buffer_helper(m_rx_hot_buffer); - } else { - bool procces_now = false; - if (m_transport_type == VMA_TRANSPORT_ETH) { - procces_now = is_eth_tcp_frame(m_rx_hot_buffer); - } - if (m_transport_type == VMA_TRANSPORT_IB) { - procces_now = is_ib_tcp_frame(m_rx_hot_buffer); - } + } else if (is_eth_tcp_frame(m_rx_hot_buffer)) { // We process immediately all non udp/ip traffic.. - if (procces_now) { - m_rx_hot_buffer->rx.is_vma_thr = true; - if ((++m_qp_rec.debt < (int)m_n_sysvar_rx_num_wr_to_post_recv) || - !compensate_qp_poll_success(m_rx_hot_buffer)) { - process_recv_buffer(m_rx_hot_buffer, NULL); - } + m_rx_hot_buffer->rx.is_vma_thr = true; + if ((++m_qp_rec.debt < (int)m_n_sysvar_rx_num_wr_to_post_recv) || + !compensate_qp_poll_success(m_rx_hot_buffer)) { + process_recv_buffer(m_rx_hot_buffer, NULL); } - else { //udp/ip traffic we just put in the cq's rx queue - m_rx_queue.push_back(m_rx_hot_buffer); - mem_buf_desc_t* buff_cur = m_rx_queue.get_and_pop_front(); - if ((++m_qp_rec.debt < (int)m_n_sysvar_rx_num_wr_to_post_recv) || - !compensate_qp_poll_success(buff_cur)) { - m_rx_queue.push_front(buff_cur); - } + } else { //udp/ip traffic we just put in the cq's rx queue + m_rx_queue.push_back(m_rx_hot_buffer); + mem_buf_desc_t* buff_cur = m_rx_queue.get_and_pop_front(); + if ((++m_qp_rec.debt < (int)m_n_sysvar_rx_num_wr_to_post_recv) || + !compensate_qp_poll_success(buff_cur)) { + m_rx_queue.push_front(buff_cur); } } } @@ -344,30 +334,20 @@ int cq_mgr_mlx5::drain_and_proccess(uintptr_t* p_recycle_buffers_last_wr_id /*=N if (p_recycle_buffers_last_wr_id) { m_p_cq_stat->n_rx_pkt_drop++; reclaim_recv_buffer_helper(buff); - } else { - bool procces_now = false; - if (m_transport_type == VMA_TRANSPORT_ETH) { - procces_now = is_eth_tcp_frame(buff); - } - if (m_transport_type == VMA_TRANSPORT_IB) { - procces_now = is_ib_tcp_frame(buff); - } + } else if (is_eth_tcp_frame(buff)) { /* We process immediately all non udp/ip traffic.. */ - if (procces_now) { - buff->rx.is_vma_thr = true; - if ((++m_qp_rec.debt < (int)m_n_sysvar_rx_num_wr_to_post_recv) || - !compensate_qp_poll_success(buff)) { - process_recv_buffer(buff, NULL); - } + buff->rx.is_vma_thr = true; + if ((++m_qp_rec.debt < (int)m_n_sysvar_rx_num_wr_to_post_recv) || + !compensate_qp_poll_success(buff)) { + process_recv_buffer(buff, NULL); } - else { /* udp/ip traffic we just put in the cq's rx queue */ - m_rx_queue.push_back(buff); - mem_buf_desc_t* buff_cur = m_rx_queue.front(); - m_rx_queue.pop_front(); - if ((++m_qp_rec.debt < (int)m_n_sysvar_rx_num_wr_to_post_recv) || - !compensate_qp_poll_success(buff_cur)) { - m_rx_queue.push_front(buff_cur); - } + } else { /* udp/ip traffic we just put in the cq's rx queue */ + m_rx_queue.push_back(buff); + mem_buf_desc_t* buff_cur = m_rx_queue.front(); + m_rx_queue.pop_front(); + if ((++m_qp_rec.debt < (int)m_n_sysvar_rx_num_wr_to_post_recv) || + !compensate_qp_poll_success(buff_cur)) { + m_rx_queue.push_front(buff_cur); } } } @@ -444,8 +424,8 @@ mem_buf_desc_t* cq_mgr_mlx5::process_cq_element_rx(mem_buf_desc_t* p_mem_buf_des VALGRIND_MAKE_MEM_DEFINED(p_mem_buf_desc->p_buffer, p_mem_buf_desc->sz_data); - prefetch_range((uint8_t*)p_mem_buf_desc->p_buffer + m_sz_transport_header, - std::min(p_mem_buf_desc->sz_data - m_sz_transport_header, (size_t)m_n_sysvar_rx_prefetch_bytes)); + prefetch_range((uint8_t*)p_mem_buf_desc->p_buffer + ETH_HDR_LEN, + std::min(p_mem_buf_desc->sz_data - ETH_HDR_LEN, (size_t)m_n_sysvar_rx_prefetch_bytes)); return p_mem_buf_desc; diff --git a/src/vma/dev/net_device_table_mgr.cpp b/src/vma/dev/net_device_table_mgr.cpp index 198fbcaa1..6d1130919 100644 --- a/src/vma/dev/net_device_table_mgr.cpp +++ b/src/vma/dev/net_device_table_mgr.cpp @@ -214,9 +214,6 @@ void net_device_table_mgr::update_tbl() case ARPHRD_ETHER: p_net_device_val = new net_device_val_eth(&desc); break; - case ARPHRD_INFINIBAND: - p_net_device_val = new net_device_val_ib(&desc); - break; default: goto next; } diff --git a/src/vma/dev/net_device_val.cpp b/src/vma/dev/net_device_val.cpp index cb801469d..d74f91825 100644 --- a/src/vma/dev/net_device_val.cpp +++ b/src/vma/dev/net_device_val.cpp @@ -173,7 +173,6 @@ net_device_val::net_device_val(struct net_device_val_desc *desc) : m_lock("net_d m_if_active = 0; m_bond_xmit_hash_policy = XHP_LAYER_2; m_bond_fail_over_mac = 0; - m_transport_type = VMA_TRANSPORT_UNKNOWN; if (NULL == desc) { nd_logerr("Invalid net_device_val name=%s", "NA"); @@ -252,10 +251,10 @@ net_device_val::net_device_val(struct net_device_val_desc *desc) : m_lock("net_d case LAG_8023ad: case ACTIVE_BACKUP: // this is a bond interface (or a vlan/alias over bond), find the slaves - valid = verify_bond_ipoib_or_eth_qp_creation(); + valid = verify_bond_eth_qp_creation(); break; default: - valid = (bool)(ib_ctx && verify_ipoib_or_eth_qp_creation(get_ifname_link())); + valid = (bool)(ib_ctx && verify_eth_qp_creation(get_ifname_link())); break; } @@ -474,9 +473,6 @@ void net_device_val::set_str() case ARPHRD_ETHER: sprintf(str_x, " type %s", "ether"); break; - case ARPHRD_INFINIBAND: - sprintf(str_x, " type %s", "infiniband"); - break; default: sprintf(str_x, " type %s", "unknown"); break; @@ -1392,106 +1388,7 @@ std::string net_device_val_eth::to_str() return std::string("ETH: ") + net_device_val::to_str(); } -net_device_val_ib::~net_device_val_ib() -{ - struct in_addr in; - if (1 == inet_pton(AF_INET, BROADCAST_IP, &in)) { - g_p_neigh_table_mgr->unregister_observer(neigh_key(ip_address(in.s_addr), this), this); - } -} - -void net_device_val_ib::configure() -{ - ib_ctx_handler* p_ib_ctx = NULL; - struct in_addr in; - - m_p_L2_addr = create_L2_address(get_ifname()); - - BULLSEYE_EXCLUDE_BLOCK_START - if(m_p_L2_addr == NULL) { - nd_logpanic("m_p_L2_addr allocation error"); - } - BULLSEYE_EXCLUDE_BLOCK_END - - create_br_address(get_ifname()); - - if (1 == inet_pton(AF_INET, BROADCAST_IP, &in)) { - g_p_neigh_table_mgr->unregister_observer(neigh_key(ip_address(in.s_addr), this), this); - } - - //Register to IB BR neigh - cache_entry_subject* p_ces = NULL; - if (1 == inet_pton(AF_INET, BROADCAST_IP, &in)) { - g_p_neigh_table_mgr->register_observer(neigh_key(ip_address(in.s_addr), this), this, &p_ces); - } - m_br_neigh = dynamic_cast(p_ces); - - p_ib_ctx = g_p_ib_ctx_handler_collection->get_ib_ctx(get_ifname_link()); - if (!p_ib_ctx || ibv_query_pkey(p_ib_ctx->get_ibv_context(), get_port_from_ifname(get_ifname_link()), 0, &m_pkey)) { - nd_logerr("failed querying pkey"); - } - nd_logdbg("pkey: %d", m_pkey); -} - -ring* net_device_val_ib::create_ring(resource_allocation_key *key) -{ - ring* ring = NULL; - - NOT_IN_USE(key); - try { - switch (m_bond) { - case NO_BOND: - ring = new ring_ib(get_if_idx()); - break; - case ACTIVE_BACKUP: - case LAG_8023ad: - ring = new ring_bond_ib(get_if_idx()); - break; - default: - nd_logdbg("Unknown ring type"); - break; - } - } catch (vma_error &error) { - nd_logdbg("failed creating ring %s", error.message); - } - - return ring; -} - -L2_address* net_device_val_ib::create_L2_address(const char* ifname) -{ - if (m_p_L2_addr) { - delete m_p_L2_addr; - m_p_L2_addr = NULL; - } - unsigned char hw_addr[IPOIB_HW_ADDR_LEN]; - get_local_ll_addr(ifname, hw_addr, IPOIB_HW_ADDR_LEN, false); - return new IPoIB_addr(hw_addr); -} - -void net_device_val_ib::create_br_address(const char* ifname) -{ - if (m_p_br_addr) { - delete m_p_br_addr; - m_p_br_addr = NULL; - } - unsigned char hw_addr[IPOIB_HW_ADDR_LEN]; - get_local_ll_addr(ifname, hw_addr, IPOIB_HW_ADDR_LEN, true); - m_p_br_addr = new IPoIB_addr(hw_addr); - BULLSEYE_EXCLUDE_BLOCK_START - if (m_p_br_addr == NULL) { - nd_logpanic("m_p_br_addr allocation error"); - } - BULLSEYE_EXCLUDE_BLOCK_END -} - -std::string net_device_val_ib::to_str() -{ - return std::string("IB: ") + net_device_val::to_str(); -} - - -bool net_device_val::verify_bond_ipoib_or_eth_qp_creation() +bool net_device_val::verify_bond_eth_qp_creation() { char slaves[IFNAMSIZ * MAX_SLAVES] = {0}; @@ -1510,7 +1407,7 @@ bool net_device_val::verify_bond_ipoib_or_eth_qp_creation() { char* p = strchr(slave_name, '\n'); if (p) *p = '\0'; // Remove the tailing 'new line" char - if (!verify_ipoib_or_eth_qp_creation(slave_name)) { + if (!verify_eth_qp_creation(slave_name)) { //check all slaves but print only once for bond bond_ok = false; } @@ -1546,69 +1443,9 @@ bool net_device_val::verify_bond_ipoib_or_eth_qp_creation() } //interface name can be slave while ifa struct can describe bond -bool net_device_val::verify_ipoib_or_eth_qp_creation(const char* interface_name) +bool net_device_val::verify_eth_qp_creation(const char* interface_name) { - if (m_type == ARPHRD_INFINIBAND) { - if (verify_enable_ipoib(interface_name) && verify_qp_creation(interface_name, IBV_QPT_UD)) { - return true; - } - } else { - if (verify_qp_creation(interface_name, IBV_QPT_RAW_PACKET)) { - return true; - } - } - return false; -} - -bool net_device_val::verify_enable_ipoib(const char* interface_name) -{ - char filename[256] = "\0"; - char ifname[IFNAMSIZ] = "\0"; - NOT_IN_USE(interface_name); // Suppress --enable-opt-log=high warning - - if(!safe_mce_sys().enable_ipoib) { - nd_logdbg("Blocking offload: IPoIB interfaces ('%s')", interface_name); - return false; - } - -#ifndef DEFINED_IBV_QP_INIT_SOURCE_QPN - // Note: mlx4 does not support this capability - ib_ctx_handler* ib_ctx = g_p_ib_ctx_handler_collection->get_ib_ctx(get_ifname_link()); - if (!ib_ctx->is_mlx4()) { - nd_logwarn("Blocking offload: SOURCE_QPN is not supported for this driver ('%s')", interface_name); - return false; - } -#endif - - // Verify IPoIB is in 'datagram mode' for proper VMA with flow steering operation - if (validate_ipoib_prop(get_ifname(), m_flags, IPOIB_MODE_PARAM_FILE, "datagram", 8, filename, ifname)) { - vlog_printf(VLOG_WARNING,"*******************************************************************************************************\n"); - vlog_printf(VLOG_WARNING,"* IPoIB mode of interface '%s' is \"connected\" !\n", get_ifname()); - vlog_printf(VLOG_WARNING,"* Please change it to datagram: \"echo datagram > %s\" before loading your application with VMA library\n", filename); - vlog_printf(VLOG_WARNING,"* VMA doesn't support IPoIB in connected mode.\n"); - vlog_printf(VLOG_WARNING,"* Please refer to VMA Release Notes for more information\n"); - vlog_printf(VLOG_WARNING,"*******************************************************************************************************\n"); - return false; - } - else { - nd_logdbg("verified interface '%s' is running in datagram mode", get_ifname()); - } - - // Verify umcast is disabled for IB flow - if (validate_ipoib_prop(get_ifname(), m_flags, UMCAST_PARAM_FILE, "0", 1, filename, ifname)) { // Extract UMCAST flag (only for IB transport types) - vlog_printf(VLOG_WARNING,"*******************************************************************************************************\n"); - vlog_printf(VLOG_WARNING,"* UMCAST flag is Enabled for interface %s !\n", get_ifname()); - vlog_printf(VLOG_WARNING,"* Please disable it: \"echo 0 > %s\" before loading your application with VMA library\n", filename); - vlog_printf(VLOG_WARNING,"* This option in no longer needed in this version\n"); - vlog_printf(VLOG_WARNING,"* Please refer to Release Notes for more information\n"); - vlog_printf(VLOG_WARNING,"*******************************************************************************************************\n"); - return false; - } - else { - nd_logdbg("verified interface '%s' is running with umcast disabled", get_ifname()); - } - - return true; + return verify_qp_creation(interface_name, IBV_QPT_RAW_PACKET); } //ifname should point to a physical device @@ -1681,14 +1518,6 @@ bool net_device_val::verify_qp_creation(const char* ifname, enum ibv_qp_type qp_ qp_init_attr.recv_cq = cq; qp_init_attr.send_cq = cq; - // Set source qpn for non mlx4 IPoIB devices - if (qp_type == IBV_QPT_UD && !p_ib_ctx->is_mlx4()) { - unsigned char hw_addr[IPOIB_HW_ADDR_LEN]; - get_local_ll_addr(ifname, hw_addr, IPOIB_HW_ADDR_LEN, false); - IPoIB_addr ipoib_addr(hw_addr); - ibv_source_qpn_set(qp_init_attr, ipoib_addr.get_qpn()); - } - qp = vma_ibv_create_qp(p_ib_ctx->get_ibv_pd(), &qp_init_attr); if (qp) { if (qp_type == IBV_QPT_UD && priv_ibv_create_flow_supported(qp, port_num) == -1) { diff --git a/src/vma/dev/net_device_val.h b/src/vma/dev/net_device_val.h index cd48ee743..16b8d8314 100644 --- a/src/vma/dev/net_device_val.h +++ b/src/vma/dev/net_device_val.h @@ -21,7 +21,6 @@ #include "vma/event/event_handler_ibverbs.h" #include "vma/event/event_handler_rdma_cm.h" #include "vma/dev/ib_ctx_handler.h" -#include "vma/proto/neighbour_observer.h" #include "vma/proto/L2_address.h" #include "vma/infra/cache_subject_observer.h" @@ -29,7 +28,6 @@ class L2_address; class ring; class ib_ctx_handler; -class neigh_ib_broadcast; #define RING_ALLOC_STR_SIZE 256 class ring_alloc_logic_attr @@ -223,8 +221,6 @@ class net_device_val int release_ring(resource_allocation_key*); // delete from m_hash if ref_cnt == 0 state get_state() const { return m_state; } // not sure, look at state init at c'tor virtual std::string to_str(); - inline void set_transport_type(transport_type_t value) { m_transport_type = value; } - transport_type_t get_transport_type() const { return m_transport_type; } bool update_active_backup_slaves(); in_addr_t get_local_addr() { return m_ip[0]->local_addr; } // Valid object must have at least one address int global_ring_poll_and_process_element(uint64_t *p_poll_sn, void* pv_fd_ready_array = NULL); @@ -249,7 +245,6 @@ class net_device_val L2_address* m_p_L2_addr; L2_address* m_p_br_addr; - transport_type_t m_transport_type; lock_mutex_recursive m_lock; rings_hash_map_t m_h_ring_map; sys_image_guid_map_t m_sys_image_guid_map; @@ -266,9 +261,8 @@ class net_device_val private: void verify_bonding_mode(); bool verify_qp_creation(const char* ifname, enum ibv_qp_type qp_type); - bool verify_bond_ipoib_or_eth_qp_creation(); - bool verify_ipoib_or_eth_qp_creation(const char* interface_name); - bool verify_enable_ipoib(const char* ifname); + bool verify_bond_eth_qp_creation(); + bool verify_eth_qp_creation(const char* interface_name); resource_allocation_key* ring_key_redirection_reserve(resource_allocation_key *key); resource_allocation_key* get_ring_key_redirection(resource_allocation_key *key); @@ -297,7 +291,6 @@ class net_device_val_eth : public net_device_val { public: net_device_val_eth(struct net_device_val_desc *desc) : net_device_val(desc), m_vlan(0) { - set_transport_type(VMA_TRANSPORT_ETH); if (INVALID != get_state()) { set_slave_array(); configure(); @@ -307,7 +300,7 @@ class net_device_val_eth : public net_device_val std::string to_str(); protected: - virtual ring* create_ring(resource_allocation_key *key); + virtual ring* create_ring(resource_allocation_key *key); void parse_prio_egress_map(); private: void configure(); @@ -316,34 +309,4 @@ class net_device_val_eth : public net_device_val uint16_t m_vlan; }; - -class net_device_val_ib : public net_device_val, public neigh_observer, public cache_observer -{ -public: - net_device_val_ib(struct net_device_val_desc *desc) : net_device_val(desc), m_pkey(0), m_br_neigh(NULL) { - set_transport_type(VMA_TRANSPORT_IB); - if (INVALID != get_state()) { - set_slave_array(); - configure(); - } - } - ~net_device_val_ib(); - - std::string to_str(); - uint16_t get_pkey() { return m_pkey; } - const neigh_ib_broadcast* get_br_neigh() {return m_br_neigh;} - virtual transport_type_t get_obs_transport_type() const {return get_transport_type();} - -protected: - ring* create_ring(resource_allocation_key *key); - -private: - void configure(); - L2_address* create_L2_address(const char* ifname); - void create_br_address(const char* ifname); - uint16_t m_pkey; - neigh_ib_broadcast* m_br_neigh; -}; - - #endif diff --git a/src/vma/dev/qp_mgr.cpp b/src/vma/dev/qp_mgr.cpp index e7c1312c1..5e47d615f 100644 --- a/src/vma/dev/qp_mgr.cpp +++ b/src/vma/dev/qp_mgr.cpp @@ -32,8 +32,6 @@ #define FICTIVE_REMOTE_QPN 0x48 #define FICTIVE_REMOTE_QKEY 0x01234567 -#define FICTIVE_AH_SL 5 -#define FICTIVE_AH_DLID 0x3 #define MAX_UPSTREAM_CQ_MSHV_SIZE 8192 @@ -148,8 +146,7 @@ cq_mgr* qp_mgr::init_tx_cq_mgr() int qp_mgr::configure(struct qp_mgr_desc *desc) { - qp_logdbg("Creating QP of transport type '%s' on ibv device '%s' [%p] on port %d", - priv_vma_transport_type_str(m_p_ring->get_transport_type()), + qp_logdbg("Creating QP on ibv device '%s' [%p] on port %d", m_p_ib_ctx_handler->get_ibname(), m_p_ib_ctx_handler->get_ibv_device(), m_port_num); // Check device capabilities for max QP work requests @@ -445,7 +442,6 @@ void qp_mgr::trigger_completion_for_all_sent_packets() // Prepare dummy packet: zeroed payload ('0000'). // For ETH it replaces the MAC header!! (Nothing is going on the wire, QP in error state) - // For IB it replaces the IPoIB header. /* need to send at least eth+ip, since libmlx5 will drop just eth header */ ethhdr* p_buffer_ethhdr = (ethhdr *)p_mem_buf_desc->p_buffer; @@ -457,34 +453,9 @@ void qp_mgr::trigger_completion_for_all_sent_packets() sge[0].addr = (uintptr_t)(p_mem_buf_desc->p_buffer); sge[0].lkey = m_p_ring->m_tx_lkey; - struct ibv_ah *p_ah = NULL; - ibv_ah_attr ah_attr; - - if (m_p_ring->get_transport_type() == VMA_TRANSPORT_IB) { - memset(&ah_attr, 0, sizeof(ah_attr)); - ah_attr.dlid = FICTIVE_AH_DLID; - ah_attr.sl = FICTIVE_AH_SL; - ah_attr.src_path_bits = 0; - ah_attr.static_rate = 0; - ah_attr.is_global = 0; - ah_attr.port_num = m_port_num; // Do we need it? - - p_ah = ibv_create_ah(m_p_ib_ctx_handler->get_ibv_pd(), &ah_attr); - BULLSEYE_EXCLUDE_BLOCK_START - if (!p_ah && (errno != EIO)) { - qp_logpanic("failed creating address handler (errno=%d %m)", errno); - } - BULLSEYE_EXCLUDE_BLOCK_END - } - - // Prepare send wr for (does not care if it is UD/IB or RAW/ETH) - // UD requires AH+qkey, RAW requires minimal payload instead of MAC header. - memset(&send_wr, 0, sizeof(send_wr)); send_wr.wr_id = (uintptr_t)p_mem_buf_desc; - send_wr.wr.ud.ah = p_ah; - send_wr.wr.ud.remote_qpn = FICTIVE_REMOTE_QPN; - send_wr.wr.ud.remote_qkey = FICTIVE_REMOTE_QKEY; + send_wr.wr.ud.ah = nullptr; send_wr.sg_list = sge; send_wr.num_sge = 1; send_wr.next = NULL; @@ -502,12 +473,6 @@ void qp_mgr::trigger_completion_for_all_sent_packets() m_p_ring->m_tx_num_wr_free--; send_to_wire(&send_wr, (vma_wr_tx_packet_attr)(VMA_TX_PACKET_L3_CSUM|VMA_TX_PACKET_L4_CSUM), true); - if (p_ah) { - IF_VERBS_FAILURE_EX(ibv_destroy_ah(p_ah), EIO) - { - qp_logpanic("failed destroying address handle (errno=%d %m)", errno); - }ENDIF_VERBS_FAILURE; - } } } @@ -724,116 +689,6 @@ int qp_mgr_eth::prepare_ibv_qp(vma_ibv_qp_init_attr& qp_init_attr) return 0; } -void qp_mgr_ib::modify_qp_to_ready_state() -{ - qp_logdbg(""); - int ret = 0; - int qp_state = priv_ibv_query_qp_state(m_qp); - - BULLSEYE_EXCLUDE_BLOCK_START - if (qp_state != IBV_QPS_INIT) { - if ((ret = priv_ibv_modify_qp_from_err_to_init_ud(m_qp, m_port_num, m_pkey_index, m_underly_qpn)) != 0) { - qp_logpanic("failed to modify QP from %d to RTS state (ret = %d)", qp_state, ret); - } - } - if ((ret = priv_ibv_modify_qp_from_init_to_rts(m_qp, m_underly_qpn)) != 0) { - qp_logpanic("failed to modify QP from INIT to RTS state (ret = %d)", ret); - } - BULLSEYE_EXCLUDE_BLOCK_END -} - -int qp_mgr_ib::prepare_ibv_qp(vma_ibv_qp_init_attr& qp_init_attr) -{ - qp_logdbg(""); - int ret = 0; - - qp_init_attr.qp_type = IBV_QPT_UD; - vma_ibv_qp_init_attr_comp_mask(m_p_ib_ctx_handler->get_ibv_pd(), qp_init_attr); - -#ifdef DEFINED_TSO - if (m_p_ring->is_tso()) { - vma_ibv_qp_init_attr_tso(qp_init_attr, m_p_ring->get_max_header_sz()); - qp_logdbg("create qp with max_tso_header = %d", m_p_ring->get_max_header_sz()); - } -#endif /* DEFINED_TSO */ - - if (m_underly_qpn) { - ibv_source_qpn_set(qp_init_attr, m_underly_qpn); - qp_logdbg("create qp using underly qpn = 0x%X", m_underly_qpn); - } - - m_qp = vma_ibv_create_qp(m_p_ib_ctx_handler->get_ibv_pd(), &qp_init_attr); - - BULLSEYE_EXCLUDE_BLOCK_START - if (!m_qp) { - qp_logerr("ibv_create_qp failed (errno=%d %m)", errno); - return -1; - } - - if ((ret = priv_ibv_modify_qp_from_err_to_init_ud(m_qp, m_port_num, - m_pkey_index, - m_underly_qpn)) != 0) { - VLOG_PRINTF_INFO_ONCE_THEN_ALWAYS( - VLOG_ERROR, VLOG_DEBUG, - "failed to modify QP from ERR to INIT state (ret = %d) check number of available fds (ulimit -n)", - ret); - return ret; - } - BULLSEYE_EXCLUDE_BLOCK_END - -#ifdef DEFINED_TSO -#else - enum ibv_qp_attr_mask attr_mask = IBV_QP_CAP; - struct ibv_qp_attr tmp_ibv_qp_attr; - struct ibv_qp_init_attr tmp_ibv_qp_init_attr; - IF_VERBS_FAILURE(ibv_query_qp(m_qp, &tmp_ibv_qp_attr, attr_mask, - &tmp_ibv_qp_init_attr)) { - qp_logerr("ibv_query_qp failed (errno=%d %m)", errno); - return -1; - } ENDIF_VERBS_FAILURE; - uint32_t tx_max_inline = safe_mce_sys().tx_max_inline; - m_max_inline_data = min(tmp_ibv_qp_attr.cap.max_inline_data, tx_max_inline); - qp_logdbg("requested max inline = %d QP, actual max inline = %d, " - "VMA max inline set to %d, max_send_wr=%d, max_recv_wr=%d, " - "max_recv_sge=%d, max_send_sge=%d", - tx_max_inline, tmp_ibv_qp_init_attr.cap.max_inline_data, - m_max_inline_data, tmp_ibv_qp_attr.cap.max_send_wr, - tmp_ibv_qp_attr.cap.max_recv_wr, tmp_ibv_qp_attr.cap.max_recv_sge, - tmp_ibv_qp_attr.cap.max_send_sge); -#endif /* DEFINED_TSO */ - - return 0; -} - -void qp_mgr_ib::update_pkey_index() -{ - qp_logdbg(""); - VALGRIND_MAKE_MEM_DEFINED(&m_pkey, sizeof(m_pkey)); - if (priv_ibv_find_pkey_index(m_p_ib_ctx_handler->get_ibv_context(), get_port_num(), m_pkey, &m_pkey_index)) { - qp_logdbg("IB: Can't find correct pkey_index for pkey '%d'", m_pkey); - m_pkey_index = (uint16_t)-1; - } - else { - qp_logdbg("IB: Found correct pkey_index (%d) for pkey '%d'", m_pkey_index, m_pkey); - } -#ifdef DEFINED_IBV_QP_INIT_SOURCE_QPN - /* m_underly_qpn is introduced to detect if current qp_mgr is able to - * use associated qp. - * It is set to non zero value if OFED supports such possibility only but final - * decision can be made just after attempt to create qp. The value of - * m_underly_qpn is reverted to zero if function to qp creation returns - * failure. - * So zero value for this field means no such capability. - * Note: mlx4 does not support this capability. Disable it explicitly because dynamic check - * using ibv_create_qp does not help - */ - if (!m_p_ib_ctx_handler->is_mlx4()) { - m_underly_qpn = m_p_ring->get_qpn(); - } - qp_logdbg("IB: Use qpn = 0x%X for device: %s", m_underly_qpn, m_p_ib_ctx_handler->get_ibname()); -#endif /* DEFINED_IBV_QP_INIT_SOURCE_QPN */ -} - uint32_t qp_mgr::is_ratelimit_change(struct vma_rate_limit_t &rate_limit) { uint32_t rl_changes = 0; diff --git a/src/vma/dev/qp_mgr.h b/src/vma/dev/qp_mgr.h index b6081ceff..19512c4e0 100644 --- a/src/vma/dev/qp_mgr.h +++ b/src/vma/dev/qp_mgr.h @@ -87,7 +87,6 @@ friend class cq_mgr_mp; #endif /* DEFINED_TSO */ int get_port_num() const { return m_port_num; } virtual uint16_t get_partiton() const { return 0; }; - virtual uint32_t get_underly_qpn() const { return 0; }; struct ibv_qp* get_ibv_qp() const { return m_qp; }; class cq_mgr* get_tx_cq_mgr() const { return m_p_cq_mgr_tx; } class cq_mgr* get_rx_cq_mgr() const { return m_p_cq_mgr_rx; } @@ -187,28 +186,4 @@ class qp_mgr_eth : public qp_mgr const uint16_t m_vlan; }; -class qp_mgr_ib : public qp_mgr -{ -public: - qp_mgr_ib(struct qp_mgr_desc *desc, - const uint32_t tx_num_wr, const uint16_t pkey): - qp_mgr(desc, tx_num_wr), m_pkey(pkey), m_underly_qpn(0) { - update_pkey_index(); - if(configure(desc)) throw_vma_exception("failed creating qp"); }; - - virtual void modify_qp_to_ready_state(); - virtual uint16_t get_partiton() const { return m_pkey; }; - virtual uint32_t get_underly_qpn() const { return m_underly_qpn; }; - -protected: - virtual int prepare_ibv_qp(vma_ibv_qp_init_attr& qp_init_attr); - -private: - const uint16_t m_pkey; - uint16_t m_pkey_index; - uint32_t m_underly_qpn; - - void update_pkey_index(); -}; - #endif diff --git a/src/vma/dev/rfs.h b/src/vma/dev/rfs.h index adacb898a..695d9dfcd 100644 --- a/src/vma/dev/rfs.h +++ b/src/vma/dev/rfs.h @@ -30,8 +30,6 @@ class qp_mgr; class pkt_rcvr_sink; -/* ETHERNET - */ typedef struct attach_flow_data_eth_ipv4_tcp_udp_t { struct ibv_flow * ibv_flow; qp_mgr* p_qp_mgr; @@ -61,110 +59,6 @@ typedef struct attach_flow_data_eth_ipv4_tcp_udp_t { ibv_flow_attr(qp_mgr->get_port_num()) {} } attach_flow_data_eth_ipv4_tcp_udp_t; -/* IPOIB (MC) - */ -typedef struct attach_flow_data_ib_v2_t { - struct ibv_flow * ibv_flow; - qp_mgr* p_qp_mgr; - struct ibv_flow_attr_ib_v2 { - vma_ibv_flow_attr attr; - vma_ibv_flow_spec_ipv4 ipv4; - vma_ibv_flow_spec_tcp_udp tcp_udp; - - ibv_flow_attr_ib_v2(uint8_t port) { - memset(this, 0, sizeof(*this)); - attr.size = sizeof(struct ibv_flow_attr_ib_v2); - attr.num_of_specs = 2; - attr.type = VMA_IBV_FLOW_ATTR_NORMAL; - attr.priority = 1; // almost highest priority, 0 is used for 5-tuple later - attr.port = port; - } - } ibv_flow_attr; - attach_flow_data_ib_v2_t(qp_mgr* qp_mgr) : - ibv_flow(NULL), - p_qp_mgr(qp_mgr), - ibv_flow_attr(qp_mgr->get_port_num()) {} - -} attach_flow_data_ib_v2_t; - -#ifdef DEFINED_IBV_FLOW_SPEC_IB -typedef struct attach_flow_data_ib_v1_t { - struct ibv_flow * ibv_flow; - qp_mgr* p_qp_mgr; - struct ibv_flow_attr_ib_v1 { - vma_ibv_flow_attr attr; - vma_ibv_flow_spec_ib ib; - - ibv_flow_attr_ib_v1(uint8_t port) { - memset(this, 0, sizeof(*this)); - attr.size = sizeof(struct ibv_flow_attr_ib_v1); - attr.num_of_specs = 1; - attr.type = VMA_IBV_FLOW_ATTR_NORMAL; - attr.priority = 1; // almost highest priority, 0 is used for 5-tuple later - attr.port = port; - } - } ibv_flow_attr; - attach_flow_data_ib_v1_t(qp_mgr* qp_mgr) : - ibv_flow(NULL), - p_qp_mgr(qp_mgr), - ibv_flow_attr(qp_mgr->get_port_num()) {} - -} attach_flow_data_ib_v1_t; -#endif - -/* IPOIB (UC) - */ -typedef struct attach_flow_data_ib_ipv4_tcp_udp_v2_t { - struct ibv_flow * ibv_flow; - qp_mgr* p_qp_mgr; - struct ibv_flow_attr_ib_ipv4_tcp_udp_v2 { - - vma_ibv_flow_attr attr; - vma_ibv_flow_spec_ipv4 ipv4; - vma_ibv_flow_spec_tcp_udp tcp_udp; - - ibv_flow_attr_ib_ipv4_tcp_udp_v2(uint8_t port) { - memset(this, 0, sizeof(*this)); - attr.size = sizeof(struct ibv_flow_attr_ib_ipv4_tcp_udp_v2); - attr.num_of_specs = 2; - attr.type = VMA_IBV_FLOW_ATTR_NORMAL; - attr.priority = 1; // almost highest priority, 0 is used for 5-tuple later - attr.port = port; - } - } ibv_flow_attr; - attach_flow_data_ib_ipv4_tcp_udp_v2_t(qp_mgr* qp_mgr) : - ibv_flow(NULL), - p_qp_mgr(qp_mgr), - ibv_flow_attr(qp_mgr->get_port_num()) {} -} attach_flow_data_ib_ipv4_tcp_udp_v2_t; - -#ifdef DEFINED_IBV_FLOW_SPEC_IB -typedef struct attach_flow_data_ib_ipv4_tcp_udp_v1_t { - struct ibv_flow * ibv_flow; - qp_mgr* p_qp_mgr; - struct ibv_flow_attr_ib_ipv4_tcp_udp_v1 { - - vma_ibv_flow_attr attr; - vma_ibv_flow_spec_ib ib; - vma_ibv_flow_spec_ipv4 ipv4; - vma_ibv_flow_spec_tcp_udp tcp_udp; - - ibv_flow_attr_ib_ipv4_tcp_udp_v1(uint8_t port) { - memset(this, 0, sizeof(*this)); - attr.size = sizeof(struct ibv_flow_attr_ib_ipv4_tcp_udp_v1); - attr.num_of_specs = 3; - attr.type = VMA_IBV_FLOW_ATTR_NORMAL; - attr.priority = 1; // almost highest priority, 0 is used for 5-tuple later - attr.port = port; - } - } ibv_flow_attr; - attach_flow_data_ib_ipv4_tcp_udp_v1_t(qp_mgr* qp_mgr) : - ibv_flow(NULL), - p_qp_mgr(qp_mgr), - ibv_flow_attr(qp_mgr->get_port_num()) {} -} attach_flow_data_ib_ipv4_tcp_udp_v1_t; -#endif /* DEFINED_IBV_FLOW_SPEC_IB */ - typedef struct attach_flow_data_t { vma_ibv_flow * ibv_flow; qp_mgr* p_qp_mgr; @@ -227,7 +121,7 @@ class rfs bool destroy_ibv_flow(); // Detach flow from all qps bool add_sink(pkt_rcvr_sink* p_sink); bool del_sink(pkt_rcvr_sink* p_sink); - virtual bool prepare_flow_spec() = 0; + virtual void prepare_flow_spec() = 0; private: rfs(); // I don't want anyone to use the default constructor diff --git a/src/vma/dev/rfs_mc.cpp b/src/vma/dev/rfs_mc.cpp index 3b73f614c..1817162a9 100644 --- a/src/vma/dev/rfs_mc.cpp +++ b/src/vma/dev/rfs_mc.cpp @@ -22,12 +22,12 @@ rfs_mc::rfs_mc(flow_tuple *flow_spec_5t, ring_slave *p_ring, rfs_rule_filter* ru } BULLSEYE_EXCLUDE_BLOCK_END - if (m_p_ring->is_simple() && !prepare_flow_spec()) { - throw_vma_exception("IB multicast offload is not supported"); + if (m_p_ring->is_simple()) { + prepare_flow_spec(); } } -bool rfs_mc::prepare_flow_spec() +void rfs_mc::prepare_flow_spec() { ring_simple* p_ring = dynamic_cast(m_p_ring); @@ -35,8 +35,6 @@ bool rfs_mc::prepare_flow_spec() rfs_logpanic("Incompatible ring type"); } - transport_type_t type = p_ring->get_transport_type(); - /* * todo note that ring is not locked here. * we touch members that should not change during the ring life. @@ -45,93 +43,39 @@ bool rfs_mc::prepare_flow_spec() */ attach_flow_data_t* p_attach_flow_data = NULL; - switch (type) { - case VMA_TRANSPORT_IB: - { - attach_flow_data_ib_v2_t* attach_flow_data_ib_v2 = NULL; - - if (0 == p_ring->m_p_qp_mgr->get_underly_qpn()) { - // IB MC flow steering is done only on L2 --> need to zero other fields to get correct behaviour - // CX3 HW does not support L3+L4 MC flow steering rule -#ifdef DEFINED_IBV_FLOW_SPEC_IB - attach_flow_data_ib_v1_t* attach_flow_data_ib_v1 = NULL; - - attach_flow_data_ib_v1 = new attach_flow_data_ib_v1_t(p_ring->m_p_qp_mgr); - - uint8_t dst_gid[16]; - create_mgid_from_ipv4_mc_ip(dst_gid, p_ring->m_p_qp_mgr->get_partiton(), m_flow_tuple.get_dst_ip()); - ibv_flow_spec_ib_set_by_dst_gid(&(attach_flow_data_ib_v1->ibv_flow_attr.ib), - dst_gid); - - p_attach_flow_data = (attach_flow_data_t*)attach_flow_data_ib_v1; - break; -#else - return false; -#endif - } - - attach_flow_data_ib_v2 = new attach_flow_data_ib_v2_t(p_ring->m_p_qp_mgr); - - ibv_flow_spec_ipv4_set(&(attach_flow_data_ib_v2->ibv_flow_attr.ipv4), - m_flow_tuple.get_dst_ip(), - 0); - - ibv_flow_spec_tcp_udp_set(&(attach_flow_data_ib_v2->ibv_flow_attr.tcp_udp), - (m_flow_tuple.get_protocol() == PROTO_TCP), - m_flow_tuple.get_dst_port(), - m_flow_tuple.get_src_port()); - - p_attach_flow_data = (attach_flow_data_t*)attach_flow_data_ib_v2; - break; - } - case VMA_TRANSPORT_ETH: - { - attach_flow_data_eth_ipv4_tcp_udp_t* attach_flow_data_eth = NULL; - - attach_flow_data_eth = new attach_flow_data_eth_ipv4_tcp_udp_t(p_ring->m_p_qp_mgr); - - uint8_t dst_mac[6]; - create_multicast_mac_from_ip(dst_mac, m_flow_tuple.get_dst_ip()); - ibv_flow_spec_eth_set(&(attach_flow_data_eth->ibv_flow_attr.eth), - dst_mac, - htons(p_ring->m_p_qp_mgr->get_partiton())); - - if (safe_mce_sys().eth_mc_l2_only_rules) { - ibv_flow_spec_ipv4_set(&(attach_flow_data_eth->ibv_flow_attr.ipv4), 0, 0); - ibv_flow_spec_tcp_udp_set(&(attach_flow_data_eth->ibv_flow_attr.tcp_udp), 0, 0, 0); - p_attach_flow_data = (attach_flow_data_t*)attach_flow_data_eth; - break; - } - - ibv_flow_spec_ipv4_set(&(attach_flow_data_eth->ibv_flow_attr.ipv4), - m_flow_tuple.get_dst_ip(), - 0); - - ibv_flow_spec_tcp_udp_set(&(attach_flow_data_eth->ibv_flow_attr.tcp_udp), - (m_flow_tuple.get_protocol() == PROTO_TCP), - m_flow_tuple.get_dst_port(), - m_flow_tuple.get_src_port()); - - if (m_flow_tag_id) { // Will not attach flow_tag spec to rule for tag_id==0 - ibv_flow_spec_flow_tag_set(&attach_flow_data_eth->ibv_flow_attr.flow_tag, m_flow_tag_id); - attach_flow_data_eth->ibv_flow_attr.add_flow_tag_spec(); - rfs_logdbg("Adding flow_tag spec to MC rule, num_of_specs: %d flow_tag_id: %d", - attach_flow_data_eth->ibv_flow_attr.attr.num_of_specs, m_flow_tag_id); - } - - p_attach_flow_data = (attach_flow_data_t*)attach_flow_data_eth; - break; - } - BULLSEYE_EXCLUDE_BLOCK_START - default: - rfs_logpanic("Incompatible transport type = %d", type); - return false; - break; - BULLSEYE_EXCLUDE_BLOCK_END + attach_flow_data_eth_ipv4_tcp_udp_t* attach_flow_data_eth = NULL; + + attach_flow_data_eth = new attach_flow_data_eth_ipv4_tcp_udp_t(p_ring->m_p_qp_mgr); + + uint8_t dst_mac[6]; + create_multicast_mac_from_ip(dst_mac, m_flow_tuple.get_dst_ip()); + ibv_flow_spec_eth_set(&(attach_flow_data_eth->ibv_flow_attr.eth), + dst_mac, htons(p_ring->m_p_qp_mgr->get_partiton())); + + if (safe_mce_sys().eth_mc_l2_only_rules) { + ibv_flow_spec_ipv4_set(&(attach_flow_data_eth->ibv_flow_attr.ipv4), 0, 0); + ibv_flow_spec_tcp_udp_set(&(attach_flow_data_eth->ibv_flow_attr.tcp_udp), 0, 0, 0); + p_attach_flow_data = (attach_flow_data_t*)attach_flow_data_eth; + } else { + ibv_flow_spec_ipv4_set(&(attach_flow_data_eth->ibv_flow_attr.ipv4), + m_flow_tuple.get_dst_ip(), 0); + + ibv_flow_spec_tcp_udp_set(&(attach_flow_data_eth->ibv_flow_attr.tcp_udp), + (m_flow_tuple.get_protocol() == PROTO_TCP), + m_flow_tuple.get_dst_port(), + m_flow_tuple.get_src_port()); + + if (m_flow_tag_id) { // Will not attach flow_tag spec to rule for tag_id==0 + ibv_flow_spec_flow_tag_set(&attach_flow_data_eth->ibv_flow_attr.flow_tag, m_flow_tag_id); + attach_flow_data_eth->ibv_flow_attr.add_flow_tag_spec(); + rfs_logdbg("Adding flow_tag spec to MC rule, num_of_specs: %d flow_tag_id: %d", + attach_flow_data_eth->ibv_flow_attr.attr.num_of_specs, m_flow_tag_id); + } + + p_attach_flow_data = (attach_flow_data_t*)attach_flow_data_eth; } m_attach_flow_data_vector.push_back(p_attach_flow_data); - return true; } bool rfs_mc::rx_dispatch_packet(mem_buf_desc_t* p_rx_wc_buf_desc, void* pv_fd_ready_array) diff --git a/src/vma/dev/rfs_mc.h b/src/vma/dev/rfs_mc.h index 6a4e2276e..9e80dbe27 100644 --- a/src/vma/dev/rfs_mc.h +++ b/src/vma/dev/rfs_mc.h @@ -28,7 +28,7 @@ class rfs_mc : public rfs virtual bool rx_dispatch_packet(mem_buf_desc_t* p_rx_wc_buf_desc, void* pv_fd_ready_array); protected: - virtual bool prepare_flow_spec(); + virtual void prepare_flow_spec(); }; diff --git a/src/vma/dev/rfs_uc.cpp b/src/vma/dev/rfs_uc.cpp index 708115251..93b07ed7e 100644 --- a/src/vma/dev/rfs_uc.cpp +++ b/src/vma/dev/rfs_uc.cpp @@ -13,7 +13,6 @@ #define MODULE_NAME "rfs_uc" - rfs_uc::rfs_uc(flow_tuple *flow_spec_5t, ring_slave *p_ring, rfs_rule_filter* rule_filter, uint32_t flow_tag_id) : rfs(flow_spec_5t, p_ring, rule_filter, flow_tag_id) { @@ -23,12 +22,12 @@ rfs_uc::rfs_uc(flow_tuple *flow_spec_5t, ring_slave *p_ring, rfs_rule_filter* ru } BULLSEYE_EXCLUDE_BLOCK_END - if (m_p_ring->is_simple() && !prepare_flow_spec()) { - throw_vma_exception("rfs_uc: Incompatible transport type"); + if (m_p_ring->is_simple()) { + prepare_flow_spec(); } } -bool rfs_uc::prepare_flow_spec() +void rfs_uc::prepare_flow_spec() { ring_simple* p_ring = dynamic_cast(m_p_ring); @@ -36,7 +35,6 @@ bool rfs_uc::prepare_flow_spec() rfs_logpanic("Incompatible ring type"); } - transport_type_t type = p_ring->get_transport_type(); /* * todo note that ring is not locked here. * we touch members that should not change during the ring life. @@ -48,52 +46,16 @@ bool rfs_uc::prepare_flow_spec() vma_ibv_flow_spec_tcp_udp* p_tcp_udp = NULL; vma_ibv_flow_spec_action_tag* p_flow_tag = NULL; - attach_flow_data_eth_ipv4_tcp_udp_t* attach_flow_data_eth = NULL; - - switch (type) { - case VMA_TRANSPORT_IB: - { - attach_flow_data_ib_ipv4_tcp_udp_v2_t* attach_flow_data_ib_v2 = NULL; - -#ifdef DEFINED_IBV_FLOW_SPEC_IB - if (0 == p_ring->m_p_qp_mgr->get_underly_qpn()) { - attach_flow_data_ib_ipv4_tcp_udp_v1_t* attach_flow_data_ib_v1 = NULL; - - attach_flow_data_ib_v1 = new attach_flow_data_ib_ipv4_tcp_udp_v1_t(p_ring->m_p_qp_mgr); - ibv_flow_spec_ib_set_by_dst_qpn(&(attach_flow_data_ib_v1->ibv_flow_attr.ib), - htonl(((IPoIB_addr*)p_ring->m_p_l2_addr)->get_qpn())); - p_ipv4 = &(attach_flow_data_ib_v1->ibv_flow_attr.ipv4); - p_tcp_udp = &(attach_flow_data_ib_v1->ibv_flow_attr.tcp_udp); - p_attach_flow_data = (attach_flow_data_t*)attach_flow_data_ib_v1; - break; - } -#endif - attach_flow_data_ib_v2 = new attach_flow_data_ib_ipv4_tcp_udp_v2_t(p_ring->m_p_qp_mgr); + attach_flow_data_eth_ipv4_tcp_udp_t* attach_flow_data_eth = + new attach_flow_data_eth_ipv4_tcp_udp_t(p_ring->m_p_qp_mgr); - p_ipv4 = &(attach_flow_data_ib_v2->ibv_flow_attr.ipv4); - p_tcp_udp = &(attach_flow_data_ib_v2->ibv_flow_attr.tcp_udp); - p_attach_flow_data = (attach_flow_data_t*)attach_flow_data_ib_v2; - break; - } - case VMA_TRANSPORT_ETH: - { - attach_flow_data_eth = new attach_flow_data_eth_ipv4_tcp_udp_t(p_ring->m_p_qp_mgr); - - ibv_flow_spec_eth_set(&(attach_flow_data_eth->ibv_flow_attr.eth), - p_ring->m_p_l2_addr->get_address(), - htons(p_ring->m_p_qp_mgr->get_partiton())); - p_ipv4 = &(attach_flow_data_eth->ibv_flow_attr.ipv4); - p_tcp_udp = &(attach_flow_data_eth->ibv_flow_attr.tcp_udp); - p_flow_tag = &(attach_flow_data_eth->ibv_flow_attr.flow_tag); - p_attach_flow_data = (attach_flow_data_t*)attach_flow_data_eth; - break; - } - BULLSEYE_EXCLUDE_BLOCK_START - default: - return false; - break; - BULLSEYE_EXCLUDE_BLOCK_END - } + ibv_flow_spec_eth_set(&(attach_flow_data_eth->ibv_flow_attr.eth), + p_ring->m_p_l2_addr->get_address(), + htons(p_ring->m_p_qp_mgr->get_partiton())); + p_ipv4 = &(attach_flow_data_eth->ibv_flow_attr.ipv4); + p_tcp_udp = &(attach_flow_data_eth->ibv_flow_attr.tcp_udp); + p_flow_tag = &(attach_flow_data_eth->ibv_flow_attr.flow_tag); + p_attach_flow_data = (attach_flow_data_t*)attach_flow_data_eth; ibv_flow_spec_ipv4_set(p_ipv4, m_flow_tuple.get_dst_ip(), @@ -110,20 +72,19 @@ bool rfs_uc::prepare_flow_spec() p_attach_flow_data->ibv_flow_attr.priority = 0; } - if (m_flow_tag_id && attach_flow_data_eth) { // Will not attach flow_tag spec to rule for tag_id==0 + if (m_flow_tag_id) { // Will not attach flow_tag spec to rule for tag_id==0 ibv_flow_spec_flow_tag_set(p_flow_tag, m_flow_tag_id); attach_flow_data_eth->ibv_flow_attr.add_flow_tag_spec(); rfs_logdbg("Adding flow_tag spec to rule, num_of_specs: %d flow_tag_id: %d", attach_flow_data_eth->ibv_flow_attr.attr.num_of_specs, m_flow_tag_id); } - rfs_logfunc("transport type: %s, num_of_specs: %d flow_tag_id: %d", - priv_vma_transport_type_str(type), - p_attach_flow_data->ibv_flow_attr.num_of_specs, - m_flow_tag_id); + + rfs_logfunc("num_of_specs: %d flow_tag_id: %d", + p_attach_flow_data->ibv_flow_attr.num_of_specs, + m_flow_tag_id); m_attach_flow_data_vector.push_back(p_attach_flow_data); - return true; } bool rfs_uc::rx_dispatch_packet(mem_buf_desc_t* p_rx_wc_buf_desc, void* pv_fd_ready_array) diff --git a/src/vma/dev/rfs_uc.h b/src/vma/dev/rfs_uc.h index 4e4cfc102..77dd0e590 100644 --- a/src/vma/dev/rfs_uc.h +++ b/src/vma/dev/rfs_uc.h @@ -29,7 +29,7 @@ class rfs_uc : public rfs virtual bool rx_dispatch_packet(mem_buf_desc_t* p_rx_wc_buf_desc, void* pv_fd_ready_array); protected: - virtual bool prepare_flow_spec(); + virtual void prepare_flow_spec(); }; diff --git a/src/vma/dev/ring_bond.cpp b/src/vma/dev/ring_bond.cpp index 61e67fddd..b6cad994c 100644 --- a/src/vma/dev/ring_bond.cpp +++ b/src/vma/dev/ring_bond.cpp @@ -890,28 +890,6 @@ void ring_bond_eth::slave_create(int if_index) update_rx_channel_fds(); } -void ring_bond_ib::slave_create(int if_index) -{ - ring_slave *cur_slave; - ring_simple *cur_simple; - - cur_slave = cur_simple = new ring_ib(if_index, this); - if (cur_slave == NULL) { - ring_logpanic("Error creating bond ring: memory allocation error"); - } - - update_cap(cur_slave); - m_bond_rings.push_back(cur_slave); - - if (m_bond_rings.size() > MAX_NUM_RING_RESOURCES) { - ring_logpanic("Error creating bond ring with more than %d resource", MAX_NUM_RING_RESOURCES); - } - - popup_xmit_rings(); - popup_recv_rings(); - update_rx_channel_fds(); -} - void ring_bond_netvsc::slave_create(int if_index) { ring_slave *cur_slave = NULL; diff --git a/src/vma/dev/ring_bond.h b/src/vma/dev/ring_bond.h index 28b82522e..22f08c85d 100644 --- a/src/vma/dev/ring_bond.h +++ b/src/vma/dev/ring_bond.h @@ -134,26 +134,6 @@ class ring_bond_eth : public ring_bond virtual void slave_create(int if_index); }; -class ring_bond_ib : public ring_bond -{ -public: - ring_bond_ib(int if_index): - ring_bond(if_index) { - net_device_val* p_ndev = - g_p_net_device_table_mgr->get_net_device_val(m_parent->get_if_index()); - if (p_ndev) { - const slave_data_vector_t& slaves = p_ndev->get_slave_array(); - update_cap(); - for (size_t i = 0; i < slaves.size(); i++) { - slave_create(slaves[i]->if_index); - } - } - } - -protected: - virtual void slave_create(int if_index); -}; - class ring_bond_netvsc : public ring_bond { public: diff --git a/src/vma/dev/ring_simple.cpp b/src/vma/dev/ring_simple.cpp index 360c13810..dcfcb9356 100644 --- a/src/vma/dev/ring_simple.cpp +++ b/src/vma/dev/ring_simple.cpp @@ -71,12 +71,6 @@ qp_mgr* ring_eth::create_qp_mgr(struct qp_mgr_desc *desc) get_tx_num_wr(), m_partition); } -qp_mgr* ring_ib::create_qp_mgr(struct qp_mgr_desc *desc) -{ - return new qp_mgr_ib(desc, - get_tx_num_wr(), m_partition); -} - ring_simple::ring_simple(int if_index, ring* parent, ring_type_t type): ring_slave(if_index, parent, type), m_p_ib_ctx(NULL), diff --git a/src/vma/dev/ring_simple.h b/src/vma/dev/ring_simple.h index b7ffd2b1e..953aa5f2b 100644 --- a/src/vma/dev/ring_simple.h +++ b/src/vma/dev/ring_simple.h @@ -62,8 +62,6 @@ class ring_simple : public ring_slave virtual void mem_buf_desc_return_single_to_owner_tx(mem_buf_desc_t* p_mem_buf_desc); virtual bool get_hw_dummy_send_support(ring_user_id_t id, vma_ibv_send_wr* p_send_wqe); inline void convert_hw_time_to_system_time(uint64_t hwtime, struct timespec* systime) { m_p_ib_ctx->convert_hw_time_to_system_time(hwtime, systime); } - inline uint32_t get_qpn() const { return (m_p_l2_addr ? ((IPoIB_addr *)m_p_l2_addr)->get_qpn() : 0); } - virtual uint32_t get_underly_qpn() { return m_p_qp_mgr->get_underly_qpn(); } virtual int modify_ratelimit(struct vma_rate_limit_t &rate_limit); virtual int get_tx_channel_fd() const { return m_p_tx_comp_event_channel ? m_p_tx_comp_event_channel->fd : -1; }; virtual uint32_t get_max_inline_data(); @@ -220,21 +218,4 @@ class ring_eth : public ring_simple virtual qp_mgr* create_qp_mgr(struct qp_mgr_desc *desc); }; -class ring_ib : public ring_simple -{ -public: - ring_ib(int if_index, - ring* parent = NULL): - ring_simple(if_index, parent, RING_IB) { - net_device_val_ib* p_ndev = - dynamic_cast(g_p_net_device_table_mgr->get_net_device_val(m_parent->get_if_index())); - if (p_ndev) { - m_partition = p_ndev->get_pkey(); - create_resources(); - } - } -protected: - virtual qp_mgr* create_qp_mgr(struct qp_mgr_desc *desc); -}; - #endif //RING_SIMPLE_H diff --git a/src/vma/dev/ring_slave.cpp b/src/vma/dev/ring_slave.cpp index 0d88a8492..8a76c12ed 100644 --- a/src/vma/dev/ring_slave.cpp +++ b/src/vma/dev/ring_slave.cpp @@ -7,7 +7,6 @@ #include "ring_slave.h" #include "vma/proto/ip_frag.h" -#include "vma/proto/igmp_mgr.h" #include "vma/dev/rfs_mc.h" #include "vma/dev/rfs_uc_tcp_gro.h" #include "vma/sock/fd_collection.h" @@ -61,7 +60,6 @@ ring_slave::ring_slave(int if_index, ring* parent, ring_type_t type): p_slave = p_ndev->get_slave(get_if_index()); /* Configure ring_slave() fields */ - m_transport_type = p_ndev->get_transport_type(); m_local_if = p_ndev->get_local_addr(); /* Set the same ring active status as related slave has for all ring types @@ -222,7 +220,7 @@ bool ring_slave::attach_flow(flow_tuple& flow_spec_5t, pkt_rcvr_sink *sink) // It means that for every MC group, even if we have sockets with different ports - only one rule in the HW. // So the hash map below keeps track of the number of sockets per rule so we know when to call ibv_attach and ibv_detach rfs_rule_filter* l2_mc_ip_filter = NULL; - if ((m_transport_type == VMA_TRANSPORT_IB && 0 == get_underly_qpn()) || m_b_sysvar_eth_mc_l2_only_rules) { + if (m_b_sysvar_eth_mc_l2_only_rules) { rule_filter_map_t::iterator l2_mc_iter = m_l2_mc_ip_attach_map.find(key_udp_mc.dst_ip); if (l2_mc_iter == m_l2_mc_ip_attach_map.end()) { // It means that this is the first time attach called with this MC ip m_l2_mc_ip_attach_map[key_udp_mc.dst_ip].counter = 1; @@ -232,7 +230,7 @@ bool ring_slave::attach_flow(flow_tuple& flow_spec_5t, pkt_rcvr_sink *sink) } p_rfs = m_flow_udp_mc_map.get(key_udp_mc, NULL); if (p_rfs == NULL) { // It means that no rfs object exists so I need to create a new one and insert it to the flow map - if ((m_transport_type == VMA_TRANSPORT_IB && 0 == get_underly_qpn()) || m_b_sysvar_eth_mc_l2_only_rules) { + if (m_b_sysvar_eth_mc_l2_only_rules) { l2_mc_ip_filter = new rfs_rule_filter(m_l2_mc_ip_attach_map, key_udp_mc.dst_ip, flow_spec_5t); } try { @@ -374,7 +372,7 @@ bool ring_slave::detach_flow(flow_tuple& flow_spec_5t, pkt_rcvr_sink* sink) } else if (flow_spec_5t.is_udp_mc()) { int keep_in_map = 1; flow_spec_2t_key_t key_udp_mc(flow_spec_5t.get_dst_ip(), flow_spec_5t.get_dst_port()); - if (m_transport_type == VMA_TRANSPORT_IB || m_b_sysvar_eth_mc_l2_only_rules) { + if (m_b_sysvar_eth_mc_l2_only_rules) { rule_filter_map_t::iterator l2_mc_iter = m_l2_mc_ip_attach_map.find(key_udp_mc.dst_ip); BULLSEYE_EXCLUDE_BLOCK_START if (l2_mc_iter == m_l2_mc_ip_attach_map.end()) { @@ -590,78 +588,38 @@ bool ring_slave::rx_process_buffer(mem_buf_desc_t* p_rx_wc_buf_desc, void* pv_fd } } - // Validate transport type headers - switch (m_transport_type) { - case VMA_TRANSPORT_IB: - { - // Get the data buffer start pointer to the ipoib header pointer - struct ipoibhdr* p_ipoib_h = (struct ipoibhdr*)(p_rx_wc_buf_desc->p_buffer + GRH_HDR_LEN); - - transport_header_len = GRH_HDR_LEN + IPOIB_HDR_LEN; - - // Validate IPoIB header - if (unlikely(p_ipoib_h->ipoib_header != htonl(IPOIB_HEADER))) { - ring_logwarn("Rx buffer dropped - Invalid IPOIB Header Type (%#x : %#x)", p_ipoib_h->ipoib_header, htonl(IPOIB_HEADER)); - return false; - } + uint16_t h_proto = p_eth_h->h_proto; + + ring_logfunc("Rx buffer Ethernet dst=" ETH_HW_ADDR_PRINT_FMT " <- src=" ETH_HW_ADDR_PRINT_FMT " type=%#x", + ETH_HW_ADDR_PRINT_ADDR(p_eth_h->h_dest), + ETH_HW_ADDR_PRINT_ADDR(p_eth_h->h_source), + htons(h_proto)); + + // Handle VLAN header as next protocol + struct vlanhdr* p_vlan_hdr = NULL; + uint16_t packet_vlan = 0; + if (h_proto == htons(ETH_P_8021Q)) { + p_vlan_hdr = (struct vlanhdr*)((uint8_t*)p_eth_h + ETH_HDR_LEN); + transport_header_len = ETH_VLAN_HDR_LEN; + h_proto = p_vlan_hdr->h_vlan_encapsulated_proto; + packet_vlan = (htons(p_vlan_hdr->h_vlan_TCI) & VLAN_VID_MASK); + } else { + transport_header_len = ETH_HDR_LEN; } - break; - case VMA_TRANSPORT_ETH: - { -// printf("\nring_slave::rx_process_buffer\n"); -// { -// struct ethhdr* p_eth_h = (struct ethhdr*)(p_rx_wc_buf_desc->p_buffer); -// -// int i = 0; -// printf("p_eth_h->h_dest [0]=%d, [1]=%d, [2]=%d, [3]=%d, [4]=%d, [5]=%d\n", -// (uint8_t)p_eth_h->h_dest[0], (uint8_t)p_eth_h->h_dest[1], (uint8_t)p_eth_h->h_dest[2], (uint8_t)p_eth_h->h_dest[3], (uint8_t)p_eth_h->h_dest[4], (uint8_t)p_eth_h->h_dest[5]); -// printf("p_eth_h->h_source [0]=%d, [1]=%d, [2]=%d, [3]=%d, [4]=%d, [5]=%d\n", -// (uint8_t)p_eth_h->h_source[0], (uint8_t)p_eth_h->h_source[1], (uint8_t)p_eth_h->h_source[2], (uint8_t)p_eth_h->h_source[3], (uint8_t)p_eth_h->h_source[4], (uint8_t)p_eth_h->h_source[5]); -// -// while(i++<62){ -// printf("%d, ", (uint8_t)p_rx_wc_buf_desc->p_buffer[i]); -// } -// printf("\n"); -// } - - uint16_t h_proto = p_eth_h->h_proto; - - ring_logfunc("Rx buffer Ethernet dst=" ETH_HW_ADDR_PRINT_FMT " <- src=" ETH_HW_ADDR_PRINT_FMT " type=%#x", - ETH_HW_ADDR_PRINT_ADDR(p_eth_h->h_dest), - ETH_HW_ADDR_PRINT_ADDR(p_eth_h->h_source), - htons(h_proto)); - - // Handle VLAN header as next protocol - struct vlanhdr* p_vlan_hdr = NULL; - uint16_t packet_vlan = 0; - if (h_proto == htons(ETH_P_8021Q)) { - p_vlan_hdr = (struct vlanhdr*)((uint8_t*)p_eth_h + ETH_HDR_LEN); - transport_header_len = ETH_VLAN_HDR_LEN; - h_proto = p_vlan_hdr->h_vlan_encapsulated_proto; - packet_vlan = (htons(p_vlan_hdr->h_vlan_TCI) & VLAN_VID_MASK); - } else { - transport_header_len = ETH_HDR_LEN; - } - - //TODO: Remove this code when handling vlan in flow steering will be available. Change this code if vlan stripping is performed. - if((m_partition & VLAN_VID_MASK) != packet_vlan) { - ring_logfunc("Rx buffer dropped- Mismatched vlan. Packet vlan = %d, Local vlan = %d", packet_vlan, m_partition & VLAN_VID_MASK); - return false; - } - // Validate IP header as next protocol - if (unlikely(h_proto != htons(ETH_P_IP))) { - ring_logwarn("Rx buffer dropped - Invalid Ethr Type (%#x : %#x)", p_eth_h->h_proto, htons(ETH_P_IP)); - return false; - } + //TODO: Remove this code when handling vlan in flow steering will be available. Change this code if vlan stripping is performed. + if((m_partition & VLAN_VID_MASK) != packet_vlan) { + ring_logfunc("Rx buffer dropped- Mismatched vlan. Packet vlan = %d, Local vlan = %d", packet_vlan, m_partition & VLAN_VID_MASK); + return false; } - break; - default: - ring_logwarn("Rx buffer dropped - Unknown transport type %d", m_transport_type); + + // Validate IP header as next protocol + if (unlikely(h_proto != htons(ETH_P_IP))) { + ring_logwarn("Rx buffer dropped - Invalid Ethr Type (%#x : %#x)", p_eth_h->h_proto, htons(ETH_P_IP)); return false; } - // Jump to IP header - Skip IB (GRH and IPoIB) or Ethernet (MAC) header sizes + // Jump to IP header - Skip Ethernet (MAC) header sizes sz_data -= transport_header_len; // Validate size for IPv4 header @@ -709,16 +667,9 @@ bool ring_slave::rx_process_buffer(mem_buf_desc_t* p_rx_wc_buf_desc, void* pv_fd // Handle fragmentation p_rx_wc_buf_desc->rx.n_frags = 1; - if (unlikely((ip_frag_off & MORE_FRAGMENTS_FLAG) || n_frag_offset)) { // Currently we don't expect to receive fragments - //for disabled fragments handling: - /*ring_logwarn("Rx packet dropped - VMA doesn't support fragmentation in receive flow!"); - ring_logwarn("packet info: dst=%d.%d.%d.%d, src=%d.%d.%d.%d, packet_sz=%d, frag_offset=%d, id=%d, proto=%s[%d], transport type=%s, (local if: %d.%d.%d.%d)", - NIPQUAD(p_ip_h->daddr), NIPQUAD(p_ip_h->saddr), - (sz_data > ip_tot_len ? ip_tot_len : sz_data), n_frag_offset, ntohs(p_ip_h->id), - iphdr_protocol_type_to_str(p_ip_h->protocol), p_ip_h->protocol, (m_transport_type ? "ETH" : "IB"), - NIPQUAD(local_addr)); - return false;*/ -#if 1 //handle fragments + + // Currently we don't expect to receive fragments. + if (unlikely((ip_frag_off & MORE_FRAGMENTS_FLAG) || n_frag_offset)) { // Update fragments descriptor with datagram base address and length p_rx_wc_buf_desc->rx.frag.iov_base = (uint8_t*)p_ip_h + ip_hdr_len; p_rx_wc_buf_desc->rx.frag.iov_len = ip_tot_len - ip_hdr_len; @@ -743,21 +694,12 @@ bool ring_slave::rx_process_buffer(mem_buf_desc_t* p_rx_wc_buf_desc, void* pv_fd for (tmp = p_rx_wc_buf_desc; tmp; tmp = tmp->p_next_desc) { ++p_rx_wc_buf_desc->rx.n_frags; } -#endif } if (p_rx_wc_buf_desc->rx.is_sw_csum_need && compute_ip_checksum((unsigned short*)p_ip_h, p_ip_h->ihl * 2)) { return false; // false ip checksum } -//We want to enable loopback between processes for IB -#if 0 - //AlexV: We don't support Tx MC Loopback today! - if (p_ip_h->saddr == m_local_if) { - ring_logfunc("Rx udp datagram discarded - mc loop disabled"); - return false; - } -#endif rfs* p_rfs = NULL; // Update the L3 info @@ -854,31 +796,11 @@ bool ring_slave::rx_process_buffer(mem_buf_desc_t* p_rx_wc_buf_desc, void* pv_fd } break; - case IPPROTO_IGMP: - { - struct igmp* p_igmp_h= (struct igmp*)((uint8_t*)p_ip_h + ip_hdr_len); - NOT_IN_USE(p_igmp_h); /* to supress warning in case VMA_MAX_DEFINED_LOG_LEVEL */ - ring_logdbg("Rx IGMP packet info: type=%s (%d), group=%d.%d.%d.%d, code=%d", - priv_igmp_type_tostr(p_igmp_h->igmp_type), p_igmp_h->igmp_type, - NIPQUAD(p_igmp_h->igmp_group.s_addr), p_igmp_h->igmp_code); - if (m_transport_type == VMA_TRANSPORT_IB || m_b_sysvar_eth_mc_l2_only_rules) { - ring_logdbg("Transport type is IB (or eth_mc_l2_only_rules), passing igmp packet to igmp_manager to process"); - if(g_p_igmp_mgr) { - (g_p_igmp_mgr->process_igmp_packet(p_ip_h, m_local_if)); - return false; // we return false in order to free the buffer, although we handled the packet - } - ring_logdbg("IGMP packet drop. IGMP manager does not exist."); - return false; - } - ring_logerr("Transport type is ETH, dropping the packet"); - return false; - } - break; - default: ring_logwarn("Rx packet dropped - undefined protocol = %d", p_ip_h->protocol); return false; } + if (unlikely(p_rfs == NULL)) { ring_logdbg("Rx packet dropped - rfs object not found: dst:%d.%d.%d.%d:%d, src%d.%d.%d.%d:%d, proto=%s[%d]", NIPQUAD(p_rx_wc_buf_desc->rx.dst.sin_addr.s_addr), ntohs(p_rx_wc_buf_desc->rx.dst.sin_port), diff --git a/src/vma/dev/ring_slave.h b/src/vma/dev/ring_slave.h index 6804be7ad..1d167f095 100644 --- a/src/vma/dev/ring_slave.h +++ b/src/vma/dev/ring_slave.h @@ -110,12 +110,10 @@ class ring_slave : public ring virtual bool rx_process_buffer(mem_buf_desc_t* p_rx_wc_buf_desc, void* pv_fd_ready_array); virtual int reclaim_recv_single_buffer(mem_buf_desc_t* rx_reuse) = 0; virtual void inc_cq_moderation_stats(size_t sz_data) = 0; - virtual uint32_t get_underly_qpn() = 0; virtual bool attach_flow(flow_tuple& flow_spec_5t, pkt_rcvr_sink* sink); virtual bool detach_flow(flow_tuple& flow_spec_5t, pkt_rcvr_sink* sink); inline bool is_simple() const { return m_type != RING_TAP; } - transport_type_t get_transport_type() const { return m_transport_type; } inline ring_type_t get_type() const { return m_type; } bool m_active; /* State indicator */ @@ -141,7 +139,6 @@ class ring_slave : public ring lock_spin_recursive m_lock_ring_tx; descq_t m_tx_pool; - transport_type_t m_transport_type; /* transport ETH/IB */ ring_stats_t* m_p_ring_stat; in_addr_t m_local_if; uint16_t m_partition; diff --git a/src/vma/dev/ring_tap.h b/src/vma/dev/ring_tap.h index 7c00df454..2909ade51 100644 --- a/src/vma/dev/ring_tap.h +++ b/src/vma/dev/ring_tap.h @@ -43,8 +43,7 @@ class ring_tap : public ring_slave virtual int modify_ratelimit(struct vma_rate_limit_t &rate_limit) { NOT_IN_USE(rate_limit); return 0; } void inc_cq_moderation_stats(size_t sz_data) { NOT_IN_USE(sz_data); } - virtual uint32_t get_underly_qpn() { return -1; } - virtual uint32_t get_max_inline_data() { return 0; } + virtual uint32_t get_max_inline_data() { return 0; } #ifdef DEFINED_TSO virtual uint32_t get_max_send_sge(void) { return 1; } virtual uint32_t get_max_payload_sz(void) { return 0; } diff --git a/src/vma/dev/wqe_send_ib_handler.cpp b/src/vma/dev/wqe_send_ib_handler.cpp deleted file mode 100644 index bf9425f47..000000000 --- a/src/vma/dev/wqe_send_ib_handler.cpp +++ /dev/null @@ -1,43 +0,0 @@ -/* - * SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES - * Copyright (c) 2001-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: GPL-2.0-only or BSD-2-Clause - */ - -#include "wqe_send_ib_handler.h" - -wqe_send_ib_handler::wqe_send_ib_handler() -{ -} - -wqe_send_ib_handler::~wqe_send_ib_handler() -{ -} - -void wqe_send_ib_handler::init_path_record(vma_ibv_send_wr &wqe_to_init, struct ibv_ah *ah, uint32_t rem_qkey, uint32_t rem_qpn) -{ - wqe_to_init.wr.ud.ah = ah; - wqe_to_init.wr.ud.remote_qkey = rem_qkey; - wqe_to_init.wr.ud.remote_qpn = rem_qpn; -} - -void wqe_send_ib_handler::init_ib_wqe(vma_ibv_send_wr &wqe_to_init, struct ibv_sge* sge_list, uint32_t num_sge, - struct ibv_ah *ah, uint32_t rem_qpn, uint32_t rem_qkey) -{ - wqe_send_handler::init_wqe(wqe_to_init, sge_list, num_sge); - init_path_record(wqe_to_init, ah, rem_qkey, rem_qpn); -} - -void wqe_send_ib_handler::init_inline_ib_wqe(vma_ibv_send_wr &wqe_to_init, struct ibv_sge* sge_list, uint32_t num_sge, - struct ibv_ah *ah, uint32_t rem_qpn, uint32_t rem_qkey) -{ - wqe_send_handler::init_inline_wqe(wqe_to_init, sge_list, num_sge); - init_path_record(wqe_to_init, ah, rem_qkey, rem_qpn); -} - -void wqe_send_ib_handler::init_not_inline_ib_wqe(vma_ibv_send_wr &wqe_to_init, struct ibv_sge* sge_list, uint32_t num_sge, - struct ibv_ah *ah, uint32_t rem_qpn, uint32_t rem_qkey) -{ - wqe_send_handler::init_not_inline_wqe(wqe_to_init, sge_list, num_sge); - init_path_record(wqe_to_init, ah, rem_qkey, rem_qpn); -} diff --git a/src/vma/dev/wqe_send_ib_handler.h b/src/vma/dev/wqe_send_ib_handler.h deleted file mode 100644 index f599b7bd8..000000000 --- a/src/vma/dev/wqe_send_ib_handler.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - * SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES - * Copyright (c) 2001-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: GPL-2.0-only or BSD-2-Clause - */ - -#include "wqe_send_handler.h" -#include "vma/util/vtypes.h" - -#ifndef WQE_TEMPLATE_SEND_IB_H_ -#define WQE_TEMPLATE_SEND_IB_H_ - -class wqe_send_ib_handler: public wqe_send_handler -{ -public: - wqe_send_ib_handler(); - virtual ~wqe_send_ib_handler(); - - void init_ib_wqe(vma_ibv_send_wr &wqe_to_init, struct ibv_sge* sge_list, uint32_t num_sge, struct ibv_ah *ah, uint32_t rem_qpn, uint32_t rem_qkey); - void init_inline_ib_wqe(vma_ibv_send_wr & wqe_to_init, struct ibv_sge *sge_list, uint32_t num_sge, struct ibv_ah *ah, uint32_t rem_qpn, uint32_t rem_qkey); - void init_not_inline_ib_wqe(vma_ibv_send_wr & wqe_to_init, struct ibv_sge *sge_list, uint32_t num_sge, struct ibv_ah *ah, uint32_t rem_qpn, uint32_t rem_qkey); - -private: - void init_path_record(vma_ibv_send_wr & wqe_to_init, struct ibv_ah *ah, uint32_t rem_qkey, uint32_t rem_qpn); -}; - -#endif /* WQE_TEMPLATE_SEND_IB_H_ */ diff --git a/src/vma/ib/base/verbs_extra.cpp b/src/vma/ib/base/verbs_extra.cpp index 29095fdc0..162d3ae06 100644 --- a/src/vma/ib/base/verbs_extra.cpp +++ b/src/vma/ib/base/verbs_extra.cpp @@ -79,20 +79,6 @@ const char* priv_ibv_event_desc_str(enum ibv_event_type type) BULLSEYE_EXCLUDE_BLOCK_END } -int priv_ibv_find_pkey_index(struct ibv_context *verbs, uint8_t port_num, uint16_t pkey, uint16_t *pkey_index) -{ - int ret, i; - uint16_t chk_pkey = 0; - for (i = 0, ret = 0; !ret; i++) { - ret = ibv_query_pkey(verbs, port_num, i, &chk_pkey); - if (!ret && pkey == chk_pkey) { - *pkey_index = (uint16_t)i; - return 0; - } - } - return -1; -} - int priv_ibv_modify_qp_to_err(struct ibv_qp *qp) { vma_ibv_qp_attr qp_attr; @@ -145,38 +131,6 @@ int priv_ibv_modify_qp_from_err_to_init_raw(struct ibv_qp *qp, uint8_t port_num) return 0; } -int priv_ibv_modify_qp_from_err_to_init_ud(struct ibv_qp *qp, uint8_t port_num, uint16_t pkey_index, uint32_t underly_qpn) -{ - vma_ibv_qp_attr qp_attr; - ibv_qp_attr_mask qp_attr_mask = (ibv_qp_attr_mask)IBV_QP_STATE; - - if (qp->qp_type != IBV_QPT_UD) - return -1; - - if (priv_ibv_query_qp_state(qp) != IBV_QPS_RESET) { - if (priv_ibv_modify_qp_to_reset(qp)) { - return -2; - } - } - - memset(&qp_attr, 0, sizeof(qp_attr)); - qp_attr.qp_state = IBV_QPS_INIT; - if (0 == underly_qpn) { - qp_attr_mask = (ibv_qp_attr_mask)(qp_attr_mask | IBV_QP_QKEY | IBV_QP_PKEY_INDEX | IBV_QP_PORT); - qp_attr.qkey = IPOIB_QKEY; - qp_attr.pkey_index = pkey_index; - qp_attr.port_num = port_num; - } - - BULLSEYE_EXCLUDE_BLOCK_START - IF_VERBS_FAILURE(vma_ibv_modify_qp(qp, &qp_attr, qp_attr_mask)) { - return -3; - } ENDIF_VERBS_FAILURE; - BULLSEYE_EXCLUDE_BLOCK_END - - return 0; -} - int priv_ibv_modify_qp_from_init_to_rts(struct ibv_qp *qp, uint32_t underly_qpn) { vma_ibv_qp_attr qp_attr; diff --git a/src/vma/ib/base/verbs_extra.h b/src/vma/ib/base/verbs_extra.h index 31c756529..a06b52270 100644 --- a/src/vma/ib/base/verbs_extra.h +++ b/src/vma/ib/base/verbs_extra.h @@ -45,7 +45,6 @@ inline int _errnocheck(int rc) { #define IF_RDMACM_FAILURE(__func__) IF_VERBS_FAILURE(__func__) #define ENDIF_RDMACM_FAILURE ENDIF_VERBS_FAILURE -#define IPOIB_QKEY 0x0b1b // See - IB Arch Spec - 11.6.2 COMPLETION RETURN STATUS const char* priv_ibv_wc_status_str(enum ibv_wc_status status); @@ -56,12 +55,8 @@ const char* priv_ibv_event_desc_str(enum ibv_event_type type); #define priv_rdma_cm_event_type_str(__rdma_cm_ev_t__) \ rdma_event_str(__rdma_cm_ev_t__) -// Find pkey_index from the ibv_context + port_num + pkey -int priv_ibv_find_pkey_index(struct ibv_context *verbs, uint8_t port_num, uint16_t pkey, uint16_t *pkey_index); - int priv_ibv_modify_qp_to_err(struct ibv_qp *qp); int priv_ibv_modify_qp_from_err_to_init_raw(struct ibv_qp *qp, uint8_t port_num); -int priv_ibv_modify_qp_from_err_to_init_ud(struct ibv_qp *qp, uint8_t port_num, uint16_t pkey_index, uint32_t underly_qpn); int priv_ibv_modify_qp_from_init_to_rts(struct ibv_qp *qp, uint32_t underly_qpn = 0); // Return 'ibv_qp_state' of the ibv_qp @@ -96,10 +91,6 @@ int priv_ibv_query_burst_supported(struct ibv_qp *qp, uint8_t port_num); #ifdef DEFINED_IBV_QP_INIT_SOURCE_QPN #define vma_ibv_create_qp(pd, attr) ibv_create_qp_ex((pd)->context, attr) typedef struct ibv_qp_init_attr_ex vma_ibv_qp_init_attr; -#define vma_ibv_qp_create_flags(attr) (attr).create_flags -#define vma_ibv_qp_source_qpn(attr) (attr).source_qpn -#define VMA_IBV_QP_INIT_QPN_CREATE_FLAGS IBV_QP_CREATE_SOURCE_QPN -#define VMA_IBV_QP_INIT_QPN_MASK IBV_QP_INIT_ATTR_CREATE_FLAGS #define vma_ibv_qp_init_attr_comp_mask(_pd, _attr) { (_attr).pd = _pd; (_attr).comp_mask |= IBV_QP_INIT_ATTR_PD; } #else #define vma_ibv_create_qp(pd, attr) ibv_create_qp(pd, attr) @@ -204,21 +195,17 @@ typedef struct ibv_send_wr vma_ibv_send_wr; //flow steering #define VMA_IBV_FLOW_ATTR_NORMAL IBV_FLOW_ATTR_NORMAL #define VMA_IBV_FLOW_ATTR_FLAGS_ALLOW_LOOP_BACK IBV_FLOW_ATTR_FLAGS_ALLOW_LOOP_BACK -#ifdef DEFINED_IBV_FLOW_SPEC_IB -#define VMA_IBV_FLOW_SPEC_IB IBV_FLOW_SPEC_IB -#endif #define VMA_IBV_FLOW_SPEC_ETH IBV_FLOW_SPEC_ETH #define VMA_IBV_FLOW_SPEC_IPV4 IBV_FLOW_SPEC_IPV4 #define VMA_IBV_FLOW_SPEC_TCP IBV_FLOW_SPEC_TCP #define VMA_IBV_FLOW_SPEC_UDP IBV_FLOW_SPEC_UDP -#define vma_ibv_create_flow(qp, flow) ibv_create_flow(qp, flow) -#define vma_ibv_destroy_flow(flow_id) ibv_destroy_flow(flow_id) -typedef struct ibv_flow vma_ibv_flow; +#define vma_ibv_create_flow(qp, flow) ibv_create_flow(qp, flow) +#define vma_ibv_destroy_flow(flow_id) ibv_destroy_flow(flow_id) +typedef struct ibv_flow vma_ibv_flow; typedef struct ibv_flow_attr vma_ibv_flow_attr; -typedef struct ibv_flow_spec_ib vma_ibv_flow_spec_ib; typedef struct ibv_flow_spec_eth vma_ibv_flow_spec_eth; typedef struct ibv_flow_spec_ipv4 vma_ibv_flow_spec_ipv4; -typedef struct ibv_flow_spec_tcp_udp vma_ibv_flow_spec_tcp_udp; +typedef struct ibv_flow_spec_tcp_udp vma_ibv_flow_spec_tcp_udp; // Flow tag #ifdef DEFINED_IBV_FLOW_TAG @@ -291,13 +278,6 @@ typedef vma_ibv_qp_attr vma_ibv_rate_limit_attr; typedef struct ibv_exp_qp_init_attr vma_ibv_qp_init_attr; #define vma_ibv_qp_init_attr_comp_mask(_pd, _attr) { (_attr).pd = _pd; (_attr).comp_mask |= IBV_EXP_QP_INIT_ATTR_PD; } -#ifdef DEFINED_IBV_QP_INIT_SOURCE_QPN -#define vma_ibv_qp_create_flags(attr) (attr).exp_create_flags -#define vma_ibv_qp_source_qpn(attr) (attr).associated_qpn -#define VMA_IBV_QP_INIT_QPN_CREATE_FLAGS 0 -#define VMA_IBV_QP_INIT_QPN_MASK IBV_EXP_QP_INIT_ATTR_ASSOCIATED_QPN -#endif - //ibv_query_device #define vma_ibv_query_device(context, attr) ibv_exp_query_device(context, attr) typedef struct ibv_exp_device_attr vma_ibv_device_attr; @@ -427,18 +407,14 @@ typedef struct ibv_exp_send_wr vma_ibv_send_wr; //flow steering #define VMA_IBV_FLOW_ATTR_NORMAL IBV_EXP_FLOW_ATTR_NORMAL #define VMA_IBV_FLOW_ATTR_FLAGS_ALLOW_LOOP_BACK IBV_EXP_FLOW_ATTR_FLAGS_ALLOW_LOOP_BACK -#ifdef DEFINED_IBV_FLOW_SPEC_IB -#define VMA_IBV_FLOW_SPEC_IB IBV_EXP_FLOW_SPEC_IB -#endif #define VMA_IBV_FLOW_SPEC_ETH IBV_EXP_FLOW_SPEC_ETH #define VMA_IBV_FLOW_SPEC_IPV4 IBV_EXP_FLOW_SPEC_IPV4 #define VMA_IBV_FLOW_SPEC_TCP IBV_EXP_FLOW_SPEC_TCP #define VMA_IBV_FLOW_SPEC_UDP IBV_EXP_FLOW_SPEC_UDP #define vma_ibv_create_flow(qp, flow) ibv_exp_create_flow(qp, flow) #define vma_ibv_destroy_flow(flow_id) ibv_exp_destroy_flow(flow_id) -typedef struct ibv_exp_flow vma_ibv_flow; -typedef struct ibv_exp_flow_attr vma_ibv_flow_attr; -typedef struct ibv_exp_flow_spec_ib vma_ibv_flow_spec_ib; +typedef struct ibv_exp_flow vma_ibv_flow; +typedef struct ibv_exp_flow_attr vma_ibv_flow_attr; typedef struct ibv_exp_flow_spec_eth vma_ibv_flow_spec_eth; typedef struct ibv_exp_flow_spec_ipv4 vma_ibv_flow_spec_ipv4; typedef struct ibv_exp_flow_spec_tcp_udp vma_ibv_flow_spec_tcp_udp; @@ -509,27 +485,6 @@ typedef enum { int vma_rdma_lib_reset(); -#ifdef DEFINED_IBV_FLOW_SPEC_IB -static inline void ibv_flow_spec_ib_set_by_dst_gid(vma_ibv_flow_spec_ib* ib, uint8_t* dst_gid) -{ - ib->type = VMA_IBV_FLOW_SPEC_IB; - ib->size = sizeof(vma_ibv_flow_spec_ib); - if (dst_gid) - { - memcpy(ib->val.dst_gid, dst_gid, 16); - memset(ib->mask.dst_gid, FS_MASK_ON_8, 16); - } -} - -static inline void ibv_flow_spec_ib_set_by_dst_qpn(vma_ibv_flow_spec_ib* ib, uint32_t dst_qpn) -{ - ib->type = VMA_IBV_FLOW_SPEC_IB; - ib->size = sizeof(vma_ibv_flow_spec_ib); - ib->val.qpn = dst_qpn; - ib->mask.qpn = FS_MASK_ON_32; -} -#endif - static inline void ibv_flow_spec_eth_set(vma_ibv_flow_spec_eth* eth, uint8_t* dst_mac, uint16_t vlan_tag) { eth->type = VMA_IBV_FLOW_SPEC_ETH; @@ -574,19 +529,4 @@ static inline void ibv_flow_spec_flow_tag_set(vma_ibv_flow_spec_action_tag* flow #endif //DEFINED_IBV_FLOW_TAG } - -static inline void ibv_source_qpn_set(vma_ibv_qp_init_attr& qp_init_attr, uint32_t source_qpn) -{ - NOT_IN_USE(qp_init_attr); - NOT_IN_USE(source_qpn); - -#ifdef DEFINED_IBV_QP_INIT_SOURCE_QPN - if (source_qpn) { - qp_init_attr.comp_mask |= VMA_IBV_QP_INIT_QPN_MASK; - vma_ibv_qp_create_flags(qp_init_attr) |= VMA_IBV_QP_INIT_QPN_CREATE_FLAGS; - vma_ibv_qp_source_qpn(qp_init_attr) = source_qpn; - } -#endif /* DEFINED_IBV_QP_INIT_SOURCE_QPN */ -} - #endif diff --git a/src/vma/main.cpp b/src/vma/main.cpp index c2356c866..05a4ba41d 100644 --- a/src/vma/main.cpp +++ b/src/vma/main.cpp @@ -36,8 +36,6 @@ #include "vma/proto/vma_lwip.h" #include "vma/proto/route_table_mgr.h" #include "vma/proto/rule_table_mgr.h" -#include "vma/proto/igmp_mgr.h" - #include "vma/proto/neighbour_table_mgr.h" #include "vma/netlink/netlink_wrapper.h" #include "vma/event/command.h" @@ -106,13 +104,6 @@ static int free_libvma_resources() g_p_net_device_table_mgr->global_ring_drain_and_procces(); } - if(g_p_igmp_mgr) { - igmp_mgr* g_p_igmp_mgr_tmp = g_p_igmp_mgr; - g_p_igmp_mgr = NULL; - delete g_p_igmp_mgr_tmp; - usleep(50000); - } - if (g_p_event_handler_manager) g_p_event_handler_manager->stop_thread(); @@ -522,8 +513,6 @@ void print_vma_global_settings() VLOG_PARAM_NUMBER("Num of UC ARPs", safe_mce_sys().neigh_uc_arp_quata, MCE_DEFAULT_NEIGH_UC_ARP_QUATA, SYS_VAR_NEIGH_UC_ARP_QUATA); VLOG_PARAM_NUMBER("UC ARP delay (msec)", safe_mce_sys().neigh_wait_till_send_arp_msec, MCE_DEFAULT_NEIGH_UC_ARP_DELAY_MSEC, SYS_VAR_NEIGH_UC_ARP_DELAY_MSEC); VLOG_PARAM_NUMBER("Num of neigh restart retries", safe_mce_sys().neigh_num_err_retries, MCE_DEFAULT_NEIGH_NUM_ERR_RETRIES, SYS_VAR_NEIGH_NUM_ERR_RETRIES ); - - VLOG_PARAM_STRING("IPOIB support", safe_mce_sys().enable_ipoib, MCE_DEFAULT_IPOIB_FLAG, SYS_VAR_IPOIB, safe_mce_sys().enable_ipoib ? "Enabled " : "Disabled"); VLOG_PARAM_STRING("SocketXtreme mode", safe_mce_sys().enable_socketxtreme, MCE_DEFAULT_SOCKETXTREME, SYS_VAR_SOCKETXTREME, safe_mce_sys().enable_socketxtreme ? "Enabled " : "Disabled"); #ifdef DEFINED_TSO VLOG_PARAM_STRING("TSO support", safe_mce_sys().enable_tso, MCE_DEFAULT_TSO, SYS_VAR_TSO, safe_mce_sys().enable_tso ? "Enabled " : "Disabled"); @@ -673,8 +662,6 @@ static void do_global_ctors_helper() NEW_CTOR(g_p_route_table_mgr, route_table_mgr()); - NEW_CTOR(g_p_igmp_mgr, igmp_mgr()); - NEW_CTOR(g_buffer_pool_rx, buffer_pool(safe_mce_sys().rx_num_bufs, RX_BUF_SIZE(g_p_net_device_table_mgr->get_max_mtu()), buffer_pool::free_rx_lwip_pbuf_custom, @@ -780,7 +767,6 @@ int do_global_ctors() void reset_globals() { g_p_fd_collection = NULL; - g_p_igmp_mgr = NULL; g_p_ip_frag_manager = NULL; g_buffer_pool_rx = NULL; g_buffer_pool_tx = NULL; diff --git a/src/vma/netlink/test_main.cpp b/src/vma/netlink/test_main.cpp index d554b104e..76b20aafa 100644 --- a/src/vma/netlink/test_main.cpp +++ b/src/vma/netlink/test_main.cpp @@ -17,28 +17,6 @@ extern uint8_t g_vlogger_level; #define MODULE_NAME "NETLINK_TEST" -class neigh_observer : public observer { - virtual void notify_cb(event * ev) { - if (ev) { -/* - neigh_nl_event* net_ev = dynamic_cast (ev); - if (net_ev->neigh_dst_addr_str == "1.1.1.12") { - //__log_info("!!! IN neigh_observer !!!"); - __log_info("%s", ev->to_str().c_str()); - } -*/ - __log_info("!!! IN neigh_observer !!!"); - neigh_nl_event* nlev = dynamic_cast(ev); - __log_info("%s", ev->to_str().c_str()); - netlink_neigh_info info; - g_p_netlink_handler->get_neigh("1.1.1.1", 1, &info); - __log_info("AFTER get_neigh"); - __log_info("NEIGH STATE=%s", nlev->get_neigh_info()->get_state2str().c_str()); - } - } -}; - - class route_observer : public observer { virtual void notify_cb(event * ev) { if (ev) { @@ -65,10 +43,8 @@ void netlink_test() g_vlogger_level=3; netlink_wrapper* nl = new netlink_wrapper(); g_p_netlink_handler=nl; - neigh_observer neigh_obs; route_observer route_obs; link_observer link_obs; - nl->register_event(nlgrpNEIGH, &neigh_obs); //nl->register_event(nlgrpROUTE, &route_obs); //nl->register_event(nlgrpLINK, &link_obs); int nevents; diff --git a/src/vma/proto/L2_address.cpp b/src/vma/proto/L2_address.cpp index 33a902c02..1b3f5ce62 100644 --- a/src/vma/proto/L2_address.cpp +++ b/src/vma/proto/L2_address.cpp @@ -54,25 +54,3 @@ const std::string ETH_addr::to_str() const sprintf(s, ETH_HW_ADDR_PRINT_FMT, ETH_HW_ADDR_PRINT_ADDR(m_p_raw_address)); return (std::string(s)); } - -const std::string IPoIB_addr::to_str() const -{ - char s[100] = ""; - if (m_len > 0) - sprintf(s, IPOIB_HW_ADDR_PRINT_FMT, IPOIB_HW_ADDR_PRINT_ADDR(m_p_raw_address)); - return (std::string(s)); -} - -void IPoIB_addr::extract_qpn() -{ - unsigned char rem_qpn[4]; - - rem_qpn[0] = m_p_raw_address[3]; - rem_qpn[1] = m_p_raw_address[2]; - rem_qpn[2] = m_p_raw_address[1]; - rem_qpn[3] = 0; - memcpy(&m_qpn, rem_qpn, 4); - L2_logdbg("qpn = %#x", m_qpn); -} - - diff --git a/src/vma/proto/L2_address.h b/src/vma/proto/L2_address.h index 6483651aa..41378e88d 100644 --- a/src/vma/proto/L2_address.h +++ b/src/vma/proto/L2_address.h @@ -55,39 +55,4 @@ class ETH_addr : public L2_address } }; -class IPoIB_addr : public L2_address -{ -public: - - IPoIB_addr(): L2_address(), m_qpn(0) - { - - } - - //This constructor is for UC - IPoIB_addr(address_t const address) : L2_address(address, 20), m_qpn(0) - { - extract_qpn(); - }; - //This constructor is for MC - IPoIB_addr(uint32_t qpn, address_t const address) : L2_address(address, 20), m_qpn(qpn) {}; - ~IPoIB_addr() {}; - - virtual L2_address* clone() const - { - uint32_t qpn = ((IPoIB_addr*)this)->get_qpn(); - return (new IPoIB_addr(qpn, get_address())); - } - - void set_qpn(uint32_t qpn) { m_qpn = qpn; }; - uint32_t get_qpn() { return m_qpn; }; - - const std::string to_str() const; - -private: - uint32_t m_qpn; - - void extract_qpn(); -}; - #endif /* L2_ADDRESS_H */ diff --git a/src/vma/proto/arp.cpp b/src/vma/proto/arp.cpp index 9bcad6491..571b410b7 100644 --- a/src/vma/proto/arp.cpp +++ b/src/vma/proto/arp.cpp @@ -16,10 +16,8 @@ #define ARP_REQUEST 0x0001 #define HWTYPE_ETHERNET 0x0001 -#define HWTYPE_IB 0x0020 #define IPv4_ALEN 0x04 #define ETHADDR_COPY(dst, src) memcpy(dst, src, ETH_ALEN) -#define IBADDR_COPY(dst, src) memcpy(dst, src, IPOIB_HW_ADDR_LEN) void set_eth_arp_hdr(eth_arp_hdr *p_arph, in_addr_t ipsrc_addr, in_addr_t ipdst_addr, const uint8_t* hwsrc_addr, const uint8_t* hwdst_addr) { @@ -34,16 +32,3 @@ void set_eth_arp_hdr(eth_arp_hdr *p_arph, in_addr_t ipsrc_addr, in_addr_t ipdst_ p_arph->m_dipaddr = ipdst_addr; } -void set_ib_arp_hdr(ib_arp_hdr* p_arph, in_addr_t ipsrc_addr, in_addr_t ipdst_addr, const uint8_t* hwsrc_addr, const uint8_t* hwdst_addr) -{ - p_arph->m_hwtype = htons(HWTYPE_IB); - p_arph->m_proto = htons(ETH_P_IP); - p_arph->m_hwlen = IPOIB_HW_ADDR_LEN; - p_arph->m_protolen = IPv4_ALEN; - p_arph->m_opcode = htons(ARP_REQUEST); - IBADDR_COPY(p_arph->m_shwaddr, hwsrc_addr); - p_arph->m_sipaddr = ipsrc_addr; - if(hwdst_addr) - IBADDR_COPY(p_arph->m_dhwaddr, hwdst_addr); - p_arph->m_dipaddr = ipdst_addr; -} diff --git a/src/vma/proto/arp.h b/src/vma/proto/arp.h index 627853705..d81c1377d 100644 --- a/src/vma/proto/arp.h +++ b/src/vma/proto/arp.h @@ -27,20 +27,4 @@ struct __attribute__ ((packed)) eth_arp_hdr void set_eth_arp_hdr(eth_arp_hdr* p_arph, in_addr_t ipsrc_addr, in_addr_t ipdst_addr, const uint8_t* hwsrc_addr, const uint8_t* hwdst_addr); -struct __attribute__ ((packed)) ib_arp_hdr -{ - uint16_t m_hwtype; - uint16_t m_proto; - uint8_t m_hwlen; - uint8_t m_protolen; - uint16_t m_opcode; - uint8_t m_shwaddr[IPOIB_HW_ADDR_LEN]; - uint32_t m_sipaddr; - uint8_t m_dhwaddr[IPOIB_HW_ADDR_LEN]; - uint32_t m_dipaddr; -}; - -void set_ib_arp_hdr(ib_arp_hdr* p_arph, in_addr_t ipsrc_addr, in_addr_t ipdst_addr, const uint8_t* hwsrc_addr, const uint8_t* hwdst_addr); - - #endif diff --git a/src/vma/proto/dst_entry.cpp b/src/vma/proto/dst_entry.cpp index 49490a5f9..60dd1e8a2 100644 --- a/src/vma/proto/dst_entry.cpp +++ b/src/vma/proto/dst_entry.cpp @@ -154,7 +154,7 @@ bool dst_entry::update_net_dev_val() if (m_p_net_dev_val) { // more resource clean and alloc... - ret_val = alloc_transport_dep_res(); + ret_val = alloc_neigh_val(); } else { dst_logdbg("Netdev is not offloaded fallback to OS"); @@ -385,76 +385,13 @@ bool dst_entry::conf_l2_hdr_and_snd_wqe_eth() return ret_val; } - -bool dst_entry::conf_l2_hdr_and_snd_wqe_ib() -{ - bool ret_val = false; - neigh_ib_val *neigh_ib = dynamic_cast(m_p_neigh_val); - - BULLSEYE_EXCLUDE_BLOCK_START - if (!neigh_ib) { - dst_logerr("Dynamic cast to neigh_ib failed, can't build proper ibv_send_wqe: header"); - BULLSEYE_EXCLUDE_BLOCK_END - } - else { - uint32_t qpn = neigh_ib->get_qpn(); - uint32_t qkey = neigh_ib->get_qkey(); - struct ibv_ah *ah = (struct ibv_ah *)neigh_ib->get_ah(); - - //Maybe we after invalidation so we free the wqe_handler since we are going to build it from scratch - if (m_p_send_wqe_handler) { - delete m_p_send_wqe_handler; - m_p_send_wqe_handler = NULL; - } - m_p_send_wqe_handler = new wqe_send_ib_handler(); - - BULLSEYE_EXCLUDE_BLOCK_START - if (!m_p_send_wqe_handler) { - dst_logpanic("%s Failed to allocate send WQE handler", to_str().c_str()); - } - BULLSEYE_EXCLUDE_BLOCK_END - ((wqe_send_ib_handler *)(m_p_send_wqe_handler))->init_inline_ib_wqe(m_inline_send_wqe, get_sge_lst_4_inline_send(), get_inline_sge_num(), ah, qpn, qkey); - ((wqe_send_ib_handler*)(m_p_send_wqe_handler))->init_not_inline_ib_wqe(m_not_inline_send_wqe, get_sge_lst_4_not_inline_send(), 1, ah, qpn, qkey); - ((wqe_send_ib_handler*)(m_p_send_wqe_handler))->init_ib_wqe(m_fragmented_send_wqe, get_sge_lst_4_not_inline_send(), 1, ah, qpn, qkey); - m_header.configure_ipoib_headers(); - init_sge(); - - ret_val = true; - } - return ret_val; -} - bool dst_entry::conf_hdrs_and_snd_wqe() { - transport_type_t tranposrt = VMA_TRANSPORT_IB; - bool ret_val = true; - dst_logdbg("dst_entry %s configuring the header template", to_str().c_str()); configure_ip_header(&m_header); - if (m_p_net_dev_val) { - tranposrt = m_p_net_dev_val->get_transport_type(); - } - - switch (tranposrt) { - case VMA_TRANSPORT_ETH: - ret_val = conf_l2_hdr_and_snd_wqe_eth(); - break; - case VMA_TRANSPORT_IB: - default: - ret_val = conf_l2_hdr_and_snd_wqe_ib(); - break; - } - return ret_val; -} - -//Implementation of pure virtual function of neigh_observer -transport_type_t dst_entry::get_obs_transport_type() const -{ - if(m_p_net_dev_val) - return(m_p_net_dev_val->get_transport_type()); - return VMA_TRANSPORT_UNKNOWN; + return conf_l2_hdr_and_snd_wqe_eth();; } bool dst_entry::offloaded_according_to_rules() @@ -502,11 +439,7 @@ bool dst_entry::prepare_to_send(struct vma_rate_limit_t &rate_limit, bool skip_r is_ofloaded = true; modify_ratelimit(rate_limit); if (resolve_neigh()) { - if (get_obs_transport_type() == VMA_TRANSPORT_ETH) { - dst_logdbg("local mac: %s peer mac: %s", m_p_net_dev_val->get_l2_address()->to_str().c_str(), m_p_neigh_val->get_l2_address()->to_str().c_str()); - } else { - dst_logdbg("peer L2 address: %s", m_p_neigh_val->get_l2_address()->to_str().c_str()); - } + dst_logdbg("local mac: %s peer mac: %s", m_p_net_dev_val->get_l2_address()->to_str().c_str(), m_p_neigh_val->get_l2_address()->to_str().c_str()); configure_headers(); m_id = m_p_ring->generate_id(m_p_net_dev_val->get_l2_address()->get_address(), m_p_neigh_val->get_l2_address()->get_address(), @@ -685,33 +618,15 @@ ssize_t dst_entry::pass_buff_to_neigh(const iovec * p_iov, size_t sz_iov, uint16 return ret_val; } -bool dst_entry::alloc_transport_dep_res() +bool dst_entry::alloc_neigh_val() { - return alloc_neigh_val(get_obs_transport_type()); -} - -bool dst_entry::alloc_neigh_val(transport_type_t tranport) -{ - bool ret_val = false; - if (m_p_neigh_val) { delete m_p_neigh_val; m_p_neigh_val = NULL; } - switch (tranport) { - case VMA_TRANSPORT_IB: - m_p_neigh_val = new neigh_ib_val; - break; - case VMA_TRANSPORT_ETH: - default: - m_p_neigh_val = new neigh_eth_val; - break; - } - if (m_p_neigh_val) { - ret_val = true; - } - return ret_val; + m_p_neigh_val = new neigh_eth_val; + return (!!m_p_neigh_val); } void dst_entry::return_buffers_pool() diff --git a/src/vma/proto/dst_entry.h b/src/vma/proto/dst_entry.h index 2a889d28d..ea562b9df 100644 --- a/src/vma/proto/dst_entry.h +++ b/src/vma/proto/dst_entry.h @@ -22,7 +22,6 @@ #include "vma/dev/net_device_val.h" #include "vma/dev/net_device_table_mgr.h" #include "vma/dev/wqe_send_handler.h" -#include "vma/dev/wqe_send_ib_handler.h" #include "vma/dev/ring.h" #include "vma/dev/ring_allocation_logic.h" #include "vma/infra/sender.h" @@ -41,7 +40,7 @@ typedef struct { uint16_t mss; } vma_send_attr; -class dst_entry : public cache_observer, public tostr, public neigh_observer +class dst_entry : public cache_observer, public tostr { public: @@ -74,8 +73,6 @@ class dst_entry : public cache_observer, public tostr, public neigh_observer int modify_ratelimit(struct vma_rate_limit_t &rate_limit); bool update_ring_alloc_logic(int fd, lock_base & socket_lock, resource_allocation_key & ring_alloc_logic); - virtual transport_type_t get_obs_transport_type() const; - void return_buffers_pool(); int get_route_mtu(); inline void set_ip_ttl(uint8_t ttl) { m_header.set_ip_ttl(ttl); } @@ -145,11 +142,8 @@ class dst_entry : public cache_observer, public tostr, public neigh_observer virtual void configure_headers() { conf_hdrs_and_snd_wqe();}; bool conf_hdrs_and_snd_wqe(); virtual bool conf_l2_hdr_and_snd_wqe_eth(); - virtual bool conf_l2_hdr_and_snd_wqe_ib(); virtual void init_sge() {}; - bool alloc_transport_dep_res(); - bool alloc_neigh_val(transport_type_t tranport); - + bool alloc_neigh_val(); void do_ring_migration(lock_base& socket_lock, resource_allocation_key &old_key); inline void set_tx_buff_list_pending(bool is_pending = true) {m_b_tx_mem_buf_desc_list_pending = is_pending;} int get_priority_by_tc_class(uint32_t tc_clas); diff --git a/src/vma/proto/dst_entry_udp_mc.cpp b/src/vma/proto/dst_entry_udp_mc.cpp index 316123686..d9f7cd9ce 100644 --- a/src/vma/proto/dst_entry_udp_mc.cpp +++ b/src/vma/proto/dst_entry_udp_mc.cpp @@ -33,30 +33,6 @@ dst_entry_udp_mc::~dst_entry_udp_mc() dst_udp_mc_logdbg("%s", to_str().c_str()); } -//The following function supposed to be called under m_lock -bool dst_entry_udp_mc::conf_l2_hdr_and_snd_wqe_ib() -{ - bool ret_val = false; - - dst_udp_mc_logfunc("%s", to_str().c_str()); - - ret_val = dst_entry_udp::conf_l2_hdr_and_snd_wqe_ib(); - - if (ret_val && !m_b_mc_loopback_enabled && m_p_send_wqe_handler) { - wqe_send_ib_handler *wqe_ib = dynamic_cast(m_p_send_wqe_handler); - if (wqe_ib) { - //Since checksum fails when packet contains an immediate header we don't enable an immediate header - //So MC loopback disable is NOT SUPPORTED! - //wqe_ib->enable_imm_data(m_inline_send_wqe); - //wqe_ib->enable_imm_data(m_not_inline_send_wqe); - } - else { - ret_val = false; - } - } - return ret_val; -} - void dst_entry_udp_mc::set_src_addr() { m_pkt_src_ip = INADDR_ANY; @@ -89,7 +65,7 @@ bool dst_entry_udp_mc::resolve_net_dev(bool is_connect) if (m_p_net_dev_entry) { m_p_net_dev_entry->get_val(m_p_net_dev_val); if (m_p_net_dev_val) { - ret_val = alloc_transport_dep_res(); + ret_val = alloc_neigh_val(); } else { dst_udp_mc_logdbg("Valid netdev value not found"); diff --git a/src/vma/proto/dst_entry_udp_mc.h b/src/vma/proto/dst_entry_udp_mc.h index 590d513c7..4a47b9b0b 100644 --- a/src/vma/proto/dst_entry_udp_mc.h +++ b/src/vma/proto/dst_entry_udp_mc.h @@ -18,8 +18,6 @@ class dst_entry_udp_mc : public dst_entry_udp resource_allocation_key &ring_alloc_logic); virtual ~dst_entry_udp_mc(); - virtual bool conf_l2_hdr_and_snd_wqe_ib(); - protected: ip_address m_mc_tx_if_ip; bool m_b_mc_loopback_enabled; diff --git a/src/vma/proto/header.cpp b/src/vma/proto/header.cpp index d6c090dab..85a4ff2a1 100644 --- a/src/vma/proto/header.cpp +++ b/src/vma/proto/header.cpp @@ -21,8 +21,7 @@ void header::init() header::header() : m_actual_hdr_addr(0), m_transport_header_tx_offset(0), - m_is_vlan_enabled(false), - m_transport_type(VMA_TRANSPORT_UNKNOWN) + m_is_vlan_enabled(false) { init(); } @@ -36,7 +35,6 @@ header::header(const header &h): tostr() m_aligned_l2_l3_len = h.m_aligned_l2_l3_len; m_transport_header_tx_offset = h.m_transport_header_tx_offset; m_is_vlan_enabled = h.m_is_vlan_enabled; - m_transport_type = h.m_transport_type; update_actual_hdr_addr(); } @@ -85,16 +83,6 @@ void header::configure_ip_header(uint8_t protocol, in_addr_t src_addr, in_addr_t m_total_hdr_len += m_ip_header_len; } -void header::configure_ipoib_headers(uint32_t ipoib_header /*=IPOIB_HEADER*/) -{ - ib_hdr_template_t *p_hdr = &m_header.hdr.m_l2_hdr.ib_hdr; - m_transport_header_tx_offset = sizeof(p_hdr->m_alignment); - m_transport_header_len = sizeof(p_hdr->m_ipoib_hdr); - m_total_hdr_len += m_transport_header_len; - p_hdr->m_ipoib_hdr.ipoib_header = htonl(ipoib_header); - update_actual_hdr_addr(); -} - void header::set_mac_to_eth_header(const L2_address &src, const L2_address &dst, ethhdr ð_header) { // copy source and destination mac address to eth header diff --git a/src/vma/proto/header.h b/src/vma/proto/header.h index 7f8808676..b819b9c4f 100644 --- a/src/vma/proto/header.h +++ b/src/vma/proto/header.h @@ -25,17 +25,10 @@ // We align the frame so IP header will be 4 bytes align // And we align the L2 headers so IP header on both transport // types will be at the same offset from buffer start -#define NET_IB_IP_ALIGN_SZ 16 #define NET_ETH_IP_ALIGN_SZ 6 #define NET_ETH_VLAN_IP_ALIGN_SZ 2 #define NET_ETH_VLAN_PCP_OFFSET 13 -struct __attribute__ ((packed)) ib_hdr_template_t { // Offeset Size - char m_alignment[NET_IB_IP_ALIGN_SZ]; // 0 16 = 16 - ipoibhdr m_ipoib_hdr; // 16 4 = 20 -// iphdr m_ip_hdr; // 20 20 = 40 -}; - struct __attribute__ ((packed)) eth_hdr_template_t { // Offeset Size char m_alignment[NET_ETH_IP_ALIGN_SZ]; // 0 6 = 6 ethhdr m_eth_hdr; // 6 14 = 20 @@ -50,7 +43,6 @@ struct __attribute__ ((packed)) vlan_eth_hdr_template_t { // Offeset Size }; union l2_hdr_template_t { - ib_hdr_template_t ib_hdr; eth_hdr_template_t eth_hdr; vlan_eth_hdr_template_t vlan_eth_hdr; }; @@ -82,7 +74,6 @@ class header: public tostr void configure_udp_header(uint16_t dest_port, uint16_t src_port); void configure_tcp_ports(uint16_t dest_port, uint16_t src_port); void configure_ip_header(uint8_t protocol, in_addr_t src_addr, in_addr_t dest_addr, uint8_t ttl = 64, uint8_t tos = 0, uint16_t packet_id = 0); - void configure_ipoib_headers(uint32_t ipoib_header = IPOIB_HEADER); void set_mac_to_eth_header(const L2_address &src, const L2_address &dst, ethhdr ð_header); void set_ip_ttl(uint8_t ttl); void set_ip_tos(uint8_t tos); @@ -132,7 +123,6 @@ class header: public tostr uint16_t m_aligned_l2_l3_len; uint16_t m_transport_header_tx_offset; bool m_is_vlan_enabled; - transport_type_t m_transport_type; }; #endif /* HEADER_H */ diff --git a/src/vma/proto/igmp_handler.cpp b/src/vma/proto/igmp_handler.cpp deleted file mode 100644 index f9ee67d6c..000000000 --- a/src/vma/proto/igmp_handler.cpp +++ /dev/null @@ -1,218 +0,0 @@ -/* - * SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES - * Copyright (c) 2001-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: GPL-2.0-only or BSD-2-Clause - */ - - -#include -#include - -#include "utils/bullseye.h" -#include "vlogger/vlogger.h" -#include "vma/proto/neighbour_table_mgr.h" -#include "vma/dev/wqe_send_handler.h" -#include "vma/dev/wqe_send_ib_handler.h" -#include "vma/util/utils.h" -#include "igmp_handler.h" - - - -#define MODULE_NAME "igmp_hdlr" -#undef MODULE_HDR_INFO -#define MODULE_HDR_INFO MODULE_NAME "[%s]:%d:%s() " - -#undef __INFO__ -#define __INFO__ this->to_str().c_str() - -#define igmp_hdlr_logpanic __log_info_panic -#define igmp_hdlr_logerr __log_info_err -#define igmp_hdlr_logwarn __log_info_warn -#define igmp_hdlr_loginfo __log_info_info -#define igmp_hdlr_logdbg __log_info_dbg -#define igmp_hdlr_logfunc __log_info_func -#define igmp_hdlr_logfuncall __log_info_funcall - -#define IGMPV1_MAX_RESPONSE_TIME 100 - -igmp_handler::igmp_handler(const igmp_key &key, uint8_t igmp_code) : m_mc_addr (key.get_in_addr()), m_p_ndvl(key.get_net_device_val()), - m_ignore_timer(false), m_timer_handle(NULL), m_p_neigh_entry(NULL), m_p_neigh_val(NULL), - m_p_ring(NULL), m_igmp_code(igmp_code ? igmp_code : IGMPV1_MAX_RESPONSE_TIME), m_id(0) -{ - ring_alloc_logic_attr ring_attr(safe_mce_sys().ring_allocation_logic_tx); - m_ring_allocation_logic = ring_allocation_logic_tx(m_p_ndvl->get_local_addr(), ring_attr, this); - - memset(&m_sge, 0, sizeof(m_sge)); - memset(&m_p_send_igmp_wqe, 0, sizeof(m_p_send_igmp_wqe)); -} - -igmp_handler::~igmp_handler() -{ - if (m_p_neigh_entry) { - g_p_neigh_table_mgr->unregister_observer(igmp_key(m_mc_addr, m_p_ndvl),this); - m_p_neigh_entry = NULL; - } - - if (m_p_ring) { - m_p_ndvl->release_ring(m_ring_allocation_logic.get_key()); - m_p_ring = NULL; - } - - if (m_p_neigh_val) { - delete m_p_neigh_val; - m_p_neigh_val = NULL; - } -} - -bool igmp_handler::init(const igmp_key &key) -{ - igmp_hdlr_logfunc(""); - cache_entry_subject* p_ces = NULL; - g_p_neigh_table_mgr->register_observer(key, this, &p_ces); - m_p_neigh_entry = dynamic_cast(p_ces); - - BULLSEYE_EXCLUDE_BLOCK_START - if (!m_p_neigh_entry) { - igmp_hdlr_logerr("Dynamic casting to neigh_entry has failed"); - return false; - } - - m_p_neigh_val = new neigh_ib_val; - if (!m_p_neigh_val) { - igmp_hdlr_logerr("Failed allocating neigh_val"); - return false; - } - - m_p_ring = m_p_ndvl->reserve_ring(m_ring_allocation_logic.get_key()); - if (!m_p_ring) { - igmp_hdlr_logerr("Ring was not reserved"); - return false; - } - m_id = m_p_ring->generate_id(); - BULLSEYE_EXCLUDE_BLOCK_END - - return true; -} - - // will register timer and later do 'tx_igmp_report(mc_group, ndvl)' -void igmp_handler::handle_query(uint8_t igmp_code) -{ - igmp_hdlr_logdbg("Received igmp query, preparing to send report"); - - m_igmp_code = igmp_code ? igmp_code : IGMPV1_MAX_RESPONSE_TIME; - - m_ignore_timer = false; - - priv_register_timer_event(this, ONE_SHOT_TIMER, (void*)IGMP_TIMER_ID); -} - -void igmp_handler::priv_register_timer_event(timer_handler* handler, timer_req_type_t req_type, void* user_data) -{ - int duration = 0 ; - srand(time(NULL)); - /* coverity[dont_call] */ - duration = (rand() % (m_igmp_code * 100)); // igmp_code (1-255) is in 1/10 sec units - - lock(); - if (!m_timer_handle && g_p_event_handler_manager) { - igmp_hdlr_logdbg("Register timer (%d msec) for sending igmp report after seen an igmp query for this group", duration); - m_timer_handle = g_p_event_handler_manager->register_timer_event(duration, handler, req_type, user_data); - } - unlock(); -} - -void igmp_handler::handle_report() -{ - igmp_hdlr_logdbg("Ignoring self timer (%p) after seen an igmp report for this group", m_timer_handle); - m_ignore_timer = true; // check if was not ignored before ? -} - -void igmp_handler::clean_obj() -{ - if (is_cleaned()) { - return ; - } - - set_cleaned(); - m_timer_handle = NULL; - if (g_p_event_handler_manager->is_running()) { - g_p_event_handler_manager->unregister_timers_event_and_delete(this); - } else { - cleanable_obj::clean_obj(); - } -} - -void igmp_handler::handle_timer_expired(void* user_data) -{ - NOT_IN_USE(user_data); - igmp_hdlr_logdbg("Timeout expired"); - m_timer_handle = NULL; - - if (m_ignore_timer) { - igmp_hdlr_logdbg("Ignoring timeout handling due to captured IGMP report"); - return; - } - igmp_hdlr_logdbg("Sending igmp report"); - - if (!tx_igmp_report()) { - igmp_hdlr_logdbg("Send igmp report failed, registering new timer"); - priv_register_timer_event(this, ONE_SHOT_TIMER, (void*)IGMP_TIMER_ID); - } -} - -bool igmp_handler::tx_igmp_report() -{ - - if (m_p_neigh_entry->get_peer_info(m_p_neigh_val)) { - igmp_hdlr_logdbg("neigh is valid"); - } - else { - igmp_hdlr_logdbg("neigh is not valid"); - return false; - } - - mem_buf_desc_t* p_mem_buf_desc = m_p_ring->mem_buf_tx_get(m_id, false, 1); - if (unlikely(p_mem_buf_desc == NULL)) { - igmp_hdlr_logdbg("No free TX buffer, not sending igmp report"); - return false; - } - - wqe_send_ib_handler wqe_sh; - wqe_sh.init_ib_wqe(m_p_send_igmp_wqe, &m_sge, 1, ((neigh_ib_val *)m_p_neigh_val)->get_ah(), - ((neigh_ib_val *)m_p_neigh_val)->get_qpn(), ((neigh_ib_val *)m_p_neigh_val)->get_qkey()); - m_header.init(); - m_header.configure_ipoib_headers(); - size_t m_total_l2_hdr_len = m_header.m_total_hdr_len; - m_header.configure_ip_header(IPPROTO_IGMP, m_p_ndvl->get_local_addr(), m_mc_addr.get_in_addr(),/*ttl for IGMP*/1); - m_header.copy_l2_ip_hdr((tx_packet_template_t*)p_mem_buf_desc->p_buffer); - - // Override IP header with IGMPV2 specific info - ip_igmp_tx_hdr_template_t* p_ip_pkt = (ip_igmp_tx_hdr_template_t*)(p_mem_buf_desc->p_buffer + m_header.m_transport_header_tx_offset + m_total_l2_hdr_len); - set_ip_igmp_hdr(p_ip_pkt); - - m_sge.addr = (uintptr_t)(p_mem_buf_desc->p_buffer + (uint8_t)m_header.m_transport_header_tx_offset); - m_sge.length = m_header.m_total_hdr_len + sizeof(uint32_t /*m_ip_hdr_ext*/) + sizeof (igmphdr /*m_igmp_hdr*/); - m_sge.lkey = p_mem_buf_desc->lkey; - p_mem_buf_desc->p_next_desc = NULL; - m_p_send_igmp_wqe.wr_id = (uintptr_t)p_mem_buf_desc; - - igmp_hdlr_logdbg("Sending igmp report"); - m_p_ring->send_ring_buffer(m_id, &m_p_send_igmp_wqe, (vma_wr_tx_packet_attr)0); - return true; -} - -void igmp_handler::set_ip_igmp_hdr(ip_igmp_tx_hdr_template_t* ip_igmp_hdr) -{ - ip_igmp_hdr->m_ip_hdr.ihl = IPV4_IGMP_HDR_LEN_WORDS; - ip_igmp_hdr->m_ip_hdr.tot_len = htons(IPV4_IGMP_HDR_LEN + sizeof(igmphdr)); - ip_igmp_hdr->m_ip_hdr_ext = htonl(IGMP_IP_HEADER_EXT); - ip_igmp_hdr->m_ip_hdr.check = 0; - ip_igmp_hdr->m_ip_hdr.check = compute_ip_checksum((unsigned short*)&ip_igmp_hdr->m_ip_hdr, (IPV4_IGMP_HDR_LEN_WORDS) * 2); - - // Create the IGMP header - ip_igmp_hdr->m_igmp_hdr.type = IGMPV2_HOST_MEMBERSHIP_REPORT; - ip_igmp_hdr->m_igmp_hdr.code = 0; - ip_igmp_hdr->m_igmp_hdr.group = m_mc_addr.get_in_addr(); - ip_igmp_hdr->m_igmp_hdr.csum = 0; - ip_igmp_hdr->m_igmp_hdr.csum = compute_ip_checksum((unsigned short*)&ip_igmp_hdr->m_igmp_hdr, IGMP_HDR_LEN_WORDS * 2); -} diff --git a/src/vma/proto/igmp_handler.h b/src/vma/proto/igmp_handler.h deleted file mode 100644 index 021b9081a..000000000 --- a/src/vma/proto/igmp_handler.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - * SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES - * Copyright (c) 2001-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: GPL-2.0-only or BSD-2-Clause - */ - - -#include "vma/proto/neighbour.h" -#include "vma/event/event_handler_manager.h" -#include "vma/event/timer_handler.h" -#include - - -#ifndef IGMP_HANDLER_H_ -#define IGMP_HANDLER_H_ - -#define igmp_key neigh_key - -#define IGMP_TIMER_ID 0 - -struct __attribute__ ((packed, aligned)) ip_igmp_tx_hdr_template_t { - iphdr m_ip_hdr; - uint32_t m_ip_hdr_ext; - igmphdr m_igmp_hdr; -}; - -#define IGMP_IP_HEADER_EXT 0x94040000 // IP header options field: Router alert - -class igmp_handler : public timer_handler, public lock_mutex, public cleanable_obj, public cache_observer, public neigh_observer -{ -public: - igmp_handler(const igmp_key &key, uint8_t igmp_code); - bool init(const igmp_key &key); - ~igmp_handler(); - - const std::string to_str() const - { - return(m_mc_addr.to_str() + " " + m_p_ndvl->to_str()); - } - - virtual transport_type_t get_obs_transport_type() const - { - return m_p_ndvl->get_transport_type(); - } - - void handle_query(uint8_t igmp_code); // handle queries coming from router - void handle_report(); // handle reports coming from other hosts - - virtual void clean_obj(); -private: - - ip_address m_mc_addr; - net_device_val* m_p_ndvl; - ring_allocation_logic_tx m_ring_allocation_logic; - bool m_ignore_timer; - void* m_timer_handle; - neigh_entry* m_p_neigh_entry; - neigh_val* m_p_neigh_val; - ring* m_p_ring; - header m_header; - ibv_sge m_sge; - vma_ibv_send_wr m_p_send_igmp_wqe; - uint8_t m_igmp_code; - ring_user_id_t m_id; - - void set_timer(); //called by tx_igmp_report - void unset_timer(); // called if igmp packet is report and not query - virtual void handle_timer_expired(void* user_data); - void priv_register_timer_event(timer_handler* handler, timer_req_type_t req_type, void* user_data); - bool tx_igmp_report(); - void set_ip_igmp_hdr(ip_igmp_tx_hdr_template_t* igmp_hdr); - -}; - -#endif diff --git a/src/vma/proto/igmp_mgr.cpp b/src/vma/proto/igmp_mgr.cpp deleted file mode 100644 index df2ac2809..000000000 --- a/src/vma/proto/igmp_mgr.cpp +++ /dev/null @@ -1,124 +0,0 @@ -/* - * SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES - * Copyright (c) 2001-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: GPL-2.0-only or BSD-2-Clause - */ - - -#include -#include - -#include "utils/bullseye.h" -#include "vlogger/vlogger.h" -#include "igmp_mgr.h" -#include "vma/dev/net_device_table_mgr.h" -#include "vma/dev/net_device_val.h" - - - -#define MODULE_NAME "igmp_mgr" -#undef MODULE_HDR_INFO -#define MODULE_HDR_INFO MODULE_NAME "[%s]:%d:%s() " - -#undef __INFO__ -#define __INFO__ "" - -#define igmp_mgr_logpanic __log_info_panic -#define igmp_mgr_logerr __log_info_err -#define igmp_mgr_logwarn __log_info_warn -#define igmp_mgr_loginfo __log_info_info -#define igmp_mgr_logdbg __log_info_dbg -#define igmp_mgr_logfunc __log_info_func -#define igmp_mgr_logfuncall __log_info_funcall - - -igmp_mgr *g_p_igmp_mgr = NULL; - -igmp_mgr::~igmp_mgr() -{ - igmp_handler* p_igmp_hdlr = NULL; - igmp_hdlr_map_t::iterator iter = m_igmp_hash.begin(); - while (iter != m_igmp_hash.end()) { - p_igmp_hdlr = iter->second; - igmp_mgr_logdbg("Delete existing igmp handler '%s'", p_igmp_hdlr->to_str().c_str()); - m_igmp_hash.erase(iter); - p_igmp_hdlr->clean_obj(); - // coverity[assigned_pointer:FALSE] /* NULL assign for already freed object */ - p_igmp_hdlr = NULL; - iter = m_igmp_hash.begin(); - } -} - -void igmp_mgr::process_igmp_packet(struct iphdr* p_ip_h, in_addr_t local_if) -{ - igmp_mgr_logfunc(""); - igmp_handler* p_igmp_hdlr = NULL; - uint16_t ip_h_hdr_len = (int)(p_ip_h->ihl)*4; - struct igmphdr* p_igmp_h = (struct igmphdr*)(((uint8_t*)p_ip_h) + ip_h_hdr_len); - - net_device_val* p_ndvl = g_p_net_device_table_mgr->get_net_device_val(local_if); - BULLSEYE_EXCLUDE_BLOCK_START - if (!p_ndvl){ - igmp_mgr_logerr("Failed getting relevant net device"); - return; - } - BULLSEYE_EXCLUDE_BLOCK_END - - igmp_key key(ip_address(p_igmp_h->group), p_ndvl); - p_igmp_hdlr = get_igmp_handler(key, p_igmp_h->code); - BULLSEYE_EXCLUDE_BLOCK_START - if (!p_igmp_hdlr){ - igmp_mgr_logerr("Failed getting relevant igmp_handler"); - return; - } - BULLSEYE_EXCLUDE_BLOCK_END - - switch (p_igmp_h->type) { - case IGMP_HOST_MEMBERSHIP_QUERY: - p_igmp_hdlr->handle_query(p_igmp_h->code); - break; - - case IGMP_HOST_MEMBERSHIP_REPORT: - case IGMPV2_HOST_MEMBERSHIP_REPORT: - p_igmp_hdlr->handle_report(); - break; - - default: - break; - } -} - -igmp_handler* igmp_mgr::get_igmp_handler(const igmp_key &key, uint8_t igmp_code) -{ - igmp_handler *p_igmp_hdlr = NULL; - - lock(); - igmp_hdlr_map_t::iterator iter = m_igmp_hash.find(key); - if (iter != m_igmp_hash.end()) { - p_igmp_hdlr = iter->second; - igmp_mgr_logdbg("Found existing igmp handler '%s'", p_igmp_hdlr->to_str().c_str()); - } - else { - p_igmp_hdlr = new igmp_handler(key, igmp_code); - BULLSEYE_EXCLUDE_BLOCK_START - if (!p_igmp_hdlr) { - igmp_mgr_logerr("Failed allocating new igmp handler for mc_address = %d.%d.%d.%d, local_if= %d.%d.%d.%d", - NIPQUAD(key.get_in_addr()), NIPQUAD(key.get_net_device_val()->get_local_addr())); - unlock(); - return p_igmp_hdlr; - } - if (!p_igmp_hdlr->init(key)) { - igmp_mgr_logerr("Failed to initialize new igmp handler '%s'", p_igmp_hdlr->to_str().c_str()); - delete(p_igmp_hdlr); - p_igmp_hdlr = NULL; - unlock(); - return p_igmp_hdlr; - } - BULLSEYE_EXCLUDE_BLOCK_END - m_igmp_hash[key] = p_igmp_hdlr; - igmp_mgr_logdbg("Created new igmp handler '%s'", p_igmp_hdlr->to_str().c_str()); - } - unlock(); - return p_igmp_hdlr; -} - diff --git a/src/vma/proto/igmp_mgr.h b/src/vma/proto/igmp_mgr.h deleted file mode 100644 index 36ebe8ff9..000000000 --- a/src/vma/proto/igmp_mgr.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES - * Copyright (c) 2001-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: GPL-2.0-only or BSD-2-Clause - */ - - -#include "vma/proto/igmp_handler.h" -#include - -#ifndef IGMP_MANAGER_H -#define IGMP_MANAGER_H - - -typedef std::unordered_map igmp_hdlr_map_t; - -class igmp_mgr : public lock_mutex -{ -public: - igmp_mgr() {}; - ~igmp_mgr(); - void process_igmp_packet(struct iphdr* p_ip_h, in_addr_t local_if); - -private: - igmp_hdlr_map_t m_igmp_hash; - igmp_handler* get_igmp_handler(const igmp_key &key, uint8_t igmp_code); -}; - -extern igmp_mgr *g_p_igmp_mgr; - -#endif - diff --git a/src/vma/proto/neighbour.cpp b/src/vma/proto/neighbour.cpp index 081ecd2bd..340119c57 100644 --- a/src/vma/proto/neighbour.cpp +++ b/src/vma/proto/neighbour.cpp @@ -18,7 +18,6 @@ #include "vma/proto/route_rule_table_key.h" #include "vma/proto/route_table_mgr.h" #include "vma/dev/wqe_send_handler.h" -#include "vma/dev/wqe_send_ib_handler.h" //This include should be after vma includes #include @@ -128,32 +127,12 @@ inline int neigh_eth::build_uc_neigh_val() return 0; } -neigh_val & neigh_ib_val::operator=(const neigh_val & val) -{ - IPoIB_addr* l2_addr = NULL; - neigh_val* tmp_val = const_cast(&val); - const neigh_ib_val* ib_val = dynamic_cast(tmp_val); - BULLSEYE_EXCLUDE_BLOCK_START - if (ib_val == NULL) { - __log_panic("neigh_ib_val is NULL"); - } - BULLSEYE_EXCLUDE_BLOCK_END - m_l2_address = new IPoIB_addr((ib_val->get_l2_address())->get_address()); - l2_addr = (IPoIB_addr *)m_l2_address; //no need to do dynamic casting here - m_ah = ib_val->get_ah(); //TODO: we need to handle this - in case ah is used in post_send we cannot destroy it - m_qkey = ib_val->get_qkey(); - l2_addr->set_qpn(ib_val->get_qpn()); - m_ah_attr = ib_val->get_ah_attr(); - return *this; -} - -neigh_entry::neigh_entry(neigh_key key, transport_type_t _type, bool is_init_resources): +neigh_entry::neigh_entry(neigh_key key, bool is_init_resources): cache_entry_subject(key), m_cma_id(NULL), m_rdma_port_space((enum rdma_port_space)0), m_state_machine(NULL), m_type(UNKNOWN), - m_trans_type(_type), m_state(false), m_err_counter(0), m_timer_handle(NULL), @@ -161,7 +140,7 @@ neigh_entry::neigh_entry(neigh_key key, transport_type_t _type, bool is_init_res m_p_dev(key.get_net_device_val()), m_p_ring(NULL), m_is_loopback(false), - m_to_str(std::string(priv_vma_transport_type_str(m_trans_type)) + ":" + get_key().to_str()), m_id(0), + m_to_str(get_key().to_str()), m_id(0), m_is_first_send_arp(true), m_n_sysvar_neigh_wait_till_send_arp_msec(safe_mce_sys().neigh_wait_till_send_arp_msec), m_n_sysvar_neigh_uc_arp_quata(safe_mce_sys().neigh_uc_arp_quata), m_n_sysvar_neigh_num_err_retries(safe_mce_sys().neigh_num_err_retries) @@ -332,7 +311,7 @@ void neigh_entry::handle_timer_expired(void* ctx) if(!priv_is_failed(state)) { //We want to verify that L2 address wasn't changed - unsigned char tmp[IPOIB_HW_ADDR_LEN]; + unsigned char tmp[ETH_ALEN]; address_t l2_addr = (address_t)tmp; if(!priv_get_neigh_l2(l2_addr)) { return; @@ -761,8 +740,6 @@ const char* neigh_entry::event_to_str(event_t event) const return "EV_PATH_RESOLVED"; case EV_ERROR: return "EV_ERROR"; - case EV_TIMEOUT_EXPIRED: - return "EV_TIMEOUT_EXPIRED"; case EV_UNHANDLED: return "EV_UNHANDELED"; BULLSEYE_EXCLUDE_BLOCK_START @@ -1229,7 +1206,7 @@ void neigh_entry::priv_unregister_timer() //============================================================== neigh_eth ================================================== neigh_eth::neigh_eth(neigh_key key) : - neigh_entry(key, VMA_TRANSPORT_ETH) + neigh_entry(key) { neigh_logdbg(""); m_rdma_port_space = RDMA_PS_UDP; @@ -1525,628 +1502,3 @@ ring_user_id_t neigh_eth::generate_ring_user_id(header * h /* = NULL */) h->m_header.hdr.m_udp_hdr.source, h->m_header.hdr.m_udp_hdr.dest); } - -//============================================================== neigh_ib ================================================== - -neigh_ib::neigh_ib(neigh_key key, bool is_init_resources) : - neigh_entry(key, VMA_TRANSPORT_IB, is_init_resources), m_pd(NULL), m_n_sysvar_wait_after_join_msec(safe_mce_sys().wait_after_join_msec) -{ - neigh_logdbg(""); - - m_rdma_port_space = RDMA_PS_IPOIB; - - if(IS_BROADCAST_N(key.get_in_addr())) { - //In case of broadcast neigh we don't want to have state machine - m_type = MC; - return; - } - - if (IN_MULTICAST_N(key.get_in_addr())) { - //This is Multicast neigh - m_type = MC; - } - else { - // This is Unicast neigh - m_type = UC; - } - //Do we need to handle case when we get EV_ERROR but in case this error is not related to the state - //Like Address Resolve Error when we at ST_ARP_RESOLVED or ST_PATH_RESOLVED .... - - sm_short_table_line_t short_sm_table[] = - { - // {curr state, event, next state, action func } - { ST_NOT_ACTIVE, EV_KICK_START, ST_INIT, NULL }, - { ST_ERROR, EV_KICK_START, ST_INIT, NULL }, - { ST_INIT, EV_START_RESOLUTION, ST_INIT_RESOLUTION, NULL }, - { ST_INIT_RESOLUTION, EV_ADDR_RESOLVED, ST_ARP_RESOLVED, NULL }, - { ST_ARP_RESOLVED, EV_PATH_RESOLVED, ST_PATH_RESOLVED, NULL }, - { ST_PATH_RESOLVED, EV_TIMEOUT_EXPIRED, ST_READY, NULL }, - { ST_PATH_RESOLVED, EV_ERROR, ST_ERROR, NULL }, - { ST_ARP_RESOLVED, EV_ERROR, ST_ERROR, NULL }, - { ST_READY, EV_ERROR, ST_ERROR, NULL }, - { ST_INIT, EV_ERROR, ST_ERROR, NULL }, - { ST_INIT_RESOLUTION, EV_ERROR, ST_ERROR, NULL }, - { ST_ERROR, EV_ERROR, ST_NOT_ACTIVE, NULL }, - //Entry functions - { ST_INIT, SM_STATE_ENTRY, SM_NO_ST, neigh_entry::dofunc_enter_init }, - { ST_INIT_RESOLUTION, SM_STATE_ENTRY, SM_NO_ST, neigh_entry::dofunc_enter_init_resolution }, - { ST_ARP_RESOLVED, SM_STATE_ENTRY, SM_NO_ST, neigh_ib::dofunc_enter_arp_resolved }, - { ST_PATH_RESOLVED, SM_STATE_ENTRY, SM_NO_ST, neigh_ib::dofunc_enter_path_resolved }, - { ST_READY, SM_STATE_ENTRY, SM_NO_ST, neigh_entry::dofunc_enter_ready }, - { ST_NOT_ACTIVE, SM_STATE_ENTRY, SM_NO_ST, neigh_entry::dofunc_enter_not_active }, - { ST_ERROR, SM_STATE_ENTRY, SM_NO_ST, neigh_entry::dofunc_enter_error }, - SM_TABLE_END }; - - // Create state_nachine - m_state_machine = new state_machine(this, // app hndl - ST_NOT_ACTIVE, // start state_t - ST_LAST, // max states - EV_LAST, // max events - short_sm_table, // short table - general_st_entry, // default entry function - general_st_leave, // default leave function - NULL, // default func - print_event_info // debug function - ); - - BULLSEYE_EXCLUDE_BLOCK_START - if (m_state_machine == NULL) - neigh_logpanic("Failed allocating state_machine"); - BULLSEYE_EXCLUDE_BLOCK_END - - priv_kick_start_sm(); -} - -neigh_ib::~neigh_ib() -{ - priv_enter_not_active(); -} - -void neigh_ib::handle_event_ibverbs_cb(void* ev_data, void* ctx) -{ - NOT_IN_USE(ctx); - event_t event = ibverbs_event_mapping(ev_data); - event_handler(event, ev_data); -} - -// called when timer expired -void neigh_ib::handle_timer_expired(void* ctx) -{ - neigh_logdbg("general timeout expired!"); - - m_sm_lock.lock(); - int state = m_state_machine->get_curr_state(); - m_sm_lock.unlock(); - - if(state == ST_PATH_RESOLVED) { - // Clear Timer Handler - m_timer_handle = NULL; - event_handler(EV_TIMEOUT_EXPIRED); - } - else if(state == ST_READY) { - neigh_entry::handle_timer_expired(ctx); - } - else if(state == ST_INIT) { - // Clear Timer Handler - m_timer_handle = NULL; - event_handler(EV_START_RESOLUTION); - } -} - -bool neigh_ib::priv_handle_neigh_is_l2_changed(address_t new_l2_addr) -{ - auto_unlocker lock(m_lock); - IPoIB_addr new_l2_address(new_l2_addr); - if (m_val) { - if(m_val->get_l2_address()) { - if (!(m_val->get_l2_address()->compare(new_l2_address))) { - neigh_logdbg("l2 address was changed (%s => %s)", (m_val->get_l2_address())->to_str().c_str(), new_l2_address.to_str().c_str()); - event_handler(EV_ERROR); - return true; - } - else { - neigh_logdbg("No change in l2 address"); - return false; - } - } - else { - neigh_logdbg("l2 address is NULL\n"); - } - } - else { - neigh_logerr("m_val is NULL"); - } - - event_handler(EV_ERROR); - return true; -} - -bool neigh_ib::post_send_arp(bool is_broadcast) -{ - neigh_logdbg("Sending %s ARP", is_broadcast?"BC":"UC"); - - mem_buf_desc_t* p_mem_buf_desc = m_p_ring->mem_buf_tx_get(m_id, false, 1); - if (unlikely(p_mem_buf_desc == NULL)) { - neigh_logdbg("No free TX buffer, not sending ARP"); - return false; - } - - net_device_val_ib *netdevice_ib = dynamic_cast(m_p_dev); - if (netdevice_ib == NULL) { - m_p_ring->mem_buf_tx_release(p_mem_buf_desc, true); - neigh_logdbg("Net dev is NULL not sending ARP"); - return false; - } - - const L2_address *src = netdevice_ib->get_l2_address(); - const L2_address *dst; - neigh_ib_val br_neigh_val; - ibv_ah* ah = NULL; - uint32_t qpn; - uint32_t qkey; - const unsigned char* peer_mac = NULL; - if (!is_broadcast) { - dst = m_val->get_l2_address(); - peer_mac = dst->get_address(); - ah = ((neigh_ib_val *)m_val)->get_ah(); - qpn = ((neigh_ib_val *)m_val)->get_qpn(); - qkey = ((neigh_ib_val *)m_val)->get_qkey(); - } - else { - dst = m_p_dev->get_br_address(); - neigh_ib_broadcast * br_neigh = const_cast(((net_device_val_ib*)m_p_dev)->get_br_neigh()); - bool ret = br_neigh->get_peer_info(&br_neigh_val); - if (ret) { - ah = br_neigh_val.get_ah(); - qpn = br_neigh_val.get_qpn(); - qkey = br_neigh_val.get_qkey(); - } - else { - m_p_ring->mem_buf_tx_release(p_mem_buf_desc, true); - neigh_logdbg("BR Neigh is not valid, not sending BR ARP"); - return false; - } - } - - if (src == NULL || dst == NULL) { - m_p_ring->mem_buf_tx_release(p_mem_buf_desc, true); - neigh_logdbg("src or dst is NULL not sending ARP"); - return false; - } - - wqe_send_ib_handler wqe_sh; - wqe_sh.init_ib_wqe(m_send_wqe, &m_sge, 1, ah, qpn, qkey); - neigh_logdbg("ARP: ah=%p, qkey=%#x, qpn=%#x", ah ,qkey, qpn); - header h; - h.init(); - h.configure_ipoib_headers(IPOIB_ARP_HEADER); - - - tx_packet_template_t *p_pkt = (tx_packet_template_t*)p_mem_buf_desc->p_buffer; - h.copy_l2_hdr(p_pkt); - - ib_arp_hdr* p_arphdr = (ib_arp_hdr*) (p_mem_buf_desc->p_buffer + h.m_transport_header_tx_offset + h.m_total_hdr_len); - set_ib_arp_hdr(p_arphdr, m_p_dev->get_local_addr(), get_key().get_in_addr(), m_p_dev->get_l2_address()->get_address(), peer_mac); - - m_sge.addr = (uintptr_t)(p_mem_buf_desc->p_buffer + (uint8_t)h.m_transport_header_tx_offset); - m_sge.length = sizeof(ib_arp_hdr) + h.m_total_hdr_len; - m_sge.lkey = p_mem_buf_desc->lkey; - p_mem_buf_desc->p_next_desc = NULL; - m_send_wqe.wr_id = (uintptr_t)p_mem_buf_desc; - - m_p_ring->send_ring_buffer(m_id, &m_send_wqe, (vma_wr_tx_packet_attr)0); - - neigh_logdbg("ARP Sent"); - return true; -} - -bool neigh_ib::prepare_to_send_packet(header * h) -{ - neigh_logdbg(""); - wqe_send_ib_handler wqe_sh; - wqe_sh.init_ib_wqe(m_send_wqe, &m_sge , 1, ((neigh_ib_val *)m_val)->get_ah(), ((neigh_ib_val *)m_val)->get_qpn(), ((neigh_ib_val *)m_val)->get_qkey()); - h->configure_ipoib_headers(); - - return true; -} - -neigh_entry::event_t neigh_ib::ibverbs_event_mapping(void* p_event_info) -{ - struct ibv_async_event *ev = (struct ibv_async_event *) p_event_info; - neigh_logdbg("Got event %s (%d) ", priv_ibv_event_desc_str(ev->event_type), ev->event_type); - - switch (ev->event_type) - { - case IBV_EVENT_SM_CHANGE: - case IBV_EVENT_CLIENT_REREGISTER: - return EV_ERROR; - default: - return EV_UNHANDLED; - } -} - -void neigh_ib::dofunc_enter_arp_resolved(const sm_info_t& func_info) -{ - neigh_ib * my_neigh = (neigh_ib *) func_info.app_hndl; - neigh_entry::general_st_entry(func_info); - - run_helper_func(priv_enter_arp_resolved(), EV_ERROR); -} - -void neigh_ib::dofunc_enter_path_resolved(const sm_info_t& func_info) -{ - neigh_ib * my_neigh = (neigh_ib *) func_info.app_hndl; - neigh_entry::general_st_entry(func_info); - - uint32_t wait_after_join_msec; - - run_helper_func(priv_enter_path_resolved((struct rdma_cm_event*)func_info.ev_data, wait_after_join_msec), - EV_ERROR); - my_neigh->m_timer_handle = my_neigh->priv_register_timer_event(wait_after_join_msec, my_neigh, ONE_SHOT_TIMER, NULL); -} - -int neigh_ib::priv_enter_arp_resolved() -{ - neigh_logfunc(""); - - if (m_cma_id->verbs == NULL) { - neigh_logdbg("m_cma_id->verbs is NULL"); - return -1; - } - - if (find_pd()) - return -1; - - //Register Verbs event in case there was Fabric change - if (m_cma_id->verbs) { - g_p_event_handler_manager->register_ibverbs_event( - m_cma_id->verbs->async_fd, this, - m_cma_id->verbs, 0); - } - - if (m_type == UC) - return (handle_enter_arp_resolved_uc()); - else - // MC - return (handle_enter_arp_resolved_mc()); -} - -int neigh_ib::priv_enter_path_resolved(struct rdma_cm_event* event_data, - uint32_t & wait_after_join_msec) -{ - neigh_logfunc(""); - - if (m_val == NULL) - //This is the first time we are trying to allocate new val or it failed last time - m_val = new neigh_ib_val; - - BULLSEYE_EXCLUDE_BLOCK_START - if (m_val == NULL) - return -1; - BULLSEYE_EXCLUDE_BLOCK_END - - if (m_type == UC) - return (build_uc_neigh_val(event_data, wait_after_join_msec)); - else - //MC - return (build_mc_neigh_val(event_data, wait_after_join_msec)); -} - -void neigh_ib::priv_enter_error() -{ - auto_unlocker lock(m_lock); - - m_state = false; - m_pd = NULL; - - destroy_ah(); - priv_unregister_timer(); - - if (m_cma_id && m_cma_id->verbs) { - neigh_logdbg("Unregister Verbs event"); - g_p_event_handler_manager->unregister_ibverbs_event(m_cma_id->verbs->async_fd, this); - } - - neigh_entry::priv_enter_error(); -} - -void neigh_ib::priv_enter_not_active() -{ - neigh_logfunc(""); - - auto_unlocker lock(m_lock); - - m_state = false; - m_pd = NULL; - - destroy_ah(); - - if (m_cma_id && m_cma_id->verbs) { - neigh_logdbg("Unregister Verbs event"); - g_p_event_handler_manager->unregister_ibverbs_event(m_cma_id->verbs->async_fd, this); - } - - neigh_entry::priv_enter_not_active(); -} - -int neigh_ib::priv_enter_ready() -{ - neigh_logfunc(""); - priv_unregister_timer(); - return (neigh_entry::priv_enter_ready()); -} - -int neigh_ib::handle_enter_arp_resolved_mc() -{ - neigh_logdbg(""); - - IF_RDMACM_FAILURE(rdma_join_multicast( m_cma_id, (struct sockaddr*)&m_dst_addr, (void *)this)) - { - neigh_logdbg("Failed in rdma_join_multicast (errno=%d %m)", errno); - return -1; - } ENDIF_RDMACM_FAILURE; - - return 0; -} - -int neigh_ib::handle_enter_arp_resolved_uc() -{ - neigh_logdbg(""); - - IF_RDMACM_FAILURE(rdma_resolve_route(m_cma_id, RDMA_CM_TIMEOUT)) - { - neigh_logdbg("Resolve address error (errno=%d %m)", errno); - return -1; - } ENDIF_RDMACM_FAILURE; - - return 0; -} - -int neigh_ib::build_mc_neigh_val(struct rdma_cm_event* event_data, - uint32_t & wait_after_join_msec) -{ - neigh_logdbg(""); - - m_val->m_l2_address = new IPoIB_addr(event_data->param.ud.qp_num, (address_t)event_data->param.ud.ah_attr.grh.dgid.raw); - BULLSEYE_EXCLUDE_BLOCK_START - if (m_val->m_l2_address == NULL) { - neigh_logdbg("Failed allocating m_val->m_l2_address"); - return -1; - } - BULLSEYE_EXCLUDE_BLOCK_END - - ((neigh_ib_val *) m_val)->m_qkey = event_data->param.ud.qkey; - - memcpy(&((neigh_ib_val *) m_val)->m_ah_attr, - &event_data->param.ud.ah_attr, - sizeof(((neigh_ib_val *) m_val)->m_ah_attr)); - - BULLSEYE_EXCLUDE_BLOCK_START - if (create_ah()) - return -1; - BULLSEYE_EXCLUDE_BLOCK_END - - neigh_logdbg("IB multicast neigh params are : ah=%p, qkey=%#x, sl=%#x, rate=%#x, port_num = %#x, qpn=%#x dlid=%#x dgid = " IPOIB_HW_ADDR_PRINT_FMT_16, - ((neigh_ib_val *) m_val)->m_ah, ((neigh_ib_val *) m_val)->m_qkey, ((neigh_ib_val *) m_val)->m_ah_attr.sl, ((neigh_ib_val *) m_val)->m_ah_attr.static_rate, - ((neigh_ib_val *) m_val)->m_ah_attr.port_num, ((neigh_ib_val *) m_val)->get_qpn(), ((neigh_ib_val *) m_val)->m_ah_attr.dlid, - IPOIB_HW_ADDR_PRINT_ADDR_16(((neigh_ib_val *) m_val)->m_ah_attr.grh.dgid.raw)); - - wait_after_join_msec = m_n_sysvar_wait_after_join_msec; - - return 0; -} - -int neigh_ib::build_uc_neigh_val(struct rdma_cm_event* event_data, - uint32_t & wait_after_join_msec) -{ - NOT_IN_USE(event_data); - neigh_logdbg(""); - - // Find peer's IPoIB row address - unsigned char tmp[IPOIB_HW_ADDR_LEN]; - address_t address = (address_t) tmp; - BULLSEYE_EXCLUDE_BLOCK_START - if (!priv_get_neigh_l2(address)) { - neigh_logdbg("Failed in priv_get_neigh_l2()"); - return -1; - } - BULLSEYE_EXCLUDE_BLOCK_END - - m_val->m_l2_address = new IPoIB_addr(address); - BULLSEYE_EXCLUDE_BLOCK_START - if (m_val->m_l2_address == NULL) { - neigh_logdbg("Failed creating m_val->m_l2_address"); - return -1; - } - BULLSEYE_EXCLUDE_BLOCK_END - neigh_logdbg("IPoIB MAC = %s", m_val->m_l2_address->to_str().c_str()); - // IPoIB qkey is hard coded in SM . Do we want to take it from event or leave it hard coded - //((neigh_ib_val *) m_val)->m_qkey = event_data->param.ud.qkey; //0x0b1b; - ((neigh_ib_val *) m_val)->m_qkey = IPOIB_QKEY; - - //memcpy(&m_val.ib_addr.m_ah_attr, &event_data->param.ud.ah_attr, sizeof(struct ibv_ah_attr)); - - if (!m_cma_id || m_cma_id->route.num_paths <= 0) { - neigh_logdbg("Can't prepare AH attr (cma_id=%p, num_paths=%d)", m_cma_id, m_cma_id ? m_cma_id->route.num_paths : 0); - return -1; - } - - memset(&((neigh_ib_val *) m_val)->m_ah_attr, 0, sizeof(((neigh_ib_val *) m_val)->m_ah_attr)); - ((neigh_ib_val *) m_val)->m_ah_attr.dlid = ntohs(m_cma_id->route.path_rec->dlid); - ((neigh_ib_val *) m_val)->m_ah_attr.sl = m_cma_id->route.path_rec->sl; - ((neigh_ib_val *) m_val)->m_ah_attr.src_path_bits = 0; - ((neigh_ib_val *) m_val)->m_ah_attr.static_rate = m_cma_id->route.path_rec->rate; - ((neigh_ib_val *) m_val)->m_ah_attr.is_global = 0; - ((neigh_ib_val *) m_val)->m_ah_attr.port_num = m_cma_id->port_num; - - BULLSEYE_EXCLUDE_BLOCK_START - if (create_ah()) - return -1; - BULLSEYE_EXCLUDE_BLOCK_END - - neigh_logdbg("IB unicast neigh params ah=%p, qkey=%#x, qpn=%#x, dlid=%#x", ((neigh_ib_val *) m_val)->m_ah, - ((neigh_ib_val *) m_val)->m_qkey, ((neigh_ib_val *) m_val)->get_qpn(), ((neigh_ib_val *) m_val)->m_ah_attr.dlid); - - wait_after_join_msec = 0; - - return 0; -} - -int neigh_ib::find_pd() -{ - neigh_logdbg(""); - - ib_ctx_handler* ib_ctx_h = g_p_ib_ctx_handler_collection->get_ib_ctx(m_p_dev->get_ifname_link()); - - if (ib_ctx_h) { - m_pd = ib_ctx_h->get_ibv_pd(); - return 0; - } - - return -1; -} - -int neigh_ib::create_ah() -{ - neigh_logdbg(""); - - /* if (((neigh_ib_val *) m_val)->m_ah) { - // if there's ah we want to destroy it - shouldn't happen - neigh_logerr("destroy ah %p (shouldn't happen)", ((neigh_ib_val *) m_val)->m_ah); - if (destroy_ah()) - return -1; - } - */ - ((neigh_ib_val *) m_val)->m_ah = ibv_create_ah(m_pd, &((neigh_ib_val *) m_val)->m_ah_attr); - BULLSEYE_EXCLUDE_BLOCK_START - if (!((neigh_ib_val *) m_val)->m_ah) { - neigh_logdbg("failed creating address handler (errno=%d %m)", errno); - return -1; - } - BULLSEYE_EXCLUDE_BLOCK_END - return 0; -} - -int neigh_ib::destroy_ah() -{ - neigh_logdbg(""); - //For now we whouldn't destroy it - //We cannot destroy ah till each post_send with this ah has ended - //TODO: Need to think how to handle this - for now there will be ah leak - return 0; -#if 0 //unreachable code -#ifndef __COVERITY__ - if (m_val && ((neigh_ib_val *) m_val)->m_ah) { - IF_VERBS_FAILURE(ibv_destroy_ah(((neigh_ib_val *) m_val)->m_ah)) - { - neigh_logdbg("failed destroying address handle (errno=%d %m)", errno); - return -1; - }ENDIF_VERBS_FAILURE; - } - return 0; -#endif -#endif -} - -//================================================================================================================== - -neigh_ib_broadcast::neigh_ib_broadcast(neigh_key key) : neigh_ib(key, false) -{ - neigh_logdbg("Calling rdma_create_id"); - IF_RDMACM_FAILURE(rdma_create_id(g_p_neigh_table_mgr->m_neigh_cma_event_channel, &m_cma_id, (void *)this, m_rdma_port_space)) - { - neigh_logerr("Failed in rdma_create_id (errno=%d %m)", errno); - return; - } ENDIF_RDMACM_FAILURE; - - - neigh_logdbg("Calling rdma_bind_addr"); - struct sockaddr_in local_sockaddr; - local_sockaddr.sin_family = AF_INET; - local_sockaddr.sin_port = INPORT_ANY; - local_sockaddr.sin_addr.s_addr = m_p_dev->get_local_addr(); - - IF_RDMACM_FAILURE(rdma_bind_addr(m_cma_id, (struct sockaddr*)&local_sockaddr)) { - neigh_logerr("Failed in rdma_bind_addr (src=%d.%d.%d.%d) (errno=%d %m)", NIPQUAD(m_p_dev->get_local_addr()), errno); - return; - } ENDIF_RDMACM_FAILURE; - - build_mc_neigh_val(); - - m_state = true; -} - -void neigh_ib_broadcast::build_mc_neigh_val() -{ - m_val = new neigh_ib_val; - if(m_val == NULL) { - neigh_logerr("Failed allocating m_val"); - return; - } - - if (m_cma_id->verbs == NULL) { - neigh_logdbg("m_cma_id->verbs is NULL"); - return; - } - - m_val->m_l2_address = new IPoIB_addr(((m_p_dev->get_br_address())->get_address())); - if (m_val->m_l2_address == NULL) { - neigh_logerr("Failed allocating m_val->m_l2_address"); - return; - } - - ((neigh_ib_val *) m_val)->m_qkey = IPOIB_QKEY; - - memset(&((neigh_ib_val *) m_val)->m_ah_attr, 0, sizeof(((neigh_ib_val *) m_val)->m_ah_attr)); - memcpy( ((neigh_ib_val *) m_val)->m_ah_attr.grh.dgid.raw , &((m_val->m_l2_address->get_address())[4]), 16*sizeof(char)); - - ((neigh_ib_val *) m_val)->m_ah_attr.dlid = 0xc000; - ((neigh_ib_val *) m_val)->m_ah_attr.static_rate = 0x3; - ((neigh_ib_val *) m_val)->m_ah_attr.port_num = m_cma_id->port_num; - ((neigh_ib_val *) m_val)->m_ah_attr.is_global = 0x1; - - if(find_pd()) { - neigh_logerr("Failed find_pd()"); - return; - } - - if (create_ah()) - return; - - neigh_logdbg("IB broadcast neigh params are : ah=%p, qkey=%#x, sl=%#x, rate=%#x, port_num = %#x, qpn=%#x, dlid=%#x dgid = " IPOIB_HW_ADDR_PRINT_FMT_16, - ((neigh_ib_val *) m_val)->m_ah, ((neigh_ib_val *) m_val)->m_qkey, ((neigh_ib_val *) m_val)->m_ah_attr.sl, - ((neigh_ib_val *) m_val)->m_ah_attr.static_rate,((neigh_ib_val *) m_val)->m_ah_attr.port_num, - ((neigh_ib_val *) m_val)->get_qpn(), ((neigh_ib_val *) m_val)->m_ah_attr.dlid, IPOIB_HW_ADDR_PRINT_ADDR_16(((neigh_ib_val *) m_val)->m_ah_attr.grh.dgid.raw) ); - - -} - -bool neigh_ib_broadcast::get_peer_info(neigh_val * p_val) -{ - neigh_logfunc("calling neigh_entry get_peer_info. state = %d", m_state); - if (p_val == NULL) { - neigh_logdbg("p_val is NULL, return false"); - return false; - } - - auto_unlocker lock(m_lock); - if (m_state) { - neigh_logdbg("There is a valid val"); - *p_val = *m_val; - return m_state; - } - - return false; -} - -int neigh_ib_broadcast::send(neigh_send_info &s_info) -{ - NOT_IN_USE(s_info); - neigh_logerr("We should not call for this function, something is wrong"); - return false; -} - -void neigh_ib_broadcast::send_arp() -{ - neigh_logerr("We should not call for this function, something is wrong"); -} diff --git a/src/vma/proto/neighbour.h b/src/vma/proto/neighbour.h index 5a8350d21..8a0490977 100644 --- a/src/vma/proto/neighbour.h +++ b/src/vma/proto/neighbour.h @@ -76,7 +76,7 @@ class hash class neigh_val : public tostr { public: - neigh_val(): m_trans_type(VMA_TRANSPORT_UNKNOWN), m_l2_address(NULL){}; + neigh_val(): m_l2_address(NULL){}; virtual ~neigh_val(){}; virtual void zero_all_members() @@ -90,17 +90,13 @@ class neigh_val : public tostr { if (this != &val) { m_l2_address = val.m_l2_address; - m_trans_type = val.m_trans_type; } return *this; } protected: - friend class neigh_entry; - friend class neigh_ib; - friend class neigh_eth; - friend class neigh_ib_broadcast; - transport_type_t m_trans_type; + friend class neigh_entry; + friend class neigh_eth; L2_address* m_l2_address; }; @@ -109,7 +105,6 @@ class neigh_eth_val : public neigh_val public: neigh_eth_val() { - m_trans_type = VMA_TRANSPORT_ETH; zero_all_members(); } @@ -119,42 +114,7 @@ class neigh_eth_val : public neigh_val } private: - friend class neigh_eth; -}; - -class neigh_ib_val : public neigh_val -{ -public: - neigh_ib_val() : m_ah(NULL) { zero_all_members(); }; - - ibv_ah* get_ah()const { return m_ah; }; - ibv_ah_attr get_ah_attr() const { return m_ah_attr; }; - uint32_t get_qkey() const { return m_qkey; }; - uint32_t get_qpn() const - { - if (m_l2_address) - return(((IPoIB_addr *) m_l2_address)->get_qpn()); - else - return 0; - } - - neigh_val & operator=(const neigh_val & val); - -private: - friend class neigh_ib; - friend class neigh_ib_broadcast; - - ibv_ah_attr m_ah_attr; - ibv_ah* m_ah; - uint32_t m_qkey; - - void zero_all_members() - { - memset(&m_ah_attr, 0, sizeof(m_ah_attr)); - //m_ah = NULL; - m_qkey = 0; - neigh_val::zero_all_members(); - } + friend class neigh_eth; }; /* neigh_entry inherits from cache_entry_subject where @@ -194,14 +154,13 @@ class neigh_entry : public cache_entry_subject, public e EV_ADDR_RESOLVED, EV_PATH_RESOLVED, EV_ERROR, - EV_TIMEOUT_EXPIRED, // For IB MC join EV_UNHANDLED, EV_LAST }; friend class neighbour_table_mgr; - neigh_entry (neigh_key key, transport_type_t type, bool is_init_resources = true); + neigh_entry (neigh_key key, bool is_init_resources = true); virtual ~neigh_entry(); //Overwrite cach_entry virtual function @@ -243,7 +202,6 @@ class neigh_entry : public cache_entry_subject, public e enum rdma_port_space m_rdma_port_space; state_machine* m_state_machine; type m_type; // UC / MC - transport_type_t m_trans_type; bool m_state; unsent_queue_t m_unsent_queue; //Counter to sign that KickStart was already generated in ERROR_ST @@ -308,63 +266,6 @@ class neigh_entry : public cache_entry_subject, public e bool post_send_tcp(neigh_send_data *n_send_data); }; -class neigh_ib : public neigh_entry, public event_handler_ibverbs -{ -public: - friend class neighbour_table_mgr; - neigh_ib(neigh_key key, bool is_init_resources = true); - ~neigh_ib(); - - static void dofunc_enter_arp_resolved(const sm_info_t& func_info); - static void dofunc_enter_path_resolved(const sm_info_t& func_info); - -protected: - ibv_pd* m_pd; - - int find_pd(); - int create_ah(); - int destroy_ah(); - virtual int build_mc_neigh_val(struct rdma_cm_event* event_data, uint32_t & wait_after_join_msec); - -private: - - //Implementation of pure virtual functions - void handle_event_ibverbs_cb(void* ev_data, void* ctx); - void handle_timer_expired(void* user_data); - - // Overriding neigh_entry priv_enter_not_active - void priv_enter_not_active(); - void priv_enter_error(); - int priv_enter_arp_resolved(); - int priv_enter_path_resolved(struct rdma_cm_event* event_data, uint32_t & wait_after_join_msec); - virtual bool priv_handle_neigh_is_l2_changed(address_t); - // Overriding neigh_entry priv_enter_ready - int priv_enter_ready(); - - int handle_enter_arp_resolved_uc(); - int handle_enter_arp_resolved_mc(); - int build_uc_neigh_val(struct rdma_cm_event* event_data, uint32_t & wait_after_join_msec); - - event_t ibverbs_event_mapping(void* p_event_info); - virtual bool post_send_arp(bool); - virtual bool prepare_to_send_packet(header *); - - const uint32_t m_n_sysvar_wait_after_join_msec; -}; - -class neigh_ib_broadcast : public neigh_ib -{ -public: - neigh_ib_broadcast(neigh_key key); - virtual int send(neigh_send_info & s_info); - virtual bool get_peer_info(neigh_val * p_val); - virtual bool is_deletable() { return false; }; - -private: - void build_mc_neigh_val(); - virtual void send_arp(); -}; - class neigh_eth : public neigh_entry { public: diff --git a/src/vma/proto/neighbour_observer.h b/src/vma/proto/neighbour_observer.h deleted file mode 100644 index 07703bf35..000000000 --- a/src/vma/proto/neighbour_observer.h +++ /dev/null @@ -1,21 +0,0 @@ -/* - * SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES - * Copyright (c) 2001-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: GPL-2.0-only or BSD-2-Clause - */ - - - -#ifndef NEIGHBOUR_OBSERVER_H -#define NEIGHBOUR_OBSERVER_H - -#include "vma/util/sys_vars.h" -#include "vma/infra/subject_observer.h" - -class neigh_observer : public observer -{ -public: - virtual transport_type_t get_obs_transport_type() const = 0; -}; - -#endif /* NEIGHBOUR_OBSERVER_H */ diff --git a/src/vma/proto/neighbour_table_mgr.cpp b/src/vma/proto/neighbour_table_mgr.cpp index d196aec25..015a47cda 100644 --- a/src/vma/proto/neighbour_table_mgr.cpp +++ b/src/vma/proto/neighbour_table_mgr.cpp @@ -12,8 +12,6 @@ #include "vma/netlink/netlink_wrapper.h" #include "vma/event/netlink_event.h" #include "vma/proto/neighbour_table_mgr.h" - -#include "vma/proto/neighbour_observer.h" #include "vma/dev/net_device_table_mgr.h" #define MODULE_NAME "ntm:" @@ -68,35 +66,18 @@ bool neigh_table_mgr::register_observer(neigh_key key, neigh_entry* neigh_table_mgr::create_new_entry(neigh_key neigh_key, const observer* new_observer) { - observer * tmp = const_cast(new_observer); - const neigh_observer * dst = dynamic_cast(tmp) ; + observer *tmp = const_cast(new_observer); + const observer *dst = dynamic_cast(tmp) ; BULLSEYE_EXCLUDE_BLOCK_START - if (dst == NULL) { + if (!dst) { //TODO: Need to add handling of this case - neigh_mgr_logpanic("dynamic_casr failed, new_observer type is not neigh_observer"); + neigh_mgr_logpanic("dynamic_casr failed, new_observer type is not observer"); } BULLSEYE_EXCLUDE_BLOCK_END - - transport_type_t transport = dst->get_obs_transport_type(); - - if (transport == VMA_TRANSPORT_IB) { - if(IS_BROADCAST_N(neigh_key.get_in_addr())){ - neigh_mgr_logdbg("Creating new neigh_ib_broadcast"); - return (new neigh_ib_broadcast(neigh_key)); - } - neigh_mgr_logdbg("Creating new neigh_ib"); - return (new neigh_ib(neigh_key)); - } - else if (transport == VMA_TRANSPORT_ETH) { - neigh_mgr_logdbg("Creating new neigh_eth"); - return (new neigh_eth(neigh_key)); - } - else { - neigh_mgr_logdbg("Cannot create new entry, transport type is UNKNOWN"); - return NULL; - } + neigh_mgr_logdbg("Creating new neigh_eth"); + return (new neigh_eth(neigh_key)); } void neigh_table_mgr::notify_cb(event *ev) diff --git a/src/vma/proto/route_table_mgr.cpp b/src/vma/proto/route_table_mgr.cpp index 772672402..5e22fc75e 100644 --- a/src/vma/proto/route_table_mgr.cpp +++ b/src/vma/proto/route_table_mgr.cpp @@ -391,14 +391,7 @@ void route_table_mgr::update_entry(INOUT route_entry* p_ent, bool b_register_to_ rt_mgr_logdbg("Disabling Offload for route_entry '%s' - this is BC address", p_ent->to_str().c_str()); // Need to route traffic to/from OS // Prevent registering of net_device to route entry - } - // Check if: Local loopback over Ethernet case which was not supported before OFED 2.1 - /*else if (p_ndv && (p_ndv->get_transport_type() == VMA_TRANSPORT_ETH) && (peer_ip == src_addr)) { - rt_mgr_logdbg("Disabling Offload for route_entry '%s' - this is an Ethernet unicast loopback route", p_ent->to_str().c_str()); - // Need to route traffic to/from OS - // Prevent registering of net_device to route entry - }*/ - else { + } else { // register to net device for bonding events p_ent->register_to_net_device(); } diff --git a/src/vma/sock/sockinfo.h b/src/vma/sock/sockinfo.h index f83402130..e39f1619d 100644 --- a/src/vma/sock/sockinfo.h +++ b/src/vma/sock/sockinfo.h @@ -345,24 +345,6 @@ class sockinfo : public socket_fd_api, public pkt_rcvr_sink, public pkt_sndr_sou socket_fd_api::notify_epoll_context((uint32_t)events); } - // This function validates the ipoib's properties - // Input params: - // 1. IF name (can be alias) - // 2. IF flags - // 3. general path to ipoib property file (for example: /sys/class/net/%s/mtu) - // 4. the expected value of the property - // 5. size of the property - // Output params: - // 1. property sysfs filename - // 2. physical IF name (stripped alias) - // Return Value - // Type: INT - // Val: -1 Reading from the sys file failed - // 1 Reading succeeded but the actual prop value != expected - // 0 Reading succeeded and acutal ptop value == expected one - //TODO need to copy this function from util - //int validate_ipoib_prop(char* ifname, unsigned int ifflags, const char param_file[], const char *val, int size, char *filename, char * base_ifname); - inline void fetch_peer_info(sockaddr_in *p_peer_addr, sockaddr_in *__from, socklen_t *__fromlen) { *__from = *p_peer_addr; diff --git a/src/vma/util/sys_vars.cpp b/src/vma/util/sys_vars.cpp index f59327ca3..c2e3e8b88 100644 --- a/src/vma/util/sys_vars.cpp +++ b/src/vma/util/sys_vars.cpp @@ -34,8 +34,6 @@ #include "vma/proto/vma_lwip.h" #include "vma/proto/route_table_mgr.h" #include "vma/proto/rule_table_mgr.h" -#include "vma/proto/igmp_mgr.h" - #include "vma/proto/neighbour_table_mgr.h" #include "vma/netlink/netlink_wrapper.h" #include "vma/event/command.h" @@ -574,7 +572,6 @@ void mce_sys_var::get_env_params() thread_mode = MCE_DEFAULT_THREAD_MODE; buffer_batching_mode = MCE_DEFAULT_BUFFER_BATCHING_MODE; mem_alloc_type = MCE_DEFAULT_MEM_ALLOC_TYPE; - enable_ipoib = MCE_DEFAULT_IPOIB_FLAG; enable_socketxtreme = MCE_DEFAULT_SOCKETXTREME; #ifdef DEFINED_TSO enable_tso = MCE_DEFAULT_TSO; @@ -1215,9 +1212,6 @@ void mce_sys_var::get_env_params() if ((env_ptr = getenv(SYS_VAR_FORK)) != NULL) handle_fork = atoi(env_ptr) ? true : false; - if((env_ptr = getenv(SYS_VAR_IPOIB )) != NULL) - enable_ipoib = atoi(env_ptr) ? true : false; - #ifdef DEFINED_TSO if((env_ptr = getenv(SYS_VAR_TSO)) != NULL) enable_tso = atoi(env_ptr) ? true : false; diff --git a/src/vma/util/sys_vars.h b/src/vma/util/sys_vars.h index 1814533e1..2202a4aed 100644 --- a/src/vma/util/sys_vars.h +++ b/src/vma/util/sys_vars.h @@ -100,26 +100,6 @@ typedef enum { BUFFER_BATCHING_LAST, } buffer_batching_mode_t; -// See ibv_transport_type for general verbs transport types -typedef enum { - VMA_TRANSPORT_UNKNOWN = -1, - VMA_TRANSPORT_IB = 0, - VMA_TRANSPORT_ETH -} transport_type_t; - -static inline const char* priv_vma_transport_type_str(transport_type_t transport_type) -{ - BULLSEYE_EXCLUDE_BLOCK_START - switch (transport_type) { - case VMA_TRANSPORT_IB: return "IB"; - case VMA_TRANSPORT_ETH: return "ETH"; - case VMA_TRANSPORT_UNKNOWN: - default: break; - } - return "UNKNOWN"; - BULLSEYE_EXCLUDE_BLOCK_END -} - typedef enum { MSS_FOLLOW_MTU = 0 } mss_mode_t; @@ -371,8 +351,6 @@ struct mce_sys_var { cpu_set_t internal_thread_affinity; bool internal_thread_arm_cq_enabled; bool handle_bf; - - bool enable_ipoib; bool enable_socketxtreme; #ifdef DEFINED_TSO bool enable_tso; @@ -522,9 +500,7 @@ extern mce_sys_var & safe_mce_sys(); #define SYS_VAR_SPEC "VMA_SPEC" #define SYS_VAR_SPEC_PARAM1 "VMA_SPEC_PARAM1" #define SYS_VAR_SPEC_PARAM2 "VMA_SPEC_PARAM2" - -#define SYS_VAR_IPOIB "VMA_IPOIB" -#define SYS_VAR_SOCKETXTREME "VMA_SOCKETXTREME" +#define SYS_VAR_SOCKETXTREME "VMA_SOCKETXTREME" #ifdef DEFINED_TSO #define SYS_VAR_TSO "VMA_TSO" #endif /* DEFINED_TSO */ @@ -662,14 +638,13 @@ extern mce_sys_var & safe_mce_sys(); #define MCE_MAX_NUM_SGE (32) #define MCE_MIN_RX_NUM_POLLS (-1) #define MCE_MAX_RX_NUM_POLLS (100000000) -#define MCE_MIN_RX_PREFETCH_BYTES (32) /* Just enough for headers (IPoIB+IP+UDP)*/ +#define MCE_MIN_RX_PREFETCH_BYTES (32) #define MCE_MAX_RX_PREFETCH_BYTES (2044) #define MCE_RX_CQ_DRAIN_RATE_DISABLED (0) #define MCE_CQ_DRAIN_INTERVAL_DISABLED (0) #define MCE_CQ_ADAPTIVE_MODERATION_DISABLED (0) #define MCE_MIN_CQ_POLL_BATCH (1) #define MCE_MAX_CQ_POLL_BATCH (128) -#define MCE_DEFAULT_IPOIB_FLAG (1) #define MCE_DEFAULT_SOCKETXTREME (false) #ifdef DEFINED_TSO #define MCE_DEFAULT_TSO (true) @@ -679,7 +654,7 @@ extern mce_sys_var & safe_mce_sys(); #define MCE_DEFAULT_DEFERRED_CLOSE (false) #define MCE_ALIGNMENT ((unsigned long)63) -#define RX_BUF_SIZE(mtu) ((mtu) + IPOIB_HDR_LEN + GRH_HDR_LEN) // RX buffers are larger in IB +#define RX_BUF_SIZE(mtu) ((mtu) + ETH_VLAN_HDR_LEN) #define TX_BUF_SIZE(mtu) ((mtu) + 92) // Tx buffers are larger in Ethernet (they include L2 for RAW QP) #define NUM_TX_WRE_TO_SIGNAL_MAX 64 #define NUM_RX_WRE_TO_POST_RECV_MAX 1024 @@ -688,7 +663,6 @@ extern mce_sys_var & safe_mce_sys(); #define IFTYPE_PARAM_FILE "/sys/class/net/%s/type" #define IFADDR_MTU_PARAM_FILE "/sys/class/net/%s/mtu" #define UMCAST_PARAM_FILE "/sys/class/net/%s/umcast" -#define IPOIB_MODE_PARAM_FILE "/sys/class/net/%s/mode" #define VERBS_DEVICE_PORT_PARAM_FILE "/sys/class/net/%s/dev_port" #define VERBS_DEVICE_ID_PARAM_FILE "/sys/class/net/%s/dev_id" #define BONDING_MODE_PARAM_FILE "/sys/class/net/%s/bonding/mode" diff --git a/src/vma/util/utils.cpp b/src/vma/util/utils.cpp index 4c31db0a1..88a51eea9 100644 --- a/src/vma/util/utils.cpp +++ b/src/vma/util/utils.cpp @@ -94,8 +94,8 @@ int get_base_interface_name(const char *if_name, char *base_ifname, size_t sz_ba return 0; } - unsigned char vlan_if_address[MAX_L2_ADDR_LEN]; - const size_t ADDR_LEN = get_local_ll_addr(if_name, vlan_if_address, MAX_L2_ADDR_LEN, false); + unsigned char vlan_if_address[ETH_ALEN]; + const size_t ADDR_LEN = get_local_ll_addr(if_name, vlan_if_address, ETH_ALEN, false); if (ADDR_LEN > 0) { struct ifaddrs *ifaddr, *ifa; int rc = getifaddrs(&ifaddr); @@ -125,12 +125,9 @@ int get_base_interface_name(const char *if_name, char *base_ifname, size_t sz_ba unsigned char tmp_mac[ADDR_LEN]; if (ADDR_LEN == get_local_ll_addr(ifa->ifa_name, tmp_mac, ADDR_LEN, false)) { - int size_to_compare; - if (ADDR_LEN == ETH_ALEN) size_to_compare = ETH_ALEN; - else size_to_compare = IPOIB_HW_ADDR_GID_LEN; - int offset = ADDR_LEN - size_to_compare; - if (0 == memcmp(vlan_if_address + offset, tmp_mac + offset, size_to_compare) && 0 == (ifa->ifa_flags & IFF_MASTER)) { - // A bond name cannot be a base name of an interface even if both have the same MAC(ethernet) or GID(IB) addresses + int offset = ADDR_LEN - ETH_ALEN; + if (0 == memcmp(vlan_if_address + offset, tmp_mac + offset, ETH_ALEN) && 0 == (ifa->ifa_flags & IFF_MASTER)) { + // A bond name cannot be a base name of an interface even if both have the same MAC(ethernet) snprintf(base_ifname, sz_base_ifname, "%s" ,ifa->ifa_name); freeifaddrs(ifaddr); __log_dbg("Found base_ifname %s for interface %s", base_ifname, if_name); @@ -758,19 +755,17 @@ size_t get_local_ll_addr(IN const char * ifname, OUT unsigned char* addr, IN int if (addr_len < bytes_len) return 0; // error not enough room was provided by caller BULLSEYE_EXCLUDE_BLOCK_END - if (bytes_len == IPOIB_HW_ADDR_LEN && addr_len >= IPOIB_HW_ADDR_LEN) { // addr_len >= IPOIB_HW_ADDR_LEN is just for silencing coverity - sscanf(buf, IPOIB_HW_ADDR_SSCAN_FMT, IPOIB_HW_ADDR_SSCAN(addr)); - __log_dbg("found IB %s address " IPOIB_HW_ADDR_PRINT_FMT " for interface %s", is_broadcast?"BR":"UC", IPOIB_HW_ADDR_PRINT_ADDR(addr), ifname); + if (bytes_len != ETH_ALEN) { + return 0; // Error } - else if (bytes_len == ETH_ALEN) { - sscanf(buf, ETH_HW_ADDR_SSCAN_FMT, ETH_HW_ADDR_SSCAN(addr)); - __log_dbg("found ETH %s address" ETH_HW_ADDR_PRINT_FMT " for interface %s", is_broadcast?"BR":"UC", ETH_HW_ADDR_PRINT_ADDR(addr), ifname); - } - else { - return 0; // error + + if ((g_vlogger_level >= VLOG_DEBUG) && + (0 < sscanf(buf, ETH_HW_ADDR_SSCAN_FMT, ETH_HW_ADDR_SSCAN(addr)))) { + __log_dbg("found ETH %s address %s for interface %s", + is_broadcast?"BR":"UC", ETH_HW_ADDR_PRINT_ADDR(addr), ifname); } - return bytes_len; // success + return bytes_len; // Success } bool check_bond_device_exist(const char* ifname) @@ -1052,43 +1047,6 @@ bool get_interface_oper_state(IN const char* interface_name, OUT char* curr_stat return true; } -int validate_ipoib_prop(const char* ifname, unsigned int ifflags, - const char prop_file[], const char *expected_val, - int val_size, OUT char *filename, OUT char* base_ifname) -{ - char mode[10]; - char ifname_tmp[IFNAMSIZ]; - char active_slave_name[IFNAMSIZ]; - - // In case of alias (ib0:xx) take only the device name for that interface (ib0) - strncpy(ifname_tmp, ifname, sizeof(ifname_tmp) - 1); - ifname_tmp[sizeof(ifname_tmp) - 1] = '\0'; - base_ifname = strtok(ifname_tmp, ":"); - - if (ifflags & IFF_MASTER) { - // this is a bond interface, let find the slave - BULLSEYE_EXCLUDE_BLOCK_START - if (!get_bond_active_slave_name(base_ifname, active_slave_name, IFNAMSIZ)) { - return -1; - } - BULLSEYE_EXCLUDE_BLOCK_END - sprintf(filename, prop_file, active_slave_name); - } else { - sprintf(filename, prop_file, base_ifname); - } - - BULLSEYE_EXCLUDE_BLOCK_START - if (priv_read_file(filename, mode, val_size) <= 0) { - return -1; - } - BULLSEYE_EXCLUDE_BLOCK_END - if (strncmp(mode, expected_val, val_size)) { - return 1; - } else { - return 0; - } -} - #if defined(DEFINED_VERBS_VERSION) && (DEFINED_VERBS_VERSION == 2) //NOTE RAW_QP_PRIVLIGES_PARAM_FILE does not exist on upstream drivers int validate_raw_qp_privliges() diff --git a/src/vma/util/utils.h b/src/vma/util/utils.h index ee9d1c1de..3043ce13b 100644 --- a/src/vma/util/utils.h +++ b/src/vma/util/utils.h @@ -245,10 +245,6 @@ bool check_netvsc_device_exist(const char* ifname); bool get_netvsc_slave(IN const char* ifname, OUT char* slave_name, OUT unsigned int &slave_flags); bool get_interface_oper_state(IN const char* interface_name, OUT char* slaves_list, IN int sz); -int validate_ipoib_prop(const char* ifname, unsigned int ifflags, - const char prop_file[], const char *expected_val, - int val_size, char *filename, char* base_ifname); - #if defined(DEFINED_VERBS_VERSION) && (DEFINED_VERBS_VERSION == 2) int validate_raw_qp_privliges(); #endif /* DEFINED_VERBS_VERSION */ @@ -320,52 +316,6 @@ inline void create_multicast_mac_from_ip(unsigned char* mc_mac, in_addr_t ip) mc_mac[5] = (uint8_t)((ip>>24)&0xff); } -static inline void create_mgid_from_ipv4_mc_ip(uint8_t *mgid, uint16_t pkey, uint32_t ip) -{ -// +--------+----+----+-----------------+---------+-------------------+ -// | 8 | 4 | 4 | 16 bits | 16 bits | 80 bits | -// +--------+----+----+-----------------+---------+-------------------+ -// |11111111|0001|scop||< P_Key >| group ID | -// +--------+----+----+-----------------+---------+-------------------+ -// |11111111|0001|0010|01000000000011011| | group ID | -// +--------+----+----+-----------------+---------+-------------------+ - - //Fixed for multicast - mgid[0] = 0xff; - mgid[1] = 0x12; - - //IPoIB signature: 0x401b for ipv4, 0x601b for ipv6 - mgid[2] = 0x40; - mgid[3] = 0x1b; - - //P_Key - mgid[4] = (((unsigned char *)(&pkey))[0]); - /* cppcheck-suppress objectIndex */ - mgid[5] = (((unsigned char *)(&pkey))[1]); - - //group ID - relevant only for ipv4 - mgid[6] = 0x00; - mgid[7] = 0x00; - mgid[8] = 0x00; - mgid[9] = 0x00; - mgid[10] = 0x00; - mgid[11] = 0x00; - mgid[12] = (uint8_t)((ip)&0x0f); - mgid[13] = (uint8_t)((ip>>8)&0xff); - mgid[14] = (uint8_t)((ip>>16)&0xff); - mgid[15] = (uint8_t)((ip>>24)&0xff); - - vlog_printf(VLOG_DEBUG, "Translated to mgid: %02X%02X:%02X%02X:%02X%02X:%02X%02X:%02X%02X:%02X%02X:%02X%02X:%02X%02X\n", - ((unsigned char *)(mgid))[0],((unsigned char *)(mgid))[1], - ((unsigned char *)(mgid))[2],((unsigned char *)(mgid))[3], - ((unsigned char *)(mgid))[4],((unsigned char *)(mgid))[5], - ((unsigned char *)(mgid))[6],((unsigned char *)(mgid))[7], - ((unsigned char *)(mgid))[8],((unsigned char *)(mgid))[9], - ((unsigned char *)(mgid))[10],((unsigned char *)(mgid))[11], - ((unsigned char *)(mgid))[12],((unsigned char *)(mgid))[13], - ((unsigned char *)(mgid))[14],((unsigned char *)(mgid))[15]); -} - /** * special design for the rx loop. */ diff --git a/src/vma/util/vma_stats.h b/src/vma/util/vma_stats.h index 51d49a25b..7102228e6 100644 --- a/src/vma/util/vma_stats.h +++ b/src/vma/util/vma_stats.h @@ -210,15 +210,13 @@ typedef struct { typedef enum { RING_ETH = 0, RING_ETH_DIRECT, - RING_TAP, - RING_IB + RING_TAP } ring_type_t; static const char * const ring_type_str[] = { "RING_ETH", "RING_ETH_DIRECT", - "RING_TAP", - "RING_IB" + "RING_TAP" }; // Ring stat info diff --git a/src/vma/util/vtypes.h b/src/vma/util/vtypes.h index 59b30ebfb..060510638 100644 --- a/src/vma/util/vtypes.h +++ b/src/vma/util/vtypes.h @@ -80,66 +80,16 @@ static inline uint64_t ntohll(uint64_t x) { return x; } ((unsigned char *)(__addr))[2], ((unsigned char *)(__addr))[3], \ ((unsigned char *)(__addr))[4], ((unsigned char *)(__addr))[5] - -#define IPOIB_HW_ADDR_PRINT_FMT_16 "%02X%02X:%02X%02X:%02X%02X:%02X%02X:%02X%02X:%02X%02X:%02X%02X:%02X%02X" -#define IPOIB_HW_ADDR_PRINT_ADDR_16(__addr) \ - ((unsigned char *)(__addr))[0],((unsigned char *)(__addr))[1], \ - ((unsigned char *)(__addr))[2],((unsigned char *)(__addr))[3], \ - ((unsigned char *)(__addr))[4],((unsigned char *)(__addr))[5], \ - ((unsigned char *)(__addr))[6],((unsigned char *)(__addr))[7], \ - ((unsigned char *)(__addr))[8],((unsigned char *)(__addr))[9], \ - ((unsigned char *)(__addr))[10],((unsigned char *)(__addr))[11], \ - ((unsigned char *)(__addr))[12],((unsigned char *)(__addr))[13], \ - ((unsigned char *)(__addr))[14],((unsigned char *)(__addr))[15] - -#define IPOIB_HW_ADDR_PRINT_FMT "%02X%02X:%02X%02X:%02X%02X:%02X%02X:%02X%02X:%02X%02X:%02X%02X:%02X%02X:%02X%02X:%02X%02X" -#define IPOIB_HW_ADDR_PRINT_ADDR(__addr) \ - ((unsigned char *)(__addr))[0],((unsigned char *)(__addr))[1], \ - ((unsigned char *)(__addr))[2],((unsigned char *)(__addr))[3], \ - ((unsigned char *)(__addr))[4],((unsigned char *)(__addr))[5], \ - ((unsigned char *)(__addr))[6],((unsigned char *)(__addr))[7], \ - ((unsigned char *)(__addr))[8],((unsigned char *)(__addr))[9], \ - ((unsigned char *)(__addr))[10],((unsigned char *)(__addr))[11], \ - ((unsigned char *)(__addr))[12],((unsigned char *)(__addr))[13], \ - ((unsigned char *)(__addr))[14],((unsigned char *)(__addr))[15], \ - ((unsigned char *)(__addr))[16],((unsigned char *)(__addr))[17], \ - ((unsigned char *)(__addr))[18],((unsigned char *)(__addr))[19] - #define ETH_HW_ADDR_SSCAN_FMT "%02hhX:%02hhX:%02hhX:%02hhX:%02hhX:%02hhX" #define ETH_HW_ADDR_SSCAN(__addr) \ &(__addr[0]),&(__addr[1]), \ &(__addr[2]),&(__addr[3]), \ &(__addr[4]),&(__addr[5]) - -#define IPOIB_HW_ADDR_SSCAN_FMT "%02hhX:%02hhX:%02hhX:%02hhX:%02hhX:%02hhX:%02hhX:%02hhX:%02hhX:%02hhX:%02hhX:%02hhX:%02hhX:%02hhX:%02hhX:%02hhX:%02hhX:%02hhX:%02hhX:%02hhX" -#define IPOIB_HW_ADDR_SSCAN(__addr) \ - &(__addr[0]),&(__addr[1]), \ - &(__addr[2]),&(__addr[3]), \ - &(__addr[4]),&(__addr[5]), \ - &(__addr[6]),&(__addr[7]), \ - &(__addr[8]),&(__addr[9]), \ - &(__addr[10]),&(__addr[11]), \ - &(__addr[12]),&(__addr[13]), \ - &(__addr[14]),&(__addr[15]), \ - &(__addr[16]),&(__addr[17]), \ - &(__addr[18]),&(__addr[19]) - #define ETH_HDR_LEN (ETH_HLEN) #define ETH_VLAN_HDR_LEN (ETH_HDR_LEN + sizeof(struct vlanhdr)) -#define GRH_HDR_LEN (sizeof(struct ibv_grh)) -#define IPOIB_HDR_LEN (sizeof(struct ipoibhdr)) -#define IPOIB_HEADER ((uint32_t)0x08000000) -#define IPOIB_ARP_HEADER ((uint32_t)0x08060000) -#define IPOIB_HW_ADDR_LEN 20 -#define IPOIB_HW_ADDR_GID_LEN 16 -#define MAX_L2_ADDR_LEN (MAX(IPOIB_HW_ADDR_LEN, ETH_ALEN)) #define IPV4_VERSION 0x4 #define IPV4_HDR_LEN_WITHOUT_OPTIONS (sizeof(struct iphdr)) // Ip Header without any options -#define IPV4_IGMP_HDR_LEN (IPV4_HDR_LEN_WITHOUT_OPTIONS + sizeof(uint32_t)) -#define IPV4_IGMP_HDR_LEN_WORDS (IPV4_IGMP_HDR_LEN / sizeof(uint32_t)) -#define IGMP_HDR_LEN (sizeof(struct igmphdr)) -#define IGMP_HDR_LEN_WORDS (IGMP_HDR_LEN / sizeof(uint32_t)) #define DONT_FRAGMENT_FLAG 0x4000 #define MORE_FRAGMENTS_FLAG 0x2000 #define FRAGMENT_OFFSET 0x1FFF @@ -151,10 +101,6 @@ static inline uint64_t ntohll(uint64_t x) { return x; } #define BROADCAST_IP "255.255.255.255" -#ifndef ARPHRD_INFINIBAND -#define ARPHRD_INFINIBAND 32 /* InfiniBand */ -#endif - #ifndef ARPHRD_ETHER #define ARPHRD_ETHER 1 /* Ethernet 10Mbps */ #endif @@ -167,10 +113,6 @@ static inline uint64_t ntohll(uint64_t x) { return x; } #define ETH_P_8021Q 0x8100 /* 802.1Q VLAN Extended Header */ #endif -struct __attribute__ ((packed)) ipoibhdr { - uint32_t ipoib_header; -}; - struct __attribute__((packed)) vlanhdr { uint16_t h_vlan_TCI; /* Encapsulates priority and VLAN ID */ uint16_t h_vlan_encapsulated_proto; /* packet type ID field (or len) */