Skip to content

Commit 980e0ef

Browse files
tomerdbzNirWolfer
authored andcommitted
issue: 4409403 Fix heap corruption since c73d96a
This commit fixes a critical race condition in timer management for TCP sockets that was introduced in commit c73d96a. The heap corruption was caused by a race condition between the timer thread and socket destruction. Sockets could be deleted by the event handler thread while still being referenced by the timer thread in the timer collections, resulting in heap corruption when the timer thread attempted to access the deleted memory. In the original implementation, sockets were removed from timer collections and deleted asynchronously without proper synchronization with the timer processing thread. Fix: - Remove sockets from timer collections while still holding the socket lock, guaranteeing the timer thread cannot access sockets marked for deletion - Create a simplified deletion path that doesn't attempt to access timer collections again after socket cleanup Additionally, as an unrelated improvement, this patch fixes a lock leak in the early return path of sockinfo_tcp::clean_socket_obj() where a lock was acquired but not released when a socket was already marked as cleaned. The heap corruption stemmed from a fundamental architectural change that separated socket objects from their timer management without providing proper synchronization for the distributed socket lifecycle. Signed-off-by: Tomer Cabouly <[email protected]>
1 parent 117b174 commit 980e0ef

File tree

3 files changed

+18
-10
lines changed

3 files changed

+18
-10
lines changed

src/core/event/event_handler_manager.cpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -156,12 +156,12 @@ void event_handler_manager::unregister_timers_event_and_delete(timer_handler *ha
156156
post_new_reg_action(reg_action);
157157
}
158158

159-
void event_handler_manager::unregister_socket_timer_and_delete(sockinfo_tcp *sock_tcp)
159+
void event_handler_manager::unregister_socket_and_delete(sockinfo_tcp *sock_tcp)
160160
{
161-
evh_logdbg("Unregistering TCP socket timer: %p", sock_tcp);
161+
evh_logdbg("Deleting TCP socket: %p", sock_tcp);
162162
reg_action_t reg_action;
163163
memset(&reg_action, 0, sizeof(reg_action));
164-
reg_action.type = UNREGISTER_TCP_SOCKET_TIMER_AND_DELETE;
164+
reg_action.type = UNREGISTER_SOCKET_AND_DELETE;
165165
reg_action.info.timer.user_data = sock_tcp;
166166
post_new_reg_action(reg_action);
167167
}
@@ -421,8 +421,6 @@ const char *event_handler_manager::reg_action_str(event_action_type_e reg_action
421421
switch (reg_action_type) {
422422
case REGISTER_TCP_SOCKET_TIMER:
423423
return "REGISTER_TCP_SOCKET_TIMER";
424-
case UNREGISTER_TCP_SOCKET_TIMER_AND_DELETE:
425-
return "UNREGISTER_TCP_SOCKET_TIMER_AND_DELETE";
426424
case REGISTER_TIMER:
427425
return "REGISTER_TIMER";
428426
case UNREGISTER_TIMER:
@@ -441,6 +439,8 @@ const char *event_handler_manager::reg_action_str(event_action_type_e reg_action
441439
return "REGISTER_COMMAND";
442440
case UNREGISTER_COMMAND:
443441
return "UNREGISTER_COMMAND";
442+
case UNREGISTER_SOCKET_AND_DELETE:
443+
return "UNREGISTER_SOCKET_AND_DELETE";
444444
BULLSEYE_EXCLUDE_BLOCK_START
445445
default:
446446
return "UNKNOWN";
@@ -703,9 +703,9 @@ void event_handler_manager::handle_registration_action(reg_action_t &reg_action)
703703
sock = reinterpret_cast<sockinfo_tcp *>(reg_action.info.timer.user_data);
704704
sock->get_tcp_timer_collection()->add_new_timer(sock);
705705
break;
706-
case UNREGISTER_TCP_SOCKET_TIMER_AND_DELETE:
706+
case UNREGISTER_SOCKET_AND_DELETE:
707707
sock = reinterpret_cast<sockinfo_tcp *>(reg_action.info.timer.user_data);
708-
sock->get_tcp_timer_collection()->remove_timer(sock);
708+
// Just delete the socket without trying to remove from timer collection
709709
delete sock;
710710
break;
711711
case REGISTER_TIMER:

src/core/event/event_handler_manager.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ typedef std::map<void * /*event_handler_id*/, event_handler_rdma_cm * /*p_event_
2828

2929
typedef enum {
3030
REGISTER_TCP_SOCKET_TIMER,
31-
UNREGISTER_TCP_SOCKET_TIMER_AND_DELETE,
31+
UNREGISTER_SOCKET_AND_DELETE,
3232
REGISTER_TIMER,
3333
WAKEUP_TIMER, /* NOT AVAILABLE FOR GROUPED TIMERS */
3434
UNREGISTER_TIMER,
@@ -167,7 +167,7 @@ class event_handler_manager : public wakeup_pipe {
167167
void unregister_timers_event_and_delete(timer_handler *handler);
168168

169169
void register_socket_timer_event(sockinfo_tcp *sock_tcp);
170-
void unregister_socket_timer_and_delete(sockinfo_tcp *sock_tcp);
170+
void unregister_socket_and_delete(sockinfo_tcp *sock_tcp);
171171

172172
void register_ibverbs_event(int fd, event_handler_ibverbs *handler, void *channel,
173173
void *user_data);

src/core/sock/sockinfo_tcp.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -595,18 +595,26 @@ void sockinfo_tcp::clean_socket_obj()
595595
lock_tcp_con();
596596

597597
if (is_cleaned()) {
598+
unlock_tcp_con();
598599
return;
599600
}
600601
m_is_cleaned = true;
601602

603+
// Important: Remove from timer collection WHILE STILL HOLDING THE LOCK
604+
// This prevents the timer thread from processing this socket after it's marked for cleanup
605+
if (is_timer_registered()) {
606+
tcp_timers_collection *p_collection = get_tcp_timer_collection();
607+
p_collection->remove_timer(this);
608+
}
609+
602610
unlock_tcp_con();
603611

604612
event_handler_manager *p_event_mgr = get_event_mgr();
605613
bool delegated_timers_exit = g_b_exit &&
606614
(safe_mce_sys().tcp_ctl_thread == option_tcp_ctl_thread::CTL_THREAD_DELEGATE_TCP_TIMERS);
607615

608616
if (p_event_mgr->is_running() && !delegated_timers_exit) {
609-
p_event_mgr->unregister_socket_timer_and_delete(this);
617+
p_event_mgr->unregister_socket_and_delete(this);
610618
} else {
611619
delete this;
612620
}

0 commit comments

Comments
 (0)