From 7cee7e96d62573eeb7473a3ade74d8f0e87cca4d Mon Sep 17 00:00:00 2001 From: Tomer Cabouly Date: Tue, 28 Oct 2025 12:59:17 +0000 Subject: [PATCH] issue: 4043157 Set initial RTO to 1 second per RFC 6298 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change initial TCP retransmission timeout from 3 seconds to 1 second as recommended by RFC 6298 Section 2, with additional safeguards for timer granularity and RFC 1122 compliance. RFC 6298 states: "Until a round-trip time (RTT) measurement has been made... the sender SHOULD set RTO <- 1 second" This applies to all new TCP connections until an RTT measurement is made. Changes: 1. TCP RTO Calculation (src/core/lwip/tcp.c): - Added get_initial_rto() helper function - Uses round-up division - Prevents division truncation that could cause premature timeouts - Modified tcp_pcb_init() and tcp_pcb_recycle() to use helper 2. Timer Resolution Limits (src/core/util/sys_vars.cpp): - Added RFC 1122 validation for tcp_timer_resolution_msec - Enforces maximum of 500ms per RFC 1122 Section 4.2.3.2 - Logs warning and clamps value if exceeded - Applied to both environment variable and config registry paths 3. Configuration Schema (xlio_config_schema.json): - Added "maximum": 500 constraint to timer_msec - Updated description to reference RFC 1122 requirement - Prevents invalid configurations at schema validation level 4. 
Documentation (README): - Added RFC 1122 reference to timer_msec documentation Rationale: The previous implementation used simple division (3000 / slow_tmr_interval); carrying that form over as 1000 / slow_tmr_interval could result in: - 0 ticks when interval > 1000ms (immediate timeout) - 1 tick when interval = 1000ms (fires on next tick) - Insufficient granularity with large timer intervals The new implementation provides defense-in-depth: - Schema validation prevents misconfiguration - Runtime validation enforces RFC 1122 limits (delayed ACK ≤ 500ms) - Round-up division ensures minimum 1 tick without truncation This fixes incorrect SYN retransmission timing where the first retry could occur prematurely or immediately, causing TCP_USER_TIMEOUT tests to fail and causing connection establishment issues. Benefits: - TCP/IP standards compliance (RFC 6298 and RFC 1122) - Robust handling of timer granularity edge cases - Faster connection establishment failure detection - Prevents problematic timer configurations - Matches Linux kernel TCP_TIMEOUT_INIT behavior Signed-off-by: Tomer Cabouly --- README | 1 + .../xlio_config_schema.json | 3 ++- src/core/lwip/tcp.c | 15 ++++++++++---- src/core/util/sys_vars.cpp | 20 +++++++++++++++++++ 4 files changed, 34 insertions(+), 5 deletions(-) diff --git a/README b/README index 0754a5209..17df7a466 100644 --- a/README +++ b/README @@ -693,6 +693,7 @@ Maps to **XLIO_TCP_TIMER_RESOLUTION_MSEC** environment variable. Control internal TCP timer resolution (fast timer) in milliseconds. Minimum value is the thread wakeup timer resolution configured in performance.threading.internal_handler.timer_msec. +Maximum is 500ms per RFC 1122 Section 4.2.3.2 (delayed ACK timer must not exceed 500ms). 
Default value is 100 network.protocols.tcp.timestamps diff --git a/src/core/config/descriptor_providers/xlio_config_schema.json b/src/core/config/descriptor_providers/xlio_config_schema.json index 10f3c7b4e..529513943 100644 --- a/src/core/config/descriptor_providers/xlio_config_schema.json +++ b/src/core/config/descriptor_providers/xlio_config_schema.json @@ -418,9 +418,10 @@ "timer_msec": { "type": "integer", "minimum": 0, + "maximum": 500, "default": 100, "title": "TCP timer interval (msec)", - "description": "Maps to XLIO_TCP_TIMER_RESOLUTION_MSEC environment variable.\nControl internal TCP timer resolution (fast timer) in milliseconds.\nMinimum value is the thread wakeup timer resolution configured in\nperformance.threading.internal_handler.timer_msec." + "description": "Maps to XLIO_TCP_TIMER_RESOLUTION_MSEC environment variable.\nControl internal TCP timer resolution (fast timer) in milliseconds.\nMinimum value is the thread wakeup timer resolution configured in\nperformance.threading.internal_handler.timer_msec.\nMaximum is 500ms per RFC 1122 Section 4.2.3.2 (delayed ACK timer must not exceed 500ms)." 
}, "mss": { "type": "integer", diff --git a/src/core/lwip/tcp.c b/src/core/lwip/tcp.c index 43e3bebcb..188598124 100644 --- a/src/core/lwip/tcp.c +++ b/src/core/lwip/tcp.c @@ -905,6 +905,11 @@ err_t tcp_recv_null(void *arg, struct tcp_pcb *pcb, struct pbuf *p, err_t err) return ERR_OK; } +static inline u32_t get_initial_rto(void) +{ + return (1000 + slow_tmr_interval - 1) / slow_tmr_interval; +} + void tcp_pcb_init(struct tcp_pcb *pcb, u8_t prio, void *container) { u32_t iss; @@ -927,9 +932,10 @@ void tcp_pcb_init(struct tcp_pcb *pcb, u8_t prio, void *container) pcb->mss = pcb->advtsd_mss; pcb->user_timeout_ms = 0; pcb->ticks_since_data_sent = -1; - pcb->rto = 3000 / slow_tmr_interval; + // Set initial RTO to 1 second as per RFC 6298 + pcb->rto = get_initial_rto(); pcb->sa = 0; - pcb->sv = 3000 / slow_tmr_interval; + pcb->sv = get_initial_rto(); pcb->rtime = -1; #if TCP_CC_ALGO_MOD switch (lwip_cc_algo_module) { @@ -985,9 +991,10 @@ void tcp_pcb_recycle(struct tcp_pcb *pcb) pcb->flags = 0; pcb->user_timeout_ms = 0; pcb->ticks_since_data_sent = -1; - pcb->rto = 3000 / slow_tmr_interval; + // Set initial RTO to 1 second as per RFC 6298 + pcb->rto = get_initial_rto(); pcb->sa = 0; - pcb->sv = 3000 / slow_tmr_interval; + pcb->sv = get_initial_rto(); pcb->nrtx = 0; pcb->dupacks = 0; pcb->rtime = -1; diff --git a/src/core/util/sys_vars.cpp b/src/core/util/sys_vars.cpp index e2b4b1d7a..1f862ba90 100644 --- a/src/core/util/sys_vars.cpp +++ b/src/core/util/sys_vars.cpp @@ -1621,6 +1621,16 @@ void mce_sys_var::get_env_params() tcp_timer_resolution_msec = timer_resolution_msec; } + // RFC 1122 Section 4.2.3.2: Delayed ACK timer must not exceed 500ms + // This limits TCP timer resolution to ensure protocol compliance and proper RTO calculations + if (tcp_timer_resolution_msec > 500) { + vlog_printf(VLOG_WARNING, + "TCP timer resolution [%s=%d] exceeds RFC 1122 maximum of 500ms. 
" + "Clamping to 500ms to ensure protocol compliance.\n", + SYS_VAR_TCP_TIMER_RESOLUTION_MSEC, tcp_timer_resolution_msec); + tcp_timer_resolution_msec = 500; + } + if ((env_ptr = getenv(SYS_VAR_INTERNAL_THREAD_CPUSET))) { snprintf(internal_thread_cpuset, FILENAME_MAX, "%s", env_ptr); } @@ -2744,6 +2754,16 @@ void mce_sys_var::configure_memory_limits(const config_registry ®istry) tcp_timer_resolution_msec = timer_resolution_msec; } + // RFC 1122 Section 4.2.3.2: Delayed ACK timer must not exceed 500ms + // This limits TCP timer resolution to ensure protocol compliance and proper RTO calculations + if (tcp_timer_resolution_msec > 500) { + vlog_printf(VLOG_WARNING, + "TCP timer resolution [%s=%d] exceeds RFC 1122 maximum of 500ms. " + "Clamping to 500ms to ensure protocol compliance.\n", + SYS_VAR_TCP_TIMER_RESOLUTION_MSEC, tcp_timer_resolution_msec); + tcp_timer_resolution_msec = 500; + } + if (registry.value_exists("performance.threading.cpuset")) { snprintf(internal_thread_cpuset, FILENAME_MAX, "%s", registry.get_value("performance.threading.cpuset").c_str());