@@ -22,8 +22,7 @@ use prometheus::{
2222} ;
2323
2424use crate :: constants:: {
25- TELEMETRY_SERVER_START_RETRY_INTERVAL ,
26- TELEMETRY_SERVER_START_RETRY_MAX_ATTEMPTS ,
25+ ENDPOINT_SERVER_BIND_MAX_RETRIES , ENDPOINT_SERVER_BIND_RETRY_INTERVAL_SECS ,
2726} ;
2827
2928pub struct TelemetrySystem {
@@ -45,7 +44,7 @@ pub enum MetricHandle {
4544 HistogramVec ( HistogramVec ) ,
4645}
4746
48- /// Configuration function for telemetry server routes .
47+ /// Configure API endpoints .
4948fn configure_routes ( cfg : & mut web:: ServiceConfig , registry : Registry ) {
5049 let registry_clone = registry;
5150
@@ -56,32 +55,26 @@ fn configure_routes(cfg: &mut web::ServiceConfig, registry: Registry) {
5655 api_handler:: configure_api_route ( cfg) ;
5756}
5857
59- /// Initialize the endpoint system.
60- #[ no_mangle]
61- #[ allow( clippy:: not_unsafe_ptr_arg_deref) ]
62- pub extern "C" fn ten_endpoint_system_create (
63- endpoint : * const c_char ,
64- ) -> * mut TelemetrySystem {
65- let endpoint_str = match unsafe { CStr :: from_ptr ( endpoint) } . to_str ( ) {
66- Ok ( s) if !s. trim ( ) . is_empty ( ) => s. to_string ( ) ,
67- _ => return ptr:: null_mut ( ) ,
68- } ;
69-
70- // Note: `prometheus::Registry` internally uses `Arc` and `RwLock` to
71- // achieve thread safety, so there is no need to add additional locking
72- // mechanisms. It can be used directly here.
73- let registry = Registry :: new ( ) ;
74-
75- // Start the actix-web server to provide metrics data at the specified path.
76-
58+ /// Creates an HTTP server with retry mechanism if binding fails.
59+ ///
60+ /// This function attempts to bind an HTTP server to the specified endpoint.
61+ /// If binding fails, it will retry up to a configured maximum number of
62+ /// attempts with a delay between each attempt.
63+ fn create_endpoint_server_with_retry (
64+ endpoint_str : & str ,
65+ registry : Registry ,
66+ ) -> Option < actix_web:: dev:: Server > {
7767 let mut attempts = 0 ;
78- let max_attempts = TELEMETRY_SERVER_START_RETRY_MAX_ATTEMPTS ;
79- let wait_duration =
80- std:: time:: Duration :: from_secs ( TELEMETRY_SERVER_START_RETRY_INTERVAL ) ;
68+ let max_attempts = ENDPOINT_SERVER_BIND_MAX_RETRIES ;
69+ let wait_duration = std:: time:: Duration :: from_secs (
70+ ENDPOINT_SERVER_BIND_RETRY_INTERVAL_SECS ,
71+ ) ;
8172
73+ // Try to create and bind the HTTP server, with retries if it fails.
8274 let server_builder = loop {
8375 let registry_clone = registry. clone ( ) ;
8476
77+ // Create a new HTTP server with the configured routes.
8578 let result = HttpServer :: new ( move || {
8679 App :: new ( )
8780 . configure ( |cfg| configure_routes ( cfg, registry_clone. clone ( ) ) )
@@ -94,28 +87,65 @@ pub extern "C" fn ten_endpoint_system_create(
9487 Ok ( server) => break server,
9588 Err ( e) => {
9689 attempts += 1 ;
90+
91+ // If we've reached the maximum number of attempts, log the
92+ // error and return None.
9793 if attempts >= max_attempts {
9894 eprintln ! (
9995 "Error binding to address: {} after {} attempts: {:?}" ,
10096 endpoint_str, attempts, e
10197 ) ;
102- return ptr :: null_mut ( ) ;
98+ return None ;
10399 }
104100
101+ // Otherwise, log the error and retry after a delay.
105102 eprintln ! (
106103 "Failed to bind to address: {}. Attempt {} of {}. \
107- Retrying in {} second...",
104+ Retrying in {} second{} ...",
108105 endpoint_str,
109106 attempts,
110107 max_attempts,
111- TELEMETRY_SERVER_START_RETRY_INTERVAL
108+ ENDPOINT_SERVER_BIND_RETRY_INTERVAL_SECS ,
109+ if ENDPOINT_SERVER_BIND_RETRY_INTERVAL_SECS == 1 {
110+ ""
111+ } else {
112+ "s"
113+ }
112114 ) ;
113115 std:: thread:: sleep ( wait_duration) ;
114116 }
115117 }
116118 } ;
117119
118- let server = server_builder. run ( ) ;
120+ // Start the server and return it.
121+ Some ( server_builder. run ( ) )
122+ }
123+
124+ /// Initialize the endpoint system.
125+ #[ no_mangle]
126+ #[ allow( clippy:: not_unsafe_ptr_arg_deref) ]
127+ pub extern "C" fn ten_endpoint_system_create (
128+ endpoint : * const c_char ,
129+ ) -> * mut TelemetrySystem {
130+ let endpoint_str = match unsafe { CStr :: from_ptr ( endpoint) } . to_str ( ) {
131+ Ok ( s) if !s. trim ( ) . is_empty ( ) => s. to_string ( ) ,
132+ _ => return ptr:: null_mut ( ) ,
133+ } ;
134+
135+ // Note: `prometheus::Registry` internally uses `Arc` and `RwLock` to
136+ // achieve thread safety, so there is no need to add additional locking
137+ // mechanisms. It can be used directly here.
138+ let registry = Registry :: new ( ) ;
139+
140+ // Start the actix-web server to provide metrics data at the specified path.
141+ let server = match create_endpoint_server_with_retry (
142+ & endpoint_str,
143+ registry. clone ( ) ,
144+ ) {
145+ Some ( server) => server,
146+ None => return ptr:: null_mut ( ) ,
147+ } ;
148+
119149 let server_handle = server. handle ( ) ;
120150
121151 // Create an `oneshot` channel to notify the actix system to shut down.
0 commit comments