Skip to content

Commit 9fd71a6

Browse files
committed
fix: refine codes
1 parent 19db159 commit 9fd71a6

File tree

4 files changed

+62
-32
lines changed

4 files changed

+62
-32
lines changed

core/src/ten_rust/BUILD.gn

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ if (ten_enable_ten_rust) {
8686
output_file = "include_internal/ten_rust/ten_rust.h"
8787
binding_files = [
8888
"src/bindings.rs",
89-
"src/telemetry/mod.rs",
89+
"src/endpoint/mod.rs",
9090
]
9191

9292
deps = [ ":ten_rust_static_lib" ]

core/src/ten_rust/src/constants/endpoint.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,6 @@
55
// Refer to the "LICENSE" file in the root directory for more information.
66
//
77
pub const METRICS: &str = "/metrics";
8+
9+
pub const ENDPOINT_SERVER_BIND_MAX_RETRIES: u32 = 5;
10+
pub const ENDPOINT_SERVER_BIND_RETRY_INTERVAL_SECS: u64 = 5; // seconds

core/src/ten_rust/src/constants/mod.rs

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,6 @@
77
pub mod endpoint;
88
pub use endpoint::*;
99

10-
pub const TELEMETRY_SERVER_START_RETRY_MAX_ATTEMPTS: u32 = 3;
11-
pub const TELEMETRY_SERVER_START_RETRY_INTERVAL: u64 = 1; // seconds
12-
1310
pub const ERR_MSG_GRAPH_LOCALHOST_FORBIDDEN_IN_SINGLE_APP_MODE: &str =
1411
"'localhost' is not allowed in graph definition, and the graph seems to \
1512
be a single-app graph, just remove the 'app' field";

core/src/ten_rust/src/endpoint/mod.rs

Lines changed: 58 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,7 @@ use prometheus::{
2222
};
2323

2424
use crate::constants::{
25-
TELEMETRY_SERVER_START_RETRY_INTERVAL,
26-
TELEMETRY_SERVER_START_RETRY_MAX_ATTEMPTS,
25+
ENDPOINT_SERVER_BIND_MAX_RETRIES, ENDPOINT_SERVER_BIND_RETRY_INTERVAL_SECS,
2726
};
2827

2928
pub struct TelemetrySystem {
@@ -45,7 +44,7 @@ pub enum MetricHandle {
4544
HistogramVec(HistogramVec),
4645
}
4746

48-
/// Configuration function for telemetry server routes.
47+
/// Configure API endpoints.
4948
fn configure_routes(cfg: &mut web::ServiceConfig, registry: Registry) {
5049
let registry_clone = registry;
5150

@@ -56,32 +55,26 @@ fn configure_routes(cfg: &mut web::ServiceConfig, registry: Registry) {
5655
api_handler::configure_api_route(cfg);
5756
}
5857

59-
/// Initialize the endpoint system.
60-
#[no_mangle]
61-
#[allow(clippy::not_unsafe_ptr_arg_deref)]
62-
pub extern "C" fn ten_endpoint_system_create(
63-
endpoint: *const c_char,
64-
) -> *mut TelemetrySystem {
65-
let endpoint_str = match unsafe { CStr::from_ptr(endpoint) }.to_str() {
66-
Ok(s) if !s.trim().is_empty() => s.to_string(),
67-
_ => return ptr::null_mut(),
68-
};
69-
70-
// Note: `prometheus::Registry` internally uses `Arc` and `RwLock` to
71-
// achieve thread safety, so there is no need to add additional locking
72-
// mechanisms. It can be used directly here.
73-
let registry = Registry::new();
74-
75-
// Start the actix-web server to provide metrics data at the specified path.
76-
58+
/// Creates an HTTP server with retry mechanism if binding fails.
59+
///
60+
/// This function attempts to bind an HTTP server to the specified endpoint.
61+
/// If binding fails, it will retry up to a configured maximum number of
62+
/// attempts with a delay between each attempt.
63+
fn create_endpoint_server_with_retry(
64+
endpoint_str: &str,
65+
registry: Registry,
66+
) -> Option<actix_web::dev::Server> {
7767
let mut attempts = 0;
78-
let max_attempts = TELEMETRY_SERVER_START_RETRY_MAX_ATTEMPTS;
79-
let wait_duration =
80-
std::time::Duration::from_secs(TELEMETRY_SERVER_START_RETRY_INTERVAL);
68+
let max_attempts = ENDPOINT_SERVER_BIND_MAX_RETRIES;
69+
let wait_duration = std::time::Duration::from_secs(
70+
ENDPOINT_SERVER_BIND_RETRY_INTERVAL_SECS,
71+
);
8172

73+
// Try to create and bind the HTTP server, with retries if it fails.
8274
let server_builder = loop {
8375
let registry_clone = registry.clone();
8476

77+
// Create a new HTTP server with the configured routes.
8578
let result = HttpServer::new(move || {
8679
App::new()
8780
.configure(|cfg| configure_routes(cfg, registry_clone.clone()))
@@ -94,28 +87,65 @@ pub extern "C" fn ten_endpoint_system_create(
9487
Ok(server) => break server,
9588
Err(e) => {
9689
attempts += 1;
90+
91+
// If we've reached the maximum number of attempts, log the
92+
// error and return None.
9793
if attempts >= max_attempts {
9894
eprintln!(
9995
"Error binding to address: {} after {} attempts: {:?}",
10096
endpoint_str, attempts, e
10197
);
102-
return ptr::null_mut();
98+
return None;
10399
}
104100

101+
// Otherwise, log the error and retry after a delay.
105102
eprintln!(
106103
"Failed to bind to address: {}. Attempt {} of {}. \
107-
Retrying in {} second...",
104+
Retrying in {} second{}...",
108105
endpoint_str,
109106
attempts,
110107
max_attempts,
111-
TELEMETRY_SERVER_START_RETRY_INTERVAL
108+
ENDPOINT_SERVER_BIND_RETRY_INTERVAL_SECS,
109+
if ENDPOINT_SERVER_BIND_RETRY_INTERVAL_SECS == 1 {
110+
""
111+
} else {
112+
"s"
113+
}
112114
);
113115
std::thread::sleep(wait_duration);
114116
}
115117
}
116118
};
117119

118-
let server = server_builder.run();
120+
// Start the server and return it.
121+
Some(server_builder.run())
122+
}
123+
124+
/// Initialize the endpoint system.
125+
#[no_mangle]
126+
#[allow(clippy::not_unsafe_ptr_arg_deref)]
127+
pub extern "C" fn ten_endpoint_system_create(
128+
endpoint: *const c_char,
129+
) -> *mut TelemetrySystem {
130+
let endpoint_str = match unsafe { CStr::from_ptr(endpoint) }.to_str() {
131+
Ok(s) if !s.trim().is_empty() => s.to_string(),
132+
_ => return ptr::null_mut(),
133+
};
134+
135+
// Note: `prometheus::Registry` internally uses `Arc` and `RwLock` to
136+
// achieve thread safety, so there is no need to add additional locking
137+
// mechanisms. It can be used directly here.
138+
let registry = Registry::new();
139+
140+
// Start the actix-web server to provide metrics data at the specified path.
141+
let server = match create_endpoint_server_with_retry(
142+
&endpoint_str,
143+
registry.clone(),
144+
) {
145+
Some(server) => server,
146+
None => return ptr::null_mut(),
147+
};
148+
119149
let server_handle = server.handle();
120150

121151
// Create an `oneshot` channel to notify the actix system to shut down.

0 commit comments

Comments
 (0)