Skip to content

Commit 84f5950

Browse files
committed
feat: handle autoscaling wake up differently
1 parent 57b5b3e commit 84f5950

File tree

2 files changed

+19
-5
lines changed

2 files changed

+19
-5
lines changed

issue-bot/src/embeddings/inference_endpoints.rs

Lines changed: 17 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -48,8 +48,10 @@ impl EmbeddingApi {
4848
}
4949

5050
pub async fn generate_embedding(&self, text: String) -> Result<Vec<f32>, EmbeddingError> {
51-
let max_retries = 6;
51+
const MAX_RETRIES: u32 = 5;
52+
const MAX_WAKE_UP_RETRIES: u32 = 10;
5253
let mut retries = 0;
54+
let mut wake_up_retries = 0;
5355
loop {
5456
let res = self
5557
.client
@@ -64,8 +66,8 @@ impl EmbeddingApi {
6466
if e.is_timeout() {
6567
warn!("Embedding API request timed out");
6668
retries += 1;
67-
if retries > max_retries {
68-
return Err(EmbeddingError::MaxRetriesExceeded(max_retries));
69+
if retries > MAX_RETRIES {
70+
return Err(EmbeddingError::MaxRetriesExceeded(MAX_RETRIES));
6971
}
7072
tokio::time::sleep(Duration::from_secs(2_u64.pow(retries))).await;
7173
continue;
@@ -85,15 +87,25 @@ impl EmbeddingApi {
8587
return Err(EmbeddingError::HttpClientError(status));
8688
}
8789
if res.status() != StatusCode::OK {
90+
// Autoscaled to 0, waiting for wake up
91+
if res.status() == StatusCode::SERVICE_UNAVAILABLE {
92+
warn!("Embedding API service unavailable, retrying...");
93+
wake_up_retries += 1;
94+
if wake_up_retries > MAX_WAKE_UP_RETRIES {
95+
return Err(EmbeddingError::ServiceUnavailable(MAX_WAKE_UP_RETRIES));
96+
}
97+
tokio::time::sleep(Duration::from_secs(10)).await;
98+
continue;
99+
}
88100
let status = res.status();
89101
let response_content = res.text().await?;
90102
warn!(
91103
"[status: {}] Embedding API returned: '{}'",
92104
status, response_content
93105
);
94106
retries += 1;
95-
if retries > max_retries {
96-
return Err(EmbeddingError::MaxRetriesExceeded(max_retries));
107+
if retries > MAX_RETRIES {
108+
return Err(EmbeddingError::MaxRetriesExceeded(MAX_RETRIES));
97109
}
98110
tokio::time::sleep(Duration::from_secs(2_u64.pow(retries))).await;
99111
continue;

issue-bot/src/embeddings/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,8 @@ pub enum EmbeddingError {
2626
Reqwest(#[from] reqwest::Error),
2727
#[error("serde json error: {0}")]
2828
SerdeJson(#[from] serde_json::Error),
29+
#[error("max retries ({0}) to wake up from autoscaling exceeded, service unavailable")]
30+
ServiceUnavailable(u32),
2931
// #[error("tokenizers error: {0}")]
3032
// Tokenizers(#[from] tokenizers::Error),
3133
}

0 commit comments

Comments
 (0)