@@ -48,8 +48,10 @@ impl EmbeddingApi {
4848 }
4949
5050 pub async fn generate_embedding ( & self , text : String ) -> Result < Vec < f32 > , EmbeddingError > {
51- let max_retries = 6 ;
51+ const MAX_RETRIES : u32 = 5 ;
52+ const MAX_WAKE_UP_RETRIES : u32 = 10 ;
5253 let mut retries = 0 ;
54+ let mut wake_up_retries = 0 ;
5355 loop {
5456 let res = self
5557 . client
@@ -64,8 +66,8 @@ impl EmbeddingApi {
6466 if e. is_timeout ( ) {
6567 warn ! ( "Embedding API request timed out" ) ;
6668 retries += 1 ;
67- if retries > max_retries {
68- return Err ( EmbeddingError :: MaxRetriesExceeded ( max_retries ) ) ;
69+ if retries > MAX_RETRIES {
70+ return Err ( EmbeddingError :: MaxRetriesExceeded ( MAX_RETRIES ) ) ;
6971 }
7072 tokio:: time:: sleep ( Duration :: from_secs ( 2_u64 . pow ( retries) ) ) . await ;
7173 continue ;
@@ -85,15 +87,25 @@ impl EmbeddingApi {
8587 return Err ( EmbeddingError :: HttpClientError ( status) ) ;
8688 }
8789 if res. status ( ) != StatusCode :: OK {
90+ // The service may have been autoscaled to zero: a 503 means it is still waking up, so wait and retry on a separate budget
91+ if res. status ( ) == StatusCode :: SERVICE_UNAVAILABLE {
92+ warn ! ( "Embedding API service unavailable, retrying..." ) ;
93+ wake_up_retries += 1 ;
94+ if wake_up_retries > MAX_WAKE_UP_RETRIES {
95+ return Err ( EmbeddingError :: ServiceUnavailable ( MAX_WAKE_UP_RETRIES ) ) ;
96+ }
97+ tokio:: time:: sleep ( Duration :: from_secs ( 10 ) ) . await ;
98+ continue ;
99+ }
88100 let status = res. status ( ) ;
89101 let response_content = res. text ( ) . await ?;
90102 warn ! (
91103 "[status: {}] Embedding API returned: '{}'" ,
92104 status, response_content
93105 ) ;
94106 retries += 1 ;
95- if retries > max_retries {
96- return Err ( EmbeddingError :: MaxRetriesExceeded ( max_retries ) ) ;
107+ if retries > MAX_RETRIES {
108+ return Err ( EmbeddingError :: MaxRetriesExceeded ( MAX_RETRIES ) ) ;
97109 }
98110 tokio:: time:: sleep ( Duration :: from_secs ( 2_u64 . pow ( retries) ) ) . await ;
99111 continue ;
0 commit comments