Skip to content

Commit 31b3bb5

Browse files
committed
use rate limit headers for smarter retry in http backoff
1 parent 82f195d commit 31b3bb5

File tree

2 files changed

+75
-3
lines changed

2 files changed

+75
-3
lines changed

src/huggingface_hub/utils/_http.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -362,6 +362,7 @@ def _http_backoff_base(
362362

363363
nb_tries = 0
364364
sleep_time = base_wait_time
365+
ratelimit_reset: Optional[int] = None # seconds to wait for rate limit reset if 429 response
365366

366367
# If `data` is used and is a file object (or any IO), it will be consumed on the
367368
# first HTTP request. We need to save the initial position so that the full content
@@ -373,6 +374,7 @@ def _http_backoff_base(
373374
client = get_session()
374375
while True:
375376
nb_tries += 1
377+
ratelimit_reset = None
376378
try:
377379
# If `data` is used and is a file object (or any IO), set back cursor to
378380
# initial position.
@@ -382,6 +384,8 @@ def _http_backoff_base(
382384
# Perform request and handle response
383385
def _should_retry(response: httpx.Response) -> bool:
384386
"""Handle response and return True if should retry, False if should return/yield."""
387+
nonlocal ratelimit_reset
388+
385389
if response.status_code not in retry_on_status_codes:
386390
return False # Success, don't retry
387391

@@ -393,6 +397,12 @@ def _should_retry(response: httpx.Response) -> bool:
393397
# user ask for retry on a status code that doesn't raise_for_status.
394398
return False # Don't retry, return/yield response
395399

400+
# get rate limit reset time from headers if 429 response
401+
if response.status_code == 429:
402+
ratelimit_info = parse_ratelimit_headers(response.headers)
403+
if ratelimit_info is not None:
404+
ratelimit_reset = ratelimit_info.reset_in_seconds
405+
396406
return True # Should retry
397407

398408
if stream:
@@ -415,9 +425,15 @@ def _should_retry(response: httpx.Response) -> bool:
415425
if nb_tries > max_retries:
416426
raise err
417427

418-
# Sleep for X seconds
419-
logger.warning(f"Retrying in {sleep_time}s [Retry {nb_tries}/{max_retries}].")
420-
time.sleep(sleep_time)
428+
# use rate limit reset if available, otherwise exponential backoff
429+
if ratelimit_reset is not None:
430+
actual_sleep = min(max_wait_time, float(ratelimit_reset))
431+
logger.warning(f"Rate limited. Waiting {actual_sleep}s before retry [Retry {nb_tries}/{max_retries}].")
432+
else:
433+
actual_sleep = min(max_wait_time, sleep_time)
434+
logger.warning(f"Retrying in {actual_sleep}s [Retry {nb_tries}/{max_retries}].")
435+
436+
time.sleep(actual_sleep)
421437

422438
# Update sleep time for next retry
423439
sleep_time = min(max_wait_time, sleep_time * 2) # Exponential backoff

tests/test_utils_http.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,62 @@ def _side_effect_timer() -> Generator[ConnectTimeout, None, None]:
151151
expected_sleep_times = [0.1, 0.2, 0.4, 0.5, 0.5]
152152
self.assertListEqual(sleep_times, expected_sleep_times)
153153

154+
def test_backoff_on_429_uses_ratelimit_header_capped(self) -> None:
    """Check that the wait after a 429 never exceeds `max_wait_time`.

    The mocked 429 response carries a `ratelimit` header with `t=1` while
    `max_wait_time` is 0.5, so the measured pause before the retry is
    expected to be 0.5s.
    """
    observed_waits = []

    def _timed_responses() -> Generator:
        # The generator body only starts running on the first request, so
        # the clock starts when that first request is made.
        first_request_at = time.time()
        # Server says wait 1s
        yield Mock(status_code=429, headers={"ratelimit": '"api";r=0;t=1'})
        # Resumed by the second request: the elapsed time is the wait that
        # happened between the two requests.
        observed_waits.append(round(time.time() - first_request_at, 1))
        yield Mock(status_code=200)

    self.mock_request.side_effect = _timed_responses()

    # t=1 from the header is above max_wait_time=0.5, so the cap applies.
    backoff_options = {
        "base_wait_time": 0.1,
        "max_wait_time": 0.5,
        "max_retries": 3,
        "retry_on_status_codes": 429,
    }
    response = http_backoff("GET", URL, **backoff_options)

    self.assertEqual(self.mock_request.call_count, 2)
    self.assertEqual(observed_waits, [0.5])  # capped at max_wait_time
    self.assertEqual(response.status_code, 200)
181+
182+
def test_backoff_on_429_uses_ratelimit_header_not_capped(self) -> None:
    """Check that the wait after a 429 follows the header when it is below `max_wait_time`.

    The mocked 429 response carries a `ratelimit` header with `t=1` while
    `max_wait_time` is 5.0, so the measured pause before the retry is
    expected to be the full 1.0s.
    """
    observed_waits = []

    def _timed_responses() -> Generator:
        # The generator body only starts running on the first request, so
        # the clock starts when that first request is made.
        first_request_at = time.time()
        # Server says wait 1s
        yield Mock(status_code=429, headers={"ratelimit": '"api";r=0;t=1'})
        # Resumed by the second request: the elapsed time is the wait that
        # happened between the two requests.
        observed_waits.append(round(time.time() - first_request_at, 1))
        yield Mock(status_code=200)

    self.mock_request.side_effect = _timed_responses()

    # t=1 from the header is below max_wait_time=5, so no capping happens.
    backoff_options = {
        "base_wait_time": 0.1,
        "max_wait_time": 5.0,
        "max_retries": 3,
        "retry_on_status_codes": 429,
    }
    response = http_backoff("GET", URL, **backoff_options)

    self.assertEqual(self.mock_request.call_count, 2)
    self.assertEqual(observed_waits, [1.0])  # full reset time (not capped)
    self.assertEqual(response.status_code, 200)
209+
154210

155211
class TestConfigureSession(unittest.TestCase):
156212
def setUp(self) -> None:

0 commit comments

Comments
 (0)