Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 44 additions & 4 deletions src/huggingface_hub/file_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,9 @@

# NOTE(review): presumably a per-directory cache of the symlink-support check — confirm against its users.
_are_symlinks_supported_in_dir: dict[str, bool] = {}

# Timeout passed as `etag_timeout` when the metadata fetch is retried: if the first
# attempt fails with `httpx.TimeoutException` and no local copy of the file exists,
# the fetch is attempted once more with this longer value before raising.
# Internal on purpose (not user-configurable).
# NOTE(review): unit is presumably seconds, like the initial etag timeout — confirm.
_ETAG_RETRY_TIMEOUT = 60
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No need to make it user-configurable, in my opinion. If a user has a very slow network, they can already set HF_HUB_ETAG_TIMEOUT to increase the initial timeout.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

agree!



def are_symlinks_supported(cache_dir: Union[str, Path, None] = None) -> bool:
"""Return whether the symlinks are supported on the machine.
Expand Down Expand Up @@ -1131,8 +1134,28 @@ def _hf_hub_download_to_cache_dir(
if not force_download:
return pointer_path

# Otherwise, raise appropriate error
_raise_on_head_call_error(head_call_error, force_download, local_files_only)
# No local file found, retry with longer timeout if it was a timeout error
if isinstance(head_call_error, httpx.TimeoutException):
logger.info("Metadata fetch timed out and no local file found. Retrying with longer timeout..")
(url_to_download, etag, commit_hash, expected_size, xet_file_data, head_call_error) = (
_get_metadata_or_catch_error(
repo_id=repo_id,
filename=filename,
repo_type=repo_type,
revision=revision,
endpoint=endpoint,
etag_timeout=_ETAG_RETRY_TIMEOUT,
headers=headers,
token=token,
local_files_only=local_files_only,
storage_folder=storage_folder,
relative_filename=relative_filename,
)
)

# If still error, raise
if head_call_error is not None:
_raise_on_head_call_error(head_call_error, force_download, local_files_only)

# From now on, etag, commit_hash, url and size are not None.
assert etag is not None, "etag must have been retrieved from server"
Expand Down Expand Up @@ -1300,9 +1323,26 @@ def _hf_hub_download_to_local_dir(
)
if not force_download:
return local_path
elif not force_download and isinstance(head_call_error, httpx.TimeoutException):
# No local file found, retry with longer timeout if it was a timeout error
logger.info("Metadata fetch timed out and no local file found. Retrying with longer timeout...")
(url_to_download, etag, commit_hash, expected_size, xet_file_data, head_call_error) = (
_get_metadata_or_catch_error(
repo_id=repo_id,
filename=filename,
repo_type=repo_type,
revision=revision,
endpoint=endpoint,
etag_timeout=_ETAG_RETRY_TIMEOUT,
headers=headers,
token=token,
local_files_only=local_files_only,
)
)

# Otherwise => raise
_raise_on_head_call_error(head_call_error, force_download, local_files_only)
# If still error, raise
if head_call_error is not None:
_raise_on_head_call_error(head_call_error, force_download, local_files_only)

# From now on, etag, commit_hash, url and size are not None.
assert etag is not None, "etag must have been retrieved from server"
Expand Down
Loading