@@ -117,45 +117,66 @@ def _atomic_download(url: str, dest: pathlib.Path):
 
 
 def _download_archive(url: str, archive_path: pathlib.Path) -> bool:
-    """Robust streaming download with retries."""
+    """Reliable download with resume + retries."""
 
-    logger.debug("Archive will be saved to: %s", archive_path)
+    logger.info("Downloading with resume support: %s", url)
 
-    session = requests.Session()
-    retries = Retry(
-        total=5,
-        backoff_factor=1.0,
-        status_forcelist=[429, 500, 502, 503, 504],
-        allowed_methods=["GET"],
-    )
-    session.mount("https://", HTTPAdapter(max_retries=retries))
+    CHUNK_SIZE = 1024 * 1024  # 1MB
+    MAX_RETRIES = 5
 
-    try:
-        with session.get(url, stream=True) as r:
-            r.raise_for_status()
+    # Determine existing partial file size
+    downloaded = archive_path.stat().st_size if archive_path.exists() else 0
 
-            downloaded = 0
-            chunk_size = 1024 * 1024  # 1MB
+    # Get total size
+    head = requests.head(url)
+    if "content-length" not in head.headers:
+        logger.error("Server did not return content-length header!")
+        return False
+    total = int(head.headers["content-length"])
 
-            with open(archive_path, "wb") as f:
-                for chunk in r.iter_content(chunk_size):
-                    if chunk:
-                        f.write(chunk)
-                        downloaded += len(chunk)
+    if downloaded == total:
+        logger.info("File already fully downloaded.")
+        return True
 
-        logger.info("Download completed!")
+    logger.info("Resuming from %d / %d bytes", downloaded, total)
 
-    except Exception as e:
-        logger.exception("Error during download: %s", e)
-        return False
+    # Open file in append mode
+    with open(archive_path, "ab") as f:
 
-    if archive_path.exists() and archive_path.stat().st_size == 0:
-        logger.warning("Downloaded file is empty!")
-        return False
-    elif not archive_path.exists():
-        logger.error("File was not downloaded!")
+        while downloaded < total:
+            headers = {"Range": f"bytes={downloaded}-"}
+
+            try:
+                with requests.get(url, headers=headers, stream=True) as r:
+                    r.raise_for_status()
+
+                    for chunk in r.iter_content(CHUNK_SIZE):
+                        if chunk:
+                            f.write(chunk)
+                            downloaded += len(chunk)
+
+            except (requests.exceptions.ChunkedEncodingError,
+                    requests.exceptions.ConnectionError,
+                    requests.exceptions.ReadTimeout,
+                    urllib3.exceptions.IncompleteRead) as e:
+
+                logger.warning(
+                    "Connection drop during download at %d / %d bytes. Retrying... (%s)",
+                    downloaded, total, e,
+                )
+                time.sleep(1)
+                continue  # retry with updated downloaded value
+
+            # End of successful pass
+            break
+
+    # Validate final size
+    actual = archive_path.stat().st_size
+    if actual != total:
+        logger.error("Download incomplete: expected %d, got %d", total, actual)
         return False
 
+    logger.info("Download completed successfully.")
     return True
 
 
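Below is a minimal usage sketch (not part of the commit) showing how the revised helper might be exercised. The URL, destination path, and logging setup are placeholder assumptions; _download_archive and the module-level logger, requests, urllib3, time, and pathlib imports are assumed to come from the file this diff modifies.

import logging
import pathlib

logging.basicConfig(level=logging.INFO)

# Placeholder destination and URL, for illustration only.
archive = pathlib.Path("/tmp/dataset.tar.gz")
url = "https://example.com/archives/dataset.tar.gz"

if _download_archive(url, archive):
    print(f"Archive ready: {archive} ({archive.stat().st_size} bytes)")
else:
    raise RuntimeError("Download failed or was incomplete")

Because the new version seeds downloaded from any existing partial file and opens the destination in append ("ab") mode, calling the helper again after an interrupted run resumes from the byte where the previous attempt stopped, using the Range request header.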