1616import concurrent .futures
1717import logging
1818import math
19+ import multiprocessing
1920import os
2021import re
2122import shutil
2223import threading
2324from dataclasses import dataclass , field
25+ from pathlib import Path
2426from queue import Queue
2527from typing import Any , Callable , Dict , Optional
2628
@@ -92,6 +94,26 @@ def __init__(
9294 self .chunk_bytes = chunk_bytes
9395 self .job_queues = Queue ()
9496 self ._lock = threading .Lock ()
97+ self .chunk_download_path = self .get_chunk_download_path (download_path )
98+
def get_chunk_download_path(self, path: str) -> str:
    """Return the directory where the chunks of ``path`` are downloaded.

    Chunks are kept under ``~/.cache/sparsezoo/neuralmagic/chunks`` instead
    of next to the final model file, so partial downloads never pollute the
    model folder.  The leaf folders are derived from the target path: the
    third-from-last path component (the model stub) and the last two
    components joined with ``_`` (with ``.`` replaced by ``_`` so the name
    is a plain folder, not something that looks like a file).

    :param path: the destination path of the file being downloaded
    :return: the chunk-download directory for that file
    """
    components = path.split(os.path.sep)

    # model stub identifying the download, e.g. ".../<stub>/<dir>/<file>"
    stub = components[-3]

    # "<dir>_<file>" with dots flattened, e.g. "deployment_model_onnx"
    chunk_folder = "_".join(components[-2:]).replace(".", "_")

    cache_root = os.path.join(
        str(Path.home()), ".cache", "sparsezoo", "neuralmagic"
    )
    return os.path.join(cache_root, "chunks", stub, chunk_folder)
95117
96118 def is_range_header_supported (self ) -> bool :
97119 """Check if chunck download is supported"""
@@ -148,9 +170,11 @@ def queue_chunk_download_jobs(self) -> None:
148170 The jobs need to be executed by a worker or scheduler that processes the
149171 queued JobQueues.
150172 """
151- download_jobs : Queue = JobQueue (description = "Downloading Chunks" )
173+ file_name = self .download_path .split (os .path .sep )[- 1 ]
174+ download_jobs : Queue = JobQueue (
175+ description = f"Downloading Chunks for { file_name } "
176+ )
152177 num_download_jobs = math .ceil (self .file_size / self .chunk_bytes )
153-
154178 for job_id in range (num_download_jobs ):
155179 start_byte = 0 if job_id == 0 else job_id * (self .chunk_bytes ) + 1
156180 end_byte = (
@@ -161,8 +185,10 @@ def queue_chunk_download_jobs(self) -> None:
161185 bytes_range = f"bytes={ start_byte } -{ end_byte } "
162186
163187 func_kwargs = {
164- "download_path" : self .get_chunk_file_path (
165- f"{ job_id :05d} _{ bytes_range } "
188+ "download_path" : (
189+ os .path .join (
190+ self .chunk_download_path , f"{ job_id :05d} _{ bytes_range } "
191+ )
166192 ),
167193 "headers" : {
168194 "Range" : bytes_range ,
@@ -237,7 +263,7 @@ def queue_jobs(self) -> None:
237263 )
238264 self .job_queues .put (job_queue )
239265
240- def run (self , num_threads : int = 10 ) -> None :
266+ def run (self , num_threads : int = 1 ) -> None :
241267 """
242268 Executes queued download jobs in parallel using multiple threads.
243269
@@ -250,6 +276,9 @@ def run(self, num_threads: int = 10) -> None:
250276 file chunks in parallel. Defaults to 10.
251277
252278 """
279+ available_threads = multiprocessing .cpu_count () - threading .active_count ()
280+ num_threads = max (available_threads // 2 , num_threads )
281+
253282 is_prev_job_queue_success = True
254283 while not self .job_queues .empty () and is_prev_job_queue_success :
255284 job_queue = self .job_queues .get ()
@@ -295,23 +324,25 @@ def execute_job_from_queue(self, job_queue: Queue, **kwargs) -> None:
295324 with self ._lock :
296325 job : Job = job_queue .get ()
297326 success = False
327+ err = ""
298328 while not success and job .retries < job .max_retries :
299329 try :
300330 job .func (** job .func_kwargs , ** kwargs )
301331 success = True
302332 except Exception as _err :
333+ err = _err
303334 _LOGGER .debug (
304335 f"{ job .retries / self .max_retries } : "
305336 "Failed running {self.func} with kwargs {job.func_kwargs}"
306337 )
307- _LOGGER .debug (_err )
338+ _LOGGER .error (_err )
308339 job .retries += 1
309340 if job .retries < job .max_retries :
310341 job_queue .put (job )
311342
312343 if not success :
313344 _LOGGER .debug (f"Chunk download failed after { self .max_retries } retries." )
314- raise ValueError
345+ raise ValueError ( err )
315346
316347 def download_file (
317348 self ,
@@ -339,7 +370,10 @@ def download_file(
339370
340371 """
341372 write_chunk_size = min (CHUNK_BYTES , self .file_size )
373+ _LOGGER .debug ("creating " , download_path )
374+
342375 create_parent_dirs (download_path )
376+
343377 response = requests .get (
344378 self .url , headers = headers , stream = True , allow_redirects = True
345379 )
@@ -358,11 +392,10 @@ def combine_chunks_and_delete(self, download_path: str, progress_bar: tqdm) -> N
358392 :param progress_bar: tqdm object showing the progress of combining chunks
359393
360394 """
361- parent_directory = os .path .dirname (download_path )
362- chunk_directory = os .path .join (parent_directory , "chunks" )
395+ _LOGGER .debug ("Combing and deleting " , self .chunk_download_path )
363396
364397 pattern = re .compile (r"\d+_bytes=" )
365- files = os .listdir (chunk_directory )
398+ files = os .listdir (self . chunk_download_path )
366399
367400 chunk_files = [chunk_file for chunk_file in files if pattern .match (chunk_file )]
368401
@@ -371,13 +404,13 @@ def combine_chunks_and_delete(self, download_path: str, progress_bar: tqdm) -> N
371404 create_parent_dirs (self .download_path )
372405 with open (self .download_path , "wb" ) as combined_file :
373406 for file_path in sorted_chunk_files :
374- chunk_path = os .path .join (chunk_directory , file_path )
407+ chunk_path = os .path .join (self . chunk_download_path , file_path )
375408 with open (chunk_path , "rb" ) as infile :
376409 data = infile .read ()
377410 combined_file .write (data )
378411 progress_bar .update (len (data ))
379412
380- shutil .rmtree (chunk_directory )
413+ shutil .rmtree (self . chunk_download_path )
381414
382415 def get_chunk_file_path (self , file_range : str ) -> str :
383416 """
0 commit comments