@@ -565,36 +565,52 @@ def initialize_model_and_tokenizer(self):
         cache_dir = CACHE_DIR / save_dir
         cache_dir.mkdir(parents=True, exist_ok=True)
 
-        config = BitsAndBytesConfig(
-            load_in_4bit=True,
-            bnb_4bit_compute_dtype=torch.bfloat16,
-            bnb_4bit_quant_type="nf4",
-            llm_int8_skip_modules=[
-                "vision_tower",
-                "multi_modal_projector",
-                "language_model.embed_tokens",
-                "language_model.norm",
-                "lm_head"
-            ]
-        )
-
         processor = AutoProcessor.from_pretrained(
             model_id,
             use_fast=True,
             cache_dir=cache_dir,
             token=False
         )
-        model = AutoModelForVision2Seq.from_pretrained(
-            model_id,
-            quantization_config=config,
-            torch_dtype=torch.bfloat16,
-            low_cpu_mem_usage=True,
-            cache_dir=cache_dir,
-            token=False
-        )
-        model.to(self.device)
+
+        if self.device == "cuda" and torch.cuda.is_available():
+            # Use quantization on CUDA
+            config = BitsAndBytesConfig(
+                load_in_4bit=True,
+                bnb_4bit_compute_dtype=torch.bfloat16,
+                bnb_4bit_quant_type="nf4",
+                llm_int8_skip_modules=[
+                    "vision_tower",
+                    "multi_modal_projector",
+                    "language_model.embed_tokens",
+                    "language_model.norm",
+                    "lm_head"
+                ]
+            )
+
+            model = AutoModelForVision2Seq.from_pretrained(
+                model_id,
+                quantization_config=config,
+                torch_dtype=torch.bfloat16,
+                low_cpu_mem_usage=True,
+                cache_dir=cache_dir,
+                token=False,
+                device_map="auto"
+            )
+            my_cprint("Granite Vision model loaded with quantization on CUDA", "green")
+
+        else:
+            # CPU mode - no quantization
+            model = AutoModelForVision2Seq.from_pretrained(
+                model_id,
+                torch_dtype=torch.float32,
+                low_cpu_mem_usage=True,
+                cache_dir=cache_dir,
+                token=False,
+                device_map={"": "cpu"}
+            )
+            my_cprint("Granite Vision model loaded on CPU (no quantization)", "yellow")
+
         model.eval()
-        my_cprint("Granite Vision model loaded into memory", "green")
         return model, None, processor
 
     @torch.inference_mode()
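Note on the change: 4-bit bitsandbytes checkpoints are dispatched to devices by from_pretrained via device_map, and calling .to() on such a model afterwards raises a ValueError in transformers, which is presumably why the old model.to(self.device) line is removed rather than kept alongside device_map="auto". A minimal standalone sketch of the same CUDA-or-CPU loading decision; the MODEL_ID value and load_granite_vision helper are illustrative assumptions, not taken from this commit:

import torch
from transformers import AutoModelForVision2Seq, AutoProcessor, BitsAndBytesConfig

# Hypothetical model id for illustration; the commit resolves model_id elsewhere.
MODEL_ID = "ibm-granite/granite-vision-3.1-2b-preview"

def load_granite_vision(device: str):
    processor = AutoProcessor.from_pretrained(MODEL_ID, use_fast=True)
    if device == "cuda" and torch.cuda.is_available():
        # Quantized weights are placed on the GPU by device_map="auto".
        # Do not call model.to() afterwards: transformers raises a
        # ValueError for 4-bit/8-bit bitsandbytes models.
        config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.bfloat16,
            bnb_4bit_quant_type="nf4"
        )
        model = AutoModelForVision2Seq.from_pretrained(
            MODEL_ID,
            quantization_config=config,
            torch_dtype=torch.bfloat16,
            device_map="auto"
        )
    else:
        # Full-precision fallback pinned to the CPU.
        model = AutoModelForVision2Seq.from_pretrained(
            MODEL_ID,
            torch_dtype=torch.float32,
            device_map={"": "cpu"}
        )
    return model.eval(), processor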