Skip to content

Changing the hashing methodology for cache folder creation of models. #481

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 33 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
1d2afe4
Detaching hash function for model cache path calculation. changes for…
quic-dhirajku Jun 24, 2025
c07a619
BugFix: Fix reshape error for llama swiftkv models (#432)
quic-shagun Jun 25, 2025
efa32b8
Gemma 3 minor fixes (#476)
quic-akuruvil Jun 25, 2025
e925939
Bug fix for spdTransform (#467)
qcdipankar Jun 27, 2025
01b0600
[QEff. Finetune]: Enabled FT CI tests. (#420)
quic-meetkuma Jul 1, 2025
ed45ea5
Gemma 3 minor fixes (#476) - CPR (#484)
quic-akuruvil Jul 1, 2025
66e3859
Revert "Gemma 3 minor fixes (#476) - CPR" (#485)
quic-hemagnih Jul 1, 2025
780ca86
[Docs/Readme]: Main Readme updating for latest news and adding the on…
abukhoy Jul 2, 2025
5dd6147
QUICKFIX: Removed the redundant breakpoint comment in modeling_llava_…
quic-dhirajku Jul 3, 2025
2a4f02c
MDP hash support (#479)
quic-rishinr Jul 3, 2025
1453fcd
[QEff Finetune] Adding dataset padding changes (#478)
quic-swatia Jul 4, 2025
6336bca
Fixed QNN data format config issue. (#480)
shubhagr-qc Jul 7, 2025
52dc6f3
Corrected Total Inference Time unit (#505)
asmigosw Jul 9, 2025
7138e3b
[QEff. Finetune]: Added support to sync gradients across devices duri…
quic-meetkuma Jul 9, 2025
e77444f
[QEff Finetune]: Implement logger for finetuning and enable dumping (…
quic-mamta Jul 9, 2025
e61ca38
Adding Fix for Falcon model (#508)
qcdipankar Jul 10, 2025
17b24c7
[QEff. Finetune]: Removed samsum dataset references from FT code. (#482)
quic-meetkuma Jul 10, 2025
e3f5ab4
Dynamic cache support on llama4 (#494)
quic-rishinr Jul 13, 2025
bf63b17
Dependency package upgrade (#407)
qcdipankar Jul 14, 2025
57b918f
[QEff Finetune] : fix task_type variable in configs (#514)
quic-mamta Jul 14, 2025
908ab65
Incorporated changes suggested in comments
quic-dhirajku Jun 27, 2025
6f99b2c
Edited a comment on compile params dump
quic-dhirajku Jun 27, 2025
bd419b3
Modifications made based on Rishin's suggestion. WIP
quic-dhirajku Jul 15, 2025
a2606f1
Modifications to the flow of hash creation and filtration of params f…
quic-dhirajku Jul 16, 2025
78b7950
Clean-up post rebase was done.
quic-dhirajku Jul 16, 2025
f401f0a
commit for Linter issues
quic-dhirajku Jul 16, 2025
f5e8f8c
Removed partial changes done for Metaclass utilization to enforce fro…
quic-dhirajku Jul 16, 2025
7e1df0a
Made changes to incorporate PEFT model configs and addressed the comm…
quic-dhirajku Aug 4, 2025
c5bed92
Updated path to import 'to_hashable' method, as we have 'hash_utils' …
quic-dhirajku Aug 4, 2025
6eabbeb
edited 'QEffAutoModelForCausalLM' to store class name in 'hash_params…
quic-dhirajku Aug 5, 2025
46f5ffd
modified the way 'model_architecture' is stored so that we don't run …
quic-dhirajku Aug 5, 2025
133c076
Updated the test scripts with changes required for appropriate testing
quic-dhirajku Aug 5, 2025
6427fa6
Updated tests to account for the new hashing changes.
quic-dhirajku Aug 7, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
146 changes: 82 additions & 64 deletions QEfficient/base/modeling_qeff.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,8 @@
#
# ----------------------------------------------------------------------------

import hashlib
import copy
import inspect
import json
import logging
import shutil
import subprocess
Expand All @@ -23,8 +22,16 @@
from QEfficient.base.pytorch_transforms import PytorchTransform
from QEfficient.compile.qnn_compiler import compile as qnn_compile
from QEfficient.generation.cloud_infer import QAICInferenceSession
from QEfficient.utils import constants, dump_qconfig
from QEfficient.utils.cache import QEFF_HOME, to_hashable
from QEfficient.utils import (
constants,
create_json,
dump_qconfig,
filter_and_create_export_hash,
generate_mdp_partition_config,
hash_compile_params,
load_json,
)
from QEfficient.utils.cache import QEFF_HOME

logger = logging.getLogger(__name__)

Expand All @@ -46,12 +53,18 @@ class QEFFBaseModel(ABC):
def _transform_names(cls) -> List[str]:
return [x.__name__ for x in cls._pytorch_transforms + cls._onnx_transforms]

def __init__(self, model: torch.nn.Module) -> None:
def __init__(self, model: torch.nn.Module, **kwargs) -> None:
super().__init__()
self.model = model
self.hash_params = self.create_model_params(**kwargs)

self.onnx_path: Optional[str] = None
self.qpc_path: Optional[str] = None
self.qpc_session: Optional[QAICInferenceSession] = None
self.pretrained_model_name_or_path = kwargs.get("pretrained_model_name_or_path", None)
if hasattr(self.model.config, "architectures"):
model_architecture = getattr(self.model.config, "architectures", None)
self.model_architecture = model_architecture[0] if isinstance(model_architecture, list) else None

# Apply the transformations
any_transformed = False
Expand All @@ -64,13 +77,16 @@ def __init__(self, model: torch.nn.Module) -> None:
else:
logger.info(f"Pytorch transforms applied to model: {self.model_name}")

@property
@abstractmethod
def model_name(self) -> str: ...
def create_model_params(self, **kwargs) -> Dict:
    """Build the dictionary of parameters used to derive this model's export hash.

    Starts from a deep copy of the constructor kwargs (so later mutation of the
    caller's arguments cannot silently change the recorded hash inputs), then adds:
      - "config": the model configuration as a diff dict (non-default fields only;
        presumably transformers' ``PretrainedConfig.to_diff_dict`` — confirm),
      - "peft_config": the model's active PEFT config if present, else ``None``,
      - "applied_transform_names": names of the pytorch/onnx transforms this
        class applies (see ``_transform_names``).

    Returns:
        Dict: the parameters later filtered and hashed for the export directory.
    """
    model_params = copy.deepcopy(kwargs)
    model_params["config"] = self.model.config.to_diff_dict()
    model_params["peft_config"] = getattr(self.model, "active_peft_config", None)
    model_params["applied_transform_names"] = self._transform_names()
    return model_params

@property
@abstractmethod
def model_hash(self) -> str: ...
def model_name(self) -> str: ...

@abstractmethod
def export(self, export_dir: Optional[str] = None) -> Path:
Expand Down Expand Up @@ -135,8 +151,17 @@ def _export(
:onnx_transform_kwargs (dict): Additional arguments to be passed to `Transform.apply` for this class.
:export_dir (str): Specify the export directory. The export_dir will be suffixed with a hash corresponding to current model.
"""
export_dir = Path(export_dir or (QEFF_HOME / self.model_name))
export_dir = export_dir.with_name(export_dir.name + "-" + self.model_hash)
parent_dir = self.model_architecture or self.model_name
export_dir = Path(export_dir or (QEFF_HOME / parent_dir / self.model_name))
export_hash, filtered_hash_params = filter_and_create_export_hash(
model_params=self.hash_params,
output_names=output_names,
dynamic_axes=dynamic_axes,
export_kwargs=export_kwargs,
onnx_transform_kwargs=onnx_transform_kwargs,
)
self.export_hash = export_hash
export_dir = export_dir.with_name(export_dir.name + "-" + export_hash)
onnx_path = export_dir / f"{self.model_name}.onnx"
if onnx_path.is_file():
self.onnx_path = onnx_path
Expand Down Expand Up @@ -211,6 +236,11 @@ def _export(
finally:
shutil.rmtree(tmp_onnx_dir, ignore_errors=True)

# Dump JSON file with hashed parameters
hashed_params_export_path = export_dir / "hashed_export_params.json"
create_json(hashed_params_export_path, filtered_hash_params)
logger.info("Hashed parameters exported successfully.")

self.onnx_path = onnx_path
return onnx_path

Expand Down Expand Up @@ -241,12 +271,10 @@ def _compile(
:mdp_ts_num_devices (int): Number of devices to partition to use Multi-Device Partitioning with tensor-slicing.
:num_speculative_tokens (int, optional): Number of speculative tokens to take as input for Speculative Decoding Target Language Model.
:enable_qnn (bool): Enables QNN Compilation. ``Defaults to False.``
:qnn_config (str): Path of QNN Config parameters file. Any extra parameters for QNN compilation can be passed via this file. ``Defaults to None.``
:compiler_options: Pass any compiler option as input.
Any flag that is supported by `qaic-exec` can be passed. Params are converted to flags as below:
:qnn_config (str): Path of QNN Config parameters file. ``Defaults to None.``
:compiler_options: Pass any compiler option as input. Any flag that is supported by `qaic-exec` can be passed. Params are converted to flags as below:
- aic_num_cores=16 -> -aic-num-cores=16
- convert_to_fp16=True -> -convert-to-fp16
For QNN Compilation path, when enable_qnn is set to True, any parameter passed in compiler_options will be ignored.
"""
if onnx_path is None and self.onnx_path is None:
self.export()
Expand All @@ -258,28 +286,23 @@ def _compile(
raise FileNotFoundError(f"ONNX file not found at: {onnx_path}")

if enable_qnn:
if compiler_options:
logger.warning(
f"Extra arguments to QNN compilation are supported only via qnn_config file. Ignoring {compiler_options}"
)

self.qpc_path = qnn_compile(
onnx_path=onnx_path,
qpc_base_path=compile_dir,
specializations=specializations,
custom_io=custom_io,
device_group=list(range(mdp_ts_num_devices)),
num_cores=compiler_options.get("aic_num_cores", 16),
mxfp6=compiler_options.get("mxfp6_matmul", False),
num_cores=compiler_options.get("aic_num_cores", constants.DEFAULT_AIC_NUM_CORES),
mxfp6=compiler_options.get("mxfp6_matmul", constants.DEFAULT_AIC_MXPF6_MATMUL),
mxint8=mxint8_kv_cache,
qnn_config=qnn_config,
)

return self.qpc_path

command = constants.COMPILER + [f"-m={onnx_path}"]
if mdp_ts_json_path := compiler_options.pop("mdp_ts_json_path", None):
mdp_ts_num_devices = None

if mdp_ts_json_path := compiler_options.pop("mdp_load_partition_config", None):
command.append(f"-mdp-load-partition-config={mdp_ts_json_path}")

for key, value in compiler_options.items():
Expand All @@ -289,40 +312,50 @@ def _compile(
command.append(option)
continue
command.append(f"{option}={value}")
compile_hash = hashlib.sha256(to_hashable(command))

if specializations is not None:
compile_hash.update(to_hashable(specializations))

if custom_io is not None:
compile_hash.update(to_hashable(custom_io))

if num_speculative_tokens:
compile_hash.update(to_hashable({"num_speculative_tokens": num_speculative_tokens}))
# Hash num_devices too, since default value would always be 1.
compile_hash.update(to_hashable(mdp_ts_num_devices))

# Check if already compiled
compile_hash = compile_hash.hexdigest()[:16]
# Create a dummy mdp_ts_json if mdp-load-partition-config not provided and num_devices > 1
if mdp_ts_json_path is not None:
mdp_ts_json = load_json(str(mdp_ts_json_path))
elif mdp_ts_num_devices > 1:
mdp_ts_json = generate_mdp_partition_config(
mdp_ts_num_devices, compiler_options.get("aic_num_cores", constants.DEFAULT_AIC_NUM_CORES)
)
else:
mdp_ts_json = None

compile_hash, hashed_params = hash_compile_params(
[Reviewer comment (Contributor), attached to the line above: remove the comment "# Check if already compiled" as it's no longer needed here.]

command=command,
specializations=specializations,
custom_io=custom_io,
mdp_ts_num_devices=mdp_ts_num_devices,
mdp_ts_json=mdp_ts_json,
num_speculative_tokens=num_speculative_tokens,
)
compile_dir = qpc_path.with_name(qpc_path.name + "-" + compile_hash)

qpc_path = compile_dir / "qpc"
qpc_path.mkdir(parents=True, exist_ok=True)

if qpc_path.is_dir():
if (qpc_path / "programqpc.bin").is_file():
self.qpc_path = qpc_path
return qpc_path
# Probably compilation failure last time, delete directory to start over
shutil.rmtree(qpc_path)

# write the MDP partition config file if not provided
if mdp_ts_json is not None:
mdp_ts_json_path = compile_dir / f"mdp_ts_{mdp_ts_num_devices}.json"
create_json(str(mdp_ts_json_path), mdp_ts_json)
command.append(f"-mdp-load-partition-config={mdp_ts_json_path}")

# Write specializations.json file
if specializations is not None:
specializations_json = compile_dir / "specializations.json"
with open(specializations_json, "w") as fp:
json.dump(
{"specializations": [{k: str(v) for k, v in spec.items()} for spec in specializations]},
fp,
indent=4,
)
specializations_data = {
"specializations": [{k: str(v) for k, v in spec.items()} for spec in specializations]
}
create_json(str(specializations_json), specializations_data)
command.append(f"-network-specialization-config={specializations_json}")

# Write custom_io.yaml file
Expand All @@ -333,30 +366,11 @@ def _compile(
fp.write(f" - IOName: {io_name}\n Precision: {dtype}\n\n")
command.append(f"-custom-IO-list-file={custom_io_yaml}")

# Write mdp_config.json file
if not mdp_ts_json_path and mdp_ts_num_devices > 1:
num_cores = compiler_options.get("aic_num_cores", 16)
mdp_ts_json = compile_dir / f"mdp_ts_{mdp_ts_num_devices}.json"
with open(mdp_ts_json, "w") as fp:
json.dump(
{
"connections": [{"devices": list(range(mdp_ts_num_devices)), "type": "p2p"}],
"partitions": [
{
"name": "Partition0",
"devices": [{"deviceId": d, "numCores": num_cores} for d in range(mdp_ts_num_devices)],
}
],
},
fp,
indent=4,
)
command.append(f"-mdp-load-partition-config={mdp_ts_json}")

command.append(f"-aic-binary-dir={qpc_path}")
logger.info(f"Running compiler: {' '.join(command)}")
try:
subprocess.run(command, capture_output=True, check=True)

except subprocess.CalledProcessError as e:
raise RuntimeError(
"\n".join(
Expand All @@ -370,6 +384,10 @@ def _compile(
)
)

# Dump JSON file with hashed parameters
hashed_compile_params_path = compile_dir / "hashed_compile_params.json"
create_json(hashed_compile_params_path, hashed_params)
logger.info("Hashed parameters exported successfully.")
self.qpc_path = qpc_path

return qpc_path
Loading