Modifications made based on Rishin's suggestion. WIP

quic-dhirajku · quic-dhirajku · commit 99ff66814190 · 2025-07-15T05:59:07.000Z
Signed-off-by: Dhiraj Kumar Sah <dhirajku@qti.qualcomm.com>
diff --git a/QEfficient/base/modeling_qeff.py b/QEfficient/base/modeling_qeff.py
@@ -23,8 +23,14 @@
 from QEfficient.base.pytorch_transforms import PytorchTransform
 from QEfficient.compile.qnn_compiler import compile as qnn_compile
 from QEfficient.generation.cloud_infer import QAICInferenceSession
-from QEfficient.utils import constants, dump_qconfig, make_serializable
-from QEfficient.utils.cache import QEFF_HOME, hash_dict_params
+from QEfficient.utils import (
+    constants,
+    create_json,
+    dump_qconfig,
+    filter_and_hash_compile_params,
+    filter_and_hash_export_params,
+)
+from QEfficient.utils.cache import QEFF_HOME
 
 logger = logging.getLogger(__name__)
 
@@ -46,15 +52,18 @@ class QEFFBaseModel(ABC):
     def _transform_names(cls) -> List[str]:
         return [x.__name__ for x in cls._pytorch_transforms + cls._onnx_transforms]
 
+    def create_model_params(self, **kwargs) -> Dict:
+        """
+        Assemble the model parameters that are stored on the instance for hashing.
+
+        The result is a deep copy of ``kwargs`` in which ``"config"`` is set to
+        ``self.model.config.to_diff_dict()`` and ``"_transform_names"`` is set to
+        ``self._transform_names()``; caller-supplied values under those two keys
+        are overwritten.
+        """
+        # TODO: Add keywords list to filter out params that are not needed for hashing
+        params = copy.deepcopy(kwargs)
+        params.update(
+            config=self.model.config.to_diff_dict(),
+            _transform_names=self._transform_names(),
+        )
+        return params
+
     def __init__(self, model: torch.nn.Module, **kwargs) -> None:
         super().__init__()
         self.model = model
-
-        # Store Model parameters to Calculate Hash for caching
-        self.model_params = {}
-        self.model_params = copy.deepcopy(kwargs)
-        self.model_params["config"] = self.model.config.to_diff_dict()
-        self.model_params["_transform_names"] = self._transform_names()
+        self.model_params = self.create_model_params(**kwargs)
 
         if hasattr(self.model.config, "architectures"):
             self.model_architecture = self.model.config.architectures[0]
@@ -121,6 +130,7 @@ def compile(self, *args, **kwargs) -> Path:
             :str: Path of the compiled ``qpc`` package.
         """
 
+    # @dump_model_params
     def _export(
         self,
         example_inputs: Dict[str, torch.Tensor],
@@ -141,19 +151,17 @@ def _export(
             :onnx_transform_kwargs (dict): Additional arguments to be passed to `Transform.apply` for this class.
             :export_dir (str): Specify the export directory. The export_dir will be suffixed with a hash corresponding to current model.
         """
-        export_params = {}
-        export_params["output_names"] = output_names
-        export_params["dynamic_axes"] = dynamic_axes
-
-        self.model_params["export_params"] = export_params
-
-        self.model_params.update(export_kwargs) if export_kwargs is not None else None
-        self.model_params.update(onnx_transform_kwargs) if export_kwargs is not None else None
 
         export_dir = Path(export_dir or (QEFF_HOME / self.model_architecture / self.model_name))
+        export_hash, hashed_params = filter_and_hash_export_params(
+            model_params=copy.deepcopy(self.model_params),
+            output_names=output_names,
+            dynamic_axes=dynamic_axes,
+            export_kwargs=export_kwargs,
+            onnx_transform_kwargs=onnx_transform_kwargs,
+            export_dir=export_dir,
+        )
 
-        export_hash = hash_dict_params(self.model_params)
-        export_hash = export_hash.hexdigest()[:16]
         export_dir = export_dir.with_name(export_dir.name + "-" + export_hash)
         onnx_path = export_dir / f"{self.model_name}.onnx"
         if onnx_path.is_file():
@@ -221,20 +229,6 @@ def _export(
             onnx.save(model, onnx_path)
             logger.info("Transformed onnx saved")
 
-            # Dumping model paramters in a JSON file after successful ONNX export
-            model_params_json = export_dir / "model_params.json"
-            with open(model_params_json, "w") as fp:
-                json.dump(
-                    {
-                        "model_params": {
-                            k: make_serializable(self.model_params[k]) for k in sorted(self.model_params.keys())
-                        }
-                    },
-                    fp,
-                    indent=4,
-                )
-            logger.info("Parameters used for export hash dumped in a JSON file successfully")
-
         except Exception as e:
             logger.error(f"ONNX export (or) ONNXTransforms failed: {e}")
 
@@ -243,6 +237,11 @@ def _export(
         finally:
             shutil.rmtree(tmp_onnx_dir, ignore_errors=True)
 
+        # Dump JSON file with hashed parameters
+        hashed_params_export_path = export_dir / "hashed_model_params.json"
+        create_json(hashed_params_export_path, hashed_params)
+        logger.info("Hashed parameters exported successfully.")
+
         self.onnx_path = onnx_path
         return onnx_path
 
@@ -281,8 +280,6 @@ def _compile(
         if onnx_path is None and self.onnx_path is None:
             self.export()
 
-        self.compile_params = {}
-
         onnx_path = Path(onnx_path or self.onnx_path)
         compile_dir = Path(compile_dir or onnx_path.parent)
         qpc_path = compile_dir / "qpc"
@@ -317,23 +314,13 @@ def _compile(
                 continue
             command.append(f"{option}={value}")
 
-        self.compile_params["command"] = command
-
-        if specializations is not None:
-            self.compile_params.update({"specializations": specializations})
-
-        if custom_io is not None:
-            self.compile_params.update({"custom_io": custom_io})
-
-        if num_speculative_tokens:
-            self.compile_params.update({"num_speculative_tokens": num_speculative_tokens})
-
-        if mdp_ts_num_devices is not None:
-            self.compile_params.update({"mdp_ts_num_devices": mdp_ts_num_devices})
-
-        # Check if already compiled
-        compile_hash = hash_dict_params(self.compile_params)
-        compile_hash = compile_hash.hexdigest()[:16]
+        compile_hash, hashed_params = filter_and_hash_compile_params(
+            command=command,
+            specializations=specializations,
+            custom_io=custom_io,
+            mdp_ts_num_devices=mdp_ts_num_devices,
+            num_speculative_tokens=num_speculative_tokens,
+        )
         compile_dir = qpc_path.with_name(qpc_path.name + "-" + compile_hash)
 
         qpc_path = compile_dir / "qpc"
@@ -389,18 +376,6 @@ def _compile(
         try:
             subprocess.run(command, capture_output=True, check=True)
 
-            # Dumping compile paramters in a JSON file after successful QPC compilation
-            compile_params_json = compile_dir / "compile_params.json"
-            with open(compile_params_json, "w") as fp:
-                json.dump(
-                    {
-                        "compile_params": {
-                            k: make_serializable(self.compile_params[k]) for k in sorted(self.compile_params.keys())
-                        }
-                    },
-                    fp,
-                    indent=4,
-                )
         except subprocess.CalledProcessError as e:
             raise RuntimeError(
                 "\n".join(
@@ -414,6 +389,10 @@ def _compile(
                 )
             )
 
+        # Dump JSON file with hashed parameters
+        hashed_compile_params_path = compile_dir / "hashed_compile_params.json"
+        create_json(hashed_compile_params_path, hashed_params)
+        logger.info("Hashed parameters exported successfully.")
         self.qpc_path = qpc_path
 
         return qpc_path
diff --git a/QEfficient/transformers/models/modeling_auto.py b/QEfficient/transformers/models/modeling_auto.py
@@ -130,6 +130,11 @@ def auto_correct_inputs(self, inputs):
         return {k: v for k, v in inputs.items() if k in [iinfo.name for iinfo in inputs_info]}
 
 
+class NoInitMeta(type):
+    """
+    Metaclass whose ``__call__`` always raises, so a class that uses it cannot be
+    instantiated by calling it directly.
+    """
+
+    def __call__(cls, *args, **kwargs):
+        message = "Use `from_pretrained` to create an instance."
+        raise RuntimeError(message)
+
+
 class QEFFAutoModel(QEFFTransformersBase):
     """
     The QEFFAutoModel class is designed for manipulating any transformer model from the HuggingFace hub.
@@ -911,6 +916,7 @@ def __init__(
             self.model.config.vision_config.use_flash_attn = "false"
         else:
             self.model.config.text_config.use_cache = True
+        self.model_params["qeff_class"] = self.__class__.__name__
 
     @classmethod
     def from_pretrained(
@@ -934,6 +940,10 @@ def from_pretrained(
         model = cls._hf_auto_class.from_pretrained(pretrained_model_name_or_path, config, *args, **kwargs)
 
         return cls(model, pretrained_model_name_or_path=pretrained_model_name_or_path, **kwargs)
+        # # Bypass __call__ and manually initialize
+        # instance = object.__new__(cls)
+        # instance.__init__(model, pretrained_model_name_or_path=pretrained_model_name_or_path, **kwargs)
+        # return instance
 
     def export(
         self,
@@ -1175,6 +1185,7 @@ def get_model_config(self) -> dict:
         return self.model.config.__dict__
 
 
+# class QEFFAutoModelForImageTextToText(metaclass=NoInitMeta):
 class QEFFAutoModelForImageTextToText:
     """
     The QEFFAutoModelForImageTextToText class is used to work with multimodal language models from the HuggingFace hub.
@@ -1277,10 +1288,16 @@ def from_pretrained(cls, pretrained_model_name_or_path: str, kv_offload: Optiona
         model = cls._hf_auto_class.from_pretrained(pretrained_model_name_or_path, **kwargs)
         return cls(model, kv_offload=kv_offload, pretrained_model_name_or_path=pretrained_model_name_or_path, **kwargs)
 
+        # # Bypass __call__ and manually initialize
+        # instance = object.__new__(cls)
+        # instance.__init__(model, kv_offload=kv_offload, pretrained_model_name_or_path=pretrained_model_name_or_path, **kwargs)
+        # return instance
+
 
 MISCLASSIFIED_CAUSAL_LM_TO_QEFF_AUTO_CLASS_MAP = {"InternVLChatModel": QEFFAutoModelForImageTextToText}
 
 
+# class QEFFAutoModelForCausalLM(QEFFBaseModel, metaclass=NoInitMeta):
 class QEFFAutoModelForCausalLM(QEFFBaseModel):
     """
     The QEFF class is designed for manipulating any causal language model from the HuggingFace hub.
diff --git a/QEfficient/utils/__init__.py b/QEfficient/utils/__init__.py
@@ -11,8 +11,12 @@
 )
 from QEfficient.utils._utils import (  # noqa: F401
     check_and_assign_cache_dir,
+    create_json,
     custom_format_warning,
+    dump_model_params,
     dump_qconfig,
+    filter_and_hash_compile_params,
+    filter_and_hash_export_params,
     get_num_layers_from_config,
     get_num_layers_vlm,
     get_onnx_dir_name,
diff --git a/QEfficient/utils/_utils.py b/QEfficient/utils/_utils.py
@@ -11,6 +11,7 @@
 import subprocess
 import xml.etree.ElementTree as ET
 from dataclasses import dataclass
+from pathlib import Path
 from typing import Any, Dict, List, Optional, Tuple, Union
 
 import requests
@@ -25,6 +26,7 @@
     PreTrainedTokenizerFast,
 )
 
+from QEfficient.utils.cache import QEFF_HOME, hash_dict_params
 from QEfficient.utils.constants import QEFF_MODELS_DIR, Constants, QnnConstants
 from QEfficient.utils.logging_utils import logger
 
@@ -630,6 +632,43 @@ def wrapper(self, *args, **kwargs):
     return wrapper
 
 
+def dump_model_params(func):
+    """
+    Decorator that dumps the hashed export parameters before running ``func``.
+
+    The call arguments are bound to ``func``'s parameter names, hashed together with a
+    copy of ``self.model_params`` via ``filter_and_hash_export_params``, and written to
+    ``hashed_model_params.json`` inside the hash-suffixed export directory.
+
+    Dumping is best-effort: any failure is logged and ``func`` is still called.
+    """
+    # Local imports: these names are not used elsewhere in this block.
+    import copy
+    import functools
+
+    @functools.wraps(func)
+    def wrapper(self, *args, **kwargs):
+        # Bind args to their parameter names so positional and defaulted arguments are seen too
+        bound_args = inspect.signature(func).bind(self, *args, **kwargs)
+        bound_args.apply_defaults()
+
+        # Everything except 'self' and the example inputs takes part in the hash
+        all_kwargs = {k: v for k, v in bound_args.arguments.items() if k not in ("self", "example_inputs")}
+
+        try:
+            # Read export_dir from the bound arguments: it may be positional or left at its default
+            export_dir = Path(all_kwargs.get("export_dir") or (QEFF_HOME / self.model_architecture / self.model_name))
+
+            # Hash a copy so the instance's model_params are never modified by the helper
+            export_hash, hashed_params = filter_and_hash_export_params(
+                model_params=copy.deepcopy(self.model_params),
+                **all_kwargs,
+            )
+            export_dir = export_dir.with_name(export_dir.name + "-" + export_hash)
+
+            os.makedirs(export_dir, exist_ok=True)
+
+            hashed_params_file_path = os.path.join(export_dir, "hashed_model_params.json")
+            create_json(hashed_params_file_path, hashed_params)
+
+            logger.info("Parameters used for export hash dumped in a JSON file successfully")
+        except Exception as e:
+            logger.error(f"An unexpected error occurred while dumping the hashed model params: {e}")
+
+        return func(self, *args, **kwargs)
+
+    return wrapper
+
+
 def get_qaic_sdk_version(qaic_sdk_xml_path: str) -> Optional[str]:
     """
     Extracts the QAIC SDK version from the given SDK XML file.
@@ -724,6 +763,50 @@ def create_and_dump_qconfigs(
     create_json(qconfig_file_path, qconfigs)
 
 
+def filter_and_hash_export_params(**kwargs):
+    """
+    Prepare the model params required to create the hash for the export directory.
+
+    Keyword Args:
+        model_params (dict): Required. Base model parameters. A shallow copy is extended;
+            the dict passed in is not modified.
+        output_names: Stored under ``export_params["output_names"]`` (None when absent).
+        dynamic_axes: Stored under ``export_params["dynamic_axes"]`` (None when absent).
+        export_kwargs (dict, optional): Merged into the top level when non-empty.
+        onnx_transform_kwargs (dict, optional): Merged into the top level when non-empty.
+
+    Any other keyword (for example ``export_dir``) is ignored.
+
+    Returns:
+        Tuple of (value returned by ``hash_dict_params`` for the assembled dict, the assembled dict).
+    """
+    # Work on a shallow copy so the additions below never leak into the caller's dict.
+    filtered_params = dict(kwargs["model_params"])
+    filtered_params["export_params"] = {
+        "output_names": kwargs.get("output_names"),
+        "dynamic_axes": kwargs.get("dynamic_axes"),
+    }
+
+    # Merge order matters: onnx_transform_kwargs wins over export_kwargs on key clashes.
+    for extra in (kwargs.get("export_kwargs"), kwargs.get("onnx_transform_kwargs")):
+        if extra:
+            filtered_params.update(extra)
+
+    return hash_dict_params(filtered_params), filtered_params
+
+
+def filter_and_hash_compile_params(**kwargs):
+    """
+    Collect the compile parameters and create the hash for the qpc directory.
+
+    Keyword Args:
+        command: Required. The compile command.
+        specializations, custom_io, mdp_ts_num_devices: Included whenever they are not None,
+            so an explicitly passed empty or zero value still contributes to the hash.
+        num_speculative_tokens: Included only when truthy.
+
+    Returns:
+        Tuple of (value returned by ``hash_dict_params`` for the collected dict, the collected dict).
+    """
+    filtered_params = {"command": kwargs["command"]}
+
+    for key in ("specializations", "custom_io"):
+        if kwargs.get(key) is not None:
+            filtered_params[key] = kwargs[key]
+
+    if kwargs.get("num_speculative_tokens"):
+        filtered_params["num_speculative_tokens"] = kwargs["num_speculative_tokens"]
+
+    if kwargs.get("mdp_ts_num_devices") is not None:
+        filtered_params["mdp_ts_num_devices"] = kwargs["mdp_ts_num_devices"]
+
+    return hash_dict_params(filtered_params), filtered_params
+
+
 def filter_kwargs(func, kwargs):
     """
     Filter a dictionary of keyword arguments to only include the valid arguments of a function.
diff --git a/QEfficient/utils/cache.py b/QEfficient/utils/cache.py
@@ -11,6 +11,8 @@
 from pathlib import Path
 from typing import Dict
 
+from QEfficient.utils.constants import HASH_HEXDIGEST_STR_LEN
+
 QEFF_HOME: Path = None
 if "QEFF_HOME" in os.environ:
     QEFF_HOME = Path(os.environ["QEFF_HOME"])
@@ -43,9 +45,9 @@ def to_hashable(obj) -> bytes:
     ).encode()
 
 
-def hash_dict_params(dict_items: Dict):
+def hash_dict_params(dict_items: Dict, hash_string_size: int = HASH_HEXDIGEST_STR_LEN) -> str:
     """
-    Takes a dictionary of items and returns a SHA256 hash object
+    Takes a dictionary of items and returns the first ``hash_string_size`` characters
+    of the hex digest of its SHA256 hash
     """
     mhash = hashlib.sha256(to_hashable(dict_items))
-    return mhash
+    return mhash.hexdigest()[:hash_string_size]
diff --git a/QEfficient/utils/constants.py b/QEfficient/utils/constants.py
@@ -25,6 +25,8 @@
 ONNX_EXPORT_IMAGE_DEPTH = 3
 ONNX_EXPORT_CTX_LEN = 1024
 
+HASH_HEXDIGEST_STR_LEN = 16
+
 
 # Store the qeff_models inside the ~/.cache directory or over-ride with an env variable.
 def get_models_dir():