Skip to content

Changing the hashing methodology for cache folder creation of models. #481

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 33 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
1d2afe4
Detaching hash function for model cache path calculation. changes for…
quic-dhirajku Jun 24, 2025
c07a619
BugFix: Fix reshape error for llama swiftkv models (#432)
quic-shagun Jun 25, 2025
efa32b8
Gemma 3 minor fixes (#476)
quic-akuruvil Jun 25, 2025
e925939
Bug fix for spdTransform (#467)
qcdipankar Jun 27, 2025
01b0600
[QEff. Finetune]: Enabled FT CI tests. (#420)
quic-meetkuma Jul 1, 2025
ed45ea5
Gemma 3 minor fixes (#476) - CPR (#484)
quic-akuruvil Jul 1, 2025
66e3859
Revert "Gemma 3 minor fixes (#476) - CPR" (#485)
quic-hemagnih Jul 1, 2025
780ca86
[Docs/Readme]: Main Readme updating for latest news and adding the on…
abukhoy Jul 2, 2025
5dd6147
QUICKFIX: Removed the redundant breakpoint comment in modeling_llava_…
quic-dhirajku Jul 3, 2025
2a4f02c
MDP hash support (#479)
quic-rishinr Jul 3, 2025
1453fcd
[QEff Finetune] Adding dataset padding changes (#478)
quic-swatia Jul 4, 2025
6336bca
Fixed QNN data format config issue. (#480)
shubhagr-qc Jul 7, 2025
52dc6f3
Corrected Total Inference Time unit (#505)
asmigosw Jul 9, 2025
7138e3b
[QEff. Finetune]: Added support to sync gradients across devices duri…
quic-meetkuma Jul 9, 2025
e77444f
[QEff Finetune]: Implement logger for finetuning and enable dumping (…
quic-mamta Jul 9, 2025
e61ca38
Adding Fix for Falcon model (#508)
qcdipankar Jul 10, 2025
17b24c7
[QEff. Finetune]: Removed samsum dataset references from FT code. (#482)
quic-meetkuma Jul 10, 2025
e3f5ab4
Dynamic cache support on llama4 (#494)
quic-rishinr Jul 13, 2025
bf63b17
Dependency package upgrade (#407)
qcdipankar Jul 14, 2025
57b918f
[QEff Finetune] : fix task_type variable in configs (#514)
quic-mamta Jul 14, 2025
908ab65
Incorporated changes suggested in comments
quic-dhirajku Jun 27, 2025
6f99b2c
Edited a comment on compile params dump
quic-dhirajku Jun 27, 2025
bd419b3
Modifications made based on Rishin's suggestion. WIP
quic-dhirajku Jul 15, 2025
a2606f1
Modifications to the flow of hash creation and filtration of params f…
quic-dhirajku Jul 16, 2025
78b7950
Clean-up post rebase was done.
quic-dhirajku Jul 16, 2025
f401f0a
commit for Linter issues
quic-dhirajku Jul 16, 2025
f5e8f8c
Removed partial changes done for Metaclass utilization to enforce fro…
quic-dhirajku Jul 16, 2025
7e1df0a
Made changes to incorporate PEFT model configs and addressed the comm…
quic-dhirajku Aug 4, 2025
c5bed92
Updated path to import 'to_hashable' method, as we have 'hash_utils' …
quic-dhirajku Aug 4, 2025
6eabbeb
edited 'QEffAutoModelForCausalLM' to store class name in 'hash_params…
quic-dhirajku Aug 5, 2025
46f5ffd
modified the way 'model_architecture' is stored so that we don't run …
quic-dhirajku Aug 5, 2025
133c076
Updated the test scripts with changes required for appropriate testing
quic-dhirajku Aug 5, 2025
6427fa6
Updated tests to account for the new hashing changes.
quic-dhirajku Aug 7, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
146 changes: 82 additions & 64 deletions QEfficient/base/modeling_qeff.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,8 @@
#
# ----------------------------------------------------------------------------

import hashlib
import copy
import inspect
import json
import logging
import shutil
import subprocess
Expand All @@ -23,8 +22,16 @@
from QEfficient.base.pytorch_transforms import PytorchTransform
from QEfficient.compile.qnn_compiler import compile as qnn_compile
from QEfficient.generation.cloud_infer import QAICInferenceSession
from QEfficient.utils import constants, dump_qconfig
from QEfficient.utils.cache import QEFF_HOME, to_hashable
from QEfficient.utils import (
constants,
create_json,
dump_qconfig,
filter_and_create_export_hash,
generate_mdp_partition_config,
hash_compile_params,
load_json,
)
from QEfficient.utils.cache import QEFF_HOME

logger = logging.getLogger(__name__)

Expand All @@ -46,12 +53,18 @@ class QEFFBaseModel(ABC):
def _transform_names(cls) -> List[str]:
return [x.__name__ for x in cls._pytorch_transforms + cls._onnx_transforms]

def __init__(self, model: torch.nn.Module) -> None:
def __init__(self, model: torch.nn.Module, **kwargs) -> None:
super().__init__()
self.model = model
self.hash_params = self.create_model_params(**kwargs)

self.onnx_path: Optional[str] = None
self.qpc_path: Optional[str] = None
self.qpc_session: Optional[QAICInferenceSession] = None
self.pretrained_model_name_or_path = kwargs.get("pretrained_model_name_or_path", None)
if hasattr(self.model.config, "architectures"):
model_architecture = getattr(self.model.config, "architectures", None)
self.model_architecture = model_architecture[0] if isinstance(model_architecture, list) else None

# Apply the transformations
any_transformed = False
Expand All @@ -64,13 +77,16 @@ def __init__(self, model: torch.nn.Module) -> None:
else:
logger.info(f"Pytorch transforms applied to model: {self.model_name}")

@property
@abstractmethod
def model_name(self) -> str: ...
def create_model_params(self, **kwargs) -> Dict:
    """Build the dictionary of parameters used to derive this model's export hash.

    Starts from a deep copy of the constructor kwargs (so later mutation of the
    caller's arguments cannot silently change the recorded hash inputs), then adds:
      - "config": the model configuration as a diff dict (non-default fields only;
        presumably transformers' ``PretrainedConfig.to_diff_dict`` — confirm),
      - "peft_config": the model's active PEFT config if present, else ``None``,
      - "applied_transform_names": names of the pytorch/onnx transforms this
        class applies (see ``_transform_names``).

    Returns:
        Dict: the parameters later filtered and hashed for the export directory.
    """
    model_params = copy.deepcopy(kwargs)
    model_params["config"] = self.model.config.to_diff_dict()
    model_params["peft_config"] = getattr(self.model, "active_peft_config", None)
    model_params["applied_transform_names"] = self._transform_names()
    return model_params

@property
@abstractmethod
def model_hash(self) -> str: ...
def model_name(self) -> str: ...

@abstractmethod
def export(self, export_dir: Optional[str] = None) -> Path:
Expand Down Expand Up @@ -135,8 +151,17 @@ def _export(
:onnx_transform_kwargs (dict): Additional arguments to be passed to `Transform.apply` for this class.
:export_dir (str): Specify the export directory. The export_dir will be suffixed with a hash corresponding to current model.
"""
export_dir = Path(export_dir or (QEFF_HOME / self.model_name))
export_dir = export_dir.with_name(export_dir.name + "-" + self.model_hash)
parent_dir = self.model_architecture or self.model_name
export_dir = Path(export_dir or (QEFF_HOME / parent_dir / self.model_name))
export_hash, filtered_hash_params = filter_and_create_export_hash(
model_params=self.hash_params,
output_names=output_names,
dynamic_axes=dynamic_axes,
export_kwargs=export_kwargs,
onnx_transform_kwargs=onnx_transform_kwargs,
)
self.export_hash = export_hash
export_dir = export_dir.with_name(export_dir.name + "-" + export_hash)
onnx_path = export_dir / f"{self.model_name}.onnx"
if onnx_path.is_file():
self.onnx_path = onnx_path
Expand Down Expand Up @@ -211,6 +236,11 @@ def _export(
finally:
shutil.rmtree(tmp_onnx_dir, ignore_errors=True)

# Dump JSON file with hashed parameters
hashed_params_export_path = export_dir / "hashed_export_params.json"
create_json(hashed_params_export_path, filtered_hash_params)
logger.info("Hashed parameters exported successfully.")

self.onnx_path = onnx_path
return onnx_path

Expand Down Expand Up @@ -241,12 +271,10 @@ def _compile(
:mdp_ts_num_devices (int): Number of devices to partition to use Multi-Device Partitioning with tensor-slicing.
:num_speculative_tokens (int, optional): Number of speculative tokens to take as input for Speculative Decoding Target Language Model.
:enable_qnn (bool): Enables QNN Compilation. ``Defaults to False.``
:qnn_config (str): Path of QNN Config parameters file. Any extra parameters for QNN compilation can be passed via this file. ``Defaults to None.``
:compiler_options: Pass any compiler option as input.
Any flag that is supported by `qaic-exec` can be passed. Params are converted to flags as below:
:qnn_config (str): Path of QNN Config parameters file. ``Defaults to None.``
:compiler_options: Pass any compiler option as input. Any flag that is supported by `qaic-exec` can be passed. Params are converted to flags as below:
- aic_num_cores=16 -> -aic-num-cores=16
- convert_to_fp16=True -> -convert-to-fp16
For QNN Compilation path, when enable_qnn is set to True, any parameter passed in compiler_options will be ignored.
"""
if onnx_path is None and self.onnx_path is None:
self.export()
Expand All @@ -258,28 +286,23 @@ def _compile(
raise FileNotFoundError(f"ONNX file not found at: {onnx_path}")

if enable_qnn:
if compiler_options:
logger.warning(
f"Extra arguments to QNN compilation are supported only via qnn_config file. Ignoring {compiler_options}"
)

self.qpc_path = qnn_compile(
onnx_path=onnx_path,
qpc_base_path=compile_dir,
specializations=specializations,
custom_io=custom_io,
device_group=list(range(mdp_ts_num_devices)),
num_cores=compiler_options.get("aic_num_cores", 16),
mxfp6=compiler_options.get("mxfp6_matmul", False),
num_cores=compiler_options.get("aic_num_cores", constants.DEFAULT_AIC_NUM_CORES),
mxfp6=compiler_options.get("mxfp6_matmul", constants.DEFAULT_AIC_MXPF6_MATMUL),
mxint8=mxint8_kv_cache,
qnn_config=qnn_config,
)

return self.qpc_path

command = constants.COMPILER + [f"-m={onnx_path}"]
if mdp_ts_json_path := compiler_options.pop("mdp_ts_json_path", None):
mdp_ts_num_devices = None

if mdp_ts_json_path := compiler_options.pop("mdp_load_partition_config", None):
command.append(f"-mdp-load-partition-config={mdp_ts_json_path}")

for key, value in compiler_options.items():
Expand All @@ -289,40 +312,50 @@ def _compile(
command.append(option)
continue
command.append(f"{option}={value}")
compile_hash = hashlib.sha256(to_hashable(command))

if specializations is not None:
compile_hash.update(to_hashable(specializations))

if custom_io is not None:
compile_hash.update(to_hashable(custom_io))

if num_speculative_tokens:
compile_hash.update(to_hashable({"num_speculative_tokens": num_speculative_tokens}))
# Hash num_devices too, since default value would always be 1.
compile_hash.update(to_hashable(mdp_ts_num_devices))

# Check if already compiled
compile_hash = compile_hash.hexdigest()[:16]
# Create a dummy mdp_ts_json if mdp-load-partition-config not provided and num_devices > 1
if mdp_ts_json_path is not None:
mdp_ts_json = load_json(str(mdp_ts_json_path))
elif mdp_ts_num_devices > 1:
mdp_ts_json = generate_mdp_partition_config(
mdp_ts_num_devices, compiler_options.get("aic_num_cores", constants.DEFAULT_AIC_NUM_CORES)
)
else:
mdp_ts_json = None

compile_hash, hashed_params = hash_compile_params(
[Reviewer comment (Contributor), attached to the line above: remove the comment "# Check if already compiled" as it's no longer needed here.]

command=command,
specializations=specializations,
custom_io=custom_io,
mdp_ts_num_devices=mdp_ts_num_devices,
mdp_ts_json=mdp_ts_json,
num_speculative_tokens=num_speculative_tokens,
)
compile_dir = qpc_path.with_name(qpc_path.name + "-" + compile_hash)

qpc_path = compile_dir / "qpc"
qpc_path.mkdir(parents=True, exist_ok=True)

if qpc_path.is_dir():
if (qpc_path / "programqpc.bin").is_file():
self.qpc_path = qpc_path
return qpc_path
# Probably compilation failure last time, delete directory to start over
shutil.rmtree(qpc_path)

# write the MDP partition config file if not provided
if mdp_ts_json is not None:
mdp_ts_json_path = compile_dir / f"mdp_ts_{mdp_ts_num_devices}.json"
create_json(str(mdp_ts_json_path), mdp_ts_json)
command.append(f"-mdp-load-partition-config={mdp_ts_json_path}")

# Write specializations.json file
if specializations is not None:
specializations_json = compile_dir / "specializations.json"
with open(specializations_json, "w") as fp:
json.dump(
{"specializations": [{k: str(v) for k, v in spec.items()} for spec in specializations]},
fp,
indent=4,
)
specializations_data = {
"specializations": [{k: str(v) for k, v in spec.items()} for spec in specializations]
}
create_json(str(specializations_json), specializations_data)
command.append(f"-network-specialization-config={specializations_json}")

# Write custom_io.yaml file
Expand All @@ -333,30 +366,11 @@ def _compile(
fp.write(f" - IOName: {io_name}\n Precision: {dtype}\n\n")
command.append(f"-custom-IO-list-file={custom_io_yaml}")

# Write mdp_config.json file
if not mdp_ts_json_path and mdp_ts_num_devices > 1:
num_cores = compiler_options.get("aic_num_cores", 16)
mdp_ts_json = compile_dir / f"mdp_ts_{mdp_ts_num_devices}.json"
with open(mdp_ts_json, "w") as fp:
json.dump(
{
"connections": [{"devices": list(range(mdp_ts_num_devices)), "type": "p2p"}],
"partitions": [
{
"name": "Partition0",
"devices": [{"deviceId": d, "numCores": num_cores} for d in range(mdp_ts_num_devices)],
}
],
},
fp,
indent=4,
)
command.append(f"-mdp-load-partition-config={mdp_ts_json}")

command.append(f"-aic-binary-dir={qpc_path}")
logger.info(f"Running compiler: {' '.join(command)}")
try:
subprocess.run(command, capture_output=True, check=True)

except subprocess.CalledProcessError as e:
raise RuntimeError(
"\n".join(
Expand All @@ -370,6 +384,10 @@ def _compile(
)
)

# Dump JSON file with hashed parameters
hashed_compile_params_path = compile_dir / "hashed_compile_params.json"
create_json(hashed_compile_params_path, hashed_params)
logger.info("Hashed parameters exported successfully.")
self.qpc_path = qpc_path

return qpc_path
Loading