Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 32 additions & 5 deletions olive/passes/openvino/compression.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
_validate_enum_value,
create_genai_config,
infer_library_name,
model_graph_uses_external_data,
)
from olive.passes.pass_config import BasePassConfig, ParamCategory, PassConfigParam, get_user_script_data_config

Expand Down Expand Up @@ -445,6 +446,7 @@ def _apply_compression(
config: type[BasePassConfig],
output_model_path: str,
tokenizer: Optional[Any] = None,
model_source_dir: Optional[str] = None,
) -> Any:
"""Apply NNCF weight compression to a model.

Expand All @@ -453,6 +455,9 @@ def _apply_compression(
config: The pass configuration.
output_model_path: Path where the output model will be saved.
tokenizer: Optional tokenizer for dataset transform (used in HF path).
model_source_dir: Optional directory where the source ONNX model and its external data
files (e.g., model.onnx.data) reside. Required when the model is loaded from cache
and CWD != model directory.

Returns:
The compressed model object from nncf.compress_weights().
Expand Down Expand Up @@ -497,6 +502,19 @@ def _apply_compression(

advanced_params = nncf.AdvancedCompressionParameters(**adv_par)

# For ONNX models with external data (e.g., model.onnx.data): when a model is loaded from
# cache, CWD != model directory, so NNCF cannot find the external data files using its
# default Path.cwd() lookup. Set EXTERNAL_DATA_DIR to the model's source directory.
if model_source_dir is not None:
if advanced_params is None:
adv_par = {}
adv_par["backend_params"] = {BackendParameters.EXTERNAL_DATA_DIR: model_source_dir}
advanced_params = nncf.AdvancedCompressionParameters(**adv_par)
elif not hasattr(advanced_params, "backend_params") or advanced_params.backend_params is None:
advanced_params.backend_params = {BackendParameters.EXTERNAL_DATA_DIR: model_source_dir}
elif BackendParameters.EXTERNAL_DATA_DIR not in advanced_params.backend_params:
advanced_params.backend_params[BackendParameters.EXTERNAL_DATA_DIR] = model_source_dir

# perform weight compression
return nncf.compress_weights(
model_to_compress, dataset=compression_dataset, advanced_parameters=advanced_params, **compress_config
Expand Down Expand Up @@ -733,19 +751,28 @@ def _run_onnx_pass(
if loaded_model.opset_import[0].version != target_opset:
loaded_model = onnx.version_converter.convert_version(loaded_model, target_opset)

# Detect external data from model metadata.
# When loading from cache, CWD differs from the model directory, so NNCF needs the explicit
# source directory if any tensor is marked with external data.
model_source_dir = None
if model_graph_uses_external_data(loaded_model.graph):
model_source_dir = str(Path(model.model_path).parent.resolve())

# perform weight compression using shared compression logic
output_model = self._apply_compression(loaded_model, config, output_model_path)
output_model = self._apply_compression(
loaded_model, config, output_model_path, model_source_dir=model_source_dir
)

# save to output_model_path
model_name = Path(model.model_path).name.replace(".onnx", "_compressed.onnx")
model_dir = Path(output_model_path)
output_dir = Path(output_model_path)

if Path(output_model_path).is_dir():
output_model_path = Path(output_model_path) / model_name
if output_dir.is_dir():
output_model_path = output_dir / model_name
onnx.save(output_model, output_model_path, save_as_external_data=True)

# generate the genai_config.json file for GenAI ONNX models
create_genai_config(model_name, model_dir, config)
create_genai_config(model_name, output_dir, config)

return ONNXModelHandler(model_path=output_model_path)

Expand Down
58 changes: 58 additions & 0 deletions olive/passes/openvino/ov_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -368,3 +368,61 @@ def create_genai_config(model_name: str, output_path: str, config: BasePassConfi
output_genai_config = Path(output_path) / "genai_config.json"
with open(output_genai_config, "w") as f:
json.dump(genai_config, f, indent=4)


def model_graph_uses_external_data(graph) -> bool:
    """Return True when any tensor in an ONNX graph (or its subgraphs) is stored as external data."""
    try:
        import onnx
    except ImportError:
        raise ImportError("Please install onnx to check for external data in ONNX models") from None

    # Hoist the repeatedly used helper and enum once for the whole walk.
    uses_external = onnx.external_data_helper.uses_external_data
    attr_proto = onnx.AttributeProto

    # Plain and sparse initializers hold the bulk of a model's weights.
    for init in graph.initializer:
        if uses_external(init):
            logger.debug("Model uses external data due to initializer: %s", init.name)
            return True

    for sparse in graph.sparse_initializer:
        if uses_external(sparse.values):
            logger.debug("Model uses external data due to sparse initializer: %s", sparse.values.name)
            return True

    # Tensors may also be embedded in node attributes, including inside nested
    # subgraphs (e.g. If/Loop bodies), so recurse through GRAPH/GRAPHS attributes.
    for node in graph.node:
        for attr in node.attribute:
            if attr.type == attr_proto.TENSOR and uses_external(attr.t):
                logger.debug(
                    "Model uses external data due to node attribute tensor: %s in node %s",
                    attr.t.name,
                    node.name,
                )
                return True
            if attr.type == attr_proto.TENSORS:
                for attr_tensor in attr.tensors:
                    if uses_external(attr_tensor):
                        logger.debug(
                            "Model uses external data due to node attribute tensors: %s in node %s",
                            attr_tensor.name,
                            node.name,
                        )
                        return True
            if attr.type == attr_proto.GRAPH and model_graph_uses_external_data(attr.g):
                logger.debug(
                    "Model uses external data due to subgraph in node attribute: %s in node %s",
                    attr.g.name,
                    node.name,
                )
                return True
            if attr.type == attr_proto.GRAPHS:
                for sub in attr.graphs:
                    if model_graph_uses_external_data(sub):
                        logger.debug(
                            "Model uses external data due to subgraph in node attribute: %s in node %s",
                            sub.name,
                            node.name,
                        )
                        return True

    logger.debug("No external data found in the model.")
    return False
66 changes: 66 additions & 0 deletions test/passes/openvino/test_openvino_compression.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,72 @@ def test_openvino_weight_compression_onnx_to_onnx_multi_ignore_scope(tmp_path):
shutil.rmtree(q_dir)


def test_openvino_weight_compression_onnx_to_onnx_auto_detect_external_data(tmp_path):
    import numpy as np
    import onnx
    from nncf.parameters import CompressWeightsMode
    from nncf.quantization.advanced_parameters import GroupSizeFallbackMode

    from olive.model.handler.onnx import ONNXModelHandler

    # Build a minimal single-MatMul graph whose one weight will be externalized on save.
    in_vi = onnx.helper.make_tensor_value_info("input", onnx.TensorProto.FLOAT, [1, 64])
    out_vi = onnx.helper.make_tensor_value_info("output", onnx.TensorProto.FLOAT, [1, 128])
    # Use numpy_helper.from_array so data is stored as raw_data bytes,
    # which onnx.save will externalize (make_tensor uses float_data repeated field and won't).
    weights = onnx.numpy_helper.from_array(np.random.randn(64, 128).astype(np.float32), name="weight")
    node = onnx.helper.make_node("MatMul", inputs=["input", "weight"], outputs=["output"], name="MatMul_0")

    onnx_graph = onnx.helper.make_graph(
        nodes=[node],
        name="external-data-test",
        inputs=[in_vi],
        outputs=[out_vi],
        initializer=[weights],
    )
    proto = onnx.helper.make_model(onnx_graph, producer_name="olive-test")
    proto.opset_import[0].version = 21

    # Save the model with its weight split out into a sibling external-data file.
    source_dir = tmp_path / "source_model"
    source_dir.mkdir()
    onnx_path = source_dir / "model.onnx"
    onnx.save(proto, str(onnx_path), save_as_external_data=True, location="model.onnx.data")

    # The external payload must exist for the auto-detection path to be exercised.
    assert (source_dir / "model.onnx.data").exists()

    handler = ONNXModelHandler(model_path=str(onnx_path))

    # Configure the NNCF weight-compression pass for the ONNX path.
    wc_config = {
        "compress_config": {"mode": CompressWeightsMode.INT4_SYM, "ratio": 1.0, "all_layers": True},
        "extra_args": {
            "use_onnx": True,
            "advanced_compression_parameters": {
                "group_size_fallback_mode": GroupSizeFallbackMode.IGNORE,
            },
        },
    }
    wc_pass = create_pass_from_dict(
        OpenVINOWeightCompression,
        wc_config,
        disable_search=True,
        accelerator_spec=AcceleratorSpec("cpu", "OpenVINOExecutionProvider"),
    )
    out_dir = str(tmp_path / "openvino_wc_output")
    result = wc_pass.run(handler, out_dir)

    # The compressed model and its external-data file should both be written out.
    result_path = Path(result.model_path)
    assert result_path.exists()
    assert result_path.is_file()
    assert Path(result.model_path.replace(".onnx", ".onnx.data")).exists()


@pytest.mark.skipif(
not package_version_at_least("optimum", "2.1.0"),
reason="Requires optimum >= 2.1.0",
Expand Down
Loading