From 47aec99c00a4f08862c20a6923fba19aa6915141 Mon Sep 17 00:00:00 2001 From: Anirudh Swaminathan Date: Mon, 6 Apr 2026 09:37:40 -0700 Subject: [PATCH] =?UTF-8?q?Intel=C2=AE=20NNCF=20Compression=20detect=20ONN?= =?UTF-8?q?X=20external=20data?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add auto-detection of ONNX external data files in the NNCF compression pass. If an ONNX model is locally sourced, the ONNXModelHandler points to just the .onnx file. This means that the EXTERNAL_DATA_DIR isn't set in AdvancedCompressionParameters, leading to NNCF failing to find the model weights. This commit fixes that and allows auto-detecting the presence of an external data file; the detected path is passed to NNCF AdvancedCompressionParameters if not already set manually, whether the model is locally sourced or chained from another pass that produces an ONNX model with external data. - [x] Add unit tests for this change. - [x] Make sure all tests can pass. - [ ] Update documents if necessary. - [x] Lint and apply fixes to your code by running `lintrunner -a` - [ ] Is this a user-facing change? If yes, give a description of this change to be included in the release notes. 
--------- Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- olive/passes/openvino/compression.py | 37 +++++++++-- olive/passes/openvino/ov_utils.py | 58 ++++++++++++++++ .../openvino/test_openvino_compression.py | 66 +++++++++++++++++++ 3 files changed, 156 insertions(+), 5 deletions(-) diff --git a/olive/passes/openvino/compression.py b/olive/passes/openvino/compression.py index 93cfc40a9e..c751e7f2cd 100644 --- a/olive/passes/openvino/compression.py +++ b/olive/passes/openvino/compression.py @@ -21,6 +21,7 @@ _validate_enum_value, create_genai_config, infer_library_name, + model_graph_uses_external_data, ) from olive.passes.pass_config import BasePassConfig, ParamCategory, PassConfigParam, get_user_script_data_config @@ -445,6 +446,7 @@ def _apply_compression( config: type[BasePassConfig], output_model_path: str, tokenizer: Optional[Any] = None, + model_source_dir: Optional[str] = None, ) -> Any: """Apply NNCF weight compression to a model. @@ -453,6 +455,9 @@ def _apply_compression( config: The pass configuration. output_model_path: Path where the output model will be saved. tokenizer: Optional tokenizer for dataset transform (used in HF path). + model_source_dir: Optional directory where the source ONNX model and its external data + files (e.g., model.onnx.data) reside. Required when the model is loaded from cache + and CWD != model directory. Returns: The compressed model object from nncf.compress_weights(). @@ -497,6 +502,19 @@ def _apply_compression( advanced_params = nncf.AdvancedCompressionParameters(**adv_par) + # For ONNX models with external data (e.g., model.onnx.data): when a model is loaded from + # cache, CWD != model directory, so NNCF cannot find the external data files using its + # default Path.cwd() lookup. Set EXTERNAL_DATA_DIR to the model's source directory. 
+ if model_source_dir is not None: + if advanced_params is None: + adv_par = {} + adv_par["backend_params"] = {BackendParameters.EXTERNAL_DATA_DIR: model_source_dir} + advanced_params = nncf.AdvancedCompressionParameters(**adv_par) + elif not hasattr(advanced_params, "backend_params") or advanced_params.backend_params is None: + advanced_params.backend_params = {BackendParameters.EXTERNAL_DATA_DIR: model_source_dir} + elif BackendParameters.EXTERNAL_DATA_DIR not in advanced_params.backend_params: + advanced_params.backend_params[BackendParameters.EXTERNAL_DATA_DIR] = model_source_dir + # perform weight compression return nncf.compress_weights( model_to_compress, dataset=compression_dataset, advanced_parameters=advanced_params, **compress_config @@ -733,19 +751,28 @@ def _run_onnx_pass( if loaded_model.opset_import[0].version != target_opset: loaded_model = onnx.version_converter.convert_version(loaded_model, target_opset) + # Detect external data from model metadata. + # When loading from cache, CWD differs from the model directory, so NNCF needs the explicit + # source directory if any tensor is marked with external data. 
+ model_source_dir = None + if model_graph_uses_external_data(loaded_model.graph): + model_source_dir = str(Path(model.model_path).parent.resolve()) + # perform weight compression using shared compression logic - output_model = self._apply_compression(loaded_model, config, output_model_path) + output_model = self._apply_compression( + loaded_model, config, output_model_path, model_source_dir=model_source_dir + ) # save to output_model_path model_name = Path(model.model_path).name.replace(".onnx", "_compressed.onnx") - model_dir = Path(output_model_path) + output_dir = Path(output_model_path) - if Path(output_model_path).is_dir(): - output_model_path = Path(output_model_path) / model_name + if output_dir.is_dir(): + output_model_path = output_dir / model_name onnx.save(output_model, output_model_path, save_as_external_data=True) # generate the genai_config.json file for GenAI ONNX models - create_genai_config(model_name, model_dir, config) + create_genai_config(model_name, output_dir, config) return ONNXModelHandler(model_path=output_model_path) diff --git a/olive/passes/openvino/ov_utils.py b/olive/passes/openvino/ov_utils.py index 9b9f4db9d8..952f27ed08 100644 --- a/olive/passes/openvino/ov_utils.py +++ b/olive/passes/openvino/ov_utils.py @@ -368,3 +368,61 @@ def create_genai_config(model_name: str, output_path: str, config: BasePassConfi output_genai_config = Path(output_path) / "genai_config.json" with open(output_genai_config, "w") as f: json.dump(genai_config, f, indent=4) + + +def model_graph_uses_external_data(graph) -> bool: + """Return True when any tensor in an ONNX graph (or its subgraphs) is stored as external data.""" + try: + import onnx + except ImportError: + raise ImportError("Please install onnx to check for external data in ONNX models") from None + + for tensor in graph.initializer: + if onnx.external_data_helper.uses_external_data(tensor): + logger.debug("Model uses external data due to initializer: %s", tensor.name) + return True + + for 
sparse_initializer in graph.sparse_initializer: + if onnx.external_data_helper.uses_external_data(sparse_initializer.values): + logger.debug("Model uses external data due to sparse initializer: %s", sparse_initializer.values.name) + return True + + for node in graph.node: + for attribute in node.attribute: + if attribute.type == onnx.AttributeProto.TENSOR and onnx.external_data_helper.uses_external_data( + attribute.t + ): + logger.debug( + "Model uses external data due to node attribute tensor: %s in node %s", + attribute.t.name, + node.name, + ) + return True + if attribute.type == onnx.AttributeProto.TENSORS: + for tensor in attribute.tensors: + if onnx.external_data_helper.uses_external_data(tensor): + logger.debug( + "Model uses external data due to node attribute tensors: %s in node %s", + tensor.name, + node.name, + ) + return True + if attribute.type == onnx.AttributeProto.GRAPH and model_graph_uses_external_data(attribute.g): + logger.debug( + "Model uses external data due to subgraph in node attribute: %s in node %s", + attribute.g.name, + node.name, + ) + return True + if attribute.type == onnx.AttributeProto.GRAPHS: + for subgraph in attribute.graphs: + if model_graph_uses_external_data(subgraph): + logger.debug( + "Model uses external data due to subgraph in node attribute: %s in node %s", + subgraph.name, + node.name, + ) + return True + + logger.debug("No external data found in the model.") + return False diff --git a/test/passes/openvino/test_openvino_compression.py b/test/passes/openvino/test_openvino_compression.py index 51a7d86647..0014b2c2fc 100644 --- a/test/passes/openvino/test_openvino_compression.py +++ b/test/passes/openvino/test_openvino_compression.py @@ -382,6 +382,72 @@ def test_openvino_weight_compression_onnx_to_onnx_multi_ignore_scope(tmp_path): shutil.rmtree(q_dir) +def test_openvino_weight_compression_onnx_to_onnx_auto_detect_external_data(tmp_path): + import numpy as np + import onnx + from nncf.parameters import 
CompressWeightsMode + from nncf.quantization.advanced_parameters import GroupSizeFallbackMode + + from olive.model.handler.onnx import ONNXModelHandler + + # sample ONNX model with external data creation + input_shape = [1, 64] + weight_shape = [64, 128] + weight_data = np.random.randn(*weight_shape).astype(np.float32) + + input_vi = onnx.helper.make_tensor_value_info("input", onnx.TensorProto.FLOAT, input_shape) + output_vi = onnx.helper.make_tensor_value_info("output", onnx.TensorProto.FLOAT, [1, 128]) + # Use numpy_helper.from_array so data is stored as raw_data bytes, + # which onnx.save will externalize (make_tensor uses float_data repeated field and won't). + weight_init = onnx.numpy_helper.from_array(weight_data, name="weight") + matmul_node = onnx.helper.make_node("MatMul", inputs=["input", "weight"], outputs=["output"], name="MatMul_0") + + graph = onnx.helper.make_graph( + nodes=[matmul_node], + name="external-data-test", + inputs=[input_vi], + outputs=[output_vi], + initializer=[weight_init], + ) + model_def = onnx.helper.make_model(graph, producer_name="olive-test") + model_def.opset_import[0].version = 21 + + # save generated ONNX model with external data + model_dir = tmp_path / "source_model" + model_dir.mkdir() + model_path = model_dir / "model.onnx" + onnx.save(model_def, str(model_path), save_as_external_data=True, location="model.onnx.data") + + # verify external data file creation + assert (model_dir / "model.onnx.data").exists() + + input_onnx_model = ONNXModelHandler(model_path=str(model_path)) + + # NNCF compression pass + openvino_weight_compression_config = { + "compress_config": {"mode": CompressWeightsMode.INT4_SYM, "ratio": 1.0, "all_layers": True}, + "extra_args": { + "use_onnx": True, + "advanced_compression_parameters": { + "group_size_fallback_mode": GroupSizeFallbackMode.IGNORE, + }, + }, + } + p = create_pass_from_dict( + OpenVINOWeightCompression, + openvino_weight_compression_config, + disable_search=True, + 
accelerator_spec=AcceleratorSpec("cpu", "OpenVINOExecutionProvider"), + ) + output_folder = str(tmp_path / "openvino_wc_output") + compressed_model = p.run(input_onnx_model, output_folder) + + # test if the model file is created + assert Path(compressed_model.model_path).exists() + assert Path(compressed_model.model_path).is_file() + assert Path(compressed_model.model_path.replace(".onnx", ".onnx.data")).exists() + + @pytest.mark.skipif( not package_version_at_least("optimum", "2.1.0"), reason="Requires optimum >= 2.1.0",