diff --git a/olive/passes/openvino/compression.py b/olive/passes/openvino/compression.py
index 93cfc40a9e..c751e7f2cd 100644
--- a/olive/passes/openvino/compression.py
+++ b/olive/passes/openvino/compression.py
@@ -21,6 +21,7 @@
     _validate_enum_value,
     create_genai_config,
     infer_library_name,
+    model_graph_uses_external_data,
 )
 from olive.passes.pass_config import BasePassConfig, ParamCategory, PassConfigParam, get_user_script_data_config
 
@@ -445,6 +446,7 @@ def _apply_compression(
         config: type[BasePassConfig],
         output_model_path: str,
         tokenizer: Optional[Any] = None,
+        model_source_dir: Optional[str] = None,
     ) -> Any:
         """Apply NNCF weight compression to a model.
 
@@ -453,6 +455,9 @@ def _apply_compression(
             config: The pass configuration.
             output_model_path: Path where the output model will be saved.
             tokenizer: Optional tokenizer for dataset transform (used in HF path).
+            model_source_dir: Optional directory where the source ONNX model and its external data
+                files (e.g., model.onnx.data) reside. Required when the model is loaded from cache
+                and CWD != model directory.
 
         Returns:
             The compressed model object from nncf.compress_weights().
@@ -497,6 +502,19 @@ def _apply_compression(
 
             advanced_params = nncf.AdvancedCompressionParameters(**adv_par)
 
+        # For ONNX models with external data (e.g., model.onnx.data): when a model is loaded from
+        # cache, CWD != model directory, so NNCF cannot find the external data files using its
+        # default Path.cwd() lookup. Set EXTERNAL_DATA_DIR to the model's source directory.
+        if model_source_dir is not None:
+            if advanced_params is None:
+                adv_par = {}
+                adv_par["backend_params"] = {BackendParameters.EXTERNAL_DATA_DIR: model_source_dir}
+                advanced_params = nncf.AdvancedCompressionParameters(**adv_par)
+            elif not hasattr(advanced_params, "backend_params") or advanced_params.backend_params is None:
+                advanced_params.backend_params = {BackendParameters.EXTERNAL_DATA_DIR: model_source_dir}
+            elif BackendParameters.EXTERNAL_DATA_DIR not in advanced_params.backend_params:
+                advanced_params.backend_params[BackendParameters.EXTERNAL_DATA_DIR] = model_source_dir
+
         # perform weight compression
         return nncf.compress_weights(
             model_to_compress, dataset=compression_dataset, advanced_parameters=advanced_params, **compress_config
@@ -733,19 +751,28 @@ def _run_onnx_pass(
         if loaded_model.opset_import[0].version != target_opset:
             loaded_model = onnx.version_converter.convert_version(loaded_model, target_opset)
 
+        # Detect external data from model metadata.
+        # When loading from cache, CWD differs from the model directory, so NNCF needs the explicit
+        # source directory if any tensor is marked with external data.
+        model_source_dir = None
+        if model_graph_uses_external_data(loaded_model.graph):
+            model_source_dir = str(Path(model.model_path).parent.resolve())
+
         # perform weight compression using shared compression logic
-        output_model = self._apply_compression(loaded_model, config, output_model_path)
+        output_model = self._apply_compression(
+            loaded_model, config, output_model_path, model_source_dir=model_source_dir
+        )
 
         # save to output_model_path
         model_name = Path(model.model_path).name.replace(".onnx", "_compressed.onnx")
-        model_dir = Path(output_model_path)
+        output_dir = Path(output_model_path)
 
-        if Path(output_model_path).is_dir():
-            output_model_path = Path(output_model_path) / model_name
+        if output_dir.is_dir():
+            output_model_path = output_dir / model_name
 
         onnx.save(output_model, output_model_path, save_as_external_data=True)
 
         # generate the genai_config.json file for GenAI ONNX models
-        create_genai_config(model_name, model_dir, config)
+        create_genai_config(model_name, output_dir, config)
 
         return ONNXModelHandler(model_path=output_model_path)
diff --git a/olive/passes/openvino/ov_utils.py b/olive/passes/openvino/ov_utils.py
index 9b9f4db9d8..952f27ed08 100644
--- a/olive/passes/openvino/ov_utils.py
+++ b/olive/passes/openvino/ov_utils.py
@@ -368,3 +368,82 @@ def create_genai_config(model_name: str, output_path: str, config: BasePassConfi
     output_genai_config = Path(output_path) / "genai_config.json"
     with open(output_genai_config, "w") as f:
         json.dump(genai_config, f, indent=4)
+
+
+def model_graph_uses_external_data(graph) -> bool:
+    """Return True when any tensor in an ONNX graph (or its subgraphs) is stored as external data.
+
+    Initializers, sparse initializers (values and indices tensors), tensor-valued node attributes
+    and graph-valued node attributes (searched recursively) are inspected. The search stops at the
+    first tensor found to be stored externally.
+
+    Args:
+        graph: The ONNX graph (e.g., ``model.graph``) to inspect.
+
+    Returns:
+        True if at least one tensor is marked as external data, False otherwise.
+
+    Raises:
+        ImportError: If the onnx package is not installed.
+    """
+    try:
+        import onnx
+    except ImportError:
+        raise ImportError("Please install onnx to check for external data in ONNX models") from None
+
+    for tensor in graph.initializer:
+        if onnx.external_data_helper.uses_external_data(tensor):
+            logger.debug("Model uses external data due to initializer: %s", tensor.name)
+            return True
+
+    for sparse_initializer in graph.sparse_initializer:
+        # A sparse initializer is made of two tensors; either one may be stored externally.
+        for tensor in (sparse_initializer.values, sparse_initializer.indices):
+            if onnx.external_data_helper.uses_external_data(tensor):
+                logger.debug(
+                    "Model uses external data due to sparse initializer: %s",
+                    sparse_initializer.values.name,
+                )
+                return True
+
+    for node in graph.node:
+        for attribute in node.attribute:
+            if attribute.type == onnx.AttributeProto.TENSOR and onnx.external_data_helper.uses_external_data(
+                attribute.t
+            ):
+                logger.debug(
+                    "Model uses external data due to node attribute tensor: %s in node %s",
+                    attribute.t.name,
+                    node.name,
+                )
+                return True
+            if attribute.type == onnx.AttributeProto.TENSORS:
+                for tensor in attribute.tensors:
+                    if onnx.external_data_helper.uses_external_data(tensor):
+                        logger.debug(
+                            "Model uses external data due to node attribute tensors: %s in node %s",
+                            tensor.name,
+                            node.name,
+                        )
+                        return True
+            if attribute.type == onnx.AttributeProto.GRAPH and model_graph_uses_external_data(attribute.g):
+                logger.debug(
+                    "Model uses external data due to subgraph in node attribute: %s in node %s",
+                    attribute.g.name,
+                    node.name,
+                )
+                return True
+            if attribute.type == onnx.AttributeProto.GRAPHS:
+                for subgraph in attribute.graphs:
+                    if model_graph_uses_external_data(subgraph):
+                        logger.debug(
+                            "Model uses external data due to subgraph in node attribute: %s in node %s",
+                            subgraph.name,
+                            node.name,
+                        )
+                        return True
+
+    # This point is also reached once per clean subgraph during recursion, so report the graph
+    # that was scanned rather than claiming the whole model has no external data.
+    logger.debug("No external data found in graph: %s", graph.name)
+    return False
diff --git a/test/passes/openvino/test_openvino_compression.py b/test/passes/openvino/test_openvino_compression.py
index 51a7d86647..0014b2c2fc 100644
--- a/test/passes/openvino/test_openvino_compression.py
+++ b/test/passes/openvino/test_openvino_compression.py
@@ -382,6 +382,72 @@ def test_openvino_weight_compression_onnx_to_onnx_multi_ignore_scope(tmp_path):
     shutil.rmtree(q_dir)
 
 
+def test_openvino_weight_compression_onnx_to_onnx_auto_detect_external_data(tmp_path):
+    import numpy as np
+    import onnx
+    from nncf.parameters import CompressWeightsMode
+    from nncf.quantization.advanced_parameters import GroupSizeFallbackMode
+
+    from olive.model.handler.onnx import ONNXModelHandler
+
+    # sample ONNX model with external data creation
+    input_shape = [1, 64]
+    weight_shape = [64, 128]
+    weight_data = np.random.randn(*weight_shape).astype(np.float32)
+
+    input_vi = onnx.helper.make_tensor_value_info("input", onnx.TensorProto.FLOAT, input_shape)
+    output_vi = onnx.helper.make_tensor_value_info("output", onnx.TensorProto.FLOAT, [1, 128])
+    # Use numpy_helper.from_array so data is stored as raw_data bytes,
+    # which onnx.save will externalize (make_tensor uses float_data repeated field and won't).
+    weight_init = onnx.numpy_helper.from_array(weight_data, name="weight")
+    matmul_node = onnx.helper.make_node("MatMul", inputs=["input", "weight"], outputs=["output"], name="MatMul_0")
+
+    graph = onnx.helper.make_graph(
+        nodes=[matmul_node],
+        name="external-data-test",
+        inputs=[input_vi],
+        outputs=[output_vi],
+        initializer=[weight_init],
+    )
+    model_def = onnx.helper.make_model(graph, producer_name="olive-test")
+    model_def.opset_import[0].version = 21
+
+    # save generated ONNX model with external data
+    model_dir = tmp_path / "source_model"
+    model_dir.mkdir()
+    model_path = model_dir / "model.onnx"
+    onnx.save(model_def, str(model_path), save_as_external_data=True, location="model.onnx.data")
+
+    # verify external data file creation
+    assert (model_dir / "model.onnx.data").exists()
+
+    input_onnx_model = ONNXModelHandler(model_path=str(model_path))
+
+    # NNCF compression pass
+    openvino_weight_compression_config = {
+        "compress_config": {"mode": CompressWeightsMode.INT4_SYM, "ratio": 1.0, "all_layers": True},
+        "extra_args": {
+            "use_onnx": True,
+            "advanced_compression_parameters": {
+                "group_size_fallback_mode": GroupSizeFallbackMode.IGNORE,
+            },
+        },
+    }
+    p = create_pass_from_dict(
+        OpenVINOWeightCompression,
+        openvino_weight_compression_config,
+        disable_search=True,
+        accelerator_spec=AcceleratorSpec("cpu", "OpenVINOExecutionProvider"),
+    )
+    output_folder = str(tmp_path / "openvino_wc_output")
+    compressed_model = p.run(input_onnx_model, output_folder)
+
+    # test if the model file is created
+    assert Path(compressed_model.model_path).exists()
+    assert Path(compressed_model.model_path).is_file()
+    assert Path(compressed_model.model_path.replace(".onnx", ".onnx.data")).exists()
+
+
 @pytest.mark.skipif(
     not package_version_at_least("optimum", "2.1.0"),
     reason="Requires optimum >= 2.1.0",