diff --git a/docs/source/reference/cli.rst b/docs/source/reference/cli.rst index 6be2798fa3..76bec2fabf 100644 --- a/docs/source/reference/cli.rst +++ b/docs/source/reference/cli.rst @@ -170,6 +170,17 @@ Run benchmarking using llm-eval. :prog: olive :path: benchmark +Generate Model Package +====================== + +Merge multiple model outputs into a model package with manifest and per-component metadata. + +.. argparse:: + :module: olive.cli.launcher + :func: get_cli_parser + :prog: olive + :path: generate-model-package + Providing Input Models ====================== diff --git a/docs/source/reference/pass.rst b/docs/source/reference/pass.rst index fa303cfa4d..f69910440f 100644 --- a/docs/source/reference/pass.rst +++ b/docs/source/reference/pass.rst @@ -176,6 +176,12 @@ EPContextBinaryGenerator ------------------------ .. autoconfigclass:: olive.passes.EPContextBinaryGenerator +.. _model_package: + +ModelPackage +------------ +.. autoconfigclass:: olive.passes.ModelPackage + .. _compose_onnx_models: ComposeOnnxModels diff --git a/olive/cache.py b/olive/cache.py index fe351057b9..d0a89a54ab 100644 --- a/olive/cache.py +++ b/olive/cache.py @@ -384,14 +384,60 @@ def save_model( ): """Save a model from the cache to a given path.""" output_dir = Path(output_dir) if output_dir else Path.cwd() - - # If output_dir has a suffix (like .onnx), it's a file path - # Use parent directory for saving files - actual_output_dir = output_dir.parent if output_dir.suffix else output_dir + if output_dir.suffix and not output_dir.is_dir(): + actual_output_dir = output_dir.parent + else: + actual_output_dir = output_dir actual_output_dir.mkdir(parents=True, exist_ok=True) model_json = self.load_model(model_id) - if model_json["type"].lower() == "compositemodel": + if model_json["type"].lower() == "modelpackagemodel": + model_json_config = model_json["config"] + source_path = Path(model_json_config["model_path"]) + actual_output_dir.mkdir(parents=True, exist_ok=True) + + if 
source_path.exists(): + # Only copy target subdirectories (soc_60/, soc_73/, etc.) and manifest.json. + # Skip top-level additional_files (tokenizer, config) since each target subdir has its own copy. + for item in source_path.iterdir(): + dest = actual_output_dir / item.name + if item.is_dir(): + shutil.copytree(str(item), str(dest), dirs_exist_ok=overwrite) + elif item.name == "manifest.json": + shutil.copy2(str(item), str(dest)) + + # Update paths to point to new location + model_json_config["model_path"] = str(actual_output_dir) + + # Update target model paths + for target_model in model_json_config.get("target_models", []): + target_config = target_model.get("config", {}) + old_model_path = target_config.get("model_path", "") + if old_model_path and str(source_path) in old_model_path: + target_config["model_path"] = old_model_path.replace(str(source_path), str(actual_output_dir)) + + # Clear additional_files since each target subdir has its own copies + model_attributes = model_json_config.get("model_attributes") or {} + model_attributes.pop("additional_files", None) + + # Update manifest_path + if model_attributes.get("manifest_path"): + model_attributes["manifest_path"] = str( + actual_output_dir / Path(model_attributes["manifest_path"]).name + ) + + # Update manifest name: if pass config set model_name explicitly, keep it; + # otherwise update to the output directory name (e.g., "qwen_2.5_1.5b_Instruct") + manifest_file = actual_output_dir / "manifest.json" + if manifest_file.exists(): + manifest = json.loads(manifest_file.read_text()) + # The pass defaults model_name to the cache dir name (not meaningful). + # Replace it with the final output directory name unless it was explicitly configured. 
+ source_dir_name = source_path.name if source_path else None + if not manifest.get("name") or manifest.get("name") == source_dir_name: + manifest["name"] = actual_output_dir.name + manifest_file.write_text(json.dumps(manifest, indent=2)) + elif model_json["type"].lower() == "compositemodel": model_json_config = model_json["config"] model_attributes = model_json_config.get("model_attributes") or {} diff --git a/olive/cli/launcher.py b/olive/cli/launcher.py index fed339f87d..55e6ffdeb4 100644 --- a/olive/cli/launcher.py +++ b/olive/cli/launcher.py @@ -17,6 +17,7 @@ from olive.cli.generate_adapter import GenerateAdapterCommand from olive.cli.generate_cost_model import GenerateCostModelCommand from olive.cli.init import InitCommand +from olive.cli.model_package import ModelPackageCommand from olive.cli.optimize import OptimizeCommand from olive.cli.quantize import QuantizeCommand from olive.cli.run import WorkflowRunCommand @@ -54,6 +55,7 @@ def get_cli_parser(called_as_console_script: bool = True) -> ArgumentParser: ConfigureQualcommSDKCommand.register_subcommand(commands_parser) SharedCacheCommand.register_subcommand(commands_parser) ExtractAdaptersCommand.register_subcommand(commands_parser) + ModelPackageCommand.register_subcommand(commands_parser) BenchmarkCommand.register_subcommand(commands_parser) return parser diff --git a/olive/cli/model_package.py b/olive/cli/model_package.py new file mode 100644 index 0000000000..e71decb8e4 --- /dev/null +++ b/olive/cli/model_package.py @@ -0,0 +1,144 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
class ModelPackageCommand(BaseOliveCLICommand):
    """Merge multiple model outputs into a model package via the ModelPackage pass.

    Every ``--source`` must be an Olive output directory that contains a
    ``model_config.json``. The directory name of each source becomes the target
    (variant) name inside the package, so the names must be unique.
    """

    @staticmethod
    def register_subcommand(parser: ArgumentParser):
        """Register the ``generate-model-package`` sub-command and its options on *parser*."""
        sub_parser = parser.add_parser(
            "generate-model-package",
            help="Merge multiple model outputs into a model package with manifest",
        )

        sub_parser.add_argument(
            "-s",
            "--source",
            type=str,
            action="append",
            required=True,
            help="Source Olive output directory. Can be specified multiple times.",
        )

        sub_parser.add_argument(
            "-o",
            "--output_path",
            type=str,
            required=True,
            help="Output directory for the merged model package.",
        )

        sub_parser.add_argument(
            "--model_name",
            type=str,
            default=None,
            help="Model name for the manifest. If not set, derived from the output directory name.",
        )

        sub_parser.add_argument(
            "--model_version",
            type=str,
            default="1.0",
            help="Model version string for the manifest. Default: 1.0",
        )

        add_logging_options(sub_parser)
        add_save_config_file_options(sub_parser)
        add_telemetry_options(sub_parser)
        sub_parser.set_defaults(func=ModelPackageCommand)

    def _get_run_config(self, tempdir: str) -> dict[str, Any]:
        """Build the workflow run config that feeds all sources into a single ModelPackage pass.

        Args:
            tempdir: Directory used as the ``model_path`` of the synthetic input model.

        Returns:
            The run config dictionary (input model, local system, pass and output settings).

        Raises:
            ValueError: Propagated from ``_parse_sources`` when the sources are invalid.

        """
        sources = self._parse_sources()
        target_names = [target_name for target_name, _ in sources]
        target_models = [self._read_model_config(source_path) for _, source_path in sources]

        ep, device = self._extract_accelerator_info(target_models)

        return {
            "input_model": {
                "type": "ModelPackageModel",
                "target_models": target_models,
                "target_names": target_names,
                "model_path": tempdir,
            },
            "systems": {
                "local_system": {
                    "type": "LocalSystem",
                    "accelerators": [{"device": device, "execution_providers": [ep]}],
                }
            },
            "passes": {
                "pkg": {
                    "type": "ModelPackage",
                    "model_name": self.args.model_name,
                    "model_version": self.args.model_version,
                }
            },
            "output_dir": self.args.output_path,
            "host": "local_system",
            "target": "local_system",
            "log_severity_level": self.args.log_level,
            "no_artifacts": True,
        }

    @action
    def run(self):
        """Run the packaging workflow and return its result."""
        return self._run_workflow()

    def _parse_sources(self) -> list[tuple[str, Path]]:
        """Validate the ``--source`` arguments and derive a target name for each.

        Returns:
            A list of ``(target_name, source_path)`` tuples in command-line order.

        Raises:
            ValueError: If a source is not a directory, has no ``model_config.json``,
                maps to a target name that is already taken, or fewer than two
                sources were given.

        """
        sources: list[tuple[str, Path]] = []
        seen_names: set[str] = set()
        for source in self.args.source:
            path = Path(source)
            if not path.is_dir():
                raise ValueError(f"Source path does not exist or is not a directory: {path}")

            if not (path / "model_config.json").exists():
                raise ValueError(
                    f"No model_config.json found in {path}. "
                    "Source must be an Olive output directory with model_config.json."
                )

            # "." has an empty name and ".." is not a usable directory name; resolve only
            # in those cases so ordinary (and symlinked) sources keep the name the user typed.
            target_name = path.name if path.name not in ("", "..") else path.resolve().name
            if target_name in seen_names:
                # Target names become sub-directories and metadata keys in the package,
                # so a duplicate would silently overwrite an earlier variant.
                raise ValueError(
                    f"Duplicate target name '{target_name}' derived from source {path}. "
                    "Source directories must have unique names."
                )
            seen_names.add(target_name)
            sources.append((target_name, path))

        if len(sources) < 2:
            raise ValueError("At least two --source directories are required to merge.")

        return sources

    @staticmethod
    def _read_model_config(source_path: Path) -> dict:
        """Load and return ``model_config.json`` from *source_path* as a dictionary."""
        config_path = source_path / "model_config.json"
        with open(config_path, encoding="utf-8") as f:
            return json.load(f)

    @staticmethod
    def _extract_accelerator_info(target_models: list[dict]) -> tuple[str, str]:
        """Pick the execution provider and device used to configure the local system.

        The first model config whose ``model_attributes`` declares an ``ep`` or a
        ``device`` wins; whichever of the two it leaves out falls back to the CPU
        default. If no model declares either, the CPU defaults are returned.

        Args:
            target_models: Parsed ``model_config.json`` dictionaries.

        Returns:
            ``(execution_provider, device)`` with the device lower-cased.

        """
        default_ep, default_device = "CPUExecutionProvider", "cpu"
        for model_config in target_models:
            # "config" and "model_attributes" may be missing or explicitly null.
            attrs = (model_config.get("config") or {}).get("model_attributes") or {}
            ep = attrs.get("ep")
            device = attrs.get("device")
            if ep or device:
                return ep or default_ep, str(device or default_device).lower()
        return default_ep, default_device
- # If output_dir is a file path (has suffix), use parent directory - # Otherwise use output_dir itself - artifacts_dir = output_dir.parent if output_dir.suffix else output_dir + artifacts_dir = output_dir logger.info("Running Olive on accelerator: %s", accelerator_spec) with self._create_system(): @@ -254,10 +253,8 @@ def run_accelerator( self.footprint.record(is_input_model=True, model_id=input_model_id) - # Determine the directory for artifacts - # If output_dir is a file path (has suffix like .onnx), use parent directory - # Otherwise use output_dir itself - artifacts_dir = output_dir.parent if output_dir.suffix else output_dir + # Artifacts directory: file path (has suffix, not existing dir) uses parent + artifacts_dir = output_dir.parent if (output_dir.suffix and not output_dir.is_dir()) else output_dir try: if evaluate_input_model and not self.evaluator_config: diff --git a/olive/model/handler/__init__.py b/olive/model/handler/__init__.py index 8f335ac17f..1315817588 100644 --- a/olive/model/handler/__init__.py +++ b/olive/model/handler/__init__.py @@ -6,6 +6,7 @@ from olive.model.handler.composite import CompositeModelHandler from olive.model.handler.diffusers import DiffusersModelHandler from olive.model.handler.hf import DistributedHfModelHandler, HfModelHandler +from olive.model.handler.model_package import ModelPackageModelHandler from olive.model.handler.onnx import DistributedOnnxModelHandler, ONNXModelHandler from olive.model.handler.openvino import OpenVINOModelHandler from olive.model.handler.pytorch import PyTorchModelHandler @@ -19,6 +20,7 @@ "DistributedHfModelHandler", "DistributedOnnxModelHandler", "HfModelHandler", + "ModelPackageModelHandler", "ONNXModelHandler", "OliveModelHandler", "OpenVINOModelHandler", diff --git a/olive/model/handler/model_package.py b/olive/model/handler/model_package.py new file mode 100644 index 0000000000..39a0bdec15 --- /dev/null +++ b/olive/model/handler/model_package.py @@ -0,0 +1,116 @@ +# 
@model_handler_registry("ModelPackageModel")
class ModelPackageModelHandler(OliveModelHandler):
    """Olive handler for the ORT Model Package format.

    Wraps one model per deployment target (e.g., SoC, runtime version, execution
    provider) together with the name of each target. The ModelPackage pass consumes
    this handler to produce the final packaged output with manifest.json and
    per-component metadata.
    """

    resource_keys: tuple[str, ...] = ("model_path",)
    json_config_keys: tuple[str, ...] = ("target_names",)

    def __init__(
        self,
        target_models: list[Union[OliveModelHandler, dict[str, Any]]],
        target_names: list[str],
        model_path: OLIVE_RESOURCE_ANNOTATIONS = None,
        model_attributes: Optional[dict[str, Any]] = None,
    ):
        """Create the handler.

        Args:
            target_models: One entry per target; dict entries are validated as
                ``ModelConfig`` and turned into handlers.
            target_names: Name of each target, in the same order as ``target_models``.
            model_path: Optional path of the package on disk.
            model_attributes: Attributes shared by all targets.

        """
        super().__init__(
            model_path=model_path,
            framework=Framework.ONNX,
            model_file_format=ModelFileFormat.COMPOSITE_MODEL,
            model_attributes=model_attributes,
        )

        handlers = []
        for entry in target_models:
            if isinstance(entry, dict):
                handlers.append(validate_config(entry, ModelConfig).create_model())
            else:
                handlers.append(entry)
        self._target_models = handlers

        assert all(isinstance(handler, OliveModelHandler) for handler in self._target_models), (
            "All target models must be OliveModelHandler or dict"
        )
        assert len(self._target_models) == len(target_names), "Number of target models and names must match"
        self.target_names = target_names

    @property
    def target_models(self):
        """Yield each target model after merging the package-level attributes into it.

        Attributes already set on the target take precedence over the package-level ones.
        """
        for handler in self._target_models:
            merged_attributes = dict(self.model_attributes or {})
            merged_attributes.update(handler.model_attributes or {})
            handler.model_attributes = merged_attributes
            yield handler

    def to_json(self, check_object: bool = False):
        """Serialize the handler, storing for each target only the attributes that differ from the package."""
        package_json = super().to_json(check_object)
        serialized_targets = []
        package_json["config"]["target_models"] = serialized_targets
        for handler in self._target_models:
            handler_json = handler.to_json(check_object)
            handler_config = handler_json["config"]
            handler_config["model_attributes"] = dict_diff(handler_config["model_attributes"], self.model_attributes)
            serialized_targets.append(handler_json)
        return serialize_to_json(package_json, check_object)

    def get_target_models(self) -> Iterator[tuple[str, OliveModelHandler]]:
        """Iterate over (target_name, target_model) pairs."""
        return zip(self.target_names, self.target_models)

    @property
    def is_composite(self) -> bool:
        """Check if the target models are CompositeModelHandlers.

        All targets are expected to be the same type: either all composite or all non-composite.
        Returns False when there are no targets.
        """
        from olive.model.handler.composite import CompositeModelHandler

        if not self._target_models:
            return False

        composite_flags = [isinstance(handler, CompositeModelHandler) for handler in self._target_models]
        first_flag = composite_flags[0]
        assert all(flag == first_flag for flag in composite_flags), (
            "All target models must be the same type: either all CompositeModelHandler or all non-composite"
        )
        return first_flag

    def load_model(self, rank: int = None, cache_model: bool = True):
        """Not supported: the package has no single model to load."""
        raise NotImplementedError

    @property
    def size_on_disk(self) -> int:
        """Compute size of the model on disk (not implemented for packages)."""
        raise NotImplementedError

    def prepare_session(
        self,
        inference_settings: Optional[dict[str, Any]] = None,
        device: Device = Device.CPU,
        execution_providers: Union[str, list[str]] = None,
        rank: Optional[int] = None,
    ):
        """Always raises: sessions belong to the individual target models."""
        raise RuntimeError("ModelPackageModelHandler doesn't have a session of its own")

    def run_session(
        self,
        session: Any = None,
        inputs: Union[dict[str, Any], list[Any], tuple[Any, ...]] = None,
        **kwargs: dict[str, Any],
    ) -> Any:
        """Always raises: sessions belong to the individual target models."""
        raise RuntimeError("ModelPackageModelHandler doesn't have a session of its own")
b/olive/passes/olive_pass.py @@ -48,6 +48,12 @@ class Pass(ABC): # True if the pass processes a composite model at once. Otherwise, the components of the # composite model will be processed individually. _accepts_composite_model: bool = False + # True if the pass processes a model package model at once. Otherwise, each variant + # (e.g., different SoC, device, or runtime version) will be processed independently. + _accepts_model_package_model: bool = False + # When True, skip automatic carry-forward of additional_files in run(). + # Passes that manage config/additional files themselves (e.g., ModelPackage) should set this. + _skip_additional_files_carry_forward: bool = False @classmethod def __init_subclass__(cls, **kwargs) -> None: @@ -206,6 +212,7 @@ def validate_config( def run(self, model: OliveModelHandler, output_model_path: str) -> OliveModelHandler: """Run the pass on the model at a specific point in the search space.""" from olive.model import CompositeModelHandler, DistributedOnnxModelHandler + from olive.model.handler.model_package import ModelPackageModelHandler if not self._initialized: self._initialize() @@ -227,6 +234,20 @@ def run(self, model: OliveModelHandler, output_model_path: str) -> OliveModelHan inference_settings=model.inference_settings, model_attributes=model.model_attributes, ) + elif isinstance(model, ModelPackageModelHandler) and not self._accepts_model_package_model: + # Run the pass independently for each deployment variant + targets = [] + target_names = [] + model_dir = Path(output_model_path).with_suffix("") + model_dir.mkdir(parents=True, exist_ok=True) + for target_name, target_model in model.get_target_models(): + target_output_path = model_dir / target_name + output_target = self.run(target_model, str(target_output_path)) + targets.append(output_target) + target_names.append(target_name) + output_model = ModelPackageModelHandler( + targets, target_names, model_path=model_dir, model_attributes=model.model_attributes + ) elif 
isinstance(model, CompositeModelHandler) and not self._accepts_composite_model: components = [] component_names = [] @@ -246,7 +267,8 @@ def run(self, model: OliveModelHandler, output_model_path: str) -> OliveModelHan # the input model attributes, we should not update/extend anymore outside of the pass run output_model.model_attributes = output_model.model_attributes or model.model_attributes # save and carry forward additional files into the the output model path - Pass._carry_forward_additional_files(model, output_model) + if not self._skip_additional_files_carry_forward: + Pass._carry_forward_additional_files(model, output_model) return output_model @staticmethod @@ -287,7 +309,10 @@ def _carry_forward_additional_files(input_model: OliveModelHandler, output_model output_filepath = output_model_path / input_filepath.name if not output_filepath.exists(): # TODO(team): Use symlinks instead of copying the files. - shutil.copy(str(input_filepath), str(output_filepath)) + if input_filepath.is_dir(): + shutil.copytree(str(input_filepath), str(output_filepath)) + else: + shutil.copy(str(input_filepath), str(output_filepath)) # always add the file_path to the output model's additional files # this covers the case where the output model_path is the same as the input model_path # like for perf-tuning pass diff --git a/olive/passes/onnx/context_binary.py b/olive/passes/onnx/context_binary.py index d802fcc575..c3afa319e4 100644 --- a/olive/passes/onnx/context_binary.py +++ b/olive/passes/onnx/context_binary.py @@ -13,6 +13,7 @@ from olive.hardware.accelerator import AcceleratorSpec, Device from olive.hardware.constants import ExecutionProvider from olive.model import CompositeModelHandler, ONNXModelHandler +from olive.model.handler.model_package import ModelPackageModelHandler from olive.model.utils import resolve_onnx_path from olive.passes import Pass from olive.passes.onnx.common import ( @@ -26,7 +27,11 @@ class EPContextBinaryGenerator(Pass): - """Generate EP specific 
context binary for the model.""" + """Generate EP specific context binary for the model. + + When provider_options is a list of dicts, generates context binaries for each set of provider options + (e.g., multiple SoC models) and returns a ModelPackageModelHandler. + """ _accepts_composite_model = True @@ -47,9 +52,13 @@ def _default_config(cls, accelerator_spec: AcceleratorSpec) -> dict[str, PassCon ), ), "provider_options": PassConfigParam( - type_=dict, + type_=Union[dict, list], default_value=None, - description="Provider options for the EP.", + description=( + "Provider options for the EP. Can be a single dict or a list of dicts for model package" + " generation (e.g., multiple SoC models). When a list is provided, context binaries are" + " generated for each set of options and returned as a ModelPackageModelHandler." + ), ), "session_options": PassConfigParam( type_=dict, @@ -73,9 +82,7 @@ def _run_for_config( model: Union[ONNXModelHandler, CompositeModelHandler], config: type[BasePassConfig], output_model_path: str, - ) -> Union[ONNXModelHandler, CompositeModelHandler]: - from onnxruntime import __version__ as OrtVersion - + ) -> Union[ONNXModelHandler, CompositeModelHandler, ModelPackageModelHandler]: # session created using providers argument so will use the ort.get_available_providers() # TODO(jambayk): consider switching to the new EP API for Windows from onnxruntime import get_available_providers @@ -89,6 +96,90 @@ def _run_for_config( f" {get_available_providers()}" ) + # Model package mode: provider_options is a list with multiple entries + if isinstance(config.provider_options, list) and len(config.provider_options) > 1: + return self._run_model_package(model, config, output_model_path) + + # Single-target mode: unwrap single-element list if needed + if isinstance(config.provider_options, list): + single_config = deepcopy(config) + object.__setattr__(single_config, "provider_options", config.provider_options[0]) + return self._run_for_config(model, 
def _run_model_package(
    self,
    model: Union[ONNXModelHandler, CompositeModelHandler],
    config: type[BasePassConfig],
    output_model_path: str,
) -> ModelPackageModelHandler:
    """Generate context binaries for multiple hardware targets.

    Each entry in config.provider_options is a separate set of provider options
    (e.g., different soc_model values). One context binary is generated per entry
    under ``<output dir>/<target name>`` and the results are wrapped in a
    ModelPackageModelHandler.

    Target names are ``soc_<soc_model>`` (or ``soc_<index>`` when the entry has no
    ``soc_model``). If a name is already taken, the entry index is appended so
    that no target overwrites another target's output directory.

    Args:
        model: Input model handed unchanged to ``_run_single_target`` for every entry.
        config: Pass config whose ``provider_options`` is a list of dicts.
        output_model_path: Output path; its suffix is dropped to get the package directory.

    Returns:
        Handler wrapping the per-target outputs. Its attributes are the input model's
        attributes plus ``base_model_path`` pointing at the input model.

    """
    provider_options_list = config.provider_options
    assert all(isinstance(po, dict) for po in provider_options_list), (
        "Each entry in provider_options list must be a dict"
    )

    output_dir = Path(output_model_path).with_suffix("")
    output_dir.mkdir(parents=True, exist_ok=True)

    targets = []
    target_names = []
    used_names = set()
    for idx, provider_options in enumerate(provider_options_list):
        base_name = f"soc_{provider_options.get('soc_model', idx)}"

        # Two entries may share a soc_model (e.g. differing only in other options);
        # disambiguate so they do not write into the same directory.
        target_name = base_name
        suffix = idx
        while target_name in used_names:
            target_name = f"{base_name}_{suffix}"
            suffix += 1
        used_names.add(target_name)

        target_output_path = str(output_dir / target_name)

        # Deep-copy the config and swap in this entry's provider_options.
        # object.__setattr__ bypasses normal attribute assignment on the config object
        # (presumably because the config class restricts it -- confirm).
        single_config = deepcopy(config)
        object.__setattr__(single_config, "provider_options", provider_options)

        result = self._run_single_target(model, single_config, target_output_path)

        # Store target-specific metadata; values already on the result win over the input model's.
        result.model_attributes = {**(model.model_attributes or {}), **(result.model_attributes or {})}
        result.model_attributes["ep"] = self.accelerator_spec.execution_provider
        result.model_attributes["device"] = str(self.accelerator_spec.accelerator_type).upper()
        result.model_attributes["provider_options"] = provider_options
        result.model_attributes["architecture"] = provider_options.get("soc_model")

        targets.append(result)
        target_names.append(target_name)

    # Preserve base model path so ModelPackage can include the pre-optimized model
    parent_attrs = dict(model.model_attributes or {})
    parent_attrs["base_model_path"] = str(model.model_path)

    return ModelPackageModelHandler(
        targets,
        target_names,
        model_path=output_dir,
        model_attributes=parent_attrs,
    )
This is the original logic.""" + from onnxruntime import __version__ as OrtVersion + generate_kwargs = { "execution_provider": self.accelerator_spec.execution_provider, "provider_options": config.provider_options, diff --git a/olive/passes/onnx/model_builder.py b/olive/passes/onnx/model_builder.py index 978744ec1c..e2539fecac 100644 --- a/olive/passes/onnx/model_builder.py +++ b/olive/passes/onnx/model_builder.py @@ -214,12 +214,12 @@ def _run_for_config( ) -> ONNXModelHandler: try: from onnxruntime_genai.models.builder import create_model - except ImportError: + except ImportError as e: raise ImportError( "onnxruntime-genai package is required to run ModelBuilder pass. Please install the package" " corresponding to your onnxruntime installation using pip. cpu: onnxruntime-genai, cuda:" " onnxruntime-genai-cuda, directml: onnxruntime-genai-directml" - ) from None + ) from e self.maybe_patch_quant() precision = config.precision diff --git a/olive/passes/onnx/model_package.py b/olive/passes/onnx/model_package.py new file mode 100644 index 0000000000..d58a8b7b11 --- /dev/null +++ b/olive/passes/onnx/model_package.py @@ -0,0 +1,570 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+# -------------------------------------------------------------------------- +import json +import logging +import shutil +from collections import OrderedDict +from pathlib import Path +from typing import Optional, Union + +from huggingface_hub import model_info + +from olive.hardware.accelerator import AcceleratorSpec +from olive.model import CompositeModelHandler, ONNXModelHandler +from olive.model.handler.model_package import ModelPackageModelHandler +from olive.passes import Pass +from olive.passes.pass_config import BasePassConfig, PassConfigParam + +logger = logging.getLogger(__name__) + + +class ModelPackage(Pass): + """Generate a model package with manifest.json and per-component metadata.json. + + This pass takes a ModelPackageModelHandler (containing model variants for different + deployment targets) and generates a structured model package: + + - manifest.json at package root with model version, task, and component list + - metadata.json per component with variant descriptors for each deployment target + - configs/ directory for genai_config.json and chat_template files + + For composite models (where each target contains multiple ONNX components), the package + is organized by component first, then by target: + + models/// (files) + models//metadata.json + + Variant constraints include: + - ep (required): execution provider name + - device (optional): target device type (cpu, gpu, npu) + - ep_compatibility_info (always present): EP-specific compatibility string, empty if unavailable + """ + + _accepts_composite_model = True + _accepts_model_package_model = True + _skip_additional_files_carry_forward = True + + @classmethod + def _default_config(cls, accelerator_spec: AcceleratorSpec) -> dict[str, PassConfigParam]: + return { + "model_name": PassConfigParam( + type_=str, + default_value=None, + description="Model name for the manifest. 
If not set, derived from the output directory name.", + ), + "model_version": PassConfigParam( + type_=str, + default_value="1.0", + description="Model version string for the manifest.", + ), + } + + @staticmethod + def is_accelerator_agnostic(accelerator_spec: AcceleratorSpec) -> bool: + return False + + def _run_for_config( + self, + model: ModelPackageModelHandler, + config: type[BasePassConfig], + output_model_path: str, + ) -> ModelPackageModelHandler: + assert isinstance(model, ModelPackageModelHandler), "ModelPackage requires a ModelPackageModelHandler as input." + + output_dir = Path(output_model_path).with_suffix("") + output_dir.mkdir(parents=True, exist_ok=True) + + model_name = config.model_name or output_dir.name + + # Check if target models are composite (have multiple ONNX components) + is_composite = model.is_composite + + if is_composite: + return self._run_for_composite(model, config, output_dir, model_name) + + return self._run_for_single_component(model, config, output_dir, model_name) + + def _run_for_single_component( + self, + model: ModelPackageModelHandler, + config: type[BasePassConfig], + output_dir: Path, + model_name: str, + ) -> ModelPackageModelHandler: + """Package a non-composite model (single ONNX per target).""" + # Copy config files (genai_config.json, chat_template) to configs/ + config_file_names = self._copy_config_files(model, output_dir) + + # Extract task and derive component name + task = self._extract_task(model) + component_name = self._task_to_component_name(task) + + # Build model_variants dict and copy files into models// + component_dir = output_dir / "models" / component_name + component_dir.mkdir(parents=True, exist_ok=True) + + model_variants = {} + for target_name, target_model in model.get_target_models(): + target_attrs = target_model.model_attributes or {} + self._copy_target_model(target_name, target_model, component_dir) + file_path = self._get_relative_model_path(target_name, target_model) + constraints = 
self._build_constraints(target_attrs, target_model) + model_variants[target_name] = {"file": file_path, "constraints": constraints} + + # Copy base model (pre-context-binary) into base/ subdirectory + base_model_path = (model.model_attributes or {}).get("base_model_path") + if base_model_path: + self._copy_base_model(Path(base_model_path), component_dir, config_file_names) + base_file = self._get_base_model_file(component_dir / "base") + if base_file: + model_variants["base"] = {"file": base_file, "constraints": {}} + + # Remove config files from variant directories (they belong in configs/) + self._remove_config_files(component_dir, config_file_names) + + # Write metadata.json in the component directory + metadata = {"name": component_name, "model_variants": model_variants} + metadata_path = component_dir / "metadata.json" + with open(metadata_path, "w") as f: + json.dump(metadata, f, indent=2) + logger.info("Generated metadata at %s", metadata_path) + + # Write manifest.json at package root + manifest = { + "name": model_name, + "model_version": config.model_version, + "task": task, + "component_models": [component_name], + } + manifest_path = output_dir / "manifest.json" + with open(manifest_path, "w") as f: + json.dump(manifest, f, indent=2) + logger.info("Generated manifest at %s", manifest_path) + + return self._build_result(model, output_dir, manifest_path) + + def _run_for_composite( + self, + model: ModelPackageModelHandler, + config: type[BasePassConfig], + output_dir: Path, + model_name: str, + ) -> ModelPackageModelHandler: + """Package a composite model with per-component directory layout.""" + # Copy config files (genai_config.json, chat_template) to configs/ + config_file_names = self._copy_config_files(model, output_dir) + + # Collect component info across all targets. 
+ # component_data[comp_name][target_name] = (comp_handler, constraints) + component_data: dict[str, dict] = OrderedDict() + + for target_name, target_model in model.get_target_models(): + assert isinstance(target_model, CompositeModelHandler), ( + "Expected CompositeModelHandler for composite packaging" + ) + target_attrs = target_model.model_attributes or {} + + for comp_name, comp_handler in target_model.get_model_components(): + if comp_name not in component_data: + component_data[comp_name] = OrderedDict() + + constraints = self._build_constraints(target_attrs, comp_handler) + component_data[comp_name][target_name] = (comp_handler, constraints) + + models_dir = output_dir / "models" + component_names = list(component_data.keys()) + + # Get base model path for copying pre-optimized files + base_model_path = (model.model_attributes or {}).get("base_model_path") + + for comp_name in component_names: + comp_dir = models_dir / comp_name + comp_dir.mkdir(parents=True, exist_ok=True) + + model_variants = {} + for target_name, (comp_handler, constraints) in component_data[comp_name].items(): + target_dir = comp_dir / target_name + self._copy_component_files(comp_handler, target_dir) + + file_path = Path(comp_handler.model_path).name + model_variants[target_name] = {"file": file_path, "constraints": constraints} + + # Copy base model for this component + if base_model_path: + self._copy_base_component(Path(base_model_path), comp_name, comp_dir, config_file_names) + base_file = self._get_base_model_file(comp_dir / "base") + if base_file: + model_variants["base"] = {"file": base_file, "constraints": {}} + + # Remove config files from component variant directories + self._remove_config_files(comp_dir, config_file_names) + + # Write per-component metadata.json + metadata = {"name": comp_name, "model_variants": model_variants} + with open(comp_dir / "metadata.json", "w") as f: + json.dump(metadata, f, indent=2) + logger.info("Generated metadata for component %s", comp_name) 
+ + # Extract task + task = self._extract_task(model) + + # Write manifest.json at package root + manifest = { + "name": model_name, + "model_version": config.model_version, + "task": task, + "component_models": component_names, + } + manifest_path = output_dir / "manifest.json" + with open(manifest_path, "w") as f: + json.dump(manifest, f, indent=2) + logger.info("Generated manifest at %s", manifest_path) + + return self._build_result(model, output_dir, manifest_path) + + def _build_constraints( + self, + target_attrs: dict, + target_model: Union[ONNXModelHandler, CompositeModelHandler], + ) -> dict: + """Build the constraints dict for a variant.""" + constraints = {"ep": self.accelerator_spec.execution_provider} + device = target_attrs.get("device") + if device: + constraints["device"] = device + ep_compat = self._extract_ep_compatibility_from_onnx(target_model, self.accelerator_spec.execution_provider) + constraints["ep_compatibility_info"] = ep_compat or "" + return constraints + + @staticmethod + def _build_result( + model: ModelPackageModelHandler, + output_dir: Path, + manifest_path: Path, + ) -> ModelPackageModelHandler: + """Build the result ModelPackageModelHandler with updated attributes.""" + new_model_attributes = dict(model.model_attributes or {}) + new_model_attributes["manifest_path"] = str(manifest_path) + new_model_attributes.pop("additional_files", None) + new_model_attributes.pop("base_model_path", None) + + return ModelPackageModelHandler( + [target_model for _, target_model in model.get_target_models()], + [target_name for target_name, _ in model.get_target_models()], + model_path=output_dir, + model_attributes=new_model_attributes, + ) + + @staticmethod + def _copy_target_model( + target_name: str, + target_model: Union[ONNXModelHandler, CompositeModelHandler], + output_dir: Path, + ) -> None: + dest_dir = output_dir / target_name + if dest_dir.exists(): + return + + if isinstance(target_model, CompositeModelHandler): + src_dir = 
Path(target_model.model_path) + else: + src_dir = Path(target_model.model_path).parent + + if src_dir.is_dir(): + shutil.copytree(str(src_dir), str(dest_dir)) + else: + dest_dir.mkdir(parents=True, exist_ok=True) + shutil.copy2(str(target_model.model_path), str(dest_dir)) + + @staticmethod + def _get_relative_model_path( + target_name: str, + target_model: Union[ONNXModelHandler, CompositeModelHandler], + ) -> str: + if isinstance(target_model, ONNXModelHandler): + return Path(target_model.model_path).name + return "" + + @staticmethod + def _copy_component_files(component: ONNXModelHandler, dest_dir: Path) -> None: + """Copy files for a single ONNX component to dest_dir. + + Copies the .onnx file and its associated context binary (.bin) files + by reading EPContext nodes in the ONNX model. + """ + if dest_dir.exists(): + return + + dest_dir.mkdir(parents=True, exist_ok=True) + model_path = Path(component.model_path) + src_dir = model_path.parent + + # Copy the ONNX file itself + shutil.copy2(str(model_path), str(dest_dir / model_path.name)) + + # Find associated context binary files from EPContext nodes + associated_files = set() + try: + from olive.passes.onnx.common import get_context_bin_file_names + + associated_files.update(get_context_bin_file_names(str(model_path))) + except Exception: + logger.debug("Could not read context binary file names from %s", model_path, exc_info=True) + + # Also check for ONNX external data files + try: + import onnx + + onnx_model = onnx.load(str(model_path), load_external_data=False) + for init in onnx_model.graph.initializer: + if init.data_location == onnx.TensorProto.EXTERNAL: + for entry in init.external_data: + if entry.key == "location": + associated_files.add(entry.value) + except Exception: + logger.debug("Could not read ONNX external data from %s", model_path, exc_info=True) + + # Copy all associated files + for file_name in associated_files: + src = src_dir / file_name + if src.is_file(): + shutil.copy2(str(src), 
str(dest_dir / file_name)) + + @classmethod + def _copy_config_files(cls, model, output_dir): + """Copy non-model files (genai_config, tokenizer, chat_template, etc.) to configs/. + + Collects files and directories from target model ``additional_files`` and copies + them to the ``configs/`` directory at the package root. Returns the set of copied + entry names so they can be removed from the variant directories later. + """ + config_entries = cls._collect_config_files(model) + if not config_entries: + return set() + + configs_dir = output_dir / "configs" + configs_dir.mkdir(parents=True, exist_ok=True) + + for name, src_path in config_entries.items(): + dest = configs_dir / name + if src_path.is_dir(): + if not dest.exists(): + shutil.copytree(str(src_path), str(dest)) + else: + shutil.copy2(str(src_path), str(dest)) + logger.info("Copied %s to %s", name, configs_dir) + + return set(config_entries.keys()) + + @classmethod + def _collect_config_files(cls, model): + """Find config files from target model additional_files or model directories.""" + config_files: dict[str, Path] = {} + + # Collect from each target model's additional_files + for _, target_model in model.get_target_models(): + for fp in (target_model.model_attributes or {}).get("additional_files", []): + p = Path(fp) + if (p.is_file() or p.is_dir()) and p.name not in config_files: + config_files[p.name] = p + if config_files: + break + + # Fall back to parent model's additional_files + if not config_files: + for fp in (model.model_attributes or {}).get("additional_files", []): + p = Path(fp) + if (p.is_file() or p.is_dir()) and p.name not in config_files: + config_files[p.name] = p + + return config_files + + @staticmethod + def _get_model_dir(target_model): + """Get the directory containing the target model.""" + if isinstance(target_model, CompositeModelHandler): + return Path(target_model.model_path) + p = Path(target_model.model_path) + return p.parent if p.is_file() else p + + @staticmethod + def 
_remove_config_files(component_dir, config_file_names): + """Remove config files and directories from variant subdirectories. + + Skips the ``base/`` directory since base model files are copied separately. + """ + for name in config_file_names: + for p in component_dir.rglob(name): + # Don't remove from base/ — base model is handled by _copy_base_model + if "base" in p.relative_to(component_dir).parts: + continue + if p.is_dir(): + shutil.rmtree(str(p)) + logger.debug("Removed duplicate config directory %s from variant directory", p) + else: + p.unlink() + logger.debug("Removed duplicate config file %s from variant directory", p) + + @staticmethod + def _copy_base_model(base_model_path, component_dir, config_file_names): + """Copy the pre-optimized base model to the ``base/`` subdirectory. + + Only model files are copied — config files that belong in ``configs/`` are + skipped. Recognised model suffixes: ``.onnx``, ``.data``, ``.xml``, ``.bin``. + """ + base_dir = component_dir / "base" + if base_dir.exists(): + return + + base_model_path = Path(base_model_path) + if not base_model_path.is_dir(): + logger.warning("Base model path %s not found, skipping base model copy", base_model_path) + return + + base_dir.mkdir(parents=True, exist_ok=True) + model_suffixes = {".onnx", ".data", ".xml", ".bin"} + for f in sorted(base_model_path.iterdir()): + if f.is_file() and f.name not in config_file_names and f.suffix in model_suffixes: + shutil.copy2(str(f), str(base_dir / f.name)) + logger.info("Copied base model file %s to %s", f.name, base_dir) + + @staticmethod + def _copy_base_component(base_model_path, comp_name, comp_dir, config_file_names): + """Copy the base model files for a specific component to the ``base/`` subdirectory. + + Searches the base model directory for a subdirectory matching *comp_name* + and copies model files from it. 
+ """ + base_dir = comp_dir / "base" + if base_dir.exists(): + return + + base_model_path = Path(base_model_path) + if not base_model_path.is_dir(): + logger.warning("Base model path %s not found, skipping base model copy for %s", base_model_path, comp_name) + return + + # For composite models the base path is the parent directory containing component subdirs + comp_src = base_model_path / comp_name + if not comp_src.is_dir(): + logger.debug("No base directory found for component %s at %s", comp_name, comp_src) + return + + base_dir.mkdir(parents=True, exist_ok=True) + model_suffixes = {".onnx", ".data", ".xml", ".bin"} + for f in sorted(comp_src.iterdir()): + if f.is_file() and f.name not in config_file_names and f.suffix in model_suffixes: + shutil.copy2(str(f), str(base_dir / f.name)) + logger.info("Copied base model file %s to %s for component %s", f.name, base_dir, comp_name) + + @staticmethod + def _get_base_model_file(base_dir: Path) -> Optional[str]: + """Find the primary model file in the base/ directory. + + Returns the filename of the first ``.onnx`` or ``.xml`` file found, + or ``None`` if the directory does not exist or contains no model files. + """ + if not base_dir.is_dir(): + return None + for suffix in (".onnx", ".xml"): + for f in sorted(base_dir.iterdir()): + if f.is_file() and f.suffix == suffix: + return f.name + return None + + @staticmethod + def _task_to_component_name(task: str) -> str: + """Map a task string to a component name for single-component models. + + Used when the model is not a composite pipeline but still needs + a component directory name in the package structure. 
+ """ + task_component_map = { + "text_generation": "decoder", + "text2text_generation": "encoder_decoder", + "text_classification": "classifier", + "token_classification": "token_classifier", + "question_answering": "qa_model", + "image_generation": "image_generator", + "image_classification": "image_classifier", + "object_detection": "object_detector", + "automatic_speech_recognition": "speech_recognizer", + } + return task_component_map.get(task, "model") + + @staticmethod + def _extract_task(model: ModelPackageModelHandler) -> str: + """Extract the task for this model using the HuggingFace Hub API. + + Reads ``_name_or_path`` from the model attributes and queries + ``huggingface_hub.model_info`` for the ``pipeline_tag``. + Returns an empty string if the task cannot be determined. + """ + attrs = model.model_attributes or {} + # Try the first target's attributes as fallback + if "_name_or_path" not in attrs: + for _, target_model in model.get_target_models(): + attrs = target_model.model_attributes or {} + if "_name_or_path" in attrs: + break + + model_name_or_path = attrs.get("_name_or_path", "") + if not model_name_or_path: + return "" + + try: + info = model_info(model_name_or_path) + tag = info.pipeline_tag or "" + # HF uses hyphens (e.g., "text-generation"); normalize to underscores + return tag.replace("-", "_") + except Exception: + logger.debug("Could not fetch task from HuggingFace Hub for %s", model_name_or_path, exc_info=True) + return "" + + @staticmethod + def _extract_ep_compatibility_from_onnx( + target_model: Union[ONNXModelHandler, CompositeModelHandler], + ep: str = "", + ) -> Optional[str]: + """Extract ep_compatibility_info from ONNX model custom metadata. + + Looks for metadata keys prefixed with ``ep_compatibility_info.`` in the + ONNX model file. If *ep* is given, the entry matching that EP name is + preferred. When only a single entry exists it is returned regardless of + the EP name. 
+ """ + model_path = None + if isinstance(target_model, ONNXModelHandler): + model_path = Path(target_model.model_path) + elif isinstance(target_model, CompositeModelHandler): + for component in target_model.model_components: + if isinstance(component, ONNXModelHandler): + model_path = Path(component.model_path) + break + + if model_path is None or not model_path.is_file(): + return None + + try: + import onnx + + onnx_model = onnx.load(str(model_path), load_external_data=False) + prefix = "ep_compatibility_info." + ep_compat_map = { + entry.key[len(prefix) :]: entry.value + for entry in onnx_model.metadata_props + if entry.key.startswith(prefix) + } + except Exception: + logger.debug("Could not read ONNX metadata from %s", model_path, exc_info=True) + return None + + if not ep_compat_map: + return None + if ep and ep in ep_compat_map: + return ep_compat_map[ep] + if len(ep_compat_map) == 1: + return next(iter(ep_compat_map.values())) + return None diff --git a/olive/passes/openvino/encapsulation.py b/olive/passes/openvino/encapsulation.py index c8e24a2b37..b315f194ef 100644 --- a/olive/passes/openvino/encapsulation.py +++ b/olive/passes/openvino/encapsulation.py @@ -4,6 +4,7 @@ # -------------------------------------------------------------------------- import logging import os +from copy import deepcopy from pathlib import Path from typing import ClassVar, Union @@ -13,6 +14,7 @@ from olive.common.utils import hardlink_copy_dir, hardlink_copy_file from olive.hardware.accelerator import AcceleratorSpec, Device from olive.model import ONNXModelHandler, OpenVINOModelHandler +from olive.model.handler.model_package import ModelPackageModelHandler from olive.passes import Pass from olive.passes.openvino.ov_utils import create_genai_config from olive.passes.pass_config import BasePassConfig, PassConfigParam @@ -21,7 +23,11 @@ class OpenVINOEncapsulation(Pass): - """Encapsulates OpenVINO models with onnx context nodes.""" + """Encapsulates OpenVINO models with onnx 
context nodes. + + When ov_version is a list of strings, generates encapsulated models for each version + and returns a ModelPackageModelHandler. + """ openvino_to_onnx_dtype: ClassVar[dict] = { "f32": TensorProto.FLOAT, @@ -62,12 +68,14 @@ def _default_config(cls, accelerator_spec: AcceleratorSpec) -> dict[str, PassCon description=("Device the encapsulated model should run on. Available devices are cpu, gpu, npu."), ), "ov_version": PassConfigParam( - type_=str, + type_=Union[str, list], default_value=None, required=False, description=( - "Name of the OpenVINO version to override in model SDK version." - "Requires a minimum version of OpenVINO 2025.1" + "OpenVINO version to override in model SDK version. Can be a single string or a list" + " of strings for model package generation. When a list is provided, encapsulated models" + " are generated for each version and returned as a ModelPackageModelHandler." + " Requires a minimum version of OpenVINO 2025.1" ), ), "opset_imports": PassConfigParam( @@ -114,7 +122,74 @@ def _run_for_config( model: Union[OpenVINOModelHandler], config: type[BasePassConfig], output_model_path: str, + ) -> Union[ONNXModelHandler, ModelPackageModelHandler]: + # Model package mode: ov_version is a list with multiple entries + if isinstance(config.ov_version, list) and len(config.ov_version) > 1: + return self._run_model_package(model, config, output_model_path) + + # Single-target mode: unwrap single-element list if needed + if isinstance(config.ov_version, list): + single_config = deepcopy(config) + object.__setattr__(single_config, "ov_version", config.ov_version[0]) + return self._run_single_target(model, single_config, output_model_path) + + return self._run_single_target(model, config, output_model_path) + + def _run_model_package( + self, + model: Union[OpenVINOModelHandler], + config: type[BasePassConfig], + output_model_path: str, + ) -> ModelPackageModelHandler: + """Generate encapsulated models for multiple OpenVINO versions. 
+ + Each entry in config.ov_version is a separate version string. + The result is a ModelPackageModelHandler wrapping per-version outputs. + """ + ov_version_list = config.ov_version + assert all(isinstance(v, str) for v in ov_version_list), "Each entry in ov_version list must be a string" + + output_dir = Path(output_model_path).with_suffix("") + output_dir.mkdir(parents=True, exist_ok=True) + + targets = [] + target_names = [] + for ov_ver in ov_version_list: + target_name = f"ov_{ov_ver.replace('.', '_')}" + target_output_path = str(output_dir / target_name) + + single_config = deepcopy(config) + object.__setattr__(single_config, "ov_version", ov_ver) + + result = self._run_single_target(model, single_config, target_output_path) + + targets.append(result) + target_names.append(target_name) + + # Preserve additional_files from the first target so ModelPackage can extract configs + additional_files = [] + if targets: + additional_files = (targets[0].model_attributes or {}).get("additional_files", []) + + parent_attrs = dict(model.model_attributes or {}) + parent_attrs["base_model_path"] = str(model.model_path) + if additional_files: + parent_attrs["additional_files"] = additional_files + + return ModelPackageModelHandler( + targets, + target_names, + model_path=output_dir, + model_attributes=parent_attrs, + ) + + def _run_single_target( + self, + model: Union[OpenVINOModelHandler], + config: type[BasePassConfig], + output_model_path: str, ) -> ONNXModelHandler: + """Encapsulate a single OpenVINO model. 
This is the original logic.""" try: import openvino as ov except ImportError: @@ -245,7 +320,25 @@ def _run_for_config( # generate the genai_config.json file for GenAI models create_genai_config(context_model_output, output_model_path, config) - return ONNXModelHandler(model_path=output_model_path) + # Collect config files (non-model files) for downstream ModelPackage + output_path = Path(output_model_path) + model_suffixes = {".onnx", ".xml", ".bin"} + additional_files = [ + str(f) + for f in sorted(output_path.iterdir()) + if (f.is_file() and f.suffix not in model_suffixes) or f.is_dir() + ] + + # Populate model_attributes with context binary metadata so it persists in model_config.json + context_binary_attrs = { + **(model.model_attributes or {}), + "ep": "OpenVINOExecutionProvider", + "device": str(config.target_device).upper(), + "sdk_version": ov_version, + "additional_files": additional_files, + } + + return ONNXModelHandler(model_path=output_model_path, model_attributes=context_binary_attrs) def extract_shape_list(shape, config, prefix: str = "input_0_") -> list: diff --git a/olive/passes/openvino/optimum_intel.py b/olive/passes/openvino/optimum_intel.py index 2105f512f2..d898e665eb 100644 --- a/olive/passes/openvino/optimum_intel.py +++ b/olive/passes/openvino/optimum_intel.py @@ -3,6 +3,7 @@ # Licensed under the MIT License. # -------------------------------------------------------------------------- import logging +import os from copy import deepcopy from pathlib import Path from typing import Any, Optional, Union @@ -497,6 +498,17 @@ def _run_for_config( extra_args.pop("disable_convert_tokenizer", False) extra_args["library_name"] = lib_name extra_args.pop("library", None) + + # Workaround for optimum-intel using Path.rename() which fails across filesystems. + # Set tempdir to output path so temp files are on the same filesystem as the cache. 
+ import tempfile + + Path(output_model_path).mkdir(parents=True, exist_ok=True) + original_tmpdir = os.environ.get("TMPDIR") + original_tempdir = tempfile.tempdir + os.environ["TMPDIR"] = output_model_path + tempfile.tempdir = output_model_path + export_optimum_intel( model.model_name_or_path, output_model_path, @@ -516,7 +528,13 @@ def _run_for_config( model_kwargs=model.load_kwargs.__dict__ if model.load_kwargs else None, ) except Exception as e: - raise RuntimeError(f"OpenVINO optimum export failed: {e}") from None + raise RuntimeError(f"OpenVINO optimum export failed: {e}") from e + finally: + tempfile.tempdir = original_tempdir + if original_tmpdir is None: + os.environ.pop("TMPDIR", None) + else: + os.environ["TMPDIR"] = original_tmpdir # check the exported components exported_models = [name.stem for name in Path(output_model_path).iterdir() if name.suffix == ".xml"] diff --git a/olive/systems/system_config.py b/olive/systems/system_config.py index dab5da3503..5addeadc61 100644 --- a/olive/systems/system_config.py +++ b/olive/systems/system_config.py @@ -6,7 +6,7 @@ from pathlib import Path from typing import Optional, Union -from pydantic import ConfigDict, Field, field_validator +from pydantic import ConfigDict, Field, SerializeAsAny, field_validator from olive.common.config_utils import ConfigBase, NestedConfig, validate_config from olive.systems.common import AcceleratorConfig, SystemType @@ -88,7 +88,7 @@ def import_system_from_type(system_type: SystemType): class SystemConfig(NestedConfig): type: SystemType - config: Optional[TargetUserConfig] = Field(default=None, validate_default=True) + config: Optional[SerializeAsAny[TargetUserConfig]] = Field(default=None, validate_default=True) @field_validator("config", mode="before") @classmethod diff --git a/test/cli/test_model_package.py b/test/cli/test_model_package.py new file mode 100644 index 0000000000..62360d3c9f --- /dev/null +++ b/test/cli/test_model_package.py @@ -0,0 +1,168 @@ +# 
------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- +# pylint: disable=protected-access +import json +from argparse import ArgumentParser +from unittest.mock import patch + +import pytest + +from olive.cli.model_package import ModelPackageCommand + + +def _create_source_dir(tmp_path, name, model_attributes): + """Create a fake Olive output directory with model_config.json and a dummy .onnx file.""" + source_dir = tmp_path / name + source_dir.mkdir(parents=True) + model_config = { + "type": "ONNXModel", + "config": {"model_path": str(source_dir / "model.onnx"), "model_attributes": model_attributes}, + } + (source_dir / "model_config.json").write_text(json.dumps(model_config)) + (source_dir / "model.onnx").write_text("dummy") + return source_dir + + +def _make_command(args_list): + """Create a ModelPackageCommand instance from CLI args.""" + parser = ArgumentParser() + commands_parser = parser.add_subparsers() + ModelPackageCommand.register_subcommand(commands_parser) + parsed_args, unknown = parser.parse_known_args(args_list) + return parsed_args.func(parser, parsed_args, unknown) + + +class TestSourceValidation: + """Tests for _parse_sources validation logic.""" + + def test_rejects_single_source(self, tmp_path): + # setup + src = _create_source_dir(tmp_path, "soc_60", {"ep": "QNNExecutionProvider"}) + cmd = _make_command(["generate-model-package", "-s", str(src), "-o", str(tmp_path / "out")]) + + # execute + assert + with pytest.raises(ValueError, match="At least two"): + cmd._parse_sources() + + def test_rejects_missing_model_config(self, tmp_path): + # setup + no_config = tmp_path / "no_config" + no_config.mkdir() + valid = _create_source_dir(tmp_path, "valid", {"ep": "QNNExecutionProvider"}) + cmd = _make_command( + ["generate-model-package", "-s", str(no_config), 
"-s", str(valid), "-o", str(tmp_path / "out")] + ) + + # execute + assert + with pytest.raises(ValueError, match="model_config.json"): + cmd._parse_sources() + + def test_rejects_nonexistent_path(self, tmp_path): + # setup + valid = _create_source_dir(tmp_path, "valid", {"ep": "QNNExecutionProvider"}) + cmd = _make_command( + ["generate-model-package", "-s", "/nonexistent/path", "-s", str(valid), "-o", str(tmp_path / "out")] + ) + + # execute + assert + with pytest.raises(ValueError, match="does not exist"): + cmd._parse_sources() + + def test_parses_two_valid_sources(self, tmp_path): + # setup + src1 = _create_source_dir(tmp_path, "soc_60", {"ep": "QNNExecutionProvider"}) + src2 = _create_source_dir(tmp_path, "soc_73", {"ep": "QNNExecutionProvider"}) + cmd = _make_command(["generate-model-package", "-s", str(src1), "-s", str(src2), "-o", str(tmp_path / "out")]) + + # execute + sources = cmd._parse_sources() + + # assert + assert len(sources) == 2 + assert sources[0] == ("soc_60", src1) + assert sources[1] == ("soc_73", src2) + + +class TestRunConfig: + """Tests for _get_run_config workflow config construction.""" + + def test_builds_model_package_workflow(self, tmp_path): + """Config has ModelPackageModel input, ModelPackage pass, and correct accelerator.""" + # setup + src1 = _create_source_dir(tmp_path, "soc_60", {"ep": "QNNExecutionProvider", "device": "NPU"}) + src2 = _create_source_dir(tmp_path, "soc_73", {"ep": "QNNExecutionProvider", "device": "NPU"}) + cmd = _make_command(["generate-model-package", "-s", str(src1), "-s", str(src2), "-o", str(tmp_path / "out")]) + + # execute + config = cmd._get_run_config(str(tmp_path / "tmp")) + + # assert: input model + assert config["input_model"]["type"] == "ModelPackageModel" + assert len(config["input_model"]["target_models"]) == 2 + assert config["input_model"]["target_names"] == ["soc_60", "soc_73"] + + # assert: pass config + assert config["passes"]["pkg"]["type"] == "ModelPackage" + assert 
config["passes"]["pkg"]["model_version"] == "1.0" + + # assert: accelerator from source model_attributes + accel = config["systems"]["local_system"]["accelerators"][0] + assert accel["device"] == "npu" + assert accel["execution_providers"] == ["QNNExecutionProvider"] + + # assert: output dir + assert config["output_dir"] == str(tmp_path / "out") + + def test_custom_model_name_and_version(self, tmp_path): + """CLI args --model_name and --model_version are forwarded to pass config.""" + # setup + src1 = _create_source_dir(tmp_path, "t1", {"ep": "QNNExecutionProvider"}) + src2 = _create_source_dir(tmp_path, "t2", {"ep": "QNNExecutionProvider"}) + cmd = _make_command( + [ + "generate-model-package", + "-s", + str(src1), + "-s", + str(src2), + "--model_name", + "my_model", + "--model_version", + "2.0", + "-o", + str(tmp_path / "out"), + ] + ) + + # execute + config = cmd._get_run_config(str(tmp_path / "tmp")) + + # assert + assert config["passes"]["pkg"]["model_name"] == "my_model" + assert config["passes"]["pkg"]["model_version"] == "2.0" + + def test_defaults_accelerator_when_no_attributes(self): + """Falls back to CPUExecutionProvider/cpu when model_attributes is empty.""" + # setup + execute + ep, device = ModelPackageCommand._extract_accelerator_info([{"type": "ONNXModel", "config": {}}]) + + # assert + assert ep == "CPUExecutionProvider" + assert device == "cpu" + + +class TestRunDelegation: + """Test that run() delegates to _run_workflow().""" + + def test_run_calls_workflow(self, tmp_path): + # setup + src1 = _create_source_dir(tmp_path, "soc_60", {"ep": "QNNExecutionProvider"}) + src2 = _create_source_dir(tmp_path, "soc_73", {"ep": "QNNExecutionProvider"}) + cmd = _make_command(["generate-model-package", "-s", str(src1), "-s", str(src2), "-o", str(tmp_path / "out")]) + + # execute + assert + with patch.object(cmd, "_run_workflow", return_value=None) as mock_workflow: + cmd.run() + mock_workflow.assert_called_once() diff --git a/test/model/test_model_package.py 
b/test/model/test_model_package.py new file mode 100644 index 0000000000..a3eb0ed8fb --- /dev/null +++ b/test/model/test_model_package.py @@ -0,0 +1,130 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- +import pytest + +from olive.model import ONNXModelHandler +from olive.model.handler.composite import CompositeModelHandler +from olive.model.handler.model_package import ModelPackageModelHandler + + +def _make_onnx_handler(tmp_path, name="model", model_attributes=None): + model_dir = tmp_path / name + model_dir.mkdir(parents=True, exist_ok=True) + model_file = model_dir / f"{name}.onnx" + model_file.write_text("dummy") + return ONNXModelHandler(model_path=str(model_file), model_attributes=model_attributes) + + +class TestModelPackageModelHandler: + def test_creation_and_target_iteration(self, tmp_path): + """Handler stores targets and iterates (name, model) pairs correctly.""" + # setup + h1 = _make_onnx_handler(tmp_path, "t1") + h2 = _make_onnx_handler(tmp_path, "t2") + + # execute + mt = ModelPackageModelHandler([h1, h2], ["t1", "t2"], model_path=tmp_path) + + # assert + assert mt.target_names == ["t1", "t2"] + pairs = list(mt.get_target_models()) + assert len(pairs) == 2 + assert pairs[0][0] == "t1" + assert pairs[1][0] == "t2" + + def test_parent_attributes_merged_into_targets(self, tmp_path): + """Parent-level model_attributes are merged into each target while preserving target-specific ones.""" + # setup + h1 = _make_onnx_handler(tmp_path, "t1", model_attributes={"architecture": "60"}) + h2 = _make_onnx_handler(tmp_path, "t2", model_attributes={"architecture": "73"}) + mt = ModelPackageModelHandler( + [h1, h2], + ["t1", "t2"], + model_path=tmp_path, + model_attributes={"ep": "QNNExecutionProvider", "device": "NPU"}, + ) + + # execute + pairs = 
list(mt.get_target_models()) + + # assert: parent attrs merged + assert pairs[0][1].model_attributes["ep"] == "QNNExecutionProvider" + assert pairs[1][1].model_attributes["device"] == "NPU" + # assert: target-specific attrs preserved + assert pairs[0][1].model_attributes["architecture"] == "60" + assert pairs[1][1].model_attributes["architecture"] == "73" + + def test_to_json_round_trip(self, tmp_path): + """to_json produces correct structure with parent/target attribute separation.""" + # setup + h1 = _make_onnx_handler(tmp_path, "t1", model_attributes={"architecture": "60"}) + h2 = _make_onnx_handler(tmp_path, "t2", model_attributes={"architecture": "73"}) + mt = ModelPackageModelHandler( + [h1, h2], + ["t1", "t2"], + model_path=tmp_path, + model_attributes={"ep": "QNNExecutionProvider"}, + ) + + # execute + json_dict = mt.to_json() + + # assert + assert json_dict["type"].lower() == "modelpackagemodel" + assert json_dict["config"]["target_names"] == ["t1", "t2"] + assert len(json_dict["config"]["target_models"]) == 2 + assert json_dict["config"]["model_attributes"]["ep"] == "QNNExecutionProvider" + + def test_mismatched_names_raises(self, tmp_path): + """Mismatch between target count and name count raises AssertionError.""" + # setup + h1 = _make_onnx_handler(tmp_path, "t1") + + # execute + assert + with pytest.raises(AssertionError, match="Number of target models and names must match"): + ModelPackageModelHandler([h1], ["t1", "t2"], model_path=tmp_path) + + def test_is_composite_false_for_onnx_targets(self, tmp_path): + """is_composite returns False when all targets are ONNXModelHandler.""" + # setup + mt = ModelPackageModelHandler( + [_make_onnx_handler(tmp_path, "t1"), _make_onnx_handler(tmp_path, "t2")], + ["t1", "t2"], + model_path=tmp_path, + ) + + # execute + assert + assert mt.is_composite is False + + def test_is_composite_true_for_composite_targets(self, tmp_path): + """is_composite returns True when all targets are CompositeModelHandler.""" + # setup + 
c1 = CompositeModelHandler([_make_onnx_handler(tmp_path, "e1")], ["enc"], model_path=str(tmp_path / "c1")) + c2 = CompositeModelHandler([_make_onnx_handler(tmp_path, "e2")], ["enc"], model_path=str(tmp_path / "c2")) + mt = ModelPackageModelHandler([c1, c2], ["soc_a", "soc_b"], model_path=tmp_path) + + # execute + assert + assert mt.is_composite is True + + def test_is_composite_mixed_types_raises(self, tmp_path): + """Mixed ONNX and Composite targets raise AssertionError.""" + # setup + plain = _make_onnx_handler(tmp_path, "plain") + composite = CompositeModelHandler( + [_make_onnx_handler(tmp_path, "enc")], ["encoder"], model_path=str(tmp_path / "comp") + ) + mt = ModelPackageModelHandler([plain, composite], ["t1", "t2"], model_path=tmp_path) + + # execute + assert + with pytest.raises(AssertionError, match="All target models must be the same type"): + _ = mt.is_composite + + def test_is_composite_empty_targets(self, tmp_path): + """is_composite returns False for empty target list.""" + # setup + mt = ModelPackageModelHandler([], [], model_path=tmp_path) + + # execute + assert + assert mt.is_composite is False diff --git a/test/passes/onnx/test_context_binary.py b/test/passes/onnx/test_context_binary.py index deee87c550..a4750d4c3f 100644 --- a/test/passes/onnx/test_context_binary.py +++ b/test/passes/onnx/test_context_binary.py @@ -9,6 +9,7 @@ from olive.hardware.accelerator import AcceleratorSpec from olive.model import CompositeModelHandler, ONNXModelHandler +from olive.model.handler.model_package import ModelPackageModelHandler from olive.passes.olive_pass import create_pass_from_dict from olive.passes.onnx.common import resave_model from olive.passes.onnx.context_binary import EPContextBinaryGenerator @@ -132,3 +133,102 @@ def test_ep_context_binary_generator_composite(tmp_path, is_llm): assert expected_model_path.exists() if not is_skipped: assert len(list(output_model_path.glob(f"{name}_ctx*.bin"))) == 1 + + +# 
=========================================================================== +# Model package tests +# =========================================================================== + + +def _mock_get_available_providers(): + return ["QNNExecutionProvider", "CPUExecutionProvider"] + + +def test_model_package_returns_model_package_handler(tmp_path): + """When provider_options is a list, result should be ModelPackageModelHandler.""" + from pathlib import Path + from unittest.mock import patch + + accelerator_spec = AcceleratorSpec(accelerator_type="NPU", execution_provider="QNNExecutionProvider") + + p = create_pass_from_dict( + EPContextBinaryGenerator, + { + "provider_options": [ + {"soc_model": "60", "htp_performance_mode": "burst"}, + {"soc_model": "73", "htp_performance_mode": "burst"}, + ], + }, + disable_search=True, + accelerator_spec=accelerator_spec, + ) + + with ( + patch.object(EPContextBinaryGenerator, "_run_single_target") as mock_single, + patch("onnxruntime.get_available_providers", _mock_get_available_providers), + ): + + def side_effect(model, config, output_model_path): + out_dir = Path(output_model_path) + out_dir.mkdir(parents=True, exist_ok=True) + model_file = out_dir / "model_ctx.onnx" + model_file.write_text("dummy") + return ONNXModelHandler(model_path=str(model_file)) + + mock_single.side_effect = side_effect + + input_model = get_onnx_model() + output_path = str(tmp_path / "output.onnx") + result = p.run(input_model, output_path) + + assert isinstance(result, ModelPackageModelHandler) + assert result.target_names == ["soc_60", "soc_73"] + assert mock_single.call_count == 2 + + for _, target in result.get_target_models(): + assert target.model_attributes["ep"] == "QNNExecutionProvider" + assert target.model_attributes["device"] == "NPU" + assert "provider_options" in target.model_attributes + + +def test_single_target_populates_model_attributes(tmp_path): + """Single-target mode should also populate model_attributes.""" + from pathlib import 
Path + from unittest.mock import patch + + accelerator_spec = AcceleratorSpec(accelerator_type="NPU", execution_provider="QNNExecutionProvider") + + p = create_pass_from_dict( + EPContextBinaryGenerator, + { + "provider_options": { + "soc_model": "60", + "htp_performance_mode": "burst", + }, + }, + disable_search=True, + accelerator_spec=accelerator_spec, + ) + + with ( + patch.object(EPContextBinaryGenerator, "_run_single_target") as mock_single, + patch("onnxruntime.get_available_providers", _mock_get_available_providers), + ): + + def side_effect(model, config, output_model_path): + out_path = Path(output_model_path) + out_path.parent.mkdir(parents=True, exist_ok=True) + out_path.write_text("dummy") + return ONNXModelHandler(model_path=str(out_path)) + + mock_single.side_effect = side_effect + + input_model = get_onnx_model() + output_path = str(tmp_path / "output.onnx") + result = p.run(input_model, output_path) + + assert isinstance(result, ONNXModelHandler) + assert result.model_attributes["ep"] == "QNNExecutionProvider" + assert result.model_attributes["device"] == "NPU" + assert result.model_attributes["architecture"] == "60" + assert result.model_attributes["provider_options"]["soc_model"] == "60" diff --git a/test/passes/onnx/test_model_package.py b/test/passes/onnx/test_model_package.py new file mode 100644 index 0000000000..de04cf3fe6 --- /dev/null +++ b/test/passes/onnx/test_model_package.py @@ -0,0 +1,483 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+# -------------------------------------------------------------------------- +import json +from pathlib import Path +from unittest.mock import patch + +import pytest + +from olive.hardware.accelerator import AcceleratorSpec +from olive.model import CompositeModelHandler, ONNXModelHandler +from olive.model.handler.model_package import ModelPackageModelHandler +from olive.passes.olive_pass import create_pass_from_dict +from olive.passes.onnx.model_package import ModelPackage + + +def _make_onnx_handler(tmp_path, name="model", model_attributes=None): + """Create a dummy ONNXModelHandler with a text file as the .onnx file.""" + model_dir = tmp_path / name + model_dir.mkdir(parents=True, exist_ok=True) + model_file = model_dir / f"{name}.onnx" + model_file.write_text("dummy") + return ONNXModelHandler(model_path=str(model_file), model_attributes=model_attributes) + + +def _make_real_onnx_handler(tmp_path, name="model", model_attributes=None, onnx_metadata=None): + """Create an ONNXModelHandler backed by a valid ONNX model with optional custom metadata.""" + import onnx + from onnx import TensorProto, helper + + model_dir = tmp_path / name + model_dir.mkdir(parents=True, exist_ok=True) + model_file = model_dir / f"{name}.onnx" + + x = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1]) + y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1]) + node = helper.make_node("Identity", ["X"], ["Y"]) + graph = helper.make_graph([node], "test", [x], [y]) + onnx_model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + + if onnx_metadata: + onnx.helper.set_model_props(onnx_model, onnx_metadata) + + onnx.save(onnx_model, str(model_file)) + return ONNXModelHandler(model_path=str(model_file), model_attributes=model_attributes) + + +def _make_model_package(tmp_path, target_configs, parent_attrs=None): + """Build a ModelPackageModelHandler from (name, attrs) pairs.""" + targets = [] + names = [] + for name, attrs in target_configs: + 
targets.append(_make_onnx_handler(tmp_path, name=name, model_attributes=attrs)) + names.append(name) + return ModelPackageModelHandler(targets, names, model_path=tmp_path, model_attributes=parent_attrs or {}) + + +def _make_composite_model_package(tmp_path, soc_configs, component_names): + """Build a ModelPackageModelHandler wrapping CompositeModelHandlers. + + soc_configs: list of (soc_name, attrs) pairs. + component_names: list of ONNX component names shared by each SoC target. + """ + composites = [] + soc_names = [] + for soc_name, attrs in soc_configs: + comp_dir = tmp_path / soc_name + comp_dir.mkdir(parents=True, exist_ok=True) + subs = [] + for comp_name in component_names: + (comp_dir / f"{comp_name}.onnx").write_text("dummy") + subs.append(ONNXModelHandler(model_path=str(comp_dir / f"{comp_name}.onnx"))) + composites.append( + CompositeModelHandler( + model_components=subs, + model_component_names=component_names, + model_path=str(comp_dir), + model_attributes=attrs, + ) + ) + soc_names.append(soc_name) + return ModelPackageModelHandler(composites, soc_names, model_path=tmp_path) + + +def _create_pass(ep="QNNExecutionProvider", device="NPU", config=None): + accelerator_spec = AcceleratorSpec(accelerator_type=device, execution_provider=ep) + return create_pass_from_dict(ModelPackage, config or {}, disable_search=True, accelerator_spec=accelerator_spec) + + +def _run_pass(model, tmp_path, ep="QNNExecutionProvider", device="NPU", config=None): + """Run ModelPackage pass and return (result, output_dir).""" + p = _create_pass(ep=ep, device=device, config=config) + result = p.run(model, str(tmp_path / "output.onnx")) + return result, tmp_path / "output" + + +def _read_manifest(output_dir): + with open(output_dir / "manifest.json") as f: + return json.load(f) + + +def _read_metadata(output_dir, component_name="model"): + with open(output_dir / "models" / component_name / "metadata.json") as f: + return json.load(f) + + +class TestSingleComponentPackaging: + 
"""Tests for packaging non-composite models (single ONNX per variant).""" + + def test_manifest_and_metadata_structure(self, tmp_path): + """Manifest has all required fields; metadata has correct variants and constraints.""" + # setup + mt = _make_model_package(tmp_path, [("soc_60", {"device": "NPU"}), ("soc_73", {"device": "NPU"})]) + + # execute + result, output_dir = _run_pass(mt, tmp_path) + + # assert + assert isinstance(result, ModelPackageModelHandler) + + manifest = _read_manifest(output_dir) + assert set(manifest.keys()) == {"name", "model_version", "task", "component_models"} + assert manifest["model_version"] == "1.0" + assert manifest["component_models"] == ["model"] + + metadata = _read_metadata(output_dir) + assert metadata["name"] == "model" + assert set(metadata["model_variants"].keys()) == {"soc_60", "soc_73"} + assert metadata["model_variants"]["soc_60"]["constraints"]["ep"] == "QNNExecutionProvider" + assert metadata["model_variants"]["soc_60"]["constraints"]["device"] == "NPU" + assert metadata["model_variants"]["soc_60"]["constraints"]["ep_compatibility_info"] == "" + + assert (output_dir / "models" / "model" / "soc_60").is_dir() + assert (output_dir / "models" / "model" / "soc_73").is_dir() + + def test_custom_model_name_and_version(self, tmp_path): + """model_name and model_version pass configs override defaults.""" + # setup + mt = _make_model_package(tmp_path, [("t1", {}), ("t2", {})]) + + # execute + _, output_dir = _run_pass(mt, tmp_path, config={"model_name": "my_model", "model_version": "2.5"}) + + # assert + manifest = _read_manifest(output_dir) + assert manifest["name"] == "my_model" + assert manifest["model_version"] == "2.5" + assert _read_metadata(output_dir)["name"] == "model" + + def test_default_model_name_from_output_dir(self, tmp_path): + """Model name defaults to the output directory name.""" + # setup + mt = _make_model_package(tmp_path, [("t1", {}), ("t2", {})]) + p = _create_pass() + + # execute + p.run(mt, str(tmp_path / 
"my_package.onnx")) + + # assert + manifest = _read_manifest(tmp_path / "my_package") + assert manifest["name"] == "my_package" + + def test_file_field_uses_onnx_filename_only(self, tmp_path): + """Metadata file field contains only the ONNX filename, not the variant folder prefix.""" + # setup + mt = _make_model_package(tmp_path, [("soc_60", {})]) + + # execute + _, output_dir = _run_pass(mt, tmp_path) + + # assert + metadata = _read_metadata(output_dir) + assert metadata["model_variants"]["soc_60"]["file"] == "soc_60.onnx" + + def test_device_omitted_when_absent(self, tmp_path): + """Device constraint is not included when model_attributes has no device.""" + # setup + mt = _make_model_package(tmp_path, [("t1", {"device": "GPU"}), ("t2", {})]) + + # execute + _, output_dir = _run_pass(mt, tmp_path) + + # assert + variants = _read_metadata(output_dir)["model_variants"] + assert variants["t1"]["constraints"]["device"] == "GPU" + assert "device" not in variants["t2"]["constraints"] + + def test_rejects_non_model_package_input(self, tmp_path): + """Pass rejects input that is not a ModelPackageModelHandler.""" + # setup + handler = _make_onnx_handler(tmp_path, "single") + p = _create_pass() + + # execute + assert + with pytest.raises(AssertionError, match="requires a ModelPackageModelHandler"): + p.run(handler, str(tmp_path / "output.onnx")) + + def test_copy_skips_existing_destination(self, tmp_path): + """Pre-existing variant directory is not overwritten.""" + # setup + mt = _make_model_package(tmp_path, [("t1", {}), ("t2", {})]) + dest = tmp_path / "output" / "models" / "model" / "t1" + dest.mkdir(parents=True) + (dest / "marker.txt").write_text("pre-existing") + + # execute + _run_pass(mt, tmp_path) + + # assert + assert (dest / "marker.txt").read_text() == "pre-existing" + + def test_result_attributes_has_manifest_path(self, tmp_path): + """Result model_attributes includes manifest_path and clears temporary keys.""" + # setup + mt = _make_model_package(tmp_path, 
[("t1", {}), ("t2", {})]) + + # execute + result, _ = _run_pass(mt, tmp_path) + + # assert + assert Path(result.model_attributes["manifest_path"]).name == "manifest.json" + assert "additional_files" not in result.model_attributes + assert "base_model_path" not in result.model_attributes + + +class TestEpCompatibility: + """Tests for ep_compatibility_info extraction from ONNX metadata.""" + + def test_extracted_from_onnx_metadata(self, tmp_path): + """ep_compatibility_info is read from ONNX model metadata_props.""" + # setup + h1 = _make_real_onnx_handler( + tmp_path, + "soc_60", + model_attributes={}, + onnx_metadata={"ep_compatibility_info.QNNExecutionProvider": "soc=60"}, + ) + h2 = _make_real_onnx_handler( + tmp_path, + "soc_73", + model_attributes={}, + onnx_metadata={"ep_compatibility_info.QNNExecutionProvider": "soc=73"}, + ) + mt = ModelPackageModelHandler([h1, h2], ["soc_60", "soc_73"], model_path=tmp_path) + + # execute + _, output_dir = _run_pass(mt, tmp_path) + + # assert + variants = _read_metadata(output_dir)["model_variants"] + assert variants["soc_60"]["constraints"]["ep_compatibility_info"] == "soc=60" + assert variants["soc_73"]["constraints"]["ep_compatibility_info"] == "soc=73" + + def test_empty_string_when_no_onnx_metadata(self, tmp_path): + """ep_compatibility_info defaults to empty string when ONNX has no such entry.""" + # setup + h1 = _make_real_onnx_handler(tmp_path, "soc_60", onnx_metadata={}) + h2 = _make_real_onnx_handler(tmp_path, "soc_73", onnx_metadata={}) + mt = ModelPackageModelHandler([h1, h2], ["soc_60", "soc_73"], model_path=tmp_path) + + # execute + _, output_dir = _run_pass(mt, tmp_path) + + # assert + variants = _read_metadata(output_dir)["model_variants"] + assert variants["soc_60"]["constraints"]["ep_compatibility_info"] == "" + assert variants["soc_73"]["constraints"]["ep_compatibility_info"] == "" + + +class TestConfigFiles: + """Tests for config file (additional_files) copying to configs/ directory.""" + + def 
test_files_copied_to_configs_dir(self, tmp_path): + """Regular files in additional_files are copied to configs/ and removed from variants.""" + # setup + comp_dir = tmp_path / "comp" + comp_dir.mkdir() + (comp_dir / "model.onnx").write_text("dummy") + (comp_dir / "genai_config.json").write_text('{"model": {}}') + (comp_dir / "tokenizer.json").write_text("{}") + + additional_files = [str(comp_dir / "genai_config.json"), str(comp_dir / "tokenizer.json")] + h = ONNXModelHandler( + model_path=str(comp_dir / "model.onnx"), + model_attributes={"additional_files": additional_files}, + ) + mt = ModelPackageModelHandler([h], ["soc_60"], model_path=tmp_path) + + # execute + _, output_dir = _run_pass(mt, tmp_path) + + # assert + assert (output_dir / "configs" / "genai_config.json").exists() + assert (output_dir / "configs" / "tokenizer.json").exists() + assert not (output_dir / "models" / "model" / "soc_60" / "genai_config.json").exists() + + def test_directories_copied_to_configs(self, tmp_path): + """Directories in additional_files (e.g., openvino_tokenizer) are copied to configs/.""" + # setup + variant_dir = tmp_path / "ov_target" + variant_dir.mkdir() + (variant_dir / "model.onnx").write_text("dummy") + tok_dir = variant_dir / "openvino_tokenizer" + tok_dir.mkdir() + (tok_dir / "tokenizer.xml").write_text("") + + h = ONNXModelHandler( + model_path=str(variant_dir / "model.onnx"), + model_attributes={"additional_files": [str(tok_dir)]}, + ) + mt = ModelPackageModelHandler([h], ["ov_2025_1"], model_path=tmp_path) + + # execute + _, output_dir = _run_pass(mt, tmp_path, ep="OpenVINOExecutionProvider") + + # assert + assert (output_dir / "configs" / "openvino_tokenizer" / "tokenizer.xml").exists() + assert not (output_dir / "models" / "model" / "ov_2025_1" / "openvino_tokenizer").exists() + + +class TestBaseModel: + """Tests for base (pre-optimized) model copying.""" + + def test_base_model_copied_and_in_metadata(self, tmp_path): + """Base model files are copied to base/ and 
listed in metadata model_variants.""" + # setup + base_dir = tmp_path / "base_models" + base_dir.mkdir() + (base_dir / "embeddings.onnx").write_text("embed") + (base_dir / "context_0.onnx").write_text("ctx0") + (base_dir / "weights.onnx.data").write_bytes(b"\x00" * 64) + (base_dir / "genai_config.json").write_text('{"model": {}}') + + mt = _make_model_package( + tmp_path, + [("soc_60", {"additional_files": [str(base_dir / "genai_config.json")]})], + parent_attrs={"base_model_path": str(base_dir)}, + ) + + # execute + _, output_dir = _run_pass(mt, tmp_path) + + # assert: model files copied, config files excluded + base_out = output_dir / "models" / "model" / "base" + assert (base_out / "embeddings.onnx").exists() + assert (base_out / "context_0.onnx").exists() + assert (base_out / "weights.onnx.data").exists() + assert not (base_out / "genai_config.json").exists() + + # assert: base variant in metadata with empty constraints + variants = _read_metadata(output_dir)["model_variants"] + assert variants["base"]["file"] == "context_0.onnx" + assert variants["base"]["constraints"] == {} + + def test_no_base_dir_when_path_missing(self, tmp_path): + """No base/ directory is created when base_model_path is not set.""" + # setup + mt = _make_model_package(tmp_path, [("soc_60", {})]) + + # execute + _, output_dir = _run_pass(mt, tmp_path) + + # assert + assert not (output_dir / "models" / "model" / "base").exists() + + +class TestCompositePackaging: + """Tests for packaging composite models (multiple ONNX components per variant).""" + + def test_composite_manifest_and_per_component_metadata(self, tmp_path): + """Composite model produces per-component dirs, metadata, and manifest with component_models.""" + # setup + mt = _make_composite_model_package( + tmp_path, + soc_configs=[("soc_60", {"device": "NPU"}), ("soc_73", {"device": "NPU"})], + component_names=["context_ctx", "embedding"], + ) + + # execute + result, output_dir = _run_pass(mt, tmp_path) + + # assert: manifest + 
assert isinstance(result, ModelPackageModelHandler) + manifest = _read_manifest(output_dir) + assert set(manifest["component_models"]) == {"context_ctx", "embedding"} + assert manifest["model_version"] == "1.0" + + # assert: per-component metadata + ctx_meta = _read_metadata(output_dir, "context_ctx") + assert ctx_meta["name"] == "context_ctx" + assert set(ctx_meta["model_variants"].keys()) == {"soc_60", "soc_73"} + assert ctx_meta["model_variants"]["soc_60"]["constraints"]["ep"] == "QNNExecutionProvider" + + embed_meta = _read_metadata(output_dir, "embedding") + assert embed_meta["name"] == "embedding" + + # assert: ONNX files in correct variant dirs + assert (output_dir / "models" / "context_ctx" / "soc_60" / "context_ctx.onnx").exists() + assert (output_dir / "models" / "embedding" / "soc_73" / "embedding.onnx").exists() + + def test_composite_custom_model_version(self, tmp_path): + """model_version config works for composite models.""" + # setup + mt = _make_composite_model_package( + tmp_path, + soc_configs=[("soc_60", {})], + component_names=["part1"], + ) + + # execute + _, output_dir = _run_pass(mt, tmp_path, config={"model_version": "3.0"}) + + # assert + assert _read_manifest(output_dir)["model_version"] == "3.0" + + +class TestTaskExtraction: + """Tests for task extraction via HuggingFace Hub API.""" + + def test_task_from_hf_hub_maps_to_component_name(self, tmp_path): + """HF pipeline_tag is used for task and maps to component directory name.""" + # setup + mt = _make_model_package( + tmp_path, + [("soc_60", {"_name_or_path": "Qwen/Qwen2.5-1.5B-Instruct"})], + ) + + # execute + mock_info = type("MockInfo", (), {"pipeline_tag": "text-generation"})() + with patch("olive.passes.onnx.model_package.model_info", return_value=mock_info): + _, output_dir = _run_pass(mt, tmp_path) + + # assert + manifest = _read_manifest(output_dir) + assert manifest["task"] == "text_generation" + assert manifest["component_models"] == ["decoder"] + assert 
_read_metadata(output_dir, "decoder")["name"] == "decoder" + + def test_empty_task_without_name_or_path(self, tmp_path): + """Task is empty string when _name_or_path is not in model attributes.""" + # setup + mt = _make_model_package(tmp_path, [("soc_60", {})]) + + # execute + _, output_dir = _run_pass(mt, tmp_path) + + # assert + manifest = _read_manifest(output_dir) + assert manifest["task"] == "" + assert manifest["component_models"] == ["model"] + + +class TestPassAutoDispatch: + """Tests for Pass.run() auto-dispatch on ModelPackageModelHandler.""" + + def test_non_accepting_pass_iterates_targets(self, tmp_path): + """A pass without _accepts_model_package_model runs independently on each variant.""" + # setup + from olive.passes.onnx.float16_conversion import OnnxFloatToFloat16 + + h1 = _make_onnx_handler(tmp_path, "t1", model_attributes={"architecture": "60"}) + h2 = _make_onnx_handler(tmp_path, "t2", model_attributes={"architecture": "73"}) + mt = ModelPackageModelHandler([h1, h2], ["t1", "t2"], model_path=tmp_path) + accelerator_spec = AcceleratorSpec(accelerator_type="NPU", execution_provider="QNNExecutionProvider") + + # execute + with patch.object(OnnxFloatToFloat16, "_run_for_config") as mock_run: + + def side_effect(model, config, output_model_path): + out_file = Path(output_model_path) + out_file.parent.mkdir(parents=True, exist_ok=True) + out_file.write_text("dummy") + return ONNXModelHandler(model_path=str(out_file), model_attributes=model.model_attributes) + + mock_run.side_effect = side_effect + p = create_pass_from_dict(OnnxFloatToFloat16, {}, disable_search=True, accelerator_spec=accelerator_spec) + result = p.run(mt, str(tmp_path / "output.onnx")) + + # assert + assert isinstance(result, ModelPackageModelHandler) + assert result.target_names == ["t1", "t2"] + assert mock_run.call_count == 2 diff --git a/test/passes/openvino/test_openvino_encapsulation.py b/test/passes/openvino/test_openvino_encapsulation.py index bfbc15a260..dcd84c715d 100644 
--- a/test/passes/openvino/test_openvino_encapsulation.py +++ b/test/passes/openvino/test_openvino_encapsulation.py @@ -3,9 +3,13 @@ # Licensed under the MIT License. # -------------------------------------------------------------------------- from pathlib import Path +from unittest.mock import MagicMock, patch import pytest +from olive.hardware.accelerator import AcceleratorSpec, Device +from olive.model import ONNXModelHandler +from olive.model.handler.model_package import ModelPackageModelHandler from olive.passes.olive_pass import create_pass_from_dict from olive.passes.openvino.conversion import OpenVINOConversion from olive.passes.openvino.encapsulation import OpenVINOEncapsulation @@ -101,3 +105,87 @@ def test_openvino_encapsulate_pass_dynamic_keep_ov_dynamic_dims(tmp_path): # assert assert Path(onnx_model.model_path).exists() assert (Path(onnx_model.model_path)).is_file() + + +# =========================================================================== +# Model package tests +# =========================================================================== + + +def test_model_package_returns_model_package_handler(tmp_path): + accelerator_spec = AcceleratorSpec(accelerator_type=Device.NPU, execution_provider="OpenVINOExecutionProvider") + + p = create_pass_from_dict( + OpenVINOEncapsulation, + {"ov_version": ["2025.1", "2025.2"], "target_device": "npu"}, + disable_search=True, + accelerator_spec=accelerator_spec, + ) + + with patch.object(OpenVINOEncapsulation, "_run_single_target") as mock_single: + + def side_effect(model, config, output_model_path): + out_dir = Path(output_model_path) + out_dir.mkdir(parents=True, exist_ok=True) + model_file = out_dir / "model.onnx" + model_file.write_text("dummy") + return ONNXModelHandler( + model_path=str(model_file), + model_attributes={ + "ep": "OpenVINOExecutionProvider", + "device": "NPU", + "sdk_version": config.ov_version, + "architecture": "NPU", + }, + ) + + mock_single.side_effect = side_effect + + input_model = 
MagicMock() + input_model.model_attributes = {} + output_path = str(tmp_path / "output.onnx") + result = p.run(input_model, output_path) + + assert isinstance(result, ModelPackageModelHandler) + assert result.target_names == ["ov_2025_1", "ov_2025_2"] + assert mock_single.call_count == 2 + + +def test_single_target_populates_model_attributes(tmp_path): + accelerator_spec = AcceleratorSpec(accelerator_type=Device.NPU, execution_provider="OpenVINOExecutionProvider") + + p = create_pass_from_dict( + OpenVINOEncapsulation, + {"ov_version": "2025.1", "target_device": "npu"}, + disable_search=True, + accelerator_spec=accelerator_spec, + ) + + with patch.object(OpenVINOEncapsulation, "_run_single_target") as mock_single: + + def side_effect(model, config, output_model_path): + out_dir = Path(output_model_path) + out_dir.parent.mkdir(parents=True, exist_ok=True) + out_dir.mkdir(parents=True, exist_ok=True) + model_file = out_dir / "model.onnx" + model_file.write_text("dummy") + return ONNXModelHandler( + model_path=str(model_file), + model_attributes={ + "ep": "OpenVINOExecutionProvider", + "device": "NPU", + "sdk_version": "2025.1", + "architecture": "NPU", + }, + ) + + mock_single.side_effect = side_effect + + input_model = MagicMock() + input_model.model_attributes = {} + output_path = str(tmp_path / "output.onnx") + result = p.run(input_model, output_path) + + assert isinstance(result, ONNXModelHandler) + assert result.model_attributes["ep"] == "OpenVINOExecutionProvider" + assert result.model_attributes["sdk_version"] == "2025.1"