Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions docs/source/reference/cli.rst
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,17 @@ Run benchmarking using llm-eval.
:prog: olive
:path: benchmark

Generate Model Package
======================

Merge multiple model outputs into a model package with manifest and per-component metadata.

.. argparse::
:module: olive.cli.launcher
:func: get_cli_parser
:prog: olive
:path: generate-model-package

Providing Input Models
======================

Expand Down
6 changes: 6 additions & 0 deletions docs/source/reference/pass.rst
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,12 @@ EPContextBinaryGenerator
------------------------
.. autoconfigclass:: olive.passes.EPContextBinaryGenerator

.. _model_package:

ModelPackage
------------
.. autoconfigclass:: olive.passes.ModelPackage

.. _compose_onnx_models:

ComposeOnnxModels
Expand Down
56 changes: 51 additions & 5 deletions olive/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -384,14 +384,60 @@ def save_model(
):
"""Save a model from the cache to a given path."""
output_dir = Path(output_dir) if output_dir else Path.cwd()

# If output_dir has a suffix (like .onnx), it's a file path
# Use parent directory for saving files
actual_output_dir = output_dir.parent if output_dir.suffix else output_dir
if output_dir.suffix and not output_dir.is_dir():
actual_output_dir = output_dir.parent
else:
actual_output_dir = output_dir
actual_output_dir.mkdir(parents=True, exist_ok=True)

model_json = self.load_model(model_id)
if model_json["type"].lower() == "compositemodel":
if model_json["type"].lower() == "modelpackagemodel":
model_json_config = model_json["config"]
source_path = Path(model_json_config["model_path"])
actual_output_dir.mkdir(parents=True, exist_ok=True)

if source_path.exists():
# Only copy target subdirectories (soc_60/, soc_73/, etc.) and manifest.json.
# Skip top-level additional_files (tokenizer, config) since each target subdir has its own copy.
for item in source_path.iterdir():
dest = actual_output_dir / item.name
if item.is_dir():
shutil.copytree(str(item), str(dest), dirs_exist_ok=overwrite)
elif item.name == "manifest.json":
shutil.copy2(str(item), str(dest))

# Update paths to point to new location
model_json_config["model_path"] = str(actual_output_dir)

# Update target model paths
for target_model in model_json_config.get("target_models", []):
target_config = target_model.get("config", {})
old_model_path = target_config.get("model_path", "")
if old_model_path and str(source_path) in old_model_path:
target_config["model_path"] = old_model_path.replace(str(source_path), str(actual_output_dir))

# Clear additional_files since each target subdir has its own copies
model_attributes = model_json_config.get("model_attributes") or {}
model_attributes.pop("additional_files", None)

# Update manifest_path
if model_attributes.get("manifest_path"):
model_attributes["manifest_path"] = str(
actual_output_dir / Path(model_attributes["manifest_path"]).name
)

# Update manifest name: if pass config set model_name explicitly, keep it;
# otherwise update to the output directory name (e.g., "qwen_2.5_1.5b_Instruct")
manifest_file = actual_output_dir / "manifest.json"
if manifest_file.exists():
manifest = json.loads(manifest_file.read_text())
# The pass defaults model_name to the cache dir name (not meaningful).
# Replace it with the final output directory name unless it was explicitly configured.
source_dir_name = source_path.name if source_path else None
if not manifest.get("name") or manifest.get("name") == source_dir_name:
manifest["name"] = actual_output_dir.name
manifest_file.write_text(json.dumps(manifest, indent=2))
elif model_json["type"].lower() == "compositemodel":
model_json_config = model_json["config"]
model_attributes = model_json_config.get("model_attributes") or {}

Expand Down
2 changes: 2 additions & 0 deletions olive/cli/launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from olive.cli.generate_adapter import GenerateAdapterCommand
from olive.cli.generate_cost_model import GenerateCostModelCommand
from olive.cli.init import InitCommand
from olive.cli.model_package import ModelPackageCommand
from olive.cli.optimize import OptimizeCommand
from olive.cli.quantize import QuantizeCommand
from olive.cli.run import WorkflowRunCommand
Expand Down Expand Up @@ -54,6 +55,7 @@ def get_cli_parser(called_as_console_script: bool = True) -> ArgumentParser:
ConfigureQualcommSDKCommand.register_subcommand(commands_parser)
SharedCacheCommand.register_subcommand(commands_parser)
ExtractAdaptersCommand.register_subcommand(commands_parser)
ModelPackageCommand.register_subcommand(commands_parser)
BenchmarkCommand.register_subcommand(commands_parser)

return parser
Expand Down
144 changes: 144 additions & 0 deletions olive/cli/model_package.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
import json
import logging
from argparse import ArgumentParser
from pathlib import Path
from typing import Any

from olive.cli.base import (
BaseOliveCLICommand,
add_logging_options,
add_save_config_file_options,
add_telemetry_options,
)
from olive.telemetry import action

logger = logging.getLogger(__name__)


class ModelPackageCommand(BaseOliveCLICommand):
    """Merge multiple model outputs into a model package via the ModelPackage pass."""

    @staticmethod
    def register_subcommand(parser: ArgumentParser):
        """Register the `generate-model-package` subcommand and its options."""
        subparser = parser.add_parser(
            "generate-model-package",
            help="Merge multiple model outputs into a model package with manifest",
        )

        # Repeatable source option: each occurrence names one Olive output directory.
        subparser.add_argument(
            "-s",
            "--source",
            type=str,
            action="append",
            required=True,
            help="Source Olive output directory. Can be specified multiple times.",
        )
        subparser.add_argument(
            "-o",
            "--output_path",
            type=str,
            required=True,
            help="Output directory for the merged model package.",
        )
        subparser.add_argument(
            "--model_name",
            type=str,
            default=None,
            help="Model name for the manifest. If not set, derived from the output directory name.",
        )
        subparser.add_argument(
            "--model_version",
            type=str,
            default="1.0",
            help="Model version string for the manifest. Default: 1.0",
        )

        # Shared CLI option groups used by all Olive subcommands.
        add_logging_options(subparser)
        add_save_config_file_options(subparser)
        add_telemetry_options(subparser)
        subparser.set_defaults(func=ModelPackageCommand)

    def _get_run_config(self, tempdir: str) -> dict[str, Any]:
        """Build the workflow run config that feeds the ModelPackage pass.

        The validated sources become a composite ModelPackageModel input;
        accelerator info is taken from the first target model's attributes.
        """
        sources = self._parse_sources()

        # Preserve source order: names come from directory basenames,
        # configs are loaded one per source directory.
        target_names = [name for name, _ in sources]
        target_models = [self._read_model_config(path) for _, path in sources]

        ep, device = self._extract_accelerator_info(target_models)

        return {
            "input_model": {
                "type": "ModelPackageModel",
                "target_models": target_models,
                "target_names": target_names,
                "model_path": tempdir,
            },
            "systems": {
                "local_system": {
                    "type": "LocalSystem",
                    "accelerators": [{"device": device, "execution_providers": [ep]}],
                }
            },
            "passes": {
                "pkg": {
                    "type": "ModelPackage",
                    "model_name": self.args.model_name,
                    "model_version": self.args.model_version,
                }
            },
            "output_dir": self.args.output_path,
            "host": "local_system",
            "target": "local_system",
            "log_severity_level": self.args.log_level,
            "no_artifacts": True,
        }

    @action
    def run(self):
        """Execute the merge workflow."""
        return self._run_workflow()

    def _parse_sources(self) -> list[tuple[str, Path]]:
        """Validate each --source directory and pair it with its basename.

        Raises ValueError for a missing directory, a directory without a
        model_config.json, or fewer than two sources overall.
        """
        parsed: list[tuple[str, Path]] = []
        for raw in self.args.source:
            source_dir = Path(raw)
            if not source_dir.is_dir():
                raise ValueError(f"Source path does not exist or is not a directory: {source_dir}")
            if not (source_dir / "model_config.json").exists():
                raise ValueError(
                    f"No model_config.json found in {source_dir}. "
                    "Source must be an Olive output directory with model_config.json."
                )
            parsed.append((source_dir.name, source_dir))

        if len(parsed) < 2:
            raise ValueError("At least two --source directories are required to merge.")
        return parsed

    @staticmethod
    def _read_model_config(source_path: Path) -> dict:
        """Load the model_config.json stored in an Olive output directory."""
        return json.loads((source_path / "model_config.json").read_text())

    @staticmethod
    def _extract_accelerator_info(target_models: list[dict]) -> tuple[str, str]:
        """Return (execution_provider, device) from the first model's attributes.

        Falls back to CPU defaults when no models or attributes are present.
        """
        if not target_models:
            return "CPUExecutionProvider", "cpu"
        attrs = target_models[0].get("config", {}).get("model_attributes") or {}
        ep = attrs.get("ep", "CPUExecutionProvider")
        device = attrs.get("device", "cpu")
        return ep, device.lower()
2 changes: 1 addition & 1 deletion olive/cli/optimize.py
Original file line number Diff line number Diff line change
Expand Up @@ -582,7 +582,7 @@ def _get_matmul_nbits_to_qdq_pass_config(self) -> dict[str, Any]:
"add_zero_point": "true",
"save_as_external_data": "true",
}
config["nodes_to_exclude"] = ["/lm_head/MatMul_Q4"]
config["nodes_to_exclude"] = ["/lm_head/MatMulNBits"]
if precision.value == Precision.INT4:
config["use_int4"] = "true"
return config
Expand Down
17 changes: 7 additions & 10 deletions olive/engine/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,15 +195,14 @@ def run(
self.initialize(log_to_file, log_severity_level)

output_dir: Path = (Path(output_dir) if output_dir else Path.cwd()).resolve()
if output_dir.suffix:
# Treat as file path only if it has a suffix and is not an existing directory
is_file_path = output_dir.suffix and not output_dir.is_dir()
if is_file_path:
output_dir.parent.mkdir(parents=True, exist_ok=True)
artifacts_dir = output_dir.parent
else:
output_dir.mkdir(parents=True, exist_ok=True)

# Determine the directory for artifacts (run_history, etc.)
# If output_dir is a file path (has suffix), use parent directory
# Otherwise use output_dir itself
artifacts_dir = output_dir.parent if output_dir.suffix else output_dir
artifacts_dir = output_dir

logger.info("Running Olive on accelerator: %s", accelerator_spec)
with self._create_system():
Expand Down Expand Up @@ -254,10 +253,8 @@ def run_accelerator(

self.footprint.record(is_input_model=True, model_id=input_model_id)

# Determine the directory for artifacts
# If output_dir is a file path (has suffix like .onnx), use parent directory
# Otherwise use output_dir itself
artifacts_dir = output_dir.parent if output_dir.suffix else output_dir
# Artifacts directory: file path (has suffix, not existing dir) uses parent
artifacts_dir = output_dir.parent if (output_dir.suffix and not output_dir.is_dir()) else output_dir

try:
if evaluate_input_model and not self.evaluator_config:
Expand Down
2 changes: 2 additions & 0 deletions olive/model/handler/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from olive.model.handler.composite import CompositeModelHandler
from olive.model.handler.diffusers import DiffusersModelHandler
from olive.model.handler.hf import DistributedHfModelHandler, HfModelHandler
from olive.model.handler.model_package import ModelPackageModelHandler
from olive.model.handler.onnx import DistributedOnnxModelHandler, ONNXModelHandler
from olive.model.handler.openvino import OpenVINOModelHandler
from olive.model.handler.pytorch import PyTorchModelHandler
Expand All @@ -19,6 +20,7 @@
"DistributedHfModelHandler",
"DistributedOnnxModelHandler",
"HfModelHandler",
"ModelPackageModelHandler",
"ONNXModelHandler",
"OliveModelHandler",
"OpenVINOModelHandler",
Expand Down
Loading
Loading