Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions docs/source/reference/cli.rst
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,17 @@ Run benchmarking using llm-eval.
:prog: olive
:path: benchmark

Generate Model Package
======================

Merge multiple model outputs into a model package with manifest and per-component metadata.

.. argparse::
:module: olive.cli.launcher
:func: get_cli_parser
:prog: olive
:path: generate-model-package

Providing Input Models
======================

Expand Down
6 changes: 6 additions & 0 deletions docs/source/reference/pass.rst
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,12 @@ EPContextBinaryGenerator
------------------------
.. autoconfigclass:: olive.passes.EPContextBinaryGenerator

.. _model_package:

ModelPackage
------------
.. autoconfigclass:: olive.passes.ModelPackage

.. _compose_onnx_models:

ComposeOnnxModels
Expand Down
56 changes: 51 additions & 5 deletions olive/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -384,14 +384,60 @@ def save_model(
):
"""Save a model from the cache to a given path."""
output_dir = Path(output_dir) if output_dir else Path.cwd()

# If output_dir has a suffix (like .onnx), it's a file path
# Use parent directory for saving files
actual_output_dir = output_dir.parent if output_dir.suffix else output_dir
if output_dir.suffix and not output_dir.is_dir():
actual_output_dir = output_dir.parent
else:
actual_output_dir = output_dir
actual_output_dir.mkdir(parents=True, exist_ok=True)

model_json = self.load_model(model_id)
if model_json["type"].lower() == "compositemodel":
if model_json["type"].lower() == "modelpackagemodel":
model_json_config = model_json["config"]
source_path = Path(model_json_config["model_path"])
actual_output_dir.mkdir(parents=True, exist_ok=True)

if source_path.exists():
# Only copy target subdirectories (soc_60/, soc_73/, etc.) and manifest.json.
# Skip top-level additional_files (tokenizer, config) since each target subdir has its own copy.
for item in source_path.iterdir():
dest = actual_output_dir / item.name
if item.is_dir():
shutil.copytree(str(item), str(dest), dirs_exist_ok=overwrite)
elif item.name == "manifest.json":
shutil.copy2(str(item), str(dest))

# Update paths to point to new location
model_json_config["model_path"] = str(actual_output_dir)

# Update target model paths
for target_model in model_json_config.get("target_models", []):
target_config = target_model.get("config", {})
old_model_path = target_config.get("model_path", "")
if old_model_path and str(source_path) in old_model_path:
target_config["model_path"] = old_model_path.replace(str(source_path), str(actual_output_dir))

# Clear additional_files since each target subdir has its own copies
model_attributes = model_json_config.get("model_attributes") or {}
model_attributes.pop("additional_files", None)

# Update manifest_path
if model_attributes.get("manifest_path"):
model_attributes["manifest_path"] = str(
actual_output_dir / Path(model_attributes["manifest_path"]).name
)

# Update manifest name: if pass config set model_name explicitly, keep it;
# otherwise update to the output directory name (e.g., "qwen_2.5_1.5b_Instruct")
manifest_file = actual_output_dir / "manifest.json"
if manifest_file.exists():
manifest = json.loads(manifest_file.read_text())
# The pass defaults model_name to the cache dir name (not meaningful).
# Replace it with the final output directory name unless it was explicitly configured.
source_dir_name = source_path.name if source_path else None
if not manifest.get("name") or manifest.get("name") == source_dir_name:
manifest["name"] = actual_output_dir.name
manifest_file.write_text(json.dumps(manifest, indent=2))
elif model_json["type"].lower() == "compositemodel":
model_json_config = model_json["config"]
model_attributes = model_json_config.get("model_attributes") or {}

Expand Down
2 changes: 2 additions & 0 deletions olive/cli/launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from olive.cli.generate_adapter import GenerateAdapterCommand
from olive.cli.generate_cost_model import GenerateCostModelCommand
from olive.cli.init import InitCommand
from olive.cli.model_package import ModelPackageCommand
from olive.cli.optimize import OptimizeCommand
from olive.cli.quantize import QuantizeCommand
from olive.cli.run import WorkflowRunCommand
Expand Down Expand Up @@ -54,6 +55,7 @@ def get_cli_parser(called_as_console_script: bool = True) -> ArgumentParser:
ConfigureQualcommSDKCommand.register_subcommand(commands_parser)
SharedCacheCommand.register_subcommand(commands_parser)
ExtractAdaptersCommand.register_subcommand(commands_parser)
ModelPackageCommand.register_subcommand(commands_parser)
BenchmarkCommand.register_subcommand(commands_parser)

return parser
Expand Down
144 changes: 144 additions & 0 deletions olive/cli/model_package.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
import json
import logging
from argparse import ArgumentParser
from pathlib import Path
from typing import Any

from olive.cli.base import (
BaseOliveCLICommand,
add_logging_options,
add_save_config_file_options,
add_telemetry_options,
)
from olive.telemetry import action

logger = logging.getLogger(__name__)


class ModelPackageCommand(BaseOliveCLICommand):
    """Merge multiple model outputs into a model package via the ModelPackage pass."""

    @staticmethod
    def register_subcommand(parser: ArgumentParser):
        """Register the `generate-model-package` subcommand and its options."""
        subparser = parser.add_parser(
            "generate-model-package",
            help="Merge multiple model outputs into a model package with manifest",
        )

        # Repeatable source option: each occurrence names one Olive output directory.
        subparser.add_argument(
            "-s",
            "--source",
            type=str,
            action="append",
            required=True,
            help="Source Olive output directory. Can be specified multiple times.",
        )
        subparser.add_argument(
            "-o",
            "--output_path",
            type=str,
            required=True,
            help="Output directory for the merged model package.",
        )
        subparser.add_argument(
            "--model_name",
            type=str,
            default=None,
            help="Model name for the manifest. If not set, derived from the output directory name.",
        )
        subparser.add_argument(
            "--model_version",
            type=str,
            default="1.0",
            help="Model version string for the manifest. Default: 1.0",
        )

        # Shared CLI option groups used by all Olive subcommands.
        add_logging_options(subparser)
        add_save_config_file_options(subparser)
        add_telemetry_options(subparser)
        subparser.set_defaults(func=ModelPackageCommand)

    def _get_run_config(self, tempdir: str) -> dict[str, Any]:
        """Build the workflow run config that feeds the ModelPackage pass.

        The validated sources become a composite ModelPackageModel input;
        accelerator info is taken from the first target model's attributes.
        """
        sources = self._parse_sources()

        # Preserve source order: names come from directory basenames,
        # configs are loaded one per source directory.
        target_names = [name for name, _ in sources]
        target_models = [self._read_model_config(path) for _, path in sources]

        ep, device = self._extract_accelerator_info(target_models)

        return {
            "input_model": {
                "type": "ModelPackageModel",
                "target_models": target_models,
                "target_names": target_names,
                "model_path": tempdir,
            },
            "systems": {
                "local_system": {
                    "type": "LocalSystem",
                    "accelerators": [{"device": device, "execution_providers": [ep]}],
                }
            },
            "passes": {
                "pkg": {
                    "type": "ModelPackage",
                    "model_name": self.args.model_name,
                    "model_version": self.args.model_version,
                }
            },
            "output_dir": self.args.output_path,
            "host": "local_system",
            "target": "local_system",
            "log_severity_level": self.args.log_level,
            "no_artifacts": True,
        }

    @action
    def run(self):
        """Execute the merge workflow."""
        return self._run_workflow()

    def _parse_sources(self) -> list[tuple[str, Path]]:
        """Validate each --source directory and pair it with its basename.

        Raises ValueError for a missing directory, a directory without a
        model_config.json, or fewer than two sources overall.
        """
        parsed: list[tuple[str, Path]] = []
        for raw in self.args.source:
            source_dir = Path(raw)
            if not source_dir.is_dir():
                raise ValueError(f"Source path does not exist or is not a directory: {source_dir}")
            if not (source_dir / "model_config.json").exists():
                raise ValueError(
                    f"No model_config.json found in {source_dir}. "
                    "Source must be an Olive output directory with model_config.json."
                )
            parsed.append((source_dir.name, source_dir))

        if len(parsed) < 2:
            raise ValueError("At least two --source directories are required to merge.")
        return parsed

    @staticmethod
    def _read_model_config(source_path: Path) -> dict:
        """Load the model_config.json stored in an Olive output directory."""
        return json.loads((source_path / "model_config.json").read_text())

    @staticmethod
    def _extract_accelerator_info(target_models: list[dict]) -> tuple[str, str]:
        """Return (execution_provider, device) from the first model's attributes.

        Falls back to CPU defaults when no models or attributes are present.
        """
        if not target_models:
            return "CPUExecutionProvider", "cpu"
        attrs = target_models[0].get("config", {}).get("model_attributes") or {}
        ep = attrs.get("ep", "CPUExecutionProvider")
        device = attrs.get("device", "cpu")
        return ep, device.lower()
2 changes: 1 addition & 1 deletion olive/cli/optimize.py
Original file line number Diff line number Diff line change
Expand Up @@ -582,7 +582,7 @@ def _get_matmul_nbits_to_qdq_pass_config(self) -> dict[str, Any]:
"add_zero_point": "true",
"save_as_external_data": "true",
}
config["nodes_to_exclude"] = ["/lm_head/MatMul_Q4"]
config["nodes_to_exclude"] = ["/lm_head/MatMulNBits"]
if precision.value == Precision.INT4:
config["use_int4"] = "true"
return config
Expand Down
17 changes: 7 additions & 10 deletions olive/engine/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,15 +195,14 @@ def run(
self.initialize(log_to_file, log_severity_level)

output_dir: Path = (Path(output_dir) if output_dir else Path.cwd()).resolve()
if output_dir.suffix:
# Treat as file path only if it has a suffix and is not an existing directory
is_file_path = output_dir.suffix and not output_dir.is_dir()
if is_file_path:
output_dir.parent.mkdir(parents=True, exist_ok=True)
artifacts_dir = output_dir.parent
else:
output_dir.mkdir(parents=True, exist_ok=True)

# Determine the directory for artifacts (run_history, etc.)
# If output_dir is a file path (has suffix), use parent directory
# Otherwise use output_dir itself
artifacts_dir = output_dir.parent if output_dir.suffix else output_dir
artifacts_dir = output_dir

logger.info("Running Olive on accelerator: %s", accelerator_spec)
with self._create_system():
Expand Down Expand Up @@ -254,10 +253,8 @@ def run_accelerator(

self.footprint.record(is_input_model=True, model_id=input_model_id)

# Determine the directory for artifacts
# If output_dir is a file path (has suffix like .onnx), use parent directory
# Otherwise use output_dir itself
artifacts_dir = output_dir.parent if output_dir.suffix else output_dir
# Artifacts directory: file path (has suffix, not existing dir) uses parent
artifacts_dir = output_dir.parent if (output_dir.suffix and not output_dir.is_dir()) else output_dir

try:
if evaluate_input_model and not self.evaluator_config:
Expand Down
2 changes: 2 additions & 0 deletions olive/model/handler/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from olive.model.handler.composite import CompositeModelHandler
from olive.model.handler.diffusers import DiffusersModelHandler
from olive.model.handler.hf import DistributedHfModelHandler, HfModelHandler
from olive.model.handler.model_package import ModelPackageModelHandler
from olive.model.handler.onnx import DistributedOnnxModelHandler, ONNXModelHandler
from olive.model.handler.openvino import OpenVINOModelHandler
from olive.model.handler.pytorch import PyTorchModelHandler
Expand All @@ -19,6 +20,7 @@
"DistributedHfModelHandler",
"DistributedOnnxModelHandler",
"HfModelHandler",
"ModelPackageModelHandler",
"ONNXModelHandler",
"OliveModelHandler",
"OpenVINOModelHandler",
Expand Down
Loading
Loading