Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 59 additions & 0 deletions benchmarks/utils/build_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@
import contextlib
import io
import os
import shutil
import subprocess
import sys
import tempfile
import time
import tomllib
from concurrent.futures import ProcessPoolExecutor, as_completed
Expand Down Expand Up @@ -218,6 +220,57 @@ def _get_sdk_submodule_info() -> tuple[str, str, str]:
return git_ref, git_sha, sdk_version


def _sdk_root() -> Path:
    """Return the absolute path of the ``vendor/software-agent-sdk`` directory."""
    module_path = Path(__file__).resolve()
    # Three levels above this module file:
    # benchmarks/utils/ -> benchmarks/ -> directory containing ``vendor``.
    repo_root = module_path.parent.parent.parent
    return repo_root.joinpath("vendor", "software-agent-sdk")


def _pre_build_sdist() -> Path:
    """
    Build the SDK sdist once and reuse it across all image builds in a run.

    Runs ``uv build --sdist`` inside the SDK directory returned by
    ``_sdk_root()`` and writes the result into a fresh temporary directory.

    Returns:
        Path to the single ``*.tar.gz`` produced by the build. The caller must
        clean up the parent directory of the returned tarball.

    Raises:
        RuntimeError: If ``uv build`` exits with a non-zero status, or if the
            output directory does not contain exactly one ``*.tar.gz``.
        OSError: If the ``uv`` executable cannot be started (e.g. it is not
            installed).

    On every failure path the temporary directory is removed before the
    exception propagates, so nothing is left behind for the caller to clean.
    """
    sdk_path = _sdk_root()
    sdist_dir = Path(tempfile.mkdtemp(prefix="shared-sdist-")).resolve()

    logger.info("Pre-building SDK sdist from %s", sdk_path)
    start = time.monotonic()
    try:
        proc = subprocess.run(
            ["uv", "build", "--sdist", "--out-dir", str(sdist_dir)],
            cwd=str(sdk_path),
            capture_output=True,
            text=True,
        )
        if proc.returncode != 0:
            raise RuntimeError(f"Failed to build SDK sdist: {proc.stderr}")

        sdists = sorted(sdist_dir.glob("*.tar.gz"))
        if len(sdists) != 1:
            raise RuntimeError(f"Expected 1 SDK sdist, got {len(sdists)}")
    except BaseException:
        # Covers failures to launch ``uv`` and interrupts as well as the
        # RuntimeErrors above: the caller never learns the directory's path
        # when we raise, so it has to be removed here.
        shutil.rmtree(sdist_dir, ignore_errors=True)
        raise

    logger.info("Pre-built SDK sdist in %.1fs: %s", time.monotonic() - start, sdists[0])
    return sdists[0]


@contextlib.contextmanager
def _prepare_cached_sdist():
    """
    Context manager yielding a pre-built SDK sdist path, or ``None``.

    The sdist is built on entry via ``_pre_build_sdist()``. If that raises an
    ``Exception``, a warning is logged and ``None`` is yielded instead. On exit,
    the directory containing the yielded tarball (if any) is removed.
    """
    sdist: Path | None = None
    try:
        sdist = _pre_build_sdist()
    except Exception as e:
        # Best effort: a failed pre-build must not abort the whole run.
        logger.warning(
            "Failed to pre-build SDK sdist; each image will build its own: %s", e
        )

    try:
        yield sdist
    finally:
        if sdist is not None:
            shutil.rmtree(sdist.parent, ignore_errors=True)


@contextlib.contextmanager
def capture_output(base_name: str, out_dir: Path):
"""
Expand Down Expand Up @@ -343,6 +396,7 @@ def build_image(
target: TargetType = "source-minimal",
push: bool = False,
force_build: bool = False,
cached_sdist: Path | None = None,
) -> BuildOutput:
# Importing here because openhands.agent_server.docker.build runs git checks
# which fails when installed as a package outside the git repo
Expand All @@ -363,6 +417,7 @@ def build_image(
# Override git info to use SDK submodule info instead of benchmarks repo
git_ref=git_ref,
git_sha=git_sha,
prebuilt_sdist=cached_sdist,
sdk_version=sdk_version,
)
if _force_build_enabled(force_build):
Expand Down Expand Up @@ -464,6 +519,7 @@ def _build_with_logging(
force_build: bool = False,
max_retries: int = 3,
post_build_fn: Callable[[BuildOutput, bool], BuildOutput] | None = None,
cached_sdist: Path | None = None,
) -> BuildOutput:
"""
Module-level function for building a single image with output capture.
Expand Down Expand Up @@ -510,6 +566,7 @@ def _build_with_logging(
target,
push,
force_build=force_build,
cached_sdist=cached_sdist,
)
except Exception as e:
result = BuildOutput(
Expand Down Expand Up @@ -673,6 +730,7 @@ def _chunks(seq: list[str], size: int):
total_batches = len(batches)

with (
_prepare_cached_sdist() as cached_sdist,
manifest_file.open("w") as writer,
tqdm(
total=len(base_images), desc="Building agent-server images", leave=True
Expand Down Expand Up @@ -713,6 +771,7 @@ def _chunks(seq: list[str], size: int):
force_build=force_build,
max_retries=max_retries,
post_build_fn=post_build_fn,
cached_sdist=cached_sdist,
)
futures[fut] = base

Expand Down
107 changes: 107 additions & 0 deletions tests/test_image_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
which centralize Docker image detection and build logic across all benchmarks.
"""

import contextlib
import os
import subprocess
from pathlib import Path
Expand Down Expand Up @@ -390,6 +391,112 @@ def test_build_parser_accepts_force_build(self):
assert args.force_build is True


class TestCachedSdistReuse:
    """Checks that a pre-built SDK sdist path is forwarded through the build helpers."""

    @staticmethod
    def _write_fake_sdist(directory: Path) -> Path:
        """Create a placeholder tarball file in *directory* and return its path."""
        tarball = directory / "openhands-sdk.tar.gz"
        tarball.write_text("cached", encoding="utf-8")
        return tarball

    def test_build_image_passes_cached_sdist_to_sdk_build_module(
        self,
        tmp_path: Path,
    ):
        """``build_image`` passes ``cached_sdist`` on as ``opts.prebuilt_sdist``."""
        from benchmarks.utils.build_utils import build_image
        from openhands.agent_server.docker import build as sdk_build_module

        cached_sdist = self._write_fake_sdist(tmp_path)
        seen = {}

        def fake_build(opts):
            # Record what the SDK build entry point was handed.
            seen["prebuilt_sdist"] = opts.prebuilt_sdist
            return MagicMock(tags=["integration:test"], telemetry=MagicMock())

        image_missing = patch(
            "benchmarks.utils.build_utils.remote_image_exists", return_value=False
        )
        fixed_sdk_info = patch(
            "benchmarks.utils.build_utils._get_sdk_submodule_info",
            return_value=("main", "abcdef0", "1.0.0"),
        )
        stubbed_sdk_build = patch.object(
            sdk_build_module, "build_with_telemetry", side_effect=fake_build
        )

        with image_missing, fixed_sdk_info, stubbed_sdk_build:
            output = build_image(
                base_image="base:latest",
                target_image="ghcr.io/openhands/eval-agent-server",
                custom_tag="mytag",
                cached_sdist=cached_sdist,
            )

        assert output.error is None
        assert output.tags == ["integration:test"]
        assert seen["prebuilt_sdist"] == cached_sdist

    def test_build_all_images_passes_cached_sdist_to_workers(self, tmp_path: Path):
        """``build_all_images`` submits every build with the shared ``cached_sdist``."""
        from benchmarks.utils import build_utils

        cached_sdist = self._write_fake_sdist(tmp_path)
        submit_calls: list[dict] = []

        @contextlib.contextmanager
        def fake_prepare_cached_sdist():
            yield cached_sdist

        class FakeFuture:
            """Already-completed stand-in for an executor future."""

            def __init__(self, result: BuildOutput):
                self._result = result

            def result(self) -> BuildOutput:
                return self._result

        class FakeExecutor:
            """Executor replacement that records submissions without running them."""

            def __init__(self, *args, **kwargs):
                pass

            def __enter__(self):
                return self

            def __exit__(self, exc_type, exc, tb):
                return False

            def submit(self, fn, **kwargs):
                submit_calls.append(kwargs)
                base = kwargs["base_image"]
                canned = BuildOutput(
                    base_image=base,
                    tags=[f"tag:{kwargs['base_image']}"],
                    error=None,
                )
                return FakeFuture(canned)

        def passthrough(futures):
            # The fake futures are already done, so hand them back unchanged.
            return futures

        with (
            patch.object(
                build_utils,
                "_prepare_cached_sdist",
                side_effect=fake_prepare_cached_sdist,
            ),
            patch.object(build_utils, "ProcessPoolExecutor", FakeExecutor),
            patch.object(build_utils, "as_completed", side_effect=passthrough),
            patch.object(build_utils, "buildkit_disk_usage", return_value=(0, 0)),
            patch.object(build_utils, "maybe_prune_buildkit_cache", return_value=False),
        ):
            exit_code = build_utils.build_all_images(
                base_images=["base-1", "base-2"],
                target="source-minimal",
                build_dir=tmp_path,
            )

        assert exit_code == 0
        forwarded = [call["cached_sdist"] for call in submit_calls]
        assert forwarded == [cached_sdist, cached_sdist]


class TestBuildWithLoggingTelemetry:
@patch("benchmarks.utils.build_utils.maybe_reset_buildkit")
@patch("benchmarks.utils.build_utils.time.monotonic", side_effect=[100.0, 109.5])
Expand Down
Loading
Loading