Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions .github/workflows/build-swebench-images.yml
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ jobs:

runs-on:
labels: blacksmith-32vcpu-ubuntu-2204
timeout-minutes: 180

# Allow pushing to GHCR and commenting on issues
permissions:
Expand All @@ -86,6 +87,11 @@ jobs:
issues: write

steps:
- name: Record build start time
run: |
echo "BUILD_START=$(date +%s)" >> "$GITHUB_ENV"
echo "Build started at $(date -u)"

- name: Determine checkout ref
id: checkout-ref
run: |
Expand Down Expand Up @@ -249,6 +255,21 @@ jobs:
BUILDKIT_PROGRESS: plain
BUILDKIT_RESET_ON_FAILURE: 1

- name: Post-build disk and timing report
if: always()
run: |
set -euo pipefail
BUILD_END=$(date +%s)
ELAPSED=$(( BUILD_END - ${BUILD_START:-$BUILD_END} ))
echo "## Build Timing" >> "$GITHUB_STEP_SUMMARY"
echo "**Elapsed:** $((ELAPSED / 60))m $((ELAPSED % 60))s" >> "$GITHUB_STEP_SUMMARY"
echo "" >> "$GITHUB_STEP_SUMMARY"

echo "## Disk Usage After Build" >> "$GITHUB_STEP_SUMMARY"
df -h / /var/lib/buildkit 2>/dev/null | tee -a "$GITHUB_STEP_SUMMARY" || true
echo "" >> "$GITHUB_STEP_SUMMARY"
docker buildx du --verbose 2>/dev/null | head -40 | tee -a "$GITHUB_STEP_SUMMARY" || true

- name: Archive build logs
if: always()
run: |
Expand Down
51 changes: 51 additions & 0 deletions .github/workflows/build-swtbench-images.yml
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ jobs:

runs-on:
labels: blacksmith-32vcpu-ubuntu-2204
timeout-minutes: 180

permissions:
contents: read
Expand All @@ -88,6 +89,11 @@ jobs:
SELECT_FILE: ''

steps:
- name: Record build start time
run: |
echo "BUILD_START=$(date +%s)" >> "$GITHUB_ENV"
echo "Build started at $(date -u)"

- name: Determine checkout ref
id: checkout-ref
run: |
Expand Down Expand Up @@ -134,6 +140,35 @@ jobs:
run: |
make build

- name: "Preflight: prune cache and verify BuildKit disk"
run: |
set -euo pipefail
KEEP_GB=60
echo "Pruning BuildKit cache (target max-storage ${KEEP_GB} GiB, no filters)..."
if ! docker buildx prune --all --force --max-storage ${KEEP_GB}g; then
docker buildx prune --all --force --keep-storage ${KEEP_GB}g || true
fi

if df -B1 /var/lib/buildkit > /tmp/buildkit_df 2>/dev/null; then
LINE=$(tail -n1 /tmp/buildkit_df)
TOTAL=$(echo "$LINE" | awk '{print $2}')
USED=$(echo "$LINE" | awk '{print $3}')
FREE=$(echo "$LINE" | awk '{print $4}')
if [ -n "$TOTAL" ] && [ -n "$FREE" ]; then
PCT=$(( 100 * USED / TOTAL ))
echo "BuildKit disk: used ${USED} / ${TOTAL} bytes (${PCT}%); free ${FREE} bytes"
MIN=$((75 * 1024 * 1024 * 1024))
if [ "$FREE" -lt "$MIN" ]; then
echo "::error::Not enough free space on /var/lib/buildkit (${FREE} bytes free, need >= ${MIN})"
exit 1
fi
else
echo "Warning: unable to parse df output for /var/lib/buildkit"
fi
else
echo "Warning: /var/lib/buildkit not found; skipping disk check"
Comment on lines +143 to +169
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟡 Suggestion: This disk check has nested conditionals and multiple fallback strategies. It works, but consider extracting to a script file for clarity.

That said, for one-off infrastructure tooling, this level of inline complexity is acceptable - not worth blocking the PR over.

fi

- name: Build and push SWT-Bench images
run: |
set -euo pipefail
Expand Down Expand Up @@ -181,6 +216,7 @@ jobs:
env:
DOCKER_BUILDKIT: 1
BUILDKIT_PROGRESS: plain
BUILDKIT_RESET_ON_FAILURE: 1

- name: Build prebaked eval env images
if: ${{ inputs.build-eval-env == 'true' }}
Expand Down Expand Up @@ -240,6 +276,21 @@ jobs:
docker ps -a || true
docker system df || true

- name: Post-build disk and timing report
if: always()
run: |
set -euo pipefail
BUILD_END=$(date +%s)
ELAPSED=$(( BUILD_END - ${BUILD_START:-$BUILD_END} ))
echo "## Build Timing" >> "$GITHUB_STEP_SUMMARY"
echo "**Elapsed:** $((ELAPSED / 60))m $((ELAPSED % 60))s" >> "$GITHUB_STEP_SUMMARY"
echo "" >> "$GITHUB_STEP_SUMMARY"

echo "## Disk Usage After Build" >> "$GITHUB_STEP_SUMMARY"
df -h / /var/lib/buildkit 2>/dev/null | tee -a "$GITHUB_STEP_SUMMARY" || true
echo "" >> "$GITHUB_STEP_SUMMARY"
docker buildx du --verbose 2>/dev/null | head -40 | tee -a "$GITHUB_STEP_SUMMARY" || true

- name: Archive build logs
if: always()
run: |
Expand Down
6 changes: 4 additions & 2 deletions benchmarks/swebench/build_images.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
--image ghcr.io/openhands/eval-agent-server --target source-minimal
"""

# Use stdlib logging instead of openhands.sdk.get_logger to avoid initializing
# Rich console state before ProcessPoolExecutor forks (causes deadlocks).
import logging
import sys
from pathlib import Path

Expand All @@ -23,10 +26,9 @@
)
from benchmarks.utils.dataset import get_dataset
from benchmarks.utils.image_utils import remote_image_exists
from openhands.sdk import get_logger


logger = get_logger(__name__)
logger = logging.getLogger(__name__)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟠 Important: You replaced the openhands logger with stdlib logging, but where is logging.basicConfig() (or an equivalent handler setup) called? With no handlers configured, stdlib logging falls back to its last-resort handler, which only emits WARNING and above to stderr with no formatting - so any INFO/DEBUG messages from these modules will be silently dropped.

Check if there's a central initialization point, or add basic config in the main entry points.

WRAPPER_DOCKERFILE = Path(__file__).with_name("Dockerfile.swebench-deps")


Expand Down
6 changes: 4 additions & 2 deletions benchmarks/swtbench/build_eval_env_images.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
from __future__ import annotations

# Use stdlib logging instead of openhands.sdk.get_logger to avoid initializing
# Rich console state before ProcessPoolExecutor forks (causes deadlocks).
import argparse
import json
import logging
import os
import sys
from pathlib import Path
Expand All @@ -13,10 +16,9 @@
from benchmarks.swtbench.image_utils import ensure_swt_bench_repo
from benchmarks.utils.dataset import get_dataset
from benchmarks.utils.image_utils import remote_image_exists
from openhands.sdk import get_logger


logger = get_logger(__name__)
logger = logging.getLogger(__name__)


def select_instance_ids(
Expand Down
5 changes: 3 additions & 2 deletions benchmarks/swtbench/image_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from __future__ import annotations

# Use stdlib logging instead of openhands.sdk.get_logger to avoid initializing
# Rich console state before ProcessPoolExecutor forks (causes deadlocks).
import json
import logging
import os
Expand All @@ -9,10 +11,9 @@
from typing import Iterable

from benchmarks.swtbench.config import EVAL_DEFAULTS
from openhands.sdk import get_logger


logger = get_logger(__name__)
logger = logging.getLogger(__name__)


def ensure_swt_bench_repo(cache_dir: Path | None = None) -> Path:
Expand Down
Loading
Loading