Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions weco/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,12 +90,17 @@ def resume_optimization_run(
run_id: str,
auth_headers: dict = {},
api_keys: Optional[Dict[str, str]] = None,
steps: Optional[int] = None,
timeout: Union[int, Tuple[int, int]] = (5, 10),
) -> Optional[Dict[str, Any]]:
"""Request the backend to resume an interrupted run."""
"""Request the backend to resume an interrupted or completed run.

When ``steps`` is provided, the backend resets the run's budget to
``last_step + steps``. Required for runs already in the ``completed`` state.
"""
with console.status("[bold green]Resuming run..."):
try:
return WecoClient(auth_headers).resume_run(run_id, api_keys=api_keys)
return WecoClient(auth_headers).resume_run(run_id, api_keys=api_keys, steps=steps)
except requests.exceptions.HTTPError as e:
handle_api_error(e, console)
return None
Expand Down
12 changes: 12 additions & 0 deletions weco/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,17 @@ def configure_resume_parser(resume_parser: argparse.ArgumentParser) -> None:
action="store_true",
help="Automatically apply the best solution to the source file without prompting",
)
resume_parser.add_argument(
"-n",
"--steps",
type=int,
default=None,
help=(
"Run this many more evaluations from the last node. "
"Required when resuming a completed run; optional for terminated/error runs "
"(omit to keep the original budget)."
),
)

default_api_keys = " ".join([f"{provider}=xxx" for provider, _ in DEFAULT_MODELS])
supported_providers = ", ".join([provider for provider, _ in DEFAULT_MODELS])
Expand Down Expand Up @@ -575,6 +586,7 @@ def execute_resume_command(args: argparse.Namespace) -> None:
output_mode=args.output,
submit_timeout=getattr(args, "submit_timeout", None),
auto_resume_policy=auto_resume_policy,
additional_steps=args.steps,
)

sys.exit(0 if success else 1)
Expand Down
10 changes: 8 additions & 2 deletions weco/core/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,15 +341,21 @@ def get_lineage(self, lineage_id: str) -> dict:
resp.raise_for_status()
return resp.json()

def resume_run(self, run_id: str, *, api_keys: dict[str, str] | None = None) -> dict:
"""``POST /runs/{run_id}/resume`` — resume an interrupted run.
def resume_run(self, run_id: str, *, api_keys: dict[str, str] | None = None, steps: int | None = None) -> dict:
"""``POST /runs/{run_id}/resume`` — resume an interrupted or completed run.

When ``steps`` is provided, the backend resets the run's budget to
``last_step + steps`` and produces the next candidate. Required for
runs already in the ``completed`` state.

Raises:
requests.exceptions.HTTPError: On non-2xx responses.
"""
body: dict[str, Any] = {"metadata": {"client_name": "cli", "client_version": __pkg_version__}}
if api_keys:
body["api_keys"] = api_keys
if steps is not None:
body["steps"] = steps
resp = self._post(f"/runs/{run_id}/resume", json=body, timeout=(5, 10))
resp.raise_for_status()
return resp.json()
Expand Down
38 changes: 32 additions & 6 deletions weco/optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -406,9 +406,10 @@ def resume_optimization(
output_mode: str = "rich",
submit_timeout: Optional[int] = None,
auto_resume_policy: Optional[AutoResumePolicy] = None,
additional_steps: Optional[int] = None,
) -> bool:
"""
Resume an interrupted run using the queue-based optimization loop.
Resume an interrupted or completed run using the queue-based optimization loop.

Polls for execution tasks, executes locally, and submits results.
Uses the execution queue flow instead of the legacy direct flow.
Expand All @@ -419,6 +420,8 @@ def resume_optimization(
poll_interval: Seconds between polling attempts.
apply_change: If True, automatically apply best solution; if False, prompt user.
output_mode: "rich" for interactive terminal UI, "plain" for machine-readable output.
additional_steps: If set, run this many more evaluations from the last node.
Required when resuming a completed run; optional for terminated/error.

Returns:
True if optimization completed successfully, False otherwise.
Expand All @@ -438,10 +441,14 @@ def resume_optimization(
return False

run_status_val = status.get("status")
if run_status_val not in ("error", "terminated"):
if run_status_val == "completed":
if additional_steps is None:
console.print(f"[yellow]Run {run_id} is already complete. Pass --steps N to resume with N more evaluations.[/]")
return False
elif run_status_val not in ("error", "terminated"):
console.print(
f"[yellow]Run {run_id} cannot be resumed (status: {run_status_val}). "
f"Only 'error' or 'terminated' runs can be resumed.[/]"
f"Only 'error', 'terminated', or 'completed' runs can be resumed.[/]"
)
return False

Expand All @@ -467,7 +474,17 @@ def resume_optimization(
console.print(f" Objective: {metric_name} ({'maximize' if maximize else 'minimize'})")
console.print(f" Model: {model_name}")
console.print(f" Eval Command: {eval_command}")
console.print(f" Total Steps: {total_steps} | Current Step: {current_step} | Steps Remaining: {steps_remaining}")
if additional_steps is not None:
new_total = current_step + additional_steps
plural = "s" if additional_steps != 1 else ""
console.print(
f" Total Steps: {total_steps} -> [bold]{new_total}[/] | Current Step: {current_step} | "
f"Will run: [bold]{additional_steps}[/] more evaluation{plural} (--steps)"
)
if new_total < total_steps:
console.print(f" [yellow]Note: this shrinks the original budget ({total_steps}) to {new_total}.[/]")
else:
console.print(f" Total Steps: {total_steps} | Current Step: {current_step} | Steps Remaining: {steps_remaining}")
console.print(f" Last Updated: {status.get('updated_at', 'N/A')}")

unchanged = Confirm.ask(
Expand All @@ -477,11 +494,20 @@ def resume_optimization(
console.print("[yellow]Resume cancelled. Please start a new run if the environment changed.[/]")
return False

# Call backend to prepare resume (this sets status to 'running')
resume_resp = resume_optimization_run(console=console, run_id=run_id, auth_headers=auth_headers, api_keys=api_keys)
# Call backend to prepare resume (this sets status to 'running' and, when
# additional_steps is provided, resets run.steps to last_step + additional_steps)
resume_resp = resume_optimization_run(
console=console, run_id=run_id, auth_headers=auth_headers, api_keys=api_keys, steps=additional_steps
)
if resume_resp is None:
return False

# Refresh total_steps from the resume response — when the budget was
# extended, the response carries the new total.
response_steps = resume_resp.get("steps")
if response_steps is not None:
total_steps = int(response_steps)

log_dir = resume_resp.get("log_dir", ".runs")
save_logs = bool(resume_resp.get("save_logs", False))
eval_timeout = resume_resp.get("eval_timeout")
Expand Down
Loading