Add task validation CLI #302
base: main
Changes from all commits: 9b4f45c, 0236c5d, edcd13d, fa8aade, 7e57738, 0191b4d
`hud/cli/validate.py` (new file, `@@ -0,0 +1,117 @@`):

```python
"""Validate task files or datasets."""

from __future__ import annotations

import json
from pathlib import Path
from typing import Any, cast

import typer
from pydantic import ValidationError

from hud.datasets import load_tasks
from hud.eval.utils import validate_v4_task
from hud.types import Task
from hud.utils.hud_console import hud_console


def validate_command(source: str) -> None:
    """Validate tasks from a file or HuggingFace dataset."""
    try:
        raw_tasks, type_errors = _load_raw_tasks(source)
    except Exception as e:
        hud_console.error(f"Failed to load tasks: {e}")
        raise typer.Exit(1) from e

    errors: list[str] = []
    errors.extend(type_errors)
    for idx, task in enumerate(raw_tasks):
        label = task.get("id") or f"index {idx}"
        try:
            if _looks_like_v4(task):
                validate_v4_task(task)
            Task(**_as_dict(task))
        except ValidationError as e:
            errors.append(f"{label}: {e}")
        except Exception as e:
            errors.append(f"{label}: {e}")

    if errors:
        hud_console.error(f"Found {len(errors)} invalid task(s).")
        for err in errors:
            hud_console.error(f"- {err}")
        raise typer.Exit(1)

    hud_console.success(f"Validated {len(raw_tasks)} task(s).")


def _as_dict(task: Any) -> dict[str, Any]:
    if isinstance(task, dict):
        return task
    try:
        return dict(task)
    except Exception:
        return {}


def _looks_like_v4(task: dict[str, Any]) -> bool:
    return any(
        key in task
        for key in ("prompt", "mcp_config", "evaluate_tool", "setup_tool", "integration_test_tool")
    )


def _load_raw_tasks(source: str) -> tuple[list[dict[str, Any]], list[str]]:
    path = Path(source)
    if path.exists() and path.suffix.lower() in {".json", ".jsonl"}:
```
Review comment on the line above (Low Severity): Case sensitivity mismatch between validation and loading. Additional locations: 1.
```python
        return _load_raw_from_file(path)
    return cast("list[dict[str, Any]]", load_tasks(source, raw=True)), []


def _load_raw_from_file(path: Path) -> tuple[list[dict[str, Any]], list[str]]:
    errors: list[str] = []
    items: list[dict[str, Any]] = []

    if path.suffix.lower() == ".jsonl":
        with open(path, encoding="utf-8") as f:
            for line_no, line in enumerate(f, start=1):
                line = line.strip()
                if not line:
                    continue
                try:
                    value = json.loads(line)
                except json.JSONDecodeError as e:
                    errors.append(f"line {line_no}: invalid JSON ({e.msg})")
                    continue
                if isinstance(value, dict):
                    items.append(value)
                    continue
                if isinstance(value, list):
                    for idx, entry in enumerate(value):
                        if isinstance(entry, dict):
                            items.append(entry)
                        else:
                            entry_type = type(entry).__name__
                            errors.append(
                                f"line {line_no} item {idx}: expected object, got {entry_type}"
                            )
                    continue
                errors.append(
                    f"line {line_no}: expected object or list, got {type(value).__name__}"
                )
        return items, errors

    with open(path, encoding="utf-8") as f:
        value = json.load(f)

    if isinstance(value, dict):
        return [value], errors
    if isinstance(value, list):
        for idx, entry in enumerate(value):
            if isinstance(entry, dict):
                items.append(entry)
            else:
                errors.append(f"index {idx}: expected object, got {type(entry).__name__}")
        return items, errors

    raise ValueError(f"JSON file must contain an object or array, got {type(value).__name__}")
```
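The PR does not show how the command is registered on the CLI, so the following is only a minimal sketch of one way `validate_command` could be wired up with Typer; the `app` object and the `hud validate` command name are assumptions, not part of this diff.

```python
# Hypothetical wiring -- the real CLI entry point and command name are not
# shown in this PR.
import typer

from hud.cli.validate import validate_command

app = typer.Typer()

# Would allow e.g. `hud validate ./tasks.jsonl` for a local file, or
# `hud validate some-org/some-dataset` for a HuggingFace dataset slug.
app.command(name="validate")(validate_command)

if __name__ == "__main__":
    app()
```

However it is wired, the routing in `_load_raw_tasks` stays the same: existing `.json`/`.jsonl` paths are parsed locally, and anything else falls through to `load_tasks(source, raw=True)`.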
New test file (`@@ -0,0 +1,58 @@`):
```python
from __future__ import annotations

import importlib.util
import json
from pathlib import Path

import pytest
import typer


def _load_validate_command():
    module_path = Path(__file__).resolve().parents[1] / "cli" / "validate.py"
    spec = importlib.util.spec_from_file_location("hud.cli.validate", module_path)
    module = importlib.util.module_from_spec(spec)  # type: ignore[arg-type]
    assert spec and spec.loader
    spec.loader.exec_module(module)
    return module.validate_command
```
Review comment on `_load_validate_command` (Medium Severity): Test uses unnecessarily complex importlib module loading.
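If `hud.cli.validate` imports cleanly in the test environment (an assumption; the indirection may be deliberate, for example to avoid importing the rest of the CLI package), the `_load_validate_command` helper above could collapse into a plain import. A sketch of what one of the tests would look like in that style:

```python
# Sketch only: assumes `from hud.cli.validate import validate_command` works
# under pytest without side effects.
import json
from pathlib import Path

import pytest
import typer

from hud.cli.validate import validate_command


def test_validate_command_invalid_direct_import(tmp_path: Path) -> None:
    # Same scenario as test_validate_command_invalid below, minus the importlib loader.
    tasks = [{"mcp_config": {"local": {"command": "echo", "args": ["hi"]}}}]
    path = tmp_path / "tasks.json"
    path.write_text(json.dumps(tasks), encoding="utf-8")
    with pytest.raises(typer.Exit):
        validate_command(str(path))
```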
```python
def _write_tasks(path: Path, tasks: list[dict]) -> str:
    path.write_text(json.dumps(tasks), encoding="utf-8")
    return str(path)


def test_validate_command_valid(tmp_path: Path) -> None:
    validate_command = _load_validate_command()
    tasks = [
        {
            "prompt": "Say hello",
            "mcp_config": {"local": {"command": "echo", "args": ["hi"]}},
            "evaluate_tool": {"name": "done", "arguments": {}},
        }
    ]
    path = _write_tasks(tmp_path / "tasks.json", tasks)
    validate_command(path)


def test_validate_command_invalid(tmp_path: Path) -> None:
    validate_command = _load_validate_command()
    tasks = [{"mcp_config": {"local": {"command": "echo", "args": ["hi"]}}}]
    path = _write_tasks(tmp_path / "tasks.json", tasks)
    with pytest.raises(typer.Exit):
        validate_command(path)


def test_validate_command_flags_non_dict_entries(tmp_path: Path) -> None:
    validate_command = _load_validate_command()
    tasks = [
        {
            "prompt": "ok",
            "mcp_config": {"local": {"command": "echo", "args": ["hi"]}},
            "evaluate_tool": {"name": "done", "arguments": {}},
        },
        "not a task",
    ]
    path = _write_tasks(tmp_path / "tasks.json", tasks)
    with pytest.raises(typer.Exit):
        validate_command(path)
```
cursor[bot] marked this conversation as resolved.
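The tests above only exercise `.json` input. A possible follow-up for the `.jsonl` branch, written in the same style (the test name and scenario are suggestions, not part of this PR):

```python
def test_validate_command_jsonl_reports_bad_lines(tmp_path: Path) -> None:
    # One valid task line followed by a malformed JSON line: the loader should
    # record a per-line error and the command should exit with a failure.
    validate_command = _load_validate_command()
    lines = [
        json.dumps(
            {
                "prompt": "Say hello",
                "mcp_config": {"local": {"command": "echo", "args": ["hi"]}},
                "evaluate_tool": {"name": "done", "arguments": {}},
            }
        ),
        "{not valid json",
    ]
    path = tmp_path / "tasks.jsonl"
    path.write_text("\n".join(lines) + "\n", encoding="utf-8")
    with pytest.raises(typer.Exit):
        validate_command(str(path))
```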


Uh oh!
There was an error while loading. Please reload this page.