docs/advanced/harbor-convert.mdx (33 changes: 29 additions & 4 deletions)
@@ -15,7 +15,7 @@ git clone https://github.com/laude-institute/terminal-bench-2.git
# 2. Convert to HUD format
hud convert ./terminal-bench-2/ --output ./tb2-hud

# 3. Deploy all environments
# 3. Deploy all environments (~3 min per environment, leave it running)
hud deploy ./tb2-hud --all

# 4. Run evaluation
@@ -24,6 +24,11 @@ hud eval ./tb2-hud/taskset.json

That's it. The converter handles Dockerfile adaptation, build context, test scripts, and reward parsing automatically.

<Tip>
Each environment takes roughly 3 minutes to build and deploy. For datasets with many environments,
`hud deploy --all` runs them sequentially -- just leave it running and check back when it's done.
</Tip>

## What Gets Converted

A Harbor task directory:
@@ -81,9 +86,29 @@ Harbor test scripts write results to `/logs/verifier/`. The converter supports both:
- `reward.txt` -- a single float (`1.0` for pass, `0.0` for fail)
- `reward.json` -- `{"reward": 1.0}` or just a float
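
For reference, that fallback amounts to something like the sketch below (illustrative only: `parse_reward` is a hypothetical helper, not the converter's actual implementation):

```python
import json
from pathlib import Path


def parse_reward(verifier_dir: str = "/logs/verifier") -> float:
    """Read a Harbor-style reward from reward.txt or reward.json."""
    base = Path(verifier_dir)

    txt = base / "reward.txt"
    if txt.exists():
        # reward.txt holds a single float: 1.0 for pass, 0.0 for fail
        return float(txt.read_text().strip())

    js = base / "reward.json"
    if js.exists():
        data = json.loads(js.read_text())
        # reward.json may be {"reward": 1.0} or just a bare float
        if isinstance(data, dict):
            return float(data["reward"])
        return float(data)

    raise FileNotFoundError(f"no reward.txt or reward.json under {base}")
```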

## Running Programmatically
## Running Tasks

### Option 1: Upload as a Taskset (recommended)

The generated `taskset.json` can be uploaded directly to the HUD platform for managed evaluation, leaderboards, and comparison across models:

1. Go to [hud.ai/evalsets](https://hud.ai/evalsets) and create a new taskset
2. Click **Upload Tasks** and paste the contents of `taskset.json`
3. Run evaluations from the platform UI or via `hud eval`

See the [Tasksets guide](/platform/tasksets) for full details on creating and managing tasksets.

### Option 2: CLI eval

Run the taskset directly from the command line:

```bash
hud eval ./tb2-hud/taskset.json
```

### Option 3: Python SDK

You can also run converted tasks from Python using the SDK:
Run tasks programmatically with any agent:

```python
import asyncio
@@ -108,7 +133,7 @@ async def main():
asyncio.run(main())
```

Or load the full taskset:
Or load the full taskset as Task objects:

```python
import json
# ...
```
hud/cli/__init__.py (161 changes: 134 additions & 27 deletions)
@@ -1026,46 +1026,153 @@ def get(

@app.command()
def convert(
tasks_file: str = typer.Argument(
..., help="Path to tasks file (JSON/JSONL) to convert to remote MCP configuration"
path: str = typer.Argument(
..., help="Path to source tasks/dataset directory to convert to HUD format"
),
from_format: str = typer.Option(
"auto",
"--from",
"-f",
help="Source format (auto, harbor). Use 'auto' to detect automatically.",
),
output: str | None = typer.Option(
None,
"--output",
"-o",
help="Output directory (default: ./hud_converted)",
),
) -> None:
"""Convert local MCP task configs to remote (mcp.hud.ai) format.
"""Convert external benchmark formats to HUD environments + tasksets.

This mirrors the implicit conversion flow used by 'hud rl' and writes a new
remote_<name>.json next to the source file when needed.
[not dim]Converts tasks from frameworks like Harbor into HUD-compatible
environments (env.py + Dockerfile.hud) and v5 taskset files.

Supports pluggable formats. Currently: harbor.

Examples:
hud convert ./algotune/ # Auto-detect, convert dataset
hud convert ./my-task/ --from harbor # Explicit format
hud convert ./dataset/ --output ./out # Custom output directory[/not dim]
"""
from pathlib import Path

from .convert import detect_format, get_converter, list_formats, write_result

hud_console = HUDConsole()
source_path = Path(path).resolve()

try:
from .flows.tasks import convert_tasks_to_remote
if not source_path.exists():
hud_console.error(f"Path does not exist: {path}")
raise typer.Exit(1)

result_path = convert_tasks_to_remote(tasks_file)
# Resolve converter
if from_format == "auto":
converter = detect_format(source_path)
if converter is None:
# Auto-detect failed — prompt user to pick a format
available = list_formats()
if not available:
hud_console.error("No converters registered.")
raise typer.Exit(1)

if len(available) == 1:
# Only one format exists, just use it
converter = get_converter(available[0][0])
if converter:
hud_console.info(f"Using format: {converter.name}")
else:
import questionary

choices = [
questionary.Choice(title=f"{name} — {desc}", value=name)
for name, desc in available
]
picked = questionary.select(
"Could not auto-detect format. Which format is this?",
choices=choices,
).ask()
if not picked:
raise typer.Exit(1)
converter = get_converter(picked)

# If nothing changed, inform the user
try:
if Path(result_path).resolve() == Path(tasks_file).resolve():
hud_console.success(
"Tasks already reference remote MCP URLs. No conversion needed."
)
hud_console.hint("You can run them directly with: hud eval <tasks_file> --full")
return
except Exception as e:
# Best effort; continue with success message
hud_console.debug(f"Path comparison failed, continuing: {e}")

hud_console.success(f"Converted tasks written to: {result_path}")
hud_console.hint(
"You can now run remote flows: hud rl <converted_file> or hud eval <converted_file>"
)
except typer.Exit:
raise
if converter is None:
hud_console.error("No converter selected.")
raise typer.Exit(1)
else:
hud_console.info(f"Detected format: {converter.name}")
else:
converter = get_converter(from_format)
if converter is None:
hud_console.error(f"Unknown format: {from_format}")
available = list_formats()
if available:
hud_console.info("Available formats:")
for name, desc in available:
hud_console.info(f" {name}: {desc}")
raise typer.Exit(1)

# Run conversion
try:
result = converter.convert(source_path)
except ValueError as e:
hud_console.error(str(e))
raise typer.Exit(1) from e
except Exception as e:
hud_console.error(f"Conversion failed: {e}")
raise typer.Exit(1) from e

# Write output
output_dir = Path(output) if output else Path("./hud_converted")
try:
taskset_path = write_result(result, output_dir.resolve())
except Exception as e:
hud_console.error(f"Failed to convert tasks: {e}")
hud_console.error(f"Failed to write output: {e}")
raise typer.Exit(1) from e

# Display results
hud_console.header("Convert Complete")
hud_console.info("")

total_tasks = len(result.taskset)
total_envs = len(result.environments)
hud_console.success(f"Converted {total_tasks} task(s) into {total_envs} environment(s).")
hud_console.info("")

# Show each environment
hud_console.section_title("Environments")
for env_gen in result.environments:
task_count = len(env_gen.task_dirs)
hud_console.status_item(env_gen.name, f"{task_count} tasks")
hud_console.info("")

# Show output paths
hud_console.section_title("Output")
hud_console.status_item("Directory", str(output_dir.resolve()))
hud_console.status_item("Taskset", str(taskset_path))
hud_console.info("")

# Show next steps with numbered commands
hud_console.section_title("Next Steps")
hud_console.info("")

hud_console.info("1. Deploy environment(s):")
if total_envs > 1:
hud_console.command_example(
f"hud deploy {output_dir.resolve()} --all",
f"Deploy all {total_envs} environments",
)
else:
first_env = result.environments[0].name if result.environments else "<env>"
hud_console.command_example(
f"hud deploy {output_dir.resolve() / first_env}",
"Build & deploy to HUD platform",
)
hud_console.info("")

hud_console.info("2. Run evaluation:")
hud_console.command_example(f"hud eval {taskset_path}", "Run agent against tasks")
hud_console.info("")


@app.command()
def cancel(
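
The registry calls in this diff (`detect_format`, `get_converter`, `list_formats`, `write_result`) imply a small pluggable-converter interface. Below is a minimal sketch of what a new format plugin might look like; every name in it is inferred only from the attributes the CLI code above actually touches (`converter.name`, `converter.convert(source_path)`, `result.taskset`, `result.environments`, `env_gen.name`, `env_gen.task_dirs`), so the real types and registration mechanism in `hud/cli/convert` may well differ:

```python
from dataclasses import dataclass, field
from pathlib import Path


@dataclass
class EnvironmentGen:
    """Mirrors the attributes the CLI reads: .name and .task_dirs."""
    name: str
    task_dirs: list[Path] = field(default_factory=list)


@dataclass
class ConvertResult:
    """Mirrors the attributes the CLI reads: .taskset and .environments."""
    taskset: list[dict] = field(default_factory=list)
    environments: list[EnvironmentGen] = field(default_factory=list)


class MyFormatConverter:
    """Hypothetical plugin for a dataset laid out as <dataset>/<task>/task.yaml."""

    name = "myformat"
    description = "Tasks laid out as <dataset>/<task>/task.yaml"

    def detect(self, path: Path) -> bool:
        # Cheap structural probe; how auto-detection actually hooks in
        # (method vs. module-level detect_format) is not shown in the diff.
        return any(path.glob("*/task.yaml"))

    def convert(self, path: Path) -> ConvertResult:
        result = ConvertResult()
        env = EnvironmentGen(name=path.name)
        for task_yaml in sorted(path.glob("*/task.yaml")):
            task_dir = task_yaml.parent
            env.task_dirs.append(task_dir)
            result.taskset.append({"id": task_dir.name})  # placeholder v5 entry
        result.environments.append(env)
        return result
```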