From b5fcba5ce0fe2cf6a4fee7272b13b95c93e62f6c Mon Sep 17 00:00:00 2001
From: shfunc <hagforall@proton.me>
Date: Mon, 9 Feb 2026 20:59:10 +0100
Subject: [PATCH 1/5] surface remote eval errors instead of silently swallowing

---
 hud/cli/eval.py     |  6 +++++-
 hud/eval/manager.py | 17 ++++++++++++++++-
 2 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/hud/cli/eval.py b/hud/cli/eval.py
index 26db25ed..65487b12 100644
--- a/hud/cli/eval.py
+++ b/hud/cli/eval.py
@@ -703,6 +703,7 @@ async def _run_evaluation(cfg: EvalConfig) -> tuple[list[Any], list[Any]]:
             taskset=cfg.taskset,
             tasks=tasks_data,
             hud_eval_config=eval_cfg_dict,
+            strict=True,
         )
 
         if cfg.taskset and ids:
@@ -714,7 +715,7 @@ async def _run_evaluation(cfg: EvalConfig) -> tuple[list[Any], list[Any]]:
             for task_obj, task_version_id in zip(tasks_to_create, ids, strict=False):
                 task_obj.id = task_version_id
 
-        await submit_rollouts(
+        trace_ids = await submit_rollouts(
             tasks=tasks,
             job_id=job_id,
             agent_type=cfg.agent_type,
@@ -724,6 +725,9 @@ async def _run_evaluation(cfg: EvalConfig) -> tuple[list[Any], list[Any]]:
             use_byok=cfg.byok,
         )
 
+        if not trace_ids:
+            raise ValueError("No tasks were accepted for execution. Check errors above.")
+
         hud_console.success(f"Tasks submitted. View at: https://hud.ai/jobs/{job_id}")
         return [], tasks
 
diff --git a/hud/eval/manager.py b/hud/eval/manager.py
index 1a4635cb..37f1ba31 100644
--- a/hud/eval/manager.py
+++ b/hud/eval/manager.py
@@ -74,8 +74,14 @@ async def _send_job_enter(
     taskset: str | None = None,
     tasks: list[dict[str, Any]] | None = None,
     hud_eval_config: dict[str, Any] | None = None,
+    strict: bool = False,
 ) -> list[str] | None:
-    """Send job enter payload (async request before traces start)."""
+    """Send job enter payload (async request before traces start).
+
+    Args:
+        strict: If True, raise ValueError on failure instead of returning None.
+            Use for remote execution where job registration is required.
+    """
     import httpx
 
     from hud.eval.types import JobEnterPayload
@@ -110,7 +116,16 @@ async def _send_job_enter(
                 ids = data.get("task_version_ids")
                 if isinstance(ids, list) and all(isinstance(x, str) for x in ids):
                     return ids
+        else:
+            error_detail = resp.text[:500] if resp.text else f"HTTP {resp.status_code}"
+            if strict:
+                raise ValueError(f"Job registration failed: {error_detail}")
+            logger.warning("Job enter failed (%d): %s", resp.status_code, error_detail)
+    except ValueError:
+        raise
     except Exception as e:
+        if strict:
+            raise ValueError(f"Job registration failed: {e}") from e
         logger.warning("Failed to send job enter: %s", e)
     return None
 

From 112afffed58e4be54d2a1bab5a860c2af25a77e8 Mon Sep 17 00:00:00 2001
From: shfunc <hagforall@proton.me>
Date: Mon, 9 Feb 2026 21:07:06 +0100
Subject: [PATCH 2/5] strict mode: do not skip job enter when telemetry is off or API key is missing

---
 hud/eval/manager.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hud/eval/manager.py b/hud/eval/manager.py
index 37f1ba31..59edeed2 100644
--- a/hud/eval/manager.py
+++ b/hud/eval/manager.py
@@ -88,7 +88,7 @@ async def _send_job_enter(
     from hud.settings import settings
 
     api_key = api_key or settings.api_key
-    if not settings.telemetry_enabled or not api_key:
+    if not strict and (not settings.telemetry_enabled or not api_key):
         return None
 
     payload = JobEnterPayload(

From bd7f86d94070bb9b1327eb484fcd8cf4ce3cb6a3 Mon Sep 17 00:00:00 2001
From: shfunc <hagforall@proton.me>
Date: Mon, 9 Feb 2026 21:15:56 +0100
Subject: [PATCH 3/5] strict mode: raise on invalid body or missing task_version_ids

---
 hud/eval/manager.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/hud/eval/manager.py b/hud/eval/manager.py
index 59edeed2..bd1422fd 100644
--- a/hud/eval/manager.py
+++ b/hud/eval/manager.py
@@ -111,11 +111,15 @@ async def _send_job_enter(
             try:
                 data = resp.json()
             except Exception:
+                if strict:
+                    raise ValueError("Job registration failed: invalid response body")
                 return None
             if isinstance(data, dict):
                 ids = data.get("task_version_ids")
                 if isinstance(ids, list) and all(isinstance(x, str) for x in ids):
                     return ids
+            if strict:
+                raise ValueError("Job registration failed: missing task_version_ids in response")
         else:
             error_detail = resp.text[:500] if resp.text else f"HTTP {resp.status_code}"
             if strict:

From 2f66d8923b5ca30c1018bc5c683936cb6e32d04e Mon Sep 17 00:00:00 2001
From: shfunc <hagforall@proton.me>
Date: Mon, 9 Feb 2026 21:24:21 +0100
Subject: [PATCH 4/5] ruff fix: add `from None` to raise inside except block

---
 hud/eval/manager.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hud/eval/manager.py b/hud/eval/manager.py
index bd1422fd..19e874e8 100644
--- a/hud/eval/manager.py
+++ b/hud/eval/manager.py
@@ -112,7 +112,7 @@ async def _send_job_enter(
                 data = resp.json()
             except Exception:
                 if strict:
-                    raise ValueError("Job registration failed: invalid response body")
+                    raise ValueError("Job registration failed: invalid response body") from None
                 return None
             if isinstance(data, dict):
                 ids = data.get("task_version_ids")

From 9980d62ff18308f44506e4eeaa7c371ec1fe8a71 Mon Sep 17 00:00:00 2001
From: Jaideep <jdchawla29@gmail.com>
Date: Tue, 10 Feb 2026 15:27:57 -0800
Subject: [PATCH 5/5] always raise, strict branching not needed

---
 hud/cli/eval.py     |  1 -
 hud/eval/manager.py | 54 ++++++++++++++-------------------------------
 2 files changed, 17 insertions(+), 38 deletions(-)

diff --git a/hud/cli/eval.py b/hud/cli/eval.py
index 65487b12..c2c6c906 100644
--- a/hud/cli/eval.py
+++ b/hud/cli/eval.py
@@ -703,7 +703,6 @@ async def _run_evaluation(cfg: EvalConfig) -> tuple[list[Any], list[Any]]:
             taskset=cfg.taskset,
             tasks=tasks_data,
             hud_eval_config=eval_cfg_dict,
-            strict=True,
         )
 
         if cfg.taskset and ids:
diff --git a/hud/eval/manager.py b/hud/eval/manager.py
index 19e874e8..a3670db2 100644
--- a/hud/eval/manager.py
+++ b/hud/eval/manager.py
@@ -74,13 +74,11 @@ async def _send_job_enter(
     taskset: str | None = None,
     tasks: list[dict[str, Any]] | None = None,
     hud_eval_config: dict[str, Any] | None = None,
-    strict: bool = False,
 ) -> list[str] | None:
     """Send job enter payload (async request before traces start).
 
-    Args:
-        strict: If True, raise ValueError on failure instead of returning None.
-            Use for remote execution where job registration is required.
+    Returns task_version_ids on success, None if telemetry is disabled or no
+    API key is available. Raises on any failure (network, bad response, etc.).
     """
     import httpx
 
@@ -88,7 +86,7 @@ async def _send_job_enter(
     from hud.settings import settings
 
     api_key = api_key or settings.api_key
-    if not strict and (not settings.telemetry_enabled or not api_key):
+    if not settings.telemetry_enabled or not api_key:
         return None
 
     payload = JobEnterPayload(
@@ -100,38 +98,20 @@ async def _send_job_enter(
         hud_eval_config=hud_eval_config,
     )
 
-    try:
-        async with httpx.AsyncClient(timeout=10.0) as client:
-            resp = await client.post(
-                f"{settings.hud_api_url}/trace/job/{job_id}/enter",
-                json=payload.model_dump(exclude_none=True),
-                headers={"Authorization": f"Bearer {api_key}"},
-            )
-        if resp.is_success:
-            try:
-                data = resp.json()
-            except Exception:
-                if strict:
-                    raise ValueError("Job registration failed: invalid response body") from None
-                return None
-            if isinstance(data, dict):
-                ids = data.get("task_version_ids")
-                if isinstance(ids, list) and all(isinstance(x, str) for x in ids):
-                    return ids
-            if strict:
-                raise ValueError("Job registration failed: missing task_version_ids in response")
-        else:
-            error_detail = resp.text[:500] if resp.text else f"HTTP {resp.status_code}"
-            if strict:
-                raise ValueError(f"Job registration failed: {error_detail}")
-            logger.warning("Job enter failed (%d): %s", resp.status_code, error_detail)
-    except ValueError:
-        raise
-    except Exception as e:
-        if strict:
-            raise ValueError(f"Job registration failed: {e}") from e
-        logger.warning("Failed to send job enter: %s", e)
-    return None
+    async with httpx.AsyncClient(timeout=10.0) as client:
+        resp = await client.post(
+            f"{settings.hud_api_url}/trace/job/{job_id}/enter",
+            json=payload.model_dump(exclude_none=True),
+            headers={"Authorization": f"Bearer {api_key}"},
+        )
+
+    resp.raise_for_status()
+    data = resp.json()
+    if isinstance(data, dict):
+        ids = data.get("task_version_ids")
+        if isinstance(ids, list) and all(isinstance(x, str) for x in ids):
+            return ids
+    raise ValueError(f"Job registration failed: unexpected response: {data}")
 
 
 @asynccontextmanager