Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions scripts/fallback_residual_audit.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ def main(argv: Sequence[str] | None = None) -> int:
print(f"Fallback agent decisions: {summary['fallback_agent_count']}")
print(f"Tiny router candidates: {summary['tiny_router_candidate_count']}")
print(f"Agent-required residuals: {summary['agent_required_count']}")
print(f"Expected boundaries: {summary['expected_boundary_count']}")
print(f"Unexpected fallbacks: {summary['unexpected_fallback_count']}")
print(f"Source-context gaps: {summary['source_context_gap_count']}")
return 0

Expand Down
30 changes: 30 additions & 0 deletions src/vulca/learning/fallback_residual_audit.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,14 @@
NEXT_AGENT_BOUNDARY = "keep_agent_boundary"
NEXT_SOURCE_CONTEXT = "recover_source_context"

# Boundary status labels that _boundary() assigns to a residual kind.
BOUNDARY_EXPECTED = "expected_boundary"
BOUNDARY_UNEXPECTED = "unexpected_fallback"

# Boundary owner labels that _boundary() pairs with a boundary status.
OWNER_TINY_ROUTER = "tiny_router"
OWNER_RUNTIME_PROVIDER = "runtime_provider_recovery"
OWNER_AGENT_VISUAL = "agent_visual_ownership_planner"
OWNER_SOURCE_CONTEXT = "source_context_recovery"


def run_fallback_residual_audit(
*,
Expand Down Expand Up @@ -54,6 +62,8 @@ def run_fallback_residual_audit(
residual_cases,
"recommended_next_step",
),
"counts_by_boundary_status": _counter_by(residual_cases, "boundary_status"),
"counts_by_boundary_owner": _counter_by(residual_cases, "boundary_owner"),
"counts_by_fallback_reason": _fallback_reason_counts(residual_cases),
"residual_cases": residual_cases,
"safe_handling": {
Expand Down Expand Up @@ -81,10 +91,15 @@ def _summary(
source_context_gap_count = sum(
1 for item in residual_cases if item.get("residual_kind") == RESIDUAL_SOURCE_CONTEXT_GAP
)
expected_boundary_count = sum(
1 for item in residual_cases if item.get("boundary_status") == BOUNDARY_EXPECTED
)
return {
"decision_count": len(decisions),
"fallback_agent_count": len(residual_cases),
"agent_required_count": len(residual_cases) - tiny_router_candidate_count,
"expected_boundary_count": expected_boundary_count,
"unexpected_fallback_count": len(residual_cases) - expected_boundary_count,
"tiny_router_candidate_count": tiny_router_candidate_count,
"source_context_gap_count": source_context_gap_count,
}
Expand All @@ -103,6 +118,7 @@ def _residual_case(decision: Mapping[str, Any]) -> dict[str, Any]:
data_gap_tags=data_gap_tags,
failure_hint=failure_hint,
)
boundary_status, boundary_owner = _boundary(residual_kind)
return {
"example_id": str(decision.get("example_id") or ""),
"case_id": str(decision.get("case_id") or ""),
Expand All @@ -117,6 +133,8 @@ def _residual_case(decision: Mapping[str, Any]) -> dict[str, Any]:
"source_dependency": str(source_dependency.get("recommended_source_dependency") or ""),
"decision_basis": str(source_dependency.get("recommended_decision_basis") or ""),
"residual_kind": residual_kind,
"boundary_status": boundary_status,
"boundary_owner": boundary_owner,
"tiny_router_candidate": tiny_candidate,
"recommended_next_step": next_step,
}
Expand All @@ -139,6 +157,18 @@ def _classify_residual(
return RESIDUAL_AGENT_ACTION, NEXT_AGENT_BOUNDARY, False


def _boundary(residual_kind: str) -> tuple[str, str]:
    """Map a residual kind to its ``(boundary_status, boundary_owner)`` pair.

    Provider-runtime and complex-ownership residuals are reported as expected
    boundaries; source-context gaps and tiny-router confidence residuals are
    reported as unexpected fallbacks. Any other kind falls through to an
    expected boundary owned by the agent visual ownership planner.
    """
    # Ordered (kind, outcome) pairs; the first kind equal to the input wins.
    known_kinds = (
        (RESIDUAL_PROVIDER_RUNTIME, (BOUNDARY_EXPECTED, OWNER_RUNTIME_PROVIDER)),
        (RESIDUAL_COMPLEX_OWNERSHIP, (BOUNDARY_EXPECTED, OWNER_AGENT_VISUAL)),
        (RESIDUAL_SOURCE_CONTEXT_GAP, (BOUNDARY_UNEXPECTED, OWNER_SOURCE_CONTEXT)),
        (RESIDUAL_TINY_CONFIDENCE, (BOUNDARY_UNEXPECTED, OWNER_TINY_ROUTER)),
    )
    for kind, outcome in known_kinds:
        if residual_kind == kind:
            return outcome
    # Unrecognized kinds default to the agent-owned expected boundary.
    return BOUNDARY_EXPECTED, OWNER_AGENT_VISUAL


def _fallback_reason_counts(
residual_cases: Sequence[Mapping[str, Any]],
) -> dict[str, int]:
Expand Down
22 changes: 22 additions & 0 deletions tests/test_fallback_residual_audit.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,8 @@ def test_fallback_residual_audit_classifies_remaining_agent_work(tmp_path):
"decision_count": 6,
"fallback_agent_count": 4,
"agent_required_count": 4,
"expected_boundary_count": 4,
"unexpected_fallback_count": 0,
"tiny_router_candidate_count": 0,
"source_context_gap_count": 0,
}
Expand All @@ -161,12 +163,30 @@ def test_fallback_residual_audit_classifies_remaining_agent_work(tmp_path):
"keep_agent_boundary": 2,
"route_provider_failure_to_runtime_handler": 2,
}
assert report["counts_by_boundary_status"] == {
"expected_boundary": 4,
}
assert report["counts_by_boundary_owner"] == {
"agent_visual_ownership_planner": 2,
"runtime_provider_recovery": 2,
}
assert [item["case_id"] for item in report["residual_cases"]] == [
"decompose_occlusion",
"redraw_under_split",
"layer_provider_failure",
"manual_provider_failure",
]
by_case = {item["case_id"]: item for item in report["residual_cases"]}
assert by_case["decompose_occlusion"]["boundary_owner"] == (
"agent_visual_ownership_planner"
)
assert by_case["layer_provider_failure"]["boundary_owner"] == (
"runtime_provider_recovery"
)
assert all(
item["boundary_status"] == "expected_boundary"
for item in report["residual_cases"]
)
assert report_path.exists()


Expand Down Expand Up @@ -197,5 +217,7 @@ def test_fallback_residual_audit_cli_writes_summary(tmp_path):
assert "Fallback agent decisions: 4" in result.stdout
assert "Tiny router candidates: 0" in result.stdout
assert "Agent-required residuals: 4" in result.stdout
assert "Expected boundaries: 4" in result.stdout
assert "Unexpected fallbacks: 0" in result.stdout
report = json.loads(report_path.read_text(encoding="utf-8"))
assert report["summary"]["fallback_agent_count"] == 4