Skip to content

Commit b57e022

Browse files
committed
cleaning up unused code
1 parent df2d034 commit b57e022

File tree

1 file changed

+0
-58
lines changed

1 file changed

+0
-58
lines changed

tests/pytest/test_tau_bench_airline.py

Lines changed: 0 additions & 58 deletions
Original file line number | Diff line number | Diff line change
@@ -60,26 +60,6 @@ def tau_bench_airline_to_evaluation_row(data: List[Dict[str, Any]]) -> List[Eval
6060

6161
return rows
6262

63-
64-
def save_single_trajectory(trajectory_record: Dict, row_id: str, output_dir: str = "trajectory_outputs"):
65-
"""Save a single trajectory record to file."""
66-
output_path = Path(output_dir)
67-
output_path.mkdir(exist_ok=True)
68-
69-
# Sanitize model_id for filename (replace slashes with underscores)
70-
safe_model_id = trajectory_record["model_id"].replace("/", "_").replace("\\", "_")
71-
72-
# Use row_id if provided, otherwise fall back to scenario_id
73-
filename = f"{safe_model_id}_{row_id}_trajectory.json"
74-
filepath = output_path / filename
75-
76-
with open(filepath, "w") as f:
77-
json.dump(trajectory_record, f, indent=2, default=str)
78-
79-
print(f"💾 Saved trajectory: {filepath}")
80-
return filepath
81-
82-
8363
@evaluation_test(
8464
input_dataset=["tests/pytest/data/airline_dataset.jsonl"],
8565
dataset_adapter=tau_bench_airline_to_evaluation_row,
@@ -245,44 +225,6 @@ def test_tau_bench_airline_evaluation(row: EvaluationRow) -> EvaluationRow:
245225
# If everything passed, show success
246226
reason = "\n".join(failed_reasons) if failed_reasons else "✅ All checks passed"
247227

248-
249-
# # DELETE FROM HERE
250-
# row_id = row.input_metadata.row_id
251-
252-
# # Create trajectory record similar to test_entire_airline_dataset
253-
# model_id = row.input_metadata.completion_params.model if row.input_metadata else "unknown"
254-
# trajectory_record = {
255-
# "model_id": model_id,
256-
# "row_id": row_id,
257-
# "messages": [
258-
# {"role": msg.role, "content": msg.content, "tool_calls": getattr(msg, "tool_calls", None)}
259-
# for msg in messages
260-
# ],
261-
# "evaluation": {
262-
# "score": reward,
263-
# "reason": reason,
264-
# "metrics": {
265-
# "env_reward": {"score": env_reward_info.reward, "success": env_reward_info.reward > 0, "reason": str(env_reward_info.reward_breakdown)},
266-
# "action_reward": {"score": action_reward_info.reward, "success": action_reward_info.reward > 0, "reason": str(action_reward_info.reward_breakdown)},
267-
# "nl_reward": {"score": nl_reward_info.reward, "success": nl_reward_info.reward > 0, "reason": str(nl_reward_info.reward_breakdown)},
268-
# "comm_reward": {"score": communicate_reward_info.reward, "success": communicate_reward_info.reward > 0, "reason": str(communicate_reward_info.reward_breakdown)},
269-
# },
270-
# },
271-
# "evaluation_criteria": evaluation_criteria,
272-
# "conversation_length": len(messages),
273-
# "trajectory_steps": len([msg for msg in messages if msg.role == "assistant"]), # Approximate step count
274-
# "cost_info": {
275-
# "total_cost": 0.0, # Could be extracted from usage stats if available
276-
# "total_tokens": 0, # Could be extracted from usage stats if available
277-
# "cost_source": "not_tracked",
278-
# },
279-
# "timestamp": datetime.now().isoformat(),
280-
# }
281-
282-
# # Save this individual trajectory immediately
283-
# save_single_trajectory(trajectory_record, row_id=row_id)
284-
# # DELETE UNTIL HERE
285-
286228
row.evaluation_result = EvaluateResult(
287229
score=reward,
288230
reason=reason,

0 commit comments

Comments (0)