@@ -60,26 +60,6 @@ def tau_bench_airline_to_evaluation_row(data: List[Dict[str, Any]]) -> List[Eval
6060
6161 return rows
6262
63-
def save_single_trajectory(trajectory_record: Dict, row_id: str, output_dir: str = "trajectory_outputs") -> Path:
    """Save a single trajectory record to a JSON file and return its path.

    Args:
        trajectory_record: Trajectory payload; must contain a "model_id" key.
        row_id: Identifier for this row, embedded in the output filename.
        output_dir: Directory to write into; created (including parents) if missing.

    Returns:
        Path of the written JSON file.
    """
    output_path = Path(output_dir)
    # parents=True so a nested output_dir (e.g. "runs/2024/traj") doesn't raise
    # FileNotFoundError; exist_ok=True keeps repeated calls idempotent.
    output_path.mkdir(parents=True, exist_ok=True)

    # Sanitize model_id for use in a filename (forward and back slashes -> underscores).
    safe_model_id = trajectory_record["model_id"].replace("/", "_").replace("\\", "_")

    filepath = output_path / f"{safe_model_id}_{row_id}_trajectory.json"

    # default=str is deliberate: it makes datetimes and other non-JSON-native
    # values serializable rather than raising TypeError.
    with open(filepath, "w") as f:
        json.dump(trajectory_record, f, indent=2, default=str)

    print(f"💾 Saved trajectory: {filepath}")
    return filepath
81-
82-
8363@evaluation_test (
8464 input_dataset = ["tests/pytest/data/airline_dataset.jsonl" ],
8565 dataset_adapter = tau_bench_airline_to_evaluation_row ,
@@ -245,44 +225,6 @@ def test_tau_bench_airline_evaluation(row: EvaluationRow) -> EvaluationRow:
245225 # If everything passed, show success
246226 reason = "\n " .join (failed_reasons ) if failed_reasons else "✅ All checks passed"
247227
248-
249- # # DELETE FROM HERE
250- # row_id = row.input_metadata.row_id
251-
252- # # Create trajectory record similar to test_entire_airline_dataset
253- # model_id = row.input_metadata.completion_params.model if row.input_metadata else "unknown"
254- # trajectory_record = {
255- # "model_id": model_id,
256- # "row_id": row_id,
257- # "messages": [
258- # {"role": msg.role, "content": msg.content, "tool_calls": getattr(msg, "tool_calls", None)}
259- # for msg in messages
260- # ],
261- # "evaluation": {
262- # "score": reward,
263- # "reason": reason,
264- # "metrics": {
265- # "env_reward": {"score": env_reward_info.reward, "success": env_reward_info.reward > 0, "reason": str(env_reward_info.reward_breakdown)},
266- # "action_reward": {"score": action_reward_info.reward, "success": action_reward_info.reward > 0, "reason": str(action_reward_info.reward_breakdown)},
267- # "nl_reward": {"score": nl_reward_info.reward, "success": nl_reward_info.reward > 0, "reason": str(nl_reward_info.reward_breakdown)},
268- # "comm_reward": {"score": communicate_reward_info.reward, "success": communicate_reward_info.reward > 0, "reason": str(communicate_reward_info.reward_breakdown)},
269- # },
270- # },
271- # "evaluation_criteria": evaluation_criteria,
272- # "conversation_length": len(messages),
273- # "trajectory_steps": len([msg for msg in messages if msg.role == "assistant"]), # Approximate step count
274- # "cost_info": {
275- # "total_cost": 0.0, # Could be extracted from usage stats if available
276- # "total_tokens": 0, # Could be extracted from usage stats if available
277- # "cost_source": "not_tracked",
278- # },
279- # "timestamp": datetime.now().isoformat(),
280- # }
281-
282- # # Save this individual trajectory immediately
283- # save_single_trajectory(trajectory_record, row_id=row_id)
284- # # DELETE UNTIL HERE
285-
286228 row .evaluation_result = EvaluateResult (
287229 score = reward ,
288230 reason = reason ,
0 commit comments