Skip to content

Commit 1002941

Browse files
authored
Add SCORE_INVALID status to Status model and update related components (#209)
* Add SCORE_INVALID status to Status model and update related components
  - Introduced SCORE_INVALID status code in the Status model.
  - Added score_invalid method to create a status indicating an invalid score.
  - Updated evaluation postprocessing to set status when score is invalid.
  - Enhanced StatusIndicator component to display SCORE_INVALID status.
  - Updated TypeScript types to include SCORE_INVALID in status codes.
* vite build
* reset
1 parent cbb505c commit 1002941

File tree

7 files changed

+43
-20
lines changed

7 files changed

+43
-20
lines changed

eval_protocol/models.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ class Code(int, Enum):
114114
# Custom codes for EP (using higher numbers to avoid conflicts)
115115
FINISHED = 100
116116
RUNNING = 101
117+
SCORE_INVALID = 102
117118

118119
@classmethod
119120
def rollout_running(cls) -> "Status":
@@ -167,6 +168,13 @@ def error(cls, error_message: str, details: Optional[List[Dict[str, Any]]] = Non
167168
"""Create a status indicating the rollout failed with an error."""
168169
return cls(code=cls.Code.INTERNAL, message=error_message, details=details or [])
169170

171+
@classmethod
172+
def score_invalid(
173+
cls, message: str = "Score is invalid", details: Optional[List[Dict[str, Any]]] = None
174+
) -> "Status":
175+
"""Create a status indicating the score is invalid."""
176+
return cls(code=cls.Code.SCORE_INVALID, message=message, details=details or [])
177+
170178
def is_running(self) -> bool:
171179
"""Check if the status indicates the rollout is running."""
172180
return self.code == self.Code.RUNNING
@@ -183,6 +191,10 @@ def is_stopped(self) -> bool:
183191
"""Check if the status indicates the rollout was stopped."""
184192
return self.code == self.Code.CANCELLED
185193

194+
def is_score_invalid(self) -> bool:
195+
"""Check if the status indicates the score is invalid."""
196+
return self.code == self.Code.SCORE_INVALID
197+
186198
def get_termination_reason(self) -> Optional[TerminationReason]:
187199
"""Extract termination reason from details if present."""
188200
for detail in self.details:

eval_protocol/pytest/evaluation_test_postprocess.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import sys
88
import time
99
from eval_protocol.dataset_logger.dataset_logger import DatasetLogger
10-
from eval_protocol.models import CompletionParams, EvaluationRow, EvaluationThreshold
10+
from eval_protocol.models import CompletionParams, EvaluationRow, EvaluationThreshold, Status
1111
from eval_protocol.pytest.handle_persist_flow import handle_persist_flow
1212
from eval_protocol.pytest.types import EvaluationTestMode
1313
from eval_protocol.pytest.utils import AggregationMethod, aggregate, extract_effort_tag, sanitize_filename
@@ -80,6 +80,9 @@ def postprocess(
8080
result.evaluation_result.agg_score = agg_score
8181
if result.evaluation_result.standard_error is None:
8282
result.evaluation_result.standard_error = standard_error
83+
if result.evaluation_result.is_score_valid is False:
84+
if result.eval_metadata is not None:
85+
result.eval_metadata.status = Status.score_invalid()
8386
result.execution_metadata.experiment_duration_seconds = experiment_duration_seconds
8487
active_logger.log(result)
8588

Lines changed: 17 additions & 17 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vite-app/dist/assets/index-BOtcXdzP.js.map renamed to vite-app/dist/assets/index-C8woq7EO.js.map

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vite-app/dist/index.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
66
<title>EP | Log Viewer</title>
77
<link rel="icon" href="/assets/favicon-BkAAWQga.png" />
8-
<script type="module" crossorigin src="/assets/index-BOtcXdzP.js"></script>
8+
<script type="module" crossorigin src="/assets/index-C8woq7EO.js"></script>
99
<link rel="stylesheet" crossorigin href="/assets/index-CSKGq1w7.css">
1010
</head>
1111
<body>

vite-app/src/components/StatusIndicator.tsx

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,12 @@ const StatusIndicator: React.FC<StatusIndicatorProps> = ({
5858
textColor: "text-yellow-700",
5959
text: "stopped",
6060
};
61+
case "SCORE_INVALID":
62+
return {
63+
dotColor: "bg-red-500",
64+
textColor: "text-red-700",
65+
text: "score invalid",
66+
};
6167
default:
6268
return {
6369
dotColor: "bg-gray-500",

vite-app/src/types/eval-protocol.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,7 @@ export const StatusCodeSchema = z
157157
"UNAUTHENTICATED",
158158
"FINISHED",
159159
"RUNNING",
160+
"SCORE_INVALID",
160161
])
161162
.describe("Common gRPC status codes as defined in google.rpc.Code");
162163

@@ -181,6 +182,7 @@ export const STATUS_CODE_MAP: Record<number, StatusCode> = {
181182
16: "UNAUTHENTICATED",
182183
100: "FINISHED",
183184
101: "RUNNING",
185+
102: "SCORE_INVALID",
184186
} as const;
185187

186188
// Helper function to get status code name from integer

0 commit comments

Comments
 (0)