Skip to content

Commit c09755b

Browse files
authored
Support setting custom eval result output dir (#238)
* support only persisting jsonl to local but no upload
* add
* add
* update
* output result to a custom dir
1 parent df651a7 commit c09755b

File tree

2 files changed

+18
-2
lines changed

2 files changed

+18
-2
lines changed

eval_protocol/pytest/handle_persist_flow.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,10 @@
1616
def handle_persist_flow(all_results: list[list[EvaluationRow]], test_func_name: str):
1717
try:
1818
# Default is to save and upload experiment JSONL files, unless explicitly disabled
19-
should_save_and_upload = os.getenv("EP_NO_UPLOAD") != "1"
19+
custom_output_dir = os.getenv("EP_OUTPUT_DIR")
20+
should_save = os.getenv("EP_NO_UPLOAD") != "1" or custom_output_dir is not None
2021

21-
if should_save_and_upload:
22+
if should_save:
2223
current_run_rows = [item for sublist in all_results for item in sublist]
2324
if current_run_rows:
2425
experiments: dict[str, list[EvaluationRow]] = defaultdict(list)
@@ -27,6 +28,8 @@ def handle_persist_flow(all_results: list[list[EvaluationRow]], test_func_name:
2728
experiments[row.execution_metadata.experiment_id].append(row)
2829

2930
eval_protocol_dir = find_eval_protocol_dir()
31+
if custom_output_dir:
32+
eval_protocol_dir = custom_output_dir
3033
exp_dir = pathlib.Path(eval_protocol_dir) / "experiment_results"
3134
exp_dir.mkdir(parents=True, exist_ok=True)
3235

@@ -81,6 +84,10 @@ def handle_persist_flow(all_results: list[list[EvaluationRow]], test_func_name:
8184
json.dump(row_data, f, ensure_ascii=False)
8285
f.write("\n")
8386

87+
should_upload = os.getenv("EP_NO_UPLOAD") != "1"
88+
if not should_upload:
89+
continue
90+
8491
def get_auth_value(key: str) -> str | None:
8592
"""Get auth value from config file or environment."""
8693
try:

eval_protocol/pytest/plugin.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,11 @@ def pytest_addoption(parser) -> None:
133133
default=None,
134134
help=("If set, use this base URL for remote rollout processing. Example: http://localhost:8000"),
135135
)
136+
group.addoption(
137+
"--ep-output-dir",
138+
default=None,
139+
help=("If set, save evaluation results to this directory in jsonl format."),
140+
)
136141

137142

138143
def _normalize_max_rows(val: Optional[str]) -> Optional[str]:
@@ -258,6 +263,10 @@ def pytest_configure(config) -> None:
258263
if threshold_env is not None:
259264
os.environ["EP_PASSED_THRESHOLD"] = threshold_env
260265

266+
if config.getoption("--ep-output-dir"):
267+
# set this to save eval results to the target dir in jsonl format
268+
os.environ["EP_OUTPUT_DIR"] = config.getoption("--ep-output-dir")
269+
261270
if config.getoption("--ep-no-upload"):
262271
os.environ["EP_NO_UPLOAD"] = "1"
263272

0 commit comments

Comments
 (0)