Skip to content

Commit a473f0c

Browse files
committed
Updating num_runs
1 parent d2992d9 commit a473f0c

File tree

2 files changed

+2
-2
lines changed

2 files changed

+2
-2
lines changed

eval_protocol/benchmarks/test_gpqa.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -97,7 +97,7 @@ def __call__(self, rows: List[EvaluationRow], config: RolloutProcessorConfig) ->
 97  97   rollout_processor=GPQAStripGTRolloutProcessor(),
 98  98   aggregation_method="mean",
 99  99   passed_threshold=None,
100     - num_runs=1,
    100 + num_runs=8,
101 101   mode="pointwise",
102 102   )
103 103   def test_gpqa_pointwise(row: EvaluationRow) -> EvaluationRow:

eval_protocol/benchmarks/test_livebench_data_analysis.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -456,7 +456,7 @@ def test_livebench_cta_pointwise(row: EvaluationRow) -> EvaluationRow:
456 456   rollout_processor=LiveBenchGroundTruthRolloutProcessor(_TABLEJOIN_ROWS),
457 457   aggregation_method="mean",
458 458   passed_threshold=None,
459     - num_runs=1,
    459 + num_runs=4,
460 460   mode="pointwise",
461 461   )
462 462   def test_livebench_tablejoin_pointwise(row: EvaluationRow) -> EvaluationRow:

0 commit comments

Comments
 (0)