Skip to content

Commit e8d83cd

Browse files
committed
fix upload
1 parent 96bf517 commit e8d83cd

File tree

1 file changed

+26
-15
lines changed

1 file changed

+26
-15
lines changed

eval_protocol/evaluation.py

Lines changed: 26 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ def huggingface_dataset_to_jsonl(
5353
Path to the generated JSONL file
5454
"""
5555
try:
56-
from datasets import load_dataset
56+
from datasets import load_dataset # pyright: ignore[reportAttributeAccessIssue]
5757
except ImportError:
5858
raise ImportError(
5959
"The 'datasets' package is required to use this function. "
@@ -197,9 +197,18 @@ def __init__(
197197
# If ts_mode_config is active, it takes precedence for code definition.
198198
# The multi_metrics flag passed to __init__ is for folder-based loading if ts_mode_config is not used.
199199

200+
def _should_include_file(self, filename: str) -> bool:
201+
"""Check if a file should be included in the evaluator upload."""
202+
return (
203+
filename.endswith(".py")
204+
or filename.endswith(".txt")
205+
or filename.endswith(".toml")
206+
or os.path.basename(filename) == "Dockerfile"
207+
)
208+
200209
def _load_python_files_from_folder(self, folder_path: str) -> Dict[str, str]:
201210
"""
202-
Recursively loads all Python files from a given folder (excluding common ignored dirs).
211+
Recursively loads Python, text, and TOML files, plus any Dockerfile, from a given folder (excluding common ignored dirs).
203212
204213
Args:
205214
folder_path: Absolute path to the folder.
@@ -223,15 +232,15 @@ def _load_python_files_from_folder(self, folder_path: str) -> Dict[str, str]:
223232
# prune ignored directories
224233
dirnames[:] = [d for d in dirnames if d not in ignored_dirs and not d.startswith(".")]
225234
for name in filenames:
226-
if not name.endswith(".py"):
235+
if not self._should_include_file(name):
227236
continue
228237
abs_path = Path(dirpath) / name
229238
rel_path = str(abs_path.relative_to(base_path))
230239
with open(abs_path, "r", encoding="utf-8") as f:
231240
content = f.read()
232241
files[rel_path] = content
233242
if not files:
234-
raise ValueError(f"No Python files found in {folder_path}")
243+
raise ValueError(f"No Python, text, or TOML files found in {folder_path}")
235244
return files
236245

237246
def load_metric_folder(self, metric_name, folder_path):
@@ -300,7 +309,7 @@ def load_metric_folder(self, metric_name, folder_path):
300309
for filename, content in files.items():
301310
self.code_files[f"{metric_name}/{filename}"] = content
302311

303-
logger.info(f"Loaded {len(files)} Python files for metric '{metric_name}' from {folder_path}")
312+
logger.info(f"Loaded {len(files)} files for metric '{metric_name}' from {folder_path}")
304313
return files
305314

306315
def load_multi_metrics_folder(self, folder_path):
@@ -317,7 +326,7 @@ def load_multi_metrics_folder(self, folder_path):
317326
files = self._load_python_files_from_folder(folder_path)
318327

319328
self.code_files = files
320-
logger.info(f"Loaded {len(files)} Python files from {folder_path} for multi-metrics evaluation")
329+
logger.info(f"Loaded {len(files)} files from {folder_path} for multi-metrics evaluation")
321330
return files
322331

323332
def load_samples_from_jsonl(self, sample_file, max_samples=5):
@@ -679,11 +688,12 @@ def evaluate(messages, ground_truth: Optional[Union[str, List[Dict[str, Any]]]]
679688
elif self.multi_metrics:
680689
file_contents = {}
681690
for filename, content in self.code_files.items():
682-
if not filename.endswith(".py"):
683-
continue
684-
file_contents[filename] = self._update_evaluate_signature(content)
691+
if filename.endswith(".py"):
692+
file_contents[filename] = self._update_evaluate_signature(content)
693+
elif self._should_include_file(filename) and not filename.endswith(".py"):
694+
file_contents[filename] = content
685695
if not file_contents:
686-
raise ValueError("No Python files found for multi-metrics mode.")
696+
raise ValueError("No files found for multi-metrics mode.")
687697
# Determine entry file from entry_point if provided; otherwise detect
688698
entry_file = None
689699
if self.entry_point and "::" in self.entry_point:
@@ -737,14 +747,15 @@ def evaluate(messages, ground_truth: Optional[Union[str, List[Dict[str, Any]]]]
737747
file_contents = {}
738748
# Include all discovered files for this metric folder, preserving filenames
739749
for filename, content in self.code_files.items():
740-
if filename.startswith(f"{metric_name}/") and filename.endswith(".py"):
750+
if filename.startswith(f"{metric_name}/"):
741751
# Use the file name within the metric folder for clarity
742752
short_name = filename.split(f"{metric_name}/", 1)[1]
743-
file_contents[short_name] = self._update_evaluate_signature(content)
753+
if filename.endswith(".py"):
754+
file_contents[short_name] = self._update_evaluate_signature(content)
755+
elif self._should_include_file(filename) and not filename.endswith(".py"):
756+
file_contents[short_name] = content
744757
if not file_contents:
745-
logger.warning(
746-
f"No Python files prepared for metric '{metric_name}', skipping this metric for criteria."
747-
)
758+
logger.warning(f"No files prepared for metric '{metric_name}', skipping this metric for criteria.")
748759
continue
749760
# Determine entry file within this metric's files using entry_point if present
750761
entry_file = None

0 commit comments

Comments
 (0)