-
Notifications
You must be signed in to change notification settings - Fork 47
fix(commit0): evaluation harness fails to score cachetools and parsel due to test command issues #539
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
fix(commit0): evaluation harness fails to score cachetools and parsel due to test command issues #539
Changes from all commits
68feee6
c9ae51f
7652655
1cc53f5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,5 +1,6 @@ | ||
| import json | ||
| import os | ||
| import re | ||
| import shlex | ||
| from typing import Any, List | ||
|
|
||
|
|
@@ -62,6 +63,23 @@ | |
| """.strip() | ||
|
|
||
|
|
||
def normalize_pytest_cmd(test_cmd: str) -> str:
    """Rewrite a leading bare ``pytest``/``pytestN`` as ``python -m pytest``.

    Invoking pytest through the interpreter avoids PATH and
    executable-permission problems with the ``pytest`` entry-point script.

    Only a command whose first token is exactly ``pytest`` or ``pytest``
    followed by a single digit is rewritten. Commands that merely contain
    the word (``mypytest``, ``pytest-xdist``, ``pytest_runner``) or that
    already start with ``python -m pytest`` are returned unchanged.
    Leading whitespace and all arguments are preserved.

    Args:
        test_cmd: The test command string as configured for the repo.

    Returns:
        The command with its leading pytest invocation replaced, or the
        original string when no rewrite applies.
    """
    # One anchored substitution both detects and rewrites the leading token.
    # The lookahead requires whitespace or end-of-string right after the
    # optional digit, so "pytest-xdist" / "pytest_runner" never match, and a
    # command starting with "python -m pytest" cannot match the anchor at all.
    # Occurrences of "python -m pytest" later in the arguments (e.g. inside a
    # -k expression) therefore no longer suppress the rewrite.
    #
    # NOTE(review): the optional digit is carried over, so "pytest3" becomes
    # "python -m pytest3"; presumably a module of that name is expected to
    # exist in the target environment -- confirm, or drop the digit.
    return re.sub(
        r"^(\s*)pytest(\d?)(?=\s|$)",
        r"\g<1>python -m pytest\g<2>",
        test_cmd,
        count=1,
    )
|
|
||
|
|
||
| def get_pythonpath_prefix(src_dir: str) -> str: | ||
| """Return PYTHONPATH env prefix for src-layout repos.""" | ||
| if src_dir and src_dir.startswith("src"): | ||
| return "PYTHONPATH=src:$PYTHONPATH " | ||
|
Comment on lines
+76
to
+79
Collaborator
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🟡 Suggestion: Misleading function signature
The function takes `src_dir` but only checks whether it starts with "src" before returning a hard-coded prefix. Clearer alternatives:
env_prefix = "PYTHONPATH=src:$PYTHONPATH " if src_dir and src_dir.startswith("src") else ""
def is_src_layout(src_dir: str) -> bool:
"""Check if repo uses src-layout requiring PYTHONPATH."""
return bool(src_dir and src_dir.startswith("src"))
# Then:
env_prefix = "PYTHONPATH=src:$PYTHONPATH " if is_src_layout(src_dir) else ""
The current name […] |
||
| return "" | ||
|
|
||
|
|
||
| def parse_report_summary(raw_json: str) -> dict: | ||
| """Parse pytest-json-report summary extracted from the container. | ||
|
|
||
|
|
@@ -440,10 +458,10 @@ def evaluate_instance( | |
| # Run tests | ||
| test_cmd = instance.data["test"]["test_cmd"] | ||
| test_dir = instance.data["test"]["test_dir"] | ||
| # Use python -m pytest instead of pytest command to avoid permission issues | ||
| if test_cmd.strip() == "pytest": | ||
| test_cmd = "python -m pytest" | ||
| full_test_cmd = f"cd {repo_path} && {test_cmd} --json-report --json-report-file=report.json --continue-on-collection-errors {test_dir} > test_output.txt 2>&1" | ||
| test_cmd = normalize_pytest_cmd(test_cmd) | ||
| src_dir = instance.data.get("src_dir", "") | ||
| env_prefix = get_pythonpath_prefix(src_dir) | ||
| full_test_cmd = f"cd {repo_path} && {env_prefix}{test_cmd} --json-report --json-report-file=report.json --continue-on-collection-errors {test_dir} > test_output.txt 2>&1" | ||
VascoSch92 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| logger.info(f"Running test command: {full_test_cmd}") | ||
| test_result = workspace.execute_command(full_test_cmd, timeout=600) | ||
| logger.info(f"Test command exit code: {test_result.exit_code}") | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,54 @@ | ||
| """Tests for commit0 run_infer test command helpers.""" | ||
|
|
||
| import pytest | ||
|
|
||
| from benchmarks.commit0.run_infer import get_pythonpath_prefix, normalize_pytest_cmd | ||
|
|
||
|
|
||
@pytest.mark.parametrize(
    "input_cmd, expected",
    [
        pytest.param("pytest", "python -m pytest", id="bare_pytest"),
        pytest.param("pytest3", "python -m pytest3", id="bare_pytest3"),
        pytest.param(
            "python -m pytest",
            "python -m pytest",
            id="already_module_form",
        ),
        pytest.param("mypytest", "mypytest", id="substring_mypytest"),
        pytest.param(
            "pytest-xdist",
            "pytest-xdist",
            id="substring_pytest-xdist",
        ),
        pytest.param(
            "pytest_runner",
            "pytest_runner",
            id="substring_pytest_runner",
        ),
        pytest.param(
            "pytest --assert=plain --ignore=setup.py",
            "python -m pytest --assert=plain --ignore=setup.py",
            id="real-parsel-scenario",
        ),
    ],
)
def test_normalize_pytest_cmd(input_cmd, expected):
    """Each command is rewritten (or left alone) exactly as expected."""
    normalized = normalize_pytest_cmd(input_cmd)
    assert normalized == expected
VascoSch92 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
|
|
||
@pytest.mark.parametrize(
    "src_dir, expected",
    [
        pytest.param(
            "src/cachetools",
            "PYTHONPATH=src:$PYTHONPATH ",
            id="src_layout",
        ),
        pytest.param("src", "PYTHONPATH=src:$PYTHONPATH ", id="bare_src"),
        pytest.param("", "", id="empty_string"),
        pytest.param("lib/mypackage", "", id="no_src_dir"),
        pytest.param("tests/src/data", "", id="src_not_at_start"),
    ],
)
def test_get_pythonpath_prefix(src_dir, expected):
    """The PYTHONPATH prefix is produced only when src_dir starts with src."""
    prefix = get_pythonpath_prefix(src_dir)
    assert prefix == expected
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🟡 Suggestion: Logic redundancy - lacks "good taste"
You check if the stripped command is exactly `pytest` or `pytest3` with `re.match(r"pytest\d?$", ...)`, but then use word boundaries `\b` in the substitution. If you've already verified it's exactly "pytest", word boundaries are redundant.

Simpler approach:
Or if you need the regex flexibility:
Current code works but mixes two different validation strategies unnecessarily.