OpenHands · juanmichelini · Mar 20, 2026 · Mar 18, 2026 · Mar 19, 2026 · Mar 19, 2026
diff --git a/benchmarks/commit0/run_infer.py b/benchmarks/commit0/run_infer.py
@@ -1,5 +1,6 @@
 import json
 import os
+import re
 import shlex
 from typing import Any, List
 
@@ -62,6 +63,23 @@
 """.strip()
 
 
+def normalize_pytest_cmd(test_cmd: str) -> str:
+    """Run a leading bare pytest/pytestN via ``python -m`` instead of the script."""
+    starts_bare = re.match(r"pytest\d?(\s|$)", test_cmd.strip()) is not None
+    if not starts_bare or "python -m pytest" in test_cmd:
+        return test_cmd  # not a bare invocation, or already in module form
+    # Module form sidesteps PATH/permission issues with the pytest script; the
+    # first match only is rewritten and a trailing digit (e.g. pytest3) is kept.
+    return re.sub(r"\bpytest(\d?)", r"python -m pytest\1", test_cmd, count=1)
+
+
+def get_pythonpath_prefix(src_dir: str) -> str:
+    """Return the PYTHONPATH shell prefix for src-layout repos, or an empty string."""
+    if src_dir and src_dir.split("/", 1)[0] == "src":  # whole component, not "srcfoo"
+        return "PYTHONPATH=src:$PYTHONPATH "
+    return ""
+
+
 def parse_report_summary(raw_json: str) -> dict:
     """Parse pytest-json-report summary extracted from the container.
 
@@ -440,10 +458,10 @@ def evaluate_instance(
         # Run tests
         test_cmd = instance.data["test"]["test_cmd"]
         test_dir = instance.data["test"]["test_dir"]
-        # Use python -m pytest instead of pytest command to avoid permission issues
-        if test_cmd.strip() == "pytest":
-            test_cmd = "python -m pytest"
-        full_test_cmd = f"cd {repo_path} && {test_cmd} --json-report --json-report-file=report.json --continue-on-collection-errors {test_dir} > test_output.txt 2>&1"
+        test_cmd = normalize_pytest_cmd(test_cmd)
+        src_dir = instance.data.get("src_dir", "")
+        env_prefix = get_pythonpath_prefix(src_dir)
+        full_test_cmd = f"cd {repo_path} && {env_prefix}{test_cmd} --json-report --json-report-file=report.json --continue-on-collection-errors {test_dir} > test_output.txt 2>&1"
         logger.info(f"Running test command: {full_test_cmd}")
         test_result = workspace.execute_command(full_test_cmd, timeout=600)
         logger.info(f"Test command exit code: {test_result.exit_code}")

diff --git a/tests/test_commit0_run_infer.py b/tests/test_commit0_run_infer.py
@@ -0,0 +1,54 @@
+"""Tests for commit0 run_infer test command helpers."""
+
+import pytest
+
+from benchmarks.commit0.run_infer import get_pythonpath_prefix, normalize_pytest_cmd
+
+
+@pytest.mark.parametrize(
+    "input_cmd, expected",
+    [
+        # A bare leading pytest/pytestN is rewritten to the module form.
+        pytest.param("pytest", "python -m pytest", id="bare_pytest"),
+        pytest.param("pytest3", "python -m pytest3", id="bare_pytest3"),
+        # Already in module form: returned unchanged.
+        pytest.param("python -m pytest", "python -m pytest", id="already_module_form"),
+        # Names that merely contain "pytest" are left alone.
+        pytest.param("mypytest", "mypytest", id="substring_mypytest"),
+        pytest.param("pytest-xdist", "pytest-xdist", id="substring_pytest-xdist"),
+        pytest.param("pytest_runner", "pytest_runner", id="substring_pytest_runner"),
+        # Options after the command are carried over untouched.
+        pytest.param(
+            "pytest --assert=plain --ignore=setup.py",
+            "python -m pytest --assert=plain --ignore=setup.py",
+            id="real-parsel-scenario",
+        ),
+    ],
+)
+def test_normalize_pytest_cmd(input_cmd, expected):
+    """Check normalize_pytest_cmd against each labelled command.
+
+    Every case pairs an input command with the exact string expected back.
+    """
+    assert normalize_pytest_cmd(input_cmd) == expected
+
+
+@pytest.mark.parametrize(
+    "src_dir, expected",
+    [
+        # "src" itself or a path beneath it: the prefix is emitted.
+        pytest.param("src/cachetools", "PYTHONPATH=src:$PYTHONPATH ", id="src_layout"),
+        pytest.param("src", "PYTHONPATH=src:$PYTHONPATH ", id="bare_src"),
+        # Empty, or "src" absent from the start of the path: no prefix.
+        pytest.param("", "", id="empty_string"),
+        pytest.param("lib/mypackage", "", id="no_src_dir"),
+        pytest.param("tests/src/data", "", id="src_not_at_start"),
+    ],
+)
+def test_get_pythonpath_prefix(src_dir, expected):
+    """Check get_pythonpath_prefix against each labelled src_dir.
+
+    The expected value is the exact prefix string, trailing space included.
+    """
+    prefix = get_pythonpath_prefix(src_dir)
+    assert prefix == expected