# configure stdout for pytest debugging #11
# Workflow file for this run
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Workflow identity, run de-duplication and trigger rules for the Python CI
# pipeline.
name: Python CI

# Runs are grouped per workflow + git ref; starting a new run in a group
# cancels the one that is still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

# Triggers: pushes to main, pull requests targeting main, and manual dispatch.
# Changes that only touch paths listed under `paths-ignore` do not start a run.
# NOTE(review): in GitHub filter patterns `*` does not match `/`, so '*.md'
# only covers Markdown files at the repository root — confirm that is intended.
on:
  push:
    branches:
      - main
    paths-ignore:
      - 'docs/**'
      - '*.md'
  pull_request:
    branches:
      - main
    paths-ignore:
      - 'docs/**'
      - '*.md'
  workflow_dispatch:
# Job graph:
#   lint-and-type-check -> test-core (3.10 / 3.11 / 3.12)
#                       -> test-batch-evaluation
#                       -> test-mcp-e2e
#   all three test jobs -> upload-coverage
jobs:
  # Static-analysis gate: every test job lists this job under `needs`.
  lint-and-type-check:
    name: Lint & Type Check
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          # Fetch all history for all tags and branches
          fetch-depth: 0
      - name: Set up Python 3.12
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"
      - name: Install uv
        uses: astral-sh/setup-uv@v6
        with:
          enable-cache: true
      - name: Install the project
        run: uv sync --locked --all-extras --dev
      # Installed from the tip of `main`, so this dependency is not pinned.
      - name: Install tau2 for testing
        run: uv pip install git+https://github.com/sierra-research/tau2-bench.git@main
      # `--exit-zero` makes flake8 report findings without failing the step.
      - name: Lint with flake8
        run: >-
          uv run flake8 eval_protocol tests examples scripts
          --count --exit-zero --max-complexity=10 --max-line-length=88
          --statistics
      - name: Type check with mypy
        run: uv run mypy eval_protocol

  # Main test suite, run once per Python version in the matrix.
  test-core:
    name: Core Tests (Python ${{ matrix.python-version }})
    runs-on: ubuntu-latest
    needs: lint-and-type-check
    strategy:
      # Let the remaining matrix entries finish even if one version fails.
      fail-fast: false
      matrix:
        # Quoted so that 3.10 is not parsed as the float 3.1.
        python-version: ["3.10", "3.11", "3.12"]
    steps:
      - uses: actions/checkout@v4
        with:
          # Fetch all history for all tags and branches
          fetch-depth: 0
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install uv
        uses: astral-sh/setup-uv@v6
        with:
          enable-cache: true
      - name: Install the project
        run: uv sync --locked --all-extras --dev
      - name: Install tau2 for testing
        run: uv pip install git+https://github.com/sierra-research/tau2-bench.git@main
      - name: Run Core Tests with pytest-xdist
        env:
          E2B_API_KEY: ${{ secrets.E2B_API_KEY }}
          FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
          FIREWORKS_ACCOUNT_ID: ${{ secrets.FIREWORKS_ACCOUNT_ID }}
          PYTHONWARNINGS: "ignore::DeprecationWarning,ignore::RuntimeWarning"
        run: |
          # Run most tests in parallel, but explicitly ignore tests that manage their own servers
          uv run pytest \
            -n auto \
            --ignore=tests/test_batch_evaluation.py \
            --cov=eval_protocol --cov-append --cov-report=xml --cov-report=term-missing -v --durations=10
      - name: Store coverage file
        uses: actions/upload-artifact@v4
        with:
          # One artifact per matrix entry so the names do not collide.
          name: coverage-core-${{ matrix.python-version }}
          path: coverage.xml
          retention-days: 1

  # Runs the one test file that test-core ignores, without pytest-xdist.
  test-batch-evaluation:
    name: Batch Evaluation Tests
    runs-on: ubuntu-latest
    needs: lint-and-type-check
    steps:
      - uses: actions/checkout@v4
        with:
          # Fetch all history for all tags and branches
          fetch-depth: 0
      - name: Set up Python 3.12
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"
      - name: Install uv
        uses: astral-sh/setup-uv@v6
        with:
          enable-cache: true
      - name: Install the project
        run: uv sync --locked --all-extras --dev
      - name: Install tau2 for testing
        run: uv pip install git+https://github.com/sierra-research/tau2-bench.git@main
      - name: Run Batch Evaluation Tests
        env:
          E2B_API_KEY: ${{ secrets.E2B_API_KEY }}
          FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
          FIREWORKS_ACCOUNT_ID: ${{ secrets.FIREWORKS_ACCOUNT_ID }}
          PYTHONWARNINGS: "ignore::DeprecationWarning,ignore::RuntimeWarning"
        run: |
          # Run only this specific test file, WITHOUT xdist
          uv run pytest tests/test_batch_evaluation.py --cov=eval_protocol --cov-append --cov-report=xml -v --durations=10
      - name: Store coverage file
        uses: actions/upload-artifact@v4
        with:
          name: coverage-batch-eval
          path: coverage.xml
          retention-days: 1

  # NOTE(review): this job installs the project but contains no step that runs
  # tests, so nothing here produces the coverage.xml that the final step
  # uploads. Restore the missing test step or drop the upload — TODO confirm.
  test-mcp-e2e:
    name: MCP End-to-End Tests
    runs-on: ubuntu-latest
    needs: lint-and-type-check
    steps:
      - uses: actions/checkout@v4
        with:
          # Fetch all history for all tags and branches
          fetch-depth: 0
      - name: Set up Python 3.12
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"
      - name: Install uv
        uses: astral-sh/setup-uv@v6
        with:
          enable-cache: true
      - name: Install the project
        run: uv sync --locked --all-extras --dev
      - name: Install tau2 for testing
        run: uv pip install git+https://github.com/sierra-research/tau2-bench.git@main
      - name: Store coverage file
        uses: actions/upload-artifact@v4
        with:
          name: coverage-mcp-e2e
          path: coverage.xml
          retention-days: 1

  # Collects the coverage artifacts of all test jobs and sends them to Codecov.
  upload-coverage:
    name: Upload Coverage
    runs-on: ubuntu-latest
    needs: [test-core, test-batch-evaluation, test-mcp-e2e]
    steps:
      # Check out the sources first so the uploader can resolve the file paths
      # in the reports against the repository tree and read git metadata.
      - uses: actions/checkout@v4
      # No `name` filter: every artifact is downloaded into its own
      # sub-directory of coverage-artifacts/.
      - name: Download all coverage artifacts
        uses: actions/download-artifact@v4
        with:
          path: coverage-artifacts
      # v5 replaces v3, which ran on the retired Node 16 action runtime.
      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v5
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
          directory: ./coverage-artifacts/
          # A failed upload is reported but does not fail the workflow.
          fail_ci_if_error: false
          verbose: true