ci: Add CI workflow for lint, typecheck, and CPU tests (#189)

Abdelsalam-Abbas · web-flow · commit 025fbd802f57 · 2026-03-30T14:30:23.000-07:00
## Summary
- New `ci.yml` workflow with 3 jobs on `ubuntu-latest`, no approval
  gates:
  - **lint**: `ruff check` + `ruff format --check`
  - **typecheck**: `ty check` across boltz-dev, protenix-dev, rf3-dev
    (matrix)
  - **cpu-tests**: `pytest -m 'not gpu'` across all 3 envs (412 tests,
    matrix)
- Switch self-hosted GPU runners from GitHub Actions cache to NFS-backed
  caching

## Context
Addresses feedback from Karson and Marcus:
- Non-GPU tests now run automatically on PRs without approval
- Formatting/linting enforced in CI matching pre-commit hooks (ruff, ty)
- GPU test approval is preserved so GPU tests can be held back during
  sampleworks machine runs

## Test plan
- [x] Workflow YAML validated
- [x] CPU tests pass on ubuntu-latest
- [x] GPU tests work with NFS cache — pixi install: 11min → 11s,
boltz/rf3: 14min → 3.5min

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

* **Chores**
  * Added comprehensive CI workflows with lint, type-check, and
    multi-environment test runs (including manual trigger) and adjusted GPU
    install caching.
  * Adjusted type-checking rule severities: pre-existing diagnostics are
    reported as warnings so `ty` can run in CI without blocking PRs.
  * Improved runtime robustness and fallback determinism for GPU extension
    loading.
  * Minor formatting and argument/help string cleanups across utilities.

* **Tests**
  * Made tests more deterministic by adding targeted mocking and cache
    resets.
  * Improved test reliability and maintainability.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -0,0 +1,96 @@
+name: CI
+
+on:
+  push:
+    branches: [main]
+    paths:  # run only when code, tests, dependency, or CI/lint config files change
+      - 'src/**'
+      - 'tests/**'
+      - 'pyproject.toml'
+      - 'pixi.lock'
+      - '.github/workflows/ci.yml'
+      - '.pre-commit-config.yaml'
+  pull_request:
+    branches: [main]
+    paths:  # same path filter as the push trigger
+      - 'src/**'
+      - 'tests/**'
+      - 'pyproject.toml'
+      - 'pixi.lock'
+      - '.github/workflows/ci.yml'
+      - '.pre-commit-config.yaml'
+  workflow_dispatch:  # allows the workflow to be started manually
+
+concurrency:
+  group: ci-${{ github.ref }}  # one concurrency group per git ref
+  cancel-in-progress: true  # a newly started run cancels the in-progress run for the same ref
+
+jobs:
+  lint:  # ruff lint and format check, executed from the boltz-dev pixi environment
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    permissions:
+      contents: read
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Install pixi
+        uses: prefix-dev/setup-pixi@19eac09b398e3d0c747adc7921926a6d802df4da # v0.8.8
+        with:
+          environments: boltz-dev
+
+      - name: Ruff lint
+        run: pixi run -e boltz-dev ruff check .
+
+      - name: Ruff format check
+        run: pixi run -e boltz-dev ruff format --check .
+
+  typecheck:  # runs `ty check` once per pixi environment in the matrix
+    runs-on: ubuntu-latest
+    timeout-minutes: 15
+    permissions:
+      contents: read
+    strategy:
+      fail-fast: false  # let the other environments finish when one fails
+      matrix:
+        environment: [boltz-dev, protenix-dev, rf3-dev]
+
+    name: typecheck (${{ matrix.environment }})
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Install pixi
+        uses: prefix-dev/setup-pixi@19eac09b398e3d0c747adc7921926a6d802df4da # v0.8.8
+        with:
+          environments: ${{ matrix.environment }}
+
+      - name: Run ty
+        run: pixi run -e ${{ matrix.environment }} ty check
+
+  cpu-tests:  # runs the `cpu-tests` pixi task once per environment in the matrix
+    runs-on: ubuntu-latest
+    timeout-minutes: 20
+    permissions:
+      contents: read
+    strategy:
+      fail-fast: false  # let the other environments finish when one fails
+      matrix:
+        environment: [boltz-dev, protenix-dev, rf3-dev]
+
+    name: tests (${{ matrix.environment }})
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Install pixi
+        uses: prefix-dev/setup-pixi@19eac09b398e3d0c747adc7921926a6d802df4da # v0.8.8
+        with:
+          environments: ${{ matrix.environment }}
+
+      - name: Run CPU tests
+        run: pixi run -e ${{ matrix.environment }} cpu-tests
diff --git a/.github/workflows/gpu-tests.yml b/.github/workflows/gpu-tests.yml
@@ -43,6 +43,8 @@ jobs:
 
       - name: Install pixi
         uses: prefix-dev/setup-pixi@19eac09b398e3d0c747adc7921926a6d802df4da # v0.8.8
+        with:
+          cache: false  # NFS-backed cache on self-hosted runner handles this
 
       - name: Build CUDA extensions
         run: pixi run -e ${{ matrix.environment }} python3 -c "from sampleworks.core.forward_models.xray.real_space_density_deps.ops.csrc import dilate_points_cuda"
diff --git a/pixi.lock b/pixi.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -177,4 +177,19 @@ include = ["src/sampleworks/eval/bond_angle_and_length_outlier_eval_script.py"]
 possibly-missing-attribute = "ignore"
 
 [tool.ty.rules]
+# Pre-existing type issues across the codebase; warn instead of error
+# so ty runs in CI without blocking PRs while the team fixes them.
 unresolved-import = "ignore"
+unknown-argument = "warn"
+unresolved-attribute = "warn"
+invalid-argument-type = "warn"
+invalid-assignment = "warn"
+invalid-method-override = "warn"
+invalid-parameter-default = "warn"
+no-matching-overload = "warn"
+not-iterable = "warn"
+not-subscriptable = "warn"
+too-many-positional-arguments = "warn"
+unsupported-operator = "warn"
+unused-ignore-comment = "warn"
+unused-type-ignore-comment = "warn"
diff --git a/scripts/eval/bond_geometry_eval.py b/scripts/eval/bond_geometry_eval.py
@@ -40,7 +40,7 @@ def bond_length_violations(pose: AtomArray, tolerance: float = 0.1) -> tuple[flo
     """
     try:
         bounds = check_pose_and_get_bounds(pose)
-    except (ValueError, BadStructureError) as e:
+    except (ValueError, BadStructureError):
         return np.nan, pd.DataFrame()
 
     bond_indices = np.sort(pose.bonds.as_array()[:, :2], axis=1)
@@ -97,13 +97,12 @@ def check_pose_and_get_bounds(pose: AtomArray):
             "`biotite.structure.io.pdbx.get_structure(..., include_bonds=True)`"
         )
         raise ValueError("The structure does not have bonds.")
-    
+
     # this fetches values from RDKit, raises BadStructureError if the structure is bad
     bounds = get_distance_bounds(pose)
     return bounds
 
 
-
 def bond_angle_violations(pose: AtomArray, tolerance: float = 0.1) -> tuple[float, pd.DataFrame]:
     """
     Calculate the percentage of bonds that are outside acceptable ranges.
diff --git a/scripts/eval/run_and_process_phenix_clashscore.py b/scripts/eval/run_and_process_phenix_clashscore.py
@@ -37,9 +37,7 @@ def main(args) -> None:
         return
 
     clashscore_df = pd.concat(clashscore_metrics, ignore_index=True)
-    clashscore_df.to_csv(
-        args.grid_search_results_path / "clashscore_metrics.csv", index=False
-    )
+    clashscore_df.to_csv(args.grid_search_results_path / "clashscore_metrics.csv", index=False)
 
 
 def process_one_trial(trial: Trial) -> pd.DataFrame:
diff --git a/scripts/eval/run_and_process_tortoize.py b/scripts/eval/run_and_process_tortoize.py
@@ -8,7 +8,6 @@
 import pandas as pd
 from loguru import logger
 from pandas import DataFrame
-
 from sampleworks.eval.grid_search_eval_utils import parse_eval_args, setup_evaluation_parameters
 
 
@@ -27,9 +26,7 @@ def main(args: argparse.Namespace) -> None:
     try:
         subprocess.call("tortoize", stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
     except FileNotFoundError:
-        raise RuntimeError(
-            "tortoize is not available, make sure you have installed it."
-        ) from None
+        raise RuntimeError("tortoize is not available, make sure you have installed it.") from None
     # The dropped variable is a list of ProteinConfigs, not used yet in this script
     all_trials, _ = setup_evaluation_parameters(args)
 
@@ -122,13 +119,15 @@ def get_protein_level_z_scores(tortoize_json: dict[str, Any]) -> pd.DataFrame:
     out: list[dict[str, Any]] = []
     model_block = tortoize_json.get("model", {})
     for model_id, model_data in model_block.items():
-        out.append({
-            "model": str(model_id),
-            "ramachandran_z_score": model_data.get("ramachandran-z", None),
-            "ramachandran_jackknife_sd": model_data.get("ramachandran-jackknife-sd", None),
-            "torsion_z_score": model_data.get("torsion-z", None),
-            "torsion_jackknife_sd": model_data.get("torsion-jackknife-sd", None)
-        })
+        out.append(
+            {
+                "model": str(model_id),
+                "ramachandran_z_score": model_data.get("ramachandran-z", None),
+                "ramachandran_jackknife_sd": model_data.get("ramachandran-jackknife-sd", None),
+                "torsion_z_score": model_data.get("torsion-z", None),
+                "torsion_jackknife_sd": model_data.get("torsion-jackknife-sd", None),
+            }
+        )
     return pd.DataFrame(out)
 
 
diff --git a/src/sampleworks/core/forward_models/xray/real_space_density_deps/ops/csrc/__init__.py b/src/sampleworks/core/forward_models/xray/real_space_density_deps/ops/csrc/__init__.py
@@ -45,4 +45,5 @@ def _ensure_toolchain_env() -> None:
     CUDA_AVAILABLE = True
 except Exception as e:
     print(f"CUDA extension loading failed: {e}")
+    dilate_points_cuda = None
     CUDA_AVAILABLE = False
diff --git a/src/sampleworks/eval/grid_search_eval_utils.py b/src/sampleworks/eval/grid_search_eval_utils.py
@@ -11,7 +11,7 @@
 
 from loguru import logger
 from sampleworks.eval.constants import OCCUPANCY_LEVELS
-from sampleworks.eval.eval_dataclasses import Trial, TrialList, ProteinConfig
+from sampleworks.eval.eval_dataclasses import ProteinConfig, Trial, TrialList
 from sampleworks.eval.occupancy_utils import extract_protein_and_occupancy
 from sampleworks.utils.guidance_constants import StructurePredictor
 
@@ -175,22 +175,22 @@ def parse_eval_args(description: str | None = None):
         type=Path,
         required=True,
         help="Path to the top-level grid search results directory, usu. called "
-             "``grid_search_results``",
+        "``grid_search_results``",
     )
     # not technically used everywhere yet, but requiring it future-proofs.
     parser.add_argument(
         "--grid-search-inputs-path",
         type=Path,
         required=True,
         help="Path to the directory containing the grid search inputs, in particular "
-             "the protein configuration CSV file, maps, and reference structures.",
+        "the protein configuration CSV file, maps, and reference structures.",
         default=None,
     )
     parser.add_argument(
         "--protein-configs-csv",
         type=Path,
         help="Path to the CSV file containing protein configurations, like "
-             "``${HOME}/configs.csv``. Defaults to sampleworks/data/protein_configs.csv",
+        "``${HOME}/configs.csv``. Defaults to sampleworks/data/protein_configs.csv",
         default=files("sampleworks.data") / "protein_configs.csv",
     )
     parser.add_argument(
@@ -215,7 +215,7 @@ def parse_eval_args(description: str | None = None):
 
 
 def setup_evaluation_parameters(
-        args: argparse.Namespace
+    args: argparse.Namespace,
 ) -> tuple[TrialList, dict[str, ProteinConfig]]:
     grid_search_dir = Path(args.grid_search_results_path)
 
@@ -227,9 +227,7 @@ def setup_evaluation_parameters(
     logger.info(f"Proteins configured: {list(protein_configs.keys())}")
 
     # Scan for experiments (look for refined.cif files)
-    all_trials = scan_grid_search_results(
-        grid_search_dir, target_filename=args.target_filename
-    )
+    all_trials = scan_grid_search_results(grid_search_dir, target_filename=args.target_filename)
     logger.info(f"Found {len(all_trials)} experiments with refined.cif files")
 
     if all_trials:
diff --git a/src/sampleworks/utils/msa.py b/src/sampleworks/utils/msa.py
@@ -52,8 +52,8 @@ def _validate_msa_cache_contents(msa_hash: str, msa_dir: Path) -> None:
         raise FileNotFoundError(f"No A3M files found for hash {msa_hash} in {msa_dir}")
 
     # Validate that we have matching pairs
-    csv_indices = {int(f.stem.split('_')[-1]) for f in csv_files}
-    a3m_indices = {int(f.stem.split('_')[-1]) for f in a3m_files}
+    csv_indices = {int(f.stem.split("_")[-1]) for f in csv_files}
+    a3m_indices = {int(f.stem.split("_")[-1]) for f in a3m_files}
 
     if csv_indices != a3m_indices:
         raise ValueError(
@@ -67,16 +67,16 @@ def _validate_msa_cache_contents(msa_hash: str, msa_dir: Path) -> None:
         a3m_path = msa_dir / f"{msa_hash}_{idx}.a3m"
 
         # Read CSV sequences (skip header, take second column)
-        with csv_path.open('r') as f:
+        with csv_path.open("r") as f:
             csv_lines = f.readlines()
 
         if not csv_lines or csv_lines[0].strip() != "key,sequence":
             raise ValueError(f"Invalid CSV header in {csv_path}")
 
-        csv_sequences = [line.strip().split(',', 1)[1] for line in csv_lines[1:] if line.strip()]
+        csv_sequences = [line.strip().split(",", 1)[1] for line in csv_lines[1:] if line.strip()]
 
         # Read A3M sequences (every other line, skipping headers)
-        with a3m_path.open('r') as f:
+        with a3m_path.open("r") as f:
             a3m_lines = f.readlines()
 
         # A3M format: header lines start with '>', sequences on alternating lines
diff --git a/tests/eval/test_structure_utils.py b/tests/eval/test_structure_utils.py
@@ -23,7 +23,9 @@ def mock_protein_config(tmp_path: Path) -> ProteinConfig:
     return ProteinConfig(
         protein="test",
         base_map_dir=tmp_path,
-        selection=["chain A and resi 1-10", ],
+        selection=[
+            "chain A and resi 1-10",
+        ],
         resolution=2.0,
         map_pattern="{occ_str}.ccp4",
         structure_pattern="{occ_str}.cif",
@@ -256,7 +258,9 @@ def test_converts_atomarray_to_stack(self, tmp_path, basic_atom_array_multichain
         config = ProteinConfig(
             protein="test",
             base_map_dir=tmp_path,
-            selection=["chain A", ],
+            selection=[
+                "chain A",
+            ],
             resolution=2.0,
             map_pattern="{occ_str}.ccp4",
             structure_pattern="{occ_str}.cif",
@@ -272,7 +276,9 @@ def test_with_real_structure(self, resources_dir):
         config = ProteinConfig(
             protein="6b8x",
             base_map_dir=resources_dir / "6b8x",
-            selection=["chain A", ],
+            selection=[
+                "chain A",
+            ],
             resolution=1.74,
             map_pattern="{occ_str}.ccp4",
             structure_pattern="6b8x_final.pdb",
@@ -299,7 +305,9 @@ def test_handles_exceptions_gracefully(self, tmp_path):
         config = ProteinConfig(
             protein="test",
             base_map_dir=tmp_path,
-            selection=["chain Z and resi 999", ],
+            selection=[
+                "chain Z and resi 999",
+            ],
             resolution=2.0,
             map_pattern="{occ_str}.ccp4",
             structure_pattern="{occ_str}.cif",
@@ -314,7 +322,9 @@ def test_with_real_structure(self, resources_dir):
         config = ProteinConfig(
             protein="6b8x",
             base_map_dir=resources_dir / "6b8x",
-            selection=[selection_string, ],
+            selection=[
+                selection_string,
+            ],
             resolution=1.74,
             map_pattern="{occ_str}.ccp4",
             structure_pattern="6b8x_final.pdb",
diff --git a/tests/models/protenix/test_ccd_expansion.py b/tests/models/protenix/test_ccd_expansion.py
@@ -20,7 +20,11 @@ class TestExpandTildeCCDCode:
 
     def test_unique_match_expands(self):
         """~QS should expand uniquely to A1AQS."""
-        result = _expand_tilde_ccd_code("~QS")
+        fake_codes = ["A1AQS", "GLY", "ALA"]
+        _build_ccd_suffix_map.cache_clear()
+        with patch("protenix.data.ccd.get_all_ccd_code", return_value=fake_codes):
+            result = _expand_tilde_ccd_code("~QS")
+        _build_ccd_suffix_map.cache_clear()
         assert result == "A1AQS"
 
     def test_ambiguous_match_raises(self):
@@ -37,7 +41,11 @@ def test_ambiguous_match_raises(self):
 
     def test_no_match_returns_original(self):
         """When no code matches the suffix, return the truncated code."""
-        result = _expand_tilde_ccd_code("~$$")
+        fake_codes = ["GLY", "ALA"]
+        _build_ccd_suffix_map.cache_clear()
+        with patch("protenix.data.ccd.get_all_ccd_code", return_value=fake_codes):
+            result = _expand_tilde_ccd_code("~$$")
+        _build_ccd_suffix_map.cache_clear()
         assert result == "~$$"
 
 
@@ -46,7 +54,11 @@ class TestStructureToProtenixJsonCCDExpansion:
 
     def test_9bn8_ligand_expanded(self, structure_9bn8):
         """9BN8 structure with ~QS ligand should produce CCD_A1AQS in JSON."""
-        json_dict = structure_to_protenix_json(structure_9bn8)
+        _build_ccd_suffix_map.cache_clear()
+        fake_codes = ["A1AQS", "GLY", "ALA"]
+        with patch("protenix.data.ccd.get_all_ccd_code", return_value=fake_codes):
+            json_dict = structure_to_protenix_json(structure_9bn8)
+        _build_ccd_suffix_map.cache_clear()
 
         ligand_entries = [
             entry["ligand"]["ligand"] for entry in json_dict["sequences"] if "ligand" in entry
diff --git a/tests/utils/test_atom_array_utils.py b/tests/utils/test_atom_array_utils.py
diff --git a/tests/utils/test_guidance_script_arguments.py b/tests/utils/test_guidance_script_arguments.py