-
Notifications
You must be signed in to change notification settings - Fork 5
feat: add RMSD metric #227
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,156 @@ | ||
| """RMSD metric, developed starting from :class:`sampleworks.metrics.lddt.AllAtomLDDT`. | ||
|
|
||
| The metric returns a global per model RMSD and a per residue RMSD dictionary, | ||
| so it can be plugged into the same clustering code as the LDDT | ||
| metric. | ||
| """ | ||
|
|
||
| from typing import Any, cast | ||
|
|
||
| import numpy as np | ||
| from atomworks.io.transforms.atom_array import ensure_atom_array_stack | ||
| from atomworks.ml.transforms.atom_array import add_global_token_id_annotation | ||
| from biotite.structure import AtomArray, AtomArrayStack, rmsd, superimpose | ||
|
|
||
| from sampleworks.metrics.metric import Metric | ||
| from sampleworks.utils.atom_array_utils import filter_to_common_atoms | ||
|
|
||
|
|
||
| class AllAtomRMSD(Metric): | ||
| """Computes all atom RMSD from AtomArrays. | ||
|
|
||
| Parameters | ||
| ---------- | ||
| superimpose | ||
| If True, superimpose predicted models onto the reference via Kabsch | ||
| before computing RMSD. Default False is correct for comparisons in a shared crystallographic | ||
| frame, but set True when the predicted and reference structures live in different frames. | ||
| log_rmsd_for_every_batch | ||
| If True, include per model RMSDs as ``all_atom_rmsd_<i>`` in the | ||
| output dict. | ||
| """ | ||
|
|
||
| def __init__( | ||
| self, | ||
| superimpose: bool = False, | ||
| log_rmsd_for_every_batch: bool = False, | ||
| **kwargs: Any, | ||
| ): | ||
| super().__init__(**kwargs) | ||
| self.superimpose = superimpose | ||
| self.log_rmsd_for_every_batch = log_rmsd_for_every_batch | ||
|
|
||
| @property | ||
| def kwargs_to_compute_args(self) -> dict[str, Any]: | ||
| return { | ||
| "predicted_atom_array_stack": "predicted_atom_array_stack", | ||
| "ground_truth_atom_array_stack": "ground_truth_atom_array_stack", | ||
| "selection": "selection", | ||
| } | ||
|
|
||
| @property | ||
| def optional_kwargs(self) -> frozenset[str]: | ||
| return frozenset({"selection"}) | ||
|
|
||
| def compute( | ||
| self, | ||
| predicted_atom_array_stack: AtomArrayStack | AtomArray, | ||
| ground_truth_atom_array_stack: AtomArrayStack | AtomArray, | ||
| selection: str | None = None, | ||
| ) -> dict[str, Any]: | ||
| """Calculate all-atom RMSD between predicted and ground-truth structures. | ||
|
|
||
| Parameters | ||
| ---------- | ||
| predicted_atom_array_stack | ||
| Predicted coordinates as AtomArray(Stack). | ||
| ground_truth_atom_array_stack | ||
| Ground truth coordinates as AtomArray(Stack). | ||
| selection | ||
| Optional selection string (AtomArray.mask() syntax) restricting | ||
| which residues appear in ``residue_rmsd_scores``. It does NOT | ||
| restrict the atoms used to compute the global RMSD, nor (when | ||
| ``superimpose=True``) the atoms used for Kabsch superposition - | ||
| both of those always use every atom common to the predicted and | ||
| reference stacks. | ||
|
coderabbitai[bot] marked this conversation as resolved.
|
||
|
|
||
| Returns | ||
| ------- | ||
| dict[str, Any] | ||
| A dictionary with all-atom RMSDs: | ||
|
|
||
| - ``best_of_1_rmsd``: global RMSD for the first model. | ||
| - ``best_of_{N}_rmsd``: minimum global RMSD across all N models. | ||
| - ``residue_rmsd_scores``: per residue RMSDs, one list per | ||
| residue keyed by ``chain_id + res_id``. | ||
| """ | ||
| # 1. Annotate token IDs so atoms can be grouped into residues downstream. | ||
| predicted_atom_array_stack = add_global_token_id_annotation( | ||
| predicted_atom_array_stack # ty: ignore[invalid-argument-type] | ||
| ) | ||
| ground_truth_atom_array_stack = add_global_token_id_annotation( | ||
| ground_truth_atom_array_stack # ty: ignore[invalid-argument-type] | ||
| ) | ||
|
Comment on lines
+88
to
+93
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🏁 Script executed: #!/bin/bash
# Verify type ignore syntax used across the codebase
rg -nP --type=py '# ty: ignore|# type: ignore' | head -30
echo "---"
# Check pyproject.toml for type checker configuration
rg -nP 'type.*check|mypy|pyright|ruff.*typing' pyproject.toml -A5Repository: diff-use/sampleworks Length of output: 4211 Add rationale to 🤖 Prompt for AI Agents |
||
|
|
||
| # 2. Restrict both stacks to atoms present in both structures, in matching order. | ||
| _pred, _gt = filter_to_common_atoms( | ||
| predicted_atom_array_stack, ground_truth_atom_array_stack | ||
| ) | ||
| pred_aa_stack = ensure_atom_array_stack(_pred) | ||
| gt_aa_stack = ensure_atom_array_stack(_gt) | ||
|
|
||
| if pred_aa_stack.array_length() == 0: | ||
| raise RuntimeError("No atoms in common between the two structures.") | ||
|
|
||
| # 3. Optional Kabsch superposition (always on every common atom, regardless of | ||
| # any `selection`). | ||
| gt_ref = gt_aa_stack[0] | ||
| if self.superimpose: | ||
| pred_aa_stack, _ = superimpose(gt_ref, pred_aa_stack) | ||
|
|
||
| tok_idx = cast(np.ndarray, gt_ref.token_id).astype(np.int64) | ||
|
|
||
| # Resolve the subset of tokens to report, if a residue selection was given. | ||
| selected_token_ids: set[int] | None = None | ||
| if selection is not None: | ||
| mask_fn = pred_aa_stack.mask | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is it necessary to create this variable
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also, won't this raise an attribute error if the mask function isn't there? |
||
| if mask_fn is None: | ||
| raise RuntimeError( | ||
| "pred_aa_stack does not support mask(). Load atom arrays with " | ||
| "`atomworks.io.utils.io_utils.load_any()` to access this method." | ||
| ) | ||
| mask = mask_fn(selection) | ||
| selected_arr = cast(AtomArray, pred_aa_stack[0, mask]) | ||
| if selected_arr.token_id is not None: | ||
| selected_token_ids = {int(t) for t in np.unique(selected_arr.token_id)} | ||
|
Comment on lines
+108
to
+125
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🏁 Script executed: #!/bin/bash
# Check whether biotite's superimpose() preserves an atomworks-added .mask() method / attribute,
# and whether AtomArrayStack has a built-in `mask` anywhere in biotite usage.
rg -nP --type=py -C3 '\bsuperimpose\s*\(' -g '!**/test/**' -g '!**/tests/**'
echo '---'
# Look for how .mask is attached to atom arrays in the codebase (likely via atomworks load_any)
rg -nP --type=py -C3 '\b\.mask\s*\('
echo '---'
# Confirm where `mask` as an attribute is set on atom arrays
rg -nP --type=py -C3 'set_annotation\(\s*[\"\x27]mask[\"\x27]|\.mask\s*=\s*'Repository: diff-use/sampleworks Length of output: 4721 🌐 Web query:
💡 Result: biotite.structure.superimpose returns a tuple (fitted, transformation), where fitted is an AtomArray or AtomArrayStack (matching the type of the input mobile structure) with updated coordinates that superimpose it onto the fixed structure, and transformation is an AffineTransformation object. The fitted object is explicitly described as "A copy of the mobile structure(s), superimposed on the fixed structure(s)." Since it is a copy of the input AtomArrayStack, and AtomArrayStack objects store annotation arrays shared across all models, the annotations are preserved in the output. This is confirmed in the official documentation and source code. The AffineTransformation.apply method, used internally and exposed for reuse, also creates a copy of the input structure (via atoms.copy) and only updates the .coord attribute, preserving all annotations. Citations:
🏁 Script executed: # Find where .mask is defined or attached in atomworks code
find . -type f -name "*.py" | xargs grep -l "\.mask\s*=" | head -20
echo "---"
# Look for load_any implementation
rg -nP --type=py "def load_any|load_any" -A 10 -g '!**/test/**' -g '!**/tests/**' | head -50
echo "---"
# Search for how mask is set as annotation
rg -nP --type=py "set_annotation.*mask|mask.*annotation" | head -20Repository: diff-use/sampleworks Length of output: 3768 🏁 Script executed: # Check if atomworks is imported and how it modifies atom arrays
rg -nP --type=py "import.*atomworks|from.*atomworks" | head -20
echo "---"
# Look for AtomArrayStack extensions or monkey-patching
rg -nP --type=py "AtomArrayStack\.|atom_array\.mask" -B2 -A2 | head -60Repository: diff-use/sampleworks Length of output: 6102 🏁 Script executed: # Search for test files related to rmsd.py
find . -type f -name "*test*rmsd*" -o -name "*rmsd*test*" | head -10
echo "---"
# Look for any usage combining superimpose with mask in the codebase
rg -nP --type=py "superimpose.*mask|mask.*superimpose" -B3 -A3
echo "---"
# Check the rmsd.py file context around superimpose to understand the flow
head -n 130 src/sampleworks/metrics/rmsd.py | tail -n 30Repository: diff-use/sampleworks Length of output: 1475 🏁 Script executed: # Check if there are any tests that call rmsd with both superimpose=True and selection parameter
rg -nP --type=py "superimpose.*True.*selection|selection.*superimpose" -B2 -A2
echo "---"
# Look for RMSD metric usage in tests with both parameters
rg -nP "RMSD\(.*superimpose.*True" -B2 -A5Repository: diff-use/sampleworks Length of output: 67 🏁 Script executed: # Check test file for rmsd to see if any tests use both superimpose=True and selection
cat tests/metrics/test_rmsd_metrics.py | grep -n -A10 "superimpose.*True\|selection"Repository: diff-use/sampleworks Length of output: 1133 🏁 Script executed: # More targeted: look for test cases that instantiate RMSD with superimpose=True
rg -nP --type=py "RMSD\(" tests/metrics/test_rmsd_metrics.py -A5Repository: diff-use/sampleworks Length of output: 982 🏁 Script executed: # Check the rmsd.py file to see full context including imports and how superimpose is used
cat src/sampleworks/metrics/rmsd.py | head -n 135 | tail -n 35Repository: diff-use/sampleworks Length of output: 1688 🏁 Script executed: # Check the AllAtomRMSD class definition and its default parameters
rg -nP --type=py "class AllAtomRMSD" -A30 src/sampleworks/metrics/rmsd.py
echo "---"
# Check if there's a superimpose parameter in the RMSD class
rg -nP --type=py "superimpose" src/sampleworks/metrics/rmsd.py | head -20Repository: diff-use/sampleworks Length of output: 1617 🏁 Script executed: # Look for any test that explicitly sets superimpose=True
rg -nP --type=py "superimpose\s*=\s*True|superimpose=True" tests/metrics/test_rmsd_metrics.py
echo "---"
# Check the test fixtures to understand test structure
head -n 100 tests/metrics/test_rmsd_metrics.pyRepository: diff-use/sampleworks Length of output: 2623 🏁 Script executed: # Search for any instantiations of AllAtomRMSD with superimpose=True in the entire codebase
rg -nP --type=py "AllAtomRMSD\s*\(\s*superimpose\s*=\s*True|AllAtomRMSD\(\s*True"
echo "---"
# Also check if there's a way to set superimpose through config or kwargs
rg -nP --type=py "AllAtomRMSD.*superimpose" -B2 -A2Repository: diff-use/sampleworks Length of output: 67 🏁 Script executed: # Let's check conftest.py to see how test fixtures are created
head -n 80 tests/metrics/conftest.pyRepository: diff-use/sampleworks Length of output: 828 🏁 Script executed: # Let's check if there are any tests that specifically test superimpose=True with selection
rg -nP --type=py "superimpose\s*=\s*True" tests/ -A10 -B5
echo "---"
# Double-check: any usage of AllAtomRMSD with both superimpose and selection anywhere
rg -nP --type=py "AllAtomRMSD.*superimpose.*True.*selection|selection.*AllAtomRMSD.*superimpose.*True"Repository: diff-use/sampleworks Length of output: 67 🏁 Script executed: # Let's understand: is .mask a method or property? Search for how it's defined/used
rg -nP --type=py "\.mask\s*\(" src/sampleworks -B1 -A1 | head -40Repository: diff-use/sampleworks Length of output: 1564 🏁 Script executed: # Search for any actual usage in the codebase of superimpose followed by mask calls
rg -nP --type=py "superimpose\(" -A20 | grep -A15 "\.mask\("
echo "---"
# Check if there are integration tests or real usage patterns
find tests -name "*.py" -exec grep -l "superimpose.*True" {} \;
echo "---"
# Search for any documentation or comments about this interaction
rg -nP --type=py "superimpose.*mask|mask.*superimpose" -B5 -A5Repository: diff-use/sampleworks Length of output: 112 🏁 Script executed: # Let's check how atomworks loads structures to understand what .mask() is
rg -nP --type=py "class.*AtomArray|def.*mask" -A5 | head -60
echo "---"
# Critical: check if .mask is being set as an attribute on the object
rg -nP --type=py "\.mask\s*=" -B3 -A3Repository: diff-use/sampleworks Length of output: 4634 🏁 Script executed: # Look at the actual test file again to see if there's any test with superimpose=True
cat tests/metrics/test_rmsd_metrics.pyRepository: diff-use/sampleworks Length of output: 2619 🏁 Script executed: # Search for any AllAtomRMSD instantiation with superimpose parameter
rg -nP "AllAtomRMSD\(" tests/ -A3 -B1Repository: diff-use/sampleworks Length of output: 1352 🏁 Script executed: # Let's verify: does biotite.superimpose actually preserve custom methods/attributes?
# Check what biotite's copy mechanism does
rg -nP --type=py "def copy\|\.copy\(\)" src/sampleworks -B2 -A5 | head -40
echo "---"
# Check imports in rmsd.py to see what's available from biotite
head -n 20 src/sampleworks/metrics/rmsd.pyRepository: diff-use/sampleworks Length of output: 775 🏁 Script executed: # Final check: verify whether .mask is truly callable or if it's just checked for None
rg -nP "mask_fn\s*=" src/sampleworks/metrics/rmsd.py -B2 -A3Repository: diff-use/sampleworks Length of output: 365 🏁 Script executed: # Critical: check if there are any existing bug reports or comments about this interaction
rg -nP "superimpose.*selection|selection.*superimpose|mask.*after.*superimpose" -B3 -A3
echo "---"
# Check if .mask is set/assigned somewhere on AtomArrayStack
rg -nP "pred_aa_stack.*mask|\.mask\s*=" src/sampleworks/metrics/rmsd.py -B2 -A2Repository: diff-use/sampleworks Length of output: 1008 🏁 Script executed: # Search for any comments or issues related to this problem
rg -nP "FIXME|TODO|BUG|ISSUE" src/sampleworks/metrics/rmsd.py -B2 -A2 | head -30Repository: diff-use/sampleworks Length of output: 46 Potential runtime failure when combining
Resolve 🛠️ Suggested restructuring- # 3. Optional Kabsch superposition (always on every common atom, regardless of
- # any `selection`).
- gt_ref = gt_aa_stack[0]
- if self.superimpose:
- pred_aa_stack, _ = superimpose(gt_ref, pred_aa_stack)
-
- tok_idx = cast(np.ndarray, gt_ref.token_id).astype(np.int64)
-
- # Resolve the subset of tokens to report, if a residue selection was given.
- selected_token_ids: set[int] | None = None
- if selection is not None:
- mask_fn = pred_aa_stack.mask
+ gt_ref = gt_aa_stack[0]
+ tok_idx = cast(np.ndarray, gt_ref.token_id).astype(np.int64)
+
+ # Resolve the subset of tokens to report BEFORE superimposing, to avoid
+ # depending on whether custom atomworks methods survive biotite's copy.
+ selected_token_ids: set[int] | None = None
+ if selection is not None:
+ mask_fn = pred_aa_stack.mask
if mask_fn is None:
raise RuntimeError(
"pred_aa_stack does not support mask(). Load atom arrays with "
"`atomworks.io.utils.io_utils.load_any()` to access this method."
)
mask = mask_fn(selection)
selected_arr = cast(AtomArray, pred_aa_stack[0, mask])
if selected_arr.token_id is not None:
selected_token_ids = {int(t) for t in np.unique(selected_arr.token_id)}
+
+ # 3. Optional Kabsch superposition (always on every common atom, regardless of
+ # any `selection`).
+ if self.superimpose:
+ pred_aa_stack, _ = superimpose(gt_ref, pred_aa_stack)🤖 Prompt for AI Agents
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @k-chrispens definitely look into this and harden it.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
|
|
||
| global_rmsd = np.asarray(rmsd(gt_ref, pred_aa_stack)) | ||
|
|
||
| # 4. Build residue-level output: for each token, call biotite.rmsd on the | ||
| # atoms that share that token, keyed by "{chain_id}{res_id}". | ||
| chain_id = cast(np.ndarray, gt_ref.chain_id) | ||
| res_id = cast(np.ndarray, gt_ref.res_id) | ||
| residue_keys = np.char.add(chain_id, res_id.astype(str)) | ||
| token_to_residue_id_map = {int(k): str(v) for k, v in zip(tok_idx, residue_keys)} | ||
|
|
||
| residue_rmsd_scores: dict[str, list[float]] = {} | ||
| for tk in np.unique(tok_idx): | ||
| tk_int = int(tk) | ||
| if selected_token_ids is not None and tk_int not in selected_token_ids: | ||
| continue | ||
| atom_mask = tok_idx == tk | ||
| residue_rmsd = np.asarray(rmsd(gt_ref[atom_mask], pred_aa_stack[:, atom_mask])) | ||
| residue_rmsd_scores[token_to_residue_id_map[tk_int]] = residue_rmsd.tolist() | ||
|
|
||
| result: dict[str, Any] = { | ||
| "best_of_1_rmsd": float(global_rmsd[0]), | ||
| f"best_of_{len(global_rmsd)}_rmsd": float(global_rmsd.min()), | ||
| "residue_rmsd_scores": residue_rmsd_scores, | ||
| } | ||
|
coderabbitai[bot] marked this conversation as resolved.
|
||
|
|
||
| if self.log_rmsd_for_every_batch: | ||
| result.update( | ||
| {f"all_atom_rmsd_{i}": float(global_rmsd[i]) for i in range(len(global_rmsd))} | ||
| ) | ||
|
|
||
| return result | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,23 @@ | ||
| """ | ||
| Common fixtures for metrics tests. | ||
| """ | ||
|
|
||
| import pytest | ||
| from atomworks.io.transforms.atom_array import ensure_atom_array_stack | ||
| from biotite.structure import AtomArrayStack | ||
| from sampleworks.utils.atom_array_utils import ( | ||
| select_altloc, | ||
| select_backbone, | ||
| ) | ||
|
|
||
|
|
||
| @pytest.fixture(scope="module") | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should these have something else for their scope now? I.e., is it a problem for them to be kept around for multiple test modules? |
||
| def altlocA_backbone(structure_6b8x_with_altlocs) -> AtomArrayStack: | ||
| altlocA = select_altloc(structure_6b8x_with_altlocs, "A", return_full_array=True) | ||
| return ensure_atom_array_stack(select_backbone(altlocA)) | ||
|
|
||
|
|
||
| @pytest.fixture(scope="module") | ||
| def altlocB_backbone(structure_6b8x_with_altlocs) -> AtomArrayStack: | ||
| altlocB = select_altloc(structure_6b8x_with_altlocs, "B", return_full_array=True) | ||
| return ensure_atom_array_stack(select_backbone(altlocB)) | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,66 @@ | ||
| """ | ||
| Tests for the RMSD metrics. | ||
| """ | ||
|
|
||
| import pytest | ||
| from sampleworks.metrics.rmsd import AllAtomRMSD | ||
|
|
||
|
|
||
| # TODO: make these a bit more rigorous, similarly to tests/metrics/test_lddt_metrics.py | ||
|
|
||
|
|
||
| def test_all_atom_rmsd_identity(altlocA_backbone): | ||
| """A stack compared to itself must have zero RMSD on every residue.""" | ||
| metric = AllAtomRMSD() | ||
| results = metric.compute(altlocA_backbone, altlocA_backbone) | ||
|
|
||
| assert results["best_of_1_rmsd"] == pytest.approx(0.0, abs=1e-6) | ||
| assert results["residue_rmsd_scores"] | ||
| for residue, scores in results["residue_rmsd_scores"].items(): | ||
| assert scores == pytest.approx([0.0], abs=1e-6), f"nonzero identity RMSD at {residue}" | ||
|
|
||
|
|
||
| def test_all_atom_rmsd_end_to_end(altlocA_backbone, altlocB_backbone): | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add a docstring to the test function. All functions should include docstrings per the guidelines. Consider adding a brief description of what this test verifies. 📝 Suggested docstring def test_all_atom_rmsd_end_to_end(altlocA_backbone, altlocB_backbone):
+ """Verify RMSD computation with selection matches expected values for altloc comparison."""
selection_string = "res_id > 179 and res_id < 190"As per coding guidelines: "Always include NumPy-style docstrings for every function and class." 🤖 Prompt for AI Agents |
||
| selection_string = "res_id > 179 and res_id < 190" | ||
| metric = AllAtomRMSD() | ||
| results = metric.compute(altlocA_backbone, altlocB_backbone, selection_string) | ||
|
|
||
| expected_results = { | ||
| "best_of_1_rmsd": 0.7332, | ||
| "residue_rmsd_scores": { | ||
| "A180": [2.9206], | ||
| "A181": [4.2650], | ||
| "A182": [5.6639], | ||
| "A183": [2.9397], | ||
| "A184": [2.0595], | ||
| "A185": [2.2885], | ||
| "A186": [3.6929], | ||
| "A187": [1.8138], | ||
| "A188": [0.9547], | ||
| "A189": [0.7596], | ||
| }, | ||
| } | ||
|
|
||
| assert results["best_of_1_rmsd"] == pytest.approx(expected_results["best_of_1_rmsd"], abs=0.002) | ||
|
|
||
| assert set(results["residue_rmsd_scores"].keys()) == set( | ||
| expected_results["residue_rmsd_scores"].keys() | ||
| ) | ||
|
|
||
| for residue_key, expected_scores in expected_results["residue_rmsd_scores"].items(): | ||
| result_scores = results["residue_rmsd_scores"][residue_key] | ||
| assert len(result_scores) == len(expected_scores) | ||
| for got, want in zip(result_scores, expected_scores): | ||
| assert got == pytest.approx(want, abs=0.005), ( | ||
| f"RMSD mismatch at {residue_key}: got {got}, expected {want}" | ||
| ) | ||
|
|
||
|
|
||
| def test_all_atom_rmsd_selection_does_not_change_global(altlocA_backbone, altlocB_backbone): | ||
| """Selection filters the residue-level dict but must not change the global RMSD.""" | ||
| metric = AllAtomRMSD() | ||
| all_residues = metric.compute(altlocA_backbone, altlocB_backbone) | ||
| selected = metric.compute(altlocA_backbone, altlocB_backbone, "res_id > 179 and res_id < 190") | ||
|
|
||
| assert selected["best_of_1_rmsd"] == pytest.approx(all_residues["best_of_1_rmsd"], abs=1e-6) | ||
| assert len(selected["residue_rmsd_scores"]) < len(all_residues["residue_rmsd_scores"]) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
PR title mentions an "associated clustering script", but this PR only adds the RMSD metric/tests/fixtures. Either add the clustering script changes or update the PR title/description so it matches what’s actually being merged.