From 13d05dc8f78063808b043278f67f24f4390af7b9 Mon Sep 17 00:00:00 2001
From: "C. Vonrhein" <CV-GPhL>
Date: Wed, 22 Apr 2026 14:07:00 +0200
Subject: [PATCH 01/19] docs: add Doxygen/Breathe infrastructure for C++ API
 reference

Wires up Doxygen XML generation + Breathe Sphinx extension so that
C++ API documentation from header comments renders in the existing
Sphinx/readthedocs site at docs/api.rst.

- docs/Doxyfile: Doxygen config (XML-only output, internal headers excluded)
- docs/conf.py: runs Doxygen as subprocess, adds breathe extension
- docs/api.rst: stub C++ API reference page (expanded by subsequent PRs)
- docs/index.rst: replace external cxx-api link with internal api.rst
- docs/requirements.txt: add breathe >= 4.35
- .readthedocs.yaml: add apt_packages: [doxygen]
- .gitignore: exclude docs/_doxygen/ (generated)

Builds on prior work in project-gemmi/gemmi#402 (Paul Emsley / pemsley).
Co-authored-by: C. Vonrhein / CV-GPhL
---
 .gitignore            |  3 +++
 .readthedocs.yaml     |  2 ++
 docs/Doxyfile         | 48 +++++++++++++++++++++++++++++++++++++++++++
 docs/api.rst          | 31 ++++++++++++++++++++++++++++
 docs/conf.py          | 16 ++++++++++++++-
 docs/index.rst        |  2 +-
 docs/requirements.txt |  1 +
 7 files changed, 101 insertions(+), 2 deletions(-)
 create mode 100644 docs/Doxyfile
 create mode 100644 docs/api.rst
diff --git a/.gitignore b/.gitignore
index 0aa5b7ab6..6f22aa373 100644
--- a/.gitignore
+++ b/.gitignore
@@ -83,3 +83,6 @@ fortran/*.a
 # test programs
 fortran/fsym
 fortran/ftest
+
+# Doxygen-generated output (consumed by Breathe/Sphinx, not committed)
+docs/_doxygen/
diff --git a/.readthedocs.yaml b/.readthedocs.yaml
index e86ae0e3c..909080dca 100644
--- a/.readthedocs.yaml
+++ b/.readthedocs.yaml
@@ -10,6 +10,8 @@ build:
   os: ubuntu-24.04
   tools:
     python: "3.12"
+  apt_packages:
+    - doxygen
 
   # Format htmlzip produces a single page. Instead, use zip as shown in:
   # https://github.com/readthedocs/readthedocs.org/issues/3242#issuecomment-1410321534
diff --git a/docs/Doxyfile b/docs/Doxyfile
new file mode 100644
index 000000000..d305f175a
--- /dev/null
+++ b/docs/Doxyfile
@@ -0,0 +1,48 @@
+PROJECT_NAME           = "Gemmi"
+PROJECT_BRIEF          = "A library for macromolecular crystallography"
+PROJECT_NUMBER         =
+
+# Input
+INPUT                  = ../include/gemmi
+FILE_PATTERNS          = *.hpp
+STRIP_FROM_PATH        = ../include
+STRIP_FROM_INC_PATH    = ../include
+RECURSIVE              = NO
+# All public headers are flat in include/gemmi/; the only subdirectory is
+# third_party/ which must not be documented.
+
+# Exclude internal/data-only headers
+EXCLUDE_PATTERNS       = ace_*.hpp \
+                         acedrg_tables.hpp \
+                         mc_tables.hpp \
+                         eig3.hpp \
+                         cc_adj.hpp \
+                         ccp4ener.hpp \
+                         mmcif_impl.hpp
+
+# Output: XML only (Breathe consumes this; Sphinx produces HTML)
+GENERATE_XML           = YES
+GENERATE_HTML          = NO
+GENERATE_LATEX         = NO
+XML_OUTPUT             = _doxygen/xml
+
+# Extraction
+EXTRACT_ALL            = YES
+EXTRACT_PRIVATE        = NO
+EXTRACT_STATIC         = YES
+EXTRACT_ANON_NSPACES   = NO
+
+# Preprocessing (needed for GEMMI_DLL export macro and similar)
+ENABLE_PREPROCESSING   = YES
+MACRO_EXPANSION        = YES
+EXPAND_ONLY_PREDEF     = YES
+PREDEFINED             = GEMMI_DLL=
+
+# Quiet build, warnings to file
+QUIET                  = YES
+WARN_IF_UNDOCUMENTED   = NO
+WARN_LOGFILE           = _doxygen/doxygen-warnings.log
+
+# Source browsing off (Breathe handles cross-references)
+SOURCE_BROWSER         = NO
+INLINE_SOURCES         = NO
diff --git a/docs/api.rst b/docs/api.rst
new file mode 100644
index 000000000..2386adfca
--- /dev/null
+++ b/docs/api.rst
@@ -0,0 +1,31 @@
+.. _api:
+
+C++ API Reference
+#################
+
+This page documents the Gemmi C++ library API, generated from Doxygen comments
+in ``include/gemmi/*.hpp``. It is updated with each pull request in the
+API documentation series.
+
+For the Python API, see the `Python API reference <https://project-gemmi.github.io/python-api/>`_.
+
+.. note::
+
+   Documentation coverage is being added incrementally. Headers not yet
+   listed here will appear in subsequent pull requests.
+
+Core Data Structures
+--------------------
+
+*(Full documentation added in PR 2.)*
+
+.. doxygenfile:: model.hpp
+   :project: gemmi
+
+Map and Grid Data
+-----------------
+
+*(Stub — full documentation added in PR 6.)*
+
+.. doxygenfile:: grid.hpp
+   :project: gemmi
diff --git a/docs/conf.py b/docs/conf.py
index 2d8a8b95b..6bdf72386 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -1,11 +1,19 @@
 # -*- coding: utf-8 -*-
 
+import os
+import subprocess
+
+# Run Doxygen before Sphinx processes doxygenfile:: directives.
+# Must run from docs/ so Doxyfile and _doxygen/xml/ paths resolve correctly.
+_docs_dir = os.path.dirname(os.path.abspath(__file__))
+subprocess.check_call(['doxygen', 'Doxyfile'], cwd=_docs_dir)
+
 # -- General configuration ------------------------------------------------
 
 # while we use Sphinx 8+, old version suffices to run doctests
 needs_sphinx = '5.3.0'
 
-extensions = ['sphinx.ext.doctest', 'sphinx_inline_tabs']
+extensions = ['sphinx.ext.doctest', 'sphinx_inline_tabs', 'breathe']
 
 templates_path = ['_templates']
 
@@ -125,3 +133,9 @@ def _compute_navigation_tree(context: Dict[str, Any]) -> str:
 import gemmi
 gemmi.set_leak_warnings(False)
 '''
+
+# -- Breathe configuration (Doxygen XML → Sphinx) -------------------------
+
+breathe_projects = {"gemmi": os.path.join(_docs_dir, "_doxygen", "xml")}
+breathe_default_project = "gemmi"
+breathe_default_members = ('members',)  # show all public members in every doxygenfile:: directive
diff --git a/docs/index.rst b/docs/index.rst
index 4302a3789..c1f524ce6 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -93,7 +93,7 @@ Contents
 
    ChangeLog <https://github.com/project-gemmi/gemmi/releases>
    Python API reference <https://project-gemmi.github.io/python-api/>
-   C++ API reference <https://project-gemmi.github.io/cxx-api/>
+   api
 
 Credits
 =======
diff --git a/docs/requirements.txt b/docs/requirements.txt
index 223a532b1..804a29253 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -1,3 +1,4 @@
 sphinx >= 8.1.3
 furo >= 2024.8.6
 sphinx-inline-tabs
+breathe >= 4.35

From 908eea4922a9967e94834cd67201d35fad4e3253 Mon Sep 17 00:00:00 2001
From: "C. Vonrhein" <CV-GPhL>
Date: Wed, 22 Apr 2026 19:55:10 +0200
Subject: [PATCH 02/19] =?UTF-8?q?fix(conf.py):=20guard=20doxygen=20call=20?=
 =?UTF-8?q?=E2=80=94=20skip=20gracefully=20when=20not=20installed?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 docs/conf.py | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/docs/conf.py b/docs/conf.py
index 6bdf72386..55fab5e35 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -1,19 +1,25 @@
 # -*- coding: utf-8 -*-
 
 import os
+import shutil
 import subprocess
 
-# Run Doxygen before Sphinx processes doxygenfile:: directives.
-# Must run from docs/ so Doxyfile and _doxygen/xml/ paths resolve correctly.
 _docs_dir = os.path.dirname(os.path.abspath(__file__))
-subprocess.check_call(['doxygen', 'Doxyfile'], cwd=_docs_dir)
+_doxygen_xml_dir = os.path.join(_docs_dir, "_doxygen", "xml")
+
+# Run Doxygen before Sphinx processes doxygenfile:: directives.
+# Skipped gracefully when doxygen is not installed (e.g. CI doctest runner).
+if shutil.which('doxygen'):
+    subprocess.check_call(['doxygen', 'Doxyfile'], cwd=_docs_dir)
 
 # -- General configuration ------------------------------------------------
 
 # while we use Sphinx 8+, old version suffices to run doctests
 needs_sphinx = '5.3.0'
 
-extensions = ['sphinx.ext.doctest', 'sphinx_inline_tabs', 'breathe']
+extensions = ['sphinx.ext.doctest', 'sphinx_inline_tabs']
+if os.path.isdir(_doxygen_xml_dir):
+    extensions.append('breathe')
 
 templates_path = ['_templates']
 
@@ -135,7 +141,9 @@ def _compute_navigation_tree(context: Dict[str, Any]) -> str:
 '''
 
 # -- Breathe configuration (Doxygen XML → Sphinx) -------------------------
+# Only active when _doxygen/xml/ exists (i.e. doxygen was run).
 
-breathe_projects = {"gemmi": os.path.join(_docs_dir, "_doxygen", "xml")}
-breathe_default_project = "gemmi"
-breathe_default_members = ('members',)  # show all public members in every doxygenfile:: directive
+if os.path.isdir(_doxygen_xml_dir):
+    breathe_projects = {"gemmi": _doxygen_xml_dir}
+    breathe_default_project = "gemmi"
+    breathe_default_members = ('members',)  # show all public members in every doxygenfile:: directive

From 3f20f61b301dfd31d399baffd69fb4800744ffab Mon Sep 17 00:00:00 2001
From: "C. Vonrhein" <CV-GPhL>
Date: Wed, 22 Apr 2026 20:02:20 +0200
Subject: [PATCH 03/19] ci: add docs-build job (Doxygen + Sphinx/Breathe HTML)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .github/workflows/ci.yml | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index f399317c4..fdfc9c30c 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -217,6 +217,22 @@ jobs:
     - name: run tests under valgrind
       run: PYTHONMALLOC=malloc valgrind python3 -m unittest discover -v -s tests/
 
+  docs:
+    name: "Docs build (Doxygen + Sphinx/Breathe)"
+    runs-on: ubuntu-latest
+    if: "!contains(github.event.head_commit.message, '[skip ci]')"
+    steps:
+    - uses: actions/checkout@v4
+    - name: Install dependencies
+      run: |
+        sudo apt-get update
+        sudo apt-get install -y doxygen
+        python3 -m pip install sphinx sphinx-inline-tabs breathe furo
+    - name: Build HTML docs
+      run: |
+        cd docs
+        sphinx-build -M html . _build -n -E
+
   almalinux:
     runs-on: ubuntu-latest
     name: "AlmaLinux 8"

From f17559ec0e35b3b3e9ded8d3bb5e9309711ab2ca Mon Sep 17 00:00:00 2001
From: "C. Vonrhein" <CV-GPhL>
Date: Wed, 22 Apr 2026 22:51:26 +0200
Subject: [PATCH 04/19] docs(calculate.hpp): add full Doxygen API documentation

---
 include/gemmi/calculate.hpp | 116 +++++++++++++++++++++++++++++++++---
 1 file changed, 108 insertions(+), 8 deletions(-)

diff --git a/include/gemmi/calculate.hpp b/include/gemmi/calculate.hpp
index 03a10765b..b1bb9a7a8 100644
--- a/include/gemmi/calculate.hpp
+++ b/include/gemmi/calculate.hpp
@@ -12,6 +12,10 @@
 
 namespace gemmi {
 
+/// @brief Check if an object or any of its descendants contains hydrogen atoms.
+/// @tparam T Type of object (Model, Chain, Residue, Atom, etc.)
+/// @param obj Object to check
+/// @return True if hydrogen is present, false otherwise
 template<class T> bool has_hydrogen(const T& obj) {
   for (const auto& child : obj.children())
     if (has_hydrogen(child))
@@ -22,6 +26,11 @@ template<> inline bool has_hydrogen(const Atom& atom) {
   return atom.is_hydrogen();
 }
 
+/// @brief Count atom sites in an object, optionally filtered by Selection.
+/// @tparam T Type of object (Model, Chain, Residue, Atom, etc.)
+/// @param obj Object to count atoms in
+/// @param sel Optional Selection filter; nullptr means all atoms
+/// @return Number of matching atom sites
 template<class T> size_t count_atom_sites(const T& obj, const Selection* sel=nullptr) {
   size_t sum = 0;
   if (!sel || sel->matches(obj))
@@ -33,6 +42,11 @@ template<> inline size_t count_atom_sites(const Atom& atom, const Selection* sel
   return (!sel || sel->matches(atom)) ? 1 : 0;
 }
 
+/// @brief Sum occupancies in an object, optionally filtered by Selection.
+/// @tparam T Type of object (Model, Chain, Residue, Atom, etc.)
+/// @param obj Object to sum occupancies in
+/// @param sel Optional Selection filter; nullptr means all atoms
+/// @return Sum of occupancies for matching atoms
 template<class T> double count_occupancies(const T& obj, const Selection* sel=nullptr) {
   double sum = 0;
   if (!sel || sel->matches(obj))
@@ -44,7 +58,10 @@ template<> inline double count_occupancies(const Atom& atom, const Selection* se
   return (!sel || sel->matches(atom)) ? atom.occ : 0;
 }
 
-
+/// @brief Calculate total mass in atomic mass units for an object.
+/// @tparam T Type of object (Model, Chain, Residue, Atom, etc.)
+/// @param obj Object to calculate mass for
+/// @return Total mass in atomic mass units (accounting for occupancy)
 template<class T> double calculate_mass(const T& obj) {
   double sum = 0;
   for (const auto& child : obj.children())
@@ -55,12 +72,21 @@ template<> inline double calculate_mass(const Atom& atom) {
   return atom.occ * atom.element.weight();
 }
 
+/// @brief Result of center-of-mass calculation.
 struct CenterOfMass {
+  /// Sum of mass-weighted positions
   Position weighted_sum;
+  /// Total mass
   double mass;
+  /// @brief Get center-of-mass position.
+  /// @return Mass-weighted centroid (weighted_sum / mass)
   Position get() const { return Position(weighted_sum / mass); }
 };
 
+/// @brief Calculate the center of mass (mass-weighted centroid).
+/// @tparam T Type of object (Model, Chain, Residue, Atom, etc.)
+/// @param obj Object to calculate center of mass for
+/// @return CenterOfMass with weighted_sum and total mass
 template<class T> CenterOfMass calculate_center_of_mass(const T& obj) {
   CenterOfMass total{{}, 0.};
   for (const auto& child : obj.children()) {
@@ -74,6 +100,10 @@ template<> inline CenterOfMass calculate_center_of_mass(const Atom& atom) {
   return CenterOfMass{Position(atom.pos * w_mass), w_mass};
 }
 
+/// @brief Calculate min and max isotropic B-factors in an object.
+/// @tparam T Type of object (Model, Chain, Residue, Atom, etc.)
+/// @param obj Object to scan
+/// @return Pair of (min_B, max_B)
 template<class T> std::pair<float,float> calculate_b_iso_range(const T& obj) {
   std::pair<float, float> range{INFINITY, -INFINITY};
   for (const auto& child : obj.children()) {
@@ -87,7 +117,10 @@ template<> inline std::pair<float,float> calculate_b_iso_range(const Atom& atom)
   return {atom.b_iso, atom.b_iso};
 }
 
-/// uses min/max eigenvalues of Baniso, or Biso if B-factor is isotropic
+/// @brief Calculate min and max B-factors from anisotropic B-tensors.
+/// Uses min/max eigenvalues of anisotropic B-tensor, or B_iso if B-factor is isotropic.
+/// @param model Model to scan
+/// @return Pair of (min_eigenvalue * u_to_b(), max_eigenvalue * u_to_b())
 inline std::pair<double, double> calculate_b_aniso_range(const Model& model) {
   std::pair<double, double> range{INFINITY, -INFINITY};
   for (const Chain& chain : model.chains)
@@ -117,7 +150,11 @@ template<> inline void expand_box(const Atom& atom, Box<Position>& box) {
   box.extend(atom.pos);
 }
 
-// we don't take NCS into account here (cf. NeighborSearch::set_bounding_cell())
+/// @brief Calculate axis-aligned bounding box in Cartesian space.
+/// @note NCS is not taken into account here (cf. NeighborSearch::set_bounding_cell())
+/// @param st Structure to scan
+/// @param margin Optional margin to add around the box
+/// @return Axis-aligned bounding box
 inline Box<Position> calculate_box(const Structure& st, double margin=0.) {
   Box<Position> box;
   expand_box(st, box);
@@ -126,6 +163,11 @@ inline Box<Position> calculate_box(const Structure& st, double margin=0.) {
   return box;
 }
 
+/// @brief Calculate bounding box in fractional coordinates.
+/// @param st Structure to scan (must have a unit cell)
+/// @param margin Optional margin in Cartesian space (converted to fractional)
+/// @return Bounding box in fractional coordinates
+/// @throws Fails with message if Structure has no unit cell
 inline Box<Fractional> calculate_fractional_box(const Structure& st, double margin=0.) {
   if (!st.cell.is_crystal())
     fail("calculate_fractional_box(): Structure has no unit cell for fractionalization");
@@ -141,21 +183,34 @@ inline Box<Fractional> calculate_fractional_box(const Structure& st, double marg
 }
 
 
-// Calculate B_est from E. Merritt, Some B_eq are more equivalent than others,
-// Acta Cryst. A67, 512 (2011)
-// http://skuld.bmsc.washington.edu/parvati/ActaA_67_512.pdf
+/// @brief Calculate B_equiv from anisotropic B-tensor (or B_iso if isotropic).
+/// Based on E. Merritt, "Some B_eq are more equivalent than others",
+/// Acta Cryst. A67, 512 (2011).
+/// @param atom Atom with (possibly anisotropic) B-factor
+/// @return B_equiv = sqrt((sum_eigenvalues) / (sum_inverse_eigenvalues)) * u_to_b()
 inline double calculate_b_est(const Atom& atom) {
   auto eig = atom.aniso.calculate_eigenvalues();
   return u_to_b() * std::sqrt((eig[0] + eig[1] + eig[2]) /
                               (1/eig[0] + 1/eig[1] + 1/eig[2]));
 }
 
+/// @brief Calculate angle at p1 between vectors p1→p0 and p1→p2.
+/// @param p0 First position
+/// @param p1 Central position (vertex of the angle)
+/// @param p2 Third position
+/// @return Angle in radians
 inline double calculate_angle(const Position& p0, const Position& p1,
                               const Position& p2) {
   return (p0 - p1).angle(p2 - p1);
 }
 
-// discussion: https://stackoverflow.com/questions/20305272/
+/// @brief Calculate dihedral angle defined by four positions.
+/// @param p0 First position
+/// @param p1 Second position (on the bond axis)
+/// @param p2 Third position (on the bond axis)
+/// @param p3 Fourth position
+/// @return Dihedral angle in radians, in the range [-π, π]
+/// @note See discussion at https://stackoverflow.com/questions/20305272/
 inline double calculate_dihedral(const Position& p0, const Position& p1,
                                  const Position& p2, const Position& p3) {
   Vec3 b0 = p1 - p0;
@@ -168,7 +223,12 @@ inline double calculate_dihedral(const Position& p0, const Position& p1,
   return std::atan2(y, x);
 }
 
-/// the return value is in the same range as that of atan2(), i.e. [-pi, pi]
+/// @brief Calculate dihedral angle from four Atom pointers.
+/// @param a First atom pointer
+/// @param b Second atom pointer (on the bond axis)
+/// @param c Third atom pointer (on the bond axis)
+/// @param d Fourth atom pointer
+/// @return Dihedral angle in radians (same range as atan2: [-π, π]), or NaN if any pointer is null
 inline double calculate_dihedral_from_atoms(const Atom* a, const Atom* b,
                                             const Atom* c, const Atom* d) {
   if (a && b && c && d)
@@ -176,11 +236,23 @@ inline double calculate_dihedral_from_atoms(const Atom* a, const Atom* b,
   return NAN;
 }
 
+/// @brief Calculate peptide bond ω dihedral angle.
+/// ω is defined by atoms: Cα(i) - C(i) - N(i+1) - Cα(i+1)
+/// @param res Current residue
+/// @param next Next residue
+/// @return Omega angle in radians, or NaN if atoms are missing
 inline double calculate_omega(const Residue& res, const Residue& next) {
   return calculate_dihedral_from_atoms(res.get_ca(), res.get_c(),
                                        next.get_n(), next.get_ca());
 }
 
+/// @brief Check if a peptide bond is in cis configuration.
+/// @param ca1 Cα atom of the current residue
+/// @param c C atom of the current residue
+/// @param n N atom of the next residue
+/// @param ca2 Cα atom of the next residue
+/// @param tolerance_deg Tolerance in degrees (absolute |ω| < tolerance indicates cis)
+/// @return True if |ω| < tolerance_deg, false otherwise
 inline bool is_peptide_bond_cis(const Atom* ca1, const Atom* c,
                                 const Atom* n, const Atom* ca2,
                                 double tolerance_deg) {
@@ -188,11 +260,24 @@ inline bool is_peptide_bond_cis(const Atom* ca1, const Atom* c,
   return std::fabs(omega) < rad(tolerance_deg);
 }
 
+/// @brief Calculate chiral volume (scalar triple product).
+/// @param actr Central atom position (center of chirality)
+/// @param a1 First substituent position
+/// @param a2 Second substituent position
+/// @param a3 Third substituent position
+/// @return Scalar triple product: (a1-actr) · ((a2-actr) × (a3-actr))
 inline double calculate_chiral_volume(const Position& actr, const Position& a1,
                                       const Position& a2, const Position& a3) {
   return (a1 - actr).dot((a2 - actr).cross(a3 - actr));
 }
 
+/// @brief Calculate Ramachandran dihedral angles φ (phi) and ψ (psi).
+/// φ is defined by: C(i-1) - N(i) - Cα(i) - C(i)
+/// ψ is defined by: N(i) - Cα(i) - C(i) - N(i+1)
+/// @param prev Previous residue (nullptr if not available)
+/// @param res Current residue
+/// @param next Next residue (nullptr if not available)
+/// @return Array [phi, psi] in radians; NaN values if flanking residues are missing or atoms not found
 inline std::array<double, 2> calculate_phi_psi(const Residue* prev,
                                                const Residue& res,
                                                const Residue* next) {
@@ -209,15 +294,30 @@ inline std::array<double, 2> calculate_phi_psi(const Residue* prev,
   return phi_psi;
 }
 
+/// @brief Find the least-squares plane through a set of atoms.
+/// @param atoms Vector of atom pointers
+/// @return Array [a, b, c, d] representing plane equation ax + by + cz = d
+/// @note All atoms must have non-zero occupancy to be included
 GEMMI_DLL std::array<double, 4> find_best_plane(const std::vector<Atom*>& atoms);
 
+/// @brief Calculate signed distance from a point to a plane.
+/// @param pos Position of the point
+/// @param coeff Plane coefficients [a, b, c, d] from ax + by + cz = d
+/// @return Signed distance from pos to plane
 inline double get_distance_from_plane(const Position& pos,
                                       const std::array<double, 4>& coeff) {
   return coeff[0] * pos.x + coeff[1] * pos.y + coeff[2] * pos.z + coeff[3];
 }
 
+/// @brief Parse a crystallographic triplet string into an FTransform.
+/// @param s CIF triplet string (e.g., "x,y,z" or "-x+1/2,-y,z")
+/// @return FTransform representing the fractional transformation
 GEMMI_DLL FTransform parse_triplet_as_ftransform(const std::string& s);
 
+/// @brief Calculate the anisotropic U tensor at a position from TLS group parameters.
+/// @param tls TLS group parameters (T, L, S matrices)
+/// @param pos Position where U is evaluated
+/// @return 3×3 symmetric matrix U at the given position
 GEMMI_DLL SMat33<double> calculate_u_from_tls(const TlsGroup& tls, const Position& pos);
 
 } // namespace gemmi

From 2e75b21699a30629b32ead1de570d67c198a6282 Mon Sep 17 00:00:00 2001
From: "C. Vonrhein" <CV-GPhL>
Date: Wed, 22 Apr 2026 22:52:06 +0200
Subject: [PATCH 05/19] docs(align.hpp): add full Doxygen API documentation

---
 include/gemmi/align.hpp | 85 ++++++++++++++++++++++++++++++++++++-----
 1 file changed, 75 insertions(+), 10 deletions(-)

diff --git a/include/gemmi/align.hpp b/include/gemmi/align.hpp
index ceb8327f7..ab4d857ab 100644
--- a/include/gemmi/align.hpp
+++ b/include/gemmi/align.hpp
@@ -12,9 +12,15 @@
 
 namespace gemmi {
 
-// Sequence alignment and label_seq_id assignment
+/// @name Sequence Alignment and label_seq_id Assignment
+/// @{
 
-// helper function for sequence alignment
+/// @brief Compute per-position gap-open penalties for sequence alignment.
+/// Accounts for gaps and discontinuities in the observed polymer sequence.
+/// @param polymer Polymer span to analyze
+/// @param polymer_type Type of polymer (peptide or nucleic acid)
+/// @param scoring Optional AlignmentScoring parameters; uses default partial_model() if nullptr
+/// @return Vector of gap-opening penalties for each position
 inline std::vector<int> prepare_target_gapo(const ConstResidueSpan& polymer,
                                             PolymerType polymer_type,
                                             const AlignmentScoring* scoring=nullptr) {
@@ -35,6 +41,12 @@ inline std::vector<int> prepare_target_gapo(const ConstResidueSpan& polymer,
   return gaps;
 }
 
+/// @brief Align a full sequence (SEQRES/Entity) to observed residues in a polymer.
+/// @param full_seq Full sequence from SEQRES or Entity
+/// @param polymer Polymer chain span (observed residues)
+/// @param polymer_type Type of polymer (peptide or nucleic acid)
+/// @param scoring Optional AlignmentScoring parameters; uses default partial_model() if nullptr
+/// @return AlignmentResult with CIGAR string describing the alignment
 inline AlignmentResult align_sequence_to_polymer(
                                      const std::vector<std::string>& full_seq,
                                      const ConstResidueSpan& polymer,
@@ -68,7 +80,10 @@ inline AlignmentResult align_sequence_to_polymer(
                          (std::uint8_t)encoding.size(), *scoring);
 }
 
-// check for exact match between model sequence and full sequence (SEQRES)
+/// @brief Check if model sequence exactly matches SEQRES numbering (fast path).
+/// @param polymer Polymer chain span
+/// @param ent Entity with full_sequence from SEQRES
+/// @return True if label_seq values already match Entity::full_sequence exactly
 inline bool seqid_matches_seqres(const ConstResidueSpan& polymer,
                                  const Entity& ent) {
   if (ent.full_sequence.size() != polymer.size())
@@ -82,6 +97,8 @@ inline bool seqid_matches_seqres(const ConstResidueSpan& polymer,
   return true;
 }
 
+/// @brief Clear full_sequence and database references from all entities.
+/// @param st Structure to clear
 inline void clear_sequences(Structure& st) {
   for (Entity& ent : st.entities) {
     ent.full_sequence.clear();
@@ -90,11 +107,16 @@ inline void clear_sequences(Structure& st) {
   }
 }
 
+/// @brief Match FASTA sequences to entities and set Entity::full_sequence.
+/// @param st Structure to update
+/// @param fasta_sequences Vector of FASTA sequence strings
 GEMMI_DLL
 void assign_best_sequences(Structure& st, const std::vector<std::string>& fasta_sequences);
 
-// Uses sequence alignment (model to SEQRES) to assign label_seq.
-// force: assign label_seq even if full sequence is not known (assumes no gaps)
+/// @brief Assign label_seq_id to residues using alignment to Entity full_sequence.
+/// @param polymer Polymer chain span to assign label_seq to
+/// @param ent Entity with full_sequence from SEQRES (nullptr allowed if force=true)
+/// @param force If true, assign label_seq sequentially even without Entity information
 inline void assign_label_seq_to_polymer(ResidueSpan& polymer,
                                         const Entity* ent, bool force) {
   AlignmentResult result;
@@ -142,6 +164,8 @@ inline void assign_label_seq_to_polymer(ResidueSpan& polymer,
   }
 }
 
+/// @brief Reset all Residue::label_seq to unset state.
+/// @param st Structure to clear
 inline void clear_label_seq_id(Structure& st) {
   for (Model& model : st.models)
     for (Chain& chain : model.chains)
@@ -149,6 +173,9 @@ inline void clear_label_seq_id(Structure& st) {
         res.label_seq = Residue::OptionalNum();
 }
 
+/// @brief Assign label_seq_id to all polymer chains in a structure.
+/// @param st Structure to process
+/// @param force If true, assign label_seq sequentially even without Entity information
 inline void assign_label_seq_id(Structure& st, bool force) {
   for (Model& model : st.models)
     for (Chain& chain : model.chains)
@@ -160,14 +187,29 @@ inline void assign_label_seq_id(Structure& st, bool force) {
 }
 
 
-// superposition
+/// @}
+/// @name Structure Superposition
+/// @{
 
+/// @brief Atom selection mode for superposition calculations.
 enum class SupSelect {
-  CaP,  // only Ca (aminoacids) or P (nucleotides) atoms
-  MainChain,  // only main chain atoms
+  /// Use only Cα atoms (for peptides) or P atoms (for nucleic acids)
+  CaP,
+  /// Use backbone atoms (N, CA, C, O for peptides; P, O5', C5', C4', C3', O3' for nucleic acids)
+  MainChain,
+  /// Use all atoms
   All
 };
 
+/// @brief Extract matching atom positions from two polymer spans for superposition.
+/// @param pos1 Output vector of positions from fixed chain
+/// @param pos2 Output vector of positions from movable chain
+/// @param fixed Fixed polymer chain (reference structure)
+/// @param movable Movable polymer chain (structure to be superposed)
+/// @param ptype Polymer type (peptide or nucleic acid)
+/// @param sel Atom selection mode (CaP, MainChain, or All)
+/// @param altloc Alternate location code to select; '\0' means all conformers
+/// @param ca_offsets Optional output vector storing indices of Cα/P atoms in pos1/pos2
 inline void prepare_positions_for_superposition(std::vector<Position>& pos1,
                                                 std::vector<Position>& pos2,
                                                 ConstResidueSpan fixed,
@@ -226,6 +268,13 @@ inline void prepare_positions_for_superposition(std::vector<Position>& pos1,
   }
 }
 
+/// @brief Calculate RMSD between two polymer spans without transformation.
+/// @param fixed Fixed polymer chain (reference structure)
+/// @param movable Movable polymer chain (structure to compare)
+/// @param ptype Polymer type (peptide or nucleic acid)
+/// @param sel Atom selection mode (CaP, MainChain, or All)
+/// @param altloc Alternate location code to select; '\0' means all conformers
+/// @return SupResult with RMSD and atom count (rotation/translation matrices are empty)
 inline SupResult calculate_current_rmsd(ConstResidueSpan fixed,
                                         ConstResidueSpan movable,
                                         PolymerType ptype,
@@ -242,6 +291,15 @@ inline SupResult calculate_current_rmsd(ConstResidueSpan fixed,
   return r;
 }
 
+/// @brief Calculate optimal superposition using QCP (quaternion characteristic polynomial).
+/// @param fixed Fixed polymer chain (reference structure)
+/// @param movable Movable polymer chain (structure to be superposed)
+/// @param ptype Polymer type (peptide or nucleic acid)
+/// @param sel Atom selection mode (CaP, MainChain, or All)
+/// @param trim_cycles Number of iterative trimming cycles (0 = no trimming)
+/// @param trim_cutoff Outlier distance threshold in Å for trimming (default 2.0)
+/// @param altloc Alternate location code to select; '\0' means all conformers
+/// @return SupResult with rotation matrix, translation vector, and RMSD
 inline SupResult calculate_superposition(ConstResidueSpan fixed,
                                          ConstResidueSpan movable,
                                          PolymerType ptype,
@@ -280,8 +338,13 @@ inline SupResult calculate_superposition(ConstResidueSpan fixed,
   return sr;
 }
 
-// Returns superpositions for all residues in fixed.first_conformer(),
-// performed by superposing backbone in radius=10.0 from residue's Ca.
+/// @brief Compute local superpositions using a sliding window around each Cα/P.
+/// Calculates superpositions by sliding a sphere of given radius around each Cα/P position.
+/// @param fixed Fixed polymer chain (reference structure)
+/// @param movable Movable polymer chain (structure to be superposed)
+/// @param ptype Polymer type (peptide or nucleic acid)
+/// @param radius Window radius in Ångströms around each Cα/P (default 10.0)
+/// @return Vector of SupResult, one per residue in fixed.first_conformer()
 inline std::vector<SupResult> calculate_superpositions_in_moving_window(
                                       ConstResidueSpan fixed,
                                       ConstResidueSpan movable,
@@ -313,5 +376,7 @@ inline std::vector<SupResult> calculate_superpositions_in_moving_window(
   return result;
 }
 
+/// @}
+
 } // namespace gemmi
 #endif

From 192465bff856fe33ca3b65e8747e6f54b4b3017f Mon Sep 17 00:00:00 2001
From: "C. Vonrhein" <CV-GPhL>
Date: Wed, 22 Apr 2026 22:54:07 +0200
Subject: [PATCH 06/19] docs: fix spec gaps in calculate.hpp and align.hpp

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 include/gemmi/align.hpp     | 6 +++---
 include/gemmi/calculate.hpp | 4 ++++
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/include/gemmi/align.hpp b/include/gemmi/align.hpp
index ab4d857ab..3c7501047 100644
--- a/include/gemmi/align.hpp
+++ b/include/gemmi/align.hpp
@@ -107,9 +107,9 @@ inline void clear_sequences(Structure& st) {
   }
 }
 
-/// @brief Match FASTA sequences to entities and set Entity::full_sequence.
-/// @param st Structure to update
-/// @param fasta_sequences Vector of FASTA sequence strings
+/// @brief Match FASTA sequences to entities and set Entity::full_sequence in-place.
+/// @param st Structure whose entities are updated with matched sequences.
+/// @param fasta_sequences Vector of FASTA-format sequence strings to match against entities.
 GEMMI_DLL
 void assign_best_sequences(Structure& st, const std::vector<std::string>& fasta_sequences);
 
diff --git a/include/gemmi/calculate.hpp b/include/gemmi/calculate.hpp
index b1bb9a7a8..7569fbaec 100644
--- a/include/gemmi/calculate.hpp
+++ b/include/gemmi/calculate.hpp
@@ -142,6 +142,10 @@ inline std::pair<double, double> calculate_b_aniso_range(const Model& model) {
 }
 
 
+/// @brief Expand an axis-aligned bounding box to include all atoms in obj.
+/// @tparam T Type supporting children() iteration (Model, Chain, Residue, or Atom).
+/// @param obj Object whose atom positions are added to the box.
+/// @param box Bounding box to expand in-place.
 template<class T> void expand_box(const T& obj, Box<Position>& box) {
   for (const auto& child : obj.children())
     expand_box(child, box);

From 234b0ca001cfc71310700572e2c42ca8a56d3922 Mon Sep 17 00:00:00 2001
From: "C. Vonrhein" <CV-GPhL>
Date: Wed, 22 Apr 2026 22:55:40 +0200
Subject: [PATCH 07/19] docs(neighbor.hpp): add full Doxygen API documentation

---
 include/gemmi/neighbor.hpp | 128 +++++++++++++++++++++++++++++++++++--
 1 file changed, 124 insertions(+), 4 deletions(-)

diff --git a/include/gemmi/neighbor.hpp b/include/gemmi/neighbor.hpp
index 40f5dadb6..32af8db7e 100644
--- a/include/gemmi/neighbor.hpp
+++ b/include/gemmi/neighbor.hpp
@@ -16,56 +16,98 @@
 
 namespace gemmi {
 
+/// @brief Cell-linked-list spatial index for fast atom neighbor searching.
+/// Supports both macromolecular structures (Model) and small-molecule structures
+/// (SmallStructure), with periodic boundary conditions and crystallographic symmetry.
 struct NeighborSearch {
 
+  /// @brief Atom reference stored in a grid cell.
+  /// Encodes the position and indices needed to resolve back to the original atom or site.
   struct Mark {
+    /// Cartesian position of the atom (may be a symmetry image).
     Position pos;
+    /// Alternate conformer identifier ('\\0' = no alternate).
     char altloc;
+    /// Chemical element of the atom.
     Element element;
+    /// Index into the list of crystallographic images (0 = identity).
     short image_idx;
+    /// Index of the chain in the model's chain vector.
     int chain_idx;
+    /// Index of the residue within the chain.
     int residue_idx;
+    /// Index of the atom within the residue.
     int atom_idx;
 
     Mark(const Position& p, char alt, El el, short im, int ch, int res, int atom)
     : pos(p), altloc(alt), element(el),
       image_idx(im), chain_idx(ch), residue_idx(res), atom_idx(atom) {}
 
+    /// @brief Resolve the indices into a CRA (chain/residue/atom) triple.
+    /// @param mdl Model to resolve indices into (non-const version).
+    /// @return CRA triple pointing to the resolved atom.
     CRA to_cra(Model& mdl) const {
       Chain& c = mdl.chains.at(chain_idx);
       Residue& r = c.residues.at(residue_idx);
       Atom& a = r.atoms.at(atom_idx);
       return {&c, &r, &a};
     }
+
+    /// @brief Resolve the indices into a const CRA triple.
+    /// @param mdl Model to resolve indices into (const version).
+    /// @return const CRA triple pointing to the resolved atom.
     const_CRA to_cra(const Model& mdl) const {
       const Chain& c = mdl.chains.at(chain_idx);
       const Residue& r = c.residues.at(residue_idx);
       const Atom& a = r.atoms.at(atom_idx);
       return {&c, &r, &a};
     }
+
+    /// @brief Resolve the atom_idx to a SmallStructure::Site.
+    /// @param small_st SmallStructure to resolve into (non-const version).
+    /// @return Reference to the resolved site.
     SmallStructure::Site& to_site(SmallStructure& small_st) const {
       return small_st.sites.at(atom_idx);
     }
+
+    /// @brief Resolve the atom_idx to a const SmallStructure::Site.
+    /// @param small_st SmallStructure to resolve into (const version).
+    /// @return Const reference to the resolved site.
     const SmallStructure::Site& to_site(const SmallStructure& small_st) const {
       return small_st.sites.at(atom_idx);
     }
   };
 
+  /// Spatial Grid of Mark vectors (one cell per grid voxel).
   Grid<std::vector<Mark>> grid;
+  /// The search radius passed to the constructor.
   double radius_specified = 0.;
+  /// Pointer to the indexed macromolecular model (nullptr if SmallStructure).
   Model* model = nullptr;
+  /// Pointer to the indexed small-molecule structure (nullptr if Model).
   SmallStructure* small_structure = nullptr;
+  /// If true, apply periodic boundary conditions when searching.
   bool use_pbc = true;
+  /// If true, include hydrogen atoms in the index.
   bool include_h = true;
 
+  /// @brief Default constructor creating an empty NeighborSearch.
   NeighborSearch() = default;
-  // Model is not const so it can be modified in for_each_contact()
+
+  /// @brief Initialize grid for a macromolecular model.
+  /// @param model_ Reference to the Model to index (non-const, can be modified in for_each_contact()).
+  /// @param cell Unit cell defining the crystallographic symmetry and periodicity.
+  /// @param radius Search radius used to dimension the grid.
   NeighborSearch(Model& model_, const UnitCell& cell, double radius) {
     model = &model_;
     radius_specified = radius;
     set_bounding_cell(cell);
     set_grid_size();
   }
+
+  /// @brief Initialize for a small-molecule structure.
+  /// @param small_st Reference to the SmallStructure to index.
+  /// @param radius Search radius used to dimension the grid.
   NeighborSearch(SmallStructure& small_st, double radius) {
     small_structure = &small_st;
     radius_specified = radius;
@@ -73,13 +115,38 @@ struct NeighborSearch {
     set_grid_size();
   }
 
+  /// @brief Fill the grid with all atoms in the model or small structure.
+  /// @param include_h_ If true, include hydrogen atoms (default: true).
+  /// @return Reference to *this for method chaining.
   NeighborSearch& populate(bool include_h_=true);
+
+  /// @brief Add all atoms from one chain to the grid.
+  /// @param chain The chain to add.
+  /// @param include_h_ If true, include hydrogen atoms (default: true).
   void add_chain(const Chain& chain, bool include_h_=true);
+
+  /// @brief Add atoms from a chain with explicit chain index.
+  /// Used when chain_idx matters; internally called by populate().
+  /// @param chain The chain to add.
+  /// @param n_ch Index of the chain in the model's chain vector.
   void add_chain_n(const Chain& chain, int n_ch);
+
+  /// @brief Add a single atom to the grid.
+  /// @param atom The atom to add.
+  /// @param n_ch Index of the chain.
+  /// @param n_res Index of the residue within the chain.
+  /// @param n_atom Index of the atom within the residue.
   void add_atom(const Atom& atom, int n_ch, int n_res, int n_atom);
+
+  /// @brief Add a SmallStructure site to the grid.
+  /// @param site The site to add.
+  /// @param n Index of the site in the sites vector.
   void add_site(const SmallStructure::Site& site, int n);
 
-  // assumes data in [0, 1), but uses index_n to account for numerical errors
+  /// @brief Return the reference to the grid cell containing a fractional coordinate.
+  /// Assumes data in [0, 1) but uses index_n to account for numerical errors.
+  /// @param fr Fractional coordinate in the unit cell.
+  /// @return Reference to the vector of Marks in the grid cell.
   std::vector<Mark>& get_subcell(const Fractional& fr) {
     size_t idx = grid.index_n(int(fr.x * grid.nu),
                               int(fr.y * grid.nv),
@@ -89,9 +156,21 @@ struct NeighborSearch {
     return grid.data[idx];
   }
 
+  /// @brief Iterate over all grid cells within k cells of a position.
+  /// @tparam Func Callable type with signature void(std::vector<Mark>&, const Fractional&).
+  /// @param pos Cartesian position.
+  /// @param func Function to call for each cell, receiving the Marks and fractional coordinates.
+  /// @param k Grid multiplier (default: 1); larger k searches more cells.
   template<typename Func>
   void for_each_cell(const Position& pos, const Func& func, int k=1);
 
+  /// @brief Iterate over all Marks within radius of a position, respecting alternate conformers.
+  /// @tparam Func Callable type with signature void(Mark&, double) for (mark, dist_sq).
+  /// @param pos Cartesian position.
+  /// @param alt Alternate conformer identifier to match ('\\0' matches all).
+  /// @param radius Search radius in Angstroms.
+  /// @param func Function to call for each nearby mark.
+  /// @param k Grid multiplier (default: 1); larger k searches more cells.
   template<typename Func>
   void for_each(const Position& pos, char alt, double radius, const Func& func, int k=1) {
     if (radius <= 0)
@@ -106,12 +185,21 @@ struct NeighborSearch {
     }, k);
   }
 
+  /// @brief Return the minimum grid multiplier k covering the given radius.
+  /// @param r Search radius in Angstroms.
+  /// @return Minimum k value such that k*radius_specified >= r.
   int sufficient_k(double r) const {
     // .00001 is added to account for possible numeric error in r
     return r <= radius_specified ? 1 : int(r / radius_specified + 1.00001);
   }
 
-  // with radius==0 it uses radius_specified
+  /// @brief Find all Marks within a distance range of a position.
+  /// If radius==0, uses radius_specified.
+  /// @param pos Cartesian position.
+  /// @param alt Alternate conformer identifier.
+  /// @param min_dist Minimum distance in Angstroms.
+  /// @param radius Maximum distance in Angstroms (0 = use radius_specified).
+  /// @return Vector of pointers to Marks in the distance range [min_dist, radius].
   std::vector<Mark*> find_atoms(const Position& pos, char alt,
                                 double min_dist, double radius) {
     int k = sufficient_k(radius);
@@ -125,15 +213,31 @@ struct NeighborSearch {
     return out;
   }
 
+  /// @brief Find neighbor Marks of a Model atom.
+  /// @param atom The atom to find neighbors for.
+  /// @param min_dist Minimum distance in Angstroms.
+  /// @param max_dist Maximum distance in Angstroms.
+  /// @return Vector of pointers to nearby Marks.
   std::vector<Mark*> find_neighbors(const Atom& atom, double min_dist, double max_dist) {
     return find_atoms(atom.pos, atom.altloc, min_dist, max_dist);
   }
+
+  /// @brief Find neighbor Marks of a SmallStructure site.
+  /// @param site The site to find neighbors for.
+  /// @param min_dist Minimum distance in Angstroms.
+  /// @param max_dist Maximum distance in Angstroms.
+  /// @return Vector of pointers to nearby Marks.
   std::vector<Mark*> find_site_neighbors(const SmallStructure::Site& site,
                                          double min_dist, double max_dist) {
     Position pos = grid.unit_cell.orthogonalize(site.fract);
     return find_atoms(pos, '\0', min_dist, max_dist);
   }
 
+  /// @brief Find the nearest atom within k grid layers and a radius limit.
+  /// @param pos Cartesian position.
+  /// @param k Grid multiplier (number of layers to search).
+  /// @param radius Maximum search radius in Angstroms.
+  /// @return Pair of (nearest Mark pointer, distance squared). Pointer is nullptr if no atom found.
   std::pair<Mark*, double>
   find_nearest_atom_within_k(const Position& pos, int k, double radius) {
     Mark* mark = nullptr;
@@ -151,7 +255,11 @@ struct NeighborSearch {
     return {mark, nearest_dist_sq};
   }
 
-  // it would be good to return also NearestImage
+  /// @brief Find the single nearest atom within an optional radius limit.
+  /// Automatically adapts the grid search multiplier k to find the nearest atom.
+  /// @param pos Cartesian position.
+  /// @param radius Maximum search radius in Angstroms (default: INFINITY).
+  /// @return Pointer to the nearest Mark, or nullptr if no atom found.
   Mark* find_nearest_atom(const Position& pos, double radius=INFINITY) {
     double r_spec = radius_specified;
     if (radius == 0.f)
@@ -176,13 +284,25 @@ struct NeighborSearch {
     return nullptr;
   }
 
+  /// @brief Distance squared between two positions, accounting for unit cell periodicity.
+  /// @param pos1 First Cartesian position.
+  /// @param pos2 Second Cartesian position.
+  /// @return Squared distance, accounting for periodic boundary conditions.
   double dist_sq(const Position& pos1, const Position& pos2) const {
     return grid.unit_cell.distance_sq(pos1, pos2);
   }
+
+  /// @brief Euclidean distance between two positions with periodic boundary conditions.
+  /// @param pos1 First Cartesian position.
+  /// @param pos2 Second Cartesian position.
+  /// @return Euclidean distance in Angstroms.
   double dist(const Position& pos1, const Position& pos2) const {
     return std::sqrt(dist_sq(pos1, pos2));
   }
 
+  /// @brief Retrieve the crystallographic image transformation for the given image index.
+  /// @param image_idx Crystallographic image index (0 = identity, 1+ = symmetry images).
+  /// @return The fractional transform for the image.
   FTransform get_image_transformation(int image_idx) const {
     // 0 is for identity, other indices are shifted by one.
     if (image_idx == 0)

From 5bcbc445b6fd46494173e88e26ca1b5423dd4471 Mon Sep 17 00:00:00 2001
From: "C. Vonrhein" <CV-GPhL>
Date: Wed, 22 Apr 2026 22:55:56 +0200
Subject: [PATCH 08/19] docs(contact.hpp): add full Doxygen API documentation

---
 include/gemmi/contact.hpp | 51 +++++++++++++++++++++++++++++++++++----
 1 file changed, 46 insertions(+), 5 deletions(-)

diff --git a/include/gemmi/contact.hpp b/include/gemmi/contact.hpp
index ed439f462..b6b4350ed 100644
--- a/include/gemmi/contact.hpp
+++ b/include/gemmi/contact.hpp
@@ -11,40 +11,81 @@
 
 namespace gemmi {
 
+/// @brief Configures and performs distance-based contact search between atoms.
+/// Built on top of NeighborSearch with support for configurable filtering,
+/// per-element radii, and flexible results collection.
 struct ContactSearch {
+  /// @brief Filter options for atom pair inclusion in contact search results.
   enum class Ignore {
-    Nothing=0, SameResidue, AdjacentResidues, SameChain, SameAsu
+    Nothing=0,        ///< Report all contacts.
+    SameResidue,      ///< Skip contacts between atoms in the same residue.
+    AdjacentResidues, ///< Skip contacts between atoms in adjacent residues (i±1).
+    SameChain,        ///< Skip contacts between atoms in the same chain.
+    SameAsu           ///< Skip contacts between atoms in the same asymmetric unit (report only symmetry-related contacts).
   };
-  // parameters used to configure the search
+
+  /// Maximum contact distance in Angstroms.
   double search_radius;
+  /// Which atom pairs to skip (default: SameResidue).
   Ignore ignore = Ignore::SameResidue;
-  bool twice = false;  // report both A-B and B-A
+  /// If true, report each contact pair twice (A→B and B→A); default false.
+  bool twice = false;
+  /// Skip atoms with occupancy below this threshold (0 = no filtering).
   float min_occupancy = 0.f;
+  /// Squared distance threshold for identifying atoms on special positions.
   double special_pos_cutoff_sq = 0.8 * 0.8;
+  /// Per-element contact radii (used when checking atom-type-based distance criteria).
   std::vector<float> radii;
 
+  /// @brief Create a contact search with the given search radius.
+  /// @param radius Maximum contact distance in Angstroms.
   ContactSearch(double radius) noexcept : search_radius(radius) {}
 
-  // a helper function that sets per-atom radii basing on covalent_radius()
+  /// @brief Populate per-element radii based on covalent radii with scaling.
+  /// @param multiplier Scaling factor applied to covalent radius.
+  /// @param tolerance Constant tolerance added to the scaled radius.
   void setup_atomic_radii(double multiplier, double tolerance) {
     radii.resize((size_t)El::END);
     for (int i = 0; i != (int) El::END; ++i)
       radii[i] = float(multiplier * Element(i).covalent_r() + tolerance / 2);
   }
+
+  /// @brief Get the contact radius for a given element.
+  /// @param el Chemical element.
+  /// @return Contact radius for the element (0.f if radii not set up).
   float get_radius(El el) const { return radii.empty() ? 0.f : radii[(int)el]; }
+
+  /// @brief Override the contact radius for a specific element.
+  /// @param el Chemical element.
+  /// @param r New contact radius in Angstroms.
   void set_radius(El el, float r) {
     if (!radii.empty())
       radii[(int)el] = r;
   }
 
+  /// @brief Iterate over all contacts, applying ignore logic and calling a function for each.
+  /// @tparam Func Callable type with signature void(const CRA&, const CRA&, int, double)
+  ///         for (partner1, partner2, image_idx, dist_sq).
+  /// @param ns NeighborSearch object containing the indexed atoms.
+  /// @param func Function to call for each contact found.
   template<typename Func>
   void for_each_contact(NeighborSearch& ns, const Func& func);
 
+  /// @brief Result of a contact search between two atoms.
   struct Result {
-    CRA partner1, partner2;
+    /// CRA (chain/residue/atom) reference of the first partner.
+    CRA partner1;
+    /// CRA (chain/residue/atom) reference of the second partner.
+    CRA partner2;
+    /// Crystallographic image index for partner2 (0 = same ASU).
     int image_idx;
+    /// Squared distance between the partners in Angstroms squared.
     double dist_sq;
   };
+
+  /// @brief Collect and return all contacts as a vector of Result.
+  /// @param ns NeighborSearch object containing the indexed atoms.
+  /// @return Vector of Result structs representing all found contacts.
   std::vector<Result> find_contacts(NeighborSearch& ns) {
     std::vector<Result> out;
     for_each_contact(ns, [&out](const CRA& cra1, const CRA& cra2,

From e7f8251b84ac182ccf908b3ad57154dcbf8da4ce Mon Sep 17 00:00:00 2001
From: "C. Vonrhein" <CV-GPhL>
Date: Wed, 22 Apr 2026 22:58:42 +0200
Subject: [PATCH 09/19] docs(assembly.hpp): add full Doxygen API documentation

---
 include/gemmi/assembly.hpp | 157 ++++++++++++++++++++++++++++++++++---
 1 file changed, 148 insertions(+), 9 deletions(-)

diff --git a/include/gemmi/assembly.hpp b/include/gemmi/assembly.hpp
index 5032fadfa..ad7358fdd 100644
--- a/include/gemmi/assembly.hpp
+++ b/include/gemmi/assembly.hpp
@@ -13,24 +13,60 @@
 
 namespace gemmi {
 
-enum class HowToNameCopiedChain { Short, AddNumber, Dup };
+/// Naming strategy for chains copied during assembly or NCS expansion.
+///
+/// @brief Specifies how to name copied chains to ensure uniqueness.
+enum class HowToNameCopiedChain {
+  /// Use 1–2 character chain names cycling through A–Z, a–z, 0–9
+  Short,
+  /// Append a numeric suffix to the original chain name
+  AddNumber,
+  /// Keep the original chain name (may result in duplicates)
+  Dup
+};
 
+/// Utility to generate unique chain names for assembly and NCS expansion.
+///
+/// Manages a set of used chain names and provides methods to generate new,
+/// unique names according to the specified naming strategy.
 struct ChainNameGenerator {
+  /// The naming strategy to apply
   HowToNameCopiedChain how;
+  /// List of already-assigned chain names to avoid duplicates
   std::vector<std::string> used_names;
 
+  /// Initialize with a naming strategy.
+  ///
+  /// @param how_ The chain naming strategy to use
   ChainNameGenerator(HowToNameCopiedChain how_) : how(how_) {}
+
+  /// Initialize with chain names pre-populated from a Model.
+  ///
+  /// @param model The model to extract existing chain names from
+  /// @param how_ The chain naming strategy to use
   ChainNameGenerator(const Model& model, HowToNameCopiedChain how_) : how(how_) {
     if (how != HowToNameCopiedChain::Dup)
       for (const Chain& chain : model.chains)
         used_names.push_back(chain.name);
   }
+
+  /// Register a chain name if not already used.
+  ///
+  /// @param name The name to register
+  /// @return true if the name was successfully added, false if already in used_names
   bool try_add(const std::string& name) {
     if (in_vector(name, used_names))
       return false;
     used_names.push_back(name);
     return true;
   }
+
+  /// Generate a unique 1–2 character name starting from preferred.
+  ///
+  /// Cycles through A–Z, a–z, 0–9 to find an available name.
+  ///
+  /// @param preferred The preferred 1-character name to try first
+  /// @return A unique short name
   std::string make_short_name(const std::string& preferred) {
     static const char symbols[] = {
       'A','B','C','D','E','F','G','H','I','J','K','L','M',
@@ -59,6 +95,13 @@ struct ChainNameGenerator {
     fail("run out of 1- and 2-letter chain names");
   }
 
+  /// Generate a name by appending a numeric postfix to a base name.
+  ///
+  /// Increments the numeric suffix until a unique name is found.
+  ///
+  /// @param base The base chain name
+  /// @param n The starting number to append
+  /// @return A unique name of the form "base" + number
   std::string make_name_with_numeric_postfix(const std::string& base, int n) {
     std::string name = base;
     name += std::to_string(n);
@@ -69,6 +112,14 @@ struct ChainNameGenerator {
     return name;
   }
 
+  /// Generate a new name according to the configured naming strategy.
+  ///
+  /// Dispatches to make_short_name(), make_name_with_numeric_postfix(),
+  /// or returns the original name based on the how field.
+  ///
+  /// @param old The original chain name
+  /// @param n The numeric suffix (used only for AddNumber strategy)
+  /// @return A new unique chain name
   std::string make_new_name(const std::string& old, int n) {
     switch (how) {
       case HowToNameCopiedChain::Short: return make_short_name(old);
@@ -79,6 +130,13 @@ struct ChainNameGenerator {
   }
 };
 
+/// Rename a chain to ensure its name is unique within the model.
+///
+/// Uses the Short naming strategy to generate a 1–2 character name
+/// that does not conflict with other chains in the model.
+///
+/// @param model The model containing the chain
+/// @param chain The chain to rename
 inline void ensure_unique_chain_name(const Model& model, Chain& chain) {
   ChainNameGenerator namegen(HowToNameCopiedChain::Short);
   for (const Chain& ch : model.chains)
@@ -87,9 +145,28 @@ inline void ensure_unique_chain_name(const Model& model, Chain& chain) {
   chain.name = namegen.make_short_name(chain.name);
 }
 
+/// Apply all generators in an assembly to a model to generate a biological unit.
+///
+/// Creates a new Model containing copies of the input model transformed
+/// according to each generator in the assembly. Chain names are made unique
+/// according to the specified strategy.
+///
+/// @param assembly The assembly record containing transformation operators
+/// @param model The input model to transform
+/// @param how The strategy for naming copied chains
+/// @param logging Logger for warning messages
+/// @return A new Model containing the assembled biological unit
 GEMMI_DLL Model make_assembly(const Assembly& assembly, const Model& model,
                               HowToNameCopiedChain how, const Logger& logging);
 
+/// Create a pseudo-assembly representing all unit-cell images.
+///
+/// Generates an Assembly whose operators produce all crystallographic
+/// images of the structure within the unit cell. Useful for packing displays
+/// and structure visualization.
+///
+/// @param cell The unit cell with precomputed image transformations
+/// @return An Assembly object whose generators apply all unit-cell symmetry
 inline Assembly pseudo_assembly_for_unit_cell(const UnitCell& cell) {
   Assembly assembly("unit_cell");
   std::vector<Assembly::Operator> operators(cell.images.size() + 1);
@@ -102,36 +179,98 @@ inline Assembly pseudo_assembly_for_unit_cell(const UnitCell& cell) {
   return assembly;
 }
 
-/// If called with assembly_name="unit_cell" changes structure to unit cell (P1).
-/// \par keep_spacegroup preserves space group and unit cell - is it needed?
+/// Modify a Structure in-place to contain only a named assembly.
+///
+/// Replaces all models in the structure with the expanded assembly,
+/// optionally preserving the space group. If assembly_name="unit_cell",
+/// converts the structure to the unit cell (P1).
+///
+/// @param st The structure to modify in-place
+/// @param assembly_name The name of the assembly to apply
+/// @param how The strategy for naming copied chains
+/// @param logging Logger for warning messages
+/// @param keep_spacegroup If true, preserve the original space group and unit cell
+/// @param merge_dist Distance threshold for merging nearby atoms (default 0.2 Å)
 GEMMI_DLL void transform_to_assembly(Structure& st, const std::string& assembly_name,
                                      HowToNameCopiedChain how, const Logger& logging,
                                      bool keep_spacegroup=false, double merge_dist=0.2);
 
 
+/// Expand a model by applying NCS operations.
+///
+/// Generates copies of the input model by applying each NCS (non-crystallographic symmetry)
+/// operation, returning a new Model with all copies combined.
+///
+/// @param model The input model to expand
+/// @param ncs Vector of NCS operations to apply
+/// @param how The strategy for naming copied chains
+/// @return A new Model containing the original and all NCS-transformed copies
 GEMMI_DLL Model expand_ncs_model(const Model& model, const std::vector<NcsOp>& ncs,
                                  HowToNameCopiedChain how);
 
-/// Searches and merges overlapping equivalent atoms from different chains.
-/// To be used after expand_ncs() and make_assembly().
+/// Merge atoms within a distance threshold after NCS or assembly expansion.
+///
+/// Searches for overlapping atoms from different chains that are equivalent
+/// under the unit cell symmetry and merges them. Typically used after
+/// expand_ncs() and make_assembly() to eliminate duplicate atoms.
+///
+/// @param model The model to modify in-place
+/// @param cell The unit cell (used for distance calculations)
+/// @param max_dist Maximum distance for merging atoms (default 0.2 Å)
+/// @param compare_serial If true, only merge atoms with matching serial numbers (default true)
 GEMMI_DLL void merge_atoms_in_expanded_model(Model& model, const UnitCell& cell,
                                              double max_dist=0.2, bool compare_serial=true);
 
 
+/// Rename all chains to use the shortest possible unique names.
+///
+/// Replaces chain names with 1–2 character names (A–Z, a–z, 0–9) while
+/// maintaining uniqueness within the structure.
+///
+/// @param st The structure to modify in-place
 GEMMI_DLL void shorten_chain_names(Structure& st);
 
+/// Expand NCS operations in all models of a structure in-place.
+///
+/// Applies all NCS (non-crystallographic symmetry) operations to every model
+/// in the structure, generating copies and optionally merging nearby atoms.
+///
+/// @param st The structure to expand in-place
+/// @param how The strategy for naming copied chains
+/// @param merge_dist Distance threshold for merging atoms (default 0.2 Å)
 GEMMI_DLL void expand_ncs(Structure& st, HowToNameCopiedChain how, double merge_dist=0.2);
 
-/// HowToNameCopiedChain::Dup adds segment name to chain name
+/// Split chains at residue segment boundaries into separate chains.
+///
+/// Divides each chain into multiple chains at segment breaks, with new chain
+/// names generated according to the strategy. If how=HowToNameCopiedChain::Dup,
+/// the segment name is appended to the chain name.
+///
+/// @param model The model to modify in-place
+/// @param how The strategy for naming split chains
 GEMMI_DLL void split_chains_by_segments(Model& model, HowToNameCopiedChain how);
 
-/// Finds nearby crystallographic symmetry images for viewer use.
-/// The search uses only the first model and returns non-identity images.
+/// Find all crystallographic symmetry images within a distance radius.
+///
+/// Searches for non-identity symmetry images of the structure around a given
+/// position. Uses only the first model and is intended for viewer applications.
+///
+/// @param st The structure to query
+/// @param pos The center position for the search
+/// @param radius The search radius in Ångströms
+/// @return Vector of NearestImage objects describing the nearby symmetry images
 GEMMI_DLL std::vector<NearestImage> get_nearby_sym_ops(const Structure& st,
                                                        const Position& pos,
                                                        double radius);
 
-/// Returns a copy of the structure transformed to the requested symmetry image.
+/// Extract a symmetry image of the structure as a new Structure object.
+///
+/// Creates and returns a copy of the input structure transformed to the
+/// coordinates of the specified symmetry image.
+///
+/// @param st The input structure
+/// @param image The symmetry image specification
+/// @return A new Structure object containing the transformed copy
 GEMMI_DLL Structure get_sym_image(const Structure& st, const NearestImage& image);
 
 } // namespace gemmi

From 7a53ccf1ff55322e74c371a55374b09134ca68c0 Mon Sep 17 00:00:00 2001
From: "C. Vonrhein" <CV-GPhL>
Date: Wed, 22 Apr 2026 22:59:41 +0200
Subject: [PATCH 10/19] docs(select.hpp): add full Doxygen API documentation

---
 include/gemmi/select.hpp | 221 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 218 insertions(+), 3 deletions(-)

diff --git a/include/gemmi/select.hpp b/include/gemmi/select.hpp
index 51be4112c..2749c64ff 100644
--- a/include/gemmi/select.hpp
+++ b/include/gemmi/select.hpp
@@ -18,18 +18,40 @@ namespace gemmi {
 //     /mdl/chn/*(res).ic/at[el]:aloc
 //
 
+/// Parsed CCP4-style atom/residue/chain selection expression.
+///
+/// Represents a selection filter such as "//A/10-20/CA[C].B" for filtering
+/// atoms, residues, chains, and models from a structure. Provides both
+/// individual match predicates and FilterProxy views for convenient iteration
+/// over matching elements.
 struct GEMMI_DLL Selection {
+  /// Set of allowed names with optional inversion.
+  ///
+  /// Represents comma-separated lists like "ALA,GLY" with optional
+  /// inversion to mean "everything except".
   struct List {
+    /// If true, matches everything (corresponding to "*")
     bool all = true;
+    /// If true, the match logic is inverted (names NOT in list)
     bool inverted = false;
-    std::string list;  // comma-separated
+    /// Comma-separated list of names
+    std::string list;
 
+    /// Get the canonical string representation.
+    ///
+    /// @return "*" if all=true, else the list optionally prefixed with "!"
     std::string str() const {
       if (all)
         return "*";
       return inverted ? "!" + list : list;
     }
 
+    /// Check if a name matches this list.
+    ///
+    /// Accounts for the all and inverted flags.
+    ///
+    /// @param name The name to check
+    /// @return true if the name matches according to all and inverted settings
     bool has(const std::string& name) const {
       if (all)
         return true;
@@ -38,8 +60,20 @@ struct GEMMI_DLL Selection {
     }
   };
 
+  /// Matches a single character flag against a pattern string.
+  ///
+  /// Supports optional inversion with a leading '!' character.
   struct FlagList {
+    /// Flag characters; may be prefixed with '!' to invert the match
     std::string pattern;
+
+    /// Check if a flag appears in the pattern.
+    ///
+    /// If pattern begins with '!', returns true if the flag does NOT appear
+    /// in the rest of the pattern.
+    ///
+    /// @param flag The flag character to check
+    /// @return true if the flag matches the pattern (or does not match if inverted)
     bool has(char flag) const {
       if (pattern.empty())
         return true;
@@ -49,16 +83,35 @@ struct GEMMI_DLL Selection {
     }
   };
 
+  /// A residue sequence position for range matching.
+  ///
+  /// Represents a single point in the residue sequence, with an optional
+  /// insertion code. Special values INT_MIN and INT_MAX represent unset bounds.
   struct SequenceId {
+    /// Integer sequence number (INT_MIN = unset lower bound, INT_MAX = unset upper bound)
     int seqnum;
+    /// Insertion code character ('*' = wildcard)
     char icode;
 
+    /// Check if this sequence ID is unset.
+    ///
+    /// @return true if seqnum is INT_MIN or INT_MAX
     bool empty() const {
       return seqnum == INT_MIN || seqnum == INT_MAX;
     }
 
+    /// Get the string representation of this sequence ID.
+    ///
+    /// @return String like "10" or "10A" depending on icode
     std::string str() const;
 
+    /// Compare this sequence ID to another SeqId.
+    ///
+    /// Compares first by sequence number, then by insertion code if needed.
+    /// The wildcard '*' for icode matches any insertion code.
+    ///
+    /// @param seqid The SeqId to compare to
+    /// @return negative if less than, 0 if equal, positive if greater than seqid
     int compare(const SeqId& seqid) const {
       if (seqnum != *seqid.num)
         return seqnum < *seqid.num ? -1 : 1;
@@ -68,11 +121,25 @@ struct GEMMI_DLL Selection {
     }
   };
 
+  /// A numeric filter on atom properties (occupancy or B-factor).
+  ///
+  /// Represents a constraint like "q>0.5" (occupancy greater than 0.5)
+  /// or "b<30" (B-factor less than 30).
   struct AtomInequality {
+    /// Property character: 'q' for occupancy, 'b' for B-factor
     char property;
+    /// Relation: -1 for less than, 0 for equals, +1 for greater than
     int relation;
+    /// The threshold value to compare against
     double value;
 
+    /// Check if an atom satisfies this inequality.
+    ///
+    /// Extracts the specified property from the atom and compares it
+    /// to the threshold using the specified relation.
+    ///
+    /// @param a The atom to check
+    /// @return true if the atom's property satisfies the inequality
     bool matches(const Atom& a) const {
       double atom_value = 0.;
       if (property == 'q')
@@ -86,36 +153,89 @@ struct GEMMI_DLL Selection {
       return atom_value == value;
     }
 
+    /// Get the string representation of this inequality.
+    ///
+    /// @return String like "q>0.5" or "b<30"
     std::string str() const;
   };
 
-  int mdl = 0;            // 0 = all
+  /// Model number to select (0 = all models)
+  int mdl = 0;
+  /// List of allowed chain IDs
   List chain_ids;
+  /// Start of sequence ID range (inclusive)
   SequenceId from_seqid = {INT_MIN, '*'};
+  /// End of sequence ID range (inclusive)
   SequenceId to_seqid = {INT_MAX, '*'};
+  /// List of allowed residue names (3-letter codes)
   List residue_names;
+  /// List of allowed entity type strings
   List entity_types;
-  // array corresponding to enum EntityType
+  /// Flags array for entity type matching (corresponds to enum EntityType)
   std::array<char, 6> et_flags;
+  /// List of allowed atom names
   List atom_names;
+  /// Vector of allowed element numbers
   std::vector<char> elements;
+  /// List of allowed alternate conformer IDs
   List altlocs;
+  /// Flag-based filter for residues
   FlagList residue_flags;
+  /// Flag-based filter for atoms
   FlagList atom_flags;
+  /// Vector of numeric property filters
   std::vector<AtomInequality> atom_inequalities;
 
+  /// Default constructor.
   Selection() = default;
+
+  /// Parse a CCP4 selection string.
+  ///
+  /// Parses a selection string like "//A/10-20/CA[C].B" into the
+  /// selection criteria.
+  ///
+  /// @param cid The CCP4-style selection string
   Selection(const std::string& cid);
 
+  /// Get the canonical string representation of this selection.
+  ///
+  /// @return String representation of the parsed selection
   std::string str() const;
 
+  /// Check if a structure matches this selection.
+  ///
+  /// Structures always match (they are never filtered).
+  ///
+  /// @param s The structure to check
+  /// @return Always true
   bool matches(const Structure&) const { return true; }
+
+  /// Check if a model matches this selection.
+  ///
+  /// Matches if the model number is 0 (select all) or matches mdl.
+  ///
+  /// @param model The model to check
+  /// @return true if the model is included in this selection
   bool matches(const Model& model) const {
     return mdl == 0 || mdl == model.num;
   }
+
+  /// Check if a chain matches this selection.
+  ///
+  /// Matches if the chain ID is in the allowed list.
+  ///
+  /// @param chain The chain to check
+  /// @return true if the chain is included in this selection
   bool matches(const Chain& chain) const {
     return chain_ids.has(chain.name);
   }
+
+  /// Check if a residue matches this selection.
+  ///
+  /// Matches residue name, sequence ID range, entity type, and flags.
+  ///
+  /// @param res The residue to check
+  /// @return true if the residue is included in this selection
   bool matches(const Residue& res) const {
     return (entity_types.all || et_flags[(int)res.entity_type]) &&
            residue_names.has(res.name) &&
@@ -123,6 +243,13 @@ struct GEMMI_DLL Selection {
            to_seqid.compare(res.seqid) >= 0 &&
            residue_flags.has(res.flag);
   }
+
+  /// Check if an atom matches this selection.
+  ///
+  /// Matches atom name, element, altloc, flags, and numeric inequalities.
+  ///
+  /// @param a The atom to check
+  /// @return true if the atom is included in this selection
   bool matches(const Atom& a) const {
     return atom_names.has(a.name) &&
            (elements.empty() || elements[a.element.ordinal()]) &&
@@ -131,25 +258,58 @@ struct GEMMI_DLL Selection {
            std::all_of(atom_inequalities.begin(), atom_inequalities.end(),
                        [&](const AtomInequality& i) { return i.matches(a); });
   }
+
+  /// Check if a chain-residue-atom triplet matches this selection.
+  ///
+  /// Matches if all non-null components pass their respective match tests.
+  ///
+  /// @param cra The CRA structure to check
+  /// @return true if the CRA is included in this selection
   bool matches(const CRA& cra) const {
     return (cra.chain == nullptr || matches(*cra.chain)) &&
            (cra.residue == nullptr || matches(*cra.residue)) &&
            (cra.atom == nullptr || matches(*cra.atom));
   }
 
+  /// Get a filtered view over models in a structure.
+  ///
+  /// @param st The structure to filter
+  /// @return A FilterProxy that iterates over matching models
   FilterProxy<Selection, Model> models(Structure& st) const {
     return {*this, st.models};
   }
+
+  /// Get a filtered view over chains in a model.
+  ///
+  /// @param model The model to filter
+  /// @return A FilterProxy that iterates over matching chains
   FilterProxy<Selection, Chain> chains(Model& model) const {
     return {*this, model.chains};
   }
+
+  /// Get a filtered view over residues in a chain.
+  ///
+  /// @param chain The chain to filter
+  /// @return A FilterProxy that iterates over matching residues
   FilterProxy<Selection, Residue> residues(Chain& chain) const {
     return {*this, chain.residues};
   }
+
+  /// Get a filtered view over atoms in a residue.
+  ///
+  /// @param residue The residue to filter
+  /// @return A FilterProxy that iterates over matching atoms
   FilterProxy<Selection, Atom> atoms(Residue& residue) const {
     return {*this, residue.atoms};
   }
 
+  /// Find the first matching atom in a model.
+  ///
+  /// Searches through the model's chains and residues to find the first
+  /// atom that matches this selection.
+  ///
+  /// @param model The model to search
+  /// @return A CRA with the first match, or {nullptr, nullptr, nullptr} if not found
   CRA first_in_model(Model& model) const {
     if (matches(model))
       for (Chain& chain : model.chains) {
@@ -165,6 +325,12 @@ struct GEMMI_DLL Selection {
     return {nullptr, nullptr, nullptr};
   }
 
+  /// Find the first matching atom in a structure.
+  ///
+  /// Searches through all models to find the first matching atom.
+  ///
+  /// @param st The structure to search
+  /// @return A pair of {Model*, CRA} with the first match, or {nullptr, empty_CRA} if not found
   std::pair<Model*, CRA> first(Structure& st) const {
     for (Model& model : st.models) {
       CRA cra = first_in_model(model);
@@ -174,6 +340,14 @@ struct GEMMI_DLL Selection {
     return {nullptr, {nullptr, nullptr, nullptr}};
   }
 
+  /// Recursively copy matching children from orig into target.
+  ///
+  /// Used internally by copy_selection() to recursively populate a target
+  /// structure with only the matching elements.
+  ///
+  /// @tparam T The type of element (Model, Chain, Residue, or Atom)
+  /// @param orig The original element to copy from
+  /// @param target The target element to copy into
   template<typename T>
   void add_matching_children(const T& orig, T& target) const {
     for (const auto& orig_child : orig.children())
@@ -182,17 +356,36 @@ struct GEMMI_DLL Selection {
         add_matching_children(orig_child, target.children().back());
       }
   }
+
+  /// Specialization for Atom (leaf node).
   void add_matching_children(const Atom&, Atom&) const {}
 
+  /// Set the residue flag filter pattern.
+  ///
+  /// @param pattern Flag pattern string (may be prefixed with '!' to invert)
+  /// @return *this for method chaining
   Selection& set_residue_flags(const std::string& pattern) {
     residue_flags.pattern = pattern;
     return *this;
   }
+
+  /// Set the atom flag filter pattern.
+  ///
+  /// @param pattern Flag pattern string (may be prefixed with '!' to invert)
+  /// @return *this for method chaining
   Selection& set_atom_flags(const std::string& pattern) {
     atom_flags.pattern = pattern;
     return *this;
   }
 
+  /// Create a copy of an element containing only matching children.
+  ///
+  /// Returns a copy of orig with all non-matching children filtered out.
+  /// Recursively applies the filter to nested children.
+  ///
+  /// @tparam T The type of element (Structure, Model, Chain, or Residue)
+  /// @param orig The original element to copy
+  /// @return A new element containing only matching children
   template<typename T>
   T copy_selection(const T& orig) const {
     T copied = orig.empty_copy();
@@ -200,6 +393,14 @@ struct GEMMI_DLL Selection {
     return copied;
   }
 
+  /// Remove all matching atoms or residues in-place.
+  ///
+  /// Recursively removes all matching children from the element.
+  /// After removing all matching children from a child, also removes
+  /// any empty children.
+  ///
+  /// @tparam T The type of element (Model, Chain, or Residue)
+  /// @param t The element to modify in-place
   template<typename T>
   void remove_selected(T& t) const {
     for (auto& child : t.children())
@@ -208,6 +409,11 @@ struct GEMMI_DLL Selection {
     vector_remove_if(t.children(),
                      [&](typename T::child_type& c) { return c.children().empty(); });
   }
+
+  /// Specialization for Residue: remove matching atoms.
+  ///
+  /// Optimized version that clears all atoms if the selection matches
+  /// all atoms, otherwise removes atoms one by one.
   void remove_selected(Residue& res) const {
     if (atom_names.all && elements.empty() && altlocs.all &&
         atom_flags.pattern.empty() && atom_inequalities.empty())
@@ -216,12 +422,21 @@ struct GEMMI_DLL Selection {
       vector_remove_if(res.atoms, [&](Atom& c) { return matches(c); });
   }
 
+  /// Remove all non-matching atoms or residues in-place.
+  ///
+  /// Recursively removes all non-matching children from the element,
+  /// then recurses into the remaining children.
+  ///
+  /// @tparam T The type of element (Model, Chain, or Residue)
+  /// @param t The element to modify in-place
   template<typename T>
   void remove_not_selected(T& t) const {
     vector_remove_if(t.children(), [&](typename T::child_type& c) { return !matches(c); });
     for (auto& child : t.children())
       remove_not_selected(child);
   }
+
+  /// Specialization for Atom (leaf node, no-op).
   void remove_not_selected(Atom&) const {}
 };
 

From 1cc7c730f1ebf102064235dc102d666b7fa4dc2b Mon Sep 17 00:00:00 2001
From: "C. Vonrhein" <CV-GPhL>
Date: Wed, 22 Apr 2026 23:00:53 +0200
Subject: [PATCH 11/19] docs: fix missing @brief tag on ChainNameGenerator in
 assembly.hpp

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 include/gemmi/assembly.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/gemmi/assembly.hpp b/include/gemmi/assembly.hpp
index ad7358fdd..594f65ea7 100644
--- a/include/gemmi/assembly.hpp
+++ b/include/gemmi/assembly.hpp
@@ -25,7 +25,7 @@ enum class HowToNameCopiedChain {
   Dup
 };
 
-/// Utility to generate unique chain names for assembly and NCS expansion.
+/// @brief Utility to generate unique chain names for assembly and NCS expansion.
 ///
 /// Manages a set of used chain names and provides methods to generate new,
 /// unique names according to the specified naming strategy.

From 2c7d894d75108b525641d1bd92f45b8fafb18c32 Mon Sep 17 00:00:00 2001
From: "C. Vonrhein" <CV-GPhL>
Date: Wed, 22 Apr 2026 23:02:51 +0200
Subject: [PATCH 12/19] docs(modify.hpp): add full Doxygen API documentation

---
 include/gemmi/modify.hpp | 106 ++++++++++++++++++++++++++++++++-------
 1 file changed, 87 insertions(+), 19 deletions(-)

diff --git a/include/gemmi/modify.hpp b/include/gemmi/modify.hpp
index 6d20f5607..97d5b69ac 100644
--- a/include/gemmi/modify.hpp
+++ b/include/gemmi/modify.hpp
@@ -14,6 +14,10 @@
 namespace gemmi {
 
 /// Remove alternative conformations.
+/// Recursively removes all alternative conformations, keeping only the first
+/// (altloc='A' or blank). For Chain level, keeps one representative per residue seqid.
+/// For Residue level, removes duplicate atoms by name.
+/// @tparam T Model, Chain, or Residue
 template<class T> void remove_alternative_conformations(T& obj) {
   for (auto& child : obj.children())
     remove_alternative_conformations(child);
@@ -39,7 +43,9 @@ template<> inline void remove_alternative_conformations(Chain& chain) {
   }
 }
 
-/// Remove hydrogens.
+/// Remove hydrogens and deuterium atoms.
+/// Recursively removes all H and D atoms from the structure.
+/// @tparam T Model, Chain, or Residue
 template<class T> void remove_hydrogens(T& obj) {
   for (auto& child : obj.children())
     remove_hydrogens(child);
@@ -53,6 +59,10 @@ template<> inline void remove_hydrogens(Residue& res) {
 /// Set isotropic ADP to the range (b_min, b_max). Values smaller than
 /// b_min are changed to b_min, values larger than b_max to b_max.
 /// Anisotropic ADP is left unchanged.
+/// @tparam T Model, Chain, Residue, or Atom
+/// @param obj object to modify
+/// @param b_min minimum B-factor value
+/// @param b_max maximum B-factor value
 template<class T> void assign_b_iso(T& obj, float b_min, float b_max) {
   for (auto& child : obj.children())
     assign_b_iso(child, b_min, b_max);
@@ -61,7 +71,9 @@ template<> inline void assign_b_iso(Atom& atom, float b_min, float b_max) {
   atom.b_iso = clamp(atom.b_iso, b_min, b_max);
 }
 
-/// Remove anisotropic ADP
+/// Remove anisotropic displacement parameters.
+/// Recursively zeroes all anisotropic ADP tensors (u11, u22, u33, u12, u13, u23).
+/// @tparam T Model, Chain, Residue, or Atom
 template<class T> void remove_anisou(T& obj) {
   for (auto& child : obj.children())
     remove_anisou(child);
@@ -70,7 +82,9 @@ template<> inline void remove_anisou(Atom& atom) {
   atom.aniso = {0, 0, 0, 0, 0, 0};
 }
 
-/// Set absent ANISOU to value from B_iso
+/// Set absent ANISOU records to isotropic values derived from B_iso.
+/// For atoms without anisotropic ADP, creates isotropic tensor U = B_iso/(8π²).
+/// @tparam T Model, Chain, Residue, or Atom
 template<class T> void ensure_anisou(T& obj) {
   for (auto& child : obj.children())
     ensure_anisou(child);
@@ -82,7 +96,12 @@ template<> inline void ensure_anisou(Atom& atom) {
   }
 }
 
-/// apply Transform to both atom's position and ADP
+/// Apply a Transform to atomic positions and anisotropic displacement parameters.
+/// Recursively applies the given transformation to all atom coordinates and
+/// congruence-transforms anisotropic ADPs.
+/// @tparam T Model, Chain, Residue, or Atom
+/// @param obj object to transform
+/// @param tr transformation to apply
 template<class T> void transform_pos_and_adp(T& obj, const Transform& tr) {
   for (auto& child : obj.children())
     transform_pos_and_adp(child, tr);
@@ -93,7 +112,10 @@ template<> inline void transform_pos_and_adp(Atom& atom, const Transform& tr) {
     atom.aniso = atom.aniso.transformed_by<float>(tr.mat);
 }
 
-/// set atom site serial numbers to 1, 2, ..., optionally leaving gaps for TERs
+/// Assign atom site serial numbers (1, 2, 3, ...) sequentially.
+/// Optionally leaves gaps at chain ends for TER records if numbering polymer chains.
+/// @param model model containing chains
+/// @param numbered_ter if true, increment serial after last polymer residue in each chain
 inline void assign_serial_numbers(Model& model, bool numbered_ter=false) {
   int serial = 0;
   for (Chain& chain : model.chains)
@@ -105,17 +127,22 @@ inline void assign_serial_numbers(Model& model, bool numbered_ter=false) {
         ++serial;
     }
 }
+
+/// Assign atom site serial numbers to all models in a structure.
+/// @param st structure containing models
+/// @param numbered_ter if true, increment serial after last polymer residue in each chain
 inline void assign_serial_numbers(Structure& st, bool numbered_ter=false) {
   for (Model& model : st.models)
     assign_serial_numbers(model, numbered_ter);
 }
 
 
-/// Helper function for processing (usually: changing) names and numbers
-/// in AtomAddress instances in metadata:
-/// Connection, CisPep, StructSite, Helix, Sheet::Strand.
-/// Other fields are not updated here, in particular: ModRes, Entity::DbRef,
-/// Entity::full_sequence, TlsGroup::Selection.
+/// Apply a function to all AtomAddress references in structure metadata.
+/// Processes atom addresses in Connection, CisPep, StructSite, Helix, and Sheet records.
+/// Does not update ModRes, Entity::DbRef, Entity::full_sequence, or TlsGroup::Selection.
+/// @tparam Func callable taking an AtomAddress& parameter
+/// @param st structure whose metadata will be processed
+/// @param func function to apply to each AtomAddress
 template<typename Func>
 void process_addresses(Structure& st, Func func) {
   for (Connection& con : st.connections) {
@@ -144,9 +171,13 @@ void process_addresses(Structure& st, Func func) {
     }
 }
 
-/// Takes func(const std::string& chain_name, gemmi::SeqId& seqid).
-/// It doesn't process Entity::DbRef::seq_begin/seq_end (b/c there is no
-/// single corresponding chain name).
+/// Apply a function to all SeqId references in structure metadata.
+/// Processes sequence IDs in Connection/CisPep/StructSite/Helix/Sheet records,
+/// ModRes entries, and TlsGroup selections.
+/// Does not process Entity::DbRef::seq_begin/seq_end (no single chain name).
+/// @tparam Func callable taking (const std::string& chain_name, SeqId& seqid)
+/// @param st structure whose metadata will be processed
+/// @param func function to apply to each (chain_name, seqid) pair
 template<typename Func>
 void process_sequence_ids(Structure& st, Func func) {
   process_addresses(st, [&](AtomAddress& aa) { func(aa.chain_name, aa.res_id.seqid); });
@@ -160,6 +191,11 @@ void process_sequence_ids(Structure& st, Func func) {
       }
 }
 
+/// Rename a chain throughout the structure.
+/// Updates all occurrences of old_name to new_name in models and metadata.
+/// @param st structure to modify
+/// @param old_name current chain name
+/// @param new_name new chain name
 inline void rename_chain(Structure& st, const std::string& old_name,
                                         const std::string& new_name) {
   auto update = [&](std::string& name) {
@@ -178,6 +214,12 @@ inline void rename_chain(Structure& st, const std::string& old_name,
       update(chain.name);
 }
 
+/// Rename residues throughout the structure.
+/// Updates all residues named old_name to new_name in models and metadata.
+/// Also updates Entity sequences and StructSite member records.
+/// @param st structure to modify
+/// @param old_name current residue name
+/// @param new_name new residue name
 inline void rename_residues(Structure& st, const std::string& old_name,
                                            const std::string& new_name) {
   auto update = [&](ResidueId& rid) {
@@ -211,6 +253,12 @@ inline void rename_residues(Structure& st, const std::string& old_name,
 }
 
 
+/// Rename atoms in residues of a specific type.
+/// For residues named res_name, renames atoms according to the old→new map.
+/// Updates both atomic coordinates and metadata references.
+/// @param st structure to modify
+/// @param res_name residue type to target
+/// @param old_new map from old atom names to new atom names
 inline void rename_atom_names(Structure& st, const std::string& res_name,
                               const std::map<std::string, std::string>& old_new) {
   auto update = [&old_new](std::string& name) {
@@ -232,6 +280,10 @@ inline void rename_atom_names(Structure& st, const std::string& res_name,
 }
 
 
+/// Expand deuterium-fraction representation into explicit H/D alternate conformations.
+/// Converts H atoms with non-zero d_fraction into H/D altloc pairs.
+/// If d_fraction >= 1, converts to pure D; otherwise creates H altloc 'A' and D altloc 'D'.
+/// @param res residue containing hydrogens to expand
 inline void replace_d_fraction_with_altlocs(Residue& res) {
   for (size_t i = res.atoms.size(); i-- != 0; ) {
     Atom& atom = res.atoms[i];
@@ -263,6 +315,11 @@ inline void replace_d_fraction_with_altlocs(Residue& res) {
   }
 }
 
+/// Contract explicit D atoms into H atoms with fractional occupancy.
+/// Merges H and D atoms at the same position into a single H atom,
+/// storing the D occupancy as the fraction field.
+/// @param res residue containing deuterium atoms to contract
+/// @return true if any D atoms were found and processed, false otherwise
 inline bool replace_deuterium_with_fraction(Residue& res) {
   bool found = false;
   for (auto d = res.atoms.end(); d-- != res.atoms.begin(); )
@@ -294,11 +351,13 @@ inline bool replace_deuterium_with_fraction(Residue& res) {
   return found;
 }
 
-/// Hydrogens modelled as H/D mixture (altlocs H and D with the same position
-/// and ADP, but with refined fraction of D), it can be stored in mmCIF either
-/// as two atoms (H and D) or, using CCP4/Refmac extension, as H atoms with
-/// the ccp4_deuterium_fraction parameter.
-/// This function switches fraction <-> altlocs
+/// Toggle deuterium representation between fraction and explicit altlocs.
+/// Hydrogens modelled as H/D mixtures can be stored either as:
+/// - Two atoms with altlocs H and D (same position/ADP, different occupancies)
+/// - Single H atom with ccp4_deuterium_fraction parameter (CCP4/Refmac extension)
+/// This function converts between the two representations.
+/// @param st structure to modify
+/// @param store_fraction if true, use fraction representation; otherwise use altlocs
 inline void store_deuterium_as_fraction(Structure& st, bool store_fraction) {
   if (st.has_d_fraction == store_fraction)
     return;
@@ -314,6 +373,10 @@ inline void store_deuterium_as_fraction(Structure& st, bool store_fraction) {
         }
 }
 
+/// Set the deuterium fraction of all hydrogen atoms.
+/// Assigns the fraction field of all H atoms in the structure to d_fract.
+/// @param st structure to modify
+/// @param d_fract deuterium fraction to assign (0.0 to 1.0)
 inline void set_deuterium_fraction_of_hydrogens(Structure& st, float d_fract) {
   st.has_d_fraction = true;
   for (Model& model : st.models)
@@ -324,7 +387,12 @@ inline void set_deuterium_fraction_of_hydrogens(Structure& st, float d_fract) {
             atom.fraction = d_fract;
 }
 
-/// Convert coordinates to the standard coordinate system for the unit cell.
+/// Transform structure to the standard crystallographic frame.
+/// Converts to standard coordinates where the a-axis is along x,
+/// the b-axis is in the xy-plane, and c-axis points along +z.
+/// Updates ORIGX matrices and NCS operators accordingly.
+/// Only operates on crystal structures with explicit transformation matrices.
+/// @param st structure to transform
 inline void standardize_crystal_frame(Structure& st) {
   if (!st.cell.explicit_matrices || !st.cell.is_crystal())
     return;

From 26f37f8455bf1050e9021f049c83c6ed5dbfd752 Mon Sep 17 00:00:00 2001
From: "C. Vonrhein" <CV-GPhL>
Date: Wed, 22 Apr 2026 23:04:24 +0200
Subject: [PATCH 13/19] docs(polyheur.hpp): add full Doxygen API documentation

---
 include/gemmi/polyheur.hpp | 175 ++++++++++++++++++++++++++++++-------
 1 file changed, 141 insertions(+), 34 deletions(-)

diff --git a/include/gemmi/polyheur.hpp b/include/gemmi/polyheur.hpp
index 6186e79ff..8aa2a289b 100644
--- a/include/gemmi/polyheur.hpp
+++ b/include/gemmi/polyheur.hpp
@@ -12,12 +12,21 @@
 
 namespace gemmi {
 
-// A simplistic classification. It may change in the future.
-// It returns PolymerType which corresponds to _entity_poly.type,
-// but here we use only PeptideL, Rna, Dna, DnaRnaHybrid and Unknown.
+/// Classify a residue span as peptide, RNA, DNA, or other polymer type.
+/// A simplistic heuristic classification based on residue names.
+/// Returns PolymerType (PeptideL, PeptideD, Rna, Dna, DnaRnaHybrid, or Unknown).
+/// @param span sequence of residues to classify
+/// @param ignore_entity_type if false, uses Entity type if known and consistent
+/// @return PolymerType classification
 GEMMI_DLL PolymerType check_polymer_type(const ConstResidueSpan& span,
                                          bool ignore_entity_type=false);
 
+/// Determine polymer type from entity or by classification.
+/// Returns the entity's polymer_type if known and non-Unknown,
+/// otherwise classifies the residue span.
+/// @param ent entity with known polymer type (may be nullptr)
+/// @param polymer residue span to classify if entity type unknown
+/// @return PolymerType classification
 inline PolymerType get_or_check_polymer_type(const Entity* ent,
                                              const ConstResidueSpan& polymer) {
   if (ent && ent->polymer_type != PolymerType::Unknown)
@@ -25,8 +34,15 @@ inline PolymerType get_or_check_polymer_type(const Entity* ent,
   return check_polymer_type(polymer);
 }
 
+/// Atom name and chemical element pair.
+/// Used to represent backbone atom components of a polymer type.
 struct AtomNameElement { std::string atom_name; El el; };
 
+/// Get backbone atom names and elements for a polymer type.
+/// For peptides: N, CA, C, O.
+/// For nucleic acids: P, O5', C5', C4', O4', C3', O3', C2', O2', C1'.
+/// @param ptype polymer type (peptide or nucleic acid)
+/// @return vector of {atom_name, element} pairs for the backbone
 inline std::vector<AtomNameElement> get_mainchain_atoms(PolymerType ptype) {
   if (is_polynucleotide(ptype))
     return {{"P", El::P}, {"O5'", El::O}, {"C5'", El::C},
@@ -35,23 +51,46 @@ inline std::vector<AtomNameElement> get_mainchain_atoms(PolymerType ptype) {
   return {{"N", El::N}, {"CA", El::C}, {"C", El::C}, {"O", El::O}};
 }
 
-/// distance-based check for peptide bond
+/// Check if two atoms are within peptide bond distance.
+/// Tests C(i)–N(i+1) distance < 2.01 Ångströms.
+/// @param a1 C atom (may be nullptr)
+/// @param a2 N atom (may be nullptr)
+/// @return true if both atoms exist and are within bond distance
 inline bool in_peptide_bond_distance(const Atom* a1, const Atom* a2) {
   return a1 && a2 && a1->pos.dist_sq(a2->pos) < sq(1.341 * 1.5);
 }
+/// Check if two consecutive residues are bonded by a peptide bond.
+/// Tests if the C atom of r1 and N atom of r2 are within peptide bond distance.
+/// @param r1 first (C-terminal) residue
+/// @param r2 second (N-terminal) residue
+/// @return true if peptide bond exists
 inline bool have_peptide_bond(const Residue& r1, const Residue& r2) {
   return in_peptide_bond_distance(r1.get_c(), r2.get_n());
 }
 
-/// distance-based check for phosphodiester bond between nucleotide
+/// Check if two atoms are within nucleotide bond distance.
+/// Tests O3'(i)–P(i+1) distance < 2.4 Ångströms.
+/// @param a1 O3' atom (may be nullptr)
+/// @param a2 P atom (may be nullptr)
+/// @return true if both atoms exist and are within bond distance
 inline bool in_nucleotide_bond_distance(const Atom* a1, const Atom* a2) {
   return a1 && a2 && a1->pos.dist_sq(a2->pos) < sq(1.6 * 1.5);
 }
+/// Check if two consecutive residues are bonded by a phosphodiester bond.
+/// Tests if the O3' atom of r1 and P atom of r2 are within nucleotide bond distance.
+/// @param r1 first residue
+/// @param r2 second (following) residue
+/// @return true if phosphodiester bond exists
 inline bool have_nucleotide_bond(const Residue& r1, const Residue& r2) {
   return in_nucleotide_bond_distance(r1.get_o3prim(), r2.get_p());
 }
 
-/// check C-N or O3'-P distance
+/// Strict connectivity check using actual bond atoms.
+/// For peptides: tests C-N distance; for nucleotides: tests O3'-P distance.
+/// @param r1 first residue
+/// @param r2 second residue
+/// @param ptype polymer type (peptide or nucleic acid)
+/// @return true if residues are bonded
 inline bool are_connected(const Residue& r1, const Residue& r2, PolymerType ptype) {
   if (is_polypeptide(ptype))
     return have_peptide_bond(r1, r2);
@@ -60,7 +99,13 @@ inline bool are_connected(const Residue& r1, const Residue& r2, PolymerType ptyp
   return false;
 }
 
-/// are_connected2() is less exact, but requires only CA (or P) atoms.
+/// Loose connectivity check using only alpha-carbon or phosphorus atoms.
+/// For peptides: tests Cα–Cα < 5 Å; for nucleotides: tests P–P < 7.5 Å.
+/// Requires only Cα or P atoms, not full backbone atoms.
+/// @param r1 first residue
+/// @param r2 second residue
+/// @param ptype polymer type (peptide or nucleic acid)
+/// @return true if residues appear connected by distance
 inline bool are_connected2(const Residue& r1, const Residue& r2, PolymerType ptype) {
   auto this_or_first = [](const Atom* a, const Residue& r, El el) -> const Atom* {
     if (a || r.atoms.empty())
@@ -82,7 +127,13 @@ inline bool are_connected2(const Residue& r1, const Residue& r2, PolymerType pty
   return false;
 }
 
-/// are_connected3() = are_connected() + fallback to are_connected2()
+/// Connectivity check with fallback from strict to loose criteria.
+/// First tries are_connected() with actual bond atoms,
+/// then falls back to are_connected2() using Cα or P distances.
+/// @param r1 first residue
+/// @param r2 second residue
+/// @param ptype polymer type (peptide or nucleic acid)
+/// @return true if residues are connected by either criterion
 inline bool are_connected3(const Residue& r1, const Residue& r2, PolymerType ptype) {
   if (is_polypeptide(ptype)) {
     if (const Atom* a1 = r1.get_c())
@@ -102,41 +153,67 @@ inline bool are_connected3(const Residue& r1, const Residue& r2, PolymerType pty
   return false;
 }
 
+/// Convert polymer residues to one-letter sequence codes.
+/// Translates residue names to standard IUPAC single-letter codes.
+/// Unknown residues become 'X'.
+/// @param polymer sequence of residues to convert
+/// @return one-letter sequence string
 GEMMI_DLL std::string make_one_letter_sequence(const ConstResidueSpan& polymer);
 
-/// Assigns entity_type=Polymer|NonPolymer|Water for each Residue (only
-/// for residues with entity_type==Unknown, unless overwrite=true).
-/// Determining where the polymer ends and ligands start is sometimes
-/// arbitrary -- there can be a non-standard residue at the end that can
-/// be regarded as as either the last residue or a linked ligand.
+/// Assign entity types to residues in a chain.
+/// Assigns entity_type=Polymer|NonPolymer|Water to each residue.
+/// Only updates residues with entity_type==Unknown unless overwrite=true.
+/// Note: determining polymer/ligand boundary can be ambiguous for non-standard residues.
+/// @param chain chain to annotate
+/// @param overwrite if true, reassign types even for known residues
 GEMMI_DLL void add_entity_types(Chain& chain, bool overwrite);
+
+/// Assign entity types to all chains in a structure.
+/// @param st structure to annotate
+/// @param overwrite if true, reassign types even for known residues
 GEMMI_DLL void add_entity_types(Structure& st, bool overwrite);
 
-/// Assigns entity_type=Unknown for all residues.
+/// Reset all residue entity types to Unknown.
+/// @param st structure to modify
 GEMMI_DLL void remove_entity_types(Structure& st);
 
-/// Assigns Residue::entity_id based on Residue::subchain and Entity::subchains.
+/// Assign entity IDs to residues based on subchain assignments.
+/// Links Residue::entity_id to Entity records via Residue::subchain and Entity::subchains.
+/// @param st structure to annotate
+/// @param overwrite if true, reassign IDs even for residues that already have them
 GEMMI_DLL void add_entity_ids(Structure& st, bool overwrite);
 
-/// The subchain field in the residue is where we store_atom_site.label_asym_id
-/// from mmCIF files. As of 2018 wwPDB software splits author's chains
-/// (auth_asym_id) into label_asym_id units:
-/// * linear polymer,
-/// * non-polymers (each residue has different separate label_asym_id),
-/// * and waters.
-/// Refmac/makecif is doing similar thing but using different naming and
-/// somewhat different rules (it was written in 1990's before PDBx/mmCIF).
-///
-/// Here we use naming and rules different from both wwPDB and makecif.
-/// Note: call add_entity_types() first.
+/// Assign subchain labels to residues in a chain.
+/// Splits the chain into segments: linear polymers, non-polymers (each with unique ID),
+/// and waters. Stores label_asym_id-like names in Residue::subchain.
+/// wwPDB software splits auth_asym_id into label_asym_id units similarly.
+/// This function uses compatible but distinct naming and rules.
+/// @param chain chain to segment
+/// @param nonpolymer_counter incremented for each non-polymer segment (input/output)
+/// @note call add_entity_types() first to set entity types correctly
 GEMMI_DLL void assign_subchain_names(Chain& chain, int& nonpolymer_counter);
 
+/// Assign subchain names to all chains in a structure.
+/// Calls assign_subchain_names on each chain to segment into polymer/non-polymer units.
+/// @param st structure to annotate
+/// @param force if true, reassign subchains even if already assigned
+/// @param fail_if_unknown if true, raise error if polymer type is unknown; if false, skip
 GEMMI_DLL void assign_subchains(Structure& st, bool force, bool fail_if_unknown=true);
 
+/// Create missing Entity records for all subchains in the structure.
+/// Ensures each Residue::subchain has a corresponding Entity.
+/// @param st structure to update
 GEMMI_DLL void ensure_entities(Structure& st);
 
+/// Merge Entity records with identical sequences into one.
+/// Consolidates duplicate sequence entries and updates residue entity_ids.
+/// @param st structure to deduplicate
 GEMMI_DLL void deduplicate_entities(Structure& st);
 
+/// Set up entity metadata for a structure in a standard workflow.
+/// Convenience function that calls add_entity_types + assign_subchains +
+/// ensure_entities + deduplicate_entities in sequence.
+/// @param st structure to set up
 inline void setup_entities(Structure& st) {
   add_entity_types(st, /*overwrite=*/false);
   assign_subchains(st, /*force=*/false);
@@ -144,9 +221,16 @@ inline void setup_entities(Structure& st) {
   deduplicate_entities(st);
 }
 
-/// Determine ATOM/HETATM record type, based on Residue::entity_type
+/// Determine ATOM/HETATM record type based on residue entity type.
+/// Returns 'A' (ATOM) for polymers or 'H' (HETATM) for non-polymers and waters.
+/// @param res residue to classify
+/// @return 'A' or 'H'
 GEMMI_DLL char recommended_het_flag(const Residue& res);
-/// R = recommended_het_flag(), other valid values are A, H and '\0'
+
+/// Assign het_flag to all residues in an object.
+/// @tparam T Model, Chain, or Residue
+/// @param obj object to modify
+/// @param flag het_flag value: 'A' (ATOM), 'H' (HETATM), 'R' (recommended), or ' ' (none)
 template<class T> void assign_het_flags(T& obj, char flag='R') {
   for (auto& child : obj.children())
     assign_het_flags(child, flag);
@@ -158,7 +242,10 @@ template<> inline void assign_het_flags(Residue& res, char flag) {
   res.het_flag = flag == 'R' ? recommended_het_flag(res) : flag;
 }
 
-// Remove waters. It may leave empty chains.
+/// Remove water residues from a structure or chain.
+/// May leave empty chains if all residues are waters.
+/// @tparam T Model, Chain, or Residue
+/// @param obj object to modify
 template<class T> void remove_waters(T& obj) {
   for (auto& child : obj.children())
     remove_waters(child);
@@ -168,7 +255,11 @@ template<> inline void remove_waters(Chain& ch) {
                    [](const Residue& res) { return res.is_water(); });
 }
 
-// Remove ligands and waters. It may leave empty chains.
+/// Remove all non-polymer residues (ligands and waters).
+/// Requires entity_type to be assigned first (see add_entity_types).
+/// May leave empty chains if all residues are removed.
+/// @tparam T Model, Chain, or Residue
+/// @param obj object to modify
 template<class T> void remove_ligands_and_waters(T& obj) {
   for (auto& child : obj.children())
     remove_ligands_and_waters(child);
@@ -181,22 +272,38 @@ template<> inline void remove_ligands_and_waters(Chain& ch) {
   });
 }
 
-// Trim to alanine. Returns true if trimmed, false if it's (likely) not AA.
+/// Reduce an amino acid residue to an alanine backbone.
+/// Removes all side-chain atoms, keeping only N, CA, C, O and CB.
+/// @param res residue to trim
+/// @return true if trimmed successfully, false if residue is not a standard amino acid
 GEMMI_DLL bool trim_to_alanine(Residue& res);
 
+/// Trim all amino acid residues in a chain to alanine.
+/// @param chain chain to modify
 inline void trim_to_alanine(Chain& chain) {
   for (Residue& res : chain.residues)
     trim_to_alanine(res);
 }
 
-// Functions for switching between long (>3 chars) residue names (CCD codes)
-// and shortened ones that are compatible with the PDB format.
+/// Convert long CCD codes to 3-letter abbreviations for PDB format compatibility.
+/// Refmac does similar shortening. Enables Refmac refinement with long residue names.
+/// @param st structure to modify
 GEMMI_DLL void shorten_ccd_codes(Structure& st);
+
+/// Restore full CCD codes from their 3-letter abbreviations.
+/// Inverse of shorten_ccd_codes(); uses stored abbreviation mappings.
+/// @param st structure to modify
 GEMMI_DLL void restore_full_ccd_codes(Structure& st);
 
-/// Modifies Entity::full_sequence. Uses only the first chain for each Entity.
+/// Annotate microheterogeneity positions in entity sequences.
+/// Identifies residue positions with multiple alternate conformations and
+/// marks them in Entity::full_sequence. Uses only the first chain for each entity.
+/// @param st structure to annotate
+/// @param overwrite if true, overwrite existing microheterogeneity annotations
 GEMMI_DLL void add_microhetero_to_sequences(Structure& st, bool overwrite=false);
 
+/// Assign sequential integer IDs to all TLS groups in the structure.
+/// @param st structure to annotate
 GEMMI_DLL void add_tls_group_ids(Structure& st);
 
 } // namespace gemmi

From c4fab63f751ac3acd680ebfb9b152d36ace8538d Mon Sep 17 00:00:00 2001
From: "C. Vonrhein" <CV-GPhL>
Date: Wed, 22 Apr 2026 23:06:53 +0200
Subject: [PATCH 14/19] docs(sfcalc.hpp): add full Doxygen API documentation

---
 include/gemmi/sfcalc.hpp | 82 +++++++++++++++++++++++++++++++++++-----
 1 file changed, 72 insertions(+), 10 deletions(-)

diff --git a/include/gemmi/sfcalc.hpp b/include/gemmi/sfcalc.hpp
index bab426ca5..22bfd7156 100644
--- a/include/gemmi/sfcalc.hpp
+++ b/include/gemmi/sfcalc.hpp
@@ -17,26 +17,42 @@
 
 namespace gemmi {
 
-// calculate part of the structure factor: exp(2 pi i r * s)
+/// @brief Compute exp(2πi r·s) for one atom at fractional position for a reflection.
+/// @param fpos Fractional coordinates of the atom.
+/// @param hkl Miller indices of the reflection.
+/// @return Complex phase factor.
 inline std::complex<double> calculate_sf_part(const Fractional& fpos,
                                               const Miller& hkl) {
   double arg = 2 * pi() * (hkl[0]*fpos.x + hkl[1]*fpos.y + hkl[2]*fpos.z);
   return std::complex<double>{std::cos(arg), std::sin(arg)};
 }
 
+/// @brief Calculates structure factors by direct summation over all atoms.
+/// @tparam Table Scattering factor table type (e.g., IT92, WK95, ElectronTable).
+///         Must have a nested Coef type with coef_type member.
 template <typename Table>
 class StructureFactorCalculator {
 public:
   using coef_type = typename Table::Coef::coef_type;
 
+  /// @brief Initialize with the unit cell.
+  /// @param cell Unit cell; a reference is stored for use in subsequent calculations.
   StructureFactorCalculator(const UnitCell& cell) : cell_(cell) {}
 
+  /// @brief Cache (sin θ/λ)² for hkl and clear the scattering factor cache.
+  /// Must be called before computing structure factors for a new reflection.
+  /// @param hkl Miller indices of the reflection.
   void set_stol2_and_scattering_factors(const Miller& hkl) {
     stol2_ = (coef_type) cell_.calculate_stol_sq(hkl);
     scattering_factors_.clear();
     scattering_factors_.resize(addends.size(), 0.);
   }
 
+  /// @brief Return scattering factor for element at current (sin θ/λ)².
+  /// Lazily computes and caches the result.
+  /// @param element The chemical element.
+  /// @param charge Formal charge (signed char; 0 = neutral atom).
+  /// @return Scattering factor value.
   double get_scattering_factor(Element element, signed char charge) {
     double& sfactor = scattering_factors_[element.ordinal()];
     if (sfactor == 0.) {
@@ -47,27 +63,48 @@ class StructureFactorCalculator {
     return sfactor;
   }
 
-  // Calculation of Debye-Waller factor with isotropic ADPs
+  /// @brief Isotropic Debye-Waller factor exp(-B·stol²) for a small-molecule site.
+  /// @param site Small-molecule site with u_iso field.
+  /// @return Debye-Waller factor value.
   double dwf_iso(const SmallStructure::Site& site) const {
     return std::exp(-u_to_b() * stol2_ * site.u_iso);
   }
+
+  /// @brief Isotropic Debye-Waller factor exp(-B·stol²) for a macromolecular atom.
+  /// @param atom Macromolecular atom with b_iso field.
+  /// @return Debye-Waller factor value.
   double dwf_iso(const Atom& atom) const {
     return std::exp(-stol2_ * atom.b_iso);
   }
 
-  // Calculation of Debye-Waller factor exp(-2 pi^2 s.U.s)
-  // cf. B. Rupp's book, p. 641 or RWGK & Adams 2002, J. Appl. Cryst. 35, 477
-  // Small molecule and macromolecular anisotropic U's are defined differently,
-  // so we have two functions.
+  /// @brief Anisotropic Debye-Waller factor exp(-2π²·s·U·s) for a small-molecule site.
+  /// Uses site.aniso, where the anisotropic U tensor is in crystallographic coordinates.
+  /// @param site Small-molecule site with aniso field.
+  /// @param hkl Miller indices of the reflection.
+  /// @return Debye-Waller factor value.
   double dwf_aniso(const SmallStructure::Site& site, const Vec3& hkl) const {
     Vec3 arh(cell_.ar * hkl.x, cell_.br * hkl.y, cell_.cr * hkl.z);
     return std::exp(-2 * pi() * pi() * site.aniso.r_u_r(arh));
   }
+
+  /// @brief Anisotropic Debye-Waller factor exp(-2π²·s·U·s) for a macromolecular atom.
+  /// Uses atom.aniso in orthogonal coordinates.
+  /// @param atom Macromolecular atom with aniso field.
+  /// @param hkl Miller indices of the reflection.
+  /// @return Debye-Waller factor value.
   double dwf_aniso(const Atom& atom, const Vec3& hkl) const {
     return std::exp(-2 * pi() * pi() *
                     atom.aniso.transformed_by<>(cell_.frac.mat).r_u_r(hkl));
   }
 
+  /// @brief Contribution of one atom to structure factor, given its scattering factor.
+  /// Accounts for Debye-Waller factor, occupancy, and phase.
+  /// @tparam Site SmallStructure::Site or Atom.
+  /// @param fract Fractional coordinates of the atom.
+  /// @param site Site/atom record containing occupancy and aniso data.
+  /// @param hkl Miller indices of the reflection.
+  /// @param sf Precomputed scattering factor for this atom.
+  /// @return Complex structure factor contribution.
   template<typename Site>
   std::complex<double> calculate_sf_from_atom_sf(const Fractional& fract,
                                                  const Site& site,
@@ -88,6 +125,13 @@ class StructureFactorCalculator {
     return oc_sf * sum;
   }
 
+  /// @brief Contribution of one atom to structure factor, with automatic scattering factor lookup.
+  /// Accounts for Debye-Waller factor, occupancy, and phase.
+  /// @tparam Site SmallStructure::Site or Atom.
+  /// @param fract Fractional coordinates of the atom.
+  /// @param site Site/atom record containing element and charge information.
+  /// @param hkl Miller indices of the reflection.
+  /// @return Complex structure factor contribution.
   template<typename Site>
   std::complex<double> calculate_sf_from_atom(const Fractional& fract,
                                               const Site& site,
@@ -96,6 +140,10 @@ class StructureFactorCalculator {
     return calculate_sf_from_atom_sf(fract, site, hkl, atom_sf);
   }
 
+  /// @brief Sum contributions from all atoms in a Model for the given reflection.
+  /// @param model The macromolecular model.
+  /// @param hkl Miller indices of the reflection.
+  /// @return Total structure factor.
   std::complex<double> calculate_sf_from_model(const Model& model, const Miller& hkl) {
     std::complex<double> sf = 0.;
     set_stol2_and_scattering_factors(hkl);
@@ -106,7 +154,12 @@ class StructureFactorCalculator {
     return sf;
   }
 
-  // Z part of Mott-Bethe formula (when need to use different model)
+  /// @brief Compute Z (atomic number sum) component for Mott-Bethe conversion.
+  /// Used when a different model is needed for the Z calculation.
+  /// @param model The macromolecular model.
+  /// @param hkl Miller indices of the reflection.
+  /// @param only_h If true, restrict calculation to hydrogen atoms.
+  /// @return Z component of structure factor.
   std::complex<double> calculate_mb_z(const Model& model, const Miller& hkl, bool only_h) {
     std::complex<double> sf = 0.;
     stol2_ = (coef_type) cell_.calculate_stol_sq(hkl);
@@ -119,12 +172,19 @@ class StructureFactorCalculator {
     return sf;
   }
 
+  /// @brief Return the Mott-Bethe prefactor at current (sin θ/λ)².
+  /// Used to convert X-ray structure factors to electron structure factors.
+  /// @return Mott-Bethe factor: -mott_bethe_const() / (4·stol²).
   double mott_bethe_factor() const {
     return -mott_bethe_const() / 4 / stol2_;
   }
 
-  // The occupancy is assumed to take into account symmetry,
-  // i.e. to be fractional if the atom is on special position.
+  /// @brief Sum contributions from all sites in a SmallStructure for the given reflection.
+  /// The occupancy is assumed to account for symmetry (i.e., fractional if the atom
+  /// is on a special position).
+  /// @param small_st The small-molecule structure.
+  /// @param hkl Miller indices of the reflection.
+  /// @return Total structure factor.
   std::complex<double> calculate_sf_from_small_structure(const SmallStructure& small_st,
                                                          const Miller& hkl) {
     std::complex<double> sf = 0.;
@@ -139,7 +199,9 @@ class StructureFactorCalculator {
   coef_type stol2_;
   std::vector<double> scattering_factors_;
 public:
-  Addends addends;  // usually f' for X-rays
+  /// Addends object for anomalous scattering corrections (usually f' for X-rays).
+  /// Public and mutable by the caller.
+  Addends addends;
 };
 
 } // namespace gemmi

From 3eac55d4e91cc2be291b6b5f9711d7899c42fac9 Mon Sep 17 00:00:00 2001
From: "C. Vonrhein" <CV-GPhL>
Date: Wed, 22 Apr 2026 23:07:01 +0200
Subject: [PATCH 15/19] docs(ecalc.hpp): add full Doxygen API documentation

---
 include/gemmi/ecalc.hpp | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/include/gemmi/ecalc.hpp b/include/gemmi/ecalc.hpp
index 9da667d98..812c55d9c 100644
--- a/include/gemmi/ecalc.hpp
+++ b/include/gemmi/ecalc.hpp
@@ -10,6 +10,16 @@
 
 namespace gemmi {
 
+/// @brief Compute per-reflection amplitude normalization factors for E-scale conversion.
+/// Uses the Karle approach: E = F / sqrt(Σ f²), with resolution-bin-based averaging and smoothing.
+/// @tparam DataProxy Type satisfying the data proxy interface: must provide size(), stride(),
+///         spacegroup(), unit_cell(), get_hkl(n), and get_num(n) methods.
+/// @param data Data proxy (e.g., MtzDataProxy or ReflDataProxy).
+/// @param fcol_idx Column index of F amplitudes in the proxy.
+/// @param binner Binner object defining resolution shells.
+/// @return Vector of multipliers (one per reflection); NaN for missing values.
+///         Algorithm: collects F² in bins, applies [0.75, 1, 0.75] smoothing kernel,
+///         returns 1/sqrt(<F²>) per reflection for normalization.
 template<typename DataProxy>
 std::vector<double> calculate_amplitude_normalizers(const DataProxy& data, int fcol_idx,
                                                     const Binner& binner) {

From 9c193f5b472b62bd7af806777dc186bc2fe0a547 Mon Sep 17 00:00:00 2001
From: "C. Vonrhein" <CV-GPhL>
Date: Wed, 22 Apr 2026 23:08:12 +0200
Subject: [PATCH 16/19] docs(scaling.hpp): add full Doxygen API documentation

---
 include/gemmi/scaling.hpp | 134 ++++++++++++++++++++++++++++----------
 1 file changed, 100 insertions(+), 34 deletions(-)

diff --git a/include/gemmi/scaling.hpp b/include/gemmi/scaling.hpp
index 630774c0b..84f2b74d5 100644
--- a/include/gemmi/scaling.hpp
+++ b/include/gemmi/scaling.hpp
@@ -13,17 +13,26 @@
 
 namespace gemmi {
 
+/// @brief Type alias for a symmetric 3×3 tensor represented as 6 coefficients.
+/// Stores (u11, u22, u33, u12, u13, u23).
 using Vec6 = std::array<double, 6>;
 
+/// @brief Dot product of Vec6 with a symmetric 3×3 matrix.
+/// Used for tensor contractions in ADP refinement.
+/// @param a Vec6 vector (6-element array).
+/// @param s Symmetric 3×3 matrix.
+/// @return Dot product result.
 inline double vec6_dot(const Vec6& a, const SMat33<double>& s) {
   return a[0] * s.u11 + a[1] * s.u22 + a[2] * s.u33
        + a[3] * s.u12 + a[4] * s.u13 + a[5] * s.u23;
 }
 
-/// Symmetry constraints of ADP.
-/// The number of rows is the number of independent coefficients in U.
-/// For example, for tetragonal crystal returns two normalized Vec6 vectors
-/// in directions [1 1 0 0 0 0] and [0 0 1 0 0 0].
+/// @brief Return the symmetry-adapted constraint vectors for the ADP tensor in a space group.
+/// The number and content of returned constraint vectors depend on the crystal system.
+/// For example, cubic returns one vector [1 1 1 0 0 0] (isotropic); tetragonal returns
+/// two vectors in directions [1 1 0 0 0 0] and [0 0 1 0 0 0] (diagonal u11=u22, u33 free).
+/// @param sg Pointer to the space group; if null, triclinic symmetry is assumed (6 free components).
+/// @return Vector of normalized Vec6 constraint vectors for independent ADP components.
 inline std::vector<Vec6> adp_symmetry_constraints(const SpaceGroup* sg) {
   auto constraints = [](const std::initializer_list<int>& l) {
     constexpr double K2 = 0.70710678118654752440;  // sqrt(1/2)
@@ -68,46 +77,58 @@ inline std::vector<Vec6> adp_symmetry_constraints(const SpaceGroup* sg) {
   unreachable();
 }
 
+/// @brief Anisotropic scaling of calculated structure factors to observed data.
+/// Optionally includes bulk solvent correction: Fc + k_sol·exp(-b_sol·stol²)·Fmask.
+/// Parameter refinement uses Levenberg-Marquardt (or NLopt if WITH_NLOPT is defined).
+/// @tparam Real Floating-point type (float or double).
 template<typename Real>
 struct Scaling {
+  /// @brief One reflection used in the least-squares fit.
   struct Point {
-    Miller hkl;
-    double stol2;
-    std::complex<Real> fcmol, fmask;
-    Real fobs, sigma;
+    Miller hkl;                            ///< Miller indices.
+    double stol2;                          ///< (sin θ/λ)² for this reflection.
+    std::complex<Real> fcmol, fmask;       ///< Calculated molecular and mask structure factors.
+    Real fobs, sigma;                      ///< Observed amplitude and its standard uncertainty.
 
     Miller get_x() const { return hkl; }
     double get_y() const { return fobs; }
     double get_weight() const { return 1.0 /* / sigma*/; }
   };
 
-  UnitCell cell;
-  // model parameters
-  double k_overall = 1.;
-  // b_star = F B_cart F^T, where F - fractionalization matrix
-  SMat33<double> b_star{0, 0, 0, 0, 0, 0};
-  std::vector<Vec6> constraint_matrix;
-  bool use_solvent = false;
-  bool fix_k_sol = false;
-  bool fix_b_sol = false;
-  // initialize with average values (Fokine & Urzhumtsev, 2002)
-  double k_sol = 0.35;
-  double b_sol = 46.0;
-  std::vector<Point> points;
-
+  UnitCell cell;                           ///< Unit cell parameters.
+  double k_overall = 1.;                   ///< Overall scale factor.
+  SMat33<double> b_star{0, 0, 0, 0, 0, 0}; ///< Anisotropic B* tensor in reciprocal space.
+  std::vector<Vec6> constraint_matrix;     ///< Symmetry constraints on b_star.
+  bool use_solvent = false;                ///< If true, include bulk solvent correction.
+  bool fix_k_sol = false;                  ///< If true, do not refine k_sol.
+  bool fix_b_sol = false;                  ///< If true, do not refine b_sol.
+  double k_sol = 0.35;                     ///< Bulk solvent scale factor.
+  double b_sol = 46.0;                     ///< Bulk solvent B-factor.
+  std::vector<Point> points;               ///< Reflection data for fitting.
+
+  /// @brief Initialize with unit cell and space group.
+  /// Sets up constraint_matrix from crystal system symmetry.
+  /// @param cell_ Unit cell parameters.
+  /// @param sg Pointer to space group (for symmetry constraints).
   Scaling(const UnitCell& cell_, const SpaceGroup* sg)
       : cell(cell_), constraint_matrix(adp_symmetry_constraints(sg)) {}
 
-  // B_{overall} is stored as B* not B_{cartesian}.
-  // Use getter and setter to convert from/to B_{cartesian}.
+  /// @brief Set b_star from a real-space B matrix.
+  /// Converts from Cartesian coordinates to reciprocal-space B*.
+  /// @param b_overall B matrix in real (Cartesian) space.
   void set_b_overall(const SMat33<double>& b_overall) {
     b_star = b_overall.transformed_by(cell.frac.mat);
   }
+
+  /// @brief Get b_star converted back to real (Cartesian) space.
+  /// @return B matrix in Cartesian coordinates.
   SMat33<double> get_b_overall() const {
     return b_star.transformed_by(cell.orth.mat);
   }
 
-  // Scale data, optionally adding bulk solvent correction.
+  /// @brief Apply scaling to all reflections in-place, optionally adding bulk solvent correction.
+  /// @param asu_data ASU data to scale (modified in-place).
+  /// @param mask_data Mask (solvent) data; required if use_solvent is true, otherwise may be null.
   void scale_data(AsuData<std::complex<Real>>& asu_data,
                   const AsuData<std::complex<Real>>* mask_data) const {
     if (use_solvent && !(mask_data && mask_data->size() == asu_data.size()))
@@ -126,6 +147,12 @@ struct Scaling {
     }
   }
 
+  /// @brief Compute scaled value for one reflection.
+  /// Applies overall scale factor and optionally bulk solvent correction.
+  /// @param hkl Miller indices.
+  /// @param f_value Calculated molecular structure factor.
+  /// @param mask_value Mask (solvent) structure factor.
+  /// @return Scaled complex structure factor.
   std::complex<Real> scale_value(const Miller& hkl, std::complex<Real> f_value,
                                  std::complex<Real> mask_value) {
     if (use_solvent) {
@@ -135,6 +162,9 @@ struct Scaling {
     return f_value * (Real) get_overall_scale_factor(hkl);
   }
 
+  /// @brief Return current parameters as a flat vector.
+  /// Includes k_overall, b_star components (via constraint_matrix), and optionally k_sol and b_sol.
+  /// @return Vector of parameters in order: k_overall, [k_sol], [b_sol], b_star_components.
   std::vector<double> get_parameters() const {
     std::vector<double> ret;
     ret.push_back(k_overall);
@@ -149,7 +179,9 @@ struct Scaling {
     return ret;
   }
 
-  /// set k_overall, k_sol, b_sol, b_star
+  /// @brief Set parameters from pointer or vector.
+  /// Updates k_overall, b_star components (via constraint_matrix), and optionally k_sol and b_sol.
+  /// @param p Pointer to parameter array (order: k_overall, [k_sol], [b_sol], b_star_components).
   void set_parameters(const double* p) {
     k_overall = p[0];
     int n = 0;
@@ -171,11 +203,17 @@ struct Scaling {
     }
   }
 
+  /// @brief Set parameters from vector.
+  /// @param p Vector of parameters.
   void set_parameters(const std::vector<double>& p) {
     set_parameters(p.data());
   }
 
-  // pre: all AsuData args are sorted
+  /// @brief Populate points from matching reflections in calc and obs datasets.
+  /// Precondition: all AsuData arguments must be sorted by Miller indices.
+  /// @param calc Calculated structure factors.
+  /// @param obs Observed amplitudes and sigmas.
+  /// @param mask_data Mask structure factors; required if use_solvent is true.
   void prepare_points(const AsuData<std::complex<Real>>& calc,
                       const AsuData<ValueSigma<Real>>& obs,
                       const AsuData<std::complex<Real>>* mask_data) {
@@ -210,21 +248,32 @@ struct Scaling {
   }
 
 
+  /// @brief Compute k_sol * exp(-b_sol * stol²) for bulk solvent correction.
+  /// @param stol2 (sin θ/λ)² value.
+  /// @return Solvent scale factor.
   double get_solvent_scale(double stol2) const {
     return k_sol * std::exp(-b_sol * stol2);
   }
 
+  /// @brief Compute k_overall * exp(-b_star:hkl) for overall scale factor.
+  /// @param hkl Miller indices.
+  /// @return Overall scale factor including anisotropic B*.
   double get_overall_scale_factor(const Miller& hkl) const {
     return k_overall * std::exp(-0.25 * b_star.r_u_r(hkl));
   }
 
+  /// @brief Compute total calculated structure factor for point p.
+  /// Includes molecular part and optionally bulk solvent correction.
+  /// @param p Reflection point.
+  /// @return Total Fcalc = Fcmol + k_sol·exp(-b_sol·stol²)·Fmask (or just Fcmol if no solvent).
   std::complex<Real> get_fcalc(const Point& p) const {
     if (!use_solvent)
       return p.fcmol;
     return p.fcmol + (Real)get_solvent_scale(p.stol2) * p.fmask;
   }
 
-  // quick linear fit (ignoring sigma) to get initial k_overall and isotropic B
+  /// @brief Quick linear least-squares fit of isotropic B only (ignoring sigma).
+  /// Used for initialization before full refinement.
   void fit_isotropic_b_approximately() {
     double sx = 0, sy = 0, sxx = 0, sxy = 0;
     int n = 0;
@@ -249,7 +298,8 @@ struct Scaling {
     set_b_overall({b_iso, b_iso, b_iso, 0, 0, 0});
   }
 
-  // least-squares fitting of k_overall only
+  /// @brief Compute optimal k_overall by linear least squares.
+  /// @return Optimal scale factor.
   double lsq_k_overall() const {
     double sxx = 0, sxy = 0;
     for (const Point& p : points) {
@@ -263,10 +313,9 @@ struct Scaling {
     return sxx != 0. ? sxy / sxx : 1.;
   }
 
-  // For testing only, don't use it.
-  // Estimates anisotropic b_star using other parameters (incl. isotropic B),
-  // following P. Afonine et al, doi:10.1107/S0907444913000462 sec. 2.1.
-  // The symmetry constraints are not implemented - don't use it!
+  /// @brief Fit anisotropic B* by linear approximation (for testing/initialization only).
+  /// DO NOT USE for production: symmetry constraints are not implemented.
+  /// Based on P. Afonine et al., doi:10.1107/S0907444913000462, section 2.1.
   void fit_b_star_approximately() {
     double b_iso = 1/3. * get_b_overall().trace();
     //size_t nc = constraint_matrix.size();
@@ -297,11 +346,15 @@ struct Scaling {
     //printf("fitted B = {%g %g %g  %g %g %g}\n", e[0], e[1], e[2], e[3], e[4], e[5]);
   }
 
+  /// @brief Full Levenberg-Marquardt optimization of all parameters.
+  /// @return Final R factor.
   double fit_parameters() {
     LevMar levmar;
     return levmar.fit(*this);
   }
 
+  /// @brief Compute R-factor: Σ|Fobs - Fcalc| / Σ|Fobs|.
+  /// @return R-factor value.
   double calculate_r_factor() const {
     double abs_diff_sum = 0;
     double denom = 0;
@@ -312,11 +365,17 @@ struct Scaling {
     return abs_diff_sum / denom;
   }
 
-  // interface for fitting
+  /// @brief Compute Fcalc for point p (interface for fitting).
+  /// @param p Reflection point.
+  /// @return Scaled calculated structure factor amplitude.
   double compute_value(const Point& p) const {
     return std::abs(get_fcalc(p)) * (Real) get_overall_scale_factor(p.hkl);
   }
 
+  /// @brief Compute Fcalc and its derivatives with respect to all parameters.
+  /// @param p Reflection point.
+  /// @param dy_da Output vector to fill with derivatives [dy/dk_overall, dy/dk_sol, dy/db_sol, dy/db_star_components].
+  /// @return Calculated structure factor amplitude.
   double compute_value_and_derivatives(const Point& p, std::vector<double>& dy_da) const {
     Vec3 h(p.hkl);
     double kaniso = std::exp(-0.25 * b_star.r_u_r(h));
@@ -353,6 +412,13 @@ struct Scaling {
   }
 };
 
+/// @brief Fit scaling parameters using NLopt with the named optimizer.
+/// Only available when compiled with WITH_NLOPT.
+/// @tparam Real Floating-point type (float or double).
+/// @param scaling The Scaling object to optimize.
+/// @param optimizer Name of the optimizer algorithm (NLopt algorithm string).
+/// @return Final residual value (WSSR).
+/// @note This function is for testing and evaluation purposes.
 // only for testing and evaluation - scaling with NLOpt
 #if WITH_NLOPT
 namespace impl {

From fc6cd20d918bc9128a8c477955d485664de3247b Mon Sep 17 00:00:00 2001
From: "C. Vonrhein" <CV-GPhL>
Date: Wed, 22 Apr 2026 23:11:28 +0200
Subject: [PATCH 17/19] docs(dssp.hpp): add full Doxygen API documentation

Add comprehensive Doxygen documentation comments to all public items in
the DSSP secondary structure assignment module:
- All enum types and values (SecondaryStructure, TurnType, HelixPosition,
  BridgeType, HydrogenMode, HBondDefinition)
- All public structs and their data members (Bridge, HBond,
  SecondaryStructureInfo, DsspOptions, DsspCalculator)
- All public methods with @param, @return, and @brief tags
- The calculate_dssp convenience function
- Used @tparam and other tags as appropriate for clarity

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 include/gemmi/dssp.hpp | 190 ++++++++++++++++++++++++++---------------
 1 file changed, 122 insertions(+), 68 deletions(-)

diff --git a/include/gemmi/dssp.hpp b/include/gemmi/dssp.hpp
index ddcaef76e..7af6ea651 100644
--- a/include/gemmi/dssp.hpp
+++ b/include/gemmi/dssp.hpp
@@ -12,89 +12,101 @@
 
 namespace gemmi {
 
-// Secondary structure types as defined in DSSP
+/// @brief Secondary structure type codes as defined in DSSP.
 enum class SecondaryStructure : char {
-  Loop = '~',        // Loop/coil
-  Break = '=',       // Break
-  Bend = 'S',        // Bend
-  Turn = 'T',        // Turn
-  Helix_PP = 'P',    // Polyproline helix
-  Helix_5 = 'I',     // Pi helix (5-turn)
-  Helix_3 = 'G',     // 3-10 helix
-  Strand = 'E',      // Extended strand
-  Bridge = 'B',      // Beta bridge
-  Helix_4 = 'H'      // Alpha helix (4-turn)
+  Loop = '~',        /// Coil/loop (unassigned secondary structure)
+  Break = '=',       /// Chain break
+  Bend = 'S',        /// Bend (local geometry criterion)
+  Turn = 'T',        /// Hydrogen-bonded turn
+  Helix_PP = 'P',    /// Polyproline II helix
+  Helix_5 = 'I',     /// π-helix (5-turn)
+  Helix_3 = 'G',     /// 3₁₀-helix (3-turn)
+  Strand = 'E',      /// Extended β-strand
+  Bridge = 'B',      /// Isolated β-bridge
+  Helix_4 = 'H'      /// α-helix (4-turn)
 };
 
-// Turn types
+/// @brief Hydrogen bond turn types, indexed by residue separation (i to i+n).
 enum class TurnType {
-  Turn_3 = 3,
-  Turn_4 = 4,
-  Turn_5 = 5,
-  Turn_PP = 6
+  Turn_3 = 3,   /// 3-residue turn (i to i+3 hydrogen bond)
+  Turn_4 = 4,   /// 4-residue turn (i to i+4 hydrogen bond)
+  Turn_5 = 5,   /// 5-residue turn (i to i+5 hydrogen bond)
+  Turn_PP = 6   /// Polyproline II type turn (i to i+6)
 };
 
-// Helix positions
+/// @brief Position of a residue within a helix.
 enum class HelixPosition {
-  None = 0,
-  Start,
-  Middle,
-  End,
-  StartAndEnd
+  None = 0,       /// Not part of a helix
+  Start,          /// First residue of a helix
+  Middle,         /// Interior residue of a helix
+  End,            /// Last residue of a helix
+  StartAndEnd     /// Single-residue helix (acts as both start and end)
 };
 
-// Bridge types
+/// @brief Type of β-bridge partnership between residues.
 enum class BridgeType {
-  None = 0,
-  Parallel,
-  AntiParallel
+  None = 0,       /// No bridge
+  Parallel,       /// Parallel β-bridge
+  AntiParallel    /// Antiparallel β-bridge
 };
 
+/// @brief A simple β-bridge record between two residues.
 struct Bridge {
-    size_t partner1;
-    size_t partner2;
-    BridgeType type;
+    size_t partner1;     /// Chain index of the first bridge residue
+    size_t partner2;     /// Chain index of the second bridge residue
+    BridgeType type;     /// Type of bridge (Parallel or AntiParallel)
 };
 
-// Hydrogen bond modes
+/// @brief Source of hydrogen atoms for hydrogen bond detection.
 enum class HydrogenMode {
-  Existing = 0,  // Use existing hydrogen atoms from structure
-  Calculate      // Calculate hydrogen positions (original DSSP method)
+  Existing = 0,  /// Use hydrogen atoms already present in the structure
+  Calculate      /// Compute idealized hydrogen positions (original DSSP method, for structures lacking explicit H)
 };
 
-// Hydrogen bond definition
+/// @brief Criterion for hydrogen bond acceptance.
 enum class HBondDefinition {
-  Energy = 0,   // Energy-based (original DSSP)
-  Geometry      // Geometry-based (distance + angle)
+  Energy = 0,   /// Use Kabsch-Sander energy criterion (E < cutoff in kcal/mol)
+  Geometry      /// Use geometric criteria (distance and angle thresholds)
 };
 
+/// @brief Record of one hydrogen bond detected by DSSP.
 struct GEMMI_DLL HBond {
-  Topo::ResInfo *donor = nullptr, *acceptor = nullptr;
-  char alt1 = '\0';
-  char alt2 = '\0';
-  double energy = 0;
-  bool is_valid = false;
+  Topo::ResInfo *donor = nullptr;       /// Pointer to the donor residue (NH group)
+  Topo::ResInfo *acceptor = nullptr;    /// Pointer to the acceptor residue (C=O group)
+  char alt1 = '\0';                      /// Alternate conformer of the donor atom
+  char alt2 = '\0';                      /// Alternate conformer of the acceptor atom
+  double energy = 0;                     /// Hydrogen bond energy in kcal/mol (for Energy mode); NaN if not applicable
+  bool is_valid = false;                 /// True if this bond meets the cutoff criterion
 };
 
-// Per-residue secondary structure information
+/// @brief Per-residue secondary structure annotation and structural features.
 struct GEMMI_DLL SecondaryStructureInfo {
-  SecondaryStructure ss_type = SecondaryStructure::Loop;
-  std::vector<size_t> parallel_bridges;
-  std::vector<size_t> antiparallel_bridges;
-  std::vector<SecondaryStructureInfo*> break_partners;
+  SecondaryStructure ss_type = SecondaryStructure::Loop;  /// Assigned secondary structure code
+  std::vector<size_t> parallel_bridges;                   /// Chain indices of residues forming parallel β-bridges with this residue
+  std::vector<size_t> antiparallel_bridges;               /// Chain indices of residues forming antiparallel β-bridges with this residue
+  std::vector<SecondaryStructureInfo*> break_partners;    /// Pointers to SecondaryStructureInfo of chain-break partners
   std::array<HelixPosition, 4> helix_positions = {HelixPosition::None, HelixPosition::None,
-                                                  HelixPosition::None, HelixPosition::None};
-  bool has_break = false;
-  bool nturn_acceptor = false;
+                                                  HelixPosition::None, HelixPosition::None};  /// Helix position for each turn type (indexed by TurnType-3)
+  bool has_break = false;                                 /// True if there is a chain break before this residue
+  bool nturn_acceptor = false;                            /// True if this residue is the acceptor of an n-turn hydrogen bond
 
+  /// @brief Set the helix position for a given turn type.
+  /// @param turn TurnType value (Turn_3, Turn_4, Turn_5, or Turn_PP)
+  /// @param pos HelixPosition value
   void set_helix_position(TurnType turn, HelixPosition pos) {
     helix_positions[static_cast<int>(turn) - 3] = pos;
   }
 
+  /// @brief Retrieve the helix position for a given turn type.
+  /// @param turn TurnType value
+  /// @return HelixPosition at the specified turn
   HelixPosition get_helix_position(TurnType turn) const {
     return helix_positions[static_cast<int>(turn) - 3];
   }
 
+  /// @brief Record a β-bridge partnership with another residue.
+  /// @param partner_idx Chain index of the bridge partner
+  /// @param type BridgeType (Parallel or AntiParallel)
   void add_bridge(size_t partner_idx, BridgeType type) {
     if (type == BridgeType::Parallel) {
       parallel_bridges.push_back(partner_idx);
@@ -103,43 +115,65 @@ struct GEMMI_DLL SecondaryStructureInfo {
     }
   }
 
+  /// @brief Check if this residue has any bridges of the given type.
+  /// @param type BridgeType to check
+  /// @return True if this residue has at least one bridge of the given type
   bool has_bridges(BridgeType type) const {
     return type == BridgeType::Parallel ? !parallel_bridges.empty()
                                        : !antiparallel_bridges.empty();
   }
 };
 
-// DSSP options/parameters
+/// @brief Configuration parameters for the DSSP secondary structure algorithm.
 struct GEMMI_DLL DsspOptions {
-  HydrogenMode hydrogen_mode = HydrogenMode::Calculate;
-  HBondDefinition hbond_definition = HBondDefinition::Energy;
-  double cutoff = 0.9;  // nm
-  bool pi_helix_preference = true;
-  bool search_polyproline = true;
-  bool shortened_pp_stretch = false;
-  double hbond_energy_cutoff = -0.5;  // kcal/mol
-  double min_ca_distance = 9.0;  // Angstrom
-  double bend_angle_min = 70.0;  // degrees
-  double max_peptide_bond_distance = 2.5;  // Angstrom
+  HydrogenMode hydrogen_mode = HydrogenMode::Calculate;  /// How to source hydrogen atoms (Existing or Calculate)
+  HBondDefinition hbond_definition = HBondDefinition::Energy;  /// Criterion for hydrogen bond acceptance (Energy or Geometry)
+  double cutoff = 0.9;  /// Distance cutoff in nm for hydrogen bond search (default 0.9)
+  bool pi_helix_preference = true;  /// If true, prefer π-helix over α-helix at ambiguous positions
+  bool search_polyproline = true;  /// If true, detect polyproline II helices
+  bool shortened_pp_stretch = false;  /// If true, allow polyproline stretches shorter than canonical
+  double hbond_energy_cutoff = -0.5;  /// Energy threshold in kcal/mol for hydrogen bond acceptance (default -0.5)
+  double min_ca_distance = 9.0;  /// Minimum Cα–Cα distance in Angstrom to consider residues potentially bonded (default 9.0)
+  double bend_angle_min = 70.0;  /// Minimum bend angle in degrees to assign Bend secondary structure (default 70.0)
+  double max_peptide_bond_distance = 2.5;  /// Maximum C–N distance in Angstrom considered a valid peptide bond (default 2.5)
 };
 
-// Main DSSP calculator class
+/// @brief Main DSSP engine that performs secondary structure assignment for a protein chain.
+/// Detects and annotates secondary structure elements (helices, strands, turns, etc.)
+/// based on hydrogen bond patterns and geometry.
 struct GEMMI_DLL DsspCalculator {
+  /// @brief Initialize the calculator with options.
+  /// @param opts DsspOptions configuration (default: standard DSSP parameters)
   explicit DsspCalculator(const DsspOptions& opts = DsspOptions{}) : options(opts) {}
 
-  // Calculate secondary structure for a chain
+  /// @brief Run the full DSSP secondary structure assignment for a chain.
+  /// Requires a populated NeighborSearch for spatial lookups.
+  /// @param ns NeighborSearch object with the protein atoms indexed
+  /// @param chain_info Topo::ChainInfo containing residue topology
+  /// @return Secondary structure string, one character per residue
   std::string calculate_secondary_structure(NeighborSearch& ns, Topo::ChainInfo& chain_info);
 
-  // Get detailed secondary structure information
+  /// @brief Access the per-residue secondary structure information.
+  /// @return Const reference to the vector of SecondaryStructureInfo
   const std::vector<SecondaryStructureInfo>& get_detailed_info() const { return ss_info; }
 
-  DsspOptions options;
-  std::vector<SecondaryStructureInfo> ss_info;
-  std::vector<Bridge> bridges_;
+  DsspOptions options;                              /// Configuration options for this calculation
+  std::vector<SecondaryStructureInfo> ss_info;      /// Per-residue secondary structure and structural features (populated after calculate_secondary_structure)
+  std::vector<Bridge> bridges_;                      /// List of all detected β-bridges
 
-  // Calculate hydrogen bonds
+  /// @brief Detect and record all hydrogen bonds in the chain.
+  /// @param ns NeighborSearch object with atoms indexed
+  /// @param chain_info Topo::ChainInfo containing residue topology
   void calculate_hydrogen_bonds(NeighborSearch& ns, Topo::ChainInfo& chain_info);
+
+  /// @brief Compute and store Kabsch-Sander hydrogen bond energy between donor and acceptor.
+  /// @param donor Topo::ResInfo pointer for the NH (donor) residue
+  /// @param acceptor Topo::ResInfo pointer for the C=O (acceptor) residue
   void calculate_hbond_energy(Topo::ResInfo* donor, Topo::ResInfo* acceptor);
+
+  /// @brief Evaluate geometric hydrogen bond criterion (distance and angle) between donor and acceptor.
+  /// @param donor Topo::ResInfo pointer for the NH (donor) residue
+  /// @param acceptor Topo::ResInfo pointer for the C=O (acceptor) residue
   void calculate_hbond_geometry(Topo::ResInfo* donor, Topo::ResInfo* acceptor);
 
   /*
@@ -150,16 +184,36 @@ struct GEMMI_DLL DsspCalculator {
   void find_polyproline_helices(Topo::ChainInfo& chain_info);
   */
 
-  // Utility functions
+  /// @brief Check if a valid hydrogen bond exists from donor to acceptor.
+  /// @param donor Topo::ResInfo pointer for the donor
+  /// @param acceptor Topo::ResInfo pointer for the acceptor
+  /// @return True if a hydrogen bond exists between the residues
   bool has_hbond_between(Topo::ResInfo* donor, Topo::ResInfo* acceptor) const;
+
+  /// @brief Check for chain breaks between two residue indices.
+  /// @param chain_info Topo::ChainInfo containing residue topology
+  /// @param res1_idx Chain index of the first residue
+  /// @param res2_idx Chain index of the second residue
+  /// @return True if no chain breaks exist between res1_idx and res2_idx
   bool no_chain_breaks_between(Topo::ChainInfo& chain_info, size_t res1_idx, size_t res2_idx) const;
+
+  /// @brief Determine the type of β-bridge between two residues.
+  /// @param chain_info Topo::ChainInfo containing residue topology
+  /// @param res1_idx Chain index of the first residue
+  /// @param res2_idx Chain index of the second residue
+  /// @return BridgeType (Parallel, AntiParallel, or None)
   BridgeType calculate_bridge_type(Topo::ChainInfo& chain_info, size_t res1_idx, size_t res2_idx) const;
 
   // Generate final secondary structure string
   //std::string generate_ss_string() const;
 };
 
-// Convenience function for simple use cases
+/// @brief Convenience function to calculate secondary structure with default or custom options.
+/// Creates a DsspCalculator with the given options and runs the full secondary structure assignment.
+/// @param ns NeighborSearch object with atoms indexed for spatial lookups
+/// @param cinfo Topo::ChainInfo containing the chain topology
+/// @param opts DsspOptions configuration (default: standard DSSP parameters)
+/// @return Secondary structure string, one character per residue
 GEMMI_DLL std::string calculate_dssp(NeighborSearch& ns, Topo::ChainInfo& cinfo,
                                      const DsspOptions& opts = DsspOptions{});
 

From 558543305335a151f679de158de13b16ecb27982 Mon Sep 17 00:00:00 2001
From: Clemens Vonrhein <vonrhein@globalphasing.com>
Date: Wed, 22 Apr 2026 23:18:45 +0200
Subject: [PATCH 18/19] docs(api.rst): add Calculations and Analysis +
 Structure Factor sections (PR 7)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 docs/api.rst | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

diff --git a/docs/api.rst b/docs/api.rst
index 2386adfca..ee89d7e99 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -29,3 +29,52 @@ Map and Grid Data
 
 .. doxygenfile:: grid.hpp
    :project: gemmi
+
+Calculations and Analysis
+-------------------------
+
+Geometric calculations, sequence alignment, structure superposition,
+neighbour search, contact detection, biological assembly, atom selection,
+structure modification, polymer heuristics, and secondary structure assignment.
+
+.. doxygenfile:: calculate.hpp
+   :project: gemmi
+
+.. doxygenfile:: align.hpp
+   :project: gemmi
+
+.. doxygenfile:: neighbor.hpp
+   :project: gemmi
+
+.. doxygenfile:: contact.hpp
+   :project: gemmi
+
+.. doxygenfile:: assembly.hpp
+   :project: gemmi
+
+.. doxygenfile:: select.hpp
+   :project: gemmi
+
+.. doxygenfile:: modify.hpp
+   :project: gemmi
+
+.. doxygenfile:: polyheur.hpp
+   :project: gemmi
+
+.. doxygenfile:: dssp.hpp
+   :project: gemmi
+
+Structure Factor Calculations
+-----------------------------
+
+Direct structure factor summation, amplitude normalisation (F→E),
+and anisotropic scaling with optional bulk-solvent correction.
+
+.. doxygenfile:: sfcalc.hpp
+   :project: gemmi
+
+.. doxygenfile:: ecalc.hpp
+   :project: gemmi
+
+.. doxygenfile:: scaling.hpp
+   :project: gemmi

From d69b16cf8a2f6bf9ebcd83f4683759991d3f1a49 Mon Sep 17 00:00:00 2001
From: Clemens Vonrhein <vonrhein@globalphasing.com>
Date: Thu, 23 Apr 2026 15:09:09 +0200
Subject: [PATCH 19/19] docs: add @par References citations to calculations
 headers

sfcalc.hpp: Bourhis et al. (2015) for structure factor computation context.
scaling.hpp: Afonine et al. (2012) for anisotropic scaling;
  Fokine/Urzhumtsev (2002) for initial bulk-solvent parameters.
calculate.hpp: Merritt (2011) for B_est formula.
dssp.hpp: Kabsch/Sander (1983) for DSSP algorithm.

Co-authored-by: C. Vonrhein / CV-GPhL
---
 include/gemmi/calculate.hpp |  5 +++--
 include/gemmi/dssp.hpp      |  8 ++++++--
 include/gemmi/scaling.hpp   | 10 +++++++++-
 include/gemmi/sfcalc.hpp    |  8 ++++++++
 4 files changed, 26 insertions(+), 5 deletions(-)

diff --git a/include/gemmi/calculate.hpp b/include/gemmi/calculate.hpp
index 7569fbaec..7e2a3883d 100644
--- a/include/gemmi/calculate.hpp
+++ b/include/gemmi/calculate.hpp
@@ -188,10 +188,11 @@ inline Box<Fractional> calculate_fractional_box(const Structure& st, double marg
 
 
 /// @brief Calculate B_equiv from anisotropic B-tensor (or B_iso if isotropic).
-/// Based on E. Merritt, "Some B_eq are more equivalent than others",
-/// Acta Cryst. A67, 512 (2011).
 /// @param atom Atom with (possibly anisotropic) B-factor
 /// @return B_equiv = sqrt((sum_eigenvalues) / (sum_inverse_eigenvalues)) * u_to_b()
+/// @par References
+/// Merritt, E.A. (2011). Some B_eq are more equivalent than others.
+/// Acta Cryst. A67, 512–516. https://doi.org/10.1107/S0108767311034350
 inline double calculate_b_est(const Atom& atom) {
   auto eig = atom.aniso.calculate_eigenvalues();
   return u_to_b() * std::sqrt((eig[0] + eig[1] + eig[2]) /
diff --git a/include/gemmi/dssp.hpp b/include/gemmi/dssp.hpp
index 7af6ea651..b714ccb35 100644
--- a/include/gemmi/dssp.hpp
+++ b/include/gemmi/dssp.hpp
@@ -139,8 +139,12 @@ struct GEMMI_DLL DsspOptions {
 };
 
 /// @brief Main DSSP engine that performs secondary structure assignment for a protein chain.
-/// Detects and annotates secondary structure elements (helices, strands, turns, etc.)
-/// based on hydrogen bond patterns and geometry.
+/// @details Detects and annotates secondary structure elements (helices, strands, turns, etc.)
+/// based on hydrogen bond patterns and geometry, following the original DSSP algorithm.
+/// @par References
+/// Kabsch, W. & Sander, C. (1983). Dictionary of protein secondary structure:
+/// pattern recognition of hydrogen-bonded and geometrical features.
+/// Biopolymers 22, 2577–2637. https://doi.org/10.1002/bip.360221211
 struct GEMMI_DLL DsspCalculator {
   /// @brief Initialize the calculator with options.
   /// @param opts DsspOptions configuration (default: standard DSSP parameters)
diff --git a/include/gemmi/scaling.hpp b/include/gemmi/scaling.hpp
index 84f2b74d5..ede7e45ee 100644
--- a/include/gemmi/scaling.hpp
+++ b/include/gemmi/scaling.hpp
@@ -78,9 +78,17 @@ inline std::vector<Vec6> adp_symmetry_constraints(const SpaceGroup* sg) {
 }
 
 /// @brief Anisotropic scaling of calculated structure factors to observed data.
-/// Optionally includes bulk solvent correction: Fc + k_sol·exp(-b_sol·stol²)·Fmask.
+/// @details Optionally includes bulk solvent correction: Fc + k_sol·exp(-b_sol·stol²)·Fmask.
 /// Parameter refinement uses Levenberg-Marquardt (or NLopt if WITH_NLOPT is defined).
 /// @tparam Real Floating-point type (float or double).
+/// @par References
+/// Afonine, P.V. et al. (2012). Towards automated crystallographic structure
+/// refinement with phenix.refine. Acta Cryst. D68, 352–367.
+/// https://doi.org/10.1107/S0907444913000462
+///
+/// Fokine, A. & Urzhumtsev, A. (2002). Flat bulk-solvent model: obtaining
+/// optimal parameters. Acta Cryst. A58, 384–392.
+/// https://doi.org/10.1107/S0108767302005669
 template<typename Real>
 struct Scaling {
   /// @brief One reflection used in the least-squares fit.
diff --git a/include/gemmi/sfcalc.hpp b/include/gemmi/sfcalc.hpp
index 22bfd7156..d3d79d9f9 100644
--- a/include/gemmi/sfcalc.hpp
+++ b/include/gemmi/sfcalc.hpp
@@ -28,8 +28,16 @@ inline std::complex<double> calculate_sf_part(const Fractional& fpos,
 }
 
 /// @brief Calculates structure factors by direct summation over all atoms.
+/// @details Simple direct summation; for optimised FFT-based calculations see
+/// dencalc.hpp + fourier.hpp.
 /// @tparam Table Scattering factor table type (e.g., IT92, WK95, ElectronTable).
 ///         Must have a nested Coef type with coef_type member.
+/// @par References
+/// Bourhis, L.J., Dolomanov, O.V., Gildea, R.J., Howard, J.A.K. &
+/// Puschmann, H. (2015). The anatomy of a comprehensive constrained,
+/// restrained refinement program for the modern computing environment —
+/// Olex2 dissected. Acta Cryst. A70, 300–311.
+/// https://doi.org/10.1107/S2053273314022207
 template <typename Table>
 class StructureFactorCalculator {
 public: