diff --git a/.appveyor.yml b/.appveyor.yml index f44d333f..9241f29b 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -53,7 +53,7 @@ for: - py -m unittest discover -v -s tests/ - cd docs - set PYTHONPATH=.. - - py -m pip install --no-warn-script-location sphinx sphinx-inline-tabs + - py -m pip install --no-warn-script-location sphinx sphinx-inline-tabs breathe - py -m sphinx -M doctest . _build -n -E artifacts: diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f399317c..fdfc9c30 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -217,6 +217,22 @@ jobs: - name: run tests under valgrind run: PYTHONMALLOC=malloc valgrind python3 -m unittest discover -v -s tests/ + docs: + name: "Docs build (Doxygen + Sphinx/Breathe)" + runs-on: ubuntu-latest + if: "!contains(github.event.head_commit.message, '[skip ci]')" + steps: + - uses: actions/checkout@v4 + - name: Install dependencies + run: | + sudo apt-get update + sudo apt-get install -y doxygen + python3 -m pip install sphinx sphinx-inline-tabs breathe furo + - name: Build HTML docs + run: | + cd docs + sphinx-build -M html . _build -n -E + almalinux: runs-on: ubuntu-latest name: "AlmaLinux 8" diff --git a/.gitignore b/.gitignore index 0aa5b7ab..6f22aa37 100644 --- a/.gitignore +++ b/.gitignore @@ -83,3 +83,6 @@ fortran/*.a # test programs fortran/fsym fortran/ftest + +# Doxygen-generated output (consumed by Breathe/Sphinx, not committed) +docs/_doxygen/ diff --git a/.readthedocs.yaml b/.readthedocs.yaml index e86ae0e3..909080dc 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -10,6 +10,8 @@ build: os: ubuntu-24.04 tools: python: "3.12" + apt_packages: + - doxygen # Format htmlzip produces a single page. Instead, use zip as shown in: # https://github.com/readthedocs/readthedocs.org/issues/3242#issuecomment-1410321534 diff --git a/docs/Doxyfile b/docs/Doxyfile new file mode 100644 index 00000000..d305f175 --- /dev/null +++ b/docs/Doxyfile @@ -0,0 +1,48 @@ +PROJECT_NAME = "Gemmi" +PROJECT_BRIEF = "A library for macromolecular crystallography" +PROJECT_NUMBER = + +# Input +INPUT = ../include/gemmi +FILE_PATTERNS = *.hpp +STRIP_FROM_PATH = ../include +STRIP_FROM_INC_PATH = ../include +RECURSIVE = NO +# All public headers are flat in include/gemmi/; the only subdirectory is +# third_party/ which must not be documented. + +# Exclude internal/data-only headers +EXCLUDE_PATTERNS = ace_*.hpp \ + acedrg_tables.hpp \ + mc_tables.hpp \ + eig3.hpp \ + cc_adj.hpp \ + ccp4ener.hpp \ + mmcif_impl.hpp + +# Output: XML only (Breathe consumes this; Sphinx produces HTML) +GENERATE_XML = YES +GENERATE_HTML = NO +GENERATE_LATEX = NO +XML_OUTPUT = _doxygen/xml + +# Extraction +EXTRACT_ALL = YES +EXTRACT_PRIVATE = NO +EXTRACT_STATIC = YES +EXTRACT_ANON_NSPACES = NO + +# Preprocessing (needed for GEMMI_DLL export macro and similar) +ENABLE_PREPROCESSING = YES +MACRO_EXPANSION = YES +EXPAND_ONLY_PREDEF = YES +PREDEFINED = GEMMI_DLL= + +# Quiet build, warnings to file +QUIET = YES +WARN_IF_UNDOCUMENTED = NO +WARN_LOGFILE = _doxygen/doxygen-warnings.log + +# Source browsing off (Breathe handles cross-references) +SOURCE_BROWSER = NO +INLINE_SOURCES = NO diff --git a/docs/api.rst b/docs/api.rst index 19b24801..00982747 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -22,6 +22,94 @@ Core Data Structures .. doxygenfile:: model.hpp :project: gemmi +Map and Grid Data +----------------- + +*(Stub — full documentation added in PR 6.)* + +.. doxygenfile:: grid.hpp + :project: gemmi + +Scattering, Math, and Geometry +------------------------------- + +Form factor tables, anomalous scattering, Bessel function helpers, core +linear-algebra primitives, unit-cell reduction, and numerical tools used +throughout structure-factor and density calculations. + +*(Full documentation added in PR 9.)* + +.. doxygenfile:: fprime.hpp + :project: gemmi + +.. doxygenfile:: formfact.hpp + :project: gemmi + +.. doxygenfile:: it92.hpp + :project: gemmi + +.. doxygenfile:: c4322.hpp + :project: gemmi + +.. doxygenfile:: neutron92.hpp + :project: gemmi + +.. doxygenfile:: bessel.hpp + :project: gemmi + +.. doxygenfile:: math.hpp + :project: gemmi + +.. doxygenfile:: cellred.hpp + :project: gemmi + +Sequence Alignment and Twinning +-------------------------------- + +Sequence utilities, pairwise alignment, twinning-law discovery, and +interoperability helpers. + +*(Full documentation added in PR 9.)* + +.. doxygenfile:: seqtools.hpp + :project: gemmi + +.. doxygenfile:: seqalign.hpp + :project: gemmi + +.. doxygenfile:: twin.hpp + :project: gemmi + +.. doxygenfile:: serialize.hpp + :project: gemmi + +.. doxygenfile:: interop.hpp + :project: gemmi + +.. doxygenfile:: flat.hpp + :project: gemmi + +.. doxygenfile:: smarts.hpp + :project: gemmi + +Density Analysis and Numerical Methods +--------------------------------------- + +Electron density blob finding, isosurface extraction, Levenberg-Marquardt +least-squares minimization, and quaternion-based superposition (QCP). + +*(Full documentation added in PR 9.)* + +.. doxygenfile:: blob.hpp + :project: gemmi + +.. doxygenfile:: isosurface.hpp + :project: gemmi + +.. doxygenfile:: levmar.hpp + :project: gemmi + +.. doxygenfile:: qcp.hpp Reflection Data --------------- diff --git a/docs/conf.py b/docs/conf.py index 2d8a8b95..55fab5e3 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,11 +1,25 @@ # -*- coding: utf-8 -*- +import os +import shutil +import subprocess + +_docs_dir = os.path.dirname(os.path.abspath(__file__)) +_doxygen_xml_dir = os.path.join(_docs_dir, "_doxygen", "xml") + +# Run Doxygen before Sphinx processes doxygenfile:: directives. +# Skipped gracefully when doxygen is not installed (e.g. CI doctest runner). +if shutil.which('doxygen'): + subprocess.check_call(['doxygen', 'Doxyfile'], cwd=_docs_dir) + # -- General configuration ------------------------------------------------ # while we use Sphinx 8+, old version suffices to run doctests needs_sphinx = '5.3.0' extensions = ['sphinx.ext.doctest', 'sphinx_inline_tabs'] +if os.path.isdir(_doxygen_xml_dir): + extensions.append('breathe') templates_path = ['_templates'] @@ -125,3 +139,11 @@ def _compute_navigation_tree(context: Dict[str, Any]) -> str: import gemmi gemmi.set_leak_warnings(False) ''' + +# -- Breathe configuration (Doxygen XML → Sphinx) ------------------------- +# Only active when _doxygen/xml/ exists (i.e. doxygen was run). + +if os.path.isdir(_doxygen_xml_dir): + breathe_projects = {"gemmi": _doxygen_xml_dir} + breathe_default_project = "gemmi" + breathe_default_members = ('members',) # show all public members in every doxygenfile:: directive diff --git a/docs/index.rst b/docs/index.rst index 4302a378..c1f524ce 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -93,7 +93,7 @@ Contents ChangeLog Python API reference - C++ API reference + api Credits ======= diff --git a/docs/requirements.txt b/docs/requirements.txt index 223a532b..804a2925 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,3 +1,4 @@ sphinx >= 8.1.3 furo >= 2024.8.6 sphinx-inline-tabs +breathe >= 4.35 diff --git a/include/gemmi/bessel.hpp b/include/gemmi/bessel.hpp index cdd6d04d..b50044f2 100644 --- a/include/gemmi/bessel.hpp +++ b/include/gemmi/bessel.hpp @@ -18,6 +18,11 @@ namespace gemmi { +/// @brief Evaluate degree-(N-1) polynomial by Horner's method +/// @tparam N number of coefficients +/// @param poly coefficient array; poly[0] is the constant term +/// @param x evaluation point +/// @return polynomial value at x template inline double evaluate_polynomial(const double(&poly)[N], double x) { static_assert(N > 1, ""); @@ -27,13 +32,20 @@ inline double evaluate_polynomial(const double(&poly)[N], double x) { return result; } +/// @brief Polynomial coefficient tables for Bessel function approximations +/// @details Wrapped in a template to enable header-only inline variables template struct BesselTables_ { + /// @brief Polynomial coefficients for I0 approximation, small x static const double P1[8]; + /// @brief Polynomial coefficients for I0 approximation, small x static const double Q1[9]; + /// @brief Polynomial coefficients for I0 approximation, large x static const double P2[5]; + /// @brief Polynomial coefficients for I0 approximation, large x (x < 50) static const double Q2[5]; + /// @brief Polynomial coefficients for I0 approximation, very large x (x >= 50) static const double Q3[3]; }; template const double BesselTables_::P1[8] = { @@ -78,6 +90,10 @@ template const double BesselTables_::Q3[3] = { }; +/// @brief Compute I₁(x)/I₀(x) using polynomial approximations +/// @details Used in maximum-likelihood refinement of diffraction data +/// @param x argument +/// @return I₁(x)/I₀(x) inline double bessel_i1_over_i0(double x) { using B = BesselTables_; if (x < 0) @@ -95,9 +111,11 @@ inline double bessel_i1_over_i0(double x) { return p / q; } -// Simplified function from Boost.Math. -// Similar to std::cyl_bessel_i(0, x), but much faster, less exact and doesn't -// throw out_of_range on negative argument. Relative error < 5.02e-08. +/// @brief Compute modified Bessel function I₀(x) +/// @details Simplified from Boost.Math; relative error < 5.02e-08. +/// Similar to std::cyl_bessel_i(0, x) but faster. Does not throw on negative argument. +/// @param x argument +/// @return I₀(x) inline double bessel_i0(double x) { using B = BesselTables_; x = std::fabs(x); @@ -111,7 +129,11 @@ inline double bessel_i0(double x) { return ex * evaluate_polynomial(B::Q3, 1 / x) / std::sqrt(x) * ex; } -// Relative error < 4e-08. +/// @brief Compute ln(I₀(x)) +/// @details Numerically stable computation via log1p for small x; relative error < 4e-08. +/// Avoids overflow for large x. +/// @param x argument +/// @return ln(I₀(x)) inline double log_bessel_i0(double x) { using B = BesselTables_; x = std::fabs(x); diff --git a/include/gemmi/blob.hpp b/include/gemmi/blob.hpp index 4b07a972..ab3de1a5 100644 --- a/include/gemmi/blob.hpp +++ b/include/gemmi/blob.hpp @@ -15,19 +15,32 @@ namespace gemmi { +/// @brief A local maximum or connected region ("blob") in a 3D density map. +/// Represents a contiguous region above a density threshold, computed by flood fill. struct Blob { + /// @brief Volume of the blob in cubic Angstroms. double volume = 0.0; + /// @brief Score (integrated density above the cutoff). double score = 0.0; + /// @brief Peak electron density value within the blob. double peak_value = 0.0; + /// @brief Centroid position (weighted average of coordinates). gemmi::Position centroid; + /// @brief Position of the peak density point. gemmi::Position peak_pos; + /// @brief Check if blob is non-empty (volume > 0). explicit operator bool() const { return volume != 0. ; } }; +/// @brief Criteria for blob detection in density maps. struct BlobCriteria { + /// @brief Minimum electron density threshold to include points in a blob. double cutoff; + /// @brief Minimum volume (cubic Angstroms) for blob to be reported. double min_volume = 10.0; + /// @brief Minimum integrated score for blob to be reported. double min_score = 15.0; + /// @brief Minimum peak density value for blob to be reported. double min_peak = 0.0; }; @@ -78,7 +91,17 @@ inline Blob make_blob_of_points(const std::vector& points, } // namespace impl -// with negate=true grid negatives of grid values are used +/// @brief Find all blobs (density peaks) in a 3D grid using flood fill. +/// @details +/// Uses a flood fill algorithm to identify connected regions above a density threshold, +/// respecting crystallographic symmetry through the asymmetric unit mask. +/// Results are sorted by score (integrated density) in descending order. +/// The algorithm differs from FloodFill in floodfill.hpp by using symmetry operators +/// to exclude symmetric mates from separate blob detection. +/// @param grid The electron density grid to search. +/// @param criteria Thresholds for minimum volume, score, peak density. +/// @param negate If true, search for negative density (use -grid.data). +/// @return Vector of Blob objects, sorted by score (highest first). inline std::vector find_blobs_by_flood_fill(const gemmi::Grid& grid, const BlobCriteria& criteria, bool negate=false) { diff --git a/include/gemmi/c4322.hpp b/include/gemmi/c4322.hpp index abf8c982..9804c1a8 100644 --- a/include/gemmi/c4322.hpp +++ b/include/gemmi/c4322.hpp @@ -22,15 +22,24 @@ namespace gemmi { #pragma GCC diagnostic ignored "-Wfloat-conversion" #endif +/// @brief Electron scattering factor coefficients from ITC Volume C (2011) table 4.3.2.2. +/// Five-Gaussian approximation for neutral atoms, valid for sinθ/λ ≤ 2 Å⁻¹. +/// @tparam Real floating-point type template struct C4322 { - using Coef = GaussianCoef<5, 0, Real>; - static Coef data[99]; + using Coef = GaussianCoef<5, 0, Real>; ///< Type alias for coefficient set + static Coef data[99]; ///< Electron scattering factor coefficients + /// @brief Check if coefficients are available for element. + /// @param el element + /// @return true if element has tabulated coefficients static bool has(El el) { return el <= El::Cf || el == El::D; } + /// @brief Get coefficient set for element (charge is ignored — only neutral atoms tabulated). + /// @param el element + /// @return coefficient reference static Coef& get(El el, signed char /*charge*/=0, int /*serial*/=0) { // ordinal for X, H, ... Cf; H=1 for D; X=0 for Es, ... Og int pos = el <= El::Cf ? (int)el : (int)(el == El::D); diff --git a/include/gemmi/cellred.hpp b/include/gemmi/cellred.hpp index 7174f776..fc208115 100644 --- a/include/gemmi/cellred.hpp +++ b/include/gemmi/cellred.hpp @@ -16,17 +16,29 @@ namespace gemmi { struct SellingVector; -// GruberVector contains G6 vector (G for Gruber) and cell reduction algorithms. -// Originally, in B. Gruber, Acta Cryst. A29, 433 (1973), the vector was called -// "characteristic" of a lattice/cell. -// Functions that take epsilon as a parameter use it for comparisons, -// as proposed in Grosse-Kunstleve et al, Acta Cryst. (2004) A60, 1. +/// @brief G6 Gruber vector representing a lattice and its cell reduction algorithms. +/// @details The six-component G6 vector was called "characteristic" of a lattice/cell +/// by Gruber (1973). Functions that take epsilon use it for numerical comparisons, +/// following Grosse-Kunstleve et al. (2004). +/// @par References +/// Gruber, B. (1973). The relationship between reduced cells in a general Bravais lattice. +/// Acta Cryst. A29, 433–440. https://doi.org/10.1107/S0567739473001063 +/// +/// Krivy, I. & Gruber, B. (1976). A unified algorithm for determining the reduced +/// (Niggli) cell. Acta Cryst. A32, 297–298. https://doi.org/10.1107/S0567739476000636 +/// +/// Grosse-Kunstleve, R.W., Sauter, N.K. & Adams, P.D. (2004). Numerically stable +/// algorithms for the computation of reduced unit cells. +/// Acta Cryst. A60, 1–6. https://doi.org/10.1107/S010876730302186X struct GruberVector { // a.a b.b c.c 2b.c 2a.c 2a.b - double A, B, C, xi, eta, zeta; // the 1973 paper uses names A B C ξ η ζ - std::unique_ptr change_of_basis; // we use only Op::Rot + /// @brief G6 vector elements (A, B, C, ξ, η, ζ) from Gruber 1973 + double A, B, C, xi, eta, zeta; + /// @brief Change of basis transformation (Op::Rot only) + std::unique_ptr change_of_basis; - // m - orthogonalization matrix of a primitive cell + /// @brief Construct from orthogonalization matrix of primitive cell + /// @param m orthogonalization matrix explicit GruberVector(const Mat33& m) : A(m.column_dot(0,0)), B(m.column_dot(1,1)), @@ -35,21 +47,38 @@ struct GruberVector { eta(2 * m.column_dot(0,2)), zeta(2 * m.column_dot(0,1)) {} + /// @brief Construct from G6 vector array + /// @param g6 array {A, B, C, ξ, η, ζ} explicit GruberVector(const std::array& g6) : A(g6[0]), B(g6[1]), C(g6[2]), xi(g6[3]), eta(g6[4]), zeta(g6[5]) {} + /// @brief Construct from UnitCell with centring + /// @param u unit cell + /// @param centring centring type character + /// @param track_change_of_basis if true, track change of basis GruberVector(const UnitCell& u, char centring, bool track_change_of_basis=false) : GruberVector(u.primitive_orth_matrix(centring)) { if (track_change_of_basis) set_change_of_basis(Op{centred_to_primitive(centring), {0,0,0}, 'x'}); } + /// @brief Construct from UnitCell with SpaceGroup + /// @param u unit cell + /// @param sg space group (may be null for P) + /// @param track_change_of_basis if true, track change of basis GruberVector(const UnitCell& u, const SpaceGroup* sg, bool track_change_of_basis=false) : GruberVector(u, sg ? sg->centring_type() : 'P', track_change_of_basis) {} + /// @brief Set change of basis transformation + /// @param op transformation operation void set_change_of_basis(const Op& op) { change_of_basis.reset(new Op(op)); } + /// @brief Get G6 vector as array + /// @return array {A, B, C, ξ, η, ζ} std::array parameters() const { return {A, B, C, xi, eta, zeta}; } + /// @brief Get unit cell parameters + /// @details Convert from G6 vector to cell parameters + /// @return array {a, b, c, α, β, γ} with lengths in Å and angles in degrees std::array cell_parameters() const { // inverse of UnitCell::g6() double a = std::sqrt(A); @@ -60,10 +89,16 @@ struct GruberVector { deg(std::acos(eta/(2*a*c))), deg(std::acos(zeta/(2*a*b)))}; } + /// @brief Construct UnitCell from G6 vector + /// @return unit cell UnitCell get_cell() const { return UnitCell(cell_parameters()); } + /// @brief Convert to Selling-Delaunay vector + /// @return Selling vector SellingVector selling() const; + /// @brief Check if G6 satisfies Gruber 1973 normalization conditions + /// @return true if normalized (eq(3) from Gruber 1973) bool is_normalized() const { // eq(3) from Gruber 1973 return A <= B && B <= C && @@ -72,6 +107,9 @@ struct GruberVector { (xi > 0) == (eta > 0) && (xi > 0) == (zeta > 0); } + /// @brief Check if this is a Buerger-reduced cell + /// @param epsilon tolerance for comparisons + /// @return true if Buerger-reduced bool is_buerger(double epsilon=1e-9) const { return is_normalized() && // eq (4) from Gruber 1973 @@ -80,8 +118,9 @@ struct GruberVector { std::abs(zeta) <= A + epsilon; } - // Algorithm N from Gruber (1973). - // Returns branch taken in N3. + /// @brief Normalize using Gruber Algorithm N + /// @details Apply Gruber normalization to the G6 vector (Algorithm N from Gruber 1973) + /// @param eps tolerance for comparisons void normalize(double eps=1e-9) { auto step_N1 = [&]() { if (A - B > eps || (A - B >= -eps && std::abs(xi) > std::abs(eta) + eps)) { // N1 @@ -119,8 +158,9 @@ struct GruberVector { zeta = std::copysign(zeta, sgn); } - // Algorithm B from Gruber (1973). - // Returns true if no change was needed. + /// @brief Perform one step of Gruber Algorithm B + /// @details Execute one iteration of the Buerger reduction algorithm + /// @return true if already Buerger-reduced (no change made) bool buerger_step() { if (std::abs(xi) > B) { // B2 double j = std::floor(0.5*xi/B + 0.5); @@ -148,7 +188,9 @@ struct GruberVector { return false; } - // Returns number of iterations. + /// @brief Reduce to Buerger cell + /// @details Apply normalize() and buerger_step() repeatedly until convergence + /// @return iteration count int buerger_reduce() { int n = 0; double prev_sum = -1; @@ -174,9 +216,11 @@ struct GruberVector { return n; } - // To be called after normalize() or is_normalized(). - // Returns true if it already was Niggli cell. - // Algorithm from Krivy & Gruber, Acta Cryst. (1976) A32, 297. + /// @brief Perform one step of Krivy-Gruber Niggli reduction + /// @details To be called after normalize() or is_normalized() + /// Algorithm from Krivy & Gruber, Acta Cryst. (1976) A32, 297 + /// @param epsilon tolerance for comparisons + /// @return true if already Niggli cell (no change made) bool niggli_step(double epsilon=1e-9) { if (std::abs(xi) > B + epsilon || // step 5. from Krivy-Gruber (1976) (xi >= B - epsilon && 2 * eta < zeta - epsilon) || @@ -220,7 +264,11 @@ struct GruberVector { return false; } - // Returns number of iterations. + /// @brief Reduce to Niggli cell + /// @details Apply normalize() and niggli_step() repeatedly until convergence + /// @param epsilon tolerance for comparisons + /// @param iteration_limit maximum iterations + /// @return iteration count int niggli_reduce(double epsilon=1e-9, int iteration_limit=100) { int n = 0; for (;;) { @@ -231,6 +279,9 @@ struct GruberVector { return n; } + /// @brief Check if this is a Niggli-reduced cell + /// @param epsilon tolerance for comparisons + /// @return true if Niggli-reduced bool is_niggli(double epsilon=1e-9) const { return is_normalized() && GruberVector(parameters()).niggli_step(epsilon); } @@ -254,20 +305,29 @@ struct GruberVector { }; -// Selling-Delaunay reduction. Based on: -// - chapter "Delaunay reduction and standardization" in -// International Tables for Crystallography vol. A (2016), sec. 3.1.2.3. -// https://onlinelibrary.wiley.com/iucr/itc/Ac/ch3o1v0001/ -// - Patterson & Love (1957), Acta Cryst. 10, 111, -// "Remarks on the Delaunay reduction", doi:10.1107/s0365110x57000328 -// - Andrews et al (2019), Acta Cryst. A75, 115, -// "Selling reduction versus Niggli reduction for crystallographic lattices". +/// @brief Selling-Delaunay vector for lattice reduction. +/// @details Represents a lattice in terms of 6 scalar products among four basis vectors. +/// @par References +/// Patterson, A.L. & Love, W.E. (1957). Remarks on the Delaunay reduction. +/// Acta Cryst. 10, 111–116. https://doi.org/10.1107/S0365110X57000328 +/// +/// Andrews, L.C., Bernstein, H.J. & Sauter, N.K. (2019). Selling reduction versus +/// Niggli reduction for crystallographic lattices. +/// Acta Cryst. A75, 115–120. https://doi.org/10.1107/S2053273318015413 +/// +/// International Tables for Crystallography, Vol. A (2016), sec. 3.1.2.3. +/// https://doi.org/10.1107/97809553602060000933 struct SellingVector { - // b.c a.c a.b a.d b.d c.d + /// @brief Selling vector elements s (6 scalar products) + /// Order: b.c, a.c, a.b, a.d, b.d, c.d std::array s; + /// @brief Construct from Selling vector array + /// @param s_ array of 6 dot products explicit SellingVector(const std::array& s_) : s(s_) {} + /// @brief Construct from orthogonalization matrix + /// @param orth orthogonalization matrix explicit SellingVector(const Mat33& orth) { Vec3 b[4]; for (int i = 0; i < 3; ++i) @@ -281,20 +341,36 @@ struct SellingVector { s[5] = b[2].dot(b[3]); } + /// @brief Construct from UnitCell with centring + /// @param u unit cell + /// @param centring centring type SellingVector(const UnitCell& u, char centring) : SellingVector(u.primitive_orth_matrix(centring)) {} + /// @brief Construct from UnitCell with SpaceGroup + /// @param u unit cell + /// @param sg space group (may be null for P) SellingVector(const UnitCell& u, const SpaceGroup* sg) : SellingVector(u, sg ? sg->centring_type() : 'P') {} - // The reduction minimizes the sum b_i^2 which is equal to -2 sum s_i. + /// @brief Sum of squared basis vector lengths + /// @details The reduction minimizes sum(b_i²) which equals -2·sum(s_i) + /// @return sum of squared lengths double sum_b_squared() const { return -2 * (s[0] + s[1] + s[2] + s[3] + s[4] + s[5]); } + /// @brief Check if reduced + /// @details A Selling vector is reduced if all s[i] ≤ 0 within tolerance + /// @param eps tolerance + /// @return true if reduced bool is_reduced(double eps=1e-9) const { return std::all_of(s.begin(), s.end(), [eps](double x) { return x <= eps; }); } + /// @brief Perform one reduction step + /// @details Apply one iteration of Selling reduction + /// @param eps tolerance + /// @return true if a step was applied bool reduce_step(double eps=1e-9) { //printf(" s = %g %g %g %g %g %g sum=%g\n", // s[0], s[1], s[2], s[3], s[4], s[5], sum_b_squared()); @@ -330,7 +406,11 @@ struct SellingVector { return true; } - // Returns number of iterations. + /// @brief Reduce to Selling form + /// @details Apply reduce_step() repeatedly until convergence + /// @param eps tolerance + /// @param iteration_limit maximum iterations + /// @return iteration count int reduce(double eps=1e-9, int iteration_limit=100) { int n = 0; while (++n != iteration_limit) @@ -339,13 +419,19 @@ struct SellingVector { return n; } + /// @brief Convert to G6 parameters + /// @return GruberVector parameters std::array g6_parameters() const { return {-s[1]-s[2]-s[3], -s[0]-s[2]-s[4], -s[0]-s[1]-s[5], 2*s[0], 2*s[1], 2*s[2]}; } + /// @brief Convert to GruberVector + /// @return Gruber vector equivalent GruberVector gruber() const { return GruberVector(g6_parameters()); } - // Swap values to make a <= b <= c <= d + /// @brief Sort basis vectors by squared length + /// @details Swap values to make a² ≤ b² ≤ c² ≤ d² + /// @param eps tolerance for comparisons void sort(double eps=1e-9) { double abcd_sq_neg[4] = { // -a^2, -b^2, -c^2, -d^2 (negated - to be sorted in descending order) @@ -389,12 +475,19 @@ struct SellingVector { } } + /// @brief Get unit cell parameters + /// @return array {a, b, c, α, β, γ} via Gruber conversion std::array cell_parameters() const { return gruber().cell_parameters(); } + /// @brief Construct UnitCell from Selling vector + /// @return unit cell UnitCell get_cell() const { return UnitCell(cell_parameters()); } }; +/// @brief Convert GruberVector to Selling-Delaunay vector +/// @details Compute Selling vector from G6 Gruber parameters +/// @return Selling vector inline SellingVector GruberVector::selling() const { double s0 = 0.5 * xi; double s1 = 0.5 * eta; diff --git a/include/gemmi/flat.hpp b/include/gemmi/flat.hpp index ec1e49ed..5fd6cca1 100644 --- a/include/gemmi/flat.hpp +++ b/include/gemmi/flat.hpp @@ -10,38 +10,72 @@ namespace gemmi { +/// @brief A flat representation of a single atom for efficient array storage. +/// Stores atomic properties in a contiguous, columnar-friendly format suitable +/// for NumPy arrays and bulk data processing. struct FlatAtom { + /// @brief Atom name (e.g., "CA", "CB"). char atom_name[8] {}; + /// @brief Residue name (e.g., "ALA", "GLY"). char residue_name[8] = {}; + /// @brief Chain identifier. char chain_id[8] = {}; + /// @brief Subchain identifier. char subchain[8] = {}; + /// @brief Entity identifier. char entity_id[8] = {}; + /// @brief Sequence identifier (residue number with insertion code). SeqId seq_id; + /// @brief Cartesian position in Angstroms. Position pos; + /// @brief Occupancy (0.0 to 1.0). float occ = 1.0f; - float b_iso = 20.0f; // arbitrary default value - char altloc = '\0'; // 0 if not set - char het_flag = '\0'; // 'A' = ATOM, 'H' = HETATM, 0 = unspecified + /// @brief Isotropic B-factor (arbitrary default 20.0 Angstrom^2). + float b_iso = 20.0f; + /// @brief Alternate location indicator (0 if not set). + char altloc = '\0'; + /// @brief Atom type flag ('A' = ATOM, 'H' = HETATM, 0 = unspecified). + char het_flag = '\0'; + /// @brief Entity type classification. EntityType entity_type = EntityType::Unknown; + /// @brief Chemical element. Element element = El::X; - signed char charge = 0; // [-8, +8] + /// @brief Formal charge (-8 to +8). + signed char charge = 0; + /// @brief Anisotropic thermal parameters. SMat33 aniso = {0, 0, 0, 0, 0, 0}; + /// @brief Model number. int model_num; + /// @brief Atom serial number. int serial = 0; + /// @brief Selection flag. bool selected = false; + /// @brief Generate a formatted atom identifier string. + /// @return String containing chain, residue, and atom identifiers. std::string atom_str() const { ResidueId resid{seq_id, "", residue_name}; return gemmi::atom_str(chain_id, resid, atom_name, altloc); } }; + +/// @brief A flat representation of a structure as a contiguous table of atoms. +/// Converts hierarchical Structure into a single table of FlatAtom entries, +/// enabling efficient access patterns and integration with array-based tools. struct GEMMI_DLL FlatStructure { + /// @brief Empty structure holding only metadata (name, cell, spacegroup). Structure empty_st; + /// @brief Table of flat atoms from the structure. std::vector table; - bool strings_as_numbers = true; // when accessed as NumPy arrays (Python bindings) + /// @brief If true, string fields are interpreted as numeric when accessed as NumPy arrays. + bool strings_as_numbers = true; - // Structure <-> FlatStructure + /// @brief Convert a macromolecular structure to flat representation. + /// @param st The structure to flatten. FlatStructure(const Structure& st); + + /// @brief Reconstruct a hierarchical structure from the flat representation. + /// @return A Structure with chains, residues, and atoms rebuilt from the table. Structure generate_structure(); }; diff --git a/include/gemmi/formfact.hpp b/include/gemmi/formfact.hpp index b4f15481..b784c726 100644 --- a/include/gemmi/formfact.hpp +++ b/include/gemmi/formfact.hpp @@ -15,10 +15,10 @@ namespace gemmi { -// NOTE: the argument x must be between -88 and 88. -// It is based on expapprox() from -// https://github.com/jhjourdan/SIMD-math-prims/blob/master/simd_math_prims.h -// Relative error is below 1e-5. +/// @brief Fast approximate exp for float; relative error < 1e-5. +/// @note Input must be in [-88, 88]. +/// @param x exponent value +/// @return approximate exp(x) inline float unsafe_expapprox(float x) { static_assert(std::numeric_limits::is_iec559, "float is not IEEE 754?"); //static float zero = 0.f; // non-const to disable optimization @@ -34,11 +34,16 @@ inline float unsafe_expapprox(float x) { (-2.190619930e-3f + b * 1.3555747234e-2f)))); } -// precalculated density of an isotropic atom +/// @brief Precalculated isotropic density as sum of N Gaussians in r². +/// Amplitude and exponent coefficients are stored for fast evaluation. template struct ExpSum { - Real a[N], b[N]; + Real a[N]; ///< Amplitude coefficients + Real b[N]; ///< Exponent coefficients + /// @brief Calculate density at squared distance. + /// @param r2 squared distance + /// @return density at r² Real calculate(Real r2) const { Real density = 0; for (int i = 0; i < N; ++i) @@ -46,6 +51,9 @@ struct ExpSum { return density; } + /// @brief Calculate density and its derivative. + /// @param r distance + /// @return pair of (density, derivative) std::pair calculate_with_derivative(Real r) const { Real density = 0; Real derivative = 0; @@ -58,9 +66,14 @@ struct ExpSum { } }; +/// @brief Float specialisation of ExpSum using unsafe_expapprox for speed. template struct ExpSum { - float a[N], b[N]; + float a[N]; ///< Amplitude coefficients + float b[N]; ///< Exponent coefficients + /// @brief Calculate density at squared distance. + /// @param r2 squared distance + /// @return density at r² float calculate(float r2) const { float density = 0; float tmp[N]; @@ -71,6 +84,9 @@ struct ExpSum { return density; } + /// @brief Calculate density and its derivative. + /// @param r distance + /// @return pair of (density, derivative) std::pair calculate_with_derivative(float r) const { float density = 0; float derivative = 0; @@ -86,12 +102,16 @@ struct ExpSum { } }; -// precalculated density of an anisotropic atom +/// @brief Precalculated anisotropic density as sum of N Gaussians. +/// Amplitude and tensor coefficients are stored for fast evaluation. template struct ExpAnisoSum { - Real a[N]; - SMat33 b[N]; + Real a[N]; ///< Amplitude coefficients + SMat33 b[N]; ///< Anisotropic exponent tensor coefficients + /// @brief Calculate density at vector position. + /// @param r position vector + /// @return density at r Real calculate(const Vec3& r) const { Real density = 0; for (int i = 0; i < N; ++i) @@ -100,11 +120,15 @@ struct ExpAnisoSum { } }; +/// @brief Float specialisation of ExpAnisoSum using unsafe_expapprox for speed. template struct ExpAnisoSum { - float a[N]; - SMat33 b[N]; + float a[N]; ///< Amplitude coefficients + SMat33 b[N]; ///< Anisotropic exponent tensor coefficients + /// @brief Calculate density at vector position. + /// @param r_ position vector + /// @return density at r float calculate(const Vec3& r_) const { Vec3f r((float)r_.x, (float)r_.y, (float)r_.z); float density = 0; @@ -117,22 +141,33 @@ struct ExpAnisoSum { } }; +/// @brief Compute x^1.5. +/// @tparam Real floating-point type +/// @param x input value +/// @return x^1.5 template Real pow15(Real x) { return x * std::sqrt(x); } -// Gaussian coefficients with functions to calculate sf and density. +/// @brief Gaussian approximation coefficients for atomic scattering. +/// N Gaussians plus optional constant term for efficient computation. template struct GaussianCoef { - using coef_type = Real; - static const int ncoeffs = N; - std::array coefs; + using coef_type = Real; ///< Type alias for coefficient values + static const int ncoeffs = N; ///< Number of Gaussian terms (constant term separate) + std::array coefs; ///< Coefficients: a[0..N-1], b[0..N-1], c (if WithC) + /// @brief Get amplitude coefficient for Gaussian i. Real a(int n) const { return coefs[n]; } + /// @brief Get exponent coefficient for Gaussian i. Real b(int n) const { return coefs[N+n]; } + /// @brief Get constant term (0 if WithC=0). Real c() const { return WithC ? coefs[2*N] : 0; } + /// @brief Set all coefficients. void set_coefs(const std::array& c) { coefs = c; } - // argument: (sin(theta)/lambda)^2 + /// @brief Calculate structure factor at specified reciprocal resolution. + /// @param stol2 (sinθ/λ)² + /// @return structure factor Real calculate_sf(Real stol2) const { Real sf = c(); for (int i = 0; i < N; ++i) @@ -140,6 +175,10 @@ struct GaussianCoef { return sf; } + /// @brief Calculate isotropic density at given distance and B-factor. + /// @param r2 squared distance + /// @param B isotropic B-factor (Ų) + /// @return density Real calculate_density_iso(Real r2, Real B) const { constexpr Real _4pi = Real(4 * pi()); Real r2pi = Real(r2 * pi()); @@ -151,7 +190,10 @@ struct GaussianCoef { return density; } - // note: addend is considered only if WithC (addend is usually dispersion f') + /// @brief Precalculate coefficients for fast isotropic density evaluation. + /// @param B isotropic B-factor (Ų) + /// @param addend added to constant term if WithC=1 (e.g., dispersion f') + /// @return ExpSum ready for fast density calculation ExpSum precalculate_density_iso(Real B, Real addend=0) const { ExpSum prec; constexpr Real _4pi = Real(4 * pi()); @@ -168,6 +210,10 @@ struct GaussianCoef { return prec; } + /// @brief Calculate anisotropic density at given position and U-tensor. + /// @param r position vector + /// @param U anisotropic displacement tensor + /// @return density Real calculate_density_aniso(const Vec3& r, const SMat33& U) const { constexpr Real pi2 = sq(pi()); const SMat33 B = U.scaled(8 * pi2); @@ -181,6 +227,10 @@ struct GaussianCoef { return density; } + /// @brief Precalculate coefficients for fast anisotropic density evaluation with B-tensor. + /// @param B anisotropic B-factor matrix + /// @param addend added to constant term if WithC=1 + /// @return ExpAnisoSum ready for fast density calculation ExpAnisoSum precalculate_density_aniso_b(const SMat33& B, Real addend=0) const { constexpr Real m4pi2 = Real(-4 * sq(pi())); @@ -198,6 +248,10 @@ struct GaussianCoef { return prec; } + /// @brief Precalculate coefficients for fast anisotropic density evaluation with U-tensor. + /// @param U anisotropic displacement tensor + /// @param addend added to constant term if WithC=1 + /// @return ExpAnisoSum ready for fast density calculation ExpAnisoSum precalculate_density_aniso_u(const SMat33& U, Real addend=0) const { constexpr Real UtoB = 8 * sq(pi()); diff --git a/include/gemmi/fprime.hpp b/include/gemmi/fprime.hpp index da74ac98..58a5ccc9 100644 --- a/include/gemmi/fprime.hpp +++ b/include/gemmi/fprime.hpp @@ -10,18 +10,33 @@ namespace gemmi { -/// Cromer-Liberman calculation of anomalous scattering factors. -/// input: -/// z - atomic number -/// npts - array length -/// energy - energies in eV -/// output: -/// fp - f' (real part of anomalous scattering) -/// fpp - f" (imaginary part of anomalous scattering) +/// @brief Cromer-Liberman calculation of anomalous scattering factors for an array +/// of energies, with Kissel-Pratt corrections. +/// @param z atomic number +/// @param npts array length +/// @param energy energies in eV +/// @param fp output: f' (real part of anomalous scattering) +/// @param fpp output: f" (imaginary part of anomalous scattering) +/// @par References +/// Cromer, D.T. & Liberman, D.A. (1994). Anomalous dispersion calculations +/// near to and on the long-wavelength side of an absorption edge. +/// Acta Cryst. A51, 416. https://doi.org/10.1107/S0108767394013292 +/// +/// Kissel, L. & Pratt, R.H. (1990). Rayleigh scattering — elastic photon +/// scattering by bound electrons: status and perspectives. +/// Acta Cryst. A46, 170–175. https://doi.org/10.1107/S0108767389010718 +/// +/// Brennan, S. & Cowan, P.L. (1992). A suite of programs for calculating +/// x-ray absorption, reflection, and diffraction performance. +/// Rev. Sci. Instrum. 63, 850. https://doi.org/10.1063/1.1142625 GEMMI_DLL void cromer_liberman_for_array(int z, int npts, const double* energy, double* fp, double* fpp); -/// returns fp, fpp is returned through the last argument. +/// @brief Single-energy wrapper for cromer_liberman_for_array. +/// @param z atomic number +/// @param energy energy in eV +/// @param fpp output: may be nullptr (optional f" value) +/// @return f' value inline double cromer_liberman(int z, double energy, double* fpp) { double fp = 0., fpp_ = 0.; cromer_liberman_for_array(z, 1, &energy, &fp, &fpp_); diff --git a/include/gemmi/interop.hpp b/include/gemmi/interop.hpp index 07e5c30b..a9790415 100644 --- a/include/gemmi/interop.hpp +++ b/include/gemmi/interop.hpp @@ -10,6 +10,12 @@ namespace gemmi { +/// @brief Convert a macromolecular atom to a small-structure site. +/// Performs coordinate transformation to fractional space, occupancy adjustment +/// for special positions, and anisotropic thermal parameter conversion. +/// @param atom The macromolecular atom to convert. +/// @param cell The unit cell for coordinate transformation. +/// @return A SmallStructure::Site representation of the atom. inline SmallStructure::Site atom_to_site(const Atom& atom, const UnitCell& cell) { SmallStructure::Site site; site.label = atom.name; @@ -42,6 +48,11 @@ inline SmallStructure::Site atom_to_site(const Atom& atom, const UnitCell& cell) return site; } +/// @brief Convert a macromolecular structure (MX) to a small-structure (SX) representation. +/// Extracts atoms from a specific model and converts them to sites using atom_to_site(). +/// @param st The macromolecular structure to convert. +/// @param n The model index to extract (default 0). +/// @return A SmallStructure representation of the model. inline SmallStructure mx_to_sx_structure(const Structure& st, int n=0) { const Model& model = st.models.at(n); SmallStructure small_st; diff --git a/include/gemmi/isosurface.hpp b/include/gemmi/isosurface.hpp index 1257e25d..037317c2 100644 --- a/include/gemmi/isosurface.hpp +++ b/include/gemmi/isosurface.hpp @@ -14,14 +14,23 @@ namespace gemmi { -/// Result of an isosurface extraction. +/// @brief Result of an isosurface extraction (vertices and triangle indices). struct IsoSurface { std::vector vertices; // x, y, z triples std::vector triangles; // vertex-index triples }; -enum class IsoMethod { MarchingCubes, SnappedMC }; +/// @brief Isosurface extraction algorithm selection. +enum class IsoMethod { + /// @brief Standard marching cubes algorithm. + MarchingCubes, + /// @brief Marching cubes with snapped edge midpoints (0, 0.5, 1.0 only). + SnappedMC +}; +/// @brief Parse isosurface method name from string. +/// @param s String name: "snapped MC" returns SnappedMC, otherwise MarchingCubes. +/// @return The selected isosurface method. inline IsoMethod iso_method_from_string(const std::string& s) { if (s == "snapped MC") return IsoMethod::SnappedMC; @@ -40,10 +49,17 @@ namespace impl { namespace gemmi { -/// Low-level marching cubes on a flat 3D array of values and positions. -/// dims: number of grid points in each dimension. -/// values: scalar field values (dims[0]*dims[1]*dims[2] elements). -/// points: Cartesian coordinates (3 * dims[0]*dims[1]*dims[2] floats). +/// @brief Extract an isosurface from flat 3D arrays using marching cubes. +/// @details +/// Low-level function that operates directly on flat arrays of values and positions. +/// For convenience, use extract_isosurface() with a Grid object instead. +/// @param dims Number of grid points in each dimension [x, y, z]. +/// @param values Scalar field values, length dims[0]*dims[1]*dims[2]. +/// @param points Cartesian coordinates as flattened triples: x0,y0,z0,x1,y1,z1,... +/// Total length: 3 * dims[0]*dims[1]*dims[2]. +/// @param isolevel The isovalue threshold for surface extraction. +/// @param method Marching cubes variant (standard or snapped). +/// @return IsoSurface containing vertices (x,y,z triples) and triangles (vertex-index triples). inline IsoSurface calculate_isosurface(const std::array& dims, const std::vector& values, const std::vector& points, @@ -123,8 +139,18 @@ inline IsoSurface calculate_isosurface(const std::array& dims, return result; } -/// Extract an isosurface from a Grid within a sphere of given radius -/// centered at (x, y, z) in Cartesian coordinates. +/// @brief Extract an isosurface from a Grid within a spherical region. +/// @details +/// Automatically extracts a region of the grid around a center point, +/// converts to fractional/grid coordinates, and runs marching cubes +/// on the subregion. This is the recommended high-level interface. +/// @tparam T The grid value type (float, double, etc.). +/// @param grid The 3D density map to process. +/// @param center Center point in Cartesian coordinates. +/// @param radius Radius of the sphere in Angstroms. +/// @param isolevel The isovalue threshold for surface extraction. +/// @param method Marching cubes variant (standard or snapped). +/// @return IsoSurface containing extracted vertices and triangle indices. template IsoSurface extract_isosurface(const Grid& grid, const Position& center, diff --git a/include/gemmi/it92.hpp b/include/gemmi/it92.hpp index a7d21978..6508a526 100644 --- a/include/gemmi/it92.hpp +++ b/include/gemmi/it92.hpp @@ -23,17 +23,28 @@ namespace gemmi { #pragma GCC diagnostic ignored "-Wfloat-conversion" #endif +/// @brief X-ray scattering factor coefficients from International Tables +/// for Crystallography Volume C (1992). +/// Cromer-Mann Gaussian approximation valid for sinθ/λ < 2 Å⁻¹. +/// @tparam Real floating-point type template struct IT92 { - using Coef = GaussianCoef<4, 1, Real>; - static Coef data[99+112]; - static std::pair ion_list[112]; - static bool ignore_charge; + using Coef = GaussianCoef<4, 1, Real>; ///< Type alias for coefficient set + static Coef data[99+112]; ///< Cromer-Mann coefficients indexed by element ordinal + static std::pair ion_list[112]; ///< List of (element, charge) pairs for ions + static bool ignore_charge; ///< If true, return neutral-atom coefficients regardless of charge + /// @brief Check if coefficients are available for element. + /// @param el element + /// @return true if element has tabulated coefficients static bool has(El el) { return el <= El::Cf || el == El::D; } + /// @brief Get coefficient set for element and charge, with fallback to neutral. + /// @param el element + /// @param charge ionic charge + /// @return coefficient reference static Coef& get(El el, signed char charge, int /*serial*/=0) { // ordinal for X, H, ... Cf; H=1 for D; X=0 for Es, ... Og int pos = el <= El::Cf ? (int)el : (int)(el == El::D); @@ -52,6 +63,10 @@ struct IT92 { return data[pos]; } + /// @brief Get pointer to exact match for element and charge, or nullptr if not found. + /// @param el element + /// @param charge ionic charge + /// @return pointer to coefficient, or nullptr static Coef* get_exact(El el, signed char charge) { if (has(el)) { Coef* coef = &get(el, charge); @@ -61,14 +76,16 @@ struct IT92 { return nullptr; } - // Make a1+a2+a3+a4+c equal exactly to the number of electrons. - // It changes the values from ITC only slightly (by not more than 0.12%). + /// @brief Normalise one coefficient set so it integrates to n electrons. + /// @param f coefficient to normalise + /// @param n electron count static void normalize_one(Coef& f, int n) { double factor = n / (f.a(0) + f.a(1) + f.a(2) + f.a(3) + f.c()); for (int j = 0; j < 4; ++j) f.coefs[j] *= factor; f.coefs[8] *= factor; } + /// @brief Normalise all entries to integrate to correct electron count. static void normalize() { for (int i = 1; i < 99; ++i) normalize_one(data[i], i); diff --git a/include/gemmi/levmar.hpp b/include/gemmi/levmar.hpp index 88b23181..6782fd4d 100644 --- a/include/gemmi/levmar.hpp +++ b/include/gemmi/levmar.hpp @@ -26,6 +26,13 @@ namespace gemmi { /// a is n x n matrix (in vector) /// b is vector of length n, /// This function returns vector x[] in b[], and 1-matrix in a[]. +/// @brief Solve a linear system using Gauss-Jordan elimination with partial pivoting. +/// @details +/// Solves A * x = b by reducing A to the identity matrix via row operations. +/// Handles singular matrices by skipping zero rows/columns. +/// @param a n x n matrix stored in row-major order (modified to identity matrix). +/// @param b Right-hand side vector of length n (modified to solution x). +/// @param n System size. inline void jordan_solve(double* a, double* b, int n) { for (int i = 0; i < n; i++) { // looking for a pivot element @@ -69,11 +76,17 @@ inline void jordan_solve(double* a, double* b, int n) { } } +/// @brief Solve a linear system stored in vector containers. +/// @param a n x n matrix as flat vector (n^2 elements). +/// @param b Right-hand side vector of length n. inline void jordan_solve(std::vector& a, std::vector& b) { assert(a.size() == b.size() * b.size()); jordan_solve(a.data(), b.data(), (int)b.size()); } +/// @brief Print parameters to stderr (debug only). +/// @param name Label for the parameter set. +/// @param a Vector of parameters to print. inline void print_parameters(const std::string& name, std::vector &a) { fprintf(stderr, " %s:", name.c_str()); for (double& x : a) @@ -81,6 +94,13 @@ inline void print_parameters(const std::string& name, std::vector &a) { fprintf(stderr, "\n"); } +/// @brief Compute weighted sum of squared residuals for a target. +/// @details +/// Uses long double for accumulation to improve numerical accuracy. +/// Assumes Target provides: points container, get_weight(), get_y(), compute_value(). +/// @tparam Target Fitting target type. +/// @param target The fitting target. +/// @return Sum of weighted squared residuals. template double compute_wssr(const Target& target) { long double wssr = 0; // long double here notably increases the accuracy @@ -89,6 +109,15 @@ double compute_wssr(const Target& target) { return (double) wssr; } +/// @brief Compute function value, residuals, and gradients with respect to parameters. +/// @details +/// Assumes Target provides: points container, get_weight(), get_y(), +/// compute_value_and_derivatives(point, dy_da_vector). +/// @tparam Target Fitting target type. +/// @param target The fitting target. +/// @param n Number of parameters. +/// @param grad Output array of size n for partial derivatives d(wssr)/da. +/// @return Weighted sum of squared residuals. template double compute_gradients(const Target& target, unsigned n, double* grad) { double wssr = 0; @@ -127,10 +156,20 @@ double compute_gradients(const Target& target, unsigned n, double* grad) { return wssr; } -// alpha and beta are matrices outputted for the Levenberg-Marquardt algorithm. -// Ignoring weights, alpha is a squared Jacobian J^T J (which approximates the -// Hessian, as discussed in Numerical Recipes, chapter 15.5), not "damped" yet. -// The return value is the same as from compute_wssr(). +/// @brief Compute Jacobian-based matrices for Levenberg-Marquardt algorithm. +/// @details +/// Computes the normal equations: alpha = J^T*J (approximates Hessian), +/// beta = J^T*residual. These are the building blocks for iterative refinement. +/// Alpha is initially undamped; the LM algorithm applies the damping factor +/// (1 + lambda) to diagonal elements. Both matrices use only the lower triangle +/// and are symmetrized after computation. +/// Assumes Target provides: points container, get_weight(), get_y(), +/// compute_value_and_derivatives(point, dy_da_vector). +/// @tparam Target Fitting target type. +/// @param target The fitting target. +/// @param alpha Output: n x n matrix (stored as flat vector) = J^T*J. +/// @param beta Output: n-element vector = J^T*residual. +/// @return Weighted sum of squared residuals. template double compute_lm_matrices(const Target& target, std::vector& alpha, @@ -164,27 +203,61 @@ double compute_lm_matrices(const Target& target, return (double) wssr; } +/// @brief Levenberg-Marquardt non-linear least-squares optimization. +/// @details +/// Implements the Levenberg-Marquardt algorithm for fitting model parameters +/// to minimize the sum of weighted squared residuals. +/// The algorithm adjusts a damping factor (lambda) to interpolate between +/// gradient descent (large lambda) and Newton's method (small lambda), +/// automatically selecting the step size that gives best improvement. +/// @par References +/// Levenberg, K. (1944). A method for the solution of certain non-linear problems +/// in least squares. Q. Appl. Math. 2, 164–168. +/// https://doi.org/10.1090/qam/10666 +/// +/// Marquardt, D.W. (1963). An algorithm for least-squares estimation of nonlinear +/// parameters. J. Soc. Ind. Appl. Math. 11, 431–441. +/// https://doi.org/10.1137/0111030 struct LevMar { - // termination criteria + /// @brief Maximum number of function evaluations before terminating. int eval_limit = 100; + /// @brief Stop optimization if damping factor lambda exceeds this value. double lambda_limit = 1e+15; + /// @brief Stop if relative change in WSSR falls below this for two consecutive iterations. double stop_rel_change = 1e-5; - // adjustable parameters (normally the default values work fine) + /// @brief Factor by which lambda is multiplied if fit worsens. double lambda_up_factor = 10; + /// @brief Factor by which lambda is multiplied if fit improves. double lambda_down_factor = 0.1; + /// @brief Initial damping factor (typically 0.001). double lambda_start = 0.001; - // values set in fit() that can be inspected later + /// @brief Initial weighted sum of squared residuals (set by fit()). double initial_wssr = NAN; - int eval_count = 0; // number of function evaluations - - // arrays used during refinement - std::vector alpha; // matrix - std::vector beta; // vector - std::vector temp_alpha, temp_beta; // working arrays + /// @brief Number of function evaluations performed (set by fit()). + int eval_count = 0; + /// @brief Jacobian-based normal equations matrix (J^T*J), size n*n. + std::vector alpha; + /// @brief Jacobian-based normal equations vector (J^T*residual), size n. + std::vector beta; + /// @brief Working copies of alpha and beta during refinement. + std::vector temp_alpha, temp_beta; + /// @brief Run Levenberg-Marquardt optimization. + /// @details + /// Iteratively refines parameters to minimize WSSR. At each iteration, + /// solves (J^T*J + lambda*diag(J^T*J)) * da = J^T*residual for step da, + /// then updates parameters and checks for improvement. + /// Terminates when eval_limit is reached, lambda exceeds lambda_limit, + /// or relative improvement drops below stop_rel_change for two iterations. + /// @tparam Target Fitting target type (must implement: get_parameters(), + /// set_parameters(), points container, and + /// compute_value_and_derivatives()). + /// @param target The fitting target (modified in place). + /// @return Final weighted sum of squared residuals. initial_wssr and eval_count + /// are also set and can be inspected after fit() returns. template double fit(Target& target) { std::vector initial_a = target.get_parameters(); diff --git a/include/gemmi/math.hpp b/include/gemmi/math.hpp index 8d480031..9e71fde4 100644 --- a/include/gemmi/math.hpp +++ b/include/gemmi/math.hpp @@ -13,27 +13,53 @@ namespace gemmi { +/// @brief Return π to double precision +/// @return pi constexpr double pi() { return 3.1415926535897932384626433832795029; } -// The value used in converting between energy[eV] and wavelength[Angstrom]. -// $ units -d15 'h * c / eV / angstrom' +/// @brief Planck constant × speed of light in eV·Å +/// @details The value used in converting between energy[eV] and wavelength[Angstrom]. +/// @return hc constant constexpr double hc() { return 12398.4197386209; } -// The Bohr radius (a0) in Angstroms. +/// @brief Bohr radius a₀ in Angstroms +/// @return Bohr radius constexpr double bohrradius() { return 0.529177210903; } -// for Mott-Bethe factor +/// @brief Mott-Bethe constant: 1/(2π²a₀) used in electron scattering factor +/// @details Constant used in Mott-Bethe electron scattering factor calculations +/// @return Mott-Bethe constant constexpr double mott_bethe_const() { return 1. / (2 * pi() * pi() * bohrradius()); } -// Used in conversion of ADPs (atomic displacement parameters). +/// @brief Factor converting U_iso (Ų) to B-factor: 8π² +/// @details Used in conversion of ADPs (atomic displacement parameters) +/// @return conversion factor constexpr double u_to_b() { return 8 * pi() * pi(); } +/// @brief Convert radians to degrees +/// @param angle angle in radians +/// @return angle in degrees constexpr double deg(double angle) { return 180.0 / pi() * angle; } + +/// @brief Convert degrees to radians +/// @param angle angle in degrees +/// @return angle in radians constexpr double rad(double angle) { return pi() / 180.0 * angle; } +/// @brief Compute x² +/// @param x value (float) +/// @return x² constexpr float sq(float x) { return x * x; } + +/// @brief Compute x² +/// @param x value (double) +/// @return x² constexpr double sq(double x) { return x * x; } +/// @brief Compute ln(cosh(x)) stably +/// @details Avoids overflow for large x; cosh(x) would overflow for x > 710.5 +/// @param x argument +/// @return ln(cosh(x)) inline double log_cosh(double x) { // cosh(x) would overflow for x > 710.5, so we calculate: // ln(cosh(x)) = ln(e^x + e^-x) - ln(2) = ln(e^x * (1 + e^-2x)) - ln(2) @@ -41,8 +67,17 @@ inline double log_cosh(double x) { return x - std::log(2) + std::log1p(std::exp(-2 * x)); } +/// @brief Round double to nearest int +/// @param d value to round +/// @return rounded integer inline int iround(double d) { return static_cast(std::round(d)); } +/// @brief Absolute angular difference +/// @details Computes absolute difference modulo full range, in range [0, full/2] +/// @param a first angle +/// @param b second angle +/// @param full full angular range (default 360.0) +/// @return absolute difference inline double angle_abs_diff(double a, double b, double full=360.0) { double d = std::fabs(a - b); if (d > full) @@ -50,19 +85,35 @@ inline double angle_abs_diff(double a, double b, double full=360.0) { return std::min(d, full - d); } -// similar to C++17 std::clamp() +/// @brief Clamp value to range [lo, hi] +/// @details Similar to C++17 std::clamp() +/// @tparam T scalar type +/// @param v value to clamp +/// @param lo lower bound +/// @param hi upper bound +/// @return clamped value template constexpr T clamp(T v, T lo, T hi) { return std::min(std::max(v, lo), hi); } +/// @brief 3D vector template +/// @details Typedefs: Vec3 (double), Vec3f (float) +/// @tparam Real scalar type (double or float) template struct Vec3_ { Real x, y, z; + /// @brief Default constructor (zero vector) Vec3_() : x(0), y(0), z(0) {} + /// @brief Construct from coordinates Vec3_(Real x_, Real y_, Real z_) : x(x_), y(y_), z(z_) {} + /// @brief Construct from integer array [3] explicit Vec3_(std::array h) : x(h[0]), y(h[1]), z(h[2]) {} + /// @brief Access component by index + /// @param i index (0, 1, or 2) + /// @return reference to component x, y, or z + /// @throws std::out_of_range if i not in 0..2 Real& at(int i) { switch (i) { case 0: return x; @@ -71,55 +122,133 @@ struct Vec3_ { default: throw std::out_of_range("Vec3 index must be 0, 1 or 2."); } } + /// @brief Access component by index (const) + /// @param i index (0, 1, or 2) + /// @return value of component x, y, or z + /// @throws std::out_of_range if i not in 0..2 Real at(int i) const { return const_cast(this)->at(i); } + /// @brief Negate vector (unary minus) + /// @return -this Vec3_ operator-() const { return {-x, -y, -z}; } + /// @brief Subtract vectors + /// @param o other vector + /// @return this - o Vec3_ operator-(const Vec3_& o) const { return {x-o.x, y-o.y, z-o.z}; } + /// @brief Add vectors + /// @param o other vector + /// @return this + o Vec3_ operator+(const Vec3_& o) const { return {x+o.x, y+o.y, z+o.z}; } + /// @brief Multiply by scalar + /// @param d scalar + /// @return vector scaled by d Vec3_ operator*(Real d) const { return {x*d, y*d, z*d}; } + /// @brief Divide by scalar + /// @param d scalar + /// @return vector scaled by 1/d Vec3_ operator/(Real d) const { return *this * (1.0/d); } + /// @brief Subtract-assign + /// @param o other vector + /// @return reference to this Vec3_& operator-=(const Vec3_& o) { *this = *this - o; return *this; } + /// @brief Add-assign + /// @param o other vector + /// @return reference to this Vec3_& operator+=(const Vec3_& o) { *this = *this + o; return *this; } + /// @brief Multiply-assign by scalar + /// @param d scalar + /// @return reference to this Vec3_& operator*=(Real d) { *this = *this * d; return *this; } + /// @brief Divide-assign by scalar + /// @param d scalar + /// @return reference to this Vec3_& operator/=(Real d) { return operator*=(1.0/d); } + /// @brief Return negated copy + /// @return -this Vec3_ negated() const { return {-x, -y, -z}; } + /// @brief Dot product + /// @param o other vector + /// @return this · o Real dot(const Vec3_& o) const { return x*o.x + y*o.y + z*o.z; } + /// @brief Cross product + /// @param o other vector + /// @return this × o Vec3_ cross(const Vec3_& o) const { return {y*o.z - z*o.y, z*o.x - x*o.z, x*o.y - y*o.x}; } + /// @brief Squared Euclidean length + /// @return ||this||² Real length_sq() const { return x * x + y * y + z * z; } + /// @brief Euclidean length + /// @return ||this|| Real length() const { return std::sqrt(length_sq()); } + /// @brief Return vector with same direction but given magnitude + /// @param m desired magnitude + /// @return scaled vector Vec3_ changed_magnitude(Real m) const { return operator*(m / length()); } + /// @brief Return unit vector + /// @return normalized vector Vec3_ normalized() const { return changed_magnitude(1.0); } + /// @brief Squared distance to another vector + /// @param o other vector + /// @return ||this - o||² Real dist_sq(const Vec3_& o) const { return (*this - o).length_sq(); } + /// @brief Euclidean distance to another vector + /// @param o other vector + /// @return ||this - o|| Real dist(const Vec3_& o) const { return std::sqrt(dist_sq(o)); } + /// @brief Cosine of angle between vectors + /// @param o other vector + /// @return cos(angle) Real cos_angle(const Vec3_& o) const { return dot(o) / std::sqrt(length_sq() * o.length_sq()); } + /// @brief Angle between vectors in radians + /// @param o other vector + /// @return angle in [0, π] Real angle(const Vec3_& o) const { return std::acos(clamp(cos_angle(o), -1., 1.)); } + /// @brief Component-wise approximate equality + /// @param o other vector + /// @param epsilon tolerance + /// @return true if all components differ by at most epsilon bool approx(const Vec3_& o, Real epsilon) const { return std::fabs(x - o.x) <= epsilon && std::fabs(y - o.y) <= epsilon && std::fabs(z - o.z) <= epsilon; } + /// @brief Return true if any component is NaN + /// @return true if has NaN bool has_nan() const { return std::isnan(x) || std::isnan(y) || std::isnan(z); } + /// @brief Return true if all components are finite + /// @return true if finite bool is_finite() const { return std::isfinite(x) && std::isfinite(y) && std::isfinite(z); } }; +/// @brief 3D vector of doubles using Vec3 = Vec3_; + +/// @brief 3D vector of floats using Vec3f = Vec3_; +/// @brief Scalar multiplication (scalar * vector) +/// @param d scalar +/// @param v vector +/// @return scaled vector inline Vec3 operator*(double d, const Vec3& v) { return v * d; } -/// Rodrigues' rotation formula: rotate vector v about given axis of rotation -/// (which must be a unit vector) by given angle (in radians). +/// @brief Rotate vector about an axis using Rodrigues' rotation formula +/// @details Rotate vector v about given axis (which must be a unit vector) by angle theta +/// @param v vector to rotate +/// @param axis unit vector (axis of rotation) +/// @param theta angle in radians +/// @return rotated vector inline Vec3 rotate_about_axis(const Vec3& v, const Vec3& axis, double theta) { double sin_theta = std::sin(theta); double cos_theta = std::cos(theta); @@ -127,6 +256,8 @@ inline Vec3 rotate_about_axis(const Vec3& v, const Vec3& axis, double theta) { axis * (axis.dot(v) * (1 - cos_theta)); } +/// @brief 3×3 matrix of doubles +/// @details Row-major storage struct Mat33 { double a[3][3] = { {1.,0.,0.}, {0.,1.,0.}, {0.,0.,1.} }; @@ -135,55 +266,100 @@ struct Mat33 { const row_t& operator[](int i) const { return a[i]; } row_t& operator[](int i) { return a[i]; } + /// @brief Default constructor (identity matrix) Mat33() = default; + /// @brief Fill constructor + /// @param d value to fill the matrix explicit Mat33(double d) : a{{d, d, d}, {d, d, d}, {d, d, d}} {} + /// @brief Element constructor (row-major) + /// @param a1 first row, column 0 + /// @param a2 first row, column 1 + /// @param a3 first row, column 2 + /// @param b1 second row, column 0 + /// @param b2 second row, column 1 + /// @param b3 second row, column 2 + /// @param c1 third row, column 0 + /// @param c2 third row, column 1 + /// @param c3 third row, column 2 Mat33(double a1, double a2, double a3, double b1, double b2, double b3, double c1, double c2, double c3) : a{{a1, a2, a3}, {b1, b2, b3}, {c1, c2, c3}} {} + /// @brief Construct matrix from column vectors + /// @param c1 first column + /// @param c2 second column + /// @param c3 third column + /// @return matrix with given columns static Mat33 from_columns(const Vec3& c1, const Vec3& c2, const Vec3& c3) { return Mat33(c1.x, c2.x, c3.x, c1.y, c2.y, c3.y, c1.z, c2.z, c3.z); } + /// @brief Get row as a vector + /// @param i row index (0, 1, or 2) + /// @return row vector + /// @throws std::out_of_range if i not in 0..2 Vec3 row_copy(int i) const { if (i < 0 || i > 2) throw std::out_of_range("Mat33 row index must be 0, 1 or 2."); return Vec3(a[i][0], a[i][1], a[i][2]); } + /// @brief Get column as a vector + /// @param i column index (0, 1, or 2) + /// @return column vector + /// @throws std::out_of_range if i not in 0..2 Vec3 column_copy(int i) const { if (i < 0 || i > 2) throw std::out_of_range("Mat33 column index must be 0, 1 or 2."); return Vec3(a[0][i], a[1][i], a[2][i]); } + /// @brief Matrix addition + /// @param b other matrix + /// @return this + b Mat33 operator+(const Mat33& b) const { return Mat33(a[0][0] + b[0][0], a[0][1] + b[0][1], a[0][2] + b[0][2], a[1][0] + b[1][0], a[1][1] + b[1][1], a[1][2] + b[1][2], a[2][0] + b[2][0], a[2][1] + b[2][1], a[2][2] + b[2][2]); } + /// @brief Matrix subtraction + /// @param b other matrix + /// @return this - b Mat33 operator-(const Mat33& b) const { return Mat33(a[0][0] - b[0][0], a[0][1] - b[0][1], a[0][2] - b[0][2], a[1][0] - b[1][0], a[1][1] - b[1][1], a[1][2] - b[1][2], a[2][0] - b[2][0], a[2][1] - b[2][1], a[2][2] - b[2][2]); } + /// @brief Matrix-vector product + /// @param p vector + /// @return this * p Vec3 multiply(const Vec3& p) const { return {a[0][0] * p.x + a[0][1] * p.y + a[0][2] * p.z, a[1][0] * p.x + a[1][1] * p.y + a[1][2] * p.z, a[2][0] * p.x + a[2][1] * p.y + a[2][2] * p.z}; } + /// @brief Left multiplication (row vector times matrix) + /// @details Computes v^T * this + /// @param p vector + /// @return p * this (treating p as row vector) Vec3 left_multiply(const Vec3& p) const { return {a[0][0] * p.x + a[1][0] * p.y + a[2][0] * p.z, a[0][1] * p.x + a[1][1] * p.y + a[2][1] * p.z, a[0][2] * p.x + a[1][2] * p.y + a[2][2] * p.z}; } - // p has elements from the main diagonal of a 3x3 diagonal matrix + /// @brief Right multiply by diagonal matrix + /// @details Multiplies each column i by p[i] + /// @param p diagonal matrix elements + /// @return this * diag(p) Mat33 multiply_by_diagonal(const Vec3& p) const { return Mat33(a[0][0] * p.x, a[0][1] * p.y, a[0][2] * p.z, a[1][0] * p.x, a[1][1] * p.y, a[1][2] * p.z, a[2][0] * p.x, a[2][1] * p.y, a[2][2] * p.z); } + /// @brief Matrix product + /// @param b other matrix + /// @return this * b Mat33 multiply(const Mat33& b) const { Mat33 r; for (int i = 0; i != 3; ++i) @@ -191,13 +367,21 @@ struct Mat33 { r[i][j] = a[i][0] * b[0][j] + a[i][1] * b[1][j] + a[i][2] * b[2][j]; return r; } + /// @brief Transpose + /// @return transposed matrix Mat33 transpose() const { return Mat33(a[0][0], a[1][0], a[2][0], a[0][1], a[1][1], a[2][1], a[0][2], a[1][2], a[2][2]); } + /// @brief Trace (sum of diagonal) + /// @return trace double trace() const { return a[0][0] + a[1][1] + a[2][2]; } + /// @brief Element-wise approximate equality + /// @param other other matrix + /// @param epsilon tolerance + /// @return true if all elements differ by at most epsilon bool approx(const Mat33& other, double epsilon) const { for (int i = 0; i < 3; ++i) for (int j = 0; j < 3; ++j) @@ -205,6 +389,8 @@ struct Mat33 { return false; return true; } + /// @brief Check for NaN + /// @return true if any element is NaN bool has_nan() const { for (int i = 0; i < 3; ++i) for (int j = 0; j < 3; ++j) @@ -213,11 +399,15 @@ struct Mat33 { return false; } + /// @brief Determinant + /// @return det(this) double determinant() const { return a[0][0] * (a[1][1]*a[2][2] - a[2][1]*a[1][2]) + a[0][1] * (a[1][2]*a[2][0] - a[2][2]*a[1][0]) + a[0][2] * (a[1][0]*a[2][1] - a[2][0]*a[1][1]); } + /// @brief Inverse + /// @return inverse matrix Mat33 inverse() const { Mat33 inv; double inv_det = 1.0 / determinant(); @@ -232,26 +422,41 @@ struct Mat33 { inv[2][2] = inv_det * (a[0][0] * a[1][1] - a[1][0] * a[0][1]); return inv; } + /// @brief Check if matrix is identity + /// @return true if this is the identity matrix bool is_identity() const { return a[0][0] == 1 && a[0][1] == 0 && a[0][2] == 0 && a[1][0] == 0 && a[1][1] == 1 && a[1][2] == 0 && a[2][0] == 0 && a[2][1] == 0 && a[2][2] == 1; } + /// @brief Dot product of columns + /// @param i first column index + /// @param j second column index + /// @return column[i] · column[j] double column_dot(int i, int j) const { return a[0][i] * a[0][j] + a[1][i] * a[1][j] + a[2][i] * a[2][j]; } + /// @brief Check if matrix is upper triangular + /// @return true if lower triangle is zero bool is_upper_triangular() const { return a[1][0] == 0 && a[2][0] == 0 && a[2][1] == 0; } }; +/// @brief Upper-triangular 3×3 matrix +/// @details Stores 6 independent elements (no zero lower triangle) struct UpperTriangularMat33 { double a11 = 0, a12 = 0, a13 = 0; double a22 = 0, a23 = 0; double a33 = 0; + /// @brief Default constructor UpperTriangularMat33() = default; + /// @brief Assign from full matrix + /// @details Sets all elements to NaN if the matrix is not upper-triangular + /// @param m full matrix + /// @return reference to this UpperTriangularMat33& operator=(const Mat33& m) { if (m.is_upper_triangular()) { a11 = m[0][0]; @@ -265,6 +470,9 @@ struct UpperTriangularMat33 { } return *this; } + /// @brief Matrix-vector product + /// @param p vector + /// @return this * p Vec3 multiply(const Vec3& p) const { return {a11 * p.x + a12 * p.y + a13 * p.z, a22 * p.y + a23 * p.z, @@ -272,71 +480,118 @@ struct UpperTriangularMat33 { } }; -// Symmetric matrix 3x3. Used primarily for an ADP tensor. +/// @brief Symmetric 3×3 matrix +/// @details Used primarily for ADP tensor; stores 6 independent elements +/// Elements in PDB ANISOU order: u11, u22, u33, u12, u13, u23 +/// @tparam T scalar type template struct SMat33 { T u11, u22, u33, u12, u13, u23; - // The PDB ANISOU record has the above order, but in a different context - // (such as metric tensor) the order of Voigt notation may be preferred. + /// @brief Return elements in PDB ANISOU order + /// @details Order: u11, u22, u33, u12, u13, u23 + /// @return array of 6 elements std::array elements_pdb() const { return {{u11, u22, u33, u12, u13, u23}}; } + /// @brief Return elements in Voigt notation order + /// @details Order: u11, u22, u33, u23, u13, u12 + /// @return array of 6 elements std::array elements_voigt() const { return {{u11, u22, u33, u23, u13, u12}}; } + /// @brief Convert to full 3×3 matrix + /// @return full symmetric matrix Mat33 as_mat33() const { return Mat33(u11, u12, u13, u12, u22, u23, u13, u23, u33); } - // the arguments i and j must be in [0,2], i.e. 0, 1 or 2. + /// @brief Get reference to element [i,j] + /// @details i and j must be in [0,2]; no bounds checking + /// @param i row index + /// @param j column index + /// @return reference to element [i,j] T& unchecked_ref(int i, int j) { T* ptrs[9] = {&u11, &u12, &u13, &u12, &u22, &u23, &u13, &u23, &u33}; return *ptrs[3 * i + j]; } + /// @brief Trace (sum of diagonal) + /// @return trace T trace() const { return u11 + u22 + u33; } + /// @brief Check if nonzero + /// @return true if trace is nonzero bool nonzero() const { return trace() != 0; } + /// @brief Check if all elements are zero + /// @return true if all elements are zero bool all_zero() const { return u11 == 0 && u22 == 0 && u33 == 0 && u12 == 0 && u13 == 0 && u23 == 0; } + /// @brief Multiply all elements in-place by scalar + /// @param s scalar void scale(T s) const { u11 *= s; u22 *= s; u33 *= s; u12 *= s; u13 *= s; u23 *= s; } + /// @brief Return new matrix with all elements scaled by scalar + /// @tparam Real output scalar type + /// @param s scalar + /// @return scaled matrix template SMat33 scaled(Real s) const { return SMat33{u11*s, u22*s, u33*s, u12*s, u13*s, u23*s}; } - // returns U + kI + /// @brief Return U + kI + /// @details Add k to diagonal elements + /// @param k scalar to add to diagonal + /// @return new matrix SMat33 added_kI(T k) const { return {u11+k, u22+k, u33+k, u12, u13, u23}; } - // returns squared norm r^T U r where U is this matrix and vector r is arg + /// @brief Compute scalar r^T U r + /// @details Compute the scalar quadratic form for vector r + /// @param r vector + /// @return r^T U r template auto r_u_r(const Vec3_& r) const -> decltype(r.x+u11) { return r.x * r.x * u11 + r.y * r.y * u22 + r.z * r.z * u33 + 2 * (r.x * r.y * u12 + r.x * r.z * u13 + r.y * r.z * u23); } + /// @brief Compute scalar r^T U r for integer vector + /// @param h integer vector [3] + /// @return r^T U r double r_u_r(const std::array& h) const { // it's faster to first convert ints to doubles (Vec3) return r_u_r(Vec3(h)); } + /// @brief Matrix-vector product U*p + /// @param p vector + /// @return U*p Vec3 multiply(const Vec3& p) const { return {u11 * p.x + u12 * p.y + u13 * p.z, u12 * p.x + u22 * p.y + u23 * p.z, u13 * p.x + u23 * p.y + u33 * p.z}; } + /// @brief Matrix subtraction + /// @param o other matrix + /// @return this - o SMat33 operator-(const SMat33& o) const { return {u11-o.u11, u22-o.u22, u33-o.u33, u12-o.u12, u13-o.u13, u23-o.u23}; } + /// @brief Matrix addition + /// @param o other matrix + /// @return this + o SMat33 operator+(const SMat33& o) const { return {u11+o.u11, u22+o.u22, u33+o.u33, u12+o.u12, u13+o.u13, u23+o.u23}; } - // return M U M^T + /// @brief Transform by similarity: M U M^T + /// @details Compute the conjugate: M*U*M^T + /// @tparam Real output scalar type + /// @param m transformation matrix + /// @return M U M^T template SMat33 transformed_by(const Mat33& m) const { // slightly faster than m.multiply(as_mat33()).multiply(m.transpose()); @@ -350,12 +605,18 @@ template struct SMat33 { elem(0, 1), elem(0, 2), elem(1, 2)}; } + /// @brief Determinant + /// @return det(this) T determinant() const { return u11 * (u22*u33 - u23*u23) + u12 * (u23*u13 - u33*u12) + u13 * (u12*u23 - u13*u22); } + /// @brief Inverse matrix + /// @details Internal helper with pre-computed determinant + /// @param det determinant + /// @return inverse matrix SMat33 inverse_(T det) const { SMat33 inv; T inv_det = 1.0f / det; @@ -367,12 +628,16 @@ template struct SMat33 { inv.u23 = inv_det * (u12 * u13 - u11 * u23); return inv; } + /// @brief Inverse + /// @return inverse matrix SMat33 inverse() const { return inverse_(determinant()); } - /// Based on https://en.wikipedia.org/wiki/Eigenvalue_algorithm - /// To calculate both eigenvalues and eigenvectors use eig3.hpp + /// @brief Calculate eigenvalues + /// @details Based on https://en.wikipedia.org/wiki/Eigenvalue_algorithm + /// For eigenvalues and eigenvectors use eig3.hpp + /// @return array of 3 eigenvalues std::array calculate_eigenvalues() const { double p1 = u12*u12 + u13*u13 + u23*u23; if (p1 == 0) @@ -393,39 +658,63 @@ template struct SMat33 { } }; +/// @brief Affine transformation: y = mat * x + vec +/// @details Represents a linear transformation followed by translation struct Transform { Mat33 mat; Vec3 vec; + /// @brief Inverse transformation + /// @return inverse transform Transform inverse() const { Mat33 minv = mat.inverse(); return {minv, minv.multiply(vec).negated()}; } + /// @brief Apply transformation to vector + /// @param x vector + /// @return mat * x + vec Vec3 apply(const Vec3& x) const { return mat.multiply(x) + vec; } + /// @brief Compose transforms + /// @details Apply this transform followed by b + /// @param b other transform + /// @return combined transform: b ∘ this Transform combine(const Transform& b) const { return {mat.multiply(b.mat), vec + mat.multiply(b.vec)}; } + /// @brief Check if identity transform + /// @return true if this is the identity bool is_identity() const { return mat.is_identity() && vec.x == 0. && vec.y == 0. && vec.z == 0.; } + /// @brief Reset to identity transform void set_identity() { mat = Mat33(); vec = Vec3(); } + /// @brief Check for NaN + /// @return true if any component is NaN bool has_nan() const { return mat.has_nan() || vec.has_nan(); } + /// @brief Approximate equality + /// @param o other transform + /// @param epsilon tolerance + /// @return true if components match within epsilon bool approx(const Transform& o, double epsilon) const { return mat.approx(o.mat, epsilon) && vec.approx(o.vec, epsilon); } }; +/// @brief Axis-aligned bounding box +/// @tparam Pos position type (e.g., Vec3) template struct Box { Pos minimum = Pos(INFINITY, INFINITY, INFINITY); Pos maximum = Pos(-INFINITY, -INFINITY, -INFINITY); + /// @brief Expand box to include point + /// @param p point to include void extend(const Pos& p) { if (p.x < minimum.x) minimum.x = p.x; if (p.y < minimum.y) minimum.y = p.y; @@ -434,8 +723,14 @@ struct Box { if (p.y > maximum.y) maximum.y = p.y; if (p.z > maximum.z) maximum.z = p.z; } + /// @brief Get size of box + /// @return box size as position offset Pos get_size() const { return maximum - minimum; } + /// @brief Expand box by margins on each side + /// @param p margin offset for each direction void add_margins(const Pos& p) { minimum -= p; maximum += p; } + /// @brief Expand box by uniform margin + /// @param m margin in all directions void add_margin(double m) { add_margins(Pos(m, m, m)); } }; diff --git a/include/gemmi/neutron92.hpp b/include/gemmi/neutron92.hpp index 67b96d36..b65769f2 100644 --- a/include/gemmi/neutron92.hpp +++ b/include/gemmi/neutron92.hpp @@ -20,13 +20,25 @@ namespace gemmi { #pragma GCC diagnostic ignored "-Wfloat-conversion" #endif +/// @brief Placeholder scattering type where scattering is a single real constant +/// (zero Gaussians plus one constant term). +/// Used for neutron coherent scattering. template struct ZeroCoef { using Coef = GaussianCoef<0, 1, Real>; static Real data[121]; + /// @brief Get scattering length for element. + /// @param el element + /// @return scattering length static Real& get_(El el) { return data[static_cast(el)]; } + /// @brief Check if scattering length is available. + /// @param el element + /// @return true if available static bool has(El el) { return static_cast(el) < sizeof(data) / sizeof(Real); } + /// @brief Get coefficient for element as constant Gaussian. + /// @param el element + /// @return coefficient static Coef get(El el, signed char /*charge*/=0, int /*serial*/=0) { return Coef{{get_(el)}}; } }; @@ -34,13 +46,29 @@ struct ZeroCoef { template Real ZeroCoef::data[121] = { /*X*/ 0.0 }; +/// @brief Neutron coherent scattering lengths from Neutron News 3(3) 1992. +/// @details Real part of the bound coherent scattering length in femtometers (fm). +/// Data matches cctbx/eltbx/neutron.h and the NIST neutron scattering lengths table. +/// @tparam Real floating-point type +/// @par References +/// Sears, V.F. (1992). Neutron scattering lengths and cross sections. +/// Neutron News 3(3), 26–37. https://doi.org/10.1080/10448639208218770 template struct Neutron92 { using Coef = GaussianCoef<0, 1, Real>; static Real data[121]; + /// @brief Get scattering length for element. + /// @param el element + /// @return scattering length static Real& get_(El el) { return data[static_cast(el)]; } + /// @brief Check if non-zero scattering length is available. + /// @param el element + /// @return true if non-zero static bool has(El el) { return get_(el) != 0; } + /// @brief Get coefficient for element as constant Gaussian. + /// @param el element + /// @return coefficient static Coef get(El el, signed char /*charge*/=0, int /*serial*/=0) { return Coef{{get_(el)}}; } }; diff --git a/include/gemmi/qcp.hpp b/include/gemmi/qcp.hpp index 7e0be7a7..cccf108c 100644 --- a/include/gemmi/qcp.hpp +++ b/include/gemmi/qcp.hpp @@ -83,14 +83,33 @@ namespace gemmi { +/// @brief Result of quaternion-based structural superposition. +/// Stores the optimal rotation, translation, and RMSD between two point sets. struct SupResult { + /// @brief Root-mean-square deviation between the superposed structures. double rmsd; + /// @brief Number of atom pairs used in the calculation. size_t count; - Position center1, center2; + /// @brief Centroid of the first structure. + Position center1; + /// @brief Centroid of the second structure. + Position center2; + /// @brief Rotation and translation to map second structure onto first. Transform transform; }; -// helper function +/// @brief Compute covariance matrix H and sum G for QCP algorithm. +/// @details Helper function for quaternion-based superposition. +/// Computes the weighted covariance (or "scatter") matrix H = sum_i w_i * (p1_i - ctr1) * (p2_i - ctr2)^T, +/// and the sum G1 + G2 = sum_i w_i * |p1_i - ctr1|^2 + |p2_i - ctr2|^2. +/// @param mat Output 3x3 covariance matrix (accumulated into). +/// @param pos1 Array of positions in first structure. +/// @param ctr1 Centroid of first structure. +/// @param pos2 Array of positions in second structure. +/// @param ctr2 Centroid of second structure. +/// @param len Number of position pairs. +/// @param weight Optional array of weights; if null, all weights are 1.0. +/// @return Sum of weighted squared distances from centroids (G1 + G2). inline double qcp_inner_product(Mat33& mat, const Position* pos1, const Position& ctr1, const Position* pos2, const Position& ctr2, @@ -116,7 +135,21 @@ inline double qcp_inner_product(Mat33& mat, return (G1 + G2) * 0.5; } -// helper function +/// @brief Compute optimal rotation and RMSD using quaternion characteristic polynomial. +/// @details +/// Fast algorithm (Theobald 2005, Liu et al. 2009) for computing the optimal +/// rotation that minimizes RMSD between two point clouds. Uses eigenvalue +/// computation via Newton-Raphson on a characteristic polynomial and +/// quaternion-to-rotation-matrix conversion. +/// @param rot Output 3x3 rotation matrix; set to identity if computation fails. +/// May be null if only RMSD is needed (set min_score > 0). +/// @param A Covariance matrix from qcp_inner_product(). +/// @param rmsd Output: root-mean-square deviation after optimal rotation. +/// @param E0 Sum of weighted squared distances from centroids. +/// @param len Sum of weights (or number of atoms). +/// @param min_score If >= 0 and RMSD < min_score, skip rotation computation and return -1. +/// @return Status: 1 if rotation computed successfully, 0 if singular (identity returned), +/// -1 if min_score criterion not met or rot is null. inline int fast_calc_rmsd_and_rotation(Mat33* rot, const Mat33& A, double *rmsd, double E0, double len, double min_score) { const double evecprec = 1e-6; @@ -280,7 +313,11 @@ inline int fast_calc_rmsd_and_rotation(Mat33* rot, const Mat33& A, double *rmsd, return 1; } -// helper function +/// @brief Calculate the weighted centroid of a point set. +/// @param pos Array of positions. +/// @param len Number of positions. +/// @param weight Optional array of weights; if null, all weights are 1.0. +/// @return Weighted centroid position. inline Position qcp_calculate_center(const Position* pos, size_t len, const double *weight) { double wsum = 0.0; Position ctr; @@ -292,8 +329,24 @@ inline Position qcp_calculate_center(const Position* pos, size_t len, const doub return ctr / wsum; } -// Calculate superposition of pos2 onto pos1 -- pos2 is movable. -// Does not perform the superposition, only returns the operation to be used. +/// @brief Calculate the optimal superposition of one point set onto another using QCP. +/// @details +/// Finds the rotation and translation that best align pos2 onto pos1, +/// minimizing the weighted root-mean-square deviation. Does not modify positions; +/// to apply the transformation, use transform.apply() on pos2. +/// @param pos1 Array of reference positions (fixed). +/// @param pos2 Array of positions to be superposed (movable). +/// @param len Number of position pairs. +/// @param weight Optional array of weights; if null, all weights are 1.0. +/// @return SupResult containing RMSD, rotation matrix, translation, and centroids. +/// @par References +/// Theobald, D.L. (2005). Rapid calculation of RMSD using a quaternion-based +/// characteristic polynomial. Acta Cryst. A61, 478–480. +/// https://doi.org/10.1107/S0108767305015266 +/// +/// Liu, P., Agrafiotis, D.K. & Theobald, D.L. (2010). Fast determination of the +/// optimal rotational matrix for macromolecular superpositions. +/// J. Comput. Chem. 31, 1561–1563. https://doi.org/10.1002/jcc.21439 inline SupResult superpose_positions(const Position* pos1, const Position* pos2, size_t len, const double* weight) { SupResult result; @@ -326,7 +379,16 @@ inline SupResult superpose_positions(const Position* pos1, const Position* pos2, return result; } -// Similar to superpose_positions(), but calculates RMSD only. +/// @brief Calculate the RMSD between two point sets after optimal superposition (fast). +/// @details +/// Similar to superpose_positions() but computes only the RMSD without +/// the rotation matrix or translation vector, making it faster for cases +/// where only the quality of fit is needed. +/// @param pos1 Array of reference positions. +/// @param pos2 Array of positions to be superposed. +/// @param len Number of position pairs. +/// @param weight Optional array of weights; if null, all weights are 1.0. +/// @return Root-mean-square deviation after optimal superposition. inline double calculate_rmsd_of_superposed_positions(const Position* pos1, const Position* pos2, size_t len, const double* weight) { diff --git a/include/gemmi/seqalign.hpp b/include/gemmi/seqalign.hpp index 758df8fb..28a695dd 100644 --- a/include/gemmi/seqalign.hpp +++ b/include/gemmi/seqalign.hpp @@ -17,26 +17,40 @@ namespace gemmi { +/// @brief Scoring parameters for sequence alignment. struct AlignmentScoring { + /// @brief Score for a match int match = 1; + /// @brief Penalty for a mismatch int mismatch = -1; - int gapo = -1; // gap opening penalty - int gape = -1; // gap extension penalty - // In a polymer in model, coordinates are used to determine expected gaps. - int good_gapo = 0; // gap opening in expected place in a polymer - int bad_gapo = -2; // gap opening that was not predicted + /// @brief Gap opening penalty + int gapo = -1; + /// @brief Gap extension penalty + int gape = -1; + /// @brief Gap opening penalty in expected place in a polymer + int good_gapo = 0; + /// @brief Gap opening penalty for unexpected gaps in a polymer + int bad_gapo = -2; + /// @brief Substitution score matrix (square matrix, row-major order) std::vector score_matrix; + /// @brief Labels for score matrix rows/columns (e.g. amino acid codes) std::vector matrix_encoding; + /// @brief Get simple default scoring (match=1, mismatch=-1, gaps=-1). + /// @return Pointer to static simple scoring instance static const AlignmentScoring* simple() { static const AlignmentScoring s{}; return &s; } - // Scoring for alignment of partially-modelled polymer to its full sequence + /// @brief Get scoring for alignment of partially-modelled polymer to full sequence. + /// @details Uses high penalties to penalize mismatches but allow expected gaps. + /// @return Pointer to static partial model scoring instance static const AlignmentScoring* partial_model() { static const AlignmentScoring s = { 100, -10000, -10000, -1, 0, -200, {}, {} }; return &s; } + /// @brief Get BLOSUM-62 scoring matrix (standard for protein alignment). + /// @return Pointer to static BLOSUM-62 scoring instance static const AlignmentScoring* blosum62() { // BLAST uses BLOSUM-62 with gap cost (10,1) static const AlignmentScoring s = { @@ -68,17 +82,28 @@ struct AlignmentScoring { } }; +/// @brief Result of a pairwise sequence alignment. struct AlignmentResult { + /// @brief Single element in CIGAR string. struct Item { + /// @brief Packed value: bits 0-3 = operation, bits 4+ = length std::uint32_t value; + /// @brief Get CIGAR operation: 'M'=match/mismatch, 'I'=insertion, 'D'=deletion char op() const { return "MID"[value & 0xf]; } + /// @brief Get length of this CIGAR operation std::uint32_t len() const { return value >> 4; } }; + /// @brief Alignment score int score = 0; + /// @brief Number of matching positions int match_count = 0; + /// @brief Visual representation of matches ('|' for match, '.' for mismatch, ' ' for gap) std::string match_string; + /// @brief CIGAR string representation as vector of operations std::vector cigar; + /// @brief Get CIGAR string representation. + /// @return CIGAR string (e.g., "5M1D3M") std::string cigar_str() const { std::string s; for (Item item : cigar) { @@ -88,7 +113,9 @@ struct AlignmentResult { return s; } - // 1=query, 2=target, other=shorter + /// @brief Get input sequence length for alignment. + /// @param which 1=query, 2=target, other=shorter of the two + /// @return Length of specified input sequence std::size_t input_length(int which) const { std::size_t counters[3] = {0, 0, 0}; for (Item item : cigar) @@ -97,14 +124,22 @@ struct AlignmentResult { return counters[0] + counters[which]; return counters[0] + std::min(counters[1], counters[2]); } + /// @brief Calculate sequence identity percentage. + /// @param which 1=query, 2=target, 0=shorter (default) + /// @return Percent identity (0-100) double calculate_identity(int which=0) const { return 100. * match_count / input_length(which); } - // In the backtrack matrix, value p[] has the following structure: - // bit 0-2: which type gets the max - 0 for H, 1 for E, 2 for F - // bit 3/0x08: 1 if a continuation on the E state - // bit 4/0x10: 1 if a continuation on the F state + /// @brief Convert backtrack matrix to CIGAR string. + /// @details Reconstructs the alignment path from the dynamic programming matrix. + /// In the backtrack matrix, value p[] has the structure: + /// - bits 0-2: which type gets the max (0 for H, 1 for E, 2 for F) + /// - bit 3/0x08: 1 if a continuation on the E state + /// - bit 4/0x10: 1 if a continuation on the F state + /// @param p Backtrack matrix (row-major) + /// @param i Number of target positions + /// @param j Number of query positions void backtrack_to_cigar(const std::uint8_t *p, int i, int j) { i--; int j0 = j--; @@ -135,6 +170,10 @@ struct AlignmentResult { } + /// @brief Count matching positions and generate match string. + /// @details Compares aligned sequences and updates match_count and match_string. + /// @param query Encoded query sequence + /// @param target Encoded target sequence void count_matches(const std::vector& query, const std::vector& target) { match_count = 0; @@ -157,6 +196,10 @@ struct AlignmentResult { } } + /// @brief Insert gap characters into a sequence based on CIGAR string. + /// @param s Original sequence string + /// @param which 1=show query gaps, 2=show target gaps, other=show both + /// @return Sequence with gaps inserted as '-' characters std::string add_gaps(const std::string& s, unsigned which) const { std::string out; size_t pos = 0; @@ -168,6 +211,10 @@ struct AlignmentResult { return out; } + /// @brief Get formatted alignment string with query, matches, and target. + /// @param a Query sequence + /// @param b Target sequence + /// @return Multi-line formatted alignment std::string formatted(const std::string& a, const std::string& b) const { std::string r; r.reserve((match_string.size() + 1) * 3); @@ -180,7 +227,9 @@ struct AlignmentResult { return r; } - // op: 0=match/mismatch, 1=insertion, 2=deletion + /// @brief Add or extend a CIGAR operation; consecutive ops of the same type are merged. + /// @param op Operation: 0=match/mismatch, 1=insertion, 2=deletion + /// @param len Length of operation void push_cigar(std::uint32_t op, int len) { if (cigar.empty() || op != (cigar.back().value & 0xf)) cigar.push_back({len<<4 | op}); @@ -189,8 +238,19 @@ struct AlignmentResult { } }; -/// All values in query and target must be less then m. -/// target_gapo, if set, has gap opening penalties at specific positions in target. +/// @brief Perform pairwise sequence alignment using dynamic programming. +/// @details Implements the Needleman-Wunsch global alignment algorithm with +/// position-specific gap opening penalties. Code derived from ksw2 (Heng Li). +/// @param query Encoded query sequence (values < m) +/// @param target Encoded target sequence (values < m) +/// @param target_gapo Position-specific gap opening penalties for target (empty if uniform) +/// @param m Number of distinct values in encoding (vocabulary size) +/// @param scoring Scoring parameters (match, mismatch, gap costs, score matrix) +/// @return Alignment result with score, CIGAR string, and match count +/// @par References +/// Needleman, S.B. & Wunsch, C.D. (1970). A general method applicable to the +/// search for similarities in the amino acid sequence of two proteins. +/// J. Mol. Biol. 48, 443–453. https://doi.org/10.1016/0022-2836(70)90057-4 inline AlignmentResult align_sequences(const std::vector& query, const std::vector& target, @@ -294,6 +354,15 @@ AlignmentResult align_sequences(const std::vector& query, return result; } +/// @brief Perform pairwise alignment of string-based sequences. +/// @details Encodes sequences as integers and calls align_sequences(). +/// Useful for aligning residue names or custom codes. +/// @param query Query sequence (vector of strings, e.g. 3-letter residue codes) +/// @param target Target sequence (vector of strings) +/// @param target_gapo Position-specific gap opening penalties for target +/// @param scoring Scoring parameters (nullptr uses simple default) +/// @return Alignment result with score, CIGAR string, and match count +/// @note Returns empty result if encoding requires >255 distinct values inline AlignmentResult align_string_sequences(const std::vector& query, const std::vector& target, diff --git a/include/gemmi/seqtools.hpp b/include/gemmi/seqtools.hpp index 4117a1fa..ca3b6ff2 100644 --- a/include/gemmi/seqtools.hpp +++ b/include/gemmi/seqtools.hpp @@ -10,8 +10,16 @@ namespace gemmi { +/// @brief Get the molecular weight of water (H2O). +/// @return Water weight in atomic mass units constexpr double h2o_weight() { return 2 * 1.00794 + 15.9994; } +/// @brief Calculate the molecular weight of a polymer sequence. +/// @details Sums the weights of individual residues and subtracts water molecules +/// for the peptide/nucleic acid bonds formed. +/// @param seq Vector of residue names (3-letter codes) +/// @param unknown Weight to use for unknown residues (default 100.0) +/// @return Molecular weight in atomic mass units inline double calculate_sequence_weight(const std::vector& seq, double unknown=100.) { double weight = 0.; @@ -27,6 +35,9 @@ inline double calculate_sequence_weight(const std::vector& seq, return weight - (seq.size() - 1) * h2o_weight(); } +/// @brief Convert a sequence to single-letter FASTA code. +/// @param seq Vector of residue names (3-letter codes) +/// @return String of single-letter codes (X for unknown residues) inline std::string one_letter_code(const std::vector& seq) { std::string r; for (const std::string& item : seq) @@ -34,9 +45,13 @@ inline std::string one_letter_code(const std::vector& seq) { return r; } -/// Returns the format used in _entity_poly.pdbx_seq_one_letter_code, +/// @brief Convert sequence to PDBx format with non-standard residues in parentheses. +/// @details Returns the format used in _entity_poly.pdbx_seq_one_letter_code, /// in which non-standard amino acids/nucleotides are represented by CCD codes -/// in parenthesis, e.g. AA(MSE)H. +/// in parentheses, e.g. AA(MSE)H. +/// @param seq Vector of residue names (3-letter codes) +/// @param kind Type of residue (AA, DNA, RNA) to filter +/// @return String with single-letter codes for standard residues and (CCD) for non-standard inline std::string pdbx_one_letter_code(const std::vector& seq, ResidueKind kind) { std::string r; @@ -51,7 +66,12 @@ inline std::string pdbx_one_letter_code(const std::vector& seq, return r; } -/// used with expand_one_letter_sequence() +/// @brief Convert polymer type to residue kind. +/// @details Used with expand_one_letter_sequence() to determine which +/// single-letter codes to expect for a polymer. +/// @param ptype Polymer type +/// @return Residue kind (AA, DNA, or RNA) +/// @throws Fails with error if polymer type is Unknown inline ResidueKind sequence_kind(PolymerType ptype) { if (is_polypeptide(ptype)) return ResidueKind::AA; diff --git a/include/gemmi/serialize.hpp b/include/gemmi/serialize.hpp index 76721e69..f0a439b2 100644 --- a/include/gemmi/serialize.hpp +++ b/include/gemmi/serialize.hpp @@ -10,12 +10,18 @@ #include "model.hpp" #include "cifdoc.hpp" +/// @brief Macro to generate serialize functions for a struct. +/// @details Expands to both mutable and const serialize template specializations. +/// Usage: SERIALIZE(MyStruct, member1, member2, ...) #define SERIALIZE(Struct, ...) \ template \ void serialize(Archive& archive, Struct& o) { archive(__VA_ARGS__); } \ template \ void serialize(Archive& archive, const Struct& o) { archive(__VA_ARGS__); } +/// @brief Macro to generate serialize functions for a struct with a parent class. +/// @details Serializes the parent class first, then child members. +/// Usage: SERIALIZE_P(MyStruct, ParentClass, member1, member2, ...) #define SERIALIZE_P(Struct, Parent, ...) \ template \ void serialize(Archive& archive, Struct& o) \ @@ -24,6 +30,9 @@ template \ void serialize(Archive& archive, const Struct& o) \ { archive(static_cast(o), __VA_ARGS__); } +/// @brief Macro to generate serialize functions for a template struct with one parameter. +/// @details Generates both mutable and const serialize specializations for a template class. +/// Usage: SERIALIZE_T1(MyTemplate, typename, member1, member2, ...) #define SERIALIZE_T1(Struct, Typename, ...) \ template \ void serialize(Archive& archive, Struct& o) { archive(__VA_ARGS__); } \ @@ -179,6 +188,9 @@ SERIALIZE(Loop, o.tags, o.values) SERIALIZE(Block, o.name, o.items) SERIALIZE(Document, o.source, o.blocks) +/// @brief Serialize CIF Item, handling union member construction. +/// @param archive Archive to read from or write to +/// @param o Item to serialize template void serialize(Archive& archive, Item& o) { archive(o.type, o.line_number); @@ -190,6 +202,9 @@ void serialize(Archive& archive, Item& o) { case ItemType::Erased: break; } } +/// @brief Serialize const CIF Item. +/// @param archive Archive to read from or write to +/// @param o Item to serialize template void serialize(Archive& archive, const Item& o) { archive(o.type, o.line_number); diff --git a/include/gemmi/smarts.hpp b/include/gemmi/smarts.hpp index 2442d141..1723a165 100644 --- a/include/gemmi/smarts.hpp +++ b/include/gemmi/smarts.hpp @@ -11,18 +11,23 @@ namespace gemmi { -// A match is a vector of atom indices in the ChemComp, corresponding -// to the atoms in the SMARTS pattern. +/// @brief A SMARTS pattern match represented as atom indices. +/// Each match is a vector of indices into the ChemComp's atom list, +/// corresponding in order to atoms in the SMARTS pattern. using SmartsMatch = std::vector; -// Find all non-overlapping or overlapping matches of a SMARTS pattern. -// This implementation supports a small subset of SMARTS: -// - Atomic symbols: [C], [N], [O], etc. (or C, N, O without brackets) -// - Wildcards: * -// - Aromaticity: [c], [n], etc. -// - Constraints: H (hydrogen count), X (connectivity) -// - Bonds: - (single), = (double), ~ (any) -// - Branching: ( ) +/// @brief Find all matches of a SMARTS pattern in a chemical component. +/// @details +/// This implementation supports a small subset of SMARTS notation: +/// - Atomic symbols: [C], [N], [O], etc. (or C, N, O without brackets) +/// - Wildcards: * (matches any atom) +/// - Aromaticity: [c], [n], etc. (aromatic atoms) +/// - Constraints: H (hydrogen count), X (connectivity/degree) +/// - Bonds: - (single), = (double), ~ (any) +/// - Branching: ( ) for subgraph grouping +/// @param cc The chemical component to search. +/// @param pattern The SMARTS pattern string. +/// @return Vector of all matches found; may include overlapping matches. GEMMI_DLL std::vector match_smarts(const ChemComp& cc, const std::string& pattern); } // namespace gemmi diff --git a/include/gemmi/twin.hpp b/include/gemmi/twin.hpp index 27222779..444e9ca0 100644 --- a/include/gemmi/twin.hpp +++ b/include/gemmi/twin.hpp @@ -125,10 +125,19 @@ using TwoFold = TwoFold_; } // namespace impl +/// @brief Pair of symmetry operation and its obliquity angle (in degrees). using OpObliquity = std::pair; -// Obliquity calculated here is the same as Le Page delta in cctbx. -// (tested against output of lebedev_2005_perturbation.py from cctbx) +/// @brief Calculate cosine of obliquity angle for a twinning operator. +/// @details Obliquity is the same as Le Page's delta (tested against +/// lebedev_2005_perturbation.py from cctbx). +/// @param reduced_cell Unit cell (typically from Niggli reduction) +/// @param d_axis 2-fold axis direction in direct space +/// @param r_axis 2-fold axis direction in reciprocal space +/// @return Cosine of the obliquity angle +/// @par References +/// Le Page, Y. (1982). Direct derivation of twin laws from the metric tensor. +/// J. Appl. Cryst. 15, 255–259. https://doi.org/10.1107/S0021889882011959 inline double calculate_cos_obliquity(const UnitCell& reduced_cell, const Vec3& d_axis, const Vec3& r_axis) { // From the Le Page paper: @@ -142,8 +151,10 @@ inline double calculate_cos_obliquity(const UnitCell& reduced_cell, return std::min(1.0, std::fabs(t.cos_angle(tau))); } -// Reduced cell can be from GruberVector::get_cell() after Niggli reduction. -// max_obliq is max obliquity (delta) in degrees as defined in Le Page (1982). +/// @brief Find potential 2-fold twinning operators for a reduced unit cell. +/// @param reduced_cell Reduced unit cell (typically from Niggli reduction) +/// @param max_obliq Maximum obliquity (delta) in degrees as defined in Le Page (1982) +/// @return Vector of 2-fold operators with their obliquity angles, sorted by obliquity inline std::vector find_lattice_2fold_ops(const UnitCell& reduced_cell, double max_obliq) { std::vector ret; @@ -166,9 +177,10 @@ inline std::vector find_lattice_2fold_ops(const UnitCell& reduced_c return ret; } -// Reduced cell can be from GruberVector::get_cell() after Niggli reduction. -// max_obliq is max obliquity (delta) in degrees as defined in Le Page (1982). -// Returns lattice symmetry except inversion. +/// @brief Find lattice symmetry operations for a reduced unit cell (excluding inversion). +/// @param reduced_cell Reduced unit cell (typically from Niggli reduction) +/// @param max_obliq Maximum obliquity (delta) in degrees as defined in Le Page (1982) +/// @return Group operations representing lattice symmetry (without inversion) inline GroupOps find_lattice_symmetry_r(const UnitCell& reduced_cell, double max_obliq) { std::vector gen = find_lattice_2fold_ops(reduced_cell, max_obliq); std::vector genops; @@ -186,7 +198,11 @@ inline GroupOps find_lattice_symmetry_r(const UnitCell& reduced_cell, double max return go; } -// Returns lattice symmetry, but without inversion. +/// @brief Find lattice symmetry operations for a unit cell (excluding inversion). +/// @param cell Unit cell +/// @param centring Centring type (P, I, F, C, etc.) +/// @param max_obliq Maximum obliquity (delta) in degrees as defined in Le Page (1982) +/// @return Group operations representing lattice symmetry (without inversion) inline GroupOps find_lattice_symmetry(const UnitCell& cell, char centring, double max_obliq) { GruberVector gv(cell, centring, true); @@ -197,8 +213,21 @@ inline GroupOps find_lattice_symmetry(const UnitCell& cell, char centring, return gops; } -// Determine potential twinning operators. -// Returns all operators or only unique ones (coset representatives). +/// @brief Determine potential twinning operators for a structure. +/// @details Compares lattice symmetry with space group symmetry to identify +/// candidate twin operators, optionally returning all or only unique ones. +/// @param cell Unit cell +/// @param sg Space group (if nullptr, P1 is used) +/// @param max_obliq Maximum obliquity (delta) in degrees as defined in Le Page (1982) +/// @param all_ops If true, return all operators; if false, return only coset representatives +/// @return Vector of potential twinning operators +/// @par References +/// Lebedev, A.A., Vagin, A.A. & Murshudov, G.N. (2006). Intensity statistics in +/// twinned crystals with examples from the PDB. +/// Acta Cryst. D62, 83–95. https://doi.org/10.1107/S0907444905036759 +/// +/// Zwart, P.H., Grosse-Kunstleve, R.W. & Adams, P.D. (2006). Exploring metric symmetry. +/// CCP4 Newsletter 42. http://legacy.ccp4.ac.uk/newsletters/newsletter44/articles/explore_metric_symmetry.html inline std::vector find_twin_laws(const UnitCell& cell, const SpaceGroup* sg, double max_obliq,