diff --git a/docs/api.rst b/docs/api.rst
new file mode 100644
index 000000000..19b248015
--- /dev/null
+++ b/docs/api.rst
@@ -0,0 +1,63 @@
+.. _api:
+
+C++ API Reference
+#################
+
+This page documents the Gemmi C++ library API, generated from Doxygen comments
+in ``include/gemmi/*.hpp``. It is updated with each pull request in the
+API documentation series.
+
+For the Python API, see the `Python API reference `_.
+
+.. note::
+
+ Documentation coverage is being added incrementally. Headers not yet
+ listed here will appear in subsequent pull requests.
+
+Core Data Structures
+--------------------
+
+*(Full documentation added in PR 2.)*
+
+.. doxygenfile:: model.hpp
+ :project: gemmi
+
+Reflection Data
+---------------
+
+*(Full documentation added in PR 5.)*
+
+.. doxygenfile:: mtz.hpp
+ :project: gemmi
+
+.. doxygenfile:: refln.hpp
+ :project: gemmi
+
+.. doxygenfile:: cif2mtz.hpp
+ :project: gemmi
+
+.. doxygenfile:: mtz2cif.hpp
+ :project: gemmi
+
+.. doxygenfile:: xds_ascii.hpp
+ :project: gemmi
+
+.. doxygenfile:: xds2mtz.hpp
+ :project: gemmi
+
+.. doxygenfile:: intensit.hpp
+ :project: gemmi
+
+.. doxygenfile:: binner.hpp
+ :project: gemmi
+
+.. doxygenfile:: asudata.hpp
+ :project: gemmi
+
+Map and Grid Data
+-----------------
+
+*(Stub — full documentation added in PR 6.)*
+
+.. doxygenfile:: grid.hpp
+ :project: gemmi
diff --git a/include/gemmi/asudata.hpp b/include/gemmi/asudata.hpp
index ccb44ee09..aacf67561 100644
--- a/include/gemmi/asudata.hpp
+++ b/include/gemmi/asudata.hpp
@@ -1,6 +1,7 @@
// Copyright 2020 Global Phasing Ltd.
//
-// AsuData for storing reflection data.
+/// @file
+/// @brief AsuData template for storing per-HKL reflection data in asymmetric unit.
#ifndef GEMMI_ASUDATA_HPP_
#define GEMMI_ASUDATA_HPP_
@@ -14,13 +15,18 @@
namespace gemmi {
+/// @brief Correlation calculation for complex-valued reflection data.
+/// Accumulates running statistics for complex numbers (e.g., structure factors).
struct ComplexCorrelation {
- int n = 0;
- double sum_xx = 0.;
- double sum_yy = 0.;
- std::complex sum_xy = 0.;
- std::complex mean_x = 0.;
- std::complex mean_y = 0.;
+ int n = 0; ///< Number of points accumulated
+ double sum_xx = 0.; ///< Sum of |x|^2
+ double sum_yy = 0.; ///< Sum of |y|^2
+ std::complex sum_xy = 0.; ///< Sum of (x - mean_x) * conj(y - mean_y)
+ std::complex mean_x = 0.; ///< Running mean of x
+ std::complex mean_y = 0.; ///< Running mean of y
+ /// @brief Add a complex-valued point pair to the correlation.
+ /// @param x First complex value
+ /// @param y Second complex value
void add_point(std::complex x, std::complex y) {
++n;
double inv_n = 1.0 / n;
@@ -33,15 +39,27 @@ struct ComplexCorrelation {
mean_x += dx * inv_n;
mean_y += dy * inv_n;
}
+ /// @brief Add a complex-valued point pair (float version).
+ /// @param x First complex value
+ /// @param y Second complex value
void add_point(std::complex x, std::complex y) {
add_point(std::complex(x), std::complex(y));
}
+ /// @brief Compute correlation coefficient from accumulated statistics.
std::complex coefficient() const { return sum_xy / std::sqrt(sum_xx * sum_yy); }
+ /// @brief Compute ratio of mean magnitudes.
double mean_ratio() const { return std::abs(mean_y) / std::abs(mean_x); }
};
-/// \pre a and b are sorted
+/// @brief Apply a function to matching reflections from two sorted lists.
+/// Iterates through reflections with matching HKLs in both lists and calls func.
+/// @pre Vectors a and b are sorted by HKL.
+/// @tparam Func Callable type taking (const T&, const T&).
+/// @tparam T Reflection data type (must have hkl member and operator<).
+/// @param a First reflection list.
+/// @param b Second reflection list.
+/// @param func Function to call for each matching pair.
template
void for_matching_reflections(const std::vector& a,
const std::vector& b,
@@ -61,7 +79,12 @@ void for_matching_reflections(const std::vector& a,
}
}
-/// \pre a and b are sorted
+/// @brief Calculate correlation of intensity values between two sorted lists.
+/// @pre Vectors a and b are sorted by HKL.
+/// @tparam T Reflection data type (must have hkl and value members).
+/// @param a First reflection list.
+/// @param b Second reflection list.
+/// @return Correlation object computed from matching HKLs.
template
Correlation calculate_hkl_value_correlation(const std::vector& a,
const std::vector& b) {
@@ -72,7 +95,12 @@ Correlation calculate_hkl_value_correlation(const std::vector& a,
return cor;
}
-/// \pre a and b are sorted
+/// @brief Calculate correlation of complex-valued reflection data between two sorted lists.
+/// @pre Vectors a and b are sorted by HKL.
+/// @tparam T Reflection data type (must have hkl and value members).
+/// @param a First reflection list.
+/// @param b Second reflection list.
+/// @return ComplexCorrelation object computed from matching HKLs.
template
ComplexCorrelation calculate_hkl_complex_correlation(const std::vector& a,
const std::vector& b) {
@@ -83,7 +111,12 @@ ComplexCorrelation calculate_hkl_complex_correlation(const std::vector& a,
return cor;
}
-/// \pre a and b are sorted
+/// @brief Count matching reflections with identical values in two sorted lists.
+/// @pre Vectors a and b are sorted by HKL.
+/// @tparam T Reflection data type (must have hkl and value members).
+/// @param a First reflection list.
+/// @param b Second reflection list.
+/// @return Number of matching HKLs with equal values.
template
int count_equal_values(const std::vector& a, const std::vector& b) {
int count = 0;
@@ -94,31 +127,44 @@ int count_equal_values(const std::vector& a, const std::vector& b) {
return count;
}
+/// @brief Miller index paired with a generic value.
+/// Used as the basic element in AsuData containers.
+/// @tparam T Value type (float, double, complex, etc.).
template
struct HklValue {
- alignas(8) Miller hkl;
- T value;
+ alignas(8) Miller hkl; ///< Miller indices (h, k, l)
+ T value; ///< Associated value (intensity, structure factor, etc.)
+ /// @brief Compare with Miller index.
bool operator<(const Miller& m) const { return hkl < m; }
+ /// @brief Compare with another HklValue by their HKL indices.
bool operator<(const HklValue& o) const { return operator<(o.hkl); }
};
+/// @brief Value paired with its uncertainty (sigma/standard deviation).
+/// @tparam T Numeric type (float, double, etc.).
template
struct ValueSigma {
using value_type = T;
- T value, sigma;
+ T value; ///< The measured or calculated value
+ T sigma; ///< Standard deviation or uncertainty
+ /// @brief Check equality of both value and sigma.
bool operator==(const ValueSigma& o) const {
return value == o.value && sigma == o.sigma;
}
};
+/// @brief Implementation functions for moving reflections to asymmetric unit.
namespace impl {
+/// @brief Generic move_to_asu for real-valued data (no phase adjustment).
template
void move_to_asu(const GroupOps&, const Miller& hkl, int, HklValue& hkl_value) {
hkl_value.hkl = hkl;
}
+/// @brief Specialized move_to_asu for complex-valued data (applies phase shift).
+/// Applies phase shift from symmetry operator and conjugation for isym % 2 == 0.
template
void move_to_asu(const GroupOps& gops, const Miller& hkl, int isym,
HklValue>& v) {
@@ -135,12 +181,17 @@ void move_to_asu(const GroupOps& gops, const Miller& hkl, int isym,
}
} // namespace impl
+/// @brief Generic container for reflection data in asymmetric unit.
+/// Stores values (e.g., structure factors, phases, intensities) indexed by Miller indices.
+/// Keeps data sorted by HKL and can enforce ASU constraints.
+/// @tparam T Value type (float, double, complex, ValueSigma<>, etc.).
template
struct AsuData {
- std::vector> v;
- UnitCell unit_cell_;
- const SpaceGroup* spacegroup_ = nullptr;
- // function defining FPhiProxy interface
+ std::vector> v; ///< Reflection data (HKL + value pairs)
+ UnitCell unit_cell_; ///< Unit cell parameters
+ const SpaceGroup* spacegroup_ = nullptr; ///< Space group (not owned by this object)
+
+ // FPhiProxy interface compatibility methods
size_t stride() const { return 1; }
size_t size() const { return v.size(); }
Miller get_hkl(size_t n) const { return v[n].hkl; }
@@ -148,11 +199,17 @@ struct AsuData {
double get_phi(size_t n) const { return std::arg(v[n].value); }
const UnitCell& unit_cell() const { return unit_cell_; }
const SpaceGroup* spacegroup() const { return spacegroup_; }
+
+ /// @brief Sort reflections by HKL indices if not already sorted.
void ensure_sorted() {
if (!std::is_sorted(v.begin(), v.end()))
std::sort(v.begin(), v.end());
}
+ /// @brief Transform all reflections to the asymmetric unit.
+ /// Moves reflections outside ASU to their equivalent inside, applying
+ /// symmetry operators and phase shifts as needed for complex values.
+ /// @param tnt_asu If true, use TNT-style ASU; otherwise use standard ASU.
void ensure_asu(bool tnt_asu=false) {
if (!spacegroup_)
fail("AsuData::ensure_asu(): space group not set");
@@ -167,7 +224,13 @@ struct AsuData {
}
}
- // load values from one column
+ /// @brief Load values from a single data column.
+ /// Reads HKLs and values from proxy, filters NaN, converts to ASU, and sorts.
+ /// @tparam DataProxy Type with column_index(), unit_cell(), spacegroup(),
+ /// size(), stride(), get_hkl(), get_num() interface.
+ /// @param proxy Data proxy object (MTZ, mmCIF, etc.).
+ /// @param label Column name/label to load.
+ /// @param as_is If true, skip ASU conversion and sorting (raw load).
template
void load_values(const DataProxy& proxy, const std::string& label,
bool as_is=false) {
@@ -185,7 +248,15 @@ struct AsuData {
}
}
- // load values from two or more columns
+ /// @brief Load values from multiple columns (for complex, vector, or sigma pairs).
+ /// Reads N columns per reflection and combines them into a single value.
+ /// Filters out reflections with any NaN in the N columns.
+ /// @tparam N Number of columns to load (2 for complex/F+sigma, etc.).
+ /// @tparam DataProxy Type with column_index(), unit_cell(), spacegroup(),
+ /// size(), stride(), get_hkl(), get_num() interface.
+ /// @param proxy Data proxy object (MTZ, mmCIF, etc.).
+ /// @param labels Array of N column names/labels to load.
+ /// @param as_is If true, skip ASU conversion and sorting (raw load).
template
void load_values(const DataProxy& proxy, const std::array& labels,
bool as_is=false) {
@@ -212,14 +283,17 @@ struct AsuData {
}
private:
- // for T being a number, std::array and std::complex, respectively:
+ /// @brief Helper to convert numeric array(s) to T.
+ /// Overloaded for: scalar, array<1>, complex (F+phase), ValueSigma (F+sigma).
static void set_value_from_array(T& val, const std::array& nums) { val = nums[0]; }
static void set_value_from_array(T& val, const T& nums) { val = nums; }
+ /// @brief Convert (F, phi) pair to complex structure factor.
template
static void set_value_from_array(std::complex& val, const std::array& nums) {
R theta = (R)rad(nums[1]);
val = {nums[0] * std::cos(theta), nums[0] * std::sin(theta)};
}
+ /// @brief Convert (value, sigma) pair to ValueSigma.
template
static void set_value_from_array(ValueSigma& val, const std::array& nums) {
val.value = nums[0];
@@ -227,6 +301,14 @@ struct AsuData {
}
};
+/// @brief Create AsuData by loading from N columns in a data source.
+/// @tparam T Value type to load into.
+/// @tparam N Number of columns to combine per reflection.
+/// @tparam Data Data source type (MTZ, mmCIF, etc.).
+/// @param data Source data object.
+/// @param labels Array of N column labels to load.
+/// @param as_is If true, skip ASU conversion and sorting.
+/// @return New AsuData object populated with data.
template
AsuData make_asu_data(const Data& data, const std::array& labels,
bool as_is=false) {
@@ -234,6 +316,14 @@ AsuData make_asu_data(const Data& data, const std::array& labe
asu_data.template load_values(data_proxy(data), labels, as_is);
return asu_data;
}
+
+/// @brief Create AsuData by loading from a single column in a data source.
+/// @tparam T Value type to load into.
+/// @tparam Data Data source type (MTZ, mmCIF, etc.).
+/// @param data Source data object.
+/// @param label Column label to load.
+/// @param as_is If true, skip ASU conversion and sorting.
+/// @return New AsuData object populated with data.
template
AsuData make_asu_data(const Data& data, const std::string& label, bool as_is) {
AsuData asu_data;
@@ -241,7 +331,11 @@ AsuData make_asu_data(const Data& data, const std::string& label, bool as_is)
return asu_data;
}
-/// retains only points with positive SIGF and F/SIGF > cutoff
+/// @brief Filter AsuData to retain only reflections with high signal-to-noise.
+/// Removes reflections where sigma <= 0 or value/sigma < cutoff.
+/// @tparam T Numeric type (float, double).
+/// @param asu_data AsuData container with ValueSigma values (in/out).
+/// @param cutoff Minimum value/sigma ratio to retain.
template
void discard_by_sigma_ratio(AsuData>& asu_data, double cutoff) {
vector_remove_if(asu_data.v, [cutoff](const HklValue>& p) {
diff --git a/include/gemmi/binner.hpp b/include/gemmi/binner.hpp
index 8e7325638..82864bf2e 100644
--- a/include/gemmi/binner.hpp
+++ b/include/gemmi/binner.hpp
@@ -1,6 +1,7 @@
// Copyright 2022 Global Phasing Ltd.
//
-// Binning - resolution shells for reflections.
+/// @file
+/// @brief Binner class for organizing reflections into resolution shells.
#ifndef GEMMI_BINNER_HPP_
#define GEMMI_BINNER_HPP_
@@ -11,14 +12,25 @@
namespace gemmi {
+/// @brief Divide reflections into resolution shells (bins) for statistics.
+///
+/// Organizes reflection data into bins by resolution. Supports multiple
+/// binning schemes (equal count, linear/quadratic/cubic in d or 1/d^2).
+/// Provides fast bin lookup for both sorted and unsorted data.
struct Binner {
+ /// @brief Strategy for dividing reflections into resolution shells.
enum class Method {
- EqualCount,
- Dstar,
- Dstar2,
- Dstar3,
+ EqualCount, ///< Bins with approximately equal number of reflections
+ Dstar, ///< Linear spacing in 1/d (resolution)
+ Dstar2, ///< Linear spacing in 1/d^2 (squared reciprocal spacing)
+ Dstar3, ///< Cubic spacing (linear in (1/d^2)^(1/3))
};
+ /// @brief Set up bins using pre-calculated 1/d^2 values.
+ /// @param nbins Number of resolution bins to create.
+ /// @param method Binning scheme (EqualCount, Dstar, Dstar2, Dstar3).
+ /// @param inv_d2 Vector of 1/d^2 values for all reflections (moved from caller).
+ /// @param cell_ Unit cell for resolution calculations (nullptr uses already-set cell).
void setup_from_1_d2(int nbins, Method method, std::vector&& inv_d2,
const UnitCell* cell_) {
if (nbins < 1)
@@ -84,6 +96,15 @@ struct Binner {
limits.resize(nbins);
}
+ /// @brief Set up bins from reflection data using DataProxy interface.
+ /// Automatically calculates 1/d^2 values from HKLs and unit cell.
+ /// @tparam DataProxy Type with spacegroup(), unit_cell(), size(), stride(),
+ /// get_hkl(), get_num() interface.
+ /// @param nbins Number of resolution bins to create.
+ /// @param method Binning scheme (EqualCount, Dstar, Dstar2, Dstar3).
+ /// @param proxy Data proxy object (typically Intensities or MTZ).
+ /// @param cell_ Unit cell for resolution calculations (nullptr uses proxy.unit_cell()).
+ /// @param col_idx Column index for filtering NaN values (0 = skip filtering).
template
void setup(int nbins, Method method, const DataProxy& proxy,
const UnitCell* cell_=nullptr, size_t col_idx=0) {
@@ -98,12 +119,15 @@ struct Binner {
setup_from_1_d2(nbins, method, std::move(inv_d2), nullptr);
}
+ /// @brief Check that bins have been set up; throw if not.
void ensure_limits_are_set() const {
if (limits.empty())
fail("Binner not set up");
}
- // Generic. Method-specific versions could be faster.
+ /// @brief Find bin index for a given 1/d^2 value (binary search, generic).
+ /// @param inv_d2 Squared reciprocal resolution (1/d^2).
+ /// @return Bin index (0 to size()-1).
int get_bin_from_1_d2(double inv_d2) {
ensure_limits_are_set();
auto it = std::lower_bound(limits.begin(), limits.end(), inv_d2);
@@ -111,13 +135,20 @@ struct Binner {
return int(it - limits.begin());
}
+ /// @brief Find bin index for a reflection by Miller indices.
+ /// @param hkl Miller indices (h,k,l).
+ /// @return Bin index (0 to size()-1).
int get_bin(const Miller& hkl) {
double inv_d2 = cell.calculate_1_d2(hkl);
return get_bin_from_1_d2(inv_d2);
}
- // We assume that bins are seeked mostly for sorted reflections,
- // so it's usually either the same bin as previously, or the next one.
+ /// @brief Find bin index using a hint (fast path for sorted reflections).
+ /// Updates hint to the found bin index for the next call.
+ /// Assumes sorted reflections: each bin is same or next to previous.
+ /// @param inv_d2 Squared reciprocal resolution (1/d^2).
+ /// @param hint In/out: previous bin index (input), current bin index (output).
+ /// @return Bin index (0 to size()-1).
int get_bin_from_1_d2_hinted(double inv_d2, int& hint) const {
if (inv_d2 <= limits[hint]) {
while (hint != 0 && limits[hint-1] > inv_d2)
@@ -130,11 +161,20 @@ struct Binner {
return hint;
}
+ /// @brief Find bin index for a reflection using a hint (fast path).
+ /// @param hkl Miller indices (h,k,l).
+ /// @param hint In/out: previous bin index (input), current bin index (output).
+ /// @return Bin index (0 to size()-1).
int get_bin_hinted(const Miller& hkl, int& hint) const {
double inv_d2 = cell.calculate_1_d2(hkl);
return get_bin_from_1_d2_hinted(inv_d2, hint);
}
+ /// @brief Get bin indices for all reflections in a DataProxy.
+ /// Uses hinting for fast processing of sorted data.
+ /// @tparam DataProxy Type with size(), stride(), get_hkl() interface.
+ /// @param proxy Data proxy object.
+ /// @return Vector of bin indices (length = proxy.size() / proxy.stride()).
template
std::vector get_bins(const DataProxy& proxy) const {
ensure_limits_are_set();
@@ -145,6 +185,11 @@ struct Binner {
return nums;
}
+ /// @brief Get bin indices for an array of 1/d^2 values.
+ /// Uses hinting for fast processing of sorted data.
+ /// @param inv_d2 Pointer to array of squared reciprocal resolutions.
+ /// @param size Number of values in array.
+ /// @return Vector of bin indices (length = size).
std::vector get_bins_from_1_d2(const double* inv_d2, size_t size) const {
ensure_limits_are_set();
int hint = 0;
@@ -154,24 +199,34 @@ struct Binner {
return nums;
}
+ /// @brief Get bin indices for a vector of 1/d^2 values.
+ /// @param inv_d2 Vector of squared reciprocal resolutions.
+ /// @return Vector of bin indices (length = inv_d2.size()).
std::vector get_bins_from_1_d2(const std::vector& inv_d2) const {
return get_bins_from_1_d2(inv_d2.data(), inv_d2.size());
}
+ /// @brief Get minimum resolution (highest 1/d^2) of a bin.
+ /// @param n Bin index (0 to size()-1).
+ /// @return Minimum resolution d in Angstroms (highest angle, tightest spacing).
double dmin_of_bin(int n) const {
return 1. / std::sqrt(n == (int) size() - 1 ? max_1_d2 : limits.at(n));
}
+ /// @brief Get maximum resolution (lowest 1/d^2) of a bin.
+ /// @param n Bin index (0 to size()-1).
+ /// @return Maximum resolution d in Angstroms (lowest angle, loosest spacing).
double dmax_of_bin(int n) const {
return 1. / std::sqrt(n == 0 ? min_1_d2 : limits.at(n-1));
}
+ /// @brief Number of bins (resolution shells).
size_t size() const { return limits.size(); }
- UnitCell cell;
- double min_1_d2;
- double max_1_d2;
- std::vector limits; // upper limit of each bin
- std::vector mids; // the middle of each bin
+ UnitCell cell; ///< Unit cell for calculating 1/d^2 from HKL
+ double min_1_d2; ///< Minimum 1/d^2 in data (lowest resolution)
+ double max_1_d2; ///< Maximum 1/d^2 in data (highest resolution)
+ std::vector limits; ///< Upper limit (1/d^2) of each bin
+ std::vector mids; ///< Midpoint (1/d^2) of each bin
};
} // namespace gemmi
diff --git a/include/gemmi/cif2mtz.hpp b/include/gemmi/cif2mtz.hpp
index 4300bb09e..6a6de2d09 100644
--- a/include/gemmi/cif2mtz.hpp
+++ b/include/gemmi/cif2mtz.hpp
@@ -1,3 +1,10 @@
+/// @file
+/// @brief Convert structure factor data from mmCIF to MTZ format
+///
+/// Provides CifToMtz for converting reflection data from PDB/mmCIF format
+/// to CCP4 MTZ binary format. Handles both merged and unmerged data,
+/// including anomalous and old-style anomalous structures.
+
// Copyright 2021 Global Phasing Ltd.
//
// A class for converting SF-mmCIF to MTZ (merged or unmerged).
@@ -20,7 +27,15 @@
namespace gemmi {
-// "Old-style" anomalous or unmerged data is expected to have only these tags.
+/// @brief Check if reflection block uses old-style anomalous data format.
+///
+/// "Old-style" anomalous or unmerged data is expected to have only these tags:
+/// index_h/k/l, wavelength_id, crystal_id, scale_group_code, status,
+/// and either (intensity_meas/sigma) or (F_meas_au/sigma).
+///
+/// @param rb ReflnBlock to check
+/// @param data_type Expected data type (Unmerged or Anomalous)
+/// @return true if all tags in the reflection loop match the old-style subset
inline bool possible_old_style(const ReflnBlock& rb, DataType data_type) {
if (rb.refln_loop == nullptr)
return false;
@@ -43,13 +58,21 @@ inline bool possible_old_style(const ReflnBlock& rb, DataType data_type) {
}
+/// @brief Convert old-style anomalous data to modern PDBx format.
+///
/// Before _refln.pdbx_F_plus/minus was introduced, anomalous data was
-/// stored as two F_meas_au reflections, say (1,1,3) and (-1,-1,-3).
-/// This function transcribes it to how the anomalous data is stored
-/// in PDBx/mmCIF nowadays:
-/// _refln.F_meas_au -> pdbx_F_plus / pdbx_F_minus,
-/// _refln.F_meas_sigma_au -> pdbx_F_plus_sigma / pdbx_F_minus_sigma.
-/// _refln.intensity_{meas,sigma} -> _refln.pdbx_F_plus{,_sigma} / ...
+/// stored as two F_meas_au reflections, e.g. (1,1,3) and (-1,-1,-3).
+/// This function transcribes it to the modern PDBx/mmCIF storage:
+/// - _refln.F_meas_au → pdbx_F_plus / pdbx_F_minus
+/// - _refln.F_meas_sigma_au → pdbx_F_plus_sigma / pdbx_F_minus_sigma
+/// - _refln.intensity_meas/sigma → pdbx_I_plus/I_minus and sigmas
+///
+/// Reflections are moved to the ASU, and when both +/- forms exist for
+/// the same HKL, they are merged (missing values set to '.').
+///
+/// @param loop CIF loop containing old-style anomalous data
+/// @param sg Space group for ASU determination (null → P1)
+/// @return New loop with reflections in standard pdbx format
inline cif::Loop transcript_old_anomalous_to_standard(const cif::Loop& loop,
const SpaceGroup* sg) {
std::vector positions;
@@ -139,7 +162,29 @@ inline cif::Loop transcript_old_anomalous_to_standard(const cif::Loop& loop,
}
+/// @brief Converter from CIF reflection data to MTZ format.
+///
+/// Handles conversion of structure factor data from mmCIF format
+/// (PDB/wwPDB standard) to MTZ format (CCP4 binary format).
+/// Supports both merged and unmerged reflection data, with configurable
+/// column mappings from CIF _refln tags to MTZ column labels and types.
+///
+/// Uses a specification (default or custom) to map CIF tags to MTZ columns,
+/// handling code-to-number translation for categorical data (e.g., FreeR flags).
struct CifToMtz {
+ /// @brief Get the default column mapping specification.
+ ///
+ /// Returns static arrays of mapping lines. Each line has format:
+ /// `cif_tag mtz_label col_type dataset_id [code_mapping]`
+ /// where code_mapping (optional) is a comma-separated list of `code=value` pairs.
+ ///
+ /// For merged data: includes FreeR_flag, intensities, structure factors,
+ /// anomalous pairs, calculated phases, and weight/FOM columns.
+ ///
+ /// For unmerged data: includes intensity_meas, detector coordinates, and rotation angle.
+ ///
+ /// @param for_merged true for merged data, false for unmerged
+ /// @return Pointer to null-terminated array of specification strings
// Alternative mmCIF tags for the same MTZ label should be consecutive
static const char** default_spec(bool for_merged) {
static const char* merged[] = {
@@ -192,13 +237,23 @@ struct CifToMtz {
return for_merged ? merged : unmerged;
}
+ /// @brief Specification entry mapping a CIF tag to an MTZ column.
struct Entry {
- std::string refln_tag;
- std::string col_label;
- char col_type;
- int dataset_id;
+ std::string refln_tag; ///< CIF _refln.* tag name (without prefix)
+ std::string col_label; ///< MTZ column label
+ char col_type; ///< MTZ column type code (H,K,L,F,Q,J,P,etc.)
+ int dataset_id; ///< Dataset id for merged data (0 or 1)
+ /// Code-to-number mappings for categorical data (e.g., FreeR code → number)
std::vector> code_to_number;
+ /// @brief Parse a specification line into an Entry.
+ ///
+ /// Expected format:
+ /// - `cif_tag mtz_label col_type dataset_id`
+ /// - `cif_tag mtz_label col_type dataset_id code1=val1,code2=val2`
+ ///
+ /// @param line Specification line to parse
+ /// @throws gemmi::fail if format is invalid
Entry(const std::string& line) {
std::vector tokens;
tokens.reserve(4);
@@ -234,6 +289,13 @@ struct CifToMtz {
}
}
+ /// @brief Translate a categorical value to its numeric equivalent.
+ ///
+ /// Uses code_to_number mappings to convert coded values (e.g., 'o', 'f')
+ /// to numeric equivalents (e.g., 1.0, 0.0 for FreeR flags).
+ ///
+ /// @param v The coded value to translate
+ /// @return Translated number, or NAN if no mapping found
float translate_code_to_number(const std::string& v) const {
if (v.size() == 1) {
for (const auto& c2n : code_to_number)
@@ -249,12 +311,31 @@ struct CifToMtz {
}
};
- bool force_unmerged = false;
- std::string title;
+ bool force_unmerged = false; ///< If true, treat all data as unmerged
+ std::string title; ///< Title to set in output MTZ file
+ /// Historical entries to include in MTZ history; defaults to version line
std::vector history = { "From gemmi-cif2mtz " GEMMI_VERSION };
- double wavelength = NAN;
+ double wavelength = NAN; ///< Override wavelength (NAN = auto-detect)
+ /// Custom column specification lines; if empty, uses default_spec()
std::vector spec_lines;
+ /// @brief Convert a single mmCIF reflection block to MTZ format.
+ ///
+ /// Maps CIF _refln columns to MTZ columns using the specification (custom or default).
+ /// Handles merged and unmerged data differently:
+ ///
+ /// **Merged data:** Creates a single dataset with wavelength from ReflnBlock.
+ ///
+ /// **Unmerged data:** Extracts diffrn_id and pdbx_image_id to create BATCH column,
+ /// creates a dataset per crystal, and uses UnmergedHklMover to put HKLs into ASU.
+ ///
+ /// Missing values ('.' in CIF) become NAN in MTZ. The M/ISYM and BATCH columns
+ /// are automatically added for unmerged data.
+ ///
+ /// @param rb Reflection block with parsed _refln loop
+ /// @param logger Logger for informational and error messages
+ /// @return MTZ structure with data, columns, and metadata
+ /// @throws gemmi::fail if required tags (index_h/k/l, data columns) are missing
Mtz convert_block_to_mtz(const ReflnBlock& rb, Logger& logger) const {
Mtz mtz;
mtz.title = title.empty() ? "Converted from mmCIF block " + rb.block.name : title;
@@ -505,6 +586,23 @@ struct CifToMtz {
return mtz;
}
+ /// @brief Auto-detect data type and convert with optional transformation.
+ ///
+ /// Performs intelligent detection and conversion:
+ ///
+ /// **Mode 'f' (fix old-style anomalous):** If the block contains old-style
+ /// anomalous data, transforms it to modern pdbx_F_plus/minus format before conversion.
+ ///
+ /// **Mode 'a' (auto-detect anomalous):** After conversion, analyzes the unique
+ /// HKLs under symmetry to detect if data is actually anomalous or unmerged despite
+ /// initial classification. Logs warnings and attempts recovery for old-style data.
+ ///
+ /// **Other modes:** Performs conversion as-is without transformation.
+ ///
+ /// @param rb Reflection block (modified in-place if transformation occurs)
+ /// @param logger Logger for notes and errors
+ /// @param mode Conversion mode: 'f'=fix old anomalous, 'a'=auto-detect, other=no transform
+ /// @return Converted MTZ structure
Mtz auto_convert_block_to_mtz(ReflnBlock& rb, Logger& logger, char mode) const {
if (mode == 'f' && possible_old_style(rb, DataType::Anomalous))
*rb.refln_loop = transcript_old_anomalous_to_standard(*rb.refln_loop, rb.spacegroup);
@@ -537,6 +635,15 @@ struct CifToMtz {
}
private:
+ /// @brief Convert _refln.status code to FreeR_flag value.
+ ///
+ /// Maps status codes to numeric equivalents:
+ /// - 'o' or quoted 'o' → 1.0 (observed/working set)
+ /// - 'f' or quoted 'f' → 0.0 (free set)
+ /// - other → NAN
+ ///
+ /// @param str Status code string from CIF
+ /// @return Numeric flag value (1.0, 0.0, or NAN)
static float status_to_freeflag(const std::string& str) {
char c = str[0];
if (c == '\'' || c == '"')
diff --git a/include/gemmi/intensit.hpp b/include/gemmi/intensit.hpp
index d47ad6bb8..ec77b28c3 100644
--- a/include/gemmi/intensit.hpp
+++ b/include/gemmi/intensit.hpp
@@ -1,5 +1,8 @@
// Copyright 2020 Global Phasing Ltd.
//
+/// @file
+/// @brief Intensities class for reading and merging intensity data from various formats.
+//
// Class Intensities that reads multi-record data from MTZ, mmCIF or XDS_ASCII
// and merges it into mean or anomalous intensities.
// It can also read merged data.
@@ -23,28 +26,40 @@ struct ReflnBlock;
namespace cif { struct Block; }
using std::int8_t;
-// If used to request a particular data type:
-// MergedMA = Mean if available, otherwise Anomalous,
-// MergedAM = Anomalous if available, otherwise Mean.
-// UAM = Unmerged if available, otherwise MergedAM
-enum class DataType { Unknown, Unmerged, Mean, Anomalous,
- MergedMA, MergedAM, UAM };
+/// Data type of intensity data: unmerged, mean intensity, or anomalous intensities.
+///
+/// When requesting a particular data type, the MergedMA/MergedAM/UAM variants
+/// allow fallback to secondary options (e.g., MergedMA = Mean if available, else Anomalous).
+enum class DataType {
+ Unknown, ///< Unknown or unspecified intensity type
+ Unmerged, ///< Unmerged (multi-record) intensity data
+ Mean, ///< Mean intensity
+ Anomalous, ///< Anomalous intensities (I+/I-)
+ MergedMA, ///< Mean if available, otherwise Anomalous (fallback type)
+ MergedAM, ///< Anomalous if available, otherwise Mean (fallback type)
+ UAM ///< Unmerged if available, otherwise MergedAM (fallback type)
+};
+/// @brief Statistics calculated for a resolution shell (bin) of merged intensities.
+///
+/// Accumulates numerators and denominators for R-merge, R-meas, and R-pim.
+/// Can be summed across shells to compute overall statistics.
struct GEMMI_DLL MergingStats {
- int all_refl = 0; // all reflections, sometimes called observations
- int unique_refl = 0;
- int stats_refl = 0; // unique reflections with 2+ observations (used for statistics)
- double r_merge_num = 0; // numerator for R-merge
- double r_meas_num = 0; // numerator for R-meas
- double r_pim_num = 0; // numerator for R-pim
- double r_denom = 0; // denominator for R-*
- // sums for CC1/2
- double sum_ibar = 0;
- double sum_ibar2 = 0;
- double sum_sig2_eps = 0;
-
- /// This class is additive. Adding two MergingStats gives the same result
- /// as calculating statistics in these two resolution shells from the start.
+ int all_refl = 0; ///< Total number of observations (all reflections)
+ int unique_refl = 0; ///< Number of unique reflections
+ int stats_refl = 0; ///< Unique reflections with 2+ observations (for statistics)
+ double r_merge_num = 0; ///< Numerator for R-merge calculation
+ double r_meas_num = 0; ///< Numerator for R-meas (redundancy-weighted) calculation
+ double r_pim_num = 0; ///< Numerator for R-pim (precision-indicating merge) calculation
+ double r_denom = 0; ///< Denominator for R-merge/meas/pim calculations
+ double sum_ibar = 0; ///< Sum of mean intensities for CC1/2
+ double sum_ibar2 = 0; ///< Sum of squared mean intensities for CC1/2
+ double sum_sig2_eps = 0; ///< Sum of variance terms for CC1/2
+
+ /// Accumulate statistics from another shell.
+ /// @param o Statistics from another resolution shell.
+ /// Adding two MergingStats gives the same result as calculating statistics
+ /// for the combined shells from the start.
void add_other(const MergingStats& o) {
all_refl += o.all_refl;
unique_refl += o.unique_refl;
@@ -58,64 +73,89 @@ struct GEMMI_DLL MergingStats {
sum_sig2_eps += o.sum_sig2_eps;
}
+ /// Compute R-merge for this shell.
double r_merge() const { return r_merge_num / r_denom; }
+ /// Compute redundancy-weighted R-meas for this shell.
double r_meas() const { return r_meas_num / r_denom; }
+ /// Compute precision-indicating R-pim for this shell.
double r_pim() const { return r_pim_num / r_denom; }
+ /// Compute CC1/2 (correlation coefficient of half-datasets).
double cc_half() const; // calculated using sigma-tau method
- /// split-half reliability using the Spearman-Brown prediction formula :-)
+ /// Compute CC* using Spearman-Brown prediction formula from CC1/2.
double cc_full() const {
double cc = cc_half();
return 2 * cc / (1 + cc);
}
+ /// Estimate of overall correlation coefficient from CC1/2.
double cc_star() const { return std::sqrt(cc_full()); }
};
-/// Returns STARANISO version or empty string.
+/// @brief Extract STARANISO anisotropy B-tensor from MTZ file.
+/// @param mtz MTZ file object to read from.
+/// @param output Anisotropic B-tensor (3x3 symmetric matrix in Voigt notation).
+/// @return STARANISO version string if found, empty string otherwise.
GEMMI_DLL std::string read_staraniso_b_from_mtz(const Mtz& mtz, SMat33& output);
+/// @brief Container for intensity data from reflection measurements.
+///
+/// Stores multi-record (unmerged) or merged intensities with metadata such as
+/// unit cell, space group, and wavelength. Supports merging operations and
+/// import from MTZ, mmCIF, and XDS_ASCII formats.
struct GEMMI_DLL Intensities {
+ /// @brief A single reflection record with intensity, sigma, and metadata.
struct Refl {
- Miller hkl;
- int8_t isign; // 1 for I(+), -1 for I(-), 0 for mean or unmerged
- int8_t isym; // for unmerged data: encodes symmetry op like M/ISYM in MTZ
- short nobs;
- double value;
- double sigma;
-
+ Miller hkl; ///< Miller indices (h, k, l)
+ int8_t isign; ///< Intensity component: 1=I(+), -1=I(-), 0=mean/unmerged
+ int8_t isym; ///< Symmetry operator encoding (ISYM in MTZ for unmerged data)
+ short nobs; ///< Number of observations (used during merging)
+ double value; ///< Intensity value
+ double sigma; ///< Standard deviation of intensity
+
+ /// Compare reflections by (h,k,l, isign) for sorting.
bool operator<(const Refl& o) const {
return std::tie(hkl[0], hkl[1], hkl[2], isign) <
std::tie(o.hkl[0], o.hkl[1], o.hkl[2], o.isign);
}
- // for merged data
+ /// @brief Get intensity label ("<I>", "I(+)", or "I(-)").
const char* intensity_label() const {
if (isign == 0)
return "";
return isign > 0 ? "I(+)" : "I(-)";
}
+ /// @brief Format HKL and intensity label as a string.
std::string hkl_label() const {
return cat(intensity_label(), " (", hkl[0], ' ', hkl[1], ' ', hkl[2], ')');
}
};
+ /// @brief Anisotropic scaling tensor for STARANISO B-factor correction.
struct AnisoScaling {
- SMat33 b = {0., 0., 0., 0., 0., 0.};
+ SMat33 b = {0., 0., 0., 0., 0., 0.}; ///< Symmetric B-tensor in Voigt notation
+ /// Check if anisotropic tensor is set (non-zero).
bool ok() const { return !b.all_zero(); }
+ /// @brief Compute scaling factor for a reflection at given HKL.
+ /// @param hkl Miller indices
+ /// @param cell Unit cell to convert HKL to reciprocal-space vector
+ /// @return Exponential scaling factor exp(0.5 * B * s * s)
double scale(const Miller& hkl, const UnitCell& cell) const {
Vec3 s = cell.frac.mat.left_multiply(Vec3(hkl[0], hkl[1], hkl[2]));
return std::exp(0.5 * b.r_u_r(s));
}
};
- std::vector data;
- const SpaceGroup* spacegroup = nullptr;
- UnitCell unit_cell;
- double unit_cell_rmsd[6] = {0., 0., 0., 0., 0., 0.};
- double wavelength;
- DataType type = DataType::Unknown;
- std::vector isym_ops;
- AnisoScaling staraniso_b;
-
+ std::vector data; ///< Reflection records
+ const SpaceGroup* spacegroup = nullptr; ///< Space group (not owned by this object)
+ UnitCell unit_cell; ///< Crystal unit cell parameters
+ double unit_cell_rmsd[6] = {0., 0., 0., 0., 0., 0.}; ///< RMSDs of unit cell parameters
+ double wavelength; ///< Diffraction wavelength in Angstroms
+ DataType type = DataType::Unknown; ///< Type of intensity data stored
+ std::vector isym_ops; ///< Symmetry operators (for unmerged data)
+ AnisoScaling staraniso_b; ///< STARANISO anisotropy correction tensor
+
+ /// @brief Get string representation of intensity data type.
+ /// @param data_type Type to convert.
+ /// @return Human-readable label ("I", "<I>", "I+/I-", "n/a", etc.).
static const char* type_str(DataType data_type) {
switch (data_type) {
case DataType::Unmerged: return "I";
@@ -129,16 +169,28 @@ struct GEMMI_DLL Intensities {
unreachable();
}
+ /// @brief Get string representation of this object's intensity data type.
const char* type_str() const { return Intensities::type_str(type); }
+ /// @brief Get space group name (Hermann-Mauguin symbol) or "none".
std::string spacegroup_str() const { return spacegroup ? spacegroup->xhm() : "none"; }
- // returns (d_max, d_min)
+ /// @brief Get minimum and maximum resolution of reflections.
+ /// @return {d_max, d_min} as array of two doubles.
std::array resolution_range() const;
- // pre: both are sorted
+ /// @brief Calculate correlation of intensity values between two sorted lists.
+ /// @param other Another Intensities object to correlate with.
+ /// @return Correlation object with matching reflections analyzed.
Correlation calculate_correlation(const Intensities& other) const;
+ /// @brief Add a single reflection if its data is valid.
+ /// Skips reflections with NaN or non-positive sigma (rejected by XDS, etc.).
+ /// @param hkl Miller indices
+ /// @param isign Intensity sign (1 for I+, -1 for I-, 0 for mean)
+ /// @param isym Symmetry operator encoding
+ /// @param value Intensity value
+ /// @param sigma Standard deviation of intensity
void add_if_valid(const Miller& hkl, int8_t isign, int8_t isym, double value, double sigma) {
// XDS marks rejected reflections with negative sigma.
// Sigma 0.0 rarely happens (e.g. 5tkn), but is also problematic.
@@ -146,6 +198,7 @@ struct GEMMI_DLL Intensities {
data.push_back({hkl, isign, isym, /*nobs=*/0, value, sigma});
}
+ /// @brief Remove reflections that are forbidden by space group symmetry.
void remove_systematic_absences() {
if (!spacegroup)
return;
@@ -153,53 +206,99 @@ struct GEMMI_DLL Intensities {
vector_remove_if(data, [&](Refl& x) { return gops.is_systematically_absent(x.hkl); });
}
+ /// @brief Sort reflections by (h, k, l, isign) in ascending order.
void sort() { std::sort(data.begin(), data.end()); }
+ /// @brief Merge reflections in-place to a specified data type (mean or anomalous).
+ /// @param new_type Target data type for merged intensities.
void merge_in_place(DataType new_type);
+ /// @brief Create a merged copy without modifying this object.
+ /// @param new_type Target data type for merged intensities.
+ /// @return New Intensities object with merged data.
Intensities merged(DataType new_type) {
Intensities m(*this);
m.merge_in_place(new_type);
return m;
}
- /// use_weights can be 'Y' (yes, like Aimless), 'U' (unweighted), 'X' (yes, like XDS)
+ /// @brief Calculate R-merge and related statistics for each resolution shell.
+ /// @param binner Resolution shell binning (or nullptr for all data in one shell).
+ /// @param use_weights Weighting scheme: 'Y'=Aimless style, 'U'=unweighted, 'X'=XDS style.
+ /// @return Vector of MergingStats, one per shell.
std::vector calculate_merging_stats(const Binner* binner,
char use_weights='Y') const;
- // call with DataType::Anomalous before calculate_merging_stats() to get I+/I- stats
+ /// @brief Prepare data for merging and classify anomalous/mean component.
+ /// Call with DataType::Anomalous before calculate_merging_stats() to get I+/I- stats.
+ /// @param new_type Target data type.
+ /// @return Classified data type after preparation.
DataType prepare_for_merging(DataType new_type);
+ /// @brief Convert unmerged ISYM indices to ASU indices using stored isym_ops.
void switch_to_asu_indices();
+ /// @brief Load unmerged intensities from MTZ file.
+ /// @param mtz MTZ object to read from.
void import_unmerged_intensities_from_mtz(const Mtz& mtz);
+ /// @brief Load mean intensities from MTZ file.
+ /// @param mtz MTZ object to read from.
void import_mean_intensities_from_mtz(const Mtz& mtz);
- // with check_complete=true, throw if anomalous data is null where it shouldn't be
+ /// @brief Load anomalous intensities (I+/I-) from MTZ file.
+ /// @param mtz MTZ object to read from.
+ /// @param check_complete If true, throw if anomalous data is null where expected.
void import_anomalous_intensities_from_mtz(const Mtz& mtz, bool check_complete=false);
+ /// @brief Load intensities from MTZ file, auto-detecting data type.
+ /// @param mtz MTZ object to read from.
+ /// @param data_type Requested data type; DataType::Unknown auto-detects.
void import_mtz(const Mtz& mtz, DataType data_type=DataType::Unknown);
+ /// @brief Load unmerged intensities from mmCIF reflection block.
+ /// @param rb Reflection block to read from.
void import_unmerged_intensities_from_mmcif(const ReflnBlock& rb);
+ /// @brief Load mean intensities from mmCIF reflection block.
+ /// @param rb Reflection block to read from.
void import_mean_intensities_from_mmcif(const ReflnBlock& rb);
+ /// @brief Load anomalous intensities (I+/I-) from mmCIF reflection block.
+ /// @param rb Reflection block to read from.
+ /// @param check_complete If true, throw if anomalous data is null where expected.
void import_anomalous_intensities_from_mmcif(const ReflnBlock& rb, bool check_complete=false);
+ /// @brief Load structure factor squared (F^2) from mmCIF reflection block.
+ /// @param rb Reflection block to read from.
void import_f_squared_from_mmcif(const ReflnBlock& rb);
+ /// @brief Load intensities from mmCIF reflection block, auto-detecting data type.
+ /// @param rb Reflection block to read from.
+ /// @param data_type Requested data type; DataType::Unknown auto-detects.
void import_refln_block(const ReflnBlock& rb, DataType data_type=DataType::Unknown);
+ /// @brief Load intensities from XDS_ASCII file.
+ /// @param xds XDS_ASCII object to read from.
void import_xds(const XdsAscii& xds);
- // returns STARANISO version or empty string
+ /// @brief Extract and store STARANISO B-tensor from MTZ file.
+ /// @param mtz MTZ object to read from.
+ /// @return STARANISO version string if found, empty string otherwise.
std::string take_staraniso_b_from_mtz(const Mtz& mtz);
+ /// @brief Extract and store STARANISO B-tensor from mmCIF block.
+ /// @param block CIF block to read from.
+ /// @return True if tensor was found and loaded, false otherwise.
bool take_staraniso_b_from_mmcif(const cif::Block& block);
+ /// @brief Create a merged MTZ file from these intensities.
+ /// @param with_nobs If true, include NOBS (observation count) column.
+ /// @return Mtz object with merged data.
Mtz prepare_merged_mtz(bool with_nobs);
};
-// Minimal compatibility with MtzDataProxy and ReflnDataProxy.
+/// @brief Adapter providing DataProxy interface to Intensities data.
+/// Enables use of Intensities with generic algorithms expecting standard data proxy interface.
struct IntensitiesDataProxy {
- const Intensities& intensities_;
+ const Intensities& intensities_; ///< Reference to underlying Intensities object
+
size_t stride() const { return 1; }
size_t size() const { return intensities_.data.size(); }
const SpaceGroup* spacegroup() const { return intensities_.spacegroup; }
@@ -208,6 +307,15 @@ struct IntensitiesDataProxy {
double get_num(size_t n) const { return intensities_.data[n].value; }
};
+/// @brief Infer intensity data type from reflection data under symmetry.
+///
+/// Examines unique reflections and detects whether data is unmerged (multiple
+/// copies of same HKL), mean intensity (single copy), or anomalous (both I+/I-).
+///
+/// @tparam DataProxy Type with spacegroup(), unit_cell(), size(), stride(),
+/// get_hkl() interface.
+/// @param proxy Data proxy object to analyze.
+/// @return Pair of (inferred DataType, number of unique HKLs in ASU).
template
std::pair check_data_type_under_symmetry(const DataProxy& proxy) {
const SpaceGroup* sg = proxy.spacegroup();
diff --git a/include/gemmi/mtz.hpp b/include/gemmi/mtz.hpp
index c7cf44316..6e5b8b987 100644
--- a/include/gemmi/mtz.hpp
+++ b/include/gemmi/mtz.hpp
@@ -1,3 +1,6 @@
+/// @file
+/// @brief MTZ reflection file format (X-ray crystallography).
+
// Copyright 2019 Global Phasing Ltd.
//
// MTZ reflection file format.
@@ -24,15 +27,19 @@
namespace gemmi {
-// Unmerged MTZ files always store in-asu hkl indices and symmetry operation
-// encoded in the M/ISYM column. Here is a helper for writing such files.
+/// Helper for writing unmerged MTZ files with correct M/ISYM column values.
+/// Converts Miller indices to ASU-equivalent and encodes the symmetry operation.
struct UnmergedHklMover {
+ /// Initialize with spacegroup information.
+ /// @param spacegroup The space group (may be null).
UnmergedHklMover(const SpaceGroup* spacegroup) : asu_(spacegroup) {
if (spacegroup)
group_ops_ = spacegroup->operations();
}
- // Modifies hkl and returns ISYM value for M/ISYM
+ /// Move HKL indices to ASU and return the encoded ISYM value.
+ /// @param hkl [in,out] Miller indices; modified to ASU-equivalent values.
+ /// @return ISYM value for the M/ISYM column (encodes symmetry operation).
int move_to_asu(std::array& hkl) {
std::pair hkl_isym = asu_.to_asu(hkl, group_ops_);
hkl = hkl_isym.first;
@@ -44,63 +51,125 @@ struct UnmergedHklMover {
GroupOps group_ops_;
};
+/// MTZ file metadata: crystallographic parameters, symmetry, and file structure.
struct MtzMetadata {
- std::string source_path; // input file path, if known
+ /// Input file path (if known).
+ std::string source_path;
+ /// True if the file's byte order matches the system (not swapped).
bool same_byte_order = true;
+ /// For unmerged MTZ: true if HKL indices have been switched to original (non-ASU) values.
bool indices_switched_to_original = false;
+ /// Offset (in 32-bit words) to the start of the header block.
std::int64_t header_offset = 0;
+ /// Version stamp from VERS header line (e.g., "MTZ:V1.1").
std::string version_stamp;
+ /// Title from TITLE header line.
std::string title;
+ /// Number of reflections in the data array.
int nreflections = 0;
+ /// Sort order: columns used to sort reflections (0 = not used).
std::array sort_order = {};
+ /// Minimum 1/d² value in the file (d = 1/sqrt(1/d²)).
double min_1_d2 = NAN;
+ /// Maximum 1/d² value in the file.
double max_1_d2 = NAN;
+ /// VALM value: typically unused (for future use in CCP4).
float valm = NAN;
+ /// Number of symmetry operations (redundant with symops.size()).
int nsymop = 0;
+ /// Global unit cell parameters.
UnitCell cell;
+ /// CCP4 space group number.
int spacegroup_number = 0;
+ /// Space group name (Hermann-Mauguin, e.g., "P 21 21 21").
std::string spacegroup_name;
+ /// Symmetry operations read from SYMM header lines.
std::vector symops;
+ /// Pointer to the SpaceGroup object (from symmetry database).
const SpaceGroup* spacegroup = nullptr;
+ /// Historical processing steps (MTZHIST records).
std::vector history;
+ /// Text appended after MTZENDOFHEADERS (non-standard).
std::string appended_text;
- // used to report non-critical problems when reading a file (also used in mtz2cif)
+ /// Logger for non-critical problems during file reading.
Logger logger;
};
+/// Representation of an MTZ reflection file.
+/// Contains reflection data, column definitions, batch headers (for unmerged files),
+/// and crystallographic metadata (cell, space group, symmetry operations).
struct GEMMI_DLL Mtz : public MtzMetadata {
+ /// A dataset in the MTZ file hierarchy: project → crystal → dataset → columns.
struct Dataset {
+ /// Unique dataset ID (positive integer, typically starting at 1).
int id;
+ /// Project name (e.g., dataset owner or beamline).
std::string project_name;
+ /// Crystal name (e.g., sample identifier).
std::string crystal_name;
+ /// Dataset name (e.g., experiment or scan identifier).
std::string dataset_name;
+ /// Unit cell parameters for this dataset (overrides global cell if set).
UnitCell cell;
- double wavelength; // 0 means not set
+ /// X-ray wavelength in Angstroms (0 = not set).
+ double wavelength;
};
+ /// A column in the reflection data array.
+ /// Stores one field per reflection (e.g., amplitude, phase, flag).
struct Column {
+ /// Dataset ID this column belongs to.
int dataset_id;
+ /// Column type code: 'H'=Miller index (H, K, or L), 'F'=amplitude,
+ /// 'Q'=standard deviation, 'J'=intensity, 'M/ISYM'=symmetry flag (unmerged),
+ /// 'D'=anomalous difference, 'P'=phase (degrees), 'W'=weight, 'A'=phase prob.,
+ /// 'B'=batch number, 'Y'=M/ISYM, 'I'=integer, 'R'=R-factor, 'G'=F(+)/F(-),
+ /// 'K'=I(+)/I(-), 'L'=string.
char type;
+ /// Column label (e.g., "FP", "SIGFP", "FWT", "PHWT").
std::string label;
+ /// Minimum value in this column (NAN if not computed).
float min_value = NAN;
+ /// Maximum value in this column (NAN if not computed).
float max_value = NAN;
- std::string source; // from COLSRC
+ /// Source of the data (from COLSRC header; e.g., derivation formula).
+ std::string source;
+ /// Pointer to parent Mtz object (for data access).
Mtz* parent;
+ /// Index of this column in parent->columns (used to access data array).
std::size_t idx;
+ /// Get the Dataset this column belongs to.
Dataset& dataset() { return parent->dataset(dataset_id); }
+ /// Get the Dataset this column belongs to (const).
const Dataset& dataset() const { return parent->dataset(dataset_id); }
+ /// True if parent Mtz has data loaded.
bool has_data() const { return parent->has_data(); }
+ /// Number of values in this column (0 if no data loaded).
int size() const { return has_data() ? parent->nreflections : 0; }
+ /// Stride between consecutive values in the data array (= number of columns).
size_t stride() const { return parent->columns.size(); }
+ /// Access column value for reflection n.
+ /// @param n Reflection index (0 to nreflections-1).
float& operator[](std::size_t n) { return parent->data[idx + n * stride()]; }
+ /// Access column value for reflection n (const).
float operator[](std::size_t n) const { return parent->data[idx + n * stride()]; }
+ /// Access column value for reflection n with bounds checking.
+ /// @param n Reflection index.
+ /// @return Reference to the data value.
+ /// @throws std::out_of_range if n is out of bounds.
float& at(std::size_t n) { return parent->data.at(idx + n * stride()); }
+ /// Access column value for reflection n with bounds checking (const).
float at(std::size_t n) const { return parent->data.at(idx + n * stride()); }
+ /// True if this column type represents an integer value.
+ /// Returns true for types H, B, Y, I (indices, batch, ISYM, integers).
bool is_integer() const {
return type == 'H' || type == 'B' || type == 'Y' || type == 'I';
}
+ /// Find the next column in the same dataset with a specific type.
+ /// @param next_type The column type to search for.
+ /// @return Pointer to the next matching column, or nullptr.
const Column* get_next_column_if_type(char next_type) const {
if (idx + 1 < parent->columns.size()) {
const Column& next_col = parent->columns[idx + 1];
@@ -110,22 +179,31 @@ struct GEMMI_DLL Mtz : public MtzMetadata {
return nullptr;
}
+ /// Iterator over this column's values.
using iterator = StrideIter;
+ /// Begin iterator over all values in this column.
iterator begin() {
assert(parent);
assert(&parent->columns[idx] == this);
return iterator({parent->data.data(), idx, stride()});
}
+ /// End iterator for this column.
iterator end() {
return iterator({parent->data.data() + parent->data.size(), idx,
stride()});
}
+ /// Const iterator over this column's values.
using const_iterator = StrideIter;
+ /// Begin const iterator.
const_iterator begin() const { return const_cast(this)->begin(); }
+ /// End const iterator.
const_iterator end() const { return const_cast(this)->end(); }
};
+ /// Batch header for unmerged MTZ files (one per diffraction image/sweep).
+ /// Contains crystallographic and experimental metadata in fixed positions.
struct Batch {
+ /// Initialize a batch with default values (matching CCP4 COMBAT/Pointless).
Batch() {
ints.resize(29, 0);
floats.resize(156, 0.);
@@ -136,16 +214,26 @@ struct GEMMI_DLL Mtz : public MtzMetadata {
// COMBAT sets BSCALE=1, but Pointless sets it to 0.
//floats[43] = 1.f; // batch scale
}
+ /// Batch number (usually 1-based).
int number = 0;
+ /// Title or description of the batch.
std::string title;
+ /// Integer values: ints[20] = dataset_id, ints[0,1,2] = sizes (fixed).
std::vector ints;
+ /// Float values: floats[0-5] = cell, floats[6-14] = U matrix, floats[36-37] = phi range,
+ /// floats[86] = wavelength.
std::vector floats;
+ /// Axis names (e.g., "OMEGA", "KAPPA", "PHI").
std::vector axes;
+ /// Extract unit cell parameters from batch header.
+ /// @return Unit cell (a, b, c, alpha, beta, gamma).
UnitCell get_cell() const {
return UnitCell(floats[0], floats[1], floats[2],
floats[3], floats[4], floats[5]);
}
+ /// Set unit cell parameters in batch header.
+ /// @param uc The unit cell to store.
void set_cell(const UnitCell& uc) {
floats[0] = (float) uc.a;
floats[1] = (float) uc.b;
@@ -155,12 +243,26 @@ struct GEMMI_DLL Mtz : public MtzMetadata {
floats[5] = (float) uc.gamma;
}
+ /// Get the dataset ID from batch header.
+ /// @return Dataset ID (from ints[20]).
int dataset_id() const { return ints[20]; }
+ /// Set the dataset ID in batch header.
+ /// @param id Dataset ID to store in ints[20].
void set_dataset_id(int id) { ints[20] = id; }
+ /// Get the X-ray wavelength.
+ /// @return Wavelength in Angstroms (from floats[86]).
float wavelength() const { return floats[86]; }
+ /// Set the X-ray wavelength.
+ /// @param lambda Wavelength in Angstroms.
void set_wavelength(float lambda) { floats[86] = lambda; }
+ /// Get the phi rotation start angle.
+ /// @return Start angle in degrees (from floats[36]).
float phi_start() const { return floats[36]; }
+ /// Get the phi rotation end angle.
+ /// @return End angle in degrees (from floats[37]).
float phi_end() const { return floats[37]; }
+ /// Get the crystal orientation matrix U (3×3).
+ /// @return U matrix from floats[6-14].
Mat33 matrix_U() const {
return Mat33(floats[6], floats[9], floats[12],
floats[7], floats[10], floats[13],
@@ -168,16 +270,25 @@ struct GEMMI_DLL Mtz : public MtzMetadata {
}
};
+ /// All datasets in the file.
std::vector datasets;
+ /// All columns in the file (ordered by position in the data array).
std::vector columns;
+ /// Batch headers (empty for merged MTZ files).
std::vector batches;
+ /// Reflection data: laid out as [col0_refl0, col1_refl0, ..., col0_refl1, col1_refl1, ...].
+ /// Size = columns.size() * nreflections. Access via Column's operator[].
std::vector data;
+ /// Create an empty MTZ object.
+ /// @param with_base If true, initialize with a default HKL_base dataset and H, K, L columns.
explicit Mtz(bool with_base=false) {
if (with_base)
add_base();
}
+ /// Move constructor.
Mtz(Mtz&& o) noexcept { *this = std::move(o); }
+ /// Move assignment. Updates parent pointers in all columns.
Mtz& operator=(Mtz&& o) noexcept {
MtzMetadata::operator=(std::move(o));
datasets = std::move(o.datasets);
@@ -189,7 +300,7 @@ struct GEMMI_DLL Mtz : public MtzMetadata {
return *this;
}
- // explicit to be aware where we make copies
+ /// Copy constructor. Explicit to prevent accidental copies. Updates parent pointers.
explicit Mtz(const Mtz& o) : MtzMetadata(o) {
datasets = o.datasets;
columns = o.columns;
@@ -199,19 +310,29 @@ struct GEMMI_DLL Mtz : public MtzMetadata {
col.parent = this;
}
+ /// Copy assignment is deleted (explicit copy constructor forces intentionality).
Mtz& operator=(Mtz const&) = delete;
+ /// Initialize with default HKL_base dataset and H, K, L columns.
void add_base() {
datasets.push_back({0, "HKL_base", "HKL_base", "HKL_base", cell, 0.});
for (int i = 0; i != 3; ++i)
add_column(std::string(1, "HKL"[i]), 'H', 0, i, false);
}
- // Functions to use after MTZ headers (and data) is read.
+ /// @name Crystallographic properties (after reading headers/data)
+ /// @{
+ /// Get the highest resolution in Angstroms.
+ /// @return d_min = 1/sqrt(max_1_d2); resolution is high when d is small.
double resolution_high() const { return std::sqrt(1.0 / max_1_d2); }
+ /// Get the lowest resolution in Angstroms.
+ /// @return d_max = 1/sqrt(min_1_d2); resolution is low when d is large.
double resolution_low() const { return std::sqrt(1.0 / min_1_d2); }
+ /// Get the unit cell for a specific dataset or the global cell.
+ /// @param dataset Dataset ID (default -1 = global cell, but searches datasets first).
+ /// @return Reference to the unit cell.
UnitCell& get_cell(int dataset=-1) {
for (Dataset& ds : datasets)
if (ds.id == dataset && ds.cell.is_crystal() && ds.cell.a > 0)
@@ -219,10 +340,13 @@ struct GEMMI_DLL Mtz : public MtzMetadata {
return cell;
}
+ /// Get the unit cell (const).
const UnitCell& get_cell(int dataset=-1) const {
return const_cast(this)->get_cell(dataset);
}
+ /// Set the global and all per-dataset unit cells to the same value.
+ /// @param new_cell The new unit cell parameters.
void set_cell_for_all(const UnitCell& new_cell) {
cell = new_cell;
cell.set_cell_images_from_spacegroup(spacegroup); // probably not needed
@@ -230,20 +354,37 @@ struct GEMMI_DLL Mtz : public MtzMetadata {
ds.cell = cell;
}
+ /// Calculate average unit cell from all batch headers, optionally with RMSD.
+ /// @param rmsd [out] Pointer to array of 6 doubles (a, b, c, alpha, beta, gamma RMSD),
+ /// or nullptr to skip.
+ /// @return Average cell from all batches, or global cell if batches are invalid.
UnitCellParameters get_average_cell_from_batch_headers(double* rmsd) const;
+ /// Set the space group and update related fields.
+ /// @param new_sg Pointer to SpaceGroup (may be null).
void set_spacegroup(const SpaceGroup* new_sg) {
spacegroup = new_sg;
spacegroup_number = new_sg ? spacegroup->ccp4 : 0;
spacegroup_name = new_sg ? spacegroup->hm : "";
}
+ /// @}
+ /// @name Dataset access
+ /// @{
+
+ /// Get the last (most recently added) dataset.
+ /// @return Reference to the last dataset.
+ /// @throws std::runtime_error if no datasets exist.
Dataset& last_dataset() {
if (datasets.empty())
fail("MTZ dataset not found (missing DATASET header line?).");
return datasets.back();
}
+ /// Get dataset by ID.
+ /// @param id Dataset ID to look up.
+ /// @return Reference to the dataset.
+ /// @throws std::runtime_error if ID not found.
Dataset& dataset(int id) {
if ((size_t)id < datasets.size() && datasets[id].id == id)
return datasets[id];
@@ -252,20 +393,32 @@ struct GEMMI_DLL Mtz : public MtzMetadata {
return d;
fail("MTZ file has no dataset with ID " + std::to_string(id));
}
+ /// Get dataset by ID (const).
const Dataset& dataset(int id) const {
return const_cast(this)->dataset(id);
}
+ /// Find a dataset by name.
+ /// @param name Dataset name to search for.
+ /// @return Pointer to the dataset, or nullptr if not found.
Dataset* dataset_with_name(const std::string& name) {
for (Dataset& d : datasets)
if (d.dataset_name == name)
return &d;
return nullptr;
}
+ /// Find a dataset by name (const).
const Dataset* dataset_with_name(const std::string& label) const {
return const_cast(this)->dataset_with_name(label);
}
+ /// @}
+ /// @name Column access and queries
+ /// @{
+
+ /// Count columns with a specific label (may be > 1 if duplicates exist).
+ /// @param label Column label to count.
+ /// @return Number of columns with this label.
int count(const std::string& label) const {
int n = 0;
for (const Column& col : columns)
@@ -274,6 +427,9 @@ struct GEMMI_DLL Mtz : public MtzMetadata {
return n;
}
+ /// Count columns of a specific type.
+ /// @param type Column type code (e.g., 'F', 'P', 'Q').
+ /// @return Number of columns with this type.
int count_type(char type) const {
int n = 0;
for (const Column& col : columns)
@@ -282,6 +438,11 @@ struct GEMMI_DLL Mtz : public MtzMetadata {
return n;
}
+ /// Find the first column with a given label, optionally filtered by dataset and type.
+ /// @param label Column label.
+ /// @param ds [optional] Restrict search to this dataset (nullptr = any).
+ /// @param type [optional] Restrict search to this type ('*' = any).
+ /// @return Pointer to the column, or nullptr if not found.
Column* column_with_label(const std::string& label, const Dataset* ds=nullptr, char type='*') {
for (Column& col : columns)
if (col.label == label && (!ds || ds->id == col.dataset_id)
@@ -289,17 +450,26 @@ struct GEMMI_DLL Mtz : public MtzMetadata {
return &col;
return nullptr;
}
+ /// Find the first column with a given label (const).
const Column* column_with_label(const std::string& label, const Dataset* ds=nullptr,
char type='*') const {
return const_cast(this)->column_with_label(label, ds, type);
}
+ /// Get a column by label, raising an error if not found.
+ /// @param label Column label.
+ /// @param ds [optional] Restrict search to this dataset.
+ /// @return Reference to the column.
+ /// @throws std::runtime_error if column not found.
const Column& get_column_with_label(const std::string& label, const Dataset* ds=nullptr) const {
if (const Column* col = column_with_label(label, ds))
return *col;
fail("Column label not found: " + label);
}
+ /// Get all columns of a specific type.
+ /// @param type Column type code.
+ /// @return Vector of pointers to matching columns.
std::vector columns_with_type(char type) const {
std::vector cols;
for (const Column& col : columns)
@@ -308,6 +478,9 @@ struct GEMMI_DLL Mtz : public MtzMetadata {
return cols;
}
+ /// Get positions (indices) of all columns with a specific type.
+ /// @param col_type Column type code.
+ /// @return Vector of column indices.
std::vector positions_of_columns_with_type(char col_type) const {
std::vector cols;
for (int i = 0; i < (int) columns.size(); ++i)
@@ -316,9 +489,11 @@ struct GEMMI_DLL Mtz : public MtzMetadata {
return cols;
}
- // F(+)/(-) pairs should have type G (and L for sigma),
- // I(+)/(-) -- K (M for sigma), but E(+)/(-) has no special column type,
- // so here we use column labels not types.
+ /// Find anomalous (±) column pairs by label pattern matching.
+ /// Looks for labels with "(+)" and matches corresponding "(-)" columns.
+ /// Note: F(+)/(-) pairs use type G, I(+)/(-) use type K, but E(+)/(-)
+ /// have no dedicated type, so label matching is used.
+ /// @return Vector of (index_plus, index_minus) pairs.
std::vector> positions_of_plus_minus_columns() const {
std::vector> r;
for (int i = 0; i < (int) columns.size(); ++i) {
@@ -339,7 +514,11 @@ struct GEMMI_DLL Mtz : public MtzMetadata {
return r;
}
- /// the order of labels matters
+ /// Find the first column matching any label in a prioritized list.
+ /// @param labels List of labels to try in order.
+ /// @param type [optional] Column type to match ('*' = any).
+ /// @return Pointer to the first matching column, or nullptr.
+ /// @note Order of labels matters; returns the first match.
const Column* column_with_one_of_labels(std::initializer_list labels,
char type='*') const {
for (const char* label : labels)
@@ -348,7 +527,11 @@ struct GEMMI_DLL Mtz : public MtzMetadata {
return nullptr;
}
- /// the order of labels doesn't matter
+ /// Find a column matching a type and any of several labels.
+ /// @param type Column type to match.
+ /// @param labels List of labels to search for.
+ /// @return Pointer to the first matching column, or nullptr.
+ /// @note Order of labels does not matter.
Column* column_with_type_and_any_of_labels(char type, std::initializer_list labels) {
for (Column& col : columns)
if (col.type == type) {
@@ -359,75 +542,118 @@ struct GEMMI_DLL Mtz : public MtzMetadata {
return nullptr;
}
+ /// Find the R-free flag column (common labels: FREE, RFREE, R_FREE_FLAGS, etc.).
+ /// @return Pointer to R-free column (type 'I'), or nullptr.
Column* rfree_column() {
// cf. MtzToCif::default_spec in mtz2cif.hpp
return column_with_type_and_any_of_labels('I',
{"FREE", "RFREE", "FREER", "FreeR_flag", "R-free-flags", "FreeRflag", "R_FREE_FLAGS"});
}
+ /// Find the R-free flag column (const).
const Column* rfree_column() const {
return const_cast(this)->rfree_column();
}
+ /// Find the mean intensity column (common labels: IMEAN, I, IOBS, I-obs).
+ /// @return Pointer to intensity column (type 'J'), or nullptr.
Column* imean_column() {
return column_with_type_and_any_of_labels('J', {"IMEAN", "I", "IOBS", "I-obs"});
}
+ /// Find the mean intensity column (const).
const Column* imean_column() const {
return const_cast(this)->imean_column();
}
+ /// Find the I(+) anomalous intensity column (common labels: I(+), IOBS(+), Iplus).
+ /// @return Pointer to I(+) column (type 'K'), or nullptr.
Column* iplus_column() {
return column_with_type_and_any_of_labels('K', {"I(+)", "IOBS(+)", "I-obs(+)", "Iplus"});
}
+ /// Find the I(+) column (const).
const Column* iplus_column() const {
return const_cast(this)->iplus_column();
}
+ /// Find the I(-) anomalous intensity column.
+ /// @return Pointer to I(-) column (type 'K'), or nullptr.
Column* iminus_column() {
return column_with_type_and_any_of_labels('K', {"I(-)", "IOBS(-)", "I-obs(-)", "Iminus"});
}
+ /// Find the I(-) column (const).
const Column* iminus_column() const {
return const_cast(this)->iminus_column();
}
+ /// @}
+ /// @name Data status
+ /// @{
+
+ /// Check if reflection data has been loaded.
+ /// @return True if data.size() == columns.size() * nreflections.
bool has_data() const {
return data.size() == columns.size() * nreflections;
}
+ /// Check if this is a merged MTZ file (no batch headers).
+ /// @return True if batches.empty().
bool is_merged() const { return batches.empty(); }
- /// Calculates min/max for all combinations of reflections and unit cells,
- /// where unit cells are a global CELL and per-dataset DCELL.
+ /// Calculate min/max 1/d² from all reflections and unit cells.
+ /// Considers both global cell and per-dataset DCELLs.
+ /// @return [min_1_d2, max_1_d2].
std::array calculate_min_max_1_d2() const;
+ /// Recalculate and update min_1_d2 and max_1_d2 from reflection data.
void update_reso() {
std::array reso = calculate_min_max_1_d2();
min_1_d2 = reso[0];
max_1_d2 = reso[1];
}
- // Functions for reading MTZ headers and data.
+ /// @}
+ /// @name File I/O
+ /// @{
+ /// Toggle the assumed byte order and swap header_offset accordingly.
void toggle_endianness() {
same_byte_order = !same_byte_order;
swap_eight_bytes(&header_offset);
}
+ /// Read and verify the first 80 bytes (MTZ magic and machine stamp).
+ /// @param stream Input stream positioned at file start.
void read_first_bytes(AnyStream& stream);
- /// read headers until END
+ /// Read header records from VERS until END.
+ /// @param stream Input stream positioned at the header block.
+ /// @param save_headers [optional] Pointer to string vector to save header lines.
void read_main_headers(AnyStream& stream, std::vector* save_headers);
- /// read the part between END and MTZENDOFHEADERS
+ /// Read history (MTZHIST) and batch (MTZBATS) records after the END header.
+ /// @param stream Input stream positioned after END.
void read_history_and_batch_headers(AnyStream& stream);
+ /// Set up spacegroup pointer from spacegroup_number or spacegroup_name.
void setup_spacegroup();
+ /// Read raw reflection data from stream (float32 binary).
+ /// @param stream Input stream.
+ /// @param do_read If false, skip reading (compute space only).
void read_raw_data(AnyStream& stream, bool do_read=true);
+ /// Read all header records (convenience wrapper).
+ /// @param stream Input stream.
void read_all_headers(AnyStream& stream);
+ /// Read MTZ from a stream, including headers and optionally data.
+ /// Expects stream positioned at file start; reads in order: raw data, main headers, batch headers.
+ /// @param stream Input stream.
+ /// @param with_data If true, read reflection data; if false, skip it.
void read_stream(AnyStream& stream, bool with_data);
+ /// Read MTZ from a file path.
+ /// @param path File path.
+ /// @throws std::system_error or std::runtime_error on failure.
void read_file(const std::string& path) {
try {
source_path = path;
@@ -440,45 +666,90 @@ struct GEMMI_DLL Mtz : public MtzMetadata {
}
}
+ /// Read MTZ from an input object (e.g., MaybeGzipped for .mtz or .mtz.gz).
+ /// @tparam Input Type with path() and create_stream() methods.
+ /// @param input Input object.
+ /// @param with_data If true, read reflection data.
template
void read_input(Input&& input, bool with_data) {
source_path = input.path();
read_stream(*input.create_stream(), with_data);
}
- /// the same as read_input(MaybeGzipped(path), with_data)
+ /// Read MTZ from a file, handling .gz compression automatically.
+ /// @param path File path (.mtz or .mtz.gz).
+ /// @param with_data If true, read reflection data (default true).
void read_file_gz(const std::string& path, bool with_data=true);
+ /// @}
+ /// @name Data manipulation (reflection rows)
+ /// @{
+
+ /// Get sorted row indices based on the first N columns (HKL by default).
+ /// @param use_first Number of columns to use for sorting (default 3 = h, k, l).
+ /// @return Vector of indices [0..nreflections-1] sorted by the first N columns.
std::vector sorted_row_indices(int use_first=3) const;
+
+ /// Sort reflections in-place using the first N columns.
+ /// @param use_first Number of columns to use for sorting (default 3).
+ /// @return True if any sorting was done; false if already sorted.
bool sort(int use_first=3);
+ /// Extract Miller indices from a reflection at a given offset in the data array.
+ /// @param offset Offset to the first element of the reflection (H, K, L at offsets 0, 1, 2).
+ /// @return Miller indices.
Miller get_hkl(size_t offset) const {
return {{(int)data[offset], (int)data[offset+1], (int)data[offset+2]}};
}
+
+ /// Set Miller indices at a given offset.
+ /// @param offset Offset to the H element.
+ /// @param hkl Miller indices to store.
void set_hkl(size_t offset, const Miller& hkl) {
for (int i = 0; i != 3; ++i)
data[offset + i] = static_cast(hkl[i]);
}
- /// Returns offset of the first hkl or (size_t)-1. Can be slow.
+ /// Find the data offset of the first reflection with specific Miller indices.
+ /// @param hkl Miller indices to search for.
+ /// @param start Starting offset (optional, default 0).
+ /// @return Offset to the reflection, or (size_t)-1 if not found.
+ /// @note This is a linear search; can be slow for large files.
size_t find_offset_of_hkl(const Miller& hkl, size_t start=0) const;
- /// (for merged MTZ only) change HKL to ASU equivalent, adjust phases, etc
+ /// Move all reflections to ASU and adjust phases/anomalous data accordingly.
+ /// For merged MTZ only. Transforms F(+), F(-), phases, and Hendrickson-Lattman coefficients.
+ /// @param tnt_asu If true, use TNT ASU setting; if false, use default ASU.
void ensure_asu(bool tnt_asu=false);
- /// Reindex data, usually followed by ensure_asu(). Outputs messages through logger.
+ /// Reindex reflections using a new basis and update space group accordingly.
+ /// Applies symmetry operation to HKL, removes fractional indices, adjusts cell and space group.
+ /// Outputs messages to logger.
+ /// @param op Reindexing operation (must have no translation and determinant > 0).
void reindex(const Op& op);
- /// Change symmetry to P1 and expand reflections. Does not sort.
- /// Similar to command EXPAND in SFTOOLS.
+ /// Expand reflections to P1 using all symmetry operations.
+ /// Duplicate reflections under symmetry, adjust phases if present.
+ /// Similar to SFTOOLS EXPAND command.
+ /// @note Does not re-sort; sort afterwards if needed.
void expand_to_p1();
- /// (for unmerged MTZ only) change HKL according to M/ISYM
+ /// For unmerged MTZ: convert HKL from ASU to original (observer) indices.
+ /// Reads M/ISYM column and applies inverse symmetry operations.
+ /// @return True if M/ISYM column was found and data was modified.
bool switch_to_original_hkl();
- /// (for unmerged MTZ only) change HKL to ASU equivalent and set ISYM
+ /// For unmerged MTZ: convert HKL to ASU and set M/ISYM column accordingly.
+ /// @return True if M/ISYM column was found and data was modified.
bool switch_to_asu_hkl();
+ /// @}
+ /// @name Data construction
+ /// @{
+
+ /// Create a new dataset with auto-assigned ID and add to the file.
+ /// @param name Name to use for project, crystal, and dataset.
+ /// @return Reference to the newly added dataset.
Dataset& add_dataset(const std::string& name) {
int id = 0;
for (const Dataset& d : datasets)
@@ -488,20 +759,39 @@ struct GEMMI_DLL Mtz : public MtzMetadata {
return datasets.back();
}
+ /// Add a column to the file and optionally expand the data array.
+ /// @param label Column label.
+ /// @param type Column type code.
+ /// @param dataset_id Dataset ID for this column (-1 = last dataset).
+ /// @param pos Position in column list (-1 = append).
+ /// @param expand_data If true, insert empty rows (NAN) in the data array.
+ /// @return Reference to the newly added column.
Column& add_column(const std::string& label, char type,
int dataset_id, int pos, bool expand_data);
- // extra_col are columns right after src_col that are also copied.
+ /// Replace a column with data from another column (including trailing columns).
+ /// @param dest_idx Destination column index.
+ /// @param src_col Source column to copy from.
+ /// @param trailing_cols [optional] Labels of columns immediately after src_col to also copy.
+ /// @return Reference to the destination column.
Column& replace_column(size_t dest_idx, const Column& src_col,
const std::vector& trailing_cols={});
- // If dest_idx < 0 - columns are appended at the end
- // append new column(s), otherwise overwrite existing ones.
+ /// Copy a column to a destination, or append if dest_idx < 0.
+ /// @param dest_idx Destination index (-1 = append).
+ /// @param src_col Source column.
+ /// @param trailing_cols [optional] Labels of subsequent columns to also copy.
+ /// @return Reference to the destination column.
Column& copy_column(int dest_idx, const Column& src_col,
const std::vector& trailing_cols={});
+ /// Remove a column from the file and data array.
+ /// @param idx Column index to remove.
void remove_column(size_t idx);
+ /// Remove reflection rows matching a condition.
+ /// @tparam Func Callable that takes pointer to row data and returns true to remove.
+ /// @param condition Predicate function.
template
void remove_rows_if(Func condition) {
if (!has_data())
@@ -518,6 +808,9 @@ struct GEMMI_DLL Mtz : public MtzMetadata {
nreflections = int(data.size() / width);
}
+ /// Insert new empty columns in the data array.
+ /// @param added Number of columns to insert.
+ /// @param pos_ Position to insert at (-1 = at the end).
void expand_data_rows(size_t added, int pos_=-1) {
size_t old_row_size = columns.size() - added;
if (data.size() != old_row_size * nreflections)
@@ -528,6 +821,10 @@ struct GEMMI_DLL Mtz : public MtzMetadata {
vector_insert_columns(data, old_row_size, (size_t)nreflections, added, pos, NAN);
}
+ /// Replace the reflection data array with new data.
+ /// @param new_data Pointer to float array.
+ /// @param n Total number of floats (must be divisible by columns.size()).
+ /// @throws std::runtime_error if n is not a multiple of columns.size().
void set_data(const float* new_data, size_t n) {
size_t ncols = columns.size();
if (n % ncols != 0)
@@ -536,24 +833,54 @@ struct GEMMI_DLL Mtz : public MtzMetadata {
data.assign(new_data, new_data + n);
}
- // Function for writing MTZ file
+ /// @}
+ /// @name File output
+ /// @{
+
+ /// Write MTZ to a C FILE stream.
+ /// @param stream Open FILE* stream (should be in binary write mode).
void write_to_cstream(std::FILE* stream) const;
+
+ /// Write MTZ to a string (binary data).
+ /// @param str [out] String to append the binary MTZ data to.
void write_to_string(std::string& str) const;
+
+ /// Write MTZ to a file.
+ /// @param path File path.
void write_to_file(const std::string& path) const;
+
+ /// Get the size of the binary MTZ output in bytes.
+ /// @return Size needed for the complete MTZ file.
size_t size_to_write() const;
+
+ /// Write MTZ to a buffer.
+ /// @param buf Pointer to output buffer.
+ /// @param maxlen Maximum bytes to write.
+ /// @return Number of bytes written.
size_t write_to_buffer(char* buf, size_t maxlen) const;
private:
+ /// Generic write implementation (template to support FILE*, string, buffer).
+ /// @tparam Write Function type (size_t write(const void*, size_t, size_t)).
template void write_to_stream(Write write) const;
};
+/// @}
+/// Convenience function: read MTZ from a file path.
+/// @param path File path.
+/// @return Loaded Mtz object.
inline Mtz read_mtz_file(const std::string& path) {
Mtz mtz;
mtz.read_file(path);
return mtz;
}
+/// Convenience function: read MTZ from an input object (handles gzip).
+/// @tparam Input Type with path() and create_stream() methods.
+/// @param input Input object (e.g., MaybeGzipped).
+/// @param with_data If true, read reflection data; if false, headers only.
+/// @return Loaded Mtz object.
template
Mtz read_mtz(Input&& input, bool with_data) {
Mtz mtz;
@@ -561,17 +888,33 @@ Mtz read_mtz(Input&& input, bool with_data) {
return mtz;
}
-// Abstraction of data source, cf. ReflnDataProxy.
+/// Abstraction layer for accessing MTZ data uniformly.
+/// Provides stride, data access, and cell/symmetry information.
+/// Similar to ReflnDataProxy for reflection data in other formats.
struct MtzDataProxy {
+ /// Reference to the MTZ object.
const Mtz& mtz_;
+ /// Stride (number of columns) between consecutive reflections.
size_t stride() const { return mtz_.columns.size(); }
+ /// Total number of floats in the data array.
size_t size() const { return mtz_.data.size(); }
+ /// Element type (always float).
using num_type = float;
+ /// Access a data element by index.
+ /// @param n Index into the flat data array.
float get_num(size_t n) const { return mtz_.data[n]; }
+ /// Get the unit cell.
const UnitCell& unit_cell() const { return mtz_.cell; }
+ /// Get the space group.
const SpaceGroup* spacegroup() const { return mtz_.spacegroup; }
+ /// Get Miller indices from a reflection.
+ /// @param offset Offset to the H element.
Miller get_hkl(size_t offset) const { return mtz_.get_hkl(offset); }
+ /// Find the column index for a given label.
+ /// @param label Column label.
+ /// @return Column index (idx).
+ /// @throws std::runtime_error if label not found.
size_t column_index(const std::string& label) const {
if (const Mtz::Column* col = mtz_.column_with_label(label))
return col->idx;
@@ -579,13 +922,21 @@ struct MtzDataProxy {
}
};
-// Like above, but here the data is stored outside of the Mtz class
+/// MtzDataProxy variant for external data (not stored in Mtz).
+/// Wraps MTZ metadata with a separate data array pointer.
struct MtzExternalDataProxy : MtzDataProxy {
+ /// Pointer to external data array.
const float* data_;
+ /// Initialize with MTZ metadata and external data.
+ /// @param mtz MTZ object (for structure info only).
+ /// @param data Pointer to external float array (size = columns.size() * nreflections).
MtzExternalDataProxy(const Mtz& mtz, const float* data)
: MtzDataProxy{mtz}, data_(data) {}
+ /// Total size of the external data array.
size_t size() const { return mtz_.columns.size() * mtz_.nreflections; }
+ /// Access element from external data.
float get_num(size_t n) const { return data_[n]; }
+ /// Get Miller indices from external data.
Miller get_hkl(size_t offset) const {
return {{(int)data_[offset + 0],
(int)data_[offset + 1],
@@ -593,6 +944,9 @@ struct MtzExternalDataProxy : MtzDataProxy {
}
};
+/// Create a proxy for accessing MTZ data.
+/// @param mtz MTZ object.
+/// @return MtzDataProxy wrapping the MTZ.
inline MtzDataProxy data_proxy(const Mtz& mtz) { return {mtz}; }
} // namespace gemmi
diff --git a/include/gemmi/mtz2cif.hpp b/include/gemmi/mtz2cif.hpp
index 387ceedc8..79811d865 100644
--- a/include/gemmi/mtz2cif.hpp
+++ b/include/gemmi/mtz2cif.hpp
@@ -19,27 +19,60 @@
namespace gemmi {
+/// @file
+/// @brief Converter for MTZ reflection data to SF-mmCIF format.
+
+/// @brief Converts MTZ files (merged or unmerged) to SF-mmCIF reflection tables.
+///
+/// This class provides configuration options for column selection, naming,
+/// filtering, and metadata handling when converting MTZ format to mmCIF.
class GEMMI_DLL MtzToCif {
public:
- // options that can be set directly
- std::vector spec_lines; // conversion specification (cf. default_spec)
- const char* block_name = nullptr; // NAME in data_NAME
- std::string entry_id = "xxxx"; // _entry.id
- bool with_comments = true; // write comments
- bool with_history = true; // write MTZ history in comments
- bool skip_empty = false; // skip reflections with no values
- bool skip_negative_sigi = false; // skip refl. with sigma(I) < 0 in unmerged
- bool enable_UB = false; // write _diffrn_orient_matrix.UB
- bool write_staraniso_tensor = true; // write _reflns.pdbx_aniso_B_tensor_*
+ /// Column conversion specification lines (see default_spec for format).
+ std::vector spec_lines;
+ /// CIF data block name (NAME in data_NAME).
+ const char* block_name = nullptr;
+ /// Entry identifier (_entry.id tag).
+ std::string entry_id = "xxxx";
+ /// Whether to write comments describing the conversion.
+ bool with_comments = true;
+ /// Whether to write MTZ history records in comments.
+ bool with_history = true;
+ /// Skip reflections where all selected numeric columns are missing.
+ bool skip_empty = false;
+ /// Skip unmerged reflections with sigma(I) < 0.
+ bool skip_negative_sigi = false;
+ /// Write _diffrn_orient_matrix.UB orientation matrix.
+ bool enable_UB = false;
+ /// Write _reflns.pdbx_aniso_B_tensor_* Starraniso B-tensor (if available).
+ bool write_staraniso_tensor = true;
+ /// Write PDB-specific special marker for validation.
bool write_special_marker_for_pdb = false;
- int less_anomalous = 0; // skip (+)/(-) columns even if in spec
- std::string skip_empty_cols; // columns used to determine "emptiness"
- double wavelength = NAN; // user-specified wavelength
- int trim = 0; // output only reflections -N<=h,k,l<=N
- int free_flag_value = -1; // -1 = auto: 0 or (if we have >50% of 0's) 1
- std::string staraniso_version; // for _software.version in "special_marker"
- std::string gemmi_run_from; // added to gemmi as _software.description
+ /// If non-zero, skip anomalous (+/-) column pairs.
+ int less_anomalous = 0;
+ /// Columns used to determine if reflection is "empty" (when skip_empty=true).
+ std::string skip_empty_cols;
+ /// User-specified wavelength (NAN means use MTZ value).
+ double wavelength = NAN;
+ /// Trim reflections: output only those with -N<=h,k,l<=N (0 = no trim).
+ int trim = 0;
+ /// Free flag value: -1=auto, 0 or 1=explicit.
+ int free_flag_value = -1;
+ /// Starraniso version string for metadata.
+ std::string staraniso_version;
+ /// Description string appended to gemmi software entry.
+ std::string gemmi_run_from;
+ /// @brief Get default column specification for merged or unmerged data.
+ ///
+ /// The returned spec_lines describe MTZ-to-mmCIF column mapping.
+ /// Format: [?|&|$|H][COLUMN_NAME] [TYPE] [mmCIF_TAG] [FORMAT]
+ /// - ? = optional column (try alternatives separated by |)
+ /// - & = required, uses previous column's result
+ /// - $ = internal (dataset_id, counter)
+ /// - H = required by IUCR standard
+ /// @param for_merged If true, return spec for merged data; else unmerged.
+ /// @return Null-terminated array of spec strings.
static const char** default_spec(bool for_merged) {
static const char* merged[] = {
"H H index_h",
@@ -90,24 +123,56 @@ class GEMMI_DLL MtzToCif {
return for_merged ? merged : unmerged;
}
+ /// @brief Write MTZ reflection data to CIF format.
+ /// @param mtz First MTZ dataset (required).
+ /// @param mtz2 Optional second MTZ dataset for anomalous comparison.
+ /// @param staraniso_b Optional Starraniso B-tensor to include.
+ /// @param os Output stream for CIF file.
void write_cif(const Mtz& mtz, const Mtz* mtz2,
SMat33* staraniso_b, std::ostream& os);
+ /// @brief Write XDS reflection data to CIF format.
+ /// @param xds XDS_ASCII data to convert.
+ /// @param os Output stream for CIF file.
void write_cif_from_xds(const XdsAscii& xds, std::ostream& os) const;
};
+/// @brief Write Starraniso B-tensor to mmCIF format.
+/// @param b 3x3 symmetric B-tensor matrix.
+/// @param entry_id Entry identifier for tags.
+/// @param buf Temporary buffer for formatting.
+/// @param os Output stream.
GEMMI_DLL void write_staraniso_b_in_mmcif(const SMat33& b,
const std::string& entry_id,
char* buf, std::ostream& os);
-/// remove '_dataset_name' that can be appended to column names in ccp4i
+/// @brief Remove '_dataset_name' appendix from MTZ column labels.
+///
+/// This suffix is sometimes added by CCP4i and needs removal for proper conversion.
+/// @param mtz MTZ file to modify.
+/// @param logger For reporting changes.
GEMMI_DLL void remove_appendix_from_column_names(Mtz& mtz, const Logger& logger);
+/// @brief Validate merged MTZ has required columns for PDB deposition.
+/// @param mtz MTZ to check.
+/// @param logger For reporting results.
+/// @return True if all required columns present.
GEMMI_DLL bool validate_merged_mtz_deposition_columns(const Mtz& mtz, const Logger& logger);
-// note: both mi and ui get modified
+/// @brief Validate merged intensity data for consistency and quality.
+///
+/// Compares merged and unmerged intensity columns for anomalous differences and completeness.
+/// Modifies both Intensities objects.
+/// @param mi Merged intensities (modified).
+/// @param ui Unmerged intensities (modified).
+/// @param relaxed_check If true, apply looser validation criteria.
+/// @param logger For reporting issues.
+/// @return True if validation passes.
GEMMI_DLL bool validate_merged_intensities(Intensities& mi, Intensities& ui,
bool relaxed_check, const Logger& logger);
+/// @brief Extract software information from MTZ history records.
+/// @param history Vector of history strings from MTZ file.
+/// @return Vector of SoftwareItem objects describing processing steps.
GEMMI_DLL std::vector
get_software_from_mtz_history(const std::vector& history);
diff --git a/include/gemmi/refln.hpp b/include/gemmi/refln.hpp
index d74cc9939..c24df4c18 100644
--- a/include/gemmi/refln.hpp
+++ b/include/gemmi/refln.hpp
@@ -15,19 +15,38 @@
namespace gemmi {
+/// @file
+/// @brief Structure and accessors for reflection data from SF-mmCIF files.
+
+/// @brief Wrapper for reflection data block from mmCIF file.
+///
+/// Provides column access and HKL index management for reflection data
+/// stored in CIF loops (either merged _refln or unmerged _diffrn_refln categories).
struct ReflnBlock {
+ /// CIF data block containing reflection data.
cif::Block block;
+ /// Entry identifier from _entry.id tag.
std::string entry_id;
+ /// Unit cell parameters.
UnitCell cell;
+ /// Pointer to space group; nullptr if unknown.
const SpaceGroup* spacegroup = nullptr;
+ /// X-ray wavelength in Angstroms (0 if multiple wavelengths).
double wavelength;
+ /// Number of wavelengths in the data block.
int wavelength_count;
+ /// Pointer to _refln loop (merged data) or nullptr if absent.
cif::Loop* refln_loop = nullptr;
+ /// Pointer to _diffrn_refln loop (unmerged data) or nullptr if absent.
cif::Loop* diffrn_refln_loop = nullptr;
+ /// Points to active loop (refln_loop or diffrn_refln_loop).
cif::Loop* default_loop = nullptr;
+ /// Default constructor (empty block).
ReflnBlock() = default;
+ /// Move constructor.
ReflnBlock(ReflnBlock&& rblock_) = default;
+ /// Construct from CIF block; extracts cell, spacegroup, wavelength, and reflection loops.
ReflnBlock(cif::Block&& block_) : block(std::move(block_)) {
entry_id = cif::as_string(block.find_value("_entry.id"));
impl::set_cell_from_mmcif(block, cell);
@@ -43,7 +62,9 @@ struct ReflnBlock {
diffrn_refln_loop = block.find_loop("_diffrn_refln.index_h").get_loop();
default_loop = refln_loop ? refln_loop : diffrn_refln_loop;
}
+ /// Move assignment.
ReflnBlock& operator=(ReflnBlock&&) = default;
+ /// Copy assignment (deep copy of block and pointers).
ReflnBlock& operator=(const ReflnBlock& o) {
if (this == &o)
return *this;
@@ -61,19 +82,28 @@ struct ReflnBlock {
return *this;
}
+ /// @brief Check if block contains valid reflection data.
+ /// @return True if default_loop is set and not null.
bool ok() const { return default_loop != nullptr; }
+ /// @brief Throw exception if block is not valid.
void check_ok() const { if (!ok()) fail("Invalid ReflnBlock"); }
- // position after "_refln." or "_diffrn_refln."
+ /// @brief Get offset to tag name (after "_refln." or "_diffrn_refln.").
+ /// @return 7 for merged, 14 for unmerged.
size_t tag_offset() const { return default_loop == refln_loop ? 7 : 14; }
+ /// @brief Switch between merged and unmerged reflection loops.
+ /// @param unmerged If true, use _diffrn_refln loop; otherwise use _refln.
void use_unmerged(bool unmerged) {
default_loop = unmerged ? diffrn_refln_loop : refln_loop;
}
+ /// @brief Check if active loop is merged reflection data.
bool is_merged() const { return ok() && default_loop == refln_loop; }
- // deprecated
+ /// @brief Check if active loop is unmerged reflection data (deprecated).
bool is_unmerged() const { return ok() && default_loop == diffrn_refln_loop; }
+ /// @brief Get list of column labels (without category prefix).
+ /// @return Vector of tag names from the active loop.
std::vector column_labels() const {
check_ok();
std::vector labels(default_loop->tags.size());
@@ -82,6 +112,9 @@ struct ReflnBlock {
return labels;
}
+ /// @brief Find column index by tag name (without category prefix).
+ /// @param tag Column name (e.g., "index_h", "F_meas_sigma_au").
+ /// @return Column index, or -1 if not found.
int find_column_index(const std::string& tag) const {
if (!ok())
return -1;
@@ -92,6 +125,10 @@ struct ReflnBlock {
return -1;
}
+ /// @brief Get column index by tag name, throwing exception if not found.
+ /// @param tag Column name.
+ /// @return Column index.
+ /// @throws gemmi::fail if column does not exist.
size_t get_column_index(const std::string& tag) const {
int idx = find_column_index(tag);
if (idx == -1) {
@@ -103,6 +140,11 @@ struct ReflnBlock {
return idx;
}
+ /// @brief Extract column values as typed vector.
+ /// @tparam T Value type (e.g., int, double).
+ /// @param tag Column name.
+ /// @param null Default value for missing data.
+ /// @return Vector of converted values.
template
std::vector make_vector(const std::string& tag, T null) const {
size_t n = get_column_index(tag);
@@ -112,12 +154,16 @@ struct ReflnBlock {
return v;
}
+ /// @brief Get column indices for h, k, l indices.
+ /// @return Array of 3 column indices for index_h, index_k, index_l.
std::array get_hkl_column_indices() const {
return {{get_column_index("index_h"),
get_column_index("index_k"),
get_column_index("index_l")}};
}
+ /// @brief Extract Miller indices from all reflections.
+ /// @return Vector of Miller indices.
std::vector make_miller_vector() const {
auto hkl_idx = get_hkl_column_indices();
std::vector v(default_loop->length());
@@ -127,6 +173,9 @@ struct ReflnBlock {
return v;
}
+ /// @brief Calculate 1/d^2 for all reflections using unit cell parameters.
+ /// @return Vector of 1/d^2 values.
+ /// @throws gemmi::fail if unit cell is not set.
std::vector make_1_d2_vector() const {
if (!cell.is_crystal() || cell.a <= 0)
fail("Unit cell is not known");
@@ -141,6 +190,8 @@ struct ReflnBlock {
return r;
}
+ /// @brief Calculate d-spacing for all reflections using unit cell parameters.
+ /// @return Vector of d-spacing values in Angstroms.
std::vector make_d_vector() const {
std::vector vec = make_1_d2_vector();
for (double& d : vec)
@@ -149,7 +200,12 @@ struct ReflnBlock {
}
};
-// moves blocks from the argument to the return value
+/// @brief Convert CIF blocks to ReflnBlocks, propagating cell and spacegroup info.
+///
+/// Fills in missing cell or spacegroup data by copying from the first block
+/// that contains it. Moves blocks from input to output.
+/// @param blocks Input CIF blocks (consumed).
+/// @return Vector of ReflnBlocks.
inline
std::vector as_refln_blocks(std::vector&& blocks) {
std::vector rvec;
@@ -175,8 +231,15 @@ std::vector as_refln_blocks(std::vector&& blocks) {
return rvec;
}
-// Get the first (merged) block with required labels.
-// Optionally, block name can be specified.
+/// @brief Find the first merged reflection block containing specified columns.
+///
+/// Searches blocks for one with _refln loop and required column labels.
+/// Propagates spacegroup from first block if needed.
+/// @param blocks Input CIF blocks (consumed).
+/// @param labels Required column names (empty string means optional).
+/// @param block_name Optional: if provided, only this named block is considered.
+/// @return The matching ReflnBlock.
+/// @throws gemmi::fail if no matching block or required columns not found.
inline ReflnBlock get_refln_block(std::vector&& blocks,
const std::vector& labels,
const char* block_name=nullptr) {
@@ -209,6 +272,9 @@ inline ReflnBlock get_refln_block(std::vector&& blocks,
fail("Tags not found in SF-mmCIF file: _refln.", join_str(labels, ", _refln."));
}
+/// @brief Convert CIF block from non-mmCIF format to ReflnBlock.
+/// @param block Input CIF block (e.g., from hkl file).
+/// @return ReflnBlock with data from _refln_index_h loop.
inline ReflnBlock hkl_cif_as_refln_block(cif::Block& block) {
ReflnBlock rb;
rb.block.swap(block);
@@ -224,29 +290,47 @@ inline ReflnBlock hkl_cif_as_refln_block(cif::Block& block) {
return rb;
}
-// Abstraction of data source, cf. MtzDataProxy.
+/// @brief Generic data source abstraction over a ReflnBlock for row iteration.
+///
+/// Provides uniform interface for accessing reflection columns (similar to MtzDataProxy).
struct ReflnDataProxy {
+ /// Reference to underlying ReflnBlock.
const ReflnBlock& rb_;
+ /// Cached indices for h, k, l columns.
std::array hkl_cols_;
+ /// Initialize proxy from a ReflnBlock.
explicit ReflnDataProxy(const ReflnBlock& rb)
: rb_(rb), hkl_cols_(rb_.get_hkl_column_indices()) {}
+ /// Number of columns (values per reflection).
size_t stride() const { return loop().tags.size(); }
+ /// Total number of values (stride * reflection count).
size_t size() const { return loop().values.size(); }
+ /// Numeric value type.
using num_type = double;
+ /// Get numeric value at flattened loop index.
double get_num(size_t n) const { return cif::as_number(loop().values[n]); }
+ /// Get unit cell.
const UnitCell& unit_cell() const { return rb_.cell; }
+ /// Get spacegroup.
const SpaceGroup* spacegroup() const { return rb_.spacegroup; }
+ /// Get Miller indices at given offset in loop.
Miller get_hkl(size_t offset) const {
return {{get_int(offset + hkl_cols_[0]),
get_int(offset + hkl_cols_[1]),
get_int(offset + hkl_cols_[2])}};
}
+ /// Get column index by label.
size_t column_index(const std::string& label) const { return rb_.get_column_index(label); }
private:
+ /// Get active loop (with bounds check).
const cif::Loop& loop() const { rb_.check_ok(); return *rb_.default_loop; }
+ /// Get integer value at flattened loop index.
int get_int(size_t n) const { return cif::as_int(loop().values[n]); }
};
+/// @brief Create a data proxy over a ReflnBlock.
+/// @param rb ReflnBlock to wrap.
+/// @return ReflnDataProxy for generic access.
inline ReflnDataProxy data_proxy(const ReflnBlock& rb) { return ReflnDataProxy(rb); }
} // namespace gemmi
diff --git a/include/gemmi/xds2mtz.hpp b/include/gemmi/xds2mtz.hpp
index af142a4e3..826c6268b 100644
--- a/include/gemmi/xds2mtz.hpp
+++ b/include/gemmi/xds2mtz.hpp
@@ -12,6 +12,17 @@
namespace gemmi {
+/// @file
+/// @brief Converter for XDS reflection data to MTZ format.
+
+/// @brief Convert XDS reflection data to MTZ format.
+///
+/// For unmerged data, creates unmerged MTZ with batch headers matching Pointless output.
+/// For merged data, uses Intensities class to prepare merged MTZ.
+/// Sets up all standard MTZ columns: H, K, L, M/ISYM, BATCH, I, SIGI, XDET, YDET, ROT,
+/// plus optional FRACTIONCALC, LP, CORR, and MAXC columns depending on XDS read_columns.
+/// @param xds XDS data to convert (modified).
+/// @return Populated MTZ object sorted by reflection index.
inline Mtz xds_to_mtz(XdsAscii& xds) {
if (xds.is_merged()) {
Intensities intensities;
diff --git a/include/gemmi/xds_ascii.hpp b/include/gemmi/xds_ascii.hpp
index 38b7a93f9..1f4ed8ceb 100644
--- a/include/gemmi/xds_ascii.hpp
+++ b/include/gemmi/xds_ascii.hpp
@@ -11,76 +11,138 @@
namespace gemmi {
-// from Pointless docs: likely in-house source, in which case
-// the unpolarised value is left unchanged (recognised wavelengths
-// are CuKalpha 1.5418 +- 0.0019, Mo 0.7107 +- 0.0002, Cr 2.29 +- 0.01)
+/// @file
+/// @brief Reader for XDS_ASCII.HKL and INTEGRATE.HKL reflection files.
+
+/// @brief Check if wavelength likely comes from in-house X-ray source.
+///
+/// Based on Pointless documentation; recognizes Cu, Mo, and Cr K-alpha
+/// wavelengths with their typical uncertainties.
+/// @param wavelength Wavelength in Angstroms.
+/// @return True if wavelength matches Cu, Mo, or Cr.
inline bool likely_in_house_source(double wavelength) {
return std::fabs(wavelength - 1.5418) < 0.0019 ||
std::fabs(wavelength - 0.7107) < 0.0002 ||
std::fabs(wavelength - 2.29) < 0.01;
}
+/// @brief Metadata for XDS reflection data (base class).
struct XdsAsciiMetadata {
+ /// @brief Properties of one integration set (dataset/sweep).
struct Iset {
+ /// Integration set ID.
int id;
+ /// Input file name for this set.
std::string input_file;
+ /// Wavelength in Angstroms (0 if not specified).
double wavelength = 0.;
+ /// Unit cell constants [a, b, c, alpha, beta, gamma].
std::array cell_constants = {0., 0., 0., 0., 0., 0.};
- //statistics set by gather_iset_statistics()
+ /// Minimum frame number (set by gather_iset_statistics).
int frame_number_min = -1;
+ /// Maximum frame number (set by gather_iset_statistics).
int frame_number_max = -1;
+ /// Total number of distinct frames in set.
int frame_count = -1;
+ /// Number of reflections in this set.
int reflection_count = -1;
+ /// @brief Construct Iset with given ID.
Iset(int id_) : id(id_) {}
};
+ /// Source file path.
std::string source_path;
- int read_columns = 0; // doesn't include ITEM_ISET from XSCALE
+ /// Number of columns read from DATA section (0-13, excludes ITEM_ISET from XSCALE).
+ int read_columns = 0;
+ /// Space group number from XDS_ASCII header.
int spacegroup_number = 0;
+ /// X-ray wavelength in Angstroms.
double wavelength = 0.;
+ /// Unit cell constants [a, b, c, alpha, beta, gamma].
std::array cell_constants = {0., 0., 0., 0., 0., 0.};
+ /// Unit cell matrix: rows are a*, b*, c* axes (reciprocal lattice).
Mat33 cell_axes{0.};
+ /// Incident beam direction (usually normalized).
Vec3 incident_beam_dir;
+ /// Oscillation range per frame in degrees.
double oscillation_range = 0.;
+ /// Rotation axis direction.
Vec3 rotation_axis;
+ /// Starting angle for rotation in degrees.
double starting_angle = 0.;
+ /// Mosaicity/reflecting range standard deviation in degrees.
double reflecting_range_esd = 0.;
+ /// Friedel's law assumption: '\0' unknown, 'T'rue, 'F'alse.
char friedels_law = '\0';
+ /// First frame number.
int starting_frame = 1;
- int nx = 0; // detector size - number of pixels
+ /// Detector width in pixels.
+ int nx = 0;
+ /// Detector height in pixels.
int ny = 0;
- double qx = 0.; // pixel size in mm
+ /// Pixel size in x-direction (mm).
+ double qx = 0.;
+ /// Pixel size in y-direction (mm).
double qy = 0.;
+ /// Detector origin in x-direction (mm).
double orgx = 0.;
+ /// Detector origin in y-direction (mm).
double orgy = 0.;
+ /// Distance from sample to detector (mm).
double detector_distance = 0.;
+ /// Program name that generated the file (e.g., "XDS").
std::string generated_by;
+ /// Version string of generating program.
std::string version_str;
+ /// Integration sets (for multi-sweep data).
std::vector isets;
};
+/// @brief Container for XDS reflection data (XDS_ASCII.HKL or INTEGRATE.HKL).
+///
+/// Stores integration metadata and per-reflection measurements from XDS output.
+/// Supports both merged and unmerged data depending on read_columns value.
struct GEMMI_DLL XdsAscii : XdsAsciiMetadata {
+ /// @brief One reflection record from XDS data.
struct Refl {
+ /// Miller indices (h, k, l).
Miller hkl;
+ /// Integration set ID (dataset number).
int iset = 1;
+ /// Integrated intensity.
double iobs;
+ /// Standard deviation of iobs.
double sigma;
+ /// Detector position x (mm).
double xd;
+ /// Detector position y (mm).
double yd;
+ /// Frame number (zd) relative to starting frame (can be negative).
double zd;
+ /// Reciprocal lattice point (RLP) value; related to partiality.
double rlp;
+ /// Peak intensity percentage (0-10000 for 0-100%).
double peak;
- double corr; // is it always integer?
+ /// Correction factor (Lorentz-polarization etc; usually 100-150).
+ double corr;
+ /// Maximum pixel value in reflection.
double maxc;
- // ZD can be negative for a few reflections
+ /// @brief Get frame number (rounded up from zd).
+ /// @return Frame number (zd is negative-friendly).
int frame() const { return (int) std::floor(zd + 1); }
};
+ /// All reflection records.
std::vector data;
+ /// @brief Default constructor.
XdsAscii() = default;
+ /// @brief Construct with existing metadata.
XdsAscii(const XdsAsciiMetadata& m) : XdsAsciiMetadata(m) {}
+ /// @brief Get existing or create new integration set.
+ /// @param id Integration set ID.
+ /// @return Reference to Iset with given ID.
Iset& find_or_add_iset(int id) {
for (Iset& i : isets)
if (i.id == id)
@@ -88,24 +150,40 @@ struct GEMMI_DLL XdsAscii : XdsAsciiMetadata {
isets.emplace_back(id);
return isets.back();
}
+ /// @brief Read XDS file from stream.
+ /// @param reader Input stream handler.
+ /// @param source File path (for error messages).
void read_stream(AnyStream& reader, const std::string& source);
+ /// @brief Read XDS file from input object (file or stdin).
+ /// @tparam T Input object with create_stream() and path() methods.
+ /// @param input Input object.
template
void read_input(T&& input) {
read_stream(*input.create_stream(), input.path());
}
+ /// @brief Check if data is merged (few columns in XDS file).
+ /// @return True if read_columns < 8 (no per-reflection BATCH info).
bool is_merged() const { return read_columns < 8; }
- // set a few Iset properties in isets
+ /// @brief Calculate frame number statistics for each integration set.
+ ///
+ /// Sets frame_number_min, frame_number_max, frame_count, and
+ /// reflection_count for each Iset.
void gather_iset_statistics();
+ /// @brief Calculate rotation angle for a reflection.
+ /// @param refl Reflection record with zd frame number.
+ /// @return Rotation angle in degrees.
double rot_angle(const Refl& refl) const {
double z = refl.zd - starting_frame + 1;
return starting_angle + oscillation_range * z;
}
- // it's already normalized, but just in case normalize it again
+ /// @brief Get normalized rotation axis.
+ /// @return Normalized rotation_axis vector.
+ /// @throws gemmi::fail if rotation_axis is zero.
Vec3 get_rotation_axis() const {
double length = rotation_axis.length();
if (length == 0)
@@ -113,7 +191,9 @@ struct GEMMI_DLL XdsAscii : XdsAsciiMetadata {
return rotation_axis / length;
}
- // I'm not sure if always |incident_beam_dir| == 1/wavelength
+ /// @brief Get normalized incident beam direction (S0).
+ /// @return Normalized incident_beam_dir vector.
+ /// @throws gemmi::fail if incident_beam_dir is zero.
Vec3 get_s0_direction() const {
double length = incident_beam_dir.length();
if (length == 0)
@@ -121,6 +201,8 @@ struct GEMMI_DLL XdsAscii : XdsAsciiMetadata {
return incident_beam_dir / length;
}
+ /// @brief Check if reciprocal lattice vectors (cell_axes) are set.
+ /// @return True if all 3 reciprocal axes are non-zero.
bool has_cell_axes() const {
for (int i = 0; i < 3; ++i)
if (cell_axes[i][0] == 0 && cell_axes[i][1] == 0 && cell_axes[i][2] == 0)
@@ -128,12 +210,15 @@ struct GEMMI_DLL XdsAscii : XdsAsciiMetadata {
return true;
}
- /// Return transition matrix from "Cambridge" frame to XDS frame.
- /// x_xds = M x_cam
+ /// @brief Calculate transformation matrix from Cambridge frame to XDS frame.
+ ///
+ /// Cambridge frame: z along rotation axis, x along incident beam.
+ /// @return 3x3 matrix such that x_xds = M * x_cambridge.
+ /// @throws gemmi::fail if geometry data missing.
Mat33 calculate_conversion_from_cambridge() const {
// Cambridge z direction is along the principal rotation axis
Vec3 z = get_rotation_axis();
- // Cambridge z direction is along beam
+ // Cambridge x direction is along beam
Vec3 x = get_s0_direction();
Vec3 y = z.cross(x).normalized();
// beam and rotation axis may not be orthogonal
@@ -141,6 +226,9 @@ struct GEMMI_DLL XdsAscii : XdsAsciiMetadata {
return Mat33::from_columns(x, y, z);
}
+ /// @brief Calculate crystal orientation matrix U.
+ /// @return 3x3 orientation matrix.
+ /// @throws gemmi::fail if cell_axes not set.
Mat33 get_orientation() const {
if (!has_cell_axes())
fail("unknown unit cell axes");
@@ -154,21 +242,31 @@ struct GEMMI_DLL XdsAscii : XdsAsciiMetadata {
return Mat33::from_columns(ar, br, cr);
}
- /// \par p is degree of polarization from range (0,1), as used in XDS.
+ /// @brief Apply polarization correction to all intensities and sigmas.
+ ///
+ /// Based on incident beam direction, rotation axis, and polarization plane.
+ /// Assumes XDS file has unpolarized beam correction already applied.
+ /// @param p Degree of polarization in [0, 1] (0.5 for unpolarized).
+ /// @param normal Normal vector to polarization plane.
void apply_polarization_correction(double p, Vec3 normal);
- /// \par overload is maximally allowed pixel value in a peak (MAXC).
+ /// @brief Remove reflections with peak pixel value exceeding threshold.
+ /// @param overload Maximum allowed MAXC pixel value.
void eliminate_overloads(double overload) {
vector_remove_if(data, [&](Refl& r) { return r.maxc > overload; });
}
- /// \par batchmin lowest allowed batch number.
+ /// @brief Remove reflections with frame number below threshold.
+ /// @param batchmin Minimum frame number to keep.
void eliminate_batchmin(int batchmin) {
double minz = batchmin - 1;
vector_remove_if(data, [&](Refl& r) { return r.zd < minz; });
}
};
+/// @brief Read XDS_ASCII file from path.
+/// @param path File path.
+/// @return Populated XdsAscii object.
inline XdsAscii read_xds_ascii_file(const std::string& path) {
XdsAscii ret;
FileStream stream(path.c_str(), "rb");
@@ -176,7 +274,9 @@ inline XdsAscii read_xds_ascii_file(const std::string& path) {
return ret;
}
-/// read possibly gzipped file
+/// @brief Read XDS_ASCII file, handling gzip compression.
+/// @param path File path (may be .gz).
+/// @return Populated XdsAscii object.
GEMMI_DLL XdsAscii read_xds_ascii(const std::string& path);
} // namespace gemmi