diff --git a/docs/api.rst b/docs/api.rst
new file mode 100644
index 000000000..19b248015
--- /dev/null
+++ b/docs/api.rst
@@ -0,0 +1,63 @@
+.. _api:
+
+C++ API Reference
+#################
+
+This page documents the Gemmi C++ library API, generated from Doxygen comments
+in ``include/gemmi/*.hpp``. It is updated with each pull request in the
+API documentation series.
+
+For the Python API, see the `Python API reference <https://project-gemmi.github.io/python-api/>`_.
+
+.. note::
+
+   Documentation coverage is being added incrementally. Headers not yet
+   listed here will appear in subsequent pull requests.
+
+Core Data Structures
+--------------------
+
+*(Full documentation added in PR 2.)*
+
+.. doxygenfile:: model.hpp
+   :project: gemmi
+
+Reflection Data
+---------------
+
+*(Full documentation added in PR 5.)*
+
+.. doxygenfile:: mtz.hpp
+   :project: gemmi
+
+.. doxygenfile:: refln.hpp
+   :project: gemmi
+
+.. doxygenfile:: cif2mtz.hpp
+   :project: gemmi
+
+.. doxygenfile:: mtz2cif.hpp
+   :project: gemmi
+
+.. doxygenfile:: xds_ascii.hpp
+   :project: gemmi
+
+.. doxygenfile:: xds2mtz.hpp
+   :project: gemmi
+
+.. doxygenfile:: intensit.hpp
+   :project: gemmi
+
+.. doxygenfile:: binner.hpp
+   :project: gemmi
+
+.. doxygenfile:: asudata.hpp
+   :project: gemmi
+
+Map and Grid Data
+-----------------
+
+*(Stub — full documentation added in PR 6.)*
+
+.. doxygenfile:: grid.hpp
+   :project: gemmi
diff --git a/include/gemmi/asudata.hpp b/include/gemmi/asudata.hpp
index ccb44ee09..aacf67561 100644
--- a/include/gemmi/asudata.hpp
+++ b/include/gemmi/asudata.hpp
@@ -1,6 +1,7 @@
 // Copyright 2020 Global Phasing Ltd.
 //
-// AsuData for storing reflection data.
+/// @file
+/// @brief AsuData template for storing per-HKL reflection data in the asymmetric unit.
 
 #ifndef GEMMI_ASUDATA_HPP_
 #define GEMMI_ASUDATA_HPP_
@@ -14,13 +15,18 @@
 
 namespace gemmi {
 
+/// @brief Correlation calculation for complex-valued reflection data.
+/// Accumulates running statistics for complex numbers (e.g., structure factors).
 struct ComplexCorrelation {
-  int n = 0;
-  double sum_xx = 0.;
-  double sum_yy = 0.;
-  std::complex<double> sum_xy = 0.;
-  std::complex<double> mean_x = 0.;
-  std::complex<double> mean_y = 0.;
+  int n = 0;                          ///< Number of points accumulated
+  double sum_xx = 0.;                 ///< Sum of |x - mean_x|^2 (centered, like sum_xy)
+  double sum_yy = 0.;                 ///< Sum of |y - mean_y|^2 (centered, like sum_xy)
+  std::complex<double> sum_xy = 0.;   ///< Sum of (x - mean_x) * conj(y - mean_y)
+  std::complex<double> mean_x = 0.;   ///< Running mean of x
+  std::complex<double> mean_y = 0.;   ///< Running mean of y
+  /// @brief Add a complex-valued point pair to the correlation.
+  /// @param x First complex value
+  /// @param y Second complex value
   void add_point(std::complex<double> x, std::complex<double> y) {
     ++n;
     double inv_n = 1.0 / n;
@@ -33,15 +39,27 @@ struct ComplexCorrelation {
     mean_x += dx * inv_n;
     mean_y += dy * inv_n;
   }
+  /// @brief Add a complex-valued point pair (float version).
+  /// @param x First complex value
+  /// @param y Second complex value
   void add_point(std::complex<float> x, std::complex<float> y) {
     add_point(std::complex<double>(x), std::complex<double>(y));
   }
+  /// @brief Compute correlation coefficient from accumulated statistics.
   std::complex<double> coefficient() const { return sum_xy / std::sqrt(sum_xx * sum_yy); }
+  /// @brief Compute |mean_y| / |mean_x|, the ratio of the magnitudes of the two running means.
   double mean_ratio() const { return std::abs(mean_y) / std::abs(mean_x); }
 };
 
 
-/// \pre a and b are sorted
+/// @brief Apply a function to matching reflections from two sorted lists.
+/// Iterates through reflections with matching HKLs in both lists and calls func.
+/// @pre Vectors a and b are sorted by HKL.
+/// @tparam Func Callable type taking (const T&, const T&).
+/// @tparam T Reflection data type (must have hkl member and operator<).
+/// @param a First reflection list.
+/// @param b Second reflection list.
+/// @param func Function to call for each matching pair.
 template<typename Func, typename T>
 void for_matching_reflections(const std::vector<T>& a,
                               const std::vector<T>& b,
@@ -61,7 +79,12 @@ void for_matching_reflections(const std::vector<T>& a,
   }
 }
 
-/// \pre a and b are sorted
+/// @brief Calculate correlation of values at matching HKLs in two sorted lists.
+/// @pre Vectors a and b are sorted by HKL.
+/// @tparam T Reflection data type (must have hkl and value members).
+/// @param a First reflection list.
+/// @param b Second reflection list.
+/// @return Correlation object computed from matching HKLs.
 template<typename T>
 Correlation calculate_hkl_value_correlation(const std::vector<T>& a,
                                             const std::vector<T>& b) {
@@ -72,7 +95,12 @@ Correlation calculate_hkl_value_correlation(const std::vector<T>& a,
   return cor;
 }
 
-/// \pre a and b are sorted
+/// @brief Calculate correlation of complex-valued reflection data between two sorted lists.
+/// @pre Vectors a and b are sorted by HKL.
+/// @tparam T Reflection data type (must have hkl and value members).
+/// @param a First reflection list.
+/// @param b Second reflection list.
+/// @return ComplexCorrelation object computed from matching HKLs.
 template<typename T>
 ComplexCorrelation calculate_hkl_complex_correlation(const std::vector<T>& a,
                                                      const std::vector<T>& b) {
@@ -83,7 +111,12 @@ ComplexCorrelation calculate_hkl_complex_correlation(const std::vector<T>& a,
   return cor;
 }
 
-/// \pre a and b are sorted
+/// @brief Count matching reflections with identical values in two sorted lists.
+/// @pre Vectors a and b are sorted by HKL.
+/// @tparam T Reflection data type (must have hkl and value members).
+/// @param a First reflection list.
+/// @param b Second reflection list.
+/// @return Number of matching HKLs with equal values.
 template<typename T>
 int count_equal_values(const std::vector<T>& a, const std::vector<T>& b) {
   int count = 0;
@@ -94,31 +127,44 @@ int count_equal_values(const std::vector<T>& a, const std::vector<T>& b) {
   return count;
 }
 
+/// @brief Miller index paired with a generic value.
+/// Used as the basic element in AsuData containers.
+/// @tparam T Value type (float, double, complex, etc.).
 template<typename T>
 struct HklValue {
-  alignas(8) Miller hkl;
-  T value;
+  alignas(8) Miller hkl;  ///< Miller indices (h, k, l)
+  T value;                ///< Associated value (intensity, structure factor, etc.)
 
+  /// @brief Compare with Miller index.
   bool operator<(const Miller& m) const { return hkl < m; }
+  /// @brief Compare with another HklValue by their HKL indices.
   bool operator<(const HklValue& o) const { return operator<(o.hkl); }
 };
 
+/// @brief Value paired with its uncertainty (sigma/standard deviation).
+/// @tparam T Numeric type (float, double, etc.).
 template<typename T>
 struct ValueSigma {
   using value_type = T;
-  T value, sigma;
+  T value;  ///< The measured or calculated value
+  T sigma;  ///< Standard deviation or uncertainty
 
+  /// @brief Check equality of both value and sigma.
   bool operator==(const ValueSigma& o) const {
     return value == o.value && sigma == o.sigma;
   }
 };
 
+/// @brief Implementation functions for moving reflections to the asymmetric unit.
 namespace impl {
+/// @brief Generic move_to_asu for real-valued data (no phase adjustment).
 template<typename T>
 void move_to_asu(const GroupOps&, const Miller& hkl, int, HklValue<T>& hkl_value) {
   hkl_value.hkl = hkl;
 }
 
+/// @brief Specialized move_to_asu for complex-valued data (applies phase shift).
+/// Applies phase shift from symmetry operator and conjugation for isym % 2 == 0.
 template<typename R>
 void move_to_asu(const GroupOps& gops, const Miller& hkl, int isym,
                  HklValue<std::complex<R>>& v) {
@@ -135,12 +181,17 @@ void move_to_asu(const GroupOps& gops, const Miller& hkl, int isym,
 }
 } // namespace impl
 
+/// @brief Generic container for reflection data in the asymmetric unit.
+/// Stores values (e.g., structure factors, phases, intensities) indexed by Miller indices.
+/// Keeps data sorted by HKL and can enforce ASU constraints.
+/// @tparam T Value type (float, double, complex, ValueSigma<>, etc.).
 template<typename T>
 struct AsuData {
-  std::vector<HklValue<T>> v;
-  UnitCell unit_cell_;
-  const SpaceGroup* spacegroup_ = nullptr;
-  // function defining FPhiProxy interface
+  std::vector<HklValue<T>> v;                  ///< Reflection data (HKL + value pairs)
+  UnitCell unit_cell_;                        ///< Unit cell parameters
+  const SpaceGroup* spacegroup_ = nullptr;    ///< Space group (not owned by this object)
+
+  // FPhiProxy interface compatibility methods
   size_t stride() const { return 1; }
   size_t size() const { return v.size(); }
   Miller get_hkl(size_t n) const { return v[n].hkl; }
@@ -148,11 +199,17 @@ struct AsuData {
   double get_phi(size_t n) const { return std::arg(v[n].value); }
   const UnitCell& unit_cell() const { return unit_cell_; }
   const SpaceGroup* spacegroup() const { return spacegroup_; }
+
+  /// @brief Sort reflections by HKL indices if not already sorted.
   void ensure_sorted() {
     if (!std::is_sorted(v.begin(), v.end()))
       std::sort(v.begin(), v.end());
   }
 
+  /// @brief Transform all reflections to the asymmetric unit.
+  /// Moves reflections outside ASU to their equivalent inside, applying
+  /// symmetry operators and phase shifts as needed for complex values.
+  /// @param tnt_asu If true, use TNT-style ASU; otherwise use standard ASU.
   void ensure_asu(bool tnt_asu=false) {
     if (!spacegroup_)
       fail("AsuData::ensure_asu(): space group not set");
@@ -167,7 +224,13 @@ struct AsuData {
     }
   }
 
-  // load values from one column
+  /// @brief Load values from a single data column.
+  /// Reads HKLs and values from proxy, filters NaN, converts to ASU, and sorts.
+  /// @tparam DataProxy Type with column_index(), unit_cell(), spacegroup(),
+  ///                    size(), stride(), get_hkl(), get_num() interface.
+  /// @param proxy Data proxy object (MTZ, mmCIF, etc.).
+  /// @param label Column name/label to load.
+  /// @param as_is If true, skip ASU conversion and sorting (raw load).
   template<typename DataProxy>
   void load_values(const DataProxy& proxy, const std::string& label,
                    bool as_is=false) {
@@ -185,7 +248,15 @@ struct AsuData {
     }
   }
 
-  // load values from two or more columns
+  /// @brief Load values from multiple columns (for complex, vector, or sigma pairs).
+  /// Reads N columns per reflection and combines them into a single value.
+  /// Filters out reflections with any NaN in the N columns.
+  /// @tparam N Number of columns to load (2 for complex/F+sigma, etc.).
+  /// @tparam DataProxy Type with column_index(), unit_cell(), spacegroup(),
+  ///                    size(), stride(), get_hkl(), get_num() interface.
+  /// @param proxy Data proxy object (MTZ, mmCIF, etc.).
+  /// @param labels Array of N column names/labels to load.
+  /// @param as_is If true, skip ASU conversion and sorting (raw load).
   template<int N, typename DataProxy>
   void load_values(const DataProxy& proxy, const std::array<std::string,N>& labels,
                    bool as_is=false) {
@@ -212,14 +283,17 @@ struct AsuData {
   }
 
 private:
-  // for T being a number, std::array and std::complex, respectively:
+  /// @brief Helper to convert numeric array(s) to T.
+  /// Overloaded for: scalar, array<1>, complex (F+phase), ValueSigma (F+sigma).
   static void set_value_from_array(T& val, const std::array<T,1>& nums) { val = nums[0]; }
   static void set_value_from_array(T& val, const T& nums) { val = nums; }
+  /// @brief Convert (F, phi) pair to complex structure factor.
   template<typename R>
   static void set_value_from_array(std::complex<R>& val, const std::array<R,2>& nums) {
     R theta = (R)rad(nums[1]);
     val = {nums[0] * std::cos(theta), nums[0] * std::sin(theta)};
   }
+  /// @brief Convert (value, sigma) pair to ValueSigma.
   template<typename R>
   static void set_value_from_array(ValueSigma<R>& val, const std::array<R,2>& nums) {
     val.value = nums[0];
@@ -227,6 +301,14 @@ struct AsuData {
   }
 };
 
+/// @brief Create AsuData by loading from N columns in a data source.
+/// @tparam T Value type to load into.
+/// @tparam N Number of columns to combine per reflection.
+/// @tparam Data Data source type (MTZ, mmCIF, etc.).
+/// @param data Source data object.
+/// @param labels Array of N column labels to load.
+/// @param as_is If true, skip ASU conversion and sorting.
+/// @return New AsuData object populated with data.
 template<typename T, int N, typename Data>
 AsuData<T> make_asu_data(const Data& data, const std::array<std::string,N>& labels,
                          bool as_is=false) {
@@ -234,6 +316,14 @@ AsuData<T> make_asu_data(const Data& data, const std::array<std::string,N>& labe
   asu_data.template load_values<N>(data_proxy(data), labels, as_is);
   return asu_data;
 }
+
+/// @brief Create AsuData by loading from a single column in a data source.
+/// @tparam T Value type to load into.
+/// @tparam Data Data source type (MTZ, mmCIF, etc.).
+/// @param data Source data object.
+/// @param label Column label to load.
+/// @param as_is If true, skip ASU conversion and sorting.
+/// @return New AsuData object populated with data.
 template<typename T, typename Data>
 AsuData<T> make_asu_data(const Data& data, const std::string& label, bool as_is) {
   AsuData<T> asu_data;
@@ -241,7 +331,11 @@ AsuData<T> make_asu_data(const Data& data, const std::string& label, bool as_is)
   return asu_data;
 }
 
-/// retains only points with positive SIGF and F/SIGF > cutoff
+/// @brief Filter AsuData to retain only reflections with high signal-to-noise.
+/// Removes reflections where sigma <= 0 or value/sigma <= cutoff.
+/// @tparam T Numeric type (float, double).
+/// @param asu_data AsuData container with ValueSigma<T> values (in/out).
+/// @param cutoff Threshold: only reflections with value/sigma > cutoff are retained.
 template<typename T>
 void discard_by_sigma_ratio(AsuData<ValueSigma<T>>& asu_data, double cutoff) {
   vector_remove_if(asu_data.v, [cutoff](const HklValue<ValueSigma<T>>& p) {
diff --git a/include/gemmi/binner.hpp b/include/gemmi/binner.hpp
index 8e7325638..82864bf2e 100644
--- a/include/gemmi/binner.hpp
+++ b/include/gemmi/binner.hpp
@@ -1,6 +1,7 @@
 // Copyright 2022 Global Phasing Ltd.
 //
-// Binning - resolution shells for reflections.
+/// @file
+/// @brief Binner class for organizing reflections into resolution shells.
 
 #ifndef GEMMI_BINNER_HPP_
 #define GEMMI_BINNER_HPP_
@@ -11,14 +12,25 @@
 
 namespace gemmi {
 
+/// @brief Divide reflections into resolution shells (bins) for statistics.
+///
+/// Organizes reflection data into bins by resolution. Supports multiple
+/// binning schemes (equal count, or uniform spacing in d*, d*^2 or d*^3, where d* = 1/d).
+/// Provides fast bin lookup for both sorted and unsorted data.
 struct Binner {
+  /// @brief Strategy for dividing reflections into resolution shells.
   enum class Method {
-    EqualCount,
-    Dstar,
-    Dstar2,
-    Dstar3,
+    EqualCount, ///< Bins with approximately equal number of reflections
+    Dstar,      ///< Uniform spacing in d* = 1/d
+    Dstar2,     ///< Uniform spacing in d*^2 = 1/d^2
+    Dstar3,     ///< Uniform spacing in d*^3 = 1/d^3 (shells of equal reciprocal-space volume)
   };
 
+  /// @brief Set up bins using pre-calculated 1/d^2 values.
+  /// @param nbins Number of resolution bins to create.
+  /// @param method Binning scheme (EqualCount, Dstar, Dstar2, Dstar3).
+  /// @param inv_d2 Vector of 1/d^2 values for all reflections (moved from caller).
+  /// @param cell_ Unit cell for resolution calculations (nullptr uses already-set cell).
   void setup_from_1_d2(int nbins, Method method, std::vector<double>&& inv_d2,
                        const UnitCell* cell_) {
     if (nbins < 1)
@@ -84,6 +96,15 @@ struct Binner {
     limits.resize(nbins);
   }
 
+  /// @brief Set up bins from reflection data using DataProxy interface.
+  /// Automatically calculates 1/d^2 values from HKLs and unit cell.
+  /// @tparam DataProxy Type with spacegroup(), unit_cell(), size(), stride(),
+  ///                    get_hkl(), get_num() interface.
+  /// @param nbins Number of resolution bins to create.
+  /// @param method Binning scheme (EqualCount, Dstar, Dstar2, Dstar3).
+  /// @param proxy Data proxy object (typically Intensities or MTZ).
+  /// @param cell_ Unit cell for resolution calculations (nullptr uses proxy.unit_cell()).
+  /// @param col_idx Column index for filtering NaN values (0 = skip filtering).
   template<typename DataProxy>
   void setup(int nbins, Method method, const DataProxy& proxy,
              const UnitCell* cell_=nullptr, size_t col_idx=0) {
@@ -98,12 +119,15 @@ struct Binner {
     setup_from_1_d2(nbins, method, std::move(inv_d2), nullptr);
   }
 
+  /// @brief Check that bins have been set up; throw if not.
   void ensure_limits_are_set() const {
     if (limits.empty())
       fail("Binner not set up");
   }
 
-  // Generic. Method-specific versions could be faster.
+  /// @brief Find bin index for a given 1/d^2 value (binary search, generic).
+  /// @param inv_d2 Squared reciprocal resolution (1/d^2).
+  /// @return Bin index (0 to size()-1).
   int get_bin_from_1_d2(double inv_d2) {
     ensure_limits_are_set();
     auto it = std::lower_bound(limits.begin(), limits.end(), inv_d2);
@@ -111,13 +135,20 @@ struct Binner {
     return int(it - limits.begin());
   }
 
+  /// @brief Find bin index for a reflection by Miller indices.
+  /// @param hkl Miller indices (h,k,l).
+  /// @return Bin index (0 to size()-1).
   int get_bin(const Miller& hkl) {
     double inv_d2 = cell.calculate_1_d2(hkl);
     return get_bin_from_1_d2(inv_d2);
   }
 
-  // We assume that bins are seeked mostly for sorted reflections,
-  // so it's usually either the same bin as previously, or the next one.
+  /// @brief Find bin index using a hint (fast path for sorted reflections).
+  /// Updates hint to the found bin index for the next call.
+  /// Fastest for sorted reflections, where the bin is usually the same as the hint or the next one.
+  /// @param inv_d2 Squared reciprocal resolution (1/d^2).
+  /// @param hint In/out: previous bin index (input), current bin index (output).
+  /// @return Bin index (0 to size()-1).
   int get_bin_from_1_d2_hinted(double inv_d2, int& hint) const {
     if (inv_d2 <= limits[hint]) {
       while (hint != 0 && limits[hint-1] > inv_d2)
@@ -130,11 +161,20 @@ struct Binner {
     return hint;
   }
 
+  /// @brief Find bin index for a reflection using a hint (fast path).
+  /// @param hkl Miller indices (h,k,l).
+  /// @param hint In/out: previous bin index (input), current bin index (output).
+  /// @return Bin index (0 to size()-1).
   int get_bin_hinted(const Miller& hkl, int& hint) const {
     double inv_d2 = cell.calculate_1_d2(hkl);
     return get_bin_from_1_d2_hinted(inv_d2, hint);
   }
 
+  /// @brief Get bin indices for all reflections in a DataProxy.
+  /// Uses hinting for fast processing of sorted data.
+  /// @tparam DataProxy Type with size(), stride(), get_hkl() interface.
+  /// @param proxy Data proxy object.
+  /// @return Vector of bin indices (length = proxy.size() / proxy.stride()).
   template<typename DataProxy>
   std::vector<int> get_bins(const DataProxy& proxy) const {
     ensure_limits_are_set();
@@ -145,6 +185,11 @@ struct Binner {
     return nums;
   }
 
+  /// @brief Get bin indices for an array of 1/d^2 values.
+  /// Uses hinting for fast processing of sorted data.
+  /// @param inv_d2 Pointer to array of squared reciprocal resolutions.
+  /// @param size Number of values in array.
+  /// @return Vector of bin indices (length = size).
   std::vector<int> get_bins_from_1_d2(const double* inv_d2, size_t size) const {
     ensure_limits_are_set();
     int hint = 0;
@@ -154,24 +199,34 @@ struct Binner {
     return nums;
   }
 
+  /// @brief Get bin indices for a vector of 1/d^2 values.
+  /// @param inv_d2 Vector of squared reciprocal resolutions.
+  /// @return Vector of bin indices (length = inv_d2.size()).
   std::vector<int> get_bins_from_1_d2(const std::vector<double>& inv_d2) const {
     return get_bins_from_1_d2(inv_d2.data(), inv_d2.size());
   }
 
+  /// @brief Get the high-resolution limit (smallest d-spacing, highest 1/d^2) of a bin.
+  /// @param n Bin index (0 to size()-1).
+  /// @return d_min in Angstroms: 1/sqrt of the bin's upper 1/d^2 limit (max_1_d2 for the last bin).
   double dmin_of_bin(int n) const {
     return 1. / std::sqrt(n == (int) size() - 1 ? max_1_d2 : limits.at(n));
   }
+  /// @brief Get the low-resolution limit (largest d-spacing, lowest 1/d^2) of a bin.
+  /// @param n Bin index (0 to size()-1).
+  /// @return d_max in Angstroms: 1/sqrt of the previous bin's upper 1/d^2 limit (min_1_d2 for bin 0).
   double dmax_of_bin(int n) const {
     return 1. / std::sqrt(n == 0 ? min_1_d2 : limits.at(n-1));
   }
 
+  /// @brief Number of bins (resolution shells).
   size_t size() const { return limits.size(); }
 
-  UnitCell cell;
-  double min_1_d2;
-  double max_1_d2;
-  std::vector<double> limits;  // upper limit of each bin
-  std::vector<double> mids;    // the middle of each bin
+  UnitCell cell;                  ///< Unit cell for calculating 1/d^2 from HKL
+  double min_1_d2;               ///< Minimum 1/d^2 in data (lowest resolution)
+  double max_1_d2;               ///< Maximum 1/d^2 in data (highest resolution)
+  std::vector<double> limits;    ///< Upper limit (1/d^2) of each bin
+  std::vector<double> mids;      ///< Midpoint (1/d^2) of each bin
 };
 
 } // namespace gemmi
diff --git a/include/gemmi/cif2mtz.hpp b/include/gemmi/cif2mtz.hpp
index 4300bb09e..6a6de2d09 100644
--- a/include/gemmi/cif2mtz.hpp
+++ b/include/gemmi/cif2mtz.hpp
@@ -1,3 +1,10 @@
+/// @file
+/// @brief Convert structure factor data from mmCIF to MTZ format
+///
+/// Provides CifToMtz for converting reflection data from PDB/mmCIF format
+/// to CCP4 MTZ binary format. Handles both merged and unmerged data,
+/// including anomalous data, also when stored in the old (pre-pdbx_F_plus/minus) style.
+
 // Copyright 2021 Global Phasing Ltd.
 //
 // A class for converting SF-mmCIF to MTZ (merged or unmerged).
@@ -20,7 +27,15 @@
 
 namespace gemmi {
 
-// "Old-style" anomalous or unmerged data is expected to have only these tags.
+/// @brief Check if reflection block uses old-style anomalous data format.
+///
+/// "Old-style" anomalous or unmerged data is expected to have only these tags:
+/// index_h/k/l, wavelength_id, crystal_id, scale_group_code, status,
+/// and either (intensity_meas/sigma) or (F_meas_au/sigma).
+///
+/// @param rb ReflnBlock to check
+/// @param data_type Expected data type (Unmerged or Anomalous)
+/// @return true if all tags in the reflection loop match the old-style subset
 inline bool possible_old_style(const ReflnBlock& rb, DataType data_type) {
   if (rb.refln_loop == nullptr)
     return false;
@@ -43,13 +58,21 @@ inline bool possible_old_style(const ReflnBlock& rb, DataType data_type) {
 }
 
 
+/// @brief Convert old-style anomalous data to modern PDBx format.
+///
 /// Before _refln.pdbx_F_plus/minus was introduced, anomalous data was
-/// stored as two F_meas_au reflections, say (1,1,3) and (-1,-1,-3).
-/// This function transcribes it to how the anomalous data is stored
-/// in PDBx/mmCIF nowadays:
-///  _refln.F_meas_au -> pdbx_F_plus / pdbx_F_minus,
-///  _refln.F_meas_sigma_au -> pdbx_F_plus_sigma / pdbx_F_minus_sigma.
-///  _refln.intensity_{meas,sigma} -> _refln.pdbx_F_plus{,_sigma} / ...
+/// stored as two F_meas_au reflections, e.g. (1,1,3) and (-1,-1,-3).
+/// This function transcribes it to the modern PDBx/mmCIF storage:
+/// - _refln.F_meas_au → pdbx_F_plus / pdbx_F_minus
+/// - _refln.F_meas_sigma_au → pdbx_F_plus_sigma / pdbx_F_minus_sigma
+/// - _refln.intensity_meas/sigma → pdbx_I_plus/I_minus and sigmas
+///
+/// Reflections are moved to the ASU, and when both +/- forms exist for
+/// the same HKL, they are merged (missing values set to '.').
+///
+/// @param loop CIF loop containing old-style anomalous data
+/// @param sg Space group for ASU determination (null → P1)
+/// @return New loop with reflections in standard pdbx format
 inline cif::Loop transcript_old_anomalous_to_standard(const cif::Loop& loop,
                                                       const SpaceGroup* sg) {
   std::vector<int> positions;
@@ -139,7 +162,29 @@ inline cif::Loop transcript_old_anomalous_to_standard(const cif::Loop& loop,
 }
 
 
+/// @brief Converter from CIF reflection data to MTZ format.
+///
+/// Handles conversion of structure factor data from mmCIF format
+/// (PDB/wwPDB standard) to MTZ format (CCP4 binary format).
+/// Supports both merged and unmerged reflection data, with configurable
+/// column mappings from CIF _refln tags to MTZ column labels and types.
+///
+/// Uses a specification (default or custom) to map CIF tags to MTZ columns,
+/// handling code-to-number translation for categorical data (e.g., FreeR flags).
 struct CifToMtz {
+  /// @brief Get the default column mapping specification.
+  ///
+  /// Returns static arrays of mapping lines. Each line has format:
+  /// `cif_tag mtz_label col_type dataset_id [code_mapping]`
+  /// where code_mapping (optional) is a comma-separated list of `code=value` pairs.
+  ///
+  /// For merged data: includes FreeR_flag, intensities, structure factors,
+  /// anomalous pairs, calculated phases, and weight/FOM columns.
+  ///
+  /// For unmerged data: includes intensity_meas, detector coordinates, and rotation angle.
+  ///
+  /// @param for_merged true for merged data, false for unmerged
+  /// @return Pointer to null-terminated array of specification strings
   // Alternative mmCIF tags for the same MTZ label should be consecutive
   static const char** default_spec(bool for_merged) {
     static const char* merged[] = {
@@ -192,13 +237,23 @@ struct CifToMtz {
     return for_merged ? merged : unmerged;
   }
 
+  /// @brief Specification entry mapping a CIF tag to an MTZ column.
   struct Entry {
-    std::string refln_tag;
-    std::string col_label;
-    char col_type;
-    int dataset_id;
+    std::string refln_tag;           ///< CIF _refln.* tag name (without prefix)
+    std::string col_label;           ///< MTZ column label
+    char col_type;                   ///< MTZ column type code (H,K,L,F,Q,J,P,etc.)
+    int dataset_id;                  ///< Dataset id for merged data (0 or 1)
+    /// Code-to-number mappings for categorical data (e.g., FreeR code → number)
     std::vector<std::pair<std::string, float>> code_to_number;
 
+    /// @brief Parse a specification line into an Entry.
+    ///
+    /// Expected format:
+    /// - `cif_tag mtz_label col_type dataset_id`
+    /// - `cif_tag mtz_label col_type dataset_id code1=val1,code2=val2`
+    ///
+    /// @param line Specification line to parse
+    /// @throws std::runtime_error (via gemmi::fail) if format is invalid
     Entry(const std::string& line) {
       std::vector<std::string> tokens;
       tokens.reserve(4);
@@ -234,6 +289,13 @@ struct CifToMtz {
       }
     }
 
+    /// @brief Translate a categorical value to its numeric equivalent.
+    ///
+    /// Uses code_to_number mappings to convert coded values (e.g., 'o', 'f')
+    /// to numeric equivalents (e.g., 1.0, 0.0 for FreeR flags).
+    ///
+    /// @param v The coded value to translate
+    /// @return Translated number, or NAN if no mapping found
     float translate_code_to_number(const std::string& v) const {
       if (v.size() == 1) {
         for (const auto& c2n : code_to_number)
@@ -249,12 +311,31 @@ struct CifToMtz {
     }
   };
 
-  bool force_unmerged = false;
-  std::string title;
+  bool force_unmerged = false;         ///< If true, treat all data as unmerged
+  std::string title;                   ///< Title to set in output MTZ file
+  /// Lines for the MTZ history record; defaults to a single line with the gemmi version
   std::vector<std::string> history = { "From gemmi-cif2mtz " GEMMI_VERSION };
-  double wavelength = NAN;
+  double wavelength = NAN;             ///< Override wavelength (NAN = auto-detect)
+  /// Custom column specification lines; if empty, uses default_spec()
   std::vector<std::string> spec_lines;
 
+  /// @brief Convert a single mmCIF reflection block to MTZ format.
+  ///
+  /// Maps CIF _refln columns to MTZ columns using the specification (custom or default).
+  /// Handles merged and unmerged data differently:
+  ///
+  /// **Merged data:** Creates a single dataset with wavelength from ReflnBlock.
+  ///
+  /// **Unmerged data:** Extracts diffrn_id and pdbx_image_id to create BATCH column,
+  /// creates a dataset per crystal, and uses UnmergedHklMover to put HKLs into ASU.
+  ///
+  /// Missing values ('.' in CIF) become NAN in MTZ. The M/ISYM and BATCH columns
+  /// are automatically added for unmerged data.
+  ///
+  /// @param rb Reflection block with parsed _refln loop
+  /// @param logger Logger for informational and error messages
+  /// @return MTZ structure with data, columns, and metadata
+  /// @throws std::runtime_error (via gemmi::fail) if required tags (index_h/k/l, data columns) are missing
   Mtz convert_block_to_mtz(const ReflnBlock& rb, Logger& logger) const {
     Mtz mtz;
     mtz.title = title.empty() ? "Converted from mmCIF block " + rb.block.name : title;
@@ -505,6 +586,23 @@ struct CifToMtz {
     return mtz;
   }
 
+  /// @brief Auto-detect data type and convert with optional transformation.
+  ///
+  /// Performs intelligent detection and conversion:
+  ///
+  /// **Mode 'f' (fix old-style anomalous):** If the block contains old-style
+  /// anomalous data, transforms it to modern pdbx_F_plus/minus format before conversion.
+  ///
+  /// **Mode 'a' (auto-detect anomalous):** After conversion, analyzes the unique
+  /// HKLs under symmetry to detect if data is actually anomalous or unmerged despite
+  /// initial classification. Logs warnings and attempts recovery for old-style data.
+  ///
+  /// **Other modes:** Performs conversion as-is without transformation.
+  ///
+  /// @param rb Reflection block (modified in-place if transformation occurs)
+  /// @param logger Logger for notes and errors
+  /// @param mode Conversion mode: 'f'=fix old anomalous, 'a'=auto-detect, other=no transform
+  /// @return Converted MTZ structure
   Mtz auto_convert_block_to_mtz(ReflnBlock& rb, Logger& logger, char mode) const {
     if (mode == 'f' && possible_old_style(rb, DataType::Anomalous))
       *rb.refln_loop = transcript_old_anomalous_to_standard(*rb.refln_loop, rb.spacegroup);
@@ -537,6 +635,15 @@ struct CifToMtz {
   }
 
 private:
+  /// @brief Convert _refln.status code to FreeR_flag value.
+  ///
+  /// Maps status codes to numeric equivalents:
+  /// - 'o' or quoted 'o' → 1.0 (observed/working set)
+  /// - 'f' or quoted 'f' → 0.0 (free set)
+  /// - other → NAN
+  ///
+  /// @param str Status code string from CIF
+  /// @return Numeric flag value (1.0, 0.0, or NAN)
   static float status_to_freeflag(const std::string& str) {
     char c = str[0];
     if (c == '\'' || c == '"')
diff --git a/include/gemmi/intensit.hpp b/include/gemmi/intensit.hpp
index d47ad6bb8..ec77b28c3 100644
--- a/include/gemmi/intensit.hpp
+++ b/include/gemmi/intensit.hpp
@@ -1,5 +1,8 @@
 // Copyright 2020 Global Phasing Ltd.
 //
+/// @file
+/// @brief Intensities class for reading and merging intensity data from various formats.
+//
 // Class Intensities that reads multi-record data from MTZ, mmCIF or XDS_ASCII
 // and merges it into mean or anomalous intensities.
 // It can also read merged data.
@@ -23,28 +26,40 @@ struct ReflnBlock;
 namespace cif { struct Block; }
 using std::int8_t;
 
-// If used to request a particular data type:
-//   MergedMA = Mean if available, otherwise Anomalous,
-//   MergedAM = Anomalous if available, otherwise Mean.
-//   UAM = Unmerged if available, otherwise MergedAM
-enum class DataType { Unknown, Unmerged, Mean, Anomalous,
-                      MergedMA, MergedAM, UAM };
+/// Data type of intensity data: unmerged, mean intensity, or anomalous intensities.
+///
+/// When requesting a particular data type, the MergedMA/MergedAM/UAM variants
+/// allow fallback to secondary options (e.g., MergedMA = Mean if available, else Anomalous).
+enum class DataType {
+  Unknown,     ///< Unknown or unspecified intensity type
+  Unmerged,    ///< Unmerged (multi-record) intensity data
+  Mean,        ///< Mean intensity &lt;I&gt;
+  Anomalous,   ///< Anomalous intensities (I+/I-)
+  MergedMA,    ///< Mean if available, otherwise Anomalous (fallback type)
+  MergedAM,    ///< Anomalous if available, otherwise Mean (fallback type)
+  UAM          ///< Unmerged if available, otherwise MergedAM (fallback type)
+};
 
+/// @brief Statistics calculated for a resolution shell (bin) of merged intensities.
+///
+/// Accumulates numerators and denominators for R-merge, R-meas, and R-pim.
+/// Can be summed across shells to compute overall statistics.
 struct GEMMI_DLL MergingStats {
-  int all_refl = 0;  // all reflections, sometimes called observations
-  int unique_refl = 0;
-  int stats_refl = 0;  // unique reflections with 2+ observations (used for statistics)
-  double r_merge_num = 0;  // numerator for R-merge
-  double r_meas_num = 0;   // numerator for R-meas
-  double r_pim_num = 0;    // numerator for R-pim
-  double r_denom = 0;      // denominator for R-*
-  // sums for CC1/2
-  double sum_ibar = 0;
-  double sum_ibar2 = 0;
-  double sum_sig2_eps = 0;
-
-  /// This class is additive. Adding two MergingStats gives the same result
-  /// as calculating statistics in these two resolution shells from the start.
+  int all_refl = 0;        ///< Total number of observations (all reflections)
+  int unique_refl = 0;     ///< Number of unique reflections
+  int stats_refl = 0;      ///< Unique reflections with 2+ observations (for statistics)
+  double r_merge_num = 0;  ///< Numerator for R-merge calculation
+  double r_meas_num = 0;   ///< Numerator for R-meas (redundancy-independent R) calculation
+  double r_pim_num = 0;    ///< Numerator for R-pim (precision-indicating merge) calculation
+  double r_denom = 0;      ///< Denominator for R-merge/meas/pim calculations
+  double sum_ibar = 0;     ///< Sum of mean intensities for CC1/2
+  double sum_ibar2 = 0;    ///< Sum of squared mean intensities for CC1/2
+  double sum_sig2_eps = 0; ///< Sum of variance terms for CC1/2
+
+  /// Accumulate statistics from another shell.
+  /// Adding two MergingStats gives the same result as calculating statistics
+  /// for the combined shells from the start.
+  /// @param o Statistics from another resolution shell.
   void add_other(const MergingStats& o) {
     all_refl += o.all_refl;
     unique_refl += o.unique_refl;
@@ -58,64 +73,89 @@ struct GEMMI_DLL MergingStats {
     sum_sig2_eps += o.sum_sig2_eps;
   }
 
+  /// Compute R-merge for this shell.
   double r_merge() const { return r_merge_num / r_denom; }
+  /// Compute redundancy-independent R-meas for this shell.
   double r_meas() const { return r_meas_num / r_denom; }
+  /// Compute precision-indicating R-pim for this shell.
   double r_pim() const { return r_pim_num / r_denom; }
+  /// Compute CC1/2 (correlation coefficient of half-datasets).
   double cc_half() const; // calculated using sigma-tau method
-  /// split-half reliability using the Spearman-Brown prediction formula :-)
+  /// Compute CC_full (split-half reliability) from CC1/2 using the Spearman-Brown formula.
   double cc_full() const {
     double cc = cc_half();
     return 2 * cc / (1 + cc);
   }
+  /// Compute CC* as sqrt(cc_full()), an estimate of the correlation with the true signal.
   double cc_star() const { return std::sqrt(cc_full()); }
 };
 
-/// Returns STARANISO version or empty string.
+/// @brief Extract STARANISO anisotropy B-tensor from MTZ file.
+/// @param mtz MTZ file object to read from.
+/// @param[out] output Anisotropic B-tensor (symmetric 3x3 matrix stored as SMat33).
+/// @return STARANISO version string if found, empty string otherwise.
 GEMMI_DLL std::string read_staraniso_b_from_mtz(const Mtz& mtz, SMat33<double>& output);
 
+/// @brief Container for intensity data from reflection measurements.
+///
+/// Stores multi-record (unmerged) or merged intensities with metadata such as
+/// unit cell, space group, and wavelength. Supports merging operations and
+/// import from MTZ, mmCIF, and XDS_ASCII formats.
 struct GEMMI_DLL Intensities {
+  /// @brief A single reflection record with intensity, sigma, and metadata.
   struct Refl {
-    Miller hkl;
-    int8_t isign;  // 1 for I(+), -1 for I(-), 0 for mean or unmerged
-    int8_t isym;   // for unmerged data: encodes symmetry op like M/ISYM in MTZ
-    short nobs;
-    double value;
-    double sigma;
-
+    Miller hkl;       ///< Miller indices (h, k, l)
+    int8_t isign;     ///< Intensity component: 1=I(+), -1=I(-), 0=mean/unmerged
+    int8_t isym;      ///< Symmetry operator encoding (ISYM in MTZ for unmerged data)
+    short nobs;       ///< Number of observations (used during merging)
+    double value;     ///< Intensity value
+    double sigma;     ///< Standard deviation of intensity
+
+    /// Compare reflections by (h,k,l, isign) for sorting.
     bool operator<(const Refl& o) const {
       return std::tie(hkl[0], hkl[1], hkl[2], isign) <
              std::tie(o.hkl[0], o.hkl[1], o.hkl[2], o.isign);
     }
-    // for merged data
+    /// @brief Get intensity label ("&lt;I&gt;", "I(+)", or "I(-)").
     const char* intensity_label() const {
       if (isign == 0)
         return "<I>";
       return isign > 0 ? "I(+)" : "I(-)";
     }
+    /// @brief Format HKL and intensity label as a string.
     std::string hkl_label() const {
       return cat(intensity_label(), " (", hkl[0], ' ', hkl[1], ' ', hkl[2], ')');
     }
   };
 
+  /// @brief Anisotropic scaling tensor for STARANISO B-factor correction.
   struct AnisoScaling {
-    SMat33<double> b = {0., 0., 0., 0., 0., 0.};
+    SMat33<double> b = {0., 0., 0., 0., 0., 0.}; ///< Symmetric B-tensor (six elements, all zero by default)
 
+    /// Check if anisotropic tensor is set (non-zero).
     bool ok() const { return !b.all_zero(); }
+    /// @brief Compute scaling factor for a reflection at given HKL.
+    /// @param hkl Miller indices
+    /// @param cell Unit cell to convert HKL to reciprocal-space vector
+    /// @return Scaling factor exp(0.5 * b.r_u_r(s)), where s is hkl transformed by cell.frac.mat
     double scale(const Miller& hkl, const UnitCell& cell) const {
       Vec3 s = cell.frac.mat.left_multiply(Vec3(hkl[0], hkl[1], hkl[2]));
       return std::exp(0.5 * b.r_u_r(s));
     }
   };
 
-  std::vector<Refl> data;
-  const SpaceGroup* spacegroup = nullptr;
-  UnitCell unit_cell;
-  double unit_cell_rmsd[6] = {0., 0., 0., 0., 0., 0.};
-  double wavelength;
-  DataType type = DataType::Unknown;
-  std::vector<Op> isym_ops;
-  AnisoScaling staraniso_b;
-
+  std::vector<Refl> data;                  ///< Reflection records
+  const SpaceGroup* spacegroup = nullptr;  ///< Space group (not owned by this object)
+  UnitCell unit_cell;                      ///< Crystal unit cell parameters
+  double unit_cell_rmsd[6] = {0., 0., 0., 0., 0., 0.}; ///< RMSDs of unit cell parameters
+  double wavelength;                       ///< Diffraction wavelength in Angstroms
+  DataType type = DataType::Unknown;       ///< Type of intensity data stored
+  std::vector<Op> isym_ops;                ///< Symmetry operators (for unmerged data)
+  AnisoScaling staraniso_b;                ///< STARANISO anisotropy correction tensor
+
+  /// @brief Get string representation of intensity data type.
+  /// @param data_type Type to convert.
+  /// @return Human-readable label ("I", "&lt;I&gt;", "I+/I-", "n/a", etc.).
   static const char* type_str(DataType data_type) {
     switch (data_type) {
       case DataType::Unmerged: return "I";
@@ -129,16 +169,28 @@ struct GEMMI_DLL Intensities {
     unreachable();
   }
 
+  /// @brief Get string representation of this object's intensity data type.
   const char* type_str() const { return Intensities::type_str(type); }
 
+  /// @brief Get space group name (Hermann-Mauguin symbol) or "none".
   std::string spacegroup_str() const { return spacegroup ? spacegroup->xhm() : "none"; }
 
-  // returns (d_max, d_min)
+  /// @brief Get minimum and maximum resolution of reflections.
+  /// @return {d_max, d_min} as array of two doubles.
   std::array<double,2> resolution_range() const;
 
-  // pre: both are sorted
+  /// @brief Calculate correlation of intensity values between two sorted lists.
+  /// @param other Intensities to correlate with; both objects must already be sorted.
+  /// @return Correlation object with matching reflections analyzed.
   Correlation calculate_correlation(const Intensities& other) const;
 
+  /// @brief Add a single reflection if its data is valid.
+  /// Skips reflections with NaN or non-positive sigma (rejected by XDS, etc.).
+  /// @param hkl Miller indices
+  /// @param isign Intensity sign (1 for I+, -1 for I-, 0 for mean)
+  /// @param isym Symmetry operator encoding
+  /// @param value Intensity value
+  /// @param sigma Standard deviation of intensity
   void add_if_valid(const Miller& hkl, int8_t isign, int8_t isym, double value, double sigma) {
     // XDS marks rejected reflections with negative sigma.
     // Sigma 0.0 rarely happens (e.g. 5tkn), but is also problematic.
@@ -146,6 +198,7 @@ struct GEMMI_DLL Intensities {
       data.push_back({hkl, isign, isym, /*nobs=*/0, value, sigma});
   }
 
+  /// @brief Remove reflections that are forbidden by space group symmetry.
   void remove_systematic_absences() {
     if (!spacegroup)
       return;
@@ -153,53 +206,99 @@ struct GEMMI_DLL Intensities {
     vector_remove_if(data, [&](Refl& x) { return gops.is_systematically_absent(x.hkl); });
   }
 
+  /// @brief Sort reflections by (h, k, l, isign) in ascending order.
   void sort() { std::sort(data.begin(), data.end()); }
 
+  /// @brief Merge reflections in-place to a specified data type (mean or anomalous).
+  /// @param new_type Target data type for merged intensities.
   void merge_in_place(DataType new_type);
 
+  /// @brief Create a merged copy without modifying this object.
+  /// @param new_type Target data type for merged intensities.
+  /// @return New Intensities object with merged data.
   Intensities merged(DataType new_type) {
     Intensities m(*this);
     m.merge_in_place(new_type);
     return m;
   }
 
-  /// use_weights can be 'Y' (yes, like Aimless), 'U' (unweighted), 'X' (yes, like XDS)
+  /// @brief Calculate R-merge and related statistics for each resolution shell.
+  /// @param binner Resolution shell binning (or nullptr for all data in one shell).
+  /// @param use_weights Weighting scheme: 'Y'=Aimless style, 'U'=unweighted, 'X'=XDS style.
+  /// @return Vector of MergingStats, one per shell.
   std::vector<MergingStats> calculate_merging_stats(const Binner* binner,
                                                     char use_weights='Y') const;
 
-  // call with DataType::Anomalous before calculate_merging_stats() to get I+/I- stats
+  /// @brief Prepare data for merging and classify anomalous/mean component.
+  /// Call with DataType::Anomalous before calculate_merging_stats() to get I+/I- stats.
+  /// @param new_type Target data type.
+  /// @return Classified data type after preparation.
   DataType prepare_for_merging(DataType new_type);
 
+  /// @brief Switch reflection indices to their reciprocal-space ASU equivalents.
   void switch_to_asu_indices();
 
+  /// @brief Load unmerged intensities from MTZ file.
+  /// @param mtz MTZ object to read from.
   void import_unmerged_intensities_from_mtz(const Mtz& mtz);
+  /// @brief Load mean intensities from MTZ file.
+  /// @param mtz MTZ object to read from.
   void import_mean_intensities_from_mtz(const Mtz& mtz);
-  // with check_complete=true, throw if anomalous data is null where it shouldn't be
+  /// @brief Load anomalous intensities (I+/I-) from MTZ file.
+  /// @param mtz MTZ object to read from.
+  /// @param check_complete If true, throw if anomalous data is null where expected.
   void import_anomalous_intensities_from_mtz(const Mtz& mtz, bool check_complete=false);
 
+  /// @brief Load intensities from MTZ file, auto-detecting data type.
+  /// @param mtz MTZ object to read from.
+  /// @param data_type Requested data type; DataType::Unknown auto-detects.
   void import_mtz(const Mtz& mtz, DataType data_type=DataType::Unknown);
 
+  /// @brief Load unmerged intensities from mmCIF reflection block.
+  /// @param rb Reflection block to read from.
   void import_unmerged_intensities_from_mmcif(const ReflnBlock& rb);
+  /// @brief Load mean intensities from mmCIF reflection block.
+  /// @param rb Reflection block to read from.
   void import_mean_intensities_from_mmcif(const ReflnBlock& rb);
+  /// @brief Load anomalous intensities (I+/I-) from mmCIF reflection block.
+  /// @param rb Reflection block to read from.
+  /// @param check_complete If true, throw if anomalous data is null where expected.
   void import_anomalous_intensities_from_mmcif(const ReflnBlock& rb, bool check_complete=false);
 
+  /// @brief Load structure factor squared (F^2) from mmCIF reflection block.
+  /// @param rb Reflection block to read from.
   void import_f_squared_from_mmcif(const ReflnBlock& rb);
 
+  /// @brief Load intensities from mmCIF reflection block, auto-detecting data type.
+  /// @param rb Reflection block to read from.
+  /// @param data_type Requested data type; DataType::Unknown auto-detects.
   void import_refln_block(const ReflnBlock& rb, DataType data_type=DataType::Unknown);
 
+  /// @brief Load intensities from XDS_ASCII file.
+  /// @param xds XDS_ASCII object to read from.
   void import_xds(const XdsAscii& xds);
 
-  // returns STARANISO version or empty string
+  /// @brief Extract and store STARANISO B-tensor from MTZ file.
+  /// @param mtz MTZ object to read from.
+  /// @return STARANISO version string if found, empty string otherwise.
   std::string take_staraniso_b_from_mtz(const Mtz& mtz);
 
+  /// @brief Extract and store STARANISO B-tensor from mmCIF block.
+  /// @param block CIF block to read from.
+  /// @return True if tensor was found and loaded, false otherwise.
   bool take_staraniso_b_from_mmcif(const cif::Block& block);
 
+  /// @brief Create a merged MTZ file from these intensities.
+  /// @param with_nobs If true, include NOBS (observation count) column.
+  /// @return Mtz object with merged data.
   Mtz prepare_merged_mtz(bool with_nobs);
 };
 
-// Minimal compatibility with MtzDataProxy and ReflnDataProxy.
+/// @brief Adapter providing DataProxy interface to Intensities data.
+/// Enables use of Intensities with generic algorithms expecting standard data proxy interface.
 struct IntensitiesDataProxy {
-  const Intensities& intensities_;
+  const Intensities& intensities_; ///< Reference to underlying Intensities object
+
   size_t stride() const { return 1; }
   size_t size() const { return intensities_.data.size(); }
   const SpaceGroup* spacegroup() const { return intensities_.spacegroup; }
@@ -208,6 +307,15 @@ struct IntensitiesDataProxy {
   double get_num(size_t n) const { return intensities_.data[n].value; }
 };
 
+/// @brief Infer intensity data type from reflection data under symmetry.
+///
+/// Examines unique reflections and detects whether data is unmerged (multiple
+/// copies of same HKL), mean intensity (single copy), or anomalous (both I+/I-).
+///
+/// @tparam DataProxy Type with spacegroup(), unit_cell(), size(), stride(),
+///                    get_hkl() interface.
+/// @param proxy Data proxy object to analyze.
+/// @return Pair of (inferred DataType, number of unique HKLs in ASU).
 template<typename DataProxy>
 std::pair<DataType, size_t> check_data_type_under_symmetry(const DataProxy& proxy) {
   const SpaceGroup* sg = proxy.spacegroup();
diff --git a/include/gemmi/mtz.hpp b/include/gemmi/mtz.hpp
index c7cf44316..6e5b8b987 100644
--- a/include/gemmi/mtz.hpp
+++ b/include/gemmi/mtz.hpp
@@ -1,3 +1,6 @@
+/// @file
+/// @brief MTZ reflection file format (X-ray crystallography).
+
 // Copyright 2019 Global Phasing Ltd.
 //
 // MTZ reflection file format.
@@ -24,15 +27,19 @@
 
 namespace gemmi {
 
-// Unmerged MTZ files always store in-asu hkl indices and symmetry operation
-// encoded in the M/ISYM column. Here is a helper for writing such files.
+/// Helper for writing unmerged MTZ files with correct M/ISYM column values.
+/// Converts Miller indices to ASU-equivalent and encodes the symmetry operation.
 struct UnmergedHklMover {
+  /// Initialize with spacegroup information.
+  /// @param spacegroup The space group (may be null).
   UnmergedHklMover(const SpaceGroup* spacegroup) : asu_(spacegroup) {
     if (spacegroup)
       group_ops_ = spacegroup->operations();
   }
 
-  // Modifies hkl and returns ISYM value for M/ISYM
+  /// Move HKL indices to ASU and return the encoded ISYM value.
+  /// @param[in,out] hkl Miller indices; modified to ASU-equivalent values.
+  /// @return ISYM value for the M/ISYM column (encodes symmetry operation).
   int move_to_asu(std::array<int, 3>& hkl) {
     std::pair<Miller, int> hkl_isym = asu_.to_asu(hkl, group_ops_);
     hkl = hkl_isym.first;
@@ -44,63 +51,125 @@ struct UnmergedHklMover {
   GroupOps group_ops_;
 };
 
+/// MTZ file metadata: crystallographic parameters, symmetry, and file structure.
 struct MtzMetadata {
-  std::string source_path;  // input file path, if known
+  /// Input file path (if known).
+  std::string source_path;
+  /// True if the file's byte order matches the system (not swapped).
   bool same_byte_order = true;
+  /// For unmerged MTZ: true if HKL indices have been switched to original (non-ASU) values.
   bool indices_switched_to_original = false;
+  /// Offset (in 32-bit words) to the start of the header block.
   std::int64_t header_offset = 0;
+  /// Version stamp from VERS header line (e.g., "MTZ:V1.1").
   std::string version_stamp;
+  /// Title from TITLE header line.
   std::string title;
+  /// Number of reflections in the data array.
   int nreflections = 0;
+  /// Sort order: columns used to sort reflections (0 = not used).
   std::array<int, 5> sort_order = {};
+  /// Minimum 1/d² value in the file (d = 1/sqrt(1/d²)).
   double min_1_d2 = NAN;
+  /// Maximum 1/d² value in the file.
   double max_1_d2 = NAN;
+  /// Value from the VALM header record (missing-number flag); NAN if not set.
   float valm = NAN;
+  /// Number of symmetry operations (redundant with symops.size()).
   int nsymop = 0;
+  /// Global unit cell parameters.
   UnitCell cell;
+  /// CCP4 space group number.
   int spacegroup_number = 0;
+  /// Space group name (Hermann-Mauguin, e.g., "P 21 21 21").
   std::string spacegroup_name;
+  /// Symmetry operations read from SYMM header lines.
   std::vector<Op> symops;
+  /// Pointer to the SpaceGroup object (from symmetry database).
   const SpaceGroup* spacegroup = nullptr;
+  /// Historical processing steps (MTZHIST records).
   std::vector<std::string> history;
+  /// Text appended after MTZENDOFHEADERS (non-standard).
   std::string appended_text;
-  // used to report non-critical problems when reading a file (also used in mtz2cif)
+  /// Logger for non-critical problems during file reading.
   Logger logger;
 };
 
+/// Representation of an MTZ reflection file.
+/// Contains reflection data, column definitions, batch headers (for unmerged files),
+/// and crystallographic metadata (cell, space group, symmetry operations).
 struct GEMMI_DLL Mtz : public MtzMetadata {
+  /// A dataset in the MTZ file hierarchy: project → crystal → dataset → columns.
   struct Dataset {
+    /// Unique dataset ID (positive integer, typically starting at 1).
     int id;
+    /// Project name (e.g., dataset owner or beamline).
     std::string project_name;
+    /// Crystal name (e.g., sample identifier).
     std::string crystal_name;
+    /// Dataset name (e.g., experiment or scan identifier).
     std::string dataset_name;
+    /// Unit cell parameters for this dataset (overrides global cell if set).
     UnitCell cell;
-    double wavelength;  // 0 means not set
+    /// X-ray wavelength in Angstroms (0 = not set).
+    double wavelength;
   };
 
+  /// A column in the reflection data array.
+  /// Stores one field per reflection (e.g., amplitude, phase, flag).
   struct Column {
+    /// Dataset ID this column belongs to.
     int dataset_id;
+    /// Column type code: 'H'=Miller index (H, K, or L), 'J'=intensity, 'F'=amplitude,
+    /// 'D'=anomalous difference, 'Q'=standard deviation, 'G'=F(+) or F(-),
+    /// 'L'=sigma of G, 'K'=I(+) or I(-), 'M'=sigma of K, 'E'=normalized amplitude,
+    /// 'P'=phase (degrees), 'W'=weight, 'A'=phase probability coefficient,
+    /// 'B'=batch number, 'Y'=M/ISYM (unmerged data), 'I'=integer, 'R'=other real.
     char type;
+    /// Column label (e.g., "FP", "SIGFP", "FWT", "PHWT").
     std::string label;
+    /// Minimum value in this column (NAN if not computed).
     float min_value = NAN;
+    /// Maximum value in this column (NAN if not computed).
     float max_value = NAN;
-    std::string source;  // from COLSRC
+    /// Source of the data (from COLSRC header; e.g., derivation formula).
+    std::string source;
+    /// Pointer to parent Mtz object (for data access).
     Mtz* parent;
+    /// Index of this column in parent->columns (used to access data array).
     std::size_t idx;
 
+    /// Get the Dataset this column belongs to.
     Dataset& dataset() { return parent->dataset(dataset_id); }
+    /// Get the Dataset this column belongs to (const).
     const Dataset& dataset() const { return parent->dataset(dataset_id); }
+    /// True if parent Mtz has data loaded.
     bool has_data() const { return parent->has_data(); }
+    /// Number of values in this column (0 if no data loaded).
     int size() const { return has_data() ? parent->nreflections : 0; }
+    /// Stride between consecutive values in the data array (= number of columns).
     size_t stride() const { return parent->columns.size(); }
+    /// Access column value for reflection n.
+    /// @param n Reflection index (0 to nreflections-1).
     float& operator[](std::size_t n) { return parent->data[idx + n * stride()]; }
+    /// Access column value for reflection n (const).
     float operator[](std::size_t n) const { return parent->data[idx + n * stride()]; }
+    /// Access column value for reflection n with bounds checking.
+    /// @param n Reflection index.
+    /// @return Reference to the data value.
+    /// @throws std::out_of_range if n is out of bounds.
     float& at(std::size_t n) { return parent->data.at(idx + n * stride()); }
+    /// Access column value for reflection n with bounds checking (const).
     float at(std::size_t n) const { return parent->data.at(idx + n * stride()); }
+    /// True if this column type represents an integer value.
+    /// Returns true for types H, B, Y, I (indices, batch, ISYM, integers).
     bool is_integer() const {
       return type == 'H' || type == 'B' || type == 'Y' || type == 'I';
     }
 
+    /// Return the column immediately after this one if it is in the same dataset and has the given type.
+    /// @param next_type The column type required of the following column.
+    /// @return Pointer to the column at idx + 1 if it matches, otherwise nullptr.
     const Column* get_next_column_if_type(char next_type) const {
       if (idx + 1 < parent->columns.size()) {
         const Column& next_col = parent->columns[idx + 1];
@@ -110,22 +179,31 @@ struct GEMMI_DLL Mtz : public MtzMetadata {
       return nullptr;
     }
 
+    /// Iterator over this column's values.
     using iterator = StrideIter<float>;
+    /// Begin iterator over all values in this column.
     iterator begin() {
       assert(parent);
       assert(&parent->columns[idx] == this);
       return iterator({parent->data.data(), idx, stride()});
     }
+    /// End iterator for this column.
     iterator end() {
       return iterator({parent->data.data() + parent->data.size(), idx,
                        stride()});
     }
+    /// Const iterator over this column's values.
     using const_iterator = StrideIter<const float>;
+    /// Begin const iterator.
     const_iterator begin() const { return const_cast<Column*>(this)->begin(); }
+    /// End const iterator.
     const_iterator end() const { return const_cast<Column*>(this)->end(); }
   };
 
+  /// Batch header for unmerged MTZ files (one per diffraction image/sweep).
+  /// Contains crystallographic and experimental metadata in fixed positions.
   struct Batch {
+    /// Initialize a batch with default values (matching CCP4 COMBAT/Pointless).
     Batch() {
       ints.resize(29, 0);
       floats.resize(156, 0.);
@@ -136,16 +214,26 @@ struct GEMMI_DLL Mtz : public MtzMetadata {
       // COMBAT sets BSCALE=1, but Pointless sets it to 0.
       //floats[43] = 1.f; // batch scale
     }
+    /// Batch number (usually 1-based).
     int number = 0;
+    /// Title or description of the batch.
     std::string title;
+    /// Integer values: ints[20] = dataset_id, ints[0,1,2] = sizes (fixed).
     std::vector<int> ints;
+    /// Float values: floats[0-5] = cell, floats[6-14] = U matrix, floats[36-37] = phi range,
+    /// floats[86] = wavelength.
     std::vector<float> floats;
+    /// Axis names (e.g., "OMEGA", "KAPPA", "PHI").
     std::vector<std::string> axes;
 
+    /// Extract unit cell parameters from batch header.
+    /// @return Unit cell (a, b, c, alpha, beta, gamma).
     UnitCell get_cell() const {
       return UnitCell(floats[0], floats[1], floats[2],
                       floats[3], floats[4], floats[5]);
     }
+    /// Set unit cell parameters in batch header.
+    /// @param uc The unit cell to store.
     void set_cell(const UnitCell& uc) {
       floats[0] = (float) uc.a;
       floats[1] = (float) uc.b;
@@ -155,12 +243,26 @@ struct GEMMI_DLL Mtz : public MtzMetadata {
       floats[5] = (float) uc.gamma;
     }
 
+    /// Get the dataset ID from batch header.
+    /// @return Dataset ID (from ints[20]).
     int dataset_id() const { return ints[20]; }
+    /// Set the dataset ID in batch header.
+    /// @param id Dataset ID to store in ints[20].
     void set_dataset_id(int id) { ints[20] = id; }
+    /// Get the X-ray wavelength.
+    /// @return Wavelength in Angstroms (from floats[86]).
     float wavelength() const { return floats[86]; }
+    /// Set the X-ray wavelength.
+    /// @param lambda Wavelength in Angstroms.
     void set_wavelength(float lambda) { floats[86] = lambda; }
+    /// Get the phi rotation start angle.
+    /// @return Start angle in degrees (from floats[36]).
     float phi_start() const { return floats[36]; }
+    /// Get the phi rotation end angle.
+    /// @return End angle in degrees (from floats[37]).
     float phi_end() const { return floats[37]; }
+    /// Get the crystal orientation matrix U (3×3).
+    /// @return U matrix from floats[6-14].
     Mat33 matrix_U() const {
       return Mat33(floats[6], floats[9],  floats[12],
                    floats[7], floats[10], floats[13],
@@ -168,16 +270,25 @@ struct GEMMI_DLL Mtz : public MtzMetadata {
     }
   };
 
+  /// All datasets in the file.
   std::vector<Dataset> datasets;
+  /// All columns in the file (ordered by position in the data array).
   std::vector<Column> columns;
+  /// Batch headers (empty for merged MTZ files).
   std::vector<Batch> batches;
+  /// Reflection data: laid out as [col0_refl0, col1_refl0, ..., col0_refl1, col1_refl1, ...].
+  /// Size = columns.size() * nreflections. Access via Column's operator[].
   std::vector<float> data;
 
+  /// Create an empty MTZ object.
+  /// @param with_base If true, initialize with a default HKL_base dataset and H, K, L columns.
   explicit Mtz(bool with_base=false) {
     if (with_base)
       add_base();
   }
+  /// Move constructor.
   Mtz(Mtz&& o) noexcept { *this = std::move(o); }
+  /// Move assignment. Updates parent pointers in all columns.
   Mtz& operator=(Mtz&& o) noexcept {
     MtzMetadata::operator=(std::move(o));
     datasets = std::move(o.datasets);
@@ -189,7 +300,7 @@ struct GEMMI_DLL Mtz : public MtzMetadata {
     return *this;
   }
 
-  // explicit to be aware where we make copies
+  /// Copy constructor. Explicit to prevent accidental copies. Updates parent pointers.
   explicit Mtz(const Mtz& o) : MtzMetadata(o) {
     datasets = o.datasets;
     columns = o.columns;
@@ -199,19 +310,29 @@ struct GEMMI_DLL Mtz : public MtzMetadata {
       col.parent = this;
   }
 
+  /// Copy assignment is deleted (explicit copy constructor forces intentionality).
   Mtz& operator=(Mtz const&) = delete;
 
+  /// Initialize with default HKL_base dataset and H, K, L columns.
   void add_base() {
     datasets.push_back({0, "HKL_base", "HKL_base", "HKL_base", cell, 0.});
     for (int i = 0; i != 3; ++i)
       add_column(std::string(1, "HKL"[i]), 'H', 0, i, false);
   }
 
-  // Functions to use after MTZ headers (and data) is read.
+  /// @name Crystallographic properties (after reading headers/data)
+  /// @{
 
+  /// Get the highest resolution in Angstroms.
+  /// @return d_min = 1/sqrt(max_1_d2); resolution is high when d is small.
   double resolution_high() const { return std::sqrt(1.0 / max_1_d2); }
+  /// Get the lowest resolution in Angstroms.
+  /// @return d_max = 1/sqrt(min_1_d2); resolution is low when d is large.
   double resolution_low() const  { return std::sqrt(1.0 / min_1_d2); }
 
+  /// Get the unit cell for a specific dataset or the global cell.
+  /// @param dataset Dataset ID; the global cell is returned if no dataset with this ID has a valid cell.
+  /// @return Reference to the unit cell.
   UnitCell& get_cell(int dataset=-1) {
     for (Dataset& ds : datasets)
       if (ds.id == dataset && ds.cell.is_crystal() && ds.cell.a > 0)
@@ -219,10 +340,13 @@ struct GEMMI_DLL Mtz : public MtzMetadata {
     return cell;
   }
 
+  /// Get the unit cell (const).
   const UnitCell& get_cell(int dataset=-1) const {
     return const_cast<Mtz*>(this)->get_cell(dataset);
   }
 
+  /// Set the global and all per-dataset unit cells to the same value.
+  /// @param new_cell The new unit cell parameters.
   void set_cell_for_all(const UnitCell& new_cell) {
     cell = new_cell;
     cell.set_cell_images_from_spacegroup(spacegroup);  // probably not needed
@@ -230,20 +354,37 @@ struct GEMMI_DLL Mtz : public MtzMetadata {
       ds.cell = cell;
   }
 
+  /// Calculate average unit cell from all batch headers, optionally with RMSD.
+  /// @param[out] rmsd Pointer to array of 6 doubles (a, b, c, alpha, beta, gamma RMSD),
+  ///                  or nullptr to skip.
+  /// @return Average cell from all batches, or global cell if batches are invalid.
   UnitCellParameters get_average_cell_from_batch_headers(double* rmsd) const;
 
+  /// Set the space group and update related fields.
+  /// @param new_sg Pointer to SpaceGroup (may be null).
   void set_spacegroup(const SpaceGroup* new_sg) {
     spacegroup = new_sg;
     spacegroup_number = new_sg ? spacegroup->ccp4 : 0;
     spacegroup_name = new_sg ? spacegroup->hm : "";
   }
 
+  /// @}
+  /// @name Dataset access
+  /// @{
+
+  /// Get the last (most recently added) dataset.
+  /// @return Reference to the last dataset.
+  /// @throws std::runtime_error if no datasets exist.
   Dataset& last_dataset() {
     if (datasets.empty())
       fail("MTZ dataset not found (missing DATASET header line?).");
     return datasets.back();
   }
 
+  /// Get dataset by ID.
+  /// @param id Dataset ID to look up.
+  /// @return Reference to the dataset.
+  /// @throws std::runtime_error if ID not found.
   Dataset& dataset(int id) {
     if ((size_t)id < datasets.size() && datasets[id].id == id)
       return datasets[id];
@@ -252,20 +393,32 @@ struct GEMMI_DLL Mtz : public MtzMetadata {
         return d;
     fail("MTZ file has no dataset with ID " + std::to_string(id));
   }
+  /// Get dataset by ID (const).
   const Dataset& dataset(int id) const {
     return const_cast<Mtz*>(this)->dataset(id);
   }
 
+  /// Find a dataset by name.
+  /// @param name Dataset name to search for.
+  /// @return Pointer to the dataset, or nullptr if not found.
   Dataset* dataset_with_name(const std::string& name) {
     for (Dataset& d : datasets)
       if (d.dataset_name == name)
         return &d;
     return nullptr;
   }
+  /// Find a dataset by name (const).
   const Dataset* dataset_with_name(const std::string& label) const {
     return const_cast<Mtz*>(this)->dataset_with_name(label);
   }
 
+  /// @}
+  /// @name Column access and queries
+  /// @{
+
+  /// Count columns with a specific label (may be > 1 if duplicates exist).
+  /// @param label Column label to count.
+  /// @return Number of columns with this label.
   int count(const std::string& label) const {
     int n = 0;
     for (const Column& col : columns)
@@ -274,6 +427,9 @@ struct GEMMI_DLL Mtz : public MtzMetadata {
     return n;
   }
 
+  /// Count columns of a specific type.
+  /// @param type Column type code (e.g., 'F', 'P', 'Q').
+  /// @return Number of columns with this type.
   int count_type(char type) const {
     int n = 0;
     for (const Column& col : columns)
@@ -282,6 +438,11 @@ struct GEMMI_DLL Mtz : public MtzMetadata {
     return n;
   }
 
+  /// Find the first column with a given label, optionally filtered by dataset and type.
+  /// @param label Column label.
+  /// @param ds [optional] Restrict search to this dataset (nullptr = any).
+  /// @param type [optional] Restrict search to this type ('*' = any).
+  /// @return Pointer to the column, or nullptr if not found.
   Column* column_with_label(const std::string& label, const Dataset* ds=nullptr, char type='*') {
     for (Column& col : columns)
       if (col.label == label && (!ds || ds->id == col.dataset_id)
@@ -289,17 +450,26 @@ struct GEMMI_DLL Mtz : public MtzMetadata {
         return &col;
     return nullptr;
   }
+  /// Find the first column with a given label (const).
   const Column* column_with_label(const std::string& label, const Dataset* ds=nullptr,
                                   char type='*') const {
     return const_cast<Mtz*>(this)->column_with_label(label, ds, type);
   }
 
+  /// Get a column by label, raising an error if not found.
+  /// @param label Column label.
+  /// @param ds [optional] Restrict search to this dataset.
+  /// @return Reference to the column.
+  /// @throws std::runtime_error if column not found.
   const Column& get_column_with_label(const std::string& label, const Dataset* ds=nullptr) const {
     if (const Column* col = column_with_label(label, ds))
       return *col;
     fail("Column label not found: " + label);
   }
 
+  /// Get all columns of a specific type.
+  /// @param type Column type code.
+  /// @return Vector of pointers to matching columns.
   std::vector<const Column*> columns_with_type(char type) const {
     std::vector<const Column*> cols;
     for (const Column& col : columns)
@@ -308,6 +478,9 @@ struct GEMMI_DLL Mtz : public MtzMetadata {
     return cols;
   }
 
+  /// Get positions (indices) of all columns with a specific type.
+  /// @param col_type Column type code.
+  /// @return Vector of column indices.
   std::vector<int> positions_of_columns_with_type(char col_type) const {
     std::vector<int> cols;
     for (int i = 0; i < (int) columns.size(); ++i)
@@ -316,9 +489,11 @@ struct GEMMI_DLL Mtz : public MtzMetadata {
     return cols;
   }
 
-  // F(+)/(-) pairs should have type G (and L for sigma),
-  // I(+)/(-) -- K (M for sigma), but E(+)/(-) has no special column type,
-  // so here we use column labels not types.
+  /// Find anomalous (±) column pairs by label pattern matching.
+  /// Looks for labels with "(+)" and matches corresponding "(-)" columns.
+  /// Note: F(+)/(-) pairs use type G, I(+)/(-) use type K, but E(+)/(-)
+  /// have no dedicated type, so label matching is used.
+  /// @return Vector of (index_plus, index_minus) pairs.
   std::vector<std::pair<int,int>> positions_of_plus_minus_columns() const {
     std::vector<std::pair<int,int>> r;
     for (int i = 0; i < (int) columns.size(); ++i) {
@@ -339,7 +514,11 @@ struct GEMMI_DLL Mtz : public MtzMetadata {
     return r;
   }
 
-  /// the order of labels matters
+  /// Find the first column matching any label in a prioritized list.
+  /// @param labels List of labels to try in order.
+  /// @param type [optional] Column type to match ('*' = any).
+  /// @return Pointer to the first matching column, or nullptr.
+  /// @note Order of labels matters; returns the first match.
   const Column* column_with_one_of_labels(std::initializer_list<const char*> labels,
                                           char type='*') const {
     for (const char* label : labels)
@@ -348,7 +527,11 @@ struct GEMMI_DLL Mtz : public MtzMetadata {
     return nullptr;
   }
 
-  /// the order of labels doesn't matter
+  /// Find a column matching a type and any of several labels.
+  /// @param type Column type to match.
+  /// @param labels List of labels to search for.
+  /// @return Pointer to the first matching column, or nullptr.
+  /// @note Order of labels does not matter.
   Column* column_with_type_and_any_of_labels(char type, std::initializer_list<const char*> labels) {
     for (Column& col : columns)
       if (col.type == type) {
@@ -359,75 +542,118 @@ struct GEMMI_DLL Mtz : public MtzMetadata {
     return nullptr;
   }
 
+  /// Find the R-free flag column (common labels: FREE, RFREE, R_FREE_FLAGS, etc.).
+  /// @return Pointer to R-free column (type 'I'), or nullptr.
   Column* rfree_column() {
     // cf. MtzToCif::default_spec in mtz2cif.hpp
     return column_with_type_and_any_of_labels('I',
         {"FREE", "RFREE", "FREER", "FreeR_flag", "R-free-flags", "FreeRflag", "R_FREE_FLAGS"});
   }
+  /// Find the R-free flag column (const).
   const Column* rfree_column() const {
     return const_cast<Mtz*>(this)->rfree_column();
   }
 
+  /// Find the mean intensity column (common labels: IMEAN, I, IOBS, I-obs).
+  /// @return Pointer to intensity column (type 'J'), or nullptr.
   Column* imean_column() {
     return column_with_type_and_any_of_labels('J', {"IMEAN", "I", "IOBS", "I-obs"});
   }
+  /// Find the mean intensity column (const).
   const Column* imean_column() const {
     return const_cast<Mtz*>(this)->imean_column();
   }
 
+  /// Find the I(+) anomalous intensity column (common labels: I(+), IOBS(+), Iplus).
+  /// @return Pointer to I(+) column (type 'K'), or nullptr.
   Column* iplus_column() {
     return column_with_type_and_any_of_labels('K', {"I(+)", "IOBS(+)", "I-obs(+)", "Iplus"});
   }
+  /// Find the I(+) column (const).
   const Column* iplus_column() const {
     return const_cast<Mtz*>(this)->iplus_column();
   }
 
+  /// Find the I(-) anomalous intensity column.
+  /// @return Pointer to I(-) column (type 'K'), or nullptr.
   Column* iminus_column() {
     return column_with_type_and_any_of_labels('K', {"I(-)", "IOBS(-)", "I-obs(-)", "Iminus"});
   }
+  /// Find the I(-) column (const).
   const Column* iminus_column() const {
     return const_cast<Mtz*>(this)->iminus_column();
   }
 
+  /// @}
+  /// @name Data status
+  /// @{
+
+  /// Check if reflection data has been loaded.
+  /// @return True if data.size() == columns.size() * nreflections.
   bool has_data() const {
     return data.size() == columns.size() * nreflections;
   }
 
+  /// Check if this is a merged MTZ file (no batch headers).
+  /// @return True if batches.empty().
   bool is_merged() const { return batches.empty(); }
 
-  /// Calculates min/max for all combinations of reflections and unit cells,
-  /// where unit cells are a global CELL and per-dataset DCELL.
+  /// Calculate min/max 1/d² from all reflections and unit cells.
+  /// Considers both global cell and per-dataset DCELLs.
+  /// @return [min_1_d2, max_1_d2].
   std::array<double,2> calculate_min_max_1_d2() const;
 
+  /// Recalculate and update min_1_d2 and max_1_d2 from reflection data.
   void update_reso() {
     std::array<double,2> reso = calculate_min_max_1_d2();
     min_1_d2 = reso[0];
     max_1_d2 = reso[1];
   }
 
-  // Functions for reading MTZ headers and data.
+  /// @}
+  /// @name File I/O
+  /// @{
 
+  /// Toggle the assumed byte order and swap header_offset accordingly.
   void toggle_endianness() {
     same_byte_order = !same_byte_order;
     swap_eight_bytes(&header_offset);
   }
 
+  /// Read and verify the first 80 bytes (MTZ magic and machine stamp).
+  /// @param stream Input stream positioned at file start.
   void read_first_bytes(AnyStream& stream);
 
-  /// read headers until END
+  /// Read header records from VERS until END.
+  /// @param stream Input stream positioned at the header block.
+  /// @param save_headers [optional] Pointer to string vector to save header lines.
   void read_main_headers(AnyStream& stream, std::vector<std::string>* save_headers);
 
-  /// read the part between END and MTZENDOFHEADERS
+  /// Read history (MTZHIST) and batch (MTZBATS) records after the END header.
+  /// @param stream Input stream positioned after END.
   void read_history_and_batch_headers(AnyStream& stream);
 
+  /// Set up spacegroup pointer from spacegroup_number or spacegroup_name.
   void setup_spacegroup();
 
+  /// Read raw reflection data from stream (float32 binary).
+  /// @param stream Input stream.
+  /// @param do_read If false, skip reading (compute space only).
   void read_raw_data(AnyStream& stream, bool do_read=true);
 
+  /// Read all header records (convenience wrapper).
+  /// @param stream Input stream.
   void read_all_headers(AnyStream& stream);
 
+  /// Read MTZ from a stream, including headers and optionally data.
+  /// Expects stream positioned at file start; reads in order: raw data, main headers, batch headers.
+  /// @param stream Input stream.
+  /// @param with_data If true, read reflection data; if false, skip it.
   void read_stream(AnyStream& stream, bool with_data);
 
+  /// Read MTZ from a file path.
+  /// @param path File path.
+  /// @throws std::system_error or std::runtime_error on failure.
   void read_file(const std::string& path) {
     try {
       source_path = path;
@@ -440,45 +666,90 @@ struct GEMMI_DLL Mtz : public MtzMetadata {
     }
   }
 
+  /// Read MTZ from an input object (e.g., MaybeGzipped for .mtz or .mtz.gz).
+  /// @tparam Input Type with path() and create_stream() methods.
+  /// @param input Input object.
+  /// @param with_data If true, read reflection data.
   template<typename Input>
   void read_input(Input&& input, bool with_data) {
     source_path = input.path();
     read_stream(*input.create_stream(), with_data);
   }
 
-  /// the same as read_input(MaybeGzipped(path), with_data)
+  /// Read MTZ from a file, handling .gz compression automatically.
+  /// @param path File path (.mtz or .mtz.gz).
+  /// @param with_data If true, read reflection data (default true).
   void read_file_gz(const std::string& path, bool with_data=true);
 
+  /// @}
+  /// @name Data manipulation (reflection rows)
+  /// @{
+
+  /// Get sorted row indices based on the first N columns (HKL by default).
+  /// @param use_first Number of columns to use for sorting (default 3 = h, k, l).
+  /// @return Vector of indices [0..nreflections-1] sorted by the first N columns.
   std::vector<int> sorted_row_indices(int use_first=3) const;
+
+  /// Sort reflections in-place using the first N columns.
+  /// @param use_first Number of columns to use for sorting (default 3).
+  /// @return True if any sorting was done; false if already sorted.
   bool sort(int use_first=3);
 
+  /// Extract Miller indices from a reflection at a given offset in the data array.
+  /// @param offset Offset to the first element of the reflection (H, K, L at offsets 0, 1, 2).
+  /// @return Miller indices.
   Miller get_hkl(size_t offset) const {
     return {{(int)data[offset], (int)data[offset+1], (int)data[offset+2]}};
   }
+
+  /// Set Miller indices at a given offset.
+  /// @param offset Offset to the H element.
+  /// @param hkl Miller indices to store.
   void set_hkl(size_t offset, const Miller& hkl) {
     for (int i = 0; i != 3; ++i)
       data[offset + i] = static_cast<float>(hkl[i]);
   }
 
-  /// Returns offset of the first hkl or (size_t)-1. Can be slow.
+  /// Find the data offset of the first reflection with specific Miller indices.
+  /// @param hkl Miller indices to search for.
+  /// @param start Starting offset (optional, default 0).
+  /// @return Offset to the reflection, or (size_t)-1 if not found.
+  /// @note This is a linear search; can be slow for large files.
   size_t find_offset_of_hkl(const Miller& hkl, size_t start=0) const;
 
-  /// (for merged MTZ only) change HKL to ASU equivalent, adjust phases, etc
+  /// Move all reflections to ASU and adjust phases/anomalous data accordingly.
+  /// For merged MTZ only. Transforms F(+), F(-), phases, and Hendrickson-Lattman coefficients.
+  /// @param tnt_asu If true, use TNT ASU setting; if false, use default ASU.
   void ensure_asu(bool tnt_asu=false);
 
-  /// Reindex data, usually followed by ensure_asu(). Outputs messages through logger.
+  /// Reindex reflections using a new basis and update space group accordingly.
+  /// Applies symmetry operation to HKL, removes fractional indices, adjusts cell and space group.
+  /// Outputs messages to logger.
+  /// @param op Reindexing operation (must have no translation and determinant > 0).
   void reindex(const Op& op);
 
-  /// Change symmetry to P1 and expand reflections. Does not sort.
-  /// Similar to command EXPAND in SFTOOLS.
+  /// Expand reflections to P1 using all symmetry operations.
+  /// Duplicate reflections under symmetry, adjust phases if present.
+  /// Similar to SFTOOLS EXPAND command.
+  /// @note Does not re-sort; sort afterwards if needed.
   void expand_to_p1();
 
-  /// (for unmerged MTZ only) change HKL according to M/ISYM
+  /// For unmerged MTZ: convert HKL from ASU to original (observed) indices.
+  /// Reads M/ISYM column and applies inverse symmetry operations.
+  /// @return True if M/ISYM column was found and data was modified.
   bool switch_to_original_hkl();
 
-  /// (for unmerged MTZ only) change HKL to ASU equivalent and set ISYM
+  /// For unmerged MTZ: convert HKL to ASU and set M/ISYM column accordingly.
+  /// @return True if M/ISYM column was found and data was modified.
   bool switch_to_asu_hkl();
 
+  /// @}
+  /// @name Data construction
+  /// @{
+
+  /// Create a new dataset with auto-assigned ID and add to the file.
+  /// @param name Name to use for project, crystal, and dataset.
+  /// @return Reference to the newly added dataset.
   Dataset& add_dataset(const std::string& name) {
     int id = 0;
     for (const Dataset& d : datasets)
@@ -488,20 +759,39 @@ struct GEMMI_DLL Mtz : public MtzMetadata {
     return datasets.back();
   }
 
+  /// Add a column to the file and optionally expand the data array.
+  /// @param label Column label.
+  /// @param type Column type code.
+  /// @param dataset_id Dataset ID for this column (-1 = last dataset).
+  /// @param pos Position in column list (-1 = append).
+  /// @param expand_data If true, expand the data array with NAN values for the new column.
+  /// @return Reference to the newly added column.
   Column& add_column(const std::string& label, char type,
                      int dataset_id, int pos, bool expand_data);
 
-  // extra_col are columns right after src_col that are also copied.
+  /// Replace a column with data from another column (including trailing columns).
+  /// @param dest_idx Destination column index.
+  /// @param src_col Source column to copy from.
+  /// @param trailing_cols [optional] Labels of columns immediately after src_col to also copy.
+  /// @return Reference to the destination column.
   Column& replace_column(size_t dest_idx, const Column& src_col,
                          const std::vector<std::string>& trailing_cols={});
 
-  // If dest_idx < 0 - columns are appended at the end
-  // append new column(s), otherwise overwrite existing ones.
+  /// Copy a column to a destination, or append if dest_idx < 0.
+  /// @param dest_idx Destination index (-1 = append).
+  /// @param src_col Source column.
+  /// @param trailing_cols [optional] Labels of subsequent columns to also copy.
+  /// @return Reference to the destination column.
   Column& copy_column(int dest_idx, const Column& src_col,
                       const std::vector<std::string>& trailing_cols={});
 
+  /// Remove a column from the file and data array.
+  /// @param idx Column index to remove.
   void remove_column(size_t idx);
 
+  /// Remove reflection rows matching a condition.
+  /// @tparam Func Callable that takes pointer to row data and returns true to remove.
+  /// @param condition Predicate function.
   template <typename Func>
   void remove_rows_if(Func condition) {
     if (!has_data())
@@ -518,6 +808,9 @@ struct GEMMI_DLL Mtz : public MtzMetadata {
     nreflections = int(data.size() / width);
   }
 
+  /// Insert new empty columns in the data array.
+  /// @param added Number of columns to insert.
+  /// @param pos_ Position to insert at (-1 = at the end).
   void expand_data_rows(size_t added, int pos_=-1) {
     size_t old_row_size = columns.size() - added;
     if (data.size() != old_row_size * nreflections)
@@ -528,6 +821,10 @@ struct GEMMI_DLL Mtz : public MtzMetadata {
     vector_insert_columns(data, old_row_size, (size_t)nreflections, added, pos, NAN);
   }
 
+  /// Replace the reflection data array with new data.
+  /// @param new_data Pointer to float array.
+  /// @param n Total number of floats (must be divisible by columns.size()).
+  /// @throws std::runtime_error if n is not a multiple of columns.size().
   void set_data(const float* new_data, size_t n) {
     size_t ncols = columns.size();
     if (n % ncols != 0)
@@ -536,24 +833,54 @@ struct GEMMI_DLL Mtz : public MtzMetadata {
     data.assign(new_data, new_data + n);
   }
 
-  // Function for writing MTZ file
+  /// @}
+  /// @name File output
+  /// @{
+
+  /// Write MTZ to a C FILE stream.
+  /// @param stream Open FILE* stream (should be in binary write mode).
   void write_to_cstream(std::FILE* stream) const;
+
+  /// Write MTZ to a string (binary data).
+  /// @param str [out] String to append the binary MTZ data to.
   void write_to_string(std::string& str) const;
+
+  /// Write MTZ to a file.
+  /// @param path File path.
   void write_to_file(const std::string& path) const;
+
+  /// Get the size of the binary MTZ output in bytes.
+  /// @return Size needed for the complete MTZ file.
   size_t size_to_write() const;
+
+  /// Write MTZ to a buffer.
+  /// @param buf Pointer to output buffer.
+  /// @param maxlen Maximum bytes to write.
+  /// @return Number of bytes written.
   size_t write_to_buffer(char* buf, size_t maxlen) const;
 
 private:
+  /// Generic write implementation (template to support FILE*, string, buffer).
+  /// @tparam Write Function type (size_t write(const void*, size_t, size_t)).
   template<typename Write> void write_to_stream(Write write) const;
 };
 
+/// @}
 
+/// Convenience function: read MTZ from a file path.
+/// @param path File path.
+/// @return Loaded Mtz object.
 inline Mtz read_mtz_file(const std::string& path) {
   Mtz mtz;
   mtz.read_file(path);
   return mtz;
 }
 
+/// Convenience function: read MTZ from an input object (handles gzip).
+/// @tparam Input Type with path() and create_stream() methods.
+/// @param input Input object (e.g., MaybeGzipped).
+/// @param with_data If true, read reflection data; if false, headers only.
+/// @return Loaded Mtz object.
 template<typename Input>
 Mtz read_mtz(Input&& input, bool with_data) {
   Mtz mtz;
@@ -561,17 +888,33 @@ Mtz read_mtz(Input&& input, bool with_data) {
   return mtz;
 }
 
-// Abstraction of data source, cf. ReflnDataProxy.
+/// Abstraction layer for accessing MTZ data uniformly.
+/// Provides stride, data access, and cell/symmetry information.
+/// Similar to ReflnDataProxy for reflection data in other formats.
 struct MtzDataProxy {
+  /// Reference to the MTZ object.
   const Mtz& mtz_;
+  /// Stride (number of columns) between consecutive reflections.
   size_t stride() const { return mtz_.columns.size(); }
+  /// Total number of floats in the data array.
   size_t size() const { return mtz_.data.size(); }
+  /// Element type (always float).
   using num_type = float;
+  /// Access a data element by index.
+  /// @param n Index into the flat data array.
   float get_num(size_t n) const { return mtz_.data[n]; }
+  /// Get the unit cell.
   const UnitCell& unit_cell() const { return mtz_.cell; }
+  /// Get the space group.
   const SpaceGroup* spacegroup() const { return mtz_.spacegroup; }
+  /// Get Miller indices from a reflection.
+  /// @param offset Offset to the H element.
   Miller get_hkl(size_t offset) const { return mtz_.get_hkl(offset); }
 
+  /// Find the column index for a given label.
+  /// @param label Column label.
+  /// @return Column index (idx).
+  /// @throws std::runtime_error if label not found.
   size_t column_index(const std::string& label) const {
     if (const Mtz::Column* col = mtz_.column_with_label(label))
       return col->idx;
@@ -579,13 +922,21 @@ struct MtzDataProxy {
   }
 };
 
-// Like above, but here the data is stored outside of the Mtz class
+/// MtzDataProxy variant for external data (not stored in Mtz).
+/// Wraps MTZ metadata with a separate data array pointer.
 struct MtzExternalDataProxy : MtzDataProxy {
+  /// Pointer to external data array.
   const float* data_;
+  /// Initialize with MTZ metadata and external data.
+  /// @param mtz MTZ object (for structure info only).
+  /// @param data Pointer to external float array (size = columns.size() * nreflections).
   MtzExternalDataProxy(const Mtz& mtz, const float* data)
     : MtzDataProxy{mtz}, data_(data) {}
+  /// Total size of the external data array.
   size_t size() const { return mtz_.columns.size() * mtz_.nreflections; }
+  /// Access element from external data.
   float get_num(size_t n) const { return data_[n]; }
+  /// Get Miller indices from external data.
   Miller get_hkl(size_t offset) const {
     return {{(int)data_[offset + 0],
              (int)data_[offset + 1],
@@ -593,6 +944,9 @@ struct MtzExternalDataProxy : MtzDataProxy {
   }
 };
 
+/// Create a proxy for accessing MTZ data.
+/// @param mtz MTZ object.
+/// @return MtzDataProxy wrapping the MTZ.
 inline MtzDataProxy data_proxy(const Mtz& mtz) { return {mtz}; }
 
 } // namespace gemmi
diff --git a/include/gemmi/mtz2cif.hpp b/include/gemmi/mtz2cif.hpp
index 387ceedc8..79811d865 100644
--- a/include/gemmi/mtz2cif.hpp
+++ b/include/gemmi/mtz2cif.hpp
@@ -19,27 +19,60 @@
 
 namespace gemmi {
 
+/// @file
+/// @brief Converter for MTZ reflection data to SF-mmCIF format.
+
+/// @brief Converts MTZ files (merged or unmerged) to SF-mmCIF reflection tables.
+///
+/// This class provides configuration options for column selection, naming,
+/// filtering, and metadata handling when converting MTZ format to mmCIF.
 class GEMMI_DLL MtzToCif {
 public:
-  // options that can be set directly
-  std::vector<std::string> spec_lines; // conversion specification (cf. default_spec)
-  const char* block_name = nullptr;  // NAME in data_NAME
-  std::string entry_id = "xxxx";     // _entry.id
-  bool with_comments = true;         // write comments
-  bool with_history = true;          // write MTZ history in comments
-  bool skip_empty = false;           // skip reflections with no values
-  bool skip_negative_sigi = false;   // skip refl. with sigma(I) < 0 in unmerged
-  bool enable_UB = false;            // write _diffrn_orient_matrix.UB
-  bool write_staraniso_tensor = true; // write _reflns.pdbx_aniso_B_tensor_*
+  /// Column conversion specification lines (see default_spec for format).
+  std::vector<std::string> spec_lines;
+  /// CIF data block name (NAME in data_NAME).
+  const char* block_name = nullptr;
+  /// Entry identifier (_entry.id tag).
+  std::string entry_id = "xxxx";
+  /// Whether to write comments describing the conversion.
+  bool with_comments = true;
+  /// Whether to write MTZ history records in comments.
+  bool with_history = true;
+  /// Skip reflections where all selected numeric columns are missing.
+  bool skip_empty = false;
+  /// Skip unmerged reflections with sigma(I) < 0.
+  bool skip_negative_sigi = false;
+  /// Write _diffrn_orient_matrix.UB orientation matrix.
+  bool enable_UB = false;
+  /// Write _reflns.pdbx_aniso_B_tensor_* STARANISO B-tensor (if available).
+  bool write_staraniso_tensor = true;
+  /// Write PDB-specific special marker for validation.
   bool write_special_marker_for_pdb = false;
-  int less_anomalous = 0;            // skip (+)/(-) columns even if in spec
-  std::string skip_empty_cols;       // columns used to determine "emptiness"
-  double wavelength = NAN;           // user-specified wavelength
-  int trim = 0;                      // output only reflections -N<=h,k,l<=N
-  int free_flag_value = -1;          // -1 = auto: 0 or (if we have >50% of 0's) 1
-  std::string staraniso_version;     // for _software.version in "special_marker"
-  std::string gemmi_run_from;        // added to gemmi as _software.description
+  /// If non-zero, skip anomalous (+/-) column pairs.
+  int less_anomalous = 0;
+  /// Columns used to determine if reflection is "empty" (when skip_empty=true).
+  std::string skip_empty_cols;
+  /// User-specified wavelength (NAN means use MTZ value).
+  double wavelength = NAN;
+  /// Trim reflections: output only those with -N<=h,k,l<=N (0 = no trim).
+  int trim = 0;
+  /// Free flag value; -1 = auto: 0, or 1 if more than 50% of the flags are 0.
+  int free_flag_value = -1;
+  /// STARANISO version, written as _software.version in the special marker.
+  std::string staraniso_version;
+  /// Description string appended to gemmi software entry.
+  std::string gemmi_run_from;
 
+  /// @brief Get default column specification for merged or unmerged data.
+  ///
+  /// The returned spec_lines describe MTZ-to-mmCIF column mapping.
+  /// Format: [?|&] COLUMN_LABEL TYPE mmCIF_TAG [FORMAT]; special lines start with $.
+  /// - ? = optional column (alternative labels are separated by |)
+  /// - & = line tied to the preceding line (ignored if that line was ignored)
+  /// - $ = internal (dataset_id, counter)
+  /// - no prefix = plain column line, e.g. "H H index_h" (label H, type H)
+  /// @param for_merged If true, return spec for merged data; else unmerged.
+  /// @return Null-terminated array of spec strings.
   static const char** default_spec(bool for_merged) {
     static const char* merged[] = {
       "H                              H index_h",
@@ -90,24 +123,56 @@ class GEMMI_DLL MtzToCif {
     return for_merged ? merged : unmerged;
   }
 
+  /// @brief Write MTZ reflection data to CIF format.
+  /// @param mtz First MTZ dataset (required).
+  /// @param mtz2 Optional second MTZ dataset for anomalous comparison.
+  /// @param staraniso_b Optional STARANISO B-tensor to include.
+  /// @param os Output stream for CIF file.
   void write_cif(const Mtz& mtz, const Mtz* mtz2,
                  SMat33<double>* staraniso_b, std::ostream& os);
+  /// @brief Write XDS reflection data to CIF format.
+  /// @param xds XDS_ASCII data to convert.
+  /// @param os Output stream for CIF file.
   void write_cif_from_xds(const XdsAscii& xds, std::ostream& os) const;
 };
 
+/// @brief Write STARANISO B-tensor to mmCIF format.
+/// @param b 3x3 symmetric B-tensor matrix.
+/// @param entry_id Entry identifier for tags.
+/// @param buf Temporary buffer for formatting.
+/// @param os Output stream.
 GEMMI_DLL void write_staraniso_b_in_mmcif(const SMat33<double>& b,
                                           const std::string& entry_id,
                                           char* buf, std::ostream& os);
 
-/// remove '_dataset_name' that can be appended to column names in ccp4i
+/// @brief Remove '_dataset_name' appendix from MTZ column labels.
+///
+/// This suffix is sometimes added by CCP4i and needs removal for proper conversion.
+/// @param mtz MTZ file to modify.
+/// @param logger For reporting changes.
 GEMMI_DLL void remove_appendix_from_column_names(Mtz& mtz, const Logger& logger);
 
+/// @brief Validate merged MTZ has required columns for PDB deposition.
+/// @param mtz MTZ to check.
+/// @param logger For reporting results.
+/// @return True if all required columns present.
 GEMMI_DLL bool validate_merged_mtz_deposition_columns(const Mtz& mtz, const Logger& logger);
 
-// note: both mi and ui get modified
+/// @brief Validate merged intensity data for consistency and quality.
+///
+/// Compares merged and unmerged intensity columns for anomalous differences and completeness.
+/// Modifies both Intensities objects.
+/// @param mi Merged intensities (modified).
+/// @param ui Unmerged intensities (modified).
+/// @param relaxed_check If true, apply looser validation criteria.
+/// @param logger For reporting issues.
+/// @return True if validation passes.
 GEMMI_DLL bool validate_merged_intensities(Intensities& mi, Intensities& ui,
                                            bool relaxed_check, const Logger& logger);
 
+/// @brief Extract software information from MTZ history records.
+/// @param history Vector of history strings from MTZ file.
+/// @return Vector of SoftwareItem objects describing processing steps.
 GEMMI_DLL std::vector<SoftwareItem>
 get_software_from_mtz_history(const std::vector<std::string>& history);
 
diff --git a/include/gemmi/refln.hpp b/include/gemmi/refln.hpp
index d74cc9939..c24df4c18 100644
--- a/include/gemmi/refln.hpp
+++ b/include/gemmi/refln.hpp
@@ -15,19 +15,38 @@
 
 namespace gemmi {
 
+/// @file
+/// @brief Structure and accessors for reflection data from SF-mmCIF files.
+
+/// @brief Wrapper for reflection data block from mmCIF file.
+///
+/// Provides column access and HKL index management for reflection data
+/// stored in CIF loops (either merged _refln or unmerged _diffrn_refln categories).
 struct ReflnBlock {
+  /// CIF data block containing reflection data.
   cif::Block block;
+  /// Entry identifier from _entry.id tag.
   std::string entry_id;
+  /// Unit cell parameters.
   UnitCell cell;
+  /// Pointer to space group; nullptr if unknown.
   const SpaceGroup* spacegroup = nullptr;
+  /// X-ray wavelength in Angstroms (0 if multiple wavelengths).
   double wavelength;
+  /// Number of wavelengths in the data block.
   int wavelength_count;
+  /// Pointer to _refln loop (merged data) or nullptr if absent.
   cif::Loop* refln_loop = nullptr;
+  /// Pointer to _diffrn_refln loop (unmerged data) or nullptr if absent.
   cif::Loop* diffrn_refln_loop = nullptr;
+  /// Points to active loop (refln_loop or diffrn_refln_loop).
   cif::Loop* default_loop = nullptr;
 
+  /// Default constructor (empty block).
   ReflnBlock() = default;
+  /// Move constructor.
   ReflnBlock(ReflnBlock&& rblock_) = default;
+  /// Construct from CIF block; extracts cell, spacegroup, wavelength, and reflection loops.
   ReflnBlock(cif::Block&& block_) : block(std::move(block_)) {
     entry_id = cif::as_string(block.find_value("_entry.id"));
     impl::set_cell_from_mmcif(block, cell);
@@ -43,7 +62,9 @@ struct ReflnBlock {
     diffrn_refln_loop = block.find_loop("_diffrn_refln.index_h").get_loop();
     default_loop = refln_loop ? refln_loop : diffrn_refln_loop;
   }
+  /// Move assignment.
   ReflnBlock& operator=(ReflnBlock&&) = default;
+  /// Copy assignment (deep copy of block and pointers).
   ReflnBlock& operator=(const ReflnBlock& o) {
     if (this == &o)
       return *this;
@@ -61,19 +82,28 @@ struct ReflnBlock {
     return *this;
   }
 
+  /// @brief Check if block contains valid reflection data.
+  /// @return True if default_loop is set and not null.
   bool ok() const { return default_loop != nullptr; }
+  /// @brief Throw exception if block is not valid.
   void check_ok() const { if (!ok()) fail("Invalid ReflnBlock"); }
 
-  // position after "_refln." or "_diffrn_refln."
+  /// @brief Get offset to tag name (after "_refln." or "_diffrn_refln.").
+  /// @return 7 for merged, 14 for unmerged.
   size_t tag_offset() const { return default_loop == refln_loop ? 7 : 14; }
 
+  /// @brief Switch between merged and unmerged reflection loops.
+  /// @param unmerged If true, use _diffrn_refln loop; otherwise use _refln.
   void use_unmerged(bool unmerged) {
     default_loop = unmerged ? diffrn_refln_loop : refln_loop;
   }
+  /// @brief Check if active loop is merged reflection data.
   bool is_merged() const { return ok() && default_loop == refln_loop; }
-  // deprecated
+  /// @brief Check if active loop is unmerged reflection data (deprecated).
   bool is_unmerged() const { return ok() && default_loop == diffrn_refln_loop; }
 
+  /// @brief Get list of column labels (without category prefix).
+  /// @return Vector of tag names from the active loop.
   std::vector<std::string> column_labels() const {
     check_ok();
     std::vector<std::string> labels(default_loop->tags.size());
@@ -82,6 +112,9 @@ struct ReflnBlock {
     return labels;
   }
 
+  /// @brief Find column index by tag name (without category prefix).
+  /// @param tag Column name (e.g., "index_h", "F_meas_sigma_au").
+  /// @return Column index, or -1 if not found.
   int find_column_index(const std::string& tag) const {
     if (!ok())
       return -1;
@@ -92,6 +125,10 @@ struct ReflnBlock {
     return -1;
   }
 
+  /// @brief Get column index by tag name, throwing exception if not found.
+  /// @param tag Column name.
+  /// @return Column index.
+  /// @throws gemmi::fail if column does not exist.
   size_t get_column_index(const std::string& tag) const {
     int idx = find_column_index(tag);
     if (idx == -1) {
@@ -103,6 +140,11 @@ struct ReflnBlock {
     return idx;
   }
 
+  /// @brief Extract column values as typed vector.
+  /// @tparam T Value type (e.g., int, double).
+  /// @param tag Column name.
+  /// @param null Default value for missing data.
+  /// @return Vector of converted values.
   template<typename T>
   std::vector<T> make_vector(const std::string& tag, T null) const {
     size_t n = get_column_index(tag);
@@ -112,12 +154,16 @@ struct ReflnBlock {
     return v;
   }
 
+  /// @brief Get positions of the index_h, index_k and index_l columns.
+  /// @return Array of 3 column indices, obtained with get_column_index() (fails if one is missing).
   std::array<size_t,3> get_hkl_column_indices() const {
     return {{get_column_index("index_h"),
              get_column_index("index_k"),
              get_column_index("index_l")}};
   }
 
+  /// @brief Extract Miller indices from all reflections.
+  /// @return Vector of Miller indices.
   std::vector<Miller> make_miller_vector() const {
     auto hkl_idx = get_hkl_column_indices();
     std::vector<Miller> v(default_loop->length());
@@ -127,6 +173,9 @@ struct ReflnBlock {
     return v;
   }
 
+  /// @brief Calculate 1/d^2 for all reflections using unit cell parameters.
+  /// @return Vector of 1/d^2 values.
+  /// @throws gemmi::fail if unit cell is not set.
   std::vector<double> make_1_d2_vector() const {
     if (!cell.is_crystal() || cell.a <= 0)
       fail("Unit cell is not known");
@@ -141,6 +190,8 @@ struct ReflnBlock {
     return r;
   }
 
+  /// @brief Calculate d-spacing for all reflections using unit cell parameters.
+  /// @return Vector of d-spacing values in Angstroms; fails like make_1_d2_vector() if cell is unknown.
   std::vector<double> make_d_vector() const {
     std::vector<double> vec = make_1_d2_vector();
     for (double& d : vec)
@@ -149,7 +200,12 @@ struct ReflnBlock {
   }
 };
 
-// moves blocks from the argument to the return value
+/// @brief Convert CIF blocks to ReflnBlocks, propagating cell and spacegroup info.
+///
+/// Fills in missing cell or spacegroup data by copying from the first block
+/// that contains it. Moves blocks from input to output.
+/// @param blocks Input CIF blocks (consumed).
+/// @return Vector of ReflnBlocks.
 inline
 std::vector<ReflnBlock> as_refln_blocks(std::vector<cif::Block>&& blocks) {
   std::vector<ReflnBlock> rvec;
@@ -175,8 +231,15 @@ std::vector<ReflnBlock> as_refln_blocks(std::vector<cif::Block>&& blocks) {
   return rvec;
 }
 
-// Get the first (merged) block with required labels.
-// Optionally, block name can be specified.
+/// @brief Find the first merged reflection block containing specified columns.
+///
+/// Searches blocks for one with _refln loop and required column labels.
+/// Propagates spacegroup from first block if needed.
+/// @param blocks Input CIF blocks (consumed).
+/// @param labels Required column names (empty string means optional).
+/// @param block_name Optional: if provided, only this named block is considered.
+/// @return The matching ReflnBlock.
+/// @throws gemmi::fail if no matching block or required columns not found.
 inline ReflnBlock get_refln_block(std::vector<cif::Block>&& blocks,
                                   const std::vector<std::string>& labels,
                                   const char* block_name=nullptr) {
@@ -209,6 +272,9 @@ inline ReflnBlock get_refln_block(std::vector<cif::Block>&& blocks,
   fail("Tags not found in SF-mmCIF file: _refln.", join_str(labels, ", _refln."));
 }
 
+/// @brief Convert CIF block from non-mmCIF format to ReflnBlock.
+/// @param block Input CIF block (e.g., from hkl file); its contents are swapped into the result.
+/// @return ReflnBlock with data from _refln_index_h loop.
 inline ReflnBlock hkl_cif_as_refln_block(cif::Block& block) {
   ReflnBlock rb;
   rb.block.swap(block);
@@ -224,29 +290,47 @@ inline ReflnBlock hkl_cif_as_refln_block(cif::Block& block) {
   return rb;
 }
 
-// Abstraction of data source, cf. MtzDataProxy.
+/// @brief Generic data source abstraction over a ReflnBlock for row iteration.
+///
+/// Provides uniform interface for accessing reflection columns (similar to MtzDataProxy).
 struct ReflnDataProxy {
+  /// Reference to underlying ReflnBlock.
   const ReflnBlock& rb_;
+  /// Cached indices for h, k, l columns.
   std::array<size_t,3> hkl_cols_;
+  /// Initialize proxy from a ReflnBlock.
   explicit ReflnDataProxy(const ReflnBlock& rb)
     : rb_(rb), hkl_cols_(rb_.get_hkl_column_indices()) {}
+  /// Number of columns (values per reflection).
   size_t stride() const { return loop().tags.size(); }
+  /// Total number of values (stride * reflection count).
   size_t size() const { return loop().values.size(); }
+  /// Numeric value type.
   using num_type = double;
+  /// Get numeric value at flattened loop index.
   double get_num(size_t n) const { return cif::as_number(loop().values[n]); }
+  /// Get unit cell.
   const UnitCell& unit_cell() const { return rb_.cell; }
+  /// Get spacegroup.
   const SpaceGroup* spacegroup() const { return rb_.spacegroup; }
+  /// Get Miller indices at given offset in loop.
   Miller get_hkl(size_t offset) const {
     return {{get_int(offset + hkl_cols_[0]),
              get_int(offset + hkl_cols_[1]),
              get_int(offset + hkl_cols_[2])}};
   }
+  /// Get column index by label.
   size_t column_index(const std::string& label) const { return rb_.get_column_index(label); }
 private:
+  /// Get active loop; calls rb_.check_ok() first, which fails if no loop is set.
   const cif::Loop& loop() const { rb_.check_ok(); return *rb_.default_loop; }
+  /// Get integer value at flattened loop index.
   int get_int(size_t n) const { return cif::as_int(loop().values[n]); }
 };
 
+/// @brief Create a data proxy over a ReflnBlock.
+/// @param rb ReflnBlock to wrap.
+/// @return ReflnDataProxy for generic access.
 inline ReflnDataProxy data_proxy(const ReflnBlock& rb) { return ReflnDataProxy(rb); }
 
 } // namespace gemmi
diff --git a/include/gemmi/xds2mtz.hpp b/include/gemmi/xds2mtz.hpp
index af142a4e3..826c6268b 100644
--- a/include/gemmi/xds2mtz.hpp
+++ b/include/gemmi/xds2mtz.hpp
@@ -12,6 +12,17 @@
 
 namespace gemmi {
 
+/// @file
+/// @brief Converter for XDS reflection data to MTZ format.
+
+/// @brief Convert XDS reflection data to MTZ format.
+///
+/// For unmerged data, creates unmerged MTZ with batch headers matching Pointless output.
+/// For merged data, uses Intensities class to prepare merged MTZ.
+/// Sets up all standard MTZ columns: H, K, L, M/ISYM, BATCH, I, SIGI, XDET, YDET, ROT,
+/// plus optional FRACTIONCALC, LP, CORR, and MAXC columns depending on XDS read_columns.
+/// @param xds XDS data to convert (modified).
+/// @return Populated MTZ object sorted by reflection index.
 inline Mtz xds_to_mtz(XdsAscii& xds) {
   if (xds.is_merged()) {
     Intensities intensities;
diff --git a/include/gemmi/xds_ascii.hpp b/include/gemmi/xds_ascii.hpp
index 38b7a93f9..1f4ed8ceb 100644
--- a/include/gemmi/xds_ascii.hpp
+++ b/include/gemmi/xds_ascii.hpp
@@ -11,76 +11,138 @@
 
 namespace gemmi {
 
-// from Pointless docs: likely in-house source, in which case
-// the unpolarised value is left unchanged (recognised wavelengths
-// are CuKalpha 1.5418 +- 0.0019, Mo 0.7107 +- 0.0002, Cr 2.29 +- 0.01)
+/// @file
+/// @brief Reader for XDS_ASCII.HKL and INTEGRATE.HKL reflection files.
+
+/// @brief Check if wavelength likely comes from in-house X-ray source.
+///
+/// Based on Pointless documentation; recognizes Cu, Mo, and Cr K-alpha
+/// wavelengths with their typical uncertainties.
+/// @param wavelength Wavelength in Angstroms.
+/// @return True if wavelength matches Cu, Mo, or Cr.
 inline bool likely_in_house_source(double wavelength) {
   return std::fabs(wavelength - 1.5418) < 0.0019 ||
          std::fabs(wavelength - 0.7107) < 0.0002 ||
          std::fabs(wavelength - 2.29) < 0.01;
 }
 
+/// @brief Metadata for XDS reflection data (base class).
 struct XdsAsciiMetadata {
+  /// @brief Properties of one integration set (dataset/sweep).
   struct Iset {
+    /// Integration set ID.
     int id;
+    /// Input file name for this set.
     std::string input_file;
+    /// Wavelength in Angstroms (0 if not specified).
     double wavelength = 0.;
+    /// Unit cell constants [a, b, c, alpha, beta, gamma].
     std::array<double,6> cell_constants = {0., 0., 0., 0., 0., 0.};
-    //statistics set by gather_iset_statistics()
+    /// Minimum frame number (set by gather_iset_statistics).
     int frame_number_min = -1;
+    /// Maximum frame number (set by gather_iset_statistics).
     int frame_number_max = -1;
+    /// Total number of distinct frames in set.
     int frame_count = -1;
+    /// Number of reflections in this set.
     int reflection_count = -1;
 
+    /// @brief Construct Iset with given ID.
     Iset(int id_) : id(id_) {}
   };
+  /// Source file path.
   std::string source_path;
-  int read_columns = 0;  // doesn't include ITEM_ISET from XSCALE
+  /// Number of columns read from the data section (doesn't include ITEM_ISET from XSCALE).
+  int read_columns = 0;
+  /// Space group number from XDS_ASCII header.
   int spacegroup_number = 0;
+  /// X-ray wavelength in Angstroms.
   double wavelength = 0.;
+  /// Unit cell constants [a, b, c, alpha, beta, gamma].
   std::array<double,6> cell_constants = {0., 0., 0., 0., 0., 0.};
+  /// Unit cell axes: rows hold the a, b, c axis vectors (UNIT_CELL_A/B/C-AXIS in XDS).
   Mat33 cell_axes{0.};
+  /// Incident beam direction vector (not necessarily unit length; see get_s0_direction()).
   Vec3 incident_beam_dir;
+  /// Oscillation range per frame in degrees.
   double oscillation_range = 0.;
+  /// Rotation axis direction.
   Vec3 rotation_axis;
+  /// Starting angle for rotation in degrees.
   double starting_angle = 0.;
+  /// Mosaicity/reflecting range standard deviation in degrees.
   double reflecting_range_esd = 0.;
+  /// Friedel's law assumption: '\0' unknown, 'T'rue, 'F'alse.
   char friedels_law = '\0';
+  /// First frame number.
   int starting_frame = 1;
-  int nx = 0;  // detector size - number of pixels
+  /// Detector width in pixels.
+  int nx = 0;
+  /// Detector height in pixels.
   int ny = 0;
-  double qx = 0.;  // pixel size in mm
+  /// Pixel size in x-direction (mm).
+  double qx = 0.;
+  /// Pixel size in y-direction (mm).
   double qy = 0.;
+  /// Detector origin in x-direction (pixels).
   double orgx = 0.;
+  /// Detector origin in y-direction (pixels).
   double orgy = 0.;
+  /// Distance from sample to detector (mm).
   double detector_distance = 0.;
+  /// Program name that generated the file (e.g., "XDS").
   std::string generated_by;
+  /// Version string of generating program.
   std::string version_str;
+  /// Integration sets (for multi-sweep data).
   std::vector<Iset> isets;
 };
 
+/// @brief Container for XDS reflection data (XDS_ASCII.HKL or INTEGRATE.HKL).
+///
+/// Stores integration metadata and per-reflection measurements from XDS output.
+/// Supports both merged and unmerged data depending on read_columns value.
 struct GEMMI_DLL XdsAscii : XdsAsciiMetadata {
+  /// @brief One reflection record from XDS data.
   struct Refl {
+    /// Miller indices (h, k, l).
     Miller hkl;
+    /// Integration set ID (dataset number).
     int iset = 1;
+    /// Integrated intensity.
     double iobs;
+    /// Standard deviation of iobs.
     double sigma;
+    /// Detector position x (pixels).
     double xd;
+    /// Detector position y (pixels).
     double yd;
+    /// Position along the rotation in frame units (fractional, absolute; can be negative).
     double zd;
+    /// Reciprocal Lorentz-polarization (LP) correction factor (RLP item in XDS).
     double rlp;
+    /// Percentage of observed reflection intensity (PEAK item in XDS).
     double peak;
-    double corr;  // is it always integer?
+    /// Percentage of correlation between observed and expected profile (CORR item in XDS).
+    double corr;
+    /// Maximum pixel value in reflection.
     double maxc;
 
-    // ZD can be negative for a few reflections
+    /// @brief Get number of the frame containing zd, computed as floor(zd + 1).
+    /// @return Frame number; flooring keeps it correct for negative zd.
     int frame() const { return (int) std::floor(zd + 1); }
   };
+  /// All reflection records.
   std::vector<Refl> data;
 
+  /// @brief Default constructor.
   XdsAscii() = default;
+  /// @brief Construct with existing metadata.
   XdsAscii(const XdsAsciiMetadata& m) : XdsAsciiMetadata(m) {}
 
+  /// @brief Get existing or create new integration set.
+  /// @param id Integration set ID.
+  /// @return Reference to Iset with given ID.
   Iset& find_or_add_iset(int id) {
     for (Iset& i : isets)
       if (i.id == id)
@@ -88,24 +150,40 @@ struct GEMMI_DLL XdsAscii : XdsAsciiMetadata {
     isets.emplace_back(id);
     return isets.back();
   }
+  /// @brief Read XDS file from stream.
+  /// @param reader Input stream handler.
+  /// @param source File path (for error messages).
   void read_stream(AnyStream& reader, const std::string& source);
 
+  /// @brief Read XDS file from input object (file or stdin).
+  /// @tparam T Input object with create_stream() and path() methods.
+  /// @param input Input object.
   template<typename T>
   void read_input(T&& input) {
     read_stream(*input.create_stream(), input.path());
   }
 
+  /// @brief Check if data is merged (few columns in XDS file).
+  /// @return True if read_columns < 8 (no per-reflection BATCH info).
   bool is_merged() const { return read_columns < 8; }
 
-  // set a few Iset properties in isets
+  /// @brief Calculate frame number statistics for each integration set.
+  ///
+  /// Sets frame_number_min, frame_number_max, frame_count, and
+  /// reflection_count for each Iset.
   void gather_iset_statistics();
 
+  /// @brief Calculate rotation angle for a reflection.
+  /// @param refl Reflection record with zd frame number.
+  /// @return Rotation angle in degrees.
   double rot_angle(const Refl& refl) const {
     double z = refl.zd - starting_frame + 1;
     return starting_angle + oscillation_range * z;
   }
 
-  // it's already normalized, but just in case normalize it again
+  /// @brief Get normalized rotation axis.
+  /// @return Normalized rotation_axis vector.
+  /// @throws gemmi::fail if rotation_axis is zero.
   Vec3 get_rotation_axis() const {
     double length = rotation_axis.length();
     if (length == 0)
@@ -113,7 +191,9 @@ struct GEMMI_DLL XdsAscii : XdsAsciiMetadata {
     return rotation_axis / length;
   }
 
-  // I'm not sure if always |incident_beam_dir| == 1/wavelength
+  /// @brief Get normalized incident beam direction (S0).
+  /// @return Normalized incident_beam_dir vector.
+  /// @throws gemmi::fail if incident_beam_dir is zero.
   Vec3 get_s0_direction() const {
     double length = incident_beam_dir.length();
     if (length == 0)
@@ -121,6 +201,8 @@ struct GEMMI_DLL XdsAscii : XdsAsciiMetadata {
     return incident_beam_dir / length;
   }
 
+  /// @brief Check if unit cell axis vectors (cell_axes) are set.
+  /// @return True if none of the 3 rows of cell_axes is a zero vector.
   bool has_cell_axes() const {
     for (int i = 0; i < 3; ++i)
       if (cell_axes[i][0] == 0 && cell_axes[i][1] == 0 && cell_axes[i][2] == 0)
@@ -128,12 +210,15 @@ struct GEMMI_DLL XdsAscii : XdsAsciiMetadata {
     return true;
   }
 
-  /// Return transition matrix from "Cambridge" frame to XDS frame.
-  /// x_xds = M x_cam
+  /// @brief Calculate transformation matrix from Cambridge frame to XDS frame.
+  ///
+  /// Cambridge frame: z along rotation axis, x along incident beam.
+  /// @return 3x3 matrix such that x_xds = M * x_cambridge.
+  /// @throws gemmi::fail if geometry data missing.
   Mat33 calculate_conversion_from_cambridge() const {
     // Cambridge z direction is along the principal rotation axis
     Vec3 z = get_rotation_axis();
-    // Cambridge z direction is along beam
+    // Cambridge x direction is along beam
     Vec3 x = get_s0_direction();
     Vec3 y = z.cross(x).normalized();
     // beam and rotation axis may not be orthogonal
@@ -141,6 +226,9 @@ struct GEMMI_DLL XdsAscii : XdsAsciiMetadata {
     return Mat33::from_columns(x, y, z);
   }
 
+  /// @brief Calculate crystal orientation matrix U.
+  /// @return 3x3 orientation matrix.
+  /// @throws gemmi::fail if cell_axes not set.
   Mat33 get_orientation() const {
     if (!has_cell_axes())
       fail("unknown unit cell axes");
@@ -154,21 +242,31 @@ struct GEMMI_DLL XdsAscii : XdsAsciiMetadata {
     return Mat33::from_columns(ar, br, cr);
   }
 
-  /// \par p is degree of polarization from range (0,1), as used in XDS.
+  /// @brief Apply polarization correction to all intensities and sigmas.
+  ///
+  /// Based on incident beam direction, rotation axis, and polarization plane.
+  /// Assumes XDS file has unpolarized beam correction already applied.
+  /// @param p Degree of polarization in [0, 1] (0.5 for unpolarized).
+  /// @param normal Normal vector to polarization plane.
   void apply_polarization_correction(double p, Vec3 normal);
 
-  /// \par overload is maximally allowed pixel value in a peak (MAXC).
+  /// @brief Remove reflections with peak pixel value exceeding threshold.
+  /// @param overload Maximum allowed MAXC pixel value.
   void eliminate_overloads(double overload) {
     vector_remove_if(data, [&](Refl& r) { return r.maxc > overload; });
   }
 
-  /// \par batchmin lowest allowed batch number.
+  /// @brief Remove reflections with frame number below threshold.
+  /// @param batchmin Minimum frame number to keep.
   void eliminate_batchmin(int batchmin) {
     double minz = batchmin - 1;
     vector_remove_if(data, [&](Refl& r) { return r.zd < minz; });
   }
 };
 
+/// @brief Read XDS_ASCII file from path.
+/// @param path File path.
+/// @return Populated XdsAscii object.
 inline XdsAscii read_xds_ascii_file(const std::string& path) {
   XdsAscii ret;
   FileStream stream(path.c_str(), "rb");
@@ -176,7 +274,9 @@ inline XdsAscii read_xds_ascii_file(const std::string& path) {
   return ret;
 }
 
-/// read possibly gzipped file
+/// @brief Read XDS_ASCII file, handling gzip compression.
+/// @param path File path (may be .gz).
+/// @return Populated XdsAscii object.
 GEMMI_DLL XdsAscii read_xds_ascii(const std::string& path);
 
 } // namespace gemmi