diff --git a/docs/api.rst b/docs/api.rst index d2f937490..01a825277 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -17,6 +17,34 @@ For the Python API, see the `Python API reference `_ for the full roadmap. Chemistry and Restraints ------------------------ diff --git a/docs/conf.py b/docs/conf.py index 55fab5e35..0c495fd17 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -146,4 +146,5 @@ def _compute_navigation_tree(context: Dict[str, Any]) -> str: if os.path.isdir(_doxygen_xml_dir): breathe_projects = {"gemmi": _doxygen_xml_dir} breathe_default_project = "gemmi" + breathe_default_members = ('members',) breathe_default_members = ('members',) # show all public members in every doxygenfile:: directive diff --git a/include/gemmi/asumask.hpp b/include/gemmi/asumask.hpp index 0fb915cb8..6287bba4f 100644 --- a/include/gemmi/asumask.hpp +++ b/include/gemmi/asumask.hpp @@ -2,6 +2,9 @@ // // AsuBrick and MaskedGrid that is used primarily as direct-space asu mask. +/// @file +/// @brief Asymmetric unit (ASU) masking for crystallographic grids. + #ifndef GEMMI_ASUMASK_HPP_ #define GEMMI_ASUMASK_HPP_ @@ -12,16 +15,27 @@ namespace gemmi { +/// Representation of an asymmetric unit (ASU) brick region. +/// The ASU is the minimal region of the unit cell under crystallographic space group symmetry. +/// This struct represents a brick-shaped ASU as 0 <= x <= size[i]/denom for each axis. +/// Lower bounds are always at the origin; upper bounds may be inclusive (<=) or exclusive (<). struct AsuBrick { - // The brick is 0<=x<=size[0]/24, 0<=y<=size[1]/24, 0<=z<=size[2]/24 + /// Denominator for normalizing brick coordinates to [0,1); always 24. static constexpr int denom = 24; - std::array size; - std::array incl; - int volume; + std::array size; ///< Numerator for each axis (denominator is always denom=24) + std::array incl; ///< Inclusivity flags; true if boundary is <= (closed), false if < + int volume; ///< Product of size elements: size[0]*size[1]*size[2] + /// Construct an ASU brick with given numerators. + /// Automatically determines inclusivity based on whether each size is less than denom. + /// @param a Numerator for x-axis + /// @param b Numerator for y-axis + /// @param c Numerator for z-axis AsuBrick(int a, int b, int c) : size({a,b,c}), incl({a < denom, b < denom, c < denom}), volume(a*b*c) {} + /// Returns a human-readable string representation of the brick. + /// Example: "0<=x<=1/8; 0<=y<1/6; 0<=z<=1/4" std::string str() const { std::string s; for (int i = 0; i < 3; ++i) { @@ -35,6 +49,9 @@ struct AsuBrick { return s; } + /// Returns the upper limit of the ASU as fractional coordinates. + /// Adds a small epsilon to inclusive boundaries and subtracts from exclusive boundaries + /// to handle floating-point comparisons correctly. Fractional get_upper_limit() const { double inv_denom = 1.0 / denom; return Fractional(inv_denom * size[0] + (incl[0] ? 1e-9 : -1e-9), @@ -42,7 +59,8 @@ struct AsuBrick { inv_denom * size[2] + (incl[2] ? 1e-9 : -1e-9)); } - // cf. Ccp4Base::get_extent() + /// Returns a bounding box for the ASU in fractional coordinates. + /// The lower bound is always at (-epsilon, -epsilon, -epsilon) and upper bound is from get_upper_limit(). Box get_extent() const { Box box; box.minimum = Fractional(-1e-9, -1e-9, -1e-9); @@ -50,6 +68,11 @@ struct AsuBrick { return box; } + /// Converts the ASU brick upper bound to grid point indices. + /// Computes ceiling values for each axis, accounting for the grid's axis order. + /// @param meta Grid metadata (including axis order and dimensions) + /// @return Array of [u_end, v_end, w_end] grid indices + /// @throws gemmi::Failure if grid axis order is not XYZ std::array uvw_end(const GridMeta& meta) const { if (meta.axis_order != AxisOrder::XYZ) fail("grid is not fully setup"); @@ -60,8 +83,13 @@ struct AsuBrick { } }; -// Returns asu brick upper bound. Lower bound is always (0,0,0). -// Brute force method that considers 8^3 sizes. +/// Determines the optimal ASU brick for a crystallographic space group. +/// Uses a brute-force search over 8^3 possible brick sizes to find the smallest ASU +/// that covers the full unit cell when replicated by all space group operations. +/// Both the size and inclusivity (boundary direction) of the brick are optimized. +/// @param sg Pointer to SpaceGroup; must not be nullptr +/// @return AsuBrick representing the optimal ASU for the given space group +/// @throws gemmi::Failure if sg is nullptr inline AsuBrick find_asu_brick(const SpaceGroup* sg) { if (sg == nullptr) fail("Missing space group"); @@ -213,29 +241,55 @@ inline AsuBrick find_asu_brick(const SpaceGroup* sg) { } +/// Grid wrapper that iterates only over points within the asymmetric unit. +/// Wraps a grid and a mask vector, providing an iterator that skips masked-out points. +/// @tparam T Grid value type +/// @tparam V Mask value type (default std::int8_t) template struct MaskedGrid { - std::vector mask; - Grid* grid; + std::vector mask; ///< Mask vector; 0 means point is outside ASU, non-zero is inside + Grid* grid; ///< Pointer to the wrapped grid + /// Iterator over grid points within the ASU (where mask == 0). struct iterator { - typename GridBase::iterator grid_iterator; - const std::vector& mask_ref; + typename GridBase::iterator grid_iterator; ///< Underlying grid iterator + const std::vector& mask_ref; ///< Reference to mask vector + + /// Construct an iterator from a grid iterator and mask reference. iterator(typename GridBase::iterator it, const std::vector& mask) : grid_iterator(it), mask_ref(mask) {} + + /// Pre-increment operator; skips to next non-masked grid point. iterator& operator++() { do { ++grid_iterator; } while (grid_iterator.index != mask_ref.size() && mask_ref[grid_iterator.index] != 0); return *this; } + + /// Dereferences the iterator to return the current grid point. typename GridBase::Point operator*() { return *grid_iterator; } + + /// Equality comparison. bool operator==(const iterator &o) const { return grid_iterator == o.grid_iterator; } + + /// Inequality comparison. bool operator!=(const iterator &o) const { return grid_iterator != o.grid_iterator; } }; + + /// Returns iterator to the first point in the ASU. iterator begin() { return {grid->begin(), mask}; } + + /// Returns iterator past the last point. iterator end() { return {grid->end(), mask}; } }; +/// Generates an ASU mask for a crystallographic grid. +/// Creates a vector with one entry per grid point: 0 for ASU points, 1 for symmetry mates. +/// Grid points on special positions map to themselves and are marked as 0 (ASU). +/// @tparam V Mask value type (default std::int8_t) +/// @param grid Grid metadata including space group and dimensions +/// @return Mask vector of size grid.point_count() with values 0 (ASU) or 1 (mate) +/// @throws gemmi::Failure if the ASU is not successfully determined template std::vector get_asu_mask(const GridMeta& grid) { std::vector mask(grid.point_count(), 2); @@ -261,16 +315,23 @@ std::vector get_asu_mask(const GridMeta& grid) { return mask; } +/// Creates a MaskedGrid for convenient iteration over ASU points only. +/// @tparam T Grid value type +/// @param grid Grid to wrap +/// @return MaskedGrid wrapper with ASU mask pre-computed template MaskedGrid masked_asu(Grid& grid) { return {get_asu_mask(grid), &grid}; } - +/// Functions for calculating bounding boxes and extents of grid data. // Calculating bounding box (brick) with the data (non-zero and non-NaN). namespace impl { -// find the shortest span (possibly wrapped) that contains all true values +/// Finds the shortest span (possibly wrapping around) containing all true values in a vector. +/// Handles periodic (wrapped) boundaries by considering the span of leading and trailing false values. +/// @param vec Vector of boolean values +/// @return Pair (start, end) of shortest span containing all true values; {0, n} if all false inline std::pair trim_false_values(const std::vector& vec) { const int n = (int) vec.size(); assert(n != 0); @@ -311,7 +372,12 @@ inline std::pair trim_false_values(const std::vector& vec) { } } // namespace impl -// Get the smallest box with non-zero (and non-NaN) values. +/// Calculates the smallest bounding box containing all non-zero and non-NaN grid values. +/// Scans the grid for non-zero points along each axis, finds the tightest bounding box +/// in fractional coordinate space. +/// @tparam T Grid value type +/// @param grid Grid to analyze +/// @return Box in fractional coordinates that bounds all non-zero data template Box get_nonzero_extent(const GridBase& grid) { grid.check_not_empty(); diff --git a/include/gemmi/ccp4.hpp b/include/gemmi/ccp4.hpp index 872d898c9..19d46832d 100644 --- a/include/gemmi/ccp4.hpp +++ b/include/gemmi/ccp4.hpp @@ -1,3 +1,12 @@ +/// @file ccp4.hpp +/// @brief CCP4/MRC map file format (electron density maps and masks). +/// +/// This header provides support for reading and writing CCP4 (Collaborative Computational +/// Project 4) and MRC (Medical Research Council) map file formats commonly used for +/// electron density maps and molecular masks in crystallography and cryo-EM. +/// The CCP4 map consists of a 1024-byte fixed header followed by data values (typically floats). +/// The header contains grid dimensions, unit cell parameters, space group, and data statistics. + // Copyright 2018 Global Phasing Ltd. // // CCP4 format for maps and masks. @@ -54,62 +63,102 @@ namespace gemmi { using std::int32_t; -// options for Ccp4<>::setup +/// @brief Options for expanding/reordering CCP4 map data during setup. +/// +/// CCP4 maps can store data in non-standard axis orderings and may cover only a portion +/// of the unit cell. The setup() method uses this enum to control transformation behavior. enum class MapSetup { - Full, // reorder and expand to the whole unit cell - NoSymmetry, // reorder and resize to the whole cell, but no symmetry ops - ReorderOnly // reorder axes to X, Y, Z + Full, ///< Reorder axes to XYZ and expand with symmetry to the whole unit cell + NoSymmetry, ///< Reorder axes to XYZ and resize to the whole cell, but do not apply symmetry ops + ReorderOnly ///< Only reorder axes to X, Y, Z; keep the partial cell as-is }; +/// @brief Base class for CCP4 map headers and metadata. +/// +/// Stores the raw CCP4 header (256 32-bit words plus optional extended symmetry records) +/// and provides methods to read/write individual header fields and manage map metadata. +/// The header defines the grid geometry, unit cell, space group, axis ordering, and statistics. struct Ccp4Base { - DataStats hstats; // data statistics read from / written to ccp4 map - // stores raw headers if the grid was read from ccp4 map - std::vector ccp4_header; - bool same_byte_order = true; + DataStats hstats; ///< Data statistics (min, max, mean, RMS) read from/written to map header + std::vector ccp4_header; ///< Raw header words (256 required + symmetry records) + bool same_byte_order = true; ///< True if file byte order matches machine byte order - // methods to access info from ccp4 headers, w is word number from the spec + /// @brief Get pointer to a header word (32-bit value). + /// @param w Word number in CCP4 convention (1-indexed) + /// @return Pointer to the word (cast to appropriate type as needed) void* header_word(int w) { return &ccp4_header.at(w - 1); } + /// @brief Get const pointer to a header word. const void* header_word(int w) const { return &ccp4_header.at(w - 1); } + /// @brief Read a 32-bit signed integer from header with byte-order correction. + /// @param w Word number (1-indexed) + /// @return The 32-bit value, byte-swapped if needed int32_t header_i32(int w) const { int32_t value = ccp4_header.at(w - 1); if (!same_byte_order) swap_four_bytes(&value); return value; } + /// @brief Read three consecutive 32-bit integers from header. + /// @param w Starting word number (1-indexed) + /// @return Array of three consecutive 32-bit values std::array header_3i32(int w) const { return {{ header_i32(w), header_i32(w+1), header_i32(w+2) }}; } + /// @brief Read a 32-bit float from header with byte-order correction. + /// @param w Word number (1-indexed) + /// @return The floating-point value float header_float(int w) const { int32_t int_value = header_i32(w); float f; std::memcpy(&f, &int_value, 4); return f; } + /// @brief Read a string field from header. + /// @param w Starting word number (1-indexed) + /// @param len Byte length to read (default 80 for standard CCP4 label) + /// @return The extracted string (may include padding) std::string header_str(int w, size_t len=80) const { if (4 * ccp4_header.size() < 4 * (w - 1) + len) fail("invalid end of string"); return std::string(reinterpret_cast(header_word(w)), len); } + /// @brief Write a 32-bit signed integer to header with byte-order correction. + /// @param w Word number (1-indexed) + /// @param value The value to write void set_header_i32(int w, int32_t value) { if (!same_byte_order) swap_four_bytes(&value); ccp4_header.at(w - 1) = value; } + /// @brief Write three consecutive 32-bit integers to header. + /// @param w Starting word number (1-indexed) + /// @param x First value (word w) + /// @param y Second value (word w+1) + /// @param z Third value (word w+2) void set_header_3i32(int w, int32_t x, int32_t y, int32_t z) { set_header_i32(w, x); set_header_i32(w+1, y); set_header_i32(w+2, z); } + /// @brief Write a 32-bit float to header with byte-order correction. + /// @param w Word number (1-indexed) + /// @param value The floating-point value to write void set_header_float(int w, float value) { int32_t int32_value; std::memcpy(&int32_value, &value, 4); set_header_i32(w, int32_value); } + /// @brief Write a string to header field. + /// @param w Starting word number (1-indexed) + /// @param str The string to write (must fit in available space) void set_header_str(int w, const std::string& str) { std::memcpy(header_word(w), str.c_str(), str.size()); } + /// @brief Determine the actual axis ordering from header MAPC, MAPR, MAPS records. + /// @return Array where pos[i] is the map column/row/section index for axis i (X/Y/Z) + /// @throws If MAPC/MAPR/MAPS records are invalid or inconsistent std::array axis_positions() const { if (ccp4_header.empty()) return {{0, 1, 2}}; // assuming it's X,Y,Z @@ -123,10 +172,15 @@ struct Ccp4Base { return pos; } - double header_rfloat(int w) const { // rounded to 5 digits + /// @brief Read a floating-point value from header, rounded to 5 decimal places. + /// @param w Word number (1-indexed) + /// @return The value rounded to avoid floating-point representation artifacts + double header_rfloat(int w) const { return std::round(1e5 * header_float(w)) / 1e5; } + /// @brief Get the extent of the map data in fractional coordinates. + /// @return A box (min, max corners) in fractional coordinates covering the map data Box get_extent() const { Box box; // cf. setup() @@ -143,14 +197,15 @@ struct Ccp4Base { return box; } - // Skew transformation (words 25-37) is supported by CCP4 maplib and PyMOL, - // but it's not in the MRC format and is not supported by most programs. - // From maplib.html: Skew transformation is from standard orthogonal - // coordinate frame (as used for atoms) to orthogonal map frame, as - // Xo(map) = S * (Xo(atoms) - t) + /// @brief Check if the map has a skew transformation (non-orthogonal grid). + /// @return True if LSKFLG (word 25) is non-zero, indicating skew is applied + /// @note Skew transformation is CCP4-specific and rarely used; most software ignores it bool has_skew_transformation() const { - return header_i32(25) != 0; // LSKFLG should be 0 or 1 + return header_i32(25) != 0; } + /// @brief Get the skew transformation matrix and translation vector. + /// @return Transform struct with 3x3 matrix and translation for Xo(map) = S * (Xo(atoms) - t) + /// @note Only meaningful if has_skew_transformation() is true Transform get_skew_transformation() const { return { // 26-34 SKWMAT @@ -162,12 +217,16 @@ struct Ccp4Base { }; } - // ORIGIN (words 50-52), used in MRC format, zeros in CCP4 format + /// @brief Get the origin offset (used in MRC format). + /// @return Position vector from words 50-52 (usually zero in CCP4, non-zero in MRC) Position get_origin() const { return Position(header_float(50), header_float(51), header_float(52)); } - // this function assumes that the whole unit cell is covered with offset 0 + /// @brief Create a CCP4 header for the given grid (excludes MODE and data statistics). + /// @param grid GridMeta with unit cell, space group, and dimensions to encode + /// @note Assumes the grid covers the whole unit cell with no offset. + /// The header includes symmetry operation records from the space group. void prepare_ccp4_header_except_mode_and_stats(GridMeta& grid) { GroupOps ops; if (grid.spacegroup) @@ -206,25 +265,32 @@ struct Ccp4Base { } } + /// @brief Update header MODE and data statistics fields. + /// @param mode CCP4 mode (0=int8, 1=int16, 2=float, 6=uint16, 12=float16) void update_header_mode_and_stats(int mode) { set_header_i32(4, mode); set_header_float(20, (float) hstats.dmin); set_header_float(21, (float) hstats.dmax); set_header_float(22, (float) hstats.dmean); set_header_float(55, (float) hstats.rms); - // labels could be modified but it's not important } + /// @brief Check if the map data covers the entire unit cell. + /// @param grid Grid metadata to check against header + /// @return True if NXSTART=NYSTART=NZSTART=0 and MX=NX, MY=NY, MZ=NZ bool full_cell_(const GridMeta& grid) const { if (ccp4_header.empty()) - return true; // assuming it's full cell + return true; return - // NXSTART et al. must be 0 header_i32(5) == 0 && header_i32(6) == 0 && header_i32(7) == 0 && - // MX == NX header_i32(8) == grid.nu && header_i32(9) == grid.nv && header_i32(10) == grid.nw; } + /// @brief Read CCP4 map header from stream, detecting byte order and extended records. + /// @param grid GridMeta to populate with header info (may be null to skip grid setup) + /// @param f Input stream positioned at start of file + /// @param path File path for error reporting + /// @throws If header is invalid, file is truncated, or contains unsupported features void read_ccp4_header_(GridMeta* grid, AnyStream& f, const std::string& path) { const size_t hsize = 256; ccp4_header.resize(hsize); @@ -236,7 +302,7 @@ struct Ccp4Base { if (machst[0] != 0x44 && machst[0] != 0x11) fail("Unsupported machine stamp (endianness) in the file?"); same_byte_order = machst[0] == (is_little_endian() ? 0x44 : 0x11); - size_t ext_w = header_i32(24) / 4; // NSYMBT in words + size_t ext_w = header_i32(24) / 4; if (ext_w != 0) { if (ext_w > 1000000) fail("Unexpectedly long extended header: " + path); @@ -269,12 +335,22 @@ struct Ccp4Base { } }; +/// @brief CCP4 map file container with typed grid data. +/// @tparam T Data type for grid values (typically float, int8_t, int16_t, or uint16_t) +/// +/// Extends Ccp4Base to hold both the raw CCP4 header and the grid data in memory. +/// The grid axes may not be in XYZ order when read; call setup() to reorder if needed. template struct Ccp4 : public Ccp4Base { - Grid grid; - - /// If the header is empty, prepare it; otherwise, update only MODE - /// and, if update_stats==true, also DMIN, DMAX, DMEAN and RMS. + Grid grid; ///< The 3D grid of map values + + /// @brief Create or update the CCP4 header with mode and statistics. + /// + /// If the header is empty, creates it with full grid metadata. + /// If the header exists, updates MODE word and optionally DMIN/DMAX/DMEAN/RMS. + /// @param mode CCP4 data mode (-1 to auto-detect from T), or explicit mode value + /// @param update_stats If true, compute and store min/max/mean/rms from grid data + /// @throws If grid is empty, not setup, or mode cannot be determined void update_ccp4_header(int mode=-1, bool update_stats=true) { if (mode > 2 && mode != 6) fail("Only modes 0, 1, 2 and 6 are supported."); @@ -295,6 +371,8 @@ struct Ccp4 : public Ccp4Base { update_header_mode_and_stats(mode); } + /// @brief Detect CCP4 mode value for the data type T. + /// @return CCP4 mode (0, 1, 2, 6) or -1 if type is not supported for mode static int mode_for_data() { if (std::is_same::value) return 0; @@ -307,35 +385,64 @@ struct Ccp4 : public Ccp4Base { return -1; } + /// @brief Check if the map data covers the full unit cell. + /// @return True if NXSTART, NYSTART, NZSTART are 0 and grid matches cell sampling bool full_cell() const { return full_cell_(grid); } + /// @brief Read CCP4 header from stream and initialize grid metadata. + /// @param f Input stream positioned at file start + /// @param path File path for error reporting void read_ccp4_header(AnyStream& f, const std::string& path) { read_ccp4_header_(&grid, f, path); if (grid.axis_order != AxisOrder::Unknown) grid.calculate_spacing(); } + /// @brief Transform map axes and/or expand to full unit cell. + /// @param default_value Value to use for grid points outside the map region + /// @param mode Control what transformations to apply (Full, NoSymmetry, or ReorderOnly) + /// @note Modifies grid dimensions, axis order, and data layout; call after reading header void setup(T default_value, MapSetup mode=MapSetup::Full); + + /// @brief Trim the map to a region specified in fractional coordinates. + /// @param box Region bounds in fractional coordinates [0, 1] + /// @note Only works after setup(); requires full_cell() == true and XYZ axis order void set_extent(const Box& box); + /// @brief Read CCP4 map data and header from a stream. + /// @param f Input stream positioned at file start + /// @param path File path for error reporting + /// @note Reads both header and data; grid data is resized and populated void read_ccp4_stream(AnyStream& f, const std::string& path); + /// @brief Read CCP4 map from a file path. + /// @param path Path to CCP4 map file void read_ccp4_file(const std::string& path) { FileStream stream(path.c_str(), "rb"); read_ccp4_stream(stream, path); } + /// @brief Read CCP4 map from memory buffer. + /// @param data Pointer to file contents in memory + /// @param size Size of buffer in bytes + /// @param name Label for error messages void read_ccp4_from_memory(const char* data, size_t size, const std::string& name) { MemoryStream stream(data, size); read_ccp4_stream(stream, name); } + /// @brief Read CCP4 map using a generic input adapter. + /// @tparam Input Type with create_stream() and path() methods + /// @param input Input adapter (e.g., MaybeGzipped for .map.gz files) template void read_ccp4(Input&& input) { std::unique_ptr stream = input.create_stream(); read_ccp4_stream(*stream, input.path()); } + /// @brief Write CCP4 map and data to file. + /// @param path Output file path + /// @note Header must be set (via update_ccp4_header) before writing void write_ccp4_map(const std::string& path) const; }; @@ -392,6 +499,7 @@ void write_data(const std::vector& content, FILE* f) { // This function was tested only on little-endian machines, // let us know if you need support for other architectures. +/// @brief Implementation of read_ccp4_stream for template specialization. template void Ccp4::read_ccp4_stream(AnyStream& f, const std::string& path) { read_ccp4_header(f, path); @@ -410,8 +518,6 @@ void Ccp4::read_ccp4_stream(AnyStream& f, const std::string& path) { else fail("Mode " + std::to_string(mode) + " is not supported " "(only 0, 1, 2 and 6 are supported)."); - //if (std::fgetc(f) != EOF) - // fail("The map file is longer then expected."); if (!same_byte_order) { if (sizeof(T) == 2) @@ -423,6 +529,7 @@ void Ccp4::read_ccp4_stream(AnyStream& f, const std::string& path) { } } +/// @brief Implementation of setup() for template specialization. template void Ccp4::setup(T default_value, MapSetup mode) { if (grid.axis_order == AxisOrder::XYZ || ccp4_header.empty()) @@ -448,10 +555,10 @@ void Ccp4::setup(T default_value, MapSetup mode) { grid.nu = sampl[0]; grid.nv = sampl[1]; grid.nw = sampl[2]; - set_header_3i32(5, 0, 0, 0); // start + set_header_3i32(5, 0, 0, 0); } - set_header_3i32(1, grid.nu, grid.nv, grid.nw); // NX, NY, NZ - set_header_3i32(17, 1, 2, 3); // axes (MAPC, MAPR, MAPS) + set_header_3i32(1, grid.nu, grid.nv, grid.nw); + set_header_3i32(17, 1, 2, 3); grid.axis_order = full_cell() ? AxisOrder::XYZ : AxisOrder::Unknown; if (grid.axis_order == AxisOrder::XYZ) grid.calculate_spacing(); @@ -461,9 +568,9 @@ void Ccp4::setup(T default_value, MapSetup mode) { std::vector full(grid.point_count(), default_value); int it[3]; int idx = 0; - for (it[2] = start[2]; it[2] < end[2]; it[2]++) // sections - for (it[1] = start[1]; it[1] < end[1]; it[1]++) // rows - for (it[0] = start[0]; it[0] < end[0]; it[0]++) { // cols + for (it[2] = start[2]; it[2] < end[2]; it[2]++) + for (it[1] = start[1]; it[1] < end[1]; it[1]++) + for (it[0] = start[0]; it[0] < end[0]; it[0]++) { T val = grid.data[idx++]; size_t new_index = grid.index_s(it[pos[0]], it[pos[1]], it[pos[2]]); full[new_index] = val; @@ -472,13 +579,13 @@ void Ccp4::setup(T default_value, MapSetup mode) { } if (mode == MapSetup::Full && - // no need to apply symmetry if we started with the whole cell (end[pos[0]] - start[pos[0]] < sampl[0] || end[pos[1]] - start[pos[1]] < sampl[1] || end[pos[2]] - start[pos[2]] < sampl[2])) grid.symmetrize_nondefault(default_value); } +/// @brief Implementation of set_extent() for template specialization. template void Ccp4::set_extent(const Box& box) { if (ccp4_header.empty()) @@ -501,12 +608,12 @@ void Ccp4::set_extent(const Box& box) { grid.nu = nu; grid.nv = nv; grid.nw = nw; - set_header_3i32(1, grid.nu, grid.nv, grid.nw); // NX, NY, NZ + set_header_3i32(1, grid.nu, grid.nv, grid.nw); set_header_3i32(5, u0, v0, w0); - // AxisOrder::XYZ is used only for grid covering full cell grid.axis_order = AxisOrder::Unknown; } +/// @brief Implementation of write_ccp4_map() for template specialization. template void Ccp4::write_ccp4_map(const std::string& path) const { assert(ccp4_header.size() >= 256); @@ -523,8 +630,21 @@ void Ccp4::write_ccp4_map(const std::string& path) const { impl::write_data(grid.data, f.get()); } +/// @brief Convenience function to read a CCP4 electron density map. +/// @param path File path to .map or .map.gz file +/// @param setup If true, call setup(NAN) to expand and reorder to full cell +/// @return Ccp4 object with grid data and header GEMMI_DLL Ccp4 read_ccp4_map(const std::string& path, bool setup); + +/// @brief Convenience function to read a CCP4 binary mask. +/// @param path File path to .msk or mask file +/// @param setup If true, call setup(-1) to expand to full cell +/// @return Ccp4 object with int8_t grid data (0=false, non-zero=true) GEMMI_DLL Ccp4 read_ccp4_mask(const std::string& path, bool setup); + +/// @brief Read only the CCP4 header without allocating grid data. +/// @param path File path to CCP4 map file +/// @return Ccp4Base with header and metadata (grid data not populated) GEMMI_DLL Ccp4Base read_ccp4_header(const std::string& path); } // namespace gemmi diff --git a/include/gemmi/dencalc.hpp b/include/gemmi/dencalc.hpp index f5d9217ba..184320d75 100644 --- a/include/gemmi/dencalc.hpp +++ b/include/gemmi/dencalc.hpp @@ -1,3 +1,11 @@ +/// @file dencalc.hpp +/// @brief Electron density calculation from atomic coordinates using Gaussian density distributions. +/// +/// Provides DensityCalculator struct for placing atomic Gaussian electron density onto a 3D grid, +/// with support for isotropic and anisotropic B-factors, occupancy, X-ray scattering factors, +/// and electron scattering (Coulomb potential). Used to generate synthetic electron density maps +/// from an atomic model for comparison with experimental maps or map correlation calculations. + // Copyright 2019 Global Phasing Ltd. // // Tools to prepare a grid with values of electron density of a model. @@ -13,6 +21,16 @@ namespace gemmi { +/// @brief Find the radius at which a radial density function falls below a cutoff level. +/// @tparam N Number of Gaussian components in the exponential sum +/// @tparam Real Floating-point type (float or double) +/// @param x1 Initial search radius (in Angstroms or grid units) +/// @param precal Precalculated exponential sum object providing calculate() and derivative +/// @param cutoff_level Density threshold; radius is where |density| equals this value +/// @return Radius (distance) at which the density function equals the cutoff level +/// +/// Uses binary search with special handling for addends (like Mott-Bethe factor) that +/// may cause the function to rise then fall, rather than monotonically decreasing. template Real determine_cutoff_radius(Real x1, const ExpSum& precal, Real cutoff_level) { Real y1, dy; @@ -60,37 +78,70 @@ Real determine_cutoff_radius(Real x1, const ExpSum& precal, Real cutoff return x1 + (x1 - x2) / (y1 - y2) * (cutoff_level - y1); } -// approximated radius of electron density (IT92) above cutoff=1e-5 for C +/// @brief Approximate radial extent of electron density for a given B-factor. +/// @tparam Real Floating-point type +/// @param b Isotropic B-factor (in Angstrom^2) +/// @return Approximate radius (in Angstroms) at which density falls to ~1e-5 (International Tables Vol. C formula) +/// +/// Used as initial guess for cutoff radius search; applicable to X-ray scattering factors (IT92). template Real it92_radius_approx(Real b) { return (8.5f + 0.075f * b) / (2.4f + 0.0045f * b); } -// Usual usage: -// - set d_min and optionally also other parameters, -// - set addends to f' values for your wavelength (see fprime.hpp) -// - use grid.setup_from() to set grid's unit cell and space group -// - check that Table has SF coefficients for all elements that are to be used -// - call put_model_density_on_grid() -// - do FFT using transform_map_to_f_phi() -// - if blur is used, multiply the SF by reciprocal_space_multiplier() +/// @brief Calculate electron density from an atomic model on a regular grid. +/// @tparam Table Scattering factor coefficients table (e.g., IT92, Cromer-Mann, electron scattering) +/// @tparam GReal Type for grid values (float or double) +/// +/// Places Gaussian electron density contributions from atoms onto a 3D grid. +/// Supports isotropic and anisotropic B-factors, occupancy, X-ray and electron scattering. +/// +/// Typical workflow: +/// - Construct DensityCalculator; set grid via grid.setup_from(structure) +/// - Set d_min (resolution) and other parameters (blur, cutoff) +/// - Optionally set addends (wavelength-dependent f' anomalous corrections) +/// - Call put_model_density_on_grid(model) to populate the grid +/// - Use transform_map_to_f_phi() for FFT to reciprocal space +/// - If blur > 0, multiply reciprocal-space data by reciprocal_space_multiplier() template struct DensityCalculator { - // GReal = type of grid; CReal = type of coefficients in Table + /// @brief Type of grid (provided as template parameter) using CReal = typename Table::Coef::coef_type; + + /// @brief Output electron density grid (unit cell and space group set via setup_from()) Grid grid; + + /// @brief Target d_min resolution (Angstroms); grid sampling = d_min / (2 * rate) double d_min = 0.; + + /// @brief Oversampling rate relative to d_min (default 1.5 = 50% oversampling) double rate = 1.5; + + /// @brief Additional blur (B-factor) to apply to all atoms (Angstrom^2); default 0 double blur = 0.; + + /// @brief Density cutoff for determining atom-to-grid interaction radius (default 1e-5) float cutoff = 1e-5f; + #if GEMMI_COUNT_DC + /// @brief Count of atoms added (debugging; only if GEMMI_COUNT_DC defined) size_t atoms_added = 0; + /// @brief Count of density calculations (debugging; only if GEMMI_COUNT_DC defined) size_t density_computations = 0; #endif + + /// @brief Wavelength-dependent f' corrections (additive anomalous factors) Addends addends; + /// @brief Compute grid spacing (Angstroms per voxel) based on d_min and rate. + /// @return Grid spacing; 0 if d_min not set double requested_grid_spacing() const { return d_min / (2 * rate); } + /// @brief Set blur to match Refmac5 conventions: depends on existing grid and model B-factors. + /// @param model Atomic model providing B-factor statistics + /// @param allow_negative If true, blur can be negative (default false = clamp to 0) + /// + /// Calculates blur such that the effective B-factor on the model matches Refmac defaults. void set_refmac_compatible_blur(const Model& model, bool allow_negative=false) { double spacing = requested_grid_spacing(); if (spacing <= 0) @@ -101,19 +152,35 @@ struct DensityCalculator { blur = 0.; } - // pre: check if Table::has(atom.element) + /// @brief Add electron density contribution of a single atom to the grid. + /// @param atom Atom with element, position, B-factor, occupancy, and anisotropic info + /// @pre Table must have scattering factor coefficients for atom.element + /// + /// Places Gaussian density with appropriate radius based on B-factor and scattering factors. + /// Handles both isotropic and anisotropic B-factors. Occupancy and anomalous addends applied. void add_atom_density_to_grid(const Atom& atom) { Element el = atom.element; const auto& coef = Table::get(el, atom.charge, atom.serial); do_add_atom_density_to_grid(atom, coef, addends.get(el)); } - // Parameter c is a constant factor and has the same meaning as either addend - // or c in scattering factor coefficients (a1, b1, ..., c). + /// @brief Add a constant radial density contribution for an atom (for special cases). + /// @param atom Atom providing position, occupancy + /// @param c Constant density factor (as in scattering factor coefficients or addends) + /// + /// Useful for adding constant density contributions (e.g., Mott-Bethe factor for electron scattering). void add_c_contribution_to_grid(const Atom& atom, float c) { do_add_atom_density_to_grid(atom, GaussianCoef<0, 1, CReal>{0}, c); } + /// @brief Estimate the interaction radius for density based on precalculated B-factor. + /// @tparam N Number of exponential components + /// @param precal Precalculated exponential sum object + /// @param b Isotropic B-factor (Angstrom^2) + /// @return Interaction radius (Angstroms) where density falls below cutoff + /// + /// For single-Gaussian scattering factors (N=1), computes analytically. + /// For multi-Gaussian (N>1), uses determine_cutoff_radius() binary search. template CReal estimate_radius(const ExpSum& precal, CReal b) const { if (N == 1) @@ -122,6 +189,14 @@ struct DensityCalculator { return determine_cutoff_radius(x1, precal, (CReal)cutoff); } + /// @brief Internal: place electron density on grid for an atom with given scattering factors. + /// @tparam Coef Scattering factor coefficients type + /// @param atom Atom with position, occupancy, B-factor, anisotropic U-tensor + /// @param coef Precalculated scattering factor coefficients (from Table::get()) + /// @param addend Wavelength-dependent anomalous correction (f') added to constant term + /// + /// Handles isotropic B-factor case with radial density sampling and anisotropic case + /// with box-based sampling respecting the anisotropic U-tensor. template void do_add_atom_density_to_grid(const Atom& atom, const Coef& coef, float addend) { #if GEMMI_COUNT_DC @@ -162,6 +237,11 @@ struct DensityCalculator { } } + /// @brief Clear grid data and allocate size based on d_min and rate, or use existing grid. + /// @throws Fails if d_min not set and grid has no existing dimensions + /// + /// Sets grid size for FFT-friendly dimensions if d_min > 0. + /// Otherwise assumes grid already configured by user (via setup_from). void initialize_grid() { grid.data.clear(); double spacing = requested_grid_spacing(); @@ -174,6 +254,11 @@ struct DensityCalculator { fail("initialize_grid(): d_min is not set"); } + /// @brief Add electron density contributions from all atoms in a model. + /// @param model Atomic model with chains, residues, atoms + /// + /// Iterates through all atoms and calls add_atom_density_to_grid() for each. + /// Grid must already be initialized. void add_model_density_to_grid(const Model& model) { grid.check_not_empty(); for (const Chain& chain : model.chains) @@ -182,22 +267,37 @@ struct DensityCalculator { add_atom_density_to_grid(atom); } + /// @brief Initialize grid and add all atom densities from a model. + /// @param model Atomic model + /// + /// Calls initialize_grid(), add_model_density_to_grid(), and symmetrize_sum(). void put_model_density_on_grid(const Model& model) { initialize_grid(); add_model_density_to_grid(model); grid.symmetrize_sum(); } - // deprecated, use directly grid.setup_from(st) + /// @brief Set grid unit cell and space group from a structure. + /// @param st Structure providing unit cell and space group + /// @deprecated Use grid.setup_from(st) directly void set_grid_cell_and_spacegroup(const Structure& st) { grid.setup_from(st); } - // The argument is 1/d^2 - as outputted by unit_cell.calculate_1_d2(hkl). + /// @brief Compute reciprocal-space multiplier for blur correction factor. + /// @param inv_d2 Inverse d-spacing squared (1/d^2) from unit_cell.calculate_1_d2(hkl) + /// @return Exponential factor: exp(blur * 0.25 * inv_d2) + /// + /// If blur was applied, multiply structure factors by this to correct for the applied blur. double reciprocal_space_multiplier(double inv_d2) const { return std::exp(blur * 0.25 * inv_d2); } + /// @brief Compute the Mott-Bethe factor for electron scattering. + /// @param hkl Miller indices + /// @return Mott-Bethe correction factor (optionally scaled by reciprocal_space_multiplier if blur > 0) + /// + /// For electron scattering, the Mott-Bethe factor approximates the Coulomb potential contribution. double mott_bethe_factor(const Miller& hkl) const { double inv_d2 = grid.unit_cell.calculate_1_d2(hkl); double factor = -mott_bethe_const() / inv_d2; diff --git a/include/gemmi/floodfill.hpp b/include/gemmi/floodfill.hpp index 8594f40c1..eca9d309a 100644 --- a/include/gemmi/floodfill.hpp +++ b/include/gemmi/floodfill.hpp @@ -1,3 +1,11 @@ +/// @file floodfill.hpp +/// @brief Flood fill algorithm for identifying connected regions in 3D grids. +/// +/// Implements a scanline-based flood fill for marking connected regions in a grid +/// with periodic boundary conditions and 6-way (face) connectivity. +/// Useful for identifying solvent envelopes, protein/ligand masks, or other +/// binary segmentation tasks on crystallographic maps. + // Copyright 2020 Global Phasing Ltd. // // The flood fill (scanline fill) algorithm for Grid. @@ -11,19 +19,32 @@ namespace gemmi { -// Land is either 0 (when 1=sea, 0=land) or 1 (when 0=sea, 1=land) +/// @brief Flood fill algorithm for finding connected regions in a 3D grid. +/// @tparam T Grid data type (typically int8_t for binary masks) +/// @tparam Land Value marking the region to fill (0 or 1); fills connected regions with this value +/// +/// Implements a scanline-based flood fill that respects periodic boundary conditions +/// and uses 6-way (face) connectivity. The algorithm identifies all grid points +/// with value @p Land that are connected to a seed point. template struct FloodFill { + /// @brief Reference to the grid being processed Grid& mask; + /// @brief Horizontal line segment within a flood fill region struct Line { - int u, v, w, ulen; - T* ptr; + int u; ///< Starting u (x) coordinate + int v; ///< v (y) coordinate + int w; ///< w (z) coordinate + int ulen; ///< Length of the line in u direction + T* ptr; ///< Pointer to first data element of the line }; + /// @brief Result of a flood fill operation: all connected line segments struct Result { - std::vector lines; + std::vector lines; ///< All horizontal lines composing the filled region + /// @brief Total number of points in the filled region size_t point_count() const { size_t n = 0; for (const Line& line : lines) @@ -32,8 +53,14 @@ struct FloodFill { } }; + /// @brief Internal marker value (combines Land with a flag bit) static constexpr T this_island() { return T(Land|2); } + /// @brief Mark all points in a line with a given value, handling periodic wraparound. + /// @param line Line segment to mark + /// @param value Value to write + /// + /// Handles periodic boundary conditions in the u-direction for lines that wrap around. void set_line_values(Line& line, T value) const { for (int i = 0; i < std::min(line.ulen, mask.nu - line.u); ++i) { assert(line.ptr[i] != value); @@ -45,12 +72,22 @@ struct FloodFill { } } + /// @brief Mark all lines in a filled region with a given value. + /// @param r Result containing all lines of the region + /// @param value Value to write to each line void set_volume_values(Result& r, T value) const { for (Line& line : r.lines) set_line_values(line, value); } - // Find all connected points with value Land. Change them to this_island(). + /// @brief Find all grid points with value Land connected to a seed point via face neighbors. + /// @param u Starting u (x) coordinate + /// @param v Starting v (y) coordinate + /// @param w Starting w (z) coordinate + /// @return Result containing all connected line segments; points are temporarily marked with this_island() + /// + /// Uses a scanline algorithm that expands from the seed point to find all connected regions. + /// Respects periodic boundary conditions and 6-way (face) connectivity. Result find_all_connected_points(int u, int v, int w) { Result r; T* ptr = &mask.data[mask.index_q(u, v, w)]; @@ -79,6 +116,12 @@ struct FloodFill { return r; } + /// @brief Iterate over all connected regions (islands) in the grid, calling a function on each. + /// @tparam Func Callable taking a Result (one island) + /// @param func Function invoked once per connected region + /// + /// Scans the entire grid, identifying connected regions with value Land. + /// After processing all regions, resets marked points back to Land. template void for_each_islands(Func func) { size_t idx = 0; @@ -98,6 +141,12 @@ struct FloodFill { } private: + /// @brief Internal: expand from a line to find connected lines in neighboring v and w planes. + /// @param u Starting u coordinate + /// @param v Starting v coordinate + /// @param w Starting w coordinate + /// @param ulen Length to check + /// @param r Result accumulating all connected lines void add_lines(int u, int v, int w, int ulen, Result& r) { T* ptr = &mask.data[mask.index_q(u, v, w)]; for (int i = 0; i < std::min(ulen, mask.nu - u); ++i) @@ -112,6 +161,15 @@ struct FloodFill { } } + /// @brief Internal: extract the full horizontal line containing a point. + /// @param u Starting u coordinate + /// @param v Starting v coordinate + /// @param w Starting w coordinate + /// @param ptr Pointer to grid data at (u, v, w) + /// @return Line segment with full extent in u direction, handling periodic wraparound + /// + /// Finds the maximal horizontal line of Land values containing the point, + /// handling wraparound in the periodic u-direction. Line line_from_point(int u, int v, int w, T* ptr) const { int len = 1; while (u + len < mask.nu && ptr[len] == Land) @@ -131,6 +189,13 @@ struct FloodFill { } }; +/// @brief Create a binary mask of grid points above (or below) a density threshold. +/// @param mask Output binary mask grid (1 = above threshold, 0 = below) +/// @param grid Input electron density or other continuous-valued grid +/// @param threshold Density threshold value +/// @param negate If true, invert the comparison (below threshold -> 1) +/// +/// Copies grid metadata to mask and sets mask data to 1 where grid > threshold (or < threshold if negate). inline void mask_nodes_above_threshold(Grid& mask, const Grid& grid, double threshold, bool negate=false) { mask.copy_metadata_from(grid); @@ -140,6 +205,17 @@ inline void mask_nodes_above_threshold(Grid& mask, const Grid threshold); } +/// @brief Identify connected regions above (or below) a threshold using flood fill starting from seeds. +/// @param grid Input electron density or other continuous-valued grid +/// @param seeds Vector of seed positions (Angstrom coordinates) to start the flood fill +/// @param threshold Density threshold; regions above this value are filled +/// @param negate If true, find regions below threshold instead +/// @return Binary mask grid (1 = in connected region from seeds, 0 = background) +/// +/// Creates a mask where 1 indicates points in regions connected to the seed points +/// and satisfying the threshold criterion. Useful for identifying solvent envelopes +/// (seed from solvent, find connected regions above threshold) or protein masks +/// (seed from protein region, find connected regions). inline Grid flood_fill_above(const Grid& grid, const std::vector& seeds, double threshold, diff --git a/include/gemmi/fourier.hpp b/include/gemmi/fourier.hpp index 31a441051..ef77b4970 100644 --- a/include/gemmi/fourier.hpp +++ b/include/gemmi/fourier.hpp @@ -1,3 +1,11 @@ +/// @file fourier.hpp +/// @brief Fourier transforms for converting between real-space electron density maps and reciprocal-space structure factors. +/// +/// This header provides functions to convert between real-space grids (electron density maps) and +/// reciprocal-space grids (structure factors with amplitudes and phases). +/// Uses the pocketfft library for 3D FFT operations and handles symmetry operations, +/// Friedel mate reconstruction, and both half-l (Hermitian) and full reciprocal-space representations. + // Copyright 2019 Global Phasing Ltd. // // Fourier transform applied to map coefficients. @@ -20,10 +28,11 @@ namespace gemmi { -// Returns angle in [0, 360). When it's negative but approximately zero, -// printing 0.00 than 360.00 might be better, hence the second arg. -// The default value is for similar precision as float around 360: -// std::nextafter(360.f, 361.f) - 360.f -> 3.052e-5 +/// @brief Convert complex number phase to angle in degrees [0, 360). +/// @tparam T Scalar type of complex number (float or double) +/// @param v Complex number +/// @param eps Threshold for treating negative angles as zero (default 2e-5 for float precision) +/// @return Phase angle in degrees; small negative angles are rounded to 0 instead of 360 template double phase_in_angles(const std::complex& v, double eps=2e-5) { double angle = gemmi::deg(std::arg(v)); @@ -32,7 +41,11 @@ double phase_in_angles(const std::complex& v, double eps=2e-5) { return std::max(0., angle); // the order matters when angle is -0.0 } -// the first arg is usually Mtz::data +/// @brief Append Miller indices, amplitudes, and phases from asymmetric unit to a flat vector. +/// @param float_data Output vector (usually Mtz::data); elements appended as [h, k, l, F, phi, ...] +/// @param asu_data ASU data with complex structure factors +/// +/// For each reflection in @p asu_data, appends 5 floats: h, k, l (as floats), |F|, and phase(degrees). inline void add_asu_f_phi_to_float_vector(std::vector& float_data, const AsuData>& asu_data) { float_data.reserve(float_data.size() + asu_data.v.size() * 5); @@ -44,6 +57,12 @@ inline void add_asu_f_phi_to_float_vector(std::vector& float_data, } } +/// @brief Calculate grid size needed to accommodate Miller indices from diffraction data. +/// @tparam DataProxy Type with unit_cell(), spacegroup(), and get_hkl() methods +/// @param data Diffraction data (MTZ-like) +/// @param min_size Minimum grid size [nu, nv, nw] +/// @param sample_rate Desired sampling rate (1/Angstrom per voxel); if <= 0, only checks Miller range +/// @return Grid dimensions adjusted for largest observed Miller indices and rounding to FFT-friendly factors template std::array get_size_for_hkl(const DataProxy& data, std::array min_size, @@ -73,6 +92,11 @@ std::array get_size_for_hkl(const DataProxy& data, return good_grid_size(dsize, GridSizeRounding::Up, data.spacegroup()); } +/// @brief Check if all Miller indices in data fit within grid dimensions. +/// @tparam DataProxy Type with get_hkl() and size() methods +/// @param data Diffraction data +/// @param size Grid dimensions [nu, nv, nw] +/// @return True if all Miller indices (h, k, l) satisfy 2*|hkl[i]| < size[i] template bool data_fits_into(const DataProxy& data, std::array size) { for (size_t i = 0; i < data.size(); i += data.stride()) { @@ -84,6 +108,12 @@ bool data_fits_into(const DataProxy& data, std::array size) { return true; } +/// @brief Reconstruct Friedel mates (complex conjugates) from half of reciprocal space. +/// @tparam T Data type (typically complex or complex) +/// @param grid Reciprocal-space grid; missing Friedel-related reflections are filled in +/// +/// For non-centrosymmetric structures, if F(h,k,l) is known but F(-h,-k,-l) is missing, +/// fills in the conjugate. Respects grid axis order and half_l mode. template void add_friedel_mates(ReciprocalGrid& grid) { const T default_val = T(); // initialized to 0 or 0+0i @@ -128,6 +158,14 @@ void add_friedel_mates(ReciprocalGrid& grid) { } } +/// @brief Initialize a reciprocal-space grid with cell, space group, and dimension info. +/// @tparam T Data type (typically complex or complex) +/// @tparam DataProxy Type with unit_cell(), spacegroup() methods +/// @param grid Output reciprocal grid +/// @param data Source providing unit cell and space group +/// @param size Target grid dimensions [nu, nv, nw] (may be modified for half_l) +/// @param half_l If true, store only l>=0; grid nw becomes nw/2+1 +/// @param axis_order XYZ (h,k,l) or ZYX ordering for grid axes template void initialize_hkl_grid(ReciprocalGrid& grid, const DataProxy& data, std::array size, bool half_l, @@ -148,15 +186,26 @@ void initialize_hkl_grid(ReciprocalGrid& grid, const DataProxy& data, grid.set_size_without_checking(size[0], size[1], size[2]); } +/// @brief Adapter to extract F (amplitude) and phi (phase) columns from diffraction data. +/// @tparam DataProxy Base data proxy type (typically MTZ-like with stride and columns) +/// +/// Wraps a data proxy to present only F and phi columns, with phi optionally defaulting to 0. template struct FPhiProxy : DataProxy { + /// @brief Initialize proxy with column indices. + /// @param data_proxy Source data + /// @param f_col Column offset for amplitude F + /// @param phi_col Column offset for phase (degrees); -1 means use 0 + /// @throws Fails if column indices are out of range FPhiProxy(const DataProxy& data_proxy, size_t f_col, size_t phi_col) : DataProxy(data_proxy), f_col_(f_col), phi_col_(phi_col) { if (f_col >= data_proxy.stride() || (phi_col >= data_proxy.stride() && phi_col != size_t(-1))) fail("Map coefficients not found."); } using real = typename DataProxy::num_type; + /// @brief Get amplitude F at given offset. real get_f(size_t offset) const { return this->get_num(offset + f_col_); } + /// @brief Get phase in radians at given offset; returns 0 if phi column not set. double get_phi(size_t offset) const { return phi_col_ != (size_t)-1 ? rad(this->get_num(offset + phi_col_)) : 0; } @@ -164,8 +213,14 @@ struct FPhiProxy : DataProxy { size_t f_col_, phi_col_; }; -// If half_l is true, grid has only data with l>=0. -// Parameter size can be obtained from get_size_for_hkl(). +/// @brief Populate a reciprocal-space grid with complex structure factors from F,phi data. +/// @tparam T Data type (typically complex or complex) +/// @tparam FPhi Type providing get_f(), get_phi(), and HKL-related methods +/// @param fphi F/phi data (typically MTZ or FPhiProxy) +/// @param size Grid dimensions (from get_size_for_hkl()); will be adjusted for half_l +/// @param half_l If true, store only l>=0; applies Friedel symmetry to l<0 +/// @param axis_order XYZ or ZYX grid axis ordering (default XYZ) +/// @return Grid with complex structure factors placed on HKL grid, symmetry-expanded template FPhiGrid get_f_phi_on_grid(const FPhi& fphi, std::array size, bool half_l, @@ -200,6 +255,15 @@ FPhiGrid get_f_phi_on_grid(const FPhi& fphi, return grid; } +/// @brief Populate a reciprocal-space grid with real-valued data from a single column. +/// @tparam T Data type (typically float or double) +/// @tparam DataProxy Type with get_num() and HKL-related methods +/// @param data Diffraction data (MTZ-like) +/// @param column Column index (offset) to extract values +/// @param size Grid dimensions; adjusted for half_l +/// @param half_l If true, use only l>=0; applies Friedel symmetry to l<0 +/// @param axis_order XYZ or ZYX grid axis ordering +/// @return Grid with real values placed on HKL grid, symmetry-expanded template ReciprocalGrid get_value_on_grid(const DataProxy& data, size_t column, std::array size, bool half_l, @@ -234,6 +298,13 @@ ReciprocalGrid get_value_on_grid(const DataProxy& data, size_t column, } +/// @brief Convert a reciprocal-space grid to a real-space electron density map via inverse FFT. +/// @tparam T Scalar type (typically float or double) +/// @param hkl Reciprocal-space (F, phi) grid; modified in-place (complex conjugated and FFT'd) +/// @param map Output real-space map; size and metadata set from hkl +/// +/// Applies inverse 3D FFT using pocketfft. Handles half-l mode (Hermitian symmetry) and +/// both axis orders (XYZ and ZYX). Negates imaginary parts and normalizes by cell volume. template void transform_f_phi_grid_to_map_(FPhiGrid&& hkl, Grid& map) { // NaNs are not good for FFT, so we change them to 0. @@ -282,6 +353,10 @@ void transform_f_phi_grid_to_map_(FPhiGrid&& hkl, Grid& map) { } } +/// @brief Convert a reciprocal-space grid to a real-space map via inverse FFT. +/// @tparam T Scalar type (float or double) +/// @param hkl Reciprocal-space grid (consumed) +/// @return Real-space map with same unit cell, space group, and axis order as input template Grid transform_f_phi_grid_to_map(FPhiGrid&& hkl) { Grid map; @@ -289,6 +364,15 @@ Grid transform_f_phi_grid_to_map(FPhiGrid&& hkl) { return map; } +/// @brief Convert diffraction data (F, phi) to a real-space electron density map. +/// @tparam T Data type for output map (float or double) +/// @tparam FPhi Type providing get_f(), get_phi(), unit_cell(), spacegroup() +/// @param fphi F/phi data +/// @param size Minimum grid dimensions [nu, nv, nw]; auto-adjusted unless exact_size=true +/// @param sample_rate Sampling rate (1/Angstrom); grid expanded if needed (ignored if exact_size) +/// @param exact_size If true, use @p size exactly without adjustment (must be FFT-friendly) +/// @param order XYZ or ZYX grid axis ordering +/// @return Real-space electron density map template Grid transform_f_phi_to_map(const FPhi& fphi, std::array size, @@ -303,6 +387,15 @@ Grid transform_f_phi_to_map(const FPhi& fphi, return transform_f_phi_grid_to_map(get_f_phi_on_grid(fphi, size, true, order)); } +/// @brief Convert F,phi to map, supporting both minimum and exact size specifications. +/// @tparam T Map data type +/// @tparam FPhi F/phi data type +/// @param fphi Diffraction data with F and phase +/// @param min_size Minimum grid dimensions [nu, nv, nw] +/// @param sample_rate Target sampling rate (1/Angstrom) +/// @param exact_size Exact grid size to use; if any element is non-zero, exact_size overrides min_size +/// @param order Grid axis order (XYZ or ZYX) +/// @return Real-space electron density map template Grid transform_f_phi_to_map2(const FPhi& fphi, std::array min_size, @@ -314,6 +407,13 @@ Grid transform_f_phi_to_map2(const FPhi& fphi, sample_rate, exact, order); } +/// @brief Convert a real-space electron density map to reciprocal-space structure factors via FFT. +/// @tparam T Scalar type (float or double) +/// @param map Real-space electron density grid (not modified) +/// @param half_l If true, store only l>=0 (Hermitian symmetry); if false, store full reciprocal space +/// @param use_scale If true, normalize by cell volume; if false, normalize by point count +/// @return Complex-valued reciprocal-space grid with structure factors F + i*0 +/// @throws Fails if half_l=true and ZYX axis order (not yet supported) template FPhiGrid transform_map_to_f_phi(const Grid& map, bool half_l, bool use_scale=true) { if (half_l && map.axis_order == AxisOrder::ZYX) diff --git a/include/gemmi/grid.hpp b/include/gemmi/grid.hpp index 38fe3d73e..c9bd76558 100644 --- a/include/gemmi/grid.hpp +++ b/include/gemmi/grid.hpp @@ -1,3 +1,10 @@ +/// @file grid.hpp +/// @brief 3D crystallographic grids for electron density maps, cell-method search, and reflection data. +/// +/// This header provides template classes for regular 3D grids on a crystallographic unit cell, +/// with support for symmetry operations, interpolation (trilinear and tricubic), +/// and operations on grid points within specified regions or radii. + // Copyright 2017 Global Phasing Ltd. // // 3d grids used by CCP4 maps, cell-method search and hkl data. @@ -19,21 +26,29 @@ namespace gemmi { -/// Order of grid axis. Some Grid functionality works only with the XYZ order. -/// The values XYZ and ZYX are used only when the grid covers whole unit cell. +/// @brief Order of grid axes relative to unit cell axes (a, b, c). +/// +/// Not all functionality works with all axis orders; many operations require XYZ order. +/// The values XYZ and ZYX are used only when the grid covers the whole unit cell. enum class AxisOrder : unsigned char { - Unknown, - XYZ, // default, corresponds to CCP4 map with axis order XYZ, - // i.e. index X (H in reciprocal space) is fast and Z (or L) is slow - ZYX // fast Z (or L), may not be fully supported everywhere + Unknown, ///< Axis order not determined + XYZ, ///< Grid axes correspond to a, b, c (default, CCP4 convention). + ///< Index X (H in reciprocal space) varies fastest; Z (or L) varies slowest. + ZYX ///< Grid axes reversed: Z varies fastest, X varies slowest. + ///< May not be fully supported everywhere. }; +/// @brief Strategy for rounding calculated grid dimensions to suitable values. enum class GridSizeRounding { - Nearest, - Up, - Down + Nearest, ///< Round to nearest value with small prime factors (2, 3, 5). + Up, ///< Round up (ceil) to next value with small prime factors. + Down ///< Round down (floor) to previous value with small prime factors. }; +/// @brief Compute mathematical modulo (a mod n), always returning a value in [0, n). +/// @param a value to take modulo +/// @param n modulus (n > 0) +/// @return Value in range [0, n) inline int modulo(int a, int n) { if (a >= n) a %= n; @@ -42,6 +57,9 @@ inline int modulo(int a, int n) { return a; } +/// @brief Check if n has only small prime factors (2, 3, 5). +/// @param n Integer to check +/// @return True if n = 2^a * 3^b * 5^c for non-negative a, b, c inline bool has_small_factorization(int n) { while (n % 2 == 0) n /= 2; @@ -51,6 +69,12 @@ inline bool has_small_factorization(int n) { return n == 1 || n == -1; } +/// @brief Round a value to the nearest integer with only small prime factors. +/// +/// Useful for choosing FFT-friendly grid dimensions. +/// @param exact Exact floating-point value +/// @param rounding Rounding strategy (Up, Down, or Nearest) +/// @return Integer >= 1 with only 2, 3, 5 as prime factors, closest to @p exact per the strategy inline int round_with_small_factorization(double exact, GridSizeRounding rounding) { int n; if (rounding == GridSizeRounding::Up) { @@ -75,6 +99,14 @@ inline int round_with_small_factorization(double exact, GridSizeRounding roundin return n; } +/// @brief Compute suitable grid dimensions respecting space group symmetry and FFT efficiency. +/// +/// Takes into account space group symmetry factors and symmetry-related directions +/// (which must have equal grid sizes), and rounds to dimensions with small prime factors. +/// @param limit Target grid dimensions (approximate) +/// @param rounding Rounding strategy for each dimension +/// @param sg Space group constraints (may be null for P1) +/// @return Array of three grid dimensions {nu, nv, nw} inline std::array good_grid_size(std::array limit, GridSizeRounding rounding, const SpaceGroup* sg) { @@ -113,9 +145,18 @@ inline std::array good_grid_size(std::array limit, return m; } +/// @brief A crystallographic symmetry operation scaled for grid coordinates. +/// +/// The scaled operation directly transforms grid indices by applying the +/// rotation matrix and scaled translation (pre-scaled by grid dimensions). struct GridOp { - Op scaled_op; + Op scaled_op; ///< Crystallographic operation with translation scaled to grid coordinates + /// @brief Apply the operation to grid indices. + /// @param u Grid index along first axis + /// @param v Grid index along second axis + /// @param w Grid index along third axis + /// @return Transformed grid indices {u', v', w'} std::array apply(int u, int v, int w) const { std::array t; const Op::Rot& rot = scaled_op.rot; @@ -125,6 +166,13 @@ struct GridOp { } }; +/// @brief Verify that grid dimensions are compatible with space group symmetry. +/// +/// Checks that each dimension is divisible by the corresponding space group factor, +/// and that symmetry-related directions have equal grid sizes. +/// @param sg Space group to validate against (may be null for P1) +/// @param size Grid dimensions {nu, nv, nw} +/// @throws Raises an exception if constraints are violated inline void check_grid_factors(const SpaceGroup* sg, std::array size) { if (sg) { GroupOps gops = sg->operations(); @@ -139,17 +187,35 @@ inline void check_grid_factors(const SpaceGroup* sg, std::array size) { } } +/// @brief Linear interpolation (lerp) between two values. +/// @param a Start value +/// @param b End value +/// @param t Interpolation parameter in [0, 1]; 0 returns a, 1 returns b +/// @return Interpolated value a + (b - a) * t inline double lerp_(double a, double b, double t) { return a + (b - a) * t; } + +/// @brief Linear interpolation for complex numbers. template std::complex lerp_(std::complex a, std::complex b, double t) { return a + (b - a) * (T) t; } -/// Catmull–Rom spline interpolation. CINT_u from: -/// https://en.wikipedia.org/wiki/Cubic_Hermite_spline -/// The same as (24) in https://journals.iucr.org/d/issues/2018/06/00/ic5103/ +/// @brief Catmull–Rom cubic spline interpolation. +/// @details Interpolates a cubic between points b and c given neighboring points a and d. +/// Uses the Catmull–Rom formula (equation 24 in the reference below). +/// @param u Parameter in [0, 1], where 0 → b, 1 → c +/// @param a Value at u = -1 +/// @param b Value at u = 0 +/// @param c Value at u = 1 +/// @param d Value at u = 2 +/// @return Interpolated value +/// @par References +/// Afonine, P.V., Poon, B.K., Read, R.J., Sobolev, O.V., Terwilliger, T.C., +/// Urzhumtsev, A. & Adams, P.D. (2018). Real-space refinement in PHENIX for +/// cryo-EM and crystallography. Acta Cryst. D74, 531–544. +/// https://doi.org/10.1107/S2059798318006551 inline double cubic_interpolation(double u, double a, double b, double c, double d) { //return 0.5 * u * (u * (u * (3*b - 3*c + d - a) + (2*a - 5*b + 4*c - d)) + (c - a)) + b; // equivalent form that is faster on my computer: @@ -157,30 +223,61 @@ inline double cubic_interpolation(double u, double a, double b, double c, double u * (c * ((3*u - 4) * u - 1) - d * (u-1) * u)); } -/// df/du (from Wolfram Alpha) +/// @brief First derivative of cubic spline interpolation. +/// +/// Computes df/du for Catmull–Rom interpolation. +/// @param u Parameter in [0, 1] +/// @param a Value at u = -1 +/// @param b Value at u = 0 +/// @param c Value at u = 1 +/// @param d Value at u = 2 +/// @return df/du at parameter u inline double cubic_interpolation_der(double u, double a, double b, double c, double d) { return a * (-1.5*u*u + 2*u - 0.5) + c * (-4.5*u*u + 4*u + 0.5) + u * (4.5*b*u - 5*b + 1.5*d*u - d); } -/// The base of Grid classes that does not depend on stored data type. +/// @brief Metadata common to all grid types (not dependent on stored data type). +/// +/// Contains unit cell, space group, grid dimensions, and indexing operations. +/// Grid indices u, v, w are in the range [0, nu), [0, nv), [0, nw) for valid grids. struct GridMeta { - UnitCell unit_cell; - const SpaceGroup* spacegroup = nullptr; - int nu = 0, nv = 0, nw = 0; - AxisOrder axis_order = AxisOrder::Unknown; + UnitCell unit_cell; ///< Unit cell of the crystal + const SpaceGroup* spacegroup; ///< Space group (may be nullptr for P1) + int nu = 0, nv = 0, nw = 0; ///< Grid dimensions + AxisOrder axis_order = AxisOrder::Unknown; ///< Grid axis correspondence to a, b, c + /// @brief Total number of grid points. + /// @return nu * nv * nw size_t point_count() const { return (size_t)nu * nv * nw; } - /// u,v,w are not normalized here + + /// @brief Convert grid indices to fractional coordinates. + /// + /// @param u Grid index (not normalized to [0, 1)) + /// @param v Grid index (not normalized to [0, 1)) + /// @param w Grid index (not normalized to [0, 1)) + /// @return Fractional coordinates {u/nu, v/nv, w/nw} Fractional get_fractional(int u, int v, int w) const { return {u * (1.0 / nu), v * (1.0 / nv), w * (1.0 / nw)}; } + + /// @brief Convert grid indices to orthogonal (Cartesian) coordinates. + /// @param u Grid index + /// @param v Grid index + /// @param w Grid index + /// @return Orthogonal position in Angstroms Position get_position(int u, int v, int w) const { return unit_cell.orthogonalize(get_fractional(u, v, w)); } - // operations re-scaled for faster later calculations; identity not included + /// @brief Get symmetry operations scaled to grid coordinates (excluding identity). + /// + /// Returns all non-identity symmetry operations with translations and rotations + /// pre-scaled for direct application to grid indices. Used internally for + /// symmetrization operations. Requires axis_order == AxisOrder::XYZ. + /// @return Vector of GridOp, empty if space group is P1 + /// @throws Raises exception if grid is not in XYZ order std::vector get_scaled_ops_except_id() const { std::vector grid_ops; if (!spacegroup || spacegroup->number == 1) @@ -206,18 +303,39 @@ struct GridMeta { return grid_ops; } - /// Quick(est) index function, but works only if `0 <= u < nu`, etc. + /// @brief Quick index computation: fastest but requires 0 <= u < nu, etc. + /// + /// No bounds checking. Data layout is row-major: (w*nv + v)*nu + u. + /// @param u Grid index (must be in [0, nu)) + /// @param v Grid index (must be in [0, nv)) + /// @param w Grid index (must be in [0, nw)) + /// @return Index into flat data array size_t index_q(int u, int v, int w) const { return size_t(w * nv + v) * nu + u; } + + /// @brief Quick index computation for unsigned indices. size_t index_q(size_t u, size_t v, size_t w) const { return (w * nv + v) * nu + u; } - /// Faster than index_s(), but works only if `-nu <= u < 2*nu`, etc. + /// @brief Index with periodic wrapping: faster than index_s() but limited range. + /// + /// Works for indices in the range [-nu, 2*nu) (and similarly for v, w). + /// Applies periodic boundary conditions (wraps to [0, n)). + /// @param u Grid index + /// @param v Grid index + /// @param w Grid index + /// @return Index into flat data array size_t index_n(int u, int v, int w) const { return index_n_ref(u, v, w); } - /// The same as index_n(), but modifies arguments. + /// @brief Index with periodic wrapping, modifying arguments in-place. + /// + /// Same as index_n() but normalizes u, v, w to [0, nu), [0, nv), [0, nw). + /// @param u Grid index (will be normalized) + /// @param v Grid index (will be normalized) + /// @param w Grid index (will be normalized) + /// @return Index into flat data array size_t index_n_ref(int& u, int& v, int& w) const { if (u >= nu) u -= nu; else if (u < 0) u += nu; if (v >= nv) v -= nv; else if (v < 0) v += nv; @@ -225,7 +343,13 @@ struct GridMeta { return this->index_q(u, v, w); } - /// Faster than index_n(), but works only if -nu <= u < nu, etc. + /// @brief Index with periodic wrapping for indices near zero. + /// + /// Optimized version of index_n() for indices in [-n, n). + /// @param u Grid index in range [-nu, nu) + /// @param v Grid index in range [-nv, nv) + /// @param w Grid index in range [-nw, nw) + /// @return Index into flat data array size_t index_near_zero(int u, int v, int w) const { return this->index_q(u >= 0 ? u : u + nu, v >= 0 ? v : v + nv, @@ -233,31 +357,55 @@ struct GridMeta { } }; -/// A common subset of Grid and ReciprocalGrid. +/// @brief Common base for Grid and ReciprocalGrid templates. +/// @tparam T Data type stored at each grid point template struct GridBase : GridMeta { - /// grid coordinates (modulo size) and a pointer to value + /// @brief A grid point with normalized indices and a value pointer. + /// + /// Indices u, v, w have been normalized to [0, nu), [0, nv), [0, nw). + /// The pointer may become invalid if the grid is resized. struct Point { - int u, v, w; - T* value; + int u, v, w; ///< Normalized grid indices + T* value; ///< Pointer to the grid value at this point }; - std::vector data; + std::vector data; ///< Flat row-major array of grid values + /// @brief Check that grid is not empty (has allocated data). + /// @throws Raises exception if data.empty() void check_not_empty() const { if (data.empty()) fail("grid is empty"); } + /// @brief Allocate and set grid dimensions without bounds checking. + /// + /// Resizes the data array to nu_ * nv_ * nw_ elements. + /// Does not validate space group compatibility. + /// @param nu_ Number of grid points along first axis + /// @param nv_ Number of grid points along second axis + /// @param nw_ Number of grid points along third axis void set_size_without_checking(int nu_, int nv_, int nw_) { nu = nu_, nv = nv_, nw = nw_; data.resize((size_t)nu_ * nv_ * nw_); } + /// @brief Get value at grid point using quick (unsafe) index. + /// @param u Grid index (must be in [0, nu)) + /// @param v Grid index (must be in [0, nv)) + /// @param w Grid index (must be in [0, nw)) + /// @return Value at (u, v, w) T get_value_q(int u, int v, int w) const { return data[index_q(u, v, w)]; } + /// @brief Convert a Point to its index in the data array. + /// @param p Point with value pointer + /// @return Index into data array size_t point_to_index(const Point& p) const { return p.value - data.data(); } + /// @brief Convert a data array index to a Point with normalized indices. + /// @param idx Index into data array + /// @return Point with indices and value pointer Point index_to_point(size_t idx) { auto d1 = std::div((ptrdiff_t)idx, (ptrdiff_t)nu); auto d2 = std::div(d1.quot, (ptrdiff_t)nv); @@ -268,22 +416,32 @@ struct GridBase : GridMeta { return {u, v, w, &data.at(idx)}; } + /// @brief Resize grid and fill all points with a constant value. + /// @param value Value to assign to all grid points void fill(T value) { data.resize(point_count()); std::fill(data.begin(), data.end(), value); } + /// @brief Sum type used for accumulating values (ptrdiff_t for integers, T for floats). using Tsum = typename std::conditional::value, std::ptrdiff_t, T>::type; + /// @brief Sum all grid values. + /// @return Sum of all data points Tsum sum() const { return std::accumulate(data.begin(), data.end(), Tsum()); } + /// @brief Iterator over grid points in row-major order (u varies fastest). struct iterator { - GridBase& parent; - size_t index; - int u = 0, v = 0, w = 0; + GridBase& parent; ///< Reference to parent grid + size_t index; ///< Current position in data array + int u = 0, v = 0, w = 0; ///< Current grid coordinates + + /// @brief Construct an iterator at a specific index. iterator(GridBase& parent_, size_t index_) : parent(parent_), index(index_) {} + + /// @brief Pre-increment to next grid point. iterator& operator++() { ++index; if (++u == parent.nu) { @@ -295,19 +453,32 @@ struct GridBase : GridMeta { } return *this; } + + /// @brief Dereference to a Point at current position. typename GridBase::Point operator*() { return {u, v, w, &parent.data[index]}; } + + /// @brief Equality comparison. bool operator==(const iterator &o) const { return index == o.index; } + + /// @brief Inequality comparison. bool operator!=(const iterator &o) const { return index != o.index; } }; + + /// @brief Begin iterator (first grid point). iterator begin() { return {*this, 0}; } + + /// @brief End iterator (one past last grid point). iterator end() { return {*this, data.size()}; } }; -/// Real-space grid. -/// For simplicity, some operations work only if the grid covers whole unit cell -/// and axes u,v,w correspond to a,b,c in the unit cell. +/// @brief Real-space crystallographic grid (electron density, masks, etc.). +/// @tparam T Data type for grid values (default: float) +/// +/// A 3D grid covering the unit cell at regular fractional intervals. +/// Grid points are at fractional coordinates (i/nu, j/nv, k/nw). +/// Many operations require AxisOrder::XYZ and covering the full unit cell. template struct Grid : GridBase { using Point = typename GridBase::Point; @@ -318,12 +489,24 @@ struct Grid : GridBase { using GridBase::spacegroup; using GridBase::data; - /// spacing between virtual planes, not between points + /// @brief Spacing (in Angstroms) between consecutive grid planes. + /// + /// spacing[0] is distance between u planes, etc. + /// Note: spacing is between planes, not between grid points. + /// Computed as 1/(n*cell_length) for each axis. double spacing[3] = {0., 0., 0.}; - /// unit_cell.orth.mat columns divided by nu, nv, nw + + /// @brief Orthogonalization matrix scaled by grid dimensions. + /// + /// Each column of unit_cell.orth.mat divided by {nu, nv, nw}. + /// Used for efficient conversion of fractional deltas to orthogonal. UpperTriangularMat33 orth_n; - /// copy unit_cell, spacegroup, nu, nv, nw, axis_order and set spacing + /// @brief Copy grid metadata (cell, space group, dimensions, axis order). + /// + /// Copies unit_cell, spacegroup, nu, nv, nw, axis_order from another GridMeta, + /// and recalculates spacing and orth_n. + /// @param g Source GridMeta void copy_metadata_from(const GridMeta& g) { unit_cell = g.unit_cell; spacegroup = g.spacegroup; @@ -334,7 +517,11 @@ struct Grid : GridBase { calculate_spacing(); } - /// set #spacing and #orth_n + /// @brief Compute spacing and scaled orthogonalization matrix. + /// + /// Recalculates #spacing and #orth_n based on unit cell and grid dimensions. + /// Must be called after changing unit_cell or grid dimensions. + /// @throws Raises exception if unit cell is not in standard orientation void calculate_spacing() { spacing[0] = 1.0 / (nu * unit_cell.ar); spacing[1] = 1.0 / (nv * unit_cell.br); @@ -345,17 +532,31 @@ struct Grid : GridBase { fail("Grids work only with the standard orientation of crystal frame (SCALEn)"); } + /// @brief Set grid dimensions and recalculate spacing (no space group check). + /// @param nu_ Dimension along first axis + /// @param nv_ Dimension along second axis + /// @param nw_ Dimension along third axis void set_size_without_checking(int nu_, int nv_, int nw_) { GridBase::set_size_without_checking(nu_, nv_, nw_); calculate_spacing(); this->axis_order = AxisOrder::XYZ; } + /// @brief Set grid dimensions with space group compatibility check. + /// @param nu_ Dimension along first axis + /// @param nv_ Dimension along second axis + /// @param nw_ Dimension along third axis + /// @throws Raises exception if dimensions incompatible with space group void set_size(int nu_, int nv_, int nw_) { check_grid_factors(spacegroup, {{nu_, nv_, nw_}}); set_size_without_checking(nu_, nv_, nw_); } + /// @brief Calculate grid dimensions from desired spacing. + /// + /// Chooses dimensions with small prime factors (2, 3, 5) compatible with space group. + /// @param approx_spacing Target spacing in Angstroms + /// @param rounding Strategy for choosing nearby dimensions void set_size_from_spacing(double approx_spacing, GridSizeRounding rounding) { std::array limit = {{unit_cell.a / approx_spacing, unit_cell.b / approx_spacing, @@ -364,17 +565,33 @@ struct Grid : GridBase { set_size_without_checking(m[0], m[1], m[2]); } + /// @brief Set unit cell parameters from individual values. + /// @param a Cell length (Angstroms) + /// @param b Cell length (Angstroms) + /// @param c Cell length (Angstroms) + /// @param alpha Angle (degrees) + /// @param beta Angle (degrees) + /// @param gamma Angle (degrees) void set_unit_cell(double a, double b, double c, double alpha, double beta, double gamma) { unit_cell.set(a, b, c, alpha, beta, gamma); calculate_spacing(); } + /// @brief Set unit cell. + /// @param cell Unit cell to assign void set_unit_cell(const UnitCell& cell) { unit_cell = cell; calculate_spacing(); } + /// @brief Initialize grid from a structure (typically Model/Chain/Residue). + /// + /// Extracts space group and unit cell. If approx_spacing > 0, + /// sets grid dimensions based on spacing. + /// @tparam S Structure type with find_spacegroup() and cell members + /// @param st Structure + /// @param approx_spacing Target grid spacing in Angstroms (default: 0, no sizing) template void setup_from(const S& st, double approx_spacing=0) { spacegroup = st.find_spacegroup(); @@ -383,22 +600,43 @@ struct Grid : GridBase { set_size_from_spacing(approx_spacing, GridSizeRounding::Up); } - /// Returns index in data array for (u,v,w). Safe but slower than index_q(). + /// @brief Get index in data array with periodic wrapping. + /// + /// Safe but slower than index_q(). Applies modulo to all indices. + /// @param u Grid index (will be wrapped) + /// @param v Grid index (will be wrapped) + /// @param w Grid index (will be wrapped) + /// @return Index into data array size_t index_s(int u, int v, int w) const { this->check_not_empty(); return this->index_q(modulo(u, nu), modulo(v, nv), modulo(w, nw)); } - /// returns `data[index_s(u, v, w)]` + /// @brief Get value at grid point with periodic wrapping. + /// @param u Grid index + /// @param v Grid index + /// @param w Grid index + /// @return Grid value at the (normalized) indices T get_value(int u, int v, int w) const { return data[index_s(u, v, w)]; } + /// @brief Set value at grid point with periodic wrapping. + /// @param u Grid index + /// @param v Grid index + /// @param w Grid index + /// @param x Value to assign void set_value(int u, int v, int w, T x) { data[index_s(u, v, w)] = x; } - /// Point stores normalizes indices (not the original u,v,w). + /// @brief Get a Point at given grid indices with periodic wrapping. + /// + /// The returned Point has normalized indices u, v, w in [0, nu), [0, nv), [0, nw). + /// @param u Grid index + /// @param v Grid index + /// @param w Grid index + /// @return Point with normalized indices Point get_point(int u, int v, int w) { u = modulo(u, nu); v = modulo(v, nv); @@ -406,35 +644,64 @@ struct Grid : GridBase { return {u, v, w, &data[this->index_q(u, v, w)]}; } + /// @brief Get the grid point nearest to a fractional coordinate. + /// @param f Fractional coordinates + /// @return Point at nearest grid position + /// @throws Raises exception if grid is not in XYZ order Point get_nearest_point(const Fractional& f) { if (this->axis_order != AxisOrder::XYZ) fail("grid is not fully setup"); return get_point(iround(f.x * nu), iround(f.y * nv), iround(f.z * nw)); } + + /// @brief Get the grid point nearest to an orthogonal (Cartesian) position. + /// @param pos Orthogonal coordinates (Angstroms) + /// @return Point at nearest grid position Point get_nearest_point(const Position& pos) { return get_nearest_point(unit_cell.fractionalize(pos)); } + /// @brief Get the index of the nearest grid point to a fractional coordinate. + /// @param f Fractional coordinates + /// @return Index into data array size_t get_nearest_index(const Fractional& f) { return index_s(iround(f.x * nu), iround(f.y * nv), iround(f.z * nw)); } - /// Point stores normalized indices, so fractional coordinates are in [0,1). + /// @brief Convert a grid Point to fractional coordinates. + /// + /// The Point's indices are normalized, so coordinates are in [0, 1). + /// @param p Point with normalized indices + /// @return Fractional coordinates Fractional point_to_fractional(const Point& p) const { return this->get_fractional(p.u, p.v, p.w); } + + /// @brief Convert a grid Point to orthogonal coordinates. + /// @param p Point with normalized indices + /// @return Orthogonal position (Angstroms) Position point_to_position(const Point& p) const { return this->get_position(p.u, p.v, p.w); } + /// @brief Helper to split a real coordinate into integer and fractional parts. + /// @param x Real coordinate + /// @param n Grid dimension + /// @param iptr Output: normalized integer part in [0, n) + /// @return Fractional part in [0, 1) static double grid_modulo(double x, int n, int* iptr) { double f = std::floor(x); *iptr = modulo((int)f, n); return x - f; } - /// https://en.wikipedia.org/wiki/Trilinear_interpolation - /// x,y,z are grid coordinates (x=1.5 is between 2nd and 3rd grid point). + /// @brief Trilinear interpolation at a grid coordinate. + /// + /// Reference: https://en.wikipedia.org/wiki/Trilinear_interpolation + /// @param x Grid coordinate (x=1.5 is between 2nd and 3rd grid point). Wraps periodically. + /// @param y Grid coordinate + /// @param z Grid coordinate + /// @return Interpolated value using trilinear basis functions T trilinear_interpolation(double x, double y, double z) const { this->check_not_empty(); int u, v, w; @@ -456,15 +723,32 @@ struct Grid : GridBase { } return (T) lerp_(avg[0], avg[1], zd); } + /// @brief Trilinear interpolation at fractional coordinates. + /// @param fctr Fractional coordinates + /// @return Interpolated value T trilinear_interpolation(const Fractional& fctr) const { return trilinear_interpolation(fctr.x * nu, fctr.y * nv, fctr.z * nw); } + + /// @brief Trilinear interpolation at orthogonal coordinates. + /// @param ctr Orthogonal coordinates (Angstroms) + /// @return Interpolated value T trilinear_interpolation(const Position& ctr) const { return trilinear_interpolation(unit_cell.fractionalize(ctr)); } - /// https://en.wikipedia.org/wiki/Tricubic_interpolation - /// x,y,z are grid coordinates (x=1.5 is between 2nd and 3rd grid point). + /// @brief Tricubic interpolation at a grid coordinate. + /// @details Uses Catmull–Rom cubic splines applied as a tensor product in three dimensions. + /// Smoother than trilinear but more expensive. See cubic_interpolation() for the 1D formula. + /// @param x Grid coordinate (x=1.5 is between 2nd and 3rd grid point). Wraps periodically. + /// @param y Grid coordinate + /// @param z Grid coordinate + /// @return Interpolated value (double precision) + /// @par References + /// Afonine, P.V., Poon, B.K., Read, R.J., Sobolev, O.V., Terwilliger, T.C., + /// Urzhumtsev, A. & Adams, P.D. (2018). Real-space refinement in PHENIX for + /// cryo-EM and crystallography. Acta Cryst. D74, 531–544. + /// https://doi.org/10.1107/S2059798318006551 double tricubic_interpolation(double x, double y, double z) const { std::array,4>,4> copy; copy_4x4x4(x, y, z, copy); @@ -477,13 +761,27 @@ struct Grid : GridBase { } return cubic_interpolation(x, b[0], b[1], b[2], b[3]); } + /// @brief Tricubic interpolation at fractional coordinates. + /// @param fctr Fractional coordinates + /// @return Interpolated value double tricubic_interpolation(const Fractional& fctr) const { return tricubic_interpolation(fctr.x * nu, fctr.y * nv, fctr.z * nw); } + + /// @brief Tricubic interpolation at orthogonal coordinates. + /// @param ctr Orthogonal coordinates (Angstroms) + /// @return Interpolated value double tricubic_interpolation(const Position& ctr) const { return tricubic_interpolation(unit_cell.fractionalize(ctr)); } - /// returns the same as above + derivatives df/dx, df/dy, df/dz + + /// @brief Tricubic interpolation with first derivatives. + /// + /// Returns the interpolated value and partial derivatives. + /// @param x Grid coordinate + /// @param y Grid coordinate + /// @param z Grid coordinate + /// @return Array {value, df/dx, df/dy, df/dz} in grid coordinates std::array tricubic_interpolation_der(double x, double y, double z) const { std::array,4>,4> copy; copy_4x4x4(x, y, z, copy); @@ -509,11 +807,16 @@ struct Grid : GridBase { ret[3] = cubic_interpolation_der(z, b[0], b[1], b[2], b[3]); return ret; } + /// @brief Tricubic interpolation with derivatives at fractional coordinates. + /// @param fctr Fractional coordinates + /// @return Array {value, df/da, df/db, df/dc} in fractional coordinates std::array tricubic_interpolation_der(const Fractional& fctr) const { auto r = tricubic_interpolation_der(fctr.x * nu, fctr.y * nv, fctr.z * nw); return {r[0], r[1] * nu, r[2] * nv, r[3] * nw}; } - /// @private + + /// @brief Helper: extract 4x4x4 block of grid values for tricubic interpolation. + /// @private Internal use only. Modifies x, y, z to fractional parts. void copy_4x4x4(double& x, double& y, double& z, std::array,4>,4>& copy) const { this->check_not_empty(); @@ -540,7 +843,11 @@ struct Grid : GridBase { copy[i][j][k] = this->get_value_q(u_indices[i], v_indices[j], w_indices[k]); } - /// @param order 0=nearest, 1=linear, 3=cubic interpolation + /// @brief Interpolate value at fractional coordinates using specified method. + /// @param f Fractional coordinates + /// @param order Interpolation order: 0 (nearest), 1 (trilinear), 3 (tricubic) + /// @return Interpolated value + /// @throws std::invalid_argument if order is not 0, 1, or 3 T interpolate_value(const Fractional& f, int order=1) const { switch (order) { case 0: return *const_cast*>(this)->get_nearest_point(f).value; @@ -549,10 +856,22 @@ struct Grid : GridBase { } throw std::invalid_argument("interpolation \"order\" must 0, 1 or 3"); } + + /// @brief Interpolate value at orthogonal coordinates using specified method. + /// @param ctr Orthogonal coordinates (Angstroms) + /// @param order Interpolation order: 0 (nearest), 1 (trilinear), 3 (tricubic) + /// @return Interpolated value T interpolate_value(const Position& ctr, int order=1) const { return interpolate_value(unit_cell.fractionalize(ctr), order); } + /// @brief Extract a rectangular subarray of grid points with periodic wrapping. + /// + /// Copies a contiguous block of grid values into a destination array. + /// Handles wrapping across periodic boundaries. + /// @param dest Destination array (at least shape[0]*shape[1]*shape[2] elements) + /// @param start Starting grid indices {u, v, w} + /// @param shape Block size {du, dv, dw} void get_subarray(T* dest, std::array start, std::array shape) const { this->check_not_empty(); if (this->axis_order != AxisOrder::XYZ) @@ -579,6 +898,13 @@ struct Grid : GridBase { } } + /// @brief Set a rectangular subarray of grid points with periodic wrapping. + /// + /// Copies a block of values from a source array into the grid. + /// Handles wrapping across periodic boundaries. + /// @param src Source array (at least shape[0]*shape[1]*shape[2] elements) + /// @param start Starting grid indices {u, v, w} + /// @param shape Block size {du, dv, dw} void set_subarray(const T* src, std::array start, std::array shape) { this->check_not_empty(); if (this->axis_order != AxisOrder::XYZ) @@ -605,6 +931,12 @@ struct Grid : GridBase { } } + /// @brief Validate and adjust size parameters for box operations. + /// @tparam UsePbc If true, apply periodic boundary conditions; otherwise clamp to grid + /// @param du Half-size along first axis (may be adjusted) + /// @param dv Half-size along second axis (may be adjusted) + /// @param dw Half-size along third axis (may be adjusted) + /// @param fail_on_too_large_radius If true, raise exception if radius too large for PBC template void check_size_for_points_in_box(int& du, int& dv, int& dw, bool fail_on_too_large_radius) const { @@ -622,6 +954,15 @@ struct Grid : GridBase { } } + /// @brief Internal: iterate over grid points in a box around a fractional coordinate. + /// @tparam UsePbc If true, apply periodic boundary conditions + /// @tparam Func Callable(T&, double, Position, int, int, int) invoked for each point + /// @param fctr Fractional center coordinate + /// @param du Half-extent along first axis (in grid points) + /// @param dv Half-extent along second axis (in grid points) + /// @param dw Half-extent along third axis (in grid points) + /// @param func Callback function(value_ref, distance_sq, delta_position, u, v, w) + /// @param radius Optional spherical radius limit (INFINITY for box only) template void do_use_points_in_box(const Fractional& fctr, int du, int dv, int dw, Func&& func, double radius=INFINITY) { @@ -677,6 +1018,16 @@ struct Grid : GridBase { } } + /// @brief Iterate over grid points in a box around a fractional coordinate. + /// @tparam UsePbc If true, apply periodic boundary conditions + /// @tparam Func Callable(T&, double, Position, int, int, int) invoked for each point + /// @param fctr Fractional center coordinate + /// @param du Half-extent along first axis (in grid points) + /// @param dv Half-extent along second axis (in grid points) + /// @param dw Half-extent along third axis (in grid points) + /// @param func Callback function(value_ref, distance_sq, delta_position, u, v, w) + /// @param fail_on_too_large_radius If true and UsePbc, raise exception for oversized box + /// @param radius Optional spherical radius limit (INFINITY for box only) template void use_points_in_box(const Fractional& fctr, int du, int dv, int dw, Func&& func, bool fail_on_too_large_radius=true, @@ -685,6 +1036,13 @@ struct Grid : GridBase { do_use_points_in_box(fctr, du, dv, dw, func, radius); } + /// @brief Iterate over grid points within a spherical radius of a fractional coordinate. + /// @tparam UsePbc If true, apply periodic boundary conditions + /// @tparam Func Callable(T&, double) invoked for each point + /// @param fctr Fractional center coordinate + /// @param radius Spherical radius (in Angstroms) + /// @param func Callback function(value_ref, distance_sq) + /// @param fail_on_too_large_radius If true and UsePbc, raise exception for large radius template void use_points_around(const Fractional& fctr, double radius, Func&& func, bool fail_on_too_large_radius=true) { @@ -698,6 +1056,11 @@ struct Grid : GridBase { radius); } + /// @brief Set all grid points within a spherical radius to a constant value. + /// @param ctr Orthogonal center (Angstroms) + /// @param radius Spherical radius (Angstroms) + /// @param value Value to assign + /// @param use_pbc If true, apply periodic boundary conditions void set_points_around(const Position& ctr, double radius, T value, bool use_pbc=true) { Fractional fctr = unit_cell.fractionalize(ctr); if (use_pbc) @@ -706,21 +1069,31 @@ struct Grid : GridBase { use_points_around(fctr, radius, [&](T& ref, double) { ref = value; }, false); } - /// Change all occurrences of old_value to new_value + /// @brief Replace all occurrences of one value with another. + /// @param old_value Value to search for + /// @param new_value Replacement value void change_values(T old_value, T new_value) { for (auto& d : data) if (impl::is_same(d, old_value)) d = new_value; } - /// Use \par func to reduce values of all symmetry mates of each - /// grid point, then assign the result to all the points. - /// \par func takes two values and returns a value. + /// @brief Apply a reduction function across all symmetry mates of each point. + /// + /// For each unique point under the space group, applies @p func to combine + /// the values of all symmetry-related points, then assigns the result back + /// to all mate positions. + /// @tparam Func Binary function(T, T) → T + /// @param func Reduction function, e.g., std::plus, std::max, etc. template void symmetrize(Func func) { symmetrize_using_ops(this->get_scaled_ops_except_id(), func); } + /// @brief Apply a reduction function using an explicit list of operations. + /// @tparam Func Binary function(T, T) → T + /// @param ops GridOp operations (typically from get_scaled_ops_except_id()) + /// @param func Reduction function template void symmetrize_using_ops(const std::vector& ops, Func func) { if (ops.empty()) @@ -754,23 +1127,40 @@ struct Grid : GridBase { assert(idx == data.size()); } - // most common symmetrize functions + /// @brief Apply symmetry by taking the minimum value among symmetry mates. void symmetrize_min() { symmetrize([](T a, T b) { return (a < b || !(b == b)) ? a : b; }); } + + /// @brief Apply symmetry by taking the maximum value among symmetry mates. void symmetrize_max() { symmetrize([](T a, T b) { return (a > b || !(b == b)) ? a : b; }); } + + /// @brief Apply symmetry by taking the maximum absolute value among symmetry mates. void symmetrize_abs_max() { symmetrize([](T a, T b) { return (std::abs(a) > std::abs(b) || !(b == b)) ? a : b; }); } - /// multiplies grid points on special position + + /// @brief Apply symmetry by summing values of symmetry mates. + /// + /// Points on special positions (with fewer mates) contribute their value + /// multiple times. Used for density map averaging without normalization. void symmetrize_sum() { symmetrize([](T a, T b) { return a + b; }); } + + /// @brief Apply symmetry by selecting non-default values among mates. + /// + /// If a point's value equals @p default_, uses the value from a symmetry mate. + /// @param default_ Value to replace void symmetrize_nondefault(T default_) { symmetrize([default_](T a, T b) { return impl::is_same(a, default_) ? b : a; }); } + + /// @brief Apply symmetry by averaging values of symmetry mates. + /// + /// Sums symmetry mates and divides by space group order. void symmetrize_avg() { symmetrize_sum(); if (spacegroup && spacegroup->number != 1) { @@ -780,7 +1170,10 @@ struct Grid : GridBase { } } - /// scale the data to get mean == 0 and rmsd == 1 (doesn't work for T=complex) + /// @brief Normalize grid values to zero mean and unit RMS. + /// + /// Subtracts the mean and scales by RMS, making statistics zero mean and unit variance. + /// Does not work for complex-valued grids. void normalize() { DataStats stats = calculate_data_statistics(data); for (T& x : data) @@ -788,9 +1181,18 @@ struct Grid : GridBase { } }; -// TODO: add argument Box src_extent -// cf. interpolate_grid_around_model() in solmask.hpp -// cf interpolate_values in python/grid.cpp +/// @brief Interpolate grid values from source to destination under a transformation. +/// +/// For each point in the destination grid, applies the transformation and +/// interpolates the source grid value at that location. +/// TODO: add argument Box src_extent +/// See: interpolate_grid_around_model() in solmask.hpp +/// See: interpolate_values in python/grid.cpp +/// @tparam T Grid value type +/// @param dest Destination grid +/// @param src Source grid +/// @param tr Spatial transformation (rotation + translation) +/// @param order Interpolation order: 0 (nearest), 1 (trilinear), 3 (tricubic) template void interpolate_grid(Grid& dest, const Grid& src, const Transform& tr, int order=1) { FTransform frac_tr = src.unit_cell.frac.combine(tr).combine(dest.unit_cell.orth); @@ -804,6 +1206,14 @@ void interpolate_grid(Grid& dest, const Grid& src, const Transform& tr, in } } +/// @brief Calculate correlation coefficient between two grids. +/// +/// Computes Pearson correlation between grid values, skipping NaN values. +/// @tparam T Grid value type +/// @param a First grid +/// @param b Second grid +/// @return Correlation object with mean, stddev, and correlation coefficient +/// @throws Raises exception if grids have different dimensions template Correlation calculate_correlation(const GridBase& a, const GridBase& b) { if (a.data.size() != b.data.size() || a.nu != b.nu || a.nv != b.nv || a.nw != b.nw) diff --git a/include/gemmi/recgrid.hpp b/include/gemmi/recgrid.hpp index 9caf77212..a6e3e96a5 100644 --- a/include/gemmi/recgrid.hpp +++ b/include/gemmi/recgrid.hpp @@ -1,3 +1,11 @@ +/// @file recgrid.hpp +/// @brief Reciprocal-space grid for reflection data and Fourier transforms. +/// +/// This header provides ReciprocalGrid, a template class for storing complex-valued data +/// in reciprocal space indexed by Miller indices (h, k, l) or FFT grid coordinates. +/// Typically used to store structure factor amplitudes/phases or computed Fourier components. +/// The grid implements FFT conventions with half-l storage for Hermitian-symmetric data. + // Copyright 2020 Global Phasing Ltd. // // ReciprocalGrid -- grid for reciprocal space data. @@ -11,15 +19,33 @@ namespace gemmi { +/// @brief Get the Friedel-mate value (complex conjugate for complex, identity for real). +/// @tparam T Real or complex type +/// @param v Input value +/// @return Complex conjugate if T is complex, otherwise @p v unchanged template T friedel_mate_value(T v) { return v; } +/// @brief Specialization: complex conjugate for Friedel mates. template std::complex friedel_mate_value(const std::complex& v) { return std::conj(v); } +/// @brief Grid for reciprocal-space (Fourier) data indexed by Miller indices. +/// @tparam T Data type at grid points (typically complex or complex) +/// +/// Stores reflection amplitudes, phases, or structure factors in reciprocal space. +/// Indices u, v, w correspond to h, k, l (Miller indices) in the FFT grid. +/// For Hermitian-symmetric data (result of real-space FFT), can use half-l mode +/// to store only l >= 0, halving memory while allowing Friedel-mate reconstruction. template struct ReciprocalGrid : GridBase { - bool half_l = false; // hkl grid that stores only l>=0 + bool half_l = false; ///< If true, stores only l>=0; l<0 values obtained from Friedel pairs + + /// @brief Check if Miller indices (u, v, w) are valid and in-range for the grid. + /// @param u Grid index for h (may be negative in the range [-nu, nu)) + /// @param v Grid index for k (may be negative in the range [-nv, nv)) + /// @param w Grid index for l (may be negative in the range [-nw, nw), or [0, nw) if half_l) + /// @return True if the indices are within valid range for this grid bool has_index(int u, int v, int w) const { bool half_u = (half_l && this->axis_order == AxisOrder::ZYX); bool half_w = (half_l && this->axis_order != AxisOrder::ZYX); @@ -27,29 +53,64 @@ struct ReciprocalGrid : GridBase { std::abs(2 * v) < this->nv && std::abs(half_w ? w : 2 * w) < this->nw; } + /// @brief Check indices and throw if out of range. + /// @param u Grid index for h + /// @param v Grid index for k + /// @param w Grid index for l + /// @throws std::out_of_range if any index is invalid void check_index(int u, int v, int w) const { if (!has_index(u, v, w)) throw std::out_of_range("ReciprocalGrid: index out of grid."); } - // Similar to Grid::index_n(), but works only for -nu <= u < nu, etc. + /// @brief Compute flat array index from possibly-negative Miller indices. + /// @param u Miller h index (range -nu <= u < nu) + /// @param v Miller k index (range -nv <= v < nv) + /// @param w Miller l index (range -nw <= w < nw or 0 <= w < nw with half_l) + /// @return Flat array index with periodic wrapping applied size_t index_n(int u, int v, int w) const { return this->index_q(u >= 0 ? u : u + this->nu, v >= 0 ? v : v + this->nv, w >= 0 ? w : w + this->nw); } + /// @brief Compute checked and wrapped index from Miller indices. + /// @param u Miller h index + /// @param v Miller k index + /// @param w Miller l index + /// @return Flat array index after bounds checking and wrapping + /// @throws std::out_of_range if indices are invalid size_t index_checked(int u, int v, int w) const { check_index(u, v, w); return index_n(u, v, w); } + /// @brief Get value at Miller indices with bounds checking. + /// @param u Miller h index + /// @param v Miller k index + /// @param w Miller l index + /// @return The value at (u, v, w) + /// @throws std::out_of_range if indices are out of range T get_value(int u, int v, int w) const { return this->data[index_checked(u, v, w)]; } + /// @brief Get value at Miller indices, returning default T{} if out of range. + /// @param u Miller h index + /// @param v Miller k index + /// @param w Miller l index + /// @return The value at (u, v, w), or T{} (zero) if indices are invalid T get_value_or_zero(int u, int v, int w) const { return has_index(u, v, w) ? this->data[index_n(u, v, w)] : T{}; } + /// @brief Set value at Miller indices with bounds checking. + /// @param u Miller h index + /// @param v Miller k index + /// @param w Miller l index + /// @param x The value to store + /// @throws std::out_of_range if indices are out of range void set_value(int u, int v, int w, T x) { this->data[index_checked(u, v, w)] = x; } + /// @brief Convert grid Point (with u, v, w indices) to Miller indices (h, k, l). + /// @param point Point from grid iteration with normalized indices [0, n) + /// @return Miller index vector accounting for FFT conventions and axis order Miller to_hkl(const typename GridBase::Point& point) const { Miller hkl{{point.u, point.v, point.w}}; if (2 * point.u >= this->nu && @@ -65,13 +126,26 @@ struct ReciprocalGrid : GridBase { return hkl; } + /// @brief Compute 1/d^2 resolution from a grid point (inverse d-spacing squared). + /// @param point Grid point with u, v, w indices + /// @return 1/d^2 in reciprocal Angstrom units double calculate_1_d2(const typename GridBase::Point& point) const { return this->unit_cell.calculate_1_d2(to_hkl(point)); } + /// @brief Compute d-spacing for a grid point. + /// @param point Grid point with u, v, w indices + /// @return d-spacing (resolution) in Angstroms double calculate_d(const typename GridBase::Point& point) const { return this->unit_cell.calculate_d(to_hkl(point)); } + /// @brief Get reflection value by Miller indices with optional post-processing. + /// @param hkl Miller indices (h, k, l) + /// @param unblur Sharpening factor (B-factor correction); 0 = no sharpening + /// @param mott_bethe If true, apply Mott-Bethe factor for scattering angle dependence + /// @return Value at (h, k, l), with Friedel mate retrieval if half_l is set and l<0, + /// and post-processing applied (unblur/Mott-Bethe) + /// @note For half_l grids with negative l, fetches the Friedel conjugate at (-h,-k,-l) T get_value_by_hkl(Miller hkl, double unblur=0, bool mott_bethe=false) const { if (this->axis_order == AxisOrder::ZYX) @@ -86,16 +160,23 @@ struct ReciprocalGrid : GridBase { double inv_d2 = this->unit_cell.calculate_1_d2(hkl); double mult = 1; if (unblur != 0) - // cf. reciprocal_space_multiplier() mult = std::exp(unblur * 0.25 * inv_d2); if (mott_bethe) - // cf. mott_bethe_factor mult *= -mott_bethe_const() / inv_d2; value *= static_cast(mult); } return value; } + /// @brief Extract reflection data in asymmetric unit (ASU) sorted by Miller indices. + /// @tparam R Output data type (default: same as grid type T) + /// @param dmin Minimum resolution cutoff (Angstroms); 0 = no cutoff + /// @param unblur B-factor sharpening/blurring factor + /// @param with_000 If true, include the origin reflection (0,0,0) + /// @param with_sys_abs If true, include systematically absent reflections + /// @param mott_bethe If true, apply Mott-Bethe factor correction + /// @return AsuData object with reflections in the asymmetric unit, sorted by (h,k,l) + /// @note For half_l grids, automatically retrieves Friedel mates for l<0 // the result is always sorted by h,k,l template AsuData prepare_asu_data(double dmin=0, double unblur=0, @@ -104,10 +185,6 @@ struct ReciprocalGrid : GridBase { AsuData asu_data; if (this->axis_order == AxisOrder::ZYX) fail("get_asu_values(): ZYX order is not supported yet"); - // Why "- 1" below? To skip the value at Nyquist frequency (±n/2). - // For even lengths of DFT (real -> reciprocal space) the resulting - // h=+nu/2 and h=-nu/2 are both represented by one (strictly real) value. - // The grid should be big enough so that these values are not needed. int max_h = (this->nu - 1) / 2; int max_k = (this->nv - 1) / 2; int max_l = half_l ? this->nw - 1 : (this->nw - 1) / 2; @@ -169,6 +246,8 @@ struct ReciprocalGrid : GridBase { } }; +/// @brief Convenience alias for a reciprocal grid of complex F (magnitude/phase) values. +/// @tparam T Floating-point precision (float or double) template using FPhiGrid = ReciprocalGrid>; } // namespace gemmi diff --git a/include/gemmi/reciproc.hpp b/include/gemmi/reciproc.hpp index 6d5caf09c..504777a4a 100644 --- a/include/gemmi/reciproc.hpp +++ b/include/gemmi/reciproc.hpp @@ -1,3 +1,9 @@ +/// @file reciproc.hpp +/// @brief Utility functions for working with reflections and reciprocal space. +/// +/// Provides functions to enumerate unique reflections within a resolution range, +/// respecting space group symmetry and systematically absent reflections. + // Copyright 2020 Global Phasing Ltd. // // Reciprocal space helper functions. @@ -10,7 +16,18 @@ namespace gemmi { -// dmin should include a tiny margin for numerical errors +/// @brief Iterate over all reflections within a resolution range, applying a function to each. +/// @tparam Func Callable taking a Miller index array +/// @param func Function to apply to each valid reflection +/// @param cell Unit cell parameters +/// @param spacegroup Space group for symmetry and systematic absences +/// @param dmin Minimum d-spacing (Angstroms, lower resolution limit) +/// @param dmax Maximum d-spacing (Angstroms, higher resolution limit); 0 = unlimited; INFINITY = exact high-res limit +/// @param unique If true, iterate only over asymmetric unit (one per symmetry equivalent); if false, all in range +/// +/// Generates Miller indices (h, k, l) in the resolution range [dmax, dmin] (or [dmin, inf) if dmax=0). +/// Checks space group symmetries and excludes systematically absent reflections. +/// @note dmin should include a small margin (e.g., 1e-6 Angstrom) to avoid numerical boundary issues. template void for_all_reflections(Func func, const UnitCell& cell, const SpaceGroup* spacegroup, @@ -34,7 +51,15 @@ void for_all_reflections(Func func, } } -// dmin should include a tiny margin for numerical errors +/// @brief Count the number of unique (or all) reflections in a resolution range. +/// @param cell Unit cell parameters +/// @param spacegroup Space group +/// @param dmin Minimum d-spacing (Angstroms) +/// @param dmax Maximum d-spacing (Angstroms); 0 = no upper limit +/// @param unique If true, count only asymmetric unit; if false, count all +/// @return Total number of reflections satisfying the criteria +/// +/// @note dmin should include a small margin for numerical accuracy. inline int count_reflections(const UnitCell& cell, const SpaceGroup* spacegroup, double dmin, double dmax=0., bool unique=true) { int counter = 0; @@ -43,6 +68,15 @@ inline int count_reflections(const UnitCell& cell, const SpaceGroup* spacegroup, return counter; } +/// @brief Generate a vector of all reflections in a resolution range. +/// @param cell Unit cell parameters +/// @param spacegroup Space group +/// @param dmin Minimum d-spacing (Angstroms) +/// @param dmax Maximum d-spacing (Angstroms); 0 = no upper limit +/// @param unique If true, return only asymmetric unit; if false, all in range +/// @return Vector of Miller indices [h, k, l] sorted in enumeration order +/// +/// Useful for generating a complete or unique list of expected reflections for comparison with data. inline std::vector make_miller_vector(const UnitCell& cell, const SpaceGroup* spacegroup, double dmin, double dmax=0., bool unique=true) { diff --git a/include/gemmi/solmask.hpp b/include/gemmi/solmask.hpp index 7d32929fa..5835c17bb 100644 --- a/include/gemmi/solmask.hpp +++ b/include/gemmi/solmask.hpp @@ -2,6 +2,9 @@ // // Flat bulk solvent mask. With helper tools that modify data on grid. +/// @file +/// @brief Solvent masking utilities for crystallographic refinement. + #ifndef GEMMI_SOLMASK_HPP_ #define GEMMI_SOLMASK_HPP_ @@ -11,9 +14,19 @@ namespace gemmi { -enum class AtomicRadiiSet { VanDerWaals, Cctbx, Refmac, Constant }; +/// Enumeration of atomic radii sets for solvent masking. +/// Determines which radii library is used when computing the protein region. +enum class AtomicRadiiSet { + VanDerWaals, ///< Standard van der Waals radii from crystallographic tables + Cctbx, ///< CCTBX/CCP4 van der Waals radii + Refmac, ///< Refmac radii for bulk solvent correction + Constant ///< Constant radius applied to all atoms +}; -// data from cctbx/eltbx/van_der_waals_radii.py used to generate identical mask +/// Returns the van der Waals radius from CCTBX/CCP4 library for a given element. +/// Data derived from cctbx/eltbx/van_der_waals_radii.py for compatibility. +/// @param el Chemical element +/// @return Radius in Angstroms inline float cctbx_vdw_radius(El el) { static constexpr float radii[] = { /*X*/ 1.00f, @@ -52,8 +65,11 @@ inline float cctbx_vdw_radius(El el) { return radii[static_cast(el)]; } -// Data from Refmac's ener_lib.cif: ionic radius - 0.2A or vdW radius + 0.2A. -// For full compatibility use r_probe=1.0A and r_shrink=0.8A. +/// Returns the effective radius for bulk solvent correction from Refmac's ener_lib.cif. +/// Represents ionic radius minus 0.2 Angstroms or vdW radius plus 0.2 Angstroms. +/// For full Refmac compatibility, use @c r_probe=1.0 and @c r_shrink=0.8. +/// @param el Chemical element +/// @return Radius in Angstroms inline float refmac_radius_for_bulk_solvent(El el) { #if 0 static constexpr float radii[] = { @@ -102,8 +118,16 @@ inline float refmac_radius_for_bulk_solvent(El el) { #endif } -// mask utilities - +/// Marks grid points within a radius around atoms as masked. +/// Sets all grid points within (atomic radius + probe radius) of each atom to the given value. +/// @tparam T Grid value type (typically int8_t for binary masks or float for weighted masks) +/// @param mask Grid to modify in-place +/// @param model Molecular model containing atoms to mask around +/// @param atomic_radii_set Which atomic radii library to use +/// @param r_probe Probe radius (in Angstroms) added to each atomic radius +/// @param value Value to set for masked grid points +/// @param ignore_hydrogen If true, skip hydrogen atoms +/// @param ignore_zero_occupancy_atoms If true, skip atoms with zero occupancy template void mask_points_in_radius(Grid& mask, const Model& model, AtomicRadiiSet atomic_radii_set, @@ -128,7 +152,15 @@ void mask_points_in_radius(Grid& mask, const Model& model, } } -// deprecated +/// @deprecated Use mask_points_in_radius with AtomicRadiiSet::Constant instead. +/// Marks grid points within a constant radius around atoms. +/// @tparam T Grid value type +/// @param mask Grid to modify +/// @param model Molecular model with atoms to mask around +/// @param radius Constant radius in Angstroms +/// @param value Value to set for masked grid points +/// @param ignore_hydrogen If true, skip hydrogen atoms +/// @param ignore_zero_occupancy_atoms If true, skip atoms with zero occupancy template void mask_points_in_constant_radius(Grid& mask, const Model& model, double radius, T value, @@ -139,6 +171,9 @@ void mask_points_in_constant_radius(Grid& mask, const Model& model, } +/// Collects all distinct alternate location indicators from a model. +/// @param model Molecular model to scan +/// @return String containing unique altloc characters found in the model inline std::string distinct_altlocs(const Model& model) { std::string altlocs; for (const Chain& chain : model.chains) @@ -147,15 +182,15 @@ inline std::string distinct_altlocs(const Model& model) { return altlocs; } -/// mask all grid points within a varying radius around model atoms -/// @param mask existing mask object -/// @param model model to consider -/// @param atomic_radii_set set of atomic radii -/// @param r_probe probe radius to add to atomic radius (in A) -/// @param value value to use (unless use_atom_occupancy is set to true) -/// @param ignore_hydrogen should we skip hydrogen atoms -/// @param ignore_zero_occupancy_atoms should we skip atoms with occupancy of zero -/// @param use_atom_occupancy should we use the occupancy of each atom (to build up a non-binary mask) +/// Masks grid points using atom occupancy values to create a weighted mask. +/// Creates a non-binary mask by considering occupancy factors for atoms with alternate locations. +/// Each altloc is processed separately and contributions are accumulated. +/// @param mask Existing float mask to modify in-place (contributions are subtracted) +/// @param model Model to mask; atoms are categorized by alternate location code +/// @param atomic_radii_set Which atomic radii library to use +/// @param r_probe Probe radius (in Angstroms) added to each atomic radius +/// @param ignore_hydrogen If true, skip hydrogen atoms +/// @param ignore_zero_occupancy_atoms If true, skip atoms with zero occupancy inline void mask_points_using_occupancy(Grid& mask, const Model& model, AtomicRadiiSet atomic_radii_set, double r_probe, @@ -207,11 +242,15 @@ void mask_points_using_occupancy(Grid& mask, const Model& model, d = std::max(d, 0.f); } -/// All points close to a grid point which is currently set below a value are changed -/// @param mask given mask -/// @param r distance value (in A) -/// @param value consider only grid points that are currently set to this value -/// @param margin_value new value for grid points satsifying criteria +/// Creates a margin of points around the boundary of a masked region. +/// Finds grid points at distance <= @p r from points with value equal to @p value, +/// and sets them to @p margin_value. Uses efficient stencil-based neighbor search. +/// @tparam T Grid value type +/// @param mask Grid to modify in-place +/// @param r Distance threshold in Angstroms +/// @param value Boundary value to search for (typically solvent/protein boundary) +/// @param margin_value Value to set for margin points +/// @throws gemmi::Failure if radius exceeds half the unit cell dimensions template void set_margin_around(Grid& mask, double r, T value, T margin_value) { int du = (int) std::floor(r / mask.spacing[0]); @@ -281,22 +320,32 @@ void set_margin_around(Grid& mask, double r, T value, T margin_value) { //printf("margin: %zu\n", std::count(mask.data.begin(), mask.data.end(), margin_value)); } +/// Helper class for computing and applying solvent masks to crystallographic grids. +/// Encapsulates parameters and operations for bulk solvent masking, including +/// mask generation, shrinking, inversion, and symmetry handling. struct SolventMasker { - AtomicRadiiSet atomic_radii_set; - bool ignore_hydrogen; - bool ignore_zero_occupancy_atoms; - bool use_atom_occupancy = false; - double rprobe; - double rshrink; - double island_min_volume; - double constant_r; - double requested_spacing = 0.; - + AtomicRadiiSet atomic_radii_set; ///< Which atomic radii library is used + bool ignore_hydrogen; ///< If true, hydrogen atoms are skipped + bool ignore_zero_occupancy_atoms; ///< If true, atoms with zero occupancy are skipped + bool use_atom_occupancy = false; ///< If true, use atom occupancy for weighted masking + double rprobe; ///< Probe radius added to atomic radii (in A) + double rshrink; ///< Shrinking radius applied after masking (in A) + double island_min_volume; ///< Minimum volume (as fraction) of protein islands to retain + double constant_r; ///< Constant radius (for AtomicRadiiSet::Constant) + double requested_spacing = 0.; ///< Requested grid spacing (0 = auto) + + /// Initialize SolventMasker with a radii set and optional constant radius. + /// Automatically sets default parameters (rprobe, rshrink) for the chosen set. + /// @param choice Atomic radii set to use + /// @param constant_r_ Constant radius (only used if choice is AtomicRadiiSet::Constant) SolventMasker(AtomicRadiiSet choice, double constant_r_=0.) { set_radii(choice, constant_r_); } - // currently this function sets also parameters other than radii + /// Sets the atomic radii set and related parameters. + /// Updates rprobe, rshrink, and island_min_volume based on the chosen library. + /// @param choice Atomic radii set to use + /// @param constant_r_ Constant radius override void set_radii(AtomicRadiiSet choice, double constant_r_=0.) { atomic_radii_set = choice; constant_r = constant_r_; @@ -326,15 +375,24 @@ struct SolventMasker { } } - /// fill whole grid with 1 + /// Fills the entire grid with 1 (solvent region). + /// @tparam T Grid value type + /// @param grid Grid to fill template void clear(Grid& grid) const { grid.fill((T)1); } - /// set grid points around atoms to 0 + /// Sets grid points around atoms to 0 (protein region). + /// @tparam T Grid value type + /// @param grid Grid to modify in-place + /// @param model Molecular model template void mask_points(Grid& grid, const Model& model) const { mask_points_in_radius(grid, model, atomic_radii_set, constant_r + rprobe, (T)0, ignore_hydrogen, ignore_zero_occupancy_atoms); } + /// Sets grid points around atoms to 0, with optional occupancy weighting. + /// Uses atom occupancy if @c use_atom_occupancy is true; otherwise calls mask_points. + /// @param grid Grid to modify in-place + /// @param model Molecular model void mask_points(Grid& grid, const Model& model) const { if (use_atom_occupancy) mask_points_using_occupancy(grid, model, atomic_radii_set, constant_r + rprobe, @@ -343,10 +401,11 @@ struct SolventMasker { mask_points(grid, model); } - /// Apply symmetry to grid (to fill whole grid). If we are using an - /// integer/binary mask this is done by distributing all 0-value - /// grid points - while for a float map/mask we are setting each - /// grid point to the minimum value. + /// Applies space group symmetry to fill the entire grid. + /// For integer/binary masks, distributes 0-value points via symmetry operators. + /// For float masks, sets each point to the minimum value across symmetry mates. + /// @tparam T Grid value type + /// @param grid Grid to symmetrize in-place template void symmetrize(Grid& grid) const { if (std::is_same::value) { grid.symmetrize([&](T a, T b) { return a == (T)0 || b == (T)0 ? (T)0 : (T)1; }); @@ -356,6 +415,10 @@ struct SolventMasker { } } + /// Shrinks the masked (protein) region by marking a margin as solvent. + /// Marks all points within @c rshrink of the protein-solvent boundary. + /// @tparam T Grid value type + /// @param grid Grid to shrink in-place template void shrink(Grid& grid) const { if (rshrink > 0) { set_margin_around(grid, rshrink, (T)1, (T)-1); @@ -363,15 +426,20 @@ struct SolventMasker { } } + /// Inverts the mask (1 becomes 0, 0 becomes 1). + /// @tparam T Grid value type + /// @param grid Grid to invert in-place template void invert(Grid& grid) const { for (auto& v : grid.data) v = (T)1 - v; } - - // Removes small islands of Land=1 in the sea of 0. Uses flood fill. - // cf. find_blobs_by_flood_fill() - // Currently doesn't work mask from mask_points_using_occupancy(). + /// Removes small disconnected regions (islands) of value 1 using flood fill. + /// Islands smaller than @c island_min_volume (as a fraction of unit cell volume) are removed. + /// @tparam T Grid value type (typically int8_t) + /// @param grid Grid to modify in-place + /// @return Number of islands removed + /// @note Does not work with masks generated by mask_points_using_occupancy() template int remove_islands(Grid& grid) const { if (island_min_volume <= 0) return 0; @@ -390,6 +458,11 @@ struct SolventMasker { return counter; } + /// Generates a complete solvent mask on a grid using the standard pipeline. + /// Steps: clear grid (1) -> mask atoms (0) -> apply symmetry -> remove islands -> shrink. + /// @tparam T Grid value type + /// @param grid Grid to populate with mask + /// @param model Molecular model to mask template void put_mask_on_grid(Grid& grid, const Model& model) const { clear(grid); assert(!grid.data.empty()); @@ -399,6 +472,10 @@ struct SolventMasker { shrink(grid); } + /// Sets grid points around atoms to 0, applying symmetry without shrinking. + /// Used to zero out a float density map in the protein region. + /// @param grid Float grid to modify in-place + /// @param model Molecular model void set_to_zero(Grid& grid, const Model& model) const { mask_points(grid, model); grid.symmetrize([&](float a, float b) { return b == 0.f ? 0.f : a; }); @@ -425,14 +502,19 @@ struct SolventMasker { #endif }; +/// Information about a grid point's relationship to nearby model atoms. +/// Used internally for interpolation of density maps around model atoms. struct NodeInfo { - double dist_sq; // distance from the nearest atom - bool found = false; // the mask flag - //Element elem = El::X; - int u = 0, v = 0, w = 0; // not normalized near-model grid coordinates + double dist_sq; ///< Square of distance from the nearest atom + bool found = false; ///< True if a nearby atom was found within radius + int u = 0, v = 0, w = 0; ///< Non-normalized near-model grid coordinates of nearest atom }; -/// Populate NodeInfo grid for nodes near the model. +/// Populates a grid with NodeInfo for all grid points near model atoms. +/// Finds the nearest atom and its distance for each grid point within @p radius. +/// @param mask NodeInfo grid to populate +/// @param model Molecular model to search +/// @param radius Search radius in Angstroms inline void mask_with_node_info(Grid& mask, const Model& model, double radius) { NodeInfo default_ni; default_ni.dist_sq = radius * radius; @@ -462,10 +544,11 @@ inline void mask_with_node_info(Grid& mask, const Model& model, double } } -/// Skip nodes that are closer to a symmetry mate of the model than -/// to the original model. A node is closer to a symmetry mate when it has -/// a symmetry image that is closer to the original model than the node. -/// We ignore NCS here. +/// Removes grid points that are closer to a symmetry mate than to the original model. +/// A grid point is unmasked (marked as @c found=false) if any symmetry image of the +/// nearest atom is closer than that atom. This avoids double-counting in density calculations. +/// Non-crystallographic symmetry (NCS) is ignored. +/// @param mask NodeInfo grid to update in-place inline void unmask_symmetry_mates(Grid& mask) { std::vector symmetry_ops = mask.get_scaled_ops_except_id(); size_t idx = 0; @@ -483,7 +566,17 @@ inline void unmask_symmetry_mates(Grid& mask) { } } -// TODO: would it be better to use src_model rather than dest_model? +/// Interpolates grid values from a source grid around atoms in a destination model. +/// Identifies grid points in the destination grid that are near atoms in the destination model, +/// transforms them to source grid coordinates, and interpolates values from the source. +/// Grid points closer to symmetry mates are skipped to avoid double-counting. +/// @tparam T Grid value type +/// @param dest Destination grid to interpolate into (modified in-place) +/// @param src Source grid to interpolate from +/// @param tr Transformation from destination to source +/// @param dest_model Model in destination grid frame (determines which points to interpolate) +/// @param radius Search radius in Angstroms for atoms +/// @param order Interpolation order (1=linear, 3=cubic, default 1) template void interpolate_grid_around_model(Grid& dest, const Grid& src, const Transform& tr, @@ -506,7 +599,12 @@ void interpolate_grid_around_model(Grid& dest, const Grid& src, } -// add soft edge to 1/0 mask using raised cosine function +/// Adds a smooth transition zone to the boundary of a binary mask. +/// Converts sharp 0/1 boundaries to smooth transitions using a raised cosine function. +/// Grid points at distance < @p width from the boundary are set to cosine-interpolated values. +/// @tparam T Grid value type +/// @param grid Binary mask to smooth in-place (0s become 1s beyond boundary) +/// @param width Width of transition zone in Angstroms template void add_soft_edge_to_mask(Grid& grid, double width) { const double width2 = width * width;