diff --git a/docs/api.rst b/docs/api.rst index 00982747b..b53cb8ae6 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -30,6 +30,96 @@ Map and Grid Data .. doxygenfile:: grid.hpp :project: gemmi +I/O and Filesystem Utilities +------------------------------ + +File and directory traversal, gzip support, stream abstractions, PDB path +utilities, and general-purpose string and container helpers. + +*(Full documentation added in PR 10.)* + +.. doxygenfile:: dirwalk.hpp + :project: gemmi + +.. doxygenfile:: fileutil.hpp + :project: gemmi + +.. doxygenfile:: fstream.hpp + :project: gemmi + +.. doxygenfile:: gz.hpp + :project: gemmi + +.. doxygenfile:: input.hpp + :project: gemmi + +.. doxygenfile:: glob.hpp + :project: gemmi + +.. doxygenfile:: logger.hpp + :project: gemmi + +.. doxygenfile:: pdb_id.hpp + :project: gemmi + +.. doxygenfile:: util.hpp + :project: gemmi + +Low-level Primitives +-------------------- + +Span and range views, custom iterators, error utilities, fast numeric parsing, +and version information. + +*(Full documentation added in PR 10.)* + +.. doxygenfile:: span.hpp + :project: gemmi + +.. doxygenfile:: iterator.hpp + :project: gemmi + +.. doxygenfile:: fail.hpp + :project: gemmi + +.. doxygenfile:: atof.hpp + :project: gemmi + +.. doxygenfile:: atox.hpp + :project: gemmi + +.. doxygenfile:: version.hpp + :project: gemmi + +Miscellaneous +------------- + +Anomalous scattering addends, bond index, DSN6/BRIX map format, enum/string +conversions, string formatting, statistics, and PyMOL selection language. + +*(Full documentation added in PR 10.)* + +.. doxygenfile:: addends.hpp + :project: gemmi + +.. doxygenfile:: bond_idx.hpp + :project: gemmi + +.. doxygenfile:: dsn6.hpp + :project: gemmi + +.. doxygenfile:: enumstr.hpp + :project: gemmi + +.. doxygenfile:: sprintf.hpp + :project: gemmi + +.. doxygenfile:: stats.hpp + :project: gemmi + +.. 
doxygenfile:: pymol_select.hpp + :project: gemmi + Scattering, Math, and Geometry ------------------------------- diff --git a/include/gemmi/addends.hpp b/include/gemmi/addends.hpp index a7081e0ee..5b8273ae0 100644 --- a/include/gemmi/addends.hpp +++ b/include/gemmi/addends.hpp @@ -11,16 +11,34 @@ namespace gemmi { +/// @brief Container for anomalous scattering correction addends +/// Stores addend values for each element used in density and structure factor calculations. struct Addends { std::array values = {}; + /// @brief Set the addend value for a given element + /// @param el the chemical element + /// @param val the addend value to set void set(Element el, float val) { values[el.ordinal()] = val; } + + /// @brief Get the addend value for a given element + /// @param el the chemical element + /// @return the addend value for the element float get(Element el) const { return values[el.ordinal()]; } + + /// @brief Get the total number of elements in the array + /// @return the size of the addends array size_t size() const { return values.size(); } + + /// @brief Clear all addend values to zero void clear() { for (size_t i = 0; i != size(); ++i) values[i] = 0.; } + + /// @brief Subtract atomic number Z from each element's addend value + /// Optionally preserves hydrogen and deuterium values. + /// @param except_hydrogen if true, skip subtracting from hydrogen and deuterium void subtract_z(bool except_hydrogen=false) { for (int z = 2; z < (int)El::D; ++z) values[z] -= z; diff --git a/include/gemmi/atof.hpp b/include/gemmi/atof.hpp index 7f275f7bf..e326d16f4 100644 --- a/include/gemmi/atof.hpp +++ b/include/gemmi/atof.hpp @@ -11,8 +11,15 @@ namespace gemmi { +/// @brief Result type from fast_float::from_chars. using fast_float::from_chars_result; +/// @brief Fast locale-independent string to double conversion with range. 
+/// @param start pointer to string start +/// @param end pointer to one-past-end +/// @param d reference to output double +/// @return from_chars_result with ptr field pointing to first non-converted character +/// @details Skips leading whitespace and optional '+' sign before parsing. inline from_chars_result fast_from_chars(const char* start, const char* end, double& d) { while (start < end && is_space(*start)) ++start; @@ -21,6 +28,11 @@ inline from_chars_result fast_from_chars(const char* start, const char* end, dou return fast_float::from_chars(start, end, d); } +/// @brief Fast locale-independent string to double conversion (null-terminated). +/// @param start pointer to null-terminated string +/// @param d reference to output double +/// @return from_chars_result with ptr field pointing to first non-converted character +/// @details Skips leading whitespace and optional '+' sign before parsing. inline from_chars_result fast_from_chars(const char* start, double& d) { while (is_space(*start)) ++start; @@ -29,6 +41,10 @@ inline from_chars_result fast_from_chars(const char* start, double& d) { return fast_float::from_chars(start, start + std::strlen(start), d); } +/// @brief Fast locale-independent string to double conversion with optional end pointer. +/// @param p pointer to string (null-terminated) +/// @param endptr optional pointer to receive end of parsed string (may be nullptr) +/// @return the parsed double value inline double fast_atof(const char* p, const char** endptr=nullptr) { double d = 0; auto result = fast_from_chars(p, d); diff --git a/include/gemmi/atox.hpp b/include/gemmi/atox.hpp index 14b4a3b30..2d942baca 100644 --- a/include/gemmi/atox.hpp +++ b/include/gemmi/atox.hpp @@ -19,7 +19,9 @@ namespace gemmi { -// equivalent of std::isspace for C locale (no handling of EOF) +/// @brief Locale-independent isspace equivalent (C locale only, no EOF handling). 
+/// @param c character to test +/// @return true if c is whitespace (tab, newline, vertical tab, form feed, carriage return, or space) inline bool is_space(char c) { static const std::uint8_t table[256] = { // 1 for 9-13 and 32 0,0,0,0,0,0,0,0, 0,1,1,1,1,1,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, @@ -34,16 +36,23 @@ inline bool is_space(char c) { return table[(std::uint8_t)c] != 0; } -// equivalent of std::isblank for C locale (no handling of EOF) +/// @brief Locale-independent isblank equivalent (C locale only, no EOF handling). +/// @param c character to test +/// @return true if c is space or tab inline bool is_blank(char c) { return c == ' ' || c == '\t'; } -// equivalent of std::isdigit for C locale (no handling of EOF) +/// @brief Locale-independent isdigit equivalent (C locale only, no EOF handling). +/// @param c character to test +/// @return true if c is a decimal digit (0-9) inline bool is_digit(char c) { return c >= '0' && c <= '9'; } +/// @brief Skip leading blank characters (spaces and tabs). +/// @param p pointer to string (may be null) +/// @return pointer to first non-blank character, or end of string inline const char* skip_blank(const char* p) { if (p) while (is_blank(*p)) @@ -51,6 +60,9 @@ inline const char* skip_blank(const char* p) { return p; } +/// @brief Skip word (non-whitespace characters). +/// @param p pointer to string (may be null) +/// @return pointer to first whitespace or null terminator inline const char* skip_word(const char* p) { if (p) while (*p != '\0' && !is_space(*p)) @@ -58,18 +70,30 @@ inline const char* skip_word(const char* p) { return p; } +/// @brief Read a word from the start of a line (skipping leading blanks). +/// @param line pointer to start of line +/// @return string containing the word inline std::string read_word(const char* line) { line = skip_blank(line); return std::string(line, skip_word(line)); } +/// @brief Read a word from the start of a line with end pointer. 
+/// @param line pointer to start of line +/// @param endptr receives pointer to the character after the word (must not be null) +/// @return string containing the word inline std::string read_word(const char* line, const char** endptr) { line = skip_blank(line); *endptr = skip_word(line); return std::string(line, *endptr); } -// no checking for overflow +/// @brief Convert string to signed integer (locale-independent, no overflow checking). +/// @param p pointer to string +/// @param checked if true, throw std::invalid_argument if string is not a valid integer +/// @param length max length to parse (0 = unlimited) +/// @return the converted integer +/// @throws std::invalid_argument if checked=true and string is invalid inline int string_to_int(const char* p, bool checked, size_t length=0) { int mult = -1; int n = 0; @@ -98,10 +122,19 @@ inline int string_to_int(const char* p, bool checked, size_t length=0) { return mult * n; } +/// @brief Convert std::string to signed integer (forwards to the const char* overload). +/// @param str the string to convert +/// @param checked if true, throw std::invalid_argument if string is not a valid integer +/// @return the converted integer +/// @throws std::invalid_argument if checked=true and string is invalid inline int string_to_int(const std::string& str, bool checked) { return string_to_int(str.c_str(), checked); } +/// @brief Fast atoi-like conversion with optional end pointer (unchecked, allows partial parse). +/// @param p pointer to null-terminated string +/// @param endptr optional pointer to receive pointer to first non-digit character +/// @return the converted integer inline int simple_atoi(const char* p, const char** endptr=nullptr) { int mult = -1; int n = 0; @@ -120,6 +153,10 @@ inline int simple_atoi(const char* p, const char** endptr=nullptr) { return mult * n; } +/// @brief Fast atoi-like conversion without sign (positive only, unchecked). 
+/// @param p pointer to null-terminated string +/// @param endptr optional pointer to receive pointer to first non-digit character +/// @return the converted non-negative integer inline int no_sign_atoi(const char* p, const char** endptr=nullptr) { int n = 0; while (is_space(*p)) diff --git a/include/gemmi/bond_idx.hpp b/include/gemmi/bond_idx.hpp index 4f262786e..bc4012138 100644 --- a/include/gemmi/bond_idx.hpp +++ b/include/gemmi/bond_idx.hpp @@ -11,24 +11,41 @@ namespace gemmi { +/// @brief Index for efficient bond topology queries in a crystal structure +/// Enables checking atom connectivity and calculating graph distances, including +/// handling of atoms in different unit cell images. struct BondIndex { const Model& model; + /// @brief Represents an atom and whether it's in the same unit cell image struct AtomImage { - int atom_serial; - bool same_image; + int atom_serial; ///< Serial number of the atom + bool same_image; ///< True if atom is in the same unit cell image as reference + /// @brief Equality comparison + /// @param o the other AtomImage to compare + /// @return true if both serial and image flag match bool operator==(const AtomImage& o) const { return atom_serial == o.atom_serial && same_image == o.same_image; } }; std::map> index; + /// @brief Construct a BondIndex for the given model + /// @details Initializes the index with all atoms from the model. + /// Fails if duplicate atom serial numbers are found. + /// @param model_ the crystallographic model to index BondIndex(const Model& model_) : model(model_) { for (const_CRA cra : model.all()) if (!index.emplace(cra.atom->serial, std::vector()).second) fail("duplicated serial numbers"); } + /// @brief Add a unidirectional bond link between two atoms + /// @details Does not add the reverse link (a->b without b->a). + /// Does not add duplicate links. 
+ /// @param a the first atom + /// @param b the second atom + /// @param same_image whether both atoms are in the same unit cell image void add_oneway_link(const Atom& a, const Atom& b, bool same_image) { std::vector& list_a = index.at(a.serial); AtomImage ai{b.serial, same_image}; @@ -36,14 +53,21 @@ struct BondIndex { list_a.push_back(ai); } + /// @brief Add a bidirectional bond link between two atoms + /// @param a the first atom + /// @param b the second atom + /// @param same_image whether both atoms are in the same unit cell image void add_link(const Atom& a, const Atom& b, bool same_image) { add_oneway_link(a, b, same_image); add_oneway_link(b, a, same_image); } - // add_monomer_bonds() is not aware of modifications associated with links. - // Modifications that add bonds are rare, but to be more correct, use bonds - // from topology (Topo::bonds). + /// @brief Add bonds from monomer library restraints to the index + /// @details Populates the bond index with standard bonds defined for each + /// residue type in the monomer library. Does not handle custom + /// bond modifications; for more accurate results, use bonds from + /// topology (Topo::bonds) which accounts for modifications. + /// @param monlib the monomer library containing bond definitions void add_monomer_bonds(MonLib& monlib) { for (const Chain& chain : model.chains) for (const Residue& res : chain.residues) { @@ -65,10 +89,23 @@ struct BondIndex { } } + /// @brief Check if two atoms are directly bonded + /// @param a the first atom + /// @param b the second atom + /// @param same_image whether both atoms should be in the same unit cell image + /// @return true if a direct bond exists between the atoms bool are_linked(const Atom& a, const Atom& b, bool same_image) const { return in_vector({b.serial, same_image}, index.at(a.serial)); } + /// @brief Calculate the minimum graph distance between two atoms + /// @details Uses breadth-first search to find the shortest path through bonds. 
+ /// Automatically handles transitions between unit cell images. + /// @param a the starting atom + /// @param b the target atom + /// @param same_image whether both atoms should be in the same unit cell image + /// @param max_distance maximum distance to search (default 4) + /// @return the graph distance in bonds, or (max_distance + 1) if no path exists int graph_distance(const Atom& a, const Atom& b, bool same_image, int max_distance=4) const { std::vector neighbors(1, {a.serial, true}); diff --git a/include/gemmi/dirwalk.hpp b/include/gemmi/dirwalk.hpp index b1b2213c9..f49214228 100644 --- a/include/gemmi/dirwalk.hpp +++ b/include/gemmi/dirwalk.hpp @@ -107,9 +107,17 @@ inline int utf8_tinydir_file_open(tinydir_file* file, const char* path) { } // namespace impl +/// Template class for iterating over files and directories in a directory tree. +/// @brief Directory tree walker (depth-first, alphabetical order). +/// @tparam FileOnly if true, iterate over files only; if false, include directories +/// @tparam Filter predicate type to filter which files/directories to visit template class DirWalk { public: + /// Construct a DirWalk starting from a given path. + /// @brief Initialize directory walker. + /// @param path root directory or file path to start traversal + /// @param try_pdbid expansion type char (e.g. 'M'), or '\0' to skip PDB code expansion explicit DirWalk(const char* path, char try_pdbid='\0') { if (impl::utf8_tinydir_file_open(&top_, path) != -1) return; @@ -121,18 +129,31 @@ class DirWalk { } sys_fail("Cannot open " + std::string(path)); } + /// Construct a DirWalk from a std::string path. + /// @brief Initialize directory walker from string path. + /// @param path root directory or file path to start traversal + /// @param try_pdbid expansion type char (e.g. 'M'), or '\0' to skip PDB code expansion explicit DirWalk(const std::string& path, char try_pdbid='\0') : DirWalk(path.c_str(), try_pdbid) {} + /// Destructor. + /// @brief Clean up resources. 
~DirWalk() { for (auto& d : dirs_) tinydir_close(&d.second); } + /// Push a subdirectory onto the traversal stack. + /// @brief Record current position and open a new subdirectory. + /// @param cur_pos index of current file in parent directory + /// @param path subdirectory path to open void push_dir(size_t cur_pos, const _tinydir_char_t* path) { dirs_.emplace_back(); dirs_.back().first = cur_pos; if (tinydir_open_sorted(&dirs_.back().second, path) == -1) sys_fail("Cannot open directory " + as_utf8(path)); } + /// Pop a subdirectory from the traversal stack. + /// @brief Close current directory and return to parent. + /// @return position (index) to resume in parent directory size_t pop_dir() { assert(!dirs_.empty()); size_t old_pos = dirs_.back().first; @@ -141,12 +162,20 @@ class DirWalk { return old_pos; } + /// Iterator for directory tree traversal. + /// @brief Depth-first iterator over files and directories. struct Iter { DirWalk& walk; size_t cur; + /// Get reference to current directory. + /// @brief Access current tinydir_dir structure. + /// @return reference to the current directory being traversed const tinydir_dir& get_dir() const { return walk.dirs_.back().second; } + /// Get current file/directory entry. + /// @brief Access the current tinydir_file structure. + /// @return reference to current file or directory being traversed const tinydir_file& get() const { if (walk.dirs_.empty()) return walk.top_; @@ -154,16 +183,27 @@ class DirWalk { return get_dir()._files[cur]; } + /// Dereference iterator to get file path. + /// @brief Get the full path of current file/directory. + /// @return current file/directory path as UTF-8 string std::string operator*() const { return as_utf8(get().path); } - // checks for "." and ".." + /// Check if name is "." or "..". + /// @brief Test if name is a special directory reference. + /// @param name filename to check + /// @return true if name is "." or ".." 
bool is_special(const _tinydir_char_t* name) const { return name[0] == '.' && (name[1] == '\0' || (name[1] == '.' && name[2] == '\0')); } + /// Get current traversal depth. + /// @brief Return the nesting level in the directory tree. + /// @return depth (0 for root level) size_t depth() const { return walk.dirs_.size(); } + /// Advance to next file/directory (internal use). + /// @brief Perform one step of depth-first traversal. void next() { // depth first const tinydir_file& tf = get(); if (tf.is_dir) { @@ -182,6 +222,8 @@ class DirWalk { } } + /// Pre-increment operator. + /// @brief Advance to next matching file/directory in traversal. void operator++() { for (;;) { next(); @@ -194,11 +236,21 @@ class DirWalk { } } - // == and != is used only to compare with end() + /// Equality comparison (for range-based for loops). + /// @brief Check if iterator equals another (compared with end()). + /// @param o other iterator to compare with + /// @return true if both reach end of traversal bool operator==(const Iter& o) const { return depth()==0 && cur == o.cur; } + /// Inequality comparison (for range-based for loops). + /// @brief Check if iterator differs from another (compared with end()). + /// @param o other iterator to compare with + /// @return true if iterators are at different positions bool operator!=(const Iter& o) const { return !operator==(o); } }; + /// Get iterator to beginning of traversal. + /// @brief Create iterator for range-based for loop. + /// @return iterator pointing to first file/directory Iter begin() { Iter it{*this, 0}; if (FileOnly && !is_single_file()) // i.e. the top item is a directory @@ -206,7 +258,13 @@ class DirWalk { return it; } + /// Get iterator to end of traversal. + /// @brief Sentinel iterator for range-based for loop. + /// @return iterator marking end of traversal Iter end() { return Iter{*this, 1}; } + /// Check if root path is a single file. + /// @brief Test whether the root is a file rather than directory. 
+ /// @return true if root path is a file (not a directory) bool is_single_file() { return !top_.is_dir; } private: @@ -217,12 +275,22 @@ class DirWalk { Filter filter; }; +/// @brief Type alias for walking CIF (mmCIF and SF) files. using CifWalk = DirWalk; +/// @brief Type alias for walking mmCIF files only. using MmCifWalk = DirWalk; +/// @brief Type alias for walking PDB files. using PdbWalk = DirWalk; +/// @brief Type alias for walking coordinate files (mmCIF, PDB, ENT). using CoorFileWalk = DirWalk; +/// @brief Directory walker with glob pattern matching. +/// @details Iterates over files matching a wildcard pattern. struct GlobWalk : public DirWalk { + /// Construct a GlobWalk with path and glob pattern. + /// @brief Initialize glob-filtered directory walker. + /// @param path root directory to start traversal + /// @param glob glob pattern for filtering files GlobWalk(const std::string& path, const std::string& glob) : DirWalk(path) { filter.pattern = glob; } diff --git a/include/gemmi/dsn6.hpp b/include/gemmi/dsn6.hpp index 8af5010f7..d0be945ca 100644 --- a/include/gemmi/dsn6.hpp +++ b/include/gemmi/dsn6.hpp @@ -30,7 +30,16 @@ inline int16_t read_dsn6_i16(const char* buf, size_t buf_size, } // namespace impl -/// Reads a DSN6/BRIX map from memory into a Grid and returns statistics. +/// @brief Read a DSN6/BRIX electron density map from memory +/// @details Parses the DSN6/BRIX format (used for density maps) from a binary buffer +/// and populates a grid with the density values. Automatically detects +/// endianness from the header. Reads unit cell parameters and scales data +/// appropriately. 
+/// @param buf pointer to the buffer containing DSN6 data +/// @param size size of the buffer in bytes (must be at least 512 for header) +/// @param grid output grid to be populated with density values +/// @return statistics of the loaded density data (min, max, mean, rms, NaN count) +/// @throws std::runtime_error if format is invalid or data is truncated inline DataStats read_dsn6_from_memory(const char* buf, size_t size, Grid& grid) { if (size < 512) @@ -113,7 +122,10 @@ inline DataStats read_dsn6_from_memory(const char* buf, size_t size, return calculate_data_statistics(grid.data); } -/// Reads a DSN6/BRIX map from a file. Returns the grid. +/// @brief Read a DSN6/BRIX electron density map from a file +/// @param path file path to the DSN6/BRIX format density map +/// @return a Grid containing the loaded density data +/// @throws std::runtime_error if file cannot be read or format is invalid inline Grid read_dsn6_map(const std::string& path) { CharArray buf = read_file_into_buffer(path); Grid grid; diff --git a/include/gemmi/enumstr.hpp b/include/gemmi/enumstr.hpp index e804895ea..1352ecc4a 100644 --- a/include/gemmi/enumstr.hpp +++ b/include/gemmi/enumstr.hpp @@ -11,6 +11,9 @@ namespace gemmi { +/// @brief Convert EntityType enum to mmCIF string representation +/// @param entity_type the entity type to convert +/// @return mmCIF string: "polymer", "branched", "non-polymer", "water", or "?" 
inline const char* entity_type_to_string(EntityType entity_type) { switch (entity_type) { case EntityType::Polymer: return "polymer"; @@ -21,6 +24,9 @@ inline const char* entity_type_to_string(EntityType entity_type) { } } +/// @brief Convert mmCIF string to EntityType enum +/// @param t the mmCIF entity type string +/// @return EntityType enum value; EntityType::Unknown if string is not recognized inline EntityType entity_type_from_string(const std::string& t) { if (t == "polymer") return EntityType::Polymer; if (t == "branched") return EntityType::Branched; @@ -30,6 +36,9 @@ inline EntityType entity_type_from_string(const std::string& t) { } +/// @brief Convert PolymerType enum to mmCIF string representation +/// @param polymer_type the polymer type to convert +/// @return mmCIF string representation of the polymer type inline const char* polymer_type_to_string(PolymerType polymer_type) { switch (polymer_type) { case PolymerType::PeptideL: return "polypeptide(L)"; @@ -47,6 +56,9 @@ inline const char* polymer_type_to_string(PolymerType polymer_type) { } } +/// @brief Convert mmCIF string to PolymerType enum +/// @param t the mmCIF polymer type string +/// @return PolymerType enum value; PolymerType::Unknown if string is not recognized inline PolymerType polymer_type_from_string(const std::string& t) { if (t == "polypeptide(L)") return PolymerType::PeptideL; if (t == "polydeoxyribonucleotide") return PolymerType::Dna; @@ -63,6 +75,9 @@ inline PolymerType polymer_type_from_string(const std::string& t) { } +/// @brief Convert Connection::Type enum to mmCIF string representation +/// @param t the connection type to convert +/// @return mmCIF string: "covale", "disulf", "hydrog", "metalc", or "." inline const char* connection_type_to_string(Connection::Type t) { static constexpr const char* type_ids[] = { "covale", "disulf", "hydrog", "metalc", "." 
@@ -70,6 +85,9 @@ inline const char* connection_type_to_string(Connection::Type t) { return type_ids[t]; } +/// @brief Convert mmCIF string to Connection::Type enum +/// @param t the mmCIF connection type string +/// @return Connection::Type enum value; Connection::Unknown if string is not recognized inline Connection::Type connection_type_from_string(const std::string& t) { for (int i = 0; i != Connection::Unknown; ++i) if (connection_type_to_string(Connection::Type(i)) == t) @@ -77,6 +95,9 @@ inline Connection::Type connection_type_from_string(const std::string& t) { return Connection::Unknown; } +/// @brief Convert SoftwareItem::Classification enum to string representation +/// @param c the software classification to convert +/// @return classification string such as "data collection", "refinement", etc. inline std::string software_classification_to_string(SoftwareItem::Classification c) { switch (c) { @@ -93,6 +114,9 @@ std::string software_classification_to_string(SoftwareItem::Classification c) { unreachable(); } +/// @brief Convert string to SoftwareItem::Classification enum (case-insensitive) +/// @param str the classification string to parse +/// @return SoftwareItem::Classification enum value; SoftwareItem::Unspecified if not recognized inline SoftwareItem::Classification software_classification_from_string(const std::string& str) { if (iequal(str, "data collection")) return SoftwareItem::DataCollection; diff --git a/include/gemmi/fail.hpp b/include/gemmi/fail.hpp index 10596385b..4ce21975d 100644 --- a/include/gemmi/fail.hpp +++ b/include/gemmi/fail.hpp @@ -55,28 +55,46 @@ namespace gemmi { +/// @brief Throw a std::runtime_error with the given message. +/// @param msg error message [[noreturn]] inline void fail(const std::string& msg) { throw std::runtime_error(msg); } +/// @brief Variadic fail that concatenates arguments and throws std::runtime_error. 
+/// @tparam T type of first argument +/// @tparam Args types of remaining arguments +/// @param str accumulating error message +/// @param arg1 first argument to append +/// @param args remaining arguments to append template [[noreturn]] void fail(std::string&& str, T&& arg1, Args&&... args) { str += arg1; fail(std::move(str), std::forward(args)...); } +/// @brief Throw a std::runtime_error with the given message (c-string overload). +/// @param msg error message (null-terminated C-string) [[noreturn]] inline GEMMI_COLD void fail(const char* msg) { throw std::runtime_error(msg); } +/// @brief Throw a std::system_error with current errno. +/// @param msg error message +/// @details The system error code is read from errno at the time of the call. [[noreturn]] inline GEMMI_COLD void sys_fail(const std::string& msg) { throw std::system_error(errno, std::system_category(), msg); } +/// @brief Throw a std::system_error with current errno (c-string overload). +/// @param msg error message (null-terminated C-string) +/// @details The system error code is read from errno at the time of the call. [[noreturn]] inline GEMMI_COLD void sys_fail(const char* msg) { throw std::system_error(errno, std::system_category(), msg); } -// unreachable() is used to silence GCC -Wreturn-type and hint the compiler +/// @brief Mark a code path as unreachable. +/// @details Calls compiler-specific unreachable builtins (e.g., __builtin_unreachable for GCC/Clang, __assume(0) for MSVC). +/// Used to silence warnings and provide optimization hints. [[noreturn]] inline void unreachable() { #if defined(__GNUC__) || defined(__clang__) __builtin_unreachable(); diff --git a/include/gemmi/fileutil.hpp b/include/gemmi/fileutil.hpp index b91c55d51..2ab4e6cb7 100644 --- a/include/gemmi/fileutil.hpp +++ b/include/gemmi/fileutil.hpp @@ -20,7 +20,11 @@ namespace gemmi { -// strip directory and suffixes from filename +/// Extract basename from path, optionally stripping directory and suffixes. 
+/// @brief Extract filename with optional suffix removal. +/// @param path full file path +/// @param exts list of file extensions to strip from basename +/// @return basename without directory path and specified extensions inline std::string path_basename(const std::string& path, std::initializer_list exts) { size_t pos = path.find_last_of("\\/"); @@ -36,17 +40,29 @@ inline std::string path_basename(const std::string& path, // file operations -/// deleter for fileptr_t +/// @brief Custom deleter for FILE* pointers. +/// @details Conditionally calls std::fclose based on use_fclose flag. struct needs_fclose { + /// Whether to call fclose when deleting. bool use_fclose; + /// Delete operator that optionally calls std::fclose. + /// @brief Delete FILE* pointer if use_fclose is true. + /// @param f FILE pointer to delete void operator()(std::FILE* f) const noexcept { if (use_fclose) std::fclose(f); } }; +/// @brief Unique pointer to FILE with custom deleter. typedef std::unique_ptr fileptr_t; +/// Open a file and return a managed pointer. +/// @brief Open file with UTF-8 filename support and error handling. +/// @param path UTF-8 encoded file path +/// @param mode file open mode (e.g., "rb", "wb") +/// @return managed FILE pointer that auto-closes on destruction +/// @throws std::runtime_error if file cannot be opened inline fileptr_t file_open(const char* path, const char* mode) { std::FILE* file; #if defined(_WIN32) && !defined(GEMMI_USE_FOPEN) @@ -61,7 +77,11 @@ inline fileptr_t file_open(const char* path, const char* mode) { return fileptr_t(file, needs_fclose{true}); } -// like file_open() but returns null fileptr_t instead of throwing +/// Open a file, returning null pointer on failure instead of throwing. +/// @brief Open file without exception on error. 
+/// @param path UTF-8 encoded file path +/// @param mode file open mode (e.g., "rb", "wb") +/// @return managed FILE pointer or empty pointer if open fails inline fileptr_t file_open_or_null(const char* path, const char* mode) { std::FILE* file; #if defined(_WIN32) && !defined(GEMMI_USE_FOPEN) @@ -74,7 +94,12 @@ inline fileptr_t file_open_or_null(const char* path, const char* mode) { return fileptr_t(file, needs_fclose{true}); } -// helper function for treating "-" as stdin or stdout +/// Open a file, treating "-" as stdin/stdout. +/// @brief Open file or return predefined stream for dash character. +/// @param path file path, or "-" for stdin/stdout +/// @param mode file open mode (e.g., "rb" or "wb") +/// @param dash_stream stream to use when path is "-" +/// @return managed FILE pointer (either opened file or dash_stream) inline fileptr_t file_open_or(const char* path, const char* mode, std::FILE* dash_stream) { if (path[0] == '-' && path[1] == '\0') @@ -82,6 +107,12 @@ inline fileptr_t file_open_or(const char* path, const char* mode, return file_open(path, mode); } +/// Get file size by seeking to end and back. +/// @brief Determine file size in bytes. +/// @param f open FILE pointer +/// @param path file path (used only for error messages) +/// @return file size in bytes +/// @throws std::runtime_error if seek or tell operations fail inline std::size_t file_size(std::FILE* f, const std::string& path) { if (std::fseek(f, 0, SEEK_END) != 0) sys_fail(path + ": fseek failed"); @@ -93,23 +124,36 @@ inline std::size_t file_size(std::FILE* f, const std::string& path) { return length; } -// helper function for working with binary files +// helper functions for working with binary files + +/// Check if platform is little-endian. +/// @brief Test platform byte order. +/// @return true if platform is little-endian, false if big-endian inline bool is_little_endian() { std::uint32_t x = 1; return *reinterpret_cast(&x) == 1; } +/// Swap bytes in a 2-byte value. 
+/// @brief Reverse byte order of a short integer. +/// @param start pointer to 2-byte value to swap in-place inline void swap_two_bytes(void* start) { char* bytes = static_cast(start); std::swap(bytes[0], bytes[1]); } +/// Swap bytes in a 4-byte value. +/// @brief Reverse byte order of a 32-bit integer. +/// @param start pointer to 4-byte value to swap in-place inline void swap_four_bytes(void* start) { char* bytes = static_cast(start); std::swap(bytes[0], bytes[3]); std::swap(bytes[1], bytes[2]); } +/// Swap bytes in an 8-byte value. +/// @brief Reverse byte order of a 64-bit integer or double. +/// @param start pointer to 8-byte value to swap in-place inline void swap_eight_bytes(void* start) { char* bytes = static_cast(start); std::swap(bytes[0], bytes[7]); @@ -119,18 +163,44 @@ inline void swap_eight_bytes(void* start) { } +/// @brief Dynamically allocated character buffer. +/// @details Manages memory using std::malloc/std::realloc/std::free. class CharArray { std::unique_ptr ptr_; size_t size_; public: + /// Create an empty buffer. + /// @brief Default constructor for zero-sized buffer. CharArray() : ptr_(nullptr, &std::free), size_(0) {} + /// Create a buffer of specified size. + /// @brief Allocate buffer of given size. + /// @param n buffer size in bytes explicit CharArray(size_t n) : ptr_((char*)std::malloc(n), &std::free), size_(n) {} + /// Check if buffer is allocated. + /// @brief Test whether buffer contains valid memory. + /// @return true if buffer is not null explicit operator bool() const { return (bool)ptr_; } + /// Access buffer data. + /// @brief Get writable pointer to buffer. + /// @return pointer to buffer data char* data() { return ptr_.get(); } + /// Access buffer data (const). + /// @brief Get read-only pointer to buffer. + /// @return const pointer to buffer data const char* data() const { return ptr_.get(); } + /// Get buffer size. + /// @brief Return current buffer size in bytes. 
+ /// @return buffer size size_t size() const { return size_; } + /// Change recorded buffer size. + /// @brief Update internal size without reallocating. + /// @param n new size value void set_size(size_t n) { size_ = n; } + /// Resize buffer to new size. + /// @brief Reallocate buffer to given size. + /// @param n new buffer size in bytes + /// @throws std::runtime_error if reallocation fails and n is non-zero void resize(size_t n) { char* new_ptr = (char*) std::realloc(ptr_.get(), n); if (!new_ptr && n != 0) @@ -140,7 +210,10 @@ class CharArray { size_ = n; } - // Remove first n bytes making space for more text at the returned position. + /// Remove first n bytes and shift remaining data. + /// @brief Roll buffer forward by removing leading bytes. + /// @param n number of bytes to remove from start + /// @return pointer to space at end for new data char* roll(size_t n) { assert(n <= size()); std::memmove(data(), data() + n, n); @@ -149,7 +222,11 @@ class CharArray { }; -/// reading file into a memory buffer (optimized: uses fseek to determine file size) +/// Read entire file into a memory buffer. +/// @brief Load file contents into CharArray (uses fseek for size determination). +/// @param path UTF-8 file path to read +/// @return CharArray containing file data +/// @throws std::runtime_error if file cannot be opened or read inline CharArray read_file_into_buffer(const std::string& path) { fileptr_t f = file_open(path.c_str(), "rb"); size_t size = file_size(f.get(), path); @@ -159,6 +236,10 @@ inline CharArray read_file_into_buffer(const std::string& path) { return buffer; } +/// Read stdin into a memory buffer. +/// @brief Load standard input into CharArray. 
+/// @return CharArray containing stdin data +/// @throws std::runtime_error if read operation fails inline CharArray read_stdin_into_buffer() { size_t n = 0; CharArray buffer(16 * 1024); @@ -173,6 +254,11 @@ inline CharArray read_stdin_into_buffer() { return buffer; } +/// Read input (file, gzip, or stdin) into a buffer. +/// @brief Intelligently read various input sources into CharArray. +/// @tparam T input type (typically BasicInput or derived) +/// @param input input object with is_compressed(), is_stdin(), and path() methods +/// @return CharArray containing input data template inline CharArray read_into_buffer(T&& input) { if (input.is_compressed()) diff --git a/include/gemmi/fstream.hpp b/include/gemmi/fstream.hpp index a145aa9f2..25887f459 100644 --- a/include/gemmi/fstream.hpp +++ b/include/gemmi/fstream.hpp @@ -25,6 +25,11 @@ namespace gemmi { +/// Open a file stream with UTF-8 filename support. +/// @brief Helper to open streams with UTF-8 paths on Windows. +/// @tparam T stream type (std::ofstream or std::ifstream) +/// @param ptr stream object to open (passed by reference) +/// @param filename UTF-8 encoded filename template inline void open_stream_from_utf8_path(T& ptr, const std::string& filename) { #if defined(_MSC_VER) @@ -40,7 +45,13 @@ inline void open_stream_from_utf8_path(T& ptr, const std::string& filename) { // note: move of std::ofstream doesn't work in GCC 4.8. +/// @brief Output file stream wrapper with UTF-8 filename support. +/// @details Redirects filename "-" to a caller-supplied stream (if one is given) and handles UTF-8 paths on Windows. struct Ofstream { + /// Open output file with optional dash handling. + /// @brief Construct output stream. 
+ /// @param filename UTF-8 file path (or "-" to use @p dash) + /// @param dash pointer to stream to use if filename is "-" (typically std::cout); if null, "-" is not treated specially Ofstream(const std::string& filename, std::ostream* dash=nullptr) { if (filename.size() == 1 && filename[0] == '-' && dash) { ptr_ = dash; @@ -53,7 +64,13 @@ struct Ofstream { ptr_ = keeper_.get(); } + /// Get pointer to stream. + /// @brief Access stream object via pointer. + /// @return pointer to std::ostream std::ostream* operator->() { return ptr_; } + /// Get reference to stream. + /// @brief Access stream object by reference. + /// @return reference to std::ostream std::ostream& ref() { return *ptr_; } private: @@ -61,7 +78,13 @@ struct Ofstream { std::ostream* ptr_; }; +/// @brief Input file stream wrapper with UTF-8 filename support. +/// @details Redirects filename "-" to a caller-supplied stream (if one is given) and handles UTF-8 paths on Windows. struct Ifstream { + /// Open input file with optional dash handling. + /// @brief Construct input stream. + /// @param filename UTF-8 file path (or "-" to use @p dash) + /// @param dash pointer to stream to use if filename is "-" (typically std::cin); if null, "-" is not treated specially Ifstream(const std::string& filename, std::istream* dash=nullptr) { if (filename.size() == 1 && filename[0] == '-' && dash) { ptr_ = dash; @@ -74,7 +97,13 @@ struct Ifstream { ptr_ = keeper_.get(); } + /// Get pointer to stream. + /// @brief Access stream object via pointer. + /// @return pointer to std::istream std::istream* operator->() { return ptr_; } + /// Get reference to stream. + /// @brief Access stream object by reference. + /// @return reference to std::istream std::istream& ref() { return *ptr_; } private: diff --git a/include/gemmi/glob.hpp b/include/gemmi/glob.hpp index 58d90aa35..b8f56d677 100644 --- a/include/gemmi/glob.hpp +++ b/include/gemmi/glob.hpp @@ -9,7 +9,12 @@ namespace gemmi { -// linear-time glob matching: https://research.swtch.com/glob +/// Match string against glob pattern with `*` and `?` wildcards. 
+/// @brief Test if string matches a glob pattern. +/// @details Linear-time algorithm from https://research.swtch.com/glob +/// @param pattern glob pattern (`*` matches any sequence, `?` matches single char) +/// @param str string to match against pattern +/// @return true if str matches the pattern inline bool glob_match(const std::string& pattern, const std::string& str) { size_t pat_next = 0; size_t str_next = std::string::npos; diff --git a/include/gemmi/gz.hpp b/include/gemmi/gz.hpp index b4edc0166..2ef421be8 100644 --- a/include/gemmi/gz.hpp +++ b/include/gemmi/gz.hpp @@ -11,36 +11,95 @@ namespace gemmi { +/// @brief String describing zlib version and build information. GEMMI_DLL extern const char* const zlib_description; +/// Estimate uncompressed size of a gzipped file. +/// @brief Estimate the decompressed size of a .gz file. +/// @param path path to gzipped file +/// @return estimated uncompressed size in bytes GEMMI_DLL size_t estimate_uncompressed_size(const std::string& path); -// the same interface as FileStream and MemoryStream +/// @brief Stream wrapper for reading gzipped files. +/// @details Implements AnyStream interface for transparent gzip reading (using zlib). struct GEMMI_DLL GzStream final : public AnyStream { + /// Create a gzip stream from a zlib gzFile pointer. + /// @brief Construct stream from gzFile handle. + /// @param f_ opaque gzFile pointer GzStream(void* f_) : f(f_) {} + /// Read a line from the stream. + /// @brief Read next line into buffer. + /// @param line buffer to store line + /// @param size maximum bytes to read (including null terminator) + /// @return pointer to line or null if end of file char* gets(char* line, int size) override; + /// Read a single character. + /// @brief Get next byte from stream. + /// @return character as int, or -1 for end of file int getc() override; + /// Read a block of data. + /// @brief Read specified number of bytes. 
+ /// @param buf buffer to read into + /// @param len number of bytes to read + /// @return true if exactly len bytes were read bool read(void* buf, size_t len) override; + /// Skip forward in stream. + /// @brief Advance stream position without reading. + /// @param n number of bytes to skip + /// @return true if skip succeeded bool skip(size_t n) override; + /// Get current position. + /// @brief Report stream position. + /// @return current byte offset in stream long tell() override; + /// Read remainder of stream. + /// @brief Read all remaining data as string. + /// @return remaining stream contents as std::string std::string read_rest() override; private: void* f; // implementation detail }; +/// @brief Input source that transparently handles gzipped files. +/// @details Manages both regular and gzipped files with automatic detection. class GEMMI_DLL MaybeGzipped : public BasicInput { public: + /// Open a file (compressed or uncompressed). + /// @brief Initialize reader for file that may be gzipped. + /// @param path file path (may end in .gz) explicit MaybeGzipped(const std::string& path); + /// Close file resources. + /// @brief Destructor. ~MaybeGzipped(); + /// Read from gzipped file with error checking. + /// @brief Read bytes from gzipped stream. + /// @param buf buffer to read into + /// @param len number of bytes to read + /// @return number of bytes read + /// @throws std::runtime_error on gzip read error size_t gzread_checked(void* buf, size_t len); + /// Check if file is gzip compressed. + /// @brief Test whether file has .gz extension. + /// @return true if path ends with .gz bool is_compressed() const { return iends_with(path(), ".gz"); } + /// Get path without .gz extension. + /// @brief Remove .gz suffix if present. + /// @return path without extension, or original path if not gzipped std::string basepath() const { return is_compressed() ? path().substr(0, path().size() - 3) : path(); } + /// Decompress entire file into buffer. 
+ /// @brief Load gzipped or plain file contents into memory. + /// @param limit maximum decompressed size (0 = unlimited) + /// @return CharArray with file contents + /// @throws std::runtime_error if decompression fails or size limit exceeded CharArray uncompress_into_buffer(size_t limit=0); + /// Create a stream reader for this file. + /// @brief Create appropriate stream object (GzStream or FileStream). + /// @return unique_ptr to AnyStream for reading file std::unique_ptr create_stream(); private: diff --git a/include/gemmi/input.hpp b/include/gemmi/input.hpp index 2bf505f76..0b69c1dec 100644 --- a/include/gemmi/input.hpp +++ b/include/gemmi/input.hpp @@ -14,20 +14,44 @@ namespace gemmi { -// base class for FileStream, MemoryStream and GzStream +/// @brief Base class for stream abstractions (FileStream, MemoryStream, GzStream). struct AnyStream { virtual ~AnyStream() = default; - virtual char* gets(char* line, int size) = 0; // for pdb, copy_line() - virtual int getc() = 0; // for copy_line() - virtual bool read(void* buf, size_t len) = 0; // for ccp4, mtz - - // these are not used in GzStream because MemoryStream is used for mtz - virtual long tell() = 0; // temporary, for testing - virtual bool skip(size_t n) = 0; // for reading mtz without data - virtual std::string read_rest() { return {}; } // for mtz (appendix) - - size_t copy_line(char* line, int size) { // for pdb, xds_ascii + /// @brief Read a line of text into a buffer. + /// @param line Output buffer for the line + /// @param size Maximum number of characters to read + /// @return Pointer to line on success, nullptr if end of stream reached + virtual char* gets(char* line, int size) = 0; + + /// @brief Read a single character from the stream. + /// @return Next character, or EOF if end of stream reached + virtual int getc() = 0; + + /// @brief Read a block of binary data. 
+ /// @param buf Output buffer + /// @param len Number of bytes to read + /// @return True if successfully read exactly len bytes, false otherwise + virtual bool read(void* buf, size_t len) = 0; + + /// @brief Get current position in the stream. + /// @return Current byte offset + virtual long tell() = 0; + + /// @brief Skip ahead in the stream. + /// @param n Number of bytes to skip + /// @return True if skip succeeded + virtual bool skip(size_t n) = 0; + + /// @brief Read remaining data in the stream. + /// @return String containing remaining data, or empty string if none + virtual std::string read_rest() { return {}; } + + /// @brief Read a line and discard any overflow. + /// @param line Output buffer + /// @param size Maximum characters to read + /// @return Length of line read (including newline if present) + size_t copy_line(char* line, int size) { if (!gets(line, size)) return 0; size_t len = std::strlen(line); @@ -39,14 +63,35 @@ struct AnyStream { }; }; +/// @brief Stream abstraction for reading from files or stdin. struct FileStream final : public AnyStream { + /// @brief Open a file stream from a FILE* pointer. + /// @param f_ Existing FILE* pointer (not closed on destruction) FileStream(std::FILE* f_) : f(f_, needs_fclose{false}) {} + + /// @brief Open a file stream from a file path. + /// @param path File path (use "-" for stdin) + /// @param mode File open mode ("rb", "r", etc.) FileStream(const char* path, const char* mode) : f(file_open_or(path, mode, stdin)) {} + /// @brief Read a line of text from the file. + /// @param line Output buffer + /// @param size Maximum characters to read + /// @return Pointer to line, or nullptr if at end of file char* gets(char* line, int size) override { return std::fgets(line, size, f.get()); } + + /// @brief Read a single character from the file. + /// @return Next character, or EOF if at end of file int getc() override { return std::fgetc(f.get()); } + + /// @brief Read a block of binary data from the file. 
+ /// @param buf Output buffer + /// @param len Number of bytes to read + /// @return True if successfully read exactly len bytes bool read(void* buf, size_t len) override { return std::fread(buf, len, 1, f.get()) == 1; } + /// @brief Read all remaining data from current position to end of file. + /// @return String containing remaining data std::string read_rest() override { std::string ret; int c = std::fgetc(f.get()); @@ -63,10 +108,15 @@ struct FileStream final : public AnyStream { return ret; } + /// @brief Get current file position. + /// @return Current byte offset in file long tell() override { return std::ftell(f.get()); } + /// @brief Skip ahead in the file. + /// @param n Number of bytes to skip + /// @return True if successfully skipped bool skip(size_t n) override { #if defined(_MSC_VER) int result = _fseeki64(f.get(), (std::ptrdiff_t)n, SEEK_CUR); @@ -92,10 +142,18 @@ struct FileStream final : public AnyStream { fileptr_t f; }; +/// @brief Stream abstraction for reading from memory buffers. struct MemoryStream final : public AnyStream { + /// @brief Create a stream from a memory buffer. + /// @param start_ Pointer to start of buffer + /// @param size Size of buffer in bytes MemoryStream(const char* start_, size_t size) : start(start_), end(start_ + size), cur(start_) {} + /// @brief Read a line of text from the buffer. + /// @param line Output buffer + /// @param size Maximum characters to read + /// @return Pointer to line, or nullptr if at end of buffer char* gets(char* line, int size) override { --size; // fgets reads in at most one less than size characters if (cur >= end) @@ -109,8 +167,15 @@ struct MemoryStream final : public AnyStream { cur += len; return line; } + + /// @brief Read a single character from the buffer. + /// @return Next character, or EOF if at end of buffer int getc() override { return cur < end ? *cur++ : EOF; } + /// @brief Read a block of binary data from the buffer. 
+ /// @param buf Output buffer + /// @param len Number of bytes to read + /// @return True if successfully read exactly len bytes bool read(void* buf, size_t len) override { if (cur + len > end) return false; @@ -119,15 +184,23 @@ struct MemoryStream final : public AnyStream { return true; } + /// @brief Read all remaining data from current position to end of buffer. + /// @return String containing remaining data std::string read_rest() override { const char* last = cur; cur = end; return std::string(last, end); } + /// @brief Get current position in the buffer. + /// @return Current byte offset long tell() override { return cur - start; } + + /// @brief Skip ahead in the buffer. + /// @param n Number of bytes to skip + /// @return True if the new position is strictly before the buffer end (false if it lands exactly on or past the end) bool skip(size_t n) override { cur += n; return cur < end; @@ -139,23 +212,36 @@ struct MemoryStream final : public AnyStream { const char* cur; }; +/// @brief Input source abstraction for file paths. class BasicInput { public: + /// @brief Initialize with a file path. + /// @param path File path (use "-" for stdin) explicit BasicInput(const std::string& path) : path_(path) {} + /// @brief Get the input path. + /// @return Input path const std::string& path() const { return path_; } + + /// @brief Get the base path for reading (same as path() for non-compressed files). + /// @return Base path const std::string& basepath() const { return path_; } - // Does the path stands for stdin? - // Each reading function needs to call it (some functions use stdin - // and some std::cin, so we don't try to unify it here). + /// @brief Check if this input source is stdin. + /// @return True if path is "-"; each reading function must call this itself (some use stdin, some std::cin) bool is_stdin() const { return path() == "-"; } - // providing the same interface as MaybeGzipped + /// @brief Check if the input source is compressed. 
+ /// @return False for BasicInput (always uncompressed) bool is_compressed() const { return false; } - // for reading (uncompressing into memory) the whole file at once + + /// @brief Placeholder that reads nothing; exists only so BasicInput offers the same interface as MaybeGzipped. + /// @details The size parameter is unused; present for interface compatibility. + /// @return Empty CharArray (no decompression for BasicInput) CharArray uncompress_into_buffer(size_t=0) { return {}; } + /// @brief Create a stream for sequential reading. + /// @return Unique pointer to a FileStream std::unique_ptr create_stream() { return std::unique_ptr(new FileStream(path().c_str(), "rb")); } diff --git a/include/gemmi/iterator.hpp b/include/gemmi/iterator.hpp index 824472f96..20e85247e 100644 --- a/include/gemmi/iterator.hpp +++ b/include/gemmi/iterator.hpp @@ -22,7 +22,8 @@ namespace gemmi { #pragma nv_diag_suppress = conversion_function_not_usable #endif -// implements concept BidirectionalIterator +/// @brief Generic bidirectional iterator adapter whose iterator_category is std::bidirectional_iterator_tag. +/// @tparam Policy the iteration policy class defining increment/decrement/equal/dereference behavior template struct BidirIterator : Policy { using value_type = typename std::remove_cv::type; @@ -31,76 +32,117 @@ struct BidirIterator : Policy { using reference = typename Policy::reference; using iterator_category = std::bidirectional_iterator_tag; + /// @brief Default constructor. BidirIterator() = default; + /// @brief Construct from policy. + /// @param p policy instance BidirIterator(Policy&& p) : Policy(p) {} + /// @brief Pre-increment operator. BidirIterator& operator++() { Policy::increment(); return *this; } + /// @brief Post-increment operator. BidirIterator operator++(int) { BidirIterator x = *this; ++*this; return x; } + /// @brief Pre-decrement operator. BidirIterator& operator--() { Policy::decrement(); return *this; } + /// @brief Post-decrement operator. 
BidirIterator operator--(int) { BidirIterator x = *this; --*this; return x; } + /// @brief Equality comparison. bool operator==(const BidirIterator &o) const { return Policy::equal(o); } + /// @brief Inequality comparison. bool operator!=(const BidirIterator &o) const { return !Policy::equal(o); } + /// @brief Dereference operator. reference operator*() { return Policy::dereference(); } + /// @brief Member access operator. pointer operator->() { return &Policy::dereference(); } using const_variant = BidirIterator; + /// @brief Conversion to const variant iterator. operator const_variant() const { return const_variant(static_cast(*this)); } }; +/// @brief Policy for striding iterator that skips elements by a fixed stride. template class StrideIterPolicy { public: using value_type = Value; using reference = Value&; + /// @brief Default constructor. StrideIterPolicy() : cur_(nullptr), offset_(0), stride_(0) {} + /// @brief Construct stride iterator policy. + /// @param ptr pointer to data + /// @param offset offset into current element + /// @param stride stride distance (elements per step) StrideIterPolicy(Value* ptr, std::size_t offset, size_t stride) : cur_(ptr), offset_(offset), stride_((unsigned)stride) {} + /// @brief Advance iterator by one stride. void increment() { cur_ += stride_; } + /// @brief Move iterator back by one stride. void decrement() { cur_ -= stride_; } + /// @brief Check iterator equality. bool equal(const StrideIterPolicy& o) const { return cur_ == o.cur_; } + /// @brief Dereference to element. Value& dereference() { return cur_[offset_]; } using const_policy = StrideIterPolicy; + /// @brief Conversion to const policy. operator const_policy() const { return const_policy(cur_, offset_, stride_); } private: Value* cur_; std::size_t offset_; unsigned stride_; }; +/// @brief Bidirectional iterator that strides through elements. 
template using StrideIter = BidirIterator>; +/// @brief Policy for indirect iterator that accesses elements through redirection. template class IndirectIterPolicy { public: using value_type = Value; using reference = Value&; + /// @brief Default constructor. IndirectIterPolicy() : redir_(nullptr) {} + /// @brief Construct indirect iterator policy. + /// @param redir redirection object supporting value_at(int) method + /// @param cur iterator into position vector IndirectIterPolicy(Redirect* redir, std::vector::const_iterator cur) : redir_(redir), cur_(cur) {} + /// @brief Advance to next position. void increment() { ++cur_; } + /// @brief Move back to previous position. void decrement() { --cur_; } + /// @brief Check iterator equality. bool equal(const IndirectIterPolicy& o) const { return cur_ == o.cur_; } + /// @brief Dereference via redirection object. Value& dereference() { return redir_->value_at(*cur_); } using const_policy = IndirectIterPolicy; + /// @brief Conversion to const policy. operator const_policy() const { return const_policy(redir_, cur_); } // TODO: what should be done with absent optional tags (*cur_ < 0)? private: Redirect* redir_; std::vector::const_iterator cur_; // points into positions }; +/// @brief Bidirectional iterator that accesses elements indirectly through redirection. template using IndirectIter = BidirIterator>; +/// @brief Policy for iterator that skips duplicate group keys. template class UniqIterPolicy { public: using value_type = Value; using reference = Value&; + /// @brief Default constructor. UniqIterPolicy() : vec_(nullptr), pos_(0) {} + /// @brief Construct uniquifying iterator policy. + /// @param vec vector with group_key() method on elements + /// @param pos starting position UniqIterPolicy(Vector* vec, std::size_t pos) : vec_(vec), pos_(pos) {} + /// @brief Move to the first element of the next group. 
void increment() { // move to the first element of the next group const auto& key = (*vec_)[pos_].group_key(); @@ -108,46 +150,64 @@ class UniqIterPolicy { while (pos_ != vec_->size() && (*vec_)[pos_].group_key() == key) ++pos_; } + /// @brief Move back to the first element of the previous group. void decrement() { --pos_; // now we are at the last element of the previous group const auto& key = (*vec_)[pos_].group_key(); while (pos_ != 0 && (*vec_)[pos_-1].group_key() == key) --pos_; // move to the group beginning } + /// @brief Check iterator equality. bool equal(const UniqIterPolicy& o) const { return pos_ == o.pos_; } + /// @brief Dereference to element. Value& dereference() { return (*vec_)[pos_]; } using const_policy = UniqIterPolicy; + /// @brief Conversion to const policy. operator const_policy() const { return const_policy(vec_, pos_); } private: Vector* vec_; std::size_t pos_; }; +/// @brief Bidirectional iterator that skips duplicate group keys. template using UniqIter = BidirIterator>; +/// @brief Range proxy for iterating with uniquification. template> struct UniqProxy { + /// @brief The underlying vector. Vector& vec; using iterator = UniqIter; + /// @brief Get begin iterator (first element). iterator begin() { return {{&vec, 0}}; } + /// @brief Get end iterator (one past last element). iterator end() { return {{&vec, vec.size()}}; } }; +/// @brief Const range proxy for iterating with uniquification. template> struct ConstUniqProxy { + /// @brief The underlying const vector. const Vector& vec; using iterator = UniqIter; + /// @brief Get begin const iterator (first element). iterator begin() const { return {{&vec, 0}}; } + /// @brief Get end const iterator (one past last element). iterator end() const { return {{&vec, vec.size()}}; } }; +/// @brief Policy for grouping iterator that returns spans of elements with matching group keys. 
template class GroupingIterPolicy { public: using value_type = Value; using reference = Value&; + /// @brief Default constructor. GroupingIterPolicy() = default; + /// @brief Construct grouping iterator policy. + /// @param span span object defining the range GroupingIterPolicy(const Value& span) : span_(span) {} + /// @brief Advance to the next group. void increment() { span_.set_begin(span_.end()); span_.set_size(0); @@ -155,6 +215,7 @@ class GroupingIterPolicy { span_.begin()->group_key() == span_.end()->group_key()) span_.set_size(span_.size() + 1); } + /// @brief Move back to the previous group. void decrement() { span_.set_begin(span_.begin() - 1); span_.set_size(1); @@ -164,68 +225,100 @@ class GroupingIterPolicy { span_.set_size(span_.size() + 1); } } + /// @brief Check iterator equality. bool equal(const GroupingIterPolicy& o) const { return span_.begin() == o.span_.begin(); } + /// @brief Dereference to span. Value& dereference() { return span_; } using const_policy = GroupingIterPolicy; + /// @brief Conversion to const policy. operator const_policy() const { return const_policy(span_); } private: Value span_; }; +/// @brief Bidirectional iterator that yields spans of elements with matching group keys. template using GroupingIter = BidirIterator>; +/// @brief Policy for filtering iterator that selects elements matching a predicate. template class FilterIterPolicy { public: using value_type = Value; using reference = Value&; + /// @brief Default constructor. FilterIterPolicy() : vec_(nullptr), pos_(0) {} + /// @brief Construct filtering iterator policy. + /// @param filter filter object with matches(const Value&) method + /// @param vec vector to filter + /// @param pos starting position FilterIterPolicy(const Filter* filter, Vector* vec, std::size_t pos) : filter_(filter), vec_(vec), pos_(pos) { while (pos_ != vec_->size() && !matches(pos_)) ++pos_; } + /// @brief Check if element at position matches filter. 
bool matches(std::size_t p) const { return filter_->matches((*vec_)[p]); } + /// @brief Advance to next matching element. void increment() { while (++pos_ < vec_->size() && !matches(pos_)) {} } + /// @brief Move back to previous matching element. void decrement() { while (pos_ != 0 && !matches(--pos_)) {} } + /// @brief Check iterator equality. bool equal(const FilterIterPolicy& o) const { return pos_ == o.pos_; } + /// @brief Dereference to element. Value& dereference() { return (*vec_)[pos_]; } using const_policy = FilterIterPolicy; + /// @brief Conversion to const policy. operator const_policy() const { return const_policy(vec_, pos_); } private: const Filter* filter_; Vector* vec_; std::size_t pos_; }; +/// @brief Bidirectional iterator that filters elements matching a predicate. template using FilterIter = BidirIterator>; +/// @brief Range proxy for filtering iteration. template struct FilterProxy { + /// @brief The filter predicate. const Filter& filter; + /// @brief The underlying vector. std::vector& vec; using iterator = FilterIter, Value>; + /// @brief Get begin iterator (first matching element). iterator begin() { return {{&filter, &vec, 0}}; } + /// @brief Get end iterator (one past last element). iterator end() { return {{&filter, &vec, vec.size()}}; } }; +/// @brief Const range proxy for filtering iteration. template struct ConstFilterProxy { + /// @brief The filter predicate. const Filter& filter; + /// @brief The underlying const vector. const std::vector& vec; using iterator = FilterIter, const Value>; + /// @brief Get begin const iterator (first matching element). iterator begin() const { return {{&filter, &vec, 0}}; } + /// @brief Get end const iterator (one past last element). iterator end() const { return {{&filter, &vec, vec.size()}}; } }; +/// @brief A group of items with the same group_key(), possibly sparse. template struct ItemGroup { using element_type = Item; + /// @brief Construct a group from item range. 
+ /// @param start pointer to first item in the group + /// @param end pointer to one-past-last item in the range + /// @details Extent is the full length of [start, end); size is the number of items in that range that belong to the group, which need not be contiguous. ItemGroup(Item* start, const Item* end) : size_(int(end - start)), extent_(int(end - start)), start_(start) { for (const Item* i = start + 1; i != end; ++i) @@ -233,32 +326,52 @@ struct ItemGroup { --size_; } + /// @brief Iterator for accessing sparse group items. struct iterator { + /// @brief Pointer to current item. Item* ptr; + /// @brief Pointer to one-past-last item in range. const Item* end; + /// @brief Equality comparison. bool operator==(const iterator& o) const { return ptr == o.ptr; } + /// @brief Inequality comparison. bool operator!=(const iterator& o) const { return ptr != o.ptr; } + /// @brief Pre-increment: advance to the next item whose group_key() equals the current item's, or to end. iterator& operator++() { const Item* prev = ptr++; while (ptr != end && ptr->group_key() != prev->group_key()) ++ptr; return *this; } + /// @brief Dereference operator. Item& operator*() { return *ptr; } + /// @brief Member access operator. Item* operator->() { return ptr; } }; + /// @brief Get begin iterator. iterator begin() { return iterator{start_, start_+extent_}; } + /// @brief Get end iterator. iterator end() { return iterator{start_+extent_, start_+extent_}; } + /// @brief Get number of items with matching group_key() (sparse count). size_t size() const { return (size_t) size_; } + /// @brief Get extent (total items in range, may include gaps). int extent() const { return extent_; } + /// @brief Check if group is empty. bool empty() const { return size_ == 0; } + /// @brief Access first item. Item& front() { return *start_; } + /// @brief Access first item (const). const Item& front() const { return *start_; } + /// @brief Access last item in extent. Item& back() { return start_[extent_ - 1]; } + /// @brief Access last item in extent (const). 
const Item& back() const { return start_[extent_ - 1]; } - // constant time unless sparse (extend_ > size_) + /// @brief Access i-th item with matching group_key(). + /// @param i index within group + /// @return reference to item + /// @details O(1) if dense, O(i) if sparse (gap items with different key). Item& operator[](std::size_t i) { if (size_ == extent_ || i == 0) return start_[i]; @@ -267,6 +380,9 @@ struct ItemGroup { if (--i == 0) return *ptr; } + /// @brief Access i-th item with matching group_key() (const). + /// @param i index within group + /// @return const reference to item const Item& operator[](std::size_t i) const { return const_cast(this)->operator[](i); } diff --git a/include/gemmi/logger.hpp b/include/gemmi/logger.hpp index 35fafd176..36765b373 100644 --- a/include/gemmi/logger.hpp +++ b/include/gemmi/logger.hpp @@ -12,40 +12,54 @@ namespace gemmi { -/// Passes messages (including warnings/errors) to a callback function. -/// Messages are passed as strings without a trailing newline. +/// @brief Logger for passing messages through callbacks with severity levels. +/// @details Messages are passed as strings without a trailing newline. /// They have syslog-like severity levels: 8=debug, 6=info, 5=notice, 3=error, /// allowing the use of a threshold to filter them. /// Quirk: Errors double as both errors and warnings. Unrecoverable errors -/// don't go through this class; Logger only handles errors that can -/// be downgraded to warnings. If a callback is set, the error is passed -/// as a warning message. Otherwise, it's thrown as std::runtime_error. +/// don't go through this class; Logger only handles errors that can +/// be downgraded to warnings. If a callback is set, the error is passed +/// as a warning message. Otherwise, it's thrown as std::runtime_error. struct Logger { - /// A function that handles messages. + /// @brief Callback function that handles each logged message. 
std::function callback; - /// Pass messages of this level and all lower (more severe) levels: + + /// @brief Severity threshold for filtering messages. + /// @details Pass messages of this level and all lower (more severe) levels: /// 8=all, 6=all but debug, 5=notes and warnings, 3=warnings, 0=none int threshold = 6; - /// suspend() and resume() are used internally to avoid duplicate messages - /// when the same function is called (internally) multiple times. + /// @brief Temporarily suspend message logging. + /// @details Used internally to avoid duplicate messages when the same function + /// is called (internally) multiple times. void suspend() { threshold -= 100; } + + /// @brief Resume message logging after suspension. void resume() { threshold += 100; } - /// Send a message without any prefix on with a numeric threshold N. + /// @brief Send a message at a specific severity level. + /// @tparam N Severity level threshold for this message + /// @param args Message content to concatenate and send template void level(Args const&... args) const { if (threshold >= N && callback) callback(cat(args...)); } - /// Send a debug message. + /// @brief Send a debug message. + /// @param args Message content template void debug(Args const&... args) const { level<8>("Debug: ", args...); } - /// Send a message without any prefix. + + /// @brief Send an informational message without prefix. + /// @param args Message content template void mesg(Args const&... args) const { level<6>(args...); } - /// Send a note (a notice, a significant message). + + /// @brief Send a note (notice-level significant message). + /// @param args Message content template void note(Args const&... args) const { level<5>("Note: ", args...); } - /// Send a warning/error (see Quirk above). + /// @brief Send a warning or error message. + /// @details If callback is set, sends as warning; otherwise throws exception. + /// @param args Message content template GEMMI_COLD void err(Args const&... 
args) const { if (threshold >= 3) { std::string msg = cat(args...); @@ -55,13 +69,16 @@ struct Logger { } } - // predefined callbacks - - /// to be used as: logger.callback = Logger::to_stderr; + /// @brief Predefined callback function to print messages to stderr. + /// @details Use as: logger.callback = Logger::to_stderr; + /// @param s Message string (printed with newline) static void to_stderr(const std::string& s) { std::fprintf(stderr, "%s\n", s.c_str()); } - /// to be used as: logger.callback = Logger::to_stdout; + + /// @brief Predefined callback function to print messages to stdout. + /// @details Use as: logger.callback = Logger::to_stdout; + /// @param s Message string (printed with newline) static void to_stdout(const std::string& s) { std::fprintf(stdout, "%s\n", s.c_str()); } diff --git a/include/gemmi/pdb_id.hpp b/include/gemmi/pdb_id.hpp index 97a232290..5365bb6a1 100644 --- a/include/gemmi/pdb_id.hpp +++ b/include/gemmi/pdb_id.hpp @@ -13,6 +13,9 @@ namespace gemmi { +/// @brief Check if a string consists entirely of alphanumeric characters. +/// @param p Null-terminated string pointer +/// @return True if all characters are alphanumeric inline bool all_alnums(const char* p) { for (;;++p) if (!std::isalnum(*p)) @@ -20,11 +23,19 @@ inline bool all_alnums(const char* p) { unreachable(); } +/// @brief Check if a string is a valid PDB code. +/// @param str Code to validate +/// @return True if str is a valid 4-character or extended PDB code inline bool is_pdb_code(const std::string& str) { return (str.length() == 4 && std::isdigit(str[0]) && all_alnums(&str[1])) || (str.length() == 12 && str.compare(0, 4, "pdb_") == 0 && std::isdigit(str[4]) && all_alnums(&str[5])); } + +/// @brief Build a PDB_DIR-style path component for a given code and type. 
+/// @param code PDB code (4 characters only; extended codes not yet supported) +/// @param type File type: 'P' for PDB, 'M' for mmCIF, 'S' for structure factors +/// @return Path component relative to $PDB_DIR (e.g., "/structures/divided/pdb/...") inline std::string path_in_pdb_dir(const std::string& code, char type) { if (code.size() == 12) fail("extended PDB codes are not supported yet: " + code); @@ -46,9 +57,13 @@ inline std::string path_in_pdb_dir(const std::string& code, char type) { return path; } -/// Call it after checking the code with gemmi::is_pdb_code(code). -/// The convention for $PDB_DIR is the same as in BioJava, see the docs. -/// \par type is the requested file type: 'M' for mmCIF or 'P' for PDB, 'S' for SF-mmCIF. +/// @brief Expand a PDB code to a full file path using $PDB_DIR. +/// @details Call this after checking the code with gemmi::is_pdb_code(code). +/// The convention for $PDB_DIR is the same as in BioJava. +/// @param code Valid PDB code (4 or 12 characters) +/// @param type File type: 'P' for PDB, 'M' for mmCIF, 'S' for structure factors +/// @param throw_if_unset If true, fail() if $PDB_DIR is not set; if false, return empty string +/// @return Full file path (empty if $PDB_DIR is not set and throw_if_unset is false) inline std::string expand_pdb_code_to_path(const std::string& code, char type, bool throw_if_unset=false) { std::string path; @@ -60,7 +75,10 @@ inline std::string expand_pdb_code_to_path(const std::string& code, char type, return path; } -/// \par type is: 'M' for mmCIF or 'P' for PDB, 'S' for SF-mmCIF. +/// @brief Expand a PDB code to a path, or return the input unchanged if not a code. 
+/// @param input Either a PDB code or a file path +/// @param type File type: 'P' for PDB, 'M' for mmCIF, 'S' for structure factors +/// @return Expanded path if input is a PDB code; input itself otherwise inline std::string expand_if_pdb_code(const std::string& input, char type='M') { if (is_pdb_code(input)) return expand_pdb_code_to_path(input, type, true); diff --git a/include/gemmi/pymol_select.hpp b/include/gemmi/pymol_select.hpp index 1a13bf4d7..a8dc9a8a9 100644 --- a/include/gemmi/pymol_select.hpp +++ b/include/gemmi/pymol_select.hpp @@ -34,22 +34,29 @@ namespace gemmi { namespace psimpl { +/// @brief Base class for abstract syntax tree nodes in PyMOL selection expressions. +/// All selector nodes inherit from this base and implement atom matching logic. struct Node { virtual ~Node() = default; + /// @brief Test whether an atom matches this selection criterion. + /// @param a The atom to test + /// @return True if the atom matches the selection criterion, false otherwise virtual bool match(const gemmi::FlatAtom& a) const = 0; }; // --- Logic Nodes --- struct AndNode : Node { - std::unique_ptr left, right; + std::unique_ptr left; + std::unique_ptr right; bool match(const gemmi::FlatAtom& a) const override { return left->match(a) && right->match(a); } }; struct OrNode : Node { - std::unique_ptr left, right; + std::unique_ptr left; + std::unique_ptr right; bool match(const gemmi::FlatAtom& a) const override { return left->match(a) || right->match(a); } @@ -89,7 +96,8 @@ struct AltLocNode : Node { }; struct ResiRangeNode : Node { - int min, max; + int min; + int max; ResiRangeNode(int a, int b) : min(a), max(b) {} bool match(const gemmi::FlatAtom& a) const override { return *a.seq_id.num >= min && *a.seq_id.num <= max; @@ -97,7 +105,8 @@ struct ResiRangeNode : Node { }; struct IndexRangeNode : Node { - int min, max; + int min; + int max; IndexRangeNode(int a, int b) : min(a), max(b) {} bool match(const gemmi::FlatAtom& a) const override { return a.serial >= min 
&& a.serial <= max; @@ -115,7 +124,7 @@ struct ElementNode : Node { }; struct HetatmNode : Node { - bool hetatm; // true = hetatm, false = not hetatm (i.e., ATOM) + bool hetatm; explicit HetatmNode(bool h) : hetatm(h) {} bool match(const gemmi::FlatAtom& a) const override { return hetatm ? (a.het_flag == 'H') : (a.het_flag == 'A'); @@ -136,7 +145,14 @@ struct HydrogenNode : Node { } }; -enum class CompareOp { LT, LE, GT, GE, EQ, NE }; +enum class CompareOp { + LT, + LE, + GT, + GE, + EQ, + NE +}; struct BfactorNode : Node { CompareOp op; @@ -202,7 +218,7 @@ namespace p = tao::pegtl; // --- State --- struct State { std::vector> stack; - std::vector string_list; // temp storage for building value lists + std::vector string_list; CompareOp current_op = CompareOp::EQ; }; @@ -596,7 +612,12 @@ template<> struct action { // Public API // ============================================================================ -// Returns a compiled selection tree +/// @brief Compile a PyMOL selection string into an abstract syntax tree. +/// @param selector PyMOL selection syntax string (e.g., "name CA and chain A") +/// @return A unique pointer to the root Node of the compiled selection tree, +/// or nullptr if parsing fails +/// @details The returned tree can be used to test atoms with the match() method. +/// If parsing fails, an error message is printed to stderr. inline std::unique_ptr compile_pymol_selection(const std::string& selector) { psimpl::State state; tao::pegtl::memory_input<> in(selector, ""); @@ -611,6 +632,12 @@ inline std::unique_ptr compile_pymol_selection(const std::string& } } +/// @brief Select atoms from a FlatStructure matching a PyMOL selection query. +/// @param fs The structure containing atoms to select from +/// @param query PyMOL selection syntax string +/// @return Vector of const pointers to atoms matching the selection +/// @details Compiles the query string into an AST and tests each atom in the structure. 
+/// Returns empty vector if the query is invalid or matches no atoms. inline std::vector select_atoms(const gemmi::FlatStructure& fs, const std::string& query) { auto root = compile_pymol_selection(query); @@ -624,6 +651,12 @@ select_atoms(const gemmi::FlatStructure& fs, const std::string& query) { } return result; } + +/// @brief Remove atoms from a FlatStructure that do not match a PyMOL selection. +/// @param fs The structure to filter (modified in-place) +/// @param query PyMOL selection syntax string +/// @details Keeps only atoms matching the query; removes all others. +/// If the query is invalid, no atoms are removed. inline void remove_not_selected(gemmi::FlatStructure& fs, const std::string& query) { if (auto root = compile_pymol_selection(query)) vector_remove_if(fs.table, [&](FlatAtom& atom) { return !root->match(atom); }); diff --git a/include/gemmi/span.hpp b/include/gemmi/span.hpp index 695bc78f3..2df39bdeb 100644 --- a/include/gemmi/span.hpp +++ b/include/gemmi/span.hpp @@ -15,7 +15,7 @@ namespace gemmi { template struct MutableVectorSpan; -// Minimalistic Span, somewhat similar to C++20 std::span. +/// @brief Minimalistic span of array or vector, similar to C++20 std::span. template struct Span { using iterator = Item*; using const_iterator = Item const*; @@ -25,64 +25,107 @@ template struct Span { friend Span; friend MutableVectorSpan; + /// @brief Default constructor (empty span). Span() = default; + /// @brief Construct span from pointer and size. + /// @param begin pointer to first element + /// @param n number of elements Span(iterator begin, std::size_t n) : begin_(begin), size_(n) {} #if !defined(_MSC_VER) || _MSC_VER-0 >= 1926 - // constructor only for const Item, to allow non-const -> const conversion + /// @brief Copy-convert constructor from mutable span to const span. 
template Span(const Span& o, typename std::enable_if::value>::type* = 0) #else - // older MSVC don't like the version above + /// @brief Copy-convert constructor from mutable span to const span. Span(const Span& o) #endif : begin_(o.begin_), size_(o.size_) {} + /// @brief Set the begin pointer. + /// @param begin new begin pointer void set_begin(iterator begin) { begin_ = begin; } + /// @brief Set the span size. + /// @param n new size void set_size(std::size_t n) { size_ = n; } + /// @brief Get const iterator to beginning. const_iterator begin() const { return begin_; } + /// @brief Get const iterator to end. const_iterator end() const { return begin_ + size_; } + /// @brief Get mutable iterator to beginning. iterator begin() { return begin_; } + /// @brief Get mutable iterator to end. iterator end() { return begin_ + size_; } + /// @brief Access first element. Item& front() { return *begin_; } + /// @brief Access first element (const). const Item& front() const { return *begin_; } + /// @brief Access last element. Item& back() { return *(begin_ + size_ - 1); } + /// @brief Access last element (const). const Item& back() const { return *(begin_ + size_ - 1); } + /// @brief Subscript access to element. const Item& operator[](std::size_t i) const { return *(begin_ + i); } + /// @brief Subscript access to element (mutable). Item& operator[](std::size_t i) { return *(begin_ + i); } + /// @brief Bounds-checked element access. + /// @param i index + /// @return reference to element at index i + /// @throws std::out_of_range if index is out of bounds Item& at(std::size_t i) { if (i >= size()) throw std::out_of_range("item index ouf of range: #" + std::to_string(i)); return *(begin_ + i); } + /// @brief Bounds-checked element access (const). + /// @param i index + /// @return const reference to element at index i + /// @throws std::out_of_range if index is out of bounds const Item& at(std::size_t i) const { return const_cast(this)->at(i); } + /// @brief Get span size. 
std::size_t size() const { return size_; } + /// @brief Check if span is empty. bool empty() const { return size_ == 0; } + /// @brief Conversion to bool (true if not empty). explicit operator bool() const { return size_ != 0; } + /// @brief Get a subspan from iterator range. + /// @param first iterator to first element + /// @param last iterator to one-past-last element + /// @return new Span covering the range template Span sub(Iter first, Iter last) { return Span(&*first, last - first); } + /// @brief Get a subspan matching a predicate. + /// @tparam F predicate type + /// @tparam V element type (deduced) + /// @param func predicate function + /// @return new Span of contiguous elements matching the predicate template Span subspan(F&& func) { iterator group_begin = std::find_if(this->begin(), this->end(), func); iterator group_end = std::find_if_not(group_begin, this->end(), func); return Span(&*group_begin, group_end - group_begin); } + /// @brief Get a const subspan matching a predicate. + /// @tparam F predicate type + /// @param func predicate function + /// @return const Span of contiguous elements matching the predicate template Span subspan(F&& func) const { using V = const value_type; return const_cast(this)->subspan(std::forward(func)); } - // we use children() to iterate over Model, Chain, etc + /// @brief Get children (returns self for iteration protocol). Span& children() { return *this; } + /// @brief Get const children (returns self for iteration protocol). const Span& children() const { return *this; } private: @@ -90,28 +133,50 @@ template struct Span { std::size_t size_ = 0; }; -// Span of std::vector, implements insert() and erase(). +/// @brief Span of std::vector that supports insert() and erase() operations. template struct MutableVectorSpan : Span { using vector_type = std::vector::value_type>; using iterator = typename Span::iterator; //friend Span; + /// @brief Default constructor. 
MutableVectorSpan() = default; + /// @brief Construct from span and vector pointer. + /// @param p source span + /// @param v pointer to underlying vector MutableVectorSpan(Span&& p, vector_type* v) : Span(p), vector_(v) {} + /// @brief Construct from vector and element range. + /// @param v the underlying vector + /// @param begin iterator to first element in span + /// @param n number of elements in span MutableVectorSpan(vector_type& v, iterator begin, std::size_t n) : Span(begin, n), vector_(&v) {} + /// @brief Get a subspan from iterator range. + /// @param first iterator to first element + /// @param last iterator to one-past-last element + /// @return new MutableVectorSpan covering the range template MutableVectorSpan sub(Iter first, Iter last) { return {Span::sub(first, last), vector_}; } + /// @brief Get a mutable subspan matching a predicate. + /// @param func predicate function + /// @return new MutableVectorSpan of contiguous elements matching the predicate template MutableVectorSpan subspan(F&& func) { return {Span::subspan(std::forward(func)), vector_}; } + /// @brief Get a const subspan matching a predicate. + /// @param func predicate function + /// @return const MutableVectorSpan of contiguous elements matching the predicate template MutableVectorSpan subspan(F&& func) const { return {Span::subspan(std::forward(func)), vector_}; } + /// @brief Insert an element at the given position. + /// @param pos iterator position for insertion + /// @param item element to insert (moved) + /// @return iterator to the newly inserted element iterator insert(iterator pos, Item&& item) { auto offset = this->begin_ - this->vector_->data(); auto iter = vector_->begin() + (pos - this->vector_->data()); @@ -121,12 +186,16 @@ template struct MutableVectorSpan : Span { return &*ret; } + /// @brief Erase the element at the given position. 
+ /// @param pos iterator to element to erase void erase(iterator pos) { vector_->erase(vector_->begin() + (pos - vector_->data())); --this->size_; } + /// @brief Check if span starts at the beginning of the vector. bool is_beginning() const { return this->begin() == vector_->data(); } + /// @brief Check if span extends to the end of the vector. bool is_ending() const { return this->end() == vector_->data() + vector_->size(); } private: diff --git a/include/gemmi/sprintf.hpp b/include/gemmi/sprintf.hpp index ead2ce0f9..b140bf272 100644 --- a/include/gemmi/sprintf.hpp +++ b/include/gemmi/sprintf.hpp @@ -26,25 +26,47 @@ namespace gemmi { #else # define GEMMI_ATTRIBUTE_FORMAT(fmt,va) #endif -/// stb_snprintf in gemmi namespace - like snprintf, but ignores locale -/// and is always zero-terminated (hence _z). +/// @brief snprintf-style string formatting (locale-independent, always zero-terminated) +/// @details Uses stb_snprintf which ignores locale and guarantees zero-termination +/// (hence the _z suffix). Signature follows snprintf. +/// @param buf output character buffer +/// @param count maximum number of characters to write (including terminator) +/// @param fmt printf-style format string +/// @return number of characters written (not including the terminator), or negative on error GEMMI_DLL int snprintf_z(char *buf, int count, char const *fmt, ...) GEMMI_ATTRIBUTE_FORMAT(3,4); -/// stb_sprintf in gemmi namespace + +/// @brief sprintf-style string formatting (locale-independent, always zero-terminated) +/// @details Uses stb_sprintf which ignores locale and guarantees zero-termination. +/// The buffer must be large enough for the formatted output. +/// @param buf output character buffer (must be large enough) +/// @param fmt printf-style format string +/// @return number of characters written (not including the terminator), or negative on error GEMMI_DLL int sprintf_z(char *buf, char const *fmt, ...) 
GEMMI_ATTRIBUTE_FORMAT(2,3); +/// @brief Convert a double to a string with default precision +/// @param d the double value to convert +/// @return string representation using format "%.9g" inline std::string to_str(double d) { char buf[24]; int len = sprintf_z(buf, "%.9g", d); return std::string(buf, len > 0 ? len : 0); } +/// @brief Convert a float to a string with default precision +/// @param d the float value to convert +/// @return string representation using format "%.6g" inline std::string to_str(float d) { char buf[16]; int len = sprintf_z(buf, "%.6g", d); return std::string(buf, len > 0 ? len : 0); } +/// @brief Convert a double to a string with specified decimal precision +/// @tparam Prec decimal precision (0-6 places after decimal point) +/// @param d the double value to convert +/// @return string representation with fixed decimal places or scientific notation +/// @details Uses fixed-point format for values in [-1e8, 1e8), scientific notation otherwise template std::string to_str_prec(double d) { static_assert(Prec >= 0 && Prec < 7, "unsupported precision"); @@ -54,7 +76,13 @@ std::string to_str_prec(double d) { return std::string(buf, len > 0 ? len : 0); } -/// zero-terminated to_chars() +/// @brief Convert an integer to a zero-terminated C-string +/// @details Uses std::to_chars if available (C++17), otherwise snprintf_z. +/// Guarantees zero-termination within the output range. 
+/// @param first pointer to start of output buffer +/// @param last pointer to one-past-end of output buffer +/// @param value the integer value to convert +/// @return pointer to the zero terminator in the output buffer inline char* to_chars_z(char* first, char* last, int value) { #if __cpp_lib_to_chars >= 201611L auto result = std::to_chars(first, last-1, value); @@ -65,6 +93,14 @@ inline char* to_chars_z(char* first, char* last, int value) { return std::min(first + n, last - 1); #endif } + +/// @brief Convert a size_t to a zero-terminated C-string +/// @details Uses std::to_chars if available (C++17), otherwise snprintf_z. +/// Guarantees zero-termination within the output range. +/// @param first pointer to start of output buffer +/// @param last pointer to one-past-end of output buffer +/// @param value the size_t value to convert +/// @return pointer to the zero terminator in the output buffer inline char* to_chars_z(char* first, char* last, size_t value) { #if __cpp_lib_to_chars >= 201611L auto result = std::to_chars(first, last-1, value); diff --git a/include/gemmi/stats.hpp b/include/gemmi/stats.hpp index 5178c4c13..83fa812b6 100644 --- a/include/gemmi/stats.hpp +++ b/include/gemmi/stats.hpp @@ -11,29 +11,51 @@ namespace gemmi { -// popular single-pass algorithm for calculating variance and mean +/// @brief Single-pass algorithm for calculating variance and mean +/// @details Uses Welford's algorithm for numerical stability. +/// Supports both initialization from iterators and incremental point addition. 
struct Variance { - int n = 0; - double sum_sq = 0.; - double mean_x = 0.; + int n = 0; ///< Number of points added + double sum_sq = 0.; ///< Running sum of squared deviations + double mean_x = 0.; ///< Running mean Variance() = default; + + /// @brief Construct Variance from an iterator range + /// @tparam T iterator type + /// @param begin iterator to first element + /// @param end iterator to one-past-last element template Variance(T begin, T end) : Variance() { for (auto i = begin; i != end; ++i) add_point(*i); } + + /// @brief Add a single data point and update running statistics + /// @param x the data value to add void add_point(double x) { ++n; double dx = x - mean_x; mean_x += dx / n; sum_sq += dx * (x - mean_x); } + + /// @brief Calculate sample variance (divide by n-1) + /// @return sample variance double for_sample() const { return sum_sq / (n - 1); } + + /// @brief Calculate population variance (divide by n) + /// @return population variance double for_population() const { return sum_sq / n; } }; +/// @brief Covariance of two variables using single-pass algorithm +/// @details Extends Variance to track covariance between paired (x, y) points. struct Covariance : Variance { - double mean_y = 0.; + double mean_y = 0.; ///< Running mean of y values + + /// @brief Add a paired data point (x, y) and update running statistics + /// @param x the x data value + /// @param y the y data value void add_point(double x, double y) { ++n; double dx = x - mean_x; @@ -43,13 +65,20 @@ struct Covariance : Variance { } }; +/// @brief Correlation coefficient and regression statistics for paired data +/// @details Accumulates running statistics for two variables to compute +/// correlation coefficient, regression line (slope/intercept), and variances. 
struct Correlation { - int n = 0; - double sum_xx = 0.; - double sum_yy = 0.; - double sum_xy = 0.; - double mean_x = 0.; - double mean_y = 0.; + int n = 0; ///< Number of point pairs added + double sum_xx = 0.; ///< Sum of weighted squared x deviations + double sum_yy = 0.; ///< Sum of weighted squared y deviations + double sum_xy = 0.; ///< Sum of weighted xy deviations + double mean_x = 0.; ///< Running mean of x values + double mean_y = 0.; ///< Running mean of y values + + /// @brief Add a paired data point (x, y) and update running statistics + /// @param x the x data value + /// @param y the y data value void add_point(double x, double y) { ++n; double weight = (double)(n - 1) / n; @@ -61,17 +90,43 @@ struct Correlation { mean_x += dx / n; mean_y += dy / n; } + + /// @brief Calculate Pearson correlation coefficient + /// @return correlation coefficient (ranges from -1 to 1) double coefficient() const { return sum_xy / std::sqrt(sum_xx * sum_yy); } + + /// @brief Calculate variance of x values + /// @return x variance double x_variance() const { return sum_xx / n; } + + /// @brief Calculate variance of y values + /// @return y variance double y_variance() const { return sum_yy / n; } + + /// @brief Calculate covariance between x and y + /// @return covariance double covariance() const { return sum_xy / n; } + + /// @brief Calculate ratio of means (mean_y / mean_x) + /// @return ratio of means double mean_ratio() const { return mean_y / mean_x; } - // the regression line + + /// @brief Calculate slope of linear regression line (y = slope * x + intercept) + /// @return regression slope double slope() const { return sum_xy / sum_xx; } + + /// @brief Calculate y-intercept of linear regression line + /// @return regression intercept (y-value where x=0) double intercept() const { return mean_y - slope() * mean_x; } }; +/// @brief Combine two independent Correlation objects into one +/// @details Merges statistics from two separate correlation calculations +/// to 
produce a combined result as if all data had been processed together. +/// @param a the first Correlation object +/// @param b the second Correlation object +/// @return a new Correlation object with combined statistics inline Correlation combine_two_correlations(const Correlation& a, const Correlation& b) { auto sq = [](double x) { return x * x; }; Correlation r; @@ -87,6 +142,9 @@ inline Correlation combine_two_correlations(const Correlation& a, const Correlat return r; } +/// @brief Combine multiple Correlation objects into a single result +/// @param cors vector of Correlation objects to combine +/// @return a new Correlation object with combined statistics from all input correlations inline Correlation combine_correlations(const std::vector& cors) { Correlation result; for (const Correlation& cor : cors) @@ -95,14 +153,21 @@ inline Correlation combine_correlations(const std::vector& cors) { } +/// @brief Statistics describing a dataset (min, max, mean, RMS, NaN count) struct DataStats { - double dmin = NAN; - double dmax = NAN; - double dmean = NAN; - double rms = NAN; - size_t nan_count = 0; + double dmin = NAN; ///< Minimum value in the dataset + double dmax = NAN; ///< Maximum value in the dataset + double dmean = NAN; ///< Mean (average) value + double rms = NAN; ///< Root mean square (standard deviation) + size_t nan_count = 0; ///< Number of NaN values encountered }; +/// @brief Calculate statistical summary of a dataset +/// @details Computes min, max, mean, RMS, and counts NaN values. +/// For all-NaN inputs, min and max are set to NaN. 
+/// @tparam T numeric element type of the data vector +/// @param data vector of numeric values +/// @return DataStats object containing the calculated statistics template DataStats calculate_data_statistics(const std::vector& data) { DataStats stats; diff --git a/include/gemmi/util.hpp b/include/gemmi/util.hpp index 9d4e0a121..3327451fe 100644 --- a/include/gemmi/util.hpp +++ b/include/gemmi/util.hpp @@ -17,18 +17,46 @@ namespace gemmi { // ##### string helpers ##### +/// @brief Append an integer to a string. +/// @param out Output string +/// @param v Integer value to append inline void append_to_str(std::string& out, int v) { out += std::to_string(v); } + +/// @brief Append an unsigned integer to a string. +/// @param out Output string +/// @param v Size/unsigned value to append inline void append_to_str(std::string& out, size_t v) { out += std::to_string(v); } + +/// @brief Double appending is not supported. void append_to_str(std::string& out, double) = delete; + +/// @brief Append any other type to a string (uses operator+=). +/// @tparam T Type to append +/// @param out Output string +/// @param v Value to append template void append_to_str(std::string& out, const T& v) { out += v; } +/// @brief Concatenate values into a string (base case). +/// @param out Output string inline void cat_to(std::string&) {} + +/// @brief Recursively concatenate values into a string. +/// @tparam T First value type +/// @tparam Args Remaining value types +/// @param out Output string +/// @param value First value to append +/// @param args Remaining values to append recursively template void cat_to(std::string& out, const T& value, Args const&... args) { append_to_str(out, value); cat_to(out, args...); } + +/// @brief Concatenate variadic arguments into a new string. +/// @tparam Args Value types +/// @param args Values to concatenate +/// @return Concatenated string template std::string cat(Args const&... args) { std::string out; @@ -36,30 +64,50 @@ std::string cat(Args const&...
args) { return out; } +/// @brief Check if a string starts with a given prefix. +/// @param str String to check +/// @param prefix Prefix to look for +/// @return True if str begins with prefix inline bool starts_with(const std::string& str, const std::string& prefix) { size_t sl = prefix.length(); return str.length() >= sl && str.compare(0, sl, prefix) == 0; } +/// @brief Check if a string starts with a string literal prefix. +/// @tparam N Size of the string literal array (its length plus the terminating null) +/// @param a String to check (C string) +/// @param b String literal prefix +/// @return True if a starts with b template bool starts_with(const char* a, const char (&b)[N]) { return std::strncmp(a, b, N-1) == 0; } +/// @brief Check if a string ends with a given suffix. +/// @param str String to check +/// @param suffix Suffix to look for +/// @return True if str ends with suffix inline bool ends_with(const std::string& str, const std::string& suffix) { size_t sl = suffix.length(); return str.length() >= sl && str.compare(str.length() - sl, sl, suffix) == 0; } -// can be faster than std::tolower() b/c it takes char not int +/// @brief Convert a single character to lowercase (can be faster than std::tolower because it takes char, not int). +/// @param c Character to convert +/// @return Lowercase version of c, or c if not an ASCII uppercase letter (A-Z) inline char lower(char c) { if (c >= 'A' && c <= 'Z') return c | 0x20; return c; } -// works as expected only for a-zA-Z +/// @brief Convert a single character to uppercase (ASCII letters only). +/// @param c Character to convert +/// @return Uppercase version of c (works only for a-zA-Z) inline char alpha_up(char c) { return c & ~0x20; } +/// @brief Convert a string to lowercase. +/// @param str String to convert +/// @return Lowercase copy of str inline std::string to_lower(std::string str) { for (char& c : str) if (c >= 'A' && c <= 'Z') @@ -67,6 +115,9 @@ inline std::string to_lower(std::string str) { return str; } +/// @brief Convert a string to uppercase.
+/// @param str String to convert +/// @return Uppercase copy of str inline std::string to_upper(std::string str) { for (char& c : str) if (c >= 'a' && c <= 'z') @@ -74,28 +125,49 @@ inline std::string to_upper(std::string str) { return str; } -// case-insensitive character comparison +/// @brief Case-insensitive single character comparison. +/// @param a First character +/// @param b Second character +/// @return True if characters are equal (ignoring case) inline bool isame(char a, char b) { return a == b || ((a^b) == 0x20 && (a|0x20) >= 'a' && (a|0x20) <= 'z'); } -// Case-insensitive comparisons. The second arg must be lowercase. - +/// @brief Case-insensitive string equality starting at an offset. +/// @details The reference string low must already be lowercase; only str is case-folded. +/// @param str String to check +/// @param offset Starting offset in str +/// @param low Lowercase reference string +/// @return True if (str[offset:] == low) case-insensitively inline bool iequal_from(const std::string& str, size_t offset, const std::string& low) { return str.length() == low.length() + offset && std::equal(std::begin(low), std::end(low), str.begin() + offset, [](char c1, char c2) { return c1 == lower(c2); }); } +/// @brief Case-insensitive string equality. +/// @details The second argument (low) must already be lowercase; only str is case-folded. +/// @param str String to check +/// @param low Lowercase reference string +/// @return True if str == low (case-insensitively) inline bool iequal(const std::string& str, const std::string& low) { return iequal_from(str, 0, low); } +/// @brief Case-insensitive prefix check. 
+/// @param str String to check +/// @param prefix Lowercase prefix to look for +/// @return True if str starts with prefix (case-insensitively) inline bool istarts_with(const std::string& str, const std::string& prefix) { return str.length() >= prefix.length() && std::equal(std::begin(prefix), std::end(prefix), str.begin(), [](char c1, char c2) { return c1 == lower(c2); }); } + +/// @brief Case-insensitive suffix check. +/// @param str String to check +/// @param suffix Lowercase suffix to look for +/// @return True if str ends with suffix (case-insensitively) inline bool iends_with(const std::string& str, const std::string& suffix) { size_t sl = suffix.length(); return str.length() >= sl && @@ -103,10 +175,17 @@ inline bool iends_with(const std::string& str, const std::string& suffix) { [](char c1, char c2) { return c1 == lower(c2); }); } +/// @brief Check if string ends with suffix or suffix.gz (case-insensitive). +/// @param str String to check +/// @param suffix Lowercase suffix to look for (or suffix.gz) +/// @return True if str ends with suffix or suffix.gz inline bool giends_with(const std::string& str, const std::string& suffix) { return iends_with(str, suffix) || iends_with(str, suffix + ".gz"); } +/// @brief Trim whitespace from both ends of a string. +/// @param str String to trim +/// @return Trimmed copy of str inline std::string trim_str(const std::string& str) { const std::string ws = " \r\n\t"; std::string::size_type first = str.find_first_not_of(ws); @@ -116,12 +195,18 @@ inline std::string trim_str(const std::string& str) { return str.substr(first, last - first + 1); } +/// @brief Trim whitespace from the right end of a string. +/// @param str String to trim +/// @return Right-trimmed copy of str inline std::string rtrim_str(const std::string& str) { std::string::size_type last = str.find_last_not_of(" \r\n\t"); return str.substr(0, last == std::string::npos ? 
0 : last + 1); } -// end is after the last character of the string (typically \0) +/// @brief Trim whitespace from the right end of a C string. +/// @param start Pointer to start of string +/// @param end Pointer to end of string (after last character, typically \\0); nullptr to auto-detect +/// @return Pointer to first non-whitespace character from the right inline const char* rtrim_cstr(const char* start, const char* end=nullptr) { if (!start) return nullptr; @@ -140,8 +225,12 @@ inline size_t length(char) { return 1; } inline size_t length(const std::string& s) { return s.length(); } } -// takes a single separator (usually char or string); -// may return empty fields +/// @brief Split a string by a separator into a vector (append to existing vector). +/// @details Takes a single separator (char or string); may return empty fields. +/// @tparam S Separator type (char or string) +/// @param str String to split +/// @param sep Separator to split on +/// @param result Vector to append results to template void split_str_into(const std::string& str, S sep, std::vector& result) { @@ -153,6 +242,12 @@ void split_str_into(const std::string& str, S sep, result.emplace_back(str, start); } +/// @brief Split a string by a separator into a vector. +/// @details Takes a single separator (char or string); may return empty fields. +/// @tparam S Separator type (char or string) +/// @param str String to split +/// @param sep Separator to split on +/// @return Vector of substrings template std::vector split_str(const std::string& str, S sep) { std::vector result; @@ -160,8 +255,11 @@ std::vector split_str(const std::string& str, S sep) { return result; } -// _multi variants takes multiple 1-char separators as a string; -// discards empty fields +/// @brief Split a string by multiple single-character separators (append to existing vector). +/// @details Discards empty fields (unlike split_str_into). 
+/// @param str String to split +/// @param seps String of separator characters +/// @param result Vector to append results to inline void split_str_into_multi(const std::string& str, const char* seps, std::vector& result) { std::size_t start = str.find_first_not_of(seps); @@ -172,6 +270,11 @@ inline void split_str_into_multi(const std::string& str, const char* seps, } } +/// @brief Split a string by multiple single-character separators into a vector. +/// @details Discards empty fields (unlike split_str). +/// @param str String to split +/// @param seps String of separator characters (default: space and tab) +/// @return Vector of non-empty substrings inline std::vector split_str_multi(const std::string& str, const char* seps=" \t") { std::vector result; @@ -179,6 +282,15 @@ inline std::vector split_str_multi(const std::string& str, return result; } +/// @brief Join elements from iterators with a separator. +/// @tparam T Iterator type +/// @tparam S Separator type +/// @tparam F Getter function type +/// @param begin Iterator to first element +/// @param end Iterator to end (exclusive) +/// @param sep Separator to insert between elements +/// @param getter Function to convert each element to string +/// @return Joined string template std::string join_str(T begin, T end, const S& sep, const F& getter) { std::string r; @@ -192,21 +304,48 @@ std::string join_str(T begin, T end, const S& sep, const F& getter) { return r; } +/// @brief Join elements from iterators with a separator. +/// @tparam T Iterator type +/// @tparam S Separator type +/// @param begin Iterator to first element +/// @param end Iterator to end (exclusive) +/// @param sep Separator to insert between elements +/// @return Joined string template std::string join_str(T begin, T end, const S& sep) { return join_str(begin, end, sep, [](const std::string& t) { return t; }); } +/// @brief Join elements from an iterable with a separator. 
+/// @tparam T Iterable type +/// @tparam S Separator type +/// @tparam F Getter function type +/// @param iterable Container of elements +/// @param sep Separator to insert between elements +/// @param getter Function to convert each element to string +/// @return Joined string template std::string join_str(const T& iterable, const S& sep, const F& getter) { return join_str(iterable.begin(), iterable.end(), sep, getter); } +/// @brief Join elements from an iterable with a separator. +/// @tparam T Iterable type +/// @tparam S Separator type +/// @param iterable Container of string elements +/// @param sep Separator to insert between elements +/// @return Joined string template std::string join_str(const T& iterable, const S& sep) { return join_str(iterable.begin(), iterable.end(), sep); } +/// @brief Append an item to a string with a separator if string is non-empty. +/// @tparam T Item type +/// @tparam S Separator type +/// @param str String to append to +/// @param sep Separator to insert before item +/// @param item Item to append template void string_append_sep(std::string& str, S sep, const T& item) { if (!str.empty()) @@ -214,6 +353,10 @@ void string_append_sep(std::string& str, S sep, const T& item) { str += item; } +/// @brief Replace all occurrences of a substring with another. +/// @param s String to modify in-place +/// @param old Substring to find +/// @param new_ Replacement substring inline void replace_all(std::string &s, const std::string &old, const std::string &new_) { std::string::size_type pos = 0; @@ -223,7 +366,11 @@ inline void replace_all(std::string &s, } } -// list is a comma separated string +/// @brief Check if a name appears as an item in a separated list. 
+/// @param name Item to search for +/// @param list Separated list of items +/// @param sep Separator character (default: comma) +/// @return True if name appears as a complete item in list inline bool is_in_list(const std::string& name, const std::string& list, char sep=',') { if (name.length() >= list.length()) @@ -238,21 +385,45 @@ inline bool is_in_list(const std::string& name, const std::string& list, // ##### vector helpers ##### +/// @brief Check if a value exists in a vector. +/// @tparam T Vector element type +/// @param x Value to search for +/// @param v Vector to search in +/// @return True if x is found in v template bool in_vector(const T& x, const std::vector& v) { return std::find(v.begin(), v.end(), x) != v.end(); } +/// @brief Check if any element in a vector matches a predicate. +/// @tparam F Predicate function type +/// @tparam T Vector element type +/// @param f Predicate function +/// @param v Vector to search in +/// @return True if predicate matches any element template bool in_vector_f(F f, const std::vector& v) { return std::find_if(v.begin(), v.end(), f) != v.end(); } +/// @brief Get pointer to one-past-end of a vector (like end()). +/// @tparam T Vector element type +/// @param v Vector +/// @return Pointer to one-past-end (v.data() + v.size()) template T* vector_end_ptr(std::vector& v) { return v.data() + v.size(); } + +/// @brief Get const pointer to one-past-end of a vector. +/// @tparam T Vector element type +/// @param v Vector +/// @return Const pointer to one-past-end template const T* vector_end_ptr(const std::vector& v) { return v.data() + v.size(); } +/// @brief Move elements from source vector to destination vector. 
+/// @tparam T Vector element type +/// @param dst Destination vector +/// @param src Source vector; its contents are moved from (do not rely on it being empty afterwards) template void vector_move_extend(std::vector& dst, std::vector&& src) { if (dst.empty()) @@ -262,14 +433,24 @@ void vector_move_extend(std::vector& dst, std::vector&& src) { std::make_move_iterator(src.end())); } -// wrapper around the erase-remove idiom +/// @brief Remove all elements matching a condition from a vector. +/// @tparam T Vector element type +/// @tparam F Predicate function type +/// @param v Vector to modify in-place +/// @param condition Predicate; elements for which it returns true are removed template void vector_remove_if(std::vector& v, F&& condition) { v.erase(std::remove_if(v.begin(), v.end(), condition), v.end()); } -/// \par data - 2d array (old_width x length) in a vector -/// Insert \par n new columns at position pos. +/// @brief Insert columns into a 2D array stored as a flat vector. +/// @tparam T Array element type +/// @param data 2D array (old_width x length) stored in a flat vector +/// @param old_width Original number of columns +/// @param length Number of rows +/// @param n Number of new columns to insert +/// @param pos Column position to insert at +/// @param new_value Value to fill new columns with template void vector_insert_columns(std::vector& data, size_t old_width, size_t length, size_t n, size_t pos, const T& new_value) { @@ -287,8 +468,12 @@ void vector_insert_columns(std::vector& data, size_t old_width, } assert(dst == data.begin()); } -/// \par data - 2d array with new_width+1 columns, in a vector -/// Remove column at position pos. + +/// @brief Remove a column from a 2D array stored as a flat vector. 
+/// @tparam T Array element type +/// @param data 2D array with (new_width + 1) columns stored in a flat vector +/// @param new_width Number of columns after removal +/// @param pos Column position to remove template void vector_remove_column(std::vector& data, size_t new_width, size_t pos) { assert(pos <= new_width); @@ -301,12 +486,16 @@ void vector_remove_column(std::vector& data, size_t new_width, size_t pos) { // ##### other helpers ##### -// Numeric ID used for case-insensitive comparison of 4 letters. -// s must have 4 chars or 3 chars + NUL, ' ' and NUL are equivalent in s. +/// @brief Generate a case-insensitive numeric ID for 4-letter strings. +/// @param s Pointer to 4 characters (or 3 chars + NUL); space and NUL are equivalent +/// @return Numeric ID suitable for case-insensitive comparison constexpr int ialpha4_id(const char* s) { return (s[0] << 24 | s[1] << 16 | s[2] << 8 | s[3]) & ~0x20202020; } -// Numeric ID used for case-insensitive comparison of 3 letters. + +/// @brief Generate a case-insensitive numeric ID for 3-letter strings. +/// @param s Pointer to 3 characters +/// @return Numeric ID suitable for case-insensitive comparison constexpr int ialpha3_id(const char* s) { return (s[0] << 16 | s[1] << 8 | s[2]) & ~0x20202020; } diff --git a/include/gemmi/version.hpp b/include/gemmi/version.hpp index 3a19c3303..6fdf45ad4 100644 --- a/include/gemmi/version.hpp +++ b/include/gemmi/version.hpp @@ -5,6 +5,7 @@ #ifndef GEMMI_VERSION_HPP_ #define GEMMI_VERSION_HPP_ +/// @brief Gemmi library version string. #define GEMMI_VERSION "0.7.6-dev" #endif