diff --git a/docs/api.rst b/docs/api.rst
index 6697dfc5..95beeae6 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -54,6 +54,35 @@ Map and Grid Data
 .. doxygenfile:: model.hpp
    :project: gemmi
 
+CIF Data Reading and Writing
+-----------------------------
+
+*(Full documentation added in PR 3.)*
+
+.. doxygenfile:: cifdoc.hpp
+   :project: gemmi
+
+.. doxygenfile:: cif.hpp
+   :project: gemmi
+
+.. doxygenfile:: read_cif.hpp
+   :project: gemmi
+
+.. doxygenfile:: to_cif.hpp
+   :project: gemmi
+
+.. doxygenfile:: to_json.hpp
+   :project: gemmi
+
+.. doxygenfile:: json.hpp
+   :project: gemmi
+
+.. doxygenfile:: numb.hpp
+   :project: gemmi
+
+.. doxygenfile:: ddl.hpp
+   :project: gemmi
+
 Structure I/O
 -------------
 
diff --git a/include/gemmi/cif.hpp b/include/gemmi/cif.hpp
index 8ac46fc8..df2c694e 100644
--- a/include/gemmi/cif.hpp
+++ b/include/gemmi/cif.hpp
@@ -1,3 +1,14 @@
+/// @file
+/// @brief PEGTL-based CIF parser with pluggable action handlers and Document construction.
+///
+/// This header provides the complete CIF parsing infrastructure:
+/// - PEG grammar rules for CIF 1.1 syntax (namespace `rules`)
+/// - Customizable action handlers (templates specializing `Action<Rule>`)
+/// - Built-in actions that construct an in-memory Document
+/// - Entry points: read_file(), read_memory(), read_cstream(), read_istream(), read()
+///
+/// For high-level parsing of standard formats (mmCIF, plain CIF), prefer read_cif.hpp.
+
 // Copyright 2017 Global Phasing Ltd.
 //
 // CIF parser (based on PEGTL) with pluggable actions,
@@ -264,10 +275,22 @@ template<> struct Action<rules::loop> {
 };
 
 
+/// @brief Parse CIF content from an input, populating a Document.
+/// @tparam Input PEGTL input type (e.g., pegtl::file_input, pegtl::memory_input).
+/// @param d Document to populate with parsed blocks and items.
+/// @param in PEGTL input object.
+/// @throws pegtl::parse_error on syntax errors.
 template<typename Input> void parse_input(Document& d, Input&& in) {
   pegtl::parse<rules::file, Action, Errors>(in, d);
 }
 
+/// @brief Read a complete CIF file and return a Document.
+/// @tparam Input PEGTL input type.
+/// @param in PEGTL input object with a source() method.
+/// @param check_level Validation strictness: 0=no checks, 1=missing values & duplicates, 2=also empty loops.
+/// @return Fully parsed Document.
+/// @throws pegtl::parse_error on syntax errors.
+/// @throws std::runtime_error on validation failures (check_level > 0).
 template<typename Input> Document read_input(Input&& in, int check_level=1) {
   Document doc;
   doc.source = in.source();
@@ -286,6 +309,12 @@ template<typename Input> Document read_input(Input&& in, int check_level=1) {
   return doc;
 }
 
+/// @brief Parse a single CIF data block and add it to a Document.
+/// @tparam Input PEGTL input type.
+/// @param d Document to append to.
+/// @param in PEGTL input.
+/// @return Byte offset after parsing the block.
+/// @throws pegtl::parse_error on syntax errors.
 template<typename Input>
 size_t parse_one_block(Document& d, Input&& in) {
   pegtl::parse<rules::one_block, Action, Errors>(in, d);
@@ -302,21 +331,48 @@ size_t parse_one_block(Document& d, Input&& in) {
   tao::pegtl::file_input<> in(path)
 #endif
 
+/// @brief Read a CIF file from disk.
+/// @param filename Path to the CIF file.
+/// @param check_level Validation level (0-2).
+/// @return Parsed Document.
+/// @throws std::runtime_error if file cannot be opened.
+/// @throws pegtl::parse_error on syntax errors.
 inline Document read_file(const std::string& filename, int check_level=1) {
   GEMMI_CIF_FILE_INPUT(in, filename);
   return read_input(in, check_level);
 }
 
+/// @brief Read CIF from memory.
+/// @param data Pointer to CIF content (need not be null-terminated).
+/// @param size Number of bytes to parse.
+/// @param name Label for error messages (e.g., "buffer").
+/// @param check_level Validation level (0-2).
+/// @return Parsed Document.
+/// @throws pegtl::parse_error on syntax errors.
 inline Document read_memory(const char* data, size_t size, const char* name, int check_level=1) {
   pegtl::memory_input<> in(data, size, name);
   return read_input(in, check_level);
 }
 
+/// @brief Read CIF from a C FILE stream.
+/// @param f Open FILE pointer (e.g., stdin, or result of fopen()).
+/// @param bufsize Buffering size for reading (e.g., 16*1024).
+/// @param name Label for error messages.
+/// @param check_level Validation level (0-2).
+/// @return Parsed Document.
+/// @throws pegtl::parse_error on syntax errors.
 inline Document read_cstream(std::FILE *f, size_t bufsize, const char* name, int check_level=1) {
   pegtl::cstream_input<> in(f, bufsize, name);
   return read_input(in, check_level);
 }
 
+/// @brief Read CIF from a C++ std::istream.
+/// @param is Input stream (e.g., std::ifstream, std::cin).
+/// @param bufsize Buffering size (e.g., 16*1024).
+/// @param name Label for error messages.
+/// @param check_level Validation level (0-2).
+/// @return Parsed Document.
+/// @throws pegtl::parse_error on syntax errors.
 inline Document read_istream(std::istream &is, size_t bufsize, const char* name,
                              int check_level=1) {
   pegtl::istream_input<> in(is, bufsize, name);
@@ -332,6 +388,11 @@ template<> struct CheckAction<rules::missing_value> {
   }
 };
 
+/// @brief Try parsing CIF without validation or error throwing.
+/// @tparam Input PEGTL input type.
+/// @param in PEGTL input.
+/// @param msg Optional pointer to store error message (if parsing fails).
+/// @return true if parse succeeded, false otherwise.
 template<typename Input> bool try_parse(Input&& in, std::string* msg) {
   try {
     return pegtl::parse<rules::file, CheckAction, Errors>(in);
@@ -342,8 +403,13 @@ template<typename Input> bool try_parse(Input&& in, std::string* msg) {
   }
 }
 
-// A function for transparent reading of normal and compressed files.
-// T should have the same traits as BasicInput and MaybeGzipped.
+/// @brief Read CIF from a file or stream, handling compression transparently.
+/// @tparam T Type with methods: uncompress_into_buffer(), is_stdin(), is_compressed(), path().
+/// (Traits matching BasicInput and MaybeGzipped wrappers in Gemmi.)
+/// @param input Input wrapper for a normal or compressed file (e.g., BasicInput or MaybeGzipped).
+/// @param check_level Validation level (0-2).
+/// @return Parsed Document.
+/// @throws pegtl::parse_error on syntax errors.
 template<typename T>
 Document read(T&& input, int check_level=1) {
   if (CharArray mem = input.uncompress_into_buffer())
@@ -353,6 +419,11 @@ Document read(T&& input, int check_level=1) {
   return read_file(input.path(), check_level);
 }
 
+/// @brief Check CIF syntax without building a Document.
+/// @tparam T Type with uncompress_into_buffer() and path() methods.
+/// @param input Input wrapper.
+/// @param msg Optional pointer to store error message.
+/// @return true if syntax is valid, false otherwise.
 template<typename T>
 bool check_syntax(T&& input, std::string* msg) {
   if (CharArray mem = input.uncompress_into_buffer()) {
@@ -363,6 +434,13 @@ bool check_syntax(T&& input, std::string* msg) {
   return try_parse(in, msg);
 }
 
+/// @brief Read one CIF block from a file or stream into an existing Document.
+/// @tparam T Type with is_compressed(), is_stdin(), uncompress_into_buffer(size_t), path() methods.
+/// @param d Document to append block to.
+/// @param input Input wrapper.
+/// @param limit Max bytes to read from compressed file (0 = no limit).
+/// @return Byte offset after parsing the block.
+/// @throws pegtl::parse_error on syntax errors.
 template<typename T>
 size_t read_one_block(Document& d, T&& input, size_t limit) {
   if (input.is_compressed()) {
diff --git a/include/gemmi/cifdoc.hpp b/include/gemmi/cifdoc.hpp
index 1adcdd99..db846313 100644
--- a/include/gemmi/cifdoc.hpp
+++ b/include/gemmi/cifdoc.hpp
@@ -1,3 +1,11 @@
+/// @file
+/// @brief In-memory representation of a CIF (Crystallographic Information File) document.
+///
+/// This header defines the core data structures for parsing and manipulating CIF files.
+/// It provides a document model that can represent both traditional CIF and mmCIF (macromolecular CIF)
+/// formats, as well as alternative serializations like CIF-JSON or mmJSON.
+/// The model consists of blocks, items (tag-value pairs or loops), and supports frame nesting.
+
 // Copyright 2017 Global Phasing Ltd.
 //
 // struct Document that represents the CIF file (but can also be
@@ -33,11 +41,17 @@ namespace cif {
 using std::size_t;
 using gemmi::fail;
 
+/// @brief Discriminator for CIF items: tag-value pairs, loops, frames, comments, or erased placeholders.
 enum class ItemType : unsigned char {
+  /// A single tag-value pair (e.g., `_cell.length_a  10.5`)
   Pair,
+  /// A loop with tags (column headers) and values in row-major storage
   Loop,
+  /// A save frame (nested block); used in CIF to define templates or additional metadata
   Frame,
+  /// A comment item (prefix-preserved in output, not validated for syntax)
   Comment,
+  /// Placeholder for a logically removed item; storage not reclaimed
   Erased,
 };
 
@@ -124,48 +138,90 @@ struct LoopArg {};
 struct FrameArg { std::string str; };
 struct CommentArg { std::string str; };
 
+/// @brief A tabular loop structure: tags (column names) and flat row-major values.
+///
+/// In CIF syntax, a loop is a compact representation of a table with named columns:
+/// ```
+/// loop_
+///   _category.tag1  _category.tag2  _category.tag3
+///   value1a        value2a         value3a
+///   value1b        value2b         value3b
+/// ```
+/// Internally, the tag names are stored in `tags` and all values are stored sequentially
+/// in `values` using row-major layout: for N columns and M rows, `values.size() == N*M`,
+/// and element at (row r, column c) is at index `r * N + c`.
 struct Loop {
+  /// Column header names (tags), typically with a common prefix (e.g., `_atom_site.`)
   std::vector<std::string> tags;
+  /// All values in row-major order: consecutive `tags.size()` elements form one row.
+  /// Invariant: `values.size() % tags.size() == 0`.
   std::vector<std::string> values;
 
-  // search and access
+  /// @brief Find a tag by case-insensitive match.
+  /// @param lctag Tag name that the caller has already converted to lowercase.
+  /// @return Column index (0-based) if found; -1 if not found.
   int find_tag_lc(const std::string& lctag) const {
     auto f = std::find_if(tags.begin(), tags.end(),
         [&lctag](const std::string& t) { return gemmi::iequal(t, lctag); });
     return f == tags.end() ? -1 : f - tags.begin();
   }
+  /// @brief Find a tag by case-insensitive match.
+  /// @param tag Tag name (converted to lowercase internally).
+  /// @return Column index (0-based) if found; -1 if not found.
   int find_tag(const std::string& tag) const {
     return find_tag_lc(gemmi::to_lower(tag));
   }
+  /// @brief Check if a tag exists (case-insensitive).
   bool has_tag(const std::string& tag) const { return find_tag(tag) != -1; }
+  /// @brief Number of columns in this loop.
   size_t width() const { return tags.size(); }
+  /// @brief Number of rows in this loop.
   size_t length() const { return values.size() / tags.size(); }
 
+  /// @brief Direct access to a value by row and column index (row-major layout).
+  /// @param row Row index (0-based).
+  /// @param col Column index (0-based).
+  /// @return Reference to the value at (row, col).
   std::string& val(size_t row, size_t col) { return values[row * tags.size() + col]; }
+  /// @brief Const overload of val().
   const std::string& val(size_t row, size_t col) const {
     return const_cast<Loop*>(this)->val(row, col);
   }
 
+  /// @brief Clear all tags and values from this loop.
   void clear() { tags.clear(); values.clear(); }
 
+  /// @brief Insert values into the loop, optionally at a specific row position.
+  /// @tparam T Container type with begin()/end() iterators (e.g., std::vector, std::initializer_list).
+  /// @param new_values Container of strings to insert.
+  /// @param pos Row position to insert at; a negative or past-the-end position appends at the end.
   template <typename T> void add_values(T new_values, int pos=-1) {
     auto it = values.end();
     if (pos >= 0 && pos * width() < values.size())
       it = values.begin() + pos * tags.size();
     values.insert(it, new_values.begin(), new_values.end());
   }
+  /// @brief Overload for initializer_list.
   void add_values(std::initializer_list<std::string> new_values, int pos=-1) {
     add_values<std::initializer_list<std::string>>(new_values, pos);
   }
+  /// @brief Add a complete row to the loop (must match column count).
+  /// @tparam T Container with begin()/end() iterators.
+  /// @param new_values Container of strings; size must equal `width()`.
+  /// @param pos Row position to insert at (-1 appends at end).
+  /// @throws std::runtime_error if new_values.size() != tags.size().
   template <typename T> void add_row(T new_values, int pos=-1) {
     if (new_values.size() != tags.size())
       fail("add_row(): wrong row length.");
     add_values<T>(new_values, pos);
   }
+  /// @brief Overload for initializer_list.
   void add_row(std::initializer_list<std::string> new_values, int pos=-1) {
     add_row<std::initializer_list<std::string>>(new_values, pos);
   }
-  // comments are added relying on how cif writing works
+  /// @brief Add a comment prefix to the first value of a row, then add the row.
+  /// @param ss Initializer list with comment string at index 0, then `width()` value strings.
+  /// @throws std::runtime_error if ss.size() != tags.size() + 1.
   void add_comment_and_row(std::initializer_list<std::string> ss) {
     if (ss.size() != tags.size() + 1)
       fail("add_comment_and_row(): wrong row length.");
@@ -173,13 +229,17 @@ struct Loop {
     vec[0] = cat('#', *ss.begin(), '\n', vec[0]);
     add_row(vec);
   }
+  /// @brief Remove the last row from the loop.
+  /// @throws std::runtime_error if the loop is already empty.
   void pop_row() {
     if (values.size() < tags.size())
       fail("pop_row() called on empty Loop");
     values.resize(values.size() - tags.size());
   }
 
-  // the arguments must be valid row indices
+  /// @brief Move a row to a different position within the loop.
+  /// @param old_pos Current row index (0-based); must be < length().
+  /// @param new_pos Target row index (0-based); must be < length().
   void move_row(int old_pos, int new_pos) {
     size_t w = width();
     auto src = values.begin() + old_pos * w;
@@ -190,7 +250,10 @@ struct Loop {
       std::rotate(dst, src, src+w);
   }
 
-  // column_names are not checked for duplicates nor for category name
+  /// @brief Add new columns with an initial fill value; names are not checked for duplicates nor for category name.
+  /// @param column_names Vector of new tag names (must start with '_').
+  /// @param value String value to fill for all existing rows.
+  /// @param pos Column position to insert at (-1 appends at end).
   void add_columns(const std::vector<std::string>& column_names,
                    const std::string& value, int pos=-1) {
     for (const std::string& name : column_names)
@@ -202,6 +265,9 @@ struct Loop {
     vector_insert_columns(values, old_width, len, column_names.size(), upos, value);
   }
 
+  /// @brief Remove a column by tag name.
+  /// @param column_name Tag to remove (case-insensitive search).
+  /// @throws std::runtime_error if tag not found.
   void remove_column(const std::string& column_name) {
     int n = find_tag(column_name);
     if (n == -1)
@@ -209,14 +275,19 @@ struct Loop {
     remove_column_at(n);
   }
 
-  /// \pre: n < tags.size()
+  /// @brief Remove a column by index.
+  /// @param n Column index; must be < tags.size().
   void remove_column_at(size_t n) {
     tags.erase(tags.begin() + n);
     vector_remove_column(values, tags.size(), n);
   }
 
+  /// @brief Replace all values with columns from a vector of column vectors.
+  /// @param columns Vector of columns; size must equal width(), each column must equal length().
   void set_all_values(std::vector<std::vector<std::string>> columns);
 
+  /// @brief Extract the common prefix from all tags in this loop.
+  /// @return Longest prefix that all tags share (case-insensitive).
   std::string common_prefix() const {
     if (tags.empty())
       return {};
@@ -235,30 +306,55 @@ struct Loop {
 struct Item;
 struct Block;
 
-// Accessor to a specific loop column, or to a single value from a Pair.
+/// @brief A view into a single column of a Loop, or a single Pair value.
+///
+/// Provides array-like access to a sequence of values from either a loop column or a pair value.
+/// Acts as both a reference (can be modified through operator[]) and an iterable container.
 class Column {
 public:
+  /// @brief Construct an empty/null column.
   Column() : item_(nullptr) {}
+  /// @brief Construct a column view for a specific item and column index.
+  /// @param item Pointer to an Item (must be Loop or Pair type).
+  /// @param col Column index; for Loop, this is the column position; for Pair, should be 0.
   Column(Item* item, size_t col) : item_(item), col_(col) {}
+  /// @brief Iterator type for strided traversal of column values.
   using iterator = StrideIter<std::string>;
+  /// @brief Begin iterator; provides access to the first value in the column.
   iterator begin();
+  /// @brief End iterator; one-past-the-last value.
   iterator end();
+  /// @brief Const iterator type.
   using const_iterator = StrideIter<const std::string>;
+  /// @brief Const begin iterator.
   const_iterator begin() const { return const_cast<Column*>(this)->begin(); }
+  /// @brief Const end iterator.
   const_iterator end() const { return const_cast<Column*>(this)->end(); }
 
+  /// @brief Get the underlying Loop, if this column comes from a Loop item; nullptr otherwise.
   Loop* get_loop() const;
+  /// @brief Get the tag (column header) string for this column.
+  /// @return Pointer to the tag string (valid as long as the Item is alive).
   std::string* get_tag();
+  /// @brief Const overload of get_tag().
   const std::string* get_tag() const {
     return const_cast<Column*>(this)->get_tag();
   }
+  /// @brief Number of values in this column.
+  /// @return Loop length if from a Loop; 1 if from a Pair; 0 if null.
   int length() const {
     if (const Loop* loop = get_loop())
       return loop->length();
     return item_ ? 1 : 0;
   }
+  /// @brief Check if this column is valid (not null).
   explicit operator bool() const { return item_ != nullptr ; }
+  /// @brief Access a value by index (0-based; for Pair, only index 0 is valid).
   std::string& operator[](int n);
+  /// @brief Safe access with bounds checking and negative indexing support.
+  /// @param n Index (negative indices count from end).
+  /// @return Reference to the value.
+  /// @throws std::out_of_range if index is out of bounds.
   std::string& at(int n) {
     if (n < 0)
       n += length();
@@ -267,67 +363,97 @@ class Column {
           " in Column with length " + std::to_string(length()));
     return operator[](n);
   }
+  /// @brief Const overload of at().
   const std::string& at(int n) const {
     return const_cast<Column*>(this)->at(n);
   }
 
+  /// @brief Get a CIF-unquoted string value (removes quotes/semicolons).
   std::string str(int n) const { return as_string(at(n)); }
+  /// @brief Get const pointer to the underlying Item.
   const Item* item() const { return item_; }
+  /// @brief Get mutable pointer to the underlying Item.
   Item* item() { return item_; }
+  /// @brief Get the column index within the Loop (or 0 for Pair).
   size_t col() const { return col_; }
 
+  /// @brief Erase this column from its item (removes from Loop or erases Pair).
   void erase();
 
 private:
-  Item* item_;
-  size_t col_;  // for loop this is a column index in item_->loop
+  Item* item_;           ///< Pointer to the Item (Loop or Pair).
+  size_t col_;           ///< Column index in the Loop, or 0 for Pair.
 };
 
-// Some values can be given either in loop or as tag-value pairs.
-// The latter case is equivalent to a loop with a single row.
-// We optimized for loops, and in case of tag-values we copy the values
-// into the `values` vector.
+/// @brief A unified view of data as either a loop (multiple rows) or pairs (single row).
+///
+/// Some CIF data can be represented either way:
+/// - As a loop with multiple rows (efficient for large tables)
+/// - As separate tag-value pairs (equivalent to a loop with one row)
+///
+/// This struct abstracts both representations to provide uniform access through Row objects.
+/// It internally tracks column mappings and optimizes for the loop case.
 struct Table {
+  /// @brief Pointer to the Loop Item, or nullptr if data is in pairs.
   Item* loop_item;
+  /// @brief Reference to the Block containing the items.
   Block& bloc;
+  /// @brief Column position mappings: for each query column, the position in loop/pairs.
+  /// Negative position (-1) means the column is optional and absent.
   std::vector<int> positions;
+  /// @brief Length of the common tag prefix (e.g., `_atom_site.` length).
   size_t prefix_length;
 
+  /// @brief A single row of the table, providing key-value access to columns.
   struct Row {
+    /// @brief Reference to the parent Table.
     Table& tab;
+    /// @brief Row index (-1 represents the tag row itself).
     int row_index;
 
+    /// @brief Unsafe access: position must be valid (>=0).
     std::string& value_at_unsafe(int pos);
+    /// @brief Safe access by position; throws if position is -1 (optional column absent).
     std::string& value_at(int pos) {
       if (pos == -1)
         throw std::out_of_range("Cannot access missing optional tag.");
       return value_at_unsafe(pos);
     }
+    /// @brief Const overload of value_at().
     const std::string& value_at(int pos) const {
       return const_cast<Row*>(this)->value_at(pos);
     }
 
+    /// @brief Access by column index in the table query (with bounds checking).
     std::string& at(int n) {
       return value_at(tab.positions.at(n < 0 ? n + size() : n));
     }
+    /// @brief Const overload of at().
     const std::string& at(int n) const { return const_cast<Row*>(this)->at(n); }
 
+    /// @brief Unchecked access by column index.
     std::string& operator[](size_t n);
+    /// @brief Const overload.
     const std::string& operator[](size_t n) const {
       return const_cast<Row*>(this)->operator[](n);
     }
 
+    /// @brief Pointer-based access to optional columns (nullptr if absent).
     std::string* ptr_at(int n) {
       int pos = tab.positions.at(n < 0 ? n + size() : n);
       return pos >= 0 ? &value_at(pos) : nullptr;
     }
+    /// @brief Const overload of ptr_at().
     const std::string* ptr_at(int n) const {
       return const_cast<Row*>(this)->ptr_at(n);
     }
 
+    /// @brief Check if a column is present.
     bool has(size_t n) const { return tab.positions.at(n) >= 0; }
+    /// @brief Check if a column is present and has a non-null value.
     bool has2(size_t n) const { return has(n) && !cif::is_null(operator[](n)); }
 
+    /// @brief Return the first non-null value among two columns, or a null placeholder.
     const std::string& one_of(size_t n1, size_t n2) const {
       static const std::string nul(1, '.');
       if (has2(n1))
@@ -337,48 +463,72 @@ struct Table {
       return nul;
     }
 
+    /// @brief Number of columns in the table query.
     size_t size() const { return tab.width(); }
 
+    /// @brief Get a CIF-unquoted string value.
     std::string str(int n) const { return as_string(at(n)); }
 
+    /// @brief Iterator type for traversing columns in this row.
     using iterator = IndirectIter<Row, std::string>;
+    /// @brief Const iterator type.
     using const_iterator = IndirectIter<const Row, const std::string>;
+    /// @brief Begin iterator.
     iterator begin() { return iterator({this, tab.positions.begin()}); }
+    /// @brief End iterator.
     iterator end() { return iterator({this, tab.positions.end()}); }
+    /// @brief Const begin iterator.
     const_iterator begin() const {
       return const_iterator({this, tab.positions.begin()});
     }
+    /// @brief Const end iterator.
     const_iterator end() const {
       return const_iterator({this, tab.positions.end()});
     }
   };
 
+  /// @brief Get the underlying Loop, if this table is loop-based.
   Loop* get_loop();
+  /// @brief Check if this table is valid (has at least one column).
   bool ok() const { return !positions.empty(); }
+  /// @brief Number of columns in the table query.
   size_t width() const { return positions.size(); }
+  /// @brief Number of rows in this table.
   size_t length() const;
+  /// @brief Alias for length().
   size_t size() const { return length(); }
+  /// @brief Check if column n is present (not -1).
   bool has_column(int n) const { return ok() && positions.at(n) >= 0; }
+  /// @brief Access the tag row (row_index == -1).
   Row tags() { return Row{*this, -1}; }
+  /// @brief Access a data row by index.
   Row operator[](int n) { return Row{*this, n}; }
 
+  /// @brief Validate and normalize a row index (supports negative indexing).
+  /// @param n Index to check (modified in-place).
+  /// @throws std::out_of_range if index is invalid.
   void at_check(int& n) const {
     if (n < 0)
       n += length();
     if (n < 0 || static_cast<size_t>(n) >= length())
       throw std::out_of_range("No row with index " + std::to_string(n));
   }
+  /// @brief Safe row access with bounds checking.
   Row at(int n) {
     at_check(n);
     return (*this)[n];
   }
 
+  /// @brief Get the single row of a one-row table.
+  /// @return The first (and only) row.
+  /// @throws std::runtime_error if table has != 1 row.
   Row one() {
     if (length() != 1)
       fail("Expected one value, found " + std::to_string(length()));
     return (*this)[0];
   }
 
+  /// @brief Get the common category prefix for this table (e.g., `_atom_site`).
   std::string get_prefix() const {
     for (int pos : positions)
       if (pos >= 0)
@@ -387,15 +537,29 @@ struct Table {
     fail("The table has no columns.");
   }
 
+  /// @brief Find the first row where the first column matches a value.
+  /// @param s String value to search for (compared with as_string unquoting).
+  /// @return The matching row.
+  /// @throws std::runtime_error if no row matches.
   Row find_row(const std::string& s);
 
+  /// @brief Append a row with values matching the table columns.
+  /// @tparam T Container type with begin()/end().
+  /// @param new_values Container of strings; size must equal width().
   template <typename T> void append_row(const T& new_values);
+  /// @brief Overload for initializer_list.
   void append_row(std::initializer_list<std::string> new_values) {
     append_row<std::initializer_list<std::string>>(new_values);
   }
+  /// @brief Remove a single row.
   void remove_row(int row_index) { remove_rows(row_index, row_index+1); }
+  /// @brief Remove rows [start, end).
   void remove_rows(int start, int end);
+  /// @brief Create a Column view for a position.
   Column column_at_pos(int pos);
+  /// @brief Get a Column view by table column index.
+  /// @param n Column index in the query.
+  /// @throws std::runtime_error if the column is absent (position -1).
   Column column(int n) {
     int pos = positions.at(n);
     if (pos == -1)
@@ -403,6 +567,7 @@ struct Table {
     return column_at_pos(pos);
   }
 
+  /// @brief Move a row to a different position.
   void move_row(int old_pos, int new_pos) {
     at_check(old_pos);
     at_check(new_pos);
@@ -410,7 +575,10 @@ struct Table {
       loop->move_row(old_pos, new_pos);
   }
 
-  // prefix is optional
+  /// @brief Find a column by tag name; the tag may be given with or without the common prefix.
+  /// @param tag Column name to search for (case-insensitive).
+  /// @return Position of the matching column.
+  /// @throws std::runtime_error if tag not found.
   int find_column_position(const std::string& tag) const {
     std::string lctag = gemmi::to_lower(tag);
     Row tag_row = const_cast<Table*>(this)->tags();
@@ -423,16 +591,18 @@ struct Table {
     fail("Column name not found: " + tag);
   }
 
+  /// @brief Get a Column view by tag name.
   Column find_column(const std::string& tag) {
     return column_at_pos(find_column_position(tag));
   }
 
+  /// @brief Erase this table (remove all its items from the block).
   void erase();
 
-  /// if it's pairs, convert it to loop
+  /// @brief Ensure data is in loop form (convert from pairs if needed).
   void ensure_loop();
 
-  // It is not a proper input iterator, but just enough for using range-for.
+  /// @brief Minimal row iterator: not a proper input iterator, but just enough for range-based for loops.
   struct iterator {
     Table& parent;
     int index;
@@ -442,37 +612,85 @@ struct Table {
     Row operator*() { return parent[index]; }
     const std::string& get(int n) const { return parent[index].at(n); }
   };
+  /// @brief Begin iterator for rows.
   iterator begin() { return iterator{*this, 0}; }
+  /// @brief End iterator for rows.
   iterator end() { return iterator{*this, (int)length()}; }
 };
 
+/// @brief A CIF data block, containing tags (pairs), loops, and nested frames.
+///
+/// In CIF syntax, a block starts with `data_blockname` and contains items:
+/// - Tag-value pairs: `_tag value`
+/// - Loops: `loop_ _tag1 _tag2 ... value1a value2a value1b value2b ...`
+/// - Frames: `save_framename ... save_`
+///
+/// Blocks are case-insensitive for tag lookup (but case is preserved in output).
 struct Block {
+  /// @brief Block name (e.g., "structure" in `data_structure`).
   std::string name;
+  /// @brief Items in this block (pairs, loops, frames, comments).
   std::vector<Item> items;
 
+  /// @brief Construct a named block.
   explicit Block(const std::string& name_);
+  /// @brief Construct an unnamed block.
   Block();
 
+  /// @brief Swap contents with another block.
   void swap(Block& o) noexcept { name.swap(o.name); items.swap(o.items); }
   // access functions
+  /// @brief Find an Item that is a tag-value pair by tag name.
+  /// @param tag Tag to search for (case-insensitive).
+  /// @return Pointer to the Item, or nullptr if not found or not a Pair.
   const Item* find_pair_item(const std::string& tag) const;
+  /// @brief Find a tag-value pair (Pair).
+  /// @param tag Tag to search for (case-insensitive).
+  /// @return Pointer to the Pair, or nullptr if not found.
   const Pair* find_pair(const std::string& tag) const;
+  /// @brief Find a loop containing a tag and get a Column view.
+  /// @param tag Tag to search for (case-insensitive).
+  /// @return Column view if found and item is a Loop; empty Column otherwise.
   Column find_loop(const std::string& tag);
+  /// @brief Find an Item that is a loop containing a tag.
+  /// @param tag Tag to search for (case-insensitive).
+  /// @return Pointer to the Item, or nullptr if not found.
   const Item* find_loop_item(const std::string& tag) const;
+  /// @brief Find a single value (from Pair or first row of Loop with single column).
+  /// @param tag Tag to search for (case-insensitive).
+  /// @return Pointer to the value string, or nullptr if not found.
   const std::string* find_value(const std::string& tag) const;
+  /// @brief Find all values with a tag (Column from Loop or Pair).
+  /// @param tag Tag to search for (case-insensitive).
+  /// @return Column view (empty if not found).
   Column find_values(const std::string& tag);
+  /// @brief Check if a tag exists in this block.
   bool has_tag(const std::string& tag) const {
     return const_cast<Block*>(this)->find_values(tag).item() != nullptr;
   }
+  /// @brief Check if a tag exists and has at least one non-null value.
   bool has_any_value(const std::string& tag) const {
     Column c = const_cast<Block*>(this)->find_values(tag);
     return c.item() != nullptr && !std::all_of(c.begin(), c.end(), is_null);
   }
+  /// @brief Find a table of values with specified tags (required tags).
+  /// @param prefix Common tag prefix (e.g., `_atom_site`).
+  /// @param tags Tags to search for (no '?' prefix; all required).
+  /// @return Table view (ok() == false if not all tags found).
   Table find(const std::string& prefix,
              const std::vector<std::string>& tags);
+  /// @brief Overload without prefix.
   Table find(const std::vector<std::string>& tags) { return find({}, tags); }
+  /// @brief Find a table with optional tags (all columns attempted).
+  /// @param prefix Common tag prefix.
+  /// @param tags Tags to search for; position -1 if not found.
+  /// @return Table view.
   Table find_any(const std::string& prefix,
                  const std::vector<std::string>& tags);
+  /// @brief Find a table, creating it if not found.
+  /// @param prefix Common tag prefix.
+  /// @param tags Tags; all are created as a new loop if not found.
+  /// @return Table view (ok() == true).
   Table find_or_add(const std::string& prefix, std::vector<std::string> tags) {
     Table t = find(prefix, tags);
     if (!t.ok()) {
@@ -483,26 +701,54 @@ struct Block {
     }
     return t;
   }
+  /// @brief Find a nested frame (save block) by name.
+  /// @param name Frame name (case-insensitive).
+  /// @return Pointer to the frame Block, or nullptr if not found.
   Block* find_frame(std::string name);
+  /// @brief Convert a Loop Item to a Table view.
   Table item_as_table(Item& item);
 
+  /// @brief Get the index of an item containing a tag.
+  /// @param tag Tag to search for (case-insensitive).
+  /// @return Index in the items vector.
+  /// @throws std::runtime_error if tag not found.
   size_t get_index(const std::string& tag) const;
 
   // modifying functions
+  /// @brief Set or update a tag-value pair.
+  /// @param tag Tag name (case-insensitive for lookup, but case is updated if tag is added).
+  /// @param value Value to set.
   void set_pair(const std::string& tag, const std::string& value);
 
+  /// @brief Initialize or get a loop for specified tags.
+  /// @param prefix Common tag prefix.
+  /// @param tags Column names (prefix added automatically).
+  /// @return Reference to the Loop (newly created if needed).
   Loop& init_loop(const std::string& prefix, std::vector<std::string> tags) {
     Table tab = find_any(prefix, tags);
     return setup_loop(std::move(tab), prefix, std::move(tags));
   }
 
+  /// @brief Move an item to a different position.
+  /// @param old_pos Current position (supports negative indexing).
+  /// @param new_pos Target position (supports negative indexing).
   void move_item(int old_pos, int new_pos);
 
   // mmCIF specific functions
+  /// @brief Get all category prefixes in mmCIF format (ending with '.').
   std::vector<std::string> get_mmcif_category_names() const;
+  /// @brief Find a category (all tags starting with prefix).
+  /// @param cat Category prefix (e.g., `_atom_site`; '.' is added if missing).
+  /// @return Table view with all matching tags.
   Table find_mmcif_category(std::string cat);
+  /// @brief Check if an mmCIF category exists.
+  /// @param cat Category prefix.
   bool has_mmcif_category(std::string cat) const;
 
+  /// @brief Initialize an mmCIF category loop.
+  /// @param cat Category prefix.
+  /// @param tags Column names (category prefix added automatically).
+  /// @return Reference to the Loop.
   Loop& init_mmcif_loop(std::string cat, std::vector<std::string> tags) {
     ensure_mmcif_category(cat);  // modifies cat
     return setup_loop(find_mmcif_category(cat), cat, std::move(tags));
@@ -516,52 +762,78 @@ struct Block {
 };
 
 
+/// @brief A single item in a CIF block: a pair, loop, frame, comment, or erased marker.
+///
+/// Uses a discriminated union (tagged with ItemType) to store different data types.
+/// For a Pair, stores tag and value. For a Loop, stores tags and values vectors.
+/// For a Frame, stores a nested Block.
 struct Item {
+  /// @brief The type of item (discriminator for the union).
   ItemType type;
+  /// @brief Source line number where this item was parsed (or -1 if not from parsing).
   int line_number = -1;
+  /// @brief Union storing the actual data (only one is valid based on type).
   union {
+    /// @brief For Pair items: [tag, value].
     Pair pair;
+    /// @brief For Loop items: tags and values.
     Loop loop;
+    /// @brief For Frame items: nested save frame Block.
     Block frame;
   };
 
+  /// @brief Construct an erased (empty) item.
   Item() : type(ItemType::Erased) {}
+  /// @brief Construct a Loop item.
   explicit Item(LoopArg)
     : type{ItemType::Loop}, loop{} {}
+  /// @brief Construct a Pair with a tag (value empty).
   explicit Item(std::string&& t)
     : type{ItemType::Pair}, pair{{std::move(t), std::string()}} {}
+  /// @brief Construct a Pair with tag and value.
   Item(const std::string& t, const std::string& v)
     : type{ItemType::Pair}, pair{{t, v}} {}
+  /// @brief Construct a Frame from a FrameArg.
   explicit Item(FrameArg&& frame_arg)
     : type{ItemType::Frame}, frame(frame_arg.str) {}
+  /// @brief Construct a Comment from a CommentArg.
   explicit Item(CommentArg&& comment)
     : type{ItemType::Comment}, pair{{std::string(), std::move(comment.str)}} {}
 
+  /// @brief Move constructor.
   Item(Item&& o) noexcept
       : type(o.type), line_number(o.line_number) {
     move_value(std::move(o));
   }
+  /// @brief Copy constructor.
   Item(const Item& o)
       : type(o.type), line_number(o.line_number) {
     copy_value(o);
   }
 
+  /// @brief Assignment operator (move-based).
   Item& operator=(Item o) { set_value(std::move(o)); return *this; }
 
+  /// @brief Destructor (calls destruct on the active union member).
   ~Item() { destruct(); }
 
+  /// @brief Destroy the active union member and mark this item as erased.
+  /// Calls destruct() first, then sets type to ItemType::Erased.
   void erase() {
     destruct();
     type = ItemType::Erased;
   }
 
-  // case-insensitive, the prefix should be lower-case
+  /// @brief Check if this item's tag(s) start with a prefix (case-insensitive).
+  /// @param prefix Prefix to match (should be lowercase).
+  /// @return True if the first tag starts with prefix.
   bool has_prefix(const std::string& prefix) const {
     return (type == ItemType::Pair && gemmi::istarts_with(pair[0], prefix)) ||
            (type == ItemType::Loop && !loop.tags.empty() &&
             gemmi::istarts_with(loop.tags[0], prefix));
   }
 
+  /// @brief Replace this item's value with another item (may change type).
   void set_value(Item&& o) {
     if (type == o.type) {
       switch (type) {
@@ -1058,13 +1330,26 @@ inline bool Block::has_mmcif_category(std::string cat) const {
   return false;
 }
 
+/// @brief A parsed CIF file: a collection of blocks with optional metadata.
+///
+/// Represents the complete document structure after parsing a CIF file.
+/// Contains one or more data blocks, each with tag-value pairs, loops, and frames.
 struct Document {
+  /// @brief Source filename or identifier (for error messages).
   std::string source;
+  /// @brief All blocks in the document (data blocks).
   std::vector<Block> blocks;
 
-  // implementation detail: items of the currently parsed block or frame
+  /// @brief Implementation detail: pointer to items of current block during parsing.
+  /// (Used internally by the parser; not for public use.)
   std::vector<Item>* items_ = nullptr;
 
+  /// @brief Add a new block to the document.
+  /// @param name Block name (must be unique).
+  /// @param pos Position to insert (-1 appends at end).
+  /// @return Reference to the new Block.
+  /// @throws std::runtime_error if name already exists.
+  /// @throws std::out_of_range if pos is invalid.
   Block& add_new_block(const std::string& name, int pos=-1) {
     if (find_block(name))
       fail("Block with such name already exists: " + name);
@@ -1073,28 +1358,36 @@ struct Document {
     return *blocks.emplace(pos < 0 ? blocks.end() : blocks.begin() + pos, name);
   }
 
+  /// @brief Clear all blocks and source info.
   void clear() noexcept {
     source.clear();
     blocks.clear();
     items_ = nullptr;
   }
 
-  // returns blocks[0] if the document has exactly one block (like mmCIF)
+  /// @brief Return blocks[0] of a one-block document (typical for mmCIF).
+  /// @throws std::runtime_error if the document has more than one block.
+  /// @throws std::out_of_range if the document has no blocks (from blocks.at(0)).
   Block& sole_block() {
     if (blocks.size() > 1)
       fail("single data block expected, got " + std::to_string(blocks.size()));
     return blocks.at(0);
   }
+  /// @brief Const overload of sole_block().
   const Block& sole_block() const {
     return const_cast<Document*>(this)->sole_block();
   }
 
+  /// @brief Find a block by name (case-sensitive).
+  /// @param name Block name.
+  /// @return Pointer to the Block, or nullptr if not found.
   Block* find_block(const std::string& name) {
     for (Block& b : blocks)
       if (b.name == name)
         return &b;
     return nullptr;
   }
+  /// @brief Const overload of find_block().
   const Block* find_block(const std::string& name) const {
     return const_cast<Document*>(this)->find_block(name);
   }
diff --git a/include/gemmi/ddl.hpp b/include/gemmi/ddl.hpp
index 47c70a21..18c2e089 100644
--- a/include/gemmi/ddl.hpp
+++ b/include/gemmi/ddl.hpp
@@ -1,6 +1,7 @@
 // Copyright Global Phasing Ltd.
-//
-// Using DDL1/DDL2 dictionaries to validate CIF/mmCIF files.
+
+/// @file
+/// @brief DDL1/DDL2 dictionary-based validation of CIF and mmCIF files.
 
 #ifndef GEMMI_DDL_HPP_
 #define GEMMI_DDL_HPP_
@@ -13,49 +14,139 @@
 
 namespace gemmi { namespace cif {
 
-/// Represents DDL1 or DDL2 dictionary (ontology).
+/// Represents a CIF dictionary (DDL1 or DDL2 ontology) for validation.
+///
+/// A DDL (Data Definition Language) dictionary defines the structure, constraints,
+/// and validation rules for CIF data. This class can load and use either:
+/// - **DDL1** dictionaries (IUCr core, chemical structures)
+/// - **DDL2** dictionaries (macromolecular CIF / mmCIF, used by PDB)
+///
+/// After loading a dictionary with read_ddl(), you can validate CIF documents
+/// against it to check for missing mandatory items, type violations, enumeration
+/// violations, unique key violations, and other data integrity issues.
 struct GEMMI_DLL Ddl {
-  /// member functions use logger's callback and threshold for output
+  /// Logger for validation messages and warnings.
+  /// Member functions use this logger's callback and threshold settings for output.
   Logger logger;
-  // configuration - some of these flag must be set before read_ddl()
+
+  // Configuration flags - set these before calling read_ddl()
+
+  /// Report unknown tags (tags not defined in the dictionary).
+  /// Useful for catching typos in tag names.
   bool print_unknown_tags = true;
-  // these flags below are relevant to DDL2 only
+
+  // The following flags apply to DDL2 dictionaries only
+
+  /// Enable validation using regular expression patterns (DDL2 _item_type.code).
   bool use_regex = true;
+
+  /// Use context-dependent validation rules (DDL2).
+  /// If true, validates items in specific category contexts.
   bool use_context = false;
+
+  /// Use parent-child item relationships (DDL2 _item_linked).
+  /// If true, enforces dependencies between items.
   bool use_parents = false;
+
+  /// Validate mandatory items (DDL2 _item.mandatory_code).
+  /// If true, reports missing items marked as mandatory.
   bool use_mandatory = true;
+
+  /// Validate unique keys in loops (DDL2 category keys, _category_key.name).
+  /// If true, checks that unique key values don't repeat.
   bool use_unique_keys = true;
-  // instead of _item_type.code, _pdbx_item_enumeration.value, and _item_range
-  // use _pdbx-prefixed equivalents (_pdbx_item_type.code, etc).
+
+  /// Use PDBx deposition-specific validation checks.
+  /// If true, uses _pdbx-prefixed dictionary items instead of standard ones
+  /// (_pdbx_item_type.code instead of _item_type.code, etc.).
+  /// This mode is typically used during structure deposition to PDB.
   bool use_deposition_checks = false;
 
-  // variables set when reading DLL; normally, no need to change them
-  int major_version = 0;  // currently 1 and 2 are supported
-  std::string dict_name;  // _dictionary_name or _dictionary.title
-  std::string dict_version;  // _dictionary_version or _dictionary.version
+  // Read-only fields set when reading a dictionary
+
+  /// Major DDL version of the loaded dictionary; currently 1 and 2 are supported.
+  /// Set when a dictionary is read; not the dictionary's own version (see dict_version).
+  int major_version = 0;
+
+  /// Name of the dictionary (e.g., "cif_core.dic" or "mmcif_pdbx_v50").
+  /// Read from _dictionary_name (DDL1) or _dictionary.title (DDL2).
+  std::string dict_name;
+
+  /// Version string of the dictionary (e.g., "2.0.11").
+  /// Read from _dictionary_version or _dictionary.version.
+  std::string dict_version;
 
   Ddl() = default;
-  // MSVC with dllexport attempts to export all non-deleted member functions,
-  // failing with Error C2280 (because of ddl_docs_) if we don't delete these:
+
+  // Copying is deleted because MSVC with dllexport tries to export every non-deleted
+  // member function and fails with C2280 (ddl_docs_); this also suppresses implicit moves.
   Ddl(Ddl const&) = delete;
   Ddl& operator=(Ddl const&) = delete;
 
-  /// it moves doc to ddl_docs_ to control lifetime and prevent modifications
+  /// Load a DDL dictionary into this validator.
+  ///
+  /// Parses a DDL1 or DDL2 dictionary document and indexes it for validation.
+  /// The document is moved into internal storage to manage its lifetime.
+  /// The dictionary version (DDL1 or DDL2) is auto-detected.
+  ///
+  /// Configuration flags (e.g., use_mandatory, use_regex) should be set
+  /// before calling this function.
+  ///
+  /// @param doc CIF document containing a DDL dictionary (will be moved)
   void read_ddl(cif::Document&& doc);
 
+  /// Validate all blocks in a CIF document against this dictionary.
+  ///
+  /// Checks all blocks in the document and reports validation errors
+  /// via the configured logger.
+  ///
+  /// @param doc The CIF document to validate
+  /// @return true if validation passes, false if errors are found
+  ///
+  /// @see validate_block() to validate individual blocks
   bool validate_cif(const cif::Document& doc) const;
+
+  /// Validate a single CIF block against this dictionary.
+  ///
+  /// Performs all enabled validation checks on the block:
+  /// - Mandatory items (if use_mandatory=true)
+  /// - Item types and enumeration values
+  /// - Regular expression patterns (if use_regex=true)
+  /// - Unique keys (if use_unique_keys=true)
+  /// - Parent-child relationships (if use_parents=true)
+  /// - Unknown tags (if print_unknown_tags=true)
+  ///
+  /// @param b      The CIF block to validate
+  /// @param source Source identifier for error messages (e.g., block name or filename)
+  /// @return true if validation passes, false if errors are found
   bool validate_block(const cif::Block& b, const std::string& source) const;
 
+  /// Check audit conformance fields in a CIF document.
+  ///
+  /// Verifies that the document's audit records match dictionary expectations
+  /// (e.g., _audit_conform_dict_name, _audit_conform_dict_version).
+  /// Reports mismatches via the logger.
+  ///
+  /// @param doc The CIF document to check
   void check_audit_conform(const cif::Document& doc) const;
 
+  /// Access the regex patterns loaded from the dictionary.
+  ///
+  /// Returns the internal map (regexes_) from string keys to compiled
+  /// regular expressions used in DDL2 validation.
+  ///
+  /// @return Map of compiled patterns. NOTE(review): keys are presumably DDL2 type codes rather than tags - confirm.
   const std::map<std::string, std::regex>& regexes() const { return regexes_; }
 
 private:
-  // items from DDL2 _pdbx_item_linked_group[_list]
+  /// Parent-child link built from DDL2 _pdbx_item_linked_group[_list] items.
+  ///
+  /// Groups the child tags with the parent tags they refer to.
+  /// Used for enforcing referential integrity (use_parents=true).
   struct ParentLink {
-    std::string group;
-    std::vector<std::string> child_tags;
-    std::vector<std::string> parent_tags;
+    std::string group;                   ///< Name of the linked group
+    std::vector<std::string> child_tags; ///< Child item tags
+    std::vector<std::string> parent_tags;///< Parent item tags
   };
 
   std::vector<std::unique_ptr<cif::Document>> ddl_docs_;
diff --git a/include/gemmi/json.hpp b/include/gemmi/json.hpp
index 4e20606f..191201d5 100644
--- a/include/gemmi/json.hpp
+++ b/include/gemmi/json.hpp
@@ -1,6 +1,7 @@
 // Copyright 2017 Global Phasing Ltd.
-//
-// Reading CIF-JSON (COMCIFS) and mmJSON (PDBj) formats into cif::Document.
+
+/// @file
+/// @brief Reading JSON formats (mmJSON and CIF-JSON) into CIF documents.
 
 #ifndef GEMMI_JSON_HPP_
 #define GEMMI_JSON_HPP_
@@ -12,15 +13,39 @@
 namespace gemmi {
 namespace cif {
 
-// reads mmJSON file mutating the input buffer as a side effect
+/// Parse mmJSON format from a buffer (with in-place mutation).
+///
+/// mmJSON is the macromolecular JSON format used by PDBj for structure data.
+/// This function parses JSON in-place, modifying the input buffer as a side effect
+/// for efficiency. If you need to preserve the original buffer, make a copy first.
+///
+/// @param buffer Pointer to buffer containing mmJSON data (will be modified)
+/// @param size   Number of bytes in the buffer
+/// @param name   Optional source name for error messages (default: "mmJSON")
+/// @return Parsed CIF document
 GEMMI_DLL Document read_mmjson_insitu(char* buffer, std::size_t size,
                                       const std::string& name="mmJSON");
 
+/// Read and parse an mmJSON file from disk.
+///
+/// Convenience function that loads the file into memory and parses it.
+/// The entire file is read into a buffer for parsing.
+///
+/// @param path Path to the mmJSON file, read with read_file_into_buffer() (for gzipped input see read_mmjson_gz())
+/// @return Parsed CIF document
 inline Document read_mmjson_file(const std::string& path) {
   CharArray buffer = read_file_into_buffer(path);
   return read_mmjson_insitu(buffer.data(), buffer.size(), path);
 }
 
+/// Read and parse mmJSON from an input source (file or stream).
+///
+/// Template function supporting both file paths and stream inputs.
+/// Reads data from the input source into a buffer, then parses.
+///
+/// @tparam T An input type with is_stdin() and path() methods
+/// @param input The input source to read from
+/// @return Parsed CIF document
 template<typename T>
 Document read_mmjson(T&& input) {
   std::string name = input.is_stdin() ? "stdin" : input.path();
diff --git a/include/gemmi/numb.hpp b/include/gemmi/numb.hpp
index 54f09d5b..61c43f28 100644
--- a/include/gemmi/numb.hpp
+++ b/include/gemmi/numb.hpp
@@ -1,10 +1,7 @@
 // Copyright 2017 Global Phasing Ltd.
-//
-// Utilities for parsing CIF numbers (the CIF spec calls them 'numb').
-//
-// Numb - the numeric type in CIF - is a number with optional
-// standard uncertainty (s.u.) in brackets: 1.23(8).
-// Mmcif file do not use s.u. though - they define own numeric categories.
+
+/// @file
+/// @brief Parsing CIF numeric values (numb) with optional standard uncertainty.
 
 #ifndef GEMMI_NUMB_HPP_
 #define GEMMI_NUMB_HPP_
@@ -16,6 +13,28 @@
 namespace gemmi {
 namespace cif {
 
+/// Parse a CIF numeric value (numb), optionally including standard uncertainty.
+///
+/// In CIF format, numeric values (numb) can include optional standard uncertainty
+/// (s.u.) in parentheses, e.g., "1.23(8)" represents 1.23 with s.u. of 0.08.
+/// The s.u. information is parsed and skipped; only the numeric value is returned.
+///
+/// Note: mmCIF files typically do not use s.u. notation for numeric values;
+/// they define their own numeric data categories instead.
+///
+/// @param s   String containing the numeric value to parse
+/// @param nan Default return value if parsing fails (default: NaN)
+/// @return Parsed numeric value (with s.u. removed), or nan if invalid
+///
+/// @note The function accepts leading '+' signs and rejects NaN, Inf, and -Inf
+///       as they are not allowed in standard CIF format.
+///
+/// @par Example
+/// @code
+/// double a = as_number("1.234");        // returns 1.234
+/// double b = as_number("1.234(5)");     // returns 1.234 (s.u. ignored)
+/// double c = as_number("invalid");      // returns NAN
+/// @endcode
 inline double as_number(const std::string& s, double nan=NAN) {
   const char* start = s.data();
   const char* end = s.data() + s.size();
@@ -40,15 +59,32 @@ inline double as_number(const std::string& s, double nan=NAN) {
   return result.ptr == end ? d : nan;
 }
 
+/// Check if a string represents a valid CIF numeric value (numb).
+///
+/// @param s String to check
+/// @return true if the string is a valid CIF number, false otherwise
 inline bool is_numb(const std::string& s) {
   return !std::isnan(as_number(s));
 }
 
 
-// for use in templates (see also as_any() functions in cifdoc.hpp)
+// Plain overloads for use in templates and generic type conversion functions
+// (see also as_any() functions in cifdoc.hpp)
+
+/// Parse CIF numeric value as a float (overload selected by the type of `null`).
+///
+/// @param s    String containing the numeric value
+/// @param null Fallback value if parsing fails
+/// @return Parsed float value, or null on failure
 inline float as_any(const std::string& s, float null) {
   return (float) as_number(s, null);
 }
+
+/// Parse CIF numeric value as a double (overload selected by the type of `null`).
+///
+/// @param s    String containing the numeric value
+/// @param null Fallback value if parsing fails
+/// @return Parsed double value, or null on failure
 inline double as_any(const std::string& s, double null) {
   return as_number(s, null);
 }
diff --git a/include/gemmi/read_cif.hpp b/include/gemmi/read_cif.hpp
index 8a488884..b5fc022f 100644
--- a/include/gemmi/read_cif.hpp
+++ b/include/gemmi/read_cif.hpp
@@ -1,6 +1,7 @@
 // Copyright 2021 Global Phasing Ltd.
 //
-// Functions for reading possibly gzipped CIF files.
+/// @file
+/// @brief Reading possibly gzip-compressed CIF and JSON files.
 
 #ifndef GEMMI_READ_CIF_HPP_
 #define GEMMI_READ_CIF_HPP_
@@ -10,21 +11,76 @@
 
 namespace gemmi {
 
+/// Read a CIF file, optionally gzip-compressed, from disk.
+///
+/// @param path    Path to the CIF file (may end with .gz for gzip compression)
+/// @param check_level Syntax checking level (0=none, 1=moderate, 2=strict)
+/// @return Parsed CIF document
 GEMMI_DLL cif::Document read_cif_gz(const std::string& path, int check_level=1);
+
+/// Check CIF syntax without fully parsing the file.
+///
+/// Performs a quick syntax validation pass on a CIF file (optionally gzipped).
+///
+/// @param path    Path to the CIF file (may end with .gz for gzip compression)
+/// @param msg     If non-null, receives an error message if validation fails
+/// @return true if file syntax is valid, false otherwise
 GEMMI_DLL bool check_cif_syntax_gz(const std::string& path, std::string* msg);
+
+/// Read an mmJSON file (optionally gzip-compressed) from disk.
+///
+/// mmJSON is the JSON format used by PDBj for macromolecular CIF data.
+///
+/// @param path    Path to the mmJSON file (may end with .gz for gzip compression)
+/// @return Parsed CIF document
 GEMMI_DLL cif::Document read_mmjson_gz(const std::string& path);
+
+/// Read a file into a buffer, optionally decompressing if gzip-compressed.
+///
+/// @param path    Path to the file (may end with .gz for gzip compression)
+/// @return Buffer containing decompressed file contents
 GEMMI_DLL CharArray read_into_buffer_gz(const std::string& path);
+
+/// Parse a CIF document from a memory buffer.
+///
+/// @param data        Pointer to buffer containing CIF data
+/// @param size        Number of bytes to read
+/// @param name        Optional name for the source (used in error messages)
+/// @param check_level Syntax checking level (0=none, 1=moderate, 2=strict)
+/// @return Parsed CIF document
 GEMMI_DLL cif::Document read_cif_from_memory(const char* data, size_t size, const char* name,
                                              int check_level=1);
+
+/// Read only the first block from a CIF file, optionally gzip-compressed.
+///
+/// Useful for reading CIF files where only the first block is needed,
+/// potentially saving memory and parsing time.
+///
+/// @param path    Path to the CIF file (may end with .gz for gzip compression)
+/// @param limit   Maximum number of bytes to read from the file
+/// @return CIF document containing only the first block
 GEMMI_DLL cif::Document read_first_block_gz(const std::string& path, size_t limit);
 
-// cif::read_string() was moved here from cif.hpp to speed up compilation
 namespace cif {
+/// Read CIF data from a string.
+///
+/// This function was moved here from cif.hpp to speed up compilation.
+///
+/// @param data        CIF-formatted string
+/// @param check_level Syntax checking level (0=none, 1=moderate, 2=strict)
+/// @return Parsed CIF document
 inline Document read_string(const std::string& data, int check_level=1) {
   return read_cif_from_memory(data.data(), data.size(), "string", check_level);
 }
 }  // namespace cif
 
+/// Auto-detect and read either CIF or mmJSON format from a file.
+///
+/// Determines format by file extension (.json, .js for JSON; otherwise CIF).
+/// Handles gzip-compressed files transparently.
+///
+/// @param path Path to the file (may end with .gz for gzip compression)
+/// @return Parsed CIF document
 inline cif::Document read_cif_or_mmjson_gz(const std::string& path) {
   if (giends_with(path, "json") || giends_with(path, "js"))
     return read_mmjson_gz(path);
diff --git a/include/gemmi/to_cif.hpp b/include/gemmi/to_cif.hpp
index 09ef32ad..6e3fba5f 100644
--- a/include/gemmi/to_cif.hpp
+++ b/include/gemmi/to_cif.hpp
@@ -1,6 +1,7 @@
 // Copyright 2017 Global Phasing Ltd.
 
-// Writing cif::Document or its parts to std::ostream.
+/// @file
+/// @brief Writing CIF documents to output streams with configurable formatting.
 
 #ifndef GEMMI_TO_CIF_HPP_
 #define GEMMI_TO_CIF_HPP_
@@ -11,31 +12,44 @@
 namespace gemmi {
 namespace cif {
 
-/// deprecated, use cif::WriteOptions instead
+/// Deprecated output formatting style. Use cif::WriteOptions instead.
+///
+/// This enum is provided for backward compatibility. Each style
+/// corresponds to a particular WriteOptions configuration.
 enum class Style {
-  Simple,
-  NoBlankLines,
-  PreferPairs,  // write single-row loops as pairs
-  Pdbx,         // PreferPairs + put '#' (empty comments) between categories
-  Indent35,     // start values in pairs from 35th column
-  Aligned,      // columns in tables are left-aligned
+  Simple,       ///< Standard CIF format (default)
+  NoBlankLines, ///< Compact: no blank lines between categories
+  PreferPairs,  ///< Write single-row loops as pairs
+  Pdbx,         ///< PreferPairs + put '#' (empty comments) between categories
+  Indent35,     ///< Start values in pairs from 35th column
+  Aligned,      ///< Columns in tables are left-aligned
 };
 
+/// Options for writing CIF output.
+///
+/// Controls formatting, alignment, and output style of CIF documents.
 struct WriteOptions {
-  /// write single-row loops as pairs
+  /// Write single-row loops as tag-value pairs instead of loop constructs.
   bool prefer_pairs = false;
-  /// no blank lines between categories, only between blocks
+  /// Omit blank lines between categories (keep only between blocks).
   bool compact = false;
-  /// put '#' (empty comments) before/after categories
+  /// Insert '#' (empty comment lines) before and after categories.
+  /// These are ordinary empty CIF comments, not a syntax extension.
   bool misuse_hash = false;
-  /// width reserved for tags in pairs (e.g. 34 = value starts at 35th column)
+  /// Width reserved for tags in pairs (0 = no alignment).
+  /// If set, values start at column (align_pairs + 1).
+  /// Example: align_pairs=34 starts values at the 35th column.
   std::uint16_t align_pairs = 0;
-  /// if non-zero, determines max width of each column in a loop and aligns
-  /// all values to this width; the width is capped with the given value
+  /// Cap on the column width used when aligning loops (0 = no alignment).
+  /// If non-zero, the max width of each column in a loop is determined and all
+  /// values are aligned to that width; the width is capped at this value.
   std::uint16_t align_loops = 0;
 
   WriteOptions() {}
-  // implicit conversion from deprecated Style (for backward compatibility)
+
+  /// Implicit conversion from deprecated Style enum (for backward compatibility).
+  ///
+  /// @param style Legacy Style enum value to convert
   WriteOptions(Style style) {
     switch (style) {
       case Style::Simple:
@@ -59,6 +73,10 @@ struct WriteOptions {
         break;
     }
   }
+
+  /// Return a human-readable string representation of active options.
+  ///
+  /// @return Comma-separated list of enabled options (e.g., "prefer_pairs,compact")
   std::string str() const {
     std::string s;
     if (prefer_pairs)
@@ -77,16 +95,29 @@ struct WriteOptions {
   }
 };
 
-/// std::ostream with buffering. C++ streams are so slow that even primitive
-/// buffering makes it significantly more efficient.
+/// Buffered output stream wrapper for efficient CIF writing.
+///
+/// Wraps std::ostream with a fixed-size internal buffer, because C++ streams are
+/// slow enough that even primitive buffering helps. The buffer is flushed on
+/// destruction and by write() when the pending data would cross its margin.
 class BufOstream {
 public:
+  /// Construct a buffered output stream.
+  /// @param os_ The underlying std::ostream to write to
   explicit BufOstream(std::ostream& os_) : os(os_), ptr(buf) {}
+
+  /// Destructor flushes remaining buffered data.
   ~BufOstream() { flush(); }
+
+  /// Flush all buffered data to the underlying stream.
   void flush() {
     os.write(buf, ptr - buf);
     ptr = buf;
   }
+
+  /// Write data to the buffer, flushing if necessary.
+  /// @param s    Pointer to data to write
+  /// @param len  Number of bytes to write
   void write(const char* s, size_t len) {
     constexpr int margin = sizeof(buf) - 512;
     if (ptr - buf + len > margin) {
@@ -99,13 +130,24 @@ class BufOstream {
     std::memcpy(ptr, s, len);
     ptr += len;
   }
+
+  /// Write a string to the buffer.
+  /// @param s The string to write
   void operator<<(const std::string& s) {
     write(s.c_str(), s.size());
   }
-  // below we don't check the buffer boundary, these functions add <512 bytes
+
+  // Note: The following functions assume writes are small (<512 bytes).
+  // No buffer boundary check is performed for performance.
+
+  /// Write a single character to the buffer.
+  /// @param c The character to write
   void put(char c) {
     *ptr++ = c;
   }
+
+  /// Write n space characters to the buffer (for padding/alignment).
+  /// @param n Number of spaces to write
   void pad(size_t n) {
     std::memset(ptr, ' ', n);
     ptr += n;
@@ -117,10 +159,11 @@ class BufOstream {
   char* ptr;
 };
 
-// CIF files are read in binary mode. It makes difference only for text fields.
-// If the text field with \r\n would be written as is in text mode on Windows
-// \r would get duplicated. As a workaround, here we convert \r\n to \n.
-// Hopefully \r that gets removed here is never meaningful.
+// Note: CIF files are read in binary mode. Text fields with \r\n line endings
+// are normalized to \n when writing to avoid duplication in Windows text mode.
+/// Write a text field, normalizing \\r\\n to \\n.
+/// @param os    Buffered output stream
+/// @param value The text field value to write
 inline void write_text_field(BufOstream& os, const std::string& value) {
   for (size_t pos = 0, end = 0; end != std::string::npos; pos = end + 1) {
     end = value.find("\r\n", pos);
@@ -238,6 +281,13 @@ inline bool should_be_separated_(const Item& a, const Item& b) {
   return adot != bdot || a.pair[0].compare(0, adot, b.pair[0], 0, adot) != 0;
 }
 
+/// Write a single CIF block to an output stream.
+///
+/// Writes a CIF data block with the specified formatting options.
+///
+/// @param os_     Output stream to write to
+/// @param block   The CIF block to write
+/// @param options Formatting options (see WriteOptions documentation)
 inline void write_cif_block_to_stream(std::ostream& os_, const Block& block,
                                       WriteOptions options=WriteOptions()) {
   BufOstream os(os_);
@@ -262,6 +312,14 @@ inline void write_cif_block_to_stream(std::ostream& os_, const Block& block,
     os.write("#\n", 2);
 }
 
+/// Write a CIF document to an output stream.
+///
+/// Writes a complete CIF document with all its blocks, using the specified
+/// formatting options. Blocks are separated by blank lines for readability.
+///
+/// @param os      Output stream to write to
+/// @param doc     The CIF document to write
+/// @param options Formatting options (see WriteOptions documentation)
 inline void write_cif_to_stream(std::ostream& os, const Document& doc,
                                 WriteOptions options=WriteOptions()) {
   bool first = true;
diff --git a/include/gemmi/to_json.hpp b/include/gemmi/to_json.hpp
index 3915cdd0..00bbddb9 100644
--- a/include/gemmi/to_json.hpp
+++ b/include/gemmi/to_json.hpp
@@ -1,6 +1,7 @@
 // Copyright 2017 Global Phasing Ltd.
 
-// Writing cif::Document or its parts as JSON (mmJSON, CIF-JSON, etc).
+/// @file
+/// @brief Writing CIF documents as JSON (mmJSON and CIF-JSON formats).
 
 #ifndef GEMMI_TO_JSON_HPP_
 #define GEMMI_TO_JSON_HPP_
@@ -10,16 +11,54 @@
 namespace gemmi {
 namespace cif {
 
+/// Options for writing CIF data as JSON.
+///
+/// Supports multiple JSON-based serialization formats for CIF data:
+/// - **CIF-JSON (COMCIFS)**: Standard JSON representation of CIF documents,
+///   supporting numbered values with uncertainties.
+/// - **mmJSON (PDBj)**: Specialized JSON format optimized for macromolecular
+///   CIF (mmCIF) data, with DDL2 category grouping and bare tags.
+///
+/// Choose between preset configurations (comcifs() or mmjson()) or
+/// configure individual options for custom output.
 struct JsonWriteOptions {
-  bool as_comcifs = false;  // conform to the COMCIFS CIF-JSON draft
-  bool group_ddl2_categories = false;  // for mmJSON
-  bool with_data_keyword = false;  // for mmJSON
-  bool bare_tags = false;  // "tag" instead of "_tag"
-  bool values_as_arrays = false;  // "_tag": ["value"]
-  bool lowercase_names = true; // write case-insensitive names as lower case
-  int quote_numbers = 1;  // 0=never (no s.u.), 1=mix, 2=always
-  std::string cif_dot = "null";  // how to convert '.' from CIF
+  /// Conform to the COMCIFS CIF-JSON draft specification.
+  /// This flag alone does not change other options; the comcifs() preset sets it.
+  bool as_comcifs = false;
 
+  /// Group items by DDL2 categories (for mmJSON compatibility).
+  /// Relevant mainly for mmJSON format.
+  bool group_ddl2_categories = false;
+
+  /// Prefix block names with the "data_" keyword, as mmJSON does.
+  /// Used in mmJSON format output.
+  bool with_data_keyword = false;
+
+  /// Use bare tag names (e.g., "tag" instead of "_tag").
+  /// Used in mmJSON and other compact formats.
+  bool bare_tags = false;
+
+  /// Represent all values as JSON arrays (e.g., "_tag": ["value"]).
+  /// Used in COMCIFS CIF-JSON and mmJSON; if false, single values are not wrapped.
+  bool values_as_arrays = false;
+
+  /// Write case-insensitive names (such as tags) in lowercase.
+  /// CIF names are case-insensitive; this normalizes their case in the output.
+  bool lowercase_names = true;
+
+  /// Control quoting of numbers, including those with standard uncertainty (s.u.).
+  /// - 0: Never quote numbers; s.u. information is lost (used for mmJSON)
+  /// - 1: Quote numbers only when they include s.u. (default, mixed mode)
+  /// - 2: Always quote numbers as strings (used for COMCIFS)
+  int quote_numbers = 1;
+
+  /// How to represent the CIF '.' (not-applicable) value in JSON.
+  /// Common choices: "null" (JSON null), "false" (boolean false, used in COMCIFS).
+  std::string cif_dot = "null";
+
+  /// Preset options for COMCIFS CIF-JSON format.
+  ///
+  /// @return JsonWriteOptions configured for standard CIF-JSON output
   static JsonWriteOptions comcifs() {
     JsonWriteOptions opt;
     opt.as_comcifs = true;
@@ -29,6 +68,12 @@ struct JsonWriteOptions {
     return opt;
   }
 
+  /// Preset options for mmJSON format (PDBj macromolecular JSON).
+  ///
+  /// mmJSON is used by PDBj for macromolecular structures.
+  /// It groups data by DDL2 categories and uses bare tag names.
+  ///
+  /// @return JsonWriteOptions configured for mmJSON output
   static JsonWriteOptions mmjson() {
     JsonWriteOptions opt;
     opt.group_ddl2_categories = true;
@@ -41,9 +86,26 @@ struct JsonWriteOptions {
   }
 };
 
+/// Write a CIF document as JSON to an output stream.
+///
+/// Serializes a CIF document in JSON format according to the specified options.
+/// See JsonWriteOptions for details on supported formats and customization.
+///
+/// @param os      Output stream to write to
+/// @param doc     The CIF document to write
+/// @param options Formatting and format selection options
 GEMMI_DLL void write_json_to_stream(std::ostream& os, const Document& doc,
                                     const JsonWriteOptions& options);
 
+/// Write a CIF document as mmJSON (PDBj macromolecular JSON) to an output stream.
+///
+/// Convenience function equivalent to:
+/// @code
+/// write_json_to_stream(os, doc, JsonWriteOptions::mmjson());
+/// @endcode
+///
+/// @param os  Output stream to write to
+/// @param doc The CIF document to write
 inline void write_mmjson_to_stream(std::ostream& os, const Document& doc) {
   write_json_to_stream(os, doc, JsonWriteOptions::mmjson());
 }