From 4566e4a78e46de17b9101fda068a7d3b60ccb61d Mon Sep 17 00:00:00 2001 From: Draco Date: Tue, 15 Jul 2025 11:03:30 -0400 Subject: [PATCH 01/27] feat: add eviction callback in LRU cache --- cache/lru/cache.go | 28 ++++++++++++++++++++++++++-- cache/lru/cache_test.go | 29 +++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 2 deletions(-) diff --git a/cache/lru/cache.go b/cache/lru/cache.go index dd83ee0cca65..94adba0ba69d 100644 --- a/cache/lru/cache.go +++ b/cache/lru/cache.go @@ -20,6 +20,16 @@ type Cache[K comparable, V any] struct { lock sync.Mutex elements *linked.Hashmap[K, V] size int + + // onEvict is called with the key and value of an evicted entry, if set. + onEvict func(K, V) +} + +// SetOnEvict sets a callback to be called with the key and value of an evicted entry. +func (c *Cache[K, V]) SetOnEvict(cb func(K, V)) { + c.lock.Lock() + defer c.lock.Unlock() + c.onEvict = cb } func NewCache[K comparable, V any](size int) *Cache[K, V] { @@ -34,8 +44,11 @@ func (c *Cache[K, V]) Put(key K, value V) { defer c.lock.Unlock() if c.elements.Len() == c.size { - oldestKey, _, _ := c.elements.Oldest() + oldestKey, oldestValue, _ := c.elements.Oldest() c.elements.Delete(oldestKey) + if c.onEvict != nil { + c.onEvict(oldestKey, oldestValue) + } } c.elements.Put(key, value) } @@ -55,14 +68,25 @@ func (c *Cache[K, V]) Get(key K) (V, bool) { func (c *Cache[K, _]) Evict(key K) { c.lock.Lock() defer c.lock.Unlock() - c.elements.Delete(key) + if c.onEvict != nil { + value, _ := c.elements.Get(key) + c.onEvict(key, value) + } } func (c *Cache[_, _]) Flush() { c.lock.Lock() defer c.lock.Unlock() + // Call onEvict for each element before clearing + if c.onEvict != nil { + iter := c.elements.NewIterator() + for iter.Next() { + c.onEvict(iter.Key(), iter.Value()) + } + } + c.elements.Clear() } diff --git a/cache/lru/cache_test.go b/cache/lru/cache_test.go index 345ce42f1f76..907c1c24c48d 100644 --- a/cache/lru/cache_test.go +++ b/cache/lru/cache_test.go @@ -6,6 +6,8 @@ package lru import ( "testing" + "github.com/stretchr/testify/require" + "github.com/ava-labs/avalanchego/cache/cachetest" "github.com/ava-labs/avalanchego/ids" ) @@ -19,3 +21,30 @@ func TestCacheEviction(t *testing.T) { c := NewCache[ids.ID, int64](2) cachetest.Eviction(t, c) } + +func TestCacheFlushWithOnEvict(t *testing.T) { + c := NewCache[ids.ID, int64](2) + + // Track which elements were evicted + evicted := make(map[ids.ID]int64) + c.SetOnEvict(func(key ids.ID, value int64) { + evicted[key] = value + }) + + cachetest.Eviction(t, c) + require.Zero(t, c.Len()) + require.Len(t, evicted, 3) +} + +func TestCachePutWithOnEvict(t *testing.T) { + c := NewCache[ids.ID, int64](1) + + evicted := make(map[ids.ID]int64) + c.SetOnEvict(func(key ids.ID, value int64) { + evicted[key] = value + }) + + cachetest.Basic(t, c) + require.Len(t, evicted, 1) + require.Equal(t, evicted[ids.ID{1}], int64(1)) +} From 68527f1e5aaa46228c4325bf3f2ed2c3694ec89e Mon Sep 17 00:00:00 2001 From: Draco Date: Sun, 1 Jun 2025 19:53:09 -0400 Subject: [PATCH 02/27] blockdb setup & readme --- x/blockdb/README.md | 212 +++++++++++++++++++++++++++++++++++++++++++ x/blockdb/block.go | 58 ++++++++++++ x/blockdb/blockdb.go | 1 + x/blockdb/config.go | 38 ++++++++ x/blockdb/errors.go | 15 +++ x/blockdb/index.go | 90 ++++++++++++++++++ x/blockdb/store.go | 52 +++++++++++ 7 files changed, 466 insertions(+) create mode 100644 x/blockdb/README.md create mode 100644 x/blockdb/block.go create mode 100644 x/blockdb/blockdb.go create mode 100644 x/blockdb/config.go create 
mode 100644 x/blockdb/errors.go create mode 100644 x/blockdb/index.go create mode 100644 x/blockdb/store.go diff --git a/x/blockdb/README.md b/x/blockdb/README.md new file mode 100644 index 000000000000..80f129d4ea97 --- /dev/null +++ b/x/blockdb/README.md @@ -0,0 +1,212 @@ +# BlockDB + +BlockDB is a specialized storage system designed for blockchain blocks. It provides O(1) write performance with support for parallel operations. Unlike general-purpose key-value stores like LevelDB that require periodic compaction, BlockDB's append-only design ensures consistently fast writes without the overhead of background maintenance operations. + +## Key Functionalities (Needs Review) + +- **O(1) Performance**: Both reads and writes complete in constant time +- **Parallel Operations**: Multiple threads can read and write blocks concurrently without blocking +- **Flexible Write Ordering**: Supports out-of-order block writes for efficient synchronization +- **Configurable Durability**: Optional `syncToDisk` mode guarantees immediate recoverability at the cost of performance +- **Automatic Recovery**: Detects and recovers unindexed blocks after unclean shutdowns +- **Data Integrity**: Checksums verify block data on every read +- **No Maintenance Required**: Append-only design eliminates the need for compaction or reorganization +- **Progress Tracking**: Maintains maximum contiguous height for sync status + +## Architecture + +BlockDB uses two file types: index files and data files. The index file maps block heights to locations in data files, while data files store the actual block content. Data storage can be split across multiple files based on size limits. + +``` +┌─────────────────┐ ┌─────────────────┐ +│ Index File │ │ Data File 1 │ +│ (.idx) │ │ (.dat) │ +├─────────────────┤ ├─────────────────┤ +│ Header │ │ Block 1 │ +│ - Version │ ┌─────>│ - Header │ +│ - Min Height │ │ │ - Data │ +│ - MCH │ │ ├─────────────────┤ +│ - Data Size │ │ │ Block 2 │ +├─────────────────┤ │ ┌──>│ - Header │ +│ Entry[0] │ │ │ │ - Data │ +│ - Offset ───────┼──┘ │ ├─────────────────┤ +│ - Size │ │ │ ... │ +├─────────────────┤ │ └─────────────────┘ +│ Entry[1] │ │ +│ - Offset ───────┼─────┘ ┌─────────────────┐ +│ - Size │ │ Data File 2 │ +├─────────────────┤ │ (.dat) │ +│ ... │ ├─────────────────┤ +└─────────────────┘ │ Block N │ + │ - Header │ + │ - Data │ + ├─────────────────┤ + │ ... 
│ + └─────────────────┘ +``` + +## Implementation Details + +### File Formats + +#### Index File Structure + +The index file consists of a fixed-size header followed by fixed-size entries: + +``` +Index File Header (48 bytes): +┌────────────────────────────────┬─────────┐ +│ Field │ Size │ +├────────────────────────────────┼─────────┤ +│ Version │ 8 bytes │ +│ Max Data File Size │ 8 bytes │ +│ Max Block Height │ 8 bytes │ +│ Min Block Height │ 8 bytes │ +│ Max Contiguous Height │ 8 bytes │ +│ Data File Size │ 8 bytes │ +└────────────────────────────────┴─────────┘ + +Index Entry (16 bytes): +┌────────────────────────────────┬─────────┐ +│ Field │ Size │ +├────────────────────────────────┼─────────┤ +│ Data File Offset │ 8 bytes │ +│ Block Data Size │ 8 bytes │ +└────────────────────────────────┴─────────┘ +``` + +#### Data File Structure + +Each block in the data file is stored with a header followed by the raw block data: + +``` +Block Header (24 bytes): +┌────────────────────────────────┬─────────┐ +│ Field │ Size │ +├────────────────────────────────┼─────────┤ +│ Height │ 8 bytes │ +│ Size │ 8 bytes │ +│ Checksum │ 8 bytes │ +└────────────────────────────────┴─────────┘ +``` + +### Design Decisions + +#### Append-Only Architecture + +BlockDB is strictly append-only with no support for deletions. This aligns with blockchain's immutable nature and provides: + +- Simplified concurrency model +- Predictable write performance +- Straightforward recovery logic +- No compaction overhead + +**Trade-off**: Overwriting a block leaves the old data as unreferenced "dead" space. However, since blockchain blocks are immutable and rarely overwritten (only during reorgs), this trade-off has minimal impact in practice. + +#### Fixed-Size Index Entries + +Each index entry is exactly 16 bytes, containing the offset and size. This fixed size enables direct calculation of where each block's index entry is located, providing O(1) lookups. For blockchains with high block heights, the index remains efficient - even at height 1 billion, the index file would only be ~16GB. + +#### Two File Type Separation + +Separating index and data provides several benefits: + +- Index files remain relatively small and can benefit from SSD storage +- Data files can use cheaper storage and be backed up independently +- Sequential append-only writes to data files minimize fragmentation +- Index can be rebuilt by scanning data files if needed + +#### Out-of-Order Block Writing + +Blocks can be written at any height regardless of arrival order. This is essential for blockchain nodes that may receive blocks out of sequence during syncing operations. + +#### Durability and Fsync Behavior + +BlockDB provides configurable durability through the `syncToDisk` parameter: + +- When enabled, the data file is fsync'd after every block write, guaranteeing immediate durability +- The index file is fsync'd periodically (every `CheckpointInterval` blocks) to balance performance and recovery time +- When disabled, writes rely on OS buffering, trading durability for significantly better performance + +### Key Operations + +#### Write Performance + +- **Time Complexity**: O(1) to write a block +- **I/O Pattern**: Sequential append to data file + single index entry write +- **Block Size Impact**: While index operations are O(1), total write time depends on block size. With a maximum block size enforced, write time remains bounded, maintaining effectively O(1) performance. 
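The fixed-size layouts above make index lookups pure arithmetic rather than a search. The following minimal sketch (illustrative names; the overflow checks performed by the real implementation are omitted) shows how a block's index entry position follows directly from the 48-byte header and 16-byte entries described in the file format tables:

```go
package main

import "fmt"

// Sizes taken from the on-disk layouts described above.
const (
	indexFileHeaderSize uint64 = 48 // fixed-size index file header
	indexEntrySize      uint64 = 16 // data file offset (8 bytes) + block size (8 bytes)
)

// indexEntryOffset sketches the O(1) lookup enabled by fixed-size entries:
// the entry for a given height sits at a position computed directly from the
// height. minHeight mirrors the "Min Height" field in the index file header.
func indexEntryOffset(height, minHeight uint64) uint64 {
	return indexFileHeaderSize + (height-minHeight)*indexEntrySize
}

func main() {
	// With MinimumHeight = 1, block 100's entry is 99 entries past the header.
	fmt.Println(indexEntryOffset(100, 1)) // 48 + 99*16 = 1632
}
```

Reading a block is then one read of the index file at that computed offset followed by one read of the data file at the entry's recorded offset, which is what keeps both paths at a constant number of I/O operations.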
+ +#### Read Performance + +- **Time Complexity**: O(1) to read a block +- **I/O Pattern**: One index read + one data read +- **Concurrency**: Multiple blocks can be read in parallel + +#### Recovery Mechanism + +On startup, BlockDB checks for signs of an unclean shutdown. If detected, it performs recovery: + +1. Compares the data file size with the indexed data size (stored in index header) +2. If data file is larger, starts scanning from where the index left off +3. For each unindexed block found: + - Validates block header and checksum + - Writes the corresponding index entry +4. Updates maximum contiguous height +5. Persists the updated index header + +### Concurrency Model + +BlockDB uses a reader-writer lock for overall thread safety, with atomic operations for write coordination: + +- Multiple threads can read different blocks simultaneously without blocking +- Multiple threads can write concurrently - they use atomic operations to allocate unique space in the data file +- The reader-writer lock ensures consistency between reads and writes + +## Usage + +### Creating a Store + +```go +import "github.com/ava-labs/avalanchego/x/blockdb" + +opts := blockdb.DefaultStoreOptions() +opts.MinimumHeight = 1 + +store, err := blockdb.NewStore( + "/path/to/index", // Index directory + "/path/to/data", // Data directory + true, // Sync to disk + false, // Don't truncate existing data + opts, + logger, +) +if err != nil { + return err +} +defer store.Close() +``` + +### Writing and Reading Blocks + +```go +// Write a block +height := uint64(100) +blockData := []byte("block data...") +err := store.WriteBlock(height, blockData) + +// Read a block +blockData, err := store.ReadBlock(height) +if err == blockdb.ErrBlockNotFound { + // Block doesn't exist at this height +} + +// Query store state +maxContiguous := store.MaxContiguousHeight() +minHeight := store.MinHeight() +``` + +## Future Improvements + +- **Multiple Data Files**: Split data across multiple files when MaxDataFileSize is reached +- **Block Cache**: Implement circular buffer cache for recently accessed blocks +- **Enforced In-Order Writes**: Optional mode to require blocks be written sequentially, preventing gaps diff --git a/x/blockdb/block.go b/x/blockdb/block.go new file mode 100644 index 000000000000..59bc792f0639 --- /dev/null +++ b/x/blockdb/block.go @@ -0,0 +1,58 @@ +package blockdb + +import ( + "encoding" + "encoding/binary" + "fmt" +) + +const MaxBlockDataSize = 1 << 30 // 1 GB + +var ( + _ encoding.BinaryMarshaler = blockHeader{} + _ encoding.BinaryUnmarshaler = &blockHeader{} + + sizeOfBlockHeader = uint64(binary.Size(blockHeader{})) +) + +// blockHeader is prepended to each block in the data file. +type blockHeader struct { + Height uint64 + // Size of the raw block data (excluding this blockHeader). + Size uint64 + Checksum uint64 +} + +// MarshalBinary implements the encoding.BinaryMarshaler interface. +func (bh blockHeader) MarshalBinary() ([]byte, error) { + buf := make([]byte, sizeOfBlockHeader) + binary.LittleEndian.PutUint64(buf[0:], bh.Height) + binary.LittleEndian.PutUint64(buf[8:], bh.Size) + binary.LittleEndian.PutUint64(buf[16:], bh.Checksum) + return buf, nil +} + +// UnmarshalBinary implements the encoding.BinaryUnmarshaler interface. 
+func (bh *blockHeader) UnmarshalBinary(data []byte) error { + if len(data) != int(sizeOfBlockHeader) { + return fmt.Errorf("incorrect data length to unmarshal blockHeader: got %d bytes, need exactly %d", len(data), sizeOfBlockHeader) + } + bh.Height = binary.LittleEndian.Uint64(data[0:]) + bh.Size = binary.LittleEndian.Uint64(data[8:]) + bh.Checksum = binary.LittleEndian.Uint64(data[16:]) + return nil +} + +// WriteBlock inserts a block into the store at the given height. +// Returns an error if the store is closed, the block is empty, or the write fails. +func (s *Store) WriteBlock(height BlockHeight, block Block) error { + // TODO + return nil +} + +// ReadBlock retrieves a block by its height. +// Returns the block data or an error if not found or block data is corrupted. +func (s *Store) ReadBlock(height BlockHeight) (Block, error) { + // TODO + return nil, nil +} diff --git a/x/blockdb/blockdb.go b/x/blockdb/blockdb.go new file mode 100644 index 000000000000..37ea8191bd79 --- /dev/null +++ b/x/blockdb/blockdb.go @@ -0,0 +1 @@ +package blockdb diff --git a/x/blockdb/config.go b/x/blockdb/config.go new file mode 100644 index 000000000000..946e57ca176a --- /dev/null +++ b/x/blockdb/config.go @@ -0,0 +1,38 @@ +package blockdb + +import ( + "fmt" +) + +// StoreOptions contains optional configuration parameters for BlockDB. +type StoreOptions struct { + // MinimumHeight is the lowest block height the store will track (must be >= 1). + MinimumHeight uint64 + + // MaxDataFileSize sets the maximum size of the data block file in bytes. If 0, there is no limit. + MaxDataFileSize uint64 + + // CheckpointInterval defines how frequently (in blocks) the index file header is updated (default: 1024). + CheckpointInterval uint64 +} + +// DefaultStoreOptions returns the default options for BlockDB. +func DefaultStoreOptions() StoreOptions { + return StoreOptions{ + MinimumHeight: 1, + MaxDataFileSize: 1 << 31, // Default to 2GB + CheckpointInterval: 1024, + } +} + +// Validate checks if the store options are valid. 
+func (opts StoreOptions) Validate() error { + if opts.MinimumHeight == 0 { + return fmt.Errorf("%w: MinimumHeight cannot be 0, must be >= 1", ErrInvalidBlockHeight) + } + + if opts.CheckpointInterval == 0 { + return fmt.Errorf("%w: CheckpointInterval cannot be 0", ErrInvalidCheckpointInterval) + } + return nil +} diff --git a/x/blockdb/errors.go b/x/blockdb/errors.go new file mode 100644 index 000000000000..405dcd6228b9 --- /dev/null +++ b/x/blockdb/errors.go @@ -0,0 +1,15 @@ +package blockdb + +import "fmt" + +var ( + ErrInvalidBlockHeight = fmt.Errorf("blockdb: invalid block height") + ErrBlockNotFound = fmt.Errorf("blockdb: block not found") + ErrBlockEmpty = fmt.Errorf("blockdb: block is empty") + ErrBlockSizeMismatch = fmt.Errorf("blockdb: block size in index file does not match data header") + ErrChecksumMismatch = fmt.Errorf("blockdb: checksum mismatch") + ErrStoreClosed = fmt.Errorf("blockdb: store is closed") + ErrInvalidCheckpointInterval = fmt.Errorf("blockdb: invalid checkpoint interval") + ErrCorrupted = fmt.Errorf("blockdb: unrecoverable corruption detected") + ErrBlockTooLarge = fmt.Errorf("blockdb: block size exceeds maximum allowed size of %d bytes", MaxBlockDataSize) +) diff --git a/x/blockdb/index.go b/x/blockdb/index.go new file mode 100644 index 000000000000..ecaf3b95237f --- /dev/null +++ b/x/blockdb/index.go @@ -0,0 +1,90 @@ +package blockdb + +import ( + "encoding" + "encoding/binary" + "fmt" +) + +const ( + IndexFileVersion uint64 = 1 +) + +var ( + _ encoding.BinaryMarshaler = IndexEntry{} + _ encoding.BinaryUnmarshaler = &IndexEntry{} + + sizeOfIndexEntry = uint64(binary.Size(IndexEntry{})) + sizeOfIndexFileHeader = uint64(binary.Size(IndexFileHeader{})) +) + +// IndexEntry locates a block within the data file. +type IndexEntry struct { + // Offset is the byte offset in the data file where the block's header starts. + Offset uint64 + // Size is the length in bytes of the block's data (not including the header). + Size uint64 +} + +// IsEmpty returns true if this entry is uninitialized. +// This indicates a slot where no block has been written. +func (e IndexEntry) IsEmpty() bool { + return e.Offset == 0 && e.Size == 0 +} + +// MarshalBinary implements encoding.BinaryMarshaler for IndexEntry. +func (e IndexEntry) MarshalBinary() ([]byte, error) { + buf := make([]byte, sizeOfIndexEntry) + binary.LittleEndian.PutUint64(buf[0:], e.Offset) + binary.LittleEndian.PutUint64(buf[8:], e.Size) + return buf, nil +} + +// UnmarshalBinary implements encoding.BinaryUnmarshaler for IndexEntry. +func (e *IndexEntry) UnmarshalBinary(data []byte) error { + if len(data) != int(sizeOfIndexEntry) { + return fmt.Errorf("incorrect data length to unmarshal IndexEntry: got %d bytes, need exactly %d", len(data), sizeOfIndexEntry) + } + e.Offset = binary.LittleEndian.Uint64(data[0:]) + e.Size = binary.LittleEndian.Uint64(data[8:]) + return nil +} + +// IndexFileHeader is the header of the index file. 
+type IndexFileHeader struct { + Version uint64 + MaxDataFileSize uint64 + MaxBlockHeight uint64 + MinBlockHeight BlockHeight + MaxContiguousBlockHeight BlockHeight + DataFileSize uint64 +} + +// Add MarshalBinary for IndexFileHeader +func (h IndexFileHeader) MarshalBinary() ([]byte, error) { + buf := make([]byte, sizeOfIndexFileHeader) + binary.LittleEndian.PutUint64(buf[0:], h.Version) + binary.LittleEndian.PutUint64(buf[8:], h.MaxDataFileSize) + binary.LittleEndian.PutUint64(buf[16:], h.MaxBlockHeight) + binary.LittleEndian.PutUint64(buf[24:], h.MinBlockHeight) + binary.LittleEndian.PutUint64(buf[32:], h.MaxContiguousBlockHeight) + binary.LittleEndian.PutUint64(buf[40:], h.DataFileSize) + return buf, nil +} + +// Add UnmarshalBinary for IndexFileHeader +func (h *IndexFileHeader) UnmarshalBinary(data []byte) error { + if len(data) != int(sizeOfIndexFileHeader) { + return fmt.Errorf( + "incorrect data length to unmarshal IndexFileHeader: got %d bytes, need exactly %d", + len(data), sizeOfIndexFileHeader, + ) + } + h.Version = binary.LittleEndian.Uint64(data[0:]) + h.MaxDataFileSize = binary.LittleEndian.Uint64(data[8:]) + h.MaxBlockHeight = binary.LittleEndian.Uint64(data[16:]) + h.MinBlockHeight = binary.LittleEndian.Uint64(data[24:]) + h.MaxContiguousBlockHeight = binary.LittleEndian.Uint64(data[32:]) + h.DataFileSize = binary.LittleEndian.Uint64(data[40:]) + return nil +} diff --git a/x/blockdb/store.go b/x/blockdb/store.go new file mode 100644 index 000000000000..95a6bad4b04e --- /dev/null +++ b/x/blockdb/store.go @@ -0,0 +1,52 @@ +package blockdb + +import ( + "os" + "sync" + "sync/atomic" + + "github.com/ava-labs/avalanchego/utils/logging" +) + +const ( + indexFileName = "blockdb.idx" + dataFileName = "blockdb.dat" +) + +// BlockHeight defines the type for block heights. +type BlockHeight = uint64 + +// Block defines the type for block data. +type Block = []byte + +// Store is a collection of blockchain blocks. It provides methods to read, write, and manage blocks on disk. +type Store struct { + indexFile *os.File + dataFile *os.File + options StoreOptions + header IndexFileHeader + log logging.Logger + + // syncToDisk determines if fsync is called after each write for durability. + syncToDisk bool + // maxBlockHeight tracks the highest block height that has been written to the store, even if there are gaps in the sequence. + maxBlockHeight atomic.Uint64 + // closed indicates if the store has been closed. + closed bool + // mu synchronizes access to the store. + mu sync.RWMutex + // nextDataWriteOffset tracks the next position to write new data in the data file. + nextDataWriteOffset atomic.Uint64 + // maxContiguousHeight tracks the highest block height known to be contiguously stored. + maxContiguousHeight atomic.Uint64 +} + +// MaxContiguousHeight returns the highest block height known to be contiguously stored. +func (s *Store) MaxContiguousHeight() BlockHeight { + return s.maxContiguousHeight.Load() +} + +// MinHeight returns the minimum block height configured for this store. 
+func (s *Store) MinHeight() uint64 { + return s.header.MinBlockHeight +} From eb3f50c59c8401aae8f8087f166e6464a439a3e7 Mon Sep 17 00:00:00 2001 From: Draco Date: Mon, 9 Jun 2025 09:00:53 -0400 Subject: [PATCH 03/27] feat: block db implementation & readme --- x/blockdb/README.md | 13 ++- x/blockdb/block.go | 222 +++++++++++++++++++++++++++++++++++++++++- x/blockdb/index.go | 108 +++++++++++++++++++- x/blockdb/recovery.go | 158 ++++++++++++++++++++++++++++++ x/blockdb/store.go | 154 +++++++++++++++++++++++++++++ 5 files changed, 645 insertions(+), 10 deletions(-) create mode 100644 x/blockdb/recovery.go diff --git a/x/blockdb/README.md b/x/blockdb/README.md index 80f129d4ea97..59c5a668c373 100644 --- a/x/blockdb/README.md +++ b/x/blockdb/README.md @@ -205,8 +205,11 @@ maxContiguous := store.MaxContiguousHeight() minHeight := store.MinHeight() ``` -## Future Improvements - -- **Multiple Data Files**: Split data across multiple files when MaxDataFileSize is reached -- **Block Cache**: Implement circular buffer cache for recently accessed blocks -- **Enforced In-Order Writes**: Optional mode to require blocks be written sequentially, preventing gaps +## TODO + +- [ ] **Multiple Data Files**: Split data across multiple files when MaxDataFileSize is reached +- [ ] **Block Cache**: Implement circular buffer cache for recently accessed blocks +- [ ] **Enforced In-Order Writes**: Optional mode to require blocks be written sequentially, preventing gaps +- [ ] **User buffered pool**: Use a buffered pool for fetch index entries and block headers to avoid allocations +- [ ] **Unit Tests**: Add comprehensive test coverage for all core functionality +- [ ] **Benchmarks**: Add performance benchmarks for all major operations diff --git a/x/blockdb/block.go b/x/blockdb/block.go index 59bc792f0639..973307393eb4 100644 --- a/x/blockdb/block.go +++ b/x/blockdb/block.go @@ -3,7 +3,11 @@ package blockdb import ( "encoding" "encoding/binary" + "errors" "fmt" + "math" + + "github.com/cespare/xxhash/v2" ) const MaxBlockDataSize = 1 << 30 // 1 GB @@ -46,13 +50,223 @@ func (bh *blockHeader) UnmarshalBinary(data []byte) error { // WriteBlock inserts a block into the store at the given height. // Returns an error if the store is closed, the block is empty, or the write fails. func (s *Store) WriteBlock(height BlockHeight, block Block) error { - // TODO - return nil + s.mu.RLock() + defer s.mu.RUnlock() + + if s.closed { + return ErrStoreClosed + } + + if len(block) == 0 { + return ErrBlockEmpty + } + + if len(block) > MaxBlockDataSize { + return ErrBlockTooLarge + } + + indexFileOffset, err := s.indexEntryOffset(height) + if err != nil { + return err + } + + blockDataLen := uint64(len(block)) + sizeWithDataHeader := sizeOfBlockHeader + blockDataLen + writeDataOffset, err := s.allocateBlockSpace(sizeWithDataHeader) + if err != nil { + return err + } + + bh := blockHeader{ + Height: height, + Size: uint64(len(block)), + Checksum: calculateChecksum(block), + } + if err := s.writeBlockAtOffset(writeDataOffset, bh, block); err != nil { + return err + } + + if err := s.writeIndexEntryAt(indexFileOffset, writeDataOffset, blockDataLen); err != nil { + return err + } + + return s.updateBlockHeights(height) } // ReadBlock retrieves a block by its height. // Returns the block data or an error if not found or block data is corrupted. 
func (s *Store) ReadBlock(height BlockHeight) (Block, error) { - // TODO - return nil, nil + s.mu.RLock() + defer s.mu.RUnlock() + + if s.closed { + return nil, ErrStoreClosed + } + + indexEntry, err := s.readIndexEntry(height) + if err != nil { + if errors.Is(err, ErrInvalidBlockHeight) { + return nil, ErrBlockNotFound + } + return nil, fmt.Errorf("failed to prepare for reading index entry for height %d: %w", height, err) + } + if indexEntry.IsEmpty() { + return nil, ErrBlockNotFound + } + + bh, err := s.readAndVerifyBlockHeader(indexEntry, height) + if err != nil { + return nil, err + } + + return s.readAndVerifyBlockData(indexEntry, bh) +} + +func (s *Store) readAndVerifyBlockHeader(indexEntry IndexEntry, expectedHeight BlockHeight) (blockHeader, error) { + var bh blockHeader + dataHeaderBuf := make([]byte, sizeOfBlockHeader) + _, err := s.dataFile.ReadAt(dataHeaderBuf, int64(indexEntry.Offset)) + if err != nil { + return bh, fmt.Errorf("failed to read block header from data file for height %d: %w", expectedHeight, err) + } + + if err := bh.UnmarshalBinary(dataHeaderBuf); err != nil { + return bh, fmt.Errorf("failed to deserialize block header for height %d: %w", expectedHeight, err) + } + + if bh.Size != indexEntry.Size { + return bh, fmt.Errorf("%w: for height %d, index size %d, data header size %d", ErrBlockSizeMismatch, expectedHeight, indexEntry.Size, bh.Size) + } + if bh.Height != expectedHeight { + return bh, fmt.Errorf("internal error: requested %d, data header contains %d", expectedHeight, bh.Height) + } + return bh, nil +} + +func (s *Store) readAndVerifyBlockData(indexEntry IndexEntry, bh blockHeader) (Block, error) { + blockData := make(Block, bh.Size) + actualDataOffset := indexEntry.Offset + sizeOfBlockHeader + if actualDataOffset < indexEntry.Offset { + return nil, fmt.Errorf("internal error: block data offset calculation overflowed for height %d", bh.Height) + } + + _, err := s.dataFile.ReadAt(blockData, int64(actualDataOffset)) + if err != nil { + return nil, fmt.Errorf("failed to read block data from data file for height %d: %w", bh.Height, err) + } + + calculatedChecksum := calculateChecksum(blockData) + if calculatedChecksum != bh.Checksum { + return nil, fmt.Errorf("%w: for block height %d", ErrChecksumMismatch, bh.Height) + } + + return blockData, nil +} + +func calculateChecksum(data []byte) uint64 { + return xxhash.Sum64(data) +} + +func (s *Store) writeBlockAtOffset(offset uint64, bh blockHeader, block Block) error { + headerBytes, err := bh.MarshalBinary() + if err != nil { + return fmt.Errorf("failed to serialize block header: %w", err) + } + + // Allocate combined buffer for header and block data + // Using a single WriteAt instead of two separate calls for header and block + // data reduces syscall overhead in high-concurrency environments. + // The memory copy cost is lower than the syscall cost for typical block sizes. 
+ combinedBuf := make([]byte, sizeOfBlockHeader+uint64(len(block))) + copy(combinedBuf, headerBytes) + copy(combinedBuf[sizeOfBlockHeader:], block) + if _, err := s.dataFile.WriteAt(combinedBuf, int64(offset)); err != nil { + return fmt.Errorf("failed to write block to data file at offset %d: %w", offset, err) + } + + if s.syncToDisk { + if err := s.dataFile.Sync(); err != nil { + return fmt.Errorf("failed to sync data file after writing block %d: %w", bh.Height, err) + } + } + return nil +} + +func (s *Store) updateBlockHeights(writtenBlockHeight uint64) error { + // update max contiguous height + var prevContiguousCandidate uint64 + if writtenBlockHeight == s.header.MinBlockHeight { + if s.header.MinBlockHeight > 0 { + prevContiguousCandidate = s.header.MinBlockHeight - 1 + } else { + prevContiguousCandidate = 0 + } + } else if writtenBlockHeight > s.header.MinBlockHeight { + prevContiguousCandidate = writtenBlockHeight - 1 + } else { + return fmt.Errorf("internal error in MCH update: height %d < minimum %d", writtenBlockHeight, s.header.MinBlockHeight) + } + if s.maxContiguousHeight.CompareAndSwap(prevContiguousCandidate, writtenBlockHeight) { + currentMax := writtenBlockHeight + for { + nextHeightToVerify := currentMax + 1 + idxEntry, readErr := s.readIndexEntry(nextHeightToVerify) + if readErr != nil || idxEntry.IsEmpty() { + break + } + if !s.maxContiguousHeight.CompareAndSwap(currentMax, nextHeightToVerify) { + break // Someone else updated + } + currentMax = nextHeightToVerify + } + } + + // update max block height and persist header on checkpoint interval + var oldMaxHeight uint64 + for { + oldMaxHeight = s.maxBlockHeight.Load() + if writtenBlockHeight <= oldMaxHeight { + break + } + if s.maxBlockHeight.CompareAndSwap(oldMaxHeight, writtenBlockHeight) { + // todo: consider separating checkpoint logic out of this function + // a situation may arise where multiple blocks are written that trigger a checkpoint + // in this case, we are persisting the header multiple times. But this can only happen during bootstrapping. + // One solution is only checkpoint after x blocks are written, instead of at specific heights. + if writtenBlockHeight%s.options.CheckpointInterval == 0 { + if err := s.persistIndexHeader(s.syncToDisk); err != nil { + return fmt.Errorf("block %d written, but checkpoint failed: %w", writtenBlockHeight, err) + } + } + break + } + } + return nil +} + +func (s *Store) allocateBlockSpace(sizeWithDataHeader uint64) (writeDataOffset uint64, err error) { + maxDataFileSize := s.header.MaxDataFileSize + + for { + // Check if the new offset would overflow uint64. 
+ currentOffset := s.nextDataWriteOffset.Load() + if currentOffset > math.MaxUint64-sizeWithDataHeader { + return 0, fmt.Errorf( + "adding block of size %d to offset %d would overflow uint64 data file pointer", + sizeWithDataHeader, currentOffset, + ) + } + + newOffset := currentOffset + sizeWithDataHeader + if maxDataFileSize > 0 && newOffset > maxDataFileSize { + return 0, fmt.Errorf( + "adding block of size %d to offset %d (new offset %d) would exceed configured max data file size of %d bytes", + sizeWithDataHeader, currentOffset, newOffset, maxDataFileSize, + ) + } + + if s.nextDataWriteOffset.CompareAndSwap(currentOffset, newOffset) { + return currentOffset, nil + } + } } diff --git a/x/blockdb/index.go b/x/blockdb/index.go index ecaf3b95237f..5c52f7d29ab6 100644 --- a/x/blockdb/index.go +++ b/x/blockdb/index.go @@ -4,6 +4,8 @@ import ( "encoding" "encoding/binary" "fmt" + "io" + "math" ) const ( @@ -23,7 +25,7 @@ type IndexEntry struct { // Offset is the byte offset in the data file where the block's header starts. Offset uint64 // Size is the length in bytes of the block's data (not including the header). - Size uint64 + Size uint64 // todo: can this be omitted? currently is this only used to verify the block size, but we are already doing checksum verification. Removing this can double the amount of data in the index file. } // IsEmpty returns true if this entry is uninitialized. @@ -88,3 +90,107 @@ func (h *IndexFileHeader) UnmarshalBinary(data []byte) error { h.DataFileSize = binary.LittleEndian.Uint64(data[40:]) return nil } + +func (s *Store) indexEntryOffset(height BlockHeight) (uint64, error) { + if height < s.header.MinBlockHeight { + return 0, fmt.Errorf("%w: height %d is less than minimum block height %d", ErrInvalidBlockHeight, height, s.header.MinBlockHeight) + } + relativeHeight := height - s.header.MinBlockHeight + if relativeHeight > (math.MaxUint64-sizeOfIndexFileHeader)/sizeOfIndexEntry { + return 0, fmt.Errorf("%w: index entry offset multiplication overflow for height %d", ErrInvalidBlockHeight, height) + } + offsetFromHeaderStart := relativeHeight * sizeOfIndexEntry + finalOffset := sizeOfIndexFileHeader + offsetFromHeaderStart + if finalOffset < sizeOfIndexFileHeader { + return 0, fmt.Errorf("%w: index entry offset addition overflow for height %d", ErrInvalidBlockHeight, height) + } + return finalOffset, nil +} + +func (s *Store) readIndexEntry(height BlockHeight) (IndexEntry, error) { + offset, err := s.indexEntryOffset(height) + if err != nil { + return IndexEntry{}, err + } + + var entry IndexEntry + buf := make([]byte, sizeOfIndexEntry) + _, err = s.indexFile.ReadAt(buf, int64(offset)) + if err != nil { + if err == io.EOF { + return entry, nil + } + return IndexEntry{}, fmt.Errorf("failed to read index entry at offset %d for height %d: %w", offset, height, err) + } + if err := entry.UnmarshalBinary(buf); err != nil { + return IndexEntry{}, fmt.Errorf("failed to deserialize index entry for height %d: %w", height, err) + } + return entry, nil +} + +func (s *Store) writeIndexEntryAt(indexFileOffset, dataFileBlockOffset, blockDataLen uint64) error { + indexEntry := IndexEntry{ + Offset: dataFileBlockOffset, + Size: blockDataLen, + } + + entryBytes, err := indexEntry.MarshalBinary() + if err != nil { + return fmt.Errorf("failed to serialize index entry: %w", err) + } + + if _, err := s.indexFile.WriteAt(entryBytes, int64(indexFileOffset)); err != nil { + return fmt.Errorf("failed to write index entry: %w", err) + } + return nil +} + +func (s *Store) 
persistIndexHeader(syncToDisk bool) error { + // Why fsync indexFile before writing its header? + // To prevent a critical inconsistency: the header must not describe a state + // more advanced than what's durably stored in the index entries. + // + // 1. Writes Are Buffered: OS buffers index entry writes; they aren't immediately on disk. + // 2. Header Reflects New State: The header is updated with new DataFileSize (for data file) + // and MaxContiguousBlockHeight (based on index entries). + // 3. THE RISK IF HEADER IS WRITTEN/FLUSHED FIRST (before fsyncing entries): + // If the OS flushes the updated header to disk *before* it flushes the buffered + // index entries that justify the header's new state, then a crash would mean: + // - The on-disk header claims certain blocks/entries exist (up to new DataFileSize/MCH). + // - But the corresponding index entries themselves were lost (still in buffer at crash). + // This results in the header pointing to the updated DataFileSize in the data file + // but the index entries are not yet on disk, leading to missing blocks in the index file. + // + // By fsyncing indexFile *first*, we ensure all index entries are durably on disk. + // Only then is the header written, guaranteeing it reflects a truly persisted state. + if syncToDisk { + if s.indexFile != nil { + if err := s.indexFile.Sync(); err != nil { + return fmt.Errorf("failed to sync index file before writing header state: %w", err) + } + } else { + return fmt.Errorf("index file is nil, cannot sync or write header state") + } + } + + header := s.header + header.DataFileSize = s.nextDataWriteOffset.Load() + header.MaxContiguousBlockHeight = s.maxContiguousHeight.Load() + header.MaxBlockHeight = s.maxBlockHeight.Load() + headerBytes, err := header.MarshalBinary() + if err != nil { + return fmt.Errorf("failed to serialize header for writing state: %w", err) + } + if uint64(len(headerBytes)) != sizeOfIndexFileHeader { + return fmt.Errorf("internal error: serialized header state size %d, expected %d", len(headerBytes), sizeOfIndexFileHeader) + } + + if s.indexFile == nil { + return fmt.Errorf("index file is nil, cannot write header state") + } + _, err = s.indexFile.WriteAt(headerBytes, 0) + if err != nil { + return fmt.Errorf("failed to write header state to index file: %w", err) + } + return nil +} diff --git a/x/blockdb/recovery.go b/x/blockdb/recovery.go new file mode 100644 index 000000000000..ed2b67513996 --- /dev/null +++ b/x/blockdb/recovery.go @@ -0,0 +1,158 @@ +package blockdb + +import ( + "fmt" + + "go.uber.org/zap" +) + +const ( + // maxRecoverBlockSize is a sanity limit for block sizes encountered during the recovery scan. + // It prevents attempts to read/allocate excessively large blocks due to data corruption in a block header. + maxRecoverBlockSize uint64 = 50 * 1024 * 1024 // 50MB +) + +// recover attempts to restore the store to a consistent state by scanning the data file +// for blocks that may not be correctly indexed, usually after an unclean shutdown. +// It reconciles the data file with the index file header and entries. +func (s *Store) recover() error { + dataFileInfo, err := s.dataFile.Stat() + if err != nil { + return fmt.Errorf("failed to get data file stats for recovery: %w", err) + } + dataFileActualSize := uint64(dataFileInfo.Size()) + nextDataWriteOffset := s.nextDataWriteOffset.Load() + + // If the data file size matches the size recorded in the index header, then no recovery is needed. 
+ if dataFileActualSize == nextDataWriteOffset { + // TODO: Do we need to validate that the max contiguous height is correct? + // it might not be correct if the previous shutdown was not clean and + // only the new datafile size was persisted somehow. In this case, we need + // to fix the max contiguous height otherwise it will never be updated. + return nil + } + + // If the data file is smaller than the index header indicates, this is a fatal inconsistency. + // The index file claims more data than actually exists, which cannot be recovered automatically. + if dataFileActualSize < nextDataWriteOffset { + return fmt.Errorf("%w: data file is smaller than index header claims (data file: %d bytes, index header: %d bytes) -- possible corruption or incomplete flush", + ErrCorrupted, dataFileActualSize, nextDataWriteOffset) + } + + // Data file is larger than the index header indicates. + s.log.Info("Data file larger than indexed size; recovering blocks", + zap.Uint64("dataFileSize", dataFileActualSize), + zap.Uint64("indexedSize", nextDataWriteOffset), + zap.Uint64("scanStartOffset", nextDataWriteOffset), + ) + + // Start scan from where the index left off. + currentScanOffset := nextDataWriteOffset + var recoveredBlocksCount int = 0 + var maxRecoveredHeightSeen uint64 = s.maxBlockHeight.Load() + for currentScanOffset < dataFileActualSize { + bh, err := s.recoverBlockAtOffset(currentScanOffset, dataFileActualSize) + if err != nil { + s.log.Error("Recovery: scan stopped due to invalid block data", + zap.Uint64("offset", currentScanOffset), + zap.Error(err), + ) + break + } + s.log.Debug("Recovery: Successfully validated and indexed block", + zap.Uint64("height", bh.Height), + zap.Uint64("size", bh.Size), + zap.Uint64("offset", currentScanOffset), + ) + recoveredBlocksCount++ + if bh.Height > maxRecoveredHeightSeen { + maxRecoveredHeightSeen = bh.Height + } + currentScanOffset += sizeOfBlockHeader + bh.Size + } + s.nextDataWriteOffset.Store(currentScanOffset) + s.maxBlockHeight.Store(maxRecoveredHeightSeen) + + // Recalculate MCH if we recovered any blocks + if recoveredBlocksCount > 0 { + s.updateMaxContiguousHeightOnRecovery() + } + + if err := s.persistIndexHeader(true); err != nil { + return fmt.Errorf("recovery: failed to save index header after recovery scan: %w", err) + } + + s.log.Info("Recovery: Scan finished", + zap.Int("recoveredBlocks", recoveredBlocksCount), + zap.Uint64("dataFileSize", nextDataWriteOffset), + zap.Uint64("maxContiguousBlockHeight", s.maxContiguousHeight.Load()), + zap.Uint64("maxBlockHeight", s.maxBlockHeight.Load()), + ) + + return nil +} + +// recoverBlockAtOffset attempts to read, validate, and index a block at the given offset. +// Returns the blockHeader and an error if the block is invalid or incomplete. 
+func (s *Store) recoverBlockAtOffset(offset, dataFileActualSize uint64) (blockHeader, error) { + var bh blockHeader + if dataFileActualSize-offset < sizeOfBlockHeader { + return bh, fmt.Errorf("not enough data for block header at offset %d", offset) + } + bhBuf := make([]byte, sizeOfBlockHeader) + _, readErr := s.dataFile.ReadAt(bhBuf, int64(offset)) + if readErr != nil { + return bh, fmt.Errorf("error reading block header at offset %d: %w", offset, readErr) + } + if err := bh.UnmarshalBinary(bhBuf); err != nil { + return bh, fmt.Errorf("error deserializing block header at offset %d: %w", offset, err) + } + if bh.Size == 0 || bh.Size > maxRecoverBlockSize { + return bh, fmt.Errorf("invalid block size in header at offset %d: %d", offset, bh.Size) + } + if bh.Height < s.header.MinBlockHeight { + return bh, fmt.Errorf( + "invalid block height in header at offset %d: found %d, expected >= %d", + offset, bh.Height, s.header.MinBlockHeight, + ) + } + expectedBlockEndOffset := offset + sizeOfBlockHeader + bh.Size + if expectedBlockEndOffset < offset || expectedBlockEndOffset > dataFileActualSize { + return bh, fmt.Errorf("block data out of bounds at offset %d", offset) + } + blockData := make([]byte, bh.Size) + _, readErr = s.dataFile.ReadAt(blockData, int64(offset+sizeOfBlockHeader)) + if readErr != nil { + return bh, fmt.Errorf("failed to read block data at offset %d: %w", offset, readErr) + } + calculatedChecksum := calculateChecksum(blockData) + if calculatedChecksum != bh.Checksum { + return bh, fmt.Errorf("checksum mismatch for block at offset %d", offset) + } + + // Write index entry for this block + indexFileOffset, idxErr := s.indexEntryOffset(bh.Height) + if idxErr != nil { + return bh, fmt.Errorf("cannot get index offset for recovered block %d: %w", bh.Height, idxErr) + } + if err := s.writeIndexEntryAt(indexFileOffset, offset, bh.Size); err != nil { + return bh, fmt.Errorf("failed to update index for recovered block %d: %w", bh.Height, err) + } + return bh, nil +} + +// updateMaxContiguousHeightOnRecovery extends the max contiguous height from the value in the header, +// incrementing as long as contiguous blocks exist. 
+func (s *Store) updateMaxContiguousHeightOnRecovery() { + currentMCH := s.header.MaxContiguousBlockHeight + highestKnown := s.maxBlockHeight.Load() + + for nextHeight := currentMCH + 1; nextHeight <= highestKnown; nextHeight++ { + entry, err := s.readIndexEntry(nextHeight) + if err != nil || entry.IsEmpty() { + break + } + currentMCH = nextHeight + } + s.maxContiguousHeight.Store(currentMCH) +} diff --git a/x/blockdb/store.go b/x/blockdb/store.go index 95a6bad4b04e..08683796914b 100644 --- a/x/blockdb/store.go +++ b/x/blockdb/store.go @@ -1,7 +1,11 @@ package blockdb import ( + "bytes" + "encoding/binary" + "fmt" "os" + "path/filepath" "sync" "sync/atomic" @@ -41,6 +45,136 @@ type Store struct { maxContiguousHeight atomic.Uint64 } +func (s *Store) openOrCreateFiles(indexDir, dataDir string, truncate bool) error { + indexPath := filepath.Join(indexDir, indexFileName) + dataPath := filepath.Join(dataDir, dataFileName) + + if err := os.MkdirAll(indexDir, 0755); err != nil { + return fmt.Errorf("failed to create index directory %s: %w", indexDir, err) + } + if err := os.MkdirAll(dataDir, 0755); err != nil { + return fmt.Errorf("failed to create data directory %s: %w", dataDir, err) + } + + openFlags := os.O_RDWR | os.O_CREATE + if truncate { + openFlags |= os.O_TRUNC + } + + var err error + s.indexFile, err = os.OpenFile(indexPath, openFlags, 0666) + if err != nil { + return fmt.Errorf("failed to open index file %s: %w", indexPath, err) + } + s.dataFile, err = os.OpenFile(dataPath, openFlags, 0666) + if err != nil { + // Clean up partially opened resources + s.indexFile.Close() + return fmt.Errorf("failed to open data file %s: %w", dataPath, err) + } + return nil +} + +func (s *Store) loadOrInitializeHeader(truncate bool) error { + if truncate { + initialMCH := uint64(0) + if s.options.MinimumHeight > 1 { + initialMCH = s.options.MinimumHeight - 1 + s.maxContiguousHeight.Store(initialMCH) + } + + s.header = IndexFileHeader{ + Version: IndexFileVersion, + MinBlockHeight: s.options.MinimumHeight, + MaxDataFileSize: s.options.MaxDataFileSize, + MaxBlockHeight: 0, + MaxContiguousBlockHeight: initialMCH, + DataFileSize: 0, + } + + buf := new(bytes.Buffer) + if err := binary.Write(buf, binary.LittleEndian, &s.header); err != nil { + return fmt.Errorf("failed to serialize new header: %w", err) + } + if uint64(buf.Len()) != sizeOfIndexFileHeader { + return fmt.Errorf("internal error: serialized new header size %d, expected %d", buf.Len(), sizeOfIndexFileHeader) + } + if _, err := s.indexFile.WriteAt(buf.Bytes(), 0); err != nil { + return fmt.Errorf("failed to write initial index header: %w", err) + } + + return nil + } + + // Not truncating, load existing header + headerBuf := make([]byte, sizeOfIndexFileHeader) + _, readErr := s.indexFile.ReadAt(headerBuf, 0) + if readErr != nil { + return fmt.Errorf("failed to read index header: %w", readErr) + } + if err := s.header.UnmarshalBinary(headerBuf); err != nil { + return fmt.Errorf("failed to deserialize index header: %w", err) + } + if s.header.Version != IndexFileVersion { + return fmt.Errorf("mismatched index file version: found %d, expected %d", s.header.Version, IndexFileVersion) + } + s.nextDataWriteOffset.Store(s.header.DataFileSize) + s.maxContiguousHeight.Store(s.header.MaxContiguousBlockHeight) + s.maxBlockHeight.Store(s.header.MaxBlockHeight) + + return nil +} + +// NewStore creates or opens a block store. 
+// Parameters: +// - indexDir: Directory for the index file +// - dataDir: Directory for the data file(s) +// - syncToDisk: If true, forces fsync after writes for guaranteed recoverability +// - truncate: If true, truncates existing store files +// - opts: Optional configuration parameters +// - log: Logger instance for structured logging +func NewStore(indexDir, dataDir string, syncToDisk bool, truncate bool, opts StoreOptions, log logging.Logger) (*Store, error) { + if indexDir == "" || dataDir == "" { + return nil, fmt.Errorf("both indexDir and dataDir must be provided") + } + + if err := opts.Validate(); err != nil { + return nil, err + } + + s := &Store{ + options: opts, + syncToDisk: syncToDisk, + log: log, + } + + if err := s.openOrCreateFiles(indexDir, dataDir, truncate); err != nil { + return nil, err + } + + if err := s.loadOrInitializeHeader(truncate); err != nil { + s.closeFiles() + return nil, err + } + + if !truncate { + if err := s.recover(); err != nil { + s.closeFiles() + return nil, fmt.Errorf("recovery failed: %w", err) + } + } + return s, nil +} + +func (s *Store) closeFiles() { + if s.indexFile != nil { + s.indexFile.Close() + } + if s.dataFile != nil { + s.dataFile.Close() + } +} + // MaxContiguousHeight returns the highest block height known to be contiguously stored. func (s *Store) MaxContiguousHeight() BlockHeight { return s.maxContiguousHeight.Load() @@ -50,3 +184,23 @@ func (s *Store) MaxContiguousHeight() BlockHeight { func (s *Store) MinHeight() uint64 { return s.header.MinBlockHeight } + +func (s *Store) MaxBlockHeight() BlockHeight { + return s.maxBlockHeight.Load() +} + +// Close flushes pending writes and closes the store files. +// It is safe to call Close multiple times. +func (s *Store) Close() error { + s.mu.Lock() + defer s.mu.Unlock() + + if s.closed { + return nil + } + s.closed = true + + err := s.persistIndexHeader(false) + s.closeFiles() + return err +} From 07c1e5cb1cfe79bb1763154974a7265e7b8041cb Mon Sep 17 00:00:00 2001 From: Draco Date: Mon, 9 Jun 2025 14:41:19 -0400 Subject: [PATCH 04/27] refactor: rename store to database --- x/blockdb/README.md | 35 ++++++++++++++--------------- x/blockdb/block.go | 20 ++++++++--------- x/blockdb/blockdb.go | 1 - x/blockdb/config.go | 12 +++++----- x/blockdb/{store.go => database.go} | 32 +++++++++++++------------- x/blockdb/errors.go | 2 +- x/blockdb/index.go | 8 +++---- x/blockdb/recovery.go | 6 ++--- 8 files changed, 57 insertions(+), 59 deletions(-) delete mode 100644 x/blockdb/blockdb.go rename x/blockdb/{store.go => database.go} (86%) diff --git a/x/blockdb/README.md b/x/blockdb/README.md index 59c5a668c373..5b96c0afeb41 100644 --- a/x/blockdb/README.md +++ b/x/blockdb/README.md @@ -2,16 +2,14 @@ BlockDB is a specialized storage system designed for blockchain blocks. It provides O(1) write performance with support for parallel operations. Unlike general-purpose key-value stores like LevelDB that require periodic compaction, BlockDB's append-only design ensures consistently fast writes without the overhead of background maintenance operations. 
-## Key Functionalities (Needs Review) +## Key Functionalities - **O(1) Performance**: Both reads and writes complete in constant time - **Parallel Operations**: Multiple threads can read and write blocks concurrently without blocking -- **Flexible Write Ordering**: Supports out-of-order block writes for efficient synchronization -- **Configurable Durability**: Optional `syncToDisk` mode guarantees immediate recoverability at the cost of performance +- **Flexible Write Ordering**: Supports out-of-order block writes for bootstrapping +- **Configurable Durability**: Optional `syncToDisk` mode guarantees immediate recoverability - **Automatic Recovery**: Detects and recovers unindexed blocks after unclean shutdowns -- **Data Integrity**: Checksums verify block data on every read -- **No Maintenance Required**: Append-only design eliminates the need for compaction or reorganization -- **Progress Tracking**: Maintains maximum contiguous height for sync status +- **Data Integrity**: Checksums verify block data on reads ## Architecture @@ -27,6 +25,7 @@ BlockDB uses two file types: index files and data files. The index file maps blo │ - Min Height │ │ │ - Data │ │ - MCH │ │ ├─────────────────┤ │ - Data Size │ │ │ Block 2 │ +│ - ... │ │ │ │ ├─────────────────┤ │ ┌──>│ - Header │ │ Entry[0] │ │ │ │ - Data │ │ - Offset ───────┼──┘ │ ├─────────────────┤ @@ -101,7 +100,7 @@ BlockDB is strictly append-only with no support for deletions. This aligns with - Straightforward recovery logic - No compaction overhead -**Trade-off**: Overwriting a block leaves the old data as unreferenced "dead" space. However, since blockchain blocks are immutable and rarely overwritten (only during reorgs), this trade-off has minimal impact in practice. +**Trade-off**: Overwriting a block leaves the old data as unreferenced "dead" space. However, since blocks are immutable and rarely overwritten (only during reorgs), this trade-off has minimal impact in practice. #### Fixed-Size Index Entries @@ -164,26 +163,26 @@ BlockDB uses a reader-writer lock for overall thread safety, with atomic operati ## Usage -### Creating a Store +### Creating a Database ```go import "github.com/ava-labs/avalanchego/x/blockdb" -opts := blockdb.DefaultStoreOptions() -opts.MinimumHeight = 1 +config := blockdb.DefaultDatabaseOptions() +config.MinimumHeight = 1 -store, err := blockdb.NewStore( +db, err := blockdb.New( "/path/to/index", // Index directory "/path/to/data", // Data directory true, // Sync to disk false, // Don't truncate existing data - opts, + config, logger, ) if err != nil { return err } -defer store.Close() +defer db.Close() ``` ### Writing and Reading Blocks @@ -192,17 +191,17 @@ defer store.Close() // Write a block height := uint64(100) blockData := []byte("block data...") -err := store.WriteBlock(height, blockData) +err := db.WriteBlock(height, blockData) // Read a block -blockData, err := store.ReadBlock(height) +blockData, err := db.ReadBlock(height) if err == blockdb.ErrBlockNotFound { // Block doesn't exist at this height } -// Query store state -maxContiguous := store.MaxContiguousHeight() -minHeight := store.MinHeight() +// Query database state +maxContiguous := db.MaxContiguousHeight() +minHeight := db.MinHeight() ``` ## TODO diff --git a/x/blockdb/block.go b/x/blockdb/block.go index 973307393eb4..8c2d319af4c0 100644 --- a/x/blockdb/block.go +++ b/x/blockdb/block.go @@ -21,7 +21,7 @@ var ( // blockHeader is prepended to each block in the data file. 
type blockHeader struct { - Height uint64 + Height uint64 // todo: can this be omitted? currently only used for verification // Size of the raw block data (excluding this blockHeader). Size uint64 Checksum uint64 @@ -49,12 +49,12 @@ func (bh *blockHeader) UnmarshalBinary(data []byte) error { // WriteBlock inserts a block into the store at the given height. // Returns an error if the store is closed, the block is empty, or the write fails. -func (s *Store) WriteBlock(height BlockHeight, block Block) error { +func (s *Database) WriteBlock(height BlockHeight, block Block) error { s.mu.RLock() defer s.mu.RUnlock() if s.closed { - return ErrStoreClosed + return ErrDatabaseClosed } if len(block) == 0 { @@ -95,12 +95,12 @@ func (s *Store) WriteBlock(height BlockHeight, block Block) error { // ReadBlock retrieves a block by its height. // Returns the block data or an error if not found or block data is corrupted. -func (s *Store) ReadBlock(height BlockHeight) (Block, error) { +func (s *Database) ReadBlock(height BlockHeight) (Block, error) { s.mu.RLock() defer s.mu.RUnlock() if s.closed { - return nil, ErrStoreClosed + return nil, ErrDatabaseClosed } indexEntry, err := s.readIndexEntry(height) @@ -122,7 +122,7 @@ func (s *Store) ReadBlock(height BlockHeight) (Block, error) { return s.readAndVerifyBlockData(indexEntry, bh) } -func (s *Store) readAndVerifyBlockHeader(indexEntry IndexEntry, expectedHeight BlockHeight) (blockHeader, error) { +func (s *Database) readAndVerifyBlockHeader(indexEntry IndexEntry, expectedHeight BlockHeight) (blockHeader, error) { var bh blockHeader dataHeaderBuf := make([]byte, sizeOfBlockHeader) _, err := s.dataFile.ReadAt(dataHeaderBuf, int64(indexEntry.Offset)) @@ -143,7 +143,7 @@ func (s *Store) readAndVerifyBlockHeader(indexEntry IndexEntry, expectedHeight B return bh, nil } -func (s *Store) readAndVerifyBlockData(indexEntry IndexEntry, bh blockHeader) (Block, error) { +func (s *Database) readAndVerifyBlockData(indexEntry IndexEntry, bh blockHeader) (Block, error) { blockData := make(Block, bh.Size) actualDataOffset := indexEntry.Offset + sizeOfBlockHeader if actualDataOffset < indexEntry.Offset { @@ -167,7 +167,7 @@ func calculateChecksum(data []byte) uint64 { return xxhash.Sum64(data) } -func (s *Store) writeBlockAtOffset(offset uint64, bh blockHeader, block Block) error { +func (s *Database) writeBlockAtOffset(offset uint64, bh blockHeader, block Block) error { headerBytes, err := bh.MarshalBinary() if err != nil { return fmt.Errorf("failed to serialize block header: %w", err) @@ -192,7 +192,7 @@ func (s *Store) writeBlockAtOffset(offset uint64, bh blockHeader, block Block) e return nil } -func (s *Store) updateBlockHeights(writtenBlockHeight uint64) error { +func (s *Database) updateBlockHeights(writtenBlockHeight uint64) error { // update max contiguous height var prevContiguousCandidate uint64 if writtenBlockHeight == s.header.MinBlockHeight { @@ -244,7 +244,7 @@ func (s *Store) updateBlockHeights(writtenBlockHeight uint64) error { return nil } -func (s *Store) allocateBlockSpace(sizeWithDataHeader uint64) (writeDataOffset uint64, err error) { +func (s *Database) allocateBlockSpace(sizeWithDataHeader uint64) (writeDataOffset uint64, err error) { maxDataFileSize := s.header.MaxDataFileSize for { diff --git a/x/blockdb/blockdb.go b/x/blockdb/blockdb.go deleted file mode 100644 index 37ea8191bd79..000000000000 --- a/x/blockdb/blockdb.go +++ /dev/null @@ -1 +0,0 @@ -package blockdb diff --git a/x/blockdb/config.go b/x/blockdb/config.go index 
946e57ca176a..a88f45770dd5 100644 --- a/x/blockdb/config.go +++ b/x/blockdb/config.go @@ -4,8 +4,8 @@ import ( "fmt" ) -// StoreOptions contains optional configuration parameters for BlockDB. -type StoreOptions struct { +// DatabaseConfig contains configuration parameters for BlockDB. +type DatabaseConfig struct { // MinimumHeight is the lowest block height the store will track (must be >= 1). MinimumHeight uint64 @@ -16,9 +16,9 @@ type StoreOptions struct { CheckpointInterval uint64 } -// DefaultStoreOptions returns the default options for BlockDB. -func DefaultStoreOptions() StoreOptions { - return StoreOptions{ +// DefaultDatabaseConfig returns the default options for BlockDB. +func DefaultDatabaseConfig() DatabaseConfig { + return DatabaseConfig{ MinimumHeight: 1, MaxDataFileSize: 1 << 31, // Default to 2GB CheckpointInterval: 1024, @@ -26,7 +26,7 @@ func DefaultStoreOptions() StoreOptions { } // Validate checks if the store options are valid. -func (opts StoreOptions) Validate() error { +func (opts DatabaseConfig) Validate() error { if opts.MinimumHeight == 0 { return fmt.Errorf("%w: MinimumHeight cannot be 0, must be >= 1", ErrInvalidBlockHeight) } diff --git a/x/blockdb/store.go b/x/blockdb/database.go similarity index 86% rename from x/blockdb/store.go rename to x/blockdb/database.go index 08683796914b..4489376247d8 100644 --- a/x/blockdb/store.go +++ b/x/blockdb/database.go @@ -23,11 +23,11 @@ type BlockHeight = uint64 // Block defines the type for block data. type Block = []byte -// Store is a collection of blockchain blocks. It provides methods to read, write, and manage blocks on disk. -type Store struct { +// Database is a collection of blockchain blocks. It provides methods to read, write, and manage blocks on disk. +type Database struct { indexFile *os.File dataFile *os.File - options StoreOptions + options DatabaseConfig header IndexFileHeader log logging.Logger @@ -45,7 +45,7 @@ type Store struct { maxContiguousHeight atomic.Uint64 } -func (s *Store) openOrCreateFiles(indexDir, dataDir string, truncate bool) error { +func (s *Database) openOrCreateFiles(indexDir, dataDir string, truncate bool) error { indexPath := filepath.Join(indexDir, indexFileName) dataPath := filepath.Join(dataDir, dataFileName) @@ -75,7 +75,7 @@ func (s *Store) openOrCreateFiles(indexDir, dataDir string, truncate bool) error return nil } -func (s *Store) loadOrInitializeHeader(truncate bool) error { +func (s *Database) loadOrInitializeHeader(truncate bool) error { if truncate { initialMCH := uint64(0) if s.options.MinimumHeight > 1 { @@ -125,25 +125,25 @@ func (s *Store) loadOrInitializeHeader(truncate bool) error { return nil } -// NewStore creates or opens a block store. +// New creates a block database. 
// Parameters: // - indexDir: Directory for the index file // - dataDir: Directory for the data file(s) // - syncToDisk: If true, forces fsync after writes for guaranteed recoverability // - truncate: If true, truncates existing store files -// - opts: Optional configuration parameters +// - config: Optional configuration parameters // - log: Logger instance for structured logging -func NewStore(indexDir, dataDir string, syncToDisk bool, truncate bool, opts StoreOptions, log logging.Logger) (*Store, error) { +func New(indexDir, dataDir string, syncToDisk bool, truncate bool, config DatabaseConfig, log logging.Logger) (*Database, error) { if indexDir == "" || dataDir == "" { return nil, fmt.Errorf("both indexDir and dataDir must be provided") } - if err := opts.Validate(); err != nil { + if err := config.Validate(); err != nil { return nil, err } - s := &Store{ - options: opts, + s := &Database{ + options: config, syncToDisk: syncToDisk, log: log, } @@ -166,7 +166,7 @@ func NewStore(indexDir, dataDir string, syncToDisk bool, truncate bool, opts Sto return s, nil } -func (s *Store) closeFiles() { +func (s *Database) closeFiles() { if s.indexFile != nil { s.indexFile.Close() } @@ -176,22 +176,22 @@ func (s *Store) closeFiles() { } // MaxContiguousHeight returns the highest block height known to be contiguously stored. -func (s *Store) MaxContiguousHeight() BlockHeight { +func (s *Database) MaxContiguousHeight() BlockHeight { return s.maxContiguousHeight.Load() } // MinHeight returns the minimum block height configured for this store. -func (s *Store) MinHeight() uint64 { +func (s *Database) MinHeight() uint64 { return s.header.MinBlockHeight } -func (s *Store) MaxBlockHeight() BlockHeight { +func (s *Database) MaxBlockHeight() BlockHeight { return s.maxBlockHeight.Load() } // Close flushes pending writes and closes the store files. // It is safe to call Close multiple times. 
-func (s *Store) Close() error { +func (s *Database) Close() error { s.mu.Lock() defer s.mu.Unlock() diff --git a/x/blockdb/errors.go b/x/blockdb/errors.go index 405dcd6228b9..919310d718dd 100644 --- a/x/blockdb/errors.go +++ b/x/blockdb/errors.go @@ -8,7 +8,7 @@ var ( ErrBlockEmpty = fmt.Errorf("blockdb: block is empty") ErrBlockSizeMismatch = fmt.Errorf("blockdb: block size in index file does not match data header") ErrChecksumMismatch = fmt.Errorf("blockdb: checksum mismatch") - ErrStoreClosed = fmt.Errorf("blockdb: store is closed") + ErrDatabaseClosed = fmt.Errorf("blockdb: database is closed") ErrInvalidCheckpointInterval = fmt.Errorf("blockdb: invalid checkpoint interval") ErrCorrupted = fmt.Errorf("blockdb: unrecoverable corruption detected") ErrBlockTooLarge = fmt.Errorf("blockdb: block size exceeds maximum allowed size of %d bytes", MaxBlockDataSize) diff --git a/x/blockdb/index.go b/x/blockdb/index.go index 5c52f7d29ab6..661d748cf278 100644 --- a/x/blockdb/index.go +++ b/x/blockdb/index.go @@ -91,7 +91,7 @@ func (h *IndexFileHeader) UnmarshalBinary(data []byte) error { return nil } -func (s *Store) indexEntryOffset(height BlockHeight) (uint64, error) { +func (s *Database) indexEntryOffset(height BlockHeight) (uint64, error) { if height < s.header.MinBlockHeight { return 0, fmt.Errorf("%w: height %d is less than minimum block height %d", ErrInvalidBlockHeight, height, s.header.MinBlockHeight) } @@ -107,7 +107,7 @@ func (s *Store) indexEntryOffset(height BlockHeight) (uint64, error) { return finalOffset, nil } -func (s *Store) readIndexEntry(height BlockHeight) (IndexEntry, error) { +func (s *Database) readIndexEntry(height BlockHeight) (IndexEntry, error) { offset, err := s.indexEntryOffset(height) if err != nil { return IndexEntry{}, err @@ -128,7 +128,7 @@ func (s *Store) readIndexEntry(height BlockHeight) (IndexEntry, error) { return entry, nil } -func (s *Store) writeIndexEntryAt(indexFileOffset, dataFileBlockOffset, blockDataLen uint64) error { +func (s *Database) writeIndexEntryAt(indexFileOffset, dataFileBlockOffset, blockDataLen uint64) error { indexEntry := IndexEntry{ Offset: dataFileBlockOffset, Size: blockDataLen, @@ -145,7 +145,7 @@ func (s *Store) writeIndexEntryAt(indexFileOffset, dataFileBlockOffset, blockDat return nil } -func (s *Store) persistIndexHeader(syncToDisk bool) error { +func (s *Database) persistIndexHeader(syncToDisk bool) error { // Why fsync indexFile before writing its header? // To prevent a critical inconsistency: the header must not describe a state // more advanced than what's durably stored in the index entries. diff --git a/x/blockdb/recovery.go b/x/blockdb/recovery.go index ed2b67513996..1e8eb492b204 100644 --- a/x/blockdb/recovery.go +++ b/x/blockdb/recovery.go @@ -15,7 +15,7 @@ const ( // recover attempts to restore the store to a consistent state by scanning the data file // for blocks that may not be correctly indexed, usually after an unclean shutdown. // It reconciles the data file with the index file header and entries. -func (s *Store) recover() error { +func (s *Database) recover() error { dataFileInfo, err := s.dataFile.Stat() if err != nil { return fmt.Errorf("failed to get data file stats for recovery: %w", err) @@ -94,7 +94,7 @@ func (s *Store) recover() error { // recoverBlockAtOffset attempts to read, validate, and index a block at the given offset. // Returns the blockHeader and an error if the block is invalid or incomplete. 
-func (s *Store) recoverBlockAtOffset(offset, dataFileActualSize uint64) (blockHeader, error) { +func (s *Database) recoverBlockAtOffset(offset, dataFileActualSize uint64) (blockHeader, error) { var bh blockHeader if dataFileActualSize-offset < sizeOfBlockHeader { return bh, fmt.Errorf("not enough data for block header at offset %d", offset) @@ -143,7 +143,7 @@ func (s *Store) recoverBlockAtOffset(offset, dataFileActualSize uint64) (blockHe // updateMaxContiguousHeightOnRecovery extends the max contiguous height from the value in the header, // incrementing as long as contiguous blocks exist. -func (s *Store) updateMaxContiguousHeightOnRecovery() { +func (s *Database) updateMaxContiguousHeightOnRecovery() { currentMCH := s.header.MaxContiguousBlockHeight highestKnown := s.maxBlockHeight.Load() From 5fd75ea4472e2c074bdd34b0b1a916002ef74d63 Mon Sep 17 00:00:00 2001 From: Draco Date: Sun, 22 Jun 2025 16:51:51 -0400 Subject: [PATCH 05/27] feat: add tests and update blockdb to have separate methods to read header and body --- x/blockdb/README.md | 149 +++++--------- x/blockdb/block.go | 195 ++++++++++-------- x/blockdb/config.go | 15 +- x/blockdb/database.go | 84 ++++---- x/blockdb/database_test.go | 322 ++++++++++++++++++++++++++++++ x/blockdb/errors.go | 15 +- x/blockdb/helpers_test.go | 64 ++++++ x/blockdb/index.go | 150 +++++++------- x/blockdb/readblock_test.go | 243 +++++++++++++++++++++++ x/blockdb/recovery.go | 52 +++-- x/blockdb/writeblock_test.go | 371 +++++++++++++++++++++++++++++++++++ 11 files changed, 1308 insertions(+), 352 deletions(-) create mode 100644 x/blockdb/database_test.go create mode 100644 x/blockdb/helpers_test.go create mode 100644 x/blockdb/readblock_test.go create mode 100644 x/blockdb/writeblock_test.go diff --git a/x/blockdb/README.md b/x/blockdb/README.md index 5b96c0afeb41..1fc70c9e0059 100644 --- a/x/blockdb/README.md +++ b/x/blockdb/README.md @@ -1,6 +1,6 @@ # BlockDB -BlockDB is a specialized storage system designed for blockchain blocks. It provides O(1) write performance with support for parallel operations. Unlike general-purpose key-value stores like LevelDB that require periodic compaction, BlockDB's append-only design ensures consistently fast writes without the overhead of background maintenance operations. +BlockDB is a specialized database optimized for blockchain blocks. ## Key Functionalities @@ -9,43 +9,39 @@ BlockDB is a specialized storage system designed for blockchain blocks. It provi - **Flexible Write Ordering**: Supports out-of-order block writes for bootstrapping - **Configurable Durability**: Optional `syncToDisk` mode guarantees immediate recoverability - **Automatic Recovery**: Detects and recovers unindexed blocks after unclean shutdowns -- **Data Integrity**: Checksums verify block data on reads -## Architecture +## Design -BlockDB uses two file types: index files and data files. The index file maps block heights to locations in data files, while data files store the actual block content. Data storage can be split across multiple files based on size limits. +BlockDB uses two file types: index files and data files. The index file maps block heights to locations in data files, while data files store the actual block content. Data storage can be split across multiple files based on the maximum data file size. 
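To make the split concrete, a toy in-memory sketch of the idea follows; the `slot` type and the slice-backed "files" are illustrative stand-ins, not the real on-disk implementation:

```go
// Toy model of the two-file split: the data "file" grows by appending block
// bytes, while the index "file" keeps one fixed-size slot per height that
// points back into the data file. Assumes indexFile already has a slot for
// this height.
type slot struct{ offset, size uint64 }

func put(dataFile *[]byte, indexFile []slot, height uint64, block []byte) {
	off := uint64(len(*dataFile))           // sequential append, O(1)
	*dataFile = append(*dataFile, block...) // payload lives in the data file
	indexFile[height] = slot{offset: off, size: uint64(len(block))}
}
```

On disk, each block is additionally prefixed with a small header carrying its height, sizes, and checksum, as the diagram below shows.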
``` ┌─────────────────┐ ┌─────────────────┐ │ Index File │ │ Data File 1 │ │ (.idx) │ │ (.dat) │ ├─────────────────┤ ├─────────────────┤ -│ Header │ │ Block 1 │ +│ Header │ │ Block 0 │ │ - Version │ ┌─────>│ - Header │ │ - Min Height │ │ │ - Data │ -│ - MCH │ │ ├─────────────────┤ -│ - Data Size │ │ │ Block 2 │ -│ - ... │ │ │ │ -├─────────────────┤ │ ┌──>│ - Header │ -│ Entry[0] │ │ │ │ - Data │ -│ - Offset ───────┼──┘ │ ├─────────────────┤ -│ - Size │ │ │ ... │ -├─────────────────┤ │ └─────────────────┘ -│ Entry[1] │ │ -│ - Offset ───────┼─────┘ ┌─────────────────┐ -│ - Size │ │ Data File 2 │ -├─────────────────┤ │ (.dat) │ -│ ... │ ├─────────────────┤ -└─────────────────┘ │ Block N │ - │ - Header │ - │ - Data │ - ├─────────────────┤ +│ - Max Height │ │ ├─────────────────┤ +│ - Data Size │ │ │ Block 1 │ +│ - ... │ │ │ - Header │ +├─────────────────┤ │ ┌──>│ - Data │ +│ Entry[0] │ │ │ ├─────────────────┤ +│ - Offset ───────┼──┘ │ │ ... │ +│ - Size │ │ └─────────────────┘ +│ - Header Size │ │ +├─────────────────┤ │ ┌─────────────────┐ +│ Entry[1] │ │ │ Data File 2 │ +│ - Offset ───────┼─────┘ │ (.dat) │ +│ - Size │ ├─────────────────┤ +│ - Header Size │ │ Block N │ +├─────────────────┤ │ - Header │ +│ ... │ │ - Data │ +└─────────────────┘ ├─────────────────┤ │ ... │ └─────────────────┘ ``` -## Implementation Details - ### File Formats #### Index File Structure @@ -53,7 +49,7 @@ BlockDB uses two file types: index files and data files. The index file maps blo The index file consists of a fixed-size header followed by fixed-size entries: ``` -Index File Header (48 bytes): +Index File Header (72 bytes): ┌────────────────────────────────┬─────────┐ │ Field │ Size │ ├────────────────────────────────┼─────────┤ @@ -63,14 +59,16 @@ Index File Header (48 bytes): │ Min Block Height │ 8 bytes │ │ Max Contiguous Height │ 8 bytes │ │ Data File Size │ 8 bytes │ +│ Reserved │ 24 bytes│ └────────────────────────────────┴─────────┘ -Index Entry (16 bytes): +Index Entry (18 bytes): ┌────────────────────────────────┬─────────┐ │ Field │ Size │ ├────────────────────────────────┼─────────┤ │ Data File Offset │ 8 bytes │ │ Block Data Size │ 8 bytes │ +│ Header Size │ 2 bytes │ └────────────────────────────────┴─────────┘ ``` @@ -79,47 +77,26 @@ Index Entry (16 bytes): Each block in the data file is stored with a header followed by the raw block data: ``` -Block Header (24 bytes): +Block Header (26 bytes): ┌────────────────────────────────┬─────────┐ │ Field │ Size │ ├────────────────────────────────┼─────────┤ │ Height │ 8 bytes │ │ Size │ 8 bytes │ +│ Header Size │ 2 bytes │ │ Checksum │ 8 bytes │ └────────────────────────────────┴─────────┘ ``` -### Design Decisions - -#### Append-Only Architecture - -BlockDB is strictly append-only with no support for deletions. This aligns with blockchain's immutable nature and provides: - -- Simplified concurrency model -- Predictable write performance -- Straightforward recovery logic -- No compaction overhead - -**Trade-off**: Overwriting a block leaves the old data as unreferenced "dead" space. However, since blocks are immutable and rarely overwritten (only during reorgs), this trade-off has minimal impact in practice. - -#### Fixed-Size Index Entries - -Each index entry is exactly 16 bytes, containing the offset and size. This fixed size enables direct calculation of where each block's index entry is located, providing O(1) lookups. For blockchains with high block heights, the index remains efficient - even at height 1 billion, the index file would only be ~16GB. 
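Since every entry is the same size, locating the entry for a height is pure arithmetic. A minimal sketch of that calculation, with `headerSize` and `entrySize` standing in for the on-disk header and entry sizes:

```go
// entryOffset returns the byte offset of the index entry for a height,
// assuming a fixed-size file header followed by equally sized entries.
// Heights below minHeight are assumed to be rejected by the caller.
func entryOffset(height, minHeight, headerSize, entrySize uint64) uint64 {
	return headerSize + (height-minHeight)*entrySize // constant time, no search structure
}
```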
- -#### Two File Type Separation Separating index and data provides several benefits: +### Block Overwrites -- Index files remain relatively small and can benefit from SSD storage -- Data files can use cheaper storage and be backed up independently -- Sequential append-only writes to data files minimize fragmentation -- Index can be rebuilt by scanning data files if needed +BlockDB allows overwriting blocks at existing heights. When a block is overwritten, the new block is appended to the data file and the index entry is updated to point to the new location, leaving the old block data as unreferenced "dead" space. However, since blocks are immutable and rarely overwritten (e.g., during reorgs), this trade-off should have minimal impact in practice. -#### Out-of-Order Block Writing +### Fixed-Size Index Entries -Blocks can be written at any height regardless of arrival order. This is essential for blockchain nodes that may receive blocks out of sequence during syncing operations. +Each index entry is exactly 18 bytes on disk, containing the offset, size, and header size. This fixed size enables direct calculation of where each block's index entry is located, providing O(1) lookups. For blockchains with high block heights, the index remains efficient: even at height 1 billion, the index file would only be ~18GB. -#### Durability and Fsync Behavior +### Durability and Fsync Behavior
Persists the updated index header -### Concurrency Model - -BlockDB uses a reader-writer lock for overall thread safety, with atomic operations for write coordination: - -- Multiple threads can read different blocks simultaneously without blocking -- Multiple threads can write concurrently - they use atomic operations to allocate unique space in the data file -- The reader-writer lock ensures consistency between reads and writes - ## Usage ### Creating a Database @@ -169,8 +124,6 @@ BlockDB uses a reader-writer lock for overall thread safety, with atomic operati import "github.com/ava-labs/avalanchego/x/blockdb" config := blockdb.DefaultDatabaseOptions() -config.MinimumHeight = 1 - db, err := blockdb.New( "/path/to/index", // Index directory "/path/to/data", // Data directory @@ -180,7 +133,8 @@ db, err := blockdb.New( logger, ) if err != nil { - return err + fmt.Println("Error creating database:", err) + return } defer db.Close() ``` @@ -188,27 +142,28 @@ defer db.Close() ### Writing and Reading Blocks ```go -// Write a block +// Write a block with header size height := uint64(100) blockData := []byte("block data...") -err := db.WriteBlock(height, blockData) +headerSize := uint16(500) // First 500 bytes are the header +err := db.WriteBlock(height, blockData, headerSize) -// Read a block +// Read a complete block blockData, err := db.ReadBlock(height) -if err == blockdb.ErrBlockNotFound { +if blockData == nil { // Block doesn't exist at this height } -// Query database state -maxContiguous := db.MaxContiguousHeight() -minHeight := db.MinHeight() +// Read block components separately +headerData, err := db.ReadHeader(height) +bodyData, err := db.ReadBody(height) ``` ## TODO -- [ ] **Multiple Data Files**: Split data across multiple files when MaxDataFileSize is reached -- [ ] **Block Cache**: Implement circular buffer cache for recently accessed blocks -- [ ] **Enforced In-Order Writes**: Optional mode to require blocks be written sequentially, preventing gaps -- [ ] **User buffered pool**: Use a buffered pool for fetch index entries and block headers to avoid allocations -- [ ] **Unit Tests**: Add comprehensive test coverage for all core functionality -- [ ] **Benchmarks**: Add performance benchmarks for all major operations +- [ ] Compress data files to reduce storage size +- [ ] Split data across multiple files when `MaxDataFileSize` is reached +- [ ] Implement a block cache for recently accessed blocks +- [ ] Use a buffered pool to avoid allocations on reads and writes +- [ ] Add tests for core functionality +- [ ] Add performance benchmarks diff --git a/x/blockdb/block.go b/x/blockdb/block.go index 8c2d319af4c0..6f25dfe662fd 100644 --- a/x/blockdb/block.go +++ b/x/blockdb/block.go @@ -3,15 +3,13 @@ package blockdb import ( "encoding" "encoding/binary" - "errors" "fmt" "math" "github.com/cespare/xxhash/v2" + "go.uber.org/zap" ) -const MaxBlockDataSize = 1 << 30 // 1 GB - var ( _ encoding.BinaryMarshaler = blockHeader{} _ encoding.BinaryUnmarshaler = &blockHeader{} @@ -19,12 +17,24 @@ var ( sizeOfBlockHeader = uint64(binary.Size(blockHeader{})) ) +// BlockHeight defines the type for block heights. +type BlockHeight = uint64 + +// BlockData defines the type for block data. +type BlockData = []byte + +// BlockHeaderSize is the size of the header in the block data. +type BlockHeaderSize = uint16 + +// MaxBlockDataSize is the maximum size of a block in bytes (16 MB). +const MaxBlockDataSize = 1 << 24 + // blockHeader is prepended to each block in the data file. 
type blockHeader struct { - Height uint64 // todo: can this be omitted? currently only used for verification - // Size of the raw block data (excluding this blockHeader). - Size uint64 - Checksum uint64 + Height BlockHeight + Size uint64 + HeaderSize BlockHeaderSize + Checksum uint64 } // MarshalBinary implements the encoding.BinaryMarshaler interface. @@ -32,7 +42,8 @@ func (bh blockHeader) MarshalBinary() ([]byte, error) { buf := make([]byte, sizeOfBlockHeader) binary.LittleEndian.PutUint64(buf[0:], bh.Height) binary.LittleEndian.PutUint64(buf[8:], bh.Size) - binary.LittleEndian.PutUint64(buf[16:], bh.Checksum) + binary.LittleEndian.PutUint16(buf[16:], bh.HeaderSize) + binary.LittleEndian.PutUint64(buf[18:], bh.Checksum) return buf, nil } @@ -43,13 +54,13 @@ func (bh *blockHeader) UnmarshalBinary(data []byte) error { } bh.Height = binary.LittleEndian.Uint64(data[0:]) bh.Size = binary.LittleEndian.Uint64(data[8:]) - bh.Checksum = binary.LittleEndian.Uint64(data[16:]) + bh.HeaderSize = binary.LittleEndian.Uint16(data[16:]) + bh.Checksum = binary.LittleEndian.Uint64(data[18:]) return nil } -// WriteBlock inserts a block into the store at the given height. -// Returns an error if the store is closed, the block is empty, or the write fails. -func (s *Database) WriteBlock(height BlockHeight, block Block) error { +// WriteBlock inserts a block into the store at the given height with the specified header size. +func (s *Database) WriteBlock(height BlockHeight, block BlockData, headerSize BlockHeaderSize) error { s.mu.RLock() defer s.mu.RUnlock() @@ -65,6 +76,10 @@ func (s *Database) WriteBlock(height BlockHeight, block Block) error { return ErrBlockTooLarge } + if uint64(headerSize) >= uint64(len(block)) { + return ErrHeaderSizeTooLarge + } + indexFileOffset, err := s.indexEntryOffset(height) if err != nil { return err @@ -78,15 +93,16 @@ func (s *Database) WriteBlock(height BlockHeight, block Block) error { } bh := blockHeader{ - Height: height, - Size: uint64(len(block)), - Checksum: calculateChecksum(block), + Height: height, + Size: blockDataLen, + HeaderSize: headerSize, + Checksum: calculateChecksum(block), } - if err := s.writeBlockAtOffset(writeDataOffset, bh, block); err != nil { + if err := s.writeBlockAt(writeDataOffset, bh, block); err != nil { return err } - if err := s.writeIndexEntryAt(indexFileOffset, writeDataOffset, blockDataLen); err != nil { + if err := s.writeIndexEntryAt(indexFileOffset, writeDataOffset, blockDataLen, headerSize); err != nil { return err } @@ -94,8 +110,8 @@ func (s *Database) WriteBlock(height BlockHeight, block Block) error { } // ReadBlock retrieves a block by its height. -// Returns the block data or an error if not found or block data is corrupted. -func (s *Database) ReadBlock(height BlockHeight) (Block, error) { +// Returns nil if the block is not found. 
+func (s *Database) ReadBlock(height BlockHeight) (BlockData, error) { s.mu.RLock() defer s.mu.RUnlock() @@ -105,78 +121,107 @@ func (s *Database) ReadBlock(height BlockHeight) (Block, error) { indexEntry, err := s.readIndexEntry(height) if err != nil { - if errors.Is(err, ErrInvalidBlockHeight) { - return nil, ErrBlockNotFound - } - return nil, fmt.Errorf("failed to prepare for reading index entry for height %d: %w", height, err) + return nil, err } if indexEntry.IsEmpty() { - return nil, ErrBlockNotFound + return nil, nil } - bh, err := s.readAndVerifyBlockHeader(indexEntry, height) + // Read the complete block data + blockData := make(BlockData, indexEntry.Size) + actualDataOffset := indexEntry.Offset + sizeOfBlockHeader + if actualDataOffset < indexEntry.Offset { + return nil, fmt.Errorf("internal error: block data offset calculation overflowed") + } + _, err = s.dataFile.ReadAt(blockData, int64(actualDataOffset)) if err != nil { - return nil, err + return nil, fmt.Errorf("failed to read block data from data file: %w", err) } - return s.readAndVerifyBlockData(indexEntry, bh) + return blockData, nil } -func (s *Database) readAndVerifyBlockHeader(indexEntry IndexEntry, expectedHeight BlockHeight) (blockHeader, error) { - var bh blockHeader - dataHeaderBuf := make([]byte, sizeOfBlockHeader) - _, err := s.dataFile.ReadAt(dataHeaderBuf, int64(indexEntry.Offset)) - if err != nil { - return bh, fmt.Errorf("failed to read block header from data file for height %d: %w", expectedHeight, err) +// ReadHeader retrieves only the header portion of a block by its height. +// Returns nil if the block is not found or no header. +func (s *Database) ReadHeader(height BlockHeight) (BlockData, error) { + s.mu.RLock() + defer s.mu.RUnlock() + + if s.closed { + return nil, ErrDatabaseClosed } - if err := bh.UnmarshalBinary(dataHeaderBuf); err != nil { - return bh, fmt.Errorf("failed to deserialize block header for height %d: %w", expectedHeight, err) + indexEntry, err := s.readIndexEntry(height) + if err != nil { + return nil, err + } + if indexEntry.IsEmpty() { + return nil, nil } - if bh.Size != indexEntry.Size { - return bh, fmt.Errorf("%w: for height %d, index size %d, data header size %d", ErrBlockSizeMismatch, expectedHeight, indexEntry.Size, bh.Size) + // Return nil if there's no header data + if indexEntry.HeaderSize == 0 { + return nil, nil } - if bh.Height != expectedHeight { - return bh, fmt.Errorf("internal error: requested %d, data header contains %d", expectedHeight, bh.Height) + + // Validate header size doesn't exceed total block size + if uint64(indexEntry.HeaderSize) > indexEntry.Size { + return nil, fmt.Errorf("invalid header size %d exceeds block size %d", indexEntry.HeaderSize, indexEntry.Size) } - return bh, nil -} -func (s *Database) readAndVerifyBlockData(indexEntry IndexEntry, bh blockHeader) (Block, error) { - blockData := make(Block, bh.Size) + // Read only the header portion + headerData := make([]byte, indexEntry.HeaderSize) actualDataOffset := indexEntry.Offset + sizeOfBlockHeader if actualDataOffset < indexEntry.Offset { - return nil, fmt.Errorf("internal error: block data offset calculation overflowed for height %d", bh.Height) + return nil, fmt.Errorf("internal error: block data offset calculation overflowed") } - - _, err := s.dataFile.ReadAt(blockData, int64(actualDataOffset)) + _, err = s.dataFile.ReadAt(headerData, int64(actualDataOffset)) if err != nil { - return nil, fmt.Errorf("failed to read block data from data file for height %d: %w", bh.Height, err) + return nil, 
fmt.Errorf("failed to read block header data from data file: %w", err) } - calculatedChecksum := calculateChecksum(blockData) - if calculatedChecksum != bh.Checksum { - return nil, fmt.Errorf("%w: for block height %d", ErrChecksumMismatch, bh.Height) + return headerData, nil +} + +// ReadBody retrieves only the body portion (excluding header) of a block by its height. +// Returns nil if the block is not found. +func (s *Database) ReadBody(height BlockHeight) (BlockData, error) { + s.mu.RLock() + defer s.mu.RUnlock() + + if s.closed { + return nil, ErrDatabaseClosed } - return blockData, nil + indexEntry, err := s.readIndexEntry(height) + if err != nil { + return nil, err + } + if indexEntry.IsEmpty() { + return nil, nil + } + + bodySize := indexEntry.Size - uint64(indexEntry.HeaderSize) + bodyData := make([]byte, bodySize) + bodyOffset := indexEntry.Offset + sizeOfBlockHeader + uint64(indexEntry.HeaderSize) + _, err = s.dataFile.ReadAt(bodyData, int64(bodyOffset)) + if err != nil { + return nil, fmt.Errorf("failed to read block body data from data file: %w", err) + } + return bodyData, nil } func calculateChecksum(data []byte) uint64 { return xxhash.Sum64(data) } -func (s *Database) writeBlockAtOffset(offset uint64, bh blockHeader, block Block) error { +func (s *Database) writeBlockAt(offset uint64, bh blockHeader, block BlockData) error { headerBytes, err := bh.MarshalBinary() if err != nil { return fmt.Errorf("failed to serialize block header: %w", err) } - // Allocate combined buffer for header and block data - // Using a single WriteAt instead of two separate calls for header and block - // data reduces syscall overhead in high-concurrency environments. - // The memory copy cost is lower than the syscall cost for typical block sizes. + // Allocate combined buffer for header and block data and write it to the data file combinedBuf := make([]byte, sizeOfBlockHeader+uint64(len(block))) copy(combinedBuf, headerBytes) copy(combinedBuf[sizeOfBlockHeader:], block) @@ -192,26 +237,26 @@ func (s *Database) writeBlockAtOffset(offset uint64, bh blockHeader, block Block return nil } -func (s *Database) updateBlockHeights(writtenBlockHeight uint64) error { - // update max contiguous height - var prevContiguousCandidate uint64 - if writtenBlockHeight == s.header.MinBlockHeight { - if s.header.MinBlockHeight > 0 { - prevContiguousCandidate = s.header.MinBlockHeight - 1 - } else { - prevContiguousCandidate = 0 - } - } else if writtenBlockHeight > s.header.MinBlockHeight { +func (s *Database) updateBlockHeights(writtenBlockHeight BlockHeight) error { + prevContiguousCandidate := uint64(unsetHeight) + if writtenBlockHeight > s.header.MinHeight { prevContiguousCandidate = writtenBlockHeight - 1 - } else { - return fmt.Errorf("internal error in MCH update: height %d < minimum %d", writtenBlockHeight, s.header.MinBlockHeight) } + if s.maxContiguousHeight.CompareAndSwap(prevContiguousCandidate, writtenBlockHeight) { currentMax := writtenBlockHeight for { nextHeightToVerify := currentMax + 1 - idxEntry, readErr := s.readIndexEntry(nextHeightToVerify) - if readErr != nil || idxEntry.IsEmpty() { + entry, err := s.readIndexEntry(nextHeightToVerify) + if err != nil { + s.log.Error( + "error reading index entry when updating max contiguous height", + zap.Uint64("height", nextHeightToVerify), + zap.Error(err), + ) + break + } + if entry.IsEmpty() { break } if !s.maxContiguousHeight.CompareAndSwap(currentMax, nextHeightToVerify) { @@ -222,19 +267,15 @@ func (s *Database) updateBlockHeights(writtenBlockHeight 
uint64) error { } // update max block height and persist header on checkpoint interval - var oldMaxHeight uint64 + var oldMaxHeight BlockHeight for { oldMaxHeight = s.maxBlockHeight.Load() - if writtenBlockHeight <= oldMaxHeight { + if writtenBlockHeight <= oldMaxHeight && oldMaxHeight != unsetHeight { break } if s.maxBlockHeight.CompareAndSwap(oldMaxHeight, writtenBlockHeight) { - // todo: consider separating checkpoint logic out of this function - // a situation may arise where multiple blocks are written that trigger a checkpoint - // in this case, we are persisting the header multiple times. But this can only happen during bootstrapping. - // One solution is only checkpoint after x blocks are written, instead of at specific heights. if writtenBlockHeight%s.options.CheckpointInterval == 0 { - if err := s.persistIndexHeader(s.syncToDisk); err != nil { + if err := s.persistIndexHeader(); err != nil { return fmt.Errorf("block %d written, but checkpoint failed: %w", writtenBlockHeight, err) } } diff --git a/x/blockdb/config.go b/x/blockdb/config.go index a88f45770dd5..9d888eceb1d3 100644 --- a/x/blockdb/config.go +++ b/x/blockdb/config.go @@ -4,9 +4,12 @@ import ( "fmt" ) +// DefaultMaxDataFileSize is the default maximum size of the data block file in bytes (500GB). +const DefaultMaxDataFileSize = 500 * 1024 * 1024 * 1024 + // DatabaseConfig contains configuration parameters for BlockDB. type DatabaseConfig struct { - // MinimumHeight is the lowest block height the store will track (must be >= 1). + // MinimumHeight is the lowest block height tracked by the database. MinimumHeight uint64 // MaxDataFileSize sets the maximum size of the data block file in bytes. If 0, there is no limit. @@ -19,20 +22,16 @@ type DatabaseConfig struct { // DefaultDatabaseConfig returns the default options for BlockDB. func DefaultDatabaseConfig() DatabaseConfig { return DatabaseConfig{ - MinimumHeight: 1, - MaxDataFileSize: 1 << 31, // Default to 2GB + MinimumHeight: 0, + MaxDataFileSize: DefaultMaxDataFileSize, CheckpointInterval: 1024, } } // Validate checks if the store options are valid. func (opts DatabaseConfig) Validate() error { - if opts.MinimumHeight == 0 { - return fmt.Errorf("%w: MinimumHeight cannot be 0, must be >= 1", ErrInvalidBlockHeight) - } - if opts.CheckpointInterval == 0 { - return fmt.Errorf("%w: CheckpointInterval cannot be 0", ErrInvalidCheckpointInterval) + return fmt.Errorf("CheckpointInterval cannot be 0") } return nil } diff --git a/x/blockdb/database.go b/x/blockdb/database.go index 4489376247d8..b74a9c02ea8c 100644 --- a/x/blockdb/database.go +++ b/x/blockdb/database.go @@ -1,9 +1,8 @@ package blockdb import ( - "bytes" - "encoding/binary" "fmt" + "math" "os" "path/filepath" "sync" @@ -15,30 +14,26 @@ import ( const ( indexFileName = "blockdb.idx" dataFileName = "blockdb.dat" -) - -// BlockHeight defines the type for block heights. -type BlockHeight = uint64 -// Block defines the type for block data. -type Block = []byte + // Since 0 is a valid height, math.MaxUint64 is used to indicate unset height. + // It is not be possible for block height to be max uint64 as it would overflow the index entry offset + unsetHeight = math.MaxUint64 +) -// Database is a collection of blockchain blocks. It provides methods to read, write, and manage blocks on disk. +// Database stores blockchain blocks on disk and provides methods to read, and write blocks. 
type Database struct { indexFile *os.File dataFile *os.File options DatabaseConfig - header IndexFileHeader + header indexFileHeader log logging.Logger + mu sync.RWMutex + closed bool // syncToDisk determines if fsync is called after each write for durability. syncToDisk bool - // maxBlockHeight tracks the highest block height that has been written to the store, even if there are gaps in the sequence. + // maxBlockHeight tracks the highest block height that has been written to the db, even if there are gaps in the sequence. maxBlockHeight atomic.Uint64 - // closed indicates if the store has been closed. - closed bool - // mu synchronizes access to the store. - mu sync.RWMutex // nextDataWriteOffset tracks the next position to write new data in the data file. nextDataWriteOffset atomic.Uint64 // maxContiguousHeight tracks the highest block height known to be contiguously stored. @@ -77,29 +72,25 @@ func (s *Database) openOrCreateFiles(indexDir, dataDir string, truncate bool) er func (s *Database) loadOrInitializeHeader(truncate bool) error { if truncate { - initialMCH := uint64(0) - if s.options.MinimumHeight > 1 { - initialMCH = s.options.MinimumHeight - 1 - s.maxContiguousHeight.Store(initialMCH) - } - - s.header = IndexFileHeader{ - Version: IndexFileVersion, - MinBlockHeight: s.options.MinimumHeight, - MaxDataFileSize: s.options.MaxDataFileSize, - MaxBlockHeight: 0, - MaxContiguousBlockHeight: initialMCH, - DataFileSize: 0, + s.header = indexFileHeader{ + Version: IndexFileVersion, + MinHeight: s.options.MinimumHeight, + MaxDataFileSize: s.options.MaxDataFileSize, + MaxHeight: unsetHeight, + MaxContiguousHeight: unsetHeight, + DataFileSize: 0, } + s.maxContiguousHeight.Store(unsetHeight) + s.maxBlockHeight.Store(unsetHeight) - buf := new(bytes.Buffer) - if err := binary.Write(buf, binary.LittleEndian, &s.header); err != nil { + headerBytes, err := s.header.MarshalBinary() + if err != nil { return fmt.Errorf("failed to serialize new header: %w", err) } - if uint64(buf.Len()) != sizeOfIndexFileHeader { - return fmt.Errorf("internal error: serialized new header size %d, expected %d", buf.Len(), sizeOfIndexFileHeader) + if uint64(len(headerBytes)) != sizeOfIndexFileHeader { + return fmt.Errorf("internal error: serialized new header size %d, expected %d", len(headerBytes), sizeOfIndexFileHeader) } - if _, err := s.indexFile.WriteAt(buf.Bytes(), 0); err != nil { + if _, err := s.indexFile.WriteAt(headerBytes, 0); err != nil { return fmt.Errorf("failed to write initial index header: %w", err) } @@ -119,8 +110,8 @@ func (s *Database) loadOrInitializeHeader(truncate bool) error { return fmt.Errorf("mismatched index file version: found %d, expected %d", s.header.Version, IndexFileVersion) } s.nextDataWriteOffset.Store(s.header.DataFileSize) - s.maxContiguousHeight.Store(s.header.MaxContiguousBlockHeight) - s.maxBlockHeight.Store(s.header.MaxBlockHeight) + s.maxContiguousHeight.Store(s.header.MaxContiguousHeight) + s.maxBlockHeight.Store(s.header.MaxHeight) return nil } @@ -129,8 +120,8 @@ func (s *Database) loadOrInitializeHeader(truncate bool) error { // Parameters: // - indexDir: Directory for the index file // - dataDir: Directory for the data file(s) -// - syncToDisk: If true, forces fsync after writes for guaranteed recoverability -// - truncate: If true, truncates existing store files +// - syncToDisk: If true, forces fsync after writes +// - truncate: If true, truncates the index file // - config: Optional configuration parameters // - log: Logger instance for structured logging func 
New(indexDir, dataDir string, syncToDisk bool, truncate bool, config DatabaseConfig, log logging.Logger) (*Database, error) { @@ -176,21 +167,14 @@ func (s *Database) closeFiles() { } // MaxContiguousHeight returns the highest block height known to be contiguously stored. -func (s *Database) MaxContiguousHeight() BlockHeight { - return s.maxContiguousHeight.Load() -} - -// MinHeight returns the minimum block height configured for this store. -func (s *Database) MinHeight() uint64 { - return s.header.MinBlockHeight -} - -func (s *Database) MaxBlockHeight() BlockHeight { - return s.maxBlockHeight.Load() +func (s *Database) MaxContiguousHeight() (height BlockHeight, found bool) { + if s.maxContiguousHeight.Load() == unsetHeight { + return 0, false + } + return s.maxContiguousHeight.Load(), true } // Close flushes pending writes and closes the store files. -// It is safe to call Close multiple times. func (s *Database) Close() error { s.mu.Lock() defer s.mu.Unlock() @@ -200,7 +184,7 @@ func (s *Database) Close() error { } s.closed = true - err := s.persistIndexHeader(false) + err := s.persistIndexHeader() s.closeFiles() return err } diff --git a/x/blockdb/database_test.go b/x/blockdb/database_test.go new file mode 100644 index 000000000000..1b0a4c00b4ad --- /dev/null +++ b/x/blockdb/database_test.go @@ -0,0 +1,322 @@ +package blockdb + +import ( + "errors" + "os" + "path/filepath" + "testing" + + "github.com/ava-labs/avalanchego/utils/logging" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestNew_Truncate(t *testing.T) { + // Create initial database + tempDir, err := os.MkdirTemp("", "blockdb_truncate_test_*") + require.NoError(t, err) + defer os.RemoveAll(tempDir) + indexDir := filepath.Join(tempDir, "index") + dataDir := filepath.Join(tempDir, "data") + db, err := New(indexDir, dataDir, false, true, DefaultDatabaseConfig(), logging.NoLog{}) + require.NoError(t, err) + require.NotNil(t, db) + + // Write some test data and close the database + testBlock := []byte("test block data") + err = db.WriteBlock(0, testBlock, 0) + require.NoError(t, err) + err = db.Close() + require.NoError(t, err) + + // Reopen with truncate=true and verify data is gone + db2, err := New(indexDir, dataDir, false, true, DefaultDatabaseConfig(), logging.NoLog{}) + require.NoError(t, err) + require.NotNil(t, db2) + defer db2.Close() + readBlock2, err := db2.ReadBlock(1) + require.NoError(t, err) + require.Nil(t, readBlock2) + _, found := db2.MaxContiguousHeight() + require.False(t, found) +} + +func TestNew_NoTruncate(t *testing.T) { + tempDir, err := os.MkdirTemp("", "blockdb_no_truncate_test_*") + require.NoError(t, err) + defer os.RemoveAll(tempDir) + indexDir := filepath.Join(tempDir, "index") + dataDir := filepath.Join(tempDir, "data") + db, err := New(indexDir, dataDir, false, true, DefaultDatabaseConfig(), logging.NoLog{}) + require.NoError(t, err) + require.NotNil(t, db) + + // Write some test data and close the database + testBlock := []byte("test block data") + err = db.WriteBlock(1, testBlock, 5) + require.NoError(t, err) + readBlock, err := db.ReadBlock(1) + require.NoError(t, err) + require.Equal(t, testBlock, readBlock) + err = db.Close() + require.NoError(t, err) + + // Reopen with truncate=false and verify data is still there + db2, err := New(indexDir, dataDir, false, false, DefaultDatabaseConfig(), logging.NoLog{}) + require.NoError(t, err) + require.NotNil(t, db2) + defer db2.Close() + readBlock1, err := db2.ReadBlock(1) + require.NoError(t, err) + 
require.Equal(t, testBlock, readBlock1) + + // Verify we can write additional data + testBlock2 := []byte("test block data 3") + err = db2.WriteBlock(2, testBlock2, 0) + require.NoError(t, err) + readBlock2, err := db2.ReadBlock(2) + require.NoError(t, err) + require.Equal(t, testBlock2, readBlock2) +} + +func TestNew_Params(t *testing.T) { + tempDir, err := os.MkdirTemp("", "blockdb_test_*") + require.NoError(t, err) + defer os.RemoveAll(tempDir) + tests := []struct { + name string + indexDir string + dataDir string + syncToDisk bool + config DatabaseConfig + log logging.Logger + wantErr error + expectClose bool + }{ + { + name: "default config", + indexDir: tempDir, + dataDir: tempDir, + config: DefaultDatabaseConfig(), + }, + { + name: "custom config", + indexDir: tempDir, + dataDir: tempDir, + syncToDisk: true, + config: DatabaseConfig{ + MinimumHeight: 100, + MaxDataFileSize: 1024 * 1024 * 1024, // 1GB + CheckpointInterval: 512, + }, + }, + { + name: "empty index directory", + indexDir: "", + dataDir: tempDir, + config: DefaultDatabaseConfig(), + wantErr: errors.New("both indexDir and dataDir must be provided"), + }, + { + name: "empty data directory", + indexDir: tempDir, + dataDir: "", + config: DefaultDatabaseConfig(), + wantErr: errors.New("both indexDir and dataDir must be provided"), + }, + { + name: "both directories empty", + indexDir: "", + config: DefaultDatabaseConfig(), + dataDir: "", + wantErr: errors.New("both indexDir and dataDir must be provided"), + }, + { + name: "invalid config - zero checkpoint interval", + indexDir: tempDir, + dataDir: tempDir, + config: DatabaseConfig{ + MinimumHeight: 0, + MaxDataFileSize: DefaultMaxDataFileSize, + CheckpointInterval: 0, + }, + wantErr: errors.New("CheckpointInterval cannot be 0"), + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + db, err := New(tt.indexDir, tt.dataDir, tt.syncToDisk, true, tt.config, tt.log) + + if tt.wantErr != nil { + require.Error(t, err) + assert.Contains(t, err.Error(), tt.wantErr.Error()) + return + } + + require.NoError(t, err) + require.NotNil(t, db) + + // Verify the database was created with correct configuration + assert.Equal(t, tt.config.MinimumHeight, db.options.MinimumHeight) + assert.Equal(t, tt.config.MaxDataFileSize, db.options.MaxDataFileSize) + assert.Equal(t, tt.config.CheckpointInterval, db.options.CheckpointInterval) + assert.Equal(t, tt.syncToDisk, db.syncToDisk) + + // Verify files were created + indexPath := filepath.Join(tt.indexDir, indexFileName) + dataPath := filepath.Join(tt.dataDir, dataFileName) + assert.FileExists(t, indexPath) + assert.FileExists(t, dataPath) + + // Test that we can close the database + err = db.Close() + require.NoError(t, err) + }) + } +} + +func TestNew_IndexFileErrors(t *testing.T) { + tests := []struct { + name string + setup func() (string, string) + wantErrMsg string + }{ + { + name: "corrupted index file", + setup: func() (string, string) { + tempDir, _ := os.MkdirTemp("", "blockdb_test_*") + indexDir := filepath.Join(tempDir, "index") + dataDir := filepath.Join(tempDir, "data") + os.MkdirAll(indexDir, 0755) + os.MkdirAll(dataDir, 0755) + + // Create a corrupted index file + indexPath := filepath.Join(indexDir, indexFileName) + corruptedData := []byte("corrupted index file data") + err := os.WriteFile(indexPath, corruptedData, 0666) + if err != nil { + return "", "" + } + + return indexDir, dataDir + }, + wantErrMsg: "failed to read index header", + }, + { + name: "version mismatch in existing index file", + setup: func() 
(string, string) { + tempDir, _ := os.MkdirTemp("", "blockdb_test_*") + indexDir := filepath.Join(tempDir, "index") + dataDir := filepath.Join(tempDir, "data") + + // Create directories + os.MkdirAll(indexDir, 0755) + os.MkdirAll(dataDir, 0755) + + // Create a valid index file with wrong version + indexPath := filepath.Join(indexDir, indexFileName) + header := indexFileHeader{ + Version: 999, // Wrong version + MinHeight: 0, + MaxDataFileSize: DefaultMaxDataFileSize, + MaxHeight: unsetHeight, + MaxContiguousHeight: unsetHeight, + DataFileSize: 0, + } + + headerBytes, err := header.MarshalBinary() + if err != nil { + return "", "" + } + err = os.WriteFile(indexPath, headerBytes, 0666) + if err != nil { + return "", "" + } + + return indexDir, dataDir + }, + wantErrMsg: "mismatched index file version", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + indexDir, dataDir := tt.setup() + if indexDir == "" || dataDir == "" { + t.Skip("Setup failed, skipping test") + } + defer os.RemoveAll(filepath.Dir(indexDir)) + defer os.RemoveAll(filepath.Dir(dataDir)) + + _, err := New(indexDir, dataDir, false, false, DefaultDatabaseConfig(), logging.NoLog{}) + require.Error(t, err) + assert.Contains(t, err.Error(), tt.wantErrMsg) + }) + } +} + +func TestIndexFileHeaderAlignment(t *testing.T) { + if sizeOfIndexFileHeader%sizeOfIndexEntry != 0 { + t.Errorf("sizeOfIndexFileHeader (%d) is not a multiple of sizeOfIndexEntry (%d)", + sizeOfIndexFileHeader, sizeOfIndexEntry) + } +} + +func TestNew_IndexFileConfigPrecedence(t *testing.T) { + // set up db + initialConfig := DatabaseConfig{ + MinimumHeight: 100, + MaxDataFileSize: 1024 * 1024, // 1MB limit + CheckpointInterval: 1024, + } + tempDir, err := os.MkdirTemp("", "blockdb_config_precedence_test_*") + require.NoError(t, err) + defer os.RemoveAll(tempDir) + db, err := New(tempDir, tempDir, false, true, initialConfig, logging.NoLog{}) + require.NoError(t, err) + require.NotNil(t, db) + + // Write a block at height 100 and close db + testBlock := []byte("test block data") + err = db.WriteBlock(100, testBlock, 0) + require.NoError(t, err) + readBlock, err := db.ReadBlock(100) + require.NoError(t, err) + require.Equal(t, testBlock, readBlock) + err = db.Close() + require.NoError(t, err) + + // Reopen with different config that has higher minimum height and smaller max data file size + differentConfig := DatabaseConfig{ + MinimumHeight: 200, // Higher minimum height + MaxDataFileSize: 512 * 1024, // 512KB limit (smaller than original 1MB) + CheckpointInterval: 512, + } + db2, err := New(tempDir, tempDir, false, false, differentConfig, logging.NoLog{}) + require.NoError(t, err) + require.NotNil(t, db2) + defer db2.Close() + + // The database should still accept blocks between 100 and 200 + testBlock2 := []byte("test block data 2") + err = db2.WriteBlock(150, testBlock2, 0) + require.NoError(t, err) + readBlock2, err := db2.ReadBlock(150) + require.NoError(t, err) + require.Equal(t, testBlock2, readBlock2) + + // Verify that writing below initial minimum height fails + err = db2.WriteBlock(50, []byte("invalid block"), 0) + require.Error(t, err) + require.True(t, errors.Is(err, ErrInvalidBlockHeight)) + + // Write a large block that would exceed the new config's 512KB limit + // but should succeed because we use the original 1MB limit from index file + largeBlock := make([]byte, 768*1024) // 768KB block + err = db2.WriteBlock(200, largeBlock, 0) + require.NoError(t, err) + readLargeBlock, err := db2.ReadBlock(200) + require.NoError(t, 
err) + require.Equal(t, largeBlock, readLargeBlock) +} diff --git a/x/blockdb/errors.go b/x/blockdb/errors.go index 919310d718dd..75a82187bfa4 100644 --- a/x/blockdb/errors.go +++ b/x/blockdb/errors.go @@ -3,13 +3,10 @@ package blockdb import "fmt" var ( - ErrInvalidBlockHeight = fmt.Errorf("blockdb: invalid block height") - ErrBlockNotFound = fmt.Errorf("blockdb: block not found") - ErrBlockEmpty = fmt.Errorf("blockdb: block is empty") - ErrBlockSizeMismatch = fmt.Errorf("blockdb: block size in index file does not match data header") - ErrChecksumMismatch = fmt.Errorf("blockdb: checksum mismatch") - ErrDatabaseClosed = fmt.Errorf("blockdb: database is closed") - ErrInvalidCheckpointInterval = fmt.Errorf("blockdb: invalid checkpoint interval") - ErrCorrupted = fmt.Errorf("blockdb: unrecoverable corruption detected") - ErrBlockTooLarge = fmt.Errorf("blockdb: block size exceeds maximum allowed size of %d bytes", MaxBlockDataSize) + ErrInvalidBlockHeight = fmt.Errorf("blockdb: invalid block height") + ErrBlockEmpty = fmt.Errorf("blockdb: block is empty") + ErrDatabaseClosed = fmt.Errorf("blockdb: database is closed") + ErrCorrupted = fmt.Errorf("blockdb: unrecoverable corruption detected") + ErrBlockTooLarge = fmt.Errorf("blockdb: block size exceeds maximum allowed size of %d bytes", MaxBlockDataSize) + ErrHeaderSizeTooLarge = fmt.Errorf("blockdb: header size cannot be >= block size") ) diff --git a/x/blockdb/helpers_test.go b/x/blockdb/helpers_test.go new file mode 100644 index 000000000000..79cb4ffa2d7e --- /dev/null +++ b/x/blockdb/helpers_test.go @@ -0,0 +1,64 @@ +package blockdb + +import ( + "crypto/rand" + "math/big" + "os" + "testing" + + "github.com/ava-labs/avalanchego/utils/logging" +) + +func newTestDatabase(t *testing.T, syncToDisk bool, opts *DatabaseConfig) (*Database, func()) { + t.Helper() + dir, err := os.MkdirTemp("", "blockdb_test_*") + if err != nil { + t.Fatalf("failed to create temp dir: %v", err) + } + idxDir := dir + "/idx" + dataDir := dir + "/dat" + var config DatabaseConfig + if opts != nil { + config = *opts + } else { + config = DefaultDatabaseConfig() + } + db, err := New(idxDir, dataDir, syncToDisk, true, config, logging.NoLog{}) + if err != nil { + os.RemoveAll(dir) + t.Fatalf("failed to create database: %v", err) + } + cleanup := func() { + db.Close() + os.RemoveAll(dir) + } + return db, cleanup +} + +// randomBlock generates a random block of size 1KB-50KB. 
+func randomBlock(t *testing.T) []byte { + size, err := rand.Int(rand.Reader, big.NewInt(50*1024-1024+1)) + if err != nil { + t.Fatalf("failed to generate random size: %v", err) + } + blockSize := int(size.Int64()) + 1024 // 1KB to 50KB + b := make([]byte, blockSize) + _, err = rand.Read(b) + if err != nil { + t.Fatalf("failed to fill random block: %v", err) + } + return b +} + +func checkDatabaseState(t *testing.T, db *Database, maxHeight uint64, maxContiguousHeight uint64) { + if got := db.maxBlockHeight.Load(); got != maxHeight { + t.Fatalf("maxBlockHeight: got %d, want %d", got, maxHeight) + } + gotMCH, ok := db.MaxContiguousHeight() + if maxContiguousHeight != unsetHeight && !ok { + t.Fatalf("MaxContiguousHeight is not set, want %d", maxContiguousHeight) + } + if ok && gotMCH != maxContiguousHeight { + t.Fatalf("maxContiguousHeight: got %d, want %d", gotMCH, maxContiguousHeight) + } +} diff --git a/x/blockdb/index.go b/x/blockdb/index.go index 661d748cf278..3bf6a108b76a 100644 --- a/x/blockdb/index.go +++ b/x/blockdb/index.go @@ -3,6 +3,7 @@ package blockdb import ( "encoding" "encoding/binary" + "errors" "fmt" "io" "math" @@ -13,125 +14,132 @@ const ( ) var ( - _ encoding.BinaryMarshaler = IndexEntry{} - _ encoding.BinaryUnmarshaler = &IndexEntry{} + _ encoding.BinaryMarshaler = indexEntry{} + _ encoding.BinaryUnmarshaler = &indexEntry{} - sizeOfIndexEntry = uint64(binary.Size(IndexEntry{})) - sizeOfIndexFileHeader = uint64(binary.Size(IndexFileHeader{})) + sizeOfIndexEntry = uint64(binary.Size(indexEntry{})) + sizeOfIndexFileHeader = uint64(binary.Size(indexFileHeader{})) ) -// IndexEntry locates a block within the data file. -type IndexEntry struct { +type indexEntry struct { // Offset is the byte offset in the data file where the block's header starts. Offset uint64 - // Size is the length in bytes of the block's data (not including the header). - Size uint64 // todo: can this be omitted? currently is this only used to verify the block size, but we are already doing checksum verification. Removing this can double the amount of data in the index file. + // Size is the length in bytes of the block's data (excluding the blockHeader). + Size uint64 + // HeaderSize is the size in bytes of the block's header portion within the data. + HeaderSize uint16 } // IsEmpty returns true if this entry is uninitialized. // This indicates a slot where no block has been written. -func (e IndexEntry) IsEmpty() bool { +func (e indexEntry) IsEmpty() bool { return e.Offset == 0 && e.Size == 0 } -// MarshalBinary implements encoding.BinaryMarshaler for IndexEntry. -func (e IndexEntry) MarshalBinary() ([]byte, error) { +// MarshalBinary implements encoding.BinaryMarshaler for indexEntry. +func (e indexEntry) MarshalBinary() ([]byte, error) { buf := make([]byte, sizeOfIndexEntry) binary.LittleEndian.PutUint64(buf[0:], e.Offset) binary.LittleEndian.PutUint64(buf[8:], e.Size) + binary.LittleEndian.PutUint16(buf[16:], e.HeaderSize) return buf, nil } -// UnmarshalBinary implements encoding.BinaryUnmarshaler for IndexEntry. -func (e *IndexEntry) UnmarshalBinary(data []byte) error { +// UnmarshalBinary implements encoding.BinaryUnmarshaler for indexEntry. 
+func (e *indexEntry) UnmarshalBinary(data []byte) error { if len(data) != int(sizeOfIndexEntry) { - return fmt.Errorf("incorrect data length to unmarshal IndexEntry: got %d bytes, need exactly %d", len(data), sizeOfIndexEntry) + return fmt.Errorf("incorrect data length to unmarshal indexEntry: got %d bytes, need exactly %d", len(data), sizeOfIndexEntry) } e.Offset = binary.LittleEndian.Uint64(data[0:]) e.Size = binary.LittleEndian.Uint64(data[8:]) + e.HeaderSize = binary.LittleEndian.Uint16(data[16:]) return nil } -// IndexFileHeader is the header of the index file. -type IndexFileHeader struct { - Version uint64 - MaxDataFileSize uint64 - MaxBlockHeight uint64 - MinBlockHeight BlockHeight - MaxContiguousBlockHeight BlockHeight - DataFileSize uint64 +// indexFileHeader is the header of the index file. +type indexFileHeader struct { + Version uint64 + MaxDataFileSize uint64 + MaxHeight BlockHeight + MinHeight BlockHeight + MaxContiguousHeight BlockHeight + DataFileSize uint64 + // reserve 24 bytes for future use + Reserved [24]byte } -// Add MarshalBinary for IndexFileHeader -func (h IndexFileHeader) MarshalBinary() ([]byte, error) { +// Add MarshalBinary for indexFileHeader +func (h indexFileHeader) MarshalBinary() ([]byte, error) { buf := make([]byte, sizeOfIndexFileHeader) binary.LittleEndian.PutUint64(buf[0:], h.Version) binary.LittleEndian.PutUint64(buf[8:], h.MaxDataFileSize) - binary.LittleEndian.PutUint64(buf[16:], h.MaxBlockHeight) - binary.LittleEndian.PutUint64(buf[24:], h.MinBlockHeight) - binary.LittleEndian.PutUint64(buf[32:], h.MaxContiguousBlockHeight) + binary.LittleEndian.PutUint64(buf[16:], h.MaxHeight) + binary.LittleEndian.PutUint64(buf[24:], h.MinHeight) + binary.LittleEndian.PutUint64(buf[32:], h.MaxContiguousHeight) binary.LittleEndian.PutUint64(buf[40:], h.DataFileSize) return buf, nil } -// Add UnmarshalBinary for IndexFileHeader -func (h *IndexFileHeader) UnmarshalBinary(data []byte) error { +// Add UnmarshalBinary for indexFileHeader +func (h *indexFileHeader) UnmarshalBinary(data []byte) error { if len(data) != int(sizeOfIndexFileHeader) { return fmt.Errorf( - "incorrect data length to unmarshal IndexFileHeader: got %d bytes, need exactly %d", + "incorrect data length to unmarshal indexFileHeader: got %d bytes, need exactly %d", len(data), sizeOfIndexFileHeader, ) } h.Version = binary.LittleEndian.Uint64(data[0:]) h.MaxDataFileSize = binary.LittleEndian.Uint64(data[8:]) - h.MaxBlockHeight = binary.LittleEndian.Uint64(data[16:]) - h.MinBlockHeight = binary.LittleEndian.Uint64(data[24:]) - h.MaxContiguousBlockHeight = binary.LittleEndian.Uint64(data[32:]) + h.MaxHeight = binary.LittleEndian.Uint64(data[16:]) + h.MinHeight = binary.LittleEndian.Uint64(data[24:]) + h.MaxContiguousHeight = binary.LittleEndian.Uint64(data[32:]) h.DataFileSize = binary.LittleEndian.Uint64(data[40:]) return nil } func (s *Database) indexEntryOffset(height BlockHeight) (uint64, error) { - if height < s.header.MinBlockHeight { - return 0, fmt.Errorf("%w: height %d is less than minimum block height %d", ErrInvalidBlockHeight, height, s.header.MinBlockHeight) + if height < s.header.MinHeight { + return 0, fmt.Errorf("%w: height %d is less than minimum block height %d", ErrInvalidBlockHeight, height, s.header.MinHeight) } - relativeHeight := height - s.header.MinBlockHeight + relativeHeight := height - s.header.MinHeight + + // Check for overflow before calculating the final offset. 
if relativeHeight > (math.MaxUint64-sizeOfIndexFileHeader)/sizeOfIndexEntry { - return 0, fmt.Errorf("%w: index entry offset multiplication overflow for height %d", ErrInvalidBlockHeight, height) + return 0, fmt.Errorf("%w: block height %d is too large", ErrInvalidBlockHeight, height) } + offsetFromHeaderStart := relativeHeight * sizeOfIndexEntry finalOffset := sizeOfIndexFileHeader + offsetFromHeaderStart - if finalOffset < sizeOfIndexFileHeader { - return 0, fmt.Errorf("%w: index entry offset addition overflow for height %d", ErrInvalidBlockHeight, height) - } return finalOffset, nil } -func (s *Database) readIndexEntry(height BlockHeight) (IndexEntry, error) { +func (s *Database) readIndexEntry(height BlockHeight) (indexEntry, error) { + var entry indexEntry offset, err := s.indexEntryOffset(height) if err != nil { - return IndexEntry{}, err + return entry, err } - var entry IndexEntry buf := make([]byte, sizeOfIndexEntry) _, err = s.indexFile.ReadAt(buf, int64(offset)) if err != nil { - if err == io.EOF { + if errors.Is(err, io.EOF) { return entry, nil } - return IndexEntry{}, fmt.Errorf("failed to read index entry at offset %d for height %d: %w", offset, height, err) + return entry, fmt.Errorf("failed to read index entry at offset %d for height %d: %w", offset, height, err) } if err := entry.UnmarshalBinary(buf); err != nil { - return IndexEntry{}, fmt.Errorf("failed to deserialize index entry for height %d: %w", height, err) + return entry, fmt.Errorf("failed to deserialize index entry for height %d: %w", height, err) } + return entry, nil } -func (s *Database) writeIndexEntryAt(indexFileOffset, dataFileBlockOffset, blockDataLen uint64) error { - indexEntry := IndexEntry{ - Offset: dataFileBlockOffset, - Size: blockDataLen, +func (s *Database) writeIndexEntryAt(indexFileOffset, dataFileBlockOffset, blockDataLen uint64, headerSize uint16) error { + indexEntry := indexEntry{ + Offset: dataFileBlockOffset, + Size: blockDataLen, + HeaderSize: headerSize, } entryBytes, err := indexEntry.MarshalBinary() @@ -145,38 +153,20 @@ func (s *Database) writeIndexEntryAt(indexFileOffset, dataFileBlockOffset, block return nil } -func (s *Database) persistIndexHeader(syncToDisk bool) error { - // Why fsync indexFile before writing its header? - // To prevent a critical inconsistency: the header must not describe a state - // more advanced than what's durably stored in the index entries. - // - // 1. Writes Are Buffered: OS buffers index entry writes; they aren't immediately on disk. - // 2. Header Reflects New State: The header is updated with new DataFileSize (for data file) - // and MaxContiguousBlockHeight (based on index entries). - // 3. THE RISK IF HEADER IS WRITTEN/FLUSHED FIRST (before fsyncing entries): - // If the OS flushes the updated header to disk *before* it flushes the buffered - // index entries that justify the header's new state, then a crash would mean: - // - The on-disk header claims certain blocks/entries exist (up to new DataFileSize/MCH). - // - But the corresponding index entries themselves were lost (still in buffer at crash). - // This results in the header pointing to the updated DataFileSize in the data file - // but the index entries are not yet on disk, leading to missing blocks in the index file. - // - // By fsyncing indexFile *first*, we ensure all index entries are durably on disk. - // Only then is the header written, guaranteeing it reflects a truly persisted state. 
- if syncToDisk { - if s.indexFile != nil { - if err := s.indexFile.Sync(); err != nil { - return fmt.Errorf("failed to sync index file before writing header state: %w", err) - } - } else { - return fmt.Errorf("index file is nil, cannot sync or write header state") +func (s *Database) persistIndexHeader() error { + // The index file must be fsync'd before the header is written to prevent + // a state where the header is persisted but the index entries it refers to + // are not. This could lead to data inconsistency on recovery. + if s.syncToDisk { + if err := s.indexFile.Sync(); err != nil { + return fmt.Errorf("failed to sync index file before writing header state: %w", err) } } header := s.header header.DataFileSize = s.nextDataWriteOffset.Load() - header.MaxContiguousBlockHeight = s.maxContiguousHeight.Load() - header.MaxBlockHeight = s.maxBlockHeight.Load() + header.MaxContiguousHeight = s.maxContiguousHeight.Load() + header.MaxHeight = s.maxBlockHeight.Load() headerBytes, err := header.MarshalBinary() if err != nil { return fmt.Errorf("failed to serialize header for writing state: %w", err) @@ -185,11 +175,7 @@ func (s *Database) persistIndexHeader(syncToDisk bool) error { return fmt.Errorf("internal error: serialized header state size %d, expected %d", len(headerBytes), sizeOfIndexFileHeader) } - if s.indexFile == nil { - return fmt.Errorf("index file is nil, cannot write header state") - } - _, err = s.indexFile.WriteAt(headerBytes, 0) - if err != nil { + if _, err := s.indexFile.WriteAt(headerBytes, 0); err != nil { return fmt.Errorf("failed to write header state to index file: %w", err) } return nil diff --git a/x/blockdb/readblock_test.go b/x/blockdb/readblock_test.go new file mode 100644 index 000000000000..f22e9d345d13 --- /dev/null +++ b/x/blockdb/readblock_test.go @@ -0,0 +1,243 @@ +package blockdb + +import ( + "errors" + "math" + "sync" + "sync/atomic" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestReadOperations(t *testing.T) { + tests := []struct { + name string + readHeight uint64 + noBlock bool + config *DatabaseConfig + setup func(db *Database) + wantErr error + }{ + { + name: "read first block", + readHeight: 0, + }, + { + name: "read max height block", + readHeight: 50, + }, + { + name: "read height with no block", + readHeight: 40, + noBlock: true, + }, + { + name: "read block higher than max height", + readHeight: 100, + noBlock: true, + }, + { + name: "read valid block with non-zero minimum height", + readHeight: 25, + config: &DatabaseConfig{ + MinimumHeight: 20, + MaxDataFileSize: DefaultMaxDataFileSize, + CheckpointInterval: 1024, + }, + }, + { + name: "database closed", + readHeight: 1, + setup: func(db *Database) { + db.Close() + }, + wantErr: ErrDatabaseClosed, + }, + { + name: "height below minimum", + readHeight: 5, + config: &DatabaseConfig{ + MinimumHeight: 10, + MaxDataFileSize: DefaultMaxDataFileSize, + CheckpointInterval: 1024, + }, + wantErr: ErrInvalidBlockHeight, + }, + { + name: "height causes overflow", + readHeight: math.MaxUint64, + wantErr: ErrInvalidBlockHeight, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + config := tt.config + if config == nil { + defaultConfig := DefaultDatabaseConfig() + config = &defaultConfig + } + + store, cleanup := newTestDatabase(t, false, tt.config) + defer cleanup() + + // Seed database with blocks based on config + seededBlocks := make(map[uint64][]byte) + if tt.wantErr == nil { + minHeight := config.MinimumHeight + 
maxHeight := minHeight + 50 // Always write 51 blocks + gapHeight := minHeight + 40 // Gap at relative position 40 + + for i := minHeight; i <= maxHeight; i++ { + if i == gapHeight { + continue // Create gap + } + + block := randomBlock(t) + err := store.WriteBlock(i, block, uint16(i-minHeight)) + require.NoError(t, err) + seededBlocks[i] = block + } + } + + if tt.setup != nil { + tt.setup(store) + } + readBlock, err := store.ReadBlock(tt.readHeight) + readHeader, err := store.ReadHeader(tt.readHeight) + readBody, err := store.ReadBody(tt.readHeight) + + if tt.wantErr != nil { + require.Error(t, err) + require.True(t, errors.Is(err, tt.wantErr)) + return + } + + // Handle success cases + require.NoError(t, err) + if tt.noBlock { + require.Nil(t, readBlock) + require.Nil(t, readHeader) + require.Nil(t, readBody) + } else { + require.NotNil(t, readBlock) + expectedBlock := seededBlocks[tt.readHeight] + headerSize := uint16(tt.readHeight - config.MinimumHeight) + var expectHeader []byte + if headerSize > 0 { + expectHeader = expectedBlock[:headerSize] + } + assert.Equal(t, expectedBlock, readBlock) + assert.Equal(t, expectHeader, readHeader) + assert.Equal(t, expectedBlock[headerSize:], readBody) + } + }) + } +} + +func TestReadOperations_Concurrency(t *testing.T) { + store, cleanup := newTestDatabase(t, false, nil) + defer cleanup() + + // Pre-generate blocks and write them + numBlocks := 50 + blocks := make([][]byte, numBlocks) + headerSizes := make([]uint16, numBlocks) + gapHeights := map[uint64]bool{ + 10: true, + 20: true, + } + + for i := range numBlocks { + if gapHeights[uint64(i)] { + continue + } + + blocks[i] = randomBlock(t) + headerSizes[i] = uint16(i * 10) // Varying header sizes + if headerSizes[i] > uint16(len(blocks[i])) { + headerSizes[i] = uint16(len(blocks[i])) / 2 + } + + err := store.WriteBlock(uint64(i), blocks[i], headerSizes[i]) + require.NoError(t, err) + } + + var wg sync.WaitGroup + var errors atomic.Int32 + for i := range numBlocks + 10 { + wg.Add(3) // One for each read operation + + go func(height int) { + defer wg.Done() + block, err := store.ReadBlock(uint64(height)) + if err != nil { + t.Errorf("ReadBlock failed for height %d: %v", height, err) + errors.Add(1) + return + } + if gapHeights[uint64(height)] || height >= numBlocks { + if block != nil { + t.Errorf("Expected nil block for height %d", height) + errors.Add(1) + } + } else { + if !assert.Equal(t, blocks[height], block) { + t.Errorf("ReadBlock data mismatch at height %d", height) + errors.Add(1) + } + } + }(i) + + go func(height int) { + defer wg.Done() + header, err := store.ReadHeader(uint64(height)) + if err != nil { + t.Errorf("ReadHeader failed for height %d: %v", height, err) + errors.Add(1) + return + } + if gapHeights[uint64(height)] || height >= numBlocks { + if header != nil { + t.Errorf("Expected nil header for height %d", height) + errors.Add(1) + } + } else { + expectedHeader := blocks[height][:headerSizes[height]] + if headerSizes[height] == 0 { + expectedHeader = nil + } + if !assert.Equal(t, expectedHeader, header) { + t.Errorf("ReadHeader data mismatch at height %d", height) + errors.Add(1) + } + } + }(i) + + go func(height int) { + defer wg.Done() + body, err := store.ReadBody(uint64(height)) + if err != nil { + t.Errorf("ReadBody failed for height %d: %v", height, err) + errors.Add(1) + return + } + if gapHeights[uint64(height)] || height >= numBlocks { + if body != nil { + t.Errorf("Expected nil body for height %d", height) + errors.Add(1) + } + } else { + expectedBody := 
blocks[height][headerSizes[height]:] + if !assert.Equal(t, expectedBody, body) { + t.Errorf("ReadBody data mismatch at height %d", height) + errors.Add(1) + } + } + }(i) + } + wg.Wait() + require.Zero(t, errors.Load(), "concurrent read operations had errors") +} diff --git a/x/blockdb/recovery.go b/x/blockdb/recovery.go index 1e8eb492b204..43d2414cec87 100644 --- a/x/blockdb/recovery.go +++ b/x/blockdb/recovery.go @@ -6,12 +6,6 @@ import ( "go.uber.org/zap" ) -const ( - // maxRecoverBlockSize is a sanity limit for block sizes encountered during the recovery scan. - // It prevents attempts to read/allocate excessively large blocks due to data corruption in a block header. - maxRecoverBlockSize uint64 = 50 * 1024 * 1024 // 50MB -) - // recover attempts to restore the store to a consistent state by scanning the data file // for blocks that may not be correctly indexed, usually after an unclean shutdown. // It reconciles the data file with the index file header and entries. @@ -25,10 +19,6 @@ func (s *Database) recover() error { // If the data file size matches the size recorded in the index header, then no recovery is needed. if dataFileActualSize == nextDataWriteOffset { - // TODO: Do we need to validate that the max contiguous height is correct? - // it might not be correct if the previous shutdown was not clean and - // only the new datafile size was persisted somehow. In this case, we need - // to fix the max contiguous height otherwise it will never be updated. return nil } @@ -43,7 +33,6 @@ func (s *Database) recover() error { s.log.Info("Data file larger than indexed size; recovering blocks", zap.Uint64("dataFileSize", dataFileActualSize), zap.Uint64("indexedSize", nextDataWriteOffset), - zap.Uint64("scanStartOffset", nextDataWriteOffset), ) // Start scan from where the index left off. @@ -65,7 +54,7 @@ func (s *Database) recover() error { zap.Uint64("offset", currentScanOffset), ) recoveredBlocksCount++ - if bh.Height > maxRecoveredHeightSeen { + if bh.Height > maxRecoveredHeightSeen || maxRecoveredHeightSeen == unsetHeight { maxRecoveredHeightSeen = bh.Height } currentScanOffset += sizeOfBlockHeader + bh.Size @@ -78,7 +67,7 @@ func (s *Database) recover() error { s.updateMaxContiguousHeightOnRecovery() } - if err := s.persistIndexHeader(true); err != nil { + if err := s.persistIndexHeader(); err != nil { return fmt.Errorf("recovery: failed to save index header after recovery scan: %w", err) } @@ -92,38 +81,37 @@ func (s *Database) recover() error { return nil } -// recoverBlockAtOffset attempts to read, validate, and index a block at the given offset. -// Returns the blockHeader and an error if the block is invalid or incomplete. 
func (s *Database) recoverBlockAtOffset(offset, dataFileActualSize uint64) (blockHeader, error) { var bh blockHeader if dataFileActualSize-offset < sizeOfBlockHeader { return bh, fmt.Errorf("not enough data for block header at offset %d", offset) } bhBuf := make([]byte, sizeOfBlockHeader) - _, readErr := s.dataFile.ReadAt(bhBuf, int64(offset)) - if readErr != nil { - return bh, fmt.Errorf("error reading block header at offset %d: %w", offset, readErr) + if _, err := s.dataFile.ReadAt(bhBuf, int64(offset)); err != nil { + return bh, fmt.Errorf("error reading block header at offset %d: %w", offset, err) } if err := bh.UnmarshalBinary(bhBuf); err != nil { return bh, fmt.Errorf("error deserializing block header at offset %d: %w", offset, err) } - if bh.Size == 0 || bh.Size > maxRecoverBlockSize { + if bh.Size == 0 || bh.Size > MaxBlockDataSize { return bh, fmt.Errorf("invalid block size in header at offset %d: %d", offset, bh.Size) } - if bh.Height < s.header.MinBlockHeight { + if bh.Height < s.header.MinHeight || bh.Height == unsetHeight { return bh, fmt.Errorf( "invalid block height in header at offset %d: found %d, expected >= %d", - offset, bh.Height, s.header.MinBlockHeight, + offset, bh.Height, s.header.MinHeight, ) } + if uint64(bh.HeaderSize) > bh.Size { + return bh, fmt.Errorf("invalid block header size in header at offset %d: %d > %d", offset, bh.HeaderSize, bh.Size) + } expectedBlockEndOffset := offset + sizeOfBlockHeader + bh.Size if expectedBlockEndOffset < offset || expectedBlockEndOffset > dataFileActualSize { return bh, fmt.Errorf("block data out of bounds at offset %d", offset) } blockData := make([]byte, bh.Size) - _, readErr = s.dataFile.ReadAt(blockData, int64(offset+sizeOfBlockHeader)) - if readErr != nil { - return bh, fmt.Errorf("failed to read block data at offset %d: %w", offset, readErr) + if _, err := s.dataFile.ReadAt(blockData, int64(offset+sizeOfBlockHeader)); err != nil { + return bh, fmt.Errorf("failed to read block data at offset %d: %w", offset, err) } calculatedChecksum := calculateChecksum(blockData) if calculatedChecksum != bh.Checksum { @@ -135,21 +123,27 @@ func (s *Database) recoverBlockAtOffset(offset, dataFileActualSize uint64) (bloc if idxErr != nil { return bh, fmt.Errorf("cannot get index offset for recovered block %d: %w", bh.Height, idxErr) } - if err := s.writeIndexEntryAt(indexFileOffset, offset, bh.Size); err != nil { + if err := s.writeIndexEntryAt(indexFileOffset, offset, bh.Size, bh.HeaderSize); err != nil { return bh, fmt.Errorf("failed to update index for recovered block %d: %w", bh.Height, err) } return bh, nil } -// updateMaxContiguousHeightOnRecovery extends the max contiguous height from the value in the header, -// incrementing as long as contiguous blocks exist. 
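// Illustrative sketch of the integrity check the recovery scan above relies
// on: the payload that follows each on-disk block header is re-hashed and
// compared with the checksum recorded in that header. Assuming
// calculateChecksum is a thin wrapper over xxhash (the hash imported in
// block.go), and with verifyBlock as a hypothetical helper:
//
//	import "github.com/cespare/xxhash/v2"
//
//	func verifyBlock(bh blockHeader, blockData []byte) bool {
//		return xxhash.Sum64(blockData) == bh.Checksum
//	}
//
// Any mismatch stops the scan at that offset, so a torn or corrupted tail of
// the data file is never re-indexed.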
func (s *Database) updateMaxContiguousHeightOnRecovery() { - currentMCH := s.header.MaxContiguousBlockHeight + currentMCH := s.header.MaxContiguousHeight highestKnown := s.maxBlockHeight.Load() for nextHeight := currentMCH + 1; nextHeight <= highestKnown; nextHeight++ { entry, err := s.readIndexEntry(nextHeight) - if err != nil || entry.IsEmpty() { + if err != nil { + s.log.Error( + "error reading index entry when updating max contiguous height on recovery", + zap.Uint64("height", nextHeight), + zap.Error(err), + ) + break + } + if entry.IsEmpty() { break } currentMCH = nextHeight diff --git a/x/blockdb/writeblock_test.go b/x/blockdb/writeblock_test.go new file mode 100644 index 000000000000..df462e759ecd --- /dev/null +++ b/x/blockdb/writeblock_test.go @@ -0,0 +1,371 @@ +package blockdb + +import ( + "errors" + "math" + "sync" + "sync/atomic" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// todo: create TestWriteBlock test that includes error tests and also tests for things like write when sync is true, etc + +func TestWriteBlock_HeightsVerification(t *testing.T) { + customConfig := DefaultDatabaseConfig() + customConfig.MinimumHeight = 10 + + tests := []struct { + name string + blockHeights []uint64 // block heights to write, in order + config *DatabaseConfig + expectedMCH uint64 // expected max contiguous height + expectedMaxHeight uint64 + headerSizes []uint16 + syncToDisk bool + checkpointInterval uint64 + }{ + { + name: "no blocks to write", + expectedMCH: unsetHeight, + expectedMaxHeight: unsetHeight, + }, + { + name: "single block at min height", + blockHeights: []uint64{0}, + expectedMCH: 0, + expectedMaxHeight: 0, + }, + { + name: "sequential blocks from min", + blockHeights: []uint64{0, 1, 2, 3}, + expectedMCH: 3, + expectedMaxHeight: 3, + }, + { + name: "out of order with no gaps", + blockHeights: []uint64{3, 1, 2, 0, 4}, + expectedMCH: 4, + expectedMaxHeight: 4, + }, + { + name: "blocks with gaps", + blockHeights: []uint64{0, 1, 3, 5, 6}, + expectedMCH: 1, + expectedMaxHeight: 6, + }, + { + name: "start with gap", + blockHeights: []uint64{5, 6}, + expectedMCH: unsetHeight, + expectedMaxHeight: 6, + }, + { + name: "overwrite same height", + blockHeights: []uint64{0, 1, 0}, // Write to height 0 twice + expectedMCH: 1, + expectedMaxHeight: 1, + }, + { + name: "custom min height single block", + blockHeights: []uint64{10}, + config: &customConfig, + expectedMCH: 10, + expectedMaxHeight: 10, + }, + { + name: "custom min height out of order", + blockHeights: []uint64{13, 11, 10, 12}, + config: &customConfig, + expectedMCH: 13, + expectedMaxHeight: 13, + }, + { + name: "custom min height with gaps", + blockHeights: []uint64{10, 11, 13, 15}, + config: &customConfig, + expectedMCH: 11, + expectedMaxHeight: 15, + }, + { + name: "custom min height start with gap", + blockHeights: []uint64{11, 12}, + config: &customConfig, + expectedMCH: unsetHeight, + expectedMaxHeight: 12, + }, + { + name: "blocks with various header sizes", + blockHeights: []uint64{0, 1, 2}, + headerSizes: []uint16{0, 50, 100}, + expectedMCH: 2, + expectedMaxHeight: 2, + }, + { + name: "overwrite with different header size", + blockHeights: []uint64{12, 13, 12}, // Write twice to same height + headerSizes: []uint16{10, 0, 50}, + expectedMCH: unsetHeight, + expectedMaxHeight: 13, + }, + { + name: "with sync to disk", + blockHeights: []uint64{0, 1, 2, 5}, + syncToDisk: true, + expectedMCH: 2, + expectedMaxHeight: 5, + }, + { + name: "custom checkpoint interval", + 
blockHeights: []uint64{0, 1, 2, 3, 4}, + checkpointInterval: 2, + expectedMCH: 4, + expectedMaxHeight: 4, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + config := tt.config + if config == nil { + defaultConfig := DefaultDatabaseConfig() + config = &defaultConfig + } + if tt.checkpointInterval > 0 { + configCopy := *config + configCopy.CheckpointInterval = tt.checkpointInterval + config = &configCopy + } + + store, cleanup := newTestDatabase(t, tt.syncToDisk, config) + defer cleanup() + + blocksWritten := make(map[uint64][]byte) + headerSizesWritten := make(map[uint64]uint16) + for i, h := range tt.blockHeights { + block := randomBlock(t) + var headerSize uint16 + + // Use specific header size if provided + if tt.headerSizes != nil && i < len(tt.headerSizes) { + headerSize = tt.headerSizes[i] + // Ensure header size doesn't exceed block size + require.LessOrEqual(t, int(headerSize), len(block), "header size %d exceeds block size %d for test case", headerSize, len(block)) + } + + err := store.WriteBlock(h, block, headerSize) + require.NoError(t, err, "unexpected error at height %d", h) + + blocksWritten[h] = block + headerSizesWritten[h] = headerSize // Store the header size for the final write to this height + } + + // Verify all written blocks are readable and data is correct + for h, expectedBlock := range blocksWritten { + readBlock, err := store.ReadBlock(h) + require.NoError(t, err, "ReadBlock failed at height %d", h) + assert.Equal(t, expectedBlock, readBlock, "data integrity check failed at height %d", h) + + // Test header/body separation if header size was specified + if tt.headerSizes != nil { + if headerSize, exists := headerSizesWritten[h]; exists { + header, err := store.ReadHeader(h) + require.NoError(t, err, "ReadHeader failed at height %d", h) + + body, err := store.ReadBody(h) + require.NoError(t, err, "ReadBody failed at height %d", h) + + if headerSize == 0 { + assert.Nil(t, header, "header should be nil for headerSize=0") + assert.Equal(t, expectedBlock, body, "body should equal full block when headerSize=0") + } else { + expectedHeader := expectedBlock[:headerSize] + expectedBody := expectedBlock[headerSize:] + assert.Equal(t, expectedHeader, header, "header mismatch at height %d", h) + assert.Equal(t, expectedBody, body, "body mismatch at height %d", h) + } + } + } + } + + checkDatabaseState(t, store, tt.expectedMaxHeight, tt.expectedMCH) + }) + } +} + +func TestWriteBlock_Concurrency(t *testing.T) { + store, cleanup := newTestDatabase(t, false, nil) + defer cleanup() + + var wg sync.WaitGroup + var errors atomic.Int32 + + // Pre-generate blocks for reuse + blocks := make([][]byte, 20) + for i := range 20 { + blocks[i] = randomBlock(t) + wg.Add(1) + go func(i int) { + defer wg.Done() + var height uint64 + block := blocks[i] + + // create gaps at heights 5 and 10 and rewrite last block + if i == 5 || i == 10 { + height = uint64(i - 1) + } else { + height = uint64(i) + } + + err := store.WriteBlock(height, block, 1) + if err != nil { + t.Errorf("WriteBlock failed for iteration %d (height %d): %v", i, height, err) + errors.Add(1) + } + }(i) + } + + wg.Wait() + require.Zero(t, errors.Load(), "concurrent writes had errors") + + // Verify that all expected heights have blocks (except 5, 10) + for i := range 20 { + height := uint64(i) + block, err := store.ReadBlock(height) + require.NoError(t, err) + + if i == 5 || i == 10 { + require.Nil(t, block, "expected nil block at gap height %d", height) + } else { + require.NotNil(t, block) + } + } + 
checkDatabaseState(t, store, 19, 4) +} + +func TestWriteBlock_Errors(t *testing.T) { + tests := []struct { + name string + height uint64 + block []byte + headerSize uint16 + setup func(db *Database) + config *DatabaseConfig + wantErr error + }{ + { + name: "empty block nil", + height: 0, + block: nil, + headerSize: 0, + wantErr: ErrBlockEmpty, + }, + { + name: "empty block zero length", + height: 0, + block: []byte{}, + headerSize: 0, + wantErr: ErrBlockEmpty, + }, + { + name: "block too large", + height: 0, + block: make([]byte, MaxBlockDataSize+1), + headerSize: 0, + wantErr: ErrBlockTooLarge, + }, + { + name: "header size larger than block", + height: 0, + block: []byte("small"), + headerSize: 6, // block is only 5 bytes + wantErr: ErrHeaderSizeTooLarge, + }, + { + name: "header size equal to block", + height: 0, + block: []byte("small"), + headerSize: 5, + wantErr: ErrHeaderSizeTooLarge, + }, + { + name: "height below custom minimum", + height: 5, + block: randomBlock(t), + config: &DatabaseConfig{ + MinimumHeight: 10, + MaxDataFileSize: DefaultMaxDataFileSize, + CheckpointInterval: 1024, + }, + headerSize: 0, + wantErr: ErrInvalidBlockHeight, + }, + { + name: "height causes overflow", + height: math.MaxUint64, + block: randomBlock(t), + headerSize: 0, + wantErr: ErrInvalidBlockHeight, + }, + { + name: "database closed", + height: 0, + block: randomBlock(t), + headerSize: 0, + setup: func(db *Database) { + db.Close() + }, + wantErr: ErrDatabaseClosed, + }, + { + name: "exceed max data file size", + height: 0, + block: make([]byte, 1000), // Block + header will exceed 1024 limit + config: &DatabaseConfig{ + MinimumHeight: 0, + MaxDataFileSize: 1024, // 1KB limit + CheckpointInterval: 1024, + }, + headerSize: 0, + wantErr: errors.New("exceed configured max data file size"), + }, + { + name: "data file offset overflow", + height: 0, + block: make([]byte, 100), + config: &DatabaseConfig{ + MinimumHeight: 0, + MaxDataFileSize: 0, // No limit + CheckpointInterval: 1024, + }, + setup: func(db *Database) { + // Set the next write offset to near max to trigger overflow + db.nextDataWriteOffset.Store(math.MaxUint64 - 50) + }, + headerSize: 0, + wantErr: errors.New("would overflow uint64 data file pointer"), + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + store, cleanup := newTestDatabase(t, false, tt.config) + defer cleanup() + + if tt.setup != nil { + tt.setup(store) + } + + err := store.WriteBlock(tt.height, tt.block, tt.headerSize) + require.Error(t, err) + require.NotNil(t, tt.wantErr, "test case must specify expected error") + + if !errors.Is(err, tt.wantErr) { + require.Contains(t, err.Error(), tt.wantErr.Error()) + } + checkDatabaseState(t, store, unsetHeight, unsetHeight) + }) + } +} From 4fe0e19a679b8cb27d59046caf6784aef891dc46 Mon Sep 17 00:00:00 2001 From: Draco Date: Mon, 23 Jun 2025 17:29:50 -0400 Subject: [PATCH 06/27] feat: data splitting & fix linting --- go.mod | 2 +- x/blockdb/block.go | 102 ++++++++++++++++++------- x/blockdb/config.go | 9 ++- x/blockdb/database.go | 124 +++++++++++++++++++++++------- x/blockdb/database_test.go | 90 +++++++++------------- x/blockdb/datasplit_test.go | 101 +++++++++++++++++++++++++ x/blockdb/errors.go | 18 +++-- x/blockdb/helpers_test.go | 36 ++++----- x/blockdb/index.go | 19 +++-- x/blockdb/readblock_test.go | 53 +++++-------- x/blockdb/recovery.go | 141 +++++++++++++++++++++-------------- x/blockdb/writeblock_test.go | 25 +++---- 12 files changed, 473 insertions(+), 247 deletions(-) create mode 100644 
x/blockdb/datasplit_test.go diff --git a/go.mod b/go.mod index e32b9028a44d..09f0eb328e95 100644 --- a/go.mod +++ b/go.mod @@ -19,6 +19,7 @@ require ( github.com/ava-labs/ledger-avalanche/go v0.0.0-20241009183145-e6f90a8a1a60 github.com/ava-labs/libevm v1.13.14-0.3.0.rc.1 github.com/btcsuite/btcd/btcutil v1.1.3 + github.com/cespare/xxhash/v2 v2.3.0 github.com/cockroachdb/pebble v0.0.0-20230928194634-aa077af62593 github.com/compose-spec/compose-go v1.20.2 github.com/decred/dcrd/dcrec/secp256k1/v4 v4.1.0 @@ -92,7 +93,6 @@ require ( github.com/bits-and-blooms/bitset v1.10.0 // indirect github.com/btcsuite/btcd/btcec/v2 v2.3.2 // indirect github.com/cenkalti/backoff/v4 v4.2.1 // indirect - github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/cockroachdb/errors v1.9.1 // indirect github.com/cockroachdb/logtags v0.0.0-20230118201751-21c54148d20b // indirect github.com/cockroachdb/redact v1.1.3 // indirect diff --git a/x/blockdb/block.go b/x/blockdb/block.go index 6f25dfe662fd..7c008c4363d8 100644 --- a/x/blockdb/block.go +++ b/x/blockdb/block.go @@ -1,18 +1,24 @@ +// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved. +// See the file LICENSE for licensing terms. + package blockdb import ( "encoding" "encoding/binary" + "errors" "fmt" + "io" "math" + "os" "github.com/cespare/xxhash/v2" "go.uber.org/zap" ) var ( - _ encoding.BinaryMarshaler = blockHeader{} - _ encoding.BinaryUnmarshaler = &blockHeader{} + _ encoding.BinaryMarshaler = (*blockHeader)(nil) + _ encoding.BinaryUnmarshaler = (*blockHeader)(nil) sizeOfBlockHeader = uint64(binary.Size(blockHeader{})) ) @@ -68,11 +74,12 @@ func (s *Database) WriteBlock(height BlockHeight, block BlockData, headerSize Bl return ErrDatabaseClosed } - if len(block) == 0 { + blockDataLen := uint64(len(block)) + if blockDataLen == 0 { return ErrBlockEmpty } - if len(block) > MaxBlockDataSize { + if blockDataLen > MaxBlockDataSize { return ErrBlockTooLarge } @@ -85,7 +92,6 @@ func (s *Database) WriteBlock(height BlockHeight, block BlockData, headerSize Bl return err } - blockDataLen := uint64(len(block)) sizeWithDataHeader := sizeOfBlockHeader + blockDataLen writeDataOffset, err := s.allocateBlockSpace(sizeWithDataHeader) if err != nil { @@ -129,12 +135,15 @@ func (s *Database) ReadBlock(height BlockHeight) (BlockData, error) { // Read the complete block data blockData := make(BlockData, indexEntry.Size) - actualDataOffset := indexEntry.Offset + sizeOfBlockHeader - if actualDataOffset < indexEntry.Offset { - return nil, fmt.Errorf("internal error: block data offset calculation overflowed") + dataFile, localOffset, err := s.getDataFileAndOffset(indexEntry.Offset) + if err != nil { + return nil, fmt.Errorf("failed to get data file for block at height %d: %w", height, err) } - _, err = s.dataFile.ReadAt(blockData, int64(actualDataOffset)) + _, err = dataFile.ReadAt(blockData, int64(localOffset+sizeOfBlockHeader)) if err != nil { + if errors.Is(err, io.EOF) { + return nil, nil + } return nil, fmt.Errorf("failed to read block data from data file: %w", err) } @@ -171,12 +180,15 @@ func (s *Database) ReadHeader(height BlockHeight) (BlockData, error) { // Read only the header portion headerData := make([]byte, indexEntry.HeaderSize) - actualDataOffset := indexEntry.Offset + sizeOfBlockHeader - if actualDataOffset < indexEntry.Offset { - return nil, fmt.Errorf("internal error: block data offset calculation overflowed") + dataFile, localOffset, err := s.getDataFileAndOffset(indexEntry.Offset) + if err != nil { + return nil, fmt.Errorf("failed to get data 
file for block header at height %d: %w", height, err) } - _, err = s.dataFile.ReadAt(headerData, int64(actualDataOffset)) + _, err = dataFile.ReadAt(headerData, int64(localOffset+sizeOfBlockHeader)) if err != nil { + if errors.Is(err, io.EOF) { + return nil, nil + } return nil, fmt.Errorf("failed to read block header data from data file: %w", err) } @@ -203,9 +215,16 @@ func (s *Database) ReadBody(height BlockHeight) (BlockData, error) { bodySize := indexEntry.Size - uint64(indexEntry.HeaderSize) bodyData := make([]byte, bodySize) - bodyOffset := indexEntry.Offset + sizeOfBlockHeader + uint64(indexEntry.HeaderSize) - _, err = s.dataFile.ReadAt(bodyData, int64(bodyOffset)) + dataFile, localOffset, err := s.getDataFileAndOffset(indexEntry.Offset) + if err != nil { + return nil, fmt.Errorf("failed to get data file for block body at height %d: %w", height, err) + } + bodyOffset := localOffset + sizeOfBlockHeader + uint64(indexEntry.HeaderSize) + _, err = dataFile.ReadAt(bodyData, int64(bodyOffset)) if err != nil { + if errors.Is(err, io.EOF) { + return nil, nil + } return nil, fmt.Errorf("failed to read block body data from data file: %w", err) } return bodyData, nil @@ -221,16 +240,21 @@ func (s *Database) writeBlockAt(offset uint64, bh blockHeader, block BlockData) return fmt.Errorf("failed to serialize block header: %w", err) } + dataFile, localOffset, err := s.getDataFileAndOffset(offset) + if err != nil { + return fmt.Errorf("failed to get data file for writing block %d: %w", bh.Height, err) + } + // Allocate combined buffer for header and block data and write it to the data file combinedBuf := make([]byte, sizeOfBlockHeader+uint64(len(block))) copy(combinedBuf, headerBytes) copy(combinedBuf[sizeOfBlockHeader:], block) - if _, err := s.dataFile.WriteAt(combinedBuf, int64(offset)); err != nil { + if _, err := dataFile.WriteAt(combinedBuf, int64(localOffset)); err != nil { return fmt.Errorf("failed to write block to data file at offset %d: %w", offset, err) } if s.syncToDisk { - if err := s.dataFile.Sync(); err != nil { + if err := dataFile.Sync(); err != nil { return fmt.Errorf("failed to sync data file after writing block %d: %w", bh.Height, err) } } @@ -285,29 +309,51 @@ func (s *Database) updateBlockHeights(writtenBlockHeight BlockHeight) error { return nil } -func (s *Database) allocateBlockSpace(sizeWithDataHeader uint64) (writeDataOffset uint64, err error) { +func (s *Database) allocateBlockSpace(totalSize uint64) (writeDataOffset uint64, err error) { maxDataFileSize := s.header.MaxDataFileSize + // Check if a single block would exceed the max data file size + if maxDataFileSize > 0 && totalSize > maxDataFileSize { + return 0, ErrBlockTooLarge + } + for { - // Check if the new offset would overflow uint64. 
currentOffset := s.nextDataWriteOffset.Load() - if currentOffset > math.MaxUint64-sizeWithDataHeader { + if currentOffset > math.MaxUint64-totalSize { return 0, fmt.Errorf( "adding block of size %d to offset %d would overflow uint64 data file pointer", - sizeWithDataHeader, currentOffset, + totalSize, currentOffset, ) } - newOffset := currentOffset + sizeWithDataHeader - if maxDataFileSize > 0 && newOffset > maxDataFileSize { - return 0, fmt.Errorf( - "adding block of size %d to offset %d (new offset %d) would exceed configured max data file size of %d bytes", - sizeWithDataHeader, currentOffset, newOffset, maxDataFileSize, - ) + writeOffset := currentOffset + newOffset := currentOffset + totalSize + + if maxDataFileSize > 0 { + fileIndex := int(currentOffset / maxDataFileSize) + localOffset := currentOffset % maxDataFileSize + + if localOffset+totalSize > maxDataFileSize { + writeOffset = (uint64(fileIndex) + 1) * maxDataFileSize + newOffset = writeOffset + totalSize + } } if s.nextDataWriteOffset.CompareAndSwap(currentOffset, newOffset) { - return currentOffset, nil + return writeOffset, nil } } } + +func (s *Database) getDataFileAndOffset(globalOffset uint64) (*os.File, uint64, error) { + maxFileSize := s.header.MaxDataFileSize + if maxFileSize == 0 { + handle, err := s.getOrOpenDataFile(0) + return handle, globalOffset, err + } + + fileIndex := int(globalOffset / maxFileSize) + localOffset := globalOffset % maxFileSize + handle, err := s.getOrOpenDataFile(fileIndex) + return handle, localOffset, err +} diff --git a/x/blockdb/config.go b/x/blockdb/config.go index 9d888eceb1d3..771796a369bd 100644 --- a/x/blockdb/config.go +++ b/x/blockdb/config.go @@ -1,8 +1,9 @@ +// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved. +// See the file LICENSE for licensing terms. + package blockdb -import ( - "fmt" -) +import "errors" // DefaultMaxDataFileSize is the default maximum size of the data block file in bytes (500GB). const DefaultMaxDataFileSize = 500 * 1024 * 1024 * 1024 @@ -31,7 +32,7 @@ func DefaultDatabaseConfig() DatabaseConfig { // Validate checks if the store options are valid. func (opts DatabaseConfig) Validate() error { if opts.CheckpointInterval == 0 { - return fmt.Errorf("CheckpointInterval cannot be 0") + return errors.New("CheckpointInterval cannot be 0") } return nil } diff --git a/x/blockdb/database.go b/x/blockdb/database.go index b74a9c02ea8c..1ff02d3ad50a 100644 --- a/x/blockdb/database.go +++ b/x/blockdb/database.go @@ -1,6 +1,10 @@ +// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved. +// See the file LICENSE for licensing terms. + package blockdb import ( + "errors" "fmt" "math" "os" @@ -12,8 +16,9 @@ import ( ) const ( - indexFileName = "blockdb.idx" - dataFileName = "blockdb.dat" + indexFileName = "blockdb.idx" + dataFileNameFormat = "blockdb_%d.dat" + defaultFilePermissions = 0o666 // Since 0 is a valid height, math.MaxUint64 is used to indicate unset height. // It is not be possible for block height to be max uint64 as it would overflow the index entry offset @@ -23,15 +28,17 @@ const ( // Database stores blockchain blocks on disk and provides methods to read, and write blocks. type Database struct { indexFile *os.File - dataFile *os.File + dataDir string options DatabaseConfig header indexFileHeader log logging.Logger mu sync.RWMutex closed bool + fileCache sync.Map // syncToDisk determines if fsync is called after each write for durability. 
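// Illustrative sketch of the offset arithmetic behind the data-file splitting
// introduced in this commit: the index keeps a single logical offset, and both
// readers and writers derive the physical file and position from it. A block
// is never split across files; when it would not fit in the remainder of the
// current file, allocateBlockSpace above skips to the next file boundary.
// splitOffset is a hypothetical helper mirroring getDataFileAndOffset:
//
//	func splitOffset(globalOffset, maxFileSize uint64) (fileIndex, localOffset uint64) {
//		if maxFileSize == 0 {
//			return 0, globalOffset // single, unbounded data file
//		}
//		return globalOffset / maxFileSize, globalOffset % maxFileSize
//	}
//
// For example, a global offset of 5000 with MaxDataFileSize = 2560 lands in
// blockdb_1.dat at local offset 2440.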
syncToDisk bool + // maxBlockHeight tracks the highest block height that has been written to the db, even if there are gaps in the sequence. maxBlockHeight atomic.Uint64 // nextDataWriteOffset tracks the next position to write new data in the data file. @@ -40,32 +47,72 @@ type Database struct { maxContiguousHeight atomic.Uint64 } -func (s *Database) openOrCreateFiles(indexDir, dataDir string, truncate bool) error { - indexPath := filepath.Join(indexDir, indexFileName) - dataPath := filepath.Join(dataDir, dataFileName) - - if err := os.MkdirAll(indexDir, 0755); err != nil { - return fmt.Errorf("failed to create index directory %s: %w", indexDir, err) +func (s *Database) listDataFiles() (map[int]string, int, error) { + files, err := os.ReadDir(s.dataDir) + if err != nil { + return nil, -1, fmt.Errorf("failed to read data directory %s: %w", s.dataDir, err) } - if err := os.MkdirAll(dataDir, 0755); err != nil { - return fmt.Errorf("failed to create data directory %s: %w", dataDir, err) + + dataFiles := make(map[int]string) + maxIndex := -1 + for _, file := range files { + if file.IsDir() { + continue + } + var index int + if n, err := fmt.Sscanf(file.Name(), dataFileNameFormat, &index); n == 1 && err == nil { + dataFiles[index] = filepath.Join(s.dataDir, file.Name()) + if index > maxIndex { + maxIndex = index + } + } } + return dataFiles, maxIndex, nil +} + +func (s *Database) openAndInitializeIndex(indexDir string, truncate bool) error { + indexPath := filepath.Join(indexDir, indexFileName) + if err := os.MkdirAll(indexDir, 0o755); err != nil { + return fmt.Errorf("failed to create index directory %s: %w", indexDir, err) + } openFlags := os.O_RDWR | os.O_CREATE if truncate { openFlags |= os.O_TRUNC } - var err error - s.indexFile, err = os.OpenFile(indexPath, openFlags, 0666) + s.indexFile, err = os.OpenFile(indexPath, openFlags, defaultFilePermissions) if err != nil { return fmt.Errorf("failed to open index file %s: %w", indexPath, err) } - s.dataFile, err = os.OpenFile(dataPath, openFlags, 0666) - if err != nil { - // Clean up partially opened resources - s.indexFile.Close() - return fmt.Errorf("failed to open data file %s: %w", dataPath, err) + return s.loadOrInitializeHeader(truncate) +} + +func (s *Database) initializeDataFiles(dataDir string, truncate bool) error { + s.dataDir = dataDir + if err := os.MkdirAll(dataDir, 0o755); err != nil { + return fmt.Errorf("failed to create data directory %s: %w", dataDir, err) + } + + if truncate { + dataFiles, _, err := s.listDataFiles() + if err != nil { + return fmt.Errorf("failed to list data files for truncation: %w", err) + } + for _, filePath := range dataFiles { + if err := os.Remove(filePath); err != nil { + return fmt.Errorf("failed to remove old data file %s: %w", filePath, err) + } + } + } + + // Pre-load the data file for the next write offset. 
+ nextOffset := s.nextDataWriteOffset.Load() + if nextOffset > 0 { + _, _, err := s.getDataFileAndOffset(nextOffset) + if err != nil { + return fmt.Errorf("failed to pre-load data file for offset %d: %w", nextOffset, err) + } } return nil } @@ -78,7 +125,7 @@ func (s *Database) loadOrInitializeHeader(truncate bool) error { MaxDataFileSize: s.options.MaxDataFileSize, MaxHeight: unsetHeight, MaxContiguousHeight: unsetHeight, - DataFileSize: 0, + NextWriteOffset: 0, } s.maxContiguousHeight.Store(unsetHeight) s.maxBlockHeight.Store(unsetHeight) @@ -109,7 +156,7 @@ func (s *Database) loadOrInitializeHeader(truncate bool) error { if s.header.Version != IndexFileVersion { return fmt.Errorf("mismatched index file version: found %d, expected %d", s.header.Version, IndexFileVersion) } - s.nextDataWriteOffset.Store(s.header.DataFileSize) + s.nextDataWriteOffset.Store(s.header.NextWriteOffset) s.maxContiguousHeight.Store(s.header.MaxContiguousHeight) s.maxBlockHeight.Store(s.header.MaxHeight) @@ -126,7 +173,7 @@ func (s *Database) loadOrInitializeHeader(truncate bool) error { // - log: Logger instance for structured logging func New(indexDir, dataDir string, syncToDisk bool, truncate bool, config DatabaseConfig, log logging.Logger) (*Database, error) { if indexDir == "" || dataDir == "" { - return nil, fmt.Errorf("both indexDir and dataDir must be provided") + return nil, errors.New("both indexDir and dataDir must be provided") } if err := config.Validate(); err != nil { @@ -137,13 +184,14 @@ func New(indexDir, dataDir string, syncToDisk bool, truncate bool, config Databa options: config, syncToDisk: syncToDisk, log: log, + fileCache: sync.Map{}, } - if err := s.openOrCreateFiles(indexDir, dataDir, truncate); err != nil { + if err := s.openAndInitializeIndex(indexDir, truncate); err != nil { return nil, err } - if err := s.loadOrInitializeHeader(truncate); err != nil { + if err := s.initializeDataFiles(dataDir, truncate); err != nil { s.closeFiles() return nil, err } @@ -161,9 +209,35 @@ func (s *Database) closeFiles() { if s.indexFile != nil { s.indexFile.Close() } - if s.dataFile != nil { - s.dataFile.Close() + s.fileCache.Range(func(_, value any) bool { + file := value.(*os.File) + file.Close() + return true + }) +} + +func (s *Database) dataFilePath(index int) string { + return filepath.Join(s.dataDir, fmt.Sprintf(dataFileNameFormat, index)) +} + +func (s *Database) getOrOpenDataFile(fileIndex int) (*os.File, error) { + if handle, ok := s.fileCache.Load(fileIndex); ok { + return handle.(*os.File), nil + } + + filePath := s.dataFilePath(fileIndex) + handle, err := os.OpenFile(filePath, os.O_RDWR|os.O_CREATE, defaultFilePermissions) + if err != nil { + return nil, fmt.Errorf("failed to open data file %s: %w", filePath, err) } + actual, loaded := s.fileCache.LoadOrStore(fileIndex, handle) + if loaded { + // Another goroutine created the file first, close ours + handle.Close() + return actual.(*os.File), nil + } + + return handle, nil } // MaxContiguousHeight returns the highest block height known to be contiguously stored. diff --git a/x/blockdb/database_test.go b/x/blockdb/database_test.go index 1b0a4c00b4ad..c29efdec73f0 100644 --- a/x/blockdb/database_test.go +++ b/x/blockdb/database_test.go @@ -1,3 +1,6 @@ +// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved. +// See the file LICENSE for licensing terms. 
+ package blockdb import ( @@ -6,9 +9,9 @@ import ( "path/filepath" "testing" - "github.com/ava-labs/avalanchego/utils/logging" - "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + + "github.com/ava-labs/avalanchego/utils/logging" ) func TestNew_Truncate(t *testing.T) { @@ -24,10 +27,8 @@ func TestNew_Truncate(t *testing.T) { // Write some test data and close the database testBlock := []byte("test block data") - err = db.WriteBlock(0, testBlock, 0) - require.NoError(t, err) - err = db.Close() - require.NoError(t, err) + require.NoError(t, db.WriteBlock(0, testBlock, 0)) + require.NoError(t, db.Close()) // Reopen with truncate=true and verify data is gone db2, err := New(indexDir, dataDir, false, true, DefaultDatabaseConfig(), logging.NoLog{}) @@ -53,13 +54,11 @@ func TestNew_NoTruncate(t *testing.T) { // Write some test data and close the database testBlock := []byte("test block data") - err = db.WriteBlock(1, testBlock, 5) - require.NoError(t, err) + require.NoError(t, db.WriteBlock(1, testBlock, 5)) readBlock, err := db.ReadBlock(1) require.NoError(t, err) require.Equal(t, testBlock, readBlock) - err = db.Close() - require.NoError(t, err) + require.NoError(t, db.Close()) // Reopen with truncate=false and verify data is still there db2, err := New(indexDir, dataDir, false, false, DefaultDatabaseConfig(), logging.NoLog{}) @@ -72,8 +71,7 @@ func TestNew_NoTruncate(t *testing.T) { // Verify we can write additional data testBlock2 := []byte("test block data 3") - err = db2.WriteBlock(2, testBlock2, 0) - require.NoError(t, err) + require.NoError(t, db2.WriteBlock(2, testBlock2, 0)) readBlock2, err := db2.ReadBlock(2) require.NoError(t, err) require.Equal(t, testBlock2, readBlock2) @@ -149,8 +147,7 @@ func TestNew_Params(t *testing.T) { db, err := New(tt.indexDir, tt.dataDir, tt.syncToDisk, true, tt.config, tt.log) if tt.wantErr != nil { - require.Error(t, err) - assert.Contains(t, err.Error(), tt.wantErr.Error()) + require.Equal(t, tt.wantErr.Error(), err.Error()) return } @@ -158,20 +155,16 @@ func TestNew_Params(t *testing.T) { require.NotNil(t, db) // Verify the database was created with correct configuration - assert.Equal(t, tt.config.MinimumHeight, db.options.MinimumHeight) - assert.Equal(t, tt.config.MaxDataFileSize, db.options.MaxDataFileSize) - assert.Equal(t, tt.config.CheckpointInterval, db.options.CheckpointInterval) - assert.Equal(t, tt.syncToDisk, db.syncToDisk) + require.Equal(t, tt.config.MinimumHeight, db.options.MinimumHeight) + require.Equal(t, tt.config.MaxDataFileSize, db.options.MaxDataFileSize) + require.Equal(t, tt.config.CheckpointInterval, db.options.CheckpointInterval) + require.Equal(t, tt.syncToDisk, db.syncToDisk) - // Verify files were created indexPath := filepath.Join(tt.indexDir, indexFileName) - dataPath := filepath.Join(tt.dataDir, dataFileName) - assert.FileExists(t, indexPath) - assert.FileExists(t, dataPath) + require.FileExists(t, indexPath) // Test that we can close the database - err = db.Close() - require.NoError(t, err) + require.NoError(t, db.Close()) }) } } @@ -188,16 +181,13 @@ func TestNew_IndexFileErrors(t *testing.T) { tempDir, _ := os.MkdirTemp("", "blockdb_test_*") indexDir := filepath.Join(tempDir, "index") dataDir := filepath.Join(tempDir, "data") - os.MkdirAll(indexDir, 0755) - os.MkdirAll(dataDir, 0755) + require.NoError(t, os.MkdirAll(indexDir, 0o755)) + require.NoError(t, os.MkdirAll(dataDir, 0o755)) // Create a corrupted index file indexPath := filepath.Join(indexDir, indexFileName) corruptedData := 
[]byte("corrupted index file data") - err := os.WriteFile(indexPath, corruptedData, 0666) - if err != nil { - return "", "" - } + require.NoError(t, os.WriteFile(indexPath, corruptedData, defaultFilePermissions)) return indexDir, dataDir }, @@ -211,8 +201,8 @@ func TestNew_IndexFileErrors(t *testing.T) { dataDir := filepath.Join(tempDir, "data") // Create directories - os.MkdirAll(indexDir, 0755) - os.MkdirAll(dataDir, 0755) + require.NoError(t, os.MkdirAll(indexDir, 0o755)) + require.NoError(t, os.MkdirAll(dataDir, 0o755)) // Create a valid index file with wrong version indexPath := filepath.Join(indexDir, indexFileName) @@ -222,17 +212,12 @@ func TestNew_IndexFileErrors(t *testing.T) { MaxDataFileSize: DefaultMaxDataFileSize, MaxHeight: unsetHeight, MaxContiguousHeight: unsetHeight, - DataFileSize: 0, + NextWriteOffset: 0, } headerBytes, err := header.MarshalBinary() - if err != nil { - return "", "" - } - err = os.WriteFile(indexPath, headerBytes, 0666) - if err != nil { - return "", "" - } + require.NoError(t, err) + require.NoError(t, os.WriteFile(indexPath, headerBytes, defaultFilePermissions)) return indexDir, dataDir }, @@ -250,17 +235,15 @@ func TestNew_IndexFileErrors(t *testing.T) { defer os.RemoveAll(filepath.Dir(dataDir)) _, err := New(indexDir, dataDir, false, false, DefaultDatabaseConfig(), logging.NoLog{}) - require.Error(t, err) - assert.Contains(t, err.Error(), tt.wantErrMsg) + require.Contains(t, err.Error(), tt.wantErrMsg) }) } } func TestIndexFileHeaderAlignment(t *testing.T) { - if sizeOfIndexFileHeader%sizeOfIndexEntry != 0 { - t.Errorf("sizeOfIndexFileHeader (%d) is not a multiple of sizeOfIndexEntry (%d)", - sizeOfIndexFileHeader, sizeOfIndexEntry) - } + require.Equal(t, uint64(0), sizeOfIndexFileHeader%sizeOfIndexEntry, + "sizeOfIndexFileHeader (%d) is not a multiple of sizeOfIndexEntry (%d)", + sizeOfIndexFileHeader, sizeOfIndexEntry) } func TestNew_IndexFileConfigPrecedence(t *testing.T) { @@ -279,13 +262,11 @@ func TestNew_IndexFileConfigPrecedence(t *testing.T) { // Write a block at height 100 and close db testBlock := []byte("test block data") - err = db.WriteBlock(100, testBlock, 0) - require.NoError(t, err) + require.NoError(t, db.WriteBlock(100, testBlock, 0)) readBlock, err := db.ReadBlock(100) require.NoError(t, err) require.Equal(t, testBlock, readBlock) - err = db.Close() - require.NoError(t, err) + require.NoError(t, db.Close()) // Reopen with different config that has higher minimum height and smaller max data file size differentConfig := DatabaseConfig{ @@ -300,22 +281,19 @@ func TestNew_IndexFileConfigPrecedence(t *testing.T) { // The database should still accept blocks between 100 and 200 testBlock2 := []byte("test block data 2") - err = db2.WriteBlock(150, testBlock2, 0) - require.NoError(t, err) + require.NoError(t, db2.WriteBlock(150, testBlock2, 0)) readBlock2, err := db2.ReadBlock(150) require.NoError(t, err) require.Equal(t, testBlock2, readBlock2) // Verify that writing below initial minimum height fails err = db2.WriteBlock(50, []byte("invalid block"), 0) - require.Error(t, err) - require.True(t, errors.Is(err, ErrInvalidBlockHeight)) + require.ErrorIs(t, err, ErrInvalidBlockHeight) // Write a large block that would exceed the new config's 512KB limit // but should succeed because we use the original 1MB limit from index file largeBlock := make([]byte, 768*1024) // 768KB block - err = db2.WriteBlock(200, largeBlock, 0) - require.NoError(t, err) + require.NoError(t, db2.WriteBlock(200, largeBlock, 0)) readLargeBlock, err := 
db2.ReadBlock(200) require.NoError(t, err) require.Equal(t, largeBlock, readLargeBlock) diff --git a/x/blockdb/datasplit_test.go b/x/blockdb/datasplit_test.go new file mode 100644 index 000000000000..1ac8b95f3257 --- /dev/null +++ b/x/blockdb/datasplit_test.go @@ -0,0 +1,101 @@ +// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved. +// See the file LICENSE for licensing terms. + +package blockdb + +import ( + "fmt" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestDataSplitting(t *testing.T) { + // Each data file should have enough space for 2 blocks + config := &DatabaseConfig{ + MaxDataFileSize: 1024 * 2.5, + CheckpointInterval: 1024, + } + store, cleanup := newTestDatabase(t, false, config) + defer cleanup() + + // create 11 blocks, 1kb each + numBlocks := 11 + blocks := make([][]byte, numBlocks) + for i := range numBlocks { + blocks[i] = make([]byte, 1024) + blocks[i][0] = byte(i) + require.NoError(t, store.WriteBlock(uint64(i), blocks[i], 0)) + } + + // Verify that multiple data files were created. + files, err := os.ReadDir(store.dataDir) + require.NoError(t, err) + var dataFileCount int + for _, file := range files { + var index int + if n, err := fmt.Sscanf(file.Name(), dataFileNameFormat, &index); n == 1 && err == nil { + dataFileCount++ + } + } + + // 6 data files should be created + require.Equal(t, 6, dataFileCount) + + // Verify all blocks are readable + for i := range numBlocks { + readBlock, err := store.ReadBlock(uint64(i)) + require.NoError(t, err) + require.Equal(t, blocks[i], readBlock) + } + + // reopen and verify all blocks are readable + require.NoError(t, store.Close()) + store, err = New(filepath.Dir(store.indexFile.Name()), store.dataDir, false, false, *config, store.log) + require.NoError(t, err) + defer store.Close() + for i := range numBlocks { + readBlock, err := store.ReadBlock(uint64(i)) + require.NoError(t, err) + require.Equal(t, blocks[i], readBlock) + } +} + +func TestDataSplitting_DeletedFile(t *testing.T) { + config := &DatabaseConfig{ + MaxDataFileSize: 1024 * 2.5, + CheckpointInterval: 1024, + } + store, cleanup := newTestDatabase(t, false, config) + defer cleanup() + + // create 5 blocks, 1kb each + numBlocks := 5 + blocks := make([][]byte, numBlocks) + for i := range numBlocks { + blocks[i] = make([]byte, 1024) + blocks[i][0] = byte(i) + require.NoError(t, store.WriteBlock(uint64(i), blocks[i], 0)) + } + + // Delete the first data file (blockdb_0.dat) + firstDataFilePath := filepath.Join(store.dataDir, fmt.Sprintf(dataFileNameFormat, 0)) + require.NoError(t, os.Remove(firstDataFilePath)) + + // reopen and verify the blocks + require.NoError(t, store.Close()) + store, err := New(filepath.Dir(store.indexFile.Name()), store.dataDir, false, false, *config, store.log) + require.NoError(t, err) + defer store.Close() + for i := range numBlocks { + readBlock, err := store.ReadBlock(uint64(i)) + require.NoError(t, err) + if i < 2 { + require.Nil(t, readBlock) + } else { + require.Equal(t, blocks[i], readBlock) + } + } +} diff --git a/x/blockdb/errors.go b/x/blockdb/errors.go index 75a82187bfa4..7ec789a0dd2a 100644 --- a/x/blockdb/errors.go +++ b/x/blockdb/errors.go @@ -1,12 +1,18 @@ +// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved. +// See the file LICENSE for licensing terms. 
+ package blockdb -import "fmt" +import ( + "errors" + "fmt" +) var ( - ErrInvalidBlockHeight = fmt.Errorf("blockdb: invalid block height") - ErrBlockEmpty = fmt.Errorf("blockdb: block is empty") - ErrDatabaseClosed = fmt.Errorf("blockdb: database is closed") - ErrCorrupted = fmt.Errorf("blockdb: unrecoverable corruption detected") + ErrInvalidBlockHeight = errors.New("blockdb: invalid block height") + ErrBlockEmpty = errors.New("blockdb: block is empty") + ErrDatabaseClosed = errors.New("blockdb: database is closed") + ErrCorrupted = errors.New("blockdb: unrecoverable corruption detected") + ErrHeaderSizeTooLarge = errors.New("blockdb: header size cannot be >= block size") ErrBlockTooLarge = fmt.Errorf("blockdb: block size exceeds maximum allowed size of %d bytes", MaxBlockDataSize) - ErrHeaderSizeTooLarge = fmt.Errorf("blockdb: header size cannot be >= block size") ) diff --git a/x/blockdb/helpers_test.go b/x/blockdb/helpers_test.go index 79cb4ffa2d7e..4e3083178fc8 100644 --- a/x/blockdb/helpers_test.go +++ b/x/blockdb/helpers_test.go @@ -1,22 +1,26 @@ +// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved. +// See the file LICENSE for licensing terms. + package blockdb import ( "crypto/rand" "math/big" "os" + "path/filepath" "testing" + "github.com/stretchr/testify/require" + "github.com/ava-labs/avalanchego/utils/logging" ) func newTestDatabase(t *testing.T, syncToDisk bool, opts *DatabaseConfig) (*Database, func()) { t.Helper() dir, err := os.MkdirTemp("", "blockdb_test_*") - if err != nil { - t.Fatalf("failed to create temp dir: %v", err) - } - idxDir := dir + "/idx" - dataDir := dir + "/dat" + require.NoError(t, err, "failed to create temp dir") + idxDir := filepath.Join(dir, "idx") + dataDir := filepath.Join(dir, "dat") var config DatabaseConfig if opts != nil { config = *opts @@ -26,7 +30,7 @@ func newTestDatabase(t *testing.T, syncToDisk bool, opts *DatabaseConfig) (*Data db, err := New(idxDir, dataDir, syncToDisk, true, config, logging.NoLog{}) if err != nil { os.RemoveAll(dir) - t.Fatalf("failed to create database: %v", err) + require.NoError(t, err, "failed to create database") } cleanup := func() { db.Close() @@ -38,27 +42,19 @@ func newTestDatabase(t *testing.T, syncToDisk bool, opts *DatabaseConfig) (*Data // randomBlock generates a random block of size 1KB-50KB. 
func randomBlock(t *testing.T) []byte { size, err := rand.Int(rand.Reader, big.NewInt(50*1024-1024+1)) - if err != nil { - t.Fatalf("failed to generate random size: %v", err) - } + require.NoError(t, err, "failed to generate random size") blockSize := int(size.Int64()) + 1024 // 1KB to 50KB b := make([]byte, blockSize) _, err = rand.Read(b) - if err != nil { - t.Fatalf("failed to fill random block: %v", err) - } + require.NoError(t, err, "failed to fill random block") return b } func checkDatabaseState(t *testing.T, db *Database, maxHeight uint64, maxContiguousHeight uint64) { - if got := db.maxBlockHeight.Load(); got != maxHeight { - t.Fatalf("maxBlockHeight: got %d, want %d", got, maxHeight) - } + require.Equal(t, maxHeight, db.maxBlockHeight.Load(), "maxBlockHeight mismatch") gotMCH, ok := db.MaxContiguousHeight() - if maxContiguousHeight != unsetHeight && !ok { - t.Fatalf("MaxContiguousHeight is not set, want %d", maxContiguousHeight) - } - if ok && gotMCH != maxContiguousHeight { - t.Fatalf("maxContiguousHeight: got %d, want %d", gotMCH, maxContiguousHeight) + if maxContiguousHeight != unsetHeight { + require.True(t, ok, "MaxContiguousHeight is not set, want %d", maxContiguousHeight) + require.Equal(t, maxContiguousHeight, gotMCH, "maxContiguousHeight mismatch") } } diff --git a/x/blockdb/index.go b/x/blockdb/index.go index 3bf6a108b76a..d52960c02f6a 100644 --- a/x/blockdb/index.go +++ b/x/blockdb/index.go @@ -1,3 +1,6 @@ +// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved. +// See the file LICENSE for licensing terms. + package blockdb import ( @@ -14,8 +17,8 @@ const ( ) var ( - _ encoding.BinaryMarshaler = indexEntry{} - _ encoding.BinaryUnmarshaler = &indexEntry{} + _ encoding.BinaryMarshaler = (*indexEntry)(nil) + _ encoding.BinaryUnmarshaler = (*indexEntry)(nil) sizeOfIndexEntry = uint64(binary.Size(indexEntry{})) sizeOfIndexFileHeader = uint64(binary.Size(indexFileHeader{})) @@ -63,7 +66,7 @@ type indexFileHeader struct { MaxHeight BlockHeight MinHeight BlockHeight MaxContiguousHeight BlockHeight - DataFileSize uint64 + NextWriteOffset uint64 // reserve 24 bytes for future use Reserved [24]byte } @@ -76,7 +79,7 @@ func (h indexFileHeader) MarshalBinary() ([]byte, error) { binary.LittleEndian.PutUint64(buf[16:], h.MaxHeight) binary.LittleEndian.PutUint64(buf[24:], h.MinHeight) binary.LittleEndian.PutUint64(buf[32:], h.MaxContiguousHeight) - binary.LittleEndian.PutUint64(buf[40:], h.DataFileSize) + binary.LittleEndian.PutUint64(buf[40:], h.NextWriteOffset) return buf, nil } @@ -93,7 +96,7 @@ func (h *indexFileHeader) UnmarshalBinary(data []byte) error { h.MaxHeight = binary.LittleEndian.Uint64(data[16:]) h.MinHeight = binary.LittleEndian.Uint64(data[24:]) h.MaxContiguousHeight = binary.LittleEndian.Uint64(data[32:]) - h.DataFileSize = binary.LittleEndian.Uint64(data[40:]) + h.NextWriteOffset = binary.LittleEndian.Uint64(data[40:]) return nil } @@ -115,6 +118,10 @@ func (s *Database) indexEntryOffset(height BlockHeight) (uint64, error) { func (s *Database) readIndexEntry(height BlockHeight) (indexEntry, error) { var entry indexEntry + if height > s.maxBlockHeight.Load() { + return entry, nil + } + offset, err := s.indexEntryOffset(height) if err != nil { return entry, err @@ -164,7 +171,7 @@ func (s *Database) persistIndexHeader() error { } header := s.header - header.DataFileSize = s.nextDataWriteOffset.Load() + header.NextWriteOffset = s.nextDataWriteOffset.Load() header.MaxContiguousHeight = s.maxContiguousHeight.Load() header.MaxHeight = s.maxBlockHeight.Load() 
headerBytes, err := header.MarshalBinary() diff --git a/x/blockdb/readblock_test.go b/x/blockdb/readblock_test.go index f22e9d345d13..f13092b1fdbb 100644 --- a/x/blockdb/readblock_test.go +++ b/x/blockdb/readblock_test.go @@ -1,13 +1,14 @@ +// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved. +// See the file LICENSE for licensing terms. + package blockdb import ( - "errors" "math" "sync" "sync/atomic" "testing" - "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -96,8 +97,7 @@ func TestReadOperations(t *testing.T) { } block := randomBlock(t) - err := store.WriteBlock(i, block, uint16(i-minHeight)) - require.NoError(t, err) + require.NoError(t, store.WriteBlock(i, block, uint16(i-minHeight))) seededBlocks[i] = block } } @@ -105,18 +105,21 @@ func TestReadOperations(t *testing.T) { if tt.setup != nil { tt.setup(store) } - readBlock, err := store.ReadBlock(tt.readHeight) - readHeader, err := store.ReadHeader(tt.readHeight) - readBody, err := store.ReadBody(tt.readHeight) if tt.wantErr != nil { - require.Error(t, err) - require.True(t, errors.Is(err, tt.wantErr)) + _, err := store.ReadBlock(tt.readHeight) + require.ErrorIs(t, err, tt.wantErr) return } - // Handle success cases + readBlock, err := store.ReadBlock(tt.readHeight) + require.NoError(t, err) + readHeader, err := store.ReadHeader(tt.readHeight) + require.NoError(t, err) + readBody, err := store.ReadBody(tt.readHeight) require.NoError(t, err) + + // Handle success cases if tt.noBlock { require.Nil(t, readBlock) require.Nil(t, readHeader) @@ -129,9 +132,9 @@ func TestReadOperations(t *testing.T) { if headerSize > 0 { expectHeader = expectedBlock[:headerSize] } - assert.Equal(t, expectedBlock, readBlock) - assert.Equal(t, expectHeader, readHeader) - assert.Equal(t, expectedBlock[headerSize:], readBody) + require.Equal(t, expectedBlock, readBlock) + require.Equal(t, expectHeader, readHeader) + require.Equal(t, expectedBlock[headerSize:], readBody) } }) } @@ -161,8 +164,7 @@ func TestReadOperations_Concurrency(t *testing.T) { headerSizes[i] = uint16(len(blocks[i])) / 2 } - err := store.WriteBlock(uint64(i), blocks[i], headerSizes[i]) - require.NoError(t, err) + require.NoError(t, store.WriteBlock(uint64(i), blocks[i], headerSizes[i])) } var wg sync.WaitGroup @@ -174,20 +176,15 @@ func TestReadOperations_Concurrency(t *testing.T) { defer wg.Done() block, err := store.ReadBlock(uint64(height)) if err != nil { - t.Errorf("ReadBlock failed for height %d: %v", height, err) errors.Add(1) return } if gapHeights[uint64(height)] || height >= numBlocks { if block != nil { - t.Errorf("Expected nil block for height %d", height) errors.Add(1) } } else { - if !assert.Equal(t, blocks[height], block) { - t.Errorf("ReadBlock data mismatch at height %d", height) - errors.Add(1) - } + require.Equal(t, blocks[height], block) } }(i) @@ -195,13 +192,11 @@ func TestReadOperations_Concurrency(t *testing.T) { defer wg.Done() header, err := store.ReadHeader(uint64(height)) if err != nil { - t.Errorf("ReadHeader failed for height %d: %v", height, err) errors.Add(1) return } if gapHeights[uint64(height)] || height >= numBlocks { if header != nil { - t.Errorf("Expected nil header for height %d", height) errors.Add(1) } } else { @@ -209,10 +204,7 @@ func TestReadOperations_Concurrency(t *testing.T) { if headerSizes[height] == 0 { expectedHeader = nil } - if !assert.Equal(t, expectedHeader, header) { - t.Errorf("ReadHeader data mismatch at height %d", height) - errors.Add(1) - } + require.Equal(t, expectedHeader, header) } }(i) 
@@ -220,21 +212,16 @@ func TestReadOperations_Concurrency(t *testing.T) { defer wg.Done() body, err := store.ReadBody(uint64(height)) if err != nil { - t.Errorf("ReadBody failed for height %d: %v", height, err) errors.Add(1) return } if gapHeights[uint64(height)] || height >= numBlocks { if body != nil { - t.Errorf("Expected nil body for height %d", height) errors.Add(1) } } else { expectedBody := blocks[height][headerSizes[height]:] - if !assert.Equal(t, expectedBody, body) { - t.Errorf("ReadBody data mismatch at height %d", height) - errors.Add(1) - } + require.Equal(t, expectedBody, body) } }(i) } diff --git a/x/blockdb/recovery.go b/x/blockdb/recovery.go index 43d2414cec87..11ca34c1b795 100644 --- a/x/blockdb/recovery.go +++ b/x/blockdb/recovery.go @@ -1,7 +1,11 @@ +// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved. +// See the file LICENSE for licensing terms. + package blockdb import ( "fmt" + "os" "go.uber.org/zap" ) @@ -10,74 +14,98 @@ import ( // for blocks that may not be correctly indexed, usually after an unclean shutdown. // It reconciles the data file with the index file header and entries. func (s *Database) recover() error { - dataFileInfo, err := s.dataFile.Stat() + dataFiles, maxIndex, err := s.listDataFiles() if err != nil { - return fmt.Errorf("failed to get data file stats for recovery: %w", err) + return fmt.Errorf("failed to list data files for recovery: %w", err) } - dataFileActualSize := uint64(dataFileInfo.Size()) - nextDataWriteOffset := s.nextDataWriteOffset.Load() - // If the data file size matches the size recorded in the index header, then no recovery is needed. - if dataFileActualSize == nextDataWriteOffset { + if len(dataFiles) == 0 { return nil } - // If the data file is smaller than the index header indicates, this is a fatal inconsistency. - // The index file claims more data than actually exists, which cannot be recovered automatically. - if dataFileActualSize < nextDataWriteOffset { - return fmt.Errorf("%w: data file is smaller than index header claims (data file: %d bytes, index header: %d bytes) -- possible corruption or incomplete flush", - ErrCorrupted, dataFileActualSize, nextDataWriteOffset) + // Calculate the expected next write offset based on the data files on disk. + var calculatedNextDataWriteOffset uint64 + if s.header.MaxDataFileSize > 0 { + // All data files before the last one are full. + fullFilesCount := maxIndex + calculatedNextDataWriteOffset += uint64(fullFilesCount) * s.header.MaxDataFileSize + + lastFileInfo, err := os.Stat(dataFiles[maxIndex]) + if err != nil { + return fmt.Errorf("failed to get stats for last data file %s: %w", dataFiles[maxIndex], err) + } + calculatedNextDataWriteOffset += uint64(lastFileInfo.Size()) + } else { + lastFileInfo, err := os.Stat(dataFiles[0]) + if err != nil { + return fmt.Errorf("failed to get stats for data file %s: %w", dataFiles[0], err) + } + calculatedNextDataWriteOffset = uint64(lastFileInfo.Size()) } - // Data file is larger than the index header indicates. - s.log.Info("Data file larger than indexed size; recovering blocks", - zap.Uint64("dataFileSize", dataFileActualSize), - zap.Uint64("indexedSize", nextDataWriteOffset), - ) + nextDataWriteOffset := s.nextDataWriteOffset.Load() - // Start scan from where the index left off. 
- currentScanOffset := nextDataWriteOffset - var recoveredBlocksCount int = 0 - var maxRecoveredHeightSeen uint64 = s.maxBlockHeight.Load() - for currentScanOffset < dataFileActualSize { - bh, err := s.recoverBlockAtOffset(currentScanOffset, dataFileActualSize) - if err != nil { - s.log.Error("Recovery: scan stopped due to invalid block data", + switch { + case calculatedNextDataWriteOffset == nextDataWriteOffset: + s.log.Debug("Recovery: data files match index header, no recovery needed.") + return nil + + case calculatedNextDataWriteOffset < nextDataWriteOffset: + return fmt.Errorf("%w: calculated next write offset is smaller than index header claims "+ + "(calculated: %d bytes, index header: %d bytes)", + ErrCorrupted, calculatedNextDataWriteOffset, nextDataWriteOffset) + default: + // The data on disk is ahead of the index. We need to recover un-indexed blocks. + s.log.Info("Recovery: data files are ahead of index; recovering un-indexed blocks.", + zap.Uint64("headerNextWriteOffset", nextDataWriteOffset), + zap.Uint64("calculatedNextWriteOffset", calculatedNextDataWriteOffset), + ) + + // Start scan from where the index left off. + currentScanOffset := nextDataWriteOffset + recoveredBlocksCount := 0 + maxRecoveredHeightSeen := s.maxBlockHeight.Load() + + totalDataFileSize := calculatedNextDataWriteOffset + for currentScanOffset < totalDataFileSize { + bh, err := s.recoverBlockAtOffset(currentScanOffset, totalDataFileSize) + if err != nil { + s.log.Error("Recovery: scan stopped due to invalid block data", + zap.Uint64("offset", currentScanOffset), + zap.Error(err), + ) + break + } + s.log.Debug("Recovery: Successfully validated and indexed block", + zap.Uint64("height", bh.Height), + zap.Uint64("size", bh.Size), zap.Uint64("offset", currentScanOffset), - zap.Error(err), ) - break + recoveredBlocksCount++ + if bh.Height > maxRecoveredHeightSeen || maxRecoveredHeightSeen == unsetHeight { + maxRecoveredHeightSeen = bh.Height + } + currentScanOffset += sizeOfBlockHeader + bh.Size } - s.log.Debug("Recovery: Successfully validated and indexed block", - zap.Uint64("height", bh.Height), - zap.Uint64("size", bh.Size), - zap.Uint64("offset", currentScanOffset), - ) - recoveredBlocksCount++ - if bh.Height > maxRecoveredHeightSeen || maxRecoveredHeightSeen == unsetHeight { - maxRecoveredHeightSeen = bh.Height + s.nextDataWriteOffset.Store(currentScanOffset) + s.maxBlockHeight.Store(maxRecoveredHeightSeen) + + // Recalculate MCH if we recovered any blocks + if recoveredBlocksCount > 0 { + s.updateMaxContiguousHeightOnRecovery() } - currentScanOffset += sizeOfBlockHeader + bh.Size - } - s.nextDataWriteOffset.Store(currentScanOffset) - s.maxBlockHeight.Store(maxRecoveredHeightSeen) - // Recalculate MCH if we recovered any blocks - if recoveredBlocksCount > 0 { - s.updateMaxContiguousHeightOnRecovery() - } + if err := s.persistIndexHeader(); err != nil { + return fmt.Errorf("recovery: failed to save index header after recovery scan: %w", err) + } - if err := s.persistIndexHeader(); err != nil { - return fmt.Errorf("recovery: failed to save index header after recovery scan: %w", err) + s.log.Info("Recovery: Scan finished", + zap.Int("recoveredBlocks", recoveredBlocksCount), + zap.Uint64("finalNextWriteOffset", s.nextDataWriteOffset.Load()), + zap.Uint64("maxContiguousBlockHeight", s.maxContiguousHeight.Load()), + zap.Uint64("maxBlockHeight", s.maxBlockHeight.Load()), + ) } - - s.log.Info("Recovery: Scan finished", - zap.Int("recoveredBlocks", recoveredBlocksCount), - zap.Uint64("dataFileSize", 
nextDataWriteOffset), - zap.Uint64("maxContiguousBlockHeight", s.maxContiguousHeight.Load()), - zap.Uint64("maxBlockHeight", s.maxBlockHeight.Load()), - ) - return nil } @@ -86,8 +114,13 @@ func (s *Database) recoverBlockAtOffset(offset, dataFileActualSize uint64) (bloc if dataFileActualSize-offset < sizeOfBlockHeader { return bh, fmt.Errorf("not enough data for block header at offset %d", offset) } + + dataFile, localOffset, err := s.getDataFileAndOffset(offset) + if err != nil { + return bh, fmt.Errorf("recovery: failed to get data file for offset %d: %w", offset, err) + } bhBuf := make([]byte, sizeOfBlockHeader) - if _, err := s.dataFile.ReadAt(bhBuf, int64(offset)); err != nil { + if _, err := dataFile.ReadAt(bhBuf, int64(localOffset)); err != nil { return bh, fmt.Errorf("error reading block header at offset %d: %w", offset, err) } if err := bh.UnmarshalBinary(bhBuf); err != nil { @@ -110,7 +143,7 @@ func (s *Database) recoverBlockAtOffset(offset, dataFileActualSize uint64) (bloc return bh, fmt.Errorf("block data out of bounds at offset %d", offset) } blockData := make([]byte, bh.Size) - if _, err := s.dataFile.ReadAt(blockData, int64(offset+sizeOfBlockHeader)); err != nil { + if _, err := dataFile.ReadAt(blockData, int64(offset+sizeOfBlockHeader)); err != nil { return bh, fmt.Errorf("failed to read block data at offset %d: %w", offset, err) } calculatedChecksum := calculateChecksum(blockData) diff --git a/x/blockdb/writeblock_test.go b/x/blockdb/writeblock_test.go index df462e759ecd..f4a291d627e5 100644 --- a/x/blockdb/writeblock_test.go +++ b/x/blockdb/writeblock_test.go @@ -1,3 +1,6 @@ +// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved. +// See the file LICENSE for licensing terms. + package blockdb import ( @@ -7,7 +10,6 @@ import ( "sync/atomic" "testing" - "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -166,7 +168,7 @@ func TestWriteBlock_HeightsVerification(t *testing.T) { for h, expectedBlock := range blocksWritten { readBlock, err := store.ReadBlock(h) require.NoError(t, err, "ReadBlock failed at height %d", h) - assert.Equal(t, expectedBlock, readBlock, "data integrity check failed at height %d", h) + require.Equal(t, expectedBlock, readBlock) // Test header/body separation if header size was specified if tt.headerSizes != nil { @@ -178,13 +180,13 @@ func TestWriteBlock_HeightsVerification(t *testing.T) { require.NoError(t, err, "ReadBody failed at height %d", h) if headerSize == 0 { - assert.Nil(t, header, "header should be nil for headerSize=0") - assert.Equal(t, expectedBlock, body, "body should equal full block when headerSize=0") + require.Nil(t, header) + require.Equal(t, expectedBlock, body) } else { expectedHeader := expectedBlock[:headerSize] expectedBody := expectedBlock[headerSize:] - assert.Equal(t, expectedHeader, header, "header mismatch at height %d", h) - assert.Equal(t, expectedBody, body, "body mismatch at height %d", h) + require.Equal(t, expectedHeader, header, "header mismatch at height %d", h) + require.Equal(t, expectedBody, body, "body mismatch at height %d", h) } } } @@ -221,7 +223,7 @@ func TestWriteBlock_Concurrency(t *testing.T) { err := store.WriteBlock(height, block, 1) if err != nil { - t.Errorf("WriteBlock failed for iteration %d (height %d): %v", i, height, err) + require.NoError(t, err, "WriteBlock failed for iteration %d (height %d)", i, height) errors.Add(1) } }(i) @@ -329,7 +331,7 @@ func TestWriteBlock_Errors(t *testing.T) { CheckpointInterval: 1024, }, headerSize: 0, - wantErr: 
errors.New("exceed configured max data file size"), + wantErr: ErrBlockTooLarge, }, { name: "data file offset overflow", @@ -359,12 +361,7 @@ func TestWriteBlock_Errors(t *testing.T) { } err := store.WriteBlock(tt.height, tt.block, tt.headerSize) - require.Error(t, err) - require.NotNil(t, tt.wantErr, "test case must specify expected error") - - if !errors.Is(err, tt.wantErr) { - require.Contains(t, err.Error(), tt.wantErr.Error()) - } + require.Contains(t, err.Error(), tt.wantErr.Error()) checkDatabaseState(t, store, unsetHeight, unsetHeight) }) } From 99d0ee2f223d0af81a59d9b5a9dbdf5ec0566300 Mon Sep 17 00:00:00 2001 From: Draco Date: Mon, 23 Jun 2025 18:57:57 -0400 Subject: [PATCH 07/27] fix: close db before deleting the file --- x/blockdb/datasplit_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/x/blockdb/datasplit_test.go b/x/blockdb/datasplit_test.go index 1ac8b95f3257..a12e7df92e65 100644 --- a/x/blockdb/datasplit_test.go +++ b/x/blockdb/datasplit_test.go @@ -79,6 +79,7 @@ func TestDataSplitting_DeletedFile(t *testing.T) { blocks[i][0] = byte(i) require.NoError(t, store.WriteBlock(uint64(i), blocks[i], 0)) } + store.Close() // Delete the first data file (blockdb_0.dat) firstDataFilePath := filepath.Join(store.dataDir, fmt.Sprintf(dataFileNameFormat, 0)) From 26ccb7062c436f8670a9c152e1ff284ebcc0c55b Mon Sep 17 00:00:00 2001 From: Draco Date: Thu, 26 Jun 2025 12:56:10 -0400 Subject: [PATCH 08/27] fix: recovery issues with data files splitting & feedback --- x/blockdb/README.md | 98 ++++++---- x/blockdb/block.go | 149 ++++++++++----- x/blockdb/config.go | 46 ++++- x/blockdb/database.go | 46 +++-- x/blockdb/database_test.go | 58 +++--- x/blockdb/datasplit_test.go | 18 +- x/blockdb/errors.go | 13 +- x/blockdb/helpers_test.go | 11 +- x/blockdb/index.go | 48 ++--- x/blockdb/readblock_test.go | 16 +- x/blockdb/recovery.go | 139 +++++++------- x/blockdb/recovery_test.go | 348 +++++++++++++++++++++++++++++++++++ x/blockdb/writeblock_test.go | 101 +++++----- 13 files changed, 780 insertions(+), 311 deletions(-) create mode 100644 x/blockdb/recovery_test.go diff --git a/x/blockdb/README.md b/x/blockdb/README.md index 1fc70c9e0059..113fa0ff570a 100644 --- a/x/blockdb/README.md +++ b/x/blockdb/README.md @@ -12,7 +12,7 @@ BlockDB is a specialized database optimized for blockchain blocks. ## Design -BlockDB uses two file types: index files and data files. The index file maps block heights to locations in data files, while data files store the actual block content. Data storage can be split across multiple files based on the maximum data file size. +BlockDB uses a single index file and multiple data files. The index file maps block heights to locations in the data files, while data files store the actual block content. Data storage can be split across multiple data files based on the maximum data file size. ``` ┌─────────────────┐ ┌─────────────────┐ @@ -24,22 +24,20 @@ BlockDB uses two file types: index files and data files. The index file maps blo │ - Min Height │ │ │ - Data │ │ - Max Height │ │ ├─────────────────┤ │ - Data Size │ │ │ Block 1 │ -│ - ... │ │ │ - Header │ -├─────────────────┤ │ ┌──>│ - Data │ +│ - ... │ │ ┌──>│ - Header │ +├─────────────────┤ │ │ │ - Data │ │ Entry[0] │ │ │ ├─────────────────┤ │ - Offset ───────┼──┘ │ │ ... 
│ │ - Size │ │ └─────────────────┘ │ - Header Size │ │ -├─────────────────┤ │ ┌─────────────────┐ -│ Entry[1] │ │ │ Data File 2 │ -│ - Offset ───────┼─────┘ │ (.dat) │ -│ - Size │ ├─────────────────┤ -│ - Header Size │ │ Block N │ -├─────────────────┤ │ - Header │ -│ ... │ │ - Data │ -└─────────────────┘ ├─────────────────┤ - │ ... │ - └─────────────────┘ +├─────────────────┤ │ +│ Entry[1] │ │ +│ - Offset ───────┼─────┘ +│ - Size │ +│ - Header Size │ +├─────────────────┤ +│ ... │ +└─────────────────┘ ``` ### File Formats @@ -49,26 +47,26 @@ BlockDB uses two file types: index files and data files. The index file maps blo The index file consists of a fixed-size header followed by fixed-size entries: ``` -Index File Header (72 bytes): +Index File Header (80 bytes): ┌────────────────────────────────┬─────────┐ │ Field │ Size │ ├────────────────────────────────┼─────────┤ │ Version │ 8 bytes │ │ Max Data File Size │ 8 bytes │ -│ Max Block Height │ 8 bytes │ │ Min Block Height │ 8 bytes │ │ Max Contiguous Height │ 8 bytes │ -│ Data File Size │ 8 bytes │ -│ Reserved │ 24 bytes│ +│ Max Block Height │ 8 bytes │ +│ Next Write Offset │ 8 bytes │ +│ Reserved │ 32 bytes│ └────────────────────────────────┴─────────┘ -Index Entry (18 bytes): +Index Entry (16 bytes): ┌────────────────────────────────┬─────────┐ │ Field │ Size │ ├────────────────────────────────┼─────────┤ │ Data File Offset │ 8 bytes │ -│ Block Data Size │ 8 bytes │ -│ Header Size │ 2 bytes │ +│ Block Data Size │ 4 bytes │ +│ Header Size │ 4 bytes │ └────────────────────────────────┴─────────┘ ``` @@ -77,14 +75,14 @@ Index Entry (18 bytes): Each block in the data file is stored with a header followed by the raw block data: ``` -Block Header (26 bytes): +Block Header (24 bytes): ┌────────────────────────────────┬─────────┐ │ Field │ Size │ ├────────────────────────────────┼─────────┤ │ Height │ 8 bytes │ -│ Size │ 8 bytes │ -│ Header Size │ 2 bytes │ │ Checksum │ 8 bytes │ +│ Size │ 4 bytes │ +│ Header Size │ 4 bytes │ └────────────────────────────────┴─────────┘ ``` @@ -94,27 +92,34 @@ BlockDB allows overwriting blocks at existing heights. When a block is overwritt ### Fixed-Size Index Entries -Each index entry is exactly 18 bytes on disk, containing the offset, size, and header size. This fixed size enables direct calculation of where each block's index entry is located, providing O(1) lookups. For blockchains with high block heights, the index remains efficient, even at height 1 billion, the index file would only be ~18GB. +Each index entry is exactly 16 bytes on disk, containing the offset, size, and header size. This fixed size enables direct calculation of where each block's index entry is located, providing O(1) lookups. For blockchains with high block heights, the index remains efficient, even at height 1 billion, the index file would only be ~16GB. ### Durability and Fsync Behavior BlockDB provides configurable durability through the `syncToDisk` parameter: -- When enabled, the data file is fsync'd after every block write, guaranteeing immediate durability -- The index file is fsync'd periodically (every `CheckpointInterval` blocks) to balance performance and recovery time -- When disabled, writes rely on OS buffering, trading durability for significantly better performance +**Data File Behavior:** + +- **When `syncToDisk=true`**: The data file is fsync'd after every block write, guaranteeing durability against both process failures and kernel/machine failures. 
+- **When `syncToDisk=false`**: Data file writes are buffered, providing durability against process failures but not against kernel or machine failures. + +**Index File Behavior:** + +- **When `syncToDisk=true`**: The index file is fsync'd every `CheckpointInterval` blocks (when the header is written). +- **When `syncToDisk=false`**: The index file relies on OS buffering and is not explicitly fsync'd. ### Recovery Mechanism -On startup, BlockDB checks for signs of an unclean shutdown. If detected, it performs recovery: +On startup, BlockDB checks for signs of an unclean shutdown by comparing the data file size on disk with the indexed data size stored in the index file header. If the data files are larger than what the index claims, it indicates that blocks were written but the index wasn't properly updated before shutdown. -1. Compares the data file size with the indexed data size (stored in the index header) -2. If the data file is larger, it starts scanning from where the index left off -3. For each unindexed block found: +**Recovery Process:** + +1. Starts scanning from where the index left off (`NextWriteOffset`) +2. For each unindexed block found: - Validates the block header and checksum - Writes the corresponding index entry -4. Updates the max contiguous height and max block height -5. Persists the updated index header +3. Updates the max contiguous height and max block height +4. Persists the updated index header ## Usage @@ -123,12 +128,10 @@ On startup, BlockDB checks for signs of an unclean shutdown. If detected, it per ```go import "github.com/ava-labs/avalanchego/x/blockdb" -config := blockdb.DefaultDatabaseOptions() +config := blockdb.DefaultDatabaseConfig() db, err := blockdb.New( "/path/to/index", // Index directory "/path/to/data", // Data directory - true, // Sync to disk - false, // Don't truncate existing data config, logger, ) @@ -144,19 +147,36 @@ defer db.Close() ```go // Write a block with header size height := uint64(100) -blockData := []byte("block data...") -headerSize := uint16(500) // First 500 bytes are the header +blockData := []byte("header:block data") +headerSize := uint32(7) // First 7 bytes are the header err := db.WriteBlock(height, blockData, headerSize) +if err != nil { + fmt.Println("Error writing block:", err) + return +} -// Read a complete block +// Read a block blockData, err := db.ReadBlock(height) +if err != nil { + fmt.Println("Error reading block:", err) + return +} if blockData == nil { // Block doesn't exist at this height + return } // Read block components separately headerData, err := db.ReadHeader(height) +if err != nil { + fmt.Println("Error reading header:", err) + return +} bodyData, err := db.ReadBody(height) +if err != nil { + fmt.Println("Error reading body:", err) + return +} ``` ## TODO diff --git a/x/blockdb/block.go b/x/blockdb/block.go index 7c008c4363d8..38d39c5c5486 100644 --- a/x/blockdb/block.go +++ b/x/blockdb/block.go @@ -9,18 +9,19 @@ import ( "errors" "fmt" "io" - "math" "os" "github.com/cespare/xxhash/v2" "go.uber.org/zap" + + safemath "github.com/ava-labs/avalanchego/utils/math" ) var ( _ encoding.BinaryMarshaler = (*blockHeader)(nil) _ encoding.BinaryUnmarshaler = (*blockHeader)(nil) - sizeOfBlockHeader = uint64(binary.Size(blockHeader{})) + sizeOfBlockHeader = uint32(binary.Size(blockHeader{})) ) // BlockHeight defines the type for block heights. @@ -30,7 +31,7 @@ type BlockHeight = uint64 type BlockData = []byte // BlockHeaderSize is the size of the header in the block data. 
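// Sketch of the O(1) lookup arithmetic implied by the fixed-size index entries
// documented in the README above. The constants mirror the documented on-disk
// sizes and the helper name is illustrative, not part of this file; the real
// implementation additionally guards this arithmetic against overflow.
func exampleIndexEntryOffset(height, minHeight uint64) uint64 {
	const (
		indexHeaderBytes = 80 // fixed index file header size
		indexEntryBytes  = 16 // fixed size of each index entry
	)
	// Every height maps directly to a fixed position in the index file.
	return indexHeaderBytes + (height-minHeight)*indexEntryBytes
}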
-type BlockHeaderSize = uint16 +type BlockHeaderSize = uint32 // MaxBlockDataSize is the maximum size of a block in bytes (16 MB). const MaxBlockDataSize = 1 << 24 @@ -38,18 +39,18 @@ const MaxBlockDataSize = 1 << 24 // blockHeader is prepended to each block in the data file. type blockHeader struct { Height BlockHeight - Size uint64 - HeaderSize BlockHeaderSize Checksum uint64 + Size uint32 + HeaderSize BlockHeaderSize } // MarshalBinary implements the encoding.BinaryMarshaler interface. func (bh blockHeader) MarshalBinary() ([]byte, error) { buf := make([]byte, sizeOfBlockHeader) binary.LittleEndian.PutUint64(buf[0:], bh.Height) - binary.LittleEndian.PutUint64(buf[8:], bh.Size) - binary.LittleEndian.PutUint16(buf[16:], bh.HeaderSize) - binary.LittleEndian.PutUint64(buf[18:], bh.Checksum) + binary.LittleEndian.PutUint64(buf[8:], bh.Checksum) + binary.LittleEndian.PutUint32(buf[16:], bh.Size) + binary.LittleEndian.PutUint32(buf[20:], bh.HeaderSize) return buf, nil } @@ -59,22 +60,22 @@ func (bh *blockHeader) UnmarshalBinary(data []byte) error { return fmt.Errorf("incorrect data length to unmarshal blockHeader: got %d bytes, need exactly %d", len(data), sizeOfBlockHeader) } bh.Height = binary.LittleEndian.Uint64(data[0:]) - bh.Size = binary.LittleEndian.Uint64(data[8:]) - bh.HeaderSize = binary.LittleEndian.Uint16(data[16:]) - bh.Checksum = binary.LittleEndian.Uint64(data[18:]) + bh.Checksum = binary.LittleEndian.Uint64(data[8:]) + bh.Size = binary.LittleEndian.Uint32(data[16:]) + bh.HeaderSize = binary.LittleEndian.Uint32(data[20:]) return nil } // WriteBlock inserts a block into the store at the given height with the specified header size. func (s *Database) WriteBlock(height BlockHeight, block BlockData, headerSize BlockHeaderSize) error { - s.mu.RLock() - defer s.mu.RUnlock() + s.closeMu.RLock() + defer s.closeMu.RUnlock() if s.closed { return ErrDatabaseClosed } - blockDataLen := uint64(len(block)) + blockDataLen := uint32(len(block)) if blockDataLen == 0 { return ErrBlockEmpty } @@ -83,7 +84,7 @@ func (s *Database) WriteBlock(height BlockHeight, block BlockData, headerSize Bl return ErrBlockTooLarge } - if uint64(headerSize) >= uint64(len(block)) { + if headerSize >= blockDataLen { return ErrHeaderSizeTooLarge } @@ -92,7 +93,10 @@ func (s *Database) WriteBlock(height BlockHeight, block BlockData, headerSize Bl return err } - sizeWithDataHeader := sizeOfBlockHeader + blockDataLen + sizeWithDataHeader, err := safemath.Add(sizeOfBlockHeader, blockDataLen) + if err != nil { + return fmt.Errorf("calculating total block size would overflow for block at height %d: %w", height, err) + } writeDataOffset, err := s.allocateBlockSpace(sizeWithDataHeader) if err != nil { return err @@ -118,8 +122,8 @@ func (s *Database) WriteBlock(height BlockHeight, block BlockData, headerSize Bl // ReadBlock retrieves a block by its height. // Returns nil if the block is not found. 
func (s *Database) ReadBlock(height BlockHeight) (BlockData, error) { - s.mu.RLock() - defer s.mu.RUnlock() + s.closeMu.RLock() + defer s.closeMu.RUnlock() if s.closed { return nil, ErrDatabaseClosed @@ -139,7 +143,7 @@ func (s *Database) ReadBlock(height BlockHeight) (BlockData, error) { if err != nil { return nil, fmt.Errorf("failed to get data file for block at height %d: %w", height, err) } - _, err = dataFile.ReadAt(blockData, int64(localOffset+sizeOfBlockHeader)) + _, err = dataFile.ReadAt(blockData, int64(localOffset+uint64(sizeOfBlockHeader))) if err != nil { if errors.Is(err, io.EOF) { return nil, nil @@ -153,8 +157,8 @@ func (s *Database) ReadBlock(height BlockHeight) (BlockData, error) { // ReadHeader retrieves only the header portion of a block by its height. // Returns nil if the block is not found or no header. func (s *Database) ReadHeader(height BlockHeight) (BlockData, error) { - s.mu.RLock() - defer s.mu.RUnlock() + s.closeMu.RLock() + defer s.closeMu.RUnlock() if s.closed { return nil, ErrDatabaseClosed @@ -174,7 +178,7 @@ func (s *Database) ReadHeader(height BlockHeight) (BlockData, error) { } // Validate header size doesn't exceed total block size - if uint64(indexEntry.HeaderSize) > indexEntry.Size { + if indexEntry.HeaderSize > indexEntry.Size { return nil, fmt.Errorf("invalid header size %d exceeds block size %d", indexEntry.HeaderSize, indexEntry.Size) } @@ -184,7 +188,7 @@ func (s *Database) ReadHeader(height BlockHeight) (BlockData, error) { if err != nil { return nil, fmt.Errorf("failed to get data file for block header at height %d: %w", height, err) } - _, err = dataFile.ReadAt(headerData, int64(localOffset+sizeOfBlockHeader)) + _, err = dataFile.ReadAt(headerData, int64(localOffset+uint64(sizeOfBlockHeader))) if err != nil { if errors.Is(err, io.EOF) { return nil, nil @@ -198,8 +202,8 @@ func (s *Database) ReadHeader(height BlockHeight) (BlockData, error) { // ReadBody retrieves only the body portion (excluding header) of a block by its height. // Returns nil if the block is not found. 
func (s *Database) ReadBody(height BlockHeight) (BlockData, error) { - s.mu.RLock() - defer s.mu.RUnlock() + s.closeMu.RLock() + defer s.closeMu.RUnlock() if s.closed { return nil, ErrDatabaseClosed @@ -213,13 +217,21 @@ func (s *Database) ReadBody(height BlockHeight) (BlockData, error) { return nil, nil } - bodySize := indexEntry.Size - uint64(indexEntry.HeaderSize) + bodySize := indexEntry.Size - indexEntry.HeaderSize bodyData := make([]byte, bodySize) dataFile, localOffset, err := s.getDataFileAndOffset(indexEntry.Offset) if err != nil { return nil, fmt.Errorf("failed to get data file for block body at height %d: %w", height, err) } - bodyOffset := localOffset + sizeOfBlockHeader + uint64(indexEntry.HeaderSize) + headerOffset, err := safemath.Add(localOffset, uint64(sizeOfBlockHeader)) + if err != nil { + return nil, fmt.Errorf("calculating header offset would overflow for block at height %d: %w", height, err) + } + bodyOffset, err := safemath.Add(headerOffset, uint64(indexEntry.HeaderSize)) + if err != nil { + return nil, fmt.Errorf("calculating body offset would overflow for block at height %d: %w", height, err) + } + _, err = dataFile.ReadAt(bodyData, int64(bodyOffset)) if err != nil { if errors.Is(err, io.EOF) { @@ -246,14 +258,18 @@ func (s *Database) writeBlockAt(offset uint64, bh blockHeader, block BlockData) } // Allocate combined buffer for header and block data and write it to the data file - combinedBuf := make([]byte, sizeOfBlockHeader+uint64(len(block))) + combinedBufSize, err := safemath.Add(uint64(sizeOfBlockHeader), uint64(len(block))) + if err != nil { + return fmt.Errorf("calculating combined buffer size would overflow for block %d: %w", bh.Height, err) + } + combinedBuf := make([]byte, combinedBufSize) copy(combinedBuf, headerBytes) copy(combinedBuf[sizeOfBlockHeader:], block) if _, err := dataFile.WriteAt(combinedBuf, int64(localOffset)); err != nil { return fmt.Errorf("failed to write block to data file at offset %d: %w", offset, err) } - if s.syncToDisk { + if s.options.SyncToDisk { if err := dataFile.Sync(); err != nil { return fmt.Errorf("failed to sync data file after writing block %d: %w", bh.Height, err) } @@ -270,7 +286,11 @@ func (s *Database) updateBlockHeights(writtenBlockHeight BlockHeight) error { if s.maxContiguousHeight.CompareAndSwap(prevContiguousCandidate, writtenBlockHeight) { currentMax := writtenBlockHeight for { - nextHeightToVerify := currentMax + 1 + nextHeightToVerify, err := safemath.Add(currentMax, 1) + if err != nil { + s.log.Error("overflow in height calculation when updating max contiguous height") + break + } entry, err := s.readIndexEntry(nextHeightToVerify) if err != nil { s.log.Error( @@ -309,38 +329,79 @@ func (s *Database) updateBlockHeights(writtenBlockHeight BlockHeight) error { return nil } -func (s *Database) allocateBlockSpace(totalSize uint64) (writeDataOffset uint64, err error) { +// allocateBlockSpace reserves space for a block and returns the data file offset where it should be written. +// +// This function atomically reserves space by updating the nextWriteOffset and handles +// file splitting by advancing the nextWriteOffset when a data file would be exceeded. +// +// Parameters: +// - totalSize: The total size in bytes needed for the block +// +// Returns: +// - writeDataOffset: The data file offset where the block should be written +// - err: Error if allocation fails (e.g., block too large, overflow, etc.) 
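// Sketch of the offset arithmetic behind the file splitting described above.
// The helper is illustrative, not part of this file: a global data offset maps
// to a data file index plus an offset local to that file, and a block that
// would cross a file boundary is placed at the start of the next file instead.
func exampleSplitOffset(offset, totalSize, maxDataFileSize uint64) uint64 {
	fileIndex := offset / maxDataFileSize
	localOffset := offset % maxDataFileSize
	if localOffset+totalSize > maxDataFileSize {
		// The block would not fit in the current data file.
		return (fileIndex + 1) * maxDataFileSize
	}
	return offset
}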
+func (s *Database) allocateBlockSpace(totalSize uint32) (writeDataOffset uint64, err error) { maxDataFileSize := s.header.MaxDataFileSize // Check if a single block would exceed the max data file size - if maxDataFileSize > 0 && totalSize > maxDataFileSize { + if maxDataFileSize > 0 && uint64(totalSize) > maxDataFileSize { return 0, ErrBlockTooLarge } for { currentOffset := s.nextDataWriteOffset.Load() - if currentOffset > math.MaxUint64-totalSize { + + // Calculate where this block would end if written at current offset + blockEndOffset, err := safemath.Add(currentOffset, uint64(totalSize)) + if err != nil { return 0, fmt.Errorf( - "adding block of size %d to offset %d would overflow uint64 data file pointer", - totalSize, currentOffset, + "adding block of size %d to offset %d would overflow uint64 data file pointer: %w", + totalSize, currentOffset, err, ) } - writeOffset := currentOffset - newOffset := currentOffset + totalSize + // Determine the actual write offset for this block, taking into account + // data file splitting when max data file size is reached. + actualWriteOffset := currentOffset + actualBlockEndOffset := blockEndOffset + // If we have a max file size, check if we need to start a new file if maxDataFileSize > 0 { - fileIndex := int(currentOffset / maxDataFileSize) - localOffset := currentOffset % maxDataFileSize + currentFileIndex := int(currentOffset / maxDataFileSize) + offsetWithinCurrentFile := currentOffset % maxDataFileSize - if localOffset+totalSize > maxDataFileSize { - writeOffset = (uint64(fileIndex) + 1) * maxDataFileSize - newOffset = writeOffset + totalSize + // Check if this block would span across file boundaries + blockEndWithinFile, err := safemath.Add(offsetWithinCurrentFile, uint64(totalSize)) + if err != nil { + return 0, fmt.Errorf( + "calculating block end within file would overflow: %w", + err, + ) + } + if blockEndWithinFile > maxDataFileSize { + // Advance the current write offset to the start of the next file since + // it would exceed the current file size. + nextFileStartOffset, err := safemath.Mul(uint64(currentFileIndex+1), maxDataFileSize) + if err != nil { + return 0, fmt.Errorf( + "calculating next file offset would overflow: %w", + err, + ) + } + actualWriteOffset = nextFileStartOffset + + // Recalculate the end offset for the block space to set the next write offset + if actualBlockEndOffset, err = safemath.Add(actualWriteOffset, uint64(totalSize)); err != nil { + return 0, fmt.Errorf( + "adding block of size %d to new file offset %d would overflow: %w", + totalSize, actualWriteOffset, err, + ) + } } } - if s.nextDataWriteOffset.CompareAndSwap(currentOffset, newOffset) { - return writeOffset, nil + if s.nextDataWriteOffset.CompareAndSwap(currentOffset, actualBlockEndOffset) { + return actualWriteOffset, nil } } } diff --git a/x/blockdb/config.go b/x/blockdb/config.go index 771796a369bd..b91b15fc8eb1 100644 --- a/x/blockdb/config.go +++ b/x/blockdb/config.go @@ -8,6 +8,10 @@ import "errors" // DefaultMaxDataFileSize is the default maximum size of the data block file in bytes (500GB). const DefaultMaxDataFileSize = 500 * 1024 * 1024 * 1024 +// MaxDataFiles is the maximum number of data files that can be created. +// This prevents running out of file descriptors when MaxDataFileSize is small. +const MaxDataFiles = 10_000 + // DatabaseConfig contains configuration parameters for BlockDB. type DatabaseConfig struct { // MinimumHeight is the lowest block height tracked by the database. 
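// exampleOpen is an illustrative sketch (not part of this file) showing how the
// chained With* setters added in this patch can be combined when opening a
// database; the sizes and interval below are placeholder values.
func exampleOpen(indexDir, dataDir string, log logging.Logger) (*Database, error) {
	config := DefaultDatabaseConfig().
		WithSyncToDisk(false).        // rely on OS buffering for faster writes
		WithMaxDataFileSize(1 << 30). // split data files at 1 GiB
		WithCheckpointInterval(512)   // persist the index header every 512 blocks
	return New(indexDir, dataDir, config, log)
}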
@@ -18,6 +22,12 @@ type DatabaseConfig struct { // CheckpointInterval defines how frequently (in blocks) the index file header is updated (default: 1024). CheckpointInterval uint64 + + // SyncToDisk determines if fsync is called after each write for durability. + SyncToDisk bool + + // Truncate determines if existing data should be truncated when opening the database. + Truncate bool } // DefaultDatabaseConfig returns the default options for BlockDB. @@ -26,12 +36,44 @@ func DefaultDatabaseConfig() DatabaseConfig { MinimumHeight: 0, MaxDataFileSize: DefaultMaxDataFileSize, CheckpointInterval: 1024, + SyncToDisk: true, + Truncate: false, } } +// WithSyncToDisk returns a copy of the config with SyncToDisk set to the given value. +func (c DatabaseConfig) WithSyncToDisk(syncToDisk bool) DatabaseConfig { + c.SyncToDisk = syncToDisk + return c +} + +// WithTruncate returns a copy of the config with Truncate set to the given value. +func (c DatabaseConfig) WithTruncate(truncate bool) DatabaseConfig { + c.Truncate = truncate + return c +} + +// WithMinimumHeight returns a copy of the config with MinimumHeight set to the given value. +func (c DatabaseConfig) WithMinimumHeight(minHeight uint64) DatabaseConfig { + c.MinimumHeight = minHeight + return c +} + +// WithMaxDataFileSize returns a copy of the config with MaxDataFileSize set to the given value. +func (c DatabaseConfig) WithMaxDataFileSize(maxSize uint64) DatabaseConfig { + c.MaxDataFileSize = maxSize + return c +} + +// WithCheckpointInterval returns a copy of the config with CheckpointInterval set to the given value. +func (c DatabaseConfig) WithCheckpointInterval(interval uint64) DatabaseConfig { + c.CheckpointInterval = interval + return c +} + // Validate checks if the store options are valid. -func (opts DatabaseConfig) Validate() error { - if opts.CheckpointInterval == 0 { +func (c DatabaseConfig) Validate() error { + if c.CheckpointInterval == 0 { return errors.New("CheckpointInterval cannot be 0") } return nil diff --git a/x/blockdb/database.go b/x/blockdb/database.go index 1ff02d3ad50a..286342a68ddc 100644 --- a/x/blockdb/database.go +++ b/x/blockdb/database.go @@ -32,12 +32,12 @@ type Database struct { options DatabaseConfig header indexFileHeader log logging.Logger - mu sync.RWMutex closed bool fileCache sync.Map - // syncToDisk determines if fsync is called after each write for durability. - syncToDisk bool + // closeMu prevents the database from being closed while in use and prevents + // use of a closed database. + closeMu sync.RWMutex // maxBlockHeight tracks the highest block height that has been written to the db, even if there are gaps in the sequence. 
maxBlockHeight atomic.Uint64 @@ -118,7 +118,13 @@ func (s *Database) initializeDataFiles(dataDir string, truncate bool) error { } func (s *Database) loadOrInitializeHeader(truncate bool) error { - if truncate { + fileInfo, err := s.indexFile.Stat() + if err != nil { + return fmt.Errorf("failed to get index file stats: %w", err) + } + + // reset index file if its empty or we are truncating + if truncate || fileInfo.Size() == 0 { s.header = indexFileHeader{ Version: IndexFileVersion, MinHeight: s.options.MinimumHeight, @@ -144,14 +150,13 @@ func (s *Database) loadOrInitializeHeader(truncate bool) error { return nil } - // Not truncating, load existing header headerBuf := make([]byte, sizeOfIndexFileHeader) _, readErr := s.indexFile.ReadAt(headerBuf, 0) if readErr != nil { - return fmt.Errorf("failed to read index header: %w", readErr) + return fmt.Errorf("failed to read index header (delete index file to reindex): %w", readErr) } if err := s.header.UnmarshalBinary(headerBuf); err != nil { - return fmt.Errorf("failed to deserialize index header: %w", err) + return fmt.Errorf("failed to deserialize index header (delete index file to reindex): %w", err) } if s.header.Version != IndexFileVersion { return fmt.Errorf("mismatched index file version: found %d, expected %d", s.header.Version, IndexFileVersion) @@ -167,11 +172,9 @@ func (s *Database) loadOrInitializeHeader(truncate bool) error { // Parameters: // - indexDir: Directory for the index file // - dataDir: Directory for the data file(s) -// - syncToDisk: If true, forces fsync after writes -// - truncate: If true, truncates the index file -// - config: Optional configuration parameters +// - config: Configuration parameters // - log: Logger instance for structured logging -func New(indexDir, dataDir string, syncToDisk bool, truncate bool, config DatabaseConfig, log logging.Logger) (*Database, error) { +func New(indexDir, dataDir string, config DatabaseConfig, log logging.Logger) (*Database, error) { if indexDir == "" || dataDir == "" { return nil, errors.New("both indexDir and dataDir must be provided") } @@ -181,22 +184,21 @@ func New(indexDir, dataDir string, syncToDisk bool, truncate bool, config Databa } s := &Database{ - options: config, - syncToDisk: syncToDisk, - log: log, - fileCache: sync.Map{}, + options: config, + log: log, + fileCache: sync.Map{}, } - if err := s.openAndInitializeIndex(indexDir, truncate); err != nil { + if err := s.openAndInitializeIndex(indexDir, config.Truncate); err != nil { return nil, err } - if err := s.initializeDataFiles(dataDir, truncate); err != nil { + if err := s.initializeDataFiles(dataDir, config.Truncate); err != nil { s.closeFiles() return nil, err } - if !truncate { + if !config.Truncate { if err := s.recover(); err != nil { s.closeFiles() return nil, fmt.Errorf("recovery failed: %w", err) @@ -225,6 +227,10 @@ func (s *Database) getOrOpenDataFile(fileIndex int) (*os.File, error) { return handle.(*os.File), nil } + if fileIndex >= MaxDataFiles { + return nil, fmt.Errorf("%w: file index %d would exceed limit of %d", ErrMaxDataFilesExceeded, fileIndex, MaxDataFiles) + } + filePath := s.dataFilePath(fileIndex) handle, err := os.OpenFile(filePath, os.O_RDWR|os.O_CREATE, defaultFilePermissions) if err != nil { @@ -250,8 +256,8 @@ func (s *Database) MaxContiguousHeight() (height BlockHeight, found bool) { // Close flushes pending writes and closes the store files. 
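// Sketch of the closeMu pattern used throughout this file (illustrative type,
// not part of the package): every operation holds the read lock and checks the
// closed flag, while Close takes the write lock, so Close cannot run while a
// read or write is in flight and no operation can start on a closed database.
type exampleClosable struct {
	closeMu sync.RWMutex
	closed  bool
}

func (c *exampleClosable) doOperation() error {
	c.closeMu.RLock()
	defer c.closeMu.RUnlock()
	if c.closed {
		return ErrDatabaseClosed
	}
	// ... perform the read or write while holding the shared lock ...
	return nil
}

func (c *exampleClosable) Close() {
	c.closeMu.Lock()
	defer c.closeMu.Unlock()
	c.closed = true
}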
func (s *Database) Close() error { - s.mu.Lock() - defer s.mu.Unlock() + s.closeMu.Lock() + defer s.closeMu.Unlock() if s.closed { return nil diff --git a/x/blockdb/database_test.go b/x/blockdb/database_test.go index c29efdec73f0..7bcb62892b36 100644 --- a/x/blockdb/database_test.go +++ b/x/blockdb/database_test.go @@ -21,7 +21,8 @@ func TestNew_Truncate(t *testing.T) { defer os.RemoveAll(tempDir) indexDir := filepath.Join(tempDir, "index") dataDir := filepath.Join(tempDir, "data") - db, err := New(indexDir, dataDir, false, true, DefaultDatabaseConfig(), logging.NoLog{}) + config := DefaultDatabaseConfig().WithTruncate(true) + db, err := New(indexDir, dataDir, config, logging.NoLog{}) require.NoError(t, err) require.NotNil(t, db) @@ -31,7 +32,7 @@ func TestNew_Truncate(t *testing.T) { require.NoError(t, db.Close()) // Reopen with truncate=true and verify data is gone - db2, err := New(indexDir, dataDir, false, true, DefaultDatabaseConfig(), logging.NoLog{}) + db2, err := New(indexDir, dataDir, config, logging.NoLog{}) require.NoError(t, err) require.NotNil(t, db2) defer db2.Close() @@ -48,7 +49,8 @@ func TestNew_NoTruncate(t *testing.T) { defer os.RemoveAll(tempDir) indexDir := filepath.Join(tempDir, "index") dataDir := filepath.Join(tempDir, "data") - db, err := New(indexDir, dataDir, false, true, DefaultDatabaseConfig(), logging.NoLog{}) + config := DefaultDatabaseConfig().WithTruncate(true) + db, err := New(indexDir, dataDir, config, logging.NoLog{}) require.NoError(t, err) require.NotNil(t, db) @@ -61,7 +63,8 @@ func TestNew_NoTruncate(t *testing.T) { require.NoError(t, db.Close()) // Reopen with truncate=false and verify data is still there - db2, err := New(indexDir, dataDir, false, false, DefaultDatabaseConfig(), logging.NoLog{}) + config = DefaultDatabaseConfig().WithTruncate(false) + db2, err := New(indexDir, dataDir, config, logging.NoLog{}) require.NoError(t, err) require.NotNil(t, db2) defer db2.Close() @@ -85,7 +88,6 @@ func TestNew_Params(t *testing.T) { name string indexDir string dataDir string - syncToDisk bool config DatabaseConfig log logging.Logger wantErr error @@ -98,15 +100,13 @@ func TestNew_Params(t *testing.T) { config: DefaultDatabaseConfig(), }, { - name: "custom config", - indexDir: tempDir, - dataDir: tempDir, - syncToDisk: true, - config: DatabaseConfig{ - MinimumHeight: 100, - MaxDataFileSize: 1024 * 1024 * 1024, // 1GB - CheckpointInterval: 512, - }, + name: "custom config", + indexDir: tempDir, + dataDir: tempDir, + config: DefaultDatabaseConfig(). + WithMinimumHeight(100). + WithMaxDataFileSize(1024 * 1024). 
// 1MB + WithCheckpointInterval(512), }, { name: "empty index directory", @@ -133,18 +133,14 @@ func TestNew_Params(t *testing.T) { name: "invalid config - zero checkpoint interval", indexDir: tempDir, dataDir: tempDir, - config: DatabaseConfig{ - MinimumHeight: 0, - MaxDataFileSize: DefaultMaxDataFileSize, - CheckpointInterval: 0, - }, - wantErr: errors.New("CheckpointInterval cannot be 0"), + config: DefaultDatabaseConfig().WithCheckpointInterval(0), + wantErr: errors.New("CheckpointInterval cannot be 0"), }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - db, err := New(tt.indexDir, tt.dataDir, tt.syncToDisk, true, tt.config, tt.log) + db, err := New(tt.indexDir, tt.dataDir, tt.config, tt.log) if tt.wantErr != nil { require.Equal(t, tt.wantErr.Error(), err.Error()) @@ -158,7 +154,7 @@ func TestNew_Params(t *testing.T) { require.Equal(t, tt.config.MinimumHeight, db.options.MinimumHeight) require.Equal(t, tt.config.MaxDataFileSize, db.options.MaxDataFileSize) require.Equal(t, tt.config.CheckpointInterval, db.options.CheckpointInterval) - require.Equal(t, tt.syncToDisk, db.syncToDisk) + require.Equal(t, tt.config.SyncToDisk, db.options.SyncToDisk) indexPath := filepath.Join(tt.indexDir, indexFileName) require.FileExists(t, indexPath) @@ -234,7 +230,7 @@ func TestNew_IndexFileErrors(t *testing.T) { defer os.RemoveAll(filepath.Dir(indexDir)) defer os.RemoveAll(filepath.Dir(dataDir)) - _, err := New(indexDir, dataDir, false, false, DefaultDatabaseConfig(), logging.NoLog{}) + _, err := New(indexDir, dataDir, DefaultDatabaseConfig(), logging.NoLog{}) require.Contains(t, err.Error(), tt.wantErrMsg) }) } @@ -248,15 +244,11 @@ func TestIndexFileHeaderAlignment(t *testing.T) { func TestNew_IndexFileConfigPrecedence(t *testing.T) { // set up db - initialConfig := DatabaseConfig{ - MinimumHeight: 100, - MaxDataFileSize: 1024 * 1024, // 1MB limit - CheckpointInterval: 1024, - } + initialConfig := DefaultDatabaseConfig().WithMinimumHeight(100).WithMaxDataFileSize(1024 * 1024) tempDir, err := os.MkdirTemp("", "blockdb_config_precedence_test_*") require.NoError(t, err) defer os.RemoveAll(tempDir) - db, err := New(tempDir, tempDir, false, true, initialConfig, logging.NoLog{}) + db, err := New(tempDir, tempDir, initialConfig, logging.NoLog{}) require.NoError(t, err) require.NotNil(t, db) @@ -269,12 +261,8 @@ func TestNew_IndexFileConfigPrecedence(t *testing.T) { require.NoError(t, db.Close()) // Reopen with different config that has higher minimum height and smaller max data file size - differentConfig := DatabaseConfig{ - MinimumHeight: 200, // Higher minimum height - MaxDataFileSize: 512 * 1024, // 512KB limit (smaller than original 1MB) - CheckpointInterval: 512, - } - db2, err := New(tempDir, tempDir, false, false, differentConfig, logging.NoLog{}) + differentConfig := DefaultDatabaseConfig().WithMinimumHeight(200).WithMaxDataFileSize(512 * 1024) + db2, err := New(tempDir, tempDir, differentConfig, logging.NoLog{}) require.NoError(t, err) require.NotNil(t, db2) defer db2.Close() diff --git a/x/blockdb/datasplit_test.go b/x/blockdb/datasplit_test.go index a12e7df92e65..55208ac890d6 100644 --- a/x/blockdb/datasplit_test.go +++ b/x/blockdb/datasplit_test.go @@ -14,11 +14,8 @@ import ( func TestDataSplitting(t *testing.T) { // Each data file should have enough space for 2 blocks - config := &DatabaseConfig{ - MaxDataFileSize: 1024 * 2.5, - CheckpointInterval: 1024, - } - store, cleanup := newTestDatabase(t, false, config) + config := DefaultDatabaseConfig().WithMaxDataFileSize(1024 * 
2.5) + store, cleanup := newTestDatabase(t, config) defer cleanup() // create 11 blocks, 1kb each @@ -53,7 +50,7 @@ func TestDataSplitting(t *testing.T) { // reopen and verify all blocks are readable require.NoError(t, store.Close()) - store, err = New(filepath.Dir(store.indexFile.Name()), store.dataDir, false, false, *config, store.log) + store, err = New(filepath.Dir(store.indexFile.Name()), store.dataDir, config, store.log) require.NoError(t, err) defer store.Close() for i := range numBlocks { @@ -64,11 +61,8 @@ func TestDataSplitting(t *testing.T) { } func TestDataSplitting_DeletedFile(t *testing.T) { - config := &DatabaseConfig{ - MaxDataFileSize: 1024 * 2.5, - CheckpointInterval: 1024, - } - store, cleanup := newTestDatabase(t, false, config) + config := DefaultDatabaseConfig().WithMaxDataFileSize(1024 * 2.5) + store, cleanup := newTestDatabase(t, config) defer cleanup() // create 5 blocks, 1kb each @@ -87,7 +81,7 @@ func TestDataSplitting_DeletedFile(t *testing.T) { // reopen and verify the blocks require.NoError(t, store.Close()) - store, err := New(filepath.Dir(store.indexFile.Name()), store.dataDir, false, false, *config, store.log) + store, err := New(filepath.Dir(store.indexFile.Name()), store.dataDir, config, store.log) require.NoError(t, err) defer store.Close() for i := range numBlocks { diff --git a/x/blockdb/errors.go b/x/blockdb/errors.go index 7ec789a0dd2a..e9c98087e61d 100644 --- a/x/blockdb/errors.go +++ b/x/blockdb/errors.go @@ -9,10 +9,11 @@ import ( ) var ( - ErrInvalidBlockHeight = errors.New("blockdb: invalid block height") - ErrBlockEmpty = errors.New("blockdb: block is empty") - ErrDatabaseClosed = errors.New("blockdb: database is closed") - ErrCorrupted = errors.New("blockdb: unrecoverable corruption detected") - ErrHeaderSizeTooLarge = errors.New("blockdb: header size cannot be >= block size") - ErrBlockTooLarge = fmt.Errorf("blockdb: block size exceeds maximum allowed size of %d bytes", MaxBlockDataSize) + ErrInvalidBlockHeight = errors.New("blockdb: invalid block height") + ErrBlockEmpty = errors.New("blockdb: block is empty") + ErrDatabaseClosed = errors.New("blockdb: database is closed") + ErrCorrupted = errors.New("blockdb: unrecoverable corruption detected") + ErrHeaderSizeTooLarge = errors.New("blockdb: header size cannot be >= block size") + ErrBlockTooLarge = fmt.Errorf("blockdb: block size exceeds maximum allowed size of %d bytes", MaxBlockDataSize) + ErrMaxDataFilesExceeded = errors.New("blockdb: maximum number of data files exceeded") ) diff --git a/x/blockdb/helpers_test.go b/x/blockdb/helpers_test.go index 4e3083178fc8..cc0a0a916cbd 100644 --- a/x/blockdb/helpers_test.go +++ b/x/blockdb/helpers_test.go @@ -15,19 +15,14 @@ import ( "github.com/ava-labs/avalanchego/utils/logging" ) -func newTestDatabase(t *testing.T, syncToDisk bool, opts *DatabaseConfig) (*Database, func()) { +func newTestDatabase(t *testing.T, opts DatabaseConfig) (*Database, func()) { t.Helper() dir, err := os.MkdirTemp("", "blockdb_test_*") require.NoError(t, err, "failed to create temp dir") idxDir := filepath.Join(dir, "idx") dataDir := filepath.Join(dir, "dat") - var config DatabaseConfig - if opts != nil { - config = *opts - } else { - config = DefaultDatabaseConfig() - } - db, err := New(idxDir, dataDir, syncToDisk, true, config, logging.NoLog{}) + + db, err := New(idxDir, dataDir, opts, logging.NoLog{}) if err != nil { os.RemoveAll(dir) require.NoError(t, err, "failed to create database") diff --git a/x/blockdb/index.go b/x/blockdb/index.go index 
d52960c02f6a..fa3054539941 100644 --- a/x/blockdb/index.go +++ b/x/blockdb/index.go @@ -9,7 +9,8 @@ import ( "errors" "fmt" "io" - "math" + + safemath "github.com/ava-labs/avalanchego/utils/math" ) const ( @@ -28,9 +29,9 @@ type indexEntry struct { // Offset is the byte offset in the data file where the block's header starts. Offset uint64 // Size is the length in bytes of the block's data (excluding the blockHeader). - Size uint64 + Size uint32 // HeaderSize is the size in bytes of the block's header portion within the data. - HeaderSize uint16 + HeaderSize BlockHeaderSize } // IsEmpty returns true if this entry is uninitialized. @@ -43,8 +44,8 @@ func (e indexEntry) IsEmpty() bool { func (e indexEntry) MarshalBinary() ([]byte, error) { buf := make([]byte, sizeOfIndexEntry) binary.LittleEndian.PutUint64(buf[0:], e.Offset) - binary.LittleEndian.PutUint64(buf[8:], e.Size) - binary.LittleEndian.PutUint16(buf[16:], e.HeaderSize) + binary.LittleEndian.PutUint32(buf[8:], e.Size) + binary.LittleEndian.PutUint32(buf[12:], e.HeaderSize) return buf, nil } @@ -54,8 +55,8 @@ func (e *indexEntry) UnmarshalBinary(data []byte) error { return fmt.Errorf("incorrect data length to unmarshal indexEntry: got %d bytes, need exactly %d", len(data), sizeOfIndexEntry) } e.Offset = binary.LittleEndian.Uint64(data[0:]) - e.Size = binary.LittleEndian.Uint64(data[8:]) - e.HeaderSize = binary.LittleEndian.Uint16(data[16:]) + e.Size = binary.LittleEndian.Uint32(data[8:]) + e.HeaderSize = binary.LittleEndian.Uint32(data[12:]) return nil } @@ -63,12 +64,12 @@ func (e *indexEntry) UnmarshalBinary(data []byte) error { type indexFileHeader struct { Version uint64 MaxDataFileSize uint64 - MaxHeight BlockHeight MinHeight BlockHeight MaxContiguousHeight BlockHeight + MaxHeight BlockHeight NextWriteOffset uint64 - // reserve 24 bytes for future use - Reserved [24]byte + // reserve 32 bytes for future use + Reserved [32]byte } // Add MarshalBinary for indexFileHeader @@ -76,9 +77,9 @@ func (h indexFileHeader) MarshalBinary() ([]byte, error) { buf := make([]byte, sizeOfIndexFileHeader) binary.LittleEndian.PutUint64(buf[0:], h.Version) binary.LittleEndian.PutUint64(buf[8:], h.MaxDataFileSize) - binary.LittleEndian.PutUint64(buf[16:], h.MaxHeight) - binary.LittleEndian.PutUint64(buf[24:], h.MinHeight) - binary.LittleEndian.PutUint64(buf[32:], h.MaxContiguousHeight) + binary.LittleEndian.PutUint64(buf[16:], h.MinHeight) + binary.LittleEndian.PutUint64(buf[24:], h.MaxContiguousHeight) + binary.LittleEndian.PutUint64(buf[32:], h.MaxHeight) binary.LittleEndian.PutUint64(buf[40:], h.NextWriteOffset) return buf, nil } @@ -93,9 +94,9 @@ func (h *indexFileHeader) UnmarshalBinary(data []byte) error { } h.Version = binary.LittleEndian.Uint64(data[0:]) h.MaxDataFileSize = binary.LittleEndian.Uint64(data[8:]) - h.MaxHeight = binary.LittleEndian.Uint64(data[16:]) - h.MinHeight = binary.LittleEndian.Uint64(data[24:]) - h.MaxContiguousHeight = binary.LittleEndian.Uint64(data[32:]) + h.MinHeight = binary.LittleEndian.Uint64(data[16:]) + h.MaxContiguousHeight = binary.LittleEndian.Uint64(data[24:]) + h.MaxHeight = binary.LittleEndian.Uint64(data[32:]) h.NextWriteOffset = binary.LittleEndian.Uint64(data[40:]) return nil } @@ -105,14 +106,15 @@ func (s *Database) indexEntryOffset(height BlockHeight) (uint64, error) { return 0, fmt.Errorf("%w: height %d is less than minimum block height %d", ErrInvalidBlockHeight, height, s.header.MinHeight) } relativeHeight := height - s.header.MinHeight - - // Check for overflow before calculating the final offset. 
- if relativeHeight > (math.MaxUint64-sizeOfIndexFileHeader)/sizeOfIndexEntry { + offsetFromHeaderStart, err := safemath.Mul(relativeHeight, sizeOfIndexEntry) + if err != nil { + return 0, fmt.Errorf("%w: block height %d is too large", ErrInvalidBlockHeight, height) + } + finalOffset, err := safemath.Add(sizeOfIndexFileHeader, offsetFromHeaderStart) + if err != nil { return 0, fmt.Errorf("%w: block height %d is too large", ErrInvalidBlockHeight, height) } - offsetFromHeaderStart := relativeHeight * sizeOfIndexEntry - finalOffset := sizeOfIndexFileHeader + offsetFromHeaderStart return finalOffset, nil } @@ -142,7 +144,7 @@ func (s *Database) readIndexEntry(height BlockHeight) (indexEntry, error) { return entry, nil } -func (s *Database) writeIndexEntryAt(indexFileOffset, dataFileBlockOffset, blockDataLen uint64, headerSize uint16) error { +func (s *Database) writeIndexEntryAt(indexFileOffset, dataFileBlockOffset uint64, blockDataLen uint32, headerSize BlockHeaderSize) error { indexEntry := indexEntry{ Offset: dataFileBlockOffset, Size: blockDataLen, @@ -164,7 +166,7 @@ func (s *Database) persistIndexHeader() error { // The index file must be fsync'd before the header is written to prevent // a state where the header is persisted but the index entries it refers to // are not. This could lead to data inconsistency on recovery. - if s.syncToDisk { + if s.options.SyncToDisk { if err := s.indexFile.Sync(); err != nil { return fmt.Errorf("failed to sync index file before writing header state: %w", err) } diff --git a/x/blockdb/readblock_test.go b/x/blockdb/readblock_test.go index f13092b1fdbb..5977471886a0 100644 --- a/x/blockdb/readblock_test.go +++ b/x/blockdb/readblock_test.go @@ -81,7 +81,7 @@ func TestReadOperations(t *testing.T) { config = &defaultConfig } - store, cleanup := newTestDatabase(t, false, tt.config) + store, cleanup := newTestDatabase(t, *config) defer cleanup() // Seed database with blocks based on config @@ -97,7 +97,7 @@ func TestReadOperations(t *testing.T) { } block := randomBlock(t) - require.NoError(t, store.WriteBlock(i, block, uint16(i-minHeight))) + require.NoError(t, store.WriteBlock(i, block, BlockHeaderSize(i-minHeight))) seededBlocks[i] = block } } @@ -127,7 +127,7 @@ func TestReadOperations(t *testing.T) { } else { require.NotNil(t, readBlock) expectedBlock := seededBlocks[tt.readHeight] - headerSize := uint16(tt.readHeight - config.MinimumHeight) + headerSize := BlockHeaderSize(tt.readHeight - config.MinimumHeight) var expectHeader []byte if headerSize > 0 { expectHeader = expectedBlock[:headerSize] @@ -141,13 +141,13 @@ func TestReadOperations(t *testing.T) { } func TestReadOperations_Concurrency(t *testing.T) { - store, cleanup := newTestDatabase(t, false, nil) + store, cleanup := newTestDatabase(t, DefaultDatabaseConfig()) defer cleanup() // Pre-generate blocks and write them numBlocks := 50 blocks := make([][]byte, numBlocks) - headerSizes := make([]uint16, numBlocks) + headerSizes := make([]BlockHeaderSize, numBlocks) gapHeights := map[uint64]bool{ 10: true, 20: true, @@ -159,9 +159,9 @@ func TestReadOperations_Concurrency(t *testing.T) { } blocks[i] = randomBlock(t) - headerSizes[i] = uint16(i * 10) // Varying header sizes - if headerSizes[i] > uint16(len(blocks[i])) { - headerSizes[i] = uint16(len(blocks[i])) / 2 + headerSizes[i] = BlockHeaderSize(i * 10) // Varying header sizes + if headerSizes[i] > BlockHeaderSize(len(blocks[i])) { + headerSizes[i] = BlockHeaderSize(len(blocks[i])) / 2 } require.NoError(t, store.WriteBlock(uint64(i), blocks[i], 
headerSizes[i])) diff --git a/x/blockdb/recovery.go b/x/blockdb/recovery.go index 11ca34c1b795..25342b43c376 100644 --- a/x/blockdb/recovery.go +++ b/x/blockdb/recovery.go @@ -4,15 +4,21 @@ package blockdb import ( + "errors" "fmt" + "io" "os" "go.uber.org/zap" + + safemath "github.com/ava-labs/avalanchego/utils/math" ) -// recover attempts to restore the store to a consistent state by scanning the data file -// for blocks that may not be correctly indexed, usually after an unclean shutdown. -// It reconciles the data file with the index file header and entries. +// recover detects and recovers unindexed blocks by scanning data files and updating the index. +// It compares the actual data file sizes on disk with the indexed data size to detect +// blocks that were written but not properly indexed. +// For each unindexed block found, it validates the block, then +// writes the corresponding index entry and updates block height tracking. func (s *Database) recover() error { dataFiles, maxIndex, err := s.listDataFiles() if err != nil { @@ -23,18 +29,25 @@ func (s *Database) recover() error { return nil } - // Calculate the expected next write offset based on the data files on disk. + // Calculate the expected next write offset based on the data on disk. var calculatedNextDataWriteOffset uint64 if s.header.MaxDataFileSize > 0 { - // All data files before the last one are full. + // All data files before the last should be full. fullFilesCount := maxIndex - calculatedNextDataWriteOffset += uint64(fullFilesCount) * s.header.MaxDataFileSize + fileSizeContribution, err := safemath.Mul(uint64(fullFilesCount), s.header.MaxDataFileSize) + if err != nil { + return fmt.Errorf("calculating file size contribution would overflow: %w", err) + } + calculatedNextDataWriteOffset = fileSizeContribution lastFileInfo, err := os.Stat(dataFiles[maxIndex]) if err != nil { return fmt.Errorf("failed to get stats for last data file %s: %w", dataFiles[maxIndex], err) } - calculatedNextDataWriteOffset += uint64(lastFileInfo.Size()) + calculatedNextDataWriteOffset, err = safemath.Add(calculatedNextDataWriteOffset, uint64(lastFileInfo.Size())) + if err != nil { + return fmt.Errorf("adding last file size would overflow: %w", err) + } } else { lastFileInfo, err := os.Stat(dataFiles[0]) if err != nil { @@ -51,7 +64,8 @@ func (s *Database) recover() error { return nil case calculatedNextDataWriteOffset < nextDataWriteOffset: - return fmt.Errorf("%w: calculated next write offset is smaller than index header claims "+ + // this happens when the index claims to have more data than is actually on disk + return fmt.Errorf("%w: index header claims to have more data than is actually on disk "+ "(calculated: %d bytes, index header: %d bytes)", ErrCorrupted, calculatedNextDataWriteOffset, nextDataWriteOffset) default: @@ -65,35 +79,42 @@ func (s *Database) recover() error { currentScanOffset := nextDataWriteOffset recoveredBlocksCount := 0 maxRecoveredHeightSeen := s.maxBlockHeight.Load() - - totalDataFileSize := calculatedNextDataWriteOffset - for currentScanOffset < totalDataFileSize { - bh, err := s.recoverBlockAtOffset(currentScanOffset, totalDataFileSize) + for currentScanOffset < calculatedNextDataWriteOffset { + bh, err := s.recoverBlockAtOffset(currentScanOffset, calculatedNextDataWriteOffset) if err != nil { - s.log.Error("Recovery: scan stopped due to invalid block data", - zap.Uint64("offset", currentScanOffset), - zap.Error(err), - ) - break + if errors.Is(err, io.EOF) && s.header.MaxDataFileSize > 0 { + // reach end of 
this file, try to read the next file + currentFileIndex := int(currentScanOffset / s.header.MaxDataFileSize) + nextFileIndex, err := safemath.Add(uint64(currentFileIndex), 1) + if err != nil { + return fmt.Errorf("recovery: overflow in file index calculation: %w", err) + } + if currentScanOffset, err = safemath.Mul(nextFileIndex, s.header.MaxDataFileSize); err != nil { + return fmt.Errorf("recovery: overflow in scan offset calculation: %w", err) + } + continue + } + return err } s.log.Debug("Recovery: Successfully validated and indexed block", zap.Uint64("height", bh.Height), - zap.Uint64("size", bh.Size), + zap.Uint32("size", bh.Size), zap.Uint64("offset", currentScanOffset), ) recoveredBlocksCount++ if bh.Height > maxRecoveredHeightSeen || maxRecoveredHeightSeen == unsetHeight { maxRecoveredHeightSeen = bh.Height } - currentScanOffset += sizeOfBlockHeader + bh.Size + blockTotalSize, err := safemath.Add(uint64(sizeOfBlockHeader), uint64(bh.Size)) + if err != nil { + return fmt.Errorf("recovery: overflow in block size calculation: %w", err) + } + currentScanOffset, err = safemath.Add(currentScanOffset, blockTotalSize) + if err != nil { + return fmt.Errorf("recovery: overflow in scan offset calculation: %w", err) + } } s.nextDataWriteOffset.Store(currentScanOffset) - s.maxBlockHeight.Store(maxRecoveredHeightSeen) - - // Recalculate MCH if we recovered any blocks - if recoveredBlocksCount > 0 { - s.updateMaxContiguousHeightOnRecovery() - } if err := s.persistIndexHeader(); err != nil { return fmt.Errorf("recovery: failed to save index header after recovery scan: %w", err) @@ -109,10 +130,10 @@ func (s *Database) recover() error { return nil } -func (s *Database) recoverBlockAtOffset(offset, dataFileActualSize uint64) (blockHeader, error) { +func (s *Database) recoverBlockAtOffset(offset, totalDataSize uint64) (blockHeader, error) { var bh blockHeader - if dataFileActualSize-offset < sizeOfBlockHeader { - return bh, fmt.Errorf("not enough data for block header at offset %d", offset) + if totalDataSize-offset < uint64(sizeOfBlockHeader) { + return bh, fmt.Errorf("%w: not enough data for block header at offset %d", ErrCorrupted, offset) } dataFile, localOffset, err := s.getDataFileAndOffset(offset) @@ -121,34 +142,45 @@ func (s *Database) recoverBlockAtOffset(offset, dataFileActualSize uint64) (bloc } bhBuf := make([]byte, sizeOfBlockHeader) if _, err := dataFile.ReadAt(bhBuf, int64(localOffset)); err != nil { - return bh, fmt.Errorf("error reading block header at offset %d: %w", offset, err) + return bh, fmt.Errorf("%w: error reading block header at offset %d: %w", ErrCorrupted, offset, err) } if err := bh.UnmarshalBinary(bhBuf); err != nil { - return bh, fmt.Errorf("error deserializing block header at offset %d: %w", offset, err) + return bh, fmt.Errorf("%w: error deserializing block header at offset %d: %w", ErrCorrupted, offset, err) } if bh.Size == 0 || bh.Size > MaxBlockDataSize { - return bh, fmt.Errorf("invalid block size in header at offset %d: %d", offset, bh.Size) + return bh, fmt.Errorf("%w: invalid block size in header at offset %d: %d", ErrCorrupted, offset, bh.Size) } if bh.Height < s.header.MinHeight || bh.Height == unsetHeight { return bh, fmt.Errorf( - "invalid block height in header at offset %d: found %d, expected >= %d", - offset, bh.Height, s.header.MinHeight, + "%w: invalid block height in header at offset %d: found %d, expected >= %d", + ErrCorrupted, offset, bh.Height, s.header.MinHeight, ) } - if uint64(bh.HeaderSize) > bh.Size { - return bh, fmt.Errorf("invalid block 
header size in header at offset %d: %d > %d", offset, bh.HeaderSize, bh.Size) + if bh.HeaderSize > bh.Size { + return bh, fmt.Errorf("%w: invalid block header size in header at offset %d: %d > %d", ErrCorrupted, offset, bh.HeaderSize, bh.Size) } - expectedBlockEndOffset := offset + sizeOfBlockHeader + bh.Size - if expectedBlockEndOffset < offset || expectedBlockEndOffset > dataFileActualSize { - return bh, fmt.Errorf("block data out of bounds at offset %d", offset) + expectedBlockEndOffset, err := safemath.Add(offset, uint64(sizeOfBlockHeader)) + if err != nil { + return bh, fmt.Errorf("calculating block end offset would overflow at offset %d: %w", offset, err) + } + expectedBlockEndOffset, err = safemath.Add(expectedBlockEndOffset, uint64(bh.Size)) + if err != nil { + return bh, fmt.Errorf("calculating block end offset would overflow at offset %d: %w", offset, err) + } + if expectedBlockEndOffset > totalDataSize { + return bh, fmt.Errorf("%w: block data out of bounds at offset %d", ErrCorrupted, offset) } blockData := make([]byte, bh.Size) - if _, err := dataFile.ReadAt(blockData, int64(offset+sizeOfBlockHeader)); err != nil { - return bh, fmt.Errorf("failed to read block data at offset %d: %w", offset, err) + blockDataOffset, err := safemath.Add(localOffset, uint64(sizeOfBlockHeader)) + if err != nil { + return bh, fmt.Errorf("calculating block data offset would overflow at offset %d: %w", offset, err) + } + if _, err := dataFile.ReadAt(blockData, int64(blockDataOffset)); err != nil { + return bh, fmt.Errorf("%w: failed to read block data at offset %d: %w", ErrCorrupted, offset, err) } calculatedChecksum := calculateChecksum(blockData) if calculatedChecksum != bh.Checksum { - return bh, fmt.Errorf("checksum mismatch for block at offset %d", offset) + return bh, fmt.Errorf("%w: checksum mismatch for block at offset %d", ErrCorrupted, offset) } // Write index entry for this block @@ -159,27 +191,10 @@ func (s *Database) recoverBlockAtOffset(offset, dataFileActualSize uint64) (bloc if err := s.writeIndexEntryAt(indexFileOffset, offset, bh.Size, bh.HeaderSize); err != nil { return bh, fmt.Errorf("failed to update index for recovered block %d: %w", bh.Height, err) } - return bh, nil -} - -func (s *Database) updateMaxContiguousHeightOnRecovery() { - currentMCH := s.header.MaxContiguousHeight - highestKnown := s.maxBlockHeight.Load() - for nextHeight := currentMCH + 1; nextHeight <= highestKnown; nextHeight++ { - entry, err := s.readIndexEntry(nextHeight) - if err != nil { - s.log.Error( - "error reading index entry when updating max contiguous height on recovery", - zap.Uint64("height", nextHeight), - zap.Error(err), - ) - break - } - if entry.IsEmpty() { - break - } - currentMCH = nextHeight + if err := s.updateBlockHeights(bh.Height); err != nil { + return bh, fmt.Errorf("failed to update block heights for recovered block %d: %w", bh.Height, err) } - s.maxContiguousHeight.Store(currentMCH) + + return bh, nil } diff --git a/x/blockdb/recovery_test.go b/x/blockdb/recovery_test.go new file mode 100644 index 000000000000..133b2f36d044 --- /dev/null +++ b/x/blockdb/recovery_test.go @@ -0,0 +1,348 @@ +// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved. +// See the file LICENSE for licensing terms. 
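// A worked example of the file-skip arithmetic in the recovery scan above,
// using for illustration a MaxDataFileSize of 10*1024 bytes and 4*1024-byte
// blocks (the same shape as the recovery tests below); the numbers are
// illustrative only, not part of the patch:
//
//	// Two blocks of 4096 bytes + the 24-byte block header each fill file 0
//	// up to offset 8240, so reading the next block header at global offset
//	// 8240 hits io.EOF in file 0 and the scan jumps to the next file.
//	currentFileIndex := 8240 / 10240              // = 0
//	nextScanOffset := (currentFileIndex + 1) * 10240 // = 10240, start of file 1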
+ +package blockdb + +import ( + "fmt" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestRecovery_Success(t *testing.T) { + // Create database with 10KB file size and 4KB blocks + // This means each file will have 2 blocks (4KB + 24 bytes header = ~4KB per block) + config := DefaultDatabaseConfig().WithMaxDataFileSize(10 * 1024) // 10KB per file + + tests := []struct { + name string + corruptIndex func(indexPath string) error + }{ + { + name: "recovery from missing index file; blocks will be recovered", + corruptIndex: os.Remove, + }, + { + name: "recovery from truncated index file", + corruptIndex: func(indexPath string) error { + // Remove the existing index file + if err := os.Remove(indexPath); err != nil { + return err + } + + // Create a new index file with only the first block indexed + // This simulates an unclean shutdown where the index file is behind + indexFile, err := os.OpenFile(indexPath, os.O_RDWR|os.O_CREATE, defaultFilePermissions) + if err != nil { + return err + } + defer indexFile.Close() + + // Create a header that only knows about the first block + // Block 0: 4KB data + header + firstBlockOffset := uint64(sizeOfBlockHeader) + 4*1024 + + header := indexFileHeader{ + Version: IndexFileVersion, + MaxDataFileSize: 10 * 1024, // 10KB per file + MinHeight: 0, + MaxContiguousHeight: 0, + MaxHeight: 0, + NextWriteOffset: firstBlockOffset, + } + + // Write the header + headerBytes, err := header.MarshalBinary() + if err != nil { + return err + } + if _, err := indexFile.WriteAt(headerBytes, 0); err != nil { + return err + } + + // Write index entry for only the first block + indexEntry := indexEntry{ + Offset: 0, + Size: 4 * 1024, // 4KB + HeaderSize: 0, + } + entryBytes, err := indexEntry.MarshalBinary() + if err != nil { + return err + } + indexEntryOffset := sizeOfIndexFileHeader + if _, err := indexFile.WriteAt(entryBytes, int64(indexEntryOffset)); err != nil { + return err + } + + return nil + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + store, cleanup := newTestDatabase(t, config) + defer cleanup() + blockHeights := []uint64{0, 1, 3, 5, 2, 8} + blocks := make(map[uint64][]byte) + + for _, height := range blockHeights { + // Create 4KB blocks + block := make([]byte, 4*1024) + copy(block, fmt.Appendf(nil, "block-data-for-height-%d", height)) + + require.NoError(t, store.WriteBlock(height, block, 0)) + blocks[height] = block + } + checkDatabaseState(t, store, 8, 3) + require.NoError(t, store.Close()) + + // Corrupt the index file according to the test case + indexPath := store.indexFile.Name() + require.NoError(t, tt.corruptIndex(indexPath)) + + // Reopen the database and test recovery + indexDir := filepath.Join(indexPath, "..") + recoveredStore, err := New(indexDir, store.dataDir, config, store.log) + require.NoError(t, err) + defer recoveredStore.Close() + + // Verify all blocks are still readable + for _, height := range blockHeights { + readBlock, err := recoveredStore.ReadBlock(height) + require.NoError(t, err) + require.Equal(t, blocks[height], readBlock, "block %d should be the same", height) + } + + checkDatabaseState(t, recoveredStore, 8, 3) + }) + } +} + +func TestRecovery_CorruptionDetection(t *testing.T) { + tests := []struct { + name string + blockHeights []uint64 + minHeight uint64 + setupCorruption func(store *Database, blocks [][]byte) error + wantErr error + wantErrText string + }{ + { + name: "index header claims larger offset than actual data", + blockHeights: []uint64{0, 
1, 2, 3, 4}, + setupCorruption: func(store *Database, _ [][]byte) error { + indexPath := store.indexFile.Name() + indexFile, err := os.OpenFile(indexPath, os.O_RDWR, 0) + if err != nil { + return err + } + defer indexFile.Close() + + // Read the current header + headerBuf := make([]byte, sizeOfIndexFileHeader) + _, err = indexFile.ReadAt(headerBuf, 0) + if err != nil { + return err + } + + // Parse and corrupt the header by setting NextWriteOffset to be much larger than actual data + var header indexFileHeader + err = header.UnmarshalBinary(headerBuf) + if err != nil { + return err + } + header.NextWriteOffset = 1000000 + + // Write the corrupted header back + corruptedHeaderBytes, err := header.MarshalBinary() + if err != nil { + return err + } + _, err = indexFile.WriteAt(corruptedHeaderBytes, 0) + return err + }, + wantErr: ErrCorrupted, + wantErrText: "index header claims to have more data than is actually on disk", + }, + { + name: "corrupted block header in data file", + blockHeights: []uint64{0, 1}, + setupCorruption: func(store *Database, blocks [][]byte) error { + if err := resetIndexToBlock(store, uint64(len(blocks[0])), 0); err != nil { + return err + } + // Corrupt second block header with invalid data + secondBlockOffset := int64(sizeOfBlockHeader) + int64(len(blocks[0])) + corruptedHeader := make([]byte, sizeOfBlockHeader) + for i := range corruptedHeader { + corruptedHeader[i] = 0xFF // Invalid data + } + dataFilePath := store.dataFilePath(0) + dataFile, err := os.OpenFile(dataFilePath, os.O_RDWR, 0) + if err != nil { + return err + } + defer dataFile.Close() + _, err = dataFile.WriteAt(corruptedHeader, secondBlockOffset) + return err + }, + wantErr: ErrCorrupted, + wantErrText: "invalid block size in header", + }, + { + name: "block with invalid block size in header", + blockHeights: []uint64{0, 1}, + setupCorruption: func(store *Database, blocks [][]byte) error { + if err := resetIndexToBlock(store, uint64(len(blocks[0])), 0); err != nil { + return err + } + secondBlockOffset := int64(sizeOfBlockHeader) + int64(len(blocks[0])) + bh := blockHeader{ + Height: 1, + Checksum: calculateChecksum(blocks[1]), + Size: uint32(len(blocks[1])) + 1, // too large + HeaderSize: 0, + } + return writeBlockHeader(store, secondBlockOffset, bh) + }, + wantErr: ErrCorrupted, + wantErrText: "block data out of bounds", + }, + { + name: "block with checksum mismatch", + blockHeights: []uint64{0, 1}, + setupCorruption: func(store *Database, blocks [][]byte) error { + if err := resetIndexToBlock(store, uint64(len(blocks[0])), 0); err != nil { + return err + } + secondBlockOffset := int64(sizeOfBlockHeader) + int64(len(blocks[0])) + bh := blockHeader{ + Height: 1, + Checksum: 0xDEADBEEF, // Wrong checksum + Size: uint32(len(blocks[1])), + HeaderSize: 0, + } + return writeBlockHeader(store, secondBlockOffset, bh) + }, + wantErr: ErrCorrupted, + wantErrText: "checksum mismatch for block", + }, + { + name: "partial block at end of file", + blockHeights: []uint64{0}, + setupCorruption: func(store *Database, blocks [][]byte) error { + dataFilePath := store.dataFilePath(0) + dataFile, err := os.OpenFile(dataFilePath, os.O_RDWR, 0) + if err != nil { + return err + } + defer dataFile.Close() + + // Truncate to have only partial block data + truncateSize := int64(sizeOfBlockHeader) + int64(len(blocks[0]))/2 + return dataFile.Truncate(truncateSize) + }, + wantErr: ErrCorrupted, + wantErrText: "index header claims to have more data than is actually on disk", + }, + { + name: "block with invalid height", + 
blockHeights: []uint64{10, 11}, + minHeight: 10, + setupCorruption: func(store *Database, blocks [][]byte) error { + if err := resetIndexToBlock(store, uint64(len(blocks[0])), 10); err != nil { + return err + } + secondBlockOffset := int64(sizeOfBlockHeader) + int64(len(blocks[0])) + bh := blockHeader{ + Height: 5, // Below minimum height of 10 + Checksum: calculateChecksum(blocks[1]), + Size: uint32(len(blocks[1])), + HeaderSize: 0, + } + return writeBlockHeader(store, secondBlockOffset, bh) + }, + wantErr: ErrCorrupted, + wantErrText: "invalid block height in header", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + config := DefaultDatabaseConfig() + if tt.minHeight > 0 { + config = config.WithMinimumHeight(tt.minHeight) + } + + store, cleanup := newTestDatabase(t, config) + defer cleanup() + + // Setup blocks + blocks := make([][]byte, len(tt.blockHeights)) + for i, height := range tt.blockHeights { + blocks[i] = randomBlock(t) + require.NoError(t, store.WriteBlock(height, blocks[i], 0)) + } + require.NoError(t, store.Close()) + + // Apply corruption + require.NoError(t, tt.setupCorruption(store, blocks)) + + // Try to reopen the database - it should detect corruption + indexDir := filepath.Dir(store.indexFile.Name()) + _, err := New(indexDir, store.dataDir, config, store.log) + require.ErrorIs(t, err, tt.wantErr) + require.Contains(t, err.Error(), tt.wantErrText, "error message should contain expected text") + }) + } +} + +// Helper function to reset index to only a single block +func resetIndexToBlock(store *Database, blockSize uint64, minHeight uint64) error { + indexPath := store.indexFile.Name() + indexFile, err := os.OpenFile(indexPath, os.O_RDWR, 0) + if err != nil { + return err + } + defer indexFile.Close() + + header := indexFileHeader{ + Version: IndexFileVersion, + MaxDataFileSize: DefaultMaxDataFileSize, + MinHeight: minHeight, + MaxContiguousHeight: minHeight, + MaxHeight: minHeight, + NextWriteOffset: uint64(sizeOfBlockHeader) + blockSize, + } + + headerBytes, err := header.MarshalBinary() + if err != nil { + return err + } + _, err = indexFile.WriteAt(headerBytes, 0) + return err +} + +// Helper function to write a block header at a specific offset +func writeBlockHeader(store *Database, offset int64, bh blockHeader) error { + fileIndex := int(offset / int64(store.header.MaxDataFileSize)) + localOffset := offset % int64(store.header.MaxDataFileSize) + dataFilePath := store.dataFilePath(fileIndex) + dataFile, err := os.OpenFile(dataFilePath, os.O_RDWR, 0) + if err != nil { + return err + } + defer dataFile.Close() + + headerBytes, err := bh.MarshalBinary() + if err != nil { + return err + } + _, err = dataFile.WriteAt(headerBytes, localOffset) + return err +} diff --git a/x/blockdb/writeblock_test.go b/x/blockdb/writeblock_test.go index f4a291d627e5..2bac15ad566b 100644 --- a/x/blockdb/writeblock_test.go +++ b/x/blockdb/writeblock_test.go @@ -4,28 +4,26 @@ package blockdb import ( - "errors" "math" "sync" "sync/atomic" "testing" "github.com/stretchr/testify/require" -) -// todo: create TestWriteBlock test that includes error tests and also tests for things like write when sync is true, etc + safemath "github.com/ava-labs/avalanchego/utils/math" +) -func TestWriteBlock_HeightsVerification(t *testing.T) { - customConfig := DefaultDatabaseConfig() - customConfig.MinimumHeight = 10 +func TestWriteBlock_Basic(t *testing.T) { + customConfig := DefaultDatabaseConfig().WithMinimumHeight(10) tests := []struct { name string blockHeights []uint64 // 
block heights to write, in order - config *DatabaseConfig + config DatabaseConfig expectedMCH uint64 // expected max contiguous height expectedMaxHeight uint64 - headerSizes []uint16 + headerSizes []BlockHeaderSize syncToDisk bool checkpointInterval uint64 }{ @@ -73,42 +71,42 @@ func TestWriteBlock_HeightsVerification(t *testing.T) { { name: "custom min height single block", blockHeights: []uint64{10}, - config: &customConfig, + config: customConfig, expectedMCH: 10, expectedMaxHeight: 10, }, { name: "custom min height out of order", blockHeights: []uint64{13, 11, 10, 12}, - config: &customConfig, + config: customConfig, expectedMCH: 13, expectedMaxHeight: 13, }, { name: "custom min height with gaps", blockHeights: []uint64{10, 11, 13, 15}, - config: &customConfig, + config: customConfig, expectedMCH: 11, expectedMaxHeight: 15, }, { name: "custom min height start with gap", blockHeights: []uint64{11, 12}, - config: &customConfig, + config: customConfig, expectedMCH: unsetHeight, expectedMaxHeight: 12, }, { name: "blocks with various header sizes", blockHeights: []uint64{0, 1, 2}, - headerSizes: []uint16{0, 50, 100}, + headerSizes: []BlockHeaderSize{0, 50, 100}, expectedMCH: 2, expectedMaxHeight: 2, }, { name: "overwrite with different header size", blockHeights: []uint64{12, 13, 12}, // Write twice to same height - headerSizes: []uint16{10, 0, 50}, + headerSizes: []BlockHeaderSize{10, 0, 50}, expectedMCH: unsetHeight, expectedMaxHeight: 13, }, @@ -131,24 +129,18 @@ func TestWriteBlock_HeightsVerification(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { config := tt.config - if config == nil { - defaultConfig := DefaultDatabaseConfig() - config = &defaultConfig - } - if tt.checkpointInterval > 0 { - configCopy := *config - configCopy.CheckpointInterval = tt.checkpointInterval - config = &configCopy + if config.CheckpointInterval == 0 { + config = DefaultDatabaseConfig() } - store, cleanup := newTestDatabase(t, tt.syncToDisk, config) + store, cleanup := newTestDatabase(t, config) defer cleanup() blocksWritten := make(map[uint64][]byte) - headerSizesWritten := make(map[uint64]uint16) + headerSizesWritten := make(map[uint64]BlockHeaderSize) for i, h := range tt.blockHeights { block := randomBlock(t) - var headerSize uint16 + var headerSize BlockHeaderSize // Use specific header size if provided if tt.headerSizes != nil && i < len(tt.headerSizes) { @@ -198,7 +190,7 @@ func TestWriteBlock_HeightsVerification(t *testing.T) { } func TestWriteBlock_Concurrency(t *testing.T) { - store, cleanup := newTestDatabase(t, false, nil) + store, cleanup := newTestDatabase(t, DefaultDatabaseConfig()) defer cleanup() var wg sync.WaitGroup @@ -252,9 +244,9 @@ func TestWriteBlock_Errors(t *testing.T) { name string height uint64 block []byte - headerSize uint16 + headerSize BlockHeaderSize setup func(db *Database) - config *DatabaseConfig + config DatabaseConfig wantErr error }{ { @@ -293,14 +285,10 @@ func TestWriteBlock_Errors(t *testing.T) { wantErr: ErrHeaderSizeTooLarge, }, { - name: "height below custom minimum", - height: 5, - block: randomBlock(t), - config: &DatabaseConfig{ - MinimumHeight: 10, - MaxDataFileSize: DefaultMaxDataFileSize, - CheckpointInterval: 1024, - }, + name: "height below custom minimum", + height: 5, + block: randomBlock(t), + config: DefaultDatabaseConfig().WithMinimumHeight(10), headerSize: 0, wantErr: ErrInvalidBlockHeight, }, @@ -322,14 +310,10 @@ func TestWriteBlock_Errors(t *testing.T) { wantErr: ErrDatabaseClosed, }, { - name: "exceed max data file 
size", - height: 0, - block: make([]byte, 1000), // Block + header will exceed 1024 limit - config: &DatabaseConfig{ - MinimumHeight: 0, - MaxDataFileSize: 1024, // 1KB limit - CheckpointInterval: 1024, - }, + name: "exceed max data file size", + height: 0, + block: make([]byte, 1001), // Block + header will exceed 1024 limit (1001 + 24 = 1025 > 1024) + config: DefaultDatabaseConfig().WithMaxDataFileSize(1024), headerSize: 0, wantErr: ErrBlockTooLarge, }, @@ -337,23 +321,36 @@ func TestWriteBlock_Errors(t *testing.T) { name: "data file offset overflow", height: 0, block: make([]byte, 100), - config: &DatabaseConfig{ - MinimumHeight: 0, - MaxDataFileSize: 0, // No limit - CheckpointInterval: 1024, - }, + config: DefaultDatabaseConfig(), setup: func(db *Database) { // Set the next write offset to near max to trigger overflow db.nextDataWriteOffset.Store(math.MaxUint64 - 50) }, headerSize: 0, - wantErr: errors.New("would overflow uint64 data file pointer"), + wantErr: safemath.ErrOverflow, + }, + { + name: "max data files exceeded", + height: 0, + block: make([]byte, 1), + config: DefaultDatabaseConfig().WithMaxDataFileSize(100), + setup: func(db *Database) { + // set next data write offset to the max to trigger max data files exceeded error + db.nextDataWriteOffset.Store(uint64(MaxDataFiles) * 100) + }, + headerSize: 0, + wantErr: ErrMaxDataFilesExceeded, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - store, cleanup := newTestDatabase(t, false, tt.config) + config := tt.config + if config.CheckpointInterval == 0 { + config = DefaultDatabaseConfig() + } + + store, cleanup := newTestDatabase(t, config) defer cleanup() if tt.setup != nil { @@ -361,7 +358,7 @@ func TestWriteBlock_Errors(t *testing.T) { } err := store.WriteBlock(tt.height, tt.block, tt.headerSize) - require.Contains(t, err.Error(), tt.wantErr.Error()) + require.ErrorIs(t, err, tt.wantErr) checkDatabaseState(t, store, unsetHeight, unsetHeight) }) } From 1497732e7f5db683bb64eb8f1a1390b2bfd30186 Mon Sep 17 00:00:00 2001 From: Draco Date: Mon, 30 Jun 2025 11:36:16 -0400 Subject: [PATCH 09/27] use lru for file cache and fix recovery issues - Added more tests for recovery --- x/blockdb/README.md | 1 + x/blockdb/block.go | 1 - x/blockdb/database.go | 42 ++++++++++++---------- x/blockdb/database_test.go | 68 +++++++++++++++++++++++++++++++++++- x/blockdb/datasplit_test.go | 20 +++-------- x/blockdb/errors.go | 13 ++++--- x/blockdb/helpers_test.go | 19 ++++++++++ x/blockdb/recovery.go | 16 ++++++++- x/blockdb/recovery_test.go | 63 +++++++++++++++++++++++++++------ x/blockdb/writeblock_test.go | 12 ------- 10 files changed, 188 insertions(+), 67 deletions(-) diff --git a/x/blockdb/README.md b/x/blockdb/README.md index 113fa0ff570a..9d8b67a9473f 100644 --- a/x/blockdb/README.md +++ b/x/blockdb/README.md @@ -187,3 +187,4 @@ if err != nil { - [ ] Use a buffered pool to avoid allocations on reads and writes - [ ] Add tests for core functionality - [ ] Add performance benchmarks +- [ ] Consider supporting missing data files (currently we error if any data files are missing) diff --git a/x/blockdb/block.go b/x/blockdb/block.go index 38d39c5c5486..081a990e1cab 100644 --- a/x/blockdb/block.go +++ b/x/blockdb/block.go @@ -412,7 +412,6 @@ func (s *Database) getDataFileAndOffset(globalOffset uint64) (*os.File, uint64, handle, err := s.getOrOpenDataFile(0) return handle, globalOffset, err } - fileIndex := int(globalOffset / maxFileSize) localOffset := globalOffset % maxFileSize handle, err := s.getOrOpenDataFile(fileIndex) diff 
--git a/x/blockdb/database.go b/x/blockdb/database.go index 286342a68ddc..f826f94030e8 100644 --- a/x/blockdb/database.go +++ b/x/blockdb/database.go @@ -12,6 +12,7 @@ import ( "sync" "sync/atomic" + "github.com/ava-labs/avalanchego/cache/lru" "github.com/ava-labs/avalanchego/utils/logging" ) @@ -33,12 +34,15 @@ type Database struct { header indexFileHeader log logging.Logger closed bool - fileCache sync.Map + fileCache *lru.Cache[int, *os.File] // closeMu prevents the database from being closed while in use and prevents // use of a closed database. closeMu sync.RWMutex + // fileOpenMu prevents race conditions when multiple threads try to open the same data file + fileOpenMu sync.Mutex + // maxBlockHeight tracks the highest block height that has been written to the db, even if there are gaps in the sequence. maxBlockHeight atomic.Uint64 // nextDataWriteOffset tracks the next position to write new data in the data file. @@ -186,8 +190,13 @@ func New(indexDir, dataDir string, config DatabaseConfig, log logging.Logger) (* s := &Database{ options: config, log: log, - fileCache: sync.Map{}, + fileCache: lru.NewCache[int, *os.File](MaxDataFiles), } + s.fileCache.SetOnEvict(func(_ int, f *os.File) { + if f != nil { + f.Close() + } + }) if err := s.openAndInitializeIndex(indexDir, config.Truncate); err != nil { return nil, err @@ -211,11 +220,9 @@ func (s *Database) closeFiles() { if s.indexFile != nil { s.indexFile.Close() } - s.fileCache.Range(func(_, value any) bool { - file := value.(*os.File) - file.Close() - return true - }) + if s.fileCache != nil { + s.fileCache.Flush() + } } func (s *Database) dataFilePath(index int) string { @@ -223,12 +230,17 @@ func (s *Database) dataFilePath(index int) string { } func (s *Database) getOrOpenDataFile(fileIndex int) (*os.File, error) { - if handle, ok := s.fileCache.Load(fileIndex); ok { - return handle.(*os.File), nil + if handle, ok := s.fileCache.Get(fileIndex); ok { + return handle, nil } - if fileIndex >= MaxDataFiles { - return nil, fmt.Errorf("%w: file index %d would exceed limit of %d", ErrMaxDataFilesExceeded, fileIndex, MaxDataFiles) + // Prevent race conditions when multiple threads try to open the same file + s.fileOpenMu.Lock() + defer s.fileOpenMu.Unlock() + + // Double-check the cache after acquiring the lock + if handle, ok := s.fileCache.Get(fileIndex); ok { + return handle, nil } filePath := s.dataFilePath(fileIndex) @@ -236,13 +248,7 @@ func (s *Database) getOrOpenDataFile(fileIndex int) (*os.File, error) { if err != nil { return nil, fmt.Errorf("failed to open data file %s: %w", filePath, err) } - actual, loaded := s.fileCache.LoadOrStore(fileIndex, handle) - if loaded { - // Another goroutine created the file first, close ours - handle.Close() - return actual.(*os.File), nil - } - + s.fileCache.Put(fileIndex, handle) return handle, nil } diff --git a/x/blockdb/database_test.go b/x/blockdb/database_test.go index 7bcb62892b36..539eb8684826 100644 --- a/x/blockdb/database_test.go +++ b/x/blockdb/database_test.go @@ -7,10 +7,13 @@ import ( "errors" "os" "path/filepath" + "sync" + "sync/atomic" "testing" "github.com/stretchr/testify/require" + "github.com/ava-labs/avalanchego/cache/lru" "github.com/ava-labs/avalanchego/utils/logging" ) @@ -260,7 +263,7 @@ func TestNew_IndexFileConfigPrecedence(t *testing.T) { require.Equal(t, testBlock, readBlock) require.NoError(t, db.Close()) - // Reopen with different config that has higher minimum height and smaller max data file size + // Reopen with different config that has minimum height of 200 
and smaller max data file size differentConfig := DefaultDatabaseConfig().WithMinimumHeight(200).WithMaxDataFileSize(512 * 1024) db2, err := New(tempDir, tempDir, differentConfig, logging.NoLog{}) require.NoError(t, err) @@ -286,3 +289,66 @@ func TestNew_IndexFileConfigPrecedence(t *testing.T) { require.NoError(t, err) require.Equal(t, largeBlock, readLargeBlock) } + +func TestFileCache_Eviction(t *testing.T) { + // Create a database with a small max data file size to force multiple files + // each file should have enough for 2 blocks (0.5kb * 2) + config := DefaultDatabaseConfig().WithMaxDataFileSize(1024 * 1.5) + store, cleanup := newTestDatabase(t, config) + defer cleanup() + + // Override the file cache with a smaller size to force evictions + evictionCount := atomic.Int32{} + evictionMu := sync.Mutex{} + smallCache := lru.NewCache[int, *os.File](3) // Only 3 files in cache + smallCache.SetOnEvict(func(_ int, file *os.File) { + evictionMu.Lock() + defer evictionMu.Unlock() + evictionCount.Add(1) + if file != nil { + file.Close() + } + }) + store.fileCache = smallCache + + const numBlocks = 20 // 20 blocks will create 10 files + const numGoroutines = 4 + var wg sync.WaitGroup + var writeErrors atomic.Int32 + + // Create blocks of 0.5kb each + blocks := make([][]byte, numBlocks) + for i := range blocks { + blocks[i] = fixedSizeBlock(t, 512, uint64(i)) + } + + // Each goroutine writes all block heights 0-(numBlocks-1) + for g := range numGoroutines { + wg.Add(1) + go func(goroutineID int) { + defer wg.Done() + for i := range numBlocks { + height := uint64((i + goroutineID) % numBlocks) + err := store.WriteBlock(height, blocks[height], 0) + if err != nil { + writeErrors.Add(1) + } + } + }(g) + } + + wg.Wait() + + // Verify no write errors + require.Zero(t, writeErrors.Load(), "concurrent writes had errors") + + // Verify we had some evictions + require.Positive(t, evictionCount.Load(), "should have had some cache evictions") + + // Verify all blocks are readable + for i := range numBlocks { + block, err := store.ReadBlock(uint64(i)) + require.NoError(t, err, "failed to read block at height %d", i) + require.Equal(t, blocks[i], block, "block data mismatch at height %d", i) + } +} diff --git a/x/blockdb/datasplit_test.go b/x/blockdb/datasplit_test.go index 55208ac890d6..cca920b4de1e 100644 --- a/x/blockdb/datasplit_test.go +++ b/x/blockdb/datasplit_test.go @@ -22,8 +22,7 @@ func TestDataSplitting(t *testing.T) { numBlocks := 11 blocks := make([][]byte, numBlocks) for i := range numBlocks { - blocks[i] = make([]byte, 1024) - blocks[i][0] = byte(i) + blocks[i] = fixedSizeBlock(t, 1024, uint64(i)) require.NoError(t, store.WriteBlock(uint64(i), blocks[i], 0)) } @@ -69,8 +68,7 @@ func TestDataSplitting_DeletedFile(t *testing.T) { numBlocks := 5 blocks := make([][]byte, numBlocks) for i := range numBlocks { - blocks[i] = make([]byte, 1024) - blocks[i][0] = byte(i) + blocks[i] = fixedSizeBlock(t, 1024, uint64(i)) require.NoError(t, store.WriteBlock(uint64(i), blocks[i], 0)) } store.Close() @@ -81,16 +79,6 @@ func TestDataSplitting_DeletedFile(t *testing.T) { // reopen and verify the blocks require.NoError(t, store.Close()) - store, err := New(filepath.Dir(store.indexFile.Name()), store.dataDir, config, store.log) - require.NoError(t, err) - defer store.Close() - for i := range numBlocks { - readBlock, err := store.ReadBlock(uint64(i)) - require.NoError(t, err) - if i < 2 { - require.Nil(t, readBlock) - } else { - require.Equal(t, blocks[i], readBlock) - } - } + _, err := 
New(filepath.Dir(store.indexFile.Name()), store.dataDir, config, store.log) + require.ErrorIs(t, err, ErrCorrupted) } diff --git a/x/blockdb/errors.go b/x/blockdb/errors.go index e9c98087e61d..7ec789a0dd2a 100644 --- a/x/blockdb/errors.go +++ b/x/blockdb/errors.go @@ -9,11 +9,10 @@ import ( ) var ( - ErrInvalidBlockHeight = errors.New("blockdb: invalid block height") - ErrBlockEmpty = errors.New("blockdb: block is empty") - ErrDatabaseClosed = errors.New("blockdb: database is closed") - ErrCorrupted = errors.New("blockdb: unrecoverable corruption detected") - ErrHeaderSizeTooLarge = errors.New("blockdb: header size cannot be >= block size") - ErrBlockTooLarge = fmt.Errorf("blockdb: block size exceeds maximum allowed size of %d bytes", MaxBlockDataSize) - ErrMaxDataFilesExceeded = errors.New("blockdb: maximum number of data files exceeded") + ErrInvalidBlockHeight = errors.New("blockdb: invalid block height") + ErrBlockEmpty = errors.New("blockdb: block is empty") + ErrDatabaseClosed = errors.New("blockdb: database is closed") + ErrCorrupted = errors.New("blockdb: unrecoverable corruption detected") + ErrHeaderSizeTooLarge = errors.New("blockdb: header size cannot be >= block size") + ErrBlockTooLarge = fmt.Errorf("blockdb: block size exceeds maximum allowed size of %d bytes", MaxBlockDataSize) ) diff --git a/x/blockdb/helpers_test.go b/x/blockdb/helpers_test.go index cc0a0a916cbd..f00cf5c9559c 100644 --- a/x/blockdb/helpers_test.go +++ b/x/blockdb/helpers_test.go @@ -5,6 +5,7 @@ package blockdb import ( "crypto/rand" + "fmt" "math/big" "os" "path/filepath" @@ -45,6 +46,19 @@ func randomBlock(t *testing.T) []byte { return b } +// fixedSizeBlock generates a block of the specified fixed size with height information. +func fixedSizeBlock(t *testing.T, size int, height uint64) []byte { + require.Positive(t, size, "block size must be positive") + b := make([]byte, size) + + // Fill the beginning with height information for better testability + heightStr := fmt.Sprintf("block-height-%d-", height) + if len(heightStr) <= size { + copy(b, heightStr) + } + return b +} + func checkDatabaseState(t *testing.T, db *Database, maxHeight uint64, maxContiguousHeight uint64) { require.Equal(t, maxHeight, db.maxBlockHeight.Load(), "maxBlockHeight mismatch") gotMCH, ok := db.MaxContiguousHeight() @@ -53,3 +67,8 @@ func checkDatabaseState(t *testing.T, db *Database, maxHeight uint64, maxContigu require.Equal(t, maxContiguousHeight, gotMCH, "maxContiguousHeight mismatch") } } + +// Helper function to create a pointer to uint64 +func uint64Ptr(v uint64) *uint64 { + return &v +} diff --git a/x/blockdb/recovery.go b/x/blockdb/recovery.go index 25342b43c376..ed9f729cb580 100644 --- a/x/blockdb/recovery.go +++ b/x/blockdb/recovery.go @@ -29,6 +29,21 @@ func (s *Database) recover() error { return nil } + // ensure no data files are missing + // If any data files are missing, we would need to recalculate the max height + // and max contiguous height. This can be supported in the future but for now + // to keep things simple, we will just error if the data files are not as expected. 
+ if s.header.MaxDataFileSize > 0 { + // Ensure data files are sequential starting from 0 + for i := 0; i <= maxIndex; i++ { + if _, exists := dataFiles[i]; !exists { + return fmt.Errorf("%w: data file at index %d is missing", ErrCorrupted, i) + } + } + } else if len(dataFiles) > 1 || maxIndex > 1 { + return fmt.Errorf("%w: expect only 1 data file at index 0, got %d files with max index %d", ErrCorrupted, len(dataFiles), maxIndex) + } + // Calculate the expected next write offset based on the data on disk. var calculatedNextDataWriteOffset uint64 if s.header.MaxDataFileSize > 0 { @@ -57,7 +72,6 @@ func (s *Database) recover() error { } nextDataWriteOffset := s.nextDataWriteOffset.Load() - switch { case calculatedNextDataWriteOffset == nextDataWriteOffset: s.log.Debug("Recovery: data files match index header, no recovery needed.") diff --git a/x/blockdb/recovery_test.go b/x/blockdb/recovery_test.go index 133b2f36d044..6cee6042bb11 100644 --- a/x/blockdb/recovery_test.go +++ b/x/blockdb/recovery_test.go @@ -4,7 +4,6 @@ package blockdb import ( - "fmt" "os" "path/filepath" "testing" @@ -92,8 +91,7 @@ func TestRecovery_Success(t *testing.T) { for _, height := range blockHeights { // Create 4KB blocks - block := make([]byte, 4*1024) - copy(block, fmt.Appendf(nil, "block-data-for-height-%d", height)) + block := fixedSizeBlock(t, 4*1024, height) require.NoError(t, store.WriteBlock(height, block, 0)) blocks[height] = block @@ -111,13 +109,12 @@ func TestRecovery_Success(t *testing.T) { require.NoError(t, err) defer recoveredStore.Close() - // Verify all blocks are still readable + // Verify blocks are readable for _, height := range blockHeights { readBlock, err := recoveredStore.ReadBlock(height) require.NoError(t, err) require.Equal(t, blocks[height], readBlock, "block %d should be the same", height) } - checkDatabaseState(t, recoveredStore, 8, 3) }) } @@ -128,6 +125,8 @@ func TestRecovery_CorruptionDetection(t *testing.T) { name string blockHeights []uint64 minHeight uint64 + maxDataFileSize *uint64 + blockSize int // Optional: if set, creates fixed-size blocks instead of random setupCorruption func(store *Database, blocks [][]byte) error wantErr error wantErrText string @@ -180,7 +179,7 @@ func TestRecovery_CorruptionDetection(t *testing.T) { secondBlockOffset := int64(sizeOfBlockHeader) + int64(len(blocks[0])) corruptedHeader := make([]byte, sizeOfBlockHeader) for i := range corruptedHeader { - corruptedHeader[i] = 0xFF // Invalid data + corruptedHeader[i] = 0xFF // Invalid header data } dataFilePath := store.dataFilePath(0) dataFile, err := os.OpenFile(dataFilePath, os.O_RDWR, 0) @@ -205,7 +204,7 @@ func TestRecovery_CorruptionDetection(t *testing.T) { bh := blockHeader{ Height: 1, Checksum: calculateChecksum(blocks[1]), - Size: uint32(len(blocks[1])) + 1, // too large + Size: uint32(len(blocks[1])) + 1, // make block larger than actual HeaderSize: 0, } return writeBlockHeader(store, secondBlockOffset, bh) @@ -243,7 +242,7 @@ func TestRecovery_CorruptionDetection(t *testing.T) { } defer dataFile.Close() - // Truncate to have only partial block data + // Truncate data file to have only partial block data truncateSize := int64(sizeOfBlockHeader) + int64(len(blocks[0]))/2 return dataFile.Truncate(truncateSize) }, @@ -260,7 +259,7 @@ func TestRecovery_CorruptionDetection(t *testing.T) { } secondBlockOffset := int64(sizeOfBlockHeader) + int64(len(blocks[0])) bh := blockHeader{ - Height: 5, // Below minimum height of 10 + Height: 5, // Invalid height because its below the minimum height of 10 
Checksum: calculateChecksum(blocks[1]), Size: uint32(len(blocks[1])), HeaderSize: 0, @@ -270,6 +269,41 @@ func TestRecovery_CorruptionDetection(t *testing.T) { wantErr: ErrCorrupted, wantErrText: "invalid block height in header", }, + { + name: "missing data file at index 1", + blockHeights: []uint64{0, 1, 2, 3, 4, 5}, + maxDataFileSize: uint64Ptr(1024), // 1KB per file to force multiple files + blockSize: 512, // 512 bytes per block + setupCorruption: func(store *Database, _ [][]byte) error { + // Delete the second data file (index 1) + dataFilePath := store.dataFilePath(1) + return os.Remove(dataFilePath) + }, + wantErr: ErrCorrupted, + wantErrText: "data file at index 1 is missing", + }, + { + name: "unexpected multiple data files when MaxDataFileSize is 0", + blockHeights: []uint64{0, 1, 2}, + maxDataFileSize: uint64Ptr(0), // Single file mode + blockSize: 512, // 512 bytes per block + setupCorruption: func(store *Database, _ [][]byte) error { + // Manually create a second data file to simulate corruption + secondDataFilePath := store.dataFilePath(1) + secondDataFile, err := os.Create(secondDataFilePath) + if err != nil { + return err + } + defer secondDataFile.Close() + + // Write some dummy data to the second file + dummyData := []byte("dummy data file") + _, err = secondDataFile.Write(dummyData) + return err + }, + wantErr: ErrCorrupted, + wantErrText: "expect only 1 data file at index 0, got 2 files with max index 1", + }, } for _, tt := range tests { @@ -278,6 +312,9 @@ func TestRecovery_CorruptionDetection(t *testing.T) { if tt.minHeight > 0 { config = config.WithMinimumHeight(tt.minHeight) } + if tt.maxDataFileSize != nil { + config = config.WithMaxDataFileSize(*tt.maxDataFileSize) + } store, cleanup := newTestDatabase(t, config) defer cleanup() @@ -285,12 +322,16 @@ func TestRecovery_CorruptionDetection(t *testing.T) { // Setup blocks blocks := make([][]byte, len(tt.blockHeights)) for i, height := range tt.blockHeights { - blocks[i] = randomBlock(t) + if tt.blockSize > 0 { + blocks[i] = fixedSizeBlock(t, tt.blockSize, height) + } else { + blocks[i] = randomBlock(t) + } require.NoError(t, store.WriteBlock(height, blocks[i], 0)) } require.NoError(t, store.Close()) - // Apply corruption + // Apply corruption logic require.NoError(t, tt.setupCorruption(store, blocks)) // Try to reopen the database - it should detect corruption diff --git a/x/blockdb/writeblock_test.go b/x/blockdb/writeblock_test.go index 2bac15ad566b..44916e1cadc8 100644 --- a/x/blockdb/writeblock_test.go +++ b/x/blockdb/writeblock_test.go @@ -329,18 +329,6 @@ func TestWriteBlock_Errors(t *testing.T) { headerSize: 0, wantErr: safemath.ErrOverflow, }, - { - name: "max data files exceeded", - height: 0, - block: make([]byte, 1), - config: DefaultDatabaseConfig().WithMaxDataFileSize(100), - setup: func(db *Database) { - // set next data write offset to the max to trigger max data files exceeded error - db.nextDataWriteOffset.Store(uint64(MaxDataFiles) * 100) - }, - headerSize: 0, - wantErr: ErrMaxDataFilesExceeded, - }, } for _, tt := range tests { From 718559b646d6cac30da7979afa662acaa0aeb746 Mon Sep 17 00:00:00 2001 From: Draco Date: Mon, 30 Jun 2025 11:57:13 -0400 Subject: [PATCH 10/27] refactor: use t.TempDir --- x/blockdb/database_test.go | 22 ++++++---------------- x/blockdb/helpers_test.go | 11 +++-------- 2 files changed, 9 insertions(+), 24 deletions(-) diff --git a/x/blockdb/database_test.go b/x/blockdb/database_test.go index 539eb8684826..50288052d2c8 100644 --- a/x/blockdb/database_test.go +++ 
b/x/blockdb/database_test.go @@ -19,9 +19,7 @@ import ( func TestNew_Truncate(t *testing.T) { // Create initial database - tempDir, err := os.MkdirTemp("", "blockdb_truncate_test_*") - require.NoError(t, err) - defer os.RemoveAll(tempDir) + tempDir := t.TempDir() indexDir := filepath.Join(tempDir, "index") dataDir := filepath.Join(tempDir, "data") config := DefaultDatabaseConfig().WithTruncate(true) @@ -47,9 +45,7 @@ func TestNew_Truncate(t *testing.T) { } func TestNew_NoTruncate(t *testing.T) { - tempDir, err := os.MkdirTemp("", "blockdb_no_truncate_test_*") - require.NoError(t, err) - defer os.RemoveAll(tempDir) + tempDir := t.TempDir() indexDir := filepath.Join(tempDir, "index") dataDir := filepath.Join(tempDir, "data") config := DefaultDatabaseConfig().WithTruncate(true) @@ -84,9 +80,7 @@ func TestNew_NoTruncate(t *testing.T) { } func TestNew_Params(t *testing.T) { - tempDir, err := os.MkdirTemp("", "blockdb_test_*") - require.NoError(t, err) - defer os.RemoveAll(tempDir) + tempDir := t.TempDir() tests := []struct { name string indexDir string @@ -177,7 +171,7 @@ func TestNew_IndexFileErrors(t *testing.T) { { name: "corrupted index file", setup: func() (string, string) { - tempDir, _ := os.MkdirTemp("", "blockdb_test_*") + tempDir := t.TempDir() indexDir := filepath.Join(tempDir, "index") dataDir := filepath.Join(tempDir, "data") require.NoError(t, os.MkdirAll(indexDir, 0o755)) @@ -195,7 +189,7 @@ func TestNew_IndexFileErrors(t *testing.T) { { name: "version mismatch in existing index file", setup: func() (string, string) { - tempDir, _ := os.MkdirTemp("", "blockdb_test_*") + tempDir := t.TempDir() indexDir := filepath.Join(tempDir, "index") dataDir := filepath.Join(tempDir, "data") @@ -230,8 +224,6 @@ func TestNew_IndexFileErrors(t *testing.T) { if indexDir == "" || dataDir == "" { t.Skip("Setup failed, skipping test") } - defer os.RemoveAll(filepath.Dir(indexDir)) - defer os.RemoveAll(filepath.Dir(dataDir)) _, err := New(indexDir, dataDir, DefaultDatabaseConfig(), logging.NoLog{}) require.Contains(t, err.Error(), tt.wantErrMsg) @@ -248,9 +240,7 @@ func TestIndexFileHeaderAlignment(t *testing.T) { func TestNew_IndexFileConfigPrecedence(t *testing.T) { // set up db initialConfig := DefaultDatabaseConfig().WithMinimumHeight(100).WithMaxDataFileSize(1024 * 1024) - tempDir, err := os.MkdirTemp("", "blockdb_config_precedence_test_*") - require.NoError(t, err) - defer os.RemoveAll(tempDir) + tempDir := t.TempDir() db, err := New(tempDir, tempDir, initialConfig, logging.NoLog{}) require.NoError(t, err) require.NotNil(t, db) diff --git a/x/blockdb/helpers_test.go b/x/blockdb/helpers_test.go index f00cf5c9559c..8595d27ae66d 100644 --- a/x/blockdb/helpers_test.go +++ b/x/blockdb/helpers_test.go @@ -7,7 +7,6 @@ import ( "crypto/rand" "fmt" "math/big" - "os" "path/filepath" "testing" @@ -18,19 +17,15 @@ import ( func newTestDatabase(t *testing.T, opts DatabaseConfig) (*Database, func()) { t.Helper() - dir, err := os.MkdirTemp("", "blockdb_test_*") - require.NoError(t, err, "failed to create temp dir") + dir := t.TempDir() idxDir := filepath.Join(dir, "idx") dataDir := filepath.Join(dir, "dat") db, err := New(idxDir, dataDir, opts, logging.NoLog{}) - if err != nil { - os.RemoveAll(dir) - require.NoError(t, err, "failed to create database") - } + require.NoError(t, err, "failed to create database") + cleanup := func() { db.Close() - os.RemoveAll(dir) } return db, cleanup } From b3b797c46c3c21faf365776bb6c85e92b5f99bc4 Mon Sep 17 00:00:00 2001 From: Draco Date: Sun, 6 Jul 2025 12:22:02 -0400 
Subject: [PATCH 11/27] refactor: move database methods to database.go --- x/blockdb/README.md | 4 +- x/blockdb/block.go | 419 --------------- x/blockdb/config.go | 2 +- x/blockdb/database.go | 971 ++++++++++++++++++++++++++++++++--- x/blockdb/database_test.go | 5 +- x/blockdb/errors.go | 1 + x/blockdb/index.go | 191 ------- x/blockdb/readblock_test.go | 97 ++-- x/blockdb/recovery.go | 214 -------- x/blockdb/recovery_test.go | 9 +- x/blockdb/writeblock_test.go | 8 +- 11 files changed, 979 insertions(+), 942 deletions(-) delete mode 100644 x/blockdb/block.go delete mode 100644 x/blockdb/index.go delete mode 100644 x/blockdb/recovery.go diff --git a/x/blockdb/README.md b/x/blockdb/README.md index 9d8b67a9473f..ad737e1a9589 100644 --- a/x/blockdb/README.md +++ b/x/blockdb/README.md @@ -118,8 +118,8 @@ On startup, BlockDB checks for signs of an unclean shutdown by comparing the dat 2. For each unindexed block found: - Validates the block header and checksum - Writes the corresponding index entry -3. Updates the max contiguous height and max block height -4. Persists the updated index header +3. Calculates the max contiguous height and max block height +4. Updates the index header with the updated max contiguous height, max block height, and next write offset ## Usage diff --git a/x/blockdb/block.go b/x/blockdb/block.go deleted file mode 100644 index 081a990e1cab..000000000000 --- a/x/blockdb/block.go +++ /dev/null @@ -1,419 +0,0 @@ -// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved. -// See the file LICENSE for licensing terms. - -package blockdb - -import ( - "encoding" - "encoding/binary" - "errors" - "fmt" - "io" - "os" - - "github.com/cespare/xxhash/v2" - "go.uber.org/zap" - - safemath "github.com/ava-labs/avalanchego/utils/math" -) - -var ( - _ encoding.BinaryMarshaler = (*blockHeader)(nil) - _ encoding.BinaryUnmarshaler = (*blockHeader)(nil) - - sizeOfBlockHeader = uint32(binary.Size(blockHeader{})) -) - -// BlockHeight defines the type for block heights. -type BlockHeight = uint64 - -// BlockData defines the type for block data. -type BlockData = []byte - -// BlockHeaderSize is the size of the header in the block data. -type BlockHeaderSize = uint32 - -// MaxBlockDataSize is the maximum size of a block in bytes (16 MB). -const MaxBlockDataSize = 1 << 24 - -// blockHeader is prepended to each block in the data file. -type blockHeader struct { - Height BlockHeight - Checksum uint64 - Size uint32 - HeaderSize BlockHeaderSize -} - -// MarshalBinary implements the encoding.BinaryMarshaler interface. -func (bh blockHeader) MarshalBinary() ([]byte, error) { - buf := make([]byte, sizeOfBlockHeader) - binary.LittleEndian.PutUint64(buf[0:], bh.Height) - binary.LittleEndian.PutUint64(buf[8:], bh.Checksum) - binary.LittleEndian.PutUint32(buf[16:], bh.Size) - binary.LittleEndian.PutUint32(buf[20:], bh.HeaderSize) - return buf, nil -} - -// UnmarshalBinary implements the encoding.BinaryUnmarshaler interface. -func (bh *blockHeader) UnmarshalBinary(data []byte) error { - if len(data) != int(sizeOfBlockHeader) { - return fmt.Errorf("incorrect data length to unmarshal blockHeader: got %d bytes, need exactly %d", len(data), sizeOfBlockHeader) - } - bh.Height = binary.LittleEndian.Uint64(data[0:]) - bh.Checksum = binary.LittleEndian.Uint64(data[8:]) - bh.Size = binary.LittleEndian.Uint32(data[16:]) - bh.HeaderSize = binary.LittleEndian.Uint32(data[20:]) - return nil -} - -// WriteBlock inserts a block into the store at the given height with the specified header size. 
-func (s *Database) WriteBlock(height BlockHeight, block BlockData, headerSize BlockHeaderSize) error { - s.closeMu.RLock() - defer s.closeMu.RUnlock() - - if s.closed { - return ErrDatabaseClosed - } - - blockDataLen := uint32(len(block)) - if blockDataLen == 0 { - return ErrBlockEmpty - } - - if blockDataLen > MaxBlockDataSize { - return ErrBlockTooLarge - } - - if headerSize >= blockDataLen { - return ErrHeaderSizeTooLarge - } - - indexFileOffset, err := s.indexEntryOffset(height) - if err != nil { - return err - } - - sizeWithDataHeader, err := safemath.Add(sizeOfBlockHeader, blockDataLen) - if err != nil { - return fmt.Errorf("calculating total block size would overflow for block at height %d: %w", height, err) - } - writeDataOffset, err := s.allocateBlockSpace(sizeWithDataHeader) - if err != nil { - return err - } - - bh := blockHeader{ - Height: height, - Size: blockDataLen, - HeaderSize: headerSize, - Checksum: calculateChecksum(block), - } - if err := s.writeBlockAt(writeDataOffset, bh, block); err != nil { - return err - } - - if err := s.writeIndexEntryAt(indexFileOffset, writeDataOffset, blockDataLen, headerSize); err != nil { - return err - } - - return s.updateBlockHeights(height) -} - -// ReadBlock retrieves a block by its height. -// Returns nil if the block is not found. -func (s *Database) ReadBlock(height BlockHeight) (BlockData, error) { - s.closeMu.RLock() - defer s.closeMu.RUnlock() - - if s.closed { - return nil, ErrDatabaseClosed - } - - indexEntry, err := s.readIndexEntry(height) - if err != nil { - return nil, err - } - if indexEntry.IsEmpty() { - return nil, nil - } - - // Read the complete block data - blockData := make(BlockData, indexEntry.Size) - dataFile, localOffset, err := s.getDataFileAndOffset(indexEntry.Offset) - if err != nil { - return nil, fmt.Errorf("failed to get data file for block at height %d: %w", height, err) - } - _, err = dataFile.ReadAt(blockData, int64(localOffset+uint64(sizeOfBlockHeader))) - if err != nil { - if errors.Is(err, io.EOF) { - return nil, nil - } - return nil, fmt.Errorf("failed to read block data from data file: %w", err) - } - - return blockData, nil -} - -// ReadHeader retrieves only the header portion of a block by its height. -// Returns nil if the block is not found or no header. 
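// A minimal usage sketch of the read/write semantics documented above,
// assuming a *Database value named db and illustrative byte values; the
// first headerSize bytes of a stored block are returned by ReadHeader and
// the remainder by ReadBody:
//
//	block := []byte("hdrbody-bytes")
//	headerSize := BlockHeaderSize(3) // the "hdr" prefix
//	if err := db.WriteBlock(100, block, headerSize); err != nil {
//		return err
//	}
//	full, _ := db.ReadBlock(100)  // "hdrbody-bytes"
//	hdr, _ := db.ReadHeader(100)  // "hdr"
//	body, _ := db.ReadBody(100)   // "body-bytes"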
-func (s *Database) ReadHeader(height BlockHeight) (BlockData, error) { - s.closeMu.RLock() - defer s.closeMu.RUnlock() - - if s.closed { - return nil, ErrDatabaseClosed - } - - indexEntry, err := s.readIndexEntry(height) - if err != nil { - return nil, err - } - if indexEntry.IsEmpty() { - return nil, nil - } - - // Return nil if there's no header data - if indexEntry.HeaderSize == 0 { - return nil, nil - } - - // Validate header size doesn't exceed total block size - if indexEntry.HeaderSize > indexEntry.Size { - return nil, fmt.Errorf("invalid header size %d exceeds block size %d", indexEntry.HeaderSize, indexEntry.Size) - } - - // Read only the header portion - headerData := make([]byte, indexEntry.HeaderSize) - dataFile, localOffset, err := s.getDataFileAndOffset(indexEntry.Offset) - if err != nil { - return nil, fmt.Errorf("failed to get data file for block header at height %d: %w", height, err) - } - _, err = dataFile.ReadAt(headerData, int64(localOffset+uint64(sizeOfBlockHeader))) - if err != nil { - if errors.Is(err, io.EOF) { - return nil, nil - } - return nil, fmt.Errorf("failed to read block header data from data file: %w", err) - } - - return headerData, nil -} - -// ReadBody retrieves only the body portion (excluding header) of a block by its height. -// Returns nil if the block is not found. -func (s *Database) ReadBody(height BlockHeight) (BlockData, error) { - s.closeMu.RLock() - defer s.closeMu.RUnlock() - - if s.closed { - return nil, ErrDatabaseClosed - } - - indexEntry, err := s.readIndexEntry(height) - if err != nil { - return nil, err - } - if indexEntry.IsEmpty() { - return nil, nil - } - - bodySize := indexEntry.Size - indexEntry.HeaderSize - bodyData := make([]byte, bodySize) - dataFile, localOffset, err := s.getDataFileAndOffset(indexEntry.Offset) - if err != nil { - return nil, fmt.Errorf("failed to get data file for block body at height %d: %w", height, err) - } - headerOffset, err := safemath.Add(localOffset, uint64(sizeOfBlockHeader)) - if err != nil { - return nil, fmt.Errorf("calculating header offset would overflow for block at height %d: %w", height, err) - } - bodyOffset, err := safemath.Add(headerOffset, uint64(indexEntry.HeaderSize)) - if err != nil { - return nil, fmt.Errorf("calculating body offset would overflow for block at height %d: %w", height, err) - } - - _, err = dataFile.ReadAt(bodyData, int64(bodyOffset)) - if err != nil { - if errors.Is(err, io.EOF) { - return nil, nil - } - return nil, fmt.Errorf("failed to read block body data from data file: %w", err) - } - return bodyData, nil -} - -func calculateChecksum(data []byte) uint64 { - return xxhash.Sum64(data) -} - -func (s *Database) writeBlockAt(offset uint64, bh blockHeader, block BlockData) error { - headerBytes, err := bh.MarshalBinary() - if err != nil { - return fmt.Errorf("failed to serialize block header: %w", err) - } - - dataFile, localOffset, err := s.getDataFileAndOffset(offset) - if err != nil { - return fmt.Errorf("failed to get data file for writing block %d: %w", bh.Height, err) - } - - // Allocate combined buffer for header and block data and write it to the data file - combinedBufSize, err := safemath.Add(uint64(sizeOfBlockHeader), uint64(len(block))) - if err != nil { - return fmt.Errorf("calculating combined buffer size would overflow for block %d: %w", bh.Height, err) - } - combinedBuf := make([]byte, combinedBufSize) - copy(combinedBuf, headerBytes) - copy(combinedBuf[sizeOfBlockHeader:], block) - if _, err := dataFile.WriteAt(combinedBuf, int64(localOffset)); err != 
nil { - return fmt.Errorf("failed to write block to data file at offset %d: %w", offset, err) - } - - if s.options.SyncToDisk { - if err := dataFile.Sync(); err != nil { - return fmt.Errorf("failed to sync data file after writing block %d: %w", bh.Height, err) - } - } - return nil -} - -func (s *Database) updateBlockHeights(writtenBlockHeight BlockHeight) error { - prevContiguousCandidate := uint64(unsetHeight) - if writtenBlockHeight > s.header.MinHeight { - prevContiguousCandidate = writtenBlockHeight - 1 - } - - if s.maxContiguousHeight.CompareAndSwap(prevContiguousCandidate, writtenBlockHeight) { - currentMax := writtenBlockHeight - for { - nextHeightToVerify, err := safemath.Add(currentMax, 1) - if err != nil { - s.log.Error("overflow in height calculation when updating max contiguous height") - break - } - entry, err := s.readIndexEntry(nextHeightToVerify) - if err != nil { - s.log.Error( - "error reading index entry when updating max contiguous height", - zap.Uint64("height", nextHeightToVerify), - zap.Error(err), - ) - break - } - if entry.IsEmpty() { - break - } - if !s.maxContiguousHeight.CompareAndSwap(currentMax, nextHeightToVerify) { - break // Someone else updated - } - currentMax = nextHeightToVerify - } - } - - // update max block height and persist header on checkpoint interval - var oldMaxHeight BlockHeight - for { - oldMaxHeight = s.maxBlockHeight.Load() - if writtenBlockHeight <= oldMaxHeight && oldMaxHeight != unsetHeight { - break - } - if s.maxBlockHeight.CompareAndSwap(oldMaxHeight, writtenBlockHeight) { - if writtenBlockHeight%s.options.CheckpointInterval == 0 { - if err := s.persistIndexHeader(); err != nil { - return fmt.Errorf("block %d written, but checkpoint failed: %w", writtenBlockHeight, err) - } - } - break - } - } - return nil -} - -// allocateBlockSpace reserves space for a block and returns the data file offset where it should be written. -// -// This function atomically reserves space by updating the nextWriteOffset and handles -// file splitting by advancing the nextWriteOffset when a data file would be exceeded. -// -// Parameters: -// - totalSize: The total size in bytes needed for the block -// -// Returns: -// - writeDataOffset: The data file offset where the block should be written -// - err: Error if allocation fails (e.g., block too large, overflow, etc.) -func (s *Database) allocateBlockSpace(totalSize uint32) (writeDataOffset uint64, err error) { - maxDataFileSize := s.header.MaxDataFileSize - - // Check if a single block would exceed the max data file size - if maxDataFileSize > 0 && uint64(totalSize) > maxDataFileSize { - return 0, ErrBlockTooLarge - } - - for { - currentOffset := s.nextDataWriteOffset.Load() - - // Calculate where this block would end if written at current offset - blockEndOffset, err := safemath.Add(currentOffset, uint64(totalSize)) - if err != nil { - return 0, fmt.Errorf( - "adding block of size %d to offset %d would overflow uint64 data file pointer: %w", - totalSize, currentOffset, err, - ) - } - - // Determine the actual write offset for this block, taking into account - // data file splitting when max data file size is reached. 
- actualWriteOffset := currentOffset - actualBlockEndOffset := blockEndOffset - - // If we have a max file size, check if we need to start a new file - if maxDataFileSize > 0 { - currentFileIndex := int(currentOffset / maxDataFileSize) - offsetWithinCurrentFile := currentOffset % maxDataFileSize - - // Check if this block would span across file boundaries - blockEndWithinFile, err := safemath.Add(offsetWithinCurrentFile, uint64(totalSize)) - if err != nil { - return 0, fmt.Errorf( - "calculating block end within file would overflow: %w", - err, - ) - } - if blockEndWithinFile > maxDataFileSize { - // Advance the current write offset to the start of the next file since - // it would exceed the current file size. - nextFileStartOffset, err := safemath.Mul(uint64(currentFileIndex+1), maxDataFileSize) - if err != nil { - return 0, fmt.Errorf( - "calculating next file offset would overflow: %w", - err, - ) - } - actualWriteOffset = nextFileStartOffset - - // Recalculate the end offset for the block space to set the next write offset - if actualBlockEndOffset, err = safemath.Add(actualWriteOffset, uint64(totalSize)); err != nil { - return 0, fmt.Errorf( - "adding block of size %d to new file offset %d would overflow: %w", - totalSize, actualWriteOffset, err, - ) - } - } - } - - if s.nextDataWriteOffset.CompareAndSwap(currentOffset, actualBlockEndOffset) { - return actualWriteOffset, nil - } - } -} - -func (s *Database) getDataFileAndOffset(globalOffset uint64) (*os.File, uint64, error) { - maxFileSize := s.header.MaxDataFileSize - if maxFileSize == 0 { - handle, err := s.getOrOpenDataFile(0) - return handle, globalOffset, err - } - fileIndex := int(globalOffset / maxFileSize) - localOffset := globalOffset % maxFileSize - handle, err := s.getOrOpenDataFile(fileIndex) - return handle, localOffset, err -} diff --git a/x/blockdb/config.go b/x/blockdb/config.go index b91b15fc8eb1..2726a15269b0 100644 --- a/x/blockdb/config.go +++ b/x/blockdb/config.go @@ -17,7 +17,7 @@ type DatabaseConfig struct { // MinimumHeight is the lowest block height tracked by the database. MinimumHeight uint64 - // MaxDataFileSize sets the maximum size of the data block file in bytes. If 0, there is no limit. + // MaxDataFileSize sets the maximum size of the data block file in bytes. MaxDataFileSize uint64 // CheckpointInterval defines how frequently (in blocks) the index file header is updated (default: 1024). diff --git a/x/blockdb/database.go b/x/blockdb/database.go index f826f94030e8..f2cfa2b2531d 100644 --- a/x/blockdb/database.go +++ b/x/blockdb/database.go @@ -4,8 +4,11 @@ package blockdb import ( + "encoding" + "encoding/binary" "errors" "fmt" + "io" "math" "os" "path/filepath" @@ -14,6 +17,9 @@ import ( "github.com/ava-labs/avalanchego/cache/lru" "github.com/ava-labs/avalanchego/utils/logging" + safemath "github.com/ava-labs/avalanchego/utils/math" + "github.com/cespare/xxhash/v2" + "go.uber.org/zap" ) const ( @@ -22,10 +28,145 @@ const ( defaultFilePermissions = 0o666 // Since 0 is a valid height, math.MaxUint64 is used to indicate unset height. - // It is not be possible for block height to be max uint64 as it would overflow the index entry offset + // It is not possible for block height to be max uint64 as it would overflow the index entry offset unsetHeight = math.MaxUint64 + + // IndexFileVersion is the version of the index file format. + IndexFileVersion uint64 = 1 + + // MaxBlockDataSize is the maximum size of a block in bytes (16 MB). 
+ MaxBlockDataSize = 1 << 24 ) +// BlockHeight defines the type for block heights. +type BlockHeight = uint64 + +// BlockData defines the type for block data. +type BlockData = []byte + +// BlockHeaderSize is the size of the header in the block data. +type BlockHeaderSize = uint32 + +var ( + _ encoding.BinaryMarshaler = (*blockHeader)(nil) + _ encoding.BinaryUnmarshaler = (*blockHeader)(nil) + _ encoding.BinaryMarshaler = (*indexEntry)(nil) + _ encoding.BinaryUnmarshaler = (*indexEntry)(nil) + _ encoding.BinaryMarshaler = (*indexFileHeader)(nil) + _ encoding.BinaryUnmarshaler = (*indexFileHeader)(nil) + + sizeOfBlockHeader = uint32(binary.Size(blockHeader{})) + sizeOfIndexEntry = uint64(binary.Size(indexEntry{})) + sizeOfIndexFileHeader = uint64(binary.Size(indexFileHeader{})) +) + +// blockHeader is prepended to each block in the data file. +type blockHeader struct { + Height BlockHeight + Checksum uint64 + Size uint32 + HeaderSize BlockHeaderSize +} + +// MarshalBinary implements the encoding.BinaryMarshaler interface. +func (bh blockHeader) MarshalBinary() ([]byte, error) { + buf := make([]byte, sizeOfBlockHeader) + binary.LittleEndian.PutUint64(buf[0:], bh.Height) + binary.LittleEndian.PutUint64(buf[8:], bh.Checksum) + binary.LittleEndian.PutUint32(buf[16:], bh.Size) + binary.LittleEndian.PutUint32(buf[20:], bh.HeaderSize) + return buf, nil +} + +// UnmarshalBinary implements the encoding.BinaryUnmarshaler interface. +func (bh *blockHeader) UnmarshalBinary(data []byte) error { + if len(data) != int(sizeOfBlockHeader) { + return fmt.Errorf("%w: incorrect data length to unmarshal blockHeader: got %d bytes, need exactly %d", ErrCorrupted, len(data), sizeOfBlockHeader) + } + bh.Height = binary.LittleEndian.Uint64(data[0:]) + bh.Checksum = binary.LittleEndian.Uint64(data[8:]) + bh.Size = binary.LittleEndian.Uint32(data[16:]) + bh.HeaderSize = binary.LittleEndian.Uint32(data[20:]) + return nil +} + +// indexEntry represents an entry in the index file. +type indexEntry struct { + // Offset is the byte offset in the data file where the block's header starts. + Offset uint64 + // Size is the length in bytes of the block's data (excluding the blockHeader). + Size uint32 + // HeaderSize is the size in bytes of the block's header portion within the data. + HeaderSize BlockHeaderSize +} + +// IsEmpty returns true if this entry is uninitialized. +// This indicates a slot where no block has been written. +func (e indexEntry) IsEmpty() bool { + return e.Offset == 0 && e.Size == 0 +} + +// MarshalBinary implements encoding.BinaryMarshaler for indexEntry. +func (e indexEntry) MarshalBinary() ([]byte, error) { + buf := make([]byte, sizeOfIndexEntry) + binary.LittleEndian.PutUint64(buf[0:], e.Offset) + binary.LittleEndian.PutUint32(buf[8:], e.Size) + binary.LittleEndian.PutUint32(buf[12:], e.HeaderSize) + return buf, nil +} + +// UnmarshalBinary implements encoding.BinaryUnmarshaler for indexEntry. +func (e *indexEntry) UnmarshalBinary(data []byte) error { + if len(data) != int(sizeOfIndexEntry) { + return fmt.Errorf("%w: incorrect data length to unmarshal indexEntry: got %d bytes, need exactly %d", ErrCorrupted, len(data), sizeOfIndexEntry) + } + e.Offset = binary.LittleEndian.Uint64(data[0:]) + e.Size = binary.LittleEndian.Uint32(data[8:]) + e.HeaderSize = binary.LittleEndian.Uint32(data[12:]) + return nil +} + +// indexFileHeader is the header of the index file. 
+type indexFileHeader struct { + Version uint64 + MaxDataFileSize uint64 + MinHeight BlockHeight + MaxContiguousHeight BlockHeight + MaxHeight BlockHeight + NextWriteOffset uint64 + // reserve 32 bytes for future use + Reserved [32]byte +} + +// MarshalBinary implements encoding.BinaryMarshaler for indexFileHeader. +func (h indexFileHeader) MarshalBinary() ([]byte, error) { + buf := make([]byte, sizeOfIndexFileHeader) + binary.LittleEndian.PutUint64(buf[0:], h.Version) + binary.LittleEndian.PutUint64(buf[8:], h.MaxDataFileSize) + binary.LittleEndian.PutUint64(buf[16:], h.MinHeight) + binary.LittleEndian.PutUint64(buf[24:], h.MaxContiguousHeight) + binary.LittleEndian.PutUint64(buf[32:], h.MaxHeight) + binary.LittleEndian.PutUint64(buf[40:], h.NextWriteOffset) + return buf, nil +} + +// UnmarshalBinary implements encoding.BinaryUnmarshaler for indexFileHeader. +func (h *indexFileHeader) UnmarshalBinary(data []byte) error { + if len(data) != int(sizeOfIndexFileHeader) { + return fmt.Errorf( + "%w: incorrect data length to unmarshal indexFileHeader: got %d bytes, need exactly %d", + ErrCorrupted, len(data), sizeOfIndexFileHeader, + ) + } + h.Version = binary.LittleEndian.Uint64(data[0:]) + h.MaxDataFileSize = binary.LittleEndian.Uint64(data[8:]) + h.MinHeight = binary.LittleEndian.Uint64(data[16:]) + h.MaxContiguousHeight = binary.LittleEndian.Uint64(data[24:]) + h.MaxHeight = binary.LittleEndian.Uint64(data[32:]) + h.NextWriteOffset = binary.LittleEndian.Uint64(data[40:]) + return nil +} + // Database stores blockchain blocks on disk and provides methods to read, and write blocks. type Database struct { indexFile *os.File @@ -51,6 +192,556 @@ type Database struct { maxContiguousHeight atomic.Uint64 } +// New creates a block database. +// Parameters: +// - indexDir: Directory for the index file +// - dataDir: Directory for the data file(s) +// - config: Configuration parameters +// - log: Logger instance for structured logging +func New(indexDir, dataDir string, config DatabaseConfig, log logging.Logger) (*Database, error) { + if indexDir == "" || dataDir == "" { + return nil, errors.New("both indexDir and dataDir must be provided") + } + + if err := config.Validate(); err != nil { + return nil, err + } + + s := &Database{ + options: config, + log: log, + fileCache: lru.NewCache[int, *os.File](MaxDataFiles), + } + s.fileCache.SetOnEvict(func(_ int, f *os.File) { + if f != nil { + f.Close() + } + }) + + if err := s.openAndInitializeIndex(indexDir, config.Truncate); err != nil { + return nil, err + } + + if err := s.initializeDataFiles(dataDir, config.Truncate); err != nil { + s.closeFiles() + return nil, err + } + + if !config.Truncate { + if err := s.recover(); err != nil { + s.closeFiles() + return nil, fmt.Errorf("recovery failed: %w", err) + } + } + return s, nil +} + +// MaxContiguousHeight returns the highest block height known to be contiguously stored. +func (s *Database) MaxContiguousHeight() (height BlockHeight, found bool) { + if s.maxContiguousHeight.Load() == unsetHeight { + return 0, false + } + return s.maxContiguousHeight.Load(), true +} + +// Close flushes pending writes and closes the store files. +func (s *Database) Close() error { + s.closeMu.Lock() + defer s.closeMu.Unlock() + + if s.closed { + return nil + } + s.closed = true + + err := s.persistIndexHeader() + s.closeFiles() + return err +} + +// WriteBlock inserts a block into the store at the given height with the specified header size. 
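
> Editorial note (not part of the patch): the on-disk records introduced above are fixed-size, little-endian structures. The per-block header serialized by `blockHeader.MarshalBinary` is 24 bytes, an `indexEntry` is 16 bytes, and the `indexFileHeader` is 80 bytes (48 bytes of fields plus 32 reserved bytes). The standalone sketch below only mirrors that layout to make the byte offsets concrete.

```go
// Editorial sketch mirroring blockHeader.MarshalBinary above:
// Height | Checksum | Size | HeaderSize, little-endian, 24 bytes total.
package main

import (
	"encoding/binary"
	"fmt"
)

func main() {
	buf := make([]byte, 24)
	binary.LittleEndian.PutUint64(buf[0:], 42)       // Height
	binary.LittleEndian.PutUint64(buf[8:], 0xABCDEF) // Checksum (xxhash of the block data)
	binary.LittleEndian.PutUint32(buf[16:], 1024)    // Size of the block data
	binary.LittleEndian.PutUint32(buf[20:], 128)     // HeaderSize (header portion within the data)

	fmt.Printf("%d-byte header, height=%d\n", len(buf), binary.LittleEndian.Uint64(buf[0:]))
}
```
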
+func (s *Database) WriteBlock(height BlockHeight, block BlockData, headerSize BlockHeaderSize) error { + s.closeMu.RLock() + defer s.closeMu.RUnlock() + + if s.closed { + return ErrDatabaseClosed + } + + blockDataLen := uint32(len(block)) + if blockDataLen == 0 { + return ErrBlockEmpty + } + + if blockDataLen > MaxBlockDataSize { + return ErrBlockTooLarge + } + + if headerSize >= blockDataLen { + return ErrHeaderSizeTooLarge + } + + if height < s.header.MinHeight { + return fmt.Errorf("%w: cannot write block at height %d, minimum height is %d", ErrInvalidBlockHeight, height, s.header.MinHeight) + } + + indexFileOffset, err := s.indexEntryOffset(height) + if err != nil { + return err + } + + sizeWithDataHeader, err := safemath.Add(sizeOfBlockHeader, blockDataLen) + if err != nil { + return fmt.Errorf("calculating total block size would overflow for block at height %d: %w", height, err) + } + writeDataOffset, err := s.allocateBlockSpace(sizeWithDataHeader) + if err != nil { + return err + } + + bh := blockHeader{ + Height: height, + Size: blockDataLen, + HeaderSize: headerSize, + Checksum: calculateChecksum(block), + } + if err := s.writeBlockAt(writeDataOffset, bh, block); err != nil { + return err + } + + if err := s.writeIndexEntryAt(indexFileOffset, writeDataOffset, blockDataLen, headerSize); err != nil { + return err + } + + return s.updateBlockHeights(height) +} + +// readBlockIndex reads the index entry for the given height. +// It returns ErrBlockNotFound if the block does not exist. +func (s *Database) readBlockIndex(height BlockHeight) (indexEntry, error) { + var entry indexEntry + if s.closed { + return entry, ErrDatabaseClosed + } + + // Skip the index entry read if we know the block is past the max height. + maxHeight := s.maxBlockHeight.Load() + if maxHeight == unsetHeight || height > maxHeight { + return entry, ErrBlockNotFound + } + + entry, err := s.readIndexEntry(height) + if err != nil { + return entry, err + } + + if entry.IsEmpty() { + return entry, ErrBlockNotFound + } + + return entry, nil +} + +// ReadBlock retrieves a block by its height. +// Returns ErrBlockNotFound if the block is not found. +func (s *Database) ReadBlock(height BlockHeight) (BlockData, error) { + s.closeMu.RLock() + defer s.closeMu.RUnlock() + + indexEntry, err := s.readBlockIndex(height) + if err != nil { + return nil, err + } + + // Read the complete block data + blockData := make(BlockData, indexEntry.Size) + dataFile, localOffset, err := s.getDataFileAndOffset(indexEntry.Offset) + if err != nil { + return nil, fmt.Errorf("failed to get data file for block at height %d: %w", height, err) + } + _, err = dataFile.ReadAt(blockData, int64(localOffset+uint64(sizeOfBlockHeader))) + if err != nil { + return nil, fmt.Errorf("failed to read block data from data file: %w", err) + } + + return blockData, nil +} + +// ReadHeader retrieves only the header portion of a block by its height. +// Returns ErrBlockNotFound if the block is not found, or nil if no header. 
+func (s *Database) ReadHeader(height BlockHeight) (BlockData, error) { + s.closeMu.RLock() + defer s.closeMu.RUnlock() + + indexEntry, err := s.readBlockIndex(height) + if err != nil { + return nil, err + } + + // Return nil if there's no header data + if indexEntry.HeaderSize == 0 { + return nil, nil + } + + // Validate header size doesn't exceed total block size + if indexEntry.HeaderSize > indexEntry.Size { + return nil, fmt.Errorf("invalid header size %d exceeds block size %d", indexEntry.HeaderSize, indexEntry.Size) + } + + // Read only the header portion + headerData := make([]byte, indexEntry.HeaderSize) + dataFile, localOffset, err := s.getDataFileAndOffset(indexEntry.Offset) + if err != nil { + return nil, fmt.Errorf("failed to get data file for block header at height %d: %w", height, err) + } + _, err = dataFile.ReadAt(headerData, int64(localOffset+uint64(sizeOfBlockHeader))) + if err != nil { + return nil, fmt.Errorf("failed to read block header data from data file: %w", err) + } + + return headerData, nil +} + +// ReadBody retrieves only the body portion (excluding header) of a block by its height. +// Returns ErrBlockNotFound if the block is not found. +func (s *Database) ReadBody(height BlockHeight) (BlockData, error) { + s.closeMu.RLock() + defer s.closeMu.RUnlock() + + indexEntry, err := s.readBlockIndex(height) + if err != nil { + return nil, err + } + + bodySize := indexEntry.Size - indexEntry.HeaderSize + bodyData := make([]byte, bodySize) + dataFile, localOffset, err := s.getDataFileAndOffset(indexEntry.Offset) + if err != nil { + return nil, fmt.Errorf("failed to get data file for block body at height %d: %w", height, err) + } + headerOffset, err := safemath.Add(localOffset, uint64(sizeOfBlockHeader)) + if err != nil { + return nil, fmt.Errorf("calculating header offset would overflow for block at height %d: %w", height, err) + } + bodyOffset, err := safemath.Add(headerOffset, uint64(indexEntry.HeaderSize)) + if err != nil { + return nil, fmt.Errorf("calculating body offset would overflow for block at height %d: %w", height, err) + } + + _, err = dataFile.ReadAt(bodyData, int64(bodyOffset)) + if err != nil { + return nil, fmt.Errorf("failed to read block body data from data file: %w", err) + } + return bodyData, nil +} + +// HasBlock checks if a block exists at the given height. +func (s *Database) HasBlock(height BlockHeight) (bool, error) { + s.closeMu.RLock() + defer s.closeMu.RUnlock() + + _, err := s.readBlockIndex(height) + if err != nil { + if errors.Is(err, ErrBlockNotFound) { + return false, nil + } + return false, err + } + return true, nil +} + +func (s *Database) indexEntryOffset(height BlockHeight) (uint64, error) { + relativeHeight := height - s.header.MinHeight + offsetFromHeaderStart, err := safemath.Mul(relativeHeight, sizeOfIndexEntry) + if err != nil { + return 0, fmt.Errorf("%w: block height %d is too large", ErrInvalidBlockHeight, height) + } + finalOffset, err := safemath.Add(sizeOfIndexFileHeader, offsetFromHeaderStart) + if err != nil { + return 0, fmt.Errorf("%w: block height %d is too large", ErrInvalidBlockHeight, height) + } + + return finalOffset, nil +} + +// readIndexEntry reads the index entry for the given height from the index file. +// Returns ErrBlockNotFound if the block does not exist. 
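
> Editorial note (not part of the patch): a client uses the database through `New`, `WriteBlock`, `ReadBlock`, `ReadHeader`, `ReadBody`, and `Close`, all added in this file. The sketch below is illustrative only: the directory paths, the `DatabaseConfig` values, and the `logging.NoLog{}` logger are assumptions for the example, not package defaults.

```go
// Illustrative use of the API added in this file; config values are assumed.
package main

import (
	"fmt"

	"github.com/ava-labs/avalanchego/utils/logging"
	"github.com/ava-labs/avalanchego/x/blockdb"
)

func main() {
	cfg := blockdb.DatabaseConfig{
		MinimumHeight:      0,
		MaxDataFileSize:    512 * 1024 * 1024, // assumed: split data across 512 MiB files
		CheckpointInterval: 1024,
		SyncToDisk:         true,
	}
	db, err := blockdb.New("/tmp/blockdb/index", "/tmp/blockdb/data", cfg, logging.NoLog{})
	if err != nil {
		panic(err)
	}
	defer db.Close()

	// Write a block whose first 2 bytes are its header portion.
	block := []byte{0xAA, 0xBB, 0x01, 0x02, 0x03}
	if err := db.WriteBlock(1, block, 2); err != nil {
		panic(err)
	}

	full, _ := db.ReadBlock(1)    // all 5 bytes
	header, _ := db.ReadHeader(1) // first 2 bytes
	body, _ := db.ReadBody(1)     // last 3 bytes
	fmt.Println(len(full), len(header), len(body))
}
```

Reads stay O(1) because the index file is a flat array of fixed-size slots: the entry for height `h` lives at byte offset `80 + (h - MinimumHeight) * 16`, exactly as computed by `indexEntryOffset` above.
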
+func (s *Database) readIndexEntry(height BlockHeight) (indexEntry, error) { + var entry indexEntry + + if height < s.header.MinHeight { + return entry, fmt.Errorf("%w: cannot read block at height %d, minimum height is %d", ErrInvalidBlockHeight, height, s.header.MinHeight) + } + + offset, err := s.indexEntryOffset(height) + if err != nil { + return entry, err + } + + buf := make([]byte, sizeOfIndexEntry) + _, err = s.indexFile.ReadAt(buf, int64(offset)) + if err != nil { + // Return ErrBlockNotFound if trying to read past the end of the index file + // for a block that has not been indexed yet. + if errors.Is(err, io.EOF) { + return entry, ErrBlockNotFound + } + return entry, fmt.Errorf("failed to read index entry at offset %d for height %d: %w", offset, height, err) + } + if err := entry.UnmarshalBinary(buf); err != nil { + return entry, fmt.Errorf("failed to deserialize index entry for height %d: %w", height, err) + } + + if entry.IsEmpty() { + return entry, ErrBlockNotFound + } + + return entry, nil +} + +func (s *Database) writeIndexEntryAt(indexFileOffset, dataFileBlockOffset uint64, blockDataLen uint32, headerSize BlockHeaderSize) error { + indexEntry := indexEntry{ + Offset: dataFileBlockOffset, + Size: blockDataLen, + HeaderSize: headerSize, + } + + entryBytes, err := indexEntry.MarshalBinary() + if err != nil { + return fmt.Errorf("failed to serialize index entry: %w", err) + } + + if _, err := s.indexFile.WriteAt(entryBytes, int64(indexFileOffset)); err != nil { + return fmt.Errorf("failed to write index entry: %w", err) + } + return nil +} + +func (s *Database) persistIndexHeader() error { + // The index file must be fsync'd before the header is written to prevent + // a state where the header is persisted but the index entries it refers to + // are not. This could lead to data inconsistency on recovery. + if s.options.SyncToDisk { + if err := s.indexFile.Sync(); err != nil { + return fmt.Errorf("failed to sync index file before writing header state: %w", err) + } + } + + header := s.header + + // Update the header with the current state of the database. + // Note: These atomic reads may occur at different times, potentially creating + // inconsistency where MaxContiguousHeight or MaxBlockHeight are higher than + // what NextWriteOffset indicates. This is safe because recovery will: + // 1. Use NextWriteOffset to determine where to start scanning + // 2. Re-index any unindexed blocks found beyond that point + // 3. Call updateBlockHeights() for each recovered block, which properly + // updates both MaxContiguousHeight and MaxBlockHeight atomically + header.NextWriteOffset = s.nextDataWriteOffset.Load() + header.MaxContiguousHeight = s.maxContiguousHeight.Load() + header.MaxHeight = s.maxBlockHeight.Load() + headerBytes, err := header.MarshalBinary() + if err != nil { + return fmt.Errorf("failed to serialize header for writing state: %w", err) + } + if uint64(len(headerBytes)) != sizeOfIndexFileHeader { + return fmt.Errorf("internal error: serialized header state size %d, expected %d", len(headerBytes), sizeOfIndexFileHeader) + } + + if _, err := s.indexFile.WriteAt(headerBytes, 0); err != nil { + return fmt.Errorf("failed to write header state to index file: %w", err) + } + return nil +} + +// recover detects and recovers unindexed blocks by scanning data files and updating the index. +// It compares the actual data file sizes on disk with the indexed data size to detect +// blocks that were written but not properly indexed. 
+// For each unindexed block found, it validates the block, then +// writes the corresponding index entry and updates block height tracking. +func (s *Database) recover() error { + dataFiles, maxIndex, err := s.listDataFiles() + if err != nil { + return fmt.Errorf("failed to list data files for recovery: %w", err) + } + + if len(dataFiles) == 0 { + return nil + } + + if s.header.MaxDataFileSize == math.MaxUint64 && len(dataFiles) > 1 { + return fmt.Errorf("%w: only one data file expected when MaxDataFileSize is max uint64, got %d files with max index %d", ErrCorrupted, len(dataFiles), maxIndex) + } + + // ensure no data files are missing + // If any data files are missing, we would need to recalculate the max height + // and max contiguous height. This can be supported in the future but for now + // to keep things simple, we will just error if the data files are not as expected. + for i := 0; i <= maxIndex; i++ { + if _, exists := dataFiles[i]; !exists { + return fmt.Errorf("%w: data file at index %d is missing", ErrCorrupted, i) + } + } + + // Calculate the expected next write offset based on the data on disk. + var calculatedNextDataWriteOffset uint64 + fileSizeContribution, err := safemath.Mul(uint64(maxIndex), s.header.MaxDataFileSize) + if err != nil { + return fmt.Errorf("calculating file size contribution would overflow: %w", err) + } + calculatedNextDataWriteOffset = fileSizeContribution + + lastFileInfo, err := os.Stat(dataFiles[maxIndex]) + if err != nil { + return fmt.Errorf("failed to get stats for last data file %s: %w", dataFiles[maxIndex], err) + } + calculatedNextDataWriteOffset, err = safemath.Add(calculatedNextDataWriteOffset, uint64(lastFileInfo.Size())) + if err != nil { + return fmt.Errorf("adding last file size would overflow: %w", err) + } + + nextDataWriteOffset := s.nextDataWriteOffset.Load() + switch { + case calculatedNextDataWriteOffset == nextDataWriteOffset: + s.log.Debug("Recovery: data files match index header, no recovery needed.") + return nil + + case calculatedNextDataWriteOffset < nextDataWriteOffset: + // this happens when the index claims to have more data than is actually on disk + return fmt.Errorf("%w: index header claims to have more data than is actually on disk "+ + "(calculated: %d bytes, index header: %d bytes)", + ErrCorrupted, calculatedNextDataWriteOffset, nextDataWriteOffset) + default: + // The data on disk is ahead of the index. We need to recover un-indexed blocks. + s.log.Info("Recovery: data files are ahead of index; recovering un-indexed blocks.", + zap.Uint64("headerNextWriteOffset", nextDataWriteOffset), + zap.Uint64("calculatedNextWriteOffset", calculatedNextDataWriteOffset), + ) + + // Start scan from where the index left off. 
+ currentScanOffset := nextDataWriteOffset + recoveredBlocksCount := 0 + recoveredHeights := make([]BlockHeight, 0) + for currentScanOffset < calculatedNextDataWriteOffset { + bh, err := s.recoverBlockAtOffset(currentScanOffset, calculatedNextDataWriteOffset) + if err != nil { + if errors.Is(err, io.EOF) { + // reach end of this file, try to read the next file + currentFileIndex := int(currentScanOffset / s.header.MaxDataFileSize) + nextFileIndex, err := safemath.Add(uint64(currentFileIndex), 1) + if err != nil { + return fmt.Errorf("recovery: overflow in file index calculation: %w", err) + } + if currentScanOffset, err = safemath.Mul(nextFileIndex, s.header.MaxDataFileSize); err != nil { + return fmt.Errorf("recovery: overflow in scan offset calculation: %w", err) + } + continue + } + return err + } + s.log.Debug("Recovery: Successfully validated and indexed block", + zap.Uint64("height", bh.Height), + zap.Uint32("size", bh.Size), + zap.Uint64("offset", currentScanOffset), + ) + recoveredBlocksCount++ + recoveredHeights = append(recoveredHeights, bh.Height) + blockTotalSize, err := safemath.Add(uint64(sizeOfBlockHeader), uint64(bh.Size)) + if err != nil { + return fmt.Errorf("recovery: overflow in block size calculation: %w", err) + } + currentScanOffset, err = safemath.Add(currentScanOffset, blockTotalSize) + if err != nil { + return fmt.Errorf("recovery: overflow in scan offset calculation: %w", err) + } + } + s.nextDataWriteOffset.Store(currentScanOffset) + + // Update block heights based on recovered blocks + if len(recoveredHeights) > 0 { + if err := s.updateRecoveredBlockHeights(recoveredHeights); err != nil { + return fmt.Errorf("recovery: failed to update block heights: %w", err) + } + } + + if err := s.persistIndexHeader(); err != nil { + return fmt.Errorf("recovery: failed to save index header after recovery scan: %w", err) + } + + s.log.Info("Recovery: Scan finished", + zap.Int("recoveredBlocks", recoveredBlocksCount), + zap.Uint64("finalNextWriteOffset", s.nextDataWriteOffset.Load()), + zap.Uint64("maxContiguousBlockHeight", s.maxContiguousHeight.Load()), + zap.Uint64("maxBlockHeight", s.maxBlockHeight.Load()), + ) + } + return nil +} + +func (s *Database) recoverBlockAtOffset(offset, totalDataSize uint64) (blockHeader, error) { + var bh blockHeader + if totalDataSize-offset < uint64(sizeOfBlockHeader) { + return bh, fmt.Errorf("%w: not enough data for block header at offset %d", ErrCorrupted, offset) + } + + dataFile, localOffset, err := s.getDataFileAndOffset(offset) + if err != nil { + return bh, fmt.Errorf("recovery: failed to get data file for offset %d: %w", offset, err) + } + bhBuf := make([]byte, sizeOfBlockHeader) + if _, err := dataFile.ReadAt(bhBuf, int64(localOffset)); err != nil { + return bh, fmt.Errorf("%w: error reading block header at offset %d: %w", ErrCorrupted, offset, err) + } + if err := bh.UnmarshalBinary(bhBuf); err != nil { + return bh, fmt.Errorf("%w: error deserializing block header at offset %d: %w", ErrCorrupted, offset, err) + } + if bh.Size == 0 || bh.Size > MaxBlockDataSize { + return bh, fmt.Errorf("%w: invalid block size in header at offset %d: %d", ErrCorrupted, offset, bh.Size) + } + if bh.Height < s.header.MinHeight || bh.Height == unsetHeight { + return bh, fmt.Errorf( + "%w: invalid block height in header at offset %d: found %d, expected >= %d", + ErrCorrupted, offset, bh.Height, s.header.MinHeight, + ) + } + if bh.HeaderSize > bh.Size { + return bh, fmt.Errorf("%w: invalid block header size in header at offset %d: %d > %d", ErrCorrupted, 
offset, bh.HeaderSize, bh.Size) + } + expectedBlockEndOffset, err := safemath.Add(offset, uint64(sizeOfBlockHeader)) + if err != nil { + return bh, fmt.Errorf("calculating block end offset would overflow at offset %d: %w", offset, err) + } + expectedBlockEndOffset, err = safemath.Add(expectedBlockEndOffset, uint64(bh.Size)) + if err != nil { + return bh, fmt.Errorf("calculating block end offset would overflow at offset %d: %w", offset, err) + } + if expectedBlockEndOffset > totalDataSize { + return bh, fmt.Errorf("%w: block data out of bounds at offset %d", ErrCorrupted, offset) + } + blockData := make([]byte, bh.Size) + blockDataOffset, err := safemath.Add(localOffset, uint64(sizeOfBlockHeader)) + if err != nil { + return bh, fmt.Errorf("calculating block data offset would overflow at offset %d: %w", offset, err) + } + if _, err := dataFile.ReadAt(blockData, int64(blockDataOffset)); err != nil { + return bh, fmt.Errorf("%w: failed to read block data at offset %d: %w", ErrCorrupted, offset, err) + } + calculatedChecksum := calculateChecksum(blockData) + if calculatedChecksum != bh.Checksum { + return bh, fmt.Errorf("%w: checksum mismatch for block at offset %d", ErrCorrupted, offset) + } + + // Write index entry for this block + indexFileOffset, idxErr := s.indexEntryOffset(bh.Height) + if idxErr != nil { + return bh, fmt.Errorf("cannot get index offset for recovered block %d: %w", bh.Height, idxErr) + } + if err := s.writeIndexEntryAt(indexFileOffset, offset, bh.Size, bh.HeaderSize); err != nil { + return bh, fmt.Errorf("failed to update index for recovered block %d: %w", bh.Height, err) + } + return bh, nil +} + func (s *Database) listDataFiles() (map[int]string, int, error) { files, err := os.ReadDir(s.dataDir) if err != nil { @@ -172,50 +863,6 @@ func (s *Database) loadOrInitializeHeader(truncate bool) error { return nil } -// New creates a block database. -// Parameters: -// - indexDir: Directory for the index file -// - dataDir: Directory for the data file(s) -// - config: Configuration parameters -// - log: Logger instance for structured logging -func New(indexDir, dataDir string, config DatabaseConfig, log logging.Logger) (*Database, error) { - if indexDir == "" || dataDir == "" { - return nil, errors.New("both indexDir and dataDir must be provided") - } - - if err := config.Validate(); err != nil { - return nil, err - } - - s := &Database{ - options: config, - log: log, - fileCache: lru.NewCache[int, *os.File](MaxDataFiles), - } - s.fileCache.SetOnEvict(func(_ int, f *os.File) { - if f != nil { - f.Close() - } - }) - - if err := s.openAndInitializeIndex(indexDir, config.Truncate); err != nil { - return nil, err - } - - if err := s.initializeDataFiles(dataDir, config.Truncate); err != nil { - s.closeFiles() - return nil, err - } - - if !config.Truncate { - if err := s.recover(); err != nil { - s.closeFiles() - return nil, fmt.Errorf("recovery failed: %w", err) - } - } - return s, nil -} - func (s *Database) closeFiles() { if s.indexFile != nil { s.indexFile.Close() @@ -252,25 +899,231 @@ func (s *Database) getOrOpenDataFile(fileIndex int) (*os.File, error) { return handle, nil } -// MaxContiguousHeight returns the highest block height known to be contiguously stored. 
-func (s *Database) MaxContiguousHeight() (height BlockHeight, found bool) { - if s.maxContiguousHeight.Load() == unsetHeight { - return 0, false +func calculateChecksum(data []byte) uint64 { + return xxhash.Sum64(data) +} + +func (s *Database) writeBlockAt(offset uint64, bh blockHeader, block BlockData) error { + headerBytes, err := bh.MarshalBinary() + if err != nil { + return fmt.Errorf("failed to serialize block header: %w", err) } - return s.maxContiguousHeight.Load(), true + + dataFile, localOffset, err := s.getDataFileAndOffset(offset) + if err != nil { + return fmt.Errorf("failed to get data file for writing block %d: %w", bh.Height, err) + } + + // Allocate combined buffer for header and block data and write it to the data file + combinedBufSize, err := safemath.Add(uint64(sizeOfBlockHeader), uint64(len(block))) + if err != nil { + return fmt.Errorf("calculating combined buffer size would overflow for block %d: %w", bh.Height, err) + } + combinedBuf := make([]byte, combinedBufSize) + copy(combinedBuf, headerBytes) + copy(combinedBuf[sizeOfBlockHeader:], block) + if _, err := dataFile.WriteAt(combinedBuf, int64(localOffset)); err != nil { + return fmt.Errorf("failed to write block to data file at offset %d: %w", offset, err) + } + + if s.options.SyncToDisk { + if err := dataFile.Sync(); err != nil { + return fmt.Errorf("failed to sync data file after writing block %d: %w", bh.Height, err) + } + } + return nil } -// Close flushes pending writes and closes the store files. -func (s *Database) Close() error { - s.closeMu.Lock() - defer s.closeMu.Unlock() +func (s *Database) updateBlockHeights(writtenBlockHeight BlockHeight) error { + prevContiguousCandidate := uint64(unsetHeight) + if writtenBlockHeight > s.header.MinHeight { + prevContiguousCandidate = writtenBlockHeight - 1 + } - if s.closed { + if s.maxContiguousHeight.CompareAndSwap(prevContiguousCandidate, writtenBlockHeight) { + // We successfully updated the max contiguous height. Now try to extend it further + // by checking if the next height is also available, which would repair gaps in the sequence. 
+ currentMax := writtenBlockHeight + for { + nextHeightToVerify, err := safemath.Add(currentMax, 1) + if err != nil { + s.log.Error("overflow in height calculation when updating max contiguous height") + break + } + // Check if we have indexed a block at the next height, which would extend our contiguous sequence + _, err = s.readIndexEntry(nextHeightToVerify) + if err != nil { + // If no block exists at this height, we've reached the end of our contiguous sequence + if errors.Is(err, ErrBlockNotFound) { + break + } + + // log unexpected error + s.log.Error( + "error reading index entry when updating max contiguous height", + zap.Uint64("height", nextHeightToVerify), + zap.Error(err), + ) + break + } + if !s.maxContiguousHeight.CompareAndSwap(currentMax, nextHeightToVerify) { + break // Someone else updated + } + currentMax = nextHeightToVerify + } + } + + // update max block height and persist header on checkpoint interval + var oldMaxHeight BlockHeight + for { + oldMaxHeight = s.maxBlockHeight.Load() + if writtenBlockHeight <= oldMaxHeight && oldMaxHeight != unsetHeight { + break + } + if s.maxBlockHeight.CompareAndSwap(oldMaxHeight, writtenBlockHeight) { + if writtenBlockHeight%s.options.CheckpointInterval == 0 { + if err := s.persistIndexHeader(); err != nil { + return fmt.Errorf("block %d written, but checkpoint failed: %w", writtenBlockHeight, err) + } + } + break + } + } + return nil +} + +func (s *Database) updateRecoveredBlockHeights(recoveredHeights []BlockHeight) error { + if len(recoveredHeights) == 0 { return nil } - s.closed = true - err := s.persistIndexHeader() - s.closeFiles() - return err + // Find the maximum block height among recovered blocks + maxRecoveredHeight := recoveredHeights[0] + for _, height := range recoveredHeights[1:] { + if height > maxRecoveredHeight { + maxRecoveredHeight = height + } + } + + // Update max block height (no CAS needed since we're single-threaded during recovery) + currentMaxHeight := s.maxBlockHeight.Load() + if maxRecoveredHeight > currentMaxHeight || currentMaxHeight == unsetHeight { + s.maxBlockHeight.Store(maxRecoveredHeight) + } + + // Update max contiguous height by extending from current max contiguous height + currentMaxContiguous := s.maxContiguousHeight.Load() + nextHeightToVerify := s.header.MinHeight + if currentMaxContiguous != unsetHeight { + nextHeightToVerify = currentMaxContiguous + 1 + } + for { + entry, err := s.readIndexEntry(nextHeightToVerify) + if err != nil { + // If no block exists at this height, we've reached the end of our contiguous sequence + if errors.Is(err, ErrBlockNotFound) { + break + } + + // Log unexpected error but continue + s.log.Error( + "error reading index entry when updating max contiguous height during recovery", + zap.Uint64("height", currentMaxContiguous), + zap.Error(err), + ) + return err + } + if entry.IsEmpty() { + break + } + nextHeightToVerify++ + } + s.maxContiguousHeight.Store(nextHeightToVerify - 1) + + return nil +} + +// allocateBlockSpace reserves space for a block and returns the data file offset where it should be written. +// +// This function atomically reserves space by updating the nextWriteOffset and handles +// file splitting by advancing the nextWriteOffset when a data file would be exceeded. +// +// Parameters: +// - totalSize: The total size in bytes needed for the block +// +// Returns: +// - writeDataOffset: The data file offset where the block should be written +// - err: Error if allocation fails (e.g., block too large, overflow, etc.) 
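
> Editorial note (not part of the patch): a consequence of the allocation routine below is that a block never straddles two data files. If a block would cross the current file's end, its write offset is advanced to the start of the next file, and a global offset maps back to a file by integer division. A small standalone sketch of that arithmetic (the 1 MiB file size is arbitrary, chosen only for the example):

```go
// Editorial sketch of the data-file split arithmetic used below.
package main

import "fmt"

func main() {
	const maxDataFileSize = uint64(1 << 20) // 1 MiB per data file (arbitrary for this sketch)

	// A global offset maps to (file index, offset within that file).
	globalOffset := uint64(1<<20 + 100)
	fmt.Println(globalOffset/maxDataFileSize, globalOffset%maxDataFileSize) // 1 100

	// A 2000-byte write that starts 500 bytes before the end of file 1 would
	// cross the boundary, so it is moved to the start of file 2 instead.
	writeOffset := uint64(2<<20 - 500)
	blockSize := uint64(2000)
	if writeOffset%maxDataFileSize+blockSize > maxDataFileSize {
		writeOffset = (writeOffset/maxDataFileSize + 1) * maxDataFileSize
	}
	fmt.Println(writeOffset/maxDataFileSize, writeOffset%maxDataFileSize) // 2 0
}
```
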
+func (s *Database) allocateBlockSpace(totalSize uint32) (writeDataOffset uint64, err error) { + maxDataFileSize := s.header.MaxDataFileSize + + // Check if a single block would exceed the max data file size + if uint64(totalSize) > maxDataFileSize { + return 0, ErrBlockTooLarge + } + + for { + currentOffset := s.nextDataWriteOffset.Load() + + // Calculate where this block would end if written at current offset + blockEndOffset, err := safemath.Add(currentOffset, uint64(totalSize)) + if err != nil { + return 0, fmt.Errorf( + "adding block of size %d to offset %d would overflow uint64 data file pointer: %w", + totalSize, currentOffset, err, + ) + } + + // Determine the actual write offset for this block, taking into account + // data file splitting when max data file size is reached. + actualWriteOffset := currentOffset + actualBlockEndOffset := blockEndOffset + + // If we have a max file size, check if we need to start a new file + if maxDataFileSize > 0 { + currentFileIndex := int(currentOffset / maxDataFileSize) + offsetWithinCurrentFile := currentOffset % maxDataFileSize + + // Check if this block would span across file boundaries + blockEndWithinFile, err := safemath.Add(offsetWithinCurrentFile, uint64(totalSize)) + if err != nil { + return 0, fmt.Errorf( + "calculating block end within file would overflow: %w", + err, + ) + } + if blockEndWithinFile > maxDataFileSize { + // Advance the current write offset to the start of the next file since + // it would exceed the current file size. + nextFileStartOffset, err := safemath.Mul(uint64(currentFileIndex+1), maxDataFileSize) + if err != nil { + return 0, fmt.Errorf( + "calculating next file offset would overflow: %w", + err, + ) + } + actualWriteOffset = nextFileStartOffset + + // Recalculate the end offset for the block space to set the next write offset + if actualBlockEndOffset, err = safemath.Add(actualWriteOffset, uint64(totalSize)); err != nil { + return 0, fmt.Errorf( + "adding block of size %d to new file offset %d would overflow: %w", + totalSize, actualWriteOffset, err, + ) + } + } + } + + if s.nextDataWriteOffset.CompareAndSwap(currentOffset, actualBlockEndOffset) { + return actualWriteOffset, nil + } + } +} + +func (s *Database) getDataFileAndOffset(globalOffset uint64) (*os.File, uint64, error) { + maxFileSize := s.header.MaxDataFileSize + fileIndex := int(globalOffset / maxFileSize) + localOffset := globalOffset % maxFileSize + handle, err := s.getOrOpenDataFile(fileIndex) + return handle, localOffset, err } diff --git a/x/blockdb/database_test.go b/x/blockdb/database_test.go index 50288052d2c8..3f438ac77fe4 100644 --- a/x/blockdb/database_test.go +++ b/x/blockdb/database_test.go @@ -37,9 +37,8 @@ func TestNew_Truncate(t *testing.T) { require.NoError(t, err) require.NotNil(t, db2) defer db2.Close() - readBlock2, err := db2.ReadBlock(1) - require.NoError(t, err) - require.Nil(t, readBlock2) + _, err = db2.ReadBlock(1) + require.ErrorIs(t, err, ErrBlockNotFound) _, found := db2.MaxContiguousHeight() require.False(t, found) } diff --git a/x/blockdb/errors.go b/x/blockdb/errors.go index 7ec789a0dd2a..0dff5254a03b 100644 --- a/x/blockdb/errors.go +++ b/x/blockdb/errors.go @@ -15,4 +15,5 @@ var ( ErrCorrupted = errors.New("blockdb: unrecoverable corruption detected") ErrHeaderSizeTooLarge = errors.New("blockdb: header size cannot be >= block size") ErrBlockTooLarge = fmt.Errorf("blockdb: block size exceeds maximum allowed size of %d bytes", MaxBlockDataSize) + ErrBlockNotFound = errors.New("blockdb: block not found") ) diff --git 
a/x/blockdb/index.go b/x/blockdb/index.go deleted file mode 100644 index fa3054539941..000000000000 --- a/x/blockdb/index.go +++ /dev/null @@ -1,191 +0,0 @@ -// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved. -// See the file LICENSE for licensing terms. - -package blockdb - -import ( - "encoding" - "encoding/binary" - "errors" - "fmt" - "io" - - safemath "github.com/ava-labs/avalanchego/utils/math" -) - -const ( - IndexFileVersion uint64 = 1 -) - -var ( - _ encoding.BinaryMarshaler = (*indexEntry)(nil) - _ encoding.BinaryUnmarshaler = (*indexEntry)(nil) - - sizeOfIndexEntry = uint64(binary.Size(indexEntry{})) - sizeOfIndexFileHeader = uint64(binary.Size(indexFileHeader{})) -) - -type indexEntry struct { - // Offset is the byte offset in the data file where the block's header starts. - Offset uint64 - // Size is the length in bytes of the block's data (excluding the blockHeader). - Size uint32 - // HeaderSize is the size in bytes of the block's header portion within the data. - HeaderSize BlockHeaderSize -} - -// IsEmpty returns true if this entry is uninitialized. -// This indicates a slot where no block has been written. -func (e indexEntry) IsEmpty() bool { - return e.Offset == 0 && e.Size == 0 -} - -// MarshalBinary implements encoding.BinaryMarshaler for indexEntry. -func (e indexEntry) MarshalBinary() ([]byte, error) { - buf := make([]byte, sizeOfIndexEntry) - binary.LittleEndian.PutUint64(buf[0:], e.Offset) - binary.LittleEndian.PutUint32(buf[8:], e.Size) - binary.LittleEndian.PutUint32(buf[12:], e.HeaderSize) - return buf, nil -} - -// UnmarshalBinary implements encoding.BinaryUnmarshaler for indexEntry. -func (e *indexEntry) UnmarshalBinary(data []byte) error { - if len(data) != int(sizeOfIndexEntry) { - return fmt.Errorf("incorrect data length to unmarshal indexEntry: got %d bytes, need exactly %d", len(data), sizeOfIndexEntry) - } - e.Offset = binary.LittleEndian.Uint64(data[0:]) - e.Size = binary.LittleEndian.Uint32(data[8:]) - e.HeaderSize = binary.LittleEndian.Uint32(data[12:]) - return nil -} - -// indexFileHeader is the header of the index file. 
-type indexFileHeader struct { - Version uint64 - MaxDataFileSize uint64 - MinHeight BlockHeight - MaxContiguousHeight BlockHeight - MaxHeight BlockHeight - NextWriteOffset uint64 - // reserve 32 bytes for future use - Reserved [32]byte -} - -// Add MarshalBinary for indexFileHeader -func (h indexFileHeader) MarshalBinary() ([]byte, error) { - buf := make([]byte, sizeOfIndexFileHeader) - binary.LittleEndian.PutUint64(buf[0:], h.Version) - binary.LittleEndian.PutUint64(buf[8:], h.MaxDataFileSize) - binary.LittleEndian.PutUint64(buf[16:], h.MinHeight) - binary.LittleEndian.PutUint64(buf[24:], h.MaxContiguousHeight) - binary.LittleEndian.PutUint64(buf[32:], h.MaxHeight) - binary.LittleEndian.PutUint64(buf[40:], h.NextWriteOffset) - return buf, nil -} - -// Add UnmarshalBinary for indexFileHeader -func (h *indexFileHeader) UnmarshalBinary(data []byte) error { - if len(data) != int(sizeOfIndexFileHeader) { - return fmt.Errorf( - "incorrect data length to unmarshal indexFileHeader: got %d bytes, need exactly %d", - len(data), sizeOfIndexFileHeader, - ) - } - h.Version = binary.LittleEndian.Uint64(data[0:]) - h.MaxDataFileSize = binary.LittleEndian.Uint64(data[8:]) - h.MinHeight = binary.LittleEndian.Uint64(data[16:]) - h.MaxContiguousHeight = binary.LittleEndian.Uint64(data[24:]) - h.MaxHeight = binary.LittleEndian.Uint64(data[32:]) - h.NextWriteOffset = binary.LittleEndian.Uint64(data[40:]) - return nil -} - -func (s *Database) indexEntryOffset(height BlockHeight) (uint64, error) { - if height < s.header.MinHeight { - return 0, fmt.Errorf("%w: height %d is less than minimum block height %d", ErrInvalidBlockHeight, height, s.header.MinHeight) - } - relativeHeight := height - s.header.MinHeight - offsetFromHeaderStart, err := safemath.Mul(relativeHeight, sizeOfIndexEntry) - if err != nil { - return 0, fmt.Errorf("%w: block height %d is too large", ErrInvalidBlockHeight, height) - } - finalOffset, err := safemath.Add(sizeOfIndexFileHeader, offsetFromHeaderStart) - if err != nil { - return 0, fmt.Errorf("%w: block height %d is too large", ErrInvalidBlockHeight, height) - } - - return finalOffset, nil -} - -func (s *Database) readIndexEntry(height BlockHeight) (indexEntry, error) { - var entry indexEntry - if height > s.maxBlockHeight.Load() { - return entry, nil - } - - offset, err := s.indexEntryOffset(height) - if err != nil { - return entry, err - } - - buf := make([]byte, sizeOfIndexEntry) - _, err = s.indexFile.ReadAt(buf, int64(offset)) - if err != nil { - if errors.Is(err, io.EOF) { - return entry, nil - } - return entry, fmt.Errorf("failed to read index entry at offset %d for height %d: %w", offset, height, err) - } - if err := entry.UnmarshalBinary(buf); err != nil { - return entry, fmt.Errorf("failed to deserialize index entry for height %d: %w", height, err) - } - - return entry, nil -} - -func (s *Database) writeIndexEntryAt(indexFileOffset, dataFileBlockOffset uint64, blockDataLen uint32, headerSize BlockHeaderSize) error { - indexEntry := indexEntry{ - Offset: dataFileBlockOffset, - Size: blockDataLen, - HeaderSize: headerSize, - } - - entryBytes, err := indexEntry.MarshalBinary() - if err != nil { - return fmt.Errorf("failed to serialize index entry: %w", err) - } - - if _, err := s.indexFile.WriteAt(entryBytes, int64(indexFileOffset)); err != nil { - return fmt.Errorf("failed to write index entry: %w", err) - } - return nil -} - -func (s *Database) persistIndexHeader() error { - // The index file must be fsync'd before the header is written to prevent - // a state where the header 
is persisted but the index entries it refers to - // are not. This could lead to data inconsistency on recovery. - if s.options.SyncToDisk { - if err := s.indexFile.Sync(); err != nil { - return fmt.Errorf("failed to sync index file before writing header state: %w", err) - } - } - - header := s.header - header.NextWriteOffset = s.nextDataWriteOffset.Load() - header.MaxContiguousHeight = s.maxContiguousHeight.Load() - header.MaxHeight = s.maxBlockHeight.Load() - headerBytes, err := header.MarshalBinary() - if err != nil { - return fmt.Errorf("failed to serialize header for writing state: %w", err) - } - if uint64(len(headerBytes)) != sizeOfIndexFileHeader { - return fmt.Errorf("internal error: serialized header state size %d, expected %d", len(headerBytes), sizeOfIndexFileHeader) - } - - if _, err := s.indexFile.WriteAt(headerBytes, 0); err != nil { - return fmt.Errorf("failed to write header state to index file: %w", err) - } - return nil -} diff --git a/x/blockdb/readblock_test.go b/x/blockdb/readblock_test.go index 5977471886a0..6982f715f7ce 100644 --- a/x/blockdb/readblock_test.go +++ b/x/blockdb/readblock_test.go @@ -4,6 +4,7 @@ package blockdb import ( + "errors" "math" "sync" "sync/atomic" @@ -67,9 +68,14 @@ func TestReadOperations(t *testing.T) { wantErr: ErrInvalidBlockHeight, }, { - name: "height causes overflow", + name: "block is past max height", + readHeight: 51, + wantErr: ErrBlockNotFound, + }, + { + name: "block height is max height", readHeight: math.MaxUint64, - wantErr: ErrInvalidBlockHeight, + wantErr: ErrBlockNotFound, }, } @@ -86,20 +92,18 @@ func TestReadOperations(t *testing.T) { // Seed database with blocks based on config seededBlocks := make(map[uint64][]byte) - if tt.wantErr == nil { - minHeight := config.MinimumHeight - maxHeight := minHeight + 50 // Always write 51 blocks - gapHeight := minHeight + 40 // Gap at relative position 40 - - for i := minHeight; i <= maxHeight; i++ { - if i == gapHeight { - continue // Create gap - } - - block := randomBlock(t) - require.NoError(t, store.WriteBlock(i, block, BlockHeaderSize(i-minHeight))) - seededBlocks[i] = block + minHeight := config.MinimumHeight + maxHeight := minHeight + 50 // Always write 51 blocks + gapHeight := minHeight + 40 // Gap at relative position 40 + + for i := minHeight; i <= maxHeight; i++ { + if i == gapHeight { + continue // Create gap } + + block := randomBlock(t) + require.NoError(t, store.WriteBlock(i, block, BlockHeaderSize(i-minHeight))) + seededBlocks[i] = block } if tt.setup != nil { @@ -112,19 +116,22 @@ func TestReadOperations(t *testing.T) { return } - readBlock, err := store.ReadBlock(tt.readHeight) - require.NoError(t, err) - readHeader, err := store.ReadHeader(tt.readHeight) - require.NoError(t, err) - readBody, err := store.ReadBody(tt.readHeight) - require.NoError(t, err) - // Handle success cases if tt.noBlock { - require.Nil(t, readBlock) - require.Nil(t, readHeader) - require.Nil(t, readBody) + _, err := store.ReadBlock(tt.readHeight) + require.ErrorIs(t, err, ErrBlockNotFound) + _, err = store.ReadHeader(tt.readHeight) + require.ErrorIs(t, err, ErrBlockNotFound) + _, err = store.ReadBody(tt.readHeight) + require.ErrorIs(t, err, ErrBlockNotFound) } else { + readBlock, err := store.ReadBlock(tt.readHeight) + require.NoError(t, err) + readHeader, err := store.ReadHeader(tt.readHeight) + require.NoError(t, err) + readBody, err := store.ReadBody(tt.readHeight) + require.NoError(t, err) + require.NotNil(t, readBlock) expectedBlock := seededBlocks[tt.readHeight] headerSize := 
BlockHeaderSize(tt.readHeight - config.MinimumHeight) @@ -168,22 +175,22 @@ func TestReadOperations_Concurrency(t *testing.T) { } var wg sync.WaitGroup - var errors atomic.Int32 + var errorCount atomic.Int32 for i := range numBlocks + 10 { wg.Add(3) // One for each read operation go func(height int) { defer wg.Done() block, err := store.ReadBlock(uint64(height)) - if err != nil { - errors.Add(1) - return - } if gapHeights[uint64(height)] || height >= numBlocks { - if block != nil { - errors.Add(1) + if err == nil || !errors.Is(err, ErrBlockNotFound) { + errorCount.Add(1) } } else { + if err != nil { + errorCount.Add(1) + return + } require.Equal(t, blocks[height], block) } }(i) @@ -191,15 +198,15 @@ func TestReadOperations_Concurrency(t *testing.T) { go func(height int) { defer wg.Done() header, err := store.ReadHeader(uint64(height)) - if err != nil { - errors.Add(1) - return - } if gapHeights[uint64(height)] || height >= numBlocks { - if header != nil { - errors.Add(1) + if err == nil || !errors.Is(err, ErrBlockNotFound) { + errorCount.Add(1) } } else { + if err != nil { + errorCount.Add(1) + return + } expectedHeader := blocks[height][:headerSizes[height]] if headerSizes[height] == 0 { expectedHeader = nil @@ -211,20 +218,20 @@ func TestReadOperations_Concurrency(t *testing.T) { go func(height int) { defer wg.Done() body, err := store.ReadBody(uint64(height)) - if err != nil { - errors.Add(1) - return - } if gapHeights[uint64(height)] || height >= numBlocks { - if body != nil { - errors.Add(1) + if err == nil || !errors.Is(err, ErrBlockNotFound) { + errorCount.Add(1) } } else { + if err != nil { + errorCount.Add(1) + return + } expectedBody := blocks[height][headerSizes[height]:] require.Equal(t, expectedBody, body) } }(i) } wg.Wait() - require.Zero(t, errors.Load(), "concurrent read operations had errors") + require.Zero(t, errorCount.Load(), "concurrent read operations had errors") } diff --git a/x/blockdb/recovery.go b/x/blockdb/recovery.go deleted file mode 100644 index ed9f729cb580..000000000000 --- a/x/blockdb/recovery.go +++ /dev/null @@ -1,214 +0,0 @@ -// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved. -// See the file LICENSE for licensing terms. - -package blockdb - -import ( - "errors" - "fmt" - "io" - "os" - - "go.uber.org/zap" - - safemath "github.com/ava-labs/avalanchego/utils/math" -) - -// recover detects and recovers unindexed blocks by scanning data files and updating the index. -// It compares the actual data file sizes on disk with the indexed data size to detect -// blocks that were written but not properly indexed. -// For each unindexed block found, it validates the block, then -// writes the corresponding index entry and updates block height tracking. -func (s *Database) recover() error { - dataFiles, maxIndex, err := s.listDataFiles() - if err != nil { - return fmt.Errorf("failed to list data files for recovery: %w", err) - } - - if len(dataFiles) == 0 { - return nil - } - - // ensure no data files are missing - // If any data files are missing, we would need to recalculate the max height - // and max contiguous height. This can be supported in the future but for now - // to keep things simple, we will just error if the data files are not as expected. 
- if s.header.MaxDataFileSize > 0 { - // Ensure data files are sequential starting from 0 - for i := 0; i <= maxIndex; i++ { - if _, exists := dataFiles[i]; !exists { - return fmt.Errorf("%w: data file at index %d is missing", ErrCorrupted, i) - } - } - } else if len(dataFiles) > 1 || maxIndex > 1 { - return fmt.Errorf("%w: expect only 1 data file at index 0, got %d files with max index %d", ErrCorrupted, len(dataFiles), maxIndex) - } - - // Calculate the expected next write offset based on the data on disk. - var calculatedNextDataWriteOffset uint64 - if s.header.MaxDataFileSize > 0 { - // All data files before the last should be full. - fullFilesCount := maxIndex - fileSizeContribution, err := safemath.Mul(uint64(fullFilesCount), s.header.MaxDataFileSize) - if err != nil { - return fmt.Errorf("calculating file size contribution would overflow: %w", err) - } - calculatedNextDataWriteOffset = fileSizeContribution - - lastFileInfo, err := os.Stat(dataFiles[maxIndex]) - if err != nil { - return fmt.Errorf("failed to get stats for last data file %s: %w", dataFiles[maxIndex], err) - } - calculatedNextDataWriteOffset, err = safemath.Add(calculatedNextDataWriteOffset, uint64(lastFileInfo.Size())) - if err != nil { - return fmt.Errorf("adding last file size would overflow: %w", err) - } - } else { - lastFileInfo, err := os.Stat(dataFiles[0]) - if err != nil { - return fmt.Errorf("failed to get stats for data file %s: %w", dataFiles[0], err) - } - calculatedNextDataWriteOffset = uint64(lastFileInfo.Size()) - } - - nextDataWriteOffset := s.nextDataWriteOffset.Load() - switch { - case calculatedNextDataWriteOffset == nextDataWriteOffset: - s.log.Debug("Recovery: data files match index header, no recovery needed.") - return nil - - case calculatedNextDataWriteOffset < nextDataWriteOffset: - // this happens when the index claims to have more data than is actually on disk - return fmt.Errorf("%w: index header claims to have more data than is actually on disk "+ - "(calculated: %d bytes, index header: %d bytes)", - ErrCorrupted, calculatedNextDataWriteOffset, nextDataWriteOffset) - default: - // The data on disk is ahead of the index. We need to recover un-indexed blocks. - s.log.Info("Recovery: data files are ahead of index; recovering un-indexed blocks.", - zap.Uint64("headerNextWriteOffset", nextDataWriteOffset), - zap.Uint64("calculatedNextWriteOffset", calculatedNextDataWriteOffset), - ) - - // Start scan from where the index left off. 
- currentScanOffset := nextDataWriteOffset - recoveredBlocksCount := 0 - maxRecoveredHeightSeen := s.maxBlockHeight.Load() - for currentScanOffset < calculatedNextDataWriteOffset { - bh, err := s.recoverBlockAtOffset(currentScanOffset, calculatedNextDataWriteOffset) - if err != nil { - if errors.Is(err, io.EOF) && s.header.MaxDataFileSize > 0 { - // reach end of this file, try to read the next file - currentFileIndex := int(currentScanOffset / s.header.MaxDataFileSize) - nextFileIndex, err := safemath.Add(uint64(currentFileIndex), 1) - if err != nil { - return fmt.Errorf("recovery: overflow in file index calculation: %w", err) - } - if currentScanOffset, err = safemath.Mul(nextFileIndex, s.header.MaxDataFileSize); err != nil { - return fmt.Errorf("recovery: overflow in scan offset calculation: %w", err) - } - continue - } - return err - } - s.log.Debug("Recovery: Successfully validated and indexed block", - zap.Uint64("height", bh.Height), - zap.Uint32("size", bh.Size), - zap.Uint64("offset", currentScanOffset), - ) - recoveredBlocksCount++ - if bh.Height > maxRecoveredHeightSeen || maxRecoveredHeightSeen == unsetHeight { - maxRecoveredHeightSeen = bh.Height - } - blockTotalSize, err := safemath.Add(uint64(sizeOfBlockHeader), uint64(bh.Size)) - if err != nil { - return fmt.Errorf("recovery: overflow in block size calculation: %w", err) - } - currentScanOffset, err = safemath.Add(currentScanOffset, blockTotalSize) - if err != nil { - return fmt.Errorf("recovery: overflow in scan offset calculation: %w", err) - } - } - s.nextDataWriteOffset.Store(currentScanOffset) - - if err := s.persistIndexHeader(); err != nil { - return fmt.Errorf("recovery: failed to save index header after recovery scan: %w", err) - } - - s.log.Info("Recovery: Scan finished", - zap.Int("recoveredBlocks", recoveredBlocksCount), - zap.Uint64("finalNextWriteOffset", s.nextDataWriteOffset.Load()), - zap.Uint64("maxContiguousBlockHeight", s.maxContiguousHeight.Load()), - zap.Uint64("maxBlockHeight", s.maxBlockHeight.Load()), - ) - } - return nil -} - -func (s *Database) recoverBlockAtOffset(offset, totalDataSize uint64) (blockHeader, error) { - var bh blockHeader - if totalDataSize-offset < uint64(sizeOfBlockHeader) { - return bh, fmt.Errorf("%w: not enough data for block header at offset %d", ErrCorrupted, offset) - } - - dataFile, localOffset, err := s.getDataFileAndOffset(offset) - if err != nil { - return bh, fmt.Errorf("recovery: failed to get data file for offset %d: %w", offset, err) - } - bhBuf := make([]byte, sizeOfBlockHeader) - if _, err := dataFile.ReadAt(bhBuf, int64(localOffset)); err != nil { - return bh, fmt.Errorf("%w: error reading block header at offset %d: %w", ErrCorrupted, offset, err) - } - if err := bh.UnmarshalBinary(bhBuf); err != nil { - return bh, fmt.Errorf("%w: error deserializing block header at offset %d: %w", ErrCorrupted, offset, err) - } - if bh.Size == 0 || bh.Size > MaxBlockDataSize { - return bh, fmt.Errorf("%w: invalid block size in header at offset %d: %d", ErrCorrupted, offset, bh.Size) - } - if bh.Height < s.header.MinHeight || bh.Height == unsetHeight { - return bh, fmt.Errorf( - "%w: invalid block height in header at offset %d: found %d, expected >= %d", - ErrCorrupted, offset, bh.Height, s.header.MinHeight, - ) - } - if bh.HeaderSize > bh.Size { - return bh, fmt.Errorf("%w: invalid block header size in header at offset %d: %d > %d", ErrCorrupted, offset, bh.HeaderSize, bh.Size) - } - expectedBlockEndOffset, err := safemath.Add(offset, uint64(sizeOfBlockHeader)) - if err != nil { 
- return bh, fmt.Errorf("calculating block end offset would overflow at offset %d: %w", offset, err) - } - expectedBlockEndOffset, err = safemath.Add(expectedBlockEndOffset, uint64(bh.Size)) - if err != nil { - return bh, fmt.Errorf("calculating block end offset would overflow at offset %d: %w", offset, err) - } - if expectedBlockEndOffset > totalDataSize { - return bh, fmt.Errorf("%w: block data out of bounds at offset %d", ErrCorrupted, offset) - } - blockData := make([]byte, bh.Size) - blockDataOffset, err := safemath.Add(localOffset, uint64(sizeOfBlockHeader)) - if err != nil { - return bh, fmt.Errorf("calculating block data offset would overflow at offset %d: %w", offset, err) - } - if _, err := dataFile.ReadAt(blockData, int64(blockDataOffset)); err != nil { - return bh, fmt.Errorf("%w: failed to read block data at offset %d: %w", ErrCorrupted, offset, err) - } - calculatedChecksum := calculateChecksum(blockData) - if calculatedChecksum != bh.Checksum { - return bh, fmt.Errorf("%w: checksum mismatch for block at offset %d", ErrCorrupted, offset) - } - - // Write index entry for this block - indexFileOffset, idxErr := s.indexEntryOffset(bh.Height) - if idxErr != nil { - return bh, fmt.Errorf("cannot get index offset for recovered block %d: %w", bh.Height, idxErr) - } - if err := s.writeIndexEntryAt(indexFileOffset, offset, bh.Size, bh.HeaderSize); err != nil { - return bh, fmt.Errorf("failed to update index for recovered block %d: %w", bh.Height, err) - } - - if err := s.updateBlockHeights(bh.Height); err != nil { - return bh, fmt.Errorf("failed to update block heights for recovered block %d: %w", bh.Height, err) - } - - return bh, nil -} diff --git a/x/blockdb/recovery_test.go b/x/blockdb/recovery_test.go index 6cee6042bb11..dfbdaf0eb8a2 100644 --- a/x/blockdb/recovery_test.go +++ b/x/blockdb/recovery_test.go @@ -4,6 +4,7 @@ package blockdb import ( + "math" "os" "path/filepath" "testing" @@ -283,10 +284,10 @@ func TestRecovery_CorruptionDetection(t *testing.T) { wantErrText: "data file at index 1 is missing", }, { - name: "unexpected multiple data files when MaxDataFileSize is 0", + name: "unexpected multiple data files when MaxDataFileSize is max uint64", blockHeights: []uint64{0, 1, 2}, - maxDataFileSize: uint64Ptr(0), // Single file mode - blockSize: 512, // 512 bytes per block + maxDataFileSize: uint64Ptr(math.MaxUint64), // Single file mode + blockSize: 512, // 512 bytes per block setupCorruption: func(store *Database, _ [][]byte) error { // Manually create a second data file to simulate corruption secondDataFilePath := store.dataFilePath(1) @@ -302,7 +303,7 @@ func TestRecovery_CorruptionDetection(t *testing.T) { return err }, wantErr: ErrCorrupted, - wantErrText: "expect only 1 data file at index 0, got 2 files with max index 1", + wantErrText: "only one data file expected when MaxDataFileSize is max uint64, got 2 files with max index 1", }, } diff --git a/x/blockdb/writeblock_test.go b/x/blockdb/writeblock_test.go index 44916e1cadc8..54840c55702c 100644 --- a/x/blockdb/writeblock_test.go +++ b/x/blockdb/writeblock_test.go @@ -209,6 +209,7 @@ func TestWriteBlock_Concurrency(t *testing.T) { // create gaps at heights 5 and 10 and rewrite last block if i == 5 || i == 10 { height = uint64(i - 1) + block = blocks[i-1] } else { height = uint64(i) } @@ -228,12 +229,11 @@ func TestWriteBlock_Concurrency(t *testing.T) { for i := range 20 { height := uint64(i) block, err := store.ReadBlock(height) - require.NoError(t, err) - if i == 5 || i == 10 { - require.Nil(t, block, "expected nil 
block at gap height %d", height) + require.ErrorIs(t, err, ErrBlockNotFound, "expected ErrBlockNotFound at gap height %d", height) } else { - require.NotNil(t, block) + require.NoError(t, err) + require.Equal(t, blocks[i], block, "block mismatch at height %d", height) } } checkDatabaseState(t, store, 19, 4) From d8237c92da54bfc55fe6a285a774c4aa85176535 Mon Sep 17 00:00:00 2001 From: Draco Date: Sun, 6 Jul 2025 16:22:30 -0400 Subject: [PATCH 12/27] rename blockHeader -> blockEntryHeader and improve recovery logic --- x/blockdb/database.go | 145 +++++++++++++++++++---------------- x/blockdb/database_test.go | 10 ++- x/blockdb/errors.go | 2 +- x/blockdb/recovery_test.go | 77 +++++++++++++++---- x/blockdb/writeblock_test.go | 9 +-- 5 files changed, 150 insertions(+), 93 deletions(-) diff --git a/x/blockdb/database.go b/x/blockdb/database.go index f2cfa2b2531d..27f52987db0d 100644 --- a/x/blockdb/database.go +++ b/x/blockdb/database.go @@ -32,10 +32,10 @@ const ( unsetHeight = math.MaxUint64 // IndexFileVersion is the version of the index file format. - IndexFileVersion uint64 = 1 + IndexFileVersion uint16 = 1 - // MaxBlockDataSize is the maximum size of a block in bytes (16 MB). - MaxBlockDataSize = 1 << 24 + // BlockEntryVersion is the version of the block entry. + BlockEntryVersion uint16 = 1 ) // BlockHeight defines the type for block heights. @@ -48,45 +48,49 @@ type BlockData = []byte type BlockHeaderSize = uint32 var ( - _ encoding.BinaryMarshaler = (*blockHeader)(nil) - _ encoding.BinaryUnmarshaler = (*blockHeader)(nil) + _ encoding.BinaryMarshaler = (*blockEntryHeader)(nil) + _ encoding.BinaryUnmarshaler = (*blockEntryHeader)(nil) _ encoding.BinaryMarshaler = (*indexEntry)(nil) _ encoding.BinaryUnmarshaler = (*indexEntry)(nil) _ encoding.BinaryMarshaler = (*indexFileHeader)(nil) _ encoding.BinaryUnmarshaler = (*indexFileHeader)(nil) - sizeOfBlockHeader = uint32(binary.Size(blockHeader{})) - sizeOfIndexEntry = uint64(binary.Size(indexEntry{})) - sizeOfIndexFileHeader = uint64(binary.Size(indexFileHeader{})) + sizeOfBlockEntryHeader = uint32(binary.Size(blockEntryHeader{})) + sizeOfIndexEntry = uint64(binary.Size(indexEntry{})) + sizeOfIndexFileHeader = uint64(binary.Size(indexFileHeader{})) ) -// blockHeader is prepended to each block in the data file. -type blockHeader struct { +// blockEntryHeader is the header of a block entry in the data file. +// This is not the header portion of the block data itself. +type blockEntryHeader struct { Height BlockHeight Checksum uint64 Size uint32 HeaderSize BlockHeaderSize + Version uint16 } // MarshalBinary implements the encoding.BinaryMarshaler interface. -func (bh blockHeader) MarshalBinary() ([]byte, error) { - buf := make([]byte, sizeOfBlockHeader) - binary.LittleEndian.PutUint64(buf[0:], bh.Height) - binary.LittleEndian.PutUint64(buf[8:], bh.Checksum) - binary.LittleEndian.PutUint32(buf[16:], bh.Size) - binary.LittleEndian.PutUint32(buf[20:], bh.HeaderSize) +func (beh blockEntryHeader) MarshalBinary() ([]byte, error) { + buf := make([]byte, sizeOfBlockEntryHeader) + binary.LittleEndian.PutUint64(buf[0:], beh.Height) + binary.LittleEndian.PutUint64(buf[8:], beh.Checksum) + binary.LittleEndian.PutUint32(buf[16:], beh.Size) + binary.LittleEndian.PutUint32(buf[20:], beh.HeaderSize) + binary.LittleEndian.PutUint16(buf[24:], beh.Version) return buf, nil } // UnmarshalBinary implements the encoding.BinaryUnmarshaler interface. 
-func (bh *blockHeader) UnmarshalBinary(data []byte) error { - if len(data) != int(sizeOfBlockHeader) { - return fmt.Errorf("%w: incorrect data length to unmarshal blockHeader: got %d bytes, need exactly %d", ErrCorrupted, len(data), sizeOfBlockHeader) - } - bh.Height = binary.LittleEndian.Uint64(data[0:]) - bh.Checksum = binary.LittleEndian.Uint64(data[8:]) - bh.Size = binary.LittleEndian.Uint32(data[16:]) - bh.HeaderSize = binary.LittleEndian.Uint32(data[20:]) +func (beh *blockEntryHeader) UnmarshalBinary(data []byte) error { + if len(data) != int(sizeOfBlockEntryHeader) { + return fmt.Errorf("%w: incorrect data length to unmarshal blockEntryHeader: got %d bytes, need exactly %d", ErrCorrupted, len(data), sizeOfBlockEntryHeader) + } + beh.Height = binary.LittleEndian.Uint64(data[0:]) + beh.Checksum = binary.LittleEndian.Uint64(data[8:]) + beh.Size = binary.LittleEndian.Uint32(data[16:]) + beh.HeaderSize = binary.LittleEndian.Uint32(data[20:]) + beh.Version = binary.LittleEndian.Uint16(data[24:]) return nil } @@ -128,20 +132,20 @@ func (e *indexEntry) UnmarshalBinary(data []byte) error { // indexFileHeader is the header of the index file. type indexFileHeader struct { - Version uint64 + Version uint16 MaxDataFileSize uint64 MinHeight BlockHeight MaxContiguousHeight BlockHeight MaxHeight BlockHeight NextWriteOffset uint64 - // reserve 32 bytes for future use - Reserved [32]byte + // reserve 38 bytes for future use + Reserved [38]byte } // MarshalBinary implements encoding.BinaryMarshaler for indexFileHeader. func (h indexFileHeader) MarshalBinary() ([]byte, error) { buf := make([]byte, sizeOfIndexFileHeader) - binary.LittleEndian.PutUint64(buf[0:], h.Version) + binary.LittleEndian.PutUint16(buf[0:], h.Version) binary.LittleEndian.PutUint64(buf[8:], h.MaxDataFileSize) binary.LittleEndian.PutUint64(buf[16:], h.MinHeight) binary.LittleEndian.PutUint64(buf[24:], h.MaxContiguousHeight) @@ -158,7 +162,7 @@ func (h *indexFileHeader) UnmarshalBinary(data []byte) error { ErrCorrupted, len(data), sizeOfIndexFileHeader, ) } - h.Version = binary.LittleEndian.Uint64(data[0:]) + h.Version = binary.LittleEndian.Uint16(data[0:]) h.MaxDataFileSize = binary.LittleEndian.Uint64(data[8:]) h.MinHeight = binary.LittleEndian.Uint64(data[16:]) h.MaxContiguousHeight = binary.LittleEndian.Uint64(data[24:]) @@ -207,9 +211,14 @@ func New(indexDir, dataDir string, config DatabaseConfig, log logging.Logger) (* return nil, err } + databaseLog := log + if databaseLog == nil { + databaseLog = logging.NoLog{} + } + s := &Database{ options: config, - log: log, + log: databaseLog, fileCache: lru.NewCache[int, *os.File](MaxDataFiles), } s.fileCache.SetOnEvict(func(_ int, f *os.File) { @@ -268,29 +277,26 @@ func (s *Database) WriteBlock(height BlockHeight, block BlockData, headerSize Bl return ErrDatabaseClosed } - blockDataLen := uint32(len(block)) - if blockDataLen == 0 { - return ErrBlockEmpty + blockSize := len(block) + if blockSize > math.MaxUint32 { + return fmt.Errorf("%w: block size cannot exceed %d bytes", ErrBlockTooLarge, math.MaxUint32) } - if blockDataLen > MaxBlockDataSize { - return ErrBlockTooLarge + blockDataLen := uint32(blockSize) + if blockDataLen == 0 { + return ErrBlockEmpty } if headerSize >= blockDataLen { return ErrHeaderSizeTooLarge } - if height < s.header.MinHeight { - return fmt.Errorf("%w: cannot write block at height %d, minimum height is %d", ErrInvalidBlockHeight, height, s.header.MinHeight) - } - indexFileOffset, err := s.indexEntryOffset(height) if err != nil { - return err + return 
fmt.Errorf("failed to get index entry offset for block at height %d: %w", height, err) } - sizeWithDataHeader, err := safemath.Add(sizeOfBlockHeader, blockDataLen) + sizeWithDataHeader, err := safemath.Add(sizeOfBlockEntryHeader, blockDataLen) if err != nil { return fmt.Errorf("calculating total block size would overflow for block at height %d: %w", height, err) } @@ -299,11 +305,12 @@ func (s *Database) WriteBlock(height BlockHeight, block BlockData, headerSize Bl return err } - bh := blockHeader{ + bh := blockEntryHeader{ Height: height, Size: blockDataLen, HeaderSize: headerSize, Checksum: calculateChecksum(block), + Version: BlockEntryVersion, } if err := s.writeBlockAt(writeDataOffset, bh, block); err != nil { return err @@ -359,7 +366,7 @@ func (s *Database) ReadBlock(height BlockHeight) (BlockData, error) { if err != nil { return nil, fmt.Errorf("failed to get data file for block at height %d: %w", height, err) } - _, err = dataFile.ReadAt(blockData, int64(localOffset+uint64(sizeOfBlockHeader))) + _, err = dataFile.ReadAt(blockData, int64(localOffset+uint64(sizeOfBlockEntryHeader))) if err != nil { return nil, fmt.Errorf("failed to read block data from data file: %w", err) } @@ -394,7 +401,7 @@ func (s *Database) ReadHeader(height BlockHeight) (BlockData, error) { if err != nil { return nil, fmt.Errorf("failed to get data file for block header at height %d: %w", height, err) } - _, err = dataFile.ReadAt(headerData, int64(localOffset+uint64(sizeOfBlockHeader))) + _, err = dataFile.ReadAt(headerData, int64(localOffset+uint64(sizeOfBlockEntryHeader))) if err != nil { return nil, fmt.Errorf("failed to read block header data from data file: %w", err) } @@ -419,7 +426,7 @@ func (s *Database) ReadBody(height BlockHeight) (BlockData, error) { if err != nil { return nil, fmt.Errorf("failed to get data file for block body at height %d: %w", height, err) } - headerOffset, err := safemath.Add(localOffset, uint64(sizeOfBlockHeader)) + headerOffset, err := safemath.Add(localOffset, uint64(sizeOfBlockEntryHeader)) if err != nil { return nil, fmt.Errorf("calculating header offset would overflow for block at height %d: %w", height, err) } @@ -451,6 +458,10 @@ func (s *Database) HasBlock(height BlockHeight) (bool, error) { } func (s *Database) indexEntryOffset(height BlockHeight) (uint64, error) { + if height < s.header.MinHeight { + return 0, fmt.Errorf("%w: failed to get index entry offset for block at height %d, minimum height is %d", ErrInvalidBlockHeight, height, s.header.MinHeight) + } + relativeHeight := height - s.header.MinHeight offsetFromHeaderStart, err := safemath.Mul(relativeHeight, sizeOfIndexEntry) if err != nil { @@ -469,10 +480,6 @@ func (s *Database) indexEntryOffset(height BlockHeight) (uint64, error) { func (s *Database) readIndexEntry(height BlockHeight) (indexEntry, error) { var entry indexEntry - if height < s.header.MinHeight { - return entry, fmt.Errorf("%w: cannot read block at height %d, minimum height is %d", ErrInvalidBlockHeight, height, s.header.MinHeight) - } - offset, err := s.indexEntryOffset(height) if err != nil { return entry, err @@ -646,7 +653,7 @@ func (s *Database) recover() error { ) recoveredBlocksCount++ recoveredHeights = append(recoveredHeights, bh.Height) - blockTotalSize, err := safemath.Add(uint64(sizeOfBlockHeader), uint64(bh.Size)) + blockTotalSize, err := safemath.Add(uint64(sizeOfBlockEntryHeader), uint64(bh.Size)) if err != nil { return fmt.Errorf("recovery: overflow in block size calculation: %w", err) } @@ -678,9 +685,9 @@ func (s *Database) 
recover() error { return nil } -func (s *Database) recoverBlockAtOffset(offset, totalDataSize uint64) (blockHeader, error) { - var bh blockHeader - if totalDataSize-offset < uint64(sizeOfBlockHeader) { +func (s *Database) recoverBlockAtOffset(offset, totalDataSize uint64) (blockEntryHeader, error) { + var bh blockEntryHeader + if totalDataSize-offset < uint64(sizeOfBlockEntryHeader) { return bh, fmt.Errorf("%w: not enough data for block header at offset %d", ErrCorrupted, offset) } @@ -688,16 +695,19 @@ func (s *Database) recoverBlockAtOffset(offset, totalDataSize uint64) (blockHead if err != nil { return bh, fmt.Errorf("recovery: failed to get data file for offset %d: %w", offset, err) } - bhBuf := make([]byte, sizeOfBlockHeader) + bhBuf := make([]byte, sizeOfBlockEntryHeader) if _, err := dataFile.ReadAt(bhBuf, int64(localOffset)); err != nil { return bh, fmt.Errorf("%w: error reading block header at offset %d: %w", ErrCorrupted, offset, err) } if err := bh.UnmarshalBinary(bhBuf); err != nil { return bh, fmt.Errorf("%w: error deserializing block header at offset %d: %w", ErrCorrupted, offset, err) } - if bh.Size == 0 || bh.Size > MaxBlockDataSize { + if bh.Size == 0 { return bh, fmt.Errorf("%w: invalid block size in header at offset %d: %d", ErrCorrupted, offset, bh.Size) } + if bh.Version > BlockEntryVersion { + return bh, fmt.Errorf("%w: invalid block entry version at offset %d, version %d is greater than the current version %d", ErrCorrupted, offset, bh.Version, BlockEntryVersion) + } if bh.Height < s.header.MinHeight || bh.Height == unsetHeight { return bh, fmt.Errorf( "%w: invalid block height in header at offset %d: found %d, expected >= %d", @@ -707,7 +717,7 @@ func (s *Database) recoverBlockAtOffset(offset, totalDataSize uint64) (blockHead if bh.HeaderSize > bh.Size { return bh, fmt.Errorf("%w: invalid block header size in header at offset %d: %d > %d", ErrCorrupted, offset, bh.HeaderSize, bh.Size) } - expectedBlockEndOffset, err := safemath.Add(offset, uint64(sizeOfBlockHeader)) + expectedBlockEndOffset, err := safemath.Add(offset, uint64(sizeOfBlockEntryHeader)) if err != nil { return bh, fmt.Errorf("calculating block end offset would overflow at offset %d: %w", offset, err) } @@ -719,7 +729,7 @@ func (s *Database) recoverBlockAtOffset(offset, totalDataSize uint64) (blockHead return bh, fmt.Errorf("%w: block data out of bounds at offset %d", ErrCorrupted, offset) } blockData := make([]byte, bh.Size) - blockDataOffset, err := safemath.Add(localOffset, uint64(sizeOfBlockHeader)) + blockDataOffset, err := safemath.Add(localOffset, uint64(sizeOfBlockEntryHeader)) if err != nil { return bh, fmt.Errorf("calculating block data offset would overflow at offset %d: %w", offset, err) } @@ -755,11 +765,14 @@ func (s *Database) listDataFiles() (map[int]string, int, error) { continue } var index int - if n, err := fmt.Sscanf(file.Name(), dataFileNameFormat, &index); n == 1 && err == nil { - dataFiles[index] = filepath.Join(s.dataDir, file.Name()) - if index > maxIndex { - maxIndex = index - } + n, err := fmt.Sscanf(file.Name(), dataFileNameFormat, &index) + if err != nil || n != 1 { + s.log.Debug("non-data file scanned in data directory", zap.String("file", file.Name()), zap.Error(err)) + continue + } + dataFiles[index] = filepath.Join(s.dataDir, file.Name()) + if index > maxIndex { + maxIndex = index } } @@ -903,7 +916,7 @@ func calculateChecksum(data []byte) uint64 { return xxhash.Sum64(data) } -func (s *Database) writeBlockAt(offset uint64, bh blockHeader, block BlockData) error { +func 
(s *Database) writeBlockAt(offset uint64, bh blockEntryHeader, block BlockData) error { headerBytes, err := bh.MarshalBinary() if err != nil { return fmt.Errorf("failed to serialize block header: %w", err) @@ -915,13 +928,13 @@ func (s *Database) writeBlockAt(offset uint64, bh blockHeader, block BlockData) } // Allocate combined buffer for header and block data and write it to the data file - combinedBufSize, err := safemath.Add(uint64(sizeOfBlockHeader), uint64(len(block))) + combinedBufSize, err := safemath.Add(uint64(sizeOfBlockEntryHeader), uint64(len(block))) if err != nil { return fmt.Errorf("calculating combined buffer size would overflow for block %d: %w", bh.Height, err) } combinedBuf := make([]byte, combinedBufSize) copy(combinedBuf, headerBytes) - copy(combinedBuf[sizeOfBlockHeader:], block) + copy(combinedBuf[sizeOfBlockEntryHeader:], block) if _, err := dataFile.WriteAt(combinedBuf, int64(localOffset)); err != nil { return fmt.Errorf("failed to write block to data file at offset %d: %w", offset, err) } @@ -1059,7 +1072,7 @@ func (s *Database) allocateBlockSpace(totalSize uint32) (writeDataOffset uint64, // Check if a single block would exceed the max data file size if uint64(totalSize) > maxDataFileSize { - return 0, ErrBlockTooLarge + return 0, fmt.Errorf("%w: block of size %d exceeds max data file size of %d", ErrBlockTooLarge, totalSize, maxDataFileSize) } for { diff --git a/x/blockdb/database_test.go b/x/blockdb/database_test.go index 3f438ac77fe4..5b26d1395778 100644 --- a/x/blockdb/database_test.go +++ b/x/blockdb/database_test.go @@ -85,7 +85,6 @@ func TestNew_Params(t *testing.T) { indexDir string dataDir string config DatabaseConfig - log logging.Logger wantErr error expectClose bool }{ @@ -136,7 +135,7 @@ func TestNew_Params(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - db, err := New(tt.indexDir, tt.dataDir, tt.config, tt.log) + db, err := New(tt.indexDir, tt.dataDir, tt.config, nil) if tt.wantErr != nil { require.Equal(t, tt.wantErr.Error(), err.Error()) @@ -236,6 +235,13 @@ func TestIndexFileHeaderAlignment(t *testing.T) { sizeOfIndexFileHeader, sizeOfIndexEntry) } +func TestIndexEntrySizePowerOfTwo(t *testing.T) { + // Check that sizeOfIndexEntry is a power of 2 + // This is important for memory alignment and performance + require.Equal(t, uint64(0), sizeOfIndexEntry&(sizeOfIndexEntry-1), + "sizeOfIndexEntry (%d) is not a power of 2", sizeOfIndexEntry) +} + func TestNew_IndexFileConfigPrecedence(t *testing.T) { // set up db initialConfig := DefaultDatabaseConfig().WithMinimumHeight(100).WithMaxDataFileSize(1024 * 1024) diff --git a/x/blockdb/errors.go b/x/blockdb/errors.go index 0dff5254a03b..eea1d7a11632 100644 --- a/x/blockdb/errors.go +++ b/x/blockdb/errors.go @@ -14,6 +14,6 @@ var ( ErrDatabaseClosed = errors.New("blockdb: database is closed") ErrCorrupted = errors.New("blockdb: unrecoverable corruption detected") ErrHeaderSizeTooLarge = errors.New("blockdb: header size cannot be >= block size") - ErrBlockTooLarge = fmt.Errorf("blockdb: block size exceeds maximum allowed size of %d bytes", MaxBlockDataSize) + ErrBlockTooLarge = fmt.Errorf("blockdb: block size too large") ErrBlockNotFound = errors.New("blockdb: block not found") ) diff --git a/x/blockdb/recovery_test.go b/x/blockdb/recovery_test.go index dfbdaf0eb8a2..9fb0b617c953 100644 --- a/x/blockdb/recovery_test.go +++ b/x/blockdb/recovery_test.go @@ -43,7 +43,7 @@ func TestRecovery_Success(t *testing.T) { // Create a header that only knows about the first block // Block 
0: 4KB data + header - firstBlockOffset := uint64(sizeOfBlockHeader) + 4*1024 + firstBlockOffset := uint64(sizeOfBlockEntryHeader) + 4*1024 header := indexFileHeader{ Version: IndexFileVersion, @@ -171,14 +171,14 @@ func TestRecovery_CorruptionDetection(t *testing.T) { }, { name: "corrupted block header in data file", - blockHeights: []uint64{0, 1}, + blockHeights: []uint64{0, 1, 3}, setupCorruption: func(store *Database, blocks [][]byte) error { if err := resetIndexToBlock(store, uint64(len(blocks[0])), 0); err != nil { return err } // Corrupt second block header with invalid data - secondBlockOffset := int64(sizeOfBlockHeader) + int64(len(blocks[0])) - corruptedHeader := make([]byte, sizeOfBlockHeader) + secondBlockOffset := int64(sizeOfBlockEntryHeader) + int64(len(blocks[0])) + corruptedHeader := make([]byte, sizeOfBlockEntryHeader) for i := range corruptedHeader { corruptedHeader[i] = 0xFF // Invalid header data } @@ -192,26 +192,27 @@ func TestRecovery_CorruptionDetection(t *testing.T) { return err }, wantErr: ErrCorrupted, - wantErrText: "invalid block size in header", + wantErrText: "invalid block entry version at offset", }, { - name: "block with invalid block size in header", + name: "block with invalid block size in header that reads more than total data file size", blockHeights: []uint64{0, 1}, setupCorruption: func(store *Database, blocks [][]byte) error { if err := resetIndexToBlock(store, uint64(len(blocks[0])), 0); err != nil { return err } - secondBlockOffset := int64(sizeOfBlockHeader) + int64(len(blocks[0])) - bh := blockHeader{ + secondBlockOffset := int64(sizeOfBlockEntryHeader) + int64(len(blocks[0])) + bh := blockEntryHeader{ Height: 1, Checksum: calculateChecksum(blocks[1]), Size: uint32(len(blocks[1])) + 1, // make block larger than actual HeaderSize: 0, + Version: BlockEntryVersion, } return writeBlockHeader(store, secondBlockOffset, bh) }, wantErr: ErrCorrupted, - wantErrText: "block data out of bounds", + wantErrText: "block data out of bounds at offset ", }, { name: "block with checksum mismatch", @@ -220,12 +221,13 @@ func TestRecovery_CorruptionDetection(t *testing.T) { if err := resetIndexToBlock(store, uint64(len(blocks[0])), 0); err != nil { return err } - secondBlockOffset := int64(sizeOfBlockHeader) + int64(len(blocks[0])) - bh := blockHeader{ + secondBlockOffset := int64(sizeOfBlockEntryHeader) + int64(len(blocks[0])) + bh := blockEntryHeader{ Height: 1, Checksum: 0xDEADBEEF, // Wrong checksum Size: uint32(len(blocks[1])), HeaderSize: 0, + Version: BlockEntryVersion, } return writeBlockHeader(store, secondBlockOffset, bh) }, @@ -244,7 +246,7 @@ func TestRecovery_CorruptionDetection(t *testing.T) { defer dataFile.Close() // Truncate data file to have only partial block data - truncateSize := int64(sizeOfBlockHeader) + int64(len(blocks[0]))/2 + truncateSize := int64(sizeOfBlockEntryHeader) + int64(len(blocks[0]))/2 return dataFile.Truncate(truncateSize) }, wantErr: ErrCorrupted, @@ -258,12 +260,13 @@ func TestRecovery_CorruptionDetection(t *testing.T) { if err := resetIndexToBlock(store, uint64(len(blocks[0])), 10); err != nil { return err } - secondBlockOffset := int64(sizeOfBlockHeader) + int64(len(blocks[0])) - bh := blockHeader{ + secondBlockOffset := int64(sizeOfBlockEntryHeader) + int64(len(blocks[0])) + bh := blockEntryHeader{ Height: 5, // Invalid height because its below the minimum height of 10 Checksum: calculateChecksum(blocks[1]), Size: uint32(len(blocks[1])), HeaderSize: 0, + Version: BlockEntryVersion, } return writeBlockHeader(store, 
secondBlockOffset, bh) }, @@ -305,6 +308,48 @@ func TestRecovery_CorruptionDetection(t *testing.T) { wantErr: ErrCorrupted, wantErrText: "only one data file expected when MaxDataFileSize is max uint64, got 2 files with max index 1", }, + { + name: "block with invalid block entry version", + blockHeights: []uint64{0, 1}, + setupCorruption: func(store *Database, blocks [][]byte) error { + if err := resetIndexToBlock(store, uint64(len(blocks[0])), 0); err != nil { + return err + } + // Corrupt second block header version + secondBlockOffset := int64(sizeOfBlockEntryHeader) + int64(len(blocks[0])) + bh := blockEntryHeader{ + Height: 1, + Checksum: calculateChecksum(blocks[1]), + Size: uint32(len(blocks[1])), + HeaderSize: 0, + Version: BlockEntryVersion + 1, // Invalid version + } + return writeBlockHeader(store, secondBlockOffset, bh) + }, + wantErr: ErrCorrupted, + wantErrText: "invalid block entry version at offset", + }, + { + name: "second block with invalid version among 4 blocks", + blockHeights: []uint64{0, 3, 2, 4}, + setupCorruption: func(store *Database, blocks [][]byte) error { + if err := resetIndexToBlock(store, uint64(len(blocks[0])), 0); err != nil { + return err + } + // Corrupt second block header with invalid version + secondBlockOffset := int64(sizeOfBlockEntryHeader) + int64(len(blocks[0])) + bh := blockEntryHeader{ + Height: 1, + Checksum: calculateChecksum(blocks[1]), + Size: uint32(len(blocks[1])), + HeaderSize: 0, + Version: BlockEntryVersion + 10, // version cannot be greater than current + } + return writeBlockHeader(store, secondBlockOffset, bh) + }, + wantErr: ErrCorrupted, + wantErrText: "invalid block entry version at offset", + }, } for _, tt := range tests { @@ -359,7 +404,7 @@ func resetIndexToBlock(store *Database, blockSize uint64, minHeight uint64) erro MinHeight: minHeight, MaxContiguousHeight: minHeight, MaxHeight: minHeight, - NextWriteOffset: uint64(sizeOfBlockHeader) + blockSize, + NextWriteOffset: uint64(sizeOfBlockEntryHeader) + blockSize, } headerBytes, err := header.MarshalBinary() @@ -371,7 +416,7 @@ func resetIndexToBlock(store *Database, blockSize uint64, minHeight uint64) erro } // Helper function to write a block header at a specific offset -func writeBlockHeader(store *Database, offset int64, bh blockHeader) error { +func writeBlockHeader(store *Database, offset int64, bh blockEntryHeader) error { fileIndex := int(offset / int64(store.header.MaxDataFileSize)) localOffset := offset % int64(store.header.MaxDataFileSize) dataFilePath := store.dataFilePath(fileIndex) diff --git a/x/blockdb/writeblock_test.go b/x/blockdb/writeblock_test.go index 54840c55702c..16ed9649ead1 100644 --- a/x/blockdb/writeblock_test.go +++ b/x/blockdb/writeblock_test.go @@ -263,13 +263,6 @@ func TestWriteBlock_Errors(t *testing.T) { headerSize: 0, wantErr: ErrBlockEmpty, }, - { - name: "block too large", - height: 0, - block: make([]byte, MaxBlockDataSize+1), - headerSize: 0, - wantErr: ErrBlockTooLarge, - }, { name: "header size larger than block", height: 0, @@ -312,7 +305,7 @@ func TestWriteBlock_Errors(t *testing.T) { { name: "exceed max data file size", height: 0, - block: make([]byte, 1001), // Block + header will exceed 1024 limit (1001 + 24 = 1025 > 1024) + block: make([]byte, 999), // Block + header will exceed 1024 limit (999 + 26 = 1025 > 1024) config: DefaultDatabaseConfig().WithMaxDataFileSize(1024), headerSize: 0, wantErr: ErrBlockTooLarge, From 9e44cce2d39d7ff5d5fe2d3514acfa5d73d3c440 Mon Sep 17 00:00:00 2001 From: Draco Date: Sun, 6 Jul 2025 16:41:46 
-0400 Subject: [PATCH 13/27] make MaxDataFiles configurable --- x/blockdb/config.go | 18 ++++++++++-- x/blockdb/database.go | 2 +- x/blockdb/database_test.go | 55 +++++++++++++++++++++++++++++++++++++ x/blockdb/readblock_test.go | 2 ++ 4 files changed, 73 insertions(+), 4 deletions(-) diff --git a/x/blockdb/config.go b/x/blockdb/config.go index 2726a15269b0..28940cd8ed7d 100644 --- a/x/blockdb/config.go +++ b/x/blockdb/config.go @@ -8,9 +8,8 @@ import "errors" // DefaultMaxDataFileSize is the default maximum size of the data block file in bytes (500GB). const DefaultMaxDataFileSize = 500 * 1024 * 1024 * 1024 -// MaxDataFiles is the maximum number of data files that can be created. -// This prevents running out of file descriptors when MaxDataFileSize is small. -const MaxDataFiles = 10_000 +// DefaultMaxDataFiles is the default maximum number of data files descriptors cached. +const DefaultMaxDataFiles = 10 // DatabaseConfig contains configuration parameters for BlockDB. type DatabaseConfig struct { @@ -20,6 +19,9 @@ type DatabaseConfig struct { // MaxDataFileSize sets the maximum size of the data block file in bytes. MaxDataFileSize uint64 + // MaxDataFiles is the maximum number of data files descriptors cached. + MaxDataFiles int + // CheckpointInterval defines how frequently (in blocks) the index file header is updated (default: 1024). CheckpointInterval uint64 @@ -35,6 +37,7 @@ func DefaultDatabaseConfig() DatabaseConfig { return DatabaseConfig{ MinimumHeight: 0, MaxDataFileSize: DefaultMaxDataFileSize, + MaxDataFiles: DefaultMaxDataFiles, CheckpointInterval: 1024, SyncToDisk: true, Truncate: false, @@ -65,6 +68,12 @@ func (c DatabaseConfig) WithMaxDataFileSize(maxSize uint64) DatabaseConfig { return c } +// WithMaxDataFiles returns a copy of the config with MaxDataFiles set to the given value. +func (c DatabaseConfig) WithMaxDataFiles(maxFiles int) DatabaseConfig { + c.MaxDataFiles = maxFiles + return c +} + // WithCheckpointInterval returns a copy of the config with CheckpointInterval set to the given value. func (c DatabaseConfig) WithCheckpointInterval(interval uint64) DatabaseConfig { c.CheckpointInterval = interval @@ -76,5 +85,8 @@ func (c DatabaseConfig) Validate() error { if c.CheckpointInterval == 0 { return errors.New("CheckpointInterval cannot be 0") } + if c.MaxDataFiles <= 0 { + return errors.New("MaxDataFiles must be positive") + } return nil } diff --git a/x/blockdb/database.go b/x/blockdb/database.go index 27f52987db0d..1ba6ed5188e2 100644 --- a/x/blockdb/database.go +++ b/x/blockdb/database.go @@ -219,7 +219,7 @@ func New(indexDir, dataDir string, config DatabaseConfig, log logging.Logger) (* s := &Database{ options: config, log: databaseLog, - fileCache: lru.NewCache[int, *os.File](MaxDataFiles), + fileCache: lru.NewCache[int, *os.File](config.MaxDataFiles), } s.fileCache.SetOnEvict(func(_ int, f *os.File) { if f != nil { diff --git a/x/blockdb/database_test.go b/x/blockdb/database_test.go index 5b26d1395778..73ed499f5fe2 100644 --- a/x/blockdb/database_test.go +++ b/x/blockdb/database_test.go @@ -101,6 +101,7 @@ func TestNew_Params(t *testing.T) { config: DefaultDatabaseConfig(). WithMinimumHeight(100). WithMaxDataFileSize(1024 * 1024). // 1MB + WithMaxDataFiles(50). 
WithCheckpointInterval(512), }, { @@ -131,6 +132,20 @@ func TestNew_Params(t *testing.T) { config: DefaultDatabaseConfig().WithCheckpointInterval(0), wantErr: errors.New("CheckpointInterval cannot be 0"), }, + { + name: "invalid config - zero max data files", + indexDir: tempDir, + dataDir: tempDir, + config: DefaultDatabaseConfig().WithMaxDataFiles(0), + wantErr: errors.New("MaxDataFiles must be positive"), + }, + { + name: "invalid config - negative max data files", + indexDir: tempDir, + dataDir: tempDir, + config: DefaultDatabaseConfig().WithMaxDataFiles(-1), + wantErr: errors.New("MaxDataFiles must be positive"), + }, } for _, tt := range tests { @@ -148,6 +163,7 @@ func TestNew_Params(t *testing.T) { // Verify the database was created with correct configuration require.Equal(t, tt.config.MinimumHeight, db.options.MinimumHeight) require.Equal(t, tt.config.MaxDataFileSize, db.options.MaxDataFileSize) + require.Equal(t, tt.config.MaxDataFiles, db.options.MaxDataFiles) require.Equal(t, tt.config.CheckpointInterval, db.options.CheckpointInterval) require.Equal(t, tt.config.SyncToDisk, db.options.SyncToDisk) @@ -347,3 +363,42 @@ func TestFileCache_Eviction(t *testing.T) { require.Equal(t, blocks[i], block, "block data mismatch at height %d", i) } } + +func TestMaxDataFiles_CacheLimit(t *testing.T) { + // Test that the file cache respects the MaxDataFiles limit + // Create a small cache size to test eviction behavior + config := DefaultDatabaseConfig(). + WithMaxDataFiles(2). // Only allow 2 files in cache + WithMaxDataFileSize(1024) // Small file size to force multiple files + + store, cleanup := newTestDatabase(t, config) + defer cleanup() + + // Create blocks that will span multiple data files + // Each block is ~512 bytes, so 2 blocks per file + numBlocks := 6 // This will create 3 files, more than our cache limit of 2 + + evictionCount := 0 + store.fileCache.SetOnEvict(func(_ int, f *os.File) { + evictionCount++ + if f != nil { + f.Close() + } + }) + + // Write blocks to force multiple data files + for i := range numBlocks { + block := fixedSizeBlock(t, 512, uint64(i)) + require.NoError(t, store.WriteBlock(uint64(i), block, 0)) + } + + // Verify that evictions occurred due to cache limit + require.Positive(t, evictionCount, "should have had cache evictions due to MaxDataFiles limit") + + // Verify all blocks are still readable despite evictions + for i := range numBlocks { + block, err := store.ReadBlock(uint64(i)) + require.NoError(t, err, "failed to read block at height %d after eviction", i) + require.Equal(t, 512, len(block), "block size mismatch at height %d", i) + } +} diff --git a/x/blockdb/readblock_test.go b/x/blockdb/readblock_test.go index 6982f715f7ce..3a9f7ed9262c 100644 --- a/x/blockdb/readblock_test.go +++ b/x/blockdb/readblock_test.go @@ -47,6 +47,7 @@ func TestReadOperations(t *testing.T) { MinimumHeight: 20, MaxDataFileSize: DefaultMaxDataFileSize, CheckpointInterval: 1024, + MaxDataFiles: DefaultMaxDataFileSize, }, }, { @@ -64,6 +65,7 @@ func TestReadOperations(t *testing.T) { MinimumHeight: 10, MaxDataFileSize: DefaultMaxDataFileSize, CheckpointInterval: 1024, + MaxDataFiles: DefaultMaxDataFileSize, }, wantErr: ErrInvalidBlockHeight, }, From 978a2b5a27312dcabdb4461be0e0c27b28564da0 Mon Sep 17 00:00:00 2001 From: Draco Date: Sun, 6 Jul 2025 17:45:49 -0400 Subject: [PATCH 14/27] add more logging --- x/blockdb/README.md | 17 ++-- x/blockdb/database.go | 201 +++++++++++++++++++++++++++++++++++++----- 2 files changed, 186 insertions(+), 32 deletions(-) diff --git 
a/x/blockdb/README.md b/x/blockdb/README.md index ad737e1a9589..dafd627e18dd 100644 --- a/x/blockdb/README.md +++ b/x/blockdb/README.md @@ -133,7 +133,7 @@ db, err := blockdb.New( "/path/to/index", // Index directory "/path/to/data", // Data directory config, - logger, + logging.NoLog{}, ) if err != nil { fmt.Println("Error creating database:", err) @@ -181,10 +181,11 @@ if err != nil { ## TODO -- [ ] Compress data files to reduce storage size -- [ ] Split data across multiple files when `MaxDataFileSize` is reached -- [ ] Implement a block cache for recently accessed blocks -- [ ] Use a buffered pool to avoid allocations on reads and writes -- [ ] Add tests for core functionality -- [ ] Add performance benchmarks -- [ ] Consider supporting missing data files (currently we error if any data files are missing) +- Compress data files to reduce storage size +- ~~Split data across multiple files when `MaxDataFileSize` is reached~~ +- Implement a block cache for recently accessed blocks +- Use a buffered pool to avoid allocations on reads and writes +- ~~Add tests for core functionality~~ +- Add metrics collection +- Add performance benchmarks +- Consider supporting missing data files (currently we error if any data files are missing) diff --git a/x/blockdb/database.go b/x/blockdb/database.go index 1ba6ed5188e2..66d83f8558a0 100644 --- a/x/blockdb/database.go +++ b/x/blockdb/database.go @@ -227,21 +227,39 @@ func New(indexDir, dataDir string, config DatabaseConfig, log logging.Logger) (* } }) + s.log.Info("Initializing BlockDB", + zap.String("indexDir", indexDir), + zap.String("dataDir", dataDir), + zap.Uint64("maxDataFileSize", config.MaxDataFileSize), + zap.Int("maxDataFiles", config.MaxDataFiles), + zap.Bool("truncate", config.Truncate), + ) + if err := s.openAndInitializeIndex(indexDir, config.Truncate); err != nil { + s.log.Error("Failed to initialize database: failed to initialize index", zap.Error(err)) return nil, err } if err := s.initializeDataFiles(dataDir, config.Truncate); err != nil { + s.log.Error("Failed to initialize database: failed to initialize data files", zap.Error(err)) s.closeFiles() return nil, err } if !config.Truncate { if err := s.recover(); err != nil { + s.log.Error("Failed to initialize database: recovery failed", zap.Error(err)) s.closeFiles() return nil, fmt.Errorf("recovery failed: %w", err) } } + + s.log.Info("BlockDB initialized successfully", + zap.Uint64("maxContiguousHeight", s.maxContiguousHeight.Load()), + zap.Uint64("maxBlockHeight", s.maxBlockHeight.Load()), + zap.Uint64("nextWriteOffset", s.nextDataWriteOffset.Load()), + ) + return s, nil } @@ -264,7 +282,13 @@ func (s *Database) Close() error { s.closed = true err := s.persistIndexHeader() + if err != nil { + s.log.Error("Failed to close database: failed to persist index header", zap.Error(err)) + } + s.closeFiles() + + s.log.Info("Block database closed successfully") return err } @@ -274,34 +298,61 @@ func (s *Database) WriteBlock(height BlockHeight, block BlockData, headerSize Bl defer s.closeMu.RUnlock() if s.closed { + s.log.Error("Failed to write block: database is closed", + zap.Uint64("height", height), + ) return ErrDatabaseClosed } blockSize := len(block) if blockSize > math.MaxUint32 { + s.log.Error("Failed to write block: block size exceeds max size for uint32", + zap.Uint64("height", height), + zap.Int("blockSize", blockSize), + ) return fmt.Errorf("%w: block size cannot exceed %d bytes", ErrBlockTooLarge, math.MaxUint32) } blockDataLen := uint32(blockSize) if blockDataLen == 0 { + 
s.log.Error("Failed to write block: empty block", zap.Uint64("height", height)) return ErrBlockEmpty } if headerSize >= blockDataLen { + s.log.Error("Failed to write block: header size exceeds block size", + zap.Uint64("height", height), + zap.Uint32("headerSize", headerSize), + zap.Uint32("blockSize", blockDataLen), + ) return ErrHeaderSizeTooLarge } indexFileOffset, err := s.indexEntryOffset(height) if err != nil { + s.log.Error("Failed to write block: failed to calculate index entry offset", + zap.Uint64("height", height), + zap.Error(err), + ) return fmt.Errorf("failed to get index entry offset for block at height %d: %w", height, err) } sizeWithDataHeader, err := safemath.Add(sizeOfBlockEntryHeader, blockDataLen) if err != nil { + s.log.Error("Failed to write block: block size calculation overflow", + zap.Uint64("height", height), + zap.Uint32("blockSize", blockDataLen), + zap.Error(err), + ) return fmt.Errorf("calculating total block size would overflow for block at height %d: %w", height, err) } writeDataOffset, err := s.allocateBlockSpace(sizeWithDataHeader) if err != nil { + s.log.Error("Failed to write block: failed to allocate block space", + zap.Uint64("height", height), + zap.Uint32("totalSize", sizeWithDataHeader), + zap.Error(err), + ) return err } @@ -313,14 +364,40 @@ func (s *Database) WriteBlock(height BlockHeight, block BlockData, headerSize Bl Version: BlockEntryVersion, } if err := s.writeBlockAt(writeDataOffset, bh, block); err != nil { + s.log.Error("Failed to write block: error writing block data", + zap.Uint64("height", height), + zap.Uint64("dataOffset", writeDataOffset), + zap.Error(err), + ) return err } if err := s.writeIndexEntryAt(indexFileOffset, writeDataOffset, blockDataLen, headerSize); err != nil { + s.log.Error("Failed to write block: error writing index entry", + zap.Uint64("height", height), + zap.Uint64("indexOffset", indexFileOffset), + zap.Uint64("dataOffset", writeDataOffset), + zap.Error(err), + ) return err } - return s.updateBlockHeights(height) + if err := s.updateBlockHeights(height); err != nil { + s.log.Error("Failed to write block: error updating block heights", + zap.Uint64("height", height), + zap.Error(err), + ) + return err + } + + s.log.Debug("Block written successfully", + zap.Uint64("height", height), + zap.Uint32("blockSize", blockDataLen), + zap.Uint32("headerSize", headerSize), + zap.Uint64("dataOffset", writeDataOffset), + ) + + return nil } // readBlockIndex reads the index entry for the given height. @@ -328,24 +405,44 @@ func (s *Database) WriteBlock(height BlockHeight, block BlockData, headerSize Bl func (s *Database) readBlockIndex(height BlockHeight) (indexEntry, error) { var entry indexEntry if s.closed { + s.log.Error("Failed to read block index: database is closed", + zap.Uint64("height", height), + ) return entry, ErrDatabaseClosed } // Skip the index entry read if we know the block is past the max height. 
maxHeight := s.maxBlockHeight.Load() if maxHeight == unsetHeight || height > maxHeight { + reason := "height beyond max" + if maxHeight == unsetHeight { + reason = "no blocks written yet" + } + s.log.Debug("Block not found", + zap.Uint64("height", height), + zap.Uint64("maxHeight", maxHeight), + zap.String("reason", reason), + ) return entry, ErrBlockNotFound } entry, err := s.readIndexEntry(height) if err != nil { + if errors.Is(err, ErrBlockNotFound) { + s.log.Debug("Block not found", + zap.Uint64("height", height), + zap.String("reason", "no index entry found"), + zap.Error(err), + ) + } else { + s.log.Error("Failed to read block index: failed to read index entry", + zap.Uint64("height", height), + zap.Error(err), + ) + } return entry, err } - if entry.IsEmpty() { - return entry, ErrBlockNotFound - } - return entry, nil } @@ -364,10 +461,21 @@ func (s *Database) ReadBlock(height BlockHeight) (BlockData, error) { blockData := make(BlockData, indexEntry.Size) dataFile, localOffset, err := s.getDataFileAndOffset(indexEntry.Offset) if err != nil { + s.log.Error("Failed to read block: failed to get data file", + zap.Uint64("height", height), + zap.Uint64("dataOffset", indexEntry.Offset), + zap.Error(err), + ) return nil, fmt.Errorf("failed to get data file for block at height %d: %w", height, err) } _, err = dataFile.ReadAt(blockData, int64(localOffset+uint64(sizeOfBlockEntryHeader))) if err != nil { + s.log.Error("Failed to read block: failed to read block data from file", + zap.Uint64("height", height), + zap.Uint64("localOffset", localOffset), + zap.Uint32("blockSize", indexEntry.Size), + zap.Error(err), + ) return nil, fmt.Errorf("failed to read block data from data file: %w", err) } @@ -392,6 +500,11 @@ func (s *Database) ReadHeader(height BlockHeight) (BlockData, error) { // Validate header size doesn't exceed total block size if indexEntry.HeaderSize > indexEntry.Size { + s.log.Error("Failed to read header: header size exceeds block size", + zap.Uint64("height", height), + zap.Uint32("headerSize", indexEntry.HeaderSize), + zap.Uint32("blockSize", indexEntry.Size), + ) return nil, fmt.Errorf("invalid header size %d exceeds block size %d", indexEntry.HeaderSize, indexEntry.Size) } @@ -399,10 +512,21 @@ func (s *Database) ReadHeader(height BlockHeight) (BlockData, error) { headerData := make([]byte, indexEntry.HeaderSize) dataFile, localOffset, err := s.getDataFileAndOffset(indexEntry.Offset) if err != nil { + s.log.Error("Failed to read header: failed to get data file", + zap.Uint64("height", height), + zap.Uint64("dataOffset", indexEntry.Offset), + zap.Error(err), + ) return nil, fmt.Errorf("failed to get data file for block header at height %d: %w", height, err) } _, err = dataFile.ReadAt(headerData, int64(localOffset+uint64(sizeOfBlockEntryHeader))) if err != nil { + s.log.Error("Failed to read header: failed to read header data from file", + zap.Uint64("height", height), + zap.Uint64("localOffset", localOffset), + zap.Uint32("headerSize", indexEntry.HeaderSize), + zap.Error(err), + ) return nil, fmt.Errorf("failed to read block header data from data file: %w", err) } @@ -424,19 +548,41 @@ func (s *Database) ReadBody(height BlockHeight) (BlockData, error) { bodyData := make([]byte, bodySize) dataFile, localOffset, err := s.getDataFileAndOffset(indexEntry.Offset) if err != nil { + s.log.Error("Failed to read body: failed to get data file", + zap.Uint64("height", height), + zap.Uint64("dataOffset", indexEntry.Offset), + zap.Error(err), + ) return nil, fmt.Errorf("failed to get data 
file for block body at height %d: %w", height, err) } headerOffset, err := safemath.Add(localOffset, uint64(sizeOfBlockEntryHeader)) if err != nil { + s.log.Error("Failed to read body: header offset calculation overflow", + zap.Uint64("height", height), + zap.Uint64("localOffset", localOffset), + zap.Error(err), + ) return nil, fmt.Errorf("calculating header offset would overflow for block at height %d: %w", height, err) } bodyOffset, err := safemath.Add(headerOffset, uint64(indexEntry.HeaderSize)) if err != nil { + s.log.Error("Failed to read body: body offset calculation overflow", + zap.Uint64("height", height), + zap.Uint64("headerOffset", headerOffset), + zap.Uint32("headerSize", indexEntry.HeaderSize), + zap.Error(err), + ) return nil, fmt.Errorf("calculating body offset would overflow for block at height %d: %w", height, err) } _, err = dataFile.ReadAt(bodyData, int64(bodyOffset)) if err != nil { + s.log.Error("Failed to read body: failed to read body data from file", + zap.Uint64("height", height), + zap.Uint64("bodyOffset", bodyOffset), + zap.Uint32("bodySize", bodySize), + zap.Error(err), + ) return nil, fmt.Errorf("failed to read block body data from data file: %w", err) } return bodyData, nil @@ -491,7 +637,7 @@ func (s *Database) readIndexEntry(height BlockHeight) (indexEntry, error) { // Return ErrBlockNotFound if trying to read past the end of the index file // for a block that has not been indexed yet. if errors.Is(err, io.EOF) { - return entry, ErrBlockNotFound + return entry, fmt.Errorf("%w: EOF reading index entry at offset %d for height %d", ErrBlockNotFound, offset, height) } return entry, fmt.Errorf("failed to read index entry at offset %d for height %d: %w", offset, height, err) } @@ -500,7 +646,7 @@ func (s *Database) readIndexEntry(height BlockHeight) (indexEntry, error) { } if entry.IsEmpty() { - return entry, ErrBlockNotFound + return entry, fmt.Errorf("%w: empty index entry for height %d", ErrBlockNotFound, height) } return entry, nil @@ -622,12 +768,11 @@ func (s *Database) recover() error { // The data on disk is ahead of the index. We need to recover un-indexed blocks. s.log.Info("Recovery: data files are ahead of index; recovering un-indexed blocks.", zap.Uint64("headerNextWriteOffset", nextDataWriteOffset), - zap.Uint64("calculatedNextWriteOffset", calculatedNextDataWriteOffset), + zap.Uint64("actualDataNextWriteOffset", calculatedNextDataWriteOffset), ) // Start scan from where the index left off. 
currentScanOffset := nextDataWriteOffset - recoveredBlocksCount := 0 recoveredHeights := make([]BlockHeight, 0) for currentScanOffset < calculatedNextDataWriteOffset { bh, err := s.recoverBlockAtOffset(currentScanOffset, calculatedNextDataWriteOffset) @@ -648,10 +793,9 @@ func (s *Database) recover() error { } s.log.Debug("Recovery: Successfully validated and indexed block", zap.Uint64("height", bh.Height), - zap.Uint32("size", bh.Size), - zap.Uint64("offset", currentScanOffset), + zap.Uint32("blockSize", bh.Size), + zap.Uint64("dataOffset", currentScanOffset), ) - recoveredBlocksCount++ recoveredHeights = append(recoveredHeights, bh.Height) blockTotalSize, err := safemath.Add(uint64(sizeOfBlockEntryHeader), uint64(bh.Size)) if err != nil { @@ -676,7 +820,7 @@ func (s *Database) recover() error { } s.log.Info("Recovery: Scan finished", - zap.Int("recoveredBlocks", recoveredBlocksCount), + zap.Int("recoveredBlocks", len(recoveredHeights)), zap.Uint64("finalNextWriteOffset", s.nextDataWriteOffset.Load()), zap.Uint64("maxContiguousBlockHeight", s.maxContiguousHeight.Load()), zap.Uint64("maxBlockHeight", s.maxBlockHeight.Load()), @@ -767,7 +911,7 @@ func (s *Database) listDataFiles() (map[int]string, int, error) { var index int n, err := fmt.Sscanf(file.Name(), dataFileNameFormat, &index) if err != nil || n != 1 { - s.log.Debug("non-data file scanned in data directory", zap.String("file", file.Name()), zap.Error(err)) + s.log.Debug("non-data file found in data directory", zap.String("fileName", file.Name()), zap.Error(err)) continue } dataFiles[index] = filepath.Join(s.dataDir, file.Name()) @@ -906,9 +1050,20 @@ func (s *Database) getOrOpenDataFile(fileIndex int) (*os.File, error) { filePath := s.dataFilePath(fileIndex) handle, err := os.OpenFile(filePath, os.O_RDWR|os.O_CREATE, defaultFilePermissions) if err != nil { + s.log.Error("Failed to open data file", + zap.Int("fileIndex", fileIndex), + zap.String("filePath", filePath), + zap.Error(err), + ) return nil, fmt.Errorf("failed to open data file %s: %w", filePath, err) } s.fileCache.Put(fileIndex, handle) + + s.log.Debug("Opened data file", + zap.Int("fileIndex", fileIndex), + zap.String("filePath", filePath), + ) + return handle, nil } @@ -960,7 +1115,10 @@ func (s *Database) updateBlockHeights(writtenBlockHeight BlockHeight) error { for { nextHeightToVerify, err := safemath.Add(currentMax, 1) if err != nil { - s.log.Error("overflow in height calculation when updating max contiguous height") + s.log.Error("Failed to update block heights: overflow in height calculation", + zap.Uint64("currentMax", currentMax), + zap.Error(err), + ) break } // Check if we have indexed a block at the next height, which would extend our contiguous sequence @@ -972,8 +1130,7 @@ func (s *Database) updateBlockHeights(writtenBlockHeight BlockHeight) error { } // log unexpected error - s.log.Error( - "error reading index entry when updating max contiguous height", + s.log.Error("Failed to update block heights: error reading index entry", zap.Uint64("height", nextHeightToVerify), zap.Error(err), ) @@ -1031,7 +1188,7 @@ func (s *Database) updateRecoveredBlockHeights(recoveredHeights []BlockHeight) e nextHeightToVerify = currentMaxContiguous + 1 } for { - entry, err := s.readIndexEntry(nextHeightToVerify) + _, err := s.readIndexEntry(nextHeightToVerify) if err != nil { // If no block exists at this height, we've reached the end of our contiguous sequence if errors.Is(err, ErrBlockNotFound) { @@ -1039,16 +1196,12 @@ func (s *Database) 
updateRecoveredBlockHeights(recoveredHeights []BlockHeight) e } // Log unexpected error but continue - s.log.Error( - "error reading index entry when updating max contiguous height during recovery", - zap.Uint64("height", currentMaxContiguous), + s.log.Error("Failed to update recovered block heights: error reading index entry", + zap.Uint64("height", nextHeightToVerify), zap.Error(err), ) return err } - if entry.IsEmpty() { - break - } nextHeightToVerify++ } s.maxContiguousHeight.Store(nextHeightToVerify - 1) From 9329e0a823344bbbe323771366beee46a0b31d1c Mon Sep 17 00:00:00 2001 From: Draco Date: Sun, 6 Jul 2025 18:36:52 -0400 Subject: [PATCH 15/27] move data and index dir to config and rename config --- x/blockdb/README.md | 31 ++++++---- x/blockdb/config.go | 34 ++++++++++- x/blockdb/database.go | 64 +++++++++----------- x/blockdb/database_test.go | 114 +++++++++++++++-------------------- x/blockdb/datasplit_test.go | 14 +++-- x/blockdb/helpers_test.go | 7 +-- x/blockdb/readblock_test.go | 95 ++++++++++++++++++++--------- x/blockdb/recovery_test.go | 11 ++-- x/blockdb/writeblock_test.go | 14 ++--- 9 files changed, 216 insertions(+), 168 deletions(-) diff --git a/x/blockdb/README.md b/x/blockdb/README.md index dafd627e18dd..9a72bef8ec0a 100644 --- a/x/blockdb/README.md +++ b/x/blockdb/README.md @@ -126,15 +126,14 @@ On startup, BlockDB checks for signs of an unclean shutdown by comparing the dat ### Creating a Database ```go -import "github.com/ava-labs/avalanchego/x/blockdb" - -config := blockdb.DefaultDatabaseConfig() -db, err := blockdb.New( - "/path/to/index", // Index directory - "/path/to/data", // Data directory - config, - logging.NoLog{}, +import ( + "errors" + "github.com/ava-labs/avalanchego/x/blockdb" ) + +config := blockdb.DefaultConfig(). + WithDir("/path/to/blockdb") +db, err := blockdb.New(config, logging.NoLog{}) if err != nil { fmt.Println("Error creating database:", err) return @@ -158,22 +157,30 @@ if err != nil { // Read a block blockData, err := db.ReadBlock(height) if err != nil { + if errors.Is(err, blockdb.ErrBlockNotFound) { + fmt.Println("Block doesn't exist at this height") + return + } fmt.Println("Error reading block:", err) return } -if blockData == nil { - // Block doesn't exist at this height - return -} // Read block components separately headerData, err := db.ReadHeader(height) if err != nil { + if errors.Is(err, blockdb.ErrBlockNotFound) { + fmt.Println("Block doesn't exist at this height") + return + } fmt.Println("Error reading header:", err) return } bodyData, err := db.ReadBody(height) if err != nil { + if errors.Is(err, blockdb.ErrBlockNotFound) { + fmt.Println("Block doesn't exist at this height") + return + } fmt.Println("Error reading body:", err) return } diff --git a/x/blockdb/config.go b/x/blockdb/config.go index 28940cd8ed7d..c5465bd32e34 100644 --- a/x/blockdb/config.go +++ b/x/blockdb/config.go @@ -13,6 +13,12 @@ const DefaultMaxDataFiles = 10 // DatabaseConfig contains configuration parameters for BlockDB. type DatabaseConfig struct { + // IndexDir is the directory where the index file is stored. + IndexDir string + + // DataDir is the directory where the data files are stored. + DataDir string + // MinimumHeight is the lowest block height tracked by the database. MinimumHeight uint64 @@ -32,9 +38,11 @@ type DatabaseConfig struct { Truncate bool } -// DefaultDatabaseConfig returns the default options for BlockDB. -func DefaultDatabaseConfig() DatabaseConfig { +// DefaultConfig returns the default options for BlockDB. 
+func DefaultConfig() DatabaseConfig { return DatabaseConfig{ + IndexDir: "", + DataDir: "", MinimumHeight: 0, MaxDataFileSize: DefaultMaxDataFileSize, MaxDataFiles: DefaultMaxDataFiles, @@ -44,6 +52,25 @@ func DefaultDatabaseConfig() DatabaseConfig { } } +// WithDir sets both IndexDir and DataDir to the given value. +func (c DatabaseConfig) WithDir(directory string) DatabaseConfig { + c.IndexDir = directory + c.DataDir = directory + return c +} + +// WithIndexDir returns a copy of the config with IndexDir set to the given value. +func (c DatabaseConfig) WithIndexDir(indexDir string) DatabaseConfig { + c.IndexDir = indexDir + return c +} + +// WithDataDir returns a copy of the config with DataDir set to the given value. +func (c DatabaseConfig) WithDataDir(dataDir string) DatabaseConfig { + c.DataDir = dataDir + return c +} + // WithSyncToDisk returns a copy of the config with SyncToDisk set to the given value. func (c DatabaseConfig) WithSyncToDisk(syncToDisk bool) DatabaseConfig { c.SyncToDisk = syncToDisk @@ -82,6 +109,9 @@ func (c DatabaseConfig) WithCheckpointInterval(interval uint64) DatabaseConfig { // Validate checks if the store options are valid. func (c DatabaseConfig) Validate() error { + if c.IndexDir == "" || c.DataDir == "" { + return errors.New("both IndexDir and DataDir must be provided") + } if c.CheckpointInterval == 0 { return errors.New("CheckpointInterval cannot be 0") } diff --git a/x/blockdb/database.go b/x/blockdb/database.go index 66d83f8558a0..7544d3c9b8aa 100644 --- a/x/blockdb/database.go +++ b/x/blockdb/database.go @@ -174,8 +174,7 @@ func (h *indexFileHeader) UnmarshalBinary(data []byte) error { // Database stores blockchain blocks on disk and provides methods to read, and write blocks. type Database struct { indexFile *os.File - dataDir string - options DatabaseConfig + config DatabaseConfig header indexFileHeader log logging.Logger closed bool @@ -198,15 +197,9 @@ type Database struct { // New creates a block database. 
// Parameters: -// - indexDir: Directory for the index file -// - dataDir: Directory for the data file(s) // - config: Configuration parameters // - log: Logger instance for structured logging -func New(indexDir, dataDir string, config DatabaseConfig, log logging.Logger) (*Database, error) { - if indexDir == "" || dataDir == "" { - return nil, errors.New("both indexDir and dataDir must be provided") - } - +func New(config DatabaseConfig, log logging.Logger) (*Database, error) { if err := config.Validate(); err != nil { return nil, err } @@ -217,7 +210,7 @@ func New(indexDir, dataDir string, config DatabaseConfig, log logging.Logger) (* } s := &Database{ - options: config, + config: config, log: databaseLog, fileCache: lru.NewCache[int, *os.File](config.MaxDataFiles), } @@ -228,19 +221,19 @@ func New(indexDir, dataDir string, config DatabaseConfig, log logging.Logger) (* }) s.log.Info("Initializing BlockDB", - zap.String("indexDir", indexDir), - zap.String("dataDir", dataDir), + zap.String("indexDir", config.IndexDir), + zap.String("dataDir", config.DataDir), zap.Uint64("maxDataFileSize", config.MaxDataFileSize), zap.Int("maxDataFiles", config.MaxDataFiles), zap.Bool("truncate", config.Truncate), ) - if err := s.openAndInitializeIndex(indexDir, config.Truncate); err != nil { + if err := s.openAndInitializeIndex(); err != nil { s.log.Error("Failed to initialize database: failed to initialize index", zap.Error(err)) return nil, err } - if err := s.initializeDataFiles(dataDir, config.Truncate); err != nil { + if err := s.initializeDataFiles(); err != nil { s.log.Error("Failed to initialize database: failed to initialize data files", zap.Error(err)) s.closeFiles() return nil, err @@ -674,7 +667,7 @@ func (s *Database) persistIndexHeader() error { // The index file must be fsync'd before the header is written to prevent // a state where the header is persisted but the index entries it refers to // are not. This could lead to data inconsistency on recovery. 
- if s.options.SyncToDisk { + if s.config.SyncToDisk { if err := s.indexFile.Sync(); err != nil { return fmt.Errorf("failed to sync index file before writing header state: %w", err) } @@ -897,9 +890,9 @@ func (s *Database) recoverBlockAtOffset(offset, totalDataSize uint64) (blockEntr } func (s *Database) listDataFiles() (map[int]string, int, error) { - files, err := os.ReadDir(s.dataDir) + files, err := os.ReadDir(s.config.DataDir) if err != nil { - return nil, -1, fmt.Errorf("failed to read data directory %s: %w", s.dataDir, err) + return nil, -1, fmt.Errorf("failed to read data directory %s: %w", s.config.DataDir, err) } dataFiles := make(map[int]string) @@ -914,7 +907,7 @@ func (s *Database) listDataFiles() (map[int]string, int, error) { s.log.Debug("non-data file found in data directory", zap.String("fileName", file.Name()), zap.Error(err)) continue } - dataFiles[index] = filepath.Join(s.dataDir, file.Name()) + dataFiles[index] = filepath.Join(s.config.DataDir, file.Name()) if index > maxIndex { maxIndex = index } @@ -923,13 +916,13 @@ func (s *Database) listDataFiles() (map[int]string, int, error) { return dataFiles, maxIndex, nil } -func (s *Database) openAndInitializeIndex(indexDir string, truncate bool) error { - indexPath := filepath.Join(indexDir, indexFileName) - if err := os.MkdirAll(indexDir, 0o755); err != nil { - return fmt.Errorf("failed to create index directory %s: %w", indexDir, err) +func (s *Database) openAndInitializeIndex() error { + indexPath := filepath.Join(s.config.IndexDir, indexFileName) + if err := os.MkdirAll(s.config.IndexDir, 0o755); err != nil { + return fmt.Errorf("failed to create index directory %s: %w", s.config.IndexDir, err) } openFlags := os.O_RDWR | os.O_CREATE - if truncate { + if s.config.Truncate { openFlags |= os.O_TRUNC } var err error @@ -937,16 +930,15 @@ func (s *Database) openAndInitializeIndex(indexDir string, truncate bool) error if err != nil { return fmt.Errorf("failed to open index file %s: %w", indexPath, err) } - return s.loadOrInitializeHeader(truncate) + return s.loadOrInitializeHeader() } -func (s *Database) initializeDataFiles(dataDir string, truncate bool) error { - s.dataDir = dataDir - if err := os.MkdirAll(dataDir, 0o755); err != nil { - return fmt.Errorf("failed to create data directory %s: %w", dataDir, err) +func (s *Database) initializeDataFiles() error { + if err := os.MkdirAll(s.config.DataDir, 0o755); err != nil { + return fmt.Errorf("failed to create data directory %s: %w", s.config.DataDir, err) } - if truncate { + if s.config.Truncate { dataFiles, _, err := s.listDataFiles() if err != nil { return fmt.Errorf("failed to list data files for truncation: %w", err) @@ -969,18 +961,18 @@ func (s *Database) initializeDataFiles(dataDir string, truncate bool) error { return nil } -func (s *Database) loadOrInitializeHeader(truncate bool) error { +func (s *Database) loadOrInitializeHeader() error { fileInfo, err := s.indexFile.Stat() if err != nil { return fmt.Errorf("failed to get index file stats: %w", err) } // reset index file if its empty or we are truncating - if truncate || fileInfo.Size() == 0 { + if s.config.Truncate || fileInfo.Size() == 0 { s.header = indexFileHeader{ Version: IndexFileVersion, - MinHeight: s.options.MinimumHeight, - MaxDataFileSize: s.options.MaxDataFileSize, + MinHeight: s.config.MinimumHeight, + MaxDataFileSize: s.config.MaxDataFileSize, MaxHeight: unsetHeight, MaxContiguousHeight: unsetHeight, NextWriteOffset: 0, @@ -1030,7 +1022,7 @@ func (s *Database) closeFiles() { } func (s *Database) 
dataFilePath(index int) string { - return filepath.Join(s.dataDir, fmt.Sprintf(dataFileNameFormat, index)) + return filepath.Join(s.config.DataDir, fmt.Sprintf(dataFileNameFormat, index)) } func (s *Database) getOrOpenDataFile(fileIndex int) (*os.File, error) { @@ -1094,7 +1086,7 @@ func (s *Database) writeBlockAt(offset uint64, bh blockEntryHeader, block BlockD return fmt.Errorf("failed to write block to data file at offset %d: %w", offset, err) } - if s.options.SyncToDisk { + if s.config.SyncToDisk { if err := dataFile.Sync(); err != nil { return fmt.Errorf("failed to sync data file after writing block %d: %w", bh.Height, err) } @@ -1151,7 +1143,7 @@ func (s *Database) updateBlockHeights(writtenBlockHeight BlockHeight) error { break } if s.maxBlockHeight.CompareAndSwap(oldMaxHeight, writtenBlockHeight) { - if writtenBlockHeight%s.options.CheckpointInterval == 0 { + if writtenBlockHeight%s.config.CheckpointInterval == 0 { if err := s.persistIndexHeader(); err != nil { return fmt.Errorf("block %d written, but checkpoint failed: %w", writtenBlockHeight, err) } diff --git a/x/blockdb/database_test.go b/x/blockdb/database_test.go index 73ed499f5fe2..4d1f19bf060a 100644 --- a/x/blockdb/database_test.go +++ b/x/blockdb/database_test.go @@ -20,10 +20,8 @@ import ( func TestNew_Truncate(t *testing.T) { // Create initial database tempDir := t.TempDir() - indexDir := filepath.Join(tempDir, "index") - dataDir := filepath.Join(tempDir, "data") - config := DefaultDatabaseConfig().WithTruncate(true) - db, err := New(indexDir, dataDir, config, logging.NoLog{}) + config := DefaultConfig().WithDir(tempDir).WithTruncate(true) + db, err := New(config, logging.NoLog{}) require.NoError(t, err) require.NotNil(t, db) @@ -33,7 +31,7 @@ func TestNew_Truncate(t *testing.T) { require.NoError(t, db.Close()) // Reopen with truncate=true and verify data is gone - db2, err := New(indexDir, dataDir, config, logging.NoLog{}) + db2, err := New(config, logging.NoLog{}) require.NoError(t, err) require.NotNil(t, db2) defer db2.Close() @@ -45,10 +43,8 @@ func TestNew_Truncate(t *testing.T) { func TestNew_NoTruncate(t *testing.T) { tempDir := t.TempDir() - indexDir := filepath.Join(tempDir, "index") - dataDir := filepath.Join(tempDir, "data") - config := DefaultDatabaseConfig().WithTruncate(true) - db, err := New(indexDir, dataDir, config, logging.NoLog{}) + config := DefaultConfig().WithDir(tempDir).WithTruncate(true) + db, err := New(config, logging.NoLog{}) require.NoError(t, err) require.NotNil(t, db) @@ -61,8 +57,8 @@ func TestNew_NoTruncate(t *testing.T) { require.NoError(t, db.Close()) // Reopen with truncate=false and verify data is still there - config = DefaultDatabaseConfig().WithTruncate(false) - db2, err := New(indexDir, dataDir, config, logging.NoLog{}) + config = DefaultConfig().WithDir(tempDir).WithTruncate(false) + db2, err := New(config, logging.NoLog{}) require.NoError(t, err) require.NotNil(t, db2) defer db2.Close() @@ -82,75 +78,61 @@ func TestNew_Params(t *testing.T) { tempDir := t.TempDir() tests := []struct { name string - indexDir string - dataDir string config DatabaseConfig wantErr error expectClose bool }{ { - name: "default config", - indexDir: tempDir, - dataDir: tempDir, - config: DefaultDatabaseConfig(), + name: "default config", + config: DefaultConfig().WithDir(tempDir), }, { - name: "custom config", - indexDir: tempDir, - dataDir: tempDir, - config: DefaultDatabaseConfig(). + name: "custom config", + config: DefaultConfig().WithDir(tempDir). WithMinimumHeight(100). 
WithMaxDataFileSize(1024 * 1024). // 1MB WithMaxDataFiles(50). WithCheckpointInterval(512), }, { - name: "empty index directory", - indexDir: "", - dataDir: tempDir, - config: DefaultDatabaseConfig(), - wantErr: errors.New("both indexDir and dataDir must be provided"), + name: "empty index directory", + config: DefaultConfig().WithDataDir(tempDir), + wantErr: errors.New("both IndexDir and DataDir must be provided"), }, { - name: "empty data directory", - indexDir: tempDir, - dataDir: "", - config: DefaultDatabaseConfig(), - wantErr: errors.New("both indexDir and dataDir must be provided"), + name: "empty data directory", + config: DefaultConfig().WithDataDir(tempDir), + wantErr: errors.New("both IndexDir and DataDir must be provided"), }, { - name: "both directories empty", - indexDir: "", - config: DefaultDatabaseConfig(), - dataDir: "", - wantErr: errors.New("both indexDir and dataDir must be provided"), + name: "both directories empty", + config: DefaultConfig(), + wantErr: errors.New("both IndexDir and DataDir must be provided"), }, { - name: "invalid config - zero checkpoint interval", - indexDir: tempDir, - dataDir: tempDir, - config: DefaultDatabaseConfig().WithCheckpointInterval(0), - wantErr: errors.New("CheckpointInterval cannot be 0"), + name: "different index and data directories", + config: DefaultConfig().WithIndexDir(filepath.Join(tempDir, "index")).WithDataDir(filepath.Join(tempDir, "data")), }, { - name: "invalid config - zero max data files", - indexDir: tempDir, - dataDir: tempDir, - config: DefaultDatabaseConfig().WithMaxDataFiles(0), - wantErr: errors.New("MaxDataFiles must be positive"), + name: "invalid config - zero checkpoint interval", + config: DefaultConfig().WithDir(tempDir).WithCheckpointInterval(0), + wantErr: errors.New("CheckpointInterval cannot be 0"), }, { - name: "invalid config - negative max data files", - indexDir: tempDir, - dataDir: tempDir, - config: DefaultDatabaseConfig().WithMaxDataFiles(-1), - wantErr: errors.New("MaxDataFiles must be positive"), + name: "invalid config - zero max data files", + config: DefaultConfig().WithDir(tempDir).WithMaxDataFiles(0), + wantErr: errors.New("MaxDataFiles must be positive"), + }, + { + name: "invalid config - negative max data files", + config: DefaultConfig().WithDir(tempDir).WithMaxDataFiles(-1), + wantErr: errors.New("MaxDataFiles must be positive"), }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - db, err := New(tt.indexDir, tt.dataDir, tt.config, nil) + db, err := New(tt.config, nil) if tt.wantErr != nil { require.Equal(t, tt.wantErr.Error(), err.Error()) @@ -161,13 +143,12 @@ func TestNew_Params(t *testing.T) { require.NotNil(t, db) // Verify the database was created with correct configuration - require.Equal(t, tt.config.MinimumHeight, db.options.MinimumHeight) - require.Equal(t, tt.config.MaxDataFileSize, db.options.MaxDataFileSize) - require.Equal(t, tt.config.MaxDataFiles, db.options.MaxDataFiles) - require.Equal(t, tt.config.CheckpointInterval, db.options.CheckpointInterval) - require.Equal(t, tt.config.SyncToDisk, db.options.SyncToDisk) - - indexPath := filepath.Join(tt.indexDir, indexFileName) + require.Equal(t, tt.config.MinimumHeight, db.config.MinimumHeight) + require.Equal(t, tt.config.MaxDataFileSize, db.config.MaxDataFileSize) + require.Equal(t, tt.config.MaxDataFiles, db.config.MaxDataFiles) + require.Equal(t, tt.config.CheckpointInterval, db.config.CheckpointInterval) + require.Equal(t, tt.config.SyncToDisk, db.config.SyncToDisk) + indexPath := 
filepath.Join(tt.config.IndexDir, indexFileName) require.FileExists(t, indexPath) // Test that we can close the database @@ -239,7 +220,8 @@ func TestNew_IndexFileErrors(t *testing.T) { t.Skip("Setup failed, skipping test") } - _, err := New(indexDir, dataDir, DefaultDatabaseConfig(), logging.NoLog{}) + config := DefaultConfig().WithIndexDir(indexDir).WithDataDir(dataDir) + _, err := New(config, logging.NoLog{}) require.Contains(t, err.Error(), tt.wantErrMsg) }) } @@ -260,9 +242,9 @@ func TestIndexEntrySizePowerOfTwo(t *testing.T) { func TestNew_IndexFileConfigPrecedence(t *testing.T) { // set up db - initialConfig := DefaultDatabaseConfig().WithMinimumHeight(100).WithMaxDataFileSize(1024 * 1024) tempDir := t.TempDir() - db, err := New(tempDir, tempDir, initialConfig, logging.NoLog{}) + initialConfig := DefaultConfig().WithDir(tempDir).WithMinimumHeight(100).WithMaxDataFileSize(1024 * 1024) + db, err := New(initialConfig, logging.NoLog{}) require.NoError(t, err) require.NotNil(t, db) @@ -275,8 +257,8 @@ func TestNew_IndexFileConfigPrecedence(t *testing.T) { require.NoError(t, db.Close()) // Reopen with different config that has minimum height of 200 and smaller max data file size - differentConfig := DefaultDatabaseConfig().WithMinimumHeight(200).WithMaxDataFileSize(512 * 1024) - db2, err := New(tempDir, tempDir, differentConfig, logging.NoLog{}) + differentConfig := DefaultConfig().WithDir(tempDir).WithMinimumHeight(200).WithMaxDataFileSize(512 * 1024) + db2, err := New(differentConfig, logging.NoLog{}) require.NoError(t, err) require.NotNil(t, db2) defer db2.Close() @@ -304,7 +286,7 @@ func TestNew_IndexFileConfigPrecedence(t *testing.T) { func TestFileCache_Eviction(t *testing.T) { // Create a database with a small max data file size to force multiple files // each file should have enough for 2 blocks (0.5kb * 2) - config := DefaultDatabaseConfig().WithMaxDataFileSize(1024 * 1.5) + config := DefaultConfig().WithMaxDataFileSize(1024 * 1.5) store, cleanup := newTestDatabase(t, config) defer cleanup() @@ -367,7 +349,7 @@ func TestFileCache_Eviction(t *testing.T) { func TestMaxDataFiles_CacheLimit(t *testing.T) { // Test that the file cache respects the MaxDataFiles limit // Create a small cache size to test eviction behavior - config := DefaultDatabaseConfig(). + config := DefaultConfig(). WithMaxDataFiles(2). // Only allow 2 files in cache WithMaxDataFileSize(1024) // Small file size to force multiple files diff --git a/x/blockdb/datasplit_test.go b/x/blockdb/datasplit_test.go index cca920b4de1e..442dff0a255f 100644 --- a/x/blockdb/datasplit_test.go +++ b/x/blockdb/datasplit_test.go @@ -14,7 +14,7 @@ import ( func TestDataSplitting(t *testing.T) { // Each data file should have enough space for 2 blocks - config := DefaultDatabaseConfig().WithMaxDataFileSize(1024 * 2.5) + config := DefaultConfig().WithMaxDataFileSize(1024 * 2.5) store, cleanup := newTestDatabase(t, config) defer cleanup() @@ -27,7 +27,7 @@ func TestDataSplitting(t *testing.T) { } // Verify that multiple data files were created. 
- files, err := os.ReadDir(store.dataDir) + files, err := os.ReadDir(store.config.DataDir) require.NoError(t, err) var dataFileCount int for _, file := range files { @@ -49,7 +49,8 @@ func TestDataSplitting(t *testing.T) { // reopen and verify all blocks are readable require.NoError(t, store.Close()) - store, err = New(filepath.Dir(store.indexFile.Name()), store.dataDir, config, store.log) + config = config.WithDataDir(store.config.DataDir).WithIndexDir(store.config.IndexDir) + store, err = New(config, store.log) require.NoError(t, err) defer store.Close() for i := range numBlocks { @@ -60,7 +61,7 @@ func TestDataSplitting(t *testing.T) { } func TestDataSplitting_DeletedFile(t *testing.T) { - config := DefaultDatabaseConfig().WithMaxDataFileSize(1024 * 2.5) + config := DefaultConfig().WithMaxDataFileSize(1024 * 2.5) store, cleanup := newTestDatabase(t, config) defer cleanup() @@ -74,11 +75,12 @@ func TestDataSplitting_DeletedFile(t *testing.T) { store.Close() // Delete the first data file (blockdb_0.dat) - firstDataFilePath := filepath.Join(store.dataDir, fmt.Sprintf(dataFileNameFormat, 0)) + firstDataFilePath := filepath.Join(store.config.DataDir, fmt.Sprintf(dataFileNameFormat, 0)) require.NoError(t, os.Remove(firstDataFilePath)) // reopen and verify the blocks require.NoError(t, store.Close()) - _, err := New(filepath.Dir(store.indexFile.Name()), store.dataDir, config, store.log) + config = config.WithIndexDir(store.config.IndexDir).WithDataDir(store.config.DataDir) + _, err := New(config, store.log) require.ErrorIs(t, err, ErrCorrupted) } diff --git a/x/blockdb/helpers_test.go b/x/blockdb/helpers_test.go index 8595d27ae66d..14e3c094fcef 100644 --- a/x/blockdb/helpers_test.go +++ b/x/blockdb/helpers_test.go @@ -7,7 +7,6 @@ import ( "crypto/rand" "fmt" "math/big" - "path/filepath" "testing" "github.com/stretchr/testify/require" @@ -18,10 +17,8 @@ import ( func newTestDatabase(t *testing.T, opts DatabaseConfig) (*Database, func()) { t.Helper() dir := t.TempDir() - idxDir := filepath.Join(dir, "idx") - dataDir := filepath.Join(dir, "dat") - - db, err := New(idxDir, dataDir, opts, logging.NoLog{}) + config := opts.WithDir(dir) + db, err := New(config, logging.NoLog{}) require.NoError(t, err, "failed to create database") cleanup := func() { diff --git a/x/blockdb/readblock_test.go b/x/blockdb/readblock_test.go index 3a9f7ed9262c..c218716e82e1 100644 --- a/x/blockdb/readblock_test.go +++ b/x/blockdb/readblock_test.go @@ -15,12 +15,16 @@ import ( func TestReadOperations(t *testing.T) { tests := []struct { - name string - readHeight uint64 - noBlock bool - config *DatabaseConfig - setup func(db *Database) - wantErr error + name string + readHeight uint64 + noBlock bool + config *DatabaseConfig + setup func(db *Database) + wantErr error + expectedBlock []byte + expectedHeader []byte + expectedBody []byte + skipSeed bool }{ { name: "read first block", @@ -79,33 +83,61 @@ func TestReadOperations(t *testing.T) { readHeight: math.MaxUint64, wantErr: ErrBlockNotFound, }, + { + name: "read block with no header (headerSize=0)", + readHeight: 100, + setup: func(db *Database) { + // Write a block with no header + blockData := []byte("this is all body data") + require.NoError(t, db.WriteBlock(100, blockData, 0)) + }, + expectedBlock: []byte("this is all body data"), + expectedHeader: nil, + expectedBody: []byte("this is all body data"), + skipSeed: true, + }, + { + name: "read block with minimal body (headerSize=total size-1)", + readHeight: 101, + setup: func(db *Database) { + // Write a block where 
header is almost the entire block + blockData := []byte("this is all header data!") + require.NoError(t, db.WriteBlock(101, blockData, BlockHeaderSize(len(blockData)-1))) + }, + expectedBlock: []byte("this is all header data!"), + expectedHeader: []byte("this is all header data"), + expectedBody: []byte("!"), + skipSeed: true, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { config := tt.config if config == nil { - defaultConfig := DefaultDatabaseConfig() + defaultConfig := DefaultConfig() config = &defaultConfig } store, cleanup := newTestDatabase(t, *config) defer cleanup() - // Seed database with blocks based on config + // Seed database with blocks based on config (unless skipSeed is true) seededBlocks := make(map[uint64][]byte) - minHeight := config.MinimumHeight - maxHeight := minHeight + 50 // Always write 51 blocks - gapHeight := minHeight + 40 // Gap at relative position 40 + if !tt.skipSeed { + minHeight := config.MinimumHeight + maxHeight := minHeight + 50 // Always write 51 blocks + gapHeight := minHeight + 40 // Gap at relative position 40 - for i := minHeight; i <= maxHeight; i++ { - if i == gapHeight { - continue // Create gap - } + for i := minHeight; i <= maxHeight; i++ { + if i == gapHeight { + continue // Create gap + } - block := randomBlock(t) - require.NoError(t, store.WriteBlock(i, block, BlockHeaderSize(i-minHeight))) - seededBlocks[i] = block + block := randomBlock(t) + require.NoError(t, store.WriteBlock(i, block, BlockHeaderSize(i-minHeight))) + seededBlocks[i] = block + } } if tt.setup != nil { @@ -135,22 +167,31 @@ func TestReadOperations(t *testing.T) { require.NoError(t, err) require.NotNil(t, readBlock) - expectedBlock := seededBlocks[tt.readHeight] - headerSize := BlockHeaderSize(tt.readHeight - config.MinimumHeight) - var expectHeader []byte - if headerSize > 0 { - expectHeader = expectedBlock[:headerSize] + + // Use custom expected values if provided, otherwise use seeded blocks + if tt.expectedBlock != nil { + require.Equal(t, tt.expectedBlock, readBlock) + require.Equal(t, tt.expectedHeader, readHeader) + require.Equal(t, tt.expectedBody, readBody) + } else { + // Standard test case logic using seeded blocks + expectedBlock := seededBlocks[tt.readHeight] + headerSize := BlockHeaderSize(tt.readHeight - config.MinimumHeight) + var expectHeader []byte + if headerSize > 0 { + expectHeader = expectedBlock[:headerSize] + } + require.Equal(t, expectedBlock, readBlock) + require.Equal(t, expectHeader, readHeader) + require.Equal(t, expectedBlock[headerSize:], readBody) } - require.Equal(t, expectedBlock, readBlock) - require.Equal(t, expectHeader, readHeader) - require.Equal(t, expectedBlock[headerSize:], readBody) } }) } } func TestReadOperations_Concurrency(t *testing.T) { - store, cleanup := newTestDatabase(t, DefaultDatabaseConfig()) + store, cleanup := newTestDatabase(t, DefaultConfig()) defer cleanup() // Pre-generate blocks and write them diff --git a/x/blockdb/recovery_test.go b/x/blockdb/recovery_test.go index 9fb0b617c953..67886a7c779a 100644 --- a/x/blockdb/recovery_test.go +++ b/x/blockdb/recovery_test.go @@ -6,7 +6,6 @@ package blockdb import ( "math" "os" - "path/filepath" "testing" "github.com/stretchr/testify/require" @@ -15,7 +14,7 @@ import ( func TestRecovery_Success(t *testing.T) { // Create database with 10KB file size and 4KB blocks // This means each file will have 2 blocks (4KB + 24 bytes header = ~4KB per block) - config := DefaultDatabaseConfig().WithMaxDataFileSize(10 * 1024) // 10KB per file + config := 
DefaultConfig().WithMaxDataFileSize(10 * 1024) // 10KB per file tests := []struct { name string @@ -105,8 +104,7 @@ func TestRecovery_Success(t *testing.T) { require.NoError(t, tt.corruptIndex(indexPath)) // Reopen the database and test recovery - indexDir := filepath.Join(indexPath, "..") - recoveredStore, err := New(indexDir, store.dataDir, config, store.log) + recoveredStore, err := New(config.WithIndexDir(store.config.IndexDir).WithDataDir(store.config.DataDir), store.log) require.NoError(t, err) defer recoveredStore.Close() @@ -354,7 +352,7 @@ func TestRecovery_CorruptionDetection(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - config := DefaultDatabaseConfig() + config := DefaultConfig() if tt.minHeight > 0 { config = config.WithMinimumHeight(tt.minHeight) } @@ -381,8 +379,7 @@ func TestRecovery_CorruptionDetection(t *testing.T) { require.NoError(t, tt.setupCorruption(store, blocks)) // Try to reopen the database - it should detect corruption - indexDir := filepath.Dir(store.indexFile.Name()) - _, err := New(indexDir, store.dataDir, config, store.log) + _, err := New(config.WithIndexDir(store.config.IndexDir).WithDataDir(store.config.DataDir), store.log) require.ErrorIs(t, err, tt.wantErr) require.Contains(t, err.Error(), tt.wantErrText, "error message should contain expected text") }) diff --git a/x/blockdb/writeblock_test.go b/x/blockdb/writeblock_test.go index 16ed9649ead1..54fab86a1b93 100644 --- a/x/blockdb/writeblock_test.go +++ b/x/blockdb/writeblock_test.go @@ -15,7 +15,7 @@ import ( ) func TestWriteBlock_Basic(t *testing.T) { - customConfig := DefaultDatabaseConfig().WithMinimumHeight(10) + customConfig := DefaultConfig().WithMinimumHeight(10) tests := []struct { name string @@ -130,7 +130,7 @@ func TestWriteBlock_Basic(t *testing.T) { t.Run(tt.name, func(t *testing.T) { config := tt.config if config.CheckpointInterval == 0 { - config = DefaultDatabaseConfig() + config = DefaultConfig() } store, cleanup := newTestDatabase(t, config) @@ -190,7 +190,7 @@ func TestWriteBlock_Basic(t *testing.T) { } func TestWriteBlock_Concurrency(t *testing.T) { - store, cleanup := newTestDatabase(t, DefaultDatabaseConfig()) + store, cleanup := newTestDatabase(t, DefaultConfig()) defer cleanup() var wg sync.WaitGroup @@ -281,7 +281,7 @@ func TestWriteBlock_Errors(t *testing.T) { name: "height below custom minimum", height: 5, block: randomBlock(t), - config: DefaultDatabaseConfig().WithMinimumHeight(10), + config: DefaultConfig().WithMinimumHeight(10), headerSize: 0, wantErr: ErrInvalidBlockHeight, }, @@ -306,7 +306,7 @@ func TestWriteBlock_Errors(t *testing.T) { name: "exceed max data file size", height: 0, block: make([]byte, 999), // Block + header will exceed 1024 limit (999 + 26 = 1025 > 1024) - config: DefaultDatabaseConfig().WithMaxDataFileSize(1024), + config: DefaultConfig().WithMaxDataFileSize(1024), headerSize: 0, wantErr: ErrBlockTooLarge, }, @@ -314,7 +314,7 @@ func TestWriteBlock_Errors(t *testing.T) { name: "data file offset overflow", height: 0, block: make([]byte, 100), - config: DefaultDatabaseConfig(), + config: DefaultConfig(), setup: func(db *Database) { // Set the next write offset to near max to trigger overflow db.nextDataWriteOffset.Store(math.MaxUint64 - 50) @@ -328,7 +328,7 @@ func TestWriteBlock_Errors(t *testing.T) { t.Run(tt.name, func(t *testing.T) { config := tt.config if config.CheckpointInterval == 0 { - config = DefaultDatabaseConfig() + config = DefaultConfig() } store, cleanup := newTestDatabase(t, config) From 
e45c2e6bfb757f58f5b5b5fe3aecd4725bac3a60 Mon Sep 17 00:00:00 2001 From: Draco Date: Sun, 6 Jul 2025 20:03:29 -0400 Subject: [PATCH 16/27] fix lint --- x/blockdb/database.go | 6 ++++-- x/blockdb/database_test.go | 2 +- x/blockdb/errors.go | 7 ++----- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/x/blockdb/database.go b/x/blockdb/database.go index 7544d3c9b8aa..66bc867cf8b7 100644 --- a/x/blockdb/database.go +++ b/x/blockdb/database.go @@ -15,11 +15,13 @@ import ( "sync" "sync/atomic" + "github.com/cespare/xxhash/v2" + "go.uber.org/zap" + "github.com/ava-labs/avalanchego/cache/lru" "github.com/ava-labs/avalanchego/utils/logging" + safemath "github.com/ava-labs/avalanchego/utils/math" - "github.com/cespare/xxhash/v2" - "go.uber.org/zap" ) const ( diff --git a/x/blockdb/database_test.go b/x/blockdb/database_test.go index 4d1f19bf060a..786c22ab94d8 100644 --- a/x/blockdb/database_test.go +++ b/x/blockdb/database_test.go @@ -381,6 +381,6 @@ func TestMaxDataFiles_CacheLimit(t *testing.T) { for i := range numBlocks { block, err := store.ReadBlock(uint64(i)) require.NoError(t, err, "failed to read block at height %d after eviction", i) - require.Equal(t, 512, len(block), "block size mismatch at height %d", i) + require.Len(t, block, 512, "block size mismatch at height %d", i) } } diff --git a/x/blockdb/errors.go b/x/blockdb/errors.go index eea1d7a11632..51563578c8ca 100644 --- a/x/blockdb/errors.go +++ b/x/blockdb/errors.go @@ -3,10 +3,7 @@ package blockdb -import ( - "errors" - "fmt" -) +import "errors" var ( ErrInvalidBlockHeight = errors.New("blockdb: invalid block height") @@ -14,6 +11,6 @@ var ( ErrDatabaseClosed = errors.New("blockdb: database is closed") ErrCorrupted = errors.New("blockdb: unrecoverable corruption detected") ErrHeaderSizeTooLarge = errors.New("blockdb: header size cannot be >= block size") - ErrBlockTooLarge = fmt.Errorf("blockdb: block size too large") + ErrBlockTooLarge = errors.New("blockdb: block size too large") ErrBlockNotFound = errors.New("blockdb: block not found") ) From 7f2fb8273be03867c9d54640c2772eb76cf47f8d Mon Sep 17 00:00:00 2001 From: Draco Date: Mon, 7 Jul 2025 10:51:49 -0400 Subject: [PATCH 17/27] fix struct alignment and add tests --- x/blockdb/README.md | 15 ++++---- x/blockdb/config.go | 3 ++ x/blockdb/database.go | 22 +++++------ x/blockdb/database_test.go | 79 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 101 insertions(+), 18 deletions(-) diff --git a/x/blockdb/README.md b/x/blockdb/README.md index 9a72bef8ec0a..eaa1b89badb2 100644 --- a/x/blockdb/README.md +++ b/x/blockdb/README.md @@ -47,7 +47,7 @@ BlockDB uses a single index file and multiple data files. 
The index file maps bl The index file consists of a fixed-size header followed by fixed-size entries: ``` -Index File Header (80 bytes): +Index File Header (64 bytes): ┌────────────────────────────────┬─────────┐ │ Field │ Size │ ├────────────────────────────────┼─────────┤ @@ -57,7 +57,7 @@ Index File Header (80 bytes): │ Max Contiguous Height │ 8 bytes │ │ Max Block Height │ 8 bytes │ │ Next Write Offset │ 8 bytes │ -│ Reserved │ 32 bytes│ +│ Reserved │ 16 bytes│ └────────────────────────────────┴─────────┘ Index Entry (16 bytes): @@ -72,17 +72,18 @@ Index Entry (16 bytes): #### Data File Structure -Each block in the data file is stored with a header followed by the raw block data: +Each block in the data file is stored with a block entry header followed by the raw block data: ``` -Block Header (24 bytes): +Block Entry Header (26 bytes): ┌────────────────────────────────┬─────────┐ │ Field │ Size │ ├────────────────────────────────┼─────────┤ │ Height │ 8 bytes │ -│ Checksum │ 8 bytes │ │ Size │ 4 bytes │ +│ Checksum │ 8 bytes │ │ Header Size │ 4 bytes │ +│ Version │ 2 bytes │ └────────────────────────────────┴─────────┘ ``` @@ -116,7 +117,7 @@ On startup, BlockDB checks for signs of an unclean shutdown by comparing the dat 1. Starts scanning from where the index left off (`NextWriteOffset`) 2. For each unindexed block found: - - Validates the block header and checksum + - Validates the block entry header and checksum - Writes the corresponding index entry 3. Calculates the max contiguous height and max block height 4. Updates the index header with the updated max contiguous height, max block height, and next write offset @@ -193,6 +194,6 @@ if err != nil { - Implement a block cache for recently accessed blocks - Use a buffered pool to avoid allocations on reads and writes - ~~Add tests for core functionality~~ -- Add metrics collection +- Add metrics - Add performance benchmarks - Consider supporting missing data files (currently we error if any data files are missing) diff --git a/x/blockdb/config.go b/x/blockdb/config.go index c5465bd32e34..a9e70d0e2814 100644 --- a/x/blockdb/config.go +++ b/x/blockdb/config.go @@ -118,5 +118,8 @@ func (c DatabaseConfig) Validate() error { if c.MaxDataFiles <= 0 { return errors.New("MaxDataFiles must be positive") } + if c.MaxDataFileSize == 0 { + return errors.New("MaxDataFileSize must be positive") + } return nil } diff --git a/x/blockdb/database.go b/x/blockdb/database.go index 66bc867cf8b7..52a13b47b12d 100644 --- a/x/blockdb/database.go +++ b/x/blockdb/database.go @@ -34,7 +34,7 @@ const ( unsetHeight = math.MaxUint64 // IndexFileVersion is the version of the index file format. - IndexFileVersion uint16 = 1 + IndexFileVersion uint64 = 1 // BlockEntryVersion is the version of the block entry. BlockEntryVersion uint16 = 1 @@ -66,8 +66,8 @@ var ( // This is not the header portion of the block data itself. 
type blockEntryHeader struct { Height BlockHeight - Checksum uint64 Size uint32 + Checksum uint64 HeaderSize BlockHeaderSize Version uint16 } @@ -76,8 +76,8 @@ type blockEntryHeader struct { func (beh blockEntryHeader) MarshalBinary() ([]byte, error) { buf := make([]byte, sizeOfBlockEntryHeader) binary.LittleEndian.PutUint64(buf[0:], beh.Height) - binary.LittleEndian.PutUint64(buf[8:], beh.Checksum) - binary.LittleEndian.PutUint32(buf[16:], beh.Size) + binary.LittleEndian.PutUint32(buf[8:], beh.Size) + binary.LittleEndian.PutUint64(buf[12:], beh.Checksum) binary.LittleEndian.PutUint32(buf[20:], beh.HeaderSize) binary.LittleEndian.PutUint16(buf[24:], beh.Version) return buf, nil @@ -89,8 +89,8 @@ func (beh *blockEntryHeader) UnmarshalBinary(data []byte) error { return fmt.Errorf("%w: incorrect data length to unmarshal blockEntryHeader: got %d bytes, need exactly %d", ErrCorrupted, len(data), sizeOfBlockEntryHeader) } beh.Height = binary.LittleEndian.Uint64(data[0:]) - beh.Checksum = binary.LittleEndian.Uint64(data[8:]) - beh.Size = binary.LittleEndian.Uint32(data[16:]) + beh.Size = binary.LittleEndian.Uint32(data[8:]) + beh.Checksum = binary.LittleEndian.Uint64(data[12:]) beh.HeaderSize = binary.LittleEndian.Uint32(data[20:]) beh.Version = binary.LittleEndian.Uint16(data[24:]) return nil @@ -134,20 +134,20 @@ func (e *indexEntry) UnmarshalBinary(data []byte) error { // indexFileHeader is the header of the index file. type indexFileHeader struct { - Version uint16 + Version uint64 MaxDataFileSize uint64 MinHeight BlockHeight MaxContiguousHeight BlockHeight MaxHeight BlockHeight NextWriteOffset uint64 - // reserve 38 bytes for future use - Reserved [38]byte + // reserve 16 bytes for future use + Reserved [16]byte } // MarshalBinary implements encoding.BinaryMarshaler for indexFileHeader. 
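To make the reordered on-disk layout above concrete, the following standalone sketch decodes a 26-byte block entry header with encoding/binary at the same offsets the patch uses (Height at 0, Size at 8, Checksum at 12, HeaderSize at 20, Version at 24). It deliberately redeclares a local struct instead of importing the package, so the names here are illustrative:

```go
package main

import (
	"encoding/binary"
	"fmt"
)

// On-disk block entry header after the reorder:
// Height (8B) | Size (4B) | Checksum (8B) | HeaderSize (4B) | Version (2B) = 26 bytes.
const entryHeaderLen = 26

type entryHeader struct {
	Height     uint64
	Size       uint32
	Checksum   uint64
	HeaderSize uint32
	Version    uint16
}

func decodeEntryHeader(buf []byte) (entryHeader, error) {
	var h entryHeader
	if len(buf) != entryHeaderLen {
		return h, fmt.Errorf("need exactly %d bytes, got %d", entryHeaderLen, len(buf))
	}
	h.Height = binary.LittleEndian.Uint64(buf[0:])
	h.Size = binary.LittleEndian.Uint32(buf[8:])
	h.Checksum = binary.LittleEndian.Uint64(buf[12:])
	h.HeaderSize = binary.LittleEndian.Uint32(buf[20:])
	h.Version = binary.LittleEndian.Uint16(buf[24:])
	return h, nil
}

func main() {
	buf := make([]byte, entryHeaderLen)
	binary.LittleEndian.PutUint64(buf[0:], 42)      // Height
	binary.LittleEndian.PutUint32(buf[8:], 4096)    // Size of the block payload
	binary.LittleEndian.PutUint64(buf[12:], 0xfeed) // Checksum
	binary.LittleEndian.PutUint32(buf[20:], 128)    // HeaderSize
	binary.LittleEndian.PutUint16(buf[24:], 1)      // Version
	fmt.Println(decodeEntryHeader(buf))
}
```

Note that the in-memory struct still rounds up to 32 bytes with padding while the serialized form stays at 26 bytes; that difference is exactly what TestStructSizes pins down.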
func (h indexFileHeader) MarshalBinary() ([]byte, error) { buf := make([]byte, sizeOfIndexFileHeader) - binary.LittleEndian.PutUint16(buf[0:], h.Version) + binary.LittleEndian.PutUint64(buf[0:], h.Version) binary.LittleEndian.PutUint64(buf[8:], h.MaxDataFileSize) binary.LittleEndian.PutUint64(buf[16:], h.MinHeight) binary.LittleEndian.PutUint64(buf[24:], h.MaxContiguousHeight) @@ -164,7 +164,7 @@ func (h *indexFileHeader) UnmarshalBinary(data []byte) error { ErrCorrupted, len(data), sizeOfIndexFileHeader, ) } - h.Version = binary.LittleEndian.Uint16(data[0:]) + h.Version = binary.LittleEndian.Uint64(data[0:]) h.MaxDataFileSize = binary.LittleEndian.Uint64(data[8:]) h.MinHeight = binary.LittleEndian.Uint64(data[16:]) h.MaxContiguousHeight = binary.LittleEndian.Uint64(data[24:]) diff --git a/x/blockdb/database_test.go b/x/blockdb/database_test.go index 786c22ab94d8..ac7e58500ae0 100644 --- a/x/blockdb/database_test.go +++ b/x/blockdb/database_test.go @@ -4,12 +4,15 @@ package blockdb import ( + "encoding" + "encoding/binary" "errors" "os" "path/filepath" "sync" "sync/atomic" "testing" + "unsafe" "github.com/stretchr/testify/require" @@ -384,3 +387,79 @@ func TestMaxDataFiles_CacheLimit(t *testing.T) { require.Len(t, block, 512, "block size mismatch at height %d", i) } } + +// TestStructSizes verifies that our critical data structures have the expected sizes +func TestStructSizes(t *testing.T) { + tests := []struct { + name string + memorySize uintptr + binarySize int + expectedMemorySize uintptr + expectedBinarySize int + expectedMarshalSize int + expectedPadding uintptr + createInstance func() interface{} + }{ + { + name: "indexFileHeader", + memorySize: unsafe.Sizeof(indexFileHeader{}), + binarySize: binary.Size(indexFileHeader{}), + expectedMemorySize: 64, + expectedBinarySize: 64, + expectedMarshalSize: 64, + expectedPadding: 0, + createInstance: func() interface{} { return indexFileHeader{} }, + }, + { + name: "blockEntryHeader", + memorySize: unsafe.Sizeof(blockEntryHeader{}), + binarySize: binary.Size(blockEntryHeader{}), + expectedMemorySize: 32, // 6 bytes padding due to version field being 2 bytes + expectedBinarySize: 26, + expectedMarshalSize: 26, + expectedPadding: 6, + createInstance: func() interface{} { return blockEntryHeader{} }, + }, + { + name: "indexEntry", + memorySize: unsafe.Sizeof(indexEntry{}), + binarySize: binary.Size(indexEntry{}), + expectedMemorySize: 16, + expectedBinarySize: 16, + expectedMarshalSize: 16, + expectedPadding: 0, + createInstance: func() interface{} { + return indexEntry{} + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + actualMemorySize := tt.memorySize + require.Equal(t, tt.expectedMemorySize, actualMemorySize, + "%s has unexpected memory size: got %d bytes, expected %d bytes", + tt.name, actualMemorySize, tt.expectedMemorySize) + + binarySize := tt.binarySize + require.Equal(t, tt.expectedBinarySize, binarySize, + "%s binary size should be compact: got %d bytes, expected %d bytes", + tt.name, binarySize, tt.expectedBinarySize) + + instance := tt.createInstance() + var data []byte + var err error + + data, err = instance.(encoding.BinaryMarshaler).MarshalBinary() + require.NoError(t, err, "%s MarshalBinary should not fail", tt.name) + require.Equal(t, tt.expectedMarshalSize, len(data), + "%s MarshalBinary should produce exactly %d bytes, got %d bytes", + tt.name, tt.expectedMarshalSize, len(data)) + + padding := actualMemorySize - uintptr(binarySize) + require.Equal(t, tt.expectedPadding, padding, + "%s 
should have %d bytes of padding: memory=%d, binary=%d", + tt.name, tt.expectedPadding, actualMemorySize, binarySize) + }) + } +} From d9fa8439690d30cb8a009d58d4bcd75b0f8bccda Mon Sep 17 00:00:00 2001 From: Draco Date: Wed, 9 Jul 2025 14:30:25 -0400 Subject: [PATCH 18/27] fix: separate errors for directories --- x/blockdb/config.go | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/x/blockdb/config.go b/x/blockdb/config.go index a9e70d0e2814..18ac3bd2f6fa 100644 --- a/x/blockdb/config.go +++ b/x/blockdb/config.go @@ -109,8 +109,11 @@ func (c DatabaseConfig) WithCheckpointInterval(interval uint64) DatabaseConfig { // Validate checks if the store options are valid. func (c DatabaseConfig) Validate() error { - if c.IndexDir == "" || c.DataDir == "" { - return errors.New("both IndexDir and DataDir must be provided") + if c.IndexDir == "" { + return errors.New("IndexDir must be provided") + } + if c.DataDir == "" { + return errors.New("DataDir must be provided") } if c.CheckpointInterval == 0 { return errors.New("CheckpointInterval cannot be 0") From 5a523409e14f4700e0fa766d868467daa2baea8f Mon Sep 17 00:00:00 2001 From: Draco Date: Wed, 9 Jul 2025 19:48:23 -0400 Subject: [PATCH 19/27] consistent block height tracking --- x/blockdb/database.go | 214 +++++++++++++++++++++-------------- x/blockdb/database_test.go | 11 +- x/blockdb/helpers_test.go | 9 +- x/blockdb/recovery_test.go | 91 +++++++++++++-- x/blockdb/writeblock_test.go | 8 ++ 5 files changed, 234 insertions(+), 99 deletions(-) diff --git a/x/blockdb/database.go b/x/blockdb/database.go index 52a13b47b12d..af9f897b5d50 100644 --- a/x/blockdb/database.go +++ b/x/blockdb/database.go @@ -173,6 +173,13 @@ func (h *indexFileHeader) UnmarshalBinary(data []byte) error { return nil } +type blockHeights struct { + // maxBlockHeight tracks the highest block height that has been written to the db, even if there are gaps in the sequence. + maxBlockHeight BlockHeight + // maxContiguousHeight tracks the highest block height known to be contiguously stored. + maxContiguousHeight BlockHeight +} + // Database stores blockchain blocks on disk and provides methods to read, and write blocks. type Database struct { indexFile *os.File @@ -189,12 +196,10 @@ type Database struct { // fileOpenMu prevents race conditions when multiple threads try to open the same data file fileOpenMu sync.Mutex - // maxBlockHeight tracks the highest block height that has been written to the db, even if there are gaps in the sequence. - maxBlockHeight atomic.Uint64 + // blockHeights holds the max block height and max contiguous height + blockHeights atomic.Pointer[blockHeights] // nextDataWriteOffset tracks the next position to write new data in the data file. nextDataWriteOffset atomic.Uint64 - // maxContiguousHeight tracks the highest block height known to be contiguously stored. - maxContiguousHeight atomic.Uint64 } // New creates a block database. 
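The pair of heights now travels behind a single atomic.Pointer, and updates go through a copy, modify, compare-and-swap loop (updateBlockHeightsAtomically below), so readers always observe the max block height and max contiguous height as one consistent snapshot. A self-contained sketch of that pattern; the gap handling is simplified here (the real code also walks the index to extend the contiguous height across gaps a new block repairs), and the heights start at zero rather than at the unset sentinel:

```go
package main

import (
	"fmt"
	"sync/atomic"
)

type heights struct {
	maxBlock      uint64
	maxContiguous uint64
}

// recordWrite publishes a new heights snapshot via copy-on-write plus CAS.
// Concurrent writers retry until their snapshot lands.
func recordWrite(p *atomic.Pointer[heights], written uint64) {
	for {
		cur := p.Load()
		next := &heights{maxBlock: cur.maxBlock, maxContiguous: cur.maxContiguous}
		if written > next.maxBlock {
			next.maxBlock = written
		}
		// Simplified: only extend the contiguous height when the written block
		// directly follows it.
		if written == next.maxContiguous+1 {
			next.maxContiguous = written
		}
		if p.CompareAndSwap(cur, next) {
			return
		}
	}
}

func main() {
	var p atomic.Pointer[heights]
	p.Store(&heights{}) // block 0 is treated as already present in this sketch

	for _, h := range []uint64{1, 2, 5, 3} {
		recordWrite(&p, h)
	}
	snap := p.Load()
	fmt.Println(snap.maxBlock, snap.maxContiguous) // 5 3
}
```

Compared with the two separate atomic counters this patch removes, the pointer swap avoids the window where one height had been updated but the other had not.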
@@ -249,10 +254,11 @@ func New(config DatabaseConfig, log logging.Logger) (*Database, error) { } } + heights := s.getBlockHeights() s.log.Info("BlockDB initialized successfully", - zap.Uint64("maxContiguousHeight", s.maxContiguousHeight.Load()), - zap.Uint64("maxBlockHeight", s.maxBlockHeight.Load()), zap.Uint64("nextWriteOffset", s.nextDataWriteOffset.Load()), + zap.Uint64("maxContiguousHeight", heights.maxContiguousHeight), + zap.Uint64("maxBlockHeight", heights.maxBlockHeight), ) return s, nil @@ -260,10 +266,29 @@ func New(config DatabaseConfig, log logging.Logger) (*Database, error) { // MaxContiguousHeight returns the highest block height known to be contiguously stored. func (s *Database) MaxContiguousHeight() (height BlockHeight, found bool) { - if s.maxContiguousHeight.Load() == unsetHeight { + heights := s.getBlockHeights() + if heights.maxContiguousHeight == unsetHeight { return 0, false } - return s.maxContiguousHeight.Load(), true + return heights.maxContiguousHeight, true +} + +func (s *Database) setBlockHeights(maxBlock, maxContiguous BlockHeight) { + heights := &blockHeights{ + maxBlockHeight: maxBlock, + maxContiguousHeight: maxContiguous, + } + s.blockHeights.Store(heights) +} + +func (s *Database) updateBlockHeightsAtomically(updateFn func(*blockHeights) *blockHeights) { + for { + current := s.getBlockHeights() + updated := updateFn(current) + if s.blockHeights.CompareAndSwap(current, updated) { + break + } + } } // Close flushes pending writes and closes the store files. @@ -407,18 +432,21 @@ func (s *Database) readBlockIndex(height BlockHeight) (indexEntry, error) { } // Skip the index entry read if we know the block is past the max height. - maxHeight := s.maxBlockHeight.Load() - if maxHeight == unsetHeight || height > maxHeight { - reason := "height beyond max" - if maxHeight == unsetHeight { - reason = "no blocks written yet" - } + heights := s.getBlockHeights() + if heights.maxBlockHeight == unsetHeight { + s.log.Debug("Block not found", + zap.Uint64("height", height), + zap.String("reason", "no blocks written yet"), + ) + return entry, fmt.Errorf("%w: no blocks written yet", ErrBlockNotFound) + } + if height > heights.maxBlockHeight { s.log.Debug("Block not found", zap.Uint64("height", height), - zap.Uint64("maxHeight", maxHeight), - zap.String("reason", reason), + zap.Uint64("maxHeight", heights.maxBlockHeight), + zap.String("reason", "height beyond max"), ) - return entry, ErrBlockNotFound + return entry, fmt.Errorf("%w: height %d is beyond max height %d", ErrBlockNotFound, height, heights.maxBlockHeight) } entry, err := s.readIndexEntry(height) @@ -678,16 +706,10 @@ func (s *Database) persistIndexHeader() error { header := s.header // Update the header with the current state of the database. - // Note: These atomic reads may occur at different times, potentially creating - // inconsistency where MaxContiguousHeight or MaxBlockHeight are higher than - // what NextWriteOffset indicates. This is safe because recovery will: - // 1. Use NextWriteOffset to determine where to start scanning - // 2. Re-index any unindexed blocks found beyond that point - // 3. 
Call updateBlockHeights() for each recovered block, which properly - // updates both MaxContiguousHeight and MaxBlockHeight atomically header.NextWriteOffset = s.nextDataWriteOffset.Load() - header.MaxContiguousHeight = s.maxContiguousHeight.Load() - header.MaxHeight = s.maxBlockHeight.Load() + heights := s.getBlockHeights() + header.MaxContiguousHeight = heights.maxContiguousHeight + header.MaxHeight = heights.maxBlockHeight headerBytes, err := header.MarshalBinary() if err != nil { return fmt.Errorf("failed to serialize header for writing state: %w", err) @@ -702,6 +724,17 @@ func (s *Database) persistIndexHeader() error { return nil } +func (s *Database) getBlockHeights() *blockHeights { + heights := s.blockHeights.Load() + if heights == nil { + return &blockHeights{ + maxBlockHeight: unsetHeight, + maxContiguousHeight: unsetHeight, + } + } + return heights +} + // recover detects and recovers unindexed blocks by scanning data files and updating the index. // It compares the actual data file sizes on disk with the indexed data size to detect // blocks that were written but not properly indexed. @@ -814,11 +847,12 @@ func (s *Database) recover() error { return fmt.Errorf("recovery: failed to save index header after recovery scan: %w", err) } + heights := s.getBlockHeights() s.log.Info("Recovery: Scan finished", zap.Int("recoveredBlocks", len(recoveredHeights)), zap.Uint64("finalNextWriteOffset", s.nextDataWriteOffset.Load()), - zap.Uint64("maxContiguousBlockHeight", s.maxContiguousHeight.Load()), - zap.Uint64("maxBlockHeight", s.maxBlockHeight.Load()), + zap.Uint64("maxContiguousBlockHeight", heights.maxContiguousHeight), + zap.Uint64("maxBlockHeight", heights.maxBlockHeight), ) } return nil @@ -979,8 +1013,7 @@ func (s *Database) loadOrInitializeHeader() error { MaxContiguousHeight: unsetHeight, NextWriteOffset: 0, } - s.maxContiguousHeight.Store(unsetHeight) - s.maxBlockHeight.Store(unsetHeight) + s.setBlockHeights(unsetHeight, unsetHeight) headerBytes, err := s.header.MarshalBinary() if err != nil { @@ -1008,8 +1041,7 @@ func (s *Database) loadOrInitializeHeader() error { return fmt.Errorf("mismatched index file version: found %d, expected %d", s.header.Version, IndexFileVersion) } s.nextDataWriteOffset.Store(s.header.NextWriteOffset) - s.maxContiguousHeight.Store(s.header.MaxContiguousHeight) - s.maxBlockHeight.Store(s.header.MaxHeight) + s.setBlockHeights(s.header.MaxHeight, s.header.MaxContiguousHeight) return nil } @@ -1097,62 +1129,77 @@ func (s *Database) writeBlockAt(offset uint64, bh blockEntryHeader, block BlockD } func (s *Database) updateBlockHeights(writtenBlockHeight BlockHeight) error { - prevContiguousCandidate := uint64(unsetHeight) - if writtenBlockHeight > s.header.MinHeight { - prevContiguousCandidate = writtenBlockHeight - 1 - } - - if s.maxContiguousHeight.CompareAndSwap(prevContiguousCandidate, writtenBlockHeight) { - // We successfully updated the max contiguous height. Now try to extend it further - // by checking if the next height is also available, which would repair gaps in the sequence. 
- currentMax := writtenBlockHeight - for { - nextHeightToVerify, err := safemath.Add(currentMax, 1) - if err != nil { - s.log.Error("Failed to update block heights: overflow in height calculation", - zap.Uint64("currentMax", currentMax), - zap.Error(err), - ) - break - } - // Check if we have indexed a block at the next height, which would extend our contiguous sequence - _, err = s.readIndexEntry(nextHeightToVerify) - if err != nil { - // If no block exists at this height, we've reached the end of our contiguous sequence - if errors.Is(err, ErrBlockNotFound) { - break - } - - // log unexpected error - s.log.Error("Failed to update block heights: error reading index entry", - zap.Uint64("height", nextHeightToVerify), - zap.Error(err), - ) - break + s.updateBlockHeightsAtomically(func(current *blockHeights) *blockHeights { + if current == nil { + heights := &blockHeights{ + maxBlockHeight: writtenBlockHeight, + maxContiguousHeight: unsetHeight, } - if !s.maxContiguousHeight.CompareAndSwap(currentMax, nextHeightToVerify) { - break // Someone else updated + if writtenBlockHeight == s.header.MinHeight { + heights.maxContiguousHeight = writtenBlockHeight } - currentMax = nextHeightToVerify + return heights } - } - // update max block height and persist header on checkpoint interval - var oldMaxHeight BlockHeight - for { - oldMaxHeight = s.maxBlockHeight.Load() - if writtenBlockHeight <= oldMaxHeight && oldMaxHeight != unsetHeight { - break + updated := &blockHeights{ + maxBlockHeight: current.maxBlockHeight, + maxContiguousHeight: current.maxContiguousHeight, + } + + // Update max block height if needed + if writtenBlockHeight > current.maxBlockHeight || current.maxBlockHeight == unsetHeight { + updated.maxBlockHeight = writtenBlockHeight } - if s.maxBlockHeight.CompareAndSwap(oldMaxHeight, writtenBlockHeight) { - if writtenBlockHeight%s.config.CheckpointInterval == 0 { - if err := s.persistIndexHeader(); err != nil { - return fmt.Errorf("block %d written, but checkpoint failed: %w", writtenBlockHeight, err) + + // Update max contiguous height logic + prevContiguousCandidate := uint64(unsetHeight) + if writtenBlockHeight > s.header.MinHeight { + prevContiguousCandidate = writtenBlockHeight - 1 + } + + if current.maxContiguousHeight == prevContiguousCandidate { + // We can extend the contiguous sequence. Try to extend it further + // by checking if the next height is also available, which would repair gaps in the sequence. 
+ currentMax := writtenBlockHeight + for { + nextHeightToVerify, err := safemath.Add(currentMax, 1) + if err != nil { + s.log.Error("Failed to update block heights: overflow in height calculation", + zap.Uint64("currentMax", currentMax), + zap.Error(err), + ) + break + } + // Check if we have indexed a block at the next height, which would extend our contiguous sequence + _, err = s.readIndexEntry(nextHeightToVerify) + if err != nil { + // If no block exists at this height, we've reached the end of our contiguous sequence + if errors.Is(err, ErrBlockNotFound) { + break + } + + // log unexpected error + s.log.Error("Failed to update block heights: error reading index entry", + zap.Uint64("height", nextHeightToVerify), + zap.Error(err), + ) + break } + currentMax = nextHeightToVerify } - break + updated.maxContiguousHeight = currentMax + } + + return updated + }) + + // Check if we need to persist header on checkpoint interval + if writtenBlockHeight%s.config.CheckpointInterval == 0 { + if err := s.persistIndexHeader(); err != nil { + return fmt.Errorf("block %d written, but checkpoint failed: %w", writtenBlockHeight, err) } } + return nil } @@ -1170,13 +1217,14 @@ func (s *Database) updateRecoveredBlockHeights(recoveredHeights []BlockHeight) e } // Update max block height (no CAS needed since we're single-threaded during recovery) - currentMaxHeight := s.maxBlockHeight.Load() + currentHeights := s.getBlockHeights() + currentMaxHeight := currentHeights.maxBlockHeight if maxRecoveredHeight > currentMaxHeight || currentMaxHeight == unsetHeight { - s.maxBlockHeight.Store(maxRecoveredHeight) + currentMaxHeight = maxRecoveredHeight } // Update max contiguous height by extending from current max contiguous height - currentMaxContiguous := s.maxContiguousHeight.Load() + currentMaxContiguous := currentHeights.maxContiguousHeight nextHeightToVerify := s.header.MinHeight if currentMaxContiguous != unsetHeight { nextHeightToVerify = currentMaxContiguous + 1 @@ -1198,7 +1246,7 @@ func (s *Database) updateRecoveredBlockHeights(recoveredHeights []BlockHeight) e } nextHeightToVerify++ } - s.maxContiguousHeight.Store(nextHeightToVerify - 1) + s.setBlockHeights(currentMaxHeight, nextHeightToVerify-1) return nil } diff --git a/x/blockdb/database_test.go b/x/blockdb/database_test.go index ac7e58500ae0..0b19cff0d576 100644 --- a/x/blockdb/database_test.go +++ b/x/blockdb/database_test.go @@ -100,17 +100,12 @@ func TestNew_Params(t *testing.T) { { name: "empty index directory", config: DefaultConfig().WithDataDir(tempDir), - wantErr: errors.New("both IndexDir and DataDir must be provided"), + wantErr: errors.New("IndexDir must be provided"), }, { name: "empty data directory", - config: DefaultConfig().WithDataDir(tempDir), - wantErr: errors.New("both IndexDir and DataDir must be provided"), - }, - { - name: "both directories empty", - config: DefaultConfig(), - wantErr: errors.New("both IndexDir and DataDir must be provided"), + config: DefaultConfig().WithIndexDir(tempDir), + wantErr: errors.New("DataDir must be provided"), }, { name: "different index and data directories", diff --git a/x/blockdb/helpers_test.go b/x/blockdb/helpers_test.go index 14e3c094fcef..93979b55682d 100644 --- a/x/blockdb/helpers_test.go +++ b/x/blockdb/helpers_test.go @@ -52,11 +52,18 @@ func fixedSizeBlock(t *testing.T, size int, height uint64) []byte { } func checkDatabaseState(t *testing.T, db *Database, maxHeight uint64, maxContiguousHeight uint64) { - require.Equal(t, maxHeight, db.maxBlockHeight.Load(), "maxBlockHeight mismatch") + 
heights := db.blockHeights.Load() + if heights != nil { + require.Equal(t, maxHeight, heights.maxBlockHeight, "maxBlockHeight mismatch") + } else { + require.Equal(t, uint64(unsetHeight), maxHeight, "maxBlockHeight mismatch") + } gotMCH, ok := db.MaxContiguousHeight() if maxContiguousHeight != unsetHeight { require.True(t, ok, "MaxContiguousHeight is not set, want %d", maxContiguousHeight) require.Equal(t, maxContiguousHeight, gotMCH, "maxContiguousHeight mismatch") + } else { + require.False(t, ok) } } diff --git a/x/blockdb/recovery_test.go b/x/blockdb/recovery_test.go index 67886a7c779a..74a2543ac22c 100644 --- a/x/blockdb/recovery_test.go +++ b/x/blockdb/recovery_test.go @@ -25,7 +25,7 @@ func TestRecovery_Success(t *testing.T) { corruptIndex: os.Remove, }, { - name: "recovery from truncated index file", + name: "recovery from truncated index file that only indexed the first block", corruptIndex: func(indexPath string) error { // Remove the existing index file if err := os.Remove(indexPath); err != nil { @@ -46,7 +46,7 @@ func TestRecovery_Success(t *testing.T) { header := indexFileHeader{ Version: IndexFileVersion, - MaxDataFileSize: 10 * 1024, // 10KB per file + MaxDataFileSize: 4 * 10 * 1024, // 10KB per file MinHeight: 0, MaxContiguousHeight: 0, MaxHeight: 0, @@ -80,13 +80,90 @@ func TestRecovery_Success(t *testing.T) { return nil }, }, + { + name: "recovery from index file that is behind by one block", + corruptIndex: func(indexPath string) error { + // Read the current index file to get the header + indexFile, err := os.OpenFile(indexPath, os.O_RDWR, 0) + if err != nil { + return err + } + defer indexFile.Close() + + // Read the current header + headerBuf := make([]byte, sizeOfIndexFileHeader) + _, err = indexFile.ReadAt(headerBuf, 0) + if err != nil { + return err + } + + // Parse the header + var header indexFileHeader + err = header.UnmarshalBinary(headerBuf) + if err != nil { + return err + } + + // Corrupt the header by setting the NextWriteOffset to be one block behind + blockSize := uint64(sizeOfBlockEntryHeader) + 4*1024 + header.NextWriteOffset = header.NextWriteOffset - blockSize + header.MaxContiguousHeight = 3 + header.MaxHeight = 8 + + // Write the corrupted header back + corruptedHeaderBytes, err := header.MarshalBinary() + if err != nil { + return err + } + _, err = indexFile.WriteAt(corruptedHeaderBytes, 0) + return err + }, + }, + { + name: "recovery from inconsistent index header (offset lagging behind heights)", + corruptIndex: func(indexPath string) error { + // Read the current index file to get the header + indexFile, err := os.OpenFile(indexPath, os.O_RDWR, 0) + if err != nil { + return err + } + defer indexFile.Close() + + // Read the current header + headerBuf := make([]byte, sizeOfIndexFileHeader) + _, err = indexFile.ReadAt(headerBuf, 0) + if err != nil { + return err + } + + // Parse the header + var header indexFileHeader + err = header.UnmarshalBinary(headerBuf) + if err != nil { + return err + } + + // Calculate the offset after the 5th block (assuming 4KB blocks) + // 2 files, 10KB each, 4KB block size + blockSize := uint64(sizeOfBlockEntryHeader) + 4*1024 + header.NextWriteOffset = 10*1024*2 + blockSize + + // Write the corrupted header back + corruptedHeaderBytes, err := header.MarshalBinary() + if err != nil { + return err + } + _, err = indexFile.WriteAt(corruptedHeaderBytes, 0) + return err + }, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - store, cleanup := newTestDatabase(t, config) - defer cleanup() - blockHeights 
:= []uint64{0, 1, 3, 5, 2, 8} + store, _ := newTestDatabase(t, config) + + blockHeights := []uint64{0, 1, 3, 6, 2, 8, 4} blocks := make(map[uint64][]byte) for _, height := range blockHeights { @@ -96,7 +173,7 @@ func TestRecovery_Success(t *testing.T) { require.NoError(t, store.WriteBlock(height, block, 0)) blocks[height] = block } - checkDatabaseState(t, store, 8, 3) + checkDatabaseState(t, store, 8, 4) require.NoError(t, store.Close()) // Corrupt the index file according to the test case @@ -114,7 +191,7 @@ func TestRecovery_Success(t *testing.T) { require.NoError(t, err) require.Equal(t, blocks[height], readBlock, "block %d should be the same", height) } - checkDatabaseState(t, recoveredStore, 8, 3) + checkDatabaseState(t, recoveredStore, 8, 4) }) } } diff --git a/x/blockdb/writeblock_test.go b/x/blockdb/writeblock_test.go index 54fab86a1b93..3ce220543b9b 100644 --- a/x/blockdb/writeblock_test.go +++ b/x/blockdb/writeblock_test.go @@ -124,6 +124,14 @@ func TestWriteBlock_Basic(t *testing.T) { expectedMCH: 4, expectedMaxHeight: 4, }, + { + name: "complicated gaps", + blockHeights: []uint64{ + 10, 3, 2, 9, 35, 34, 30, 1, 9, 88, 83, 4, 43, 5, 0, + }, + expectedMCH: 5, + expectedMaxHeight: 88, + }, } for _, tt := range tests { From 2d6d85fdc86c834e79c516dfed4deb31c658f6f1 Mon Sep 17 00:00:00 2001 From: Draco Date: Thu, 10 Jul 2025 12:29:37 -0400 Subject: [PATCH 20/27] remove truncate config --- x/blockdb/config.go | 10 ------- x/blockdb/database.go | 30 ++++--------------- x/blockdb/database_test.go | 59 +------------------------------------- x/blockdb/recovery_test.go | 2 +- 4 files changed, 8 insertions(+), 93 deletions(-) diff --git a/x/blockdb/config.go b/x/blockdb/config.go index 18ac3bd2f6fa..15bd1bed729d 100644 --- a/x/blockdb/config.go +++ b/x/blockdb/config.go @@ -33,9 +33,6 @@ type DatabaseConfig struct { // SyncToDisk determines if fsync is called after each write for durability. SyncToDisk bool - - // Truncate determines if existing data should be truncated when opening the database. - Truncate bool } // DefaultConfig returns the default options for BlockDB. @@ -48,7 +45,6 @@ func DefaultConfig() DatabaseConfig { MaxDataFiles: DefaultMaxDataFiles, CheckpointInterval: 1024, SyncToDisk: true, - Truncate: false, } } @@ -77,12 +73,6 @@ func (c DatabaseConfig) WithSyncToDisk(syncToDisk bool) DatabaseConfig { return c } -// WithTruncate returns a copy of the config with Truncate set to the given value. -func (c DatabaseConfig) WithTruncate(truncate bool) DatabaseConfig { - c.Truncate = truncate - return c -} - // WithMinimumHeight returns a copy of the config with MinimumHeight set to the given value. 
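Tracing the "complicated gaps" write-order case above: after all fifteen writes, heights 0 through 5 are all present but 6 is not, so the max contiguous height settles at 5 while the max block height is 88. A tiny sketch of that calculation, independent of the database code:

```go
package main

import "fmt"

func main() {
	written := map[uint64]bool{}
	for _, h := range []uint64{10, 3, 2, 9, 35, 34, 30, 1, 9, 88, 83, 4, 43, 5, 0} {
		written[h] = true
	}

	// Max contiguous height: the largest h such that every height in [0, h] was written.
	var mch uint64
	for written[mch+1] {
		mch++
	}
	fmt.Println(mch) // 5
}
```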
func (c DatabaseConfig) WithMinimumHeight(minHeight uint64) DatabaseConfig { c.MinimumHeight = minHeight diff --git a/x/blockdb/database.go b/x/blockdb/database.go index af9f897b5d50..399e110d4821 100644 --- a/x/blockdb/database.go +++ b/x/blockdb/database.go @@ -232,7 +232,6 @@ func New(config DatabaseConfig, log logging.Logger) (*Database, error) { zap.String("dataDir", config.DataDir), zap.Uint64("maxDataFileSize", config.MaxDataFileSize), zap.Int("maxDataFiles", config.MaxDataFiles), - zap.Bool("truncate", config.Truncate), ) if err := s.openAndInitializeIndex(); err != nil { @@ -246,12 +245,10 @@ func New(config DatabaseConfig, log logging.Logger) (*Database, error) { return nil, err } - if !config.Truncate { - if err := s.recover(); err != nil { - s.log.Error("Failed to initialize database: recovery failed", zap.Error(err)) - s.closeFiles() - return nil, fmt.Errorf("recovery failed: %w", err) - } + if err := s.recover(); err != nil { + s.log.Error("Failed to initialize database: recovery failed", zap.Error(err)) + s.closeFiles() + return nil, fmt.Errorf("recovery failed: %w", err) } heights := s.getBlockHeights() @@ -958,9 +955,6 @@ func (s *Database) openAndInitializeIndex() error { return fmt.Errorf("failed to create index directory %s: %w", s.config.IndexDir, err) } openFlags := os.O_RDWR | os.O_CREATE - if s.config.Truncate { - openFlags |= os.O_TRUNC - } var err error s.indexFile, err = os.OpenFile(indexPath, openFlags, defaultFilePermissions) if err != nil { @@ -974,18 +968,6 @@ func (s *Database) initializeDataFiles() error { return fmt.Errorf("failed to create data directory %s: %w", s.config.DataDir, err) } - if s.config.Truncate { - dataFiles, _, err := s.listDataFiles() - if err != nil { - return fmt.Errorf("failed to list data files for truncation: %w", err) - } - for _, filePath := range dataFiles { - if err := os.Remove(filePath); err != nil { - return fmt.Errorf("failed to remove old data file %s: %w", filePath, err) - } - } - } - // Pre-load the data file for the next write offset. 
nextOffset := s.nextDataWriteOffset.Load() if nextOffset > 0 { @@ -1003,8 +985,8 @@ func (s *Database) loadOrInitializeHeader() error { return fmt.Errorf("failed to get index file stats: %w", err) } - // reset index file if its empty or we are truncating - if s.config.Truncate || fileInfo.Size() == 0 { + // reset index file if its empty + if fileInfo.Size() == 0 { s.header = indexFileHeader{ Version: IndexFileVersion, MinHeight: s.config.MinimumHeight, diff --git a/x/blockdb/database_test.go b/x/blockdb/database_test.go index 0b19cff0d576..c5c1215ac189 100644 --- a/x/blockdb/database_test.go +++ b/x/blockdb/database_test.go @@ -20,63 +20,6 @@ import ( "github.com/ava-labs/avalanchego/utils/logging" ) -func TestNew_Truncate(t *testing.T) { - // Create initial database - tempDir := t.TempDir() - config := DefaultConfig().WithDir(tempDir).WithTruncate(true) - db, err := New(config, logging.NoLog{}) - require.NoError(t, err) - require.NotNil(t, db) - - // Write some test data and close the database - testBlock := []byte("test block data") - require.NoError(t, db.WriteBlock(0, testBlock, 0)) - require.NoError(t, db.Close()) - - // Reopen with truncate=true and verify data is gone - db2, err := New(config, logging.NoLog{}) - require.NoError(t, err) - require.NotNil(t, db2) - defer db2.Close() - _, err = db2.ReadBlock(1) - require.ErrorIs(t, err, ErrBlockNotFound) - _, found := db2.MaxContiguousHeight() - require.False(t, found) -} - -func TestNew_NoTruncate(t *testing.T) { - tempDir := t.TempDir() - config := DefaultConfig().WithDir(tempDir).WithTruncate(true) - db, err := New(config, logging.NoLog{}) - require.NoError(t, err) - require.NotNil(t, db) - - // Write some test data and close the database - testBlock := []byte("test block data") - require.NoError(t, db.WriteBlock(1, testBlock, 5)) - readBlock, err := db.ReadBlock(1) - require.NoError(t, err) - require.Equal(t, testBlock, readBlock) - require.NoError(t, db.Close()) - - // Reopen with truncate=false and verify data is still there - config = DefaultConfig().WithDir(tempDir).WithTruncate(false) - db2, err := New(config, logging.NoLog{}) - require.NoError(t, err) - require.NotNil(t, db2) - defer db2.Close() - readBlock1, err := db2.ReadBlock(1) - require.NoError(t, err) - require.Equal(t, testBlock, readBlock1) - - // Verify we can write additional data - testBlock2 := []byte("test block data 3") - require.NoError(t, db2.WriteBlock(2, testBlock2, 0)) - readBlock2, err := db2.ReadBlock(2) - require.NoError(t, err) - require.Equal(t, testBlock2, readBlock2) -} - func TestNew_Params(t *testing.T) { tempDir := t.TempDir() tests := []struct { @@ -447,7 +390,7 @@ func TestStructSizes(t *testing.T) { data, err = instance.(encoding.BinaryMarshaler).MarshalBinary() require.NoError(t, err, "%s MarshalBinary should not fail", tt.name) - require.Equal(t, tt.expectedMarshalSize, len(data), + require.Len(t, data, tt.expectedMarshalSize, "%s MarshalBinary should produce exactly %d bytes, got %d bytes", tt.name, tt.expectedMarshalSize, len(data)) diff --git a/x/blockdb/recovery_test.go b/x/blockdb/recovery_test.go index 74a2543ac22c..cbe3210cc7f4 100644 --- a/x/blockdb/recovery_test.go +++ b/x/blockdb/recovery_test.go @@ -106,7 +106,7 @@ func TestRecovery_Success(t *testing.T) { // Corrupt the header by setting the NextWriteOffset to be one block behind blockSize := uint64(sizeOfBlockEntryHeader) + 4*1024 - header.NextWriteOffset = header.NextWriteOffset - blockSize + header.NextWriteOffset -= blockSize header.MaxContiguousHeight = 3 header.MaxHeight = 8 
From b0c49386d40ff34fcb638e5fe6d5f9c302ba340d Mon Sep 17 00:00:00 2001 From: Draco Date: Thu, 10 Jul 2025 16:55:23 -0400 Subject: [PATCH 21/27] add additional tests --- x/blockdb/database.go | 130 +++++++++++++++++------------------ x/blockdb/readblock_test.go | 24 +++++++ x/blockdb/writeblock_test.go | 31 ++++++++- 3 files changed, 116 insertions(+), 69 deletions(-) diff --git a/x/blockdb/database.go b/x/blockdb/database.go index 399e110d4821..78fd751e9de7 100644 --- a/x/blockdb/database.go +++ b/x/blockdb/database.go @@ -525,7 +525,7 @@ func (s *Database) ReadHeader(height BlockHeight) (BlockData, error) { zap.Uint32("headerSize", indexEntry.HeaderSize), zap.Uint32("blockSize", indexEntry.Size), ) - return nil, fmt.Errorf("invalid header size %d exceeds block size %d", indexEntry.HeaderSize, indexEntry.Size) + return nil, fmt.Errorf("%w: invalid header size %d exceeds block size %d", ErrHeaderSizeTooLarge, indexEntry.HeaderSize, indexEntry.Size) } // Read only the header portion @@ -711,10 +711,6 @@ func (s *Database) persistIndexHeader() error { if err != nil { return fmt.Errorf("failed to serialize header for writing state: %w", err) } - if uint64(len(headerBytes)) != sizeOfIndexFileHeader { - return fmt.Errorf("internal error: serialized header state size %d, expected %d", len(headerBytes), sizeOfIndexFileHeader) - } - if _, err := s.indexFile.WriteAt(headerBytes, 0); err != nil { return fmt.Errorf("failed to write header state to index file: %w", err) } @@ -791,67 +787,75 @@ func (s *Database) recover() error { ErrCorrupted, calculatedNextDataWriteOffset, nextDataWriteOffset) default: // The data on disk is ahead of the index. We need to recover un-indexed blocks. - s.log.Info("Recovery: data files are ahead of index; recovering un-indexed blocks.", - zap.Uint64("headerNextWriteOffset", nextDataWriteOffset), - zap.Uint64("actualDataNextWriteOffset", calculatedNextDataWriteOffset), - ) + if err := s.recoverUnindexedBlocks(nextDataWriteOffset, calculatedNextDataWriteOffset); err != nil { + return err + } + } + return nil +} - // Start scan from where the index left off. - currentScanOffset := nextDataWriteOffset - recoveredHeights := make([]BlockHeight, 0) - for currentScanOffset < calculatedNextDataWriteOffset { - bh, err := s.recoverBlockAtOffset(currentScanOffset, calculatedNextDataWriteOffset) - if err != nil { - if errors.Is(err, io.EOF) { - // reach end of this file, try to read the next file - currentFileIndex := int(currentScanOffset / s.header.MaxDataFileSize) - nextFileIndex, err := safemath.Add(uint64(currentFileIndex), 1) - if err != nil { - return fmt.Errorf("recovery: overflow in file index calculation: %w", err) - } - if currentScanOffset, err = safemath.Mul(nextFileIndex, s.header.MaxDataFileSize); err != nil { - return fmt.Errorf("recovery: overflow in scan offset calculation: %w", err) - } - continue +// recoverUnindexedBlocks scans data files from the given offset and recovers blocks that were written but not indexed. +func (s *Database) recoverUnindexedBlocks(startOffset, endOffset uint64) error { + s.log.Info("Recovery: data files are ahead of index; recovering un-indexed blocks.", + zap.Uint64("startOffset", startOffset), + zap.Uint64("endOffset", endOffset), + ) + + // Start scan from where the index left off. 
+ currentScanOffset := startOffset + recoveredHeights := make([]BlockHeight, 0) + for currentScanOffset < endOffset { + bh, err := s.recoverBlockAtOffset(currentScanOffset, endOffset) + if err != nil { + if errors.Is(err, io.EOF) { + // reach end of this file, try to read the next file + currentFileIndex := int(currentScanOffset / s.header.MaxDataFileSize) + nextFileIndex, err := safemath.Add(uint64(currentFileIndex), 1) + if err != nil { + return fmt.Errorf("recovery: overflow in file index calculation: %w", err) } - return err - } - s.log.Debug("Recovery: Successfully validated and indexed block", - zap.Uint64("height", bh.Height), - zap.Uint32("blockSize", bh.Size), - zap.Uint64("dataOffset", currentScanOffset), - ) - recoveredHeights = append(recoveredHeights, bh.Height) - blockTotalSize, err := safemath.Add(uint64(sizeOfBlockEntryHeader), uint64(bh.Size)) - if err != nil { - return fmt.Errorf("recovery: overflow in block size calculation: %w", err) - } - currentScanOffset, err = safemath.Add(currentScanOffset, blockTotalSize) - if err != nil { - return fmt.Errorf("recovery: overflow in scan offset calculation: %w", err) + if currentScanOffset, err = safemath.Mul(nextFileIndex, s.header.MaxDataFileSize); err != nil { + return fmt.Errorf("recovery: overflow in scan offset calculation: %w", err) + } + continue } + return err } - s.nextDataWriteOffset.Store(currentScanOffset) - - // Update block heights based on recovered blocks - if len(recoveredHeights) > 0 { - if err := s.updateRecoveredBlockHeights(recoveredHeights); err != nil { - return fmt.Errorf("recovery: failed to update block heights: %w", err) - } + s.log.Debug("Recovery: Successfully validated and indexed block", + zap.Uint64("height", bh.Height), + zap.Uint32("blockSize", bh.Size), + zap.Uint64("dataOffset", currentScanOffset), + ) + recoveredHeights = append(recoveredHeights, bh.Height) + blockTotalSize, err := safemath.Add(uint64(sizeOfBlockEntryHeader), uint64(bh.Size)) + if err != nil { + return fmt.Errorf("recovery: overflow in block size calculation: %w", err) + } + currentScanOffset, err = safemath.Add(currentScanOffset, blockTotalSize) + if err != nil { + return fmt.Errorf("recovery: overflow in scan offset calculation: %w", err) } + } + s.nextDataWriteOffset.Store(currentScanOffset) - if err := s.persistIndexHeader(); err != nil { - return fmt.Errorf("recovery: failed to save index header after recovery scan: %w", err) + // Update block heights based on recovered blocks + if len(recoveredHeights) > 0 { + if err := s.updateRecoveredBlockHeights(recoveredHeights); err != nil { + return fmt.Errorf("recovery: failed to update block heights: %w", err) } + } - heights := s.getBlockHeights() - s.log.Info("Recovery: Scan finished", - zap.Int("recoveredBlocks", len(recoveredHeights)), - zap.Uint64("finalNextWriteOffset", s.nextDataWriteOffset.Load()), - zap.Uint64("maxContiguousBlockHeight", heights.maxContiguousHeight), - zap.Uint64("maxBlockHeight", heights.maxBlockHeight), - ) + if err := s.persistIndexHeader(); err != nil { + return fmt.Errorf("recovery: failed to save index header after recovery scan: %w", err) } + + heights := s.getBlockHeights() + s.log.Info("Recovery: Scan finished", + zap.Int("recoveredBlocks", len(recoveredHeights)), + zap.Uint64("finalNextWriteOffset", s.nextDataWriteOffset.Load()), + zap.Uint64("maxContiguousBlockHeight", heights.maxContiguousHeight), + zap.Uint64("maxBlockHeight", heights.maxBlockHeight), + ) return nil } @@ -1033,6 +1037,7 @@ func (s *Database) closeFiles() { 
s.indexFile.Close() } if s.fileCache != nil { + // closes all data files s.fileCache.Flush() } } @@ -1112,17 +1117,6 @@ func (s *Database) writeBlockAt(offset uint64, bh blockEntryHeader, block BlockD func (s *Database) updateBlockHeights(writtenBlockHeight BlockHeight) error { s.updateBlockHeightsAtomically(func(current *blockHeights) *blockHeights { - if current == nil { - heights := &blockHeights{ - maxBlockHeight: writtenBlockHeight, - maxContiguousHeight: unsetHeight, - } - if writtenBlockHeight == s.header.MinHeight { - heights.maxContiguousHeight = writtenBlockHeight - } - return heights - } - updated := &blockHeights{ maxBlockHeight: current.maxBlockHeight, maxContiguousHeight: current.maxContiguousHeight, diff --git a/x/blockdb/readblock_test.go b/x/blockdb/readblock_test.go index c218716e82e1..34e37d085bd0 100644 --- a/x/blockdb/readblock_test.go +++ b/x/blockdb/readblock_test.go @@ -13,6 +13,30 @@ import ( "github.com/stretchr/testify/require" ) +func TestReadHeader(t *testing.T) { + t.Run("error if block header size exceeds block size", func(t *testing.T) { + db, cleanup := newTestDatabase(t, DefaultConfig()) + defer cleanup() + + block := randomBlock(t) + require.NoError(t, db.WriteBlock(0, block, 1)) + + // load the index file for this block and manually change it such that the header size is larger than block size + indexEntry, err := db.readBlockIndex(0) + require.NoError(t, err) + indexEntry.HeaderSize = indexEntry.Size + 1 + modified, err := indexEntry.MarshalBinary() + require.NoError(t, err) + offset, err := db.indexEntryOffset(0) + require.NoError(t, err) + _, err = db.indexFile.WriteAt(modified, int64(offset)) + require.NoError(t, err) + + _, err = db.ReadHeader(0) + require.ErrorIs(t, err, ErrHeaderSizeTooLarge) + }) +} + func TestReadOperations(t *testing.T) { tests := []struct { name string diff --git a/x/blockdb/writeblock_test.go b/x/blockdb/writeblock_test.go index 3ce220543b9b..7ec63cd25480 100644 --- a/x/blockdb/writeblock_test.go +++ b/x/blockdb/writeblock_test.go @@ -5,6 +5,7 @@ package blockdb import ( "math" + "strings" "sync" "sync/atomic" "testing" @@ -256,6 +257,7 @@ func TestWriteBlock_Errors(t *testing.T) { setup func(db *Database) config DatabaseConfig wantErr error + wantErrMsg string }{ { name: "empty block nil", @@ -330,6 +332,29 @@ func TestWriteBlock_Errors(t *testing.T) { headerSize: 0, wantErr: safemath.ErrOverflow, }, + { + name: "writeBlockAt - data file write failure", + height: 0, + block: make([]byte, 100), + setup: func(db *Database) { + // close the data file to trigger a write error in writeBlockAt + file, err := db.getOrOpenDataFile(0) + require.NoError(t, err) + require.NoError(t, file.Close()) + }, + headerSize: 0, + wantErrMsg: "failed to write block to data file", + }, + { + name: "writeIndexEntryAt - index file write failure", + height: 0, + block: make([]byte, 100), + setup: func(db *Database) { + db.indexFile.Close() + }, + headerSize: 0, + wantErrMsg: "failed to write index entry", + }, } for _, tt := range tests { @@ -347,7 +372,11 @@ func TestWriteBlock_Errors(t *testing.T) { } err := store.WriteBlock(tt.height, tt.block, tt.headerSize) - require.ErrorIs(t, err, tt.wantErr) + if tt.wantErrMsg != "" { + require.True(t, strings.HasPrefix(err.Error(), tt.wantErrMsg), "expected error message to start with %s, got %s", tt.wantErrMsg, err.Error()) + } else { + require.ErrorIs(t, err, tt.wantErr) + } checkDatabaseState(t, store, unsetHeight, unsetHeight) }) } From e1f29dbb2ff825708c0e746f7d56174dd06fa675 Mon Sep 17 00:00:00 2001 
From: Draco Date: Thu, 10 Jul 2025 17:54:48 -0400 Subject: [PATCH 22/27] fix lint and improve test error msg --- x/blockdb/database_test.go | 19 ++++++++++++++++++- x/blockdb/readblock_test.go | 21 ++++++++++++++++++--- 2 files changed, 36 insertions(+), 4 deletions(-) diff --git a/x/blockdb/database_test.go b/x/blockdb/database_test.go index c5c1215ac189..7987574113da 100644 --- a/x/blockdb/database_test.go +++ b/x/blockdb/database_test.go @@ -7,6 +7,7 @@ import ( "encoding" "encoding/binary" "errors" + "fmt" "os" "path/filepath" "sync" @@ -250,6 +251,10 @@ func TestFileCache_Eviction(t *testing.T) { var wg sync.WaitGroup var writeErrors atomic.Int32 + // Thread-safe error message collection + var errorMu sync.Mutex + var errorMessages []string + // Create blocks of 0.5kb each blocks := make([][]byte, numBlocks) for i := range blocks { @@ -266,6 +271,9 @@ func TestFileCache_Eviction(t *testing.T) { err := store.WriteBlock(height, blocks[height], 0) if err != nil { writeErrors.Add(1) + errorMu.Lock() + errorMessages = append(errorMessages, fmt.Sprintf("goroutine %d, height %d: %v", goroutineID, height, err)) + errorMu.Unlock() } } }(g) @@ -273,8 +281,17 @@ func TestFileCache_Eviction(t *testing.T) { wg.Wait() + // Build error message if there were errors + var errorMsg string + if writeErrors.Load() > 0 { + errorMsg = fmt.Sprintf("concurrent writes had %d errors:\n", writeErrors.Load()) + for _, msg := range errorMessages { + errorMsg += fmt.Sprintf(" %s\n", msg) + } + } + // Verify no write errors - require.Zero(t, writeErrors.Load(), "concurrent writes had errors") + require.Zero(t, writeErrors.Load(), errorMsg) // Verify we had some evictions require.Positive(t, evictionCount.Load(), "should have had some cache evictions") diff --git a/x/blockdb/readblock_test.go b/x/blockdb/readblock_test.go index 34e37d085bd0..3d050f44d43a 100644 --- a/x/blockdb/readblock_test.go +++ b/x/blockdb/readblock_test.go @@ -4,6 +4,7 @@ package blockdb import ( + "bytes" "errors" "math" "sync" @@ -243,6 +244,10 @@ func TestReadOperations_Concurrency(t *testing.T) { var wg sync.WaitGroup var errorCount atomic.Int32 + var blockErrors atomic.Int32 + var headerErrors atomic.Int32 + var bodyErrors atomic.Int32 + for i := range numBlocks + 10 { wg.Add(3) // One for each read operation @@ -258,7 +263,9 @@ func TestReadOperations_Concurrency(t *testing.T) { errorCount.Add(1) return } - require.Equal(t, blocks[height], block) + if !bytes.Equal(blocks[height], block) { + blockErrors.Add(1) + } } }(i) @@ -278,7 +285,9 @@ func TestReadOperations_Concurrency(t *testing.T) { if headerSizes[height] == 0 { expectedHeader = nil } - require.Equal(t, expectedHeader, header) + if !bytes.Equal(expectedHeader, header) { + headerErrors.Add(1) + } } }(i) @@ -295,10 +304,16 @@ func TestReadOperations_Concurrency(t *testing.T) { return } expectedBody := blocks[height][headerSizes[height]:] - require.Equal(t, expectedBody, body) + if !bytes.Equal(expectedBody, body) { + bodyErrors.Add(1) + } } }(i) } wg.Wait() + require.Zero(t, errorCount.Load(), "concurrent read operations had errors") + require.Zero(t, blockErrors.Load(), "block data mismatches detected") + require.Zero(t, headerErrors.Load(), "header data mismatches detected") + require.Zero(t, bodyErrors.Load(), "body data mismatches detected") } From 1df6bc13f7487d55813ee70075ac66a0026f63a0 Mon Sep 17 00:00:00 2001 From: Draco Date: Thu, 10 Jul 2025 18:11:03 -0400 Subject: [PATCH 23/27] remove assertion in go routine --- x/blockdb/writeblock_test.go | 1 - 1 file changed, 1 
deletion(-) diff --git a/x/blockdb/writeblock_test.go b/x/blockdb/writeblock_test.go index 7ec63cd25480..4f33fd800912 100644 --- a/x/blockdb/writeblock_test.go +++ b/x/blockdb/writeblock_test.go @@ -225,7 +225,6 @@ func TestWriteBlock_Concurrency(t *testing.T) { err := store.WriteBlock(height, block, 1) if err != nil { - require.NoError(t, err, "WriteBlock failed for iteration %d (height %d)", i, height) errors.Add(1) } }(i) From e94a72e9903493ed5cf716312d31cfebaa786bd4 Mon Sep 17 00:00:00 2001 From: Draco Date: Fri, 11 Jul 2025 10:16:32 -0400 Subject: [PATCH 24/27] limit concurrent calls to persistIndexHeader --- x/blockdb/README.md | 3 --- x/blockdb/database.go | 11 +++++++++++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/x/blockdb/README.md b/x/blockdb/README.md index eaa1b89badb2..78bd3cd8eb24 100644 --- a/x/blockdb/README.md +++ b/x/blockdb/README.md @@ -189,11 +189,8 @@ if err != nil { ## TODO -- Compress data files to reduce storage size -- ~~Split data across multiple files when `MaxDataFileSize` is reached~~ - Implement a block cache for recently accessed blocks - Use a buffered pool to avoid allocations on reads and writes -- ~~Add tests for core functionality~~ - Add metrics - Add performance benchmarks - Consider supporting missing data files (currently we error if any data files are missing) diff --git a/x/blockdb/database.go b/x/blockdb/database.go index 78fd751e9de7..dfeaddb23f56 100644 --- a/x/blockdb/database.go +++ b/x/blockdb/database.go @@ -200,6 +200,8 @@ type Database struct { blockHeights atomic.Pointer[blockHeights] // nextDataWriteOffset tracks the next position to write new data in the data file. nextDataWriteOffset atomic.Uint64 + // headerWriteOccupied prevents concurrent writes to the index header + headerWriteOccupied atomic.Bool } // New creates a block database. @@ -691,6 +693,15 @@ func (s *Database) writeIndexEntryAt(indexFileOffset, dataFileBlockOffset uint64 } func (s *Database) persistIndexHeader() error { + if s.headerWriteOccupied.CompareAndSwap(false, true) { + defer s.headerWriteOccupied.Store(false) + return s.persistIndexHeaderInternal() + } + s.log.Warn("Skipping persistIndexHeader due to concurrent header write") + return nil +} + +func (s *Database) persistIndexHeaderInternal() error { // The index file must be fsync'd before the header is written to prevent // a state where the header is persisted but the index entries it refers to // are not. This could lead to data inconsistency on recovery. 
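Patch 24 above serializes index-header writes with an atomic flag rather than by holding a lock: one goroutine wins the flag and persists, while concurrent callers skip and return nil, matching the log line in the diff. A minimal standalone sketch of that single-flight pattern is below; the type and field names here are illustrative only and are not the actual blockdb types.

```go
package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

// headerPersister mimics the guard from patch 24: only one goroutine
// persists at a time; concurrent callers skip instead of blocking.
type headerPersister struct {
	busy atomic.Bool // illustrative stand-in for headerWriteOccupied
}

func (p *headerPersister) persist(id int) {
	if p.busy.CompareAndSwap(false, true) {
		defer p.busy.Store(false)
		fmt.Printf("goroutine %d persisted the header\n", id)
		return
	}
	// A skipped caller is treated as a no-op, as in the diff.
	fmt.Printf("goroutine %d skipped: a persist is already in flight\n", id)
}

func main() {
	var p headerPersister
	var wg sync.WaitGroup
	for i := 0; i < 4; i++ {
		wg.Add(1)
		go func(id int) {
			defer wg.Done()
			p.persist(id)
		}(i)
	}
	wg.Wait()
}
```

The trade-off of this pattern over a mutex is that a concurrent caller does not wait for the in-flight write to finish; that is acceptable here because the header is persisted again on later writes and during recovery.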
From 748dbf4d83576f4cbb6838a75e48b93cc0839e18 Mon Sep 17 00:00:00 2001 From: Draco Date: Fri, 11 Jul 2025 10:54:49 -0400 Subject: [PATCH 25/27] add warning log if config values differ from index header --- x/blockdb/database.go | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/x/blockdb/database.go b/x/blockdb/database.go index dfeaddb23f56..a6b3fc0fd066 100644 --- a/x/blockdb/database.go +++ b/x/blockdb/database.go @@ -1002,6 +1002,7 @@ func (s *Database) loadOrInitializeHeader() error { // reset index file if its empty if fileInfo.Size() == 0 { + s.log.Info("Index file is empty, writing initial index file header") s.header = indexFileHeader{ Version: IndexFileVersion, MinHeight: s.config.MinimumHeight, @@ -1040,6 +1041,22 @@ func (s *Database) loadOrInitializeHeader() error { s.nextDataWriteOffset.Store(s.header.NextWriteOffset) s.setBlockHeights(s.header.MaxHeight, s.header.MaxContiguousHeight) + // log a warning if the config does not match the index header + if s.config.MinimumHeight != s.header.MinHeight { + s.log.Warn( + "MinimumHeight in blockdb config does not match the index header. The MinimumHeight in the index header will be used.", + zap.Uint64("configMinimumHeight", s.config.MinimumHeight), + zap.Uint64("headerMinimumHeight", s.header.MinHeight), + ) + } + if s.config.MaxDataFileSize != s.header.MaxDataFileSize { + s.log.Warn( + "MaxDataFileSize in blockdb config does not match the index header. The MaxDataFileSize in the index header will be used.", + zap.Uint64("configMaxDataFileSize", s.config.MaxDataFileSize), + zap.Uint64("headerMaxDataFileSize", s.header.MaxDataFileSize), + ) + } + return nil } From e17d95107c78c3e1b1cad7b0d36e071bd92428a5 Mon Sep 17 00:00:00 2001 From: Draco Date: Fri, 11 Jul 2025 11:08:17 -0400 Subject: [PATCH 26/27] change warn logs to info --- x/blockdb/database.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/x/blockdb/database.go b/x/blockdb/database.go index a6b3fc0fd066..30dbfb8f3189 100644 --- a/x/blockdb/database.go +++ b/x/blockdb/database.go @@ -697,7 +697,7 @@ func (s *Database) persistIndexHeader() error { defer s.headerWriteOccupied.Store(false) return s.persistIndexHeaderInternal() } - s.log.Warn("Skipping persistIndexHeader due to concurrent header write") + s.log.Info("Skipping persistIndexHeader due to concurrent header write") return nil } @@ -1043,14 +1043,14 @@ func (s *Database) loadOrInitializeHeader() error { // log a warning if the config does not match the index header if s.config.MinimumHeight != s.header.MinHeight { - s.log.Warn( + s.log.Info( "MinimumHeight in blockdb config does not match the index header. The MinimumHeight in the index header will be used.", zap.Uint64("configMinimumHeight", s.config.MinimumHeight), zap.Uint64("headerMinimumHeight", s.header.MinHeight), ) } if s.config.MaxDataFileSize != s.header.MaxDataFileSize { - s.log.Warn( + s.log.Info( "MaxDataFileSize in blockdb config does not match the index header. 
The MaxDataFileSize in the index header will be used.", zap.Uint64("configMaxDataFileSize", s.config.MaxDataFileSize), zap.Uint64("headerMaxDataFileSize", s.header.MaxDataFileSize), From 3923a26bc79e01a3f4714fe2ae7deeb6e5a51def Mon Sep 17 00:00:00 2001 From: Draco Date: Mon, 14 Jul 2025 11:48:20 -0400 Subject: [PATCH 27/27] add error log for index entry failure --- x/blockdb/database.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/x/blockdb/database.go b/x/blockdb/database.go index 30dbfb8f3189..d25f931eb2b2 100644 --- a/x/blockdb/database.go +++ b/x/blockdb/database.go @@ -620,6 +620,10 @@ func (s *Database) HasBlock(height BlockHeight) (bool, error) { if errors.Is(err, ErrBlockNotFound) { return false, nil } + s.log.Error("Failed to check if block exists: failed to read index entry", + zap.Uint64("height", height), + zap.Error(err), + ) return false, err } return true, nil
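Taken together, the tests in this series exercise a small public surface: `DefaultConfig().WithDir`, `New`, `WriteBlock`, `ReadBlock`, `MaxContiguousHeight`, and `Close`. A minimal sketch composing those calls, assuming the `x/blockdb` import path and an illustrative data directory (error handling abbreviated):

```go
package main

import (
	"fmt"
	"log"

	"github.com/ava-labs/avalanchego/utils/logging"
	"github.com/ava-labs/avalanchego/x/blockdb"
)

func main() {
	// Open a database with default options in a local directory,
	// mirroring DefaultConfig().WithDir(...) as used in the tests.
	config := blockdb.DefaultConfig().WithDir("/tmp/blockdb-demo")
	db, err := blockdb.New(config, logging.NoLog{})
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	// Write a block at height 0 with no header portion (headerSize = 0).
	if err := db.WriteBlock(0, []byte("test block data"), 0); err != nil {
		log.Fatal(err)
	}

	// Read it back and report the maximum contiguous height.
	block, err := db.ReadBlock(0)
	if err != nil {
		log.Fatal(err)
	}
	mch, ok := db.MaxContiguousHeight()
	fmt.Printf("block=%q maxContiguous=%d found=%v\n", block, mch, ok)
}
```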