Skip to content

LedgerDB: implement predictable snapshotting #1575

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: amesgen/ledgerdb-garbage-collect-states
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ openLedgerDB ::
, LedgerDB.TestInternals' IO blk
)
openLedgerDB [email protected]{LedgerDB.lgrFlavorArgs = LedgerDB.LedgerDbFlavorArgsV1 bss} = do
(ledgerDB, _, intLedgerDB) <-
(ledgerDB, intLedgerDB) <-
LedgerDB.openDBInternal
lgrDbArgs
( LedgerDB.V1.mkInitDb
Expand All @@ -76,7 +76,7 @@ openLedgerDB [email protected]{LedgerDB.lgrFlavorArgs = LedgerDB.L
genesisPoint
pure (ledgerDB, intLedgerDB)
openLedgerDB [email protected]{LedgerDB.lgrFlavorArgs = LedgerDB.LedgerDbFlavorArgsV2 args} = do
(ledgerDB, _, intLedgerDB) <-
(ledgerDB, intLedgerDB) <-
LedgerDB.openDBInternal
lgrDbArgs
( LedgerDB.V2.mkInitDb
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
### Breaking

- LedgerDB: implemented *predictable* snapshots, i.e. different nodes with the
same configuration will now create snapshots for the same slots.

See 'SnapshotPolicyArgs' for more details.
3 changes: 1 addition & 2 deletions ouroboros-consensus/ouroboros-consensus.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -707,7 +707,6 @@ test-suite storage-test
Test.Ouroboros.Storage.ImmutableDB.StateMachine
Test.Ouroboros.Storage.LedgerDB
Test.Ouroboros.Storage.LedgerDB.Serialisation
Test.Ouroboros.Storage.LedgerDB.SnapshotPolicy
Test.Ouroboros.Storage.LedgerDB.Snapshots
Test.Ouroboros.Storage.LedgerDB.StateMachine
Test.Ouroboros.Storage.LedgerDB.StateMachine.TestBlock
Expand All @@ -733,7 +732,7 @@ test-suite storage-test
cardano-binary,
cardano-crypto-class ^>=2.2,
cardano-ledger-binary:testlib,
cardano-ledger-core:{cardano-ledger-core, testlib},
cardano-ledger-core:cardano-ledger-core,
cardano-slotting:{cardano-slotting, testlib},
cardano-strict-containers,
cborg,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ openDBInternal args launchBgTasks = runWithTempRegistry $ do
(chainDB, testing, env) <- lift $ do
traceWith tracer $ TraceOpenEvent (OpenedVolatileDB maxSlot)
traceWith tracer $ TraceOpenEvent StartedOpeningLgrDB
(lgrDB, replayed) <-
lgrDB <-
LedgerDB.openDB
argsLgrDb
(ImmutableDB.streamAPI immutableDB)
Expand Down Expand Up @@ -279,8 +279,7 @@ openDBInternal args launchBgTasks = runWithTempRegistry $ do
Internal
{ intCopyToImmutableDB = getEnv h (withFuse copyTestFuse . Background.copyToImmutableDB)
, intGarbageCollect = getEnv1 h Background.garbageCollect
, intTryTakeSnapshot = getEnv h $ \env' ->
void $ LedgerDB.tryTakeSnapshot (cdbLedgerDB env') Nothing maxBound
, intTryTakeSnapshot = getEnv h $ LedgerDB.tryTakeSnapshot . cdbLedgerDB
, intAddBlockRunner = getEnv h (Background.addBlockRunner addBlockTestFuse)
, intKillBgThreads = varKillBgThreads
}
Expand All @@ -291,7 +290,7 @@ openDBInternal args launchBgTasks = runWithTempRegistry $ do
(castPoint $ AF.anchorPoint chain)
(castPoint $ AF.headPoint chain)

when launchBgTasks $ Background.launchBgTasks env replayed
when launchBgTasks $ Background.launchBgTasks env

return (chainDB, testing, env)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TupleSections #-}

-- | Background tasks:
--
Expand Down Expand Up @@ -53,7 +52,6 @@ import Data.Sequence.Strict (StrictSeq (..))
import qualified Data.Sequence.Strict as Seq
import Data.Time.Clock
import Data.Void (Void)
import Data.Word
import GHC.Generics (Generic)
import GHC.Stack (HasCallStack)
import Ouroboros.Consensus.Block
Expand Down Expand Up @@ -93,10 +91,8 @@ launchBgTasks ::
, HasHardForkHistory blk
) =>
ChainDbEnv m blk ->
-- | Number of immutable blocks replayed on ledger DB startup
Word64 ->
m ()
launchBgTasks cdb@CDB{..} replayed = do
launchBgTasks cdb@CDB{..} = do
!addBlockThread <-
launch "ChainDB.addBlockRunner" $
addBlockRunner cdbChainSelFuse cdb
Expand All @@ -107,7 +103,7 @@ launchBgTasks cdb@CDB{..} replayed = do
garbageCollect cdb
!copyAndSnapshotThread <-
launch "ChainDB.copyAndSnapshotRunner" $
copyAndSnapshotRunner cdb gcSchedule replayed cdbCopyFuse
copyAndSnapshotRunner cdb gcSchedule cdbCopyFuse
atomically $
writeTVar cdbKillBgThreads $
sequence_ [addBlockThread, gcThread, copyAndSnapshotThread]
Expand Down Expand Up @@ -241,30 +237,22 @@ copyAndSnapshotRunner ::
) =>
ChainDbEnv m blk ->
GcSchedule m ->
-- | Number of immutable blocks replayed on ledger DB startup
Word64 ->
Fuse m ->
m Void
copyAndSnapshotRunner cdb@CDB{..} gcSchedule replayed fuse = do
copyAndSnapshotRunner cdb@CDB{..} gcSchedule fuse = do
-- this first flush will persist the differences that come from the initial
-- chain selection.
LedgerDB.tryFlush cdbLedgerDB
loop =<< LedgerDB.tryTakeSnapshot cdbLedgerDB Nothing replayed
forever copyAndSnapshot
where
SecurityParam k = configSecurityParam cdbTopLevelConfig

loop :: LedgerDB.SnapCounters -> m Void
loop counters = do
let LedgerDB.SnapCounters
{ prevSnapshotTime
, ntBlocksSinceLastSnap
} = counters

copyAndSnapshot :: m ()
copyAndSnapshot = do
-- Wait for the chain to grow larger than @k@
numToWrite <- atomically $ do
atomically $ do
curChain <- icWithoutTime <$> readTVar cdbChain
check $ fromIntegral (AF.length curChain) > unNonZero k
return $ fromIntegral (AF.length curChain) - unNonZero k

-- Copy blocks to ImmutableDB
--
Expand All @@ -273,16 +261,13 @@ copyAndSnapshotRunner cdb@CDB{..} gcSchedule replayed fuse = do
gcSlotNo <- withFuse fuse (copyToImmutableDB cdb)
scheduleGC' gcSlotNo

LedgerDB.tryTakeSnapshot cdbLedgerDB

-- See the Haddocks above as for why we garbage-collect the LedgerDB already
-- here (instead of as part of the scheduled GC).
whenJust (withOriginToMaybe gcSlotNo) $ LedgerDB.garbageCollect cdbLedgerDB
LedgerDB.tryFlush cdbLedgerDB

now <- getMonotonicTime
let ntBlocksSinceLastSnap' = ntBlocksSinceLastSnap + numToWrite

loop =<< LedgerDB.tryTakeSnapshot cdbLedgerDB ((,now) <$> prevSnapshotTime) ntBlocksSinceLastSnap'

scheduleGC' :: WithOrigin SlotNo -> m ()
scheduleGC' Origin = return ()
scheduleGC' (NotOrigin slotNo) =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ module Ouroboros.Consensus.Storage.LedgerDB
) where

import Data.Functor.Contravariant ((>$<))
import Data.Word
import Ouroboros.Consensus.Block
import Ouroboros.Consensus.HardFork.Abstract
import Ouroboros.Consensus.Ledger.Inspect
Expand Down Expand Up @@ -58,7 +57,7 @@ openDB ::
Point blk ->
-- | How to get blocks from the ChainDB
ResolveBlock m blk ->
m (LedgerDB' m blk, Word64)
m (LedgerDB' m blk)
openDB
args
stream
Expand Down Expand Up @@ -94,11 +93,9 @@ doOpenDB ::
InitDB db m blk ->
StreamAPI m blk blk ->
Point blk ->
m (LedgerDB' m blk, Word64)
m (LedgerDB' m blk)
doOpenDB args initDb stream replayGoal =
f <$> openDBInternal args initDb stream replayGoal
where
f (ldb, replayCounter, _) = (ldb, replayCounter)
fst <$> openDBInternal args initDb stream replayGoal

-- | Open the ledger DB and expose internals for testing purposes
openDBInternal ::
Expand All @@ -111,10 +108,10 @@ openDBInternal ::
InitDB db m blk ->
StreamAPI m blk blk ->
Point blk ->
m (LedgerDB' m blk, Word64, TestInternals' m blk)
m (LedgerDB' m blk, TestInternals' m blk)
openDBInternal args@(LedgerDbArgs{lgrHasFS = SomeHasFS fs}) initDb stream replayGoal = do
createDirectoryIfMissing fs True (mkFsPath [])
(_initLog, db, replayCounter) <-
(_initLog, db) <-
initialize
replayTracer
snapTracer
Expand All @@ -125,7 +122,7 @@ openDBInternal args@(LedgerDbArgs{lgrHasFS = SomeHasFS fs}) initDb stream replay
initDb
lgrStartSnapshot
(ledgerDb, internal) <- mkLedgerDb initDb db
return (ledgerDb, replayCounter, internal)
return (ledgerDb, internal)
where
LedgerDbArgs
{ lgrConfig
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -149,9 +149,6 @@ module Ouroboros.Consensus.Storage.LedgerDB.API
, withPrivateTipForker
, withTipForker

-- * Snapshots
, SnapCounters (..)

-- * Testing
, TestInternals (..)
, TestInternals'
Expand All @@ -160,7 +157,6 @@ module Ouroboros.Consensus.Storage.LedgerDB.API

import Codec.Serialise
import qualified Control.Monad as Monad
import Control.Monad.Class.MonadTime.SI
import Control.Monad.Except
import Control.ResourceRegistry
import Control.Tracer
Expand Down Expand Up @@ -262,18 +258,12 @@ data LedgerDB m l blk = LedgerDB
-- * The set of previously applied points.
, tryTakeSnapshot ::
l ~ ExtLedgerState blk =>
Maybe (Time, Time) ->
Word64 ->
m SnapCounters
m ()
-- ^ If the provided arguments indicate so (based on the SnapshotPolicy with
-- which this LedgerDB was opened), take a snapshot and delete stale ones.
--
-- The arguments are:
--
-- - If a snapshot has been taken already, the time at which it was taken
-- and the current time.
--
-- - How many blocks have been processed since the last snapshot.
-- For V1, this must not be called concurrently with 'garbageCollect' and/or
-- 'tryFlush'.
, tryFlush :: m ()
-- ^ Flush V1 in-memory LedgerDB state to disk, if possible. This is a no-op
-- for implementations that do not need an explicit flush function.
Expand Down Expand Up @@ -420,18 +410,6 @@ getReadOnlyForker ::
m (Either GetForkerError (ReadOnlyForker m l blk))
getReadOnlyForker ldb rr pt = fmap readOnlyForker <$> getForkerAtTarget ldb rr pt

{-------------------------------------------------------------------------------
Snapshots
-------------------------------------------------------------------------------}

-- | Counters to keep track of when we made the last snapshot.
data SnapCounters = SnapCounters
{ prevSnapshotTime :: !(Maybe Time)
-- ^ When was the last time we made a snapshot
, ntBlocksSinceLastSnap :: !Word64
-- ^ How many blocks have we processed since the last snapshot
}

{-------------------------------------------------------------------------------
Initialization
-------------------------------------------------------------------------------}
Expand Down Expand Up @@ -514,7 +492,7 @@ initialize ::
Point blk ->
InitDB db m blk ->
Maybe DiskSnapshot ->
m (InitLog blk, db, Word64)
m (InitLog blk, db)
initialize
replayTracer
snapTracer
Expand All @@ -536,7 +514,6 @@ initialize
m
( InitLog blk
, db
, Word64
)
tryNewestFirst acc [] = do
-- We're out of snapshots. Start at genesis
Expand All @@ -557,7 +534,7 @@ initialize
Left err -> do
closeDb initDb
error $ "Invariant violation: invalid immutable chain " <> show err
Right (db, replayed) -> return (acc InitFromGenesis, db, replayed)
Right db -> return (acc InitFromGenesis, db)
tryNewestFirst acc (s : ss) = do
eInitDb <- initFromSnapshot s
case eInitDb of
Expand Down Expand Up @@ -609,7 +586,7 @@ initialize
Monad.when (diskSnapshotIsTemporary s) $ deleteSnapshot hasFS s
closeDb initDb
tryNewestFirst (acc . InitFailure s err) ss
Right (db, replayed) -> return (acc (InitFromSnapshot s pt), db, replayed)
Right db -> return (acc (InitFromSnapshot s pt), db)

replayTracer' =
decorateReplayTracerWithGoal
Expand All @@ -633,32 +610,27 @@ replayStartingWith ::
db ->
Point blk ->
InitDB db m blk ->
ExceptT (SnapshotFailure blk) m (db, Word64)
ExceptT (SnapshotFailure blk) m db
replayStartingWith tracer cfg stream initDb from InitDB{initReapplyBlock, currentTip} = do
streamAll
stream
from
InitFailureTooRecent
(initDb, 0)
initDb
push
where
push ::
blk ->
(db, Word64) ->
m (db, Word64)
push blk (!db, !replayed) = do
push :: blk -> db -> m db
push blk !db = do
!db' <- initReapplyBlock cfg blk db

let !replayed' = replayed + 1

events =
let events =
inspectLedger
(getExtLedgerCfg (ledgerDbCfg cfg))
(currentTip db)
(currentTip db')

traceWith tracer (ReplayedBlock (blockRealPoint blk) events)
return (db', replayed')
return db'

{-------------------------------------------------------------------------------
Trace replay events
Expand Down
Loading