Skip to content

LedgerDB: prune on garbage collection instead of on every change #1513

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 11 commits into
base: amesgen/ledgerdb-state-machine-precondition-bug
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
### Breaking

- Changed pruning of immutable ledger states to happen on LedgerDB garbage
collection instead of directly on every block adoption. This is purely an
internal refactoring (with breaking API changes) supporting predictable ledger
snapshotting.

- Avoid maintaining volatile ledger states during ledger replay, making it
slightly more efficient.
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,11 @@ copyToImmutableDB CDB{..} = electric $ do
-- GC can happen, when we restart the node and schedule the /next/ GC, it will
-- /imply/ any previously scheduled GC, since GC is driven by slot number
-- ("garbage collect anything older than @x@").
--
-- Also garbage-collects the LedgerDB (instead of as part of the scheduled GC),
-- as this is a cheap operation, and we reduce the heap size this way by pruning
-- (the 'DbChangelog' in V1, or the 'LedgerSeq' in V2) earlier than we otherwise
-- would.
copyAndSnapshotRunner ::
forall m blk.
( IOLike m
Expand Down Expand Up @@ -265,8 +270,12 @@ copyAndSnapshotRunner cdb@CDB{..} gcSchedule replayed fuse = do
--
-- This is a synchronous operation: when it returns, the blocks have been
-- copied to disk (though not flushed, necessarily).
withFuse fuse (copyToImmutableDB cdb) >>= scheduleGC'
gcSlotNo <- withFuse fuse (copyToImmutableDB cdb)
scheduleGC' gcSlotNo

-- See the Haddocks above as for why we garbage-collect the LedgerDB already
-- here (instead of as part of the scheduled GC).
whenJust (withOriginToMaybe gcSlotNo) $ LedgerDB.garbageCollect cdbLedgerDB
LedgerDB.tryFlush cdbLedgerDB

now <- getMonotonicTime
Expand All @@ -293,9 +302,6 @@ copyAndSnapshotRunner cdb@CDB{..} gcSchedule replayed fuse = do
-- | Trigger a garbage collection for blocks older than the given 'SlotNo' on
-- the VolatileDB.
--
-- Also removes the corresponding cached "previously applied points" from the
-- LedgerDB.
--
-- This is thread-safe as the VolatileDB locks itself while performing a GC.
--
-- When calling this function it is __critical__ that the blocks that will be
Expand All @@ -309,7 +315,6 @@ garbageCollect :: forall m blk. IOLike m => ChainDbEnv m blk -> SlotNo -> m ()
garbageCollect CDB{..} slotNo = do
VolatileDB.garbageCollect cdbVolatileDB slotNo
atomically $ do
LedgerDB.garbageCollect cdbLedgerDB slotNo
modifyTVar cdbInvalid $ fmap $ Map.filter ((>= slotNo) . invalidBlockSlotNo)
traceWith cdbTracer $ TraceGCEvent $ PerformedGC slotNo

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -251,9 +251,15 @@ data LedgerDB m l blk = LedgerDB
-- back as many blocks as the passed @Word64@.
, getPrevApplied :: STM m (Set (RealPoint blk))
-- ^ Get the references to blocks that have previously been applied.
, garbageCollect :: SlotNo -> STM m ()
-- ^ Garbage collect references to old blocks that have been previously
-- applied and committed.
, garbageCollect :: SlotNo -> m ()
-- ^ Garbage collect references to old state that is older than the given
-- slot.
--
-- Concretely, this affects:
--
-- * Ledger states (and potentially underlying handles for on-disk storage).
--
-- * The set of previously applied points.
, tryTakeSnapshot ::
l ~ ExtLedgerState blk =>
Maybe (Time, Time) ->
Expand Down Expand Up @@ -298,7 +304,14 @@ data TestInternals m l blk = TestInternals
{ wipeLedgerDB :: m ()
, takeSnapshotNOW :: WhereToTakeSnapshot -> Maybe String -> m ()
, push :: ExtLedgerState blk DiffMK -> m ()
-- ^ Push a ledger state, and prune the 'LedgerDB' to its immutable tip.
--
-- This does not modify the set of previously applied points.
, reapplyThenPushNOW :: blk -> m ()
-- ^ Apply block to the tip ledger state (using reapplication), and prune the
-- 'LedgerDB' to its immutable tip.
--
-- This does not modify the set of previously applied points.
, truncateSnapshots :: m ()
, closeLedgerDB :: m ()
, getNumLedgerTablesHandles :: m Word64
Expand Down Expand Up @@ -456,11 +469,10 @@ data InitDB db m blk = InitDB
-- ^ Closing the database, to be reopened again with a different snapshot or
-- with the genesis state.
, initReapplyBlock :: !(LedgerDbCfg (ExtLedgerState blk) -> blk -> db -> m db)
-- ^ Reapply a block from the immutable DB when initializing the DB.
-- ^ Reapply a block from the immutable DB when initializing the DB. Prune the
-- LedgerDB such that there are no volatile states.
, currentTip :: !(db -> LedgerState blk EmptyMK)
-- ^ Getting the current tip for tracing the Ledger Events.
, pruneDb :: !(db -> m db)
-- ^ Prune the database so that no immutable states are considered volatile.
, mkLedgerDb ::
!(db -> m (LedgerDB m (ExtLedgerState blk) blk, TestInternals m (ExtLedgerState blk) blk))
-- ^ Create a LedgerDB from the initialized data structures from previous
Expand Down Expand Up @@ -545,13 +557,7 @@ initialize
Left err -> do
closeDb initDb
error $ "Invariant violation: invalid immutable chain " <> show err
Right (db, replayed) -> do
db' <- pruneDb dbIface db
return
( acc InitFromGenesis
, db'
, replayed
)
Right (db, replayed) -> return (acc InitFromGenesis, db, replayed)
tryNewestFirst acc (s : ss) = do
eInitDb <- initFromSnapshot s
case eInitDb of
Expand Down Expand Up @@ -603,9 +609,7 @@ initialize
Monad.when (diskSnapshotIsTemporary s) $ deleteSnapshot hasFS s
closeDb initDb
tryNewestFirst (acc . InitFailure s err) ss
Right (db, replayed) -> do
db' <- pruneDb dbIface db
return (acc (InitFromSnapshot s pt), db', replayed)
Right (db, replayed) -> return (acc (InitFromSnapshot s pt), db, replayed)

replayTracer' =
decorateReplayTracerWithGoal
Expand Down Expand Up @@ -775,10 +779,10 @@ type LedgerSupportsLedgerDB blk =
-------------------------------------------------------------------------------}

-- | Options for prunning the LedgerDB
--
-- Rather than using a plain `Word64` we use this to be able to distinguish that
-- we are indeed using
-- 1. @0@ in places where it is necessary
-- 2. the security parameter as is, in other places
data LedgerDbPrune = LedgerDbPruneAll | LedgerDbPruneKeeping SecurityParam
data LedgerDbPrune
= -- | Prune all states, keeping only the current tip.
LedgerDbPruneAll
| -- | Prune such that all (non-anchor) states are not older than the given
-- slot.
LedgerDbPruneBeforeSlot SlotNo
deriving Show
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,15 @@
-- module will be gone.
module Ouroboros.Consensus.Storage.LedgerDB.V1 (mkInitDb) where

import Cardano.Ledger.BaseTypes.NonZero (NonZero (..))
import Control.Arrow ((>>>))
import Control.Monad
import Control.Monad.Except
import Control.Monad.Trans (lift)
import Control.ResourceRegistry
import Control.Tracer
import qualified Data.Foldable as Foldable
import Data.Functor ((<&>))
import Data.Functor.Contravariant ((>$<))
import Data.Kind (Type)
import Data.Map (Map)
Expand Down Expand Up @@ -119,7 +121,6 @@ mkInitDb args bss getBlock =
else pure chlog'
pure (chlog'', r, bstore)
, currentTip = \(ch, _, _) -> ledgerState . current $ ch
, pruneDb = \(ch, r, bs) -> pure (pruneToImmTipOnly ch, r, bs)
, mkLedgerDb = \(db, ldbBackingStoreKey, ldbBackingStore) -> do
(varDB, prevApplied) <-
(,) <$> newTVarIO db <*> newTVarIO Set.empty
Expand Down Expand Up @@ -185,7 +186,7 @@ implMkLedgerDb h =
, getForkerAtTarget = newForkerAtTarget h
, validateFork = getEnv5 h (implValidate h)
, getPrevApplied = getEnvSTM h implGetPrevApplied
, garbageCollect = getEnvSTM1 h implGarbageCollect
, garbageCollect = getEnv1 h implGarbageCollect
, tryTakeSnapshot = getEnv2 h implTryTakeSnapshot
, tryFlush = getEnv h implTryFlush
, closeDB = implCloseDB h
Expand All @@ -200,10 +201,15 @@ implGetVolatileTip ::
implGetVolatileTip = fmap current . readTVar . ldbChangelog

implGetImmutableTip ::
MonadSTM m =>
(MonadSTM m, GetTip l) =>
LedgerDBEnv m l blk ->
STM m (l EmptyMK)
implGetImmutableTip = fmap anchor . readTVar . ldbChangelog
implGetImmutableTip env =
-- The DbChangelog might contain more than k states if they have not yet
-- been garbage-collected.
fmap (AS.anchor . AS.anchorNewest (envMaxRollbacks env) . changelogStates)
. readTVar
$ ldbChangelog env

implGetPastLedgerState ::
( MonadSTM m
Expand All @@ -214,7 +220,17 @@ implGetPastLedgerState ::
, HeaderHash l ~ HeaderHash blk
) =>
LedgerDBEnv m l blk -> Point blk -> STM m (Maybe (l EmptyMK))
implGetPastLedgerState env point = getPastLedgerAt point <$> readTVar (ldbChangelog env)
implGetPastLedgerState env point =
readTVar (ldbChangelog env) <&> \chlog -> do
-- The DbChangelog might contain more than k states if they have not yet
-- been garbage-collected, so make sure that the point is volatile (or the
-- immutable tip).
guard $
AS.withinBounds
(pointSlot point)
((point ==) . castPoint . either getTip getTip)
(AS.anchorNewest (envMaxRollbacks env) (changelogStates chlog))
getPastLedgerAt point chlog

implGetHeaderStateHistory ::
( MonadSTM m
Expand All @@ -237,6 +253,9 @@ implGetHeaderStateHistory env = do
pure
. HeaderStateHistory
. AS.bimap mkHeaderStateWithTime' mkHeaderStateWithTime'
-- The DbChangelog might contain more than k states if they have not yet
-- been garbage-collected, so only take the corresponding suffix.
. AS.anchorNewest (envMaxRollbacks env)
$ changelogStates ldb

implValidate ::
Expand Down Expand Up @@ -274,10 +293,17 @@ implValidate h ldbEnv rr tr cache rollbacks hdrs =
implGetPrevApplied :: MonadSTM m => LedgerDBEnv m l blk -> STM m (Set (RealPoint blk))
implGetPrevApplied env = readTVar (ldbPrevApplied env)

-- | Remove all points with a slot older than the given slot from the set of
-- previously applied points.
implGarbageCollect :: MonadSTM m => LedgerDBEnv m l blk -> SlotNo -> STM m ()
implGarbageCollect env slotNo =
-- | Remove 'DbChangelog' states older than the given slot, and all points with
-- a slot older than the given slot from the set of previously applied points.
implGarbageCollect ::
( MonadSTM m
, IsLedger (LedgerState blk)
, l ~ ExtLedgerState blk
) =>
LedgerDBEnv m l blk -> SlotNo -> m ()
implGarbageCollect env slotNo = atomically $ do
modifyTVar (ldbChangelog env) $
prune (LedgerDbPruneBeforeSlot slotNo)
modifyTVar (ldbPrevApplied env) $
Set.dropWhileAntitone ((< slotNo) . realPointSlot)

Expand Down Expand Up @@ -410,7 +436,7 @@ implIntPush ::
LedgerDBEnv m l blk -> l DiffMK -> m ()
implIntPush env st = do
chlog <- readTVarIO $ ldbChangelog env
let chlog' = prune (LedgerDbPruneKeeping (ledgerDbCfgSecParam $ ldbCfg env)) $ extend st chlog
let chlog' = pruneToImmTipOnly $ extend st chlog
atomically $ writeTVar (ldbChangelog env) chlog'

implIntReapplyThenPush ::
Expand Down Expand Up @@ -558,6 +584,10 @@ deriving instance
) =>
NoThunks (LedgerDBEnv m l blk)

-- | Return the security parameter @k@. Convenience function.
envMaxRollbacks :: LedgerDBEnv m l blk -> Word64
envMaxRollbacks = unNonZero . maxRollbacks . ledgerDbCfgSecParam . ldbCfg

-- | Check if the LedgerDB is open, if so, executing the given function on the
-- 'LedgerDBEnv', otherwise, throw a 'CloseDBError'.
getEnv ::
Expand Down Expand Up @@ -729,27 +759,36 @@ acquireAtTarget ::
ReadLocked m (Either GetForkerError (DbChangelog l))
acquireAtTarget ldbEnv target = readLocked $ runExceptT $ do
dblog <- lift $ readTVarIO (ldbChangelog ldbEnv)
-- The DbChangelog might contain more than k states if they have not yet
-- been garbage-collected.
let immTip :: Point blk
immTip = castPoint $ getTip $ AS.anchor $ AS.anchorNewest k $ changelogStates dblog

rollbackTo pt
| pointSlot pt < pointSlot immTip = throwError $ PointTooOld Nothing
| otherwise = case rollback pt dblog of
Nothing -> throwError PointNotOnChain
Just dblog' -> pure dblog'
-- Get the prefix of the dblog ending in the specified target.
case target of
Right VolatileTip -> pure dblog
Right ImmutableTip -> pure $ rollbackToAnchor dblog
Right (SpecificPoint pt) -> do
let immTip = getTip $ anchor dblog
case rollback pt dblog of
Nothing
| pointSlot pt < pointSlot immTip -> throwError $ PointTooOld Nothing
| otherwise -> throwError PointNotOnChain
Just dblog' -> pure dblog'
Left n -> case rollbackN n dblog of
Nothing ->
Right ImmutableTip -> rollbackTo immTip
Right (SpecificPoint pt) -> rollbackTo pt
Left n -> do
let rollbackMax = maxRollback dblog `min` k
when (n > rollbackMax) $
throwError $
PointTooOld $
Just
ExceededRollback
{ rollbackMaximum = maxRollback dblog
{ rollbackMaximum = rollbackMax
, rollbackRequested = n
}
Just dblog' -> pure dblog'
case rollbackN n dblog of
Nothing -> error "unreachable"
Just dblog' -> pure dblog'
where
k = envMaxRollbacks ldbEnv

{-------------------------------------------------------------------------------
Make forkers from consistent views
Expand Down
Loading
Loading