From 6fb56dba4289b1f647d3ebcd5431b4c7eb2e8017 Mon Sep 17 00:00:00 2001 From: Joris Dral Date: Mon, 23 Jun 2025 11:28:41 +0200 Subject: [PATCH 1/8] `blockio`: move `IO`-specific functions from `API` to an internal module --- blockio/blockio.cabal | 1 + blockio/src-linux/System/FS/BlockIO/Async.hs | 3 +- .../src-linux/System/FS/BlockIO/Internal.hs | 10 +-- .../src-macos/System/FS/BlockIO/Internal.hs | 6 +- .../src-windows/System/FS/BlockIO/Internal.hs | 5 +- blockio/src/System/FS/BlockIO/API.hs | 61 +--------------- blockio/src/System/FS/BlockIO/IO/Internal.hs | 70 +++++++++++++++++++ blockio/src/System/FS/BlockIO/Serial.hs | 3 +- 8 files changed, 88 insertions(+), 71 deletions(-) create mode 100644 blockio/src/System/FS/BlockIO/IO/Internal.hs diff --git a/blockio/blockio.cabal b/blockio/blockio.cabal index d73587480..c168fa3d4 100644 --- a/blockio/blockio.cabal +++ b/blockio/blockio.cabal @@ -71,6 +71,7 @@ library System.FS.BlockIO.IO System.FS.BlockIO.Serial + other-modules: System.FS.BlockIO.IO.Internal build-depends: , base >=4.16 && <4.22 , deepseq ^>=1.4 || ^>=1.5 diff --git a/blockio/src-linux/System/FS/BlockIO/Async.hs b/blockio/src-linux/System/FS/BlockIO/Async.hs index 348e4d4bd..ee0a00a35 100644 --- a/blockio/src-linux/System/FS/BlockIO/Async.hs +++ b/blockio/src-linux/System/FS/BlockIO/Async.hs @@ -16,6 +16,7 @@ import System.FS.API (BufferOffset (..), FsErrorPath, FsPath, import qualified System.FS.BlockIO.API as API import System.FS.BlockIO.API (IOOp (..), IOResult (..), LockMode, ioopHandle) +import qualified System.FS.BlockIO.IO.Internal as IOI import System.FS.IO (HandleIO) import System.FS.IO.Handle import qualified System.IO.BlockIO as I @@ -72,7 +73,7 @@ submitIO hasFS ioctx ioops = do -- the exception might change between versions of @blockio-uring@. -- Nonetheless, it's better than nothing. if isResourceVanishedError e && ioe_location e == "IOCtx closed" - then throwIO (API.mkClosedError (SomeHasFS hasFS) "submitIO") + then throwIO (IOI.mkClosedError (SomeHasFS hasFS) "submitIO") else throwIO e rethrowErrno :: diff --git a/blockio/src-linux/System/FS/BlockIO/Internal.hs b/blockio/src-linux/System/FS/BlockIO/Internal.hs index aa5737536..fb47f4557 100644 --- a/blockio/src-linux/System/FS/BlockIO/Internal.hs +++ b/blockio/src-linux/System/FS/BlockIO/Internal.hs @@ -6,9 +6,9 @@ module System.FS.BlockIO.Internal ( import qualified System.FS.API as FS import System.FS.API (FsPath, Handle (..), HasFS) -import qualified System.FS.BlockIO.API as FS import System.FS.BlockIO.API (Advice (..), FileOffset, HasBlockIO, IOCtxParams) +import qualified System.FS.BlockIO.IO.Internal as IOI import System.FS.IO (HandleIO) import qualified System.FS.IO.Handle as FS import qualified System.Posix.Fcntl as Fcntl @@ -31,10 +31,10 @@ ioHasBlockIO hfs _params = hSetNoCache hAdvise hAllocate - (FS.tryLockFileIO hfs) + (IOI.tryLockFileIO hfs) hSynchronise (synchroniseDirectory hfs) - (FS.createHardLinkIO hfs Unix.createLink) + (IOI.createHardLinkIO hfs Unix.createLink) hfs #else ioHasBlockIO hfs params = @@ -42,10 +42,10 @@ ioHasBlockIO hfs params = hSetNoCache hAdvise hAllocate - (FS.tryLockFileIO hfs) + (IOI.tryLockFileIO hfs) hSynchronise (synchroniseDirectory hfs) - (FS.createHardLinkIO hfs Unix.createLink) + (IOI.createHardLinkIO hfs Unix.createLink) hfs params #endif diff --git a/blockio/src-macos/System/FS/BlockIO/Internal.hs b/blockio/src-macos/System/FS/BlockIO/Internal.hs index abc18dca4..0ed21daa0 100644 --- a/blockio/src-macos/System/FS/BlockIO/Internal.hs +++ b/blockio/src-macos/System/FS/BlockIO/Internal.hs @@ -4,9 +4,9 @@ module System.FS.BlockIO.Internal ( import qualified System.FS.API as FS import System.FS.API (FsPath, Handle (..), HasFS) -import qualified System.FS.BlockIO.API as FS import System.FS.BlockIO.API (Advice (..), FileOffset, HasBlockIO, IOCtxParams) +import qualified System.FS.BlockIO.IO.Internal as IOI import qualified System.FS.BlockIO.Serial as Serial import System.FS.IO (HandleIO) import qualified System.FS.IO.Handle as FS @@ -28,10 +28,10 @@ ioHasBlockIO hfs _params = hSetNoCache hAdvise hAllocate - (FS.tryLockFileIO hfs) + (IOI.tryLockFileIO hfs) hSynchronise (synchroniseDirectory hfs) - (FS.createHardLinkIO hfs Unix.createLink) + (IOI.createHardLinkIO hfs Unix.createLink) hfs hSetNoCache :: Handle HandleIO -> Bool -> IO () diff --git a/blockio/src-windows/System/FS/BlockIO/Internal.hs b/blockio/src-windows/System/FS/BlockIO/Internal.hs index 1b40dc86b..ef46569f1 100644 --- a/blockio/src-windows/System/FS/BlockIO/Internal.hs +++ b/blockio/src-windows/System/FS/BlockIO/Internal.hs @@ -9,6 +9,7 @@ import System.FS.API (FsPath, Handle (..), HasFS) import qualified System.FS.BlockIO.API as FS import System.FS.BlockIO.API (Advice (..), FileOffset, HasBlockIO, IOCtxParams) +import qualified System.FS.BlockIO.IO.Internal as IOI import qualified System.FS.BlockIO.Serial as Serial import System.FS.IO (HandleIO) import qualified System.FS.IO.Handle as FS @@ -31,10 +32,10 @@ ioHasBlockIO hfs _params = hSetNoCache hAdvise hAllocate - (FS.tryLockFileIO hfs) + (IOI.tryLockFileIO hfs) hSynchronise (synchroniseDirectory hfs) - (FS.createHardLinkIO hfs Windows.createHardLink) + (IOI.createHardLinkIO hfs Windows.createHardLink) hfs hSetNoCache :: Handle HandleIO -> Bool -> IO () diff --git a/blockio/src/System/FS/BlockIO/API.hs b/blockio/src/System/FS/BlockIO/API.hs index 496e707c8..872b5070f 100644 --- a/blockio/src/System/FS/BlockIO/API.hs +++ b/blockio/src/System/FS/BlockIO/API.hs @@ -6,7 +6,6 @@ module System.FS.BlockIO.API ( HasBlockIO (..) , IOCtxParams (..) , defaultIOCtxParams - , mkClosedError , IOOp (..) , ioopHandle , ioopFileOffset @@ -25,9 +24,6 @@ module System.FS.BlockIO.API ( -- ** Storage synchronisation , synchroniseFile , synchroniseDirectoryRecursive - -- * Defaults for the real file system - , tryLockFileIO - , createHardLinkIO -- * Re-exports , ByteCount , FileOffset @@ -35,8 +31,7 @@ module System.FS.BlockIO.API ( import Control.DeepSeq import Control.Monad (forM_) -import Control.Monad.Class.MonadThrow (MonadCatch (bracketOnError), - MonadThrow (..), bracketOnError, try) +import Control.Monad.Class.MonadThrow (MonadThrow (..)) import Control.Monad.Primitive (PrimMonad (PrimState)) import Data.Primitive.ByteArray (MutableByteArray) import qualified Data.Vector as V @@ -45,15 +40,10 @@ import qualified Data.Vector.Generic.Mutable as VGM import qualified Data.Vector.Primitive as VP import qualified Data.Vector.Unboxed as VU import qualified Data.Vector.Unboxed.Mutable as VUM -import GHC.IO.Exception (IOErrorType (ResourceVanished)) import qualified GHC.IO.Handle.Lock as GHC import GHC.Stack (HasCallStack) import qualified System.FS.API as FS -import System.FS.API (BufferOffset, FsError (..), FsPath, Handle (..), - HasFS, SomeHasFS (..)) -import System.FS.IO (HandleIO) -import qualified System.IO as GHC -import System.IO.Error (ioeSetErrorString, mkIOError) +import System.FS.API (BufferOffset, FsPath, Handle (..), HasFS) import System.Posix.Types (ByteCount, FileOffset) import Text.Printf @@ -180,14 +170,6 @@ defaultIOCtxParams = IOCtxParams { ioctxConcurrencyLimit = 64 * 3 } -mkClosedError :: HasCallStack => SomeHasFS m -> String -> FsError -mkClosedError (SomeHasFS hasFS) loc = FS.ioToFsError (FS.mkFsErrorPath hasFS (FS.mkFsPath [])) ioerr - where ioerr = - ioeSetErrorString - (mkIOError ResourceVanished loc Nothing Nothing) - ("HasBlockIO closed: " <> loc) - - data IOOp s h = IOOpRead !(Handle h) !FileOffset !(MutableByteArray s) !BufferOffset !ByteCount | IOOpWrite !(Handle h) !FileOffset !(MutableByteArray s) !BufferOffset !ByteCount @@ -300,42 +282,3 @@ newtype LockFileHandle m = LockFileHandle { -- | Release a file lock acquired using 'tryLockFile'. hUnlock :: m () } - -tryLockFileIO :: HasFS IO HandleIO -> FsPath -> GHC.LockMode -> IO (Maybe (LockFileHandle IO)) -tryLockFileIO hfs fsp mode = do - fp <- FS.unsafeToFilePath hfs fsp -- shouldn't fail because we are in IO - rethrowFsErrorIO hfs fsp $ - bracketOnError (GHC.openFile fp GHC.WriteMode) GHC.hClose $ \h -> do - bracketOnError (GHC.hTryLock h mode) (\_ -> GHC.hUnlock h) $ \b -> do - if b then - pure $ Just LockFileHandle { hUnlock = rethrowFsErrorIO hfs fsp $ do - GHC.hUnlock h - `finally` GHC.hClose h - } - else - pure $ Nothing - --- This is copied/adapted from System.FS.IO -rethrowFsErrorIO :: HasCallStack => HasFS IO HandleIO -> FsPath -> IO a -> IO a -rethrowFsErrorIO hfs fp action = do - res <- try action - case res of - Left err -> handleError err - Right a -> pure a - where - handleError :: HasCallStack => IOError -> IO a - handleError ioErr = - throwIO $ FS.ioToFsError (FS.mkFsErrorPath hfs fp) ioErr - -{------------------------------------------------------------------------------- - Hard links --------------------------------------------------------------------------------} - -createHardLinkIO :: - HasFS IO HandleIO - -> (FilePath -> FilePath -> IO ()) - -> (FsPath -> FsPath -> IO ()) -createHardLinkIO hfs f = \source target -> do - source' <- FS.unsafeToFilePath hfs source -- shouldn't fail because we are in IO - target' <- FS.unsafeToFilePath hfs target -- shouldn't fail because we are in IO - f source' target' diff --git a/blockio/src/System/FS/BlockIO/IO/Internal.hs b/blockio/src/System/FS/BlockIO/IO/Internal.hs new file mode 100644 index 000000000..982d3ce84 --- /dev/null +++ b/blockio/src/System/FS/BlockIO/IO/Internal.hs @@ -0,0 +1,70 @@ +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE UnboxedTuples #-} + +module System.FS.BlockIO.IO.Internal ( + mkClosedError + , tryLockFileIO + , createHardLinkIO + ) where + +import Control.Monad.Class.MonadThrow (MonadCatch (bracketOnError), + MonadThrow (..), bracketOnError, try) +import GHC.IO.Exception (IOErrorType (ResourceVanished)) +import qualified GHC.IO.Handle.Lock as GHC +import GHC.Stack (HasCallStack) +import qualified System.FS.API as FS +import System.FS.API (FsError (..), FsPath, HasFS, SomeHasFS (..)) +import System.FS.BlockIO.API (LockFileHandle (..)) +import System.FS.IO (HandleIO) +import qualified System.IO as GHC +import System.IO.Error (ioeSetErrorString, mkIOError) + +mkClosedError :: HasCallStack => SomeHasFS m -> String -> FsError +mkClosedError (SomeHasFS hasFS) loc = FS.ioToFsError (FS.mkFsErrorPath hasFS (FS.mkFsPath [])) ioerr + where ioerr = + ioeSetErrorString + (mkIOError ResourceVanished loc Nothing Nothing) + ("HasBlockIO closed: " <> loc) + +{------------------------------------------------------------------------------- + File locks +-------------------------------------------------------------------------------} + +tryLockFileIO :: HasFS IO HandleIO -> FsPath -> GHC.LockMode -> IO (Maybe (LockFileHandle IO)) +tryLockFileIO hfs fsp mode = do + fp <- FS.unsafeToFilePath hfs fsp -- shouldn't fail because we are in IO + rethrowFsErrorIO hfs fsp $ + bracketOnError (GHC.openFile fp GHC.WriteMode) GHC.hClose $ \h -> do + bracketOnError (GHC.hTryLock h mode) (\_ -> GHC.hUnlock h) $ \b -> do + if b then + pure $ Just LockFileHandle { hUnlock = rethrowFsErrorIO hfs fsp $ do + GHC.hUnlock h + `finally` GHC.hClose h + } + else + pure $ Nothing + +-- This is copied/adapted from System.FS.IO +rethrowFsErrorIO :: HasCallStack => HasFS IO HandleIO -> FsPath -> IO a -> IO a +rethrowFsErrorIO hfs fp action = do + res <- try action + case res of + Left err -> handleError err + Right a -> pure a + where + handleError :: HasCallStack => IOError -> IO a + handleError ioErr = + throwIO $ FS.ioToFsError (FS.mkFsErrorPath hfs fp) ioErr + +{------------------------------------------------------------------------------- + Hard links +-------------------------------------------------------------------------------} + +createHardLinkIO :: + HasFS IO HandleIO + -> (FilePath -> FilePath -> IO ()) + -> (FsPath -> FsPath -> IO ()) +createHardLinkIO hfs f = \source target -> do + source' <- FS.unsafeToFilePath hfs source -- shouldn't fail because we are in IO + target' <- FS.unsafeToFilePath hfs target -- shouldn't fail because we are in IO + f source' target' diff --git a/blockio/src/System/FS/BlockIO/Serial.hs b/blockio/src/System/FS/BlockIO/Serial.hs index c8d75019c..a6276ca7e 100644 --- a/blockio/src/System/FS/BlockIO/Serial.hs +++ b/blockio/src/System/FS/BlockIO/Serial.hs @@ -13,6 +13,7 @@ import GHC.Stack (HasCallStack) import System.FS.API import qualified System.FS.BlockIO.API as API import System.FS.BlockIO.API (IOOp (..), IOResult (..), LockMode (..)) +import qualified System.FS.BlockIO.IO.Internal as IOI {-# SPECIALISE serialHasBlockIO :: Eq h @@ -58,7 +59,7 @@ data IOCtx m = IOCtx { ctxFS :: SomeHasFS m, openVar :: MVar m Bool } {-# SPECIALISE guardIsOpen :: IOCtx IO -> IO () #-} guardIsOpen :: (HasCallStack, MonadMVar m, MonadThrow m) => IOCtx m -> m () guardIsOpen ctx = readMVar (openVar ctx) >>= \b -> - unless b $ throwIO (API.mkClosedError (ctxFS ctx) "submitIO") + unless b $ throwIO (IOI.mkClosedError (ctxFS ctx) "submitIO") {-# SPECIALISE initIOCtx :: SomeHasFS IO -> IO (IOCtx IO) #-} initIOCtx :: MonadMVar m => SomeHasFS m -> m (IOCtx m) From ed4b46477dd74ca13a4c97070631216f8dc28308 Mon Sep 17 00:00:00 2001 From: Joris Dral Date: Mon, 23 Jun 2025 12:07:58 +0200 Subject: [PATCH 2/8] `blockio`: move `IOCtxParams` out of `API` and export it from `IO` instead It's specific to the `IO` implementation --- bench/macro/utxo-bench.hs | 5 +-- blockio/src-linux/System/FS/BlockIO/Async.hs | 6 +-- .../src-linux/System/FS/BlockIO/Internal.hs | 5 +-- .../src-macos/System/FS/BlockIO/Internal.hs | 5 +-- .../src-windows/System/FS/BlockIO/Internal.hs | 6 +-- blockio/src/System/FS/BlockIO/API.hs | 18 --------- blockio/src/System/FS/BlockIO/IO.hs | 9 +++-- blockio/src/System/FS/BlockIO/IO/Internal.hs | 37 ++++++++++++++++++- blockio/test/Main.hs | 8 ++-- src/Database/LSMTree.hs | 4 +- src/Database/LSMTree/Simple.hs | 4 +- test/Test/Database/LSMTree/StateMachine.hs | 4 +- 12 files changed, 63 insertions(+), 48 deletions(-) diff --git a/bench/macro/utxo-bench.hs b/bench/macro/utxo-bench.hs index 132502cdf..0d74d6535 100644 --- a/bench/macro/utxo-bench.hs +++ b/bench/macro/utxo-bench.hs @@ -63,7 +63,6 @@ import qualified Options.Applicative as O import Prelude hiding (lookup) import qualified System.Clock as Clock import qualified System.FS.API as FS -import qualified System.FS.BlockIO.API as FS import qualified System.FS.BlockIO.IO as FsIO import qualified System.FS.IO as FsIO import System.IO @@ -445,7 +444,7 @@ doSetup' gopts opts = do let hasFS :: FS.HasFS IO FsIO.HandleIO hasFS = FsIO.ioHasFS mountPoint - hasBlockIO <- FsIO.ioHasBlockIO hasFS FS.defaultIOCtxParams + hasBlockIO <- FsIO.ioHasBlockIO hasFS FsIO.defaultIOCtxParams let name = LSM.toSnapshotName ("bench_" ++ show (initialSize gopts)) @@ -607,7 +606,7 @@ doRun gopts opts = do let hasFS :: FS.HasFS IO FsIO.HandleIO hasFS = FsIO.ioHasFS mountPoint - hasBlockIO <- FsIO.ioHasBlockIO hasFS FS.defaultIOCtxParams + hasBlockIO <- FsIO.ioHasBlockIO hasFS FsIO.defaultIOCtxParams let name = LSM.toSnapshotName "bench" diff --git a/blockio/src-linux/System/FS/BlockIO/Async.hs b/blockio/src-linux/System/FS/BlockIO/Async.hs index ee0a00a35..41934e4fc 100644 --- a/blockio/src-linux/System/FS/BlockIO/Async.hs +++ b/blockio/src-linux/System/FS/BlockIO/Async.hs @@ -33,7 +33,7 @@ asyncHasBlockIO :: -> (FsPath -> IO ()) -> (FsPath -> FsPath -> IO ()) -> HasFS IO HandleIO - -> API.IOCtxParams + -> IOI.IOCtxParams -> IO (API.HasBlockIO IO HandleIO) asyncHasBlockIO hSetNoCache hAdvise hAllocate tryLockFile hSynchronise synchroniseDirectory createHardLink hasFS ctxParams = do ctx <- I.initIOCtx (ctxParamsConv ctxParams) @@ -49,8 +49,8 @@ asyncHasBlockIO hSetNoCache hAdvise hAllocate tryLockFile hSynchronise synchroni , API.createHardLink } -ctxParamsConv :: API.IOCtxParams -> I.IOCtxParams -ctxParamsConv API.IOCtxParams{API.ioctxBatchSizeLimit, API.ioctxConcurrencyLimit} = +ctxParamsConv :: IOI.IOCtxParams -> I.IOCtxParams +ctxParamsConv IOI.IOCtxParams{IOI.ioctxBatchSizeLimit, IOI.ioctxConcurrencyLimit} = I.IOCtxParams { I.ioctxBatchSizeLimit = ioctxBatchSizeLimit , I.ioctxConcurrencyLimit = ioctxConcurrencyLimit diff --git a/blockio/src-linux/System/FS/BlockIO/Internal.hs b/blockio/src-linux/System/FS/BlockIO/Internal.hs index fb47f4557..7bff61874 100644 --- a/blockio/src-linux/System/FS/BlockIO/Internal.hs +++ b/blockio/src-linux/System/FS/BlockIO/Internal.hs @@ -6,8 +6,7 @@ module System.FS.BlockIO.Internal ( import qualified System.FS.API as FS import System.FS.API (FsPath, Handle (..), HasFS) -import System.FS.BlockIO.API (Advice (..), FileOffset, HasBlockIO, - IOCtxParams) +import System.FS.BlockIO.API (Advice (..), FileOffset, HasBlockIO) import qualified System.FS.BlockIO.IO.Internal as IOI import System.FS.IO (HandleIO) import qualified System.FS.IO.Handle as FS @@ -23,7 +22,7 @@ import qualified System.FS.BlockIO.Async as Async ioHasBlockIO :: HasFS IO HandleIO - -> IOCtxParams + -> IOI.IOCtxParams -> IO (HasBlockIO IO HandleIO) #if SERIALBLOCKIO ioHasBlockIO hfs _params = diff --git a/blockio/src-macos/System/FS/BlockIO/Internal.hs b/blockio/src-macos/System/FS/BlockIO/Internal.hs index 0ed21daa0..0a24a0701 100644 --- a/blockio/src-macos/System/FS/BlockIO/Internal.hs +++ b/blockio/src-macos/System/FS/BlockIO/Internal.hs @@ -4,8 +4,7 @@ module System.FS.BlockIO.Internal ( import qualified System.FS.API as FS import System.FS.API (FsPath, Handle (..), HasFS) -import System.FS.BlockIO.API (Advice (..), FileOffset, HasBlockIO, - IOCtxParams) +import System.FS.BlockIO.API (Advice (..), FileOffset, HasBlockIO) import qualified System.FS.BlockIO.IO.Internal as IOI import qualified System.FS.BlockIO.Serial as Serial import System.FS.IO (HandleIO) @@ -21,7 +20,7 @@ import qualified System.Posix.Unistd as Unix -- The recommended choice would be to use the POSIX AIO API. ioHasBlockIO :: HasFS IO HandleIO - -> IOCtxParams + -> IOI.IOCtxParams -> IO (HasBlockIO IO HandleIO) ioHasBlockIO hfs _params = Serial.serialHasBlockIO diff --git a/blockio/src-windows/System/FS/BlockIO/Internal.hs b/blockio/src-windows/System/FS/BlockIO/Internal.hs index ef46569f1..09290f107 100644 --- a/blockio/src-windows/System/FS/BlockIO/Internal.hs +++ b/blockio/src-windows/System/FS/BlockIO/Internal.hs @@ -6,9 +6,7 @@ import Control.Exception (throwIO) import Control.Monad (unless) import qualified System.FS.API as FS import System.FS.API (FsPath, Handle (..), HasFS) -import qualified System.FS.BlockIO.API as FS -import System.FS.BlockIO.API (Advice (..), FileOffset, HasBlockIO, - IOCtxParams) +import System.FS.BlockIO.API (Advice (..), FileOffset, HasBlockIO) import qualified System.FS.BlockIO.IO.Internal as IOI import qualified System.FS.BlockIO.Serial as Serial import System.FS.IO (HandleIO) @@ -25,7 +23,7 @@ import qualified System.Win32.HardLink as Windows -- The recommended choice would be to use the Win32 IOCP API. ioHasBlockIO :: HasFS IO HandleIO - -> IOCtxParams + -> IOI.IOCtxParams -> IO (HasBlockIO IO HandleIO) ioHasBlockIO hfs _params = Serial.serialHasBlockIO diff --git a/blockio/src/System/FS/BlockIO/API.hs b/blockio/src/System/FS/BlockIO/API.hs index 872b5070f..81263d04c 100644 --- a/blockio/src/System/FS/BlockIO/API.hs +++ b/blockio/src/System/FS/BlockIO/API.hs @@ -4,8 +4,6 @@ module System.FS.BlockIO.API ( -- * HasBlockIO HasBlockIO (..) - , IOCtxParams (..) - , defaultIOCtxParams , IOOp (..) , ioopHandle , ioopFileOffset @@ -154,22 +152,6 @@ instance NFData (HasBlockIO m h) where rwhnf d `seq` rwhnf e `seq` rwhnf f `seq` rwhnf g `seq` rwhnf h `seq` rwhnf i --- | Concurrency parameters for initialising a 'HasBlockIO. Can be ignored by --- serial implementations. -data IOCtxParams = IOCtxParams { - ioctxBatchSizeLimit :: !Int, - ioctxConcurrencyLimit :: !Int - } - -instance NFData IOCtxParams where - rnf (IOCtxParams x y) = rnf x `seq` rnf y - -defaultIOCtxParams :: IOCtxParams -defaultIOCtxParams = IOCtxParams { - ioctxBatchSizeLimit = 64, - ioctxConcurrencyLimit = 64 * 3 - } - data IOOp s h = IOOpRead !(Handle h) !FileOffset !(MutableByteArray s) !BufferOffset !ByteCount | IOOpWrite !(Handle h) !FileOffset !(MutableByteArray s) !BufferOffset !ByteCount diff --git a/blockio/src/System/FS/BlockIO/IO.hs b/blockio/src/System/FS/BlockIO/IO.hs index 79e78c705..b048b1d92 100644 --- a/blockio/src/System/FS/BlockIO/IO.hs +++ b/blockio/src/System/FS/BlockIO/IO.hs @@ -1,24 +1,27 @@ module System.FS.BlockIO.IO ( ioHasBlockIO , withIOHasBlockIO + , IOI.IOCtxParams (..) + , IOI.defaultIOCtxParams ) where import Control.Exception (bracket) import System.FS.API (HasFS) -import System.FS.BlockIO.API (HasBlockIO (..), IOCtxParams) +import System.FS.BlockIO.API (HasBlockIO (..)) import qualified System.FS.BlockIO.Internal as I +import qualified System.FS.BlockIO.IO.Internal as IOI import System.FS.IO (HandleIO) -- | Platform-dependent IO instantiation of 'HasBlockIO'. ioHasBlockIO :: HasFS IO HandleIO - -> IOCtxParams + -> IOI.IOCtxParams -> IO (HasBlockIO IO HandleIO) ioHasBlockIO = I.ioHasBlockIO withIOHasBlockIO :: HasFS IO HandleIO - -> IOCtxParams + -> IOI.IOCtxParams -> (HasBlockIO IO HandleIO -> IO a) -> IO a withIOHasBlockIO hfs params action = diff --git a/blockio/src/System/FS/BlockIO/IO/Internal.hs b/blockio/src/System/FS/BlockIO/IO/Internal.hs index 982d3ce84..dc83f3cf2 100644 --- a/blockio/src/System/FS/BlockIO/IO/Internal.hs +++ b/blockio/src/System/FS/BlockIO/IO/Internal.hs @@ -2,11 +2,14 @@ {-# LANGUAGE UnboxedTuples #-} module System.FS.BlockIO.IO.Internal ( - mkClosedError + IOCtxParams (..) + , defaultIOCtxParams + , mkClosedError , tryLockFileIO , createHardLinkIO ) where +import Control.DeepSeq (NFData (..)) import Control.Monad.Class.MonadThrow (MonadCatch (bracketOnError), MonadThrow (..), bracketOnError, try) import GHC.IO.Exception (IOErrorType (ResourceVanished)) @@ -19,6 +22,38 @@ import System.FS.IO (HandleIO) import qualified System.IO as GHC import System.IO.Error (ioeSetErrorString, mkIOError) +{------------------------------------------------------------------------------- + IO context +-------------------------------------------------------------------------------} + +-- | Concurrency parameters for initialising the 'IO' context in a 'HasBlockIO' +-- instance. +-- +-- [IO context parameters]: These parameters are interpreted differently based +-- on the underlying platform: +-- +-- * Linux: Pass the parameters to 'initIOCtx' in the @blockio-uring@ package +-- * MacOS: Ignore the parameters +-- * Windows: Ignore the parameters +-- +-- For more information about what these parameters mean and how to configure +-- them, see the @blockio-uring@ package. +data IOCtxParams = IOCtxParams { + ioctxBatchSizeLimit :: !Int, + ioctxConcurrencyLimit :: !Int + } + +instance NFData IOCtxParams where + rnf (IOCtxParams x y) = rnf x `seq` rnf y + +-- | Default parameters. Some manual tuning of parameters might be required to +-- achieve higher performance targets (see 'IOCtxParams'). +defaultIOCtxParams :: IOCtxParams +defaultIOCtxParams = IOCtxParams { + ioctxBatchSizeLimit = 64, + ioctxConcurrencyLimit = 64 * 3 + } + mkClosedError :: HasCallStack => SomeHasFS m -> String -> FsError mkClosedError (SomeHasFS hasFS) loc = FS.ioToFsError (FS.mkFsErrorPath hasFS (FS.mkFsPath [])) ioerr where ioerr = diff --git a/blockio/test/Main.hs b/blockio/test/Main.hs index 537be7052..268426b0a 100644 --- a/blockio/test/Main.hs +++ b/blockio/test/Main.hs @@ -69,14 +69,14 @@ example_initClose :: Assertion example_initClose = withSystemTempDirectory "example_initClose" $ \dirPath -> do let mount = FS.MountPoint dirPath hfs = IO.ioHasFS mount - hbio <- IO.ioHasBlockIO hfs FS.defaultIOCtxParams + hbio <- IO.ioHasBlockIO hfs IO.defaultIOCtxParams close hbio example_closeIsIdempotent :: Assertion example_closeIsIdempotent = withSystemTempDirectory "example_closeIsIdempotent" $ \dirPath -> do let mount = FS.MountPoint dirPath hfs = IO.ioHasFS mount - hbio <- IO.ioHasBlockIO hfs FS.defaultIOCtxParams + hbio <- IO.ioHasBlockIO hfs IO.defaultIOCtxParams close hbio eith <- try @SomeException (close hbio) case eith of @@ -89,7 +89,7 @@ prop_readWrite :: ByteString -> Property prop_readWrite bs = ioProperty $ withSystemTempDirectory "prop_readWrite" $ \dirPath -> do let mount = FS.MountPoint dirPath hfs = IO.ioHasFS mount - hbio <- IO.ioHasBlockIO hfs FS.defaultIOCtxParams + hbio <- IO.ioHasBlockIO hfs IO.defaultIOCtxParams prop <- FS.withFile hfs (FS.mkFsPath ["temp"]) (FS.WriteMode FS.MustBeNew) $ \h -> do let n = BS.length bs writeBuf <- fromByteStringPinned bs @@ -108,7 +108,7 @@ prop_submitToClosedCtx :: ByteString -> Property prop_submitToClosedCtx bs = ioProperty $ withSystemTempDirectory "prop_a" $ \dir -> do let mount = FS.MountPoint dir hfs = IO.ioHasFS mount - hbio <- IO.ioHasBlockIO hfs FS.defaultIOCtxParams + hbio <- IO.ioHasBlockIO hfs IO.defaultIOCtxParams props <- FS.withFile hfs (FS.mkFsPath ["temp"]) (FS.WriteMode FS.MustBeNew) $ \h -> do void $ hPutAllStrict hfs h bs diff --git a/src/Database/LSMTree.hs b/src/Database/LSMTree.hs index 2c685a0ac..56817ecfe 100644 --- a/src/Database/LSMTree.hs +++ b/src/Database/LSMTree.hs @@ -274,8 +274,8 @@ import Database.LSMTree.Internal.Unsafe (BlobRefInvalidError (..), import qualified Database.LSMTree.Internal.Unsafe as Internal import Prelude hiding (lookup, take, takeWhile) import System.FS.API (FsPath, HasFS (..), MountPoint (..), mkFsPath) -import System.FS.BlockIO.API (HasBlockIO (..), defaultIOCtxParams) -import System.FS.BlockIO.IO (withIOHasBlockIO) +import System.FS.BlockIO.API (HasBlockIO (..)) +import System.FS.BlockIO.IO (defaultIOCtxParams, withIOHasBlockIO) import System.FS.IO (HandleIO, ioHasFS) import System.Random (randomIO) diff --git a/src/Database/LSMTree/Simple.hs b/src/Database/LSMTree/Simple.hs index 5292297af..11ed3c321 100644 --- a/src/Database/LSMTree/Simple.hs +++ b/src/Database/LSMTree/Simple.hs @@ -188,8 +188,8 @@ import qualified Database.LSMTree.Internal.Types as LSMT import qualified Database.LSMTree.Internal.Unsafe as Internal import Prelude hiding (lookup, take, takeWhile) import System.FS.API (MountPoint (..), mkFsPath) -import System.FS.BlockIO.API (HasBlockIO (..), defaultIOCtxParams) -import System.FS.BlockIO.IO (ioHasBlockIO) +import System.FS.BlockIO.API (HasBlockIO (..)) +import System.FS.BlockIO.IO (defaultIOCtxParams, ioHasBlockIO) import System.FS.IO (ioHasFS) import System.Random (randomIO) diff --git a/test/Test/Database/LSMTree/StateMachine.hs b/test/Test/Database/LSMTree/StateMachine.hs index e817e8db1..0e4e3b3e3 100644 --- a/test/Test/Database/LSMTree/StateMachine.hs +++ b/test/Test/Database/LSMTree/StateMachine.hs @@ -105,8 +105,8 @@ import NoThunks.Class import Prelude hiding (init) import System.Directory (removeDirectoryRecursive) import System.FS.API (FsError (..), HasFS, MountPoint (..), mkFsPath) -import System.FS.BlockIO.API (HasBlockIO, close, defaultIOCtxParams) -import System.FS.BlockIO.IO (ioHasBlockIO) +import System.FS.BlockIO.API (HasBlockIO, close) +import System.FS.BlockIO.IO (defaultIOCtxParams, ioHasBlockIO) import System.FS.IO (HandleIO, ioHasFS) import qualified System.FS.Sim.Error as FSSim import System.FS.Sim.Error (Errors) From 6c2ec1fbda798e347989e4569303e3b3cb321e57 Mon Sep 17 00:00:00 2001 From: Joris Dral Date: Mon, 23 Jun 2025 12:15:29 +0200 Subject: [PATCH 3/8] `blockio`: expose `serialHasBlockIO` from an internal module We use `serialHasBlockIO` in the `sim` sub-library and so we have to expose it somewhere, but we'd actually prefer it if it was not part of the public API. Exposing it only from an internal module makes it clear that it should ideally not be used by users. --- blockio/blockio.cabal | 5 ++++- blockio/src-sim/System/FS/BlockIO/Sim.hs | 2 +- blockio/src/System/FS/BlockIO/Serial/Internal.hs | 12 ++++++++++++ 3 files changed, 17 insertions(+), 2 deletions(-) create mode 100644 blockio/src/System/FS/BlockIO/Serial/Internal.hs diff --git a/blockio/blockio.cabal b/blockio/blockio.cabal index c168fa3d4..ebeacbd37 100644 --- a/blockio/blockio.cabal +++ b/blockio/blockio.cabal @@ -69,9 +69,12 @@ library exposed-modules: System.FS.BlockIO.API System.FS.BlockIO.IO + System.FS.BlockIO.Serial.Internal + + other-modules: + System.FS.BlockIO.IO.Internal System.FS.BlockIO.Serial - other-modules: System.FS.BlockIO.IO.Internal build-depends: , base >=4.16 && <4.22 , deepseq ^>=1.4 || ^>=1.5 diff --git a/blockio/src-sim/System/FS/BlockIO/Sim.hs b/blockio/src-sim/System/FS/BlockIO/Sim.hs index 35fb90681..2a00bb489 100644 --- a/blockio/src-sim/System/FS/BlockIO/Sim.hs +++ b/blockio/src-sim/System/FS/BlockIO/Sim.hs @@ -18,7 +18,7 @@ import qualified System.FS.API.Lazy as API import qualified System.FS.API.Strict as API import System.FS.BlockIO.API (HasBlockIO (..), LockFileHandle (..), LockMode (..)) -import System.FS.BlockIO.Serial +import System.FS.BlockIO.Serial.Internal import System.FS.CallStack (prettyCallStack) import System.FS.Sim.Error import System.FS.Sim.MockFS hiding (hClose, hOpen) diff --git a/blockio/src/System/FS/BlockIO/Serial/Internal.hs b/blockio/src/System/FS/BlockIO/Serial/Internal.hs new file mode 100644 index 000000000..a0a968af9 --- /dev/null +++ b/blockio/src/System/FS/BlockIO/Serial/Internal.hs @@ -0,0 +1,12 @@ +-- | This is an internal module that has to be exposed for technical reasons. Do +-- not use it. +module System.FS.BlockIO.Serial.Internal ( + -- We have to re-export from somewhere so that the @blockio:sim@ sub-library + -- can use it. Unfortunately, this makes the function part of the public API + -- even though we'd prefer to keep it truly hidden. There are ways around + -- this, for example using a new private sub-library that contains the + -- "System.FS.BlockIO.Serial" module, but it's a lot of boilerplate. + serialHasBlockIO + ) where + +import System.FS.BlockIO.Serial (serialHasBlockIO) From 1baaf8b26fa6efd406630f9b6f309a37dc8e9714 Mon Sep 17 00:00:00 2001 From: Joris Dral Date: Mon, 23 Jun 2025 12:18:04 +0200 Subject: [PATCH 4/8] `blockio`: document the `API` module We remove documentation about implementation specifics from the `HasBlockIO` type, but we'll include it in the `IO` module in one of the next commits. --- blockio/src/System/FS/BlockIO/API.hs | 121 ++++++++++++++++----------- 1 file changed, 73 insertions(+), 48 deletions(-) diff --git a/blockio/src/System/FS/BlockIO/API.hs b/blockio/src/System/FS/BlockIO/API.hs index 81263d04c..1322ea4e9 100644 --- a/blockio/src/System/FS/BlockIO/API.hs +++ b/blockio/src/System/FS/BlockIO/API.hs @@ -1,6 +1,7 @@ {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE UnboxedTuples #-} +-- | Abstract interface, types, and utilities. module System.FS.BlockIO.API ( -- * HasBlockIO HasBlockIO (..) @@ -45,13 +46,51 @@ import System.FS.API (BufferOffset, FsPath, Handle (..), HasFS) import System.Posix.Types (ByteCount, FileOffset) import Text.Printf --- | Abstract interface for submitting large batches of I\/O operations. +-- | Abstract interface for submitting large batches of I\/O operations. This +-- interface is an extension of the 'HasFS' interface that is provided by the +-- @fs-api@ package. +-- +-- The interface tries to specify uniform behaviour, but each implementation can +-- have subtly different effects for a variety of reasons. However, for the most +-- part the underlying implementation of an instance of the interface should not +-- affect the correctness of programs that use the interface. +-- +-- For uniform behaviour across implementations, functions that create a new +-- instance of the interface should initialise an IO context. This IO context +-- may be of any shape, as long as the context has two modes: open and closed. +-- This context is only important for the 'close' and 'submitIO' functions. As +-- long as the IO context is open, 'submitIO' should perform batches of I\/O +-- operations as expected, but 'submitIO' should throw an error as soon as the +-- IO context is closed. Once the IO context is closed, it can not be re-opened +-- again. Instead, the user should create a new instance of the interface. +-- +-- Note: there are a bunch of functions in the interface that have nothing to do +-- with submitting large batches of I/O operations. In fact, only 'close' and +-- 'submitIO' are related to that. All other functions were put in this record +-- for simplicity because the authors of the library needed them and it was more +-- straightforward to add them here then to add them to @fs-api@. Still these +-- unrelated functions could and should all be moved into @fs-api@ at some point +-- in the future. +-- +-- === Implementations +-- +-- There are currently two known implementations of the interface: +-- +-- * An implementation using the real file system, which can be found in the +-- "System.FS.BlockIO.IO" module. This implementation is platform-dependent. +-- +-- * An implementation using a simulated file system, which can be found in the +-- @System.FS.BlockIO.Sim@ module of the @blockio:sim@ sublibrary. This +-- implementation is uniform across platforms. +-- data HasBlockIO m h = HasBlockIO { - -- | (Idempotent) close the interface. + -- | (Idempotent) close the IO context that is required for running + -- 'submitIO'. + -- + -- Using 'submitIO' after 'close' throws an 'FsError' exception. -- - -- Using 'submitIO' after 'close' should thrown an 'FsError' exception. See - -- 'mkClosedError'. close :: HasCallStack => m () + -- | Submit a batch of I\/O operations and wait for the result. -- -- Results correspond to input 'IOOp's in a pair-wise manner, i.e., one can @@ -59,41 +98,28 @@ data HasBlockIO m h = HasBlockIO { -- position. -- -- If any of the I\/O operations fails, an 'FsError' exception will be thrown. + -- , submitIO :: HasCallStack => V.Vector (IOOp (PrimState m) h) -> m (VU.Vector IOResult) + + -- TODO: once file caching is disabled, subsequent reads/writes with + -- misaligned byte arrays should throw an error. Preferably, this should + -- happen in both the simulation and real implementation, even if the real + -- implementation does not support setting the file caching mode. This would + -- make the behaviour of the file caching mode more uniform across + -- implementations and platforms. + -- | Set the file data caching mode for a file handle. -- - -- This has different effects on different distributions. - -- * [Linux]: set the @O_DIRECT@ flag. - -- * [MacOS]: set the @F_NOCACHE@ flag. - -- * [Windows]: no-op. - -- - -- TODO: subsequent reads/writes with misaligned byte arrays should fail - -- both in simulation and real implementation. , hSetNoCache :: Handle h -> Bool -> m () + -- | Predeclare an access pattern for file data. -- - -- This has different effects on different distributions. - -- * [Linux]: perform @posix_fadvise(2). - -- * [MacOS]: no-op. - -- * [Windows]: no-op. , hAdvise :: Handle h -> FileOffset -> FileOffset -> Advice -> m () + -- | Allocate file space. -- - -- This has different effects on different distributions. - -- * [Linux]: perform @posix_fallocate(2). - -- * [MacOS]: no-op. - -- * [Windows]: no-op. , hAllocate :: Handle h -> FileOffset -> FileOffset -> m () - -- | Try to acquire a file lock without blocking. - -- - -- This uses different locking methods on different distributions. - -- * [Linux]: Open file descriptor (OFD) - -- * [MacOS]: @flock@ - -- * [Windows]: @LockFileEx@ - -- - -- This function can throw 'GHC.FileLockingNotSupported' when file locking - -- is not supported. - -- + -- NOTE: though it would have been nicer to allow locking /file handles/ -- instead of /file paths/, it would make the implementation of this -- function in 'IO' much more complex. In particular, if we want to reuse @@ -114,35 +140,34 @@ data HasBlockIO m h = HasBlockIO { -- that allows you to use 'LockFileHandle' as a 'Handle', but only within a -- limited scope. That is, it has to fit the style of @withHandleToHANDLE :: -- Handle -> (HANDLE -> IO a) -> IO a@ from the @Win32@ package. + + -- | Try to acquire a file lock without blocking. + -- + -- This function throws 'GHC.FileLockingNotSupported' when file locking is + -- not supported. + -- , tryLockFile :: FsPath -> GHC.LockMode -> m (Maybe (LockFileHandle m)) -- | Synchronise file contents with the storage device. -- - -- Ensure that all change to the file handle's contents which exist only in - -- memory (as buffered system cache pages) are transferred/flushed to disk. - -- This will also update the file handle's associated metadata. + -- This ensures that all changes to the file handle's contents, which might + -- exist only in memory as buffered system cache pages, are + -- transferred/flushed to disk. This will also update the file handle's + -- associated metadata. -- - -- This uses different system calls on different distributions. - -- * [Linux]: @fsync(2)@ - -- * [MacOS]: @fsync(2)@ - -- * [Windows]: @flushFileBuffers@ , hSynchronise :: Handle h -> m () -- | Synchronise a directory with the storage device. -- - -- This uses different system calls on different distributions. - -- * [Linux]: @fsync(2)@ - -- * [MacOS]: @fsync(2)@ - -- * [Windows]: no-op + -- This ensures that all changes to the directory, which might exist only in + -- memory as buffered changes, are transferred/flushed to disk. This will + -- also update the directory's associated metadata. + -- , synchroniseDirectory :: FsPath -> m () -- | Create a hard link for an existing file at the source path and a new -- file at the target path. -- - -- This uses different system calls on different distributions. - -- * [Linux]: @link@ - -- * [MacOS]: @link@ - -- * [Windows]: @CreateHardLinkW@ , createHardLink :: FsPath -> FsPath -> m () } @@ -195,7 +220,7 @@ instance VUM.Unbox IOResult Advice -------------------------------------------------------------------------------} --- | Basically "System.Posix.Fcntl.Advice" from the @unix@ package +-- | Copy of "System.Posix.Fcntl.Advice" from the @unix@ package data Advice = AdviceNormal | AdviceRandom @@ -219,7 +244,7 @@ hDropCacheAll hbio h = hAdviseAll hbio h AdviceDontNeed -------------------------------------------------------------------------------} {-# SPECIALISE synchroniseFile :: HasFS IO h -> HasBlockIO IO h -> FsPath -> IO () #-} --- | Synchronise a file and its contents with the storage device. +-- | Synchronise a file's contents and metadata with the storage device. synchroniseFile :: MonadThrow m => HasFS m h -> HasBlockIO m h -> FsPath -> m () synchroniseFile hfs hbio path = FS.withFile hfs path (FS.ReadWriteMode FS.MustExist) $ hSynchronise hbio @@ -230,8 +255,8 @@ synchroniseFile hfs hbio path = -> FsPath -> IO () #-} --- | Synchronise a directory and recursively its contents with the storage --- device. +-- | Synchronise a directory's contents and metadata with the storage device, +-- and recursively for all entries in the directory. synchroniseDirectoryRecursive :: MonadThrow m => HasFS m h From c0ccc4d693bb1a55daa152b4a51000dc36accd78 Mon Sep 17 00:00:00 2001 From: Joris Dral Date: Mon, 23 Jun 2025 12:18:34 +0200 Subject: [PATCH 5/8] `blockio`: refactor and document the `IO` module --- app/Database/LSMTree/Demo.hs | 4 +- bench/macro/lsm-tree-bench-lookups.hs | 9 +- bench/macro/utxo-bench.hs | 38 ++-- bench/micro/Bench/Database/LSMTree.hs | 3 +- .../Bench/Database/LSMTree/Internal/Lookup.hs | 3 +- .../Bench/Database/LSMTree/Internal/Merge.hs | 3 +- blockio/src/System/FS/BlockIO/IO.hs | 162 +++++++++++++++++- blockio/test/Main.hs | 86 ++++------ src/Database/LSMTree.hs | 5 +- src/Database/LSMTree/Simple.hs | 8 +- test/Test/Database/LSMTree/Internal/Run.hs | 4 +- test/Test/Database/LSMTree/StateMachine.hs | 5 +- test/Test/Util/FS.hs | 7 +- 13 files changed, 222 insertions(+), 115 deletions(-) diff --git a/app/Database/LSMTree/Demo.hs b/app/Database/LSMTree/Demo.hs index 5aa5f3c2b..916769888 100644 --- a/app/Database/LSMTree/Demo.hs +++ b/app/Database/LSMTree/Demo.hs @@ -28,7 +28,6 @@ import qualified System.FS.API as FS import qualified System.FS.BlockIO.API as FS import qualified System.FS.BlockIO.IO as FS import qualified System.FS.BlockIO.Sim as FSSim -import qualified System.FS.IO as FS import qualified System.FS.Sim.MockFS as FSSim import System.IO.Unsafe (unsafePerformIO) @@ -151,8 +150,7 @@ demo = do print' (fmap getValue os) do - let hasFS = FS.ioHasFS (FS.MountPoint "") - FS.withIOHasBlockIO hasFS FS.defaultIOCtxParams $ \hasBlockIO -> do + FS.withIOHasBlockIO (FS.MountPoint "") FS.defaultIOCtxParams $ \hasFS hasBlockIO -> do simpleAction hasFS hasBlockIO pause -- [16] diff --git a/bench/macro/lsm-tree-bench-lookups.hs b/bench/macro/lsm-tree-bench-lookups.hs index 01f8a146c..cb4ad902e 100644 --- a/bench/macro/lsm-tree-bench-lookups.hs +++ b/bench/macro/lsm-tree-bench-lookups.hs @@ -310,11 +310,10 @@ totalNumEntriesSanityCheck l1 runSizes = withFS :: (FS.HasFS IO FS.HandleIO -> FS.HasBlockIO IO FS.HandleIO -> IO a) -> IO a -withFS action = do - let hfs = FS.ioHasFS (FS.MountPoint "_bench_lookups") - exists <- FS.doesDirectoryExist hfs (FS.mkFsPath [""]) - unless exists $ error ("_bench_lookups directory does not exist") - FS.withIOHasBlockIO hfs FS.defaultIOCtxParams $ \hbio -> +withFS action = + FS.withIOHasBlockIO (FS.MountPoint "_bench_lookups") FS.defaultIOCtxParams $ \hfs hbio -> do + exists <- FS.doesDirectoryExist hfs (FS.mkFsPath [""]) + unless exists $ error ("_bench_lookups directory does not exist") action hfs hbio -- | Input environment for benchmarking lookup functions. diff --git a/bench/macro/utxo-bench.hs b/bench/macro/utxo-bench.hs index 0d74d6535..943b4494b 100644 --- a/bench/macro/utxo-bench.hs +++ b/bench/macro/utxo-bench.hs @@ -64,7 +64,6 @@ import Prelude hiding (lookup) import qualified System.Clock as Clock import qualified System.FS.API as FS import qualified System.FS.BlockIO.IO as FsIO -import qualified System.FS.IO as FsIO import System.IO import System.Mem (performMajorGC) import qualified System.Random as Random @@ -437,17 +436,8 @@ doSetup gopts opts = do void $ timed_ $ doSetup' gopts opts doSetup' :: GlobalOpts -> SetupOpts -> IO () -doSetup' gopts opts = do - let mountPoint :: FS.MountPoint - mountPoint = FS.MountPoint (rootDir gopts) - - let hasFS :: FS.HasFS IO FsIO.HandleIO - hasFS = FsIO.ioHasFS mountPoint - - hasBlockIO <- FsIO.ioHasBlockIO hasFS FsIO.defaultIOCtxParams - - let name = LSM.toSnapshotName ("bench_" ++ show (initialSize gopts)) - +doSetup' gopts opts = + FsIO.withIOHasBlockIO mountPoint FsIO.defaultIOCtxParams $ \hasFS hasBlockIO -> LSM.withOpenSession (mkTracer gopts) hasFS hasBlockIO benchSalt (FS.mkFsPath []) $ \session -> do tbl <- LSM.newTableWith @IO @K @V @B (mkTableConfigSetup gopts opts benchTableConfig) session @@ -461,6 +451,12 @@ doSetup' gopts opts = do ] LSM.saveSnapshot name label tbl + where + mountPoint :: FS.MountPoint + mountPoint = FS.MountPoint (rootDir gopts) + + name = LSM.toSnapshotName ("bench_" ++ show (initialSize gopts)) + ------------------------------------------------------------------------------- -- dry-run @@ -599,17 +595,8 @@ toOperations lookups inserts = (batch1, batch2) ------------------------------------------------------------------------------- doRun :: GlobalOpts -> RunOpts -> IO () -doRun gopts opts = do - let mountPoint :: FS.MountPoint - mountPoint = FS.MountPoint (rootDir gopts) - - let hasFS :: FS.HasFS IO FsIO.HandleIO - hasFS = FsIO.ioHasFS mountPoint - - hasBlockIO <- FsIO.ioHasBlockIO hasFS FsIO.defaultIOCtxParams - - let name = LSM.toSnapshotName "bench" - +doRun gopts opts = + FsIO.withIOHasBlockIO mountPoint FsIO.defaultIOCtxParams $ \hasFS hasBlockIO -> LSM.withOpenSession (mkTracer gopts) hasFS hasBlockIO benchSalt (FS.mkFsPath []) $ \session -> withLatencyHandle $ \h -> do -- open snapshot @@ -651,6 +638,11 @@ doRun gopts opts = do let ops = batchCount opts * batchSize opts printf "Operations per second: %7.01f ops/sec\n" (fromIntegral ops / time) + where + mountPoint :: FS.MountPoint + mountPoint = FS.MountPoint (rootDir gopts) + + name = LSM.toSnapshotName ("bench_" ++ show (initialSize gopts)) ------------------------------------------------------------------------------- -- sequential diff --git a/bench/micro/Bench/Database/LSMTree.hs b/bench/micro/Bench/Database/LSMTree.hs index b949c24ea..995b1063e 100644 --- a/bench/micro/Bench/Database/LSMTree.hs +++ b/bench/micro/Bench/Database/LSMTree.hs @@ -433,8 +433,7 @@ mkFiles :: mkFiles = do sysTmpDir <- getCanonicalTemporaryDirectory benchTmpDir <- createTempDirectory sysTmpDir "full" - let hfs = FS.ioHasFS (FS.MountPoint benchTmpDir) - hbio <- FS.ioHasBlockIO hfs FS.defaultIOCtxParams + (hfs, hbio) <- FS.ioHasBlockIO (FS.MountPoint benchTmpDir) FS.defaultIOCtxParams pure (benchTmpDir, hfs, hbio) cleanupFiles :: diff --git a/bench/micro/Bench/Database/LSMTree/Internal/Lookup.hs b/bench/micro/Bench/Database/LSMTree/Internal/Lookup.hs index 5c02b3b72..5ca58854b 100644 --- a/bench/micro/Bench/Database/LSMTree/Internal/Lookup.hs +++ b/bench/micro/Bench/Database/LSMTree/Internal/Lookup.hs @@ -191,8 +191,7 @@ lookupsInBatchesEnv Config {..} = do sysTmpDir <- getCanonicalTemporaryDirectory benchTmpDir <- createTempDirectory sysTmpDir "lookupsInBatchesEnv" (storedKeys, lookupKeys) <- lookupsEnv (mkStdGen 17) nentries npos nneg - let hasFS = FS.ioHasFS (FS.MountPoint benchTmpDir) - hasBlockIO <- FS.ioHasBlockIO hasFS (fromMaybe FS.defaultIOCtxParams ioctxps) + (hasFS, hasBlockIO) <- FS.ioHasBlockIO (FS.MountPoint benchTmpDir) (fromMaybe FS.defaultIOCtxParams ioctxps) wbblobs <- WBB.new hasFS (FS.mkFsPath ["0.wbblobs"]) wb <- WB.fromMap <$> traverse (traverse (WBB.addBlob hasFS wbblobs)) storedKeys let fsps = RunFsPaths (FS.mkFsPath []) (RunNumber 0) diff --git a/bench/micro/Bench/Database/LSMTree/Internal/Merge.hs b/bench/micro/Bench/Database/LSMTree/Internal/Merge.hs index 07b26adc3..7028a48ad 100644 --- a/bench/micro/Bench/Database/LSMTree/Internal/Merge.hs +++ b/bench/micro/Bench/Database/LSMTree/Internal/Merge.hs @@ -373,8 +373,7 @@ mergeEnv :: mergeEnv config = do sysTmpDir <- getCanonicalTemporaryDirectory benchTmpDir <- createTempDirectory sysTmpDir "mergeEnv" - let hasFS = FS.ioHasFS (FS.MountPoint benchTmpDir) - hasBlockIO <- FS.ioHasBlockIO hasFS FS.defaultIOCtxParams + (hasFS, hasBlockIO) <- FS.ioHasBlockIO (FS.MountPoint benchTmpDir) FS.defaultIOCtxParams runs <- randomRuns hasFS hasBlockIO config (mkStdGen 17) pure (benchTmpDir, hasFS, hasBlockIO, runs) diff --git a/blockio/src/System/FS/BlockIO/IO.hs b/blockio/src/System/FS/BlockIO/IO.hs index b048b1d92..0b798cadc 100644 --- a/blockio/src/System/FS/BlockIO/IO.hs +++ b/blockio/src/System/FS/BlockIO/IO.hs @@ -1,28 +1,174 @@ +-- | Implementations using the real file system. +-- +-- The implementation of the 'HasBlockIO' interface provided in this module is +-- platform-dependent. Most importantly, on Linux, the implementation of +-- 'submitIO' is backed by @blockio-uring@: a library for asynchronous I/O. On +-- Windows and MacOS, the implementation of 'submitIO' only supports serial I/O. module System.FS.BlockIO.IO ( + -- * Implementation details #impl# + -- $impl + + -- * Initialisation ioHasBlockIO , withIOHasBlockIO + -- ** Parameters , IOI.IOCtxParams (..) , IOI.defaultIOCtxParams + -- ** Unsafe + , unsafeFromHasFS + , withUnsafeFromHasFS ) where import Control.Exception (bracket) -import System.FS.API (HasFS) +import System.FS.API (HasFS, MountPoint) import System.FS.BlockIO.API (HasBlockIO (..)) import qualified System.FS.BlockIO.Internal as I import qualified System.FS.BlockIO.IO.Internal as IOI -import System.FS.IO (HandleIO) +import System.FS.IO (HandleIO, ioHasFS) --- | Platform-dependent IO instantiation of 'HasBlockIO'. -ioHasBlockIO :: +{- $impl + + Though the 'HasBlockIO' interface tries to capture uniform behaviour, each + function in this implementation for the real file system can have subtly + different effects depending on the underlying patform. For example, some + features are not provided by some operating systems, and in some cases the + features behave subtly differently for different operating systems. For this + reason, we include below some documentation about the effects of calling the + interface functions on different platforms. + + Note: if the @serialblockio@ Cabal flag is enabled, then the Linux implementation + uses a mocked context and serial I/O for 'close' and 'submitIO', just like the + MacOS and Windows implementations do. + + [IO context]: When an instance of the 'HasBlockIO' interface for Linux + systems is initialised, an @io_uring@ context is created using the + @blockio-uring@ package and stored in the 'HasBlockIO' closure. For uniform + behaviour, each other platform creates and stores a mocked IO context that + has the same open/closed behaviour as an @io_uring@ context. In summary, + each platform creates: + + * Linux: an @io_uring@ context provided by the @blockio-uring@ package + * MacOS: a mocked context using an @MVar@ + * Windows: a mocked conext using an @MVar@ + + ['close']: + + * Linux: close the @io_uring@ context through the @blockio-uring@ package + * MacOS: close the mocked context + * Windows: close the mocked context + + ['submitIO']: Submit a batch of I/O operations using: + + * Linux: the @submitIO@ function from the @blockio-uring@ package + * MacOS: serial I/O using a 'HasFS' + * Windows: serial I/O using a 'HasFS' + + ['hSetNoCache']: + + * Linux: set the @O_DIRECT@ flag + * MacOS: set the @F_NOCACHE@ flag + * Windows: no-op + + ['hAdvise']: + + * Linux: perform @posix_fadvise(2)@ + * MacOS: no-op + * Windows: no-op + + ['hAllocate']: + + * Linux: perform @posix_fallocate(2)@ + * MacOS: no-op + * Windows: no-op + + ['tryLockFile']: This uses different locking methods depending on the OS. + + * Linux: Open file descriptor (OFD) + * MacOS: @flock@ + * Windows: @LockFileEx@ + + ['hSynchronise']: + + * Linux: perform @fsync(2)@ + * MacOS: perform @fsync(2)@ + * Windows: perform @flushFileBuffers@ + + ['synchroniseDirectory']: + + * Linux: perform @fsync(2)@ + * MacOS: perform @fsync(2)@ + * Windows: no-op + + ['createHardLink']: + + * Linux: perform @link@ + * MacOS: perform @link@ + * Windows: perform @CreateHardLinkW@ +-} + +-- | An implementation of the 'HasBlockIO' interface using the real file system. +-- +-- Make sure to use 'close' the resulting 'HasBlockIO' when it is no longer +-- used. 'withUnsafeFromHasFS' does this automatically. +-- +-- === Unsafe +-- +-- You will probably want to use 'ioHasBlockIO' or 'withIOHasBlockIO' instead. +-- +-- Only a 'HasFS' for the real file system, like 'ioHasFS', should be passed to +-- 'unsafeFromHasFS'. Technically, one could pass a 'HasFS' for a simulated file +-- system, but then the resulting 'HasBlockIO' would contain a mix of simulated +-- and real functions, which is probably not what you want. +unsafeFromHasFS :: HasFS IO HandleIO -> IOI.IOCtxParams -> IO (HasBlockIO IO HandleIO) -ioHasBlockIO = I.ioHasBlockIO +unsafeFromHasFS = I.ioHasBlockIO -withIOHasBlockIO :: +-- | Perform an action using a 'HasBlockIO' instance that is only open for the +-- duration of the action. +-- +-- The 'HasBlockIO' is initialised using 'unsafeFromHasFS'. +-- +-- === Unsafe +-- +-- You will probably want to use 'ioHasBlockIO' or 'withIOHasBlockIO' instead. +-- +-- Only a 'HasFS' for the real file system, like 'ioHasFS', should be passed to +-- 'withUnsafeFromHasFS'. Technically, one could pass a 'HasFS' for a simulated +-- file system, but then the resulting 'HasBlockIO' would contain a mix of +-- simulated and real functions, which is probably not what you want. +withUnsafeFromHasFS :: HasFS IO HandleIO -> IOI.IOCtxParams -> (HasBlockIO IO HandleIO -> IO a) -> IO a -withIOHasBlockIO hfs params action = - bracket (ioHasBlockIO hfs params) (\HasBlockIO{close} -> close) action +withUnsafeFromHasFS hfs params = + bracket (unsafeFromHasFS hfs params) (\HasBlockIO{close} -> close) + +-- | An implementation of the 'HasBlockIO' interface using the real file system. +-- +-- Make sure to use 'close' the resulting 'HasBlockIO' when it is no longer +-- used. 'withIOHasBlockIO' does this automatically. +-- +-- The 'HasFS' interface is instantiated using 'ioHasFS'. +ioHasBlockIO :: + MountPoint + -> IOI.IOCtxParams + -> IO (HasFS IO HandleIO, HasBlockIO IO HandleIO) +ioHasBlockIO mount params = do + let hfs = ioHasFS mount + hbio <- unsafeFromHasFS hfs params + pure (hfs, hbio) + +-- | Perform an action using a 'HasFS' and a 'HasBlockIO' instance. The latter +-- is only open for the duration of the action. +-- +-- The 'HasFS' and 'HasBlockIO' interfaces are initialised using 'ioHasBlockIO'. +withIOHasBlockIO :: + MountPoint + -> IOI.IOCtxParams + -> (HasFS IO HandleIO -> HasBlockIO IO HandleIO -> IO a) + -> IO a +withIOHasBlockIO mount params action = + bracket (ioHasBlockIO mount params) (\(_, HasBlockIO{close}) -> close) (uncurry action) diff --git a/blockio/test/Main.hs b/blockio/test/Main.hs index 268426b0a..452b7748f 100644 --- a/blockio/test/Main.hs +++ b/blockio/test/Main.hs @@ -24,8 +24,6 @@ import System.FS.API.Strict (hPutAllStrict) import qualified System.FS.BlockIO.API as FS import System.FS.BlockIO.API import qualified System.FS.BlockIO.IO as IO -import System.FS.BlockIO.IO -import qualified System.FS.IO as IO import System.FS.IO import System.IO.Temp import Test.QuickCheck @@ -68,15 +66,12 @@ toByteString n mba = do example_initClose :: Assertion example_initClose = withSystemTempDirectory "example_initClose" $ \dirPath -> do let mount = FS.MountPoint dirPath - hfs = IO.ioHasFS mount - hbio <- IO.ioHasBlockIO hfs IO.defaultIOCtxParams - close hbio + IO.withIOHasBlockIO mount IO.defaultIOCtxParams $ \_ _ -> pure () example_closeIsIdempotent :: Assertion example_closeIsIdempotent = withSystemTempDirectory "example_closeIsIdempotent" $ \dirPath -> do let mount = FS.MountPoint dirPath - hfs = IO.ioHasFS mount - hbio <- IO.ioHasBlockIO hfs IO.defaultIOCtxParams + hbio <- IO.withIOHasBlockIO mount IO.defaultIOCtxParams $ \_ hbio -> pure hbio close hbio eith <- try @SomeException (close hbio) case eith of @@ -88,61 +83,48 @@ example_closeIsIdempotent = withSystemTempDirectory "example_closeIsIdempotent" prop_readWrite :: ByteString -> Property prop_readWrite bs = ioProperty $ withSystemTempDirectory "prop_readWrite" $ \dirPath -> do let mount = FS.MountPoint dirPath - hfs = IO.ioHasFS mount - hbio <- IO.ioHasBlockIO hfs IO.defaultIOCtxParams - prop <- FS.withFile hfs (FS.mkFsPath ["temp"]) (FS.WriteMode FS.MustBeNew) $ \h -> do - let n = BS.length bs - writeBuf <- fromByteStringPinned bs - [IOResult m] <- VU.toList <$> submitIO hbio (V.singleton (IOOpWrite h 0 writeBuf 0 (fromIntegral n))) - let writeTest = n === fromIntegral m - readBuf <- newPinnedByteArray n - [IOResult o] <- VU.toList <$> submitIO hbio (V.singleton (IOOpRead h 0 readBuf 0 (fromIntegral n))) - let readTest = o === m - bs' <- toByteString n readBuf - let cmpTest = bs === bs' - pure $ writeTest .&&. readTest .&&. cmpTest - close hbio - pure prop + IO.withIOHasBlockIO mount IO.defaultIOCtxParams $ \hfs hbio -> do + FS.withFile hfs (FS.mkFsPath ["temp"]) (FS.WriteMode FS.MustBeNew) $ \h -> do + let n = BS.length bs + writeBuf <- fromByteStringPinned bs + [IOResult m] <- VU.toList <$> submitIO hbio (V.singleton (IOOpWrite h 0 writeBuf 0 (fromIntegral n))) + let writeTest = n === fromIntegral m + readBuf <- newPinnedByteArray n + [IOResult o] <- VU.toList <$> submitIO hbio (V.singleton (IOOpRead h 0 readBuf 0 (fromIntegral n))) + let readTest = o === m + bs' <- toByteString n readBuf + let cmpTest = bs === bs' + pure $ writeTest .&&. readTest .&&. cmpTest prop_submitToClosedCtx :: ByteString -> Property prop_submitToClosedCtx bs = ioProperty $ withSystemTempDirectory "prop_a" $ \dir -> do let mount = FS.MountPoint dir - hfs = IO.ioHasFS mount - hbio <- IO.ioHasBlockIO hfs IO.defaultIOCtxParams - - props <- FS.withFile hfs (FS.mkFsPath ["temp"]) (FS.WriteMode FS.MustBeNew) $ \h -> do - void $ hPutAllStrict hfs h bs - syncVar <- newMVar False - forConcurrently [0 .. BS.length bs - 1] $ \i -> - if i == 0 then do - threadDelay 15 - modifyMVar_ syncVar $ \_ -> do - close hbio - pure True - pure Nothing - else do - readBuf <- newPinnedByteArray (BS.length bs) - withMVar syncVar $ \b -> do - eith <- try @SomeException $ submitIO hbio (V.singleton (IOOpRead h 0 readBuf (fromIntegral i) 1)) - pure $ case eith of - Left _ -> Just $ tabulate "submitIO successful" [show False] $ counterexample "expected failure, but got success" (b === True) - Right _ -> Just $ tabulate "submitIO successful" [show True] $ counterexample "expected success, but got failure" (b === False) - pure $ conjoin (catMaybes props) - + IO.withIOHasBlockIO mount IO.defaultIOCtxParams $ \hfs hbio -> do + FS.withFile hfs (FS.mkFsPath ["temp"]) (FS.WriteMode FS.MustBeNew) $ \h -> do + void $ hPutAllStrict hfs h bs + syncVar <- newMVar False + fmap (conjoin . catMaybes) $ forConcurrently [0 .. BS.length bs - 1] $ \i -> + if i == 0 then do + threadDelay 15 + modifyMVar_ syncVar $ \_ -> do + close hbio + pure True + pure Nothing + else do + readBuf <- newPinnedByteArray (BS.length bs) + withMVar syncVar $ \b -> do + eith <- try @SomeException $ submitIO hbio (V.singleton (IOOpRead h 0 readBuf (fromIntegral i) 1)) + pure $ case eith of + Left _ -> Just $ tabulate "submitIO successful" [show False] $ counterexample "expected failure, but got success" (b === True) + Right _ -> Just $ tabulate "submitIO successful" [show True] $ counterexample "expected success, but got failure" (b === False) {------------------------------------------------------------------------------- File locks -------------------------------------------------------------------------------} -withTempIOHasFS :: FilePath -> (HasFS IO HandleIO -> IO a) -> IO a -withTempIOHasFS path action = withSystemTempDirectory path $ \dir -> do - let hfs = ioHasFS (MountPoint dir) - action hfs - withTempIOHasBlockIO :: FilePath -> (HasFS IO HandleIO -> HasBlockIO IO HandleIO -> IO a) -> IO a -withTempIOHasBlockIO path action = - withTempIOHasFS path $ \hfs -> do - withIOHasBlockIO hfs defaultIOCtxParams (action hfs) +withTempIOHasBlockIO path action = withSystemTempDirectory path $ \dir -> do + IO.withIOHasBlockIO (MountPoint dir) IO.defaultIOCtxParams action showLeft :: Show a => String -> Either a b -> String showLeft x = \case diff --git a/src/Database/LSMTree.hs b/src/Database/LSMTree.hs index 56817ecfe..8eedd9c3b 100644 --- a/src/Database/LSMTree.hs +++ b/src/Database/LSMTree.hs @@ -276,7 +276,7 @@ import Prelude hiding (lookup, take, takeWhile) import System.FS.API (FsPath, HasFS (..), MountPoint (..), mkFsPath) import System.FS.BlockIO.API (HasBlockIO (..)) import System.FS.BlockIO.IO (defaultIOCtxParams, withIOHasBlockIO) -import System.FS.IO (HandleIO, ioHasFS) +import System.FS.IO (HandleIO) import System.Random (randomIO) -------------------------------------------------------------------------------- @@ -471,9 +471,8 @@ withOpenSessionIO :: withOpenSessionIO tracer sessionDir action = do let mountPoint = MountPoint sessionDir let sessionDirFsPath = mkFsPath [] - let hasFS = ioHasFS mountPoint sessionSalt <- randomIO - withIOHasBlockIO hasFS defaultIOCtxParams $ \hasBlockIO -> + withIOHasBlockIO mountPoint defaultIOCtxParams $ \hasFS hasBlockIO -> withOpenSession tracer hasFS hasBlockIO sessionSalt sessionDirFsPath action {- | diff --git a/src/Database/LSMTree/Simple.hs b/src/Database/LSMTree/Simple.hs index 11ed3c321..287454afb 100644 --- a/src/Database/LSMTree/Simple.hs +++ b/src/Database/LSMTree/Simple.hs @@ -190,7 +190,6 @@ import Prelude hiding (lookup, take, takeWhile) import System.FS.API (MountPoint (..), mkFsPath) import System.FS.BlockIO.API (HasBlockIO (..)) import System.FS.BlockIO.IO (defaultIOCtxParams, ioHasBlockIO) -import System.FS.IO (ioHasFS) import System.Random (randomIO) -------------------------------------------------------------------------------- @@ -458,11 +457,10 @@ openSession dir = do _convertSessionDirErrors dir $ do let mountPoint = MountPoint dir let sessionDirFsPath = mkFsPath [] - let hasFS = ioHasFS mountPoint sessionSalt <- randomIO - let acquireHasBlockIO = ioHasBlockIO hasFS defaultIOCtxParams - let releaseHasBlockIO HasBlockIO{close} = close - bracketOnError acquireHasBlockIO releaseHasBlockIO $ \hasBlockIO -> + let acquireHasBlockIO = ioHasBlockIO mountPoint defaultIOCtxParams + let releaseHasBlockIO (_, HasBlockIO{close}) = close + bracketOnError acquireHasBlockIO releaseHasBlockIO $ \(hasFS, hasBlockIO) -> Session <$> LSMT.openSession tracer hasFS hasBlockIO sessionSalt sessionDirFsPath {- | diff --git a/test/Test/Database/LSMTree/Internal/Run.hs b/test/Test/Database/LSMTree/Internal/Run.hs index b703b037f..3056df92a 100644 --- a/test/Test/Database/LSMTree/Internal/Run.hs +++ b/test/Test/Database/LSMTree/Internal/Run.hs @@ -39,7 +39,6 @@ import qualified System.FS.API as FS import qualified System.FS.API.Lazy as FSL import qualified System.FS.BlockIO.API as FS import qualified System.FS.BlockIO.IO as FS -import qualified System.FS.IO as FsIO import qualified System.FS.Sim.MockFS as MockFS import qualified System.IO.Temp as Temp import Test.Database.LSMTree.Internal.RunReader (readKOps) @@ -105,8 +104,7 @@ testSalt = 4 -- | Runs in IO, with a real file system. testSingleInsert :: FilePath -> SerialisedKey -> SerialisedValue -> Maybe SerialisedBlob -> IO () testSingleInsert sessionRoot key val mblob = - let fs = FsIO.ioHasFS (FS.MountPoint sessionRoot) in - FS.withIOHasBlockIO fs FS.defaultIOCtxParams $ \hbio -> do + FS.withIOHasBlockIO (FS.MountPoint sessionRoot) FS.defaultIOCtxParams $ \fs hbio -> do -- flush write buffer let e = case mblob of Nothing -> Insert val; Just blob -> InsertWithBlob val blob wb = Map.singleton key e diff --git a/test/Test/Database/LSMTree/StateMachine.hs b/test/Test/Database/LSMTree/StateMachine.hs index 0e4e3b3e3..944cc472c 100644 --- a/test/Test/Database/LSMTree/StateMachine.hs +++ b/test/Test/Database/LSMTree/StateMachine.hs @@ -107,7 +107,7 @@ import System.Directory (removeDirectoryRecursive) import System.FS.API (FsError (..), HasFS, MountPoint (..), mkFsPath) import System.FS.BlockIO.API (HasBlockIO, close) import System.FS.BlockIO.IO (defaultIOCtxParams, ioHasBlockIO) -import System.FS.IO (HandleIO, ioHasFS) +import System.FS.IO (HandleIO) import qualified System.FS.Sim.Error as FSSim import System.FS.Sim.Error (Errors) import qualified System.FS.Sim.MockFS as MockFS @@ -469,8 +469,7 @@ createSystemTempDirectory :: [Char] -> IO (FilePath, HasFS IO HandleIO, HasBloc createSystemTempDirectory prefix = do systemTempDir <- getCanonicalTemporaryDirectory tempDir <- createTempDirectory systemTempDir prefix - let hasFS = ioHasFS (MountPoint tempDir) - hasBlockIO <- ioHasBlockIO hasFS defaultIOCtxParams + (hasFS, hasBlockIO) <- ioHasBlockIO (MountPoint tempDir) defaultIOCtxParams pure (tempDir, hasFS, hasBlockIO) {------------------------------------------------------------------------------- diff --git a/test/Test/Util/FS.hs b/test/Test/Util/FS.hs index 384fd9b00..a6e51a264 100644 --- a/test/Test/Util/FS.hs +++ b/test/Test/Util/FS.hs @@ -74,7 +74,7 @@ import GHC.Stack import System.FS.API as FS import qualified System.FS.API.Lazy as FSL import System.FS.BlockIO.API -import System.FS.BlockIO.IO +import System.FS.BlockIO.IO hiding (unsafeFromHasFS) import System.FS.BlockIO.Sim (fromHasFS) import System.FS.IO import System.FS.Sim.Error @@ -99,9 +99,8 @@ withTempIOHasFS path action = withSystemTempDirectory path $ \dir -> do action hfs withTempIOHasBlockIO :: FilePath -> (HasFS IO HandleIO -> HasBlockIO IO HandleIO -> IO a) -> IO a -withTempIOHasBlockIO path action = - withTempIOHasFS path $ \hfs -> do - withIOHasBlockIO hfs defaultIOCtxParams (action hfs) +withTempIOHasBlockIO path action = withSystemTempDirectory path $ \dir -> do + withIOHasBlockIO (MountPoint dir) defaultIOCtxParams action {------------------------------------------------------------------------------- Simulated file system From 2554fe97b193a3461b095aa69332e25976116dc0 Mon Sep 17 00:00:00 2001 From: Joris Dral Date: Mon, 23 Jun 2025 12:19:23 +0200 Subject: [PATCH 6/8] `blockio`: refactor and document the `Sim` module --- blockio/src-sim/System/FS/BlockIO/Sim.hs | 158 +++++++++++++++++- test/Test/Database/LSMTree/Internal/Merge.hs | 5 +- .../Test/Database/LSMTree/Internal/Readers.hs | 4 +- test/Test/Util/FS.hs | 6 +- test/Test/Util/FS/Error.hs | 2 +- 5 files changed, 155 insertions(+), 20 deletions(-) diff --git a/blockio/src-sim/System/FS/BlockIO/Sim.hs b/blockio/src-sim/System/FS/BlockIO/Sim.hs index 2a00bb489..ee34a7b42 100644 --- a/blockio/src-sim/System/FS/BlockIO/Sim.hs +++ b/blockio/src-sim/System/FS/BlockIO/Sim.hs @@ -1,10 +1,18 @@ +-- | Simulated instances of 'HasBlockIO' and 'HasFS'. module System.FS.BlockIO.Sim ( - fromHasFS - -- * Initialisation helpers + -- * Implementation details #impl# + -- $impl + + -- * Runners + runSimHasBlockIO + , runSimErrorHasBlockIO + -- * Initialisation , simHasBlockIO , simHasBlockIO' , simErrorHasBlockIO , simErrorHasBlockIO' + -- ** Unsafe + , unsafeFromHasFS ) where import Control.Concurrent.Class.MonadMVar @@ -24,11 +32,55 @@ import System.FS.Sim.Error import System.FS.Sim.MockFS hiding (hClose, hOpen) import System.FS.Sim.STM -fromHasFS :: +{- $impl + + We include below some documentation about the effects of calling the interface + functions on the simulated instance of the 'HasBlockIO' interface. + + [IO context]: For uniform behaviour across implementations, the simulation + creates and stores a mocked IO context that has the open/closed behaviour + that is specified by the interface. + + ['close']: Close the mocked context + + ['submitIO']: Submit a batch of I\/O operations using serial I\/O using a + 'HasFS' + + ['hSetNoCache']: No-op + + ['hAdvise']: No-op + + ['hAllocate']: No-op + + ['tryLockFile']: Simulate a lock by putting the lock state into the file + contents + + ['hSynchronise']: No-op + + ['synchroniseDirectory']: No-op + + ['createHardLink']: Copy all file contents from the source path to the target + path. Therefore, this is currently only correctly simulating hard links + for /immutable/ files. +-} + +-- | Simulate a 'HasBlockIO' using the given 'HasFS'. +-- +-- === Unsafe +-- +-- You will probably want to use one of the safe functions like +-- 'runSimHasBlockIO' or 'simErrorHasBlockIO' instead. +-- +-- Only a simulated 'HasFS', like the 'simHasFS' and 'simErrorHasFS' +-- simulations, should be passed to 'unsafeFromHasFS'. Technically, one could +-- pass a 'HasFS' for the /real/ file system, but then the resulting +-- 'HasBlockIO' would contain a mix of simulated functions and real functions, +-- which is probably not what you want. +unsafeFromHasFS :: forall m. (MonadCatch m, MonadMVar m, PrimMonad m) => HasFS m HandleMock -> m (HasBlockIO m HandleMock) -fromHasFS hfs = +unsafeFromHasFS hfs = serialHasBlockIO hSetNoCache hAdvise @@ -142,27 +194,104 @@ simCreateHardLink hfs sourcePath targetPath = void $ API.hPutAll hfs targetHandle bs {------------------------------------------------------------------------------- - Initialisation helpers + Runners -------------------------------------------------------------------------------} +-- | @'runSimHasBlockIO' mockFS action@ runs an @action@ using a pair of +-- simulated 'HasFS' and 'HasBlockIO'. +-- +-- The pair of interfaces share the same mocked file system. The initial state +-- of the mocked file system is set to @mockFs@. The final state of the mocked +-- file system is returned with the result of @action@. +-- +-- If you want to have access to the current state of the mocked file system, +-- use 'simHasBlockIO' instead. +runSimHasBlockIO :: + (MonadSTM m, PrimMonad m, MonadCatch m, MonadMVar m) + => MockFS + -> (HasFS m HandleMock -> HasBlockIO m HandleMock -> m a) + -> m (a, MockFS) +runSimHasBlockIO mockFS k = do + runSimFS mockFS $ \hfs -> do + hbio <- unsafeFromHasFS hfs + k hfs hbio + +-- | @'runSimErrorHasBlockIO' mockFS errors action@ runs an @action@ using a +-- pair of simulated 'HasFS' and 'HasBlockIO' that allow fault injection. +-- +-- The pair of interfaces share the same mocked file system. The initial state +-- of the mocked file system is set to @mockFs@. The final state of the mocked +-- file system is returned with the result of @action@. +-- +-- The pair of interfaces share the same stream of errors. The initial state of +-- the stream of errors is set to @errors@. The final state of the stream of +-- errors is returned with the result of @action@. +-- +-- If you want to have access to the current state of the mocked file system +-- or stream of errors, use 'simErrorHasBlockIO' instead. +runSimErrorHasBlockIO :: + (MonadSTM m, PrimMonad m, MonadCatch m, MonadMVar m) + => MockFS + -> Errors + -> (HasFS m HandleMock -> HasBlockIO m HandleMock -> m a) + -> m (a, MockFS, Errors) +runSimErrorHasBlockIO mockFS errs k = do + fsVar <- newTMVarIO mockFS + errorsVar <- newTVarIO errs + (hfs, hbio) <- simErrorHasBlockIO fsVar errorsVar + a <- k hfs hbio + fs' <- atomically $ takeTMVar fsVar + errs' <- readTVarIO errorsVar + pure (a, fs', errs') + +{------------------------------------------------------------------------------- + Initialisation +-------------------------------------------------------------------------------} + +-- | @'simHasBlockIO' mockFsVar@ creates a pair of simulated 'HasFS' and +-- 'HasBlockIO'. +-- +-- The pair of interfaces share the same mocked file system, which is stored in +-- @mockFsVar@. The current state of the mocked file system can be accessed by +-- the user by reading @mockFsVar@, but note that the user should not leave +-- @mockFsVar@ empty. simHasBlockIO :: (MonadCatch m, MonadMVar m, PrimMonad m, MonadSTM m) => StrictTMVar m MockFS -> m (HasFS m HandleMock, HasBlockIO m HandleMock) simHasBlockIO var = do let hfs = simHasFS var - hbio <- fromHasFS hfs + hbio <- unsafeFromHasFS hfs pure (hfs, hbio) +-- | @'simHasBlockIO' mockFs@ creates a pair of simulated 'HasFS' and +-- 'HasBlockIO' that allow fault injection. +-- +-- The pair of interfaces share the same mocked file system. The initial state +-- of the mocked file system is set to @mockFs@. +-- +-- If you want to have access to the current state of the mocked file system, +-- use 'simHasBlockIO' instead. simHasBlockIO' :: (MonadCatch m, MonadMVar m, PrimMonad m, MonadSTM m) => MockFS -> m (HasFS m HandleMock, HasBlockIO m HandleMock) simHasBlockIO' mockFS = do hfs <- simHasFS' mockFS - hbio <- fromHasFS hfs + hbio <- unsafeFromHasFS hfs pure (hfs, hbio) +-- | @'simErrorHasBlockIO' mockFsVar errorsVar@ creates a pair of simulated +-- 'HasFS' and 'HasBlockIO' that allow fault injection. +-- +-- The pair of interfaces share the same mocked file system, which is stored in +-- @mockFsVar@. The current state of the mocked file system can be accessed by +-- the user by reading @mockFsVar@, but note that the user should not leave +-- @mockFsVar@ empty. +-- +-- The pair of interfaces share the same stream of errors, which is stored in +-- @errorsVar@. The current state of the stream of errors can be accessed by the +-- user by reading @errorsVar@. simErrorHasBlockIO :: forall m. (MonadCatch m, MonadMVar m, PrimMonad m, MonadSTM m) => StrictTMVar m MockFS @@ -170,9 +299,20 @@ simErrorHasBlockIO :: -> m (HasFS m HandleMock, HasBlockIO m HandleMock) simErrorHasBlockIO fsVar errorsVar = do let hfs = simErrorHasFS fsVar errorsVar - hbio <- fromHasFS hfs + hbio <- unsafeFromHasFS hfs pure (hfs, hbio) +-- | @'simErrorHasBlockIO' mockFs errors@ creates a pair of simulated 'HasFS' +-- and 'HasBlockIO' that allow fault injection. +-- +-- The pair of interfaces share the same mocked file system. The initial state +-- of the mocked file system is set to @mockFs@. +-- +-- The pair of interfaces share the same stream of errors. The initial state of +-- the stream of errors is set to @errors@. +-- +-- If you want to have access to the current state of the mocked file system +-- or stream of errors, use 'simErrorHasBlockIO' instead. simErrorHasBlockIO' :: (MonadCatch m, MonadMVar m, PrimMonad m, MonadSTM m) => MockFS @@ -180,5 +320,5 @@ simErrorHasBlockIO' :: -> m (HasFS m HandleMock, HasBlockIO m HandleMock) simErrorHasBlockIO' mockFS errs = do hfs <- simErrorHasFS' mockFS errs - hbio <- fromHasFS hfs + hbio <- unsafeFromHasFS hfs pure (hfs, hbio) diff --git a/test/Test/Database/LSMTree/Internal/Merge.hs b/test/Test/Database/LSMTree/Internal/Merge.hs index 7dbca8c1d..da72e6c18 100644 --- a/test/Test/Database/LSMTree/Internal/Merge.hs +++ b/test/Test/Database/LSMTree/Internal/Merge.hs @@ -51,10 +51,7 @@ tests = testGroup "Test.Database.LSMTree.Internal.Merge" => (FS.HasFS IO FsSim.HandleMock -> FS.HasBlockIO IO FsSim.HandleMock -> IO p) -> Property ioPropertyWithMockFS prop = ioProperty $ do - (res, mockFS) <- - FsSim.runSimErrorFS FsSim.empty FsSim.emptyErrors $ \_ fs -> do - hbio <- FsSim.fromHasFS fs - prop fs hbio + (res, mockFS, _) <- FsSim.runSimErrorHasBlockIO FsSim.empty FsSim.emptyErrors prop pure $ res .&&. counterexample "open handles" (FsSim.numOpenHandles mockFS === 0) diff --git a/test/Test/Database/LSMTree/Internal/Readers.hs b/test/Test/Database/LSMTree/Internal/Readers.hs index cc8b8b247..22750eba1 100644 --- a/test/Test/Database/LSMTree/Internal/Readers.hs +++ b/test/Test/Database/LSMTree/Internal/Readers.hs @@ -39,7 +39,6 @@ import qualified System.FS.API as FS import qualified System.FS.BlockIO.API as FS import qualified System.FS.BlockIO.Sim as FsSim import qualified System.FS.Sim.MockFS as MockFS -import qualified System.FS.Sim.STM as FsSim import qualified Test.QuickCheck as QC import Test.Tasty (TestTree, testGroup) import Test.Tasty.QuickCheck @@ -55,8 +54,7 @@ tests = testGroup "Database.LSMTree.Internal.Readers" [ testProperty "prop_lockstep" $ Lockstep.runActionsBracket (Proxy @ReadersState) mempty mempty $ \act () -> do - (prop, mockFS) <- FsSim.runSimFS MockFS.empty $ \hfs -> do - hbio <- FsSim.fromHasFS hfs + (prop, mockFS) <- FsSim.runSimHasBlockIO MockFS.empty $ \hfs hbio -> do (prop, RealState _ mCtx) <- runRealMonad hfs hbio (RealState 0 Nothing) act traverse_ closeReadersCtx mCtx -- close current readers diff --git a/test/Test/Util/FS.hs b/test/Test/Util/FS.hs index a6e51a264..3a89d9855 100644 --- a/test/Test/Util/FS.hs +++ b/test/Test/Util/FS.hs @@ -75,7 +75,7 @@ import System.FS.API as FS import qualified System.FS.API.Lazy as FSL import System.FS.BlockIO.API import System.FS.BlockIO.IO hiding (unsafeFromHasFS) -import System.FS.BlockIO.Sim (fromHasFS) +import System.FS.BlockIO.Sim (unsafeFromHasFS) import System.FS.IO import System.FS.Sim.Error import System.FS.Sim.MockFS (HandleMock, MockFS, numOpenHandles, @@ -136,7 +136,7 @@ withSimHasBlockIO :: -> m Property withSimHasBlockIO post fs k = do withSimHasFS post fs $ \hfs fsVar -> do - hbio <- fromHasFS hfs + hbio <- unsafeFromHasFS hfs k hfs hbio fsVar {------------------------------------------------------------------------------- @@ -180,7 +180,7 @@ withSimErrorHasBlockIO :: -> m Property withSimErrorHasBlockIO post fs errs k = withSimErrorHasFS post fs errs $ \hfs fsVar errsVar -> do - hbio <- fromHasFS hfs + hbio <- unsafeFromHasFS hfs k hfs hbio fsVar errsVar {------------------------------------------------------------------------------- diff --git a/test/Test/Util/FS/Error.hs b/test/Test/Util/FS/Error.hs index 22b798237..4f9632dd5 100644 --- a/test/Test/Util/FS/Error.hs +++ b/test/Test/Util/FS/Error.hs @@ -114,7 +114,7 @@ simErrorHasBlockIOLogged :: -> m (HasFS m HandleMock, HasBlockIO m HandleMock) simErrorHasBlockIOLogged fsVar errorsVar logVar = do let hfs = simErrorHasFSLogged fsVar errorsVar logVar - hbio <- fromHasFS hfs + hbio <- unsafeFromHasFS hfs pure (hfs, hbio) -- | Produce a simulated file system with injected errors and a logger for those From 254b95a0a8f4cbf25e2547e7183eb2a182a9f433 Mon Sep 17 00:00:00 2001 From: Joris Dral Date: Mon, 23 Jun 2025 12:19:37 +0200 Subject: [PATCH 7/8] `blockio`: update cabal file for the upcoming release of `blockio-0.1.0.0` --- blockio/blockio.cabal | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/blockio/blockio.cabal b/blockio/blockio.cabal index ebeacbd37..70cdccc71 100644 --- a/blockio/blockio.cabal +++ b/blockio/blockio.cabal @@ -18,7 +18,7 @@ license-files: author: Duncan Coutts, Joris Dral, Matthias Heinzel, Wolfgang Jeltsch, Wen Kokke, and Alex Washburn -maintainer: TODO: MAINTAINER EMAIL +maintainer: joris@well-typed.com copyright: (c) 2023 Input Output Global, Inc. (IOG) (c) 2023-2025 INTERSECT @@ -33,13 +33,11 @@ source-repository head location: https://github.com/IntersectMBO/lsm-tree subdir: blockio --- TODO: this tag obviously does not exist yet because the package has not --- been published source-repository this type: git location: https://github.com/IntersectMBO/lsm-tree - tag: blockio-0.1.0.0 subdir: blockio + tag: blockio-0.1.0.0 common warnings ghc-options: @@ -117,7 +115,7 @@ test-suite test , bytestring , fs-api , primitive - , QuickCheck ^>=2.15.0.1 + , QuickCheck >=2.15.0.1 , tasty , tasty-hunit , tasty-quickcheck @@ -132,12 +130,12 @@ library sim hs-source-dirs: src-sim exposed-modules: System.FS.BlockIO.Sim build-depends: - , base >=4.16 && <4.22 + , base >=4.16 && <4.22 , blockio - , bytestring ^>=0.11.4.0 || ^>=0.12.1.0 + , bytestring ^>=0.11 || ^>=0.12 , fs-api ^>=0.4 , fs-sim ^>=0.4 - , io-classes ^>=1.6 || ^>=1.7 || ^>=1.8.0.1 + , io-classes ^>=1.6 || ^>=1.7 || ^>=1.8.0.1 , io-classes:strict-stm , primitive ^>=0.9 From 359f03c0f5a662ffaa18ecfdb827b530d4bf4a4a Mon Sep 17 00:00:00 2001 From: Joris Dral Date: Mon, 7 Jul 2025 12:56:09 +0200 Subject: [PATCH 8/8] `blockio`: update package description, update readme, add example --- blockio/README.md | 51 ++++- blockio/blockio.cabal | 24 ++- .../src-macos/System/FS/BlockIO/Internal.hs | 2 +- blockio/src-sim/System/FS/BlockIO/Sim.hs | 2 +- .../src-windows/System/FS/BlockIO/Internal.hs | 2 +- blockio/src/System/FS/BlockIO.hs | 195 ++++++++++++++++++ blockio/src/System/FS/BlockIO/API.hs | 14 +- blockio/src/System/FS/BlockIO/IO.hs | 17 +- blockio/src/System/FS/BlockIO/Serial.hs | 2 +- 9 files changed, 273 insertions(+), 36 deletions(-) create mode 100644 blockio/src/System/FS/BlockIO.hs diff --git a/blockio/README.md b/blockio/README.md index e4ca4be2b..9a7081773 100644 --- a/blockio/README.md +++ b/blockio/README.md @@ -1,8 +1,49 @@ # blockio -This packages defines an abstract interface for batched, asynchronous I\/O, -for use with the abstract interface for file system I\/O defined by the -[fs-api](https://hackage.haskell.org/package/fs-api) package. +Perform batches of disk I/O operations. Performing batches of disk I/O can lead +to performance improvements over performing each disk I/O operation +individually. Performing batches of disk I/O *concurrently* can lead to an even +bigger performance improvement depending on the implementation of batched I/O. -The /sim/ sub-library of this package defines /simulated/ batched, asynchronous I\/O -for use with the [fs-sim](https://hackage.haskell.org/package/fs-sim) package. +The batched I/O functionality in the library is separated into an *abstract +interface* and *implementations* of that abstract interface. The advantage of +programming against an abstract interface is that code can be agnostic to the +implementation of the interface, allowing implementations to be freely swapped +out. The library provides multiple implementations of batched I/O: +platform-dependent implementations using the *real* file system (using +asynchronous I/O), and a simulated implementation for testing purposes. + +See the `System.FS.BlockIO` module for an example of how to use the library. + +On Linux systems the *real* implementation is backed by +[blockio-uring](https://hackage.haskell.org/package/blockio-uring), a library +for asynchronous I/O that achieves good performance when performing batches +concurrently. On Windows and MacOS systems the *real* implementation currently +simply performs each I/O operation sequentially, which should achieve about the +same performance as using non-batched I/O, but the library could be extended +with asynchronous I/O implementations for Windows and MacOS as well. The +simulated implementation also performs each I/O operation sequentially. + +As mentioned before, the batched I/O functionality is separated into an +*abstract interface* and *implementations* of that abstract interface. The +advantage of programming against an abstract interface is that code can be +agnostic to the implementation of the interface. For example, we could run code +in production using the real file system, but we could also run the same code in +a testing environment using a simulated file system. We could even switch from a +default implementation to a more performant implementation in production if the +performant implementation is available. Lastly, the abstract interface allows us +to program against the file system in a uniform manner across different +platforms, i.e., operating systems. + +The `blockio` library defines the abstract interface for batched I/O. The +library is an extension of the +[fs-api](https://hackage.haskell.org/package/fs-api) library, which defines an +abstract interface for (basic) file system I/O. Both `blockio` and `fs-api` +provide an implementation of their interfaces using the real file system in +`IO`. + +The `blockio:sim` sub-library defines an implementation of the abstract +interface from `blockio` that *simulates* batched I/O. This sub-library is an +extension of the [fs-sim](https://hackage.haskell.org/package/fs-sim) library, +which defines an implementation of the abstract interface from `fs-api` that +simulates (basic) file system I/O. \ No newline at end of file diff --git a/blockio/blockio.cabal b/blockio/blockio.cabal index 70cdccc71..a08cdce2c 100644 --- a/blockio/blockio.cabal +++ b/blockio/blockio.cabal @@ -1,14 +1,23 @@ cabal-version: 3.4 name: blockio version: 0.1.0.0 -synopsis: Abstract interface for batched, asynchronous I/O +synopsis: Perform batches of disk I/O operations. description: - This packages defines an abstract interface for batched, asynchronous I\/O, - for use with the abstract interface for file system I\/O defined by the - [fs-api](https://hackage.haskell.org/package/fs-api) package. - - The /sim/ sub-library of this package defines /simulated/ batched, asynchronous I\/O - for use with the [fs-sim](https://hackage.haskell.org/package/fs-sim) package. + Perform batches of disk I\/O operations. Performing batches of disk I\/O can + lead to performance improvements over performing each disk I\/O operation + individually. Performing batches of disk I\/O /concurrently/ can lead to an + even bigger performance improvement depending on the implementation of batched + I\/O. + + The batched I\/O functionality in the library is separated into an /abstract/ + /interface/ and /implementations/ of that abstract interface. The advantage of + programming against an abstract interface is that code can be agnostic to the + implementation of the interface, allowing implementations to be freely swapped + out. The library provides multiple implementations of batched I\/O: + platform-dependent implementations using the /real/ file system (with + asynchronous I\/O), and a simulated implementation for testing purposes. + + See the "System.FS.BlockIO" module for an example of how to use the library. license: Apache-2.0 license-files: @@ -65,6 +74,7 @@ library import: language, warnings hs-source-dirs: src exposed-modules: + System.FS.BlockIO System.FS.BlockIO.API System.FS.BlockIO.IO System.FS.BlockIO.Serial.Internal diff --git a/blockio/src-macos/System/FS/BlockIO/Internal.hs b/blockio/src-macos/System/FS/BlockIO/Internal.hs index 0a24a0701..7da24ebc7 100644 --- a/blockio/src-macos/System/FS/BlockIO/Internal.hs +++ b/blockio/src-macos/System/FS/BlockIO/Internal.hs @@ -14,7 +14,7 @@ import qualified System.Posix.Files as Unix import qualified System.Posix.Unistd as Unix -- | For now we use the portable serial implementation of HasBlockIO. If you --- want to provide a proper async I/O implementation for OSX, then this is where +-- want to provide a proper async I\/O implementation for OSX, then this is where -- you should put it. -- -- The recommended choice would be to use the POSIX AIO API. diff --git a/blockio/src-sim/System/FS/BlockIO/Sim.hs b/blockio/src-sim/System/FS/BlockIO/Sim.hs index ee34a7b42..e04f04986 100644 --- a/blockio/src-sim/System/FS/BlockIO/Sim.hs +++ b/blockio/src-sim/System/FS/BlockIO/Sim.hs @@ -92,7 +92,7 @@ unsafeFromHasFS hfs = hfs where -- TODO: It should be possible for the implementations and simulation to - -- throw an FsError when doing file I/O with misaligned byte arrays after + -- throw an FsError when doing file I\/O with misaligned byte arrays after -- hSetNoCache. Maybe they should? It might be nicest to move hSetNoCache -- into fs-api and fs-sim because we'd need access to the internals. hSetNoCache _h _b = pure () diff --git a/blockio/src-windows/System/FS/BlockIO/Internal.hs b/blockio/src-windows/System/FS/BlockIO/Internal.hs index 09290f107..185a2b227 100644 --- a/blockio/src-windows/System/FS/BlockIO/Internal.hs +++ b/blockio/src-windows/System/FS/BlockIO/Internal.hs @@ -17,7 +17,7 @@ import qualified System.Win32.File as Windows import qualified System.Win32.HardLink as Windows -- | For now we use the portable serial implementation of HasBlockIO. If you --- want to provide a proper async I/O implementation for Windows, then this is +-- want to provide a proper async I\/O implementation for Windows, then this is -- where you should put it. -- -- The recommended choice would be to use the Win32 IOCP API. diff --git a/blockio/src/System/FS/BlockIO.hs b/blockio/src/System/FS/BlockIO.hs new file mode 100644 index 000000000..9694a995c --- /dev/null +++ b/blockio/src/System/FS/BlockIO.hs @@ -0,0 +1,195 @@ +module System.FS.BlockIO ( + -- * Description + -- $description + + -- * Re-exports + module System.FS.BlockIO.API + , module System.FS.BlockIO.IO + + -- * Example + -- $example +) where + +import System.FS.BlockIO.API +import System.FS.BlockIO.IO + +{------------------------------------------------------------------------------- + Examples +-------------------------------------------------------------------------------} + +{- $description + + The 'HasBlockIO' record type defines an /abstract interface/. A value of a + 'HasBlockIO' type is what we call an /instance/ of the abstract interface, and + an instance is produced by a function that we call an /implementation/. In + principle, we can have multiple instances of the same implementation. + + There are currently two known implementations of the interface: + + * An implementation using the real file system, which can be found in the + "System.FS.BlockIO.IO" module. This implementation is platform-dependent, + but has largely similar observable behaviour. + + * An implementation using a simulated file system, which can be found in the + @System.FS.BlockIO.Sim@ module of the @blockio:sim@ sublibrary. This + implementation is uniform across platforms. + + The 'HasBlockIO' abstract interface is an extension of the 'HasFS' abstract + interface that is provided by the + [@fs-api@](https://hackage.haskell.org/package/fs-api) package. Whereas + 'HasFS' defines many primitive functions, for example for opening a file, the + main feature of 'HasBlockIO' is to define a function for performing batched + I\/O. As such, users of @blockio@ will more often than not need to pass both a + 'HasFS' and a 'HasBlockIO' instance to their functions. +-} + +{- $example + + >>> import Control.Monad + >>> import Control.Monad.Primitive + >>> import Data.Primitive.ByteArray + >>> import Data.Vector qualified as V + >>> import Data.Word + >>> import Debug.Trace + >>> import System.FS.API as FS + >>> import System.FS.BlockIO.IO + >>> import System.FS.BlockIO.API + >>> import System.FS.IO + + The main feature of the 'HasBlockIO' abstract interface is that it provides a + function for performing batched I\/O using 'submitIO'. Depending on the + implementation of the interface, performing I\/O in batches concurrently using + 'submitIO' can be much faster than performing each I\/O operation in a + sequential order. We will not go into detail about this performance + consideration here, but more information can be found in the + "System.FS.BlockIO.IO" module. Instead, we will show an example of how + 'submitIO' can be used in your own projects. + + We aim to build an example that writes some contents to a file using + 'submitIO', and then reads the contents out again using 'submitIO'. The file + contents will simply be bytes. + + >>> type Byte = Word8 + + The first part of the example is to write out bytes to a given file path using + 'submitIO'. We define a @writeFile@ function that does just that. The file is + assumed to not exist already. + + The bytes, which are provided as separate bytes, are written into a buffer (a + mutable byte array). Note that the buffer should be /pinned/ memory to prevent + pointer aliasing. In the case of write operations, this buffer is used to + communicate to the backend what the bytes are that should be written to disk. + For simplicity, we create a separate 'IOOpWrite' instruction for each byte. + This instruction requires information about the write operation. In order of + appearence these are: the file handle to write bytes to, the offset into that + file, the buffer, the offset into that buffer, and the number of bytes to + write. Finally, all instructions are batched together and submitted in one go + using 'submitIO'. For each instruction, an 'IOResult' is returned, which + describes in this case the number of written bytes. If any of the instructions + failed to be performed, an error is thrown. We print the 'IOResult's to + standard output. + + Note that in real scenarios it would be much more performant to aggregate the + bytes into larger chunks, and to create an instruction for each of those + chunks. A sensible size for those chunks would be the disk page size (4Kb for + example), or a multiple of that disk page size. The disk page size is + typically the smallest chunk of memory that can be written to or read from the + disk. In some cases it is also desirable or even required that the buffers are + aligned to the disk page size. For example, alignment is required when using + direct I\/O. + + >>> :{ + writeFile :: + HasFS IO HandleIO + -> HasBlockIO IO HandleIO + -> FsPath + -> [Byte] + -> IO () + writeFile hasFS hasBlockIO file bytes = do + let numBytes = length bytes + FS.withFile hasFS file (FS.WriteMode FS.MustBeNew) $ \h -> do + buffer <- newPinnedByteArray numBytes + forM_ (zip [0..] bytes) $ \(i, byte) -> + let bufferOffset = fromIntegral i + in writeByteArray @Byte buffer bufferOffset byte + results <- submitIO hasBlockIO $ V.fromList [ + IOOpWrite h fileOffset buffer bufferOffset 1 + | i <- take numBytes [0..] + , let fileOffset = fromIntegral i + bufferOffset = FS.BufferOffset i + ] + print results + :} + + The second part of the example is to read a given number of bytes from a given + file path using 'submitIO'. We define a @readFile@ function that follows the + same general structure and behaviour as @writeFile@, but @readFile@ is of + course reading bytes instead of writing bytes. + + >>> :{ + readFile :: + HasFS IO HandleIO + -> HasBlockIO IO HandleIO + -> FsPath + -> Int + -> IO [Byte] + readFile hasFS hasBlockIO file numBytes = do + FS.withFile hasFS file FS.ReadMode $ \h -> do + buffer <- newPinnedByteArray numBytes + results <- submitIO hasBlockIO $ V.fromList [ + IOOpRead h fileOffset buffer bufferOffset numBytes + | i <- [0..3] + , let fileOffset = fromIntegral i + bufferOffset = FS.BufferOffset i + numBytes = 1 + ] + print results + forM (take numBytes [0..]) $ \i -> + let bufferOffset = i + in readByteArray @Byte buffer i + :} + + Now we can combine @writeFile@ and @readFile@ into a very small example called + @writeReadFile@, which does what we set out to do: write a few bytes to a + (temporary) file and read them out again using 'submitIO'. We also print the + bytes that were written and the bytes that were read, so that the user can + check by hand whether the bytes match. + + >>> :{ + writeReadFile :: HasFS IO HandleIO -> HasBlockIO IO HandleIO -> IO () + writeReadFile hasFS hasBlockIO = do + let file = FS.mkFsPath ["simple_example.txt"] + let bytesIn = [1,2,3,4] + print bytesIn + writeFile hasFS hasBlockIO file bytesIn + bytesOut <- readFile hasFS hasBlockIO file 4 + print bytesOut + FS.removeFile hasFS file + :} + + In order to run @writeReadFile@, we will need 'HasFS' and 'HasBlockIO' + instances. This is where the separation between interface and implementation + shines: @writeReadFile@ is agnostic to the implementations of the the abstract + interfaces, so we could pick any implementations and slot them in. For this + example we will use the /real/ implementation from "System.FS.BlockIO.IO", but + we could have used the /simulated/ implementation from the @blockio:sim@ + sub-library just as well. We define the @example@ function, which uses + 'withIOHasBlockIO' to instantiate both a 'HasFS' and 'HasBlockIO' instance, + which we pass to 'writeReadFile'. + + >>> :{ + example :: IO () + example = + withIOHasBlockIO (MountPoint "") defaultIOCtxParams $ \hasFS hasBlockIO -> + writeReadFile hasFS hasBlockIO + :} + + Finally, we can run the example to produce some output. As we can see, the + input bytes match the output bytes. + + >>> example + [1,2,3,4] + [IOResult 1,IOResult 1,IOResult 1,IOResult 1] + [IOResult 1,IOResult 1,IOResult 1,IOResult 1] + [1,2,3,4] +-} diff --git a/blockio/src/System/FS/BlockIO/API.hs b/blockio/src/System/FS/BlockIO/API.hs index 1322ea4e9..f95e53b1f 100644 --- a/blockio/src/System/FS/BlockIO/API.hs +++ b/blockio/src/System/FS/BlockIO/API.hs @@ -65,24 +65,13 @@ import Text.Printf -- again. Instead, the user should create a new instance of the interface. -- -- Note: there are a bunch of functions in the interface that have nothing to do --- with submitting large batches of I/O operations. In fact, only 'close' and +-- with submitting large batches of I\/O operations. In fact, only 'close' and -- 'submitIO' are related to that. All other functions were put in this record -- for simplicity because the authors of the library needed them and it was more -- straightforward to add them here then to add them to @fs-api@. Still these -- unrelated functions could and should all be moved into @fs-api@ at some point -- in the future. -- --- === Implementations --- --- There are currently two known implementations of the interface: --- --- * An implementation using the real file system, which can be found in the --- "System.FS.BlockIO.IO" module. This implementation is platform-dependent. --- --- * An implementation using a simulated file system, which can be found in the --- @System.FS.BlockIO.Sim@ module of the @blockio:sim@ sublibrary. This --- implementation is uniform across platforms. --- data HasBlockIO m h = HasBlockIO { -- | (Idempotent) close the IO context that is required for running -- 'submitIO'. @@ -206,6 +195,7 @@ ioopByteCount (IOOpWrite _ _ _ _ c) = c -- | Number of read/written bytes. newtype IOResult = IOResult ByteCount + deriving stock (Show, Eq) deriving newtype VP.Prim newtype instance VUM.MVector s IOResult = MV_IOResult (VP.MVector s IOResult) diff --git a/blockio/src/System/FS/BlockIO/IO.hs b/blockio/src/System/FS/BlockIO/IO.hs index 0b798cadc..16d48a0ab 100644 --- a/blockio/src/System/FS/BlockIO/IO.hs +++ b/blockio/src/System/FS/BlockIO/IO.hs @@ -2,8 +2,9 @@ -- -- The implementation of the 'HasBlockIO' interface provided in this module is -- platform-dependent. Most importantly, on Linux, the implementation of --- 'submitIO' is backed by @blockio-uring@: a library for asynchronous I/O. On --- Windows and MacOS, the implementation of 'submitIO' only supports serial I/O. +-- 'submitIO' is backed by @blockio-uring@: a library for asynchronous I\/O. On +-- Windows and MacOS, the implementation of 'submitIO' only supports serial +-- I\/O. module System.FS.BlockIO.IO ( -- * Implementation details #impl# -- $impl @@ -36,9 +37,9 @@ import System.FS.IO (HandleIO, ioHasFS) reason, we include below some documentation about the effects of calling the interface functions on different platforms. - Note: if the @serialblockio@ Cabal flag is enabled, then the Linux implementation - uses a mocked context and serial I/O for 'close' and 'submitIO', just like the - MacOS and Windows implementations do. + Note: if the @serialblockio@ Cabal flag is enabled, then the Linux + implementation uses a mocked context and serial I\/O for 'close' and + 'submitIO', just like the MacOS and Windows implementations do. [IO context]: When an instance of the 'HasBlockIO' interface for Linux systems is initialised, an @io_uring@ context is created using the @@ -57,11 +58,11 @@ import System.FS.IO (HandleIO, ioHasFS) * MacOS: close the mocked context * Windows: close the mocked context - ['submitIO']: Submit a batch of I/O operations using: + ['submitIO']: Submit a batch of I\/O operations using: * Linux: the @submitIO@ function from the @blockio-uring@ package - * MacOS: serial I/O using a 'HasFS' - * Windows: serial I/O using a 'HasFS' + * MacOS: serial I\/O using a 'HasFS' + * Windows: serial I\/O using a 'HasFS' ['hSetNoCache']: diff --git a/blockio/src/System/FS/BlockIO/Serial.hs b/blockio/src/System/FS/BlockIO/Serial.hs index a6276ca7e..4f992bd21 100644 --- a/blockio/src/System/FS/BlockIO/Serial.hs +++ b/blockio/src/System/FS/BlockIO/Serial.hs @@ -28,7 +28,7 @@ import qualified System.FS.BlockIO.IO.Internal as IOI -> IO (API.HasBlockIO IO h) #-} -- | IO instantiation of 'HasBlockIO', using an existing 'HasFS'. Thus this --- implementation does not take advantage of parallel I/O. +-- implementation does not take advantage of parallel I\/O. serialHasBlockIO :: (MonadThrow m, MonadMVar m, PrimMonad m, Eq h) => (Handle h -> Bool -> m ())