Skip to content

Commit eb8d04c

Browse files
authored
Merge pull request #645 from IntersectMBO/jdral/issue-580
Fix key generation for the compact index tests
2 parents 9f5f844 + 1aecaa6 commit eb8d04c

File tree

5 files changed

+92
-59
lines changed

5 files changed

+92
-59
lines changed

src-extras/Database/LSMTree/Extras/Generators.hs

Lines changed: 15 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,6 @@ module Database.LSMTree.Extras.Generators (
3737
, isKeyForIndexCompact
3838
, KeyForIndexCompact (..)
3939
, BiasedKey (..)
40-
, BiasedKeyForIndexCompact (..)
4140
-- * helpers
4241
, shrinkVec
4342
) where
@@ -184,24 +183,24 @@ instance Arbitrary2 Entry where
184183
--
185184
-- Also useful for failing tests that have keys as inputs, because the printed
186185
-- 'WithSerialised' values will show both keys and their serialised form.
187-
data WithSerialised k = TestKey k SerialisedKey
186+
data WithSerialised k = WithSerialised k SerialisedKey
188187
deriving stock Show
189188

190189
instance Eq k => Eq (WithSerialised k) where
191-
TestKey k1 _ == TestKey k2 _ = k1 == k2
190+
WithSerialised k1 _ == WithSerialised k2 _ = k1 == k2
192191

193192
instance Ord k => Ord (WithSerialised k) where
194-
TestKey k1 _ `compare` TestKey k2 _ = k1 `compare` k2
193+
WithSerialised k1 _ `compare` WithSerialised k2 _ = k1 `compare` k2
195194

196195
instance (Arbitrary k, SerialiseKey k) => Arbitrary (WithSerialised k) where
197196
arbitrary = do
198197
x <- arbitrary
199-
pure $ TestKey x (serialiseKey x)
200-
shrink (TestKey k _) = [TestKey k' (serialiseKey k') | k' <- shrink k]
198+
pure $ WithSerialised x (serialiseKey x)
199+
shrink (WithSerialised k _) = [WithSerialised k' (serialiseKey k') | k' <- shrink k]
201200

202201
instance SerialiseKey k => SerialiseKey (WithSerialised k) where
203-
serialiseKey (TestKey _ (SerialisedKey bytes)) = bytes
204-
deserialiseKey bytes = TestKey (S.Class.deserialiseKey bytes) (SerialisedKey bytes)
202+
serialiseKey (WithSerialised _ (SerialisedKey bytes)) = bytes
203+
deserialiseKey bytes = WithSerialised (S.Class.deserialiseKey bytes) (SerialisedKey bytes)
205204

206205
{-------------------------------------------------------------------------------
207206
Other number newtypes
@@ -256,9 +255,14 @@ toAppend (MultiPageOneKey k n) = AppendMultiPage k n
256255
shrinkLogicalPageSummary :: Arbitrary k => LogicalPageSummary k -> [LogicalPageSummary k]
257256
shrinkLogicalPageSummary = \case
258257
OnePageOneKey k -> OnePageOneKey <$> shrink k
259-
OnePageManyKeys k1 k2 -> OnePageManyKeys <$> shrink k1 <*> shrink k2
260-
MultiPageOneKey k n -> [MultiPageOneKey k' n | k' <- shrink k]
261-
<> [MultiPageOneKey k n' | n' <- shrink n]
258+
OnePageManyKeys k1 k2 -> [
259+
OnePageManyKeys k1' k2'
260+
| (k1', k2') <- shrink (k1, k2)
261+
]
262+
MultiPageOneKey k n -> [
263+
MultiPageOneKey k' n'
264+
| (k', n') <- shrink (k, n)
265+
]
262266

263267
{-------------------------------------------------------------------------------
264268
Sequences of (logical\/true) pages
@@ -592,22 +596,6 @@ instance Arbitrary BiasedKey where
592596

593597
deriving newtype instance SerialiseKey BiasedKey
594598

595-
newtype BiasedKeyForIndexCompact =
596-
BiasedKeyForIndexCompact { getBiasedKeyForIndexCompact :: RawBytes }
597-
deriving stock (Eq, Ord, Show)
598-
deriving newtype NFData
599-
600-
instance Arbitrary BiasedKeyForIndexCompact where
601-
arbitrary = arbitraryBiasedKey BiasedKeyForIndexCompact genKeyForIndexCompact
602-
603-
shrink (BiasedKeyForIndexCompact rb) =
604-
[ BiasedKeyForIndexCompact rb'
605-
| rb' <- shrink rb
606-
, isKeyForIndexCompact rb'
607-
]
608-
609-
deriving newtype instance SerialiseKey BiasedKeyForIndexCompact
610-
611599
{-------------------------------------------------------------------------------
612600
Unsliced
613601
-------------------------------------------------------------------------------}

src/Database/LSMTree/Internal/Index/Compact.hs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -420,7 +420,7 @@ search k IndexCompact{..} =
420420
bitIndexFromToRev (BoundInclusive 0) (BoundInclusive i) (Bit False) icClashes
421421
-- The TB map is consulted to find the closest key smaller than k.
422422
!i2 = maybe (PageNo 0) snd $
423-
Map.lookupLE (unsafeNoAssertMakeUnslicedKey k) icTieBreaker
423+
Map.lookupLE (makeUnslicedKey k) icTieBreaker
424424
-- If i2 < i1, then it means the clashing pages were all just part
425425
-- of the same larger-than-page value. Entries are only included
426426
-- in the TB map if the clash was a *proper* clash.

src/Database/LSMTree/Internal/Unsliced.hs

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ module Database.LSMTree.Internal.Unsliced (
1212
, makeUnslicedKey
1313
, unsafeMakeUnslicedKey
1414
, fromUnslicedKey
15-
, unsafeNoAssertMakeUnslicedKey
1615
) where
1716

1817
import Control.DeepSeq (NFData)
@@ -44,9 +43,6 @@ makeUnsliced bytes
4443
unsafeMakeUnsliced :: RawBytes -> Unsliced RawBytes
4544
unsafeMakeUnsliced bytes = assert (precondition bytes) (Unsliced (getByteArray bytes))
4645

47-
unsafeNoAssertMakeUnsliced :: RawBytes -> Unsliced RawBytes
48-
unsafeNoAssertMakeUnsliced bytes = Unsliced (getByteArray bytes)
49-
5046
fromUnsliced :: Unsliced RawBytes -> RawBytes
5147
fromUnsliced (Unsliced ba) = RawBytes (mkPrimVector 0 (sizeofByteArray ba) ba)
5248

@@ -66,9 +62,6 @@ makeUnslicedKey (SerialisedKey rb) = from (makeUnsliced rb)
6662
unsafeMakeUnslicedKey :: SerialisedKey -> Unsliced SerialisedKey
6763
unsafeMakeUnslicedKey (SerialisedKey rb) = from (unsafeMakeUnsliced rb)
6864

69-
unsafeNoAssertMakeUnslicedKey :: SerialisedKey -> Unsliced SerialisedKey
70-
unsafeNoAssertMakeUnslicedKey (SerialisedKey rb) = from (unsafeNoAssertMakeUnsliced rb)
71-
7265
fromUnslicedKey :: Unsliced SerialisedKey -> SerialisedKey
7366
fromUnslicedKey x = SerialisedKey (fromUnsliced (to x))
7467

test/Test/Database/LSMTree/Generators.hs

Lines changed: 4 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -66,19 +66,10 @@ tests = testGroup "Test.Database.LSMTree.Generators" [
6666
, testGroup "KeyForIndexCompact" $
6767
prop_arbitraryAndShrinkPreserveInvariant noTags $
6868
isKeyForIndexCompact . getKeyForIndexCompact
69-
, testGroup "BiasedKeyForIndexCompact" $
70-
prop_arbitraryAndShrinkPreserveInvariant noTags $
71-
isKeyForIndexCompact . getBiasedKeyForIndexCompact
72-
, testGroup "lists of key/op pairs" $
73-
[ testGroup "BiasedKey" $
74-
prop_arbitraryAndShrinkPreserveInvariant
75-
(labelTestKOps @BiasedKey)
76-
deepseqInvariant
77-
, testGroup "BiasedKeyForIndexCompact" $
78-
prop_arbitraryAndShrinkPreserveInvariant
79-
(labelTestKOps @BiasedKeyForIndexCompact)
80-
deepseqInvariant
81-
]
69+
, testGroup "BiasedKey" $
70+
prop_arbitraryAndShrinkPreserveInvariant
71+
(labelTestKOps @BiasedKey)
72+
deepseqInvariant
8273
, testGroup "helpers"
8374
[ testProperty "prop_shrinkVec" $ \vec ->
8475
shrinkVec (QC.shrink @Int) vec === map VP.fromList (QC.shrink (VP.toList vec))

test/Test/Database/LSMTree/Internal/Index/Compact.hs

Lines changed: 72 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,9 @@ import qualified Data.Vector.Unboxed as VU
2727
import qualified Data.Vector.Unboxed.Base as VU
2828
import Data.Word
2929
import Database.LSMTree.Extras
30-
import Database.LSMTree.Extras.Generators as Gen
30+
import Database.LSMTree.Extras.Generators (ChunkSize (..),
31+
LogicalPageSummaries, LogicalPageSummary (..), Pages (..),
32+
genRawBytes, isKeyForIndexCompact, labelPages, toAppends)
3133
import Database.LSMTree.Extras.Index (Append (..), appendToCompact)
3234
import Database.LSMTree.Internal.BitMath
3335
import Database.LSMTree.Internal.Chunk as Chunk (toByteString)
@@ -36,6 +38,8 @@ import Database.LSMTree.Internal.Index.Compact
3638
import Database.LSMTree.Internal.Index.CompactAcc
3739
import Database.LSMTree.Internal.Page (PageNo (PageNo), PageSpan,
3840
multiPage, singlePage)
41+
import Database.LSMTree.Internal.RawBytes (RawBytes (..))
42+
import qualified Database.LSMTree.Internal.RawBytes as RB
3943
import Database.LSMTree.Internal.Serialise
4044
import Numeric (showHex)
4145
import Prelude hiding (max, min, pi)
@@ -51,14 +55,16 @@ import Text.Printf (printf)
5155

5256
tests :: TestTree
5357
tests = testGroup "Test.Database.LSMTree.Internal.Index.Compact" [
54-
testProperty "prop_distribution @BiasedKeyForIndexCompact" $
55-
prop_distribution @BiasedKeyForIndexCompact
58+
testGroup "TestKey" $
59+
prop_arbitraryAndShrinkPreserveInvariant @TestKey noTags isTestKey
60+
, testProperty "prop_distribution @TestKey" $
61+
prop_distribution @TestKey
5662
, testProperty "prop_searchMinMaxKeysAfterConstruction" $
57-
prop_searchMinMaxKeysAfterConstruction @BiasedKeyForIndexCompact 100
63+
prop_searchMinMaxKeysAfterConstruction @TestKey 100
5864
, testProperty "prop_differentChunkSizesSameResults" $
59-
prop_differentChunkSizesSameResults @BiasedKeyForIndexCompact
65+
prop_differentChunkSizesSameResults @TestKey
6066
, testProperty "prop_singlesEquivMulti" $
61-
prop_singlesEquivMulti @BiasedKeyForIndexCompact
67+
prop_singlesEquivMulti @TestKey
6268
, testGroup "(De)serialisation" [
6369
testGroup "Chunks generator" $
6470
prop_arbitraryAndShrinkPreserveInvariant noTags chunksInvariant
@@ -119,14 +125,65 @@ tests = testGroup "Test.Database.LSMTree.Internal.Index.Compact" [
119125
, testProperty "prop_roundtrip_chunks" $
120126
prop_roundtrip_chunks
121127
, testProperty "prop_roundtrip" $
122-
prop_roundtrip @BiasedKeyForIndexCompact
128+
prop_roundtrip @TestKey
123129
, testProperty "prop_total_deserialisation" $ withMaxSuccess 10000
124130
prop_total_deserialisation
125131
, testProperty "prop_total_deserialisation_whitebox" $ withMaxSuccess 10000
126132
prop_total_deserialisation_whitebox
127133
]
128134
]
129135

136+
{-------------------------------------------------------------------------------
137+
Test key
138+
-------------------------------------------------------------------------------}
139+
140+
-- | Key type for compact index tests
141+
--
142+
-- Tests outside this module don't have to worry about generating clashing keys.
143+
-- We can assume that the compact index handles clashes correctly, because we
144+
-- test this extensively in this module already.
145+
newtype TestKey = TestKey RawBytes
146+
deriving stock (Show, Eq, Ord)
147+
deriving newtype SerialiseKey
148+
149+
-- | Generate keys with a non-neglible probability of clashes. This generates
150+
-- sliced keys too.
151+
--
152+
-- Note: recall that keys /clash/ only if their primary bits (first 8 bytes)
153+
-- match. It does not matter whether the other bytes do not match.
154+
instance Arbitrary TestKey where
155+
arbitrary = do
156+
-- Generate primary bits from a relatively small distribution. This
157+
-- ensures that we get clashes between keys with a non-negligible
158+
-- probability.
159+
primBits <- do
160+
lastPrefixByte <- QC.getSmall <$> arbitrary
161+
pure $ RB.pack ([0,0,0,0,0,0,0] <> [lastPrefixByte])
162+
-- The rest of the bits after the primary bits can be anything
163+
restBits <- genRawBytes
164+
-- The compact index should store keys without retaining unused memory.
165+
-- Therefore, we generate slices of keys too.
166+
prefix <- elements [RB.pack [], RB.pack [0]]
167+
suffix <- elements [RB.pack [], RB.pack [0]]
168+
-- Combine the bytes and make sure to take out only the slice we need.
169+
let bytes = prefix <> primBits <> restBits <> suffix
170+
n = RB.size primBits + RB.size restBits
171+
bytes' = RB.take n $ RB.drop (RB.size prefix) bytes
172+
pure $ TestKey bytes'
173+
174+
-- Shrink keys extensively: most failures will occur in small counterexamples,
175+
-- so we don't have to limit the number of shrinks as much.
176+
shrink (TestKey bytes) = [
177+
TestKey bytes'
178+
| let RawBytes vec = bytes
179+
, vec' <- VP.fromList <$> shrink (VP.toList vec)
180+
, let bytes' = RawBytes vec'
181+
, isKeyForIndexCompact bytes'
182+
]
183+
184+
isTestKey :: TestKey -> Bool
185+
isTestKey (TestKey bytes) = isKeyForIndexCompact bytes
186+
130187
{-------------------------------------------------------------------------------
131188
Properties
132189
-------------------------------------------------------------------------------}
@@ -319,11 +376,15 @@ fromListSingles maxcsize apps = runST $ do
319376

320377
labelIndex :: IndexCompact -> (Property -> Property)
321378
labelIndex ic =
322-
QC.tabulate "# Clashes" [showPowersOf10 nclashes]
323-
. QC.tabulate "# Contiguous clash runs" [showPowersOf10 (length nscontig)]
324-
. QC.tabulate "Length of contiguous clash runs" (fmap (showPowersOf10 . snd) nscontig)
379+
checkCoverage
380+
. QC.tabulate "# Clashes" [showPowersOf 2 nclashes]
381+
. QC.cover 60 (nclashes > 0) "Has clashes"
382+
. QC.tabulate "# Contiguous clash runs" [showPowersOf 2 (length nscontig)]
383+
. QC.cover 30 (not (null nscontig)) "Has contiguous clash runs"
384+
. QC.tabulate "Length of contiguous clash runs" (fmap (showPowersOf 2 . snd) nscontig)
325385
. QC.tabulate "Contiguous clashes contain multi-page values" (fmap (show . fst) nscontig)
326-
. QC.classify (multiPageValuesClash ic) "Has clashing multi-page values"
386+
. QC.cover 3 (any fst nscontig) "Has contiguous clashes that contain multi-page values"
387+
. QC.cover 0.1 (multiPageValuesClash ic) "Has clashing multi-page values"
327388
where nclashes = countClashes ic
328389
nscontig = countContiguousClashes ic
329390

0 commit comments

Comments
 (0)