@@ -1284,11 +1284,40 @@ enum PreambleAction {
1284
1284
Absent ,
1285
1285
}
1286
1286
1287
- // TODO: Add test cases for the all-preamble and all-trailer cases
1288
-
1289
1287
// When we schedule a chunk we use the repetition index (or, if none exists, just the # of items
1290
1288
// in each chunk) to map a user requested range into a set of ChunkInstruction objects which tell
1291
1289
// us how exactly to read from the chunk.
1290
+ //
1291
+ // Examples:
1292
+ //
1293
+ // | Chunk 0 | Chunk 1 | Chunk 2 | Chunk 3 |
1294
+ // | xxxxyyyyzzz | zzzzzzzzz | zzzzzzzzz | aaabbcc |
1295
+ //
1296
+ // Full read (0..6)
1297
+ //
1298
+ // Chunk 0: (several rows, ends with trailer)
1299
+ // preamble: absent
1300
+ // rows_to_skip: 0
1301
+ // rows_to_take: 3 (x, y, z)
1302
+ // take_trailer: true
1303
+ //
1304
+ // Chunk 1: (all preamble, ends with trailer)
1305
+ // preamble: take
1306
+ // rows_to_skip: 0
1307
+ // rows_to_take: 0
1308
+ // take_trailer: true
1309
+ //
1310
+ // Chunk 2: (all preamble, no trailer)
1311
+ // preamble: take
1312
+ // rows_to_skip: 0
1313
+ // rows_to_take: 0
1314
+ // take_trailer: false
1315
+ //
1316
+ // Chunk 3: (several rows, no trailer or preamble)
1317
+ // preamble: absent
1318
+ // rows_to_skip: 0
1319
+ // rows_to_take: 3 (a, b, c)
1320
+ // take_trailer: false
1292
1321
#[ derive( Clone , Debug , PartialEq , Eq ) ]
1293
1322
struct ChunkInstructions {
1294
1323
// The index of the chunk to read
@@ -1303,13 +1332,13 @@ struct ChunkInstructions {
1303
1332
//
1304
1333
// If this is non-zero then preamble must not be Take
1305
1334
rows_to_skip : u64 ,
1306
- // How many complete (non-preamble / non-trailer) rows to take
1335
+ // How many rows to take. If a row splits across chunks then we will count the row in the first
1336
+ // chunk that contains the row.
1307
1337
rows_to_take : u64 ,
1308
1338
// A "trailer" is when a chunk ends with a partial list. If there is no repetition index there is
1309
1339
// never a trailer.
1310
1340
//
1311
- // It's possible for a chunk to be entirely trailer. This would mean the chunk starts with the beginning
1312
- // of a list and that list is continued in the next chunk.
1341
+ // A chunk that is all preamble may or may not have a trailer.
1313
1342
//
1314
1343
// If this is true then we want to include the trailer
1315
1344
take_trailer : bool ,
@@ -1401,7 +1430,10 @@ impl ChunkInstructions {
1401
1430
preamble : PreambleAction :: Take ,
1402
1431
rows_to_skip : 0 ,
1403
1432
rows_to_take : 0 ,
1404
- take_trailer : false ,
1433
+ // We still need to look at has_trailer to distinguish between "all preamble
1434
+ // and row ends at end of chunk" and "all preamble and row bleeds into next
1435
+ // chunk". Both cases will have 0 rows available.
1436
+ take_trailer : chunk. has_trailer ,
1405
1437
} ) ;
1406
1438
// Only set need_preamble = false if the chunk has at least one row,
1407
1439
// Or we are reaching the last block,
@@ -1441,13 +1473,11 @@ impl ChunkInstructions {
1441
1473
} else {
1442
1474
PreambleAction :: Absent
1443
1475
} ;
1444
- let mut rows_to_take_no_trailer = rows_to_take;
1445
1476
1446
1477
// Are we taking the trailer? If so, make sure we mark that we need the preamble
1447
1478
if rows_to_take == rows_avail && chunk. has_trailer {
1448
1479
take_trailer = true ;
1449
1480
need_preamble = true ;
1450
- rows_to_take_no_trailer -= 1 ;
1451
1481
} else {
1452
1482
need_preamble = false ;
1453
1483
} ;
@@ -1456,7 +1486,7 @@ impl ChunkInstructions {
1456
1486
preamble,
1457
1487
chunk_idx : block_index,
1458
1488
rows_to_skip : to_skip,
1459
- rows_to_take : rows_to_take_no_trailer ,
1489
+ rows_to_take,
1460
1490
take_trailer,
1461
1491
} ) ;
1462
1492
@@ -1498,7 +1528,7 @@ impl ChunkInstructions {
1498
1528
) -> ( ChunkDrainInstructions , bool ) {
1499
1529
// If we need the preamble then we shouldn't be skipping anything
1500
1530
debug_assert ! ( !* need_preamble || * skip_in_chunk == 0 ) ;
1501
- let mut rows_avail = self . rows_to_take - * skip_in_chunk;
1531
+ let rows_avail = self . rows_to_take - * skip_in_chunk;
1502
1532
let has_preamble = self . preamble != PreambleAction :: Absent ;
1503
1533
let preamble_action = match ( * need_preamble, has_preamble) {
1504
1534
( true , true ) => PreambleAction :: Take ,
@@ -1507,16 +1537,16 @@ impl ChunkInstructions {
1507
1537
( false , false ) => PreambleAction :: Absent ,
1508
1538
} ;
1509
1539
1510
- // Did the scheduled chunk have a trailer? If so, we have one extra row available
1511
- if self . take_trailer {
1512
- rows_avail += 1 ;
1513
- }
1514
-
1515
1540
// How many rows are we actually taking in this take step (including the preamble
1516
1541
// and trailer both as individual rows)
1517
1542
let rows_taking = if * rows_desired >= rows_avail {
1518
1543
// We want all the rows. If there is a trailer we are grabbing it and will need
1519
1544
// the preamble of the next chunk
1545
+ // If there is a trailer and we are taking all the rows then we need the preamble
1546
+ // of the next chunk.
1547
+ //
1548
+ // Also, if this chunk is entirely preamble and its row continues (rows_avail == 0 && take_trailer) then we
1549
+ // need the preamble of the next chunk.
1520
1550
* need_preamble = self . take_trailer ;
1521
1551
rows_avail
1522
1552
} else {
@@ -1682,14 +1712,7 @@ impl StructuralPageScheduler for MiniBlockScheduler {
1682
1712
num_rows,
1683
1713
chunk_instructions
1684
1714
. iter( )
1685
- . map( |ci| {
1686
- let taken = ci. rows_to_take;
1687
- if ci. take_trailer {
1688
- taken + 1
1689
- } else {
1690
- taken
1691
- }
1692
- } )
1715
+ . map( |ci| ci. rows_to_take)
1693
1716
. sum:: <u64 >( )
1694
1717
) ;
1695
1718
@@ -1698,6 +1721,7 @@ impl StructuralPageScheduler for MiniBlockScheduler {
1698
1721
. map ( |ci| ci. chunk_idx )
1699
1722
. unique ( )
1700
1723
. collect :: < Vec < _ > > ( ) ;
1724
+
1701
1725
let mut loaded_chunks = self . lookup_chunks ( & chunks_needed) ;
1702
1726
let chunk_ranges = loaded_chunks
1703
1727
. iter ( )
@@ -3230,7 +3254,7 @@ const MINIBLOCK_ALIGNMENT: usize = 8;
3230
3254
/// If the data is wide then we zip together the repetition and definition value
3231
3255
/// with the value data into a single buffer. This approach is called "zipped".
3232
3256
///
3233
- /// If there is any repetition information then we create a repetition index (TODO)
3257
+ /// If there is any repetition information then we create a repetition index
3234
3258
///
3235
3259
/// In addition, the compression process may create zero or more metadata buffers.
3236
3260
/// For example, a dictionary compression will create dictionary metadata. Any
@@ -3681,9 +3705,9 @@ impl PrimitiveStructuralEncoder {
3681
3705
let repdef = RepDefBuilder :: serialize ( repdefs) ;
3682
3706
3683
3707
if let DataBlock :: AllNull ( _null_block) = data {
3684
- // If we got here then all the data is null but we have rep/def information that
3685
- // we need to store .
3686
- todo ! ( )
3708
+ // We should not be using mini-block for all-null. There are other structural
3709
+ // encodings for that.
3710
+ unreachable ! ( )
3687
3711
}
3688
3712
3689
3713
let num_items = data. num_values ( ) ;
@@ -4753,7 +4777,7 @@ mod tests {
4753
4777
chunk_idx: 0 ,
4754
4778
preamble: PreambleAction :: Absent ,
4755
4779
rows_to_skip: 0 ,
4756
- rows_to_take: 5 ,
4780
+ rows_to_take: 6 ,
4757
4781
take_trailer: true ,
4758
4782
} ,
4759
4783
ChunkInstructions {
@@ -4767,7 +4791,7 @@ mod tests {
4767
4791
chunk_idx: 2 ,
4768
4792
preamble: PreambleAction :: Absent ,
4769
4793
rows_to_skip: 0 ,
4770
- rows_to_take: 4 ,
4794
+ rows_to_take: 5 ,
4771
4795
take_trailer: true ,
4772
4796
} ,
4773
4797
ChunkInstructions {
@@ -4834,7 +4858,7 @@ mod tests {
4834
4858
chunk_idx: 0 ,
4835
4859
preamble: PreambleAction :: Absent ,
4836
4860
rows_to_skip: 5 ,
4837
- rows_to_take: 0 ,
4861
+ rows_to_take: 1 ,
4838
4862
take_trailer: true ,
4839
4863
} ,
4840
4864
ChunkInstructions {
0 commit comments