Skip to content

Commit 63cd878

Browse files
committed
Add end to end sqllogictest
1 parent 7e1fc4f commit 63cd878

File tree

4 files changed

+99
-8
lines changed

4 files changed

+99
-8
lines changed

datafusion/datasource-parquet/src/row_group_filter.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,7 @@ impl RowGroupAccessPlanFilter {
202202

203203
// Check if any of the matched row groups are fully contained by the predicate
204204
self.identify_fully_matched_row_groups(
205-
fully_contained_candidates_original_idx,
205+
&fully_contained_candidates_original_idx,
206206
arrow_schema,
207207
parquet_schema,
208208
groups,
@@ -228,7 +228,7 @@ impl RowGroupAccessPlanFilter {
228228
/// Note: This optimization is relatively inexpensive for a limited number of row groups.
229229
fn identify_fully_matched_row_groups(
230230
&mut self,
231-
candidate_row_group_indices: Vec<usize>,
231+
candidate_row_group_indices: &[usize],
232232
arrow_schema: &Schema,
233233
parquet_schema: &SchemaDescriptor,
234234
groups: &[RowGroupMetaData],

datafusion/physical-plan/src/metrics/value.rs

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -380,13 +380,24 @@ impl Display for PruningMetrics {
380380
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
381381
let matched = self.matched.load(Ordering::Relaxed);
382382
let total = self.pruned.load(Ordering::Relaxed) + matched;
383+
let fully_matched = self.fully_matched.load(Ordering::Relaxed);
383384

384-
write!(
385-
f,
386-
"{} total → {} matched",
387-
human_readable_count(total),
388-
human_readable_count(matched)
389-
)
385+
if fully_matched != 0 {
386+
write!(
387+
f,
388+
"{} total → {} matched -> {} fully matched",
389+
human_readable_count(total),
390+
human_readable_count(matched),
391+
human_readable_count(fully_matched)
392+
)
393+
} else {
394+
write!(
395+
f,
396+
"{} total → {} matched",
397+
human_readable_count(total),
398+
human_readable_count(matched)
399+
)
400+
}
390401
}
391402
}
392403

@@ -921,8 +932,11 @@ impl MetricValue {
921932
) => {
922933
let pruned = other_pruning_metrics.pruned.load(Ordering::Relaxed);
923934
let matched = other_pruning_metrics.matched.load(Ordering::Relaxed);
935+
let fully_matched =
936+
other_pruning_metrics.fully_matched.load(Ordering::Relaxed);
924937
pruning_metrics.add_pruned(pruned);
925938
pruning_metrics.add_matched(matched);
939+
pruning_metrics.add_fully_matched(fully_matched);
926940
}
927941
(
928942
Self::Ratio { ratio_metrics, .. },
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
statement ok
19+
set datafusion.execution.parquet.pushdown_filters = true;
20+
21+
22+
statement ok
23+
CREATE TABLE t AS VALUES
24+
('Anow Vole', 7),
25+
('Brown Bear', 133),
26+
('Gray Wolf', 82),
27+
('Lynx', 71),
28+
('Red Fox', 40),
29+
('Alpine Bat', 6),
30+
('Nlpine Ibex', 101),
31+
('Nlpine Goat', 76),
32+
('Nlpine Sheep', 83),
33+
('Europ. Mole', 4),
34+
('Polecat', 16),
35+
('Alpine Ibex', 97);
36+
37+
statement ok
38+
COPY (SELECT column1 as a, column2 as b FROM t)
39+
TO 'test_files/scratch/limit_pruning/data.parquet'
40+
STORED AS PARQUET
41+
OPTIONS (
42+
'format.max_row_group_size' '3'
43+
);
44+
45+
statement ok
46+
drop table t;
47+
48+
statement ok
49+
CREATE EXTERNAL TABLE t
50+
STORED AS PARQUET
51+
LOCATION 'test_files/scratch/limit_pruning/data.parquet';
52+
53+
54+
statement ok
55+
set datafusion.explain.analyze_level = summary;
56+
57+
# row_groups_pruned_statistics=4 total → 3 matched -> 1 fully matched
58+
# limit_pruned_row_groups=2 total → 0 matched
59+
query TT
60+
explain analyze select * from t where a > 'M' AND b >= 50 limit 3;
61+
----
62+
Plan with Metrics DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/limit_pruning/data.parquet]]}, projection=[a, b], limit=3, file_type=parquet, predicate=a@0 > M AND b@1 >= 50, pruning_predicate=a_null_count@1 != row_count@2 AND a_max@0 > M AND b_null_count@4 != row_count@2 AND b_max@3 >= 50, required_guarantees=[], metrics=[output_rows=3, elapsed_compute=<slt:ignore>, output_bytes=<slt:ignore>, files_ranges_pruned_statistics=1 total → 1 matched, row_groups_pruned_statistics=4 total → 3 matched -> 1 fully matched, row_groups_pruned_bloom_filter=3 total → 3 matched, page_index_rows_pruned=3 total → 3 matched, limit_pruned_row_groups=2 total → 0 matched, bytes_scanned=<slt:ignore>, metadata_load_time=<slt:ignore>, scan_efficiency_ratio=<slt:ignore>]
63+
64+
# limit_pruned_row_groups=0 total → 0 matched
65+
# because of the ORDER BY, the scan must preserve sort order, so limit pruning is disabled
66+
query TT
67+
explain analyze select * from t where a > 'M' AND b >= 50 order by a limit 3;
68+
----
69+
Plan with Metrics
70+
01)SortExec: TopK(fetch=3), expr=[a@0 ASC NULLS LAST], preserve_partitioning=[false], filter=[a@0 < Nlpine Sheep], metrics=[output_rows=3, elapsed_compute=<slt:ignore>, output_bytes=<slt:ignore>]
71+
02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/limit_pruning/data.parquet]]}, projection=[a, b], file_type=parquet, predicate=a@0 > M AND b@1 >= 50 AND DynamicFilter [ a@0 < Nlpine Sheep ], pruning_predicate=a_null_count@1 != row_count@2 AND a_max@0 > M AND b_null_count@4 != row_count@2 AND b_max@3 >= 50 AND a_null_count@1 != row_count@2 AND a_min@5 < Nlpine Sheep, required_guarantees=[], metrics=[output_rows=3, elapsed_compute=<slt:ignore>, output_bytes=<slt:ignore>, files_ranges_pruned_statistics=1 total → 1 matched, row_groups_pruned_statistics=4 total → 3 matched -> 1 fully matched, row_groups_pruned_bloom_filter=3 total → 3 matched, page_index_rows_pruned=9 total → 9 matched, limit_pruned_row_groups=0 total → 0 matched, bytes_scanned=<slt:ignore>, metadata_load_time=<slt:ignore>, scan_efficiency_ratio=<slt:ignore>]
72+
73+
statement ok
74+
drop table t;
75+
76+
statement ok
77+
reset datafusion.explain.analyze_level;
2.27 KB
Binary file not shown.

0 commit comments

Comments
 (0)