|
| 1 | +# Licensed to the Apache Software Foundation (ASF) under one |
| 2 | +# or more contributor license agreements. See the NOTICE file |
| 3 | +# distributed with this work for additional information |
| 4 | +# regarding copyright ownership. The ASF licenses this file |
| 5 | +# to you under the Apache License, Version 2.0 (the |
| 6 | +# "License"); you may not use this file except in compliance |
| 7 | +# with the License. You may obtain a copy of the License at |
| 8 | + |
| 9 | +# http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | + |
| 11 | +# Unless required by applicable law or agreed to in writing, |
| 12 | +# software distributed under the License is distributed on an |
| 13 | +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| 14 | +# KIND, either express or implied. See the License for the |
| 15 | +# specific language governing permissions and limitations |
| 16 | +# under the License. |
| 17 | + |
| 18 | +statement ok |
| 19 | +set datafusion.execution.parquet.pushdown_filters = true; |
| 20 | + |
| 21 | +# Fixture: 12 rows written to Parquet with max_row_group_size=3, i.e. 4 row groups of 3 rows each. |
| 22 | +statement ok |
| 23 | +CREATE TABLE t AS VALUES |
| 24 | + ('Anow Vole', 7), |
| 25 | + ('Brown Bear', 133), |
| 26 | + ('Gray Wolf', 82), |
| 27 | + ('Lynx', 71), |
| 28 | + ('Red Fox', 40), |
| 29 | + ('Alpine Bat', 6), |
| 30 | + ('Nlpine Ibex', 101), |
| 31 | + ('Nlpine Goat', 76), |
| 32 | + ('Nlpine Sheep', 83), |
| 33 | + ('Europ. Mole', 4), |
| 34 | + ('Polecat', 16), |
| 35 | + ('Alpine Ibex', 97); |
| 36 | + |
| 37 | +statement ok |
| 38 | +COPY (SELECT column1 as a, column2 as b FROM t) |
| 39 | +TO 'test_files/scratch/limit_pruning/data.parquet' |
| 40 | +STORED AS PARQUET |
| 41 | +OPTIONS ( |
| 42 | + 'format.max_row_group_size' '3' |
| 43 | +); |
| 44 | + |
| 45 | +statement ok |
| 46 | +drop table t; |
| 47 | + |
| 48 | +statement ok |
| 49 | +CREATE EXTERNAL TABLE t |
| 50 | +STORED AS PARQUET |
| 51 | +LOCATION 'test_files/scratch/limit_pruning/data.parquet'; |
| 52 | + |
| 53 | +# Use the summary analyze level for the EXPLAIN ANALYZE output checked below (reset at the end of the file). |
| 54 | +statement ok |
| 55 | +set datafusion.explain.analyze_level = summary; |
| 56 | + |
| 57 | +# Statistics pruning: of 4 row groups, 3 may match the predicate and 1 of those matches fully (row_groups_pruned_statistics=4 total → 3 matched -> 1 fully matched). |
| 58 | +# LIMIT 3: limit pruning considers the other 2 row groups and keeps none of them (limit_pruned_row_groups=2 total → 0 matched). |
| 59 | +query TT |
| 60 | +explain analyze select * from t where a > 'M' AND b >= 50 limit 3; |
| 61 | +---- |
| 62 | +Plan with Metrics DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/limit_pruning/data.parquet]]}, projection=[a, b], limit=3, file_type=parquet, predicate=a@0 > M AND b@1 >= 50, pruning_predicate=a_null_count@1 != row_count@2 AND a_max@0 > M AND b_null_count@4 != row_count@2 AND b_max@3 >= 50, required_guarantees=[], metrics=[output_rows=3, elapsed_compute=<slt:ignore>, output_bytes=<slt:ignore>, files_ranges_pruned_statistics=1 total → 1 matched, row_groups_pruned_statistics=4 total → 3 matched -> 1 fully matched, row_groups_pruned_bloom_filter=3 total → 3 matched, page_index_rows_pruned=3 total → 3 matched, limit_pruned_row_groups=2 total → 0 matched, bytes_scanned=<slt:ignore>, metadata_load_time=<slt:ignore>, scan_efficiency_ratio=<slt:ignore>] |
| 63 | + |
| 64 | +# ORDER BY case: the scan needs to preserve sort order, so limit pruning is disabled (limit_pruned_row_groups=0 total → 0 matched). |
| 65 | +# The limit shows up as SortExec: TopK(fetch=3) above the scan, and the DataSourceExec line carries no limit=. |
| 66 | +query TT |
| 67 | +explain analyze select * from t where a > 'M' AND b >= 50 order by a limit 3; |
| 68 | +---- |
| 69 | +Plan with Metrics |
| 70 | +01)SortExec: TopK(fetch=3), expr=[a@0 ASC NULLS LAST], preserve_partitioning=[false], filter=[a@0 < Nlpine Sheep], metrics=[output_rows=3, elapsed_compute=<slt:ignore>, output_bytes=<slt:ignore>] |
| 71 | +02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/limit_pruning/data.parquet]]}, projection=[a, b], file_type=parquet, predicate=a@0 > M AND b@1 >= 50 AND DynamicFilter [ a@0 < Nlpine Sheep ], pruning_predicate=a_null_count@1 != row_count@2 AND a_max@0 > M AND b_null_count@4 != row_count@2 AND b_max@3 >= 50 AND a_null_count@1 != row_count@2 AND a_min@5 < Nlpine Sheep, required_guarantees=[], metrics=[output_rows=3, elapsed_compute=<slt:ignore>, output_bytes=<slt:ignore>, files_ranges_pruned_statistics=1 total → 1 matched, row_groups_pruned_statistics=4 total → 3 matched -> 1 fully matched, row_groups_pruned_bloom_filter=3 total → 3 matched, page_index_rows_pruned=9 total → 9 matched, limit_pruned_row_groups=0 total → 0 matched, bytes_scanned=<slt:ignore>, metadata_load_time=<slt:ignore>, scan_efficiency_ratio=<slt:ignore>] |
| 72 | + |
| 73 | +statement ok |
| 74 | +drop table t; |
| 75 | + |
| 76 | +statement ok |
| 77 | +reset datafusion.explain.analyze_level; |
0 commit comments