diff --git a/.typos.toml b/.typos.toml index e9fa0028f5..796e778861 100644 --- a/.typos.toml +++ b/.typos.toml @@ -21,6 +21,7 @@ extend-ignore-identifiers-re = ["^bimap$"] [default.extend-words] AGS = "AGS" ags = "ags" +mor = "mor" [files] extend-exclude = ["**/testdata", "CHANGELOG.md", "**/public-api.txt"] diff --git a/Cargo.lock b/Cargo.lock index 0a2f0a8206..25f2581da9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3762,6 +3762,7 @@ dependencies = [ "bimap", "bytes", "chrono", + "crc32fast", "derive_builder", "expect-test", "fastnum", @@ -3875,6 +3876,7 @@ dependencies = [ "chrono", "http 1.4.0", "iceberg", + "iceberg-storage-opendal", "iceberg_test_utils", "itertools 0.13.0", "mockito", diff --git a/Cargo.toml b/Cargo.toml index f11112109a..271b2dbe54 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -73,6 +73,7 @@ bytes = "1.11" cfg-if = "1" chrono = "0.4.41" clap = { version = "4.5.48", features = ["derive", "cargo"] } +crc32fast = "1.5" dashmap = "6" datafusion = "53.1.0" datafusion-cli = "53.0.0" diff --git a/crates/catalog/rest/Cargo.toml b/crates/catalog/rest/Cargo.toml index e043c195ef..8fed46e42c 100644 --- a/crates/catalog/rest/Cargo.toml +++ b/crates/catalog/rest/Cargo.toml @@ -44,6 +44,7 @@ typed-builder = { workspace = true } uuid = { workspace = true, features = ["v4"] } [dev-dependencies] +iceberg-storage-opendal = { workspace = true } iceberg_test_utils = { path = "../../test_utils", features = ["tests"] } mockito = { workspace = true } tokio = { workspace = true } diff --git a/crates/catalog/rest/DEPENDENCIES.rust.tsv b/crates/catalog/rest/DEPENDENCIES.rust.tsv index 801170e88c..2e7958872d 100644 --- a/crates/catalog/rest/DEPENDENCIES.rust.tsv +++ b/crates/catalog/rest/DEPENDENCIES.rust.tsv @@ -1,316 +1,397 @@ -crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 ISC LGPL-2.1-or-later MIT Unicode-3.0 Unlicense Zlib -adler2@2.0.1 X X X -aead@0.5.2 X X -aes@0.8.4 X X -aes-gcm@0.10.3 X X -ahash@0.8.12 X X 
-aho-corasick@1.1.4 X X -alloc-no-stdlib@2.0.4 X -alloc-stdlib@0.2.2 X -android_system_properties@0.1.5 X X -anyhow@1.0.102 X X -apache-avro@0.21.0 X -array-init@2.1.0 X X -arrow-arith@58.3.0 X -arrow-array@58.3.0 X X -arrow-buffer@58.3.0 X -arrow-cast@58.3.0 X -arrow-data@58.3.0 X -arrow-ipc@58.1.0 X -arrow-ord@58.3.0 X -arrow-schema@58.3.0 X -arrow-select@58.3.0 X -arrow-string@58.3.0 X -as-any@0.3.2 X X -async-lock@3.4.2 X X -async-trait@0.1.89 X X -atoi@2.0.0 X -atomic-waker@1.1.2 X X -autocfg@1.5.0 X X -backon@1.6.0 X -base64@0.22.1 X X -bigdecimal@0.4.10 X X -bimap@0.6.3 X X -bitflags@2.11.0 X X -block-buffer@0.10.4 X X -bnum@0.12.1 X X -bon@3.9.1 X X -bon-macros@3.9.1 X X -brotli@8.0.2 X X -brotli-decompressor@5.0.0 X X -bumpalo@3.20.2 X X -bytemuck@1.25.0 X X X -bytemuck_derive@1.10.2 X X X -byteorder@1.5.0 X X -bytes@1.11.1 X -cc@1.2.57 X X -cfg-if@1.0.4 X X -chrono@0.4.44 X X -cipher@0.4.4 X X -concurrent-queue@2.5.0 X X -const-random@0.1.18 X X -const-random-macro@0.1.16 X X -core-foundation-sys@0.8.7 X X -cpufeatures@0.2.17 X X -crc32fast@1.5.0 X X -crossbeam-channel@0.5.15 X X -crossbeam-epoch@0.9.18 X X -crossbeam-utils@0.8.21 X X -crunchy@0.2.4 X -crypto-common@0.1.7 X X -ctr@0.9.2 X X -darling@0.20.11 X -darling@0.23.0 X -darling_core@0.20.11 X -darling_core@0.23.0 X -darling_macro@0.20.11 X -darling_macro@0.23.0 X -derive_builder@0.20.2 X X -derive_builder_core@0.20.2 X X -derive_builder_macro@0.20.2 X X -digest@0.10.7 X X -displaydoc@0.2.5 X X -dissimilar@1.0.11 X -either@1.15.0 X X -equivalent@1.0.2 X X -erased-serde@0.4.10 X X -event-listener@5.4.1 X X -event-listener-strategy@0.5.4 X X -expect-test@1.5.1 X X -fastnum@0.7.4 X X -fastrand@2.3.0 X X -find-msvc-tools@0.1.9 X X -flatbuffers@25.12.19 X -flate2@1.1.9 X X -fnv@1.0.7 X X -form_urlencoded@1.2.2 X X -futures@0.3.32 X X -futures-channel@0.3.32 X X -futures-core@0.3.32 X X -futures-executor@0.3.32 X X -futures-io@0.3.32 X X -futures-macro@0.3.32 X X -futures-sink@0.3.32 X X 
-futures-task@0.3.32 X X -futures-util@0.3.32 X X -generic-array@0.14.7 X -getrandom@0.2.17 X X -getrandom@0.3.4 X X -getrandom@0.4.2 X X -ghash@0.5.1 X X -gloo-timers@0.3.0 X X -h2@0.4.13 X -half@2.7.1 X X -hashbrown@0.16.1 X X -hashbrown@0.17.1 X X -heck@0.5.0 X X -http@1.4.0 X X -http-body@1.0.1 X -http-body-util@0.1.3 X -httparse@1.10.1 X X -httpdate@1.0.3 X X -hyper@1.8.1 X -hyper-util@0.1.20 X -iana-time-zone@0.1.65 X X -iana-time-zone-haiku@0.1.2 X X -iceberg@0.9.0 X -iceberg-catalog-rest@0.9.0 X -iceberg_test_utils@0.9.0 X -icu_collections@2.1.1 X -icu_locale_core@2.1.1 X -icu_normalizer@2.1.1 X -icu_normalizer_data@2.1.1 X -icu_properties@2.1.2 X -icu_properties_data@2.1.2 X -icu_provider@2.1.1 X -ident_case@1.0.1 X X -idna@1.1.0 X X -idna_adapter@1.2.1 X X -indexmap@2.13.0 X X -inout@0.1.4 X X -integer-encoding@3.0.4 X -inventory@0.3.22 X X -ipnet@2.12.0 X X -iri-string@0.7.11 X X -itertools@0.13.0 X X -itoa@1.0.18 X X -jobserver@0.1.34 X X -js-sys@0.3.91 X X -lazy_static@1.5.0 X X -lexical-core@1.0.6 X X -lexical-parse-float@1.0.6 X X -lexical-parse-integer@1.0.6 X X -lexical-util@1.0.7 X X -lexical-write-float@1.0.6 X X -lexical-write-integer@1.0.6 X X -libc@0.2.183 X X -libm@0.2.16 X -litemap@0.8.1 X -lock_api@0.4.14 X X -log@0.4.29 X X -lz4_flex@0.13.0 X -memchr@2.8.0 X X -miniz_oxide@0.8.9 X X X -mio@1.2.0 X -moka@0.12.15 X X -murmur3@0.5.2 X X -nu-ansi-term@0.50.3 X -num-bigint@0.4.6 X X -num-complex@0.4.6 X X -num-integer@0.1.46 X X -num-traits@0.2.19 X X -once_cell@1.21.4 X X -opaque-debug@0.3.1 X X -ordered-float@2.10.1 X -ordered-float@4.6.0 X -parking@2.2.1 X X -parking_lot@0.12.5 X X -parking_lot_core@0.9.12 X X -parquet@58.1.0 X -paste@1.0.15 X X -percent-encoding@2.3.2 X X -pin-project-lite@0.2.17 X X -pin-utils@0.1.0 X X -pkg-config@0.3.32 X X -polyval@0.6.2 X X -portable-atomic@1.13.1 X X -potential_utf@0.1.4 X -ppv-lite86@0.2.21 X X -prettyplease@0.2.37 X X -proc-macro2@1.0.106 X X -quad-rand@0.2.3 X -quote@1.0.45 X X -r-efi@5.3.0 X X X 
-r-efi@6.0.0 X X X -rand@0.9.4 X X -rand_chacha@0.9.0 X X -rand_core@0.6.4 X X -rand_core@0.9.5 X X -redox_syscall@0.5.18 X -regex@1.12.3 X X -regex-automata@0.4.14 X X -regex-lite@0.1.9 X X -regex-syntax@0.8.10 X X -reqwest@0.12.28 X X -ring@0.17.14 X X -roaring@0.11.3 X X -rustc_version@0.4.1 X X -rustversion@1.0.22 X X -ryu@1.0.23 X X -scopeguard@1.2.0 X X -semver@1.0.27 X X -seq-macro@0.3.6 X X -serde@1.0.228 X X -serde-big-array@0.5.1 X X -serde_bytes@0.11.19 X X -serde_core@1.0.228 X X -serde_derive@1.0.228 X X -serde_json@1.0.149 X X -serde_repr@0.1.20 X X -serde_urlencoded@0.7.1 X X -serde_with@3.21.0 X X -serde_with_macros@3.21.0 X X -sharded-slab@0.1.7 X -shlex@1.3.0 X X -simd-adler32@0.3.8 X -simdutf8@0.1.5 X X -slab@0.4.12 X -smallvec@1.15.1 X X -snap@1.1.1 X -socket2@0.6.3 X X -stable_deref_trait@1.2.1 X X -strsim@0.11.1 X -strum@0.27.2 X -strum_macros@0.27.2 X -subtle@2.6.1 X -syn@2.0.117 X X -sync_wrapper@1.0.2 X -synstructure@0.13.2 X -tagptr@0.2.0 X X -thiserror@2.0.18 X X -thiserror-impl@2.0.18 X X -thread_local@1.1.9 X X -thrift@0.17.0 X -tiny-keccak@2.0.2 X -tinystr@0.8.2 X -tokio@1.52.1 X -tokio-macros@2.7.0 X -tokio-util@0.7.18 X -tower@0.5.3 X -tower-http@0.6.8 X -tower-layer@0.3.3 X -tower-service@0.3.3 X -tracing@0.1.44 X -tracing-attributes@0.1.31 X -tracing-core@0.1.36 X -tracing-log@0.2.0 X -tracing-subscriber@0.3.23 X -try-lock@0.2.5 X -twox-hash@2.1.2 X -typed-builder@0.20.1 X X -typed-builder-macro@0.20.1 X X -typeid@1.0.3 X X -typenum@1.20.1 X X -typetag@0.2.21 X X -typetag-impl@0.2.21 X X -unicode-ident@1.0.24 X X X -universal-hash@0.5.1 X X -untrusted@0.9.0 X -url@2.5.8 X X -utf8_iter@1.0.4 X X -uuid@1.23.0 X X -version_check@0.9.5 X X -want@0.3.1 X -wasi@0.11.1+wasi-snapshot-preview1 X X X -wasip2@1.0.2+wasi-0.2.9 X X X -wasip3@0.4.0+wasi-0.3.0-rc-2026-01-06 X X X -wasm-bindgen@0.2.114 X X -wasm-bindgen-futures@0.4.64 X X -wasm-bindgen-macro@0.2.114 X X -wasm-bindgen-macro-support@0.2.114 X X -wasm-bindgen-shared@0.2.114 X X 
-web-sys@0.3.91 X X -windows-core@0.62.2 X X -windows-implement@0.60.2 X X -windows-interface@0.59.3 X X -windows-link@0.2.1 X X -windows-result@0.4.1 X X -windows-strings@0.5.1 X X -windows-sys@0.52.0 X X -windows-sys@0.60.2 X X -windows-sys@0.61.2 X X -windows-targets@0.52.6 X X -windows-targets@0.53.5 X X -windows_aarch64_gnullvm@0.52.6 X X -windows_aarch64_gnullvm@0.53.1 X X -windows_aarch64_msvc@0.52.6 X X -windows_aarch64_msvc@0.53.1 X X -windows_i686_gnu@0.52.6 X X -windows_i686_gnu@0.53.1 X X -windows_i686_gnullvm@0.52.6 X X -windows_i686_gnullvm@0.53.1 X X -windows_i686_msvc@0.52.6 X X -windows_i686_msvc@0.53.1 X X -windows_x86_64_gnu@0.52.6 X X -windows_x86_64_gnu@0.53.1 X X -windows_x86_64_gnullvm@0.52.6 X X -windows_x86_64_gnullvm@0.53.1 X X -windows_x86_64_msvc@0.52.6 X X -windows_x86_64_msvc@0.53.1 X X -wit-bindgen@0.51.0 X X X -writeable@0.6.2 X -yoke@0.8.1 X -yoke-derive@0.8.1 X -zerocopy@0.8.47 X X X -zerocopy-derive@0.8.47 X X X -zerofrom@0.1.6 X -zerofrom-derive@0.1.6 X -zeroize@1.8.2 X X -zerotrie@0.2.3 X -zerovec@0.11.5 X -zerovec-derive@0.11.2 X -zlib-rs@0.6.3 X -zmij@1.0.21 X -zstd@0.13.3 X -zstd-safe@7.2.4 X X -zstd-sys@2.0.16+zstd.1.5.7 X X +crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 CDLA-Permissive-2.0 ISC LGPL-2.1-or-later MIT MIT-0 Unicode-3.0 Unlicense Zlib +adler2@2.0.1 X X X +aead@0.5.2 X X +aes@0.8.4 X X +aes-gcm@0.10.3 X X +ahash@0.8.12 X X +aho-corasick@1.1.4 X X +alloc-no-stdlib@2.0.4 X +alloc-stdlib@0.2.2 X +android_system_properties@0.1.5 X X +anyhow@1.0.102 X X +apache-avro@0.21.0 X +array-init@2.1.0 X X +arrow-arith@58.3.0 X +arrow-array@58.3.0 X X +arrow-buffer@58.3.0 X +arrow-cast@58.3.0 X +arrow-data@58.3.0 X +arrow-ipc@58.1.0 X +arrow-ord@58.3.0 X +arrow-schema@58.3.0 X +arrow-select@58.3.0 X +arrow-string@58.3.0 X +as-any@0.3.2 X X +async-lock@3.4.2 X X +async-trait@0.1.89 X X +atoi@2.0.0 X +atomic-waker@1.1.2 X X +autocfg@1.5.0 X X +aws-lc-rs@1.16.2 X X 
+aws-lc-sys@0.39.0 X X X X X +backon@1.6.0 X +base64@0.22.1 X X +bigdecimal@0.4.10 X X +bimap@0.6.3 X X +bitflags@2.11.0 X X +block-buffer@0.10.4 X X +block-buffer@0.12.0 X X +bnum@0.12.1 X X +bon@3.9.1 X X +bon-macros@3.9.1 X X +brotli@8.0.2 X X +brotli-decompressor@5.0.0 X X +bumpalo@3.20.2 X X +bytemuck@1.25.0 X X X +bytemuck_derive@1.10.2 X X X +byteorder@1.5.0 X X +bytes@1.11.1 X +cc@1.2.57 X X +cfg-if@1.0.4 X X +chrono@0.4.44 X X +cipher@0.4.4 X X +cmake@0.1.57 X X +combine@4.6.7 X +concurrent-queue@2.5.0 X X +const-oid@0.10.2 X X +const-oid@0.9.6 X X +const-random@0.1.18 X X +const-random-macro@0.1.16 X X +core-foundation@0.10.1 X X +core-foundation-sys@0.8.7 X X +cpufeatures@0.2.17 X X +crc32c@0.6.8 X X +crc32fast@1.5.0 X X +crossbeam-channel@0.5.15 X X +crossbeam-epoch@0.9.18 X X +crossbeam-utils@0.8.21 X X +crunchy@0.2.4 X +crypto-common@0.1.7 X X +crypto-common@0.2.2 X X +ctor@1.0.7 X X +ctr@0.9.2 X X +darling@0.20.11 X +darling@0.23.0 X +darling_core@0.20.11 X +darling_core@0.23.0 X +darling_macro@0.20.11 X +darling_macro@0.23.0 X +derive_builder@0.20.2 X X +derive_builder_core@0.20.2 X X +derive_builder_macro@0.20.2 X X +digest@0.10.7 X X +digest@0.11.3 X X +displaydoc@0.2.5 X X +dissimilar@1.0.11 X +dlv-list@0.5.2 X X +dunce@1.0.5 X X X +either@1.15.0 X X +equivalent@1.0.2 X X +erased-serde@0.4.10 X X +errno@0.3.14 X X +event-listener@5.4.1 X X +event-listener-strategy@0.5.4 X X +expect-test@1.5.1 X X +fastnum@0.7.4 X X +fastrand@2.3.0 X X +find-msvc-tools@0.1.9 X X +flatbuffers@25.12.19 X +flate2@1.1.9 X X +fnv@1.0.7 X X +form_urlencoded@1.2.2 X X +fs_extra@1.3.0 X +futures@0.3.32 X X +futures-channel@0.3.32 X X +futures-core@0.3.32 X X +futures-executor@0.3.32 X X +futures-io@0.3.32 X X +futures-macro@0.3.32 X X +futures-sink@0.3.32 X X +futures-task@0.3.32 X X +futures-util@0.3.32 X X +generic-array@0.14.7 X +getrandom@0.2.17 X X +getrandom@0.3.4 X X +getrandom@0.4.2 X X +ghash@0.5.1 X X +gloo-timers@0.3.0 X X +h2@0.4.13 X +half@2.7.1 X X 
+hashbrown@0.14.5 X X +hashbrown@0.16.1 X X +hashbrown@0.17.1 X X +heck@0.5.0 X X +hex@0.4.3 X X +hmac@0.12.1 X X +http@1.4.0 X X +http-body@1.0.1 X +http-body-util@0.1.3 X +httparse@1.10.1 X X +httpdate@1.0.3 X X +hybrid-array@0.4.12 X X +hyper@1.8.1 X +hyper-rustls@0.27.7 X X X +hyper-util@0.1.20 X +iana-time-zone@0.1.65 X X +iana-time-zone-haiku@0.1.2 X X +iceberg@0.9.0 X +iceberg-catalog-rest@0.9.0 X +iceberg-storage-opendal@0.9.0 X +iceberg_test_utils@0.9.0 X +icu_collections@2.1.1 X +icu_locale_core@2.1.1 X +icu_normalizer@2.1.1 X +icu_normalizer_data@2.1.1 X +icu_properties@2.1.2 X +icu_properties_data@2.1.2 X +icu_provider@2.1.1 X +ident_case@1.0.1 X X +idna@1.1.0 X X +idna_adapter@1.2.1 X X +indexmap@2.13.0 X X +inout@0.1.4 X X +integer-encoding@3.0.4 X +inventory@0.3.22 X X +ipnet@2.12.0 X X +iri-string@0.7.11 X X +itertools@0.13.0 X X +itoa@1.0.18 X X +jiff@0.2.23 X X +jiff-tzdb@0.1.6 X X +jiff-tzdb-platform@0.1.3 X X +jni@0.22.4 X X +jni-macros@0.22.4 X X +jni-sys@0.4.1 X X +jni-sys-macros@0.4.1 X X +jobserver@0.1.34 X X +js-sys@0.3.91 X X +lazy_static@1.5.0 X X +lexical-core@1.0.6 X X +lexical-parse-float@1.0.6 X X +lexical-parse-integer@1.0.6 X X +lexical-util@1.0.7 X X +lexical-write-float@1.0.6 X X +lexical-write-integer@1.0.6 X X +libc@0.2.183 X X +libm@0.2.16 X +link-section@0.18.1 X X +linktime-proc-macro@0.2.0 X X +linux-raw-sys@0.12.1 X X X +litemap@0.8.1 X +lock_api@0.4.14 X X +log@0.4.29 X X +lz4_flex@0.13.0 X +md-5@0.11.0 X X +mea@0.6.3 X +memchr@2.8.0 X X +miniz_oxide@0.8.9 X X X +mio@1.2.0 X +moka@0.12.15 X X +murmur3@0.5.2 X X +nu-ansi-term@0.50.3 X +num-bigint@0.4.6 X X +num-complex@0.4.6 X X +num-integer@0.1.46 X X +num-traits@0.2.19 X X +once_cell@1.21.4 X X +opaque-debug@0.3.1 X X +opendal@0.57.0 X +opendal-core@0.57.0 X +opendal-layer-concurrent-limit@0.57.0 X +opendal-layer-logging@0.57.0 X +opendal-layer-retry@0.57.0 X +opendal-layer-timeout@0.57.0 X +opendal-service-fs@0.57.0 X +opendal-service-s3@0.57.0 X +openssl-probe@0.2.1 X X 
+ordered-float@2.10.1 X +ordered-float@4.6.0 X +ordered-multimap@0.7.3 X +parking@2.2.1 X X +parking_lot@0.12.5 X X +parking_lot_core@0.9.12 X X +parquet@58.1.0 X +paste@1.0.15 X X +percent-encoding@2.3.2 X X +pin-project-lite@0.2.17 X X +pin-utils@0.1.0 X X +pkg-config@0.3.32 X X +polyval@0.6.2 X X +portable-atomic@1.13.1 X X +portable-atomic-util@0.2.6 X X +potential_utf@0.1.4 X +ppv-lite86@0.2.21 X X +prettyplease@0.2.37 X X +proc-macro2@1.0.106 X X +quad-rand@0.2.3 X +quick-xml@0.39.4 X +quote@1.0.45 X X +r-efi@5.3.0 X X X +r-efi@6.0.0 X X X +rand@0.9.4 X X +rand_chacha@0.9.0 X X +rand_core@0.6.4 X X +rand_core@0.9.5 X X +redox_syscall@0.5.18 X +regex@1.12.3 X X +regex-automata@0.4.14 X X +regex-lite@0.1.9 X X +regex-syntax@0.8.10 X X +reqsign-aws-v4@3.0.0 X +reqsign-core@3.0.0 X +reqsign-file-read-tokio@3.0.0 X +reqwest@0.12.28 X X +reqwest@0.13.3 X X +ring@0.17.14 X X +roaring@0.11.3 X X +rust-ini@0.21.3 X +rustc_version@0.4.1 X X +rustix@1.1.4 X X X +rustls@0.23.37 X X X +rustls-native-certs@0.8.3 X X X +rustls-pki-types@1.14.0 X X +rustls-platform-verifier@0.7.0 X X +rustls-platform-verifier-android@0.1.1 X X +rustls-webpki@0.103.13 X +rustversion@1.0.22 X X +ryu@1.0.23 X X +same-file@1.0.6 X X +schannel@0.1.29 X +scopeguard@1.2.0 X X +security-framework@3.7.0 X X +security-framework-sys@2.17.0 X X +semver@1.0.27 X X +seq-macro@0.3.6 X X +serde@1.0.228 X X +serde-big-array@0.5.1 X X +serde_bytes@0.11.19 X X +serde_core@1.0.228 X X +serde_derive@1.0.228 X X +serde_json@1.0.149 X X +serde_repr@0.1.20 X X +serde_urlencoded@0.7.1 X X +serde_with@3.21.0 X X +serde_with_macros@3.21.0 X X +sha1@0.10.6 X X +sha2@0.10.9 X X +sharded-slab@0.1.7 X +shlex@1.3.0 X X +simd-adler32@0.3.8 X +simd_cesu8@1.1.1 X X +simdutf8@0.1.5 X X +slab@0.4.12 X +smallvec@1.15.1 X X +snap@1.1.1 X +socket2@0.6.3 X X +stable_deref_trait@1.2.1 X X +strsim@0.11.1 X +strum@0.27.2 X +strum_macros@0.27.2 X +subtle@2.6.1 X +syn@2.0.117 X X +sync_wrapper@1.0.2 X +synstructure@0.13.2 X 
+tagptr@0.2.0 X X +thiserror@2.0.18 X X +thiserror-impl@2.0.18 X X +thread_local@1.1.9 X X +thrift@0.17.0 X +tiny-keccak@2.0.2 X +tinystr@0.8.2 X +tokio@1.52.1 X +tokio-macros@2.7.0 X +tokio-rustls@0.26.4 X X +tokio-util@0.7.18 X +tower@0.5.3 X +tower-http@0.6.8 X +tower-layer@0.3.3 X +tower-service@0.3.3 X +tracing@0.1.44 X +tracing-attributes@0.1.31 X +tracing-core@0.1.36 X +tracing-log@0.2.0 X +tracing-subscriber@0.3.23 X +try-lock@0.2.5 X +twox-hash@2.1.2 X +typed-builder@0.20.1 X X +typed-builder-macro@0.20.1 X X +typeid@1.0.3 X X +typenum@1.20.1 X X +typetag@0.2.21 X X +typetag-impl@0.2.21 X X +unicode-ident@1.0.24 X X X +universal-hash@0.5.1 X X +untrusted@0.9.0 X +url@2.5.8 X X +utf8_iter@1.0.4 X X +uuid@1.23.0 X X +version_check@0.9.5 X X +walkdir@2.5.0 X X +want@0.3.1 X +wasi@0.11.1+wasi-snapshot-preview1 X X X +wasip2@1.0.2+wasi-0.2.9 X X X +wasip3@0.4.0+wasi-0.3.0-rc-2026-01-06 X X X +wasm-bindgen@0.2.114 X X +wasm-bindgen-futures@0.4.64 X X +wasm-bindgen-macro@0.2.114 X X +wasm-bindgen-macro-support@0.2.114 X X +wasm-bindgen-shared@0.2.114 X X +wasm-streams@0.5.0 X X +web-sys@0.3.91 X X +web-time@1.1.0 X X +webpki-root-certs@1.0.7 X +winapi-util@0.1.11 X X +windows-core@0.62.2 X X +windows-implement@0.60.2 X X +windows-interface@0.59.3 X X +windows-link@0.2.1 X X +windows-result@0.4.1 X X +windows-strings@0.5.1 X X +windows-sys@0.48.0 X X +windows-sys@0.52.0 X X +windows-sys@0.60.2 X X +windows-sys@0.61.2 X X +windows-targets@0.48.5 X X +windows-targets@0.52.6 X X +windows-targets@0.53.5 X X +windows_aarch64_gnullvm@0.48.5 X X +windows_aarch64_gnullvm@0.52.6 X X +windows_aarch64_gnullvm@0.53.1 X X +windows_aarch64_msvc@0.48.5 X X +windows_aarch64_msvc@0.52.6 X X +windows_aarch64_msvc@0.53.1 X X +windows_i686_gnu@0.48.5 X X +windows_i686_gnu@0.52.6 X X +windows_i686_gnu@0.53.1 X X +windows_i686_gnullvm@0.52.6 X X +windows_i686_gnullvm@0.53.1 X X +windows_i686_msvc@0.48.5 X X +windows_i686_msvc@0.52.6 X X +windows_i686_msvc@0.53.1 X X 
+windows_x86_64_gnu@0.48.5 X X +windows_x86_64_gnu@0.52.6 X X +windows_x86_64_gnu@0.53.1 X X +windows_x86_64_gnullvm@0.48.5 X X +windows_x86_64_gnullvm@0.52.6 X X +windows_x86_64_gnullvm@0.53.1 X X +windows_x86_64_msvc@0.48.5 X X +windows_x86_64_msvc@0.52.6 X X +windows_x86_64_msvc@0.53.1 X X +wit-bindgen@0.51.0 X X X +writeable@0.6.2 X +xattr@1.6.1 X X +yoke@0.8.1 X +yoke-derive@0.8.1 X +zerocopy@0.8.47 X X X +zerocopy-derive@0.8.47 X X X +zerofrom@0.1.6 X +zerofrom-derive@0.1.6 X +zeroize@1.8.2 X X +zerotrie@0.2.3 X +zerovec@0.11.5 X +zerovec-derive@0.11.2 X +zlib-rs@0.6.3 X +zmij@1.0.21 X +zstd@0.13.3 X +zstd-safe@7.2.4 X X +zstd-sys@2.0.16+zstd.1.5.7 X X diff --git a/crates/catalog/rest/examples/pulse_dv_realdata.rs b/crates/catalog/rest/examples/pulse_dv_realdata.rs new file mode 100644 index 0000000000..3ed6fb3054 --- /dev/null +++ b/crates/catalog/rest/examples/pulse_dv_realdata.rs @@ -0,0 +1,158 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Real-data, cross-engine validation of V3 deletion-vector writes. +//! +//! Writes a `deletion-vector-v1` to a REAL Iceberg table via a REST catalog +//! (Polaris), so an INDEPENDENT engine (Doris / Spark / DuckDB) can read the +//! 
table back and confirm the deletes were applied. This is the gate before +//! opening the upstream RowDelta MoR DV-write PR (#2203). +//! +//! Prereqs: a clean V3 table with one data file and NO pre-existing deletion +//! vector (create it via Doris/Spark/DuckDB first), and a port-forward to +//! Polaris. Run from a host with S3 access for the warehouse bucket. +//! +//! ```bash +//! kubectl port-forward svc/polaris -n pulse-data 8181:8181 & +//! POLARIS_URI=http://localhost:8181/api/catalog \ +//! POLARIS_CREDENTIAL="$(kubectl get secret -n pulse-compute polaris-svc-spark \ +//! -o jsonpath='{.data.client-id}' | base64 -d):$(kubectl get secret -n \ +//! pulse-compute polaris-svc-spark -o jsonpath='{.data.client-secret}' | base64 -d)" \ +//! POLARIS_WAREHOUSE=bronze_dbnew \ +//! DV_NAMESPACE=zz_compactbench DV_TABLE=zz_rust_dv_test DV_DELETE_COUNT=3 \ +//! cargo run -p iceberg-catalog-rest --example pulse_dv_realdata +//! ``` +//! +//! Then verify in Doris: `SELECT COUNT(*) FROM bronze_dbnew.zz_compactbench.zz_rust_dv_test;` +//! should drop by `DV_DELETE_COUNT`. 
+ +use std::collections::HashMap; +use std::sync::Arc; + +use iceberg::delete_vector::DeleteVector; +use iceberg::spec::DataContentType; +use iceberg::transaction::{ApplyTransactionAction, Transaction}; +use iceberg::{Catalog, CatalogBuilder, TableIdent}; +use iceberg_catalog_rest::RestCatalogBuilder; +use iceberg_storage_opendal::OpenDalResolvingStorageFactory; +use uuid::Uuid; + +#[tokio::main] +async fn main() -> Result<(), Box> { + let uri = std::env::var("POLARIS_URI")?; + let credential = std::env::var("POLARIS_CREDENTIAL")?; + let warehouse = std::env::var("POLARIS_WAREHOUSE").unwrap_or_else(|_| "bronze_dbnew".into()); + let namespace = std::env::var("DV_NAMESPACE").unwrap_or_else(|_| "zz_compactbench".into()); + let table_name = std::env::var("DV_TABLE").unwrap_or_else(|_| "zz_rust_dv_test".into()); + let delete_count: u64 = std::env::var("DV_DELETE_COUNT") + .unwrap_or_else(|_| "3".into()) + .parse()?; + + // --- connect to Polaris (REST + OAuth2; S3 creds vended by the catalog) --- + let mut props = HashMap::new(); + props.insert("uri".to_string(), uri.clone()); + props.insert("warehouse".to_string(), warehouse); + props.insert("credential".to_string(), credential); + props.insert("scope".to_string(), "PRINCIPAL_ROLE:ALL".to_string()); + props.insert( + "oauth2-server-uri".to_string(), + format!("{}/v1/oauth/tokens", uri.trim_end_matches('/')), + ); + let catalog = RestCatalogBuilder::default() + .with_storage_factory(Arc::new(OpenDalResolvingStorageFactory::new())) + .load("polaris", props) + .await?; + + let ident = TableIdent::from_strs([namespace.as_str(), table_name.as_str()])?; + let table = catalog.load_table(&ident).await?; + println!( + "loaded {ident:?} (format_version={:?})", + table.metadata().format_version() + ); + + // --- find a live Data file in the current snapshot --- + let snapshot = table + .metadata() + .current_snapshot() + .ok_or("table has no current snapshot")?; + let manifest_list = 
table.manifest_list_reader(snapshot).load().await?; + let mut chosen = None; + for manifest_file in manifest_list.entries() { + let manifest = manifest_file.load_manifest(table.file_io()).await?; + for entry in manifest.entries() { + if entry.is_alive() && entry.data_file().content_type() == DataContentType::Data { + chosen = Some(entry.data_file().clone()); + break; + } + } + if chosen.is_some() { + break; + } + } + let data_file = chosen.ok_or("no live data file found in the table")?; + let referenced = data_file.file_path().to_string(); + let total_rows = data_file.record_count(); + println!("target data file: {referenced} ({total_rows} rows)"); + if delete_count > total_rows { + return Err(format!("DV_DELETE_COUNT {delete_count} > rows in file {total_rows}").into()); + } + + // --- build a DV deleting the first `delete_count` positions --- + let mut dv = DeleteVector::default(); + for pos in 0..delete_count { + dv.insert(pos); + } + + // --- write the DV to a Puffin file and build the PositionDeletes DataFile --- + let dv_location = format!( + "{}/data/rust-dv-{}.puffin", + table.metadata().location().trim_end_matches('/'), + Uuid::now_v7() + ); + let t_write_start = std::time::Instant::now(); + let dv_data_file = dv + .write_to_puffin_file( + table.file_io(), + dv_location.clone(), + referenced.clone(), + data_file.partition().clone(), + table.metadata().default_partition_spec_id(), + ) + .await?; + let t_write = t_write_start.elapsed(); + + // --- commit via RowDelta -> content=Deletes manifest + Operation::Delete --- + let t_commit_start = std::time::Instant::now(); + let tx = Transaction::new(&table); + let action = tx.row_delta().add_delete_files(vec![dv_data_file]); + let tx = action.apply(tx)?; + let updated = tx.commit(&catalog).await?; + let t_commit = t_commit_start.elapsed(); + println!( + "BENCH K={delete_count} write_to_puffin={:.3}s commit={:.3}s total={:.3}s", + t_write.as_secs_f64(), + t_commit.as_secs_f64(), + (t_write + t_commit).as_secs_f64() 
+ ); + + println!( + "COMMITTED. new snapshot_id={:?}. Now verify with an independent engine \ + (Doris/Spark): COUNT(*) should be (previous count - {delete_count}).", + updated.metadata().current_snapshot_id() + ); + Ok(()) +} diff --git a/crates/examples/DEPENDENCIES.rust.tsv b/crates/examples/DEPENDENCIES.rust.tsv index a6c67ad6cc..5ab17eb6f9 100644 --- a/crates/examples/DEPENDENCIES.rust.tsv +++ b/crates/examples/DEPENDENCIES.rust.tsv @@ -1,319 +1,399 @@ -crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 ISC LGPL-2.1-or-later MIT Unicode-3.0 Unlicense Zlib -adler2@2.0.1 X X X -aead@0.5.2 X X -aes@0.8.4 X X -aes-gcm@0.10.3 X X -ahash@0.8.12 X X -aho-corasick@1.1.4 X X -alloc-no-stdlib@2.0.4 X -alloc-stdlib@0.2.2 X -android_system_properties@0.1.5 X X -anyhow@1.0.102 X X -apache-avro@0.21.0 X -array-init@2.1.0 X X -arrow-arith@58.3.0 X -arrow-array@58.3.0 X X -arrow-buffer@58.3.0 X -arrow-cast@58.3.0 X -arrow-data@58.3.0 X -arrow-ipc@58.1.0 X -arrow-ord@58.3.0 X -arrow-schema@58.3.0 X -arrow-select@58.3.0 X -arrow-string@58.3.0 X -as-any@0.3.2 X X -async-lock@3.4.2 X X -async-trait@0.1.89 X X -atoi@2.0.0 X -atomic-waker@1.1.2 X X -autocfg@1.5.0 X X -backon@1.6.0 X -base64@0.22.1 X X -bigdecimal@0.4.10 X X -bimap@0.6.3 X X -bitflags@2.11.0 X X -block-buffer@0.10.4 X X -bnum@0.12.1 X X -bon@3.9.1 X X -bon-macros@3.9.1 X X -brotli@8.0.2 X X -brotli-decompressor@5.0.0 X X -bumpalo@3.20.2 X X -bytemuck@1.25.0 X X X -bytemuck_derive@1.10.2 X X X -byteorder@1.5.0 X X -bytes@1.11.1 X -cc@1.2.57 X X -cfg-if@1.0.4 X X -chrono@0.4.44 X X -cipher@0.4.4 X X -concurrent-queue@2.5.0 X X -const-random@0.1.18 X X -const-random-macro@0.1.16 X X -core-foundation-sys@0.8.7 X X -cpufeatures@0.2.17 X X -crc32fast@1.5.0 X X -crossbeam-channel@0.5.15 X X -crossbeam-epoch@0.9.18 X X -crossbeam-utils@0.8.21 X X -crunchy@0.2.4 X -crypto-common@0.1.7 X X -ctr@0.9.2 X X -darling@0.20.11 X -darling@0.23.0 X -darling_core@0.20.11 X 
-darling_core@0.23.0 X -darling_macro@0.20.11 X -darling_macro@0.23.0 X -derive_builder@0.20.2 X X -derive_builder_core@0.20.2 X X -derive_builder_macro@0.20.2 X X -digest@0.10.7 X X -displaydoc@0.2.5 X X -dissimilar@1.0.11 X -either@1.15.0 X X -equivalent@1.0.2 X X -erased-serde@0.4.10 X X -errno@0.3.14 X X -event-listener@5.4.1 X X -event-listener-strategy@0.5.4 X X -expect-test@1.5.1 X X -fastnum@0.7.4 X X -fastrand@2.3.0 X X -find-msvc-tools@0.1.9 X X -flatbuffers@25.12.19 X -flate2@1.1.9 X X -fnv@1.0.7 X X -form_urlencoded@1.2.2 X X -futures@0.3.32 X X -futures-channel@0.3.32 X X -futures-core@0.3.32 X X -futures-executor@0.3.32 X X -futures-io@0.3.32 X X -futures-macro@0.3.32 X X -futures-sink@0.3.32 X X -futures-task@0.3.32 X X -futures-util@0.3.32 X X -generic-array@0.14.7 X -getrandom@0.2.17 X X -getrandom@0.3.4 X X -getrandom@0.4.2 X X -ghash@0.5.1 X X -gloo-timers@0.3.0 X X -h2@0.4.13 X -half@2.7.1 X X -hashbrown@0.16.1 X X -hashbrown@0.17.1 X X -heck@0.5.0 X X -http@1.4.0 X X -http-body@1.0.1 X -http-body-util@0.1.3 X -httparse@1.10.1 X X -httpdate@1.0.3 X X -hyper@1.8.1 X -hyper-util@0.1.20 X -iana-time-zone@0.1.65 X X -iana-time-zone-haiku@0.1.2 X X -iceberg@0.9.0 X -iceberg-catalog-rest@0.9.0 X -iceberg-examples@0.9.0 X -iceberg_test_utils@0.9.0 X -icu_collections@2.1.1 X -icu_locale_core@2.1.1 X -icu_normalizer@2.1.1 X -icu_normalizer_data@2.1.1 X -icu_properties@2.1.2 X -icu_properties_data@2.1.2 X -icu_provider@2.1.1 X -ident_case@1.0.1 X X -idna@1.1.0 X X -idna_adapter@1.2.1 X X -indexmap@2.13.0 X X -inout@0.1.4 X X -integer-encoding@3.0.4 X -inventory@0.3.22 X X -ipnet@2.12.0 X X -iri-string@0.7.11 X X -itertools@0.13.0 X X -itoa@1.0.18 X X -jobserver@0.1.34 X X -js-sys@0.3.91 X X -lazy_static@1.5.0 X X -lexical-core@1.0.6 X X -lexical-parse-float@1.0.6 X X -lexical-parse-integer@1.0.6 X X -lexical-util@1.0.7 X X -lexical-write-float@1.0.6 X X -lexical-write-integer@1.0.6 X X -libc@0.2.183 X X -libm@0.2.16 X -litemap@0.8.1 X -lock_api@0.4.14 X X 
-log@0.4.29 X X -lz4_flex@0.13.0 X -memchr@2.8.0 X X -miniz_oxide@0.8.9 X X X -mio@1.2.0 X -moka@0.12.15 X X -murmur3@0.5.2 X X -nu-ansi-term@0.50.3 X -num-bigint@0.4.6 X X -num-complex@0.4.6 X X -num-integer@0.1.46 X X -num-traits@0.2.19 X X -once_cell@1.21.4 X X -opaque-debug@0.3.1 X X -ordered-float@2.10.1 X -ordered-float@4.6.0 X -parking@2.2.1 X X -parking_lot@0.12.5 X X -parking_lot_core@0.9.12 X X -parquet@58.1.0 X -paste@1.0.15 X X -percent-encoding@2.3.2 X X -pin-project-lite@0.2.17 X X -pin-utils@0.1.0 X X -pkg-config@0.3.32 X X -polyval@0.6.2 X X -portable-atomic@1.13.1 X X -potential_utf@0.1.4 X -ppv-lite86@0.2.21 X X -prettyplease@0.2.37 X X -proc-macro2@1.0.106 X X -quad-rand@0.2.3 X -quote@1.0.45 X X -r-efi@5.3.0 X X X -r-efi@6.0.0 X X X -rand@0.9.4 X X -rand_chacha@0.9.0 X X -rand_core@0.6.4 X X -rand_core@0.9.5 X X -redox_syscall@0.5.18 X -regex@1.12.3 X X -regex-automata@0.4.14 X X -regex-lite@0.1.9 X X -regex-syntax@0.8.10 X X -reqwest@0.12.28 X X -ring@0.17.14 X X -roaring@0.11.3 X X -rustc_version@0.4.1 X X -rustversion@1.0.22 X X -ryu@1.0.23 X X -scopeguard@1.2.0 X X -semver@1.0.27 X X -seq-macro@0.3.6 X X -serde@1.0.228 X X -serde-big-array@0.5.1 X X -serde_bytes@0.11.19 X X -serde_core@1.0.228 X X -serde_derive@1.0.228 X X -serde_json@1.0.149 X X -serde_repr@0.1.20 X X -serde_urlencoded@0.7.1 X X -serde_with@3.21.0 X X -serde_with_macros@3.21.0 X X -sharded-slab@0.1.7 X -shlex@1.3.0 X X -signal-hook-registry@1.4.8 X X -simd-adler32@0.3.8 X -simdutf8@0.1.5 X X -slab@0.4.12 X -smallvec@1.15.1 X X -snap@1.1.1 X -socket2@0.6.3 X X -stable_deref_trait@1.2.1 X X -strsim@0.11.1 X -strum@0.27.2 X -strum_macros@0.27.2 X -subtle@2.6.1 X -syn@2.0.117 X X -sync_wrapper@1.0.2 X -synstructure@0.13.2 X -tagptr@0.2.0 X X -thiserror@2.0.18 X X -thiserror-impl@2.0.18 X X -thread_local@1.1.9 X X -thrift@0.17.0 X -tiny-keccak@2.0.2 X -tinystr@0.8.2 X -tokio@1.52.1 X -tokio-macros@2.7.0 X -tokio-util@0.7.18 X -tower@0.5.3 X -tower-http@0.6.8 X -tower-layer@0.3.3 
X -tower-service@0.3.3 X -tracing@0.1.44 X -tracing-attributes@0.1.31 X -tracing-core@0.1.36 X -tracing-log@0.2.0 X -tracing-subscriber@0.3.23 X -try-lock@0.2.5 X -twox-hash@2.1.2 X -typed-builder@0.20.1 X X -typed-builder-macro@0.20.1 X X -typeid@1.0.3 X X -typenum@1.20.1 X X -typetag@0.2.21 X X -typetag-impl@0.2.21 X X -unicode-ident@1.0.24 X X X -universal-hash@0.5.1 X X -untrusted@0.9.0 X -url@2.5.8 X X -utf8_iter@1.0.4 X X -uuid@1.23.0 X X -version_check@0.9.5 X X -want@0.3.1 X -wasi@0.11.1+wasi-snapshot-preview1 X X X -wasip2@1.0.2+wasi-0.2.9 X X X -wasip3@0.4.0+wasi-0.3.0-rc-2026-01-06 X X X -wasm-bindgen@0.2.114 X X -wasm-bindgen-futures@0.4.64 X X -wasm-bindgen-macro@0.2.114 X X -wasm-bindgen-macro-support@0.2.114 X X -wasm-bindgen-shared@0.2.114 X X -web-sys@0.3.91 X X -windows-core@0.62.2 X X -windows-implement@0.60.2 X X -windows-interface@0.59.3 X X -windows-link@0.2.1 X X -windows-result@0.4.1 X X -windows-strings@0.5.1 X X -windows-sys@0.52.0 X X -windows-sys@0.60.2 X X -windows-sys@0.61.2 X X -windows-targets@0.52.6 X X -windows-targets@0.53.5 X X -windows_aarch64_gnullvm@0.52.6 X X -windows_aarch64_gnullvm@0.53.1 X X -windows_aarch64_msvc@0.52.6 X X -windows_aarch64_msvc@0.53.1 X X -windows_i686_gnu@0.52.6 X X -windows_i686_gnu@0.53.1 X X -windows_i686_gnullvm@0.52.6 X X -windows_i686_gnullvm@0.53.1 X X -windows_i686_msvc@0.52.6 X X -windows_i686_msvc@0.53.1 X X -windows_x86_64_gnu@0.52.6 X X -windows_x86_64_gnu@0.53.1 X X -windows_x86_64_gnullvm@0.52.6 X X -windows_x86_64_gnullvm@0.53.1 X X -windows_x86_64_msvc@0.52.6 X X -windows_x86_64_msvc@0.53.1 X X -wit-bindgen@0.51.0 X X X -writeable@0.6.2 X -yoke@0.8.1 X -yoke-derive@0.8.1 X -zerocopy@0.8.47 X X X -zerocopy-derive@0.8.47 X X X -zerofrom@0.1.6 X -zerofrom-derive@0.1.6 X -zeroize@1.8.2 X X -zerotrie@0.2.3 X -zerovec@0.11.5 X -zerovec-derive@0.11.2 X -zlib-rs@0.6.3 X -zmij@1.0.21 X -zstd@0.13.3 X -zstd-safe@7.2.4 X X -zstd-sys@2.0.16+zstd.1.5.7 X X +crate 0BSD Apache-2.0 Apache-2.0 WITH 
LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 CDLA-Permissive-2.0 ISC LGPL-2.1-or-later MIT MIT-0 Unicode-3.0 Unlicense Zlib +adler2@2.0.1 X X X +aead@0.5.2 X X +aes@0.8.4 X X +aes-gcm@0.10.3 X X +ahash@0.8.12 X X +aho-corasick@1.1.4 X X +alloc-no-stdlib@2.0.4 X +alloc-stdlib@0.2.2 X +android_system_properties@0.1.5 X X +anyhow@1.0.102 X X +apache-avro@0.21.0 X +array-init@2.1.0 X X +arrow-arith@58.3.0 X +arrow-array@58.3.0 X X +arrow-buffer@58.3.0 X +arrow-cast@58.3.0 X +arrow-data@58.3.0 X +arrow-ipc@58.1.0 X +arrow-ord@58.3.0 X +arrow-schema@58.3.0 X +arrow-select@58.3.0 X +arrow-string@58.3.0 X +as-any@0.3.2 X X +async-lock@3.4.2 X X +async-trait@0.1.89 X X +atoi@2.0.0 X +atomic-waker@1.1.2 X X +autocfg@1.5.0 X X +aws-lc-rs@1.16.2 X X +aws-lc-sys@0.39.0 X X X X X +backon@1.6.0 X +base64@0.22.1 X X +bigdecimal@0.4.10 X X +bimap@0.6.3 X X +bitflags@2.11.0 X X +block-buffer@0.10.4 X X +block-buffer@0.12.0 X X +bnum@0.12.1 X X +bon@3.9.1 X X +bon-macros@3.9.1 X X +brotli@8.0.2 X X +brotli-decompressor@5.0.0 X X +bumpalo@3.20.2 X X +bytemuck@1.25.0 X X X +bytemuck_derive@1.10.2 X X X +byteorder@1.5.0 X X +bytes@1.11.1 X +cc@1.2.57 X X +cfg-if@1.0.4 X X +chrono@0.4.44 X X +cipher@0.4.4 X X +cmake@0.1.57 X X +combine@4.6.7 X +concurrent-queue@2.5.0 X X +const-oid@0.10.2 X X +const-oid@0.9.6 X X +const-random@0.1.18 X X +const-random-macro@0.1.16 X X +core-foundation@0.10.1 X X +core-foundation-sys@0.8.7 X X +cpufeatures@0.2.17 X X +crc32c@0.6.8 X X +crc32fast@1.5.0 X X +crossbeam-channel@0.5.15 X X +crossbeam-epoch@0.9.18 X X +crossbeam-utils@0.8.21 X X +crunchy@0.2.4 X +crypto-common@0.1.7 X X +crypto-common@0.2.2 X X +ctor@1.0.7 X X +ctr@0.9.2 X X +darling@0.20.11 X +darling@0.23.0 X +darling_core@0.20.11 X +darling_core@0.23.0 X +darling_macro@0.20.11 X +darling_macro@0.23.0 X +derive_builder@0.20.2 X X +derive_builder_core@0.20.2 X X +derive_builder_macro@0.20.2 X X +digest@0.10.7 X X +digest@0.11.3 X X +displaydoc@0.2.5 X X +dissimilar@1.0.11 X 
+dlv-list@0.5.2 X X +dunce@1.0.5 X X X +either@1.15.0 X X +equivalent@1.0.2 X X +erased-serde@0.4.10 X X +errno@0.3.14 X X +event-listener@5.4.1 X X +event-listener-strategy@0.5.4 X X +expect-test@1.5.1 X X +fastnum@0.7.4 X X +fastrand@2.3.0 X X +find-msvc-tools@0.1.9 X X +flatbuffers@25.12.19 X +flate2@1.1.9 X X +fnv@1.0.7 X X +form_urlencoded@1.2.2 X X +fs_extra@1.3.0 X +futures@0.3.32 X X +futures-channel@0.3.32 X X +futures-core@0.3.32 X X +futures-executor@0.3.32 X X +futures-io@0.3.32 X X +futures-macro@0.3.32 X X +futures-sink@0.3.32 X X +futures-task@0.3.32 X X +futures-util@0.3.32 X X +generic-array@0.14.7 X +getrandom@0.2.17 X X +getrandom@0.3.4 X X +getrandom@0.4.2 X X +ghash@0.5.1 X X +gloo-timers@0.3.0 X X +h2@0.4.13 X +half@2.7.1 X X +hashbrown@0.14.5 X X +hashbrown@0.16.1 X X +hashbrown@0.17.1 X X +heck@0.5.0 X X +hex@0.4.3 X X +hmac@0.12.1 X X +http@1.4.0 X X +http-body@1.0.1 X +http-body-util@0.1.3 X +httparse@1.10.1 X X +httpdate@1.0.3 X X +hybrid-array@0.4.12 X X +hyper@1.8.1 X +hyper-rustls@0.27.7 X X X +hyper-util@0.1.20 X +iana-time-zone@0.1.65 X X +iana-time-zone-haiku@0.1.2 X X +iceberg@0.9.0 X +iceberg-catalog-rest@0.9.0 X +iceberg-examples@0.9.0 X +iceberg-storage-opendal@0.9.0 X +iceberg_test_utils@0.9.0 X +icu_collections@2.1.1 X +icu_locale_core@2.1.1 X +icu_normalizer@2.1.1 X +icu_normalizer_data@2.1.1 X +icu_properties@2.1.2 X +icu_properties_data@2.1.2 X +icu_provider@2.1.1 X +ident_case@1.0.1 X X +idna@1.1.0 X X +idna_adapter@1.2.1 X X +indexmap@2.13.0 X X +inout@0.1.4 X X +integer-encoding@3.0.4 X +inventory@0.3.22 X X +ipnet@2.12.0 X X +iri-string@0.7.11 X X +itertools@0.13.0 X X +itoa@1.0.18 X X +jiff@0.2.23 X X +jiff-tzdb@0.1.6 X X +jiff-tzdb-platform@0.1.3 X X +jni@0.22.4 X X +jni-macros@0.22.4 X X +jni-sys@0.4.1 X X +jni-sys-macros@0.4.1 X X +jobserver@0.1.34 X X +js-sys@0.3.91 X X +lazy_static@1.5.0 X X +lexical-core@1.0.6 X X +lexical-parse-float@1.0.6 X X +lexical-parse-integer@1.0.6 X X +lexical-util@1.0.7 X X 
+lexical-write-float@1.0.6 X X +lexical-write-integer@1.0.6 X X +libc@0.2.183 X X +libm@0.2.16 X +link-section@0.18.1 X X +linktime-proc-macro@0.2.0 X X +linux-raw-sys@0.12.1 X X X +litemap@0.8.1 X +lock_api@0.4.14 X X +log@0.4.29 X X +lz4_flex@0.13.0 X +md-5@0.11.0 X X +mea@0.6.3 X +memchr@2.8.0 X X +miniz_oxide@0.8.9 X X X +mio@1.2.0 X +moka@0.12.15 X X +murmur3@0.5.2 X X +nu-ansi-term@0.50.3 X +num-bigint@0.4.6 X X +num-complex@0.4.6 X X +num-integer@0.1.46 X X +num-traits@0.2.19 X X +once_cell@1.21.4 X X +opaque-debug@0.3.1 X X +opendal@0.57.0 X +opendal-core@0.57.0 X +opendal-layer-concurrent-limit@0.57.0 X +opendal-layer-logging@0.57.0 X +opendal-layer-retry@0.57.0 X +opendal-layer-timeout@0.57.0 X +opendal-service-fs@0.57.0 X +opendal-service-s3@0.57.0 X +openssl-probe@0.2.1 X X +ordered-float@2.10.1 X +ordered-float@4.6.0 X +ordered-multimap@0.7.3 X +parking@2.2.1 X X +parking_lot@0.12.5 X X +parking_lot_core@0.9.12 X X +parquet@58.1.0 X +paste@1.0.15 X X +percent-encoding@2.3.2 X X +pin-project-lite@0.2.17 X X +pin-utils@0.1.0 X X +pkg-config@0.3.32 X X +polyval@0.6.2 X X +portable-atomic@1.13.1 X X +portable-atomic-util@0.2.6 X X +potential_utf@0.1.4 X +ppv-lite86@0.2.21 X X +prettyplease@0.2.37 X X +proc-macro2@1.0.106 X X +quad-rand@0.2.3 X +quick-xml@0.39.4 X +quote@1.0.45 X X +r-efi@5.3.0 X X X +r-efi@6.0.0 X X X +rand@0.9.4 X X +rand_chacha@0.9.0 X X +rand_core@0.6.4 X X +rand_core@0.9.5 X X +redox_syscall@0.5.18 X +regex@1.12.3 X X +regex-automata@0.4.14 X X +regex-lite@0.1.9 X X +regex-syntax@0.8.10 X X +reqsign-aws-v4@3.0.0 X +reqsign-core@3.0.0 X +reqsign-file-read-tokio@3.0.0 X +reqwest@0.12.28 X X +reqwest@0.13.3 X X +ring@0.17.14 X X +roaring@0.11.3 X X +rust-ini@0.21.3 X +rustc_version@0.4.1 X X +rustix@1.1.4 X X X +rustls@0.23.37 X X X +rustls-native-certs@0.8.3 X X X +rustls-pki-types@1.14.0 X X +rustls-platform-verifier@0.7.0 X X +rustls-platform-verifier-android@0.1.1 X X +rustls-webpki@0.103.13 X +rustversion@1.0.22 X X +ryu@1.0.23 X X 
+same-file@1.0.6 X X +schannel@0.1.29 X +scopeguard@1.2.0 X X +security-framework@3.7.0 X X +security-framework-sys@2.17.0 X X +semver@1.0.27 X X +seq-macro@0.3.6 X X +serde@1.0.228 X X +serde-big-array@0.5.1 X X +serde_bytes@0.11.19 X X +serde_core@1.0.228 X X +serde_derive@1.0.228 X X +serde_json@1.0.149 X X +serde_repr@0.1.20 X X +serde_urlencoded@0.7.1 X X +serde_with@3.21.0 X X +serde_with_macros@3.21.0 X X +sha1@0.10.6 X X +sha2@0.10.9 X X +sharded-slab@0.1.7 X +shlex@1.3.0 X X +signal-hook-registry@1.4.8 X X +simd-adler32@0.3.8 X +simd_cesu8@1.1.1 X X +simdutf8@0.1.5 X X +slab@0.4.12 X +smallvec@1.15.1 X X +snap@1.1.1 X +socket2@0.6.3 X X +stable_deref_trait@1.2.1 X X +strsim@0.11.1 X +strum@0.27.2 X +strum_macros@0.27.2 X +subtle@2.6.1 X +syn@2.0.117 X X +sync_wrapper@1.0.2 X +synstructure@0.13.2 X +tagptr@0.2.0 X X +thiserror@2.0.18 X X +thiserror-impl@2.0.18 X X +thread_local@1.1.9 X X +thrift@0.17.0 X +tiny-keccak@2.0.2 X +tinystr@0.8.2 X +tokio@1.52.1 X +tokio-macros@2.7.0 X +tokio-rustls@0.26.4 X X +tokio-util@0.7.18 X +tower@0.5.3 X +tower-http@0.6.8 X +tower-layer@0.3.3 X +tower-service@0.3.3 X +tracing@0.1.44 X +tracing-attributes@0.1.31 X +tracing-core@0.1.36 X +tracing-log@0.2.0 X +tracing-subscriber@0.3.23 X +try-lock@0.2.5 X +twox-hash@2.1.2 X +typed-builder@0.20.1 X X +typed-builder-macro@0.20.1 X X +typeid@1.0.3 X X +typenum@1.20.1 X X +typetag@0.2.21 X X +typetag-impl@0.2.21 X X +unicode-ident@1.0.24 X X X +universal-hash@0.5.1 X X +untrusted@0.9.0 X +url@2.5.8 X X +utf8_iter@1.0.4 X X +uuid@1.23.0 X X +version_check@0.9.5 X X +walkdir@2.5.0 X X +want@0.3.1 X +wasi@0.11.1+wasi-snapshot-preview1 X X X +wasip2@1.0.2+wasi-0.2.9 X X X +wasip3@0.4.0+wasi-0.3.0-rc-2026-01-06 X X X +wasm-bindgen@0.2.114 X X +wasm-bindgen-futures@0.4.64 X X +wasm-bindgen-macro@0.2.114 X X +wasm-bindgen-macro-support@0.2.114 X X +wasm-bindgen-shared@0.2.114 X X +wasm-streams@0.5.0 X X +web-sys@0.3.91 X X +web-time@1.1.0 X X +webpki-root-certs@1.0.7 X 
+winapi-util@0.1.11 X X +windows-core@0.62.2 X X +windows-implement@0.60.2 X X +windows-interface@0.59.3 X X +windows-link@0.2.1 X X +windows-result@0.4.1 X X +windows-strings@0.5.1 X X +windows-sys@0.48.0 X X +windows-sys@0.52.0 X X +windows-sys@0.60.2 X X +windows-sys@0.61.2 X X +windows-targets@0.48.5 X X +windows-targets@0.52.6 X X +windows-targets@0.53.5 X X +windows_aarch64_gnullvm@0.48.5 X X +windows_aarch64_gnullvm@0.52.6 X X +windows_aarch64_gnullvm@0.53.1 X X +windows_aarch64_msvc@0.48.5 X X +windows_aarch64_msvc@0.52.6 X X +windows_aarch64_msvc@0.53.1 X X +windows_i686_gnu@0.48.5 X X +windows_i686_gnu@0.52.6 X X +windows_i686_gnu@0.53.1 X X +windows_i686_gnullvm@0.52.6 X X +windows_i686_gnullvm@0.53.1 X X +windows_i686_msvc@0.48.5 X X +windows_i686_msvc@0.52.6 X X +windows_i686_msvc@0.53.1 X X +windows_x86_64_gnu@0.48.5 X X +windows_x86_64_gnu@0.52.6 X X +windows_x86_64_gnu@0.53.1 X X +windows_x86_64_gnullvm@0.48.5 X X +windows_x86_64_gnullvm@0.52.6 X X +windows_x86_64_gnullvm@0.53.1 X X +windows_x86_64_msvc@0.48.5 X X +windows_x86_64_msvc@0.52.6 X X +windows_x86_64_msvc@0.53.1 X X +wit-bindgen@0.51.0 X X X +writeable@0.6.2 X +xattr@1.6.1 X X +yoke@0.8.1 X +yoke-derive@0.8.1 X +zerocopy@0.8.47 X X X +zerocopy-derive@0.8.47 X X X +zerofrom@0.1.6 X +zerofrom-derive@0.1.6 X +zeroize@1.8.2 X X +zerotrie@0.2.3 X +zerovec@0.11.5 X +zerovec-derive@0.11.2 X +zlib-rs@0.6.3 X +zmij@1.0.21 X +zstd@0.13.3 X +zstd-safe@7.2.4 X X +zstd-sys@2.0.16+zstd.1.5.7 X X diff --git a/crates/iceberg/Cargo.toml b/crates/iceberg/Cargo.toml index 9353a31842..a0af5dbd6a 100644 --- a/crates/iceberg/Cargo.toml +++ b/crates/iceberg/Cargo.toml @@ -52,6 +52,7 @@ base64 = { workspace = true } bimap = { workspace = true } bytes = { workspace = true } chrono = { workspace = true } +crc32fast = { workspace = true } derive_builder = { workspace = true } expect-test = { workspace = true } fastnum = { workspace = true } diff --git a/crates/iceberg/public-api.txt b/crates/iceberg/public-api.txt 
index 653649e6cf..9592dc195f 100644 --- a/crates/iceberg/public-api.txt +++ b/crates/iceberg/public-api.txt @@ -169,6 +169,30 @@ impl serde_core::ser::Serialize for iceberg::compression::CompressionCodec pub fn iceberg::compression::CompressionCodec::serialize(&self, serializer: S) -> core::result::Result<::Ok, ::Error> impl<'de> serde_core::de::Deserialize<'de> for iceberg::compression::CompressionCodec pub fn iceberg::compression::CompressionCodec::deserialize>(deserializer: D) -> core::result::Result::Error> +pub mod iceberg::delete_vector +pub struct iceberg::delete_vector::DeleteVector +impl iceberg::delete_vector::DeleteVector +pub fn iceberg::delete_vector::DeleteVector::from_puffin_blob(blob: iceberg::puffin::Blob) -> iceberg::Result +pub fn iceberg::delete_vector::DeleteVector::insert(&mut self, pos: u64) -> bool +pub fn iceberg::delete_vector::DeleteVector::insert_positions(&mut self, positions: &[u64]) -> iceberg::Result +pub fn iceberg::delete_vector::DeleteVector::is_empty(&self) -> bool +pub fn iceberg::delete_vector::DeleteVector::iter(&self) -> iceberg::delete_vector::DeleteVectorIterator<'_> +pub fn iceberg::delete_vector::DeleteVector::len(&self) -> u64 +pub fn iceberg::delete_vector::DeleteVector::new(roaring_treemap: roaring::treemap::RoaringTreemap) -> iceberg::delete_vector::DeleteVector +pub fn iceberg::delete_vector::DeleteVector::to_puffin_blob(&self, properties: std::collections::hash::map::HashMap) -> iceberg::Result +pub async fn iceberg::delete_vector::DeleteVector::write_to_puffin_file(&self, file_io: &iceberg::io::FileIO, location: alloc::string::String, referenced_data_file: alloc::string::String, partition: iceberg::spec::Struct, partition_spec_id: i32) -> iceberg::Result +impl core::default::Default for iceberg::delete_vector::DeleteVector +pub fn iceberg::delete_vector::DeleteVector::default() -> iceberg::delete_vector::DeleteVector +impl core::fmt::Debug for iceberg::delete_vector::DeleteVector +pub fn 
iceberg::delete_vector::DeleteVector::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result +impl core::ops::bit::BitOrAssign for iceberg::delete_vector::DeleteVector +pub fn iceberg::delete_vector::DeleteVector::bitor_assign(&mut self, other: Self) +pub struct iceberg::delete_vector::DeleteVectorIterator<'a> +impl iceberg::delete_vector::DeleteVectorIterator<'_> +pub fn iceberg::delete_vector::DeleteVectorIterator<'_>::advance_to(&mut self, pos: u64) +impl core::iter::traits::iterator::Iterator for iceberg::delete_vector::DeleteVectorIterator<'_> +pub type iceberg::delete_vector::DeleteVectorIterator<'_>::Item = u64 +pub fn iceberg::delete_vector::DeleteVectorIterator<'_>::next(&mut self) -> core::option::Option pub mod iceberg::encryption pub mod iceberg::encryption::kms pub struct iceberg::encryption::kms::GeneratedKey @@ -1210,10 +1234,18 @@ impl iceberg::puffin::PuffinReader pub async fn iceberg::puffin::PuffinReader::blob(&self, blob_metadata: &iceberg::puffin::BlobMetadata) -> iceberg::Result pub async fn iceberg::puffin::PuffinReader::file_metadata(&self) -> iceberg::Result<&iceberg::puffin::FileMetadata> pub fn iceberg::puffin::PuffinReader::new(input_file: iceberg::io::InputFile) -> Self +pub struct iceberg::puffin::PuffinWriteResult +pub iceberg::puffin::PuffinWriteResult::blobs_metadata: alloc::vec::Vec +pub iceberg::puffin::PuffinWriteResult::file_size_in_bytes: u64 +impl core::clone::Clone for iceberg::puffin::PuffinWriteResult +pub fn iceberg::puffin::PuffinWriteResult::clone(&self) -> iceberg::puffin::PuffinWriteResult +impl core::fmt::Debug for iceberg::puffin::PuffinWriteResult +pub fn iceberg::puffin::PuffinWriteResult::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result pub struct iceberg::puffin::PuffinWriter impl iceberg::puffin::PuffinWriter pub async fn iceberg::puffin::PuffinWriter::add(&mut self, blob: iceberg::puffin::Blob, compression_codec: iceberg::compression::CompressionCodec) -> iceberg::Result<()> pub 
async fn iceberg::puffin::PuffinWriter::close(self) -> iceberg::Result<()> +pub async fn iceberg::puffin::PuffinWriter::close_with_metadata(self) -> iceberg::Result pub async fn iceberg::puffin::PuffinWriter::new(output_file: &iceberg::io::OutputFile, properties: std::collections::hash::map::HashMap, compress_footer: bool) -> iceberg::Result pub const iceberg::puffin::APACHE_DATASKETCHES_THETA_V1: &str pub const iceberg::puffin::CREATED_BY_PROPERTY: &str @@ -3082,6 +3114,7 @@ pub fn iceberg::transaction::Transaction::expire_snapshots(&self) -> iceberg::tr pub fn iceberg::transaction::Transaction::fast_append(&self) -> iceberg::transaction::append::FastAppendAction pub fn iceberg::transaction::Transaction::new(table: &iceberg::table::Table) -> Self pub fn iceberg::transaction::Transaction::replace_sort_order(&self) -> iceberg::transaction::sort_order::ReplaceSortOrderAction +pub fn iceberg::transaction::Transaction::row_delta(&self) -> iceberg::transaction::row_delta::RowDeltaAction pub fn iceberg::transaction::Transaction::update_location(&self) -> iceberg::transaction::update_location::UpdateLocationAction pub fn iceberg::transaction::Transaction::update_schema(&self) -> iceberg::transaction::update_schema::UpdateSchemaAction pub fn iceberg::transaction::Transaction::update_statistics(&self) -> iceberg::transaction::update_statistics::UpdateStatisticsAction diff --git a/crates/iceberg/src/delete_vector.rs b/crates/iceberg/src/delete_vector.rs index df8a10193c..1ad074a731 100644 --- a/crates/iceberg/src/delete_vector.rs +++ b/crates/iceberg/src/delete_vector.rs @@ -15,20 +15,42 @@ // specific language governing permissions and limitations // under the License. +//! Iceberg V3 deletion vectors (`deletion-vector-v1`): a roaring-bitmap-backed set +//! of deleted row positions, serialized to and from Puffin blobs and files. 
+ +use std::collections::HashMap; +use std::io::Cursor; use std::ops::BitOrAssign; +use crc32fast::Hasher; use roaring::RoaringTreemap; use roaring::bitmap::Iter; use roaring::treemap::BitmapIter; +use crate::io::FileIO; +use crate::puffin::{Blob, CompressionCodec, DELETION_VECTOR_V1, PuffinWriter}; +use crate::spec::{DataContentType, DataFile, DataFileBuilder, DataFileFormat, Struct}; use crate::{Error, ErrorKind, Result}; +/// Iceberg `deletion-vector-v1` Puffin blob magic bytes (Iceberg Puffin spec; +/// ported from risingwavelabs/iceberg-rust #113 — design reference only). +const DELETION_VECTOR_MAGIC_BYTES: [u8; 4] = [0xD1, 0xD3, 0x39, 0x64]; +/// Minimum blob size: u32 length (4) + magic (4) + u32 crc (4). +const MIN_SERIALIZED_DELETION_VECTOR_BLOB: usize = 12; +/// Puffin blob property: deletion vector cardinality (number of deleted positions). +pub(crate) const DELETION_VECTOR_PROPERTY_CARDINALITY: &str = "cardinality"; +/// Puffin blob property: referenced data file path the DV applies to. +pub(crate) const DELETION_VECTOR_PROPERTY_REFERENCED_DATA_FILE: &str = "referenced-data-file"; + +/// A set of deleted row positions backed by a 64-bit roaring bitmap — the in-memory +/// form of an Iceberg V3 `deletion-vector-v1`. #[derive(Debug, Default)] pub struct DeleteVector { inner: RoaringTreemap, } impl DeleteVector { + /// Creates a delete vector that wraps an existing roaring treemap of positions. #[allow(unused)] pub fn new(roaring_treemap: RoaringTreemap) -> DeleteVector { DeleteVector { @@ -36,11 +58,13 @@ impl DeleteVector { } } + /// Returns an iterator over the deleted row positions in ascending order. pub fn iter(&self) -> DeleteVectorIterator<'_> { let outer = self.inner.bitmaps(); DeleteVectorIterator { outer, inner: None } } + /// Marks row position `pos` as deleted; returns `true` if it was newly added. 
pub fn insert(&mut self, pos: u64) -> bool { self.inner.insert(pos) } @@ -64,10 +88,210 @@ impl DeleteVector { Ok(positions.len()) } + /// Returns the number of deleted row positions. #[allow(unused)] pub fn len(&self) -> u64 { self.inner.len() } + + /// Returns `true` if there are no deleted positions in this vector. + pub fn is_empty(&self) -> bool { + self.inner.is_empty() + } + + /// Serialize this delete vector into an Iceberg V3 `deletion-vector-v1` Puffin blob. + /// + /// Blob layout (Iceberg Puffin spec): `length(u32 BE = magic + bitmap) ‖ magic ‖ + /// portable 64-bit RoaringTreemap ‖ crc32(u32 BE over magic + bitmap)`. + /// `properties` must contain `cardinality` + `referenced-data-file`. + /// Ported from risingwavelabs/iceberg-rust #113 (design reference only). + pub fn to_puffin_blob(&self, properties: HashMap) -> Result { + Self::check_properties(&properties)?; + + let serialized_bitmap_size = self.inner.serialized_size(); + let combined_length = (DELETION_VECTOR_MAGIC_BYTES.len() + serialized_bitmap_size) as u32; + let mut data = Vec::with_capacity( + std::mem::size_of_val(&combined_length) + + DELETION_VECTOR_MAGIC_BYTES.len() + + serialized_bitmap_size + + 4, + ); + + data.extend_from_slice(&combined_length.to_be_bytes()); + data.extend_from_slice(&DELETION_VECTOR_MAGIC_BYTES); + + let bitmap_start = data.len(); + data.resize(bitmap_start + serialized_bitmap_size, 0); + { + let mut cursor = Cursor::new(&mut data[bitmap_start..]); + self.inner.serialize_into(&mut cursor).map_err(|err| { + Error::new( + ErrorKind::Unexpected, + "failed to serialize deletion vector bitmap".to_string(), + ) + .with_source(err) + })?; + } + + let mut hasher = Hasher::new(); + hasher.update(&data[4..]); + let crc = hasher.finalize(); + data.extend_from_slice(&crc.to_be_bytes()); + + Ok(Blob::builder() + .r#type(DELETION_VECTOR_V1.to_string()) + .fields(vec![]) + .snapshot_id(-1) + .sequence_number(-1) + .data(data) + .properties(properties) + .build()) + } + + /// 
Deserialize a delete vector from an Iceberg `deletion-vector-v1` Puffin blob. + pub fn from_puffin_blob(blob: Blob) -> Result { + if blob.blob_type() != DELETION_VECTOR_V1 { + return Err(Error::new( + ErrorKind::DataInvalid, + format!("unsupported puffin blob type: {}", blob.blob_type()), + )); + } + + let data = blob.data(); + if data.len() < MIN_SERIALIZED_DELETION_VECTOR_BLOB { + return Err(Error::new( + ErrorKind::DataInvalid, + "serialized deletion vector blob too small".to_string(), + )); + } + + let magic = &data[4..8]; + if magic != DELETION_VECTOR_MAGIC_BYTES { + return Err(Error::new( + ErrorKind::DataInvalid, + "invalid deletion vector magic bytes".to_string(), + )); + } + + let combined_length = u32::from_be_bytes([data[0], data[1], data[2], data[3]]); + let expected_len = std::mem::size_of_val(&combined_length) + combined_length as usize + 4; + if expected_len != data.len() { + return Err(Error::new( + ErrorKind::DataInvalid, + format!( + "serialized deletion vector length mismatch: expected {expected_len}, actual {}", + data.len() + ), + )); + } + + let bitmap_end = data.len() - 4; + let bitmap_data = &data[8..bitmap_end]; + + let mut hasher = Hasher::new(); + hasher.update(&data[4..bitmap_end]); + let expected_crc = hasher.finalize(); + let stored_crc = u32::from_be_bytes([ + data[data.len() - 4], + data[data.len() - 3], + data[data.len() - 2], + data[data.len() - 1], + ]); + if expected_crc != stored_crc { + return Err(Error::new( + ErrorKind::DataInvalid, + format!("deletion vector crc mismatch: expected {expected_crc}, got {stored_crc}"), + )); + } + + let bitmap = + RoaringTreemap::deserialize_from(&mut Cursor::new(bitmap_data)).map_err(|err| { + Error::new( + ErrorKind::DataInvalid, + "failed to deserialize deletion vector bitmap".to_string(), + ) + .with_source(err) + })?; + + Ok(DeleteVector::new(bitmap)) + } + + fn check_properties(properties: &HashMap) -> Result<()> { + if !properties.contains_key(DELETION_VECTOR_PROPERTY_CARDINALITY) { + 
return Err(Error::new( + ErrorKind::DataInvalid, + format!( + "deletion vector blob missing required property: {DELETION_VECTOR_PROPERTY_CARDINALITY}" + ), + )); + } + if !properties.contains_key(DELETION_VECTOR_PROPERTY_REFERENCED_DATA_FILE) { + return Err(Error::new( + ErrorKind::DataInvalid, + format!( + "deletion vector blob missing required property: {DELETION_VECTOR_PROPERTY_REFERENCED_DATA_FILE}" + ), + )); + } + Ok(()) + } + + /// Write this delete vector to a `deletion-vector-v1` Puffin file at `location` and + /// return the V3 `DataFile{content=PositionDeletes, …}` to feed + /// `RowDeltaAction::add_delete_files`. Connects DV serialization → Puffin file → + /// delete-file metadata (offset/length) in one step (cf. RW `deletion_vector_writer.rs`). + pub async fn write_to_puffin_file( + &self, + file_io: &FileIO, + location: String, + referenced_data_file: String, + partition: Struct, + partition_spec_id: i32, + ) -> Result { + let cardinality = self.len(); + let properties = HashMap::from([ + ( + DELETION_VECTOR_PROPERTY_CARDINALITY.to_string(), + cardinality.to_string(), + ), + ( + DELETION_VECTOR_PROPERTY_REFERENCED_DATA_FILE.to_string(), + referenced_data_file.clone(), + ), + ]); + let blob = self.to_puffin_blob(properties)?; + + let output_file = file_io.new_output(&location)?; + let mut writer = PuffinWriter::new(&output_file, HashMap::new(), false).await?; + writer.add(blob, CompressionCodec::None).await?; + let result = writer.close_with_metadata().await?; + let file_size = result.file_size_in_bytes; + let blob_metadata = result.blobs_metadata.first().ok_or_else(|| { + Error::new( + ErrorKind::Unexpected, + "puffin metadata is empty after writing deletion vector", + ) + })?; + + DataFileBuilder::default() + .content(DataContentType::PositionDeletes) + .file_path(location) + .file_format(DataFileFormat::Puffin) + .partition(partition) + .partition_spec_id(partition_spec_id) + .record_count(cardinality) + .file_size_in_bytes(file_size) + 
.referenced_data_file(Some(referenced_data_file)) + .content_offset(Some(blob_metadata.offset() as i64)) + .content_size_in_bytes(Some(blob_metadata.length() as i64)) + .build() + .map_err(|err| { + Error::new( + ErrorKind::DataInvalid, + format!("failed to build deletion vector data file: {err}"), + ) + }) + } } // Ideally, we'd just wrap `roaring::RoaringTreemap`'s iterator, `roaring::treemap::Iter` here. @@ -76,6 +300,7 @@ impl DeleteVector { // There is a PR open on roaring to add this (https://github.com/RoaringBitmap/roaring-rs/pull/314) // and if that gets merged then we can simplify `DeleteVectorIterator` here, refactoring `advance_to` // to just a wrapper around the underlying iterator's method. +/// Iterator over the deleted row positions of a [`DeleteVector`], in ascending order. pub struct DeleteVectorIterator<'a> { // NB: `BitMapIter` was only exposed publicly in https://github.com/RoaringBitmap/roaring-rs/pull/316 // which is not yet released. As a consequence our Cargo.toml temporarily uses a git reference for @@ -113,6 +338,7 @@ impl Iterator for DeleteVectorIterator<'_> { } impl DeleteVectorIterator<'_> { + /// Advances the iterator so the next yielded position is `>= pos`. pub fn advance_to(&mut self, pos: u64) { let hi = (pos >> 32) as u32; let lo = pos as u32; @@ -198,4 +424,319 @@ mod tests { let res = dv.insert_positions(&positions); assert!(res.is_err()); } + + fn dv_props() -> HashMap { + HashMap::from([ + ( + DELETION_VECTOR_PROPERTY_CARDINALITY.to_string(), + "0".to_string(), + ), + ( + DELETION_VECTOR_PROPERTY_REFERENCED_DATA_FILE.to_string(), + "s3://bucket/data/f.parquet".to_string(), + ), + ]) + } + + /// Self round-trip: serialize → Puffin blob → deserialize recovers the positions, + /// validating the frame (length, magic, crc) and serialize/deserialize symmetry. 
+ #[test] + fn test_dv_puffin_blob_roundtrip() { + let positions = [1u64, 5, 42, 100, 1 << 33, (1u64 << 33) + 7]; + let mut dv = DeleteVector::default(); + for p in positions { + dv.insert(p); + } + let blob = dv.to_puffin_blob(dv_props()).unwrap(); + assert_eq!(blob.blob_type(), DELETION_VECTOR_V1); + + let restored = DeleteVector::from_puffin_blob(blob).unwrap(); + let mut got: Vec = restored.iter().collect(); + got.sort(); + assert_eq!(got, positions.to_vec()); + } + + /// Spark-compatibility proxy: parse the serialized bitmap with the EXACT algorithm + /// pyiceberg's `_deserialize_bitmap` uses — `[u64 LE bucket count]` then per bucket + /// `[u32 LE high-key + 32-bit portable RoaringBitmap]`. If this recovers the + /// positions, the bytes are Iceberg/Spark portable (no Spark needed for the signal). + #[test] + fn test_dv_blob_is_iceberg_portable() { + use std::io::Read; + + use roaring::RoaringBitmap; + + let positions = [3u64, 7, 100, (1u64 << 33) + 5]; + let mut dv = DeleteVector::default(); + for p in positions { + dv.insert(p); + } + let blob = dv.to_puffin_blob(dv_props()).unwrap(); + let data = blob.data(); + + // Frame (certain): [u32 BE len][magic][bitmap][u32 BE crc] + assert_eq!(&data[4..8], &DELETION_VECTOR_MAGIC_BYTES); + let bitmap = &data[8..data.len() - 4]; + + // pyiceberg portable parse + let mut cur = Cursor::new(bitmap); + let mut count_buf = [0u8; 8]; + cur.read_exact(&mut count_buf).unwrap(); + let n_buckets = u64::from_le_bytes(count_buf); + + let mut recovered: Vec = Vec::new(); + for _ in 0..n_buckets { + let mut key_buf = [0u8; 4]; + cur.read_exact(&mut key_buf).unwrap(); + let hi = u32::from_le_bytes(key_buf) as u64; + let bm = RoaringBitmap::deserialize_from(&mut cur).unwrap(); + for lo in bm.iter() { + recovered.push((hi << 32) | u64::from(lo)); + } + } + recovered.sort(); + assert_eq!( + recovered, + positions.to_vec(), + "serialized bitmap is NOT Iceberg-portable — roaring serialize_into header \ + differs from pyiceberg layout; 
switch to hand-rolled portable framing" + ); + } + + /// Piece 2 — full Puffin-FILE round-trip in Rust: write a DV blob to a real + /// Puffin file via `PuffinWriter`, read it back via `PuffinReader`, and recover + /// the deleted positions. Proves the Puffin file framing, not just the blob bytes. + #[tokio::test] + async fn test_dv_puffin_file_roundtrip() { + use tempfile::TempDir; + + use crate::io::FileIO; + use crate::puffin::{CompressionCodec, PuffinReader, PuffinWriter}; + + let positions = [2u64, 9, 256, (1u64 << 33) + 11]; + let mut dv = DeleteVector::default(); + for p in positions { + dv.insert(p); + } + assert!(!dv.is_empty()); + + let mut props = dv_props(); + props.insert( + DELETION_VECTOR_PROPERTY_CARDINALITY.to_string(), + dv.len().to_string(), + ); + let blob = dv.to_puffin_blob(props).unwrap(); + + let tmp = TempDir::new().unwrap(); + let path_buf = tmp.path().join("dv.puffin"); + let path = path_buf.to_str().unwrap(); + + let file_io = FileIO::new_with_fs(); + let output = file_io.new_output(path).unwrap(); + let mut writer = PuffinWriter::new(&output, HashMap::new(), false) + .await + .unwrap(); + writer.add(blob, CompressionCodec::None).await.unwrap(); + writer.close().await.unwrap(); + + let input = output.to_input_file(); + let reader = PuffinReader::new(input); + let meta = reader.file_metadata().await.unwrap().clone(); + assert_eq!(meta.blobs.len(), 1); + let read_blob = reader.blob(meta.blobs.first().unwrap()).await.unwrap(); + + let restored = DeleteVector::from_puffin_blob(read_blob).unwrap(); + let mut got: Vec = restored.iter().collect(); + got.sort(); + assert_eq!(got, positions.to_vec()); + } + + /// Piece 2.5 — glue: write a DV to a Puffin file and get back a V3 + /// `DataFile{PositionDeletes}` (offset/size/referenced-file filled), then read the + /// written file back and recover the positions. 
+ #[tokio::test] + async fn test_dv_write_to_puffin_file() { + use tempfile::TempDir; + + use crate::puffin::PuffinReader; + + let positions = [4u64, 11, 512, (1u64 << 33) + 3]; + let mut dv = DeleteVector::default(); + for p in positions { + dv.insert(p); + } + + let tmp = TempDir::new().unwrap(); + let path_buf = tmp.path().join("dv2.puffin"); + let location = path_buf.to_str().unwrap().to_string(); + let file_io = FileIO::new_with_fs(); + + let data_file = dv + .write_to_puffin_file( + &file_io, + location.clone(), + "s3://bucket/data/x.parquet".to_string(), + Struct::empty(), + 0, + ) + .await + .unwrap(); + + assert_eq!(data_file.content_type(), DataContentType::PositionDeletes); + assert_eq!( + data_file.referenced_data_file().as_deref(), + Some("s3://bucket/data/x.parquet") + ); + assert!(data_file.content_offset().is_some()); + assert!(data_file.content_size_in_bytes().is_some()); + + // The written Puffin file reads back to the same positions. + let input = file_io.new_input(&location).unwrap(); + let reader = PuffinReader::new(input); + let meta = reader.file_metadata().await.unwrap().clone(); + let blob = reader.blob(meta.blobs.first().unwrap()).await.unwrap(); + let restored = DeleteVector::from_puffin_blob(blob).unwrap(); + let mut got: Vec = restored.iter().collect(); + got.sort(); + assert_eq!(got, positions.to_vec()); + } + + /// Cross-implementation byte-parity with Apache Iceberg-Java: the serialized + /// `deletion-vector-v1` payload for positions {1,3,5,7,9} must be byte-identical + /// to the Java-produced golden fixture (lifted from apache/iceberg test resources + /// `small-alternating-values-position-index.bin` via apache/iceberg-go). Proves our + /// roaring serialization + framing exactly match the Iceberg-Java reference. 
+ #[test] + fn test_dv_payload_byte_identical_to_java_golden() { + let golden: &[u8] = include_bytes!(concat!( + env!("CARGO_MANIFEST_DIR"), + "/testdata/puffin/deletion-vector-v1-payload.bin" + )); + + let mut dv = DeleteVector::default(); + for p in [1u64, 3, 5, 7, 9] { + dv.insert(p); + } + let props = HashMap::from([ + ( + DELETION_VECTOR_PROPERTY_CARDINALITY.to_string(), + "5".to_string(), + ), + ( + DELETION_VECTOR_PROPERTY_REFERENCED_DATA_FILE.to_string(), + "data/test.parquet".to_string(), + ), + ]); + let blob = dv.to_puffin_blob(props).unwrap(); + + assert_eq!( + blob.data(), + golden, + "DV payload must be byte-identical to the apache/iceberg Java golden fixture" + ); + } + + /// Empty deletion vector round-trips (mirrors iceberg-go `TestSerializeDVEmpty`). + #[test] + fn test_dv_empty_roundtrip() { + let dv = DeleteVector::default(); + let props = HashMap::from([ + ( + DELETION_VECTOR_PROPERTY_CARDINALITY.to_string(), + "0".to_string(), + ), + ( + DELETION_VECTOR_PROPERTY_REFERENCED_DATA_FILE.to_string(), + "data/empty.parquet".to_string(), + ), + ]); + let blob = dv.to_puffin_blob(props).unwrap(); + let restored = DeleteVector::from_puffin_blob(blob).unwrap(); + assert!(restored.is_empty()); + assert_eq!(restored.len(), 0); + } + + /// Positions straddling the 2^31 (Java-signed) and 2^32 (roaring bucket) + /// boundaries round-trip (mirrors iceberg-go `TestSerializeDVLargePositions`). 
+ #[test] + fn test_dv_boundary_positions_roundtrip() { + let positions = [100u64, 101, 2_147_483_747, 2_147_483_748, (1u64 << 32) | 42]; + let mut dv = DeleteVector::default(); + for p in positions { + dv.insert(p); + } + let props = HashMap::from([ + ( + DELETION_VECTOR_PROPERTY_CARDINALITY.to_string(), + "5".to_string(), + ), + ( + DELETION_VECTOR_PROPERTY_REFERENCED_DATA_FILE.to_string(), + "data/boundary.parquet".to_string(), + ), + ]); + let blob = dv.to_puffin_blob(props).unwrap(); + let restored = DeleteVector::from_puffin_blob(blob).unwrap(); + let mut got: Vec = restored.iter().collect(); + got.sort(); + assert_eq!(got, positions.to_vec()); + } + + /// Byte-parity with Apache Iceberg-Java: the **empty** DV payload + /// (`empty-position-index.bin` from apache/iceberg test resources). + #[test] + fn test_dv_payload_byte_identical_to_java_empty() { + let golden: &[u8] = include_bytes!(concat!( + env!("CARGO_MANIFEST_DIR"), + "/testdata/puffin/empty-position-index.bin" + )); + let dv = DeleteVector::default(); + let props = HashMap::from([ + ( + DELETION_VECTOR_PROPERTY_CARDINALITY.to_string(), + "0".to_string(), + ), + ( + DELETION_VECTOR_PROPERTY_REFERENCED_DATA_FILE.to_string(), + "data/test.parquet".to_string(), + ), + ]); + let blob = dv.to_puffin_blob(props).unwrap(); + assert_eq!( + blob.data(), + golden, + "empty DV payload must be byte-identical to the Java golden fixture" + ); + } + + /// Byte-parity with Apache Iceberg-Java: small + large positions spanning two + /// 16-bit roaring containers — {100, 101, INT_MAX+100, INT_MAX+101} per + /// `small-and-large-values-position-index.bin` from apache/iceberg test resources. 
+ #[test] + fn test_dv_payload_byte_identical_to_java_small_and_large() { + let golden: &[u8] = include_bytes!(concat!( + env!("CARGO_MANIFEST_DIR"), + "/testdata/puffin/small-and-large-values-position-index.bin" + )); + let mut dv = DeleteVector::default(); + for p in [100u64, 101, 2_147_483_747, 2_147_483_748] { + dv.insert(p); + } + let props = HashMap::from([ + ( + DELETION_VECTOR_PROPERTY_CARDINALITY.to_string(), + "4".to_string(), + ), + ( + DELETION_VECTOR_PROPERTY_REFERENCED_DATA_FILE.to_string(), + "data/test.parquet".to_string(), + ), + ]); + let blob = dv.to_puffin_blob(props).unwrap(); + assert_eq!( + blob.data(), + golden, + "small+large DV payload must be byte-identical to the Java golden fixture" + ); + } } diff --git a/crates/iceberg/src/lib.rs b/crates/iceberg/src/lib.rs index 4e346460f5..4fd02f79e7 100644 --- a/crates/iceberg/src/lib.rs +++ b/crates/iceberg/src/lib.rs @@ -98,7 +98,7 @@ pub mod encryption; pub mod test_utils; pub mod writer; -mod delete_vector; +pub mod delete_vector; pub mod metadata_columns; pub mod puffin; /// Utility functions and modules. diff --git a/crates/iceberg/src/puffin/mod.rs b/crates/iceberg/src/puffin/mod.rs index 0e054cac51..a660e25b59 100644 --- a/crates/iceberg/src/puffin/mod.rs +++ b/crates/iceberg/src/puffin/mod.rs @@ -51,7 +51,7 @@ mod reader; pub use reader::PuffinReader; mod writer; -pub use writer::PuffinWriter; +pub use writer::{PuffinWriteResult, PuffinWriter}; #[cfg(test)] mod test_utils; diff --git a/crates/iceberg/src/puffin/writer.rs b/crates/iceberg/src/puffin/writer.rs index 4af4970b04..d77c533be0 100644 --- a/crates/iceberg/src/puffin/writer.rs +++ b/crates/iceberg/src/puffin/writer.rs @@ -26,6 +26,16 @@ use crate::io::{FileWrite, OutputFile}; use crate::puffin::blob::Blob; use crate::puffin::metadata::{BlobMetadata, FileMetadata, Flag}; +/// Result of finalizing a Puffin file: total bytes written + per-blob metadata +/// (offsets/lengths), needed to build delete-file `DataFile`s for MoR commits. 
+#[derive(Debug, Clone)] +pub struct PuffinWriteResult { + /// Total size of the written Puffin file in bytes. + pub file_size_in_bytes: u64, + /// Metadata (incl. offset + length) for each blob written into the file. + pub blobs_metadata: Vec, +} + /// Puffin writer pub struct PuffinWriter { writer: Box, @@ -87,12 +97,21 @@ impl PuffinWriter { Ok(()) } - /// Finalizes the Puffin file - pub async fn close(mut self) -> Result<()> { + /// Finalizes the Puffin file. + pub async fn close(self) -> Result<()> { + self.close_with_metadata().await.map(|_| ()) + } + + /// Finalizes the Puffin file and returns the written size + per-blob metadata + /// (offsets/lengths) — needed to build a `DataFile` for an added MoR delete file. + pub async fn close_with_metadata(mut self) -> Result { self.write_header_once().await?; self.write_footer().await?; self.writer.close().await?; - Ok(()) + Ok(PuffinWriteResult { + file_size_in_bytes: self.num_bytes_written, + blobs_metadata: self.written_blobs_metadata, + }) } async fn write(&mut self, bytes: Bytes) -> Result<()> { diff --git a/crates/iceberg/src/transaction/append.rs b/crates/iceberg/src/transaction/append.rs index 36fde117ab..50c71d0fc9 100644 --- a/crates/iceberg/src/transaction/append.rs +++ b/crates/iceberg/src/transaction/append.rs @@ -122,7 +122,7 @@ impl SnapshotProduceOperation for FastAppendOperation { async fn existing_manifest( &self, - snapshot_produce: &SnapshotProducer<'_>, + snapshot_produce: &mut SnapshotProducer<'_>, ) -> Result> { let Some(snapshot) = snapshot_produce.table.metadata().current_snapshot() else { return Ok(vec![]); diff --git a/crates/iceberg/src/transaction/mod.rs b/crates/iceberg/src/transaction/mod.rs index d78f41cd42..e0637778df 100644 --- a/crates/iceberg/src/transaction/mod.rs +++ b/crates/iceberg/src/transaction/mod.rs @@ -55,6 +55,7 @@ mod action; pub use action::*; mod append; mod expire_snapshots; +mod row_delta; mod snapshot; mod sort_order; mod update_location; @@ -75,6 +76,7 @@ use 
crate::table::Table; use crate::transaction::action::BoxedTransactionAction; use crate::transaction::append::FastAppendAction; use crate::transaction::expire_snapshots::ExpireSnapshotsAction; +use crate::transaction::row_delta::RowDeltaAction; use crate::transaction::sort_order::ReplaceSortOrderAction; use crate::transaction::update_location::UpdateLocationAction; use crate::transaction::update_properties::UpdatePropertiesAction; @@ -151,6 +153,16 @@ impl Transaction { FastAppendAction::new() } + /// Creates a row delta action for row-level modifications. + /// + /// RowDelta supports: + /// - Adding new data files (inserts) + /// - Removing data files (deletes in Copy-on-Write (COW) mode) + /// - Both operations in a single transaction (updates/merges) + pub fn row_delta(&self) -> RowDeltaAction { + RowDeltaAction::new() + } + /// Creates replace sort order action. pub fn replace_sort_order(&self) -> ReplaceSortOrderAction { ReplaceSortOrderAction::new() diff --git a/crates/iceberg/src/transaction/row_delta.rs b/crates/iceberg/src/transaction/row_delta.rs new file mode 100644 index 0000000000..0cc0503de7 --- /dev/null +++ b/crates/iceberg/src/transaction/row_delta.rs @@ -0,0 +1,577 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::collections::{HashMap, HashSet}; +use std::sync::Arc; + +use async_trait::async_trait; +use uuid::Uuid; + +use crate::error::Result; +use crate::spec::{DataFile, ManifestContentType, ManifestEntry, ManifestFile, Operation}; +use crate::table::Table; +use crate::transaction::snapshot::{ + DefaultManifestProcess, SnapshotProduceOperation, SnapshotProducer, +}; +use crate::transaction::{ActionCommit, TransactionAction}; + +/// Transaction action for Copy-on-Write row-level modifications (UPDATE, DELETE, MERGE INTO). +/// +/// Corresponds to `org.apache.iceberg.RowDelta` in the Java implementation. +pub struct RowDeltaAction { + added_data_files: Vec, + removed_data_files: Vec, + /// MoR delete files (position/equality deletes, incl. V3 deletion vectors) to add. + added_delete_files: Vec, + commit_uuid: Option, + snapshot_properties: HashMap, + starting_snapshot_id: Option, +} + +impl RowDeltaAction { + pub(crate) fn new() -> Self { + Self { + added_data_files: vec![], + removed_data_files: vec![], + added_delete_files: vec![], + commit_uuid: None, + snapshot_properties: HashMap::default(), + starting_snapshot_id: None, + } + } + + /// Add new data files (INSERT rows or Copy-on-Write rewritten files). + pub fn add_data_files(mut self, data_files: impl IntoIterator) -> Self { + self.added_data_files.extend(data_files); + self + } + + /// Mark existing data files as deleted (Copy-on-Write mode). + /// + /// Corresponds to `removeRows(DataFile)` in the Java implementation. + pub fn remove_data_files(mut self, data_files: impl IntoIterator) -> Self { + self.removed_data_files.extend(data_files); + self + } + + /// Add Merge-on-Read delete files (position/equality deletes, incl. V3 deletion + /// vectors). Written into a content=Deletes manifest at commit time. 
+ pub fn add_delete_files(mut self, delete_files: impl IntoIterator) -> Self { + self.added_delete_files.extend(delete_files); + self + } + + /// Set the commit UUID used for manifest file naming. + pub fn set_commit_uuid(mut self, commit_uuid: Uuid) -> Self { + self.commit_uuid = Some(commit_uuid); + self + } + + /// Attach custom key/value metadata to the snapshot summary. + pub fn set_snapshot_properties(mut self, snapshot_properties: HashMap) -> Self { + self.snapshot_properties = snapshot_properties; + self + } + + /// Reject the commit if the table has advanced past `snapshot_id` (optimistic concurrency). + pub fn validate_from_snapshot(mut self, snapshot_id: i64) -> Self { + self.starting_snapshot_id = Some(snapshot_id); + self + } +} + +#[async_trait] +impl TransactionAction for RowDeltaAction { + async fn commit(self: Arc, table: &Table) -> Result { + if let Some(expected_snapshot_id) = self.starting_snapshot_id + && table.metadata().current_snapshot_id() != Some(expected_snapshot_id) + { + return Err(crate::Error::new( + crate::ErrorKind::DataInvalid, + format!( + "Cannot commit RowDelta based on stale snapshot. Expected: {}, Current: {:?}", + expected_snapshot_id, + table.metadata().current_snapshot_id() + ), + )); + } + + let mut snapshot_producer = SnapshotProducer::new( + table, + self.commit_uuid.unwrap_or_else(Uuid::now_v7), + None, + self.snapshot_properties.clone(), + self.added_data_files.clone(), + ); + + // Validate newly added data files (partition value type-checks, etc.). + // removed_data_files are not re-validated: they are existing table files that were + // already validated when originally committed. This matches Java's MergingSnapshotProducer. + snapshot_producer.validate_added_data_files()?; + + // MoR delete files (position/equality deletes, incl. V3 deletion vectors) are + // written into a separate content=Deletes manifest by the snapshot producer. 
+ snapshot_producer.set_added_delete_files(self.added_delete_files.clone()); + + let operation = RowDeltaOperation { + removed_data_files: self.removed_data_files.clone(), + has_added_data_files: !self.added_data_files.is_empty(), + has_added_delete_files: !self.added_delete_files.is_empty(), + }; + + snapshot_producer + .commit(operation, DefaultManifestProcess) + .await + } +} + +struct RowDeltaOperation { + removed_data_files: Vec, + has_added_data_files: bool, + has_added_delete_files: bool, +} + +impl SnapshotProduceOperation for RowDeltaOperation { + /// Operation type (mirrors Java `BaseRowDelta.operation()`): + /// - Any data files removed → `Overwrite` + /// - MoR delete files added → `Overwrite` if data files also added, else `Delete` + /// - Only data files added (or nothing) → `Append` + fn operation(&self) -> Operation { + if !self.removed_data_files.is_empty() { + Operation::Overwrite + } else if self.has_added_delete_files { + if self.has_added_data_files { + Operation::Overwrite + } else { + Operation::Delete + } + } else { + Operation::Append + } + } + + /// Delete entries are handled inside `existing_manifest` by rewriting the manifest. + async fn delete_entries( + &self, + _snapshot_produce: &SnapshotProducer<'_>, + ) -> Result> { + Ok(vec![]) + } + + /// Returns manifest files for the new snapshot. + /// + /// For each manifest in the previous snapshot: + /// - If it contains any file being removed: rewrite it with DELETED entries for removed files + /// and EXISTING entries for survivors, preserving original sequence numbers. + /// - Otherwise: carry it forward unchanged. + /// + /// This matches Java's `ManifestFilterManager.filterManifestWithDeletedFiles` logic. 
+    async fn existing_manifest(
+        &self,
+        snapshot_produce: &mut SnapshotProducer<'_>,
+    ) -> Result<Vec<ManifestFile>> {
+        // No current snapshot means there is nothing to carry forward or rewrite.
+        let Some(snapshot) = snapshot_produce.table.metadata().current_snapshot() else {
+            return Ok(vec![]);
+        };
+
+        let manifest_list = snapshot_produce
+            .table
+            .manifest_list_reader(snapshot)
+            .load()
+            .await?;
+
+        // Paths of the data files this operation removes (Copy-on-Write).
+        let deleted_paths: HashSet<&str> = self
+            .removed_data_files
+            .iter()
+            .map(|f| f.file_path())
+            .collect();
+
+        let mut result = Vec::new();
+        for manifest_file in manifest_list.entries() {
+            // Manifests without ADDED or EXISTING entries are not carried forward.
+            if !manifest_file.has_added_files() && !manifest_file.has_existing_files() {
+                continue;
+            }
+
+            // Carry the manifest forward without reading it when it cannot be affected:
+            // - nothing is being removed, so no entry can match; or
+            // - it is a content=Deletes manifest, which must never be rewritten through
+            //   the content=Data writer used below.
+            if deleted_paths.is_empty()
+                || matches!(manifest_file.content, ManifestContentType::Deletes)
+            {
+                result.push(manifest_file.clone());
+                continue;
+            }
+
+            let manifest = manifest_file
+                .load_manifest(snapshot_produce.table.file_io())
+                .await?;
+
+            let needs_rewrite = manifest
+                .entries()
+                .iter()
+                .any(|e| e.is_alive() && deleted_paths.contains(e.data_file().file_path()));
+
+            if !needs_rewrite {
+                result.push(manifest_file.clone());
+                continue;
+            }
+
+            // Rewrite: deleted files → DELETED (new snapshot_id, original seq nums preserved),
+            // surviving files → EXISTING (all original fields preserved).
+            let mut writer = snapshot_produce.new_manifest_writer(ManifestContentType::Data)?;
+            for entry in manifest.entries() {
+                // Entries already DELETED by an earlier snapshot are not live content.
+                // Copying them would re-mark them EXISTING (resurrecting the file) or
+                // DELETED a second time, so only live entries are carried over.
+                if !entry.is_alive() {
+                    continue;
+                }
+
+                if deleted_paths.contains(entry.data_file().file_path()) {
+                    writer.add_delete_entry((**entry).clone())?;
+                } else {
+                    writer.add_existing_entry((**entry).clone())?;
+                }
+            }
+            result.push(writer.write_manifest_file().await?);
+        }
+
+        Ok(result)
+    }
+
+    fn removed_data_files(&self) -> &[DataFile] {
+        &self.removed_data_files
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::sync::Arc;
+
+    use crate::spec::{
+        DataContentType, DataFile, DataFileBuilder, DataFileFormat, Literal, MAIN_BRANCH,
+        ManifestStatus, Struct, TableMetadataBuilder,
+    };
+    use crate::table::Table;
+    use crate::transaction::tests::make_v2_minimal_table;
+    use crate::transaction::{Transaction, TransactionAction};
+    use crate::{TableIdent, TableUpdate};
+
+    fn make_data_file(table: &Table, path: &str, size: u64) -> DataFile {
+        DataFileBuilder::default()
+            .content(DataContentType::Data)
+            .file_path(path.to_string())
+            .file_format(DataFileFormat::Parquet)
+            .file_size_in_bytes(size)
+            .record_count(10)
+            .partition_spec_id(table.metadata().default_partition_spec_id())
+            .partition(Struct::from_iter([Some(Literal::long(100))]))
+            .build()
+            .unwrap()
+    }
+
+    /// Build a table that has `snapshot` as its current snapshot, backed by the same FileIO.
+ async fn table_with_snapshot(base: &Table, snapshot: crate::spec::Snapshot) -> Table { + let updated_metadata = + TableMetadataBuilder::new_from_metadata(base.metadata_ref().as_ref().clone(), None) + .set_branch_snapshot(snapshot, MAIN_BRANCH) + .unwrap() + .build() + .unwrap() + .metadata; + + Table::builder() + .metadata(updated_metadata) + .metadata_location("s3://bucket/test/location/metadata/v2.json".to_string()) + .identifier(TableIdent::from_strs(["ns1", "test1"]).unwrap()) + .file_io(base.file_io().clone()) + .runtime(crate::test_utils::test_runtime()) + .build() + .unwrap() + } + + #[tokio::test] + async fn test_row_delta_add_only() { + let table = make_v2_minimal_table(); + let data_file = make_data_file(&table, "test/1.parquet", 100); + let action = Transaction::new(&table) + .row_delta() + .add_data_files(vec![data_file]); + + let mut commit = Arc::new(action).commit(&table).await.unwrap(); + let updates = commit.take_updates(); + + if let TableUpdate::AddSnapshot { snapshot } = &updates[0] { + assert_eq!(snapshot.summary().operation, crate::spec::Operation::Append); + } else { + panic!("expected AddSnapshot"); + } + } + + #[tokio::test] + async fn test_row_delta_with_snapshot_properties() { + let table = make_v2_minimal_table(); + let data_file = make_data_file(&table, "test/1.parquet", 100); + let mut props = std::collections::HashMap::new(); + props.insert("key".to_string(), "value".to_string()); + let action = Transaction::new(&table) + .row_delta() + .set_snapshot_properties(props) + .add_data_files(vec![data_file]); + + let mut commit = Arc::new(action).commit(&table).await.unwrap(); + let updates = commit.take_updates(); + + if let TableUpdate::AddSnapshot { snapshot } = &updates[0] { + assert_eq!( + snapshot.summary().additional_properties.get("key").unwrap(), + "value" + ); + } else { + panic!("expected AddSnapshot"); + } + } + + #[tokio::test] + async fn test_row_delta_validate_from_snapshot() { + let table = make_v2_minimal_table(); + let 
data_file = make_data_file(&table, "test/1.parquet", 100); + let action = Transaction::new(&table) + .row_delta() + .validate_from_snapshot(99999) + .add_data_files(vec![data_file]); + + let result = Arc::new(action).commit(&table).await; + match result { + Ok(_) => panic!("expected DataInvalid error for stale snapshot"), + Err(e) => assert_eq!(e.kind(), crate::ErrorKind::DataInvalid), + } + } + + #[tokio::test] + async fn test_row_delta_empty_action() { + let table = make_v2_minimal_table(); + assert!( + Arc::new(Transaction::new(&table).row_delta()) + .commit(&table) + .await + .is_err() + ); + } + + #[tokio::test] + async fn test_row_delta_incompatible_partition_value() { + let table = make_v2_minimal_table(); + let bad_file = DataFileBuilder::default() + .content(DataContentType::Data) + .file_path("test/bad.parquet".to_string()) + .file_format(DataFileFormat::Parquet) + .file_size_in_bytes(100) + .record_count(10) + .partition_spec_id(table.metadata().default_partition_spec_id()) + .partition(Struct::from_iter([Some(Literal::string("wrong"))])) + .build() + .unwrap(); + let action = Transaction::new(&table) + .row_delta() + .add_data_files(vec![bad_file]); + assert!(Arc::new(action).commit(&table).await.is_err()); + } + + /// MoR: adding a position-delete file via RowDelta commits a content=Deletes + /// manifest and an `Operation::Delete` snapshot (replaces the old "errors" test + /// now that `add_delete_files` is implemented). + #[tokio::test] + async fn test_row_delta_add_delete_files_mor() { + let base = make_v2_minimal_table(); + + // S1: append a data file. 
+ let data_file = make_data_file(&base, "test/data.parquet", 100); + let mut c1 = Arc::new( + Transaction::new(&base) + .fast_append() + .add_data_files(vec![data_file]), + ) + .commit(&base) + .await + .unwrap(); + let snap_s1 = if let TableUpdate::AddSnapshot { snapshot } = + c1.take_updates().into_iter().next().unwrap() + { + snapshot + } else { + panic!("expected AddSnapshot"); + }; + let table_s1 = table_with_snapshot(&base, snap_s1).await; + + // S2: add a MoR position-delete file referencing the data file. + let delete_file = DataFileBuilder::default() + .content(DataContentType::PositionDeletes) + .file_path("test/pos-delete.parquet".to_string()) + .file_format(DataFileFormat::Parquet) + .file_size_in_bytes(50) + .record_count(3) + .partition_spec_id(table_s1.metadata().default_partition_spec_id()) + .partition(Struct::from_iter([Some(Literal::long(100))])) + .referenced_data_file(Some("test/data.parquet".to_string())) + .build() + .unwrap(); + let mut c2 = Arc::new( + Transaction::new(&table_s1) + .row_delta() + .add_delete_files(vec![delete_file]), + ) + .commit(&table_s1) + .await + .unwrap(); + let updates2 = c2.take_updates(); + let snap_s2 = if let TableUpdate::AddSnapshot { ref snapshot } = updates2[0] { + snapshot + } else { + panic!("expected AddSnapshot"); + }; + + // Only delete files added (no data adds/removes) → Operation::Delete. + assert_eq!(snap_s2.summary().operation, crate::spec::Operation::Delete); + + // A PositionDeletes entry must exist in the new snapshot's manifests. 
+ let manifest_list = table_s1 + .manifest_list_reader(&std::sync::Arc::new(snap_s2.clone())) + .load() + .await + .unwrap(); + let mut found_position_delete = false; + for manifest_file in manifest_list.entries() { + let manifest = manifest_file + .load_manifest(table_s1.file_io()) + .await + .unwrap(); + for entry in manifest.entries() { + if entry.data_file().content_type() == DataContentType::PositionDeletes { + found_position_delete = true; + } + } + } + assert!( + found_position_delete, + "expected a PositionDeletes entry in the RowDelta snapshot's manifests" + ); + } + + /// End-to-end CoW test: append two files, then remove one via RowDelta. + /// + /// Verifies: + /// - The removed file appears as DELETED with correct sequence numbers. + /// - The surviving file appears as EXISTING with correct sequence numbers. + /// - The new file appears as ADDED. + /// - The snapshot summary counts `deleted-data-files = 1`. + #[tokio::test] + async fn test_row_delta_cow_manifest_rewrite() { + let base_table = make_v2_minimal_table(); + + // --- S1: append file-A and file-B --- + let file_a = make_data_file(&base_table, "test/a.parquet", 100); + let file_b = make_data_file(&base_table, "test/b.parquet", 200); + + let action1 = Transaction::new(&base_table) + .fast_append() + .add_data_files(vec![file_a.clone(), file_b.clone()]); + let mut commit1 = Arc::new(action1).commit(&base_table).await.unwrap(); + let updates1 = commit1.take_updates(); + + let snapshot_s1 = + if let TableUpdate::AddSnapshot { snapshot } = updates1.into_iter().next().unwrap() { + snapshot + } else { + panic!("expected AddSnapshot"); + }; + + let table_s1 = table_with_snapshot(&base_table, snapshot_s1).await; + + // --- S2: remove file-A (CoW), add file-C --- + let file_c = make_data_file(&table_s1, "test/c.parquet", 300); + let action2 = Transaction::new(&table_s1) + .row_delta() + .remove_data_files(vec![file_a.clone()]) + .add_data_files(vec![file_c.clone()]); + let mut commit2 = 
Arc::new(action2).commit(&table_s1).await.unwrap(); + let updates2 = commit2.take_updates(); + + let snapshot_s2 = if let TableUpdate::AddSnapshot { ref snapshot } = updates2[0] { + snapshot + } else { + panic!("expected AddSnapshot"); + }; + + assert_eq!( + snapshot_s2.summary().operation, + crate::spec::Operation::Overwrite + ); + + // Verify snapshot summary metrics + let props = &snapshot_s2.summary().additional_properties; + assert_eq!( + props.get("deleted-data-files").map(String::as_str), + Some("1"), + "summary should count 1 deleted file" + ); + + // Scan all manifest entries in S2 + let manifest_list = table_s1 + .manifest_list_reader(&std::sync::Arc::new(snapshot_s2.clone())) + .load() + .await + .unwrap(); + + let mut found_deleted_a = false; + let mut found_existing_b = false; + let mut found_added_c = false; + + for manifest_file in manifest_list.entries() { + let manifest = manifest_file + .load_manifest(table_s1.file_io()) + .await + .unwrap(); + for entry in manifest.entries() { + match entry.data_file().file_path() { + "test/a.parquet" => { + assert_eq!( + entry.status(), + ManifestStatus::Deleted, + "file-A must be DELETED" + ); + assert!( + entry.sequence_number().is_some(), + "DELETED entry must have sequence number" + ); + assert!( + entry.file_sequence_number.is_some(), + "DELETED entry must have file sequence number" + ); + found_deleted_a = true; + } + "test/b.parquet" => { + assert_eq!( + entry.status(), + ManifestStatus::Existing, + "file-B must be EXISTING" + ); + assert!( + entry.sequence_number().is_some(), + "EXISTING entry must have sequence number" + ); + found_existing_b = true; + } + "test/c.parquet" => { + found_added_c = true; + } + other => panic!("unexpected file in S2 manifests: {other}"), + } + } + } + + assert!(found_deleted_a, "file-A should have a DELETED entry in S2"); + assert!( + found_existing_b, + "file-B should have an EXISTING entry in S2" + ); + assert!(found_added_c, "file-C should have an ADDED entry in S2"); + 
} +} diff --git a/crates/iceberg/src/transaction/snapshot.rs b/crates/iceberg/src/transaction/snapshot.rs index 8e47226072..f887a31e87 100644 --- a/crates/iceberg/src/transaction/snapshot.rs +++ b/crates/iceberg/src/transaction/snapshot.rs @@ -62,10 +62,6 @@ const META_ROOT_PATH: &str = "metadata"; /// 3. **Delete Entry Processing**: The `delete_entries()` method is intended for future delete /// operations to specify which manifest entries should be marked as deleted. pub(crate) trait SnapshotProduceOperation: Send + Sync { - /// Returns the operation type that will be recorded in the snapshot summary. - /// - /// This determines what kind of operation is being performed (e.g., `Append`, `Overwrite`), - /// which is stored in the snapshot metadata for tracking and auditing purposes. fn operation(&self) -> Operation; /// Returns manifest entries that should be marked as deleted in the new snapshot. @@ -75,18 +71,29 @@ pub(crate) trait SnapshotProduceOperation: Send + Sync { snapshot_produce: &SnapshotProducer, ) -> impl Future>> + Send; - /// Returns existing manifest files that should be included in the new snapshot. - /// - /// This method determines which manifest files from the current snapshot should be - /// carried forward to the new snapshot. The selection depends on the operation type: + /// Returns existing manifest files to carry forward (or rewrite) into the new snapshot. /// - /// - **Append operations**: Typically include all existing manifests - /// - **Overwrite operations**: May exclude manifests for partitions being overwritten - /// - **Delete operations**: May exclude manifests for partitions being deleted + /// Implementations that need to delete specific files within a manifest should rewrite that + /// manifest (DELETED + EXISTING entries) and return the rewritten `ManifestFile` here. + /// `&mut SnapshotProducer` is provided so that implementations can call + /// `snapshot_produce.new_manifest_writer()` to produce the rewritten manifest. 
fn existing_manifest( &self, - snapshot_produce: &SnapshotProducer<'_>, + snapshot_produce: &mut SnapshotProducer<'_>, ) -> impl Future>> + Send; + + /// Data files being removed in this operation (used for snapshot summary metrics). + fn removed_data_files(&self) -> &[DataFile] { + &[] + } + + /// Whether this Overwrite replaces the entire table content. When true, + /// `truncate_table_summary` sets `deleted-data-files` to the previous total. + /// Row-level operations (RowDelta) return false; full-table rewrites (future + /// OverwriteFiles / ReplacePartitions) return true. + fn is_truncate_full_table(&self) -> bool { + false + } } pub(crate) struct DefaultManifestProcess; @@ -116,6 +123,9 @@ pub(crate) struct SnapshotProducer<'a> { key_metadata: Option>, snapshot_properties: HashMap, added_data_files: Vec, + // Added MoR delete files (position/equality deletes, incl. V3 deletion vectors). + // Written into a separate content=Deletes manifest by `write_added_delete_manifest`. + added_delete_files: Vec, // A counter used to generate unique manifest file names. // It starts from 0 and increments for each new manifest file. // Note: This counter is limited to the range of (0..u64::MAX). @@ -137,6 +147,7 @@ impl<'a> SnapshotProducer<'a> { key_metadata, snapshot_properties, added_data_files, + added_delete_files: vec![], manifest_counter: (0..), } } @@ -242,7 +253,10 @@ impl<'a> SnapshotProducer<'a> { snapshot_id } - fn new_manifest_writer(&mut self, content: ManifestContentType) -> Result { + pub(crate) fn new_manifest_writer( + &mut self, + content: ManifestContentType, + ) -> Result { let new_manifest_path = format!( "{}/{}/{}-m{}.{}", self.table.metadata().location(), @@ -338,20 +352,87 @@ impl<'a> SnapshotProducer<'a> { writer.write_manifest_file().await } + /// Set the added MoR delete files to be written into a content=Deletes manifest. 
+ pub(crate) fn set_added_delete_files(&mut self, delete_files: Vec) { + self.added_delete_files = delete_files; + } + + // Write a content=Deletes manifest for added MoR delete files (position/equality + // deletes, incl. V3 deletion vectors) and return the ManifestFile for the ManifestList. + async fn write_added_delete_manifest(&mut self) -> Result { + let added_delete_files = std::mem::take(&mut self.added_delete_files); + if added_delete_files.is_empty() { + return Err(Error::new( + ErrorKind::PreconditionFailed, + "No added delete files found when writing a delete manifest file", + )); + } + + let snapshot_id = self.snapshot_id; + let format_version = self.table.metadata().format_version(); + let manifest_entries = added_delete_files.into_iter().map(|delete_file| { + let builder = ManifestEntry::builder() + .status(crate::spec::ManifestStatus::Added) + .data_file(delete_file); + if format_version == FormatVersion::V1 { + builder.snapshot_id(snapshot_id).build() + } else { + builder.build() + } + }); + let mut writer = self.new_manifest_writer(ManifestContentType::Deletes)?; + for entry in manifest_entries { + writer.add_entry(entry)?; + } + writer.write_manifest_file().await + } + + // Write a data manifest containing DELETED-status entries and return the ManifestFile. + // Note: this is NOT an Iceberg "delete manifest" (content=Deletes for MoR delete files). + // It is a data manifest (content=Data) whose entries carry ManifestStatus::Deleted to + // record which data files were removed in Copy-on-Write mode. 
+ async fn write_manifest_with_deleted_entries( + &mut self, + delete_entries: Vec, + ) -> Result { + if delete_entries.is_empty() { + return Err(Error::new( + ErrorKind::PreconditionFailed, + "No delete entries found when writing a delete manifest file", + )); + } + + let mut writer = self.new_manifest_writer(ManifestContentType::Data)?; + for entry in delete_entries { + // Use add_delete_entry() to preserve Deleted status instead of add_entry() + // which always overwrites status to Added + writer.add_delete_entry(entry)?; + } + writer.write_manifest_file().await + } + async fn manifest_file( &mut self, snapshot_produce_operation: &OP, manifest_process: &MP, ) -> Result> { + // Check if there's any content to add to the new snapshot + let delete_entries = snapshot_produce_operation.delete_entries(self).await?; + let has_delete_entries = !delete_entries.is_empty(); + // Assert current snapshot producer contains new content to add to new snapshot. // // TODO: Allowing snapshot property setup with no added data files is a workaround. // We should clean it up after all necessary actions are supported. // For details, please refer to https://github.com/apache/iceberg-rust/issues/1548 - if self.added_data_files.is_empty() && self.snapshot_properties.is_empty() { + if self.added_data_files.is_empty() + && self.added_delete_files.is_empty() + && self.snapshot_properties.is_empty() + && !has_delete_entries + { return Err(Error::new( ErrorKind::PreconditionFailed, - "No added data files or added snapshot properties found when write a manifest file", + "No added data files, delete entries, or snapshot properties found when write a manifest file", )); } @@ -364,8 +445,19 @@ impl<'a> SnapshotProducer<'a> { manifest_files.push(added_manifest); } - // # TODO - // Support process delete entries. + // Process added MoR delete files (content=Deletes manifest, e.g. V3 deletion vectors). 
+ if !self.added_delete_files.is_empty() { + let added_delete_manifest = self.write_added_delete_manifest().await?; + manifest_files.push(added_delete_manifest); + } + + // Process delete entries. + if has_delete_entries { + let delete_manifest = self + .write_manifest_with_deleted_entries(delete_entries) + .await?; + manifest_files.push(delete_manifest); + } let manifest_files = manifest_process.process_manifests(self, manifest_files); Ok(manifest_files) @@ -402,6 +494,14 @@ impl<'a> SnapshotProducer<'a> { ); } + for data_file in snapshot_produce_operation.removed_data_files() { + summary_collector.remove_file( + data_file, + table_metadata.current_schema().clone(), + table_metadata.default_partition_spec().clone(), + ); + } + let previous_snapshot = table_metadata.current_snapshot(); let mut additional_properties = summary_collector.build(); @@ -415,7 +515,7 @@ impl<'a> SnapshotProducer<'a> { update_snapshot_summaries( summary, previous_snapshot.map(|s| s.summary()), - snapshot_produce_operation.operation() == Operation::Overwrite, + snapshot_produce_operation.is_truncate_full_table(), ) } diff --git a/crates/iceberg/testdata/puffin/deletion-vector-v1-payload.bin b/crates/iceberg/testdata/puffin/deletion-vector-v1-payload.bin new file mode 100644 index 0000000000..80829fae22 Binary files /dev/null and b/crates/iceberg/testdata/puffin/deletion-vector-v1-payload.bin differ diff --git a/crates/iceberg/testdata/puffin/empty-position-index.bin b/crates/iceberg/testdata/puffin/empty-position-index.bin new file mode 100644 index 0000000000..8bbc1265dc Binary files /dev/null and b/crates/iceberg/testdata/puffin/empty-position-index.bin differ diff --git a/crates/iceberg/testdata/puffin/small-and-large-values-position-index.bin b/crates/iceberg/testdata/puffin/small-and-large-values-position-index.bin new file mode 100644 index 0000000000..989dabf6ad Binary files /dev/null and b/crates/iceberg/testdata/puffin/small-and-large-values-position-index.bin differ diff --git 
a/crates/integrations/playground/DEPENDENCIES.rust.tsv b/crates/integrations/playground/DEPENDENCIES.rust.tsv index 011fa55a86..ac6f3b00de 100644 --- a/crates/integrations/playground/DEPENDENCIES.rust.tsv +++ b/crates/integrations/playground/DEPENDENCIES.rust.tsv @@ -1,517 +1,558 @@ -crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 ISC LGPL-2.1-or-later MIT MIT-0 MPL-2.0 Unicode-3.0 Unlicense Zlib bzip2-1.0.6 -adler2@2.0.1 X X X -aead@0.5.2 X X -aes@0.8.4 X X -aes-gcm@0.10.3 X X -ahash@0.8.12 X X -aho-corasick@1.1.4 X X -alloc-no-stdlib@2.0.4 X -alloc-stdlib@0.2.2 X -allocator-api2@0.2.21 X X -android_system_properties@0.1.5 X X -anstream@0.6.21 X X -anstream@1.0.0 X X -anstyle@1.0.14 X X -anstyle-parse@0.2.7 X X -anstyle-parse@1.0.0 X X -anstyle-query@1.1.5 X X -anstyle-wincon@3.0.11 X X -anyhow@1.0.102 X X -apache-avro@0.21.0 X -ar_archive_writer@0.5.1 X -array-init@2.1.0 X X -arrayref@0.3.9 X -arrayvec@0.7.6 X X -arrow@58.1.0 X -arrow-arith@58.3.0 X -arrow-array@58.3.0 X X -arrow-buffer@58.3.0 X -arrow-cast@58.3.0 X -arrow-csv@58.1.0 X -arrow-data@58.3.0 X -arrow-ipc@58.1.0 X -arrow-json@58.1.0 X -arrow-ord@58.3.0 X -arrow-row@58.1.0 X -arrow-schema@58.3.0 X -arrow-select@58.3.0 X -arrow-string@58.3.0 X -as-any@0.3.2 X X -async-compression@0.4.41 X X -async-lock@3.4.2 X X -async-trait@0.1.89 X X -atoi@2.0.0 X -atomic-waker@1.1.2 X X -autocfg@1.5.0 X X -aws-config@1.8.15 X -aws-credential-types@1.2.14 X -aws-lc-rs@1.16.2 X X -aws-lc-sys@0.39.0 X X X X X -aws-runtime@1.7.2 X -aws-sdk-sso@1.97.0 X -aws-sdk-ssooidc@1.99.0 X -aws-sdk-sts@1.101.0 X -aws-sigv4@1.4.2 X -aws-smithy-async@1.2.14 X -aws-smithy-http@0.63.6 X -aws-smithy-http-client@1.1.12 X -aws-smithy-json@0.62.5 X -aws-smithy-observability@0.2.6 X -aws-smithy-query@0.60.15 X -aws-smithy-runtime@1.10.3 X -aws-smithy-runtime-api@1.11.6 X -aws-smithy-types@1.4.7 X -aws-smithy-xml@0.60.15 X -aws-types@1.3.14 X -backon@1.6.0 X -base64@0.22.1 X X -base64-simd@0.8.0 
X -bigdecimal@0.4.10 X X -bimap@0.6.3 X X -bitflags@2.11.0 X X -blake2@0.10.6 X X -blake3@1.8.3 X X X -block-buffer@0.10.4 X X -bnum@0.12.1 X X -bon@3.9.1 X X -bon-macros@3.9.1 X X -brotli@8.0.2 X X -brotli-decompressor@5.0.0 X X -bumpalo@3.20.2 X X -bytemuck@1.25.0 X X X -bytemuck_derive@1.10.2 X X X -byteorder@1.5.0 X X -bytes@1.11.1 X -bytes-utils@0.1.4 X X -bzip2@0.6.1 X X -cc@1.2.57 X X -cfg-if@1.0.4 X X -cfg_aliases@0.2.1 X -chacha20@0.10.0 X X -chrono@0.4.44 X X -chrono-tz@0.10.4 X X -cipher@0.4.4 X X -clap@4.6.0 X X -clap_builder@4.6.0 X X -clap_derive@4.6.0 X X -clap_lex@1.1.0 X X -clipboard-win@5.4.1 X -cmake@0.1.57 X X -colorchoice@1.0.5 X X -comfy-table@7.2.2 X -compression-codecs@0.4.37 X X -compression-core@0.4.31 X X -concurrent-queue@2.5.0 X X -const-random@0.1.18 X X -const-random-macro@0.1.16 X X -constant_time_eq@0.4.2 X X X -core-foundation@0.10.1 X X -core-foundation-sys@0.8.7 X X -cpufeatures@0.2.17 X X -cpufeatures@0.3.0 X X -crc32fast@1.5.0 X X -crossbeam-channel@0.5.15 X X -crossbeam-epoch@0.9.18 X X -crossbeam-utils@0.8.21 X X -crunchy@0.2.4 X -crypto-common@0.1.7 X X -csv@1.4.0 X X -csv-core@0.1.13 X X -ctr@0.9.2 X X -darling@0.20.11 X -darling@0.23.0 X -darling_core@0.20.11 X -darling_core@0.23.0 X -darling_macro@0.20.11 X -darling_macro@0.23.0 X -dashmap@6.2.1 X -datafusion@53.1.0 X -datafusion-catalog@53.1.0 X -datafusion-catalog-listing@53.1.0 X -datafusion-cli@53.1.0 X -datafusion-common@53.1.0 X -datafusion-common-runtime@53.1.0 X -datafusion-datasource@53.1.0 X -datafusion-datasource-arrow@53.1.0 X -datafusion-datasource-avro@53.1.0 X -datafusion-datasource-csv@53.1.0 X -datafusion-datasource-json@53.1.0 X -datafusion-datasource-parquet@53.1.0 X -datafusion-doc@53.1.0 X -datafusion-execution@53.1.0 X -datafusion-expr@53.1.0 X -datafusion-expr-common@53.1.0 X -datafusion-functions@53.1.0 X -datafusion-functions-aggregate@53.1.0 X -datafusion-functions-aggregate-common@53.1.0 X -datafusion-functions-nested@53.1.0 X 
-datafusion-functions-table@53.1.0 X -datafusion-functions-window@53.1.0 X -datafusion-functions-window-common@53.1.0 X -datafusion-macros@53.1.0 X -datafusion-optimizer@53.1.0 X -datafusion-physical-expr@53.1.0 X -datafusion-physical-expr-adapter@53.1.0 X -datafusion-physical-expr-common@53.1.0 X -datafusion-physical-optimizer@53.1.0 X -datafusion-physical-plan@53.1.0 X -datafusion-pruning@53.1.0 X -datafusion-session@53.1.0 X -datafusion-sql@53.1.0 X -deranged@0.5.8 X X -derive_builder@0.20.2 X X -derive_builder_core@0.20.2 X X -derive_builder_macro@0.20.2 X X -digest@0.10.7 X X -dirs@6.0.0 X X -dirs-sys@0.5.0 X X -displaydoc@0.2.5 X X -dissimilar@1.0.11 X -dunce@1.0.5 X X X -either@1.15.0 X X -endian-type@0.1.2 X -env_filter@1.0.0 X X -env_logger@0.11.9 X X -equivalent@1.0.2 X X -erased-serde@0.4.10 X X -errno@0.3.14 X X -error-code@3.3.2 X -event-listener@5.4.1 X X -event-listener-strategy@0.5.4 X X -expect-test@1.5.1 X X -fastnum@0.7.4 X X -fastrand@2.3.0 X X -fd-lock@4.0.4 X X -find-msvc-tools@0.1.9 X X -fixedbitset@0.5.7 X X -flatbuffers@25.12.19 X -flate2@1.1.9 X X -fnv@1.0.7 X X -foldhash@0.1.5 X -foldhash@0.2.0 X -form_urlencoded@1.2.2 X X -fs-err@3.3.0 X X -fs_extra@1.3.0 X -futures@0.3.32 X X -futures-channel@0.3.32 X X -futures-core@0.3.32 X X -futures-executor@0.3.32 X X -futures-io@0.3.32 X X -futures-macro@0.3.32 X X -futures-sink@0.3.32 X X -futures-task@0.3.32 X X -futures-util@0.3.32 X X -generic-array@0.14.7 X -getrandom@0.2.17 X X -getrandom@0.3.4 X X -getrandom@0.4.2 X X -ghash@0.5.1 X X -glob@0.3.3 X X -gloo-timers@0.3.0 X X -h2@0.4.13 X -half@2.7.1 X X -hashbrown@0.14.5 X X -hashbrown@0.15.5 X X -hashbrown@0.16.1 X X -hashbrown@0.17.1 X X -heck@0.5.0 X X -hex@0.4.3 X X -hmac@0.12.1 X X -home@0.5.11 X X -http@0.2.12 X X -http@1.4.0 X X -http-body@0.4.6 X -http-body@1.0.1 X -http-body-util@0.1.3 X -httparse@1.10.1 X X -httpdate@1.0.3 X X -humantime@2.3.0 X X -hyper@1.8.1 X -hyper-rustls@0.27.7 X X X -hyper-util@0.1.20 X -iana-time-zone@0.1.65 
X X -iana-time-zone-haiku@0.1.2 X X -iceberg@0.9.0 X -iceberg-catalog-rest@0.9.0 X -iceberg-datafusion@0.9.0 X -iceberg-playground@0.9.0 X -iceberg_test_utils@0.9.0 X -icu_collections@2.1.1 X -icu_locale_core@2.1.1 X -icu_normalizer@2.1.1 X -icu_normalizer_data@2.1.1 X -icu_properties@2.1.2 X -icu_properties_data@2.1.2 X -icu_provider@2.1.1 X -ident_case@1.0.1 X X -idna@1.1.0 X X -idna_adapter@1.2.1 X X -indexmap@2.13.0 X X -inout@0.1.4 X X -integer-encoding@3.0.4 X -inventory@0.3.22 X X -ipnet@2.12.0 X X -iri-string@0.7.11 X X -is_terminal_polyfill@1.70.2 X X -itertools@0.13.0 X X -itertools@0.14.0 X X -itoa@1.0.18 X X -jiff@0.2.23 X X -jobserver@0.1.34 X X -js-sys@0.3.91 X X -lazy_static@1.5.0 X X -lexical-core@1.0.6 X X -lexical-parse-float@1.0.6 X X -lexical-parse-integer@1.0.6 X X -lexical-util@1.0.7 X X -lexical-write-float@1.0.6 X X -lexical-write-integer@1.0.6 X X -libbz2-rs-sys@0.2.2 X -libc@0.2.183 X X -liblzma@0.4.6 X X -liblzma-sys@0.4.5 X X -libm@0.2.16 X -libmimalloc-sys@0.1.44 X -libredox@0.1.14 X -linux-raw-sys@0.12.1 X X X -litemap@0.8.1 X -lock_api@0.4.14 X X -log@0.4.29 X X -lz4_flex@0.13.0 X -md-5@0.10.6 X X -memchr@2.8.0 X X -mimalloc@0.1.48 X -miniz_oxide@0.8.9 X X X -mio@1.2.0 X -moka@0.12.15 X X -murmur3@0.5.2 X X -nibble_vec@0.1.0 X -nix@0.30.1 X -nu-ansi-term@0.50.3 X -num-bigint@0.4.6 X X -num-complex@0.4.6 X X -num-conv@0.2.0 X X -num-integer@0.1.46 X X -num-traits@0.2.19 X X -object@0.37.3 X X -object_store@0.13.2 X X -once_cell@1.21.4 X X -once_cell_polyfill@1.70.2 X X -opaque-debug@0.3.1 X X -openssl-probe@0.2.1 X X -option-ext@0.2.0 X -ordered-float@2.10.1 X -ordered-float@4.6.0 X -outref@0.5.2 X -parking@2.2.1 X X -parking_lot@0.12.5 X X -parking_lot_core@0.9.12 X X -parquet@58.1.0 X -paste@1.0.15 X X -percent-encoding@2.3.2 X X -petgraph@0.8.3 X X -phf@0.12.1 X -phf_shared@0.12.1 X -pin-project-lite@0.2.17 X X -pin-utils@0.1.0 X X -pkg-config@0.3.32 X X -polyval@0.6.2 X X -portable-atomic@1.13.1 X X -portable-atomic-util@0.2.6 X X 
-potential_utf@0.1.4 X -powerfmt@0.2.0 X X -ppv-lite86@0.2.21 X X -prettyplease@0.2.37 X X -proc-macro2@1.0.106 X X -psm@0.1.30 X X -quad-rand@0.2.3 X -quick-xml@0.39.4 X -quote@1.0.45 X X -r-efi@5.3.0 X X X -r-efi@6.0.0 X X X -radix_trie@0.2.1 X -rand@0.10.1 X X -rand@0.9.4 X X -rand_chacha@0.9.0 X X -rand_core@0.10.0 X X -rand_core@0.6.4 X X -rand_core@0.9.5 X X -recursive@0.1.1 X -recursive-proc-macro-impl@0.1.1 X -redox_syscall@0.5.18 X -redox_users@0.5.2 X -regex@1.12.3 X X -regex-automata@0.4.14 X X -regex-lite@0.1.9 X X -regex-syntax@0.8.10 X X -reqwest@0.12.28 X X -ring@0.17.14 X X -roaring@0.11.3 X X -rustc_version@0.4.1 X X -rustix@1.1.4 X X X -rustls@0.23.37 X X X -rustls-native-certs@0.8.3 X X X -rustls-pki-types@1.14.0 X X -rustls-webpki@0.103.13 X -rustversion@1.0.22 X X -rustyline@17.0.2 X -ryu@1.0.23 X X -same-file@1.0.6 X X -schannel@0.1.29 X -scopeguard@1.2.0 X X -security-framework@3.7.0 X X -security-framework-sys@2.17.0 X X -semver@1.0.27 X X -seq-macro@0.3.6 X X -serde@1.0.228 X X -serde-big-array@0.5.1 X X -serde_bytes@0.11.19 X X -serde_core@1.0.228 X X -serde_derive@1.0.228 X X -serde_json@1.0.149 X X -serde_repr@0.1.20 X X -serde_spanned@0.6.9 X X -serde_urlencoded@0.7.1 X X -serde_with@3.21.0 X X -serde_with_macros@3.21.0 X X -sha1@0.10.6 X X -sha2@0.10.9 X X -sharded-slab@0.1.7 X -shlex@1.3.0 X X -signal-hook-registry@1.4.8 X X -simd-adler32@0.3.8 X -simdutf8@0.1.5 X X -siphasher@1.0.2 X X -slab@0.4.12 X -smallvec@1.15.1 X X -snap@1.1.1 X -socket2@0.6.3 X X -sqlparser@0.61.0 X -sqlparser_derive@0.5.0 X -stable_deref_trait@1.2.1 X X -stacker@0.1.23 X X -strsim@0.11.1 X -strum@0.27.2 X -strum_macros@0.27.2 X -subtle@2.6.1 X -syn@2.0.117 X X -sync_wrapper@1.0.2 X -synstructure@0.13.2 X -tagptr@0.2.0 X X -tempfile@3.27.0 X X -thiserror@2.0.18 X X -thiserror-impl@2.0.18 X X -thread_local@1.1.9 X X -thrift@0.17.0 X -time@0.3.47 X X -time-core@0.1.8 X X -tiny-keccak@2.0.2 X -tinystr@0.8.2 X -tokio@1.52.1 X -tokio-macros@2.7.0 X 
-tokio-rustls@0.26.4 X X -tokio-stream@0.1.18 X -tokio-util@0.7.18 X -toml@0.8.23 X X -toml_datetime@0.6.11 X X -toml_edit@0.22.27 X X -toml_write@0.1.2 X X -tower@0.5.3 X -tower-http@0.6.8 X -tower-layer@0.3.3 X -tower-service@0.3.3 X -tracing@0.1.44 X -tracing-attributes@0.1.31 X -tracing-core@0.1.36 X -tracing-log@0.2.0 X -tracing-subscriber@0.3.23 X -try-lock@0.2.5 X -twox-hash@2.1.2 X -typed-builder@0.20.1 X X -typed-builder-macro@0.20.1 X X -typeid@1.0.3 X X -typenum@1.20.1 X X -typetag@0.2.21 X X -typetag-impl@0.2.21 X X -unicode-ident@1.0.24 X X X -unicode-segmentation@1.12.0 X X -unicode-width@0.2.2 X X -universal-hash@0.5.1 X X -untrusted@0.9.0 X -url@2.5.8 X X -urlencoding@2.1.3 X -utf8_iter@1.0.4 X X -utf8parse@0.2.2 X X -uuid@1.23.0 X X -version_check@0.9.5 X X -vsimd@0.8.0 X -walkdir@2.5.0 X X -want@0.3.1 X -wasi@0.11.1+wasi-snapshot-preview1 X X X -wasip2@1.0.2+wasi-0.2.9 X X X -wasip3@0.4.0+wasi-0.3.0-rc-2026-01-06 X X X -wasm-bindgen@0.2.114 X X -wasm-bindgen-futures@0.4.64 X X -wasm-bindgen-macro@0.2.114 X X -wasm-bindgen-macro-support@0.2.114 X X -wasm-bindgen-shared@0.2.114 X X -wasm-streams@0.4.2 X X -web-sys@0.3.91 X X -web-time@1.1.0 X X -winapi-util@0.1.11 X X -windows-core@0.62.2 X X -windows-implement@0.60.2 X X -windows-interface@0.59.3 X X -windows-link@0.2.1 X X -windows-result@0.4.1 X X -windows-strings@0.5.1 X X -windows-sys@0.48.0 X X -windows-sys@0.52.0 X X -windows-sys@0.59.0 X X -windows-sys@0.60.2 X X -windows-sys@0.61.2 X X -windows-targets@0.48.5 X X -windows-targets@0.52.6 X X -windows-targets@0.53.5 X X -windows_aarch64_gnullvm@0.48.5 X X -windows_aarch64_gnullvm@0.52.6 X X -windows_aarch64_gnullvm@0.53.1 X X -windows_aarch64_msvc@0.48.5 X X -windows_aarch64_msvc@0.52.6 X X -windows_aarch64_msvc@0.53.1 X X -windows_i686_gnu@0.48.5 X X -windows_i686_gnu@0.52.6 X X -windows_i686_gnu@0.53.1 X X -windows_i686_gnullvm@0.52.6 X X -windows_i686_gnullvm@0.53.1 X X -windows_i686_msvc@0.48.5 X X -windows_i686_msvc@0.52.6 X X 
-windows_i686_msvc@0.53.1 X X -windows_x86_64_gnu@0.48.5 X X -windows_x86_64_gnu@0.52.6 X X -windows_x86_64_gnu@0.53.1 X X -windows_x86_64_gnullvm@0.48.5 X X -windows_x86_64_gnullvm@0.52.6 X X -windows_x86_64_gnullvm@0.53.1 X X -windows_x86_64_msvc@0.48.5 X X -windows_x86_64_msvc@0.52.6 X X -windows_x86_64_msvc@0.53.1 X X -winnow@0.7.15 X -wit-bindgen@0.51.0 X X X -writeable@0.6.2 X -xmlparser@0.13.6 X X -yoke@0.8.1 X -yoke-derive@0.8.1 X -zerocopy@0.8.47 X X X -zerocopy-derive@0.8.47 X X X -zerofrom@0.1.6 X -zerofrom-derive@0.1.6 X -zeroize@1.8.2 X X -zerotrie@0.2.3 X -zerovec@0.11.5 X -zerovec-derive@0.11.2 X -zlib-rs@0.6.3 X -zmij@1.0.21 X -zstd@0.13.3 X -zstd-safe@7.2.4 X X -zstd-sys@2.0.16+zstd.1.5.7 X X +crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 CDLA-Permissive-2.0 ISC LGPL-2.1-or-later MIT MIT-0 MPL-2.0 Unicode-3.0 Unlicense Zlib bzip2-1.0.6 +adler2@2.0.1 X X X +aead@0.5.2 X X +aes@0.8.4 X X +aes-gcm@0.10.3 X X +ahash@0.8.12 X X +aho-corasick@1.1.4 X X +alloc-no-stdlib@2.0.4 X +alloc-stdlib@0.2.2 X +allocator-api2@0.2.21 X X +android_system_properties@0.1.5 X X +anstream@0.6.21 X X +anstream@1.0.0 X X +anstyle@1.0.14 X X +anstyle-parse@0.2.7 X X +anstyle-parse@1.0.0 X X +anstyle-query@1.1.5 X X +anstyle-wincon@3.0.11 X X +anyhow@1.0.102 X X +apache-avro@0.21.0 X +ar_archive_writer@0.5.1 X +array-init@2.1.0 X X +arrayref@0.3.9 X +arrayvec@0.7.6 X X +arrow@58.1.0 X +arrow-arith@58.3.0 X +arrow-array@58.3.0 X X +arrow-buffer@58.3.0 X +arrow-cast@58.3.0 X +arrow-csv@58.1.0 X +arrow-data@58.3.0 X +arrow-ipc@58.1.0 X +arrow-json@58.1.0 X +arrow-ord@58.3.0 X +arrow-row@58.1.0 X +arrow-schema@58.3.0 X +arrow-select@58.3.0 X +arrow-string@58.3.0 X +as-any@0.3.2 X X +async-compression@0.4.41 X X +async-lock@3.4.2 X X +async-trait@0.1.89 X X +atoi@2.0.0 X +atomic-waker@1.1.2 X X +autocfg@1.5.0 X X +aws-config@1.8.15 X +aws-credential-types@1.2.14 X +aws-lc-rs@1.16.2 X X +aws-lc-sys@0.39.0 X X X X X +aws-runtime@1.7.2 
X +aws-sdk-sso@1.97.0 X +aws-sdk-ssooidc@1.99.0 X +aws-sdk-sts@1.101.0 X +aws-sigv4@1.4.2 X +aws-smithy-async@1.2.14 X +aws-smithy-http@0.63.6 X +aws-smithy-http-client@1.1.12 X +aws-smithy-json@0.62.5 X +aws-smithy-observability@0.2.6 X +aws-smithy-query@0.60.15 X +aws-smithy-runtime@1.10.3 X +aws-smithy-runtime-api@1.11.6 X +aws-smithy-types@1.4.7 X +aws-smithy-xml@0.60.15 X +aws-types@1.3.14 X +backon@1.6.0 X +base64@0.22.1 X X +base64-simd@0.8.0 X +bigdecimal@0.4.10 X X +bimap@0.6.3 X X +bitflags@2.11.0 X X +blake2@0.10.6 X X +blake3@1.8.3 X X X +block-buffer@0.10.4 X X +block-buffer@0.12.0 X X +bnum@0.12.1 X X +bon@3.9.1 X X +bon-macros@3.9.1 X X +brotli@8.0.2 X X +brotli-decompressor@5.0.0 X X +bumpalo@3.20.2 X X +bytemuck@1.25.0 X X X +bytemuck_derive@1.10.2 X X X +byteorder@1.5.0 X X +bytes@1.11.1 X +bytes-utils@0.1.4 X X +bzip2@0.6.1 X X +cc@1.2.57 X X +cfg-if@1.0.4 X X +cfg_aliases@0.2.1 X +chacha20@0.10.0 X X +chrono@0.4.44 X X +chrono-tz@0.10.4 X X +cipher@0.4.4 X X +clap@4.6.0 X X +clap_builder@4.6.0 X X +clap_derive@4.6.0 X X +clap_lex@1.1.0 X X +clipboard-win@5.4.1 X +cmake@0.1.57 X X +colorchoice@1.0.5 X X +combine@4.6.7 X +comfy-table@7.2.2 X +compression-codecs@0.4.37 X X +compression-core@0.4.31 X X +concurrent-queue@2.5.0 X X +const-oid@0.10.2 X X +const-oid@0.9.6 X X +const-random@0.1.18 X X +const-random-macro@0.1.16 X X +constant_time_eq@0.4.2 X X X +core-foundation@0.10.1 X X +core-foundation-sys@0.8.7 X X +cpufeatures@0.2.17 X X +cpufeatures@0.3.0 X X +crc32c@0.6.8 X X +crc32fast@1.5.0 X X +crossbeam-channel@0.5.15 X X +crossbeam-epoch@0.9.18 X X +crossbeam-utils@0.8.21 X X +crunchy@0.2.4 X +crypto-common@0.1.7 X X +crypto-common@0.2.2 X X +csv@1.4.0 X X +csv-core@0.1.13 X X +ctor@1.0.7 X X +ctr@0.9.2 X X +darling@0.20.11 X +darling@0.23.0 X +darling_core@0.20.11 X +darling_core@0.23.0 X +darling_macro@0.20.11 X +darling_macro@0.23.0 X +dashmap@6.2.1 X +datafusion@53.1.0 X +datafusion-catalog@53.1.0 X +datafusion-catalog-listing@53.1.0 X 
+datafusion-cli@53.1.0 X +datafusion-common@53.1.0 X +datafusion-common-runtime@53.1.0 X +datafusion-datasource@53.1.0 X +datafusion-datasource-arrow@53.1.0 X +datafusion-datasource-avro@53.1.0 X +datafusion-datasource-csv@53.1.0 X +datafusion-datasource-json@53.1.0 X +datafusion-datasource-parquet@53.1.0 X +datafusion-doc@53.1.0 X +datafusion-execution@53.1.0 X +datafusion-expr@53.1.0 X +datafusion-expr-common@53.1.0 X +datafusion-functions@53.1.0 X +datafusion-functions-aggregate@53.1.0 X +datafusion-functions-aggregate-common@53.1.0 X +datafusion-functions-nested@53.1.0 X +datafusion-functions-table@53.1.0 X +datafusion-functions-window@53.1.0 X +datafusion-functions-window-common@53.1.0 X +datafusion-macros@53.1.0 X +datafusion-optimizer@53.1.0 X +datafusion-physical-expr@53.1.0 X +datafusion-physical-expr-adapter@53.1.0 X +datafusion-physical-expr-common@53.1.0 X +datafusion-physical-optimizer@53.1.0 X +datafusion-physical-plan@53.1.0 X +datafusion-pruning@53.1.0 X +datafusion-session@53.1.0 X +datafusion-sql@53.1.0 X +deranged@0.5.8 X X +derive_builder@0.20.2 X X +derive_builder_core@0.20.2 X X +derive_builder_macro@0.20.2 X X +digest@0.10.7 X X +digest@0.11.3 X X +dirs@6.0.0 X X +dirs-sys@0.5.0 X X +displaydoc@0.2.5 X X +dissimilar@1.0.11 X +dlv-list@0.5.2 X X +dunce@1.0.5 X X X +either@1.15.0 X X +endian-type@0.1.2 X +env_filter@1.0.0 X X +env_logger@0.11.9 X X +equivalent@1.0.2 X X +erased-serde@0.4.10 X X +errno@0.3.14 X X +error-code@3.3.2 X +event-listener@5.4.1 X X +event-listener-strategy@0.5.4 X X +expect-test@1.5.1 X X +fastnum@0.7.4 X X +fastrand@2.3.0 X X +fd-lock@4.0.4 X X +find-msvc-tools@0.1.9 X X +fixedbitset@0.5.7 X X +flatbuffers@25.12.19 X +flate2@1.1.9 X X +fnv@1.0.7 X X +foldhash@0.1.5 X +foldhash@0.2.0 X +form_urlencoded@1.2.2 X X +fs-err@3.3.0 X X +fs_extra@1.3.0 X +futures@0.3.32 X X +futures-channel@0.3.32 X X +futures-core@0.3.32 X X +futures-executor@0.3.32 X X +futures-io@0.3.32 X X +futures-macro@0.3.32 X X +futures-sink@0.3.32 X 
X +futures-task@0.3.32 X X +futures-util@0.3.32 X X +generic-array@0.14.7 X +getrandom@0.2.17 X X +getrandom@0.3.4 X X +getrandom@0.4.2 X X +ghash@0.5.1 X X +glob@0.3.3 X X +gloo-timers@0.3.0 X X +h2@0.4.13 X +half@2.7.1 X X +hashbrown@0.14.5 X X +hashbrown@0.15.5 X X +hashbrown@0.16.1 X X +hashbrown@0.17.1 X X +heck@0.5.0 X X +hex@0.4.3 X X +hmac@0.12.1 X X +home@0.5.11 X X +http@0.2.12 X X +http@1.4.0 X X +http-body@0.4.6 X +http-body@1.0.1 X +http-body-util@0.1.3 X +httparse@1.10.1 X X +httpdate@1.0.3 X X +humantime@2.3.0 X X +hybrid-array@0.4.12 X X +hyper@1.8.1 X +hyper-rustls@0.27.7 X X X +hyper-util@0.1.20 X +iana-time-zone@0.1.65 X X +iana-time-zone-haiku@0.1.2 X X +iceberg@0.9.0 X +iceberg-catalog-rest@0.9.0 X +iceberg-datafusion@0.9.0 X +iceberg-playground@0.9.0 X +iceberg-storage-opendal@0.9.0 X +iceberg_test_utils@0.9.0 X +icu_collections@2.1.1 X +icu_locale_core@2.1.1 X +icu_normalizer@2.1.1 X +icu_normalizer_data@2.1.1 X +icu_properties@2.1.2 X +icu_properties_data@2.1.2 X +icu_provider@2.1.1 X +ident_case@1.0.1 X X +idna@1.1.0 X X +idna_adapter@1.2.1 X X +indexmap@2.13.0 X X +inout@0.1.4 X X +integer-encoding@3.0.4 X +inventory@0.3.22 X X +ipnet@2.12.0 X X +iri-string@0.7.11 X X +is_terminal_polyfill@1.70.2 X X +itertools@0.13.0 X X +itertools@0.14.0 X X +itoa@1.0.18 X X +jiff@0.2.23 X X +jiff-tzdb@0.1.6 X X +jiff-tzdb-platform@0.1.3 X X +jni@0.22.4 X X +jni-macros@0.22.4 X X +jni-sys@0.4.1 X X +jni-sys-macros@0.4.1 X X +jobserver@0.1.34 X X +js-sys@0.3.91 X X +lazy_static@1.5.0 X X +lexical-core@1.0.6 X X +lexical-parse-float@1.0.6 X X +lexical-parse-integer@1.0.6 X X +lexical-util@1.0.7 X X +lexical-write-float@1.0.6 X X +lexical-write-integer@1.0.6 X X +libbz2-rs-sys@0.2.2 X +libc@0.2.183 X X +liblzma@0.4.6 X X +liblzma-sys@0.4.5 X X +libm@0.2.16 X +libmimalloc-sys@0.1.44 X +libredox@0.1.14 X +link-section@0.18.1 X X +linktime-proc-macro@0.2.0 X X +linux-raw-sys@0.12.1 X X X +litemap@0.8.1 X +lock_api@0.4.14 X X +log@0.4.29 X X +lz4_flex@0.13.0 X 
+md-5@0.10.6 X X +md-5@0.11.0 X X +mea@0.6.3 X +memchr@2.8.0 X X +mimalloc@0.1.48 X +miniz_oxide@0.8.9 X X X +mio@1.2.0 X +moka@0.12.15 X X +murmur3@0.5.2 X X +nibble_vec@0.1.0 X +nix@0.30.1 X +nu-ansi-term@0.50.3 X +num-bigint@0.4.6 X X +num-complex@0.4.6 X X +num-conv@0.2.0 X X +num-integer@0.1.46 X X +num-traits@0.2.19 X X +object@0.37.3 X X +object_store@0.13.2 X X +once_cell@1.21.4 X X +once_cell_polyfill@1.70.2 X X +opaque-debug@0.3.1 X X +opendal@0.57.0 X +opendal-core@0.57.0 X +opendal-layer-concurrent-limit@0.57.0 X +opendal-layer-logging@0.57.0 X +opendal-layer-retry@0.57.0 X +opendal-layer-timeout@0.57.0 X +opendal-service-fs@0.57.0 X +opendal-service-s3@0.57.0 X +openssl-probe@0.2.1 X X +option-ext@0.2.0 X +ordered-float@2.10.1 X +ordered-float@4.6.0 X +ordered-multimap@0.7.3 X +outref@0.5.2 X +parking@2.2.1 X X +parking_lot@0.12.5 X X +parking_lot_core@0.9.12 X X +parquet@58.1.0 X +paste@1.0.15 X X +percent-encoding@2.3.2 X X +petgraph@0.8.3 X X +phf@0.12.1 X +phf_shared@0.12.1 X +pin-project-lite@0.2.17 X X +pin-utils@0.1.0 X X +pkg-config@0.3.32 X X +polyval@0.6.2 X X +portable-atomic@1.13.1 X X +portable-atomic-util@0.2.6 X X +potential_utf@0.1.4 X +powerfmt@0.2.0 X X +ppv-lite86@0.2.21 X X +prettyplease@0.2.37 X X +proc-macro2@1.0.106 X X +psm@0.1.30 X X +quad-rand@0.2.3 X +quick-xml@0.39.4 X +quote@1.0.45 X X +r-efi@5.3.0 X X X +r-efi@6.0.0 X X X +radix_trie@0.2.1 X +rand@0.10.1 X X +rand@0.9.4 X X +rand_chacha@0.9.0 X X +rand_core@0.10.0 X X +rand_core@0.6.4 X X +rand_core@0.9.5 X X +recursive@0.1.1 X +recursive-proc-macro-impl@0.1.1 X +redox_syscall@0.5.18 X +redox_users@0.5.2 X +regex@1.12.3 X X +regex-automata@0.4.14 X X +regex-lite@0.1.9 X X +regex-syntax@0.8.10 X X +reqsign-aws-v4@3.0.0 X +reqsign-core@3.0.0 X +reqsign-file-read-tokio@3.0.0 X +reqwest@0.12.28 X X +reqwest@0.13.3 X X +ring@0.17.14 X X +roaring@0.11.3 X X +rust-ini@0.21.3 X +rustc_version@0.4.1 X X +rustix@1.1.4 X X X +rustls@0.23.37 X X X +rustls-native-certs@0.8.3 X X X 
+rustls-pki-types@1.14.0 X X +rustls-platform-verifier@0.7.0 X X +rustls-platform-verifier-android@0.1.1 X X +rustls-webpki@0.103.13 X +rustversion@1.0.22 X X +rustyline@17.0.2 X +ryu@1.0.23 X X +same-file@1.0.6 X X +schannel@0.1.29 X +scopeguard@1.2.0 X X +security-framework@3.7.0 X X +security-framework-sys@2.17.0 X X +semver@1.0.27 X X +seq-macro@0.3.6 X X +serde@1.0.228 X X +serde-big-array@0.5.1 X X +serde_bytes@0.11.19 X X +serde_core@1.0.228 X X +serde_derive@1.0.228 X X +serde_json@1.0.149 X X +serde_repr@0.1.20 X X +serde_spanned@0.6.9 X X +serde_urlencoded@0.7.1 X X +serde_with@3.21.0 X X +serde_with_macros@3.21.0 X X +sha1@0.10.6 X X +sha2@0.10.9 X X +sharded-slab@0.1.7 X +shlex@1.3.0 X X +signal-hook-registry@1.4.8 X X +simd-adler32@0.3.8 X +simd_cesu8@1.1.1 X X +simdutf8@0.1.5 X X +siphasher@1.0.2 X X +slab@0.4.12 X +smallvec@1.15.1 X X +snap@1.1.1 X +socket2@0.6.3 X X +sqlparser@0.61.0 X +sqlparser_derive@0.5.0 X +stable_deref_trait@1.2.1 X X +stacker@0.1.23 X X +strsim@0.11.1 X +strum@0.27.2 X +strum_macros@0.27.2 X +subtle@2.6.1 X +syn@2.0.117 X X +sync_wrapper@1.0.2 X +synstructure@0.13.2 X +tagptr@0.2.0 X X +tempfile@3.27.0 X X +thiserror@2.0.18 X X +thiserror-impl@2.0.18 X X +thread_local@1.1.9 X X +thrift@0.17.0 X +time@0.3.47 X X +time-core@0.1.8 X X +tiny-keccak@2.0.2 X +tinystr@0.8.2 X +tokio@1.52.1 X +tokio-macros@2.7.0 X +tokio-rustls@0.26.4 X X +tokio-stream@0.1.18 X +tokio-util@0.7.18 X +toml@0.8.23 X X +toml_datetime@0.6.11 X X +toml_edit@0.22.27 X X +toml_write@0.1.2 X X +tower@0.5.3 X +tower-http@0.6.8 X +tower-layer@0.3.3 X +tower-service@0.3.3 X +tracing@0.1.44 X +tracing-attributes@0.1.31 X +tracing-core@0.1.36 X +tracing-log@0.2.0 X +tracing-subscriber@0.3.23 X +try-lock@0.2.5 X +twox-hash@2.1.2 X +typed-builder@0.20.1 X X +typed-builder-macro@0.20.1 X X +typeid@1.0.3 X X +typenum@1.20.1 X X +typetag@0.2.21 X X +typetag-impl@0.2.21 X X +unicode-ident@1.0.24 X X X +unicode-segmentation@1.12.0 X X +unicode-width@0.2.2 X X 
+universal-hash@0.5.1 X X +untrusted@0.9.0 X +url@2.5.8 X X +urlencoding@2.1.3 X +utf8_iter@1.0.4 X X +utf8parse@0.2.2 X X +uuid@1.23.0 X X +version_check@0.9.5 X X +vsimd@0.8.0 X +walkdir@2.5.0 X X +want@0.3.1 X +wasi@0.11.1+wasi-snapshot-preview1 X X X +wasip2@1.0.2+wasi-0.2.9 X X X +wasip3@0.4.0+wasi-0.3.0-rc-2026-01-06 X X X +wasm-bindgen@0.2.114 X X +wasm-bindgen-futures@0.4.64 X X +wasm-bindgen-macro@0.2.114 X X +wasm-bindgen-macro-support@0.2.114 X X +wasm-bindgen-shared@0.2.114 X X +wasm-streams@0.4.2 X X +wasm-streams@0.5.0 X X +web-sys@0.3.91 X X +web-time@1.1.0 X X +webpki-root-certs@1.0.7 X +winapi-util@0.1.11 X X +windows-core@0.62.2 X X +windows-implement@0.60.2 X X +windows-interface@0.59.3 X X +windows-link@0.2.1 X X +windows-result@0.4.1 X X +windows-strings@0.5.1 X X +windows-sys@0.48.0 X X +windows-sys@0.52.0 X X +windows-sys@0.59.0 X X +windows-sys@0.60.2 X X +windows-sys@0.61.2 X X +windows-targets@0.48.5 X X +windows-targets@0.52.6 X X +windows-targets@0.53.5 X X +windows_aarch64_gnullvm@0.48.5 X X +windows_aarch64_gnullvm@0.52.6 X X +windows_aarch64_gnullvm@0.53.1 X X +windows_aarch64_msvc@0.48.5 X X +windows_aarch64_msvc@0.52.6 X X +windows_aarch64_msvc@0.53.1 X X +windows_i686_gnu@0.48.5 X X +windows_i686_gnu@0.52.6 X X +windows_i686_gnu@0.53.1 X X +windows_i686_gnullvm@0.52.6 X X +windows_i686_gnullvm@0.53.1 X X +windows_i686_msvc@0.48.5 X X +windows_i686_msvc@0.52.6 X X +windows_i686_msvc@0.53.1 X X +windows_x86_64_gnu@0.48.5 X X +windows_x86_64_gnu@0.52.6 X X +windows_x86_64_gnu@0.53.1 X X +windows_x86_64_gnullvm@0.48.5 X X +windows_x86_64_gnullvm@0.52.6 X X +windows_x86_64_gnullvm@0.53.1 X X +windows_x86_64_msvc@0.48.5 X X +windows_x86_64_msvc@0.52.6 X X +windows_x86_64_msvc@0.53.1 X X +winnow@0.7.15 X +wit-bindgen@0.51.0 X X X +writeable@0.6.2 X +xattr@1.6.1 X X +xmlparser@0.13.6 X X +yoke@0.8.1 X +yoke-derive@0.8.1 X +zerocopy@0.8.47 X X X +zerocopy-derive@0.8.47 X X X +zerofrom@0.1.6 X +zerofrom-derive@0.1.6 X +zeroize@1.8.2 X X 
+zerotrie@0.2.3 X +zerovec@0.11.5 X +zerovec-derive@0.11.2 X +zlib-rs@0.6.3 X +zmij@1.0.21 X +zstd@0.13.3 X +zstd-safe@7.2.4 X X +zstd-sys@2.0.16+zstd.1.5.7 X X