diff --git a/Cargo.lock b/Cargo.lock index d380a3d6229..75f6b09d74d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -718,7 +718,7 @@ dependencies = [ "bitflags", "cexpr", "clang-sys", - "itertools 0.11.0", + "itertools 0.13.0", "log", "prettyplease", "proc-macro2", @@ -1760,6 +1760,12 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "daachorse" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63b7ef7a4be509357f4804d0a22e830daddb48f19fd604e4ad32ddce04a94c36" + [[package]] name = "darling" version = "0.23.0" @@ -4728,6 +4734,12 @@ dependencies = [ "glob", ] +[[package]] +name = "jetscii" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47f142fe24a9c9944451e8349de0a56af5f3e7226dc46f3ed4d4ecc0b85af75e" + [[package]] name = "jiff" version = "0.2.22" @@ -6849,7 +6861,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "343d3bd7056eda839b03204e68deff7d1b13aba7af2b2fd16890697274262ee7" dependencies = [ "heck", - "itertools 0.11.0", + "itertools 0.14.0", "log", "multimap", "petgraph", @@ -6881,7 +6893,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b" dependencies = [ "anyhow", - "itertools 0.11.0", + "itertools 0.14.0", "proc-macro2", "quote", "syn 2.0.117", @@ -10120,10 +10132,18 @@ dependencies = [ name = "vortex-fsst" version = "0.1.0" dependencies = [ + "aho-corasick", + "arrow-array", + "arrow-schema", "codspeed-divan-compat", + "daachorse", "fsst-rs", + "jetscii", + "memchr", + "parquet", "prost 0.14.3", "rand 0.9.2", + "regex-automata", "rstest", "vortex-array", "vortex-buffer", diff --git a/Cargo.toml b/Cargo.toml index 0da5ee805ba..59d8bb09363 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -81,6 +81,7 @@ rust-version = "1.90" version = "0.1.0" [workspace.dependencies] +aho-corasick = "1.1.3" anyhow = "1.0.97" arbitrary = "1.3.2" arc-swap = 
"1.8" @@ -121,6 +122,7 @@ cudarc = { version = "0.18.2", features = [ "cuda-12050", ] } custom-labels = "0.4.4" +daachorse = "1.0.0" dashmap = "6.1.0" datafusion = { version = "52", default-features = false, features = ["sql"] } datafusion-catalog = { version = "52" } @@ -155,6 +157,7 @@ indicatif = "0.18.0" insta = "1.43" inventory = "0.3.20" itertools = "0.14.0" +jetscii = "0.5.3" jiff = "0.2.0" kanal = "0.1.1" lending-iterator = "0.1.7" @@ -163,6 +166,7 @@ libloading = "0.8" liblzma = "0.4" log = { version = "0.4.21" } loom = { version = "0.7", features = ["checkpoint"] } +memchr = "2.8.0" memmap2 = "0.9.5" mimalloc = "0.1.42" moka = { version = "0.12.10", default-features = false } @@ -196,6 +200,7 @@ rand = "0.9.0" rand_distr = "0.5" ratatui = { version = "0.30", default-features = false } regex = "1.11.0" +regex-automata = "0.4" reqwest = { version = "0.12.4", features = [ "charset", "http2", diff --git a/encodings/fsst/Cargo.toml b/encodings/fsst/Cargo.toml index 0dd5ce55a22..a733612609c 100644 --- a/encodings/fsst/Cargo.toml +++ b/encodings/fsst/Cargo.toml @@ -30,7 +30,15 @@ vortex-session = { workspace = true } _test-harness = ["dep:rand", "vortex-array/_test-harness"] [dev-dependencies] +aho-corasick = { workspace = true } +arrow-array = { workspace = true } +arrow-schema = { workspace = true } +daachorse = { workspace = true } divan = { workspace = true } +jetscii = { workspace = true } +memchr = { workspace = true } +parquet = { workspace = true } +regex-automata = { workspace = true } rand = { workspace = true } rstest = { workspace = true } vortex-array = { workspace = true, features = ["_test-harness"] } @@ -39,6 +47,10 @@ vortex-array = { workspace = true, features = ["_test-harness"] } name = "fsst_compress" harness = false +[[bench]] +name = "fsst_contains" +harness = false + [[bench]] name = "fsst_url_compare" harness = false diff --git a/encodings/fsst/benches/fsst_contains.rs b/encodings/fsst/benches/fsst_contains.rs new file mode 100644 index 
00000000000..187be73cd5b --- /dev/null +++ b/encodings/fsst/benches/fsst_contains.rs @@ -0,0 +1,3650 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +#![allow( + clippy::unwrap_used, + clippy::cast_possible_truncation, + clippy::missing_safety_doc +)] + +use aho_corasick::AhoCorasick; +use daachorse::DoubleArrayAhoCorasick; +use divan::Bencher; +use fsst::ESCAPE_CODE; +use fsst::Symbol; +use memchr::memmem; +use rand::Rng; +use rand::SeedableRng; +use rand::rngs::StdRng; +use regex_automata::dfa::regex::Regex as DfaRegex; +use vortex_array::ToCanonical; +use vortex_array::accessor::ArrayAccessor; +use vortex_array::arrays::VarBinArray; +use vortex_array::dtype::DType; +use vortex_array::dtype::Nullability; +use vortex_array::match_each_integer_ptype; +use vortex_buffer::BitBufferMut; +use vortex_fsst::FSSTArray; +use vortex_fsst::fsst_compress; +use vortex_fsst::fsst_train_compressor; + +fn main() { + divan::main(); +} + +// --------------------------------------------------------------------------- +// URL generator +// --------------------------------------------------------------------------- + +const DOMAINS: &[&str] = &[ + "google.com", + "facebook.com", + "github.com", + "stackoverflow.com", + "amazon.com", + "reddit.com", + "twitter.com", + "youtube.com", + "wikipedia.org", + "microsoft.com", + "apple.com", + "netflix.com", + "linkedin.com", + "cloudflare.com", + "google.co.uk", + "docs.google.com", + "mail.google.com", + "maps.google.com", + "news.ycombinator.com", + "arxiv.org", +]; + +const PATHS: &[&str] = &[ + "/index.html", + "/about", + "/search?q=vortex", + "/user/profile/settings", + "/api/v2/data", + "/blog/2024/post", + "/products/item/12345", + "/docs/reference/guide", + "/login", + "/dashboard/analytics", +]; + +fn generate_urls(n: usize) -> Vec { + let mut rng = StdRng::seed_from_u64(42); + (0..n) + .map(|_| { + let scheme = if rng.random_bool(0.8) { + "https" + } else { + "http" + }; + 
let domain = DOMAINS[rng.random_range(0..DOMAINS.len())]; + let path = PATHS[rng.random_range(0..PATHS.len())]; + format!("{scheme}://{domain}{path}") + }) + .collect() +} + +fn make_fsst_urls(n: usize) -> FSSTArray { + let urls = generate_urls(n); + let varbin = VarBinArray::from_iter( + urls.iter().map(|s| Some(s.as_str())), + DType::Utf8(Nullability::NonNullable), + ); + let compressor = fsst_train_compressor(&varbin); + fsst_compress(varbin, &compressor) +} + +// --------------------------------------------------------------------------- +// KMP helpers +// --------------------------------------------------------------------------- + +fn kmp_failure_table(needle: &[u8]) -> Vec { + let mut failure = vec![0usize; needle.len()]; + let mut k = 0; + for i in 1..needle.len() { + while k > 0 && needle[k] != needle[i] { + k = failure[k - 1]; + } + if needle[k] == needle[i] { + k += 1; + } + failure[i] = k; + } + failure +} + +fn kmp_byte_transitions(needle: &[u8]) -> Vec { + let n_states = needle.len() + 1; + let accept = needle.len() as u16; + let failure = kmp_failure_table(needle); + + let mut table = vec![0u16; n_states * 256]; + for state in 0..n_states { + for byte in 0..256u16 { + if state == needle.len() { + table[state * 256 + byte as usize] = accept; + continue; + } + let mut s = state; + loop { + if byte as u8 == needle[s] { + s += 1; + break; + } + if s == 0 { + break; + } + s = failure[s - 1]; + } + table[state * 256 + byte as usize] = s as u16; + } + } + table +} + +// --------------------------------------------------------------------------- +// Approach 1: Original split-table DFA (baseline from production code) +// --------------------------------------------------------------------------- + +struct SplitTableDfa { + symbol_transitions: Vec, + escape_transitions: Vec, + n_symbols: usize, + accept_state: u16, +} + +impl SplitTableDfa { + fn new(symbols: &[Symbol], symbol_lengths: &[u8], needle: &[u8]) -> Self { + let n_symbols = symbols.len(); + let 
accept_state = needle.len() as u16; + let n_states = needle.len() + 1; + + let byte_table = kmp_byte_transitions(needle); + + let mut symbol_transitions = vec![0u16; n_states * n_symbols]; + for state in 0..n_states { + for code in 0..n_symbols { + if state as u16 == accept_state { + symbol_transitions[state * n_symbols + code] = accept_state; + continue; + } + let sym = symbols[code].to_u64().to_le_bytes(); + let sym_len = symbol_lengths[code] as usize; + let mut s = state as u16; + for &b in &sym[..sym_len] { + if s == accept_state { + break; + } + s = byte_table[s as usize * 256 + b as usize]; + } + symbol_transitions[state * n_symbols + code] = s; + } + } + + Self { + symbol_transitions, + escape_transitions: byte_table, + n_symbols, + accept_state, + } + } + + #[inline] + fn matches(&self, codes: &[u8]) -> bool { + let mut state = 0u16; + let mut pos = 0; + while pos < codes.len() { + if state == self.accept_state { + return true; + } + let code = codes[pos]; + pos += 1; + if code == ESCAPE_CODE { + if pos >= codes.len() { + return false; + } + let b = codes[pos]; + pos += 1; + state = self.escape_transitions[state as usize * 256 + b as usize]; + } else { + state = self.symbol_transitions[state as usize * self.n_symbols + code as usize]; + } + } + state == self.accept_state + } +} + +// --------------------------------------------------------------------------- +// Approach 2: Fused 256-entry table (unified lookup, sentinel for escapes) +// --------------------------------------------------------------------------- + +struct FusedTableDfa { + transitions: Vec, + escape_transitions: Vec, + accept_state: u16, + escape_sentinel: u16, +} + +impl FusedTableDfa { + fn new(symbols: &[Symbol], symbol_lengths: &[u8], needle: &[u8]) -> Self { + let n_symbols = symbols.len(); + let accept_state = needle.len() as u16; + let n_states = needle.len() + 1; + let escape_sentinel = n_states as u16 + 1; + + let byte_table = kmp_byte_transitions(needle); + + let mut 
symbol_transitions = vec![0u16; n_states * n_symbols]; + for state in 0..n_states { + for code in 0..n_symbols { + if state as u16 == accept_state { + symbol_transitions[state * n_symbols + code] = accept_state; + continue; + } + let sym = symbols[code].to_u64().to_le_bytes(); + let sym_len = symbol_lengths[code] as usize; + let mut s = state as u16; + for &b in &sym[..sym_len] { + if s == accept_state { + break; + } + s = byte_table[s as usize * 256 + b as usize]; + } + symbol_transitions[state * n_symbols + code] = s; + } + } + + let mut transitions = vec![0u16; n_states * 256]; + for state in 0..n_states { + for code in 0..n_symbols { + transitions[state * 256 + code] = symbol_transitions[state * n_symbols + code]; + } + transitions[state * 256 + ESCAPE_CODE as usize] = escape_sentinel; + } + + Self { + transitions, + escape_transitions: byte_table, + accept_state, + escape_sentinel, + } + } + + #[inline] + fn matches(&self, codes: &[u8]) -> bool { + let mut state = 0u16; + let mut pos = 0; + while pos < codes.len() { + if state == self.accept_state { + return true; + } + let code = codes[pos]; + pos += 1; + let next = self.transitions[state as usize * 256 + code as usize]; + if next == self.escape_sentinel { + if pos >= codes.len() { + return false; + } + let b = codes[pos]; + pos += 1; + state = self.escape_transitions[state as usize * 256 + b as usize]; + } else { + state = next; + } + } + state == self.accept_state + } + + /// No early exit — skip the accept_state check inside the loop. + /// Only check at the end. The accept state is sticky (transitions to itself), + /// so final state == accept means we matched at some point. 
+ #[inline] + fn matches_no_early_exit(&self, codes: &[u8]) -> bool { + let mut state = 0u16; + let mut pos = 0; + while pos < codes.len() { + let code = codes[pos]; + pos += 1; + let next = self.transitions[state as usize * 256 + code as usize]; + if next == self.escape_sentinel { + if pos >= codes.len() { + return false; + } + let b = codes[pos]; + pos += 1; + state = self.escape_transitions[state as usize * 256 + b as usize]; + } else { + state = next; + } + } + state == self.accept_state + } + + /// Unsafe variant — eliminates bounds checks on table lookups. + #[inline] + unsafe fn matches_unchecked(&self, codes: &[u8]) -> bool { + unsafe { + let mut state = 0u16; + let mut pos = 0; + let transitions = self.transitions.as_ptr(); + let escape_transitions = self.escape_transitions.as_ptr(); + let len = codes.len(); + let codes_ptr = codes.as_ptr(); + + while pos < len { + if state == self.accept_state { + return true; + } + let code = *codes_ptr.add(pos); + pos += 1; + let next = *transitions.add(state as usize * 256 + code as usize); + if next == self.escape_sentinel { + if pos >= len { + return false; + } + let b = *codes_ptr.add(pos); + pos += 1; + state = *escape_transitions.add(state as usize * 256 + b as usize); + } else { + state = next; + } + } + state == self.accept_state + } + } + + /// No early exit + unsafe bounds elimination. 
+ #[inline] + unsafe fn matches_no_exit_unchecked(&self, codes: &[u8]) -> bool { + unsafe { + let mut state = 0u16; + let mut pos = 0; + let transitions = self.transitions.as_ptr(); + let escape_transitions = self.escape_transitions.as_ptr(); + let len = codes.len(); + let codes_ptr = codes.as_ptr(); + + while pos < len { + let code = *codes_ptr.add(pos); + pos += 1; + let next = *transitions.add(state as usize * 256 + code as usize); + if next == self.escape_sentinel { + if pos >= len { + return false; + } + let b = *codes_ptr.add(pos); + pos += 1; + state = *escape_transitions.add(state as usize * 256 + b as usize); + } else { + state = next; + } + } + state == self.accept_state + } + } +} + +// --------------------------------------------------------------------------- +// Approach 3: Fused u32 table for SIMD gather (process 8 strings at once) +// --------------------------------------------------------------------------- + +#[cfg(target_arch = "x86_64")] +struct SimdGatherDfa { + /// u32 transition table, 256 entries per state. + transitions: Vec, + /// u32 escape transition table, 256 entries per state. + escape_transitions: Vec, + accept_state: u32, + escape_sentinel: u32, +} + +#[cfg(target_arch = "x86_64")] +impl SimdGatherDfa { + fn new(symbols: &[Symbol], symbol_lengths: &[u8], needle: &[u8]) -> Self { + let fused = FusedTableDfa::new(symbols, symbol_lengths, needle); + + Self { + transitions: fused.transitions.iter().map(|&v| v as u32).collect(), + escape_transitions: fused.escape_transitions.iter().map(|&v| v as u32).collect(), + accept_state: fused.accept_state as u32, + escape_sentinel: fused.escape_sentinel as u32, + } + } + + /// Scalar fallback using the u32 tables. 
+ #[inline] + fn matches_scalar(&self, codes: &[u8]) -> bool { + let mut state = 0u32; + let mut pos = 0; + while pos < codes.len() { + if state == self.accept_state { + return true; + } + let code = codes[pos]; + pos += 1; + let next = self.transitions[state as usize * 256 + code as usize]; + if next == self.escape_sentinel { + if pos >= codes.len() { + return false; + } + let b = codes[pos]; + pos += 1; + state = self.escape_transitions[state as usize * 256 + b as usize]; + } else { + state = next; + } + } + state == self.accept_state + } + + /// Process 8 strings simultaneously using AVX2 gather for transition lookups. + /// + /// Each iteration loads one code byte from each of 8 strings, computes + /// table indices, and uses VPGATHERDD to fetch 8 transitions at once. + #[cfg(target_feature = "avx2")] + #[inline] + unsafe fn matches_8_avx2( + &self, + all_bytes: &[u8], + starts: &[usize; 8], + ends: &[usize; 8], + ) -> [bool; 8] { + unsafe { + let transitions_ptr = self.transitions.as_ptr() as *const i32; + let escape_ptr = self.escape_transitions.as_ptr() as *const i32; + let bytes_ptr = all_bytes.as_ptr(); + let accept = self.accept_state; + let sentinel = self.escape_sentinel; + + let mut states = [0u32; 8]; + let mut pos: [usize; 8] = *starts; + let mut done = [false; 8]; + + loop { + let mut any_active = false; + + for k in 0..8 { + if done[k] { + continue; + } + if pos[k] >= ends[k] { + done[k] = true; + continue; + } + any_active = true; + + let code = *bytes_ptr.add(pos[k]); + pos[k] += 1; + let next = + *transitions_ptr.add(states[k] as usize * 256 + code as usize) as u32; + if next == sentinel { + if pos[k] >= ends[k] { + done[k] = true; + continue; + } + let b = *bytes_ptr.add(pos[k]); + pos[k] += 1; + states[k] = *escape_ptr.add(states[k] as usize * 256 + b as usize) as u32; + } else { + states[k] = next; + } + if states[k] == accept { + done[k] = true; + } + } + if !any_active { + break; + } + } + + std::array::from_fn(|k| states[k] == accept) + } 
+ } +} + +// --------------------------------------------------------------------------- +// Approach 4: Branchless escape handling via combined table +// Instead of branching on escape sentinel, use a "code_advance" table that +// tells how many bytes to consume (1 for normal, 2 for escape), and a +// combined table that gives the right state for both cases. +// --------------------------------------------------------------------------- + +struct BranchlessEscapeDfa { + /// For each (state, first_byte, second_byte) triple, the next state. + /// But 256*256 per state is too large. Instead: + /// For non-escape codes: transitions[state * 256 + code] gives next state. + /// For escape code: transitions[state * 256 + 255] is unused; we use + /// escape_transitions[state * 256 + literal_byte]. + /// + /// The branchless trick: always read the next byte (speculatively). + /// Use a conditional move to select between the normal and escape path. + transitions: Vec, + escape_transitions: Vec, + /// 1 for normal codes, 2 for ESCAPE_CODE. + code_advance: [u8; 256], + accept_state: u16, +} + +impl BranchlessEscapeDfa { + fn new(symbols: &[Symbol], symbol_lengths: &[u8], needle: &[u8]) -> Self { + let fused = FusedTableDfa::new(symbols, symbol_lengths, needle); + + let mut code_advance = [1u8; 256]; + code_advance[ESCAPE_CODE as usize] = 2; + + Self { + transitions: fused.transitions, + escape_transitions: fused.escape_transitions, + code_advance, + accept_state: fused.accept_state, + } + } + + /// Branchless escape handling: speculatively read the next byte and + /// select between normal and escape transitions using conditional ops. 
+ #[inline] + fn matches(&self, codes: &[u8]) -> bool { + if codes.is_empty() { + return self.accept_state == 0; + } + let mut state = 0u16; + let mut pos = 0; + let len = codes.len(); + + while pos < len { + let code = codes[pos]; + let advance = self.code_advance[code as usize] as usize; + + // Speculatively read the next byte (needed for escapes). + // For non-escape codes this read is wasted but harmless. + let next_byte = if pos + 1 < len { codes[pos + 1] } else { 0 }; + + let normal_next = self.transitions[state as usize * 256 + code as usize]; + let escape_next = self.escape_transitions[state as usize * 256 + next_byte as usize]; + + // Select: if this is an escape code, use escape_next; otherwise normal_next. + let is_escape = code == ESCAPE_CODE; + state = if is_escape { escape_next } else { normal_next }; + + pos += advance; + + if state == self.accept_state { + return true; + } + } + state == self.accept_state + } +} + +// --------------------------------------------------------------------------- +// Approach 5: u8 state table — halve table size (u16→u8) since states fit in +// a byte. Smaller tables = better cache utilization. +// --------------------------------------------------------------------------- + +struct CompactDfa { + /// u8 transitions, 256 entries per state. 
+ transitions: Vec, + escape_transitions: Vec, + accept_state: u8, + escape_sentinel: u8, +} + +impl CompactDfa { + fn new(symbols: &[Symbol], symbol_lengths: &[u8], needle: &[u8]) -> Self { + let fused = FusedTableDfa::new(symbols, symbol_lengths, needle); + Self { + transitions: fused.transitions.iter().map(|&v| v as u8).collect(), + escape_transitions: fused.escape_transitions.iter().map(|&v| v as u8).collect(), + accept_state: fused.accept_state as u8, + escape_sentinel: fused.escape_sentinel as u8, + } + } + + #[inline] + fn matches(&self, codes: &[u8]) -> bool { + let mut state = 0u8; + let mut pos = 0; + while pos < codes.len() { + if state == self.accept_state { + return true; + } + let code = codes[pos]; + pos += 1; + let next = self.transitions[state as usize * 256 + code as usize]; + if next == self.escape_sentinel { + if pos >= codes.len() { + return false; + } + let b = codes[pos]; + pos += 1; + state = self.escape_transitions[state as usize * 256 + b as usize]; + } else { + state = next; + } + } + state == self.accept_state + } + + #[inline] + fn matches_no_early_exit(&self, codes: &[u8]) -> bool { + let mut state = 0u8; + let mut pos = 0; + while pos < codes.len() { + let code = codes[pos]; + pos += 1; + let next = self.transitions[state as usize * 256 + code as usize]; + if next == self.escape_sentinel { + if pos >= codes.len() { + return false; + } + let b = codes[pos]; + pos += 1; + state = self.escape_transitions[state as usize * 256 + b as usize]; + } else { + state = next; + } + } + state == self.accept_state + } + + /// Unsafe no-exit variant. 
+ #[inline] + unsafe fn matches_no_exit_unchecked(&self, codes: &[u8]) -> bool { + unsafe { + let mut state = 0u8; + let mut pos = 0; + let transitions = self.transitions.as_ptr(); + let escape_transitions = self.escape_transitions.as_ptr(); + let len = codes.len(); + let codes_ptr = codes.as_ptr(); + + while pos < len { + let code = *codes_ptr.add(pos); + pos += 1; + let next = *transitions.add(state as usize * 256 + code as usize); + if next == self.escape_sentinel { + if pos >= len { + return false; + } + let b = *codes_ptr.add(pos); + pos += 1; + state = *escape_transitions.add(state as usize * 256 + b as usize); + } else { + state = next; + } + } + state == self.accept_state + } + } +} + +// --------------------------------------------------------------------------- +// Approach 6: Streaming scan — process the ENTIRE codes buffer in one pass, +// resetting state at string boundaries. Avoids per-string slice overhead +// and is friendlier to the hardware prefetcher. +// --------------------------------------------------------------------------- + +#[inline(never)] +#[allow(dead_code)] +fn streaming_scan_fused( + dfa: &FusedTableDfa, + all_bytes: &[u8], + offsets: &[usize], + n: usize, +) -> BitBufferMut { + BitBufferMut::collect_bool(n, |i| { + // The collect_bool closure is called sequentially for i=0..n. + // We rely on the sequential access pattern being prefetch-friendly. + let start = offsets[i]; + let end = offsets[i + 1]; + dfa.matches(&all_bytes[start..end]) + }) +} + +/// True streaming: single pass through all_bytes with offset-based reset. +#[inline(never)] +fn streaming_scan_continuous( + dfa: &CompactDfa, + all_bytes: &[u8], + offsets: &[usize], + n: usize, + out: &mut BitBufferMut, +) { + let mut string_idx = 0; + let mut state = 0u8; + let mut next_boundary = offsets[1]; + let mut matched = false; + + let mut pos = offsets[0]; + let total_end = offsets[n]; + + while pos < total_end { + // Check if we've crossed into a new string. 
+ while pos >= next_boundary { + // Record result for the just-finished string. + if matched || state == dfa.accept_state { + out.set(string_idx); + } + string_idx += 1; + if string_idx >= n { + return; + } + state = 0; + matched = false; + next_boundary = offsets[string_idx + 1]; + } + + let code = all_bytes[pos]; + pos += 1; + let next = dfa.transitions[state as usize * 256 + code as usize]; + if next == dfa.escape_sentinel { + if pos < next_boundary { + let b = all_bytes[pos]; + pos += 1; + state = dfa.escape_transitions[state as usize * 256 + b as usize]; + } + } else { + state = next; + } + if state == dfa.accept_state { + matched = true; + } + } + + // Handle the last string. + if string_idx < n && (matched || state == dfa.accept_state) { + out.set(string_idx); + } +} + +// --------------------------------------------------------------------------- +// Approach 7: Prefilter — build a bitmask of codes that could possibly +// contribute to matching the needle. Skip DFA for strings where no code +// belongs to that set. +// --------------------------------------------------------------------------- + +struct PrefilterDfa { + inner: CompactDfa, + /// For each code byte (0..255), true if that code could produce any byte + /// present in the needle (i.e., the symbol's bytes intersect needle's bytes). + relevant_codes: [bool; 256], +} + +impl PrefilterDfa { + fn new(symbols: &[Symbol], symbol_lengths: &[u8], needle: &[u8]) -> Self { + let inner = CompactDfa::new(symbols, symbol_lengths, needle); + + // Build set of bytes that appear in the needle. + let mut needle_bytes = [false; 256]; + for &b in needle { + needle_bytes[b as usize] = true; + } + + // For each symbol code, check if any of its bytes appear in the needle. 
+ let mut relevant_codes = [false; 256]; + for (code, (sym, &sym_len)) in symbols.iter().zip(symbol_lengths.iter()).enumerate() { + let sym_bytes = sym.to_u64().to_le_bytes(); + for &b in &sym_bytes[..sym_len as usize] { + if needle_bytes[b as usize] { + relevant_codes[code] = true; + break; + } + } + } + // Escape code is always relevant (literal bytes could be anything). + relevant_codes[ESCAPE_CODE as usize] = true; + + Self { + inner, + relevant_codes, + } + } + + /// Quick check: does this code sequence contain any code that could + /// contribute to the needle match? + #[inline] + fn could_match(&self, codes: &[u8]) -> bool { + codes.iter().any(|&c| self.relevant_codes[c as usize]) + } + + #[inline] + fn matches(&self, codes: &[u8]) -> bool { + if !self.could_match(codes) { + return false; + } + self.inner.matches(codes) + } + + #[inline] + fn matches_no_early_exit(&self, codes: &[u8]) -> bool { + if !self.could_match(codes) { + return false; + } + self.inner.matches_no_early_exit(codes) + } +} + +// --------------------------------------------------------------------------- +// Approach 8: State-zero skip DFA — skip runs of codes that keep state=0. +// +// Precompute a 256-byte lookup: for each code byte, does transitioning from +// state 0 stay in state 0? If so, that code is "trivial" and can be skipped. +// Process codes in chunks: scan for the first non-trivial code, then run +// the scalar DFA from there. This is most effective when the needle is rare +// (most codes are trivial), which is the common case for selective predicates. +// --------------------------------------------------------------------------- + +struct StateZeroSkipDfa { + inner: CompactDfa, + /// For each code byte (0..255), true if it keeps state 0 → state 0. 
+ trivial: [bool; 256], +} + +impl StateZeroSkipDfa { + fn new(symbols: &[Symbol], symbol_lengths: &[u8], needle: &[u8]) -> Self { + let inner = CompactDfa::new(symbols, symbol_lengths, needle); + + let mut trivial = [false; 256]; + for code in 0..256 { + // A code is trivial if from state 0 it goes back to state 0 + // and it's not the escape sentinel. + let next = inner.transitions[code]; // state 0 * 256 + code + trivial[code] = next == 0 && code as u8 != ESCAPE_CODE; + } + + Self { inner, trivial } + } + + #[inline] + fn matches(&self, codes: &[u8]) -> bool { + // Skip leading trivial codes. + let mut start = 0; + while start < codes.len() && self.trivial[codes[start] as usize] { + start += 1; + } + if start == codes.len() { + return self.inner.accept_state == 0; + } + // Run the DFA from the first non-trivial code. + self.inner.matches_no_early_exit(&codes[start..]) + } +} + +// --------------------------------------------------------------------------- +// Approach 9: Shift-based DFA — pack all state transitions into a u64. +// +// For a DFA with S ≤ 21 states (3 bits each fit in 63 bits of a u64), +// we store the transitions for ALL states for a given input byte in one u64. +// Transition: next_state = (table[code_byte] >> (state * BITS)) & MASK +// +// The key advantage: the table load depends only on code_byte (known from +// the input stream), NOT on the current state. This breaks the load-use +// dependency chain that makes traditional table-lookup DFAs slow (~4 cycle +// L1 latency per transition). With the shift-based approach, the table +// value can be loaded while the previous transition's shift is executing. +// --------------------------------------------------------------------------- + +struct ShiftDfa { + /// For each code byte (0..255): a u64 packing all state transitions. + /// Bits [state*3 .. state*3+3) encode the next state for that input. + transitions: [u64; 256], + /// Same layout for escape byte transitions. 
+ escape_transitions: [u64; 256], + accept_state: u8, + escape_sentinel: u8, +} + +impl ShiftDfa { + const BITS: u32 = 4; // bits per state (supports up to 16 states = 2^4) + const MASK: u64 = (1 << Self::BITS) - 1; + + fn new(symbols: &[Symbol], symbol_lengths: &[u8], needle: &[u8]) -> Self { + assert!( + needle.len() + 2 <= (1 << Self::BITS), + "needle too long for 4-bit states (max 14 chars)" + ); + + let fused = FusedTableDfa::new(symbols, symbol_lengths, needle); + + // Pack the fused u16 transitions into u64 shift tables. + let n_states = needle.len() + 1; + let escape_sentinel_u8 = fused.escape_sentinel as u8; + + let mut transitions = [0u64; 256]; + let mut escape_transitions = [0u64; 256]; + + for code_byte in 0..256usize { + let mut packed = 0u64; + for state in 0..n_states { + let next = fused.transitions[state * 256 + code_byte]; + // Map the escape sentinel to a value that fits in 3 bits. + let next_u8 = if next == fused.escape_sentinel { + escape_sentinel_u8 + } else { + next as u8 + }; + packed |= (next_u8 as u64) << (state as u32 * Self::BITS); + } + transitions[code_byte] = packed; + } + + for byte_val in 0..256usize { + let mut packed = 0u64; + for state in 0..n_states { + let next = fused.escape_transitions[state * 256 + byte_val] as u8; + packed |= (next as u64) << (state as u32 * Self::BITS); + } + escape_transitions[byte_val] = packed; + } + + Self { + transitions, + escape_transitions, + accept_state: fused.accept_state as u8, + escape_sentinel: escape_sentinel_u8, + } + } + + #[inline] + fn matches(&self, codes: &[u8]) -> bool { + let mut state = 0u8; + let mut pos = 0; + while pos < codes.len() { + if state == self.accept_state { + return true; + } + let code = codes[pos]; + pos += 1; + // The table load depends only on `code`, not on `state`. + // The shift depends on `state` but is a fast register op. 
+ let packed = self.transitions[code as usize]; + let next = ((packed >> (state as u32 * Self::BITS)) & Self::MASK) as u8; + if next == self.escape_sentinel { + if pos >= codes.len() { + return false; + } + let b = codes[pos]; + pos += 1; + let esc_packed = self.escape_transitions[b as usize]; + state = ((esc_packed >> (state as u32 * Self::BITS)) & Self::MASK) as u8; + } else { + state = next; + } + } + state == self.accept_state + } + + #[inline] + fn matches_no_early_exit(&self, codes: &[u8]) -> bool { + let mut state = 0u8; + let mut pos = 0; + while pos < codes.len() { + let code = codes[pos]; + pos += 1; + let packed = self.transitions[code as usize]; + let next = ((packed >> (state as u32 * Self::BITS)) & Self::MASK) as u8; + if next == self.escape_sentinel { + if pos >= codes.len() { + return false; + } + let b = codes[pos]; + pos += 1; + let esc_packed = self.escape_transitions[b as usize]; + state = ((esc_packed >> (state as u32 * Self::BITS)) & Self::MASK) as u8; + } else { + state = next; + } + } + state == self.accept_state + } +} + +// --------------------------------------------------------------------------- +// Hybrid 1: Prefilter + ShiftDfa — skip strings with no relevant codes, +// then use the fastest DFA (ShiftDfa) for survivors. 
+// --------------------------------------------------------------------------- + +struct PrefilterShiftDfa { + inner: ShiftDfa, + relevant_codes: [bool; 256], +} + +impl PrefilterShiftDfa { + fn new(symbols: &[Symbol], symbol_lengths: &[u8], needle: &[u8]) -> Self { + let inner = ShiftDfa::new(symbols, symbol_lengths, needle); + + let mut needle_bytes = [false; 256]; + for &b in needle { + needle_bytes[b as usize] = true; + } + + let mut relevant_codes = [false; 256]; + for (code, (sym, &sym_len)) in symbols.iter().zip(symbol_lengths.iter()).enumerate() { + let sym_bytes = sym.to_u64().to_le_bytes(); + for &b in &sym_bytes[..sym_len as usize] { + if needle_bytes[b as usize] { + relevant_codes[code] = true; + break; + } + } + } + relevant_codes[ESCAPE_CODE as usize] = true; + + Self { + inner, + relevant_codes, + } + } + + #[inline] + fn matches(&self, codes: &[u8]) -> bool { + if !codes.iter().any(|&c| self.relevant_codes[c as usize]) { + return false; + } + self.inner.matches_no_early_exit(codes) + } +} + +// --------------------------------------------------------------------------- +// Hybrid 2: StateZero skip + ShiftDfa — skip leading trivial codes, +// then use ShiftDfa for the remainder. 
+// --------------------------------------------------------------------------- + +struct StateZeroShiftDfa { + inner: ShiftDfa, + trivial: [bool; 256], +} + +impl StateZeroShiftDfa { + fn new(symbols: &[Symbol], symbol_lengths: &[u8], needle: &[u8]) -> Self { + let inner = ShiftDfa::new(symbols, symbol_lengths, needle); + + let mut trivial = [false; 256]; + for code in 0..256 { + let packed = inner.transitions[code]; + let next = (packed & ShiftDfa::MASK) as u8; + trivial[code] = next == 0 && code as u8 != ESCAPE_CODE; + } + + Self { inner, trivial } + } + + #[inline] + fn matches(&self, codes: &[u8]) -> bool { + let mut start = 0; + while start < codes.len() && self.trivial[codes[start] as usize] { + start += 1; + } + if start == codes.len() { + return self.inner.accept_state == 0; + } + self.inner.matches_no_early_exit(&codes[start..]) + } +} + +// --------------------------------------------------------------------------- +// Approach 9: Sheng DFA — use SSSE3 PSHUFB for transitions. +// +// The state is a byte position in an XMM register. For each input byte, +// we load a 16-byte shuffle mask and do PSHUFB(mask, state_vec). +// PSHUFB uses the low 4 bits of each byte lane as an index into the mask, +// producing the next state. With ≤16 states this is a single instruction. +// +// The shuffle mask load depends only on the input byte (not on state), +// so it can be loaded in parallel with the previous PSHUFB's execution. +// Throughput: ~1 byte/cycle (limited by PSHUFB throughput of 1/cycle on +// most microarchitectures). +// --------------------------------------------------------------------------- + +#[cfg(target_arch = "x86_64")] +struct ShengDfa { + /// 256 shuffle masks, one per possible input byte. + /// Each mask is 16 bytes: mask[i] = next_state when current state == i. + masks: Vec, + /// 256 escape masks for escaped byte values. 
+ escape_masks: Vec, + accept_state: u8, + escape_sentinel: u8, +} + +#[cfg(target_arch = "x86_64")] +impl ShengDfa { + fn new(symbols: &[Symbol], symbol_lengths: &[u8], needle: &[u8]) -> Self { + use std::arch::x86_64::_mm_set_epi8; + + let fused = FusedTableDfa::new(symbols, symbol_lengths, needle); + let escape_sentinel = fused.escape_sentinel as u8; + + let mut masks = Vec::with_capacity(256); + let mut escape_masks = Vec::with_capacity(256); + + for code_byte in 0..256usize { + let mut mask_bytes = [0u8; 16]; + for state in 0..16 { + if state < needle.len() + 1 { + let next = fused.transitions[state * 256 + code_byte]; + mask_bytes[state] = if next == fused.escape_sentinel { + escape_sentinel + } else { + next as u8 + }; + } + } + masks.push(unsafe { + _mm_set_epi8( + mask_bytes[15] as i8, + mask_bytes[14] as i8, + mask_bytes[13] as i8, + mask_bytes[12] as i8, + mask_bytes[11] as i8, + mask_bytes[10] as i8, + mask_bytes[9] as i8, + mask_bytes[8] as i8, + mask_bytes[7] as i8, + mask_bytes[6] as i8, + mask_bytes[5] as i8, + mask_bytes[4] as i8, + mask_bytes[3] as i8, + mask_bytes[2] as i8, + mask_bytes[1] as i8, + mask_bytes[0] as i8, + ) + }); + } + + for byte_val in 0..256usize { + let mut mask_bytes = [0u8; 16]; + for state in 0..16 { + if state < needle.len() + 1 { + mask_bytes[state] = fused.escape_transitions[state * 256 + byte_val] as u8; + } + } + escape_masks.push(unsafe { + _mm_set_epi8( + mask_bytes[15] as i8, + mask_bytes[14] as i8, + mask_bytes[13] as i8, + mask_bytes[12] as i8, + mask_bytes[11] as i8, + mask_bytes[10] as i8, + mask_bytes[9] as i8, + mask_bytes[8] as i8, + mask_bytes[7] as i8, + mask_bytes[6] as i8, + mask_bytes[5] as i8, + mask_bytes[4] as i8, + mask_bytes[3] as i8, + mask_bytes[2] as i8, + mask_bytes[1] as i8, + mask_bytes[0] as i8, + ) + }); + } + + Self { + masks, + escape_masks, + accept_state: fused.accept_state as u8, + escape_sentinel, + } + } + + #[inline] + #[target_feature(enable = "ssse3")] + unsafe fn matches(&self, 
codes: &[u8]) -> bool { + use std::arch::x86_64::_mm_extract_epi8; + use std::arch::x86_64::_mm_set1_epi8; + use std::arch::x86_64::_mm_shuffle_epi8; + + unsafe { + let mut state_vec = _mm_set1_epi8(0); + let mut pos = 0; + + while pos < codes.len() { + let cur_state = _mm_extract_epi8::<0>(state_vec) as u8; + if cur_state == self.accept_state { + return true; + } + + let code = codes[pos]; + pos += 1; + + // One PSHUFB: the mask load depends only on `code`, not state. + let next_vec = _mm_shuffle_epi8(self.masks[code as usize], state_vec); + let next_state = _mm_extract_epi8::<0>(next_vec) as u8; + + if next_state == self.escape_sentinel { + if pos >= codes.len() { + return false; + } + let b = codes[pos]; + pos += 1; + state_vec = _mm_shuffle_epi8(self.escape_masks[b as usize], state_vec); + } else { + state_vec = next_vec; + } + } + + _mm_extract_epi8::<0>(state_vec) as u8 == self.accept_state + } + } + + #[inline] + #[target_feature(enable = "ssse3")] + unsafe fn matches_no_early_exit(&self, codes: &[u8]) -> bool { + use std::arch::x86_64::_mm_extract_epi8; + use std::arch::x86_64::_mm_set1_epi8; + use std::arch::x86_64::_mm_shuffle_epi8; + + unsafe { + let mut state_vec = _mm_set1_epi8(0); + let mut pos = 0; + + while pos < codes.len() { + let code = codes[pos]; + pos += 1; + + let next_vec = _mm_shuffle_epi8(self.masks[code as usize], state_vec); + let next_state = _mm_extract_epi8::<0>(next_vec) as u8; + + if next_state == self.escape_sentinel { + if pos >= codes.len() { + return false; + } + let b = codes[pos]; + pos += 1; + state_vec = _mm_shuffle_epi8(self.escape_masks[b as usize], state_vec); + } else { + state_vec = next_vec; + } + } + + _mm_extract_epi8::<0>(state_vec) as u8 == self.accept_state + } + } +} + +// --------------------------------------------------------------------------- +// Approach 10: Speculative/Enumerated DFA — run from ALL start states at once. 
+//
+// For a DFA with S states and a code sequence of length L, we process codes
+// sequentially but track S states simultaneously. Each "state" in our vector
+// is the result of starting from a different initial state. After processing
+// the full sequence, we look up the result for initial state 0.
+//
+// Why is this useful? It enables processing codes in independent chunks:
+// each chunk can run in parallel, and results are chained by composing
+// the state-to-state mappings. For small S this is very efficient.
+// ---------------------------------------------------------------------------
+
+struct EnumeratedDfa {
+    /// For each (state, code_byte): next state. 256 entries per state.
+    transitions: Vec<u16>,
+    /// For each (state, escaped_byte): next state after an escape pair.
+    escape_transitions: Vec<u16>,
+    /// needle.len() + 1 live states; must fit the fixed 16-entry state maps.
+    n_states: usize,
+    accept_state: u16,
+    escape_sentinel: u16,
+}
+
+impl EnumeratedDfa {
+    /// Sentinel meaning "this start state can no longer accept"; currently
+    /// only produced by a dangling trailing escape byte.
+    const DEAD: u16 = u16::MAX;
+
+    fn new(symbols: &[Symbol], symbol_lengths: &[u8], needle: &[u8]) -> Self {
+        let fused = FusedTableDfa::new(symbols, symbol_lengths, needle);
+        Self {
+            transitions: fused.transitions,
+            escape_transitions: fused.escape_transitions,
+            n_states: needle.len() + 1,
+            accept_state: fused.accept_state,
+            escape_sentinel: fused.escape_sentinel,
+        }
+    }
+
+    /// Process a single code sequence by tracking all possible start states.
+    /// Returns true if starting from state 0 reaches accept.
+    #[inline]
+    fn matches(&self, codes: &[u8]) -> bool {
+        // state_map[s] = "if we started in state s, we'd now be in state state_map[s]"
+        let ns = self.n_states;
+        let mut state_map: [u16; 16] = [0; 16]; // supports up to 16 states
+        for s in 0..ns {
+            state_map[s] = s as u16;
+        }
+
+        let mut pos = 0;
+        while pos < codes.len() {
+            let code = codes[pos];
+            pos += 1;
+
+            let next_fn = self.transitions.as_ptr();
+            let esc_fn = self.escape_transitions.as_ptr();
+
+            if code == ESCAPE_CODE {
+                if pos >= codes.len() {
+                    // Dangling escape: the sequence cannot match from any state.
+                    return false;
+                }
+                let b = codes[pos];
+                pos += 1;
+                for s in 0..ns {
+                    let cur = state_map[s];
+                    // SAFETY: cur < n_states and b < 256; both tables hold
+                    // n_states * 256 entries.
+                    state_map[s] = unsafe { *esc_fn.add(cur as usize * 256 + b as usize) };
+                }
+            } else {
+                for s in 0..ns {
+                    let cur = state_map[s];
+                    // SAFETY: same bounds argument as above.
+                    let next = unsafe { *next_fn.add(cur as usize * 256 + code as usize) };
+                    state_map[s] = if next == self.escape_sentinel {
+                        // Defensive: a non-escape code should never map to the
+                        // sentinel; keep the current state if one does.
+                        cur
+                    } else {
+                        next
+                    };
+                }
+            }
+
+            // Early exit: if starting from state 0 we've already accepted
+            if state_map[0] == self.accept_state {
+                return true;
+            }
+        }
+
+        state_map[0] == self.accept_state
+    }
+
+    /// Chunked parallel version: split codes into chunks, process each chunk
+    /// to get a state mapping, then compose mappings.
+    ///
+    /// An escape pair is never allowed to straddle a chunk boundary: the walk
+    /// always consumes the escaped byte together with its marker, and the
+    /// number of bytes consumed is identical for every start state, so the
+    /// per-chunk maps compose correctly.
+    #[allow(dead_code)]
+    #[inline]
+    fn matches_chunked(&self, codes: &[u8], chunk_size: usize) -> bool {
+        if codes.is_empty() {
+            return self.accept_state == 0;
+        }
+        // Guard against chunk_size == 0, which would never advance `pos`.
+        let chunk_size = chunk_size.max(1);
+        let ns = self.n_states;
+
+        // global_map[s] = end state after all chunks so far, starting from s.
+        let mut global_map: [u16; 16] = [0; 16];
+        for s in 0..ns {
+            global_map[s] = s as u16;
+        }
+
+        // We still process sequentially here but the structure allows future
+        // parallelization with rayon/SIMD on independent chunks.
+        let mut pos = 0;
+        while pos < codes.len() {
+            let chunk_end = (pos + chunk_size).min(codes.len());
+
+            // Build mapping for this chunk: for each start state, what's the end state?
+            let mut chunk_map: [u16; 16] = [0; 16];
+            let mut consumed = 0;
+            for start_state in 0..ns {
+                let mut state = start_state as u16;
+                let mut p = pos;
+                while p < chunk_end {
+                    let code = codes[p];
+                    p += 1;
+                    if code == ESCAPE_CODE {
+                        if p >= codes.len() {
+                            // Dangling trailing escape: no start state accepts.
+                            state = Self::DEAD;
+                            break;
+                        }
+                        // Consume the escaped byte even if it lies just past
+                        // chunk_end, so the pair stays within one chunk.
+                        let b = codes[p];
+                        p += 1;
+                        state = self.escape_transitions[state as usize * 256 + b as usize];
+                    } else {
+                        let next = self.transitions[state as usize * 256 + code as usize];
+                        // Defensive, mirroring `matches`: ignore a sentinel
+                        // produced by a non-escape code.
+                        if next != self.escape_sentinel {
+                            state = next;
+                        }
+                    }
+                }
+                // The walk's positions are state-independent, so every start
+                // state consumes the same byte count.
+                consumed = p - pos;
+                chunk_map[start_state] = state;
+            }
+
+            // Compose: global_map = chunk_map(global_map)
+            let mut new_global: [u16; 16] = [0; 16];
+            for s in 0..ns {
+                let g = global_map[s];
+                new_global[s] = if g == Self::DEAD {
+                    Self::DEAD
+                } else {
+                    chunk_map[g as usize]
+                };
+            }
+            global_map = new_global;
+
+            pos += consumed;
+        }
+
+        global_map[0] == self.accept_state
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Approach 6: Speculative multi-string — process multiple strings, each with
+// early-exit SIMD checking across the batch after each code step.
+// ---------------------------------------------------------------------------
+
+impl FusedTableDfa {
+    /// Process N strings at once. After each code step, check if ALL strings
+    /// have resolved (accepted or exhausted). Uses u16 states packed for
+    /// potential SIMD comparison.
+ #[inline] + fn matches_multi_early_exit( + &self, + all_bytes: &[u8], + starts: &[usize; N], + ends: &[usize; N], + ) -> [bool; N] { + let mut states = [0u16; N]; + let mut pos = *starts; + let mut resolved = 0u32; // bitmask of resolved strings + + let all_resolved = (1u32 << N) - 1; + + loop { + if resolved == all_resolved { + break; + } + + let mut any_progress = false; + for k in 0..N { + if resolved & (1 << k) != 0 { + continue; + } + if pos[k] >= ends[k] { + resolved |= 1 << k; + continue; + } + any_progress = true; + + let code = all_bytes[pos[k]]; + pos[k] += 1; + let next = self.transitions[states[k] as usize * 256 + code as usize]; + if next == self.escape_sentinel { + if pos[k] >= ends[k] { + resolved |= 1 << k; + continue; + } + let b = all_bytes[pos[k]]; + pos[k] += 1; + states[k] = self.escape_transitions[states[k] as usize * 256 + b as usize]; + } else { + states[k] = next; + } + if states[k] == self.accept_state { + resolved |= 1 << k; + } + } + if !any_progress { + break; + } + } + + std::array::from_fn(|k| states[k] == self.accept_state) + } +} + +// --------------------------------------------------------------------------- +// Pre-extracted data for alloc-free benchmarking +// --------------------------------------------------------------------------- + +struct PreparedArray { + all_bytes: Vec, + offsets: Vec, + n: usize, +} + +impl PreparedArray { + fn from_fsst(array: &FSSTArray) -> Self { + let codes = array.codes(); + let offsets_prim = codes.offsets().to_primitive(); + let all_bytes = codes.bytes(); + let all_bytes = all_bytes.as_slice().to_vec(); + let n = codes.len(); + + let offsets: Vec = match_each_integer_ptype!(offsets_prim.ptype(), |T| { + offsets_prim + .as_slice::() + .iter() + .map(|&v| v as usize) + .collect() + }); + + Self { + all_bytes, + offsets, + n, + } + } +} + +// --------------------------------------------------------------------------- +// Benchmark helpers +// 
--------------------------------------------------------------------------- + +#[inline(never)] +fn run_split(dfa: &SplitTableDfa, prep: &PreparedArray, out: &mut BitBufferMut) { + for i in 0..prep.n { + let start = prep.offsets[i]; + let end = prep.offsets[i + 1]; + if dfa.matches(&prep.all_bytes[start..end]) { + out.set(i); + } + } +} + +#[inline(never)] +fn run_fused(dfa: &FusedTableDfa, prep: &PreparedArray, out: &mut BitBufferMut) { + for i in 0..prep.n { + let start = prep.offsets[i]; + let end = prep.offsets[i + 1]; + if dfa.matches(&prep.all_bytes[start..end]) { + out.set(i); + } + } +} + +#[inline(never)] +fn run_fused_no_exit(dfa: &FusedTableDfa, prep: &PreparedArray, out: &mut BitBufferMut) { + for i in 0..prep.n { + let start = prep.offsets[i]; + let end = prep.offsets[i + 1]; + if dfa.matches_no_early_exit(&prep.all_bytes[start..end]) { + out.set(i); + } + } +} + +#[inline(never)] +fn run_fused_unsafe(dfa: &FusedTableDfa, prep: &PreparedArray, out: &mut BitBufferMut) { + for i in 0..prep.n { + let start = prep.offsets[i]; + let end = prep.offsets[i + 1]; + if unsafe { dfa.matches_unchecked(&prep.all_bytes[start..end]) } { + out.set(i); + } + } +} + +#[inline(never)] +fn run_fused_no_exit_unsafe(dfa: &FusedTableDfa, prep: &PreparedArray, out: &mut BitBufferMut) { + for i in 0..prep.n { + let start = prep.offsets[i]; + let end = prep.offsets[i + 1]; + if unsafe { dfa.matches_no_exit_unchecked(&prep.all_bytes[start..end]) } { + out.set(i); + } + } +} + +#[inline(never)] +fn run_branchless(dfa: &BranchlessEscapeDfa, prep: &PreparedArray, out: &mut BitBufferMut) { + for i in 0..prep.n { + let start = prep.offsets[i]; + let end = prep.offsets[i + 1]; + if dfa.matches(&prep.all_bytes[start..end]) { + out.set(i); + } + } +} + +#[cfg(target_arch = "x86_64")] +#[inline(never)] +fn run_simd_gather_8(dfa: &SimdGatherDfa, prep: &PreparedArray, out: &mut BitBufferMut) { + let mut i = 0; + while i + 8 <= prep.n { + let starts: [usize; 8] = std::array::from_fn(|k| 
prep.offsets[i + k]); + let ends: [usize; 8] = std::array::from_fn(|k| prep.offsets[i + k + 1]); + + #[cfg(target_feature = "avx2")] + let results = unsafe { dfa.matches_8_avx2(&prep.all_bytes, &starts, &ends) }; + #[cfg(not(target_feature = "avx2"))] + let results = { + let mut r = [false; 8]; + for k in 0..8 { + r[k] = dfa.matches_scalar(&prep.all_bytes[starts[k]..ends[k]]); + } + r + }; + + for k in 0..8 { + if results[k] { + out.set(i + k); + } + } + i += 8; + } + // Remainder + while i < prep.n { + let start = prep.offsets[i]; + let end = prep.offsets[i + 1]; + if dfa.matches_scalar(&prep.all_bytes[start..end]) { + out.set(i); + } + i += 1; + } +} + +#[inline(never)] +fn run_compact(dfa: &CompactDfa, prep: &PreparedArray, out: &mut BitBufferMut) { + for i in 0..prep.n { + let start = prep.offsets[i]; + let end = prep.offsets[i + 1]; + if dfa.matches(&prep.all_bytes[start..end]) { + out.set(i); + } + } +} + +#[inline(never)] +fn run_prefilter(dfa: &PrefilterDfa, prep: &PreparedArray, out: &mut BitBufferMut) { + for i in 0..prep.n { + let start = prep.offsets[i]; + let end = prep.offsets[i + 1]; + if dfa.matches(&prep.all_bytes[start..end]) { + out.set(i); + } + } +} + +fn bench_decompress(array: &FSSTArray, needle: &[u8], out: &mut Vec) { + out.clear(); + let decompressor = array.decompressor(); + array.codes().with_iterator(|iter| { + out.extend(iter.map(|codes| match codes { + Some(c) => { + let decompressed = decompressor.decompress(c); + decompressed.windows(needle.len()).any(|w| w == needle) + } + None => false, + })); + }); +} + +// --------------------------------------------------------------------------- +// Alloc-free decompress + match: reuse a buffer, inline the decompress logic. +// This measures pure decompress+search cost without per-string allocation. +// --------------------------------------------------------------------------- + +/// Decompress FSST codes into `buf`, returning the number of bytes written. 
+/// This avoids all allocation by writing into a caller-provided buffer,
+/// which is cleared and refilled on every call.
+#[inline]
+fn decompress_into(codes: &[u8], symbols: &[Symbol], symbol_lengths: &[u8], buf: &mut Vec<u8>) {
+    buf.clear();
+    let mut pos = 0;
+    while pos < codes.len() {
+        let code = codes[pos];
+        pos += 1;
+        if code == ESCAPE_CODE {
+            // An escape marker is followed by one literal byte; a dangling
+            // trailing escape is silently ignored.
+            if pos < codes.len() {
+                buf.push(codes[pos]);
+                pos += 1;
+            }
+        } else {
+            let sym = symbols[code as usize].to_u64().to_le_bytes();
+            let len = symbol_lengths[code as usize] as usize;
+            buf.extend_from_slice(&sym[..len]);
+        }
+    }
+}
+
+/// Alloc-free decompress + sliding window match using PreparedArray.
+/// Pre-allocates the decompression buffer once outside the benchmark loop.
+#[inline(never)]
+fn run_decompress_match(
+    prep: &PreparedArray,
+    symbols: &[Symbol],
+    symbol_lengths: &[u8],
+    needle: &[u8],
+    buf: &mut Vec<u8>,
+    out: &mut BitBufferMut,
+) {
+    for i in 0..prep.n {
+        let start = prep.offsets[i];
+        let end = prep.offsets[i + 1];
+        decompress_into(&prep.all_bytes[start..end], symbols, symbol_lengths, buf);
+        // NOTE(review): `windows` panics on an empty needle — every benchmark
+        // needle in this file is non-empty.
+        if buf.windows(needle.len()).any(|w| w == needle) {
+            out.set(i);
+        }
+    }
+}
+
+/// Alloc-free decompress + memmem match using PreparedArray.
+#[inline(never)] +fn run_decompress_memmem( + prep: &PreparedArray, + symbols: &[Symbol], + symbol_lengths: &[u8], + needle: &[u8], + buf: &mut Vec, + out: &mut BitBufferMut, +) { + let finder = memmem::Finder::new(needle); + for i in 0..prep.n { + let start = prep.offsets[i]; + let end = prep.offsets[i + 1]; + decompress_into(&prep.all_bytes[start..end], symbols, symbol_lengths, buf); + if finder.find(buf).is_some() { + out.set(i); + } + } +} + +// --------------------------------------------------------------------------- +// Benchmarks +// --------------------------------------------------------------------------- + +const N: usize = 100_000; +const NEEDLE: &[u8] = b"google"; + +// --------------------------------------------------------------------------- +// ClickBench-style URL generator (longer, more realistic URLs with query +// params, fragments, UTM tracking, referrers, etc.) +// --------------------------------------------------------------------------- + +const CB_DOMAINS: &[&str] = &[ + "www.google.com", + "yandex.ru", + "mail.ru", + "vk.com", + "www.youtube.com", + "www.facebook.com", + "ok.ru", + "go.mail.ru", + "www.avito.ru", + "pogoda.yandex.ru", + "news.yandex.ru", + "maps.yandex.ru", + "market.yandex.ru", + "afisha.yandex.ru", + "auto.ru", + "www.kinopoisk.ru", + "www.ozon.ru", + "www.wildberries.ru", + "aliexpress.ru", + "lenta.ru", +]; + +const CB_PATHS: &[&str] = &[ + "/search", + "/catalog/electronics/smartphones", + "/product/item/123456789", + "/news/2024/03/15/article-about-technology", + "/user/profile/settings/notifications", + "/api/v2/catalog/search", + "/checkout/cart/summary", + "/blog/2024/how-to-optimize-database-queries-for-better-performance", + "/category/home-and-garden/furniture/tables", + "/", +]; + +const CB_PARAMS: &[&str] = &[ + "?utm_source=google&utm_medium=cpc&utm_campaign=spring_sale_2024&utm_content=banner_v2", + "?q=buy+smartphone+online+cheap+free+shipping&category=electronics&sort=price_asc&page=3", + 
"?ref=main_page_carousel_block_position_4&sessionid=abc123def456", + "?from=tabbar&clid=2270455&text=weather+forecast+tomorrow", + "?lr=213&msid=1234567890.12345&suggest_reqid=abcdef&csg=12345", + "", + "", + "", + "?page=1&per_page=20", + "?source=serp&forceshow=1", +]; + +const CB_FRAGMENTS: &[&str] = &[ + "", + "", + "", + "#section-reviews", + "#comments", + "#price-history", + "", + "", + "", + "", +]; + +fn generate_clickbench_urls(n: usize) -> Vec { + let mut rng = StdRng::seed_from_u64(123); + (0..n) + .map(|_| { + let scheme = if rng.random_bool(0.7) { + "https" + } else { + "http" + }; + let domain = CB_DOMAINS[rng.random_range(0..CB_DOMAINS.len())]; + let path = CB_PATHS[rng.random_range(0..CB_PATHS.len())]; + let params = CB_PARAMS[rng.random_range(0..CB_PARAMS.len())]; + let fragment = CB_FRAGMENTS[rng.random_range(0..CB_FRAGMENTS.len())]; + format!("{scheme}://{domain}{path}{params}{fragment}") + }) + .collect() +} + +fn make_fsst_clickbench_urls(n: usize) -> FSSTArray { + let urls = generate_clickbench_urls(n); + let varbin = VarBinArray::from_iter( + urls.iter().map(|s| Some(s.as_str())), + DType::Utf8(Nullability::NonNullable), + ); + let compressor = fsst_train_compressor(&varbin); + fsst_compress(varbin, &compressor) +} + +const CB_NEEDLE: &[u8] = b"yandex"; + +// --------------------------------------------------------------------------- +// Log lines generator (Apache/nginx-style access logs) +// --------------------------------------------------------------------------- + +const LOG_METHODS: &[&str] = &["GET", "POST", "PUT", "DELETE", "PATCH", "HEAD"]; +const LOG_PATHS: &[&str] = &[ + "/api/v1/users", + "/api/v2/products/search", + "/healthcheck", + "/static/js/app.bundle.min.js", + "/favicon.ico", + "/login", + "/dashboard/analytics", + "/api/v1/orders/12345/status", + "/graphql", + "/metrics", +]; +const LOG_STATUS: &[u16] = &[ + 200, 200, 200, 200, 200, 201, 301, 302, 400, 403, 404, 500, 502, +]; +const LOG_IPS: &[&str] = &[ + 
"192.168.1.1", + "10.0.0.42", + "172.16.0.100", + "203.0.113.50", + "198.51.100.23", + "8.8.8.8", + "1.1.1.1", + "74.125.200.100", + "151.101.1.69", + "93.184.216.34", +]; +const LOG_UAS: &[&str] = &[ + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)", + "curl/7.81.0", + "python-requests/2.28.1", + "Go-http-client/1.1", + "Googlebot/2.1 (+http://www.google.com/bot.html)", +]; + +fn generate_log_lines(n: usize) -> Vec { + let mut rng = StdRng::seed_from_u64(456); + (0..n) + .map(|_| { + let ip = LOG_IPS[rng.random_range(0..LOG_IPS.len())]; + let method = LOG_METHODS[rng.random_range(0..LOG_METHODS.len())]; + let path = LOG_PATHS[rng.random_range(0..LOG_PATHS.len())]; + let status = LOG_STATUS[rng.random_range(0..LOG_STATUS.len())]; + let size = rng.random_range(100..50000); + let ua = LOG_UAS[rng.random_range(0..LOG_UAS.len())]; + format!( + r#"{ip} - - [15/Mar/2024:10:{:02}:{:02} +0000] "{method} {path} HTTP/1.1" {status} {size} "-" "{ua}""#, + rng.random_range(0..60u32), + rng.random_range(0..60u32), + ) + }) + .collect() +} + +fn make_fsst_log_lines(n: usize) -> FSSTArray { + let lines = generate_log_lines(n); + let varbin = VarBinArray::from_iter( + lines.iter().map(|s| Some(s.as_str())), + DType::Utf8(Nullability::NonNullable), + ); + let compressor = fsst_train_compressor(&varbin); + fsst_compress(varbin, &compressor) +} + +const LOG_NEEDLE: &[u8] = b"Googlebot"; + +// --------------------------------------------------------------------------- +// JSON strings generator (typical API response payloads) +// --------------------------------------------------------------------------- + +const JSON_NAMES: &[&str] = &[ + "Alice", "Bob", "Charlie", "Diana", "Eve", "Frank", "Grace", "Hank", "Ivy", "Jack", +]; +const JSON_CITIES: &[&str] = &[ + "New York", + "London", + "Tokyo", + "Berlin", + "Sydney", + "Toronto", + "Paris", + "Mumbai", + "São Paulo", + "Seoul", +]; +const JSON_TAGS: &[&str] = &[ + 
"premium", + "verified", + "admin", + "moderator", + "subscriber", + "trial", + "enterprise", + "developer", +]; + +fn generate_json_strings(n: usize) -> Vec { + let mut rng = StdRng::seed_from_u64(789); + (0..n) + .map(|_| { + let name = JSON_NAMES[rng.random_range(0..JSON_NAMES.len())]; + let city = JSON_CITIES[rng.random_range(0..JSON_CITIES.len())]; + let age = rng.random_range(18..80u32); + let tag1 = JSON_TAGS[rng.random_range(0..JSON_TAGS.len())]; + let tag2 = JSON_TAGS[rng.random_range(0..JSON_TAGS.len())]; + let id = rng.random_range(10000..99999u32); + format!( + r#"{{"id":{id},"name":"{name}","age":{age},"city":"{city}","tags":["{tag1}","{tag2}"],"active":true}}"# + ) + }) + .collect() +} + +fn make_fsst_json_strings(n: usize) -> FSSTArray { + let jsons = generate_json_strings(n); + let varbin = VarBinArray::from_iter( + jsons.iter().map(|s| Some(s.as_str())), + DType::Utf8(Nullability::NonNullable), + ); + let compressor = fsst_train_compressor(&varbin); + fsst_compress(varbin, &compressor) +} + +const JSON_NEEDLE: &[u8] = b"enterprise"; + +// --------------------------------------------------------------------------- +// File paths generator (Unix-style paths with various depths) +// --------------------------------------------------------------------------- + +const PATH_ROOTS: &[&str] = &[ + "/home/user", + "/var/log", + "/etc", + "/usr/local/bin", + "/opt/app", + "/tmp", + "/srv/www", + "/data/warehouse", +]; +const PATH_DIRS: &[&str] = &[ + "src", + "build", + "dist", + "node_modules", + "target/release", + "config", + ".cache", + "logs/2024", + "backups/daily", + "migrations", +]; +const PATH_FILES: &[&str] = &[ + "main.rs", + "index.ts", + "config.yaml", + "Dockerfile", + "schema.sql", + "app.log", + "data.parquet", + "model.onnx", + "README.md", + "package.json", +]; + +fn generate_file_paths(n: usize) -> Vec { + let mut rng = StdRng::seed_from_u64(321); + (0..n) + .map(|_| { + let root = PATH_ROOTS[rng.random_range(0..PATH_ROOTS.len())]; + let 
dir = PATH_DIRS[rng.random_range(0..PATH_DIRS.len())]; + let file = PATH_FILES[rng.random_range(0..PATH_FILES.len())]; + let depth = rng.random_range(0..3u32); + let mut path = format!("{root}/{dir}"); + for _ in 0..depth { + let subdir = PATH_DIRS[rng.random_range(0..PATH_DIRS.len())]; + path.push('/'); + path.push_str(subdir); + } + path.push('/'); + path.push_str(file); + path + }) + .collect() +} + +fn make_fsst_file_paths(n: usize) -> FSSTArray { + let paths = generate_file_paths(n); + let varbin = VarBinArray::from_iter( + paths.iter().map(|s| Some(s.as_str())), + DType::Utf8(Nullability::NonNullable), + ); + let compressor = fsst_train_compressor(&varbin); + fsst_compress(varbin, &compressor) +} + +const PATH_NEEDLE: &[u8] = b"target/release"; + +// --------------------------------------------------------------------------- +// Email addresses generator +// --------------------------------------------------------------------------- + +const EMAIL_USERS: &[&str] = &[ + "john.doe", + "jane.smith", + "admin", + "support", + "no-reply", + "sales.team", + "dev+test", + "marketing", + "info", + "contact.us", +]; +const EMAIL_DOMAINS: &[&str] = &[ + "gmail.com", + "yahoo.com", + "outlook.com", + "company.io", + "example.org", + "mail.ru", + "protonmail.com", + "fastmail.com", + "icloud.com", + "hey.com", +]; + +fn generate_emails(n: usize) -> Vec { + let mut rng = StdRng::seed_from_u64(654); + (0..n) + .map(|_| { + let user = EMAIL_USERS[rng.random_range(0..EMAIL_USERS.len())]; + let domain = EMAIL_DOMAINS[rng.random_range(0..EMAIL_DOMAINS.len())]; + let suffix = rng.random_range(0..1000u32); + format!("{user}{suffix}@{domain}") + }) + .collect() +} + +fn make_fsst_emails(n: usize) -> FSSTArray { + let emails = generate_emails(n); + let varbin = VarBinArray::from_iter( + emails.iter().map(|s| Some(s.as_str())), + DType::Utf8(Nullability::NonNullable), + ); + let compressor = fsst_train_compressor(&varbin); + fsst_compress(varbin, &compressor) +} + +const 
EMAIL_NEEDLE: &[u8] = b"gmail"; + +/// Macro to reduce boilerplate for DFA benchmarks with pre-allocated output. +macro_rules! dfa_bench { + ($name:ident, $dfa_ty:ident, $run_fn:ident) => { + #[divan::bench] + fn $name(bencher: Bencher) { + let fsst = make_fsst_urls(N); + let prep = PreparedArray::from_fsst(&fsst); + let dfa = $dfa_ty::new( + fsst.symbols().as_slice(), + fsst.symbol_lengths().as_slice(), + NEEDLE, + ); + let mut out = BitBufferMut::new_unset(N); + bencher.bench_local(|| { + out.fill_range(0, N, false); + $run_fn(&dfa, &prep, &mut out); + }); + } + }; +} + +// 1. Split table (production baseline) +dfa_bench!(split_table, SplitTableDfa, run_split); + +// 2. Fused 256-wide table +dfa_bench!(fused_table, FusedTableDfa, run_fused); + +// 3. Fused table, no early exit on accept +dfa_bench!(fused_no_early_exit, FusedTableDfa, run_fused_no_exit); + +// 4. Fused table, unsafe (no bounds checks) +dfa_bench!(fused_unsafe, FusedTableDfa, run_fused_unsafe); + +// 5. Fused table, no early exit + unsafe +dfa_bench!( + fused_no_exit_unsafe, + FusedTableDfa, + run_fused_no_exit_unsafe +); + +// 6. Branchless escape handling +dfa_bench!(branchless_escape, BranchlessEscapeDfa, run_branchless); + +// 7. SIMD gather (8 strings at a time, u32 table) +#[cfg(target_arch = "x86_64")] +#[divan::bench] +fn simd_gather_8(bencher: Bencher) { + let fsst = make_fsst_urls(N); + let prep = PreparedArray::from_fsst(&fsst); + let dfa = SimdGatherDfa::new( + fsst.symbols().as_slice(), + fsst.symbol_lengths().as_slice(), + NEEDLE, + ); + let mut out = BitBufferMut::new_unset(N); + bencher.bench_local(|| { + out.fill_range(0, N, false); + run_simd_gather_8(&dfa, &prep, &mut out); + }); +} + +// 8. Decompress then search (worst-case baseline, allocates per string) +#[divan::bench] +fn decompress_then_search(bencher: Bencher) { + let fsst = make_fsst_urls(N); + let mut out = Vec::with_capacity(N); + bencher.bench_local(|| { + bench_decompress(&fsst, NEEDLE, &mut out); + }); +} + +// 8b. 
Alloc-free decompress + sliding window match +#[divan::bench] +fn decompress_no_alloc(bencher: Bencher) { + let fsst = make_fsst_urls(N); + let prep = PreparedArray::from_fsst(&fsst); + let symbols = fsst.symbols(); + let symbol_lengths = fsst.symbol_lengths(); + let mut buf = Vec::with_capacity(256); + let mut out = BitBufferMut::new_unset(N); + bencher.bench_local(|| { + out.fill_range(0, N, false); + run_decompress_match( + &prep, + symbols.as_slice(), + symbol_lengths.as_slice(), + NEEDLE, + &mut buf, + &mut out, + ); + }); +} + +// 8c. Alloc-free decompress + memmem (SIMD substring search) +#[divan::bench] +fn decompress_no_alloc_memmem(bencher: Bencher) { + let fsst = make_fsst_urls(N); + let prep = PreparedArray::from_fsst(&fsst); + let symbols = fsst.symbols(); + let symbol_lengths = fsst.symbol_lengths(); + let mut buf = Vec::with_capacity(256); + let mut out = BitBufferMut::new_unset(N); + bencher.bench_local(|| { + out.fill_range(0, N, false); + run_decompress_memmem( + &prep, + symbols.as_slice(), + symbol_lengths.as_slice(), + NEEDLE, + &mut buf, + &mut out, + ); + }); +} + +// 9. Chunk-of-64: match 64 strings, stack-alloc results, then pack bits. +// This aligns with collect_bool's internal 64-bit chunking. +#[divan::bench] +fn fused_chunk_64(bencher: Bencher) { + let fsst = make_fsst_urls(N); + let prep = PreparedArray::from_fsst(&fsst); + let dfa = FusedTableDfa::new( + fsst.symbols().as_slice(), + fsst.symbol_lengths().as_slice(), + NEEDLE, + ); + bencher.bench_local(|| { + BitBufferMut::collect_bool(prep.n, |i| { + let start = prep.offsets[i]; + let end = prep.offsets[i + 1]; + dfa.matches_no_early_exit(&prep.all_bytes[start..end]) + }) + }); +} + +// 10. Chunk-of-64 with unsafe matches. 
+#[divan::bench] +fn fused_chunk_64_unsafe(bencher: Bencher) { + let fsst = make_fsst_urls(N); + let prep = PreparedArray::from_fsst(&fsst); + let dfa = FusedTableDfa::new( + fsst.symbols().as_slice(), + fsst.symbol_lengths().as_slice(), + NEEDLE, + ); + bencher.bench_local(|| { + BitBufferMut::collect_bool(prep.n, |i| { + let start = prep.offsets[i]; + let end = prep.offsets[i + 1]; + unsafe { dfa.matches_no_exit_unchecked(&prep.all_bytes[start..end]) } + }) + }); +} + +// 11. Compact u8 table (halved table size) +dfa_bench!(compact_table, CompactDfa, run_compact); + +// 12. Compact u8 + collect_bool +#[divan::bench] +fn compact_chunk_64(bencher: Bencher) { + let fsst = make_fsst_urls(N); + let prep = PreparedArray::from_fsst(&fsst); + let dfa = CompactDfa::new( + fsst.symbols().as_slice(), + fsst.symbol_lengths().as_slice(), + NEEDLE, + ); + bencher.bench_local(|| { + BitBufferMut::collect_bool(prep.n, |i| { + let start = prep.offsets[i]; + let end = prep.offsets[i + 1]; + dfa.matches_no_early_exit(&prep.all_bytes[start..end]) + }) + }); +} + +// 13. Compact u8 + collect_bool + unsafe +#[divan::bench] +fn compact_chunk_64_unsafe(bencher: Bencher) { + let fsst = make_fsst_urls(N); + let prep = PreparedArray::from_fsst(&fsst); + let dfa = CompactDfa::new( + fsst.symbols().as_slice(), + fsst.symbol_lengths().as_slice(), + NEEDLE, + ); + bencher.bench_local(|| { + BitBufferMut::collect_bool(prep.n, |i| { + let start = prep.offsets[i]; + let end = prep.offsets[i + 1]; + unsafe { dfa.matches_no_exit_unchecked(&prep.all_bytes[start..end]) } + }) + }); +} + +// 14. Prefilter (skip strings with no relevant codes) +dfa_bench!(prefilter, PrefilterDfa, run_prefilter); + +// 15. 
Prefilter + collect_bool +#[divan::bench] +fn prefilter_chunk_64(bencher: Bencher) { + let fsst = make_fsst_urls(N); + let prep = PreparedArray::from_fsst(&fsst); + let dfa = PrefilterDfa::new( + fsst.symbols().as_slice(), + fsst.symbol_lengths().as_slice(), + NEEDLE, + ); + bencher.bench_local(|| { + BitBufferMut::collect_bool(prep.n, |i| { + let start = prep.offsets[i]; + let end = prep.offsets[i + 1]; + dfa.matches_no_early_exit(&prep.all_bytes[start..end]) + }) + }); +} + +// 16. Streaming continuous scan (single pass through all codes) +#[divan::bench] +fn streaming_continuous(bencher: Bencher) { + let fsst = make_fsst_urls(N); + let prep = PreparedArray::from_fsst(&fsst); + let dfa = CompactDfa::new( + fsst.symbols().as_slice(), + fsst.symbol_lengths().as_slice(), + NEEDLE, + ); + let mut out = BitBufferMut::new_unset(N); + bencher.bench_local(|| { + out.fill_range(0, N, false); + streaming_scan_continuous(&dfa, &prep.all_bytes, &prep.offsets, prep.n, &mut out); + }); +} + +// 17. Shift-based DFA (u64 packed transitions) +#[divan::bench] +fn shift_dfa(bencher: Bencher) { + let fsst = make_fsst_urls(N); + let prep = PreparedArray::from_fsst(&fsst); + let dfa = ShiftDfa::new( + fsst.symbols().as_slice(), + fsst.symbol_lengths().as_slice(), + NEEDLE, + ); + bencher.bench_local(|| { + BitBufferMut::collect_bool(prep.n, |i| { + let start = prep.offsets[i]; + let end = prep.offsets[i + 1]; + dfa.matches(&prep.all_bytes[start..end]) + }) + }); +} + +// 18. Shift-based DFA, no early exit +#[divan::bench] +fn shift_dfa_no_exit(bencher: Bencher) { + let fsst = make_fsst_urls(N); + let prep = PreparedArray::from_fsst(&fsst); + let dfa = ShiftDfa::new( + fsst.symbols().as_slice(), + fsst.symbol_lengths().as_slice(), + NEEDLE, + ); + bencher.bench_local(|| { + BitBufferMut::collect_bool(prep.n, |i| { + let start = prep.offsets[i]; + let end = prep.offsets[i + 1]; + dfa.matches_no_early_exit(&prep.all_bytes[start..end]) + }) + }); +} + +// 19. 
Sheng DFA (PSHUFB transitions) +#[cfg(target_arch = "x86_64")] +#[divan::bench] +fn sheng_dfa(bencher: Bencher) { + let fsst = make_fsst_urls(N); + let prep = PreparedArray::from_fsst(&fsst); + let dfa = ShengDfa::new( + fsst.symbols().as_slice(), + fsst.symbol_lengths().as_slice(), + NEEDLE, + ); + bencher.bench_local(|| { + BitBufferMut::collect_bool(prep.n, |i| { + let start = prep.offsets[i]; + let end = prep.offsets[i + 1]; + unsafe { dfa.matches(&prep.all_bytes[start..end]) } + }) + }); +} + +// 20. Sheng DFA, no early exit +#[cfg(target_arch = "x86_64")] +#[divan::bench] +fn sheng_dfa_no_exit(bencher: Bencher) { + let fsst = make_fsst_urls(N); + let prep = PreparedArray::from_fsst(&fsst); + let dfa = ShengDfa::new( + fsst.symbols().as_slice(), + fsst.symbol_lengths().as_slice(), + NEEDLE, + ); + bencher.bench_local(|| { + BitBufferMut::collect_bool(prep.n, |i| { + let start = prep.offsets[i]; + let end = prep.offsets[i + 1]; + unsafe { dfa.matches_no_early_exit(&prep.all_bytes[start..end]) } + }) + }); +} + +// 21. Enumerated DFA (track all start states) +#[divan::bench] +fn enumerated_dfa(bencher: Bencher) { + let fsst = make_fsst_urls(N); + let prep = PreparedArray::from_fsst(&fsst); + let dfa = EnumeratedDfa::new( + fsst.symbols().as_slice(), + fsst.symbol_lengths().as_slice(), + NEEDLE, + ); + bencher.bench_local(|| { + BitBufferMut::collect_bool(prep.n, |i| { + let start = prep.offsets[i]; + let end = prep.offsets[i + 1]; + dfa.matches(&prep.all_bytes[start..end]) + }) + }); +} + +// 12. 
Multi-string early exit with bitmask (8 at a time) +#[divan::bench] +fn fused_multi_early_exit_8(bencher: Bencher) { + let fsst = make_fsst_urls(N); + let prep = PreparedArray::from_fsst(&fsst); + let dfa = FusedTableDfa::new( + fsst.symbols().as_slice(), + fsst.symbol_lengths().as_slice(), + NEEDLE, + ); + let mut out = BitBufferMut::new_unset(N); + bencher.bench_local(|| { + out.fill_range(0, N, false); + let mut i = 0; + while i + 8 <= prep.n { + let starts: [usize; 8] = std::array::from_fn(|k| prep.offsets[i + k]); + let ends: [usize; 8] = std::array::from_fn(|k| prep.offsets[i + k + 1]); + let results = dfa.matches_multi_early_exit(&prep.all_bytes, &starts, &ends); + for k in 0..8 { + if results[k] { + out.set(i + k); + } + } + i += 8; + } + while i < prep.n { + let start = prep.offsets[i]; + let end = prep.offsets[i + 1]; + if dfa.matches(&prep.all_bytes[start..end]) { + out.set(i); + } + i += 1; + } + }); +} + +// Aho-Corasick on decompressed data: decompress each string then search with aho-corasick +#[divan::bench] +fn aho_corasick_decompress(bencher: Bencher) { + let fsst = make_fsst_urls(N); + let ac = AhoCorasick::new([NEEDLE]).unwrap(); + bencher.bench_local(|| { + let mut out = Vec::with_capacity(N); + let decompressor = fsst.decompressor(); + fsst.codes().with_iterator(|iter| { + out.extend(iter.map(|codes| match codes { + Some(c) => { + let decompressed = decompressor.decompress(c); + ac.is_match(&decompressed) + } + None => false, + })); + }); + out + }); +} + +// Aho-Corasick on raw (canonicalized) bytes: decompress the whole array up front, +// then search each string using aho-corasick's SIMD-accelerated search +#[divan::bench] +fn aho_corasick_on_raw_bytes(bencher: Bencher) { + let fsst = make_fsst_urls(N); + let canonical = fsst.to_canonical().unwrap().into_varbinview(); + let ac = AhoCorasick::new([NEEDLE]).unwrap(); + bencher.bench_local(|| { + let mut out = Vec::with_capacity(N); + canonical.with_iterator(|iter| { + out.extend(iter.map(|s| 
match s { + Some(bytes) => ac.is_match(bytes), + None => false, + })); + }); + out + }); +} + +// 13. Original collect_bool approach (includes alloc) +#[divan::bench] +fn split_table_collect_bool(bencher: Bencher) { + let fsst = make_fsst_urls(N); + let prep = PreparedArray::from_fsst(&fsst); + let dfa = SplitTableDfa::new( + fsst.symbols().as_slice(), + fsst.symbol_lengths().as_slice(), + NEEDLE, + ); + bencher.bench_local(|| { + BitBufferMut::collect_bool(prep.n, |i| { + let start = prep.offsets[i]; + let end = prep.offsets[i + 1]; + dfa.matches(&prep.all_bytes[start..end]) + }) + }); +} + +// --------------------------------------------------------------------------- +// ClickBench-style URL benchmarks (longer URLs with query params, fragments) +// --------------------------------------------------------------------------- + +#[divan::bench] +fn cb_split_table(bencher: Bencher) { + let fsst = make_fsst_clickbench_urls(N); + let prep = PreparedArray::from_fsst(&fsst); + let dfa = SplitTableDfa::new( + fsst.symbols().as_slice(), + fsst.symbol_lengths().as_slice(), + CB_NEEDLE, + ); + bencher.bench_local(|| { + BitBufferMut::collect_bool(prep.n, |i| { + let start = prep.offsets[i]; + let end = prep.offsets[i + 1]; + dfa.matches(&prep.all_bytes[start..end]) + }) + }); +} + +#[divan::bench] +fn cb_fused_table(bencher: Bencher) { + let fsst = make_fsst_clickbench_urls(N); + let prep = PreparedArray::from_fsst(&fsst); + let dfa = FusedTableDfa::new( + fsst.symbols().as_slice(), + fsst.symbol_lengths().as_slice(), + CB_NEEDLE, + ); + bencher.bench_local(|| { + BitBufferMut::collect_bool(prep.n, |i| { + let start = prep.offsets[i]; + let end = prep.offsets[i + 1]; + dfa.matches(&prep.all_bytes[start..end]) + }) + }); +} + +#[divan::bench] +fn cb_fused_chunk_64(bencher: Bencher) { + let fsst = make_fsst_clickbench_urls(N); + let prep = PreparedArray::from_fsst(&fsst); + let dfa = FusedTableDfa::new( + fsst.symbols().as_slice(), + fsst.symbol_lengths().as_slice(), + 
CB_NEEDLE, + ); + bencher.bench_local(|| { + BitBufferMut::collect_bool(prep.n, |i| { + let start = prep.offsets[i]; + let end = prep.offsets[i + 1]; + dfa.matches_no_early_exit(&prep.all_bytes[start..end]) + }) + }); +} + +#[divan::bench] +fn cb_fused_chunk_64_unsafe(bencher: Bencher) { + let fsst = make_fsst_clickbench_urls(N); + let prep = PreparedArray::from_fsst(&fsst); + let dfa = FusedTableDfa::new( + fsst.symbols().as_slice(), + fsst.symbol_lengths().as_slice(), + CB_NEEDLE, + ); + bencher.bench_local(|| { + BitBufferMut::collect_bool(prep.n, |i| { + let start = prep.offsets[i]; + let end = prep.offsets[i + 1]; + unsafe { dfa.matches_no_exit_unchecked(&prep.all_bytes[start..end]) } + }) + }); +} + +#[divan::bench] +fn cb_shift_dfa(bencher: Bencher) { + let fsst = make_fsst_clickbench_urls(N); + let prep = PreparedArray::from_fsst(&fsst); + let dfa = ShiftDfa::new( + fsst.symbols().as_slice(), + fsst.symbol_lengths().as_slice(), + CB_NEEDLE, + ); + bencher.bench_local(|| { + BitBufferMut::collect_bool(prep.n, |i| { + let start = prep.offsets[i]; + let end = prep.offsets[i + 1]; + dfa.matches_no_early_exit(&prep.all_bytes[start..end]) + }) + }); +} + +#[cfg(target_arch = "x86_64")] +#[divan::bench] +fn cb_sheng_dfa(bencher: Bencher) { + let fsst = make_fsst_clickbench_urls(N); + let prep = PreparedArray::from_fsst(&fsst); + let dfa = ShengDfa::new( + fsst.symbols().as_slice(), + fsst.symbol_lengths().as_slice(), + CB_NEEDLE, + ); + bencher.bench_local(|| { + BitBufferMut::collect_bool(prep.n, |i| { + let start = prep.offsets[i]; + let end = prep.offsets[i + 1]; + unsafe { dfa.matches_no_early_exit(&prep.all_bytes[start..end]) } + }) + }); +} + +#[divan::bench] +fn cb_compact_chunk_64_unsafe(bencher: Bencher) { + let fsst = make_fsst_clickbench_urls(N); + let prep = PreparedArray::from_fsst(&fsst); + let dfa = CompactDfa::new( + fsst.symbols().as_slice(), + fsst.symbol_lengths().as_slice(), + CB_NEEDLE, + ); + bencher.bench_local(|| { + 
BitBufferMut::collect_bool(prep.n, |i| { + let start = prep.offsets[i]; + let end = prep.offsets[i + 1]; + unsafe { dfa.matches_no_exit_unchecked(&prep.all_bytes[start..end]) } + }) + }); +} + +#[divan::bench] +fn cb_prefilter_chunk_64(bencher: Bencher) { + let fsst = make_fsst_clickbench_urls(N); + let prep = PreparedArray::from_fsst(&fsst); + let dfa = PrefilterDfa::new( + fsst.symbols().as_slice(), + fsst.symbol_lengths().as_slice(), + CB_NEEDLE, + ); + bencher.bench_local(|| { + BitBufferMut::collect_bool(prep.n, |i| { + let start = prep.offsets[i]; + let end = prep.offsets[i + 1]; + dfa.matches_no_early_exit(&prep.all_bytes[start..end]) + }) + }); +} + +#[divan::bench] +fn cb_streaming_continuous(bencher: Bencher) { + let fsst = make_fsst_clickbench_urls(N); + let prep = PreparedArray::from_fsst(&fsst); + let dfa = CompactDfa::new( + fsst.symbols().as_slice(), + fsst.symbol_lengths().as_slice(), + CB_NEEDLE, + ); + let mut out = BitBufferMut::new_unset(N); + bencher.bench_local(|| { + out.fill_range(0, N, false); + streaming_scan_continuous(&dfa, &prep.all_bytes, &prep.offsets, prep.n, &mut out); + }); +} + +#[divan::bench] +fn cb_decompress_then_search(bencher: Bencher) { + let fsst = make_fsst_clickbench_urls(N); + let mut out = Vec::with_capacity(N); + bencher.bench_local(|| { + bench_decompress(&fsst, CB_NEEDLE, &mut out); + }); +} + +#[divan::bench] +fn cb_decompress_no_alloc(bencher: Bencher) { + let fsst = make_fsst_clickbench_urls(N); + let prep = PreparedArray::from_fsst(&fsst); + let symbols = fsst.symbols(); + let symbol_lengths = fsst.symbol_lengths(); + let mut buf = Vec::with_capacity(512); + let mut out = BitBufferMut::new_unset(N); + bencher.bench_local(|| { + out.fill_range(0, N, false); + run_decompress_match( + &prep, + symbols.as_slice(), + symbol_lengths.as_slice(), + CB_NEEDLE, + &mut buf, + &mut out, + ); + }); +} + +#[divan::bench] +fn cb_decompress_no_alloc_memmem(bencher: Bencher) { + let fsst = make_fsst_clickbench_urls(N); + let 
prep = PreparedArray::from_fsst(&fsst); + let symbols = fsst.symbols(); + let symbol_lengths = fsst.symbol_lengths(); + let mut buf = Vec::with_capacity(512); + let mut out = BitBufferMut::new_unset(N); + bencher.bench_local(|| { + out.fill_range(0, N, false); + run_decompress_memmem( + &prep, + symbols.as_slice(), + symbol_lengths.as_slice(), + CB_NEEDLE, + &mut buf, + &mut out, + ); + }); +} + +#[divan::bench] +fn cb_aho_corasick_decompress(bencher: Bencher) { + let fsst = make_fsst_clickbench_urls(N); + let ac = AhoCorasick::new([CB_NEEDLE]).unwrap(); + bencher.bench_local(|| { + let mut out = Vec::with_capacity(N); + let decompressor = fsst.decompressor(); + fsst.codes().with_iterator(|iter| { + out.extend(iter.map(|codes| match codes { + Some(c) => { + let decompressed = decompressor.decompress(c); + ac.is_match(&decompressed) + } + None => false, + })); + }); + out + }); +} + +#[divan::bench] +fn cb_aho_corasick_on_raw_bytes(bencher: Bencher) { + let fsst = make_fsst_clickbench_urls(N); + let canonical = fsst.to_canonical().unwrap().into_varbinview(); + let ac = AhoCorasick::new([CB_NEEDLE]).unwrap(); + bencher.bench_local(|| { + let mut out = Vec::with_capacity(N); + canonical.with_iterator(|iter| { + out.extend(iter.map(|s| match s { + Some(bytes) => ac.is_match(bytes), + None => false, + })); + }); + out + }); +} + +// --------------------------------------------------------------------------- +// Benchmarks for additional data types (log lines, JSON, file paths, emails) +// --------------------------------------------------------------------------- + +/// Macro for benchmarks on a specific data generator + needle combo. +macro_rules! 
data_bench { + ($name:ident, $make_fn:ident, $needle:expr, $dfa_ty:ident, $match_method:ident) => { + #[divan::bench] + fn $name(bencher: Bencher) { + let fsst = $make_fn(N); + let prep = PreparedArray::from_fsst(&fsst); + let dfa = $dfa_ty::new( + fsst.symbols().as_slice(), + fsst.symbol_lengths().as_slice(), + $needle, + ); + bencher.bench_local(|| { + BitBufferMut::collect_bool(prep.n, |i| { + let start = prep.offsets[i]; + let end = prep.offsets[i + 1]; + dfa.$match_method(&prep.all_bytes[start..end]) + }) + }); + } + }; +} + +// Log lines: long strings (~150 chars), low match rate for "Googlebot" +data_bench!( + log_split_table, + make_fsst_log_lines, + LOG_NEEDLE, + SplitTableDfa, + matches +); +data_bench!( + log_shift_dfa, + make_fsst_log_lines, + LOG_NEEDLE, + ShiftDfa, + matches_no_early_exit +); +data_bench!( + log_compact_no_exit, + make_fsst_log_lines, + LOG_NEEDLE, + CompactDfa, + matches_no_early_exit +); +data_bench!( + log_fused_no_exit, + make_fsst_log_lines, + LOG_NEEDLE, + FusedTableDfa, + matches_no_early_exit +); + +#[divan::bench] +fn log_decompress(bencher: Bencher) { + let fsst = make_fsst_log_lines(N); + let mut out = Vec::with_capacity(N); + bencher.bench_local(|| { + bench_decompress(&fsst, LOG_NEEDLE, &mut out); + }); +} + +// JSON strings: structured data (~80-100 chars), searching for "enterprise" +data_bench!( + json_split_table, + make_fsst_json_strings, + JSON_NEEDLE, + SplitTableDfa, + matches +); +data_bench!( + json_shift_dfa, + make_fsst_json_strings, + JSON_NEEDLE, + ShiftDfa, + matches_no_early_exit +); +data_bench!( + json_compact_no_exit, + make_fsst_json_strings, + JSON_NEEDLE, + CompactDfa, + matches_no_early_exit +); +data_bench!( + json_fused_no_exit, + make_fsst_json_strings, + JSON_NEEDLE, + FusedTableDfa, + matches_no_early_exit +); + +#[divan::bench] +fn json_decompress(bencher: Bencher) { + let fsst = make_fsst_json_strings(N); + let mut out = Vec::with_capacity(N); + bencher.bench_local(|| { + 
bench_decompress(&fsst, JSON_NEEDLE, &mut out); + }); +} + +// File paths: medium-length (~40-80 chars), searching for "target/release" +data_bench!( + path_split_table, + make_fsst_file_paths, + PATH_NEEDLE, + SplitTableDfa, + matches +); +data_bench!( + path_shift_dfa, + make_fsst_file_paths, + PATH_NEEDLE, + ShiftDfa, + matches_no_early_exit +); +data_bench!( + path_compact_no_exit, + make_fsst_file_paths, + PATH_NEEDLE, + CompactDfa, + matches_no_early_exit +); +data_bench!( + path_fused_no_exit, + make_fsst_file_paths, + PATH_NEEDLE, + FusedTableDfa, + matches_no_early_exit +); + +#[divan::bench] +fn path_decompress(bencher: Bencher) { + let fsst = make_fsst_file_paths(N); + let mut out = Vec::with_capacity(N); + bencher.bench_local(|| { + bench_decompress(&fsst, PATH_NEEDLE, &mut out); + }); +} + +// Email addresses: short strings (~20-30 chars), searching for "gmail" +data_bench!( + email_split_table, + make_fsst_emails, + EMAIL_NEEDLE, + SplitTableDfa, + matches +); +data_bench!( + email_shift_dfa, + make_fsst_emails, + EMAIL_NEEDLE, + ShiftDfa, + matches_no_early_exit +); +data_bench!( + email_compact_no_exit, + make_fsst_emails, + EMAIL_NEEDLE, + CompactDfa, + matches_no_early_exit +); +data_bench!( + email_fused_no_exit, + make_fsst_emails, + EMAIL_NEEDLE, + FusedTableDfa, + matches_no_early_exit +); + +#[divan::bench] +fn email_decompress(bencher: Bencher) { + let fsst = make_fsst_emails(N); + let mut out = Vec::with_capacity(N); + bencher.bench_local(|| { + bench_decompress(&fsst, EMAIL_NEEDLE, &mut out); + }); +} + +// --------------------------------------------------------------------------- +// memchr::memmem benchmarks — SIMD-accelerated substring search on decompressed data +// --------------------------------------------------------------------------- + +#[divan::bench] +fn memmem_decompress_urls(bencher: Bencher) { + let fsst = make_fsst_urls(N); + let finder = memmem::Finder::new(NEEDLE); + bencher.bench_local(|| { + let mut out = 
Vec::with_capacity(N); + let decompressor = fsst.decompressor(); + fsst.codes().with_iterator(|iter| { + out.extend(iter.map(|codes| match codes { + Some(c) => { + let decompressed = decompressor.decompress(c); + finder.find(&decompressed).is_some() + } + None => false, + })); + }); + out + }); +} + +#[divan::bench] +fn memmem_on_raw_bytes_urls(bencher: Bencher) { + let fsst = make_fsst_urls(N); + let canonical = fsst.to_canonical().unwrap().into_varbinview(); + let finder = memmem::Finder::new(NEEDLE); + bencher.bench_local(|| { + let mut out = Vec::with_capacity(N); + canonical.with_iterator(|iter| { + out.extend(iter.map(|s| match s { + Some(bytes) => finder.find(bytes).is_some(), + None => false, + })); + }); + out + }); +} + +#[divan::bench] +fn cb_memmem_decompress(bencher: Bencher) { + let fsst = make_fsst_clickbench_urls(N); + let finder = memmem::Finder::new(CB_NEEDLE); + bencher.bench_local(|| { + let mut out = Vec::with_capacity(N); + let decompressor = fsst.decompressor(); + fsst.codes().with_iterator(|iter| { + out.extend(iter.map(|codes| match codes { + Some(c) => { + let decompressed = decompressor.decompress(c); + finder.find(&decompressed).is_some() + } + None => false, + })); + }); + out + }); +} + +#[divan::bench] +fn cb_memmem_on_raw_bytes(bencher: Bencher) { + let fsst = make_fsst_clickbench_urls(N); + let canonical = fsst.to_canonical().unwrap().into_varbinview(); + let finder = memmem::Finder::new(CB_NEEDLE); + bencher.bench_local(|| { + let mut out = Vec::with_capacity(N); + canonical.with_iterator(|iter| { + out.extend(iter.map(|s| match s { + Some(bytes) => finder.find(bytes).is_some(), + None => false, + })); + }); + out + }); +} + +// --------------------------------------------------------------------------- +// Low match rate (~0.001%) benchmarks — needle appears in ~1/100K strings. +// Tests performance when almost no string matches (common in large datasets). +// Uses random alphanumeric strings with a rare injected match. 
+// --------------------------------------------------------------------------- + +const RARE_NEEDLE: &[u8] = b"xyzzy"; + +/// Generate N random alphanumeric strings (~40 chars each), injecting the needle +/// into approximately `match_rate` fraction of them. +fn generate_rare_match_strings(n: usize, match_rate: f64) -> Vec { + let mut rng = StdRng::seed_from_u64(999); + let charset: &[u8] = b"abcdefghijklmnopqrstuvwABCDEFGHIJKLMNOPQRSTUVW0123456789-_.:/"; + (0..n) + .map(|_| { + let len = rng.random_range(30..60); + let mut s: String = (0..len) + .map(|_| charset[rng.random_range(0..charset.len())] as char) + .collect(); + if rng.random_bool(match_rate) { + // Inject needle at random position + let pos = rng.random_range(0..s.len().saturating_sub(RARE_NEEDLE.len()) + 1); + s.replace_range( + pos..pos + RARE_NEEDLE.len().min(s.len() - pos), + std::str::from_utf8(RARE_NEEDLE).unwrap(), + ); + } + s + }) + .collect() +} + +fn make_fsst_rare_match(n: usize) -> FSSTArray { + let strings = generate_rare_match_strings(n, 0.00001); // ~0.001% + let varbin = VarBinArray::from_iter( + strings.iter().map(|s| Some(s.as_str())), + DType::Utf8(Nullability::NonNullable), + ); + let compressor = fsst_train_compressor(&varbin); + fsst_compress(varbin, &compressor) +} + +data_bench!( + rare_split_table, + make_fsst_rare_match, + RARE_NEEDLE, + SplitTableDfa, + matches +); +data_bench!( + rare_shift_dfa, + make_fsst_rare_match, + RARE_NEEDLE, + ShiftDfa, + matches_no_early_exit +); +data_bench!( + rare_compact_no_exit, + make_fsst_rare_match, + RARE_NEEDLE, + CompactDfa, + matches_no_early_exit +); +data_bench!( + rare_fused_no_exit, + make_fsst_rare_match, + RARE_NEEDLE, + FusedTableDfa, + matches_no_early_exit +); + +#[divan::bench] +fn rare_decompress(bencher: Bencher) { + let fsst = make_fsst_rare_match(N); + let mut out = Vec::with_capacity(N); + bencher.bench_local(|| { + bench_decompress(&fsst, RARE_NEEDLE, &mut out); + }); +} + +#[divan::bench] +fn 
rare_memmem_decompress(bencher: Bencher) { + let fsst = make_fsst_rare_match(N); + let finder = memmem::Finder::new(RARE_NEEDLE); + bencher.bench_local(|| { + let mut out = Vec::with_capacity(N); + let decompressor = fsst.decompressor(); + fsst.codes().with_iterator(|iter| { + out.extend(iter.map(|codes| match codes { + Some(c) => { + let decompressed = decompressor.decompress(c); + finder.find(&decompressed).is_some() + } + None => false, + })); + }); + out + }); +} + +#[divan::bench] +fn rare_prefilter(bencher: Bencher) { + let fsst = make_fsst_rare_match(N); + let prep = PreparedArray::from_fsst(&fsst); + let dfa = PrefilterDfa::new( + fsst.symbols().as_slice(), + fsst.symbol_lengths().as_slice(), + RARE_NEEDLE, + ); + bencher.bench_local(|| { + BitBufferMut::collect_bool(prep.n, |i| { + let start = prep.offsets[i]; + let end = prep.offsets[i + 1]; + dfa.matches_no_early_exit(&prep.all_bytes[start..end]) + }) + }); +} + +data_bench!( + rare_state_zero_skip, + make_fsst_rare_match, + RARE_NEEDLE, + StateZeroSkipDfa, + matches +); + +// State-zero skip on URLs (moderate match rate) +data_bench!( + state_zero_skip_urls, + make_fsst_urls, + NEEDLE, + StateZeroSkipDfa, + matches +); + +// State-zero skip on ClickBench URLs +#[divan::bench] +fn cb_state_zero_skip(bencher: Bencher) { + let fsst = make_fsst_clickbench_urls(N); + let prep = PreparedArray::from_fsst(&fsst); + let dfa = StateZeroSkipDfa::new( + fsst.symbols().as_slice(), + fsst.symbol_lengths().as_slice(), + CB_NEEDLE, + ); + bencher.bench_local(|| { + BitBufferMut::collect_bool(prep.n, |i| { + let start = prep.offsets[i]; + let end = prep.offsets[i + 1]; + dfa.matches(&prep.all_bytes[start..end]) + }) + }); +} + +// --------------------------------------------------------------------------- +// Alloc-free decompress benchmarks for all data types +// --------------------------------------------------------------------------- + +macro_rules! 
decompress_no_alloc_bench { + ($name:ident, $make_fn:ident, $needle:expr, $bufsz:expr) => { + #[divan::bench] + fn $name(bencher: Bencher) { + let fsst = $make_fn(N); + let prep = PreparedArray::from_fsst(&fsst); + let symbols = fsst.symbols(); + let symbol_lengths = fsst.symbol_lengths(); + let mut buf = Vec::with_capacity($bufsz); + let mut out = BitBufferMut::new_unset(N); + bencher.bench_local(|| { + out.fill_range(0, N, false); + run_decompress_memmem( + &prep, + symbols.as_slice(), + symbol_lengths.as_slice(), + $needle, + &mut buf, + &mut out, + ); + }); + } + }; +} + +decompress_no_alloc_bench!( + log_decompress_no_alloc, + make_fsst_log_lines, + LOG_NEEDLE, + 256 +); +decompress_no_alloc_bench!( + json_decompress_no_alloc, + make_fsst_json_strings, + JSON_NEEDLE, + 256 +); +decompress_no_alloc_bench!( + path_decompress_no_alloc, + make_fsst_file_paths, + PATH_NEEDLE, + 256 +); +decompress_no_alloc_bench!( + email_decompress_no_alloc, + make_fsst_emails, + EMAIL_NEEDLE, + 64 +); +decompress_no_alloc_bench!( + rare_decompress_no_alloc, + make_fsst_rare_match, + RARE_NEEDLE, + 128 +); + +// --------------------------------------------------------------------------- +// regex-automata DFA benchmarks +// --------------------------------------------------------------------------- + +#[divan::bench] +fn regex_automata_dense_decompress(bencher: Bencher) { + let fsst = make_fsst_urls(N); + let re = DfaRegex::new(std::str::from_utf8(NEEDLE).unwrap()).unwrap(); + bencher.bench_local(|| { + let mut out = Vec::with_capacity(N); + let decompressor = fsst.decompressor(); + fsst.codes().with_iterator(|iter| { + out.extend(iter.map(|codes| match codes { + Some(c) => { + let decompressed = decompressor.decompress(c); + re.is_match(&decompressed) + } + None => false, + })); + }); + out + }); +} + +#[divan::bench] +fn regex_automata_dense_on_raw_bytes(bencher: Bencher) { + let fsst = make_fsst_urls(N); + let canonical = fsst.to_canonical().unwrap().into_varbinview(); + let re 
= DfaRegex::new(std::str::from_utf8(NEEDLE).unwrap()).unwrap(); + bencher.bench_local(|| { + let mut out = Vec::with_capacity(N); + canonical.with_iterator(|iter| { + out.extend(iter.map(|s| match s { + Some(bytes) => re.is_match(bytes), + None => false, + })); + }); + out + }); +} + +#[divan::bench] +fn regex_automata_sparse_decompress(bencher: Bencher) { + let fsst = make_fsst_urls(N); + let dense = DfaRegex::new(std::str::from_utf8(NEEDLE).unwrap()).unwrap(); + let (fwd, rev) = ( + dense.forward().to_sparse().unwrap(), + dense.reverse().to_sparse().unwrap(), + ); + let re = regex_automata::dfa::regex::Regex::builder().build_from_dfas(fwd, rev); + bencher.bench_local(|| { + let mut out = Vec::with_capacity(N); + let decompressor = fsst.decompressor(); + fsst.codes().with_iterator(|iter| { + out.extend(iter.map(|codes| match codes { + Some(c) => { + let decompressed = decompressor.decompress(c); + re.is_match(&decompressed) + } + None => false, + })); + }); + out + }); +} + +#[divan::bench] +fn regex_automata_sparse_on_raw_bytes(bencher: Bencher) { + let fsst = make_fsst_urls(N); + let canonical = fsst.to_canonical().unwrap().into_varbinview(); + let dense = DfaRegex::new(std::str::from_utf8(NEEDLE).unwrap()).unwrap(); + let (fwd, rev) = ( + dense.forward().to_sparse().unwrap(), + dense.reverse().to_sparse().unwrap(), + ); + let re = regex_automata::dfa::regex::Regex::builder().build_from_dfas(fwd, rev); + bencher.bench_local(|| { + let mut out = Vec::with_capacity(N); + canonical.with_iterator(|iter| { + out.extend(iter.map(|s| match s { + Some(bytes) => re.is_match(bytes), + None => false, + })); + }); + out + }); +} + +// --------------------------------------------------------------------------- +// jetscii benchmarks — PCMPESTRI-based substring search +// --------------------------------------------------------------------------- + +#[divan::bench] +fn jetscii_decompress(bencher: Bencher) { + let fsst = make_fsst_urls(N); + let finder = 
jetscii::ByteSubstring::new(NEEDLE); + bencher.bench_local(|| { + let mut out = Vec::with_capacity(N); + let decompressor = fsst.decompressor(); + fsst.codes().with_iterator(|iter| { + out.extend(iter.map(|codes| match codes { + Some(c) => { + let decompressed = decompressor.decompress(c); + finder.find(&decompressed).is_some() + } + None => false, + })); + }); + out + }); +} + +#[divan::bench] +fn jetscii_on_raw_bytes(bencher: Bencher) { + let fsst = make_fsst_urls(N); + let canonical = fsst.to_canonical().unwrap().into_varbinview(); + let finder = jetscii::ByteSubstring::new(NEEDLE); + bencher.bench_local(|| { + let mut out = Vec::with_capacity(N); + canonical.with_iterator(|iter| { + out.extend(iter.map(|s| match s { + Some(bytes) => finder.find(bytes).is_some(), + None => false, + })); + }); + out + }); +} + +// --------------------------------------------------------------------------- +// daachorse benchmarks — double-array Aho-Corasick +// --------------------------------------------------------------------------- + +#[divan::bench] +fn daachorse_decompress(bencher: Bencher) { + let fsst = make_fsst_urls(N); + let ac = DoubleArrayAhoCorasick::::new([NEEDLE]).unwrap(); + bencher.bench_local(|| { + let mut out = Vec::with_capacity(N); + let decompressor = fsst.decompressor(); + fsst.codes().with_iterator(|iter| { + out.extend(iter.map(|codes| match codes { + Some(c) => { + let decompressed = decompressor.decompress(c); + ac.find_iter(&decompressed).next().is_some() + } + None => false, + })); + }); + out + }); +} + +#[divan::bench] +fn daachorse_on_raw_bytes(bencher: Bencher) { + let fsst = make_fsst_urls(N); + let canonical = fsst.to_canonical().unwrap().into_varbinview(); + let ac = DoubleArrayAhoCorasick::::new([NEEDLE]).unwrap(); + bencher.bench_local(|| { + let mut out = Vec::with_capacity(N); + canonical.with_iterator(|iter| { + out.extend(iter.map(|s| match s { + Some(bytes) => ac.find_iter(bytes).next().is_some(), + None => false, + })); + }); + out + 
}); +} + +// --------------------------------------------------------------------------- +// Hybrid DFA benchmarks +// --------------------------------------------------------------------------- + +data_bench!( + prefilter_shift_urls, + make_fsst_urls, + NEEDLE, + PrefilterShiftDfa, + matches +); +data_bench!( + prefilter_shift_rare, + make_fsst_rare_match, + RARE_NEEDLE, + PrefilterShiftDfa, + matches +); +data_bench!( + state_zero_shift_urls, + make_fsst_urls, + NEEDLE, + StateZeroShiftDfa, + matches +); +data_bench!( + state_zero_shift_rare, + make_fsst_rare_match, + RARE_NEEDLE, + StateZeroShiftDfa, + matches +); + +#[divan::bench] +fn cb_prefilter_shift(bencher: Bencher) { + let fsst = make_fsst_clickbench_urls(N); + let prep = PreparedArray::from_fsst(&fsst); + let dfa = PrefilterShiftDfa::new( + fsst.symbols().as_slice(), + fsst.symbol_lengths().as_slice(), + CB_NEEDLE, + ); + bencher.bench_local(|| { + BitBufferMut::collect_bool(prep.n, |i| { + let start = prep.offsets[i]; + let end = prep.offsets[i + 1]; + dfa.matches(&prep.all_bytes[start..end]) + }) + }); +} + +#[divan::bench] +fn cb_state_zero_shift(bencher: Bencher) { + let fsst = make_fsst_clickbench_urls(N); + let prep = PreparedArray::from_fsst(&fsst); + let dfa = StateZeroShiftDfa::new( + fsst.symbols().as_slice(), + fsst.symbol_lengths().as_slice(), + CB_NEEDLE, + ); + bencher.bench_local(|| { + BitBufferMut::collect_bool(prep.n, |i| { + let start = prep.offsets[i]; + let end = prep.offsets[i + 1]; + dfa.matches(&prep.all_bytes[start..end]) + }) + }); +} + +// --------------------------------------------------------------------------- +// Decompress-only benchmarks (no search) — measures the raw cost of FSST +// decompression for each dataset. Compare against DFA search on compressed +// codes to see the speedup from avoiding decompression entirely. +// --------------------------------------------------------------------------- + +/// Decompress all strings without searching. 
Measures pure decompression cost. +#[inline(never)] +fn run_decompress_only( + prep: &PreparedArray, + symbols: &[Symbol], + symbol_lengths: &[u8], + buf: &mut Vec, +) { + for i in 0..prep.n { + let start = prep.offsets[i]; + let end = prep.offsets[i + 1]; + decompress_into(&prep.all_bytes[start..end], symbols, symbol_lengths, buf); + // Force the compiler not to optimize away the decompression. + std::hint::black_box(buf.len()); + } +} + +macro_rules! decompress_only_bench { + ($name:ident, $make_fn:ident, $bufsz:expr) => { + #[divan::bench] + fn $name(bencher: Bencher) { + let fsst = $make_fn(N); + let prep = PreparedArray::from_fsst(&fsst); + let symbols = fsst.symbols(); + let symbol_lengths = fsst.symbol_lengths(); + let mut buf = Vec::with_capacity($bufsz); + bencher.bench_local(|| { + run_decompress_only( + &prep, + symbols.as_slice(), + symbol_lengths.as_slice(), + &mut buf, + ); + }); + } + }; +} + +decompress_only_bench!(urls_decompress_only, make_fsst_urls, 256); +decompress_only_bench!(cb_decompress_only, make_fsst_clickbench_urls, 512); +decompress_only_bench!(log_decompress_only, make_fsst_log_lines, 256); +decompress_only_bench!(json_decompress_only, make_fsst_json_strings, 256); +decompress_only_bench!(path_decompress_only, make_fsst_file_paths, 256); +decompress_only_bench!(email_decompress_only, make_fsst_emails, 64); +decompress_only_bench!(rare_decompress_only, make_fsst_rare_match, 128); + +// --------------------------------------------------------------------------- +// Vortex array LIKE kernel benchmarks — end-to-end through the full vortex +// execution framework. This measures the production code path including +// array construction, kernel dispatch, and result materialization. 
+// --------------------------------------------------------------------------- + +use std::sync::LazyLock; + +use vortex_array::Canonical; +use vortex_array::IntoArray; +use vortex_array::VortexSessionExecute; +use vortex_array::arrays::ConstantArray; +use vortex_array::arrays::scalar_fn::ScalarFnArrayExt; +use vortex_array::scalar_fn::fns::like::Like; +use vortex_array::scalar_fn::fns::like::LikeOptions; +use vortex_array::session::ArraySession; +use vortex_session::VortexSession; + +static SESSION: LazyLock = + LazyLock::new(|| VortexSession::empty().with::()); + +macro_rules! vortex_like_bench { + ($name:ident, $make_fn:ident, $pattern:expr) => { + #[divan::bench] + fn $name(bencher: Bencher) { + let fsst = $make_fn(N); + let len = fsst.len(); + let arr = fsst.into_array(); + let pattern = ConstantArray::new($pattern, len).into_array(); + bencher.bench_local(|| { + Like.try_new_array(len, LikeOptions::default(), [arr.clone(), pattern.clone()]) + .unwrap() + .into_array() + .execute::(&mut SESSION.create_execution_ctx()) + .unwrap() + }); + } + }; +} + +vortex_like_bench!(vortex_like_urls, make_fsst_urls, "%google%"); +vortex_like_bench!(vortex_like_cb, make_fsst_clickbench_urls, "%yandex%"); +vortex_like_bench!(vortex_like_log, make_fsst_log_lines, "%Googlebot%"); +vortex_like_bench!(vortex_like_json, make_fsst_json_strings, "%enterprise%"); +vortex_like_bench!(vortex_like_path, make_fsst_file_paths, "%target/release%"); +vortex_like_bench!(vortex_like_email, make_fsst_emails, "%gmail%"); +vortex_like_bench!(vortex_like_rare, make_fsst_rare_match, "%xyzzy%"); + +// Arrow LIKE benchmarks: decompress FSST → canonical, then run Arrow's LIKE +// (which uses memchr::memmem for %needle% patterns). +macro_rules! 
arrow_like_bench { + ($name:ident, $make_fn:ident, $pattern:expr) => { + #[divan::bench] + fn $name(bencher: Bencher) { + let fsst = $make_fn(N); + let len = fsst.len(); + // Pre-decompress to canonical (VarBinViewArray) + let canonical = fsst + .into_array() + .execute::(&mut SESSION.create_execution_ctx()) + .unwrap() + .into_array(); + let pattern = ConstantArray::new($pattern, len).into_array(); + bencher.bench_local(|| { + Like.try_new_array( + len, + LikeOptions::default(), + [canonical.clone(), pattern.clone()], + ) + .unwrap() + .into_array() + .execute::(&mut SESSION.create_execution_ctx()) + .unwrap() + }); + } + }; +} + +arrow_like_bench!(arrow_like_urls, make_fsst_urls, "%google%"); +arrow_like_bench!(arrow_like_cb, make_fsst_clickbench_urls, "%yandex%"); +arrow_like_bench!(arrow_like_log, make_fsst_log_lines, "%Googlebot%"); +arrow_like_bench!(arrow_like_json, make_fsst_json_strings, "%enterprise%"); +arrow_like_bench!(arrow_like_rare, make_fsst_rare_match, "%xyzzy%"); + +// End-to-end: decompress + arrow LIKE (measures total cost including decompression) +macro_rules! 
e2e_arrow_like_bench { + ($name:ident, $make_fn:ident, $pattern:expr) => { + #[divan::bench] + fn $name(bencher: Bencher) { + let fsst = $make_fn(N); + let len = fsst.len(); + let arr = fsst.into_array(); + let pattern = ConstantArray::new($pattern, len).into_array(); + bencher.bench_local(|| { + // Decompress inside the timed section + let canonical = arr + .clone() + .execute::(&mut SESSION.create_execution_ctx()) + .unwrap() + .into_array(); + Like.try_new_array(len, LikeOptions::default(), [canonical, pattern.clone()]) + .unwrap() + .into_array() + .execute::(&mut SESSION.create_execution_ctx()) + .unwrap() + }); + } + }; +} + +e2e_arrow_like_bench!(e2e_arrow_urls, make_fsst_urls, "%google%"); +e2e_arrow_like_bench!(e2e_arrow_cb, make_fsst_clickbench_urls, "%yandex%"); +e2e_arrow_like_bench!(e2e_arrow_log, make_fsst_log_lines, "%Googlebot%"); +e2e_arrow_like_bench!(e2e_arrow_json, make_fsst_json_strings, "%enterprise%"); +e2e_arrow_like_bench!(e2e_arrow_rare, make_fsst_rare_match, "%xyzzy%"); diff --git a/encodings/fsst/examples/inspect_clickbench.rs b/encodings/fsst/examples/inspect_clickbench.rs new file mode 100644 index 00000000000..1d10ca8f9a8 --- /dev/null +++ b/encodings/fsst/examples/inspect_clickbench.rs @@ -0,0 +1,184 @@ +// Quick script: read ClickBench parquet, FSST-compress the URL column, +// dump the symbol table, and show how LIKE patterns encode into the DFA. + +use arrow_array::Array as ArrowArray; +use arrow_array::cast::AsArray; +use arrow_schema::DataType; +use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder; +use vortex_array::ToCanonical; +use vortex_array::arrays::VarBinArray; +use vortex_array::dtype::{DType, Nullability}; + +fn main() { + let path = std::env::args() + .nth(1) + .unwrap_or_else(|| "vortex-bench/data/clickbench_partitioned/parquet/hits_0.parquet".into()); + + // --- 1. 
Read parquet, extract URL column --- + let file = std::fs::File::open(&path).expect("open parquet"); + let builder = ParquetRecordBatchReaderBuilder::try_new(file).expect("parquet builder"); + let schema = builder.schema().clone(); + + let url_idx = schema + .fields() + .iter() + .position(|f| f.name() == "URL") + .expect("no URL column"); + println!("URL column index: {url_idx}"); + + let reader = builder.build().expect("build reader"); + let batch = reader.into_iter().next().expect("no batches").expect("batch error"); + let url_col = batch.column(url_idx); + println!("Batch rows: {}, URL dtype: {:?}", batch.num_rows(), url_col.data_type()); + + let urls: Vec> = match url_col.data_type() { + DataType::Utf8 => { + let arr = url_col.as_string::(); + (0..arr.len()).map(|i| if arr.is_null(i) { None } else { Some(arr.value(i)) }).collect() + } + DataType::LargeUtf8 => { + let arr = url_col.as_string::(); + (0..arr.len()).map(|i| if arr.is_null(i) { None } else { Some(arr.value(i)) }).collect() + } + DataType::Utf8View => { + let arr = url_col.as_string_view(); + (0..arr.len()).map(|i| if arr.is_null(i) { None } else { Some(arr.value(i)) }).collect() + } + other => panic!("unexpected URL dtype: {other:?}"), + }; + + let n_urls = urls.len(); + let non_null = urls.iter().filter(|u| u.is_some()).count(); + println!("URLs: {n_urls} total, {non_null} non-null"); + + println!("\n=== Sample URLs ==="); + for (i, u) in urls.iter().enumerate().take(10) { + if let Some(s) = u { + let display = if s.len() > 100 { &s[..100] } else { s }; + println!(" [{i}] {display}"); + } else { + println!(" [{i}] NULL"); + } + } + + // --- 2. 
FSST compress --- + let varbin = VarBinArray::from_iter(urls.iter().copied(), DType::Utf8(Nullability::Nullable)); + let compressor = vortex_fsst::fsst_train_compressor(&varbin); + let fsst_arr = vortex_fsst::fsst_compress(varbin, &compressor); + + let symbols = fsst_arr.symbols(); + let symbol_lengths = fsst_arr.symbol_lengths(); + + println!("\n=== FSST Symbol Table ({} symbols) ===", symbols.len()); + println!("{:<6} {:<6} {:<20} {:<20}", "Code", "Len", "Hex", "ASCII"); + println!("{}", "-".repeat(60)); + + for (code, (sym, &len)) in symbols.iter().zip(symbol_lengths.iter()).enumerate() { + let bytes = sym.to_u64().to_le_bytes(); + let sym_bytes = &bytes[..len as usize]; + let hex: String = sym_bytes.iter().map(|b| format!("{b:02x}")).collect::>().join(" "); + let ascii: String = sym_bytes + .iter() + .map(|&b| if b.is_ascii_graphic() || b == b' ' { b as char } else { '.' }) + .collect(); + println!(" {code:<4} {len:<6} {hex:<20} {ascii:<20}"); + } + + // --- 3. Show how patterns encode --- + let patterns = [ + "google", "http", "://", ".com", "yandex", "mail", "search", "www.", + ]; + let escape_code = fsst::ESCAPE_CODE; + println!("\n=== Pattern Encoding (ESCAPE_CODE = 0x{escape_code:02x}) ==="); + + for pattern in &patterns { + println!("\nPattern \"{pattern}\":"); + let mut buf = vec![0u8; 2 * pattern.len() + 7]; + unsafe { compressor.compress_into(pattern.as_bytes(), &mut buf) }; + + // Walk codes and annotate what each one decodes to + print!(" encoded: "); + let mut pos = 0; + while pos < buf.len() { + let c = buf[pos]; + if c == escape_code { + pos += 1; + if pos < buf.len() { + let lit = buf[pos]; + let ch = if lit.is_ascii_graphic() || lit == b' ' { + format!("{}", lit as char) + } else { + format!("\\x{lit:02x}") + }; + print!("[ESC '{ch}'] "); + } + } else if (c as usize) < symbols.len() { + let sym = symbols[c as usize]; + let len = symbol_lengths[c as usize] as usize; + let bytes = sym.to_u64().to_le_bytes(); + let s: String = bytes[..len] + 
.iter() + .map(|&b| if b.is_ascii_graphic() || b == b' ' { b as char } else { '.' }) + .collect(); + print!("[0x{c:02x}→\"{s}\"] "); + } else { + print!("[0x{c:02x}?] "); + } + pos += 1; + } + println!(); + } + + // --- 4. Show sample compressed strings --- + println!("\n=== Sample Compressed Strings ==="); + let codes_varbin = fsst_arr.codes(); + let offsets = codes_varbin.offsets().to_primitive(); + let all_bytes = codes_varbin.bytes(); + let all_bytes = all_bytes.as_slice(); + + for i in 0..10.min(n_urls) { + let start: usize = offsets.as_slice::()[i] as usize; + let end: usize = offsets.as_slice::()[i + 1] as usize; + let string_codes = &all_bytes[start..end]; + let original = urls[i].unwrap_or("NULL"); + let orig_len = original.len(); + let comp_len = string_codes.len(); + let ratio = if orig_len > 0 { + comp_len as f64 / orig_len as f64 + } else { + 0.0 + }; + + let display_orig = if original.len() > 60 { &original[..60] } else { original }; + println!( + " [{i}] {orig_len}B -> {comp_len}B ({ratio:.2}x): \"{display_orig}...\"" + ); + + // Show first 30 code bytes with annotations + let show_len = string_codes.len().min(30); + let hex: String = string_codes[..show_len] + .iter() + .map(|b| { + if *b == escape_code { + "ESC".to_string() + } else { + format!("{b:02x}") + } + }) + .collect::>() + .join(" "); + println!(" codes: [{hex}{}]", if string_codes.len() > 30 { " ..." } else { "" }); + } + + // --- 5. 
Compression stats --- + let total_orig: usize = urls.iter().filter_map(|u| u.map(|s| s.len())).sum(); + let total_comp: usize = { + let off = offsets.as_slice::(); + off.last().copied().unwrap_or(0) as usize + }; + println!("\n=== Compression Stats ==="); + println!(" Original: {total_orig} bytes"); + println!(" Compressed: {total_comp} bytes"); + println!(" Ratio: {:.2}x", total_comp as f64 / total_orig as f64); + println!(" Savings: {:.1}%", (1.0 - total_comp as f64 / total_orig as f64) * 100.0); +} \ No newline at end of file diff --git a/encodings/fsst/src/compute/like.rs b/encodings/fsst/src/compute/like.rs new file mode 100644 index 00000000000..49b12ce8a98 --- /dev/null +++ b/encodings/fsst/src/compute/like.rs @@ -0,0 +1,936 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +#![allow(clippy::cast_possible_truncation)] + +use fsst::ESCAPE_CODE; +use fsst::Symbol; +use vortex_array::ArrayRef; +use vortex_array::ExecutionCtx; +use vortex_array::IntoArray; +use vortex_array::ToCanonical; +use vortex_array::arrays::BoolArray; +use vortex_array::match_each_integer_ptype; +use vortex_array::scalar_fn::fns::like::LikeKernel; +use vortex_array::scalar_fn::fns::like::LikeOptions; +use vortex_buffer::BitBuffer; +use vortex_buffer::BufferMut; +use vortex_error::VortexResult; + +use crate::FSSTArray; +use crate::FSSTVTable; + +impl LikeKernel for FSSTVTable { + #[allow(clippy::cast_possible_truncation)] + fn like( + array: &FSSTArray, + pattern: &ArrayRef, + options: LikeOptions, + _ctx: &mut ExecutionCtx, + ) -> VortexResult> { + let Some(pattern_scalar) = pattern.as_constant() else { + return Ok(None); + }; + + if options.case_insensitive { + return Ok(None); + } + + let Some(pattern_str) = pattern_scalar.as_utf8().value() else { + return Ok(None); + }; + + let Some(like_kind) = LikeKind::parse(pattern_str) else { + return Ok(None); + }; + + let symbols = array.symbols(); + let symbol_lengths = 
array.symbol_lengths(); + let negated = options.negated; + + // Access the underlying codes VarBinArray buffers directly to avoid + // dyn Iterator overhead from with_iterator. + let codes = array.codes(); + let offsets = codes.offsets().to_primitive(); + let all_bytes = codes.bytes(); + let all_bytes = all_bytes.as_slice(); + let n = codes.len(); + + let result = match like_kind { + LikeKind::Prefix(prefix) => { + let prefix = prefix.as_bytes(); + let dfa = FsstPrefixDfa::new(symbols.as_slice(), symbol_lengths.as_slice(), prefix); + match_each_integer_ptype!(offsets.ptype(), |T| { + let off = offsets.as_slice::(); + dfa_scan_to_bitbuf(n, off, all_bytes, negated, |codes| dfa.matches(codes)) + }) + } + LikeKind::Contains(needle) => { + let needle = needle.as_bytes(); + let dfa = + FsstContainsDfa::new(symbols.as_slice(), symbol_lengths.as_slice(), needle); + match_each_integer_ptype!(offsets.ptype(), |T| { + let off = offsets.as_slice::(); + dfa_scan_to_bitbuf(n, off, all_bytes, negated, |codes| dfa.matches(codes)) + }) + } + }; + + // FSST delegates validity to its codes array, so we can read it + // directly without cloning the entire FSSTArray into an ArrayRef. + let validity = array + .codes() + .validity()? + .union_nullability(pattern_scalar.dtype().nullability()); + + Ok(Some(BoolArray::new(result, validity).into_array())) + } +} + +/// Scan all strings through a DFA matcher, packing results directly into a +/// `BitBuffer` one u64 word (64 strings) at a time. This avoids the overhead +/// of `BitBufferMut::collect_bool`'s cross-crate closure indirection and +/// guarantees the compiler can see the full loop body for optimization. 
+#[inline] +fn dfa_scan_to_bitbuf( + n: usize, + offsets: &[T], + all_bytes: &[u8], + negated: bool, + matcher: F, +) -> BitBuffer +where + T: vortex_array::dtype::IntegerPType, + F: Fn(&[u8]) -> bool, +{ + let n_words = n / 64; + let remainder = n % 64; + let mut words: BufferMut = BufferMut::with_capacity(n.div_ceil(64)); + + for chunk in 0..n_words { + let base = chunk * 64; + let mut word = 0u64; + let mut start: usize = offsets[base].as_(); + for bit in 0..64 { + let end: usize = offsets[base + bit + 1].as_(); + word |= ((matcher(&all_bytes[start..end]) != negated) as u64) << bit; + start = end; + } + // SAFETY: we allocated capacity for n.div_ceil(64) words. + unsafe { words.push_unchecked(word) }; + } + + if remainder != 0 { + let base = n_words * 64; + let mut word = 0u64; + let mut start: usize = offsets[base].as_(); + for bit in 0..remainder { + let end: usize = offsets[base + bit + 1].as_(); + word |= ((matcher(&all_bytes[start..end]) != negated) as u64) << bit; + start = end; + } + unsafe { words.push_unchecked(word) }; + } + + BitBuffer::new(words.into_byte_buffer().freeze(), n) +} + +/// The subset of LIKE patterns we can handle without decompression. +enum LikeKind<'a> { + /// `prefix%` + Prefix(&'a str), + /// `%needle%` + Contains(&'a str), +} + +impl<'a> LikeKind<'a> { + fn parse(pattern: &'a str) -> Option { + if pattern == "%" { + return Some(LikeKind::Prefix("")); + } + + // Find first wildcard. + let first_wild = pattern.find(['%', '_'])?; + + // `_` as first wildcard means we can't handle it. + if pattern.as_bytes()[first_wild] == b'_' { + return None; + } + + // `prefix%` — single trailing % + if first_wild > 0 && &pattern[first_wild..] 
== "%" { + return Some(LikeKind::Prefix(&pattern[..first_wild])); + } + + // `%needle%` — leading and trailing %, no inner wildcards + if first_wild == 0 + && pattern.len() > 2 + && pattern.as_bytes()[pattern.len() - 1] == b'%' + && !pattern[1..pattern.len() - 1].contains(['%', '_']) + { + return Some(LikeKind::Contains(&pattern[1..pattern.len() - 1])); + } + + None + } +} + +// --------------------------------------------------------------------------- +// DFA for prefix matching (LIKE 'prefix%') +// --------------------------------------------------------------------------- + +/// Precomputed shift-based DFA for prefix matching on FSST codes. +/// +/// States 0..prefix_len track match progress, plus ACCEPT and FAIL. +/// Uses the same shift-based approach as the contains DFA: all state +/// transitions packed into a `u64` per code byte. For prefixes longer +/// than 13 characters, falls back to a fused u8 table. +struct FsstPrefixDfa { + /// Packed transitions: `(table[code] >> (state * 4)) & 0xF` gives next state. + transitions: [u64; 256], + /// Packed escape transitions for literal bytes. + escape_transitions: [u64; 256], + accept_state: u8, + fail_state: u8, +} + +impl FsstPrefixDfa { + const BITS: u32 = 4; + const MASK: u64 = (1 << Self::BITS) - 1; + + fn new(symbols: &[Symbol], symbol_lengths: &[u8], prefix: &[u8]) -> Self { + // prefix.len() + 2 states (0..prefix_len, accept, fail) must fit in 4 bits. + debug_assert!(prefix.len() + 2 <= (1 << Self::BITS)); + + let n_symbols = symbols.len(); + let accept_state = prefix.len() as u8; + let fail_state = prefix.len() as u8 + 1; + let n_states = prefix.len() + 2; + + // Build per-symbol and per-escape-byte transitions into flat tables. 
+ let mut sym_trans = vec![fail_state; n_states * n_symbols]; + let mut esc_trans = vec![fail_state; n_states * 256]; + + for state in 0..n_states { + if state as u8 == accept_state { + for code in 0..n_symbols { + sym_trans[state * n_symbols + code] = accept_state; + } + for b in 0..256 { + esc_trans[state * 256 + b] = accept_state; + } + continue; + } + if state as u8 == fail_state { + continue; + } + + for code in 0..n_symbols { + let sym = symbols[code].to_u64().to_le_bytes(); + let sym_len = symbol_lengths[code] as usize; + let remaining = prefix.len() - state; + let cmp = sym_len.min(remaining); + + if sym[..cmp] == prefix[state..state + cmp] { + let next = state + cmp; + sym_trans[state * n_symbols + code] = if next >= prefix.len() { + accept_state + } else { + next as u8 + }; + } + } + + for b in 0..256usize { + if b as u8 == prefix[state] { + let next = state + 1; + esc_trans[state * 256 + b] = if next >= prefix.len() { + accept_state + } else { + next as u8 + }; + } + } + } + + // Fuse symbol transitions into a 256-wide table. + let escape_sentinel = fail_state + 1; + let mut fused = vec![fail_state; n_states * 256]; + for state in 0..n_states { + for code in 0..n_symbols { + fused[state * 256 + code] = sym_trans[state * n_symbols + code]; + } + fused[state * 256 + ESCAPE_CODE as usize] = escape_sentinel; + } + + // Pack into u64 shift tables. 
+ let mut transitions = [0u64; 256]; + for code_byte in 0..256usize { + let mut packed = 0u64; + for state in 0..n_states { + packed |= (fused[state * 256 + code_byte] as u64) << (state as u32 * Self::BITS); + } + transitions[code_byte] = packed; + } + + let mut escape_transitions = [0u64; 256]; + for byte_val in 0..256usize { + let mut packed = 0u64; + for state in 0..n_states { + packed |= (esc_trans[state * 256 + byte_val] as u64) << (state as u32 * Self::BITS); + } + escape_transitions[byte_val] = packed; + } + + Self { + transitions, + escape_transitions, + accept_state, + fail_state, + } + } + + #[inline] + fn matches(&self, codes: &[u8]) -> bool { + let mut state = 0u8; + let mut pos = 0; + while pos < codes.len() { + let code = codes[pos]; + pos += 1; + let packed = self.transitions[code as usize]; + let next = ((packed >> (state as u32 * Self::BITS)) & Self::MASK) as u8; + if next == self.fail_state + 1 { + // Escape sentinel: read literal byte. + if pos >= codes.len() { + return false; + } + let b = codes[pos]; + pos += 1; + let esc_packed = self.escape_transitions[b as usize]; + state = ((esc_packed >> (state as u32 * Self::BITS)) & Self::MASK) as u8; + } else { + state = next; + } + if state == self.accept_state { + return true; + } + if state == self.fail_state { + return false; + } + } + state == self.accept_state + } +} + +// --------------------------------------------------------------------------- +// DFA for contains matching (LIKE '%needle%') +// --------------------------------------------------------------------------- + +/// Precomputed KMP-based DFA for substring matching on FSST codes. +/// +/// Uses a shift-based DFA that packs all state transitions into a `u64` per +/// code byte. The table load depends only on the code byte (not on the current +/// state), breaking the load-use dependency chain that makes traditional +/// table-lookup DFAs slow (~4 cycle L1 latency per transition). 
With the +/// shift-based approach, the table value can be loaded while the previous +/// transition's shift is executing. +/// +/// For needles longer than [`ShiftDfa::MAX_NEEDLE_LEN`], falls back to a +/// fused 256-entry u8 table. +enum FsstContainsDfa { + /// Branchless escape-folded DFA for short needles (len <= 7). + Branchless(Box), + /// Shift-based DFA for medium needles (len 8-14). + Shift(Box), + /// Fused u8 table DFA for long needles (len > 14). + Fused(FusedDfa), +} + +impl FsstContainsDfa { + fn new(symbols: &[Symbol], symbol_lengths: &[u8], needle: &[u8]) -> Self { + if needle.len() <= BranchlessShiftDfa::MAX_NEEDLE_LEN { + FsstContainsDfa::Branchless(Box::new(BranchlessShiftDfa::new( + symbols, + symbol_lengths, + needle, + ))) + } else if needle.len() <= ShiftDfa::MAX_NEEDLE_LEN { + FsstContainsDfa::Shift(Box::new(ShiftDfa::new(symbols, symbol_lengths, needle))) + } else { + FsstContainsDfa::Fused(FusedDfa::new(symbols, symbol_lengths, needle)) + } + } + + #[inline] + fn matches(&self, codes: &[u8]) -> bool { + match self { + FsstContainsDfa::Branchless(dfa) => dfa.matches(codes), + FsstContainsDfa::Shift(dfa) => dfa.matches(codes), + FsstContainsDfa::Fused(dfa) => dfa.matches(codes), + } + } +} + +/// Branchless escape-folded DFA for short needles (len <= 7). +/// +/// Folds escape handling into the state space so that `matches()` is +/// completely branchless (except for loop control). The state layout is: +/// - States 0..N-1: normal match-progress states +/// - State N: accept (sticky for all inputs) +/// - States N+1..2N: escape states (state `s+N+1` means "was in state `s`, +/// just consumed ESCAPE_CODE") +/// +/// Total states: 2N+1. With 4-bit packing, max N=7. +struct BranchlessShiftDfa { + /// For each code byte (0..255): a `u64` packing all state transitions. + /// Bits `[state*4 .. state*4+4)` encode the next state for that input. 
+ transitions: [u64; 256], + accept_state: u8, +} + +impl BranchlessShiftDfa { + const BITS: u32 = 4; + const MASK: u64 = (1 << Self::BITS) - 1; + /// Maximum needle length: need 2N+1 states to fit in 16 slots (4 bits). + /// 2*7+1 = 15 <= 16, so max N = 7. + const MAX_NEEDLE_LEN: usize = 7; + + fn new(symbols: &[Symbol], symbol_lengths: &[u8], needle: &[u8]) -> Self { + let n = needle.len(); + debug_assert!(n <= Self::MAX_NEEDLE_LEN); + + let n_symbols = symbols.len(); + let accept_state = n as u8; + let n_normal_states = n + 1; // states 0..n (inclusive, n = accept) + let total_states = 2 * n + 1; + debug_assert!(total_states <= (1 << Self::BITS)); + + let byte_table = kmp_byte_transitions(needle); + + // Build per-symbol transitions for normal states (0..n, where n=accept). + let mut sym_trans = vec![0u8; n_normal_states * n_symbols]; + for state in 0..n_normal_states { + for code in 0..n_symbols { + if state as u8 == accept_state { + sym_trans[state * n_symbols + code] = accept_state; + continue; + } + let sym = symbols[code].to_u64().to_le_bytes(); + let sym_len = symbol_lengths[code] as usize; + let mut s = state as u16; + for &b in &sym[..sym_len] { + if s == accept_state as u16 { + break; + } + s = byte_table[s as usize * 256 + b as usize]; + } + sym_trans[state * n_symbols + code] = s as u8; + } + } + + // Build the fused transition table with 2N+1 states. 
+ let mut fused = vec![0u8; total_states * 256]; + + for code_byte in 0..256usize { + // Normal states 0..n-1 (not yet accepted) + for s in 0..n { + if code_byte == ESCAPE_CODE as usize { + // Transition to escape state s+n+1 + fused[s * 256 + code_byte] = (s + n + 1) as u8; + } else if code_byte < n_symbols { + fused[s * 256 + code_byte] = sym_trans[s * n_symbols + code_byte]; + } + // else: invalid symbol code, stays 0 (reset) + } + + // Accept state n: sticky + fused[n * 256 + code_byte] = accept_state; + + // Escape states n+1..2n: byte-level KMP transition + for s in 0..n { + let esc_state = s + n + 1; + // After escape, use byte-level transition from state s. + // Result is always a normal state (0..n). + let next = byte_table[s * 256 + code_byte] as u8; + fused[esc_state * 256 + code_byte] = next; + } + } + + // Pack into u64 shift table. + let mut transitions = [0u64; 256]; + for code_byte in 0..256usize { + let mut packed = 0u64; + for state in 0..total_states { + packed |= (fused[state * 256 + code_byte] as u64) << (state as u32 * Self::BITS); + } + transitions[code_byte] = packed; + } + + Self { + transitions, + accept_state, + } + } + + /// Completely branchless matching (except loop control). + #[inline] + fn matches(&self, codes: &[u8]) -> bool { + let mut state = 0u8; + for &code in codes { + let packed = self.transitions[code as usize]; + state = ((packed >> (state as u32 * Self::BITS)) & Self::MASK) as u8; + } + state == self.accept_state + } +} + +/// Shift-based DFA: packs all state transitions into a `u64` per input byte. +/// +/// For a DFA with S states (S <= 16, using 4 bits each), we store transitions +/// for ALL states in one `u64`. Transition: `next = (table[code] >> (state * 4)) & 0xF`. +/// +/// Supports needles up to 14 characters (needle.len() + 2 <= 16 to fit escape +/// sentinel). This covers virtually all practical LIKE patterns. +struct ShiftDfa { + /// For each code byte (0..255): a `u64` packing all state transitions. 
+ /// Bits `[state*4 .. state*4+4)` encode the next state for that input. + transitions: [u64; 256], + /// Same layout for escape byte transitions. + escape_transitions: [u64; 256], + accept_state: u8, + escape_sentinel: u8, +} + +impl ShiftDfa { + const BITS: u32 = 4; + const MASK: u64 = (1 << Self::BITS) - 1; + /// Maximum needle length: 2^BITS - 2 (need room for accept + sentinel). + const MAX_NEEDLE_LEN: usize = (1 << Self::BITS) - 2; + + fn new(symbols: &[Symbol], symbol_lengths: &[u8], needle: &[u8]) -> Self { + debug_assert!(needle.len() <= Self::MAX_NEEDLE_LEN); + + let n_symbols = symbols.len(); + let n_states = needle.len() + 1; + let accept_state = needle.len() as u8; + let escape_sentinel = needle.len() as u8 + 1; + + let byte_table = kmp_byte_transitions(needle); + + // Build per-symbol transitions into a flat table first. + let mut sym_trans = vec![0u16; n_states * n_symbols]; + for state in 0..n_states { + for code in 0..n_symbols { + if state as u8 == accept_state { + sym_trans[state * n_symbols + code] = accept_state as u16; + continue; + } + let sym = symbols[code].to_u64().to_le_bytes(); + let sym_len = symbol_lengths[code] as usize; + let mut s = state as u16; + for &b in &sym[..sym_len] { + if s == accept_state as u16 { + break; + } + s = byte_table[s as usize * 256 + b as usize]; + } + sym_trans[state * n_symbols + code] = s; + } + } + + // Build fused 256-wide table, then pack into u64 shift tables. 
+ let mut fused = vec![0u8; n_states * 256]; + for state in 0..n_states { + for code in 0..n_symbols { + fused[state * 256 + code] = sym_trans[state * n_symbols + code] as u8; + } + fused[state * 256 + ESCAPE_CODE as usize] = escape_sentinel; + } + + let mut transitions = [0u64; 256]; + for code_byte in 0..256usize { + let mut packed = 0u64; + for state in 0..n_states { + let next = fused[state * 256 + code_byte]; + packed |= (next as u64) << (state as u32 * Self::BITS); + } + transitions[code_byte] = packed; + } + + let mut escape_transitions = [0u64; 256]; + for byte_val in 0..256usize { + let mut packed = 0u64; + for state in 0..n_states { + let next = byte_table[state * 256 + byte_val] as u8; + packed |= (next as u64) << (state as u32 * Self::BITS); + } + escape_transitions[byte_val] = packed; + } + + Self { + transitions, + escape_transitions, + accept_state, + escape_sentinel, + } + } + + /// Match with iterator-based traversal. + /// + /// Using `iter.next()` instead of manual index + bounds check helps the + /// compiler eliminate redundant bounds checks. + #[inline] + fn matches(&self, codes: &[u8]) -> bool { + let mut state = 0u8; + let mut iter = codes.iter(); + while let Some(&code) = iter.next() { + let packed = self.transitions[code as usize]; + let next = ((packed >> (state as u32 * Self::BITS)) & Self::MASK) as u8; + if next == self.escape_sentinel { + let Some(&b) = iter.next() else { + return false; + }; + let esc_packed = self.escape_transitions[b as usize]; + state = ((esc_packed >> (state as u32 * Self::BITS)) & Self::MASK) as u8; + } else { + state = next; + } + } + state == self.accept_state + } +} + +/// Fused 256-entry u8 table DFA. Fallback for needles > 14 characters. 
+struct FusedDfa { + transitions: Vec, + escape_transitions: Vec, + accept_state: u8, + escape_sentinel: u8, +} + +impl FusedDfa { + fn new(symbols: &[Symbol], symbol_lengths: &[u8], needle: &[u8]) -> Self { + let n_symbols = symbols.len(); + let accept_state = needle.len() as u8; + let n_states = needle.len() + 1; + let escape_sentinel = needle.len() as u8 + 1; + + let byte_table = kmp_byte_transitions(needle); + + let mut symbol_transitions = vec![0u16; n_states * n_symbols]; + for state in 0..n_states { + for code in 0..n_symbols { + if state as u8 == accept_state { + symbol_transitions[state * n_symbols + code] = accept_state as u16; + continue; + } + let sym = symbols[code].to_u64().to_le_bytes(); + let sym_len = symbol_lengths[code] as usize; + let mut s = state as u16; + for &b in &sym[..sym_len] { + if s == accept_state as u16 { + break; + } + s = byte_table[s as usize * 256 + b as usize]; + } + symbol_transitions[state * n_symbols + code] = s; + } + } + + let mut transitions = vec![0u8; n_states * 256]; + for state in 0..n_states { + for code in 0..n_symbols { + transitions[state * 256 + code] = + symbol_transitions[state * n_symbols + code] as u8; + } + transitions[state * 256 + ESCAPE_CODE as usize] = escape_sentinel; + } + + let escape_transitions: Vec = byte_table.iter().map(|&v| v as u8).collect(); + + Self { + transitions, + escape_transitions, + accept_state, + escape_sentinel, + } + } + + #[inline] + fn matches(&self, codes: &[u8]) -> bool { + let mut state = 0u8; + let mut pos = 0; + while pos < codes.len() { + let code = codes[pos]; + pos += 1; + let next = self.transitions[state as usize * 256 + code as usize]; + if next == self.escape_sentinel { + if pos >= codes.len() { + return false; + } + let b = codes[pos]; + pos += 1; + state = self.escape_transitions[state as usize * 256 + b as usize]; + } else { + state = next; + } + if state == self.accept_state { + return true; + } + } + false + } +} + +// 
--------------------------------------------------------------------------- +// KMP helpers +// --------------------------------------------------------------------------- + +fn kmp_byte_transitions(needle: &[u8]) -> Vec { + let n_states = needle.len() + 1; + let accept = needle.len() as u16; + let failure = kmp_failure_table(needle); + + let mut table = vec![0u16; n_states * 256]; + for state in 0..n_states { + for byte in 0..256u16 { + if state == needle.len() { + table[state * 256 + byte as usize] = accept; + continue; + } + let mut s = state; + loop { + if byte as u8 == needle[s] { + s += 1; + break; + } + if s == 0 { + break; + } + s = failure[s - 1]; + } + table[state * 256 + byte as usize] = s as u16; + } + } + table +} + +fn kmp_failure_table(needle: &[u8]) -> Vec { + let mut failure = vec![0usize; needle.len()]; + let mut k = 0; + for i in 1..needle.len() { + while k > 0 && needle[k] != needle[i] { + k = failure[k - 1]; + } + if needle[k] == needle[i] { + k += 1; + } + failure[i] = k; + } + failure +} + +#[cfg(test)] +mod tests { + use std::sync::LazyLock; + + use vortex_array::Canonical; + use vortex_array::IntoArray; + use vortex_array::VortexSessionExecute; + use vortex_array::arrays::BoolArray; + use vortex_array::arrays::ConstantArray; + use vortex_array::arrays::VarBinArray; + use vortex_array::arrays::scalar_fn::ScalarFnArrayExt; + use vortex_array::assert_arrays_eq; + use vortex_array::dtype::DType; + use vortex_array::dtype::Nullability; + use vortex_array::scalar_fn::fns::like::Like; + use vortex_array::scalar_fn::fns::like::LikeKernel; + use vortex_array::scalar_fn::fns::like::LikeOptions; + use vortex_array::session::ArraySession; + use vortex_error::VortexResult; + use vortex_session::VortexSession; + + use crate::FSSTArray; + use crate::FSSTVTable; + use crate::fsst_compress; + use crate::fsst_train_compressor; + + static SESSION: LazyLock = + LazyLock::new(|| VortexSession::empty().with::()); + + fn make_fsst(strings: &[Option<&str>], 
nullability: Nullability) -> FSSTArray { + let varbin = VarBinArray::from_iter(strings.iter().copied(), DType::Utf8(nullability)); + let compressor = fsst_train_compressor(&varbin); + fsst_compress(varbin, &compressor) + } + + fn run_like(array: FSSTArray, pattern: &str, opts: LikeOptions) -> VortexResult { + let len = array.len(); + let arr = array.into_array(); + let pattern = ConstantArray::new(pattern, len).into_array(); + let result = Like + .try_new_array(len, opts, [arr, pattern])? + .into_array() + .execute::(&mut SESSION.create_execution_ctx())?; + Ok(result.into_bool()) + } + + #[test] + fn test_like_prefix() -> VortexResult<()> { + let fsst = make_fsst( + &[ + Some("http://example.com"), + Some("http://test.org"), + Some("ftp://files.net"), + Some("http://vortex.dev"), + Some("ssh://server.io"), + ], + Nullability::NonNullable, + ); + let result = run_like(fsst, "http%", LikeOptions::default())?; + assert_arrays_eq!( + &result, + &BoolArray::from_iter([true, true, false, true, false]) + ); + Ok(()) + } + + #[test] + fn test_like_prefix_with_nulls() -> VortexResult<()> { + let fsst = make_fsst( + &[Some("hello"), None, Some("help"), None, Some("goodbye")], + Nullability::Nullable, + ); + let result = run_like(fsst, "hel%", LikeOptions::default())?; + assert_arrays_eq!( + &result, + &BoolArray::from_iter([Some(true), None, Some(true), None, Some(false)]) + ); + Ok(()) + } + + #[test] + fn test_like_contains() -> VortexResult<()> { + let fsst = make_fsst( + &[ + Some("hello world"), + Some("say hello"), + Some("goodbye"), + Some("hellooo"), + ], + Nullability::NonNullable, + ); + let result = run_like(fsst, "%hello%", LikeOptions::default())?; + assert_arrays_eq!(&result, &BoolArray::from_iter([true, true, false, true])); + Ok(()) + } + + #[test] + fn test_like_contains_cross_symbol() -> VortexResult<()> { + let fsst = make_fsst( + &[ + Some("the quick brown fox jumps over the lazy dog"), + Some("a short string"), + Some("the lazy dog sleeps"), + Some("no 
match"), + ], + Nullability::NonNullable, + ); + let result = run_like(fsst, "%lazy dog%", LikeOptions::default())?; + assert_arrays_eq!(&result, &BoolArray::from_iter([true, false, true, false])); + Ok(()) + } + + #[test] + fn test_not_like_contains() -> VortexResult<()> { + let fsst = make_fsst( + &[Some("foobar_sdf"), Some("sdf_start"), Some("nothing")], + Nullability::NonNullable, + ); + let opts = LikeOptions { + negated: true, + case_insensitive: false, + }; + let result = run_like(fsst, "%sdf%", opts)?; + assert_arrays_eq!(&result, &BoolArray::from_iter([false, false, true])); + Ok(()) + } + + #[test] + fn test_like_match_all() -> VortexResult<()> { + let fsst = make_fsst( + &[Some("abc"), Some(""), Some("xyz")], + Nullability::NonNullable, + ); + let result = run_like(fsst, "%", LikeOptions::default())?; + assert_arrays_eq!(&result, &BoolArray::from_iter([true, true, true])); + Ok(()) + } + + /// Call `LikeKernel::like` directly on the FSSTArray and verify it + /// returns `Some(...)` (i.e. the kernel handles it, rather than + /// returning `None` which would mean "fall back to decompress"). + #[test] + fn test_like_prefix_kernel_handles() -> VortexResult<()> { + let fsst = make_fsst( + &[Some("http://a.com"), Some("ftp://b.com")], + Nullability::NonNullable, + ); + let pattern = ConstantArray::new("http%", fsst.len()).into_array(); + let mut ctx = SESSION.create_execution_ctx(); + + let result = + ::like(&fsst, &pattern, LikeOptions::default(), &mut ctx)?; + assert!(result.is_some(), "FSST LikeKernel should handle prefix%"); + assert_arrays_eq!(result.unwrap(), BoolArray::from_iter([true, false])); + Ok(()) + } + + /// Same direct-call check for the contains pattern `%needle%`. 
+ #[test] + fn test_like_contains_kernel_handles() -> VortexResult<()> { + let fsst = make_fsst( + &[Some("hello world"), Some("goodbye")], + Nullability::NonNullable, + ); + let pattern = ConstantArray::new("%world%", fsst.len()).into_array(); + let mut ctx = SESSION.create_execution_ctx(); + + let result = + ::like(&fsst, &pattern, LikeOptions::default(), &mut ctx)?; + assert!(result.is_some(), "FSST LikeKernel should handle %needle%"); + assert_arrays_eq!(result.unwrap(), BoolArray::from_iter([true, false])); + Ok(()) + } + + /// Patterns we can't handle should return `None` (fall back). + #[test] + fn test_like_kernel_falls_back_for_complex_pattern() -> VortexResult<()> { + let fsst = make_fsst(&[Some("abc"), Some("def")], Nullability::NonNullable); + let mut ctx = SESSION.create_execution_ctx(); + + // Suffix pattern — not handled. + let pattern = ConstantArray::new("%abc", fsst.len()).into_array(); + let result = + ::like(&fsst, &pattern, LikeOptions::default(), &mut ctx)?; + assert!(result.is_none(), "suffix pattern should fall back"); + + // Underscore wildcard — not handled. + let pattern = ConstantArray::new("a_c", fsst.len()).into_array(); + let result = + ::like(&fsst, &pattern, LikeOptions::default(), &mut ctx)?; + assert!(result.is_none(), "underscore pattern should fall back"); + + // Case-insensitive — not handled. 
+ let pattern = ConstantArray::new("abc%", fsst.len()).into_array(); + let opts = LikeOptions { + negated: false, + case_insensitive: true, + }; + let result = ::like(&fsst, &pattern, opts, &mut ctx)?; + assert!(result.is_none(), "ilike should fall back"); + + Ok(()) + } +} diff --git a/encodings/fsst/src/compute/mod.rs b/encodings/fsst/src/compute/mod.rs index 0c98126e098..2a98abfb1b3 100644 --- a/encodings/fsst/src/compute/mod.rs +++ b/encodings/fsst/src/compute/mod.rs @@ -4,6 +4,7 @@ mod cast; mod compare; mod filter; +mod like; use vortex_array::ArrayRef; use vortex_array::DynArray; diff --git a/encodings/fsst/src/kernel.rs b/encodings/fsst/src/kernel.rs index daf49b74690..7e2bdab70d7 100644 --- a/encodings/fsst/src/kernel.rs +++ b/encodings/fsst/src/kernel.rs @@ -5,6 +5,7 @@ use vortex_array::arrays::dict::TakeExecuteAdaptor; use vortex_array::arrays::filter::FilterExecuteAdaptor; use vortex_array::kernel::ParentKernelSet; use vortex_array::scalar_fn::fns::binary::CompareExecuteAdaptor; +use vortex_array::scalar_fn::fns::like::LikeExecuteAdaptor; use crate::FSSTVTable; @@ -12,6 +13,7 @@ pub(super) const PARENT_KERNELS: ParentKernelSet = ParentKernelSet:: ParentKernelSet::lift(&CompareExecuteAdaptor(FSSTVTable)), ParentKernelSet::lift(&FilterExecuteAdaptor(FSSTVTable)), ParentKernelSet::lift(&TakeExecuteAdaptor(FSSTVTable)), + ParentKernelSet::lift(&LikeExecuteAdaptor(FSSTVTable)), ]); #[cfg(test)] diff --git a/encodings/fsst/src/tests.rs b/encodings/fsst/src/tests.rs index fd64c65e291..1efc6d4fa87 100644 --- a/encodings/fsst/src/tests.rs +++ b/encodings/fsst/src/tests.rs @@ -1,10 +1,13 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +#![allow(clippy::cast_possible_truncation, clippy::unnecessary_map_or)] + use vortex_array::ArrayRef; use vortex_array::DynArray; use vortex_array::IntoArray; use vortex_array::ToCanonical; +use vortex_array::arrays::VarBinArray; use 
vortex_array::arrays::varbin::builder::VarBinBuilder; use vortex_array::assert_arrays_eq; use vortex_array::assert_nth_scalar; @@ -13,6 +16,7 @@ use vortex_array::dtype::Nullability; use vortex_buffer::buffer; use vortex_mask::Mask; +use crate::FSSTArray; use crate::FSSTVTable; use crate::fsst_compress; use crate::fsst_train_compressor; @@ -98,3 +102,672 @@ fn test_fsst_array_ops() { assert_arrays_eq!(fsst_array.to_array(), canonical_array); } + +// --------------------------------------------------------------------------- +// DFA-based prefix and contains matching on FSST-compressed codes. +// +// The key idea: precompute a transition table so that each FSST code +// (which decodes to 1–8 bytes) maps to a single table lookup instead +// of a per-byte inner loop. This makes the matching loop O(|codes|) +// rather than O(|decoded_string|). +// --------------------------------------------------------------------------- + +use fsst::ESCAPE_CODE; +use fsst::Symbol; +use vortex_array::accessor::ArrayAccessor; + +/// Build the KMP failure (partial-match) table for `needle`. +fn kmp_failure_table(needle: &[u8]) -> Vec { + let mut failure = vec![0usize; needle.len()]; + let mut k = 0; + for i in 1..needle.len() { + while k > 0 && needle[k] != needle[i] { + k = failure[k - 1]; + } + if needle[k] == needle[i] { + k += 1; + } + failure[i] = k; + } + failure +} + +/// Build a full KMP byte-level transition table. +/// +/// `byte_transitions[state * 256 + byte] = next_state` +/// +/// This is the classic DFA form of KMP: for every (state, byte) pair we +/// know the next state without branching through the failure chain at +/// match time. +fn kmp_byte_transitions(needle: &[u8]) -> Vec { + let n_states = needle.len() + 1; + let accept = needle.len() as u16; + let failure = kmp_failure_table(needle); + + let mut table = vec![0u16; n_states * 256]; + for state in 0..n_states { + for byte in 0..256u16 { + if state == needle.len() { + // Accept is absorbing. 
+ table[state * 256 + byte as usize] = accept; + continue; + } + let mut s = state; + loop { + if byte as u8 == needle[s] { + s += 1; + break; + } + if s == 0 { + break; + } + s = failure[s - 1]; + } + table[state * 256 + byte as usize] = s as u16; + } + } + table +} + +// --------------------------------------------------------------------------- +// FsstPrefixDfa — one table-lookup per code for `starts_with` +// --------------------------------------------------------------------------- + +/// DFA whose states track how many leading bytes of `prefix` have been +/// matched. Transitions are precomputed per (state, symbol-code) so the +/// hot loop does one table lookup per FSST code. +/// +/// States: +/// 0 .. prefix.len()-1 — matched that many prefix bytes +/// prefix.len() — ACCEPT (whole prefix matched) +/// prefix.len()+1 — FAIL (absorbing dead state) +struct FsstPrefixDfa { + /// `symbol_transitions[state * n_symbols + code]` + symbol_transitions: Vec, + /// `escape_transitions[state * 256 + escaped_byte]` + escape_transitions: Vec, + n_symbols: usize, + accept_state: u16, + fail_state: u16, +} + +impl FsstPrefixDfa { + fn new(symbols: &[Symbol], symbol_lengths: &[u8], prefix: &[u8]) -> Self { + let n_symbols = symbols.len(); + let accept_state = prefix.len() as u16; + let fail_state = prefix.len() as u16 + 1; + let n_states = prefix.len() + 2; + + let mut symbol_transitions = vec![fail_state; n_states * n_symbols]; + let mut escape_transitions = vec![fail_state; n_states * 256]; + + for state in 0..n_states { + // Accept and fail are absorbing. + if state as u16 == accept_state { + for code in 0..n_symbols { + symbol_transitions[state * n_symbols + code] = accept_state; + } + for b in 0..256 { + escape_transitions[state * 256 + b] = accept_state; + } + continue; + } + if state as u16 == fail_state { + // Already filled with fail_state. + continue; + } + + // Symbol transitions: simulate matching all symbol bytes. 
+ for code in 0..n_symbols { + let sym = symbols[code].to_u64().to_le_bytes(); + let sym_len = symbol_lengths[code] as usize; + let remaining = prefix.len() - state; + let cmp = sym_len.min(remaining); + + if sym[..cmp] == prefix[state..state + cmp] { + let next = state + cmp; + symbol_transitions[state * n_symbols + code] = if next >= prefix.len() { + accept_state + } else { + next as u16 + }; + } + // else: stays fail_state (default) + } + + // Escape transitions: single byte. + for b in 0..256usize { + if b as u8 == prefix[state] { + let next = state + 1; + escape_transitions[state * 256 + b] = if next >= prefix.len() { + accept_state + } else { + next as u16 + }; + } + // else: stays fail_state + } + } + + Self { + symbol_transitions, + escape_transitions, + n_symbols, + accept_state, + fail_state, + } + } + + fn matches(&self, codes: &[u8]) -> bool { + let mut state = 0u16; + let mut pos = 0; + + while pos < codes.len() { + if state == self.accept_state { + return true; + } + if state == self.fail_state { + return false; + } + + let code = codes[pos]; + pos += 1; + + if code == ESCAPE_CODE { + if pos >= codes.len() { + return false; + } + let b = codes[pos]; + pos += 1; + state = self.escape_transitions[state as usize * 256 + b as usize]; + } else { + debug_assert!( + (code as usize) < self.n_symbols, + "code {code} >= n_symbols {}", + self.n_symbols, + ); + state = self.symbol_transitions[state as usize * self.n_symbols + code as usize]; + } + } + + state == self.accept_state + } +} + +// --------------------------------------------------------------------------- +// FsstContainsDfa — one table-lookup per code for substring search +// --------------------------------------------------------------------------- + +/// DFA that checks whether the decoded string contains `needle`. +/// +/// Built by precomputing, for each (KMP-state, symbol-code), the state +/// reached after feeding all of that symbol's bytes through the KMP +/// automaton. 
Escape codes fall back to the byte-level KMP table +/// (one lookup per escaped byte, but escapes are rare). +struct FsstContainsDfa { + /// `symbol_transitions[state * n_symbols + code]` + symbol_transitions: Vec, + /// `escape_transitions[state * 256 + byte]` (= the KMP byte-level table) + escape_transitions: Vec, + n_symbols: usize, + accept_state: u16, +} + +impl FsstContainsDfa { + fn new(symbols: &[Symbol], symbol_lengths: &[u8], needle: &[u8]) -> Self { + let n_symbols = symbols.len(); + let accept_state = needle.len() as u16; + let n_states = needle.len() + 1; + + // Byte-level KMP DFA — also used directly for escape transitions. + let byte_table = kmp_byte_transitions(needle); + + // Per-symbol transitions: simulate feeding all symbol bytes. + let mut symbol_transitions = vec![0u16; n_states * n_symbols]; + for state in 0..n_states { + for code in 0..n_symbols { + if state as u16 == accept_state { + symbol_transitions[state * n_symbols + code] = accept_state; + continue; + } + + let sym = symbols[code].to_u64().to_le_bytes(); + let sym_len = symbol_lengths[code] as usize; + + let mut s = state as u16; + for &b in &sym[..sym_len] { + if s == accept_state { + break; + } + s = byte_table[s as usize * 256 + b as usize]; + } + symbol_transitions[state * n_symbols + code] = s; + } + } + + Self { + symbol_transitions, + escape_transitions: byte_table, + n_symbols, + accept_state, + } + } + + fn matches(&self, codes: &[u8]) -> bool { + let mut state = 0u16; + let mut pos = 0; + + while pos < codes.len() { + if state == self.accept_state { + return true; + } + + let code = codes[pos]; + pos += 1; + + if code == ESCAPE_CODE { + if pos >= codes.len() { + return false; + } + let b = codes[pos]; + pos += 1; + state = self.escape_transitions[state as usize * 256 + b as usize]; + } else { + debug_assert!( + (code as usize) < self.n_symbols, + "code {code} >= n_symbols {}", + self.n_symbols, + ); + state = self.symbol_transitions[state as usize * self.n_symbols + code as 
usize]; + } + } + + state == self.accept_state + } +} + +// --------------------------------------------------------------------------- +// Helpers that apply the DFAs across an FSSTArray +// --------------------------------------------------------------------------- + +fn fsst_prefix_match(array: &FSSTArray, prefix: &[u8]) -> Vec { + if prefix.is_empty() { + return vec![true; array.len()]; + } + let dfa = FsstPrefixDfa::new( + array.symbols().as_slice(), + array.symbol_lengths().as_slice(), + prefix, + ); + array.codes().with_iterator(|iter| { + iter.map(|codes| match codes { + Some(c) => dfa.matches(c), + None => false, + }) + .collect() + }) +} + +fn fsst_contains_match(array: &FSSTArray, needle: &[u8]) -> Vec { + if needle.is_empty() { + return vec![true; array.len()]; + } + let dfa = FsstContainsDfa::new( + array.symbols().as_slice(), + array.symbol_lengths().as_slice(), + needle, + ); + array.codes().with_iterator(|iter| { + iter.map(|codes| match codes { + Some(c) => dfa.matches(c), + None => false, + }) + .collect() + }) +} + +fn make_fsst(strings: &[Option<&str>]) -> FSSTArray { + let varbin = VarBinArray::from_iter( + strings.iter().copied(), + DType::Utf8(if strings.iter().any(|s| s.is_none()) { + Nullability::Nullable + } else { + Nullability::NonNullable + }), + ); + let compressor = fsst_train_compressor(&varbin); + fsst_compress(varbin, &compressor) +} + +// ---- prefix tests ---- + +#[test] +fn test_prefix_basic() { + let fsst = make_fsst(&[ + Some("http://example.com"), + Some("http://test.org"), + Some("ftp://files.net"), + Some("http://vortex.dev"), + Some("ssh://server.io"), + ]); + assert_eq!( + fsst_prefix_match(&fsst, b"http"), + [true, true, false, true, false], + ); +} + +#[test] +fn test_prefix_empty() { + let fsst = make_fsst(&[Some("abc"), Some(""), Some("xyz")]); + assert_eq!(fsst_prefix_match(&fsst, b""), [true, true, true]); +} + +#[test] +fn test_prefix_no_match() { + let fsst = make_fsst(&[Some("abc"), Some("def"), Some("ghi")]); + 
assert_eq!(fsst_prefix_match(&fsst, b"xyz"), [false, false, false]); +} + +#[test] +fn test_prefix_mid_symbol_boundary() { + let fsst = make_fsst(&[ + Some("abcdef"), + Some("abcxyz"), + Some("abdxyz"), + Some("xyzabc"), + ]); + assert_eq!(fsst_prefix_match(&fsst, b"ab"), [true, true, true, false],); +} + +#[test] +fn test_prefix_empty_strings() { + let fsst = make_fsst(&[Some(""), Some("a"), Some(""), Some("abc")]); + assert_eq!(fsst_prefix_match(&fsst, b"a"), [false, true, false, true],); +} + +#[test] +fn test_prefix_long_repeated() { + let fsst = make_fsst(&[ + Some("the quick brown fox jumps"), + Some("the quick red fox sleeps"), + Some("the slow brown dog sits"), + Some("a totally different string"), + Some("the quick brown fox runs"), + ]); + assert_eq!( + fsst_prefix_match(&fsst, b"the quick"), + [true, true, false, false, true], + ); +} + +// ---- contains tests ---- + +#[test] +fn test_contains_basic() { + let fsst = make_fsst(&[ + Some("hello world"), + Some("say hello"), + Some("goodbye"), + Some("hellooo"), + ]); + assert_eq!( + fsst_contains_match(&fsst, b"hello"), + [true, true, false, true], + ); +} + +#[test] +fn test_contains_empty_needle() { + let fsst = make_fsst(&[Some("abc"), Some("")]); + assert_eq!(fsst_contains_match(&fsst, b""), [true, true]); +} + +#[test] +fn test_contains_no_match() { + let fsst = make_fsst(&[Some("abc"), Some("def"), Some("ghi")]); + assert_eq!(fsst_contains_match(&fsst, b"xyz"), [false, false, false],); +} + +#[test] +fn test_contains_at_end() { + let fsst = make_fsst(&[ + Some("foobar_sdf"), + Some("sdf_start"), + Some("mid_sdf_mid"), + Some("nothing"), + ]); + assert_eq!( + fsst_contains_match(&fsst, b"sdf"), + [true, true, true, false], + ); +} + +#[test] +fn test_contains_overlapping_pattern() { + let fsst = make_fsst(&[Some("aaab"), Some("aab"), Some("ab"), Some("b")]); + assert_eq!( + fsst_contains_match(&fsst, b"aab"), + [true, true, false, false], + ); +} + +#[test] +fn test_contains_cross_symbol_boundary() { 
+ let fsst = make_fsst(&[ + Some("abcdefgh"), + Some("xxcdexx"), + Some("nothing_here"), + Some("abcde_fgh"), + ]); + assert_eq!( + fsst_contains_match(&fsst, b"cde"), + [true, true, false, true], + ); +} + +#[test] +fn test_contains_long_strings() { + let fsst = make_fsst(&[ + Some("the quick brown fox jumps over the lazy dog"), + Some("a]short"), + Some("the lazy dog sleeps"), + Some("no match here at all"), + ]); + assert_eq!( + fsst_contains_match(&fsst, b"lazy dog"), + [true, false, true, false], + ); +} + +// ---- DFA correctness: verify against brute-force decompress-and-check ---- + +#[test] +fn test_dfa_matches_decompressed_prefix() { + let strings: Vec> = vec![ + Some("http://example.com/page/1"), + Some("https://secure.example.com"), + Some("ftp://files.example.com"), + Some("http://another.site.org"), + Some("mailto:user@example.com"), + Some("http://x"), + Some("h"), + Some(""), + ]; + let fsst = make_fsst(&strings); + + for prefix in [ + b"".as_slice(), + b"h", + b"ht", + b"htt", + b"http", + b"http://", + b"http://example", + ] { + let dfa_result = fsst_prefix_match(&fsst, prefix); + let expected: Vec = strings + .iter() + .map(|s| s.map_or(false, |s| s.as_bytes().starts_with(prefix))) + .collect(); + assert_eq!( + dfa_result, + expected, + "prefix = {:?}", + std::str::from_utf8(prefix) + ); + } +} + +#[test] +fn test_dfa_matches_decompressed_contains() { + let strings: Vec> = vec![ + Some("the quick brown fox jumps over the lazy dog"), + Some("a lazy cat sleeps"), + Some("nothing to see here"), + Some("foxes are quick"), + Some(""), + Some("lazy"), + ]; + let fsst = make_fsst(&strings); + + for needle in [ + b"".as_slice(), + b"lazy", + b"quick", + b"fox", + b"the", + b"zzz", + b"lazy dog", + ] { + let dfa_result = fsst_contains_match(&fsst, needle); + let expected: Vec = strings + .iter() + .map(|s| { + s.map_or(false, |s| { + if needle.is_empty() { + true + } else { + s.as_bytes().windows(needle.len()).any(|w| w == needle) + } + }) + }) + 
.collect(); + assert_eq!( + dfa_result, + expected, + "needle = {:?}", + std::str::from_utf8(needle) + ); + } +} + +// --------------------------------------------------------------------------- +// Symbol-table sizing: how many FSST symbols do representative columns produce? +// --------------------------------------------------------------------------- + +#[test] +fn clickbench_like_fsst_symbol_counts() { + use rand::Rng; + use rand::SeedableRng; + use rand::rngs::StdRng; + + let mut rng = StdRng::seed_from_u64(42); + + let domains = [ + "google.com", + "facebook.com", + "github.com", + "stackoverflow.com", + "amazon.com", + "reddit.com", + "twitter.com", + "youtube.com", + "wikipedia.org", + "microsoft.com", + "apple.com", + "netflix.com", + "linkedin.com", + "cloudflare.com", + "google.co.uk", + "docs.google.com", + "mail.google.com", + "maps.google.com", + "news.ycombinator.com", + "arxiv.org", + ]; + let paths = [ + "/index.html", + "/about", + "/search?q=vortex", + "/user/profile/settings", + "/api/v2/data", + "/blog/2024/post", + "/products/item/12345", + "/docs/reference/guide", + "/login", + "/dashboard/analytics", + ]; + + // URL column + let urls: Vec> = (0..10_000) + .map(|_| { + let scheme = if rng.random_bool(0.8) { + "https" + } else { + "http" + }; + let domain = domains[rng.random_range(0..domains.len())]; + let path = paths[rng.random_range(0..paths.len())]; + Some(format!("{scheme}://{domain}{path}")) + }) + .collect(); + let url_fsst = make_fsst(&urls.iter().map(|s| s.as_deref()).collect::>()); + + // Title column: short sentences + let titles = [ + "Breaking News: Major Event Unfolds", + "How to Learn Rust in 2024", + "Top 10 Programming Languages", + "Weather Forecast for Today", + "New Study Reveals Surprising Results", + "Product Review: Latest Smartphone", + "Travel Guide: Best Destinations", + "Cooking Recipe: Quick and Easy Pasta", + "Sports Update: Championship Finals", + "Technology Trends to Watch", + ]; + let titles_repeated: Vec> = 
+ titles.iter().copied().cycle().take(10_000).map(Some).collect(); + let title_fsst = make_fsst(&titles_repeated); + + // SearchPhrase column: mostly empty, some short queries + let phrases: Vec> = (0..10_000) + .map(|i| match i % 20 { + 0 => Some("vortex database"), + 1 => Some("rust programming"), + 2 => Some("clickhouse benchmark"), + 3 => Some("data compression"), + _ => Some(""), + }) + .collect(); + let phrase_fsst = make_fsst(&phrases); + + // Referer column: URLs with more empty strings + let referers: Vec> = (0..10_000) + .map(|_| { + if rng.random_bool(0.3) { + Some(String::new()) + } else { + let domain = domains[rng.random_range(0..domains.len())]; + Some(format!("https://{domain}/")) + } + }) + .collect(); + let referer_fsst = make_fsst(&referers.iter().map(|s| s.as_deref()).collect::>()); + + eprintln!("=== FSST symbol counts for representative clickbench columns ==="); + eprintln!("URL: {} symbols", url_fsst.symbols().len()); + eprintln!("Title: {} symbols", title_fsst.symbols().len()); + eprintln!("SearchPhrase: {} symbols", phrase_fsst.symbols().len()); + eprintln!("Referer: {} symbols", referer_fsst.symbols().len()); +} diff --git a/vortex-duckdb/src/datasource.rs b/vortex-duckdb/src/datasource.rs index 6bbcc990b0a..a0b0f2afab7 100644 --- a/vortex-duckdb/src/datasource.rs +++ b/vortex-duckdb/src/datasource.rs @@ -403,7 +403,7 @@ impl TableFunction for T { // If we plumb row count estimation into the layout tree, perhaps we could use zone maps // etc. to return estimates. But this function is probably called too late anyway. Maybe // we need our own cardinality heuristics. 
- Ok(false) + Ok(true) } fn cardinality(bind_data: &Self::BindData) -> Cardinality { diff --git a/vortex-layout/src/layouts/dict/reader.rs b/vortex-layout/src/layouts/dict/reader.rs index 5054fcd27f3..e5def21f5eb 100644 --- a/vortex-layout/src/layouts/dict/reader.rs +++ b/vortex-layout/src/layouts/dict/reader.rs @@ -96,10 +96,10 @@ impl DictReader { ) .vortex_expect("must construct dict values array evaluation") .map_err(Arc::new) - .map(move |array| { - let array = array?; - Ok(SharedArray::new(array).into_array()) - }) + // .map(move |array| { + // let array = array?; + // Ok(SharedArray::new(array).into_array()) + // }) .boxed() .shared() })