diff --git a/changelog.d/1542.fix.md b/changelog.d/1542.fix.md new file mode 100644 index 000000000..80790c6a7 --- /dev/null +++ b/changelog.d/1542.fix.md @@ -0,0 +1 @@ +fix handling of OR conjunctions in the datadog search query parser diff --git a/src/datadog/search/grammar.rs b/src/datadog/search/grammar.rs index 0e9497b6a..4e5bc6d2f 100644 --- a/src/datadog/search/grammar.rs +++ b/src/datadog/search/grammar.rs @@ -3,9 +3,9 @@ use itertools::Itertools; use pest::iterators::Pair; use pest_derive::Parser; -use super::node::{ - BooleanBuilder, Comparison, ComparisonValue, LuceneClause, LuceneOccur, QueryNode, Range, -}; +use crate::datadog_search_syntax::BooleanType; + +use super::node::{Comparison, ComparisonValue, QueryNode, Range}; #[derive(Debug, Parser)] #[grammar = "src/datadog/search/grammar.pest"] @@ -33,32 +33,28 @@ impl QueryVisitor { fn visit_query(token: Pair, default_field: &str) -> QueryNode { let contents = token.into_inner(); - let mut clauses: Vec = Vec::new(); - let mut modifier: Option = None; + let mut is_not: bool = false; + + // AND takes precedence over OR. + // We will combine each consecutive clause in an AND group, + // and create a new and_group every time we encounter an OR. + // Finally, we will combine all the and_groups with OR. + + let mut and_groups: Vec = Vec::new(); + + let mut and_group: Vec = Vec::new(); + for node in contents { - let clause: Option = match node.as_rule() { + let query_node: Option = match node.as_rule() { Rule::multiterm => Some(Self::visit_multiterm(node, default_field)), Rule::conjunction => { let inner = node.into_inner().next().unwrap(); match inner.as_rule() { - Rule::AND => { - // If our conjunction is AND and the previous clause was - // just a SHOULD, we make the previous clause a MUST and - // our new clause will also be a MUST - let lastitem = clauses.last_mut().unwrap(); - if let LuceneOccur::Should = lastitem.occur { - lastitem.occur = LuceneOccur::Must; - }; - } + Rule::AND => (), Rule::OR => { - // If our conjunction is OR and the previous clause was - // a MUST, we make the previous clause a SHOULD and our - // new clause will also be a SHOULD - let lastitem = clauses.last_mut().unwrap(); - if let LuceneOccur::Must = lastitem.occur { - lastitem.occur = LuceneOccur::Should; - }; - modifier.get_or_insert(LuceneOccur::Should); + // close the current and_group and create a new one + and_groups.push(QueryNode::new_boolean(BooleanType::And, and_group)); + and_group = Vec::new(); } _ => unreachable!(), }; @@ -69,77 +65,42 @@ impl QueryVisitor { match inner.as_rule() { Rule::PLUS => (), Rule::NOT => { - modifier = Some(LuceneOccur::MustNot); + is_not = true; } _ => unreachable!(), }; None } - Rule::clause => { - let query_node = Self::visit_clause(node, default_field); - Some(LuceneClause { - occur: modifier.take().unwrap_or(LuceneOccur::Must), - node: query_node, - }) - } + Rule::clause => Some(Self::visit_clause(node, default_field)), _ => unreachable!(), }; // If we found a clause to add to our list, add it - if let Some(c) = clause { - clauses.push(c); - } - } - if clauses.len() == 1 { - let single = clauses.pop().unwrap(); - match single { - LuceneClause { - occur: LuceneOccur::MustNot, - node: QueryNode::MatchAllDocs, - } => QueryNode::MatchNoDocs, - // I hate Boxing! Every allocation is a personal failing :( - LuceneClause { - occur: LuceneOccur::MustNot, - node, - } => QueryNode::NegatedNode { - node: Box::new(node), - }, - LuceneClause { occur: _, node } => node, - } - } else { - let mut and_builder = BooleanBuilder::and(); - let mut or_builder = BooleanBuilder::or(); - let (mut has_must, mut has_must_not, mut has_should) = (false, false, false); - for c in clauses { - let LuceneClause { node, occur } = c; - match occur { - LuceneOccur::Must => { - and_builder.add_node(node); - has_must = true; - } - LuceneOccur::MustNot => { - and_builder.add_node(QueryNode::NegatedNode { - node: Box::new(node), - }); - has_must_not = true; - } - LuceneOccur::Should => { - or_builder.add_node(node); - has_should = true; - } + if let Some(mut n) = query_node { + if is_not { + is_not = false; + + n = QueryNode::NegatedNode { node: Box::new(n) } } + + and_group.push(n); } - if has_must || !has_should { - and_builder.build() - } else if !has_must_not { - or_builder.build() - } else { - and_builder.add_node(or_builder.build()); - and_builder.build() + } + + and_groups.push(QueryNode::new_boolean(BooleanType::And, and_group)); + let query_node = QueryNode::new_boolean(BooleanType::Or, and_groups); + + if let QueryNode::NegatedNode { node } = query_node { + // if the node is a negated MatchAllDocs, return MatchNoDocs + if let QueryNode::MatchAllDocs = *node { + return QueryNode::MatchNoDocs; } + return QueryNode::NegatedNode { node }; } + + query_node } - fn visit_multiterm(token: Pair, default_field: &str) -> LuceneClause { + fn visit_multiterm(token: Pair, default_field: &str) -> QueryNode { let contents = token.into_inner(); let mut terms: Vec = Vec::new(); for node in contents { @@ -149,12 +110,9 @@ impl QueryVisitor { _ => unreachable!(), } } - LuceneClause { - occur: LuceneOccur::Must, - node: QueryNode::AttributeTerm { - attr: String::from(default_field), - value: terms.join(" "), - }, + QueryNode::AttributeTerm { + attr: String::from(default_field), + value: terms.join(" "), } } diff --git a/src/datadog/search/node.rs b/src/datadog/search/node.rs index d798e18a0..957e5ced5 100644 --- a/src/datadog/search/node.rs +++ b/src/datadog/search/node.rs @@ -94,45 +94,6 @@ pub enum BooleanType { Or, } -/// Builder structure to create Boolean QueryNodes. Not strictly necessary, -/// however they're a bit more ergonomic to manipulate than reaching into -/// enums all the time. -pub struct BooleanBuilder { - /// The type of Boolean operation this node will represent. - oper: BooleanType, - /// A list of QueryNodes involved in this boolean operation. - nodes: Vec, -} - -impl BooleanBuilder { - /// Create a BooleanBuilder to produce an AND-type Boolean QueryNode. - pub fn and() -> Self { - Self { - oper: BooleanType::And, - nodes: vec![], - } - } - - /// Create a BooleanBuilder to produce an OR-type Boolean QueryNode. - pub fn or() -> Self { - Self { - oper: BooleanType::Or, - nodes: vec![], - } - } - - /// Add a QueryNode to this boolean conjunction. - pub fn add_node(&mut self, node: QueryNode) { - self.nodes.push(node); - } - - /// Consume this builder and output the finished QueryNode. - pub fn build(self) -> QueryNode { - let Self { oper, nodes } = self; - QueryNode::Boolean { oper, nodes } - } -} - /// QueryNodes represent specific search criteria to be enforced. #[derive(Clone, Debug, PartialEq)] pub enum QueryNode { @@ -343,6 +304,19 @@ impl QueryNode { format!("{attr}:") } } + + /// Group a list of nodes into a single node, using the given conjunction. + /// If the group has only one node, return a clone of that node. + pub fn new_boolean(conjunction: BooleanType, nodes: Vec) -> QueryNode { + if nodes.len() == 1 { + return nodes.into_iter().next().expect("Known to have length 1"); + } + + QueryNode::Boolean { + oper: conjunction, + nodes, + } + } } impl<'de> Deserialize<'de> for QueryNode { @@ -368,20 +342,6 @@ impl Serialize for QueryNode { } } -/// Enum representing Lucene's concept of whether a node should occur. -#[derive(Debug)] -pub enum LuceneOccur { - Must, - Should, - MustNot, -} - -#[derive(Debug)] -pub struct LuceneClause { - pub occur: LuceneOccur, - pub node: QueryNode, -} - static ESCAPE_RE: LazyLock = LazyLock::new(|| Regex::new("^\"(.+)\"$").unwrap()); /// Escapes surrounding `"` quotes when distinguishing between quoted terms isn't needed. diff --git a/src/datadog/search/parser.rs b/src/datadog/search/parser.rs index fa0474e5f..cf4a9cc13 100644 --- a/src/datadog/search/parser.rs +++ b/src/datadog/search/parser.rs @@ -158,21 +158,15 @@ mod tests { let cases = ["foo bar baz AND qux quux quuz"]; for query in cases.iter() { let res = parse(query); - if let QueryNode::Boolean { - oper: BooleanType::And, - ref nodes, - } = res - { - assert!( + assert!( + matches!(res, QueryNode::Boolean { oper: BooleanType::And, ref nodes } if matches!(nodes[0], QueryNode::AttributeTerm { ref attr, ref value } if attr == "_default_" && value == "foo bar") && matches!(nodes[1], QueryNode::AttributeTerm { ref attr, ref value } if attr == "_default_" && value == "baz") && matches!(nodes[2], QueryNode::AttributeTerm { ref attr, ref value } if attr == "_default_" && value == "qux") - && matches!(nodes[3], QueryNode::AttributeTerm { ref attr, ref value } if attr == "_default_" && value == "quux quuz"), - "Unable to properly parse '{query:?}' - got {res:?}" - ); - } else { - panic!("Unable to properly parse '{query:?}' - got {res:?}") - } + && matches!(nodes[3], QueryNode::AttributeTerm { ref attr, ref value } if attr == "_default_" && value == "quux quuz") + ), + "Unable to properly parse '{query:?}' - got {res:?}" + ); } } @@ -477,20 +471,14 @@ mod tests { let cases = ["foo:bar baz:qux quux:quuz"]; for query in cases.iter() { let res = parse(query); - if let QueryNode::Boolean { - oper: BooleanType::And, - ref nodes, - } = res - { - assert!( + assert!( + matches!(res, QueryNode::Boolean { oper: BooleanType::And, ref nodes } if matches!(nodes[0], QueryNode::AttributeTerm { ref attr, ref value } if attr == "foo" && value == "bar") && matches!(nodes[1], QueryNode::AttributeTerm { ref attr, ref value } if attr == "baz" && value == "qux") - && matches!(nodes[2], QueryNode::AttributeTerm { ref attr, ref value } if attr == "quux" && value == "quuz"), - "Unable to properly parse '{query:?}' - got {res:?}" - ); - } else { - panic!("Unable to properly parse '{query:?}' - got {res:?}") - } + && matches!(nodes[2], QueryNode::AttributeTerm { ref attr, ref value } if attr == "quux" && value == "quuz") + ), + "Unable to properly parse '{query:?}' - got {res:?}" + ); } } @@ -504,67 +492,60 @@ mod tests { ]; for query in cases.iter() { let res = parse(query); - if let QueryNode::Boolean { - oper: BooleanType::And, - ref nodes, - } = res - { - assert!( + assert!( + matches!(res, QueryNode::Boolean { oper: BooleanType::And, ref nodes } if matches!(nodes[0], QueryNode::NegatedNode { ref node } if matches!(**node, QueryNode::AttributeTerm {ref attr, ref value } if attr == "foo" && value == "bar")) && matches!(nodes[1], QueryNode::AttributeTerm { ref attr, ref value } if attr == "baz" && value == "qux") - && matches!(nodes[2], QueryNode::NegatedNode { ref node } if matches!(**node, QueryNode::AttributeTerm {ref attr, ref value } if attr == "quux" && value == "quuz")), - "Unable to properly parse '{query:?}' - got {res:?}" - ); - } else { - panic!("Unable to properly parse '{query:?}' - got {res:?}") - } + && matches!(nodes[2], QueryNode::NegatedNode { ref node } if matches!(**node, QueryNode::AttributeTerm {ref attr, ref value } if attr == "quux" && value == "quuz")) + ), + "Unable to properly parse '{query:?}' - got {res:?}" + ); } } #[test] - fn parses_boolean_nodes_with_explicit_operators() { + fn parses_boolean_nodes_with_or_not() { let cases = [ - "foo:bar OR baz:qux AND quux:quuz", - "foo:bar || baz:qux && quux:quuz", + "foo:bar OR -baz:qux quux:quuz", + "foo:bar OR NOT baz:qux quux:quuz", + "foo:bar OR -baz:qux AND quux:quuz", + "foo:bar OR NOT baz:qux AND quux:quuz", ]; for query in cases.iter() { let res = parse(query); - if let QueryNode::Boolean { - oper: BooleanType::And, - ref nodes, - } = res - { - assert!( - matches!(nodes[0], QueryNode::AttributeTerm { ref attr, ref value } if attr == "baz" && value == "qux") - && matches!(nodes[1], QueryNode::AttributeTerm { ref attr, ref value } if attr == "quux" && value == "quuz"), - "Unable to properly parse '{query:?}' - got {res:?}" - ); - } else { - panic!("Unable to properly parse '{query:?}' - got {res:?}") - } + assert!( + matches!(res, QueryNode::Boolean { oper: BooleanType::Or, ref nodes } if + matches!(nodes[0], QueryNode::AttributeTerm {ref attr, ref value } if attr == "foo" && value == "bar") + && matches!(nodes[1], QueryNode::Boolean { oper: BooleanType::And, ref nodes } if + matches!(nodes[0], QueryNode::NegatedNode { ref node } if matches!(**node, QueryNode::AttributeTerm {ref attr, ref value } if attr == "baz" && value == "qux") && + matches!(nodes[1], QueryNode::AttributeTerm { ref attr, ref value } if attr == "quux" && value == "quuz")) + ) + ), + "Unable to properly parse '{query:?}' - got {res:?}" + ); } } #[test] - fn parses_boolean_nodes_with_implicit_and_explicit_operators() { + fn parses_boolean_nodes_with_implicit_or_explicit_operators() { let cases = [ "foo:bar OR baz:qux quux:quuz", "foo:bar || baz:qux quux:quuz", + "foo:bar OR baz:qux AND quux:quuz", + "foo:bar || baz:qux && quux:quuz", ]; for query in cases.iter() { let res = parse(query); - if let QueryNode::Boolean { - oper: BooleanType::And, - ref nodes, - } = res - { - assert!( - matches!(nodes[0], QueryNode::AttributeTerm { ref attr, ref value } if attr == "quux" && value == "quuz"), - "Unable to properly parse '{query:?}' - got {res:?}" - ); - } else { - panic!("Unable to properly parse '{query:?}' - got {res:?}") - } + assert!( + matches!(res, QueryNode::Boolean { oper: BooleanType::Or, ref nodes } if + matches!(nodes[0], QueryNode::AttributeTerm {ref attr, ref value } if attr == "foo" && value == "bar") + && matches!(nodes[1], QueryNode::Boolean { oper: BooleanType::And, ref nodes } if + matches!(nodes[0], QueryNode::AttributeTerm { ref attr, ref value } if attr == "baz" && value == "qux") && + matches!(nodes[1], QueryNode::AttributeTerm { ref attr, ref value } if attr == "quux" && value == "quuz") + ) + ), + "Unable to properly parse '{query:?}' - got {res:?}" + ); } } @@ -573,22 +554,86 @@ mod tests { let cases = ["foo:bar (baz:qux quux:quuz)"]; for query in cases.iter() { let res = parse(query); - if let QueryNode::Boolean { - oper: BooleanType::And, - ref nodes, - } = res - { - assert!( + assert!( + matches!(res, QueryNode::Boolean { oper: BooleanType::And, ref nodes } if matches!(nodes[0], QueryNode::AttributeTerm {ref attr, ref value } if attr == "foo" && value == "bar") && matches!(nodes[1], QueryNode::Boolean { oper: BooleanType::And, ref nodes } if matches!(nodes[0], QueryNode::AttributeTerm { ref attr, ref value } if attr == "baz" && value == "qux") && matches!(nodes[1], QueryNode::AttributeTerm { ref attr, ref value } if attr == "quux" && value == "quuz") - ), - "Unable to properly parse '{query:?}' - got {res:?}" - ); - } else { - panic!("Unable to properly parse '{query:?}' - got {res:?}") + ) + ), + "Unable to properly parse '{query:?}' - got {res:?}" + ); + } + } + + #[test] + fn parses_nested_boolean_query_node_with_or() { + let cases = ["(foo:bar OR baz:qux) quux:quuz"]; + for query in cases.iter() { + let res = parse(query); + + assert!( + matches!(res, QueryNode::Boolean { oper: BooleanType::And, ref nodes } if + matches!(nodes[0], QueryNode::Boolean { oper: BooleanType::Or, ref nodes } if + matches!(nodes[0], QueryNode::AttributeTerm { ref attr, ref value } if attr == "foo" && value == "bar") && + matches!(nodes[1], QueryNode::AttributeTerm { ref attr, ref value } if attr == "baz" && value == "qux") + ) && matches!(nodes[1], QueryNode::AttributeTerm {ref attr, ref value } if attr == "quux" && value == "quuz")), + "Unable to properly parse '{query:?}' - got {res:?}" + ); + } + } + + #[test] + fn parses_negated_parenthesized_default_multiterm_query() { + let cases = ["NOT (foo bar)", "-(foo bar)"]; + for query in cases.iter() { + let res = parse(query); + if let QueryNode::NegatedNode { ref node } = res + && let QueryNode::AttributeTerm { + ref attr, + ref value, + } = **node + && attr == DEFAULT_FIELD + && value == "foo bar" + { + continue; + } + panic!("Unable to properly parse '{query:?}' - got {res:?}") + } + } + + #[test] + fn parses_multiterm_with_leading_not_without_parens() { + let cases = ["NOT foo bar", "- foo bar"]; // NOT only applies to the first term + for query in cases.iter() { + let res = parse(query); + + assert!( + matches!(res, QueryNode::Boolean { oper: BooleanType::And, ref nodes } if + matches!(nodes[0], QueryNode::NegatedNode { ref node } if matches!(**node, QueryNode::AttributeTerm { ref attr, ref value } if attr == DEFAULT_FIELD && value == "foo")) + && matches!(nodes[1], QueryNode::AttributeTerm { ref attr, ref value } if attr == DEFAULT_FIELD && value == "bar")), + "Unable to properly parse '{query:?}' - got {res:?}" + ); + } + } + + #[test] + fn parses_negated_parenthesized_fielded_multiterm_query() { + let cases = ["NOT foo:(bar baz)", "-foo:(bar baz)"]; + for query in cases.iter() { + let res = parse(query); + if let QueryNode::NegatedNode { ref node } = res + && let QueryNode::AttributeTerm { + ref attr, + ref value, + } = **node + && attr == "foo" + && value == "bar baz" + { + continue; } + panic!("Unable to properly parse '{query:?}' - got {res:?}") } } }