From ca691e92f196d5c7ab395b3d41d55d912b8a7a6a Mon Sep 17 00:00:00 2001 From: Tim Graham Date: Wed, 25 Jun 2025 17:37:24 -0400 Subject: [PATCH 01/18] Create django_mongodb_backend.expressions package --- django_mongodb_backend/__init__.py | 2 +- django_mongodb_backend/expressions/__init__.py | 0 .../{expressions.py => expressions/builtins.py} | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) create mode 100644 django_mongodb_backend/expressions/__init__.py rename django_mongodb_backend/{expressions.py => expressions/builtins.py} (99%) diff --git a/django_mongodb_backend/__init__.py b/django_mongodb_backend/__init__.py index 00700421a..d21566d9c 100644 --- a/django_mongodb_backend/__init__.py +++ b/django_mongodb_backend/__init__.py @@ -8,7 +8,7 @@ from .aggregates import register_aggregates # noqa: E402 from .checks import register_checks # noqa: E402 -from .expressions import register_expressions # noqa: E402 +from .expressions.builtins import register_expressions # noqa: E402 from .fields import register_fields # noqa: E402 from .functions import register_functions # noqa: E402 from .indexes import register_indexes # noqa: E402 diff --git a/django_mongodb_backend/expressions/__init__.py b/django_mongodb_backend/expressions/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/django_mongodb_backend/expressions.py b/django_mongodb_backend/expressions/builtins.py similarity index 99% rename from django_mongodb_backend/expressions.py rename to django_mongodb_backend/expressions/builtins.py index 46eef56da..4f6575052 100644 --- a/django_mongodb_backend/expressions.py +++ b/django_mongodb_backend/expressions/builtins.py @@ -25,7 +25,7 @@ ) from django.db.models.sql import Query -from .query_utils import process_lhs +from ..query_utils import process_lhs def case(self, compiler, connection): From 0756e6f27ec3a6d355fac470fedb0b3b73214606 Mon Sep 17 00:00:00 2001 From: Emanuel Lupi Date: Mon, 21 Jul 2025 21:31:04 -0300 Subject: [PATCH 02/18] Adapt 
query and compiler for operator support. --- django_mongodb_backend/compiler.py | 136 +++++++++++++++--- .../expressions/builtins.py | 4 +- django_mongodb_backend/expressions/search.py | 11 ++ .../fields/embedded_model.py | 6 +- django_mongodb_backend/query.py | 3 + 5 files changed, 134 insertions(+), 26 deletions(-) create mode 100644 django_mongodb_backend/expressions/search.py diff --git a/django_mongodb_backend/compiler.py b/django_mongodb_backend/compiler.py index a99834cf4..cb4cff4ea 100644 --- a/django_mongodb_backend/compiler.py +++ b/django_mongodb_backend/compiler.py @@ -16,6 +16,7 @@ from django.utils.functional import cached_property from pymongo import ASCENDING, DESCENDING +from .expressions.search import SearchExpression, SearchVector from .query import MongoQuery, wrap_database_errors @@ -33,6 +34,8 @@ def __init__(self, *args, **kwargs): # A list of OrderBy objects for this query. self.order_by_objs = None self.subqueries = [] + # Atlas search calls + self.search_pipeline = [] def _get_group_alias_column(self, expr, annotation_group_idx): """Generate a dummy field for use in the ids fields in $group.""" @@ -56,6 +59,29 @@ def _get_column_from_expression(self, expr, alias): column_target.set_attributes_from_name(alias) return Col(self.collection_name, column_target) + def _get_replace_expr(self, sub_expr, group, alias): + column_target = sub_expr.output_field.clone() + column_target.db_column = alias + column_target.set_attributes_from_name(alias) + inner_column = Col(self.collection_name, column_target) + if getattr(sub_expr, "distinct", False): + # If the expression should return distinct values, use + # $addToSet to deduplicate. 
+ rhs = sub_expr.as_mql(self, self.connection, resolve_inner_expression=True) + group[alias] = {"$addToSet": rhs} + replacing_expr = sub_expr.copy() + replacing_expr.set_source_expressions([inner_column, None]) + else: + group[alias] = sub_expr.as_mql(self, self.connection) + replacing_expr = inner_column + # Count must return 0 rather than null. + if isinstance(sub_expr, Count): + replacing_expr = Coalesce(replacing_expr, 0) + # Variance = StdDev^2 + if isinstance(sub_expr, Variance): + replacing_expr = Power(replacing_expr, 2) + return replacing_expr + def _prepare_expressions_for_pipeline(self, expression, target, annotation_group_idx): """ Prepare expressions for the aggregation pipeline. @@ -79,29 +105,33 @@ def _prepare_expressions_for_pipeline(self, expression, target, annotation_group alias = ( f"__aggregation{next(annotation_group_idx)}" if sub_expr != expression else target ) - column_target = sub_expr.output_field.clone() - column_target.db_column = alias - column_target.set_attributes_from_name(alias) - inner_column = Col(self.collection_name, column_target) - if sub_expr.distinct: - # If the expression should return distinct values, use - # $addToSet to deduplicate. - rhs = sub_expr.as_mql(self, self.connection, resolve_inner_expression=True) - group[alias] = {"$addToSet": rhs} - replacing_expr = sub_expr.copy() - replacing_expr.set_source_expressions([inner_column, None]) - else: - group[alias] = sub_expr.as_mql(self, self.connection) - replacing_expr = inner_column - # Count must return 0 rather than null. 
- if isinstance(sub_expr, Count): - replacing_expr = Coalesce(replacing_expr, 0) - # Variance = StdDev^2 - if isinstance(sub_expr, Variance): - replacing_expr = Power(replacing_expr, 2) - replacements[sub_expr] = replacing_expr + replacements[sub_expr] = self._get_replace_expr(sub_expr, group, alias) return replacements, group + def _prepare_search_expressions_for_pipeline(self, expression, search_idx, replacements): + searches = {} + for sub_expr in self._get_search_expressions(expression): + if sub_expr not in replacements: + alias = f"__search_expr.search{next(search_idx)}" + replacements[sub_expr] = self._get_replace_expr(sub_expr, searches, alias) + + def _prepare_search_query_for_aggregation_pipeline(self, order_by): + replacements = {} + annotation_group_idx = itertools.count(start=1) + for expr in self.query.annotation_select.values(): + self._prepare_search_expressions_for_pipeline(expr, annotation_group_idx, replacements) + + for expr, _ in order_by: + self._prepare_search_expressions_for_pipeline(expr, annotation_group_idx, replacements) + + self._prepare_search_expressions_for_pipeline( + self.having, annotation_group_idx, replacements + ) + self._prepare_search_expressions_for_pipeline( + self.get_where(), annotation_group_idx, replacements + ) + return replacements + def _prepare_annotations_for_aggregation_pipeline(self, order_by): """Prepare annotations for the aggregation pipeline.""" replacements = {} @@ -206,9 +236,57 @@ def _build_aggregation_pipeline(self, ids, group): pipeline.append({"$unset": "_id"}) return pipeline + def _compound_searches_queries(self, search_replacements): + if not search_replacements: + return [] + if len(search_replacements) > 1: + has_search = any(not isinstance(search, SearchVector) for search in search_replacements) + has_vector_search = any( + isinstance(search, SearchVector) for search in search_replacements + ) + if has_search and has_vector_search: + raise ValueError( + "Cannot combine a `$vectorSearch` with a 
`$search` operator. " + "If you need to combine them, consider restructuring your query logic or " + "running them as separate queries." + ) + if not has_search: + raise ValueError( + "Cannot combine two `$vectorSearch` operator. " + "If you need to combine them, consider restructuring your query logic or " + "running them as separate queries." + ) + raise ValueError( + "Only one $search operation is allowed per query. " + f"Received {len(search_replacements)} search expressions. " + "To combine multiple search expressions, use either a CompoundExpression for " + "fine-grained control or CombinedSearchExpression for simple logical combinations." + ) + pipeline = [] + for search, result_col in search_replacements.items(): + score_function = ( + "vectorSearchScore" if isinstance(search, SearchVector) else "searchScore" + ) + pipeline.extend( + [ + search.as_mql(self, self.connection), + { + "$addFields": { + result_col.as_mql(self, self.connection, as_path=True): { + "$meta": score_function + } + } + }, + ] + ) + return pipeline + def pre_sql_setup(self, with_col_aliases=False): extra_select, order_by, group_by = super().pre_sql_setup(with_col_aliases=with_col_aliases) - group, all_replacements = self._prepare_annotations_for_aggregation_pipeline(order_by) + search_replacements = self._prepare_search_query_for_aggregation_pipeline(order_by) + group, group_replacements = self._prepare_annotations_for_aggregation_pipeline(order_by) + all_replacements = {**search_replacements, **group_replacements} + self.search_pipeline = self._compound_searches_queries(search_replacements) # query.group_by is either: # - None: no GROUP BY # - True: group by select fields @@ -233,6 +311,9 @@ def pre_sql_setup(self, with_col_aliases=False): for target, expr in self.query.annotation_select.items() } self.order_by_objs = [expr.replace_expressions(all_replacements) for expr, _ in order_by] + if (where := self.get_where()) and search_replacements: + where = 
where.replace_expressions(search_replacements) + self.set_where(where) return extra_select, order_by, group_by def execute_sql( @@ -555,10 +636,16 @@ def get_lookup_pipeline(self): return result def _get_aggregate_expressions(self, expr): + return self._get_all_expressions_of_type(expr, Aggregate) + + def _get_search_expressions(self, expr): + return self._get_all_expressions_of_type(expr, SearchExpression) + + def _get_all_expressions_of_type(self, expr, target_type): stack = [expr] while stack: expr = stack.pop() - if isinstance(expr, Aggregate): + if isinstance(expr, target_type): yield expr elif hasattr(expr, "get_source_expressions"): stack.extend(expr.get_source_expressions()) @@ -627,6 +714,9 @@ def _get_ordering(self): def get_where(self): return getattr(self, "where", self.query.where) + def set_where(self, value): + self.where = value + def explain_query(self): # Validate format (none supported) and options. options = self.connection.ops.explain_query_prefix( diff --git a/django_mongodb_backend/expressions/builtins.py b/django_mongodb_backend/expressions/builtins.py index 4f6575052..da95d5fe2 100644 --- a/django_mongodb_backend/expressions/builtins.py +++ b/django_mongodb_backend/expressions/builtins.py @@ -53,7 +53,7 @@ def case(self, compiler, connection): } -def col(self, compiler, connection): # noqa: ARG001 +def col(self, compiler, connection, as_path=False): # noqa: ARG001 # If the column is part of a subquery and belongs to one of the parent # queries, it will be stored for reference using $let in a $lookup stage. # If the query is built with `alias_cols=False`, treat the column as @@ -71,7 +71,7 @@ def col(self, compiler, connection): # noqa: ARG001 # Add the column's collection's alias for columns in joined collections. has_alias = self.alias and self.alias != compiler.collection_name prefix = f"{self.alias}." 
if has_alias else "" - return f"${prefix}{self.target.column}" + return f"{prefix}{self.target.column}" if as_path else f"${prefix}{self.target.column}" def col_pairs(self, compiler, connection): diff --git a/django_mongodb_backend/expressions/search.py b/django_mongodb_backend/expressions/search.py new file mode 100644 index 000000000..9cdd132e3 --- /dev/null +++ b/django_mongodb_backend/expressions/search.py @@ -0,0 +1,11 @@ +from django.db.models import Expression + + +class SearchExpression(Expression): + """Base expression node for MongoDB Atlas `$search` stages.""" + + +class SearchVector(SearchExpression): + """ + Atlas Search expression that performs vector similarity search on embedded vectors. + """ diff --git a/django_mongodb_backend/fields/embedded_model.py b/django_mongodb_backend/fields/embedded_model.py index 4b49a4710..b7f562841 100644 --- a/django_mongodb_backend/fields/embedded_model.py +++ b/django_mongodb_backend/fields/embedded_model.py @@ -184,12 +184,16 @@ def get_transform(self, name): f"{suggestion}" ) - def as_mql(self, compiler, connection): + def as_mql(self, compiler, connection, as_path=False): previous = self key_transforms = [] while isinstance(previous, KeyTransform): key_transforms.insert(0, previous.key_name) previous = previous.lhs + if as_path: + mql = previous.as_mql(compiler, connection, as_path=True) + mql_path = ".".join(key_transforms) + return f"{mql}.{mql_path}" mql = previous.as_mql(compiler, connection) for key in key_transforms: mql = {"$getField": {"input": mql, "field": key}} diff --git a/django_mongodb_backend/query.py b/django_mongodb_backend/query.py index d59bc1631..e6290ead4 100644 --- a/django_mongodb_backend/query.py +++ b/django_mongodb_backend/query.py @@ -49,6 +49,7 @@ def __init__(self, compiler): self.lookup_pipeline = None self.project_fields = None self.aggregation_pipeline = compiler.aggregation_pipeline + self.search_pipeline = compiler.search_pipeline self.extra_fields = None self.combinator_pipeline 
= None # $lookup stage that encapsulates the pipeline for performing a nested @@ -81,6 +82,8 @@ def get_cursor(self): def get_pipeline(self): pipeline = [] + if self.search_pipeline: + pipeline.extend(self.search_pipeline) if self.lookup_pipeline: pipeline.extend(self.lookup_pipeline) for query in self.subqueries or (): From b2b98f6df62611fdc25c9152b59210dee0007e2d Mon Sep 17 00:00:00 2001 From: Emanuel Lupi Date: Mon, 21 Jul 2025 22:06:01 -0300 Subject: [PATCH 03/18] Add Search operators. --- .pre-commit-config.yaml | 2 +- django_mongodb_backend/expressions/search.py | 684 ++++++++++++++++++- 2 files changed, 681 insertions(+), 5 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7a3301328..188c8f3cf 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -81,4 +81,4 @@ repos: rev: "v2.2.6" hooks: - id: codespell - args: ["-L", "nin"] + args: ["-L", "nin", "-L", "searchin"] diff --git a/django_mongodb_backend/expressions/search.py b/django_mongodb_backend/expressions/search.py index 9cdd132e3..8516fd24d 100644 --- a/django_mongodb_backend/expressions/search.py +++ b/django_mongodb_backend/expressions/search.py @@ -1,11 +1,687 @@ -from django.db.models import Expression +from django.db.models import Expression, FloatField +from django.db.models.expressions import F, Value + + +def cast_as_value(value): + if value is None: + return None + return Value(value) if not hasattr(value, "resolve_expression") else value + + +def cast_as_field(path): + return F(path) if isinstance(path, str) else path class SearchExpression(Expression): - """Base expression node for MongoDB Atlas `$search` stages.""" + """Base expression node for MongoDB Atlas `$search` stages. + + This class bridges Django's `Expression` API with the MongoDB Atlas + Search engine. Subclasses produce the operator document placed under + **$search** and expose the stage to queryset methods such as + `annotate()`, `filter()`, or `order_by()`. 
+ """ + + output_field = FloatField() + + def __str__(self): + cls = self.identity[0] + kwargs = dict(self.identity[1:]) + arg_str = ", ".join(f"{k}={v!r}" for k, v in kwargs.items()) + return f"<{cls.__name__}({arg_str})>" + + def __repr__(self): + return str(self) + + def as_sql(self, compiler, connection): + return "", [] + + def _get_indexed_fields(self, mappings): + if isinstance(mappings, list): + for definition in mappings: + yield from self._get_indexed_fields(definition) + else: + for field, definition in mappings.get("fields", {}).items(): + yield field + for path in self._get_indexed_fields(definition): + yield f"{field}.{path}" + + def _get_query_index(self, fields, compiler): + fields = set(fields) + for search_indexes in compiler.collection.list_search_indexes(): + mappings = search_indexes["latestDefinition"]["mappings"] + indexed_fields = set(self._get_indexed_fields(mappings)) + if mappings["dynamic"] or fields.issubset(indexed_fields): + return search_indexes["name"] + return "default" + + def search_operator(self, compiler, connection): + raise NotImplementedError + + def as_mql(self, compiler, connection): + index = self._get_query_index(self.get_search_fields(compiler, connection), compiler) + return {"$search": {**self.search_operator(compiler, connection), "index": index}} + + +class SearchAutocomplete(SearchExpression): + """ + Atlas Search expression that matches input using the `autocomplete` operator. + + This expression enables autocomplete behavior by querying against a field + indexed as `"type": "autocomplete"` in MongoDB Atlas. It can be used in + `filter()`, `annotate()` or any context that accepts a Django expression. + + Example: + SearchAutocomplete("title", "harry", fuzzy={"maxEdits": 1}) + + Args: + path: The document path to search (as string or expression). + query: The input string to autocomplete. + fuzzy: Optional dictionary of fuzzy matching parameters. 
+ token_order: Optional value for `"tokenOrder"`; controls sequential vs. + any-order token matching. + score: Optional expression to adjust score relevance (e.g., `{"boost": {"value": 5}}`). + + Reference: https://www.mongodb.com/docs/atlas/atlas-search/autocomplete/ + """ + + def __init__(self, path, query, fuzzy=None, token_order=None, score=None): + self.path = cast_as_field(path) + self.query = cast_as_value(query) + self.fuzzy = cast_as_value(fuzzy) + self.token_order = cast_as_value(token_order) + self.score = score + super().__init__() + + def get_source_expressions(self): + return [self.path, self.query, self.fuzzy, self.token_order] + + def set_source_expressions(self, exprs): + self.path, self.query, self.fuzzy, self.token_order = exprs + + def get_search_fields(self, compiler, connection): + return {self.path.as_mql(compiler, connection, as_path=True)} + + def search_operator(self, compiler, connection): + params = { + "path": self.path.as_mql(compiler, connection, as_path=True), + "query": self.query.value, + } + if self.score is not None: + params["score"] = self.score.as_mql(compiler, connection) + if self.fuzzy is not None: + params["fuzzy"] = self.fuzzy.value + if self.token_order is not None: + params["tokenOrder"] = self.token_order.value + return {"autocomplete": params} + + +class SearchEquals(SearchExpression): + """ + Atlas Search expression that matches documents with a field equal to the given value. + + This expression uses the `equals` operator to perform exact matches + on fields indexed in a MongoDB Atlas Search index. + + Example: + SearchEquals("category", "fiction") + + Args: + path: The document path to compare (as string or expression). + value: The exact value to match against. + score: Optional expression to modify the relevance score. 
+ + Reference: https://www.mongodb.com/docs/atlas/atlas-search/equals/ + """ + + def __init__(self, path, value, score=None): + self.path = cast_as_field(path) + self.value = cast_as_value(value) + self.score = score + super().__init__() + + def get_search_fields(self, compiler, connection): + return {self.path.as_mql(compiler, connection, as_path=True)} + + def get_source_expressions(self): + return [self.path, self.value] + + def set_source_expressions(self, exprs): + self.path, self.value = exprs + + def search_operator(self, compiler, connection): + params = { + "path": self.path.as_mql(compiler, connection, as_path=True), + "value": self.value.value, + } + if self.score is not None: + params["score"] = self.score.as_mql(compiler, connection) + return {"equals": params} + + +class SearchExists(SearchExpression): + """ + Atlas Search expression that matches documents where a field exists. + + This expression uses the `exists` operator to check whether a given + path is present in the document. Useful for filtering documents that + include (or exclude) optional fields. + + Example: + SearchExists("metadata__author") + + Args: + path: The document path to check (as string or expression). + score: Optional expression to modify the relevance score. 
+ Reference: https://www.mongodb.com/docs/atlas/atlas-search/exists/ + """ + + def __init__(self, path, score=None): + self.path = cast_as_field(path) + self.score = score + super().__init__() + + def get_search_fields(self, compiler, connection): + return {self.path.as_mql(compiler, connection, as_path=True)} + + def get_source_expressions(self): + return [self.path] + + def set_source_expressions(self, exprs): + (self.path,) = exprs + + def search_operator(self, compiler, connection): + params = { + "path": self.path.as_mql(compiler, connection, as_path=True), + } + if self.score is not None: + params["score"] = self.score.as_mql(compiler, connection) + return {"exists": params} + + +class SearchIn(SearchExpression): + """ + Atlas Search expression that matches documents where the field value is in a given list. + + This expression uses the `in` operator to match documents whose field + contains a value from the provided array of values. + + Example: + SearchIn("status", ["pending", "approved", "rejected"]) + + Args: + path: The document path to match against (as string or expression). + value: A list of values to check for membership. + score: Optional expression to adjust the relevance score. 
+ + Reference: https://www.mongodb.com/docs/atlas/atlas-search/in/ + """ + + def __init__(self, path, value, score=None): + self.path = cast_as_field(path) + self.value = cast_as_value(value) + self.score = score + super().__init__() + + def get_search_fields(self, compiler, connection): + return {self.path.as_mql(compiler, connection, as_path=True)} + + def get_source_expressions(self): + return [self.path, self.value] + + def set_source_expressions(self, exprs): + self.path, self.value = exprs + + def search_operator(self, compiler, connection): + params = { + "path": self.path.as_mql(compiler, connection, as_path=True), + "value": self.value.value, + } + if self.score is not None: + params["score"] = self.score.as_mql(compiler, connection) + return {"in": params} + + +class SearchPhrase(SearchExpression): + """ + Atlas Search expression that matches a phrase in the specified field. + + This expression uses the `phrase` operator to search for exact or near exact + sequences of terms. It supports optional slop (word distance) and synonym sets. -class SearchVector(SearchExpression): + Example: + SearchPhrase("description__text", "climate change", slop=2) + + Args: + path: The document path to search (as string or expression). + query: The phrase to match as a single string or list of terms. + slop: Optional maximum word distance allowed between phrase terms. + synonyms: Optional name of a synonym mapping defined in the Atlas index. + score: Optional expression to modify the relevance score. + + Reference: https://www.mongodb.com/docs/atlas/atlas-search/phrase/ """ - Atlas Search expression that performs vector similarity search on embedded vectors. 
+ + def __init__(self, path, query, slop=None, synonyms=None, score=None): + self.path = cast_as_field(path) + self.query = cast_as_value(query) + self.slop = cast_as_value(slop) + self.synonyms = cast_as_value(synonyms) + self.score = score + super().__init__() + + def get_search_fields(self, compiler, connection): + return {self.path.as_mql(compiler, connection, as_path=True)} + + def get_source_expressions(self): + return [self.path, self.query, self.slop, self.synonyms] + + def set_source_expressions(self, exprs): + self.path, self.query, self.slop, self.synonyms = exprs + + def search_operator(self, compiler, connection): + params = { + "path": self.path.as_mql(compiler, connection, as_path=True), + "query": self.query.value, + } + if self.score is not None: + params["score"] = self.score.as_mql(compiler, connection) + if self.slop is not None: + params["slop"] = self.slop.value + if self.synonyms is not None: + params["synonyms"] = self.synonyms.value + return {"phrase": params} + + +class SearchQueryString(SearchExpression): """ + Atlas Search expression that matches using a Lucene-style query string. + + This expression uses the `queryString` operator to parse and execute + full-text queries written in a simplified Lucene syntax. It supports + advanced constructs like boolean operators, wildcards, and field-specific terms. + + Example: + SearchQueryString("content__text", "django AND (search OR query)") + + Args: + path: The document path to query (as string or expression). + query: The Lucene-style query string. + score: Optional expression to modify the relevance score. 
+ + Reference: https://www.mongodb.com/docs/atlas/atlas-search/queryString/ + """ + + def __init__(self, path, query, score=None): + self.path = cast_as_field(path) + self.query = cast_as_value(query) + self.score = score + super().__init__() + + def get_search_fields(self, compiler, connection): + return {self.path.as_mql(compiler, connection, as_path=True)} + + def get_source_expressions(self): + return [self.path, self.query] + + def set_source_expressions(self, exprs): + self.path, self.query = exprs + + def search_operator(self, compiler, connection): + params = { + "defaultPath": self.path.as_mql(compiler, connection, as_path=True), + "query": self.query.value, + } + if self.score is not None: + params["score"] = self.score.as_mql(compiler, connection) + return {"queryString": params} + + +class SearchRange(SearchExpression): + """ + Atlas Search expression that filters documents within a range of values. + + This expression uses the `range` operator to match numeric, date, or + other comparable fields based on upper and/or lower bounds. + + Example: + SearchRange("published__year", gte=2000, lt=2020) + + Args: + path: The document path to filter (as string or expression). + lt: Optional exclusive upper bound (`<`). + lte: Optional inclusive upper bound (`<=`). + gt: Optional exclusive lower bound (`>`). + gte: Optional inclusive lower bound (`>=`). + score: Optional expression to modify the relevance score. 
+ + Reference: https://www.mongodb.com/docs/atlas/atlas-search/range/ + """ + + def __init__(self, path, lt=None, lte=None, gt=None, gte=None, score=None): + self.path = cast_as_field(path) + self.lt = cast_as_value(lt) + self.lte = cast_as_value(lte) + self.gt = cast_as_value(gt) + self.gte = cast_as_value(gte) + self.score = score + super().__init__() + + def get_search_fields(self, compiler, connection): + return {self.path.as_mql(compiler, connection, as_path=True)} + + def get_source_expressions(self): + return [self.path, self.lt, self.lte, self.gt, self.gte] + + def set_source_expressions(self, exprs): + self.path, self.lt, self.lte, self.gt, self.gte = exprs + + def search_operator(self, compiler, connection): + params = { + "path": self.path.as_mql(compiler, connection, as_path=True), + } + if self.score is not None: + params["score"] = self.score.as_mql(compiler, connection) + if self.lt is not None: + params["lt"] = self.lt.value + if self.lte is not None: + params["lte"] = self.lte.value + if self.gt is not None: + params["gt"] = self.gt.value + if self.gte is not None: + params["gte"] = self.gte.value + return {"range": params} + + +class SearchRegex(SearchExpression): + """ + Atlas Search expression that matches strings using a regular expression. + + This expression uses the `regex` operator to apply a regular expression + against the contents of a specified field. + + Example: + SearchRegex("username", r"^admin_") + + Args: + path: The document path to match (as string or expression). + query: The regular expression pattern to apply. + allow_analyzed_field: Whether to allow matching against analyzed fields (default is False). + score: Optional expression to modify the relevance score. 
+ + Reference: https://www.mongodb.com/docs/atlas/atlas-search/regex/ + """ + + def __init__(self, path, query, allow_analyzed_field=None, score=None): + self.path = cast_as_field(path) + self.query = cast_as_value(query) + self.allow_analyzed_field = cast_as_value(allow_analyzed_field) + self.score = score + super().__init__() + + def get_search_fields(self, compiler, connection): + return {self.path.as_mql(compiler, connection, as_path=True)} + + def get_source_expressions(self): + return [self.path, self.query, self.allow_analyzed_field] + + def set_source_expressions(self, exprs): + self.path, self.query, self.allow_analyzed_field = exprs + + def search_operator(self, compiler, connection): + params = { + "path": self.path.as_mql(compiler, connection, as_path=True), + "query": self.query.value, + } + if self.score: + params["score"] = self.score.as_mql(compiler, connection) + if self.allow_analyzed_field is not None: + params["allowAnalyzedField"] = self.allow_analyzed_field.value + return {"regex": params} + + +class SearchText(SearchExpression): + """ + Atlas Search expression that performs full-text search using the `text` operator. + + This expression matches terms in a specified field with options for + fuzzy matching, match criteria, and synonyms. + + Example: + SearchText("description__content", "mongodb", fuzzy={"maxEdits": 1}, match_criteria="all") + + Args: + path: The document path to search (as string or expression). + query: The search term or phrase. + fuzzy: Optional dictionary to configure fuzzy matching parameters. + match_criteria: Optional criteria for term matching (e.g., "all" or "any"). + synonyms: Optional name of a synonym mapping defined in the Atlas index. + score: Optional expression to adjust relevance scoring. 
+ + Reference: https://www.mongodb.com/docs/atlas/atlas-search/text/ + """ + + def __init__(self, path, query, fuzzy=None, match_criteria=None, synonyms=None, score=None): + self.path = cast_as_field(path) + self.query = cast_as_value(query) + self.fuzzy = cast_as_value(fuzzy) + self.match_criteria = cast_as_value(match_criteria) + self.synonyms = cast_as_value(synonyms) + self.score = score + super().__init__() + + def get_search_fields(self, compiler, connection): + return {self.path.as_mql(compiler, connection, as_path=True)} + + def get_source_expressions(self): + return [self.path, self.query, self.fuzzy, self.match_criteria, self.synonyms] + + def set_source_expressions(self, exprs): + self.path, self.query, self.fuzzy, self.match_criteria, self.synonyms = exprs + + def search_operator(self, compiler, connection): + params = { + "path": self.path.as_mql(compiler, connection, as_path=True), + "query": self.query.value, + } + if self.score: + params["score"] = self.score.as_mql(compiler, connection) + if self.fuzzy is not None: + params["fuzzy"] = self.fuzzy.value + if self.match_criteria is not None: + params["matchCriteria"] = self.match_criteria.value + if self.synonyms is not None: + params["synonyms"] = self.synonyms.value + return {"text": params} + + +class SearchWildcard(SearchExpression): + """ + Atlas Search expression that matches strings using wildcard patterns. + + This expression uses the `wildcard` operator to search for terms + matching a pattern with `*` and `?` wildcards. + + Example: + SearchWildcard("filename", "report_202?_final*") + + Args: + path: The document path to search (as string or expression). + query: The wildcard pattern to match. + allow_analyzed_field: Whether to allow matching against analyzed fields (default is False). + score: Optional expression to modify the relevance score. 
+ + Reference: https://www.mongodb.com/docs/atlas/atlas-search/wildcard/ + """ + + def __init__(self, path, query, allow_analyzed_field=None, score=None): + self.path = cast_as_field(path) + self.query = cast_as_value(query) + self.allow_analyzed_field = cast_as_value(allow_analyzed_field) + self.score = score + super().__init__() + + def get_search_fields(self, compiler, connection): + return {self.path.as_mql(compiler, connection, as_path=True)} + + def get_source_expressions(self): + return [self.path, self.query, self.allow_analyzed_field] + + def set_source_expressions(self, exprs): + self.path, self.query, self.allow_analyzed_field = exprs + + def search_operator(self, compiler, connection): + params = { + "path": self.path.as_mql(compiler, connection, as_path=True), + "query": self.query.value, + } + if self.score: + params["score"] = self.score.as_mql(compiler, connection) + if self.allow_analyzed_field is not None: + params["allowAnalyzedField"] = self.allow_analyzed_field.value + return {"wildcard": params} + + +class SearchGeoShape(SearchExpression): + """ + Atlas Search expression that filters documents by spatial relationship with a geometry. + + This expression uses the `geoShape` operator to match documents where + a geo field relates to a specified geometry by a spatial relation. + + Example: + SearchGeoShape("location", "within", {"type": "Polygon", "coordinates": [...]}) + + Args: + path: The document path to the geo field (as string or expression). + relation: The spatial relation to test (e.g., "within", "intersects", "disjoint"). + geometry: The GeoJSON geometry to compare against. + score: Optional expression to modify the relevance score. 
+ + Reference: https://www.mongodb.com/docs/atlas/atlas-search/geoShape/ + """ + + def __init__(self, path, relation, geometry, score=None): + self.path = cast_as_field(path) + self.relation = cast_as_value(relation) + self.geometry = cast_as_value(geometry) + self.score = score + super().__init__() + + def get_search_fields(self, compiler, connection): + return {self.path.as_mql(compiler, connection, as_path=True)} + + def get_source_expressions(self): + return [self.path, self.relation, self.geometry] + + def set_source_expressions(self, exprs): + self.path, self.relation, self.geometry = exprs + + def search_operator(self, compiler, connection): + params = { + "path": self.path.as_mql(compiler, connection, as_path=True), + "relation": self.relation.value, + "geometry": self.geometry.value, + } + if self.score: + params["score"] = self.score.as_mql(compiler, connection) + return {"geoShape": params} + + +class SearchGeoWithin(SearchExpression): + """ + Atlas Search expression that filters documents with geo fields + contained within a specified shape. + + This expression uses the `geoWithin` operator to match documents where + the geo field lies entirely within the given geometry. + + Example: + SearchGeoWithin("location", "Polygon", {"type": "Polygon", "coordinates": [...]}) + + Args: + path: The document path to the geo field (as string or expression). + kind: The GeoJSON geometry type (e.g., "Polygon", "MultiPolygon"). + geo_object: The GeoJSON geometry defining the boundary. + score: Optional expression to adjust the relevance score. 
+ + Reference: https://www.mongodb.com/docs/atlas/atlas-search/geoWithin/ + """ + + def __init__(self, path, kind, geo_object, score=None): + self.path = cast_as_field(path) + self.kind = cast_as_value(kind) + self.geo_object = cast_as_value(geo_object) + self.score = score + super().__init__() + + def get_search_fields(self, compiler, connection): + return {self.path.as_mql(compiler, connection, as_path=True)} + + def get_source_expressions(self): + return [self.path, self.kind, self.geo_object] + + def set_source_expressions(self, exprs): + self.path, self.kind, self.geo_object = exprs + + def search_operator(self, compiler, connection): + params = { + "path": self.path.as_mql(compiler, connection, as_path=True), + self.kind.value: self.geo_object.value, + } + if self.score: + params["score"] = self.score.as_mql(compiler, connection) + return {"geoWithin": params} + + +class SearchMoreLikeThis(SearchExpression): + """ + Atlas Search expression that finds documents similar to given examples. + + This expression uses the `moreLikeThis` operator to search for documents + that resemble the specified sample documents. + + Example: + SearchMoreLikeThis([{"_id": ObjectId("...")}, {"title": "Example"}]) + + Args: + documents: A list of example documents or expressions to find similar documents. + score: Optional expression to modify the relevance scoring. 
+ + Reference: https://www.mongodb.com/docs/atlas/atlas-search/morelikethis/ + """ + + def __init__(self, documents, score=None): + self.documents = cast_as_value(documents) + self.score = score + super().__init__() + + def get_source_expressions(self): + return [self.documents] + + def set_source_expressions(self, exprs): + (self.documents,) = exprs + + def search_operator(self, compiler, connection): + params = { + "like": self.documents.as_mql(compiler, connection), + } + if self.score: + params["score"] = self.score.as_mql(compiler, connection) + return {"moreLikeThis": params} + + def get_search_fields(self, compiler, connection): + needed_fields = set() + for doc in self.documents.value: + needed_fields.update(set(doc.keys())) + return needed_fields + + +class SearchScoreOption(Expression): + """Class to mutate scoring on a search operation""" + + def __init__(self, definitions=None): + self._definitions = definitions + + def as_mql(self, compiler, connection): + return self._definitions From 754f429a6f47b0723413d1f230d46a0752a096cd Mon Sep 17 00:00:00 2001 From: Emanuel Lupi Date: Mon, 21 Jul 2025 23:23:00 -0300 Subject: [PATCH 04/18] Make operators combinable and add compound expressions. 
--- django_mongodb_backend/expressions/search.py | 203 ++++++++++++++++++- 1 file changed, 202 insertions(+), 1 deletion(-) diff --git a/django_mongodb_backend/expressions/search.py b/django_mongodb_backend/expressions/search.py index 8516fd24d..71bd9c5d8 100644 --- a/django_mongodb_backend/expressions/search.py +++ b/django_mongodb_backend/expressions/search.py @@ -12,7 +12,65 @@ def cast_as_field(path): return F(path) if isinstance(path, str) else path -class SearchExpression(Expression): +class Operator: + AND = "AND" + OR = "OR" + NOT = "NOT" + + def __init__(self, operator): + self.operator = operator + + def __eq__(self, other): + if isinstance(other, str): + return self.operator == other + return self.operator == other.operator + + def negate(self): + if self.operator == self.AND: + return Operator(self.OR) + if self.operator == self.OR: + return Operator(self.AND) + return Operator(self.operator) + + def __hash__(self): + return hash(self.operator) + + def __str__(self): + return self.operator + + def __repr__(self): + return self.operator + + +class SearchCombinable: + def _combine(self, other, connector): + if not isinstance(self, CompoundExpression | CombinedSearchExpression): + lhs = CompoundExpression(must=[self]) + else: + lhs = self + if other and not isinstance(other, CompoundExpression | CombinedSearchExpression): + rhs = CompoundExpression(must=[other]) + else: + rhs = other + return CombinedSearchExpression(lhs, connector, rhs) + + def __invert__(self): + return self._combine(None, Operator(Operator.NOT)) + + def __and__(self, other): + return self._combine(other, Operator(Operator.AND)) + + def __rand__(self, other): + return self._combine(other, Operator(Operator.AND)) + + def __or__(self, other): + return self._combine(other, Operator(Operator.OR)) + + def __ror__(self, other): + return self._combine(other, Operator(Operator.OR)) + + +class SearchExpression(SearchCombinable, Expression): """Base expression node for MongoDB Atlas `$search` 
stages. This class bridges Django's `Expression` API with the MongoDB Atlas @@ -677,6 +735,149 @@ def get_search_fields(self, compiler, connection): return needed_fields +class CompoundExpression(SearchExpression): + """ + Compound expression that combines multiple search clauses using boolean logic. + + This expression corresponds to the `compound` operator in MongoDB Atlas Search, + allowing fine-grained control by combining multiple sub-expressions with + `must`, `must_not`, `should`, and `filter` clauses. + + Example: + CompoundExpression( + must=[expr1, expr2], + must_not=[expr3], + should=[expr4], + minimum_should_match=1 + ) + + Args: + must: List of expressions that **must** match. + must_not: List of expressions that **must not** match. + should: List of expressions that **should** match (optional relevance boost). + filter: List of expressions to filter results without affecting relevance. + score: Optional expression to adjust scoring. + minimum_should_match: Minimum number of `should` clauses that must match. 
+ + Reference: https://www.mongodb.com/docs/atlas/atlas-search/compound/ + """ + + def __init__( + self, + must=None, + must_not=None, + should=None, + filter=None, + score=None, + minimum_should_match=None, + ): + self.must = must or [] + self.must_not = must_not or [] + self.should = should or [] + self.filter = filter or [] + self.score = score + self.minimum_should_match = minimum_should_match + + def get_search_fields(self, compiler, connection): + fields = set() + for clause in self.must + self.should + self.filter + self.must_not: + fields.update(clause.get_search_fields(compiler, connection)) + return fields + + def resolve_expression( + self, query=None, allow_joins=True, reuse=None, summarize=False, for_save=False + ): + c = self.copy() + c.is_summary = summarize + c.must = [ + expr.resolve_expression(query, allow_joins, reuse, summarize) for expr in self.must + ] + c.must_not = [ + expr.resolve_expression(query, allow_joins, reuse, summarize) for expr in self.must_not + ] + c.should = [ + expr.resolve_expression(query, allow_joins, reuse, summarize) for expr in self.should + ] + c.filter = [ + expr.resolve_expression(query, allow_joins, reuse, summarize) for expr in self.filter + ] + return c + + def search_operator(self, compiler, connection): + params = {} + if self.must: + params["must"] = [clause.search_operator(compiler, connection) for clause in self.must] + if self.must_not: + params["mustNot"] = [ + clause.search_operator(compiler, connection) for clause in self.must_not + ] + if self.should: + params["should"] = [ + clause.search_operator(compiler, connection) for clause in self.should + ] + if self.filter: + params["filter"] = [ + clause.search_operator(compiler, connection) for clause in self.filter + ] + if self.minimum_should_match is not None: + params["minimumShouldMatch"] = self.minimum_should_match + return {"compound": params} + + def negate(self): + return CompoundExpression(must_not=[self]) + + +class 
CombinedSearchExpression(SearchExpression): + """ + Combines two search expressions with a logical operator. + + This expression allows combining two Atlas Search expressions + (left-hand side and right-hand side) using a boolean operator + such as `AND`, `OR`, or `NOT`. + + Example: + CombinedSearchExpression(expr1, Operator(Operator.AND), expr2) + + Args: + lhs: The left-hand search expression. + operator: An `Operator` instance (`Operator.AND`, `Operator.OR` or `Operator.NOT`). + rhs: The right-hand search expression (`None` for `NOT`). + """ + + def __init__(self, lhs, operator, rhs): + self.lhs = lhs + self.operator = operator + self.rhs = rhs + + def get_source_expressions(self): + return [self.lhs, self.rhs] + + def set_source_expressions(self, exprs): + self.lhs, self.rhs = exprs + + @staticmethod + def resolve(node, negated=False): + if node is None: + return None + # Leaf, resolve the compoundExpression + if isinstance(node, CompoundExpression): + return node.negate() if negated else node + # Apply De Morgan's Laws. + operator = node.operator.negate() if negated else node.operator + negated = negated != (node.operator == Operator.NOT) + lhs_compound = node.resolve(node.lhs, negated) + rhs_compound = node.resolve(node.rhs, negated) + if operator == Operator.OR: + return CompoundExpression(should=[lhs_compound, rhs_compound], minimum_should_match=1) + if operator == Operator.AND: + return CompoundExpression(must=[lhs_compound, rhs_compound]) + return lhs_compound + + def as_mql(self, compiler, connection): + expression = self.resolve(self) + return expression.as_mql(compiler, connection) + + class SearchScoreOption(Expression): """Class to mutate scoring on a search operation""" From 4ec87ea6c088de996a3cf74ce67ee83089e22d5b Mon Sep 17 00:00:00 2001 From: Emanuel Lupi Date: Mon, 21 Jul 2025 23:24:02 -0300 Subject: [PATCH 05/18] Add vector search operator.
--- django_mongodb_backend/expressions/search.py | 103 +++++++++++++++++++ 1 file changed, 103 insertions(+) diff --git a/django_mongodb_backend/expressions/search.py b/django_mongodb_backend/expressions/search.py index 71bd9c5d8..ea9606cb5 100644 --- a/django_mongodb_backend/expressions/search.py +++ b/django_mongodb_backend/expressions/search.py @@ -1,3 +1,4 @@ +from django.db import NotSupportedError from django.db.models import Expression, FloatField from django.db.models.expressions import F, Value @@ -878,6 +879,108 @@ def as_mql(self, compiler, connection): return expression.as_mql(compiler, connection) +class SearchVector(SearchExpression): + """ + Atlas Vector Search expression that performs similarity search on embedded vectors. + + This expression builds a `$vectorSearch` aggregation stage to find documents whose vector + embeddings are most similar to a given query vector. + + Example: + SearchVector("embedding", [0.1, 0.2, 0.3], limit=10, num_candidates=100) + + Args: + path: The document path to the vector field (as string or expression). + query_vector: The query vector to compare against. + limit: Maximum number of matching documents to return. + num_candidates: Optional number of candidates to consider during search. + exact: Optional flag to enforce exact matching. + filter: Optional filter expression to narrow candidate documents.
+ + Reference: https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-stage/ + """ + + def __init__( + self, + path, + query_vector, + limit, + num_candidates=None, + exact=None, + filter=None, + ): + self.path = cast_as_field(path) + self.query_vector = cast_as_value(query_vector) + self.limit = cast_as_value(limit) + self.num_candidates = cast_as_value(num_candidates) + self.exact = cast_as_value(exact) + self.filter = cast_as_value(filter) + super().__init__() + + def __invert__(self): + raise ValueError("SearchVector cannot be negated") + + def __and__(self, other): + raise NotSupportedError("SearchVector cannot be combined") + + def __rand__(self, other): + raise NotSupportedError("SearchVector cannot be combined") + + def __or__(self, other): + raise NotSupportedError("SearchVector cannot be combined") + + def __ror__(self, other): + raise NotSupportedError("SearchVector cannot be combined") + + def get_search_fields(self, compiler, connection): + return {self.path.as_mql(compiler, connection, as_path=True)} + + def get_source_expressions(self): + return [ + self.path, + self.query_vector, + self.limit, + self.num_candidates, + self.exact, + self.filter, + ] + + def set_source_expressions(self, exprs): + ( + self.path, + self.query_vector, + self.limit, + self.num_candidates, + self.exact, + self.filter, + ) = exprs + + def _get_query_index(self, fields, compiler): + for search_indexes in compiler.collection.list_search_indexes(): + if search_indexes["type"] == "vectorSearch": + index_field = { + field["path"] for field in search_indexes["latestDefinition"]["fields"] + } + if fields.issubset(index_field): + return search_indexes["name"] + return "default" + + def as_mql(self, compiler, connection): + params = { + "index": self._get_query_index(self.get_search_fields(compiler, connection), compiler), + "path": self.path.as_mql(compiler, connection, as_path=True), + "queryVector": self.query_vector.value, + "limit": self.limit.value, + } + if
self.num_candidates is not None: + params["numCandidates"] = self.num_candidates.value + if self.exact is not None: + params["exact"] = self.exact.value + if self.filter is not None: + params["filter"] = self.filter.as_mql(compiler, connection) + return {"$vectorSearch": params} + + class SearchScoreOption(Expression): """Class to mutate scoring on a search operation""" From 2d3c8d33572bb0c298523a7f3dd49ef6bd935458 Mon Sep 17 00:00:00 2001 From: Emanuel Lupi Date: Mon, 21 Jul 2025 23:24:20 -0300 Subject: [PATCH 06/18] Add search lookup. --- django_mongodb_backend/expressions/search.py | 29 +++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/django_mongodb_backend/expressions/search.py b/django_mongodb_backend/expressions/search.py index ea9606cb5..33d470aa0 100644 --- a/django_mongodb_backend/expressions/search.py +++ b/django_mongodb_backend/expressions/search.py @@ -1,6 +1,9 @@ from django.db import NotSupportedError -from django.db.models import Expression, FloatField +from django.db.models import CharField, Expression, FloatField, TextField from django.db.models.expressions import F, Value +from django.db.models.lookups import Lookup + +from ..query_utils import process_lhs, process_rhs def cast_as_value(value): @@ -989,3 +992,27 @@ def __init__(self, definitions=None): def as_mql(self, compiler, connection): return self._definitions + + +class SearchTextLookup(Lookup): + lookup_name = "search" + + def __init__(self, lhs, rhs): + super().__init__(lhs, rhs) + self.lhs = SearchText(self.lhs, self.rhs) + self.rhs = Value(0) + + def __str__(self): + return f"SearchText({self.lhs}, {self.rhs})" + + def __repr__(self): + return f"SearchText({self.lhs}, {self.rhs})" + + def as_mql(self, compiler, connection): + lhs_mql = process_lhs(self, compiler, connection) + value = process_rhs(self, compiler, connection) + return {"$gte": [lhs_mql, value]} + + +CharField.register_lookup(SearchTextLookup) +TextField.register_lookup(SearchTextLookup) 
From 8e2403a62bea16d99946b48d65289ad2fa312eab Mon Sep 17 00:00:00 2001 From: Emanuel Lupi Date: Mon, 21 Jul 2025 23:24:48 -0300 Subject: [PATCH 07/18] Add test search --- tests/queries_/models.py | 21 +- tests/queries_/test_search.py | 809 ++++++++++++++++++++++++++++++++++ 2 files changed, 829 insertions(+), 1 deletion(-) create mode 100644 tests/queries_/test_search.py diff --git a/tests/queries_/models.py b/tests/queries_/models.py index 015102248..21af6fafd 100644 --- a/tests/queries_/models.py +++ b/tests/queries_/models.py @@ -1,6 +1,12 @@ from django.db import models -from django_mongodb_backend.fields import ObjectIdAutoField, ObjectIdField +from django_mongodb_backend.fields import ( + ArrayField, + EmbeddedModelField, + ObjectIdAutoField, + ObjectIdField, +) +from django_mongodb_backend.models import EmbeddedModel class Author(models.Model): @@ -53,3 +59,16 @@ class Meta: def __str__(self): return str(self.pk) + + +class Writer(EmbeddedModel): + name = models.CharField(max_length=10) + + +class Article(models.Model): + headline = models.CharField(max_length=100) + number = models.IntegerField() + body = models.TextField() + location = models.JSONField(null=True) + plot_embedding = ArrayField(models.FloatField(), size=3, null=True) + writer = EmbeddedModelField(Writer, null=True) diff --git a/tests/queries_/test_search.py b/tests/queries_/test_search.py new file mode 100644 index 000000000..dc7b94272 --- /dev/null +++ b/tests/queries_/test_search.py @@ -0,0 +1,809 @@ +import unittest +from collections.abc import Callable +from time import monotonic, sleep + +from django.db import connection +from django.db.models import Q +from django.db.utils import DatabaseError +from django.test import TransactionTestCase, skipUnlessDBFeature +from pymongo.operations import SearchIndexModel + +from django_mongodb_backend.expressions.search import ( + CompoundExpression, + SearchAutocomplete, + SearchEquals, + SearchExists, + SearchGeoShape, + SearchGeoWithin, + 
SearchIn, + SearchMoreLikeThis, + SearchPhrase, + SearchRange, + SearchRegex, + SearchScoreOption, + SearchText, + SearchVector, + SearchWildcard, +) + +from .models import Article, Writer + + +def _wait_for_assertion(timeout: float = 120, interval: float = 0.5) -> None: + """Build a waiter that retries a predicate until it stops raising. + + Args: + timeout (float, optional): Seconds to keep retrying the predicate. Defaults to 120. + interval (float, optional): Seconds to sleep between attempts. Defaults to 0.5. + + Returns: + staticmethod: The wrapped `inner_wait_loop`, which takes the predicate to poll. + """ + + @staticmethod + def inner_wait_loop(predicate: Callable): + """ + Waits until the given predicate stops raising AssertionError or DatabaseError. + + Args: + predicate (Callable): A function that raises AssertionError (or DatabaseError) + if a condition is not yet met. It should refresh its query each time + it's called (e.g., by using `qs.all()` to avoid cached results). + + Raises: + AssertionError or DatabaseError: If the predicate keeps failing beyond the timeout.
+ """ + start = monotonic() + while True: + try: + predicate() + except (AssertionError, DatabaseError): + if monotonic() - start > timeout: + raise + sleep(interval) + else: + break + + return inner_wait_loop + + +@skipUnlessDBFeature("supports_atlas_search") +class SearchUtilsMixin(TransactionTestCase): + available_apps = [] + models_to_clean = [Article] + + def tearDown(self): + for model in self.models_to_clean: + collection = self._get_collection(model) + collection.delete_many({}) + + @classmethod + def setUpClass(cls): + super().setUpClass() + # Register the cleanup to run after all tests in this class + cls.addClassCleanup(cls.drop_search_indexes) + + @staticmethod + def _get_collection(model): + return connection.database.get_collection(model._meta.db_table) + + def create_search_index(self, model, index_name, definition, type="search"): + collection = self._get_collection(model) + idx = SearchIndexModel(definition=definition, name=index_name, type=type) + collection.create_search_index(idx) + + @classmethod + def drop_search_indexes(cls): + for model in cls.models_to_clean: + collection = cls._get_collection(model) + for search_indexes in collection.list_search_indexes(): + collection.drop_search_index(search_indexes["name"]) + + wait_for_assertion = _wait_for_assertion(timeout=3) + + +@skipUnlessDBFeature("supports_atlas_search") +class SearchEqualsTest(SearchUtilsMixin): + def setUp(self): + super().setUp() + self.create_search_index( + Article, + "equals_headline_index", + { + "mappings": { + "dynamic": False, + "fields": {"headline": {"type": "token"}, "number": {"type": "number"}}, + } + }, + ) + self.article = Article.objects.create(headline="cross", number=1, body="body") + Article.objects.create(headline="other thing", number=2, body="body") + + def test_search_equals(self): + qs = Article.objects.annotate(score=SearchEquals(path="headline", value="cross")) + self.wait_for_assertion(lambda: self.assertCountEqual(qs.all(), [self.article])) + + def 
test_boost_score(self): + boost_score = SearchScoreOption({"boost": {"value": 3}}) + + qs = Article.objects.annotate( + score=SearchEquals(path="headline", value="cross", score=boost_score) + ) + self.wait_for_assertion(lambda: self.assertCountEqual(qs.all(), [self.article])) + scored = qs.first() + self.assertGreaterEqual(scored.score, 3.0) + + def test_constant_score(self): + constant_score = SearchScoreOption({"constant": {"value": 10}}) + qs = Article.objects.annotate( + score=SearchEquals(path="headline", value="cross", score=constant_score) + ) + self.wait_for_assertion(lambda: self.assertCountEqual(qs.all(), [self.article])) + scored = qs.first() + self.assertAlmostEqual(scored.score, 10.0, places=2) + + def test_function_score(self): + function_score = SearchScoreOption( + { + "function": { + "path": { + "value": "number", + "undefined": 0, + }, + } + } + ) + + qs = Article.objects.annotate( + score=SearchEquals(path="headline", value="cross", score=function_score) + ) + self.wait_for_assertion(lambda: self.assertCountEqual(qs.all(), [self.article])) + scored = qs.first() + self.assertAlmostEqual(scored.score, 1.0, places=2) + + +@skipUnlessDBFeature("supports_atlas_search") +class SearchAutocompleteTest(SearchUtilsMixin): + def setUp(self): + super().setUp() + self.create_search_index( + Article, + "autocomplete_headline_index", + { + "mappings": { + "dynamic": False, + "fields": { + "headline": { + "type": "autocomplete", + "analyzer": "lucene.standard", + "tokenization": "edgeGram", + "minGrams": 3, + "maxGrams": 5, + "foldDiacritics": False, + }, + "writer": { + "type": "document", + "fields": { + "name": { + "type": "autocomplete", + "analyzer": "lucene.standard", + "tokenization": "edgeGram", + "minGrams": 3, + "maxGrams": 5, + "foldDiacritics": False, + } + }, + }, + }, + } + }, + ) + self.article = Article.objects.create( + headline="crossing and something", + number=2, + body="river", + writer=Writer(name="Joselina A. 
Ramirez"), + ) + Article.objects.create(headline="Some random text", number=3, body="river") + + def test_search_autocomplete(self): + qs = Article.objects.annotate( + score=SearchAutocomplete( + path="headline", + query="crossing", + token_order="sequential", # noqa: S106 + fuzzy={"maxEdits": 2}, + ) + ) + self.wait_for_assertion(lambda: self.assertCountEqual(qs.all(), [self.article])) + + def test_search_autocomplete_embedded_model(self): + qs = Article.objects.annotate( + score=SearchAutocomplete(path="writer__name", query="Joselina") + ) + self.wait_for_assertion(lambda: self.assertCountEqual(qs.all(), [self.article])) + + def test_constant_score(self): + constant_score = SearchScoreOption({"constant": {"value": 10}}) + qs = Article.objects.annotate( + score=SearchAutocomplete( + path="headline", + query="crossing", + token_order="sequential", # noqa: S106 + fuzzy={"maxEdits": 2}, + score=constant_score, + ) + ) + self.wait_for_assertion(lambda: self.assertCountEqual(qs.all(), [self.article])) + scored = qs.first() + self.assertAlmostEqual(scored.score, 10.0, places=2) + + +@skipUnlessDBFeature("supports_atlas_search") +class SearchExistsTest(SearchUtilsMixin): + def setUp(self): + super().setUp() + self.create_search_index( + Article, + "exists_body_index", + {"mappings": {"dynamic": False, "fields": {"body": {"type": "token"}}}}, + ) + self.article = Article.objects.create(headline="ignored", number=3, body="something") + + def test_search_exists(self): + qs = Article.objects.annotate(score=SearchExists(path="body")) + self.wait_for_assertion(lambda: self.assertCountEqual(qs.all(), [self.article])) + + def test_constant_score(self): + constant_score = SearchScoreOption({"constant": {"value": 10}}) + qs = Article.objects.annotate(score=SearchExists(path="body", score=constant_score)) + self.wait_for_assertion(lambda: self.assertCountEqual(qs.all(), [self.article])) + scored = qs.first() + self.assertAlmostEqual(scored.score, 10.0, places=2) + + 
+@skipUnlessDBFeature("supports_atlas_search") +class SearchInTest(SearchUtilsMixin): + def setUp(self): + super().setUp() + self.create_search_index( + Article, + "in_headline_index", + {"mappings": {"dynamic": False, "fields": {"headline": {"type": "token"}}}}, + ) + self.article = Article.objects.create(headline="cross", number=1, body="a") + Article.objects.create(headline="road", number=2, body="b") + + def test_search_in(self): + qs = Article.objects.annotate(score=SearchIn(path="headline", value=["cross", "river"])) + self.wait_for_assertion(lambda: self.assertCountEqual(qs.all(), [self.article])) + + def test_constant_score(self): + constant_score = SearchScoreOption({"constant": {"value": 10}}) + qs = Article.objects.annotate( + score=SearchIn(path="headline", value=["cross", "river"], score=constant_score) + ) + self.wait_for_assertion(lambda: self.assertCountEqual(qs.all(), [self.article])) + scored = qs.first() + self.assertAlmostEqual(scored.score, 10.0, places=2) + + +@skipUnlessDBFeature("supports_atlas_search") +class SearchPhraseTest(SearchUtilsMixin): + def setUp(self): + super().setUp() + self.create_search_index( + Article, + "phrase_body_index", + {"mappings": {"dynamic": False, "fields": {"body": {"type": "string"}}}}, + ) + self.article = Article.objects.create( + headline="irrelevant", number=1, body="the quick brown fox" + ) + Article.objects.create(headline="cheetah", number=2, body="fastest animal") + + def test_search_phrase(self): + qs = Article.objects.annotate(score=SearchPhrase(path="body", query="quick brown")) + self.wait_for_assertion(lambda: self.assertCountEqual(qs.all(), [self.article])) + + def test_constant_score(self): + constant_score = SearchScoreOption({"constant": {"value": 10}}) + qs = Article.objects.annotate( + score=SearchPhrase(path="body", query="quick brown", score=constant_score) + ) + self.wait_for_assertion(lambda: self.assertCountEqual(qs.all(), [self.article])) + scored = qs.first() + 
self.assertAlmostEqual(scored.score, 10.0, places=2) + + +@skipUnlessDBFeature("supports_atlas_search") +class SearchRangeTest(SearchUtilsMixin): + def setUp(self): + super().setUp() + self.create_search_index( + Article, + "range_number_index", + {"mappings": {"dynamic": False, "fields": {"number": {"type": "number"}}}}, + ) + Article.objects.create(headline="x", number=5, body="z") + self.number20 = Article.objects.create(headline="y", number=20, body="z") + + def test_search_range(self): + qs = Article.objects.annotate(score=SearchRange(path="number", gte=10, lt=30)) + self.wait_for_assertion(lambda: self.assertCountEqual(qs.all(), [self.number20])) + + def test_constant_score(self): + constant_score = SearchScoreOption({"constant": {"value": 10}}) + qs = Article.objects.annotate( + score=SearchRange(path="number", gte=10, lt=30, score=constant_score) + ) + self.wait_for_assertion(lambda: self.assertCountEqual(qs.all(), [self.number20])) + scored = qs.first() + self.assertAlmostEqual(scored.score, 10.0, places=2) + + +@skipUnlessDBFeature("supports_atlas_search") +class SearchRegexTest(SearchUtilsMixin): + def setUp(self): + super().setUp() + self.create_search_index( + Article, + "regex_headline_index", + { + "mappings": { + "dynamic": False, + "fields": {"headline": {"type": "string", "analyzer": "lucene.keyword"}}, + } + }, + ) + self.article = Article.objects.create(headline="hello world", number=1, body="abc") + Article.objects.create(headline="hola mundo", number=2, body="abc") + + def test_search_regex(self): + qs = Article.objects.annotate( + score=SearchRegex(path="headline", query="hello.*", allow_analyzed_field=True) + ) + self.wait_for_assertion(lambda: self.assertCountEqual(qs.all(), [self.article])) + + def test_constant_score(self): + constant_score = SearchScoreOption({"constant": {"value": 10}}) + qs = Article.objects.annotate( + score=SearchRegex( + path="headline", query="hello.*", allow_analyzed_field=True, score=constant_score + ) + ) + 
self.wait_for_assertion(lambda: self.assertCountEqual(qs.all(), [self.article])) + scored = qs.first() + self.assertAlmostEqual(scored.score, 10.0, places=2) + + +@skipUnlessDBFeature("supports_atlas_search") +class SearchTextTest(SearchUtilsMixin): + def setUp(self): + super().setUp() + self.create_search_index( + Article, + "text_body_index", + {"mappings": {"dynamic": False, "fields": {"body": {"type": "string"}}}}, + ) + self.article = Article.objects.create( + headline="ignored", number=1, body="The lazy dog sleeps" + ) + Article.objects.create(headline="ignored", number=2, body="The sleepy bear") + + def test_search_text(self): + qs = Article.objects.annotate(score=SearchText(path="body", query="lazy")) + self.wait_for_assertion(lambda: self.assertCountEqual([self.article], qs.all())) + + def test_search_lookup(self): + qs = Article.objects.filter(body__search="lazy") + self.wait_for_assertion(lambda: self.assertCountEqual([self.article], qs.all())) + + def test_search_text_with_fuzzy_and_criteria(self): + qs = Article.objects.annotate( + score=SearchText( + path="body", query="lazzy", fuzzy={"maxEdits": 2}, match_criteria="all" + ) + ) + self.wait_for_assertion(lambda: self.assertCountEqual(qs.all(), [self.article])) + + def test_constant_score(self): + constant_score = SearchScoreOption({"constant": {"value": 10}}) + qs = Article.objects.annotate( + score=SearchText( + path="body", + query="lazzy", + fuzzy={"maxEdits": 2}, + match_criteria="all", + score=constant_score, + ) + ) + self.wait_for_assertion(lambda: self.assertCountEqual(qs.all(), [self.article])) + scored = qs.first() + self.assertAlmostEqual(scored.score, 10.0, places=2) + + +@skipUnlessDBFeature("supports_atlas_search") +class SearchWildcardTest(SearchUtilsMixin): + def setUp(self): + super().setUp() + self.create_search_index( + Article, + "wildcard_headline_index", + { + "mappings": { + "dynamic": False, + "fields": {"headline": {"type": "string", "analyzer": "lucene.keyword"}}, + } + }, + 
) + self.article = Article.objects.create(headline="dark-knight", number=1, body="") + Article.objects.create(headline="batman", number=2, body="") + + def test_search_wildcard(self): + qs = Article.objects.annotate(score=SearchWildcard(path="headline", query="dark-*")) + self.wait_for_assertion(lambda: self.assertCountEqual([self.article], qs.all())) + + def test_constant_score(self): + constant_score = SearchScoreOption({"constant": {"value": 10}}) + qs = Article.objects.annotate( + score=SearchWildcard(path="headline", query="dark-*", score=constant_score) + ) + self.wait_for_assertion(lambda: self.assertCountEqual(qs.all(), [self.article])) + scored = qs.first() + self.assertAlmostEqual(scored.score, 10.0, places=2) + + +@skipUnlessDBFeature("supports_atlas_search") +class SearchGeoShapeTest(SearchUtilsMixin): + def setUp(self): + super().setUp() + self.create_search_index( + Article, + "geoshape_location_index", + { + "mappings": { + "dynamic": False, + "fields": {"location": {"type": "geo", "indexShapes": True}}, + } + }, + ) + self.article = Article.objects.create( + headline="any", number=1, body="", location={"type": "Point", "coordinates": [40, 5]} + ) + Article.objects.create( + headline="any", number=2, body="", location={"type": "Point", "coordinates": [400, 50]} + ) + + def test_search_geo_shape(self): + polygon = { + "type": "Polygon", + "coordinates": [[[30, 0], [50, 0], [50, 10], [30, 10], [30, 0]]], + } + qs = Article.objects.annotate( + score=SearchGeoShape(path="location", relation="within", geometry=polygon) + ) + self.wait_for_assertion(lambda: self.assertCountEqual(qs.all(), [self.article])) + + def test_constant_score(self): + polygon = { + "type": "Polygon", + "coordinates": [[[30, 0], [50, 0], [50, 10], [30, 10], [30, 0]]], + } + constant_score = SearchScoreOption({"constant": {"value": 10}}) + qs = Article.objects.annotate( + score=SearchGeoShape( + path="location", relation="within", geometry=polygon, score=constant_score + ) + ) + 
self.wait_for_assertion(lambda: self.assertCountEqual(qs.all(), [self.article])) + scored = qs.first() + self.assertAlmostEqual(scored.score, 10.0, places=2) + + +@skipUnlessDBFeature("supports_atlas_search") +class SearchGeoWithinTest(SearchUtilsMixin): + def setUp(self): + super().setUp() + self.create_search_index( + Article, + "geowithin_location_index", + {"mappings": {"dynamic": False, "fields": {"location": {"type": "geo"}}}}, + ) + self.article = Article.objects.create( + headline="geo", number=2, body="", location={"type": "Point", "coordinates": [40, 5]} + ) + Article.objects.create( + headline="geo2", number=3, body="", location={"type": "Point", "coordinates": [-40, -5]} + ) + + def test_search_geo_within(self): + polygon = { + "type": "Polygon", + "coordinates": [[[30, 0], [50, 0], [50, 10], [30, 10], [30, 0]]], + } + qs = Article.objects.annotate( + score=SearchGeoWithin( + path="location", + kind="geometry", + geo_object=polygon, + ) + ) + self.wait_for_assertion(lambda: self.assertCountEqual(qs.all(), [self.article])) + + def test_constant_score(self): + polygon = { + "type": "Polygon", + "coordinates": [[[30, 0], [50, 0], [50, 10], [30, 10], [30, 0]]], + } + constant_score = SearchScoreOption({"constant": {"value": 10}}) + qs = Article.objects.annotate( + score=SearchGeoWithin( + path="location", + kind="geometry", + geo_object=polygon, + score=constant_score, + ) + ) + self.wait_for_assertion(lambda: self.assertCountEqual(qs.all(), [self.article])) + scored = qs.first() + self.assertAlmostEqual(scored.score, 10.0, places=2) + + +@skipUnlessDBFeature("supports_atlas_search") +@unittest.expectedFailure +class SearchMoreLikeThisTest(SearchUtilsMixin): + def setUp(self): + super().setUp() + self.create_search_index( + Article, + "mlt_index", + { + "mappings": { + "dynamic": False, + "fields": {"body": {"type": "string"}, "headline": {"type": "string"}}, + } + }, + ) + self.article1 = Article.objects.create( + headline="Space exploration", number=1, 
body="Webb telescope" + ) + self.article2 = Article.objects.create( + headline="The commodities fall", + number=2, + body="Commodities dropped sharply due to inflation concerns", + ) + Article.objects.create( + headline="irrelevant", + number=3, + body="This is a completely unrelated article about cooking", + ) + + def test_search_more_like_this(self): + like_docs = [ + {"headline": self.article1.headline, "body": self.article1.body}, + {"headline": self.article2.headline, "body": self.article2.body}, + ] + like_docs = [{"body": "NASA launches new satellite to explore the galaxy"}] + qs = Article.objects.annotate(score=SearchMoreLikeThis(documents=like_docs)).order_by( + "score" + ) + self.wait_for_assertion( + lambda: self.assertQuerySetEqual( + qs.all(), [self.article1, self.article2], lambda a: a.headline + ) + ) + + +@skipUnlessDBFeature("supports_atlas_search") +class CompoundSearchTest(SearchUtilsMixin): + def setUp(self): + super().setUp() + self.create_search_index( + Article, + "compound_index", + { + "mappings": { + "dynamic": False, + "fields": { + "headline": [{"type": "token"}, {"type": "string"}], + "body": {"type": "string"}, + "number": {"type": "number"}, + }, + } + }, + ) + self.mars_mission = Article.objects.create( + number=1, + headline="space exploration", + body="NASA launches a new mission to Mars, aiming to study surface geology", + ) + + self.exoplanet = Article.objects.create( + number=2, + headline="space exploration", + body="Astronomers discover exoplanets orbiting distant stars using Webb telescope", + ) + + self.icy_moons = Article.objects.create( + number=3, + headline="space exploration", + body="ESA prepares a robotic expedition to explore the icy moons of Jupiter", + ) + + self.comodities_drop = Article.objects.create( + number=4, + headline="astronomy news", + body="Commodities dropped sharply due to inflation concerns", + ) + + def test_expression(self): + must_expr = SearchEquals(path="headline", value="space exploration") + 
must_not_expr = SearchPhrase(path="body", query="icy moons") + should_expr = SearchPhrase(path="body", query="exoplanets") + + compound = CompoundExpression( + must=[must_expr or should_expr], + must_not=[must_not_expr], + should=[should_expr], + minimum_should_match=1, + ) + + qs = Article.objects.annotate(score=compound).order_by("score") + self.wait_for_assertion(lambda: self.assertCountEqual(qs.all(), [self.exoplanet])) + + def test_operations(self): + expr = SearchEquals(path="headline", value="space exploration") & ~SearchEquals( + path="number", value=3 + ) + qs = Article.objects.annotate(score=expr) + self.wait_for_assertion( + lambda: self.assertCountEqual(qs.all(), [self.mars_mission, self.exoplanet]) + ) + + def test_mixed_scores(self): + boost_score = SearchScoreOption({"boost": {"value": 5}}) + constant_score = SearchScoreOption({"constant": {"value": 20}}) + function_score = SearchScoreOption( + {"function": {"path": {"value": "number", "undefined": 0}}} + ) + + must_expr = SearchEquals(path="headline", value="space exploration", score=boost_score) + should_expr = SearchPhrase(path="body", query="exoplanets", score=constant_score) + must_not_expr = SearchPhrase(path="body", query="icy moons", score=function_score) + + compound = CompoundExpression( + must=[must_expr], + must_not=[must_not_expr], + should=[should_expr], + ) + qs = Article.objects.annotate(score=compound).order_by("-score") + self.wait_for_assertion( + lambda: self.assertListEqual(list(qs.all()), [self.exoplanet, self.mars_mission]) + ) + # Exoplanet should rank first because of the constant 20 bump. 
+ self.assertEqual(qs.first(), self.exoplanet) + + def test_operationss_with_function_score(self): + function_score = SearchScoreOption( + {"function": {"path": {"value": "number", "undefined": 0}}} + ) + + expr = SearchEquals( + path="headline", + value="space exploration", + score=function_score, + ) & ~SearchEquals(path="number", value=3) + + qs = Article.objects.annotate(score=expr).order_by("-score") + + self.wait_for_assertion( + lambda: self.assertListEqual(list(qs.all()), [self.exoplanet, self.mars_mission]) + ) + # Returns mars_mission (score≈1) and exoplanet (score≈2) then; exoplanet first. + self.assertEqual(qs.first(), self.exoplanet) + + def test_multiple_search(self): + msg = ( + "Only one $search operation is allowed per query. Received 2 search expressions. " + "To combine multiple search expressions, use either a CompoundExpression for " + "fine-grained control or CombinedSearchExpression for simple logical combinations." + ) + with self.assertRaisesMessage(ValueError, msg): + Article.objects.annotate( + score1=SearchEquals(path="headline", value="space exploration"), + score2=~SearchEquals(path="number", value=3), + ).order_by("score1", "score2").first() + + with self.assertRaisesMessage(ValueError, msg): + Article.objects.filter( + Q(headline__search="space exploration"), Q(headline__search="space exploration 2") + ).first() + + def test_multiple_type_search(self): + msg = ( + "Cannot combine a `$vectorSearch` with a `$search` operator. " + "If you need to combine them, consider " + "restructuring your query logic or running them as separate queries." + ) + with self.assertRaisesMessage(ValueError, msg): + Article.objects.annotate( + score1=SearchEquals(path="headline", value="space exploration"), + score2=SearchVector( + path="headline", + query_vector=[1, 2, 3], + num_candidates=5, + limit=2, + ), + ).order_by("score1", "score2").first() + + def test_multiple_vector_search(self): + msg = ( + "Cannot combine two `$vectorSearch` operator. 
If you need to combine them, " + "consider restructuring your query logic or running them as separate queries." + ) + with self.assertRaisesMessage(ValueError, msg): + Article.objects.annotate( + score1=SearchVector( + path="headline", + query_vector=[1, 2, 3], + num_candidates=5, + limit=2, + ), + score2=SearchVector( + path="headline", + query_vector=[1, 2, 4], + num_candidates=5, + limit=2, + ), + ).order_by("score1", "score2").first() + + def test_search_and_filter(self): + qs = Article.objects.filter(headline__search="space exploration", number__gt=2) + self.wait_for_assertion(lambda: self.assertCountEqual(qs.all(), [self.icy_moons])) + + +@skipUnlessDBFeature("supports_atlas_search") +class SearchVectorTest(SearchUtilsMixin): + def setUp(self): + super().setUp() + self.create_search_index( + Article, + "vector_index", + { + "fields": [ + { + "type": "vector", + "path": "plot_embedding", + "numDimensions": 3, + "similarity": "cosine", + "quantization": "scalar", + } + ] + }, + type="vectorSearch", + ) + + self.mars = Article.objects.create( + headline="Mars landing", + number=1, + body="The rover has landed on Mars", + plot_embedding=[0.1, 0.2, 0.3], + ) + self.cooking = Article.objects.create( + headline="Cooking tips", + number=2, + body="This article is about pasta", + plot_embedding=[0.9, 0.8, 0.7], + ) + + def test_vector_search(self): + vector_query = [0.1, 0.2, 0.3] + expr = SearchVector( + path="plot_embedding", + query_vector=vector_query, + num_candidates=5, + limit=2, + ) + qs = Article.objects.annotate(score=expr).order_by("-score") + self.wait_for_assertion(lambda: self.assertCountEqual(qs.all(), [self.mars, self.cooking])) From 56034a4cc887652088f8542c80a75c5535e4b887 Mon Sep 17 00:00:00 2001 From: Emanuel Lupi Date: Mon, 21 Jul 2025 23:25:02 -0300 Subject: [PATCH 08/18] Add combinable test --- .../test_combinable_search_expression.py | 76 +++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 
tests/expressions_/test_combinable_search_expression.py diff --git a/tests/expressions_/test_combinable_search_expression.py b/tests/expressions_/test_combinable_search_expression.py new file mode 100644 index 000000000..2ff597050 --- /dev/null +++ b/tests/expressions_/test_combinable_search_expression.py @@ -0,0 +1,76 @@ +from django.test import SimpleTestCase + +from django_mongodb_backend.expressions.search import ( + CombinedSearchExpression, + CompoundExpression, + SearchEquals, +) + + +class CombinedSearchExpressionResolutionTest(SimpleTestCase): + def test_combined_expression_and_or_not_resolution(self): + A = SearchEquals(path="headline", value="A") + B = SearchEquals(path="headline", value="B") + C = SearchEquals(path="headline", value="C") + D = SearchEquals(path="headline", value="D") + expr = (~A | B) & (C | D) + solved = CombinedSearchExpression.resolve(expr) + self.assertIsInstance(solved, CompoundExpression) + solved_A = CompoundExpression(must_not=[CompoundExpression(must=[A])]) + solved_B = CompoundExpression(must=[B]) + solved_C = CompoundExpression(must=[C]) + solved_D = CompoundExpression(must=[D]) + self.assertCountEqual(solved.must[0].should, [solved_A, solved_B]) + self.assertEqual(solved.must[0].minimum_should_match, 1) + self.assertEqual(solved.must[1].should, [solved_C, solved_D]) + + def test_combined_expression_de_morgans_resolution(self): + A = SearchEquals(path="headline", value="A") + B = SearchEquals(path="headline", value="B") + C = SearchEquals(path="headline", value="C") + D = SearchEquals(path="headline", value="D") + expr = ~(A | B) & (C | D) + solved_A = CompoundExpression(must_not=[CompoundExpression(must=[A])]) + solved_B = CompoundExpression(must_not=[CompoundExpression(must=[B])]) + solved_C = CompoundExpression(must=[C]) + solved_D = CompoundExpression(must=[D]) + solved = CombinedSearchExpression.resolve(expr) + self.assertIsInstance(solved, CompoundExpression) + self.assertCountEqual(solved.must[0].must, [solved_A, 
solved_B]) + self.assertEqual(solved.must[0].minimum_should_match, None) + self.assertEqual(solved.must[1].should, [solved_C, solved_D]) + self.assertEqual(solved.minimum_should_match, None) + + def test_combined_expression_doble_negation(self): + A = SearchEquals(path="headline", value="A") + expr = ~~A + solved = CombinedSearchExpression.resolve(expr) + solved_A = CompoundExpression(must=[A]) + self.assertIsInstance(solved, CompoundExpression) + self.assertEqual(solved, solved_A) + + def test_combined_expression_long_right_tree(self): + A = SearchEquals(path="headline", value="A") + B = SearchEquals(path="headline", value="B") + C = SearchEquals(path="headline", value="C") + D = SearchEquals(path="headline", value="D") + solved_A = CompoundExpression(must=[A]) + solved_B = CompoundExpression(must_not=[CompoundExpression(must=[B])]) + solved_C = CompoundExpression(must=[C]) + solved_D = CompoundExpression(must=[D]) + expr = A & ~(B & ~(C & D)) + solved = CombinedSearchExpression.resolve(expr) + self.assertIsInstance(solved, CompoundExpression) + self.assertEqual(len(solved.must), 2) + self.assertEqual(solved.must[0], solved_A) + self.assertEqual(len(solved.must[1].should), 2) + self.assertEqual(solved.must[1].should[0], solved_B) + self.assertCountEqual(solved.must[1].should[1].must, [solved_C, solved_D]) + expr = A | ~(B | ~(C | D)) + solved = CombinedSearchExpression.resolve(expr) + self.assertIsInstance(solved, CompoundExpression) + self.assertEqual(len(solved.should), 2) + self.assertEqual(solved.should[0], solved_A) + self.assertEqual(len(solved.should[1].must), 2) + self.assertEqual(solved.should[1].must[0], solved_B) + self.assertCountEqual(solved.should[1].must[1].should, [solved_C, solved_D]) From a9c09df436e66f4bc38f8bc9e5c1b988479b249f Mon Sep 17 00:00:00 2001 From: Emanuel Lupi Date: Tue, 22 Jul 2025 00:51:49 -0300 Subject: [PATCH 09/18] Test clean up. 
--- tests/queries_/test_search.py | 151 ++++++++++++++++++---------------- 1 file changed, 81 insertions(+), 70 deletions(-) diff --git a/tests/queries_/test_search.py b/tests/queries_/test_search.py index dc7b94272..f0d07082c 100644 --- a/tests/queries_/test_search.py +++ b/tests/queries_/test_search.py @@ -72,41 +72,39 @@ class SearchUtilsMixin(TransactionTestCase): available_apps = [] models_to_clean = [Article] - def tearDown(self): - for model in self.models_to_clean: - collection = self._get_collection(model) - collection.delete_many({}) - @classmethod def setUpClass(cls): super().setUpClass() # Register the cleanup to run after all tests in this class - cls.addClassCleanup(cls.drop_search_indexes) + cls.addClassCleanup(cls.drop_search_indexes_and_data) @staticmethod def _get_collection(model): return connection.database.get_collection(model._meta.db_table) - def create_search_index(self, model, index_name, definition, type="search"): - collection = self._get_collection(model) + @classmethod + def create_search_index(cls, model, index_name, definition, type="search"): + collection = cls._get_collection(model) idx = SearchIndexModel(definition=definition, name=index_name, type=type) collection.create_search_index(idx) @classmethod - def drop_search_indexes(cls): + def drop_search_indexes_and_data(cls): for model in cls.models_to_clean: collection = cls._get_collection(model) for search_indexes in collection.list_search_indexes(): collection.drop_search_index(search_indexes["name"]) + collection.delete_many({}) wait_for_assertion = _wait_for_assertion(timeout=3) @skipUnlessDBFeature("supports_atlas_search") class SearchEqualsTest(SearchUtilsMixin): - def setUp(self): - super().setUp() - self.create_search_index( + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.create_search_index( Article, "equals_headline_index", { @@ -116,7 +114,7 @@ def setUp(self): } }, ) - self.article = Article.objects.create(headline="cross", number=1, body="body") + 
cls.article = Article.objects.create(headline="cross", number=1, body="body") Article.objects.create(headline="other thing", number=2, body="body") def test_search_equals(self): @@ -164,9 +162,10 @@ def test_function_score(self): @skipUnlessDBFeature("supports_atlas_search") class SearchAutocompleteTest(SearchUtilsMixin): - def setUp(self): - super().setUp() - self.create_search_index( + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.create_search_index( Article, "autocomplete_headline_index", { @@ -198,7 +197,7 @@ def setUp(self): } }, ) - self.article = Article.objects.create( + cls.article = Article.objects.create( headline="crossing and something", number=2, body="river", @@ -241,14 +240,15 @@ def test_constant_score(self): @skipUnlessDBFeature("supports_atlas_search") class SearchExistsTest(SearchUtilsMixin): - def setUp(self): - super().setUp() - self.create_search_index( + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.create_search_index( Article, "exists_body_index", {"mappings": {"dynamic": False, "fields": {"body": {"type": "token"}}}}, ) - self.article = Article.objects.create(headline="ignored", number=3, body="something") + cls.article = Article.objects.create(headline="ignored", number=3, body="something") def test_search_exists(self): qs = Article.objects.annotate(score=SearchExists(path="body")) @@ -264,14 +264,15 @@ def test_constant_score(self): @skipUnlessDBFeature("supports_atlas_search") class SearchInTest(SearchUtilsMixin): - def setUp(self): - super().setUp() - self.create_search_index( + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.create_search_index( Article, "in_headline_index", {"mappings": {"dynamic": False, "fields": {"headline": {"type": "token"}}}}, ) - self.article = Article.objects.create(headline="cross", number=1, body="a") + cls.article = Article.objects.create(headline="cross", number=1, body="a") Article.objects.create(headline="road", number=2, body="b") def 
test_search_in(self): @@ -290,14 +291,15 @@ def test_constant_score(self): @skipUnlessDBFeature("supports_atlas_search") class SearchPhraseTest(SearchUtilsMixin): - def setUp(self): - super().setUp() - self.create_search_index( + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.create_search_index( Article, "phrase_body_index", {"mappings": {"dynamic": False, "fields": {"body": {"type": "string"}}}}, ) - self.article = Article.objects.create( + cls.article = Article.objects.create( headline="irrelevant", number=1, body="the quick brown fox" ) Article.objects.create(headline="cheetah", number=2, body="fastest animal") @@ -318,15 +320,16 @@ def test_constant_score(self): @skipUnlessDBFeature("supports_atlas_search") class SearchRangeTest(SearchUtilsMixin): - def setUp(self): - super().setUp() - self.create_search_index( + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.create_search_index( Article, "range_number_index", {"mappings": {"dynamic": False, "fields": {"number": {"type": "number"}}}}, ) Article.objects.create(headline="x", number=5, body="z") - self.number20 = Article.objects.create(headline="y", number=20, body="z") + cls.number20 = Article.objects.create(headline="y", number=20, body="z") def test_search_range(self): qs = Article.objects.annotate(score=SearchRange(path="number", gte=10, lt=30)) @@ -344,9 +347,10 @@ def test_constant_score(self): @skipUnlessDBFeature("supports_atlas_search") class SearchRegexTest(SearchUtilsMixin): - def setUp(self): - super().setUp() - self.create_search_index( + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.create_search_index( Article, "regex_headline_index", { @@ -356,7 +360,7 @@ def setUp(self): } }, ) - self.article = Article.objects.create(headline="hello world", number=1, body="abc") + cls.article = Article.objects.create(headline="hello world", number=1, body="abc") Article.objects.create(headline="hola mundo", number=2, body="abc") def test_search_regex(self): 
@@ -379,14 +383,15 @@ def test_constant_score(self): @skipUnlessDBFeature("supports_atlas_search") class SearchTextTest(SearchUtilsMixin): - def setUp(self): - super().setUp() - self.create_search_index( + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.create_search_index( Article, "text_body_index", {"mappings": {"dynamic": False, "fields": {"body": {"type": "string"}}}}, ) - self.article = Article.objects.create( + cls.article = Article.objects.create( headline="ignored", number=1, body="The lazy dog sleeps" ) Article.objects.create(headline="ignored", number=2, body="The sleepy bear") @@ -425,9 +430,10 @@ def test_constant_score(self): @skipUnlessDBFeature("supports_atlas_search") class SearchWildcardTest(SearchUtilsMixin): - def setUp(self): - super().setUp() - self.create_search_index( + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.create_search_index( Article, "wildcard_headline_index", { @@ -437,7 +443,7 @@ def setUp(self): } }, ) - self.article = Article.objects.create(headline="dark-knight", number=1, body="") + cls.article = Article.objects.create(headline="dark-knight", number=1, body="") Article.objects.create(headline="batman", number=2, body="") def test_search_wildcard(self): @@ -456,9 +462,10 @@ def test_constant_score(self): @skipUnlessDBFeature("supports_atlas_search") class SearchGeoShapeTest(SearchUtilsMixin): - def setUp(self): - super().setUp() - self.create_search_index( + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.create_search_index( Article, "geoshape_location_index", { @@ -468,7 +475,7 @@ def setUp(self): } }, ) - self.article = Article.objects.create( + cls.article = Article.objects.create( headline="any", number=1, body="", location={"type": "Point", "coordinates": [40, 5]} ) Article.objects.create( @@ -503,14 +510,15 @@ def test_constant_score(self): @skipUnlessDBFeature("supports_atlas_search") class SearchGeoWithinTest(SearchUtilsMixin): - def setUp(self): - super().setUp() 
- self.create_search_index( + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.create_search_index( Article, "geowithin_location_index", {"mappings": {"dynamic": False, "fields": {"location": {"type": "geo"}}}}, ) - self.article = Article.objects.create( + cls.article = Article.objects.create( headline="geo", number=2, body="", location={"type": "Point", "coordinates": [40, 5]} ) Article.objects.create( @@ -553,9 +561,10 @@ def test_constant_score(self): @skipUnlessDBFeature("supports_atlas_search") @unittest.expectedFailure class SearchMoreLikeThisTest(SearchUtilsMixin): - def setUp(self): - super().setUp() - self.create_search_index( + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.create_search_index( Article, "mlt_index", { @@ -565,10 +574,10 @@ def setUp(self): } }, ) - self.article1 = Article.objects.create( + cls.article1 = Article.objects.create( headline="Space exploration", number=1, body="Webb telescope" ) - self.article2 = Article.objects.create( + cls.article2 = Article.objects.create( headline="The commodities fall", number=2, body="Commodities dropped sharply due to inflation concerns", @@ -597,9 +606,10 @@ def test_search_more_like_this(self): @skipUnlessDBFeature("supports_atlas_search") class CompoundSearchTest(SearchUtilsMixin): - def setUp(self): - super().setUp() - self.create_search_index( + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.create_search_index( Article, "compound_index", { @@ -613,25 +623,25 @@ def setUp(self): } }, ) - self.mars_mission = Article.objects.create( + cls.mars_mission = Article.objects.create( number=1, headline="space exploration", body="NASA launches a new mission to Mars, aiming to study surface geology", ) - self.exoplanet = Article.objects.create( + cls.exoplanet = Article.objects.create( number=2, headline="space exploration", body="Astronomers discover exoplanets orbiting distant stars using Webb telescope", ) - self.icy_moons = Article.objects.create( + 
cls.icy_moons = Article.objects.create( number=3, headline="space exploration", body="ESA prepares a robotic expedition to explore the icy moons of Jupiter", ) - self.comodities_drop = Article.objects.create( + cls.comodities_drop = Article.objects.create( number=4, headline="astronomy news", body="Commodities dropped sharply due to inflation concerns", @@ -765,9 +775,10 @@ def test_search_and_filter(self): @skipUnlessDBFeature("supports_atlas_search") class SearchVectorTest(SearchUtilsMixin): - def setUp(self): - super().setUp() - self.create_search_index( + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.create_search_index( Article, "vector_index", { @@ -784,13 +795,13 @@ def setUp(self): type="vectorSearch", ) - self.mars = Article.objects.create( + cls.mars = Article.objects.create( headline="Mars landing", number=1, body="The rover has landed on Mars", plot_embedding=[0.1, 0.2, 0.3], ) - self.cooking = Article.objects.create( + cls.cooking = Article.objects.create( headline="Cooking tips", number=2, body="This article is about pasta", From f0ab51eb5eb5830f48c8765f1d1a5d556c69b389 Mon Sep 17 00:00:00 2001 From: Emanuel Lupi Date: Tue, 22 Jul 2025 02:15:25 -0300 Subject: [PATCH 10/18] Add delayed assertion methods in unit test. 
--- tests/queries_/test_search.py | 149 +++++++++++++++------------------- 1 file changed, 67 insertions(+), 82 deletions(-) diff --git a/tests/queries_/test_search.py b/tests/queries_/test_search.py index f0d07082c..478a62a17 100644 --- a/tests/queries_/test_search.py +++ b/tests/queries_/test_search.py @@ -1,9 +1,11 @@ import unittest from collections.abc import Callable +from functools import wraps from time import monotonic, sleep from django.db import connection from django.db.models import Q +from django.db.models.query import QuerySet from django.db.utils import DatabaseError from django.test import TransactionTestCase, skipUnlessDBFeature from pymongo.operations import SearchIndexModel @@ -29,42 +31,30 @@ from .models import Article, Writer -def _wait_for_assertion(timeout: float = 120, interval: float = 0.5) -> None: - """Generic to block until the predicate returns true - - Args: - timeout (float, optional): Wait time for predicate. Defaults to TIMEOUT. - interval (float, optional): Interval to check predicate. Defaults to DELAY. - - Raises: - AssertionError: _description_ - """ - - @staticmethod - def inner_wait_loop(predicate: Callable): - """ - Waits until the given predicate stops raising AssertionError or DatabaseError. - - Args: - predicate (Callable): A function that raises AssertionError (or DatabaseError) - if a condition is not yet met. It should refresh its query each time - it's called (e.g., by using `qs.all()` to avoid cached results). - - Raises: - AssertionError or DatabaseError: If the predicate keeps failing beyond the timeout. 
- """ - start = monotonic() - while True: - try: - predicate() - except (AssertionError, DatabaseError): - if monotonic() - start > timeout: - raise - sleep(interval) - else: - break - - return inner_wait_loop +def _delayed_assertion(timeout: float = 120, interval: float = 0.5): + def decorator(assert_func): + @wraps(assert_func) + def wrapper(self, fetch, *args, **kwargs): + start = monotonic() + if not isinstance(fetch, Callable | QuerySet): + raise ValueError( + "The first argument to a delayed assertion must be a QuerySet or a callable " + "that returns the value to be asserted." + ) + if isinstance(fetch, QuerySet): + fetch = fetch.all + while True: + try: + return assert_func(self, fetch(), *args, **kwargs) + except (AssertionError, DatabaseError): + if monotonic() - start > timeout: + raise + sleep(interval) + + wrapper.__name__ = f"delayed{assert_func.__name__.title()}" + return wrapper + + return decorator @skipUnlessDBFeature("supports_atlas_search") @@ -72,6 +62,12 @@ class SearchUtilsMixin(TransactionTestCase): available_apps = [] models_to_clean = [Article] + delayedAssertCountEqual = _delayed_assertion(timeout=2)(TransactionTestCase.assertCountEqual) + delayedAssertListEqual = _delayed_assertion(timeout=2)(TransactionTestCase.assertListEqual) + delayedAssertQuerySetEqual = _delayed_assertion(timeout=2)( + TransactionTestCase.assertQuerySetEqual + ) + @classmethod def setUpClass(cls): super().setUpClass() @@ -96,8 +92,6 @@ def drop_search_indexes_and_data(cls): collection.drop_search_index(search_indexes["name"]) collection.delete_many({}) - wait_for_assertion = _wait_for_assertion(timeout=3) - @skipUnlessDBFeature("supports_atlas_search") class SearchEqualsTest(SearchUtilsMixin): @@ -119,7 +113,7 @@ def setUpClass(cls): def test_search_equals(self): qs = Article.objects.annotate(score=SearchEquals(path="headline", value="cross")) - self.wait_for_assertion(lambda: self.assertCountEqual(qs.all(), [self.article])) + self.delayedAssertCountEqual(qs, 
[self.article]) def test_boost_score(self): boost_score = SearchScoreOption({"boost": {"value": 3}}) @@ -127,7 +121,7 @@ def test_boost_score(self): qs = Article.objects.annotate( score=SearchEquals(path="headline", value="cross", score=boost_score) ) - self.wait_for_assertion(lambda: self.assertCountEqual(qs.all(), [self.article])) + self.delayedAssertCountEqual(qs, [self.article]) scored = qs.first() self.assertGreaterEqual(scored.score, 3.0) @@ -136,7 +130,7 @@ def test_constant_score(self): qs = Article.objects.annotate( score=SearchEquals(path="headline", value="cross", score=constant_score) ) - self.wait_for_assertion(lambda: self.assertCountEqual(qs.all(), [self.article])) + self.delayedAssertCountEqual(qs, [self.article]) scored = qs.first() self.assertAlmostEqual(scored.score, 10.0, places=2) @@ -155,7 +149,7 @@ def test_function_score(self): qs = Article.objects.annotate( score=SearchEquals(path="headline", value="cross", score=function_score) ) - self.wait_for_assertion(lambda: self.assertCountEqual(qs.all(), [self.article])) + self.delayedAssertCountEqual(qs, [self.article]) scored = qs.first() self.assertAlmostEqual(scored.score, 1.0, places=2) @@ -214,13 +208,13 @@ def test_search_autocomplete(self): fuzzy={"maxEdits": 2}, ) ) - self.wait_for_assertion(lambda: self.assertCountEqual(qs.all(), [self.article])) + self.delayedAssertCountEqual(qs.all, [self.article]) def test_search_autocomplete_embedded_model(self): qs = Article.objects.annotate( score=SearchAutocomplete(path="writer__name", query="Joselina") ) - self.wait_for_assertion(lambda: self.assertCountEqual(qs.all(), [self.article])) + self.delayedAssertCountEqual(qs.all, [self.article]) def test_constant_score(self): constant_score = SearchScoreOption({"constant": {"value": 10}}) @@ -233,7 +227,7 @@ def test_constant_score(self): score=constant_score, ) ) - self.wait_for_assertion(lambda: self.assertCountEqual(qs.all(), [self.article])) + self.delayedAssertCountEqual(qs.all, [self.article]) 
scored = qs.first() self.assertAlmostEqual(scored.score, 10.0, places=2) @@ -252,12 +246,12 @@ def setUpClass(cls): def test_search_exists(self): qs = Article.objects.annotate(score=SearchExists(path="body")) - self.wait_for_assertion(lambda: self.assertCountEqual(qs.all(), [self.article])) + self.delayedAssertCountEqual(qs.all, [self.article]) def test_constant_score(self): constant_score = SearchScoreOption({"constant": {"value": 10}}) qs = Article.objects.annotate(score=SearchExists(path="body", score=constant_score)) - self.wait_for_assertion(lambda: self.assertCountEqual(qs.all(), [self.article])) + self.delayedAssertCountEqual(qs.all, [self.article]) scored = qs.first() self.assertAlmostEqual(scored.score, 10.0, places=2) @@ -277,14 +271,14 @@ def setUpClass(cls): def test_search_in(self): qs = Article.objects.annotate(score=SearchIn(path="headline", value=["cross", "river"])) - self.wait_for_assertion(lambda: self.assertCountEqual(qs.all(), [self.article])) + self.delayedAssertCountEqual(qs.all, [self.article]) def test_constant_score(self): constant_score = SearchScoreOption({"constant": {"value": 10}}) qs = Article.objects.annotate( score=SearchIn(path="headline", value=["cross", "river"], score=constant_score) ) - self.wait_for_assertion(lambda: self.assertCountEqual(qs.all(), [self.article])) + self.delayedAssertCountEqual(qs.all, [self.article]) scored = qs.first() self.assertAlmostEqual(scored.score, 10.0, places=2) @@ -306,14 +300,14 @@ def setUpClass(cls): def test_search_phrase(self): qs = Article.objects.annotate(score=SearchPhrase(path="body", query="quick brown")) - self.wait_for_assertion(lambda: self.assertCountEqual(qs.all(), [self.article])) + self.delayedAssertCountEqual(qs.all, [self.article]) def test_constant_score(self): constant_score = SearchScoreOption({"constant": {"value": 10}}) qs = Article.objects.annotate( score=SearchPhrase(path="body", query="quick brown", score=constant_score) ) - self.wait_for_assertion(lambda: 
self.assertCountEqual(qs.all(), [self.article])) + self.delayedAssertCountEqual(qs.all, [self.article]) scored = qs.first() self.assertAlmostEqual(scored.score, 10.0, places=2) @@ -333,14 +327,14 @@ def setUpClass(cls): def test_search_range(self): qs = Article.objects.annotate(score=SearchRange(path="number", gte=10, lt=30)) - self.wait_for_assertion(lambda: self.assertCountEqual(qs.all(), [self.number20])) + self.delayedAssertCountEqual(qs.all, [self.number20]) def test_constant_score(self): constant_score = SearchScoreOption({"constant": {"value": 10}}) qs = Article.objects.annotate( score=SearchRange(path="number", gte=10, lt=30, score=constant_score) ) - self.wait_for_assertion(lambda: self.assertCountEqual(qs.all(), [self.number20])) + self.delayedAssertCountEqual(qs.all, [self.number20]) scored = qs.first() self.assertAlmostEqual(scored.score, 10.0, places=2) @@ -367,7 +361,7 @@ def test_search_regex(self): qs = Article.objects.annotate( score=SearchRegex(path="headline", query="hello.*", allow_analyzed_field=True) ) - self.wait_for_assertion(lambda: self.assertCountEqual(qs.all(), [self.article])) + self.delayedAssertCountEqual(qs.all, [self.article]) def test_constant_score(self): constant_score = SearchScoreOption({"constant": {"value": 10}}) @@ -376,7 +370,7 @@ def test_constant_score(self): path="headline", query="hello.*", allow_analyzed_field=True, score=constant_score ) ) - self.wait_for_assertion(lambda: self.assertCountEqual(qs.all(), [self.article])) + self.delayedAssertCountEqual(qs.all, [self.article]) scored = qs.first() self.assertAlmostEqual(scored.score, 10.0, places=2) @@ -398,11 +392,11 @@ def setUpClass(cls): def test_search_text(self): qs = Article.objects.annotate(score=SearchText(path="body", query="lazy")) - self.wait_for_assertion(lambda: self.assertCountEqual([self.article], qs.all())) + self.delayedAssertCountEqual(qs.all, [self.article]) def test_search_lookup(self): qs = Article.objects.filter(body__search="lazy") - 
self.wait_for_assertion(lambda: self.assertCountEqual([self.article], qs.all())) + self.delayedAssertCountEqual(qs.all, [self.article]) def test_search_text_with_fuzzy_and_criteria(self): qs = Article.objects.annotate( @@ -410,7 +404,7 @@ def test_search_text_with_fuzzy_and_criteria(self): path="body", query="lazzy", fuzzy={"maxEdits": 2}, match_criteria="all" ) ) - self.wait_for_assertion(lambda: self.assertCountEqual(qs.all(), [self.article])) + self.delayedAssertCountEqual(qs.all, [self.article]) def test_constant_score(self): constant_score = SearchScoreOption({"constant": {"value": 10}}) @@ -423,7 +417,7 @@ def test_constant_score(self): score=constant_score, ) ) - self.wait_for_assertion(lambda: self.assertCountEqual(qs.all(), [self.article])) + self.delayedAssertCountEqual(qs.all, [self.article]) scored = qs.first() self.assertAlmostEqual(scored.score, 10.0, places=2) @@ -448,14 +442,14 @@ def setUpClass(cls): def test_search_wildcard(self): qs = Article.objects.annotate(score=SearchWildcard(path="headline", query="dark-*")) - self.wait_for_assertion(lambda: self.assertCountEqual([self.article], qs.all())) + self.delayedAssertCountEqual(qs.all, [self.article]) def test_constant_score(self): constant_score = SearchScoreOption({"constant": {"value": 10}}) qs = Article.objects.annotate( score=SearchWildcard(path="headline", query="dark-*", score=constant_score) ) - self.wait_for_assertion(lambda: self.assertCountEqual(qs.all(), [self.article])) + self.delayedAssertCountEqual(qs.all, [self.article]) scored = qs.first() self.assertAlmostEqual(scored.score, 10.0, places=2) @@ -490,7 +484,7 @@ def test_search_geo_shape(self): qs = Article.objects.annotate( score=SearchGeoShape(path="location", relation="within", geometry=polygon) ) - self.wait_for_assertion(lambda: self.assertCountEqual(qs.all(), [self.article])) + self.delayedAssertCountEqual(qs.all, [self.article]) def test_constant_score(self): polygon = { @@ -503,7 +497,7 @@ def test_constant_score(self): 
path="location", relation="within", geometry=polygon, score=constant_score ) ) - self.wait_for_assertion(lambda: self.assertCountEqual(qs.all(), [self.article])) + self.delayedAssertCountEqual(qs.all, [self.article]) scored = qs.first() self.assertAlmostEqual(scored.score, 10.0, places=2) @@ -537,7 +531,7 @@ def test_search_geo_within(self): geo_object=polygon, ) ) - self.wait_for_assertion(lambda: self.assertCountEqual(qs.all(), [self.article])) + self.delayedAssertCountEqual(qs.all, [self.article]) def test_constant_score(self): polygon = { @@ -553,7 +547,7 @@ def test_constant_score(self): score=constant_score, ) ) - self.wait_for_assertion(lambda: self.assertCountEqual(qs.all(), [self.article])) + self.delayedAssertCountEqual(qs.all, [self.article]) scored = qs.first() self.assertAlmostEqual(scored.score, 10.0, places=2) @@ -597,10 +591,8 @@ def test_search_more_like_this(self): qs = Article.objects.annotate(score=SearchMoreLikeThis(documents=like_docs)).order_by( "score" ) - self.wait_for_assertion( - lambda: self.assertQuerySetEqual( - qs.all(), [self.article1, self.article2], lambda a: a.headline - ) + self.delayedAssertQuerySetEqual( + qs.all, [self.article1, self.article2], lambda a: a.headline ) @@ -660,16 +652,14 @@ def test_expression(self): ) qs = Article.objects.annotate(score=compound).order_by("score") - self.wait_for_assertion(lambda: self.assertCountEqual(qs.all(), [self.exoplanet])) + self.delayedAssertCountEqual(qs.all, [self.exoplanet]) def test_operations(self): expr = SearchEquals(path="headline", value="space exploration") & ~SearchEquals( path="number", value=3 ) qs = Article.objects.annotate(score=expr) - self.wait_for_assertion( - lambda: self.assertCountEqual(qs.all(), [self.mars_mission, self.exoplanet]) - ) + self.delayedAssertCountEqual(qs.all, [self.mars_mission, self.exoplanet]) def test_mixed_scores(self): boost_score = SearchScoreOption({"boost": {"value": 5}}) @@ -688,9 +678,7 @@ def test_mixed_scores(self): should=[should_expr], 
) qs = Article.objects.annotate(score=compound).order_by("-score") - self.wait_for_assertion( - lambda: self.assertListEqual(list(qs.all()), [self.exoplanet, self.mars_mission]) - ) + self.delayedAssertListEqual(lambda: list(qs.all()), [self.exoplanet, self.mars_mission]) # Exoplanet should rank first because of the constant 20 bump. self.assertEqual(qs.first(), self.exoplanet) @@ -706,10 +694,7 @@ def test_operationss_with_function_score(self): ) & ~SearchEquals(path="number", value=3) qs = Article.objects.annotate(score=expr).order_by("-score") - - self.wait_for_assertion( - lambda: self.assertListEqual(list(qs.all()), [self.exoplanet, self.mars_mission]) - ) + self.delayedAssertListEqual(lambda: list(qs.all()), [self.exoplanet, self.mars_mission]) # Returns mars_mission (score≈1) and exoplanet (score≈2) then; exoplanet first. self.assertEqual(qs.first(), self.exoplanet) @@ -770,7 +755,7 @@ def test_multiple_vector_search(self): def test_search_and_filter(self): qs = Article.objects.filter(headline__search="space exploration", number__gt=2) - self.wait_for_assertion(lambda: self.assertCountEqual(qs.all(), [self.icy_moons])) + self.delayedAssertCountEqual(qs.all, [self.icy_moons]) @skipUnlessDBFeature("supports_atlas_search") @@ -817,4 +802,4 @@ def test_vector_search(self): limit=2, ) qs = Article.objects.annotate(score=expr).order_by("-score") - self.wait_for_assertion(lambda: self.assertCountEqual(qs.all(), [self.mars, self.cooking])) + self.delayedAssertCountEqual(qs.all, [self.mars, self.cooking]) From 38663fc49bd4cd54f147b7177858319f4a834a03 Mon Sep 17 00:00:00 2001 From: Emanuel Lupi Date: Fri, 25 Jul 2025 11:05:21 -0300 Subject: [PATCH 11/18] Support operator as string --- django_mongodb_backend/expressions/search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/django_mongodb_backend/expressions/search.py b/django_mongodb_backend/expressions/search.py index 33d470aa0..418f506b3 100644 --- a/django_mongodb_backend/expressions/search.py 
+++ b/django_mongodb_backend/expressions/search.py @@ -850,7 +850,7 @@ class CombinedSearchExpression(SearchExpression): def __init__(self, lhs, operator, rhs): self.lhs = lhs - self.operator = operator + self.operator = Operator(operator) if not isinstance(operator, Operator) else operator self.rhs = rhs def get_source_expressions(self): From fb1db85deb3f2ddaedadb7ec7a8d9d682d023f78 Mon Sep 17 00:00:00 2001 From: Emanuel Lupi Date: Fri, 25 Jul 2025 11:05:34 -0300 Subject: [PATCH 12/18] Update docstring --- django_mongodb_backend/expressions/search.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/django_mongodb_backend/expressions/search.py b/django_mongodb_backend/expressions/search.py index 418f506b3..addf47d88 100644 --- a/django_mongodb_backend/expressions/search.py +++ b/django_mongodb_backend/expressions/search.py @@ -884,10 +884,11 @@ def as_mql(self, compiler, connection): class SearchVector(SearchExpression): """ - Atlas Search expression that performs vector similarity search on embedded vectors. + Atlas Search expression that performs vector similarity search using `$vectorSearch`. - This expression uses the **knnBeta** operator to find documents whose vector - embeddings are most similar to a given query vector. + This expression uses the `$vectorSearch` stage to retrieve documents whose vector + embeddings are most similar to a given query vector, according to approximate or + exact nearest-neighbor search. Example: SearchVector("embedding", [0.1, 0.2, 0.3], limit=10, num_candidates=100) @@ -897,7 +898,7 @@ class SearchVector(SearchExpression): query_vector: The query vector to compare against. limit: Maximum number of matching documents to return. num_candidates: Optional number of candidates to consider during search. - exact: Optional flag to enforce exact matching. + exact: Optional flag to enforce exact matching (default is approximate). filter: Optional filter expression to narrow candidate documents. 
Reference: https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-stage/ From aee4c1b678a7a3ccbe2ca30beb989b55d7ebc8c5 Mon Sep 17 00:00:00 2001 From: Emanuel Lupi Date: Fri, 25 Jul 2025 20:32:37 -0300 Subject: [PATCH 13/18] Add docs --- docs/source/ref/models/search.rst | 629 ++++++++++++++++++++++++++++++ 1 file changed, 629 insertions(+) create mode 100644 docs/source/ref/models/search.rst diff --git a/docs/source/ref/models/search.rst b/docs/source/ref/models/search.rst new file mode 100644 index 000000000..d2204521e --- /dev/null +++ b/docs/source/ref/models/search.rst @@ -0,0 +1,629 @@ +================ +Atlas search +================ + +The database functions in the ``django_mongodb_backend.expressions.search`` module ease +the use of MongoDB Atlas search's `full text and vector search engine +`_. + +For the examples in this document, we'll use the following models: + +.. code-block:: pycon + + from django.db import models + from django_mongodb_backend.models import EmbeddedModel + from django_mongodb_backend.fields import ArrayField, EmbeddedModelField + + class Writer(EmbeddedModel): + name = models.CharField(max_length=10) + + + class Article(models.Model): + headline = models.CharField(max_length=100) + number = models.IntegerField() + body = models.TextField() + location = models.JSONField(null=True) + plot_embedding = ArrayField(models.FloatField(), size=3, null=True) + writer = EmbeddedModelField(Writer, null=True) + + +``SearchEquals`` +================ +Atlas Search expression that matches documents where a field is equal to a given value. + +This expression uses the ``equals`` operator to perform exact matches +on fields indexed in a MongoDB Atlas Search index. + +.. _SearchEquals docs: https://www.mongodb.com/docs/atlas/atlas-search/equals/ + + +.. 
code-block:: pycon + + >>> from django_mongodb_backend.expressions.search import SearchEquals + >>> Article.objects.annotate(score=SearchEquals(path="headline", value="title")) + ]> + +The ``path`` argument can be either the name of a field (as a string), or a +:class:`~django.db.models.expressions.Col` instance. The ``value`` argument must be a string +or a :class:`~django.db.models.expressions.Value`. + +``SearchEquals`` objects can be reused and combined with other search expressions. + +See :ref:`search-operations-combinable` + + +``SearchAutocomplete`` +====================== + +Atlas Search expression that enables autocomplete behavior on string fields. + +This expression uses the ``autocomplete`` operator to match the input query against +a field indexed with ``"type": "autocomplete"`` in a MongoDB Atlas Search index. + +.. _SearchAutocomplete docs: https://www.mongodb.com/docs/atlas/atlas-search/autocomplete/ + +.. code-block:: pycon + + >>> from django_mongodb_backend.expressions.search import SearchAutocomplete + >>> Article.objects.annotate(score=SearchAutocomplete(path="headline", query="harry")) + ]> + +The ``path`` argument specifies the field to search and can be a string or a +:class:`~django.db.models.expressions.Col`. The ``query`` is the user input string +to autocomplete and can be passed as a string or a +:class:`~django.db.models.expressions.Value`. + +Optional arguments: + +- ``fuzzy``: A dictionary with fuzzy matching options such as ``{"maxEdits": 1}``. +- ``token_order``: Controls token sequence behavior. Accepts values like ``"sequential"`` or ``"any"``. +- ``score``: An optional score expression such as ``{"boost": {"value": 5}}``. + +``SearchAutocomplete`` expressions can be reused and composed with other search expressions. + +See also: :ref:`search-operations-combinable` + + +``SearchExists`` +================ + +Atlas Search expression that matches documents where a field exists. 
+ +This expression uses the ``exists`` operator to check whether the specified path +is present in the document. It's useful for filtering documents that include +(or exclude) optional fields. + +.. _SearchExists docs: https://www.mongodb.com/docs/atlas/atlas-search/exists/ + +.. code-block:: pycon + + >>> from django_mongodb_backend.expressions.search import SearchExists + >>> Article.objects.annotate(score=SearchExists(path="writer__name")) + ]> + +The ``path`` argument specifies the document path to check and can be provided as a string +or a :class:`~django.db.models.expressions.Col`. + +An optional ``score`` argument can be used to modify the relevance score of the result. + +``SearchExists`` expressions can be reused and combined with other search expressions. + +See also: :ref:`search-operations-combinable` + + +``SearchIn`` +============ + +Atlas Search expression that matches documents where a field's value is in a given list. + +This expression uses the ``in`` operator to match documents whose field contains +a value from the provided array. + +.. _SearchIn docs: https://www.mongodb.com/docs/atlas/atlas-search/in/ + +.. code-block:: pycon + + >>> from django_mongodb_backend.expressions.search import SearchIn + >>> Article.objects.annotate(score=SearchIn(path="status", value=["pending", "approved"])) + ]> + +The ``path`` argument can be the name of a field (as a string) or a +:class:`~django.db.models.expressions.Col`. The ``value`` must be a list +of values or a :class:`~django.db.models.expressions.Value`. + +An optional ``score`` argument can be used to customize relevance scoring. + +``SearchIn`` expressions can be reused and combined with other search +expressions. + +See also: :ref:`search-operations-combinable` + + +``SearchPhrase`` +================ + +Atlas Search expression that matches a phrase in the specified field. + +This expression uses the ``phrase`` operator to find exact or near-exact sequences +of terms. 
It supports optional slop (term distance) and synonym mappings defined +in the Atlas Search index. + +.. _SearchPhrase docs: https://www.mongodb.com/docs/atlas/atlas-search/phrase/ + +.. code-block:: pycon + + >>> from django_mongodb_backend.expressions.search import SearchPhrase + >>> Article.objects.annotate( + ... score=SearchPhrase(path="body", query="climate change", slop=2) + ... ) + ]> + +The ``path`` argument specifies the field to search and can be a string or a +:class:`~django.db.models.expressions.Col`. The ``query`` is the phrase to match, +passed as a string or a list of strings (terms). + +Optional arguments: + +- ``slop``: The maximum number of terms allowed between phrase terms. +- ``synonyms``: The name of a synonym mapping defined in your Atlas index. +- ``score``: An optional score expression to adjust relevance. + +``SearchPhrase`` expressions can be reused and combined with other search expressions. + +See also: :ref:`search-operations-combinable` + + +``SearchQueryString`` +===================== + +Atlas Search expression that matches using a Lucene-style query string. + +This expression uses the ``queryString`` operator to parse and execute full-text +queries written in a simplified Lucene syntax. It supports features like boolean +operators, wildcards, and field-specific terms. + +.. _SearchQueryString docs: https://www.mongodb.com/docs/atlas/atlas-search/queryString/ + +.. code-block:: pycon + + >>> from django_mongodb_backend.expressions.search import SearchQueryString + >>> Article.objects.annotate( + ... score=SearchQueryString(path="body", query="django AND (search OR query)") + ... ) + ]> + +The ``path`` argument can be a string or a :class:`~django.db.models.expressions.Col` +representing the field to query. The ``query`` argument is a Lucene-style query string. + +An optional ``score`` argument may be used to adjust relevance scoring. + +``SearchQueryString`` expressions can be reused and combined with other search expressions. 
+ +See also: :ref:`search-operations-combinable` + + +``SearchRange`` +=============== + +Atlas Search expression that filters documents within a specified range of values. + +This expression uses the ``range`` operator to match numeric, date, or other comparable +fields based on upper and/or lower bounds. + +.. _SearchRange docs: https://www.mongodb.com/docs/atlas/atlas-search/range/ + +.. code-block:: pycon + + >>> from django_mongodb_backend.expressions.search import SearchRange + >>> Article.objects.annotate(score=SearchRange(path="number", gte=2000, lt=2020)) + ]> + +The ``path`` argument specifies the field to filter and can be a string or a +:class:`~django.db.models.expressions.Col`. + +Optional arguments: + +- ``lt``: Exclusive upper bound (``<``) +- ``lte``: Inclusive upper bound (``<=``) +- ``gt``: Exclusive lower bound (``>``) +- ``gte``: Inclusive lower bound (``>=``) +- ``score``: An optional score expression to influence relevance + +``SearchRange`` expressions can be reused and combined with other search expressions. + +See also: :ref:`search-operations-combinable` + + +``SearchRegex`` +=============== + +Atlas Search expression that matches string fields using a regular expression. + +This expression uses the ``regex`` operator to apply a regular expression pattern +to the contents of a specified field. + +.. _SearchRegex docs: https://www.mongodb.com/docs/atlas/atlas-search/regex/ + +.. code-block:: pycon + + >>> from django_mongodb_backend.expressions.search import SearchRegex + >>> Article.objects.annotate(score=SearchRegex(path="headline", query=r"^Breaking_")) + ]> + +The ``path`` argument specifies the field to search and can be provided as a string +or a :class:`~django.db.models.expressions.Col`. The ``query`` is a regular expression +string that will be applied to the field contents. + +Optional arguments: + +- ``allow_analyzed_field``: Boolean indicating whether to allow matching against analyzed fields (defaults to ``False``). 
+- ``score``: An optional score expression to adjust relevance. + +``SearchRegex`` expressions can be reused and combined with other search expressions. + +See also: :ref:`search-operations-combinable` + + +``SearchText`` +============== + +Atlas Search expression that performs full-text search using the ``text`` operator. + +This expression matches terms in the specified field and supports fuzzy matching, +match criteria, and synonym mappings. + +.. _SearchText docs: https://www.mongodb.com/docs/atlas/atlas-search/text/ + +.. code-block:: pycon + + >>> from django_mongodb_backend.expressions.search import SearchText + >>> Article.objects.annotate( + ... score=SearchText( + ... path="body", query="mongodb", fuzzy={"maxEdits": 1}, match_criteria="all" + ... ) + ... ) + ]> + +The ``path`` argument specifies the field to search and can be provided as a string +or a :class:`~django.db.models.expressions.Col`. The ``query`` argument is the search +term or phrase. + +Optional arguments: + +- ``fuzzy``: A dictionary of fuzzy matching options, such as ``{"maxEdits": 1}``. +- ``match_criteria``: Whether to match ``"all"`` or ``"any"`` terms (defaults to Atlas Search behavior). +- ``synonyms``: The name of a synonym mapping defined in your Atlas index. +- ``score``: An optional expression to influence relevance scoring. + +``SearchText`` expressions can be reused and combined with other search expressions. + +See also: :ref:`search-operations-combinable` + + +``SearchWildcard`` +================== + +Atlas Search expression that matches strings using wildcard patterns. + +This expression uses the ``wildcard`` operator to search for terms matching +a pattern with ``*`` (any sequence of characters) and ``?`` (any single character) wildcards. + +.. _SearchWildcard docs: https://www.mongodb.com/docs/atlas/atlas-search/wildcard/ + +.. code-block:: pycon + + >>> from django_mongodb_backend.expressions.search import SearchWildcard + >>> Article.objects.annotate( + ... 
score=SearchWildcard(path="headline", query="report_202?_final*") + ... ) + ]> + +The ``path`` argument specifies the field to search and can be a string or a +:class:`~django.db.models.expressions.Col`. The ``query`` is a wildcard string +that may include ``*`` and ``?``. + +Optional arguments: + +- ``allow_analyzed_field``: Boolean that allows matching against analyzed fields (defaults to ``False``). +- ``score``: An optional expression to adjust relevance. + +``SearchWildcard`` expressions can be reused and combined with other search expressions. + +See also: :ref:`search-operations-combinable` + + +``SearchGeoShape`` +================== + +Atlas Search expression that filters documents based on spatial relationships with a geometry. + +This expression uses the ``geoShape`` operator to match documents where a geo field +has a specified spatial relation to a given GeoJSON geometry. + +.. _SearchGeoShape docs: https://www.mongodb.com/docs/atlas/atlas-search/geoShape/ + +.. code-block:: pycon + + >>> from django_mongodb_backend.expressions.search import SearchGeoShape + >>> polygon = {"type": "Polygon", "coordinates": [[[0, 0], [3, 6], [6, 1], [0, 0]]]} + >>> Article.objects.annotate( + ... score=SearchGeoShape(path="location", relation="within", geometry=polygon) + ... ) + ]> + +The ``path`` argument specifies the field to filter and can be a string or a +:class:`~django.db.models.expressions.Col`. + +Required arguments: + +- ``relation``: The spatial relation to test. Valid values include ``"within"``, ``"intersects"``, and ``"disjoint"``. +- ``geometry``: A GeoJSON geometry object to compare against. + +Optional: + +- ``score``: An optional expression to modify the relevance score. + +``SearchGeoShape`` expressions can be reused and combined with other search expressions. 
+ +See also: :ref:`search-operations-combinable` + + +``SearchGeoWithin`` +=================== + +Atlas Search expression that filters documents with geo fields contained within a specified shape. + +This expression uses the ``geoWithin`` operator to match documents where the geo field lies +entirely within the provided GeoJSON geometry. + +.. _SearchGeoWithin docs: https://www.mongodb.com/docs/atlas/atlas-search/geoWithin/ + +.. code-block:: pycon + + >>> from django_mongodb_backend.expressions.search import SearchGeoWithin + >>> polygon = {"type": "Polygon", "coordinates": [[[0, 0], [3, 6], [6, 1], [0, 0]]]} + >>> Article.objects.annotate( + ... score=SearchGeoWithin(path="location", kind="Polygon", geo_object=polygon) + ... ) + ]> + +The ``path`` argument specifies the geo field to filter and can be a string or a +:class:`~django.db.models.expressions.Col`. + +Required arguments: + +- ``kind``: The GeoJSON geometry type, such as ``"Polygon"`` or ``"MultiPolygon"``. +- ``geo_object``: The GeoJSON geometry defining the spatial boundary. + +Optional: + +- ``score``: An optional expression to adjust the relevance score. + +``SearchGeoWithin`` expressions can be reused and combined with other search expressions. + +See also: :ref:`search-operations-combinable` + + +``SearchMoreLikeThis`` +====================== + +Atlas Search expression that finds documents similar to the provided examples. + +This expression uses the ``moreLikeThis`` operator to retrieve documents that +resemble one or more example documents. + +.. _SearchMoreLikeThis docs: https://www.mongodb.com/docs/atlas/atlas-search/morelikethis/ + +.. code-block:: pycon + + >>> from bson import ObjectId + >>> from django_mongodb_backend.expressions.search import SearchMoreLikeThis + >>> Article.objects.annotate( + ... score=SearchMoreLikeThis( + ... [{"_id": ObjectId("66cabc1234567890abcdefff")}, {"title": "Example"}] + ... ) + ... 
) + ]> + +The ``documents`` argument must be a list of example documents or expressions +that serve as references for similarity. + +Optional: + +- ``score``: An optional expression to adjust the relevance score of the results. + +``SearchMoreLikeThis`` expressions can be reused and combined with other search expressions. + +See also: :ref:`search-operations-combinable` + + +``CompoundExpression`` +====================== + +Compound expression that combines multiple search clauses using boolean logic. + +This expression uses the ``compound`` operator in MongoDB Atlas Search to +combine sub-expressions with ``must``, ``must_not``, ``should``, and ``filter`` clauses. +It enables fine-grained control over how multiple conditions contribute to document matching and scoring. + +.. _CompoundExpression docs: https://www.mongodb.com/docs/atlas/atlas-search/compound/ + +.. code-block:: pycon + + >>> from django_mongodb_backend.expressions.search import CompoundExpression, SearchText + >>> expr1 = SearchText("headline", "mongodb") + >>> expr2 = SearchText("body", "atlas") + >>> expr3 = SearchText("body", "deprecated") + >>> expr4 = SearchText("tags", "database") + >>> Article.objects.annotate( + ... score=CompoundExpression( + ... must=[expr1, expr2], must_not=[expr3], should=[expr4], minimum_should_match=1 + ... ) + ... ) + ]> + +Arguments: + +- ``must``: A list of expressions that **must** match. +- ``must_not``: A list of expressions that **must not** match. +- ``should``: A list of optional expressions that **should** match. These can improve scoring. +- ``filter``: A list of expressions used for filtering without affecting relevance scoring. +- ``minimum_should_match``: The minimum number of ``should`` clauses that must match. +- ``score``: An optional expression to adjust the final score. + +``CompoundExpression`` is useful for building advanced and flexible query logic in Atlas Search. 
+ +See also: :ref:`search-operations-combinable` + + +``CombinedSearchExpression`` +============================ + +Expression that combines two Atlas Search expressions using a boolean operator. + +This expression is used internally when combining search expressions with +Python’s bitwise operators (``&``, ``|``, ``~``), and corresponds to +logical operators such as ``and``, ``or``, and ``not``. + +.. note:: + This expression is typically created when using the combinable interface + (e.g., ``expr1 & expr2``). It can also be constructed manually. + +.. code-block:: pycon + + >>> from django_mongodb_backend.expressions.search import CombinedSearchExpression + >>> CombinedSearchExpression(expr1, "and", expr2) + +Args: + +- ``lhs``: The left-hand side search expression. +- ``operator``: A string representing the logical operator (``"and"``, ``"or"``, or ``"not"``). +- ``rhs``: The right-hand side search expression. + +This is the underlying expression used to support operator overloading in Atlas Search expressions. + +.. _search-operations-combinable: + +**Combinable expressions** +-------------------------- + +All Atlas Search expressions subclassed from ``SearchExpression`` can be combined +using Python's bitwise operators: + +- ``&`` → ``and`` +- ``|`` → ``or`` +- ``~`` → ``not`` (unary) + +This allows for more expressive and readable search logic: + +.. code-block:: pycon + + >>> expr = SearchText("headline", "mongodb") & ~SearchText("body", "deprecated") + >>> Article.objects.annotate(score=expr) + +Under the hood, these expressions are translated into ``CombinedSearchExpression`` instances. + +``CombinedSearchExpression`` can also be reused and nested with other compound expressions. + + +``SearchVector`` +================ + +Atlas Search expression that performs vector similarity search using the ``$vectorSearch`` stage. 
+ +This expression retrieves documents whose vector field is most similar to a given query vector, +using either approximate or exact nearest-neighbor search. + +.. _SearchVector docs: https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-stage/ + +.. code-block:: pycon + + >>> from django_mongodb_backend.expressions.search import SearchVector + >>> Article.objects.annotate( + ... score=SearchVector( + ... path="plot_embedding", + ... query_vector=[0.1, 0.2, 0.3], + ... limit=10, + ... num_candidates=100, + ... exact=True, + ... ) + ... ) + ]> + +Arguments: + +- ``path``: The document path to the vector field (string or :class:`~django.db.models.expressions.Col`). +- ``query_vector``: The input vector used for similarity comparison. +- ``limit``: The maximum number of matching documents to return. +- ``num_candidates``: (Optional) The number of candidate documents considered during search. +- ``exact``: (Optional) Whether to enforce exact search instead of approximate (defaults to ``False``). +- ``filter``: (Optional) A filter expression to restrict the candidate documents. + +.. warning:: + + ``SearchVector`` expressions cannot be combined using logical operators such as ``&``, ``|``, or ``~``. + Attempting to do so will raise an error. + +``SearchVector`` is typically used on its own in the ``score`` annotation and cannot be nested or composed. + + +``SearchScoreOption`` +===================== + +Expression used to control or mutate the relevance score in an Atlas Search expression. + +This expression can be passed to most Atlas Search operators through the ``score`` argument +to customize how MongoDB calculates and applies scoring. + +It directly maps to the ``score`` option of the relevant Atlas Search operator. + +.. _SearchScoreOption docs: https://www.mongodb.com/docs/atlas/atlas-search/scoring/ + +.. 
code-block:: pycon + + >>> from django_mongodb_backend.expressions.search import SearchText, SearchScoreOption + >>> boost = SearchScoreOption({"boost": {"value": 5}}) + >>> Article.objects.annotate(score=SearchText(path="body", query="django", score=boost)) + + +Accepted options depend on the underlying operator and may include: + +- ``boost``: Increases the score of documents matching a specific clause. +- ``constant``: Applies a fixed score to all matches. +- ``function``: Uses a mathematical function to compute the score dynamically. +- ``path``: Scores documents based on the value of a field. + +The ``SearchScoreOption`` is a low-level utility used to build the ``score`` subdocument +and can be reused across multiple search expressions. + +It is typically passed as the ``score`` parameter to any search expression that supports it. + + +The ``search`` lookup +====================== + +Django lookup to enable Atlas Search full-text querying via the ``search`` lookup. + +This lookup allows using the ``search`` lookup on Django ``CharField`` and ``TextField`` +to perform Atlas Search ``text`` queries seamlessly within Django ORM filters. + +It internally creates a ``SearchText`` expression on the left-hand side and compares its +score with zero to filter matching documents. + +.. code-block:: pycon + + >>> from django.db.models import CharField, TextField + >>> from django_mongodb_backend.expressions.search import SearchTextLookup + + >>> # Example usage in a filter + >>> Article.objects.filter(headline__search="mongodb") + +The lookup is automatically registered on ``CharField`` and ``TextField``, enabling +expressions like ``fieldname__search='query'``. + +Under the hood: + +- The left-hand side of the lookup is wrapped into a ``SearchText`` expression. +- The lookup compiles to a MongoDB query that filters documents with a score greater than or equal to zero. + +This allows for concise and idiomatic integration of Atlas Search within Django filters.
From d65da2a4f7bebb819ae7ba4c431d2895e512360d Mon Sep 17 00:00:00 2001 From: Emanuel Lupi Date: Fri, 25 Jul 2025 22:21:26 -0300 Subject: [PATCH 14/18] Edit docs. --- docs/source/ref/models/search.rst | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/docs/source/ref/models/search.rst b/docs/source/ref/models/search.rst index d2204521e..912973bb6 100644 --- a/docs/source/ref/models/search.rst +++ b/docs/source/ref/models/search.rst @@ -124,7 +124,7 @@ a value from the provided array. .. code-block:: pycon >>> from django_mongodb_backend.expressions.search import SearchIn - >>> Article.objects.annotate(score=SearchIn(path="status", value=["pending", "approved"])) + >>> Article.objects.annotate(score=SearchIn(path="number", value=[1, 2])) ]> The ``path`` argument can be the name of a field (as a string) or a @@ -392,7 +392,7 @@ The ``path`` argument specifies the geo field to filter and can be a string or a Required arguments: -- ``kind``: The GeoJSON geometry type, such as ``"Polygon"`` or ``"MultiPolygon"``. +- ``kind``: The GeoJSON geometry type ``circle``, ``box``, or ``geometry``. - ``geo_object``: The GeoJSON geometry defining the spatial boundary. Optional: @@ -454,7 +454,7 @@ It enables fine-grained control over how multiple conditions contribute to docum >>> expr1 = SearchText("headline", "mongodb") >>> expr2 = SearchText("body", "atlas") >>> expr3 = SearchText("body", "deprecated") - >>> expr4 = SearchText("tags", "database") + >>> expr4 = SearchText("headline", "database") >>> Article.objects.annotate( ... score=CompoundExpression( ... must=[expr1, expr2], must_not=[expr3], should=[expr4], minimum_should_match=1 @@ -492,6 +492,8 @@ logical operators such as ``and``, ``or``, and ``not``. .. 
code-block:: pycon >>> from django_mongodb_backend.expressions.search import CombinedSearchExpression + >>> expr1 = SearchText("headline", "mongodb") + >>> expr2 = SearchText("body", "atlas") >>> CombinedSearchExpression(expr1, "and", expr2) Args: @@ -545,7 +547,7 @@ using either approximate or exact nearest-neighbor search. ... query_vector=[0.1, 0.2, 0.3], ... limit=10, ... num_candidates=100, - ... exact=True, + ... exact=False, ... ) ... ) ]> @@ -584,7 +586,7 @@ It directly maps to the ``score`` option of the relevant Atlas Search operator. >>> from django_mongodb_backend.expressions.search import SearchText, SearchScoreOption >>> boost = SearchScoreOption({"boost": {"value": 5}}) >>> Article.objects.annotate(score=SearchText(path="body", query="django", score=boost)) - + ]> Accepted options depend on the underlying operator and may include: @@ -612,11 +614,8 @@ score with zero to filter matching documents. .. code-block:: pycon - >>> from django.db.models import CharField, TextField - >>> from django_mongodb_backend.expressions.search import SearchTextLookup - - >>> # Example usage in a filter >>> Article.objects.filter(headline__search="mongodb") + ]> The lookup is automatically registered on ``CharField`` and ``TextField``, enabling expressions like ``fieldname__search='query'``. From e7f4d22b516247005fa43c4334b1e7abe583c6ed Mon Sep 17 00:00:00 2001 From: Emanuel Lupi Date: Fri, 25 Jul 2025 23:39:26 -0300 Subject: [PATCH 15/18] Update docs. --- docs/source/ref/models/index.rst | 1 + docs/source/ref/models/search.rst | 391 +++++++++++++++++++----------- 2 files changed, 253 insertions(+), 139 deletions(-) diff --git a/docs/source/ref/models/index.rst b/docs/source/ref/models/index.rst index 42f7a12bb..4639417f4 100644 --- a/docs/source/ref/models/index.rst +++ b/docs/source/ref/models/index.rst @@ -11,3 +11,4 @@ Model API reference. 
querysets models indexes + search diff --git a/docs/source/ref/models/search.rst b/docs/source/ref/models/search.rst index 912973bb6..7fe666286 100644 --- a/docs/source/ref/models/search.rst +++ b/docs/source/ref/models/search.rst @@ -2,9 +2,9 @@ Atlas search ================ -The database functions in the ``django_mongodb_backend.expressions.search`` module ease -the use of MongoDB Atlas search's `full text and vector search engine -`_. +The database functions in the ``django_mongodb_backend.expressions.search`` +module ease the use of MongoDB Atlas search's `full text and vector search +engine `_. For the examples in this document, we'll use the following models: @@ -29,12 +29,13 @@ For the examples in this document, we'll use the following models: ``SearchEquals`` ================ -Atlas Search expression that matches documents where a field is equal to a given value. +Atlas Search expression that matches documents where a field is equal to a +given value. This expression uses the ``equals`` operator to perform exact matches on fields indexed in a MongoDB Atlas Search index. -.. _SearchEquals docs: https://www.mongodb.com/docs/atlas/atlas-search/equals/ +`SearchEquals docs `_ .. code-block:: pycon @@ -44,10 +45,11 @@ on fields indexed in a MongoDB Atlas Search index. ]> The ``path`` argument can be either the name of a field (as a string), or a -:class:`~django.db.models.expressions.Col` instance. The ``value`` argument must be a string -or a :class:`~django.db.models.expressions.Value`. +:class:`~django.db.models.expressions.Col` instance. The ``value`` argument +must be a string or a :class:`~django.db.models.expressions.Value`. -``SearchEquals`` objects can be reused and combined with other search expressions. +``SearchEquals`` objects can be reused and combined with other search +expressions. 
See :ref:`search-operations-combinable` @@ -57,29 +59,36 @@ See :ref:`search-operations-combinable` Atlas Search expression that enables autocomplete behavior on string fields. -This expression uses the ``autocomplete`` operator to match the input query against -a field indexed with ``"type": "autocomplete"`` in a MongoDB Atlas Search index. +This expression uses the ``autocomplete`` operator to match the input query +against a field indexed with ``"type": "autocomplete"`` in a MongoDB Atlas +Search index. -.. _SearchAutocomplete docs: https://www.mongodb.com/docs/atlas/atlas-search/autocomplete/ +`SearchAutocomplete docs `_ .. code-block:: pycon >>> from django_mongodb_backend.expressions.search import SearchAutocomplete >>> Article.objects.annotate(score=SearchAutocomplete(path="headline", query="harry")) - ]> + , + + ]> The ``path`` argument specifies the field to search and can be a string or a -:class:`~django.db.models.expressions.Col`. The ``query`` is the user input string -to autocomplete and can be passed as a string or a +:class:`~django.db.models.expressions.Col`. The ``query`` is the user input +string to autocomplete and can be passed as a string or a :class:`~django.db.models.expressions.Value`. Optional arguments: -- ``fuzzy``: A dictionary with fuzzy matching options such as ``{"maxEdits": 1}``. -- ``token_order``: Controls token sequence behavior. Accepts values like ``"sequential"`` or ``"any"``. +- ``fuzzy``: A dictionary with fuzzy matching options such as + ``{"maxEdits": 1}``. +- ``token_order``: Controls token sequence behavior. Accepts values like + ``"sequential"`` or ``"any"``. - ``score``: An optional score expression such as ``{"boost": {"value": 5}}``. -``SearchAutocomplete`` expressions can be reused and composed with other search expressions. +``SearchAutocomplete`` expressions can be reused and composed with other +search expressions. 
See also: :ref:`search-operations-combinable` @@ -89,24 +98,29 @@ See also: :ref:`search-operations-combinable` Atlas Search expression that matches documents where a field exists. -This expression uses the ``exists`` operator to check whether the specified path -is present in the document. It's useful for filtering documents that include -(or exclude) optional fields. +This expression uses the ``exists`` operator to check whether the specified +path is present in the document. It's useful for filtering documents that +include (or exclude) optional fields. -.. _SearchExists docs: https://www.mongodb.com/docs/atlas/atlas-search/exists/ +`SearchExists docs `_ .. code-block:: pycon >>> from django_mongodb_backend.expressions.search import SearchExists >>> Article.objects.annotate(score=SearchExists(path="writer__name")) - ]> + , + + ]> -The ``path`` argument specifies the document path to check and can be provided as a string -or a :class:`~django.db.models.expressions.Col`. +The ``path`` argument specifies the document path to check and can be provided +as a string or a :class:`~django.db.models.expressions.Col`. -An optional ``score`` argument can be used to modify the relevance score of the result. +An optional ``score`` argument can be used to modify the relevance score of the +result. -``SearchExists`` expressions can be reused and combined with other search expressions. +``SearchExists`` expressions can be reused and combined with other search +expressions. See also: :ref:`search-operations-combinable` @@ -114,18 +128,22 @@ See also: :ref:`search-operations-combinable` ``SearchIn`` ============ -Atlas Search expression that matches documents where a field's value is in a given list. +Atlas Search expression that matches documents where a field's value is in a +given list. -This expression uses the ``in`` operator to match documents whose field contains -a value from the provided array. 
+This expression uses the ``in`` operator to match documents whose field +contains a value from the provided array. -.. _SearchIn docs: https://www.mongodb.com/docs/atlas/atlas-search/in/ +`SearchIn docs `_ .. code-block:: pycon >>> from django_mongodb_backend.expressions.search import SearchIn >>> Article.objects.annotate(score=SearchIn(path="number", value=[1, 2])) - ]> + , + + ]> The ``path`` argument can be the name of a field (as a string) or a :class:`~django.db.models.expressions.Col`. The ``value`` must be a list @@ -144,11 +162,11 @@ See also: :ref:`search-operations-combinable` Atlas Search expression that matches a phrase in the specified field. -This expression uses the ``phrase`` operator to find exact or near-exact sequences -of terms. It supports optional slop (term distance) and synonym mappings defined -in the Atlas Search index. +This expression uses the ``phrase`` operator to find exact or near-exact +sequences of terms. It supports optional slop (term distance) and synonym +mappings defined in the Atlas Search index. -.. _SearchPhrase docs: https://www.mongodb.com/docs/atlas/atlas-search/phrase/ +`SearchPhrase docs `_ .. code-block:: pycon @@ -156,11 +174,14 @@ in the Atlas Search index. >>> Article.objects.annotate( ... score=SearchPhrase(path="body", query="climate change", slop=2) ... ) - ]> + , + + ]> The ``path`` argument specifies the field to search and can be a string or a -:class:`~django.db.models.expressions.Col`. The ``query`` is the phrase to match, -passed as a string or a list of strings (terms). +:class:`~django.db.models.expressions.Col`. The ``query`` is the phrase to +match, passed as a string or a list of strings (terms). Optional arguments: @@ -168,7 +189,8 @@ Optional arguments: - ``synonyms``: The name of a synonym mapping defined in your Atlas index. - ``score``: An optional score expression to adjust relevance. -``SearchPhrase`` expressions can be reused and combined with other search expressions. 
+``SearchPhrase`` expressions can be reused and combined with other search +expressions. See also: :ref:`search-operations-combinable` @@ -178,11 +200,11 @@ See also: :ref:`search-operations-combinable` Atlas Search expression that matches using a Lucene-style query string. -This expression uses the ``queryString`` operator to parse and execute full-text -queries written in a simplified Lucene syntax. It supports features like boolean -operators, wildcards, and field-specific terms. +This expression uses the ``queryString`` operator to parse and execute +full-text queries written in a simplified Lucene syntax. It supports features +like boolean operators, wildcards, and field-specific terms. -.. _SearchQueryString docs: https://www.mongodb.com/docs/atlas/atlas-search/queryString/ +`SearchQueryString docs `_ .. code-block:: pycon @@ -190,14 +212,19 @@ operators, wildcards, and field-specific terms. >>> Article.objects.annotate( ... score=SearchQueryString(path="body", query="django AND (search OR query)") ... ) - ]> + , + + ]> -The ``path`` argument can be a string or a :class:`~django.db.models.expressions.Col` -representing the field to query. The ``query`` argument is a Lucene-style query string. +The ``path`` argument can be a string or a +:class:`~django.db.models.expressions.Col` representing the field to query. +The ``query`` argument is a Lucene-style query string. An optional ``score`` argument may be used to adjust relevance scoring. -``SearchQueryString`` expressions can be reused and combined with other search expressions. +``SearchQueryString`` expressions can be reused and combined with other search +expressions. See also: :ref:`search-operations-combinable` @@ -205,18 +232,22 @@ See also: :ref:`search-operations-combinable` ``SearchRange`` =============== -Atlas Search expression that filters documents within a specified range of values. +Atlas Search expression that filters documents within a specified range of +values. 
-This expression uses the ``range`` operator to match numeric, date, or other comparable -fields based on upper and/or lower bounds. +This expression uses the ``range`` operator to match numeric, date, or other +comparable fields based on upper and/or lower bounds. -.. _SearchRange docs: https://www.mongodb.com/docs/atlas/atlas-search/range/ +`SearchRange docs `_ .. code-block:: pycon >>> from django_mongodb_backend.expressions.search import SearchRange >>> Article.objects.annotate(score=SearchRange(path="number", gte=2000, lt=2020)) - ]> + , + + ]> The ``path`` argument specifies the field to filter and can be a string or a :class:`~django.db.models.expressions.Col`. @@ -229,7 +260,8 @@ Optional arguments: - ``gte``: Inclusive lower bound (``>=``) - ``score``: An optional score expression to influence relevance -``SearchRange`` expressions can be reused and combined with other search expressions. +``SearchRange`` expressions can be reused and combined with other search +expressions. See also: :ref:`search-operations-combinable` @@ -239,27 +271,32 @@ See also: :ref:`search-operations-combinable` Atlas Search expression that matches string fields using a regular expression. -This expression uses the ``regex`` operator to apply a regular expression pattern -to the contents of a specified field. +This expression uses the ``regex`` operator to apply a regular expression +pattern to the contents of a specified field. -.. _SearchRegex docs: https://www.mongodb.com/docs/atlas/atlas-search/regex/ +`SearchRegex docs `_ .. code-block:: pycon >>> from django_mongodb_backend.expressions.search import SearchRegex >>> Article.objects.annotate(score=SearchRegex(path="headline", query=r"^Breaking_")) - ]> + , + + ]> -The ``path`` argument specifies the field to search and can be provided as a string -or a :class:`~django.db.models.expressions.Col`. The ``query`` is a regular expression -string that will be applied to the field contents. 
+The ``path`` argument specifies the field to search and can be provided as a +string or a :class:`~django.db.models.expressions.Col`. The ``query`` is a +regular expression string that will be applied to the field contents. Optional arguments: -- ``allow_analyzed_field``: Boolean indicating whether to allow matching against analyzed fields (defaults to ``False``). +- ``allow_analyzed_field``: Boolean indicating whether to allow matching + against analyzed fields (defaults to ``False``). - ``score``: An optional score expression to adjust relevance. -``SearchRegex`` expressions can be reused and combined with other search expressions. +``SearchRegex`` expressions can be reused and combined with other search +expressions. See also: :ref:`search-operations-combinable` @@ -267,12 +304,13 @@ See also: :ref:`search-operations-combinable` ``SearchText`` ============== -Atlas Search expression that performs full-text search using the ``text`` operator. +Atlas Search expression that performs full-text search using the ``text`` +operator. -This expression matches terms in the specified field and supports fuzzy matching, -match criteria, and synonym mappings. +This expression matches terms in the specified field and supports fuzzy +matching, match criteria, and synonym mappings. -.. _SearchText docs: https://www.mongodb.com/docs/atlas/atlas-search/text/ +`SearchText docs `_ .. code-block:: pycon @@ -282,20 +320,26 @@ match criteria, and synonym mappings. ... path="body", query="mongodb", fuzzy={"maxEdits": 1}, match_criteria="all" ... ) ... ) - ]> + , + + ]> -The ``path`` argument specifies the field to search and can be provided as a string -or a :class:`~django.db.models.expressions.Col`. The ``query`` argument is the search -term or phrase. +The ``path`` argument specifies the field to search and can be provided as a +string or a :class:`~django.db.models.expressions.Col`. The ``query`` argument +is the search term or phrase. 
Optional arguments: -- ``fuzzy``: A dictionary of fuzzy matching options, such as ``{"maxEdits": 1}``. -- ``match_criteria``: Whether to match ``"all"`` or ``"any"`` terms (defaults to Atlas Search behavior). +- ``fuzzy``: A dictionary of fuzzy matching options, such as + ``{"maxEdits": 1}``. +- ``match_criteria``: Whether to match ``"all"`` or ``"any"`` terms (defaults + to Atlas Search behavior). - ``synonyms``: The name of a synonym mapping defined in your Atlas index. - ``score``: An optional expression to influence relevance scoring. -``SearchText`` expressions can be reused and combined with other search expressions. +``SearchText`` expressions can be reused and combined with other search + expressions. See also: :ref:`search-operations-combinable` @@ -306,9 +350,10 @@ See also: :ref:`search-operations-combinable` Atlas Search expression that matches strings using wildcard patterns. This expression uses the ``wildcard`` operator to search for terms matching -a pattern with ``*`` (any sequence of characters) and ``?`` (any single character) wildcards. +a pattern with ``*`` (any sequence of characters) and ``?`` (any single +character) wildcards. -.. _SearchWildcard docs: https://www.mongodb.com/docs/atlas/atlas-search/wildcard/ +`SearchWildcard docs `_ .. code-block:: pycon @@ -316,7 +361,10 @@ a pattern with ``*`` (any sequence of characters) and ``?`` (any single characte >>> Article.objects.annotate( ... score=SearchWildcard(path="headline", query="report_202?_final*") ... ) - ]> + , + + ]> The ``path`` argument specifies the field to search and can be a string or a :class:`~django.db.models.expressions.Col`. The ``query`` is a wildcard string @@ -324,10 +372,12 @@ that may include ``*`` and ``?``. Optional arguments: -- ``allow_analyzed_field``: Boolean that allows matching against analyzed fields (defaults to ``False``). +- ``allow_analyzed_field``: Boolean that allows matching against analyzed + fields (defaults to ``False``). 
- ``score``: An optional expression to adjust relevance. -``SearchWildcard`` expressions can be reused and combined with other search expressions. +``SearchWildcard`` expressions can be reused and combined with other search +expressions. See also: :ref:`search-operations-combinable` @@ -335,12 +385,13 @@ See also: :ref:`search-operations-combinable` ``SearchGeoShape`` ================== -Atlas Search expression that filters documents based on spatial relationships with a geometry. +Atlas Search expression that filters documents based on spatial relationships +with a geometry. -This expression uses the ``geoShape`` operator to match documents where a geo field -has a specified spatial relation to a given GeoJSON geometry. +This expression uses the ``geoShape`` operator to match documents where a geo +field has a specified spatial relation to a given GeoJSON geometry. -.. _SearchGeoShape docs: https://www.mongodb.com/docs/atlas/atlas-search/geoShape/ +`SearchGeoShape docs `_ .. code-block:: pycon @@ -349,21 +400,26 @@ has a specified spatial relation to a given GeoJSON geometry. >>> Article.objects.annotate( ... score=SearchGeoShape(path="location", relation="within", geometry=polygon) ... ) - ]> + , + + ]> The ``path`` argument specifies the field to filter and can be a string or a :class:`~django.db.models.expressions.Col`. Required arguments: -- ``relation``: The spatial relation to test. Valid values include ``"within"``, ``"intersects"``, and ``"disjoint"``. +- ``relation``: The spatial relation to test. Valid values include + ``"within"``, ``"intersects"``, and ``"disjoint"``. - ``geometry``: A GeoJSON geometry object to compare against. Optional: - ``score``: An optional expression to modify the relevance score. -``SearchGeoShape`` expressions can be reused and combined with other search expressions. +``SearchGeoShape`` expressions can be reused and combined with other search +expressions. 
See also: :ref:`search-operations-combinable` @@ -371,12 +427,13 @@ See also: :ref:`search-operations-combinable` ``SearchGeoWithin`` =================== -Atlas Search expression that filters documents with geo fields contained within a specified shape. +Atlas Search expression that filters documents with geo fields contained within +a specified shape. -This expression uses the ``geoWithin`` operator to match documents where the geo field lies -entirely within the provided GeoJSON geometry. +This expression uses the ``geoWithin`` operator to match documents where the +geo field lies entirely within the provided GeoJSON geometry. -.. _SearchGeoWithin docs: https://www.mongodb.com/docs/atlas/atlas-search/geoWithin/ +`SearchGeoWithin docs `_ .. code-block:: pycon @@ -385,10 +442,13 @@ entirely within the provided GeoJSON geometry. >>> Article.objects.annotate( ... score=SearchGeoWithin(path="location", kind="Polygon", geo_object=polygon) ... ) - ]> + , + + ]> -The ``path`` argument specifies the geo field to filter and can be a string or a -:class:`~django.db.models.expressions.Col`. +The ``path`` argument specifies the geo field to filter and can be a string or +a :class:`~django.db.models.expressions.Col`. Required arguments: @@ -399,7 +459,8 @@ Optional: - ``score``: An optional expression to adjust the relevance score. -``SearchGeoWithin`` expressions can be reused and combined with other search expressions. +``SearchGeoWithin`` expressions can be reused and combined with other search + expressions. See also: :ref:`search-operations-combinable` @@ -412,7 +473,7 @@ Atlas Search expression that finds documents similar to the provided examples. This expression uses the ``moreLikeThis`` operator to retrieve documents that resemble one or more example documents. -.. _SearchMoreLikeThis docs: https://www.mongodb.com/docs/atlas/atlas-search/morelikethis/ +`SearchMoreLikeThis docs `_ .. code-block:: pycon @@ -423,16 +484,21 @@ resemble one or more example documents. ... 
[{"_id": ObjectId("66cabc1234567890abcdefff")}, {"title": "Example"}] ... ) ... ) - ]> + , + + ]> The ``documents`` argument must be a list of example documents or expressions that serve as references for similarity. Optional: -- ``score``: An optional expression to adjust the relevance score of the results. +- ``score``: An optional expression to adjust the relevance score of the + results. -``SearchMoreLikeThis`` expressions can be reused and combined with other search expressions. +``SearchMoreLikeThis`` expressions can be reused and combined with other search +expressions. See also: :ref:`search-operations-combinable` @@ -443,10 +509,11 @@ See also: :ref:`search-operations-combinable` Compound expression that combines multiple search clauses using boolean logic. This expression uses the ``compound`` operator in MongoDB Atlas Search to -combine sub-expressions with ``must``, ``must_not``, ``should``, and ``filter`` clauses. -It enables fine-grained control over how multiple conditions contribute to document matching and scoring. +combine sub-expressions with ``must``, ``must_not``, ``should``, and ``filter`` +clauses. It enables fine-grained control over how multiple conditions +contribute to document matching and scoring. -.. _CompoundExpression docs: https://www.mongodb.com/docs/atlas/atlas-search/compound/ +`CompoundExpression docs `_ .. code-block:: pycon @@ -460,18 +527,22 @@ It enables fine-grained control over how multiple conditions contribute to docum ... must=[expr1, expr2], must_not=[expr3], should=[expr4], minimum_should_match=1 ... ) ... ) - ]> + ]> Arguments: - ``must``: A list of expressions that **must** match. - ``must_not``: A list of expressions that **must not** match. -- ``should``: A list of optional expressions that **should** match. These can improve scoring. -- ``filter``: A list of expressions used for filtering without affecting relevance scoring. -- ``minimum_should_match``: The minimum number of ``should`` clauses that must match. 
+- ``should``: A list of optional expressions that **should** match. + These can improve scoring. +- ``filter``: A list of expressions used for filtering without affecting + relevance scoring. +- ``minimum_should_match``: The minimum number of ``should`` clauses that + must match. - ``score``: An optional expression to adjust the final score. -``CompoundExpression`` is useful for building advanced and flexible query logic in Atlas Search. +``CompoundExpression`` is useful for building advanced and flexible query + logic in Atlas Search. See also: :ref:`search-operations-combinable` @@ -495,22 +566,43 @@ logical operators such as ``and``, ``or``, and ``not``. >>> expr1 = SearchText("headline", "mongodb") >>> expr2 = SearchText("body", "atlas") >>> CombinedSearchExpression(expr1, "and", expr2) + CombinedSearchExpression( + lhs=SearchText( + path='headline', + query='mongodb', + fuzzy=None, + match_criteria=None, + synonyms=None, + score=None + ), + operator='and', + rhs=SearchText( + path='body', + query='atlas', + fuzzy=None, + match_criteria=None, + synonyms=None, + score=None + ) + ) Args: - ``lhs``: The left-hand side search expression. -- ``operator``: A string representing the logical operator (``"and"``, ``"or"``, or ``"not"``). +- ``operator``: A string representing the logical operator (``"and"``, ``"or"`` + , or ``"not"``). - ``rhs``: The right-hand side search expression. -This is the underlying expression used to support operator overloading in Atlas Search expressions. +This is the underlying expression used to support operator overloading in +Atlas Search expressions. .. 
_search-operations-combinable: **Combinable expressions** -------------------------- -All Atlas Search expressions subclassed from ``SearchExpression`` can be combined -using Python's bitwise operators: +All Atlas Search expressions subclassed from ``SearchExpression`` +can be combined using Python's bitwise operators: - ``&`` → ``and`` - ``|`` → ``or`` @@ -522,21 +614,28 @@ This allows for more expressive and readable search logic: >>> expr = SearchText("headline", "mongodb") & ~SearchText("body", "deprecated") >>> Article.objects.annotate(score=expr) + , + + ]> -Under the hood, these expressions are translated into ``CombinedSearchExpression`` instances. +Under the hood, these expressions are translated into +``CombinedSearchExpression`` instances. -``CombinedSearchExpression`` can also be reused and nested with other compound expressions. +``CombinedSearchExpression`` can also be reused and nested with other compound +expressions. ``SearchVector`` ================ -Atlas Search expression that performs vector similarity search using the ``$vectorSearch`` stage. +Atlas Search expression that performs vector similarity search using the +``$vectorSearch`` stage. -This expression retrieves documents whose vector field is most similar to a given query vector, -using either approximate or exact nearest-neighbor search. +This expression retrieves documents whose vector field is most similar to a +given query vector, using either approximate or exact nearest-neighbor search. -.. _SearchVector docs: https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-stage/ +`SearchVector docs `_ .. code-block:: pycon @@ -554,32 +653,38 @@ using either approximate or exact nearest-neighbor search. Arguments: -- ``path``: The document path to the vector field (string or :class:`~django.db.models.expressions.Col`). +- ``path``: The document path to the vector field (string or + :class:`~django.db.models.expressions.Col`). 
- ``query_vector``: The input vector used for similarity comparison. - ``limit``: The maximum number of matching documents to return. -- ``num_candidates``: (Optional) The number of candidate documents considered during search. -- ``exact``: (Optional) Whether to enforce exact search instead of approximate (defaults to ``False``). -- ``filter``: (Optional) A filter expression to restrict the candidate documents. +- ``num_candidates``: (Optional) The number of candidate documents considered + during search. +- ``exact``: (Optional) Whether to enforce exact search instead of approximate + (defaults to ``False``). +- ``filter``: (Optional) A filter expression to restrict the candidate + documents. .. warning:: - ``SearchVector`` expressions cannot be combined using logical operators such as ``&``, ``|``, or ``~``. - Attempting to do so will raise an error. + ``SearchVector`` expressions cannot be combined using logical operators + such as ``&``, ``|``, or ``~``. Attempting to do so will raise an error. -``SearchVector`` is typically used on its own in the ``score`` annotation and cannot be nested or composed. +``SearchVector`` is typically used on its own in the ``score`` annotation and +cannot be nested or composed. ``SearchScoreOption`` ===================== -Expression used to control or mutate the relevance score in an Atlas Search expression. +Expression used to control or mutate the relevance score in an Atlas Search +expression. -This expression can be passed to most Atlas Search operators through the ``score`` argument -to customize how MongoDB calculates and applies scoring. +This expression can be passed to most Atlas Search operators through the +``score`` argument to customize how MongoDB calculates and applies scoring. It directly maps to the ``score`` option of the relevant Atlas Search operator. -.. _SearchScoreOption docs: https://www.mongodb.com/docs/atlas/atlas-search/scoring/ +`SearchScoreOption docs: `_ .. 
code-block:: pycon @@ -595,34 +700,42 @@ Accepted options depend on the underlying operator and may include: - ``function``: Uses a mathematical function to compute the score dynamically. - ``path``: Scores documents based on the value of a field. -The ``SearchScoreOption`` is a low-level utility used to build the ``score`` subdocument -and can be reused across multiple search expressions. +The ``SearchScoreOption`` is a low-level utility used to build the ``score`` +subdocument and can be reused across multiple search expressions. -It is typically passed as the ``score`` parameter to any search expression that supports it. +It is typically passed as the ``score`` parameter to any search expression that +supports it. The ``search`` lookup ====================== -Django lookup to enable Atlas Search full-text querying via the ``search`` lookup. +Django lookup to enable Atlas Search full-text querying via the ``search`` +lookup. -This lookup allows using the ``search`` lookup on Django ``CharField`` and ``TextField`` -to perform Atlas Search ``text`` queries seamlessly within Django ORM filters. +This lookup allows using the ``search`` lookup on Django ``CharField`` and +``TextField`` to perform Atlas Search ``text`` queries seamlessly within Django +ORM filters. -It internally creates a ``SearchText`` expression on the left-hand side and compares its -score with zero to filter matching documents. +It internally creates a ``SearchText`` expression on the left-hand side and +compares its score with zero to filter matching documents. .. code-block:: pycon >>> Article.objects.filter(headline__search="mongodb") - ]> + , + + ]> -The lookup is automatically registered on ``CharField`` and ``TextField``, enabling -expressions like ``fieldname__search='query'``. +The lookup is automatically registered on ``CharField`` and ``TextField``, +enabling expressions like ``fieldname__search='query'``. 
Under the hood: - The left-hand side of the lookup is wrapped into a ``SearchText`` expression. -- The lookup compiles to a MongoDB query that filters documents with a score greater or equal to zero. +- The lookup compiles to a MongoDB query that filters documents with a score + greater than or equal to zero. -This allows for concise and idiomatic integration of Atlas Search within Django filters. +This allows for concise and idiomatic integration of Atlas Search within Django +filters. From d32b7dee151a9f779bcb61be73bc4727b89c00d5 Mon Sep 17 00:00:00 2001 From: Emanuel Lupi Date: Sun, 27 Jul 2025 14:32:48 -0300 Subject: [PATCH 16/18] add available_apps to unit test --- tests/queries_/test_search.py | 79 +++++++++++++++++++++++++---------- 1 file changed, 58 insertions(+), 21 deletions(-) diff --git a/tests/queries_/test_search.py b/tests/queries_/test_search.py index 478a62a17..396611926 100644 --- a/tests/queries_/test_search.py +++ b/tests/queries_/test_search.py @@ -59,7 +59,7 @@ def wrapper(self, fetch, *args, **kwargs): @skipUnlessDBFeature("supports_atlas_search") class SearchUtilsMixin(TransactionTestCase): - available_apps = [] + available_apps = ["queries_"] models_to_clean = [Article] delayedAssertCountEqual = _delayed_assertion(timeout=2)(TransactionTestCase.assertCountEqual) @@ -72,7 +72,7 @@ class SearchUtilsMixin(TransactionTestCase): def setUpClass(cls): super().setUpClass() # Register the cleanup to run after all tests in this class - cls.addClassCleanup(cls.drop_search_indexes_and_data) + cls.addClassCleanup(cls.drop_search_indexes) @staticmethod def _get_collection(model): @@ -85,12 +85,11 @@ def create_search_index(cls, model, index_name, definition, type="search"): collection.create_search_index(idx) @classmethod - def drop_search_indexes_and_data(cls): + def drop_search_indexes(cls): for model in cls.models_to_clean: collection = cls._get_collection(model) for search_indexes in collection.list_search_indexes():
collection.drop_search_index(search_indexes["name"]) - collection.delete_many({}) @skipUnlessDBFeature("supports_atlas_search") @@ -108,7 +107,10 @@ def setUpClass(cls): } }, ) - cls.article = Article.objects.create(headline="cross", number=1, body="body") + + def setUp(self): + super().setUp() + self.article = Article.objects.create(headline="cross", number=1, body="body") Article.objects.create(headline="other thing", number=2, body="body") def test_search_equals(self): @@ -191,7 +193,10 @@ def setUpClass(cls): } }, ) - cls.article = Article.objects.create( + + def setUp(self): + super().setUp() + self.article = Article.objects.create( headline="crossing and something", number=2, body="river", @@ -242,7 +247,10 @@ def setUpClass(cls): "exists_body_index", {"mappings": {"dynamic": False, "fields": {"body": {"type": "token"}}}}, ) - cls.article = Article.objects.create(headline="ignored", number=3, body="something") + + def setUp(self): + super().setUp() + self.article = Article.objects.create(headline="ignored", number=3, body="something") def test_search_exists(self): qs = Article.objects.annotate(score=SearchExists(path="body")) @@ -266,7 +274,10 @@ def setUpClass(cls): "in_headline_index", {"mappings": {"dynamic": False, "fields": {"headline": {"type": "token"}}}}, ) - cls.article = Article.objects.create(headline="cross", number=1, body="a") + + def setUp(self): + super().setUp() + self.article = Article.objects.create(headline="cross", number=1, body="a") Article.objects.create(headline="road", number=2, body="b") def test_search_in(self): @@ -293,7 +304,10 @@ def setUpClass(cls): "phrase_body_index", {"mappings": {"dynamic": False, "fields": {"body": {"type": "string"}}}}, ) - cls.article = Article.objects.create( + + def setUp(self): + super().setUp() + self.article = Article.objects.create( headline="irrelevant", number=1, body="the quick brown fox" ) Article.objects.create(headline="cheetah", number=2, body="fastest animal") @@ -323,7 +337,10 @@ def 
setUpClass(cls): {"mappings": {"dynamic": False, "fields": {"number": {"type": "number"}}}}, ) Article.objects.create(headline="x", number=5, body="z") - cls.number20 = Article.objects.create(headline="y", number=20, body="z") + + def setUp(self): + super().setUp() + self.number20 = Article.objects.create(headline="y", number=20, body="z") def test_search_range(self): qs = Article.objects.annotate(score=SearchRange(path="number", gte=10, lt=30)) @@ -354,7 +371,10 @@ def setUpClass(cls): } }, ) - cls.article = Article.objects.create(headline="hello world", number=1, body="abc") + + def setUp(self): + super().setUp() + self.article = Article.objects.create(headline="hello world", number=1, body="abc") Article.objects.create(headline="hola mundo", number=2, body="abc") def test_search_regex(self): @@ -385,7 +405,10 @@ def setUpClass(cls): "text_body_index", {"mappings": {"dynamic": False, "fields": {"body": {"type": "string"}}}}, ) - cls.article = Article.objects.create( + + def setUp(self): + super().setUp() + self.article = Article.objects.create( headline="ignored", number=1, body="The lazy dog sleeps" ) Article.objects.create(headline="ignored", number=2, body="The sleepy bear") @@ -437,7 +460,10 @@ def setUpClass(cls): } }, ) - cls.article = Article.objects.create(headline="dark-knight", number=1, body="") + + def setUp(self): + super().setUp() + self.article = Article.objects.create(headline="dark-knight", number=1, body="") Article.objects.create(headline="batman", number=2, body="") def test_search_wildcard(self): @@ -469,7 +495,10 @@ def setUpClass(cls): } }, ) - cls.article = Article.objects.create( + + def setUp(self): + super().setUp() + self.article = Article.objects.create( headline="any", number=1, body="", location={"type": "Point", "coordinates": [40, 5]} ) Article.objects.create( @@ -512,7 +541,10 @@ def setUpClass(cls): "geowithin_location_index", {"mappings": {"dynamic": False, "fields": {"location": {"type": "geo"}}}}, ) - cls.article = 
Article.objects.create( + + def setUp(self): + super().setUp() + self.article = Article.objects.create( headline="geo", number=2, body="", location={"type": "Point", "coordinates": [40, 5]} ) Article.objects.create( @@ -615,25 +647,28 @@ def setUpClass(cls): } }, ) - cls.mars_mission = Article.objects.create( + + def setUp(self): + super().setUp() + self.mars_mission = Article.objects.create( number=1, headline="space exploration", body="NASA launches a new mission to Mars, aiming to study surface geology", ) - cls.exoplanet = Article.objects.create( + self.exoplanet = Article.objects.create( number=2, headline="space exploration", body="Astronomers discover exoplanets orbiting distant stars using Webb telescope", ) - cls.icy_moons = Article.objects.create( + self.icy_moons = Article.objects.create( number=3, headline="space exploration", body="ESA prepares a robotic expedition to explore the icy moons of Jupiter", ) - cls.comodities_drop = Article.objects.create( + self.comodities_drop = Article.objects.create( number=4, headline="astronomy news", body="Commodities dropped sharply due to inflation concerns", @@ -780,13 +815,15 @@ def setUpClass(cls): type="vectorSearch", ) - cls.mars = Article.objects.create( + def setUp(self): + super().setUp() + self.mars = Article.objects.create( headline="Mars landing", number=1, body="The rover has landed on Mars", plot_embedding=[0.1, 0.2, 0.3], ) - cls.cooking = Article.objects.create( + self.cooking = Article.objects.create( headline="Cooking tips", number=2, body="This article is about pasta", From 7cd43d61f15664ab6c5242010bb46aaaed54d6eb Mon Sep 17 00:00:00 2001 From: Emanuel Lupi Date: Sun, 27 Jul 2025 16:53:47 -0300 Subject: [PATCH 17/18] Simplify clean up call --- tests/queries_/test_search.py | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/tests/queries_/test_search.py b/tests/queries_/test_search.py index 396611926..113e0ba88 100644 --- a/tests/queries_/test_search.py +++ 
b/tests/queries_/test_search.py @@ -60,7 +60,6 @@ def wrapper(self, fetch, *args, **kwargs): @skipUnlessDBFeature("supports_atlas_search") class SearchUtilsMixin(TransactionTestCase): available_apps = ["queries_"] - models_to_clean = [Article] delayedAssertCountEqual = _delayed_assertion(timeout=2)(TransactionTestCase.assertCountEqual) delayedAssertListEqual = _delayed_assertion(timeout=2)(TransactionTestCase.assertListEqual) @@ -68,12 +67,6 @@ class SearchUtilsMixin(TransactionTestCase): TransactionTestCase.assertQuerySetEqual ) - @classmethod - def setUpClass(cls): - super().setUpClass() - # Register the cleanup to run after all tests in this class - cls.addClassCleanup(cls.drop_search_indexes) - @staticmethod def _get_collection(model): return connection.database.get_collection(model._meta.db_table) @@ -84,12 +77,10 @@ def create_search_index(cls, model, index_name, definition, type="search"): idx = SearchIndexModel(definition=definition, name=index_name, type=type) collection.create_search_index(idx) - @classmethod - def drop_search_indexes(cls): - for model in cls.models_to_clean: - collection = cls._get_collection(model) - for search_indexes in collection.list_search_indexes(): - collection.drop_search_index(search_indexes["name"]) + def drop_index(): + collection.drop_search_index(index_name) + + cls.addClassCleanup(drop_index) @skipUnlessDBFeature("supports_atlas_search") From 0fdb066c3547f7e581cd13b48bab3bd1c2d5d623 Mon Sep 17 00:00:00 2001 From: Emanuel Lupi Date: Sun, 27 Jul 2025 18:02:58 -0300 Subject: [PATCH 18/18] Add change log --- docs/source/releases/5.2.x.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/source/releases/5.2.x.rst b/docs/source/releases/5.2.x.rst index 6bddff118..e3f0a5e75 100644 --- a/docs/source/releases/5.2.x.rst +++ b/docs/source/releases/5.2.x.rst @@ -17,6 +17,12 @@ New features - Added :class:`~.fields.PolymorphicEmbeddedModelField` and :class:`~.fields.PolymorphicEmbeddedModelArrayField` for storing a model 
instance or list of model instances that may be of more than one model class. +- Added support for MongoDB Atlas Search expressions, including + ``SearchAutocomplete``, ``SearchEquals``, ``SearchVector``, and others. +- Added support for the ``search`` lookup on + :class:`~django.db.models.TextField` and + :class:`~django.db.models.CharField`. + Bug fixes ---------