Skip to content
This repository was archived by the owner on May 7, 2026. It is now read-only.

Commit 0d0ad68

Browse files
committed
Merge remote-tracking branch 'origin/main' into b409390651-progress-bar
2 parents e7ca461 + 56e5033 commit 0d0ad68

21 files changed

Lines changed: 366 additions & 77 deletions

File tree

bigframes/bigquery/_operations/ai.py

Lines changed: 74 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -348,20 +348,20 @@ def if_(
348348
provides optimization such that not all rows are evaluated with the LLM.
349349
350350
**Examples:**
351-
>>> import bigframes.pandas as bpd
352-
>>> import bigframes.bigquery as bbq
353-
>>> bpd.options.display.progress_bar = None
354-
>>> us_state = bpd.Series(["Massachusetts", "Illinois", "Hawaii"])
355-
>>> bbq.ai.if_((us_state, " has a city called Springfield"))
356-
0 True
357-
1 True
358-
2 False
359-
dtype: boolean
360-
361-
>>> us_state[bbq.ai.if_((us_state, " has a city called Springfield"))]
362-
0 Massachusetts
363-
1 Illinois
364-
dtype: string
351+
>>> import bigframes.pandas as bpd
352+
>>> import bigframes.bigquery as bbq
353+
>>> bpd.options.display.progress_bar = None
354+
>>> us_state = bpd.Series(["Massachusetts", "Illinois", "Hawaii"])
355+
>>> bbq.ai.if_((us_state, " has a city called Springfield"))
356+
0 True
357+
1 True
358+
2 False
359+
dtype: boolean
360+
361+
>>> us_state[bbq.ai.if_((us_state, " has a city called Springfield"))]
362+
0 Massachusetts
363+
1 Illinois
364+
dtype: string
365365
366366
Args:
367367
prompt (Series | List[str|Series] | Tuple[str|Series, ...]):
@@ -386,6 +386,56 @@ def if_(
386386
return series_list[0]._apply_nary_op(operator, series_list[1:])
387387

388388

389+
@log_adapter.method_logger(custom_base_name="bigquery_ai")
390+
def classify(
391+
input: PROMPT_TYPE,
392+
categories: tuple[str, ...] | list[str],
393+
*,
394+
connection_id: str | None = None,
395+
) -> series.Series:
396+
"""
397+
Classifies a given input into one of the specified categories. It will always return the provided category that best fits the prompt input.
398+
399+
**Examples:**
400+
401+
>>> import bigframes.pandas as bpd
402+
>>> import bigframes.bigquery as bbq
403+
>>> bpd.options.display.progress_bar = None
404+
>>> df = bpd.DataFrame({'creature': ['Cat', 'Salmon']})
405+
>>> df['type'] = bbq.ai.classify(df['creature'], ['Mammal', 'Fish'])
406+
>>> df
407+
creature type
408+
0 Cat Mammal
409+
1 Salmon Fish
410+
<BLANKLINE>
411+
[2 rows x 2 columns]
412+
413+
Args:
414+
input (Series | List[str|Series] | Tuple[str|Series, ...]):
415+
A mixture of Series and string literals that specifies the input to send to the model. The Series can be BigFrames Series
416+
or pandas Series.
417+
categories (tuple[str, ...] | list[str]):
418+
Categories to classify the input into.
419+
connection_id (str, optional):
420+
Specifies the connection to use to communicate with the model. For example, `myproject.us.myconnection`.
421+
If not provided, the connection from the current session will be used.
422+
423+
Returns:
424+
bigframes.series.Series: A new series of strings.
425+
"""
426+
427+
prompt_context, series_list = _separate_context_and_series(input)
428+
assert len(series_list) > 0
429+
430+
operator = ai_ops.AIClassify(
431+
prompt_context=tuple(prompt_context),
432+
categories=tuple(categories),
433+
connection_id=_resolve_connection_id(series_list[0], connection_id),
434+
)
435+
436+
return series_list[0]._apply_nary_op(operator, series_list[1:])
437+
438+
389439
@log_adapter.method_logger(custom_base_name="bigquery_ai")
390440
def score(
391441
prompt: PROMPT_TYPE,
@@ -398,15 +448,16 @@ def score(
398448
rubric with examples in the prompt.
399449
400450
**Examples:**
401-
>>> import bigframes.pandas as bpd
402-
>>> import bigframes.bigquery as bbq
403-
>>> bpd.options.display.progress_bar = None
404-
>>> animal = bpd.Series(["Tiger", "Rabbit", "Blue Whale"])
405-
>>> bbq.ai.score(("Rank the relative weights of ", animal, " on the scale from 1 to 3")) # doctest: +SKIP
406-
0 2.0
407-
1 1.0
408-
2 3.0
409-
dtype: Float64
451+
452+
>>> import bigframes.pandas as bpd
453+
>>> import bigframes.bigquery as bbq
454+
>>> bpd.options.display.progress_bar = None
455+
>>> animal = bpd.Series(["Tiger", "Rabbit", "Blue Whale"])
456+
>>> bbq.ai.score(("Rank the relative weights of ", animal, " on the scale from 1 to 3")) # doctest: +SKIP
457+
0 2.0
458+
1 1.0
459+
2 3.0
460+
dtype: Float64
410461
411462
Args:
412463
prompt (Series | List[str|Series] | Tuple[str|Series, ...]):

bigframes/core/array_value.py

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
import functools
1919
import typing
2020
from typing import Iterable, List, Mapping, Optional, Sequence, Tuple
21-
import warnings
2221

2322
import google.cloud.bigquery
2423
import pandas
@@ -37,7 +36,6 @@
3736
import bigframes.core.tree_properties
3837
from bigframes.core.window_spec import WindowSpec
3938
import bigframes.dtypes
40-
import bigframes.exceptions as bfe
4139
import bigframes.operations as ops
4240
import bigframes.operations.aggregations as agg_ops
4341

@@ -101,12 +99,6 @@ def from_table(
10199
):
102100
if offsets_col and primary_key:
103101
raise ValueError("must set at most one of 'offsets', 'primary_key'")
104-
if any(i.field_type == "JSON" for i in table.schema if i.name in schema.names):
105-
msg = bfe.format_message(
106-
"JSON column interpretation as a custom PyArrow extention in `db_dtypes` "
107-
"is a preview feature and subject to change."
108-
)
109-
warnings.warn(msg, bfe.PreviewWarning)
110102
# define data source only for needed columns, this makes row-hashing cheaper
111103
table_def = nodes.GbqTable.from_table(table, columns=schema.names)
112104

bigframes/core/backports.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""Helpers for working across versions of different depenencies."""
16+
17+
from typing import List
18+
19+
import pyarrow
20+
21+
22+
def pyarrow_struct_type_fields(struct_type: pyarrow.StructType) -> List[pyarrow.Field]:
23+
"""StructType.fields was added in pyarrow 18.
24+
25+
See: https://arrow.apache.org/docs/18.0/python/generated/pyarrow.StructType.html
26+
"""
27+
28+
if hasattr(struct_type, "fields"):
29+
return struct_type.fields
30+
31+
return [
32+
struct_type.field(field_index) for field_index in range(struct_type.num_fields)
33+
]

bigframes/core/compile/ibis_compiler/scalar_op_registry.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2039,6 +2039,18 @@ def ai_if(*values: ibis_types.Value, op: ops.AIIf) -> ibis_types.StructValue:
20392039
).to_expr()
20402040

20412041

2042+
@scalar_op_compiler.register_nary_op(ops.AIClassify, pass_op=True)
2043+
def ai_classify(
2044+
*values: ibis_types.Value, op: ops.AIClassify
2045+
) -> ibis_types.StructValue:
2046+
2047+
return ai_ops.AIClassify(
2048+
_construct_prompt(values, op.prompt_context), # type: ignore
2049+
op.categories, # type: ignore
2050+
op.connection_id, # type: ignore
2051+
).to_expr()
2052+
2053+
20422054
@scalar_op_compiler.register_nary_op(ops.AIScore, pass_op=True)
20432055
def ai_score(*values: ibis_types.Value, op: ops.AIScore) -> ibis_types.StructValue:
20442056

bigframes/core/compile/sqlglot/expressions/ai_ops.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,21 @@ def _(*exprs: TypedExpr, op: ops.AIIf) -> sge.Expression:
6161
return sge.func("AI.IF", *args)
6262

6363

64+
@register_nary_op(ops.AIClassify, pass_op=True)
65+
def _(*exprs: TypedExpr, op: ops.AIClassify) -> sge.Expression:
66+
category_literals = [sge.Literal.string(cat) for cat in op.categories]
67+
categories_arg = sge.Kwarg(
68+
this="categories", expression=sge.array(*category_literals)
69+
)
70+
71+
args = [
72+
_construct_prompt(exprs, op.prompt_context, param_name="input"),
73+
categories_arg,
74+
] + _construct_named_args(op)
75+
76+
return sge.func("AI.CLASSIFY", *args)
77+
78+
6479
@register_nary_op(ops.AIScore, pass_op=True)
6580
def _(*exprs: TypedExpr, op: ops.AIScore) -> sge.Expression:
6681
args = [_construct_prompt(exprs, op.prompt_context)] + _construct_named_args(op)
@@ -69,7 +84,9 @@ def _(*exprs: TypedExpr, op: ops.AIScore) -> sge.Expression:
6984

7085

7186
def _construct_prompt(
72-
exprs: tuple[TypedExpr, ...], prompt_context: tuple[str | None, ...]
87+
exprs: tuple[TypedExpr, ...],
88+
prompt_context: tuple[str | None, ...],
89+
param_name: str = "prompt",
7390
) -> sge.Kwarg:
7491
prompt: list[str | sge.Expression] = []
7592
column_ref_idx = 0
@@ -80,7 +97,7 @@ def _construct_prompt(
8097
else:
8198
prompt.append(sge.Literal.string(elem))
8299

83-
return sge.Kwarg(this="prompt", expression=sge.Tuple(expressions=prompt))
100+
return sge.Kwarg(this=param_name, expression=sge.Tuple(expressions=prompt))
84101

85102

86103
def _construct_named_args(op: ops.NaryOp) -> list[sge.Kwarg]:

bigframes/core/indexes/base.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -171,12 +171,16 @@ def shape(self) -> typing.Tuple[int]:
171171

172172
@property
173173
def dtype(self):
174-
return self._block.index.dtypes[0] if self.nlevels == 1 else np.dtype("O")
174+
dtype = self._block.index.dtypes[0] if self.nlevels == 1 else np.dtype("O")
175+
bigframes.dtypes.warn_on_db_dtypes_json_dtype([dtype])
176+
return dtype
175177

176178
@property
177179
def dtypes(self) -> pandas.Series:
180+
dtypes = self._block.index.dtypes
181+
bigframes.dtypes.warn_on_db_dtypes_json_dtype(dtypes)
178182
return pandas.Series(
179-
data=self._block.index.dtypes,
183+
data=dtypes,
180184
index=typing.cast(typing.Tuple, self._block.index.names),
181185
)
182186

bigframes/dataframe.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -321,7 +321,9 @@ def at(self) -> indexers.AtDataFrameIndexer:
321321

322322
@property
323323
def dtypes(self) -> pandas.Series:
324-
return pandas.Series(data=self._block.dtypes, index=self._block.column_labels)
324+
dtypes = self._block.dtypes
325+
bigframes.dtypes.warn_on_db_dtypes_json_dtype(dtypes)
326+
return pandas.Series(data=dtypes, index=self._block.column_labels)
325327

326328
@property
327329
def columns(self) -> pandas.Index:

bigframes/dtypes.py

Lines changed: 46 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import textwrap
2121
import typing
2222
from typing import Any, Dict, List, Literal, Sequence, Union
23+
import warnings
2324

2425
import bigframes_vendored.constants as constants
2526
import db_dtypes # type: ignore
@@ -30,6 +31,9 @@
3031
import pyarrow as pa
3132
import shapely.geometry # type: ignore
3233

34+
import bigframes.core.backports
35+
import bigframes.exceptions
36+
3337
# Type hints for Pandas dtypes supported by BigQuery DataFrame
3438
Dtype = Union[
3539
pd.BooleanDtype,
@@ -62,7 +66,8 @@
6266
# No arrow equivalent
6367
GEO_DTYPE = gpd.array.GeometryDtype()
6468
# JSON
65-
# TODO: switch to pyarrow.json_(pyarrow.string()) when available.
69+
# TODO(https://github.com/pandas-dev/pandas/issues/60958): switch to
70+
# pyarrow.json_(pyarrow.string()) when pandas 3+ and pyarrow 18+ is installed.
6671
JSON_ARROW_TYPE = db_dtypes.JSONArrowType()
6772
JSON_DTYPE = pd.ArrowDtype(JSON_ARROW_TYPE)
6873
OBJ_REF_DTYPE = pd.ArrowDtype(
@@ -368,8 +373,7 @@ def get_struct_fields(type_: ExpressionType) -> dict[str, Dtype]:
368373
assert isinstance(type_.pyarrow_dtype, pa.StructType)
369374
struct_type = type_.pyarrow_dtype
370375
result: dict[str, Dtype] = {}
371-
for field_no in range(struct_type.num_fields):
372-
field = struct_type.field(field_no)
376+
for field in bigframes.core.backports.pyarrow_struct_type_fields(struct_type):
373377
result[field.name] = arrow_dtype_to_bigframes_dtype(field.type)
374378
return result
375379

@@ -547,7 +551,8 @@ def arrow_type_to_literal(
547551
return [arrow_type_to_literal(arrow_type.value_type)]
548552
if pa.types.is_struct(arrow_type):
549553
return {
550-
field.name: arrow_type_to_literal(field.type) for field in arrow_type.fields
554+
field.name: arrow_type_to_literal(field.type)
555+
for field in bigframes.core.backports.pyarrow_struct_type_fields(arrow_type)
551556
}
552557
if pa.types.is_string(arrow_type):
553558
return "string"
@@ -915,3 +920,40 @@ def lcd_type_or_throw(dtype1: Dtype, dtype2: Dtype) -> Dtype:
915920

916921

917922
TIMEDELTA_DESCRIPTION_TAG = "#microseconds"
923+
924+
925+
def contains_db_dtypes_json_arrow_type(type_):
926+
if isinstance(type_, db_dtypes.JSONArrowType):
927+
return True
928+
929+
if isinstance(type_, pa.ListType):
930+
return contains_db_dtypes_json_arrow_type(type_.value_type)
931+
932+
if isinstance(type_, pa.StructType):
933+
return any(
934+
contains_db_dtypes_json_arrow_type(field.type)
935+
for field in bigframes.core.backports.pyarrow_struct_type_fields(type_)
936+
)
937+
return False
938+
939+
940+
def contains_db_dtypes_json_dtype(dtype):
941+
if not isinstance(dtype, pd.ArrowDtype):
942+
return False
943+
944+
return contains_db_dtypes_json_arrow_type(dtype.pyarrow_dtype)
945+
946+
947+
def warn_on_db_dtypes_json_dtype(dtypes):
948+
"""Warn that the JSON dtype is changing.
949+
950+
Note: only call this function if the user is explicitly checking the
951+
dtypes.
952+
"""
953+
if any(contains_db_dtypes_json_dtype(dtype) for dtype in dtypes):
954+
msg = bigframes.exceptions.format_message(
955+
"JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_()) "
956+
"instead of using `db_dtypes` in the future when available in pandas "
957+
"(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow."
958+
)
959+
warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)

bigframes/exceptions.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,10 @@ class FunctionAxisOnePreviewWarning(PreviewWarning):
111111
"""Remote Function and Managed UDF with axis=1 preview."""
112112

113113

114+
class JSONDtypeWarning(PreviewWarning):
115+
"""JSON dtype will be pd.ArrowDtype(pa.json_()) in the future."""
116+
117+
114118
class FunctionConflictTypeHintWarning(UserWarning):
115119
"""Conflicting type hints in a BigFrames function."""
116120

bigframes/operations/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from __future__ import annotations
1616

1717
from bigframes.operations.ai_ops import (
18+
AIClassify,
1819
AIGenerate,
1920
AIGenerateBool,
2021
AIGenerateDouble,
@@ -419,6 +420,7 @@
419420
"geo_y_op",
420421
"GeoStDistanceOp",
421422
# AI ops
423+
"AIClassify",
422424
"AIGenerate",
423425
"AIGenerateBool",
424426
"AIGenerateDouble",

0 commit comments

Comments
 (0)