googleapis
diff --git a/‎bigframes/core/compile/sqlglot/expressions/unary_compiler.py‎
Lines changed: 90 additions & 0 deletions b/‎bigframes/core/compile/sqlglot/expressions/unary_compiler.py‎
Lines changed: 90 additions & 0 deletions
diff --git a/‎bigframes/dataframe.py‎
Lines changed: 16 additions & 11 deletions b/‎bigframes/dataframe.py‎
Lines changed: 16 additions & 11 deletions
diff --git a/‎bigframes/series.py‎
Lines changed: 27 additions & 5 deletions b/‎bigframes/series.py‎
Lines changed: 27 additions & 5 deletions
diff --git a/‎tests/system/large/functions/test_managed_function.py‎
Lines changed: 60 additions & 6 deletions b/‎tests/system/large/functions/test_managed_function.py‎
Lines changed: 60 additions & 6 deletions
@@ -14,6 +14,7 @@
 
 from __future__ import annotations
 
+import functools
 import typing
 
 import pandas as pd
@@ -292,6 +293,18 @@ def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
     return sge.Extract(this=sge.Identifier(this="DAYOFYEAR"), expression=expr.expr)
 
 
+@UNARY_OP_REGISTRATION.register(ops.EndsWithOp)
+def _(op: ops.EndsWithOp, expr: TypedExpr) -> sge.Expression:
+    """Compile ``EndsWithOp`` to an OR-chain of ``ENDS_WITH`` calls.
+
+    One ``ENDS_WITH(expr, pat)`` term is built per entry of ``op.pat``; an
+    empty ``op.pat`` compiles to the literal ``FALSE``.
+    """
+    if not op.pat:
+        return sge.false()
+
+    suffix_checks = (
+        sge.func("ENDS_WITH", expr.expr, sge.convert(suffix)) for suffix in op.pat
+    )
+    # Left fold: ((c0 OR c1) OR c2) ...; a single pattern yields the bare call.
+    return functools.reduce(
+        lambda acc, check: sge.Or(this=acc, expression=check), suffix_checks
+    )
+
+
 @UNARY_OP_REGISTRATION.register(ops.exp_op)
 def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
     return sge.Case(
@@ -344,6 +357,27 @@ def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
     return sge.func("ST_BOUNDARY", expr.expr)
 
 
+@UNARY_OP_REGISTRATION.register(ops.GeoStBufferOp)
+def _(op: ops.GeoStBufferOp, expr: TypedExpr) -> sge.Expression:
+    """Compile ``GeoStBufferOp`` to a positional ``ST_BUFFER`` call."""
+    # Literal arguments, in the order they follow the geography operand.
+    literal_args = [
+        sge.convert(setting)
+        for setting in (op.buffer_radius, op.num_seg_quarter_circle, op.use_spheroid)
+    ]
+    return sge.func("ST_BUFFER", expr.expr, *literal_args)
+
+
+@UNARY_OP_REGISTRATION.register(ops.geo_st_centroid_op)
+def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
+    """Compile the centroid op to ``ST_CENTROID(expr)``."""
+    geography = expr.expr
+    return sge.func("ST_CENTROID", geography)
+
+
+@UNARY_OP_REGISTRATION.register(ops.geo_st_convexhull_op)
+def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
+    """Compile the convex-hull op to ``ST_CONVEXHULL(expr)``."""
+    geography = expr.expr
+    return sge.func("ST_CONVEXHULL", geography)
+
+
 @UNARY_OP_REGISTRATION.register(ops.geo_st_geogfromtext_op)
 def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
     return sge.func("SAFE.ST_GEOGFROMTEXT", expr.expr)
@@ -516,6 +550,17 @@ def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
     return sge.Lower(this=expr.expr)
 
 
+@UNARY_OP_REGISTRATION.register(ops.MapOp)
+def _(op: ops.MapOp, expr: TypedExpr) -> sge.Expression:
+    """Compile ``MapOp`` to a simple ``CASE`` over ``op.mappings``.
+
+    Each ``(key, value)`` pair becomes ``WHEN key THEN value``. Inputs that
+    match no key fall through to the input expression itself instead of NULL.
+    """
+    # A CASE without any WHEN branch is not valid SQL; with nothing to map the
+    # input passes through unchanged.
+    if not op.mappings:
+        return expr.expr
+
+    return sge.Case(
+        this=expr.expr,
+        ifs=[
+            sge.If(this=sge.convert(key), true=sge.convert(value))
+            for key, value in op.mappings
+        ],
+        # NOTE(review): unmatched values are kept as-is; this is assumed to
+        # match the ELSE branch of the Ibis-based compiler -- confirm.
+        default=expr.expr,
+    )
+
+
 @UNARY_OP_REGISTRATION.register(ops.minute_op)
 def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
     return sge.Extract(this=sge.Identifier(this="MINUTE"), expression=expr.expr)
@@ -601,6 +646,18 @@ def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
     )
 
 
+@UNARY_OP_REGISTRATION.register(ops.StartsWithOp)
+def _(op: ops.StartsWithOp, expr: TypedExpr) -> sge.Expression:
+    """Compile ``StartsWithOp`` to an OR-chain of ``STARTS_WITH`` calls.
+
+    One ``STARTS_WITH(expr, pat)`` term is built per entry of ``op.pat``; an
+    empty ``op.pat`` compiles to the literal ``FALSE``.
+    """
+    if not op.pat:
+        return sge.false()
+
+    prefix_checks = (
+        sge.func("STARTS_WITH", expr.expr, sge.convert(prefix)) for prefix in op.pat
+    )
+    # Left fold: ((c0 OR c1) OR c2) ...; a single pattern yields the bare call.
+    return functools.reduce(
+        lambda acc, check: sge.Or(this=acc, expression=check), prefix_checks
+    )
+
+
 @UNARY_OP_REGISTRATION.register(ops.StrStripOp)
 def _(op: ops.StrStripOp, expr: TypedExpr) -> sge.Expression:
     """Compile ``StrStripOp`` to ``TRIM(expr, op.to_strip)``."""
     # ``sge.Trim`` takes the string being trimmed as ``this`` and the set of
     # characters to remove as ``expression``.
     return sge.Trim(this=expr.expr, expression=sge.convert(op.to_strip))
@@ -624,6 +681,11 @@ def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
     )
 
 
+@UNARY_OP_REGISTRATION.register(ops.StringSplitOp)
+def _(op: ops.StringSplitOp, expr: TypedExpr) -> sge.Expression:
+    """Compile ``StringSplitOp`` to a ``Split`` of ``expr`` on ``op.pat``."""
+    delimiter = sge.convert(op.pat)
+    return sge.Split(this=expr.expr, expression=delimiter)
+
+
 @UNARY_OP_REGISTRATION.register(ops.StrGetOp)
 def _(op: ops.StrGetOp, expr: TypedExpr) -> sge.Expression:
     return sge.Substring(
@@ -776,3 +838,31 @@ def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
 @UNARY_OP_REGISTRATION.register(ops.year_op)
 def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
     return sge.Extract(this=sge.Identifier(this="YEAR"), expression=expr.expr)
+
+
+@UNARY_OP_REGISTRATION.register(ops.ZfillOp)
+def _(op: ops.ZfillOp, expr: TypedExpr) -> sge.Expression:
+    """Compile ``ZfillOp`` to a sign-aware, zero-filling ``LPAD``.
+
+    Values whose first character is "-" keep the sign in front: the text after
+    the sign is padded to ``op.width - 1`` and the "-" is prepended again. All
+    other values are padded to ``op.width``.
+    """
+
+    def zero_pad(value: sge.Expression, width: int) -> sge.Expression:
+        # LPAD truncates inputs longer than the target length, so the target
+        # is never allowed to drop below the input's own length.
+        target = sge.func("GREATEST", sge.func("LENGTH", value), sge.convert(width))
+        return sge.func("LPAD", value, target, sge.convert("0"))
+
+    first_char = sge.Substring(
+        this=expr.expr, start=sge.convert(1), length=sge.convert(1)
+    )
+    # SUBSTR positions are 1-based: position 2 is the first character after the
+    # sign. Starting at 1 would leave the "-" inside the padded text.
+    unsigned = sge.Substring(this=expr.expr, start=sge.convert(2))
+
+    return sge.Case(
+        ifs=[
+            sge.If(
+                this=sge.EQ(this=first_char, expression=sge.convert("-")),
+                true=sge.Concat(
+                    expressions=[sge.convert("-"), zero_pad(unsigned, op.width - 1)]
+                ),
+            )
+        ],
+        default=zero_pad(expr.expr, op.width),
+    )
@@ -2828,6 +2828,19 @@ def itertuples(
             for item in df.itertuples(index=index, name=name):
                 yield item
 
+    def _apply_callable(self, condition):
+        """Resolve ``condition`` to a value, invoking it first if it is callable.
+
+        Callables carrying a ``bigframes_bigquery_function`` attribute are
+        applied through ``self.apply(..., axis=1)``; any other callable is
+        called with this DataFrame; non-callables are returned unchanged.
+        """
+        # Not a callable: nothing to execute.
+        if not callable(condition):
+            return condition
+
+        # A bigframes function is applied via ``apply`` with ``axis=1``.
+        is_bigframes_function = hasattr(condition, "bigframes_bigquery_function")
+        if is_bigframes_function:
+            return self.apply(condition, axis=1)
+
+        # A plain Python function receives the DataFrame itself.
+        return condition(self)
+
     def where(self, cond, other=None):
         if isinstance(other, bigframes.series.Series):
             raise ValueError("Seires is not a supported replacement type!")
@@ -2839,16 +2852,8 @@ def where(self, cond, other=None):
 
         # Execute it with the DataFrame when cond or/and other is callable.
         # It can be either a plain python function or remote/managed function.
-        if callable(cond):
-            if hasattr(cond, "bigframes_bigquery_function"):
-                cond = self.apply(cond, axis=1)
-            else:
-                cond = cond(self)
-        if callable(other):
-            if hasattr(other, "bigframes_bigquery_function"):
-                other = self.apply(other, axis=1)
-            else:
-                other = other(self)
+        cond = self._apply_callable(cond)
+        other = self._apply_callable(other)
 
         aligned_block, (_, _) = self._block.join(cond._block, how="left")
         # No left join is needed when 'other' is None or constant.
@@ -2899,7 +2904,7 @@ def where(self, cond, other=None):
         return result
 
     def mask(self, cond, other=None):
-        return self.where(~cond, other=other)
+        return self.where(~self._apply_callable(cond), other=other)
 
     def dropna(
         self,
 
@@ -1904,9 +1904,22 @@ def _groupby_values(
         )
 
     def apply(
-        self, func, by_row: typing.Union[typing.Literal["compat"], bool] = "compat"
+        self,
+        func,
+        by_row: typing.Union[typing.Literal["compat"], bool] = "compat",
+        *,
+        args: typing.Tuple = (),
     ) -> Series:
-        # TODO(shobs, b/274645634): Support convert_dtype, args, **kwargs
+        # Note: This signature differs from pandas.Series.apply. Specifically,
+        # `args` is keyword-only and `by_row` is a custom parameter here. Full
+        # alignment would involve breaking changes. However, given that by_row
+        # is not frequently used, we defer any such changes until there is a
+        # clear need based on user feedback.
+        #
+        # See pandas docs for reference:
+        # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.apply.html
+
+        # TODO(shobs, b/274645634): Support convert_dtype, **kwargs
         # is actually a ternary op
 
         if by_row not in ["compat", False]:
@@ -1950,10 +1963,19 @@ def apply(
                 raise
 
         # We are working with bigquery function at this point
-        result_series = self._apply_unary_op(
-            ops.RemoteFunctionOp(function_def=func.udf_def, apply_on_null=True)
-        )
+        if args:
+            result_series = self._apply_nary_op(
+                ops.NaryRemoteFunctionOp(function_def=func.udf_def), args
+            )
+            # TODO(jialuo): Investigate why `_apply_nary_op` drops the series
+            # `name`. Manually reassigning it here as a temporary fix.
+            result_series.name = self.name
+        else:
+            result_series = self._apply_unary_op(
+                ops.RemoteFunctionOp(function_def=func.udf_def, apply_on_null=True)
+            )
         result_series = func._post_process_series(result_series)
+
         return result_series
 
     def combine(
 
@@ -965,7 +965,7 @@ def float_parser(row):
         )
 
 
-def test_managed_function_df_where(session, dataset_id, scalars_dfs):
+def test_managed_function_df_where_mask(session, dataset_id, scalars_dfs):
     try:
 
         # The return type has to be bool type for callable where condition.
@@ -987,15 +987,15 @@ def is_sum_positive(a, b):
         pd_int64_df = scalars_pandas_df[int64_cols]
         pd_int64_df_filtered = pd_int64_df.dropna()
 
-        # Use callable condition in dataframe.where method.
+        # Test callable condition in dataframe.where method.
         bf_result = bf_int64_df_filtered.where(is_sum_positive_mf).to_pandas()
         # Pandas doesn't support such case, use following as workaround.
         pd_result = pd_int64_df_filtered.where(pd_int64_df_filtered.sum(axis=1) > 0)
 
         # Ignore any dtype difference.
         pandas.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False)
 
-        # Make sure the read_gbq_function path works for this function.
+        # Make sure the read_gbq_function path works for dataframe.where method.
         is_sum_positive_ref = session.read_gbq_function(
             function_name=is_sum_positive_mf.bigframes_bigquery_function
         )
@@ -1012,14 +1012,27 @@ def is_sum_positive(a, b):
             bf_result_gbq, pd_result_gbq, check_dtype=False
         )
 
+        # Test callable condition in dataframe.mask method.
+        bf_result_gbq = bf_int64_df_filtered.mask(
+            is_sum_positive_ref, -bf_int64_df_filtered
+        ).to_pandas()
+        pd_result_gbq = pd_int64_df_filtered.mask(
+            pd_int64_df_filtered.sum(axis=1) > 0, -pd_int64_df_filtered
+        )
+
+        # Ignore any dtype difference.
+        pandas.testing.assert_frame_equal(
+            bf_result_gbq, pd_result_gbq, check_dtype=False
+        )
+
     finally:
         # Clean up the gcp assets created for the managed function.
         cleanup_function_assets(
             is_sum_positive_mf, session.bqclient, ignore_failures=False
         )
 
 
-def test_managed_function_df_where_series(session, dataset_id, scalars_dfs):
+def test_managed_function_df_where_mask_series(session, dataset_id, scalars_dfs):
     try:
 
         # The return type has to be bool type for callable where condition.
@@ -1041,14 +1054,14 @@ def is_sum_positive_series(s):
         pd_int64_df = scalars_pandas_df[int64_cols]
         pd_int64_df_filtered = pd_int64_df.dropna()
 
-        # Use callable condition in dataframe.where method.
+        # Test callable condition in dataframe.where method.
         bf_result = bf_int64_df_filtered.where(is_sum_positive_series).to_pandas()
         pd_result = pd_int64_df_filtered.where(is_sum_positive_series)
 
         # Ignore any dtype difference.
         pandas.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False)
 
-        # Make sure the read_gbq_function path works for this function.
+        # Make sure the read_gbq_function path works for dataframe.where method.
         is_sum_positive_series_ref = session.read_gbq_function(
             function_name=is_sum_positive_series_mf.bigframes_bigquery_function,
             is_row_processor=True,
@@ -1070,6 +1083,19 @@ def func_for_other(x):
             bf_result_gbq, pd_result_gbq, check_dtype=False
         )
 
+        # Test callable condition in dataframe.mask method.
+        bf_result_gbq = bf_int64_df_filtered.mask(
+            is_sum_positive_series_ref, func_for_other
+        ).to_pandas()
+        pd_result_gbq = pd_int64_df_filtered.mask(
+            is_sum_positive_series, func_for_other
+        )
+
+        # Ignore any dtype difference.
+        pandas.testing.assert_frame_equal(
+            bf_result_gbq, pd_result_gbq, check_dtype=False
+        )
+
     finally:
         # Clean up the gcp assets created for the managed function.
         cleanup_function_assets(
@@ -1121,3 +1147,31 @@ def _is_positive(s):
     finally:
         # Clean up the gcp assets created for the managed function.
         cleanup_function_assets(is_positive_mf, session.bqclient, ignore_failures=False)
+
+
+def test_managed_function_series_apply_args(session, dataset_id, scalars_dfs):
+    # Extra positional inputs forwarded to the function through `args`.
+    extra_args = (12.34, b"hello world", False)
+
+    try:
+
+        with pytest.warns(bfe.PreviewWarning, match="udf is in preview."):
+
+            @session.udf(dataset=dataset_id, name=prefixer.create_prefix())
+            def foo_list(x: int, y0: float, y1: bytes, y2: bool) -> list[str]:
+                return [str(x), str(y0), str(y1), str(y2)]
+
+        scalars_df, scalars_pandas_df = scalars_dfs
+        bf_series = scalars_df["int64_too"]
+        pd_series = scalars_pandas_df["int64_too"]
+
+        # Apply the same function with the same extra args on both sides.
+        bf_result = bf_series.apply(foo_list, args=extra_args).to_pandas()
+        pd_result = pd_series.apply(foo_list, args=extra_args)
+
+        # Ignore any dtype difference.
+        pandas.testing.assert_series_equal(bf_result, pd_result, check_dtype=False)
+
+    finally:
+        # Clean up the gcp assets created for the managed function.
+        cleanup_function_assets(foo_list, session.bqclient, ignore_failures=False)