From 0ed173fbb602447b9fabcf105dd211a518963ab9 Mon Sep 17 00:00:00 2001 From: Akshay Joshi Date: Thu, 3 Jul 2025 10:21:28 -0400 Subject: [PATCH 01/12] feat: add support for AddToCell in Data Client --- google/cloud/bigtable/data/mutations.py | 67 +++++++++++++ tests/unit/data/test_mutations.py | 128 ++++++++++++++++++++++++ 2 files changed, 195 insertions(+) diff --git a/google/cloud/bigtable/data/mutations.py b/google/cloud/bigtable/data/mutations.py index 2f4e441ed..cf23d0d0e 100644 --- a/google/cloud/bigtable/data/mutations.py +++ b/google/cloud/bigtable/data/mutations.py @@ -123,6 +123,14 @@ def _from_dict(cls, input_dict: dict[str, Any]) -> Mutation: instance = DeleteAllFromFamily(details["family_name"]) elif "delete_from_row" in input_dict: instance = DeleteAllFromRow() + elif "add_to_cell" in input_dict: + details = input_dict["add_to_cell"] + instance = AddToCell( + details["family_name"], + details["column_qualifier"]["raw_value"], + details["input"]["int_value"], + details["timestamp"]["raw_timestamp_micros"], + ) except KeyError as e: raise ValueError("Invalid mutation dictionary") from e if instance is None: @@ -276,6 +284,65 @@ def _to_dict(self) -> dict[str, Any]: } +@dataclass +class AddToCell(Mutation): + """ + Mutation to add a value to an aggregate cell. + + + Args: + family: The name of the column family to which the cell belongs. + qualifier: The column qualifier of the cell. + value: The value to be accumulated into the cell. + timestamp_micros: The timestamp of the cell. + + Raises: + TypeError: If `qualifier` is not `bytes` or `str`. + TypeError: If `value` is not `int`. + ValueError: If `timestamp_micros` is less than `_SERVER_SIDE_TIMESTAMP`. + """ + + def __init__( + self, + family: str, + qualifier: bytes | str, + value: int, + timestamp_micros: int | None = None, + ): + qualifier = qualifier.encode() if isinstance(qualifier, str) else qualifier + if not isinstance(qualifier, bytes): + raise TypeError("qualifier must be bytes or str") + if not isinstance(value, int): + raise TypeError("value must be int") + if abs(value) > _MAX_INCREMENT_VALUE: + raise ValueError( + "int values must be between -2**63 and 2**63 (64-bit signed int)" + ) + + if timestamp_micros is None: + # use current timestamp, with milisecond precision + timestamp_micros = time.time_ns() // 1000 + timestamp_micros = timestamp_micros - (timestamp_micros % 1000) + + self.family = family + self.qualifier = qualifier + self.value = value + self.timestamp_micros = timestamp_micros + + def _to_dict(self) -> dict[str, Any]: + return { + "add_to_cell": { + "family_name": self.family, + "column_qualifier": {"raw_value": self.qualifier}, + "timestamp": {"raw_timestamp_micros": self.timestamp_micros}, + "input": {"int_value": self.value}, + } + } + + def is_idempotent(self) -> bool: + return False + + class RowMutationEntry: """ A single entry in a `MutateRows` request. diff --git a/tests/unit/data/test_mutations.py b/tests/unit/data/test_mutations.py index 485c86e42..38a095154 100644 --- a/tests/unit/data/test_mutations.py +++ b/tests/unit/data/test_mutations.py @@ -706,3 +706,131 @@ def test__from_dict(self): assert len(instance.mutations) == 1 assert isinstance(instance.mutations[0], mutations.DeleteAllFromFamily) assert instance.mutations[0].family_to_delete == "test_family" + + +class TestAddToCell: + def _target_class(self): + from google.cloud.bigtable.data.mutations import AddToCell + + return AddToCell + + def _make_one(self, *args, **kwargs): + return self._target_class()(*args, **kwargs) + + @pytest.mark.parametrize("input_val", [2**64, -(2**64)]) + def test_ctor_large_int(self, input_val): + with pytest.raises(ValueError) as e: + self._make_one(family="f", qualifier=b"b", value=input_val) + assert "int values must be between" in str(e.value) + + @pytest.mark.parametrize("input_val", ["", "a", "abc", "hello world!"]) + def test_ctor_str_value(self, input_val): + with pytest.raises(ValueError) as e: + self._make_one(family="f", qualifier=b"b", value=input_val) + assert "value must be int" in str(e.value) + + def test_ctor(self): + """Ensure constructor sets expected values""" + expected_family = "test-family" + expected_qualifier = b"test-qualifier" + expected_value = 1234 + expected_timestamp = 1234567890 + instance = self._make_one( + expected_family, expected_qualifier, expected_value, expected_timestamp + ) + assert instance.family == expected_family + assert instance.qualifier == expected_qualifier + assert instance.value == expected_value + assert instance.timestamp_micros == expected_timestamp + + def test_ctor_negative_timestamp(self): + """Only positive or -1 timestamps are valid""" + with pytest.raises(ValueError) as e: + self._make_one("test-family", b"test-qualifier", b"test-value", -2) + assert ( + "timestamp_micros must be positive (or -1 for server-side timestamp)" + in str(e.value) + ) + + @pytest.mark.parametrize( + "timestamp_ns,expected_timestamp_micros", + [ + (0, 0), + (1, 0), + (123, 0), + (999, 0), + (999_999, 0), + (1_000_000, 1000), + (1_234_567, 1000), + (1_999_999, 1000), + (2_000_000, 2000), + (1_234_567_890_123, 1_234_567_000), + ], + ) + def test_ctor_no_timestamp(self, timestamp_ns, expected_timestamp_micros): + """If no timestamp is given, should use current time with millisecond precision""" + with mock.patch("time.time_ns", return_value=timestamp_ns): + instance = self._make_one("test-family", b"test-qualifier", 1234) + assert instance.timestamp_micros == expected_timestamp_micros + + def test__to_dict(self): + """ensure dict representation is as expected""" + expected_family = "test-family" + expected_qualifier = b"test-qualifier" + expected_value = 1234 + expected_timestamp = 123456789 + instance = self._make_one( + expected_family, expected_qualifier, expected_value, expected_timestamp + ) + got_dict = instance._to_dict() + assert list(got_dict.keys()) == ["add_to_cell"] + got_inner_dict = got_dict["add_to_cell"] + assert got_inner_dict["family_name"] == expected_family + assert got_inner_dict["column_qualifier"]["raw_value"] == expected_qualifier + assert got_inner_dict["timestamp_micros"]["raw_timestamp_micros"] == expected_timestamp + assert got_inner_dict["value"]["int_value"] == expected_value + assert len(got_inner_dict.keys()) == 4 + + def test__to_pb(self): + """ensure proto representation is as expected""" + import google.cloud.bigtable_v2.types.data as data_pb + + expected_family = "test-family" + expected_qualifier = b"test-qualifier" + expected_value = 1234 + expected_timestamp = 123456789 + instance = self._make_one( + expected_family, expected_qualifier, expected_value, expected_timestamp + ) + got_pb = instance._to_pb() + assert isinstance(got_pb, data_pb.Mutation) + assert got_pb.set_cell.family_name == expected_family + assert got_pb.set_cell.column_qualifier.raw_value == expected_qualifier + assert got_pb.set_cell.timestamp_micros.raw_timestamp_micros == expected_timestamp + assert got_pb.set_cell.value.int_value == expected_value + + @pytest.mark.parametrize( + "timestamp", + [ + (1234567890), + (1), + (0), + (-1), + (None), + ], + ) + def test_is_idempotent(self, timestamp, expected_value): + """is_idempotent is not based on whether an explicit timestamp is set""" + instance = self._make_one( + "test-family", b"test-qualifier", 1234, timestamp + ) + assert not instance.is_idempotent() + + def test___str__(self): + """Str representation of mutations should be to_dict""" + instance = self._make_one( + "test-family", b"test-qualifier", 1234, 1234567890 + ) + str_value = instance.__str__() + dict_value = instance._to_dict() + assert str_value == str(dict_value) From c5ee6762815f175321dc2fb180bf4268849ad89d Mon Sep 17 00:00:00 2001 From: Akshay Joshi Date: Thu, 3 Jul 2025 11:15:54 -0400 Subject: [PATCH 02/12] fix? --- google/cloud/bigtable/data/mutations.py | 15 +++++++----- tests/unit/data/test_mutations.py | 31 ++++++++++--------------- 2 files changed, 21 insertions(+), 25 deletions(-) diff --git a/google/cloud/bigtable/data/mutations.py b/google/cloud/bigtable/data/mutations.py index cf23d0d0e..14633973f 100644 --- a/google/cloud/bigtable/data/mutations.py +++ b/google/cloud/bigtable/data/mutations.py @@ -307,7 +307,7 @@ def __init__( family: str, qualifier: bytes | str, value: int, - timestamp_micros: int | None = None, + timestamp: int | None = None, ): qualifier = qualifier.encode() if isinstance(qualifier, str) else qualifier if not isinstance(qualifier, bytes): @@ -319,22 +319,25 @@ def __init__( "int values must be between -2**63 and 2**63 (64-bit signed int)" ) - if timestamp_micros is None: + if timestamp is None: # use current timestamp, with milisecond precision - timestamp_micros = time.time_ns() // 1000 - timestamp_micros = timestamp_micros - (timestamp_micros % 1000) + timestamp = time.time_ns() // 1000 + timestamp = timestamp - (timestamp % 1000) + + if timestamp < 0: + raise ValueError("timestamp must be positive") self.family = family self.qualifier = qualifier self.value = value - self.timestamp_micros = timestamp_micros + self.timestamp = timestamp def _to_dict(self) -> dict[str, Any]: return { "add_to_cell": { "family_name": self.family, "column_qualifier": {"raw_value": self.qualifier}, - "timestamp": {"raw_timestamp_micros": self.timestamp_micros}, + "timestamp": {"raw_timestamp_micros": self.timestamp}, "input": {"int_value": self.value}, } } diff --git a/tests/unit/data/test_mutations.py b/tests/unit/data/test_mutations.py index 38a095154..6b3227cfa 100644 --- a/tests/unit/data/test_mutations.py +++ b/tests/unit/data/test_mutations.py @@ -725,7 +725,7 @@ def test_ctor_large_int(self, input_val): @pytest.mark.parametrize("input_val", ["", "a", "abc", "hello world!"]) def test_ctor_str_value(self, input_val): - with pytest.raises(ValueError) as e: + with pytest.raises(TypeError) as e: self._make_one(family="f", qualifier=b"b", value=input_val) assert "value must be int" in str(e.value) @@ -744,13 +744,10 @@ def test_ctor(self): assert instance.timestamp_micros == expected_timestamp def test_ctor_negative_timestamp(self): - """Only positive or -1 timestamps are valid""" + """Only positive timestamps are valid""" with pytest.raises(ValueError) as e: - self._make_one("test-family", b"test-qualifier", b"test-value", -2) - assert ( - "timestamp_micros must be positive (or -1 for server-side timestamp)" - in str(e.value) - ) + self._make_one("test-family", b"test-qualifier", 1234, -2) + assert "timestamp must be positive" in str(e.value) @pytest.mark.parametrize( "timestamp_ns,expected_timestamp_micros", @@ -787,8 +784,8 @@ def test__to_dict(self): got_inner_dict = got_dict["add_to_cell"] assert got_inner_dict["family_name"] == expected_family assert got_inner_dict["column_qualifier"]["raw_value"] == expected_qualifier - assert got_inner_dict["timestamp_micros"]["raw_timestamp_micros"] == expected_timestamp - assert got_inner_dict["value"]["int_value"] == expected_value + assert got_inner_dict["timestamp"]["raw_timestamp_micros"] == expected_timestamp + assert got_inner_dict["input"]["int_value"] == expected_value assert len(got_inner_dict.keys()) == 4 def test__to_pb(self): @@ -806,8 +803,8 @@ def test__to_pb(self): assert isinstance(got_pb, data_pb.Mutation) assert got_pb.set_cell.family_name == expected_family assert got_pb.set_cell.column_qualifier.raw_value == expected_qualifier - assert got_pb.set_cell.timestamp_micros.raw_timestamp_micros == expected_timestamp - assert got_pb.set_cell.value.int_value == expected_value + assert got_pb.set_cell.timestamp.raw_timestamp_micros == expected_timestamp + assert got_pb.set_cell.input.int_value == expected_value @pytest.mark.parametrize( "timestamp", @@ -819,18 +816,14 @@ def test__to_pb(self): (None), ], ) - def test_is_idempotent(self, timestamp, expected_value): - """is_idempotent is not based on whether an explicit timestamp is set""" - instance = self._make_one( - "test-family", b"test-qualifier", 1234, timestamp - ) + def test_is_idempotent(self, timestamp): + """is_idempotent is not based on the timestamp""" + instance = self._make_one("test-family", b"test-qualifier", 1234, timestamp) assert not instance.is_idempotent() def test___str__(self): """Str representation of mutations should be to_dict""" - instance = self._make_one( - "test-family", b"test-qualifier", 1234, 1234567890 - ) + instance = self._make_one("test-family", b"test-qualifier", 1234, 1234567890) str_value = instance.__str__() dict_value = instance._to_dict() assert str_value == str(dict_value) From 2691cc54a020f4cab9647c4ac1bef133712e955b Mon Sep 17 00:00:00 2001 From: Akshay Joshi Date: Thu, 10 Jul 2025 11:29:19 -0400 Subject: [PATCH 03/12] fix tests --- tests/unit/data/test_mutations.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/tests/unit/data/test_mutations.py b/tests/unit/data/test_mutations.py index 6b3227cfa..fb2ef15c7 100644 --- a/tests/unit/data/test_mutations.py +++ b/tests/unit/data/test_mutations.py @@ -741,7 +741,7 @@ def test_ctor(self): assert instance.family == expected_family assert instance.qualifier == expected_qualifier assert instance.value == expected_value - assert instance.timestamp_micros == expected_timestamp + assert instance.timestamp == expected_timestamp def test_ctor_negative_timestamp(self): """Only positive timestamps are valid""" @@ -768,7 +768,7 @@ def test_ctor_no_timestamp(self, timestamp_ns, expected_timestamp_micros): """If no timestamp is given, should use current time with millisecond precision""" with mock.patch("time.time_ns", return_value=timestamp_ns): instance = self._make_one("test-family", b"test-qualifier", 1234) - assert instance.timestamp_micros == expected_timestamp_micros + assert instance.timestamp == expected_timestamp_micros def test__to_dict(self): """ensure dict representation is as expected""" @@ -801,10 +801,10 @@ def test__to_pb(self): ) got_pb = instance._to_pb() assert isinstance(got_pb, data_pb.Mutation) - assert got_pb.set_cell.family_name == expected_family - assert got_pb.set_cell.column_qualifier.raw_value == expected_qualifier - assert got_pb.set_cell.timestamp.raw_timestamp_micros == expected_timestamp - assert got_pb.set_cell.input.int_value == expected_value + assert got_pb.add_to_cell.family_name == expected_family + assert got_pb.add_to_cell.column_qualifier.raw_value == expected_qualifier + assert got_pb.add_to_cell.timestamp.raw_timestamp_micros == expected_timestamp + assert got_pb.add_to_cell.input.int_value == expected_value @pytest.mark.parametrize( "timestamp", @@ -812,7 +812,6 @@ def test__to_pb(self): (1234567890), (1), (0), - (-1), (None), ], ) From b8c6f244e3eaa29d517a72c6239044fb0ca4e2f7 Mon Sep 17 00:00:00 2001 From: Akshay Joshi Date: Tue, 15 Jul 2025 09:49:46 -0400 Subject: [PATCH 04/12] pr feedback --- google/cloud/bigtable/data/mutations.py | 30 +++++++++++++++---------- tests/unit/data/test_mutations.py | 25 ++------------------- 2 files changed, 20 insertions(+), 35 deletions(-) diff --git a/google/cloud/bigtable/data/mutations.py b/google/cloud/bigtable/data/mutations.py index 14633973f..9fd2dad58 100644 --- a/google/cloud/bigtable/data/mutations.py +++ b/google/cloud/bigtable/data/mutations.py @@ -287,19 +287,28 @@ def _to_dict(self) -> dict[str, Any]: @dataclass class AddToCell(Mutation): """ - Mutation to add a value to an aggregate cell. + Adds an int64 value to an aggregate cell. The column family must be an + aggregate family and have an "int64" input type or this mutation will be + rejected. + Note: The timestamp values are in microseconds but must match the + granularity of the table (defaults to `MILLIS`). Therefore, the given value + must be a multiple of 1000 (millisecond granularity). For example: + `1571902339435000`. Args: family: The name of the column family to which the cell belongs. qualifier: The column qualifier of the cell. value: The value to be accumulated into the cell. - timestamp_micros: The timestamp of the cell. + timestamp_micros: The timestamp of the cell. Must be provided for + cell aggregation to work correctly. + Raises: TypeError: If `qualifier` is not `bytes` or `str`. TypeError: If `value` is not `int`. - ValueError: If `timestamp_micros` is less than `_SERVER_SIDE_TIMESTAMP`. + TypeError: If `timestamp_micros` is not `int`. + ValueError: If `timestamp_micros` is less than 0. """ def __init__( @@ -307,30 +316,27 @@ def __init__( family: str, qualifier: bytes | str, value: int, - timestamp: int | None = None, + timestamp_micros: int, ): qualifier = qualifier.encode() if isinstance(qualifier, str) else qualifier if not isinstance(qualifier, bytes): raise TypeError("qualifier must be bytes or str") if not isinstance(value, int): raise TypeError("value must be int") + if not isinstance(timestamp_micros, int): + raise TypeError("value must be int") if abs(value) > _MAX_INCREMENT_VALUE: raise ValueError( "int values must be between -2**63 and 2**63 (64-bit signed int)" ) - if timestamp is None: - # use current timestamp, with milisecond precision - timestamp = time.time_ns() // 1000 - timestamp = timestamp - (timestamp % 1000) - - if timestamp < 0: - raise ValueError("timestamp must be positive") + if timestamp_micros < 0: + raise ValueError("timestamp must be non-negative") self.family = family self.qualifier = qualifier self.value = value - self.timestamp = timestamp + self.timestamp = timestamp_micros def _to_dict(self) -> dict[str, Any]: return { diff --git a/tests/unit/data/test_mutations.py b/tests/unit/data/test_mutations.py index fb2ef15c7..853e5ccea 100644 --- a/tests/unit/data/test_mutations.py +++ b/tests/unit/data/test_mutations.py @@ -744,31 +744,10 @@ def test_ctor(self): assert instance.timestamp == expected_timestamp def test_ctor_negative_timestamp(self): - """Only positive timestamps are valid""" + """Only non-negative timestamps are valid""" with pytest.raises(ValueError) as e: self._make_one("test-family", b"test-qualifier", 1234, -2) - assert "timestamp must be positive" in str(e.value) - - @pytest.mark.parametrize( - "timestamp_ns,expected_timestamp_micros", - [ - (0, 0), - (1, 0), - (123, 0), - (999, 0), - (999_999, 0), - (1_000_000, 1000), - (1_234_567, 1000), - (1_999_999, 1000), - (2_000_000, 2000), - (1_234_567_890_123, 1_234_567_000), - ], - ) - def test_ctor_no_timestamp(self, timestamp_ns, expected_timestamp_micros): - """If no timestamp is given, should use current time with millisecond precision""" - with mock.patch("time.time_ns", return_value=timestamp_ns): - instance = self._make_one("test-family", b"test-qualifier", 1234) - assert instance.timestamp == expected_timestamp_micros + assert "timestamp must be non-negative" in str(e.value) def test__to_dict(self): """ensure dict representation is as expected""" From 96f537585bb0663901ba5b5b567f657633271a94 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 11 Jul 2025 12:16:45 -0700 Subject: [PATCH 05/12] added system tests for AddToCell --- tests/system/data/__init__.py | 1 + tests/system/data/setup_fixtures.py | 3 +- tests/system/data/test_system_async.py | 62 +++++++++++++++++++++++- tests/system/data/test_system_autogen.py | 55 ++++++++++++++++++++- 4 files changed, 116 insertions(+), 5 deletions(-) diff --git a/tests/system/data/__init__.py b/tests/system/data/__init__.py index f2952b2cd..2b35cea8f 100644 --- a/tests/system/data/__init__.py +++ b/tests/system/data/__init__.py @@ -16,3 +16,4 @@ TEST_FAMILY = "test-family" TEST_FAMILY_2 = "test-family-2" +TEST_AGGREGATE_FAMILY = "test-aggregate-family" diff --git a/tests/system/data/setup_fixtures.py b/tests/system/data/setup_fixtures.py index a77ffc008..169e2396b 100644 --- a/tests/system/data/setup_fixtures.py +++ b/tests/system/data/setup_fixtures.py @@ -20,7 +20,7 @@ import os import uuid -from . import TEST_FAMILY, TEST_FAMILY_2 +from . import TEST_FAMILY, TEST_FAMILY_2, TEST_AGGREGATE_FAMILY # authorized view subset to allow all qualifiers ALLOW_ALL = "" @@ -183,6 +183,7 @@ def authorized_view_id( "family_subsets": { TEST_FAMILY: ALL_QUALIFIERS, TEST_FAMILY_2: ALL_QUALIFIERS, + TEST_AGGREGATE_FAMILY: ALL_QUALIFIERS, }, }, }, diff --git a/tests/system/data/test_system_async.py b/tests/system/data/test_system_async.py index b59131414..736fea1f6 100644 --- a/tests/system/data/test_system_async.py +++ b/tests/system/data/test_system_async.py @@ -27,7 +27,7 @@ from google.cloud.bigtable.data._cross_sync import CrossSync -from . import TEST_FAMILY, TEST_FAMILY_2 +from . import TEST_FAMILY, TEST_FAMILY_2, TEST_AGGREGATE_FAMILY __CROSS_SYNC_OUTPUT__ = "tests.system.data.test_system_autogen" @@ -76,6 +76,27 @@ async def add_row( await self.target.client._gapic_client.mutate_row(request) self.rows.append(row_key) + @CrossSync.convert + async def add_aggregate_row( + self, row_key, *, family=TEST_AGGREGATE_FAMILY, qualifier=b"q", input=0 + ): + request = { + "table_name": self.target.table_name, + "row_key": row_key, + "mutations": [ + { + "add_to_cell": { + "family_name": family, + "column_qualifier": {"raw_value": qualifier}, + "timestamp": {"raw_timestamp_micros": 0}, + "input": {"int_value": input}, + } + } + ], + } + await self.target.client._gapic_client.mutate_row(request) + self.rows.append(row_key) + @CrossSync.convert async def delete_rows(self): if self.rows: @@ -132,7 +153,17 @@ def column_family_config(self): """ from google.cloud.bigtable_admin_v2 import types - return {TEST_FAMILY: types.ColumnFamily(), TEST_FAMILY_2: types.ColumnFamily()} + int_aggregate_type = types.Type.Aggregate( + input_type=types.Type(int64_type={"encoding": {"big_endian_bytes": {}}}), + sum={}, + ) + return { + TEST_FAMILY: types.ColumnFamily(), + TEST_FAMILY_2: types.ColumnFamily(), + TEST_AGGREGATE_FAMILY: types.ColumnFamily( + value_type=types.Type(aggregate_type=int_aggregate_type) + ), + } @pytest.fixture(scope="session") def init_table_id(self): @@ -281,6 +312,33 @@ async def test_mutation_set_cell(self, target, temp_rows): # ensure cell is updated assert (await self._retrieve_cell_value(target, row_key)) == new_value + @CrossSync.pytest + @pytest.mark.usefixtures("target") + @CrossSync.Retry( + predicate=retry.if_exception_type(ClientError), initial=1, maximum=5 + ) + async def test_mutation_add_to_cell(self, target, temp_rows): + """ + Test add to cell mutation + """ + from google.cloud.bigtable.data.mutations import AddToCell, DeleteAllFromFamily + + row_key = b"add_to_cell" + family = TEST_AGGREGATE_FAMILY + qualifier = b"test-qualifier" + # add row to temp_rows, for future deletion + await temp_rows.add_aggregate_row(row_key, family=family, qualifier=qualifier) + # set and check cell value + await target.mutate_row(row_key, AddToCell(family, qualifier, 1, timestamp=0)) + encoded_result = await self._retrieve_cell_value(target, row_key) + int_result = int.from_bytes(encoded_result, byteorder="big") + assert int_result == 1 + # update again + await target.mutate_row(row_key, AddToCell(family, qualifier, 9, timestamp=0)) + encoded_result = await self._retrieve_cell_value(target, row_key) + int_result = int.from_bytes(encoded_result, byteorder="big") + assert int_result == 10 + @pytest.mark.skipif( bool(os.environ.get(BIGTABLE_EMULATOR)), reason="emulator doesn't use splits" ) diff --git a/tests/system/data/test_system_autogen.py b/tests/system/data/test_system_autogen.py index 6b2006d7b..e5b8c4b96 100644 --- a/tests/system/data/test_system_autogen.py +++ b/tests/system/data/test_system_autogen.py @@ -26,7 +26,7 @@ from google.cloud.environment_vars import BIGTABLE_EMULATOR from google.type import date_pb2 from google.cloud.bigtable.data._cross_sync import CrossSync -from . import TEST_FAMILY, TEST_FAMILY_2 +from . import TEST_FAMILY, TEST_FAMILY_2, TEST_AGGREGATE_FAMILY TARGETS = ["table"] if not os.environ.get(BIGTABLE_EMULATOR): @@ -66,6 +66,26 @@ def add_row( self.target.client._gapic_client.mutate_row(request) self.rows.append(row_key) + def add_aggregate_row( + self, row_key, *, family=TEST_AGGREGATE_FAMILY, qualifier=b"q", input=0 + ): + request = { + "table_name": self.target.table_name, + "row_key": row_key, + "mutations": [ + { + "add_to_cell": { + "family_name": family, + "column_qualifier": {"raw_value": qualifier}, + "timestamp": {"raw_timestamp_micros": 0}, + "input": {"int_value": input}, + } + } + ], + } + self.target.client._gapic_client.mutate_row(request) + self.rows.append(row_key) + def delete_rows(self): if self.rows: request = { @@ -106,7 +126,17 @@ def column_family_config(self): """specify column families to create when creating a new test table""" from google.cloud.bigtable_admin_v2 import types - return {TEST_FAMILY: types.ColumnFamily(), TEST_FAMILY_2: types.ColumnFamily()} + int_aggregate_type = types.Type.Aggregate( + input_type=types.Type(int64_type={"encoding": {"big_endian_bytes": {}}}), + sum={}, + ) + return { + TEST_FAMILY: types.ColumnFamily(), + TEST_FAMILY_2: types.ColumnFamily(), + TEST_AGGREGATE_FAMILY: types.ColumnFamily( + value_type=types.Type(aggregate_type=int_aggregate_type) + ), + } @pytest.fixture(scope="session") def init_table_id(self): @@ -225,6 +255,27 @@ def test_mutation_set_cell(self, target, temp_rows): target.mutate_row(row_key, mutation) assert self._retrieve_cell_value(target, row_key) == new_value + @pytest.mark.usefixtures("target") + @CrossSync._Sync_Impl.Retry( + predicate=retry.if_exception_type(ClientError), initial=1, maximum=5 + ) + def test_mutation_add_to_cell(self, target, temp_rows): + """Test add to cell mutation""" + from google.cloud.bigtable.data.mutations import AddToCell + + row_key = b"add_to_cell" + family = TEST_AGGREGATE_FAMILY + qualifier = b"test-qualifier" + temp_rows.add_aggregate_row(row_key, family=family, qualifier=qualifier) + target.mutate_row(row_key, AddToCell(family, qualifier, 1, timestamp=0)) + encoded_result = self._retrieve_cell_value(target, row_key) + int_result = int.from_bytes(encoded_result, byteorder="big") + assert int_result == 1 + target.mutate_row(row_key, AddToCell(family, qualifier, 9, timestamp=0)) + encoded_result = self._retrieve_cell_value(target, row_key) + int_result = int.from_bytes(encoded_result, byteorder="big") + assert int_result == 10 + @pytest.mark.skipif( bool(os.environ.get(BIGTABLE_EMULATOR)), reason="emulator doesn't use splits" ) From 897fc30d072e6a7b45440dd63d76ecf3166488f8 Mon Sep 17 00:00:00 2001 From: Akshay Joshi Date: Wed, 16 Jul 2025 08:15:44 -0400 Subject: [PATCH 06/12] pr feedback --- google/cloud/bigtable/data/mutations.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigtable/data/mutations.py b/google/cloud/bigtable/data/mutations.py index 9fd2dad58..f19b1e49e 100644 --- a/google/cloud/bigtable/data/mutations.py +++ b/google/cloud/bigtable/data/mutations.py @@ -308,6 +308,7 @@ class AddToCell(Mutation): TypeError: If `qualifier` is not `bytes` or `str`. TypeError: If `value` is not `int`. TypeError: If `timestamp_micros` is not `int`. + ValueError: If `value` is out of bounds for a 64-bit signed int. ValueError: If `timestamp_micros` is less than 0. """ @@ -324,7 +325,7 @@ def __init__( if not isinstance(value, int): raise TypeError("value must be int") if not isinstance(timestamp_micros, int): - raise TypeError("value must be int") + raise TypeError("timestamp_micros must be int") if abs(value) > _MAX_INCREMENT_VALUE: raise ValueError( "int values must be between -2**63 and 2**63 (64-bit signed int)" From 409fa2c60142b1cb2c693d097950e8c4740a6b7b Mon Sep 17 00:00:00 2001 From: Akshay Joshi Date: Wed, 16 Jul 2025 09:08:59 -0400 Subject: [PATCH 07/12] fix system test param name --- tests/system/data/test_system_autogen.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/system/data/test_system_autogen.py b/tests/system/data/test_system_autogen.py index e5b8c4b96..d5ad9156f 100644 --- a/tests/system/data/test_system_autogen.py +++ b/tests/system/data/test_system_autogen.py @@ -267,11 +267,11 @@ def test_mutation_add_to_cell(self, target, temp_rows): family = TEST_AGGREGATE_FAMILY qualifier = b"test-qualifier" temp_rows.add_aggregate_row(row_key, family=family, qualifier=qualifier) - target.mutate_row(row_key, AddToCell(family, qualifier, 1, timestamp=0)) + target.mutate_row(row_key, AddToCell(family, qualifier, 1, timestamp_micros=0)) encoded_result = self._retrieve_cell_value(target, row_key) int_result = int.from_bytes(encoded_result, byteorder="big") assert int_result == 1 - target.mutate_row(row_key, AddToCell(family, qualifier, 9, timestamp=0)) + target.mutate_row(row_key, AddToCell(family, qualifier, 9, timestamp_micros=0)) encoded_result = self._retrieve_cell_value(target, row_key) int_result = int.from_bytes(encoded_result, byteorder="big") assert int_result == 10 From 09e87976d506bfe462056c9ec6668c6b719851c0 Mon Sep 17 00:00:00 2001 From: Akshay Joshi Date: Thu, 17 Jul 2025 10:12:53 -0400 Subject: [PATCH 08/12] fix --- tests/system/data/test_system_async.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/system/data/test_system_async.py b/tests/system/data/test_system_async.py index 736fea1f6..04499cb18 100644 --- a/tests/system/data/test_system_async.py +++ b/tests/system/data/test_system_async.py @@ -321,7 +321,7 @@ async def test_mutation_add_to_cell(self, target, temp_rows): """ Test add to cell mutation """ - from google.cloud.bigtable.data.mutations import AddToCell, DeleteAllFromFamily + from google.cloud.bigtable.data.mutations import AddToCell row_key = b"add_to_cell" family = TEST_AGGREGATE_FAMILY @@ -329,12 +329,12 @@ async def test_mutation_add_to_cell(self, target, temp_rows): # add row to temp_rows, for future deletion await temp_rows.add_aggregate_row(row_key, family=family, qualifier=qualifier) # set and check cell value - await target.mutate_row(row_key, AddToCell(family, qualifier, 1, timestamp=0)) + await target.mutate_row(row_key, AddToCell(family, qualifier, 1, timestamp_micros=0)) encoded_result = await self._retrieve_cell_value(target, row_key) int_result = int.from_bytes(encoded_result, byteorder="big") assert int_result == 1 # update again - await target.mutate_row(row_key, AddToCell(family, qualifier, 9, timestamp=0)) + await target.mutate_row(row_key, AddToCell(family, qualifier, 9, timestamp_micros=0)) encoded_result = await self._retrieve_cell_value(target, row_key) int_result = int.from_bytes(encoded_result, byteorder="big") assert int_result == 10 From 405581e698f505a6fb128025cde9655c5544499f Mon Sep 17 00:00:00 2001 From: Akshay Joshi Date: Thu, 17 Jul 2025 10:15:17 -0400 Subject: [PATCH 09/12] timestamp_micros required --- tests/unit/data/test_mutations.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/unit/data/test_mutations.py b/tests/unit/data/test_mutations.py index 853e5ccea..e34d99637 100644 --- a/tests/unit/data/test_mutations.py +++ b/tests/unit/data/test_mutations.py @@ -720,13 +720,13 @@ def _make_one(self, *args, **kwargs): @pytest.mark.parametrize("input_val", [2**64, -(2**64)]) def test_ctor_large_int(self, input_val): with pytest.raises(ValueError) as e: - self._make_one(family="f", qualifier=b"b", value=input_val) + self._make_one(family="f", qualifier=b"b", value=input_val, timestamp_micros=123) assert "int values must be between" in str(e.value) @pytest.mark.parametrize("input_val", ["", "a", "abc", "hello world!"]) def test_ctor_str_value(self, input_val): with pytest.raises(TypeError) as e: - self._make_one(family="f", qualifier=b"b", value=input_val) + self._make_one(family="f", qualifier=b"b", value=input_val, timestamp_micros=123) assert "value must be int" in str(e.value) def test_ctor(self): @@ -791,7 +791,6 @@ def test__to_pb(self): (1234567890), (1), (0), - (None), ], ) def test_is_idempotent(self, timestamp): From 205c35dd3f57453e013cdda734c68127fc6a165a Mon Sep 17 00:00:00 2001 From: Akshay Joshi Date: Fri, 18 Jul 2025 08:05:24 -0400 Subject: [PATCH 10/12] blacken --- tests/system/data/test_system_async.py | 8 ++++++-- tests/unit/data/test_mutations.py | 8 ++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/tests/system/data/test_system_async.py b/tests/system/data/test_system_async.py index 04499cb18..6747a73e0 100644 --- a/tests/system/data/test_system_async.py +++ b/tests/system/data/test_system_async.py @@ -329,12 +329,16 @@ async def test_mutation_add_to_cell(self, target, temp_rows): # add row to temp_rows, for future deletion await temp_rows.add_aggregate_row(row_key, family=family, qualifier=qualifier) # set and check cell value - await target.mutate_row(row_key, AddToCell(family, qualifier, 1, timestamp_micros=0)) + await target.mutate_row( + row_key, AddToCell(family, qualifier, 1, timestamp_micros=0) + ) encoded_result = await self._retrieve_cell_value(target, row_key) int_result = int.from_bytes(encoded_result, byteorder="big") assert int_result == 1 # update again - await target.mutate_row(row_key, AddToCell(family, qualifier, 9, timestamp_micros=0)) + await target.mutate_row( + row_key, AddToCell(family, qualifier, 9, timestamp_micros=0) + ) encoded_result = await self._retrieve_cell_value(target, row_key) int_result = int.from_bytes(encoded_result, byteorder="big") assert int_result == 10 diff --git a/tests/unit/data/test_mutations.py b/tests/unit/data/test_mutations.py index e34d99637..ac1c8bd6b 100644 --- a/tests/unit/data/test_mutations.py +++ b/tests/unit/data/test_mutations.py @@ -720,13 +720,17 @@ def _make_one(self, *args, **kwargs): @pytest.mark.parametrize("input_val", [2**64, -(2**64)]) def test_ctor_large_int(self, input_val): with pytest.raises(ValueError) as e: - self._make_one(family="f", qualifier=b"b", value=input_val, timestamp_micros=123) + self._make_one( + family="f", qualifier=b"b", value=input_val, timestamp_micros=123 + ) assert "int values must be between" in str(e.value) @pytest.mark.parametrize("input_val", ["", "a", "abc", "hello world!"]) def test_ctor_str_value(self, input_val): with pytest.raises(TypeError) as e: - self._make_one(family="f", qualifier=b"b", value=input_val, timestamp_micros=123) + self._make_one( + family="f", qualifier=b"b", value=input_val, timestamp_micros=123 + ) assert "value must be int" in str(e.value) def test_ctor(self): From 14eadb6da2383a679c11d118cb4cbde4f3eb2f84 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 18 Jul 2025 14:39:41 -0700 Subject: [PATCH 11/12] fix ci --- tests/system/data/test_system_async.py | 18 ++++++++++++++---- tests/system/data/test_system_autogen.py | 16 ++++++++++++---- tests/unit/data/test_mutations.py | 12 ++++++++++++ 3 files changed, 38 insertions(+), 8 deletions(-) diff --git a/tests/system/data/test_system_async.py b/tests/system/data/test_system_async.py index 6747a73e0..9f79916fb 100644 --- a/tests/system/data/test_system_async.py +++ b/tests/system/data/test_system_async.py @@ -1185,7 +1185,7 @@ async def test_execute_query_simple(self, client, table_id, instance_id): predicate=retry.if_exception_type(ClientError), initial=1, maximum=5 ) async def test_execute_against_target( - self, client, instance_id, table_id, temp_rows + self, client, instance_id, table_id, temp_rows, column_family_config ): await temp_rows.add_row(b"row_key_1") result = await client.execute_query( @@ -1200,7 +1200,9 @@ async def test_execute_against_target( assert family_map[b"q"] == b"test-value" assert len(rows[0][TEST_FAMILY_2]) == 0 md = result.metadata - assert len(md) == 3 + # we expect it to fetch each column family, plus _key + # add additional families here if column_family_config changes + assert len(md) == len(column_family_config) + 1 assert md["_key"].column_type == SqlType.Bytes() assert md[TEST_FAMILY].column_type == SqlType.Map( SqlType.Bytes(), SqlType.Bytes() @@ -1208,6 +1210,9 @@ async def test_execute_against_target( assert md[TEST_FAMILY_2].column_type == SqlType.Map( SqlType.Bytes(), SqlType.Bytes() ) + assert md[TEST_AGGREGATE_FAMILY].column_type == SqlType.Map( + SqlType.Bytes(), SqlType.Int64() + ) @pytest.mark.skipif( bool(os.environ.get(BIGTABLE_EMULATOR)), @@ -1310,7 +1315,7 @@ async def test_execute_query_params(self, client, table_id, instance_id): predicate=retry.if_exception_type(ClientError), initial=1, maximum=5 ) async def test_execute_metadata_on_empty_response( - self, client, instance_id, table_id, temp_rows + self, client, instance_id, table_id, temp_rows, column_family_config ): await temp_rows.add_row(b"row_key_1") result = await client.execute_query( @@ -1320,7 +1325,9 @@ async def test_execute_metadata_on_empty_response( assert len(rows) == 0 md = result.metadata - assert len(md) == 3 + # we expect it to fetch each column family, plus _key + # add additional families here if column_family_config change + assert len(md) == len(column_family_config) + 1 assert md["_key"].column_type == SqlType.Bytes() assert md[TEST_FAMILY].column_type == SqlType.Map( SqlType.Bytes(), SqlType.Bytes() @@ -1328,3 +1335,6 @@ async def test_execute_metadata_on_empty_response( assert md[TEST_FAMILY_2].column_type == SqlType.Map( SqlType.Bytes(), SqlType.Bytes() ) + assert md[TEST_AGGREGATE_FAMILY].column_type == SqlType.Map( + SqlType.Bytes(), SqlType.Int64() + ) \ No newline at end of file diff --git a/tests/system/data/test_system_autogen.py b/tests/system/data/test_system_autogen.py index d5ad9156f..46e9c2215 100644 --- a/tests/system/data/test_system_autogen.py +++ b/tests/system/data/test_system_autogen.py @@ -966,7 +966,9 @@ def test_execute_query_simple(self, client, table_id, instance_id): @CrossSync._Sync_Impl.Retry( predicate=retry.if_exception_type(ClientError), initial=1, maximum=5 ) - def test_execute_against_target(self, client, instance_id, table_id, temp_rows): + def test_execute_against_target( + self, client, instance_id, table_id, temp_rows, column_family_config + ): temp_rows.add_row(b"row_key_1") result = client.execute_query("SELECT * FROM `" + table_id + "`", instance_id) rows = [r for r in result] @@ -977,7 +979,7 @@ def test_execute_against_target(self, client, instance_id, table_id, temp_rows): assert family_map[b"q"] == b"test-value" assert len(rows[0][TEST_FAMILY_2]) == 0 md = result.metadata - assert len(md) == 3 + assert len(md) == len(column_family_config) + 1 assert md["_key"].column_type == SqlType.Bytes() assert md[TEST_FAMILY].column_type == SqlType.Map( SqlType.Bytes(), SqlType.Bytes() @@ -985,6 +987,9 @@ def test_execute_against_target(self, client, instance_id, table_id, temp_rows): assert md[TEST_FAMILY_2].column_type == SqlType.Map( SqlType.Bytes(), SqlType.Bytes() ) + assert md[TEST_AGGREGATE_FAMILY].column_type == SqlType.Map( + SqlType.Bytes(), SqlType.Int64() + ) @pytest.mark.skipif( bool(os.environ.get(BIGTABLE_EMULATOR)), reason="emulator doesn't support SQL" @@ -1074,7 +1079,7 @@ def test_execute_query_params(self, client, table_id, instance_id): predicate=retry.if_exception_type(ClientError), initial=1, maximum=5 ) def test_execute_metadata_on_empty_response( - self, client, instance_id, table_id, temp_rows + self, client, instance_id, table_id, temp_rows, column_family_config ): temp_rows.add_row(b"row_key_1") result = client.execute_query( @@ -1083,7 +1088,7 @@ def test_execute_metadata_on_empty_response( rows = [r for r in result] assert len(rows) == 0 md = result.metadata - assert len(md) == 3 + assert len(md) == len(column_family_config) + 1 assert md["_key"].column_type == SqlType.Bytes() assert md[TEST_FAMILY].column_type == SqlType.Map( SqlType.Bytes(), SqlType.Bytes() @@ -1091,3 +1096,6 @@ def test_execute_metadata_on_empty_response( assert md[TEST_FAMILY_2].column_type == SqlType.Map( SqlType.Bytes(), SqlType.Bytes() ) + assert md[TEST_AGGREGATE_FAMILY].column_type == SqlType.Map( + SqlType.Bytes(), SqlType.Int64() + ) diff --git a/tests/unit/data/test_mutations.py b/tests/unit/data/test_mutations.py index ac1c8bd6b..17050162c 100644 --- a/tests/unit/data/test_mutations.py +++ b/tests/unit/data/test_mutations.py @@ -117,6 +117,17 @@ def test_size(self, test_dict): {"delete_from_family": {"family_name": "foo"}}, ), (mutations.DeleteAllFromRow, {"delete_from_row": {}}), + ( + mutations.AddToCell, + { + "add_to_cell": { + "family_name": "foo", + "column_qualifier": {"raw_value": b"bar"}, + "timestamp": {"raw_timestamp_micros": 12345}, + "input": {"int_value": 123}, + } + }, + ), ], ) def test__from_dict(self, expected_class, input_dict): @@ -162,6 +173,7 @@ def test__from_dict_wrong_subclass(self): mutations.DeleteRangeFromColumn("foo", b"bar"), mutations.DeleteAllFromFamily("foo"), mutations.DeleteAllFromRow(), + mutations.AddToCell("foo", b"bar", 123, 456), ] for instance in subclasses: others = [other for other in subclasses if other != instance] From d33e798471bc93140ddcd47d0f590c5d6435a7e2 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 18 Jul 2025 14:41:38 -0700 Subject: [PATCH 12/12] fix lint --- tests/system/data/test_system_async.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/system/data/test_system_async.py b/tests/system/data/test_system_async.py index 9f79916fb..0dd6e8100 100644 --- a/tests/system/data/test_system_async.py +++ b/tests/system/data/test_system_async.py @@ -1337,4 +1337,4 @@ async def test_execute_metadata_on_empty_response( ) assert md[TEST_AGGREGATE_FAMILY].column_type == SqlType.Map( SqlType.Bytes(), SqlType.Int64() - ) \ No newline at end of file + )