diff --git a/bigframes/session/bq_caching_executor.py b/bigframes/session/bq_caching_executor.py index c5d6fe3e5f..7cf9d9bd6d 100644 --- a/bigframes/session/bq_caching_executor.py +++ b/bigframes/session/bq_caching_executor.py @@ -247,7 +247,7 @@ def _export_gbq( ) sql = compiled.sql - if (existing_table is not None) and _if_schema_match( + if (existing_table is not None) and _is_schema_match( existing_table.schema, array_value.schema ): # b/409086472: Uses DML for table appends and replacements to avoid @@ -690,16 +690,16 @@ def _result_schema( ) -def _if_schema_match( - table_schema: Tuple[bigquery.SchemaField, ...], schema: schemata.ArraySchema +def _is_schema_match( + table_schema: Tuple[bigquery.SchemaField, ...], + schema: schemata.ArraySchema, ) -> bool: if len(table_schema) != len(schema.items): return False - for field in table_schema: - if field.name not in schema.names: + for field, schema_item in zip(table_schema, schema.items): + if field.name != schema_item.column: return False - if bigframes.dtypes.convert_schema_field(field)[1] != schema.get_type( - field.name - ): + _, field_dtype = bigframes.dtypes.convert_schema_field(field) + if field_dtype != schema_item.dtype: return False return True diff --git a/tests/system/small/test_dataframe_io.py b/tests/system/small/test_dataframe_io.py index 3da3544cbb..b40dcca7d7 100644 --- a/tests/system/small/test_dataframe_io.py +++ b/tests/system/small/test_dataframe_io.py @@ -631,6 +631,13 @@ def test_to_gbq_if_exists_is_replace(scalars_dfs, dataset_id): assert len(gcs_df) == len(scalars_pandas_df) pd.testing.assert_index_equal(gcs_df.columns, scalars_pandas_df.columns) + # When replacing a table with same schema but different column order + reordered_df = scalars_df[scalars_df.columns[::-1]] + reordered_df.to_gbq(destination_table, if_exists="replace") + gcs_df = pandas_gbq.read_gbq(destination_table, index_col="rowindex") + assert len(gcs_df) == len(scalars_pandas_df) + pd.testing.assert_index_equal(gcs_df.columns, reordered_df.columns) + # When replacing a table with different schema partitial_scalars_df = scalars_df.drop(columns=["string_col"]) partitial_scalars_df.to_gbq(destination_table, if_exists="replace")