From 91ddfd223362860ca9c0dd64aa9f78bdd4bf6a37 Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Mon, 7 Jul 2025 17:51:27 +0000 Subject: [PATCH 1/2] feat: adds date_format to load job and external config --- google/cloud/bigquery/external_config.py | 14 ++++++++++++++ google/cloud/bigquery/job/load.py | 20 ++++++++++++++++++++ tests/unit/job/test_load.py | 15 +++++++++++++++ tests/unit/job/test_load_config.py | 16 ++++++++++++++++ tests/unit/test_external_config.py | 7 +++++++ 5 files changed, 72 insertions(+) diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index cb8141cd0..06179c10d 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -848,6 +848,20 @@ def schema(self, value): prop = {"fields": [field.to_api_repr() for field in value]} self._properties["schema"] = prop + @property + def date_format(self) -> Optional[str]: + """Optional[str]: Format used to parse DATE values. Supports C-style and SQL-style values. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.date_format + """ + result = self._properties.get("dateFormat") + return typing.cast(str, result) + + @date_format.setter + def date_format(self, value: Optional[str]): + self._properties["dateFormat"] = value + @property def connection_id(self): """Optional[str]: [Experimental] ID of a BigQuery Connection API diff --git a/google/cloud/bigquery/job/load.py b/google/cloud/bigquery/job/load.py index e56ce16f0..10f861706 100644 --- a/google/cloud/bigquery/job/load.py +++ b/google/cloud/bigquery/job/load.py @@ -548,6 +548,19 @@ def source_format(self): def source_format(self, value): self._set_sub_prop("sourceFormat", value) + @property + def date_format(self) -> Optional[str]: + """Optional[str]: Date format used for parsing DATE values. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.date_format + """ + return self._get_sub_prop("dateFormat") + + @date_format.setter + def date_format(self, value: Optional[str]): + self._set_sub_prop("dateFormat", value) + @property def time_partitioning(self): """Optional[google.cloud.bigquery.table.TimePartitioning]: Specifies time-based @@ -889,6 +902,13 @@ def clustering_fields(self): """ return self.configuration.clustering_fields + @property + def date_format(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.date_format`. + """ + return self.configuration.date_format + @property def schema_update_options(self): """See diff --git a/tests/unit/job/test_load.py b/tests/unit/job/test_load.py index 10df46fb3..8635f1d6c 100644 --- a/tests/unit/job/test_load.py +++ b/tests/unit/job/test_load.py @@ -38,10 +38,14 @@ def _setUpConstants(self): self.OUTPUT_ROWS = 345 self.REFERENCE_FILE_SCHEMA_URI = "gs://path/to/reference" + self.DATE_FORMAT = "%Y-%m-%d" + def _make_resource(self, started=False, ended=False): resource = super(TestLoadJob, self)._make_resource(started, ended) config = resource["configuration"]["load"] config["sourceUris"] = [self.SOURCE1] + + config["dateFormat"] = self.DATE_FORMAT config["destinationTable"] = { "projectId": self.PROJECT, "datasetId": self.DS_ID, @@ -153,6 +157,11 @@ def _verifyResourceProperties(self, job, resource): else: self.assertIsNone(job.destination_encryption_configuration) + if "dateFormat" in config: + self.assertEqual(job.date_format, config["dateFormat"]) + else: + self.assertIsNone(job.date_format) + def test_ctor(self): client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) @@ -195,6 +204,8 @@ def test_ctor(self): self.assertIsNone(job.schema_update_options) self.assertIsNone(job.reference_file_schema_uri) + self.assertIsNone(job.date_format) + def test_ctor_w_config(self): from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.job import LoadJobConfig @@ -571,6 +582,7 @@ def test_begin_w_alternate_client(self): ] }, "schemaUpdateOptions": [SchemaUpdateOption.ALLOW_FIELD_ADDITION], + "dateFormat": self.DATE_FORMAT, } RESOURCE["configuration"]["load"] = LOAD_CONFIGURATION conn1 = make_connection() @@ -599,6 +611,9 @@ def test_begin_w_alternate_client(self): config.write_disposition = WriteDisposition.WRITE_TRUNCATE config.schema_update_options = [SchemaUpdateOption.ALLOW_FIELD_ADDITION] config.reference_file_schema_uri = "gs://path/to/reference" + + config.date_format = self.DATE_FORMAT + with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: diff --git a/tests/unit/job/test_load_config.py b/tests/unit/job/test_load_config.py index 3a681c476..e0322dcf0 100644 --- a/tests/unit/job/test_load_config.py +++ b/tests/unit/job/test_load_config.py @@ -828,6 +828,22 @@ def test_write_disposition_setter(self): config._properties["load"]["writeDisposition"], write_disposition ) + def test_date_format_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.date_format) + + def test_date_format_hit(self): + date_format = "%Y-%m-%d" + config = self._get_target_class()() + config._properties["load"]["dateFormat"] = date_format + self.assertEqual(config.date_format, date_format) + + def test_date_format_setter(self): + date_format = "YYYY/MM/DD" + config = self._get_target_class()() + config.date_format = date_format + self.assertEqual(config._properties["load"]["dateFormat"], date_format) + def test_parquet_options_missing(self): config = self._get_target_class()() self.assertIsNone(config.parquet_options) diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py index 7f84a9f5b..ba4645b32 100644 --- a/tests/unit/test_external_config.py +++ b/tests/unit/test_external_config.py @@ -26,6 +26,8 @@ class TestExternalConfig(unittest.TestCase): SOURCE_URIS = ["gs://foo", "gs://bar"] + DATE_FORMAT = "MM/DD/YYYY" + BASE_RESOURCE = { "sourceFormat": "", "sourceUris": SOURCE_URIS, @@ -33,6 +35,7 @@ class TestExternalConfig(unittest.TestCase): "autodetect": True, "ignoreUnknownValues": False, "compression": "compression", + "dateFormat": DATE_FORMAT, } def test_from_api_repr_base(self): @@ -79,6 +82,7 @@ def test_to_api_repr_base(self): ec.connection_id = "path/to/connection" ec.schema = [schema.SchemaField("full_name", "STRING", mode="REQUIRED")] + ec.date_format = self.DATE_FORMAT exp_schema = { "fields": [{"name": "full_name", "type": "STRING", "mode": "REQUIRED"}] } @@ -92,6 +96,7 @@ def test_to_api_repr_base(self): "compression": "compression", "connectionId": "path/to/connection", "schema": exp_schema, + "dateFormat": self.DATE_FORMAT, } self.assertEqual(got_resource, exp_resource) @@ -128,6 +133,8 @@ def _verify_base(self, ec): self.assertEqual(ec.max_bad_records, 17) self.assertEqual(ec.source_uris, self.SOURCE_URIS) + self.assertEqual(ec.date_format, self.DATE_FORMAT) + def test_to_api_repr_source_format(self): ec = external_config.ExternalConfig("CSV") got = ec.to_api_repr() From c17b6de719b64b4ff58b171f69ba285b67424057 Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Thu, 10 Jul 2025 09:21:38 +0000 Subject: [PATCH 2/2] adds date_format to new to/from_api_repr tests --- google/cloud/bigquery/external_config.py | 1 + google/cloud/bigquery/job/load.py | 2 ++ tests/unit/job/test_load.py | 10 ++++------ tests/unit/job/test_load_config.py | 3 +++ 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index 5cdf40908..54b7bf396 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -862,6 +862,7 @@ def date_format(self) -> Optional[str]: def date_format(self, value: Optional[str]): self._properties["dateFormat"] = value + @property def time_zone(self) -> Optional[str]: """Optional[str]: Time zone used when parsing timestamp values that do not have specific time zone information (e.g. 2024-04-20 12:34:56). The expected diff --git a/google/cloud/bigquery/job/load.py b/google/cloud/bigquery/job/load.py index 093e7861b..277478d81 100644 --- a/google/cloud/bigquery/job/load.py +++ b/google/cloud/bigquery/job/load.py @@ -561,6 +561,7 @@ def date_format(self) -> Optional[str]: def date_format(self, value: Optional[str]): self._set_sub_prop("dateFormat", value) + @property def time_zone(self) -> Optional[str]: """Optional[str]: Default time zone that will apply when parsing timestamp values that have no specific time zone. @@ -922,6 +923,7 @@ def date_format(self): """ return self.configuration.date_format + @property def time_zone(self): """See :attr:`google.cloud.bigquery.job.LoadJobConfig.time_zone`. diff --git a/tests/unit/job/test_load.py b/tests/unit/job/test_load.py index f35f06bc4..82baa03c7 100644 --- a/tests/unit/job/test_load.py +++ b/tests/unit/job/test_load.py @@ -147,7 +147,6 @@ def _verifyResourceProperties(self, job, resource): ) else: self.assertIsNone(job.reference_file_schema_uri) - if "destinationEncryptionConfiguration" in config: self.assertIsNotNone(job.destination_encryption_configuration) self.assertEqual( @@ -156,15 +155,14 @@ def _verifyResourceProperties(self, job, resource): ) else: self.assertIsNone(job.destination_encryption_configuration) - if "timeZone" in config: - self.assertEqual(job.time_zone, config["timeZone"]) - else: - self.assertIsNone(job.time_zone) - if "dateFormat" in config: self.assertEqual(job.date_format, config["dateFormat"]) else: self.assertIsNone(job.date_format) + if "timeZone" in config: + self.assertEqual(job.time_zone, config["timeZone"]) + else: + self.assertIsNone(job.time_zone) def test_ctor(self): client = _make_client(project=self.PROJECT) diff --git a/tests/unit/job/test_load_config.py b/tests/unit/job/test_load_config.py index 8f8cc7d1e..5b7f8175b 100644 --- a/tests/unit/job/test_load_config.py +++ b/tests/unit/job/test_load_config.py @@ -958,6 +958,7 @@ def test_column_name_character_map_none(self): }, "useAvroLogicalTypes": True, "writeDisposition": "WRITE_TRUNCATE", + "dateFormat": "%Y-%m-%d", "timeZone": "America/New_York", "parquetOptions": {"enableListInference": True}, "columnNameCharacterMap": "V2", @@ -999,6 +1000,7 @@ def test_from_api_repr(self): ) self.assertTrue(config.use_avro_logical_types) self.assertEqual(config.write_disposition, WriteDisposition.WRITE_TRUNCATE) + self.assertEqual(config.date_format, "%Y-%m-%d") self.assertEqual(config.time_zone, "America/New_York") self.assertTrue(config.parquet_options.enable_list_inference) self.assertEqual(config.column_name_character_map, ColumnNameCharacterMap.V2) @@ -1033,6 +1035,7 @@ def test_to_api_repr(self): ) config.use_avro_logical_types = True config.write_disposition = WriteDisposition.WRITE_TRUNCATE + config.date_format = "%Y-%m-%d" config.time_zone = "America/New_York" parquet_options = ParquetOptions() parquet_options.enable_list_inference = True