Skip to content

Commit 26a0d0a

Browse files
authored
fix: Move 'grouped_tables' into _retrieve_tables (#430)
Signed-off-by: xuans <[email protected]>
1 parent 7157c24 commit 26a0d0a

File tree

2 files changed

+5
-8
lines changed

2 files changed

+5
-8
lines changed

databuilder/extractor/base_bigquery_extractor.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,6 @@ class BaseBigQueryExtractor(Extractor):
3333
DEFAULT_PAGE_SIZE = 300
3434
NUM_RETRIES = 3
3535
DATE_LENGTH = 8
36-
SHARDED_TABLE_KEY_FORMAT = '{dataset_id}/{table_id}'
3736

3837
def init(self, conf: ConfigTree) -> None:
3938
# should use key_path, or cred_key if the former doesn't exist

databuilder/extractor/bigquery_metadata_extractor.py

Lines changed: 5 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -28,10 +28,11 @@ class BigQueryMetadataExtractor(BaseBigQueryExtractor):
2828

2929
def init(self, conf: ConfigTree) -> None:
3030
BaseBigQueryExtractor.init(self, conf)
31-
self.grouped_tables: Set[str] = set([])
3231
self.iter = iter(self._iterate_over_tables())
3332

3433
def _retrieve_tables(self, dataset: DatasetRef) -> Any:
34+
grouped_tables: Set[str] = set([])
35+
3536
for page in self._page_table_list_results(dataset):
3637
if 'tables' not in page:
3738
continue
@@ -47,16 +48,13 @@ def _retrieve_tables(self, dataset: DatasetRef) -> Any:
4748
# If the last eight characters are digits, we assume the table is of a table date range type
4849
# and then we only need one schema definition
4950
table_prefix = table_id[:-BigQueryMetadataExtractor.DATE_LENGTH]
50-
table_id = table_prefix
51-
sharded_table_key = BigQueryMetadataExtractor.SHARDED_TABLE_KEY_FORMAT.format(
52-
dataset_id=tableRef['datasetId'],
53-
table_id=table_id)
54-
if sharded_table_key in self.grouped_tables:
51+
if table_prefix in grouped_tables:
5552
# If one table in the date range is processed, then ignore other ones
5653
# (it adds too much metadata)
5754
continue
5855

59-
self.grouped_tables.add(sharded_table_key)
56+
table_id = table_prefix
57+
grouped_tables.add(table_prefix)
6058

6159
table = self.bigquery_service.tables().get(
6260
projectId=tableRef['projectId'],

0 commit comments

Comments
 (0)