Commit 622d7bf

fix(profile):bigquery - Check for every table if it is partitioned to not hit table quota (#4074)
1 parent: 782e66f
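
In outline: before this change the profiler fetched partition metadata for a whole schema at once, paging through the results with limit/offset (500 rows per page) and caching them per schema; after it, get_latest_partition issues one small query scoped to the single table being profiled, which is presumably what keeps the job under the table quota the commit title refers to. Below is a rough sketch of the resulting query shape, reconstructed from the template fragments visible in the diff; the select list and the COLUMNS/PARTITIONS join are illustrative assumptions, not the verbatim template:

# Hypothetical reconstruction of BQ_GET_LATEST_PARTITION_TEMPLATE after this
# commit. Only the fragments that appear in the diff below are confirmed;
# the select list and the COLUMNS/PARTITIONS join are assumptions.
BQ_GET_LATEST_PARTITION_SKETCH = """
select
    c.table_catalog,
    c.table_schema,
    c.table_name,
    c.column_name,
    max(p.partition_id) as partition_id
from
    `{project_id}.{schema}.INFORMATION_SCHEMA.COLUMNS` c
    join `{project_id}.{schema}.INFORMATION_SCHEMA.PARTITIONS` p
        on c.table_name = p.table_name
where
    c.is_partitioning_column = 'YES'
    -- Filter out special partitions (https://cloud.google.com/bigquery/docs/partitioned-tables#date_timestamp_partitioned_tables)
    and p.partition_id not in ('__NULL__', '__UNPARTITIONED__')
    and STORAGE_TIER='ACTIVE'
    and p.table_name= '{table}'  -- the per-table filter added by this commit
group by
    c.table_catalog,
    c.table_schema,
    c.table_name,
    c.column_name
""".strip()

Scoping the where clause to one table means each profiled table costs one small metadata lookup instead of contributing to a schema-wide scan.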

File tree

1 file changed: 14 additions, 43 deletions

  • metadata-ingestion/src/datahub/ingestion/source/sql/bigquery.py

metadata-ingestion/src/datahub/ingestion/source/sql/bigquery.py

Lines changed: 14 additions & 43 deletions
@@ -97,6 +97,7 @@
     -- Filter out special partitions (https://cloud.google.com/bigquery/docs/partitioned-tables#date_timestamp_partitioned_tables)
     and p.partition_id not in ('__NULL__', '__UNPARTITIONED__')
     and STORAGE_TIER='ACTIVE'
+    and p.table_name= '{table}'
 group by
     c.table_catalog,
     c.table_schema,
@@ -108,8 +109,6 @@
     c.table_schema,
     c.table_name,
     c.column_name
-limit {limit}
-offset {offset}
 """.strip()
 
 SHARDED_TABLE_REGEX = r"^(.+)[_](\d{4}|\d{6}|\d{8}|\d{10})$"
@@ -289,7 +288,6 @@ class BigQueryDatasetKey(ProjectIdKey):
 
 class BigQuerySource(SQLAlchemySource):
     config: BigQueryConfig
-    partiton_columns: Dict[str, Dict[str, BigQueryPartitionColumn]] = dict()
     maximum_shard_ids: Dict[str, str] = dict()
     lineage_metadata: Optional[Dict[str, Set[str]]] = None
 
@@ -486,50 +484,23 @@ def _create_lineage_map(self, entries: Iterable[QueryEvent]) -> Dict[str, Set[str]]:
             lineage_map[destination_table_str].add(ref_table_str)
         return lineage_map
 
-    def get_latest_partitions_for_schema(self, schema: str) -> None:
-        query_limit: int = 500
-        offset: int = 0
+    def get_latest_partition(
+        self, schema: str, table: str
+    ) -> Optional[BigQueryPartitionColumn]:
         url = self.config.get_sql_alchemy_url()
         engine = create_engine(url, **self.config.options)
         with engine.connect() as con:
             inspector = inspect(con)
-            partitions = {}
-
-            def get_partition_columns(
-                project_id: str, schema: str, limit: int, offset: int
-            ) -> int:
-                sql = BQ_GET_LATEST_PARTITION_TEMPLATE.format(
-                    project_id=project_id,
-                    schema=schema,
-                    limit=limit,
-                    offset=offset,
-                )
-                result = con.execute(sql)
-                row_count: int = 0
-                for row in result:
-                    partition = BigQueryPartitionColumn(**row)
-                    partitions[partition.table_name] = partition
-                    row_count = row_count + 1
-                return row_count
-
-            res_size = get_partition_columns(
-                self.get_db_name(inspector), schema, query_limit, offset
+            sql = BQ_GET_LATEST_PARTITION_TEMPLATE.format(
                project_id=self.get_db_name(inspector), schema=schema, table=table
             )
-            while res_size == query_limit:
-                offset = offset + query_limit
-                res_size = get_partition_columns(
-                    self.get_db_name(inspector), schema, query_limit, offset
-                )
-
-            self.partiton_columns[schema] = partitions
-
-    def get_latest_partition(
-        self, schema: str, table: str
-    ) -> Optional[BigQueryPartitionColumn]:
-        if schema not in self.partiton_columns:
-            self.get_latest_partitions_for_schema(schema)
-
-        return self.partiton_columns[schema].get(table)
+            result = con.execute(sql)
+            # BigQuery only supports one partition column:
+            # https://stackoverflow.com/questions/62886213/adding-multiple-partitioned-columns-to-bigquery-table-from-sql-query
+            row = result.fetchone()
+            if row:
+                return BigQueryPartitionColumn(**row)
+            return None
 
     def get_shard_from_table(self, table: str) -> Tuple[str, Optional[str]]:
         match = re.search(SHARDED_TABLE_REGEX, table, re.IGNORECASE)
@@ -627,7 +598,7 @@ def is_dataset_eligable_profiling(
 
         (project_id, schema, table) = dataset_name.split(".")
         if not self.is_latest_shard(project_id=project_id, table=table, schema=schema):
-            logger.warning(
+            logger.debug(
                 f"{dataset_name} is sharded but not the latest shard, skipping..."
             )
             return False
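
For orientation, a minimal usage sketch of the new method, assuming a configured BigQuerySource instance named source; the dataset and table names are placeholders, and the attribute names on BigQueryPartitionColumn are inferred from the query's group-by columns:

# Minimal sketch, not part of the commit: look up the latest active
# partition for a single table, handling the unpartitioned case.
partition = source.get_latest_partition(schema="my_dataset", table="my_table")
if partition is None:
    # No row came back: the table has no active, non-special partitions.
    print("my_table is not partitioned")
else:
    # BigQuery allows at most one partition column per table, so one row
    # fully describes the latest partition.
    print(partition.table_name, partition.column_name)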
