|
97 | 97 | -- Filter out special partitions (https://cloud.google.com/bigquery/docs/partitioned-tables#date_timestamp_partitioned_tables)
|
98 | 98 | and p.partition_id not in ('__NULL__', '__UNPARTITIONED__')
|
99 | 99 | and STORAGE_TIER='ACTIVE'
|
| 100 | + and p.table_name= '{table}' |
100 | 101 | group by
|
101 | 102 | c.table_catalog,
|
102 | 103 | c.table_schema,
|
|
108 | 109 | c.table_schema,
|
109 | 110 | c.table_name,
|
110 | 111 | c.column_name
|
111 |
| -limit {limit} |
112 |
| -offset {offset} |
113 | 112 | """.strip()
|
114 | 113 |
|
115 | 114 | SHARDED_TABLE_REGEX = r"^(.+)[_](\d{4}|\d{6}|\d{8}|\d{10})$"
|
@@ -289,7 +288,6 @@ class BigQueryDatasetKey(ProjectIdKey):
|
289 | 288 |
|
290 | 289 | class BigQuerySource(SQLAlchemySource):
|
291 | 290 | config: BigQueryConfig
|
292 |
| - partiton_columns: Dict[str, Dict[str, BigQueryPartitionColumn]] = dict() |
293 | 291 | maximum_shard_ids: Dict[str, str] = dict()
|
294 | 292 | lineage_metadata: Optional[Dict[str, Set[str]]] = None
|
295 | 293 |
|
@@ -486,50 +484,23 @@ def _create_lineage_map(self, entries: Iterable[QueryEvent]) -> Dict[str, Set[st
|
486 | 484 | lineage_map[destination_table_str].add(ref_table_str)
|
487 | 485 | return lineage_map
|
488 | 486 |
|
489 |
| - def get_latest_partitions_for_schema(self, schema: str) -> None: |
490 |
| - query_limit: int = 500 |
491 |
| - offset: int = 0 |
| 487 | + def get_latest_partition( |
| 488 | + self, schema: str, table: str |
| 489 | + ) -> Optional[BigQueryPartitionColumn]: |
492 | 490 | url = self.config.get_sql_alchemy_url()
|
493 | 491 | engine = create_engine(url, **self.config.options)
|
494 | 492 | with engine.connect() as con:
|
495 | 493 | inspector = inspect(con)
|
496 |
| - partitions = {} |
497 |
| - |
498 |
| - def get_partition_columns( |
499 |
| - project_id: str, schema: str, limit: int, offset: int |
500 |
| - ) -> int: |
501 |
| - sql = BQ_GET_LATEST_PARTITION_TEMPLATE.format( |
502 |
| - project_id=project_id, |
503 |
| - schema=schema, |
504 |
| - limit=limit, |
505 |
| - offset=offset, |
506 |
| - ) |
507 |
| - result = con.execute(sql) |
508 |
| - row_count: int = 0 |
509 |
| - for row in result: |
510 |
| - partition = BigQueryPartitionColumn(**row) |
511 |
| - partitions[partition.table_name] = partition |
512 |
| - row_count = row_count + 1 |
513 |
| - return row_count |
514 |
| - |
515 |
| - res_size = get_partition_columns( |
516 |
| - self.get_db_name(inspector), schema, query_limit, offset |
| 494 | + sql = BQ_GET_LATEST_PARTITION_TEMPLATE.format( |
| 495 | + project_id=self.get_db_name(inspector), schema=schema, table=table |
517 | 496 | )
|
518 |
| - while res_size == query_limit: |
519 |
| - offset = offset + query_limit |
520 |
| - res_size = get_partition_columns( |
521 |
| - self.get_db_name(inspector), schema, query_limit, offset |
522 |
| - ) |
523 |
| - |
524 |
| - self.partiton_columns[schema] = partitions |
525 |
| - |
526 |
| - def get_latest_partition( |
527 |
| - self, schema: str, table: str |
528 |
| - ) -> Optional[BigQueryPartitionColumn]: |
529 |
| - if schema not in self.partiton_columns: |
530 |
| - self.get_latest_partitions_for_schema(schema) |
531 |
| - |
532 |
| - return self.partiton_columns[schema].get(table) |
| 497 | + result = con.execute(sql) |
| 498 | + # Bigquery only supports one partition column |
| 499 | + # https://stackoverflow.com/questions/62886213/adding-multiple-partitioned-columns-to-bigquery-table-from-sql-query |
| 500 | + row = result.fetchone() |
| 501 | + if row: |
| 502 | + return BigQueryPartitionColumn(**row) |
| 503 | + return None |
533 | 504 |
|
534 | 505 | def get_shard_from_table(self, table: str) -> Tuple[str, Optional[str]]:
|
535 | 506 | match = re.search(SHARDED_TABLE_REGEX, table, re.IGNORECASE)
|
@@ -627,7 +598,7 @@ def is_dataset_eligable_profiling(
|
627 | 598 |
|
628 | 599 | (project_id, schema, table) = dataset_name.split(".")
|
629 | 600 | if not self.is_latest_shard(project_id=project_id, table=table, schema=schema):
|
630 |
| - logger.warning( |
| 601 | + logger.debug( |
631 | 602 | f"{dataset_name} is sharded but not the latest shard, skipping..."
|
632 | 603 | )
|
633 | 604 | return False
|
|
0 commit comments