Skip to content

Commit ec062b6

Browse files
authored
feat(glue): make ownership configurable in glue source (#4078)
1 parent 622d7bf commit ec062b6

File tree

2 files changed

+14
-7
lines changed
  • metadata-ingestion

2 files changed

+14
-7
lines changed

metadata-ingestion/source_docs/glue.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ Note that a `.` is used to denote nested fields in the YAML recipe.
9292
| `ignore_unsupported_connectors` | | `True` | Whether to ignore unsupported connectors. If disabled, an error will be raised. |
9393
| `emit_s3_lineage` | | `True` | Whether to emit S3-to-Glue lineage. |
9494
| `glue_s3_lineage_direction` | | `upstream` | If `upstream`, S3 is upstream to Glue. If `downstream`, S3 is downstream to Glue. |
95+
| `extract_owners` | | `True` | When enabled, extracts ownership from Glue directly and overwrites existing owners; tables with no owner set in Glue emit no ownership. When disabled, no ownership is emitted for datasets. |
9596

9697
## Compatibility
9798

metadata-ingestion/src/datahub/ingestion/source/aws/glue.py

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848

4949
class GlueSourceConfig(AwsSourceConfig):
5050

51+
extract_owners: Optional[bool] = True
5152
extract_transforms: Optional[bool] = True
5253
underlying_platform: Optional[str] = None
5354
ignore_unsupported_connectors: Optional[bool] = True
@@ -89,6 +90,7 @@ class GlueSource(Source):
8990

9091
def __init__(self, config: GlueSourceConfig, ctx: PipelineContext):
9192
super().__init__(ctx)
93+
self.extract_owners = config.extract_owners
9294
self.source_config = config
9395
self.report = GlueSourceReport()
9496
self.glue_client = config.glue_client
@@ -612,7 +614,7 @@ def get_workunits(self) -> Iterable[MetadataWorkUnit]:
612614
yield dataset_wu
613615

614616
def _extract_record(self, table: Dict, table_name: str) -> MetadataChangeEvent:
615-
def get_owner() -> OwnershipClass:
617+
def get_owner() -> Optional[OwnershipClass]:
616618
owner = table.get("Owner")
617619
if owner:
618620
owners = [
@@ -621,11 +623,10 @@ def get_owner() -> OwnershipClass:
621623
type=OwnershipTypeClass.DATAOWNER,
622624
)
623625
]
624-
else:
625-
owners = []
626-
return OwnershipClass(
627-
owners=owners,
628-
)
626+
return OwnershipClass(
627+
owners=owners,
628+
)
629+
return None
629630

630631
def get_dataset_properties() -> DatasetPropertiesClass:
631632
return DatasetPropertiesClass(
@@ -680,7 +681,12 @@ def get_schema_metadata(glue_source: GlueSource) -> SchemaMetadata:
680681
)
681682

682683
dataset_snapshot.aspects.append(Status(removed=False))
683-
dataset_snapshot.aspects.append(get_owner())
684+
685+
if self.extract_owners:
686+
optional_owner_aspect = get_owner()
687+
if optional_owner_aspect is not None:
688+
dataset_snapshot.aspects.append(optional_owner_aspect)
689+
684690
dataset_snapshot.aspects.append(get_dataset_properties())
685691
dataset_snapshot.aspects.append(get_schema_metadata(self))
686692

0 commit comments

Comments
 (0)