Skip to content

Commit efd5105

Browse files
authored
#36 optionally send copy to jdbc (#39)
1 parent 315a298 commit efd5105

12 files changed

+452
-226
lines changed

Dockerfile

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
# test via (provide payload):
1414
# curl http://localhost:9000/2015-03-31/functions/function/invocations -d "{payload}"
1515
#
16-
# Deploy to AWS Lambda via ACR
16+
# Deploy to AWS Lambda via ECR
1717

1818
FROM --platform=linux/arm64 public.ecr.aws/lambda/python:3.13-arm64
1919

@@ -53,7 +53,7 @@ RUN \
5353
echo "###################" && \
5454
echo "### pip installs ###" && \
5555
echo "###################" && \
56-
pip install requests==2.31.0 urllib3==1.26.18 setuptools cryptography jsonschema PyJWT && \
56+
pip install requests==2.31.0 urllib3==1.26.18 setuptools cryptography jsonschema PyJWT psycopg2-binary && \
5757
echo "######################" && \
5858
echo "### confluent-kafka ###" && \
5959
echo "######################" && \
@@ -71,7 +71,7 @@ RUN \
7171

7272
# Lambda and SASL_SSL_Artifacts
7373
COPY $SASL_SSL_ARTIFACTS /opt/sasl_ssl_artifacts/
74-
COPY src/event_gate_lambda.py $LAMBDA_TASK_ROOT
74+
COPY src/ $LAMBDA_TASK_ROOT/
7575
COPY conf $LAMBDA_TASK_ROOT/conf
7676

7777
# Mark librdkafka to LD_LIBRARY_PATH

conf/access.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
{
2-
"run.topic": [
2+
"public.cps.za.runs": [
33
"FooBarUser"
44
],
5-
"edla.change.topic": [
5+
"public.cps.za.dlchange": [
66
"FooUser",
77
"BarUser"
88
]

conf/config.json

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
{
22
"access_config": "s3://<redacted>/access.json",
3-
"topics_config": "s3://<redacted>/topics.json",
43
"token_provider_url": "https://<redacted>",
54
"token_public_key_url": "https://<redacted>",
65
"kafka_bootstrap_server": "localhost:9092",

conf/topic_dlchange.json

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
{
2+
"type": "object",
3+
"properties": {
4+
"event_id": {
5+
"type": "string",
6+
"description": "Unique identifier for the event (GUID)"
7+
},
8+
"tenant_id": {
9+
"type": "string",
10+
"description": "Application ID or ServiceNow identifier"
11+
},
12+
"source_app": {
13+
"type": "string",
14+
"description": "Standardized source application name (aqueduct, unify, lum, etc)"
15+
},
16+
"source_app_version": {
17+
"type": "string",
18+
"description": "Source application version (SemVer preferred)"
19+
},
20+
"environment": {
21+
"type": "string",
22+
"description": "Environment (dev, uat, pre-prod, prod, test or others)"
23+
},
24+
"timestamp_event": {
25+
"type": "number",
26+
"description": "Timestamp of the event in epoch milliseconds"
27+
},
28+
"catalog_id": {
29+
"type": "string",
30+
"description": "Identifier for the data definition (Glue/Hive) database and table name for example"
31+
},
32+
"operation": {
33+
"type": "string",
34+
"enum": ["overwrite", "append", "archive", "delete"],
35+
"description": "Operation performed"
36+
},
37+
"location": {
38+
"type": "string",
39+
"description": "Location of the data"
40+
},
41+
"format": {
42+
"type": "string",
43+
"description": "Format of the data (parquet, delta, crunch, etc)."
44+
},
45+
"format_options": {
46+
"type": "object",
47+
"description": "When possible, add additional options related to the format"
48+
},
49+
"additional_info": {
50+
"type": "object",
51+
"description": "Optional additional fields structured as an inner JSON"
52+
}
53+
},
54+
"required": ["event_id", "tenant_id", "source_app", "source_app_version", "environment", "timestamp_event", "catalog_id", "operation", "format"]
55+
}

conf/topic_runs.json

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
{
2+
"type": "object",
3+
"properties": {
4+
"event_id": {
5+
"type": "string",
6+
"description": "Unique identifier for the event (GUID), generated for each unique event, for de-duplication purposes"
7+
},
8+
"job_ref": {
9+
"type": "string",
10+
"description": "Identifier of the job in its respective system (e.g. Spark Application Id, Glue Job Id, EMR Step Id, etc)."
11+
},
12+
"tenant_id": {
13+
"type": "string",
14+
"description": "Application ID (4 letter code) or ServiceNow identifier related to the pipeline/domain/process owner (tenant of the tool)"
15+
},
16+
"source_app": {
17+
"type": "string",
18+
"description": "Standardized source application name (aqueduct, unify, lum, etc)"
19+
},
20+
"source_app_version": {
21+
"type": "string",
22+
"description": "Source application version (SemVer preferred)"
23+
},
24+
"environment": {
25+
"type": "string",
26+
"description": "Environment (dev, uat, pre-prod, prod, test or others)"
27+
},
28+
"timestamp_start": {
29+
"type": "number",
30+
"description": "Start timestamp of the run in epoch milliseconds"
31+
},
32+
"timestamp_end": {
33+
"type": "number",
34+
"description": "End timestamp of the run in epoch milliseconds"
35+
},
36+
"jobs": {
37+
"type": "array",
38+
"description": "List of individual jobs within the run",
39+
"items": {
40+
"type": "object",
41+
"properties": {
42+
"catalog_id": {
43+
"type": "string",
44+
"description": "Identifier for the data definition (Glue/Hive) database and table name for example"
45+
},
46+
"status": {
47+
"type": "string",
48+
"enum": ["succeeded", "failed", "killed", "skipped"],
49+
"description": "Status of the job."
50+
},
51+
"timestamp_start": {
52+
"type": "number",
53+
"description": "Start timestamp of a job that is a part of a run in epoch milliseconds"
54+
},
55+
"timestamp_end": {
56+
"type": "number",
57+
"description": "End timestamp of a job that is a part of a run in epoch milliseconds"
58+
},
59+
"message": {
60+
"type": "string",
61+
"description": "Job status/error message."
62+
},
63+
"additional_info": {
64+
"type": "object",
65+
"description": "Optional additional fields structured as an inner JSON"
66+
}
67+
},
68+
"required": ["catalog_id", "status", "timestamp_start", "timestamp_end"]
69+
}
70+
}
71+
},
72+
"required": ["event_id", "job_ref", "tenant_id", "source_app", "source_app_version", "environment", "timestamp_start", "timestamp_end", "jobs"]
73+
}

conf/topics.json

Lines changed: 0 additions & 119 deletions
This file was deleted.

0 commit comments

Comments
 (0)