-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathdocker-compose.yaml
More file actions
267 lines (246 loc) · 7.32 KB
/
docker-compose.yaml
File metadata and controls
267 lines (246 loc) · 7.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
services:
  # --- Infrastructure ---
  kafka:
    image: apache/kafka:3.9.0
    hostname: kafka
    volumes:
      - ./test/kafka-server.properties:/opt/kafka/config/kraft/server.properties:ro
    environment:
      CLUSTER_ID: millpond-local-dev-cluster-001
    # Format KRaft storage first (idempotent via --ignore-formatted), then start
    # the broker. $$ escapes the variable from Compose interpolation so the
    # container shell expands it.
    command: >
      bash -c "
      /opt/kafka/bin/kafka-storage.sh format -t $$CLUSTER_ID -c /opt/kafka/config/kraft/server.properties --ignore-formatted &&
      /opt/kafka/bin/kafka-server-start.sh /opt/kafka/config/kraft/server.properties
      "
    healthcheck:
      test: /opt/kafka/bin/kafka-broker-api-versions.sh --bootstrap-server kafka:9094
      interval: 5s
      timeout: 10s
      retries: 10
postgres:
image: postgres:17
hostname: postgres
ports:
- "127.0.0.1:5433:5432"
environment:
POSTGRES_USER: ducklake
POSTGRES_PASSWORD: ducklake
POSTGRES_DB: ducklake
healthcheck:
test: pg_isready -U ducklake -d ducklake
interval: 2s
timeout: 5s
retries: 5
tmpfs:
- /var/lib/postgresql/data
minio:
image: minio/minio:latest
hostname: minio
ports:
- "127.0.0.1:9000:9000"
- "127.0.0.1:9001:9001"
environment:
MINIO_ROOT_USER: minioadmin
MINIO_ROOT_PASSWORD: minioadmin
MINIO_PROMETHEUS_AUTH_TYPE: public
command: server /data --console-address ":9001"
healthcheck:
test: mc ready local
interval: 2s
timeout: 5s
retries: 5
minio-init:
image: minio/mc:latest
depends_on:
minio:
condition: service_healthy
entrypoint: >
/bin/sh -c "
mc alias set local http://minio:9000 minioadmin minioadmin &&
mc mb --ignore-existing local/ducklake &&
mc anonymous set download local/ducklake
"
# --- Topic Setup ---
kafka-init:
image: apache/kafka:3.9.0
depends_on:
kafka:
condition: service_healthy
entrypoint:
- /bin/sh
- -c
- |
/opt/kafka/bin/kafka-topics.sh --bootstrap-server kafka:9094 --create --if-not-exists --topic test-events --partitions 8 --replication-factor 1 &&
echo "Waiting for consumer group coordinator..." &&
for i in $(seq 1 20); do
/opt/kafka/bin/kafka-consumer-groups.sh --bootstrap-server kafka:9094 --group millpond-test-events-events --describe >/dev/null 2>&1 && echo "Coordinator ready." && break
echo " attempt $i/20..."
sleep 1
done
# --- Producer: writes sample JSON to Kafka ---
producer:
build:
context: .
dockerfile: Dockerfile
args:
MILLPOND_VERSION: "0.0.0.dev0"
depends_on:
kafka-init:
condition: service_completed_successfully
entrypoint: python
command: /app/test/producer.py
deploy:
replicas: ${PRODUCER_REPLICAS:-2}
environment:
KAFKA_BOOTSTRAP_SERVERS: kafka:9094
KAFKA_TOPIC: test-events
KAFKA_PARTITION_COUNT: 8
RECORDS_PER_SECOND: 1000
TOTAL_RECORDS: -1
healthcheck:
test: ["CMD-SHELL", "true"] # producer has no HTTP server; override Dockerfile HEALTHCHECK
interval: 10s
volumes:
- ./test:/app/test:ro
# --- Millpond Consumers ---
millpond-0:
build:
context: .
dockerfile: Dockerfile
args:
MILLPOND_VERSION: "0.0.0.dev0"
depends_on:
kafka-init:
condition: service_completed_successfully
postgres:
condition: service_healthy
minio-init:
condition: service_completed_successfully
deploy:
resources:
limits:
memory: 512M
environment: &millpond-env
KAFKA_BOOTSTRAP_SERVERS: kafka:9094
KAFKA_TOPIC: test-events
REPLICA_COUNT: 2
POD_NAME: millpond-test-0
DUCKLAKE_TABLE: events
DUCKLAKE_DATA_PATH: s3://ducklake/data
DUCKLAKE_RDS_HOST: postgres
DUCKLAKE_RDS_PORT: "5432"
DUCKLAKE_RDS_DATABASE: ducklake
DUCKLAKE_RDS_USERNAME: ducklake
DUCKLAKE_RDS_PASSWORD: ducklake
DUCKLAKE_CONNECTION: ":memory:"
FLUSH_SIZE: 5000
FLUSH_INTERVAL_MS: 5000
DUCKLAKE_PARTITION_BY: "year(_inserted_at),month(_inserted_at),day(_inserted_at),hour(_inserted_at)"
DUCKDB_S3_ENDPOINT: minio:9000
DUCKDB_S3_ACCESS_KEY_ID: minioadmin
DUCKDB_S3_SECRET_ACCESS_KEY: minioadmin
DUCKDB_S3_USE_SSL: "false"
DUCKDB_S3_URL_STYLE: path
millpond-1:
build:
context: .
dockerfile: Dockerfile
args:
MILLPOND_VERSION: "0.0.0.dev0"
entrypoint: ["sh", "-c", "sleep 2 && exec millpond"]
depends_on:
kafka-init:
condition: service_completed_successfully
postgres:
condition: service_healthy
minio-init:
condition: service_completed_successfully
deploy:
resources:
limits:
memory: 512M
environment:
<<: *millpond-env
POD_NAME: millpond-test-1
# --- Metrics Exporters ---
kafka-exporter:
image: danielqsj/kafka-exporter:latest
depends_on:
kafka:
condition: service_healthy
command: ["--kafka.server=kafka:9094"]
postgres-exporter:
image: prometheuscommunity/postgres-exporter:latest
depends_on:
postgres:
condition: service_healthy
environment:
# nosemgrep: trailofbits.generic.postgres-insecure-sslmode.postgres-insecure-sslmode
DATA_SOURCE_NAME: "postgresql://ducklake:ducklake@postgres:5432/ducklake?sslmode=disable" # local dev — no TLS available
# --- Metrics & Dashboarding ---
prometheus:
image: prom/prometheus:latest
ports:
- "127.0.0.1:9091:9090"
configs:
- source: prometheus-config
target: /etc/prometheus/prometheus.yml
grafana:
image: grafana/grafana-oss:latest
ports:
- "127.0.0.1:3000:3000"
environment:
GF_AUTH_ANONYMOUS_ENABLED: "true"
GF_AUTH_ANONYMOUS_ORG_ROLE: Admin
GF_AUTH_DISABLE_LOGIN_FORM: "true"
configs:
- source: grafana-datasources
target: /etc/grafana/provisioning/datasources/prometheus.yml
- source: grafana-dashboard-provider
target: /etc/grafana/provisioning/dashboards/dashboards.yml
- source: grafana-dashboard-millpond
target: /var/lib/grafana/dashboards/millpond.json
# Inline config payloads mounted into the containers above.
configs:
  prometheus-config:
    content: |
      global:
        scrape_interval: 5s
      scrape_configs:
        - job_name: millpond
          static_configs:
            - targets:
                - millpond-0:8000
                - millpond-1:8000
        - job_name: kafka
          static_configs:
            - targets: ["kafka-exporter:9308"]
        - job_name: postgres
          static_configs:
            - targets: ["postgres-exporter:9187"]
        - job_name: minio
          metrics_path: /minio/v2/metrics/cluster
          static_configs:
            - targets: ["minio:9000"]
  grafana-datasources:
    content: |
      apiVersion: 1
      datasources:
        - name: Prometheus
          type: prometheus
          access: proxy
          url: http://prometheus:9090
          isDefault: true
          editable: false
  grafana-dashboard-provider:
    content: |
      apiVersion: 1
      providers:
        - name: millpond
          type: file
          disableDeletion: false
          editable: true
          options:
            path: /var/lib/grafana/dashboards
            foldersFromFilesStructure: false
  grafana-dashboard-millpond:
    file: ./grafana/dashboards/millpond.json