Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
version: "3.9"

services:
  # Generic ClickHouse service; concrete servers extend this via `extends:`.
  clickhouse:
    image: "clickhouse/clickhouse-server:22.10"
    # Raise the open-file limit for the container.
    ulimits:
      nofile:
        soft: 262144
        hard: 262144
    volumes:
      - ./configs/clickhouse/config_overrides.xml:/etc/clickhouse-server/config.d/config_overrides.xml # config overrides on default
      - ./configs/clickhouse/keeper.xml:/etc/clickhouse-server/config.d/keeper.xml # zookeeper/keeper client configs
      - ./configs/clickhouse/clusters.xml:/etc/clickhouse-server/config.d/clusters.xml # cluster configs
      - ./configs/clickhouse/macros.xml:/etc/clickhouse-server/config.d/macros.xml # macros configs
      - ./sql:/ch-replica-sql # sql files
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
<clickhouse>
    <!-- The `replace` attribute ensures this subtree replaces, rather than merges with, the base config.xml. -->
    <remote_servers replace="replace">

        <!-- A cluster with replication for ingest of data. -->
        <cluster_hits>
            <!-- Service discovery: cluster members register under this keeper path. -->
            <path>/clickhouse/discovery/cluster_hits</path>
            <!-- Shard number is injected per container via the SHARD env var. -->
            <shard from_env="SHARD" />
        </cluster_hits>

    </remote_servers>
</clickhouse>
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
<clickhouse>

    <!-- Disable the MySQL wire-protocol port. -->
    <mysql_port remove="remove" />

    <!-- Disable the PostgreSQL wire-protocol port. -->
    <postgresql_port remove="remove" />

    <!-- Default is <default_replica_path>/clickhouse/tables/{uuid}/{shard}</default_replica_path>;
         use database.table instead of uuid for readable keeper paths. -->
    <default_replica_path replace="replace">/clickhouse/tables/{database}.{table}/{shard}</default_replica_path>

    <!-- Expose Prometheus-compatible metrics over HTTP. -->
    <prometheus>
        <endpoint>/metrics</endpoint>
        <port>8001</port>
        <metrics>true</metrics>
        <events>true</events>
        <asynchronous_metrics>true</asynchronous_metrics>
    </prometheus>

</clickhouse>
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
<clickhouse>
    <!-- Keeper (ZooKeeper-compatible) endpoints this server connects to as a client. -->
    <!-- NOTE(review): only 3 of the 5 raft members configured in keeper_server.xml are
         listed here (servers 10 and 11 are omitted) - confirm this is intentional. -->
    <zookeeper>
        <node index="1">
            <host>clickhouse-server-01</host>
            <port>9181</port>
        </node>
        <node index="2">
            <host>clickhouse-server-02</host>
            <port>9181</port>
        </node>
        <node index="3">
            <host>clickhouse-server-03</host>
            <port>9181</port>
        </node>
    </zookeeper>
</clickhouse>
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
<clickhouse>
    <!-- ClickHouse Keeper: experimental ZooKeeper replacement. -->
    <keeper_server>
        <!-- The TCP port for keeper's clients. Use tcp_port_secure and proper certs for security. -->
        <tcp_port>9181</tcp_port>
        <!-- The server's unique ID in the RAFT configuration, injected via the KEEPER_SERVER_ID env var. -->
        <server_id from_env="KEEPER_SERVER_ID" />
        <log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
        <snapshot_storage_path>/var/lib/clickhouse/coordination/snapshots</snapshot_storage_path>

        <!-- Coordination settings like the following are useful for debugging. -->
        <coordination_settings>
            <operation_timeout_ms>5000</operation_timeout_ms>
            <session_timeout_ms>10000</session_timeout_ms>
            <snapshot_distance>75</snapshot_distance>
            <!-- This may be too verbose:
            <raft_logs_level>trace</raft_logs_level>
            -->
        </coordination_settings>

        <raft_configuration>
            <!-- Whether the communication between servers should be secure or not. -->
            <secure>false</secure>
            <server>
                <id>1001</id>
                <hostname>clickhouse-server-01</hostname>
                <port>9234</port>
            </server>
            <server>
                <id>2002</id>
                <hostname>clickhouse-server-02</hostname>
                <port>9234</port>
            </server>
            <server>
                <id>3003</id>
                <hostname>clickhouse-server-03</hostname>
                <port>9234</port>
            </server>
            <server>
                <id>1000</id>
                <hostname>clickhouse-server-10</hostname>
                <port>9234</port>
                <!-- NOTE(review): only this member sets <priority>; presumably server 10 is the
                     preferred leader - confirm. -->
                <priority>1</priority>
            </server>
            <server>
                <id>1100</id>
                <hostname>clickhouse-server-11</hostname>
                <port>9234</port>
            </server>
        </raft_configuration>
    </keeper_server>
</clickhouse>
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<clickhouse>
    <!-- Substitution values usable as {datacenter}, {shard}, {replica} in DDL. -->
    <macros>
        <datacenter>neverland</datacenter>
        <!-- Shard number comes from the SHARD env var; replica name from the container's HOSTNAME. -->
        <shard from_env="SHARD" />
        <replica from_env="HOSTNAME" />
    </macros>
</clickhouse>
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Global config
global:
  scrape_interval: 5s # Scrape targets every 5 seconds. Default is every 1 minute.
  evaluation_interval: 5s # Evaluate rules every 5 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets:
          # - alertmanager:9093

# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
# - "first_rules.yml"
# - "second_rules.yml"

# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: "prometheus"
    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.
    static_configs:
      - targets: ["localhost:9090"]

  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  # One target per ClickHouse server; each exposes metrics on port 8001 (see config_overrides.xml).
  - job_name: "clickhouse-server"
    static_configs:
      - targets:
          # for serv in {01..11}; do echo clickhouse-server-$serv:8001 ;done
          - "clickhouse-server-01:8001"
          - "clickhouse-server-02:8001"
          - "clickhouse-server-03:8001"
          - "clickhouse-server-04:8001"
          - "clickhouse-server-05:8001"
          - "clickhouse-server-06:8001"
          - "clickhouse-server-07:8001"
          - "clickhouse-server-08:8001"
          - "clickhouse-server-09:8001"
          - "clickhouse-server-10:8001"
          - "clickhouse-server-11:8001"
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
version: "3.9"

services:
  # Prometheus container for monitoring the cluster.
  prometheus:
    image: "prom/prometheus"
    hostname: prometheus-svc-01
    container_name: prometheus-svc-01
    volumes:
      - ./configs/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml
    ports:
      - "9090:9090" # http port (quoted: unquoted host:container pairs can hit YAML 1.1 sexagesimal parsing)

  # Server-1 (keeper raft member + shard 001)
  clickhouse-server-01:
    extends:
      file: clickhouse-service.yml
      service: clickhouse
    container_name: clickhouse-server-01
    hostname: ch01
    ports:
      - "18123:8123"
    volumes:
      - ./configs/clickhouse/keeper_server.xml:/etc/clickhouse-server/config.d/keeper_server.xml # clickhouse-keeper configs
    environment:
      - SHARD=001
      - KEEPER_SERVER_ID=1001

  # Server-2 (keeper raft member + shard 002)
  clickhouse-server-02:
    extends:
      file: clickhouse-service.yml
      service: clickhouse
    container_name: clickhouse-server-02
    hostname: ch02
    volumes:
      - ./configs/clickhouse/keeper_server.xml:/etc/clickhouse-server/config.d/keeper_server.xml # clickhouse-keeper configs
    environment:
      - SHARD=002
      - KEEPER_SERVER_ID=2002

  # Server-3 (keeper raft member + shard 003)
  clickhouse-server-03:
    extends:
      file: clickhouse-service.yml
      service: clickhouse
    container_name: clickhouse-server-03
    hostname: ch03
    volumes:
      - ./configs/clickhouse/keeper_server.xml:/etc/clickhouse-server/config.d/keeper_server.xml # clickhouse-keeper configs
    environment:
      - SHARD=003
      - KEEPER_SERVER_ID=3003

  # Server-4
  clickhouse-server-04:
    extends:
      file: clickhouse-service.yml
      service: clickhouse
    container_name: clickhouse-server-04
    hostname: ch04
    environment:
      - SHARD=001

  # Server-5
  clickhouse-server-05:
    extends:
      file: clickhouse-service.yml
      service: clickhouse
    container_name: clickhouse-server-05
    hostname: ch05
    environment:
      - SHARD=002

  # Server-6
  clickhouse-server-06:
    extends:
      file: clickhouse-service.yml
      service: clickhouse
    container_name: clickhouse-server-06
    hostname: ch06
    environment:
      - SHARD=003

  # Server-7
  clickhouse-server-07:
    extends:
      file: clickhouse-service.yml
      service: clickhouse
    container_name: clickhouse-server-07
    hostname: ch07
    environment:
      - SHARD=001

  # Server-8
  clickhouse-server-08:
    extends:
      file: clickhouse-service.yml
      service: clickhouse
    container_name: clickhouse-server-08
    hostname: ch08
    environment:
      - SHARD=002

  # Server-9
  clickhouse-server-09:
    extends:
      file: clickhouse-service.yml
      service: clickhouse
    container_name: clickhouse-server-09
    hostname: ch09
    environment:
      - SHARD=003

  # Server-10 (keeper raft member only)
  # NOTE(review): no SHARD env var is set here, but the mounted macros.xml/clusters.xml read
  # SHARD via from_env - confirm the keeper-only nodes tolerate it being unset.
  clickhouse-server-10:
    extends:
      file: clickhouse-service.yml
      service: clickhouse
    container_name: clickhouse-server-10
    hostname: ch10
    volumes:
      - ./configs/clickhouse/keeper_server.xml:/etc/clickhouse-server/config.d/keeper_server.xml # clickhouse-keeper configs
    environment:
      - KEEPER_SERVER_ID=1000

  # Server-11 (keeper raft member only)
  # NOTE(review): SHARD is also unset here - see the note on server-10.
  clickhouse-server-11:
    extends:
      file: clickhouse-service.yml
      service: clickhouse
    container_name: clickhouse-server-11
    hostname: ch11
    volumes:
      - ./configs/clickhouse/keeper_server.xml:/etc/clickhouse-server/config.d/keeper_server.xml # clickhouse-keeper configs
    environment:
      - KEEPER_SERVER_ID=1100
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# ClickHouse - Replication using ClickHouse Keeper

An example of replicating data in ClickHouse using clickhouse-keeper. In the following example, there is a total of `11` nodes: `5` nodes form the clickhouse-keeper coordination system and `9` nodes are part of the `cluster_hits` data cluster (some nodes serve in both roles).

The cluster is monitored using [Prometheus](https://prometheus.io/) with each server sharing server metrics on port `8001`.

## Testing

To test the ClickHouse behaviour, do the following:

```bash
# run the containers
docker-compose up --detach --build

# open a shell in clickhouse-server-01 (or any of the other servers)
# and use clickhouse-client to run the queries
# under `sql`, which maps to `/ch-replica-sql` in the container
docker exec -it clickhouse-server-01 bash

# debug command (to debug configs): `docker-compose run clickhouse-server-01 bash`

clickhouse-client --multiline --multiquery --format Pretty < /ch-replica-sql/1-*.sql
clickhouse-client --multiline --multiquery --format Pretty < /ch-replica-sql/2-*.sql

# see the tables being created on other servers

# add more data and see how the cluster behaves (for example SMT)
clickhouse-client --multiline --multiquery --format Pretty < /ch-replica-sql/3-insert.sql

# get results
clickhouse-client --multiline --multiquery --format Pretty < /ch-replica-sql/4-results.sql

# get state from keeper
clickhouse-client --multiline --multiquery --format Pretty < /ch-replica-sql/5-keeper.sql
# Check both replicated and unreplicated SMTs on all servers
# The unreplicated SMT would only have data on the insert server and not on the replica servers, since Materialized Views are triggered
# only on insert and NOT on replication.

```



## Cleanup

```sh
# Exit the container and cleanup
docker-compose down --volumes

# Use `docker-compose down --rmi all --volumes` instead to remove the images as well
# Remove everything (and remove volumes). Networks are not removed here.
docker-compose rm --force --stop -v

# if the container is already stopped
docker-compose rm --force -v

```
Loading