diff --git a/clickhouse/replicated-with-clickhouse-keeper-and-service-discovery/clickhouse-service.yml b/clickhouse/replicated-with-clickhouse-keeper-and-service-discovery/clickhouse-service.yml new file mode 100644 index 0000000..4b6d11a --- /dev/null +++ b/clickhouse/replicated-with-clickhouse-keeper-and-service-discovery/clickhouse-service.yml @@ -0,0 +1,16 @@ +version: "3.9" + +services: + # Generic ClickHouse Service + clickhouse: + image: "clickhouse/clickhouse-server:22.10" + ulimits: + nofile: + soft: 262144 + hard: 262144 + volumes: + - ./configs/clickhouse/config_overrides.xml:/etc/clickhouse-server/config.d/config_overrides.xml # config overrides on default + - ./configs/clickhouse/keeper.xml:/etc/clickhouse-server/config.d/keeper.xml # zookeeper/keeper client configs + - ./configs/clickhouse/clusters.xml:/etc/clickhouse-server/config.d/clusters.xml # cluster configs + - ./configs/clickhouse/macros.xml:/etc/clickhouse-server/config.d/macros.xml # macros configs + - ./sql:/ch-replica-sql # sql files diff --git a/clickhouse/replicated-with-clickhouse-keeper-and-service-discovery/configs/clickhouse/clusters.xml b/clickhouse/replicated-with-clickhouse-keeper-and-service-discovery/configs/clickhouse/clusters.xml new file mode 100644 index 0000000..c3c979b --- /dev/null +++ b/clickhouse/replicated-with-clickhouse-keeper-and-service-discovery/configs/clickhouse/clusters.xml @@ -0,0 +1,12 @@ + + + + + + + /clickhouse/discovery/cluster_hits + + + + + diff --git a/clickhouse/replicated-with-clickhouse-keeper-and-service-discovery/configs/clickhouse/config_overrides.xml b/clickhouse/replicated-with-clickhouse-keeper-and-service-discovery/configs/clickhouse/config_overrides.xml new file mode 100644 index 0000000..a275d83 --- /dev/null +++ b/clickhouse/replicated-with-clickhouse-keeper-and-service-discovery/configs/clickhouse/config_overrides.xml @@ -0,0 +1,21 @@ + + + + + + + + + + /clickhouse/tables/{database}.{table}/{shard} + + + + /metrics + 8001 + true + true + 
true + + + diff --git a/clickhouse/replicated-with-clickhouse-keeper-and-service-discovery/configs/clickhouse/keeper.xml b/clickhouse/replicated-with-clickhouse-keeper-and-service-discovery/configs/clickhouse/keeper.xml new file mode 100644 index 0000000..3504c70 --- /dev/null +++ b/clickhouse/replicated-with-clickhouse-keeper-and-service-discovery/configs/clickhouse/keeper.xml @@ -0,0 +1,16 @@ + + + + clickhouse-server-01 + 9181 + + + clickhouse-server-02 + 9181 + + + clickhouse-server-03 + 9181 + + + diff --git a/clickhouse/replicated-with-clickhouse-keeper-and-service-discovery/configs/clickhouse/keeper_server.xml b/clickhouse/replicated-with-clickhouse-keeper-and-service-discovery/configs/clickhouse/keeper_server.xml new file mode 100644 index 0000000..62fcc1a --- /dev/null +++ b/clickhouse/replicated-with-clickhouse-keeper-and-service-discovery/configs/clickhouse/keeper_server.xml @@ -0,0 +1,52 @@ + + + + + 9181 + + + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + + + + 5000 + 10000 + 75 + + + + + + false + + 1001 + clickhouse-server-01 + 9234 + + + 2002 + clickhouse-server-02 + 9234 + + + 3003 + clickhouse-server-03 + 9234 + + + 1000 + clickhouse-server-10 + 9234 + 1 + + + 1100 + clickhouse-server-11 + 9234 + + + + diff --git a/clickhouse/replicated-with-clickhouse-keeper-and-service-discovery/configs/clickhouse/macros.xml b/clickhouse/replicated-with-clickhouse-keeper-and-service-discovery/configs/clickhouse/macros.xml new file mode 100644 index 0000000..e367725 --- /dev/null +++ b/clickhouse/replicated-with-clickhouse-keeper-and-service-discovery/configs/clickhouse/macros.xml @@ -0,0 +1,7 @@ + + + neverland + + + + diff --git a/clickhouse/replicated-with-clickhouse-keeper-and-service-discovery/configs/prometheus/prometheus.yml b/clickhouse/replicated-with-clickhouse-keeper-and-service-discovery/configs/prometheus/prometheus.yml new file mode 100644 index 0000000..675aca1 --- /dev/null +++ 
b/clickhouse/replicated-with-clickhouse-keeper-and-service-discovery/configs/prometheus/prometheus.yml @@ -0,0 +1,46 @@ +# Global config +global: + scrape_interval: 5s # Set the scrape interval to every 5 seconds. Default is every 1 minute. + evaluation_interval: 5s # Evaluate rules every 5 seconds. The default is every 1 minute. + # scrape_timeout is set to the global default (10s). + +# Alertmanager configuration +alerting: + alertmanagers: + - static_configs: + - targets: + # - alertmanager:9093 + +# Load rules once and periodically evaluate them according to the global 'evaluation_interval'. +rule_files: + # - "first_rules.yml" + # - "second_rules.yml" + +# A scrape configuration containing exactly one endpoint to scrape: +# Here it's Prometheus itself. +scrape_configs: + # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config. + - job_name: "prometheus" + + # metrics_path defaults to '/metrics' + # scheme defaults to 'http'. + + static_configs: + - targets: ["localhost:9090"] + + # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config. 
+ - job_name: "clickhouse-server" + static_configs: + - targets: + # for serv in {01..11}; do echo clickhouse-server-$serv:8001 ;done + - "clickhouse-server-01:8001" + - "clickhouse-server-02:8001" + - "clickhouse-server-03:8001" + - "clickhouse-server-04:8001" + - "clickhouse-server-05:8001" + - "clickhouse-server-06:8001" + - "clickhouse-server-07:8001" + - "clickhouse-server-08:8001" + - "clickhouse-server-09:8001" + - "clickhouse-server-10:8001" + - "clickhouse-server-11:8001" diff --git a/clickhouse/replicated-with-clickhouse-keeper-and-service-discovery/docker-compose.yml b/clickhouse/replicated-with-clickhouse-keeper-and-service-discovery/docker-compose.yml new file mode 100644 index 0000000..c4ae4ab --- /dev/null +++ b/clickhouse/replicated-with-clickhouse-keeper-and-service-discovery/docker-compose.yml @@ -0,0 +1,127 @@ +version: "3.9" + +services: + # Prometheus Container + prometheus: + image: "prom/prometheus" + hostname: prometheus-svc-01 + container_name: prometheus-svc-01 + volumes: + - ./configs/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml + ports: + - 9090:9090 # http port + + # Server-1 + clickhouse-server-01: + extends: + file: clickhouse-service.yml + service: clickhouse + container_name: clickhouse-server-01 + hostname: ch01 + ports: + - 18123:8123 + volumes: + - ./configs/clickhouse/keeper_server.xml:/etc/clickhouse-server/config.d/keeper_server.xml # clickhouse-keeper configs + environment: + - SHARD=001 + - KEEPER_SERVER_ID=1001 + # Server-2 + clickhouse-server-02: + extends: + file: clickhouse-service.yml + service: clickhouse + container_name: clickhouse-server-02 + hostname: ch02 + volumes: + - ./configs/clickhouse/keeper_server.xml:/etc/clickhouse-server/config.d/keeper_server.xml # clickhouse-keeper configs + environment: + - SHARD=002 + - KEEPER_SERVER_ID=2002 + # Server-3 + clickhouse-server-03: + extends: + file: clickhouse-service.yml + service: clickhouse + container_name: clickhouse-server-03 + hostname: ch03 + 
volumes: + - ./configs/clickhouse/keeper_server.xml:/etc/clickhouse-server/config.d/keeper_server.xml # clickhouse-keeper configs + environment: + - SHARD=003 + - KEEPER_SERVER_ID=3003 + # Server-4 + clickhouse-server-04: + extends: + file: clickhouse-service.yml + service: clickhouse + container_name: clickhouse-server-04 + hostname: ch04 + environment: + - SHARD=001 + # Server-5 + clickhouse-server-05: + extends: + file: clickhouse-service.yml + service: clickhouse + container_name: clickhouse-server-05 + hostname: ch05 + environment: + - SHARD=002 + # Server-6 + clickhouse-server-06: + extends: + file: clickhouse-service.yml + service: clickhouse + container_name: clickhouse-server-06 + hostname: ch06 + environment: + - SHARD=003 + # Server-7 + clickhouse-server-07: + extends: + file: clickhouse-service.yml + service: clickhouse + container_name: clickhouse-server-07 + hostname: ch07 + environment: + - SHARD=001 + # Server-8 + clickhouse-server-08: + extends: + file: clickhouse-service.yml + service: clickhouse + container_name: clickhouse-server-08 + hostname: ch08 + environment: + - SHARD=002 + # Server-9 + clickhouse-server-09: + extends: + file: clickhouse-service.yml + service: clickhouse + container_name: clickhouse-server-09 + hostname: ch09 + environment: + - SHARD=003 + # Server-10 + clickhouse-server-10: + extends: + file: clickhouse-service.yml + service: clickhouse + container_name: clickhouse-server-10 + hostname: ch10 + volumes: + - ./configs/clickhouse/keeper_server.xml:/etc/clickhouse-server/config.d/keeper_server.xml # clickhouse-keeper configs + environment: + - KEEPER_SERVER_ID=1000 + # Server-11 + clickhouse-server-11: + extends: + file: clickhouse-service.yml + service: clickhouse + container_name: clickhouse-server-11 + hostname: ch11 + volumes: + - ./configs/clickhouse/keeper_server.xml:/etc/clickhouse-server/config.d/keeper_server.xml # clickhouse-keeper configs + environment: + - KEEPER_SERVER_ID=1100 diff --git 
a/clickhouse/replicated-with-clickhouse-keeper-and-service-discovery/readme.md b/clickhouse/replicated-with-clickhouse-keeper-and-service-discovery/readme.md new file mode 100644 index 0000000..adca513 --- /dev/null +++ b/clickhouse/replicated-with-clickhouse-keeper-and-service-discovery/readme.md @@ -0,0 +1,56 @@ +# ClickHouse - Replication using ClickHouse Keeper + +An example of replicating data in ClickHouse using clickhouse-keeper. In the following example, there are a total of `11` nodes. There are `5` nodes forming a clickhouse-keeper coordination system and `9` nodes as part of the `cluster_hits` data cluster (`3` of the nodes serve both roles). + +The cluster is monitored using [Prometheus](https://prometheus.io/) with each server sharing server metrics on port `8001`. + +## Testing + +To test the ClickHouse behaviour, do the following: + +```bash +# run the containers +docker-compose up --detach --build + +# ssh to clickhouse-server-01 (or any of the other servers) +# and use the clickhouse-client to run the queries +# under `sql` which maps to `/ch-replica-sql` in the container +docker exec -it clickhouse-server-01 bash + +# debug command (to debug configs): `docker-compose run clickhouse-server-01 bash` + +clickhouse-client --multiline --multiquery --format Pretty < /ch-replica-sql/1-*.sql +clickhouse-client --multiline --multiquery --format Pretty < /ch-replica-sql/2-*.sql + +# see the tables being created on other servers + +# add more data and see how the cluster behaves (for example SMT) +clickhouse-client --multiline --multiquery --format Pretty < /ch-replica-sql/3-insert.sql + +# get results +clickhouse-client --multiline --multiquery --format Pretty < /ch-replica-sql/4-results.sql + +# get state from keeper +clickhouse-client --multiline --multiquery --format Pretty < /ch-replica-sql/5-keeper.sql +# Check both replicated and unreplicated SMTs on all servers +# The unreplicated SMT would only have data on the insert server and not on the replica servers, since Materialized Views are 
triggered +# only on insert and NOT on replication. + +``` + + + +## Cleanup + +```sh +# Exit the container and cleanup +docker-compose down --volumes + +# Use `docker-compose down --rmi all --volumes` with above to images as well +# Remove everything (and remove volumes). Networks are not removed here. +docker-compose rm --force --stop -v + +# if the container is already stopped +docker-compose rm --force -v + +``` diff --git a/clickhouse/replicated-with-clickhouse-keeper-and-service-discovery/sql/1-schema-base-tables.sql b/clickhouse/replicated-with-clickhouse-keeper-and-service-discovery/sql/1-schema-base-tables.sql new file mode 100644 index 0000000..c976170 --- /dev/null +++ b/clickhouse/replicated-with-clickhouse-keeper-and-service-discovery/sql/1-schema-base-tables.sql @@ -0,0 +1,74 @@ +/* Decrease DDL timeout to 5 seconds in case some server is down. */ +SET distributed_ddl_task_timeout = 5; + +/* Create a database for storage table */ +CREATE DATABASE IF NOT EXISTS test; + +/* Create a separate database for Materialized Views in case you want to recreate them */ +CREATE DATABASE IF NOT EXISTS test_mvs; + +/* Create a database for views (for search) */ +CREATE DATABASE IF NOT EXISTS test_views; + +/* Create a local storage table */ +/* Note: using `{table}` in Replicated can cause issues when doing ALTER RENAME on the table since it will expand to different path */ +CREATE TABLE IF NOT EXISTS test.test_table_local +( + `insert_timestamp` DateTime, + `val` UInt64 +) +ENGINE = ReplicatedMergeTree +PARTITION BY toYearWeek(insert_timestamp) +ORDER BY insert_timestamp +TTL insert_timestamp + toIntervalDay(120); + +/* Create a SummingMergeTree (SMT) for hourly data */ +CREATE TABLE IF NOT EXISTS test.test_table_hourly_smt_local +( + `insert_timestamp` DateTime, + `val` UInt64, + `total` UInt64 +) +ENGINE = ReplicatedSummingMergeTree +PARTITION BY toYearWeek(insert_timestamp) +ORDER BY (insert_timestamp, val) +TTL insert_timestamp + toIntervalDay(120); + +/* 
Create a Materialized View from the base ingest table to the SMT */ +CREATE MATERIALIZED VIEW IF NOT EXISTS test_mvs.test_table_hourly_smt_mv_local TO test.test_table_hourly_smt_local AS +SELECT + toStartOfHour(insert_timestamp) AS insert_timestamp, + val, + count() as total +FROM test.test_table_local +GROUP BY + insert_timestamp, + val; + +/* Create an Unreplicated SummingMergeTree (SMT) for hourly data */ +/* Even though the table itself is unreplicated, we use the existing cluster (which has configs for replication but they don't affect DDLs) */ +CREATE TABLE IF NOT EXISTS test.test_table_hourly_unreplicated_smt_local +( + `insert_timestamp` DateTime, + `val` UInt64, + `total` UInt64 +) +ENGINE = SummingMergeTree() +PARTITION BY toYearWeek(insert_timestamp) +ORDER BY (insert_timestamp, val) +TTL insert_timestamp + toIntervalDay(120); + +/* Create a Materialized View from the base ingest table to the Unreplicated SMT */ +CREATE MATERIALIZED VIEW IF NOT EXISTS test_mvs.test_table_hourly_unreplicated_smt_mv_local TO test.test_table_hourly_unreplicated_smt_local AS +SELECT + toStartOfHour(insert_timestamp) AS insert_timestamp, + val, + count() as total +FROM test.test_table_local +GROUP BY + insert_timestamp, + val; + + + +;; diff --git a/clickhouse/replicated-with-clickhouse-keeper-and-service-discovery/sql/2-schema-distributed-tables.sql b/clickhouse/replicated-with-clickhouse-keeper-and-service-discovery/sql/2-schema-distributed-tables.sql new file mode 100644 index 0000000..4629619 --- /dev/null +++ b/clickhouse/replicated-with-clickhouse-keeper-and-service-discovery/sql/2-schema-distributed-tables.sql @@ -0,0 +1,16 @@ +/* Decrease DDL timeout to 5 seconds in case some server is down. 
*/ +SET distributed_ddl_task_timeout = 5; + +/* Create a database to store the distributed table */ +CREATE DATABASE IF NOT EXISTS test; + +/* Create a distributed table for raw data */ +/* Note: we don't specify a sharding key here */ +CREATE TABLE IF NOT EXISTS test.test_table AS test.test_table_local +ENGINE = Distributed(cluster_hits, test, test_table_local); + +/* Create a distributed table for SMT */ +CREATE TABLE IF NOT EXISTS test.test_table_hourly_smt AS test.test_table_hourly_smt_local +ENGINE = Distributed(cluster_hits, test, test_table_hourly_smt_local); + +;; diff --git a/clickhouse/replicated-with-clickhouse-keeper-and-service-discovery/sql/3-insert.sql b/clickhouse/replicated-with-clickhouse-keeper-and-service-discovery/sql/3-insert.sql new file mode 100644 index 0000000..e7b20e6 --- /dev/null +++ b/clickhouse/replicated-with-clickhouse-keeper-and-service-discovery/sql/3-insert.sql @@ -0,0 +1,10 @@ + +/* Insert random data into the base table */ +INSERT INTO test.test_table_local SELECT + now(), + rand() % 97 /* Random numbers in range [0, 97) */ +FROM system.numbers +LIMIT 500; + + +;; diff --git a/clickhouse/replicated-with-clickhouse-keeper-and-service-discovery/sql/4-results.sql b/clickhouse/replicated-with-clickhouse-keeper-and-service-discovery/sql/4-results.sql new file mode 100644 index 0000000..0949b84 --- /dev/null +++ b/clickhouse/replicated-with-clickhouse-keeper-and-service-discovery/sql/4-results.sql @@ -0,0 +1,10 @@ + +/* Get Results from inserts */ + +SELECT * from test.test_table_hourly_smt; + +SELECT count() from test.test_table_hourly_smt; + +SELECT uniq(val), uniqCombined(val) from test.test_table; + +;; diff --git a/clickhouse/replicated-with-clickhouse-keeper-and-service-discovery/sql/5-keeper.sql b/clickhouse/replicated-with-clickhouse-keeper-and-service-discovery/sql/5-keeper.sql new file mode 100644 index 0000000..b5d5cdf --- /dev/null +++ b/clickhouse/replicated-with-clickhouse-keeper-and-service-discovery/sql/5-keeper.sql 
@@ -0,0 +1,31 @@ + +/* Get Information from the keeper */ + +SELECT * FROM system.zookeeper WHERE path='/clickhouse'; + +SELECT * FROM system.zookeeper WHERE path='/clickhouse/tables'; + +SELECT * FROM system.zookeeper WHERE path='/clickhouse/tables/test.test_table_hourly_smt_local'; + +SELECT * FROM system.zookeeper WHERE path='/clickhouse/tables/test.test_table_hourly_smt_local/001'; + +SELECT * FROM system.zookeeper WHERE path='/clickhouse/tables/test.test_table_local'; + +SELECT * FROM system.zookeeper WHERE path='/clickhouse/tables/test.test_table_local/001'; + +SELECT * FROM system.zookeeper WHERE path='/clickhouse/tables/test.test_table_local/001/replicas'; + +SELECT * FROM system.zookeeper WHERE path='/clickhouse/tables/test.test_table_local/001/replicas/ch04'; + +SELECT * FROM system.zookeeper WHERE path='/clickhouse/tables/test.test_table_local/001/replicas/ch04/parts'; + +SELECT * FROM system.zookeeper WHERE path='/clickhouse/tables/test.test_table_local/001/replicas/ch04/queue'; + +SELECT * FROM system.zookeeper WHERE path='/clickhouse/tables/test.test_table_local/001/replicas/ch04/is_active'; + +SELECT * FROM system.zookeeper WHERE path='/clickhouse/tables/test.test_table_local/001/replicas/ch04/is_lost'; + +/* Dropping Dead Replica: https://clickhouse.com/docs/en/sql-reference/statements/system/#query_language-system-drop-replica */ +/* Live replicas use `DROP TABLE` */ + +;;