Skip to content

Commit ebd6347

Browse files
authored
Improve the dump speed using LOAD DATA INFILE statement (#14)
1 parent 7ee8f40 commit ebd6347

File tree

17 files changed

+678
-59
lines changed

17 files changed

+678
-59
lines changed

.github/workflows/golang-ci.lint.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,4 +19,4 @@ jobs:
1919
- name: golangci-lint
2020
uses: golangci/golangci-lint-action@v2
2121
with:
22-
version: v1.32
22+
version: v1.32.2

.golangci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,4 +70,4 @@ linters:
7070
- gomnd
7171

7272
service:
73-
golangci-lint-version: 1.32.x # use the fixed version to not introduce new linters unexpectedly
73+
golangci-lint-version: 1.32.2

Makefile

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,26 @@ build: gen
1515
go build ${LDFLAGS} -o bin/${BINARY} cmd/mymy/main.go
1616
go build -buildmode=plugin -o bin/plugins/mymy_filter.so cmd/plugins/filter/main.go
1717
cp cmd/plugins/filter/cfg.yml bin/plugins/filter.plugin.yml
18+
go build -o bin/dump_benchmark cmd/dump_benchmark/main.go
1819

1920
.PHONY: lint
2021
lint:
2122
golangci-lint run -v ./...
2223

24+
.PHONY: fmt
25+
fmt:
26+
go fmt ./...
27+
2328
.PHONY: run
2429
run: build
2530
bin/${BINARY} -config=config/dev.conf.yml
2631

32+
.PHONY: run_dump_benchmark
33+
run_dump_benchmark: build
34+
rm -rf bin/tmp/*
35+
rm -f bin/state.info
36+
bin/dump_benchmark -config=config/dump_benchmark.conf.yml
37+
2738
.PHONY: run_short_tests
2839
run_short_tests:
2940
go test -count=1 -v -short ./...

README.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,14 @@ GRANT PROCESS, RELOAD, REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO 'repl'@'%
3333
FLUSH PRIVILEGES;
3434
```
3535

36+
MyMy supports two dump options:
37+
38+
1. Mutate and import the initial data row by row. It is safe to use but it is really slow.
39+
2. Use the `LOAD DATA LOCAL INFILE` statement, which is significantly faster but requires enabling the option
40+
`local_infile` on the database side. Read more [here](https://dev.mysql.com/doc/refman/8.0/en/load-data.html).
41+
42+
To use the second approach, set the `load_in_file_enabled` option to `true`.
43+
3644
## API
3745

3846
Replicator exposes several debug endpoints:

cmd/dump_benchmark/main.go

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
package main
2+
3+
import (
4+
"context"
5+
"flag"
6+
"os"
7+
"time"
8+
9+
"github.com/city-mobil/go-mymy/internal/bridge"
10+
"github.com/city-mobil/go-mymy/internal/client"
11+
"github.com/city-mobil/go-mymy/internal/config"
12+
_ "github.com/go-sql-driver/mysql"
13+
"github.com/rs/zerolog"
14+
"github.com/rs/zerolog/log"
15+
)
16+
17+
var (
18+
configPath = flag.String("config", "", "Config file path")
19+
)
20+
21+
var (
22+
logger zerolog.Logger
23+
)
24+
25+
const (
26+
sourceRows = 20000
27+
)
28+
29+
func main() {
30+
flag.Parse()
31+
cfg, err := config.ReadFromFile(*configPath)
32+
if err != nil {
33+
log.Fatal().Err(err).Msgf("failed to read config")
34+
}
35+
36+
logger = zerolog.New(os.Stdout).Level(zerolog.DebugLevel).With().Timestamp().Logger()
37+
38+
factory := bridge.NewEventHandlerPluginFactory(cfg.App.PluginDir)
39+
b, err := bridge.New(cfg, factory, logger)
40+
if err != nil {
41+
logger.Fatal().Err(err).Msg("could not establish MySQL bridge")
42+
}
43+
44+
sOpts := cfg.Replication.SourceOpts
45+
sClient, err := client.New(&client.Config{
46+
Addr: sOpts.Addr,
47+
User: sOpts.User,
48+
Password: sOpts.Password,
49+
Database: sOpts.Database,
50+
Charset: sOpts.Charset,
51+
MaxRetries: 2,
52+
})
53+
if err != nil {
54+
logger.Fatal().Err(err).Msg("could not connect to source db")
55+
}
56+
57+
uOpts := cfg.Replication.UpstreamOpts
58+
uClient, err := client.New(&client.Config{
59+
Addr: uOpts.Addr,
60+
User: uOpts.User,
61+
Password: uOpts.Password,
62+
Database: uOpts.Database,
63+
Charset: uOpts.Charset,
64+
MaxRetries: uOpts.MaxRetries,
65+
MaxOpenConns: uOpts.MaxOpenConns,
66+
MaxIdleConns: uOpts.MaxIdleConns,
67+
ConnectTimeout: uOpts.ConnectTimeout,
68+
WriteTimeout: uOpts.WriteTimeout,
69+
})
70+
if err != nil {
71+
logger.Fatal().Err(err).Msg("could not connect to upstream db")
72+
}
73+
74+
initDBs(sClient, uClient)
75+
defer truncateTables(sClient, uClient)
76+
77+
start := time.Now()
78+
go func() {
79+
<-b.WaitDumpDone()
80+
81+
end := time.Since(start)
82+
logger.Info().Msgf("dump finished in %d ms", end.Milliseconds())
83+
84+
cerr := b.Close()
85+
if cerr != nil {
86+
logger.Error().Err(cerr).Msg("got error on closing replicator")
87+
}
88+
}()
89+
90+
err = b.Run()
91+
if err != nil {
92+
logger.Error().Err(err).Msg("got sync error")
93+
}
94+
95+
if !hasSyncedData(uClient) {
96+
logger.Error().Err(err).Msg("not enough data in the upstream database")
97+
}
98+
}
99+
100+
func initDBs(sClient, uClient *client.SQLClient) {
101+
truncateTables(sClient, uClient)
102+
103+
query := "INSERT INTO city.users (id, username, password, name, email) VALUES (?, ?, ?, ?, ?)"
104+
for i := 1; i <= sourceRows; i++ {
105+
_, err := sClient.Exec(context.Background(), query, i, "bob", "12345", "Bob", "[email protected]")
106+
if err != nil {
107+
logger.Fatal().Err(err).Msgf("could not insert the row №%d", i)
108+
}
109+
}
110+
}
111+
112+
func truncateTables(sClient, uClient *client.SQLClient) {
113+
_, err := sClient.Exec(context.Background(), "TRUNCATE city.users")
114+
if err != nil {
115+
logger.Fatal().Err(err).Msg("could not truncate source table")
116+
}
117+
118+
_, err = uClient.Exec(context.Background(), "TRUNCATE town.clients")
119+
if err != nil {
120+
logger.Fatal().Err(err).Msg("could not truncate upstream table")
121+
}
122+
}
123+
124+
func hasSyncedData(upstream *client.SQLClient) bool {
125+
res := upstream.QueryRow(context.Background(), "SELECT COUNT(*) FROM town.clients")
126+
127+
var cnt int
128+
if err := res.Scan(&cnt); err != nil {
129+
log.Fatal().Err(err).Msg("could not fetch rows count in upstream")
130+
}
131+
132+
return cnt == sourceRows
133+
}

cmd/plugins/filter/main.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
//nolint:unused,deadcode
1+
//nolint:deadcode,unused
22
package main
33

44
import (

config/dev.conf.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ replication:
2020
source:
2121
dump:
2222
exec_path: '/usr/bin/mysqldump'
23+
load_in_file_enabled: false
24+
load_in_file_flush_threshold: 10000
2325
skip_master_data: false
2426
extra_options:
2527
- '--column-statistics=0'

config/dump_benchmark.conf.yml

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
app:
2+
listen_addr: ':8081'
3+
data_file: 'state.info'
4+
plugin_dir: 'plugins'
5+
health:
6+
seconds_behind_master: 5
7+
logging:
8+
level: 'info'
9+
syslog_enabled: false
10+
file_enabled: true
11+
file_name: 'logs/mymy.dev.log'
12+
file_max_size: 256
13+
file_max_backups: 3
14+
file_max_age: 5
15+
16+
replication:
17+
server_id: 17389
18+
gtid_mode: true
19+
20+
source:
21+
dump:
22+
load_in_file_enabled: false
23+
load_in_file_flush_threshold: 10000
24+
skip_master_data: false
25+
extra_options:
26+
- '--column-statistics=0'
27+
addr: '127.0.0.1:13306'
28+
user: 'repl'
29+
password: 'repl'
30+
database: 'city'
31+
charset: 'utf8'
32+
33+
upstream:
34+
addr: '127.0.0.1:13307'
35+
user: 'repl'
36+
password: 'repl'
37+
database: 'town'
38+
charset: 'utf8'
39+
max_retries: 3
40+
max_open_conns: 500
41+
max_idle_conns: 500
42+
connect_timeout: '500ms'
43+
write_timeout: '500ms'
44+
45+
rules:
46+
- source:
47+
table: 'users'
48+
upstream:
49+
plugin:
50+
name: 'mymy_filter'
51+
config: 'plugins/filter.plugin.yml'

config/mymy.conf.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ replication:
2020
source:
2121
dump:
2222
exec_path: '/usr/bin/mysqldump'
23+
load_in_file_enabled: false
24+
load_in_file_flush_threshold: 10000
2325
skip_master_data: false
2426
extra_options:
2527
- '--column-statistics=0'

docker-compose.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ services:
2121
- MYSQL_USER=repl
2222
- MYSQL_PASSWORD=repl
2323
- MYSQL_ROOT_PASSWORD=root_pwd
24-
command: --bind-address=0.0.0.0 --ssl=0 --default-authentication-plugin=mysql_native_password
24+
command: --bind-address=0.0.0.0 --ssl=0 --default-authentication-plugin=mysql_native_password --local_infile=1
2525
volumes:
2626
- ./docker/upstream/init.d:/docker-entrypoint-initdb.d
2727
ports:

0 commit comments

Comments
 (0)