-
Notifications
You must be signed in to change notification settings - Fork 61
test: fix flaky test #453
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
test: fix flaky test #453
Conversation
0b1890e
to
e7c2e9c
Compare
e002994
to
76e4a2f
Compare
test_helpers/pool_helper.go
Outdated
// Wait for the role to be applied. | ||
for { | ||
time.Sleep(10 * time.Millisecond) | ||
|
||
var reason string | ||
|
||
data, err := conn.Do(tarantool.NewCallRequest("box.info")).Get() | ||
switch { | ||
case err != nil: | ||
reason = fmt.Sprintf("failed to get box.info: %s", err) | ||
case len(data) < 1: | ||
reason = "box.info is empty" | ||
default: | ||
status, statusFound := data[0].(map[interface{}]interface{})["status"] | ||
readonly, readonlyFound := data[0].(map[interface{}]interface{})["ro"] | ||
switch { | ||
case !statusFound: | ||
reason = "box.info.status is missing" | ||
case status != "running": | ||
reason = fmt.Sprintf("box.info.status='%s' (waiting for 'running')", status) | ||
case !readonlyFound: | ||
reason = "box.info.ro is missing" | ||
case readonly != isReplica: | ||
reason = fmt.Sprintf("box.info.ro='%v' (waiting for '%v')", readonly, isReplica) | ||
} | ||
} | ||
|
||
if len(reason) == 0 { | ||
break | ||
} | ||
|
||
select { | ||
case <-ctx.Done(): | ||
return fmt.Errorf("%w: failed to apply role, the last reason: %s", ctx.Err(), reason) | ||
default: | ||
continue | ||
} | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this is more readable version of this code, imo:
// Wait for the role to be applied. | |
for { | |
time.Sleep(10 * time.Millisecond) | |
var reason string | |
data, err := conn.Do(tarantool.NewCallRequest("box.info")).Get() | |
switch { | |
case err != nil: | |
reason = fmt.Sprintf("failed to get box.info: %s", err) | |
case len(data) < 1: | |
reason = "box.info is empty" | |
default: | |
status, statusFound := data[0].(map[interface{}]interface{})["status"] | |
readonly, readonlyFound := data[0].(map[interface{}]interface{})["ro"] | |
switch { | |
case !statusFound: | |
reason = "box.info.status is missing" | |
case status != "running": | |
reason = fmt.Sprintf("box.info.status='%s' (waiting for 'running')", status) | |
case !readonlyFound: | |
reason = "box.info.ro is missing" | |
case readonly != isReplica: | |
reason = fmt.Sprintf("box.info.ro='%v' (waiting for '%v')", readonly, isReplica) | |
} | |
} | |
if len(reason) == 0 { | |
break | |
} | |
select { | |
case <-ctx.Done(): | |
return fmt.Errorf("%w: failed to apply role, the last reason: %s", ctx.Err(), reason) | |
default: | |
continue | |
} | |
} | |
var reason string | |
// Wait for the role to be applied. | |
for { | |
select { | |
case <-time.After(10 * time.Millisecond): | |
case <-ctx.Done(): | |
return fmt.Errorf("%w: failed to apply role, the last reason: %s", ctx.Err(), reason) | |
} | |
data, err := conn.Do(tarantool.NewCallRequest("box.info")).Get() | |
switch { | |
case err != nil: | |
reason = fmt.Sprintf("failed to get box.info: %s", err) | |
continue | |
case len(data) < 1: | |
reason = "box.info is empty" | |
continue | |
} | |
status, statusFound := data[0].(map[interface{}]interface{})["status"] | |
readonly, readonlyFound := data[0].(map[interface{}]interface{})["ro"] | |
switch { | |
case !statusFound: | |
reason = "box.info.status is missing" | |
case status != "running": | |
reason = fmt.Sprintf("box.info.status='%s' (waiting for 'running')", status) | |
case !readonlyFound: | |
reason = "box.info.ro is missing" | |
case readonly != isReplica: | |
reason = fmt.Sprintf("box.info.ro='%v' (waiting for '%v')", readonly, isReplica) | |
default: | |
return nil | |
} | |
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It can be further optimised for readability with something like that:
func checkInfoStatus(data []interface{}) string {
status, statusFound := data[0].(map[interface{}]interface{})["status"]
switch {
case !statusFound:
return "box.info.status is missing"
case status != "running":
return fmt.Sprintf("box.info.status='%s' (waiting for 'running')", status)
default:
return ""
}
}
func checkInfoRO(data []interface{}, isReplica bool) string {
readonly, readonlyFound := data[0].(map[interface{}]interface{})["ro"]
switch {
case !readonlyFound:
reason = "box.info.ro is missing"
case readonly != isReplica:
reason = fmt.Sprintf("box.info.ro='%v' (waiting for '%v')", readonly, isReplica)
default:
return nil
}
}
func SetInstanceRO(ctx context.Context, dialer tarantool.Dialer, connOpts tarantool.Opts,
isReplica bool) error {
conn, err := tarantool.Connect(ctx, dialer, connOpts)
if err != nil {
return err
}
defer conn.Close()
req := tarantool.NewCallRequest("box.cfg").
Args([]interface{}{map[string]bool{"read_only": isReplica}})
if _, err := conn.Do(req).Get(); err != nil {
return err
}
var reason string
// Wait for the role to be applied.
for {
select {
case <-time.After(10 * time.Millisecond):
case <-ctx.Done():
return fmt.Errorf("%w: failed to apply role, the last reason: %s", ctx.Err(), reason)
}
data, err := conn.Do(tarantool.NewCallRequest("box.info")).Get()
switch {
case err != nil:
reason = fmt.Sprintf("failed to get box.info: %s", err)
case len(data) < 1:
reason = "box.info is empty"
case checkInfoStatus(data) != "":
reason = checkInfoStatus(data)
case checkInfoRO(data) != "":
reason = checkInfoRO(data)
default:
return nil
}
}
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Rearranged in a slightly different manner that seems more readable.
test_helpers/pool_helper.go
Outdated
errs := make([]error, len(dialers)) | ||
var wg sync.WaitGroup | ||
for i, dialer := range dialers { | ||
ctx, cancel := GetConnectContext() | ||
err := SetInstanceRO(ctx, dialer, connOpts, roles[i]) | ||
cancel() | ||
if err != nil { | ||
return err | ||
} | ||
wg.Add(1) | ||
// Pass loop variables to avoid its closure. | ||
go func(i int, dialer tarantool.Dialer) { | ||
defer wg.Done() | ||
errs[i] = SetInstanceRO(ctx, dialer, connOpts, roles[i]) | ||
}(i, dialer) | ||
} | ||
wg.Wait() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Usually it's better not to use wg.Add()
in a cycle, to avoid possible race conditions if starting goroutine and awaiter are different ones.
There's no problem in that code, but rule of thumb is to avoid that kind of problems without double-checking: if we know number of workers before running them - use wg.Add(count)
.
errs := make([]error, len(dialers))
var wg sync.WaitGroup
wg.Add(len(dialers))
for i, dialer := range dialers {
// Pass loop variables to avoid its closure.
go func(i int, dialer tarantool.Dialer) {
defer wg.Done()
errs[i] = SetInstanceRO(ctx, dialer, connOpts, roles[i])
}(i, dialer)
}
wg.Wait()
it's just a nitpicking, but it can save some time in the future.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Theoretically, I understand these changes and believe they should work, but I can't guarantee it.
It's up to you to agree or disagree with style comments, since it's test code.
76e4a2f
to
537f38f
Compare
test_helpers/pool_helper.go
Outdated
ctx, cancel := GetConnectContext() | ||
defer cancel() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It looks better to add a context param to the SetClusterRO
call instead of the hidden-confusing-context-timeout.
We could do that since we don't guarantee a backward compatibility for the test_helper
package.
func SetClusterRO(dialers []tarantool.Dialer, connOpts tarantool.Opts,
roles []bool) error {
->
func SetClusterRO(ctx context.Context, dialers []tarantool.Dialer,
connOpts tarantool.Opts, roles []bool) error {
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I understand that we can modify API here, but it doesn't seem to be the case.
Context is not a kind of global entity, it is always created in some function and then passed into some function that accepts context. Does this make call of such a context-creator-function as hidden-confusing-context? For me context here is just a kind of implementation details. It doesn't seem that caller need to know about it and I see no problem that he doesn't.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is a matter of expected behavior from a call from a code-reader point of view. I insist on this change and the addition of a cotext as a first call argument. Please, add it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
537f38f
to
f443836
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please see unresolved comment above.
df9cc0f
to
92cbe14
Compare
General context approach is used in multi-instances functions -- context is passed as a first argument.
Helper method that performs initial assigning of master/replica roles and is widely used in ConnectionPool tests was adjusted to wait for the roles to be applied successfully. Prior to this patch it doesn't, so sometimes subsequent test code might work unexpectedly (the problem was caught with TestConnectionHandlerOpenUpdateClose) Closes #452
92cbe14
to
f859fca
Compare
Helper method that performs initial assigning of master/replica roles and is widely used in
ConnectionPool
tests was adjusted to wait for the roles to be applied successfully.Prior to this patch it doesn't, so sometimes subsequent test code might work unexpectedly (the problem was caught with
TestConnectionHandlerOpenUpdateClose
)Related issues:
Closes #452