Skip to content

Commit 9b74331

Browse files
committed
sql: Deflake TestTxnObeysTableModificationTime
This commit rewrites this test and a local execution with `--race --stress` succeeded for at least 10 minutes. Release note: None
1 parent 509f8c8 commit 9b74331

File tree

2 files changed

+80
-196
lines changed

2 files changed

+80
-196
lines changed

pkg/sql/catalog/lease/BUILD.bazel

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ go_test(
7777
embed = [":lease"],
7878
deps = [
7979
"//pkg/base",
80+
"//pkg/jobs",
8081
"//pkg/jobs/jobspb",
8182
"//pkg/keys",
8283
"//pkg/kv",
@@ -105,7 +106,6 @@ go_test(
105106
"//pkg/sql/rowenc/keyside",
106107
"//pkg/sql/sem/tree",
107108
"//pkg/sql/sessiondata",
108-
"//pkg/sql/sqltestutils",
109109
"//pkg/sql/tests",
110110
"//pkg/sql/types",
111111
"//pkg/storage",
@@ -114,7 +114,6 @@ go_test(
114114
"//pkg/testutils/skip",
115115
"//pkg/testutils/sqlutils",
116116
"//pkg/testutils/testcluster",
117-
"//pkg/util",
118117
"//pkg/util/admission",
119118
"//pkg/util/encoding",
120119
"//pkg/util/hlc",

pkg/sql/catalog/lease/lease_test.go

Lines changed: 79 additions & 194 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import (
2424

2525
"github.com/cockroachdb/cockroach-go/v2/crdb"
2626
"github.com/cockroachdb/cockroach/pkg/base"
27+
"github.com/cockroachdb/cockroach/pkg/jobs"
2728
"github.com/cockroachdb/cockroach/pkg/jobs/jobspb"
2829
"github.com/cockroachdb/cockroach/pkg/keys"
2930
"github.com/cockroachdb/cockroach/pkg/kv"
@@ -49,15 +50,13 @@ import (
4950
"github.com/cockroachdb/cockroach/pkg/sql/rowenc/keyside"
5051
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
5152
"github.com/cockroachdb/cockroach/pkg/sql/sessiondata"
52-
"github.com/cockroachdb/cockroach/pkg/sql/sqltestutils"
5353
"github.com/cockroachdb/cockroach/pkg/sql/tests"
5454
"github.com/cockroachdb/cockroach/pkg/sql/types"
5555
"github.com/cockroachdb/cockroach/pkg/testutils"
5656
"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
5757
"github.com/cockroachdb/cockroach/pkg/testutils/skip"
5858
"github.com/cockroachdb/cockroach/pkg/testutils/sqlutils"
5959
"github.com/cockroachdb/cockroach/pkg/testutils/testcluster"
60-
"github.com/cockroachdb/cockroach/pkg/util"
6160
"github.com/cockroachdb/cockroach/pkg/util/encoding"
6261
"github.com/cockroachdb/cockroach/pkg/util/hlc"
6362
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
@@ -981,219 +980,105 @@ func TestTxnObeysTableModificationTime(t *testing.T) {
981980
defer leaktest.AfterTest(t)()
982981

983982
params := createTestServerParams()
984-
params.ScanMaxIdleTime = time.Millisecond
985-
s, sqlDB, kvDB := serverutils.StartServer(t, params)
983+
params.Knobs.JobsTestingKnobs = jobs.NewTestingKnobsWithShortIntervals()
984+
s, sqlDB, _ := serverutils.StartServer(t, params)
986985
defer s.Stopper().Stop(context.Background())
987986

988-
// Disable strict GC TTL enforcement because we're going to shove a zero-value
989-
// TTL into the system with AddImmediateGCZoneConfig.
990-
defer sqltestutils.DisableGCTTLStrictEnforcement(t, sqlDB)()
991-
992-
_, err := sqlDB.Exec(`SET CLUSTER SETTING sql.gc_job.wait_for_gc.interval = '1s';`)
993-
require.NoError(t, err)
994-
995-
// Refresh protected timestamp cache immediately to make MVCC GC queue to
996-
// process GC immediately.
997-
_, err = sqlDB.Exec(`SET CLUSTER SETTING kv.protectedts.poll_interval = '1s';`)
998-
require.NoError(t, err)
999-
1000-
// This test intentionally relies on uncontended transactions not being pushed
1001-
// in order to verify what it claims to verify. The default closed timestamp
1002-
// interval in 20.1+ is 3s. When run under the race detector, the process can
1003-
// stall for upwards of 3s leading to the write transaction getting pushed.
1004-
//
1005-
// In order to mitigate that push, we increase the target_duration when the
1006-
// test is run under race.
1007-
if util.RaceEnabled || skip.Stress() {
1008-
_, err := sqlDB.Exec(
1009-
"SET CLUSTER SETTING kv.closed_timestamp.target_duration = '120s'")
1010-
require.NoError(t, err)
1011-
}
1012-
1013-
if _, err := sqlDB.Exec(`
987+
_, err := sqlDB.Exec(`
1014988
CREATE DATABASE t;
1015989
CREATE TABLE t.kv (k CHAR PRIMARY KEY, v CHAR);
1016990
INSERT INTO t.kv VALUES ('a', 'b');
1017-
`); err != nil {
1018-
t.Fatal(err)
1019-
}
1020-
tableDesc := desctestutils.TestingGetPublicTableDescriptor(kvDB, keys.SystemSQLCodec, "t", "kv")
1021-
1022-
// A read-write transaction that uses the old version of the descriptor.
1023-
txReadWrite, err := sqlDB.Begin()
1024-
if err != nil {
1025-
t.Fatal(err)
1026-
}
1027-
1028-
// A read-only transaction that uses the old version of the descriptor.
1029-
txRead, err := sqlDB.Begin()
1030-
if err != nil {
1031-
t.Fatal(err)
1032-
}
1033-
1034-
// A write-only transaction that uses the old version of the descriptor.
1035-
txWrite, err := sqlDB.Begin()
1036-
if err != nil {
1037-
t.Fatal(err)
1038-
}
1039-
1040-
// Modify the table descriptor.
1041-
if _, err := sqlDB.Exec(`ALTER TABLE t.kv ADD m CHAR DEFAULT 'z';`); err != nil {
1042-
t.Fatal(err)
1043-
}
1044-
1045-
rows, err := txReadWrite.Query(`SELECT * FROM t.kv`)
1046-
if err != nil {
1047-
t.Fatal(err)
1048-
}
991+
`)
992+
require.NoError(t, err)
1049993

1050-
checkSelectResults := func(rows *gosql.Rows) {
1051-
defer func() {
1052-
if err := rows.Close(); err != nil {
1053-
t.Fatal(err)
1054-
}
1055-
}()
1056-
for rows.Next() {
1057-
// The transaction is unable to see column m.
994+
// requireOneRow ensures `res` contains only one row with two
995+
// columns ("a", "b").
996+
requireOneRow := func(res *gosql.Rows) {
997+
for res.Next() {
1058998
var k, v, m string
1059-
if err := rows.Scan(&k, &v, &m); !testutils.IsError(
1060-
err, "expected 2 destination arguments in Scan, not 3",
1061-
) {
1062-
t.Fatalf("err = %v", err)
1063-
}
1064-
err = rows.Scan(&k, &v)
1065-
if err != nil {
1066-
t.Fatal(err)
1067-
}
1068-
if k != "a" || v != "b" {
1069-
t.Fatalf("didn't find expected row: %s %s", k, v)
1070-
}
999+
require.Error(t, res.Scan(&k, &v, &m))
1000+
require.NoError(t, res.Scan(&k, &v))
1001+
require.Equal(t, "a", k)
1002+
require.Equal(t, "b", v)
10711003
}
1004+
require.NoError(t, res.Close())
10721005
}
10731006

1074-
checkSelectResults(rows)
1075-
1076-
rows, err = txRead.Query(`SELECT * FROM t.kv`)
1077-
if err != nil {
1078-
t.Fatal(err)
1079-
}
1080-
1081-
checkSelectResults(rows)
1082-
1083-
// Read-only transaction commits just fine.
1084-
if err := txRead.Commit(); err != nil {
1085-
t.Fatal(err)
1086-
}
1087-
1088-
// This INSERT will cause the transaction to be pushed,
1089-
// which will be detected when we attempt to Commit() below.
1090-
if _, err := txReadWrite.Exec(`INSERT INTO t.kv VALUES ('c', 'd');`); err != nil {
1091-
t.Fatal(err)
1092-
}
1093-
1094-
checkDeadlineErr := func(err error, t *testing.T) {
1007+
// A helper to assert err is a transaction restart error with an error string
1008+
// that matches the supplied regex.
1009+
requireRestartTransactionErrWithMsg := func(t *testing.T, err error, re string) {
10951010
var pqe (*pq.Error)
10961011
if !errors.As(err, &pqe) || pgcode.MakeCode(string(pqe.Code)) != pgcode.SerializationFailure ||
1097-
!testutils.IsError(err, "RETRY_COMMIT_DEADLINE_EXCEEDED") {
1098-
t.Fatalf("expected deadline exceeded, got: %v", err)
1012+
!testutils.IsError(err, re) {
1013+
t.Fatalf("expected a %v error, got: %v", re, err)
10991014
}
11001015
}
11011016

1102-
// The transaction read at one timestamp and wrote at another so it
1103-
// has to be restarted because the spans read were modified by the backfill.
1104-
checkDeadlineErr(txReadWrite.Commit(), t)
1105-
1106-
// This INSERT will cause the transaction to be pushed transparently,
1107-
// which will be detected when we attempt to Commit() below only because
1108-
// a deadline has been set.
1109-
if _, err := txWrite.Exec(`INSERT INTO t.kv VALUES ('c', 'd');`); err != nil {
1110-
t.Fatal(err)
1111-
}
1112-
1113-
checkDeadlineErr(txWrite.Commit(), t)
1114-
1115-
// Test the deadline exceeded error with a CREATE/DROP INDEX.
1116-
txWrite, err = sqlDB.Begin()
1117-
if err != nil {
1118-
t.Fatal(err)
1119-
}
1120-
txUpdate, err := sqlDB.Begin()
1121-
if err != nil {
1122-
t.Fatal(err)
1123-
}
1124-
1125-
// Modify the table descriptor.
1126-
if _, err := sqlDB.Exec(`CREATE INDEX foo ON t.kv (v)`); err != nil {
1127-
t.Fatal(err)
1128-
}
1129-
1130-
// This INSERT will cause the transaction to be pushed transparently,
1131-
// which will be detected when we attempt to Commit() below only because
1132-
// a deadline has been set.
1133-
if _, err := txWrite.Exec(`INSERT INTO t.kv VALUES ('c', 'd');`); err != nil {
1134-
t.Fatal(err)
1017+
// requireWriteTooOldErr ensures `err` is a WriteTooOldError.
1018+
requireWriteTooOldErr := func(t *testing.T, err error) {
1019+
requireRestartTransactionErrWithMsg(t, err, "WriteTooOldError")
11351020
}
11361021

1137-
checkDeadlineErr(txWrite.Commit(), t)
1138-
1139-
if _, err := txUpdate.Exec(`UPDATE t.kv SET v = 'c' WHERE k = 'a';`); err != nil {
1140-
t.Fatal(err)
1022+
// requireSessionExpiredErr ensures `err` is a liveness session expired error.
1023+
requireSessionExpiredErr := func(t *testing.T, err error) {
1024+
requireRestartTransactionErrWithMsg(t, err, "liveness session expired")
11411025
}
11421026

1143-
checkDeadlineErr(txUpdate.Commit(), t)
1144-
1145-
txWrite, err = sqlDB.Begin()
1146-
if err != nil {
1147-
t.Fatal(err)
1148-
}
1149-
txRead, err = sqlDB.Begin()
1150-
if err != nil {
1151-
t.Fatal(err)
1152-
}
1027+
// A read-write transaction that uses the old version of the descriptor.
1028+
txReadWrite, err := sqlDB.Begin()
1029+
require.NoError(t, err)
1030+
// A read-only transaction that uses the old version of the descriptor.
1031+
txRead, err := sqlDB.Begin()
1032+
require.NoError(t, err)
1033+
// A write-only transaction that uses the old version of the descriptor.
1034+
txWrite, err := sqlDB.Begin()
1035+
require.NoError(t, err)
11531036

11541037
// Modify the table descriptor.
1155-
if _, err := sqlDB.Exec(`DROP INDEX t.kv@foo`); err != nil {
1156-
t.Fatal(err)
1157-
}
1158-
1159-
rows, err = txRead.Query(`SELECT k, v FROM t.kv@foo`)
1160-
if err != nil {
1161-
t.Fatal(err)
1162-
}
1163-
checkSelectResults(rows)
1164-
1165-
// Uses old descriptor and inserts values into index span which
1166-
// will be cleaned up.
1167-
if _, err := txWrite.Exec(`INSERT INTO t.kv VALUES ('c', 'd');`); err != nil {
1168-
t.Fatal(err)
1169-
}
1170-
1171-
if err := txRead.Commit(); err != nil {
1172-
t.Fatal(err)
1173-
}
1174-
if err := txWrite.Commit(); err != nil {
1175-
t.Fatal(err)
1176-
}
1177-
1178-
tableSpan := tableDesc.TableSpan(keys.SystemSQLCodec)
1179-
1180-
// Allow async schema change waiting for GC to complete (when dropping an
1181-
// index) and clear the index keys.
1182-
if _, err := sqltestutils.AddImmediateGCZoneConfig(sqlDB, tableDesc.GetID()); err != nil {
1183-
t.Fatal(err)
1184-
}
1185-
1186-
// Reset closed timestamp so that deleted keys can be GC'ed within the
1187-
// SucceedSoon window.
1188-
if util.RaceEnabled || skip.Stress() {
1189-
_, err := sqlDB.Exec(
1190-
"SET CLUSTER SETTING kv.closed_timestamp.target_duration = '3s'")
1191-
require.NoError(t, err)
1192-
}
1193-
1194-
testutils.SucceedsSoon(t, func() error {
1195-
return tests.CheckKeyCountIncludingTombstonedE(t, s, tableSpan, 2)
1196-
})
1038+
_, err = sqlDB.Exec(`ALTER TABLE t.kv ADD m CHAR DEFAULT 'z';`)
1039+
require.NoError(t, err)
1040+
// Wait a short bit so that the SCHEMA CHANGE GC job created by the above ADD COLUMN
1041+
// gets to run, which will mark the old primary index of `t.kv` as tombstoned.
1042+
time.Sleep(1 * time.Second)
1043+
1044+
// Read-only transaction:
1045+
// 1). reads uses a historical version of `t.kv` and sees only two columns `k`
1046+
// and `v`;
1047+
// 2). it commits just fine;
1048+
rows, err := txRead.Query(`SELECT * FROM t.kv`)
1049+
require.NoError(t, err)
1050+
requireOneRow(rows)
1051+
require.NoError(t, txRead.Commit())
1052+
1053+
// Read-write transaction:
1054+
// 1). reads uses a historical version of `t.kv` and sees only two columns `k`
1055+
// and `v`;
1056+
// 2). writes to that historical version of `t.kv` attempt to write to the old
1057+
// primary index, which has previously been written with a higher timestamp
1058+
// (when the SCHEMA CHANGE GC job lays a range tombstone on it). This is a
1059+
// write-write conflict, and it will bump up transaction's write_ts, perform a
1060+
// read refresh which would fail (bc reading from that new and higher
1061+
// timestamp would return zero row, instead of one), and thus a
1062+
// TransactionRetryWriteTooOld error will be returned.
1063+
rows, err = txReadWrite.Query(`SELECT * FROM t.kv`)
1064+
require.NoError(t, err)
1065+
requireOneRow(rows)
1066+
_, err = txReadWrite.Exec(`INSERT INTO t.kv VALUES ('c', 'd');`)
1067+
requireWriteTooOldErr(t, err)
1068+
require.NoError(t, txReadWrite.Rollback())
1069+
1070+
// Write-only transaction:
1071+
// 1). writes, similarly to the read-write transaction, will trigger a read
1072+
// refresh and this time it will succeed (bc there is no reads).
1073+
// 2). commits, however, will update the transaction's deadline, and the
1074+
// leased descriptor such an "old" transaction is using is the very original
1075+
// version (v1) of `t.kv` that expires at the modification time of v2 of
1076+
// `t.kv`. But the bumped commit timestamp is larger than the leased
1077+
// descriptor's expiration time, leading to a "liveness session expired"
1078+
// error.
1079+
_, err = txWrite.Exec(`INSERT INTO t.kv VALUES ('c', 'd');`)
1080+
require.NoError(t, err)
1081+
requireSessionExpiredErr(t, txWrite.Commit())
11971082
}
11981083

11991084
// Test that a lease on a table descriptor is always acquired on the latest

0 commit comments

Comments
 (0)