Skip to content

Commit a89cd70

Browse files
committed
Fix ordering of query results on non-unique indexes
The query results for non-unique indexes were sorted properly by the secondary key, but not by primary due to the substition scheme not properly retaining the order. Fix this by using the following scheme: 0x00 => 0x0101 0x01 => 0x0102 This ensures that the original ordering is retained. Additionally also encode the secondary key using this scheme to make sure 0x00 in the secondary key does not mess the results. The TestDB_Quick was extended to validate that the query results for non-unique indexes are properly sorted by both the primary and secondary keys. Signed-off-by: Jussi Maki <[email protected]>
1 parent 41cd258 commit a89cd70

File tree

8 files changed

+214
-92
lines changed

8 files changed

+214
-92
lines changed

any_table.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,7 @@ func (t AnyTable) Get(txn ReadTxn, index string, key string) (any, Revision, boo
6969
if !ok {
7070
break
7171
}
72-
secondary, _ := decodeNonUniqueKey(k)
73-
if len(secondary) == len(rawKey) {
72+
if nonUniqueKey(k).secondaryLen() == len(rawKey) {
7473
return obj.data, obj.revision, true, nil
7574
}
7675
}

db_test.go

Lines changed: 11 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1150,31 +1150,26 @@ func TestWriteJSON(t *testing.T) {
11501150
func Test_nonUniqueKey(t *testing.T) {
11511151
// empty keys
11521152
key := encodeNonUniqueKey(nil, nil)
1153-
secondary, _ := decodeNonUniqueKey(key)
1154-
assert.Len(t, secondary, 0)
1153+
nuk := nonUniqueKey(key)
1154+
assert.Equal(t, 0, nuk.secondaryLen())
11551155

11561156
// empty primary
11571157
key = encodeNonUniqueKey(nil, []byte("foo"))
1158-
secondary, _ = decodeNonUniqueKey(key)
1159-
assert.Equal(t, string(secondary), "foo")
1158+
nuk = nonUniqueKey(key)
1159+
assert.Zero(t, nuk.primaryLen())
1160+
assert.Equal(t, 3, nuk.secondaryLen())
11601161

11611162
// empty secondary
11621163
key = encodeNonUniqueKey([]byte("quux"), []byte{})
1163-
secondary, _ = decodeNonUniqueKey(key)
1164-
assert.Len(t, secondary, 0)
1164+
nuk = nonUniqueKey(key)
1165+
assert.Zero(t, nuk.secondaryLen())
11651166

11661167
// non-empty
11671168
key = encodeNonUniqueKey([]byte("foo"), []byte("quux"))
1168-
secondary, primary := decodeNonUniqueKey(key)
1169-
assert.EqualValues(t, secondary, "quux")
1170-
assert.EqualValues(t, primary, "foo")
1171-
1172-
// non-empty, primary with substitutions:
1173-
// 0x0 => 0xfe, 0xfe => 0xfd01, 0xfd => 0xfd00
1174-
key = encodeNonUniqueKey([]byte{0x0, 0xfd, 0xfe}, []byte("quux"))
1175-
secondary, primary = decodeNonUniqueKey(key)
1176-
assert.EqualValues(t, secondary, "quux")
1177-
assert.EqualValues(t, primary, []byte{0xfe, 0xfd, 0x01, 0xfd, 0x00})
1169+
nuk = nonUniqueKey(key)
1170+
assert.Equal(t, 4, nuk.secondaryLen())
1171+
assert.Equal(t, 3, nuk.primaryLen())
1172+
assert.EqualValues(t, "foo", nuk.encodedPrimary())
11781173
}
11791174

11801175
func Test_validateTableName(t *testing.T) {

http.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,9 @@ type QueryResponse struct {
124124

125125
func runQuery(indexTxn indexReadTxn, lowerbound bool, queryKey []byte, onObject func(object) error) {
126126
var iter *part.Iterator[object]
127+
if !indexTxn.unique {
128+
queryKey = encodeNonUniqueBytes(queryKey)
129+
}
127130
if lowerbound {
128131
iter = indexTxn.LowerBound(queryKey)
129132
} else {
@@ -137,8 +140,7 @@ func runQuery(indexTxn indexReadTxn, lowerbound bool, queryKey []byte, onObject
137140
match = func(k []byte) bool { return len(k) == len(queryKey) }
138141
default:
139142
match = func(k []byte) bool {
140-
secondary, _ := decodeNonUniqueKey(k)
141-
return len(secondary) == len(queryKey)
143+
return nonUniqueKey(k).secondaryLen() == len(queryKey)
142144
}
143145
}
144146
for key, obj, ok := iter.Next(); ok; key, obj, ok = iter.Next() {

iterator.go

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -113,18 +113,21 @@ func nonUniqueSeq[Obj any](iter *part.Iterator[object], prefixSearch bool, searc
113113
break
114114
}
115115

116-
secondary, primary := decodeNonUniqueKey(key)
116+
nuk := nonUniqueKey(key)
117+
secondaryLen := nuk.secondaryLen()
117118

118119
switch {
119-
case !prefixSearch && len(secondary) != len(searchKey):
120+
case !prefixSearch && secondaryLen != len(searchKey):
120121
// This a List(), thus secondary key must match length exactly.
121122
continue
122-
case prefixSearch && len(secondary) < len(searchKey):
123+
case prefixSearch && secondaryLen < len(searchKey):
123124
// This is Prefix(), thus key must be equal or longer to search key.
124125
continue
125126
}
126127

127128
if prefixSearch {
129+
primary := nuk.encodedPrimary()
130+
128131
// When doing a prefix search on a non-unique index we may see the
129132
// same object multiple times since multiple keys may point it.
130133
// Skip if we've already seen this object.
@@ -157,8 +160,10 @@ func nonUniqueLowerBoundSeq[Obj any](iter *part.Iterator[object], searchKey []by
157160
// With a non-unique index we have a composite key <secondary><primary><secondary len>.
158161
// This means we need to check every key that it's larger or equal to the search key.
159162
// Just seeking to the first one isn't enough as the secondary key length may vary.
160-
secondary, primary := decodeNonUniqueKey(key)
163+
nuk := nonUniqueKey(key)
164+
secondary := nuk.encodedSecondary()
161165
if bytes.Compare(secondary, searchKey) >= 0 {
166+
primary := nuk.encodedPrimary()
162167
if _, found := visited[string(primary)]; found {
163168
continue
164169
}

quick_test.go

Lines changed: 110 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
11
package statedb
22

33
import (
4+
"bytes"
45
"cmp"
6+
"fmt"
57
"iter"
8+
"math/rand"
9+
"reflect"
610
"strings"
711
"testing"
812
"testing/quick"
@@ -13,6 +17,18 @@ import (
1317

1418
var quickConfig = &quick.Config{
1519
MaxCount: 5000,
20+
21+
// Make 1-8 byte long strings as input data. Keep the strings shorter
22+
// than the default quick value generation to hit the more interesting cases
23+
// often.
24+
Values: func(args []reflect.Value, rand *rand.Rand) {
25+
for i := range args {
26+
numBytes := 1 + rand.Intn(8)
27+
bs := make([]byte, numBytes)
28+
rand.Read(bs)
29+
args[i] = reflect.ValueOf(string(bs))
30+
}
31+
},
1632
}
1733

1834
// Use an object with strings for both primary and secondary
@@ -23,11 +39,8 @@ type quickObj struct {
2339
A, B string
2440
}
2541

26-
func (q quickObj) getA() string {
27-
return q.A
28-
}
29-
func (q quickObj) getB() string {
30-
return q.B
42+
func (q quickObj) String() string {
43+
return fmt.Sprintf("%x %x", []byte(q.A), []byte(q.B))
3144
}
3245

3346
var (
@@ -52,13 +65,27 @@ var (
5265
}
5366
)
5467

55-
func isOrdered[A cmp.Ordered, B any](t *testing.T, it iter.Seq2[A, B]) bool {
56-
var prev A
68+
func isOrdered(t *testing.T, aFirst bool, it iter.Seq2[quickObj, Revision]) bool {
69+
var prev quickObj
5770
for a := range it {
58-
if cmp.Compare(a, prev) < 0 {
59-
t.Logf("isOrdered: %#v < %#v!", a, prev)
60-
return false
71+
if aFirst {
72+
if ret := cmp.Compare(a.A, prev.A); ret < 0 {
73+
t.Logf("isOrdered(A): %s < %s!", a, prev)
74+
return false
75+
} else if ret == 0 && cmp.Compare(a.B, prev.B) < 0 {
76+
t.Logf("isOrdered(B): %s < %s!", a, prev)
77+
return false
78+
}
79+
} else {
80+
if ret := cmp.Compare(a.B, prev.B); ret < 0 {
81+
t.Logf("isOrdered(B): %s < %s!", a, prev)
82+
return false
83+
} else if ret == 0 && cmp.Compare(a.A, prev.A) < 0 {
84+
t.Logf("isOrdered(A): %s < %s!", a, prev)
85+
return false
86+
}
6187
}
88+
6289
prev = a
6390
}
6491
return true
@@ -106,15 +133,15 @@ func TestDB_Quick(t *testing.T) {
106133
// Check queries against the primary index
107134
//
108135

109-
if numExpected != seqLen(Map(table.All(rtxn), quickObj.getA)) {
136+
if numExpected != seqLen(table.All(rtxn)) {
110137
t.Logf("All() via aIndex wrong length")
111138
return false
112139
}
113-
if numExpected != seqLen(Map(table.Prefix(rtxn, aIndex.Query("")), quickObj.getA)) {
140+
if numExpected != seqLen(table.Prefix(rtxn, aIndex.Query(""))) {
114141
t.Logf("Prefix() via aIndex wrong length")
115142
return false
116143
}
117-
if numExpected != seqLen(Map(table.LowerBound(rtxn, aIndex.Query("")), quickObj.getA)) {
144+
if numExpected != seqLen(table.LowerBound(rtxn, aIndex.Query(""))) {
118145
t.Logf("LowerBound() via aIndex wrong length")
119146
return false
120147
}
@@ -151,20 +178,20 @@ func TestDB_Quick(t *testing.T) {
151178
for anyObj := range anyObjs {
152179
obj := anyObj.(quickObj)
153180
if cmp.Compare(obj.A, a) < 0 {
154-
t.Logf("AnyTable.LowerBound() order wrong")
181+
t.Logf("AnyTable.LowerBound(%x) order wrong: %x < %x", []byte(a), []byte(obj.A), []byte(a))
155182
return false
156183
}
157184
}
158185

159-
if !isOrdered(t, Map(table.All(rtxn), quickObj.getA)) {
186+
if !isOrdered(t, true, table.All(rtxn)) {
160187
t.Logf("All() wrong order")
161188
return false
162189
}
163-
if !isOrdered(t, Map(table.Prefix(rtxn, aIndex.Query("")), quickObj.getA)) {
190+
if !isOrdered(t, true, table.Prefix(rtxn, aIndex.Query(""))) {
164191
t.Logf("Prefix() via aIndex wrong order")
165192
return false
166193
}
167-
if !isOrdered(t, Map(table.LowerBound(rtxn, aIndex.Query("")), quickObj.getA)) {
194+
if !isOrdered(t, true, table.LowerBound(rtxn, aIndex.Query(""))) {
168195
t.Logf("LowerBound() via aIndex wrong order")
169196
return false
170197
}
@@ -178,6 +205,7 @@ func TestDB_Quick(t *testing.T) {
178205
t.Logf("Prefix() via bIndex wrong length")
179206
return false
180207
}
208+
181209
if numExpected != seqLen(table.LowerBound(rtxn, bIndex.Query(""))) {
182210
t.Logf("LowerBound() via bIndex wrong length")
183211
return false
@@ -187,7 +215,7 @@ func TestDB_Quick(t *testing.T) {
187215
// not be the one that we just inserted.
188216
obj, _, found = table.Get(rtxn, bIndex.Query(b))
189217
if !found || obj.B != b {
190-
t.Logf("Get() via bIndex not found or wrong B")
218+
t.Logf("Get(%q) via bIndex not found (%v) or wrong B (%q vs %q)", b, found, obj.B, b)
191219
return false
192220
}
193221

@@ -224,7 +252,7 @@ func TestDB_Quick(t *testing.T) {
224252
for anyObj := range anyObjs {
225253
obj := anyObj.(quickObj)
226254
if !strings.HasPrefix(obj.B, b) {
227-
t.Logf("AnyTable.Prefix() via bIndex has wrong prefix")
255+
t.Logf("AnyTable.Prefix() via bIndex has wrong prefix: %q vs %q", obj.B, b)
228256
return false
229257
}
230258
}
@@ -253,17 +281,75 @@ func TestDB_Quick(t *testing.T) {
253281
}
254282

255283
// Iterating over the secondary index returns the objects in order
256-
// defined by the "B" key.
257-
if !isOrdered(t, Map(table.Prefix(rtxn, bIndex.Query("")), quickObj.getB)) {
258-
t.Logf("Prefix() via bIndex has wrong order")
284+
// defined by the "B" key first and then by the "A" key.
285+
if !isOrdered(t, false, table.Prefix(rtxn, bIndex.Query(""))) {
286+
t.Logf("Prefix() via bIndex wrong order")
287+
rtxn.getTxn().mustIndexReadTxn(table, table.indexPos("b")).PrintTree()
259288
return false
260289
}
261-
if !isOrdered(t, Map(table.LowerBound(rtxn, bIndex.Query("")), quickObj.getB)) {
262-
t.Logf("LowerBound() via bIndex has wrong order")
290+
if !isOrdered(t, false, table.LowerBound(rtxn, bIndex.Query(""))) {
291+
t.Logf("Prefix() via bIndex wrong order")
263292
return false
264293
}
265294
return true
266295
}
267296

268297
require.NoError(t, quick.Check(check, quickConfig))
269298
}
299+
300+
func Test_Quick_nonUniqueKey(t *testing.T) {
301+
check := func(p1, s1, p2, s2 []byte) bool {
302+
key1 := encodeNonUniqueKey(p1, s1)
303+
expectedLen := encodedLength(p1) + 1 + encodedLength(s1) + 2
304+
minLen := len(p1) + 1 + len(s1) + 2
305+
if expectedLen < minLen {
306+
t.Logf("expected length too short (%d), must be >= %d", expectedLen, minLen)
307+
return false
308+
}
309+
if len(key1) != expectedLen {
310+
t.Logf("length mismatch, expected %d, got %d", expectedLen, len(key1))
311+
return false
312+
}
313+
314+
nuk1 := nonUniqueKey(key1)
315+
if len(nuk1.encodedPrimary()) < len(p1) {
316+
t.Logf("encodedPrimary() length (%d) shorter than original (%d)", len(nuk1.encodedPrimary()), len(p1))
317+
return false
318+
}
319+
if len(nuk1.encodedPrimary()) != encodedLength(p1) {
320+
t.Logf("encodedPrimary() length (%d) does not match encodedLength() (%d)", len(nuk1.encodedPrimary()), len(p1))
321+
return false
322+
}
323+
if len(nuk1.encodedSecondary()) < len(s1) {
324+
t.Logf("encodedSecondary() length (%d) shorter than original (%d)", len(nuk1.encodedSecondary()), len(s1))
325+
}
326+
if len(nuk1.encodedSecondary()) != encodedLength(s1) {
327+
t.Logf("encodedSecondary() length (%d) does not match encodedLength() (%d)", len(nuk1.encodedSecondary()), len(s1))
328+
return false
329+
}
330+
331+
// Do another key and check that ordering is preserved.
332+
key2 := encodeNonUniqueKey(p2, s2)
333+
scmp := bytes.Compare(s1, s2)
334+
pcmp := bytes.Compare(p1, p2)
335+
kcmp := bytes.Compare(key1, key2)
336+
337+
if (scmp == 0 && pcmp != kcmp) /* secondary key matches, primary key determines order */ ||
338+
(scmp != 0 && scmp != kcmp) /* secondary key determines order */ {
339+
t.Logf("ordering not preserved: key1=%v key2=%v, p1=%v, s1=%v, p2=%v, s2=%v", key1, key2, p1, s1, p2, s2)
340+
return false
341+
}
342+
return true
343+
}
344+
require.NoError(t, quick.Check(check, &quick.Config{
345+
MaxCount: 50000,
346+
Values: func(args []reflect.Value, rand *rand.Rand) {
347+
for i := range args {
348+
numBytes := 1 + rand.Intn(8)
349+
bs := make([]byte, numBytes)
350+
rand.Read(bs)
351+
args[i] = reflect.ValueOf(bs)
352+
}
353+
},
354+
}))
355+
}

table.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -321,8 +321,7 @@ func (t *genTable[Obj]) GetWatch(txn ReadTxn, q Query[Obj]) (obj Obj, revision u
321321
}
322322

323323
// Check that we have a full match on the key
324-
secondary, _ := decodeNonUniqueKey(key)
325-
if len(secondary) == len(q.key) {
324+
if nonUniqueKey(key).secondaryLen() == len(q.key) {
326325
break
327326
}
328327
}

0 commit comments

Comments
 (0)