Skip to content

Commit bfda256

Browse files
marcobambini and claude committed
fix(vtab): streaming mode ignored ORDER BY due to wrong orderByConsumed flag (#43)
vFullScanBestIndex checked VECTOR_COLUMN_K (column 2) to detect top-k mode, but with SQLite table-valued functions, positional arg 2 (the vector blob) always maps to column 2 — so orderByConsumed was always set to 1, even in streaming mode. SQLite then skipped sorting entirely. Fix: detect top-k mode by checking VECTOR_COLUMN_MEMIDX (column 3), which only has a constraint when the 4th positional arg (k) is provided. Added regression test covering stream + JOIN + ORDER BY + LIMIT. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 269e102 commit bfda256

File tree

3 files changed

+109
-11
lines changed

3 files changed

+109
-11
lines changed

src/sqlite-vector.c

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2030,8 +2030,13 @@ static int vFullScanDisconnect (sqlite3_vtab *pVtab) {
20302030
}
20312031

20322032
static int vFullScanBestIndex (sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo) {
2033-
bool has_k = false;
2034-
int k_index = -1;
2033+
// With positional args to the table-valued function:
2034+
// 3 args: f('tbl','col',vector) → columns 0,1,2 constrained (streaming)
2035+
// 4 args: f('tbl','col',vector,k) → columns 0,1,2,3 constrained (top-k)
2036+
// Column 2 (K) always receives the vector blob (positional arg 2).
2037+
// Column 3 (MEMIDX) receives the actual k integer only with 4 args.
2038+
// So top-k mode is determined by whether MEMIDX is constrained, not K.
2039+
bool has_topk = false;
20352040

20362041
const struct sqlite3_index_constraint *pConstraint = pIdxInfo->aConstraint;
20372042
for(int i=0; i<pIdxInfo->nConstraint; i++, pConstraint++){
@@ -2047,26 +2052,25 @@ static int vFullScanBestIndex (sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo)
20472052
pIdxInfo->aConstraintUsage[i].omit = 1;
20482053
break;
20492054
case VECTOR_COLUMN_K:
2050-
has_k = true;
2051-
k_index = i;
2055+
pIdxInfo->aConstraintUsage[i].argvIndex = 3;
2056+
pIdxInfo->aConstraintUsage[i].omit = 1;
20522057
break;
20532058
case VECTOR_COLUMN_MEMIDX:
2059+
has_topk = true;
20542060
pIdxInfo->aConstraintUsage[i].argvIndex = 4;
20552061
pIdxInfo->aConstraintUsage[i].omit = 1;
20562062
break;
20572063
}
20582064
}
20592065

2060-
if (has_k) {
2061-
// top-k mode
2062-
pIdxInfo->aConstraintUsage[k_index].argvIndex = 3;
2063-
pIdxInfo->aConstraintUsage[k_index].omit = 1;
2066+
if (has_topk) {
2067+
// top-k mode: 4 positional args, argv[3] has the k integer
20642068
pIdxInfo->estimatedCost = (double)1;
20652069
pIdxInfo->estimatedRows = 100;
20662070
pIdxInfo->orderByConsumed = 1;
20672071
pIdxInfo->idxNum = 1;
20682072
} else {
2069-
// streaming mode
2073+
// streaming mode: 3 positional args, no sorting guaranteed
20702074
pIdxInfo->estimatedCost = 1e8;
20712075
pIdxInfo->estimatedRows = 100000;
20722076
pIdxInfo->idxNum = 2;

src/sqlite-vector.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
extern "C" {
2525
#endif
2626

27-
#define SQLITE_VECTOR_VERSION "0.9.90"
27+
#define SQLITE_VECTOR_VERSION "0.9.91"
2828

2929
SQLITE_VECTOR_API int sqlite3_vector_init (sqlite3 *db, char **pzErrMsg, const sqlite3_api_routines *pApi);
3030

test/test_vector.c

Lines changed: 95 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,17 @@ static int scan_cb(void *ctx, int ncols, char **vals, char **names) {
8686
return 0;
8787
}
8888

89+
/* Like scan_cb but reads distance from column 0 (for queries where distance is first). */
90+
static int scan_cb_col0(void *ctx, int ncols, char **vals, char **names) {
91+
(void)names;
92+
scan_result *r = (scan_result *)ctx;
93+
if (r->count < 64 && ncols >= 1 && vals[0]) {
94+
r->distances[r->count] = atof(vals[0]);
95+
}
96+
r->count++;
97+
return 0;
98+
}
99+
89100
/* ---------- Test: basics ---------- */
90101

91102
static void test_basics(sqlite3 *db) {
@@ -386,7 +397,90 @@ int main(void) {
386397
test_quantize_scan(db, "bit", "1BIT", 8, bit_vecs, bit_nvecs, bit_query);
387398
}
388399

389-
/* 4. Backward-compat aliases */
400+
/* 4. Streaming ORDER BY (regression test for issue #43) */
401+
printf("\n=== Streaming ORDER BY ===\n");
402+
{
403+
/*
404+
* Regression test: vector_full_scan_stream with ORDER BY + LIMIT
405+
* must return results sorted by distance. A bug in vFullScanBestIndex
406+
* caused orderByConsumed=1 even in streaming mode, so SQLite skipped
407+
* sorting and returned rows in table order.
408+
*
409+
* We insert vectors in REVERSE distance order (farthest first) so
410+
* that table-order != distance-order, exposing the bug.
411+
*/
412+
const char *tbl = "tfs_stream_order";
413+
const char *vecs_ordered[] = {
414+
"[10.0, 0.0, 0.0, 0.0]", /* id 1: far */
415+
"[0.0, 10.0, 0.0, 0.0]", /* id 2: far */
416+
"[5.0, 5.0, 0.0, 0.0]", /* id 3: far */
417+
"[2.0, 0.0, 0.0, 0.0]", /* id 4: mid */
418+
"[1.0, 1.0, 0.0, 0.0]", /* id 5: mid */
419+
"[0.6, 0.6, 0.6, 0.6]", /* id 6: close */
420+
"[0.4, 0.4, 0.4, 0.4]", /* id 7: close */
421+
"[0.5, 0.5, 0.5, 0.5]", /* id 8: exact */
422+
"[0.3, 0.3, 0.3, 0.3]", /* id 9: close */
423+
"[0.7, 0.7, 0.7, 0.7]", /* id 10: close */
424+
};
425+
const char *qvec = "[0.5, 0.5, 0.5, 0.5]";
426+
427+
if (setup_table(db, tbl, "f32", "L2", 4, vecs_ordered, 10) == 0) {
428+
/* Result accumulator filled by scan_cb_col0: distance is column 0 in these queries */
429+
struct { int count; double distances[64]; } r;
430+
431+
/* vector_full_scan_stream + JOIN + ORDER BY distance LIMIT */
432+
{
433+
memset(&r, 0, sizeof(r));
434+
char sql[1024];
435+
snprintf(sql, sizeof(sql),
436+
"SELECT vss.distance, t.id"
437+
" FROM \"%s\" t"
438+
" INNER JOIN vector_full_scan_stream('%s', 'v', vector_as_f32('%s'))"
439+
" AS vss ON vss.rowid = t.id"
440+
" ORDER BY vss.distance LIMIT 5;",
441+
tbl, tbl, qvec);
442+
char *err = NULL;
443+
rc = sqlite3_exec(db, sql, scan_cb_col0, &r, &err);
444+
ASSERT(rc == SQLITE_OK, "stream+JOIN+ORDER BY executes");
445+
if (err) { printf(" err: %s\n", err); sqlite3_free(err); }
446+
447+
ASSERT(r.count == 5, "stream+JOIN+ORDER BY returns 5 rows");
448+
449+
int sorted = 1;
450+
for (int i = 1; i < r.count; i++) {
451+
if (r.distances[i] < r.distances[i - 1]) sorted = 0;
452+
}
453+
ASSERT(sorted, "stream+JOIN+ORDER BY distances sorted");
454+
ASSERT(r.distances[0] < 0.01, "stream+JOIN+ORDER BY closest is ~0");
455+
}
456+
457+
/* vector_full_scan_stream (no k, no JOIN) + ORDER BY distance LIMIT */
458+
{
459+
memset(&r, 0, sizeof(r));
460+
char sql[1024];
461+
snprintf(sql, sizeof(sql),
462+
"SELECT distance, rowid"
463+
" FROM vector_full_scan_stream('%s', 'v', vector_as_f32('%s'))"
464+
" ORDER BY distance LIMIT 5;",
465+
tbl, qvec);
466+
char *err = NULL;
467+
rc = sqlite3_exec(db, sql, scan_cb_col0, &r, &err);
468+
ASSERT(rc == SQLITE_OK, "stream+ORDER BY executes");
469+
if (err) { printf(" err: %s\n", err); sqlite3_free(err); }
470+
471+
ASSERT(r.count == 5, "stream+ORDER BY returns 5 rows");
472+
473+
int sorted = 1;
474+
for (int i = 1; i < r.count; i++) {
475+
if (r.distances[i] < r.distances[i - 1]) sorted = 0;
476+
}
477+
ASSERT(sorted, "stream+ORDER BY distances sorted");
478+
ASSERT(r.distances[0] < 0.01, "stream+ORDER BY closest is ~0");
479+
}
480+
}
481+
}
482+
483+
/* 5. Backward-compat aliases */
390484
printf("\n=== Backward-compat aliases ===\n");
391485
{
392486
/* Set up a table for alias tests */

0 commit comments

Comments
 (0)