2828# OTHER valid rows, which sqlmap's fuzzy page comparison conflates with the anchor row, producing
2929# false positives. See PROVE_DESIGN.md.)
3030#
31- # Truth table measured on a live OWASP-CRS platform across 16 engines (MySQL/MySQL5, MariaDB/TiDB,
32- # PostgreSQL, CockroachDB, CrateDB, Microsoft SQL Server, SQLite, Firebird, ClickHouse, H2, HSQLDB,
33- # Derby, MonetDB, IRIS, Trino); only the zero-false-positive rules are kept (see _classify). With
34- # anchor value 2:
31+ # Signatures were measured against each of these engines on a live OWASP-CRS platform (MySQL/MySQL5,
32+ # MariaDB/TiDB, PostgreSQL, CockroachDB, CrateDB, Microsoft SQL Server, SQLite, Firebird, ClickHouse,
33+ # H2, HSQLDB, Derby, MonetDB, IRIS, Trino) and encoded as an exact-signature WHITELIST in _classify()
34+ # (only measured signatures classify; anything else -> None). With anchor value 2:
3535#
3636# * 2^0=2 -> '^' is bitwise XOR (MySQL/MSSQL/MonetDB: 2^0=2) vs exponentiation (PostgreSQL: 2^0=1)
3737# vs no such operator (SQLite/Oracle/... -> error, so false)
5252 ("shift" , "1<<2=4" ),
5353)
5454
55+ # Canary for the trustworthiness gate: a syntactically-invalid expression (a trailing operator) that
56+ # a real SQL back-end can only read as FALSE - the appended clause is a parse error, the query fails,
57+ # no row. A false-positive / noise channel (a WAF, a reflection, or a backend that ignores the
58+ # injected tail and reads every probe the same) reads it as TRUE, which is proof the boolean oracle
59+ # is trash, so the heuristic returns None (a true negative) rather than a bogus DBMS from a
60+ # meaningless signature. It uses a trailing-operator form, distinct from the '<n> <m>' no-operator
61+ # form already exercised by sqlmap's earlier false-positive check, so it adds new information.
62+ DIALECT_CANARY = "2+"
63+
64+ # Exact operator-dialect signature -> back-end DBMS. Strict WHITELIST re-derived from the live
65+ # measurement above: ONLY these signatures classify; any other - an engine not measured here, or a
66+ # false-positive / noise channel - returns None. This deliberately replaces earlier partial-condition
67+ # rules, which would confidently mis-map physically-impossible signatures onto a DBMS (e.g. the
68+ # all-true 'reads everything as true' noise, where '^' would be XOR and exponentiation at once).
69+ _SIGNATURE_DBMS = {
70+ # xor pgpow intdiv bitor shift
71+ (True , False , False , True , True ): DBMS .MYSQL , # MySQL / MariaDB / TiDB
72+ (False , True , True , True , True ): DBMS .PGSQL , # PostgreSQL
73+ (False , True , False , True , True ): DBMS .PGSQL , # CockroachDB (pgwire; has '<<' -> shift True)
74+ (False , True , True , True , False ): DBMS .PGSQL , # CrateDB
75+ (True , False , True , True , False ): DBMS .MSSQL , # Microsoft SQL Server (no bit-shift)
76+ (True , False , True , True , True ): DBMS .MONETDB , # MonetDB (as MSSQL but has '<<')
77+ (False , False , True , True , True ): DBMS .SQLITE , # SQLite
78+ }
79+
5580def _classify (signature ):
5681 """
57- Maps a measured (xor, pgpow, intdiv, bitor) operator-dialect signature to a back-end
58- DBMS, or returns None when the signature does not *uniquely* identify a major DBMS (so
59- detection proceeds unchanged - the heuristic never wrong-foots the scan).
60-
61- Rules below are the subset of the measured 11-engine truth table that maps with zero
62- false positives. Engines whose operator profile is not distinctive enough (Oracle's
63- all-false signature, which a minimal engine like ClickHouse/H2/Firebird/HSQLDB/Derby or
64- a fully WAF-blocked channel also produces) deliberately fall through to None:
82+ Maps an exact operator-dialect signature (xor, pgpow, intdiv, bitor, shift) to a back-end DBMS
83+ through a strict whitelist of live-measured signatures, or returns None when the signature is not
84+ a known DBMS fingerprint - an engine not measured, or a noise / false-positive channel - so
85+ detection proceeds unchanged and the heuristic never wrong-foots the scan.
6586
66- >>> _classify((True, False, False, True, True)) # MySQL / MariaDB / TiDB
87+ >>> _classify((True, False, False, True, True)) # MySQL / MariaDB / TiDB
6788 'MySQL'
68- >>> _classify((True, False, True, True, False)) # Microsoft SQL Server (no bit-shift)
69- 'Microsoft SQL Server'
70- >>> _classify((True, False, True, True, True)) # MonetDB (same xor/intdiv as MSSQL, but has '<<')
71- 'MonetDB'
72- >>> _classify((False, True, True, True, False)) # PostgreSQL
89+ >>> _classify((False, True, True, True, True)) # PostgreSQL
90+ 'PostgreSQL'
91+ >>> _classify((False, True, False, True, True)) # CockroachDB -> PostgreSQL family
7392 'PostgreSQL'
74- >>> _classify((False, True, False , True, False)) # CockroachDB (pgwire) -> PostgreSQL family
93+ >>> _classify((False, True, True , True, False)) # CrateDB -> PostgreSQL family
7594 'PostgreSQL'
76- >>> _classify((False, False, True, True, True)) # SQLite
95+ >>> _classify((True, False, True, True, False)) # Microsoft SQL Server (no bit-shift)
96+ 'Microsoft SQL Server'
97+ >>> _classify((True, False, True, True, True)) # MonetDB (as MSSQL but has '<<')
98+ 'MonetDB'
99+ >>> _classify((False, False, True, True, True)) # SQLite
77100 'SQLite'
78- >>> _classify((False, False, True, False, False)) is None # Firebird/HSQLDB/Derby/H2/Trino -> no prior
101+ >>> _classify((True, True, True, True, True)) is None # 'reads everything true' noise -> None
102+ True
103+ >>> _classify((False, False, False, False, False)) is None # all-false (Oracle/ClickHouse/IRIS/blocked) -> None
79104 True
80- >>> _classify((False, False, False , False, False)) is None # all-false (Oracle/ClickHouse/IRIS/blocked) -> no prior
105+ >>> _classify((False, False, True , False, False)) is None # Firebird/H2/HSQLDB/Derby/Trino -> not distinctive
81106 True
82107 """
83108
84- xor , pgpow , intdiv , bitor , shift = signature
85-
86- if pgpow : # '^' is exponentiation -> PostgreSQL family
87- return DBMS .PGSQL
88- if xor and intdiv : # '^' is XOR AND integer division -> SQL Server ...
89- # ... except MonetDB shares this exact signature; it alone has a working bit-shift operator
90- # ('1<<2=4'), SQL Server has none -> split the collision (measured zero-FP across 16 engines).
91- return DBMS .MONETDB if shift else DBMS .MSSQL
92- if xor and not intdiv : # '^' is XOR AND real division -> MySQL family
93- return DBMS .MYSQL
94- if not xor and intdiv and bitor : # no '^', integer division, bitwise '|' -> SQLite
95- return DBMS .SQLITE
96-
97- return None
109+ return _SIGNATURE_DBMS .get (tuple (bool (_ ) for _ in signature ))
98110
99111def dialectCheckDbms (injection ):
100112 """
101113 Keyword-free back-end DBMS heuristic via operator-dialect differentials, evaluated through the
102114 given (boolean-capable) injection. Complements heuristicCheckDbms() - which is skipped when the
103115 WAF/IPS is dropping requests and otherwise relies on SELECT/quote payloads - because every probe
104- here is built from operator semantics alone. Returns the DBMS name or None; an ambiguous or
105- WAF-blocked channel yields None, leaving the scan unchanged.
116+ here is built from operator semantics alone. Returns the DBMS name or None; an ambiguous,
117+ WAF-blocked or false-positive channel yields None, leaving the scan unchanged.
106118 """
107119
108120 retVal = None
@@ -114,9 +126,12 @@ def dialectCheckDbms(injection):
114126 kb .injection = injection
115127
116128 try :
117- # channel sanity: a tautology must read TRUE and a contradiction FALSE, otherwise the
118- # boolean oracle is unreliable and the all-false signature (Oracle-like) would be meaningless
119- if checkBooleanExpression ("2=2" ) and not checkBooleanExpression ("2=3" ):
129+ # Trustworthiness gate: a real boolean oracle reads a tautology TRUE, a contradiction FALSE,
130+ # and a syntactically-invalid canary FALSE (the appended clause is a parse error -> the query
131+ # fails). A false-positive / noise channel reads them all alike - the canary as TRUE - which
132+ # is proof the oracle is trash, so classification is skipped (a true negative) instead of
133+ # emitting a bogus DBMS from a meaningless signature.
134+ if checkBooleanExpression ("2=2" ) and not checkBooleanExpression ("2=3" ) and not checkBooleanExpression (DIALECT_CANARY ):
120135 signature = tuple (bool (checkBooleanExpression (expr )) for _ , expr in DIALECT_PROBES )
121136 retVal = _classify (signature )
122137 finally :
0 commit comments