Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions common/utils/src/main/resources/error/error-conditions.json
Original file line number Diff line number Diff line change
Expand Up @@ -3909,6 +3909,11 @@
"CREATE TEMPORARY TABLE ... USING ... is a deprecated syntax. To overcome the issue, please use CREATE TEMPORARY VIEW instead."
]
},
"EMPTY_IN_PREDICATE" : {
"message" : [
"IN predicate requires at least one value. Empty IN clauses like 'IN ()' are not allowed. Consider using 'WHERE FALSE' if you need an always-false condition, or provide at least one value in the IN list."
]
},
"EMPTY_PARTITION_VALUE" : {
"message" : [
"Partition key <partKey> must set value."
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1206,7 +1206,7 @@ booleanExpression

predicate
: errorCapturingNot? kind=BETWEEN lower=valueExpression AND upper=valueExpression
| errorCapturingNot? kind=IN LEFT_PAREN expression (COMMA expression)* RIGHT_PAREN
| errorCapturingNot? kind=IN (LEFT_PAREN RIGHT_PAREN | LEFT_PAREN expression (COMMA expression)* RIGHT_PAREN)
| errorCapturingNot? kind=IN LEFT_PAREN query RIGHT_PAREN
| errorCapturingNot? kind=RLIKE pattern=valueExpression
| errorCapturingNot? kind=(LIKE | ILIKE) quantifier=(ANY | SOME | ALL) (LEFT_PAREN RIGHT_PAREN | LEFT_PAREN expression (COMMA expression)* RIGHT_PAREN)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -821,6 +821,13 @@ private[sql] object QueryParsingErrors extends DataTypeErrorsBase {
ctx)
}

/**
 * Creates (but does not throw) the parse error reported for an IN predicate whose
 * value list is empty, e.g. `id IN ()` or `id NOT IN ()`.
 *
 * @param ctx parser rule context of the offending predicate, handed to the exception
 * @return a [[ParseException]] with error class `INVALID_SQL_SYNTAX.EMPTY_IN_PREDICATE`
 *         and no message parameters
 */
def emptyInPredicateError(ctx: ParserRuleContext): Throwable = {
  // The error message template has no placeholders, so no parameters are supplied.
  val noParameters = Map.empty[String, String]
  new ParseException(
    errorClass = "INVALID_SQL_SYNTAX.EMPTY_IN_PREDICATE",
    messageParameters = noParameters,
    ctx)
}

/**
* Throws an internal error for unexpected parameter markers found during AST building. This
* should be unreachable in normal operation due to grammar-level blocking.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2837,6 +2837,10 @@ class AstBuilder extends DataTypeAstBuilder
case SqlBaseParser.IN if ctx.query != null =>
invertIfNotDefined(InSubquery(getValueExpressions(e), ListQuery(plan(ctx.query))))
case SqlBaseParser.IN =>
// The grammar accepts an empty list `IN ()` so it can be rejected here with a dedicated error
if (ctx.expression.isEmpty) {
throw QueryParsingErrors.emptyInPredicateError(ctx)
}
invertIfNotDefined(In(e, ctx.expression.asScala.map(expression).toSeq))
case SqlBaseParser.LIKE | SqlBaseParser.ILIKE =>
Option(ctx.quantifier).map(_.getType) match {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1395,7 +1395,7 @@ class PlanParserSuite extends AnalysisTest {
checkError(
exception = parseException(sql2),
condition = "PARSE_SYNTAX_ERROR",
parameters = Map("error" -> "'IN'", "hint" -> ""))
parameters = Map("error" -> "'INTO'", "hint" -> ""))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the error message before and after this change for this test case?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hey Allison,

Here is the error output before and after this change for this test case:

Before:

scala> spark.sql("SELECT * FROM S WHERE C1 IN (INSERT INTO T VALUES (2))").show()
org.apache.spark.sql.catalyst.parser.ParseException:
[PARSE_SYNTAX_ERROR] Syntax error at or near 'IN'. SQLSTATE: 42601 (line 1, pos 25)

== SQL ==
SELECT * FROM S WHERE C1 IN (INSERT INTO T VALUES (2))
-------------------------^^^

  at org.apache.spark.sql.catalyst.parser.ParseException.withCommand(parsers.scala:285)
  at org.apache.spark.sql.catalyst.parser.AbstractParser.parse(parsers.scala:97)
  at org.apache.spark.sql.execution.SparkSqlParser.parse(SparkSqlParser.scala:54)
  at org.apache.spark.sql.catalyst.parser.AbstractSqlParser.parsePlan(AbstractSqlParser.scala:93)
  at org.apache.spark.sql.classic.SparkSession.$anonfun$sql$5(SparkSession.scala:492)
  at org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:148)
  at org.apache.spark.sql.classic.SparkSession.$anonfun$sql$4(SparkSession.scala:491)
  at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:804)
  at org.apache.spark.sql.classic.SparkSession.sql(SparkSession.scala:490)
  at org.apache.spark.sql.classic.SparkSession.sql(SparkSession.scala:504)
  at org.apache.spark.sql.classic.SparkSession.sql(SparkSession.scala:513)
  at org.apache.spark.sql.classic.SparkSession.sql(SparkSession.scala:91)
  ... 42 elided

After:

scala> spark.sql("SELECT * FROM S WHERE C1 IN (INSERT INTO T VALUES (2))").show()
org.apache.spark.sql.catalyst.parser.ParseException:
[PARSE_SYNTAX_ERROR] Syntax error at or near 'INTO'. SQLSTATE: 42601 (line 1, pos 36)

== SQL ==
SELECT * FROM S WHERE C1 IN (INSERT INTO T VALUES (2))
------------------------------------^^^

  at org.apache.spark.sql.catalyst.parser.ParseException.withCommand(parsers.scala:267)
  at org.apache.spark.sql.catalyst.parser.AbstractParser.parse(parsers.scala:78)
  at org.apache.spark.sql.execution.SparkSqlParser.super$parse(SparkSqlParser.scala:163)
  at org.apache.spark.sql.execution.SparkSqlParser.$anonfun$parseInternal$1(SparkSqlParser.scala:163)
  at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(origin.scala:107)
  at org.apache.spark.sql.execution.SparkSqlParser.parseInternal(SparkSqlParser.scala:163)
  at org.apache.spark.sql.execution.SparkSqlParser.parseWithParameters(SparkSqlParser.scala:70)
  at org.apache.spark.sql.execution.SparkSqlParser.parsePlanWithParameters(SparkSqlParser.scala:84)
  at org.apache.spark.sql.classic.SparkSession.$anonfun$sql$6(SparkSession.scala:573)
  at org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:148)
  at org.apache.spark.sql.classic.SparkSession.$anonfun$sql$4(SparkSession.scala:572)
  at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:804)
  at org.apache.spark.sql.classic.SparkSession.sql(SparkSession.scala:563)
  at org.apache.spark.sql.classic.SparkSession.sql(SparkSession.scala:591)
  at org.apache.spark.sql.classic.SparkSession.sql(SparkSession.scala:682)
  at org.apache.spark.sql.classic.SparkSession.sql(SparkSession.scala:92)
  ... 42 elided

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hey @allisonwang-db, could you check this output and let me know what you think? Thanks!

}

test("relation in v2 catalog") {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -517,6 +517,40 @@ Project [NOT cast(null as int) IN (cast(1 as int),cast(2 as int),cast(null as in
+- OneRowRelation


-- !query
select 1 in ()
-- !query analysis
org.apache.spark.sql.catalyst.parser.ParseException
{
"errorClass" : "INVALID_SQL_SYNTAX.EMPTY_IN_PREDICATE",
"sqlState" : "42000",
"queryContext" : [ {
"objectType" : "",
"objectName" : "",
"startIndex" : 10,
"stopIndex" : 14,
"fragment" : "in ()"
} ]
}


-- !query
select 1 not in ()
-- !query analysis
org.apache.spark.sql.catalyst.parser.ParseException
{
"errorClass" : "INVALID_SQL_SYNTAX.EMPTY_IN_PREDICATE",
"sqlState" : "42000",
"queryContext" : [ {
"objectType" : "",
"objectName" : "",
"startIndex" : 10,
"stopIndex" : 18,
"fragment" : "not in ()"
} ]
}


-- !query
select 1 between 0 and 2
-- !query analysis
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,10 @@ select 1 not in ('2', '3', '4', null);
select null not in (1, 2, 3);
select null not in (1, 2, null);

-- Empty IN clause (negative case - should error)
select 1 in ();
select 1 not in ();

-- Between
select 1 between 0 and 2;
select 0.5 between 0 and 1;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -743,6 +743,42 @@ struct<(NOT (NULL IN (1, 2, NULL))):boolean>
-- !query output
NULL

-- !query
select 1 in ()
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.catalyst.parser.ParseException
{
"errorClass" : "INVALID_SQL_SYNTAX.EMPTY_IN_PREDICATE",
"sqlState" : "42000",
"queryContext" : [ {
"objectType" : "",
"objectName" : "",
"startIndex" : 10,
"stopIndex" : 14,
"fragment" : "in ()"
} ]
}


-- !query
select 1 not in ()
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.catalyst.parser.ParseException
{
"errorClass" : "INVALID_SQL_SYNTAX.EMPTY_IN_PREDICATE",
"sqlState" : "42000",
"queryContext" : [ {
"objectType" : "",
"objectName" : "",
"startIndex" : 10,
"stopIndex" : 18,
"fragment" : "not in ()"
} ]
}

-- !query
select 1 between 0 and 2
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -726,4 +726,42 @@ class QueryParsingErrorsSuite extends QueryTest with SharedSparkSession with SQL
start = 32,
stop = 58))
}

test("INVALID_SQL_SYNTAX.EMPTY_IN_PREDICATE: Empty IN clause") {
  // Each case is (query, expected fragment, start offset, stop offset). The reported
  // fragment is the predicate itself ("IN ()" / "NOT IN ()"), not the left-hand operand,
  // and the offsets are 0-based character positions of that fragment in the query.
  val emptyInCases = Seq(
    // Plain column on the left-hand side.
    ("SELECT * FROM range(10) WHERE id IN ()", "IN ()", 33, 37),
    // Parenthesized expression on the left-hand side.
    ("SELECT * FROM range(10) WHERE (id + 1) IN ()", "IN ()", 39, 43),
    // Negated form: the fragment also covers the NOT keyword.
    ("SELECT * FROM range(10) WHERE id NOT IN ()", "NOT IN ()", 33, 41))

  emptyInCases.foreach { case (query, expectedFragment, startOffset, stopOffset) =>
    checkError(
      exception = parseException(query),
      condition = "INVALID_SQL_SYNTAX.EMPTY_IN_PREDICATE",
      sqlState = "42000",
      parameters = Map.empty,
      context = ExpectedContext(
        fragment = expectedFragment,
        start = startOffset,
        stop = stopOffset))
  }
}
}