From 3258ae2c1477b2e5781989d9fadffd4bcb86a91e Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Tue, 31 Mar 2026 13:47:47 +0000 Subject: [PATCH 1/4] Replace all RDDs in SQLTestData with DataFrames Co-authored-by: Isaac --- .../org/apache/spark/sql/DataFrameSuite.scala | 32 +++++----- .../org/apache/spark/sql/SQLQuerySuite.scala | 14 ++--- .../scala/org/apache/spark/sql/UDFSuite.scala | 4 +- .../arrow/ArrowConvertersSuite.scala | 2 +- .../columnar/InMemoryColumnarQuerySuite.scala | 12 ++-- .../datasources/json/JsonSuite.scala | 4 +- .../apache/spark/sql/test/SQLTestData.scala | 63 +++++++++---------- 7 files changed, 65 insertions(+), 66 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index fdeed60e19292..2ed2d23a7a0fd 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -555,20 +555,20 @@ class DataFrameSuite extends QueryTest Seq(Row(3, 1), Row(3, 2), Row(2, 1), Row(2, 2), Row(1, 1), Row(1, 2))) checkAnswer( - arrayData.toDF().orderBy($"data".getItem(0).asc), - arrayData.toDF().collect().sortBy(_.getAs[Seq[Int]](0)(0)).toSeq) + arrayData.orderBy($"data".getItem(0).asc), + arrayData.as[ArrayData].collect().sortBy(_.data(0)).toSeq) checkAnswer( - arrayData.toDF().orderBy($"data".getItem(0).desc), - arrayData.toDF().collect().sortBy(_.getAs[Seq[Int]](0)(0)).reverse.toSeq) + arrayData.orderBy($"data".getItem(0).desc), + arrayData.as[ArrayData].collect().sortBy(_.data(0)).reverse.toSeq) checkAnswer( - arrayData.toDF().orderBy($"data".getItem(1).asc), - arrayData.toDF().collect().sortBy(_.getAs[Seq[Int]](0)(1)).toSeq) + arrayData.orderBy($"data".getItem(1).asc), + arrayData.as[ArrayData].collect().sortBy(_.data(1)).toSeq) checkAnswer( - arrayData.toDF().orderBy($"data".getItem(1).desc), - arrayData.toDF().collect().sortBy(_.getAs[Seq[Int]](0)(1)).reverse.toSeq) + arrayData.orderBy($"data".getItem(1).desc), + arrayData.as[ArrayData].collect().sortBy(_.data(1)).reverse.toSeq) } test("limit") { @@ -577,12 +577,12 @@ class DataFrameSuite extends QueryTest testData.take(10).toSeq) checkAnswer( - arrayData.toDF().limit(1), - arrayData.take(1).map(r => Row.fromSeq(r.productIterator.toSeq))) + arrayData.limit(1), + arrayData.take(1).toSeq) checkAnswer( - mapData.toDF().limit(1), - mapData.take(1).map(r => Row.fromSeq(r.productIterator.toSeq))) + mapData.limit(1), + mapData.take(1).toSeq) // SPARK-12340: overstep the bounds of Int in SparkPlan.executeTake checkAnswer( @@ -597,12 +597,12 @@ class DataFrameSuite extends QueryTest testData.collect().drop(90).toSeq) checkAnswer( - arrayData.toDF().offset(99), - arrayData.collect().drop(99).map(r => Row.fromSeq(r.productIterator.toSeq))) + arrayData.offset(99), + arrayData.collect().drop(99).toSeq) checkAnswer( - mapData.toDF().offset(99), - mapData.collect().drop(99).map(r => Row.fromSeq(r.productIterator.toSeq))) + mapData.offset(99), + mapData.collect().drop(99).toSeq) } test("limit with offset") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index ed04a66651b7d..48b5727aaa928 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -551,19 +551,19 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark checkAnswer( sql("SELECT * FROM arrayData ORDER BY data[0] ASC"), - arrayData.collect().sortBy(_.data(0)).map(Row.fromTuple).toSeq) + arrayData.as[ArrayData].collect().sortBy(_.data(0)).toSeq) checkAnswer( sql("SELECT * FROM arrayData ORDER BY data[0] DESC"), - arrayData.collect().sortBy(_.data(0)).reverse.map(Row.fromTuple).toSeq) + arrayData.as[ArrayData].collect().sortBy(_.data(0)).reverse.toSeq) checkAnswer( sql("SELECT * FROM mapData ORDER BY data[1] ASC"), - mapData.collect().sortBy(_.data(1)).map(Row.fromTuple).toSeq) + mapData.as[MapData].collect().sortBy(_.data(1)).toSeq) checkAnswer( sql("SELECT * FROM mapData ORDER BY data[1] DESC"), - mapData.collect().sortBy(_.data(1)).reverse.map(Row.fromTuple).toSeq) + mapData.as[MapData].collect().sortBy(_.data(1)).reverse.toSeq) } test("external sorting") { @@ -1007,7 +1007,7 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark StructField("f3", BooleanType, false) :: StructField("f4", IntegerType, true) :: Nil) - val rowRDD1 = unparsedStrings.map { r => + val rowRDD1 = unparsedStrings.as[String].rdd.map { r => val values = r.split(",").map(_.trim) val v4 = try values(3).toInt catch { case _: NumberFormatException => null @@ -1037,7 +1037,7 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark StructField("f12", BooleanType, false) :: Nil), false) :: StructField("f2", MapType(StringType, IntegerType, true), false) :: Nil) - val rowRDD2 = unparsedStrings.map { r => + val rowRDD2 = unparsedStrings.as[String].rdd.map { r => val values = r.split(",").map(_.trim) val v4 = try values(3).toInt catch { case _: NumberFormatException => null @@ -1064,7 +1064,7 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark Row(4, 2147483644) :: Nil) // The value of a MapType column can be a mutable map. - val rowRDD3 = unparsedStrings.map { r => + val rowRDD3 = unparsedStrings.as[String].rdd.map { r => val values = r.split(",").map(_.trim) val v4 = try values(3).toInt catch { case _: NumberFormatException => null diff --git a/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala index 31ed0f26d9b95..1f9655ff8546e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala @@ -359,11 +359,11 @@ class UDFSuite extends QueryTest with SharedSparkSession { sql(""" | SELECT tmp.t.* FROM | (SELECT arrayDataFunc(data, nestedData) AS t FROM arrayData) tmp - """.stripMargin).toDF(), arrayData.toDF()) + """.stripMargin).toDF(), arrayData) checkAnswer( sql(""" | SELECT mapDataFunc(data) AS t FROM mapData - """.stripMargin).toDF(), mapData.toDF()) + """.stripMargin).toDF(), mapData) checkAnswer( sql(""" | SELECT tmp.t.* FROM diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowConvertersSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowConvertersSuite.scala index 95cd97c2c742d..0d7d15b8fcd0c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowConvertersSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowConvertersSuite.scala @@ -1304,7 +1304,7 @@ class ArrowConvertersSuite extends SharedSparkSession { } test("interval is supported for arrow") { - val collected = calendarIntervalData.toDF().toArrowBatchRdd.collect() + val collected = calendarIntervalData.toArrowBatchRdd.collect() assert(collected.length == 1) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala index 4f07d3d1c0300..dda9feed5cbf1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala @@ -185,25 +185,25 @@ class InMemoryColumnarQuerySuite extends QueryTest test("SPARK-1678 regression: compression must not lose repeated values") { checkAnswer( sql("SELECT * FROM repeatedData"), - repeatedData.collect().toSeq.map(Row.fromTuple)) + repeatedData.collect().toSeq) spark.catalog.cacheTable("repeatedData") checkAnswer( sql("SELECT * FROM repeatedData"), - repeatedData.collect().toSeq.map(Row.fromTuple)) + repeatedData.collect().toSeq) } test("with null values") { checkAnswer( sql("SELECT * FROM nullableRepeatedData"), - nullableRepeatedData.collect().toSeq.map(Row.fromTuple)) + nullableRepeatedData.collect().toSeq) spark.catalog.cacheTable("nullableRepeatedData") checkAnswer( sql("SELECT * FROM nullableRepeatedData"), - nullableRepeatedData.collect().toSeq.map(Row.fromTuple)) + nullableRepeatedData.collect().toSeq) } test("SPARK-2729 regression: timestamp data type") { @@ -226,13 +226,13 @@ class InMemoryColumnarQuerySuite extends QueryTest test("SPARK-3320 regression: batched column buffer building should work with empty partitions") { checkAnswer( sql("SELECT * FROM withEmptyParts"), - withEmptyParts.collect().toSeq.map(Row.fromTuple)) + withEmptyParts.collect().toSeq) spark.catalog.cacheTable("withEmptyParts") checkAnswer( sql("SELECT * FROM withEmptyParts"), - withEmptyParts.collect().toSeq.map(Row.fromTuple)) + withEmptyParts.collect().toSeq) } test("SPARK-4182 Caching complex types") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala index b4f194e7438f7..699ddf8201672 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala @@ -1252,7 +1252,7 @@ abstract class JsonSuite StructField("f4", ArrayType(StringType), nullable = true) :: StructField("f5", IntegerType, true) :: Nil) - val rowRDD1 = unparsedStrings.map { r => + val rowRDD1 = unparsedStrings.as[String].rdd.map { r => val values = r.split(",").map(_.trim) val v5 = try values(3).toInt catch { case _: NumberFormatException => null @@ -1275,7 +1275,7 @@ abstract class JsonSuite StructField("f12", BooleanType, false) :: Nil), false) :: StructField("f2", MapType(StringType, IntegerType, true), false) :: Nil) - val rowRDD2 = unparsedStrings.map { r => + val rowRDD2 = unparsedStrings.as[String].rdd.map { r => val values = r.split(",").map(_.trim) val v4 = try values(3).toInt catch { case _: NumberFormatException => null diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestData.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestData.scala index bd5dd038f5d44..4caa83acc877c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestData.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestData.scala @@ -20,7 +20,6 @@ package org.apache.spark.sql.test import java.nio.charset.StandardCharsets import java.time.{Duration, Period} -import org.apache.spark.rdd.RDD import org.apache.spark.sql.SparkSessionProvider import org.apache.spark.sql.classic import org.apache.spark.sql.classic.{DataFrame, SQLImplicits} @@ -156,44 +155,44 @@ private[sql] trait SQLTestData extends SparkSessionProvider { self => df } - protected lazy val arrayData: RDD[ArrayData] = { - val rdd = spark.sparkContext.parallelize( + protected lazy val arrayData: DataFrame = { + val df = spark.sparkContext.parallelize( ArrayData(Seq(1, 2, 3), Seq(Seq(1, 2, 3))) :: - ArrayData(Seq(2, 3, 4), Seq(Seq(2, 3, 4))) :: Nil) - rdd.toDF().createOrReplaceTempView("arrayData") - rdd + ArrayData(Seq(2, 3, 4), Seq(Seq(2, 3, 4))) :: Nil).toDF() + df.createOrReplaceTempView("arrayData") + df } - protected lazy val mapData: RDD[MapData] = { - val rdd = spark.sparkContext.parallelize( + protected lazy val mapData: DataFrame = { + val df = spark.sparkContext.parallelize( MapData(Map(1 -> "a1", 2 -> "b1", 3 -> "c1", 4 -> "d1", 5 -> "e1")) :: MapData(Map(1 -> "a2", 2 -> "b2", 3 -> "c2", 4 -> "d2")) :: MapData(Map(1 -> "a3", 2 -> "b3", 3 -> "c3")) :: MapData(Map(1 -> "a4", 2 -> "b4")) :: - MapData(Map(1 -> "a5")) :: Nil) - rdd.toDF().createOrReplaceTempView("mapData") - rdd + MapData(Map(1 -> "a5")) :: Nil).toDF() + df.createOrReplaceTempView("mapData") + df } - protected lazy val calendarIntervalData: RDD[IntervalData] = { - val rdd = spark.sparkContext.parallelize( - IntervalData(new CalendarInterval(1, 1, 1)) :: Nil) - rdd.toDF().createOrReplaceTempView("calendarIntervalData") - rdd + protected lazy val calendarIntervalData: DataFrame = { + val df = spark.sparkContext.parallelize( + IntervalData(new CalendarInterval(1, 1, 1)) :: Nil).toDF() + df.createOrReplaceTempView("calendarIntervalData") + df } - protected lazy val repeatedData: RDD[StringData] = { - val rdd = spark.sparkContext.parallelize(List.fill(2)(StringData("test"))) - rdd.toDF().createOrReplaceTempView("repeatedData") - rdd + protected lazy val repeatedData: DataFrame = { + val df = spark.sparkContext.parallelize(List.fill(2)(StringData("test"))).toDF() + df.createOrReplaceTempView("repeatedData") + df } - protected lazy val nullableRepeatedData: RDD[StringData] = { - val rdd = spark.sparkContext.parallelize( + protected lazy val nullableRepeatedData: DataFrame = { + val df = spark.sparkContext.parallelize( List.fill(2)(StringData(null)) ++ - List.fill(2)(StringData("test"))) - rdd.toDF().createOrReplaceTempView("nullableRepeatedData") - rdd + List.fill(2)(StringData("test"))).toDF() + df.createOrReplaceTempView("nullableRepeatedData") + df } protected lazy val nullInts: DataFrame = { @@ -231,19 +230,19 @@ private[sql] trait SQLTestData extends SparkSessionProvider { self => df } - protected lazy val unparsedStrings: RDD[String] = { + protected lazy val unparsedStrings: DataFrame = { spark.sparkContext.parallelize( "1, A1, true, null" :: "2, B2, false, null" :: "3, C3, true, null" :: - "4, D4, true, 2147483644" :: Nil) + "4, D4, true, 2147483644" :: Nil).toDF("value") } - // An RDD with 4 elements and 8 partitions - protected lazy val withEmptyParts: RDD[IntField] = { - val rdd = spark.sparkContext.parallelize((1 to 4).map(IntField), 8) - rdd.toDF().createOrReplaceTempView("withEmptyParts") - rdd + // A DataFrame with 4 elements and 8 partitions + protected lazy val withEmptyParts: DataFrame = { + val df = spark.sparkContext.parallelize((1 to 4).map(IntField), 8).toDF() + df.createOrReplaceTempView("withEmptyParts") + df } protected lazy val person: DataFrame = { From 1bcb07959f1fe946a70377f80a3767e9d6a6ba2b Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Wed, 1 Apr 2026 00:32:41 +0000 Subject: [PATCH 2/4] Fix compilation: add missing ArrayData import and Row.fromTuple conversion Co-authored-by: Isaac --- .../scala/org/apache/spark/sql/DataFrameSuite.scala | 10 +++++----- .../scala/org/apache/spark/sql/SQLQuerySuite.scala | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index 2ed2d23a7a0fd..6ac594b92ec12 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -45,7 +45,7 @@ import org.apache.spark.sql.expressions.{Aggregator, Window} import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.{ExamplePoint, ExamplePointUDT, SharedSparkSession} -import org.apache.spark.sql.test.SQLTestData.{ArrayStringWrapper, ContainerStringWrapper, StringWrapper, TestData2} +import org.apache.spark.sql.test.SQLTestData.{ArrayData, ArrayStringWrapper, ContainerStringWrapper, StringWrapper, TestData2} import org.apache.spark.sql.types._ import org.apache.spark.tags.SlowSQLTest import org.apache.spark.unsafe.types.CalendarInterval @@ -556,19 +556,19 @@ class DataFrameSuite extends QueryTest checkAnswer( arrayData.orderBy($"data".getItem(0).asc), - arrayData.as[ArrayData].collect().sortBy(_.data(0)).toSeq) + arrayData.as[ArrayData].collect().sortBy(_.data(0)).map(Row.fromTuple).toSeq) checkAnswer( arrayData.orderBy($"data".getItem(0).desc), - arrayData.as[ArrayData].collect().sortBy(_.data(0)).reverse.toSeq) + arrayData.as[ArrayData].collect().sortBy(_.data(0)).reverse.map(Row.fromTuple).toSeq) checkAnswer( arrayData.orderBy($"data".getItem(1).asc), - arrayData.as[ArrayData].collect().sortBy(_.data(1)).toSeq) + arrayData.as[ArrayData].collect().sortBy(_.data(1)).map(Row.fromTuple).toSeq) checkAnswer( arrayData.orderBy($"data".getItem(1).desc), - arrayData.as[ArrayData].collect().sortBy(_.data(1)).reverse.toSeq) + arrayData.as[ArrayData].collect().sortBy(_.data(1)).reverse.map(Row.fromTuple).toSeq) } test("limit") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 48b5727aaa928..59a69b6f5be46 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -551,19 +551,19 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark checkAnswer( sql("SELECT * FROM arrayData ORDER BY data[0] ASC"), - arrayData.as[ArrayData].collect().sortBy(_.data(0)).toSeq) + arrayData.as[ArrayData].collect().sortBy(_.data(0)).map(Row.fromTuple).toSeq) checkAnswer( sql("SELECT * FROM arrayData ORDER BY data[0] DESC"), - arrayData.as[ArrayData].collect().sortBy(_.data(0)).reverse.toSeq) + arrayData.as[ArrayData].collect().sortBy(_.data(0)).reverse.map(Row.fromTuple).toSeq) checkAnswer( sql("SELECT * FROM mapData ORDER BY data[1] ASC"), - mapData.as[MapData].collect().sortBy(_.data(1)).toSeq) + mapData.as[MapData].collect().sortBy(_.data(1)).map(Row.fromTuple).toSeq) checkAnswer( sql("SELECT * FROM mapData ORDER BY data[1] DESC"), - mapData.as[MapData].collect().sortBy(_.data(1)).reverse.toSeq) + mapData.as[MapData].collect().sortBy(_.data(1)).reverse.map(Row.fromTuple).toSeq) } test("external sorting") { From 7c554b6be5641068bcbd476477ad4aaec759c9e9 Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Wed, 1 Apr 2026 03:22:20 +0000 Subject: [PATCH 3/4] Simplify sortBy to use getAs on Row directly instead of as[CaseClass] Co-authored-by: Isaac --- .../scala/org/apache/spark/sql/DataFrameSuite.scala | 10 +++++----- .../scala/org/apache/spark/sql/SQLQuerySuite.scala | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index 6ac594b92ec12..d64e5cf4ddeec 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -45,7 +45,7 @@ import org.apache.spark.sql.expressions.{Aggregator, Window} import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.{ExamplePoint, ExamplePointUDT, SharedSparkSession} -import org.apache.spark.sql.test.SQLTestData.{ArrayData, ArrayStringWrapper, ContainerStringWrapper, StringWrapper, TestData2} +import org.apache.spark.sql.test.SQLTestData.{ArrayStringWrapper, ContainerStringWrapper, StringWrapper, TestData2} import org.apache.spark.sql.types._ import org.apache.spark.tags.SlowSQLTest import org.apache.spark.unsafe.types.CalendarInterval @@ -556,19 +556,19 @@ class DataFrameSuite extends QueryTest checkAnswer( arrayData.orderBy($"data".getItem(0).asc), - arrayData.as[ArrayData].collect().sortBy(_.data(0)).map(Row.fromTuple).toSeq) + arrayData.collect().sortBy(_.getAs[Seq[Int]](0)(0)).toSeq) checkAnswer( arrayData.orderBy($"data".getItem(0).desc), - arrayData.as[ArrayData].collect().sortBy(_.data(0)).reverse.map(Row.fromTuple).toSeq) + arrayData.collect().sortBy(_.getAs[Seq[Int]](0)(0)).reverse.toSeq) checkAnswer( arrayData.orderBy($"data".getItem(1).asc), - arrayData.as[ArrayData].collect().sortBy(_.data(1)).map(Row.fromTuple).toSeq) + arrayData.collect().sortBy(_.getAs[Seq[Int]](0)(1)).toSeq) checkAnswer( arrayData.orderBy($"data".getItem(1).desc), - arrayData.as[ArrayData].collect().sortBy(_.data(1)).reverse.map(Row.fromTuple).toSeq) + arrayData.collect().sortBy(_.getAs[Seq[Int]](0)(1)).reverse.toSeq) } test("limit") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 59a69b6f5be46..096588d6e6dad 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -551,19 +551,19 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark checkAnswer( sql("SELECT * FROM arrayData ORDER BY data[0] ASC"), - arrayData.as[ArrayData].collect().sortBy(_.data(0)).map(Row.fromTuple).toSeq) + arrayData.collect().sortBy(_.getAs[Seq[Int]](0)(0)).toSeq) checkAnswer( sql("SELECT * FROM arrayData ORDER BY data[0] DESC"), - arrayData.as[ArrayData].collect().sortBy(_.data(0)).reverse.map(Row.fromTuple).toSeq) + arrayData.collect().sortBy(_.getAs[Seq[Int]](0)(0)).reverse.toSeq) checkAnswer( sql("SELECT * FROM mapData ORDER BY data[1] ASC"), - mapData.as[MapData].collect().sortBy(_.data(1)).map(Row.fromTuple).toSeq) + mapData.collect().sortBy(_.getAs[Map[Int, String]](0)(1)).toSeq) checkAnswer( sql("SELECT * FROM mapData ORDER BY data[1] DESC"), - mapData.as[MapData].collect().sortBy(_.data(1)).reverse.map(Row.fromTuple).toSeq) + mapData.collect().sortBy(_.getAs[Map[Int, String]](0)(1)).reverse.toSeq) } test("external sorting") { From a047aa1a080976d36d6653709bc3e4590f9b0450 Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Wed, 1 Apr 2026 03:35:33 +0000 Subject: [PATCH 4/4] Remove unnecessary .toSeq on Array[Row] passed to checkAnswer Co-authored-by: Isaac --- .../test/scala/org/apache/spark/sql/DataFrameSuite.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index d64e5cf4ddeec..9c2f4188992ed 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -578,11 +578,11 @@ class DataFrameSuite extends QueryTest checkAnswer( arrayData.limit(1), - arrayData.take(1).toSeq) + arrayData.take(1)) checkAnswer( mapData.limit(1), - mapData.take(1).toSeq) + mapData.take(1)) // SPARK-12340: overstep the bounds of Int in SparkPlan.executeTake checkAnswer( @@ -598,11 +598,11 @@ class DataFrameSuite extends QueryTest checkAnswer( arrayData.offset(99), - arrayData.collect().drop(99).toSeq) + arrayData.collect().drop(99)) checkAnswer( mapData.offset(99), - mapData.collect().drop(99).toSeq) + mapData.collect().drop(99)) } test("limit with offset") {