Description
Describe the bug
The query below fails on 25.10 when AQE is enabled.
Simple repro case:
spark.sql("SELECT id, value FROM VALUES (1, 10.0), (2, 20.0) AS t(id, value)").createOrReplaceTempView("t_simple")
spark.sql("""
SELECT SUM(value) AS s
FROM t_simple
""").persist()
spark.sql("""
SELECT SUM(value) AS s
FROM t_simple
""").collect()
Failure:
Traceback (most recent call last):
File "<stdin>", line 4, in <module>
File "/home/nartal/apache-spark-4.0-released/spark-4.0.0-bin-hadoop3/python/pyspark/sql/classic/dataframe.py", line 443, in collect
sock_info = self._jdf.collectToPython()
File "/home/nartal/apache-spark-4.0-released/spark-4.0.0-bin-hadoop3/python/lib/py4j-0.10.9.9-src.zip/py4j/java_gateway.py", line 1362, in __call__
File "/home/nartal/apache-spark-4.0-released/spark-4.0.0-bin-hadoop3/python/pyspark/errors/exceptions/captured.py", line 282, in deco
return f(*a, **kw)
File "/home/nartal/apache-spark-4.0-released/spark-4.0.0-bin-hadoop3/python/lib/py4j-0.10.9.9-src.zip/py4j/protocol.py", line 327, in get_return_value
py4j.protocol.Py4JJavaError: An error occurred while calling o103.collectToPython.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 12.0 failed 1 times, most recent failure: Lost task 0.0 in stage 12.0 (TID 10) (10.110.46.105 executor driver): java.lang.IllegalStateException: DATA ACCESS MUST BE ON A HOST VECTOR
at com.nvidia.spark.rapids.GpuColumnVectorBase.isNullAt(GpuColumnVectorBase.java:53)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
at org.apache.spark.sql.execution.WholeStageCodegenEvaluatorFactory$WholeStageCodegenPartitionEvaluator$$anon$1.hasNext(WholeStageCodegenEvaluatorFactory.scala:50)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$getByteArrayRdd$1(SparkPlan.scala:402)
at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:901)
at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:901)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:374)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:338)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:171)
at org.apache.spark.scheduler.Task.run(Task.scala:147)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$5(Executor.scala:647)
at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:80)
at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:77)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:99)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:650)
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136)
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635)
at java.base/java.lang.Thread.run(Thread.java:833)
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$3(DAGScheduler.scala:2935)
at scala.Option.getOrElse(Option.scala:201)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2935)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2927)
at scala.collection.immutable.List.foreach(List.scala:334)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2927)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1295)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1295)
at scala.Option.foreach(Option.scala:437)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1295)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:3207)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:3141)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:3130)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:50)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:1009)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2484)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2505)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2524)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2549)
at org.apache.spark.rdd.RDD.$anonfun$collect$1(RDD.scala:1057)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:417)
at org.apache.spark.rdd.RDD.collect(RDD.scala:1056)
at org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:462)
at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.$anonfun$executeCollect$1(AdaptiveSparkPlanExec.scala:402)
at org.apache.spark.sql.execution.adaptive.ResultQueryStageExec.$anonfun$doMaterialize$1(QueryStageExec.scala:325)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withThreadLocalCaptured$4(SQLExecution.scala:318)
at org.apache.spark.sql.execution.SQLExecution$.withSessionTagsApplied(SQLExecution.scala:268)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withThreadLocalCaptured$3(SQLExecution.scala:316)
at org.apache.spark.JobArtifactSet$.withActiveJobArtifactState(JobArtifactSet.scala:94)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withThreadLocalCaptured$2(SQLExecution.scala:312)
at java.base/java.util.concurrent.CompletableFuture$AsyncSupply.run(CompletableFuture.java:1768)
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136)
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635)
at java.base/java.lang.Thread.run(Thread.java:833)
Caused by: java.lang.IllegalStateException: DATA ACCESS MUST BE ON A HOST VECTOR
at com.nvidia.spark.rapids.GpuColumnVectorBase.isNullAt(GpuColumnVectorBase.java:53)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
at org.apache.spark.sql.execution.WholeStageCodegenEvaluatorFactory$WholeStageCodegenPartitionEvaluator$$anon$1.hasNext(WholeStageCodegenEvaluatorFactory.scala:50)
Expected behavior
The query above should complete without errors.
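For reference, on CPU-only Spark the same query returns a single row summing the two values (the exact decimal precision may vary):

[Row(s=Decimal('30.0'))]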
Environment details (please complete the following information)
Additional context
Related PR: #13434
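Possible workarounds, inferred from the observation that the failure only reproduces with AQE enabled (untested, and not a fix for the underlying issue):

spark.conf.set("spark.sql.adaptive.enabled", "false")  # disable AQE for the session
spark.catalog.clearCache()                             # or drop the cached plan entirely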