diff --git a/.github/workflows/maven_test.yml b/.github/workflows/maven_test.yml
index 1df4cdc7a6b7f..1696adfaef525 100644
--- a/.github/workflows/maven_test.yml
+++ b/.github/workflows/maven_test.yml
@@ -198,7 +198,7 @@ jobs:
           if [[ "$INCLUDED_TAGS" != "" ]]; then
             ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pjvm-profiler -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} -Dtest.include.tags="$INCLUDED_TAGS" test -fae
           elif [[ "$MODULES_TO_TEST" == "connect" ]]; then
-            ./build/mvn $MAVEN_CLI_OPTS -Dtest.exclude.tags="$EXCLUDED_TAGS" -Djava.version=${JAVA_VERSION/-ea} -pl sql/connect/client/jvm,sql/connect/common,sql/connect/server test -fae
+            ./build/mvn $MAVEN_CLI_OPTS -Dtest.exclude.tags="$EXCLUDED_TAGS" -Djava.version=${JAVA_VERSION/-ea} -pl sql/connect/client/jvm,sql/connect/client/integration-tests,sql/connect/common,sql/connect/server package test -fae
           elif [[ "$EXCLUDED_TAGS" != "" ]]; then
             ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pjvm-profiler -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} -Dtest.exclude.tags="$EXCLUDED_TAGS" test -fae
           elif [[ "$MODULES_TO_TEST" == *"sql#hive-thriftserver"* ]]; then
diff --git a/assembly/pom.xml b/assembly/pom.xml
index a85ac5d9bc837..efc354654d2e0 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -150,20 +150,6 @@
            </configuration>
          </execution>
        </executions>
      </plugin>
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-deploy-plugin</artifactId>
-        <configuration>
-          <skip>true</skip>
-        </configuration>
-      </plugin>
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-install-plugin</artifactId>
-        <configuration>
-          <skip>true</skip>
-        </configuration>
-      </plugin>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
diff --git a/dev/lint-scala b/dev/lint-scala
index 19c8853c787db..42cdf29df2f66 100755
--- a/dev/lint-scala
+++ b/dev/lint-scala
@@ -35,13 +35,14 @@ ERRORS=$(./build/mvn \
   -pl sql/connect/common \
   -pl sql/connect/server \
   -pl sql/connect/client/jvm \
+  -pl sql/connect/client/integration-tests \
   2>&1 | grep -e "Unformatted files found" \
 )

 if test ! -z "$ERRORS"; then
   echo -e "The scalafmt check failed on sql/connect or sql/connect at following occurrences:\n\n$ERRORS\n"
   echo "Before submitting your change, please make sure to format your code using the following command:"
-  echo "./build/mvn scalafmt:format -Dscalafmt.skip=false -Dscalafmt.validateOnly=false -Dscalafmt.changedOnly=false -pl sql/api -pl sql/connect/common -pl sql/connect/server -pl sql/connect/client/jvm"
+  echo "./build/mvn scalafmt:format -Dscalafmt.skip=false -Dscalafmt.validateOnly=false -Dscalafmt.changedOnly=false -pl sql/api -pl sql/connect/common -pl sql/connect/server -pl sql/connect/client/jvm -pl sql/connect/client/integration-tests"
   exit 1
 else
   echo -e "Scalafmt checks passed."
diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py
index db2581afd2d50..c69e665df423e 100644
--- a/dev/sparktestsupport/modules.py
+++ b/dev/sparktestsupport/modules.py
@@ -338,6 +338,7 @@ def __hash__(self):
     sbt_test_goals=[
         "connect/test",
         "connect-client-jvm/test",
+        "connect-client-integration-tests/test",
     ],
 )

diff --git a/dev/test-jars.txt b/dev/test-jars.txt
index bd8fc93bc9f0f..159afd3cd8edb 100644
--- a/dev/test-jars.txt
+++ b/dev/test-jars.txt
@@ -3,8 +3,8 @@ core/src/test/resources/TestHelloV3_2.13.jar
 core/src/test/resources/TestUDTF.jar
 data/artifact-tests/junitLargeJar.jar
 data/artifact-tests/smallJar.jar
-sql/connect/client/jvm/src/test/resources/TestHelloV2_2.13.jar
-sql/connect/client/jvm/src/test/resources/udf2.13.jar
+sql/connect/client/integration-tests/src/test/resources/TestHelloV2_2.13.jar
+sql/connect/client/integration-tests/src/test/resources/udf2.13.jar
 sql/connect/common/src/test/resources/artifact-tests/junitLargeJar.jar
 sql/connect/common/src/test/resources/artifact-tests/smallJar.jar
 sql/core/src/test/resources/SPARK-33084.jar
diff --git a/pom.xml b/pom.xml
index a7c8874e58b70..0a4765b3969f9 100644
--- a/pom.xml
+++ b/pom.xml
@@ -98,6 +98,7 @@
     <module>sql/connect/server</module>
     <module>sql/connect/common</module>
     <module>sql/connect/client/jvm</module>
+    <module>sql/connect/client/integration-tests</module>
     <module>assembly</module>
     <module>examples</module>
     <module>repl</module>
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index bc901b0fe6be2..a2fc60dde372e 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -52,9 +52,9 @@ object BuildCommons {
   val streamingProjects@Seq(streaming, streamingKafka010) =
     Seq("streaming", "streaming-kafka-0-10").map(ProjectRef(buildLocation, _))

-  val connectProjects@Seq(connectCommon, connect, connectClient, connectShims) =
-    Seq("connect-common", "connect", "connect-client-jvm", "connect-shims")
-      .map(ProjectRef(buildLocation, _))
+  val connectProjects@Seq(connectCommon, connect, connectClient, connectClientIT, connectShims) =
+    Seq("connect-common", "connect", "connect-client-jvm", "connect-client-integration-tests",
+      "connect-shims").map(ProjectRef(buildLocation, _))

   val allProjects@Seq(
     core, graphx, mllib, mllibLocal, repl, networkCommon, networkShuffle, launcher, unsafe, tags, sketch, kvstore,
@@ -369,7 +369,7 @@
     Seq(
       spark, hive, hiveThriftServer, repl, networkCommon, networkShuffle, networkYarn,
       unsafe, tags, tokenProviderKafka010, sqlKafka010, connectCommon, connect, connectClient,
-      variant, connectShims, profiler
+      variant, connectShims, connectClientIT, profiler
     ).contains(x)
   }

@@ -417,6 +417,7 @@
     enable(SparkConnectCommon.settings)(connectCommon)
     enable(SparkConnect.settings)(connect)
     enable(SparkConnectClient.settings)(connectClient)
+    enable(SparkConnectClientIT.settings)(connectClientIT)

     /* Protobuf settings */
     enable(SparkProtobuf.settings)(protobuf)
@@ -804,7 +805,6 @@ object SparkConnect {
 object SparkConnectClient {
   import BuildCommons.protoVersion

-  val buildTestDeps = TaskKey[Unit]("buildTestDeps", "Build needed dependencies for test.")

   lazy val settings = Seq(
     // For some reason the resolution from the imported Maven build does not work for some
@@ -828,18 +828,6 @@ object SparkConnectClient {
       )
     },

-    buildTestDeps := {
-      (LocalProject("assembly") / Compile / Keys.`package`).value
-      (LocalProject("catalyst") / Test / Keys.`package`).value
-    },
-
-    // SPARK-42538: Make sure the `${SPARK_HOME}/assembly/target/scala-$SPARK_SCALA_VERSION/jars` is available for testing.
-    // At the same time, the build of `connect`, `connect-client-jvm` and `sql` will be triggered by `assembly` build,
-    // so no additional configuration is required.
-    test := ((Test / test) dependsOn (buildTestDeps)).value,
-
-    testOnly := ((Test / testOnly) dependsOn (buildTestDeps)).evaluated,
-
     (assembly / test) := { },

     (assembly / logLevel) := Level.Info,
@@ -859,11 +847,11 @@ object SparkConnectClient {
     },

     (assembly / assemblyShadeRules) := Seq(
+      ShadeRule.rename("com.google.protobuf.**" -> "org.sparkproject.com.google.protobuf.@1").inAll,
       ShadeRule.rename("io.grpc.**" -> "org.sparkproject.connect.client.io.grpc.@1").inAll,
       ShadeRule.rename("com.google.**" -> "org.sparkproject.connect.client.com.google.@1").inAll,
       ShadeRule.rename("io.netty.**" -> "org.sparkproject.connect.client.io.netty.@1").inAll,
       ShadeRule.rename("org.checkerframework.**" -> "org.sparkproject.connect.client.org.checkerframework.@1").inAll,
-      ShadeRule.rename("javax.annotation.**" -> "org.sparkproject.connect.client.javax.annotation.@1").inAll,
       ShadeRule.rename("io.perfmark.**" -> "org.sparkproject.connect.client.io.perfmark.@1").inAll,
       ShadeRule.rename("org.codehaus.**" -> "org.sparkproject.connect.client.org.codehaus.@1").inAll,
       ShadeRule.rename("android.annotation.**" -> "org.sparkproject.connect.client.android.annotation.@1").inAll
@@ -878,6 +866,44 @@ object SparkConnectClient {
     )
 }

+object SparkConnectClientIT {
+  val buildTestDeps = TaskKey[Unit]("buildTestDeps", "Build needed dependencies for test.")
+
+  def filterSparkDependencies(
+      files: Seq[Attributed[File]],
+      scalaBinaryVer: String): Seq[Attributed[File]] = {
+    val packageNames = Seq(
+      "spark-connect-client-jvm",
+      "spark-connect-common",
+      "spark-connect",
+      "spark-sql"
+    ).map(name => s"${name}_$scalaBinaryVer")
+
+    files.filterNot(f => packageNames.exists(name => f.toString.contains(name)))
+  }
+
+  lazy val settings = Seq(
+    buildTestDeps := {
+      (LocalProject("connect-client-jvm") / assembly).value
+      (LocalProject("catalyst") / Test / Keys.`package`).value
+    },
+
+    (Test / compile) := ((Test / compile) dependsOn buildTestDeps).value,
+    test := ((Test / test) dependsOn (buildTestDeps)).value,
+    testOnly := ((Test / testOnly) dependsOn (buildTestDeps)).evaluated,
+
+    (Test / dependencyClasspath) :=
+      filterSparkDependencies((Test / dependencyClasspath).value, scalaBinaryVersion.value),
+    (Test / fullClasspath) :=
+      filterSparkDependencies((Test / fullClasspath).value, scalaBinaryVersion.value),
+    (Test / unmanagedJars) += {
+      val jarName = s"spark-connect-client-jvm-assembly-${version.value}.jar"
+      val basePath = s"${baseDirectory.value}/../jvm/target/scala-${scalaBinaryVersion.value}"
+      Attributed.blank(file(s"$basePath/$jarName"))
+    }
+  )
+}
+
 object SparkProtobuf {
   import BuildCommons.protoVersion

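Note on the sbt changes above: the new SparkConnectClientIT project compiles and runs its tests against the shaded spark-connect-client-jvm assembly rather than the unshaded module classes. filterSparkDependencies strips the unshaded spark-connect-client-jvm, spark-connect-common, spark-connect, and spark-sql artifacts from the test classpath, and Test / unmanagedJars adds the assembly jar back. A minimal, self-contained sketch of that filtering, reduced to plain strings (the classpath entries below are made up for illustration):

    // Sketch of SparkConnectClientIT.filterSparkDependencies; sample paths are hypothetical.
    object FilterSketch {
      def filterSparkDependencies(files: Seq[String], scalaBinaryVer: String): Seq[String] = {
        val packageNames =
          Seq("spark-connect-client-jvm", "spark-connect-common", "spark-connect", "spark-sql")
            .map(name => s"${name}_$scalaBinaryVer")
        files.filterNot(f => packageNames.exists(name => f.contains(name)))
      }

      def main(args: Array[String]): Unit = {
        val classpath = Seq(
          "/cache/org.apache.spark/spark-sql_2.13-4.1.0-SNAPSHOT.jar",
          "/cache/org.scalatest/scalatest_2.13-3.2.19.jar")
        // Prints only the scalatest jar; the shaded client assembly is added back
        // separately through Test / unmanagedJars.
        filterSparkDependencies(classpath, "2.13").foreach(println)
      }
    }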
diff --git a/sql/connect/client/integration-tests/pom.xml b/sql/connect/client/integration-tests/pom.xml
new file mode 100644
index 0000000000000..b7d8570e58a8a
--- /dev/null
+++ b/sql/connect/client/integration-tests/pom.xml
@@ -0,0 +1,117 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one or more
+  ~ contributor license agreements.  See the NOTICE file distributed with
+  ~ this work for additional information regarding copyright ownership.
+  ~ The ASF licenses this file to You under the Apache License, Version 2.0
+  ~ (the "License"); you may not use this file except in compliance with
+  ~ the License.  You may obtain a copy of the License at
+  ~
+  ~    http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS,
+  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~ See the License for the specific language governing permissions and
+  ~ limitations under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.apache.spark</groupId>
+    <artifactId>spark-parent_2.13</artifactId>
+    <version>4.1.0-SNAPSHOT</version>
+    <relativePath>../../../../pom.xml</relativePath>
+  </parent>
+
+  <artifactId>spark-connect-client-integration-tests_2.13</artifactId>
+  <packaging>jar</packaging>
+  <name>Spark Project Connect Client Integration Test</name>
+  <url>https://spark.apache.org/</url>
+  <properties>
+    <sbt.project.name>connect-client-integration-tests</sbt.project.name>
+  </properties>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-connect-client-jvm_${scala.binary.version}</artifactId>
+      <exclusions>
+        <exclusion>
+          <groupId>org.apache.spark</groupId>
+          <artifactId>spark-connect-common_${scala.binary.version}</artifactId>
+        </exclusion>
+      </exclusions>
+      <version>${project.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-sql-api_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-sql-api_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+      <classifier>tests</classifier>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-tags_${scala.binary.version}</artifactId>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-common-utils_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+      <classifier>tests</classifier>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-assembly_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+      <type>pom</type>
+      <exclusions>
+        <exclusion>
+          <groupId>org.apache.spark</groupId>
+          <artifactId>spark-sql_${scala.binary.version}</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.apache.spark</groupId>
+          <artifactId>spark-connect_${scala.binary.version}</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.apache.spark</groupId>
+          <artifactId>spark-connect-common_${scala.binary.version}</artifactId>
+        </exclusion>
+      </exclusions>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>com.lihaoyi</groupId>
+      <artifactId>ammonite_${scala.version}</artifactId>
+      <version>${ammonite.version}</version>
+      <exclusions>
+        <exclusion>
+          <groupId>org.jline</groupId>
+          <artifactId>*</artifactId>
+        </exclusion>
+      </exclusions>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+
+  <build>
+    <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
+    <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
+  </build>
+</project>
diff --git a/sql/connect/client/jvm/src/test/java/org/apache/spark/sql/JavaEncoderSuite.java b/sql/connect/client/integration-tests/src/test/java/org/apache/spark/sql/JavaEncoderSuite.java
similarity index 91%
rename from sql/connect/client/jvm/src/test/java/org/apache/spark/sql/JavaEncoderSuite.java
rename to sql/connect/client/integration-tests/src/test/java/org/apache/spark/sql/JavaEncoderSuite.java
index afb046af98594..907105e370c08 100644
--- a/sql/connect/client/jvm/src/test/java/org/apache/spark/sql/JavaEncoderSuite.java
+++ b/sql/connect/client/integration-tests/src/test/java/org/apache/spark/sql/JavaEncoderSuite.java
@@ -28,7 +28,6 @@
 import static org.apache.spark.sql.functions.*;
 import static org.apache.spark.sql.RowFactory.create;
 import org.apache.spark.api.java.function.MapFunction;
-import org.apache.spark.sql.connect.test.IntegrationTestUtils;
 import org.apache.spark.sql.connect.test.SparkConnectServerUtils;
 import org.apache.spark.sql.types.StructType;

@@ -40,18 +39,14 @@ public class JavaEncoderSuite implements Serializable {

   @BeforeAll
   public static void setup() {
-    Assumptions.assumeTrue(IntegrationTestUtils.isAssemblyJarsDirExists(),
-        "Skipping all tests because assembly jars directory does not exist.");
     spark = SparkConnectServerUtils.createSparkSession();
   }

   @AfterAll
   public static void tearDown() {
-    if (spark != null) {
-      spark.stop();
-      spark = null;
-      SparkConnectServerUtils.stop();
-    }
+    spark.stop();
+    spark = null;
+    SparkConnectServerUtils.stop();
   }

   private static BigDecimal bigDec(long unscaled, int scale) {
diff --git a/sql/connect/client/jvm/src/test/resources/StubClassDummyUdf.scala b/sql/connect/client/integration-tests/src/test/resources/StubClassDummyUdf.scala
similarity index 100%
rename from sql/connect/client/jvm/src/test/resources/StubClassDummyUdf.scala
rename to sql/connect/client/integration-tests/src/test/resources/StubClassDummyUdf.scala
diff --git a/sql/connect/client/jvm/src/test/resources/TestHelloV2_2.13.jar b/sql/connect/client/integration-tests/src/test/resources/TestHelloV2_2.13.jar
similarity index 100%
rename from sql/connect/client/jvm/src/test/resources/TestHelloV2_2.13.jar
rename to sql/connect/client/integration-tests/src/test/resources/TestHelloV2_2.13.jar
diff --git a/sql/connect/client/integration-tests/src/test/resources/log4j2.properties b/sql/connect/client/integration-tests/src/test/resources/log4j2.properties
new file mode 100644
index 0000000000000..550fd261b6fb5
--- /dev/null
+++ b/sql/connect/client/integration-tests/src/test/resources/log4j2.properties
@@ -0,0 +1,39 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Set everything to be logged to the file target/unit-tests.log
+rootLogger.level = info
+rootLogger.appenderRef.file.ref = ${sys:test.appender:-File}
+
+appender.file.type = File
+appender.file.name = File
+appender.file.fileName = target/unit-tests.log
+appender.file.layout.type = PatternLayout
+appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n%ex
+
+# Tests that launch java subprocesses can set the "test.appender" system property to
+# "console" to avoid having the child process's logs overwrite the unit test's
+# log file.
+appender.console.type = Console
+appender.console.name = console
+appender.console.target = SYSTEM_ERR
+appender.console.layout.type = PatternLayout
+appender.console.layout.pattern = %d{HH:mm:ss.SSS} %p %c: %maxLen{%m}{512}%n%ex{8}%n
+
+# Ignore messages below warning level from Jetty, because it's a bit verbose
+logger.jetty.name = org.sparkproject.jetty
+logger.jetty.level = warn
diff --git a/sql/connect/client/jvm/src/test/resources/udf2.13 b/sql/connect/client/integration-tests/src/test/resources/udf2.13
similarity index 100%
rename from sql/connect/client/jvm/src/test/resources/udf2.13
rename to sql/connect/client/integration-tests/src/test/resources/udf2.13
diff --git a/sql/connect/client/jvm/src/test/resources/udf2.13.jar b/sql/connect/client/integration-tests/src/test/resources/udf2.13.jar
similarity index 100%
rename from sql/connect/client/jvm/src/test/resources/udf2.13.jar
rename to sql/connect/client/integration-tests/src/test/resources/udf2.13.jar
diff --git a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/DataFrameSubquerySuite.scala b/sql/connect/client/integration-tests/src/test/scala/org/apache/spark/sql/DataFrameSubquerySuite.scala
similarity index 100%
rename from sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/DataFrameSubquerySuite.scala
rename to sql/connect/client/integration-tests/src/test/scala/org/apache/spark/sql/DataFrameSubquerySuite.scala
diff --git a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/DataFrameTableValuedFunctionsSuite.scala b/sql/connect/client/integration-tests/src/test/scala/org/apache/spark/sql/DataFrameTableValuedFunctionsSuite.scala
similarity index 100%
rename from sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/DataFrameTableValuedFunctionsSuite.scala
rename to sql/connect/client/integration-tests/src/test/scala/org/apache/spark/sql/DataFrameTableValuedFunctionsSuite.scala
diff --git a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/SQLExpressionsSuite.scala b/sql/connect/client/integration-tests/src/test/scala/org/apache/spark/sql/SQLExpressionsSuite.scala
similarity index 100%
rename from sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/SQLExpressionsSuite.scala
rename to sql/connect/client/integration-tests/src/test/scala/org/apache/spark/sql/SQLExpressionsSuite.scala
diff --git a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/application/ReplE2ESuite.scala b/sql/connect/client/integration-tests/src/test/scala/org/apache/spark/sql/application/ReplE2ESuite.scala
similarity index 99%
rename from sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/application/ReplE2ESuite.scala
rename to sql/connect/client/integration-tests/src/test/scala/org/apache/spark/sql/application/ReplE2ESuite.scala
index c07b624e8f8fe..c138f54d22dd9 100644
--- a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/application/ReplE2ESuite.scala
+++ b/sql/connect/client/integration-tests/src/test/scala/org/apache/spark/sql/application/ReplE2ESuite.scala
@@ -173,7 +173,8 @@ class ReplE2ESuite extends ConnectFunSuite with RemoteSparkSession with BeforeAn
     // scalastyle:off classforname line.size.limit
     val sparkHome = IntegrationTestUtils.sparkHome
     val testJar = Paths
-      .get(s"$sparkHome/sql/connect/client/jvm/src/test/resources/TestHelloV2_$scalaVersion.jar")
+      .get(
+        s"$sparkHome/sql/connect/client/integration-tests/src/test/resources/TestHelloV2_$scalaVersion.jar")
       .toFile

     assume(testJar.exists(), "Missing TestHelloV2 jar!")
diff --git a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/CatalogSuite.scala b/sql/connect/client/integration-tests/src/test/scala/org/apache/spark/sql/connect/CatalogSuite.scala
similarity index 100%
rename from sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/CatalogSuite.scala
rename to sql/connect/client/integration-tests/src/test/scala/org/apache/spark/sql/connect/CatalogSuite.scala
diff --git a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/CheckpointSuite.scala b/sql/connect/client/integration-tests/src/test/scala/org/apache/spark/sql/connect/CheckpointSuite.scala
similarity index 100%
rename from sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/CheckpointSuite.scala
rename to sql/connect/client/integration-tests/src/test/scala/org/apache/spark/sql/connect/CheckpointSuite.scala
diff --git a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/ClientDataFrameStatSuite.scala b/sql/connect/client/integration-tests/src/test/scala/org/apache/spark/sql/connect/ClientDataFrameStatSuite.scala
similarity index 100%
rename from sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/ClientDataFrameStatSuite.scala
rename to sql/connect/client/integration-tests/src/test/scala/org/apache/spark/sql/connect/ClientDataFrameStatSuite.scala
diff --git a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/ClientE2ETestSuite.scala b/sql/connect/client/integration-tests/src/test/scala/org/apache/spark/sql/connect/ClientE2ETestSuite.scala
similarity index 100%
rename from sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/ClientE2ETestSuite.scala
rename to sql/connect/client/integration-tests/src/test/scala/org/apache/spark/sql/connect/ClientE2ETestSuite.scala
diff --git a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/DataFrameNaFunctionSuite.scala b/sql/connect/client/integration-tests/src/test/scala/org/apache/spark/sql/connect/DataFrameNaFunctionSuite.scala
similarity index 100%
rename from sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/DataFrameNaFunctionSuite.scala
rename to sql/connect/client/integration-tests/src/test/scala/org/apache/spark/sql/connect/DataFrameNaFunctionSuite.scala
diff --git a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/KeyValueGroupedDatasetE2ETestSuite.scala b/sql/connect/client/integration-tests/src/test/scala/org/apache/spark/sql/connect/KeyValueGroupedDatasetE2ETestSuite.scala
similarity index 100%
rename from sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/KeyValueGroupedDatasetE2ETestSuite.scala
rename to sql/connect/client/integration-tests/src/test/scala/org/apache/spark/sql/connect/KeyValueGroupedDatasetE2ETestSuite.scala
diff --git a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/MergeIntoE2ETestSuite.scala b/sql/connect/client/integration-tests/src/test/scala/org/apache/spark/sql/connect/MergeIntoE2ETestSuite.scala
similarity index 100%
rename from sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/MergeIntoE2ETestSuite.scala
rename to sql/connect/client/integration-tests/src/test/scala/org/apache/spark/sql/connect/MergeIntoE2ETestSuite.scala
diff --git a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/SparkSessionBuilderImplementationBindingSuite.scala b/sql/connect/client/integration-tests/src/test/scala/org/apache/spark/sql/connect/SparkSessionBuilderImplementationBindingSuite.scala
similarity index 100%
rename from sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/SparkSessionBuilderImplementationBindingSuite.scala
rename to sql/connect/client/integration-tests/src/test/scala/org/apache/spark/sql/connect/SparkSessionBuilderImplementationBindingSuite.scala
diff --git a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/SparkSessionE2ESuite.scala b/sql/connect/client/integration-tests/src/test/scala/org/apache/spark/sql/connect/SparkSessionE2ESuite.scala
similarity index 100%
rename from sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/SparkSessionE2ESuite.scala
rename to sql/connect/client/integration-tests/src/test/scala/org/apache/spark/sql/connect/SparkSessionE2ESuite.scala
diff --git a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/StaticProcedureSuite.scala b/sql/connect/client/integration-tests/src/test/scala/org/apache/spark/sql/connect/StaticProcedureSuite.scala
similarity index 100%
rename from sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/StaticProcedureSuite.scala
rename to sql/connect/client/integration-tests/src/test/scala/org/apache/spark/sql/connect/StaticProcedureSuite.scala
diff --git a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/StubbingTestSuite.scala b/sql/connect/client/integration-tests/src/test/scala/org/apache/spark/sql/connect/StubbingTestSuite.scala
similarity index 100%
rename from sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/StubbingTestSuite.scala
rename to sql/connect/client/integration-tests/src/test/scala/org/apache/spark/sql/connect/StubbingTestSuite.scala
diff --git a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/UDFClassLoadingE2ESuite.scala b/sql/connect/client/integration-tests/src/test/scala/org/apache/spark/sql/connect/UDFClassLoadingE2ESuite.scala
similarity index 98%
rename from sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/UDFClassLoadingE2ESuite.scala
rename to sql/connect/client/integration-tests/src/test/scala/org/apache/spark/sql/connect/UDFClassLoadingE2ESuite.scala
index 42fc0ccfed721..24ac66cb90eca 100644
--- a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/UDFClassLoadingE2ESuite.scala
+++ b/sql/connect/client/integration-tests/src/test/scala/org/apache/spark/sql/connect/UDFClassLoadingE2ESuite.scala
@@ -21,7 +21,7 @@ import java.nio.file.{Files, Paths}

 import scala.util.Properties

-import com.google.protobuf.ByteString
+import org.sparkproject.com.google.protobuf.ByteString

 import org.apache.spark.connect.proto
 import org.apache.spark.sql.connect.common.ProtoDataTypes
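The import rewrite above follows directly from the shade rule added to SparkConnectClient earlier in this patch: com.google.protobuf classes are relocated under the org.sparkproject prefix when the client assembly jar is built, and the integration tests now compile against that assembly. A small illustrative model of the rename (not part of the build itself):

    // Models the effect of
    //   ShadeRule.rename("com.google.protobuf.**" -> "org.sparkproject.com.google.protobuf.@1")
    // on class names packaged into the client assembly jar.
    object RelocationSketch {
      private val from = "com.google.protobuf."
      private val to = "org.sparkproject.com.google.protobuf."

      def relocate(className: String): String =
        if (className.startsWith(from)) to + className.stripPrefix(from) else className

      def main(args: Array[String]): Unit = {
        // Prints org.sparkproject.com.google.protobuf.ByteString,
        // matching the rewritten import in UDFClassLoadingE2ESuite.
        println(relocate("com.google.protobuf.ByteString"))
      }
    }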
diff --git a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/UserDefinedFunctionE2ETestSuite.scala b/sql/connect/client/integration-tests/src/test/scala/org/apache/spark/sql/connect/UserDefinedFunctionE2ETestSuite.scala
similarity index 100%
rename from sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/UserDefinedFunctionE2ETestSuite.scala
rename to sql/connect/client/integration-tests/src/test/scala/org/apache/spark/sql/connect/UserDefinedFunctionE2ETestSuite.scala
diff --git a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/streaming/ClientStreamingQuerySuite.scala b/sql/connect/client/integration-tests/src/test/scala/org/apache/spark/sql/connect/streaming/ClientStreamingQuerySuite.scala
similarity index 100%
rename from sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/streaming/ClientStreamingQuerySuite.scala
rename to sql/connect/client/integration-tests/src/test/scala/org/apache/spark/sql/connect/streaming/ClientStreamingQuerySuite.scala
diff --git a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/streaming/FlatMapGroupsWithStateStreamingSuite.scala b/sql/connect/client/integration-tests/src/test/scala/org/apache/spark/sql/connect/streaming/FlatMapGroupsWithStateStreamingSuite.scala
similarity index 100%
rename from sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/streaming/FlatMapGroupsWithStateStreamingSuite.scala
rename to sql/connect/client/integration-tests/src/test/scala/org/apache/spark/sql/connect/streaming/FlatMapGroupsWithStateStreamingSuite.scala
diff --git a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/streaming/TransformWithStateConnectSuite.scala b/sql/connect/client/integration-tests/src/test/scala/org/apache/spark/sql/connect/streaming/TransformWithStateConnectSuite.scala
similarity index 100%
rename from sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/streaming/TransformWithStateConnectSuite.scala
rename to sql/connect/client/integration-tests/src/test/scala/org/apache/spark/sql/connect/streaming/TransformWithStateConnectSuite.scala
diff --git a/sql/connect/client/integration-tests/src/test/scala/org/apache/spark/sql/connect/test/ConnectFunSuite.scala b/sql/connect/client/integration-tests/src/test/scala/org/apache/spark/sql/connect/test/ConnectFunSuite.scala
new file mode 100644
index 0000000000000..012acbbb9ea62
--- /dev/null
+++ b/sql/connect/client/integration-tests/src/test/scala/org/apache/spark/sql/connect/test/ConnectFunSuite.scala
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.connect.test
+
+import java.nio.file.Path
+
+import org.scalatest.funsuite.AnyFunSuite // scalastyle:ignore funsuite
+
+import org.apache.spark.connect.proto
+import org.apache.spark.sql.Column
+import org.apache.spark.sql.connect.ColumnNodeToProtoConverter
+
+/**
+ * The basic test suite that client tests should extend from.
+ */
+trait ConnectFunSuite extends AnyFunSuite { // scalastyle:ignore funsuite
+
+  // Borrowed from SparkFunSuite
+  protected def getWorkspaceFilePath(first: String, more: String*): Path = {
+    if (!(sys.props.contains("spark.test.home") || sys.env.contains("SPARK_HOME"))) {
+      fail("spark.test.home or SPARK_HOME is not set.")
+    }
+    val sparkHome = sys.props.getOrElse("spark.test.home", sys.env("SPARK_HOME"))
+    java.nio.file.Paths.get(sparkHome, first +: more: _*)
+  }
+
+  protected def baseResourcePath: Path = {
+    getWorkspaceFilePath(
+      "sql",
+      "connect",
+      "client",
+      "integration-tests",
+      "src",
+      "test",
+      "resources").toAbsolutePath
+  }
+
+  protected def commonResourcePath: Path = {
+    getWorkspaceFilePath("sql", "connect", "common", "src", "test", "resources").toAbsolutePath
+  }
+
+  protected def toExpr(c: Column): proto.Expression = ColumnNodeToProtoConverter.toExpr(c)
+}
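ConnectFunSuite above is the base trait for all suites in this module; paired with RemoteSparkSession (changed later in this patch), it hands each suite a `spark` session backed by a locally launched Spark Connect server. A hypothetical suite, shown only to illustrate how the pieces fit together:

    package org.apache.spark.sql.connect

    import org.apache.spark.sql.connect.test.{ConnectFunSuite, RemoteSparkSession}

    // Example only; this suite is not part of the patch.
    class ExampleE2ESuite extends ConnectFunSuite with RemoteSparkSession {
      test("range count round-trips through the Connect server") {
        // `spark` is created in RemoteSparkSession.beforeAll against the
        // server started by SparkConnectServerUtils.
        assert(spark.range(10).count() == 10)
      }
    }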
diff --git a/sql/connect/client/integration-tests/src/test/scala/org/apache/spark/sql/connect/test/IntegrationTestUtils.scala b/sql/connect/client/integration-tests/src/test/scala/org/apache/spark/sql/connect/test/IntegrationTestUtils.scala
new file mode 100644
index 0000000000000..8303ed29e52bb
--- /dev/null
+++ b/sql/connect/client/integration-tests/src/test/scala/org/apache/spark/sql/connect/test/IntegrationTestUtils.scala
@@ -0,0 +1,162 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.connect.test
+
+import java.io.File
+import java.nio.file.{Files, Paths}
+
+import scala.util.Properties.versionNumberString
+
+import org.scalatest.Assertions.fail
+
+import org.apache.spark.SparkBuildInfo.{spark_version => SPARK_VERSION}
+import org.apache.spark.util.SparkFileUtils
+
+object IntegrationTestUtils {
+
+  // System properties used for testing and debugging
+  private val DEBUG_SC_JVM_CLIENT = "spark.debug.sc.jvm.client"
+  private val DEBUG_SC_JVM_CLIENT_ENV = "SPARK_DEBUG_SC_JVM_CLIENT"
+  // Enable this flag to print all server logs to the console
+  private[sql] val isDebug = {
+    System.getProperty(DEBUG_SC_JVM_CLIENT, "false").toBoolean ||
+      Option(System.getenv(DEBUG_SC_JVM_CLIENT_ENV)).exists(_.toBoolean)
+  }
+
+  private[sql] lazy val scalaVersion = {
+    versionNumberString.split('.') match {
+      case Array(major, minor, _*) => major + "." + minor
+      case _ => versionNumberString
+    }
+  }
+
+  private[sql] lazy val scalaDir = s"scala-$scalaVersion"
+
+  private[sql] lazy val sparkHome: String = {
+    if (!(sys.props.contains("spark.test.home") || sys.env.contains("SPARK_HOME"))) {
+      fail("spark.test.home or SPARK_HOME is not set.")
+    }
+    sys.props.getOrElse("spark.test.home", sys.env("SPARK_HOME"))
+  }
+
+  private[sql] lazy val connectClientHomeDir = s"$sparkHome/sql/connect/client/integration-tests"
+
+  private[sql] lazy val connectClientTestClassDir = {
+    s"$connectClientHomeDir/target/$scalaDir/test-classes"
+  }
+
+  private[sql] def debugConfigs: Seq[String] = {
+    val log4j2 = s"$connectClientHomeDir/src/test/resources/log4j2.properties"
+    if (isDebug) {
+      Seq(
+        // Enable to see the server plan change log
+        // "--conf",
+        // "spark.sql.planChangeLog.level=WARN",
+
+        // Enable to see the server grpc received
+        // "--conf",
+        // "spark.connect.grpc.interceptor.classes=" +
+        //   "org.apache.spark.sql.connect.service.LoggingInterceptor",
+
+        // Redirect server log into console
+        "--conf",
+        s"spark.driver.extraJavaOptions=-Dlog4j.configurationFile=$log4j2")
+    } else Seq.empty
+  }
+
+  // Log server start stop debug info into console
+  // scalastyle:off println
+  private[sql] def debug(msg: String): Unit = if (isDebug) println(msg)
+  // scalastyle:on println
+  private[sql] def debug(error: Throwable): Unit = if (isDebug) error.printStackTrace()
+
+  private[sql] lazy val isSparkHiveJarAvailable: Boolean = {
+    val filePath = s"$sparkHome/assembly/target/$scalaDir/jars/" +
+      s"spark-hive_$scalaVersion-$SPARK_VERSION.jar"
+    Files.exists(Paths.get(filePath))
+  }
+
+  private[sql] def cleanUpHiveClassesDirIfNeeded(): Unit = {
+    def delete(f: File): Unit = {
+      if (f.exists()) {
+        SparkFileUtils.deleteRecursively(f)
+      }
+    }
+    delete(new File(s"$sparkHome/sql/hive/target/$scalaDir/classes"))
+    delete(new File(s"$sparkHome/sql/hive/target/$scalaDir/test-classes"))
+  }
+
+  /**
+   * Find a jar in the Spark project artifacts. It requires a build first (e.g. build/sbt package,
+   * build/mvn clean install -DskipTests) so that this method can find the jar in the target
+   * folders.
+   *
+   * @return
+   *   the jar
+   */
+  private[sql] def findJar(
+      path: String,
+      sbtName: String,
+      mvnName: String,
+      test: Boolean = false): File = {
+    val jar = tryFindJar(path, sbtName, mvnName, test).getOrElse({
+      val suffix = if (test) "-tests.jar" else ".jar"
+      val sbtFileName = s"$sbtName(.*)$suffix"
+      val mvnFileName = s"$mvnName(.*)$SPARK_VERSION$suffix"
+      throw new RuntimeException(s"Failed to find the jar: $sbtFileName or $mvnFileName " +
+        s"inside folder: ${getTargetFilePath(path)}. This file can be generated by a command " +
+        s"similar to: build/sbt package|assembly")
+    })
+    debug("Using jar: " + jar.getCanonicalPath)
+    jar
+  }
+
+  private[sql] def tryFindJar(
+      path: String,
+      sbtName: String,
+      mvnName: String,
+      test: Boolean = false): Option[File] = {
+    val targetDir = getTargetFilePath(path).toFile
+    assert(
+      targetDir.exists(),
+      s"Fail to locate the target folder: '${targetDir.getCanonicalPath}'. " +
+        s"SPARK_HOME='${new File(sparkHome).getCanonicalPath}'. " +
+        "Make sure the Spark project jars have been built (e.g. using build/sbt package) " +
+        "and the env variable `SPARK_HOME` is set correctly.")
+    val suffix = if (test) "-tests.jar" else ".jar"
+    // It is possible there is more than one: one built by Maven and another by SBT.
+    // Return the first one found.
+    recursiveListFiles(targetDir).find { f =>
+      // SBT jar
+      (f.getParentFile.getName == scalaDir &&
+        f.getName.startsWith(sbtName) && f.getName.endsWith(suffix)) ||
+      // Maven Jar
+      (f.getParent.endsWith("target") &&
+        f.getName.startsWith(mvnName) &&
+        f.getName.endsWith(s"$SPARK_VERSION$suffix"))
+    }
+  }
+
+  private def getTargetFilePath(path: String): java.nio.file.Path = {
+    Paths.get(sparkHome, path, "target").toAbsolutePath
+  }
+
+  private def recursiveListFiles(f: File): Array[File] = {
+    val these = f.listFiles
+    these ++ these.filter(_.isDirectory).flatMap(recursiveListFiles)
+  }
+}
diff --git a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/test/QueryTest.scala b/sql/connect/client/integration-tests/src/test/scala/org/apache/spark/sql/connect/test/QueryTest.scala
similarity index 100%
rename from sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/test/QueryTest.scala
rename to sql/connect/client/integration-tests/src/test/scala/org/apache/spark/sql/connect/test/QueryTest.scala
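A usage sketch for the IntegrationTestUtils helpers above; the module and jar names here are illustrative (SparkConnectServerUtils holds the real call sites), and the object lives in the same package because the helpers are private[sql]:

    package org.apache.spark.sql.connect.test

    object FindJarSketch {
      def main(args: Array[String]): Unit = {
        // Requires a prior build (e.g. build/sbt package or
        // build/mvn clean install -DskipTests), as the findJar scaladoc notes.
        val jar = IntegrationTestUtils.findJar(
          "sql/catalyst", "spark-catalyst", "spark-catalyst", test = true)
        // Printed only when -Dspark.debug.sc.jvm.client=true or
        // SPARK_DEBUG_SC_JVM_CLIENT=true is set.
        IntegrationTestUtils.debug("catalyst test jar: " + jar.getCanonicalPath)
      }
    }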
diff --git a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/test/RemoteSparkSession.scala b/sql/connect/client/integration-tests/src/test/scala/org/apache/spark/sql/connect/test/RemoteSparkSession.scala
similarity index 91%
rename from sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/test/RemoteSparkSession.scala
rename to sql/connect/client/integration-tests/src/test/scala/org/apache/spark/sql/connect/test/RemoteSparkSession.scala
index 7a3b6a6261e1f..4ec056da9f17d 100644
--- a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/test/RemoteSparkSession.scala
+++ b/sql/connect/client/integration-tests/src/test/scala/org/apache/spark/sql/connect/test/RemoteSparkSession.scala
@@ -23,12 +23,10 @@ import java.util.concurrent.TimeUnit

 import scala.concurrent.duration.FiniteDuration

-import org.scalactic.source.Position
-import org.scalatest.{BeforeAndAfterAll, Suite, Tag}
+import org.scalatest.{BeforeAndAfterAll, Suite}
 import org.scalatest.concurrent.Eventually.eventually
 import org.scalatest.concurrent.Futures.timeout
-import org.scalatest.funsuite.AnyFunSuite // scalastyle:ignore funsuite
-import org.scalatest.time.SpanSugar._ // scalastyle:ignore
+import org.scalatest.time.SpanSugar._

 import org.apache.spark.SparkBuildInfo
 import org.apache.spark.sql.connect.SparkSession
@@ -207,18 +205,14 @@ object SparkConnectServerUtils {
     }
   }
 }

-trait RemoteSparkSession
-    extends AnyFunSuite // scalastyle:ignore funsuite
-    with BeforeAndAfterAll { self: Suite =>
+trait RemoteSparkSession extends BeforeAndAfterAll { self: Suite =>
   import SparkConnectServerUtils._
   var spark: SparkSession = _
   protected lazy val serverPort: Int = port

   override def beforeAll(): Unit = {
     super.beforeAll()
-    if (IntegrationTestUtils.isAssemblyJarsDirExists) {
-      spark = createSparkSession()
-    }
+    spark = createSparkSession()
   }

   override def afterAll(): Unit = {
@@ -230,13 +224,4 @@ trait RemoteSparkSession
     spark = null
     super.afterAll()
   }
-
-  override protected def test(testName: String, testTags: Tag*)(testFun: => Any)(implicit
-      pos: Position): Unit = {
-    if (IntegrationTestUtils.isAssemblyJarsDirExists) {
-      super.test(testName, testTags: _*)(testFun)
-    } else {
-      super.ignore(testName, testTags: _*)(testFun)
-    }
-  }
 }
diff --git a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/test/SQLHelper.scala b/sql/connect/client/integration-tests/src/test/scala/org/apache/spark/sql/connect/test/SQLHelper.scala
similarity index 100%
rename from sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/test/SQLHelper.scala
rename to sql/connect/client/integration-tests/src/test/scala/org/apache/spark/sql/connect/test/SQLHelper.scala
diff --git a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/SQLImplicitsTestSuite.scala b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/SQLImplicitsTestSuite.scala
index c7b4748f12221..36e235096750a 100644
--- a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/SQLImplicitsTestSuite.scala
+++ b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/SQLImplicitsTestSuite.scala
@@ -189,3 +189,5 @@ class SQLImplicitsTestSuite extends ConnectFunSuite with BeforeAndAfterAll {
     testImplicit(Set(1, 2, 4))
   }
 }
+
+private[sql] case class MyType(id: Long, a: Double, b: Double)
diff --git a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/test/IntegrationTestUtils.scala b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/test/IntegrationTestUtils.scala
index 6e20db5d34938..d38d9f3017a90 100644
--- a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/test/IntegrationTestUtils.scala
+++ b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/test/IntegrationTestUtils.scala
@@ -90,11 +90,6 @@ object IntegrationTestUtils {
     Files.exists(Paths.get(filePath))
   }

-  lazy val isAssemblyJarsDirExists: Boolean = {
-    val filePath = s"$sparkHome/assembly/target/$scalaDir/jars/"
-    Files.exists(Paths.get(filePath))
-  }
-
   private[sql] def cleanUpHiveClassesDirIfNeeded(): Unit = {
     def delete(f: File): Unit = {
       if (f.exists()) {