From cce86c353c4ced319c27bfea20a45dc236f01b4d Mon Sep 17 00:00:00 2001 From: vicennial <venkata.gudesa@databricks.com> Date: Thu, 6 Mar 2025 15:52:51 +0100 Subject: [PATCH 1/7] init --- .github/workflows/build_and_test.yml | 34 ++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 83a1578640699..40895813a4526 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -92,6 +92,7 @@ jobs: pyspark_pandas_modules=`cd dev && python -c "import sparktestsupport.modules as m; print(','.join(m.name for m in m.all_modules if m.name.startswith('pyspark-pandas')))"` pyspark=`./dev/is-changed.py -m $pyspark_modules` pandas=`./dev/is-changed.py -m $pyspark_pandas_modules` + connect_examples=`./dev/is-changed.py -m "connect-examples"` if [[ "${{ github.repository }}" != 'apache/spark' ]]; then yarn=`./dev/is-changed.py -m yarn` kubernetes=`./dev/is-changed.py -m kubernetes` @@ -127,6 +128,7 @@ jobs: \"k8s-integration-tests\" : \"$kubernetes\", \"buf\" : \"$buf\", \"ui\" : \"$ui\", + \"connect-examples\": \"$connect_examples\" }" echo $precondition # For debugging # Remove `\n` to avoid "Invalid format" error @@ -1290,3 +1292,35 @@ jobs: cd ui-test npm install --save-dev node --experimental-vm-modules node_modules/.bin/jest + + connect-examples-build: + name: Build spark-server-library-example + needs: precondition + if: fromJson(needs.precondition.outputs.required).connect-examples == 'true' + runs-on: ubuntu-latest + steps: + - name: Checkout Spark repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 + repository: apache/spark + ref: ${{ inputs.branch }} + + - name: Sync the current branch with the latest in Apache Spark + if: github.repository != 'apache/spark' + run: | + echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV + git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/} + git -c 
user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD + git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty + + - name: Set up Java + uses: actions/setup-java@v4 + with: + distribution: zulu + java-version: ${{ inputs.java }} + + - name: Build spark-server-library-example + run: | + cd connect-examples/spark-server-library-example + mvn clean package \ No newline at end of file From 92922c5e17e82c066e0ce6c0ab845597f8e857c2 Mon Sep 17 00:00:00 2001 From: vicennial <venkata.gudesa@databricks.com> Date: Thu, 6 Mar 2025 15:53:49 +0100 Subject: [PATCH 2/7] lint --- .github/workflows/build_and_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 40895813a4526..da74675ca17ce 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -1323,4 +1323,4 @@ jobs: - name: Build spark-server-library-example run: | cd connect-examples/spark-server-library-example - mvn clean package \ No newline at end of file + mvn clean package From f2c9f925aa4d695fe534ea8d65321eca0b93525e Mon Sep 17 00:00:00 2001 From: vicennial <venkata.gudesa@databricks.com> Date: Thu, 6 Mar 2025 17:33:27 +0100 Subject: [PATCH 3/7] typo --- .github/workflows/build_and_test.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index da74675ca17ce..4b852614018f2 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -1294,7 +1294,7 @@ jobs: node --experimental-vm-modules node_modules/.bin/jest connect-examples-build: - name: Build spark-server-library-example + name: Build server-library-example needs: precondition if: fromJson(needs.precondition.outputs.required).connect-examples == 'true' 
runs-on: ubuntu-latest @@ -1320,7 +1320,7 @@ jobs: distribution: zulu java-version: ${{ inputs.java }} - - name: Build spark-server-library-example + - name: Build server-library-example run: | - cd connect-examples/spark-server-library-example + cd connect-examples/server-library-example mvn clean package From dcf75eb1dcbd9b819fa2f00bacd5c30e1d959dcd Mon Sep 17 00:00:00 2001 From: vicennial <venkata.gudesa@databricks.com> Date: Thu, 6 Mar 2025 18:20:08 +0100 Subject: [PATCH 4/7] naming --- .github/workflows/build_and_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 4b852614018f2..eac4939c7c2a9 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -1294,7 +1294,7 @@ jobs: node --experimental-vm-modules node_modules/.bin/jest connect-examples-build: - name: Build server-library-example + name: "Build module: server-library-example" needs: precondition if: fromJson(needs.precondition.outputs.required).connect-examples == 'true' runs-on: ubuntu-latest From cd834454774b5a54a1cccdc80601327d2beb90bf Mon Sep 17 00:00:00 2001 From: vicennial <venkata.gudesa@databricks.com> Date: Thu, 6 Mar 2025 19:23:34 +0100 Subject: [PATCH 5/7] shading --- connect-examples/server-library-example/client/pom.xml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/connect-examples/server-library-example/client/pom.xml b/connect-examples/server-library-example/client/pom.xml index 364920a2ec22b..36f3617cd1dbe 100644 --- a/connect-examples/server-library-example/client/pom.xml +++ b/connect-examples/server-library-example/client/pom.xml @@ -99,6 +99,15 @@ <shadedArtifactAttached>false</shadedArtifactAttached> <promoteTransitiveDependencies>true</promoteTransitiveDependencies> <createDependencyReducedPom>false</createDependencyReducedPom> + <relocations> + <relocation> + <pattern>com.google</pattern> + 
<shadedPattern>org.sparkproject.com.google</shadedPattern> + <excludes> + <exclude>com.google.common.**</exclude> + </excludes> + </relocation> + </relocations> <!--SPARK-42228: Add `ServicesResourceTransformer` to relocation class names in META-INF/services for grpc--> <transformers> <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/> From 243f389e046454937db3dc98219db200c6fb82e3 Mon Sep 17 00:00:00 2001 From: vicennial <venkata.gudesa@databricks.com> Date: Thu, 6 Mar 2025 20:24:02 +0100 Subject: [PATCH 6/7] make naming consistent --- .github/workflows/build_and_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index eac4939c7c2a9..d695763727cfc 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -1294,7 +1294,7 @@ jobs: node --experimental-vm-modules node_modules/.bin/jest connect-examples-build: - name: "Build module: server-library-example" + name: "Build modules: server-library-example" needs: precondition if: fromJson(needs.precondition.outputs.required).connect-examples == 'true' runs-on: ubuntu-latest From 8c77814eb4bb43d50a4dbb9cd67cacf2b89e45fc Mon Sep 17 00:00:00 2001 From: vicennial <venkata.gudesa@databricks.com> Date: Tue, 11 Mar 2025 21:35:38 +0100 Subject: [PATCH 7/7] use snapshot --- .../server-library-example/README.md | 24 +++++++---------- .../server-library-example/client/pom.xml | 27 ++++++++++--------- .../examples/serverlibrary/CustomTable.scala | 3 ++- .../serverlibrary/CustomTableBuilder.scala | 2 +- .../serverlibrary/CustomTableExample.scala | 2 +- .../server-library-example/pom.xml | 5 ++-- 6 files changed, 32 insertions(+), 31 deletions(-) diff --git a/connect-examples/server-library-example/README.md b/connect-examples/server-library-example/README.md index 6028a66cd5c7b..b179731885c44 100644 --- 
a/connect-examples/server-library-example/README.md +++ b/connect-examples/server-library-example/README.md @@ -78,30 +78,26 @@ reading, writing and processing data in the custom format. The plugins (`CustomC mvn clean package ``` -3. **Download the `4.0.0-preview2` release to use as the Spark Connect Server**: - - Choose a distribution from https://archive.apache.org/dist/spark/spark-4.0.0-preview2/. - - Example: `curl -L https://archive.apache.org/dist/spark/spark-4.0.0-preview2/spark-4.0.0-preview2-bin-hadoop3.tgz | tar xz` - -4. **Copy relevant JARs to the root of the unpacked Spark distribution**: +3. **Copy relevant JARs to the root of the Spark repository (the directory containing `connect-examples/`)**: ```bash - cp \ - <SPARK_HOME>/connect-examples/server-library-example/resources/spark-daria_2.13-1.2.3.jar \ - <SPARK_HOME>/connect-examples/server-library-example/common/target/spark-server-library-example-common-1.0.0.jar \ - <SPARK_HOME>/connect-examples/server-library-example/server/target/spark-server-library-example-server-extension-1.0.0.jar \ - . + cp \ + connect-examples/server-library-example/resources/spark-daria_2.13-1.2.3.jar \ + connect-examples/server-library-example/common/target/spark-server-library-example-common-1.0.0.jar \ + connect-examples/server-library-example/server/target/spark-server-library-example-server-extension-1.0.0.jar \ + . ``` -5. **Start the Spark Connect Server with the relevant JARs**: +4. **Start the Spark Connect Server with the relevant JARs**: ```bash bin/spark-connect-shell \ --jars spark-server-library-example-server-extension-1.0.0.jar,spark-server-library-example-common-1.0.0.jar,spark-daria_2.13-1.2.3.jar \ --conf spark.connect.extensions.relation.classes=org.apache.connect.examples.serverlibrary.CustomRelationPlugin \ --conf spark.connect.extensions.command.classes=org.apache.connect.examples.serverlibrary.CustomCommandPlugin ``` -6. **In a different terminal, navigate back to the root of the sample project and start the client**: +5. 
**In a different terminal, start the client**: ```bash - java -cp client/target/spark-server-library-client-package-scala-1.0.0.jar org.apache.connect.examples.serverlibrary.CustomTableExample + java -cp connect-examples/server-library-example/client/target/spark-server-library-client-package-scala-1.0.0.jar org.apache.connect.examples.serverlibrary.CustomTableExample ``` -7. **Notice the printed output in the client terminal as well as the creation of the cloned table**: +6. **Notice the printed output in the client terminal as well as the creation of the cloned table**: ```protobuf Explaining plan for custom table: sample_table with path: <SPARK_HOME>/spark/connect-examples/server-library-example/client/../resources/dummy_data.custom == Parsed Logical Plan == diff --git a/connect-examples/server-library-example/client/pom.xml b/connect-examples/server-library-example/client/pom.xml index 36f3617cd1dbe..62e3a80b9d95e 100644 --- a/connect-examples/server-library-example/client/pom.xml +++ b/connect-examples/server-library-example/client/pom.xml @@ -37,12 +37,6 @@ <groupId>org.apache.connect.examples.serverlibrary</groupId> <artifactId>spark-server-library-example-common</artifactId> <version>1.0.0</version> - <exclusions> - <exclusion> - <groupId>com.google.protobuf</groupId> - <artifactId>protobuf-java</artifactId> - </exclusion> - </exclusions> </dependency> <!-- spark-connect-common contains proto definitions that we require to build custom commands/relations/expressions --> <dependency> @@ -62,7 +56,11 @@ <artifactId>scala-library</artifactId> <version>${scala.version}</version> </dependency> - + <dependency> + <groupId>com.google.guava</groupId> + <artifactId>guava</artifactId> + <version>${connect.guava.version}</version> + </dependency> </dependencies> <build> @@ -99,13 +97,18 @@ <shadedArtifactAttached>false</shadedArtifactAttached> <promoteTransitiveDependencies>true</promoteTransitiveDependencies> 
<createDependencyReducedPom>false</createDependencyReducedPom> - <relocations> - <relocation> - <pattern>com.google</pattern> - <shadedPattern>org.sparkproject.com.google</shadedPattern> + <filters> + <filter> + <artifact>com.fasterxml.jackson.core:jackson-core</artifact> <excludes> - <exclude>com.google.common.**</exclude> + <exclude>META-INF/versions/**</exclude> </excludes> + </filter> + </filters> + <relocations> + <relocation> + <pattern>com.google.common</pattern> + <shadedPattern>org.sparkproject.guava</shadedPattern> </relocation> </relocations> <!--SPARK-42228: Add `ServicesResourceTransformer` to relocation class names in META-INF/services for grpc--> diff --git a/connect-examples/server-library-example/client/src/main/scala/org/apache/connect/examples/serverlibrary/CustomTable.scala b/connect-examples/server-library-example/client/src/main/scala/org/apache/connect/examples/serverlibrary/CustomTable.scala index 782a246d92984..a23b3e120c381 100644 --- a/connect-examples/server-library-example/client/src/main/scala/org/apache/connect/examples/serverlibrary/CustomTable.scala +++ b/connect-examples/server-library-example/client/src/main/scala/org/apache/connect/examples/serverlibrary/CustomTable.scala @@ -19,7 +19,8 @@ package org.apache.connect.examples.serverlibrary import com.google.protobuf.Any import org.apache.spark.connect.proto.Command -import org.apache.spark.sql.{functions, Column, DataFrame, Dataset, Row, SparkSession} +import org.apache.spark.sql.{functions, Column, Row} +import org.apache.spark.sql.connect.{Dataset, SparkSession} import org.apache.connect.examples.serverlibrary.proto import org.apache.connect.examples.serverlibrary.proto.CreateTable.Column.{DataType => ProtoDataType} diff --git a/connect-examples/server-library-example/client/src/main/scala/org/apache/connect/examples/serverlibrary/CustomTableBuilder.scala 
b/connect-examples/server-library-example/client/src/main/scala/org/apache/connect/examples/serverlibrary/CustomTableBuilder.scala index a1b8ffdb8dd72..d45e03b4aa284 100644 --- a/connect-examples/server-library-example/client/src/main/scala/org/apache/connect/examples/serverlibrary/CustomTableBuilder.scala +++ b/connect-examples/server-library-example/client/src/main/scala/org/apache/connect/examples/serverlibrary/CustomTableBuilder.scala @@ -19,7 +19,7 @@ package org.apache.connect.examples.serverlibrary import com.google.protobuf.Any import org.apache.spark.connect.proto.Command -import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.connect.SparkSession import org.apache.connect.examples.serverlibrary.CustomTable diff --git a/connect-examples/server-library-example/client/src/main/scala/org/apache/connect/examples/serverlibrary/CustomTableExample.scala b/connect-examples/server-library-example/client/src/main/scala/org/apache/connect/examples/serverlibrary/CustomTableExample.scala index 8470465cd7a0b..92f7334fa6be2 100644 --- a/connect-examples/server-library-example/client/src/main/scala/org/apache/connect/examples/serverlibrary/CustomTableExample.scala +++ b/connect-examples/server-library-example/client/src/main/scala/org/apache/connect/examples/serverlibrary/CustomTableExample.scala @@ -21,7 +21,7 @@ import java.nio.file.{Path, Paths} import com.google.protobuf.Any import org.apache.spark.connect.proto.Command -import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.connect.SparkSession import org.apache.spark.sql.types.{StructType, StructField, StringType, IntegerType} import org.apache.connect.examples.serverlibrary.proto diff --git a/connect-examples/server-library-example/pom.xml b/connect-examples/server-library-example/pom.xml index 1723f3b0154fa..73996c90cfda5 100644 --- a/connect-examples/server-library-example/pom.xml +++ b/connect-examples/server-library-example/pom.xml @@ -36,7 +36,8 @@ 
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> <scala.binary>2.13</scala.binary> <scala.version>2.13.15</scala.version> - <protobuf.version>3.25.4</protobuf.version> - <spark.version>4.0.0-preview2</spark.version> + <protobuf.version>4.29.3</protobuf.version> + <spark.version>4.1.0-SNAPSHOT</spark.version> + <connect.guava.version>33.4.0-jre</connect.guava.version> </properties> </project>