[SPARK-51338][INFRA] Add automated CI build for connect-examples #50187


Draft · wants to merge 7 commits into base: master

34 changes: 34 additions & 0 deletions .github/workflows/build_and_test.yml
@@ -92,6 +92,7 @@ jobs:
pyspark_pandas_modules=`cd dev && python -c "import sparktestsupport.modules as m; print(','.join(m.name for m in m.all_modules if m.name.startswith('pyspark-pandas')))"`
pyspark=`./dev/is-changed.py -m $pyspark_modules`
pandas=`./dev/is-changed.py -m $pyspark_pandas_modules`
connect_examples=`./dev/is-changed.py -m "connect-examples"`

Member commented:
If we want to run this with Java 21 (and other scheduled builds), we would need to add this in https://github.com/apache/spark/blob/master/.github/workflows/build_java21.yml too, e.g., "connect_examples": "true"

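For illustration, a hedged sketch of how that flag might be carried in the scheduled workflow's job map; the surrounding structure of build_java21.yml is an assumption, and the key spelling here follows the hyphenated `connect-examples` name emitted by the precondition in this PR:

```yaml
# Hypothetical excerpt of .github/workflows/build_java21.yml; "build" is shown
# only as an illustrative neighbouring key.
      jobs: >-
        {
          "build": "true",
          "connect-examples": "true"
        }
```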
if [[ "${{ github.repository }}" != 'apache/spark' ]]; then
yarn=`./dev/is-changed.py -m yarn`
kubernetes=`./dev/is-changed.py -m kubernetes`
@@ -127,6 +128,7 @@ jobs:
\"k8s-integration-tests\" : \"$kubernetes\",
\"buf\" : \"$buf\",
\"ui\" : \"$ui\",
\"connect-examples\": \"$connect_examples\"
}"
echo $precondition # For debugging
# Remove `\n` to avoid "Invalid format" error
@@ -1290,3 +1292,35 @@ jobs:
cd ui-test
npm install --save-dev
node --experimental-vm-modules node_modules/.bin/jest

connect-examples-build:
name: "Build modules: server-library-example"
needs: precondition
if: fromJson(needs.precondition.outputs.required).connect-examples == 'true'
runs-on: ubuntu-latest
steps:
- name: Checkout Spark repository
uses: actions/checkout@v4
with:
fetch-depth: 0
repository: apache/spark
ref: ${{ inputs.branch }}

- name: Sync the current branch with the latest in Apache Spark
if: github.repository != 'apache/spark'
run: |
echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV
git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/}
git -c user.name='Apache Spark Test Account' -c user.email='[email protected]' merge --no-commit --progress --squash FETCH_HEAD
git -c user.name='Apache Spark Test Account' -c user.email='[email protected]' commit -m "Merged commit" --allow-empty

- name: Set up Java
uses: actions/setup-java@v4
with:
distribution: zulu
java-version: ${{ inputs.java }}

- name: Build server-library-example
run: |
cd connect-examples/server-library-example
mvn clean package

Member commented:
Can we use build/sbt instead? We use SBT in the PR builder.

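For illustration, a hedged sketch of what that build step could look like if the example were registered in Spark's sbt build; the `connect-examples` project name below is hypothetical, since the example currently ships only a standalone Maven build:

```bash
# Hypothetical replacement for the "mvn clean package" step above, assuming an
# sbt project named `connect-examples` existed in Spark's build definition.
./build/sbt "connect-examples/package"
```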
24 changes: 10 additions & 14 deletions connect-examples/server-library-example/README.md
@@ -78,30 +78,26 @@ reading, writing and processing data in the custom format. The plugins (`CustomC
mvn clean package
```

3. **Download the `4.0.0-preview2` release to use as the Spark Connect Server**:
- Choose a distribution from https://archive.apache.org/dist/spark/spark-4.0.0-preview2/.
- Example: `curl -L https://archive.apache.org/dist/spark/spark-4.0.0-preview2/spark-4.0.0-preview2-bin-hadoop3.tgz | tar xz`

4. **Copy relevant JARs to the root of the unpacked Spark distribution**:
3. **Copy relevant JARs to the root of the unpacked Spark distribution**:
```bash
cp \
<SPARK_HOME>/connect-examples/server-library-example/resources/spark-daria_2.13-1.2.3.jar \
<SPARK_HOME>/connect-examples/server-library-example/common/target/spark-server-library-example-common-1.0.0.jar \
<SPARK_HOME>/connect-examples/server-library-example/server/target/spark-server-library-example-server-extension-1.0.0.jar \
.
cp \
connect-examples/server-library-example/resources/spark-daria_2.13-1.2.3.jar \
connect-examples/server-library-example/common/target/spark-server-library-example-common-1.0.0.jar \
connect-examples/server-library-example/server/target/spark-server-library-example-server-extension-1.0.0.jar \
.
```
5. **Start the Spark Connect Server with the relevant JARs**:
4. **Start the Spark Connect Server with the relevant JARs**:
```bash
bin/spark-connect-shell \
--jars spark-server-library-example-server-extension-1.0.0.jar,spark-server-library-example-common-1.0.0.jar,spark-daria_2.13-1.2.3.jar \
--conf spark.connect.extensions.relation.classes=org.apache.connect.examples.serverlibrary.CustomRelationPlugin \
--conf spark.connect.extensions.command.classes=org.apache.connect.examples.serverlibrary.CustomCommandPlugin
```
6. **In a different terminal, navigate back to the root of the sample project and start the client**:
5. **In a different terminal, start the client**:
```bash
java -cp client/target/spark-server-library-client-package-scala-1.0.0.jar org.apache.connect.examples.serverlibrary.CustomTableExample
java -cp connect-examples/server-library-example/client/target/spark-server-library-client-package-scala-1.0.0.jar org.apache.connect.examples.serverlibrary.CustomTableExample
```
7. **Notice the printed output in the client terminal as well as the creation of the cloned table**:
6. **Notice the printed output in the client terminal as well as the creation of the cloned table**:
```protobuf
Explaining plan for custom table: sample_table with path: <SPARK_HOME>/spark/connect-examples/server-library-example/client/../resources/dummy_data.custom
== Parsed Logical Plan ==
26 changes: 19 additions & 7 deletions connect-examples/server-library-example/client/pom.xml
@@ -37,12 +37,6 @@
<groupId>org.apache.connect.examples.serverlibrary</groupId>
<artifactId>spark-server-library-example-common</artifactId>
<version>1.0.0</version>
<exclusions>
<exclusion>
<groupId>com.google.protobuf</groupId>
<artifactId>protobuf-java</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- spark-connect-common contains proto definitions that we require to build custom commands/relations/expressions -->
<dependency>
@@ -62,7 +56,11 @@
<artifactId>scala-library</artifactId>
<version>${scala.version}</version>
</dependency>

<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>${connect.guava.version}</version>
</dependency>
</dependencies>

<build>
@@ -99,6 +97,20 @@
<shadedArtifactAttached>false</shadedArtifactAttached>
<promoteTransitiveDependencies>true</promoteTransitiveDependencies>
<createDependencyReducedPom>false</createDependencyReducedPom>
<filters>
<filter>
<artifact>com.fasterxml.jackson.core:jackson-core</artifact>
<excludes>
<exclude>META-INF/versions/**</exclude>
</excludes>
</filter>
</filters>
<relocations>
<relocation>
<pattern>com.google.common</pattern>
<shadedPattern>org.sparkproject.guava</shadedPattern>
</relocation>
</relocations>
<!--SPARK-42228: Add `ServicesResourceTransformer` to relocation class names in META-INF/services for grpc-->
<transformers>
<transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
@@ -19,7 +19,8 @@ package org.apache.connect.examples.serverlibrary

import com.google.protobuf.Any
import org.apache.spark.connect.proto.Command
import org.apache.spark.sql.{functions, Column, DataFrame, Dataset, Row, SparkSession}
import org.apache.spark.sql.{functions, Column, Row}
import org.apache.spark.sql.connect.{Dataset, SparkSession}

import org.apache.connect.examples.serverlibrary.proto
import org.apache.connect.examples.serverlibrary.proto.CreateTable.Column.{DataType => ProtoDataType}
@@ -19,7 +19,7 @@ package org.apache.connect.examples.serverlibrary

import com.google.protobuf.Any
import org.apache.spark.connect.proto.Command
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.connect.SparkSession

import org.apache.connect.examples.serverlibrary.CustomTable

@@ -21,7 +21,7 @@ import java.nio.file.{Path, Paths}

import com.google.protobuf.Any
import org.apache.spark.connect.proto.Command
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.connect.SparkSession
import org.apache.spark.sql.types.{StructType, StructField, StringType, IntegerType}

import org.apache.connect.examples.serverlibrary.proto
5 changes: 3 additions & 2 deletions connect-examples/server-library-example/pom.xml
@@ -36,7 +36,8 @@
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<scala.binary>2.13</scala.binary>
<scala.version>2.13.15</scala.version>
<protobuf.version>3.25.4</protobuf.version>
<spark.version>4.0.0-preview2</spark.version>
<protobuf.version>4.29.3</protobuf.version>
<spark.version>4.1.0-SNAPSHOT</spark.version>
<connect.guava.version>33.4.0-jre</connect.guava.version>

@LuciferYang (Contributor) commented on Mar 12, 2025:
The parent of this project should inherit from Spark's parent pom.xml, and the project version should be consistent with Spark's version; then spark.version should use ${project.version}.

Otherwise, the release script currently seems unable to automatically change the project version to the official version during the release process (4.1.0-SNAPSHOT -> 4.1.0).
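For illustration, a hedged sketch of what that could look like in connect-examples/server-library-example/pom.xml; the parent coordinates and relativePath below are assumptions based on Spark's usual layout:

```xml
<!-- Hypothetical: inherit Spark's parent POM so the version is managed centrally. -->
<parent>
  <groupId>org.apache.spark</groupId>
  <artifactId>spark-parent_2.13</artifactId>
  <version>4.1.0-SNAPSHOT</version>
  <relativePath>../../pom.xml</relativePath>
</parent>

<!-- Then spark.version can simply track the inherited project version. -->
<properties>
  <spark.version>${project.version}</spark.version>
</properties>
```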

Contributor Author replied:

What if we update the release script and add a rule/command to auto-change the project version in this pom file as well? This way, we can satisfy both continuous build compatibility with Spark and be somewhat independent (modulo the dependency on the ASF snapshot repo).

I'd like to avoid inheriting the parent pom as that would lead to the project pulling in Spark's default shading rules, version definitions etc. In this specific case, it wouldn't be favourable as it's intended to demonstrate the extension's development using a minimal set of dependencies (spark-sql-api, spark-connect-client, etc.).
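For illustration, a hedged sketch of such a release-script rule; the script it would live in is not specified here, and RELEASE_VERSION is a placeholder, but the property and path come from this PR's pom.xml:

```bash
# Hypothetical addition to the release tooling: rewrite the example's
# spark.version property alongside the main version bump
# (e.g. 4.1.0-SNAPSHOT -> 4.1.0).
RELEASE_VERSION="4.1.0"   # placeholder
sed -i "s|<spark.version>.*</spark.version>|<spark.version>${RELEASE_VERSION}</spark.version>|" \
  connect-examples/server-library-example/pom.xml
```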

@LuciferYang (Contributor) replied on Mar 12, 2025:
If feasible, it's certainly ok. However, I have a few questions regarding this:

  1. How should the versions of other dependencies be updated? Do they need to be consistent with Spark? For instance, the current Spark uses Scala 2.13.16, but this project is still using 2.13.15.

  2. During the release process, after changing the Spark version (e.g., from 4.0.0-SNAPSHOT to 4.0.0), is it necessary to check the build of this project?

  3. Since it aims to be an independent project, why don't we choose to maintain this examples project in a separate branch (no Spark code whatsoever), or even create a separate repository like spark-connect-examples? If it is an independent repository, would it be more convenient to also include examples for clients in other programming languages, such as Go or Swift?

Contributor commented:
What if we update the release script and add a rule/command to auto-change the project version in this pom file as well? This way, we can satisfy both continuous build compatibility with Spark and be somewhat independent (modulo the dependency on the ASF snapshot repo).

@vicennial Is there any progress on this PR? I think it would be best if we could resolve this issue in Spark 4.0.

Contributor commented:
@vicennial Is there any progress on this hypothetical plan? Or can we remove this example module from branch-4.0 first?

Contributor Author replied:
Thanks for the questions, @LuciferYang. I was AFK last week; back now.

How should the versions of other dependencies be updated? Do they need to be consistent with Spark? For instance, the current Spark uses Scala 2.13.16, but this project is still using 2.13.15.

Some (but not all) dependencies need to be consistent, such as the protobuf version. These would need to be updated as the Spark Connect code and dependencies evolve.

During the release process, after changing the Spark version (e.g., from 4.0.0-SNAPSHOT to 4.0.0), is it necessary to check the build of this project?

Since we've decided to add CI tests, I think it would make sense to do a final check at release time as well.

Since it aims to be independent project, why don't we choose to maintain this examples project in a separate branch(no Spark code whatsoever), or even create a separate repository like spark-connect-examples? If it is an independent repository, would it be more convenient to also include examples for clients in other programming languages, such as Go or Swift

I am not opposed to a separate branch/repository and I could see it working, but I must admit that I do not know the implications or pros/cons of creating a separate repository under the ASF. Perhaps the more seasoned committers may know; any ideas, @hvanhovell / @HyukjinKwon / @cloud-fan?

Contributor commented:
I'd like to avoid inheriting the parent pom as that would lead to the project pulling in Spark's default shading rules, version definitions etc. In this specific case, it wouldn't be favourable as it's intended to demonstrate the extension's development using a minimal set of dependencies (spark-sql-api, spark-connect-client, etc.).

After some consideration, if this project does not want to inherit Spark's parent pom.xml, it might be necessary to first deploy the Spark codebase corresponding to this commit to a local repository. Then, the current project would need to be built using the -Dmaven.repo.local=/path/to/local/repository option.

Another possible approach is to configure the ASF snapshot repository, but in this case, the project will not obtain a timely snapshot but rather a nightly build.
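For illustration, a hedged sketch of the first option described above; the repository path and flags are placeholders:

```bash
# Hypothetical flow: install the Spark artifacts built from this commit into a
# throwaway local Maven repository, then build the example against it.
cd <SPARK_HOME>
./build/mvn -DskipTests -Dmaven.repo.local=/tmp/spark-local-repo clean install
cd connect-examples/server-library-example
mvn -Dmaven.repo.local=/tmp/spark-local-repo clean package
```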

Contributor Author replied:
Thanks for the suggestions, @LuciferYang. I am exploring the first option at the moment.

</properties>
</project>