From e44e0a99c2c5c905bbbf1a431b92fe4c8eb9c204 Mon Sep 17 00:00:00 2001
From: Tian Gao <gaogaotiantian@hotmail.com>
Date: Mon, 30 Mar 2026 15:41:25 -0700
Subject: [PATCH 1/3] Add Java error classes to Python side

---
 python/pyspark/errors/error_classes.py     | 36 +++++++++++++++++++---
 python/pyspark/errors/tests/test_errors.py | 21 +++++++++++--
 2 files changed, 50 insertions(+), 7 deletions(-)

diff --git a/python/pyspark/errors/error_classes.py b/python/pyspark/errors/error_classes.py
index 12094d61336e0..c641d0e95fdce 100644
--- a/python/pyspark/errors/error_classes.py
+++ b/python/pyspark/errors/error_classes.py
@@ -15,14 +15,42 @@
 # limitations under the License.
 #
 
+import glob
 import json
 import importlib.resources
+import os
+import zipfile
+from pyspark.find_spark_home import _find_spark_home
 
 # Note: Though we call them "error classes" here, the proper name is "error conditions",
 #   hence why the name of the JSON file is different.
 #   For more information, please see: https://issues.apache.org/jira/browse/SPARK-46810
 #   This discrepancy will be resolved as part of: https://issues.apache.org/jira/browse/SPARK-47429
-ERROR_CLASSES_JSON = (
-    importlib.resources.files("pyspark.errors").joinpath("error-conditions.json").read_text()
-)
-ERROR_CLASSES_MAP = json.loads(ERROR_CLASSES_JSON)
+
+
+def get_error_classes():
+    python_error_classes_json = (
+        importlib.resources.files("pyspark.errors").joinpath("error-conditions.json").read_text()
+    )
+    python_error_classes_map = json.loads(python_error_classes_json)
+
+    # Best-effort: also load the Java error classes from the jars so Python recognizes them too.
+    # NOTE(review): _find_spark_home() appears to sys.exit() when no Spark home exists - confirm.
+    try:
+        spark_home = glob.escape(_find_spark_home())  # keep SPARK_HOME literal inside the glob
+    except SystemExit:
+        return python_error_classes_map
+
+    # Released spark packages have the jars in SPARK_HOME/jars, and development builds have them
+    # in assembly/target. Sorting keeps the jar choice deterministic if several match.
+    java_error_classes_map = {}
+    for bin_dir in ("jars", "assembly/target/scala-*/jars"):
+        jars = sorted(glob.glob(os.path.join(spark_home, bin_dir, "spark-common-utils_*.jar")))
+        if jars:
+            with zipfile.ZipFile(jars[0]) as zf:
+                java_error_classes_map = json.loads(zf.read("error/error-conditions.json"))
+            break
+    return java_error_classes_map | python_error_classes_map  # Python entries win on conflict
+
+
+ERROR_CLASSES_MAP = get_error_classes()  # built once, when this module is imported
diff --git a/python/pyspark/errors/tests/test_errors.py b/python/pyspark/errors/tests/test_errors.py
index 7c25c6965cbd1..8d64c0decd662 100644
--- a/python/pyspark/errors/tests/test_errors.py
+++ b/python/pyspark/errors/tests/test_errors.py
@@ -16,19 +16,23 @@
 # limitations under the License.
 #
 
+import importlib.resources
 import json
 import unittest
 
 from pyspark.errors import PySparkRuntimeError, PySparkValueError
-from pyspark.errors.error_classes import ERROR_CLASSES_JSON
 from pyspark.errors.utils import ErrorClassesReader
 
 
 class ErrorsTest(unittest.TestCase):
     def test_error_classes_sorted(self):
         # Test error classes is sorted alphabetically
-        error_reader = ErrorClassesReader()
-        error_class_names = list(error_reader.error_info_map.keys())
+        ERROR_CLASSES_JSON = (
+            importlib.resources.files("pyspark.errors")
+            .joinpath("error-conditions.json")
+            .read_text()
+        )
+        error_class_names = list(json.loads(ERROR_CLASSES_JSON).keys())
         for i in range(len(error_class_names) - 1):
             self.assertTrue(
                 error_class_names[i] < error_class_names[i + 1],
@@ -48,6 +52,12 @@ def detect_duplication(pairs):
                 error_classes_json[name] = message
             return error_classes_json
 
+        ERROR_CLASSES_JSON = (
+            importlib.resources.files("pyspark.errors")
+            .joinpath("error-conditions.json")
+            .read_text()
+        )
+
         json.loads(ERROR_CLASSES_JSON, object_pairs_hook=detect_duplication)
 
     def test_invalid_error_class(self):
@@ -108,6 +118,11 @@ def test_breaking_change_info(self):
         subclass_map = error_reader.error_info_map["TEST_ERROR_WITH_SUB_CLASS"]["sub_class"]
         self.assertEqual(breaking_change_info2, subclass_map["SUBCLASS"]["breaking_change_info"])
 
+    def test_java_error_classes(self):
+        # AGGREGATE_OUT_OF_MEMORY is expected to be supplied by the Java-side error conditions.
+        msg = ErrorClassesReader().get_error_message("AGGREGATE_OUT_OF_MEMORY", {})
+        self.assertEqual(msg, "No enough memory for aggregation")
+
     def test_sqlstate(self):
         error = PySparkRuntimeError(errorClass="APPLICATION_NAME_NOT_SET", messageParameters={})
         self.assertIsNone(error.getSqlState())

From 1138a5332d926fb89f4824adf0993467e978bc3b Mon Sep 17 00:00:00 2001
From: Tian Gao <gaogaotiantian@hotmail.com>
Date: Mon, 30 Mar 2026 17:29:07 -0700
Subject: [PATCH 2/3] Add type hint

---
 python/pyspark/errors/error_classes.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/pyspark/errors/error_classes.py b/python/pyspark/errors/error_classes.py
index c641d0e95fdce..30844ffecd317 100644
--- a/python/pyspark/errors/error_classes.py
+++ b/python/pyspark/errors/error_classes.py
@@ -28,7 +28,7 @@
 #   This discrepancy will be resolved as part of: https://issues.apache.org/jira/browse/SPARK-47429
 
 
-def get_error_classes():
+def get_error_classes() -> dict[str, dict]:
     python_error_classes_json = (
         importlib.resources.files("pyspark.errors").joinpath("error-conditions.json").read_text()
     )

From 3f1e67c5971c46956f73108b0d7d215cee76d8f1 Mon Sep 17 00:00:00 2001
From: Tian Gao <gaogaotiantian@hotmail.com>
Date: Mon, 30 Mar 2026 17:29:44 -0700
Subject: [PATCH 3/3] Fix error doc gen

---
 python/pyspark/errors_doc_gen.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/python/pyspark/errors_doc_gen.py b/python/pyspark/errors_doc_gen.py
index 53b8b8d1e12f1..93187e3955709 100644
--- a/python/pyspark/errors_doc_gen.py
+++ b/python/pyspark/errors_doc_gen.py
@@ -1,7 +1,7 @@
+import importlib.resources
+import json
 import re
 
-from pyspark.errors.error_classes import ERROR_CLASSES_MAP
-
 
 def generate_errors_doc(output_rst_file_path: str) -> None:
     """
@@ -47,7 +47,13 @@ def generate_errors_doc(output_rst_file_path: str) -> None:
 """
     with open(output_rst_file_path, "w") as f:
         f.write(header + "\n\n")
-        for error_key, error_details in ERROR_CLASSES_MAP.items():
+        python_error_classes_json = (
+            importlib.resources.files("pyspark.errors")
+            .joinpath("error-conditions.json")
+            .read_text()
+        )
+        python_error_classes_map = json.loads(python_error_classes_json)
+        for error_key, error_details in python_error_classes_map.items():
             f.write(error_key + "\n")
             # The length of the error class name and underline must be the same
             # to satisfy the RST format.