Add DWARF Variant Part Support for Polymorphic Variables in CUDA Debug Info

jiel-nv · jiel-nv · commit 387f756ac6aa · 2025-10-23T19:21:35.000Z
diff --git a/numba_cuda/numba/cuda/debuginfo.py b/numba_cuda/numba/cuda/debuginfo.py
@@ -13,6 +13,58 @@
 from numba.cuda.types import GridGroup
 
 
+# Check if CUDA Toolkit and llvmlite support polymorphic debug info
+def _get_llvmlite_version():
+    """Get llvmlite version as tuple (major, minor); (0, 0) if unknown."""
+    try:
+        import llvmlite
+        version_str = llvmlite.__version__
+        # Parse version string like "0.46.0" or "0.46.0dev"
+        parts = version_str.split('.')
+        major = int(parts[0])
+        minor = int(parts[1])
+        return (major, minor)
+    except Exception:
+        return (0, 0)
+
+def _check_polymorphic_debug_info_support():
+    """Check if CTK and llvmlite support polymorphic debug info.
+
+    Returns:
+        tuple: (supported: bool, use_typed_const: bool)
+        - supported: Whether feature is supported at all
+        - use_typed_const: True to emit extraData as a typed constant,
+                           False to emit it as a metadata node reference
+    """
+    try:
+        from numba.cuda.cudadrv import runtime
+        ctk_version = runtime.get_version()
+        llvmlite_version = _get_llvmlite_version()
+
+        # Support should be available with CTK newer than 13.1
+        if ctk_version <= (13, 1):
+            return (False, False)
+
+        # llvmlite > 0.45: use typed constant
+        # llvmlite <= 0.45: use node reference
+        use_typed_const = llvmlite_version > (0, 45)
+        return (True, use_typed_const)
+
+    except Exception:
+        return (False, False)
+
+# Check support and determine mode
+(DEBUG_POLY_SUPPORTED,
+ DEBUG_POLY_USE_TYPED_CONST) = _check_polymorphic_debug_info_support()
+
+# Set config based on polymorphic debug info support
+if not hasattr(config, "CUDA_DEBUG_POLY"):
+    config.CUDA_DEBUG_POLY = DEBUG_POLY_SUPPORTED
+if not hasattr(config, "CUDA_DEBUG_POLY_USE_TYPED_CONST"):
+    config.CUDA_DEBUG_POLY_USE_TYPED_CONST = (
+        DEBUG_POLY_USE_TYPED_CONST
+    )
+
 @contextmanager
 def suspend_emission(builder):
     """Suspends the emission of debug_metadata for the duration of the context
@@ -619,7 +671,10 @@ def _var_type(self, lltype, size, datamodel=None):
                 # Ignore the "tag" field, focus on the "payload" field which
                 # contains the data types in memory
                 if field == "payload":
-                    for mod in model.inner_models():
+                    # Store metadata dictionaries to create members later
+                    member_metadata_dicts = []
+
+                    for index, mod in enumerate(model.inner_models()):
                         dtype = mod.get_value_type()
                         membersize = self.cgctx.get_abi_sizeof(dtype)
                         basetype = self._var_type(
@@ -632,33 +687,103 @@ def _var_type(self, lltype, size, datamodel=None):
                         # Use a prefix "_" on type names as field names
                         membername = "_" + typename
                         memberwidth = _BYTE_SIZE * membersize
+                        # Build the metadata dictionary
+                        metadata_dict = {
+                            "tag": ir.DIToken("DW_TAG_member"),
+                            "name": membername,
+                            "baseType": basetype,
+                            # DW_TAG_member size is in bits
+                            "size": memberwidth,
+                        }
+                        if config.CUDA_DEBUG_POLY:
+                            # Polymorphic debug info with DW_TAG_variant
+                            # extraData depends on llvmlite version
+                            if config.CUDA_DEBUG_POLY_USE_TYPED_CONST:
+                                metadata_dict["extraData"] = (
+                                    ir.IntType(8)(index)
+                                )
+                            else:
+                                # Use metadata node reference
+                                metadata_dict["extraData"] = (
+                                    m.add_metadata([ir.IntType(8)(index)])
+                                )
+                            # Add offset to each variant member
+                            # Offset (in bits) equals the member's own width
+                            metadata_dict["offset"] = memberwidth
+                        member_metadata_dicts.append(metadata_dict)
+                        if memberwidth > maxwidth:
+                            maxwidth = memberwidth
+
+                    # Create the member DIDerivedTypes
+                    for metadata_dict in member_metadata_dicts:
                         derived_type = m.add_debug_info(
-                            "DIDerivedType",
-                            {
-                                "tag": ir.DIToken("DW_TAG_member"),
-                                "name": membername,
-                                "baseType": basetype,
-                                # DW_TAG_member size is in bits
-                                "size": memberwidth,
-                            },
+                            "DIDerivedType", metadata_dict
                         )
                         meta.append(derived_type)
-                        if memberwidth > maxwidth:
-                            maxwidth = memberwidth
 
-            fake_union_name = "dbg_poly_union"
-            return m.add_debug_info(
-                "DICompositeType",
-                {
-                    "file": self.difile,
-                    "tag": ir.DIToken("DW_TAG_union_type"),
-                    "name": fake_union_name,
-                    "identifier": str(lltype),
-                    "elements": m.add_metadata(meta),
-                    "size": maxwidth,
-                },
-                is_distinct=True,
-            )
+            if config.CUDA_DEBUG_POLY:
+                # Polymorphic variable debug info generation
+                wrapper_struct_size = 2 * maxwidth
+                discriminator = m.add_debug_info(
+                    "DIDerivedType",
+                    {
+                        "tag": ir.DIToken("DW_TAG_member"),
+                        "name": "discriminator",
+                        "baseType": m.add_debug_info("DIBasicType", {
+                            "name": "int",
+                            "size": _BYTE_SIZE,
+                            "encoding": ir.DIToken("DW_ATE_unsigned")
+                        }),
+                        "size": _BYTE_SIZE,
+                        "flags": ir.DIToken("DIFlagArtificial"),
+                    },
+                )
+                # Create the final variant_part with actual members
+                variant_elements_metadata = m.add_metadata(meta)
+                variant_unique_identifier = str(id(variant_elements_metadata))
+                variant_part_type = m.add_debug_info(
+                    "DICompositeType",
+                    {
+                        "file": self.difile,
+                        "tag": ir.DIToken("DW_TAG_variant_part"),
+                        "name": "variant_part",
+                        "identifier": variant_unique_identifier,
+                        "elements": variant_elements_metadata,
+                        "size": maxwidth,
+                        "discriminator": discriminator,
+                    },
+                )
+                # Create elements metadata for wrapper struct
+                elements_metadata = m.add_metadata(
+                    [discriminator, variant_part_type]
+                )
+                unique_identifier = str(id(elements_metadata))
+                wrapper_struct = m.add_debug_info(
+                    "DICompositeType",
+                    {
+                        "file": self.difile,
+                        "tag": ir.DIToken("DW_TAG_structure_type"),
+                        "name": "variant_wrapper_struct",
+                        "identifier": unique_identifier,
+                        "elements": elements_metadata,
+                        "size": wrapper_struct_size,
+                    },
+                )
+                return wrapper_struct
+            else:
+                fake_union_name = "dbg_poly_union"
+                return m.add_debug_info(
+                    "DICompositeType",
+                    {
+                        "file": self.difile,
+                        "tag": ir.DIToken("DW_TAG_union_type"),
+                        "name": fake_union_name,
+                        "identifier": str(lltype),
+                        "elements": m.add_metadata(meta),
+                        "size": maxwidth,
+                    },
+                    is_distinct = True,
+                )
         # For other cases, use upstream Numba implementation
         return super()._var_type(lltype, size, datamodel=datamodel)
 
diff --git a/numba_cuda/numba/cuda/lowering.py b/numba_cuda/numba/cuda/lowering.py
@@ -1689,6 +1689,56 @@ def storevar(self, value, name, argidx=None):
         """
         Store the value into the given variable.
         """
+        # Handle polymorphic variables with CUDA_DEBUG_POLY enabled
+        if config.CUDA_DEBUG_POLY:
+            src_name = name.split(".")[0]
+            if src_name in self.poly_var_typ_map:
+                # Ensure allocation happens first (if needed)
+                fetype = self.typeof(name)
+                self._alloca_var(name, fetype)
+                # Discriminant and data are located in the same union
+                ptr = self.poly_var_loc_map[src_name]
+                # First, write the discriminant as i8 at the start of the union
+                dtype = types.UnionType(self.poly_var_typ_map[src_name])
+                # Compute discriminant = index of type in sorted union
+                if isinstance(fetype, types.Literal):
+                    lookup_type = fetype.literal_type
+                else:
+                    lookup_type = fetype
+                discriminant_val = list(dtype.types).index(lookup_type)
+                # Bitcast union pointer directly to i8* and write
+                # discriminant at offset 0
+                discriminant_ptr = self.builder.bitcast(
+                    ptr,
+                    llvm_ir.PointerType(llvm_ir.IntType(8))
+                )
+                discriminant_i8 = llvm_ir.Constant(
+                    llvm_ir.IntType(8),
+                    discriminant_val
+                )
+                self.builder.store(discriminant_i8, discriminant_ptr)
+                # Second, write the data at byte offset sizeof(fetype)
+                lltype = self.context.get_value_type(fetype)
+                sizeof_bytes = self.context.get_abi_sizeof(lltype)
+                # Bitcast to i8* and use byte-level GEP
+                byte_ptr = self.builder.bitcast(
+                    ptr,
+                    llvm_ir.PointerType(llvm_ir.IntType(8))
+                )
+                data_byte_ptr = self.builder.gep(
+                    byte_ptr,
+                    [llvm_ir.Constant(llvm_ir.IntType(32),
+                                      sizeof_bytes)],
+                )
+                # Cast to the correct type pointer
+                castptr = self.builder.bitcast(
+                    data_byte_ptr,
+                    llvm_ir.PointerType(lltype)
+                )
+                self.builder.store(value, castptr)
+                return
+
+        # For non-polymorphic variables, use parent implementation
         super().storevar(value, name, argidx)
 
         # Emit llvm.dbg.value instead of llvm.dbg.declare for local scalar
@@ -1814,8 +1864,12 @@ def _alloca_var(self, name, fetype):
                     datamodel = self.context.data_model_manager[dtype]
                     # UnionType has sorted set of types, max at last index
                     maxsizetype = dtype.types[-1]
-                    # Create a single element aggregate type
-                    aggr_type = types.UniTuple(maxsizetype, 1)
+                    if config.CUDA_DEBUG_POLY:
+                        # allocate double the size for (discriminant, data)
+                        aggr_type = types.UniTuple(maxsizetype, 2)
+                    else:
+                        # allocate single element for data only
+                        aggr_type = types.UniTuple(maxsizetype, 1)
                     lltype = self.context.get_value_type(aggr_type)
                     ptr = self.alloca_lltype(src_name, lltype, datamodel)
                     # save the location of the union type for polymorphic var
@@ -1866,9 +1920,31 @@ def getvar(self, name):
             src_name = name.split(".")[0]
             fetype = self.typeof(name)
             lltype = self.context.get_value_type(fetype)
-            castptr = self.builder.bitcast(
-                self.poly_var_loc_map[src_name], llvm_ir.PointerType(lltype)
-            )
+            ptr = self.poly_var_loc_map[src_name]
+
+            if config.CUDA_DEBUG_POLY:
+                # With CUDA_DEBUG_POLY enabled, read value at
+                # offset = sizeof(fetype) in bytes
+                sizeof_bytes = self.context.get_abi_sizeof(lltype)
+                # Bitcast to i8* and use byte-level GEP
+                byte_ptr = self.builder.bitcast(
+                    ptr,
+                    llvm_ir.PointerType(llvm_ir.IntType(8))
+                )
+                value_byte_ptr = self.builder.gep(
+                    byte_ptr,
+                    [llvm_ir.Constant(llvm_ir.IntType(32),
+                                      sizeof_bytes)],
+                )
+                # Cast to the correct type pointer
+                castptr = self.builder.bitcast(
+                    value_byte_ptr, llvm_ir.PointerType(lltype)
+                )
+            else:
+                # Otherwise, just bitcast to the correct type
+                castptr = self.builder.bitcast(
+                    ptr, llvm_ir.PointerType(lltype)
+                )
             return castptr
         else:
             return super().getvar(name)
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py b/numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py
@@ -7,6 +7,7 @@
 from numba import cuda
 from numba.core import types
 from numba.cuda.testing import CUDATestCase
+from numba.cuda.core import config
 from textwrap import dedent
 import math
 import itertools
@@ -403,6 +404,8 @@ def f(x):
         match = re.compile(pat).search(llvm_ir)
         self.assertIsNone(match, msg=llvm_ir)
 
+    @unittest.skipIf(config.CUDA_DEBUG_POLY,
+                     "Uses old union format, not variant_part")
     def test_union_poly_types(self):
         sig = (types.int32, types.int32)
 
@@ -460,6 +463,47 @@ def a_union_use_case(arg, results):
         expected = "[1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]"
         self.assertIn(expected, out.getvalue())
 
+    @unittest.skipUnless(config.CUDA_DEBUG_POLY,
+                         "CUDA_DEBUG_POLY not enabled")
+    def test_poly_variant_part(self):
+        """Test polymorphic variables with DW_TAG_variant_part.
+
+        This test verifies that when CUDA_DEBUG_POLY is enabled,
+        polymorphic variables generate proper DWARF5 variant_part
+        debug information with discriminator and variant members.
+        """
+        # Typed constant: i8 0, i8 1, etc. | Node reference: !123, !456
+        if config.CUDA_DEBUG_POLY_USE_TYPED_CONST:
+            extradata_pattern = "i8 {{[0-9]+}}"
+        else:
+            extradata_pattern = "{{![0-9]+}}"
+
+        @cuda.jit("void()", debug=True, opt=False)
+        def f():
+            foo = 100  # noqa: F841
+            foo = 3.14  # noqa: F841
+            foo = True  # noqa: F841
+            foo = np.int32(42)  # noqa: F841
+
+        llvm_ir = f.inspect_llvm()[tuple()]
+
+        # Build FileCheck pattern dynamically based on config
+        # Capture node IDs and verify the hierarchical structure
+        check_pattern = """
+            CHECK-DAG: !DILocalVariable({{.*}}name: "foo"{{.*}}type: [[WRAPPER:![0-9]+]]
+            CHECK-DAG: [[WRAPPER]] = !DICompositeType({{.*}}elements: [[ELEMENTS:![0-9]+]]{{.*}}name: "variant_wrapper_struct"{{.*}}size: 128{{.*}}tag: DW_TAG_structure_type)
+            CHECK-DAG: [[ELEMENTS]] = !{ [[DISC:![0-9]+]], [[VPART:![0-9]+]] }
+            CHECK-DAG: [[DISC]] = !DIDerivedType({{.*}}name: "discriminator"{{.*}}size: 8{{.*}}tag: DW_TAG_member)
+            CHECK-DAG: [[VPART]] = !DICompositeType({{.*}}discriminator: [[DISC]]{{.*}}elements: [[VMEMBERS:![0-9]+]]{{.*}}tag: DW_TAG_variant_part)
+            CHECK-DAG: [[VMEMBERS]] = !{ [[VM1:![0-9]+]], [[VM2:![0-9]+]], [[VM3:![0-9]+]], [[VM4:![0-9]+]] }
+            CHECK-DAG: [[VM1]] = !DIDerivedType({{.*}}extraData: EXTRADATA{{.*}}name: "_bool"{{.*}}offset: 8{{.*}}tag: DW_TAG_member)
+            CHECK-DAG: [[VM2]] = !DIDerivedType({{.*}}extraData: EXTRADATA{{.*}}name: "_float64"{{.*}}offset: 64{{.*}}tag: DW_TAG_member)
+            CHECK-DAG: [[VM3]] = !DIDerivedType({{.*}}extraData: EXTRADATA{{.*}}name: "_int32"{{.*}}offset: 32{{.*}}tag: DW_TAG_member)
+            CHECK-DAG: [[VM4]] = !DIDerivedType({{.*}}extraData: EXTRADATA{{.*}}name: "_int64"{{.*}}offset: 64{{.*}}tag: DW_TAG_member)
+        """.replace("EXTRADATA", extradata_pattern)
+
+        self.assertFileCheckMatches(llvm_ir, check_pattern)
+
     def test_DW_LANG(self):
         @cuda.jit(debug=True, opt=False)
         def foo():