Skip to content

Commit 387f756

Browse files
committed
Add DWARF Variant Part Support for Polymorphic Variables in CUDA Debug Info
1 parent 2567b28 commit 387f756

File tree

3 files changed

+274
-29
lines changed

3 files changed

+274
-29
lines changed

numba_cuda/numba/cuda/debuginfo.py

Lines changed: 149 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,58 @@
1313
from numba.cuda.types import GridGroup
1414

1515

16+
# Check if CUDA Toolkit and llvmlite support polymorphic debug info
17+
def _get_llvmlite_version():
    """Return the installed llvmlite version as a ``(major, minor)`` tuple.

    Only the leading digits of the first two dot-separated components are
    read, so ``"0.46.0"``, ``"0.46.0dev0"`` and ``"0.46rc1"`` all yield
    ``(0, 46)``. The patch level is intentionally not reported; callers
    compare against two-element tuples.

    Returns:
        tuple: ``(major, minor)`` as ints, or ``(0, 0)`` when llvmlite cannot
        be imported or its version string does not start with
        ``<digits>.<digits>``.
    """
    import re

    try:
        import llvmlite

        version_str = str(llvmlite.__version__)
    except Exception:
        # Best-effort probe evaluated at import time: any failure to load
        # llvmlite or read its version is reported as "unknown", not raised.
        return (0, 0)

    # Anchor at the start and take digits only, so pre-release or local
    # suffixes glued to the minor component ("46rc1") do not break parsing.
    match = re.match(r"(\d+)\.(\d+)", version_str.strip())
    if match is None:
        return (0, 0)
    return (int(match.group(1)), int(match.group(2)))
29+
30+
def _check_polymorphic_debug_info_support():
    """Probe the CUDA toolkit and llvmlite for polymorphic debug info support.

    Returns:
        tuple: ``(supported, use_typed_const)``
            - supported: False when the toolkit version reported by the CUDA
              runtime is ``(13, 1)`` or older, or when any step of the probe
              raises; True otherwise.
            - use_typed_const: True when llvmlite is newer than 0.45 (emit a
              typed constant), False otherwise (emit a metadata node
              reference). Always False when ``supported`` is False.
    """
    unsupported = (False, False)
    try:
        from numba.cuda.cudadrv import runtime

        toolkit = runtime.get_version()
        llvmlite_ver = _get_llvmlite_version()

        # Only toolkits strictly newer than 13.1 qualify.
        if toolkit <= (13, 1):
            return unsupported

        # llvmlite > 0.45 -> typed constant; llvmlite <= 0.45 -> node reference
        return (True, llvmlite_ver > (0, 45))
    except Exception:
        # Any failure while probing is treated as "feature unavailable".
        return unsupported
55+
56+
# Check support and determine mode
57+
DEBUG_POLY_SUPPORTED, DEBUG_POLY_USE_TYPED_CONST = (
    _check_polymorphic_debug_info_support()
)

# Publish the probe results on ``config``; an attribute that is already
# present is left untouched.
if not hasattr(config, "CUDA_DEBUG_POLY"):
    config.CUDA_DEBUG_POLY = DEBUG_POLY_SUPPORTED
if not hasattr(config, "CUDA_DEBUG_POLY_USE_TYPED_CONST"):
    config.CUDA_DEBUG_POLY_USE_TYPED_CONST = DEBUG_POLY_USE_TYPED_CONST
67+
1668
@contextmanager
1769
def suspend_emission(builder):
1870
"""Suspends the emission of debug_metadata for the duration of the context
@@ -619,7 +671,10 @@ def _var_type(self, lltype, size, datamodel=None):
619671
# Ignore the "tag" field, focus on the "payload" field which
620672
# contains the data types in memory
621673
if field == "payload":
622-
for mod in model.inner_models():
674+
# Store metadata dictionaries to create members later
675+
member_metadata_dicts = []
676+
677+
for index, mod in enumerate(model.inner_models()):
623678
dtype = mod.get_value_type()
624679
membersize = self.cgctx.get_abi_sizeof(dtype)
625680
basetype = self._var_type(
@@ -632,33 +687,103 @@ def _var_type(self, lltype, size, datamodel=None):
632687
# Use a prefix "_" on type names as field names
633688
membername = "_" + typename
634689
memberwidth = _BYTE_SIZE * membersize
690+
# Build the metadata dictionary
691+
metadata_dict = {
692+
"tag": ir.DIToken("DW_TAG_member"),
693+
"name": membername,
694+
"baseType": basetype,
695+
# DW_TAG_member size is in bits
696+
"size": memberwidth,
697+
}
698+
if config.CUDA_DEBUG_POLY:
699+
# Polymorphic debug info with DW_TAG_variant
700+
# extraData depends on llvmlite version
701+
if config.CUDA_DEBUG_POLY_USE_TYPED_CONST:
702+
metadata_dict["extraData"] = (
703+
ir.IntType(8)(index)
704+
)
705+
else:
706+
# Use metadata node reference
707+
metadata_dict["extraData"] = (
708+
m.add_metadata([ir.IntType(8)(index)])
709+
)
710+
# Add offset to each variant member
711+
# Offset equals the element's own width
712+
metadata_dict["offset"] = memberwidth
713+
member_metadata_dicts.append(metadata_dict)
714+
if memberwidth > maxwidth:
715+
maxwidth = memberwidth
716+
717+
# Create the member DIDerivedTypes
718+
for metadata_dict in member_metadata_dicts:
635719
derived_type = m.add_debug_info(
636-
"DIDerivedType",
637-
{
638-
"tag": ir.DIToken("DW_TAG_member"),
639-
"name": membername,
640-
"baseType": basetype,
641-
# DW_TAG_member size is in bits
642-
"size": memberwidth,
643-
},
720+
"DIDerivedType", metadata_dict
644721
)
645722
meta.append(derived_type)
646-
if memberwidth > maxwidth:
647-
maxwidth = memberwidth
648723

649-
fake_union_name = "dbg_poly_union"
650-
return m.add_debug_info(
651-
"DICompositeType",
652-
{
653-
"file": self.difile,
654-
"tag": ir.DIToken("DW_TAG_union_type"),
655-
"name": fake_union_name,
656-
"identifier": str(lltype),
657-
"elements": m.add_metadata(meta),
658-
"size": maxwidth,
659-
},
660-
is_distinct=True,
661-
)
724+
if config.CUDA_DEBUG_POLY:
725+
# Polymorphic variable debug info generation
726+
wrapper_struct_size = 2 * maxwidth
727+
discriminator = m.add_debug_info(
728+
"DIDerivedType",
729+
{
730+
"tag": ir.DIToken("DW_TAG_member"),
731+
"name": "discriminator",
732+
"baseType": m.add_debug_info("DIBasicType", {
733+
"name": "int",
734+
"size": _BYTE_SIZE,
735+
"encoding": ir.DIToken("DW_ATE_unsigned")
736+
}),
737+
"size": _BYTE_SIZE,
738+
"flags": ir.DIToken("DIFlagArtificial"),
739+
},
740+
)
741+
# Create the final variant_part with actual members
742+
variant_elements_metadata = m.add_metadata(meta)
743+
variant_unique_identifier = str(id(variant_elements_metadata))
744+
variant_part_type = m.add_debug_info(
745+
"DICompositeType",
746+
{
747+
"file": self.difile,
748+
"tag": ir.DIToken("DW_TAG_variant_part"),
749+
"name": "variant_part",
750+
"identifier": variant_unique_identifier,
751+
"elements": variant_elements_metadata,
752+
"size": maxwidth,
753+
"discriminator": discriminator,
754+
},
755+
)
756+
# Create elements metadata for wrapper struct
757+
elements_metadata = m.add_metadata(
758+
[discriminator, variant_part_type]
759+
)
760+
unique_identifier = str(id(elements_metadata))
761+
wrapper_struct = m.add_debug_info(
762+
"DICompositeType",
763+
{
764+
"file": self.difile,
765+
"tag": ir.DIToken("DW_TAG_structure_type"),
766+
"name": "variant_wrapper_struct",
767+
"identifier": unique_identifier,
768+
"elements": elements_metadata,
769+
"size": wrapper_struct_size,
770+
},
771+
)
772+
return wrapper_struct
773+
else:
774+
fake_union_name = "dbg_poly_union"
775+
return m.add_debug_info(
776+
"DICompositeType",
777+
{
778+
"file": self.difile,
779+
"tag": ir.DIToken("DW_TAG_union_type"),
780+
"name": fake_union_name,
781+
"identifier": str(lltype),
782+
"elements": m.add_metadata(meta),
783+
"size": maxwidth,
784+
},
785+
is_distinct = True,
786+
)
662787
# For other cases, use upstream Numba implementation
663788
return super()._var_type(lltype, size, datamodel=datamodel)
664789

numba_cuda/numba/cuda/lowering.py

Lines changed: 81 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1689,6 +1689,56 @@ def storevar(self, value, name, argidx=None):
16891689
"""
16901690
Store the value into the given variable.
16911691
"""
1692+
# Handle polymorphic variables with CUDA_DEBUG_POLY enabled
1693+
if config.CUDA_DEBUG_POLY:
1694+
src_name = name.split(".")[0]
1695+
if src_name in self.poly_var_typ_map:
1696+
# Ensure allocation happens first (if needed)
1697+
fetype = self.typeof(name)
1698+
self._alloca_var(name, fetype)
1699+
# Discriminant and data are located in the same union
1700+
ptr = self.poly_var_loc_map[src_name]
1701+
# Firstly write discriminant to the beginning of union as i8
1702+
dtype = types.UnionType(self.poly_var_typ_map[src_name])
1703+
# Compute discriminant = index of type in sorted union
1704+
if isinstance(fetype, types.Literal):
1705+
lookup_type = fetype.literal_type
1706+
else:
1707+
lookup_type = fetype
1708+
discriminant_val = list(dtype.types).index(lookup_type)
1709+
# Bitcast union pointer directly to i8* and write
1710+
# discriminant at offset 0
1711+
discriminant_ptr = self.builder.bitcast(
1712+
ptr,
1713+
llvm_ir.PointerType(llvm_ir.IntType(8))
1714+
)
1715+
discriminant_i8 = llvm_ir.Constant(
1716+
llvm_ir.IntType(8),
1717+
discriminant_val
1718+
)
1719+
self.builder.store(discriminant_i8, discriminant_ptr)
1720+
# Secondly write data at offset = sizeof(fetype) in bytes
1721+
lltype = self.context.get_value_type(fetype)
1722+
sizeof_bytes = self.context.get_abi_sizeof(lltype)
1723+
# Bitcast to i8* and use byte-level GEP
1724+
byte_ptr = self.builder.bitcast(
1725+
ptr,
1726+
llvm_ir.PointerType(llvm_ir.IntType(8))
1727+
)
1728+
data_byte_ptr = self.builder.gep(
1729+
byte_ptr,
1730+
[llvm_ir.Constant(llvm_ir.IntType(32),
1731+
sizeof_bytes)],
1732+
)
1733+
# Cast to the correct type pointer
1734+
castptr = self.builder.bitcast(
1735+
data_byte_ptr,
1736+
llvm_ir.PointerType(lltype)
1737+
)
1738+
self.builder.store(value, castptr)
1739+
return
1740+
1741+
# For non-polymorphic variables, use parent implementation
16921742
super().storevar(value, name, argidx)
16931743

16941744
# Emit llvm.dbg.value instead of llvm.dbg.declare for local scalar
@@ -1814,8 +1864,12 @@ def _alloca_var(self, name, fetype):
18141864
datamodel = self.context.data_model_manager[dtype]
18151865
# UnionType has sorted set of types, max at last index
18161866
maxsizetype = dtype.types[-1]
1817-
# Create a single element aggregate type
1818-
aggr_type = types.UniTuple(maxsizetype, 1)
1867+
if config.CUDA_DEBUG_POLY:
1868+
# allocate double the size for (discriminant, data)
1869+
aggr_type = types.UniTuple(maxsizetype, 2)
1870+
else:
1871+
# allocate single element for data only
1872+
aggr_type = types.UniTuple(maxsizetype, 1)
18191873
lltype = self.context.get_value_type(aggr_type)
18201874
ptr = self.alloca_lltype(src_name, lltype, datamodel)
18211875
# save the location of the union type for polymorphic var
@@ -1866,9 +1920,31 @@ def getvar(self, name):
18661920
src_name = name.split(".")[0]
18671921
fetype = self.typeof(name)
18681922
lltype = self.context.get_value_type(fetype)
1869-
castptr = self.builder.bitcast(
1870-
self.poly_var_loc_map[src_name], llvm_ir.PointerType(lltype)
1871-
)
1923+
ptr = self.poly_var_loc_map[src_name]
1924+
1925+
if config.CUDA_DEBUG_POLY:
1926+
# With CUDA_DEBUG_POLY enabled, read value at
1927+
# offset = sizeof(fetype) in bytes
1928+
sizeof_bytes = self.context.get_abi_sizeof(lltype)
1929+
# Bitcast to i8* and use byte-level GEP
1930+
byte_ptr = self.builder.bitcast(
1931+
ptr,
1932+
llvm_ir.PointerType(llvm_ir.IntType(8))
1933+
)
1934+
value_byte_ptr = self.builder.gep(
1935+
byte_ptr,
1936+
[llvm_ir.Constant(llvm_ir.IntType(32),
1937+
sizeof_bytes)],
1938+
)
1939+
# Cast to the correct type pointer
1940+
castptr = self.builder.bitcast(
1941+
value_byte_ptr, llvm_ir.PointerType(lltype)
1942+
)
1943+
else:
1944+
# Otherwise, just bitcast to the correct type
1945+
castptr = self.builder.bitcast(
1946+
ptr, llvm_ir.PointerType(lltype)
1947+
)
18721948
return castptr
18731949
else:
18741950
return super().getvar(name)

numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from numba import cuda
88
from numba.core import types
99
from numba.cuda.testing import CUDATestCase
10+
from numba.cuda.core import config
1011
from textwrap import dedent
1112
import math
1213
import itertools
@@ -403,6 +404,8 @@ def f(x):
403404
match = re.compile(pat).search(llvm_ir)
404405
self.assertIsNone(match, msg=llvm_ir)
405406

407+
@unittest.skipIf(config.CUDA_DEBUG_POLY,
408+
"Uses old union format, not variant_part")
406409
def test_union_poly_types(self):
407410
sig = (types.int32, types.int32)
408411

@@ -460,6 +463,47 @@ def a_union_use_case(arg, results):
460463
expected = "[1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]"
461464
self.assertIn(expected, out.getvalue())
462465

466+
@unittest.skipUnless(config.CUDA_DEBUG_POLY, "CUDA_DEBUG_POLY not enabled")
def test_poly_variant_part(self):
    """Check the DW_TAG_variant_part layout emitted for a polymorphic local.

    A kernel rebinds ``foo`` to values of four different types. The LLVM IR
    is then expected to describe ``foo`` as a 128-bit wrapper struct holding
    an 8-bit discriminator member and a variant part with four members, each
    carrying ``extraData`` and an ``offset``.
    """

    @cuda.jit("void()", debug=True, opt=False)
    def f():
        foo = 100  # noqa: F841
        foo = 3.14  # noqa: F841
        foo = True  # noqa: F841
        foo = np.int32(42)  # noqa: F841

    ir_text = f.inspect_llvm()[tuple()]

    # extraData is printed as a typed constant (i8 0, i8 1, ...) or as a
    # metadata node reference (!123, !456), depending on the config flag.
    extradata_pattern = (
        "i8 {{[0-9]+}}"
        if config.CUDA_DEBUG_POLY_USE_TYPED_CONST
        else "{{![0-9]+}}"
    )

    # Capture node IDs so the checks follow the hierarchy from the local
    # variable down to each variant member.
    checks = """
CHECK-DAG: !DILocalVariable({{.*}}name: "foo"{{.*}}type: [[WRAPPER:![0-9]+]]
CHECK-DAG: [[WRAPPER]] = !DICompositeType({{.*}}elements: [[ELEMENTS:![0-9]+]]{{.*}}name: "variant_wrapper_struct"{{.*}}size: 128{{.*}}tag: DW_TAG_structure_type)
CHECK-DAG: [[ELEMENTS]] = !{ [[DISC:![0-9]+]], [[VPART:![0-9]+]] }
CHECK-DAG: [[DISC]] = !DIDerivedType({{.*}}name: "discriminator"{{.*}}size: 8{{.*}}tag: DW_TAG_member)
CHECK-DAG: [[VPART]] = !DICompositeType({{.*}}discriminator: [[DISC]]{{.*}}elements: [[VMEMBERS:![0-9]+]]{{.*}}tag: DW_TAG_variant_part)
CHECK-DAG: [[VMEMBERS]] = !{ [[VM1:![0-9]+]], [[VM2:![0-9]+]], [[VM3:![0-9]+]], [[VM4:![0-9]+]] }
CHECK-DAG: [[VM1]] = !DIDerivedType({{.*}}extraData: EXTRADATA{{.*}}name: "_bool"{{.*}}offset: 8{{.*}}tag: DW_TAG_member)
CHECK-DAG: [[VM2]] = !DIDerivedType({{.*}}extraData: EXTRADATA{{.*}}name: "_float64"{{.*}}offset: 64{{.*}}tag: DW_TAG_member)
CHECK-DAG: [[VM3]] = !DIDerivedType({{.*}}extraData: EXTRADATA{{.*}}name: "_int32"{{.*}}offset: 32{{.*}}tag: DW_TAG_member)
CHECK-DAG: [[VM4]] = !DIDerivedType({{.*}}extraData: EXTRADATA{{.*}}name: "_int64"{{.*}}offset: 64{{.*}}tag: DW_TAG_member)
""".replace("EXTRADATA", extradata_pattern)

    self.assertFileCheckMatches(ir_text, checks)
506+
463507
def test_DW_LANG(self):
464508
@cuda.jit(debug=True, opt=False)
465509
def foo():

0 commit comments

Comments
 (0)