2020import threading
2121import asyncio
2222import pathlib
23+ import subprocess
24+ import tempfile
25+ import re
2326from itertools import product
2427from abc import ABCMeta , abstractmethod
2528from ctypes import (c_int , byref , c_size_t , c_char , c_char_p , addressof ,
3538from .drvapi import API_PROTOTYPES
3639from .drvapi import cu_occupancy_b2d_size , cu_stream_callback_pyobj , cu_uuid
3740from .mappings import FILE_EXTENSION_MAP
38- from .linkable_code import LinkableCode
41+ from .linkable_code import LinkableCode , LTOIR , Fatbin , Object
3942from numba .cuda .utils import _readenv
4043from numba .cuda .cudadrv import enums , drvapi , nvrtc
4144
@@ -2664,12 +2667,18 @@ def add_cu_file(self, path):
26642667 cu = f .read ()
26652668 self .add_cu (cu , os .path .basename (path ))
26662669
2667- def add_file_guess_ext (self , path_or_code ):
2670+ def add_file_guess_ext (self , path_or_code , ignore_nonlto = False ):
26682671 """
26692672 Add a file or LinkableCode object to the link. If a file is
26702673 passed, the type will be inferred from the extension. A LinkableCode
26712674 object represents a file already in memory.
2675+
2676+ When `ignore_nonlto` is set to true, do not add code that will not
2677+ be LTO-ed in the linking process. This is useful in inspecting the
2678+ LTO-ed portion of the PTX when linker is added with objects that can be
2679+ both LTO-ed and not LTO-ed.
26722680 """
2681+
26732682 if isinstance (path_or_code , str ):
26742683 ext = pathlib .Path (path_or_code ).suffix
26752684 if ext == '' :
@@ -2685,6 +2694,26 @@ def add_file_guess_ext(self, path_or_code):
26852694 "Don't know how to link file with extension "
26862695 f"{ ext } "
26872696 )
2697+
2698+ if ignore_nonlto :
2699+ warn_and_return = False
2700+ if kind in (
2701+ FILE_EXTENSION_MAP ["fatbin" ], FILE_EXTENSION_MAP ["o" ]
2702+ ):
2703+ entry_types = inspect_obj_content (path_or_code )
2704+ if "nvvm" not in entry_types :
2705+ warn_and_return = True
2706+ elif kind != FILE_EXTENSION_MAP ["ltoir" ]:
2707+ warn_and_return = True
2708+
2709+ if warn_and_return :
2710+ warnings .warn (
2711+ f"Not adding { path_or_code } as it is not "
2712+ "optimizable at link time, and `ignore_nonlto == "
2713+ "True`."
2714+ )
2715+ return
2716+
26882717 self .add_file (path_or_code , kind )
26892718 return
26902719 else :
@@ -2697,6 +2726,25 @@ def add_file_guess_ext(self, path_or_code):
26972726 if path_or_code .kind == "cu" :
26982727 self .add_cu (path_or_code .data , path_or_code .name )
26992728 else :
2729+ if ignore_nonlto :
2730+ warn_and_return = False
2731+ if isinstance (path_or_code , (Fatbin , Object )):
2732+ with tempfile .NamedTemporaryFile ("w" ) as fp :
2733+ fp .write (path_or_code .data )
2734+ entry_types = inspect_obj_content (fp .name )
2735+ if "nvvm" not in entry_types :
2736+ warn_and_return = True
2737+ elif not isinstance (path_or_code , LTOIR ):
2738+ warn_and_return = True
2739+
2740+ if warn_and_return :
2741+ warnings .warn (
2742+ f"Not adding { path_or_code .name } as it is not "
2743+ "optimizable at link time, and `ignore_nonlto == "
2744+ "True`."
2745+ )
2746+ return
2747+
27002748 self .add_data (
27012749 path_or_code .data , path_or_code .kind , path_or_code .name
27022750 )
@@ -3046,6 +3094,28 @@ def add_file(self, path, kind):
30463094 name = pathlib .Path (path ).name
30473095 self .add_data (data , kind , name )
30483096
def add_cu(self, cu, name):
    """Compile CUDA source held in a string and add the result to the link.

    The source is compiled with ``nvrtc.compile`` for the compute
    capability of the device behind the currently active context. When
    ``self.lto`` is set the compiler is asked for LTO-IR, which is passed
    to ``add_ltoir``; otherwise the resulting PTX is encoded and passed to
    ``add_ptx``.

    ``cu`` is the source text and ``name`` is the name of the source file.
    The linked item is named after ``name`` with its extension replaced by
    ``.ltoir`` or ``.ptx``.
    """
    with driver.get_active_context() as active_ctx:
        cc = driver.get_device(active_ctx.devnum).compute_capability

    # The compile log is returned alongside the program but is not used.
    program, _log = nvrtc.compile(cu, name, cc, ltoir=self.lto)

    if self.lto:
        # Link the program's LTOIR using the normal linker mechanism
        ltoir_name = os.path.splitext(name)[0] + ".ltoir"
        self.add_ltoir(program, ltoir_name)
        return

    # The assembly dump only applies to the PTX (non-LTO) path.
    if config.DUMP_ASSEMBLY:
        banner = "ASSEMBLY %s" % name
        print(banner.center(80, "-"))
        print(program)
        print("=" * 80)

    # Link the program's PTX using the normal linker mechanism
    ptx_name = os.path.splitext(name)[0] + ".ptx"
    self.add_ptx(program.encode(), ptx_name)
3118+
30493119 def add_data (self , data , kind , name ):
30503120 if kind == FILE_EXTENSION_MAP ["cubin" ]:
30513121 fn = self ._linker .add_cubin
@@ -3067,6 +3137,12 @@ def add_data(self, data, kind, name):
30673137 except NvJitLinkError as e :
30683138 raise LinkerError from e
30693139
def get_linked_ptx(self):
    """Return whatever the underlying linker's ``get_linked_ptx`` returns.

    An ``NvJitLinkError`` raised by the underlying linker is re-raised as
    a ``LinkerError`` chained to the original exception.
    """
    try:
        linked_ptx = self._linker.get_linked_ptx()
    except NvJitLinkError as err:
        raise LinkerError from err
    else:
        return linked_ptx
3145+
30703146 def complete (self ):
30713147 try :
30723148 return self ._linker .get_linked_cubin ()
@@ -3342,3 +3418,28 @@ def get_version():
33423418 Return the driver version as a tuple of (major, minor)
33433419 """
33443420 return driver .get_version ()
3421+
3422+
def inspect_obj_content(objpath: str):
    """
    Given path to a fatbin or object, use `cuobjdump` to examine its content
    Return the set of entries in the object.

    `cuobjdump` is run on `objpath` and every output line that starts with
    ``Fatbin <kind> code`` contributes ``<kind>`` to the returned set.

    Raises `RuntimeError` if the `cuobjdump` executable cannot be found.
    Because the command is run with ``check=True``, a non-zero exit status
    propagates as `subprocess.CalledProcessError`.
    """
    try:
        out = subprocess.run(["cuobjdump", objpath], check=True,
                             capture_output=True)
    except FileNotFoundError as e:
        msg = ("cuobjdump has not been found. You may need "
               "to install the CUDA toolkit and ensure that "
               "it is available on your PATH.\n ")
        raise RuntimeError(msg) from e

    objtable = out.stdout.decode('utf-8')

    # Compile once; the same pattern is applied to every output line.
    entry_pattern = re.compile(r"Fatbin (.*) code")

    code_types: set[str] = set()
    # splitlines() breaks on every line boundary. `match` anchors at the
    # start of a line, so each entry header must sit on its own line to be
    # detected; splitting on anything other than a plain line boundary would
    # hide all entries after the first.
    for line in objtable.splitlines():
        if match := entry_pattern.match(line):
            code_types.add(match.group(1))

    return code_types
0 commit comments