diff --git a/CHANGELOG.md b/CHANGELOG.md
index de3eac1..f2f776f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,6 +10,19 @@ package _DOES NOT_ follow the PVP, or indeed any sensible version scheme,
 because NVIDIA are A-OK introducing breaking changes in minor updates.
 
 
+## [0.12.8.0] - ???
+### Added
+  * Support for CUDA-12
+      - Thanks to @noahmartinwilliams on GitHub for helping out!
+
+### Removed
+  * The following modules have been deprecated for a long time, and have
+    finally been removed in CUDA-12:
+      - `Foreign.CUDA.Driver.Texture`
+      - `Foreign.CUDA.Runtime.Texture`
+    Support for Texture Objects (their replacement) is missing in these
+    bindings so far. Contributions welcome.
+
 ## [0.11.0.1] - 2023-08-15
 ### Fixed
   * Build fixes for GHC 9.2 .. 9.6
diff --git a/README.md b/README.md
index e14eff8..8af77b6 100644
--- a/README.md
+++ b/README.md
@@ -145,3 +145,6 @@ An incomplete list of missing bindings. Pull requests welcome!
 - cuGraphMemAllocNodeGetParams
 - cuGraphMemFreeNodeGetParams
 
+### CUDA-12
+
+A lot. PRs welcome.
diff --git a/cbits/stubs.c b/cbits/stubs.c
index 3b363ee..67f9280 100644
--- a/cbits/stubs.c
+++ b/cbits/stubs.c
@@ -22,17 +22,6 @@ cudaError_t cudaConfigureCall_simple(unsigned int gridX, unsigned int gridY, uns
 }
 #endif
 
-CUresult cuTexRefSetAddress2D_simple(CUtexref tex, CUarray_format format, unsigned int numChannels, CUdeviceptr dptr, size_t width, size_t height, size_t pitch)
-{
-    CUDA_ARRAY_DESCRIPTOR desc;
-    desc.Format      = format;
-    desc.NumChannels = numChannels;
-    desc.Width       = width;
-    desc.Height      = height;
-
-    return cuTexRefSetAddress2D(tex, &desc, dptr, pitch);
-}
-
 CUresult cuMemcpy2DHtoD(CUdeviceptr dstDevice, unsigned int dstPitch, unsigned int dstXInBytes, unsigned int dstY, void* srcHost, unsigned int srcPitch, unsigned int srcXInBytes, unsigned int srcY, unsigned int widthInBytes, unsigned int height)
 {
     CUDA_MEMCPY2D desc;
@@ -284,11 +273,6 @@ CUresult CUDAAPI cuMemsetD32(CUdeviceptr dstDevice, unsigned int ui, size_t N)
 {
     return cuMemsetD32_v2(dstDevice, ui, N);
 }
-
-CUresult CUDAAPI cuTexRefSetAddress(size_t *ByteOffset, CUtexref hTexRef, CUdeviceptr dptr, size_t bytes)
-{
-    return cuTexRefSetAddress_v2(ByteOffset, hTexRef, dptr, bytes);
-}
 #endif
 
 #if CUDA_VERSION >= 4000
diff --git a/cuda.cabal b/cuda.cabal
index e401bda..d9d8ebd 100644
--- a/cuda.cabal
+++ b/cuda.cabal
@@ -1,7 +1,7 @@
 cabal-version:          1.24
 
 Name:                   cuda
-Version:                0.11.0.1
+Version:                0.12.8.0
 Synopsis:               FFI binding to the CUDA interface for programming NVIDIA GPUs
 Description:
     The CUDA library provides a direct, general purpose C-like SPMD programming
@@ -121,7 +121,6 @@ Library
       Foreign.CUDA.Driver.Module.Query
       Foreign.CUDA.Driver.Profiler
       Foreign.CUDA.Driver.Stream
-      Foreign.CUDA.Driver.Texture
       Foreign.CUDA.Driver.Unified
       Foreign.CUDA.Driver.Utils
 
@@ -133,7 +132,6 @@ Library
       Foreign.CUDA.Runtime.Exec
       Foreign.CUDA.Runtime.Marshal
       Foreign.CUDA.Runtime.Stream
-      Foreign.CUDA.Runtime.Texture
       Foreign.CUDA.Runtime.Utils
 
       -- Extras
@@ -151,6 +149,7 @@ Library
   build-depends:
       base              >= 4.7 && < 5
     , bytestring        >= 0.10.4
+    , containers
     , filepath          >= 1.0
     , template-haskell
     , uuid-types        >= 1.0
diff --git a/src/Foreign/CUDA/Analysis/Device.chs b/src/Foreign/CUDA/Analysis/Device.chs
index bfeeb4a..4a44e8b 100644
--- a/src/Foreign/CUDA/Analysis/Device.chs
+++ b/src/Foreign/CUDA/Analysis/Device.chs
@@ -19,8 +19,12 @@ module Foreign.CUDA.Analysis.Device (
 
 #include "cbits/stubs.h"
 
+import qualified Data.Set as Set
+import Data.Set (Set)
 import Data.Int
+import Data.IORef
 import Text.Show.Describe
+import System.IO.Unsafe
 
 import Debug.Trace
 
@@ -179,7 +183,17 @@ data DeviceResources = DeviceResources
 deviceResources :: DeviceProperties -> DeviceResources
 deviceResources = resources . computeCapability
   where
-    -- This is mostly extracted from tables in the CUDA occupancy calculator.
+    -- Sources:
+    -- [1] https://github.com/NVIDIA/cuda-samples/blob/7b60178984e96bc09d066077d5455df71fee2a9f/Common/helper_cuda.h
+    --    - for: coresPerMP (line 643 _ConvertSMVer2Cores)
+    --    - for: architecture names (line 695 _ConvertSMVer2ArchName)
+    -- [2] https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#features-and-technical-specifications-technical-specifications-per-compute-capability
+    --    - for: maxGridsPerDevice
+    --    - archived here: https://web.archive.org/web/20250409220108/https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#features-and-technical-specifications-technical-specifications-per-compute-capability
+    --    - reproduced here: https://en.wikipedia.org/w/index.php?title=CUDA&oldid=1285775690#Technical_specification (note: link to specific page version)
+    -- [3] NVidia Nsight Compute
+    --    - for: the other fields
+    --    - left top "Start Activity" -> "Occupancy Calculator" -> "Launch"; tab "GPU Data"
     --
     resources compute = case compute of
       Compute 1 0 -> resources (Compute 1 1)      -- Tesla G80
@@ -283,7 +297,7 @@ deviceResources = resources . computeCapability
         }
       Compute 5 2 -> (resources (Compute 5 0))    -- Maxwell GM20x
         { sharedMemPerMP        = 98304
-        , maxRegPerBlock        = 32768
+        , maxRegPerBlock        = 32768  -- value from [3], wrong in [2]?
         , warpAllocUnit         = 2
         }
       Compute 5 3 -> (resources (Compute 5 0))    -- Maxwell GM20B
@@ -318,9 +332,15 @@ deviceResources = resources . computeCapability
         }
       Compute 6 2 -> (resources (Compute 6 0))    -- Pascal GP10B
         { coresPerMP            = 128
-        , warpsPerMP            = 128
-        , threadBlocksPerMP     = 4096
-        , maxRegPerBlock        = 32768
+        -- Commit 4f75ea889c2ade2bd3eab377b51bb5bbd28bfbae changed warpsPerMP
+        -- to 128, but [2] and [3] say 64 like CC 6.0; reverted back to 64 to
+        -- match NVIDIA documentation.
+        -- That commit also changed threadsPerMP (later mistakenly translated
+        -- to threadBlocksPerMP in 9df19adec8efc9df761deab40cf04d27810d97d3)
+        -- from 2048 to 4096, but again [2] and [3] retain 2048 so we keep it
+        -- at that.
+        , warpsPerMP            = 64
+        , maxRegPerBlock        = 32768  -- value from [2], wrong in [3]?
         , warpAllocUnit         = 4
         , maxGridsPerDevice     = 16
         }
@@ -346,7 +366,7 @@ deviceResources = resources . computeCapability
 
       Compute 7 2 -> (resources (Compute 7 0))    -- Volta GV10B
         { maxGridsPerDevice     = 16
-        , maxSharedMemPerBlock  = 49152
+        , maxSharedMemPerBlock  = 49152  -- unsure why this is here; [2] and [3] say still 98304
         }
 
       Compute 7 5 -> (resources (Compute 7 0))    -- Turing TU1xx
@@ -376,14 +396,91 @@ deviceResources = resources . computeCapability
         , warpRegAllocUnit      = 256
         , maxGridsPerDevice     = 128
         }
-
       Compute 8 6 -> (resources (Compute 8 0))    -- Ampere GA102
-        { warpsPerMP            = 48
+        { coresPerMP            = 128
+        , warpsPerMP            = 48
         , threadsPerMP          = 1536
         , threadBlocksPerMP     = 16
         , sharedMemPerMP        = 102400
         , maxSharedMemPerBlock  = 102400
         }
+      Compute 8 7 -> (resources (Compute 8 0))    -- Ampere
+        { coresPerMP            = 128
+        , warpsPerMP            = 48
+        , threadsPerMP          = 1536
+        , threadBlocksPerMP     = 16
+        }
+      Compute 8 9 -> (resources (Compute 8 0))    -- Ada
+        { coresPerMP            = 128
+        , warpsPerMP            = 48
+        , threadsPerMP          = 1536
+        , threadBlocksPerMP     = 24
+        , sharedMemPerMP        = 102400
+        , maxSharedMemPerBlock  = 102400
+        }
+
+      Compute 9 0 -> DeviceResources              -- Hopper
+        { threadsPerWarp        = 32
+        , coresPerMP            = 128
+        , warpsPerMP            = 64
+        , threadsPerMP          = 2048
+        , threadBlocksPerMP     = 32
+        , sharedMemPerMP        = 233472
+        , maxSharedMemPerBlock  = 233472
+        , regFileSizePerMP      = 65536
+        , maxRegPerBlock        = 65536
+        , regAllocUnit          = 256
+        , regAllocationStyle    = Warp
+        , maxRegPerThread       = 255
+        , sharedMemAllocUnit    = 128
+        , warpAllocUnit         = 4
+        , warpRegAllocUnit      = 256
+        , maxGridsPerDevice     = 128
+        }
+
+      Compute 10 0 -> DeviceResources             -- Blackwell
+        { threadsPerWarp        = 32
+        , coresPerMP            = 128
+        , warpsPerMP            = 64
+        , threadsPerMP          = 2048
+        , threadBlocksPerMP     = 32
+        , sharedMemPerMP        = 233472
+        , maxSharedMemPerBlock  = 233472
+        , regFileSizePerMP      = 65536
+        , maxRegPerBlock        = 65536
+        , regAllocUnit          = 256
+        , regAllocationStyle    = Warp
+        , maxRegPerThread       = 255
+        , sharedMemAllocUnit    = 128
+        , warpAllocUnit         = 4
+        , warpRegAllocUnit      = 256
+        , maxGridsPerDevice     = 128
+        }
+      Compute 10 1 -> (resources (Compute 10 0))  -- Blackwell
+        { warpsPerMP            = 48
+        , threadsPerMP          = 1536
+        , threadBlocksPerMP     = 24
+        }
+
+      Compute 12 0 -> DeviceResources             -- Blackwell
+        { threadsPerWarp        = 32
+        , coresPerMP            = 128
+        , warpsPerMP            = 48
+        , threadsPerMP          = 1536
+        , threadBlocksPerMP     = 24
+        , sharedMemPerMP        = 102400
+        , maxSharedMemPerBlock  = 102400
+        , regFileSizePerMP      = 65536
+        , maxRegPerBlock        = 65536
+        , regAllocUnit          = 256
+        , regAllocationStyle    = Warp
+        , maxRegPerThread       = 255
+        , sharedMemAllocUnit    = 128
+        , warpAllocUnit         = 4
+        , warpRegAllocUnit      = 256
+        , maxGridsPerDevice     = 128
+        }
+
 
       -- Something might have gone wrong, or the library just needs to be
       -- updated for the next generation of hardware, in which case we just want
@@ -393,7 +490,30 @@ deviceResources = resources . computeCapability
       -- However, it should be OK because all library functions run in IO, so it
       -- is likely the user code is as well.
       --
-      _           -> trace warning $ resources (Compute 6 0)
-        where warning = unlines [ "*** Warning: Unknown CUDA device compute capability: " ++ show compute
-                                , "*** Please submit a bug report at https://github.com/tmcdonell/cuda/issues" ]
-
+      _ -> case warningForCC compute of
+             Just warning -> trace warning defaultResources
+             Nothing      -> defaultResources
+
+    defaultResources = resources (Compute 6 0)
+
+    -- All this logic is to ensure the warning is only shown once per unknown
+    -- compute capability. This sounds not worth it, but in practice, it is:
+    -- empirically, an unknown compute capability often leads to /screenfuls/
+    -- of warnings in accelerate-llvm-ptx otherwise.
+    {-# NOINLINE warningForCC #-}
+    warningForCC :: Compute -> Maybe String
+    warningForCC compute = unsafePerformIO $ do
+      unseen <- atomicModifyIORef' warningShown $ \seen ->
+                  -- This is just one tree traversal; lookup-insert would be two traversals.
+                  let seen' = Set.insert compute seen
+                  in (seen', Set.size seen' > Set.size seen)
+      return $ if unseen
+        then Just $ unlines
+               [ "*** Warning: Unknown CUDA device compute capability: " ++ show compute
+               , "*** Please submit a bug report at https://github.com/tmcdonell/cuda/issues"
+               , "*** (This warning will only be shown once for this compute capability)" ]
+        else Nothing
+
+    {-# NOINLINE warningShown #-}
+    warningShown :: IORef (Set Compute)
+    warningShown = unsafePerformIO $ newIORef mempty
diff --git a/src/Foreign/CUDA/Analysis/Occupancy.hs b/src/Foreign/CUDA/Analysis/Occupancy.hs
index b7a0ea9..6a6cb8f 100644
--- a/src/Foreign/CUDA/Analysis/Occupancy.hs
+++ b/src/Foreign/CUDA/Analysis/Occupancy.hs
@@ -28,6 +28,14 @@
 -- the number in the @.cubin@ file to the amount you dynamically allocate at run
 -- time to get the correct shared memory usage.
 --
+-- __Warning__: Like the official Occupancy Calculator in NVidia Nsight
+-- Compute, the calculator in this module does not support or consider Thread
+-- Block Clusters
+-- (<https://docs.nvidia.com/cuda/cuda-c-programming-guide/#thread-block-clusters>)
+-- that have been introduced with compute capability 9.0 (Hopper). If you use
+-- thread block clusters in your kernels, the results you get with the
+-- functions in this module may not be accurate. Profile and measure.
+--
 -- /Notes About Occupancy/
 --
 -- Higher occupancy does not necessarily mean higher performance.  If a kernel
diff --git a/src/Foreign/CUDA/Driver/Graph/Capture.chs b/src/Foreign/CUDA/Driver/Graph/Capture.chs
index 8876542..160ff70 100644
--- a/src/Foreign/CUDA/Driver/Graph/Capture.chs
+++ b/src/Foreign/CUDA/Driver/Graph/Capture.chs
@@ -152,13 +152,23 @@ status = requireSDK 'status 10.0
 #if CUDA_VERSION < 10010
 info :: Stream -> IO (Status, Int64)
 info = requireSDK 'info 10.1
-#else
+#elif CUDA_VERSION < 12000
 {# fun unsafe cuStreamGetCaptureInfo as info
   { useStream `Stream'
   , alloca-   `Status' peekEnum*
   , alloca-   `Int64'  peekIntConv*
   }
   -> `()' checkStatus*- #}
+#else
+{# fun unsafe cuStreamGetCaptureInfo_v2 as info
+  { useStream `Stream'
+  , alloca-   `Status' peekEnum*
+  , alloca-   `Int64'  peekIntConv*
+  , alloca-   `Graph'
+  , alloca-   `Node'
+  , alloca-   `CSize'
+  }
+  -> `()' checkStatus*- #}
 #endif
 
 
diff --git a/src/Foreign/CUDA/Driver/Module/Query.chs b/src/Foreign/CUDA/Driver/Module/Query.chs
index 0998b3a..e484d8e 100644
--- a/src/Foreign/CUDA/Driver/Module/Query.chs
+++ b/src/Foreign/CUDA/Driver/Module/Query.chs
@@ -16,7 +16,7 @@
 module Foreign.CUDA.Driver.Module.Query (
 
   -- ** Querying module inhabitants
-  getFun, getPtr, getTex,
+  getFun, getPtr,
 
 ) where
 
@@ -28,7 +28,6 @@ import Foreign.CUDA.Driver.Error
 import Foreign.CUDA.Driver.Exec
 import Foreign.CUDA.Driver.Marshal                      ( peekDeviceHandle )
 import Foreign.CUDA.Driver.Module.Base
-import Foreign.CUDA.Driver.Texture
 import Foreign.CUDA.Internal.C2HS
 import Foreign.CUDA.Ptr
 
@@ -92,26 +91,6 @@ getPtr !mdl !name = do
   -> `Status' cToEnum #}
 
 
--- |
--- Return a handle to a texture reference. This texture reference handle
--- should not be destroyed, as the texture will be destroyed automatically
--- when the module is unloaded.
---
--- <http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__MODULE.html#group__CUDA__MODULE_1g9607dcbf911c16420d5264273f2b5608>
---
-{-# INLINEABLE getTex #-}
-getTex :: Module -> ShortByteString -> IO Texture
-getTex !mdl !name = resultIfFound "texture" name =<< cuModuleGetTexRef mdl name
-
-{-# INLINE cuModuleGetTexRef #-}
-{# fun unsafe cuModuleGetTexRef
-  { alloca-       `Texture'         peekTex*
-  , useModule     `Module'
-  , useAsCString* `ShortByteString'
-  }
-  -> `Status' cToEnum #}
-
-
 --------------------------------------------------------------------------------
 -- Internal
 --------------------------------------------------------------------------------
diff --git a/src/Foreign/CUDA/Driver/Stream.chs b/src/Foreign/CUDA/Driver/Stream.chs
index b85dc9a..70f2034 100644
--- a/src/Foreign/CUDA/Driver/Stream.chs
+++ b/src/Foreign/CUDA/Driver/Stream.chs
@@ -334,6 +334,7 @@ write32 _   _   _      _     = requireSDK 'write32 8.0
 write32 ptr val stream flags = nothingIfOk =<< cuStreamWriteValue32 stream ptr val flags
 
 {-# INLINE cuStreamWriteValue32 #-}
+#if CUDA_VERSION < 12000
 {# fun unsafe cuStreamWriteValue32
   { useStream       `Stream'
   , useDeviceHandle `DevicePtr Word32'
@@ -341,6 +342,15 @@ write32 ptr val stream flags = nothingIfOk =<< cuStreamWriteValue32 stream ptr v
   , combineBitMasks `[StreamWriteFlag]'
   }
   -> `Status' cToEnum #}
+#else
+{# fun unsafe cuStreamWriteValue32_v2 as cuStreamWriteValue32
+  { useStream       `Stream'
+  , useDeviceHandle `DevicePtr Word32'
+  ,                 `Word32'
+  , combineBitMasks `[StreamWriteFlag]'
+  }
+  -> `Status' cToEnum #}
+#endif
 #endif
 
 {-# INLINE write64 #-}
@@ -351,6 +361,7 @@ write64 _   _   _      _     = requireSDK 'write64 9.0
 write64 ptr val stream flags = nothingIfOk =<< cuStreamWriteValue64 stream ptr val flags
 
 {-# INLINE cuStreamWriteValue64 #-}
+#if CUDA_VERSION < 12000
 {# fun unsafe cuStreamWriteValue64
   { useStream       `Stream'
   , useDeviceHandle `DevicePtr Word64'
@@ -358,6 +369,15 @@ write64 ptr val stream flags = nothingIfOk =<< cuStreamWriteValue64 stream ptr v
   , combineBitMasks `[StreamWriteFlag]'
   }
   -> `Status' cToEnum #}
+#else
+{# fun unsafe cuStreamWriteValue64_v2 as cuStreamWriteValue64
+  { useStream       `Stream'
+  , useDeviceHandle `DevicePtr Word64'
+  ,                 `Word64'
+  , combineBitMasks `[StreamWriteFlag]'
+  }
+  -> `Status' cToEnum #}
+#endif
 #endif
 
 
@@ -388,12 +408,21 @@ wait32 _   _   _      _     = requireSDK 'wait32 8.0
 wait32 ptr val stream flags = nothingIfOk =<< cuStreamWaitValue32 stream ptr val flags
 
 {-# INLINE cuStreamWaitValue32 #-}
+#if CUDA_VERSION < 12000
 {# fun unsafe cuStreamWaitValue32
   { useStream       `Stream'
   , useDeviceHandle `DevicePtr Word32'
   ,                 `Word32'
   , combineBitMasks `[StreamWaitFlag]'
   } -> `Status' cToEnum #}
+#else
+{# fun unsafe cuStreamWaitValue32_v2 as cuStreamWaitValue32
+  { useStream       `Stream'
+  , useDeviceHandle `DevicePtr Word32'
+  ,                 `Word32'
+  , combineBitMasks `[StreamWaitFlag]'
+  } -> `Status' cToEnum #}
+#endif
 #endif
 
 {-# INLINE wait64 #-}
@@ -404,12 +433,21 @@ wait64 _   _   _      _     = requireSDK 'wait64 9.0
 wait64 ptr val stream flags = nothingIfOk =<< cuStreamWaitValue64 stream ptr val flags
 
 {-# INLINE cuStreamWaitValue64 #-}
+#if CUDA_VERSION < 12000
 {# fun unsafe cuStreamWaitValue64
   { useStream       `Stream'
   , useDeviceHandle `DevicePtr Word64'
   ,                 `Word64'
   , combineBitMasks `[StreamWaitFlag]'
   } -> `Status' cToEnum #}
+#else
+{# fun unsafe cuStreamWaitValue64_v2 as cuStreamWaitValue64
+  { useStream       `Stream'
+  , useDeviceHandle `DevicePtr Word64'
+  ,                 `Word64'
+  , combineBitMasks `[StreamWaitFlag]'
+  } -> `Status' cToEnum #}
+#endif
 #endif
 
 
diff --git a/src/Foreign/CUDA/Driver/Texture.chs b/src/Foreign/CUDA/Driver/Texture.chs
deleted file mode 100644
index c06a768..0000000
--- a/src/Foreign/CUDA/Driver/Texture.chs
+++ /dev/null
@@ -1,308 +0,0 @@
-{-# LANGUAGE BangPatterns             #-}
-{-# LANGUAGE ForeignFunctionInterface #-}
-{-# OPTIONS_HADDOCK prune #-}
---------------------------------------------------------------------------------
--- |
--- Module    : Foreign.CUDA.Driver.Texture
--- Copyright : [2009..2023] Trevor L. McDonell
--- License   : BSD
---
--- Texture management for low-level driver interface
---
---------------------------------------------------------------------------------
-
-module Foreign.CUDA.Driver.Texture (
-
-  -- * Texture Reference Management
-  Texture(..), Format(..), AddressMode(..), FilterMode(..), ReadMode(..),
-  bind, bind2D,
-  getAddressMode, getFilterMode, getFormat,
-  setAddressMode, setFilterMode, setFormat, setReadMode,
-
-  -- Deprecated
-  create, destroy,
-
-  -- Internal
-  peekTex
-
-) where
-
-#include "cbits/stubs.h"
-{# context lib="cuda" #}
-
--- Friends
-import Foreign.CUDA.Ptr
-import Foreign.CUDA.Driver.Error
-import Foreign.CUDA.Driver.Marshal
-import Foreign.CUDA.Internal.C2HS
-
--- System
-import Foreign
-import Foreign.C
-import Control.Monad
-
-#if CUDA_VERSION >= 3020
-{-# DEPRECATED create, destroy "as of CUDA version 3.2" #-}
-#endif
-
-
---------------------------------------------------------------------------------
--- Data Types
---------------------------------------------------------------------------------
-
--- |
--- A texture reference
---
-newtype Texture = Texture { useTexture :: {# type CUtexref #}}
-  deriving (Eq, Show)
-
-instance Storable Texture where
-  sizeOf _    = sizeOf    (undefined :: {# type CUtexref #})
-  alignment _ = alignment (undefined :: {# type CUtexref #})
-  peek p      = Texture `fmap` peek (castPtr p)
-  poke p t    = poke (castPtr p) (useTexture t)
-
--- |
--- Texture reference addressing modes
---
-{# enum CUaddress_mode as AddressMode
-  { underscoreToCase }
-  with prefix="CU_TR_ADDRESS_MODE" deriving (Eq, Show) #}
-
--- |
--- Texture reference filtering mode
---
-{# enum CUfilter_mode as FilterMode
-  { underscoreToCase }
-  with prefix="CU_TR_FILTER_MODE" deriving (Eq, Show) #}
-
--- |
--- Texture read mode options
---
-#c
-typedef enum CUtexture_flag_enum {
-  CU_TEXTURE_FLAG_READ_AS_INTEGER        = CU_TRSF_READ_AS_INTEGER,
-  CU_TEXTURE_FLAG_NORMALIZED_COORDINATES = CU_TRSF_NORMALIZED_COORDINATES,
-  CU_TEXTURE_FLAG_SRGB                   = CU_TRSF_SRGB
-} CUtexture_flag;
-#endc
-
-{# enum CUtexture_flag as ReadMode
-  { underscoreToCase
-  , CU_TEXTURE_FLAG_SRGB as SRGB }
-  with prefix="CU_TEXTURE_FLAG" deriving (Eq, Show) #}
-
--- |
--- Texture data formats
---
-{# enum CUarray_format as Format
-  { underscoreToCase
-  , UNSIGNED_INT8  as Word8
-  , UNSIGNED_INT16 as Word16
-  , UNSIGNED_INT32 as Word32
-  , SIGNED_INT8    as Int8
-  , SIGNED_INT16   as Int16
-  , SIGNED_INT32   as Int32 }
-  with prefix="CU_AD_FORMAT" deriving (Eq, Show) #}
-
-
---------------------------------------------------------------------------------
--- Texture management
---------------------------------------------------------------------------------
-
--- |
--- Create a new texture reference. Once created, the application must call
--- 'setPtr' to associate the reference with allocated memory. Other texture
--- reference functions are used to specify the format and interpretation to be
--- used when the memory is read through this reference.
---
--- <http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__TEXREF__DEPRECATED.html#group__CUDA__TEXREF__DEPRECATED_1g0084fabe2c6d28ffcf9d9f5c7164f16c>
---
-{-# INLINEABLE create #-}
-create :: IO Texture
-create = resultIfOk =<< cuTexRefCreate
-
-{-# INLINE cuTexRefCreate #-}
-{# fun unsafe cuTexRefCreate
-  { alloca- `Texture' peekTex* } -> `Status' cToEnum #}
-
-
--- |
--- Destroy a texture reference.
---
--- <http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__TEXREF__DEPRECATED.html#group__CUDA__TEXREF__DEPRECATED_1gea8edbd6cf9f97e6ab2b41fc6785519d>
---
-{-# INLINEABLE destroy #-}
-destroy :: Texture -> IO ()
-destroy !tex = nothingIfOk =<< cuTexRefDestroy tex
-
-{-# INLINE cuTexRefDestroy #-}
-{# fun unsafe cuTexRefDestroy
-  { useTexture `Texture' } -> `Status' cToEnum #}
-
-
--- |
--- Bind a linear array address of the given size (bytes) as a texture
--- reference. Any previously bound references are unbound.
---
--- <http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__TEXREF.html#group__CUDA__TEXREF_1g44ef7e5055192d52b3d43456602b50a8>
---
-{-# INLINEABLE bind #-}
-bind :: Texture -> DevicePtr a -> Int64 -> IO ()
-bind !tex !dptr !bytes = nothingIfOk =<< cuTexRefSetAddress tex dptr bytes
-
-{-# INLINE cuTexRefSetAddress #-}
-{# fun unsafe cuTexRefSetAddress
-  { alloca-         `Int'
-  , useTexture      `Texture'
-  , useDeviceHandle `DevicePtr a'
-  ,                 `Int64'       } -> `Status' cToEnum #}
-
-
--- |
--- Bind a linear address range to the given texture reference as a
--- two-dimensional arena. Any previously bound reference is unbound. Note that
--- calls to 'setFormat' can not follow a call to 'bind2D' for the same texture
--- reference.
---
--- <http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__TEXREF.html#group__CUDA__TEXREF_1g26f709bbe10516681913d1ffe8756ee2>
---
-{-# INLINEABLE bind2D #-}
-bind2D :: Texture -> Format -> Int -> DevicePtr a -> (Int,Int) -> Int64 -> IO ()
-bind2D !tex !fmt !chn !dptr (!width,!height) !pitch =
-  nothingIfOk =<< cuTexRefSetAddress2D_simple tex fmt chn dptr width height pitch
-
-{-# INLINE cuTexRefSetAddress2D_simple #-}
-{# fun unsafe cuTexRefSetAddress2D_simple
-  { useTexture      `Texture'
-  , cFromEnum       `Format'
-  ,                 `Int'
-  , useDeviceHandle `DevicePtr a'
-  ,                 `Int'
-  ,                 `Int'
-  ,                 `Int64'       } -> `Status' cToEnum #}
-
-
--- |
--- Get the addressing mode used by a texture reference, corresponding to the
--- given dimension (currently the only supported dimension values are 0 or 1).
---
--- <http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__TEXREF.html#group__CUDA__TEXREF_1gfb367d93dc1d20aab0cf8ce70d543b33>
---
-{-# INLINEABLE getAddressMode #-}
-getAddressMode :: Texture -> Int -> IO AddressMode
-getAddressMode !tex !dim = resultIfOk =<< cuTexRefGetAddressMode tex dim
-
-{-# INLINE cuTexRefGetAddressMode #-}
-{# fun unsafe cuTexRefGetAddressMode
-  { alloca-    `AddressMode' peekEnum*
-  , useTexture `Texture'
-  ,            `Int'                   } -> `Status' cToEnum #}
-
-
--- |
--- Get the filtering mode used by a texture reference.
---
--- <http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__TEXREF.html#group__CUDA__TEXREF_1g2439e069746f69b940f2f4dbc78cdf87>
---
-{-# INLINEABLE getFilterMode #-}
-getFilterMode :: Texture -> IO FilterMode
-getFilterMode !tex = resultIfOk =<< cuTexRefGetFilterMode tex
-
-{-# INLINE cuTexRefGetFilterMode #-}
-{# fun unsafe cuTexRefGetFilterMode
-  { alloca-    `FilterMode' peekEnum*
-  , useTexture `Texture'              } -> `Status' cToEnum #}
-
-
--- |
--- Get the data format and number of channel components of the bound texture.
---
--- <http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__TEXREF.html#group__CUDA__TEXREF_1g90936eb6c7c4434a609e1160c278ae53>
---
-{-# INLINEABLE getFormat #-}
-getFormat :: Texture -> IO (Format, Int)
-getFormat !tex = do
-  (!status,!fmt,!dim) <- cuTexRefGetFormat tex
-  resultIfOk (status,(fmt,dim))
-
-{-# INLINE cuTexRefGetFormat #-}
-{# fun unsafe cuTexRefGetFormat
-  { alloca-    `Format'    peekEnum*
-  , alloca-    `Int'       peekIntConv*
-  , useTexture `Texture'                } -> `Status' cToEnum #}
-
-
--- |
--- Specify the addressing mode for the given dimension of a texture reference.
---
--- <http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__TEXREF.html#group__CUDA__TEXREF_1g85f4a13eeb94c8072f61091489349bcb>
---
-{-# INLINEABLE setAddressMode #-}
-setAddressMode :: Texture -> Int -> AddressMode -> IO ()
-setAddressMode !tex !dim !mode = nothingIfOk =<< cuTexRefSetAddressMode tex dim mode
-
-{-# INLINE cuTexRefSetAddressMode #-}
-{# fun unsafe cuTexRefSetAddressMode
-  { useTexture `Texture'
-  ,            `Int'
-  , cFromEnum  `AddressMode' } -> `Status' cToEnum #}
-
-
--- |
--- Specify the filtering mode to be used when reading memory through a texture
--- reference.
---
--- <http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__TEXREF.html#group__CUDA__TEXREF_1g595d0af02c55576f8c835e4efd1f39c0>
---
-{-# INLINEABLE setFilterMode #-}
-setFilterMode :: Texture -> FilterMode -> IO ()
-setFilterMode !tex !mode = nothingIfOk =<< cuTexRefSetFilterMode tex mode
-
-{-# INLINE cuTexRefSetFilterMode #-}
-{# fun unsafe cuTexRefSetFilterMode
-  { useTexture `Texture'
-  , cFromEnum  `FilterMode' } -> `Status' cToEnum #}
-
-
--- |
--- Specify additional characteristics for reading and indexing the texture
--- reference.
---
--- <http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__TEXREF.html#group__CUDA__TEXREF_1g554ffd896487533c36810f2e45bb7a28>
---
-{-# INLINEABLE setReadMode #-}
-setReadMode :: Texture -> ReadMode -> IO ()
-setReadMode !tex !mode = nothingIfOk =<< cuTexRefSetFlags tex mode
-
-{-# INLINE cuTexRefSetFlags #-}
-{# fun unsafe cuTexRefSetFlags
-  { useTexture `Texture'
-  , cFromEnum  `ReadMode' } -> `Status' cToEnum #}
-
-
--- |
--- Specify the format of the data and number of packed components per element to
--- be read by the texture reference.
---
--- <http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__TEXREF.html#group__CUDA__TEXREF_1g05585ef8ea2fec728a03c6c8f87cf07a>
---
-{-# INLINEABLE setFormat #-}
-setFormat :: Texture -> Format -> Int -> IO ()
-setFormat !tex !fmt !chn = nothingIfOk =<< cuTexRefSetFormat tex fmt chn
-
-{-# INLINE cuTexRefSetFormat #-}
-{# fun unsafe cuTexRefSetFormat
-  { useTexture `Texture'
-  , cFromEnum  `Format'
-  ,            `Int'     } -> `Status' cToEnum #}
-
-
---------------------------------------------------------------------------------
--- Internal
---------------------------------------------------------------------------------
-
-{-# INLINE peekTex #-}
-peekTex :: Ptr {# type CUtexref #} -> IO Texture
-peekTex = liftM Texture . peek
-
diff --git a/src/Foreign/CUDA/Runtime/Device.chs b/src/Foreign/CUDA/Runtime/Device.chs
index fa58f9e..90e2089 100644
--- a/src/Foreign/CUDA/Runtime/Device.chs
+++ b/src/Foreign/CUDA/Runtime/Device.chs
@@ -219,9 +219,15 @@ props :: Device -> IO DeviceProperties
 props !n = resultIfOk =<< cudaGetDeviceProperties n
 
 {-# INLINE cudaGetDeviceProperties #-}
+#if CUDA_VERSION < 12000
 {# fun unsafe cudaGetDeviceProperties
   { alloca- `DeviceProperties' peek*
   ,         `Int'                    } -> `Status' cToEnum #}
+#else
+{# fun unsafe cudaGetDeviceProperties_v2 as cudaGetDeviceProperties
+  { alloca- `DeviceProperties' peek*
+  ,         `Int'                    } -> `Status' cToEnum #}
+#endif
 
 
 -- |
diff --git a/src/Foreign/CUDA/Runtime/Texture.chs b/src/Foreign/CUDA/Runtime/Texture.chs
deleted file mode 100644
index 94e8db9..0000000
--- a/src/Foreign/CUDA/Runtime/Texture.chs
+++ /dev/null
@@ -1,203 +0,0 @@
-{-# LANGUAGE BangPatterns             #-}
-{-# LANGUAGE ForeignFunctionInterface #-}
---------------------------------------------------------------------------------
--- |
--- Module    : Foreign.CUDA.Runtime.Texture
--- Copyright : [2009..2023] Trevor L. McDonell
--- License   : BSD
---
--- Texture references
---
---------------------------------------------------------------------------------
-
-module Foreign.CUDA.Runtime.Texture (
-
-  -- * Texture Reference Management
-  Texture(..), FormatKind(..), AddressMode(..), FilterMode(..), FormatDesc(..),
-  bind, bind2D
-
-) where
-
--- Friends
-import Foreign.CUDA.Ptr
-import Foreign.CUDA.Runtime.Error
-import Foreign.CUDA.Internal.C2HS
-
--- System
-import Data.Int
-import Foreign
-import Foreign.C
-
-#include "cbits/stubs.h"
-{# context lib="cudart" #}
-
-#c
-typedef struct textureReference      textureReference;
-typedef struct cudaChannelFormatDesc cudaChannelFormatDesc;
-#endc
-
---------------------------------------------------------------------------------
--- Data Types
---------------------------------------------------------------------------------
-
--- |A texture reference
---
-{# pointer *textureReference as ^ -> Texture #}
-
-data Texture = Texture
-  {
-    normalised :: !Bool,                -- ^ access texture using normalised coordinates [0.0,1.0)
-    filtering  :: !FilterMode,
-    addressing :: !(AddressMode, AddressMode, AddressMode),
-    format     :: !FormatDesc
-  }
-  deriving (Eq, Show)
-
--- |Texture channel format kind
---
-{# enum cudaChannelFormatKind as FormatKind
-  { }
-  with prefix="cudaChannelFormatKind" deriving (Eq, Show) #}
-
--- |Texture addressing mode
---
-{# enum cudaTextureAddressMode as AddressMode
-  { }
-  with prefix="cudaAddressMode" deriving (Eq, Show) #}
-
--- |Texture filtering mode
---
-{# enum cudaTextureFilterMode as FilterMode
-  { }
-  with prefix="cudaFilterMode" deriving (Eq, Show) #}
-
-
--- |A description of how memory read through the texture cache should be
--- interpreted, including the kind of data and the number of bits of each
--- component (x,y,z and w, respectively).
---
-{# pointer *cudaChannelFormatDesc as ^ foreign -> FormatDesc nocode #}
-
-data FormatDesc = FormatDesc
-  {
-    depth :: !(Int,Int,Int,Int),
-    kind  :: !FormatKind
-  }
-  deriving (Eq, Show)
-
-instance Storable FormatDesc where
-  sizeOf    _ = {# sizeof cudaChannelFormatDesc #}
-  alignment _ = alignment (undefined :: Ptr ())
-
-  peek p = do
-    dx <- cIntConv `fmap` {# get cudaChannelFormatDesc.x #} p
-    dy <- cIntConv `fmap` {# get cudaChannelFormatDesc.y #} p
-    dz <- cIntConv `fmap` {# get cudaChannelFormatDesc.z #} p
-    dw <- cIntConv `fmap` {# get cudaChannelFormatDesc.w #} p
-    df <- cToEnum  `fmap` {# get cudaChannelFormatDesc.f #} p
-    return $ FormatDesc (dx,dy,dz,dw) df
-
-  poke p (FormatDesc (x,y,z,w) k) = do
-    {# set cudaChannelFormatDesc.x #} p (cIntConv x)
-    {# set cudaChannelFormatDesc.y #} p (cIntConv y)
-    {# set cudaChannelFormatDesc.z #} p (cIntConv z)
-    {# set cudaChannelFormatDesc.w #} p (cIntConv w)
-    {# set cudaChannelFormatDesc.f #} p (cFromEnum k)
-
-
-instance Storable Texture where
-  sizeOf    _ = {# sizeof textureReference #}
-  alignment _ = alignment (undefined :: Ptr ())
-
-  peek p = do
-    norm    <- cToBool `fmap` {# get textureReference.normalized #} p
-    fmt     <- cToEnum `fmap` {# get textureReference.filterMode #} p
-    dsc     <- peek . castPtr          =<< {# get textureReference.channelDesc #} p
-    [x,y,z] <- peekArrayWith cToEnum 3 =<< {# get textureReference.addressMode #} p
-    return $ Texture norm fmt (x,y,z) dsc
-
-  poke p (Texture norm fmt (x,y,z) dsc) = do
-    {# set textureReference.normalized #} p (cFromBool norm)
-    {# set textureReference.filterMode #} p (cFromEnum fmt)
-    withArray (map cFromEnum [x,y,z]) ({# set textureReference.addressMode #} p)
-
-    -- c2hs is returning the wrong type for structs-within-structs
-    dscptr <- {# get textureReference.channelDesc #} p
-    poke (castPtr dscptr) dsc
-
-
---------------------------------------------------------------------------------
--- Texture References
---------------------------------------------------------------------------------
-
--- |Bind the memory area associated with the device pointer to a texture
--- reference given by the named symbol. Any previously bound references are
--- unbound.
---
-{-# INLINEABLE bind #-}
-bind :: String -> Texture -> DevicePtr a -> Int64 -> IO ()
-bind !name !tex !dptr !bytes = do
-  ref <- getTex name
-  poke ref tex
-  nothingIfOk =<< cudaBindTexture ref dptr (format tex) bytes
-
-{-# INLINE cudaBindTexture #-}
-{# fun unsafe cudaBindTexture
-  { alloca- `Int'
-  , id      `TextureReference'
-  , dptr    `DevicePtr a'
-  , with_*  `FormatDesc'
-  ,         `Int64'            } -> `Status' cToEnum #}
-  where dptr = useDevicePtr . castDevPtr
-
--- |Bind the two-dimensional memory area to the texture reference associated
--- with the given symbol. The size of the area is constrained by (width,height)
--- in texel units, and the row pitch in bytes. Any previously bound references
--- are unbound.
---
-{-# INLINEABLE bind2D #-}
-bind2D :: String -> Texture -> DevicePtr a -> (Int,Int) -> Int64 -> IO ()
-bind2D !name !tex !dptr (!width,!height) !bytes = do
-  ref <- getTex name
-  poke ref tex
-  nothingIfOk =<< cudaBindTexture2D ref dptr (format tex) width height bytes
-
-{-# INLINE cudaBindTexture2D #-}
-{# fun unsafe cudaBindTexture2D
-  { alloca- `Int'
-  , id      `TextureReference'
-  , dptr    `DevicePtr a'
-  , with_*  `FormatDesc'
-  ,         `Int'
-  ,         `Int'
-  ,         `Int64'             } -> `Status' cToEnum #}
-  where dptr = useDevicePtr . castDevPtr
-
-
--- |Returns the texture reference associated with the given symbol
---
-{-# INLINEABLE getTex #-}
-getTex :: String -> IO TextureReference
-getTex !name = resultIfOk =<< cudaGetTextureReference name
-
-{-# INLINE cudaGetTextureReference #-}
-{# fun unsafe cudaGetTextureReference
-  { alloca-       `Ptr Texture' peek*
-  , withCString_* `String'            } -> `Status' cToEnum #}
-
-
---------------------------------------------------------------------------------
--- Internal
---------------------------------------------------------------------------------
-
-{-# INLINE with_ #-}
-with_ :: Storable a => a -> (Ptr a -> IO b) -> IO b
-with_ = with
-
-
--- CUDA 5.0 changed the types of some attributes from char* to void*
---
-{-# INLINE withCString_ #-}
-withCString_ :: String -> (Ptr a -> IO b) -> IO b
-withCString_ !str !fn = withCString str (fn . castPtr)
-