JuliaGPU · vchuravy · Jul 22, 2025 · Jul 20, 2025 · Jul 20, 2025 · Jul 21, 2025
diff --git a/docs/src/api.md b/docs/src/api.md
@@ -21,6 +21,7 @@ allocate
 
 ```@docs
 KernelAbstractions.zeros
+KernelAbstractions.supports_unified
 ```
 
 ## Internal

diff --git a/src/KernelAbstractions.jl b/src/KernelAbstractions.jl
@@ -532,40 +532,67 @@ get_backend(::Array) = CPU()
 Adapt.adapt_storage(::CPU, a::Array) = a
 
 """
-    allocate(::Backend, Type, dims...)::AbstractArray
+    allocate(::Backend, Type, dims...; unified=false)::AbstractArray
 
-Allocate a storage array appropriate for the computational backend.
+Allocate a storage array appropriate for the computational backend. `unified=true`
+allocates an array using unified memory if the backend supports it and throws otherwise.
+Use [`supports_unified`](@ref) to determine whether it is supported by a backend.
 
 !!! note
     Backend implementations **must** implement `allocate(::NewBackend, T, dims::Tuple)`
-"""
-allocate(backend::Backend, T::Type, dims...) = allocate(backend, T, dims)
-allocate(backend::Backend, T::Type, dims::Tuple) = throw(MethodError(allocate, (backend, T, dims)))
+    and **should** also implement `allocate(::NewBackend, T, dims::Tuple; unified::Bool=false)`.
+"""
+allocate(backend::Backend, T::Type, dims...; kwargs...) = allocate(backend, T, dims; kwargs...)
+function allocate(backend::Backend, T::Type, dims::Tuple; unified::Union{Nothing, Bool} = nothing)
+    if isnothing(unified)  # keyword not passed: nothing else to try, report the missing method
+        throw(MethodError(allocate, (backend, T, dims)))
+    elseif unified  # unified memory was requested, yet this generic fallback was reached
+        throw(ArgumentError("`$(typeof(backend))` does not support unified memory. If you believe it does, please open a github issue."))
+    else  # `unified = false`: retry without the keyword, presumably to reach a backend method that accepts none (verify)
+        return allocate(backend, T, dims)
+    end
+end
+
 
 """
-    zeros(::Backend, Type, dims...)::AbstractArray
+    zeros(::Backend, Type, dims...; unified=false)::AbstractArray
 
 Allocate a storage array appropriate for the computational backend filled with zeros.
+`unified=true` allocates an array using unified memory if the backend supports it and
+throws otherwise.
 """
-zeros(backend::Backend, T::Type, dims...) = zeros(backend, T, dims)
-function zeros(backend::Backend, ::Type{T}, dims::Tuple) where {T}
-    data = allocate(backend, T, dims...)
+zeros(backend::Backend, T::Type, dims...; kwargs...) = zeros(backend, T, dims; kwargs...)
+function zeros(backend::Backend, ::Type{T}, dims::Tuple; kwargs...) where {T}
+    data = allocate(backend, T, dims; kwargs...)  # pass the tuple as-is (as `ones` does); no splat round-trip
     fill!(data, zero(T))
     return data
 end
 
 """
-    ones(::Backend, Type, dims...)::AbstractArray
+    ones(::Backend, Type, dims...; unified=false)::AbstractArray
 
 Allocate a storage array appropriate for the computational backend filled with ones.
+`unified=true` allocates an array using unified memory if the backend supports it and
+throws otherwise.
 """
-ones(backend::Backend, T::Type, dims...) = ones(backend, T, dims)
-function ones(backend::Backend, ::Type{T}, dims::Tuple) where {T}
-    data = allocate(backend, T, dims)
+ones(backend::Backend, T::Type, dims...; kwargs...) = ones(backend, T, dims; kwargs...)
+function ones(backend::Backend, ::Type{T}, dims::Tuple; kwargs...) where {T}
+    data = allocate(backend, T, dims; kwargs...)  # keywords (e.g. `unified`) are forwarded to `allocate`
     fill!(data, one(T))
     return data
 end
 
+"""
+    supports_unified(::Backend)::Bool
+
+Returns whether unified memory arrays are supported by the backend.
+
+!!! note
+    Backend implementations **should** implement this function
+    only if they **do** support unified memory.
+"""
+supports_unified(::Backend) = false  # default answer; a backend opts in by adding its own method
+
 """
     supports_atomics(::Backend)::Bool
 

diff --git a/src/cpu.jl b/src/cpu.jl
@@ -1,16 +1,16 @@
 unsafe_free!(::AbstractArray) = return
 synchronize(::CPU) = nothing
 
-allocate(::CPU, ::Type{T}, dims::Tuple) where {T} = Array{T}(undef, dims)
+allocate(::CPU, ::Type{T}, dims::Tuple; unified::Bool = false) where {T} = Array{T}(undef, dims)  # `unified` is accepted but unused: an `Array` is returned either way
 
-function zeros(backend::CPU, ::Type{T}, dims::Tuple) where {T}
-    arr = allocate(backend, T, dims)
+function zeros(backend::CPU, ::Type{T}, dims::Tuple; kwargs...) where {T}
+    arr = allocate(backend, T, dims; kwargs...)  # keywords (e.g. `unified`) are forwarded to `allocate`
     kernel = init_kernel(backend)
     kernel(arr, zero, T, ndrange = length(arr))
     return arr
 end
-function ones(backend::CPU, ::Type{T}, dims::Tuple) where {T}
-    arr = allocate(backend, T, dims)
+function ones(backend::CPU, ::Type{T}, dims::Tuple; kwargs...) where {T}
+    arr = allocate(backend, T, dims; kwargs...)
     kernel = init_kernel(backend)
     kernel(arr, one, T; ndrange = length(arr))
     return arr
@@ -34,6 +34,7 @@ end
 
 functional(::CPU) = true
 pagelock!(::CPU, x) = nothing
+supports_unified(::CPU) = true  # CPU opts in; its `allocate` accepts the `unified` keyword
 
 function (obj::Kernel{CPU})(args...; ndrange = nothing, workgroupsize = nothing)
     ndrange, workgroupsize, iterspace, dynamic = launch_config(obj, ndrange, workgroupsize)

diff --git a/test/test.jl b/test/test.jl
@@ -78,6 +78,13 @@ function unittest_testsuite(Backend, backend_str, backend_mod, BackendArrayT; sk
         backendT = typeof(backend).name.wrapper # To look through CUDABackend{true, false}
         @test backend isa backendT
 
+        unified = KernelAbstractions.supports_unified(backend)
+        @test unified isa Bool
+        U = allocate(backend, Float32, 5; unified)
+        if unified
+            @test U[3] isa Float32
+        end
+
         x = allocate(backend, Float32, 5)
         A = allocate(backend, Float32, 5, 5)
         @test @inferred(KernelAbstractions.get_backend(A)) isa backendT