Open
Description
Float16 CUDA `conv` seems to be broken for 5D tensors, but not for 3D or 4D tensors. See FluxML/Flux.jl#2184.
(Using Julia 1.8.3 on an A100 GPU.)
julia> conv(rand(Float16, 16, 16, 1, 1) |> gpu, rand(Float16, 3,3,1,1) |> gpu)
14×14×1×1 CuArray{Float16, 4, CUDA.Mem.DeviceBuffer}:
[...]
julia> conv(rand(Float16, 16, 16, 16, 1, 1) |> gpu, rand(Float16, 3,3,3, 1,1) |> gpu)
ERROR: CUDNNError: CUDNN_STATUS_NOT_SUPPORTED (code 9)
Stacktrace:
[1] throw_api_error(res::cuDNN.cudnnStatus_t)
@ cuDNN /scratch/npj226/.julia/packages/cuDNN/7X4E7/src/libcudnn.jl:11
[2] macro expansion
@ /scratch/npj226/.julia/packages/cuDNN/7X4E7/src/libcudnn.jl:24 [inlined]
[3] cudnnConvolutionForward(handle::Ptr{cuDNN.cudnnContext}, alpha::Base.RefValue{Float32}, xDesc::cuDNN.cudnnTensorDescriptor, x::CuArray{Float16, 5, CUDA.Mem.DeviceBuffer}, wDesc::cuDNN.cudnnFilterDescriptor, w::CuArray{Float16, 5, CUDA.Mem.DeviceBuffer}, convDesc::cuDNN.cudnnConvolutionDescriptor, algo::cuDNN.cudnnConvolutionFwdAlgo_t, workSpace::CuArray{UInt8, 1, CUDA.Mem.DeviceBuffer}, workSpaceSizeInBytes::Int64, beta::Base.RefValue{Float32}, yDesc::cuDNN.cudnnTensorDescriptor, y::CuArray{Float16, 5, CUDA.Mem.DeviceBuffer})
@ cuDNN /scratch/npj226/.julia/packages/CUDA/ZdCxS/lib/utils/call.jl:26
[4] (::cuDNN.var"#1153#1155"{CuArray{Float16, 5, CUDA.Mem.DeviceBuffer}, cuDNN.cudnnActivationMode_t, cuDNN.cudnnConvolutionDescriptor, cuDNN.cudnnFilterDescriptor, cuDNN.cudnnTensorDescriptor, cuDNN.cudnnTensorDescriptor, Base.RefValue{Float32}, Base.RefValue{Float32}, CuArray{Float16, 5, CUDA.Mem.DeviceBuffer}, CuArray{Float16, 5, CUDA.Mem.DeviceBuffer}, cuDNN.cudnnConvolutionFwdAlgoPerfStruct})(workspace::CuArray{UInt8, 1, CUDA.Mem.DeviceBuffer})
@ cuDNN /scratch/npj226/.julia/packages/cuDNN/7X4E7/src/convolution.jl:105