-
Notifications
You must be signed in to change notification settings - Fork 1.1k
gpu: intel: ocl: allow unlimited allocations #4354
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1028,9 +1028,9 @@ size_t get_cpu_ram_size() { | |
| } | ||
| #endif | ||
|
|
||
| int get_gpu_ram_sizes(size_t &ram_size, size_t &max_alloc_size) { | ||
| int get_gpu_ram_size(size_t &ram_size) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Did you have a chance to verify that the change works with all 4 supported memory kinds, each in both correctness mode and fast performance mode, where a different approach is used for memory object management? As a part of this question: should this call be updated as well?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
The flag according to documentation only applies to |
||
| if (!is_gpu()) return OK; | ||
| if (ram_size > 0 && max_alloc_size > 0) return OK; | ||
| if (ram_size > 0) return OK; | ||
|
|
||
| #if DNNL_GPU_RUNTIME == DNNL_RUNTIME_OCL | ||
| auto eng = dnnl::engine(get_test_engine(), true); | ||
|
|
@@ -1042,26 +1042,16 @@ int get_gpu_ram_sizes(size_t &ram_size, size_t &max_alloc_size) { | |
| sizeof(cl_ulong), &ram_sz, nullptr); | ||
| if (status != CL_SUCCESS) return FAIL; | ||
|
|
||
| cl_ulong max_alloc_sz = 0; | ||
| status = clGetDeviceInfo(ocl_device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, | ||
| sizeof(cl_ulong), &max_alloc_sz, nullptr); | ||
| if (status != CL_SUCCESS) return FAIL; | ||
|
|
||
| ram_size = (size_t)ram_sz; | ||
| max_alloc_size = (size_t)max_alloc_sz; | ||
| return OK; | ||
| #elif DNNL_GPU_RUNTIME == DNNL_RUNTIME_DPCPP | ||
| auto eng = dnnl::engine(get_test_engine(), true); | ||
| auto sycl_dev = dnnl::sycl_interop::get_device(eng); | ||
| ram_size = (size_t)sycl_dev | ||
| .get_info<::sycl::info::device::global_mem_size>(); | ||
| max_alloc_size | ||
| = (size_t)sycl_dev | ||
| .get_info<::sycl::info::device::max_mem_alloc_size>(); | ||
| return OK; | ||
| #endif | ||
| ram_size = 0; | ||
| max_alloc_size = 0; | ||
| return OK; | ||
| } | ||
|
|
||
|
|
@@ -1132,8 +1122,7 @@ int check_total_size(res_t *res, dnnl_primitive_t prim_ref) { | |
|
|
||
| static size_t cpu_device_capacity = get_cpu_ram_size(); | ||
| static size_t gpu_device_capacity = 0; | ||
| static size_t gpu_max_alloc_capacity = 0; | ||
| SAFE(get_gpu_ram_sizes(gpu_device_capacity, gpu_max_alloc_capacity), WARN); | ||
| SAFE(get_gpu_ram_size(gpu_device_capacity), WARN); | ||
|
|
||
| const size_t device_max_capacity | ||
| = is_cpu() ? cpu_device_capacity : gpu_device_capacity; | ||
|
|
@@ -1171,32 +1160,13 @@ int check_total_size(res_t *res, dnnl_primitive_t prim_ref) { | |
| res->reason = skip_reason::not_enough_ram; | ||
| } | ||
|
|
||
| const bool all_allocation_fit_limit = std::all_of( | ||
| check_mem_size_args.sizes.cbegin(), | ||
| check_mem_size_args.sizes.cend(), [&](size_t s) { | ||
| const bool fit = s < gpu_max_alloc_capacity; | ||
| if (!fit) { | ||
| BENCHDNN_PRINT(1, | ||
| "[CHECK_MEM][%s]: Allocation of size %s " | ||
| "doesn't fit allocation limit of %s.\n", | ||
| dir_c_str(), smart_bytes(s).c_str(), | ||
| smart_bytes(gpu_max_alloc_capacity).c_str()); | ||
| } | ||
| return fit; | ||
| }); | ||
| if (!all_allocation_fit_limit) { | ||
| res->state = SKIPPED; | ||
| res->reason = skip_reason::not_enough_ram; | ||
| } | ||
|
|
||
| BENCHDNN_PRINT((!fits_device_ram ? 1 : 6), | ||
| "[CHECK_MEM][%s]: Requested: %s; benchdnn_device_limit: %s; " | ||
| "device_RAM_capacity: %s; gpu_max_alloc: %s;\n", | ||
| "device_RAM_capacity: %s;\n", | ||
| dir_c_str(), | ||
| smart_bytes(check_mem_size_args.total_size_device).c_str(), | ||
| smart_bytes(benchdnn_device_limit).c_str(), | ||
| smart_bytes(gpu_device_capacity).c_str(), | ||
| smart_bytes(gpu_max_alloc_capacity).c_str()); | ||
| smart_bytes(gpu_device_capacity).c_str()); | ||
| } | ||
|
|
||
| // Note: in theory, `total_size_ref` itself can be smaller for a `prim_ref` | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.