Skip to content

Commit 41fb006

Browse files
committed
[WIP] No handler submit
1 parent 3b42472 commit 41fb006

File tree

14 files changed

+802
-30
lines changed

14 files changed

+802
-30
lines changed

sycl/source/detail/kernel_name_based_cache_t.hpp renamed to sycl/include/sycl/detail/kernel_name_based_cache_t.hpp

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@
77
//===----------------------------------------------------------------------===//
88
#pragma once
99

10-
#include <detail/hashers.hpp>
11-
#include <detail/kernel_arg_mask.hpp>
10+
//#include <detail/hashers.hpp>
11+
//#include <sycl/detail/kernel_arg_mask.hpp>
1212
#include <emhash/hash_table8.hpp>
1313
#include <sycl/detail/spinlock.hpp>
1414
#include <sycl/detail/ur.hpp>
@@ -20,6 +20,7 @@ namespace sycl {
2020
inline namespace _V1 {
2121
namespace detail {
2222
using FastKernelCacheKeyT = std::pair<ur_device_handle_t, ur_context_handle_t>;
23+
using KernelArgMask = std::vector<bool>;
2324

2425
struct FastKernelCacheVal {
2526
ur_kernel_handle_t MKernelHandle; /* UR kernel handle pointer. */
@@ -29,25 +30,27 @@ struct FastKernelCacheVal {
2930
const KernelArgMask *MKernelArgMask; /* Eliminated kernel argument mask. */
3031
ur_program_handle_t MProgramHandle; /* UR program handle corresponding to
3132
this kernel. */
32-
const Adapter &MAdapterPtr; /* We can keep reference to the adapter
33+
/*const Adapter &MAdapterPtr;*/ /* We can keep reference to the adapter
3334
because during 2-stage shutdown the kernel
3435
cache is destroyed deliberately before the
3536
adapter. */
3637

3738
FastKernelCacheVal(ur_kernel_handle_t KernelHandle, std::mutex *Mutex,
3839
const KernelArgMask *KernelArgMask,
39-
ur_program_handle_t ProgramHandle,
40-
const Adapter &AdapterPtr)
40+
ur_program_handle_t ProgramHandle)
41+
//const Adapter &AdapterPtr)
4142
: MKernelHandle(KernelHandle), MMutex(Mutex),
42-
MKernelArgMask(KernelArgMask), MProgramHandle(ProgramHandle),
43-
MAdapterPtr(AdapterPtr) {}
43+
MKernelArgMask(KernelArgMask), MProgramHandle(ProgramHandle)
44+
/*MAdapterPtr(AdapterPtr)*/ {}
4445

4546
~FastKernelCacheVal() {
47+
/*
4648
if (MKernelHandle)
4749
MAdapterPtr.call<sycl::detail::UrApiKind::urKernelRelease>(MKernelHandle);
4850
if (MProgramHandle)
4951
MAdapterPtr.call<sycl::detail::UrApiKind::urProgramRelease>(
5052
MProgramHandle);
53+
*/
5154
MKernelHandle = nullptr;
5255
MMutex = nullptr;
5356
MKernelArgMask = nullptr;

sycl/include/sycl/khr/free_function_commands.hpp

Lines changed: 23 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
#pragma once
22

33
#include <sycl/ext/oneapi/experimental/enqueue_functions.hpp>
4+
#include <sycl/khr/requirements.hpp>
45

56
namespace sycl {
67
inline namespace _V1 {
78

8-
#ifdef __DPCPP_ENABLE_UNFINISHED_KHR_EXTENSIONS
9+
10+
//#ifdef __DPCPP_ENABLE_UNFINISHED_KHR_EXTENSIONS
911
namespace khr {
1012

1113
template <typename CommandGroupFunc>
@@ -148,33 +150,30 @@ void launch_grouped(handler &h, range<3> r, range<3> size,
148150
h.parallel_for(nd_range<3>(r, size), k);
149151
}
150152

151-
template <typename KernelType>
152-
void launch_grouped(const queue &q, range<1> r, range<1> size,
153+
template <typename KernelType, typename... Requirements>
154+
void launch_grouped(queue &q, range<1> r, range<1> size,
153155
const KernelType &k,
154-
const sycl::detail::code_location &codeLoc =
155-
sycl::detail::code_location::current()) {
156-
submit(
157-
q, [&](handler &h) { launch_grouped<KernelType>(h, r, size, k); },
158-
codeLoc);
156+
const requirements<Requirements...> req = {}) {
157+
q.parallel_for_no_handler_v2(nd_range<1>(r, size), k, req);
159158
}
159+
/*
160160
template <typename KernelType>
161-
void launch_grouped(const queue &q, range<2> r, range<2> size,
161+
void launch_grouped(queue &q, range<2> r, range<2> size,
162162
const KernelType &k,
163163
const sycl::detail::code_location &codeLoc =
164164
sycl::detail::code_location::current()) {
165-
submit(
166-
q, [&](handler &h) { launch_grouped<KernelType>(h, r, size, k); },
167-
codeLoc);
165+
(void)codeLoc;
166+
q.parallel_for_no_handler_v2(nd_range<2>(r, size), k);
168167
}
169168
template <typename KernelType>
170-
void launch_grouped(const queue &q, range<3> r, range<3> size,
169+
void launch_grouped(queue &q, range<3> r, range<3> size,
171170
const KernelType &k,
172171
const sycl::detail::code_location &codeLoc =
173172
sycl::detail::code_location::current()) {
174-
submit(
175-
q, [&](handler &h) { launch_grouped<KernelType>(h, r, size, k); },
176-
codeLoc);
173+
(void)codeLoc;
174+
q.parallel_for_no_handler_v2(nd_range<3>(r, size), k);
177175
}
176+
*/
178177

179178
template <typename... Args>
180179
void launch_grouped(sycl::handler &h, sycl::range<1> r, sycl::range<1> size,
@@ -283,7 +282,8 @@ template <typename KernelType>
283282
void launch_task(const sycl::queue &q, const KernelType &k,
284283
const sycl::detail::code_location &codeLoc =
285284
sycl::detail::code_location::current()) {
286-
submit(q, [&](handler &h) { launch_task<KernelType>(h, k); }, codeLoc);
285+
(void)codeLoc;
286+
q.single_task_no_handler(k);
287287
}
288288

289289
template <typename... Args>
@@ -298,6 +298,11 @@ void launch_task(const queue &q, const kernel &k, Args &&...args) {
298298
[&](handler &h) { launch_task(h, k, std::forward<Args>(args)...); });
299299
}
300300

301+
template <typename FuncT>
302+
void launch_host_task(queue &q, FuncT &&Func) {
303+
q.host_task_no_handler(std::move(Func));
304+
}
305+
301306
inline void memcpy(handler &h, void *dest, const void *src, size_t numBytes) {
302307
h.memcpy(dest, src, numBytes);
303308
}
@@ -520,6 +525,6 @@ inline void event_barrier(const queue &q, const std::vector<event> &events,
520525
}
521526

522527
} // namespace khr
523-
#endif
528+
//#endif
524529
} // namespace _V1
525530
} // namespace sycl
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
#pragma once
2+
3+
namespace sycl {
4+
inline namespace _V1 {
5+
6+
//#ifdef __DPCPP_ENABLE_UNFINISHED_KHR_EXTENSIONS
7+
namespace khr {
8+
9+
template <typename... Requirements>
10+
class requirements;
11+
12+
template <typename... Requirements>
13+
void add_dependencies(requirements<Requirements...> &Reqs, std::vector<event> &Events);
14+
15+
template <typename... Requirements>
16+
class requirements {
17+
public:
18+
requirements(Requirements... r) : MRequirements(r...) {}
19+
20+
private:
21+
std::tuple<Requirements...> MRequirements;
22+
23+
template <typename... R>
24+
friend void add_dependencies(const requirements<R...> &Reqs, std::vector<event> &Events);
25+
};
26+
27+
template <typename... Requirements>
28+
requirements(Requirements... r) -> requirements<Requirements...>;
29+
30+
template <typename Requirement>
31+
void add_dependency(std::vector<event> &Events, const Requirement &Req) {
32+
if constexpr (std::is_same_v<Requirement, event>)
33+
Events.push_back(Req);
34+
}
35+
36+
template <typename Requirement, typename... Requirements>
37+
void add_dependency(std::vector<event> &Events, const Requirement &Req, const Requirements&... Rest) {
38+
if constexpr (std::is_same_v<Requirement, event>)
39+
Events.push_back(Req);
40+
add_dependency(Events, Rest...);
41+
}
42+
43+
template <typename... Requirements, size_t... Is>
44+
void add_dependencies(const std::tuple<Requirements...> &ReqsTuple, std::vector<event> &Events,
45+
std::index_sequence<Is...>) {
46+
add_dependency(Events, std::get<Is>(ReqsTuple)...);
47+
}
48+
49+
template <typename... Requirements>
50+
void add_dependencies(const sycl::khr::requirements<Requirements...> &Reqs, std::vector<event> &Events) {
51+
add_dependencies(Reqs.MRequirements, Events, std::make_index_sequence<sizeof...(Requirements)>());
52+
}
53+
54+
template <typename... Requirements>
55+
constexpr bool has_dependencies() {
56+
return std::disjunction_v<std::is_same<event, Requirements>...>;
57+
}
58+
59+
} // namespace khr
60+
//#endif
61+
} // namespace _V1
62+
} // namespace sycl

0 commit comments

Comments
 (0)