Commit 26c44f8

Add serializing of command submission in some cases
1 parent 98d98a2 commit 26c44f8

4 files changed: +29, −20 lines

.github/workflows/build.yml

Lines changed: 1 addition & 1 deletion

@@ -485,7 +485,7 @@ jobs:
         id: cmake_build
         run: |
           export Dawn_DIR=dawn/lib64/cmake/Dawn
-          cmake -B build -DGGML_WEBGPU=ON
+          cmake -B build -DGGML_WEBGPU=ON -DGGML_WEBGPU_SERIALIZE_SUBMIT=ON
           cmake --build build --config Release -j $(nproc)

     - name: Test

ggml/CMakeLists.txt

Lines changed: 2 additions & 1 deletion

@@ -221,9 +221,10 @@ option(GGML_VULKAN_SHADER_DEBUG_INFO "ggml: enable Vulkan shader debug in
 option(GGML_VULKAN_VALIDATE "ggml: enable Vulkan validation" OFF)
 option(GGML_VULKAN_RUN_TESTS "ggml: run Vulkan tests" OFF)
 option(GGML_WEBGPU "ggml: use WebGPU" OFF)
+option(GGML_WEBGPU_SERIALIZE_SUBMIT "ggml: enable WebGPU command serialization" OFF)
 option(GGML_WEBGPU_DEBUG "ggml: enable WebGPU debug output" OFF)
 option(GGML_WEBGPU_CPU_PROFILE "ggml: enable WebGPU profiling (CPU)" OFF)
-option(GGML_WEBGPU_GPU_PROFILE "ggml: enable WebGPU profiling (GPU)" OFF)
+option(GGML_WEBGPU_GPU_PROFILE "ggml: enable WebGPU profiling (GPU)" OFF)

 option(GGML_ZDNN "ggml: use zDNN" OFF)
 option(GGML_METAL "ggml: use Metal" ${GGML_METAL_DEFAULT})

ggml/src/ggml-webgpu/CMakeLists.txt

Lines changed: 4 additions & 0 deletions

@@ -46,6 +46,10 @@ else()
     set(DawnWebGPU_TARGET dawn::webgpu_dawn)
 endif()

+if (GGML_WEBGPU_SERIALIZE_SUBMIT)
+    target_compile_definitions(ggml-webgpu PRIVATE GGML_WEBGPU_SERIALIZE_SUBMIT=1)
+endif()
+
 if (GGML_WEBGPU_DEBUG)
     target_compile_definitions(ggml-webgpu PRIVATE GGML_WEBGPU_DEBUG=1)
 endif()

ggml/src/ggml-webgpu/ggml-webgpu.cpp

Lines changed: 22 additions & 18 deletions

@@ -56,10 +56,16 @@
 # define WEBGPU_TIMESTAMP_QUERY_BUF_SIZE_BYTES 16 // e.g. enough for two timestamps
 #endif

+#ifdef GGML_WEBGPU_SERIALIZE_SUBMIT
+# define WEBGPU_COMMAND_SUBMIT_BATCH_SIZE 1
+# define WEBGPU_WAIT_ANY_TIMEOUT_MS UINT64_MAX
+#else
+# define WEBGPU_COMMAND_SUBMIT_BATCH_SIZE 8
+# define WEBGPU_WAIT_ANY_TIMEOUT_MS 1
+#endif
+
 /* Constants */

-#define WEBGPU_COMMAND_SUBMIT_BATCH_SIZE 8
-#define WEBGPU_WAIT_ANY_BATCH_SIZE 64
 #define WEBGPU_MUL_MAT_WG_SIZE 256
 #define WEBGPU_NUM_PARAM_BUFS 100
 #define WEBGPU_PARAMS_BUF_SIZE_BYTES 128 // enough for 32 parameters
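
Note: the two macros added above are the whole behavioral switch. The following standalone snippet is a hypothetical illustration (not part of the patch) of the effective policy each build mode selects; compile it with or without -DGGML_WEBGPU_SERIALIZE_SUBMIT to see the two configurations.

    // Hypothetical illustration: which batch size and wait timeout each build mode picks.
    #include <cstdint>
    #include <cstdio>

    int main() {
    #ifdef GGML_WEBGPU_SERIALIZE_SUBMIT
        const uint64_t batch_size = 1;           // submit every command buffer on its own
        const uint64_t timeout_ms = UINT64_MAX;  // wait until outstanding work has finished
    #else
        const uint64_t batch_size = 8;           // batch up to 8 command buffers per submit
        const uint64_t timeout_ms = 1;           // only poll previously submitted batches
    #endif
        std::printf("batch_size=%llu timeout_ms=%llu\n",
                    (unsigned long long) batch_size, (unsigned long long) timeout_ms);
        return 0;
    }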
@@ -297,14 +303,12 @@ static void ggml_webgpu_create_buffer(wgpu::Device & device,
 /** WebGPU Actions */

 // Wait for the queue to finish processing all submitted work
-static void ggml_backend_webgpu_wait(webgpu_context & ctx,
+static void ggml_backend_webgpu_wait(webgpu_context & ctx,
                                      std::vector<std::vector<wgpu::FutureWaitInfo>> & futures,
-                                     uint64_t timeout_ms = UINT64_MAX) {
-    // WebGPU implementations may limit the number of futures that can be waited on at once,
-    // so wait in batches (64 is what Dawn supports).
+                                     uint64_t timeout_ms = UINT64_MAX) {
     size_t i = 0;
     while (i < futures.size()) {
-        auto waitStatus = ctx->instance.WaitAny(futures[i].size(), futures[i].data(), timeout_ms);
+        auto waitStatus = ctx->instance.WaitAny(futures[i].size(), futures[i].data(), timeout_ms);
         switch (waitStatus) {
             case wgpu::WaitStatus::Success:
                 futures.erase(futures.begin() + i);
@@ -381,25 +385,25 @@ static void ggml_backend_webgpu_debug(webgpu_context & ctx) {
 #endif

 static std::vector<wgpu::FutureWaitInfo> ggml_backend_webgpu_submit(webgpu_context ctx,
-                                                                    std::vector<webgpu_command> builds) {
-    std::vector<wgpu::CommandBuffer> commands;
+                                                                    std::vector<webgpu_command> commands) {
+    std::vector<wgpu::CommandBuffer> command_buffers;
     std::vector<webgpu_pool_bufs> params_bufs;
     std::vector<webgpu_pool_bufs> set_rows_error_bufs;
 #ifdef GGML_WEBGPU_GPU_PROFILE
     std::vector<std::pair<std::string, webgpu_pool_bufs>> pipeline_name_and_ts_bufs;
 #endif

-    for (const auto & build : builds) {
-        commands.push_back(build.commands);
-        params_bufs.push_back(build.params_bufs);
-        if (build.set_rows_error_bufs) {
-            set_rows_error_bufs.push_back(build.set_rows_error_bufs.value());
+    for (const auto & command : commands) {
+        command_buffers.push_back(command.commands);
+        params_bufs.push_back(command.params_bufs);
+        if (command.set_rows_error_bufs) {
+            set_rows_error_bufs.push_back(command.set_rows_error_bufs.value());
         }
 #ifdef GGML_WEBGPU_GPU_PROFILE
-        pipeline_name_and_ts_bufs.push_back({ build.pipeline_name, build.timestamp_query_bufs });
+        pipeline_name_and_ts_bufs.push_back({ command.pipeline_name, command.timestamp_query_bufs });
 #endif
     }
-    ctx->queue.Submit(commands.size(), commands.data());
+    ctx->queue.Submit(command_buffers.size(), command_buffers.data());

     std::vector<wgpu::FutureWaitInfo> futures;

@@ -1205,7 +1209,7 @@ static ggml_status ggml_backend_webgpu_graph_compute(ggml_backend_t backend, str
     WEBGPU_CPU_PROFILE_TOTAL_START(graph_compute);

     WEBGPU_CPU_PROFILE_DETAIL_START(graph_compute_encode);
-    std::vector<webgpu_command> commands;
+    std::vector<webgpu_command> commands;
     std::vector<std::vector<wgpu::FutureWaitInfo>> futures;
     for (int i = 0; i < cgraph->n_nodes; i++) {
         if (auto cmd = ggml_webgpu_encode_node(ctx, cgraph->nodes[i])) {
@@ -1214,7 +1218,7 @@ static ggml_status ggml_backend_webgpu_graph_compute(ggml_backend_t backend, str
             if (commands.size() >= WEBGPU_COMMAND_SUBMIT_BATCH_SIZE) {
                 std::vector<wgpu::FutureWaitInfo> new_futures = ggml_backend_webgpu_submit(ctx, commands);
                 // check if previous futures have finished
-                ggml_backend_webgpu_wait(ctx, futures);
+                ggml_backend_webgpu_wait(ctx, futures, WEBGPU_WAIT_ANY_TIMEOUT_MS);
                 futures.push_back({ new_futures });
                 commands.clear();
             }
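
Note: to make the effect of the new constants concrete, here is a minimal sketch of the graph-compute submission loop. It is a hypothetical simplification (stand-in types and helper names, no real WebGPU calls), not code from this patch: in the default build up to 8 encoded commands go into one submit and earlier batches are only polled with a 1 ms timeout, while with GGML_WEBGPU_SERIALIZE_SUBMIT each command is submitted on its own and all previously submitted work is waited to completion before the loop continues.

    #include <cstdint>
    #include <vector>

    struct fake_command {};   // stand-in for webgpu_command
    struct fake_future  {};   // stand-in for wgpu::FutureWaitInfo

    // Stand-in for ggml_backend_webgpu_submit(): submits the batch and returns
    // one completion future per command buffer.
    static std::vector<fake_future> submit(const std::vector<fake_command> & batch) {
        return std::vector<fake_future>(batch.size());
    }

    // Stand-in for ggml_backend_webgpu_wait(): with timeout_ms == UINT64_MAX this
    // drains every pending batch; with a small timeout it only polls and may leave
    // unfinished batches in the list.
    static void wait_on(std::vector<std::vector<fake_future>> & futures, uint64_t timeout_ms) {
        if (timeout_ms == UINT64_MAX) {
            futures.clear();   // pretend all submitted work has now completed
        }
    }

    void graph_compute(const std::vector<fake_command> & nodes,
                       size_t   batch_size,    // 1 when serialized, 8 otherwise
                       uint64_t timeout_ms) {  // UINT64_MAX when serialized, 1 otherwise
        std::vector<fake_command>             commands;
        std::vector<std::vector<fake_future>> futures;
        for (const fake_command & node : nodes) {
            commands.push_back(node);
            if (commands.size() >= batch_size) {
                std::vector<fake_future> new_futures = submit(commands);
                wait_on(futures, timeout_ms);   // serialized build: blocks until earlier batches finish
                futures.push_back(new_futures);
                commands.clear();
            }
        }
        // remaining commands and futures would be flushed/waited here, as in the real backend
    }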
