5656# define WEBGPU_TIMESTAMP_QUERY_BUF_SIZE_BYTES 16 // e.g. enough for two timestamps
5757#endif
5858
// Command-submission tuning, selected at compile time by GGML_WEBGPU_SERIALIZE_SUBMIT.
//   WEBGPU_COMMAND_SUBMIT_BATCH_SIZE - number of encoded commands graph_compute accumulates
//                                      before passing them to ggml_backend_webgpu_submit.
//   WEBGPU_WAIT_ANY_TIMEOUT_MS       - timeout graph_compute passes to ggml_backend_webgpu_wait
//                                      when checking futures from earlier submissions.
59+ #ifdef GGML_WEBGPU_SERIALIZE_SUBMIT
// Serialized mode: each command is submitted on its own, and the wait uses the same
// UINT64_MAX timeout that ggml_backend_webgpu_wait defaults to.
// NOTE(review): presumably this blocks until the earlier work completes - confirm against WaitAny semantics.
60+ # define WEBGPU_COMMAND_SUBMIT_BATCH_SIZE 1
61+ # define WEBGPU_WAIT_ANY_TIMEOUT_MS UINT64_MAX
62+ #else
// Default mode: submit in batches of 8 and check earlier futures with a 1 ms timeout.
63+ # define WEBGPU_COMMAND_SUBMIT_BATCH_SIZE 8
64+ # define WEBGPU_WAIT_ANY_TIMEOUT_MS 1
65+ #endif
66+
5967/* Constants */
6068
61- #define WEBGPU_COMMAND_SUBMIT_BATCH_SIZE 8
62- #define WEBGPU_WAIT_ANY_BATCH_SIZE 64
6369#define WEBGPU_MUL_MAT_WG_SIZE 256
6470#define WEBGPU_NUM_PARAM_BUFS 100
6571#define WEBGPU_PARAMS_BUF_SIZE_BYTES 128 // enough for 32 parameters
@@ -297,14 +303,12 @@ static void ggml_webgpu_create_buffer(wgpu::Device & device,
297303/* * WebGPU Actions */
298304
299305// Wait for the queue to finish processing all submitted work
300- static void ggml_backend_webgpu_wait (webgpu_context & ctx,
306+ static void ggml_backend_webgpu_wait (webgpu_context & ctx,
301307 std::vector<std::vector<wgpu::FutureWaitInfo>> & futures,
302- uint64_t timeout_ms = UINT64_MAX) {
303- // WebGPU implementations may limit the number of futures that can be waited on at once,
304- // so wait in batches (64 is what Dawn supports).
308+ uint64_t timeout_ms = UINT64_MAX) {
305309 size_t i = 0 ;
306310 while (i < futures.size ()) {
307- auto waitStatus = ctx->instance .WaitAny (futures[i].size (), futures[i].data (), timeout_ms);
311+ auto waitStatus = ctx->instance .WaitAny (futures[i].size (), futures[i].data (), timeout_ms);
308312 switch (waitStatus) {
309313 case wgpu::WaitStatus::Success:
310314 futures.erase (futures.begin () + i);
@@ -381,25 +385,25 @@ static void ggml_backend_webgpu_debug(webgpu_context & ctx) {
381385#endif
382386
383387static std::vector<wgpu::FutureWaitInfo> ggml_backend_webgpu_submit (webgpu_context ctx,
384- std::vector<webgpu_command> builds ) {
385- std::vector<wgpu::CommandBuffer> commands ;
388+ std::vector<webgpu_command> commands ) {
389+ std::vector<wgpu::CommandBuffer> command_buffers ;
386390 std::vector<webgpu_pool_bufs> params_bufs;
387391 std::vector<webgpu_pool_bufs> set_rows_error_bufs;
388392#ifdef GGML_WEBGPU_GPU_PROFILE
389393 std::vector<std::pair<std::string, webgpu_pool_bufs>> pipeline_name_and_ts_bufs;
390394#endif
391395
392- for (const auto & build : builds ) {
393- commands .push_back (build .commands );
394- params_bufs.push_back (build .params_bufs );
395- if (build .set_rows_error_bufs ) {
396- set_rows_error_bufs.push_back (build .set_rows_error_bufs .value ());
396+ for (const auto & command : commands ) {
397+ command_buffers .push_back (command .commands );
398+ params_bufs.push_back (command .params_bufs );
399+ if (command .set_rows_error_bufs ) {
400+ set_rows_error_bufs.push_back (command .set_rows_error_bufs .value ());
397401 }
398402#ifdef GGML_WEBGPU_GPU_PROFILE
399- pipeline_name_and_ts_bufs.push_back ({ build .pipeline_name , build .timestamp_query_bufs });
403+ pipeline_name_and_ts_bufs.push_back ({ command .pipeline_name , command .timestamp_query_bufs });
400404#endif
401405 }
402- ctx->queue .Submit (commands .size (), commands .data ());
406+ ctx->queue .Submit (command_buffers .size (), command_buffers .data ());
403407
404408 std::vector<wgpu::FutureWaitInfo> futures;
405409
@@ -1205,7 +1209,7 @@ static ggml_status ggml_backend_webgpu_graph_compute(ggml_backend_t backend, str
12051209 WEBGPU_CPU_PROFILE_TOTAL_START (graph_compute);
12061210
12071211 WEBGPU_CPU_PROFILE_DETAIL_START (graph_compute_encode);
1208- std::vector<webgpu_command> commands;
1212+ std::vector<webgpu_command> commands;
12091213 std::vector<std::vector<wgpu::FutureWaitInfo>> futures;
12101214 for (int i = 0 ; i < cgraph->n_nodes ; i++) {
12111215 if (auto cmd = ggml_webgpu_encode_node (ctx, cgraph->nodes [i])) {
@@ -1214,7 +1218,7 @@ static ggml_status ggml_backend_webgpu_graph_compute(ggml_backend_t backend, str
12141218 if (commands.size () >= WEBGPU_COMMAND_SUBMIT_BATCH_SIZE) {
12151219 std::vector<wgpu::FutureWaitInfo> new_futures = ggml_backend_webgpu_submit (ctx, commands);
12161220 // check if previous futures have finished
1217- ggml_backend_webgpu_wait (ctx, futures);
1221+ ggml_backend_webgpu_wait (ctx, futures, WEBGPU_WAIT_ANY_TIMEOUT_MS );
12181222 futures.push_back ({ new_futures });
12191223 commands.clear ();
12201224 }
0 commit comments