@@ -276,8 +276,6 @@ static void ggml_backend_webgpu_build_and_enqueue(webgpu_context &
276276 bool submit_imm = false ) {
277277 webgpu_param_bufs params_bufs = ctx->param_buf_pool .alloc_bufs ();
278278
279- std::lock_guard<std::recursive_mutex> lock (ctx->mutex );
280-
281279 ggml_backend_webgpu_map_buffer (ctx, params_bufs.host_buf , wgpu::MapMode::Write, 0 , params_bufs.host_buf .GetSize ());
282280 uint32_t * _params = (uint32_t *) params_bufs.host_buf .GetMappedRange ();
283281 for (size_t i = 0 ; i < params.size (); i++) {
@@ -318,6 +316,8 @@ static void ggml_backend_webgpu_build_and_enqueue(webgpu_context &
318316 ctx->param_buf_pool .free_bufs ({ params_bufs });
319317 });
320318 } else {
319+ // Lock the context mutex when pushing to the staging vectors.
320+ std::lock_guard<std::recursive_mutex> lock (ctx->mutex );
321321 // Enqueue commands and only submit if we have enough staged commands
322322 ctx->staged_command_bufs .push_back (commands);
323323 ctx->staged_param_bufs .push_back (params_bufs);
@@ -564,7 +564,6 @@ static void ggml_backend_webgpu_buffer_set_tensor(ggml_backend_buffer_t buffer,
564564
565565 size_t total_offset = webgpu_tensor_offset (tensor) + tensor->view_offs + offset;
566566
567- std::lock_guard<std::recursive_mutex> lock (webgpu_ctx->mutex );
568567 webgpu_ctx->queue .WriteBuffer (buf_ctx->buffer , total_offset, data, (size / 4 ) * 4 );
569568
570569 if (size % 4 != 0 ) {
@@ -622,11 +621,8 @@ static void ggml_backend_webgpu_buffer_get_tensor(ggml_backend_buffer_t buffer,
622621 encoder.CopyBufferToBuffer (buf_ctx->buffer , total_offset, webgpu_ctx->get_tensor_staging_buf , 0 , final_size);
623622 wgpu::CommandBuffer commands = encoder.Finish ();
624623
625- {
626- std::lock_guard<std::recursive_mutex> submit_lock (webgpu_ctx->mutex );
627- // Submit the command buffer to the queue
628- webgpu_ctx->queue .Submit (1 , &commands);
629- }
624+ // Submit the command buffer to the queue
625+ webgpu_ctx->queue .Submit (1 , &commands);
630626
631627 // Map the staging buffer to read the data
632628 ggml_backend_webgpu_map_buffer (webgpu_ctx, webgpu_ctx->get_tensor_staging_buf , wgpu::MapMode::Read, 0 , final_size);
@@ -775,7 +771,7 @@ static ggml_backend_t ggml_backend_webgpu_device_init(ggml_backend_dev_t dev, co
775771 std::lock_guard<std::mutex> lock (webgpu_ctx->init_mutex );
776772 if (!webgpu_ctx->device_init ) {
777773 // Initialize device
778- std::vector<wgpu::FeatureName> required_features = { wgpu::FeatureName::ShaderF16 };
774+ std::vector<wgpu::FeatureName> required_features = { wgpu::FeatureName::ShaderF16, wgpu::FeatureName::ImplicitDeviceSynchronization };
779775 wgpu::DeviceDescriptor dev_desc;
780776 dev_desc.requiredLimits = &webgpu_ctx->limits ;
781777 dev_desc.requiredFeatures = required_features.data ();