diff --git a/.deny.toml b/.deny.toml index 80fc40130f1..e3e0f3e2ea3 100644 --- a/.deny.toml +++ b/.deny.toml @@ -13,6 +13,9 @@ skip-tree = [ { name = "bit-set", version = "0.5.3" }, { name = "bit-vec", version = "0.6.3" }, { name = "capacity_builder", version = "0.1.3" }, + + # Winit 0.30 uses an older objc2 + { name = "objc2-foundation", version = "0.2" }, ] skip = [ # Flume uses an old version diff --git a/CHANGELOG.md b/CHANGELOG.md index 8a8a963cfc6..004e75c31d1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -75,6 +75,10 @@ By @Vecvec in [#7913](https://github.com/gfx-rs/wgpu/pull/7913). - Fixed a bug where access to matrices with 2 rows would not work in some cases. By @andyleiserson in [#7438](https://github.com/gfx-rs/wgpu/pull/7438). +#### Metal + +- Implements ray-tracing acceleration structures for metal backend. By @lichtso in [#8071](https://github.com/gfx-rs/wgpu/pull/8071). + ### Bug Fixes #### General @@ -602,6 +606,9 @@ By @cwfitzgerald in [#6811](https://github.com/gfx-rs/wgpu/pull/6811), [#6815](h - Move incrementation of `Device::last_acceleration_structure_build_command_index` into queue submit. By @Vecvec in [#7462](https://github.com/gfx-rs/wgpu/pull/7462). - Implement indirect draw validation. By @teoxoy in [#7140](https://github.com/gfx-rs/wgpu/pull/7140) +#### Metal +- Use autogenerated `objc2` bindings internally, which should resolve a lot of leaks and unsoundness. By @madsmtm in [#5641](https://github.com/gfx-rs/wgpu/pull/5641). + #### Vulkan - Stop naga causing undefined behavior when a ray query misses. By @Vecvec in [#6752](https://github.com/gfx-rs/wgpu/pull/6752). diff --git a/Cargo.lock b/Cargo.lock index f7175cce623..f0a17ffe89c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -442,12 +442,6 @@ dependencies = [ "wyz", ] -[[package]] -name = "block" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d8c1fef690941d3e7788d328517591fecc684c084084702d6ff1641e993699a" - [[package]] name = "block-sys" version = "0.2.1" @@ -476,6 +470,15 @@ dependencies = [ "objc2 0.5.2", ] +[[package]] +name = "block2" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d59b4c170e16f0405a2e95aff44432a0d41aa97675f3d52623effe95792a037" +dependencies = [ + "objc2 0.6.0", +] + [[package]] name = "bumpalo" version = "3.19.0" @@ -872,16 +875,6 @@ dependencies = [ "libc", ] -[[package]] -name = "core-foundation" -version = "0.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6" -dependencies = [ - "core-foundation-sys", - "libc", -] - [[package]] name = "core-foundation-sys" version = "0.8.7" @@ -895,8 +888,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c07782be35f9e1140080c6b96f0d44b739e2278479f64e02fdab4e32dfd8b081" dependencies = [ "bitflags 1.3.2", - "core-foundation 0.9.4", - "core-graphics-types 0.1.3", + "core-foundation", + "core-graphics-types", "foreign-types", "libc", ] @@ -908,18 +901,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "45390e6114f68f718cc7a830514a96f903cccd70d02a8f6d9f643ac4ba45afaf" dependencies = [ "bitflags 1.3.2", - "core-foundation 0.9.4", - "libc", -] - -[[package]] -name = "core-graphics-types" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d44a101f213f6c4cdc1853d4b78aef6db6bdfa3468798cc1d9912f4735013eb" -dependencies = [ - "bitflags 2.9.1", - "core-foundation 0.10.1", + "core-foundation", "libc", ] @@ -1779,7 +1761,7 @@ dependencies = [ "bitflags 2.9.1", "cfg_aliases 0.1.1", "cgl", - "core-foundation 0.9.4", + "core-foundation", "dispatch", "glutin_egl_sys", "glutin_wgl_sys 0.5.0", @@ -2384,15 +2366,6 @@ version = "0.1.4+2024.11.22-df583a3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0e3cd67e8ea2ba061339150970542cf1c60ba44c6d17e31279cbc133a4b018f8" -[[package]] -name = "malloc_buf" -version = "0.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62bb907fe88d54d8d9ce32a3cceab4218ed2f6b7d35617cafe9adf84e43919cb" -dependencies = [ - "libc", -] - [[package]] name = "matchers" version = "0.1.0" @@ -2426,21 +2399,6 @@ dependencies = [ "autocfg", ] -[[package]] -name = "metal" -version = "0.32.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00c15a6f673ff72ddcc22394663290f870fb224c1bfce55734a75c414150e605" -dependencies = [ - "bitflags 2.9.1", - "block", - "core-graphics-types 0.2.0", - "foreign-types", - "log", - "objc", - "paste", -] - [[package]] name = "minicov" version = "0.3.7" @@ -2766,15 +2724,6 @@ version = "0.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "059c95245738cdc7b40078cdd51a23200252a4c0a0a6dd005136152b3f467a4a" -[[package]] -name = "objc" -version = "0.2.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "915b1b472bc21c53464d6c8461c9d3af805ba1ef837e1cac254428f4a77177b1" -dependencies = [ - "malloc_buf", -] - [[package]] name = "objc-sys" version = "0.3.5" @@ -2801,6 +2750,15 @@ dependencies = [ "objc2-encode 4.1.0", ] +[[package]] +name = "objc2" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3531f65190d9cff863b77a99857e74c314dd16bf56c538c4b57c7cbc3f3a6e59" +dependencies = [ + "objc2-encode 4.1.0", +] + [[package]] name = "objc2-app-kit" version = "0.2.2" @@ -2813,8 +2771,8 @@ dependencies = [ "objc2 0.5.2", "objc2-core-data", "objc2-core-image", - "objc2-foundation", - "objc2-quartz-core", + "objc2-foundation 0.2.2", + "objc2-quartz-core 0.2.2", ] [[package]] @@ -2827,7 +2785,7 @@ dependencies = [ "block2 0.5.1", "objc2 0.5.2", "objc2-core-location", - "objc2-foundation", + "objc2-foundation 0.2.2", ] [[package]] @@ -2838,7 +2796,7 @@ checksum = "a5ff520e9c33812fd374d8deecef01d4a840e7b41862d849513de77e44aa4889" dependencies = [ "block2 0.5.1", "objc2 0.5.2", - "objc2-foundation", + "objc2-foundation 0.2.2", ] [[package]] @@ -2850,7 +2808,17 @@ dependencies = [ "bitflags 2.9.1", "block2 0.5.1", "objc2 0.5.2", - "objc2-foundation", + "objc2-foundation 0.2.2", +] + +[[package]] +name = "objc2-core-foundation" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daeaf60f25471d26948a1c2f840e3f7d86f4109e3af4e8e4b5cd70c39690d925" +dependencies = [ + "bitflags 2.9.1", + "objc2 0.6.0", ] [[package]] @@ -2861,8 +2829,8 @@ checksum = "55260963a527c99f1819c4f8e3b47fe04f9650694ef348ffd2227e8196d34c80" dependencies = [ "block2 0.5.1", "objc2 0.5.2", - "objc2-foundation", - "objc2-metal", + "objc2-foundation 0.2.2", + "objc2-metal 0.2.2", ] [[package]] @@ -2874,7 +2842,7 @@ dependencies = [ "block2 0.5.1", "objc2 0.5.2", "objc2-contacts", - "objc2-foundation", + "objc2-foundation 0.2.2", ] [[package]] @@ -2902,6 +2870,17 @@ dependencies = [ "objc2 0.5.2", ] +[[package]] +name = "objc2-foundation" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a21c6c9014b82c39515db5b396f91645182611c97d24637cf56ac01e5f8d998" +dependencies = [ + "bitflags 2.9.1", + "objc2 0.6.0", + "objc2-core-foundation", +] + [[package]] name = "objc2-link-presentation" version = "0.2.2" @@ -2911,7 +2890,7 @@ dependencies = [ "block2 0.5.1", "objc2 0.5.2", "objc2-app-kit", - "objc2-foundation", + "objc2-foundation 0.2.2", ] [[package]] @@ -2923,7 +2902,19 @@ dependencies = [ "bitflags 2.9.1", "block2 0.5.1", "objc2 0.5.2", - "objc2-foundation", + "objc2-foundation 0.2.2", +] + +[[package]] +name = "objc2-metal" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01c41bc8b0e50ea7a5304a56f25e0066f526e99641b46fd7b9ad4421dd35bff6" +dependencies = [ + "bitflags 2.9.1", + "block2 0.6.0", + "objc2 0.6.0", + "objc2-foundation 0.3.0", ] [[package]] @@ -2935,8 +2926,21 @@ dependencies = [ "bitflags 2.9.1", "block2 0.5.1", "objc2 0.5.2", - "objc2-foundation", - "objc2-metal", + "objc2-foundation 0.2.2", + "objc2-metal 0.2.2", +] + +[[package]] +name = "objc2-quartz-core" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fb3794501bb1bee12f08dcad8c61f2a5875791ad1c6f47faa71a0f033f20071" +dependencies = [ + "bitflags 2.9.1", + "objc2 0.6.0", + "objc2-core-foundation", + "objc2-foundation 0.3.0", + "objc2-metal 0.3.0", ] [[package]] @@ -2946,7 +2950,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0a684efe3dec1b305badae1a28f6555f6ddd3bb2c2267896782858d5a78404dc" dependencies = [ "objc2 0.5.2", - "objc2-foundation", + "objc2-foundation 0.2.2", ] [[package]] @@ -2962,9 +2966,9 @@ dependencies = [ "objc2-core-data", "objc2-core-image", "objc2-core-location", - "objc2-foundation", + "objc2-foundation 0.2.2", "objc2-link-presentation", - "objc2-quartz-core", + "objc2-quartz-core 0.2.2", "objc2-symbols", "objc2-uniform-type-identifiers", "objc2-user-notifications", @@ -2978,7 +2982,7 @@ checksum = "44fa5f9748dbfe1ca6c0b79ad20725a11eca7c2218bceb4b005cb1be26273bfe" dependencies = [ "block2 0.5.1", "objc2 0.5.2", - "objc2-foundation", + "objc2-foundation 0.2.2", ] [[package]] @@ -2991,7 +2995,7 @@ dependencies = [ "block2 0.5.1", "objc2 0.5.2", "objc2-core-location", - "objc2-foundation", + "objc2-foundation 0.2.2", ] [[package]] @@ -3441,6 +3445,18 @@ version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "20675572f6f24e9e76ef639bc5552774ed45f1c30e2951e1e99c59888861c539" +[[package]] +name = "raw-window-metal" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40d213455a5f1dc59214213c7330e074ddf8114c9a42411eb890c767357ce135" +dependencies = [ + "objc2 0.6.0", + "objc2-core-foundation", + "objc2-foundation 0.3.0", + "objc2-quartz-core 0.3.0", +] + [[package]] name = "rayon" version = "1.10.0" @@ -5062,11 +5078,10 @@ dependencies = [ "ash", "bit-set 0.8.0", "bitflags 2.9.1", - "block", + "block2 0.6.0", "bytemuck", "cfg-if", "cfg_aliases 0.2.1", - "core-graphics-types 0.2.0", "env_logger", "glam", "glow", @@ -5083,10 +5098,13 @@ dependencies = [ "libloading", "log", "mach-dxcompiler-rs", - "metal", "naga", "ndk-sys 0.6.0+11769913", - "objc", + "objc2 0.6.0", + "objc2-core-foundation", + "objc2-foundation 0.3.0", + "objc2-metal 0.3.0", + "objc2-quartz-core 0.3.0", "ordered-float 5.0.0", "parking_lot", "portable-atomic", @@ -5095,6 +5113,7 @@ dependencies = [ "range-alloc", "raw-window-handle 0.5.2", "raw-window-handle 0.6.2", + "raw-window-metal", "renderdoc-sys", "rustc-hash", "smallvec", @@ -5727,7 +5746,7 @@ dependencies = [ "bytemuck", "calloop 0.12.4", "cfg_aliases 0.1.1", - "core-foundation 0.9.4", + "core-foundation", "core-graphics", "cursor-icon", "icrate", @@ -5778,7 +5797,7 @@ dependencies = [ "calloop 0.13.0", "cfg_aliases 0.2.1", "concurrent-queue", - "core-foundation 0.9.4", + "core-foundation", "core-graphics", "cursor-icon", "dpi", @@ -5788,7 +5807,7 @@ dependencies = [ "ndk 0.9.0", "objc2 0.5.2", "objc2-app-kit", - "objc2-foundation", + "objc2-foundation 0.2.2", "objc2-ui-kit", "orbclient", "percent-encoding", diff --git a/Cargo.toml b/Cargo.toml index 529a0e92035..7d0b33c970f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -194,10 +194,64 @@ which = "8" xshell = "0.2.2" # Metal dependencies -metal = "0.32" -block = "0.1.6" -core-graphics-types = "0.2" -objc = "0.2.5" +block2 = "0.6" +objc2 = "0.6" +objc2-core-foundation = { version = "0.3", default-features = false, features = [ + "std", + "CFCGTypes", +] } +objc2-foundation = { version = "0.3", default-features = false, features = [ + "std", + "NSError", + "NSProcessInfo", + "NSRange", + "NSString", +] } +objc2-metal = { version = "0.3", default-features = false, features = [ + "std", + "block2", + "MTLAllocation", + "MTLBlitCommandEncoder", + "MTLBlitPass", + "MTLBuffer", + "MTLCaptureManager", + "MTLCaptureScope", + "MTLCommandBuffer", + "MTLCommandEncoder", + "MTLCommandQueue", + "MTLComputeCommandEncoder", + "MTLComputePass", + "MTLComputePipeline", + "MTLCounters", + "MTLDepthStencil", + "MTLDevice", + "MTLDrawable", + "MTLEvent", + "MTLLibrary", + "MTLPipeline", + "MTLPixelFormat", + "MTLRenderCommandEncoder", + "MTLRenderPass", + "MTLRenderPipeline", + "MTLResource", + "MTLSampler", + "MTLStageInputOutputDescriptor", + "MTLTexture", + "MTLTypes", + "MTLVertexDescriptor", + "MTLAccelerationStructure", + "MTLAccelerationStructureTypes", + "MTLAccelerationStructureCommandEncoder", + "MTLResidencySet", +] } +objc2-quartz-core = { version = "0.3", default-features = false, features = [ + "std", + "objc2-core-foundation", + "CALayer", + "CAMetalLayer", + "objc2-metal", +] } +raw-window-metal = "1.0" # Vulkan dependencies android_system_properties = "0.1.1" diff --git a/examples/features/src/framework.rs b/examples/features/src/framework.rs index 7a3017848fa..eff41d74fff 100644 --- a/examples/features/src/framework.rs +++ b/examples/features/src/framework.rs @@ -391,8 +391,6 @@ async fn start(title: &str) { &context.device, &context.queue, ); - - window_loop.window.request_redraw(); } WindowEvent::KeyboardInput { event: diff --git a/examples/features/src/hello_triangle/mod.rs b/examples/features/src/hello_triangle/mod.rs index 89ef9864f5b..7d8e6e24d4a 100644 --- a/examples/features/src/hello_triangle/mod.rs +++ b/examples/features/src/hello_triangle/mod.rs @@ -98,8 +98,6 @@ async fn run(event_loop: EventLoop<()>, window: Window) { config.width = new_size.width.max(1); config.height = new_size.height.max(1); surface.configure(&device, &config); - // On macos the window needs to be redrawn manually after resizing - window.request_redraw(); } WindowEvent::RedrawRequested => { let frame = surface diff --git a/examples/features/src/hello_windows/mod.rs b/examples/features/src/hello_windows/mod.rs index 9885d240221..b52fb8db433 100644 --- a/examples/features/src/hello_windows/mod.rs +++ b/examples/features/src/hello_windows/mod.rs @@ -98,8 +98,6 @@ async fn run(event_loop: EventLoop<()>, viewports: Vec<(Arc, wgpu::Color // Recreate the swap chain with the new size if let Some(viewport) = viewports.get_mut(&window_id) { viewport.resize(&device, new_size); - // On macos the window needs to be redrawn manually after resizing - viewport.desc.window.request_redraw(); } } WindowEvent::RedrawRequested => { diff --git a/examples/features/src/ray_shadows/mod.rs b/examples/features/src/ray_shadows/mod.rs index 559ac32342a..66cf852e614 100644 --- a/examples/features/src/ray_shadows/mod.rs +++ b/examples/features/src/ray_shadows/mod.rs @@ -116,7 +116,7 @@ impl crate::framework::Example for Example { fn required_limits() -> wgpu::Limits { wgpu::Limits { - max_push_constant_size: 12, + max_push_constant_size: 16, ..wgpu::Limits::default() } .using_minimum_supported_acceleration_structure_values() @@ -210,7 +210,7 @@ impl crate::framework::Example for Example { bind_group_layouts: &[&bind_group_layout], push_constant_ranges: &[wgpu::PushConstantRange { stages: wgpu::ShaderStages::FRAGMENT, - range: 0..12, + range: 0..16, }], }); diff --git a/examples/features/src/ray_shadows/shader.wgsl b/examples/features/src/ray_shadows/shader.wgsl index 4ba5d42f795..3e8e8d0c3ed 100644 --- a/examples/features/src/ray_shadows/shader.wgsl +++ b/examples/features/src/ray_shadows/shader.wgsl @@ -35,6 +35,7 @@ var acc_struct: acceleration_structure; struct PushConstants { light: vec3, + padding: f32, } var pc: PushConstants; diff --git a/examples/features/src/ray_traced_triangle/mod.rs b/examples/features/src/ray_traced_triangle/mod.rs index 0df2e829d2b..5b5a87127a2 100644 --- a/examples/features/src/ray_traced_triangle/mod.rs +++ b/examples/features/src/ray_traced_triangle/mod.rs @@ -120,7 +120,7 @@ impl crate::framework::Example for Example { }); let index_buffer = device.create_buffer_init(&BufferInitDescriptor { - label: Some("vertex buffer"), + label: Some("index buffer"), contents: bytemuck::cast_slice(&indices), usage: BufferUsages::BLAS_INPUT, }); diff --git a/examples/features/src/uniform_values/mod.rs b/examples/features/src/uniform_values/mod.rs index 3ee86767255..c9b945c3182 100644 --- a/examples/features/src/uniform_values/mod.rs +++ b/examples/features/src/uniform_values/mod.rs @@ -210,7 +210,6 @@ impl WgpuContext { self.surface_config.width = new_size.width; self.surface_config.height = new_size.height; self.surface.configure(&self.device, &self.surface_config); - self.window.request_redraw(); } } @@ -278,7 +277,6 @@ async fn run(event_loop: EventLoop<()>, window: Arc) { WindowEvent::Resized(new_size) => { let wgpu_context_mut = wgpu_context.as_mut().unwrap(); wgpu_context_mut.resize(new_size); - wgpu_context_mut.window.request_redraw(); } WindowEvent::RedrawRequested => { let wgpu_context_ref = wgpu_context.as_ref().unwrap(); diff --git a/wgpu-core/src/command/ray_tracing.rs b/wgpu-core/src/command/ray_tracing.rs index e4553ff222b..dd57a1ae164 100644 --- a/wgpu-core/src/command/ray_tracing.rs +++ b/wgpu-core/src/command/ray_tracing.rs @@ -347,21 +347,32 @@ impl Global { let mut tlas_descriptors = Vec::with_capacity(tlas_storage.len()); - for &TlasStore { - internal: - UnsafeTlasStore { - ref tlas, - ref entries, - ref scratch_buffer_offset, - }, - .. - } in &tlas_storage + for ( + &TlasStore { + internal: + UnsafeTlasStore { + ref tlas, + ref entries, + ref scratch_buffer_offset, + }, + .. + }, + TlasBuild { + tlas: _, + dependencies, + }, + ) in tlas_storage.iter().zip(build_command.tlas_s_built.iter()) { if tlas.update_mode == wgt::AccelerationStructureUpdateMode::PreferUpdate { log::info!("only rebuild implemented") } + let dependencies_raw = dependencies + .iter() + .map(|blas| blas.try_raw(&snatch_guard).unwrap()) + .collect::>(); tlas_descriptors.push(hal::BuildAccelerationStructureDescriptor { entries, + dependencies: dependencies_raw, mode: hal::AccelerationStructureBuildMode::Build, flags: tlas.flags, source_acceleration_structure: None, @@ -984,6 +995,7 @@ fn map_blas<'a>( } Ok(hal::BuildAccelerationStructureDescriptor { entries, + dependencies: Vec::new(), mode: hal::AccelerationStructureBuildMode::Build, flags: blas.flags, source_acceleration_structure: None, diff --git a/wgpu-core/src/device/ray_tracing.rs b/wgpu-core/src/device/ray_tracing.rs index b44a8719d3f..236e32e3e5b 100644 --- a/wgpu-core/src/device/ray_tracing.rs +++ b/wgpu-core/src/device/ray_tracing.rs @@ -63,7 +63,7 @@ impl Device { dyn hal::DynBuffer, > { format: desc.index_format.unwrap(), - buffer: None, + buffer: Some(self.zero_buffer.as_ref()), offset: 0, count, }); @@ -98,7 +98,7 @@ impl Device { } entries.push(hal::AccelerationStructureTriangles:: { - vertex_buffer: None, + vertex_buffer: Some(self.zero_buffer.as_ref()), vertex_format: desc.vertex_format, first_vertex: 0, vertex_count: desc.vertex_count, @@ -207,7 +207,7 @@ impl Device { &hal::GetAccelerationStructureBuildSizesDescriptor { entries: &hal::AccelerationStructureEntries::Instances( hal::AccelerationStructureInstances { - buffer: None, + buffer: Some(self.zero_buffer.as_ref()), offset: 0, count: desc.max_instances, }, diff --git a/wgpu-hal/Cargo.toml b/wgpu-hal/Cargo.toml index 94dc0e07e0c..63aafc236fc 100644 --- a/wgpu-hal/Cargo.toml +++ b/wgpu-hal/Cargo.toml @@ -18,10 +18,9 @@ rust-version = "1.82.0" [package.metadata.docs.rs] # Ideally we would enable all the features. # -# However, the metal features fail to be documented because the docs.rs runner cross-compiling under -# x86_64-unknown-linux-gnu and metal-rs cannot compile in that environment at the moment. The same applies -# for the dx12 feature. -features = ["vulkan", "gles", "renderdoc"] +# However, the dx12 features fail to be documented because the docs.rs runner cross-compiling under +# x86_64-unknown-linux-gnu cannot compile in that environment at the moment. +features = ["metal", "vulkan", "gles", "renderdoc"] rustdoc-args = ["--cfg", "docsrs"] targets = [ "x86_64-unknown-linux-gnu", @@ -59,7 +58,7 @@ unexpected_cfgs = { level = "warn", check-cfg = [ # exclude the Vulkan backend on MacOS unless a separate feature `vulkan-portability` is enabled. In response # to these features, it enables features of platform specific crates. For example, the `vulkan` feature in wgpu-core # enables the `vulkan` feature in `wgpu-core-deps-windows-linux-android` which in turn enables the -# `vulkan` feature in `wgpu-hal` _only_ on those platforms. If you enable the `vulkan-portability` feature, it +# `vulkan` feature in `wgpu-hal` _only_ on those platforms. If you enable the `vulkan-portability` feature, it # will enable the `vulkan` feature in `wgpu-core-deps-apple`. The only way to do this is unfortunately to have # a separate crate for each platform category that participates in the feature unification. # @@ -74,15 +73,19 @@ metal = [ # Metal is only available on Apple platforms, therefore request MSL output also only if we target an Apple platform. "naga/msl-out", "dep:arrayvec", - "dep:block", - "dep:core-graphics-types", + "dep:block2", + "dep:bytemuck", "dep:hashbrown", "dep:libc", "dep:log", - "dep:metal", - "dep:objc", + "dep:objc2", + "dep:objc2-core-foundation", + "dep:objc2-foundation", + "dep:objc2-metal", + "dep:objc2-quartz-core", "dep:parking_lot", "dep:profiling", + "dep:raw-window-metal", ] vulkan = [ "naga/spv-out", @@ -99,6 +102,7 @@ vulkan = [ "dep:ordered-float", "dep:parking_lot", "dep:profiling", + "dep:raw-window-metal", "dep:smallvec", "dep:windows", "windows/Win32", @@ -115,7 +119,7 @@ gles = [ "dep:libloading", "dep:log", "dep:ndk-sys", - "dep:objc", + "dep:objc2", "dep:parking_lot", "dep:profiling", "dep:wasm-bindgen", @@ -274,10 +278,16 @@ mach-dxcompiler-rs = { workspace = true, optional = true } [target.'cfg(target_vendor = "apple")'.dependencies] # Backend: Metal -block = { workspace = true, optional = true } -core-graphics-types = { workspace = true, optional = true } -metal = { workspace = true, optional = true } -objc = { workspace = true, optional = true } +block2 = { workspace = true, optional = true } +bytemuck = { workspace = true, optional = true } +objc2 = { workspace = true, optional = true } +objc2-core-foundation = { workspace = true, optional = true } +objc2-foundation = { workspace = true, optional = true } +objc2-metal = { workspace = true, optional = true } +objc2-quartz-core = { workspace = true, optional = true } + +# backend: Metal + Vulkan +raw-window-metal = { workspace = true, optional = true } ######################### ### Platform: Android ### diff --git a/wgpu-hal/examples/ray-traced-triangle/main.rs b/wgpu-hal/examples/ray-traced-triangle/main.rs index 2947d1a6014..233f1a3f353 100644 --- a/wgpu-hal/examples/ray-traced-triangle/main.rs +++ b/wgpu-hal/examples/ray-traced-triangle/main.rs @@ -493,13 +493,6 @@ impl Example { }]; let blas_entries = hal::AccelerationStructureEntries::Triangles(blas_triangles); - let mut tlas_entries = - hal::AccelerationStructureEntries::Instances(hal::AccelerationStructureInstances { - buffer: None, - count: 3, - offset: 0, - }); - let blas_sizes = unsafe { device.get_acceleration_structure_build_sizes( &hal::GetAccelerationStructureBuildSizesDescriptor { @@ -509,6 +502,89 @@ impl Example { ) }; + let blas = unsafe { + device.create_acceleration_structure(&hal::AccelerationStructureDescriptor { + label: Some("blas"), + size: blas_sizes.acceleration_structure_size, + format: hal::AccelerationStructureFormat::BottomLevel, + allow_compaction: false, + }) + } + .unwrap(); + + let instances = [ + AccelerationStructureInstance::new( + &Affine3A::from_translation(Vec3 { + x: 0.0, + y: 0.0, + z: 0.0, + }), + 0, + 0xff, + 0, + 0, + unsafe { device.get_acceleration_structure_device_address(&blas) }, + ), + AccelerationStructureInstance::new( + &Affine3A::from_translation(Vec3 { + x: -1.0, + y: -1.0, + z: -2.0, + }), + 0, + 0xff, + 0, + 0, + unsafe { device.get_acceleration_structure_device_address(&blas) }, + ), + AccelerationStructureInstance::new( + &Affine3A::from_translation(Vec3 { + x: 1.0, + y: -1.0, + z: -2.0, + }), + 0, + 0xff, + 0, + 0, + unsafe { device.get_acceleration_structure_device_address(&blas) }, + ), + ]; + + let instances_buffer_size = instances.len() * size_of::(); + + let instances_buffer = unsafe { + let instances_buffer = device + .create_buffer(&hal::BufferDescriptor { + label: Some("instances_buffer"), + size: instances_buffer_size as u64, + usage: wgpu_types::BufferUses::MAP_WRITE + | wgpu_types::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT, + memory_flags: hal::MemoryFlags::TRANSIENT | hal::MemoryFlags::PREFER_COHERENT, + }) + .unwrap(); + + let mapping = device + .map_buffer(&instances_buffer, 0..instances_buffer_size as u64) + .unwrap(); + ptr::copy_nonoverlapping( + instances.as_ptr() as *const u8, + mapping.ptr.as_ptr(), + instances_buffer_size, + ); + device.unmap_buffer(&instances_buffer); + assert!(mapping.is_coherent); + + instances_buffer + }; + + let mut tlas_entries = + hal::AccelerationStructureEntries::Instances(hal::AccelerationStructureInstances { + buffer: Some(&instances_buffer), + count: 3, + offset: 0, + }); + let tlas_flags = hal::AccelerationStructureBuildFlags::PREFER_FAST_TRACE | hal::AccelerationStructureBuildFlags::ALLOW_UPDATE; @@ -521,16 +597,6 @@ impl Example { ) }; - let blas = unsafe { - device.create_acceleration_structure(&hal::AccelerationStructureDescriptor { - label: Some("blas"), - size: blas_sizes.acceleration_structure_size, - format: hal::AccelerationStructureFormat::BottomLevel, - allow_compaction: false, - }) - } - .unwrap(); - let tlas = unsafe { device.create_acceleration_structure(&hal::AccelerationStructureDescriptor { label: Some("tlas"), @@ -657,74 +723,7 @@ impl Example { .unwrap() }; - let instances = [ - AccelerationStructureInstance::new( - &Affine3A::from_translation(Vec3 { - x: 0.0, - y: 0.0, - z: 0.0, - }), - 0, - 0xff, - 0, - 0, - unsafe { device.get_acceleration_structure_device_address(&blas) }, - ), - AccelerationStructureInstance::new( - &Affine3A::from_translation(Vec3 { - x: -1.0, - y: -1.0, - z: -2.0, - }), - 0, - 0xff, - 0, - 0, - unsafe { device.get_acceleration_structure_device_address(&blas) }, - ), - AccelerationStructureInstance::new( - &Affine3A::from_translation(Vec3 { - x: 1.0, - y: -1.0, - z: -2.0, - }), - 0, - 0xff, - 0, - 0, - unsafe { device.get_acceleration_structure_device_address(&blas) }, - ), - ]; - - let instances_buffer_size = instances.len() * size_of::(); - - let instances_buffer = unsafe { - let instances_buffer = device - .create_buffer(&hal::BufferDescriptor { - label: Some("instances_buffer"), - size: instances_buffer_size as u64, - usage: wgpu_types::BufferUses::MAP_WRITE - | wgpu_types::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT, - memory_flags: hal::MemoryFlags::TRANSIENT | hal::MemoryFlags::PREFER_COHERENT, - }) - .unwrap(); - - let mapping = device - .map_buffer(&instances_buffer, 0..instances_buffer_size as u64) - .unwrap(); - ptr::copy_nonoverlapping( - instances.as_ptr() as *const u8, - mapping.ptr.as_ptr(), - instances_buffer_size, - ); - device.unmap_buffer(&instances_buffer); - assert!(mapping.is_coherent); - - instances_buffer - }; - if let hal::AccelerationStructureEntries::Instances(ref mut i) = tlas_entries { - i.buffer = Some(&instances_buffer); assert!( instances.len() <= i.count as usize, "Tlas allocation to small" @@ -755,6 +754,7 @@ impl Example { destination_acceleration_structure: &blas, scratch_buffer: &scratch_buffer, entries: &blas_entries, + dependencies: Vec::new(), source_acceleration_structure: None, scratch_buffer_offset: 0, }], @@ -784,6 +784,7 @@ impl Example { destination_acceleration_structure: &tlas, scratch_buffer: &scratch_buffer, entries: &tlas_entries, + dependencies: vec![&blas], source_acceleration_structure: None, scratch_buffer_offset: 0, }], @@ -928,6 +929,7 @@ impl Example { destination_acceleration_structure: &self.tlas, scratch_buffer: &self.scratch_buffer, entries: &hal::AccelerationStructureEntries::Instances(instances), + dependencies: vec![&self.blas], source_acceleration_structure: Some(&self.tlas), scratch_buffer_offset: 0, }], diff --git a/wgpu-hal/src/dynamic/command.rs b/wgpu-hal/src/dynamic/command.rs index 43205c01d2f..b4d067d5871 100644 --- a/wgpu-hal/src/dynamic/command.rs +++ b/wgpu-hal/src/dynamic/command.rs @@ -665,6 +665,11 @@ impl DynCommandEncoder for C { ::AccelerationStructure, > { entries, + dependencies: d + .dependencies + .iter() + .map(|dependency| dependency.expect_downcast_ref()) + .collect::>(), mode: d.mode, flags: d.flags, source_acceleration_structure: d diff --git a/wgpu-hal/src/gles/egl.rs b/wgpu-hal/src/gles/egl.rs index ff4d77a3357..0a08f0821ee 100644 --- a/wgpu-hal/src/gles/egl.rs +++ b/wgpu-hal/src/gles/egl.rs @@ -1382,10 +1382,11 @@ impl crate::Surface for Surface { let window_ptr = handle.ns_view.as_ptr(); #[cfg(target_os = "macos")] let window_ptr = { - use objc::{msg_send, runtime::Object, sel, sel_impl}; + use objc2::msg_send; + use objc2::runtime::AnyObject; // ns_view always have a layer and don't need to verify that it exists. - let layer: *mut Object = - msg_send![handle.ns_view.as_ptr().cast::(), layer]; + let layer: *mut AnyObject = + msg_send![handle.ns_view.as_ptr().cast::(), layer]; layer.cast::() }; window_ptr diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index e6b4e0b0f89..59f45cd6858 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -2537,6 +2537,7 @@ pub struct BuildAccelerationStructureDescriptor< A: DynAccelerationStructure + ?Sized, > { pub entries: &'a AccelerationStructureEntries<'a, B>, + pub dependencies: Vec<&'a A>, pub mode: AccelerationStructureBuildMode, pub flags: AccelerationStructureBuildFlags, pub source_acceleration_structure: Option<&'a A>, diff --git a/wgpu-hal/src/metal/adapter.rs b/wgpu-hal/src/metal/adapter.rs index 7ab14ca76d2..f11ab3080cb 100644 --- a/wgpu-hal/src/metal/adapter.rs +++ b/wgpu-hal/src/metal/adapter.rs @@ -1,12 +1,14 @@ -use metal::{ - MTLArgumentBuffersTier, MTLCounterSamplingPoint, MTLFeatureSet, MTLGPUFamily, - MTLLanguageVersion, MTLPixelFormat, MTLReadWriteTextureTier, NSInteger, +use objc2::runtime::ProtocolObject; +use objc2_foundation::{NSOperatingSystemVersion, NSProcessInfo}; +use objc2_metal::{ + MTLArgumentBuffersTier, MTLCounterSamplingPoint, MTLDevice, MTLFeatureSet, MTLGPUFamily, + MTLIndirectAccelerationStructureInstanceDescriptor, MTLLanguageVersion, MTLPixelFormat, + MTLReadWriteTextureTier, }; -use objc::{class, msg_send, sel, sel_impl}; use parking_lot::Mutex; use wgt::{AstcBlock, AstcChannel}; -use alloc::sync::Arc; +use alloc::{string::ToString as _, sync::Arc}; use super::TimestampQuerySupport; @@ -27,7 +29,7 @@ use super::TimestampQuerySupport; /// . /// /// [new command buffer]: https://developer.apple.com/documentation/metal/mtlcommandqueue/makecommandbuffer()?language=objc -const MAX_COMMAND_BUFFERS: u64 = 4096; +const MAX_COMMAND_BUFFERS: usize = 4096; unsafe impl Send for super::Adapter {} unsafe impl Sync for super::Adapter {} @@ -51,7 +53,8 @@ impl crate::Adapter for super::Adapter { .shared .device .lock() - .new_command_queue_with_max_command_buffer_count(MAX_COMMAND_BUFFERS); + .newCommandQueueWithMaxCommandBufferCount(MAX_COMMAND_BUFFERS) + .unwrap(); // Acquiring the meaning of timestamp ticks is hard with Metal! // The only thing there is is a method correlating cpu & gpu timestamps (`device.sample_timestamps`). @@ -72,7 +75,14 @@ impl crate::Adapter for super::Adapter { // Based on: // * https://github.com/gfx-rs/wgpu/pull/2528 // * https://github.com/gpuweb/gpuweb/issues/1325#issuecomment-761041326 - let timestamp_period = if self.shared.device.lock().name().starts_with("Intel") { + let timestamp_period = if self + .shared + .device + .lock() + .name() + .to_string() + .starts_with("Intel") + { 83.333 } else { // Known for Apple Silicon (at least M1 & M2, iPad Pro 2018) and AMD GPUs. @@ -107,6 +117,8 @@ impl crate::Adapter for super::Adapter { MTLReadWriteTextureTier::TierNone => (Tfc::empty(), Tfc::empty()), MTLReadWriteTextureTier::Tier1 => (Tfc::STORAGE_READ_WRITE, Tfc::empty()), MTLReadWriteTextureTier::Tier2 => (Tfc::STORAGE_READ_WRITE, Tfc::STORAGE_READ_WRITE), + // Unknown levels of support are likely higher than Tier 2. + _ => (Tfc::STORAGE_READ_WRITE, Tfc::STORAGE_READ_WRITE), }; let msaa_count = pc.sample_count_mask; @@ -129,7 +141,7 @@ impl crate::Adapter for super::Adapter { ], ); - let image_atomic_if = if pc.msl_version >= MTLLanguageVersion::V3_1 { + let image_atomic_if = if pc.msl_version >= MTLLanguageVersion::Version3_1 { Tfc::STORAGE_ATOMIC } else { Tfc::empty() @@ -511,70 +523,68 @@ const DEPTH_CLIP_MODE: &[MTLFeatureSet] = &[ MTLFeatureSet::macOS_GPUFamily1_v1, ]; -const OS_NOT_SUPPORT: (usize, usize) = (10000, 0); +const OS_NOT_SUPPORT: (isize, isize) = (10000, 0); impl super::PrivateCapabilities { - fn supports_any(raw: &metal::DeviceRef, features_sets: &[MTLFeatureSet]) -> bool { + fn supports_any(raw: &ProtocolObject, features_sets: &[MTLFeatureSet]) -> bool { features_sets .iter() .cloned() - .any(|x| raw.supports_feature_set(x)) + .any(|x| raw.supportsFeatureSet(x)) } - pub fn new(device: &metal::Device) -> Self { - #[repr(C)] - #[derive(Clone, Copy, Debug)] - #[allow(clippy::upper_case_acronyms)] - struct NSOperatingSystemVersion { - major: usize, - minor: usize, - patch: usize, + pub fn new(device: &ProtocolObject) -> Self { + trait AtLeast { + fn at_least( + &self, + mac_version: (isize, isize), + ios_version: (isize, isize), + is_mac: bool, + ) -> bool; } - impl NSOperatingSystemVersion { + impl AtLeast for NSOperatingSystemVersion { fn at_least( &self, - mac_version: (usize, usize), - ios_version: (usize, usize), + mac_version: (isize, isize), + ios_version: (isize, isize), is_mac: bool, ) -> bool { if is_mac { - self.major > mac_version.0 - || (self.major == mac_version.0 && self.minor >= mac_version.1) + self.majorVersion > mac_version.0 + || (self.majorVersion == mac_version.0 + && self.minorVersion >= mac_version.1) } else { - self.major > ios_version.0 - || (self.major == ios_version.0 && self.minor >= ios_version.1) + self.majorVersion > ios_version.0 + || (self.majorVersion == ios_version.0 + && self.minorVersion >= ios_version.1) } } } - let version: NSOperatingSystemVersion = unsafe { - let process_info: *mut objc::runtime::Object = - msg_send![class!(NSProcessInfo), processInfo]; - msg_send![process_info, operatingSystemVersion] - }; + let version = NSProcessInfo::processInfo().operatingSystemVersion(); - let os_is_mac = device.supports_feature_set(MTLFeatureSet::macOS_GPUFamily1_v1); + let os_is_mac = device.supportsFeatureSet(MTLFeatureSet::macOS_GPUFamily1_v1); // Metal was first introduced in OS X 10.11 and iOS 8. The current version number of visionOS is 1.0.0. Additionally, // on the Simulator, Apple only provides the Apple2 GPU capability, and the Apple2+ GPU capability covers the capabilities of Apple2. // Therefore, the following conditions can be used to determine if it is visionOS. // https://developer.apple.com/documentation/metal/developing_metal_apps_that_run_in_simulator - let os_is_xr = version.major < 8 && device.supports_family(MTLGPUFamily::Apple2); + let os_is_xr = version.majorVersion < 8 && device.supportsFamily(MTLGPUFamily::Apple2); let family_check = os_is_xr || version.at_least((10, 15), (13, 0), os_is_mac); let mut sample_count_mask = crate::TextureFormatCapabilities::MULTISAMPLE_X4; // 1 and 4 samples are supported on all devices - if device.supports_texture_sample_count(2) { + if device.supportsTextureSampleCount(2) { sample_count_mask |= crate::TextureFormatCapabilities::MULTISAMPLE_X2; } - if device.supports_texture_sample_count(8) { + if device.supportsTextureSampleCount(8) { sample_count_mask |= crate::TextureFormatCapabilities::MULTISAMPLE_X8; } - if device.supports_texture_sample_count(16) { + if device.supportsTextureSampleCount(16) { sample_count_mask |= crate::TextureFormatCapabilities::MULTISAMPLE_X16; } let rw_texture_tier = if version.at_least((10, 13), (11, 0), os_is_mac) { - device.read_write_texture_support() + device.readWriteTextureSupport() } else if version.at_least((10, 12), OS_NOT_SUPPORT, os_is_mac) { if Self::supports_any(device, &[MTLFeatureSet::macOS_ReadWriteTextureTier2]) { MTLReadWriteTextureTier::Tier2 @@ -587,58 +597,58 @@ impl super::PrivateCapabilities { let mut timestamp_query_support = TimestampQuerySupport::empty(); if version.at_least((11, 0), (14, 0), os_is_mac) - && device.supports_counter_sampling(MTLCounterSamplingPoint::AtStageBoundary) + && device.supportsCounterSampling(MTLCounterSamplingPoint::AtStageBoundary) { // If we don't support at stage boundary, don't support anything else. timestamp_query_support.insert(TimestampQuerySupport::STAGE_BOUNDARIES); - if device.supports_counter_sampling(MTLCounterSamplingPoint::AtDrawBoundary) { + if device.supportsCounterSampling(MTLCounterSamplingPoint::AtDrawBoundary) { timestamp_query_support.insert(TimestampQuerySupport::ON_RENDER_ENCODER); } - if device.supports_counter_sampling(MTLCounterSamplingPoint::AtDispatchBoundary) { + if device.supportsCounterSampling(MTLCounterSamplingPoint::AtDispatchBoundary) { timestamp_query_support.insert(TimestampQuerySupport::ON_COMPUTE_ENCODER); } - if device.supports_counter_sampling(MTLCounterSamplingPoint::AtBlitBoundary) { + if device.supportsCounterSampling(MTLCounterSamplingPoint::AtBlitBoundary) { timestamp_query_support.insert(TimestampQuerySupport::ON_BLIT_ENCODER); } // `TimestampQuerySupport::INSIDE_WGPU_PASSES` emerges from the other flags. } - let argument_buffers = device.argument_buffers_support(); + let argument_buffers = device.argumentBuffersSupport(); Self { family_check, msl_version: if os_is_xr || version.at_least((14, 0), (17, 0), os_is_mac) { - MTLLanguageVersion::V3_1 + MTLLanguageVersion::Version3_1 } else if version.at_least((13, 0), (16, 0), os_is_mac) { - MTLLanguageVersion::V3_0 + MTLLanguageVersion::Version3_0 } else if version.at_least((12, 0), (15, 0), os_is_mac) { - MTLLanguageVersion::V2_4 + MTLLanguageVersion::Version2_4 } else if version.at_least((11, 0), (14, 0), os_is_mac) { - MTLLanguageVersion::V2_3 + MTLLanguageVersion::Version2_3 } else if version.at_least((10, 15), (13, 0), os_is_mac) { - MTLLanguageVersion::V2_2 + MTLLanguageVersion::Version2_2 } else if version.at_least((10, 14), (12, 0), os_is_mac) { - MTLLanguageVersion::V2_1 + MTLLanguageVersion::Version2_1 } else if version.at_least((10, 13), (11, 0), os_is_mac) { - MTLLanguageVersion::V2_0 + MTLLanguageVersion::Version2_0 } else if version.at_least((10, 12), (10, 0), os_is_mac) { - MTLLanguageVersion::V1_2 + MTLLanguageVersion::Version1_2 } else if version.at_least((10, 11), (9, 0), os_is_mac) { - MTLLanguageVersion::V1_1 + MTLLanguageVersion::Version1_1 } else { - MTLLanguageVersion::V1_0 + MTLLanguageVersion::Version1_0 }, // macOS 10.11 doesn't support read-write resources fragment_rw_storage: version.at_least((10, 12), (8, 0), os_is_mac), read_write_texture_tier: rw_texture_tier, msaa_desktop: os_is_mac, msaa_apple3: if family_check { - device.supports_family(MTLGPUFamily::Apple3) + device.supportsFamily(MTLGPUFamily::Apple3) } else { - device.supports_feature_set(MTLFeatureSet::iOS_GPUFamily3_v4) + device.supportsFeatureSet(MTLFeatureSet::iOS_GPUFamily3_v4) }, - msaa_apple7: family_check && device.supports_family(MTLGPUFamily::Apple7), + msaa_apple7: family_check && device.supportsFamily(MTLGPUFamily::Apple7), resource_heaps: Self::supports_any(device, RESOURCE_HEAP_SUPPORT), argument_buffers, shared_textures: !os_is_mac, @@ -653,16 +663,16 @@ impl super::PrivateCapabilities { BASE_VERTEX_FIRST_INSTANCE_SUPPORT, ), dual_source_blending: Self::supports_any(device, DUAL_SOURCE_BLEND_SUPPORT), - low_power: !os_is_mac || device.is_low_power(), - headless: os_is_mac && device.is_headless(), + low_power: !os_is_mac || device.isLowPower(), + headless: os_is_mac && device.isHeadless(), layered_rendering: Self::supports_any(device, LAYERED_RENDERING_SUPPORT), function_specialization: Self::supports_any(device, FUNCTION_SPECIALIZATION_SUPPORT), depth_clip_mode: Self::supports_any(device, DEPTH_CLIP_MODE), texture_cube_array: Self::supports_any(device, TEXTURE_CUBE_ARRAY_SUPPORT), supports_float_filtering: os_is_mac || (version.at_least((11, 0), (14, 0), os_is_mac) - && device.supports_32bit_float_filtering()), - format_depth24_stencil8: os_is_mac && device.d24_s8_supported(), + && device.supports32BitFloatFiltering()), + format_depth24_stencil8: os_is_mac && device.isDepth24Stencil8PixelFormatSupported(), format_depth32_stencil8_filter: os_is_mac, format_depth32_stencil8_none: !os_is_mac, format_min_srgb_channels: if os_is_mac { 4 } else { 1 }, @@ -670,14 +680,14 @@ impl super::PrivateCapabilities { format_bc: os_is_mac, format_eac_etc: !os_is_mac // M1 in macOS supports EAC/ETC2 - || (family_check && device.supports_family(MTLGPUFamily::Apple7)), + || (family_check && device.supportsFamily(MTLGPUFamily::Apple7)), // A8(Apple2) and later always support ASTC pixel formats - format_astc: (family_check && device.supports_family(MTLGPUFamily::Apple2)) + format_astc: (family_check && device.supportsFamily(MTLGPUFamily::Apple2)) || Self::supports_any(device, ASTC_PIXEL_FORMAT_FEATURES), // A13(Apple6) M1(Apple7) and later always support HDR ASTC pixel formats - format_astc_hdr: family_check && device.supports_family(MTLGPUFamily::Apple6), + format_astc_hdr: family_check && device.supportsFamily(MTLGPUFamily::Apple6), // Apple3 and later supports compressed volume texture formats including ASTC Sliced 3D - format_astc_3d: family_check && device.supports_family(MTLGPUFamily::Apple3), + format_astc_3d: family_check && device.supportsFamily(MTLGPUFamily::Apple3), format_any8_unorm_srgb_all: Self::supports_any(device, ANY8_UNORM_SRGB_ALL), format_any8_unorm_srgb_no_write: !Self::supports_any(device, ANY8_UNORM_SRGB_ALL) && !os_is_mac, @@ -732,10 +742,10 @@ impl super::PrivateCapabilities { max_buffers_per_stage: 31, max_vertex_buffers: 31.min(crate::MAX_VERTEX_BUFFERS as u32), max_textures_per_stage: if os_is_mac - || (family_check && device.supports_family(MTLGPUFamily::Apple6)) + || (family_check && device.supportsFamily(MTLGPUFamily::Apple6)) { 128 - } else if family_check && device.supports_family(MTLGPUFamily::Apple4) { + } else if family_check && device.supportsFamily(MTLGPUFamily::Apple4) { 96 } else { 31 @@ -743,21 +753,21 @@ impl super::PrivateCapabilities { max_samplers_per_stage: 16, max_binding_array_elements: if argument_buffers == MTLArgumentBuffersTier::Tier2 { 1_000_000 - } else if family_check && device.supports_family(MTLGPUFamily::Apple4) { + } else if family_check && device.supportsFamily(MTLGPUFamily::Apple4) { 96 } else { 31 }, max_sampler_binding_array_elements: if family_check - && device.supports_family(MTLGPUFamily::Apple9) + && device.supportsFamily(MTLGPUFamily::Apple9) { 500_000 } else if family_check - && (device.supports_family(MTLGPUFamily::Apple7) - || device.supports_family(MTLGPUFamily::Mac2)) + && (device.supportsFamily(MTLGPUFamily::Apple7) + || device.supportsFamily(MTLGPUFamily::Mac2)) { 1000 - } else if family_check && device.supports_family(MTLGPUFamily::Apple6) { + } else if family_check && device.supportsFamily(MTLGPUFamily::Apple6) { 128 } else { 16 @@ -765,8 +775,7 @@ impl super::PrivateCapabilities { buffer_alignment: if os_is_mac || os_is_xr { 256 } else { 64 }, max_buffer_size: if version.at_least((10, 14), (12, 0), os_is_mac) { // maxBufferLength available on macOS 10.14+ and iOS 12.0+ - let buffer_size: NSInteger = unsafe { msg_send![device.as_ref(), maxBufferLength] }; - buffer_size as _ + device.maxBufferLength() as u64 } else if os_is_mac { 1 << 30 // 1GB on macOS 10.11 and up } else { @@ -787,7 +796,7 @@ impl super::PrivateCapabilities { max_texture_3d_size: 2048, max_texture_layers: 2048, max_fragment_input_components: if os_is_mac - || device.supports_feature_set(MTLFeatureSet::iOS_GPUFamily4_v1) + || device.supportsFeatureSet(MTLFeatureSet::iOS_GPUFamily4_v1) { 124 } else { @@ -807,14 +816,13 @@ impl super::PrivateCapabilities { }, // Per https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf max_color_attachment_bytes_per_sample: if family_check - && device.supports_family(MTLGPUFamily::Apple4) + && device.supportsFamily(MTLGPUFamily::Apple4) { 64 } else { 32 }, - max_varying_components: if device - .supports_feature_set(MTLFeatureSet::macOS_GPUFamily1_v1) + max_varying_components: if device.supportsFeatureSet(MTLFeatureSet::macOS_GPUFamily1_v1) { 124 } else { @@ -852,8 +860,8 @@ impl super::PrivateCapabilities { ], ), supports_binary_archives: family_check - && (device.supports_family(MTLGPUFamily::Apple3) - || device.supports_family(MTLGPUFamily::Mac1)), + && (device.supportsFamily(MTLGPUFamily::Apple3) + || device.supportsFamily(MTLGPUFamily::Mac1)), supports_capture_manager: version.at_least((10, 13), (11, 0), os_is_mac), can_set_maximum_drawables_count: version.at_least((10, 14), (11, 2), os_is_mac), can_set_display_sync: version.at_least((10, 13), OS_NOT_SUPPORT, os_is_mac), @@ -867,40 +875,45 @@ impl super::PrivateCapabilities { ], ), supports_arrays_of_textures_write: family_check - && (device.supports_family(MTLGPUFamily::Apple6) - || device.supports_family(MTLGPUFamily::Mac1) - || device.supports_family(MTLGPUFamily::MacCatalyst1)), + && (device.supportsFamily(MTLGPUFamily::Apple6) + || device.supportsFamily(MTLGPUFamily::Mac1) + || device.supportsFamily(MTLGPUFamily::MacCatalyst1)), supports_mutability: version.at_least((10, 13), (11, 0), os_is_mac), //Depth clipping is supported on all macOS GPU families and iOS family 4 and later supports_depth_clip_control: os_is_mac - || device.supports_feature_set(MTLFeatureSet::iOS_GPUFamily4_v1), + || device.supportsFeatureSet(MTLFeatureSet::iOS_GPUFamily4_v1), supports_preserve_invariance: version.at_least((11, 0), (13, 0), os_is_mac), // Metal 2.2 on mac, 2.3 on iOS. supports_shader_primitive_index: version.at_least((10, 15), (14, 0), os_is_mac), has_unified_memory: if version.at_least((10, 15), (13, 0), os_is_mac) { - Some(device.has_unified_memory()) + Some(device.hasUnifiedMemory()) } else { None }, timestamp_query_support, supports_simd_scoped_operations: family_check - && (device.supports_family(MTLGPUFamily::Metal3) - || device.supports_family(MTLGPUFamily::Mac2) - || device.supports_family(MTLGPUFamily::Apple7)), + && (device.supportsFamily(MTLGPUFamily::Metal3) + || device.supportsFamily(MTLGPUFamily::Mac2) + || device.supportsFamily(MTLGPUFamily::Apple7)), // https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf#page=5 int64: family_check - && (device.supports_family(MTLGPUFamily::Apple3) - || device.supports_family(MTLGPUFamily::Metal3)), + && (device.supportsFamily(MTLGPUFamily::Apple3) + || device.supportsFamily(MTLGPUFamily::Metal3)), // https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf#page=6 int64_atomics: family_check - && ((device.supports_family(MTLGPUFamily::Apple8) - && device.supports_family(MTLGPUFamily::Mac2)) - || device.supports_family(MTLGPUFamily::Apple9)), + && ((device.supportsFamily(MTLGPUFamily::Apple8) + && device.supportsFamily(MTLGPUFamily::Mac2)) + || device.supportsFamily(MTLGPUFamily::Apple9)), // https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf#page=6 float_atomics: family_check - && (device.supports_family(MTLGPUFamily::Apple7) - || device.supports_family(MTLGPUFamily::Mac2)), + && (device.supportsFamily(MTLGPUFamily::Apple7) + || device.supportsFamily(MTLGPUFamily::Mac2)), supports_shared_event: version.at_least((10, 14), (12, 0), os_is_mac), + supports_raytracing: if version.at_least((15, 0), (18, 0), os_is_mac) { + device.supportsRaytracing() && unsafe { device.supportsRaytracingFromRender() } + } else { + false + }, } } @@ -945,7 +958,7 @@ impl super::PrivateCapabilities { ); features.set( F::DUAL_SOURCE_BLENDING, - self.msl_version >= MTLLanguageVersion::V1_2 && self.dual_source_blending, + self.msl_version >= MTLLanguageVersion::Version1_2 && self.dual_source_blending, ); features.set(F::TEXTURE_COMPRESSION_ASTC, self.format_astc); features.set(F::TEXTURE_COMPRESSION_ASTC_HDR, self.format_astc_hdr); @@ -965,29 +978,29 @@ impl super::PrivateCapabilities { | F::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING | F::STORAGE_TEXTURE_ARRAY_NON_UNIFORM_INDEXING | F::PARTIALLY_BOUND_BINDING_ARRAY, - self.msl_version >= MTLLanguageVersion::V3_0 + self.msl_version >= MTLLanguageVersion::Version3_0 && self.supports_arrays_of_textures - && self.argument_buffers as u64 >= MTLArgumentBuffersTier::Tier2 as u64, + && self.argument_buffers >= MTLArgumentBuffersTier::Tier2, ); features.set( F::SHADER_INT64, - self.int64 && self.msl_version >= MTLLanguageVersion::V2_3, + self.int64 && self.msl_version >= MTLLanguageVersion::Version2_3, ); features.set( F::SHADER_INT64_ATOMIC_MIN_MAX, - self.int64_atomics && self.msl_version >= MTLLanguageVersion::V2_4, + self.int64_atomics && self.msl_version >= MTLLanguageVersion::Version2_4, ); features.set( F::TEXTURE_INT64_ATOMIC, - self.int64_atomics && self.msl_version >= MTLLanguageVersion::V3_1, + self.int64_atomics && self.msl_version >= MTLLanguageVersion::Version3_1, ); features.set( F::TEXTURE_ATOMIC, - self.msl_version >= MTLLanguageVersion::V3_1, + self.msl_version >= MTLLanguageVersion::Version3_1, ); features.set( F::SHADER_FLOAT32_ATOMIC, - self.float_atomics && self.msl_version >= MTLLanguageVersion::V3_0, + self.float_atomics && self.msl_version >= MTLLanguageVersion::Version3_0, ); features.set( @@ -1002,6 +1015,8 @@ impl super::PrivateCapabilities { features.insert(F::SUBGROUP | F::SUBGROUP_BARRIER); } + features.set(F::EXPERIMENTAL_RAY_QUERY, self.supports_raytracing); + features } @@ -1083,16 +1098,17 @@ impl super::PrivateCapabilities { max_mesh_multiview_count: 0, max_mesh_output_layers: 0, - max_blas_primitive_count: 0, // When added: 2^28 from https://developer.apple.com/documentation/metal/mtlaccelerationstructureusage/extendedlimits - max_blas_geometry_count: 0, // When added: 2^24 - max_tlas_instance_count: 0, // When added: 2^24 + // from https://developer.apple.com/documentation/metal/mtlaccelerationstructureusage/extendedlimits + max_blas_primitive_count: 1 << 28, + max_blas_geometry_count: 1 << 24, + max_tlas_instance_count: 1 << 24, // Unsure what this will be when added: acceleration structures count as a buffer so // it may be worth using argument buffers for this all acceleration structures, then // there will be no limit. // From 2.17.7 in https://developer.apple.com/metal/Metal-Shading-Language-Specification.pdf // > [Acceleration structures] are opaque objects that can be bound directly using // buffer binding points or via argument buffers - max_acceleration_structures_per_shader_stage: 0, + max_acceleration_structures_per_shader_stage: self.max_buffers_per_stage, }, alignments: crate::Alignments { buffer_copy_offset: wgt::BufferSize::new(self.buffer_alignment).unwrap(), @@ -1101,8 +1117,9 @@ impl super::PrivateCapabilities { // Metal Shading Language it generates, so from `wgpu_hal`'s // users' point of view, references are tightly checked. uniform_bounds_check_alignment: wgt::BufferSize::new(1).unwrap(), - raw_tlas_instance_size: 0, - ray_tracing_scratch_buffer_alignment: 0, + raw_tlas_instance_size: size_of::( + ), + ray_tracing_scratch_buffer_alignment: 1, }, downlevel, } @@ -1279,11 +1296,11 @@ impl super::PrivateCapabilities { } impl super::PrivateDisabilities { - pub fn new(device: &metal::Device) -> Self { - let is_intel = device.name().starts_with("Intel"); + pub fn new(device: &ProtocolObject) -> Self { + let is_intel = device.name().to_string().starts_with("Intel"); Self { broken_viewport_near_depth: is_intel - && !device.supports_feature_set(MTLFeatureSet::macOS_GPUFamily1_v4), + && !device.supportsFeatureSet(MTLFeatureSet::macOS_GPUFamily1_v4), broken_layered_clear_image: is_intel, } } diff --git a/wgpu-hal/src/metal/command.rs b/wgpu-hal/src/metal/command.rs index 72a799a0275..6a53ad4fe37 100644 --- a/wgpu-hal/src/metal/command.rs +++ b/wgpu-hal/src/metal/command.rs @@ -1,14 +1,24 @@ -use super::{conv, AsNative, TimestampQuerySupport}; +use hashbrown::HashSet; +use objc2::{ + rc::{autoreleasepool, Retained}, + runtime::ProtocolObject, +}; +use objc2_foundation::{NSRange, NSString}; +use objc2_metal::{ + MTLAccelerationStructureCommandEncoder, MTLBlitCommandEncoder, MTLBlitPassDescriptor, + MTLCommandBuffer, MTLCommandEncoder, MTLCommandQueue, MTLComputeCommandEncoder, + MTLComputePassDescriptor, MTLCounterDontSample, MTLLoadAction, MTLPrimitiveType, + MTLRenderCommandEncoder, MTLRenderPassDescriptor, MTLResidencySet, MTLScissorRect, MTLSize, + MTLStoreAction, MTLTexture, MTLViewport, MTLVisibilityResultMode, +}; + +use super::{conv, TimestampQuerySupport}; use crate::CommandEncoder as _; use alloc::{ borrow::{Cow, ToOwned as _}, vec::Vec, }; -use core::ops::Range; -use metal::{ - MTLIndexType, MTLLoadAction, MTLPrimitiveType, MTLScissorRect, MTLSize, MTLStoreAction, - MTLViewport, MTLVisibilityResultMode, NSRange, -}; +use core::{ops::Range, ptr::NonNull}; // has to match `Temp::binding_sizes` const WORD_SIZE: usize = 4; @@ -17,12 +27,18 @@ impl Default for super::CommandState { fn default() -> Self { Self { blit: None, + acceleration_structure_builder: None, render: None, compute: None, raw_primitive_type: MTLPrimitiveType::Point, index: None, - raw_wg_size: MTLSize::new(0, 0, 0), + raw_wg_size: MTLSize { + width: 0, + depth: 0, + height: 0, + }, stage_infos: Default::default(), + residency_sets: Default::default(), storage_buffer_length_map: Default::default(), vertex_buffer_size_map: Default::default(), work_group_memory_sizes: Vec::new(), @@ -33,8 +49,9 @@ impl Default for super::CommandState { } impl super::CommandEncoder { - fn enter_blit(&mut self) -> &metal::BlitCommandEncoderRef { + fn enter_blit(&mut self) -> Retained> { if self.state.blit.is_none() { + self.leave_acceleration_structure_builder(); debug_assert!(self.state.render.is_none() && self.state.compute.is_none()); let cmd_buf = self.raw_cmd_buf.as_ref().unwrap(); @@ -60,47 +77,48 @@ impl super::CommandEncoder { .contains(TimestampQuerySupport::ON_BLIT_ENCODER); if !self.state.pending_timer_queries.is_empty() && !supports_sample_counters_in_buffer { - objc::rc::autoreleasepool(|| { - let descriptor = metal::BlitPassDescriptor::new(); + autoreleasepool(|_| unsafe { + let descriptor = MTLBlitPassDescriptor::new(); let mut last_query = None; for (i, (set, index)) in self.state.pending_timer_queries.drain(..).enumerate() { let sba_descriptor = descriptor - .sample_buffer_attachments() - .object_at(i as _) - .unwrap(); + .sampleBufferAttachments() + .objectAtIndexedSubscript(i); sba_descriptor - .set_sample_buffer(set.counter_sample_buffer.as_ref().unwrap()); + .setSampleBuffer(Some(set.counter_sample_buffer.as_ref().unwrap())); // Here be dragons: // As mentioned above, for some reasons using the start of the encoder won't yield any results sometimes! - sba_descriptor - .set_start_of_encoder_sample_index(metal::COUNTER_DONT_SAMPLE); - sba_descriptor.set_end_of_encoder_sample_index(index as _); + sba_descriptor.setStartOfEncoderSampleIndex(MTLCounterDontSample); + sba_descriptor.setEndOfEncoderSampleIndex(index as _); last_query = Some((set, index)); } - let encoder = cmd_buf.blit_command_encoder_with_descriptor(descriptor); + let encoder = cmd_buf + .blitCommandEncoderWithDescriptor(&descriptor) + .unwrap(); // As explained above, we need to do some write: // Conveniently, we have a buffer with every query set, that we can use for this for a dummy write, // since we know that it is going to be overwritten again on timer resolve and HAL doesn't define its state before that. let raw_range = NSRange { - location: last_query.as_ref().unwrap().1 as u64 * crate::QUERY_SIZE, + location: last_query.as_ref().unwrap().1 as usize + * crate::QUERY_SIZE as usize, length: 1, }; - encoder.fill_buffer( + encoder.fillBuffer_range_value( &last_query.as_ref().unwrap().0.raw_buffer, raw_range, 255, // Don't write 0, so it's easier to identify if something went wrong. ); - encoder.end_encoding(); + encoder.endEncoding(); }); } - objc::rc::autoreleasepool(|| { - self.state.blit = Some(cmd_buf.new_blit_command_encoder().to_owned()); + autoreleasepool(|_| { + self.state.blit = Some(cmd_buf.blitCommandEncoder().unwrap()); }); let encoder = self.state.blit.as_ref().unwrap(); @@ -109,29 +127,58 @@ impl super::CommandEncoder { // If the above described issue with empty blit encoder applies to `sample_counters_in_buffer` as well, we should use the same workaround instead! for (set, index) in self.state.pending_timer_queries.drain(..) { debug_assert!(supports_sample_counters_in_buffer); - encoder.sample_counters_in_buffer( - set.counter_sample_buffer.as_ref().unwrap(), - index as _, - true, - ) + unsafe { + encoder.sampleCountersInBuffer_atSampleIndex_withBarrier( + set.counter_sample_buffer.as_ref().unwrap(), + index as _, + true, + ) + } } } - self.state.blit.as_ref().unwrap() + self.state.blit.as_ref().unwrap().clone() } pub(super) fn leave_blit(&mut self) { if let Some(encoder) = self.state.blit.take() { - encoder.end_encoding(); + encoder.endEncoding(); + } + } + + fn enter_acceleration_structure_builder( + &mut self, + ) -> Retained> { + if self.state.acceleration_structure_builder.is_none() { + self.leave_blit(); + debug_assert!( + self.state.render.is_none() + && self.state.compute.is_none() + && self.state.blit.is_none() + ); + let cmd_buf = self.raw_cmd_buf.as_ref().unwrap(); + autoreleasepool(|_| { + self.state.acceleration_structure_builder = + cmd_buf.accelerationStructureCommandEncoder().to_owned(); + }); } + self.state.acceleration_structure_builder.clone().unwrap() } - fn active_encoder(&mut self) -> Option<&metal::CommandEncoderRef> { + pub(super) fn leave_acceleration_structure_builder(&mut self) { + if let Some(encoder) = self.state.acceleration_structure_builder.take() { + encoder.endEncoding(); + } + } + + fn active_encoder(&mut self) -> Option<&ProtocolObject> { if let Some(ref encoder) = self.state.render { - Some(encoder) + Some(ProtocolObject::from_ref(&**encoder)) + } else if let Some(ref encoder) = self.state.acceleration_structure_builder { + Some(ProtocolObject::from_ref(&**encoder)) } else if let Some(ref encoder) = self.state.compute { - Some(encoder) + Some(ProtocolObject::from_ref(&**encoder)) } else if let Some(ref encoder) = self.state.blit { - Some(encoder) + Some(ProtocolObject::from_ref(&**encoder)) } else { None } @@ -140,6 +187,7 @@ impl super::CommandEncoder { fn begin_pass(&mut self) { self.state.reset(); self.leave_blit(); + self.leave_acceleration_structure_builder(); } } @@ -193,14 +241,15 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe fn begin_encoding(&mut self, label: crate::Label) -> Result<(), crate::DeviceError> { let queue = &self.raw_queue.lock(); let retain_references = self.shared.settings.retain_command_buffer_references; - let raw = objc::rc::autoreleasepool(move || { + let raw = autoreleasepool(move |_| { let cmd_buf_ref = if retain_references { - queue.new_command_buffer() + queue.commandBuffer() } else { - queue.new_command_buffer_with_unretained_references() - }; + queue.commandBufferWithUnretainedReferences() + } + .unwrap(); if let Some(label) = label { - cmd_buf_ref.set_label(label); + cmd_buf_ref.setLabel(Some(&NSString::from_str(label))); } cmd_buf_ref.to_owned() }); @@ -212,13 +261,14 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe fn discard_encoding(&mut self) { self.leave_blit(); + self.leave_acceleration_structure_builder(); // when discarding, we don't have a guarantee that // everything is in a good state, so check carefully if let Some(encoder) = self.state.render.take() { - encoder.end_encoding(); + encoder.endEncoding(); } if let Some(encoder) = self.state.compute.take() { - encoder.end_encoding(); + encoder.endEncoding(); } self.raw_cmd_buf = None; } @@ -231,12 +281,16 @@ impl crate::CommandEncoder for super::CommandEncoder { } self.leave_blit(); + self.leave_acceleration_structure_builder(); debug_assert!(self.state.render.is_none()); debug_assert!(self.state.compute.is_none()); debug_assert!(self.state.pending_timer_queries.is_empty()); + let mut residency_sets = HashSet::new(); + core::mem::swap(&mut residency_sets, &mut self.state.residency_sets); Ok(super::CommandBuffer { raw: self.raw_cmd_buf.take().unwrap(), + residency_sets, }) } @@ -261,7 +315,7 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe fn clear_buffer(&mut self, buffer: &super::Buffer, range: crate::MemoryRange) { let encoder = self.enter_blit(); - encoder.fill_buffer(&buffer.raw, conv::map_range(&range), 0); + encoder.fillBuffer_range_value(&buffer.raw, conv::map_range(&range), 0); } unsafe fn copy_buffer_to_buffer( @@ -274,12 +328,12 @@ impl crate::CommandEncoder for super::CommandEncoder { { let encoder = self.enter_blit(); for copy in regions { - encoder.copy_from_buffer( + encoder.copyFromBuffer_sourceOffset_toBuffer_destinationOffset_size( &src.raw, - copy.src_offset, + copy.src_offset as usize, &dst.raw, - copy.dst_offset, - copy.size.get(), + copy.dst_offset as usize, + copy.size.get() as usize, ); } } @@ -295,8 +349,8 @@ impl crate::CommandEncoder for super::CommandEncoder { { let dst_texture = if src.format != dst.format { let raw_format = self.shared.private_caps.map_format(src.format); - Cow::Owned(objc::rc::autoreleasepool(|| { - dst.raw.new_texture_view(raw_format) + Cow::Owned(autoreleasepool(|_| { + dst.raw.newTextureViewWithPixelFormat(raw_format).unwrap() })) } else { Cow::Borrowed(&dst.raw) @@ -307,15 +361,15 @@ impl crate::CommandEncoder for super::CommandEncoder { let dst_origin = conv::map_origin(©.dst_base.origin); // no clamping is done: Metal expects physical sizes here let extent = conv::map_copy_extent(©.size); - encoder.copy_from_texture( + encoder.copyFromTexture_sourceSlice_sourceLevel_sourceOrigin_sourceSize_toTexture_destinationSlice_destinationLevel_destinationOrigin( &src.raw, - copy.src_base.array_layer as u64, - copy.src_base.mip_level as u64, + copy.src_base.array_layer as usize, + copy.src_base.mip_level as usize, src_origin, extent, &dst_texture, - copy.dst_base.array_layer as u64, - copy.dst_base.mip_level as u64, + copy.dst_base.array_layer as usize, + copy.dst_base.mip_level as usize, dst_origin, ); } @@ -348,15 +402,15 @@ impl crate::CommandEncoder for super::CommandEncoder { // the amount of data to copy. 0 }; - encoder.copy_from_buffer_to_texture( + encoder.copyFromBuffer_sourceOffset_sourceBytesPerRow_sourceBytesPerImage_sourceSize_toTexture_destinationSlice_destinationLevel_destinationOrigin_options( &src.raw, - copy.buffer_layout.offset, - bytes_per_row, - image_byte_stride, + copy.buffer_layout.offset as usize, + bytes_per_row as usize, + image_byte_stride as usize, conv::map_copy_extent(&extent), &dst.raw, - copy.texture_base.array_layer as u64, - copy.texture_base.mip_level as u64, + copy.texture_base.array_layer as usize, + copy.texture_base.mip_level as usize, dst_origin, conv::get_blit_option(dst.format, copy.texture_base.aspect), ); @@ -385,16 +439,16 @@ impl crate::CommandEncoder for super::CommandEncoder { .buffer_layout .rows_per_image .map_or(0, |v| v as u64 * bytes_per_row); - encoder.copy_from_texture_to_buffer( + encoder.copyFromTexture_sourceSlice_sourceLevel_sourceOrigin_sourceSize_toBuffer_destinationOffset_destinationBytesPerRow_destinationBytesPerImage_options( &src.raw, - copy.texture_base.array_layer as u64, - copy.texture_base.mip_level as u64, + copy.texture_base.array_layer as usize, + copy.texture_base.mip_level as usize, src_origin, conv::map_copy_extent(&extent), &dst.raw, - copy.buffer_layout.offset, - bytes_per_row, - bytes_per_image, + copy.buffer_layout.offset as usize, + bytes_per_row as usize, + bytes_per_image as usize, conv::get_blit_option(src.format, copy.texture_base.aspect), ); } @@ -402,11 +456,32 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe fn copy_acceleration_structure_to_acceleration_structure( &mut self, - _src: &super::AccelerationStructure, - _dst: &super::AccelerationStructure, - _copy: wgt::AccelerationStructureCopy, + src: &super::AccelerationStructure, + dst: &super::AccelerationStructure, + copy: wgt::AccelerationStructureCopy, ) { - unimplemented!() + // Store a reference of the residency_set in the encoder state to be committed later + self.state + .residency_sets + .insert(Retained::clone(&dst.residency_set)); + dst.residency_set.removeAllAllocations(); + let allocations = src.residency_set.allAllocations(); + for index in 0..allocations.count() { + dst.residency_set + .addAllocation(&allocations.objectAtIndex(index)); + } + let command_encoder = self.enter_acceleration_structure_builder(); + match copy { + wgt::AccelerationStructureCopy::Clone => { + command_encoder + .copyAccelerationStructure_toAccelerationStructure(&src.raw, &dst.raw); + } + wgt::AccelerationStructureCopy::Compact => { + command_encoder.copyAndCompactAccelerationStructure_toAccelerationStructure( + &src.raw, &dst.raw, + ); + } + }; } unsafe fn begin_query(&mut self, set: &super::QuerySet, index: u32) { @@ -416,9 +491,9 @@ impl crate::CommandEncoder for super::CommandEncoder { .render .as_ref() .unwrap() - .set_visibility_result_mode( + .setVisibilityResultMode_offset( MTLVisibilityResultMode::Boolean, - index as u64 * crate::QUERY_SIZE, + index as usize * crate::QUERY_SIZE as usize, ); } _ => {} @@ -431,7 +506,7 @@ impl crate::CommandEncoder for super::CommandEncoder { .render .as_ref() .unwrap() - .set_visibility_result_mode(MTLVisibilityResultMode::Disabled, 0); + .setVisibilityResultMode_offset(MTLVisibilityResultMode::Disabled, 0); } _ => {} } @@ -451,17 +526,29 @@ impl crate::CommandEncoder for super::CommandEncoder { support.contains(TimestampQuerySupport::ON_BLIT_ENCODER), self.state.blit.as_ref(), ) { - encoder.sample_counters_in_buffer(sample_buffer, index as _, with_barrier); + encoder.sampleCountersInBuffer_atSampleIndex_withBarrier( + sample_buffer, + index as _, + with_barrier, + ); } else if let (true, Some(encoder)) = ( support.contains(TimestampQuerySupport::ON_RENDER_ENCODER), self.state.render.as_ref(), ) { - encoder.sample_counters_in_buffer(sample_buffer, index as _, with_barrier); + encoder.sampleCountersInBuffer_atSampleIndex_withBarrier( + sample_buffer, + index as _, + with_barrier, + ); } else if let (true, Some(encoder)) = ( support.contains(TimestampQuerySupport::ON_COMPUTE_ENCODER), self.state.compute.as_ref(), ) { - encoder.sample_counters_in_buffer(sample_buffer, index as _, with_barrier); + encoder.sampleCountersInBuffer_atSampleIndex_withBarrier( + sample_buffer, + index as _, + with_barrier, + ); } else { // If we're here it means we either have no encoder open, or it's not supported to sample within them. // If this happens with render/compute open, this is an invalid usage! @@ -478,10 +565,10 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe fn reset_queries(&mut self, set: &super::QuerySet, range: Range) { let encoder = self.enter_blit(); let raw_range = NSRange { - location: range.start as u64 * crate::QUERY_SIZE, - length: (range.end - range.start) as u64 * crate::QUERY_SIZE, + location: range.start as usize * crate::QUERY_SIZE as usize, + length: (range.end - range.start) as usize * crate::QUERY_SIZE as usize, }; - encoder.fill_buffer(&set.raw_buffer, raw_range, 0); + encoder.fillBuffer_range_value(&set.raw_buffer, raw_range, 0); } unsafe fn copy_query_results( @@ -496,20 +583,20 @@ impl crate::CommandEncoder for super::CommandEncoder { match set.ty { wgt::QueryType::Occlusion => { let size = (range.end - range.start) as u64 * crate::QUERY_SIZE; - encoder.copy_from_buffer( + encoder.copyFromBuffer_sourceOffset_toBuffer_destinationOffset_size( &set.raw_buffer, - range.start as u64 * crate::QUERY_SIZE, + range.start as usize * crate::QUERY_SIZE as usize, &buffer.raw, - offset, - size, + offset as usize, + size as usize, ); } wgt::QueryType::Timestamp => { - encoder.resolve_counters( + encoder.resolveCounters_inRange_destinationBuffer_destinationOffset( set.counter_sample_buffer.as_ref().unwrap(), - NSRange::new(range.start as u64, (range.end - range.start) as u64), + NSRange::new(range.start as usize, (range.end - range.start) as usize), &buffer.raw, - offset, + offset as usize, ); } wgt::QueryType::PipelineStatistics(_) => todo!(), @@ -529,44 +616,44 @@ impl crate::CommandEncoder for super::CommandEncoder { assert!(self.state.compute.is_none()); assert!(self.state.render.is_none()); - objc::rc::autoreleasepool(|| { - let descriptor = metal::RenderPassDescriptor::new(); + autoreleasepool(|_| { + let descriptor = MTLRenderPassDescriptor::new(); for (i, at) in desc.color_attachments.iter().enumerate() { if let Some(at) = at.as_ref() { - let at_descriptor = descriptor.color_attachments().object_at(i as u64).unwrap(); - at_descriptor.set_texture(Some(&at.target.view.raw)); + let at_descriptor = descriptor.colorAttachments().objectAtIndexedSubscript(i); + at_descriptor.setTexture(Some(&at.target.view.raw)); if let Some(depth_slice) = at.depth_slice { - at_descriptor.set_depth_plane(depth_slice as u64); + at_descriptor.setDepthPlane(depth_slice as usize); } if let Some(ref resolve) = at.resolve_target { //Note: the selection of levels and slices is already handled by `TextureView` - at_descriptor.set_resolve_texture(Some(&resolve.view.raw)); + at_descriptor.setResolveTexture(Some(&resolve.view.raw)); } let load_action = if at.ops.contains(crate::AttachmentOps::LOAD) { MTLLoadAction::Load } else { - at_descriptor.set_clear_color(conv::map_clear_color(&at.clear_value)); + at_descriptor.setClearColor(conv::map_clear_color(&at.clear_value)); MTLLoadAction::Clear }; let store_action = conv::map_store_action( at.ops.contains(crate::AttachmentOps::STORE), at.resolve_target.is_some(), ); - at_descriptor.set_load_action(load_action); - at_descriptor.set_store_action(store_action); + at_descriptor.setLoadAction(load_action); + at_descriptor.setStoreAction(store_action); } } if let Some(ref at) = desc.depth_stencil_attachment { if at.target.view.aspects.contains(crate::FormatAspects::DEPTH) { - let at_descriptor = descriptor.depth_attachment().unwrap(); - at_descriptor.set_texture(Some(&at.target.view.raw)); + let at_descriptor = descriptor.depthAttachment(); + at_descriptor.setTexture(Some(&at.target.view.raw)); let load_action = if at.depth_ops.contains(crate::AttachmentOps::LOAD) { MTLLoadAction::Load } else { - at_descriptor.set_clear_depth(at.clear_value.0 as f64); + at_descriptor.setClearDepth(at.clear_value.0 as f64); MTLLoadAction::Clear }; let store_action = if at.depth_ops.contains(crate::AttachmentOps::STORE) { @@ -574,8 +661,8 @@ impl crate::CommandEncoder for super::CommandEncoder { } else { MTLStoreAction::DontCare }; - at_descriptor.set_load_action(load_action); - at_descriptor.set_store_action(store_action); + at_descriptor.setLoadAction(load_action); + at_descriptor.setStoreAction(store_action); } if at .target @@ -583,13 +670,13 @@ impl crate::CommandEncoder for super::CommandEncoder { .aspects .contains(crate::FormatAspects::STENCIL) { - let at_descriptor = descriptor.stencil_attachment().unwrap(); - at_descriptor.set_texture(Some(&at.target.view.raw)); + let at_descriptor = descriptor.stencilAttachment(); + at_descriptor.setTexture(Some(&at.target.view.raw)); let load_action = if at.stencil_ops.contains(crate::AttachmentOps::LOAD) { MTLLoadAction::Load } else { - at_descriptor.set_clear_stencil(at.clear_value.1); + at_descriptor.setClearStencil(at.clear_value.1); MTLLoadAction::Clear }; let store_action = if at.stencil_ops.contains(crate::AttachmentOps::STORE) { @@ -597,20 +684,19 @@ impl crate::CommandEncoder for super::CommandEncoder { } else { MTLStoreAction::DontCare }; - at_descriptor.set_load_action(load_action); - at_descriptor.set_store_action(store_action); + at_descriptor.setLoadAction(load_action); + at_descriptor.setStoreAction(store_action); } } let mut sba_index = 0; let mut next_sba_descriptor = || { let sba_descriptor = descriptor - .sample_buffer_attachments() - .object_at(sba_index) - .unwrap(); + .sampleBufferAttachments() + .objectAtIndexedSubscript(sba_index); - sba_descriptor.set_end_of_vertex_sample_index(metal::COUNTER_DONT_SAMPLE); - sba_descriptor.set_start_of_fragment_sample_index(metal::COUNTER_DONT_SAMPLE); + sba_descriptor.setEndOfVertexSampleIndex(MTLCounterDontSample); + sba_descriptor.setStartOfFragmentSampleIndex(MTLCounterDontSample); sba_index += 1; sba_descriptor @@ -618,51 +704,50 @@ impl crate::CommandEncoder for super::CommandEncoder { for (set, index) in self.state.pending_timer_queries.drain(..) { let sba_descriptor = next_sba_descriptor(); - sba_descriptor.set_sample_buffer(set.counter_sample_buffer.as_ref().unwrap()); - sba_descriptor.set_start_of_vertex_sample_index(index as _); - sba_descriptor.set_end_of_fragment_sample_index(metal::COUNTER_DONT_SAMPLE); + sba_descriptor.setSampleBuffer(Some(set.counter_sample_buffer.as_ref().unwrap())); + sba_descriptor.setStartOfVertexSampleIndex(index as _); + sba_descriptor.setEndOfFragmentSampleIndex(MTLCounterDontSample); } if let Some(ref timestamp_writes) = desc.timestamp_writes { let sba_descriptor = next_sba_descriptor(); - sba_descriptor.set_sample_buffer( + sba_descriptor.setSampleBuffer(Some( timestamp_writes .query_set .counter_sample_buffer .as_ref() .unwrap(), - ); + )); - sba_descriptor.set_start_of_vertex_sample_index( + sba_descriptor.setStartOfVertexSampleIndex( timestamp_writes .beginning_of_pass_write_index - .map_or(metal::COUNTER_DONT_SAMPLE, |i| i as _), + .map_or(MTLCounterDontSample, |i| i as _), ); - sba_descriptor.set_end_of_fragment_sample_index( + sba_descriptor.setEndOfFragmentSampleIndex( timestamp_writes .end_of_pass_write_index - .map_or(metal::COUNTER_DONT_SAMPLE, |i| i as _), + .map_or(MTLCounterDontSample, |i| i as _), ); } if let Some(occlusion_query_set) = desc.occlusion_query_set { - descriptor - .set_visibility_result_buffer(Some(occlusion_query_set.raw_buffer.as_ref())) + descriptor.setVisibilityResultBuffer(Some(occlusion_query_set.raw_buffer.as_ref())) } let raw = self.raw_cmd_buf.as_ref().unwrap(); - let encoder = raw.new_render_command_encoder(descriptor); + let encoder = raw.renderCommandEncoderWithDescriptor(&descriptor).unwrap(); if let Some(label) = desc.label { - encoder.set_label(label); + encoder.setLabel(Some(&NSString::from_str(label))); } - self.state.render = Some(encoder.to_owned()); + self.state.render = Some(encoder); }); Ok(()) } unsafe fn end_render_pass(&mut self) { - self.state.render.take().unwrap().end_encoding(); + self.state.render.take().unwrap().endEncoding(); } unsafe fn set_bind_group( @@ -677,23 +762,39 @@ impl crate::CommandEncoder for super::CommandEncoder { if let Some(ref encoder) = self.state.render { let mut changes_sizes_buffer = false; for index in 0..group.counters.vs.buffers { - let buf = &group.buffers[index as usize]; - let mut offset = buf.offset; - if let Some(dyn_index) = buf.dynamic_index { - offset += dynamic_offsets[dyn_index as usize] as wgt::BufferAddress; - } - encoder.set_vertex_buffer( - (bg_info.base_resource_indices.vs.buffers + index) as u64, - Some(buf.ptr.as_native()), - offset, - ); - if let Some(size) = buf.binding_size { - let br = naga::ResourceBinding { - group: group_index, - binding: buf.binding_location, - }; - self.state.storage_buffer_length_map.insert(br, size); - changes_sizes_buffer = true; + let res = &group.buffers[index as usize]; + match res { + super::BufferLikeResource::Buffer { + ptr, + mut offset, + dynamic_index, + binding_size, + binding_location, + } => { + if let Some(dyn_index) = dynamic_index { + offset += dynamic_offsets[*dyn_index as usize] as wgt::BufferAddress; + } + encoder.setVertexBuffer_offset_atIndex( + Some(ptr.as_ref()), + offset as usize, + (bg_info.base_resource_indices.vs.buffers + index) as usize, + ); + if let Some(size) = binding_size { + let br = naga::ResourceBinding { + group: group_index, + binding: *binding_location, + }; + self.state.storage_buffer_length_map.insert(br, *size); + changes_sizes_buffer = true; + } + } + super::BufferLikeResource::AccelerationStructure(ptr, residency_set) => { + encoder.setVertexAccelerationStructure_atBufferIndex( + Some(ptr.as_ref()), + (bg_info.base_resource_indices.vs.buffers + index) as usize, + ); + residency_set.as_ref().requestResidency(); + } } } if changes_sizes_buffer { @@ -701,33 +802,49 @@ impl crate::CommandEncoder for super::CommandEncoder { naga::ShaderStage::Vertex, &mut self.temp.binding_sizes, ) { - encoder.set_vertex_bytes( + encoder.setVertexBytes_length_atIndex( + NonNull::new(sizes.as_ptr().cast_mut().cast()).unwrap(), + sizes.len() * WORD_SIZE, index as _, - (sizes.len() * WORD_SIZE) as u64, - sizes.as_ptr().cast(), ); } } changes_sizes_buffer = false; for index in 0..group.counters.fs.buffers { - let buf = &group.buffers[(group.counters.vs.buffers + index) as usize]; - let mut offset = buf.offset; - if let Some(dyn_index) = buf.dynamic_index { - offset += dynamic_offsets[dyn_index as usize] as wgt::BufferAddress; - } - encoder.set_fragment_buffer( - (bg_info.base_resource_indices.fs.buffers + index) as u64, - Some(buf.ptr.as_native()), - offset, - ); - if let Some(size) = buf.binding_size { - let br = naga::ResourceBinding { - group: group_index, - binding: buf.binding_location, - }; - self.state.storage_buffer_length_map.insert(br, size); - changes_sizes_buffer = true; + let res = &group.buffers[(group.counters.vs.buffers + index) as usize]; + match res { + super::BufferLikeResource::Buffer { + ptr, + mut offset, + dynamic_index, + binding_size, + binding_location, + } => { + if let Some(dyn_index) = dynamic_index { + offset += dynamic_offsets[*dyn_index as usize] as wgt::BufferAddress; + } + encoder.setFragmentBuffer_offset_atIndex( + Some(ptr.as_ref()), + offset as usize, + (bg_info.base_resource_indices.fs.buffers + index) as usize, + ); + if let Some(size) = binding_size { + let br = naga::ResourceBinding { + group: group_index, + binding: *binding_location, + }; + self.state.storage_buffer_length_map.insert(br, *size); + changes_sizes_buffer = true; + } + } + super::BufferLikeResource::AccelerationStructure(ptr, residency_set) => { + encoder.setFragmentAccelerationStructure_atBufferIndex( + Some(ptr.as_ref()), + (bg_info.base_resource_indices.fs.buffers + index) as usize, + ); + residency_set.as_ref().requestResidency(); + } } } if changes_sizes_buffer { @@ -735,47 +852,47 @@ impl crate::CommandEncoder for super::CommandEncoder { naga::ShaderStage::Fragment, &mut self.temp.binding_sizes, ) { - encoder.set_fragment_bytes( + encoder.setFragmentBytes_length_atIndex( + NonNull::new(sizes.as_ptr().cast_mut().cast()).unwrap(), + sizes.len() * WORD_SIZE, index as _, - (sizes.len() * WORD_SIZE) as u64, - sizes.as_ptr().cast(), ); } } for index in 0..group.counters.vs.samplers { let res = group.samplers[index as usize]; - encoder.set_vertex_sampler_state( - (bg_info.base_resource_indices.vs.samplers + index) as u64, - Some(res.as_native()), + encoder.setVertexSamplerState_atIndex( + Some(res.as_ref()), + (bg_info.base_resource_indices.vs.samplers + index) as usize, ); } for index in 0..group.counters.fs.samplers { let res = group.samplers[(group.counters.vs.samplers + index) as usize]; - encoder.set_fragment_sampler_state( - (bg_info.base_resource_indices.fs.samplers + index) as u64, - Some(res.as_native()), + encoder.setFragmentSamplerState_atIndex( + Some(res.as_ref()), + (bg_info.base_resource_indices.fs.samplers + index) as usize, ); } for index in 0..group.counters.vs.textures { let res = group.textures[index as usize]; - encoder.set_vertex_texture( - (bg_info.base_resource_indices.vs.textures + index) as u64, - Some(res.as_native()), + encoder.setVertexTexture_atIndex( + Some(res.as_ref()), + (bg_info.base_resource_indices.vs.textures + index) as usize, ); } for index in 0..group.counters.fs.textures { let res = group.textures[(group.counters.vs.textures + index) as usize]; - encoder.set_fragment_texture( - (bg_info.base_resource_indices.fs.textures + index) as u64, - Some(res.as_native()), + encoder.setFragmentTexture_atIndex( + Some(res.as_ref()), + (bg_info.base_resource_indices.fs.textures + index) as usize, ); } // Call useResource on all textures and buffers used indirectly so they are alive for (resource, use_info) in group.resources_to_use.iter() { - encoder.use_resource_at(resource.as_native(), use_info.uses, use_info.stages); + encoder.useResource_usage_stages(resource.as_ref(), use_info.uses, use_info.stages); } } @@ -788,23 +905,39 @@ impl crate::CommandEncoder for super::CommandEncoder { let mut changes_sizes_buffer = false; for index in 0..group.counters.cs.buffers { - let buf = &group.buffers[(index_base.buffers + index) as usize]; - let mut offset = buf.offset; - if let Some(dyn_index) = buf.dynamic_index { - offset += dynamic_offsets[dyn_index as usize] as wgt::BufferAddress; - } - encoder.set_buffer( - (bg_info.base_resource_indices.cs.buffers + index) as u64, - Some(buf.ptr.as_native()), - offset, - ); - if let Some(size) = buf.binding_size { - let br = naga::ResourceBinding { - group: group_index, - binding: buf.binding_location, - }; - self.state.storage_buffer_length_map.insert(br, size); - changes_sizes_buffer = true; + let res = &group.buffers[(index_base.buffers + index) as usize]; + match res { + super::BufferLikeResource::Buffer { + ptr, + mut offset, + dynamic_index, + binding_size, + binding_location, + } => { + if let Some(dyn_index) = dynamic_index { + offset += dynamic_offsets[*dyn_index as usize] as wgt::BufferAddress; + } + encoder.setBuffer_offset_atIndex( + Some(ptr.as_ref()), + offset as usize, + (bg_info.base_resource_indices.cs.buffers + index) as usize, + ); + if let Some(size) = binding_size { + let br = naga::ResourceBinding { + group: group_index, + binding: *binding_location, + }; + self.state.storage_buffer_length_map.insert(br, *size); + changes_sizes_buffer = true; + } + } + super::BufferLikeResource::AccelerationStructure(ptr, residency_set) => { + encoder.setAccelerationStructure_atBufferIndex( + Some(ptr.as_ref()), + (bg_info.base_resource_indices.cs.buffers + index) as usize, + ); + residency_set.as_ref().requestResidency(); + } } } if changes_sizes_buffer { @@ -812,26 +945,26 @@ impl crate::CommandEncoder for super::CommandEncoder { naga::ShaderStage::Compute, &mut self.temp.binding_sizes, ) { - encoder.set_bytes( + encoder.setBytes_length_atIndex( + NonNull::new(sizes.as_ptr().cast_mut().cast()).unwrap(), + sizes.len() * WORD_SIZE, index as _, - (sizes.len() * WORD_SIZE) as u64, - sizes.as_ptr().cast(), ); } } for index in 0..group.counters.cs.samplers { let res = group.samplers[(index_base.samplers + index) as usize]; - encoder.set_sampler_state( - (bg_info.base_resource_indices.cs.samplers + index) as u64, - Some(res.as_native()), + encoder.setSamplerState_atIndex( + Some(res.as_ref()), + (bg_info.base_resource_indices.cs.samplers + index) as usize, ); } for index in 0..group.counters.cs.textures { let res = group.textures[(index_base.textures + index) as usize]; - encoder.set_texture( - (bg_info.base_resource_indices.cs.textures + index) as u64, - Some(res.as_native()), + encoder.setTexture_atIndex( + Some(res.as_ref()), + (bg_info.base_resource_indices.cs.textures + index) as usize, ); } @@ -840,7 +973,7 @@ impl crate::CommandEncoder for super::CommandEncoder { if !use_info.visible_in_compute { continue; } - encoder.use_resource(resource.as_native(), use_info.uses); + encoder.useResource_usage(resource.as_ref(), use_info.uses); } } } @@ -861,46 +994,59 @@ impl crate::CommandEncoder for super::CommandEncoder { let offset_words = offset_bytes as usize / WORD_SIZE; state_pc[offset_words..offset_words + data.len()].copy_from_slice(data); + let bytes = NonNull::new(state_pc.as_ptr().cast_mut().cast()).unwrap(); if stages.contains(wgt::ShaderStages::COMPUTE) { - self.state.compute.as_ref().unwrap().set_bytes( - layout.push_constants_infos.cs.unwrap().buffer_index as _, - (layout.total_push_constants as usize * WORD_SIZE) as _, - state_pc.as_ptr().cast(), - ) + self.state + .compute + .as_ref() + .unwrap() + .setBytes_length_atIndex( + bytes, + layout.total_push_constants as usize * WORD_SIZE, + layout.push_constants_infos.cs.unwrap().buffer_index as _, + ) } if stages.contains(wgt::ShaderStages::VERTEX) { - self.state.render.as_ref().unwrap().set_vertex_bytes( - layout.push_constants_infos.vs.unwrap().buffer_index as _, - (layout.total_push_constants as usize * WORD_SIZE) as _, - state_pc.as_ptr().cast(), - ) + self.state + .render + .as_ref() + .unwrap() + .setVertexBytes_length_atIndex( + bytes, + layout.total_push_constants as usize * WORD_SIZE, + layout.push_constants_infos.vs.unwrap().buffer_index as _, + ) } if stages.contains(wgt::ShaderStages::FRAGMENT) { - self.state.render.as_ref().unwrap().set_fragment_bytes( - layout.push_constants_infos.fs.unwrap().buffer_index as _, - (layout.total_push_constants as usize * WORD_SIZE) as _, - state_pc.as_ptr().cast(), - ) + self.state + .render + .as_ref() + .unwrap() + .setFragmentBytes_length_atIndex( + bytes, + layout.total_push_constants as usize * WORD_SIZE, + layout.push_constants_infos.fs.unwrap().buffer_index as _, + ) } } unsafe fn insert_debug_marker(&mut self, label: &str) { if let Some(encoder) = self.active_encoder() { - encoder.insert_debug_signpost(label); + encoder.insertDebugSignpost(&NSString::from_str(label)); } } unsafe fn begin_debug_marker(&mut self, group_label: &str) { if let Some(encoder) = self.active_encoder() { - encoder.push_debug_group(group_label); + encoder.pushDebugGroup(&NSString::from_str(group_label)); } else if let Some(ref buf) = self.raw_cmd_buf { - buf.push_debug_group(group_label); + buf.pushDebugGroup(&NSString::from_str(group_label)); } } unsafe fn end_debug_marker(&mut self) { if let Some(encoder) = self.active_encoder() { - encoder.pop_debug_group(); + encoder.popDebugGroup(); } else if let Some(ref buf) = self.raw_cmd_buf { - buf.pop_debug_group(); + buf.popDebugGroup(); } } @@ -913,16 +1059,20 @@ impl crate::CommandEncoder for super::CommandEncoder { } let encoder = self.state.render.as_ref().unwrap(); - encoder.set_render_pipeline_state(&pipeline.raw); - encoder.set_front_facing_winding(pipeline.raw_front_winding); - encoder.set_cull_mode(pipeline.raw_cull_mode); - encoder.set_triangle_fill_mode(pipeline.raw_triangle_fill_mode); + encoder.setRenderPipelineState(&pipeline.raw); + encoder.setFrontFacingWinding(pipeline.raw_front_winding); + encoder.setCullMode(pipeline.raw_cull_mode); + encoder.setTriangleFillMode(pipeline.raw_triangle_fill_mode); if let Some(depth_clip) = pipeline.raw_depth_clip_mode { - encoder.set_depth_clip_mode(depth_clip); + encoder.setDepthClipMode(depth_clip); } if let Some((ref state, bias)) = pipeline.depth_stencil { - encoder.set_depth_stencil_state(state); - encoder.set_depth_bias(bias.constant as f32, bias.slope_scale, bias.clamp); + encoder.setDepthStencilState(Some(state)); + encoder.setDepthBias_slopeScale_clamp( + bias.constant as f32, + bias.slope_scale, + bias.clamp, + ); } { @@ -930,10 +1080,10 @@ impl crate::CommandEncoder for super::CommandEncoder { .state .make_sizes_buffer_update(naga::ShaderStage::Vertex, &mut self.temp.binding_sizes) { - encoder.set_vertex_bytes( + encoder.setVertexBytes_length_atIndex( + NonNull::new(sizes.as_ptr().cast_mut().cast()).unwrap(), + sizes.len() * WORD_SIZE, index as _, - (sizes.len() * WORD_SIZE) as u64, - sizes.as_ptr().cast(), ); } } @@ -942,10 +1092,10 @@ impl crate::CommandEncoder for super::CommandEncoder { .state .make_sizes_buffer_update(naga::ShaderStage::Fragment, &mut self.temp.binding_sizes) { - encoder.set_fragment_bytes( + encoder.setFragmentBytes_length_atIndex( + NonNull::new(sizes.as_ptr().cast_mut().cast()).unwrap(), + sizes.len() * WORD_SIZE, index as _, - (sizes.len() * WORD_SIZE) as u64, - sizes.as_ptr().cast(), ); } } @@ -956,12 +1106,9 @@ impl crate::CommandEncoder for super::CommandEncoder { binding: crate::BufferBinding<'a, super::Buffer>, format: wgt::IndexFormat, ) { - let (stride, raw_type) = match format { - wgt::IndexFormat::Uint16 => (2, MTLIndexType::UInt16), - wgt::IndexFormat::Uint32 => (4, MTLIndexType::UInt32), - }; + let (stride, raw_type) = conv::map_index_format(format); self.state.index = Some(super::IndexState { - buffer_ptr: AsNative::from(binding.buffer.raw.as_ref()), + buffer_ptr: NonNull::from(&*binding.buffer.raw), offset: binding.offset, stride, raw_type, @@ -975,7 +1122,11 @@ impl crate::CommandEncoder for super::CommandEncoder { ) { let buffer_index = self.shared.private_caps.max_vertex_buffers as u64 - 1 - index as u64; let encoder = self.state.render.as_ref().unwrap(); - encoder.set_vertex_buffer(buffer_index, Some(&binding.buffer.raw), binding.offset); + encoder.setVertexBuffer_offset_atIndex( + Some(&binding.buffer.raw), + binding.offset as usize, + buffer_index as usize, + ); let buffer_size = binding.resolve_size(); if buffer_size > 0 { @@ -991,10 +1142,10 @@ impl crate::CommandEncoder for super::CommandEncoder { .state .make_sizes_buffer_update(naga::ShaderStage::Vertex, &mut self.temp.binding_sizes) { - encoder.set_vertex_bytes( + encoder.setVertexBytes_length_atIndex( + NonNull::new(sizes.as_ptr().cast_mut().cast()).unwrap(), + sizes.len() * WORD_SIZE, index as _, - (sizes.len() * WORD_SIZE) as u64, - sizes.as_ptr().cast(), ); } } @@ -1006,7 +1157,7 @@ impl crate::CommandEncoder for super::CommandEncoder { depth_range.end }; let encoder = self.state.render.as_ref().unwrap(); - encoder.set_viewport(MTLViewport { + encoder.setViewport(MTLViewport { originX: rect.x as _, originY: rect.y as _, width: rect.w as _, @@ -1024,15 +1175,15 @@ impl crate::CommandEncoder for super::CommandEncoder { height: rect.h as _, }; let encoder = self.state.render.as_ref().unwrap(); - encoder.set_scissor_rect(scissor); + encoder.setScissorRect(scissor); } unsafe fn set_stencil_reference(&mut self, value: u32) { let encoder = self.state.render.as_ref().unwrap(); - encoder.set_stencil_front_back_reference_value(value, value); + encoder.setStencilFrontReferenceValue_backReferenceValue(value, value); } unsafe fn set_blend_constants(&mut self, color: &[f32; 4]) { let encoder = self.state.render.as_ref().unwrap(); - encoder.set_blend_color(color[0], color[1], color[2], color[3]); + encoder.setBlendColorRed_green_blue_alpha(color[0], color[1], color[2], color[3]); } unsafe fn draw( @@ -1044,7 +1195,7 @@ impl crate::CommandEncoder for super::CommandEncoder { ) { let encoder = self.state.render.as_ref().unwrap(); if first_instance != 0 { - encoder.draw_primitives_instanced_base_instance( + encoder.drawPrimitives_vertexStart_vertexCount_instanceCount_baseInstance( self.state.raw_primitive_type, first_vertex as _, vertex_count as _, @@ -1052,14 +1203,14 @@ impl crate::CommandEncoder for super::CommandEncoder { first_instance as _, ); } else if instance_count != 1 { - encoder.draw_primitives_instanced( + encoder.drawPrimitives_vertexStart_vertexCount_instanceCount( self.state.raw_primitive_type, first_vertex as _, vertex_count as _, instance_count as _, ); } else { - encoder.draw_primitives( + encoder.drawPrimitives_vertexStart_vertexCount( self.state.raw_primitive_type, first_vertex as _, vertex_count as _, @@ -1077,33 +1228,33 @@ impl crate::CommandEncoder for super::CommandEncoder { ) { let encoder = self.state.render.as_ref().unwrap(); let index = self.state.index.as_ref().unwrap(); - let offset = index.offset + index.stride * first_index as wgt::BufferAddress; + let offset = (index.offset + index.stride * first_index as wgt::BufferAddress) as usize; if base_vertex != 0 || first_instance != 0 { - encoder.draw_indexed_primitives_instanced_base_instance( + encoder.drawIndexedPrimitives_indexCount_indexType_indexBuffer_indexBufferOffset_instanceCount_baseVertex_baseInstance( self.state.raw_primitive_type, index_count as _, index.raw_type, - index.buffer_ptr.as_native(), + index.buffer_ptr.as_ref(), offset, instance_count as _, base_vertex as _, first_instance as _, ); } else if instance_count != 1 { - encoder.draw_indexed_primitives_instanced( + encoder.drawIndexedPrimitives_indexCount_indexType_indexBuffer_indexBufferOffset_instanceCount( self.state.raw_primitive_type, index_count as _, index.raw_type, - index.buffer_ptr.as_native(), + index.buffer_ptr.as_ref(), offset, instance_count as _, ); } else { - encoder.draw_indexed_primitives( + encoder.drawIndexedPrimitives_indexCount_indexType_indexBuffer_indexBufferOffset( self.state.raw_primitive_type, index_count as _, index.raw_type, - index.buffer_ptr.as_native(), + index.buffer_ptr.as_ref(), offset, ); } @@ -1126,7 +1277,11 @@ impl crate::CommandEncoder for super::CommandEncoder { ) { let encoder = self.state.render.as_ref().unwrap(); for _ in 0..draw_count { - encoder.draw_primitives_indirect(self.state.raw_primitive_type, &buffer.raw, offset); + encoder.drawPrimitives_indirectBuffer_indirectBufferOffset( + self.state.raw_primitive_type, + &buffer.raw, + offset as usize, + ); offset += size_of::() as wgt::BufferAddress; } } @@ -1140,13 +1295,13 @@ impl crate::CommandEncoder for super::CommandEncoder { let encoder = self.state.render.as_ref().unwrap(); let index = self.state.index.as_ref().unwrap(); for _ in 0..draw_count { - encoder.draw_indexed_primitives_indirect( + encoder.drawIndexedPrimitives_indexType_indexBuffer_indexBufferOffset_indirectBuffer_indirectBufferOffset( self.state.raw_primitive_type, index.raw_type, - index.buffer_ptr.as_native(), - index.offset, + index.buffer_ptr.as_ref(), + index.offset as usize, &buffer.raw, - offset, + offset as usize, ); offset += size_of::() as wgt::BufferAddress; } @@ -1204,65 +1359,66 @@ impl crate::CommandEncoder for super::CommandEncoder { let raw = self.raw_cmd_buf.as_ref().unwrap(); - objc::rc::autoreleasepool(|| { + autoreleasepool(|_| { // TimeStamp Queries and ComputePassDescriptor were both introduced in Metal 2.3 (macOS 11, iOS 14) // and we currently only need ComputePassDescriptor for timestamp queries let encoder = if self.shared.private_caps.timestamp_query_support.is_empty() { - raw.new_compute_command_encoder() + raw.computeCommandEncoder().unwrap() } else { - let descriptor = metal::ComputePassDescriptor::new(); + let descriptor = MTLComputePassDescriptor::new(); let mut sba_index = 0; let mut next_sba_descriptor = || { let sba_descriptor = descriptor - .sample_buffer_attachments() - .object_at(sba_index) - .unwrap(); + .sampleBufferAttachments() + .objectAtIndexedSubscript(sba_index); sba_index += 1; sba_descriptor }; for (set, index) in self.state.pending_timer_queries.drain(..) { let sba_descriptor = next_sba_descriptor(); - sba_descriptor.set_sample_buffer(set.counter_sample_buffer.as_ref().unwrap()); - sba_descriptor.set_start_of_encoder_sample_index(index as _); - sba_descriptor.set_end_of_encoder_sample_index(metal::COUNTER_DONT_SAMPLE); + sba_descriptor + .setSampleBuffer(Some(set.counter_sample_buffer.as_ref().unwrap())); + sba_descriptor.setStartOfEncoderSampleIndex(index as _); + sba_descriptor.setEndOfEncoderSampleIndex(MTLCounterDontSample); } if let Some(timestamp_writes) = desc.timestamp_writes.as_ref() { let sba_descriptor = next_sba_descriptor(); - sba_descriptor.set_sample_buffer( + sba_descriptor.setSampleBuffer(Some( timestamp_writes .query_set .counter_sample_buffer .as_ref() .unwrap(), - ); + )); - sba_descriptor.set_start_of_encoder_sample_index( + sba_descriptor.setStartOfEncoderSampleIndex( timestamp_writes .beginning_of_pass_write_index - .map_or(metal::COUNTER_DONT_SAMPLE, |i| i as _), + .map_or(MTLCounterDontSample, |i| i as _), ); - sba_descriptor.set_end_of_encoder_sample_index( + sba_descriptor.setEndOfEncoderSampleIndex( timestamp_writes .end_of_pass_write_index - .map_or(metal::COUNTER_DONT_SAMPLE, |i| i as _), + .map_or(MTLCounterDontSample, |i| i as _), ); } - raw.compute_command_encoder_with_descriptor(descriptor) + raw.computeCommandEncoderWithDescriptor(&descriptor) + .unwrap() }; if let Some(label) = desc.label { - encoder.set_label(label); + encoder.setLabel(Some(&NSString::from_str(label))); } self.state.compute = Some(encoder.to_owned()); }); } unsafe fn end_compute_pass(&mut self) { - self.state.compute.take().unwrap().end_encoding(); + self.state.compute.take().unwrap().endEncoding(); } unsafe fn set_compute_pipeline(&mut self, pipeline: &super::ComputePipeline) { @@ -1270,16 +1426,16 @@ impl crate::CommandEncoder for super::CommandEncoder { self.state.stage_infos.cs.assign_from(&pipeline.cs_info); let encoder = self.state.compute.as_ref().unwrap(); - encoder.set_compute_pipeline_state(&pipeline.raw); + encoder.setComputePipelineState(&pipeline.raw); if let Some((index, sizes)) = self .state .make_sizes_buffer_update(naga::ShaderStage::Compute, &mut self.temp.binding_sizes) { - encoder.set_bytes( + encoder.setBytes_length_atIndex( + NonNull::new(sizes.as_ptr().cast_mut().cast()).unwrap(), + sizes.len() * WORD_SIZE, index as _, - (sizes.len() * WORD_SIZE) as u64, - sizes.as_ptr().cast(), ); } @@ -1297,7 +1453,7 @@ impl crate::CommandEncoder for super::CommandEncoder { let size = pipeline_size.next_multiple_of(16); if *cur_size != size { *cur_size = size; - encoder.set_threadgroup_memory_length(index as _, size as _); + encoder.setThreadgroupMemoryLength_atIndex(size as _, index); } } } @@ -1306,23 +1462,27 @@ impl crate::CommandEncoder for super::CommandEncoder { if count[0] > 0 && count[1] > 0 && count[2] > 0 { let encoder = self.state.compute.as_ref().unwrap(); let raw_count = MTLSize { - width: count[0] as u64, - height: count[1] as u64, - depth: count[2] as u64, + width: count[0] as usize, + height: count[1] as usize, + depth: count[2] as usize, }; - encoder.dispatch_thread_groups(raw_count, self.state.raw_wg_size); + encoder.dispatchThreadgroups_threadsPerThreadgroup(raw_count, self.state.raw_wg_size); } } unsafe fn dispatch_indirect(&mut self, buffer: &super::Buffer, offset: wgt::BufferAddress) { let encoder = self.state.compute.as_ref().unwrap(); - encoder.dispatch_thread_groups_indirect(&buffer.raw, offset, self.state.raw_wg_size); + encoder.dispatchThreadgroupsWithIndirectBuffer_indirectBufferOffset_threadsPerThreadgroup( + &buffer.raw, + offset as usize, + self.state.raw_wg_size, + ); } unsafe fn build_acceleration_structures<'a, T>( &mut self, _descriptor_count: u32, - _descriptors: T, + descriptors: T, ) where super::Api: 'a, T: IntoIterator< @@ -1333,22 +1493,69 @@ impl crate::CommandEncoder for super::CommandEncoder { >, >, { - unimplemented!() + let command_encoder = self.enter_acceleration_structure_builder(); + for descriptor in descriptors { + let acceleration_structure_descriptor = + conv::map_acceleration_structure_descriptor(descriptor.entries, descriptor.flags); + if matches!( + descriptor.entries, + crate::AccelerationStructureEntries::Instances(_) + ) { + // Store a reference of the residency_set in the encoder state to be committed later + self.state.residency_sets.insert(Retained::clone( + &descriptor.destination_acceleration_structure.residency_set, + )); + descriptor + .destination_acceleration_structure + .residency_set + .removeAllAllocations(); + for dependency in descriptor.dependencies.iter() { + descriptor + .destination_acceleration_structure + .residency_set + .addAllocation(dependency.raw.as_ref()); + } + } + match descriptor.mode { + crate::AccelerationStructureBuildMode::Build => { + command_encoder + .buildAccelerationStructure_descriptor_scratchBuffer_scratchBufferOffset( + &descriptor.destination_acceleration_structure.raw, + &acceleration_structure_descriptor, + &descriptor.scratch_buffer.raw, + descriptor.scratch_buffer_offset as usize, + ); + } + crate::AccelerationStructureBuildMode::Update => { + command_encoder.refitAccelerationStructure_descriptor_destination_scratchBuffer_scratchBufferOffset( + &descriptor.source_acceleration_structure.unwrap().raw, + &acceleration_structure_descriptor, + Some(&descriptor.destination_acceleration_structure.raw), + Some(&descriptor.scratch_buffer.raw), + descriptor.scratch_buffer_offset as usize, + ); + } + } + } } unsafe fn place_acceleration_structure_barrier( &mut self, _barriers: crate::AccelerationStructureBarrier, ) { - unimplemented!() } unsafe fn read_acceleration_structure_compact_size( &mut self, - _acceleration_structure: &super::AccelerationStructure, - _buf: &super::Buffer, + acceleration_structure: &super::AccelerationStructure, + buffer: &super::Buffer, ) { - unimplemented!() + let command_encoder = self.enter_acceleration_structure_builder(); + command_encoder.writeCompactedAccelerationStructureSize_toBuffer_offset( + &acceleration_structure.raw, + &buffer.raw, + 0, + ); } } @@ -1362,7 +1569,7 @@ impl Drop for super::CommandEncoder { // appears to be a requirement for all MTLCommandEncoder objects. Failing to call // endEncoding causes a crash with the message 'Command encoder released without // endEncoding'. To prevent this, we explicitiy call discard_encoding, which - // calls end_encoding on any still-held metal::CommandEncoders. + // calls endEncoding on any still-held MTLCommandEncoders. unsafe { self.discard_encoding(); } diff --git a/wgpu-hal/src/metal/conv.rs b/wgpu-hal/src/metal/conv.rs index 260b6c15a32..7c0ba5d5962 100644 --- a/wgpu-hal/src/metal/conv.rs +++ b/wgpu-hal/src/metal/conv.rs @@ -1,9 +1,16 @@ -use metal::{ - MTLBlendFactor, MTLBlendOperation, MTLBlitOption, MTLClearColor, MTLColorWriteMask, - MTLCompareFunction, MTLCullMode, MTLOrigin, MTLPrimitiveTopologyClass, MTLPrimitiveType, +use objc2::rc::Retained; +use objc2_foundation::{NSArray, NSRange}; +use objc2_metal::{ + MTLAccelerationStructureBoundingBoxGeometryDescriptor, MTLAccelerationStructureDescriptor, + MTLAccelerationStructureGeometryDescriptor, MTLAccelerationStructureInstanceDescriptorType, + MTLAccelerationStructureTriangleGeometryDescriptor, MTLAccelerationStructureUsage, + MTLAttributeFormat, MTLBlendFactor, MTLBlendOperation, MTLBlitOption, MTLClearColor, + MTLColorWriteMask, MTLCompareFunction, MTLCullMode, MTLIndexType, + MTLInstanceAccelerationStructureDescriptor, MTLOrigin, + MTLPrimitiveAccelerationStructureDescriptor, MTLPrimitiveTopologyClass, MTLPrimitiveType, MTLRenderStages, MTLResourceUsage, MTLSamplerAddressMode, MTLSamplerBorderColor, MTLSamplerMinMagFilter, MTLSize, MTLStencilOperation, MTLStoreAction, MTLTextureType, - MTLTextureUsage, MTLVertexFormat, MTLVertexStepFunction, MTLWinding, NSRange, + MTLTextureUsage, MTLVertexFormat, MTLVertexStepFunction, MTLWinding, }; pub fn map_texture_usage(format: wgt::TextureFormat, usage: wgt::TextureUses) -> MTLTextureUsage { @@ -44,12 +51,12 @@ pub fn map_texture_view_dimension(dim: wgt::TextureViewDimension) -> MTLTextureT use wgt::TextureViewDimension as Tvd; use MTLTextureType as MTL; match dim { - Tvd::D1 => MTL::D1, - Tvd::D2 => MTL::D2, - Tvd::D2Array => MTL::D2Array, - Tvd::D3 => MTL::D3, - Tvd::Cube => MTL::Cube, - Tvd::CubeArray => MTL::CubeArray, + Tvd::D1 => MTL::Type1D, + Tvd::D2 => MTL::Type2D, + Tvd::D2Array => MTL::Type2DArray, + Tvd::D3 => MTL::Type3D, + Tvd::Cube => MTL::TypeCube, + Tvd::CubeArray => MTL::TypeCubeArray, } } @@ -234,6 +241,13 @@ pub fn map_vertex_format(format: wgt::VertexFormat) -> MTLVertexFormat { } } +pub fn map_index_format(format: wgt::IndexFormat) -> (u64, MTLIndexType) { + match format { + wgt::IndexFormat::Uint16 => (2, MTLIndexType::UInt16), + wgt::IndexFormat::Uint32 => (4, MTLIndexType::UInt32), + } +} + pub fn map_step_mode(mode: wgt::VertexStepMode) -> MTLVertexStepFunction { match mode { wgt::VertexStepMode::Vertex => MTLVertexStepFunction::PerVertex, @@ -274,24 +288,24 @@ pub fn map_cull_mode(face: Option) -> MTLCullMode { pub fn map_range(range: &crate::MemoryRange) -> NSRange { NSRange { - location: range.start, - length: range.end - range.start, + location: range.start as usize, + length: (range.end - range.start) as usize, } } pub fn map_copy_extent(extent: &crate::CopyExtent) -> MTLSize { MTLSize { - width: extent.width as u64, - height: extent.height as u64, - depth: extent.depth as u64, + width: extent.width as usize, + height: extent.height as usize, + depth: extent.depth as usize, } } pub fn map_origin(origin: &wgt::Origin3d) -> MTLOrigin { MTLOrigin { - x: origin.x as u64, - y: origin.y as u64, - z: origin.z as u64, + x: origin.x as usize, + y: origin.y as usize, + z: origin.z as usize, } } @@ -341,6 +355,7 @@ pub fn map_render_stages(stage: wgt::ShaderStages) -> MTLRenderStages { pub fn map_resource_usage(ty: &wgt::BindingType) -> MTLResourceUsage { match ty { + #[allow(deprecated)] wgt::BindingType::Texture { .. } => MTLResourceUsage::Sample, wgt::BindingType::StorageTexture { access, .. } => match access { wgt::StorageTextureAccess::WriteOnly => MTLResourceUsage::Write, @@ -353,3 +368,109 @@ pub fn map_resource_usage(ty: &wgt::BindingType) -> MTLResourceUsage { _ => unreachable!(), } } + +pub unsafe fn map_acceleration_structure_descriptor<'a>( + entries: &'a crate::AccelerationStructureEntries<'a, super::Buffer>, + flags: crate::AccelerationStructureBuildFlags, +) -> Retained { + let descriptor = match entries { + crate::AccelerationStructureEntries::Instances(instances) => { + let descriptor = MTLInstanceAccelerationStructureDescriptor::new(); + descriptor.setInstanceDescriptorType( + MTLAccelerationStructureInstanceDescriptorType::Indirect, + ); + descriptor.setInstanceCount(instances.count as usize); + descriptor.setInstanceDescriptorBuffer(Some(&instances.buffer.unwrap().raw)); + descriptor.setInstanceDescriptorBufferOffset(instances.offset as usize); + descriptor.into_super() + } + crate::AccelerationStructureEntries::Triangles(entries) => { + let geometry_descriptors = entries + .iter() + .map(|triangles| { + let descriptor = MTLAccelerationStructureTriangleGeometryDescriptor::new(); + if let Some(indices) = triangles.indices.as_ref() { + descriptor.setIndexBuffer(Some(&indices.buffer.unwrap().raw)); + descriptor.setIndexBufferOffset(indices.offset as usize); + descriptor.setIndexType(map_index_format(indices.format).1); + descriptor.setTriangleCount(indices.count as usize / 3); + } else { + descriptor.setTriangleCount(triangles.vertex_count as usize / 3); + } + descriptor.setVertexBuffer(Some(&triangles.vertex_buffer.unwrap().raw)); + descriptor.setVertexBufferOffset( + triangles.first_vertex as usize * triangles.vertex_stride as usize, + ); + descriptor.setVertexStride(triangles.vertex_stride as usize); + // Safety: MTLVertexFormat and MTLAttributeFormat are identical. + // https://docs.rs/metal/latest/metal/enum.MTLAttributeFormat.html + // https://docs.rs/metal/latest/metal/enum.MTLVertexFormat.html + descriptor.setVertexFormat(unsafe { + core::mem::transmute::( + map_vertex_format(triangles.vertex_format), + ) + }); + if let Some(transform) = triangles.transform.as_ref() { + descriptor.setTransformationMatrixBuffer(Some(&transform.buffer.raw)); + descriptor.setTransformationMatrixBufferOffset(transform.offset as usize); + } + descriptor.setOpaque( + triangles + .flags + .contains(wgt::AccelerationStructureGeometryFlags::OPAQUE), + ); + if !triangles.flags.contains( + wgt::AccelerationStructureGeometryFlags::NO_DUPLICATE_ANY_HIT_INVOCATION, + ) { + descriptor.allowDuplicateIntersectionFunctionInvocation(); + } + // descriptor.setIntersectionFunctionTableOffset(offset); + descriptor.into_super() + }) + .collect::>>(); + let descriptor = MTLPrimitiveAccelerationStructureDescriptor::new(); + descriptor.setGeometryDescriptors(Some(&NSArray::from_retained_slice( + geometry_descriptors.as_slice(), + ))); + descriptor.into_super() + } + crate::AccelerationStructureEntries::AABBs(entries) => { + let geometry_descriptors = entries + .iter() + .map(|aabbs| { + let descriptor = MTLAccelerationStructureBoundingBoxGeometryDescriptor::new(); + descriptor.setBoundingBoxBuffer(Some(&aabbs.buffer.unwrap().raw)); + descriptor.setBoundingBoxCount(aabbs.count as usize); + descriptor.setBoundingBoxStride(aabbs.stride as usize); + descriptor.setBoundingBoxBufferOffset(aabbs.offset as usize); + descriptor.setOpaque( + aabbs + .flags + .contains(wgt::AccelerationStructureGeometryFlags::OPAQUE), + ); + if !aabbs.flags.contains( + wgt::AccelerationStructureGeometryFlags::NO_DUPLICATE_ANY_HIT_INVOCATION, + ) { + descriptor.allowDuplicateIntersectionFunctionInvocation(); + } + // descriptor.setIntersectionFunctionTableOffset(offset); + descriptor.into_super() + }) + .collect::>>(); + let descriptor = MTLPrimitiveAccelerationStructureDescriptor::new(); + descriptor.setGeometryDescriptors(Some(&NSArray::from_retained_slice( + geometry_descriptors.as_slice(), + ))); + descriptor.into_super() + } + }; + let mut usage = MTLAccelerationStructureUsage::None; + if flags.contains(wgt::AccelerationStructureFlags::ALLOW_UPDATE) { + usage |= MTLAccelerationStructureUsage::Refit; + } + if flags.contains(wgt::AccelerationStructureFlags::PREFER_FAST_BUILD) { + usage |= MTLAccelerationStructureUsage::PreferFastBuild; + } + descriptor.setUsage(usage); + descriptor +} diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs index fbb166c2723..2bae5badd21 100644 --- a/wgpu-hal/src/metal/device.rs +++ b/wgpu-hal/src/metal/device.rs @@ -2,25 +2,37 @@ use alloc::{borrow::ToOwned as _, sync::Arc, vec::Vec}; use core::{ptr::NonNull, sync::atomic}; use std::{thread, time}; +use bytemuck::TransparentWrapper; +use objc2::{ + msg_send, + rc::{autoreleasepool, Retained}, + runtime::ProtocolObject, +}; +use objc2_foundation::{ns_string, NSError, NSRange, NSString}; +use objc2_metal::{ + MTLAccelerationStructure, MTLAccelerationStructureInstanceOptions, MTLBuffer, + MTLCaptureManager, MTLCaptureScope, MTLCommandBuffer, MTLCommandBufferStatus, + MTLCompileOptions, MTLComputePipelineDescriptor, MTLComputePipelineState, + MTLCounterSampleBufferDescriptor, MTLCounterSet, MTLDepthClipMode, MTLDepthStencilDescriptor, + MTLDevice, MTLFunction, MTLIndirectAccelerationStructureInstanceDescriptor, MTLLanguageVersion, + MTLLibrary, MTLMutability, MTLPackedFloat3, MTLPackedFloat4x3, + MTLPipelineBufferDescriptorArray, MTLPixelFormat, MTLPrimitiveTopologyClass, + MTLRenderPipelineDescriptor, MTLResidencySet, MTLResidencySetDescriptor, MTLResource, + MTLResourceID, MTLResourceOptions, MTLSamplerAddressMode, MTLSamplerDescriptor, + MTLSamplerMipFilter, MTLSamplerState, MTLSize, MTLStencilDescriptor, MTLStorageMode, + MTLTexture, MTLTextureDescriptor, MTLTextureType, MTLTriangleFillMode, MTLVertexDescriptor, + MTLVertexStepFunction, +}; use parking_lot::Mutex; -use super::{conv, PassthroughShader}; -use crate::auxil::map_naga_stage; -use crate::metal::ShaderModuleSource; -use crate::TlasInstance; - -use metal::{ - foreign_types::ForeignType, MTLCommandBufferStatus, MTLDepthClipMode, MTLLanguageVersion, - MTLMutability, MTLPixelFormat, MTLPrimitiveTopologyClass, MTLResourceID, MTLResourceOptions, - MTLSamplerAddressMode, MTLSamplerMipFilter, MTLSize, MTLStorageMode, MTLTextureType, - MTLTriangleFillMode, MTLVertexStepFunction, NSRange, -}; +use super::{conv, PassthroughShader, ShaderModuleSource}; +use crate::{auxil::map_naga_stage, TlasInstance}; type DeviceResult = Result; struct CompiledShader { - library: metal::Library, - function: metal::Function, + library: Retained>, + function: Retained>, wg_size: MTLSize, wg_memory_sizes: Vec, @@ -41,27 +53,29 @@ fn create_stencil_desc( face: &wgt::StencilFaceState, read_mask: u32, write_mask: u32, -) -> metal::StencilDescriptor { - let desc = metal::StencilDescriptor::new(); - desc.set_stencil_compare_function(conv::map_compare_function(face.compare)); - desc.set_read_mask(read_mask); - desc.set_write_mask(write_mask); - desc.set_stencil_failure_operation(conv::map_stencil_op(face.fail_op)); - desc.set_depth_failure_operation(conv::map_stencil_op(face.depth_fail_op)); - desc.set_depth_stencil_pass_operation(conv::map_stencil_op(face.pass_op)); +) -> Retained { + let desc = unsafe { MTLStencilDescriptor::new() }; + desc.setStencilCompareFunction(conv::map_compare_function(face.compare)); + desc.setReadMask(read_mask); + desc.setWriteMask(write_mask); + desc.setStencilFailureOperation(conv::map_stencil_op(face.fail_op)); + desc.setDepthFailureOperation(conv::map_stencil_op(face.depth_fail_op)); + desc.setDepthStencilPassOperation(conv::map_stencil_op(face.pass_op)); desc } -fn create_depth_stencil_desc(state: &wgt::DepthStencilState) -> metal::DepthStencilDescriptor { - let desc = metal::DepthStencilDescriptor::new(); - desc.set_depth_compare_function(conv::map_compare_function(state.depth_compare)); - desc.set_depth_write_enabled(state.depth_write_enabled); +fn create_depth_stencil_desc( + state: &wgt::DepthStencilState, +) -> Retained { + let desc = unsafe { MTLDepthStencilDescriptor::new() }; + desc.setDepthCompareFunction(conv::map_compare_function(state.depth_compare)); + desc.setDepthWriteEnabled(state.depth_write_enabled); let s = &state.stencil; if s.is_enabled() { let front_desc = create_stencil_desc(&s.front, s.read_mask, s.write_mask); - desc.set_front_face_stencil(Some(&front_desc)); + desc.setFrontFaceStencil(Some(&front_desc)); let back_desc = create_stencil_desc(&s.back, s.read_mask, s.write_mask); - desc.set_back_face_stencil(Some(&back_desc)); + desc.setBackFaceStencil(Some(&back_desc)); } desc } @@ -152,16 +166,19 @@ impl super::Device { let options = naga::back::msl::Options { lang_version: match self.shared.private_caps.msl_version { - MTLLanguageVersion::V1_0 => (1, 0), - MTLLanguageVersion::V1_1 => (1, 1), - MTLLanguageVersion::V1_2 => (1, 2), - MTLLanguageVersion::V2_0 => (2, 0), - MTLLanguageVersion::V2_1 => (2, 1), - MTLLanguageVersion::V2_2 => (2, 2), - MTLLanguageVersion::V2_3 => (2, 3), - MTLLanguageVersion::V2_4 => (2, 4), - MTLLanguageVersion::V3_0 => (3, 0), - MTLLanguageVersion::V3_1 => (3, 1), + #[allow(deprecated)] + MTLLanguageVersion::Version1_0 => (1, 0), + MTLLanguageVersion::Version1_1 => (1, 1), + MTLLanguageVersion::Version1_2 => (1, 2), + MTLLanguageVersion::Version2_0 => (2, 0), + MTLLanguageVersion::Version2_1 => (2, 1), + MTLLanguageVersion::Version2_2 => (2, 2), + MTLLanguageVersion::Version2_3 => (2, 3), + MTLLanguageVersion::Version2_4 => (2, 4), + MTLLanguageVersion::Version3_0 => (3, 0), + MTLLanguageVersion::Version3_1 => (3, 1), + // Newer version, fall back to 3.1 + _ => (3, 1), }, inline_samplers: Default::default(), spirv_cross_compatibility: false, @@ -202,18 +219,18 @@ impl super::Device { &source ); - let options = metal::CompileOptions::new(); - options.set_language_version(self.shared.private_caps.msl_version); + let options = MTLCompileOptions::new(); + options.setLanguageVersion(self.shared.private_caps.msl_version); if self.shared.private_caps.supports_preserve_invariance { - options.set_preserve_invariance(true); + options.setPreserveInvariance(true); } let library = self .shared .device .lock() - .new_library_with_source(source.as_ref(), &options) + .newLibraryWithSource_options_error(&NSString::from_str(&source), Some(&options)) .map_err(|err| { log::warn!("Naga generated shader:\n{source}"); crate::PipelineError::Linkage(stage_bit, format!("Metal: {err}")) @@ -236,9 +253,9 @@ impl super::Device { }; let function = library - .get_function(translated_ep_name, None) - .map_err(|e| { - log::error!("get_function: {e:?}"); + .newFunctionWithName(&NSString::from_str(translated_ep_name)) + .ok_or_else(|| { + log::error!("Function '{translated_ep_name}' does not exist"); crate::PipelineError::EntryPoint(naga_stage) })?; @@ -301,21 +318,19 @@ impl super::Device { } fn set_buffers_mutability( - buffers: &metal::PipelineBufferDescriptorArrayRef, + buffers: &MTLPipelineBufferDescriptorArray, mut immutable_mask: usize, ) { while immutable_mask != 0 { let slot = immutable_mask.trailing_zeros(); immutable_mask ^= 1 << slot; - buffers - .object_at(slot as u64) - .unwrap() - .set_mutability(MTLMutability::Immutable); + unsafe { buffers.objectAtIndexedSubscript(slot as usize) } + .setMutability(MTLMutability::Immutable); } } pub unsafe fn texture_from_raw( - raw: metal::Texture, + raw: Retained>, format: wgt::TextureFormat, raw_type: MTLTextureType, array_layers: u32, @@ -332,7 +347,10 @@ impl super::Device { } } - pub unsafe fn device_from_raw(raw: metal::Device, features: wgt::Features) -> super::Device { + pub unsafe fn device_from_raw( + raw: Retained>, + features: wgt::Features, + ) -> super::Device { super::Device { shared: Arc::new(super::AdapterShared::new(raw)), features, @@ -340,11 +358,14 @@ impl super::Device { } } - pub unsafe fn buffer_from_raw(raw: metal::Buffer, size: wgt::BufferAddress) -> super::Buffer { + pub unsafe fn buffer_from_raw( + raw: Retained>, + size: wgt::BufferAddress, + ) -> super::Buffer { super::Buffer { raw, size } } - pub fn raw_device(&self) -> &Mutex { + pub fn raw_device(&self) -> &Mutex>> { &self.shared.device } } @@ -367,10 +388,15 @@ impl crate::Device for super::Device { //TODO: HazardTrackingModeUntracked - objc::rc::autoreleasepool(|| { - let raw = self.shared.device.lock().new_buffer(desc.size, options); + autoreleasepool(|_| { + let raw = self + .shared + .device + .lock() + .newBufferWithLength_options(desc.size as usize, options) + .unwrap(); if let Some(label) = desc.label { - raw.set_label(label); + raw.setLabel(Some(&NSString::from_str(label))); } self.counters.buffers.add(1); Ok(super::Buffer { @@ -393,9 +419,8 @@ impl crate::Device for super::Device { range: crate::MemoryRange, ) -> DeviceResult { let ptr = buffer.raw.contents().cast::(); - assert!(!ptr.is_null()); Ok(crate::BufferMapping { - ptr: NonNull::new(unsafe { ptr.offset(range.start as isize) }).unwrap(), + ptr: NonNull::new(unsafe { ptr.as_ptr().offset(range.start as isize) }).unwrap(), is_coherent: true, }) } @@ -408,46 +433,46 @@ impl crate::Device for super::Device { &self, desc: &crate::TextureDescriptor, ) -> DeviceResult { - use metal::foreign_types::ForeignType as _; - let mtl_format = self.shared.private_caps.map_format(desc.format); - objc::rc::autoreleasepool(|| { - let descriptor = metal::TextureDescriptor::new(); + autoreleasepool(|_| { + let descriptor = MTLTextureDescriptor::new(); let mtl_type = match desc.dimension { - wgt::TextureDimension::D1 => MTLTextureType::D1, + wgt::TextureDimension::D1 => MTLTextureType::Type1D, wgt::TextureDimension::D2 => { if desc.sample_count > 1 { - descriptor.set_sample_count(desc.sample_count as u64); - MTLTextureType::D2Multisample + descriptor.setSampleCount(desc.sample_count as usize); + MTLTextureType::Type2DMultisample } else if desc.size.depth_or_array_layers > 1 { - descriptor.set_array_length(desc.size.depth_or_array_layers as u64); - MTLTextureType::D2Array + descriptor.setArrayLength(desc.size.depth_or_array_layers as usize); + MTLTextureType::Type2DArray } else { - MTLTextureType::D2 + MTLTextureType::Type2D } } wgt::TextureDimension::D3 => { - descriptor.set_depth(desc.size.depth_or_array_layers as u64); - MTLTextureType::D3 + descriptor.setDepth(desc.size.depth_or_array_layers as usize); + MTLTextureType::Type3D } }; - descriptor.set_texture_type(mtl_type); - descriptor.set_width(desc.size.width as u64); - descriptor.set_height(desc.size.height as u64); - descriptor.set_mipmap_level_count(desc.mip_level_count as u64); - descriptor.set_pixel_format(mtl_format); - descriptor.set_usage(conv::map_texture_usage(desc.format, desc.usage)); - descriptor.set_storage_mode(MTLStorageMode::Private); + descriptor.setTextureType(mtl_type); + descriptor.setWidth(desc.size.width as usize); + descriptor.setHeight(desc.size.height as usize); + descriptor.setMipmapLevelCount(desc.mip_level_count as usize); + descriptor.setPixelFormat(mtl_format); + descriptor.setUsage(conv::map_texture_usage(desc.format, desc.usage)); + descriptor.setStorageMode(MTLStorageMode::Private); - let raw = self.shared.device.lock().new_texture(&descriptor); - if raw.as_ptr().is_null() { - return Err(crate::DeviceError::OutOfMemory); - } + let raw = self + .shared + .device + .lock() + .newTextureWithDescriptor(&descriptor) + .ok_or(crate::DeviceError::OutOfMemory)?; if let Some(label) = desc.label { - raw.set_label(label); + raw.setLabel(Some(&NSString::from_str(label))); } self.counters.textures.add(1); @@ -476,7 +501,7 @@ impl crate::Device for super::Device { texture: &super::Texture, desc: &crate::TextureViewDescriptor, ) -> DeviceResult { - let raw_type = if texture.raw_type == MTLTextureType::D2Multisample { + let raw_type = if texture.raw_type == MTLTextureType::Type2DMultisample { texture.raw_type } else { conv::map_texture_view_dimension(desc.dimension) @@ -509,21 +534,24 @@ impl crate::Device for super::Device { .array_layer_count .unwrap_or(texture.array_layers - desc.range.base_array_layer); - objc::rc::autoreleasepool(|| { - let raw = texture.raw.new_texture_view_from_slice( - raw_format, - raw_type, - NSRange { - location: desc.range.base_mip_level as _, - length: mip_level_count as _, - }, - NSRange { - location: desc.range.base_array_layer as _, - length: array_layer_count as _, - }, - ); + autoreleasepool(|_| { + let raw = texture + .raw + .newTextureViewWithPixelFormat_textureType_levels_slices( + raw_format, + raw_type, + NSRange { + location: desc.range.base_mip_level as _, + length: mip_level_count as _, + }, + NSRange { + location: desc.range.base_array_layer as _, + length: array_layer_count as _, + }, + ) + .unwrap(); if let Some(label) = desc.label { - raw.set_label(label); + raw.setLabel(Some(&NSString::from_str(label))); } raw }) @@ -542,12 +570,12 @@ impl crate::Device for super::Device { &self, desc: &crate::SamplerDescriptor, ) -> DeviceResult { - objc::rc::autoreleasepool(|| { - let descriptor = metal::SamplerDescriptor::new(); + autoreleasepool(|_| { + let descriptor = MTLSamplerDescriptor::new(); - descriptor.set_min_filter(conv::map_filter_mode(desc.min_filter)); - descriptor.set_mag_filter(conv::map_filter_mode(desc.mag_filter)); - descriptor.set_mip_filter(match desc.mipmap_filter { + descriptor.setMinFilter(conv::map_filter_mode(desc.min_filter)); + descriptor.setMagFilter(conv::map_filter_mode(desc.mag_filter)); + descriptor.setMipFilter(match desc.mipmap_filter { wgt::FilterMode::Nearest if desc.lod_clamp == (0.0..0.0) => { MTLSamplerMipFilter::NotMipmapped } @@ -556,45 +584,50 @@ impl crate::Device for super::Device { }); let [s, t, r] = desc.address_modes; - descriptor.set_address_mode_s(conv::map_address_mode(s)); - descriptor.set_address_mode_t(conv::map_address_mode(t)); - descriptor.set_address_mode_r(conv::map_address_mode(r)); + descriptor.setSAddressMode(conv::map_address_mode(s)); + descriptor.setTAddressMode(conv::map_address_mode(t)); + descriptor.setRAddressMode(conv::map_address_mode(r)); // Anisotropy is always supported on mac up to 16x - descriptor.set_max_anisotropy(desc.anisotropy_clamp as _); + descriptor.setMaxAnisotropy(desc.anisotropy_clamp as _); - descriptor.set_lod_min_clamp(desc.lod_clamp.start); - descriptor.set_lod_max_clamp(desc.lod_clamp.end); + descriptor.setLodMinClamp(desc.lod_clamp.start); + descriptor.setLodMaxClamp(desc.lod_clamp.end); if let Some(fun) = desc.compare { - descriptor.set_compare_function(conv::map_compare_function(fun)); + descriptor.setCompareFunction(conv::map_compare_function(fun)); } if let Some(border_color) = desc.border_color { if let wgt::SamplerBorderColor::Zero = border_color { if s == wgt::AddressMode::ClampToBorder { - descriptor.set_address_mode_s(MTLSamplerAddressMode::ClampToZero); + descriptor.setSAddressMode(MTLSamplerAddressMode::ClampToZero); } if t == wgt::AddressMode::ClampToBorder { - descriptor.set_address_mode_t(MTLSamplerAddressMode::ClampToZero); + descriptor.setTAddressMode(MTLSamplerAddressMode::ClampToZero); } if r == wgt::AddressMode::ClampToBorder { - descriptor.set_address_mode_r(MTLSamplerAddressMode::ClampToZero); + descriptor.setRAddressMode(MTLSamplerAddressMode::ClampToZero); } } else { - descriptor.set_border_color(conv::map_border_color(border_color)); + descriptor.setBorderColor(conv::map_border_color(border_color)); } } if let Some(label) = desc.label { - descriptor.set_label(label); + descriptor.setLabel(Some(&NSString::from_str(label))); } if self.features.contains(wgt::Features::TEXTURE_BINDING_ARRAY) { - descriptor.set_support_argument_buffers(true); + descriptor.setSupportArgumentBuffers(true); } - let raw = self.shared.device.lock().new_sampler(&descriptor); + let raw = self + .shared + .device + .lock() + .newSamplerStateWithDescriptor(&descriptor) + .unwrap(); self.counters.samplers.add(1); @@ -746,7 +779,10 @@ impl crate::Device for super::Device { wgt::StorageTextureAccess::Atomic => true, }; } - wgt::BindingType::AccelerationStructure { .. } => unimplemented!(), + wgt::BindingType::AccelerationStructure { .. } => { + target.buffer = Some(info.counters.buffers as _); + info.counters.buffers += 1; + } wgt::BindingType::ExternalTexture => unimplemented!(), } } @@ -826,7 +862,7 @@ impl crate::Device for super::Device { super::AccelerationStructure, >, ) -> DeviceResult { - objc::rc::autoreleasepool(|| { + autoreleasepool(|_| { let mut bg = super::BindGroup::default(); for (&stage, counter) in super::NAGA_STAGES.iter().zip(bg.counters.iter_mut()) { let stage_bit = map_naga_stage(stage); @@ -853,15 +889,20 @@ impl crate::Device for super::Device { let uses = conv::map_resource_usage(&layout.ty); // Create argument buffer for this array - let buffer = self.shared.device.lock().new_buffer( - 8 * count as u64, - MTLResourceOptions::HazardTrackingModeUntracked - | MTLResourceOptions::StorageModeShared, - ); + let buffer = self + .shared + .device + .lock() + .newBufferWithLength_options( + 8 * count as usize, + MTLResourceOptions::HazardTrackingModeUntracked + | MTLResourceOptions::StorageModeShared, + ) + .unwrap(); let contents: &mut [MTLResourceID] = unsafe { core::slice::from_raw_parts_mut( - buffer.contents().cast(), + buffer.contents().cast().as_ptr(), count as usize, ) }; @@ -874,7 +915,7 @@ impl crate::Device for super::Device { let textures = &desc.textures[start..end]; for (idx, tex) in textures.iter().enumerate() { - contents[idx] = tex.view.raw.gpu_resource_id(); + contents[idx] = tex.view.raw.gpuResourceID(); let use_info = bg .resources_to_use @@ -892,18 +933,44 @@ impl crate::Device for super::Device { let samplers = &desc.samplers[start..end]; for (idx, &sampler) in samplers.iter().enumerate() { - contents[idx] = sampler.raw.gpu_resource_id(); + contents[idx] = sampler.raw.gpuResourceID(); // Samplers aren't resources like buffers and textures, so don't // need to be passed to useResource } } + wgt::BindingType::AccelerationStructure { .. } => { + let start = entry.resource_index as usize; + let end = start + count as usize; + let acceleration_structures = + &desc.acceleration_structures[start..end]; + + for (idx, &acceleration_structure) in + acceleration_structures.iter().enumerate() + { + contents[idx] = acceleration_structure.raw.gpuResourceID(); + + let super::BufferLikeResource::AccelerationStructure( + structure, + _residency_set, + ) = acceleration_structure.as_raw() + else { + unreachable!(); + }; + let use_info = + bg.resources_to_use.entry(structure.cast()).or_default(); + use_info.stages |= stages; + use_info.uses |= uses; + use_info.visible_in_compute |= + layout.visibility.contains(wgt::ShaderStages::COMPUTE); + } + } _ => { unimplemented!(); } } - bg.buffers.push(super::BufferResource { - ptr: unsafe { NonNull::new_unchecked(buffer.as_ptr()) }, + bg.buffers.push(super::BufferLikeResource::Buffer { + ptr: NonNull::from(&*buffer), offset: 0, dynamic_index: None, binding_size: None, @@ -946,7 +1013,7 @@ impl crate::Device for super::Device { } _ => None, }; - super::BufferResource { + super::BufferLikeResource::Buffer { ptr: source.buffer.as_raw(), offset: source.offset, dynamic_index: if has_dynamic_offset { @@ -979,7 +1046,16 @@ impl crate::Device for super::Device { ); counter.textures += 1; } - wgt::BindingType::AccelerationStructure { .. } => unimplemented!(), + wgt::BindingType::AccelerationStructure { .. } => { + let start = entry.resource_index as usize; + let end = start + 1; + bg.buffers.extend( + desc.acceleration_structures[start..end].iter().map( + |acceleration_structure| acceleration_structure.as_raw(), + ), + ); + counter.buffers += 1; + } wgt::BindingType::ExternalTexture => unimplemented!(), } } @@ -1013,17 +1089,22 @@ impl crate::Device for super::Device { entry_point, num_workgroups, } => { - let options = metal::CompileOptions::new(); + let options = MTLCompileOptions::new(); // Obtain the locked device from shared let device = self.shared.device.lock(); let library = device - .new_library_with_source(&source, &options) + .newLibraryWithSource_options_error( + &NSString::from_str(&source), + Some(&options), + ) .map_err(|e| crate::ShaderError::Compilation(format!("MSL: {e:?}")))?; - let function = library.get_function(&entry_point, None).map_err(|_| { - crate::ShaderError::Compilation(format!( - "Entry point '{entry_point}' not found" - )) - })?; + let function = library + .newFunctionWithName(&NSString::from_str(&entry_point)) + .ok_or_else(|| { + crate::ShaderError::Compilation(format!( + "Entry point '{entry_point}' not found" + )) + })?; Ok(super::ShaderModule { source: ShaderModuleSource::Passthrough(PassthroughShader { @@ -1064,8 +1145,8 @@ impl crate::Device for super::Device { crate::VertexProcessor::Mesh { .. } => unreachable!(), }; - objc::rc::autoreleasepool(|| { - let descriptor = metal::RenderPipelineDescriptor::new(); + autoreleasepool(|_| { + let descriptor = MTLRenderPipelineDescriptor::new(); let raw_triangle_fill_mode = match desc.primitive.polygon_mode { wgt::PolygonMode::Fill => MTLTriangleFillMode::Fill, @@ -1118,10 +1199,10 @@ impl crate::Device for super::Device { naga::ShaderStage::Vertex, )?; - descriptor.set_vertex_function(Some(&vs.function)); + descriptor.setVertexFunction(Some(&vs.function)); if self.shared.private_caps.supports_mutability { Self::set_buffers_mutability( - descriptor.vertex_buffers().unwrap(), + &descriptor.vertexBuffers(), vs.immutable_buffer_mask, ); } @@ -1147,10 +1228,10 @@ impl crate::Device for super::Device { naga::ShaderStage::Fragment, )?; - descriptor.set_fragment_function(Some(&fs.function)); + descriptor.setFragmentFunction(Some(&fs.function)); if self.shared.private_caps.supports_mutability { Self::set_buffers_mutability( - descriptor.fragment_buffers().unwrap(), + &descriptor.fragmentBuffers(), fs.immutable_buffer_mask, ); } @@ -1168,37 +1249,37 @@ impl crate::Device for super::Device { // TODO: This is a workaround for what appears to be a Metal validation bug // A pixel format is required even though no attachments are provided if desc.color_targets.is_empty() && desc.depth_stencil.is_none() { - descriptor.set_depth_attachment_pixel_format(MTLPixelFormat::Depth32Float); + descriptor.setDepthAttachmentPixelFormat(MTLPixelFormat::Depth32Float); } (None, None) } }; for (i, ct) in desc.color_targets.iter().enumerate() { - let at_descriptor = descriptor.color_attachments().object_at(i as u64).unwrap(); + let at_descriptor = descriptor.colorAttachments().objectAtIndexedSubscript(i); let ct = if let Some(color_target) = ct.as_ref() { color_target } else { - at_descriptor.set_pixel_format(MTLPixelFormat::Invalid); + at_descriptor.setPixelFormat(MTLPixelFormat::Invalid); continue; }; let raw_format = self.shared.private_caps.map_format(ct.format); - at_descriptor.set_pixel_format(raw_format); - at_descriptor.set_write_mask(conv::map_color_write(ct.write_mask)); + at_descriptor.setPixelFormat(raw_format); + at_descriptor.setWriteMask(conv::map_color_write(ct.write_mask)); if let Some(ref blend) = ct.blend { - at_descriptor.set_blending_enabled(true); + at_descriptor.setBlendingEnabled(true); let (color_op, color_src, color_dst) = conv::map_blend_component(&blend.color); let (alpha_op, alpha_src, alpha_dst) = conv::map_blend_component(&blend.alpha); - at_descriptor.set_rgb_blend_operation(color_op); - at_descriptor.set_source_rgb_blend_factor(color_src); - at_descriptor.set_destination_rgb_blend_factor(color_dst); + at_descriptor.setRgbBlendOperation(color_op); + at_descriptor.setSourceRGBBlendFactor(color_src); + at_descriptor.setDestinationRGBBlendFactor(color_dst); - at_descriptor.set_alpha_blend_operation(alpha_op); - at_descriptor.set_source_alpha_blend_factor(alpha_src); - at_descriptor.set_destination_alpha_blend_factor(alpha_dst); + at_descriptor.setAlphaBlendOperation(alpha_op); + at_descriptor.setSourceAlphaBlendFactor(alpha_src); + at_descriptor.setDestinationAlphaBlendFactor(alpha_dst); } } @@ -1207,10 +1288,10 @@ impl crate::Device for super::Device { let raw_format = self.shared.private_caps.map_format(ds.format); let aspects = crate::FormatAspects::from(ds.format); if aspects.contains(crate::FormatAspects::DEPTH) { - descriptor.set_depth_attachment_pixel_format(raw_format); + descriptor.setDepthAttachmentPixelFormat(raw_format); } if aspects.contains(crate::FormatAspects::STENCIL) { - descriptor.set_stencil_attachment_pixel_format(raw_format); + descriptor.setStencilAttachmentPixelFormat(raw_format); } let ds_descriptor = create_depth_stencil_desc(ds); @@ -1218,7 +1299,8 @@ impl crate::Device for super::Device { .shared .device .lock() - .new_depth_stencil_state(&ds_descriptor); + .newDepthStencilStateWithDescriptor(&ds_descriptor) + .unwrap(); Some((raw, ds.bias)) } None => None, @@ -1239,11 +1321,13 @@ impl crate::Device for super::Device { } if !desc_vertex_buffers.is_empty() { - let vertex_descriptor = metal::VertexDescriptor::new(); + let vertex_descriptor = MTLVertexDescriptor::new(); for (i, vb) in desc_vertex_buffers.iter().enumerate() { let buffer_index = self.shared.private_caps.max_vertex_buffers as u64 - 1 - i as u64; - let buffer_desc = vertex_descriptor.layouts().object_at(buffer_index).unwrap(); + let buffer_desc = vertex_descriptor + .layouts() + .objectAtIndexedSubscript(buffer_index as usize); // Metal expects the stride to be the actual size of the attributes. // The semantics of array_stride == 0 can be achieved by setting @@ -1255,44 +1339,43 @@ impl crate::Device for super::Device { .map(|attribute| attribute.offset + attribute.format.size()) .max() .unwrap_or(0); - buffer_desc.set_stride(wgt::math::align_to(stride, 4)); - buffer_desc.set_step_function(MTLVertexStepFunction::Constant); - buffer_desc.set_step_rate(0); + buffer_desc.setStride(wgt::math::align_to(stride as usize, 4)); + buffer_desc.setStepFunction(MTLVertexStepFunction::Constant); + buffer_desc.setStepRate(0); } else { - buffer_desc.set_stride(vb.array_stride); - buffer_desc.set_step_function(conv::map_step_mode(vb.step_mode)); + buffer_desc.setStride(vb.array_stride as usize); + buffer_desc.setStepFunction(conv::map_step_mode(vb.step_mode)); } for at in vb.attributes { let attribute_desc = vertex_descriptor .attributes() - .object_at(at.shader_location as u64) - .unwrap(); - attribute_desc.set_format(conv::map_vertex_format(at.format)); - attribute_desc.set_buffer_index(buffer_index); - attribute_desc.set_offset(at.offset); + .objectAtIndexedSubscript(at.shader_location as usize); + attribute_desc.setFormat(conv::map_vertex_format(at.format)); + attribute_desc.setBufferIndex(buffer_index as usize); + attribute_desc.setOffset(at.offset as usize); } } - descriptor.set_vertex_descriptor(Some(vertex_descriptor)); + descriptor.setVertexDescriptor(Some(&vertex_descriptor)); } if desc.multisample.count != 1 { //TODO: handle sample mask - descriptor.set_sample_count(desc.multisample.count as u64); - descriptor - .set_alpha_to_coverage_enabled(desc.multisample.alpha_to_coverage_enabled); + #[allow(deprecated)] + descriptor.setSampleCount(desc.multisample.count as usize); + descriptor.setAlphaToCoverageEnabled(desc.multisample.alpha_to_coverage_enabled); //descriptor.set_alpha_to_one_enabled(desc.multisample.alpha_to_one_enabled); } if let Some(name) = desc.label { - descriptor.set_label(name); + descriptor.setLabel(Some(&NSString::from_str(name))); } let raw = self .shared .device .lock() - .new_render_pipeline_state(&descriptor) + .newRenderPipelineStateWithDescriptor_error(&descriptor) .map_err(|e| { crate::PipelineError::Linkage( wgt::ShaderStages::VERTEX | wgt::ShaderStages::FRAGMENT, @@ -1338,19 +1421,19 @@ impl crate::Device for super::Device { super::PipelineCache, >, ) -> Result { - objc::rc::autoreleasepool(|| { - let descriptor = metal::ComputePipelineDescriptor::new(); + autoreleasepool(|_| { + let descriptor = MTLComputePipelineDescriptor::new(); let module = desc.stage.module; let cs = if let ShaderModuleSource::Passthrough(desc) = &module.source { CompiledShader { library: desc.library.clone(), function: desc.function.clone(), - wg_size: MTLSize::new( - desc.num_workgroups.0 as u64, - desc.num_workgroups.1 as u64, - desc.num_workgroups.2 as u64, - ), + wg_size: MTLSize { + width: desc.num_workgroups.0 as usize, + height: desc.num_workgroups.1 as usize, + depth: desc.num_workgroups.2 as usize, + }, wg_memory_sizes: vec![], sized_bindings: vec![], immutable_buffer_mask: 0, @@ -1365,13 +1448,10 @@ impl crate::Device for super::Device { )? }; - descriptor.set_compute_function(Some(&cs.function)); + descriptor.setComputeFunction(Some(&cs.function)); if self.shared.private_caps.supports_mutability { - Self::set_buffers_mutability( - descriptor.buffers().unwrap(), - cs.immutable_buffer_mask, - ); + Self::set_buffers_mutability(&descriptor.buffers(), cs.immutable_buffer_mask); } let cs_info = super::PipelineStageInfo { @@ -1382,15 +1462,17 @@ impl crate::Device for super::Device { }; if let Some(name) = desc.label { - descriptor.set_label(name); + descriptor.setLabel(Some(&NSString::from_str(name))); } - let raw = self - .shared - .device - .lock() - .new_compute_pipeline_state(&descriptor) - .map_err(|e| { + // TODO: `newComputePipelineStateWithDescriptor:error:` is not exposed on + // `MTLDevice`, is this always correct? + let raw = unsafe { + msg_send![&**self.shared.device.lock(), newComputePipelineStateWithDescriptor: &*descriptor, error: _] + }; + + let raw: Retained> = + raw.map_err(|e: Retained| { crate::PipelineError::Linkage( wgt::ShaderStages::COMPUTE, format!("new_compute_pipeline_state: {e:?}"), @@ -1425,15 +1507,20 @@ impl crate::Device for super::Device { &self, desc: &wgt::QuerySetDescriptor, ) -> DeviceResult { - objc::rc::autoreleasepool(|| { + autoreleasepool(|_| { match desc.ty { wgt::QueryType::Occlusion => { let size = desc.count as u64 * crate::QUERY_SIZE; let options = MTLResourceOptions::empty(); //TODO: HazardTrackingModeUntracked - let raw_buffer = self.shared.device.lock().new_buffer(size, options); + let raw_buffer = self + .shared + .device + .lock() + .newBufferWithLength_options(size as usize, options) + .unwrap(); if let Some(label) = desc.label { - raw_buffer.set_label(label); + raw_buffer.setLabel(Some(&NSString::from_str(label))); } Ok(super::QuerySet { raw_buffer, @@ -1444,28 +1531,32 @@ impl crate::Device for super::Device { wgt::QueryType::Timestamp => { let size = desc.count as u64 * crate::QUERY_SIZE; let device = self.shared.device.lock(); - let destination_buffer = device.new_buffer(size, MTLResourceOptions::empty()); + let destination_buffer = device + .newBufferWithLength_options(size as usize, MTLResourceOptions::empty()) + .unwrap(); - let csb_desc = metal::CounterSampleBufferDescriptor::new(); - csb_desc.set_storage_mode(MTLStorageMode::Shared); - csb_desc.set_sample_count(desc.count as _); + let csb_desc = MTLCounterSampleBufferDescriptor::new(); + csb_desc.setStorageMode(MTLStorageMode::Shared); + csb_desc.setSampleCount(desc.count as _); if let Some(label) = desc.label { - csb_desc.set_label(label); + csb_desc.setLabel(&NSString::from_str(label)); } - let counter_sets = device.counter_sets(); - let timestamp_counter = - match counter_sets.iter().find(|cs| cs.name() == "timestamp") { - Some(counter) => counter, - None => { - log::error!("Failed to obtain timestamp counter set."); - return Err(crate::DeviceError::Unexpected); - } - }; - csb_desc.set_counter_set(timestamp_counter); + let counter_sets = device.counterSets().unwrap(); + let timestamp_counter = match counter_sets + .iter() + .find(|cs| &*cs.name() == ns_string!("timestamp")) + { + Some(counter) => counter, + None => { + log::error!("Failed to obtain timestamp counter set."); + return Err(crate::DeviceError::Unexpected); + } + }; + csb_desc.setCounterSet(Some(×tamp_counter)); let counter_sample_buffer = - match device.new_counter_sample_buffer_with_descriptor(&csb_desc) { + match device.newCounterSampleBufferWithDescriptor_error(&csb_desc) { Ok(buffer) => buffer, Err(err) => { log::error!("Failed to create counter sample buffer: {err:?}"); @@ -1495,7 +1586,7 @@ impl crate::Device for super::Device { unsafe fn create_fence(&self) -> DeviceResult { self.counters.fences.add(1); let shared_event = if self.shared.private_caps.supports_shared_event { - Some(self.shared.device.lock().new_shared_event()) + Some(self.shared.device.lock().newSharedEvent().unwrap()) } else { None }; @@ -1558,52 +1649,113 @@ impl crate::Device for super::Device { return false; } let device = self.shared.device.lock(); - let shared_capture_manager = metal::CaptureManager::shared(); - let default_capture_scope = shared_capture_manager.new_capture_scope_with_device(&device); - shared_capture_manager.set_default_capture_scope(&default_capture_scope); - shared_capture_manager.start_capture_with_scope(&default_capture_scope); - default_capture_scope.begin_scope(); + let shared_capture_manager = MTLCaptureManager::sharedCaptureManager(); + let default_capture_scope = shared_capture_manager.newCaptureScopeWithDevice(&device); + shared_capture_manager.setDefaultCaptureScope(Some(&default_capture_scope)); + #[allow(deprecated)] + shared_capture_manager.startCaptureWithScope(&default_capture_scope); + default_capture_scope.beginScope(); true } unsafe fn stop_graphics_debugger_capture(&self) { - let shared_capture_manager = metal::CaptureManager::shared(); - if let Some(default_capture_scope) = shared_capture_manager.default_capture_scope() { - default_capture_scope.end_scope(); + let shared_capture_manager = MTLCaptureManager::sharedCaptureManager(); + if let Some(default_capture_scope) = shared_capture_manager.defaultCaptureScope() { + default_capture_scope.endScope(); } - shared_capture_manager.stop_capture(); + shared_capture_manager.stopCapture(); } unsafe fn get_acceleration_structure_build_sizes( &self, - _desc: &crate::GetAccelerationStructureBuildSizesDescriptor, + descriptor: &crate::GetAccelerationStructureBuildSizesDescriptor, ) -> crate::AccelerationStructureBuildSizes { - unimplemented!() + let acceleration_structure_descriptor = + conv::map_acceleration_structure_descriptor(descriptor.entries, descriptor.flags); + let device = self.shared.device.lock(); + let info = + device.accelerationStructureSizesWithDescriptor(&acceleration_structure_descriptor); + crate::AccelerationStructureBuildSizes { + acceleration_structure_size: info.accelerationStructureSize as u64, + update_scratch_size: info.refitScratchBufferSize as u64, + build_scratch_size: info.buildScratchBufferSize as u64, + } } unsafe fn get_acceleration_structure_device_address( &self, - _acceleration_structure: &super::AccelerationStructure, + acceleration_structure: &super::AccelerationStructure, ) -> wgt::BufferAddress { - unimplemented!() + acceleration_structure.raw.gpuResourceID().to_raw() } unsafe fn create_acceleration_structure( &self, - _desc: &crate::AccelerationStructureDescriptor, + descriptor: &crate::AccelerationStructureDescriptor, ) -> Result { - unimplemented!() + // self.counters.acceleration_structures.add(1); + let device = self.shared.device.lock(); + autoreleasepool(|_| { + let residency_set_descriptor = MTLResidencySetDescriptor::new(); + residency_set_descriptor.setInitialCapacity(1); + Ok(super::AccelerationStructure { + raw: device + .newAccelerationStructureWithSize(descriptor.size as usize) + .ok_or(crate::DeviceError::OutOfMemory)?, + residency_set: device + .newResidencySetWithDescriptor_error(&residency_set_descriptor) + .map_err(|err| { + log::error!("Failed to create residency set: {err:?}"); + crate::DeviceError::Unexpected + })?, + }) + }) } unsafe fn destroy_acceleration_structure( &self, - _acceleration_structure: super::AccelerationStructure, + acceleration_structure: super::AccelerationStructure, ) { - unimplemented!() + // self.counters.acceleration_structures.sub(1); + acceleration_structure.residency_set.endResidency(); } - fn tlas_instance_to_bytes(&self, _instance: TlasInstance) -> Vec { - unimplemented!() + fn tlas_instance_to_bytes(&self, instance: TlasInstance) -> Vec { + let temp = MTLIndirectAccelerationStructureInstanceDescriptor { + transformationMatrix: MTLPackedFloat4x3 { + columns: [ + MTLPackedFloat3 { + x: instance.transform[0], + y: instance.transform[4], + z: instance.transform[8], + }, + MTLPackedFloat3 { + x: instance.transform[1], + y: instance.transform[5], + z: instance.transform[9], + }, + MTLPackedFloat3 { + x: instance.transform[2], + y: instance.transform[6], + z: instance.transform[10], + }, + MTLPackedFloat3 { + x: instance.transform[3], + y: instance.transform[7], + z: instance.transform[11], + }, + ], + }, + options: MTLAccelerationStructureInstanceOptions::None, + mask: instance.mask as u32, + intersectionFunctionTableOffset: 0, + userID: instance.custom_data, + accelerationStructureID: unsafe { MTLResourceID::from_raw(instance.blas_address) }, + }; + + wgt::bytemuck_wrapper!(unsafe struct Desc(MTLIndirectAccelerationStructureInstanceDescriptor)); + + bytemuck::bytes_of(&Desc::wrap(temp)).to_vec() } fn get_internal_counters(&self) -> wgt::HalCounters { diff --git a/wgpu-hal/src/metal/layer_observer.rs b/wgpu-hal/src/metal/layer_observer.rs deleted file mode 100644 index 8acd83b5314..00000000000 --- a/wgpu-hal/src/metal/layer_observer.rs +++ /dev/null @@ -1,190 +0,0 @@ -//! A rewrite of `raw-window-metal` using `objc` instead of `objc2`. -//! -//! See that for details: -//! -//! This should be temporary, see . - -use core::ffi::{c_void, CStr}; -use core_graphics_types::base::CGFloat; -use core_graphics_types::geometry::CGRect; -use objc::declare::ClassDecl; -use objc::rc::StrongPtr; -use objc::runtime::{Class, Object, Sel, BOOL, NO}; -use objc::{class, msg_send, sel, sel_impl}; -use std::sync::OnceLock; - -#[link(name = "Foundation", kind = "framework")] -extern "C" { - static NSKeyValueChangeNewKey: &'static Object; -} - -#[allow(non_upper_case_globals)] -const NSKeyValueObservingOptionNew: usize = 0x01; -#[allow(non_upper_case_globals)] -const NSKeyValueObservingOptionInitial: usize = 0x04; - -const CONTENTS_SCALE: &CStr = c"contentsScale"; -const BOUNDS: &CStr = c"bounds"; - -/// Create a new custom layer that tracks parameters from the given super layer. -/// -/// Same as . -pub unsafe fn new_observer_layer(root_layer: *mut Object) -> StrongPtr { - let this: *mut Object = unsafe { msg_send![class(), new] }; - - // Add the layer as a sublayer of the root layer. - let _: () = unsafe { msg_send![root_layer, addSublayer: this] }; - - // Register for key-value observing. - let key_path: *const Object = - unsafe { msg_send![class!(NSString), stringWithUTF8String: CONTENTS_SCALE.as_ptr()] }; - let _: () = unsafe { - msg_send![ - root_layer, - addObserver: this - forKeyPath: key_path - options: NSKeyValueObservingOptionNew | NSKeyValueObservingOptionInitial - context: context_ptr() - ] - }; - - let key_path: *const Object = - unsafe { msg_send![class!(NSString), stringWithUTF8String: BOUNDS.as_ptr()] }; - let _: () = unsafe { - msg_send![ - root_layer, - addObserver: this - forKeyPath: key_path - options: NSKeyValueObservingOptionNew | NSKeyValueObservingOptionInitial - context: context_ptr() - ] - }; - - // Uncomment when debugging resize issues. - // extern "C" { - // static kCAGravityTopLeft: *mut Object; - // } - // let _: () = unsafe { msg_send![this, setContentsGravity: kCAGravityTopLeft] }; - - unsafe { StrongPtr::new(this) } -} - -/// Same as . -fn class() -> &'static Class { - static CLASS: OnceLock<&'static Class> = OnceLock::new(); - - CLASS.get_or_init(|| { - let superclass = class!(CAMetalLayer); - let class_name = format!("WgpuObserverLayer@{:p}", &CLASS); - let mut decl = ClassDecl::new(&class_name, superclass).unwrap(); - - // From NSKeyValueObserving. - let sel = sel!(observeValueForKeyPath:ofObject:change:context:); - let method: extern "C" fn( - &Object, - Sel, - *mut Object, - *mut Object, - *mut Object, - *mut c_void, - ) = observe_value; - unsafe { decl.add_method(sel, method) }; - - let sel = sel!(dealloc); - let method: extern "C" fn(&Object, Sel) = dealloc; - unsafe { decl.add_method(sel, method) }; - - decl.register() - }) -} - -/// The unique context pointer for this class. -fn context_ptr() -> *mut c_void { - let ptr: *const Class = class(); - ptr.cast_mut().cast() -} - -/// Same as . -extern "C" fn observe_value( - this: &Object, - _cmd: Sel, - key_path: *mut Object, - object: *mut Object, - change: *mut Object, - context: *mut c_void, -) { - // An unrecognized context must belong to the super class. - if context != context_ptr() { - // SAFETY: The signature is correct, and it's safe to forward to - // the superclass' method when we're overriding the method. - return unsafe { - msg_send![ - super(this, class!(CAMetalLayer)), - observeValueForKeyPath: key_path - ofObject: object - change: change - context: context - ] - }; - } - - assert!(!change.is_null()); - - let key = unsafe { NSKeyValueChangeNewKey }; - let new: *mut Object = unsafe { msg_send![change, objectForKey: key] }; - assert!(!new.is_null()); - - let to_compare: *const Object = - unsafe { msg_send![class!(NSString), stringWithUTF8String: CONTENTS_SCALE.as_ptr()] }; - let is_equal: BOOL = unsafe { msg_send![key_path, isEqual: to_compare] }; - if is_equal != NO { - // `contentsScale` is a CGFloat, and so the observed value is always a NSNumber. - let scale_factor: CGFloat = if cfg!(target_pointer_width = "64") { - unsafe { msg_send![new, doubleValue] } - } else { - unsafe { msg_send![new, floatValue] } - }; - - // Set the scale factor of the layer to match the root layer. - let _: () = unsafe { msg_send![this, setContentsScale: scale_factor] }; - return; - } - - let to_compare: *const Object = - unsafe { msg_send![class!(NSString), stringWithUTF8String: BOUNDS.as_ptr()] }; - let is_equal: BOOL = unsafe { msg_send![key_path, isEqual: to_compare] }; - if is_equal != NO { - // `bounds` is a CGRect, and so the observed value is always a NSNumber. - let bounds: CGRect = unsafe { msg_send![new, rectValue] }; - - // Set `bounds` and `position` to match the root layer. - // - // This differs from just setting the `bounds`, as it also takes into account any - // translation that the superlayer may have that we'd want to preserve. - let _: () = unsafe { msg_send![this, setFrame: bounds] }; - return; - } - - panic!("unknown observed keypath {key_path:?}"); -} - -extern "C" fn dealloc(this: &Object, _cmd: Sel) { - // Load the root layer if it still exists, and deregister the observer. - // - // This is not entirely sound, as the ObserverLayer _could_ have been - // moved to another layer; but Wgpu doesn't do that, so it should be fine. - // - // `raw-window-metal` uses a weak instance variable to do it correctly: - // https://docs.rs/raw-window-metal/1.1.0/src/raw_window_metal/observer.rs.html#74-132 - // (but that's difficult to do with `objc`). - let root_layer: *mut Object = unsafe { msg_send![this, superlayer] }; - if !root_layer.is_null() { - let key_path: *const Object = - unsafe { msg_send![class!(NSString), stringWithUTF8String: CONTENTS_SCALE.as_ptr()] }; - let _: () = unsafe { msg_send![root_layer, removeObserver: this forKeyPath: key_path] }; - - let key_path: *const Object = - unsafe { msg_send![class!(NSString), stringWithUTF8String: BOUNDS.as_ptr()] }; - let _: () = unsafe { msg_send![root_layer, removeObserver: this forKeyPath: key_path] }; - } -} diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs index 00223b2f778..5f4c7b716a8 100644 --- a/wgpu-hal/src/metal/mod.rs +++ b/wgpu-hal/src/metal/mod.rs @@ -12,6 +12,9 @@ resources, followed by other bind groups. The vertex buffers are bound at the ve end of the VS buffer table. !*/ +// Avoid noise, many objc2-metal functions are still `unsafe`. +// See also https://github.com/madsmtm/objc2/issues/685. +#![allow(unsafe_op_in_unsafe_fn)] // `MTLFeatureSet` is superseded by `MTLGpuFamily`. // However, `MTLGpuFamily` is only supported starting MacOS 10.15, whereas our minimum target is MacOS 10.13, @@ -22,23 +25,36 @@ mod adapter; mod command; mod conv; mod device; -mod layer_observer; mod surface; mod time; -use alloc::{borrow::ToOwned as _, string::String, sync::Arc, vec::Vec}; +use alloc::{ + string::{String, ToString as _}, + sync::Arc, + vec::Vec, +}; use core::{fmt, iter, ops, ptr::NonNull, sync::atomic}; use arrayvec::ArrayVec; use bitflags::bitflags; -use hashbrown::HashMap; -use metal::{ - foreign_types::ForeignTypeRef as _, MTLArgumentBuffersTier, MTLBuffer, MTLCommandBufferStatus, - MTLCullMode, MTLDepthClipMode, MTLIndexType, MTLLanguageVersion, MTLPrimitiveType, - MTLReadWriteTextureTier, MTLRenderStages, MTLResource, MTLResourceUsage, MTLSamplerState, - MTLSize, MTLTexture, MTLTextureType, MTLTriangleFillMode, MTLWinding, -}; +use hashbrown::{HashMap, HashSet}; use naga::FastHashMap; +use objc2::{ + rc::{autoreleasepool, Retained}, + runtime::ProtocolObject, +}; +use objc2_foundation::ns_string; +use objc2_metal::{ + MTLAccelerationStructure, MTLAccelerationStructureCommandEncoder, MTLArgumentBuffersTier, + MTLBlitCommandEncoder, MTLBuffer, MTLCommandBuffer, MTLCommandBufferStatus, MTLCommandQueue, + MTLComputeCommandEncoder, MTLComputePipelineState, MTLCounterSampleBuffer, MTLCullMode, + MTLDepthClipMode, MTLDepthStencilState, MTLDevice, MTLDrawable, MTLFunction, MTLIndexType, + MTLLanguageVersion, MTLLibrary, MTLPrimitiveType, MTLReadWriteTextureTier, + MTLRenderCommandEncoder, MTLRenderPipelineState, MTLRenderStages, MTLResidencySet, MTLResource, + MTLResourceUsage, MTLSamplerState, MTLSharedEvent, MTLSize, MTLTexture, MTLTextureType, + MTLTriangleFillMode, MTLWinding, +}; +use objc2_quartz_core::CAMetalLayer; use parking_lot::{Mutex, RwLock}; #[derive(Clone, Debug)] @@ -105,8 +121,8 @@ crate::impl_dyn_resource!( pub struct Instance {} impl Instance { - pub fn create_surface_from_layer(&self, layer: &metal::MetalLayerRef) -> Surface { - unsafe { Surface::from_layer(layer) } + pub fn create_surface_from_layer(&self, layer: &CAMetalLayer) -> Surface { + Surface::from_layer(layer) } } @@ -125,30 +141,38 @@ impl crate::Instance for Instance { _display_handle: raw_window_handle::RawDisplayHandle, window_handle: raw_window_handle::RawWindowHandle, ) -> Result { - match window_handle { - #[cfg(any(target_os = "ios", target_os = "visionos"))] - raw_window_handle::RawWindowHandle::UiKit(handle) => { - Ok(unsafe { Surface::from_view(handle.ui_view.cast()) }) - } - #[cfg(target_os = "macos")] - raw_window_handle::RawWindowHandle::AppKit(handle) => { - Ok(unsafe { Surface::from_view(handle.ns_view.cast()) }) + let layer = match window_handle { + raw_window_handle::RawWindowHandle::AppKit(handle) => unsafe { + raw_window_metal::Layer::from_ns_view(handle.ns_view) + }, + raw_window_handle::RawWindowHandle::UiKit(handle) => unsafe { + raw_window_metal::Layer::from_ui_view(handle.ui_view) + }, + _ => { + return Err(crate::InstanceError::new(format!( + "window handle {window_handle:?} is not a Metal-compatible handle" + ))) } - _ => Err(crate::InstanceError::new(format!( - "window handle {window_handle:?} is not a Metal-compatible handle" - ))), - } + }; + + // SAFETY: The layer is an initialized instance of `CAMetalLayer`, and + // we transfer the retain count to `Retained` using `into_raw`. + let layer = unsafe { + Retained::from_raw(layer.into_raw().cast::().as_ptr()).unwrap() + }; + + Ok(Surface::new(layer)) } unsafe fn enumerate_adapters( &self, _surface_hint: Option<&Surface>, ) -> Vec> { - let devices = metal::Device::all(); + let devices = objc2_metal::MTLCopyAllDevices(); let mut adapters: Vec> = devices .into_iter() .map(|dev| { - let name = dev.name().into(); + let name = dev.name().to_string(); let shared = AdapterShared::new(dev); crate::ExposedAdapter { info: wgt::AdapterInfo { @@ -300,6 +324,7 @@ struct PrivateCapabilities { int64_atomics: bool, float_atomics: bool, supports_shared_event: bool, + supports_raytracing: bool, } #[derive(Clone, Debug)] @@ -325,7 +350,7 @@ impl Default for Settings { } struct AdapterShared { - device: Mutex, + device: Mutex>>, disabilities: PrivateDisabilities, private_caps: PrivateCapabilities, settings: Settings, @@ -336,7 +361,7 @@ unsafe impl Send for AdapterShared {} unsafe impl Sync for AdapterShared {} impl AdapterShared { - fn new(device: metal::Device) -> Self { + fn new(device: Retained>) -> Self { let private_caps = PrivateCapabilities::new(&device); log::debug!("{private_caps:#?}"); @@ -355,7 +380,7 @@ pub struct Adapter { } pub struct Queue { - raw: Arc>, + raw: Arc>>>, timestamp_period: f32, } @@ -363,14 +388,17 @@ unsafe impl Send for Queue {} unsafe impl Sync for Queue {} impl Queue { - pub unsafe fn queue_from_raw(raw: metal::CommandQueue, timestamp_period: f32) -> Self { + pub unsafe fn queue_from_raw( + raw: Retained>, + timestamp_period: f32, + ) -> Self { Self { raw: Arc::new(Mutex::new(raw)), timestamp_period, } } - pub fn as_raw(&self) -> &Arc> { + pub fn as_raw(&self) -> &Arc>>> { &self.raw } } @@ -382,7 +410,7 @@ pub struct Device { } pub struct Surface { - render_layer: Mutex, + render_layer: Mutex>, swapchain_format: RwLock>, extent: RwLock, // Useful for UI-intensive applications that are sensitive to @@ -396,7 +424,7 @@ unsafe impl Sync for Surface {} #[derive(Debug)] pub struct SurfaceTexture { texture: Texture, - drawable: metal::MetalDrawable, + drawable: Retained>, present_with_transaction: bool, } @@ -426,33 +454,31 @@ impl crate::Queue for Queue { _surface_textures: &[&SurfaceTexture], (signal_fence, signal_value): (&mut Fence, crate::FenceValue), ) -> Result<(), crate::DeviceError> { - objc::rc::autoreleasepool(|| { + autoreleasepool(|_| { let extra_command_buffer = { let completed_value = Arc::clone(&signal_fence.completed_value); - let block = block::ConcreteBlock::new(move |_cmd_buf| { + let block = block2::RcBlock::new(move |_cmd_buf| { completed_value.store(signal_value, atomic::Ordering::Release); - }) - .copy(); + }); + let block: *const _ = &*block; let raw = match command_buffers.last() { - Some(&cmd_buf) => cmd_buf.raw.to_owned(), + Some(&cmd_buf) => cmd_buf.raw.clone(), None => { let queue = self.raw.lock(); - queue - .new_command_buffer_with_unretained_references() - .to_owned() + queue.commandBufferWithUnretainedReferences().unwrap() } }; - raw.set_label("(wgpu internal) Signal"); - raw.add_completed_handler(&block); + raw.setLabel(Some(ns_string!("(wgpu internal) Signal"))); + raw.addCompletedHandler(block.cast_mut()); signal_fence.maintain(); signal_fence .pending_command_buffers - .push((signal_value, raw.to_owned())); + .push((signal_value, raw.clone())); - if let Some(shared_event) = signal_fence.shared_event.as_ref() { - raw.encode_signal_event(shared_event, signal_value); + if let Some(shared_event) = &signal_fence.shared_event { + raw.encodeSignalEvent_value(shared_event.as_ref(), signal_value); } // only return an extra one if it's extra match command_buffers.last() { @@ -463,6 +489,9 @@ impl crate::Queue for Queue { for cmd_buffer in command_buffers { cmd_buffer.raw.commit(); + for residency_set in &cmd_buffer.residency_sets { + residency_set.commit(); + } } if let Some(raw) = extra_command_buffer { @@ -477,19 +506,19 @@ impl crate::Queue for Queue { texture: SurfaceTexture, ) -> Result<(), crate::SurfaceError> { let queue = &self.raw.lock(); - objc::rc::autoreleasepool(|| { - let command_buffer = queue.new_command_buffer(); - command_buffer.set_label("(wgpu internal) Present"); + autoreleasepool(|_| { + let command_buffer = queue.commandBuffer().unwrap(); + command_buffer.setLabel(Some(ns_string!("(wgpu internal) Present"))); // https://developer.apple.com/documentation/quartzcore/cametallayer/1478157-presentswithtransaction?language=objc if !texture.present_with_transaction { - command_buffer.present_drawable(&texture.drawable); + command_buffer.presentDrawable(&texture.drawable); } command_buffer.commit(); if texture.present_with_transaction { - command_buffer.wait_until_scheduled(); + command_buffer.waitUntilScheduled(); texture.drawable.present(); } }); @@ -503,7 +532,7 @@ impl crate::Queue for Queue { #[derive(Debug)] pub struct Buffer { - raw: metal::Buffer, + raw: Retained>, size: wgt::BufferAddress, } @@ -513,8 +542,8 @@ unsafe impl Sync for Buffer {} impl crate::DynBuffer for Buffer {} impl Buffer { - fn as_raw(&self) -> BufferPtr { - unsafe { NonNull::new_unchecked(self.raw.as_ptr()) } + fn as_raw(&self) -> NonNull> { + unsafe { NonNull::new_unchecked(Retained::as_ptr(&self.raw) as *mut _) } } } @@ -529,7 +558,7 @@ impl crate::BufferBinding<'_, Buffer> { #[derive(Debug)] pub struct Texture { - raw: metal::Texture, + raw: Retained>, format: wgt::TextureFormat, raw_type: MTLTextureType, array_layers: u32, @@ -538,10 +567,7 @@ pub struct Texture { } impl Texture { - /// # Safety - /// - /// - The texture handle must not be manually destroyed - pub unsafe fn raw_handle(&self) -> &metal::Texture { + pub fn raw_handle(&self) -> &ProtocolObject { &self.raw } } @@ -553,7 +579,7 @@ unsafe impl Sync for Texture {} #[derive(Debug)] pub struct TextureView { - raw: metal::Texture, + raw: Retained>, aspects: crate::FormatAspects, } @@ -563,14 +589,14 @@ unsafe impl Send for TextureView {} unsafe impl Sync for TextureView {} impl TextureView { - fn as_raw(&self) -> TexturePtr { - unsafe { NonNull::new_unchecked(self.raw.as_ptr()) } + fn as_raw(&self) -> NonNull> { + unsafe { NonNull::new_unchecked(Retained::as_ptr(&self.raw) as *mut _) } } } #[derive(Debug)] pub struct Sampler { - raw: metal::SamplerState, + raw: Retained>, } impl crate::DynSampler for Sampler {} @@ -579,8 +605,8 @@ unsafe impl Send for Sampler {} unsafe impl Sync for Sampler {} impl Sampler { - fn as_raw(&self) -> SamplerPtr { - unsafe { NonNull::new_unchecked(self.raw.as_ptr()) } + fn as_raw(&self) -> NonNull> { + unsafe { NonNull::new_unchecked(Retained::as_ptr(&self.raw) as *mut _) } } } @@ -676,82 +702,29 @@ pub struct PipelineLayout { impl crate::DynPipelineLayout for PipelineLayout {} -trait AsNative { - type Native; - fn from(native: &Self::Native) -> Self; - fn as_native(&self) -> &Self::Native; -} - -type ResourcePtr = NonNull; -type BufferPtr = NonNull; -type TexturePtr = NonNull; -type SamplerPtr = NonNull; - -impl AsNative for ResourcePtr { - type Native = metal::ResourceRef; - #[inline] - fn from(native: &Self::Native) -> Self { - unsafe { NonNull::new_unchecked(native.as_ptr()) } - } - #[inline] - fn as_native(&self) -> &Self::Native { - unsafe { Self::Native::from_ptr(self.as_ptr()) } - } -} - -impl AsNative for BufferPtr { - type Native = metal::BufferRef; - #[inline] - fn from(native: &Self::Native) -> Self { - unsafe { NonNull::new_unchecked(native.as_ptr()) } - } - #[inline] - fn as_native(&self) -> &Self::Native { - unsafe { Self::Native::from_ptr(self.as_ptr()) } - } -} - -impl AsNative for TexturePtr { - type Native = metal::TextureRef; - #[inline] - fn from(native: &Self::Native) -> Self { - unsafe { NonNull::new_unchecked(native.as_ptr()) } - } - #[inline] - fn as_native(&self) -> &Self::Native { - unsafe { Self::Native::from_ptr(self.as_ptr()) } - } -} - -impl AsNative for SamplerPtr { - type Native = metal::SamplerStateRef; - #[inline] - fn from(native: &Self::Native) -> Self { - unsafe { NonNull::new_unchecked(native.as_ptr()) } - } - #[inline] - fn as_native(&self) -> &Self::Native { - unsafe { Self::Native::from_ptr(self.as_ptr()) } - } -} - #[derive(Debug)] -struct BufferResource { - ptr: BufferPtr, - offset: wgt::BufferAddress, - dynamic_index: Option, - - /// The buffer's size, if it is a [`Storage`] binding. Otherwise `None`. - /// - /// Buffers with the [`wgt::BufferBindingType::Storage`] binding type can - /// hold WGSL runtime-sized arrays. When one does, we must pass its size to - /// shader entry points to implement bounds checks and WGSL's `arrayLength` - /// function. See `device::CompiledShader::sized_bindings` for details. - /// - /// [`Storage`]: wgt::BufferBindingType::Storage - binding_size: Option, - - binding_location: u32, +enum BufferLikeResource { + Buffer { + ptr: NonNull>, + offset: wgt::BufferAddress, + dynamic_index: Option, + + /// The buffer's size, if it is a [`Storage`] binding. Otherwise `None`. + /// + /// Buffers with the [`wgt::BufferBindingType::Storage`] binding type can + /// hold WGSL runtime-sized arrays. When one does, we must pass its size to + /// shader entry points to implement bounds checks and WGSL's `arrayLength` + /// function. See `device::CompiledShader::sized_bindings` for details. + /// + /// [`Storage`]: wgt::BufferBindingType::Storage + binding_size: Option, + + binding_location: u32, + }, + AccelerationStructure( + NonNull>, + NonNull>, + ), } #[derive(Debug)] @@ -774,12 +747,12 @@ impl Default for UseResourceInfo { #[derive(Debug, Default)] pub struct BindGroup { counters: MultiStageResourceCounters, - buffers: Vec, - samplers: Vec, - textures: Vec, + buffers: Vec, + samplers: Vec>>, + textures: Vec>>, - argument_buffers: Vec, - resources_to_use: HashMap, + argument_buffers: Vec>>, + resources_to_use: HashMap>, UseResourceInfo>, } impl crate::DynBindGroup for BindGroup {} @@ -795,12 +768,15 @@ pub enum ShaderModuleSource { #[derive(Debug)] pub struct PassthroughShader { - pub library: metal::Library, - pub function: metal::Function, + pub library: Retained>, + pub function: Retained>, pub entry_point: String, pub num_workgroups: (u32, u32, u32), } +unsafe impl Send for PassthroughShader {} +unsafe impl Sync for PassthroughShader {} + #[derive(Debug)] pub struct ShaderModule { source: ShaderModuleSource, @@ -848,11 +824,11 @@ impl PipelineStageInfo { #[derive(Debug)] pub struct RenderPipeline { - raw: metal::RenderPipelineState, + raw: Retained>, #[allow(dead_code)] - vs_lib: metal::Library, + vs_lib: Retained>, #[allow(dead_code)] - fs_lib: Option, + fs_lib: Option>>, vs_info: PipelineStageInfo, fs_info: Option, raw_primitive_type: MTLPrimitiveType, @@ -860,7 +836,10 @@ pub struct RenderPipeline { raw_front_winding: MTLWinding, raw_cull_mode: MTLCullMode, raw_depth_clip_mode: Option, - depth_stencil: Option<(metal::DepthStencilState, wgt::DepthBiasState)>, + depth_stencil: Option<( + Retained>, + wgt::DepthBiasState, + )>, } unsafe impl Send for RenderPipeline {} @@ -870,9 +849,9 @@ impl crate::DynRenderPipeline for RenderPipeline {} #[derive(Debug)] pub struct ComputePipeline { - raw: metal::ComputePipelineState, + raw: Retained>, #[allow(dead_code)] - cs_lib: metal::Library, + cs_lib: Retained>, cs_info: PipelineStageInfo, work_group_size: MTLSize, work_group_memory_sizes: Vec, @@ -885,9 +864,9 @@ impl crate::DynComputePipeline for ComputePipeline {} #[derive(Debug, Clone)] pub struct QuerySet { - raw_buffer: metal::Buffer, + raw_buffer: Retained>, //Metal has a custom buffer for counters. - counter_sample_buffer: Option, + counter_sample_buffer: Option>>, ty: wgt::QueryType, } @@ -900,8 +879,11 @@ unsafe impl Sync for QuerySet {} pub struct Fence { completed_value: Arc, /// The pending fence values have to be ascending. - pending_command_buffers: Vec<(crate::FenceValue, metal::CommandBuffer)>, - shared_event: Option, + pending_command_buffers: Vec<( + crate::FenceValue, + Retained>, + )>, + shared_event: Option>>, } impl crate::DynFence for Fence {} @@ -926,13 +908,13 @@ impl Fence { .retain(|&(value, _)| value > latest); } - pub fn raw_shared_event(&self) -> Option<&metal::SharedEvent> { - self.shared_event.as_ref() + pub fn raw_shared_event(&self) -> Option<&ProtocolObject> { + self.shared_event.as_deref() } } struct IndexState { - buffer_ptr: BufferPtr, + buffer_ptr: NonNull>, offset: wgt::BufferAddress, stride: wgt::BufferAddress, raw_type: MTLIndexType, @@ -944,13 +926,16 @@ struct Temp { } struct CommandState { - blit: Option, - render: Option, - compute: Option, + blit: Option>>, + acceleration_structure_builder: + Option>>, + render: Option>>, + compute: Option>>, raw_primitive_type: MTLPrimitiveType, index: Option, raw_wg_size: MTLSize, stage_infos: MultiStageData, + residency_sets: HashSet>>, /// Sizes of currently bound [`wgt::BufferBindingType::Storage`] buffers. /// @@ -984,8 +969,8 @@ struct CommandState { pub struct CommandEncoder { shared: Arc, - raw_queue: Arc>, - raw_cmd_buf: Option, + raw_queue: Arc>>>, + raw_cmd_buf: Option>>, state: CommandState, temp: Temp, counters: Arc, @@ -1005,7 +990,8 @@ unsafe impl Sync for CommandEncoder {} #[derive(Debug)] pub struct CommandBuffer { - raw: metal::CommandBuffer, + raw: Retained>, + residency_sets: HashSet>>, } impl crate::DynCommandBuffer for CommandBuffer {} @@ -1019,6 +1005,23 @@ pub struct PipelineCache; impl crate::DynPipelineCache for PipelineCache {} #[derive(Debug)] -pub struct AccelerationStructure; +pub struct AccelerationStructure { + raw: Retained>, + residency_set: Retained>, +} + +impl AccelerationStructure { + fn as_raw(&self) -> BufferLikeResource { + unsafe { + BufferLikeResource::AccelerationStructure( + NonNull::new_unchecked(Retained::as_ptr(&self.raw) as *mut _), + NonNull::new_unchecked(Retained::as_ptr(&self.residency_set) as *mut _), + ) + } + } +} impl crate::DynAccelerationStructure for AccelerationStructure {} + +unsafe impl Send for AccelerationStructure {} +unsafe impl Sync for AccelerationStructure {} diff --git a/wgpu-hal/src/metal/surface.rs b/wgpu-hal/src/metal/surface.rs index 2a705cd7901..5b34d9d7278 100644 --- a/wgpu-hal/src/metal/surface.rs +++ b/wgpu-hal/src/metal/surface.rs @@ -1,29 +1,18 @@ -#![allow(clippy::let_unit_value)] // `let () =` being used to constrain result type - use alloc::borrow::ToOwned as _; -use core::mem::ManuallyDrop; -use core::ptr::NonNull; -use core_graphics_types::{ - base::CGFloat, - geometry::{CGRect, CGSize}, -}; -use metal::{foreign_types::ForeignType, MTLTextureType}; -use objc::{ - class, msg_send, - rc::{autoreleasepool, StrongPtr}, - runtime::{Object, BOOL, NO, YES}, - sel, sel_impl, +use objc2::{ + rc::{autoreleasepool, Retained}, + runtime::ProtocolObject, + ClassType, Message, }; +use objc2_core_foundation::CGSize; +use objc2_foundation::NSObjectProtocol; +use objc2_metal::MTLTextureType; +use objc2_quartz_core::{CAMetalDrawable, CAMetalLayer}; use parking_lot::{Mutex, RwLock}; -use crate::metal::layer_observer::new_observer_layer; - -#[link(name = "QuartzCore", kind = "framework")] -extern "C" {} - impl super::Surface { - fn new(layer: metal::MetalLayer) -> Self { + pub fn new(layer: Retained) -> Self { Self { render_layer: Mutex::new(layer), swapchain_format: RwLock::new(None), @@ -32,79 +21,9 @@ impl super::Surface { } } - /// If not called on the main thread, this will panic. - #[allow(clippy::transmute_ptr_to_ref)] - pub unsafe fn from_view(view: NonNull) -> Self { - let layer = unsafe { Self::get_metal_layer(view) }; - let layer = ManuallyDrop::new(layer); - // SAFETY: The layer is an initialized instance of `CAMetalLayer`, and - // we transfer the retain count to `MetalLayer` using `ManuallyDrop`. - let layer = unsafe { metal::MetalLayer::from_ptr(layer.cast()) }; - Self::new(layer) - } - - pub unsafe fn from_layer(layer: &metal::MetalLayerRef) -> Self { - let class = class!(CAMetalLayer); - let proper_kind: BOOL = msg_send![layer, isKindOfClass: class]; - assert_eq!(proper_kind, YES); - Self::new(layer.to_owned()) - } - - /// Get or create a new `CAMetalLayer` associated with the given `NSView` - /// or `UIView`. - /// - /// # Panics - /// - /// If called from a thread that is not the main thread, this will panic. - /// - /// # Safety - /// - /// The `view` must be a valid instance of `NSView` or `UIView`. - pub(crate) unsafe fn get_metal_layer(view: NonNull) -> StrongPtr { - let is_main_thread: BOOL = msg_send![class!(NSThread), isMainThread]; - if is_main_thread == NO { - panic!("get_metal_layer cannot be called in non-ui thread."); - } - - // Ensure that the view is layer-backed. - // Views are always layer-backed in UIKit. - #[cfg(target_os = "macos")] - let () = msg_send![view.as_ptr(), setWantsLayer: YES]; - - let root_layer: *mut Object = msg_send![view.as_ptr(), layer]; - // `-[NSView layer]` can return `NULL`, while `-[UIView layer]` should - // always be available. - assert!(!root_layer.is_null(), "failed making the view layer-backed"); - - // NOTE: We explicitly do not touch properties such as - // `layerContentsPlacement`, `needsDisplayOnBoundsChange` and - // `contentsGravity` etc. on the root layer, both since we would like - // to give the user full control over them, and because the default - // values suit us pretty well (especially the contents placement being - // `NSViewLayerContentsRedrawDuringViewResize`, which allows the view - // to receive `drawRect:`/`updateLayer` calls). - - let is_metal_layer: BOOL = msg_send![root_layer, isKindOfClass: class!(CAMetalLayer)]; - if is_metal_layer == YES { - // The view has a `CAMetalLayer` as the root layer, which can - // happen for example if user overwrote `-[NSView layerClass]` or - // the view is `MTKView`. - // - // This is easily handled: We take "ownership" over the layer, and - // render directly into that; after all, the user passed a view - // with an explicit Metal layer to us, so this is very likely what - // they expect us to do. - unsafe { StrongPtr::retain(root_layer) } - } else { - // The view does not have a `CAMetalLayer` as the root layer (this - // is the default for most views). - // - // This case is trickier! We cannot use the existing layer with - // Metal, so we must do something else. There are a few options, - // we do the same as outlined in: - // https://docs.rs/raw-window-metal/1.1.0/raw_window_metal/#reasoning-behind-creating-a-sublayer - unsafe { new_observer_layer(root_layer) } - } + pub fn from_layer(layer: &CAMetalLayer) -> Self { + assert!(layer.isKindOfClass(CAMetalLayer::class())); + Self::new(layer.retain()) } /// Gets the current dimensions of the `Surface`. @@ -115,11 +34,10 @@ impl super::Surface { /// of sync. This is sound, as these properties are accessed atomically. /// See: pub(super) fn dimensions(&self) -> wgt::Extent3d { - let (size, scale): (CGSize, CGFloat) = unsafe { - let render_layer_borrow = self.render_layer.lock(); - let render_layer = render_layer_borrow.as_ref(); - let bounds: CGRect = msg_send![render_layer, bounds]; - let contents_scale: CGFloat = msg_send![render_layer, contentsScale]; + let (size, scale) = { + let render_layer = self.render_layer.lock(); + let bounds = render_layer.bounds(); + let contents_scale = render_layer.contentsScale(); (bounds.size, contents_scale) }; @@ -155,31 +73,31 @@ impl crate::Surface for super::Surface { let drawable_size = CGSize::new(config.extent.width as f64, config.extent.height as f64); match config.composite_alpha_mode { - wgt::CompositeAlphaMode::Opaque => render_layer.set_opaque(true), - wgt::CompositeAlphaMode::PostMultiplied => render_layer.set_opaque(false), + wgt::CompositeAlphaMode::Opaque => render_layer.setOpaque(true), + wgt::CompositeAlphaMode::PostMultiplied => render_layer.setOpaque(false), _ => (), } let device_raw = device.shared.device.lock(); - render_layer.set_device(&device_raw); - render_layer.set_pixel_format(caps.map_format(config.format)); - render_layer.set_framebuffer_only(framebuffer_only); - render_layer.set_presents_with_transaction(self.present_with_transaction); + render_layer.setDevice(Some(&device_raw)); + render_layer.setPixelFormat(caps.map_format(config.format)); + render_layer.setFramebufferOnly(framebuffer_only); + render_layer.setPresentsWithTransaction(self.present_with_transaction); // opt-in to Metal EDR // EDR potentially more power used in display and more bandwidth, memory footprint. let wants_edr = config.format == wgt::TextureFormat::Rgba16Float; - if wants_edr != render_layer.wants_extended_dynamic_range_content() { - render_layer.set_wants_extended_dynamic_range_content(wants_edr); + if wants_edr != render_layer.wantsExtendedDynamicRangeContent() { + render_layer.setWantsExtendedDynamicRangeContent(wants_edr); } // this gets ignored on iOS for certain OS/device combinations (iphone5s iOS 10.3) - render_layer.set_maximum_drawable_count(config.maximum_frame_latency as u64 + 1); - render_layer.set_drawable_size(drawable_size); + render_layer.setMaximumDrawableCount(config.maximum_frame_latency as usize + 1); + render_layer.setDrawableSize(drawable_size); if caps.can_set_next_drawable_timeout { - let () = msg_send![*render_layer, setAllowsNextDrawableTimeout:false]; + render_layer.setAllowsNextDrawableTimeout(false); } if caps.can_set_display_sync { - let () = msg_send![*render_layer, setDisplaySyncEnabled: display_sync]; + render_layer.setDisplaySyncEnabled(display_sync); } Ok(()) @@ -195,9 +113,9 @@ impl crate::Surface for super::Surface { _fence: &super::Fence, ) -> Result>, crate::SurfaceError> { let render_layer = self.render_layer.lock(); - let (drawable, texture) = match autoreleasepool(|| { + let (drawable, texture) = match autoreleasepool(|_| { render_layer - .next_drawable() + .nextDrawable() .map(|drawable| (drawable.to_owned(), drawable.texture().to_owned())) }) { Some(pair) => pair, @@ -210,7 +128,7 @@ impl crate::Surface for super::Surface { texture: super::Texture { raw: texture, format: swapchain_format, - raw_type: MTLTextureType::D2, + raw_type: MTLTextureType::Type2D, array_layers: 1, mip_levels: 1, copy_size: crate::CopyExtent { @@ -219,7 +137,7 @@ impl crate::Surface for super::Surface { depth: 1, }, }, - drawable, + drawable: ProtocolObject::from_retained(drawable), present_with_transaction: self.present_with_transaction, }; diff --git a/wgpu-hal/src/vulkan/instance.rs b/wgpu-hal/src/vulkan/instance.rs index 572ffcd3f0f..9a8812de24c 100644 --- a/wgpu-hal/src/vulkan/instance.rs +++ b/wgpu-hal/src/vulkan/instance.rs @@ -554,10 +554,10 @@ impl super::Instance { Ok(self.create_surface_from_vk_surface_khr(surface)) } - #[cfg(metal)] - fn create_surface_from_view( + #[cfg(target_vendor = "apple")] + fn create_surface_from_layer( &self, - view: core::ptr::NonNull, + layer: raw_window_metal::Layer, ) -> Result { if !self.shared.extensions.contains(&ext::metal_surface::NAME) { return Err(crate::InstanceError::new(String::from( @@ -565,17 +565,14 @@ impl super::Instance { ))); } - let layer = unsafe { crate::metal::Surface::get_metal_layer(view.cast()) }; // NOTE: The layer is retained by Vulkan's `vkCreateMetalSurfaceEXT`, // so no need to retain it beyond the scope of this function. - let layer_ptr = (*layer).cast(); - let surface = { let metal_loader = ext::metal_surface::Instance::new(&self.shared.entry, &self.shared.raw); let vk_info = vk::MetalSurfaceCreateInfoEXT::default() .flags(vk::MetalSurfaceCreateFlagsEXT::empty()) - .layer(layer_ptr); + .layer(layer.as_ptr().as_ptr()); unsafe { metal_loader.create_metal_surface(&vk_info, None).unwrap() } }; @@ -944,17 +941,19 @@ impl crate::Instance for super::Instance { })?; self.create_surface_from_hwnd(hinstance.get(), handle.hwnd.get()) } - #[cfg(all(target_os = "macos", feature = "metal"))] + #[cfg(target_vendor = "apple")] (Rwh::AppKit(handle), _) if self.shared.extensions.contains(&ext::metal_surface::NAME) => { - self.create_surface_from_view(handle.ns_view) + let layer = unsafe { raw_window_metal::Layer::from_ns_view(handle.ns_view) }; + self.create_surface_from_layer(layer) } - #[cfg(all(any(target_os = "ios", target_os = "visionos"), feature = "metal"))] + #[cfg(target_vendor = "apple")] (Rwh::UiKit(handle), _) if self.shared.extensions.contains(&ext::metal_surface::NAME) => { - self.create_surface_from_view(handle.ui_view) + let layer = unsafe { raw_window_metal::Layer::from_ui_view(handle.ui_view) }; + self.create_surface_from_layer(layer) } (_, _) => Err(crate::InstanceError::new(format!( "window handle {window_handle:?} is not a Vulkan-compatible handle"