diff --git a/com.unity.render-pipelines.core/ShaderLibrary/Common.hlsl b/com.unity.render-pipelines.core/ShaderLibrary/Common.hlsl index 2afab39d984..e82cd4466ee 100644 --- a/com.unity.render-pipelines.core/ShaderLibrary/Common.hlsl +++ b/com.unity.render-pipelines.core/ShaderLibrary/Common.hlsl @@ -77,7 +77,13 @@ #define PREFER_HALF 1 #endif -#if HAS_HALF && PREFER_HALF +// Unity historically has a mixed stance on default sampler precision between mobile and other platforms, +// and changing it without altering existing behavior/performance is difficult. +// +// When UNITY_UNIFIED_SHADER_PRECISION_MODEL is defined, the expectation is to have full precision +// on all platforms and explicitly optimize against lower precision when useful. + +#if !defined(UNITY_UNIFIED_SHADER_PRECISION_MODEL) && HAS_HALF && PREFER_HALF #define REAL_IS_HALF 1 #else #define REAL_IS_HALF 0 @@ -221,6 +227,12 @@ #define LANE_SWIZZLE_OFFSET(andMask, orMask, xorMask) (andMask | (orMask << 5) | (xorMask << 10)) #endif +// For multi_compile +#ifdef PLATFORM_LANE_COUNT_32 +#undef PLATFORM_LANE_COUNT +#define PLATFORM_LANE_COUNT 32 +#endif + #include "Packages/com.unity.render-pipelines.core/ShaderLibrary/CommonDeprecated.hlsl" #if !defined(SHADER_API_GLES) diff --git a/com.unity.render-pipelines.high-definition/Runtime/Core/Textures/TextureCache.cs b/com.unity.render-pipelines.high-definition/Runtime/Core/Textures/TextureCache.cs index 4f9283aa887..fb3baa8b7eb 100644 --- a/com.unity.render-pipelines.high-definition/Runtime/Core/Textures/TextureCache.cs +++ b/com.unity.render-pipelines.high-definition/Runtime/Core/Textures/TextureCache.cs @@ -312,25 +312,6 @@ protected int GetNumMips(int dim) return iNumMips; } - public static bool isMobileBuildTarget - { - get - { -#if UNITY_EDITOR - switch (EditorUserBuildSettings.activeBuildTarget) - { - case BuildTarget.iOS: - case BuildTarget.Android: - return true; - default: - return false; - } -#else - return Application.isMobilePlatform; -#endif - } - } - 
public static TextureFormat GetPreferredHDRCompressedTextureFormat { get diff --git a/com.unity.render-pipelines.high-definition/Runtime/Debug/DebugDisplay.cs b/com.unity.render-pipelines.high-definition/Runtime/Debug/DebugDisplay.cs index e110f438785..7f078aaee2a 100644 --- a/com.unity.render-pipelines.high-definition/Runtime/Debug/DebugDisplay.cs +++ b/com.unity.render-pipelines.high-definition/Runtime/Debug/DebugDisplay.cs @@ -40,7 +40,6 @@ unsafe struct ShaderVariablesDebugDisplay public int _DebugSingleShadowIndex; public int _DebugProbeVolumeMode; - public Vector3 _DebugDisplayPad0; } /// diff --git a/com.unity.render-pipelines.high-definition/Runtime/Debug/DebugDisplay.cs.hlsl b/com.unity.render-pipelines.high-definition/Runtime/Debug/DebugDisplay.cs.hlsl index 17289409a65..462725b77f1 100644 --- a/com.unity.render-pipelines.high-definition/Runtime/Debug/DebugDisplay.cs.hlsl +++ b/com.unity.render-pipelines.high-definition/Runtime/Debug/DebugDisplay.cs.hlsl @@ -66,7 +66,6 @@ CBUFFER_START(ShaderVariablesDebugDisplay) float _MatcapViewScale; int _DebugSingleShadowIndex; int _DebugProbeVolumeMode; - float3 _DebugDisplayPad0; CBUFFER_END diff --git a/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/ClearLightLists.compute b/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/ClearLightLists.compute index 32274fae94b..f5b5a54d64c 100644 --- a/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/ClearLightLists.compute +++ b/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/ClearLightLists.compute @@ -1,9 +1,18 @@ #pragma kernel ClearList +// TODO: Fix PLATFORM_LANE_COUNT=32 support here and in Lightloop.cs if needed +//#pragma multi_compile _ PLATFORM_LANE_COUNT_32 + RWStructuredBuffer _LightListToClear; int _LightListEntries; -[numthreads(64, 1, 1)] +#ifdef PLATFORM_LANE_COUNT +#define NR_THREADS PLATFORM_LANE_COUNT +#else +#define NR_THREADS 64 // default to 64 threads per group 
+#endif + +[numthreads(NR_THREADS, 1, 1)] void ClearList(uint3 id : SV_DispatchThreadID) { if (id.x < (uint)_LightListEntries) diff --git a/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/LightLoop.cs b/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/LightLoop.cs index 30671f5e233..a67e19361ef 100644 --- a/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/LightLoop.cs +++ b/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/LightLoop.cs @@ -280,23 +280,6 @@ public partial class HDRenderPipeline internal const int k_MaxLightsPerClusterCell = 24; internal static readonly Vector3 k_BoxCullingExtentThreshold = Vector3.one * 0.01f; - #if UNITY_SWITCH - static bool k_PreferFragment = true; - #else - static bool k_PreferFragment = false; - #endif - #if !UNITY_EDITOR && UNITY_SWITCH - const bool k_HasNativeQuadSupport = true; - #else - const bool k_HasNativeQuadSupport = false; - #endif - - #if !UNITY_EDITOR && UNITY_SWITCH - const int k_ThreadGroupOptimalSize = 32; - #else - const int k_ThreadGroupOptimalSize = 64; - #endif - int m_MaxDirectionalLightsOnScreen; int m_MaxPunctualLightsOnScreen; int m_MaxAreaLightsOnScreen; @@ -464,7 +447,7 @@ public void AllocateNonRenderGraphResolutionDependentBuffers(HDCamera hdCamera, var nrClustersY = (height + LightDefinitions.s_TileSizeClustered - 1) / LightDefinitions.s_TileSizeClustered; var nrClusterTiles = nrClustersX * nrClustersY * viewCount; - perVoxelOffset = new ComputeBuffer((int)LightCategory.Count * (1 << k_Log2NumClusters) * nrClusterTiles, sizeof(uint)); + perVoxelOffset = new ComputeBuffer((int)LightCategory.Count * (1 << DeviceInfo.log2NumClusters) * nrClusterTiles, sizeof(uint)); perVoxelLightLists = new ComputeBuffer(NumLightIndicesPerClusteredTile() * nrClusterTiles, sizeof(uint)); if (clusterNeedsDepth) @@ -720,11 +703,6 @@ enum ClusterDepthSource : int const bool k_UseDepthBuffer = true; // only has an impact when EnableClustered 
is true (requires a depth-prepass) -#if !UNITY_EDITOR && UNITY_SWITCH - const int k_Log2NumClusters = 5; // accepted range is from 0 to 5 (NR_THREADS is set to 32 on Switch). NumClusters is 1< 4 x 8x8) diff --git a/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/builddispatchindirect.compute b/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/builddispatchindirect.compute index 0afe7a4306c..81ce638d8b8 100644 --- a/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/builddispatchindirect.compute +++ b/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/builddispatchindirect.compute @@ -1,6 +1,7 @@ #pragma kernel BuildIndirect #pragma multi_compile _ IS_DRAWPROCEDURALINDIRECT +#pragma multi_compile _ PLATFORM_LANE_COUNT_32 #pragma only_renderers d3d11 playstation xboxone vulkan metal switch @@ -12,10 +13,10 @@ #include "Packages/com.unity.render-pipelines.high-definition/Runtime/Material/Lit/Lit.hlsl" -#ifdef PLATFORM_LANE_COUNT // We can infer the size of a wave. This is currently not possible on non-consoles, so we have to fallback to a sensible default in those cases. +#ifdef PLATFORM_LANE_COUNT #define NR_THREADS PLATFORM_LANE_COUNT #else -#define NR_THREADS 64 // default to 64 threads per group on other platforms.. 
+#define NR_THREADS 64 // default to 64 threads per group #endif RWBuffer g_DispatchIndirectBuffer : register( u0 ); // Indirect arguments have to be in a _buffer_, not a structured buffer diff --git a/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/cleardispatchindirect.compute b/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/cleardispatchindirect.compute index cce3f47aedf..5c9976b611d 100644 --- a/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/cleardispatchindirect.compute +++ b/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/cleardispatchindirect.compute @@ -1,5 +1,8 @@ #pragma kernel ClearDispatchIndirect #pragma kernel ClearDrawProceduralIndirect + +#pragma multi_compile _ PLATFORM_LANE_COUNT_32 + #pragma only_renderers d3d11 playstation xboxone vulkan metal switch RWBuffer g_DispatchIndirectBuffer : register( u0 ); // Indirect arguments have to be in a _buffer_, not a structured buffer @@ -8,9 +11,9 @@ RWBuffer g_DispatchIndirectBuffer : register( u0 ); // Indirect argument #include "Packages/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/LightLoop.cs.hlsl" #ifdef PLATFORM_LANE_COUNT -#define NR_THREADS PLATFORM_LANE_COUNT +#define NR_THREADS PLATFORM_LANE_COUNT #else -#define NR_THREADS 64 // default to 64 threads per group on other platforms.. 
+#define NR_THREADS 64 // default to 64 threads per group #endif uniform uint g_NumTiles; diff --git a/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/lightlistbuild-bigtile.compute b/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/lightlistbuild-bigtile.compute index aa9b402db5d..8a0e6736b19 100644 --- a/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/lightlistbuild-bigtile.compute +++ b/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/lightlistbuild-bigtile.compute @@ -1,5 +1,7 @@ #pragma kernel BigTileLightListGen +#pragma multi_compile _ PLATFORM_LANE_COUNT_32 + #include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Common.hlsl" #include "Packages/com.unity.render-pipelines.high-definition-config/Runtime/ShaderConfig.cs.hlsl" @@ -8,6 +10,7 @@ #include "Packages/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/LightingConvexHullUtils.hlsl" #include "Packages/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/SortingComputeUtils.hlsl" #include "Packages/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/LightCullUtils.hlsl" + #pragma only_renderers d3d11 playstation xboxone vulkan metal switch #define EXACT_EDGE_TESTS @@ -22,11 +25,10 @@ StructuredBuffer g_vBoundsBuffer : register( t1 ); StructuredBuffer _LightVolumeData : register(t2); StructuredBuffer g_data : register( t3 ); - -#ifdef PLATFORM_LANE_COUNT // We can infer the size of a wave. This is currently not possible on non-consoles, so we have to fallback to a sensible default in those cases. +#ifdef PLATFORM_LANE_COUNT #define NR_THREADS PLATFORM_LANE_COUNT #else -#define NR_THREADS 64 // default to 64 threads per group on other platforms.. 
+#define NR_THREADS 64 // default to 64 threads per group #endif // output buffer @@ -144,7 +146,7 @@ void BigTileLightListGen(uint threadID : SV_GroupIndex, uint3 u3GroupID : SV_Gro } } -#if NR_THREADS > PLATFORM_LANE_COUNT || defined(SHADER_API_XBOXONE) || defined(SHADER_API_SWITCH) // not sure why XB1 and Switch need the barrier (it will not be correct without) +#if NR_THREADS > PLATFORM_LANE_COUNT || defined(SHADER_API_XBOXONE) || (defined(PLATFORM_LANE_COUNT) && PLATFORM_LANE_COUNT==32) // not sure why the barrier is needed GroupMemoryBarrierWithGroupSync(); #endif diff --git a/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/lightlistbuild-clustered.compute b/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/lightlistbuild-clustered.compute index 944a0bb582c..4cdda8b1bc3 100644 --- a/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/lightlistbuild-clustered.compute +++ b/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/lightlistbuild-clustered.compute @@ -10,6 +10,18 @@ #pragma kernel TileLightListGen_DepthRT_SrcBigTile_Oblique LIGHTLISTGEN=TileLightListGen_DepthRT_SrcBigTile_Oblique ENABLE_DEPTH_TEXTURE_BACKPLANE USE_TWO_PASS_TILED_LIGHTING USE_OBLIQUE_MODE #pragma kernel TileLightListGen_DepthRT_MSAA_SrcBigTile_Oblique LIGHTLISTGEN=TileLightListGen_DepthRT_MSAA_SrcBigTile_Oblique ENABLE_DEPTH_TEXTURE_BACKPLANE MSAA_ENABLED USE_TWO_PASS_TILED_LIGHTING USE_OBLIQUE_MODE +#pragma kernel TileLightListGen_NoDepthRT_LE LIGHTLISTGEN=TileLightListGen_NoDepthRT_LE PLATFORM_LANE_COUNT_32 +#pragma kernel TileLightListGen_DepthRT_LE LIGHTLISTGEN=TileLightListGen_DepthRT_LE PLATFORM_LANE_COUNT_32 ENABLE_DEPTH_TEXTURE_BACKPLANE +#pragma kernel TileLightListGen_DepthRT_MSAA_LE LIGHTLISTGEN=TileLightListGen_DepthRT_MSAA_LE PLATFORM_LANE_COUNT_32 ENABLE_DEPTH_TEXTURE_BACKPLANE MSAA_ENABLED +#pragma kernel TileLightListGen_NoDepthRT_SrcBigTile_LE 
LIGHTLISTGEN=TileLightListGen_NoDepthRT_SrcBigTile_LE PLATFORM_LANE_COUNT_32 USE_TWO_PASS_TILED_LIGHTING +#pragma kernel TileLightListGen_DepthRT_SrcBigTile_LE LIGHTLISTGEN=TileLightListGen_DepthRT_SrcBigTile_LE PLATFORM_LANE_COUNT_32 ENABLE_DEPTH_TEXTURE_BACKPLANE USE_TWO_PASS_TILED_LIGHTING +#pragma kernel TileLightListGen_DepthRT_MSAA_SrcBigTile_LE LIGHTLISTGEN=TileLightListGen_DepthRT_MSAA_SrcBigTile_LE PLATFORM_LANE_COUNT_32 ENABLE_DEPTH_TEXTURE_BACKPLANE MSAA_ENABLED USE_TWO_PASS_TILED_LIGHTING + +#pragma kernel TileLightListGen_DepthRT_Oblique_LE LIGHTLISTGEN=TileLightListGen_DepthRT_Oblique_LE PLATFORM_LANE_COUNT_32 ENABLE_DEPTH_TEXTURE_BACKPLANE USE_OBLIQUE_MODE +#pragma kernel TileLightListGen_DepthRT_MSAA_Oblique_LE LIGHTLISTGEN=TileLightListGen_DepthRT_MSAA_Oblique_LE PLATFORM_LANE_COUNT_32 ENABLE_DEPTH_TEXTURE_BACKPLANE MSAA_ENABLED USE_OBLIQUE_MODE +#pragma kernel TileLightListGen_DepthRT_SrcBigTile_Oblique_LE LIGHTLISTGEN=TileLightListGen_DepthRT_SrcBigTile_Oblique_LE PLATFORM_LANE_COUNT_32 ENABLE_DEPTH_TEXTURE_BACKPLANE USE_TWO_PASS_TILED_LIGHTING USE_OBLIQUE_MODE +#pragma kernel TileLightListGen_DepthRT_MSAA_SrcBigTile_Oblique_LE LIGHTLISTGEN=TileLightListGen_DepthRT_MSAA_SrcBigTile_Oblique_LE PLATFORM_LANE_COUNT_32 ENABLE_DEPTH_TEXTURE_BACKPLANE MSAA_ENABLED USE_TWO_PASS_TILED_LIGHTING USE_OBLIQUE_MODE + // #pragma enable_d3d11_debug_symbols #define REQUEST_DISABLE_OPTIMISTIC_SCALAR_ALLOCATION @@ -22,10 +34,7 @@ #include "Packages/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/LightLoop.cs.hlsl" #include "Packages/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/LightingConvexHullUtils.hlsl" #include "Packages/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/LightCullUtils.hlsl" - -#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL) && !defined(SHADER_API_SWITCH) #include 
"Packages/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/SortingComputeUtils.hlsl" -#endif #pragma only_renderers d3d11 playstation xboxone vulkan metal switch @@ -45,11 +54,10 @@ StructuredBuffer g_data : register( t3 ); StructuredBuffer g_vBigTileLightList : register( t4 ); // don't support Buffer yet in unity #endif - -#ifdef PLATFORM_LANE_COUNT // We can infer the size of a wave. This is currently not possible on non-consoles, so we have to fallback to a sensible default in those cases. -#define NR_THREADS PLATFORM_LANE_COUNT +#ifdef PLATFORM_LANE_COUNT +#define NR_THREADS PLATFORM_LANE_COUNT #else -#define NR_THREADS 64 // default to 64 threads per group on other platforms.. +#define NR_THREADS 64 // default to 64 threads per group #endif RWStructuredBuffer g_vLayeredLightList : register( u0 ); // don't support RWBuffer yet in unity @@ -324,7 +332,7 @@ void LIGHTLISTGEN(uint threadID : SV_GroupIndex, uint3 u3GroupID : SV_GroupID) #endif // sort lights (gives a more efficient execution in both deferred and tiled forward lighting). 
-#if NR_THREADS > PLATFORM_LANE_COUNT +#if NR_THREADS > PLATFORM_LANE_COUNT || (defined(PLATFORM_LANE_COUNT) && PLATFORM_LANE_COUNT==32) SORTLIST(coarseList, iNrCoarseLights, MAX_NR_COARSE_ENTRIES, t, NR_THREADS); #endif @@ -488,7 +496,7 @@ int SphericalIntersectionTests(uint threadID, int iNrCoarseLights, float2 screen lightOffsSph = offs; } -#if NR_THREADS > PLATFORM_LANE_COUNT +#if NR_THREADS > PLATFORM_LANE_COUNT || (defined(PLATFORM_LANE_COUNT) && PLATFORM_LANE_COUNT==32) GroupMemoryBarrierWithGroupSync(); #endif diff --git a/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/lightlistbuild.compute b/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/lightlistbuild.compute index 49a88c59d47..1a1b691d9aa 100644 --- a/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/lightlistbuild.compute +++ b/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/lightlistbuild.compute @@ -6,6 +6,7 @@ #pragma multi_compile _ USE_TWO_PASS_TILED_LIGHTING #pragma multi_compile _ USE_FEATURE_FLAGS #pragma multi_compile _ USE_OBLIQUE_MODE +#pragma multi_compile _ PLATFORM_LANE_COUNT_32 //#pragma #pragma enable_d3d11_debug_symbols @@ -37,14 +38,10 @@ StructuredBuffer g_data : register( t3 ); StructuredBuffer g_vBigTileLightList : register( t4 ); // don't support Buffer yet in unity #endif -#if defined(SHADER_API_MOBILE) || defined(SHADER_API_SWITCH) -#define PLATFORM_LANE_COUNT 32 -#endif - -#ifdef PLATFORM_LANE_COUNT // We can infer the size of a wave. This is currently not possible on non-consoles, so we have to fallback to a sensible default in those cases. -#define NR_THREADS PLATFORM_LANE_COUNT +#ifdef PLATFORM_LANE_COUNT +#define NR_THREADS PLATFORM_LANE_COUNT #else -#define NR_THREADS 64 // default to 64 threads per group on other platforms.. 
+#define NR_THREADS 64 // default to 64 threads per group #endif #define PIXEL_PER_THREAD ((TILE_SIZE_FPTL*TILE_SIZE_FPTL) / NR_THREADS) // 8 or 4 @@ -196,7 +193,7 @@ void TileLightListGen(uint3 dispatchThreadId : SV_DispatchThreadID, uint threadI InterlockedMax(ldsZMax, asuint(dpt_ma)); InterlockedMin(ldsZMin, asuint(dpt_mi)); -#if NR_THREADS > PLATFORM_LANE_COUNT || defined(SHADER_API_SWITCH) // not sure why Switch needs the barrier (it will not be correct without) +#if NR_THREADS > PLATFORM_LANE_COUNT || (defined(PLATFORM_LANE_COUNT) && PLATFORM_LANE_COUNT==32) // not sure why the barrier is needed GroupMemoryBarrierWithGroupSync(); #endif } diff --git a/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/materialflags.compute b/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/materialflags.compute index 4ac95c5b265..4fc28881330 100644 --- a/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/materialflags.compute +++ b/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/materialflags.compute @@ -2,6 +2,9 @@ #pragma multi_compile _ USE_OR +// TODO: Fix PLATFORM_LANE_COUNT=32 support here and in Lightloop.cs if needed +//#pragma multi_compile _ PLATFORM_LANE_COUNT_32 + // #pragma enable_d3d11_debug_symbols #include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Common.hlsl" @@ -18,10 +21,10 @@ #define USE_MATERIAL_FEATURE_FLAGS -#ifdef PLATFORM_LANE_COUNT // We can infer the size of a wave. This is currently not possible on non-consoles, so we have to fallback to a sensible default in those cases. -#define NR_THREADS PLATFORM_LANE_COUNT +#ifdef PLATFORM_LANE_COUNT +#define NR_THREADS PLATFORM_LANE_COUNT #else -#define NR_THREADS 64 // default to 64 threads per group on other platforms.. 
+#define NR_THREADS 64 // default to 64 threads per group #endif groupshared uint ldsFeatureFlags; diff --git a/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/scrbound.compute b/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/scrbound.compute index be07511307d..6d8caac2881 100644 --- a/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/scrbound.compute +++ b/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/scrbound.compute @@ -4,7 +4,8 @@ #pragma kernel ScreenBoundsAABB #pragma multi_compile _ USE_OBLIQUE_MODE - +// TODO: Fix PLATFORM_LANE_COUNT=32 support here and in Lightloop.cs if needed +//#pragma multi_compile _ PLATFORM_LANE_COUNT_32 #include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Common.hlsl" #include "Packages/com.unity.render-pipelines.high-definition-config/Runtime/ShaderConfig.cs.hlsl" @@ -15,7 +16,11 @@ StructuredBuffer g_data : register( t0 ); -#define NR_THREADS 64 +#ifdef PLATFORM_LANE_COUNT +#define NR_THREADS PLATFORM_LANE_COUNT +#else +#define NR_THREADS 64 // default to 64 threads per group +#endif // output buffer RWStructuredBuffer g_vBoundsBuffer : register( u0 ); diff --git a/com.unity.render-pipelines.high-definition/Runtime/Lighting/VolumetricLighting/VolumetricLighting.cs b/com.unity.render-pipelines.high-definition/Runtime/Lighting/VolumetricLighting/VolumetricLighting.cs index db409abb3da..ffba024c293 100644 --- a/com.unity.render-pipelines.high-definition/Runtime/Lighting/VolumetricLighting/VolumetricLighting.cs +++ b/com.unity.render-pipelines.high-definition/Runtime/Lighting/VolumetricLighting/VolumetricLighting.cs @@ -261,9 +261,10 @@ static internal Vector3Int ComputeVolumetricViewportSize(HDCamera hdCamera, ref float screenFraction = controller.screenResolutionPercentage.value * 0.01f; int sliceCount = controller.volumeSliceCount.value; - int w = Mathf.RoundToInt(viewportWidth * screenFraction); - int h = 
Mathf.RoundToInt(viewportHeight * screenFraction); - int d = sliceCount; + const int kMax3DTextureSize = 2048; // TODO: There's no SystemInfo.maxTextureSize equivalent for 3D textures + int w = Math.Min(Mathf.RoundToInt(viewportWidth * screenFraction), kMax3DTextureSize); + int h = Math.Min(Mathf.RoundToInt(viewportHeight * screenFraction), kMax3DTextureSize); + int d = Math.Min(sliceCount, kMax3DTextureSize); if (controller.screenResolutionPercentage.value == (1.0f/8.0f) * 100) voxelSize = 8; diff --git a/com.unity.render-pipelines.high-definition/Runtime/Material/GGXConvolution/ComputeGgxIblSampleData.compute b/com.unity.render-pipelines.high-definition/Runtime/Material/GGXConvolution/ComputeGgxIblSampleData.compute index d8445386ed7..bd84500812d 100644 --- a/com.unity.render-pipelines.high-definition/Runtime/Material/GGXConvolution/ComputeGgxIblSampleData.compute +++ b/com.unity.render-pipelines.high-definition/Runtime/Material/GGXConvolution/ComputeGgxIblSampleData.compute @@ -5,20 +5,15 @@ #pragma only_renderers d3d11 playstation xboxone vulkan metal switch -#if defined(SHADER_API_MOBILE) || defined(SHADER_API_SWITCH) -#define MAX_IBL_SAMPLE_CNT 34 -#else -#define MAX_IBL_SAMPLE_CNT 89 -#endif - RWTexture2D outputResult; // [MAX_SAMPLE_CNT x UNITY_SPECCUBE_LOD_STEPS] -#pragma kernel ComputeGgxIblSampleData +#pragma kernel ComputeGgxIblSampleData COMPUTEGGGXIBLSAMPLEDATA=ComputeGgxIblSampleData MAX_IBL_SAMPLE_CNT=89 +#pragma kernel ComputeGgxIblSampleData_LE COMPUTEGGGXIBLSAMPLEDATA=ComputeGgxIblSampleData_LE MAX_IBL_SAMPLE_CNT=34 // Cannot use (MAX_SAMPLE_CNT x UNITY_SPECCUBE_LOD_STEPS) since // the thread group becomes too large for some platforms. 
[numthreads(8, UNITY_SPECCUBE_LOD_STEPS, 1)] -void ComputeGgxIblSampleData(uint3 groupThreadId : SV_GroupThreadID) +void COMPUTEGGGXIBLSAMPLEDATA(uint3 groupThreadId : SV_GroupThreadID) { uint numSamplesPerThread = (MAX_IBL_SAMPLE_CNT + 7) / 8; diff --git a/com.unity.render-pipelines.high-definition/Runtime/Material/GGXConvolution/IBLFilterGGX.cs b/com.unity.render-pipelines.high-definition/Runtime/Material/GGXConvolution/IBLFilterGGX.cs index 4c4c4101dd4..b7a420af50a 100644 --- a/com.unity.render-pipelines.high-definition/Runtime/Material/GGXConvolution/IBLFilterGGX.cs +++ b/com.unity.render-pipelines.high-definition/Runtime/Material/GGXConvolution/IBLFilterGGX.cs @@ -5,7 +5,7 @@ namespace UnityEngine.Rendering.HighDefinition class IBLFilterGGX : IBLFilterBSDF { RenderTexture m_GgxIblSampleData; - int m_GgxIblMaxSampleCount = TextureCache.isMobileBuildTarget ? 34 : 89; // Width + int m_GgxIblMaxSampleCount; // Width const int k_GgxIblMipCountMinusOne = 6; // Height (UNITY_SPECCUBE_LOD_STEPS) ComputeShader m_ComputeGgxIblSampleDataCS; @@ -44,7 +44,8 @@ public override void Initialize(CommandBuffer cmd) if (!m_ComputeGgxIblSampleDataCS) { m_ComputeGgxIblSampleDataCS = m_RenderPipelineResources.shaders.computeGgxIblSampleDataCS; - m_ComputeGgxIblSampleDataKernel = m_ComputeGgxIblSampleDataCS.FindKernel("ComputeGgxIblSampleData"); + m_ComputeGgxIblSampleDataKernel = DeviceInfo.FindKernel(m_ComputeGgxIblSampleDataCS, "ComputeGgxIblSampleData"); + m_GgxIblMaxSampleCount = m_ComputeGgxIblSampleDataKernel == 0 ? 
89 : 34; } if (!m_BuildProbabilityTablesCS) diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/HDRenderPipeline.LightLoop.cs b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/HDRenderPipeline.LightLoop.cs index 51bc4527a55..dc7af325b42 100644 --- a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/HDRenderPipeline.LightLoop.cs +++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/HDRenderPipeline.LightLoop.cs @@ -183,7 +183,7 @@ BuildGPULightListOutput BuildGPULightList( RenderGraph rend var nrClusterTiles = nrClustersX * nrClustersY * m_MaxViewCount; passData.output.perVoxelOffset = builder.WriteComputeBuffer( - renderGraph.CreateComputeBuffer(new ComputeBufferDesc((int)LightCategory.Count * (1 << k_Log2NumClusters) * nrClusterTiles, sizeof(uint)) { name = "PerVoxelOffset" })); + renderGraph.CreateComputeBuffer(new ComputeBufferDesc((int)LightCategory.Count * (1 << DeviceInfo.log2NumClusters) * nrClusterTiles, sizeof(uint)) { name = "PerVoxelOffset" })); passData.output.perVoxelLightLists = builder.WriteComputeBuffer( renderGraph.CreateComputeBuffer(new ComputeBufferDesc(NumLightIndicesPerClusteredTile() * nrClusterTiles, sizeof(uint)) { name = "PerVoxelLightList" })); if (tileAndClusterData.clusterNeedsDepth) @@ -372,7 +372,7 @@ LightingOutput RenderDeferredLighting( RenderGraph renderGraph, if (data.parameters.enableTile) { - bool useCompute = data.parameters.useComputeLightingEvaluation && !k_PreferFragment; + bool useCompute = data.parameters.useComputeLightingEvaluation && DeviceInfo.preferComputeKernels; if (useCompute) RenderComputeDeferredLighting(data.parameters, resources, context.cmd); else diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/HDRenderPipeline.cs b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/HDRenderPipeline.cs index 6585102ac51..f856dc8341f 100644 --- 
a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/HDRenderPipeline.cs +++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/HDRenderPipeline.cs @@ -412,6 +412,8 @@ public HDRenderPipeline(HDRenderPipelineAsset asset, HDRenderPipelineAsset defau return; } + DeviceInfo.ProbeDeviceInfo(); + var defaultLensAttenuation = m_DefaultAsset.lensAttenuationMode; if (defaultLensAttenuation == LensAttenuationMode.ImperfectLens) { diff --git a/com.unity.render-pipelines.high-definition/Runtime/Utilities/DeviceInfo.cs b/com.unity.render-pipelines.high-definition/Runtime/Utilities/DeviceInfo.cs new file mode 100644 index 00000000000..141cbddab53 --- /dev/null +++ b/com.unity.render-pipelines.high-definition/Runtime/Utilities/DeviceInfo.cs @@ -0,0 +1,83 @@ +using System; +using UnityEngine; +#if UNITY_EDITOR +using UnityEditor; +#endif + +namespace UnityEngine.Rendering.HighDefinition +{ + public class DeviceInfo + { + public static int optimalThreadGroupSize; + public static int log2NumClusters; // / MSB of optimalThreadGroupSize, NumClusters is 1<