// Copyright Epic Games, Inc. All Rights Reserved.

#include "/Engine/Private/Common.ush"

#define SUBSTRATE_INLINE_SHADING 0
#define SUBSTRATE_SSS_MATERIAL_OVERRIDE 0
// Complex Special path is only supported with Format=1
#define SUBSTRATE_COMPLEXSPECIALPATH (SUBSTRATE_GBUFFER_FORMAT==1)
#include "/Engine/Private/Substrate/Substrate.ush"
#include "SubstrateTile.ush"

#if SUBSTRATE_GBUFFER_FORMAT==0
#include "../DeferredShadingCommon.ush"
#endif

// Thread count of one group. The tile shaders in this file are declared with
// numthreads(SUBSTRATE_TILE_SIZE, SUBSTRATE_TILE_SIZE, 1), i.e. one thread per pixel of a tile.
#define GROUP_THREAD_COUNT (SUBSTRATE_TILE_SIZE * SUBSTRATE_TILE_SIZE)

// NOTE(review): the next line looks truncated - the macro body stops at `(1<` and runs straight into a
// resource declaration. The other SUBSTRATE_TILE_BITMASK_* defines, the opening `#if SHADER_TILE_CATEGORIZATION`
// (its `#endif` sits right after TileMainCS), and the declarations of bRectPrimitive, MaxBytesPerPixel,
// FirstSliceStoringSubstrateSSSData and SceneStencilTexture are all used below but are not visible here.
// Restore this region from source control before building.
#define SUBSTRATE_TILE_BITMASK_SSS (1< TopLayerTexture;

// NOTE(review): none of the resource declarations in this section carry an element type
// (e.g. Texture2D / RWBuffer without <...>) - confirm against the original file.
#if PERMUTATION_CMASK
Texture2D TopLayerCmaskTexture;
#endif

#if SUBSTRATE_GBUFFER_FORMAT==1
RWTexture2DArray MaterialTextureArrayUAV;
#endif

uint TileEncoding;
uint4 TileListBufferOffsets[SUBSTRATE_TILE_TYPE_COUNT];

// Returns the start offset, inside the tile list buffer, of the list for tile type `Type`.
// Only the .x component of each uint4 entry is read.
uint GetTileListBufferOffsets(uint Type)
{
	return TileListBufferOffsets[Type].x;
}

// Indirect draw data buffer for all tile types
RWBuffer TileDrawIndirectDataBufferUAV;
RWBuffer TileListBufferUAV;

#if PERMUTATION_DECAL
Texture2D DBufferATexture;
Texture2D DBufferBTexture;
Texture2D DBufferCTexture;
Texture2D DBufferRenderMask;
SamplerState DBufferATextureSampler;
SamplerState DBufferBTextureSampler;
SamplerState DBufferCTextureSampler;

// Returns a bitmask telling which DBuffer targets may hold decal data for a pixel.
// @param PixelPos - pixel coordinate used to load from DBufferRenderMask
// @return 0 when the platform mask reports nothing written, otherwise a non-zero mask
//         (0x07 when no per-pixel information is available).
uint GetDBufferTargetMask(uint2 PixelPos)
{
#if PLATFORM_SUPPORTS_RENDERTARGET_WRITE_MASK
	return DecodeRTWriteMask(PixelPos, DBufferRenderMask, 3);
#elif PLATFORM_SUPPORTS_PER_PIXEL_DBUFFER_MASK
	uint Mask = DBufferRenderMask.Load(uint3(PixelPos, 0));
	return Mask > 0 ? 0x07 : 0x00;
#else
	// For debug purpose:
	// return
	//	(DBufferATexture.Load(uint3(PixelPos, 0)).a < 1.f ? 0x1 : 0x0) |
	//	(DBufferBTexture.Load(uint3(PixelPos, 0)).a < 1.f ? 0x2 : 0x0) |
	//	(DBufferCTexture.Load(uint3(PixelPos, 0)).a < 1.f ? 0x4 : 0x0) ;
	// NOTE(review): the last term used 0x3 in the original comment; 0x4 is presumably the intended third bit
	// of the 0x07 mask - confirm before re-enabling this debug path.

	// No per-pixel information on this platform: conservatively report all three targets as written.
	return 0x07;
#endif
}
#endif // PERMUTATION_DECAL

#if SUBSTRATE_OPAQUE_ROUGH_REFRACTION_ENABLED
Texture2D OpaqueRoughRefractionTexture;
#endif // SUBSTRATE_OPAQUE_ROUGH_REFRACTION_ENABLED

// Group-shared storage used to OR together the per-thread classification bits of a tile.
// With wave ops a single word is enough (waves merge into it with InterlockedOr);
// otherwise each thread owns one slot and a tree reduction is performed.
#if PERMUTATION_WAVE_OPS
groupshared uint s_TileBitmask;
#else
groupshared uint s_TileBitmask[GROUP_THREAD_COUNT];
#endif

// Tile classification entry point.
// Each thread inspects one pixel and accumulates SUBSTRATE_TILE_BITMASK_* bits describing it. The bits of all
// threads of the group are OR-ed together, then thread 0 appends the encoded tile (built from GroupId.xy) to the
// tile list(s) matching the merged bitmask and bumps the matching counters in TileDrawIndirectDataBufferUAV.
// For SUBSTRATE_GBUFFER_FORMAT==1 the shader also clears/patches per-pixel header and SSS data in
// MaterialTextureArrayUAV along the way.
// @param DispatchThreadId - global thread index; offset by View.ViewRectMin to get the pixel coordinate
// @param LinearIndex      - flattened thread index inside the group, used for the reduction
// @param GroupId          - group index; .xy is used as the tile coordinate
[numthreads(SUBSTRATE_TILE_SIZE, SUBSTRATE_TILE_SIZE, 1)]
void TileMainCS(uint2 DispatchThreadId : SV_DispatchThreadID, uint LinearIndex : SV_GroupIndex, uint3 GroupId : SV_GroupID)
{
	// Init primitive index
	// The first SUBSTRATE_TILE_TYPE_COUNT threads of row 0 each write slot +0 of one tile type's indirect draw
	// args: 4 indices per instance when bRectPrimitive is set, 6 otherwise.
	if (DispatchThreadId.x < SUBSTRATE_TILE_TYPE_COUNT && DispatchThreadId.y == 0)
	{
		const uint TileType = DispatchThreadId.x;
		const uint IndexCountPerInstance = bRectPrimitive > 0 ? 4 : 6;
		TileDrawIndirectDataBufferUAV[GetSubstrateTileTypeDrawIndirectArgOffset_DWord(TileType) + 0] = IndexCountPerInstance;
	}

	const uint2 PixelCoord = DispatchThreadId.xy + View.ViewRectMin.xy;
	// Threads falling outside the view size contribute no classification bits.
	const bool bIsValid = all(DispatchThreadId.xy < uint2(View.ViewSizeAndInvSize.xy));
	const float2 BufferUV = float2(PixelCoord + 0.5f) * View.BufferSizeAndInvSize.zw;

	// If CMask data are available, we use it as a coarse evaluation to know if a tile contains any data.
	// * If the tile is entirely empty: we clear the header & SSS data
	// * If the tile contains any data: we do fine grain checking, and clear header & SSS data only for needed pixels. The top layer data texture is used
	//   to know if a pixel is valid or not (since the material header is not cleared when the Cmask permutation is used).
	uint GroupTileBitmask = 0;
#if PERMUTATION_CMASK && SUBSTRATE_GBUFFER_FORMAT==1
	// Coarse test for clearing header (& SSS data) based on CMask data
	// CMask is loaded per group (GroupId.xy), so this branch is taken by the whole group or not at all.
	const uint CMask = TopLayerCmaskTexture.Load(uint3(GroupId.xy, 0));
	BRANCH
	if (CMask == 0x0)
	{
		MaterialTextureArrayUAV[uint3(PixelCoord, 0)] = 0u;
		SubstrateStoreSubsurfaceHeader(MaterialTextureArrayUAV, FirstSliceStoringSubstrateSSSData, PixelCoord, 0u); // This is a good clear for FSubstrateSubsurfaceHeader, and we only need to clear the header.
	}
	else
#endif
	{
		FSubstrateOpaqueRoughRefractionData OpaqueRoughRefractionData = (FSubstrateOpaqueRoughRefractionData)0;
		if (bIsValid)
		{
		#if SUBSTRATE_GBUFFER_FORMAT==0
			// Control tiles using ShadingModelID
			const FGBufferData GBufferData = GetGBufferData(BufferUV);
			if (GBufferData.ShadingModelID != SHADINGMODELID_UNLIT)
			{
				bool bHasAnisotropy = false;
				BRANCH
				if(HasAnisotropy(GBufferData.SelectiveOutputMask))
				{
					bHasAnisotropy = abs(GBufferData.Anisotropy) > 0; // Skip this load when the material doesn't even support anisotropy.
				}

			#if SUBSTRATE_COMPLEXSPECIALPATH
				// SUBSTRATE_COMPLEXSPECIALPATH is defined above as (SUBSTRATE_GBUFFER_FORMAT==1), so this branch
				// is never compiled inside the Format==0 path; even if it were, `if (false)` never sets the bit.
				if (false)
				{
					GroupTileBitmask |= SUBSTRATE_TILE_BITMASK_COMPLEXSPECIAL;
				}
				else
			#endif
				if (bHasAnisotropy || GBufferData.ShadingModelID == SHADINGMODELID_EYE || GBufferData.ShadingModelID == SHADINGMODELID_HAIR)
				{
					GroupTileBitmask |= SUBSTRATE_TILE_BITMASK_COMPLEX;
				}
				else if (GBufferData.ShadingModelID == SHADINGMODELID_DEFAULT_LIT)
				{
					GroupTileBitmask |= SUBSTRATE_TILE_BITMASK_SIMPLE;
				}
				else
				{
					GroupTileBitmask |= SUBSTRATE_TILE_BITMASK_SINGLE;
				}

				// Skipped as only used for Format=0
				// if (GBufferData.ShadingModelID == SHADINGMODELID_SUBSURFACE_PROFILE || GBufferData.ShadingModelID == SHADINGMODELID_EYE)
				// {
				// 	GroupTileBitmask |= SUBSTRATE_TILE_BITMASK_SSS;
				// }
			}
		#else // SUBSTRATE_GBUFFER_FORMAT==0
			FSubstrateAddressing SubstrateAddressing = GetSubstratePixelDataByteOffset(PixelCoord, uint2(View.BufferSizeAndInvSize.xy), MaxBytesPerPixel);

			// Load mini header.
			const uint PackedHeader = MaterialTextureArrayUAV[uint3(PixelCoord, 0)];
			FSubstratePixelHeader SubstratePixelHeader = UnpackSubstrateHeaderIn(PackedHeader, SubstrateAddressing, TopLayerTexture);

			const bool bIsSimple = SubstratePixelHeader.IsSimpleMaterial() || SubstratePixelHeader.ClosureCount == 0; // ClosureCount == 0 ensures that non-Substrate pixel, like sky pixels, won't make a simple tile flagged as complex
			const bool bIsSingle = !SubstratePixelHeader.IsSimpleMaterial() && SubstratePixelHeader.IsSingleMaterial();
			// Exactly one of the four complexity bits is set per valid pixel.
			if (SubstratePixelHeader.IsComplexSpecialMaterial())
			{
				GroupTileBitmask |= SUBSTRATE_TILE_BITMASK_COMPLEXSPECIAL;
			}
			else if (!bIsSingle && !bIsSimple)
			{
				GroupTileBitmask |= SUBSTRATE_TILE_BITMASK_COMPLEX;
			}
			else if (bIsSingle)
			{
				GroupTileBitmask |= SUBSTRATE_TILE_BITMASK_SINGLE;
			}
			else if (bIsSimple)
			{
				GroupTileBitmask |= SUBSTRATE_TILE_BITMASK_SIMPLE;
			}

			if (SubstratePixelHeader.HasSubsurface())
			{
				GroupTileBitmask |= SUBSTRATE_TILE_BITMASK_SSS;
			}

		#if PERMUTATION_DECAL
			// A pixel is flagged for decals only when both the stencil value and the DBuffer target mask are non-zero.
			const uint DBufferResponseMask = SceneStencilTexture.Load(uint3(PixelCoord, 0)) STENCIL_COMPONENT_SWIZZLE;
			const uint DBufferTargetMask = GetDBufferTargetMask(PixelCoord);
			if (DBufferResponseMask != 0 && DBufferTargetMask != 0)
			{
				GroupTileBitmask |= SUBSTRATE_TILE_BITMASK_DECAL;
			}
		#endif

		#if SUBSTRATE_OPAQUE_ROUGH_REFRACTION_ENABLED
			OpaqueRoughRefractionData = SubstrateUnpackOpaqueRoughRefractionData(OpaqueRoughRefractionTexture[PixelCoord]);
			if (OpaqueRoughRefractionData.OpaqueRoughRefractionEnabled > 0.0f)
			{
				GroupTileBitmask |= SUBSTRATE_TILE_BITMASK_ROUGH_REFRACT;
			}
		#endif

			// Output/Patch SSS data for legacy encoding (this allows to save ALU & bandwidth during the base pass)
			// The legacy mode field sits in the packed header directly above the HEADER_SINGLEENCODING_BIT_COUNT low bits.
			uint OptimisedLegacyMode = ((PackedHeader >> (HEADER_SINGLEENCODING_BIT_COUNT)) & HEADER_SINGLE_OPTLEGACYMODE_BIT_MASK);
			const bool bIsLegacyWrapOrWrapThin = OptimisedLegacyMode == SINGLE_OPTLEGACYMODE_SSSWRAP || OptimisedLegacyMode == SINGLE_OPTLEGACYMODE_TWO_SIDED_SSSWRAP; // Wrap and Wrap thin have same packing
			const bool bIsLegacySSSProfile = OptimisedLegacyMode == SINGLE_OPTLEGACYMODE_SSSPROFILE;
			if (bIsSingle && (bIsLegacyWrapOrWrapThin || bIsLegacySSSProfile))
			{
				GroupTileBitmask |= SUBSTRATE_TILE_BITMASK_SSS;

				if (bIsLegacyWrapOrWrapThin)
				{
					FSubstrateSubsurfaceHeader SSSHeader = (FSubstrateSubsurfaceHeader)0;
					if (OptimisedLegacyMode == SINGLE_OPTLEGACYMODE_SSSWRAP)
					{
						SubstrateSubSurfaceHeaderSetSSSType(SSSHeader, SSS_TYPE_WRAP);
					}
					else
					{
						SubstrateSubSurfaceHeaderSetSSSType(SSSHeader, SSS_TYPE_TWO_SIDED_WRAP);
					}
					// The 7-bit opacity is split across two words: 5 bits taken from the header (slice 0)
					// and 2 bits taken from slice 2, recombined here before unpacking.
					const uint Data0 = PackedHeader;
					const uint Data2 = MaterialTextureArrayUAV[uint3(PixelCoord, 2)];
					const uint PackedSSSWOpacity7bits = BitFieldExtractU32(Data0, 5, HEADER_SINGLEENCODING_BIT_COUNT + HEADER_SINGLE_OPTLEGACYMODE_BIT_COUNT) | (BitFieldExtractU32(Data2, 2, 30) << 5);
					const float Opacity = UnpackR7(PackedSSSWOpacity7bits);
					SubstrateSubSurfaceHeaderSetWrapOpacity(SSSHeader, Opacity);
					SubstrateStoreSubsurfaceHeader(MaterialTextureArrayUAV, FirstSliceStoringSubstrateSSSData, PixelCoord, SSSHeader.Bytes);
				}
				else if (bIsLegacySSSProfile)
				{
					// Profile id is the low 8 bits of slice 2; base color is the low 20 bits of slice 1.
					const uint Data1 = MaterialTextureArrayUAV[uint3(PixelCoord, 1)];
					const uint Data2 = MaterialTextureArrayUAV[uint3(PixelCoord, 2)];
					uint ProfileId = Data2 & 0xFF;
					float RadiusScale = UnpackG8(Data2);
					const uint PackedDiffuse20Bits = (Data1 & 0xFFFFF);
					const float3 BaseColor = UnpackR7G7B6Gamma2(PackedDiffuse20Bits);

					FSubstrateSubsurfaceHeader SSSHeader = (FSubstrateSubsurfaceHeader)0;
					SubstrateSubSurfaceHeaderSetSSSType(SSSHeader, SSS_TYPE_DIFFUSION_PROFILE);
					SubstrateSubSurfaceHeaderSetProfile(SSSHeader, RadiusScale, ProfileId);

					FSubstrateSubsurfaceExtras SSSExtras = (FSubstrateSubsurfaceExtras)0;
					SubstrateSubsurfaceExtrasSetBaseColor(SSSExtras, BaseColor);

					SubstrateStoreSubsurfaceHeader(MaterialTextureArrayUAV, FirstSliceStoringSubstrateSSSData, PixelCoord, SSSHeader.Bytes);
					SubstrateStoreSubsurfaceExtras(MaterialTextureArrayUAV, FirstSliceStoringSubstrateSSSData, PixelCoord, SSSExtras.Bytes);
				}
			}

			// Fine grain test for clearing based on CMask data
		#if PERMUTATION_CMASK
			// Fine grain check if clear is needed
			// Only done for CMask values strictly between 0 and 0xF: 0 was handled by the coarse clear above,
			// and 0xF skips the per-pixel top layer test entirely.
			bool bClearHeader = false;
			BRANCH
			if (CMask > 0u && CMask < 0xF)
			{
				bClearHeader = !SubstrateIsTopLayerMaterial(TopLayerTexture.Load(uint3(PixelCoord, 0)));
			}

			// Header clear
			BRANCH
			if (bClearHeader)
			{
				MaterialTextureArrayUAV[uint3(PixelCoord, 0)] = 0u;
			}
		#endif
		#endif // SUBSTRATE_GBUFFER_FORMAT==0
		}

	#if SUBSTRATE_GBUFFER_FORMAT==1
		// Still the per-thread bitmask here (the group merge happens below), so this is a per-pixel decision.
		BRANCH
		if ((GroupTileBitmask & SUBSTRATE_TILE_BITMASK_SSS) == 0)
		{
			// We must fill all the pixel which does not have subsurface scattering by default so that the SSS code is not executed where it should not.
			SubstrateStoreSubsurfaceHeader(MaterialTextureArrayUAV, FirstSliceStoringSubstrateSSSData, PixelCoord, 0u); // This is a good clear for FSubstrateSubsurfaceHeader, and we only need to clear the header.
		}
	#endif

		// Merge the per-thread bits into a single bitmask for the whole group.
	#if PERMUTATION_WAVE_OPS
		GroupTileBitmask = WaveActiveBitOr(GroupTileBitmask);
		// When a wave is smaller than the group, several waves cover the tile: combine their partial
		// results through the group-shared word.
		if (WaveGetLaneCount() < SUBSTRATE_TILE_SIZE * SUBSTRATE_TILE_SIZE)
		{
			if (LinearIndex == 0)
			{
				s_TileBitmask = 0;
			}
			GroupMemoryBarrierWithGroupSync();
			if (WaveIsFirstLane())
			{
				uint Unused;
				InterlockedOr(s_TileBitmask, GroupTileBitmask, Unused);
			}
			GroupMemoryBarrierWithGroupSync();
			GroupTileBitmask = s_TileBitmask;
		}
	#else // PERMUTATION_WAVE_OPS
		// Tree reduction in group-shared memory. The hard-coded strides (32 down to 2, then slots 0 and 1)
		// assume GROUP_THREAD_COUNT is 64.
		s_TileBitmask[LinearIndex] = GroupTileBitmask;
		GroupMemoryBarrierWithGroupSync();
		if (LinearIndex < 32)
		{
			s_TileBitmask[LinearIndex] = s_TileBitmask[LinearIndex] | s_TileBitmask[LinearIndex + 32];
		}
		GroupMemoryBarrierWithGroupSync();
		if (LinearIndex < 16)
		{
			s_TileBitmask[LinearIndex] = s_TileBitmask[LinearIndex] | s_TileBitmask[LinearIndex + 16];
		}
		GroupMemoryBarrierWithGroupSync();
		if (LinearIndex < 8)
		{
			s_TileBitmask[LinearIndex] = s_TileBitmask[LinearIndex] | s_TileBitmask[LinearIndex + 8];
		}
		GroupMemoryBarrierWithGroupSync();
		if (LinearIndex < 4)
		{
			s_TileBitmask[LinearIndex] = s_TileBitmask[LinearIndex] | s_TileBitmask[LinearIndex + 4];
		}
		GroupMemoryBarrierWithGroupSync();
		if (LinearIndex < 2)
		{
			s_TileBitmask[LinearIndex] = s_TileBitmask[LinearIndex] | s_TileBitmask[LinearIndex + 2];
		}
		GroupMemoryBarrierWithGroupSync();
		GroupTileBitmask = s_TileBitmask[0] | s_TileBitmask[1];
	#endif // PERMUTATION_WAVE_OPS

		// One thread per group publishes the tile. Tiles whose merged bitmask is zero are not listed at all.
		if (LinearIndex == 0 && GroupTileBitmask != 0)
		{
			const uint EncodedTile = SubstratePackTile(GroupId.xy, TileEncoding);

			// Primary list: the tile goes into exactly one of complex-special / complex / single / simple,
			// the most complex category present winning. For each list, InterlockedAdd on slot +1 of that
			// type's indirect draw args both counts the tile and returns the index to write it at.
		#if SUBSTRATE_COMPLEXSPECIALPATH
			if (GroupTileBitmask & SUBSTRATE_TILE_BITMASK_COMPLEXSPECIAL)
			{
				uint WriteToIndex;
				InterlockedAdd(TileDrawIndirectDataBufferUAV[GetSubstrateTileTypeDrawIndirectArgOffset_DWord(SUBSTRATE_TILE_TYPE_COMPLEX_SPECIAL) + 1], 1, WriteToIndex);
				TileListBufferUAV[GetTileListBufferOffsets(SUBSTRATE_TILE_TYPE_COMPLEX_SPECIAL) + WriteToIndex] = EncodedTile;
			}
			else
		#endif
			if (GroupTileBitmask & SUBSTRATE_TILE_BITMASK_COMPLEX)
			{
				uint WriteToIndex;
				InterlockedAdd(TileDrawIndirectDataBufferUAV[GetSubstrateTileTypeDrawIndirectArgOffset_DWord(SUBSTRATE_TILE_TYPE_COMPLEX) + 1], 1, WriteToIndex);
				TileListBufferUAV[GetTileListBufferOffsets(SUBSTRATE_TILE_TYPE_COMPLEX) + WriteToIndex] = EncodedTile;
			}
			else if (GroupTileBitmask & SUBSTRATE_TILE_BITMASK_SINGLE)
			{
				uint WriteToIndex;
				InterlockedAdd(TileDrawIndirectDataBufferUAV[GetSubstrateTileTypeDrawIndirectArgOffset_DWord(SUBSTRATE_TILE_TYPE_SINGLE) + 1], 1, WriteToIndex);
				TileListBufferUAV[GetTileListBufferOffsets(SUBSTRATE_TILE_TYPE_SINGLE) + WriteToIndex] = EncodedTile;
			}
			else // (GroupTileBitmask & SUBSTRATE_TILE_BITMASK_SIMPLE)
			{
				uint WriteToIndex;
				InterlockedAdd(TileDrawIndirectDataBufferUAV[GetSubstrateTileTypeDrawIndirectArgOffset_DWord(SUBSTRATE_TILE_TYPE_SIMPLE) + 1], 1, WriteToIndex);
				TileListBufferUAV[GetTileListBufferOffsets(SUBSTRATE_TILE_TYPE_SIMPLE) + WriteToIndex] = EncodedTile;
			}

		#if SUBSTRATE_OPAQUE_ROUGH_REFRACTION_ENABLED
			// Secondary lists: tiles with rough refraction, and tiles with SSS but without rough refraction.
			const bool bTileContainsOpaqueRoughRefraction = GroupTileBitmask & SUBSTRATE_TILE_BITMASK_ROUGH_REFRACT;
			const bool bTileContainsScreenSpaceSubsurfaceScattering = GroupTileBitmask & SUBSTRATE_TILE_BITMASK_SSS;
			if (bTileContainsOpaqueRoughRefraction)
			{
				uint WriteToIndex;
				InterlockedAdd(TileDrawIndirectDataBufferUAV[GetSubstrateTileTypeDrawIndirectArgOffset_DWord(SUBSTRATE_TILE_TYPE_ROUGH_REFRACT) + 1], 1, WriteToIndex);
				TileListBufferUAV[GetTileListBufferOffsets(SUBSTRATE_TILE_TYPE_ROUGH_REFRACT) + WriteToIndex] = EncodedTile;
			}
			if(bTileContainsScreenSpaceSubsurfaceScattering && !bTileContainsOpaqueRoughRefraction)
			{
				uint WriteToIndex;
				InterlockedAdd(TileDrawIndirectDataBufferUAV[GetSubstrateTileTypeDrawIndirectArgOffset_DWord(SUBSTRATE_TILE_TYPE_ROUGH_REFRACT_SSS_WITHOUT) + 1], 1, WriteToIndex);
				TileListBufferUAV[GetTileListBufferOffsets(SUBSTRATE_TILE_TYPE_ROUGH_REFRACT_SSS_WITHOUT) + WriteToIndex] = EncodedTile;
			}
		#endif

		#if PERMUTATION_DECAL
			// Decal lists: same complex / single / simple split, only for tiles flagged as receiving decals.
			// NOTE(review): the COMPLEXSPECIAL bit is not tested here, so a decal tile holding only
			// complex-special pixels falls through to the last (simple) branch - confirm this is intended.
			if (GroupTileBitmask & SUBSTRATE_TILE_BITMASK_DECAL)
			{
				if (GroupTileBitmask & SUBSTRATE_TILE_BITMASK_COMPLEX)
				{
					uint WriteToIndex;
					InterlockedAdd(TileDrawIndirectDataBufferUAV[GetSubstrateTileTypeDrawIndirectArgOffset_DWord(SUBSTRATE_TILE_TYPE_DECAL_COMPLEX) + 1], 1, WriteToIndex);
					TileListBufferUAV[GetTileListBufferOffsets(SUBSTRATE_TILE_TYPE_DECAL_COMPLEX) + WriteToIndex] = EncodedTile;
				}
				else if (GroupTileBitmask & SUBSTRATE_TILE_BITMASK_SINGLE)
				{
					uint WriteToIndex;
					InterlockedAdd(TileDrawIndirectDataBufferUAV[GetSubstrateTileTypeDrawIndirectArgOffset_DWord(SUBSTRATE_TILE_TYPE_DECAL_SINGLE) + 1], 1, WriteToIndex);
					TileListBufferUAV[GetTileListBufferOffsets(SUBSTRATE_TILE_TYPE_DECAL_SINGLE) + WriteToIndex] = EncodedTile;
				}
				else // (GroupTileBitmask & SUBSTRATE_TILE_BITMASK_SIMPLE)
				{
					uint WriteToIndex;
					InterlockedAdd(TileDrawIndirectDataBufferUAV[GetSubstrateTileTypeDrawIndirectArgOffset_DWord(SUBSTRATE_TILE_TYPE_DECAL_SIMPLE) + 1], 1, WriteToIndex);
					TileListBufferUAV[GetTileListBufferOffsets(SUBSTRATE_TILE_TYPE_DECAL_SIMPLE) + WriteToIndex] = EncodedTile;
				}
			}
		#endif
		}
	}
}
#endif // SHADER_TILE_CATEGORIZATION

////////////////////////////////////////////////////////////////////////////////////////////////////////////

#if
SHADER_MATERIAL_TILE_PREPARE_ARGS

// NOTE(review): the buffer declarations in this file carry no element type as written - confirm
// against the original source.
Buffer TileDrawIndirectDataBuffer;
RWBuffer TileDispatchIndirectDataBuffer;

// Converts the per-tile-type indirect draw arguments into indirect dispatch arguments.
// Thread N handles tile type N: slot +1 of the type's draw args becomes the dispatch X dimension,
// while Y and Z are set to 1.
[numthreads(32, 1, 1)]
void ArgsMainCS(uint2 DispatchThreadId : SV_DispatchThreadID)
{
	const uint TileType = DispatchThreadId.x;

	// The group is wider than the number of tile types: surplus threads have nothing to write.
	if (TileType >= SUBSTRATE_TILE_TYPE_COUNT)
	{
		return;
	}

	const uint DispatchArgBase = GetSubstrateTileTypeDispatchIndirectArgOffset_DWord(TileType);
	const uint DrawArgBase = GetSubstrateTileTypeDrawIndirectArgOffset_DWord(TileType);

	// We could have more than 65k tile in particular with complex multi-layer closure covering full
	TileDispatchIndirectDataBuffer[DispatchArgBase + 0] = TileDrawIndirectDataBuffer[DrawArgBase + 1];
	TileDispatchIndirectDataBuffer[DispatchArgBase + 1] = 1;
	TileDispatchIndirectDataBuffer[DispatchArgBase + 2] = 1;
}
#endif // SHADER_MATERIAL_TILE_PREPARE_ARGS

////////////////////////////////////////////////////////////////////////////////////////////////////////////

#if SHADER_CLOSURE_TILE_PREPARE_ARGS

int2 TileCount_Primary;
Buffer TileDrawIndirectDataBuffer;
RWBuffer TileDispatchIndirectDataBuffer;
RWBuffer TileDispatchPerThreadIndirectDataBuffer;
RWBuffer TileRaytracingIndirectDataBuffer;

// Writes the three sets of indirect arguments (per-tile dispatch, per-thread dispatch, ray tracing)
// for InTileCount tiles into argument slot OutOffset.
void WriteArgs(uint InTileCount, uint OutOffset)
{
	// All three buffers are addressed with the same argument offset.
	const uint ArgBase = GetSubstrateTileTypeDispatchIndirectArgOffset_DWord(OutOffset);

	// Tiles are laid out as rows of at most TileCount_Primary.x tiles.
	const uint TilesPerRow = min(InTileCount, uint(TileCount_Primary.x));
	const uint RowCount = DivideAndRoundUp(InTileCount, TileCount_Primary.x);

	// One group per tile.
	TileDispatchIndirectDataBuffer[ArgBase + 0] = TilesPerRow;
	TileDispatchIndirectDataBuffer[ArgBase + 1] = RowCount;
	TileDispatchIndirectDataBuffer[ArgBase + 2] = 1;

	// One thread per tile, in groups of SUBSTRATE_TILE_SIZE * SUBSTRATE_TILE_SIZE threads.
	TileDispatchPerThreadIndirectDataBuffer[ArgBase + 0] = DivideAndRoundUp(InTileCount, SUBSTRATE_TILE_SIZE * SUBSTRATE_TILE_SIZE);
	TileDispatchPerThreadIndirectDataBuffer[ArgBase + 1] = 1;
	TileDispatchPerThreadIndirectDataBuffer[ArgBase + 2] = 1;

	// Indirect raytracing args are mapped on ray count. Each tile is expanded into rays.
	TileRaytracingIndirectDataBuffer[ArgBase + 0] = TilesPerRow * SUBSTRATE_TILE_SIZE;
	TileRaytracingIndirectDataBuffer[ArgBase + 1] = RowCount * SUBSTRATE_TILE_SIZE;
	TileRaytracingIndirectDataBuffer[ArgBase + 2] = 1;
}

// Builds the closure tile indirect arguments for the three supported downsample factors.
// Thread N (N in 0..2) derives the tile count for its factor and writes argument slot N.
[numthreads(3, 1, 1)]
void ArgsMainCS(uint2 DispatchThreadId : SV_DispatchThreadID)
{
	const uint WriteOffset = DispatchThreadId.x;
	if (WriteOffset >= 3)
	{
		return;
	}

	const uint TileCount = TileDrawIndirectDataBuffer[0].x;

	// Downsample factor=1
	uint ScaledTileCount = TileCount;
	if (WriteOffset == 1)
	{
		// Downsample factor=2 - 4 subtiles per 8x8 tile
		ScaledTileCount = DivideAndRoundUp4(TileCount);
	}
	else if (WriteOffset == 2)
	{
		// Downsample factor=3 - 16 subtiles per 8x8 tile
		ScaledTileCount = DivideAndRoundUp16(TileCount);
	}

	WriteArgs(ScaledTileCount, WriteOffset);
}
#endif // SHADER_CLOSURE_TILE_PREPARE_ARGS

////////////////////////////////////////////////////////////////////////////////////////////////////////////

#if SHADER_CLOSURE_TILE

int2 ViewResolution;
uint MaxBytesPerPixel;
uint TileSizeLog2;
int2 TileCount_Primary;
Texture2D TopLayerTexture;
Texture2DArray MaterialTextureArray;
Buffer TileListBuffer;
uint TileListBufferOffset;
uint TileEncoding;

RWTexture2D RWClosureOffsetTexture;
RWBuffer RWClosureTileCountBuffer;
RWBuffer RWClosureTileBuffer;

// Per-thread scratch used to reduce the closure count over the group when wave ops are unavailable.
#if !PERMUTATION_WAVE_OPS
groupshared uint s_TileClosureCount[GROUP_THREAD_COUNT];
#endif

#if PERMUTATION_WAVE_OPS && COMPILER_SUPPORTS_WAVE_SIZE
WAVESIZE(64) // PERMUTATION_WAVE_OPS is true only when wave>=64 are available
#endif
// Closure tile entry point.
// One group processes one tile taken from TileListBuffer (entry TileListBufferOffset + GroupId.x); each thread
// handles one pixel of that tile. For every pixel inside the view rect holding at least one closure, the
// per-closure data offsets are recorded into RWClosureOffsetTexture. The maximum closure count over the tile
// is then computed, and thread 0 appends one closure tile entry per closure beyond the first.
// @param GroupThreadId - thread coordinate inside the tile
// @param GroupId       - .x selects the tile in the tile list
// @param LinearIndex   - flattened thread index inside the group, used for the reduction
[numthreads(SUBSTRATE_TILE_SIZE, SUBSTRATE_TILE_SIZE, 1)]
void ClosureTileMainCS(uint2 GroupThreadId : SV_GroupThreadID, uint2 GroupId : SV_GroupID, uint LinearIndex : SV_GroupIndex)
{
	const uint2 TileCoord = SubstrateUnpackTile(TileListBuffer[TileListBufferOffset + GroupId.x], TileEncoding);
	uint2 PixelCoord = TileCoord * SUBSTRATE_TILE_SIZE + GroupThreadId;
	// The bounds test is done in view-relative coordinates, before the view rect origin is added.
	const bool bIsInViewRect = all(PixelCoord < uint2(View.ViewRectMinAndSize.zw));
	PixelCoord += View.ViewRectMinAndSize.xy;

	uint ClosureCount = 0;
	if (bIsInViewRect)
	{
		FSubstrateAddressing SubstrateAddressing = GetSubstratePixelDataByteOffset(PixelCoord, uint2(View.BufferSizeAndInvSize.xy), MaxBytesPerPixel);
		FSubstratePixelHeader SubstratePixelHeader = UnpackSubstrateHeaderIn(MaterialTextureArray, SubstrateAddressing, TopLayerTexture);
		ClosureCount = min(SubstratePixelHeader.ClosureCount, SUBSTRATE_MATERIAL_CLOSURE_COUNT);

		if (ClosureCount > 0)
		{
			FSubstrateClosureOffset Offsets = (FSubstrateClosureOffset)0;
			Offsets.ClosureCount = ClosureCount;

			// Record where each closure starts, then unpack it; the unpacked result is discarded and the call
			// is presumably made only to advance SubstrateAddressing to the next closure - confirm against
			// UnpackSubstrateBSDFIn.
			UNROLL_N(SUBSTRATE_MATERIAL_CLOSURE_COUNT)
			for (uint ClosureIndex = 0; ClosureIndex < ClosureCount; ++ClosureIndex)
			{
				Offsets.ClosureOffsets[ClosureIndex] = SubstrateAddressing.CurrentIndex;
				UnpackSubstrateBSDFIn(MaterialTextureArray, SubstrateAddressing, SubstratePixelHeader);
			}

			RWClosureOffsetTexture[PixelCoord] = PackClosureOffset(Offsets);
		}
	}

	// Maximum closure count over the whole tile.
#if PERMUTATION_WAVE_OPS
	const uint TileClosureCount = WaveActiveMax(ClosureCount);
#else // PERMUTATION_WAVE_OPS
	// Tree reduction in group-shared memory. The hard-coded strides (32 down to 2, then slots 0 and 1)
	// assume GROUP_THREAD_COUNT is 64.
	s_TileClosureCount[LinearIndex] = ClosureCount;
	GroupMemoryBarrierWithGroupSync();
	if (LinearIndex < 32)
	{
		s_TileClosureCount[LinearIndex] = max(s_TileClosureCount[LinearIndex], s_TileClosureCount[LinearIndex + 32]);
	}
	GroupMemoryBarrierWithGroupSync();
	if (LinearIndex < 16)
	{
		s_TileClosureCount[LinearIndex] = max(s_TileClosureCount[LinearIndex], s_TileClosureCount[LinearIndex + 16]);
	}
	GroupMemoryBarrierWithGroupSync();
	if (LinearIndex < 8)
	{
		s_TileClosureCount[LinearIndex] = max(s_TileClosureCount[LinearIndex], s_TileClosureCount[LinearIndex + 8]);
	}
	GroupMemoryBarrierWithGroupSync();
	if (LinearIndex < 4)
	{
		s_TileClosureCount[LinearIndex] = max(s_TileClosureCount[LinearIndex], s_TileClosureCount[LinearIndex + 4]);
	}
	GroupMemoryBarrierWithGroupSync();
	if (LinearIndex < 2)
	{
		s_TileClosureCount[LinearIndex] = max(s_TileClosureCount[LinearIndex], s_TileClosureCount[LinearIndex + 2]);
	}
	GroupMemoryBarrierWithGroupSync();
	// The final step reads slots 0 and 1 explicitly. Indexing with LinearIndex + 1 here would make the last
	// thread of the group read one element past the end of s_TileClosureCount; only thread 0 consumes the
	// result, so using fixed slots yields the same value without the out-of-bounds access.
	const uint TileClosureCount = max(s_TileClosureCount[0], s_TileClosureCount[1]);
#endif // PERMUTATION_WAVE_OPS

#if SUBSTRATE_MATERIAL_CLOSURE_COUNT > 1
	if (LinearIndex == 0)
	{
		if (TileClosureCount > 1)
		{
			// Store only tile data for Closure[1..X]. Closure[0] is implicitly stored into the first layer
			// InterlockedAdd reserves (TileClosureCount - 1) consecutive entries and returns the first index.
			uint StoreIndex = 0;
			InterlockedAdd(RWClosureTileCountBuffer[0], TileClosureCount - 1, StoreIndex);

			FSubstrateClosureTile Tile;
			Tile.TileCoord = TileCoord;
			Tile.ClosureCount = TileClosureCount;
			for (uint ClosureIndex = 1; ClosureIndex < TileClosureCount; ++ClosureIndex)
			{
				Tile.ClosureIndex = ClosureIndex;
				RWClosureTileBuffer[StoreIndex + ClosureIndex - 1] = PackClosureTile(Tile);
			}
		}
	}
#endif
}
#endif // SHADER_CLOSURE_TILE