// Copyright Epic Games, Inc. All Rights Reserved.
|
|
|
|
#pragma once
|
|
|
|
#include "../Common.ush"
|
|
#include "NaniteDataDecode.ush"
|
|
#include "NaniteHierarchyTraversalCommon.ush"
|
|
|
|
// Identifiers for the culling pass variants.
// NOTE(review): the meaning of each pass is inferred from its name only - confirm against the culling shaders that consume these values.
#define CULLING_PASS_NO_OCCLUSION		0
#define CULLING_PASS_OCCLUSION_MAIN		1
#define CULLING_PASS_OCCLUSION_POST		2
#define CULLING_PASS_EXPLICIT_LIST		3
|
|
|
|
// Unpacked form of a candidate node entry. See PackCandidateNode / UnpackCandidateNode for the packed layout.
struct FCandidateNode
{
	// Packed into the low NANITE_NUM_CULLING_FLAG_BITS bits of the first dword.
	uint	Flags;

	// Packed into the second dword, above the NodeIndex bits.
	uint	ViewId;

	// Packed into the first dword, above the Flags bits.
	uint	InstanceId;

	// Packed into the low NANITE_MAX_NODES_PER_PRIMITIVE_BITS bits of the second dword.
	uint	NodeIndex;

	// Only stored when NANITE_ASSEMBLY_DATA is set; otherwise unpacked as NANITE_MAX_ASSEMBLY_TRANSFORMS.
	uint	AssemblyTransformIndex;

	// Stored in the last packed dword; unpacked as 0xFFFFFFFFu when the caller says no mask was stored.
	uint	EnabledBitmask;
};
|
|
|
|
// Size of one packed candidate node in dwords. The post pass stores one more dword than the main pass
// (presumably the enabled bitmask - confirm against the callers of UnpackCandidateNode).
// NOTE: This must match Nanite::FGlobalResources::GetCandidateNodeSize(bPostPass) / 4
#if NANITE_ASSEMBLY_DATA
	#define NANITE_CANDIDATE_NODE_SIZE_DWORDS(IS_POSTPASS) (3u + ((IS_POSTPASS) ? 1u : 0u))
#else
	#define NANITE_CANDIDATE_NODE_SIZE_DWORDS(IS_POSTPASS) (2u + ((IS_POSTPASS) ? 1u : 0u))
#endif
|
|
|
|
// Returns the number of dwords occupied by one packed candidate node for the given pass.
uint GetCandidateNodeDwords(bool bPostPass)
{
	const uint NumDwords = NANITE_CANDIDATE_NODE_SIZE_DWORDS(bPostPass);
	return NumDwords;
}
|
|
// Returns the number of dwords occupied by one packed visible cluster.
// One extra dword is added on top of NANITE_CANDIDATE_CLUSTER_SIZE_DWORDS when page data is present.
uint GetVisibleClusterDwords(bool bHasPageData)
{
	const uint PageDataDwords = bHasPageData ? 1u : 0u;
	return NANITE_CANDIDATE_CLUSTER_SIZE_DWORDS + PageDataDwords;
}
|
|
|
|
// Returns the byte offset at which the candidate nodes of the given pass start.
// Main pass nodes start at offset 0; post pass nodes start after MaxNodes main-pass-sized entries.
uint GetCandidateNodesOffset(bool bPostPass)
{
	const uint MainPassRegionBytes = MaxNodes * GetCandidateNodeDwords(false) * 4u;
	return bPostPass ? MainPassRegionBytes : 0u;
}
|
|
// Returns the byte offset at which the cluster batches of the given pass start.
// Main pass batches start at offset 0; post pass batches start after GetMaxClusterBatches() dwords.
uint GetClusterBatchesOffset(bool bPostPass)
{
	const uint MainPassRegionBytes = GetMaxClusterBatches() * 4u;
	return bPostPass ? MainPassRegionBytes : 0u;
}
|
|
|
|
// Generates a function that writes the first NumComponents dwords of Data to a byte address buffer.
// Elements are tightly packed: element Index lives at OffsetInBytes + Index * NumComponents * 4 bytes.
// Nothing is written when NumComponents is outside [1, 4].
// (Do not add '//' comments inside the macro body: they would swallow the line continuation.)
#define STORE_COMPONENTS_FUNCTION(BufferType, FunctionName) \
void FunctionName(BufferType DataBuffer, uint NumComponents, uint OffsetInBytes, uint Index, uint4 Data) \
{ \
	const uint StrideInBytes = NumComponents * 4u; \
	const uint Address = OffsetInBytes + Index * StrideInBytes; \
	if (NumComponents == 1u) \
	{ \
		DataBuffer.Store(Address, Data.x); \
	} \
	else if (NumComponents == 2u) \
	{ \
		DataBuffer.Store2(Address, Data.xy); \
	} \
	else if (NumComponents == 3u) \
	{ \
		DataBuffer.Store3(Address, Data.xyz); \
	} \
	else if (NumComponents == 4u) \
	{ \
		DataBuffer.Store4(Address, Data); \
	} \
}

// Globally coherent and regular buffer variants.
STORE_COMPONENTS_FUNCTION(RWCoherentByteAddressBuffer, StoreComponentsCoherent)
STORE_COMPONENTS_FUNCTION(RWByteAddressBuffer, StoreComponents)
|
|
|
|
// Generates a function that reads NumComponents dwords from a byte address buffer into the leading
// components of a uint4; the remaining components are zero.
// Elements are tightly packed: element Index lives at OffsetInBytes + Index * NumComponents * 4 bytes.
// Returns all zeros when NumComponents is outside [1, 4].
// (Do not add '//' comments inside the macro body: they would swallow the line continuation.)
#define LOAD_COMPONENTS_FUNCTION(BufferType, FunctionName) \
uint4 FunctionName(BufferType DataBuffer, uint NumComponents, uint OffsetInBytes, uint Index) \
{ \
	const uint StrideInBytes = NumComponents * 4u; \
	const uint Address = OffsetInBytes + Index * StrideInBytes; \
	uint4 Result = uint4(0, 0, 0, 0); \
	if (NumComponents == 1u) \
	{ \
		Result.x = DataBuffer.Load(Address); \
	} \
	else if (NumComponents == 2u) \
	{ \
		Result.xy = DataBuffer.Load2(Address); \
	} \
	else if (NumComponents == 3u) \
	{ \
		Result.xyz = DataBuffer.Load3(Address); \
	} \
	else if (NumComponents == 4u) \
	{ \
		Result = DataBuffer.Load4(Address); \
	} \
	return Result; \
}

// Globally coherent and regular buffer variants.
LOAD_COMPONENTS_FUNCTION(RWCoherentByteAddressBuffer, LoadComponentsCoherent)
LOAD_COMPONENTS_FUNCTION(RWByteAddressBuffer, LoadComponents)
|
|
|
|
// Packs a candidate node into up to four dwords:
//   x = InstanceId (high bits) | Flags (low NANITE_NUM_CULLING_FLAG_BITS bits)
//   y = ViewId (high bits)     | NodeIndex (low NANITE_MAX_NODES_PER_PRIMITIVE_BITS bits)
//   z = AssemblyTransformIndex when NANITE_ASSEMBLY_DATA, otherwise EnabledBitmask
//   w = EnabledBitmask when NANITE_ASSEMBLY_DATA, otherwise 0
uint4 PackCandidateNode(FCandidateNode Node)
{
	// Leave at least one bit unused in each of the fields, so 0xFFFFFFFFu is never a valid value.
	const uint InstanceAndFlags	= (Node.InstanceId << NANITE_NUM_CULLING_FLAG_BITS) | Node.Flags;
	const uint ViewAndNode		= (Node.ViewId << NANITE_MAX_NODES_PER_PRIMITIVE_BITS) | Node.NodeIndex;

#if NANITE_ASSEMBLY_DATA
	const uint4 RawData = uint4(InstanceAndFlags, ViewAndNode, Node.AssemblyTransformIndex, Node.EnabledBitmask);
#else
	const uint4 RawData = uint4(InstanceAndFlags, ViewAndNode, Node.EnabledBitmask, 0u);
#endif

	checkSlow(RawData.x != 0xFFFFFFFFu && RawData.y != 0xFFFFFFFFu && RawData.z != 0xFFFFFFFFu && RawData.w != 0xFFFFFFFFu);

	return RawData;
}
|
|
|
|
// Inverse of PackCandidateNode.
// bHasEnabledMask: when false, the stored mask dword is ignored and EnabledBitmask is set to 0xFFFFFFFFu.
FCandidateNode UnpackCandidateNode(uint4 RawData, bool bHasEnabledMask)
{
	// The position of the enabled mask depends on whether an assembly transform index is stored.
#if NANITE_ASSEMBLY_DATA
	const uint AssemblyTransformIndex	= RawData.z;
	const uint StoredEnabledBitmask		= RawData.w;
#else
	const uint AssemblyTransformIndex	= NANITE_MAX_ASSEMBLY_TRANSFORMS;
	const uint StoredEnabledBitmask		= RawData.z;
#endif

	FCandidateNode Node;

	// First dword: flags in the low bits, instance id above them.
	Node.Flags		= BitFieldExtractU32(RawData.x, NANITE_NUM_CULLING_FLAG_BITS, 0);
	Node.InstanceId	= BitFieldExtractU32(RawData.x, NANITE_MAX_INSTANCES_BITS, NANITE_NUM_CULLING_FLAG_BITS);

	// Second dword: node index in the low bits, view id above them.
	Node.NodeIndex	= BitFieldExtractU32(RawData.y, NANITE_MAX_NODES_PER_PRIMITIVE_BITS, 0);
	Node.ViewId		= BitFieldExtractU32(RawData.y, NANITE_MAX_VIEWS_PER_CULL_RASTERIZE_PASS_BITS, NANITE_MAX_NODES_PER_PRIMITIVE_BITS);

	Node.AssemblyTransformIndex	= AssemblyTransformIndex;
	Node.EnabledBitmask			= bHasEnabledMask ? StoredEnabledBitmask : 0xFFFFFFFFu;

	return Node;
}
|
|
|
|
// TODO: Load/Store/Clear each have a separate globally coherent buffer version. Unify them with templates.
|
|
// Loads the packed data of visible cluster ClusterIndex from a globally coherent buffer.
// Clusters are stored tightly packed from byte offset 0.
uint4 LoadVisibleClusterDataCoherent(RWCoherentByteAddressBuffer DataBuffer, uint ClusterIndex, bool bHasPageData)
{
	checkSlow(ClusterIndex < MaxCandidateClusters);

	const uint NumDwords			= GetVisibleClusterDwords(bHasPageData);
	const uint BaseOffsetInBytes	= 0u;
	return LoadComponentsCoherent(DataBuffer, NumDwords, BaseOffsetInBytes, ClusterIndex);
}
|
|
|
|
// Loads the packed data of visible cluster ClusterIndex.
// Clusters are stored tightly packed from byte offset 0.
uint4 LoadVisibleClusterData(RWByteAddressBuffer DataBuffer, uint ClusterIndex, bool bHasPageData)
{
	checkSlow(ClusterIndex < MaxCandidateClusters);

	const uint NumDwords			= GetVisibleClusterDwords(bHasPageData);
	const uint BaseOffsetInBytes	= 0u;
	return LoadComponents(DataBuffer, NumDwords, BaseOffsetInBytes, ClusterIndex);
}
|
|
|
|
// Packs VisibleCluster and writes it to slot ClusterIndex of a globally coherent buffer.
// Clusters are stored tightly packed from byte offset 0.
void StoreVisibleClusterCoherent(RWCoherentByteAddressBuffer DataBuffer, uint ClusterIndex, FVisibleCluster VisibleCluster, bool bHasPageData)
{
	const uint NumDwords			= GetVisibleClusterDwords(bHasPageData);
	const uint BaseOffsetInBytes	= 0u;
	const uint4 PackedCluster		= PackVisibleCluster(VisibleCluster, bHasPageData);

	StoreComponentsCoherent(DataBuffer, NumDwords, BaseOffsetInBytes, ClusterIndex, PackedCluster);
}
|
|
|
|
// Packs VisibleCluster and writes it to slot ClusterIndex of the buffer.
// Clusters are stored tightly packed from byte offset 0.
void StoreVisibleCluster(RWByteAddressBuffer DataBuffer, uint ClusterIndex, FVisibleCluster VisibleCluster, bool bHasPageData)
{
	const uint NumDwords			= GetVisibleClusterDwords(bHasPageData);
	const uint BaseOffsetInBytes	= 0u;
	const uint4 PackedCluster		= PackVisibleCluster(VisibleCluster, bHasPageData);

	StoreComponents(DataBuffer, NumDwords, BaseOffsetInBytes, ClusterIndex, PackedCluster);
}
|
|
|
|
// Asserts (checkSlow) that NodeIndex is below MaxNodes.
// MaxNodes is deliberately copied into a local before being handed to checkSlow: this works around a shader
// macro expansion issue (incorrect expansion of self-referential macros) where, e.g.,
// #define MaxNodes NaniteRaster.MaxNodes expands to things like NaniteRaster.NaniteRaster.NaniteRaster.MaxNodes
// when used as a parameter to a macro.
void CheckNodeIndexHelper(uint NodeIndex)
{
	const uint NodeLimit = MaxNodes;
	checkSlow(NodeIndex < NodeLimit);
}
|
|
|
|
// Loads the packed data of candidate node NodeIndex for the given pass from a globally coherent buffer.
uint4 LoadCandidateNodeDataCoherent(RWCoherentByteAddressBuffer DataBuffer, uint NodeIndex, bool bPostPass)
{
	CheckNodeIndexHelper(NodeIndex);

	const uint NumDwords			= GetCandidateNodeDwords(bPostPass);
	const uint BaseOffsetInBytes	= GetCandidateNodesOffset(bPostPass);
	return LoadComponentsCoherent(DataBuffer, NumDwords, BaseOffsetInBytes, NodeIndex);
}
|
|
|
|
// Loads the packed data of candidate node NodeIndex for the given pass.
uint4 LoadCandidateNodeData(RWByteAddressBuffer DataBuffer, uint NodeIndex, bool bPostPass)
{
	CheckNodeIndexHelper(NodeIndex);

	const uint NumDwords			= GetCandidateNodeDwords(bPostPass);
	const uint BaseOffsetInBytes	= GetCandidateNodesOffset(bPostPass);
	return LoadComponents(DataBuffer, NumDwords, BaseOffsetInBytes, NodeIndex);
}
|
|
|
|
// Writes already-packed candidate node data to slot NodeIndex of the given pass in a globally coherent buffer.
void StoreCandidateNodeDataCoherent(RWCoherentByteAddressBuffer DataBuffer, uint NodeIndex, uint4 RawData, bool bPostPass)
{
	CheckNodeIndexHelper(NodeIndex);

	const uint NumDwords			= GetCandidateNodeDwords(bPostPass);
	const uint BaseOffsetInBytes	= GetCandidateNodesOffset(bPostPass);
	StoreComponentsCoherent(DataBuffer, NumDwords, BaseOffsetInBytes, NodeIndex, RawData);
}
|
|
|
|
// Writes already-packed candidate node data to slot NodeIndex of the given pass.
void StoreCandidateNodeData(RWByteAddressBuffer DataBuffer, uint NodeIndex, uint4 RawData, bool bPostPass)
{
	CheckNodeIndexHelper(NodeIndex);

	const uint NumDwords			= GetCandidateNodeDwords(bPostPass);
	const uint BaseOffsetInBytes	= GetCandidateNodesOffset(bPostPass);
	StoreComponents(DataBuffer, NumDwords, BaseOffsetInBytes, NodeIndex, RawData);
}
|