Files
UnrealEngine/Engine/Shaders/Private/Nanite/NaniteAttributeDecode.ush
Brandyn / Techy fcc1b09210 init
2026-04-04 15:40:51 -05:00

907 lines
31 KiB
HLSL

// Copyright Epic Games, Inc. All Rights Reserved.
#pragma once
#include "NaniteDataDecode.ush"
#include "../Barycentrics.ush"
// NOTE: This must match Nanite::FGlobalResources::GetFixedFunctionBinMask
// Raster bin indices less than or equal to this mask are treated as fixed function bins (see RemapRasterBin).
// The mask is one bit wider when NANITE_VOXEL_DATA is enabled.
#if NANITE_VOXEL_DATA
#define NANITE_FIXED_FUNCTION_BIN_MASK 0x1Fu
#else
#define NANITE_FIXED_FUNCTION_BIN_MASK 0xFu
#endif
// Decoded per-vertex attributes as produced by GetRawAttributeData.
// The size of this struct is checked by a static assert next to its WaveReadLaneAt overload.
struct FNaniteRawAttributeData
{
float4 TangentXAndSign; // xyz = tangent, w = bitangent sign
float3 TangentZ; // Normal
float4 Color; // Vertex color; 8-bit channel values scaled by 1/255 when decoded
float2 TexCoords[NANITE_MAX_UVS]; // One UV pair per texture coordinate set
};
// Tangent frame stored as a tangent (plus bitangent sign) and a normal; the bitangent is derived on demand.
struct FNaniteTangentBasis
{
	float4 TangentXAndSign;	// Tangent and Bitangent Sign Bit
	float3 TangentZ;		// Normal

	// Bitangent: cross(normal, tangent) flipped by the stored sign.
	float3 DeriveTangentY()
	{
		const float3 UnsignedBitangent = cross(TangentZ, TangentXAndSign.xyz);
		return UnsignedBitangent * TangentXAndSign.w;
	}

	// Rebuilds TangentX from the bitangent and normal, which can correct some quantization errors.
	// The error is most visible in specular on a mesh with a smoothed UV seam
	// (the normal is smooth, but tangents vary across the seam).
	void RecalculateTangentX()
	{
		const float3 Bitangent = DeriveTangentY();
		const float3 UnsignedTangent = cross(Bitangent, TangentZ);
		TangentXAndSign.xyz = UnsignedTangent * TangentXAndSign.w;
	}

	// Normalizes the tangent and the normal; the sign component is left untouched.
	void Normalize()
	{
		TangentZ = normalize(TangentZ);
		TangentXAndSign.xyz = normalize(TangentXAndSign.xyz);
	}
};
// Copies the tangent and normal out of decoded attribute data into a tangent basis.
FNaniteTangentBasis MakeTangentBasis(FNaniteRawAttributeData RawAttributeData)
{
	FNaniteTangentBasis Basis;
	Basis.TangentZ = RawAttributeData.TangentZ;
	Basis.TangentXAndSign = RawAttributeData.TangentXAndSign;
	return Basis;
}
HLSL_STATIC_ASSERT(sizeof(FNaniteTangentBasis) == 28, "Unexpected size of FNaniteTangentBasis. Update WaveReadLaneAt to reflect changes.");

// Reads a whole tangent basis from lane SrcIndex, one member at a time.
FNaniteTangentBasis WaveReadLaneAt(FNaniteTangentBasis In, uint SrcIndex)
{
	FNaniteTangentBasis Result;
	Result.TangentZ = WaveReadLaneAt(In.TangentZ, SrcIndex);
	Result.TangentXAndSign = WaveReadLaneAt(In.TangentXAndSign, SrcIndex);
	return Result;
}
HLSL_STATIC_ASSERT(sizeof(FNaniteRawAttributeData) == 44 + 8 * NANITE_MAX_UVS, "Unexpected size of FNaniteRawAttributeData. Update WaveReadLaneAt to reflect changes.");

// Reads a whole set of decoded vertex attributes from lane SrcIndex, one member at a time.
FNaniteRawAttributeData WaveReadLaneAt(FNaniteRawAttributeData In, uint SrcIndex)
{
	FNaniteRawAttributeData Result;
	Result.TangentXAndSign = WaveReadLaneAt(In.TangentXAndSign, SrcIndex);
	Result.TangentZ = WaveReadLaneAt(In.TangentZ, SrcIndex);
	Result.Color = WaveReadLaneAt(In.Color, SrcIndex);

	UNROLL
	for (uint UVIndex = 0; UVIndex < NANITE_MAX_UVS; ++UVIndex)
	{
		Result.TexCoords[UVIndex] = WaveReadLaneAt(In.TexCoords[UVIndex], SrcIndex);
	}

	return Result;
}
// Size in bytes of one packed UV header (loaded as a uint2 by GetUVHeader).
#define SIZEOF_PACKED_UV_HEADER 8
// Unpacked quantization parameters for one UV set.
struct FUVHeader
{
uint2 Min; // Per-component minimum; added to the packed value before float decoding
uint2 NumBits; // Per-component number of bits stored in the attribute stream
uint NumMantissaBits; // Mantissa bit count used by DecodeUVFloat
};
// Size in bytes of the packed bone influence header (GetBoneInfluenceHeader reads it as a uint2).
#define SIZEOF_PACKED_BONE_INFLUENCE_HEADER 8
// Unpacked description of a cluster's per-vertex bone influence bit stream.
struct FBoneInfluenceHeader
{
uint DataAddress; // Address of the influence data (page base address + packed offset)
uint NumVertexBoneInfluences; // Number of influences stored per vertex
uint NumVertexBoneIndexBits; // Bits used per bone index
uint NumVertexBoneWeightBits; // Bits used per bone weight; 0 makes DecodeVertexBoneInfluence return a weight of 1.0
};
// Unpacks a UV header. Each dword holds a bit count in its low 5 bits and a minimum in the remaining upper bits.
FUVHeader UnpackUVHeader(uint2 Data)
{
	FUVHeader Header;
	Header.Min = uint2(Data.x >> 5, Data.y >> 5);
	Header.NumBits = uint2(BitFieldExtractU32(Data.x, 5, 0), BitFieldExtractU32(Data.y, 5, 0));
	Header.NumMantissaBits = NANITE_UV_FLOAT_NUM_MANTISSA_BITS;	// Hardcode for now, but eventually make this a mesh setting.
	return Header;
}
// Loads and unpacks the UV header at position Index in the header array starting at StartOffset (bytes).
FUVHeader GetUVHeader(ByteAddressBuffer InputBuffer, uint StartOffset, uint Index)
{
	const uint HeaderAddress = StartOffset + Index * SIZEOF_PACKED_UV_HEADER;
	return UnpackUVHeader(InputBuffer.Load2(HeaderAddress));
}
// Read-write buffer overload: loads and unpacks the UV header at position Index in the header array starting at StartOffset (bytes).
FUVHeader GetUVHeader(RWByteAddressBuffer InputBuffer, uint StartOffset, uint Index)
{
	const uint HeaderAddress = StartOffset + Index * SIZEOF_PACKED_UV_HEADER;
	return UnpackUVHeader(InputBuffer.Load2(HeaderAddress));
}
// Loads the cluster's bone influence header, which is read from directly after the cluster's UV headers in the decode info.
FBoneInfluenceHeader GetBoneInfluenceHeader(FCluster Cluster)
{
	const uint HeaderAddress = Cluster.PageBaseAddress + Cluster.DecodeInfoOffset + Cluster.NumUVs * SIZEOF_PACKED_UV_HEADER;
	const uint2 Packed = ClusterPageData.Load2(HeaderAddress);

	FBoneInfluenceHeader Result;
	// First dword: 22-bit page-relative data offset, then 10-bit influence count
	Result.DataAddress = Cluster.PageBaseAddress + BitFieldExtractU32(Packed.x, 22, 0);
	Result.NumVertexBoneInfluences = BitFieldExtractU32(Packed.x, 10, 22);
	// Second dword: 6-bit index width, then 5-bit weight width
	Result.NumVertexBoneIndexBits = BitFieldExtractU32(Packed.y, 6, 0);
	Result.NumVertexBoneWeightBits = BitFieldExtractU32(Packed.y, 5, 6);
	return Result;
}
// Decodes a UV component from its compact float encoding.
// Encoded values that do not exceed the exponent+mantissa mask are negative and stored bit-inverted.
float DecodeUVFloat(uint EncodedValue, uint NumMantissaBits)
{
	const uint NumValueBits = NANITE_UV_FLOAT_NUM_EXPONENT_BITS + NumMantissaBits;
	const uint ValueMask = BitFieldMaskU32(NumValueBits, 0);
	const bool bNegative = (EncodedValue <= ValueMask);

	uint ValueBits = EncodedValue;
	if (bNegative)
	{
		ValueBits = ~EncodedValue;
	}
	ValueBits &= ValueMask;

	// Align the mantissa with the 23-bit float mantissa and bias so the smallest encoding maps to 0.5f (0x3F000000)
	float Magnitude = asfloat(0x3F000000u + (ValueBits << (23 - NumMantissaBits)));
	Magnitude = min(Magnitude * 2.0f - 1.0f, Magnitude);	// Stretch denormals from [0.5,1.0] to [0.0,1.0]

	return bNegative ? -Magnitude : Magnitude;
}
// Rebases a packed UV pair by the header minimum and decodes both components to float.
float2 UnpackTexCoord(uint2 Packed, FUVHeader UVHeader)
{
	const uint2 Encoded = Packed + UVHeader.Min;

	float2 UV;
	UV.x = DecodeUVFloat(Encoded.x, UVHeader.NumMantissaBits);
	UV.y = DecodeUVFloat(Encoded.y, UVHeader.NumMantissaBits);
	return UV;
}
// Decodes a unit normal from two Bits-wide quantized components packed into the low 2 * Bits bits of Packed.
float3 UnpackNormal(uint Packed, uint Bits)
{
	const uint MaxValue = BitFieldMaskU32(Bits, 0);
	const uint2 Quantized = uint2(BitFieldExtractU32(Packed, Bits, 0), BitFieldExtractU32(Packed, Bits, Bits));

	// Map [0, MaxValue] to [-1, 1]
	const float2 Coords = Quantized * (2.0f / MaxValue) - 1.0f;

	float3 Normal = float3(Coords.xy, 1.0 - abs(Coords.x) - abs(Coords.y));

	// When z is negative, pull x and y back towards zero by the overshoot amount
	const float Fold = saturate(-Normal.z);
	Normal.xy += select(Normal.xy >= 0.0, -Fold, Fold);

	return normalize(Normal);
}
// Upper bound on the number of attribute bits per vertex for the given number of UV sets.
uint CalculateMaxAttributeBits(uint NumTexCoordInterpolators)
{
	const uint MaxNormalBits = 2u * NANITE_MAX_NORMAL_QUANTIZATION_BITS;
	const uint MaxTangentBits = 1u + NANITE_MAX_TANGENT_QUANTIZATION_BITS;	// Sign bit + angle
	const uint MaxColorBits = 4u * NANITE_MAX_COLOR_QUANTIZATION_BITS;
	const uint MaxTexCoordBits = NumTexCoordInterpolators * (2u * NANITE_MAX_TEXCOORD_COMPONENT_BITS);

	return MaxNormalBits + MaxTangentBits + MaxColorBits + MaxTexCoordBits;
}
// Splits an encoded material table entry into its triangle range and material index.
// Bit layout (low to high):
//   uint32 TriStart      :  8; // max 128 triangles
//   uint32 TriLength     :  8; // max 128 triangles
//   uint32 MaterialIndex :  6; // max 64 materials
//   uint32 Padding       : 10;
void DecodeMaterialRange(uint EncodedRange, out uint TriStart, out uint TriLength, out uint MaterialIndex)
{
	MaterialIndex	= BitFieldExtractU32(EncodedRange, 6, 16);
	TriLength		= BitFieldExtractU32(EncodedRange, 8, 8);
	TriStart		= BitFieldExtractU32(EncodedRange, 8, 0);
}
// A cluster uses the inline (up to three material) fast path when its first material range is non-empty.
bool IsMaterialFastPath(FCluster InCluster)
{
	const bool bHasInlineRanges = (InCluster.Material0Length > 0);
	return bHasInlineRanges;
}
// Number of material ranges in the cluster: up to three on the fast path, otherwise the material table length.
uint GetMaterialCount(FCluster InCluster)
{
	if (!IsMaterialFastPath(InCluster))
	{
		return InCluster.MaterialTableLength;
	}

	// The third range length is implicit: whatever remains after the first two
	const uint Material2Length = InCluster.MaterialTotalLength - InCluster.Material0Length - InCluster.Material1Length;

	uint Count = 1;
	if (InCluster.Material1Length > 0)
	{
		Count++;
	}
	if (Material2Length > 0)
	{
		Count++;
	}
	return Count;
}
// Returns the cluster-relative material index covering triangle InTriIndex.
// Yields 0xFFFFFFFF when the slow path finds no material table range containing the triangle.
uint GetRelativeMaterialIndex(FCluster InCluster, uint InTriIndex)
{
	uint Result = 0xFFFFFFFF;

	BRANCH
	if (!IsMaterialFastPath(InCluster))
	{
		// Slow path: linear search through the encoded material range table
		const uint TableBase = InCluster.PageBaseAddress + InCluster.MaterialTableOffset * 4;

		LOOP for (uint EntryIndex = 0; EntryIndex < InCluster.MaterialTableLength; ++EntryIndex)
		{
			const uint EncodedRange = ClusterPageData.Load(TableBase + EntryIndex * 4);

			uint RangeStart;
			uint RangeLength;
			uint RangeMaterialIndex;
			DecodeMaterialRange(EncodedRange, RangeStart, RangeLength, RangeMaterialIndex);

			if (InTriIndex >= RangeStart && InTriIndex < (RangeStart + RangeLength))
			{
				Result = RangeMaterialIndex;
				break;
			}
		}
	}
	else
	{
		// Fast path: up to three consecutive ranges stored inline in the cluster
		const uint Range0End = InCluster.Material0Length;
		const uint Range1End = InCluster.Material0Length + InCluster.Material1Length;

		if (InTriIndex < Range0End)
		{
			Result = InCluster.Material0Index;
		}
		else if (InTriIndex < Range1End)
		{
			Result = InCluster.Material1Index;
		}
		else
		{
			Result = InCluster.Material2Index;
		}
	}

	return Result;
}
// Per-primitive Nanite material info, unpacked from Scene.NaniteMaterials.PrimitiveMaterialData.
struct FNaniteMaterialPrimitiveData
{
uint MaterialBufferOffset; // Offset in dwords of the primitive's entries in the material data buffer
uint MaterialMaxIndex; // Highest valid relative material index (material count - 1)
uint MeshPassMask; // One bit per mesh pass that has material slots
bool bHasUVDensities; // Whether UV density entries are available for this primitive
uint HitProxyBufferOffset; // Offset in dwords into the hit proxy table; 0xFFFFFFFF when unavailable
float4 OverlayColor; // Unpacked from four 8-bit channels scaled by 1/255
};
// Unpacks the four dwords of per-primitive material data.
//   x: material buffer offset
//   y: [0..7] max material index, [8..15] mesh pass mask, [16] UV densities flag
//   z: hit proxy buffer offset
//   w: overlay color, 8 bits per channel
FNaniteMaterialPrimitiveData UnpackNaniteMaterialPrimitiveData(uint4 Data)
{
	const uint PackedInfo = Data.y;

	FNaniteMaterialPrimitiveData Result;
	Result.MaterialBufferOffset	= Data.x;
	Result.MaterialMaxIndex		= BitFieldExtractU32(PackedInfo, 8u, 0u);
	Result.MeshPassMask			= BitFieldExtractU32(PackedInfo, 8u, 8u);
	Result.bHasUVDensities		= BitFieldExtractU32(PackedInfo, 1u, 16u);
	Result.HitProxyBufferOffset	= Data.z;
	Result.OverlayColor			= float4(UnpackToUint4(Data.w, 8)) * (1.0f / 255.0f);
	return Result;
}
// Loads and unpacks the material data of primitive InPrimitiveIndex.
// Elements in PrimitiveMaterialData are PrimitiveMaterialElementStride bytes apart.
// With editor shaders and a stride of at least four dwords the full element is loaded;
// otherwise only the first two dwords are loaded and the remaining ones keep the 0xFFFFFFFF sentinel.
FNaniteMaterialPrimitiveData LoadNaniteMaterialPrimitiveData(uint InPrimitiveIndex)
{
	const uint ElementStride = Scene.NaniteMaterials.PrimitiveMaterialElementStride;
	const uint Offset = InPrimitiveIndex * ElementStride;

	uint4 Data = (uint4)0xFFFFFFFFu;

#if USE_EDITOR_SHADERS
	// Check to load the hit proxy buffer offset
	if (ElementStride / 4u >= 4u)
	{
		Data = Scene.NaniteMaterials.PrimitiveMaterialData.Load4(Offset);
	}
	else
#endif
	{
		checkSlow(ElementStride / 4u >= 2u);
		// Address the element with the actual stride; assuming two dwords per element
		// would read the wrong primitive whenever the buffer uses a larger stride.
		Data.xy = Scene.NaniteMaterials.PrimitiveMaterialData.Load2(Offset);
	}

	return UnpackNaniteMaterialPrimitiveData(Data);
}
// Bin assignments of one material slot; each field is unpacked from a 16-bit half of the slot's two dwords.
struct FNaniteMaterialSlot
{
uint TriangleShadingBin; // Low 16 bits of the first dword
uint VoxelShadingBin; // High 16 bits of the first dword; 0xFFFF makes shading fall back to the triangle bin
uint RasterBin; // Low 16 bits of the second dword
uint FallbackRasterBin; // High 16 bits of the second dword
};
// Splits the two packed dwords of a material slot into four 16-bit bin indices.
FNaniteMaterialSlot UnpackMaterialSlot(uint2 Data)
{
	const uint ShadingBins = Data.x;
	const uint RasterBins = Data.y;

	FNaniteMaterialSlot Slot;
	Slot.TriangleShadingBin	= ShadingBins & 0xFFFFu;
	Slot.VoxelShadingBin	= ShadingBins >> 16u;
	Slot.RasterBin			= RasterBins & 0xFFFFu;
	Slot.FallbackRasterBin	= RasterBins >> 16u;
	return Slot;
}
// Loads and unpacks the material slot stored at byte offset Offset in the material data buffer.
FNaniteMaterialSlot LoadMaterialSlot(uint Offset)
{
	return UnpackMaterialSlot(Scene.NaniteMaterials.MaterialData.Load2(Offset));
}
// Loads four UV densities from byte offset Offset; an offset of 0xFFFFFFFF yields all ones.
float4 LoadMaterialUVDensities(uint Offset)
{
	const bool bHasDensities = (Offset != 0xFFFFFFFFu);
	if (bHasDensities)
	{
		return asfloat(Scene.NaniteMaterials.MaterialData.Load4(Offset));
	}

	return (float4)1.0f;
}
// Computes the byte offset of a material slot for the given primitive and mesh pass.
// Slots are grouped by mesh pass; only passes present in the primitive's mesh pass mask occupy space,
// so the group index is the number of mask bits below the requested pass.
// The material index is clamped to the primitive's maximum material index.
uint GetMaterialSlotOffset(uint InRelativeMaterialIndex, uint InPrimitiveIndex, uint InMeshPassIndex)
{
	const FNaniteMaterialPrimitiveData PrimitiveData = LoadNaniteMaterialPrimitiveData(InPrimitiveIndex);
	checkSlow(PrimitiveData.MaterialBufferOffset != 0xFFFFFFFFu);

	const uint MeshPassBit = (1u << InMeshPassIndex);
	checkSlow((MeshPassBit & PrimitiveData.MeshPassMask) != 0);

	const uint DwordsPerMaterialSlot = 2u;
	const uint MaterialCount = PrimitiveData.MaterialMaxIndex + 1;
	const uint NumPrecedingPasses = countbits(PrimitiveData.MeshPassMask & (MeshPassBit - 1u));
	const uint ClampedMaterialIndex = min(InRelativeMaterialIndex, PrimitiveData.MaterialMaxIndex);

	const uint SlotIndex = MaterialCount * NumPrecedingPasses + ClampedMaterialIndex;
	const uint OffsetDwords = PrimitiveData.MaterialBufferOffset + DwordsPerMaterialSlot * SlotIndex;
	return 4 * OffsetDwords;
}
// Computes the byte offset of a material's UV densities, or 0xFFFFFFFF when the primitive has none.
// The UV density entries start after the material slots of every mesh pass in the primitive's mask.
// The material index is clamped to the primitive's maximum material index.
uint GetMaterialUVDensitiesOffset(uint InRelativeMaterialIndex, uint InPrimitiveIndex)
{
	const FNaniteMaterialPrimitiveData PrimitiveData = LoadNaniteMaterialPrimitiveData(InPrimitiveIndex);
	checkSlow(PrimitiveData.MaterialBufferOffset != 0xFFFFFFFFu);

	if (PrimitiveData.bHasUVDensities)
	{
		const uint DwordsPerMaterialSlot = 2u;
		const uint DwordsPerUVDensities = 4u;

		const uint MaterialCount = PrimitiveData.MaterialMaxIndex + 1;
		const uint NumMeshPasses = countbits(PrimitiveData.MeshPassMask);
		const uint FirstUVDensitiesOffset = MaterialCount * NumMeshPasses * DwordsPerMaterialSlot;
		const uint ClampedMaterialIndex = min(InRelativeMaterialIndex, PrimitiveData.MaterialMaxIndex);

		const uint OffsetDwords = PrimitiveData.MaterialBufferOffset + FirstUVDensitiesOffset + (ClampedMaterialIndex * DwordsPerUVDensities);
		return 4 * OffsetDwords;
	}

	return 0xFFFFFFFFu;
}
// Loads the material slot of a primitive's material for the given mesh pass.
FNaniteMaterialSlot LoadMaterialSlot(uint InRelativeMaterialIndex, uint InPrimitiveIndex, uint InMeshPassIndex)
{
	const uint SlotOffset = GetMaterialSlotOffset(InRelativeMaterialIndex, InPrimitiveIndex, InMeshPassIndex);
	return LoadMaterialSlot(SlotOffset);
}
// Loads the UV densities of a primitive's material (all ones when the primitive has none).
float4 LoadMaterialUVDensities(uint InRelativeMaterialIndex, uint InPrimitiveIndex)
{
	const uint DensitiesOffset = GetMaterialUVDensitiesOffset(InRelativeMaterialIndex, InPrimitiveIndex);
	return LoadMaterialUVDensities(DensitiesOffset);
}
// Returns the shading bin of a material: the voxel bin when requested and valid, otherwise the triangle bin.
uint GetMaterialShadingBinFromIndex(
	uint InRelativeMaterialIndex,
	uint InPrimitiveIndex,
	uint InMeshPassIndex,
	bool bVoxel)
{
	const FNaniteMaterialSlot MaterialSlot = LoadMaterialSlot(InRelativeMaterialIndex, InPrimitiveIndex, InMeshPassIndex);

	const bool bUseVoxelBin = bVoxel && MaterialSlot.VoxelShadingBin != 0xFFFFu;
	if (bUseVoxelBin)
	{
		return MaterialSlot.VoxelShadingBin;
	}

	// Fall back to rendering (incorrectly) with triangle shader if voxel shader has not been compiled
	return MaterialSlot.TriangleShadingBin;
}
// Remaps a raster bin index to a fixed function bin where required.
// Invalid bins pass through unchanged. Voxel clusters, bins inside the fixed function mask, and views with
// programmable raster disabled are replaced by a fixed function bin assembled from the material flags.
uint RemapRasterBin(uint InBinIndex, uint InRenderFlags, FNaniteMaterialFlags MaterialFlags, bool bVoxel, bool bIsDeforming)
{
	if (InBinIndex == NANITE_INVALID_RASTER_BIN)
	{
		return NANITE_INVALID_RASTER_BIN;
	}

	// Any bins within the fixed function bin mask are special cased
	const bool bFixedFunctionBin	= InBinIndex <= NANITE_FIXED_FUNCTION_BIN_MASK;
	const bool bDisableProgrammable	= (InRenderFlags & NANITE_RENDER_FLAG_DISABLE_PROGRAMMABLE) != 0;
	const bool bNeedsRemap			= bVoxel || bFixedFunctionBin || bDisableProgrammable;

	if (!bNeedsRemap)
	{
		return InBinIndex;
	}

	// For non-shadow views, remap shadow casting fixed function to non-shadow casting (explicitly skipped)
	const bool bShadowPass		= (InRenderFlags & NANITE_RENDER_FLAG_IS_SHADOW_PASS) != 0;
	const bool bCastShadow		= select(bShadowPass, MaterialFlags.bCastShadow, false);
	const bool bTwoSided		= !bVoxel && MaterialFlags.bTwoSided;
	const bool bSplineMesh		= !bVoxel && MaterialFlags.bSplineMesh;
	const bool bSkinningActive	= MaterialFlags.bSkinnedMesh && bIsDeforming;

	uint FixedFunctionBin = NANITE_FIXED_FUNCTION_BIN;
	FixedFunctionBin |= select(bTwoSided,		NANITE_FIXED_FUNCTION_BIN_TWOSIDED,		0x0u);
	FixedFunctionBin |= select(bSplineMesh,		NANITE_FIXED_FUNCTION_BIN_SPLINE,		0x0u);
	FixedFunctionBin |= select(bSkinningActive,	NANITE_FIXED_FUNCTION_BIN_SKINNED,		0x0u);
	FixedFunctionBin |= select(bCastShadow,		NANITE_FIXED_FUNCTION_BIN_CAST_SHADOW,	0x0u);
	FixedFunctionBin |= select(bVoxel,			NANITE_FIXED_FUNCTION_BIN_VOXEL,		0x0u);
	return FixedFunctionBin;
}
// Returns the raster bin of a material, optionally preferring its fallback raster bin when that is valid.
// Valid bin indices at or above InRegularRasterBinCount are mirrored down from the top of the index range:
// a stored value of (NANITE_INVALID_RASTER_BIN - 1 - k) becomes (InRegularRasterBinCount + k).
uint GetMaterialRasterBinFromIndex(
	uint InRelativeMaterialIndex,
	uint InPrimitiveIndex,
	uint InMeshPassIndex,
	uint InRegularRasterBinCount,
	bool bFallbackRasterBin)
{
	const FNaniteMaterialSlot MaterialSlot = LoadMaterialSlot(InRelativeMaterialIndex, InPrimitiveIndex, InMeshPassIndex);

	const bool bUseFallback = bFallbackRasterBin && MaterialSlot.FallbackRasterBin != NANITE_INVALID_RASTER_BIN;

	uint SelectedBin = MaterialSlot.RasterBin;
	if (bUseFallback)
	{
		SelectedBin = MaterialSlot.FallbackRasterBin;
	}

	const bool bIsRegularOrInvalid = (SelectedBin == NANITE_INVALID_RASTER_BIN) || (SelectedBin < InRegularRasterBinCount);
	if (bIsRegularOrInvalid)
	{
		return SelectedBin;
	}

	return NANITE_INVALID_RASTER_BIN - SelectedBin - 1 + InRegularRasterBinCount;
}
// Returns the UV densities of the material covering triangle InTriIndex of the cluster.
float4 GetMaterialUVDensities(
	FCluster InCluster,
	uint InPrimitiveIndex,
	uint InTriIndex)
{
	return LoadMaterialUVDensities(GetRelativeMaterialIndex(InCluster, InTriIndex), InPrimitiveIndex);
}
// Returns the shading bin of the material covering triangle InTriIndex, using the voxel bin for voxel clusters.
uint GetMaterialShadingBin(
	FCluster InCluster,
	uint InPrimitiveIndex,
	uint InMeshPassIndex,
	uint InTriIndex)
{
	return GetMaterialShadingBinFromIndex(
		GetRelativeMaterialIndex(InCluster, InTriIndex),
		InPrimitiveIndex,
		InMeshPassIndex,
		InCluster.bVoxel
	);
}
// Returns the raster bin of the material covering triangle InTriIndex of the cluster.
uint GetMaterialRasterBin(
	FCluster InCluster,
	uint InPrimitiveIndex,
	uint InMeshPassIndex,
	uint InTriIndex,
	uint InRegularSlotCount,
	bool bFallbackRasterBin)
{
	const uint RelativeMaterialIndex = GetRelativeMaterialIndex(InCluster, InTriIndex);
	return GetMaterialRasterBinFromIndex(RelativeMaterialIndex, InPrimitiveIndex, InMeshPassIndex, InRegularSlotCount, bFallbackRasterBin);
}
// Looks up the hit proxy ID of a primitive's material in InMaterialHitProxyTable.
// Returns uint(-2) (the invisible hit proxy) when the primitive has no hit proxy buffer offset.
// The material index is clamped to the primitive's maximum material index.
uint LoadMaterialHitProxyId(uint InPrimitiveIndex, uint InMaterialIndex, ByteAddressBuffer InMaterialHitProxyTable)
{
	const uint InvisibleHitProxyID = uint(-2);

	const FNaniteMaterialPrimitiveData PrimitiveData = LoadNaniteMaterialPrimitiveData(InPrimitiveIndex);
	if (PrimitiveData.HitProxyBufferOffset == 0xFFFFFFFFu)
	{
		return InvisibleHitProxyID;
	}

	const uint ClampedMaterialIndex = min(InMaterialIndex, PrimitiveData.MaterialMaxIndex);
	const uint OffsetDwords = PrimitiveData.HitProxyBufferOffset + ClampedMaterialIndex;
	return InMaterialHitProxyTable.Load(OffsetDwords * 4);
}
// Returns the hit proxy ID of the material covering triangle InTriIndex of the cluster.
uint GetMaterialHitProxyId(
	FCluster InCluster,
	uint InPrimitiveIndex,
	uint InTriIndex,
	ByteAddressBuffer InMaterialHitProxyTable)
{
	return LoadMaterialHitProxyId(
		InPrimitiveIndex,
		GetRelativeMaterialIndex(InCluster, InTriIndex),
		InMaterialHitProxyTable
	);
}
// Returns the overlay color stored in the primitive's material data.
float4 LoadNaniteMaterialOverlayColor(uint InPrimitiveIndex)
{
	return LoadNaniteMaterialPrimitiveData(InPrimitiveIndex).OverlayColor;
}
// Reconstructs the tangent from the normal and a quantized rotation angle around it.
// Two reference axes perpendicular to the normal are built, and the tangent is the first axis rotated
// towards the second by TangentAngleBits * 2*PI / 2^NumTangentBits.
float3 UnpackTangentX(float3 TangentZ, uint TangentAngleBits, uint NumTangentBits)
{
	// Swap X and Z when |z| > |x| so the reference axis is built from the larger of the two; swapped back at the end
	const bool bSwapXZ = (abs(TangentZ.z) > abs(TangentZ.x));
	if (bSwapXZ)
	{
		TangentZ.xz = TangentZ.zx;
	}

	const float3 RefAxisX = float3(-TangentZ.y, TangentZ.x, 0.0f);
	const float3 RefAxisY = cross(TangentZ, RefAxisX);
	const float InvRefLength = rsqrt(dot(RefAxisX.xy, RefAxisX.xy));

	const float Angle = float(TangentAngleBits) * ((2.0f * PI) / (1u << NumTangentBits));

	float3 Tangent = RefAxisX * (cos(Angle) * InvRefLength) + RefAxisY * (sin(Angle) * InvRefLength);
	if (bSwapXZ)
	{
		Tangent.xz = Tangent.zx;
	}
	return Tangent;
}
// Decodes one bone influence (index and weight) of a vertex from the cluster's influence bit stream.
// Influence indices past the per-vertex influence count produce bone 0 with zero weight.
// When the header stores no weight bits, the weight is 1.0.
void DecodeVertexBoneInfluence(FBoneInfluenceHeader BoneInfluenceHeader, uint VertIndex, uint InfluenceIndex, inout uint OutBoneIndex, inout float OutBoneWeight)
{
	const bool bInfluenceExists = (InfluenceIndex < BoneInfluenceHeader.NumVertexBoneInfluences);
	if (!bInfluenceExists)
	{
		OutBoneIndex = 0;
		OutBoneWeight = 0.0f;
		return;
	}

	const uint NumIndexBits = BoneInfluenceHeader.NumVertexBoneIndexBits;
	const uint NumWeightBits = BoneInfluenceHeader.NumVertexBoneWeightBits;

	// Influences are tightly packed: all influences of a vertex are consecutive
	const uint InfluenceSlot = VertIndex * BoneInfluenceHeader.NumVertexBoneInfluences + InfluenceIndex;
	const uint BitOffset = InfluenceSlot * (NumIndexBits + NumWeightBits);

	FBitStreamReaderState InfluenceStream = BitStreamReader_Create_Aligned(BoneInfluenceHeader.DataAddress, BitOffset, 32);

	const float WeightScale = 1.0f / ((1u << NumWeightBits) - 1u);

	// The index is stored first, followed by the weight
	OutBoneIndex = BitStreamReader_Read_RO(ClusterPageData, InfluenceStream, NumIndexBits, NANITE_MAX_BONE_INDEX_BITS);
	OutBoneWeight = (float)BitStreamReader_Read_RO(ClusterPageData, InfluenceStream, NumWeightBits, NANITE_MAX_BLEND_WEIGHT_BITS) * WeightScale;

	if (!NumWeightBits)
	{
		OutBoneWeight = 1.0f;
	}
}
#if COMPILER_SUPPORTS_HLSL2021
// Loads cluster bone influence number InfluenceIndex using a templated buffer load.
FClusterBoneInfluence DecodeClusterBoneInfluence(FCluster Cluster, uint InfluenceIndex)
{
	const uint InfluenceAddress = Cluster.ClusterBoneInfluenceAddress + InfluenceIndex * Cluster.ClusterBoneInfluenceStride;
	return ClusterPageData.Load<FClusterBoneInfluence>(InfluenceAddress);
}
#endif
// Unpacks a bone influence: the low 8 bits hold the weight (scaled by 1/255), the upper 24 bits the bone index.
FBoneInfluence DecodeBoneInfluence(uint PackedBoneInfluence)
{
	const uint QuantizedWeight = PackedBoneInfluence & 0xFFu;

	FBoneInfluence Result;
	Result.BoneIndex = PackedBoneInfluence >> 8;
	Result.Weight = QuantizedWeight * (1.0f / 255.0f);
	return Result;
}
// Loads and unpacks packed bone influence number InfluenceIndex from the cluster's bone influence data.
FBoneInfluence DecodeVoxelBoneInfluence(FCluster Cluster, uint InfluenceIndex)
{
	const uint InfluenceAddress = Cluster.ClusterBoneInfluenceAddress + InfluenceIndex * Cluster.ClusterBoneInfluenceStride;
	return DecodeBoneInfluence(ClusterPageData.Load(InfluenceAddress));
}
// Loads and unpacks an assembly transform bone influence from the hierarchy buffer.
// BufferOffsetDwords is the dword offset of the influence list; InfluenceIndex selects the entry.
FBoneInfluence DecodeHierarchyAssemblyTransformBoneInfluence(uint BufferOffsetDwords, uint InfluenceIndex)
{
	const uint InfluenceDword = BufferOffsetDwords + InfluenceIndex;
	return DecodeBoneInfluence(HierarchyBuffer.Load(InfluenceDword * 4));
}
// Unpacked bone influence header of one assembly transform.
struct FNaniteAssemblyBoneInfluenceHeader
{
uint NumInfluences; // Number of bone influences of the transform
uint SingleBoneIndex; // Bone index when there is exactly one influence, otherwise 0xFFFFFFFF
uint InfluenceBufferOffset; // Dword offset of the influence list when there is more than one influence, otherwise 0xFFFFFFFF
};
// Loads and unpacks the bone influence header of assembly transform TransformIndex.
// NOTE: When bone attachment is enabled for a given assembly asset, its contents in the hierarchy buffer look like this:
// | Hierarchy Nodes | Assembly Transforms | Influence Headers | Influence List |
// The packed header stores the influence count in its top 8 bits. The low 24 bits hold the bone index when
// there is a single influence, or an offset into the influence list when there are several.
FNaniteAssemblyBoneInfluenceHeader DecodeHierarchyAssemblyTransformBoneInfluenceHeader(uint TransformBufferOffset, uint NumTransforms, uint TransformIndex)
{
	const uint FirstHeaderDword = TransformBufferOffset + NumTransforms * NANITE_ASSEMBLY_TRANSFORM_SIZE_DWORDS;
	const uint Packed = HierarchyBuffer.Load((FirstHeaderDword + TransformIndex) * 4);

	const uint InfluenceCount = Packed >> 24u;
	const uint Payload = Packed & 0x00FFFFFFu;

	// One header dword per transform, so the influence list begins NumTransforms dwords after the first header
	const uint InfluenceListDword = FirstHeaderDword + NumTransforms;

	FNaniteAssemblyBoneInfluenceHeader Result;
	Result.NumInfluences = InfluenceCount;
	Result.SingleBoneIndex = select(InfluenceCount == 1, Payload, 0xFFFFFFFFu);
	Result.InfluenceBufferOffset = select(InfluenceCount > 1, InfluenceListDword + Payload, 0xFFFFFFFFu);
	return Result;
}
// Builds a tangent from the normal alone, for use when no tangent can be decoded or derived.
// Branch-free apart from the sign selection; the sign of TangentZ.z keeps the rcp denominator away from zero.
float3 GetNaniteFallbackTangent(float3 TangentZ)
{
	const float Sign = (TangentZ.z >= 0.0f) ? 1.0f : -1.0f;
	const float A = -rcp(Sign + TangentZ.z);
	const float B = TangentZ.x * TangentZ.y * A;

	float3 TangentX;
	TangentX.x = 1.0f + Sign * A * TangentZ.x * TangentZ.x;
	TangentX.y = Sign * B;
	TangentX.z = -Sign * TangentZ.x;
	return TangentX;
}
// Single-vertex overload: fills in a tangent for clusters that do not store explicit tangents.
// A single vertex has no UV gradient to derive a tangent from, so the normal-only fallback is used.
// Clusters with explicit tangents are left untouched, matching the three-vertex overload;
// overwriting them here would discard the tangent that was just decoded.
// Positions is unused and only present so both overloads share a signature shape.
void DecodeImplicitTangents(inout FNaniteRawAttributeData RawAttributeData[1], float3 Positions[1], FCluster Cluster)
{
	BRANCH
	if (!Cluster.bHasTangents)
	{
		RawAttributeData[0].TangentXAndSign = float4(GetNaniteFallbackTangent(RawAttributeData[0].TangentZ), 1.0f);
	}
}
// Triangle overload: derives tangents for clusters that do not store explicit tangents.
// Tangents are derived from the triangle's positions and first UV set when the UVs vary in U;
// otherwise each vertex gets the normal-only fallback tangent. Clusters with explicit tangents are left untouched.
void DecodeImplicitTangents(inout FNaniteRawAttributeData RawAttributeData[3], float3 Positions[3], FCluster Cluster)
{
BRANCH
if (!Cluster.bHasTangents)
{
// UV deltas along the two triangle edges that start at vertex 0
float2 TexCoord10 = RawAttributeData[1].TexCoords[0] - RawAttributeData[0].TexCoords[0];
float2 TexCoord20 = RawAttributeData[2].TexCoords[0] - RawAttributeData[0].TexCoords[0];
// A tangent can only be derived if U actually changes across the triangle
bool TangentXValid = abs(TexCoord10.x) + abs(TexCoord20.x) > 1e-6;
BRANCH
if (TangentXValid)
{
const float3 PointLocal10 = float3(Positions[1] - Positions[0]);
const float3 PointLocal20 = float3(Positions[2] - Positions[0]);
const float3 GeoNormal = cross(PointLocal10, PointLocal20);
// Sign of the UV-space triangle area (2D cross product of the UV edge deltas)
const float TexCoordSign = TexCoord10.x * TexCoord20.y - TexCoord20.x * TexCoord10.y;
[unroll]
for (uint i = 0; i < 3; i++)
{
// Based on Christian Schüler's derivation: http://www.thetenthplanet.de/archives/1180
const float3 TangentZ = RawAttributeData[i].TangentZ;
const float3 Perp2 = cross(TangentZ, PointLocal20);
const float3 Perp1 = cross(PointLocal10, TangentZ);
#if 1
// Active path: the bitangent sign comes from the UV winding combined with whether the vertex normal faces the geometric normal
const float3 TangentX = normalize(Perp2 * TexCoord10.x + Perp1 * TexCoord20.x);
const float Sign = TexCoordSign * dot(TangentZ, GeoNormal) < 0.0f ? -1.0f : 1.0f;
RawAttributeData[i].TangentXAndSign = float4(TangentX, Sign);
#else
// Disabled alternative: the bitangent sign comes from comparing the V gradient direction with the derived bitangent
const float3 TangentU = Perp2 * TexCoord10.x + Perp1 * TexCoord20.x;
const float3 TangentV = Perp2 * TexCoord10.y + Perp1 * TexCoord20.y;
const float3 TangentX = normalize(TangentU);
const float3 TangentY = cross(TangentZ, TangentX);
const float Sign = dot(TangentV, TangentY) < 0.0f ? -1.0f : 1.0f;
RawAttributeData[i].TangentXAndSign = float4(TangentX, Sign);
#endif
}
}
else
{
// Degenerate U mapping: fall back to a tangent built from the normal alone
UNROLL
for (uint i = 0; i < 3; i++)
{
RawAttributeData[i].TangentXAndSign = float4(GetNaniteFallbackTangent(RawAttributeData[i].TangentZ), 1.0f);
}
}
}
}
// Decodes vertex attributes for N vertices. N must be compile-time constant and <= 3.
// Decoding multiple vertices from the same cluster simultaneously tends to generate better code than decoding them individually.
//
// RawAttributeData         Receives the decoded attributes; every element is zeroed before decoding.
// Cluster                  Cluster whose attribute data is read from ClusterPageData.
// LocalPositions           Vertex positions, forwarded to DecodeImplicitTangents.
// VertIndices              Vertex indices within the cluster; only the first N components are used.
// CompileTimeMaxTexCoords  Maximum number of UV sets to decode; clamped to [1, NANITE_MAX_UVS].
//
// UV sets beyond Cluster.NumUVs repeat the last decoded set (or zero if the cluster has no UVs).
template<uint N>
void GetRawAttributeData(inout FNaniteRawAttributeData RawAttributeData[N],
	FCluster Cluster,
	float3 LocalPositions[N],
	uint3 VertIndices,
	uint CompileTimeMaxTexCoords
	)
{
	// Always process first UV set. Even if it isn't used, we might still need TangentToWorld.
	CompileTimeMaxTexCoords = max(1, min(NANITE_MAX_UVS, CompileTimeMaxTexCoords));

	const uint DecodeInfoOffset = Cluster.PageBaseAddress + Cluster.DecodeInfoOffset;
	const uint AttributeDataOffset = Cluster.PageBaseAddress + Cluster.AttributeOffset;

	// Most recently decoded UV of each vertex. Indexed by vertex, so it holds N entries.
	float2 TexCoords[N];
	uint i;
	UNROLL
	for (i = 0; i < N; i++)
	{
		RawAttributeData[i] = (FNaniteRawAttributeData)0;
		TexCoords[i] = 0.0f;
	}

#if NANITE_USE_UNCOMPRESSED_VERTEX_DATA
	// Uncompressed layout per vertex: Normal (12 bytes), optional Tangent (16 bytes), Color (4 bytes), then 8 bytes per UV set
	uint3 ReadOffset = AttributeDataOffset + VertIndices * Cluster.BitsPerAttribute / 8;
	UNROLL
	for(i = 0; i < N; i++)
	{
		RawAttributeData[i].TangentZ = asfloat(ClusterPageData.Load3(ReadOffset[i]));
		ReadOffset[i] += 12;
		if(Cluster.bHasTangents)
		{
			RawAttributeData[i].TangentXAndSign = asfloat(ClusterPageData.Load4(ReadOffset[i]));
			ReadOffset[i] += 16;
		}
		RawAttributeData[i].Color = float4(UnpackToUint4(ClusterPageData.Load(ReadOffset[i]), 8)) * (1.0f / 255.0f);
		ReadOffset[i] += 4;
	}

	UNROLL
	for (uint TexCoordIndex = 0; TexCoordIndex < CompileTimeMaxTexCoords; TexCoordIndex++)
	{
		if(TexCoordIndex < Cluster.NumUVs)
		{
			UNROLL
			for (uint i = 0; i < N; i++)
			{
				TexCoords[i] = asfloat(ClusterPageData.Load2(ReadOffset[i]));
			}
			ReadOffset += 8;
		}

		UNROLL
		for (uint i = 0; i < N; i++)
		{
			RawAttributeData[i].TexCoords[TexCoordIndex] = TexCoords[i];
		}
	}
#else
	const uint CompileTimeMaxAttributeBits = CalculateMaxAttributeBits(CompileTimeMaxTexCoords);

	// Watch out! Make sure control flow around BitStreamReader is always compile-time constant or codegen degrades significantly

	uint4 ColorMin = uint4(UnpackByte0(Cluster.ColorMin), UnpackByte1(Cluster.ColorMin), UnpackByte2(Cluster.ColorMin), UnpackByte3(Cluster.ColorMin));
	const uint4 NumComponentBits = UnpackToUint4(Cluster.ColorBits, 4);

	FBitStreamReaderState AttributeStream[N];
	UNROLL
	for (i = 0; i < N; i++)
	{
		AttributeStream[i] = BitStreamReader_Create_Aligned(AttributeDataOffset, VertIndices[i] * Cluster.BitsPerAttribute, CompileTimeMaxAttributeBits);

		// Stream order per vertex: normal, tangent angle + sign (if present), color, then the UV sets
		const uint NormalBits = BitStreamReader_Read_RO(ClusterPageData, AttributeStream[i], 2 * Cluster.NormalPrecision, 2 * NANITE_MAX_NORMAL_QUANTIZATION_BITS);
		const float3 TangentZ = UnpackNormal(NormalBits, Cluster.NormalPrecision);
		RawAttributeData[i].TangentZ = TangentZ;

		// The read is issued unconditionally (with zero bits when there are no tangents) to keep the control flow constant
		const uint NumTangentBits = Cluster.bHasTangents ? (Cluster.TangentPrecision + 1) : 0u;
		const uint TangentAngleAndSignBits = BitStreamReader_Read_RO(ClusterPageData, AttributeStream[i], NumTangentBits, NANITE_MAX_TANGENT_QUANTIZATION_BITS + 1);

		BRANCH
		if (Cluster.bHasTangents)
		{
			// The sign bit sits directly above the TangentPrecision angle bits
			const bool bTangentYSign = (TangentAngleAndSignBits & (1u << Cluster.TangentPrecision)) != 0;
			const uint TangentAngleBits = BitFieldExtractU32(TangentAngleAndSignBits, Cluster.TangentPrecision, 0);
			RawAttributeData[i].TangentXAndSign = float4(UnpackTangentX(TangentZ, TangentAngleBits, Cluster.TangentPrecision), bTangentYSign ? -1.0f : 1.0f);
		}
		else
		{
			RawAttributeData[i].TangentXAndSign = 0.0f;
		}

		// Colors are stored as per-channel deltas from the cluster's minimum color
		const uint4 ColorDelta = BitStreamReader_Read4_RO(ClusterPageData, AttributeStream[i], NumComponentBits, NANITE_MAX_COLOR_QUANTIZATION_BITS);
		RawAttributeData[i].Color = float4(ColorMin + ColorDelta) * (1.0f / 255.0f);
	}

	UNROLL
	for (uint TexCoordIndex = 0; TexCoordIndex < CompileTimeMaxTexCoords; ++TexCoordIndex)
	{
		// A zero header (zero bits per component) is used for UV sets the cluster does not store
		uint2 UVHeaderData = 0u;
		if (TexCoordIndex < Cluster.NumUVs)
		{
			UVHeaderData = ClusterPageData.Load2(DecodeInfoOffset + TexCoordIndex * SIZEOF_PACKED_UV_HEADER);
		}
		const FUVHeader UVHeader = UnpackUVHeader(UVHeaderData);

		uint2 UVBits[N];
		UNROLL
		for (uint i = 0; i < N; i++)
		{
			UVBits[i] = BitStreamReader_Read2_RO(ClusterPageData, AttributeStream[i], UVHeader.NumBits, NANITE_MAX_TEXCOORD_COMPONENT_BITS);
		}

		BRANCH
		if (TexCoordIndex < Cluster.NumUVs)
		{
			UNROLL
			for (uint i = 0; i < N; i++)
			{
				TexCoords[i] = UnpackTexCoord(UVBits[i], UVHeader);
			}
		}

		UNROLL
		for (uint j = 0; j < N; j++)
		{
			RawAttributeData[j].TexCoords[TexCoordIndex] = TexCoords[j];
		}
	}
#endif

	DecodeImplicitTangents(RawAttributeData, LocalPositions, Cluster);
}
// Builds the tangent-to-local matrix with rows (tangent, bitangent, normal).
// The bitangent is cross(normal, tangent) scaled by the sign stored in TangentXAndSign.w; the normal is not normalized here.
half3x3 NaniteTangentToLocal(float4 TangentXAndSign, float3 UnnormalizedTangentZ)
{
	const float3 TangentX = TangentXAndSign.xyz;
	const float3 TangentY = cross(UnnormalizedTangentZ.xyz, TangentX) * TangentXAndSign.w;
	return float3x3(TangentX, TangentY, UnnormalizedTangentZ);
}
// Decodes one UV set for the three vertices of a triangle and interpolates it with the given barycentrics.
// Returns zero when the cluster has no UVs. TexCoordIndex is clamped to the last UV set the cluster stores.
TDual< float2 > GetTexCoord(
	FCluster Cluster,
	uint3 TriIndices,
	FBarycentrics Barycentrics,
	uint TexCoordIndex
	)
{
	if (Cluster.NumUVs == 0)
		return (TDual< float2 >)0;

	TexCoordIndex = min(TexCoordIndex, Cluster.NumUVs - 1);

	// Unpack and interpolate attributes
	const uint DecodeInfoOffset = Cluster.PageBaseAddress + Cluster.DecodeInfoOffset;
	const uint AttributeDataOffset = Cluster.PageBaseAddress + Cluster.AttributeOffset;

#if NANITE_USE_UNCOMPRESSED_VERTEX_DATA
	uint3 ReadOffset = AttributeDataOffset + TriIndices * Cluster.BitsPerAttribute / 8;
	// GetRawAttributeData reads a 16 byte tangent between the normal and the color when the cluster has
	// explicit tangents, so it has to be skipped here as well.
	const uint TangentSizeBytes = Cluster.bHasTangents ? 16u : 0u;
	ReadOffset += 12 + TangentSizeBytes + 4 + TexCoordIndex * 8;	// Normal + Tangent (optional) + Color + TexCoord
#else
	const uint4 NumColorComponentBits = UnpackToUint4(Cluster.ColorBits, 4);
	const uint UVBitOffset = ((Cluster.UVBitOffsets >> (TexCoordIndex * 8u)) & 0xFFu);
	// NOTE(review): no tangent bits are added here although GetRawAttributeData reads them between the normal
	// and the color - presumably Cluster.UVBitOffsets already accounts for them; confirm against the encoder.
	const uint BitOffset = 2 * Cluster.NormalPrecision + dot(NumColorComponentBits, 1u) + UVBitOffset;

	FBitStreamReaderState AttributeStream0 = BitStreamReader_Create_Aligned(AttributeDataOffset, BitOffset + TriIndices.x * Cluster.BitsPerAttribute, 2 * NANITE_MAX_TEXCOORD_COMPONENT_BITS);
	FBitStreamReaderState AttributeStream1 = BitStreamReader_Create_Aligned(AttributeDataOffset, BitOffset + TriIndices.y * Cluster.BitsPerAttribute, 2 * NANITE_MAX_TEXCOORD_COMPONENT_BITS);
	FBitStreamReaderState AttributeStream2 = BitStreamReader_Create_Aligned(AttributeDataOffset, BitOffset + TriIndices.z * Cluster.BitsPerAttribute, 2 * NANITE_MAX_TEXCOORD_COMPONENT_BITS);
#endif

#if NANITE_USE_UNCOMPRESSED_VERTEX_DATA
	float2 TexCoord0 = asfloat(ClusterPageData.Load2(ReadOffset.x));
	float2 TexCoord1 = asfloat(ClusterPageData.Load2(ReadOffset.y));
	float2 TexCoord2 = asfloat(ClusterPageData.Load2(ReadOffset.z));
#else
	const FUVHeader UVHeader = GetUVHeader(ClusterPageData, DecodeInfoOffset, TexCoordIndex);
	uint2 UVBits0 = BitStreamReader_Read2_RO(ClusterPageData, AttributeStream0, UVHeader.NumBits, NANITE_MAX_TEXCOORD_COMPONENT_BITS);
	uint2 UVBits1 = BitStreamReader_Read2_RO(ClusterPageData, AttributeStream1, UVHeader.NumBits, NANITE_MAX_TEXCOORD_COMPONENT_BITS);
	uint2 UVBits2 = BitStreamReader_Read2_RO(ClusterPageData, AttributeStream2, UVHeader.NumBits, NANITE_MAX_TEXCOORD_COMPONENT_BITS);

	float2 TexCoord0 = UnpackTexCoord(UVBits0, UVHeader);
	float2 TexCoord1 = UnpackTexCoord(UVBits1, UVHeader);
	float2 TexCoord2 = UnpackTexCoord(UVBits2, UVHeader);
#endif

	return Lerp( TexCoord0, TexCoord1, TexCoord2, Barycentrics );
}
// Controls whether IterateClusterSegments is compiled. Defaults to 0 (not compiled) unless already defined.
#ifndef DEFINE_ITERATE_CLUSTER_SEGMENTS
# define DEFINE_ITERATE_CLUSTER_SEGMENTS (0)
#endif
// Need to manually strip unused template functions here due to a compiler issue: https://github.com/microsoft/DirectXShaderCompiler/issues/4649
#if DEFINE_ITERATE_CLUSTER_SEGMENTS
// Calls Processor.Process(TriStart, TriLength, MaterialIndex) once for each material range of the cluster, in order.
// On the fast path the first range is always reported; the second and third only when non-empty.
// On the slow path every entry of the material range table read from InClusterPageData is reported.
template<class ClusterSegmentProcessor>
void IterateClusterSegments(FCluster Cluster, ByteAddressBuffer InClusterPageData, inout ClusterSegmentProcessor Processor)
{
	BRANCH
	if (!IsMaterialFastPath(Cluster))
	{
		const uint TableBase = Cluster.PageBaseAddress + Cluster.MaterialTableOffset * 4;

		LOOP for (uint EntryIndex = 0; EntryIndex < Cluster.MaterialTableLength; ++EntryIndex)
		{
			const uint EncodedRange = InClusterPageData.Load(TableBase + EntryIndex * 4);

			uint RangeStart;
			uint RangeLength;
			uint RangeMaterialIndex;
			DecodeMaterialRange(EncodedRange, RangeStart, RangeLength, RangeMaterialIndex);

			Processor.Process(RangeStart, RangeLength, RangeMaterialIndex);
		}
	}
	else
	{
		// The third range length is implicit: whatever remains after the first two
		const uint Material2Start = Cluster.Material0Length + Cluster.Material1Length;
		const uint Material2Length = Cluster.MaterialTotalLength - Cluster.Material0Length - Cluster.Material1Length;

		Processor.Process(0, Cluster.Material0Length, Cluster.Material0Index);

		if (Cluster.Material1Length > 0)
		{
			Processor.Process(Cluster.Material0Length, Cluster.Material1Length, Cluster.Material1Index);
		}

		if (Material2Length > 0)
		{
			Processor.Process(Material2Start, Material2Length, Cluster.Material2Index);
		}
	}
}
#endif