937 lines
34 KiB
C++
937 lines
34 KiB
C++
// Copyright Epic Games, Inc. All Rights Reserved.
|
|
|
|
#include "NaniteEncodeGeometryData.h"
|
|
|
|
#include "Math/UnrealMath.h"
|
|
#include "Async/ParallelFor.h"
|
|
|
|
#include "Cluster.h"
|
|
#include "ClusterDAG.h"
|
|
#include "NaniteDefinitions.h"
|
|
#include "NaniteEncodeShared.h"
|
|
#include "NaniteEncodeMaterial.h"
|
|
#include "NaniteEncodeSkinning.h"
|
|
#include "NaniteEncodeVertReuseBatch.h"
|
|
|
|
namespace Nanite
|
|
{
|
|
|
|
// Decodes a UV float code point (as produced by EncodeUVFloat) back into a regular 32-bit float.
// EncodedValue: sign bit on top of NANITE_UV_FLOAT_NUM_EXPONENT_BITS exponent bits and NumMantissaBits mantissa bits.
static float DecodeUVFloat(uint32 EncodedValue, uint32 NumMantissaBits)
{
	const uint32 NumMagnitudeBits = NANITE_UV_FLOAT_NUM_EXPONENT_BITS + NumMantissaBits;
	const uint32 MagnitudeMask = (1u << NumMagnitudeBits) - 1u;

	// A set sign bit means positive. Negative values have the sign bit clear and their magnitude bits inverted.
	const bool bIsNegative = (EncodedValue <= MagnitudeMask);
	uint32 Magnitude = EncodedValue;
	if (bIsNegative)
	{
		Magnitude = ~EncodedValue;
	}
	Magnitude &= MagnitudeMask;

	// Rebase the magnitude onto the bit pattern of 0.5f and reinterpret the bits as a float.
	const uint32 RebasedBits = 0x3F000000u + (Magnitude << (23 - NumMantissaBits));
	const float Rebased = (float&)RebasedBits;

	// Stretch denormals from [0.5,1.0] to [0.0,1.0]; larger values are kept as they are.
	const float Stretched = Rebased * 2.0f - 1.0f;
	const float AbsResult = FMath::Min(Stretched, Rebased);

	if (bIsNegative)
	{
		return -AbsResult;
	}
	return AbsResult;
}
|
|
|
|
static void VerifyUVFloatEncoding(float Value, uint32 EncodedValue, uint32 NumMantissaBits)
|
|
{
|
|
check(FMath::IsFinite(Value)); // NaN and Inf should have been handled already
|
|
|
|
const uint32 NumValues = 1u << (1 + NumMantissaBits + NANITE_UV_FLOAT_NUM_EXPONENT_BITS);
|
|
|
|
const float DecodedValue = DecodeUVFloat(EncodedValue, NumMantissaBits);
|
|
const float Error = FMath::Abs(DecodedValue - Value);
|
|
|
|
// Verify that none of the neighbor code points are closer to the original float value.
|
|
if (EncodedValue > 0u)
|
|
{
|
|
const float PrevValue = DecodeUVFloat(EncodedValue - 1u, NumMantissaBits);
|
|
check(FMath::Abs(PrevValue - Value) >= Error);
|
|
}
|
|
|
|
if (EncodedValue + 1u < NumValues)
|
|
{
|
|
const float NextValue = DecodeUVFloat(EncodedValue + 1u, NumMantissaBits);
|
|
check(FMath::Abs(NextValue - Value) >= Error);
|
|
}
|
|
}
|
|
|
|
// Encodes a finite float into the custom UV float format.
//
// Value:           Finite value to encode (NaN/Inf must be filtered out by the caller).
// NumMantissaBits: Number of mantissa bits in the encoded value. Must be below 23.
// Returns the code point: 1 sign bit, NANITE_UV_FLOAT_NUM_EXPONENT_BITS exponent bits and NumMantissaBits mantissa bits.
static uint32 EncodeUVFloat(float Value, uint32 NumMantissaBits)
{
	// Encode UV floats as a custom float type where [0,1] is denormal, so it gets uniform precision.
	// As UVs are encoded in clusters as ranges of encoded values, a few modifications to the usual
	// float encoding are made to preserve the original float order when the encoded values are interpreted as uints:
	// 1. Positive values use 1 as sign bit.
	// 2. Negative values use 0 as sign bit and have their exponent and mantissa bits inverted.

	checkSlow(FMath::IsFinite(Value));
	checkSlow(NumMantissaBits < 23u);	// The normal path shifts by (23 - NumMantissaBits - 1), which must not wrap around

	const uint32 SignBitPosition = NANITE_UV_FLOAT_NUM_EXPONENT_BITS + NumMantissaBits;
	const uint32 FloatUInt = (uint32&)Value;
	const uint32 AbsFloatUInt = FloatUInt & 0x7FFFFFFFu;

	uint32 Result;
	if (AbsFloatUInt < 0x3F800000u)
	{
		// Denormal encoding (|Value| < 1.0f)
		// Note: Mantissa can overflow into first non-denormal value (1.0f),
		// but that is desirable to get correct round-to-nearest behavior.
		const float AbsFloat = (float&)AbsFloatUInt;
		Result = uint32(double(AbsFloat * float(1u << NumMantissaBits)) + 0.5);	// Cast to double to make sure +0.5 is lossless
	}
	else
	{
		// Normal encoding
		// Extract exponent and mantissa bits from 32-bit float
		const uint32 Shift = (23 - NumMantissaBits);
		const uint32 Tmp = (AbsFloatUInt - 0x3F000000u) + (1u << (Shift - 1));	// Bias to round to nearest
		Result = FMath::Min(Tmp >> Shift, (1u << SignBitPosition) - 1u);		// Clamp to largest UV float value
	}

	// Produce a mask that for positive values only flips the sign bit
	// and for negative values only flips the exponent and mantissa bits.
	const uint32 SignMask = (1u << SignBitPosition) - (FloatUInt >> 31u);
	Result ^= SignMask;

#if DO_GUARD_SLOW
	VerifyUVFloatEncoding(Value, Result, NumMantissaBits);
#endif
	return Result;
}
|
|
|
|
// Reduces Value modulo 2^NumBits to its signed representative in [-2^(NumBits-1), 2^(NumBits-1) - 1].
// With NumBits == 0 the only accepted input is 0.
static int32 ShortestWrap(int32 Value, uint32 NumBits)
{
	if (NumBits != 0)
	{
		const int32 NumUnusedBits = 32 - NumBits;
		const int32 HalfRange = (1 << NumBits) >> 1;

		// Move the low NumBits to the top of the word, then shift back arithmetically to sign-extend them.
		const int32 Wrapped = int32(uint32(Value) << NumUnusedBits) >> NumUnusedBits;
		check(Wrapped >= -HalfRange && Wrapped <= HalfRange - 1);
		return Wrapped;
	}

	check(Value == 0);
	return 0;
}
|
|
|
|
// Zig-zag encoding: maps 0, -1, 1, -2, 2, ... to 0, 1, 2, 3, 4, ...
static uint32 EncodeZigZag(int32 X)
{
	const uint32 Doubled = uint32(X) << 1;
	const uint32 SignFill = uint32(X >> 31);	// All ones when X is negative, zero otherwise
	return Doubled ^ SignFill;
}
|
|
|
|
// Inverse of EncodeZigZag: maps 0, 1, 2, 3, 4, ... back to 0, -1, 1, -2, 2, ...
static int32 DecodeZigZag(uint32 X)
{
	const int32 Magnitude = int32(X >> 1);
	const int32 SignFill = -int32(X & 1);	// -1 (all ones) for odd codes, 0 for even codes
	return Magnitude ^ SignFill;
}
|
|
|
|
|
|
// Maps a direction to 2D octahedral coordinates.
// The vector is divided by the sum of its absolute components; when Z is negative the X/Y pair is folded over.
FORCEINLINE static FVector2f OctahedronEncode(FVector3f N)
{
	const FVector3f InputAbs = N.GetAbs();
	const float AbsSum = InputAbs.X + InputAbs.Y + InputAbs.Z;
	N /= AbsSum;

	if (N.Z < 0.0)
	{
		// Both folded coordinates are derived from the absolute values taken before either one is overwritten.
		const FVector3f ProjectedAbs = N.GetAbs();
		const float FoldedX = (N.X >= 0.0f) ? (1.0f - ProjectedAbs.Y) : (ProjectedAbs.Y - 1.0f);
		const float FoldedY = (N.Y >= 0.0f) ? (1.0f - ProjectedAbs.X) : (ProjectedAbs.X - 1.0f);
		return FVector2f(FoldedX, FoldedY);
	}

	return FVector2f(N.X, N.Y);
}
|
|
|
|
// Octahedral encoding quantized to QuantizationBits per axis.
// X and Y receive integer coordinates in [0, (1 << QuantizationBits) - 1].
FORCEINLINE static void OctahedronEncode(FVector3f N, int32& X, int32& Y, int32 QuantizationBits)
{
	const int32 QuantizationMaxValue = (1 << QuantizationBits) - 1;
	const float HalfRange = 0.5f * (float)QuantizationMaxValue;
	const float RoundingBias = 0.5f * (float)QuantizationMaxValue + 0.5f;	// The extra 0.5 is applied before the float is truncated to int

	const FVector2f OctCoord = OctahedronEncode(N);

	const int32 QuantizedX = int32(OctCoord.X * HalfRange + RoundingBias);
	const int32 QuantizedY = int32(OctCoord.Y * HalfRange + RoundingBias);

	X = FMath::Clamp(QuantizedX, 0, QuantizationMaxValue);
	Y = FMath::Clamp(QuantizedY, 0, QuantizationMaxValue);
}
|
|
|
|
// Reconstructs a normalized direction from quantized octahedral coordinates.
FORCEINLINE static FVector3f OctahedronDecode(int32 X, int32 Y, int32 QuantizationBits)
{
	const int32 QuantizationMaxValue = (1 << QuantizationBits) - 1;
	const float StepSize = 2.0f / (float)QuantizationMaxValue;

	// Integer coordinates back to [-1, 1].
	float DirX = (float)X * StepSize - 1.0f;
	float DirY = (float)Y * StepSize - 1.0f;
	const float DirZ = 1.0f - FMath::Abs(DirX) - FMath::Abs(DirY);

	// Non-zero only when DirZ is negative: pulls X and Y back towards zero by that amount.
	const float Fold = FMath::Clamp(-DirZ, 0.0f, 1.0f);
	DirX += (DirX >= 0.0f) ? -Fold : Fold;
	DirY += (DirY >= 0.0f) ? -Fold : Fold;

	return FVector3f(DirX, DirY, DirZ).GetUnsafeNormal();
}
|
|
|
|
// Quantized octahedral encoding that evaluates the four integer candidates around the unquantized
// coordinate in parallel (one per SIMD lane) and selects the one whose decoded direction has the
// largest normalized dot product with N.
FORCEINLINE static void OctahedronEncodePreciseSIMD( FVector3f N, int32& X, int32& Y, int32 QuantizationBits )
{
	const int32 QuantizationMaxValue = ( 1 << QuantizationBits ) - 1;
	const FVector2f OctCoord = OctahedronEncode( N );

	const VectorRegister4f HalfRange = VectorSetFloat1( 0.5f * (float)QuantizationMaxValue );
	const VectorRegister4f StepSize = VectorSetFloat1( 2.0f / (float)QuantizationMaxValue );

	// Integer candidate coordinates, lanes = x0, y0, x1, y1 where x1 = x0 + 1 and y1 = y0 + 1
	const VectorRegister4f Unquantized = MakeVectorRegister( OctCoord.X, OctCoord.Y, OctCoord.X, OctCoord.Y );
	VectorRegister4Int Candidates = VectorFloatToInt( VectorMultiplyAdd( Unquantized, HalfRange, HalfRange ) );
	Candidates = VectorIntAdd( Candidates, MakeVectorRegisterInt( 0, 0, 1, 1 ) );

	// CandidateCoord = Candidates * 2.0f / QuantizationMaxValue - 1.0f
	const VectorRegister4f CandidateCoord = VectorMultiplyAdd( VectorIntToFloat( Candidates ), StepSize, GlobalVectorConstants::FloatMinusOne );

	// One candidate per lane: (x0,y0), (x1,y0), (x0,y1), (x1,y1)
	VectorRegister4f DirX = VectorSwizzle( CandidateCoord, 0, 2, 0, 2 );
	VectorRegister4f DirY = VectorSwizzle( CandidateCoord, 1, 1, 3, 3 );
	const VectorRegister4f DirZ = VectorSubtract( VectorSubtract( VectorOneFloat(), VectorAbs( DirX ) ), VectorAbs( DirY ) );	// DirZ = 1.0f - abs(DirX) - abs(DirY)

	const VectorRegister4f Fold = VectorMin( DirZ, VectorZeroFloat() );	// Fold = min(DirZ, 0.0f)

	const VectorRegister4f SignX = VectorBitwiseAnd( DirX, GlobalVectorConstants::SignBit() );
	const VectorRegister4f SignY = VectorBitwiseAnd( DirY, GlobalVectorConstants::SignBit() );

	// XOR with the sign bit negates Fold in lanes where the coordinate is negative
	DirX = VectorAdd( DirX, VectorBitwiseXor( Fold, SignX ) );	// DirX += Fold ^ SignX
	DirY = VectorAdd( DirY, VectorBitwiseXor( Fold, SignY ) );	// DirY += Fold ^ SignY

	// dot(candidate direction, N) / length(candidate direction), per lane
	VectorRegister4f Scores = VectorMultiplyAdd( DirX, VectorSetFloat1( N.X ), VectorMultiplyAdd( DirY, VectorSetFloat1( N.Y ), VectorMultiply( DirZ, VectorSetFloat1( N.Z ) ) ) );
	const VectorRegister4f CandidateLengths = VectorSqrt( VectorMultiplyAdd( DirX, DirX, VectorMultiplyAdd( DirY, DirY, VectorMultiply( DirZ, DirZ ) ) ) );
	Scores = VectorDivide( Scores, CandidateLengths );

	// Overwrite the two lowest bits of every score with its lane index,
	// so the lane of the winner can be read back from the maximum itself.
	const VectorRegister4f LowBitsClearMask = MakeVectorRegister( 0xFFFFFFFCu, 0xFFFFFFFCu, 0xFFFFFFFCu, 0xFFFFFFFCu );
	const VectorRegister4f LaneTags = MakeVectorRegister( 0u, 1u, 2u, 3u );
	Scores = VectorBitwiseOr( VectorBitwiseAnd( Scores, LowBitsClearMask ), LaneTags );

	// Horizontal max: after these two steps lane 0 holds the maximum of all four lanes
	VectorRegister4f BestScore = VectorMax( Scores, VectorSwizzle( Scores, 2, 3, 0, 1 ) );
	BestScore = VectorMax( BestScore, VectorSwizzle( BestScore, 1, 2, 3, 0 ) );

	float TaggedScore = VectorGetComponent( BestScore, 0 );
	const uint32 WinnerBits = *(uint32*)&TaggedScore;

	// Bit 0 of the tag selects x0/x1 and bit 1 selects y0/y1
	uint32 CandidateValues[ 4 ];
	VectorIntStore( Candidates, CandidateValues );
	const uint32 OffsetX = WinnerBits & 1;
	const uint32 OffsetY = ( WinnerBits >> 1 ) & 1;
	X = FMath::Clamp( (int32)( CandidateValues[0] + OffsetX ), 0, QuantizationMaxValue );
	Y = FMath::Clamp( (int32)( CandidateValues[1] + OffsetY ), 0, QuantizationMaxValue );
}
|
|
|
|
// Scalar reference for OctahedronEncodePreciseSIMD: decodes each of the four integer candidates
// around the unquantized coordinate and keeps the one with the smallest |1 - dot(decoded, N)|.
FORCEINLINE static void OctahedronEncodePrecise(FVector3f N, int32& X, int32& Y, int32 QuantizationBits)
{
	const int32 QuantizationMaxValue = (1 << QuantizationBits) - 1;
	const FVector2f OctCoord = OctahedronEncode(N);

	// No rounding bias here: the truncated coordinate is the base corner and +1 neighbors are tested below.
	const float HalfRange = 0.5f * (float)QuantizationMaxValue;
	const int32 BaseX = FMath::Clamp(int32(OctCoord.X * HalfRange + HalfRange), 0, QuantizationMaxValue);
	const int32 BaseY = FMath::Clamp(int32(OctCoord.Y * HalfRange + HalfRange), 0, QuantizationMaxValue);

	// If no candidate reaches an error below 1.0, the result stays at (0, 0).
	float LowestError = 1.0f;
	int32 WinnerX = 0;
	int32 WinnerY = 0;

	// Corner order: (0,0), (1,0), (0,1), (1,1)
	for (int32 Corner = 0; Corner < 4; Corner++)
	{
		const int32 CandidateX = BaseX + (Corner & 1);
		const int32 CandidateY = BaseY + (Corner >> 1);
		if (CandidateX > QuantizationMaxValue || CandidateY > QuantizationMaxValue)
		{
			// Candidate lies outside the quantization range
			continue;
		}

		const FVector3f Decoded = OctahedronDecode(CandidateX, CandidateY, QuantizationBits);
		const float CandidateError = FMath::Abs(1.0f - (Decoded | N));
		if (CandidateError < LowestError)
		{
			LowestError = CandidateError;
			WinnerX = CandidateX;
			WinnerY = CandidateY;
		}
	}

	X = WinnerX;
	Y = WinnerY;
}
|
|
|
|
// Packs a normal into 2 * QuantizationBits bits: octahedral X in the low bits, octahedral Y directly above it.
FORCEINLINE static uint32 PackNormal(FVector3f Normal, uint32 QuantizationBits)
{
	int32 OctX, OctY;
	OctahedronEncodePreciseSIMD(Normal, OctX, OctY, QuantizationBits);

#if 0
	// Disabled cross-check of the SIMD encoder against the scalar implementation
	int32 RefX, RefY;
	OctahedronEncodePrecise(Normal, RefX, RefY, QuantizationBits);
	FVector3f DecodedSIMD = OctahedronDecode( OctX, OctY, QuantizationBits );
	FVector3f DecodedRef = OctahedronDecode( RefX, RefY, QuantizationBits );
	float DotSIMD = Normal | DecodedSIMD;
	float DotRef = Normal | DecodedRef;
	check( DotSIMD >= DotRef*0.99999f );
#endif

	return (OctY << QuantizationBits) | OctX;
}
|
|
|
|
// Inverse of PackNormal: splits the packed value into its two octahedral coordinates
// and reconstructs the normalized direction.
FORCEINLINE static FVector3f UnpackNormal(uint32 PackedNormal, uint32 QuantizationBits)
{
	const uint32 QuantizationMaxValue = (1u << QuantizationBits) - 1u;
	const float StepSize = 2.0f / float(QuantizationMaxValue);

	// Low bits hold X, the bits above hold Y. Both are mapped back to [-1, 1].
	const uint32 PackedX = PackedNormal & QuantizationMaxValue;
	const uint32 PackedY = PackedNormal >> QuantizationBits;
	float DirX = float(PackedX) * StepSize - 1.0f;
	float DirY = float(PackedY) * StepSize - 1.0f;
	const float DirZ = 1.0f - FMath::Abs(DirX) - FMath::Abs(DirY);

	// Non-zero only when DirZ is negative: pulls X and Y back towards zero by that amount.
	const float Fold = FMath::Clamp(-DirZ, 0.0f, 1.0f);
	DirX += (DirX >= 0.0f) ? -Fold : Fold;
	DirY += (DirY >= 0.0f) ? -Fold : Fold;

	return FVector3f(DirX, DirY, DirZ).GetUnsafeNormal();
}
|
|
|
|
// Quantizes TangentX as an angle around TangentZ, measured in a reference frame derived from TangentZ alone.
//
// QuantizedTangentAngle: Receives the angle in NumTangentBits bits. Only written when the function returns true.
// Returns false when the projection of TangentX onto the reference plane is too short to define an angle.
static bool PackTangent(uint32& QuantizedTangentAngle, FVector3f TangentX, FVector3f TangentZ, uint32 NumTangentBits)
{
	checkSlow(TangentZ.IsNormalized());

	FVector3f BasisZ = TangentZ;
	FVector3f Tangent = TangentX;

	// Conditionally swap X and Z, if abs(Z)>abs(X).
	// After this, we know the largest component is in X or Y and at least one of them is going to be non-zero.
	if (FMath::Abs(BasisZ.Z) > FMath::Abs(BasisZ.X))
	{
		Swap(BasisZ.X, BasisZ.Z);
		Swap(Tangent.X, Tangent.Z);
	}

	// Reference frame built from the normal only
	const FVector3f RefX = FVector3f(-BasisZ.Y, BasisZ.X, 0.0f).GetSafeNormal();
	const FVector3f RefY = (BasisZ ^ RefX);

	const float ProjX = Tangent | RefX;
	const float ProjY = Tangent | RefY;
	const float ProjLenSq = ProjX * ProjX + ProjY * ProjY;

	if (!(ProjLenSq >= 0.0001f))
	{
		return false;
	}

	float Angle = FMath::Atan2(ProjY, ProjX);
	// NOTE(review): every angle below PI gets a full turn added; the mask below removes whole turns again.
	if (Angle < PI) Angle += 2.0f * PI;

	const float UnitAngle = Angle / (2.0f * PI);
	const int32 NumAngleSteps = (1 << NumTangentBits);

	// Round to the nearest step and wrap into [0, NumAngleSteps)
	int IntAngle = FMath::FloorToInt(UnitAngle * float(NumAngleSteps) + 0.5f);
	QuantizedTangentAngle = uint32(IntAngle & (NumAngleSteps - 1));
	return true;
}
|
|
|
|
// Inverse of PackTangent: rebuilds the tangent direction from its quantized angle and the normal.
// QuantizedTangentAngle is only read.
static FVector3f UnpackTangent(uint32& QuantizedTangentAngle, FVector3f TangentZ, uint32 NumTangentBits)
{
	// Same conditional X/Z swap as in PackTangent
	const bool bSwapXZ = (FMath::Abs(TangentZ.Z) > FMath::Abs(TangentZ.X));

	FVector3f BasisZ = TangentZ;
	if (bSwapXZ)
	{
		Swap(BasisZ.X, BasisZ.Z);
	}

	// Reference frame built from the normal only
	const FVector3f RefX = FVector3f(-BasisZ.Y, BasisZ.X, 0.0f).GetSafeNormal();
	const FVector3f RefY = (BasisZ ^ RefX);

	// Quantized step -> radians
	const float UnitAngle = float(QuantizedTangentAngle) / float(1 << NumTangentBits);
	const float AngleRadians = UnitAngle * 2.0f * PI;

	FVector3f Result = (RefX * FMath::Cos(AngleRadians) + RefY * FMath::Sin(AngleRadians)).GetUnsafeNormal();

	// Undo the swap on the way out
	if (bSwapXZ)
	{
		Swap(Result.X, Result.Z);
	}

	return Result;
}
|
|
|
|
// Computes the encoding parameters and GPU section sizes for one cluster.
//
// Info:                Output. Zeroed first, then filled with bit widths, color/UV ranges and GpuSizes.
// Cluster:             Cluster to analyze. Must contain at least one vertex in the compressed path.
// NormalPrecision:     Bits per octahedral normal component.
// TangentPrecision:    Bits for the tangent angle (one extra bit is added for the sign).
// BoneWeightPrecision: Forwarded to CalculateInfluences when the vertex format has bone influences.
//
// All bitstream sizes are rounded up to whole 32-bit words and stored in bytes.
static void CalculateEncodingInfo(FEncodingInfo& Info, const FCluster& Cluster, int32 NormalPrecision, int32 TangentPrecision, int32 BoneWeightPrecision)
{
	const uint32 NumClusterVerts = Cluster.Verts.Num();
	const uint32 NumClusterTris = Cluster.NumTris;
	const uint32 MaxBones = Cluster.Verts.Format.NumBoneInfluences;

	FMemory::Memzero(Info);

	// Write triangles indices. Indices are stored in a dense packed bitstream using ceil(log2(NumClusterVertices)) bits per index. The shaders implement unaligned bitstream reads to support this.
	// Falls back to 1 bit when there is at most one vertex or at most one triangle.
	const uint32 BitsPerIndex = NumClusterVerts > 1 && NumClusterTris > 1 ? (FGenericPlatformMath::FloorLog2(NumClusterVerts - 1) + 1) : 1;
	const uint32 BitsPerTriangle = BitsPerIndex + 2 * 5;	// Base index + two 5-bit offsets
	Info.BitsPerIndex = BitsPerIndex;

	FPageSections& GpuSizes = Info.GpuSizes;
	GpuSizes.Cluster = sizeof(FPackedCluster);
	GpuSizes.MaterialTable = CalcMaterialTableSize(Cluster) * sizeof(uint32);
	GpuSizes.VertReuseBatchInfo = Cluster.NumTris && Cluster.MaterialRanges.Num() > 3 ? CalcVertReuseBatchInfoSize(Cluster.MaterialRanges) * sizeof(uint32) : 0;
	GpuSizes.DecodeInfo = Cluster.Verts.Format.NumTexCoords * sizeof(FPackedUVHeader) + (MaxBones > 0 ? sizeof(FPackedBoneInfluenceHeader) : 0);
	GpuSizes.Index = (NumClusterTris * BitsPerTriangle + 31) / 32 * 4;

	GpuSizes.BrickData = Cluster.Bricks.Num() * sizeof(FPackedBrick);

	// Clusters without triangles get no position stream
	const uint32 NumPositions = (Cluster.NumTris != 0) ? NumClusterVerts : 0;
#if NANITE_USE_UNCOMPRESSED_VERTEX_DATA
	// Normal (3 floats) + optional tangent (4 floats) + color (uint32) + UVs (2 floats each)
	const uint32 AttribBytesPerVertex = (3 * sizeof(float) + (Cluster.Verts.Format.bHasTangents ? (4 * sizeof(float)) : 0) + sizeof(uint32) + Cluster.Verts.Format.NumTexCoords * 2 * sizeof(float));

	Info.BitsPerAttribute = AttribBytesPerVertex * 8;
	Info.ColorMin = FIntVector4(0, 0, 0, 0);
	Info.ColorBits = FIntVector4(8, 8, 8, 8);
	Info.ColorMode = NANITE_VERTEX_COLOR_MODE_VARIABLE;
	Info.NormalPrecision = 0;
	Info.TangentPrecision = 0;

	// TODO: Nanite-Skinning: Implement uncompressed path

	GpuSizes.Position = NumPositions * 3 * sizeof(float);
	GpuSizes.Attribute = NumClusterVerts * AttribBytesPerVertex;
#else
	// Two octahedral components per normal
	Info.BitsPerAttribute = 2 * NormalPrecision;

	if (Cluster.Verts.Format.bHasTangents)
	{
		// Sign bit + quantized angle
		Info.BitsPerAttribute += 1 + TangentPrecision;
	}

	check(NumClusterVerts > 0);

	// Normals
	Info.NormalPrecision = NormalPrecision;
	Info.TangentPrecision = TangentPrecision;

	// Vertex colors
	// Constant white unless the cluster has colors that actually vary
	Info.ColorMode = NANITE_VERTEX_COLOR_MODE_CONSTANT;
	Info.ColorMin = FIntVector4(255, 255, 255, 255);
	if (Cluster.Verts.Format.bHasColors)
	{
		FIntVector4 ColorMin = FIntVector4( 255, 255, 255, 255);
		FIntVector4 ColorMax = FIntVector4( 0, 0, 0, 0);
		for (uint32 i = 0; i < NumClusterVerts; i++)
		{
			FColor Color = Cluster.Verts.GetColor(i).ToFColor(false);
			ColorMin.X = FMath::Min(ColorMin.X, (int32)Color.R);
			ColorMin.Y = FMath::Min(ColorMin.Y, (int32)Color.G);
			ColorMin.Z = FMath::Min(ColorMin.Z, (int32)Color.B);
			ColorMin.W = FMath::Min(ColorMin.W, (int32)Color.A);
			ColorMax.X = FMath::Max(ColorMax.X, (int32)Color.R);
			ColorMax.Y = FMath::Max(ColorMax.Y, (int32)Color.G);
			ColorMax.Z = FMath::Max(ColorMax.Z, (int32)Color.B);
			ColorMax.W = FMath::Max(ColorMax.W, (int32)Color.A);
		}

		// Per channel: just enough bits to cover the [min, max] range of the cluster
		const FIntVector4 ColorDelta = ColorMax - ColorMin;
		const int32 R_Bits = FMath::CeilLogTwo(ColorDelta.X + 1);
		const int32 G_Bits = FMath::CeilLogTwo(ColorDelta.Y + 1);
		const int32 B_Bits = FMath::CeilLogTwo(ColorDelta.Z + 1);
		const int32 A_Bits = FMath::CeilLogTwo(ColorDelta.W + 1);

		uint32 NumColorBits = R_Bits + G_Bits + B_Bits + A_Bits;
		Info.BitsPerAttribute += NumColorBits;
		Info.ColorMin = ColorMin;
		Info.ColorBits = FIntVector4(R_Bits, G_Bits, B_Bits, A_Bits);
		if (NumColorBits > 0)
		{
			Info.ColorMode = NANITE_VERTEX_COLOR_MODE_VARIABLE;
		}
	}

	const int NumMantissaBits = NANITE_UV_FLOAT_NUM_MANTISSA_BITS;	//TODO: make this a build setting
	for( uint32 UVIndex = 0; UVIndex < Cluster.Verts.Format.NumTexCoords; UVIndex++ )
	{
		// Range of the encoded UV values over all vertices of the cluster
		FUintVector2 UVMin = FUintVector2(0xFFFFFFFFu, 0xFFFFFFFFu);
		FUintVector2 UVMax = FUintVector2(0u, 0u);

		for (uint32 i = 0; i < NumClusterVerts; i++)
		{
			const FVector2f& UV = Cluster.Verts.GetUVs(i)[UVIndex];

			const uint32 EncodedU = EncodeUVFloat(UV.X, NumMantissaBits);
			const uint32 EncodedV = EncodeUVFloat(UV.Y, NumMantissaBits);

			UVMin.X = FMath::Min(UVMin.X, EncodedU);
			UVMin.Y = FMath::Min(UVMin.Y, EncodedV);
			UVMax.X = FMath::Max(UVMax.X, EncodedU);
			UVMax.Y = FMath::Max(UVMax.Y, EncodedV);
		}

		const FUintVector2 UVDelta = UVMax - UVMin;

		FUVInfo& UVInfo = Info.UVs[UVIndex];
		UVInfo.Min = UVMin;
		UVInfo.NumBits.X = FMath::CeilLogTwo(UVDelta.X + 1u);
		UVInfo.NumBits.Y = FMath::CeilLogTwo(UVDelta.Y + 1u);

		Info.BitsPerAttribute += UVInfo.NumBits.X + UVInfo.NumBits.Y;
	}

	if (MaxBones > 0)
	{
		CalculateInfluences(Info.BoneInfluence, Cluster, BoneWeightPrecision);

		// TODO: Nanite-Skinning: Make this more compact. Range of indices? Palette of indices? Omit the last weight?
		const uint32 VertexInfluenceSize = ( NumClusterVerts * Info.BoneInfluence.NumVertexBoneInfluences * ( Info.BoneInfluence.NumVertexBoneIndexBits + Info.BoneInfluence.NumVertexBoneWeightBits ) + 31) / 32 * 4;
		GpuSizes.BoneInfluence = VertexInfluenceSize;

		check(IsAligned(GpuSizes.BoneInfluence, 4));
	}

	const uint32 PositionBitsPerVertex = Cluster.QuantizedPosBits.X + Cluster.QuantizedPosBits.Y + Cluster.QuantizedPosBits.Z;
	GpuSizes.Position = (NumPositions * PositionBitsPerVertex + 31) / 32 * 4;
	GpuSizes.Attribute = (NumClusterVerts * Info.BitsPerAttribute + 31) / 32 * 4;
#endif
}
|
|
|
|
void EncodeGeometryData( const uint32 LocalClusterIndex, const FCluster& Cluster, const FEncodingInfo& EncodingInfo,
|
|
const TArrayView<uint16> PageDependencies, const TArray<TMap<FVariableVertex, FVertexMapEntry>>& PageVertexMaps,
|
|
TMap<FVariableVertex, uint32>& UniqueVertices, uint32& NumCodedVertices, FPageStreams& Streams)
|
|
{
|
|
const uint32 NumClusterVerts = Cluster.Verts.Num();
|
|
const uint32 NumClusterTris = Cluster.NumTris;
|
|
|
|
Streams.VertexRefBitmask.AddZeroed(NANITE_MAX_CLUSTER_VERTICES / 32);
|
|
|
|
TArray<uint32> UniqueToVertexIndex;
|
|
|
|
bool bUseVertexRefs = NumClusterTris > 0 && !NANITE_USE_UNCOMPRESSED_VERTEX_DATA; // TODO: Skip voxels for now. Currently, voxel almost never match parents exactly.
|
|
if( !bUseVertexRefs )
|
|
{
|
|
NumCodedVertices = NumClusterVerts;
|
|
}
|
|
else
|
|
{
|
|
// Find vertices from same page we can reference instead of storing duplicates
|
|
struct FVertexRef
|
|
{
|
|
uint32 PageIndex;
|
|
uint32 LocalClusterIndex;
|
|
uint32 VertexIndex;
|
|
};
|
|
TArray<FVertexRef> VertexRefs;
|
|
|
|
for (uint32 VertexIndex = 0; VertexIndex < NumClusterVerts; VertexIndex++)
|
|
{
|
|
FVariableVertex Vertex;
|
|
Vertex.Data = &Cluster.Verts.Array[ VertexIndex * Cluster.Verts.GetVertSize() ];
|
|
Vertex.SizeInBytes = Cluster.Verts.GetVertSize() * sizeof(float);
|
|
|
|
FVertexRef VertexRef = {};
|
|
bool bFound = false;
|
|
|
|
// Look for vertex in parents
|
|
for (int32 SrcPageIndexIndex = 0; SrcPageIndexIndex < PageDependencies.Num(); SrcPageIndexIndex++)
|
|
{
|
|
uint32 SrcPageIndex = PageDependencies[SrcPageIndexIndex];
|
|
const FVertexMapEntry* EntryPtr = PageVertexMaps[SrcPageIndex].Find(Vertex);
|
|
if (EntryPtr)
|
|
{
|
|
VertexRef = FVertexRef{ (uint32)SrcPageIndexIndex + 1, EntryPtr->LocalClusterIndex, EntryPtr->VertexIndex };
|
|
bFound = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (!bFound)
|
|
{
|
|
// Look for vertex in current page
|
|
uint32* VertexPtr = UniqueVertices.Find(Vertex);
|
|
if (VertexPtr)
|
|
{
|
|
VertexRef = FVertexRef{ 0, (*VertexPtr >> NANITE_MAX_CLUSTER_VERTICES_BITS), *VertexPtr & NANITE_MAX_CLUSTER_VERTICES_MASK };
|
|
bFound = true;
|
|
}
|
|
}
|
|
|
|
if(bFound)
|
|
{
|
|
VertexRefs.Add(VertexRef);
|
|
const uint32 BitIndex = (LocalClusterIndex << NANITE_MAX_CLUSTER_VERTICES_BITS) + VertexIndex;
|
|
Streams.VertexRefBitmask[BitIndex >> 5] |= 1u << (BitIndex & 31);
|
|
}
|
|
else
|
|
{
|
|
uint32 Val = (LocalClusterIndex << NANITE_MAX_CLUSTER_VERTICES_BITS) | (uint32)VertexIndex;
|
|
UniqueVertices.Add(Vertex, Val);
|
|
UniqueToVertexIndex.Add(VertexIndex);
|
|
}
|
|
}
|
|
NumCodedVertices = UniqueToVertexIndex.Num();
|
|
|
|
// Identifies a cluster by the page it lives in and its index inside that page.
struct FClusterRef
{
	uint32 PageIndex;
	uint32 ClusterIndex;

	bool operator==(const FClusterRef& Other) const { return PageIndex == Other.PageIndex && ClusterIndex == Other.ClusterIndex; }

	// Lexicographic order on (PageIndex, ClusterIndex).
	// Must be a strict ordering (A < A is false) because it is used to sort the list of cluster refs.
	bool operator<(const FClusterRef& Other) const { return (PageIndex != Other.PageIndex) ? (PageIndex < Other.PageIndex) : (ClusterIndex < Other.ClusterIndex); }
};
|
|
|
|
// Make list of unique Page-Cluster pairs
|
|
TArray<FClusterRef> ClusterRefs;
|
|
for (const FVertexRef& Ref : VertexRefs)
|
|
ClusterRefs.AddUnique(FClusterRef{ Ref.PageIndex, Ref.LocalClusterIndex });
|
|
|
|
ClusterRefs.Sort();
|
|
|
|
for (const FClusterRef& Ref : ClusterRefs)
|
|
{
|
|
Streams.PageClusterPair.Add((Ref.PageIndex << NANITE_MAX_CLUSTERS_PER_PAGE_BITS) | Ref.ClusterIndex);
|
|
}
|
|
|
|
// Write vertex refs using Page-Cluster index + vertex index
|
|
uint32 PrevVertexIndex = 0;
|
|
for (const FVertexRef& Ref : VertexRefs)
|
|
{
|
|
uint32 PageClusterIndex = ClusterRefs.Find(FClusterRef{ Ref.PageIndex, Ref.LocalClusterIndex });
|
|
check(PageClusterIndex < 256);
|
|
const uint32 VertexIndexDelta = (Ref.VertexIndex - PrevVertexIndex) & 0xFF;
|
|
Streams.VertexRef.Add(uint16((PageClusterIndex << NANITE_MAX_CLUSTER_VERTICES_BITS) | EncodeZigZag(ShortestWrap(VertexIndexDelta, 8))));
|
|
PrevVertexIndex = Ref.VertexIndex;
|
|
}
|
|
}
|
|
|
|
const uint32 BitsPerIndex = EncodingInfo.BitsPerIndex;
|
|
|
|
// Write triangle indices
|
|
#if NANITE_USE_STRIP_INDICES
|
|
for (uint32 i = 0; i < NANITE_MAX_CLUSTER_TRIANGLES / 32; i++)
|
|
{
|
|
Streams.StripBitmask.Add(Cluster.StripDesc.Bitmasks[i][0]);
|
|
Streams.StripBitmask.Add(Cluster.StripDesc.Bitmasks[i][1]);
|
|
Streams.StripBitmask.Add(Cluster.StripDesc.Bitmasks[i][2]);
|
|
}
|
|
Streams.Index.Append(Cluster.StripIndexData);
|
|
#else
|
|
for (uint32 i = 0; i < NumClusterTris * 3; i++)
|
|
{
|
|
uint32 Index = Cluster.Indexes[i];
|
|
Streams.Index.Add(Cluster.Indexes[i]);
|
|
}
|
|
#endif
|
|
|
|
check(NumClusterVerts > 0);
|
|
|
|
#if NANITE_USE_UNCOMPRESSED_VERTEX_DATA
|
|
FBitWriter BitWriter_Position(Streams.LowByte);
|
|
for (uint32 VertexIndex = 0; VertexIndex < NumClusterVerts; VertexIndex++)
|
|
{
|
|
const FVector3f& Position = Cluster.Verts.GetPosition(VertexIndex);
|
|
BitWriter_Position.PutBits(*(uint32*)&Position.X, 32);
|
|
BitWriter_Position.PutBits(*(uint32*)&Position.Y, 32);
|
|
BitWriter_Position.PutBits(*(uint32*)&Position.Z, 32);
|
|
}
|
|
BitWriter_Position.Flush(sizeof(uint32));
|
|
|
|
FBitWriter BitWriter_Attribute(Streams.MidByte);
|
|
for (uint32 VertexIndex = 0; VertexIndex < NumClusterVerts; VertexIndex++)
|
|
{
|
|
// Normal
|
|
const FVector3f& Normal = Cluster.Verts.GetNormal(VertexIndex);
|
|
BitWriter_Attribute.PutBits(*(uint32*)&Normal.X, 32);
|
|
BitWriter_Attribute.PutBits(*(uint32*)&Normal.Y, 32);
|
|
BitWriter_Attribute.PutBits(*(uint32*)&Normal.Z, 32);
|
|
|
|
if(Cluster.Verts.Format.bHasTangents)
|
|
{
|
|
const FVector3f TangentX = Cluster.Verts.GetTangentX(VertexIndex);
|
|
BitWriter_Attribute.PutBits(*(uint32*)&TangentX.X, 32);
|
|
BitWriter_Attribute.PutBits(*(uint32*)&TangentX.Y, 32);
|
|
BitWriter_Attribute.PutBits(*(uint32*)&TangentX.Z, 32);
|
|
|
|
const float TangentYSign = Cluster.Verts.GetTangentYSign(VertexIndex) < 0.0f ? -1.0f : 1.0f;
|
|
BitWriter_Attribute.PutBits(*(uint32*)&TangentYSign, 32);
|
|
}
|
|
|
|
// Color
|
|
uint32 ColorDW = Cluster.Verts.Format.bHasColors ? Cluster.Verts.GetColor(VertexIndex).ToFColor(false).DWColor() : 0xFFFFFFFFu;
|
|
BitWriter_Attribute.PutBits(ColorDW, 32);
|
|
|
|
// UVs
|
|
if (Cluster.Verts.Format.NumTexCoords > 0)
|
|
{
|
|
const FVector2f* UVs = Cluster.Verts.GetUVs(VertexIndex);
|
|
for (uint32 TexCoordIndex = 0; TexCoordIndex < Cluster.Verts.Format.NumTexCoords; TexCoordIndex++)
|
|
{
|
|
const FVector2f UV = (TexCoordIndex < Cluster.Verts.Format.NumTexCoords) ? UVs[TexCoordIndex] : FVector2f(0.0f);
|
|
BitWriter_Attribute.PutBits(*(uint32*)&UV.X, 32);
|
|
BitWriter_Attribute.PutBits(*(uint32*)&UV.Y, 32);
|
|
}
|
|
}
|
|
}
|
|
BitWriter_Attribute.Flush(sizeof(uint32));
|
|
#else
|
|
const uint32 NumUniqueToVertices = bUseVertexRefs ? UniqueToVertexIndex.Num() : NumClusterVerts;
|
|
|
|
// Generate quantized texture coordinates
|
|
TArray<FIntVector2, TInlineAllocator<NANITE_MAX_CLUSTER_VERTICES*NANITE_MAX_UVS>> PackedUVs;
|
|
PackedUVs.AddUninitialized( NumClusterVerts * Cluster.Verts.Format.NumTexCoords );
|
|
|
|
const uint32 NumMantissaBits = NANITE_UV_FLOAT_NUM_MANTISSA_BITS;
|
|
for( uint32 UVIndex = 0; UVIndex < Cluster.Verts.Format.NumTexCoords; UVIndex++ )
|
|
{
|
|
const FUVInfo& UVInfo = EncodingInfo.UVs[UVIndex];
|
|
const uint32 NumTexCoordValuesU = 1u << UVInfo.NumBits.X;
|
|
const uint32 NumTexCoordValuesV = 1u << UVInfo.NumBits.Y;
|
|
|
|
for (uint32 LocalVertexIndex = 0; LocalVertexIndex < NumUniqueToVertices; LocalVertexIndex++)
|
|
{
|
|
uint32 VertexIndex = LocalVertexIndex;
|
|
if( bUseVertexRefs )
|
|
VertexIndex = UniqueToVertexIndex[LocalVertexIndex];
|
|
|
|
const FVector2f UV = (UVIndex < Cluster.Verts.Format.NumTexCoords) ? Cluster.Verts.GetUVs(VertexIndex)[UVIndex] : FVector2f(0.0f);
|
|
|
|
uint32 EncodedU = EncodeUVFloat(UV.X, NumMantissaBits);
|
|
uint32 EncodedV = EncodeUVFloat(UV.Y, NumMantissaBits);
|
|
|
|
check(EncodedU >= UVInfo.Min.X);
|
|
check(EncodedV >= UVInfo.Min.Y);
|
|
EncodedU -= UVInfo.Min.X;
|
|
EncodedV -= UVInfo.Min.Y;
|
|
|
|
check(EncodedU >= 0 && EncodedU < NumTexCoordValuesU);
|
|
check(EncodedV >= 0 && EncodedV < NumTexCoordValuesV);
|
|
PackedUVs[NumClusterVerts * UVIndex + VertexIndex].X = (int32)EncodedU;
|
|
PackedUVs[NumClusterVerts * UVIndex + VertexIndex].Y = (int32)EncodedV;
|
|
}
|
|
}
|
|
|
|
auto WriteZigZagDelta = [&](const int32 Delta, const uint32 NumBytes) {
|
|
const uint32 Value = EncodeZigZag(Delta);
|
|
checkSlow(DecodeZigZag(Value) == Delta);
|
|
|
|
checkSlow(NumBytes <= 3);
|
|
checkSlow(Value < (1u << (NumBytes*8)));
|
|
|
|
if (NumBytes >= 3)
|
|
{
|
|
Streams.HighByte.Add((Value >> 16) & 0xFFu);
|
|
}
|
|
|
|
if (NumBytes >= 2)
|
|
{
|
|
Streams.MidByte.Add((Value >> 8) & 0xFFu);
|
|
}
|
|
|
|
if (NumBytes >= 1)
|
|
{
|
|
Streams.LowByte.Add(Value & 0xFFu);
|
|
}
|
|
};
|
|
|
|
const uint32 BytesPerPositionComponent = (FMath::Max3(Cluster.QuantizedPosBits.X, Cluster.QuantizedPosBits.Y, Cluster.QuantizedPosBits.Z) + 7) / 8;
|
|
const uint32 BytesPerNormalComponent = (EncodingInfo.NormalPrecision + 7) / 8;
|
|
const uint32 BytesPerTangentComponent = (EncodingInfo.TangentPrecision + 1 + 7) / 8;
|
|
|
|
// Position
|
|
if (Cluster.NumTris != 0)
|
|
{
|
|
FIntVector PrevPosition = FIntVector((1 << Cluster.QuantizedPosBits.X) >> 1, (1 << Cluster.QuantizedPosBits.Y) >> 1, (1 << Cluster.QuantizedPosBits.Z) >> 1);
|
|
|
|
for (uint32 LocalVertexIndex = 0; LocalVertexIndex < NumUniqueToVertices; LocalVertexIndex++)
|
|
{
|
|
uint32 VertexIndex = LocalVertexIndex;
|
|
if( bUseVertexRefs )
|
|
VertexIndex = UniqueToVertexIndex[LocalVertexIndex];
|
|
|
|
const FIntVector& Position = Cluster.QuantizedPositions[VertexIndex];
|
|
FIntVector PositionDelta = Position - PrevPosition;
|
|
|
|
PositionDelta.X = ShortestWrap(PositionDelta.X, Cluster.QuantizedPosBits.X);
|
|
PositionDelta.Y = ShortestWrap(PositionDelta.Y, Cluster.QuantizedPosBits.Y);
|
|
PositionDelta.Z = ShortestWrap(PositionDelta.Z, Cluster.QuantizedPosBits.Z);
|
|
|
|
WriteZigZagDelta(PositionDelta.X, BytesPerPositionComponent);
|
|
WriteZigZagDelta(PositionDelta.Y, BytesPerPositionComponent);
|
|
WriteZigZagDelta(PositionDelta.Z, BytesPerPositionComponent);
|
|
PrevPosition = Position;
|
|
}
|
|
}
|
|
|
|
FIntPoint PrevNormal = FIntPoint::ZeroValue;
|
|
|
|
TArray< uint32, TInlineAllocator<NANITE_MAX_CLUSTER_VERTICES> > PackedNormals;
|
|
PackedNormals.AddUninitialized( NumClusterVerts );
|
|
|
|
// Normal
|
|
for (uint32 LocalVertexIndex = 0; LocalVertexIndex < NumUniqueToVertices; LocalVertexIndex++)
|
|
{
|
|
uint32 VertexIndex = LocalVertexIndex;
|
|
if( bUseVertexRefs )
|
|
VertexIndex = UniqueToVertexIndex[LocalVertexIndex];
|
|
|
|
const uint32 PackedNormal = PackNormal(Cluster.Verts.GetNormal(VertexIndex), EncodingInfo.NormalPrecision);
|
|
const FIntPoint Normal = FIntPoint(PackedNormal & ((1u << EncodingInfo.NormalPrecision) - 1u), PackedNormal >> EncodingInfo.NormalPrecision);
|
|
PackedNormals[LocalVertexIndex] = PackedNormal;
|
|
|
|
FIntPoint NormalDelta = Normal - PrevNormal;
|
|
NormalDelta.X = ShortestWrap(NormalDelta.X, EncodingInfo.NormalPrecision);
|
|
NormalDelta.Y = ShortestWrap(NormalDelta.Y, EncodingInfo.NormalPrecision);
|
|
PrevNormal = Normal;
|
|
|
|
WriteZigZagDelta(NormalDelta.X, BytesPerNormalComponent);
|
|
WriteZigZagDelta(NormalDelta.Y, BytesPerNormalComponent);
|
|
}
|
|
|
|
|
|
// Tangent
|
|
if (Cluster.Verts.Format.bHasTangents)
|
|
{
|
|
uint32 PrevTangentBits = 0u;
|
|
for (uint32 LocalVertexIndex = 0; LocalVertexIndex < NumUniqueToVertices; LocalVertexIndex++)
|
|
{
|
|
uint32 VertexIndex = LocalVertexIndex;
|
|
if( bUseVertexRefs )
|
|
VertexIndex = UniqueToVertexIndex[LocalVertexIndex];
|
|
|
|
const uint32 PackedTangentZ = PackedNormals[LocalVertexIndex];
|
|
|
|
FVector3f TangentX = Cluster.Verts.GetTangentX(VertexIndex);
|
|
const FVector3f UnpackedTangentZ = UnpackNormal(PackedTangentZ, EncodingInfo.NormalPrecision);
|
|
checkSlow(UnpackedTangentZ.IsNormalized());
|
|
|
|
uint32 TangentBits = PrevTangentBits; // HACK: If tangent space has collapsed, just repeat the tangent used by the previous vertex
|
|
if(TangentX.SquaredLength() > 1e-8f)
|
|
{
|
|
TangentX = TangentX.GetUnsafeNormal();
|
|
|
|
const bool bTangentYSign = Cluster.Verts.GetTangentYSign(VertexIndex) < 0.0f;
|
|
uint32 QuantizedTangentAngle;
|
|
if (PackTangent(QuantizedTangentAngle, TangentX, UnpackedTangentZ, EncodingInfo.TangentPrecision))
|
|
{
|
|
TangentBits = (bTangentYSign ? (1 << EncodingInfo.TangentPrecision) : 0) | QuantizedTangentAngle;
|
|
}
|
|
}
|
|
|
|
const uint32 TangentDelta = ShortestWrap(TangentBits - PrevTangentBits, EncodingInfo.TangentPrecision + 1);
|
|
WriteZigZagDelta(TangentDelta, BytesPerTangentComponent);
|
|
|
|
PrevTangentBits = TangentBits;
|
|
}
|
|
}
|
|
|
|
// Color
|
|
if (EncodingInfo.ColorMode == NANITE_VERTEX_COLOR_MODE_VARIABLE)
|
|
{
|
|
FIntVector4 PrevColor = FIntVector4(0);
|
|
for (uint32 LocalVertexIndex = 0; LocalVertexIndex < NumUniqueToVertices; LocalVertexIndex++)
|
|
{
|
|
uint32 VertexIndex = LocalVertexIndex;
|
|
if( bUseVertexRefs )
|
|
VertexIndex = UniqueToVertexIndex[LocalVertexIndex];
|
|
|
|
const FColor Color = Cluster.Verts.GetColor(VertexIndex).ToFColor(false);
|
|
const FIntVector4 ColorValue = FIntVector4(Color.R, Color.G, Color.B, Color.A) - EncodingInfo.ColorMin;
|
|
FIntVector4 ColorDelta = ColorValue - PrevColor;
|
|
|
|
ColorDelta.X = ShortestWrap(ColorDelta.X, EncodingInfo.ColorBits.X);
|
|
ColorDelta.Y = ShortestWrap(ColorDelta.Y, EncodingInfo.ColorBits.Y);
|
|
ColorDelta.Z = ShortestWrap(ColorDelta.Z, EncodingInfo.ColorBits.Z);
|
|
ColorDelta.W = ShortestWrap(ColorDelta.W, EncodingInfo.ColorBits.W);
|
|
|
|
WriteZigZagDelta(ColorDelta.X, 1);
|
|
WriteZigZagDelta(ColorDelta.Y, 1);
|
|
WriteZigZagDelta(ColorDelta.Z, 1);
|
|
WriteZigZagDelta(ColorDelta.W, 1);
|
|
|
|
PrevColor = ColorValue;
|
|
}
|
|
}
|
|
|
|
// UV
|
|
for (uint32 TexCoordIndex = 0; TexCoordIndex < Cluster.Verts.Format.NumTexCoords; TexCoordIndex++)
|
|
{
|
|
const int32 NumTexCoordBitsU = EncodingInfo.UVs[TexCoordIndex].NumBits.X;
|
|
const int32 NumTexCoordBitsV = EncodingInfo.UVs[TexCoordIndex].NumBits.Y;
|
|
const uint32 BytesPerTexCoordComponent = (FMath::Max(NumTexCoordBitsU, NumTexCoordBitsV) + 7) / 8;
|
|
|
|
FIntVector2 PrevUV = FIntVector2::ZeroValue;
|
|
for (uint32 LocalVertexIndex = 0; LocalVertexIndex < NumUniqueToVertices; LocalVertexIndex++)
|
|
{
|
|
uint32 VertexIndex = LocalVertexIndex;
|
|
if( bUseVertexRefs )
|
|
VertexIndex = UniqueToVertexIndex[LocalVertexIndex];
|
|
|
|
const FIntVector2 UV = PackedUVs[NumClusterVerts * TexCoordIndex + VertexIndex];
|
|
|
|
FIntVector2 UVDelta = UV - PrevUV;
|
|
UVDelta.X = ShortestWrap(UVDelta.X, NumTexCoordBitsU);
|
|
UVDelta.Y = ShortestWrap(UVDelta.Y, NumTexCoordBitsV);
|
|
WriteZigZagDelta(UVDelta.X, BytesPerTexCoordComponent);
|
|
WriteZigZagDelta(UVDelta.Y, BytesPerTexCoordComponent);
|
|
PrevUV = UV;
|
|
}
|
|
}
|
|
|
|
const uint32 NumVertexBones = EncodingInfo.BoneInfluence.NumVertexBoneInfluences;
|
|
if (NumVertexBones > 0)
|
|
{
|
|
// TODO: Nanite-Skinning: support parent references
|
|
FBitWriter BitWriter(Streams.BoneInfluence);
|
|
|
|
for (uint32 i = 0; i < NumClusterVerts; i++)
|
|
{
|
|
const FVector2f* BoneInfluences = Cluster.Verts.GetBoneInfluences(i);
|
|
for (uint32 j = 0; j < NumVertexBones; j++)
|
|
{
|
|
const uint32 BoneIndex = (uint32)BoneInfluences[j].X;
|
|
const uint32 BoneWeight = (uint32)BoneInfluences[j].Y;
|
|
BitWriter.PutBits(BoneWeight ? BoneIndex : 0u, EncodingInfo.BoneInfluence.NumVertexBoneIndexBits);
|
|
|
|
if(EncodingInfo.BoneInfluence.NumVertexBoneWeightBits > 0)
|
|
{
|
|
BitWriter.PutBits(BoneWeight, EncodingInfo.BoneInfluence.NumVertexBoneWeightBits);
|
|
}
|
|
}
|
|
}
|
|
BitWriter.Flush(sizeof(uint32));
|
|
}
|
|
|
|
|
|
#endif
|
|
}
|
|
|
|
// Builds one vertex lookup map per page.
//
// For every cluster that ProcessPageClusters visits on a page, each vertex of that cluster is
// registered in the page's map: the key is an FVariableVertex viewing the vertex's floats in
// place inside Cluster.Verts.Array, and the value records the page-local cluster index and the
// vertex index within that cluster.
//
// Pages    - pages to build maps for; the result has exactly Pages.Num() entries, indexed by page.
// Clusters - cluster storage indexed by the ClusterIndex that ProcessPageClusters reports.
// Parts    - forwarded to ProcessPageClusters to enumerate the clusters of each page.
//
// Returns the per-page maps. The keys point into the cluster vertex arrays rather than owning a
// copy, so Clusters must stay alive and unmodified for as long as the returned maps are used.
TArray<TMap<FVariableVertex, FVertexMapEntry>> BuildVertexMaps(const TArray<FPage>& Pages, const TArray<FCluster>& Clusters, const TArray<FClusterGroupPart>& Parts)
{
	TArray<TMap<FVariableVertex, FVertexMapEntry>> VertexMaps;
	VertexMaps.SetNum(Pages.Num());

	// One task per page. Each task only writes to VertexMaps[PageIndex], so tasks never touch the same map.
	ParallelFor( TEXT("NaniteEncode.BuildVertexMaps.PF"), Pages.Num(), 1, [&VertexMaps, &Pages, &Clusters, &Parts](int32 PageIndex)
	{
		const FPage& Page = Pages[PageIndex];
		TMap<FVariableVertex, FVertexMapEntry>& PageVertexMap = VertexMaps[PageIndex];

		ProcessPageClusters(Page, Parts, [&](uint32 LocalClusterIndex, uint32 ClusterIndex)
		{
			const FCluster& Cluster = Clusters[ClusterIndex];

			// TODO: Skip voxels for now. Currently, voxel almost never match parents exactly.
			// Voxel clusters are the ones without triangles. Testing the vertex count here was a no-op
			// (the loop below is already bounded by it) and never skipped a voxel cluster that has vertices.
			if (Cluster.NumTris == 0)
				return;

			const uint32 NumVerts = Cluster.Verts.Num();
			for (uint32 VertexIndex = 0; VertexIndex < NumVerts; VertexIndex++)
			{
				// Key: a view of this vertex's floats inside the cluster's vertex array (no copy is made).
				FVariableVertex Vertex;
				Vertex.Data = &Cluster.Verts.Array[VertexIndex * Cluster.Verts.GetVertSize()];
				Vertex.SizeInBytes = Cluster.Verts.GetVertSize() * sizeof(float);

				// Value: where in the page this vertex lives.
				FVertexMapEntry Entry;
				Entry.LocalClusterIndex = LocalClusterIndex;
				Entry.VertexIndex = VertexIndex;

				PageVertexMap.Add(Vertex, Entry);
			}
		});
	});

	return VertexMaps;
}
|
|
|
|
|
|
// Computes the encoding info of every cluster in parallel.
//
// EncodingInfos       - output; resized to Clusters.Num(), element i is produced from Clusters[i].
// Clusters            - clusters to analyze.
// NormalPrecision,
// TangentPrecision,
// BoneWeightPrecision - forwarded unchanged to CalculateEncodingInfo for every cluster.
void CalculateEncodingInfos(
	TArray<FEncodingInfo>& EncodingInfos,
	const TArray<FCluster>& Clusters,
	int32 NormalPrecision,
	int32 TangentPrecision,
	int32 BoneWeightPrecision
)
{
	// The entries are deliberately left uninitialized here.
	// NOTE(review): this relies on CalculateEncodingInfo fully writing its output element - confirm.
	const uint32 ClusterCount = Clusters.Num();
	EncodingInfos.SetNumUninitialized(ClusterCount);

	// Batches of 128 clusters; every invocation writes only its own EncodingInfos slot.
	ParallelFor(TEXT("NaniteEncode.CalculateEncodingInfos.PF"), Clusters.Num(), 128,
		[&EncodingInfos, &Clusters, NormalPrecision, TangentPrecision, BoneWeightPrecision](uint32 Index)
		{
			const FCluster& SourceCluster = Clusters[Index];
			FEncodingInfo& TargetInfo = EncodingInfos[Index];
			CalculateEncodingInfo(TargetInfo, SourceCluster, NormalPrecision, TangentPrecision, BoneWeightPrecision);
		});
}
|
|
|
|
} // namespace Nanite
|