Files
UnrealEngine/Engine/Source/Developer/NaniteBuilder/Private/ClusterDAG.cpp
Brandyn / Techy fcc1b09210 init
2026-04-04 15:40:51 -05:00

1224 lines
36 KiB
C++

// Copyright Epic Games, Inc. All Rights Reserved.
#include "ClusterDAG.h"
#include "Async/Async.h"
#include "Async/ParallelFor.h"
#include "GraphPartitioner.h"
#include "NaniteRayTracingScene.h"
#include "BVHCluster.h"
#include "MeshSimplify.h"
#include "Algo/Partition.h"
#define VALIDATE_CLUSTER_ADJACENCY (DO_CHECK && 1)
namespace Nanite
{
/**
 * Builds the leaf clusters for one input mesh and appends them to this DAG.
 *
 * Steps, in order:
 *  1. Fold VertexFormat into the DAG-wide attribute maxima / flags.
 *  2. Build per-edge adjacency by hashing edges on vertex position.
 *  3. Union adjacent triangles in a disjoint set.
 *  4. Partition the triangles into ranges sized for FCluster::ClusterSize.
 *  5. Build one FCluster per range in parallel and record it in a new MeshInput entry.
 *  6. Accumulate the surface area of the clusters created by this call (and, when voxelizing,
 *     add them to the ray tracing scene).
 *
 * @param Verts            Vertex view; positions are read through Verts.Position.
 * @param Indexes          Triangle list, three entries per triangle.
 * @param MaterialIndexes  Forwarded to the partitioner's locality links and to each FCluster.
 * @param VertexBounds     Forwarded to the partitioner's locality link builder.
 * @param VertexFormat     Attribute layout of Verts.
 */
void FClusterDAG::AddMesh(
	const FConstMeshBuildVertexView& Verts,
	TArrayView< const uint32 > Indexes,
	TArrayView< const int32 > MaterialIndexes,
	const FBounds3f& VertexBounds,
	const FVertexFormat& VertexFormat )
{
	TRACE_CPUPROFILER_EVENT_SCOPE(Nanite::Build::ClusterTriangles);

	uint32 Time0 = FPlatformTime::Cycles();

	LOG_CRC( Verts );
	LOG_CRC( Indexes );

	// Merge this mesh's vertex format into the DAG-wide summary.
	MaxTexCoords		= FMath::Max(MaxTexCoords, VertexFormat.NumTexCoords);
	MaxBoneInfluences	= FMath::Max(MaxBoneInfluences, VertexFormat.NumBoneInfluences);
	bHasSkinning		|= VertexFormat.NumBoneInfluences > 0;
	bHasTangents		|= VertexFormat.bHasTangents;
	bHasColors			|= VertexFormat.bHasColors;

	uint32 NumTriangles = Indexes.Num() / 3;

	// One adjacency slot and one hash entry per triangle corner (edge).
	FAdjacency Adjacency( Indexes.Num() );
	FEdgeHash EdgeHash( Indexes.Num() );

	auto GetPosition = [ &Verts, &Indexes ]( uint32 EdgeIndex )
	{
		return Verts.Position[ Indexes[ EdgeIndex ] ];
	};

	// Pass 1: insert every edge into the hash.
	ParallelFor( TEXT("Nanite.ClusterTriangles.PF"), Indexes.Num(), 4096,
		[&]( int32 EdgeIndex )
		{
			EdgeHash.Add_Concurrent( EdgeIndex, GetPosition );
		} );

	// Pass 2: record the direct neighbour of every edge.
	//   -1 : no matching edge was found
	//   -2 : more than one matching edge, resolved in the serial loop below
	ParallelFor( TEXT("Nanite.ClusterTriangles.PF"), Indexes.Num(), 1024,
		[&]( int32 EdgeIndex )
		{
			int32 AdjIndex = -1;
			int32 AdjCount = 0;
			EdgeHash.ForAllMatching( EdgeIndex, false, GetPosition,
				[&]( int32 EdgeIndex, int32 OtherEdgeIndex )
				{
					AdjIndex = OtherEdgeIndex;
					AdjCount++;
				} );

			if( AdjCount > 1 )
				AdjIndex = -2;

			Adjacency.Direct[ EdgeIndex ] = AdjIndex;
		} );

	FDisjointSet DisjointSet( NumTriangles );

	for( uint32 EdgeIndex = 0, Num = Indexes.Num(); EdgeIndex < Num; EdgeIndex++ )
	{
		if( Adjacency.Direct[ EdgeIndex ] == -2 )
		{
			// EdgeHash is built in parallel, so we need to sort before use to ensure determinism.
			// This path is only executed in the rare event that an edge is shared by more than two triangles,
			// so performance impact should be negligible in practice.
			TArray< TPair< int32, int32 >, TInlineAllocator< 16 > > Edges;
			EdgeHash.ForAllMatching( EdgeIndex, false, GetPosition,
				[&]( int32 EdgeIndex0, int32 EdgeIndex1 )
				{
					Edges.Emplace( EdgeIndex0, EdgeIndex1 );
				} );
			Edges.Sort();

			for( const TPair< int32, int32 >& Edge : Edges )
			{
				Adjacency.Link( Edge.Key, Edge.Value );
			}
		}

		// Union the two triangles of each adjacent edge pair; the index comparison means a pair
		// is only unioned when visited from its higher edge index.
		Adjacency.ForAll( EdgeIndex,
			[&]( int32 EdgeIndex0, int32 EdgeIndex1 )
			{
				if( EdgeIndex0 > EdgeIndex1 )
					DisjointSet.UnionSequential( EdgeIndex0 / 3, EdgeIndex1 / 3 );
			} );
	}

	uint32 BoundaryTime = FPlatformTime::Cycles();
	UE_LOG( LogStaticMesh, Log,
		TEXT("Adjacency [%.2fs], tris: %i, UVs %i%s%s"),
		FPlatformTime::ToMilliseconds( BoundaryTime - Time0 ) / 1000.0f,
		Indexes.Num() / 3,
		VertexFormat.NumTexCoords,
		VertexFormat.bHasTangents ? TEXT(", Tangents") : TEXT(""),
		VertexFormat.bHasColors ? TEXT(", Color") : TEXT("") );

#if 0//NANITE_VOXEL_DATA
	FBVHCluster Partitioner( NumTriangles, FCluster::ClusterSize - 4, FCluster::ClusterSize );

	{
		TRACE_CPUPROFILER_EVENT_SCOPE(Nanite::Build::PartitionGraph);

		Partitioner.Build(
			[ &Verts, &Indexes ]( uint32 TriIndex )
			{
				FBounds3f Bounds;
				Bounds  = Verts.Position[ Indexes[ TriIndex * 3 + 0 ] ];
				Bounds += Verts.Position[ Indexes[ TriIndex * 3 + 1 ] ];
				Bounds += Verts.Position[ Indexes[ TriIndex * 3 + 2 ] ];
				return Bounds;
			} );

		check( Partitioner.Ranges.Num() );

		LOG_CRC( Partitioner.Ranges );
	}
#else
	FGraphPartitioner Partitioner( NumTriangles, FCluster::ClusterSize - 4, FCluster::ClusterSize );

	{
		TRACE_CPUPROFILER_EVENT_SCOPE(Nanite::Build::PartitionGraph);

		// Triangle centroid, used by the partitioner to build locality links.
		auto GetCenter = [ &Verts, &Indexes ]( uint32 TriIndex )
		{
			FVector3f Center;
			Center  = Verts.Position[ Indexes[ TriIndex * 3 + 0 ] ];
			Center += Verts.Position[ Indexes[ TriIndex * 3 + 1 ] ];
			Center += Verts.Position[ Indexes[ TriIndex * 3 + 2 ] ];
			return Center * (1.0f / 3.0f);
		};
		Partitioner.BuildLocalityLinks( DisjointSet, VertexBounds, MaterialIndexes, GetCenter );

		auto* RESTRICT Graph = Partitioner.NewGraph( NumTriangles * 3 );

		for( uint32 i = 0; i < NumTriangles; i++ )
		{
			Graph->AdjacencyOffset[i] = Graph->Adjacency.Num();

			uint32 TriIndex = Partitioner.Indexes[i];

			// Edge-adjacent triangles get a fixed cost of 4 * 65; locality links get a cost of 1.
			for( int k = 0; k < 3; k++ )
			{
				Adjacency.ForAll( 3 * TriIndex + k,
					[ &Partitioner, Graph ]( int32 EdgeIndex, int32 AdjIndex )
					{
						Partitioner.AddAdjacency( Graph, AdjIndex / 3, 4 * 65 );
					} );
			}

			Partitioner.AddLocalityLinks( Graph, TriIndex, 1 );
		}
		Graph->AdjacencyOffset[ NumTriangles ] = Graph->Adjacency.Num();

		bool bSingleThreaded = NumTriangles < 5000;

		Partitioner.PartitionStrict( Graph, !bSingleThreaded );
		check( Partitioner.Ranges.Num() );

		LOG_CRC( Partitioner.Ranges );
	}
#endif

	const uint32 OptimalNumClusters = FMath::DivideAndRoundUp< int32 >( Indexes.Num(), FCluster::ClusterSize * 3 );

	uint32 ClusterTime = FPlatformTime::Cycles();
	UE_LOG( LogStaticMesh, Log, TEXT("Clustering [%.2fs]. Ratio: %f"), FPlatformTime::ToMilliseconds( ClusterTime - BoundaryTime ) / 1000.0f, (float)Partitioner.Ranges.Num() / (float)OptimalNumClusters );

	// New clusters are appended after whatever the DAG already holds.
	const uint32 BaseCluster = Clusters.Num();
	Clusters.AddDefaulted( Partitioner.Ranges.Num() );

	const uint32 MeshIndex = MeshInput.AddDefaulted();
	MeshInput[ MeshIndex ].AddUninitialized( Partitioner.Ranges.Num() );

	{
		TRACE_CPUPROFILER_EVENT_SCOPE(Nanite::Build::BuildClusters);
		ParallelFor( TEXT("Nanite.BuildClusters.PF"), Partitioner.Ranges.Num(), 1024,
			[&]( int32 Index )
			{
				auto& Range = Partitioner.Ranges[ Index ];

				uint32 ClusterIndex = BaseCluster + Index;

				Clusters[ ClusterIndex ] = FCluster(
					Verts,
					Indexes,
					MaterialIndexes,
					VertexFormat,
					Range.Begin, Range.End,
					Partitioner.Indexes, Partitioner.SortedTo, Adjacency );

				// Negative notes it's a leaf
				Clusters[ ClusterIndex ].EdgeLength *= -1.0f;

				MeshInput[ MeshIndex ][ Index ] = FClusterRef( ClusterIndex );
			});
	}

	// Only account for the clusters appended by this call. Clusters may already contain clusters
	// from earlier AddMesh calls; iterating the whole array would add their area to the running
	// SurfaceArea total a second time.
	for( uint32 ClusterIndex = BaseCluster; ClusterIndex < (uint32)Clusters.Num(); ClusterIndex++ )
		SurfaceArea += Clusters[ ClusterIndex ].SurfaceArea;

#if RAY_TRACE_VOXELS
	if( Settings.ShapePreservation == ENaniteShapePreservation::Voxelize )
	{
		// Same reasoning as above: submit each new cluster to the scene exactly once.
		for( uint32 ClusterIndex = BaseCluster; ClusterIndex < (uint32)Clusters.Num(); ClusterIndex++ )
			RayTracingScene.AddCluster( Clusters[ ClusterIndex ] );
	}
#endif

	uint32 LeavesTime = FPlatformTime::Cycles();
	UE_LOG( LogStaticMesh, Log, TEXT("Leaves [%.2fs]"), FPlatformTime::ToMilliseconds( LeavesTime - ClusterTime ) / 1000.0f );
}
/**
 * Finds, for every cluster in LevelClusters, which other clusters of the same level share an
 * external edge with it. Edges are matched by exact vertex position with opposite winding.
 *
 * @param OutAdjacency      Receives one map per entry of LevelClusters: key is the index of the
 *                          adjacent cluster within LevelClusters, value is the number of matched edges.
 *                          Entries are appended but then indexed from 0, so the array is expected
 *                          to be empty on entry.
 * @param LevelClusters     Clusters to test against each other.
 * @param NumExternalEdges  Total number of external edges across LevelClusters; sizes the edge
 *                          table and hash up front and is asserted against the number actually found.
 * @return Sum over all clusters of the number of distinct adjacent clusters.
 */
uint32 FClusterDAG::FindAdjacentClusters(
	TArray< TMap< uint32, uint32 > >& OutAdjacency,
	TArrayView< const FClusterRef > LevelClusters,
	uint32 NumExternalEdges )
{
	struct FExternalEdge
	{
		uint32	ClusterRefIndex;
		int32	EdgeIndex;
	};
	TArray< FExternalEdge >	ExternalEdges;
	FHashTable				ExternalEdgeHash;
	// Next free slot in ExternalEdges, claimed concurrently by the workers below.
	// std::atomic to match NumAdjacency further down.
	std::atomic< uint32 >	ExternalEdgeOffset(0);

	OutAdjacency.AddDefaulted( LevelClusters.Num() );

	// We have a total count of NumExternalEdges so we can allocate a hash table without growing.
	ExternalEdges.AddUninitialized( NumExternalEdges );
	ExternalEdgeHash.Clear( 1 << FMath::FloorLog2( NumExternalEdges ), NumExternalEdges );

	// Add edges to hash table
	ParallelFor( TEXT("Nanite.EdgeHashAdd.PF"), LevelClusters.Num(), 32,
		[&]( uint32 ClusterRefIndex )
		{
			FCluster& Cluster = LevelClusters[ ClusterRefIndex ].GetCluster( *this );

			for( int32 EdgeIndex = 0; EdgeIndex < Cluster.ExternalEdges.Num(); EdgeIndex++ )
			{
				if( Cluster.ExternalEdges[ EdgeIndex ] )
				{
					uint32 VertIndex0 = Cluster.Indexes[ EdgeIndex ];
					uint32 VertIndex1 = Cluster.Indexes[ Cycle3( EdgeIndex ) ];

					const FVector3f& Position0 = Cluster.Verts.GetPosition( VertIndex0 );
					const FVector3f& Position1 = Cluster.Verts.GetPosition( VertIndex1 );

					// Hash in edge direction (0 -> 1); the lookup below hashes the reverse direction.
					uint32 Hash0 = HashPosition( Position0 );
					uint32 Hash1 = HashPosition( Position1 );
					uint32 Hash = Murmur32( { Hash0, Hash1 } );

					uint32 ExternalEdgeIndex = ExternalEdgeOffset++;
					ExternalEdges[ ExternalEdgeIndex ] = { ClusterRefIndex, EdgeIndex };
					ExternalEdgeHash.Add_Concurrent( Hash, ExternalEdgeIndex );
				}
			}
		} );

	check( ExternalEdgeOffset.load() == (uint32)ExternalEdges.Num() );

	std::atomic< uint32 > NumAdjacency(0);

	// Find matching edge in other clusters
	ParallelFor( TEXT("Nanite.FindMatchingEdge.PF"), LevelClusters.Num(), 32,
		[&]( uint32 ClusterRefIndex )
		{
			FCluster& Cluster = LevelClusters[ ClusterRefIndex ].GetCluster( *this );
			TMap< uint32, uint32 >& AdjacentClusters = OutAdjacency[ ClusterRefIndex ];

			for( int32 EdgeIndex = 0; EdgeIndex < Cluster.ExternalEdges.Num(); EdgeIndex++ )
			{
				if( Cluster.ExternalEdges[ EdgeIndex ] )
				{
					uint32 VertIndex0 = Cluster.Indexes[ EdgeIndex ];
					uint32 VertIndex1 = Cluster.Indexes[ Cycle3( EdgeIndex ) ];

					const FVector3f& Position0 = Cluster.Verts.GetPosition( VertIndex0 );
					const FVector3f& Position1 = Cluster.Verts.GetPosition( VertIndex1 );

					// Reversed order (1 -> 0) so this edge hashes like the opposing edge stored above.
					uint32 Hash0 = HashPosition( Position0 );
					uint32 Hash1 = HashPosition( Position1 );
					uint32 Hash = Murmur32( { Hash1, Hash0 } );

					for( uint32 ExternalEdgeIndex = ExternalEdgeHash.First( Hash ); ExternalEdgeHash.IsValid( ExternalEdgeIndex ); ExternalEdgeIndex = ExternalEdgeHash.Next( ExternalEdgeIndex ) )
					{
						FExternalEdge ExternalEdge = ExternalEdges[ ExternalEdgeIndex ];

						FCluster& OtherCluster = LevelClusters[ ExternalEdge.ClusterRefIndex ].GetCluster( *this );

						if( OtherCluster.ExternalEdges[ ExternalEdge.EdgeIndex ] )
						{
							uint32 OtherVertIndex0 = OtherCluster.Indexes[ ExternalEdge.EdgeIndex ];
							uint32 OtherVertIndex1 = OtherCluster.Indexes[ Cycle3( ExternalEdge.EdgeIndex ) ];

							// Hash hits are only candidates; confirm with an exact position compare.
							if( Position0 == OtherCluster.Verts.GetPosition( OtherVertIndex1 ) &&
								Position1 == OtherCluster.Verts.GetPosition( OtherVertIndex0 ) )
							{
								if( ClusterRefIndex != ExternalEdge.ClusterRefIndex )
								{
									// Increase its count
									AdjacentClusters.FindOrAdd( ExternalEdge.ClusterRefIndex, 0 )++;

									// Can't break or a triple edge might be non-deterministically connected.
									// Need to find all matching, not just first.
								}
							}
						}
					}
				}
			}
			NumAdjacency += AdjacentClusters.Num();

			// Force deterministic order of adjacency.
			AdjacentClusters.KeySort(
				[&]( uint32 A, uint32 B )
				{
					return LevelClusters[A].GetCluster( *this ).GUID < LevelClusters[B].GetCluster( *this ).GUID;
				} );
		} );

#if VALIDATE_CLUSTER_ADJACENCY
	// Validate the bi-directionality of adjacency. Also, since only roots are currently instanced, validate that
	// no instanced clusters have adjacency.
	for( int32 i = 0; i < LevelClusters.Num(); ++i )
	{
		check( OutAdjacency[ i ].Num() == 0 || !LevelClusters[ i ].IsInstance() );
		for( const auto& KeyValue : OutAdjacency[ i ] )
		{
			check( KeyValue.Value > 0 );
			check( KeyValue.Value == OutAdjacency[ KeyValue.Key ].FindChecked( i ));
			check( !LevelClusters[ KeyValue.Key ].IsInstance() );
		}
	}
#endif

	return NumAdjacency;
}
// Lower and upper bound on the number of child clusters per group. Passed to the group
// partitioner, used by the voxel group merge pass, and MaxGroupSize also sizes inline arrays.
static constexpr uint32 MinGroupSize = 8;
static constexpr uint32 MaxGroupSize = 32;
/**
 * Upper bound on the number of parent clusters a group may produce.
 *
 * Sums MaterialIndexes.Num() over every child of Group and divides, rounding up,
 * by twice MaxClusterSize.
 */
uint32 GetMaxParents( const FClusterGroup& Group, FClusterDAG& DAG, uint32 MaxClusterSize )
{
	uint32 TotalElements = 0;
	for( int32 ChildIndex = 0; ChildIndex < Group.Children.Num(); ChildIndex++ )
	{
		const FClusterRef ChildRef = Group.Children[ ChildIndex ];
		TotalElements += ChildRef.GetCluster( DAG ).MaterialIndexes.Num();
	}

	const uint32 ElementsPerParent = MaxClusterSize * 2;
	return FMath::DivideAndRoundUp( TotalElements, ElementsPerParent );
}
/**
 * Partitions a level's triangle clusters into groups and appends those groups to Groups.
 *
 * If the level has at most MaxGroupSize clusters a single group holding all of them is added.
 * Otherwise a graph is built from shared-edge adjacency plus locality links and partitioned
 * into ranges of MinGroupSize..MaxGroupSize; each range becomes one group.
 *
 * @param LevelClusters     Clusters to group. Nothing happens when empty.
 * @param NumExternalEdges  Forwarded to FindAdjacentClusters.
 */
void FClusterDAG::GroupTriangleClusters( TArrayView< const FClusterRef > LevelClusters, uint32 NumExternalEdges )
{
	if( LevelClusters.Num() == 0 )
		return;

	// Everything fits in one group: no partitioning needed.
	if( LevelClusters.Num() <= MaxGroupSize )
	{
		Groups.AddDefaulted_GetRef().Children.Append( LevelClusters );
		return;
	}

	TArray< TMap< uint32, uint32 > > ClusterAdjacency;
	const uint32 NumAdjacencyLinks = FindAdjacentClusters( ClusterAdjacency, LevelClusters, NumExternalEdges );

	// Union clusters that share an edge; each pair is unioned from its higher index.
	FDisjointSet DisjointSet( LevelClusters.Num() );
	const uint32 NumLevelClusters = (uint32)LevelClusters.Num();
	for( uint32 RefIndex = 0; RefIndex < NumLevelClusters; RefIndex++ )
	{
		for( const auto& Link : ClusterAdjacency[ RefIndex ] )
		{
			const uint32 NeighborRefIndex = Link.Key;
			if( NeighborRefIndex < RefIndex )
			{
				DisjointSet.UnionSequential( RefIndex, NeighborRefIndex );
			}
		}
	}

	FGraphPartitioner Partitioner( LevelClusters.Num(), MinGroupSize, MaxGroupSize );

	// TODO Cache this
	// Centre of the cluster's bounds, moved through the instance transform for instanced clusters.
	auto GetCenter = [&]( uint32 RefIndex )
	{
		const FClusterRef Ref = LevelClusters[ RefIndex ];
		const FBounds3f& ClusterBounds = Ref.GetCluster( *this ).Bounds;

		FVector3f Center = 0.5f * ( ClusterBounds.Min + ClusterBounds.Max );
		if( Ref.IsInstance() )
		{
			Center = Ref.GetTransform( *this ).TransformPosition( Center );
		}
		return Center;
	};
	Partitioner.BuildLocalityLinks( DisjointSet, TotalBounds, TArrayView< const int32 >(), GetCenter );

	auto* RESTRICT Graph = Partitioner.NewGraph( NumAdjacencyLinks );

	const int32 NumNodes = LevelClusters.Num();
	for( int32 NodeIndex = 0; NodeIndex < NumNodes; NodeIndex++ )
	{
		Graph->AdjacencyOffset[ NodeIndex ] = Graph->Adjacency.Num();

		const uint32 RefIndex = Partitioner.Indexes[ NodeIndex ];
		const FCluster& Cluster = LevelClusters[ RefIndex ].GetCluster( *this );

		for( const auto& Link : ClusterAdjacency[ RefIndex ] )
		{
			const uint32 NeighborRefIndex	= Link.Key;
			const uint32 NumSharedEdges		= Link.Value;

			const FCluster& Neighbor = LevelClusters[ NeighborRefIndex ].GetCluster( *this );

			// Clusters generated by the same group get a lower per-edge weight than unrelated ones.
			const bool bSiblings =
				Cluster.GeneratingGroupIndex != MAX_uint32 &&
				Cluster.GeneratingGroupIndex == Neighbor.GeneratingGroupIndex;
			const uint32 PerEdgeWeight = bSiblings ? 12 : 16;

			Partitioner.AddAdjacency( Graph, NeighborRefIndex, NumSharedEdges * PerEdgeWeight + 4 );
		}

		Partitioner.AddLocalityLinks( Graph, RefIndex, 1 );
	}
	Graph->AdjacencyOffset[ Graph->Num ] = Graph->Adjacency.Num();

	LOG_CRC( Graph->Adjacency );
	LOG_CRC( Graph->AdjacencyCost );
	LOG_CRC( Graph->AdjacencyOffset );

	bool bSingleThreaded = LevelClusters.Num() <= 32;

	Partitioner.PartitionStrict( Graph, !bSingleThreaded );

	LOG_CRC( Partitioner.Ranges );

	// One group per partition range, children listed in partition order.
	for( auto& Range : Partitioner.Ranges )
	{
		FClusterGroup& NewGroup = Groups.AddDefaulted_GetRef();
		for( uint32 SortedIndex = Range.Begin; SortedIndex < Range.End; SortedIndex++ )
		{
			NewGroup.Children.Add( LevelClusters[ Partitioner.Indexes[ SortedIndex ] ] );
		}
	}
}
/**
 * Groups a level's voxel clusters and appends the resulting groups to Groups.
 *
 * 1. Sorts LevelClusters so clusters with the same instance and generating group are contiguous,
 *    then creates one group per run, seeded with the generating group's bounds and error
 *    (transformed for instanced clusters).
 * 2. Orders the new groups by a Morton code of their bounds centre relative to TotalBounds.
 * 3. Repeatedly merges mutually-cheapest neighbouring groups (within SearchRadius in that order)
 *    while at least one of the pair is below MinGroupSize and the merged size stays within MaxGroupSize.
 * 4. Removes the groups emptied by merging.
 *
 * @param LevelClusters  Clusters to group; reordered in place. Nothing happens when empty.
 */
void FClusterDAG::GroupVoxelClusters( TArrayView< const FClusterRef > LevelClusters )
{
	if( LevelClusters.IsEmpty() )
		return;

	// TODO If Clusters were compacted instead of resorted this wouldn't be needed.
	LevelClusters.Sort(
		[&]( FClusterRef A, FClusterRef B )
		{
			if( !A.IsInstance() || !B.IsInstance() || A.InstanceIndex == B.InstanceIndex )
				return A.GetCluster( *this ).GeneratingGroupIndex < B.GetCluster( *this ).GeneratingGroupIndex;
			else
				return A.InstanceIndex < B.InstanceIndex;
		} );

	// Groups created by this call start here.
	int32 GroupOffset = Groups.Num();

	{
		uint32 RunInstanceIndex = ~0u;
		uint32 RunGroupIndex = ~0u;
		for( FClusterRef ClusterRef : LevelClusters )
		{
			uint32 InstanceIndex = ClusterRef.IsInstance() ? ClusterRef.InstanceIndex : ~0u;
			uint32 GroupIndex = ClusterRef.GetCluster( *this ).GeneratingGroupIndex;

			// Start a new group whenever the (instance, generating group) pair changes.
			if( RunInstanceIndex != InstanceIndex ||
				RunGroupIndex != GroupIndex )
			{
				RunInstanceIndex = InstanceIndex;
				RunGroupIndex = GroupIndex;

				// Both references are taken after the add, so neither is invalidated by it.
				FClusterGroup& Group = Groups.AddDefaulted_GetRef();
				FClusterGroup& GeneratingGroup = Groups[ GroupIndex ];
				Group.Bounds			= GeneratingGroup.Bounds;
				Group.ParentLODError	= GeneratingGroup.ParentLODError;

				if( ClusterRef.IsInstance() )
				{
					const FMatrix44f& Transform = ClusterRef.GetTransform( *this );
					Group.Bounds.Center = Transform.TransformPosition( Group.Bounds.Center );

					const float MaxScale = Transform.GetScaleVector().GetMax();
					Group.Bounds.W *= MaxScale;
					Group.ParentLODError *= MaxScale;
				}
			}

			Groups.Last().Children.Add( ClusterRef );
			check( Groups.Last().Children.Num() <= NANITE_MAX_CLUSTERS_PER_GROUP_TARGET );
		}
	}

	TArrayView< FClusterGroup > LevelGroups( Groups.GetData() + GroupOffset, Groups.Num() - GroupOffset );

	TArray< uint32 > SortKeys;
	TArray< int32 > Input, Output;
	SortKeys.AddUninitialized( LevelGroups.Num() );
	Input.AddUninitialized( LevelGroups.Num() );
	Output.AddUninitialized( LevelGroups.Num() );

	// 10 bits per axis Morton key from the group centre, normalised by the largest extent of TotalBounds.
	ParallelFor( TEXT("GroupVoxelClusters.SortKeys.PF"), LevelGroups.Num(), 4096,
		[&]( int32 Index )
		{
			FVector3f Center = LevelGroups[ Index ].Bounds.Center;
			FVector3f CenterLocal = ( Center - TotalBounds.Min ) / FVector3f( TotalBounds.Max - TotalBounds.Min ).GetMax();

			uint32 Morton;
			Morton  = FMath::MortonCode3( uint32( CenterLocal.X * 1023 ) );
			Morton |= FMath::MortonCode3( uint32( CenterLocal.Y * 1023 ) ) << 1;
			Morton |= FMath::MortonCode3( uint32( CenterLocal.Z * 1023 ) ) << 2;
			SortKeys[ Index ] = Morton;
			Input[ Index ] = Index;
		} );

	RadixSort32( Output.GetData(), Input.GetData(), Input.Num(),
		[&]( int32 Index )
		{
			return SortKeys[ Index ];
		} );

	// The sort keys are no longer needed; reuse their storage for the per-pass merge targets.
	TArrayView< int32 > MergeIndex( (int32*)SortKeys.GetData(), SortKeys.Num() );

	while( Output.Num() > 1 )
	{
		Swap( Input, Output );
		Output.Reset();

		const int32 SearchRadius = 16;

		// Set by any worker that finds a valid merge candidate. Atomic because many workers may
		// write it concurrently; they all write the same value, so relaxed ordering is sufficient.
		std::atomic< bool > bAnyPossibleMerge( false );

		// Find least cost neighbor
		ParallelFor( TEXT("GroupVoxelClusters.LeastCostIndex.PF"), Input.Num(), 4096,
			[&]( int32 i )
			{
				const FClusterGroup& Group0 = LevelGroups[ Input[i] ];

				float LeastCost = MAX_flt;
				int32 LeastCostIndex = -1;

				int32 SearchMin = FMath::Max( i - SearchRadius, 0 );
				int32 SearchMax = FMath::Min( i + SearchRadius + 1, Input.Num() );
				for( int32 NeighborIndex = SearchMin; NeighborIndex < SearchMax; NeighborIndex++ )
				{
					if( NeighborIndex == i )
						continue;

					const FClusterGroup& Group1 = LevelGroups[ Input[ NeighborIndex ] ];

					bool bTooSmall =
						Group0.Children.Num() < MinGroupSize ||
						Group1.Children.Num() < MinGroupSize;
					bool bTooLarge = Group0.Children.Num() + Group1.Children.Num() > MaxGroupSize;

					if( bTooSmall && !bTooLarge )
					{
						// Cost is the radius of the combined bounds.
						float Cost = ( Group0.Bounds + Group1.Bounds ).W;

						// TODO include difference in error

						if( Cost < LeastCost )
						{
							LeastCost = Cost;
							LeastCostIndex = NeighborIndex;
							bAnyPossibleMerge.store( true, std::memory_order_relaxed );
						}
					}
				}

				MergeIndex[i] = LeastCostIndex;
			} );

		if( !bAnyPossibleMerge.load() )
		{
			// Nothing left to merge: restore Output and stop.
			Swap( Input, Output );
			break;
		}

		// Merge pass
		for( int32 i = 0; i < Input.Num(); i++ )
		{
			int32 Merge1 = MergeIndex[i];
			int32 Merge0 = Merge1 >= 0 ? MergeIndex[ Merge1 ] : -1;
			if( i == Merge0 )
			{
				// Matching pair, merge
				if( i < Merge1 )
				{
					// Left side: Merge
					FClusterGroup& Group0 = LevelGroups[ Input[i] ];
					FClusterGroup& Group1 = LevelGroups[ Input[ Merge1 ] ];

					Group0.Children.Append( Group1.Children );
					Group1.Children.Empty();

					Group0.Bounds += Group1.Bounds;

					check( Group0.Children.Num() <= NANITE_MAX_CLUSTERS_PER_GROUP_TARGET );

					Output.Add( Input[i] );
				}
				else
				{
					// Right side: Do nothing because left side owns merging
				}
			}
			else
			{
				// Not valid to merge this pass. Just copy for next time.
				Output.Add( Input[i] );
			}
		}
	}

	// Remove empty groups
	for( int32 i = GroupOffset; i < Groups.Num(); )
	{
		if( Groups[i].Children.IsEmpty() )
			Groups.RemoveAtSwap( i, EAllowShrinking::No );
		else
			i++;
	}
}
// Reduces the clusters of MeshInput[ MeshIndex ] level by level until the termination checks on a
// single remaining cluster are met, then wraps that last cluster in a root group.
// Each iteration groups the current level's clusters, reduces every group into parent clusters
// (ReduceGroup) and replaces MeshInput[ MeshIndex ] with the newly created parents.
// Returns early, without creating a root group, if the current cancellation token is canceled.
void FClusterDAG::ReduceMesh( uint32 MeshIndex )
{
TRACE_CPUPROFILER_EVENT_SCOPE(Nanite::Build::DAG.ReduceMesh);
if( MeshInput[ MeshIndex ].IsEmpty() )
return;
#if RAY_TRACE_VOXELS
if( Settings.ShapePreservation == ENaniteShapePreservation::Voxelize )
rtcCommitScene( RayTracingScene.Scene );
#endif
// Running count of clusters in use. ReduceGroup/SplitCluster claim slots from it atomically,
// while Clusters itself is grown conservatively before each reduction.
std::atomic< int32 > NumClusters( Clusters.Num() );
UE::Tasks::FCancellationToken* CancellationToken = UE::Tasks::FCancellationTokenScope::GetCurrentCancellationToken();
while( true )
{
if (CancellationToken && CancellationToken->IsCanceled())
{
return;
}
TArrayView< FClusterRef > LevelClusters = MeshInput[ MeshIndex ];
// Clusters and groups created during this iteration start at these offsets.
int32 ClusterOffset = Clusters.Num();
int32 GroupOffset = Groups.Num();
uint32 NumExternalEdges = 0;
float MinError = +MAX_flt;
float MaxError = -MAX_flt;
float AvgError = 0.0f;
// Gather level statistics and grow TotalBounds; instanced clusters contribute their
// transformed bounds and scaled error.
for( FClusterRef ClusterRef : LevelClusters )
{
const FCluster& Cluster = ClusterRef.GetCluster( *this );
FBounds3f Bounds = Cluster.Bounds;
float LODError = Cluster.LODError;
if( ClusterRef.IsInstance() )
{
const FMatrix44f& Transform = ClusterRef.GetTransform( *this );
Bounds = Bounds.TransformBy( Transform );
LODError *= Transform.GetScaleVector().GetMax();
}
NumExternalEdges += Cluster.NumExternalEdges;
TotalBounds += Bounds;
MinError = FMath::Min( MinError, LODError );
MaxError = FMath::Max( MaxError, LODError );
AvgError += LODError;
}
AvgError /= (float)LevelClusters.Num();
UE_LOG( LogStaticMesh, Verbose, TEXT("Num clusters %i. Error %.4f, %.4f, %.4f"), LevelClusters.Num(), MinError, AvgError, MaxError );
uint32 MaxClusterSize = FCluster::ClusterSize;
// Termination: a single triangle cluster is the root. A single non-triangle cluster is reduced
// further with a smaller MaxClusterSize only while Settings.ExtraVoxelLevels allows it.
if( LevelClusters.Num() == 1 )
{
const FCluster& Cluster = LevelClusters[0].GetCluster( *this );
if( Cluster.NumTris )
break;
else if( Cluster.MaterialIndexes.Num() > 64 && Settings.ExtraVoxelLevels >= 1 )
MaxClusterSize = 64;
else if( Cluster.MaterialIndexes.Num() > 32 && Settings.ExtraVoxelLevels >= 2 )
MaxClusterSize = 32;
else
break;
}
// Declared after the termination checks above, so those `break`s do not trigger it.
// For the rest of the iteration (including the `continue` below) it trims Clusters to the
// number actually produced and makes the new parents the next level's input.
// NOTE(review): the `return` inside presumably only leaves the scope-exit body — confirm
// against the ON_SCOPE_EXIT implementation.
ON_SCOPE_EXIT
{
if (CancellationToken && CancellationToken->IsCanceled())
{
return;
}
check( ClusterOffset < NumClusters );
// Correct num to atomic count
Clusters.SetNum( NumClusters, EAllowShrinking::No );
MeshInput[ MeshIndex ].Reset();
for( uint32 ClusterIndex = ClusterOffset; ClusterIndex < (uint32)Clusters.Num(); ClusterIndex++ )
MeshInput[ MeshIndex ].Add( FClusterRef( ClusterIndex ) );
};
// Small level: a single group holding every cluster, reduced on this thread.
if( LevelClusters.Num() <= MaxGroupSize )
{
int32 GroupIndex = Groups.AddDefaulted();
Groups[ GroupIndex ].Children.Append( LevelClusters );
uint32 MaxParents = GetMaxParents( Groups[ GroupIndex ], *this, MaxClusterSize );
Clusters.AddDefaulted( MaxParents );
ReduceGroup( NumClusters, MaxClusterSize, MaxParents, GroupIndex, MeshIndex );
continue;
}
// Split the level into clusters with triangles (front) and clusters without (back).
int32 NumTriClusters = Algo::Partition( LevelClusters,
[ this ]( FClusterRef ClusterRef )
{
return ClusterRef.GetCluster( *this ).NumTris > 0;
} );
TArrayView< FClusterRef > TriClusters = LevelClusters.Slice( 0, NumTriClusters );
TArrayView< FClusterRef > VoxelClusters = LevelClusters.Slice( NumTriClusters, LevelClusters.Num() - NumTriClusters );
// A lone cluster of one kind is grouped together with the other kind instead.
if( TriClusters.Num() == 1 )
{
TriClusters = TArrayView< FClusterRef >();
VoxelClusters = LevelClusters;
}
if( VoxelClusters.Num() == 1 )
{
VoxelClusters = TArrayView< FClusterRef >();
TriClusters = LevelClusters;
}
GroupTriangleClusters( TriClusters, NumExternalEdges );
GroupVoxelClusters( VoxelClusters );
TArrayView< FClusterGroup > LevelGroups( Groups.GetData() + GroupOffset, Groups.Num() - GroupOffset );
// Reserve the upper bound of parent clusters up front so the parallel reduction below
// never has to grow Clusters.
uint32 MaxParents = 0;
for( const FClusterGroup& Group : LevelGroups )
MaxParents += GetMaxParents( Group, *this, MaxClusterSize );
Clusters.AddDefaulted( MaxParents );
ParallelFor( TEXT("Nanite.ReduceGroup.PF"), LevelGroups.Num(), 1,
[&]( int32 i )
{
if (CancellationToken && CancellationToken->IsCanceled())
{
return;
}
uint32 MaxParents = GetMaxParents( LevelGroups[i], *this, MaxClusterSize );
ReduceGroup( NumClusters, MaxClusterSize, MaxParents, GroupOffset + i, MeshIndex );
},
EParallelForFlags::Unbalanced );
if (CancellationToken && CancellationToken->IsCanceled())
{
return;
}
// Force a deterministic order of the generated parent clusters
{
// TODO: Optimize me.
// Just sorting the array directly seems like the safest option at this stage (right before UE5 final build).
// On AOD_Shield this seems to be on the order of 0.01s in practice.
// As the Clusters array is already conservatively allocated, it seems storing the parent clusters in their designated
// conservative ranges and then doing a compaction pass at the end would be a more efficient solution that doesn't involve sorting.
//uint32 StartTime = FPlatformTime::Cycles();
TArrayView< FCluster > Parents( &Clusters[ ClusterOffset ], NumClusters - ClusterOffset );
Parents.Sort(
[&]( const FCluster& A, const FCluster& B )
{
return A.GUID < B.GUID;
} );
//UE_LOG(LogStaticMesh, Log, TEXT("SortTime Adjacency [%.2fs]"), FPlatformTime::ToMilliseconds(FPlatformTime::Cycles() - StartTime) / 1000.0f);
}
}
#if RAY_TRACE_VOXELS
// Clear ExtraVoxels for all clusters except the root as it might be needed for assembly composition.
for( int32 i = 0; i < int32( Clusters.Num() ) - 1; i++ )
{
Clusters[ i ].ExtraVoxels.Empty();
}
#endif
// Max out root node
const FClusterRef& LastClusterRef = MeshInput[MeshIndex].Last();
FCluster& LastCluster = LastClusterRef.GetCluster(*this);
FSphere3f LODBounds = LastCluster.LODBounds;
if( LastClusterRef.IsInstance() )
{
// Corner case: The root cluster comes from a single assembly part
const FMatrix44f& Transform = LastClusterRef.GetTransform( *this );
LODBounds.Center = Transform.TransformPosition( LODBounds.Center );
const float MaxScale = Transform.GetScaleVector().GetMax();
LODBounds.W *= MaxScale;
AssemblyInstanceData[ LastClusterRef.InstanceIndex ].LODBounds = LODBounds;
AssemblyInstanceData[ LastClusterRef.InstanceIndex ].ParentLODError = 1e10f;
}
// Wrap the last remaining cluster in a root group with ParentLODError set to 1e10.
FClusterGroup RootClusterGroup;
RootClusterGroup.Children.Add( LastClusterRef );
RootClusterGroup.Bounds = LastCluster.SphereBounds;
RootClusterGroup.LODBounds = LODBounds;
RootClusterGroup.ParentLODError = 1e10f;
RootClusterGroup.MipLevel = LastCluster.MipLevel;
RootClusterGroup.MeshIndex = MeshIndex;
RootClusterGroup.bRoot = true;
LastCluster.GroupIndex = Groups.Num();
Groups.Add(RootClusterGroup);
// Clear the root cluster's external edges
FMemory::Memzero(LastCluster.ExternalEdges.GetData(), LastCluster.ExternalEdges.Num());
LastCluster.NumExternalEdges = 0;
}
/**
 * Stores Merged into the Clusters array as one or more parent clusters.
 *
 * If Merged holds at most MaxClusterSize elements it is stored as a single parent. Otherwise,
 * when more than one parent is allowed, it is partitioned via PartitionFunc and stored as one
 * cluster per range, provided the partition does not exceed NumParents ranges.
 *
 * @param Merged          Cluster to store or split.
 * @param Clusters        Destination array; slots are claimed through NumClusters and must already exist.
 * @param NumClusters     Atomic count of used slots in Clusters; advanced by the number of parents written.
 * @param MaxClusterSize  Maximum number of elements per parent.
 * @param NumParents      In: maximum parents allowed. Out: actual count when a partition is accepted.
 * @param ParentStart     Out: first written slot (only set on success).
 * @param ParentEnd       Out: one past the last written slot (only set on success).
 * @param PartitionFunc   Called as PartitionFunc( Partitioner, Adjacency ) to fill Partitioner.
 * @return true if parents were written, false otherwise.
 */
template< typename FPartitioner, typename FPartitionFunc >
bool SplitCluster( FCluster& Merged, TArray< FCluster >& Clusters, std::atomic< int32 >& NumClusters, uint32 MaxClusterSize, uint32& NumParents, uint32& ParentStart, uint32& ParentEnd, FPartitionFunc&& PartitionFunc )
{
	// Fits in a single parent: claim one slot and store it whole.
	if( Merged.MaterialIndexes.Num() <= (int32)MaxClusterSize )
	{
		ParentStart	= NumClusters.fetch_add( 1 );
		ParentEnd	= ParentStart + 1;

		FCluster& SingleParent = Clusters[ ParentStart ];
		SingleParent = Merged;
		SingleParent.Bound();
		return true;
	}

	// Too large for one parent and not allowed to split.
	if( NumParents <= 1 )
	{
		return false;
	}

	check( MaxClusterSize == FCluster::ClusterSize );

	FAdjacency Adjacency = Merged.BuildAdjacency();
	FPartitioner Partitioner( Merged.MaterialIndexes.Num(), MaxClusterSize - 4, MaxClusterSize );
	PartitionFunc( Partitioner, Adjacency );

	// The partition produced more pieces than the caller reserved room for.
	if( Partitioner.Ranges.Num() > (int32)NumParents )
	{
		return false;
	}

	NumParents	= Partitioner.Ranges.Num();
	ParentStart	= NumClusters.fetch_add( NumParents );
	ParentEnd	= ParentStart + NumParents;

	for( int32 RangeIndex = 0; RangeIndex < Partitioner.Ranges.Num(); RangeIndex++ )
	{
		auto& Range = Partitioner.Ranges[ RangeIndex ];
		const int32 ParentIndex = (int32)ParentStart + RangeIndex;
		Clusters[ ParentIndex ] = FCluster( Merged, Range.Begin, Range.End, Partitioner.Indexes, Partitioner.SortedTo, Adjacency );
	}
	return true;
}
void FClusterDAG::ReduceGroup( std::atomic< int32 >& NumClusters, uint32 MaxClusterSize, uint32 NumParents, int32 GroupIndex, uint32 MeshIndex )
{
FClusterGroup& Group = Groups[ GroupIndex ];
check( Group.Children.Num() <= NANITE_MAX_CLUSTERS_PER_GROUP_TARGET );
bool bAnyTriangles = false;
bool bAllTriangles = true;
uint32 GroupNumVerts = 0;
float GroupArea = 0.0f;
TArray< FSphere3f, TInlineAllocator< MaxGroupSize > > Children_Bounds;
TArray< FSphere3f, TInlineAllocator< MaxGroupSize > > Children_LODBounds;
for( FClusterRef Child : Group.Children )
{
FCluster& Cluster = Child.GetCluster( *this );
GroupNumVerts += Cluster.Verts.Num();
bAnyTriangles = bAnyTriangles || Cluster.NumTris > 0;
bAllTriangles = bAllTriangles && Cluster.NumTris > 0;
bool bLeaf = Cluster.EdgeLength < 0.0f;
FSphere3f SphereBounds = Cluster.SphereBounds;
FSphere3f LODBounds = Cluster.LODBounds;
float LODError = Cluster.LODError;
if( Child.IsInstance() )
{
const FMatrix44f& Transform = Child.GetTransform( *this );
SphereBounds.Center = Transform.TransformPosition( SphereBounds.Center );
LODBounds.Center = Transform.TransformPosition( LODBounds.Center );
const float MaxScale = Transform.GetScaleVector().GetMax();
SphereBounds.W *= MaxScale;
LODBounds.W *= MaxScale;
LODError *= MaxScale;
GroupArea += Cluster.SurfaceArea * FMath::Square( MaxScale );
}
else
{
// Instanced children are already owned by a group.
Cluster.GroupIndex = GroupIndex;
GroupArea += Cluster.SurfaceArea;
}
// Force monotonic nesting.
Children_Bounds.Add( SphereBounds );
Children_LODBounds.Add( LODBounds );
Group.ParentLODError = FMath::Max( Group.ParentLODError, LODError );
Group.MipLevel = FMath::Max( Group.MipLevel, Cluster.MipLevel );
}
Group.Bounds = FSphere3f( Children_Bounds.GetData(), Children_Bounds.Num() );
Group.LODBounds = FSphere3f( Children_LODBounds.GetData(), Children_LODBounds.Num() );
Group.MeshIndex = MeshIndex;
uint32 ParentStart = 0;
uint32 ParentEnd = 0;
FCluster Merged;
float SimplifyError = MAX_flt;
bool bVoxels = false;
bVoxels = !bAllTriangles || Settings.ShapePreservation == ENaniteShapePreservation::Voxelize;
uint32 TargetClusterSize = MaxClusterSize - 2;
if( bAllTriangles )
{
uint32 TargetNumTris = NumParents * TargetClusterSize;
if( !bVoxels ||
Settings.VoxelLevel == 0 ||
Settings.VoxelLevel > Group.MipLevel + 1 )
{
Merged = FCluster( *this, Group.Children );
SimplifyError = Merged.Simplify( *this, TargetNumTris );
}
}
if( bVoxels )
{
int32 TargetNumBricks = NumParents * MaxClusterSize;
//uint32 TargetNumVoxels = TargetNumBricks * 16;
uint32 TargetNumVoxels = FMath::Max( 1u, ( GroupNumVerts * 3 ) / 4 );
float VoxelSize = FMath::Sqrt( GroupArea / float(TargetNumVoxels) );
VoxelSize *= 0.75f;
VoxelSize = FMath::Max( VoxelSize, Group.ParentLODError );
#if 0
// Round to pow2
// = exp2( floor( log2(x) + 0.5 ) )
FFloat32 VoxelSizeF( VoxelSize * UE_SQRT_2 );
VoxelSizeF.Components.Mantissa = 0;
VoxelSize = VoxelSizeF.FloatValue;
#endif
check( VoxelSize > 0.0f );
check( FMath::IsFinite( VoxelSize ) );
float EstimatedVoxelSize = VoxelSize;
while( VoxelSize < SimplifyError )
{
FCluster Voxelized;
Voxelized.Voxelize( *this, Group.Children, VoxelSize );
if( Voxelized.Verts.Num() <= TargetNumVoxels &&
Voxelized.Bricks.Num() <= TargetNumBricks )
{
bool bSplitSuccess = SplitCluster< FBVHCluster >( Voxelized, Clusters, NumClusters, MaxClusterSize, NumParents, ParentStart, ParentEnd,
[ &Voxelized ]( FBVHCluster& Partitioner, FAdjacency& Adjacency )
{
Partitioner.Build(
[ &Voxelized ]( uint32 VertIndex )
{
FBounds3f Bounds;
Bounds = FVector3f( Voxelized.Bricks[ VertIndex ].Position );
return Bounds;
} );
} );
// Voxel clusters will never be split up so pass on all extra data to first parent
#if RAY_TRACE_VOXELS
Clusters[ ParentStart ].ExtraVoxels.Append( Voxelized.ExtraVoxels );
#endif
check( bSplitSuccess );
break;
}
VoxelSize *= 1.1f;
check( FMath::IsFinite( VoxelSize ) );
}
if( VoxelSize < SimplifyError )
SimplifyError = VoxelSize;
else
bVoxels = false;
}
if( !bVoxels )
{
check( bAllTriangles );
while(1)
{
bool bSplitSuccess = SplitCluster< FGraphPartitioner >( Merged, Clusters, NumClusters, MaxClusterSize, NumParents, ParentStart, ParentEnd,
[ &Merged ]( FGraphPartitioner& Partitioner, FAdjacency& Adjacency )
{
Merged.Split( Partitioner, Adjacency );
} );
if( bSplitSuccess )
break;
TargetClusterSize -= 2;
if( TargetClusterSize <= MaxClusterSize / 2 )
break;
uint32 TargetNumTris = NumParents * TargetClusterSize;
// Start over from scratch. Continuing from simplified cluster screws up ExternalEdges and LODError.
Merged = FCluster( *this, Group.Children );
SimplifyError = Merged.Simplify( *this, TargetNumTris );
}
}
Group.ParentLODError = FMath::Max( Group.ParentLODError, SimplifyError );
// Force parents to have same LOD data. They are all dependent.
for( uint32 Parent = ParentStart; Parent < ParentEnd; Parent++ )
{
Clusters[ Parent ].LODBounds = Group.LODBounds;
Clusters[ Parent ].LODError = Group.ParentLODError;
Clusters[ Parent ].GeneratingGroupIndex = GroupIndex;
}
for( FClusterRef Child : Group.Children )
{
if( Child.IsInstance() )
{
AssemblyInstanceData[ Child.InstanceIndex ].LODBounds = Group.LODBounds;
AssemblyInstanceData[ Child.InstanceIndex ].ParentLODError = Group.ParentLODError;
}
}
}
/**
 * Selects a cut through the DAG, starting at the root group (the last entry of Groups) and
 * repeatedly replacing the highest-error selected cluster with the children of its generating group.
 *
 * Refinement stops when the top cluster is at mip level 0 or has no generating group, or when the
 * target has been hit (more than TargetNumTris triangles, or error below TargetError) and the top
 * cluster's error is below the current minimum.
 *
 * @param SelectedClusters  In/out: receives the selected clusters, maintained as a heap ordered by error.
 * @param SelectedGroups    In/out: receives the indices of the groups that were expanded.
 * @param TargetNumTris     Triangle count target.
 * @param TargetError       Error target.
 * @param TargetOvershoot   If non-zero, the first time the target is hit the triangle target is
 *                          raised by this many triangles and refinement continues.
 */
void FClusterDAG::FindCut(
	TArray< FClusterRef >& SelectedClusters,
	TSet< uint32 >& SelectedGroups,
	uint32 TargetNumTris,
	float TargetError,
	uint32 TargetOvershoot ) const
{
	// TODO After traversing into a part, how to know which transform?
	// Need absolute transform stack just like visible clusters.
	// TODO Scale error.

	// Scales an error by the largest axis scale of the ref's instance transform; unchanged for non-instances.
	auto ApplyInstanceScale = [ this ]( FClusterRef Ref, float LODError ) -> float
	{
		if( Ref.IsInstance() )
		{
			const FMatrix44f& Transform = Ref.GetTransform( *this );
			LODError *= Transform.GetScaleVector().GetMax();
		}
		return LODError;
	};

	// Heap predicate: larger (instance-scaled) error first.
	auto LargestError = [ this, &ApplyInstanceScale ]( FClusterRef A, FClusterRef B )
	{
		const float LODErrorA = ApplyInstanceScale( A, A.GetCluster( *this ).LODError );
		const float LODErrorB = ApplyInstanceScale( B, B.GetCluster( *this ).LODError );
		return LODErrorA > LODErrorB;
	};

	// Expanded (group, instance) pairs; decides whether a group's children still need pushing.
	TSet< uint64 > SelectedGroups2;

	bool bHitTargetBefore = false;

	uint32 NumTris = 0;
	float MinError = 0.0f;

	// Seed the selection with the root group's children.
	const FClusterGroup& RootGroup = Groups.Last();
	for( FClusterRef RootRef : RootGroup.Children )
	{
		SelectedClusters.Add( RootRef );

		const FCluster& RootCluster = RootRef.GetCluster( *this );
		NumTris += RootCluster.NumTris;
		MinError = ApplyInstanceScale( RootRef, RootCluster.LODError );
	}

	SelectedGroups.Add( Groups.Num() - 1 );
	SelectedGroups2.Add( Groups.Num() - 1 );

	for( ;; )
	{
		// Grab highest error cluster to replace to reduce cut error
		const FClusterRef TopRef = SelectedClusters.HeapTop();
		const FCluster& TopCluster = TopRef.GetCluster( *this );

		if( TopCluster.MipLevel == 0 || TopCluster.GeneratingGroupIndex == MAX_uint32 )
			break;

		bool bHitTarget = NumTris > TargetNumTris || MinError < TargetError;

		// Overshoot the target by TargetOvershoot number of triangles. This allows granular edge collapses to better minimize error to the target.
		if( TargetOvershoot > 0 && bHitTarget && !bHitTargetBefore )
		{
			TargetNumTris = NumTris + TargetOvershoot;
			bHitTarget = false;
			bHitTargetBefore = true;
		}

		const float TopLODError = ApplyInstanceScale( TopRef, TopCluster.LODError );

		if( bHitTarget && TopLODError < MinError )
			break;

		SelectedClusters.HeapPopDiscard( LargestError, EAllowShrinking::No );
		NumTris -= TopCluster.NumTris;

		check(TopLODError <= MinError );
		MinError = TopLODError;

		// The second FindOrAdd overwrites the flag, so the (group, instance) set decides.
		bool bAlreadyAdded = false;
		SelectedGroups.FindOrAdd( TopCluster.GeneratingGroupIndex, &bAlreadyAdded );
		SelectedGroups2.FindOrAdd( uint64(TopCluster.GeneratingGroupIndex) << 32 | TopRef.InstanceIndex, &bAlreadyAdded );

		// There will be other parent clusters with the same LODError from the same GeneratingGroupIndex still on the heap.
		if( bAlreadyAdded )
			continue;

		for( FClusterRef ChildRef : Groups[ TopCluster.GeneratingGroupIndex ].Children )
		{
			// Resolve the cluster before the ref is redirected below.
			const FCluster& ChildCluster = ChildRef.GetCluster( *this );

			if( !ChildRef.IsInstance() )
			{
				// hack FClusterRef to point to parent InstanceIndex
				ChildRef = FClusterRef( TopRef.InstanceIndex, ChildRef.ClusterIndex );
			}

			const float ChildLODError = ApplyInstanceScale( ChildRef, ChildCluster.LODError );

			check( ChildCluster.MipLevel < TopCluster.MipLevel );
			check( ChildLODError <= MinError );
			SelectedClusters.HeapPush( ChildRef, LargestError );
			NumTris += ChildCluster.NumTris;
		}
	}
}
} // namespace Nanite