Files
UnrealEngine/Engine/Source/Developer/NaniteBuilder/Private/Encode/NaniteEncodeHierarchy.cpp
Brandyn / Techy fcc1b09210 init
2026-04-04 15:40:51 -05:00

758 lines
26 KiB
C++

// Copyright Epic Games, Inc. All Rights Reserved.
#include "NaniteEncodeHierarchy.h"
#include "Math/UnrealMath.h"
#include "Cluster.h"
#include "ClusterDAG.h"
#include "NaniteEncodeShared.h"
#define BVH_BUILD_WRITE_GRAPHVIZ 0
namespace Nanite
{
// Temporary node used while building the BVH, before it is flattened into FHierarchyNodes.
// A node is either a leaf referencing a cluster group part (PartIndex set) or an inner node with Children.
struct FIntermediateNode
{
	uint32				PartIndex					= MAX_uint32;	// Cluster group part for leaves; MAX_uint32 for inner nodes.
	uint32				AssemblyRootIndex			= MAX_uint32;	// When set, this node forwards to the shared subtree root of an assembly part.
	uint32				NewAssemblyInstanceIndex	= MAX_uint32;	// To be able to share subtrees this is only set when the instance changes.
	uint32				HierarchyNodeIndex			= MAX_uint32;	// Cached FHierarchyNode index once emitted, so shared subtrees are only built once.
	uint32				MipLevel					= MAX_uint32;	// Fix: was MAX_int32 — use the unsigned sentinel consistently with the other uint32 fields.
	FBounds3f			Bound;
	TArray< uint32 >	Children;

	bool				IsLeaf() const { return PartIndex != MAX_uint32; }
};
// Unpacked hierarchy node. Each of the NANITE_MAX_BVH_NODE_FANOUT slots describes one child.
// A slot is unused when NumChildren[i] == 0.
struct FHierarchyNode
{
	FSphere3f	LODBounds[NANITE_MAX_BVH_NODE_FANOUT];				// LOD bounding sphere per child
	FBounds3f	Bounds[NANITE_MAX_BVH_NODE_FANOUT];					// Visibility AABB per child
	float		MinLODErrors[NANITE_MAX_BVH_NODE_FANOUT];			// Conservative minimum LOD error of the child subtree
	float		MaxParentLODErrors[NANITE_MAX_BVH_NODE_FANOUT];		// Conservative maximum parent LOD error of the child subtree
	uint32		ChildrenStartIndex[NANITE_MAX_BVH_NODE_FANOUT];		// Child hierarchy node index; 0xFFFFFFFF for leaf slots
	uint32		NumChildren[NANITE_MAX_BVH_NODE_FANOUT];			// Cluster count for leaf slots; NANITE_MAX_CLUSTERS_PER_GROUP for inner slots; 0 marks an empty slot
	uint32		ClusterGroupPartIndex[NANITE_MAX_BVH_NODE_FANOUT];	// Cluster group part for leaf slots; MAX_uint32 for inner slots
	uint32		AssemblyTransformIndex[NANITE_MAX_BVH_NODE_FANOUT];	// Assembly instance index; MAX_uint32 when not an assembly instance
};
// Dumps the hierarchy as a Graphviz "dot" digraph to the low-level debug output for offline inspection.
// Only nodes that participate in at least one parent/child edge are emitted with labels.
static void WriteDotGraph(const TArray<FHierarchyNode>& Nodes)
{
	// A slot is in use when it has a non-zero child count
	auto IsEnabled = [](const FHierarchyNode& Node, uint32 ChildIndex) -> bool
	{
		return Node.NumChildren[ChildIndex] != 0u;
	};

	// Leaf slots reference a cluster group part instead of a child node
	auto IsLeaf = [](const FHierarchyNode& Node, uint32 ChildIndex) -> bool
	{
		return Node.ClusterGroupPartIndex[ChildIndex] != MAX_uint32;
	};

	FGenericPlatformMisc::LowLevelOutputDebugString(TEXT("digraph {\n"));
	FGenericPlatformMisc::LowLevelOutputDebugString(TEXT("node[shape = record]\n"));

	// Emit edges and remember which nodes were referenced by any edge
	TSet<uint32> ReferencedNodes;
	const uint32 NumNodes = Nodes.Num();
	for (uint32 NodeIndex = 0; NodeIndex < NumNodes; NodeIndex++)
	{
		const FHierarchyNode& Node = Nodes[NodeIndex];
		for (uint32 ChildIndex = 0; ChildIndex < NANITE_MAX_BVH_NODE_FANOUT; ChildIndex++)
		{
			if (IsEnabled(Node, ChildIndex) && !IsLeaf(Node, ChildIndex))
			{
				const uint32 ChildNodeIndex = Node.ChildrenStartIndex[ChildIndex];
				FGenericPlatformMisc::LowLevelOutputDebugStringf(TEXT("n%d->n%d\n"), NodeIndex, ChildNodeIndex);
				ReferencedNodes.Add(NodeIndex);
				ReferencedNodes.Add(ChildNodeIndex);
			}
		}
	}

	// Emit record-shaped labels: {MaxParentLODError|<port>LODBounds radius} per enabled slot
	for (uint32 NodeIndex : ReferencedNodes)
	{
		const FHierarchyNode& Node = Nodes[NodeIndex];
		FString Str = FString::Printf(TEXT("n%d [label=\""), NodeIndex);
		for (uint32 ChildIndex = 0; ChildIndex < NANITE_MAX_BVH_NODE_FANOUT; ChildIndex++)
		{
			if (IsEnabled(Node, ChildIndex))
			{
				if (ChildIndex != 0)
					Str += TEXT("|");
				Str += FString::Printf(TEXT("{%.2f|<f%d>%.2f}"),
					Node.MaxParentLODErrors[ChildIndex],
					ChildIndex,
					Node.LODBounds[ChildIndex].W
					);
			}
		}
		Str += TEXT("\"]\n");
		// Fix: never pass a dynamically built string as the format argument of a printf-style
		// function — a stray '%' would be misinterpreted. Output it verbatim instead.
		FGenericPlatformMisc::LowLevelOutputDebugString(*Str);
	}
	FGenericPlatformMisc::LowLevelOutputDebugString(TEXT("\n}\n"));
}
// Recursively flattens the intermediate BVH (FIntermediateNode) into FHierarchyNodes and
// returns the index of the hierarchy node created for CurrentNodeIndex.
// AssemblyInstanceIndex is the assembly instance the current subtree is being built for
// (MAX_uint32 when none). bOutIsInstanceDependent is set when the emitted subtree contains
// per-instance data and therefore must not be cached and shared between instances.
static uint32 BuildHierarchyRecursive(
	TArray<FHierarchyNode>& HierarchyNodes,
	TArray<FIntermediateNode>& Nodes,
	const FClusterDAG& ClusterDAG,
	TArray<FClusterGroupPart>& Parts,
	uint32 AssemblyInstanceIndex,
	uint32 CurrentNodeIndex,
	bool& bOutIsInstanceDependent //TODO: Temporary hack to make sure hierarchy nodes for root groups are not instanced, so their unique ParentErrors are respected.
	)
{
	const FIntermediateNode& INode = Nodes[ CurrentNodeIndex ];
	check( INode.PartIndex == MAX_uint32 );
	check( !INode.IsLeaf() );

	// Allocate the output node up front so it has a stable index for children to reference
	uint32 HNodeIndex = HierarchyNodes.Num();
	HierarchyNodes.AddZeroed();

	bOutIsInstanceDependent = false;

	uint32 NumChildren = INode.Children.Num();
	check(NumChildren <= NANITE_MAX_BVH_NODE_FANOUT);
	for( uint32 ChildIndex = 0; ChildIndex < NumChildren; ChildIndex++ )
	{
		uint32 ChildNodeIndex = INode.Children[ ChildIndex ];
		const FIntermediateNode* ChildNode = &Nodes[ ChildNodeIndex ];

		const uint32 ChildNewAssemblyInstanceIndex = ChildNode->NewAssemblyInstanceIndex;
		uint32 ChildAssemblyInstanceIndex = AssemblyInstanceIndex;
		if( ChildNode->AssemblyRootIndex != MAX_uint32 )
		{
			// Forward assembly ref to real node
			ChildNodeIndex = ChildNode->AssemblyRootIndex;
			ChildNode = &Nodes[ ChildNodeIndex ];
			check( ChildNode->AssemblyRootIndex == MAX_uint32 );
			ChildAssemblyInstanceIndex = ChildNewAssemblyInstanceIndex;
		}

		if( ChildNode->IsLeaf() )
		{
			// Cluster Group
			FClusterGroupPart& Part = Parts[ChildNode->PartIndex];
			const FClusterGroup& Group = ClusterDAG.Groups[Part.GroupIndex];

			FSphere3f LODBounds = Group.LODBounds;
			float ParentLODError = Group.ParentLODError;
			if( Group.bRoot && ChildAssemblyInstanceIndex != MAX_uint32 )
			{
				// Root group of an instanced assembly: use the instance's own LOD data
				LODBounds = ClusterDAG.AssemblyInstanceData[ ChildAssemblyInstanceIndex ].LODBounds;
				ParentLODError = ClusterDAG.AssemblyInstanceData[ ChildAssemblyInstanceIndex ].ParentLODError;

				const FMatrix44f& Transform = ClusterDAG.AssemblyInstanceData[ ChildAssemblyInstanceIndex ].Transform;
				const float MaxScale = Transform.GetScaleVector().GetMax();

				// Invert the transform to put back in assembly part local space.
				LODBounds.Center = Transform.InverseTransformPosition( LODBounds.Center );
				LODBounds.W /= MaxScale;
				ParentLODError /= MaxScale;
				check( ParentLODError >= 0.0f );

				// Per-instance data was baked into this node, so the subtree can't be shared
				bOutIsInstanceDependent = true;
			}

			FHierarchyNode& HNode = HierarchyNodes[HNodeIndex];
			HNode.Bounds[ChildIndex] = Part.Bounds;
			HNode.LODBounds[ChildIndex] = LODBounds;
			HNode.MinLODErrors[ChildIndex] = Part.MinLODError;
			HNode.MaxParentLODErrors[ChildIndex] = ParentLODError;
			HNode.ChildrenStartIndex[ChildIndex] = 0xFFFFFFFFu;	// Leaf slot: no child hierarchy node
			HNode.NumChildren[ChildIndex] = Part.Clusters.Num();
			HNode.ClusterGroupPartIndex[ChildIndex] = ChildNode->PartIndex;
			HNode.AssemblyTransformIndex[ChildIndex] = ChildNewAssemblyInstanceIndex;
			check(HNode.NumChildren[ChildIndex] <= NANITE_MAX_CLUSTERS_PER_GROUP);
			Part.HierarchyNodeRefs.Add(FHierarchyNodeRef{ HNodeIndex, ChildIndex });
		}
		else
		{
			// Hierarchy node
			uint32 ChildHierarchyNodeIndex = ChildNode->HierarchyNodeIndex;
			if (ChildHierarchyNodeIndex == MAX_uint32)
			{
				// Only recurse when it isn't cached
				bool bChildIsInstanceDependent;
				ChildHierarchyNodeIndex = BuildHierarchyRecursive(HierarchyNodes, Nodes, ClusterDAG, Parts, ChildAssemblyInstanceIndex, ChildNodeIndex, bChildIsInstanceDependent);
				if (!bChildIsInstanceDependent)
				{
					// TODO: Caching the subhierarchy currently gives LOD artifacts at the transition to the mip tail because MaxParentLODError is unique per instance.
					// Fix this in a follow-up commit.
					Nodes[ChildNodeIndex].HierarchyNodeIndex = ChildHierarchyNodeIndex; // Deliberately don't reference ChildNode past recursive call
				}
				bOutIsInstanceDependent |= bChildIsInstanceDependent;
			}

			// Aggregate the grandchildren's bounds/errors to describe this child slot conservatively
			const FHierarchyNode& ChildHNode = HierarchyNodes[ChildHierarchyNodeIndex];

			FBounds3f Bounds;
			TArray< FSphere3f, TInlineAllocator<NANITE_MAX_BVH_NODE_FANOUT> > LODBoundSpheres;
			float MinLODError = MAX_flt;
			float MaxParentLODError = 0.0f;
			for (uint32 GrandChildIndex = 0; GrandChildIndex < NANITE_MAX_BVH_NODE_FANOUT && ChildHNode.NumChildren[GrandChildIndex] != 0; GrandChildIndex++)
			{
				const uint32 GrandChildAssemblyTransformIndex = ChildHNode.AssemblyTransformIndex[GrandChildIndex];
				float GrandChildMinLODError = ChildHNode.MinLODErrors[GrandChildIndex];
				float GrandChildMaxParentLODError = ChildHNode.MaxParentLODErrors[GrandChildIndex];
				if (GrandChildAssemblyTransformIndex != MAX_uint32)
				{
					// Assembly instance: transform bounds and scale errors by the instance transform
					const FMatrix44f& Transform = ClusterDAG.AssemblyInstanceData[GrandChildAssemblyTransformIndex].Transform;
					const float MaxScale = Transform.GetScaleVector().GetMax();
					Bounds += ChildHNode.Bounds[GrandChildIndex].TransformBy( Transform );
					FSphere3f LODBounds = ChildHNode.LODBounds[GrandChildIndex];
					LODBounds.Center = Transform.TransformPosition(LODBounds.Center);
					LODBounds.W *= MaxScale;
					GrandChildMinLODError *= MaxScale;
					GrandChildMaxParentLODError *= MaxScale;
					LODBoundSpheres.Add(LODBounds);
				}
				else
				{
					Bounds += ChildHNode.Bounds[GrandChildIndex];
					LODBoundSpheres.Add(ChildHNode.LODBounds[GrandChildIndex]);
				}
				MinLODError = FMath::Min(MinLODError, GrandChildMinLODError);
				MaxParentLODError = FMath::Max(MaxParentLODError, GrandChildMaxParentLODError);
			}

			// Sphere bounding all grandchild LOD spheres
			FSphere3f LODBounds = FSphere3f(LODBoundSpheres.GetData(), LODBoundSpheres.Num());

			FHierarchyNode& HNode = HierarchyNodes[HNodeIndex];
			HNode.Bounds[ChildIndex] = Bounds;
			HNode.LODBounds[ChildIndex] = LODBounds;
			HNode.MinLODErrors[ChildIndex] = MinLODError;
			HNode.MaxParentLODErrors[ChildIndex] = MaxParentLODError;
			HNode.ChildrenStartIndex[ChildIndex] = ChildHierarchyNodeIndex;
			HNode.NumChildren[ChildIndex] = NANITE_MAX_CLUSTERS_PER_GROUP;	// Inner slot: non-zero marks the slot as used
			HNode.ClusterGroupPartIndex[ChildIndex] = MAX_uint32;
			HNode.AssemblyTransformIndex[ChildIndex] = ChildNewAssemblyInstanceIndex;
		}
	}
	return HNodeIndex;
}
static float BVH_Cost(const TArray<FIntermediateNode>& Nodes, TArrayView<uint32> NodeIndices)
{
FBounds3f Bound;
for (uint32 NodeIndex : NodeIndices)
{
Bound += Nodes[NodeIndex].Bound;
}
return Bound.GetSurfaceArea();
}
// Partitions NodeIndices in place into NANITE_MAX_BVH_NODE_FANOUT child ranges (of the sizes
// given in ChildSizes) by recursive binary splitting. For every split, each of the three axes
// is tried and the one whose sorted order minimizes the combined surface-area cost of the two
// halves is kept.
static void BVH_SortNodes(const TArray<FIntermediateNode>& Nodes, TArrayView<uint32> NodeIndices, const TArray<uint32>& ChildSizes)
{
	// Perform NANITE_MAX_BVH_NODE_FANOUT_BITS binary splits
	for (uint32 Level = 0; Level < NANITE_MAX_BVH_NODE_FANOUT_BITS; Level++)
	{
		const uint32 NumBuckets = 1 << Level;
		const uint32 NumChildrenPerBucket = NANITE_MAX_BVH_NODE_FANOUT >> Level;
		const uint32 NumChildrenPerBucketHalf = NumChildrenPerBucket >> 1;

		uint32 BucketStartIndex = 0;
		for (uint32 BucketIndex = 0; BucketIndex < NumBuckets; BucketIndex++)
		{
			const uint32 FirstChild = NumChildrenPerBucket * BucketIndex;

			// Total element count of each half of this bucket
			uint32 Sizes[2] = {};
			for (uint32 i = 0; i < NumChildrenPerBucketHalf; i++)
			{
				Sizes[0] += ChildSizes[FirstChild + i];
				Sizes[1] += ChildSizes[FirstChild + i + NumChildrenPerBucketHalf];
			}
			TArrayView<uint32> NodeIndices01 = NodeIndices.Slice(BucketStartIndex, Sizes[0] + Sizes[1]);
			TArrayView<uint32> NodeIndices0 = NodeIndices.Slice(BucketStartIndex, Sizes[0]);
			TArrayView<uint32> NodeIndices1 = NodeIndices.Slice(BucketStartIndex + Sizes[0], Sizes[1]);
			BucketStartIndex += Sizes[0] + Sizes[1];

			// Sort the whole bucket by bound center along the given axis
			auto SortByAxis = [&](uint32 AxisIndex)
			{
				if (AxisIndex == 0)
					NodeIndices01.Sort([&Nodes](uint32 A, uint32 B) { return Nodes[A].Bound.GetCenter().X < Nodes[B].Bound.GetCenter().X; });
				else if (AxisIndex == 1)
					NodeIndices01.Sort([&Nodes](uint32 A, uint32 B) { return Nodes[A].Bound.GetCenter().Y < Nodes[B].Bound.GetCenter().Y; });
				else if (AxisIndex == 2)
					NodeIndices01.Sort([&Nodes](uint32 A, uint32 B) { return Nodes[A].Bound.GetCenter().Z < Nodes[B].Bound.GetCenter().Z; });
				else
					check(false);
			};

			float BestCost = MAX_flt;
			uint32 BestAxisIndex = 0;

			// Try sorting along different axes and pick the best one
			const uint32 NumAxes = 3;
			for (uint32 AxisIndex = 0; AxisIndex < NumAxes; AxisIndex++)
			{
				SortByAxis(AxisIndex);

				float Cost = BVH_Cost(Nodes, NodeIndices0) + BVH_Cost(Nodes, NodeIndices1);
				if (Cost < BestCost)
				{
					BestCost = Cost;
					BestAxisIndex = AxisIndex;
				}
			}

			// Re-sort if the best axis wasn't the last one we tried
			if (BestAxisIndex != NumAxes - 1)
			{
				SortByAxis(BestAxisIndex);
			}
		}
	}
}
// Build hierarchy using a top-down splitting approach.
// WIP: So far it just focuses on minimizing worst-case tree depth/latency.
// It does this by building a complete tree with at most one partially filled level.
// At most one node is partially filled.
//TODO: Experiment with sweeping, even if it results in more total nodes and/or makes some paths slightly longer.
static uint32 BuildHierarchyTopDown(TArray<FIntermediateNode>& Nodes, TArrayView<uint32> NodeIndices, bool bSort)
{
	const uint32 N = NodeIndices.Num();
	if (N == 1)
	{
		// Single node: it is its own root
		return NodeIndices[0];
	}

	const uint32 NewRootIndex = Nodes.Num();
	Nodes.AddDefaulted();

	if (N <= NANITE_MAX_BVH_NODE_FANOUT)
	{
		// Everything fits directly under the new root
		Nodes[NewRootIndex].Children = NodeIndices;
		return NewRootIndex;
	}

	// Where does the last (incomplete) level start
	// TopSize = largest power of NANITE_MAX_BVH_NODE_FANOUT that is <= N
	uint32 TopSize = NANITE_MAX_BVH_NODE_FANOUT;
	while (TopSize * NANITE_MAX_BVH_NODE_FANOUT <= N)
	{
		TopSize *= NANITE_MAX_BVH_NODE_FANOUT;
	}

	const uint32 LargeChildSize = TopSize;
	const uint32 SmallChildSize = TopSize / NANITE_MAX_BVH_NODE_FANOUT;
	const uint32 MaxExcessPerChild = LargeChildSize - SmallChildSize;

	// Distribute the excess over the children, filling from the back, so that at most
	// one child ends up with a partial (neither small nor large) subtree.
	TArray<uint32> ChildSizes;
	ChildSizes.SetNum(NANITE_MAX_BVH_NODE_FANOUT);
	uint32 Excess = N - TopSize;
	for (int32 i = NANITE_MAX_BVH_NODE_FANOUT-1; i >= 0; i--)
	{
		const uint32 ChildExcess = FMath::Min(Excess, MaxExcessPerChild);
		ChildSizes[i] = SmallChildSize + ChildExcess;
		Excess -= ChildExcess;
	}
	check(Excess == 0);

	if (bSort)
	{
		// Spatially partition the node range into the child ranges
		BVH_SortNodes(Nodes, NodeIndices, ChildSizes);
	}

	// Recursively build each child subtree
	uint32 Offset = 0;
	for (uint32 i = 0; i < NANITE_MAX_BVH_NODE_FANOUT; i++)
	{
		uint32 ChildSize = ChildSizes[i];
		uint32 NodeIndex = BuildHierarchyTopDown(Nodes, NodeIndices.Slice(Offset, ChildSize), bSort); // Needs to be separated from next statement with sequence point to order access to Nodes array.
		Nodes[NewRootIndex].Children.Add(NodeIndex);
		Offset += ChildSize;
	}

	return NewRootIndex;
}
// Builds the top of the hierarchy from per-LOD-level roots.
// While ChainLevels is non-zero, the last 3 entries (the coarsest LOD levels) are chained
// directly under each new root and the remainder is handled recursively; once the chain
// budget is exhausted, the rest is built with regular (unsorted) top-down splitting.
static uint32 BuildHierarchyLODRoots(TArray<FIntermediateNode>& Nodes, TArrayView<uint32> NodeIndices, uint32 ChainLevels)
{
	const uint32 NumRoots = NodeIndices.Num();

	if (NumRoots == 1)
	{
		// A single root needs no extra node
		return NodeIndices[0];
	}

	if (NumRoots <= NANITE_MAX_BVH_NODE_FANOUT)
	{
		// Everything fits in one node
		const uint32 RootIndex = Nodes.Num();
		Nodes.AddDefaulted();
		Nodes[RootIndex].Children = NodeIndices;
		return RootIndex;
	}

	if (ChainLevels == 0)
	{
		// Chain budget exhausted: fall back to plain top-down splitting
		return BuildHierarchyTopDown(Nodes, NodeIndices, false);
	}

	// Last 3 LOD levels directly under the new root; recurse on the remainder
	const uint32 RootIndex = Nodes.Num();
	Nodes.AddDefaulted();
	Nodes[RootIndex].Children = NodeIndices.Slice(NumRoots - 3, 3);
	const uint32 RemainderRoot = BuildHierarchyLODRoots(Nodes, NodeIndices.Slice(0, NumRoots - 3), ChainLevels - 1);
	Nodes[RootIndex].Children.Add(RemainderRoot);	// Re-index Nodes: the recursion may have reallocated the array
	return RootIndex;
}
// Recursively walks the hierarchy from NodeIndex and records, per page, the deepest
// hierarchy level at which any of the page's cluster group parts is referenced.
static void CalculatePageHierarchyPartDepths(TArray<FPage>& Pages, const TArray<FClusterGroupPart>& Parts, const TArray<FHierarchyNode>& HierarchyNodes, uint32 NodeIndex, uint32 Depth)
{
	const FHierarchyNode& Node = HierarchyNodes[NodeIndex];
	for (uint32 ChildIndex = 0; ChildIndex < NANITE_MAX_BVH_NODE_FANOUT; ChildIndex++)
	{
		if (Node.NumChildren[ChildIndex] == 0)
		{
			// Empty slot
			continue;
		}

		const uint32 PartIndex = Node.ClusterGroupPartIndex[ChildIndex];
		if (PartIndex == MAX_uint32)
		{
			// Inner node: descend one level deeper
			CalculatePageHierarchyPartDepths(Pages, Parts, HierarchyNodes, Node.ChildrenStartIndex[ChildIndex], Depth + 1);
		}
		else
		{
			// Leaf node: track the deepest reference to the part's page
			const FClusterGroupPart& Part = Parts[PartIndex];
			uint32& MaxDepth = Pages[Part.PageIndex].MaxHierarchyPartDepth;
			MaxDepth = FMath::Max(MaxDepth, Depth);
			check(MaxDepth <= NANITE_MAX_CLUSTER_HIERARCHY_DEPTH);
		}
	}
}
// Update worst-case hierarchy depths for pages.
// These are used to determine how many node culling passes are needed at runtime.
void CalculateFinalPageHierarchyDepth(const FResources& Resources, TArray<FPage>& Pages)
{
	// Calculate final MaxHierarchyDepth based on the hierarchy depth of the parts.
	// Make sure all pages are reachable from at least one of its dependencies,
	// otherwise we can potentially get into a situation where streaming can't progress any further.
	for (uint32 PageIndex = 0; PageIndex < (uint32)Pages.Num(); PageIndex++)
	{
		Pages[PageIndex].MaxHierarchyDepth = FMath::Max(Pages[PageIndex].MaxHierarchyDepth, Pages[PageIndex].MaxHierarchyPartDepth);

		const FPageStreamingState& PageStreamingState = Resources.PageStreamingStates[PageIndex];

		// Determine max hierarchy depth of dependencies
		uint32 DependencyMaxDepth = 0;
		uint32 DependencyMaxDepthIndex = MAX_uint32;
		for (uint32 i = 0; i < PageStreamingState.DependenciesNum; i++)
		{
			const uint32 DependencyPageIndex = Resources.PageDependencies[PageStreamingState.DependenciesStart + i];
			check(DependencyPageIndex < PageIndex);	// Dependencies point at earlier pages, so their depths are already final at this point
			const uint32 DependencyPageDepth = Pages[DependencyPageIndex].MaxHierarchyDepth;
			if (DependencyPageDepth >= DependencyMaxDepth) // >= so DependencyMaxDepthIndex also gets initialized for DependencyPageDepth=0
			{
				DependencyMaxDepth = DependencyPageDepth;
				DependencyMaxDepthIndex = i;
			}
		}

		// If page parts can't be reached, increase the depth of the dependency that already has the highest depth
		if (DependencyMaxDepthIndex != MAX_uint32 && DependencyMaxDepth < Pages[PageIndex].MaxHierarchyPartDepth)
		{
			const uint32 DependencyPageIndex = Resources.PageDependencies[PageStreamingState.DependenciesStart + DependencyMaxDepthIndex];
			Pages[DependencyPageIndex].MaxHierarchyDepth = Pages[PageIndex].MaxHierarchyPartDepth;
		}
	}
}
// TODO: Do a cleaner and shared implementation?
// Converts a float32 to the largest representable float16 that is <= Value
// (i.e. conversion rounded toward negative infinity).
static FFloat16 Float32ToFloat16Floor(float Value)
{
	// Start from the regular float32 -> float16 conversion of Value.
	// Fix: the previous code wrote "FFloat16 ValueF16 = FFloat16(Value).Encoded;", which routed
	// the raw 16-bit pattern back through the float constructor and produced a wildly wrong
	// starting point, making the correction loop below iterate thousands of times instead of
	// "once or maybe twice". Construct from the value directly.
	FFloat16 ValueF16(Value);
	while(ValueF16.GetFloat() > Value && ValueF16.Encoded != 0xFFFFu) // This should only loop once or maybe twice under normal circumstances
	{
		if (ValueF16.Encoded == 0u)
			ValueF16.Encoded = 0x8000u;	// Cross from +0 to -0 before stepping into negative values
		else
			ValueF16.Encoded += (ValueF16.Encoded & 0x8000u) ? 1 : -1;	// Step one representable half toward -infinity (encodings of negatives grow as magnitude grows)
	}
	return ValueF16;
}
// Converts an unpacked FHierarchyNode into the packed runtime format.
// NumResourceRootPages is currently unused in this function.
static void PackHierarchyNode(FPackedHierarchyNode& OutNode, const FHierarchyNode& InNode, const TArray<FClusterGroup>& Groups, const TArray<FClusterGroupPart>& GroupParts, const uint32 NumResourceRootPages)
{
	static_assert(NANITE_MAX_RESOURCE_PAGES_BITS + NANITE_MAX_CLUSTERS_PER_GROUP_BITS + NANITE_MAX_GROUP_PARTS_BITS <= 32, "");
	for (uint32 i = 0; i < NANITE_MAX_BVH_NODE_FANOUT; i++)
	{
		OutNode.LODBounds[i] = FVector4f(InNode.LODBounds[i].Center, InNode.LODBounds[i].W);

		const FBounds3f& Bounds = InNode.Bounds[i];
		OutNode.Misc0[i].BoxBoundsCenter = Bounds.GetCenter();
		OutNode.Misc1[i].BoxBoundsExtent = Bounds.GetExtent();

		check(InNode.NumChildren[i] <= NANITE_MAX_CLUSTERS_PER_GROUP);
		// Pack the two errors as halves: MaxParentLODError in the low 16 bits, floor(MinLODError)
		// in the high 16. MinLODError is converted with floor rounding so the packed value never
		// exceeds the true minimum.
		// NOTE(review): the bit order looks swapped relative to the field name
		// (MinLODError_MaxParentLODError) — confirm against the shader-side unpacking.
		OutNode.Misc0[i].MinLODError_MaxParentLODError = FFloat16( InNode.MaxParentLODErrors[i] ).Encoded | ( Float32ToFloat16Floor(InNode.MinLODErrors[i]).Encoded << 16 );
		OutNode.Misc1[i].ChildStartReference = InNode.ChildrenStartIndex[i];

		uint32 ResourcePageRangeKey = NANITE_PAGE_RANGE_KEY_EMPTY_RANGE;
		uint32 GroupPartSize = 0;
		if( InNode.NumChildren[ i ] > 0 )
		{
			if( InNode.ClusterGroupPartIndex[ i ] != MAX_uint32 )
			{
				// Leaf node
				const FClusterGroup& Group = Groups[ GroupParts[ InNode.ClusterGroupPartIndex[ i ] ].GroupIndex ];
				ResourcePageRangeKey = Group.PageRangeKey.Value;
				GroupPartSize = InNode.NumChildren[ i ];
			}
			else
			{
				// Hierarchy node. No resource pages or group size.
				ResourcePageRangeKey = 0xFFFFFFFFu;
			}
		}
		OutNode.Misc2[ i ].ResourcePageRangeKey = ResourcePageRangeKey;
		// NOTE(review): this assumes NANITE_HIERARCHY_MAX_ASSEMBLY_TRANSFORMS is an all-ones
		// bit mask matching NANITE_HIERARCHY_ASSEMBLY_TRANSFORM_INDEX_BITS — confirm; masking
		// with a plain max count here would mangle the MAX_uint32 "no transform" sentinel.
		OutNode.Misc2[ i ].GroupPartSize_AssemblyPartIndex =
			(InNode.AssemblyTransformIndex[i] & NANITE_HIERARCHY_MAX_ASSEMBLY_TRANSFORMS) |
			(GroupPartSize << NANITE_HIERARCHY_ASSEMBLY_TRANSFORM_INDEX_BITS);
	}
}
// Builds an intermediate BVH over the given group parts and, for mesh roots
// (bAssemblyPart == false), over the assembly instances referencing previously built
// assembly subtrees. Returns the index of the root FIntermediateNode, or MAX_uint32 when
// an assembly part produced no nodes. The Nodes array is shared between calls so assembly
// subtrees remain addressable from later mesh hierarchies.
static uint32 BuildHierarchy(
	TArray<FIntermediateNode>& Nodes,
	const FClusterDAG& ClusterDAG,
	const TArray<FClusterGroupPart>& Parts,
	const TArray<uint32>& AssemblyRoots,
	const TArray<uint32>& PartIndices,
	bool bAssemblyPart)
{
	const TArray<FClusterGroup>& Groups = ClusterDAG.Groups;

	const uint32 NumGroupParts = (uint32)PartIndices.Num();
	const uint32 NumAssemblyParts = (uint32)ClusterDAG.AssemblyPartData.Num();
	const uint32 NumAssemblyInstances = (uint32)ClusterDAG.AssemblyInstanceData.Num();
	check(AssemblyRoots.Num() == ClusterDAG.AssemblyPartData.Num());

	// Determine the mip level range. Parts are bucketed at MipLevel+1 so that
	// level 0 (AssemblyMipLevel) is reserved for assembly instance nodes.
	int32 MaxMipLevel = 0;
	for(const uint32 PartIndex : PartIndices)
	{
		const FClusterGroupPart& Part = Parts[PartIndex];
		const FClusterGroup& Group = Groups[Part.GroupIndex];
		MaxMipLevel = FMath::Max(MaxMipLevel, Group.MipLevel);
	}
	MaxMipLevel++;
	const uint32 AssemblyMipLevel = 0;

	const uint32 FirstNodeIndex = (uint32)Nodes.Num();

	// Build leaf nodes for each LOD level of the mesh
	TArray<TArray<uint32>> NodesByMip;
	NodesByMip.SetNum(MaxMipLevel + 1);
	{
		Nodes.AddDefaulted(NumGroupParts);
		for (uint32 i = 0; i < NumGroupParts; i++)
		{
			const uint32 PartIndex = PartIndices[i];
			const FClusterGroupPart& Part = Parts[PartIndex];
			const FClusterGroup& Group = Groups[Part.GroupIndex];

			const uint32 NodeIndex = FirstNodeIndex + i;
			FIntermediateNode& Node = Nodes[NodeIndex];
			Node.Bound = Part.Bounds;
			Node.PartIndex = PartIndex;
			Node.MipLevel = Group.MipLevel + 1;
			NodesByMip[Group.MipLevel + 1].Add(NodeIndex);
		}
	}

	if(!bAssemblyPart)
	{
		// Add one forwarding leaf per assembly instance, referencing the shared assembly subtree root
		Nodes.Reserve(Nodes.Num() + NumAssemblyInstances);
		for (uint32 i = 0; i < NumAssemblyParts; i++)
		{
			const FAssemblyPartData& PartData = ClusterDAG.AssemblyPartData[i];
			const uint32 AssemblyRootIndex = AssemblyRoots[i];
			if (AssemblyRootIndex == MAX_uint32)
				continue;	// Assembly part produced no nodes

			for (uint32 j = 0; j < PartData.NumInstances; j++)
			{
				const uint32 InstanceIndex = PartData.FirstInstance + j;
				NodesByMip[AssemblyMipLevel].Add(Nodes.Num());

				FIntermediateNode& Node = Nodes.AddDefaulted_GetRef();
				Node.Bound = Nodes[AssemblyRootIndex].Bound.TransformBy( ClusterDAG.AssemblyInstanceData[InstanceIndex].Transform );
				Node.AssemblyRootIndex = AssemblyRootIndex;
				Node.NewAssemblyInstanceIndex = InstanceIndex;
				Node.MipLevel = AssemblyMipLevel;
			}
		}
	}

	const uint32 NumNewNodes = (uint32)Nodes.Num() - FirstNodeIndex;

	uint32 RootIndex = MAX_uint32;
	if (NumNewNodes == 0)
	{
		if (!bAssemblyPart)
		{
			// Meshes always get a (possibly empty) root node
			RootIndex = (uint32)Nodes.Num();
			Nodes.AddDefaulted();
		}
	}
	else if (NumNewNodes == 1)
	{
		if (!bAssemblyPart)
		{
			// Wrap the single node in a root node
			FIntermediateNode& Node = Nodes.AddDefaulted_GetRef();
			const uint32 OnlyChildIndex = uint32( Nodes.Num() - 2 );
			Node.Children.Add( OnlyChildIndex );
			// Fix: this previously copied Nodes[0].Bound, which is only the single child when
			// FirstNodeIndex == 0. Nodes is shared across calls, so index the actual child.
			Node.Bound = Nodes[ OnlyChildIndex ].Bound;
		}
		RootIndex = uint32( Nodes.Num() - 1 );
	}
	else
	{
		// Build hierarchy:
		// Nanite meshes contain cluster data for many levels of detail. Clusters from different levels
		// of detail can vary wildly in size, which can already be challenge for building a good hierarchy.
		// Apart from the visibility bounds, the hierarchy also tracks conservative LOD error metrics for the child nodes.
		// The runtime traversal descends into children as long as they are visible and the conservative LOD error is not
		// more detailed than what we are looking for. We have to be very careful when mixing clusters from different LODs
		// as less detailed clusters can easily end up bloating both bounds and error metrics.

		// We have experimented with a bunch of mixed LOD approaches, but currently, it seems, building separate hierarchies
		// for each LOD level and then building a hierarchy of those hierarchies gives the best and most predictable results.

		// TODO: The roots of these hierarchies all share the same visibility and LOD bounds, or at least close enough that we could
		//       make a shared conservative bound without losing much. This makes a lot of the work around the root node fairly
		//       redundant. Perhaps we should consider evaluating a shared root during instance cull instead and enable/disable
		//       the per-level hierarchies based on 1D range tests for LOD error.
		TArray<uint32> LevelRoots;
		for (int32 MipLevel = 0; MipLevel <= MaxMipLevel; MipLevel++)
		{
			if (NodesByMip[MipLevel].Num() > 0)
			{
				// Build a hierarchy for the mip level
				uint32 NodeIndex = BuildHierarchyTopDown(Nodes, NodesByMip[MipLevel], true);

				if (Nodes[NodeIndex].IsLeaf() || Nodes[NodeIndex].Children.Num() == NANITE_MAX_BVH_NODE_FANOUT)
				{
					// Leaf or filled node. Just add it.
					LevelRoots.Add(NodeIndex);
				}
				else
				{
					// Incomplete node. Discard the node and add the children as roots instead.
					LevelRoots.Append(Nodes[NodeIndex].Children);
				}
			}
		}
		// Build top hierarchy. A hierarchy of MIP hierarchies.
		RootIndex = BuildHierarchyLODRoots(Nodes, LevelRoots, 1);
	}
	return RootIndex;
}
// Builds and packs the runtime hierarchy for every mesh in the resource.
// Assembly part subtrees are built first into a shared intermediate node array so that
// each mesh hierarchy can reference them through per-instance forwarding nodes.
void BuildHierarchies(
	FResources& Resources,
	const FClusterDAG& ClusterDAG,
	TArray<FPage>& Pages,
	TArray<FClusterGroupPart>& Parts,
	uint32 NumMeshes)
{
	const uint32 NumAssemblyParts = (uint32)ClusterDAG.AssemblyPartData.Num();

	// Bucket every group part by its assembly part, or by its owning mesh for root parts
	TArray<TArray<uint32>> RootMeshGroupParts;
	RootMeshGroupParts.SetNum(NumMeshes);
	TArray<TArray<uint32>> AssemblyGroupParts;
	AssemblyGroupParts.SetNum(NumAssemblyParts);

	for (int32 PartIndex = 0; PartIndex < Parts.Num(); PartIndex++)
	{
		const FClusterGroup& Group = ClusterDAG.Groups[Parts[PartIndex].GroupIndex];
		if (Group.AssemblyPartIndex == MAX_uint32)
		{
			RootMeshGroupParts[Group.MeshIndex].Add((uint32)PartIndex);
		}
		else
		{
			AssemblyGroupParts[Group.AssemblyPartIndex].Add((uint32)PartIndex);
		}
	}

	// Build shared intermediate subtrees for the assembly parts first
	TArray<uint32> AssemblyRoots;
	AssemblyRoots.SetNum(NumAssemblyParts);
	TArray<FIntermediateNode> Nodes;
	for (uint32 AssemblyPartIndex = 0; AssemblyPartIndex < NumAssemblyParts; AssemblyPartIndex++)
	{
		const TArray<uint32>& PartIndicesForAssembly = AssemblyGroupParts[AssemblyPartIndex];
		AssemblyRoots[AssemblyPartIndex] = PartIndicesForAssembly.IsEmpty() ?
			MAX_uint32 :
			BuildHierarchy(Nodes, ClusterDAG, Parts, AssemblyRoots, PartIndicesForAssembly, true);
	}

	// Build, flatten and pack one hierarchy per mesh
	for (uint32 MeshIndex = 0; MeshIndex < NumMeshes; MeshIndex++)
	{
		const uint32 RootIndex = BuildHierarchy(Nodes, ClusterDAG, Parts, AssemblyRoots, RootMeshGroupParts[MeshIndex], false);

		// Flatten the intermediate tree; the root lands at index 0 of the fresh array
		TArray<FHierarchyNode> HierarchyNodes;
		bool bIsInstanceDependent;
		BuildHierarchyRecursive(HierarchyNodes, Nodes, ClusterDAG, Parts, MAX_uint32, RootIndex, bIsInstanceDependent);

		CalculatePageHierarchyPartDepths(Pages, Parts, HierarchyNodes, 0, 0);

#if BVH_BUILD_WRITE_GRAPHVIZ
		WriteDotGraph(HierarchyNodes);
#endif

		// Convert hierarchy to packed format
		const uint32 PackedBaseIndex = (uint32)Resources.HierarchyNodes.Num();
		Resources.HierarchyRootOffsets.Add(PackedBaseIndex);
		Resources.HierarchyNodes.AddDefaulted(HierarchyNodes.Num());
		for (int32 NodeIndex = 0; NodeIndex < HierarchyNodes.Num(); NodeIndex++)
		{
			PackHierarchyNode(Resources.HierarchyNodes[PackedBaseIndex + NodeIndex], HierarchyNodes[NodeIndex], ClusterDAG.Groups, Parts, Resources.NumRootPages);
		}
	}
}
} // namespace Nanite