// Copyright Epic Games, Inc. All Rights Reserved. #include "NaniteEncodeHierarchy.h" #include "Math/UnrealMath.h" #include "Cluster.h" #include "ClusterDAG.h" #include "NaniteEncodeShared.h" #define BVH_BUILD_WRITE_GRAPHVIZ 0 namespace Nanite { struct FIntermediateNode { uint32 PartIndex = MAX_uint32; uint32 AssemblyRootIndex = MAX_uint32; uint32 NewAssemblyInstanceIndex = MAX_uint32; // To be able to share subtrees this is only set when the instance changes. uint32 HierarchyNodeIndex = MAX_uint32; uint32 MipLevel = MAX_int32; FBounds3f Bound; TArray< uint32 > Children; bool IsLeaf() const { return PartIndex != MAX_uint32; } }; struct FHierarchyNode { FSphere3f LODBounds[NANITE_MAX_BVH_NODE_FANOUT]; FBounds3f Bounds[NANITE_MAX_BVH_NODE_FANOUT]; float MinLODErrors[NANITE_MAX_BVH_NODE_FANOUT]; float MaxParentLODErrors[NANITE_MAX_BVH_NODE_FANOUT]; uint32 ChildrenStartIndex[NANITE_MAX_BVH_NODE_FANOUT]; uint32 NumChildren[NANITE_MAX_BVH_NODE_FANOUT]; uint32 ClusterGroupPartIndex[NANITE_MAX_BVH_NODE_FANOUT]; uint32 AssemblyTransformIndex[NANITE_MAX_BVH_NODE_FANOUT]; }; static void WriteDotGraph(const TArray& Nodes) { auto IsEnabled = [](const FHierarchyNode& Node, uint32 ChildIndex) -> bool { return Node.NumChildren[ChildIndex] != 0u; }; auto IsLeaf = [](const FHierarchyNode& Node, uint32 ChildIndex) -> bool { return Node.ClusterGroupPartIndex[ChildIndex] != MAX_uint32; }; FGenericPlatformMisc::LowLevelOutputDebugString(TEXT("digraph {\n")); FGenericPlatformMisc::LowLevelOutputDebugString(TEXT("node[shape = record]\n")); TSet ReferencedNodes; const uint32 NumNodes = Nodes.Num(); for (uint32 NodeIndex = 0; NodeIndex < NumNodes; NodeIndex++) { const FHierarchyNode& Node = Nodes[NodeIndex]; for (uint32 ChildIndex = 0; ChildIndex < NANITE_MAX_BVH_NODE_FANOUT; ChildIndex++) { if (IsEnabled(Node, ChildIndex) && !IsLeaf(Node, ChildIndex)) { const uint32 ChildNodeIndex = Node.ChildrenStartIndex[ChildIndex]; FGenericPlatformMisc::LowLevelOutputDebugStringf(TEXT("n%d->n%d\n"), NodeIndex, ChildNodeIndex); ReferencedNodes.Add(NodeIndex); ReferencedNodes.Add(ChildNodeIndex); } } } for (uint32 NodeIndex : ReferencedNodes) { const FHierarchyNode& Node = Nodes[NodeIndex]; FString Str = FString::Printf(TEXT("n%d [label=\""), NodeIndex); for (uint32 ChildIndex = 0; ChildIndex < NANITE_MAX_BVH_NODE_FANOUT; ChildIndex++) { if (IsEnabled(Node, ChildIndex)) { if (ChildIndex != 0) Str += TEXT("|"); Str += FString::Printf(TEXT("{%.2f|%.2f}"), Node.MaxParentLODErrors[ChildIndex], ChildIndex, Node.LODBounds[ChildIndex].W ); } } Str += TEXT("\"]\n"); FGenericPlatformMisc::LowLevelOutputDebugStringf(*Str); } FGenericPlatformMisc::LowLevelOutputDebugString(TEXT("\n}\n")); } static uint32 BuildHierarchyRecursive( TArray& HierarchyNodes, TArray& Nodes, const FClusterDAG& ClusterDAG, TArray& Parts, uint32 AssemblyInstanceIndex, uint32 CurrentNodeIndex, bool& bOutIsInstanceDependent //TODO: Temporary hack to make sure hierarchy nodes for root groups are not instanced, so their unique ParentErrors are respected. ) { const FIntermediateNode& INode = Nodes[ CurrentNodeIndex ]; check( INode.PartIndex == MAX_uint32 ); check( !INode.IsLeaf() ); uint32 HNodeIndex = HierarchyNodes.Num(); HierarchyNodes.AddZeroed(); bOutIsInstanceDependent = false; uint32 NumChildren = INode.Children.Num(); check(NumChildren <= NANITE_MAX_BVH_NODE_FANOUT); for( uint32 ChildIndex = 0; ChildIndex < NumChildren; ChildIndex++ ) { uint32 ChildNodeIndex = INode.Children[ ChildIndex ]; const FIntermediateNode* ChildNode = &Nodes[ ChildNodeIndex ]; const uint32 ChildNewAssemblyInstanceIndex = ChildNode->NewAssemblyInstanceIndex; uint32 ChildAssemblyInstanceIndex = AssemblyInstanceIndex; if( ChildNode->AssemblyRootIndex != MAX_uint32 ) { // Forward assembly ref to real node ChildNodeIndex = ChildNode->AssemblyRootIndex; ChildNode = &Nodes[ ChildNodeIndex ]; check( ChildNode->AssemblyRootIndex == MAX_uint32 ); ChildAssemblyInstanceIndex = ChildNewAssemblyInstanceIndex; } if( ChildNode->IsLeaf() ) { // Cluster Group FClusterGroupPart& Part = Parts[ChildNode->PartIndex]; const FClusterGroup& Group = ClusterDAG.Groups[Part.GroupIndex]; FSphere3f LODBounds = Group.LODBounds; float ParentLODError = Group.ParentLODError; if( Group.bRoot && ChildAssemblyInstanceIndex != MAX_uint32 ) { LODBounds = ClusterDAG.AssemblyInstanceData[ ChildAssemblyInstanceIndex ].LODBounds; ParentLODError = ClusterDAG.AssemblyInstanceData[ ChildAssemblyInstanceIndex ].ParentLODError; const FMatrix44f& Transform = ClusterDAG.AssemblyInstanceData[ ChildAssemblyInstanceIndex ].Transform; const float MaxScale = Transform.GetScaleVector().GetMax(); // Invert the transform to put back in assembly part local space. LODBounds.Center = Transform.InverseTransformPosition( LODBounds.Center ); LODBounds.W /= MaxScale; ParentLODError /= MaxScale; check( ParentLODError >= 0.0f ); bOutIsInstanceDependent = true; } FHierarchyNode& HNode = HierarchyNodes[HNodeIndex]; HNode.Bounds[ChildIndex] = Part.Bounds; HNode.LODBounds[ChildIndex] = LODBounds; HNode.MinLODErrors[ChildIndex] = Part.MinLODError; HNode.MaxParentLODErrors[ChildIndex] = ParentLODError; HNode.ChildrenStartIndex[ChildIndex] = 0xFFFFFFFFu; HNode.NumChildren[ChildIndex] = Part.Clusters.Num(); HNode.ClusterGroupPartIndex[ChildIndex] = ChildNode->PartIndex; HNode.AssemblyTransformIndex[ChildIndex] = ChildNewAssemblyInstanceIndex; check(HNode.NumChildren[ChildIndex] <= NANITE_MAX_CLUSTERS_PER_GROUP); Part.HierarchyNodeRefs.Add(FHierarchyNodeRef{ HNodeIndex, ChildIndex }); } else { // Hierarchy node uint32 ChildHierarchyNodeIndex = ChildNode->HierarchyNodeIndex; if (ChildHierarchyNodeIndex == MAX_uint32) { // Only recurse when it isn't cached bool bChildIsInstanceDependent; ChildHierarchyNodeIndex = BuildHierarchyRecursive(HierarchyNodes, Nodes, ClusterDAG, Parts, ChildAssemblyInstanceIndex, ChildNodeIndex, bChildIsInstanceDependent); if (!bChildIsInstanceDependent) { // TODO: Caching the subhierarchy currently gives LOD artifacts at the transition to the mip tail because MaxParentLODError is unique per instance. // Fix this in a follow-up commit. Nodes[ChildNodeIndex].HierarchyNodeIndex = ChildHierarchyNodeIndex; // Deliberately don't reference ChildNode past recursive call } bOutIsInstanceDependent |= bChildIsInstanceDependent; } const FHierarchyNode& ChildHNode = HierarchyNodes[ChildHierarchyNodeIndex]; FBounds3f Bounds; TArray< FSphere3f, TInlineAllocator > LODBoundSpheres; float MinLODError = MAX_flt; float MaxParentLODError = 0.0f; for (uint32 GrandChildIndex = 0; GrandChildIndex < NANITE_MAX_BVH_NODE_FANOUT && ChildHNode.NumChildren[GrandChildIndex] != 0; GrandChildIndex++) { const uint32 GrandChildAssemblyTransformIndex = ChildHNode.AssemblyTransformIndex[GrandChildIndex]; float GrandChildMinLODError = ChildHNode.MinLODErrors[GrandChildIndex]; float GrandChildMaxParentLODError = ChildHNode.MaxParentLODErrors[GrandChildIndex]; if (GrandChildAssemblyTransformIndex != MAX_uint32) { const FMatrix44f& Transform = ClusterDAG.AssemblyInstanceData[GrandChildAssemblyTransformIndex].Transform; const float MaxScale = Transform.GetScaleVector().GetMax(); Bounds += ChildHNode.Bounds[GrandChildIndex].TransformBy( Transform ); FSphere3f LODBounds = ChildHNode.LODBounds[GrandChildIndex]; LODBounds.Center = Transform.TransformPosition(LODBounds.Center); LODBounds.W *= MaxScale; GrandChildMinLODError *= MaxScale; GrandChildMaxParentLODError *= MaxScale; LODBoundSpheres.Add(LODBounds); } else { Bounds += ChildHNode.Bounds[GrandChildIndex]; LODBoundSpheres.Add(ChildHNode.LODBounds[GrandChildIndex]); } MinLODError = FMath::Min(MinLODError, GrandChildMinLODError); MaxParentLODError = FMath::Max(MaxParentLODError, GrandChildMaxParentLODError); } FSphere3f LODBounds = FSphere3f(LODBoundSpheres.GetData(), LODBoundSpheres.Num()); FHierarchyNode& HNode = HierarchyNodes[HNodeIndex]; HNode.Bounds[ChildIndex] = Bounds; HNode.LODBounds[ChildIndex] = LODBounds; HNode.MinLODErrors[ChildIndex] = MinLODError; HNode.MaxParentLODErrors[ChildIndex] = MaxParentLODError; HNode.ChildrenStartIndex[ChildIndex] = ChildHierarchyNodeIndex; HNode.NumChildren[ChildIndex] = NANITE_MAX_CLUSTERS_PER_GROUP; HNode.ClusterGroupPartIndex[ChildIndex] = MAX_uint32; HNode.AssemblyTransformIndex[ChildIndex] = ChildNewAssemblyInstanceIndex; } } return HNodeIndex; } static float BVH_Cost(const TArray& Nodes, TArrayView NodeIndices) { FBounds3f Bound; for (uint32 NodeIndex : NodeIndices) { Bound += Nodes[NodeIndex].Bound; } return Bound.GetSurfaceArea(); } static void BVH_SortNodes(const TArray& Nodes, TArrayView NodeIndices, const TArray& ChildSizes) { // Perform NANITE_MAX_BVH_NODE_FANOUT_BITS binary splits for (uint32 Level = 0; Level < NANITE_MAX_BVH_NODE_FANOUT_BITS; Level++) { const uint32 NumBuckets = 1 << Level; const uint32 NumChildrenPerBucket = NANITE_MAX_BVH_NODE_FANOUT >> Level; const uint32 NumChildrenPerBucketHalf = NumChildrenPerBucket >> 1; uint32 BucketStartIndex = 0; for (uint32 BucketIndex = 0; BucketIndex < NumBuckets; BucketIndex++) { const uint32 FirstChild = NumChildrenPerBucket * BucketIndex; uint32 Sizes[2] = {}; for (uint32 i = 0; i < NumChildrenPerBucketHalf; i++) { Sizes[0] += ChildSizes[FirstChild + i]; Sizes[1] += ChildSizes[FirstChild + i + NumChildrenPerBucketHalf]; } TArrayView NodeIndices01 = NodeIndices.Slice(BucketStartIndex, Sizes[0] + Sizes[1]); TArrayView NodeIndices0 = NodeIndices.Slice(BucketStartIndex, Sizes[0]); TArrayView NodeIndices1 = NodeIndices.Slice(BucketStartIndex + Sizes[0], Sizes[1]); BucketStartIndex += Sizes[0] + Sizes[1]; auto SortByAxis = [&](uint32 AxisIndex) { if (AxisIndex == 0) NodeIndices01.Sort([&Nodes](uint32 A, uint32 B) { return Nodes[A].Bound.GetCenter().X < Nodes[B].Bound.GetCenter().X; }); else if (AxisIndex == 1) NodeIndices01.Sort([&Nodes](uint32 A, uint32 B) { return Nodes[A].Bound.GetCenter().Y < Nodes[B].Bound.GetCenter().Y; }); else if (AxisIndex == 2) NodeIndices01.Sort([&Nodes](uint32 A, uint32 B) { return Nodes[A].Bound.GetCenter().Z < Nodes[B].Bound.GetCenter().Z; }); else check(false); }; float BestCost = MAX_flt; uint32 BestAxisIndex = 0; // Try sorting along different axes and pick the best one const uint32 NumAxes = 3; for (uint32 AxisIndex = 0; AxisIndex < NumAxes; AxisIndex++) { SortByAxis(AxisIndex); float Cost = BVH_Cost(Nodes, NodeIndices0) + BVH_Cost(Nodes, NodeIndices1); if (Cost < BestCost) { BestCost = Cost; BestAxisIndex = AxisIndex; } } // Resort if we the best one wasn't the last one if (BestAxisIndex != NumAxes - 1) { SortByAxis(BestAxisIndex); } } } } // Build hierarchy using a top-down splitting approach. // WIP: So far it just focuses on minimizing worst-case tree depth/latency. // It does this by building a complete tree with at most one partially filled level. // At most one node is partially filled. //TODO: Experiment with sweeping, even if it results in more total nodes and/or makes some paths slightly longer. static uint32 BuildHierarchyTopDown(TArray& Nodes, TArrayView NodeIndices, bool bSort) { const uint32 N = NodeIndices.Num(); if (N == 1) { return NodeIndices[0]; } const uint32 NewRootIndex = Nodes.Num(); Nodes.AddDefaulted(); if (N <= NANITE_MAX_BVH_NODE_FANOUT) { Nodes[NewRootIndex].Children = NodeIndices; return NewRootIndex; } // Where does the last (incomplete) level start uint32 TopSize = NANITE_MAX_BVH_NODE_FANOUT; while (TopSize * NANITE_MAX_BVH_NODE_FANOUT <= N) { TopSize *= NANITE_MAX_BVH_NODE_FANOUT; } const uint32 LargeChildSize = TopSize; const uint32 SmallChildSize = TopSize / NANITE_MAX_BVH_NODE_FANOUT; const uint32 MaxExcessPerChild = LargeChildSize - SmallChildSize; TArray ChildSizes; ChildSizes.SetNum(NANITE_MAX_BVH_NODE_FANOUT); uint32 Excess = N - TopSize; for (int32 i = NANITE_MAX_BVH_NODE_FANOUT-1; i >= 0; i--) { const uint32 ChildExcess = FMath::Min(Excess, MaxExcessPerChild); ChildSizes[i] = SmallChildSize + ChildExcess; Excess -= ChildExcess; } check(Excess == 0); if (bSort) { BVH_SortNodes(Nodes, NodeIndices, ChildSizes); } uint32 Offset = 0; for (uint32 i = 0; i < NANITE_MAX_BVH_NODE_FANOUT; i++) { uint32 ChildSize = ChildSizes[i]; uint32 NodeIndex = BuildHierarchyTopDown(Nodes, NodeIndices.Slice(Offset, ChildSize), bSort); // Needs to be separated from next statement with sequence point to order access to Nodes array. Nodes[NewRootIndex].Children.Add(NodeIndex); Offset += ChildSize; } return NewRootIndex; } static uint32 BuildHierarchyLODRoots(TArray& Nodes, TArrayView NodeIndices, uint32 ChainLevels) { const uint32 N = NodeIndices.Num(); if (N == 1) { return NodeIndices[0]; } if (N <= NANITE_MAX_BVH_NODE_FANOUT) { const uint32 NewRootIndex = Nodes.Num(); Nodes.AddDefaulted(); Nodes[NewRootIndex].Children = NodeIndices; return NewRootIndex; } else { if (ChainLevels == 0) { return BuildHierarchyTopDown(Nodes, NodeIndices, false); } else { const uint32 NewRootIndex = Nodes.Num(); Nodes.AddDefaulted(); Nodes[NewRootIndex].Children = NodeIndices.Slice(NodeIndices.Num() - 3, 3); // Last 3 LOD levels directly uint32 ChildNode = BuildHierarchyLODRoots(Nodes, NodeIndices.Slice(0, NodeIndices.Num() - 3), ChainLevels - 1); Nodes[NewRootIndex].Children.Add(ChildNode); return NewRootIndex; } } } static void CalculatePageHierarchyPartDepths(TArray& Pages, const TArray& Parts, const TArray& HierarchyNodes, uint32 NodeIndex, uint32 Depth) { const FHierarchyNode& Node = HierarchyNodes[NodeIndex]; for (uint32 i = 0; i < NANITE_MAX_BVH_NODE_FANOUT; i++) { if (Node.NumChildren[i] > 0) { if (Node.ClusterGroupPartIndex[i] != MAX_uint32) { // Leaf node const FClusterGroupPart& Part = Parts[Node.ClusterGroupPartIndex[i]]; uint32& MaxDepth = Pages[Part.PageIndex].MaxHierarchyPartDepth; MaxDepth = FMath::Max(MaxDepth, Depth); check(MaxDepth <= NANITE_MAX_CLUSTER_HIERARCHY_DEPTH); } else { CalculatePageHierarchyPartDepths(Pages, Parts, HierarchyNodes, Node.ChildrenStartIndex[i], Depth + 1); } } } } // Update worst-case hierarchy depths for pages. // These are used to determine how many node culling passes are needed at runtime. void CalculateFinalPageHierarchyDepth(const FResources& Resources, TArray& Pages) { // Calculate final MaxHierarchyDepth based hierarchy depth of the parts. // Make sure all pages are reachable from at least one of its dependencies, // otherwise we can potentially get into a situation where streaming can't progress any further. for (uint32 PageIndex = 0; PageIndex < (uint32)Pages.Num(); PageIndex++) { Pages[PageIndex].MaxHierarchyDepth = FMath::Max(Pages[PageIndex].MaxHierarchyDepth, Pages[PageIndex].MaxHierarchyPartDepth); const FPageStreamingState& PageStreamingState = Resources.PageStreamingStates[PageIndex]; // Determine max hierarchy depth of dependencies uint32 DependencyMaxDepth = 0; uint32 DependencyMaxDepthIndex = MAX_uint32; for (uint32 i = 0; i < PageStreamingState.DependenciesNum; i++) { const uint32 DependencyPageIndex = Resources.PageDependencies[PageStreamingState.DependenciesStart + i]; check(DependencyPageIndex < PageIndex); const uint32 DependencyPageDepth = Pages[DependencyPageIndex].MaxHierarchyDepth; if (DependencyPageDepth >= DependencyMaxDepth) // >= so DependencyMaxDepthIndex also gets initialized for DependencyPageDepth=0 { DependencyMaxDepth = DependencyPageDepth; DependencyMaxDepthIndex = i; } } // If page parts can't be reached, increase the depth of the dependency that already has the highest depth if (DependencyMaxDepthIndex != MAX_uint32 && DependencyMaxDepth < Pages[PageIndex].MaxHierarchyPartDepth) { const uint32 DependencyPageIndex = Resources.PageDependencies[PageStreamingState.DependenciesStart + DependencyMaxDepthIndex]; Pages[DependencyPageIndex].MaxHierarchyDepth = Pages[PageIndex].MaxHierarchyPartDepth; } } } // TODO: Do a cleaner and shared implementation? static FFloat16 Float32ToFloat16Floor(float Value) { FFloat16 ValueF16 = FFloat16(Value).Encoded; while(ValueF16.GetFloat() > Value && ValueF16.Encoded != 0xFFFFu) // This should only loop once or maybe twice under normal circumstances { if (ValueF16.Encoded == 0u) ValueF16.Encoded = 0x8000u; else ValueF16.Encoded += (ValueF16.Encoded & 0x8000u) ? 1 : -1; } return ValueF16; } static void PackHierarchyNode(FPackedHierarchyNode& OutNode, const FHierarchyNode& InNode, const TArray& Groups, const TArray& GroupParts, const uint32 NumResourceRootPages) { static_assert(NANITE_MAX_RESOURCE_PAGES_BITS + NANITE_MAX_CLUSTERS_PER_GROUP_BITS + NANITE_MAX_GROUP_PARTS_BITS <= 32, ""); for (uint32 i = 0; i < NANITE_MAX_BVH_NODE_FANOUT; i++) { OutNode.LODBounds[i] = FVector4f(InNode.LODBounds[i].Center, InNode.LODBounds[i].W); const FBounds3f& Bounds = InNode.Bounds[i]; OutNode.Misc0[i].BoxBoundsCenter = Bounds.GetCenter(); OutNode.Misc1[i].BoxBoundsExtent = Bounds.GetExtent(); check(InNode.NumChildren[i] <= NANITE_MAX_CLUSTERS_PER_GROUP); OutNode.Misc0[i].MinLODError_MaxParentLODError = FFloat16( InNode.MaxParentLODErrors[i] ).Encoded | ( Float32ToFloat16Floor(InNode.MinLODErrors[i]).Encoded << 16 ); OutNode.Misc1[i].ChildStartReference = InNode.ChildrenStartIndex[i]; uint32 ResourcePageRangeKey = NANITE_PAGE_RANGE_KEY_EMPTY_RANGE; uint32 GroupPartSize = 0; if( InNode.NumChildren[ i ] > 0 ) { if( InNode.ClusterGroupPartIndex[ i ] != MAX_uint32 ) { // Leaf node const FClusterGroup& Group = Groups[ GroupParts[ InNode.ClusterGroupPartIndex[ i ] ].GroupIndex ]; ResourcePageRangeKey = Group.PageRangeKey.Value; GroupPartSize = InNode.NumChildren[ i ]; } else { // Hierarchy node. No resource pages or group size. ResourcePageRangeKey = 0xFFFFFFFFu; } } OutNode.Misc2[ i ].ResourcePageRangeKey = ResourcePageRangeKey; OutNode.Misc2[ i ].GroupPartSize_AssemblyPartIndex = (InNode.AssemblyTransformIndex[i] & NANITE_HIERARCHY_MAX_ASSEMBLY_TRANSFORMS) | (GroupPartSize << NANITE_HIERARCHY_ASSEMBLY_TRANSFORM_INDEX_BITS); } } static uint32 BuildHierarchy( TArray& Nodes, const FClusterDAG& ClusterDAG, const TArray& Parts, const TArray& AssemblyRoots, const TArray& PartIndices, bool bAssemblyPart) { const TArray& Groups = ClusterDAG.Groups; const uint32 NumGroupParts = (uint32)PartIndices.Num(); const uint32 NumAssemblyParts = (uint32)ClusterDAG.AssemblyPartData.Num(); const uint32 NumAssemblyInstances = (uint32)ClusterDAG.AssemblyInstanceData.Num(); check(AssemblyRoots.Num() == ClusterDAG.AssemblyPartData.Num()); int32 MaxMipLevel = 0; for(const uint32 PartIndex : PartIndices) { const FClusterGroupPart& Part = Parts[PartIndex]; const FClusterGroup& Group = Groups[Part.GroupIndex]; MaxMipLevel = FMath::Max(MaxMipLevel, Group.MipLevel); } MaxMipLevel++; const uint32 AssemblyMipLevel = 0; const uint32 FirstNodeIndex = (uint32)Nodes.Num(); // Build leaf nodes for each LOD level of the mesh TArray> NodesByMip; NodesByMip.SetNum(MaxMipLevel + 1); { Nodes.AddDefaulted(NumGroupParts); for (uint32 i = 0; i < NumGroupParts; i++) { const uint32 PartIndex = PartIndices[i]; const FClusterGroupPart& Part = Parts[PartIndex]; const FClusterGroup& Group = Groups[Part.GroupIndex]; const uint32 NodeIndex = FirstNodeIndex + i; FIntermediateNode& Node = Nodes[NodeIndex]; Node.Bound = Part.Bounds; Node.PartIndex = PartIndex; Node.MipLevel = Group.MipLevel + 1; NodesByMip[Group.MipLevel + 1].Add(NodeIndex); } } if(!bAssemblyPart) { Nodes.Reserve(Nodes.Num() + NumAssemblyInstances); for (uint32 i = 0; i < NumAssemblyParts; i++) { const FAssemblyPartData& PartData = ClusterDAG.AssemblyPartData[i]; const uint32 AssemblyRootIndex = AssemblyRoots[i]; if (AssemblyRootIndex == MAX_uint32) continue; for (uint32 j = 0; j < PartData.NumInstances; j++) { const uint32 InstanceIndex = PartData.FirstInstance + j; NodesByMip[AssemblyMipLevel].Add(Nodes.Num()); FIntermediateNode& Node = Nodes.AddDefaulted_GetRef(); Node.Bound = Nodes[AssemblyRootIndex].Bound.TransformBy( ClusterDAG.AssemblyInstanceData[InstanceIndex].Transform ); Node.AssemblyRootIndex = AssemblyRootIndex; Node.NewAssemblyInstanceIndex = InstanceIndex; Node.MipLevel = AssemblyMipLevel; } } } const uint32 NumNewNodes = (uint32)Nodes.Num() - FirstNodeIndex; uint32 RootIndex = MAX_uint32; if (NumNewNodes == 0) { if (!bAssemblyPart) { RootIndex = (uint32)Nodes.Num(); Nodes.AddDefaulted(); } } else if (NumNewNodes == 1) { if (!bAssemblyPart) { FIntermediateNode& Node = Nodes.AddDefaulted_GetRef(); Node.Children.Add( uint32 ( Nodes.Num() - 2 ) ); Node.Bound = Nodes[0].Bound; } RootIndex = uint32( Nodes.Num() - 1 ); } else { // Build hierarchy: // Nanite meshes contain cluster data for many levels of detail. Clusters from different levels // of detail can vary wildly in size, which can already be challenge for building a good hierarchy. // Apart from the visibility bounds, the hierarchy also tracks conservative LOD error metrics for the child nodes. // The runtime traversal descends into children as long as they are visible and the conservative LOD error is not // more detailed than what we are looking for. We have to be very careful when mixing clusters from different LODs // as less detailed clusters can easily end up bloating both bounds and error metrics. // We have experimented with a bunch of mixed LOD approached, but currently, it seems, building separate hierarchies // for each LOD level and then building a hierarchy of those hierarchies gives the best and most predictable results. // TODO: The roots of these hierarchies all share the same visibility and LOD bounds, or at least close enough that we could // make a shared conservative bound without losing much. This makes a lot of the work around the root node fairly // redundant. Perhaps we should consider evaluating a shared root during instance cull instead and enable/disable // the per-level hierarchies based on 1D range tests for LOD error. TArray LevelRoots; for (int32 MipLevel = 0; MipLevel <= MaxMipLevel; MipLevel++) { if (NodesByMip[MipLevel].Num() > 0) { // Build a hierarchy for the mip level uint32 NodeIndex = BuildHierarchyTopDown(Nodes, NodesByMip[MipLevel], true); if (Nodes[NodeIndex].IsLeaf() || Nodes[NodeIndex].Children.Num() == NANITE_MAX_BVH_NODE_FANOUT) { // Leaf or filled node. Just add it. LevelRoots.Add(NodeIndex); } else { // Incomplete node. Discard the node and add the children as roots instead. LevelRoots.Append(Nodes[NodeIndex].Children); } } } // Build top hierarchy. A hierarchy of MIP hierarchies. RootIndex = BuildHierarchyLODRoots(Nodes, LevelRoots, 1); } return RootIndex; } void BuildHierarchies( FResources& Resources, const FClusterDAG& ClusterDAG, TArray& Pages, TArray& Parts, uint32 NumMeshes) { TArray> RootMeshGroupParts; RootMeshGroupParts.SetNum(NumMeshes); const uint32 NumAssemblyParts = (uint32)ClusterDAG.AssemblyPartData.Num(); TArray> AssemblyGroupParts; AssemblyGroupParts.SetNum(NumAssemblyParts); TArray AssemblyRoots; AssemblyRoots.SetNum(NumAssemblyParts); // Assign group part instances to their assembly part or a mesh root const uint32 NumTotalGroupParts = (uint32)Parts.Num(); for (uint32 GroupPartIndex = 0; GroupPartIndex < NumTotalGroupParts; GroupPartIndex++) { const FClusterGroup& Group = ClusterDAG.Groups[Parts[GroupPartIndex].GroupIndex]; if (Group.AssemblyPartIndex != MAX_uint32) { AssemblyGroupParts[Group.AssemblyPartIndex].Add(GroupPartIndex); } else { RootMeshGroupParts[Group.MeshIndex].Add(GroupPartIndex); } } TArray Nodes; for (uint32 AssemblyPartIndex = 0; AssemblyPartIndex < NumAssemblyParts; AssemblyPartIndex++) { const TArray& GroupParts = AssemblyGroupParts[AssemblyPartIndex]; uint32 RootIndex = MAX_uint32; if (GroupParts.Num() > 0) { RootIndex = BuildHierarchy(Nodes, ClusterDAG, Parts, AssemblyRoots, GroupParts, true); } AssemblyRoots[AssemblyPartIndex] = RootIndex; } for (uint32 MeshIndex = 0; MeshIndex < NumMeshes; MeshIndex++) { const uint32 RootIndex = BuildHierarchy(Nodes, ClusterDAG, Parts, AssemblyRoots, RootMeshGroupParts[MeshIndex], false); TArray HierarchyNodes; bool bIsInstanceDependent; BuildHierarchyRecursive(HierarchyNodes, Nodes, ClusterDAG, Parts, MAX_uint32, RootIndex, bIsInstanceDependent); CalculatePageHierarchyPartDepths(Pages, Parts, HierarchyNodes, 0, 0); #if BVH_BUILD_WRITE_GRAPHVIZ WriteDotGraph(HierarchyNodes); #endif // Convert hierarchy to packed format const uint32 NumHierarchyNodes = HierarchyNodes.Num(); const uint32 PackedBaseIndex = Resources.HierarchyNodes.Num(); Resources.HierarchyRootOffsets.Add(PackedBaseIndex); Resources.HierarchyNodes.AddDefaulted(NumHierarchyNodes); for (uint32 i = 0; i < NumHierarchyNodes; i++) { PackHierarchyNode(Resources.HierarchyNodes[PackedBaseIndex + i], HierarchyNodes[i], ClusterDAG.Groups, Parts, Resources.NumRootPages); } } } } // namespace Nanite