Files
Brandyn / Techy fcc1b09210 init
2026-04-04 15:40:51 -05:00

1360 lines
52 KiB
C++

// Copyright Epic Games, Inc. All Rights Reserved.
#include "NaniteRayTracing.h"
#if RHI_RAYTRACING
#include "Rendering/NaniteStreamingManager.h"
#include "NaniteStreamOut.h"
#include "NaniteSceneProxy.h"
#include "NaniteShared.h"
#include "ShaderPrintParameters.h"
#include "PrimitiveSceneInfo.h"
#include "ScenePrivate.h"
#include "SceneInterface.h"
#include "RendererModule.h"
#include "RenderGraphUtils.h"
#include "RendererOnScreenNotification.h"
/*
* TODO:
* - StagingAuxiliaryDataBuffer
* - Keep track of how many pages/clusters are streamed-in per resource
* and allocate less staging memory than the very conservative (Data.NumClusters * NANITE_MAX_CLUSTER_TRIANGLES)
*
* - Defragment AuxiliaryDataBuffer
*
* - VB/IB Buffers
* - Resize VB/IB buffers dynamically instead of always allocating max size
* - Store vertices and indices in the same buffer in a single allocation
*
* - Support reserved resources to avoid copy when resizing auxiliary data buffer
*/
static bool GNaniteRayTracingUpdate = true;
static FAutoConsoleVariableRef CVarNaniteRayTracingUpdate(
TEXT("r.RayTracing.Nanite.Update"),
GNaniteRayTracingUpdate,
TEXT("Whether to process Nanite RayTracing update requests."),
ECVF_RenderThreadSafe
);
static bool GNaniteRayTracingForceUpdateVisible = false;
static FAutoConsoleVariableRef CVarNaniteRayTracingForceUpdateVisible(
TEXT("r.RayTracing.Nanite.ForceUpdateVisible"),
GNaniteRayTracingForceUpdateVisible,
TEXT("Force BLAS of visible primitives to be updated next frame."),
ECVF_RenderThreadSafe
);
static float GNaniteRayTracingCutError = 0.0f;
static FAutoConsoleVariableRef CVarNaniteRayTracingCutError(
TEXT("r.RayTracing.Nanite.CutError"),
GNaniteRayTracingCutError,
TEXT("Global target cut error to control quality when using procedural raytracing geometry for Nanite meshes."),
ECVF_RenderThreadSafe
);
static bool GNaniteRayTracingStreaming = false;
static FAutoConsoleVariableRef CVarNaniteRayTracingStreaming(
TEXT("r.RayTracing.Nanite.Streaming"),
GNaniteRayTracingStreaming,
TEXT("Whether to drive Nanite streaming based on instances in ray tracing scene using Nanite Ray Tracing."),
ECVF_RenderThreadSafe
);
static TAutoConsoleVariable<float> CVarNaniteRayTracingStreamingLodBias(
TEXT("r.RayTracing.Nanite.Streaming.LODBias"), 0.0f,
TEXT("LOD bias for nanite geometry in ray tracing. 0 = full detail. >0 = reduced detail."),
ECVF_RenderThreadSafe);
static TAutoConsoleVariable<float> CVarNaniteRayTracingStreamingOffscreenLodBias(
TEXT("r.RayTracing.Nanite.Streaming.Offscreen.LODBias"), 1.0f,
TEXT("LOD bias for offscreen nanite geometry in ray tracing. 0 = full detail. >0 = reduced detail."),
ECVF_RenderThreadSafe);
static float GNaniteRayTracingStreamingOffscreenMinCutError = 4.0f;
static FAutoConsoleVariableRef CVarNaniteRayTracingStreamingOffscreenMinCutError(
TEXT("r.RayTracing.Nanite.Streaming.Offscreen.MinCutError"),
GNaniteRayTracingStreamingOffscreenMinCutError,
TEXT("Global target cut error when generating Nanite streaming requests for instances in ray tracing scene."),
ECVF_RenderThreadSafe
);
static int32 GNaniteRayTracingMaxNumVertices = 16 * 1024 * 1024;
static FAutoConsoleVariableRef CVarNaniteRayTracingMaxNumVertices(
TEXT("r.RayTracing.Nanite.StreamOut.MaxNumVertices"),
GNaniteRayTracingMaxNumVertices,
TEXT("Max number of vertices to stream out per frame."),
ECVF_RenderThreadSafe
);
static int32 GNaniteRayTracingMaxNumIndices = 64 * 1024 * 1024;
static FAutoConsoleVariableRef CVarNaniteRayTracingMaxNumIndices(
TEXT("r.RayTracing.Nanite.StreamOut.MaxNumIndices"),
GNaniteRayTracingMaxNumIndices,
TEXT("Max number of indices to stream out per frame."),
ECVF_RenderThreadSafe
);
static int32 GNaniteRayTracingMaxBuiltPrimitivesPerFrame = 8 * 1024 * 1024;
static FAutoConsoleVariableRef CVarNaniteRayTracingMaxBuiltPrimitivesPerFrame(
TEXT("r.RayTracing.Nanite.MaxBuiltPrimitivesPerFrame"),
GNaniteRayTracingMaxBuiltPrimitivesPerFrame,
TEXT("Limit number of BLAS built per frame based on a budget defined in terms of maximum number of triangles."),
ECVF_RenderThreadSafe
);
static int32 GNaniteRayTracingMaxStagingBufferSizeMB = 1024;
static FAutoConsoleVariableRef CVarNaniteRayTracingMaxStagingBufferSizeMB(
TEXT("r.RayTracing.Nanite.MaxStagingBufferSizeMB"),
GNaniteRayTracingMaxStagingBufferSizeMB,
TEXT("Limit the size of the staging buffer used during stream out (lower values can cause updates to be throttled)\n")
TEXT("Default = 1024 MB.\n")
TEXT("Max value = 2048 MB."),
ECVF_RenderThreadSafe
);
static int32 GNaniteRayTracingBLASScratchSizeMultipleMB = 64;
static FAutoConsoleVariableRef CVarNaniteRayTracingBLASScratchSizeMultipleMB(
TEXT("r.RayTracing.Nanite.BLASScratchSizeMultipleMB"),
GNaniteRayTracingBLASScratchSizeMultipleMB,
TEXT("Round the size of the BLAS build scratch buffer to be a multiple of this value.\n")
TEXT("This helps maintain consistent memory usage and prevent memory usage spikes.\n")
TEXT("Default = 64 MB."),
ECVF_RenderThreadSafe
);
static bool GNaniteRayTracingProfileStreamOut = false;
static FAutoConsoleVariableRef CVarNaniteRayTracingProfileStreamOut(
TEXT("r.RayTracing.Nanite.ProfileStreamOut"),
GNaniteRayTracingProfileStreamOut,
TEXT("[Development only] Stream out pending requests every frame in order to measure performance."),
ECVF_RenderThreadSafe
);
DECLARE_GPU_STAT(NaniteRayTracingUpdateStreaming);
DECLARE_GPU_STAT(RebuildNaniteBLAS);
DECLARE_STATS_GROUP(TEXT("Nanite RayTracing"), STATGROUP_NaniteRayTracing, STATCAT_Advanced);
DECLARE_DWORD_ACCUMULATOR_STAT(TEXT("In-flight Updates"), STAT_NaniteRayTracingInFlightUpdates, STATGROUP_NaniteRayTracing);
DECLARE_DWORD_COUNTER_STAT(TEXT("Stream Out Requests"), STAT_NaniteRayTracingStreamOutRequests, STATGROUP_NaniteRayTracing);
DECLARE_DWORD_COUNTER_STAT(TEXT("Failed Stream Out Requests"), STAT_NaniteRayTracingFailedStreamOutRequests, STATGROUP_NaniteRayTracing);
DECLARE_DWORD_COUNTER_STAT(TEXT("Scheduled Builds"), STAT_NaniteRayTracingScheduledBuilds, STATGROUP_NaniteRayTracing);
DECLARE_DWORD_COUNTER_STAT(TEXT("Scheduled Builds - Num Primitives"), STAT_NaniteRayTracingScheduledBuildsNumPrimitives, STATGROUP_NaniteRayTracing);
DECLARE_DWORD_ACCUMULATOR_STAT(TEXT("Pending Builds"), STAT_NaniteRayTracingPendingBuilds, STATGROUP_NaniteRayTracing);
DECLARE_MEMORY_STAT(TEXT("Auxiliary Data Buffer"), STAT_NaniteRayTracingAuxiliaryDataBuffer, STATGROUP_NaniteRayTracing);
DECLARE_MEMORY_STAT(TEXT("Staging Auxiliary Data Buffer"), STAT_NaniteRayTracingStagingAuxiliaryDataBuffer, STATGROUP_NaniteRayTracing);
static const uint32 GMinAuxiliaryBufferEntries = 4 * 1024 * 1024; // buffer size will be 16MB
static const uint32 GDisabledMinAuxiliaryBufferEntries = 8; // used when Nanite Ray Tracing is not enabled
namespace Nanite
{
using FRayTracingLoadBalancer = TInstanceCullingLoadBalancer<SceneRenderingAllocator>;
BEGIN_SHADER_PARAMETER_STRUCT(FRayTracingQueueParameters, )
SHADER_PARAMETER_RDG_BUFFER_UAV(RWStructuredBuffer<FQueuePassState>, QueueState)
SHADER_PARAMETER_RDG_BUFFER_UAV(RWByteAddressBuffer, Nodes)
SHADER_PARAMETER_RDG_BUFFER_UAV(RWByteAddressBuffer, CandidateClusters)
SHADER_PARAMETER(uint32, MaxNodes)
SHADER_PARAMETER(uint32, MaxCandidateClusters)
END_SHADER_PARAMETER_STRUCT()
class FRayTracingStreamingInitQueueCS : public FNaniteGlobalShader
{
DECLARE_GLOBAL_SHADER(FRayTracingStreamingInitQueueCS);
SHADER_USE_PARAMETER_STRUCT(FRayTracingStreamingInitQueueCS, FNaniteGlobalShader);
BEGIN_SHADER_PARAMETER_STRUCT(FParameters, )
SHADER_PARAMETER_STRUCT_INCLUDE(FRayTracingQueueParameters, QueueParameters)
SHADER_PARAMETER_STRUCT_INCLUDE(FGPUScene::FInstanceGPULoadBalancer::FShaderParameters, BatcherParameters)
SHADER_PARAMETER_STRUCT_INCLUDE(ShaderPrint::FShaderParameters, ShaderPrint)
END_SHADER_PARAMETER_STRUCT()
using FPermutationDomain = TShaderPermutationDomain<>;
static constexpr int32 NumThreadsPerGroup = FGPUScene::FInstanceGPULoadBalancer::ThreadGroupSize;
static void ModifyCompilationEnvironment(const FGlobalShaderPermutationParameters& Parameters, FShaderCompilerEnvironment& OutEnvironment)
{
FNaniteGlobalShader::ModifyCompilationEnvironment(Parameters, OutEnvironment);
ShaderPrint::ModifyCompilationEnvironment(Parameters.Platform, OutEnvironment);
FGPUScene::FInstanceGPULoadBalancer::SetShaderDefines(OutEnvironment);
}
};
IMPLEMENT_GLOBAL_SHADER(FRayTracingStreamingInitQueueCS, "/Engine/Private/Nanite/NaniteRayTracing.usf", "InitQueueCS", SF_Compute);
struct FNaniteRayTracingStreamingTraversalCS : public FNaniteGlobalShader
{
DECLARE_GLOBAL_SHADER(FNaniteRayTracingStreamingTraversalCS);
SHADER_USE_PARAMETER_STRUCT(FNaniteRayTracingStreamingTraversalCS, FNaniteGlobalShader);
BEGIN_SHADER_PARAMETER_STRUCT(FParameters, )
SHADER_PARAMETER_RDG_UNIFORM_BUFFER(FSceneUniformParameters, Scene)
SHADER_PARAMETER_RDG_BUFFER_SRV(ByteAddressBuffer, HierarchyBuffer)
SHADER_PARAMETER_RDG_BUFFER_SRV(ByteAddressBuffer, ClusterPageData)
SHADER_PARAMETER(FIntVector4, PageConstants)
SHADER_PARAMETER_STRUCT_INCLUDE(FRayTracingQueueParameters, QueueParameters)
SHADER_PARAMETER_RDG_BUFFER_SRV(Buffer<uint>, CurrentNodeIndirectArgs)
SHADER_PARAMETER_RDG_BUFFER_UAV(RWBuffer<uint>, NextNodeIndirectArgs)
SHADER_PARAMETER_RDG_BUFFER_UAV(RWStructuredBuffer, OutStreamingRequests)
SHADER_PARAMETER(uint32, StreamingRequestsBufferVersion)
SHADER_PARAMETER(uint32, StreamingRequestsBufferSize)
SHADER_PARAMETER(uint32, RenderFlags)
SHADER_PARAMETER(float, RayTracingStreamingMinCutError)
SHADER_PARAMETER(uint32, NodeLevel)
SHADER_PARAMETER_RDG_BUFFER_SRV(StructuredBuffer<FPackedNaniteView>, PackedNaniteViews)
SHADER_PARAMETER_STRUCT_INCLUDE(ShaderPrint::FShaderParameters, ShaderPrint)
RDG_BUFFER_ACCESS(IndirectArgs, ERHIAccess::IndirectArgs)
END_SHADER_PARAMETER_STRUCT()
class FCullingTypeDim : SHADER_PERMUTATION_SPARSE_INT("CULLING_TYPE", NANITE_CULLING_TYPE_NODES, NANITE_CULLING_TYPE_CLUSTERS); // don't need cluster permutation
using FPermutationDomain = TShaderPermutationDomain<FCullingTypeDim>;
static inline void ModifyCompilationEnvironment(const FGlobalShaderPermutationParameters& Parameters, FShaderCompilerEnvironment& OutEnvironment)
{
FNaniteGlobalShader::ModifyCompilationEnvironment(Parameters, OutEnvironment);
ShaderPrint::ModifyCompilationEnvironment(Parameters.Platform, OutEnvironment);
OutEnvironment.SetDefine(TEXT("NANITE_HIERARCHY_TRAVERSAL"), 1);
}
};
IMPLEMENT_GLOBAL_SHADER(FNaniteRayTracingStreamingTraversalCS, "/Engine/Private/Nanite/NaniteRayTracing.usf", "NaniteRayTracingStreamingTraversalCS", SF_Compute);
static FRDGBufferRef ResizeBufferIfNeeded(FRDGBuilder& GraphBuilder, TRefCountPtr<FRDGPooledBuffer>& ExternalBuffer, FRDGBufferDesc BufferDesc, const TCHAR* Name, bool bCopy, EAllowShrinking AllowShrinking)
{
if (!ExternalBuffer)
{
FRDGBuffer* InternalBufferNew = GraphBuilder.CreateBuffer(BufferDesc, Name);
ExternalBuffer = GraphBuilder.ConvertToExternalBuffer(InternalBufferNew);
return InternalBufferNew;
}
FRDGBufferRef BufferRDG = GraphBuilder.RegisterExternalBuffer(ExternalBuffer);
if (BufferDesc.GetSize() > BufferRDG->GetSize()) // grow
{
FRDGBufferRef SrcBufferRDG = BufferRDG;
BufferRDG = GraphBuilder.CreateBuffer(BufferDesc, Name);
ExternalBuffer = GraphBuilder.ConvertToExternalBuffer(BufferRDG);
if (bCopy)
{
AddCopyBufferPass(GraphBuilder, BufferRDG, SrcBufferRDG);
}
}
else if (AllowShrinking == EAllowShrinking::Yes && BufferDesc.GetSize() / 2 < BufferRDG->GetSize()) // shrink
{
FRDGBufferRef SrcBufferRDG = BufferRDG;
BufferRDG = GraphBuilder.CreateBuffer(BufferDesc, Name);
ExternalBuffer = GraphBuilder.ConvertToExternalBuffer(BufferRDG);
if (bCopy)
{
AddCopyBufferPass(GraphBuilder, BufferRDG, 0, SrcBufferRDG, 0, BufferDesc.GetSize());
}
}
return BufferRDG;
}
static FRDGBufferRef ResizeBufferIfNeeded(FRDGBuilder& GraphBuilder, TRefCountPtr<FRDGPooledBuffer>& ExternalBuffer, uint32 BytesPerElement, uint32 NumElements, const TCHAR* Name, bool bCopy, EAllowShrinking AllowShrinking)
{
return ResizeBufferIfNeeded(GraphBuilder, ExternalBuffer, FRDGBufferDesc::CreateStructuredDesc(BytesPerElement, NumElements), Name, bCopy, AllowShrinking);
}
static FRDGBufferRef ResizeByteAddressBufferIfNeeded(FRDGBuilder& GraphBuilder, TRefCountPtr<FRDGPooledBuffer>& ExternalBuffer, uint32 ByteAddressBufferSize, const TCHAR* Name, bool bCopy, EAllowShrinking AllowShrinking)
{
return ResizeBufferIfNeeded(GraphBuilder, ExternalBuffer, FRDGBufferDesc::CreateByteAddressDesc(ByteAddressBufferSize), Name, bCopy, AllowShrinking);
}
static uint32 GetAuxiliaryEntrySize()
{
return NaniteAssembliesSupported() ? sizeof(FUintVector2) : sizeof(uint32);
}
static uint32 CalculateAuxiliaryDataSizeInUints(uint32 NumTriangles)
{
return NumTriangles; // (one uint per triangle)
}
FRayTracingManager::FRayTracingManager()
{
}
FRayTracingManager::~FRayTracingManager()
{
}
void FRayTracingManager::Initialize()
{
#if !UE_BUILD_SHIPPING
ScreenMessageDelegate = FRendererOnScreenNotification::Get().AddLambda([this](TMultiMap<FCoreDelegates::EOnScreenMessageSeverity, FText >& OutMessages)
{
if (NumVerticesHighWaterMark >= GNaniteRayTracingMaxNumVertices)
{
OutMessages.Add(FCoreDelegates::EOnScreenMessageSeverity::Warning, FText::FromString(FString::Printf(TEXT("Nanite Ray Tracing vertex buffer overflow detected, increase 'r.RayTracing.Nanite.StreamOut.MaxNumVertices' to avoid rendering artifacts."))));
OutMessages.Add(FCoreDelegates::EOnScreenMessageSeverity::Warning, FText::FromString(FString::Printf(TEXT(" Required max num vertices for update = %d, currently = %d"), NumVerticesHighWaterMark, GNaniteRayTracingMaxNumVertices)));
if (NumVerticesHighWaterMark > NumVerticesHighWaterMarkPrev)
{
UE_LOG(LogRenderer, Warning, TEXT("Nanite Ray Tracing vertex buffer overflow detected, increase 'r.RayTracing.Nanite.StreamOut.MaxNumVertices' to avoid rendering artifacts.\n")
TEXT(" Required max num vertices for update = %d, currently = %d"), NumVerticesHighWaterMark, GNaniteRayTracingMaxNumVertices);
NumVerticesHighWaterMarkPrev = NumVerticesHighWaterMark;
}
}
if (NumIndicesHighWaterMark >= GNaniteRayTracingMaxNumIndices)
{
OutMessages.Add(FCoreDelegates::EOnScreenMessageSeverity::Warning, FText::FromString(FString::Printf(TEXT("Nanite Ray Tracing index buffer overflow detected, increase 'r.RayTracing.Nanite.StreamOut.MaxNumIndices' to avoid rendering artifacts."))));
OutMessages.Add(FCoreDelegates::EOnScreenMessageSeverity::Warning, FText::FromString(FString::Printf(TEXT(" Required max num indices for update = %d, currently = %d"), NumIndicesHighWaterMark, GNaniteRayTracingMaxNumIndices)));
if (NumIndicesHighWaterMark > NumIndicesHighWaterMarkPrev)
{
UE_LOG(LogRenderer, Warning, TEXT("Nanite Ray Tracing index buffer overflow detected, increase 'r.RayTracing.Nanite.StreamOut.MaxNumIndices' to avoid rendering artifacts.\n")
TEXT(" Required max num indices for update = %d, currently = %d"), NumIndicesHighWaterMark, GNaniteRayTracingMaxNumIndices);
NumIndicesHighWaterMarkPrev = NumIndicesHighWaterMark;
}
}
if (StagingBufferSizeHighWaterMark >= GNaniteRayTracingMaxStagingBufferSizeMB * (1024ull * 1024ull))
{
OutMessages.Add(FCoreDelegates::EOnScreenMessageSeverity::Warning, FText::FromString(FString::Printf(TEXT("Nanite Ray Tracing staging buffer overflow detected, increase 'r.RayTracing.Nanite.MaxStagingBufferSizeMB' to avoid rendering artifacts."))));
OutMessages.Add(FCoreDelegates::EOnScreenMessageSeverity::Warning, FText::FromString(FString::Printf(TEXT(" Required for update = %d, currently = %d"), StagingBufferSizeHighWaterMark / (1024ull * 1024ull), GNaniteRayTracingMaxStagingBufferSizeMB)));
if (StagingBufferSizeHighWaterMark > StagingBufferSizeHighWaterMarkPrev)
{
UE_LOG(LogRenderer, Warning, TEXT("Nanite Ray Tracing staging buffer overflow detected, increase 'r.RayTracing.Nanite.MaxStagingBufferSizeMB' to avoid rendering artifacts.\n")
TEXT(" Required for update = %d, currently = %d"), StagingBufferSizeHighWaterMark / (1024ull * 1024ull), GNaniteRayTracingMaxStagingBufferSizeMB);
StagingBufferSizeHighWaterMarkPrev = StagingBufferSizeHighWaterMark;
}
}
});
#endif
}
void FRayTracingManager::Shutdown()
{
#if !UE_BUILD_SHIPPING
FRendererOnScreenNotification::Get().Remove(ScreenMessageDelegate);
#endif
}
void FRayTracingManager::InitRHI(FRHICommandListBase&)
{
AuxiliaryDataBuffer = AllocatePooledBuffer(FRDGBufferDesc::CreateByteAddressDesc(GetAuxiliaryEntrySize() * GDisabledMinAuxiliaryBufferEntries), TEXT("NaniteRayTracing.AuxiliaryDataBuffer"));
SET_MEMORY_STAT(STAT_NaniteRayTracingAuxiliaryDataBuffer, AuxiliaryDataBuffer->GetSize());
if (!DoesPlatformSupportNanite(GMaxRHIShaderPlatform))
{
return;
}
ReadbackBuffers.SetNum(MaxReadbackBuffers);
for (auto& ReadbackData : ReadbackBuffers)
{
ReadbackData.MeshDataReadbackBuffer = new FRHIGPUBufferReadback(TEXT("NaniteRayTracing.MeshDataReadbackBuffer"));
}
bInitialized = true;
}
void FRayTracingManager::ReleaseRHI()
{
AuxiliaryDataBuffer.SafeRelease();
if (!bInitialized)
{
return;
}
bInitialized = false;
VertexBuffer.SafeRelease();
IndexBuffer.SafeRelease();
for (auto& ReadbackData : ReadbackBuffers)
{
delete ReadbackData.MeshDataReadbackBuffer;
ReadbackData.MeshDataReadbackBuffer = nullptr;
}
ReadbackBuffers.Empty();
StagingAuxiliaryDataBuffer.SafeRelease();
}
void FRayTracingManager::Add(FPrimitiveSceneInfo* SceneInfo)
{
if (!IsRayTracingEnabled() || (GetRayTracingMode() == ERayTracingMode::Fallback))
{
return;
}
auto NaniteProxy = static_cast<Nanite::FSceneProxyBase*>(SceneInfo->Proxy);
Nanite::FResourcePrimitiveInfo PrimitiveInfo = NaniteProxy->GetResourcePrimitiveInfo();
// TODO: Should use both ResourceID and HierarchyOffset as identifier for raytracing geometry
// For example, FNaniteGeometryCollectionSceneProxy can use the same ResourceID with different HierarchyOffsets
// (FNaniteGeometryCollectionSceneProxy are not supported in raytracing yet)
uint32& Id = ResourceToRayTracingIdMap.FindOrAdd(PrimitiveInfo.ResourceID, INDEX_NONE);
FInternalData* Data;
if (Id == INDEX_NONE)
{
Nanite::FResourceMeshInfo MeshInfo = NaniteProxy->GetResourceMeshInfo();
check(MeshInfo.NumClusters);
Data = new FInternalData;
Data->ResourceId = PrimitiveInfo.ResourceID;
Data->HierarchyOffset = PrimitiveInfo.HierarchyOffset;
Data->NumClusters = MeshInfo.NumClusters;
Data->NumNodes = MeshInfo.NumNodes;
Data->NumVertices = MeshInfo.NumVertices;
Data->NumTriangles = MeshInfo.NumTriangles;
Data->NumMaterials = MeshInfo.NumMaterials;
Data->NumSegments = MeshInfo.NumSegments;
Data->SegmentMapping = MeshInfo.SegmentMapping;
Data->bAssembly = MeshInfo.bAssembly;
Data->DebugName = MeshInfo.DebugName;
Data->NumResidentClusters = 0;
Data->NumResidentClustersUpdate = MeshInfo.NumResidentClusters;
Data->PrimitiveId = INDEX_NONE;
Id = Geometries.Add(Data);
if (Data->NumResidentClustersUpdate > 0)
{
// some clusters are already streamed in and RequestUpdates(...) is only called when new pages are streamed in/out
// so request an update here to make sure we build ray tracing geometry with the currently available data
UpdateRequests.Add(Id);
}
}
else
{
Data = Geometries[Id];
}
Data->Primitives.Add(SceneInfo);
PendingRemoves.Remove(Id);
NaniteProxy->SetRayTracingId(Id);
NaniteProxy->SetRayTracingDataOffset(Data->AuxiliaryDataOffset);
}
void FRayTracingManager::Remove(FPrimitiveSceneInfo* SceneInfo)
{
if (!IsRayTracingAllowed())
{
return;
}
auto NaniteProxy = static_cast<Nanite::FSceneProxyBase*>(SceneInfo->Proxy);
const uint32 GeometryId = NaniteProxy->GetRayTracingId();
if(GeometryId == INDEX_NONE)
{
check(NaniteProxy->GetRayTracingDataOffset() == INDEX_NONE);
return;
}
FInternalData* Data = Geometries[GeometryId];
Data->Primitives.Remove(SceneInfo);
if (Data->Primitives.IsEmpty())
{
PendingRemoves.Add(GeometryId);
}
NaniteProxy->SetRayTracingId(INDEX_NONE);
NaniteProxy->SetRayTracingDataOffset(INDEX_NONE);
}
void FRayTracingManager::RequestUpdates(const TMap<uint32, uint32>& InUpdateRequests)
{
if (!IsRayTracingEnabled() || (GetRayTracingMode() == ERayTracingMode::Fallback))
{
return;
}
for (auto& Elem : InUpdateRequests)
{
uint32 RuntimeResourceID = Elem.Key;
uint32* GeometryId = ResourceToRayTracingIdMap.Find(RuntimeResourceID);
if (GeometryId != nullptr)
{
FInternalData& Data = *Geometries[*GeometryId];
Data.NumResidentClustersUpdate = Elem.Value;
check(Data.NumResidentClustersUpdate > 0);
UpdateRequests.Add(*GeometryId);
}
}
}
void FRayTracingManager::AddVisiblePrimitive(const FPrimitiveSceneInfo* SceneInfo)
{
check(GetRayTracingMode() != ERayTracingMode::Fallback);
auto NaniteProxy = static_cast<Nanite::FSceneProxyBase*>(SceneInfo->Proxy);
const uint32 Id = NaniteProxy->GetRayTracingId();
check(Id != INDEX_NONE);
FInternalData* Data = Geometries[Id];
Data->PrimitiveId = SceneInfo->GetPersistentIndex().Index;
VisibleGeometries.Add(Id);
VisiblePrimitives.Add(SceneInfo);
}
void AddPass_InitNodeCullArgs(FRDGBuilder& GraphBuilder, FGlobalShaderMap* ShaderMap, FRDGEventName&& PassName, FRDGBufferUAVRef QueueState, FRDGBufferRef NodeCullArgs0, FRDGBufferRef NodeCullArgs1, uint32 CullingPass);
void AddPass_InitClusterCullArgs(FRDGBuilder& GraphBuilder, FGlobalShaderMap* ShaderMap, FRDGEventName&& PassName, FRDGBufferUAVRef QueueState, FRDGBufferRef ClusterCullArgs, uint32 CullingPass);
static void AddInitQueuePass(
FRDGBuilder& GraphBuilder,
FGlobalShaderMap* ShaderMap,
FRayTracingQueueParameters& QueueParameters,
FRayTracingLoadBalancer& LoadBalancer)
{
// Reset queue to empty state
AddClearUAVPass(GraphBuilder, QueueParameters.QueueState, 0);
// Init queue with requests
{
FRayTracingStreamingInitQueueCS::FParameters* PassParameters = GraphBuilder.AllocParameters<FRayTracingStreamingInitQueueCS::FParameters>();
PassParameters->QueueParameters = QueueParameters;
ShaderPrint::SetParameters(GraphBuilder, PassParameters->ShaderPrint);
LoadBalancer.Upload(GraphBuilder).GetShaderParameters(GraphBuilder, PassParameters->BatcherParameters);
FRayTracingStreamingInitQueueCS::FPermutationDomain PermutationVector;
auto ComputeShader = ShaderMap->GetShader<FRayTracingStreamingInitQueueCS>(PermutationVector);
FComputeShaderUtils::AddPass(GraphBuilder, RDG_EVENT_NAME("NaniteRayTracing::StreamingInitQueue"), ComputeShader, PassParameters, LoadBalancer.GetWrappedCsGroupCount());
}
}
static void GetQueueParams(FRDGBuilder& GraphBuilder, FGlobalShaderMap* ShaderMap, FRayTracingQueueParameters& OutQueueParameters)
{
const uint32 MaxNodes = Nanite::FGlobalResources::GetMaxNodes();
const uint32 MaxCandidateClusters = Nanite::FGlobalResources::GetMaxCandidateClusters();
FRDGBufferRef QueueState = GraphBuilder.CreateBuffer(FRDGBufferDesc::CreateStructuredDesc(sizeof(uint32) + 2 * (6 * sizeof(uint32)), 1), TEXT("NaniteRayTracing.QueueState"));
// Allocate buffer for nodes
FRDGBufferRef NodesBuffer = nullptr;
{
const uint32 CandidateNodeSizeInUints = 2;
FRDGBufferDesc Desc = FRDGBufferDesc::CreateStructuredDesc(sizeof(uint32), MaxNodes * CandidateNodeSizeInUints);
Desc.Usage = EBufferUsageFlags(Desc.Usage | BUF_ByteAddressBuffer);
NodesBuffer = GraphBuilder.CreateBuffer(Desc, TEXT("NaniteRayTracing.NodesBuffer"));
}
OutQueueParameters.QueueState = GraphBuilder.CreateUAV(QueueState);
OutQueueParameters.Nodes = GraphBuilder.CreateUAV(NodesBuffer);
OutQueueParameters.CandidateClusters = nullptr;
OutQueueParameters.MaxNodes = MaxNodes;
OutQueueParameters.MaxCandidateClusters = 0;
}
static void AddPass_StreamingTraversal(
FRDGBuilder& GraphBuilder,
FGlobalShaderMap* ShaderMap,
FSceneUniformBuffer& SceneUniformBuffer,
float MinCutError,
FRayTracingLoadBalancer& LoadBalancer,
FRDGBufferSRVRef PackedNaniteViews,
FRayTracingQueueParameters& QueueParameters
)
{
AddInitQueuePass(
GraphBuilder,
ShaderMap,
QueueParameters,
LoadBalancer);
FNaniteRayTracingStreamingTraversalCS::FParameters SharedParameters;
SharedParameters.Scene = SceneUniformBuffer.GetBuffer(GraphBuilder);
SharedParameters.QueueParameters = QueueParameters;
SharedParameters.HierarchyBuffer = Nanite::GStreamingManager.GetHierarchySRV(GraphBuilder);
SharedParameters.ClusterPageData = Nanite::GStreamingManager.GetClusterPageDataSRV(GraphBuilder);
SharedParameters.PageConstants.X = 0;
SharedParameters.PageConstants.Y = Nanite::GStreamingManager.GetMaxStreamingPages();
SharedParameters.RayTracingStreamingMinCutError = MinCutError;
FRDGBufferRef StreamingRequests = Nanite::GStreamingManager.GetStreamingRequestsBuffer(GraphBuilder);
SharedParameters.OutStreamingRequests = GraphBuilder.CreateUAV(StreamingRequests);
SharedParameters.StreamingRequestsBufferVersion = GStreamingManager.GetStreamingRequestsBufferVersion();
SharedParameters.StreamingRequestsBufferSize = StreamingRequests->Desc.NumElements;
SharedParameters.RenderFlags = 0;
SharedParameters.RenderFlags |= NANITE_RENDER_FLAG_OUTPUT_STREAMING_REQUESTS;
ShaderPrint::SetParameters(GraphBuilder, SharedParameters.ShaderPrint);
FNaniteRayTracingStreamingTraversalCS::FPermutationDomain PermutationVector;
{
RDG_EVENT_SCOPE(GraphBuilder, "StreamingTraversal");
// Node passes
{
FRDGBufferRef NodeCullArgs0 = GraphBuilder.CreateBuffer(FRDGBufferDesc::CreateIndirectDesc((NANITE_MAX_CLUSTER_HIERARCHY_DEPTH + 1) * NANITE_NODE_CULLING_ARG_COUNT), TEXT("Nanite.CullArgs0"));
FRDGBufferRef NodeCullArgs1 = GraphBuilder.CreateBuffer(FRDGBufferDesc::CreateIndirectDesc((NANITE_MAX_CLUSTER_HIERARCHY_DEPTH + 1) * NANITE_NODE_CULLING_ARG_COUNT), TEXT("Nanite.CullArgs1"));
AddPass_InitNodeCullArgs(GraphBuilder, ShaderMap, RDG_EVENT_NAME("InitNodeCullArgs"), QueueParameters.QueueState, NodeCullArgs0, NodeCullArgs1, 0);
PermutationVector.Set<FNaniteRayTracingStreamingTraversalCS::FCullingTypeDim>(NANITE_CULLING_TYPE_NODES);
auto ComputeShader = ShaderMap->GetShader<FNaniteRayTracingStreamingTraversalCS>(PermutationVector);
const uint32 MaxLevels = Nanite::GStreamingManager.GetMaxHierarchyLevels();
for (uint32 NodeLevel = 0; NodeLevel < MaxLevels; NodeLevel++)
{
auto* PassParameters = GraphBuilder.AllocParameters<FNaniteRayTracingStreamingTraversalCS::FParameters>(&SharedParameters);
FRDGBufferRef CurrentIndirectArgs = (NodeLevel & 1) ? NodeCullArgs1 : NodeCullArgs0;
FRDGBufferRef NextIndirectArgs = (NodeLevel & 1) ? NodeCullArgs0 : NodeCullArgs1;
PassParameters->CurrentNodeIndirectArgs = GraphBuilder.CreateSRV(CurrentIndirectArgs);
PassParameters->NextNodeIndirectArgs = GraphBuilder.CreateUAV(NextIndirectArgs);
PassParameters->IndirectArgs = CurrentIndirectArgs;
PassParameters->NodeLevel = NodeLevel;
PassParameters->PackedNaniteViews = PackedNaniteViews;
FComputeShaderUtils::AddPass(
GraphBuilder,
RDG_EVENT_NAME("NodeCull_%d", NodeLevel),
ComputeShader,
PassParameters,
CurrentIndirectArgs,
NodeLevel * NANITE_NODE_CULLING_ARG_COUNT * sizeof(uint32)
);
}
}
}
}
void FRayTracingManager::UpdateStreaming(FRDGBuilder& GraphBuilder, TConstArrayView<FViewInfo> Views, FSceneUniformBuffer& SceneUniformBuffer, FIntPoint RasterTextureSize)
{
if (!GNaniteRayTracingStreaming || Views.IsEmpty() || VisiblePrimitives.IsEmpty())
{
VisiblePrimitives.Empty();
return;
}
RDG_EVENT_SCOPE_STAT(GraphBuilder, NaniteRayTracingUpdateStreaming, "NaniteRayTracing::UpdateStreaming");
RDG_GPU_STAT_SCOPE(GraphBuilder, NaniteRayTracingUpdateStreaming);
// TODO: MaxPixelsPerEdgeMultipler should match rasterization + LOD bias (ie: FDeferredShadingSceneRenderer::RenderNanite(...))
float LODScaleFactor = FMath::Exp2(-CVarNaniteRayTracingStreamingLodBias.GetValueOnRenderThread());
float LODScaleFactorOffscreen = FMath::Exp2(-CVarNaniteRayTracingStreamingOffscreenLodBias.GetValueOnRenderThread());
LODScaleFactor *= Nanite::GStreamingManager.GetQualityScaleFactor();
LODScaleFactorOffscreen *= Nanite::GStreamingManager.GetQualityScaleFactor();
float MaxPixelsPerEdgeMultipler = 1.0f / LODScaleFactor;
float MaxPixelsPerEdgeMultiplerOffscreen = 1.0f / LODScaleFactorOffscreen;
FRDGUploadData<Nanite::FPackedView> PackedViews(GraphBuilder, Views.Num() * 2);
for (int32 ViewIndex = 0; ViewIndex < Views.Num(); ++ViewIndex)
{
PackedViews[ViewIndex * 2 + 0] = Nanite::CreatePackedViewFromViewInfo(
Views[ViewIndex],
RasterTextureSize,
NANITE_VIEW_FLAG_NEAR_CLIP, // TODO: HZB test
/* StreamingPriorityCategory = */ 3, // TODO
/* MinBoundsRadius = */ 0.0f,
MaxPixelsPerEdgeMultipler,
nullptr // TODO: HZB test
);
PackedViews[ViewIndex * 2 + 1] = Nanite::CreatePackedViewFromViewInfo(
Views[ViewIndex],
RasterTextureSize,
NANITE_VIEW_FLAG_NEAR_CLIP,
/* StreamingPriorityCategory = */ 0, // TODO
/* MinBoundsRadius = */ 0.0f,
MaxPixelsPerEdgeMultiplerOffscreen,
nullptr
);
}
FRDGBufferRef ViewsBufferUpload = CreateStructuredBuffer(GraphBuilder, TEXT("NaniteRayTracing.Views"), PackedViews);
FRayTracingLoadBalancer LoadBalancer;
// TODO: move this to tasks
for (const FPrimitiveSceneInfo* SceneInfo : VisiblePrimitives)
{
const int32 InstanceSceneDataOffset = SceneInfo->GetInstanceSceneDataOffset();
const int32 NumInstanceSceneDataEntries = SceneInfo->GetNumInstanceSceneDataEntries();
if (NumInstanceSceneDataEntries > 0u)
{
LoadBalancer.Add(InstanceSceneDataOffset, NumInstanceSceneDataEntries, SceneInfo->GetPersistentIndex().Index);
}
}
FGlobalShaderMap* ShaderMap = GetGlobalShaderMap(GetFeatureLevel());
FRayTracingQueueParameters QueueParameters;
GetQueueParams(GraphBuilder, ShaderMap, QueueParameters);
FRDGBufferRef VertexAndIndexAllocatorBuffer = GraphBuilder.CreateBuffer(FRDGBufferDesc::CreateStructuredDesc(sizeof(uint32), 2), TEXT("NaniteStreamOut.VertexAndIndexAllocatorBuffer"));
AddClearUAVPass(GraphBuilder, GraphBuilder.CreateUAV(VertexAndIndexAllocatorBuffer), 0);
AddPass_StreamingTraversal(GraphBuilder, ShaderMap, SceneUniformBuffer, GNaniteRayTracingStreamingOffscreenMinCutError, LoadBalancer, GraphBuilder.CreateSRV(ViewsBufferUpload), QueueParameters);
VisiblePrimitives.Empty();
}
BEGIN_SHADER_PARAMETER_STRUCT(FNaniteRayTracingPrimitivesParams, )
RDG_BUFFER_ACCESS(Buffer0, ERHIAccess::SRVCompute)
RDG_BUFFER_ACCESS(Buffer1, ERHIAccess::SRVCompute)
RDG_BUFFER_ACCESS(ScratchBuffer, ERHIAccess::UAVCompute)
END_SHADER_PARAMETER_STRUCT()
void FRayTracingManager::ProcessUpdateRequests(FRDGBuilder& GraphBuilder, FSceneUniformBuffer &SceneUniformBuffer)
{
// D3D12 limits resources to 2048MB.
GNaniteRayTracingMaxStagingBufferSizeMB = FMath::Min(GNaniteRayTracingMaxStagingBufferSizeMB, 2048);
if (GNaniteRayTracingForceUpdateVisible)
{
UpdateRequests.Append(VisibleGeometries);
GNaniteRayTracingForceUpdateVisible = false;
}
if (!GNaniteRayTracingUpdate || GetRayTracingMode() == ERayTracingMode::Fallback || bUpdating || UpdateRequests.IsEmpty())
{
VisibleGeometries.Empty();
// TODO: shrink staging buffer
return;
}
TSet<uint32> ToUpdate;
uint32 NumMeshDataEntries = 0;
uint32 NumAuxiliaryDataEntries = 0;
uint32 NumSegmentMappingEntries = 0;
const uint64 AuxiliaryEntrySize = GetAuxiliaryEntrySize();
for (uint32 GeometryId : VisibleGeometries)
{
if (UpdateRequests.Contains(GeometryId))
{
FInternalData& Data = *Geometries[GeometryId];
check(Data.NumResidentClustersUpdate > 0);
//check(Data.NumResidentClustersUpdate <= Data.NumClusters); // Temporary workaround: NumClusters from cooked data is not always correct for Geometry Collections: UE-194917
// TODO: Investigate a more conservative MaxNumTriangles for assemblies
const uint32 MaxNumTriangles = Data.bAssembly ? Data.NumTriangles : (Data.NumResidentClustersUpdate * NANITE_MAX_CLUSTER_TRIANGLES);
const uint64 MaxNumAuxiliaryDataEntries = CalculateAuxiliaryDataSizeInUints(MaxNumTriangles);
const uint64 NewNumAuxiliaryDataEntries = NumAuxiliaryDataEntries + MaxNumAuxiliaryDataEntries;
const uint64 NewAuxiliaryDataBufferSize = NewNumAuxiliaryDataEntries * AuxiliaryEntrySize;
#if !UE_BUILD_SHIPPING
StagingBufferSizeHighWaterMark = FMath::Max(StagingBufferSizeHighWaterMark, MaxNumAuxiliaryDataEntries * AuxiliaryEntrySize);
#endif
if (NewAuxiliaryDataBufferSize >= (uint64)GNaniteRayTracingMaxStagingBufferSizeMB * (1024ull * 1024ull))
{
break;
}
check(NewAuxiliaryDataBufferSize <= (1u << 31)); // D3D12 limits resources to 2048MB.
if (!GNaniteRayTracingProfileStreamOut) // don't remove request when profiling stream out
{
UpdateRequests.Remove(GeometryId);
}
ToUpdate.Add(GeometryId);
Data.NumResidentClusters = Data.NumResidentClustersUpdate;
check(!Data.bUpdating);
Data.bUpdating = true;
check(Data.BaseMeshDataOffset == -1);
Data.BaseMeshDataOffset = NumMeshDataEntries;
check(Data.StagingAuxiliaryDataOffset == INDEX_NONE);
Data.StagingAuxiliaryDataOffset = NumAuxiliaryDataEntries;
NumMeshDataEntries += (sizeof(FStreamOutMeshDataHeader) + sizeof(FStreamOutMeshDataSegment) * Data.NumSegments);
NumAuxiliaryDataEntries = NewNumAuxiliaryDataEntries;
NumSegmentMappingEntries += Data.SegmentMapping.Num();
}
}
VisibleGeometries.Empty();
if (ToUpdate.IsEmpty())
{
return;
}
RDG_EVENT_SCOPE(GraphBuilder, "Nanite::FRayTracingManager::ProcessUpdateRequests");
bUpdating = true;
FReadbackData& ReadbackData = ReadbackBuffers[ReadbackBuffersWriteIndex];
check(ReadbackData.Entries.IsEmpty());
// Upload geometry data
FRDGBufferRef RequestBuffer = nullptr;
FRDGBufferRef SegmentMappingBuffer = nullptr;
{
FRDGUploadData<FStreamOutRequest> UploadData(GraphBuilder, ToUpdate.Num());
FRDGUploadData<uint32> SegmentMappingUploadData(GraphBuilder, NumSegmentMappingEntries);
uint32 Index = 0;
uint32 SegmentMappingOffset = 0;
for (auto GeometryId : ToUpdate)
{
const FInternalData& Data = *Geometries[GeometryId];
FStreamOutRequest& Request = UploadData[Index];
Request.PrimitiveId = Data.PrimitiveId;
Request.NumMaterials = Data.NumMaterials;
Request.NumSegments = Data.NumSegments;
Request.SegmentMappingOffset = SegmentMappingOffset;
Request.AuxiliaryDataOffset = Data.StagingAuxiliaryDataOffset;
Request.MeshDataOffset = Data.BaseMeshDataOffset;
for (uint32 SegmentIndex : Data.SegmentMapping)
{
SegmentMappingUploadData[SegmentMappingOffset] = SegmentIndex;
++SegmentMappingOffset;
}
ReadbackData.Entries.Add(GeometryId);
++Index;
}
INC_DWORD_STAT_BY(STAT_NaniteRayTracingInFlightUpdates, ToUpdate.Num());
RequestBuffer = CreateStructuredBuffer(GraphBuilder, TEXT("NaniteRayTracing.RequestBuffer"), UploadData);
SegmentMappingBuffer = CreateStructuredBuffer(GraphBuilder, TEXT("NaniteRayTracing.SegmentMappingBuffer"), SegmentMappingUploadData);
}
FRDGBufferDesc MeshDataBufferDesc = FRDGBufferDesc::CreateStructuredDesc(sizeof(uint32), FMath::Max(NumMeshDataEntries, 32U));
MeshDataBufferDesc.Usage |= BUF_SourceCopy;
FRDGBufferRef MeshDataBuffer = GraphBuilder.CreateBuffer(MeshDataBufferDesc, TEXT("NaniteRayTracing.MeshDataBuffer"));
AddClearUAVPass(GraphBuilder, GraphBuilder.CreateUAV(MeshDataBuffer), 0);
FRDGBufferRef StagingAuxiliaryDataBufferRDG;
{
const uint32 BufferNumAuxiliaryDataEntries = FMath::Max(NumAuxiliaryDataEntries, GMinAuxiliaryBufferEntries);
const bool bCopy = false;
StagingAuxiliaryDataBufferRDG = ResizeByteAddressBufferIfNeeded(GraphBuilder, StagingAuxiliaryDataBuffer, AuxiliaryEntrySize * BufferNumAuxiliaryDataEntries, TEXT("NaniteRayTracing.StagingAuxiliaryDataBuffer"), bCopy, EAllowShrinking::Yes);
SET_MEMORY_STAT(STAT_NaniteRayTracingStagingAuxiliaryDataBuffer, StagingAuxiliaryDataBufferRDG->GetSize());
}
FRDGBufferRef VertexBufferRDG = ResizeBufferIfNeeded(GraphBuilder, VertexBuffer, sizeof(float), GNaniteRayTracingMaxNumVertices * 3, TEXT("NaniteRayTracing.VertexBuffer"), /*bCopy*/ false, EAllowShrinking::Yes);
FRDGBufferRef IndexBufferRDG = ResizeBufferIfNeeded(GraphBuilder, IndexBuffer, sizeof(uint32), GNaniteRayTracingMaxNumIndices, TEXT("NaniteRayTracing.IndexBuffer"), /*bCopy*/ false, EAllowShrinking::Yes);
StreamOutData(
GraphBuilder,
GetGlobalShaderMap(GetFeatureLevel()),
SceneUniformBuffer,
GetCutError(),
ToUpdate.Num(),
RequestBuffer,
SegmentMappingBuffer,
MeshDataBuffer,
StagingAuxiliaryDataBufferRDG,
VertexBufferRDG,
GNaniteRayTracingMaxNumVertices,
IndexBufferRDG,
GNaniteRayTracingMaxNumIndices);
INC_DWORD_STAT_BY(STAT_NaniteRayTracingStreamOutRequests, ToUpdate.Num());
if (!GNaniteRayTracingProfileStreamOut)
{
// readback
{
AddReadbackBufferPass(GraphBuilder, RDG_EVENT_NAME("NaniteRayTracing::Readback"), MeshDataBuffer,
[MeshDataReadbackBuffer = ReadbackData.MeshDataReadbackBuffer, MeshDataBuffer](FRDGAsyncTask, FRHICommandList& RHICmdList)
{
MeshDataReadbackBuffer->EnqueueCopy(RHICmdList, MeshDataBuffer->GetRHI(), 0u);
});
ReadbackData.NumMeshDataEntries = NumMeshDataEntries;
ReadbackBuffersWriteIndex = (ReadbackBuffersWriteIndex + 1u) % MaxReadbackBuffers;
ReadbackBuffersNumPending = FMath::Min(ReadbackBuffersNumPending + 1u, MaxReadbackBuffers);
}
}
else
{
// if running profile mode, clear state for next frame
bUpdating = false;
for (auto GeometryId : ToUpdate)
{
FInternalData& Data = *Geometries[GeometryId];
Data.bUpdating = false;
Data.BaseMeshDataOffset = -1;
Data.StagingAuxiliaryDataOffset = INDEX_NONE;
}
ReadbackData.Entries.Empty();
}
ToUpdate.Empty();
}
void FRayTracingManager::Update()
{
const bool bUsingNaniteRayTracing = GetRayTracingMode() != ERayTracingMode::Fallback;
if (!bUsingNaniteRayTracing && !bUpdating)
{
StagingAuxiliaryDataBuffer.SafeRelease();
SET_MEMORY_STAT(STAT_NaniteRayTracingStagingAuxiliaryDataBuffer, 0);
VertexBuffer.SafeRelease();
IndexBuffer.SafeRelease();
#if !UE_BUILD_SHIPPING
NumVerticesHighWaterMark = 0;
NumIndicesHighWaterMark = 0;
StagingBufferSizeHighWaterMark = 0;
#endif
}
// process PendingRemoves
{
TSet<uint32> StillPendingRemoves;
for (uint32 GeometryId : PendingRemoves)
{
FInternalData* Data = Geometries[GeometryId];
if (Data->bUpdating)
{
// can't remove until update is finished, delay to next frame
StillPendingRemoves.Add(GeometryId);
}
else
{
if (Data->AuxiliaryDataOffset != INDEX_NONE)
{
AuxiliaryDataAllocator.Free(Data->AuxiliaryDataOffset, Data->AuxiliaryDataSize);
}
ResourceToRayTracingIdMap.Remove(Data->ResourceId);
Geometries.RemoveAt(GeometryId);
delete (Data);
}
}
Swap(PendingRemoves, StillPendingRemoves);
}
const uint32 PrevScheduledBuildsNumPrimitives = ScheduledBuildsNumPrimitives;
// scheduling pending builds
{
const uint32 PrevNumScheduled = ScheduledBuilds.Num();
for (const FPendingBuild& PendingBuild : PendingBuilds)
{
if (ScheduledBuildsNumPrimitives >= GNaniteRayTracingMaxBuiltPrimitivesPerFrame)
{
break;
}
FInternalData& Data = *Geometries[PendingBuild.GeometryId];
Data.RayTracingGeometryRHI = PendingBuild.RayTracingGeometryRHI;
const FRayTracingGeometryInitializer& Initializer = Data.RayTracingGeometryRHI->GetInitializer();
ScheduledBuildsNumPrimitives += Initializer.TotalPrimitiveCount;
if (Data.AuxiliaryDataOffset != INDEX_NONE)
{
AuxiliaryDataAllocator.Free(Data.AuxiliaryDataOffset, Data.AuxiliaryDataSize);
}
Data.AuxiliaryDataSize = Initializer.TotalPrimitiveCount;
Data.AuxiliaryDataOffset = AuxiliaryDataAllocator.Allocate(Data.AuxiliaryDataSize);
for (auto& Primitive : Data.Primitives)
{
if (bUsingNaniteRayTracing)
{
Primitive->SetCachedRayTracingInstanceGeometryRHI(Data.RayTracingGeometryRHI, Data.NumSegments);
}
auto NaniteProxy = static_cast<Nanite::FSceneProxyBase*>(Primitive->Proxy);
NaniteProxy->SetRayTracingDataOffset(Data.AuxiliaryDataOffset);
Primitive->Scene->GPUScene.AddPrimitiveToUpdate(Primitive->GetPersistentIndex(), EPrimitiveDirtyState::ChangedOther);
}
ScheduledBuilds.Add(PendingBuild.GeometryId);
}
// not using RemoveAtSwap to avoid starving requests in the middle
// not expecting significant number of elements remaining anyway
PendingBuilds.RemoveAt(0, ScheduledBuilds.Num() - PrevNumScheduled);
DEC_DWORD_STAT_BY(STAT_NaniteRayTracingPendingBuilds, ScheduledBuilds.Num() - PrevNumScheduled);
}
while (ReadbackBuffersNumPending > 0)
{
uint32 Index = (ReadbackBuffersWriteIndex + MaxReadbackBuffers - ReadbackBuffersNumPending) % MaxReadbackBuffers;
FReadbackData& ReadbackData = ReadbackBuffers[Index];
if (ReadbackData.MeshDataReadbackBuffer->IsReady())
{
ReadbackBuffersNumPending--;
auto MeshDataReadbackBufferPtr = (const uint32*)ReadbackData.MeshDataReadbackBuffer->Lock(ReadbackData.NumMeshDataEntries * sizeof(uint32));
for (int32 GeometryIndex = 0; GeometryIndex < ReadbackData.Entries.Num(); ++GeometryIndex)
{
uint32 GeometryId = ReadbackData.Entries[GeometryIndex];
FInternalData& Data = *Geometries[GeometryId];
auto Header = (const FStreamOutMeshDataHeader*)(MeshDataReadbackBufferPtr + Data.BaseMeshDataOffset);
auto Segments = (const FStreamOutMeshDataSegment*)(Header + 1);
if (!Data.bAssembly)
{
check(Header->NumClusters <= Data.NumResidentClusters);
}
const uint32 VertexBufferOffset = Header->VertexBufferOffset;
const uint32 IndexBufferOffset = Header->IndexBufferOffset;
const uint32 NumVertices = Header->NumVertices;
if (VertexBufferOffset == 0xFFFFFFFFu || IndexBufferOffset == 0xFFFFFFFFu)
{
// ran out of space in StreamOut buffers
Data.bUpdating = false;
Data.BaseMeshDataOffset = -1;
check(Data.StagingAuxiliaryDataOffset != INDEX_NONE);
Data.StagingAuxiliaryDataOffset = INDEX_NONE;
UpdateRequests.Add(GeometryId); // request update again
DEC_DWORD_STAT_BY(STAT_NaniteRayTracingInFlightUpdates, 1);
INC_DWORD_STAT_BY(STAT_NaniteRayTracingFailedStreamOutRequests, 1);
#if !UE_BUILD_SHIPPING
NumVerticesHighWaterMark = FMath::Max(NumVerticesHighWaterMark, (int32)Header->NumVertices);
NumIndicesHighWaterMark = FMath::Max(NumIndicesHighWaterMark, (int32)Header->NumIndices);
#endif
continue;
}
FRayTracingGeometryInitializer Initializer;
Initializer.DebugName = Data.DebugName;
// Initializer.bFastBuild = false;
// Initializer.bAllowUpdate = false;
Initializer.bAllowCompaction = false;
Initializer.IndexBuffer = IndexBuffer->GetRHI();
Initializer.IndexBufferOffset = IndexBufferOffset * sizeof(uint32);
Initializer.TotalPrimitiveCount = 0;
Initializer.Segments.SetNum(Data.NumSegments);
for (uint32 SegmentIndex = 0; SegmentIndex < Data.NumSegments; ++SegmentIndex)
{
const uint32 NumIndices = Segments[SegmentIndex].NumIndices;
const uint32 FirstIndex = Segments[SegmentIndex].FirstIndex;
FRayTracingGeometrySegment& Segment = Initializer.Segments[SegmentIndex];
Segment.FirstPrimitive = FirstIndex / 3;
Segment.NumPrimitives = NumIndices / 3;
Segment.VertexBuffer = VertexBuffer->GetRHI();
Segment.VertexBufferOffset = VertexBufferOffset * sizeof(FVector3f);
Segment.MaxVertices = NumVertices;
Initializer.TotalPrimitiveCount += Segment.NumPrimitives;
}
FRayTracingGeometryRHIRef RayTracingGeometryRHI = RHICreateRayTracingGeometry(Initializer);
if (ScheduledBuildsNumPrimitives < GNaniteRayTracingMaxBuiltPrimitivesPerFrame)
{
ScheduledBuildsNumPrimitives += RayTracingGeometryRHI->GetInitializer().TotalPrimitiveCount;
Data.RayTracingGeometryRHI = MoveTemp(RayTracingGeometryRHI);
if (Data.AuxiliaryDataOffset != INDEX_NONE)
{
AuxiliaryDataAllocator.Free(Data.AuxiliaryDataOffset, Data.AuxiliaryDataSize);
}
// allocate persistent auxiliary range
Data.AuxiliaryDataSize = CalculateAuxiliaryDataSizeInUints(Initializer.TotalPrimitiveCount);
Data.AuxiliaryDataOffset = AuxiliaryDataAllocator.Allocate(Data.AuxiliaryDataSize);
for (auto& Primitive : Data.Primitives)
{
if (bUsingNaniteRayTracing)
{
Primitive->SetCachedRayTracingInstanceGeometryRHI(Data.RayTracingGeometryRHI, Data.NumSegments);
}
auto NaniteProxy = static_cast<Nanite::FSceneProxyBase*>(Primitive->Proxy);
NaniteProxy->SetRayTracingDataOffset(Data.AuxiliaryDataOffset);
Primitive->Scene->GPUScene.AddPrimitiveToUpdate(Primitive->GetPersistentIndex(), EPrimitiveDirtyState::ChangedOther);
}
ScheduledBuilds.Add(GeometryId);
}
else
{
FPendingBuild PendingBuild;
PendingBuild.GeometryId = GeometryId;
PendingBuild.RayTracingGeometryRHI = MoveTemp(RayTracingGeometryRHI);
PendingBuilds.Add(MoveTemp(PendingBuild));
INC_DWORD_STAT_BY(STAT_NaniteRayTracingPendingBuilds, 1);
}
}
ReadbackData.Entries.Empty();
ReadbackData.MeshDataReadbackBuffer->Unlock();
}
else
{
break;
}
}
INC_DWORD_STAT_BY(STAT_NaniteRayTracingScheduledBuildsNumPrimitives, ScheduledBuildsNumPrimitives - PrevScheduledBuildsNumPrimitives);
}
bool FRayTracingManager::ProcessBuildRequests(FRDGBuilder& GraphBuilder)
{
if (!bInitialized)
{
return false;
}
// resize AuxiliaryDataBuffer if necessary
FRDGBufferRef AuxiliaryDataBufferRDG;
{
uint32 MinAuxiliaryBufferEntries;
EAllowShrinking AllowShrinking;
if (GetRayTracingMode() == ERayTracingMode::Fallback)
{
// when not using Nanite Ray Tracing allow AuxiliaryDataBuffer to shrink to initial size
MinAuxiliaryBufferEntries = GDisabledMinAuxiliaryBufferEntries;
AllowShrinking = EAllowShrinking::Yes;
}
else
{
MinAuxiliaryBufferEntries = GMinAuxiliaryBufferEntries;
AllowShrinking = EAllowShrinking::No;
}
const uint32 NumAuxiliaryDataEntries = FMath::Max((uint32)AuxiliaryDataAllocator.GetMaxSize(), MinAuxiliaryBufferEntries);
AuxiliaryDataBufferRDG = ResizeByteAddressBufferIfNeeded(GraphBuilder, AuxiliaryDataBuffer, GetAuxiliaryEntrySize() * NumAuxiliaryDataEntries, TEXT("NaniteRayTracing.AuxiliaryDataBuffer"), /*bCopy*/ true, AllowShrinking);
SET_MEMORY_STAT(STAT_NaniteRayTracingAuxiliaryDataBuffer, AuxiliaryDataBufferRDG->GetSize());
}
FRDGBufferRef StagingAuxiliaryDataBufferRDG = ScheduledBuilds.IsEmpty() ? nullptr : GraphBuilder.RegisterExternalBuffer(StagingAuxiliaryDataBuffer);
TArray<FRayTracingGeometryBuildParams> BuildParams;
uint32 BLASScratchSize = 0;
const uint32 AuxiliaryEntrySize = GetAuxiliaryEntrySize();
for (uint32 GeometryId : ScheduledBuilds)
{
FInternalData& Data = *Geometries[GeometryId];
const FRayTracingGeometryInitializer& Initializer = Data.RayTracingGeometryRHI->GetInitializer();
FRayTracingGeometryBuildParams Params;
Params.Geometry = Data.RayTracingGeometryRHI;
Params.BuildMode = EAccelerationStructureBuildMode::Build;
BuildParams.Add(Params);
FRayTracingAccelerationStructureSize SizeInfo = RHICalcRayTracingGeometrySize(Initializer);
BLASScratchSize = Align(BLASScratchSize + SizeInfo.BuildScratchSize, GRHIRayTracingScratchBufferAlignment);
Data.bUpdating = false;
Data.BaseMeshDataOffset = -1;
DEC_DWORD_STAT_BY(STAT_NaniteRayTracingInFlightUpdates, 1);
// copy from staging to persistent auxiliary data buffer
AddCopyBufferPass(GraphBuilder, AuxiliaryDataBufferRDG, Data.AuxiliaryDataOffset * AuxiliaryEntrySize, StagingAuxiliaryDataBufferRDG, Data.StagingAuxiliaryDataOffset * AuxiliaryEntrySize, Data.AuxiliaryDataSize * AuxiliaryEntrySize);
Data.StagingAuxiliaryDataOffset = INDEX_NONE;
}
const uint32 BLASScratchSizeMultiple = FMath::Max(GNaniteRayTracingBLASScratchSizeMultipleMB, 1) * 1024 * 1024;
BLASScratchSize = FMath::DivideAndRoundUp(BLASScratchSize, BLASScratchSizeMultiple) * BLASScratchSizeMultiple;
INC_DWORD_STAT_BY(STAT_NaniteRayTracingScheduledBuilds, ScheduledBuilds.Num());
ScheduledBuilds.Empty();
ScheduledBuildsNumPrimitives = 0;
bool bAnyBlasRebuilt = false;
if (BuildParams.Num() > 0)
{
RDG_EVENT_SCOPE_STAT(GraphBuilder, RebuildNaniteBLAS, "RebuildNaniteBLAS");
RDG_GPU_STAT_SCOPE(GraphBuilder, RebuildNaniteBLAS);
FRDGBufferDesc ScratchBufferDesc;
ScratchBufferDesc.Usage = EBufferUsageFlags::RayTracingScratch | EBufferUsageFlags::StructuredBuffer;
ScratchBufferDesc.BytesPerElement = GRHIRayTracingScratchBufferAlignment;
ScratchBufferDesc.NumElements = FMath::DivideAndRoundUp(BLASScratchSize, GRHIRayTracingScratchBufferAlignment);
FRDGBufferRef ScratchBuffer = GraphBuilder.CreateBuffer(ScratchBufferDesc, TEXT("NaniteRayTracing.BLASSharedScratchBuffer"));
FNaniteRayTracingPrimitivesParams* PassParams = GraphBuilder.AllocParameters<FNaniteRayTracingPrimitivesParams>();
PassParams->Buffer0 = nullptr;
PassParams->Buffer1 = nullptr;
PassParams->ScratchBuffer = ScratchBuffer;
GraphBuilder.AddPass(RDG_EVENT_NAME("NaniteRayTracing::UpdateBLASes"), PassParams, ERDGPassFlags::Compute | ERDGPassFlags::NeverCull,
[PassParams, BuildParams = MoveTemp(BuildParams)](FRDGAsyncTask, FRHIComputeCommandList& RHICmdList)
{
FRHIBufferRange ScratchBufferRange;
ScratchBufferRange.Buffer = PassParams->ScratchBuffer->GetRHI();
ScratchBufferRange.Offset = 0;
RHICmdList.BuildAccelerationStructures(BuildParams, ScratchBufferRange);
});
bAnyBlasRebuilt = true;
}
if (ReadbackBuffersNumPending == 0 && PendingBuilds.IsEmpty())
{
bUpdating = false;
}
return bAnyBlasRebuilt;
}
FRHIRayTracingGeometry* FRayTracingManager::GetRayTracingGeometry(FPrimitiveSceneInfo* SceneInfo) const
{
auto NaniteProxy = static_cast<const Nanite::FSceneProxyBase*>(SceneInfo->Proxy);
const uint32 Id = NaniteProxy->GetRayTracingId();
if (Id == INDEX_NONE)
{
return nullptr;
}
const FInternalData* Data = Geometries[Id];
return Data->RayTracingGeometryRHI;
}
bool FRayTracingManager::CheckModeChanged()
{
bPrevMode = bCurrentMode;
bCurrentMode = GetRayTracingMode();
return bPrevMode != bCurrentMode;
}
float FRayTracingManager::GetCutError() const
{
return GNaniteRayTracingCutError;
}
void FRayTracingManager::EndFrame()
{
// clear RDG resources since they can't be reused over multiple frames
UniformBuffer = nullptr;
}
void FRayTracingManager::UpdateUniformBuffer(FRDGBuilder& GraphBuilder, bool bShouldRenderNanite)
{
FNaniteRayTracingUniformParameters* Parameters = GraphBuilder.AllocParameters<FNaniteRayTracingUniformParameters>();
if (bShouldRenderNanite && bCurrentMode != ERayTracingMode::Fallback)
{
Parameters->PageConstants.X = 0;
Parameters->PageConstants.Y = Nanite::GStreamingManager.GetMaxStreamingPages();
Parameters->MaxNodes = Nanite::FGlobalResources::GetMaxNodes();
Parameters->ClusterPageData = Nanite::GStreamingManager.GetClusterPageDataSRV(GraphBuilder);
Parameters->HierarchyBuffer = Nanite::GStreamingManager.GetHierarchySRV(GraphBuilder);
Parameters->RayTracingDataBuffer = Nanite::GRayTracingManager.GetAuxiliaryDataSRV(GraphBuilder);
}
else
{
Parameters->PageConstants.X = 0;
Parameters->PageConstants.Y = 0;
Parameters->MaxNodes = 0;
Parameters->ClusterPageData = GraphBuilder.CreateSRV(GSystemTextures.GetDefaultByteAddressBuffer(GraphBuilder, 4u));
Parameters->HierarchyBuffer = GraphBuilder.CreateSRV(GSystemTextures.GetDefaultByteAddressBuffer(GraphBuilder, 4u));
Parameters->RayTracingDataBuffer = GraphBuilder.CreateSRV(GSystemTextures.GetDefaultByteAddressBuffer(GraphBuilder, 8u));
}
UniformBuffer = GraphBuilder.CreateUniformBuffer(Parameters);
}
TGlobalResource<FRayTracingManager> GRayTracingManager;
} // namespace Nanite
#endif // RHI_RAYTRACING