Files
UnrealEngine/Engine/Plugins/Experimental/NNERuntimeIREE/Source/IREEDriverRDG/Private/IREEDriverRDGBuiltinExecutables.cpp
Brandyn / Techy fcc1b09210 init
2026-04-04 15:40:51 -05:00

125 lines
3.9 KiB
C++

// Copyright Epic Games, Inc. All Rights Reserved.
#include "IREEDriverRDGBuiltinExecutables.h"
#ifdef WITH_IREE_DRIVER_RDG
#include "NNERuntimeIREEShaderFillBufferCS.h"
#include "RenderGraphBuilder.h"
#include "RenderGraphUtils.h"
DECLARE_GPU_STAT_NAMED(FDirectCommandBufferFillBuffer, TEXT("DirectCommandBuffer.AddFillBufferPass"));
namespace UE::IREE::HAL::RDG
{
namespace BuiltinExecutables
{
namespace Private
{
template <typename T>
uint32 ExpandToWord(T Pattern);
template<>
uint32 ExpandToWord<uint8>(uint8 Pattern)
{
return Pattern * 0x01010101;
}
template<>
uint32 ExpandToWord<uint16>(uint16 Pattern)
{
return Pattern * 0x00010001;
}
uint32 ExpandPatternToWord(uint32 Pattern, uint32 PatternLength)
{
return PatternLength == 1 ? ExpandToWord<uint8>(static_cast<uint8>(Pattern)):
PatternLength == 2 ? ExpandToWord<uint16>(static_cast<uint16>(Pattern)) : Pattern;
}
}
iree_status_t FillBuffer(uint8* Buffer, uint32 BufferLength, uint32 Pattern, uint32 PatternLength, uint32 FillOffset, uint32 FillLength)
{
check(Buffer);
check(BufferLength >= FillOffset + FillLength);
check(FillOffset % 4 == 0);
check(FillLength % 4 == 0);
const uint32 ExpandedPattern = Private::ExpandPatternToWord(Pattern, PatternLength);
const uint32 ExpandedLength = 4;
const uint32 NumRepetitions = FillLength / ExpandedLength;
for (uint32 i = 0; i < NumRepetitions; i++)
{
FPlatformMemory::Memcpy(Buffer + FillOffset + i * ExpandedLength, &ExpandedPattern, ExpandedLength);
}
return iree_ok_status();
}
iree_status_t AddFillBufferPass(FRDGBuilder& GraphBuilder, FRDGBufferRef RDGBuffer, uint32 Pattern, uint32 PatternLength, uint32 FillOffset, uint32 FillLength)
{
SCOPED_NAMED_EVENT_TEXT("BuiltinExecutables::AddFillBufferPass", FColor::Magenta);
using namespace UE::NNERuntimeIREEShader::Internal;
if (FillLength == 0)
{
return iree_ok_status();
}
check(RDGBuffer != FRDGBufferRef{});
check(RDGBuffer->Desc.GetSize() >= FillOffset + FillLength);
check(FillOffset % 4 == 0);
check(FillLength % 4 == 0);
const uint32 ExpandedPattern = Private::ExpandPatternToWord(Pattern, PatternLength);
const uint32 ExpandedLength = 4;
const uint32 NumThreads = FillLength / 4;
const uint64 NumThreadsPerDispatch = GRHIMaxDispatchThreadGroupsPerDimension.X * FFillBufferConstants::THREAD_GROUP_SIZE;
const uint32 NumDispatches = (uint32)FMath::DivideAndRoundUp((uint64)NumThreads, NumThreadsPerDispatch);
for (uint32 i = 0; i < NumDispatches; i++)
{
const uint32 DispatchNumThreads = (uint32)FMath::Min((uint64)NumThreads - i * NumThreadsPerDispatch, NumThreadsPerDispatch);
check(DispatchNumThreads > 0);
FFillBufferCS::FParameters* ShaderParameters = GraphBuilder.AllocParameters<FFillBufferCS::FParameters>();
ShaderParameters->TargetBuffer = GraphBuilder.CreateUAV(RDGBuffer);
ShaderParameters->Fill.X = ExpandedPattern;
ShaderParameters->Fill.Y = ExpandedLength;
ShaderParameters->Fill.Z = FillOffset + i * 4 * NumThreadsPerDispatch;
ShaderParameters->Fill.W = 4 * DispatchNumThreads;
const int32 ThreadGroupCountX = FMath::DivideAndRoundUp(DispatchNumThreads, FFillBufferConstants::THREAD_GROUP_SIZE);
const FIntVector GroupCount = FIntVector(ThreadGroupCountX, 1, 1);
FGlobalShaderMap* GlobalShaderMap = GetGlobalShaderMap(GMaxRHIFeatureLevel);
TShaderMapRef<FFillBufferCS> Shader(GlobalShaderMap);
RDG_EVENT_SCOPE_STAT(GraphBuilder, FDirectCommandBufferFillBuffer, "DirectCommandBuffer.AddFillBufferPass %d with %d Threads", i, ThreadGroupCountX * FFillBufferConstants::THREAD_GROUP_SIZE);
RDG_GPU_STAT_SCOPE(GraphBuilder, FDirectCommandBufferFillBuffer);
FComputeShaderUtils::AddPass(
GraphBuilder,
RDG_EVENT_NAME("DirectCommandBuffer.AddFillBufferPass"),
ERDGPassFlags::Compute | ERDGPassFlags::NeverCull,
Shader,
ShaderParameters,
GroupCount);
}
return iree_ok_status();
}
} // namespace BuiltinExecutables
} // UE::IREE
#endif // WITH_IREE_DRIVER_RDG