Files
UnrealEngine/Engine/Plugins/FX/Niagara/Shaders/Private/NiagaraComputeFreeIDs.usf
Brandyn / Techy fcc1b09210 init
2026-04-04 15:40:51 -05:00

126 lines
4.4 KiB
HLSL

// Copyright Epic Games, Inc. All Rights Reserved.
#include "/Engine/Private/Common.ush"
// ID mapping table produced by the simulate step. The kernels in this file treat an entry equal to -1
// as meaning that the ID at that position is free.
Buffer<int> IDToIndexTable;
// Output list of free IDs. Each wave / thread group writes the free IDs it found into a contiguous range
// that it reserves by atomically advancing the matching counter in RWFreeIDListSizes.
RWBuffer<int> RWFreeIDList;
// Buffer containing the current size of each list that's currently being processed.
// Multiple invocations of this shader might be running concurrently, for independent emitters.
// The kernels only ever modify the entry at FreeIDListIndex, and only through InterlockedAdd.
RWBuffer<int> RWFreeIDListSizes;
// Index of the free list that's being computed by this invocation. The current size of the
// list is at RWFreeIDListSizes[FreeIDListIndex].
uint FreeIDListIndex;
#if USE_WAVE_INTRINSICS
// Wave-intrinsics path: every wave compacts the free IDs found by its lanes and reserves its slice of the
// output list with a single atomic add.
//
// Each thread inspects exactly one ID, taken from SV_DispatchThreadID. That equals
// GroupId.x * THREADGROUP_SIZE + GroupThreadId.x, so the ID range covered by a group always matches the
// number of threads that were actually launched. Deriving the group's first ID from WaveGetLaneCount() instead
// is only correct when THREADGROUP_SIZE happens to equal the hardware wave size (the original note says this
// is fixed on PS4 and XB1 and must be configured externally elsewhere); with any other combination IDs would be
// scanned twice or skipped. All wave operations below are per-wave, so a thread group made of several waves
// simply performs one reservation per wave.
//
// GroupId and GroupThreadId are kept in the signature for interface compatibility with the non-wave variant.
[numthreads(THREADGROUP_SIZE, 1, 1)]
void ComputeFreeIDs(uint3 GroupId : SV_GroupID, uint3 GroupThreadId : SV_GroupThreadID, uint3 DispatchThreadId : SV_DispatchThreadID)
{
	uint ParticleID = DispatchThreadId.x;

	// An entry of -1 in the mapping table marks the ID as free.
	bool IsFree = (IDToIndexTable[ParticleID] == -1);

	// Count how many predicates are true across all lanes of this wave.
	uint NumWaveFreeIDs = WaveActiveCountBits(IsFree);

	// Compute a prefix sum on the predicate bit mask. This gives us the
	// local write offset for each lane.
	uint WriteOffset = WavePrefixCountBits(IsFree);

	// Skip the whole wave if all the IDs are in use.
	if (NumWaveFreeIDs > 0)
	{
		// Initialized so that lanes which do not perform the atomic never hold an undefined value.
		uint GroupWriteStart = 0;
		if (WaveIsFirstLane())
		{
			// Add to the global write offset. The previous offset is where we start writing in the output buffer.
			// We can reduce the number of atomic adds by processing multiple sub-groups in the shader, such that each lane
			// computes the offsets and counts of a sub-group. However, this doesn't seem to be the bottleneck right now,
			// so it's not worth the effort.
			InterlockedAdd(RWFreeIDListSizes[FreeIDListIndex], NumWaveFreeIDs, GroupWriteStart);
		}

		// Broadcast the global write offset to all lanes.
		GroupWriteStart = WaveReadLaneFirst(GroupWriteStart);

		if (IsFree)
		{
			RWFreeIDList[GroupWriteStart + WriteOffset] = ParticleID;
		}
	}
}
#else
// Temporary buffer where we compute the number of elements to write before each input element.
// It holds two halves of THREADGROUP_SIZE entries each: every step of the prefix sum reads from one half
// and writes to the other, and the roles are swapped on the next step.
groupshared uint GroupWriteOffsets[2*THREADGROUP_SIZE];
// The offset where this thread group will start writing its output.
// Written by thread 0 as the previous value returned by InterlockedAdd on the list size, and read by the
// other threads of the group after a group barrier.
groupshared uint GroupWriteStart;
// Fallback path without wave intrinsics. Each thread group scans THREADGROUP_SIZE consecutive IDs, builds an
// exclusive prefix sum of the "is free" predicate in group shared memory, reserves a contiguous range of the
// output list with one atomic add, and then lets every thread that owns a free ID write it to its slot.
[numthreads(THREADGROUP_SIZE, 1, 1)]
void ComputeFreeIDs(uint3 GroupId : SV_GroupID, uint3 GroupThreadId : SV_GroupThreadID, uint3 DispatchThreadId : SV_DispatchThreadID)
{
	uint LaneIndex = GroupThreadId.x;
	uint GroupFirstID = GroupId.x * THREADGROUP_SIZE;
	uint CandidateID = GroupFirstID + LaneIndex;

	// Seed the scan with the predicate of the *previous* ID (0 for the first thread), so that the inclusive
	// scan performed below yields an exclusive sum for each thread's own ID.
	uint Seed = 0;
	if (LaneIndex != 0)
	{
		Seed = (IDToIndexTable[CandidateID - 1] == -1) ? 1 : 0;
	}
	GroupWriteOffsets[LaneIndex] = Seed;

	GroupMemoryBarrierWithGroupSync();

	// Start offsets of the two halves of the shared buffer. ReadBase always designates the half that holds
	// the most recent partial sums; WriteBase is the half being filled by the current step.
	uint ReadBase = 0;
	uint WriteBase = THREADGROUP_SIZE;

	// Prefix sum: at each step every thread adds the partial sum located Stride entries before it.
	[unroll]
	for (uint Stride = 1; Stride < THREADGROUP_SIZE; Stride <<= 1)
	{
		uint PartialSum = GroupWriteOffsets[ReadBase + LaneIndex];
		if (LaneIndex >= Stride)
		{
			PartialSum += GroupWriteOffsets[ReadBase + LaneIndex - Stride];
		}
		GroupWriteOffsets[WriteBase + LaneIndex] = PartialSum;

		GroupMemoryBarrierWithGroupSync();

		// The half that was just written becomes the input of the next step.
		uint PreviousReadBase = ReadBase;
		ReadBase = WriteBase;
		WriteBase = PreviousReadBase;
	}

	if (LaneIndex == 0)
	{
		// The exclusive sum of the last thread does not include the last ID's own predicate; add it to get
		// the total number of IDs that this group will write.
		uint TailIsFree = (IDToIndexTable[GroupFirstID + THREADGROUP_SIZE - 1] == -1) ? 1 : 0;
		uint GroupFreeCount = GroupWriteOffsets[ReadBase + THREADGROUP_SIZE - 1] + TailIsFree;

		// Reserve the output range. The value of the counter before the add is where this group starts writing.
		InterlockedAdd(RWFreeIDListSizes[FreeIDListIndex], GroupFreeCount, GroupWriteStart);
	}

	// Make the reserved start offset visible to the whole group.
	GroupMemoryBarrierWithGroupSync();

	if (IDToIndexTable[CandidateID] == -1)
	{
		// This ID is free: its slot is the group's start plus the number of free IDs preceding it in the group.
		// TODO: consider buffering the results before writing to the output UAV.
		uint OutputSlot = GroupWriteStart + GroupWriteOffsets[ReadBase + LaneIndex];
		RWFreeIDList[OutputSlot] = CandidateID;
	}
}
#endif