// Copyright Epic Games, Inc. All Rights Reserved. #include "TaskDispatcherSolver.h" #include "PhysicsProxy/ClusterUnionPhysicsProxy.h" #include "PhysicsProxy/GeometryCollectionPhysicsProxy.h" namespace Chaos::Private { void FTaskDispatcherSolver::Initialize(TParticleView& DirtyParticles) { using TSOA = typename TParticleView::TSOA; using THandle = typename TSOA::THandleType; using THandleBase = typename THandle::THandleBase; using TTransientHandle = typename THandle::TTransientHandle; int32 DispatchBatchIndex = 0; // Precompute NumRigidBatches NumRigidBatches = 0; for (int32 ViewIndex = 0; ViewIndex < DirtyParticles.SOAViews.Num(); ++ViewIndex) { const TSOAView& SOAView = DirtyParticles.SOAViews[ViewIndex]; const int32 NumParticles = SOAView.Size(); if (NumParticles == 0) { continue; } constexpr int32 MinRigidPerTask = 50; const int32 NumTasks = FMath::Max(FMath::Min(FTaskGraphInterface::Get().GetNumWorkerThreads() - 1, Chaos::MaxNumWorkers), 1); const int32 NumRigidTask = FMath::Max(FMath::Min(NumTasks, NumParticles), 1); const int32 DirtyByTask = FMath::Max(FMath::DivideAndRoundUp(NumParticles, NumRigidTask), MinRigidPerTask); NumRigidBatches += FMath::DivideAndRoundUp(NumParticles, DirtyByTask); } DirtyRigidPendingTasks.Reset(NumRigidBatches); DirtyGCAndClusterPendingTask = UE::Tasks::FTask(); ActiveRigidArray.SetNum(NumRigidBatches); ActiveGC.Reset(); ActiveClusterUnions.Reset(); PullDataDirtyRigidsArray.Reset(); PreviousCopyRigidTask = UE::Tasks::FTask(); } void FTaskDispatcherSolver::BufferRigidResults(FPullPhysicsData* PullData) { // Num Batch should be the same for the three arrays check(ActiveRigidArray.Num() == NumRigidBatches); check(ActiveRigidArray.Num() == DirtyRigidPendingTasks.Num()); PullDataDirtyRigidsArray.SetNum(NumRigidBatches); //we only fill this once per frame ensure(PullData->DirtyRigids.Num() == 0); for (int32 BatchIndex = 0; BatchIndex < NumRigidBatches; BatchIndex++) { TArray& PullDataDirtyRigids = PullDataDirtyRigidsArray[BatchIndex]; PullDataDirtyRigids.Reset(); TArray& ActiveRigid = ActiveRigidArray[BatchIndex]; UE::Tasks::FTask RigidPendingTask = UE::Tasks::Launch(UE_SOURCE_LOCATION, [&ActiveRigid, &PullDataDirtyRigids]() { QUICK_SCOPE_CYCLE_COUNTER(STAT_BufferRigidResultsTasks); for (int32 Idx = 0; Idx < ActiveRigid.Num(); ++Idx) { PullDataDirtyRigids.AddDefaulted(); ActiveRigid[Idx]->BufferPhysicsResults(PullDataDirtyRigids.Last()); } }, DirtyRigidPendingTasks[BatchIndex]); // Link the following task with the one from the previous iteration to keep determinism UE::Tasks::FTask RigidCopyPendingTask = UE::Tasks::Launch(UE_SOURCE_LOCATION, [PullData, &PullDataDirtyRigids]() { QUICK_SCOPE_CYCLE_COUNTER(STAT_BufferRigidResultsCopyTasks); PullData->DirtyRigids.Append(PullDataDirtyRigids); }, TArray{RigidPendingTask, PreviousCopyRigidTask}); PreviousCopyRigidTask = RigidCopyPendingTask; } } void FTaskDispatcherSolver::BufferGCResults(FPullPhysicsData* PullData, FPBDRigidsSolver* RigidsSolver) { GCPendingTask = UE::Tasks::Launch(UE_SOURCE_LOCATION, [this, PullData, RigidsSolver]() { QUICK_SCOPE_CYCLE_COUNTER(STAT_BufferGCResultsTask); // This ActiveGC cannot be read while the last DirtyGCAndClusterPendingTask is not finished // that why we need to have this code in a dependent task int32 NumGC = ActiveGC.Num(); constexpr int32 MinGCPerTask = 1; const int32 NumTasks = FMath::Max(FMath::Min(FTaskGraphInterface::Get().GetNumWorkerThreads() - 1, Chaos::MaxNumWorkers), 1); const int32 NumGCTask = FMath::Max(FMath::Min(NumTasks, NumGC), 1); const int32 GCByTask = FMath::Max(FMath::DivideAndRoundUp(NumGC, NumGCTask), MinGCPerTask); const int32 NumGCBatches = FMath::DivideAndRoundUp(NumGC, GCByTask); //we only fill this once per frame ensure(PullData->DirtyGeometryCollections.Num() == 0); PullData->DirtyGeometryCollections.SetNum(NumGC); GCBatchTasks.SetNum(NumGCBatches); for (int32 BatchIndex = 0; BatchIndex < NumGCBatches; BatchIndex++) { const int32 StartIndex = BatchIndex * GCByTask; int32 EndIndex = (BatchIndex + 1) * GCByTask; EndIndex = FMath::Min(NumGC, EndIndex); GCBatchTasks[BatchIndex] = UE::Tasks::Launch(UE_SOURCE_LOCATION, [StartIndex, EndIndex, RigidsSolver, &ActiveGC = ActiveGC, &DirtyGeometryCollections = PullData->DirtyGeometryCollections]() { QUICK_SCOPE_CYCLE_COUNTER(STAT_BufferGCResultsTasks); for (int32 Idx = StartIndex; Idx < EndIndex; ++Idx) { ActiveGC[Idx]->BufferPhysicsResults_Internal(RigidsSolver, DirtyGeometryCollections[Idx]); } }); } }, DirtyGCAndClusterPendingTask); } void FTaskDispatcherSolver::BufferClusterResults(FPullPhysicsData* PullData) { ClusterPendingTask = UE::Tasks::Launch(UE_SOURCE_LOCATION, [this, PullData]() { QUICK_SCOPE_CYCLE_COUNTER(STAT_BufferClusterResultsTask); // This value cannot be read while the last DirtyGCAndClusterPendingTask is not finished // that why we need to have this code in a dependent task int32 NumClusters = ActiveClusterUnions.Num(); constexpr int32 MinClusterPerTask = 1; const int32 NumTasks = FMath::Max(FMath::Min(FTaskGraphInterface::Get().GetNumWorkerThreads() - 1, Chaos::MaxNumWorkers), 1); const int32 NumClusterTask = FMath::Max(FMath::Min(NumTasks, NumClusters), 1); const int32 ClusterByTask = FMath::Max(FMath::DivideAndRoundUp(NumClusters, NumClusterTask), MinClusterPerTask); const int32 NumClusterBatches = FMath::DivideAndRoundUp(NumClusters, ClusterByTask); //we only fill this once per frame ensure(PullData->DirtyClusterUnions.Num() == 0); PullData->DirtyClusterUnions.SetNum(NumClusters); ClusterBatchTasks.SetNum(NumClusterBatches); for (int32 BatchIndex = 0; BatchIndex < NumClusterBatches; BatchIndex++) { const int32 StartIndex = BatchIndex * ClusterByTask; int32 EndIndex = (BatchIndex + 1) * ClusterByTask; EndIndex = FMath::Min(NumClusters, EndIndex); ClusterBatchTasks[BatchIndex] = UE::Tasks::Launch(UE_SOURCE_LOCATION, [StartIndex, EndIndex, &ActiveClusterUnions = ActiveClusterUnions, &DirtyClusterUnions = PullData->DirtyClusterUnions]() { QUICK_SCOPE_CYCLE_COUNTER(STAT_BufferClusterResultsTasks); for (int32 Idx = StartIndex; Idx < EndIndex; ++Idx) { ActiveClusterUnions[Idx]->BufferPhysicsResults_Internal(DirtyClusterUnions[Idx]); } }, GCBatchTasks); } }, GCPendingTask); } void FTaskDispatcherSolver::WaitTaskEndBufferResults() const { check(ActiveRigidArray.Num() == NumRigidBatches); check(ActiveRigidArray.Num() == DirtyRigidPendingTasks.Num()); // First we need to wait for the outer task which will create inner task UE::Tasks::Wait(TArray{GCPendingTask}); // We are creating a dependency between GC and CU threads to be sure if any dependency is created // a CU and its GC the program doesn't create any race condition UE::Tasks::Wait(TArray{ClusterPendingTask}); // Only when those outer tasks are finished and so those inner task created we can wait for those inner tasks TArray TaskToWaitOn({ PreviousCopyRigidTask }); TaskToWaitOn.Append(GCBatchTasks); TaskToWaitOn.Append(ClusterBatchTasks); UE::Tasks::Wait(TaskToWaitOn); } }