// Copyright Epic Games, Inc. All Rights Reserved. #pragma once #include "HAL/Platform.h" #include "ProfilingDebugging/CpuProfilerTrace.h" THIRD_PARTY_INCLUDES_START PRAGMA_DISABLE_DEPRECATION_WARNINGS #include #include #include #include #include #include #include PRAGMA_ENABLE_DEPRECATION_WARNINGS THIRD_PARTY_INCLUDES_END /** * Wrappers for the tbb calls to allow for easier single threaded testing. * * The ProxyLOD system depends on a third-party library (openvdb) that requires * Intel's Threading Building Blocks (tbb). * * To maintain the parallelism model, the ProxyLOD system uses tbb for all internal threading. * This allows us to exploit the inherent load balancing provided by this composable task-based * threading model, and share the underlying task-manager with the third-party code. * * NB: These methods are fully composable, meaning that nesting of parallel calls * (e.g. calling a Parallel_For within a Parallel_Reduce or any task group) is admissible. */ namespace ProxyLOD { /** * Splittable Range Types for parallel loops. * * NB: to satisfy load balancing needs, the parallel loop-based algorithms can split the range of an item * of work to make smaller tasks. */ typedef tbb::blocked_range FIntRange; typedef tbb::blocked_range FUIntRange; typedef tbb::blocked_range2d FIntRange2d; /** * Range-based parallel for that supports internal range splitting. The work may be split into multiple tasks. * * ------------------------------------------------------------------------------------------- * Example: Data transformation of an array. * * TArray SrcArray; * ... * // target for data transformation * TArray DstArray; * * // prep for parallel write * Resize(DstArray, SrcArray.Num); * ... 
* auto Functor = [&DstArray, &SrcArray](const FIntRange& Range)->void * { * for (int i = Range.start(), I = Range.end(); i < I; ++i ) * { * DstArray[i] = SomeOpperation( SrcArray[i] ); * } * }; * * Parallel_For(FIntRange(0, SrcArray.Num), Functor); * ------------------------------------------------------------------------------------------- * @param Range RangeType(start, end) describes the range of the for loop * @param Functor Functor with Functor(const RangeType& range) signature performs sub-section of the for loop. * @param bParallel Bool to control parallel vs single threaded (default true). * */ template void Parallel_For(const RangeType& Range, const FunctorType& Functor, const bool bParallel = true) { TRACE_CPUPROFILER_EVENT_SCOPE(ProxyLOD::Parallel_For) if (bParallel) // run in parallel { // Functor can be passed by reference since we wait until completion tbb::parallel_for(Range, [&Functor](const RangeType& Range) { // #TODO Investigate why Insights stops working when used //TRACE_CPUPROFILER_EVENT_SCOPE(ProxyLOD::Parallel_For) Functor(Range); }); } else // single threaded { Functor(Range); } } /** * Range-based reduction that supports internal range splitting. The work may be split into multiple tasks. * Requires a functor that works on a subset of data, and a reduction functor that merges the results of multiple subsets. * * ------------------------------------------------------------------------------------------- * Example: To sum all the numbers in an array of floats * * TArray MyArray; * ... 
* auto Functor = [&MyArray](const FIntRange& Range, float CurrentSum)->float * { * for (int i = Range.start(), I = Range.end(); i < I; ++i ) * { * CurrentSum += MyArray[i]; * } * return CurrentSum; * }; * * auto ReduceFunctor = [](float A, float B)->float { return A + B; } * * Sum = Parallel_Reduce(FIntRange(0, MyArray.Num), 0, Functor, ReduceFunctor); * ------------------------------------------------------------------------------------------- * * * @param Range RangeType(start, end) describes the range of the work * @param Functor Functor(const RangeType& range, ValueType InitialValue) signature performs sub-section of the work, returns ValueType * @param ReduceFunctor ReduceFunctor(ValueType& A, ValueType& B) signature , returns value type. * * @param bParallel Bool to control parallel vs single threaded (default true). * * @return Result of parallel reduce - of type ValueType. */ template ValueType Parallel_Reduce(const RangeType& Range, const ValueType& IdentityValue, const FunctorType& Functor, const ReduceFunctorType& ReduceFunctor, const bool bParallel = true) { TRACE_CPUPROFILER_EVENT_SCOPE(ProxyLOD::Parallel_Reduce) if (bParallel) { // Functor can be passed by reference since we wait until completion return tbb::parallel_reduce(Range, IdentityValue, [&Functor](const RangeType& Range, ValueType InitialValue) { // #TODO Investigate why Insights stops working when used //TRACE_CPUPROFILER_EVENT_SCOPE(ProxyLOD::Parallel_Reduce) return Functor(Range, InitialValue); }, ReduceFunctor); } else { return Functor(Range, IdentityValue); } } /** * Parallel Task Group - enqueues tasks to be run in parallel. * * This class can be constructed with FTrasGroup(false) to force single threaded behavior. * * NB: The syntax suggest launching (run) individual threads and joining (wait), but in fact this only enqueues task-based functors * in a task manager, and there is no guarantee the tasks will actually be executed on separate threads, or even in parallel. 
* */ class FTaskGroup { public: /** * A task group constructed with the default constructor will enqueue tasks in the parallel task manager */ FTaskGroup() {}; /** * Constructor determines if the task group will actually enqueue tasks in the parallel task manager, or * simply run them consecutively. * * @param Parallel - should be set to false for debugging to force single threaded behavior. */ FTaskGroup(bool Parallel) :bParallel(Parallel) {}; /** * Enqueue a functor in the task manager. * * NB: The functor will directly run if this task group was constructed with Parallel=false; * * @param Functor - task to run. */ template void Run(const FunctorType& Functor); /** * Run the functor on the current thread and wait for any other tasks enqueued by this task group to finish. * * NB: The functor will directly run if this task group was constructed with Parallel=false; * * @param Functor - task to run. */ template void RunAndWait(const FunctorType& Functor); /** * Wait for all tasks enqueued by this task group to finish. * * NB: No op if this task group was constructed with Parallel=false; */ void Wait() { if (bParallel) { TRACE_CPUPROFILER_EVENT_SCOPE(FTaskGroup::Wait) TBBTaskGroup.wait(); } } private: bool bParallel = true; tbb::task_group TBBTaskGroup; }; template void FTaskGroup::Run(const FunctorType& Functor) { TRACE_CPUPROFILER_EVENT_SCOPE(FTaskGroup::Run) if (bParallel) { // Functor must be passed by copy here since the function returns before completion TBBTaskGroup.run( [Functor]() { TRACE_CPUPROFILER_EVENT_SCOPE(FTaskGroup::Run) Functor(); } ); } else { Functor(); } } template void FTaskGroup::RunAndWait(const FunctorType& Functor) { TRACE_CPUPROFILER_EVENT_SCOPE(FTaskGroup::RunAndWait) if (bParallel) { // Functor can be passed by reference since we wait until completion TBBTaskGroup.run_and_wait( [&Functor]() { TRACE_CPUPROFILER_EVENT_SCOPE(FTaskGroup::RunAndWait) Functor(); }); } else { Functor(); } } }