Apply partitioner optimizations to all code paths

hkaiser · hkaiser · commit 9f02f2ac597d · 2025-12-15T12:50:35.000-06:00
Signed-off-by: Hartmut Kaiser <hartmut.kaiser@gmail.com>
diff --git a/libs/core/algorithms/include/hpx/parallel/util/partitioner.hpp b/libs/core/algorithms/include/hpx/parallel/util/partitioner.hpp
@@ -17,6 +17,7 @@
 #include <hpx/modules/execution.hpp>
 #include <hpx/modules/execution_base.hpp>
 #include <hpx/modules/iterator_support.hpp>
+#include <hpx/modules/pack_traversal.hpp>
 #include <hpx/modules/type_support.hpp>
 #include <hpx/parallel/util/detail/chunk_size.hpp>
 #include <hpx/parallel/util/detail/handle_local_exceptions.hpp>
@@ -69,9 +70,52 @@ namespace hpx::parallel::util::detail {
             auto&& shape =
                 detail::get_bulk_iteration_shape(policy, it_or_r, count, cores);
 
-            return execution::bulk_async_execute(policy.executor(),
-                partitioner_iteration<Result, F>{HPX_FORWARD(F, f)},
-                HPX_MOVE(shape));
+            using executor_type = decltype(policy.executor());
+
+            // We attempt to perform some optimizations in case of non-task
+            // execution.
+            if constexpr (!hpx::is_async_execution_policy_v<ExPolicy> &&
+                !hpx::execution_policy_has_scheduler_executor_v<ExPolicy>)
+            {
+                // Switch to sequential execution for one-core, one-chunk case
+                // if the executor supports it.
+                if constexpr (hpx::traits::is_one_way_executor_v<executor_type>)
+                {
+                    if (cores == 1 && std::size(shape) == 1)
+                    {
+                        return execution::sync_execute(policy.executor(),
+                            partitioner_iteration<Result, F>{HPX_FORWARD(F, f)},
+                            *std::begin(HPX_MOVE(shape)));
+                    }
+                }
+
+                if constexpr (hpx::traits::is_bulk_one_way_executor_v<
+                                  executor_type>)
+                {
+                    return execution::bulk_sync_execute(policy.executor(),
+                        partitioner_iteration<Result, F>{HPX_FORWARD(F, f)},
+                        HPX_MOVE(shape));
+                }
+
+                // Fall back if given executor doesn't support any of the above
+                // optimizations.
+                auto&& items = execution::bulk_async_execute(policy.executor(),
+                    partitioner_iteration<Result, F>{HPX_FORWARD(F, f)},
+                    HPX_MOVE(shape));
+                if (hpx::wait_all_nothrow(items))
+                {
+                    using handle_local_exceptions =
+                        detail::handle_local_exceptions<ExPolicy>;
+                    handle_local_exceptions::call(items);
+                }
+                return hpx::unwrap(items);
+            }
+            else
+            {
+                return execution::bulk_async_execute(policy.executor(),
+                    partitioner_iteration<Result, F>{HPX_FORWARD(F, f)},
+                    HPX_MOVE(shape));
+            }
         }
         else
         {
@@ -122,9 +166,52 @@ namespace hpx::parallel::util::detail {
             auto&& shape = detail::get_bulk_iteration_shape_idx(
                 policy, first, count, cores, stride);
 
-            return execution::bulk_async_execute(policy.executor(),
-                partitioner_iteration<Result, F>{HPX_FORWARD(F, f)},
-                HPX_MOVE(shape));
+            using executor_type = decltype(policy.executor());
+
+            // We attempt to perform some optimizations in case of non-task
+            // execution.
+            if constexpr (!hpx::is_async_execution_policy_v<ExPolicy> &&
+                !hpx::execution_policy_has_scheduler_executor_v<ExPolicy>)
+            {
+                // Switch to sequential execution for one-core, one-chunk case
+                // if the executor supports it.
+                if constexpr (hpx::traits::is_one_way_executor_v<executor_type>)
+                {
+                    if (cores == 1 && std::size(shape) == 1)
+                    {
+                        return execution::sync_execute(policy.executor(),
+                            partitioner_iteration<Result, F>{HPX_FORWARD(F, f)},
+                            *std::begin(HPX_MOVE(shape)));
+                    }
+                }
+
+                if constexpr (hpx::traits::is_bulk_one_way_executor_v<
+                                  executor_type>)
+                {
+                    return execution::bulk_sync_execute(policy.executor(),
+                        partitioner_iteration<Result, F>{HPX_FORWARD(F, f)},
+                        HPX_MOVE(shape));
+                }
+
+                // Fall back if given executor doesn't support any of the above
+                // optimizations.
+                auto&& items = execution::bulk_async_execute(policy.executor(),
+                    partitioner_iteration<Result, F>{HPX_FORWARD(F, f)},
+                    HPX_MOVE(shape));
+                if (hpx::wait_all_nothrow(items))
+                {
+                    using handle_local_exceptions =
+                        detail::handle_local_exceptions<ExPolicy>;
+                    handle_local_exceptions::call(items);
+                }
+                return hpx::unwrap(items);
+            }
+            else
+            {
+                return execution::bulk_async_execute(policy.executor(),
+                    partitioner_iteration<Result, F>{HPX_FORWARD(F, f)},
+                    HPX_MOVE(shape));
+            }
         }
         else
         {
@@ -207,13 +294,26 @@ namespace hpx::parallel::util::detail {
 
             try
             {
-                auto&& items =
+                if constexpr (std::is_void_v<decltype(detail::partition<Result>(
+                                  policy, first, count, f1))>)
+                {
                     detail::partition<Result>(HPX_FORWARD(ExPolicy_, policy),
                         first, count, HPX_FORWARD(F1, f1));
 
-                scoped_params.mark_end_of_scheduling();
+                    scoped_params.mark_end_of_scheduling();
 
-                return reduce(HPX_MOVE(items), HPX_FORWARD(F2, f2));
+                    return HPX_INVOKE(f2);
+                }
+                else
+                {
+                    auto&& items = detail::partition<Result>(
+                        HPX_FORWARD(ExPolicy_, policy), first, count,
+                        HPX_FORWARD(F1, f1));
+
+                    scoped_params.mark_end_of_scheduling();
+
+                    return reduce(HPX_MOVE(items), HPX_FORWARD(F2, f2));
+                }
             }
             catch (...)
             {
@@ -236,13 +336,28 @@ namespace hpx::parallel::util::detail {
 
             try
             {
-                auto&& items = detail::partition_with_index<Result>(
-                    HPX_FORWARD(ExPolicy_, policy), first, count, stride,
-                    HPX_FORWARD(F1, f1));
+                if constexpr (std::is_void_v<
+                                  decltype(detail::partition_with_index<Result>(
+                                      policy, first, count, stride, f1))>)
+                {
+                    detail::partition_with_index<Result>(
+                        HPX_FORWARD(ExPolicy_, policy), first, count, stride,
+                        HPX_FORWARD(F1, f1));
 
-                scoped_params.mark_end_of_scheduling();
+                    scoped_params.mark_end_of_scheduling();
 
-                return reduce(HPX_MOVE(items), HPX_FORWARD(F2, f2));
+                    return HPX_INVOKE(HPX_FORWARD(F2, f2));
+                }
+                else
+                {
+                    auto&& items = detail::partition_with_index<Result>(
+                        HPX_FORWARD(ExPolicy_, policy), first, count, stride,
+                        HPX_FORWARD(F1, f1));
+
+                    scoped_params.mark_end_of_scheduling();
+
+                    return reduce(HPX_MOVE(items), HPX_FORWARD(F2, f2));
+                }
             }
             catch (...)
             {
diff --git a/libs/core/algorithms/include/hpx/parallel/util/partitioner_with_cleanup.hpp b/libs/core/algorithms/include/hpx/parallel/util/partitioner_with_cleanup.hpp
@@ -66,7 +66,7 @@ namespace hpx::parallel::util {
 
                 try
                 {
-                    const bool has_scheduler_executor =
+                    constexpr bool has_scheduler_executor =
                         hpx::execution_policy_has_scheduler_executor_v<
                             ExPolicy_>;
 

Original file line number	Diff line number	Diff line change
`@@ -66,7 +66,7 @@ namespace hpx::parallel::util {`
`66`	`66`
`67`	`67`	`try`
`68`	`68`	`{`
`69`		`- const bool has_scheduler_executor =`
	`69`	`+ constexpr bool has_scheduler_executor =`
`70`	`70`	`hpx::execution_policy_has_scheduler_executor_v<`
`71`	`71`	`ExPolicy_>;`
`72`	`72`