Skip to content

Commit 761fe3f

Browse files
committed
Fixing issues in the implementation of memoizing_range
- flyby: optimize implementation of some functions in topology - flyby: cache pu mask for parallel_executor Signed-off-by: Hartmut Kaiser <hartmut.kaiser@gmail.com>
1 parent 5c20e03 commit 761fe3f

11 files changed

Lines changed: 257 additions & 129 deletions

File tree

docs/sphinx/manual/executors.rst

Lines changed: 4 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -403,13 +403,13 @@ Full example code
403403
// Bulk async_execute (used by parallel algorithms)
404404
template <typename F, typename Shape, typename... Ts>
405405
friend auto tag_invoke(hpx::parallel::execution::bulk_async_execute_t,
406-
simple_annotating_executor const& exec, F&& f, Shape const& shape,
406+
simple_annotating_executor const& exec, F&& f, Shape && shape,
407407
Ts&&... ts)
408408
{
409409
return hpx::parallel::execution::bulk_async_execute(
410410
exec.base_,
411411
hpx::annotated_function(std::forward<F>(f), exec.annotation_),
412-
shape, std::forward<Ts>(ts)...);
412+
std::forward<Shape>(shape), std::forward<Ts>(ts)...);
413413
}
414414
};
415415

@@ -418,19 +418,6 @@ Full example code
418418
// The annotating executor exposes the same executor categories as its
419419
// underlying (wrapped) executor.
420420

421-
template <typename BaseExecutor>
422-
struct is_never_blocking_one_way_executor<
423-
simple_annotating_executor<BaseExecutor>>
424-
: is_never_blocking_one_way_executor<BaseExecutor>
425-
{
426-
};
427-
428-
template <typename BaseExecutor>
429-
struct is_one_way_executor<simple_annotating_executor<BaseExecutor>>
430-
: is_one_way_executor<BaseExecutor>
431-
{
432-
};
433-
434421
template <typename BaseExecutor>
435422
struct is_two_way_executor<simple_annotating_executor<BaseExecutor>>
436423
: is_two_way_executor<BaseExecutor>
@@ -503,13 +490,13 @@ to integrate seamlessly with the |hpx| parallel algorithms infrastructure.
503490

504491
template <typename F, typename Shape, typename... Ts>
505492
friend auto tag_invoke(hpx::parallel::execution::bulk_async_execute_t,
506-
simple_annotating_executor const& exec, F&& f, Shape const& shape,
493+
simple_annotating_executor const& exec, F&& f, Shape && shape,
507494
Ts&&... ts)
508495
{
509496
return hpx::parallel::execution::bulk_async_execute(
510497
exec.base_,
511498
hpx::annotated_function(std::forward<F>(f), exec.annotation_),
512-
shape, std::forward<Ts>(ts)...);
499+
std::forward<Shape>(shape), std::forward<Ts>(ts)...);
513500
}
514501

515502
Note how we delegate the actual execution to `exec.base_`, the underlying

libs/core/algorithms/include/hpx/parallel/algorithms/merge.hpp

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -661,14 +661,30 @@ namespace hpx::parallel {
661661
using merge_region =
662662
hpx::tuple<Iter1, std::size_t, Iter2, std::size_t, std::size_t>;
663663

664+
#if HPX_HAVE_ITTNOTIFY != 0 && !defined(HPX_HAVE_APEX)
665+
static hpx::util::itt::event notify_event("reshape");
666+
hpx::util::itt::mark_event e(notify_event);
667+
#endif
668+
664669
#if defined(HPX_HAVE_CXX20_COROUTINES)
665-
auto reshape = [](auto&& shape, auto&& args)
670+
auto reshape = [](auto&& shape, auto&& args,
671+
std::size_t const cores)
666672
-> hpx::generator<merge_region&, merge_region> {
667673
auto [len1, first2, last2, comp, proj1, proj2] = args;
674+
675+
if (std::size(shape) == 1 && cores == 1)
676+
{
677+
// special case: one core, one chunk - the partitioner will
678+
// switch to sequential execution in this case
679+
auto [it1, size1, _] = *std::begin(shape);
680+
auto size2 = detail::distance(first2, last2);
681+
merge_region region(it1, size1, first2, size2, 0);
682+
co_yield region;
683+
co_return;
684+
}
668685
#else
669686
auto reshape = [len1, first2, last2, comp, proj1, proj2](
670-
auto&& shape, std::size_t cores) {
671-
687+
auto&& shape, std::size_t const cores) {
672688
auto shape_size = std::size(shape);
673689
if (shape_size == 1 && cores == 1)
674690
{
@@ -684,11 +700,6 @@ namespace hpx::parallel {
684700
reshaped.reserve(2 * shape_size);
685701
#endif
686702

687-
#if HPX_HAVE_ITTNOTIFY != 0 && !defined(HPX_HAVE_APEX)
688-
static hpx::util::itt::event notify_event("reshape");
689-
hpx::util::itt::mark_event e(notify_event);
690-
#endif
691-
692703
Iter2 it2 = first2;
693704
std::size_t dest_start = 0;
694705
for (auto [it1, size1, base] : shape)
@@ -784,9 +795,9 @@ namespace hpx::parallel {
784795
#if defined(HPX_HAVE_CXX20_COROUTINES)
785796
return [reshape = HPX_MOVE(reshape),
786797
data = hpx::make_tuple(len1, first2, last2, comp, proj1,
787-
proj2)](auto&& shape) {
798+
proj2)](auto&& shape, std::size_t cores) {
788799
return util::memoizing_range(
789-
reshape(shape, data), 2 * std::size(shape));
800+
reshape(shape, data, cores), 2 * std::size(shape));
790801
};
791802
#else
792803
return reshape;

libs/core/algorithms/include/hpx/parallel/util/foreach_partitioner.hpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,9 @@ namespace hpx::parallel::util::detail {
170170
try
171171
{
172172
if constexpr (std::is_void_v<decltype(foreach_partition<Result>(
173-
policy, first, count, f1, reshape))>)
173+
HPX_FORWARD(ExPolicy_, policy), first, count,
174+
HPX_FORWARD(F1, f1),
175+
HPX_FORWARD(ReShape, reshape)))>)
174176
{
175177
detail::foreach_partition<Result>(
176178
HPX_FORWARD(ExPolicy_, policy), first, count,

0 commit comments

Comments
 (0)