Skip to content

Commit 1da76d8

Browse files
executors: fix parallel_executor caching and fork_join stacksize
- Remove mutable mask_ cache from parallel_policy_executor_base. Storing a mutable member in a constexpr execution_policy causes UB and compilation failures (specifically, reading the mutable member during constant destruction in C++20). The caching has been entirely removed to restore correctness. - Remove stale 'Must not be nostack' doc comments from fork_join_executor constructors, as stackless threads are now supported (yielding is simply skipped via allow_yielding).
1 parent acfa35a commit 1da76d8

2 files changed

Lines changed: 4 additions & 66 deletions

File tree

libs/core/executors/include/hpx/executors/fork_join_executor.hpp

Lines changed: 2 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1250,24 +1250,6 @@ namespace hpx::execution::experimental {
12501250
HPX_FORWARD(F, f), HPX_FORWARD(Fs, fs)...);
12511251
}
12521252

1253-
template <typename Parameters>
1254-
requires(hpx::traits::is_executor_parameters_v<Parameters>)
1255-
friend std::size_t tag_invoke(
1256-
hpx::execution::experimental::processing_units_count_t,
1257-
Parameters&&, fork_join_executor const& exec,
1258-
hpx::chrono::steady_duration const& = hpx::chrono::null_duration,
1259-
std::size_t = 0)
1260-
{
1261-
return exec.shared_data_->num_threads_;
1262-
}
1263-
1264-
friend std::size_t tag_invoke(
1265-
hpx::execution::experimental::get_first_core_t,
1266-
fork_join_executor const& exec) noexcept
1267-
{
1268-
return hpx::threads::find_first(exec.shared_data_->pu_mask_);
1269-
}
1270-
12711253
public:
12721254
bool operator==(fork_join_executor const& rhs) const noexcept
12731255
{
@@ -1288,8 +1270,7 @@ namespace hpx::execution::experimental {
12881270
/// \brief Construct a fork_join_executor.
12891271
///
12901272
/// \param priority The priority of the worker threads.
1291-
/// \param stacksize The stacksize of the worker threads. Must not be
1292-
/// nostack.
1273+
/// \param stacksize The stacksize of the worker threads.
12931274
/// \param sched The loop schedule of the parallel regions.
12941275
/// \param yield_delay The time after which the executor yields to other
12951276
/// work if it has not received any new work for execution.
@@ -1301,15 +1282,6 @@ namespace hpx::execution::experimental {
13011282
std::chrono::nanoseconds yield_delay = std::chrono::microseconds(
13021283
300))
13031284
{
1304-
if (stacksize == threads::thread_stacksize::nostack)
1305-
{
1306-
HPX_THROW_EXCEPTION(hpx::error::bad_parameter,
1307-
"fork_join_executor::fork_join_executor",
1308-
"The fork_join_executor does not support using "
1309-
"thread_stacksize::nostack as the stacksize (stackful "
1310-
"threads are required to yield correctly when idle)");
1311-
}
1312-
13131285
shared_data_ = std::make_shared<shared_data>(
13141286
priority, stacksize, sched, yield_delay);
13151287
}
@@ -1318,8 +1290,7 @@ namespace hpx::execution::experimental {
13181290
///
13191291
/// \param pu_mask The PU-mask to use for placing the created threads
13201292
/// \param priority The priority of the worker threads.
1321-
/// \param stacksize The stacksize of the worker threads. Must not be
1322-
/// nostack.
1293+
/// \param stacksize The stacksize of the worker threads.
13231294
/// \param sched The loop schedule of the parallel regions.
13241295
/// \param yield_delay The time after which the executor yields to other
13251296
/// work if it has not received any new work for execution.
@@ -1331,15 +1302,6 @@ namespace hpx::execution::experimental {
13311302
std::chrono::nanoseconds yield_delay = std::chrono::microseconds(
13321303
300))
13331304
{
1334-
if (stacksize == threads::thread_stacksize::nostack)
1335-
{
1336-
HPX_THROW_EXCEPTION(hpx::error::bad_parameter,
1337-
"fork_join_executor::fork_join_executor",
1338-
"The fork_join_executor does not support using "
1339-
"thread_stacksize::nostack as the stacksize (stackful "
1340-
"threads are required to yield correctly when idle)");
1341-
}
1342-
13431305
shared_data_ = std::make_shared<shared_data>(
13441306
priority, stacksize, sched, yield_delay, pu_mask);
13451307
}

libs/core/executors/include/hpx/executors/parallel_executor.hpp

Lines changed: 2 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@
2727
#include <hpx/modules/topology.hpp>
2828

2929
#include <algorithm>
30-
#include <atomic>
3130
#include <cstddef>
3231
#include <cstdint>
3332
#include <string>
@@ -162,7 +161,6 @@ namespace hpx::execution {
162161
, policy_(rhs.policy_)
163162
, first_core_(rhs.first_core_)
164163
, num_cores_(rhs.num_cores_)
165-
, mask_(nullptr) // force recomputing cached pu mask
166164
#if defined(HPX_HAVE_THREAD_DESCRIPTION)
167165
, annotation_(rhs.annotation_)
168166
#endif
@@ -179,9 +177,6 @@ namespace hpx::execution {
179177
policy_ = rhs.policy_;
180178
first_core_ = rhs.first_core_;
181179
num_cores_ = rhs.num_cores_;
182-
auto* old_mask =
183-
mask_.exchange(nullptr, std::memory_order_relaxed);
184-
delete old_mask; // force recomputing cached pu mask
185180

186181
#if defined(HPX_HAVE_THREAD_DESCRIPTION)
187182
annotation_ = rhs.annotation_;
@@ -190,10 +185,7 @@ namespace hpx::execution {
190185
return *this;
191186
}
192187

193-
~parallel_policy_executor_base()
194-
{
195-
delete mask_.load(std::memory_order_relaxed);
196-
}
188+
constexpr ~parallel_policy_executor_base() = default;
197189

198190
// backwards compatibility support, will be removed in the future
199191
template <typename Parameters>
@@ -348,12 +340,6 @@ namespace hpx::execution {
348340

349341
hpx::threads::mask_type pu_mask() const
350342
{
351-
auto* current_mask = mask_.load(std::memory_order_acquire);
352-
if (current_mask != nullptr && hpx::threads::any(*current_mask))
353-
{
354-
return *current_mask;
355-
}
356-
357343
auto const num_threads = get_num_cores();
358344
auto const available_threads = static_cast<std::uint32_t>(
359345
pool()->get_active_os_thread_count());
@@ -380,16 +366,7 @@ namespace hpx::execution {
380366
}
381367
}
382368

383-
auto* new_mask = new hpx::threads::mask_type(mask);
384-
hpx::threads::mask_type* expected = nullptr;
385-
if (mask_.compare_exchange_strong(expected, new_mask,
386-
std::memory_order_release, std::memory_order_acquire))
387-
{
388-
return *new_mask;
389-
}
390-
391-
delete new_mask;
392-
return *expected;
369+
return mask;
393370
}
394371

395372
threads::thread_pool_base* pool() const
@@ -403,7 +380,6 @@ namespace hpx::execution {
403380
Policy policy_;
404381
std::size_t first_core_ = 0;
405382
std::size_t num_cores_ = 0;
406-
mutable std::atomic<hpx::threads::mask_type*> mask_{nullptr};
407383
#if defined(HPX_HAVE_THREAD_DESCRIPTION)
408384
char const* annotation_ = nullptr;
409385
#endif

0 commit comments

Comments
 (0)