Skip to content
Open
5 changes: 5 additions & 0 deletions gloo/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,11 @@ list(APPEND GLOO_HDRS
"${CMAKE_CURRENT_SOURCE_DIR}/types.h"
)

# Add the allreduce_shm source and header to the build only when the compiler
# is not MSVC and CMAKE_SYSTEM_PROCESSOR does not start with "aarch64" or "arm".
if(NOT MSVC AND NOT CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm)")
list(APPEND GLOO_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/allreduce_shm.cc")
list(APPEND GLOO_HDRS "${CMAKE_CURRENT_SOURCE_DIR}/allreduce_shm.h")
endif()

if(USE_CUDA)
file(GLOB GLOO_CUDA_SRCS
"${CMAKE_CURRENT_SOURCE_DIR}/cuda*.cc"
Expand Down
19 changes: 18 additions & 1 deletion gloo/allreduce.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,12 @@
#include <array>
#include <cstring>

#if !defined(_WIN32) && !defined(__aarch64__) && !defined(__arm__)

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems to be copied in a bunch of places — can we make it a macro?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, I've defined a macro for this in gloo/allreduce.h, which will also be used in the unit test:

// GLOO_SHM_ALLREDUCE_APPLICABLE is 1 when none of _WIN32, __aarch64__, or
// __arm__ is defined, and 0 otherwise.
#if !defined(_WIN32) && !defined(__aarch64__) && !defined(__arm__)
#define GLOO_SHM_ALLREDUCE_APPLICABLE 1
#else
#define GLOO_SHM_ALLREDUCE_APPLICABLE 0
#endif

#include "gloo/allreduce_shm.h"
#endif
#include "gloo/common/logging.h"
#include "gloo/math.h"
#include "gloo/transport/device.h"
#include "gloo/types.h"

namespace gloo {
Expand Down Expand Up @@ -131,14 +135,27 @@ void allreduce(const detail::AllreduceOptionsImpl& opts) {
return;
}

switch (opts.algorithm) {
auto algorithm = opts.algorithm;

#if !defined(_WIN32) && !defined(__aarch64__) && !defined(__arm__)
if (context->isIntraNode() && !context->getDevice()->hasGPUDirect()) {
algorithm = detail::AllreduceOptionsImpl::SHM;

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't see how users would be able to select an explicit algorithm — this will override anything the user explicitly specifies. Should we check for Algorithm::UNSPECIFIED before we override?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've modified it so that the algorithm is overridden only when it is Algorithm::UNSPECIFIED and shm allreduce is applicable. I also added a unit test for shm allreduce in gloo/test/allreduce_test.cc.

}
#endif

switch (algorithm) {
case detail::AllreduceOptionsImpl::UNSPECIFIED:
case detail::AllreduceOptionsImpl::RING:
ring(opts, reduceInputs, broadcastOutputs);
break;
case detail::AllreduceOptionsImpl::BCUBE:
bcube(opts, reduceInputs, broadcastOutputs);
break;
#if !defined(_WIN32) && !defined(__aarch64__) && !defined(__arm__)
case detail::AllreduceOptionsImpl::SHM:
shm(opts);
break;
#endif
default:
GLOO_ENFORCE(false, "Algorithm not handled.");
}
Expand Down
1 change: 1 addition & 0 deletions gloo/allreduce.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ struct AllreduceOptionsImpl {
UNSPECIFIED = 0,
RING = 1,
BCUBE = 2,
SHM = 3,
};

explicit AllreduceOptionsImpl(const std::shared_ptr<Context>& context)
Expand Down
Loading
Loading