Skip to content
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 21 additions & 11 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -188,27 +188,37 @@ else
endif
endif

# Check if OpenMPI and NCCL are available, include them if so, for multi-GPU training
# Check if NCCL is available, include if so, for multi-GPU training
ifeq ($(NO_MULTI_GPU), 1)
$(info → Multi-GPU (OpenMPI + NCCL) is manually disabled)
$(info → Multi-GPU (NCCL) is manually disabled)
else
ifneq ($(OS), Windows_NT)
# Detect if running on macOS or Linux
ifeq ($(SHELL_UNAME), Darwin)
$(info ✗ Multi-GPU on CUDA on Darwin is not supported, skipping OpenMPI + NCCL support)
else ifeq ($(shell [ -d /usr/lib/x86_64-linux-gnu/openmpi/lib/ ] && [ -d /usr/lib/x86_64-linux-gnu/openmpi/include/ ] && echo "exists"), exists)
$(info ✓ OpenMPI found, OK to train with multiple GPUs)
NVCC_INCLUDES += -I/usr/lib/x86_64-linux-gnu/openmpi/include
NVCC_LDFLAGS += -L/usr/lib/x86_64-linux-gnu/openmpi/lib/
NVCC_LDLIBS += -lmpi -lnccl
NVCC_FLAGS += -DMULTI_GPU
$(info ✗ Multi-GPU on CUDA on Darwin is not supported, skipping NCCL support)
else ifeq ($(shell dpkg -l | grep -q nccl && echo "exists"), exists)
$(info ✓ NCCL found, OK to train with multiple GPUs)
NVCC_LDLIBS += -lnccl
else
$(info ✗ OpenMPI is not found, disabling multi-GPU support)
$(info ---> On Linux you can try install OpenMPI with `sudo apt install openmpi-bin openmpi-doc libopenmpi-dev`)
$(info ✗ NCCL is not found, disabling multi-GPU support)
$(info ---> On Linux you can try install NCCL with `sudo apt install libnccl2 libnccl-dev`)
endif
endif
endif

ifeq ($(NO_USE_MPI), 1)
$(info → MPI is manually disabled)
else ifeq ($(shell [ -d /usr/lib/x86_64-linux-gnu/openmpi/lib/ ] && [ -d /usr/lib/x86_64-linux-gnu/openmpi/include/ ] && echo "exists"), exists)
$(info ✓ MPI enabled)
NVCC_INCLUDES += -I/usr/lib/x86_64-linux-gnu/openmpi/include
NVCC_LDFLAGS += -L/usr/lib/x86_64-linux-gnu/openmpi/lib/
NVCC_LDLIBS += -lmpi
NVCC_FLAGS += -DUSE_MPI
NVCC_FLAGS += -DMULTI_GPU
else
$(info ✗ MPI not found)
endif

# Precision settings, default to bf16 but ability to override
PRECISION ?= BF16
VALID_PRECISIONS := FP32 FP16 BF16
Expand Down
Loading