Adding support for ROCm for the LBANN software stack. (#21716)
* Also removed the LBANN CUDA CMake flags that are set by the version of Hydrogen that LBANN is compiled against.
* Updated recipes to use HWLOC 2.3 with ROCm to enable topology awareness.

Co-authored-by: Harmen Stoppels <harmenstoppels@gmail.com>
This commit is contained in:
parent
482a1a86be
commit
c9246af112
4 changed files with 92 additions and 8 deletions
|
@ -7,7 +7,7 @@
|
|||
from spack import *
|
||||
|
||||
|
||||
class Aluminum(CMakePackage, CudaPackage):
|
||||
class Aluminum(CMakePackage, CudaPackage, ROCmPackage):
|
||||
"""Aluminum provides a generic interface to high-performance
|
||||
communication libraries, with a focus on allreduce
|
||||
algorithms. Blocking and non-blocking algorithms and GPU-aware
|
||||
|
@ -38,13 +38,19 @@ class Aluminum(CMakePackage, CudaPackage):
|
|||
' communication of accelerator data')
|
||||
variant('cuda_rma', default=False, description='Builds with support for CUDA intra-node '
|
||||
' Put/Get and IPC RMA functionality')
|
||||
variant('rccl', default=False, description='Builds with support for NCCL communication lib')
|
||||
|
||||
depends_on('cmake@3.17.0:', type='build')
|
||||
depends_on('mpi')
|
||||
depends_on('nccl', when='+nccl')
|
||||
depends_on('hwloc@1.11:')
|
||||
depends_on('hwloc +cuda +nvml', when='+cuda')
|
||||
depends_on('hwloc@2.3.0:', when='+rocm')
|
||||
depends_on('cub', when='@:0.1,0.6.0: +cuda ^cuda@:10.99')
|
||||
depends_on('hipcub', when='@:0.1,0.6.0: +rocm')
|
||||
|
||||
conflicts('~cuda', when='+cuda_rma', msg='CUDA RMA support requires CUDA')
|
||||
conflicts('+cuda', when='+rocm', msg='CUDA and ROCm support are mutually exclusive')
|
||||
|
||||
generator = 'Ninja'
|
||||
depends_on('ninja', type='build')
|
||||
|
@ -54,7 +60,8 @@ def cmake_args(self):
|
|||
args = [
|
||||
'-DCMAKE_CXX_STANDARD=14',
|
||||
'-DALUMINUM_ENABLE_CUDA:BOOL=%s' % ('+cuda' in spec),
|
||||
'-DALUMINUM_ENABLE_NCCL:BOOL=%s' % ('+nccl' in spec)]
|
||||
'-DALUMINUM_ENABLE_NCCL:BOOL=%s' % ('+nccl' in spec or '+rccl' in spec),
|
||||
'-DALUMINUM_ENABLE_ROCM:BOOL=%s' % ('+rocm' in spec)]
|
||||
|
||||
if '+cuda' in spec:
|
||||
args.append('-DCMAKE_CUDA_STANDARD=14')
|
||||
|
@ -82,4 +89,16 @@ def cmake_args(self):
|
|||
args.extend([
|
||||
'-DOpenMP_DIR={0}'.format(clang_root)])
|
||||
|
||||
if '+rocm' in spec:
|
||||
args.extend([
|
||||
'-DHIP_ROOT_DIR={0}'.format(spec['hip'].prefix),
|
||||
'-DHIP_CXX_COMPILER={0}'.format(self.spec['hip'].hipcc)])
|
||||
archs = self.spec.variants['amdgpu_target'].value
|
||||
if archs != 'none':
|
||||
arch_str = ",".join(archs)
|
||||
args.append(
|
||||
'-DHIP_HIPCC_FLAGS=--amdgpu-target={0}'
|
||||
' -g -fsized-deallocation -fPIC'.format(arch_str)
|
||||
)
|
||||
|
||||
return args
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
from spack import *
|
||||
|
||||
|
||||
class Dihydrogen(CMakePackage, CudaPackage):
|
||||
class Dihydrogen(CMakePackage, CudaPackage, ROCmPackage):
|
||||
"""DiHydrogen is the second version of the Hydrogen fork of the
|
||||
well-known distributed linear algebra library,
|
||||
Elemental. DiHydrogen aims to be a basic distributed
|
||||
|
@ -77,10 +77,16 @@ class Dihydrogen(CMakePackage, CudaPackage):
|
|||
|
||||
# Add Aluminum variants
|
||||
depends_on('aluminum +cuda +nccl +ht +cuda_rma', when='+al +cuda')
|
||||
depends_on('aluminum +rocm +rccl +ht +cuda_rma', when='+al +rocm')
|
||||
|
||||
for arch in CudaPackage.cuda_arch_values:
|
||||
depends_on('aluminum cuda_arch=%s' % arch, when='+al +cuda cuda_arch=%s' % arch)
|
||||
|
||||
# The +rocm variant and its amdgpu_target values are not automatically passed to
|
||||
# dependencies, so do it manually.
|
||||
for val in ROCmPackage.amdgpu_targets:
|
||||
depends_on('aluminum amdgpu_target=%s' % val, when='amdgpu_target=%s' % val)
|
||||
|
||||
depends_on('cuda', when=('+cuda' or '+legacy'))
|
||||
depends_on('cudnn', when=('+cuda' or '+legacy'))
|
||||
depends_on('cub', when='^cuda@:10.99')
|
||||
|
@ -190,6 +196,18 @@ def cmake_args(self):
|
|||
'-DOpenMP_libomp_LIBRARY={0}/lib/libomp.dylib'.format(
|
||||
clang_root)])
|
||||
|
||||
if '+rocm' in spec:
|
||||
args.extend([
|
||||
'-DHIP_ROOT_DIR={0}'.format(spec['hip'].prefix),
|
||||
'-DHIP_CXX_COMPILER={0}'.format(self.spec['hip'].hipcc)])
|
||||
archs = self.spec.variants['amdgpu_target'].value
|
||||
if archs != 'none':
|
||||
arch_str = ",".join(archs)
|
||||
args.append(
|
||||
'-DHIP_HIPCC_FLAGS=--amdgpu-target={0}'
|
||||
' -g -fsized-deallocation -fPIC'.format(arch_str)
|
||||
)
|
||||
|
||||
return args
|
||||
|
||||
def setup_build_environment(self, env):
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
from spack import *
|
||||
|
||||
|
||||
class Hydrogen(CMakePackage, CudaPackage):
|
||||
class Hydrogen(CMakePackage, CudaPackage, ROCmPackage):
|
||||
"""Hydrogen: Distributed-memory dense and sparse-direct linear algebra
|
||||
and optimization library. Based on the Elemental library."""
|
||||
|
||||
|
@ -64,11 +64,13 @@ class Hydrogen(CMakePackage, CudaPackage):
|
|||
description='Builds with support for FP16 precision data types')
|
||||
|
||||
conflicts('~openmp', when='+omp_taskloops')
|
||||
conflicts('+cuda', when='+rocm', msg='CUDA and ROCm support are mutually exclusive')
|
||||
|
||||
depends_on('cmake@3.17.0:', type='build')
|
||||
depends_on('mpi')
|
||||
depends_on('hwloc@1.11:')
|
||||
depends_on('hwloc +cuda +nvml', when='+cuda')
|
||||
depends_on('hwloc@2.3.0:', when='+rocm')
|
||||
|
||||
# Note that #1712 forces us to enumerate the different blas variants
|
||||
depends_on('openblas', when='blas=openblas')
|
||||
|
@ -96,10 +98,16 @@ class Hydrogen(CMakePackage, CudaPackage):
|
|||
|
||||
# Add Aluminum variants
|
||||
depends_on('aluminum +cuda +nccl +ht +cuda_rma', when='+al +cuda')
|
||||
depends_on('aluminum +rocm +rccl +ht', when='+al +rocm')
|
||||
|
||||
for arch in CudaPackage.cuda_arch_values:
|
||||
depends_on('aluminum cuda_arch=%s' % arch, when='+al +cuda cuda_arch=%s' % arch)
|
||||
|
||||
# The +rocm variant and its amdgpu_target values are not automatically passed to
|
||||
# dependencies, so do it manually.
|
||||
for val in ROCmPackage.amdgpu_targets:
|
||||
depends_on('aluminum amdgpu_target=%s' % val, when='+al +rocm amdgpu_target=%s' % val)
|
||||
|
||||
# Note that this forces us to use OpenBLAS until #1712 is fixed
|
||||
depends_on('lapack', when='blas=openblas ~openmp_blas')
|
||||
|
||||
|
@ -110,6 +118,7 @@ class Hydrogen(CMakePackage, CudaPackage):
|
|||
|
||||
depends_on('cuda', when='+cuda')
|
||||
depends_on('cub', when='^cuda@:10.99')
|
||||
depends_on('hipcub', when='+rocm')
|
||||
depends_on('half', when='+half')
|
||||
|
||||
depends_on('llvm-openmp', when='%apple-clang +openmp')
|
||||
|
@ -143,8 +152,9 @@ def cmake_args(self):
|
|||
'-DHydrogen_ENABLE_MPC:BOOL=%s' % ('+mpfr' in spec),
|
||||
'-DHydrogen_GENERAL_LAPACK_FALLBACK=ON',
|
||||
'-DHydrogen_ENABLE_ALUMINUM=%s' % ('+al' in spec),
|
||||
'-DHydrogen_ENABLE_CUB=%s' % ('+cuda' in spec),
|
||||
'-DHydrogen_ENABLE_CUB=%s' % ('+cuda' in spec or '+rocm' in spec),
|
||||
'-DHydrogen_ENABLE_CUDA=%s' % ('+cuda' in spec),
|
||||
'-DHydrogen_ENABLE_ROCM=%s' % ('+rocm' in spec),
|
||||
'-DHydrogen_ENABLE_TESTING=%s' % ('+test' in spec),
|
||||
'-DHydrogen_ENABLE_HALF=%s' % ('+half' in spec),
|
||||
'-DHydrogen_ENABLE_GPU_FP16=%s' % enable_gpu_fp16,
|
||||
|
@ -153,6 +163,18 @@ def cmake_args(self):
|
|||
if '+cuda' in spec:
|
||||
args.append('-DCMAKE_CUDA_STANDARD=14')
|
||||
|
||||
if '+rocm' in spec:
|
||||
args.extend([
|
||||
'-DHIP_ROOT_DIR={0}'.format(spec['hip'].prefix),
|
||||
'-DHIP_CXX_COMPILER={0}'.format(self.spec['hip'].hipcc)])
|
||||
archs = self.spec.variants['amdgpu_target'].value
|
||||
if archs != 'none':
|
||||
arch_str = ",".join(archs)
|
||||
args.append(
|
||||
'-DHIP_HIPCC_FLAGS=--amdgpu-target={0}'
|
||||
' -g -fsized-deallocation -fPIC'.format(arch_str)
|
||||
)
|
||||
|
||||
# Add support for OS X to find OpenMP (LLVM installed via brew)
|
||||
if self.spec.satisfies('%clang +openmp platform=darwin'):
|
||||
clang = self.compiler.cc
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
from spack import *
|
||||
|
||||
|
||||
class Lbann(CMakePackage, CudaPackage):
|
||||
class Lbann(CMakePackage, CudaPackage, ROCmPackage):
|
||||
"""LBANN: Livermore Big Artificial Neural Network Toolkit. A distributed
|
||||
memory, HPC-optimized, model and data parallel training toolkit for deep
|
||||
neural networks."""
|
||||
|
@ -73,6 +73,7 @@ class Lbann(CMakePackage, CudaPackage):
|
|||
conflicts('~cuda', when='+nvprof')
|
||||
conflicts('~hwloc', when='+al')
|
||||
conflicts('~cuda', when='+nvshmem')
|
||||
conflicts('+cuda', when='+rocm', msg='CUDA and ROCm support are mutually exclusive')
|
||||
|
||||
depends_on('cmake@3.17.0:', type='build')
|
||||
|
||||
|
@ -89,6 +90,8 @@ class Lbann(CMakePackage, CudaPackage):
|
|||
depends_on('hydrogen +cuda', when='+cuda')
|
||||
depends_on('hydrogen ~half', when='~half')
|
||||
depends_on('hydrogen +half', when='+half')
|
||||
depends_on('hydrogen ~rocm', when='~rocm')
|
||||
depends_on('hydrogen +rocm', when='+rocm')
|
||||
depends_on('hydrogen build_type=Debug', when='build_type=Debug')
|
||||
|
||||
# Older versions depended on Elemental not Hydrogen
|
||||
|
@ -103,6 +106,7 @@ class Lbann(CMakePackage, CudaPackage):
|
|||
|
||||
# Add Aluminum variants
|
||||
depends_on('aluminum +cuda +nccl +ht +cuda_rma', when='+al +cuda')
|
||||
depends_on('aluminum +rocm +rccl +ht', when='+al +rocm')
|
||||
|
||||
depends_on('dihydrogen +openmp', when='+dihydrogen')
|
||||
depends_on('dihydrogen ~cuda', when='+dihydrogen ~cuda')
|
||||
|
@ -114,6 +118,8 @@ class Lbann(CMakePackage, CudaPackage):
|
|||
depends_on('dihydrogen +half', when='+dihydrogen +half')
|
||||
depends_on('dihydrogen ~nvshmem', when='+dihydrogen ~nvshmem')
|
||||
depends_on('dihydrogen +nvshmem', when='+dihydrogen +nvshmem')
|
||||
depends_on('dihydrogen ~rocm', when='+dihydrogen ~rocm')
|
||||
depends_on('dihydrogen +rocm', when='+dihydrogen +rocm')
|
||||
depends_on('dihydrogen@0.1', when='@0.101:0.101.99 +dihydrogen')
|
||||
depends_on('dihydrogen@:0.0,0.2:', when='@:0.90,0.102: +dihydrogen')
|
||||
conflicts('~dihydrogen', when='+distconv')
|
||||
|
@ -124,13 +130,22 @@ class Lbann(CMakePackage, CudaPackage):
|
|||
depends_on('dihydrogen cuda_arch=%s' % arch, when='+dihydrogen cuda_arch=%s' % arch)
|
||||
depends_on('nccl cuda_arch=%s' % arch, when='+cuda cuda_arch=%s' % arch)
|
||||
|
||||
# The +rocm variant and its amdgpu_target values are not automatically passed to
|
||||
# dependencies, so do it manually.
|
||||
for val in ROCmPackage.amdgpu_targets:
|
||||
depends_on('hydrogen amdgpu_target=%s' % val, when='amdgpu_target=%s' % val)
|
||||
depends_on('aluminum amdgpu_target=%s' % val, when='+al amdgpu_target=%s' % val)
|
||||
depends_on('dihydrogen amdgpu_target=%s' % val, when='+dihydrogen amdgpu_target=%s' % val)
|
||||
|
||||
depends_on('cudnn', when='@0.90:0.100.99 +cuda')
|
||||
depends_on('cudnn@8.0.2:', when='@:0.90,0.101: +cuda')
|
||||
depends_on('cub', when='@0.94:0.98.2 +cuda ^cuda@:10.99')
|
||||
depends_on('hipcub', when='+rocm')
|
||||
depends_on('mpi')
|
||||
depends_on('hwloc@1.11:', when='@:0.90,0.102: +hwloc')
|
||||
depends_on('hwloc@1.11:1.11.99', when='@0.95:0.101.99 +hwloc')
|
||||
depends_on('hwloc +cuda +nvml', when='+cuda')
|
||||
depends_on('hwloc@2.3.0:', when='+rocm')
|
||||
|
||||
depends_on('half', when='+half')
|
||||
|
||||
|
@ -236,8 +251,6 @@ def cmake_args(self):
|
|||
'-DLBANN_WITH_HWLOC=%s' % ('+hwloc' in spec),
|
||||
'-DLBANN_WITH_ALUMINUM:BOOL=%s' % ('+al' in spec),
|
||||
'-DLBANN_WITH_CONDUIT:BOOL=%s' % ('+conduit' in spec),
|
||||
'-DLBANN_WITH_CUDA:BOOL=%s' % ('+cuda' in spec),
|
||||
'-DLBANN_WITH_CUDNN:BOOL=%s' % ('+cuda' in spec),
|
||||
'-DLBANN_WITH_NVSHMEM:BOOL=%s' % ('+nvshmem' in spec),
|
||||
'-DLBANN_WITH_FFT:BOOL=%s' % ('+fft' in spec),
|
||||
'-DLBANN_WITH_ONEDNN:BOOL=%s' % ('+onednn' in spec),
|
||||
|
@ -322,6 +335,18 @@ def cmake_args(self):
|
|||
args.append(
|
||||
'-DLBANN_WITH_DISTCONV:BOOL=%s' % ('+distconv' in spec))
|
||||
|
||||
if '+rocm' in spec:
|
||||
args.extend([
|
||||
'-DHIP_ROOT_DIR={0}'.format(spec['hip'].prefix),
|
||||
'-DHIP_CXX_COMPILER={0}'.format(self.spec['hip'].hipcc)])
|
||||
archs = self.spec.variants['amdgpu_target'].value
|
||||
if archs != 'none':
|
||||
arch_str = ",".join(archs)
|
||||
args.append(
|
||||
'-DHIP_HIPCC_FLAGS=--amdgpu-target={0}'
|
||||
' -g -fsized-deallocation -fPIC -std=c++17'.format(arch_str)
|
||||
)
|
||||
|
||||
return args
|
||||
|
||||
@when('@0.91:0.93')
|
||||
|
|
Loading…
Reference in a new issue