Update lbann version and simplify installation (#19579)

* Added hash values for LBANN v0.101 and Hydrogen v1.5.0.  Updated the
LBANN package to be more successful in resolving a legal configuration
of MPI and HWLOC packages.  This required the removal of the MPI
virtual package since it is unable to resolve dependencies with
minimum version requirements. As a result, enabling a reasonable
install line for LBANN requires explicit forwarding of MPI
variants to Hydrogen and Aluminum.  Due to the lack of variant
forwarding, there are many explicitly replicated dependencies for both
LBANN and Hydrogen.  Fixed the error in LBANN where the gpu variant was
replaced by the cuda variant but not all dependencies were updated.

* Fixed the minimum cuDNN version for newer versions of LBANN.

* Added explicit versioning of the MPI libraries for DiHydrogen to avoid
all of the conflicts with minimum required versions of the OpenMPI library.

* Removed explicit MPI versions and went back to using the MPI virtual
dependency.  Updated construction of variant forwarding to use
iterative construction of constraints and variants.  This exacerbates
the challenges with backtracking in the current concretizer, but
should be fixed in the new concretizer.

* Added support for including the DiHydrogen library in LBANN as well as
support for the distributed convolution (DistConv) parallel
algorithms.  Also include support for building with half precision.

* Moving dependencies around

* Added conflict statement to ensure that the variant dihydrogen is
required for distconv.

* Removed the preferred field

* Fixed Flake8 and cuDNN version bounds
This commit is contained in:
Brian Van Essen 2020-10-30 12:51:10 -07:00 committed by GitHub
parent a04e7686f8
commit 7dea225fce
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 76 additions and 39 deletions

View file

@ -38,7 +38,7 @@ class Aluminum(CMakePackage, CudaPackage):
depends_on('cmake@3.16.0:', type='build') depends_on('cmake@3.16.0:', type='build')
depends_on('mpi') depends_on('mpi')
depends_on('nccl', when='+nccl') depends_on('nccl', when='+nccl')
depends_on('hwloc') depends_on('hwloc@2.0:')
generator = 'Ninja' generator = 'Ninja'
depends_on('ninja', type='build') depends_on('ninja', type='build')

View file

@ -67,7 +67,11 @@ class Dihydrogen(CMakePackage, CudaPackage):
depends_on('mpi') depends_on('mpi')
depends_on('catch2', type='test') depends_on('catch2', type='test')
depends_on('aluminum', when='+al ~cuda') # Specify the correct version of Aluminum
depends_on('aluminum@0.4:0.4.99', when='@0.1:0.1.99 +al')
depends_on('aluminum@0.5:', when='@:0.0,0.2: +al')
# Add Aluminum variants
depends_on('aluminum +cuda +nccl +ht +cuda_rma', when='+al +cuda') depends_on('aluminum +cuda +nccl +ht +cuda_rma', when='+al +cuda')
depends_on('cuda', when=('+cuda' or '+legacy')) depends_on('cuda', when=('+cuda' or '+legacy'))

View file

@ -18,6 +18,7 @@ class Hydrogen(CMakePackage, CudaPackage):
maintainers = ['bvanessen'] maintainers = ['bvanessen']
version('develop', branch='hydrogen') version('develop', branch='hydrogen')
version('1.5.0', sha256='03dd487fb23b9fdbc715554a8ea48c3196a1021502e61b0172ef3fdfbee75180')
version('1.4.0', sha256='c13374ff4a6c4d1076e47ba8c8d91a7082588b9958d1ed89cffb12f1d2e1452e') version('1.4.0', sha256='c13374ff4a6c4d1076e47ba8c8d91a7082588b9958d1ed89cffb12f1d2e1452e')
version('1.3.4', sha256='7979f6656f698f0bbad6798b39d4b569835b3013ff548d98089fce7c283c6741') version('1.3.4', sha256='7979f6656f698f0bbad6798b39d4b569835b3013ff548d98089fce7c283c6741')
version('1.3.3', sha256='a51a1cfd40ac74d10923dfce35c2c04a3082477683f6b35e7b558ea9f4bb6d51') version('1.3.3', sha256='a51a1cfd40ac74d10923dfce35c2c04a3082477683f6b35e7b558ea9f4bb6d51')
@ -62,8 +63,9 @@ class Hydrogen(CMakePackage, CudaPackage):
description='Use OpenMP taskloops instead of parallel for loops.') description='Use OpenMP taskloops instead of parallel for loops.')
variant('half', default=True, variant('half', default=True,
description='Builds with support for FP16 precision data types') description='Builds with support for FP16 precision data types')
depends_on('cmake@3.16.0:', type='build') depends_on('cmake@3.16.0:', type='build')
depends_on('mpi')
depends_on('hwloc@2.0:')
# Note that #1712 forces us to enumerate the different blas variants # Note that #1712 forces us to enumerate the different blas variants
depends_on('openblas', when='blas=openblas ~openmp_blas ~int64_blas') depends_on('openblas', when='blas=openblas ~openmp_blas ~int64_blas')
@ -85,14 +87,17 @@ class Hydrogen(CMakePackage, CudaPackage):
depends_on('essl threads=openmp +ilp64', when='blas=essl +openmp_blas +int64_blas') depends_on('essl threads=openmp +ilp64', when='blas=essl +openmp_blas +int64_blas')
depends_on('netlib-lapack +external-blas', when='blas=essl') depends_on('netlib-lapack +external-blas', when='blas=essl')
depends_on('aluminum', when='+al ~cuda') # Specify the correct version of Aluminum
depends_on('aluminum@:0.3.99', when='@:1.3.99 +al')
depends_on('aluminum@0.4:0.4.99', when='@1.4:1.4.99 +al')
depends_on('aluminum@0.5:', when='@:1.0,1.5.0: +al')
# Add Aluminum variants
depends_on('aluminum +cuda +nccl +ht +cuda_rma', when='+al +cuda') depends_on('aluminum +cuda +nccl +ht +cuda_rma', when='+al +cuda')
# Note that this forces us to use OpenBLAS until #1712 is fixed # Note that this forces us to use OpenBLAS until #1712 is fixed
depends_on('lapack', when='blas=openblas ~openmp_blas') depends_on('lapack', when='blas=openblas ~openmp_blas')
depends_on('mpi')
depends_on('scalapack', when='+scalapack') depends_on('scalapack', when='+scalapack')
depends_on('gmp', when='+mpfr') depends_on('gmp', when='+mpfr')
depends_on('mpc', when='+mpfr') depends_on('mpc', when='+mpfr')

View file

@ -20,6 +20,7 @@ class Lbann(CMakePackage, CudaPackage):
maintainers = ['bvanessen'] maintainers = ['bvanessen']
version('develop', branch='develop') version('develop', branch='develop')
version('0.101', sha256='69d3fe000a88a448dc4f7e263bcb342c34a177bd9744153654528cd86335a1f7')
version('0.100', sha256='d1bab4fb6f1b80ae83a7286cc536a32830890f6e5b0c3107a17c2600d0796912') version('0.100', sha256='d1bab4fb6f1b80ae83a7286cc536a32830890f6e5b0c3107a17c2600d0796912')
version('0.99', sha256='3358d44f1bc894321ce07d733afdf6cb7de39c33e3852d73c9f31f530175b7cd') version('0.99', sha256='3358d44f1bc894321ce07d733afdf6cb7de39c33e3852d73c9f31f530175b7cd')
version('0.98.1', sha256='9a2da8f41cd8bf17d1845edf9de6d60f781204ebd37bffba96d8872036c10c66') version('0.98.1', sha256='9a2da8f41cd8bf17d1845edf9de6d60f781204ebd37bffba96d8872036c10c66')
@ -33,7 +34,6 @@ class Lbann(CMakePackage, CudaPackage):
version('0.92', sha256='9187c5bcbc562c2828fe619d53884ab80afb1bcd627a817edb935b80affe7b84') version('0.92', sha256='9187c5bcbc562c2828fe619d53884ab80afb1bcd627a817edb935b80affe7b84')
version('0.91', sha256='b69f470829f434f266119a33695592f74802cff4b76b37022db00ab32de322f5') version('0.91', sha256='b69f470829f434f266119a33695592f74802cff4b76b37022db00ab32de322f5')
variant('nccl', default=False, description='Builds with support for NCCL communication lib')
variant('opencv', default=True, description='Builds with support for image processing routines with OpenCV') variant('opencv', default=True, description='Builds with support for image processing routines with OpenCV')
variant('seq_init', default=False, description='Force serial initialization of weight matrices.') variant('seq_init', default=False, description='Force serial initialization of weight matrices.')
variant('dtype', default='float', variant('dtype', default='float',
@ -46,6 +46,14 @@ class Lbann(CMakePackage, CudaPackage):
variant('conduit', default=True, variant('conduit', default=True,
description='Builds with support for Conduit Library ' description='Builds with support for Conduit Library '
'(note that for v0.99 conduit is required)') '(note that for v0.99 conduit is required)')
variant('half', default=False,
description='Builds with support for FP16 precision data types')
variant('dihydrogen', default=False,
description='Builds with support for DiHydrogen Tensor Library')
variant('distconv', default=False,
description='Builds with support for spatial, filter, or channel '
'distributed convolutions')
variant('vtune', default=False, description='Builds with support for Intel VTune') variant('vtune', default=False, description='Builds with support for Intel VTune')
variant('docs', default=False, description='Builds with support for building documentation') variant('docs', default=False, description='Builds with support for building documentation')
variant('extras', default=False, description='Add python modules for LBANN related tools') variant('extras', default=False, description='Add python modules for LBANN related tools')
@ -54,57 +62,68 @@ class Lbann(CMakePackage, CudaPackage):
depends_on('cmake@3.16.0:', type='build') depends_on('cmake@3.16.0:', type='build')
# It seems that there is a need for one statement per version bounds # Specify the correct versions of Hydrogen
depends_on('hydrogen +openmp_blas +shared +int64', when='@:0.90,0.95: ~al') depends_on('hydrogen@:1.3.4', when='@0.95:0.100')
depends_on('hydrogen +openmp_blas +shared +int64 +al', when='@:0.90,0.95: +al') depends_on('hydrogen@1.4.0:1.4.99', when='@0.101:0.101.99')
depends_on('hydrogen@1.5.0:', when='@:0.90,0.102:')
depends_on('hydrogen +openmp_blas +shared +int64 build_type=Debug', # Add Hydrogen variants
when='build_type=Debug @:0.90,0.95: ~al') depends_on('hydrogen +openmp_blas +shared +int64')
depends_on('hydrogen +openmp_blas +shared +int64 build_type=Debug +al', depends_on('hydrogen +al', when='+al')
when='build_type=Debug @:0.90,0.95: +al') depends_on('hydrogen +cuda', when='+cuda')
depends_on('hydrogen +half', when='+half')
depends_on('hydrogen +openmp_blas +shared +int64 +cuda', depends_on('hydrogen build_type=Debug', when='build_type=Debug')
when='+gpu @:0.90,0.95: ~al')
depends_on('hydrogen +openmp_blas +shared +int64 +cuda +al',
when='+gpu @:0.90,0.95: +al')
depends_on('hydrogen +openmp_blas +shared +int64 +cuda build_type=Debug',
when='build_type=Debug @:0.90,0.95: +gpu')
depends_on('hydrogen +openmp_blas +shared +int64 +cuda build_type=Debug +al',
when='build_type=Debug @:0.90,0.95: +gpu +al')
# Older versions depended on Elemental not Hydrogen # Older versions depended on Elemental not Hydrogen
depends_on('elemental +openmp_blas +shared +int64', when='@0.91:0.94') depends_on('elemental +openmp_blas +shared +int64', when='@0.91:0.94')
depends_on('elemental +openmp_blas +shared +int64 build_type=Debug', depends_on('elemental +openmp_blas +shared +int64 build_type=Debug',
when='build_type=Debug @0.91:0.94') when='build_type=Debug @0.91:0.94')
depends_on('aluminum', when='@:0.90,0.95: +al ~gpu') # Specify the correct version of Aluminum
depends_on('aluminum +cuda +ht', when='@:0.90,0.95: +al +cuda ~nccl') depends_on('aluminum@:0.3.99', when='@0.95:0.100 +al')
depends_on('aluminum +cuda +nccl +ht', when='@:0.90,0.95: +al +cuda +nccl') depends_on('aluminum@0.4:0.4.99', when='@0.101:0.101.99 +al')
depends_on('aluminum@0.5:', when='@:0.90,0.102: +al')
depends_on('cudnn', when='+cuda') # Add Aluminum variants
depends_on('aluminum +cuda +nccl +ht +cuda_rma', when='+al +cuda')
depends_on('dihydrogen +openmp', when='+dihydrogen')
depends_on('dihydrogen +cuda', when='+dihydrogen +cuda')
depends_on('dihydrogen +al', when='+dihydrogen +al')
depends_on('dihydrogen +legacy +cuda', when='+distconv')
depends_on('dihydrogen +half', when='+dihydrogen +half')
depends_on('dihydrogen@0.1', when='@0.101:0.101.99 +dihydrogen')
depends_on('dihydrogen@:0.0,0.2:', when='@:0.90,0.102: +dihydrogen')
conflicts('~dihydrogen', when='+distconv')
depends_on('cudnn', when='@0.90:0.100.99 +cuda')
depends_on('cudnn@8.0.2:', when='@:0.90,0.101: +cuda')
depends_on('cub', when='@0.94:0.98.2 +cuda') depends_on('cub', when='@0.94:0.98.2 +cuda')
depends_on('mpi') depends_on('mpi')
depends_on('hwloc') depends_on('hwloc@2.0:')
depends_on('half', when='+half')
# LBANN wraps OpenCV calls in OpenMP parallel loops, build without OpenMP # LBANN wraps OpenCV calls in OpenMP parallel loops, build without OpenMP
# Additionally disable video related options, they incorrectly link in a # Additionally disable video related options, they incorrectly link in a
# bad OpenMP library when building with clang or Intel compilers # bad OpenMP library when building with clang or Intel compilers
# Note that for Power systems we want the environment to add +powerpc +vsx depends_on('opencv@4.1.0: build_type=RelWithDebInfo +core +highgui +imgproc +jpeg '
depends_on('opencv@3.2.0: +core +highgui +imgproc +jpeg +png +tiff +zlib ' '+png +tiff +zlib +fast-math ~calib3d ~cuda ~dnn ~eigen'
'+fast-math ~calib3d ~cuda ~dnn ~eigen'
'~features2d ~flann ~gtk ~ipp ~ipp_iw ~jasper ~java ~lapack ~ml' '~features2d ~flann ~gtk ~ipp ~ipp_iw ~jasper ~java ~lapack ~ml'
'~openmp ~opencl ~opencl_svm ~openclamdblas ~openclamdfft' '~openmp ~opencl ~opencl_svm ~openclamdblas ~openclamdfft'
'~pthreads_pf ~python ~qt ~stitching ~superres ~ts ~video' '~pthreads_pf ~python ~qt +shared ~stitching ~superres ~ts'
'~videostab ~videoio ~vtk', when='+opencv') '~video ~videostab ~videoio ~vtk', when='+opencv')
# Note that for Power systems we want the environment to add +powerpc +vsx
depends_on('opencv@4.1.0: +powerpc +vsx', when='+opencv arch=ppc64le:')
depends_on('cnpy') depends_on('cnpy')
depends_on('nccl', when='@0.94:0.98.2 +cuda +nccl') depends_on('nccl', when='@0.94:0.98.2 +cuda')
depends_on('conduit@0.4.0: +hdf5', when='@0.94:0.99 +conduit') depends_on('conduit@0.4.0: +hdf5~hdf5_compat', when='@0.94:0.99 +conduit')
depends_on('conduit@0.4.0: +hdf5', when='@:0.90,0.99:') depends_on('conduit@0.4.0: +hdf5~hdf5_compat', when='@:0.90,0.99:')
depends_on('python@3: +shared', type=('build', 'run'), when='@:0.90,0.99:') depends_on('python@3:3.7.9 +shared', type=('build', 'run'), when='@:0.90,0.99:')
extends("python") extends("python")
depends_on('py-setuptools', type='build') depends_on('py-setuptools', type='build')
depends_on('py-argparse', type='run', when='@:0.90,0.99: ^python@:2.6') depends_on('py-argparse', type='run', when='@:0.90,0.99: ^python@:2.6')
@ -116,7 +135,8 @@ class Lbann(CMakePackage, CudaPackage):
depends_on('py-pandas@0.24.1:', type='run', when='@:0.90,0.99: +extras') depends_on('py-pandas@0.24.1:', type='run', when='@:0.90,0.99: +extras')
depends_on('py-texttable@1.4.0:', type='run', when='@:0.90,0.99: +extras') depends_on('py-texttable@1.4.0:', type='run', when='@:0.90,0.99: +extras')
depends_on('py-pytest', type='test', when='@:0.90,0.99:') depends_on('py-pytest', type='test', when='@:0.90,0.99:')
depends_on('py-protobuf+cpp@3.6.1:', type=('build', 'run'), when='@:0.90,0.99:') depends_on('py-protobuf+cpp@3.10.0', type=('build', 'run'), when='@:0.90,0.99:')
depends_on('protobuf+shared@3.10.0', when='@:0.90,0.99:')
depends_on('py-breathe', type='build', when='+docs') depends_on('py-breathe', type='build', when='+docs')
depends_on('doxygen', type='build', when='+docs') depends_on('doxygen', type='build', when='+docs')
@ -220,6 +240,14 @@ def cmake_args(self):
'-DNCCL_DIR={0}'.format( '-DNCCL_DIR={0}'.format(
spec['nccl'].prefix)]) spec['nccl'].prefix)])
if spec.satisfies('@:0.90') or spec.satisfies('@0.100:'):
args.extend([
'-DLBANN_WITH_DIHYDROGEN:BOOL=%s' % ('+dihydrogen' in spec)])
if spec.satisfies('@:0.90') or spec.satisfies('@0.101:'):
args.extend([
'-DLBANN_WITH_DISTCONV:BOOL=%s' % ('+distconv' in spec)])
return args return args
@when('@0.91:0.93') @when('@0.91:0.93')