[WIP] CEED 2.0 (#10903)
* Initial commit for v2.0 of the CEED software suite.
* Update Nek packages and gslib
* Help spack concretize the hypre version for ceed-2.0.
* Fix nekcem install error
* Add support for gfortran v8 in nek5000 and nekcem.
* Split Nek5000 into Nek5000 and Nektools
* Get Nektools to build fine on Theta
* Fix travis failure: remove unused 'import numbers' from nek5000.
* Check for gfortran if it is wrapped
* Tweak the detection of gfortran in nek5000.
* Fix Nek packages to add -std=legacy when FC=gcc
* spack install ceed~petsc works fine on Theta
* Fix flake8 errors
* Fix more flake8 tests
* Fix an import issue
* Tweak the suite-sparse package to avoid interaction with existing system
installations of suite-sparse.
* petsc: update superlu-dist dependency
* Updates in the packages: occa, libceed, and ceed.
* In the libceed package, explicitly tell nvcc which host compiler to use.
* Fix python formatting.
* Simplify the test for gfortran in nek* packages.
* ceed: 2.0 uses petsc@3.11.0
* hpgmg-0.4; use from ceed@2.0.0
* Update the hypre dependency for ceed 2.0.
* Disable the superlu-dist dependency (through hypre) when using a
+quickbuild of ceed 2.0.
* petsc-3.11.0: add xlf fix
* nekcem: has a build dependency on Python 2.7+
* hpgmg: better setting of compiler options and use python for configure
* libceed: use v0.4 tag
* libceed: fix 0.4 release oops (pkgconfig version)
* Add a patch for magma-2.5.0 that brings it up to the current 'master'.
* In the mfem package, install the examples, miniapps, and data under
$prefix/share/mfem.
* In the magma package, apply a patch to v2.5.0 that disables
magma_sparse - for testing purposes.
* In the magma package, link the 'magma' library with the
'nvToolsExt' library.
* In the magma package, update the 'magma-2.5.0.patch' with the latest
commits from the magma source repository. Also, remove the library
'nvToolsExt' from the 'magma-2.5.0-cmake.patch' - now it is not
needed.
* In the magma package, disable OpenMP when using v2.5.0 with the
IBM XL compiler.
Please enter the commit message for your changes. Lines starting
* In the mfem package, add version for the 'laghos-v2.0' tag; also,
prefix the versions `laghos-v*` with their respective development
version numbers -- this way they are properly ordered within spack
relative to the official numbered versions.
* petsc: add version 3.11.1 (#11179)
(cherry picked from commit 1eab6e3c86)
* ceed-2.0: use petsc-3.11.1
* this-is-so-dumb.f -> empty.f
This commit is contained in:
parent
a26aec520b
commit
49334f006d
18 changed files with 926 additions and 144 deletions
|
@ -20,6 +20,7 @@ class Ceed(Package):
|
|||
url = 'file://' + os.path.dirname(__file__) + '/README.md'
|
||||
sha1 = 'b2eefd95c09ba573f663a761b84811a2d7e39788'
|
||||
|
||||
version('2.0.0', sha1, expand=False)
|
||||
version('1.0.0', sha1, expand=False)
|
||||
|
||||
variant('cuda', default=False,
|
||||
|
@ -38,22 +39,53 @@ class Ceed(Package):
|
|||
# TODO: Add 'int64' variant?
|
||||
|
||||
# LibCEED
|
||||
# ceed-2.0
|
||||
depends_on('libceed@0.4~cuda', when='@2.0.0~cuda')
|
||||
depends_on('libceed@0.4+cuda', when='@2.0.0+cuda')
|
||||
depends_on('libceed@0.4+occa', when='@2.0.0+occa')
|
||||
depends_on('libceed@0.4~occa', when='@2.0.0~occa')
|
||||
# ceed-1.0
|
||||
depends_on('libceed@0.2~cuda', when='@1.0.0~cuda')
|
||||
depends_on('libceed@0.2+cuda', when='@1.0.0+cuda')
|
||||
depends_on('libceed@0.2+occa', when='@1.0.0+occa')
|
||||
depends_on('libceed@0.2~occa', when='@1.0.0~occa')
|
||||
|
||||
# OCCA
|
||||
depends_on('occa@v1.0.0-alpha.5~cuda', when='@1.0.0+occa~cuda')
|
||||
depends_on('occa@v1.0.0-alpha.5+cuda', when='@1.0.0+occa+cuda')
|
||||
# ceed-2.0
|
||||
depends_on('occa@1.0.8~cuda', when='@2.0.0+occa~cuda')
|
||||
depends_on('occa@1.0.8+cuda', when='@2.0.0+occa+cuda')
|
||||
# ceed-1.0
|
||||
depends_on('occa@1.0.0-alpha.5~cuda', when='@1.0.0+occa~cuda')
|
||||
depends_on('occa@1.0.0-alpha.5+cuda', when='@1.0.0+occa+cuda')
|
||||
|
||||
# Nek5000, GSLIB, Nekbone, and NekCEM
|
||||
# ceed-2.0
|
||||
depends_on('nek5000@17.0', when='@2.0.0+nek')
|
||||
depends_on('nektools@17.0%gcc', when='@2.0.0+nek')
|
||||
depends_on('gslib@1.0.2', when='@2.0.0+nek')
|
||||
depends_on('nekbone@17.0', when='@2.0.0+nek')
|
||||
depends_on('nekcem@7332619', when='@2.0.0+nek')
|
||||
# ceed-1.0
|
||||
depends_on('nek5000@17.0', when='@1.0.0+nek')
|
||||
depends_on('nektools@17.0%gcc', when='@1.0.0+nek')
|
||||
depends_on('gslib@1.0.2', when='@1.0.0+nek')
|
||||
depends_on('nekbone@17.0', when='@1.0.0+nek')
|
||||
depends_on('nekcem@0b8bedd', when='@1.0.0+nek')
|
||||
|
||||
# PETSc, HPGMG
|
||||
# ceed-2.0
|
||||
# For a +quickbuild we disable hdf5 and superlu-dist in PETSc.
|
||||
# Ideally, these can be turned into recommendations to Spack for
|
||||
# concretizing the PETSc spec, if Spack ever supports recommendations.
|
||||
depends_on('petsc@3.11.1~hdf5~superlu-dist',
|
||||
when='@2.0.0+petsc+quickbuild')
|
||||
depends_on('petsc@3.11.1+mpi+double~int64', when='@2.0.0+petsc~mfem')
|
||||
# The mfem petsc examples need the petsc variants +hypre, +suite-sparse,
|
||||
# and +mumps:
|
||||
depends_on('petsc@3.11.1+mpi+hypre+suite-sparse+mumps+double~int64',
|
||||
when='@2.0.0+petsc+mfem')
|
||||
depends_on('hpgmg@0.4+fe', when='@2.0.0+petsc')
|
||||
# ceed-1.0
|
||||
# For a +quickbuild we disable hdf5 and superlu-dist in PETSc.
|
||||
# Ideally, these can be turned into recommendations to Spack for
|
||||
# concretizing the PETSc spec, if Spack ever supports recommendations.
|
||||
|
@ -67,12 +99,33 @@ class Ceed(Package):
|
|||
depends_on('hpgmg@a0a5510df23b+fe', when='@1.0.0+petsc')
|
||||
|
||||
# MAGMA
|
||||
# ceed-2.0
|
||||
depends_on('magma@2.5.0', when='@2.0.0+cuda')
|
||||
# ceed-1.0
|
||||
depends_on('magma@2.3.0', when='@1.0.0+cuda')
|
||||
|
||||
# PUMI
|
||||
# ceed-2.0
|
||||
depends_on('pumi@2.2.0', when='@2.0.0+pumi')
|
||||
# ceed-1.0
|
||||
depends_on('pumi@2.1.0', when='@1.0.0+pumi')
|
||||
|
||||
# MFEM, Laghos
|
||||
# ceed-2.0
|
||||
depends_on('mfem@3.4.0+mpi+examples+miniapps', when='@2.0.0+mfem~petsc')
|
||||
depends_on('mfem@3.4.0+mpi+petsc+examples+miniapps',
|
||||
when='@2.0.0+mfem+petsc')
|
||||
depends_on('mfem@3.4.0+pumi', when='@2.0.0+mfem+pumi')
|
||||
depends_on('laghos@2.0', when='@2.0.0+mfem')
|
||||
# Help the spack concretizer find a suitable version of hypre:
|
||||
depends_on('hypre~internal-superlu', when='@2.0.0+mfem')
|
||||
depends_on('hypre~internal-superlu~superlu-dist',
|
||||
when='@2.0.0+mfem+quickbuild')
|
||||
|
||||
# If using gcc version <= 4.8 build suite-sparse version <= 5.1.0
|
||||
depends_on('suite-sparse@:5.1.0', when='@2.0.0%gcc@:4.8+mfem+petsc')
|
||||
|
||||
# ceed-1.0
|
||||
depends_on('mfem@3.3.2+mpi+examples+miniapps', when='@1.0.0+mfem~petsc')
|
||||
depends_on('mfem@3.3.2+mpi+petsc+examples+miniapps',
|
||||
when='@1.0.0+mfem+petsc')
|
||||
|
|
|
@ -12,6 +12,7 @@ class Gslib(Package):
|
|||
homepage = "https://github.com/gslib/gslib"
|
||||
git = "https://github.com/gslib/gslib.git"
|
||||
|
||||
version('1.0.3', tag='v1.0.3')
|
||||
version('1.0.2', tag='v1.0.2')
|
||||
version('1.0.1', tag='v1.0.1')
|
||||
version('1.0.0', tag='v1.0.0')
|
||||
|
|
|
@ -23,6 +23,7 @@ class Hpgmg(Package):
|
|||
tags = ['proxy-app']
|
||||
|
||||
version('develop', branch='master')
|
||||
version('0.4', 'abdabfe09453487299500b5bd8da4e6dc3d88477199bcfa38ac41d0b3c780f6f')
|
||||
version('a0a5510df23b', 'b9c50f25e541428d4735fb07344d1d0ed9fc821bdde918d8e0defa78c0d9b4f9')
|
||||
version('0.3', '12a65da216fec91daea78594ae4b5a069c8f1a700f1ba21eed9f45a79a68c793')
|
||||
|
||||
|
@ -64,7 +65,14 @@ def configure_args(self):
|
|||
if '+debug' in self.spec:
|
||||
cflags.append('-g')
|
||||
elif any(map(self.spec.satisfies, ['%gcc', '%clang', '%intel'])):
|
||||
cflags += ['-O3', '-march=native']
|
||||
cflags.append('-O3')
|
||||
if self.compiler.target in ['x86_64']:
|
||||
cflags.append('-march=native')
|
||||
else:
|
||||
cflags.append('-mcpu=native')
|
||||
cflags.append('-mtune=native')
|
||||
else:
|
||||
cflags.append('-O3')
|
||||
|
||||
args.append('--CFLAGS=' + ' '.join(cflags))
|
||||
|
||||
|
@ -75,7 +83,7 @@ def configure_args(self):
|
|||
return args
|
||||
|
||||
def configure(self, spec, prefix):
|
||||
configure(*self.configure_args())
|
||||
python('configure', *self.configure_args())
|
||||
|
||||
def build(self, spec, prefix):
|
||||
make('-C', 'build')
|
||||
|
|
|
@ -28,11 +28,13 @@ class Laghos(MakefilePackage):
|
|||
depends_on('mfem@develop+mpi+metis', when='@develop+metis')
|
||||
depends_on('mfem@develop+mpi~metis', when='@develop~metis')
|
||||
|
||||
depends_on('mfem@laghos-v2.0,3.4.0:+mpi+metis', when='@2.0:+metis')
|
||||
depends_on('mfem@laghos-v2.0,3.4.0:+mpi~metis', when='@2.0:~metis')
|
||||
# Recommended mfem version for laghos v2.0 is: ^mfem@3.4.1-laghos-v2.0
|
||||
depends_on('mfem@3.4.0:+mpi+metis', when='@2.0+metis')
|
||||
depends_on('mfem@3.4.0:+mpi~metis', when='@2.0~metis')
|
||||
|
||||
depends_on('mfem@laghos-v1.0,3.3.2:+mpi+metis', when='@1.0:+metis')
|
||||
depends_on('mfem@laghos-v1.0,3.3.2:+mpi~metis', when='@1.0:~metis')
|
||||
# Recommended mfem version for laghos v1.x is: ^mfem@3.3.1-laghos-v1.0
|
||||
depends_on('mfem@3.3.1-laghos-v1.0:+mpi+metis', when='@1.0,1.1+metis')
|
||||
depends_on('mfem@3.3.1-laghos-v1.0:+mpi~metis', when='@1.0,1.1~metis')
|
||||
|
||||
@property
|
||||
def build_targets(self):
|
||||
|
|
|
@ -13,18 +13,29 @@ class Libceed(Package):
|
|||
git = "https://github.com/CEED/libCEED.git"
|
||||
|
||||
version('develop', branch='master')
|
||||
version('0.4', tag='v0.4')
|
||||
version('0.2', tag='v0.2')
|
||||
version('0.1', tag='v0.1')
|
||||
|
||||
variant('occa', default=True, description='Enable OCCA backends')
|
||||
variant('cuda', default=False, description='Enable CUDA support')
|
||||
variant('debug', default=False, description='Enable debug build')
|
||||
variant('libxsmm', default=False, description='Enable LIBXSMM backend')
|
||||
|
||||
conflicts('+libxsmm', when='@:0.2')
|
||||
|
||||
depends_on('cuda', when='+cuda')
|
||||
|
||||
depends_on('occa@v1.0.0-alpha.5,develop', when='+occa')
|
||||
depends_on('occa@develop', when='@develop+occa')
|
||||
depends_on('occa@1.0.8:', when='@0.4+occa')
|
||||
depends_on('occa@1.0.0-alpha.5,develop', when='@:0.2+occa')
|
||||
depends_on('occa+cuda', when='+occa+cuda')
|
||||
depends_on('occa~cuda', when='+occa~cuda')
|
||||
|
||||
depends_on('libxsmm', when='+libxsmm')
|
||||
|
||||
patch('pkgconfig-version-0.4.diff', when='@0.4')
|
||||
|
||||
# occa: do not occaFree kernels
|
||||
# Repeated creation and freeing of kernels appears to expose a caching
|
||||
# bug in Occa.
|
||||
|
@ -32,18 +43,71 @@ class Libceed(Package):
|
|||
|
||||
phases = ['build', 'install']
|
||||
|
||||
def build(self, spec, prefix):
|
||||
@property
|
||||
def common_make_opts(self):
|
||||
spec = self.spec
|
||||
compiler = self.compiler
|
||||
# Note: The occa package exports OCCA_DIR in the environment
|
||||
|
||||
# Use verbose building output
|
||||
makeopts = ['V=1']
|
||||
|
||||
if '@:0.2' in spec:
|
||||
makeopts += ['NDEBUG=%s' % ('' if '+debug' in spec else '1')]
|
||||
|
||||
elif '@0.4:' in spec:
|
||||
# Determine options based on the compiler:
|
||||
if '+debug' in spec:
|
||||
opt = '-g'
|
||||
elif compiler.name == 'gcc':
|
||||
opt = '-O -g -ffp-contract=fast'
|
||||
if compiler.target in ['x86_64']:
|
||||
opt += ' -march=native'
|
||||
elif compiler.target in ['ppc64le']:
|
||||
opt += ' -mcpu=native -mtune=native'
|
||||
if compiler.version >= ver(4.9):
|
||||
opt += ' -fopenmp-simd'
|
||||
elif compiler.name == 'clang':
|
||||
opt = '-O -g -march=native -ffp-contract=fast'
|
||||
if compiler.version.string.endswith('-apple'):
|
||||
if compiler.version >= ver(10):
|
||||
opt += ' -fopenmp-simd'
|
||||
else: # not apple clang
|
||||
if compiler.version >= ver(6):
|
||||
opt += ' -fopenmp-simd'
|
||||
elif compiler.name in ['xl', 'xl_r']:
|
||||
opt = '-O -g -qsimd=auto'
|
||||
else:
|
||||
opt = '-O -g'
|
||||
makeopts += ['OPT=%s' % opt]
|
||||
|
||||
if '+cuda' in spec:
|
||||
makeopts += ['CUDA_DIR=%s' % spec['cuda'].prefix]
|
||||
nvccflags = ['-ccbin %s -Xcompiler "%s" -Xcompiler %s' %
|
||||
(compiler.cxx, opt, compiler.pic_flag)]
|
||||
nvccflags = ' '.join(nvccflags)
|
||||
makeopts += ['NVCCFLAGS=%s' % nvccflags]
|
||||
else:
|
||||
# Disable CUDA auto-detection:
|
||||
makeopts += ['CUDA_DIR=/disable-cuda']
|
||||
|
||||
if '+libxsmm' in spec:
|
||||
makeopts += ['XSMM_DIR=%s' % spec['libxsmm'].prefix]
|
||||
|
||||
return makeopts
|
||||
|
||||
def build(self, spec, prefix):
|
||||
makeopts = self.common_make_opts
|
||||
make('info', *makeopts)
|
||||
make(*makeopts)
|
||||
|
||||
if self.run_tests:
|
||||
make('prove', *makeopts, parallel=False)
|
||||
|
||||
def install(self, spec, prefix):
|
||||
make('install', 'prefix=%s' % prefix, parallel=False)
|
||||
installopts = ['prefix=%s' % prefix]
|
||||
installopts += self.common_make_opts
|
||||
make('install', *installopts, parallel=False)
|
||||
|
||||
@when('@0.1')
|
||||
def install(self, spec, prefix):
|
||||
|
|
|
@ -0,0 +1,12 @@
|
|||
diff --git c/ceed.pc.template w/ceed.pc.template
|
||||
index 3216f08..5ada754 100644
|
||||
--- c/ceed.pc.template
|
||||
+++ w/ceed.pc.template
|
||||
@@ -4,6 +4,6 @@ libdir=${prefix}/lib
|
||||
|
||||
Name: CEED
|
||||
Description: Code for Efficient Extensible Discretization
|
||||
-Version: 0.2.1
|
||||
+Version: 0.4
|
||||
Cflags: -I${includedir}
|
||||
Libs: -L${libdir} -lceed
|
|
@ -0,0 +1,77 @@
|
|||
diff -ru magma-2.5.0-orig/CMakeLists.txt magma-2.5.0/CMakeLists.txt
|
||||
--- magma-2.5.0-orig/CMakeLists.txt 2019-01-02 11:18:39.000000000 -0800
|
||||
+++ magma-2.5.0/CMakeLists.txt 2019-04-03 15:58:01.871234891 -0700
|
||||
@@ -440,18 +440,20 @@
|
||||
# compile MAGMA sparse library
|
||||
|
||||
# sparse doesn't have Fortran at the moment, so no need for above shenanigans
|
||||
-include_directories( sparse/include )
|
||||
-include_directories( sparse/control )
|
||||
-include_directories( testing )
|
||||
-cuda_add_library( magma_sparse ${libsparse_all} )
|
||||
-target_link_libraries( magma_sparse
|
||||
- magma
|
||||
+if (MAGMA_SPARSE)
|
||||
+ include_directories( sparse/include )
|
||||
+ include_directories( sparse/control )
|
||||
+ include_directories( testing )
|
||||
+ cuda_add_library( magma_sparse ${libsparse_all} )
|
||||
+ target_link_libraries( magma_sparse
|
||||
+ magma
|
||||
${LAPACK_LIBRARIES}
|
||||
${CUDA_CUDART_LIBRARY}
|
||||
${CUDA_CUBLAS_LIBRARIES}
|
||||
${CUDA_cusparse_LIBRARY}
|
||||
-)
|
||||
-set( LIBS_SPARSE ${LIBS} magma_sparse )
|
||||
+ )
|
||||
+ set( LIBS_SPARSE ${LIBS} magma_sparse )
|
||||
+endif()
|
||||
|
||||
|
||||
# ----------------------------------------
|
||||
@@ -480,23 +482,31 @@
|
||||
|
||||
# ----------------------------------------
|
||||
# compile each sparse tester
|
||||
-set( CMAKE_RUNTIME_OUTPUT_DIRECTORY sparse/testing )
|
||||
-foreach( TEST ${sparse_testing_all} )
|
||||
+if (MAGMA_SPARSE)
|
||||
+ set( CMAKE_RUNTIME_OUTPUT_DIRECTORY sparse/testing )
|
||||
+ foreach( TEST ${sparse_testing_all} )
|
||||
string( REGEX REPLACE "\\.(cpp|f90|F90)" "" EXE ${TEST} )
|
||||
string( REGEX REPLACE "sparse/testing/" "" EXE ${EXE} )
|
||||
#message( "${TEST} --> ${EXE}" )
|
||||
add_executable( ${EXE} ${TEST} )
|
||||
target_link_libraries( ${EXE} ${LIBS_SPARSE} )
|
||||
-endforeach()
|
||||
+ endforeach()
|
||||
+endif()
|
||||
|
||||
|
||||
# ----------------------------------------
|
||||
# what to install
|
||||
-install( TARGETS magma magma_sparse ${blas_fix}
|
||||
+set(MAGMA_TARGETS magma)
|
||||
+set(MAGMA_HEADERS_PATTERNS include/*.h)
|
||||
+if (MAGMA_SPARSE)
|
||||
+ set(MAGMA_TARGETS ${MAGMA_TARGETS} magma_sparse)
|
||||
+ set(MAGMA_HEADERS_PATTERNS ${MAGMA_HEADERS_PATTERNS} sparse/include/*.h)
|
||||
+endif()
|
||||
+install( TARGETS ${MAGMA_TARGETS} ${blas_fix}
|
||||
RUNTIME DESTINATION bin
|
||||
LIBRARY DESTINATION lib
|
||||
ARCHIVE DESTINATION lib )
|
||||
-file( GLOB headers include/*.h sparse/include/*.h )
|
||||
+file( GLOB headers ${MAGMA_HEADERS_PATTERNS} )
|
||||
install( FILES ${headers}
|
||||
DESTINATION include )
|
||||
|
||||
@@ -509,4 +519,6 @@
|
||||
message( STATUS " NFLAGS ${CUDA_NVCC_FLAGS}" )
|
||||
message( STATUS " FFLAGS ${CMAKE_Fortran_FLAGS}" )
|
||||
message( STATUS " LIBS ${LIBS}" )
|
||||
-message( STATUS " LIBS_SPARSE ${LIBS_SPARSE}" )
|
||||
+if (MAGMA_SPARSE)
|
||||
+ message( STATUS " LIBS_SPARSE ${LIBS_SPARSE}" )
|
||||
+endif()
|
428
var/spack/repos/builtin/packages/magma/magma-2.5.0.patch
Normal file
428
var/spack/repos/builtin/packages/magma/magma-2.5.0.patch
Normal file
|
@ -0,0 +1,428 @@
|
|||
diff -r 89706c0efbdb .hgtags
|
||||
--- a/.hgtags Wed Jan 02 14:17:26 2019 -0500
|
||||
+++ b/.hgtags Wed Apr 03 15:50:54 2019 -0700
|
||||
@@ -1,3 +1,4 @@
|
||||
9c7e7cffa7d0e2decd23cde36a4830dfb55bea13 v2.2.0
|
||||
b2b2e21c22a59a79eefbf1e5cff8e7d539a52c0c v2.3.0
|
||||
04d08aaa27dc8a551513d268c68fc299e81b6780 v2.4.0
|
||||
+89706c0efbdbfd48bf8a2c20cc0d73e53c3f387e v2.5.0
|
||||
diff -r 89706c0efbdb include/magma_types.h
|
||||
--- a/include/magma_types.h Wed Jan 02 14:17:26 2019 -0500
|
||||
+++ b/include/magma_types.h Wed Apr 03 15:50:54 2019 -0700
|
||||
@@ -77,7 +77,7 @@
|
||||
typedef magma_int_t magma_device_t;
|
||||
|
||||
// Half precision in CUDA
|
||||
- #if defined(__cplusplus) && CUDA_VERSION > 7500
|
||||
+ #if defined(__cplusplus) && CUDA_VERSION >= 7500
|
||||
#include <cuda_fp16.h>
|
||||
typedef __half magmaHalf;
|
||||
#else
|
||||
diff -r 89706c0efbdb sparse/blas/magma_zsampleselect.cu
|
||||
--- a/sparse/blas/magma_zsampleselect.cu Wed Jan 02 14:17:26 2019 -0500
|
||||
+++ b/sparse/blas/magma_zsampleselect.cu Wed Apr 03 15:50:54 2019 -0700
|
||||
@@ -15,9 +15,12 @@
|
||||
|
||||
#define PRECISION_z
|
||||
|
||||
+
|
||||
namespace magma_sampleselect {
|
||||
|
||||
-__global__ void compute_abs(const magmaDoubleComplex* __restrict__ in, double* __restrict__ out, int32_t size) {
|
||||
+__global__ void compute_abs(const magmaDoubleComplex* __restrict__ in, double* __restrict__ out, int32_t size)
|
||||
+{
|
||||
+#if (__CUDA_ARCH__ >= 350)
|
||||
auto idx = threadIdx.x + blockDim.x * blockIdx.x;
|
||||
if (idx >= size) {
|
||||
return;
|
||||
@@ -25,6 +28,7 @@
|
||||
|
||||
auto v = in[idx];
|
||||
out[idx] = real(v) * real(v) + imag(v) * imag(v);
|
||||
+#endif
|
||||
}
|
||||
|
||||
} // namespace magma_sampleselect
|
||||
@@ -164,36 +168,43 @@
|
||||
magma_queue_t queue )
|
||||
{
|
||||
magma_int_t info = 0;
|
||||
+ magma_int_t arch = magma_getdevice_arch();
|
||||
|
||||
- auto num_blocks = magma_ceildiv(total_size, block_size);
|
||||
- auto local_work = (total_size + num_threads - 1) / num_threads;
|
||||
- auto required_size = sizeof(double) * (total_size + searchtree_size)
|
||||
+ if( arch >= 350 ) {
|
||||
+ auto num_blocks = magma_ceildiv(total_size, block_size);
|
||||
+ auto local_work = (total_size + num_threads - 1) / num_threads;
|
||||
+ auto required_size = sizeof(double) * (total_size + searchtree_size)
|
||||
+ sizeof(int32_t) * (searchtree_width * (num_grouped_blocks + 1) + 1);
|
||||
- auto realloc_result = realloc_if_necessary(tmp_ptr, tmp_size, required_size);
|
||||
+ auto realloc_result = realloc_if_necessary(tmp_ptr, tmp_size, required_size);
|
||||
|
||||
- double* gputmp = (double*)*tmp_ptr;
|
||||
- double* gputree = gputmp + total_size;
|
||||
- uint32_t* gpubucketidx = (uint32_t*)(gputree + searchtree_size);
|
||||
- int32_t* gpurankout = (int32_t*)(gpubucketidx + 1);
|
||||
- int32_t* gpucounts = gpurankout + 1;
|
||||
- int32_t* gpulocalcounts = gpucounts + searchtree_width;
|
||||
- uint32_t bucketidx{};
|
||||
+ double* gputmp = (double*)*tmp_ptr;
|
||||
+ double* gputree = gputmp + total_size;
|
||||
+ uint32_t* gpubucketidx = (uint32_t*)(gputree + searchtree_size);
|
||||
+ int32_t* gpurankout = (int32_t*)(gpubucketidx + 1);
|
||||
+ int32_t* gpucounts = gpurankout + 1;
|
||||
+ int32_t* gpulocalcounts = gpucounts + searchtree_width;
|
||||
+ uint32_t bucketidx{};
|
||||
|
||||
- CHECK(realloc_result);
|
||||
+ CHECK(realloc_result);
|
||||
|
||||
- compute_abs<<<num_blocks, block_size, 0, queue->cuda_stream()>>>
|
||||
- (val, gputmp, total_size);
|
||||
- build_searchtree<<<1, sample_size, 0, queue->cuda_stream()>>>
|
||||
- (gputmp, gputree, total_size);
|
||||
- count_buckets<<<num_grouped_blocks, block_size, 0, queue->cuda_stream()>>>
|
||||
- (gputmp, gputree, gpulocalcounts, total_size, local_work);
|
||||
- reduce_counts<<<searchtree_width, num_grouped_blocks, 0, queue->cuda_stream()>>>
|
||||
- (gpulocalcounts, gpucounts, num_grouped_blocks);
|
||||
- sampleselect_findbucket<<<1, searchtree_width / 2, 0, queue->cuda_stream()>>>
|
||||
- (gpucounts, subset_size, gpubucketidx, gpurankout);
|
||||
- magma_getvector(1, sizeof(uint32_t), gpubucketidx, 1, &bucketidx, 1, queue);
|
||||
- magma_dgetvector(1, gputree + searchtree_width - 1 + bucketidx, 1, thrs, 1, queue);
|
||||
- *thrs = std::sqrt(*thrs);
|
||||
+ compute_abs<<<num_blocks, block_size, 0, queue->cuda_stream()>>>
|
||||
+ (val, gputmp, total_size);
|
||||
+ build_searchtree<<<1, sample_size, 0, queue->cuda_stream()>>>
|
||||
+ (gputmp, gputree, total_size);
|
||||
+ count_buckets<<<num_grouped_blocks, block_size, 0, queue->cuda_stream()>>>
|
||||
+ (gputmp, gputree, gpulocalcounts, total_size, local_work);
|
||||
+ reduce_counts<<<searchtree_width, num_grouped_blocks, 0, queue->cuda_stream()>>>
|
||||
+ (gpulocalcounts, gpucounts, num_grouped_blocks);
|
||||
+ sampleselect_findbucket<<<1, searchtree_width / 2, 0, queue->cuda_stream()>>>
|
||||
+ (gpucounts, subset_size, gpubucketidx, gpurankout);
|
||||
+ magma_getvector(1, sizeof(uint32_t), gpubucketidx, 1, &bucketidx, 1, queue);
|
||||
+ magma_dgetvector(1, gputree + searchtree_width - 1 + bucketidx, 1, thrs, 1, queue);
|
||||
+ *thrs = std::sqrt(*thrs);
|
||||
+ }
|
||||
+ else {
|
||||
+ printf("error: this functionality needs CUDA architecture >= 3.5\n");
|
||||
+ info = MAGMA_ERR_NOT_SUPPORTED;
|
||||
+ }
|
||||
|
||||
cleanup:
|
||||
return info;
|
||||
diff -r 89706c0efbdb src/xhsgetrf_gpu.cpp
|
||||
--- a/src/xhsgetrf_gpu.cpp Wed Jan 02 14:17:26 2019 -0500
|
||||
+++ b/src/xhsgetrf_gpu.cpp Wed Apr 03 15:50:54 2019 -0700
|
||||
@@ -16,6 +16,131 @@
|
||||
#include <cuda_fp16.h>
|
||||
#endif
|
||||
|
||||
+#if CUDA_VERSION < 9020
|
||||
+// Conversions from float to half are not defined for the host in CUDA
|
||||
+// versions < 9.2, so use the conversion below when CUDA_VERSION is < 9.2.
|
||||
+#include <string.h>
|
||||
+//
|
||||
+// Copyright (c) 1993-2016, NVIDIA CORPORATION. All rights reserved.
|
||||
+//
|
||||
+// Redistribution and use in source and binary forms, with or without
|
||||
+// modification, are permitted provided that the following conditions
|
||||
+// are met:
|
||||
+// * Redistributions of source code must retain the above copyright
|
||||
+// notice, this list of conditions and the following disclaimer.
|
||||
+// * Redistributions in binary form must reproduce the above copyright
|
||||
+// notice, this list of conditions and the following disclaimer in the
|
||||
+// documentation and/or other materials provided with the distribution.
|
||||
+// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
+// contributors may be used to endorse or promote products derived
|
||||
+// from this software without specific prior written permission.
|
||||
+//
|
||||
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
+
|
||||
+// This code modified from the public domain code here:
|
||||
+// https://gist.github.com/rygorous/2156668
|
||||
+// The URL above includes more robust conversion routines
|
||||
+// that handle Inf and NaN correctly.
|
||||
+//
|
||||
+// It is recommended to use the more robust versions in production code.
|
||||
+
|
||||
+typedef unsigned uint;
|
||||
+
|
||||
+union FP32
|
||||
+{
|
||||
+ uint u;
|
||||
+ float f;
|
||||
+ struct
|
||||
+ {
|
||||
+ uint Mantissa : 23;
|
||||
+ uint Exponent : 8;
|
||||
+ uint Sign : 1;
|
||||
+ };
|
||||
+};
|
||||
+
|
||||
+union FP16
|
||||
+{
|
||||
+ unsigned short u;
|
||||
+ struct
|
||||
+ {
|
||||
+ uint Mantissa : 10;
|
||||
+ uint Exponent : 5;
|
||||
+ uint Sign : 1;
|
||||
+ };
|
||||
+};
|
||||
+
|
||||
+// Approximate solution. This is faster but converts some sNaNs to
|
||||
+// infinity and doesn't round correctly. Handle with care.
|
||||
+// Approximate solution. This is faster but converts some sNaNs to
|
||||
+// infinity and doesn't round correctly. Handle with care.
|
||||
+static half approx_float_to_half(float fl)
|
||||
+{
|
||||
+ FP32 f32infty = { 255 << 23 };
|
||||
+ FP32 f16max = { (127 + 16) << 23 };
|
||||
+ FP32 magic = { 15 << 23 };
|
||||
+ FP32 expinf = { (255 ^ 31) << 23 };
|
||||
+ uint sign_mask = 0x80000000u;
|
||||
+ FP16 o = { 0 };
|
||||
+
|
||||
+ FP32 f = *((FP32*)&fl);
|
||||
+
|
||||
+ uint sign = f.u & sign_mask;
|
||||
+ f.u ^= sign;
|
||||
+
|
||||
+ if (!(f.f < f32infty.u)) // Inf or NaN
|
||||
+ o.u = f.u ^ expinf.u;
|
||||
+ else
|
||||
+ {
|
||||
+ if (f.f > f16max.f) f.f = f16max.f;
|
||||
+ f.f *= magic.f;
|
||||
+ }
|
||||
+
|
||||
+ o.u = f.u >> 13; // Take the mantissa bits
|
||||
+ o.u |= sign >> 16;
|
||||
+ half tmp;
|
||||
+ memcpy(&tmp, &o, sizeof(half));
|
||||
+ //return *((half*)&o);
|
||||
+ return tmp;
|
||||
+}
|
||||
+
|
||||
+// from half->float code - just for verification.
|
||||
+static float half_to_float(half hf)
|
||||
+{
|
||||
+ FP16 h;
|
||||
+ memcpy(&h, &hf, sizeof(half));
|
||||
+
|
||||
+ static const FP32 magic = { 113 << 23 };
|
||||
+ static const uint shifted_exp = 0x7c00 << 13; // exponent mask after shift
|
||||
+ FP32 o;
|
||||
+
|
||||
+ o.u = (h.u & 0x7fff) << 13; // exponent/mantissa bits
|
||||
+ uint exp = shifted_exp & o.u; // just the exponent
|
||||
+ o.u += (127 - 15) << 23; // exponent adjust
|
||||
+
|
||||
+ // handle exponent special cases
|
||||
+ if (exp == shifted_exp) // Inf/NaN?
|
||||
+ o.u += (128 - 16) << 23; // extra exp adjust
|
||||
+ else if (exp == 0) // Zero/Denormal?
|
||||
+ {
|
||||
+ o.u += 1 << 23; // extra exp adjust
|
||||
+ o.f -= magic.f; // renormalize
|
||||
+ }
|
||||
+
|
||||
+ o.u |= (h.u & 0x8000) << 16; // sign bit
|
||||
+ return o.f;
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
#include "magma_internal.h"
|
||||
//#include "nvToolsExt.h"
|
||||
|
||||
@@ -106,10 +231,13 @@
|
||||
float c_one = MAGMA_S_ONE;
|
||||
float c_neg_one = MAGMA_S_NEG_ONE;
|
||||
#if 1
|
||||
+ #if CUDA_VERSION >= 9020
|
||||
const magmaHalf h_one = (magmaHalf) 1.0;
|
||||
const magmaHalf h_neg_one = (magmaHalf)-1.0;
|
||||
- //const magmaHalf h_one = approx_float_to_half(1.0);
|
||||
- //const magmaHalf h_neg_one = approx_float_to_half(-1.0);
|
||||
+ #else
|
||||
+ const magmaHalf h_one = approx_float_to_half(1.0);
|
||||
+ const magmaHalf h_neg_one = approx_float_to_half(-1.0);
|
||||
+ #endif
|
||||
#else
|
||||
FP32 float_one = *((FP32*)&c_one);
|
||||
FP16 half_one = float_to_half_full(float_one);
|
||||
diff -r 89706c0efbdb src/xshgetrf_gpu.cpp
|
||||
--- a/src/xshgetrf_gpu.cpp Wed Jan 02 14:17:26 2019 -0500
|
||||
+++ b/src/xshgetrf_gpu.cpp Wed Apr 03 15:50:54 2019 -0700
|
||||
@@ -92,7 +92,7 @@
|
||||
magma_mp_type_t enable_tc,
|
||||
magma_mp_type_t mp_algo_type )
|
||||
{
|
||||
-#if CUDA_VERSION >= 7500
|
||||
+#if CUDA_VERSION >= 9000
|
||||
#ifdef HAVE_clBLAS
|
||||
#define dA(i_, j_) dA, (dA_offset + (i_) + (j_)*ldda)
|
||||
#define dAT(i_, j_) dAT, (dAT_offset + (i_)*lddat + (j_))
|
||||
diff -r 89706c0efbdb testing/testing_hgemm.cpp
|
||||
--- a/testing/testing_hgemm.cpp Wed Jan 02 14:17:26 2019 -0500
|
||||
+++ b/testing/testing_hgemm.cpp Wed Apr 03 15:50:54 2019 -0700
|
||||
@@ -22,6 +22,131 @@
|
||||
#include "magma_operators.h"
|
||||
#include "testings.h"
|
||||
|
||||
+#if CUDA_VERSION < 9020
|
||||
+// Conversions from float to half are not defined for the host in CUDA
|
||||
+// versions < 9.2, so use the conversion below when CUDA_VERSION is < 9.2.
|
||||
+#include <string.h>
|
||||
+//
|
||||
+// Copyright (c) 1993-2016, NVIDIA CORPORATION. All rights reserved.
|
||||
+//
|
||||
+// Redistribution and use in source and binary forms, with or without
|
||||
+// modification, are permitted provided that the following conditions
|
||||
+// are met:
|
||||
+// * Redistributions of source code must retain the above copyright
|
||||
+// notice, this list of conditions and the following disclaimer.
|
||||
+// * Redistributions in binary form must reproduce the above copyright
|
||||
+// notice, this list of conditions and the following disclaimer in the
|
||||
+// documentation and/or other materials provided with the distribution.
|
||||
+// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
+// contributors may be used to endorse or promote products derived
|
||||
+// from this software without specific prior written permission.
|
||||
+//
|
||||
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
+
|
||||
+// This code modified from the public domain code here:
|
||||
+// https://gist.github.com/rygorous/2156668
|
||||
+// The URL above includes more robust conversion routines
|
||||
+// that handle Inf and NaN correctly.
|
||||
+//
|
||||
+// It is recommended to use the more robust versions in production code.
|
||||
+
|
||||
+typedef unsigned uint;
|
||||
+
|
||||
+union FP32
|
||||
+{
|
||||
+ uint u;
|
||||
+ float f;
|
||||
+ struct
|
||||
+ {
|
||||
+ uint Mantissa : 23;
|
||||
+ uint Exponent : 8;
|
||||
+ uint Sign : 1;
|
||||
+ };
|
||||
+};
|
||||
+
|
||||
+union FP16
|
||||
+{
|
||||
+ unsigned short u;
|
||||
+ struct
|
||||
+ {
|
||||
+ uint Mantissa : 10;
|
||||
+ uint Exponent : 5;
|
||||
+ uint Sign : 1;
|
||||
+ };
|
||||
+};
|
||||
+
|
||||
+// Approximate solution. This is faster but converts some sNaNs to
|
||||
+// infinity and doesn't round correctly. Handle with care.
|
||||
+// Approximate solution. This is faster but converts some sNaNs to
|
||||
+// infinity and doesn't round correctly. Handle with care.
|
||||
+static half approx_float_to_half(float fl)
|
||||
+{
|
||||
+ FP32 f32infty = { 255 << 23 };
|
||||
+ FP32 f16max = { (127 + 16) << 23 };
|
||||
+ FP32 magic = { 15 << 23 };
|
||||
+ FP32 expinf = { (255 ^ 31) << 23 };
|
||||
+ uint sign_mask = 0x80000000u;
|
||||
+ FP16 o = { 0 };
|
||||
+
|
||||
+ FP32 f = *((FP32*)&fl);
|
||||
+
|
||||
+ uint sign = f.u & sign_mask;
|
||||
+ f.u ^= sign;
|
||||
+
|
||||
+ if (!(f.f < f32infty.u)) // Inf or NaN
|
||||
+ o.u = f.u ^ expinf.u;
|
||||
+ else
|
||||
+ {
|
||||
+ if (f.f > f16max.f) f.f = f16max.f;
|
||||
+ f.f *= magic.f;
|
||||
+ }
|
||||
+
|
||||
+ o.u = f.u >> 13; // Take the mantissa bits
|
||||
+ o.u |= sign >> 16;
|
||||
+ half tmp;
|
||||
+ memcpy(&tmp, &o, sizeof(half));
|
||||
+ //return *((half*)&o);
|
||||
+ return tmp;
|
||||
+}
|
||||
+
|
||||
+// from half->float code - just for verification.
|
||||
+static float half_to_float(half hf)
|
||||
+{
|
||||
+ FP16 h;
|
||||
+ memcpy(&h, &hf, sizeof(half));
|
||||
+
|
||||
+ static const FP32 magic = { 113 << 23 };
|
||||
+ static const uint shifted_exp = 0x7c00 << 13; // exponent mask after shift
|
||||
+ FP32 o;
|
||||
+
|
||||
+ o.u = (h.u & 0x7fff) << 13; // exponent/mantissa bits
|
||||
+ uint exp = shifted_exp & o.u; // just the exponent
|
||||
+ o.u += (127 - 15) << 23; // exponent adjust
|
||||
+
|
||||
+ // handle exponent special cases
|
||||
+ if (exp == shifted_exp) // Inf/NaN?
|
||||
+ o.u += (128 - 16) << 23; // extra exp adjust
|
||||
+ else if (exp == 0) // Zero/Denormal?
|
||||
+ {
|
||||
+ o.u += 1 << 23; // extra exp adjust
|
||||
+ o.f -= magic.f; // renormalize
|
||||
+ }
|
||||
+
|
||||
+ o.u |= (h.u & 0x8000) << 16; // sign bit
|
||||
+ return o.f;
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
/* ////////////////////////////////////////////////////////////////////////////
|
||||
-- Testing sgemm
|
||||
*/
|
||||
@@ -47,8 +172,13 @@
|
||||
float c_neg_one = MAGMA_S_NEG_ONE;
|
||||
float alpha = MAGMA_S_MAKE( 0.29, -0.86 );
|
||||
float beta = MAGMA_S_MAKE( -0.48, 0.38 );
|
||||
- magmaHalf h_alpha = (magmaHalf)alpha;
|
||||
- magmaHalf h_beta = (magmaHalf)beta;
|
||||
+ #if CUDA_VERSION >= 9020
|
||||
+ const magmaHalf h_alpha = (magmaHalf) alpha;
|
||||
+ const magmaHalf h_beta = (magmaHalf) beta;
|
||||
+ #else
|
||||
+ const magmaHalf h_alpha = approx_float_to_half(alpha);
|
||||
+ const magmaHalf h_beta = approx_float_to_half(beta);
|
||||
+ #endif
|
||||
magma_opts opts;
|
||||
opts.parse_opts( argc, argv );
|
||||
|
|
@ -36,6 +36,8 @@ class Magma(CMakePackage):
|
|||
patch('ibm-xl.patch', when='@2.2:%xl')
|
||||
patch('ibm-xl.patch', when='@2.2:%xl_r')
|
||||
patch('magma-2.3.0-gcc-4.8.patch', when='@2.3.0%gcc@:4.8')
|
||||
patch('magma-2.5.0.patch', when='@2.5.0')
|
||||
patch('magma-2.5.0-cmake.patch', when='@2.5.0')
|
||||
|
||||
def cmake_args(self):
|
||||
spec = self.spec
|
||||
|
@ -70,6 +72,11 @@ def cmake_args(self):
|
|||
else:
|
||||
options.extend(['-DGPU_TARGET=sm_30'])
|
||||
|
||||
if '@2.5.0' in spec:
|
||||
options.extend(['-DMAGMA_SPARSE=OFF'])
|
||||
if spec.compiler.name in ['xl', 'xl_r']:
|
||||
options.extend(['-DCMAKE_DISABLE_FIND_PACKAGE_OpenMP=TRUE'])
|
||||
|
||||
return options
|
||||
|
||||
@run_after('install')
|
||||
|
|
|
@ -45,6 +45,9 @@ class Mfem(Package):
|
|||
# other version.
|
||||
version('develop', branch='master')
|
||||
|
||||
# Tagged development version used by the laghos package:
|
||||
version('3.4.1-laghos-v2.0', tag='laghos-v2.0')
|
||||
|
||||
version('3.4.0',
|
||||
'4e73e4fe0482636de3c5dc983cd395839a83cb16f6f509bd88b053e8b3858e05',
|
||||
url='https://bit.ly/mfem-3-4', extension='.tar.gz',
|
||||
|
@ -54,7 +57,8 @@ class Mfem(Package):
|
|||
'b70fa3c5080b9ec514fc05f4a04ff74322b99ac4ecd6d99c229f0ed5188fc0ce',
|
||||
url='https://goo.gl/Kd7Jk8', extension='.tar.gz')
|
||||
|
||||
version('laghos-v1.0', tag='laghos-v1.0')
|
||||
# Tagged development version used by the laghos package:
|
||||
version('3.3.1-laghos-v1.0', tag='laghos-v1.0')
|
||||
|
||||
version('3.3',
|
||||
'b17bd452593aada93dc0fee748fcfbbf4f04ce3e7d77fdd0341cc9103bcacd0b',
|
||||
|
@ -424,16 +428,18 @@ def install(self, spec, prefix):
|
|||
copy(str(self.config_mk), 'config.mk')
|
||||
shutil.copystat('config.mk.orig', 'config.mk')
|
||||
|
||||
prefix_share = join_path(prefix, 'share', 'mfem')
|
||||
|
||||
if '+examples' in spec:
|
||||
make('examples')
|
||||
install_tree('examples', join_path(prefix, 'examples'))
|
||||
install_tree('examples', join_path(prefix_share, 'examples'))
|
||||
|
||||
if '+miniapps' in spec:
|
||||
make('miniapps')
|
||||
install_tree('miniapps', join_path(prefix, 'miniapps'))
|
||||
install_tree('miniapps', join_path(prefix_share, 'miniapps'))
|
||||
|
||||
if install_em:
|
||||
install_tree('data', join_path(prefix, 'data'))
|
||||
install_tree('data', join_path(prefix_share, 'data'))
|
||||
|
||||
@property
|
||||
def suitesparse_components(self):
|
||||
|
|
|
@ -5,19 +5,9 @@
|
|||
|
||||
from spack import *
|
||||
|
||||
import numbers
|
||||
import os
|
||||
|
||||
|
||||
def is_integral(x):
|
||||
"""Any integer value"""
|
||||
try:
|
||||
return isinstance(int(x), numbers.Integral) and \
|
||||
not isinstance(x, bool) and int(x) > 0
|
||||
except ValueError:
|
||||
return False
|
||||
|
||||
|
||||
class Nek5000(Package):
|
||||
"""A fast and scalable high-order solver for computational fluid
|
||||
dynamics"""
|
||||
|
@ -40,33 +30,8 @@ class Nek5000(Package):
|
|||
# TODO: add a variant 'blas' or 'external-blas' to enable the usage of
|
||||
# Spack installed/configured blas.
|
||||
|
||||
# Variant for MAXNEL, we need to read this from user
|
||||
variant(
|
||||
'MAXNEL',
|
||||
default=150000,
|
||||
description='Maximum number of elements for Nek5000 tools.',
|
||||
values=is_integral
|
||||
)
|
||||
|
||||
# Variants for Nek tools
|
||||
variant('genbox', default=True, description='Build genbox tool.')
|
||||
variant('int_tp', default=True, description='Build int_tp tool.')
|
||||
variant('n2to3', default=True, description='Build n2to3 tool.')
|
||||
variant('postnek', default=True, description='Build postnek tool.')
|
||||
variant('reatore2', default=True, description='Build reatore2 tool.')
|
||||
variant('genmap', default=True, description='Build genmap tool.')
|
||||
variant('nekmerge', default=True, description='Build nekmerge tool.')
|
||||
variant('prenek', default=True, description='Build prenek tool.')
|
||||
|
||||
# Dependencies
|
||||
depends_on('mpi', when="+mpi")
|
||||
depends_on('libx11', when="+prenek")
|
||||
depends_on('libx11', when="+postnek")
|
||||
# libxt is needed for X11/Intrinsic.h but not for linking
|
||||
depends_on('libxt', when="+prenek")
|
||||
depends_on('xproto', when="+prenek")
|
||||
depends_on('libxt', when="+postnek")
|
||||
depends_on('visit', when="+visit")
|
||||
|
||||
@run_before('install')
|
||||
def fortran_check(self):
|
||||
|
@ -83,7 +48,6 @@ def test_install(self):
|
|||
raise RuntimeError(msg)
|
||||
|
||||
def install(self, spec, prefix):
|
||||
tools_dir = 'tools'
|
||||
bin_dir = 'bin'
|
||||
|
||||
# Do not use the Spack compiler wrappers.
|
||||
|
@ -93,102 +57,23 @@ def install(self, spec, prefix):
|
|||
|
||||
fflags = spec.compiler_flags['fflags']
|
||||
cflags = spec.compiler_flags['cflags']
|
||||
if ('+prenek' in spec) or ('+postnek' in spec):
|
||||
libx11_h = find_headers('Xlib', spec['libx11'].prefix.include,
|
||||
recursive=True)
|
||||
if not libx11_h:
|
||||
raise RuntimeError('Xlib.h not found in %s' %
|
||||
spec['libx11'].prefix.include)
|
||||
cflags += ['-I%s' % os.path.dirname(libx11_h.directories[0])]
|
||||
|
||||
xproto_h = find_headers('X', spec['xproto'].prefix.include,
|
||||
recursive=True)
|
||||
if not xproto_h:
|
||||
raise RuntimeError('X.h not found in %s' %
|
||||
spec['xproto'].prefix.include)
|
||||
cflags += ['-I%s' % os.path.dirname(xproto_h.directories[0])]
|
||||
|
||||
libxt_h = find_headers('Intrinsic', spec['libxt'].prefix.include,
|
||||
recursive=True)
|
||||
if not libxt_h:
|
||||
raise RuntimeError('X11/Intrinsic.h not found in %s' %
|
||||
spec['libxt'].prefix.include)
|
||||
cflags += ['-I%s' % os.path.dirname(libxt_h.directories[0])]
|
||||
if self.compiler.name in ['xl', 'xl_r']:
|
||||
# Use '-qextname' to add underscores.
|
||||
# Use '-WF,-qnotrigraph' to fix an error about a string: '... ??'
|
||||
fflags += ['-qextname', '-WF,-qnotrigraph']
|
||||
|
||||
error = Executable(fc)('empty.f', output=str, error=str,
|
||||
fail_on_error=False)
|
||||
|
||||
if 'gfortran' in error or 'GNU' in error or 'gfortran' in fc:
|
||||
# Use '-std=legacy' to suppress an error that used to be a
|
||||
# warning in previous versions of gfortran.
|
||||
fflags += ['-std=legacy']
|
||||
|
||||
fflags = ' '.join(fflags)
|
||||
cflags = ' '.join(cflags)
|
||||
|
||||
# Build the tools, maketools copy them to Nek5000/bin by default.
|
||||
# We will then install Nek5000/bin under prefix after that.
|
||||
with working_dir(tools_dir):
|
||||
# Update the maketools script to use correct compilers
|
||||
filter_file(r'^#FC\s*=.*', 'FC="{0}"'.format(fc), 'maketools')
|
||||
filter_file(r'^#CC\s*=.*', 'CC="{0}"'.format(cc), 'maketools')
|
||||
if fflags:
|
||||
filter_file(r'^#FFLAGS=.*', 'FFLAGS="{0}"'.format(fflags),
|
||||
'maketools')
|
||||
if cflags:
|
||||
filter_file(r'^#CFLAGS=.*', 'CFLAGS="{0}"'.format(cflags),
|
||||
'maketools')
|
||||
|
||||
if self.compiler.name in ['xl', 'xl_r']:
|
||||
# Patch 'maketools' to use '-qextname' when checking for
|
||||
# underscore because 'xl'/'xl_r' use this option to enable the
|
||||
# addition of the underscore.
|
||||
filter_file(r'^\$FC -c ', '$FC -qextname -c ', 'maketools')
|
||||
|
||||
libx11_lib = find_libraries('libX11', spec['libx11'].prefix.lib,
|
||||
shared=True, recursive=True)
|
||||
if not libx11_lib:
|
||||
libx11_lib = \
|
||||
find_libraries('libX11', spec['libx11'].prefix.lib64,
|
||||
shared=True, recursive=True)
|
||||
if not libx11_lib:
|
||||
raise RuntimeError('libX11 not found in %s/{lib,lib64}' %
|
||||
spec['libx11'].prefix)
|
||||
# There is no other way to set the X11 library path except brute
|
||||
# force:
|
||||
filter_file(r'-L\$\(X\)', libx11_lib.search_flags,
|
||||
join_path('prenek', 'makefile'))
|
||||
filter_file(r'-L\$\(X\)', libx11_lib.search_flags,
|
||||
join_path('postnek', 'makefile'))
|
||||
|
||||
if self.compiler.name in ['xl', 'xl_r']:
|
||||
# Use '-qextname' when compiling mxm.f
|
||||
filter_file(r'\$\(OLAGS\)', '-qextname $(OLAGS)',
|
||||
join_path('postnek', 'makefile'))
|
||||
# Define 'rename_' function that calls 'rename'
|
||||
with open(join_path('postnek', 'xdriver.c'), 'a') as xdriver:
|
||||
xdriver.write('\nvoid rename_(char *from, char *to)\n{\n'
|
||||
' rename(from, to);\n}\n')
|
||||
|
||||
maxnel = self.spec.variants['MAXNEL'].value
|
||||
filter_file(r'^#MAXNEL\s*=.*', 'MAXNEL=' + maxnel, 'maketools')
|
||||
|
||||
maketools = Executable('./maketools')
|
||||
|
||||
# Build the tools
|
||||
if '+genbox' in spec:
|
||||
maketools('genbox')
|
||||
# "ERROR: int_tp does not exist!"
|
||||
# if '+int_tp' in spec:
|
||||
# maketools('int_tp')
|
||||
if '+n2to3' in spec:
|
||||
maketools('n2to3')
|
||||
if '+postnek' in spec:
|
||||
maketools('postnek')
|
||||
if '+reatore2' in spec:
|
||||
maketools('reatore2')
|
||||
if '+genmap' in spec:
|
||||
maketools('genmap')
|
||||
if '+nekmerge' in spec:
|
||||
maketools('nekmerge')
|
||||
if '+prenek' in spec:
|
||||
maketools('prenek')
|
||||
|
||||
with working_dir(bin_dir):
|
||||
if '+mpi' in spec:
|
||||
fc = spec['mpi'].mpif77
|
||||
|
|
|
@ -52,11 +52,21 @@ def install(self, spec, prefix):
|
|||
install(makenek, prefix.bin)
|
||||
install(nekpmpi, prefix.bin)
|
||||
|
||||
error = Executable(fc)('empty.f', output=str, error=str,
|
||||
fail_on_error=False)
|
||||
|
||||
fflags = ''
|
||||
if 'gfortran' in error or 'GNU' in error or 'gfortran' in fc:
|
||||
# Use '-std=legacy' to suppress an error that used to be a
|
||||
# warning in previous versions of gfortran.
|
||||
fflags = ' -std=legacy'
|
||||
|
||||
with working_dir(prefix.bin):
|
||||
filter_file(r'^SOURCE_ROOT\s*=.*', 'SOURCE_ROOT=\"' +
|
||||
prefix.bin.Nekbone + '/src\"', 'makenek')
|
||||
filter_file(r'^CC\s*=.*', 'CC=\"' + cc + '\"', 'makenek')
|
||||
filter_file(r'^F77\s*=.*', 'F77=\"' + fc + '\"', 'makenek')
|
||||
filter_file(r'^F77\s*=.*', 'F77=\"' + fc + fflags + '\"',
|
||||
'makenek')
|
||||
|
||||
if '+mpi' not in spec:
|
||||
filter_file(r'^#IFMPI=\"false\"', 'IFMPI=\"false\"', 'makenek')
|
||||
|
|
|
@ -22,11 +22,13 @@ class Nekcem(Package):
|
|||
# We only have a development version
|
||||
version('develop')
|
||||
version('0b8bedd', commit='0b8beddfdcca646bfcc866dfda1c5f893338399b')
|
||||
version('7332619', commit='7332619b73d03868a256614b61794dce2d95b360')
|
||||
|
||||
# dependencies
|
||||
depends_on('mpi', when='+mpi')
|
||||
depends_on('blas')
|
||||
depends_on('lapack')
|
||||
depends_on('python@2.7:', type='build')
|
||||
|
||||
@run_before('install')
|
||||
def fortran_check(self):
|
||||
|
@ -81,6 +83,14 @@ def install(self, spec, prefix):
|
|||
fflags += ['-r8']
|
||||
cflags += ['-DUNDERSCORE']
|
||||
|
||||
error = Executable(fc)('empty.f', output=str, error=str,
|
||||
fail_on_error=False)
|
||||
|
||||
if 'gfortran' in error or 'GNU' in error or 'gfortran' in fc:
|
||||
# Use '-std=legacy' to suppress an error that used to be a
|
||||
# warning in previous versions of gfortran.
|
||||
fflags += ['-std=legacy']
|
||||
|
||||
if '+mpi' in spec:
|
||||
fflags += ['-DMPI', '-DMPIIO']
|
||||
cflags += ['-DMPI', '-DMPIIO']
|
||||
|
|
177
var/spack/repos/builtin/packages/nektools/package.py
Normal file
177
var/spack/repos/builtin/packages/nektools/package.py
Normal file
|
@ -0,0 +1,177 @@
|
|||
# Copyright 2013-2019 Lawrence Livermore National Security, LLC and other
|
||||
# Spack Project Developers. See the top-level COPYRIGHT file for details.
|
||||
#
|
||||
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
|
||||
|
||||
from spack import *
|
||||
|
||||
import numbers
|
||||
import os
|
||||
|
||||
|
||||
def is_integral(x):
    """Return True if ``x`` can be read as a strictly positive integer.

    This is the ``values`` validator of the ``MAXNEL`` variant, so it is
    expected to answer True/False for whatever is passed in rather than
    raise.

    Args:
        x: candidate value; anything accepted by ``int()`` (for example an
            ``int`` or a numeric string) is considered.

    Returns:
        bool: True when ``int(x)`` succeeds, ``x`` is not a ``bool`` and the
        converted value is greater than zero; False otherwise.
    """
    # bool is a subclass of int, so True/False would otherwise convert to
    # 1/0; they are rejected explicitly.
    if isinstance(x, bool):
        return False
    try:
        value = int(x)
    except (TypeError, ValueError, OverflowError):
        # TypeError: objects int() cannot convert at all (e.g. None);
        # ValueError: non-numeric strings or NaN;
        # OverflowError: infinite floats.
        return False
    return isinstance(value, numbers.Integral) and value > 0
|
||||
|
||||
|
||||
class Nektools(Package):
    """Tools required by Nek5000"""

    homepage = "https://nek5000.mcs.anl.gov/"
    url = "https://github.com/Nek5000/Nek5000/releases/download/v17.0/Nek5000-v17.0.tar.gz"
    git = "https://github.com/Nek5000/Nek5000.git"

    tags = ['cfd', 'flow', 'hpc', 'solver', 'navier-stokes',
            'spectral-elements', 'fluid', 'ecp', 'ecp-apps']

    version('develop', branch='master')
    version('17.0', '6a13bfad2ce023897010dd88f54a0a87')

    # Variant for MAXNEL, we need to read this from user
    variant(
        'MAXNEL',
        default=150000,
        description='Maximum number of elements for Nek5000 tools.',
        values=is_integral
    )

    # Variants for Nek tools
    variant('genbox', default=True, description='Build genbox tool.')
    variant('n2to3', default=True, description='Build n2to3 tool.')
    variant('postnek', default=True, description='Build postnek tool.')
    variant('reatore2', default=True, description='Build reatore2 tool.')
    variant('genmap', default=True, description='Build genmap tool.')
    variant('nekmerge', default=True, description='Build nekmerge tool.')
    variant('prenek', default=True, description='Build prenek tool.')

    depends_on('libx11', when="+prenek")
    depends_on('libx11', when="+postnek")
    # libxt is needed for X11/Intrinsic.h but not for linking
    depends_on('libxt', when="+prenek")
    depends_on('libxt', when="+postnek")
    # install() looks up the xproto headers (X.h) whenever either prenek or
    # postnek is enabled, so the dependency is declared for both.
    depends_on('xproto', when="+prenek")
    depends_on('xproto', when="+postnek")
    # NOTE(review): no 'visit' variant is declared in this class; confirm
    # whether this dependency can ever be activated.
    depends_on('visit', when="+visit")

    @run_before('install')
    def fortran_check(self):
        """Abort before installing when no Fortran 77 compiler is set.

        Raises:
            RuntimeError: if ``self.compiler.f77`` is empty.
        """
        if not self.compiler.f77:
            msg = 'Cannot build Nek5000 without a Fortran 77 compiler.'
            raise RuntimeError(msg)

    def install(self, spec, prefix):
        """Configure and run ``tools/maketools`` for each enabled tool, then
        install the resulting ``bin`` directory into ``prefix.bin``.

        Args:
            spec: the concrete spec being installed; its variants select the
                tools to build and its dependencies supply the X11 paths.
            prefix: installation prefix; the tools end up in ``prefix.bin``.

        Raises:
            RuntimeError: if a required X11 header or the X11 library cannot
                be found in the corresponding dependency prefix.
        """
        tools_dir = 'tools'
        bin_dir = 'bin'

        fc = env['FC']
        cc = env['CC']

        # Work on copies: the lists are extended below and the ones stored
        # on the spec must not be modified as a side effect.
        fflags = list(spec.compiler_flags['fflags'])
        cflags = list(spec.compiler_flags['cflags'])

        # prenek and postnek are the only tools for which the X11 headers
        # and library are looked up.
        needs_x11 = ('+prenek' in spec) or ('+postnek' in spec)

        if needs_x11:
            # (header name to search, providing package, name used in the
            # error message)
            x11_headers = (
                ('Xlib', 'libx11', 'Xlib.h'),
                ('X', 'xproto', 'X.h'),
                ('Intrinsic', 'libxt', 'X11/Intrinsic.h'),
            )
            for header, pkg, label in x11_headers:
                include_dir = spec[pkg].prefix.include
                found = find_headers(header, include_dir, recursive=True)
                if not found:
                    raise RuntimeError('%s not found in %s' %
                                       (label, include_dir))
                # Add the parent of the directory holding the header to the
                # include path.
                cflags += ['-I%s' % os.path.dirname(found.directories[0])]

        if self.compiler.name in ['xl', 'xl_r']:
            # Use '-qextname' to add underscores.
            # Use '-WF,-qnotrigraph' to fix an error about a string: '... ??'
            fflags += ['-qextname', '-WF,-qnotrigraph']

        # Run the Fortran compiler on 'empty.f' without failing on error;
        # the captured output is only inspected to recognize gfortran.
        error = Executable(fc)('empty.f', output=str, error=str,
                               fail_on_error=False)

        if 'gfortran' in error or 'GNU' in error or 'gfortran' in fc:
            # Use '-std=legacy' to suppress an error that used to be a
            # warning in previous versions of gfortran.
            fflags += ['-std=legacy']

        fflags = ' '.join(fflags)
        cflags = ' '.join(cflags)

        # Build the tools, maketools copy them to Nek5000/bin by default.
        # We will then install Nek5000/bin under prefix after that.
        with working_dir(tools_dir):
            # Update the maketools script to use correct compilers
            filter_file(r'^#FC\s*=.*', 'FC="{0}"'.format(fc), 'maketools')
            filter_file(r'^#CC\s*=.*', 'CC="{0}"'.format(cc), 'maketools')
            if fflags:
                filter_file(r'^#FFLAGS=.*', 'FFLAGS="{0}"'.format(fflags),
                            'maketools')
            if cflags:
                filter_file(r'^#CFLAGS=.*', 'CFLAGS="{0}"'.format(cflags),
                            'maketools')

            if self.compiler.name in ['xl', 'xl_r']:
                # Patch 'maketools' to use '-qextname' when checking for
                # underscore because 'xl'/'xl_r' use this option to enable
                # the addition of the underscore.
                filter_file(r'^\$FC -c ', '$FC -qextname -c ', 'maketools')

            if needs_x11:
                # Only touch libx11 when a tool that links against it is
                # enabled; otherwise it is not a dependency of this spec.
                libx11_prefix = spec['libx11'].prefix
                libx11_lib = find_libraries('libX11', libx11_prefix.lib,
                                            shared=True, recursive=True)
                if not libx11_lib:
                    libx11_lib = find_libraries('libX11',
                                                libx11_prefix.lib64,
                                                shared=True, recursive=True)
                if not libx11_lib:
                    raise RuntimeError('libX11 not found in %s/{lib,lib64}' %
                                       libx11_prefix)
                # There is no other way to set the X11 library path except
                # brute force:
                for tool in ('prenek', 'postnek'):
                    filter_file(r'-L\$\(X\)', libx11_lib.search_flags,
                                join_path(tool, 'makefile'))

            if self.compiler.name in ['xl', 'xl_r']:
                # Use '-qextname' when compiling mxm.f
                filter_file(r'\$\(OLAGS\)', '-qextname $(OLAGS)',
                            join_path('postnek', 'makefile'))
                # Define 'rename_' function that calls 'rename'
                with open(join_path('postnek', 'xdriver.c'), 'a') as xdriver:
                    xdriver.write('\nvoid rename_(char *from, char *to)\n{\n'
                                  ' rename(from, to);\n}\n')

            # str() keeps the concatenation valid whether the variant value
            # is stored as a string or as a number.
            maxnel = self.spec.variants['MAXNEL'].value
            filter_file(r'^#MAXNEL\s*=.*', 'MAXNEL=' + str(maxnel),
                        'maketools')

            maketools = Executable('./maketools')

            # Build the tools
            for tool in ('genbox', 'n2to3', 'postnek', 'reatore2',
                         'genmap', 'nekmerge', 'prenek'):
                if '+' + tool in spec:
                    maketools(tool)

        # Install Nek5000/bin in prefix/bin
        install_tree(bin_dir, prefix.bin)
|
|
@ -20,9 +20,10 @@ class Occa(Package):
|
|||
git = 'https://github.com/libocca/occa.git'
|
||||
|
||||
version('develop')
|
||||
version('v1.0.0-alpha.5', tag='v1.0.0-alpha.5')
|
||||
version('v0.2.0', tag='v0.2.0')
|
||||
version('v0.1.0', tag='v0.1.0')
|
||||
version('1.0.8', tag='v1.0.8')
|
||||
version('1.0.0-alpha.5', tag='v1.0.0-alpha.5')
|
||||
version('0.2.0', tag='v0.2.0')
|
||||
version('0.1.0', tag='v0.1.0')
|
||||
|
||||
variant('cuda',
|
||||
default=True,
|
||||
|
|
|
@ -108,6 +108,8 @@ class Petsc(Package):
|
|||
|
||||
patch('xcode_stub_out_of_sync.patch', when='@:3.10.4')
|
||||
|
||||
patch('xlf_fix-dup-petscfecreate.patch', when='@3.11.0')
|
||||
|
||||
# Virtual dependencies
|
||||
# Git repository needs sowing to build Fortran interface
|
||||
depends_on('sowing', when='@develop')
|
||||
|
|
|
@ -0,0 +1,31 @@
|
|||
commit 5849cfbb8c629127894f722457da7b2cbb7f6a0b
|
||||
Author: Satish Balay <balay@mcs.anl.gov>
|
||||
Date: Sun Mar 31 21:43:12 2019 -0500
|
||||
|
||||
dm: remove duplicate PetscFECreate() definition from f90 modules. This fixes IBM xlf build.
|
||||
|
||||
"/tmp/dobrev1/spack-stage/spack-stage-Vp2QRG/petsc-3.11.0/src/tao/f90-mod/petsctaomod.F", line 38.13: 1514-264 (S) Procedure petscfecreate has more than one interface accessible by use association. The interfaces are assumed to be the same.
|
||||
1501-511 Compilation failed for file petsctaomod.F.
|
||||
|
||||
Reported-by: "Dobrev, Veselin A." <dobrev1@llnl.gov>
|
||||
|
||||
diff --git a/src/dm/f90-mod/petscdt.h90 b/src/dm/f90-mod/petscdt.h90
|
||||
index 339f7fd41e..766f0e37dc 100644
|
||||
--- a/src/dm/f90-mod/petscdt.h90
|
||||
+++ b/src/dm/f90-mod/petscdt.h90
|
||||
@@ -49,15 +49,6 @@
|
||||
End Subroutine
|
||||
End Interface
|
||||
|
||||
- Interface
|
||||
- Subroutine PetscFECreate(c,f,ierr)
|
||||
- use petscdmdef
|
||||
- MPI_Comm, intent(in) :: c
|
||||
- PetscFE, intent(out) :: f
|
||||
- PetscErrorCode, intent(out) :: ierr
|
||||
- End Subroutine
|
||||
- End Interface
|
||||
-
|
||||
Interface
|
||||
Subroutine PetscFECreateDefault(c,d,n,i,str,o,f,ierr)
|
||||
use petscdmdef
|
|
@ -114,6 +114,14 @@ def install(self, spec, prefix):
|
|||
'TBB=-L%s -ltbb' % spec['tbb'].prefix.lib,
|
||||
]
|
||||
|
||||
if '@5.3:' in spec:
|
||||
# Without CMAKE_LIBRARY_PATH defined, the CMake file in the
|
||||
# Mongoose directory finds libsuitesparseconfig.so in system
|
||||
# directories like /usr/lib.
|
||||
make_args += [
|
||||
'CMAKE_OPTIONS=-DCMAKE_INSTALL_PREFIX=%s' % prefix +
|
||||
' -DCMAKE_LIBRARY_PATH=%s' % prefix.lib]
|
||||
|
||||
make('install', *make_args)
|
||||
|
||||
@property
|
||||
|
|
Loading…
Reference in a new issue