Enable packages to use the NVIDIA HPC SDK (#19452)

* Enable packages to use the NVIDIA HPC SDK * fix linter and review items * fix linter issue Co-authored-by: Scott McMillan <smcmillan@nvidia.com>
2020-10-22 16:42:48 -05:00 · 2020-10-22 16:42:48 -05:00 · 0015dd0bf8
commit 0015dd0bf8
parent fd3dbb2493
85 changed files with 1824 additions and 18 deletions
--- a/var/spack/repos/builtin/packages/amber/aarch64.patch
+++ b/var/spack/repos/builtin/packages/amber/aarch64.patch
@ -0,0 +1,24 @@
+--- a/ambertools_tmpdir/AmberTools/src/fftw-3.3/config.guess	2020-09-30 13:28:45.035344970 -0700
+++ b/ambertools_tmpdir/AmberTools/src/fftw-3.3/config.guess	2020-09-30 13:28:55.534923364 -0700
+@@ -858,6 +858,9 @@
+     i*86:Minix:*:*)
+ 	echo ${UNAME_MACHINE}-pc-minix
+ 	exit ;;
+    aarch64:Linux:*:*)
+	echo aarch64-unknown-linux-gnu
+	exit ;;
+     alpha:Linux:*:*)
+ 	case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in
+ 	  EV5)   UNAME_MACHINE=alphaev5 ;;
+--- a/ambertools_tmpdir/AmberTools/src/xblas/config/config.guess	2020-09-30 15:30:11.573779015 -0700
+++ b/ambertools_tmpdir/AmberTools/src/xblas/config/config.guess	2020-09-30 15:30:46.932366326 -0700
+@@ -863,6 +863,9 @@
+     i*86:Minix:*:*)
+ 	echo ${UNAME_MACHINE}-pc-minix
+ 	exit ;;
+    aarch64:Linux:*:*)
+       echo aarch64-unknown-linux-gnu
+       exit ;;
+     alpha:Linux:*:*)
+ 	case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in
+ 	  EV5)   UNAME_MACHINE=alphaev5 ;;
--- a/var/spack/repos/builtin/packages/amber/nvhpc-boost.patch
+++ b/var/spack/repos/builtin/packages/amber/nvhpc-boost.patch
@ -0,0 +1,61 @@
+--- a/ambertools_tmpdir/AmberTools/src/boost/tools/build/src/engine/build.sh	2020-09-30 14:26:41.136036986 -0700
+++ b/ambertools_tmpdir/AmberTools/src/boost/tools/build/src/engine/build.sh	2020-09-30 14:27:29.634090876 -0700
+@@ -95,7 +95,7 @@
+     elif test -r /opt/intel/compiler50/ia32/bin/iccvars.sh ; then
+         BOOST_JAM_TOOLSET=intel-linux
+         BOOST_JAM_TOOLSET_ROOT=/opt/intel/compiler50/ia32/
+-    elif test_path pgcc ; then BOOST_JAM_TOOLSET=pgi
+    elif test_path nvc ; then BOOST_JAM_TOOLSET=pgi
+     elif test_path pathcc ; then BOOST_JAM_TOOLSET=pathscale
+     elif test_path como ; then BOOST_JAM_TOOLSET=como
+     elif test_path KCC ; then BOOST_JAM_TOOLSET=kcc
+@@ -231,7 +231,7 @@
+     ;;
+ 
+     pgi)
+-    BOOST_JAM_CC=pgcc
+    BOOST_JAM_CC=nvc
+     ;;
+ 
+     sun*)
+--- a/ambertools_tmpdir/AmberTools/src/boost/tools/build/src/tools/pgi.jam	2020-09-30 14:28:09.652483687 -0700
+++ b/ambertools_tmpdir/AmberTools/src/boost/tools/build/src/tools/pgi.jam	2020-09-30 14:28:34.421488935 -0700
+@@ -26,11 +26,11 @@
+ {
+   local condition = [ common.check-init-parameters pgi : version $(version) ] ;
+ 
+-  local l_command = [ common.get-invocation-command pgi : pgc++ : $(command) ] ;
+  local l_command = [ common.get-invocation-command pgi : nvc++ : $(command) ] ;
+ 
+   common.handle-options pgi : $(condition) : $(l_command) : $(options) ;
+     
+-  command_c = $(command_c[1--2]) $(l_command[-1]:B=pgcc) ;
+  command_c = $(command_c[1--2]) $(l_command[-1]:B=nvc) ;
+ 
+   toolset.flags pgi CONFIG_C_COMMAND $(condition) : $(command_c) ;
+ 
+--- a/ambertools_tmpdir/AmberTools/src/boost/tools/build/src/engine/build.jam	2020-09-30 14:40:16.983284833 -0700
+++ b/ambertools_tmpdir/AmberTools/src/boost/tools/build/src/engine/build.jam	2020-09-30 14:40:24.172997290 -0700
+@@ -309,7 +309,7 @@
+     -I$(--python-include) -I$(--extra-include)
+     : -L$(--python-lib[1]) -l$(--python-lib[2]) ;
+ ## Portland Group Pgi 6.2
+-toolset pgi pgcc : "-o " : -D
+toolset pgi nvc : "-o " : -D
+     :
+     [ opt --release : -s -O3 ]
+     [ opt --debug : -g ]
+--- a/ambertools_tmpdir/AmberTools/src/boost/libs/filesystem/src/operations.cpp	2020-09-30 15:07:31.998097202 -0700
+++ b/ambertools_tmpdir/AmberTools/src/boost/libs/filesystem/src/operations.cpp	2020-09-30 15:07:37.117892885 -0700
+@@ -2087,10 +2087,6 @@
+     return ok;
+   }
+ 
+-#if defined(__PGI) && defined(__USE_FILE_OFFSET64)
+-#define dirent dirent64
+-#endif
+-
+   error_code dir_itr_first(void *& handle, void *& buffer,
+     const char* dir, string& target,
+     fs::file_status &, fs::file_status &)
+
--- a/var/spack/repos/builtin/packages/amber/nvhpc.patch
+++ b/var/spack/repos/builtin/packages/amber/nvhpc.patch
@ -0,0 +1,20 @@
+--- a/ambertools_tmpdir/AmberTools/src/configure2	2020-09-10 07:37:44.380726161 -0700
+++ b/ambertools_tmpdir/AmberTools/src/configure2	2020-09-10 07:38:22.052936370 -0700
+@@ -3190,7 +3190,7 @@
+       cd fftw-3.3 && \
+         ./configure --disable-doc --prefix=$amberprefix --libdir=$amberprefix/lib \
+         --enable-static $enable_mpi $mpicc $enable_debug $enable_sse\
+-        CC="$cc" CFLAGS="$cflags $coptflags" \
+        CC="$cc" CFLAGS="$cflags $cnooptflags" \
+         F77="$fc" FFLAGS="$fflags $foptflags" \
+         FLIBS="$flibs_arch" \
+         > ../fftw3_config.log 2>&1
+@@ -3287,6 +3287,8 @@
+       # b2 install will use intel-linux.compile.c++ (boost_1_64_0).
+       ./bootstrap.sh --prefix=$amberprefix --with-toolset=intel-linux > ../boost_config.log
+       ncerror=$?
+  elif [ $x86_64 = 'no' ] && [ "$compiler" = 'pgi' ]; then
+      ./bootstrap.sh --prefix=$amberprefix --with-toolset=pgi > ../boost_config.log
+   else
+       ./bootstrap.sh --prefix=$amberprefix > ../boost_config.log
+       ncerror=$?
--- a/var/spack/repos/builtin/packages/amber/package.py
+++ b/var/spack/repos/builtin/packages/amber/package.py
@ -91,6 +91,19 @@ def url_for_version(self, version):
        patch(patch_url_str.format(ver, num),
              sha256=checksum, level=0, when='@{0}'.format(ver))

+    # Patch to add ppc64le in config.guess
+    patch('ppc64le.patch', when='@18.20')
+
+    # Patch to add aarch64 in config.guess
+    patch('aarch64.patch', when='@18.20')
+
+    # Workaround to modify the AmberTools script when using the NVIDIA
+    # compilers
+    patch('nvhpc.patch', when='@18.20 %nvhpc')
+
+    # Workaround to use NVIDIA compilers to build the bundled Boost
+    patch('nvhpc-boost.patch', when='@18.20 %nvhpc')
+
    variant('mpi', description='Build MPI executables',
            default=True)
    variant('openmp', description='Use OpenMP pragmas to parallelize',
@ -101,6 +114,7 @@ def url_for_version(self, version):
            default=False)

    depends_on('zlib')
+    depends_on('bzip2')
    depends_on('flex', type='build')
    depends_on('bison', type='build')
    depends_on('netcdf-fortran')
@ -125,6 +139,12 @@ def url_for_version(self, version):
    def setup_build_environment(self, env):
        amber_src = self.stage.source_path
        env.set('AMBERHOME', amber_src)
+
+        # The bundled Boost does not detect the bzip2 package, but
+        # will silently fall back to a system install (if available).
+        # Force it to use the bzip2 package.
+        env.prepend_path('CPATH', self.spec['bzip2'].prefix.include)
+
        # CUDA
        if self.spec.satisfies('+cuda'):
            env.set('CUDA_HOME', self.spec['cuda'].prefix)
@ -146,6 +166,8 @@ def install(self, spec, prefix):
            compiler = 'intel'
        elif self.spec.satisfies('%pgi'):
            compiler = 'pgi'
+        elif self.spec.satisfies('%nvhpc'):
+            compiler = 'pgi'
        elif self.spec.satisfies('%clang'):
            compiler = 'clang'
        else:
@ -166,6 +188,10 @@ def install(self, spec, prefix):
        else:
            base_args += ['--no-updates']

+        # Non-x86 architecture
+        if self.spec.target.family != 'x86_64':
+            base_args += ['-nosse']
+
        # Single core
        conf(*(base_args + [compiler]))
        make('install')
--- a/var/spack/repos/builtin/packages/amber/ppc64le.patch
+++ b/var/spack/repos/builtin/packages/amber/ppc64le.patch
@ -0,0 +1,24 @@
+--- a/ambertools_tmpdir/AmberTools/src/fftw-3.3/config.guess	2020-09-30 13:28:45.035344970 -0700
+++ b/ambertools_tmpdir/AmberTools/src/fftw-3.3/config.guess	2020-09-30 13:28:55.534923364 -0700
+@@ -953,6 +953,9 @@
+     ppc64:Linux:*:*)
+ 	echo powerpc64-unknown-linux-gnu
+ 	exit ;;
+    ppc64le:Linux:*:*)
+       echo powerpc64le-unknown-linux-gnu
+       exit ;;
+     ppc:Linux:*:*)
+ 	echo powerpc-unknown-linux-gnu
+ 	exit ;;
+--- a/ambertools_tmpdir/AmberTools/src/xblas/config/config.guess	2020-09-30 15:30:11.573779015 -0700
+++ b/ambertools_tmpdir/AmberTools/src/xblas/config/config.guess	2020-09-30 15:30:46.932366326 -0700
+@@ -967,6 +967,9 @@
+     ppc64:Linux:*:*)
+ 	echo powerpc64-unknown-linux-gnu
+ 	exit ;;
+    ppc64le:Linux:*:*)
+       echo powerpc64le-unknown-linux-gnu
+       exit ;;
+     ppc:Linux:*:*)
+ 	echo powerpc-unknown-linux-gnu
+ 	exit ;;
--- a/var/spack/repos/builtin/packages/berkeley-db/package.py
+++ b/var/spack/repos/builtin/packages/berkeley-db/package.py
@ -29,7 +29,7 @@ def patch(self):
            filter_file(r'gsg_db_server', '', 'dist/Makefile.in')

    def configure_args(self):
-        return [
+        config_args = [
            '--disable-static',
            '--enable-cxx',
            '--enable-dbm',
@ -40,3 +40,10 @@ def configure_args(self):
            # depends on Berkey DB, creating a circular dependency
            '--with-repmgr-ssl=no',
        ]
+
+        # The default glibc provided by CentOS 7 does not provide proper
+        # atomic support when using the NVIDIA compilers
+        if self.spec.satisfies('%nvhpc os=centos7'):
+            config_args.append('--disable-atomicsupport')
+
+        return config_args
--- a/var/spack/repos/builtin/packages/bison/nvhpc.patch
+++ b/var/spack/repos/builtin/packages/bison/nvhpc.patch
@ -0,0 +1,38 @@
+--- a/lib/xalloc-oversized.h	2020-08-21 13:24:01.793744123 -0700
+++ b/lib/xalloc-oversized.h	2020-08-21 13:25:52.394097061 -0700
+@@ -41,10 +41,10 @@
+    positive and N must be nonnegative.  This is a macro, not a
+    function, so that it works correctly even when SIZE_MAX < N.  */
+ 
+-#if 7 <= __GNUC__
+#if 7 <= __GNUC__ && !defined __NVCOMPILER
+ # define xalloc_oversized(n, s) \
+    __builtin_mul_overflow_p (n, s, (__xalloc_count_type) 1)
+-#elif 5 <= __GNUC__ && !defined __ICC && !__STRICT_ANSI__
+#elif 5 <= __GNUC__ && !defined __ICC && !__STRICT_ANSI__ && !defined __NVCOMPILER
+ # define xalloc_oversized(n, s) \
+    (__builtin_constant_p (n) && __builtin_constant_p (s) \
+     ? __xalloc_oversized (n, s) \
+--- a/lib/intprops.h	2020-08-21 13:23:54.587721128 -0700
+++ b/lib/intprops.h	2020-08-21 13:24:59.717928964 -0700
+@@ -222,9 +222,9 @@
+ 
+ /* True if __builtin_add_overflow (A, B, P) and __builtin_sub_overflow
+    (A, B, P) work when P is non-null.  */
+-#if 5 <= __GNUC__ && !defined __ICC
+#if 5 <= __GNUC__ && !defined __ICC && !defined __NVCOMPILER
+ # define _GL_HAS_BUILTIN_ADD_OVERFLOW 1
+-#elif defined __has_builtin
+#elif defined __has_builtin && !defined __NVCOMPILER
+ # define _GL_HAS_BUILTIN_ADD_OVERFLOW __has_builtin (__builtin_add_overflow)
+ #else
+ # define _GL_HAS_BUILTIN_ADD_OVERFLOW 0
+@@ -240,7 +240,7 @@
+ 
+ /* True if __builtin_add_overflow_p (A, B, C) works, and similarly for
+    __builtin_mul_overflow_p and __builtin_mul_overflow_p.  */
+-#define _GL_HAS_BUILTIN_OVERFLOW_P (7 <= __GNUC__)
+#define _GL_HAS_BUILTIN_OVERFLOW_P (7 <= __GNUC__ && !defined __NVCOMPILER)
+ 
+ /* The _GL*_OVERFLOW macros have the same restrictions as the
+    *_RANGE_OVERFLOW macros, except that they do not assume that operands
--- a/var/spack/repos/builtin/packages/bison/package.py
+++ b/var/spack/repos/builtin/packages/bison/package.py
@ -46,6 +46,7 @@ class Bison(AutotoolsPackage, GNUMirrorPackage):
    depends_on('help2man', type='build')

    patch('pgi.patch', when='@3.0.4')
+    patch('nvhpc.patch', when='%nvhpc')

    conflicts('%intel@:14', when='@3.4.2:',
              msg="Intel 14 has immature C11 support")
--- a/var/spack/repos/builtin/packages/boost/nvhpc.patch
+++ b/var/spack/repos/builtin/packages/boost/nvhpc.patch
@ -0,0 +1,45 @@
+--- a/tools/build/src/engine/build.sh	2020-08-25 08:44:18.873304084 -0700
+++ b/tools/build/src/engine/build.sh	2020-08-25 08:44:34.215370891 -0700
+@@ -89,7 +89,7 @@
+             kylix) ( ${CXX:=bc++} -tC -q check_cxx11.cpp && rm -f a.out ) 1>/dev/null 2>/dev/null ;;
+             mipspro) ( ${CXX:=CC} -FE:template_in_elf_section -ptused check_cxx11.cpp && rm -f a.out ) 1>/dev/null 2>/dev/null ;;
+             pathscale) ( ${CXX:=pathCC} check_cxx11.cpp && rm -f a.out ) 1>/dev/null 2>/dev/null ;;
+-            pgi) ( ${CXX:=pgc++} -std=c++11 check_cxx11.cpp && rm -f a.out ) 1>/dev/null 2>/dev/null ;;
+            pgi) ( ${CXX:=nvc++} -std=c++11 check_cxx11.cpp && rm -f a.out ) 1>/dev/null 2>/dev/null ;;
+             sun*) ( ${CXX:=CC} -std=c++11 check_cxx11.cpp && rm -f a.out ) 1>/dev/null 2>/dev/null ;;
+             clang*) ( ${CXX:=clang++} -x c++ -std=c++11 check_cxx11.cpp && rm -f a.out ) 1>/dev/null 2>/dev/null ;;
+             tru64cxx) ( ${CXX:=cc} check_cxx11.cpp && rm -f a.out ) 1>/dev/null 2>/dev/null ;;
+@@ -140,7 +140,7 @@
+     elif test -r /opt/intel/compiler50/ia32/bin/iccvars.sh && test_cxx11 intel-linux ; then
+         B2_TOOLSET=intel-linux
+         B2_TOOLSET_ROOT=/opt/intel/compiler50/ia32/
+-    elif test_path pgc++ && test_cxx11 pgi ; then B2_TOOLSET=pgi
+    elif test_path nvc++ && test_cxx11 pgi ; then B2_TOOLSET=pgi
+     elif test_path pathCC && test_cxx11 pathscale ; then B2_TOOLSET=pathscale
+     elif test_path como && test_cxx11 como ; then B2_TOOLSET=como
+     elif test_path KCC && test_cxx11 kcc ; then B2_TOOLSET=kcc
+@@ -345,7 +345,7 @@
+     ;;
+ 
+     pgi)
+-        CXX=${CXX:=pgc++}
+        CXX=${CXX:=nvc++}
+         CXX_VERSION_OPT=${CXX_VERSION_OPT:=--version}
+         B2_CXX="${CXX} -std=c++11"
+         B2_CXXFLAGS_RELEASE="-fast -s"
+--- a/tools/build/src/tools/pgi.jam	2020-08-25 08:45:01.015487600 -0700
+++ b/tools/build/src/tools/pgi.jam	2020-08-25 08:45:16.462554871 -0700
+@@ -26,11 +26,11 @@
+ {
+   local condition = [ common.check-init-parameters pgi : version $(version) ] ;
+ 
+-  local l_command = [ common.get-invocation-command pgi : pgc++ : $(command) ] ;
+  local l_command = [ common.get-invocation-command pgi : nvc++ : $(command) ] ;
+ 
+   common.handle-options pgi : $(condition) : $(l_command) : $(options) ;
+     
+-  command_c = $(command_c[1--2]) $(l_command[-1]:B=pgcc) ;
+  command_c = $(command_c[1--2]) $(l_command[-1]:B=nvc) ;
+ 
+   toolset.flags pgi CONFIG_C_COMMAND $(condition) : $(command_c) ;
+ 
--- a/var/spack/repos/builtin/packages/boost/package.py
+++ b/var/spack/repos/builtin/packages/boost/package.py
@ -208,6 +208,9 @@ def libs(self):
    patch('boost_1.63.0_pgi.patch', when='@1.63.0%pgi')
    patch('boost_1.63.0_pgi_17.4_workaround.patch', when='@1.63.0%pgi@17.4')

+    # Patch to override the PGI toolset when using the NVIDIA compilers
+    patch('nvhpc.patch', when='%nvhpc')
+
    # Fix for version comparison on newer Clang on darwin
    # See: https://github.com/boostorg/build/issues/440
    # See: https://github.com/macports/macports-ports/pull/6726
@ -252,6 +255,19 @@ def libs(self):
    # See https://github.com/boostorg/build/pull/154
    patch('boost_154.patch', when='@:1.63.99')

+    def patch(self):
+        # Disable SSSE3 and AVX2 when using the NVIDIA compiler
+        if self.spec.satisfies('%nvhpc'):
+            filter_file('dump_avx2', '', 'libs/log/build/Jamfile.v2')
+            filter_file('<define>BOOST_LOG_USE_AVX2', '',
+                        'libs/log/build/Jamfile.v2')
+            filter_file('dump_ssse3', '', 'libs/log/build/Jamfile.v2')
+            filter_file('<define>BOOST_LOG_USE_SSSE3', '',
+                        'libs/log/build/Jamfile.v2')
+            # Replace the -fast optimization flag with -O1 in the build files
+            filter_file('-fast', '-O1', 'tools/build/src/tools/pgi.jam')
+            filter_file('-fast', '-O1', 'tools/build/src/engine/build.sh')
+
    def url_for_version(self, version):
        if version >= Version('1.63.0'):
            url = "https://dl.bintray.com/boostorg/release/{0}/source/boost_{1}.tar.bz2"
@ -268,6 +284,7 @@ def determine_toolset(self, spec):
                    'xlc++': 'xlcpp',
                    'xlc++_r': 'xlcpp',
                    'pgc++': 'pgi',
+                    'nvc++': 'pgi',
                    'FCC': 'clang'}

        if spec.satisfies('@1.47:'):
--- a/var/spack/repos/builtin/packages/bzip2/package.py
+++ b/var/spack/repos/builtin/packages/bzip2/package.py
@ -52,9 +52,10 @@ def patch(self):
        )

        # The Makefiles use GCC flags that are incompatible with PGI
-        if self.compiler.name == 'pgi':
+        if self.spec.satisfies('%pgi') or self.spec.satisfies('%nvhpc'):
            filter_file('-Wall -Winline', '-Minform=inform', 'Makefile')
-            filter_file('-Wall -Winline', '-Minform=inform', 'Makefile-libbz2_so')  # noqa
+            filter_file('-Wall -Winline', '-Minform=inform',
+                        'Makefile-libbz2_so')

        # Patch the link line to use RPATHs on macOS
        if 'darwin' in self.spec.architecture:
--- a/var/spack/repos/builtin/packages/charmpp/nvhpc.patch
+++ b/var/spack/repos/builtin/packages/charmpp/nvhpc.patch
@ -0,0 +1,77 @@
+--- /dev/null	2020-08-25 10:23:08.110000124 -0700
+++ b/src/arch/common/cc-nvc.h	2020-09-03 08:11:13.912665728 -0700
+@@ -0,0 +1,9 @@
+#undef CMK_DLL_CC
+
+/* pgcc can not compile RDTSC timer */
+#if CMK_TIMER_USE_RDTSC
+# undef CMK_TIMER_USE_GETRUSAGE
+# undef CMK_TIMER_USE_RDTSC
+# define CMK_TIMER_USE_GETRUSAGE                            1
+# define CMK_TIMER_USE_RDTSC                                0
+#endif
+--- /dev/null	2020-08-25 10:23:08.110000124 -0700
+++ b/src/arch/common/cc-nvc.sh	2020-09-03 08:11:01.439597250 -0700
+@@ -0,0 +1,42 @@
+
+# machine specific recommendation
+CMK_DEFS=""
+case `hostname` in
+*.ranger.tacc.utexas.edu) CMK_DEFS="-tp barcelona-64 " ;;
+esac
+
+CMK_CPP_C="nvc -E "
+CMK_CC="nvc -fPIC -DCMK_FIND_FIRST_OF_PREDICATE=1 "
+CMK_CC_RELIABLE="gcc "
+CMK_CXX="nvc++ -fPIC -DCMK_FIND_FIRST_OF_PREDICATE=1 --no_using_std "
+CMK_LD="$CMK_CC "
+CMK_LDXX="$CMK_CXX "
+CMK_PIE=" "
+
+# compiler for compiling sequential programs
+# nvc can not handle QT right for generic64, so always use gcc
+CMK_SEQ_CC="gcc -fPIC "
+CMK_SEQ_LD="$CMK_SEQ_CC "
+CMK_SEQ_CXX="nvc++ -fPIC --no_using_std "
+CMK_SEQ_LDXX="$CMK_SEQ_CXX"
+CMK_SEQ_LIBS=""
+
+# compiler for native programs
+CMK_NATIVE_CC="gcc "
+CMK_NATIVE_LD="gcc "
+CMK_NATIVE_CXX="g++ "
+CMK_NATIVE_LDXX="g++ "
+CMK_NATIVE_LIBS=""
+
+# fortran compiler
+CMK_CF77="nvfortran "
+CMK_CF90="nvfortran "
+CMK_CF90_FIXED="$CMK_CF90 -Mfixed "
+f90libdir="."
+f90bindir=`which nvfortran 2>/dev/null`
+if test -n "$f90bindir"
+then
+  f90libdir="$f90bindir/../lib"
+fi
+CMK_F90LIBS="-L$f90libdir "
+CMK_F90_USE_MODDIR=""
+--- /dev/null	2020-08-25 10:23:08.110000124 -0700
+++ b/src/arch/common/conv-mach-nvfortran.sh	2020-09-03 08:25:09.042243776 -0700
+@@ -0,0 +1,13 @@
+COMMENT="Use nvfortran fortran compiler"
+NVFORTRAN=`which nvfortran`
+if test x$NVFORTRAN = x
+then
+  echo 'charmc> Fatal error: nvfortran not found!'  # quoted so '>' is not parsed as a redirection
+  exit 1
+fi
+CMK_CF77="$NVFORTRAN "
+CMK_CF90="$NVFORTRAN "
+CMK_CF90_FIXED="$CMK_CF90 -Mfixed "
+CMK_F90LIBS="-lm "
+CMK_F90_MODINC="-module "
+CMK_F90_USE_MODDIR=""
+--- /dev/null   2020-08-25 10:23:08.110000124 -0700
+++ b/src/arch/common/conv-mach-nvfortran.h	2020-09-03 08:25:09.042243776 -0700
+@@ -0,0 +1,1 @@
+/* empty file */
--- a/var/spack/repos/builtin/packages/charmpp/package.py
+++ b/var/spack/repos/builtin/packages/charmpp/package.py
@ -44,6 +44,9 @@ class Charmpp(Package):
    # support Fujitsu compiler
    patch("fj.patch", when="%fj")

+    # support NVIDIA compilers
+    patch("nvhpc.patch", when="%nvhpc")
+
    # Ignore compiler warnings while configuring
    patch("strictpass.patch", when="@:6.8.2")

--- a/var/spack/repos/builtin/packages/cmake/package.py
+++ b/var/spack/repos/builtin/packages/cmake/package.py
@ -118,6 +118,8 @@ class Cmake(Package):
                  'please use %apple-clang. '
                  'See: https://gitlab.kitware.com/cmake/cmake/-/issues/21135')

+    conflicts('%nvhpc')
+
    # Really this should conflict since it's enabling or disabling openssl for
    # CMake's internal copy of curl.  Ideally we'd want a way to have the
    # openssl variant disabled when ~ownlibs but there's not really a way to
@ -163,6 +165,11 @@ class Cmake(Package):
    # to combine C++ and Fortran programs.
    patch('fujitsu_add_linker_option.patch', when='%fj')

+    # Remove -A from the C++ flags we use when CXX_EXTENSIONS is OFF
+    # Should be fixed in 3.19.
+    # https://gitlab.kitware.com/cmake/cmake/-/merge_requests/5025
+    patch('pgi-cxx-ansi.patch', when='@3.1:3.18.99')
+
    conflicts('+qt', when='^qt@5.4.0')  # qt-5.4.0 has broken CMake modules

    # https://gitlab.kitware.com/cmake/cmake/issues/18166
--- a/var/spack/repos/builtin/packages/cmake/pgi-cxx-ansi.patch
+++ b/var/spack/repos/builtin/packages/cmake/pgi-cxx-ansi.patch
@ -0,0 +1,50 @@
+From 52eee1938919deb59cc2b51d44f365f0d9a418e5 Mon Sep 17 00:00:00 2001
+From: Tin Huynh <ahuynh@nvidia.com>
+Date: Thu, 16 Jul 2020 16:06:33 -0700
+Subject: [PATCH] PGI: Remove -A from the C++ flags we use when CXX_EXTENSIONS
+ is OFF
+
+Since commit 9b97cb5562 (PGI: Add language standards for PGI,
+2017-05-01, v3.9.0-rc1~174^2), we have passed the `-A` flag to
+the PGI C++ compiler when specifying a C++ standard flag with
+compiler extensions turned off.  The flag is not meant for that.
+The PGI C++ standard flags do not turn extensions on by default
+and have a separate `--gnu_extensions` flag for that which we
+already use when CXX_EXTENSIONS is ON.  Simply drop the `-A` flag.
+
+Fixes: #20997
+---
+ Modules/Compiler/PGI-CXX.cmake | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/Modules/Compiler/PGI-CXX.cmake b/Modules/Compiler/PGI-CXX.cmake
+index c77de3605b..2d7a303c9b 100644
+--- a/Modules/Compiler/PGI-CXX.cmake
+++ b/Modules/Compiler/PGI-CXX.cmake
+@@ -4,19 +4,19 @@ string(APPEND CMAKE_CXX_FLAGS_MINSIZEREL_INIT " -DNDEBUG")
+ string(APPEND CMAKE_CXX_FLAGS_RELEASE_INIT " -DNDEBUG")
+ 
+ if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 12.10)
+-  set(CMAKE_CXX98_STANDARD_COMPILE_OPTION  -A)
+  set(CMAKE_CXX98_STANDARD_COMPILE_OPTION "")
+   set(CMAKE_CXX98_EXTENSION_COMPILE_OPTION --gnu_extensions)
+   set(CMAKE_CXX98_STANDARD__HAS_FULL_SUPPORT ON)
+   if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 13.10)
+-    set(CMAKE_CXX11_STANDARD_COMPILE_OPTION  --c++11 -A)
+    set(CMAKE_CXX11_STANDARD_COMPILE_OPTION  --c++11)
+     set(CMAKE_CXX11_EXTENSION_COMPILE_OPTION --c++11 --gnu_extensions)
+     set(CMAKE_CXX11_STANDARD__HAS_FULL_SUPPORT ON)
+     if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 15.7)
+-      set(CMAKE_CXX14_STANDARD_COMPILE_OPTION  --c++14 -A)
+      set(CMAKE_CXX14_STANDARD_COMPILE_OPTION  --c++14)
+       set(CMAKE_CXX14_EXTENSION_COMPILE_OPTION --c++14 --gnu_extensions)
+       set(CMAKE_CXX14_STANDARD__HAS_FULL_SUPPORT ON)
+       if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 17.1)
+-        set(CMAKE_CXX17_STANDARD_COMPILE_OPTION  --c++17 -A)
+        set(CMAKE_CXX17_STANDARD_COMPILE_OPTION  --c++17)
+         set(CMAKE_CXX17_EXTENSION_COMPILE_OPTION --c++17 --gnu_extensions)
+         set(CMAKE_CXX17_STANDARD__HAS_FULL_SUPPORT ON)
+       endif()
+-- 
+GitLab
+
--- a/var/spack/repos/builtin/packages/cp2k/package.py
+++ b/var/spack/repos/builtin/packages/cp2k/package.py
@ -203,6 +203,7 @@ def edit(self, spec, prefix):
            ],
            'intel': ['-O2', '-pc64', '-unroll', ],
            'pgi': ['-fast'],
+            'nvhpc': ['-fast'],
            'cray': ['-O2'],
            'xl': ['-O3'],
        }
@ -241,7 +242,7 @@ def edit(self, spec, prefix):
                '-ffree-line-length-none',
                '-ggdb',  # make sure we get proper Fortran backtraces
            ]
-        elif '%pgi' in spec:
+        elif '%pgi' in spec or '%nvhpc' in spec:
            fcflags += ['-Mfreeform', '-Mextend']
        elif '%cray' in spec:
            fcflags += ['-emf', '-ffree', '-hflex_mp=strict']
--- a/var/spack/repos/builtin/packages/diffutils/nvhpc.patch
+++ b/var/spack/repos/builtin/packages/diffutils/nvhpc.patch
@ -0,0 +1,35 @@
+--- a/lib/xalloc-oversized.h	2020-08-07 08:03:53.257539226 -0700
+++ b/lib/xalloc-oversized.h	2020-08-07 07:57:54.024124785 -0700
+@@ -41,10 +41,10 @@
+    positive and N must be nonnegative.  This is a macro, not a
+    function, so that it works correctly even when SIZE_MAX < N.  */
+ 
+-#if 7 <= __GNUC__
+#if 7 <= __GNUC__ && !defined __NVCOMPILER
+ # define xalloc_oversized(n, s) \
+    __builtin_mul_overflow_p (n, s, (__xalloc_count_type) 1)
+-#elif 5 <= __GNUC__ && !defined __ICC && !__STRICT_ANSI__
+#elif 5 <= __GNUC__ && !defined __ICC && !__STRICT_ANSI__ && !defined __NVCOMPILER
+ # define xalloc_oversized(n, s) \
+    (__builtin_constant_p (n) && __builtin_constant_p (s) \
+     ? __xalloc_oversized (n, s) \
+--- a/lib/intprops.h	2020-08-07 07:58:37.233294093 -0700
+++ b/lib/intprops.h	2020-08-07 08:00:05.887641482 -0700
+@@ -221,14 +221,14 @@
+    : (max) >> (b) < (a))
+ 
+ /* True if __builtin_add_overflow (A, B, P) works when P is non-null.  */
+-#if 5 <= __GNUC__ && !defined __ICC
+#if 5 <= __GNUC__ && !defined __ICC && !defined __NVCOMPILER
+ # define _GL_HAS_BUILTIN_OVERFLOW 1
+ #else
+ # define _GL_HAS_BUILTIN_OVERFLOW 0
+ #endif
+ 
+ /* True if __builtin_add_overflow_p (A, B, C) works.  */
+-#define _GL_HAS_BUILTIN_OVERFLOW_P (7 <= __GNUC__)
+#define _GL_HAS_BUILTIN_OVERFLOW_P (7 <= __GNUC__ && !defined __NVCOMPILER)
+ 
+ /* The _GL*_OVERFLOW macros have the same restrictions as the
+    *_RANGE_OVERFLOW macros, except that they do not assume that operands
+
--- a/var/spack/repos/builtin/packages/diffutils/package.py
+++ b/var/spack/repos/builtin/packages/diffutils/package.py
@ -22,6 +22,8 @@ class Diffutils(AutotoolsPackage, GNUMirrorPackage):

    build_directory = 'spack-build'

+    patch('nvhpc.patch', when='@3.7 %nvhpc')
+
    depends_on('iconv')

    def setup_build_environment(self, env):
--- a/var/spack/repos/builtin/packages/ember/package.py
+++ b/var/spack/repos/builtin/packages/ember/package.py
@ -60,6 +60,8 @@ def edit(self, spec, prefix):
    def build_targets(self):
        targets = []
        cc = self.spec['mpi'].mpicc
+        cflags = '-O3'
+        if not self.spec.satisfies('%nvhpc'):
            cflags = '-O3 -std=c99'
        oshmem_cc = 'cc'
        oshmem_c_flags = '-O3 -g'
--- a/var/spack/repos/builtin/packages/ffmpeg/package.py
+++ b/var/spack/repos/builtin/packages/ffmpeg/package.py
@ -103,6 +103,7 @@ class Ffmpeg(AutotoolsPackage):
    conflicts('+libwebp', when='@2.1.999:')
    conflicts('+libssh', when='@2.0.999:')
    conflicts('+libzmq', when='@:1.999.999')
+    conflicts('%nvhpc')

    @property
    def libs(self):
--- a/var/spack/repos/builtin/packages/fftw/package.py
+++ b/var/spack/repos/builtin/packages/fftw/package.py
@ -141,6 +141,20 @@ def configure(self, spec, prefix):
        # float only
        float_simd_features = ['altivec', 'sse']

+        # Workaround NVIDIA compiler bug when avx512 is enabled
+        if spec.satisfies('%nvhpc') and 'avx512' in simd_features:
+            simd_features.remove('avx512')
+
+        # NVIDIA compiler does not support Altivec intrinsics
+        if spec.satisfies('%nvhpc') and 'vsx' in simd_features:
+            simd_features.remove('vsx')
+        if spec.satisfies('%nvhpc') and 'altivec' in float_simd_features:
+            float_simd_features.remove('altivec')
+
+        # NVIDIA compiler does not support Neon intrinsics
+        if spec.satisfies('%nvhpc') and 'neon' in simd_features:
+            simd_features.remove('neon')
+
        simd_options = []
        for feature in simd_features:
            msg = '--enable-{0}' if feature in spec.target else '--disable-{0}'
@ -149,6 +163,8 @@ def configure(self, spec, prefix):
        # If no features are found, enable the generic ones
        if not any(f in spec.target for f in
                   simd_features + float_simd_features):
+            # Workaround NVIDIA compiler bug
+            if not spec.satisfies('%nvhpc'):
                simd_options += [
                    '--enable-generic-simd128',
                    '--enable-generic-simd256'
--- a/var/spack/repos/builtin/packages/findutils/nvhpc.patch
+++ b/var/spack/repos/builtin/packages/findutils/nvhpc.patch
@ -0,0 +1,22 @@
+--- a/gl/lib/xalloc-oversized.h	2020-08-21 06:54:54.059327027 -0700
+++ b/gl/lib/xalloc-oversized.h	2020-08-21 06:55:41.122512071 -0700
+@@ -36,7 +36,7 @@
+    sizeof (ptrdiff_t) <= sizeof (size_t), so do not bother to test for
+    exactly-SIZE_MAX allocations on such hosts; this avoids a test and
+    branch when S is known to be 1.  */
+-#if 5 <= __GNUC__ || __has_builtin (__builtin_mul_overflow)
+#if ((5 <= __GNUC__ || __has_builtin (__builtin_mul_overflow)) && !defined __NVCOMPILER)
+ # define xalloc_oversized(n, s) \
+     ({ size_t __xalloc_size; __builtin_mul_overflow (n, s, &__xalloc_size); })
+ #else
+--- a/gl/lib/intprops.h	2020-08-21 06:55:49.065543300 -0700
+++ b/gl/lib/intprops.h	2020-08-21 06:58:10.362070878 -0700
+@@ -354,7 +354,7 @@
+ /* Store A <op> B into *R, where OP specifies the operation.
+    BUILTIN is the builtin operation, and OVERFLOW the overflow predicate.
+    See above for restrictions.  */
+-#if 5 <= __GNUC__ || __has_builtin (__builtin_add_overflow)
+#if ((5 <= __GNUC__ || __has_builtin (__builtin_add_overflow)) && !defined __NVCOMPILER)
+ # define _GL_INT_OP_WRAPV(a, b, r, op, builtin, overflow) builtin (a, b, r)
+ #elif 201112 <= __STDC_VERSION__ && !_GL__GENERIC_BOGUS
+ # define _GL_INT_OP_WRAPV(a, b, r, op, builtin, overflow) \
--- a/var/spack/repos/builtin/packages/findutils/package.py
+++ b/var/spack/repos/builtin/packages/findutils/package.py
@ -46,6 +46,8 @@ class Findutils(AutotoolsPackage, GNUMirrorPackage):
    patch('https://src.fedoraproject.org/rpms/findutils/raw/97ba2d7a18d1f9ae761b6ff0b4f1c4d33d7a8efc/f/findutils-4.6.0-gnulib-fflush.patch', sha256='84b916c0bf8c51b7e7b28417692f0ad3e7030d1f3c248ba77c42ede5c1c5d11e', when='@4.6.0')
    patch('https://src.fedoraproject.org/rpms/findutils/raw/97ba2d7a18d1f9ae761b6ff0b4f1c4d33d7a8efc/f/findutils-4.6.0-gnulib-makedev.patch', sha256='bd9e4e5cc280f9753ae14956c4e4aa17fe7a210f55dd6c84aa60b12d106d47a2', when='@4.6.0')

+    patch('nvhpc.patch', when='%nvhpc')
+
    build_directory = 'spack-build'

    @classmethod
--- a/var/spack/repos/builtin/packages/fltk/package.py
+++ b/var/spack/repos/builtin/packages/fltk/package.py
@ -51,3 +51,15 @@ def install(self, spec, prefix):
        configure(*options)
        make()
        make('install')
+
+    def patch(self):
+        # Remove or replace flags not recognized by the NVIDIA compiler
+        if self.spec.satisfies('%nvhpc'):
+            filter_file('OPTIM="-Wall -Wunused -Wno-format-y2k $OPTIM"',
+                        'OPTIM="-Wall $OPTIM"', 'configure', string=True)
+            filter_file('OPTIM="-Os $OPTIM"', 'OPTIM="-O2 $OPTIM"',
+                        'configure', string=True)
+            filter_file('CXXFLAGS="$CXXFLAGS -fvisibility=hidden"',
+                        'CXXFLAGS="$CXXFLAGS"', 'configure', string=True)
+            filter_file('OPTIM="$OPTIM -fvisibility=hidden"',
+                        'OPTIM="$OPTIM"', 'configure', string=True)
--- a/var/spack/repos/builtin/packages/gettext/nvhpc-builtin.patch
+++ b/var/spack/repos/builtin/packages/gettext/nvhpc-builtin.patch
@ -0,0 +1,166 @@
+--- a/gettext-runtime/gnulib-lib/xalloc-oversized.h	2020-08-21 07:51:29.459375578 -0700
+++ b/gettext-runtime/gnulib-lib/xalloc-oversized.h	2020-08-21 07:53:18.571795663 -0700
+@@ -41,10 +41,10 @@
+    positive and N must be nonnegative.  This is a macro, not a
+    function, so that it works correctly even when SIZE_MAX < N.  */
+ 
+-#if 7 <= __GNUC__
+#if 7 <= __GNUC__ && !defined __NVCOMPILER
+ # define xalloc_oversized(n, s) \
+    __builtin_mul_overflow_p (n, s, (__xalloc_count_type) 1)
+-#elif 5 <= __GNUC__ && !defined __ICC && !__STRICT_ANSI__
+#elif 5 <= __GNUC__ && !defined __ICC && !__STRICT_ANSI__ && !defined __NVCOMPILER
+ # define xalloc_oversized(n, s) \
+    (__builtin_constant_p (n) && __builtin_constant_p (s) \
+     ? __xalloc_oversized (n, s) \
+--- a/gettext-runtime/gnulib-lib/intprops.h	2020-08-21 07:51:20.668341900 -0700
+++ b/gettext-runtime/gnulib-lib/intprops.h	2020-08-21 07:52:43.906661856 -0700
+@@ -222,7 +222,7 @@
+ 
+ /* True if __builtin_add_overflow (A, B, P) and __builtin_sub_overflow
+    (A, B, P) work when P is non-null.  */
+-#if 5 <= __GNUC__ && !defined __ICC
+#if 5 <= __GNUC__ && !defined __ICC && !defined __NVCOMPILER
+ # define _GL_HAS_BUILTIN_ADD_OVERFLOW 1
+-#elif defined __has_builtin
+#elif defined __has_builtin && !defined __NVCOMPILER
+ # define _GL_HAS_BUILTIN_ADD_OVERFLOW __has_builtin (__builtin_add_overflow)
+@@ -240,7 +240,7 @@
+ 
+ /* True if __builtin_add_overflow_p (A, B, C) works, and similarly for
+    __builtin_mul_overflow_p and __builtin_mul_overflow_p.  */
+-#define _GL_HAS_BUILTIN_OVERFLOW_P (7 <= __GNUC__)
+#define _GL_HAS_BUILTIN_OVERFLOW_P (7 <= __GNUC__ && !defined __NVCOMPILER)
+ 
+ /* The _GL*_OVERFLOW macros have the same restrictions as the
+    *_RANGE_OVERFLOW macros, except that they do not assume that operands
+--- a/gettext-tools/gnulib-lib/xalloc-oversized.h	2020-08-21 10:19:23.875281647 -0700
+++ b/gettext-tools/gnulib-lib/xalloc-oversized.h	2020-08-21 10:20:40.650583499 -0700
+@@ -41,10 +41,10 @@
+    positive and N must be nonnegative.  This is a macro, not a
+    function, so that it works correctly even when SIZE_MAX < N.  */
+ 
+-#if 7 <= __GNUC__
+#if 7 <= __GNUC__ && !defined __NVCOMPILER
+ # define xalloc_oversized(n, s) \
+    __builtin_mul_overflow_p (n, s, (__xalloc_count_type) 1)
+-#elif 5 <= __GNUC__ && !defined __ICC && !__STRICT_ANSI__
+#elif 5 <= __GNUC__ && !defined __ICC && !__STRICT_ANSI__ && !defined __NVCOMPILER
+ # define xalloc_oversized(n, s) \
+    (__builtin_constant_p (n) && __builtin_constant_p (s) \
+     ? __xalloc_oversized (n, s) \
+--- a/gettext-tools/gnulib-lib/intprops.h	2020-08-21 10:18:38.650103825 -0700
+++ b/gettext-tools/gnulib-lib/intprops.h	2020-08-21 10:19:12.379236445 -0700
+@@ -222,7 +222,7 @@
+ 
+ /* True if __builtin_add_overflow (A, B, P) and __builtin_sub_overflow
+    (A, B, P) work when P is non-null.  */
+-#if 5 <= __GNUC__ && !defined __ICC
+#if 5 <= __GNUC__ && !defined __ICC && !defined __NVCOMPILER
+ # define _GL_HAS_BUILTIN_ADD_OVERFLOW 1
+-#elif defined __has_builtin
+#elif defined __has_builtin && !defined __NVCOMPILER
+ # define _GL_HAS_BUILTIN_ADD_OVERFLOW __has_builtin (__builtin_add_overflow)
+@@ -240,7 +240,7 @@
+ 
+ /* True if __builtin_add_overflow_p (A, B, C) works, and similarly for
+    __builtin_mul_overflow_p and __builtin_mul_overflow_p.  */
+-#define _GL_HAS_BUILTIN_OVERFLOW_P (7 <= __GNUC__)
+#define _GL_HAS_BUILTIN_OVERFLOW_P (7 <= __GNUC__ && !defined __NVCOMPILER)
+ 
+ /* The _GL*_OVERFLOW macros have the same restrictions as the
+    *_RANGE_OVERFLOW macros, except that they do not assume that operands
+--- a/gettext-tools/libgrep/intprops.h	2020-08-21 10:31:00.726022663 -0700
+++ b/gettext-tools/libgrep/intprops.h	2020-08-21 10:31:29.946137693 -0700
+@@ -222,7 +222,7 @@
+ 
+ /* True if __builtin_add_overflow (A, B, P) and __builtin_sub_overflow
+    (A, B, P) work when P is non-null.  */
+-#if 5 <= __GNUC__ && !defined __ICC
+#if 5 <= __GNUC__ && !defined __ICC && !defined __NVCOMPILER
+ # define _GL_HAS_BUILTIN_ADD_OVERFLOW 1
+-#elif defined __has_builtin
+#elif defined __has_builtin && !defined __NVCOMPILER
+ # define _GL_HAS_BUILTIN_ADD_OVERFLOW __has_builtin (__builtin_add_overflow)
+@@ -240,7 +240,7 @@
+ 
+ /* True if __builtin_add_overflow_p (A, B, C) works, and similarly for
+    __builtin_mul_overflow_p and __builtin_mul_overflow_p.  */
+-#define _GL_HAS_BUILTIN_OVERFLOW_P (7 <= __GNUC__)
+#define _GL_HAS_BUILTIN_OVERFLOW_P (7 <= __GNUC__ && !defined __NVCOMPILER)
+ 
+ /* The _GL*_OVERFLOW macros have the same restrictions as the
+    *_RANGE_OVERFLOW macros, except that they do not assume that operands
+--- a/gettext-tools/libgettextpo/xalloc-oversized.h	2020-08-21 11:19:50.065564273 -0700
+++ b/gettext-tools/libgettextpo/xalloc-oversized.h	2020-08-21 11:21:14.732898185 -0700
+@@ -41,10 +41,10 @@
+    positive and N must be nonnegative.  This is a macro, not a
+    function, so that it works correctly even when SIZE_MAX < N.  */
+ 
+-#if 7 <= __GNUC__
+#if 7 <= __GNUC__ && !defined __NVCOMPILER
+ # define xalloc_oversized(n, s) \
+    __builtin_mul_overflow_p (n, s, (__xalloc_count_type) 1)
+-#elif 5 <= __GNUC__ && !defined __ICC && !__STRICT_ANSI__
+#elif 5 <= __GNUC__ && !defined __ICC && !__STRICT_ANSI__ && !defined __NVCOMPILER
+ # define xalloc_oversized(n, s) \
+    (__builtin_constant_p (n) && __builtin_constant_p (s) \
+     ? __xalloc_oversized (n, s) \
+--- a/gettext-tools/libgettextpo/intprops.h	2020-08-21 11:19:58.703598336 -0700
+++ b/gettext-tools/libgettextpo/intprops.h	2020-08-21 11:20:37.612751786 -0700
+@@ -222,7 +222,7 @@
+ 
+ /* True if __builtin_add_overflow (A, B, P) and __builtin_sub_overflow
+    (A, B, P) work when P is non-null.  */
+-#if 5 <= __GNUC__ && !defined __ICC
+#if 5 <= __GNUC__ && !defined __ICC && !defined __NVCOMPILER
+ # define _GL_HAS_BUILTIN_ADD_OVERFLOW 1
+-#elif defined __has_builtin
+#elif defined __has_builtin && !defined __NVCOMPILER
+ # define _GL_HAS_BUILTIN_ADD_OVERFLOW __has_builtin (__builtin_add_overflow)
+@@ -240,7 +240,7 @@
+ 
+ /* True if __builtin_add_overflow_p (A, B, C) works, and similarly for
+    __builtin_mul_overflow_p and __builtin_mul_overflow_p.  */
+-#define _GL_HAS_BUILTIN_OVERFLOW_P (7 <= __GNUC__)
+#define _GL_HAS_BUILTIN_OVERFLOW_P (7 <= __GNUC__ && !defined __NVCOMPILER)
+ 
+ /* The _GL*_OVERFLOW macros have the same restrictions as the
+    *_RANGE_OVERFLOW macros, except that they do not assume that operands
+--- a/libtextstyle/lib/xalloc-oversized.h	2020-08-21 11:30:13.488022919 -0700
+++ b/libtextstyle/lib/xalloc-oversized.h	2020-08-21 11:31:26.561311097 -0700
+@@ -41,10 +41,10 @@
+    positive and N must be nonnegative.  This is a macro, not a
+    function, so that it works correctly even when SIZE_MAX < N.  */
+ 
+-#if 7 <= __GNUC__
+#if 7 <= __GNUC__ && !defined __NVCOMPILER
+ # define xalloc_oversized(n, s) \
+    __builtin_mul_overflow_p (n, s, (__xalloc_count_type) 1)
+-#elif 5 <= __GNUC__ && !defined __ICC && !__STRICT_ANSI__
+#elif 5 <= __GNUC__ && !defined __ICC && !__STRICT_ANSI__ && !defined __NVCOMPILER
+ # define xalloc_oversized(n, s) \
+    (__builtin_constant_p (n) && __builtin_constant_p (s) \
+     ? __xalloc_oversized (n, s) \
+
+--- a/libtextstyle/lib/intprops.h	2020-08-21 11:30:24.283065492 -0700
+++ b/libtextstyle/lib/intprops.h	2020-08-21 11:30:54.415184325 -0700
+@@ -222,7 +222,7 @@
+ 
+ /* True if __builtin_add_overflow (A, B, P) and __builtin_sub_overflow
+    (A, B, P) work when P is non-null.  */
+-#if 5 <= __GNUC__ && !defined __ICC
+#if 5 <= __GNUC__ && !defined __ICC && !defined __NVCOMPILER
+ # define _GL_HAS_BUILTIN_ADD_OVERFLOW 1
+-#elif defined __has_builtin
+#elif defined __has_builtin && !defined __NVCOMPILER
+ # define _GL_HAS_BUILTIN_ADD_OVERFLOW __has_builtin (__builtin_add_overflow)
+@@ -240,7 +240,7 @@
+ 
+ /* True if __builtin_add_overflow_p (A, B, C) works, and similarly for
+    __builtin_mul_overflow_p and __builtin_mul_overflow_p.  */
+-#define _GL_HAS_BUILTIN_OVERFLOW_P (7 <= __GNUC__)
+#define _GL_HAS_BUILTIN_OVERFLOW_P (7 <= __GNUC__ && !defined __NVCOMPILER)
+ 
+ /* The _GL*_OVERFLOW macros have the same restrictions as the
+    *_RANGE_OVERFLOW macros, except that they do not assume that operands
--- a/var/spack/repos/builtin/packages/gettext/nvhpc-export-symbols.patch
+++ b/var/spack/repos/builtin/packages/gettext/nvhpc-export-symbols.patch
@ -0,0 +1,31 @@
+--- a/gettext-runtime/intl/Makefile.in	2020-08-21 08:39:59.102729081 -0700
+++ b/gettext-runtime/intl/Makefile.in	2020-08-21 08:40:07.425761760 -0700
+@@ -1471,7 +1471,6 @@
+ OTHER_LDFLAGS = \
+   @LTLIBICONV@ @INTL_MACOSX_LIBS@ $(INTL_WINDOWS_LIBS) @LTLIBTHREAD@ \
+   -no-undefined \
+-  -export-symbols-regex '^([^g]|g[^l]|gl[^w]|glw[^t]|glwt[^h]|glwth[^r]|glwthr[^e]|glwthre[^a]|glwthrea[^d]).*' \
+   -version-info $(LTV_CURRENT):$(LTV_REVISION):$(LTV_AGE) \
+   -rpath $(libdir)
+
+--- a/gettext-tools/intl/Makefile.in	2020-08-21 07:57:18.357721212 -0700
+++ b/gettext-tools/intl/Makefile.in	2020-08-21 07:57:29.051762490 -0700
+@@ -2296,7 +2296,6 @@
+ OTHER_LDFLAGS = \
+   @LTLIBICONV@ @INTL_MACOSX_LIBS@ $(INTL_WINDOWS_LIBS) @LTLIBTHREAD@ \
+   -no-undefined \
+-  -export-symbols-regex '^([^g]|g[^l]|gl[^w]|glw[^t]|glwt[^h]|glwth[^r]|glwthr[^e]|glwthre[^a]|glwthrea[^d]).*' \
+   -version-info $(LTV_CURRENT):$(LTV_REVISION):$(LTV_AGE) \
+   -rpath $(libdir)
+ 
+--- a/libtextstyle/lib/Makefile.in	2020-08-21 08:49:08.277982271 -0700
+++ b/libtextstyle/lib/Makefile.in	2020-08-21 08:49:19.675030561 -0700
+@@ -1917,7 +1917,7 @@
+ libtextstyle_la_LDFLAGS = $(AM_LDFLAGS) -no-undefined $(FABS_LIBM) \
+ 	$(ISNAND_LIBM) $(ISNANF_LIBM) $(ISNANL_LIBM) $(LOG10_LIBM) \
+ 	$(LTLIBICONV) $(LTLIBINTL) $(POW_LIBM) $(am__append_8) \
+-	-no-undefined -export-symbols libtextstyle.sym -version-info \
+	-no-undefined -version-info \
+ 	$(LTV_CURRENT):$(LTV_REVISION):$(LTV_AGE) -rpath $(libdir)
+ 
+ # Use this preprocessor expression to decide whether #include_next works.
--- a/var/spack/repos/builtin/packages/gettext/nvhpc-long-width.patch
+++ b/var/spack/repos/builtin/packages/gettext/nvhpc-long-width.patch
@ -0,0 +1,17 @@
+--- a/gettext-tools/libgrep/regex_internal.h	2020-08-21 09:14:20.039942370 -0700
+++ b/gettext-tools/libgrep/regex_internal.h	2020-08-21 10:06:57.840331452 -0700
+@@ -35,6 +35,14 @@
+ #include <intprops.h>
+ #include <verify.h>
+ 
+#ifndef __LONG_WIDTH__
+#if LONG_WIDTH
+#define __LONG_WIDTH__ LONG_WIDTH
+#else
+#define __LONG_WIDTH__ __WORDSIZE
+#endif
+#endif
+
+ #if defined DEBUG && DEBUG != 0
+ # include <assert.h>
+ # define DEBUG_ASSERT(x) assert (x)
--- a/var/spack/repos/builtin/packages/gettext/package.py
+++ b/var/spack/repos/builtin/packages/gettext/package.py
@ -47,6 +47,9 @@ class Gettext(AutotoolsPackage, GNUMirrorPackage):
    # depends_on('cvs')

    patch('test-verify-parallel-make-check.patch', when='@:0.19.8.1')
+    patch('nvhpc-builtin.patch', when='%nvhpc')
+    patch('nvhpc-export-symbols.patch', when='%nvhpc')
+    patch('nvhpc-long-width.patch', when='%nvhpc')

    def configure_args(self):
        spec = self.spec
--- a/var/spack/repos/builtin/packages/gromacs/package.py
+++ b/var/spack/repos/builtin/packages/gromacs/package.py
@ -75,6 +75,8 @@ class Gromacs(CMakePackage):
            description='GMX_RELAXED_DOUBLE_PRECISION for Fujitsu PRIMEHPC')
    variant('hwloc', default=True,
            description='Use the hwloc portable hardware locality library')
+    variant('lapack', default=False,
+            description='Enables an external LAPACK library')

    depends_on('mpi', when='+mpi')
    # define matching plumed versions
@ -95,6 +97,7 @@ class Gromacs(CMakePackage):
    depends_on('cmake@3.13.0:3.99.99', type='build', when='@master')
    depends_on('cmake@3.13.0:3.99.99', type='build', when='%fj')
    depends_on('cuda', when='+cuda')
+    depends_on('lapack', when='+lapack')

    # TODO: openmpi constraint; remove when concretizer is fixed
    depends_on('hwloc@:1.999', when='+hwloc')
@ -106,6 +109,10 @@ def patch(self):
        if '+plumed' in self.spec:
            self.spec['plumed'].package.apply_patch(self)

+        if self.spec.satisfies('%nvhpc'):
+            # Disable obsolete workaround
+            filter_file('ifdef __PGI', 'if 0', 'src/gromacs/fileio/xdrf.h')
+
    def cmake_args(self):

        options = []
@ -142,6 +149,14 @@ def cmake_args(self):
        if '+opencl' in self.spec:
            options.append('-DGMX_USE_OPENCL=on')

+        if '+lapack' in self.spec:
+            options.append('-DGMX_EXTERNAL_LAPACK:BOOL=ON')
+            if self.spec['lapack'].libs:
+                options.append('-DLAPACK_LIBRARIES={0}'.format(
+                    self.spec['lapack'].libs.joined(';')))
+        else:
+            options.append('-DGMX_EXTERNAL_LAPACK:BOOL=OFF')
+
        # Activate SIMD based on properties of the target
        target = self.spec.target
        if target >= llnl.util.cpu.targets['zen2']:
@ -158,6 +173,9 @@ def cmake_args(self):
            options.append('-DGMX_SIMD=IBM_VSX')
        elif target.family == llnl.util.cpu.targets['aarch64']:
            # ARMv8
+            if self.spec.satisfies('%nvhpc'):
+                options.append('-DGMX_SIMD=None')
+            else:
                options.append('-DGMX_SIMD=ARM_NEON_ASIMD')
        elif target == llnl.util.cpu.targets['mic_knl']:
            # Intel KNL
@ -172,6 +190,12 @@ def cmake_args(self):
                ('avx2', 'AVX2_256'),
                ('avx512', 'AVX_512'),
            ]
+
+            # Workaround NVIDIA compiler bug when avx512 is enabled
+            if (self.spec.satisfies('%nvhpc') and
+                ('avx512', 'AVX_512') in simd_features):
+                simd_features.remove(('avx512', 'AVX_512'))
+
            for feature, flag in reversed(simd_features):
                if feature in target:
                    options.append('-DGMX_SIMD:STRING={0}'.format(flag))
--- a/var/spack/repos/builtin/packages/json-cwx/package.py
+++ b/var/spack/repos/builtin/packages/json-cwx/package.py
@ -27,3 +27,10 @@ def autoreconf(self, spec, prefix):
        with working_dir('json-cwx'):
            autogen = Executable("./autogen.sh")
            autogen()
+
+    def patch(self):
+        # Remove flags not recognized by the NVIDIA compiler
+        if self.spec.satisfies('%nvhpc'):
+            filter_file('-Wno-error=deprecated-declarations -Wextra '
+                        '-Wwrite-strings -Wno-unused-parameter -std=gnu99',
+                        '', 'json-cwx/Makefile.am.inc')
--- a/var/spack/repos/builtin/packages/libbsd/nvhpc.patch
+++ b/var/spack/repos/builtin/packages/libbsd/nvhpc.patch
@ -0,0 +1,60 @@
+--- a/src/fgetln.c	2020-08-07 09:17:09.456951858 -0700
+++ b/src/fgetln.c	2020-08-07 09:16:12.471731317 -0700
+@@ -25,6 +25,7 @@
+  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+  */
+ 
+#include "config.h"
+ #include <stdio.h>
+ #include <sys/cdefs.h>
+ #include <sys/types.h>
+--- a/src/fpurge.c	2020-08-07 09:17:00.871918633 -0700
+++ b/src/fpurge.c	2020-08-07 09:15:55.436665389 -0700
+@@ -24,6 +24,7 @@
+  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+  */
+ 
+#include "config.h"
+ #include <errno.h>
+ #include <stdio.h>
+ #include <stdio_ext.h>
+--- a/src/funopen.c	2020-08-07 09:16:52.566886490 -0700
+++ b/src/funopen.c	2020-08-07 09:17:25.206012812 -0700
+@@ -24,6 +24,7 @@
+  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+  */
+ 
+#include "config.h"
+ #include <sys/cdefs.h>
+ #include <sys/types.h>
+ #include <errno.h>
+--- a/src/getpeereid.c	2020-08-07 09:25:13.822834376 -0700
+++ b/src/getpeereid.c	2020-08-07 09:25:27.802889361 -0700
+@@ -24,6 +24,7 @@
+  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+  */
+ 
+#include "config.h"
+ #include <sys/cdefs.h>
+ 
+ #include <sys/param.h>
+--- a/src/progname.c	2020-08-07 09:23:02.168316940 -0700
+++ b/src/progname.c	2020-08-07 09:23:09.840346776 -0700
+@@ -30,6 +30,7 @@
+  * <https://sourceware.org/ml/libc-alpha/2006-03/msg00125.html>.
+  */
+ 
+#include "config.h"
+ #include <errno.h>
+ #include <string.h>
+ #include <stdlib.h>
+--- a/src/setproctitle.c	2020-08-07 09:21:49.003033771 -0700
+++ b/src/setproctitle.c	2020-08-07 09:22:09.161111787 -0700
+@@ -22,6 +22,7 @@
+  * USE OR OTHER DEALINGS IN THE SOFTWARE.
+  */
+ 
+#include "config.h"
+ #include <errno.h>
+ #include <stddef.h>
+ #include <stdarg.h>
--- a/var/spack/repos/builtin/packages/libbsd/package.py
+++ b/var/spack/repos/builtin/packages/libbsd/package.py
@ -27,6 +27,17 @@ class Libbsd(AutotoolsPackage):

    patch('cdefs.h.patch', when='@0.8.6 %gcc@:4')
    patch('local-elf.h.patch', when='%intel')
+    patch('nvhpc.patch', when='%nvhpc')

    # https://gitlab.freedesktop.org/libbsd/libbsd/issues/1
    conflicts('platform=darwin')
+
+    def patch(self):
+        # Replace flags not recognized by the PGI and NVIDIA compilers
+        if self.spec.satisfies('%pgi') or self.spec.satisfies('%nvhpc'):
+            filter_file('-isystem', '-I', 'src/Makefile.in')
+            # This is not a 1 for 1 replacement, requiring nvhpc.patch
+            # to include config.h where needed
+            filter_file('-include ', '-I ', 'src/Makefile.in')
+            filter_file('-Wall -Wextra -Wno-unused-variable '
+                        '-Wno-unused-parameter', '-Wall', 'configure')
--- a/var/spack/repos/builtin/packages/libint/package.py
+++ b/var/spack/repos/builtin/packages/libint/package.py
@ -195,3 +195,10 @@ def install(self, spec, prefix):
            configure(*options)
            make()
            make('install')
+
+    def patch(self):
+        # Use Fortran compiler to link the Fortran example, not the C++
+        # compiler
+        if '+fortran' in self.spec and self.spec.satisfies('%nvhpc'):
+            filter_file('$(CXX) $(CXXFLAGS)', '$(FC) $(FCFLAGS)',
+                        'export/fortran/Makefile', string=True)
--- a/var/spack/repos/builtin/packages/libpciaccess/nvhpc.patch
+++ b/var/spack/repos/builtin/packages/libpciaccess/nvhpc.patch
@ -0,0 +1,11 @@
+--- a/include/pciaccess.h	2020-08-07 11:43:47.395032509 -0700
+++ b/include/pciaccess.h	2020-08-07 11:44:13.384136014 -0700
+@@ -59,7 +59,7 @@
+ 
+ #include <inttypes.h>
+ 
+-#if (__GNUC__ >= 3) || (__SUNPRO_C >= 0x5130)
+#if (((__GNUC__ >= 3) || (__SUNPRO_C >= 0x5130)) && !defined __NVCOMPILER)
+ #define __deprecated __attribute__((deprecated))
+ #else
+ #define __deprecated
--- a/var/spack/repos/builtin/packages/libpciaccess/package.py
+++ b/var/spack/repos/builtin/packages/libpciaccess/package.py
@ -20,6 +20,8 @@ class Libpciaccess(AutotoolsPackage, XorgPackage):
    depends_on('pkgconfig', type='build')
    depends_on('util-macros', type='build')

+    patch('nvhpc.patch', when='%nvhpc')
+
    # A known issue exists when building with PGI as documented here:
    # https://bugs.freedesktop.org/show_bug.cgi?id=94398
    # https://www.pgroup.com/userforum/viewtopic.php?f=4&t=5126
@ -28,3 +30,14 @@ class Libpciaccess(AutotoolsPackage, XorgPackage):
    # When the ability to use dependencies built by another compiler, using a
    # libpciaccess built by gcc should be usable by PGI builds.
    conflicts('%pgi')
+
+    def configure_args(self):
+        config_args = []
+
+        if (self.spec.satisfies('%nvhpc') and
+            (self.spec.target.family == 'aarch64' or
+             self.spec.target.family == 'ppc64le')):
+            config_args.append('--disable-strict-compilation')
+            config_args.append('--disable-selective-werror')
+
+        return config_args
--- a/var/spack/repos/builtin/packages/libtiff/package.py
+++ b/var/spack/repos/builtin/packages/libtiff/package.py
@ -23,3 +23,9 @@ class Libtiff(AutotoolsPackage):
    depends_on('jpeg')
    depends_on('zlib')
    depends_on('xz')
+
+    def patch(self):
+        # Remove flags not recognized by the NVIDIA compiler
+        if self.spec.satisfies('%nvhpc'):
+            filter_file('vl_cv_prog_cc_warnings="-Wall -W"',
+                        'vl_cv_prog_cc_warnings="-Wall"', 'configure')
--- a/var/spack/repos/builtin/packages/libtool/package.py
+++ b/var/spack/repos/builtin/packages/libtool/package.py
@ -51,6 +51,12 @@ def libs(self):
    def _make_executable(self, name):
        return Executable(join_path(self.prefix.bin, name))

+    def patch(self):
+        # Replace the GCC-style flag with the NVIDIA compiler equivalent
+        if self.spec.satisfies('%nvhpc'):
+            filter_file('-fno-builtin', '-Mnobuiltin', 'configure')
+            filter_file('-fno-builtin', '-Mnobuiltin', 'libltdl/configure')
+
    def setup_dependent_build_environment(self, env, dependent_spec):
        env.append_path('ACLOCAL_PATH', self.prefix.share.aclocal)

--- a/var/spack/repos/builtin/packages/libx11/package.py
+++ b/var/spack/repos/builtin/packages/libx11/package.py
@ -27,6 +27,15 @@ class Libx11(AutotoolsPackage, XorgPackage):
    depends_on('util-macros', type='build')
    depends_on('perl', type='build')

+    def configure_args(self):
+        config_args = []
+
+        # -Werror flags are not properly interpreted by the NVIDIA compiler
+        if self.spec.satisfies('%nvhpc'):
+            config_args.append('--disable-selective-werror')
+
+        return config_args
+
    def setup_dependent_build_environment(self, env, dependent_spec):
        env.prepend_path('XLOCALEDIR', self.prefix.share.X11.locale)

--- a/var/spack/repos/builtin/packages/libxc/nvhpc-configure.patch
+++ b/var/spack/repos/builtin/packages/libxc/nvhpc-configure.patch
@ -0,0 +1,75 @@
+--- a/configure	2020-09-24 11:13:16.306629033 -0700
+++ b/configure	2020-09-24 11:14:16.412221646 -0700
+@@ -8908,7 +8908,7 @@
+ 	lt_prog_compiler_pic='-fPIC'
+ 	lt_prog_compiler_static='-static'
+ 	;;
+-      pgcc* | pgf77* | pgf90* | pgf95* | pgfortran*)
+      pgcc* | pgf77* | pgf90* | pgf95* | pgfortran* | nvc | nvfortran*)
+         # Portland Group compilers (*not* the Pentium gcc compiler,
+ 	# which looks to be a dead project)
+ 	lt_prog_compiler_wl='-Wl,'
+@@ -9547,11 +9547,11 @@
+ 	tmp_addflag=' $pic_flag'
+ 	tmp_sharedflag='-shared'
+ 	case $cc_basename,$host_cpu in
+-        pgcc*)				# Portland Group C compiler
+        pgcc* | nvc)			# Portland Group C compiler
+ 	  whole_archive_flag_spec='$wl--whole-archive`for conv in $convenience\"\"; do test  -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive'
+ 	  tmp_addflag=' $pic_flag'
+ 	  ;;
+-	pgf77* | pgf90* | pgf95* | pgfortran*)
+	pgf77* | pgf90* | pgf95* | pgfortran* | nvfortran*)
+ 					# Portland Group f77 and f90 compilers
+ 	  whole_archive_flag_spec='$wl--whole-archive`for conv in $convenience\"\"; do test  -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive'
+ 	  tmp_addflag=' $pic_flag -Mnomain' ;;
+@@ -13810,7 +13810,7 @@
+ ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5'
+ ac_compiler_gnu=$ac_cv_fc_compiler_gnu
+ if test -n "$ac_tool_prefix"; then
+-  for ac_prog in xlf90 f90 pgf90 pghpf epcf90 gfortran g95 xlf95 f95 fort ifort ifc efc pgfortran pgf95 lf95 ftn nagfor
+  for ac_prog in xlf90 f90 pgf90 pghpf epcf90 gfortran g95 xlf95 f95 fort ifort ifc efc pgfortran pgf95 nvfortran lf95 ftn nagfor
+   do
+     # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args.
+ set dummy $ac_tool_prefix$ac_prog; ac_word=$2
+@@ -13854,7 +13854,7 @@
+ fi
+ if test -z "$FC"; then
+   ac_ct_FC=$FC
+-  for ac_prog in xlf90 f90 pgf90 pghpf epcf90 gfortran g95 xlf95 f95 fort ifort ifc efc pgfortran pgf95 lf95 ftn nagfor
+  for ac_prog in xlf90 f90 pgf90 pghpf epcf90 gfortran g95 xlf95 f95 fort ifort ifc efc pgfortran pgf95 nvfortran lf95 ftn nagfor
+ do
+   # Extract the first word of "$ac_prog", so it can be a program name with args.
+ set dummy $ac_prog; ac_word=$2
+@@ -14545,7 +14545,7 @@
+ 	lt_prog_compiler_pic_FC='-fPIC'
+ 	lt_prog_compiler_static_FC='-static'
+ 	;;
+-      pgcc* | pgf77* | pgf90* | pgf95* | pgfortran*)
+      pgcc* | pgf77* | pgf90* | pgf95* | pgfortran* | nvc | nvfortran*)
+         # Portland Group compilers (*not* the Pentium gcc compiler,
+ 	# which looks to be a dead project)
+ 	lt_prog_compiler_wl_FC='-Wl,'
+@@ -14587,7 +14587,7 @@
+ 	  lt_prog_compiler_pic_FC='-fPIC'
+ 	  lt_prog_compiler_static_FC='-static'
+ 	  ;;
+-	*Portland\ Group*)
+	*Portland\ Group* | *NVIDIA\ Compilers* | *PGI\ Compilers*)
+ 	  lt_prog_compiler_wl_FC='-Wl,'
+ 	  lt_prog_compiler_pic_FC='-fpic'
+ 	  lt_prog_compiler_static_FC='-Bstatic'
+@@ -15169,11 +15169,11 @@
+ 	tmp_addflag=' $pic_flag'
+ 	tmp_sharedflag='-shared'
+ 	case $cc_basename,$host_cpu in
+-        pgcc*)				# Portland Group C compiler
+        pgcc* | nvc)			# Portland Group C compiler
+ 	  whole_archive_flag_spec_FC='$wl--whole-archive`for conv in $convenience\"\"; do test  -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive'
+ 	  tmp_addflag=' $pic_flag'
+ 	  ;;
+-	pgf77* | pgf90* | pgf95* | pgfortran*)
+	pgf77* | pgf90* | pgf95* | pgfortran* | nvfortran*)
+ 					# Portland Group f77 and f90 compilers
+ 	  whole_archive_flag_spec_FC='$wl--whole-archive`for conv in $convenience\"\"; do test  -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive'
+ 	  tmp_addflag=' $pic_flag -Mnomain' ;;
--- a/var/spack/repos/builtin/packages/libxc/nvhpc-libtool.patch
+++ b/var/spack/repos/builtin/packages/libxc/nvhpc-libtool.patch
@ -0,0 +1,77 @@
+From b71206582131f88f6602a40e4c67e3d92b119229 Mon Sep 17 00:00:00 2001
+From: Tin Huynh <ahuynh@nvidia.com>
+Date: Mon, 27 Jul 2020 15:15:47 -0700
+Subject: [PATCH] Recognize new Nvidia compilers.
+
+With the upcoming release of HPC-SDK, updating Libtool to recognize Nvidia
+compilers (nvc, nvc++, nvfortran).
+---
+ m4/libtool.m4 | 21 ++++++++++-----------
+ 1 file changed, 10 insertions(+), 11 deletions(-)
+
+diff --git a/m4/libtool.m4 b/m4/libtool.m4
+index f2d1f39..4eac689 100644
+--- a/m4/libtool.m4
+++ b/m4/libtool.m4
+@@ -4402,8 +4402,8 @@ m4_if([$1], [CXX], [
+ 	    _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC'
+ 	    _LT_TAGVAR(lt_prog_compiler_static, $1)='-static'
+ 	    ;;
+-	  pgCC* | pgcpp*)
+-	    # Portland Group C++ compiler
+	  pgCC* | pgcpp* | pgc\+\+* |  nvc\+\+*)
+	    # NVIDIA HPC C++ compiler
+ 	    _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+ 	    _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fpic'
+ 	    _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+@@ -4739,9 +4739,8 @@ m4_if([$1], [CXX], [
+ 	_LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC'
+ 	_LT_TAGVAR(lt_prog_compiler_static, $1)='-static'
+ 	;;
+-      pgcc* | pgf77* | pgf90* | pgf95* | pgfortran*)
+-        # Portland Group compilers (*not* the Pentium gcc compiler,
+-	# which looks to be a dead project)
+      pgcc* | pgf77* | pgf90* | pgf95* | pgfortran* | nvc | nvfortran*)
+      # NVIDIA HPC Compilers
+ 	_LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+ 	_LT_TAGVAR(lt_prog_compiler_pic, $1)='-fpic'
+ 	_LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+@@ -4781,7 +4780,7 @@ m4_if([$1], [CXX], [
+ 	  _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC'
+ 	  _LT_TAGVAR(lt_prog_compiler_static, $1)='-static'
+ 	  ;;
+-	*Portland\ Group*)
+	*Portland\ Group* | *NVIDIA\ Compilers* | *PGI\ Compilers*)
+ 	  _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+ 	  _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fpic'
+ 	  _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+@@ -5209,12 +5208,12 @@ _LT_EOF
+ 	tmp_addflag=' $pic_flag'
+ 	tmp_sharedflag='-shared'
+ 	case $cc_basename,$host_cpu in
+-        pgcc*)				# Portland Group C compiler
+        pgcc* | nvc)				# NVIDIA HPC C Compiler
+ 	  _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive`for conv in $convenience\"\"; do test  -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive'
+ 	  tmp_addflag=' $pic_flag'
+ 	  ;;
+-	pgf77* | pgf90* | pgf95* | pgfortran*)
+-					# Portland Group f77 and f90 compilers
+	pgf77* | pgf90* | pgf95* | pgfortran* | nvfortran*)
+					# NVIDIA HPC Fortran Compilers
+ 	  _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive`for conv in $convenience\"\"; do test  -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive'
+ 	  tmp_addflag=' $pic_flag -Mnomain' ;;
+ 	ecc*,ia64* | icc*,ia64*)	# Intel C compiler on ia64
+@@ -7004,8 +7003,8 @@ if test yes != "$_lt_caught_CXX_error"; then
+ 	    _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl--export-dynamic'
+ 	    _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive$convenience $wl--no-whole-archive'
+ 	    ;;
+-          pgCC* | pgcpp*)
+-            # Portland Group C++ compiler
+          pgCC* | pgcpp* | pgc\+\+* | nvc\+\+*)
+            # NVIDIA HPC C++ compiler
+ 	    case `$CC -V` in
+ 	    *pgCC\ [[1-5]].* | *pgcpp\ [[1-5]].*)
+ 	      _LT_TAGVAR(prelink_cmds, $1)='tpldir=Template.dir~
+-- 
+2.7.4
+
--- a/var/spack/repos/builtin/packages/libxc/package.py
+++ b/var/spack/repos/builtin/packages/libxc/package.py
@ -29,6 +29,9 @@ class Libxc(AutotoolsPackage, CudaPackage):
    patch('0001-Bugfix-avoid-implicit-pointer-cast-to-make-libxc-com.patch', when='@5.0.0')
    patch('0002-Mark-xc_erfcx-a-GPU_FUNCTION.patch', when='@5.0.0')

+    patch('nvhpc-configure.patch', when='%nvhpc')
+    patch('nvhpc-libtool.patch', when='@develop %nvhpc')
+
    @property
    def libs(self):
        """Libxc can be queried for the following parameters:
--- a/var/spack/repos/builtin/packages/libxcb/package.py
+++ b/var/spack/repos/builtin/packages/libxcb/package.py
@ -39,6 +39,15 @@ class Libxcb(AutotoolsPackage):
    depends_on('pkgconfig', type='build')
    depends_on('util-macros', type='build')

+    def configure_args(self):
+        config_args = []
+
+        # -Werror flags are not properly interpreted by the NVIDIA compiler
+        if self.spec.satisfies('%nvhpc'):
+            config_args.append('--disable-selective-werror')
+
+        return config_args
+
    def patch(self):
        filter_file(
            'typedef struct xcb_auth_info_t {',
--- a/var/spack/repos/builtin/packages/libxml2/nvhpc-configure.patch
+++ b/var/spack/repos/builtin/packages/libxml2/nvhpc-configure.patch
@ -0,0 +1,34 @@
+--- a/configure	2020-08-24 14:05:58.117448257 -0700
+++ b/configure	2020-08-24 14:08:33.040348242 -0700
+@@ -9184,7 +9184,7 @@
+ 	lt_prog_compiler_pic='-fPIC'
+ 	lt_prog_compiler_static='-static'
+ 	;;
+-      pgcc* | pgf77* | pgf90* | pgf95* | pgfortran*)
+      pgcc* | pgf77* | pgf90* | pgf95* | pgfortran* | nvc | nvfortran*)
+         # Portland Group compilers (*not* the Pentium gcc compiler,
+ 	# which looks to be a dead project)
+ 	lt_prog_compiler_wl='-Wl,'
+@@ -9226,7 +9226,7 @@
+ 	  lt_prog_compiler_pic='-fPIC'
+ 	  lt_prog_compiler_static='-static'
+ 	  ;;
+-	*Portland\ Group*)
+	*Portland\ Group* | *NVIDIA\ Compilers* | *PGI\ Compilers*)
+ 	  lt_prog_compiler_wl='-Wl,'
+ 	  lt_prog_compiler_pic='-fpic'
+ 	  lt_prog_compiler_static='-Bstatic'
+@@ -9823,11 +9823,11 @@
+ 	tmp_addflag=' $pic_flag'
+ 	tmp_sharedflag='-shared'
+ 	case $cc_basename,$host_cpu in
+-        pgcc*)				# Portland Group C compiler
+        pgcc* | nvc)			# Portland Group C compiler
+ 	  whole_archive_flag_spec='$wl--whole-archive`for conv in $convenience\"\"; do test  -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive'
+ 	  tmp_addflag=' $pic_flag'
+ 	  ;;
+-	pgf77* | pgf90* | pgf95* | pgfortran*)
+	pgf77* | pgf90* | pgf95* | pgfortran* | nvfortran*)
+ 					# Portland Group f77 and f90 compilers
+ 	  whole_archive_flag_spec='$wl--whole-archive`for conv in $convenience\"\"; do test  -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive'
+ 	  tmp_addflag=' $pic_flag -Mnomain' ;;
--- a/var/spack/repos/builtin/packages/libxml2/nvhpc-elfgcchack.patch
+++ b/var/spack/repos/builtin/packages/libxml2/nvhpc-elfgcchack.patch
@ -0,0 +1,11 @@
+--- a/elfgcchack.h	2020-08-19 07:55:24.340192000 -0700
+++ b/elfgcchack.h	2020-08-19 07:55:39.952437000 -0700
+@@ -9,7 +9,7 @@
+  */
+ 
+ #ifdef IN_LIBXML
+-#ifdef __GNUC__
+#if defined __GNUC__ && !defined __NVCOMPILER
+ #ifdef PIC
+ #ifdef __linux__
+ #if (__GNUC__ == 3 && __GNUC_MINOR__ >= 3) || (__GNUC__ > 3)
--- a/var/spack/repos/builtin/packages/libxml2/package.py
+++ b/var/spack/repos/builtin/packages/libxml2/package.py
@ -39,6 +39,9 @@ class Libxml2(AutotoolsPackage):
    resource(name='xmlts', url='https://www.w3.org/XML/Test/xmlts20080827.tar.gz',
             sha256='96151685cec997e1f9f3387e3626d61e6284d4d6e66e0e440c209286c03e9cc7')

+    patch('nvhpc-configure.patch', when='%nvhpc')
+    patch('nvhpc-elfgcchack.patch', when='%nvhpc')
+
    @property
    def headers(self):
        include_dir = self.spec.prefix.include.libxml2
@ -62,6 +65,17 @@ def configure_args(self):

        return args

+    def patch(self):
+        """Trim warning flags in ``configure`` for ``%nvhpc`` builds.
+
+        Removes flags not recognized by the NVIDIA compiler; other
+        compilers are left untouched.
+        """
+        if not self.spec.satisfies('%nvhpc'):
+            return
+
+        # The long warning list is collapsed to a plain -Wall ...
+        gcc_warnings = (
+            '-pedantic -Wall -Wextra -Wshadow -Wpointer-arith '
+            '-Wcast-align -Wwrite-strings -Waggregate-return '
+            '-Wstrict-prototypes -Wmissing-prototypes '
+            '-Wnested-externs -Winline -Wredundant-decls'
+        )
+        filter_file(gcc_warnings, '-Wall', 'configure')
+
+        # ... and the -Wno-* pair is dropped entirely.
+        filter_file('-Wno-long-long -Wno-format-extra-args', '', 'configure')
+
    @run_after('install')
    @on_package_attributes(run_tests=True)
    def import_module_test(self):
--- a/var/spack/repos/builtin/packages/lz4/package.py
+++ b/var/spack/repos/builtin/packages/lz4/package.py
@ -34,14 +34,27 @@ def url_for_version(self, version):
            return "{0}/r{1}.tar.gz".format(url, version.joined)

    def build(self, spec, prefix):
+        par = True
+        if spec.compiler.name == 'nvhpc':
+            # relocation error when building shared and dynamic libs in
+            # parallel
+            par = False
+
        if sys.platform != "darwin":
-            make('MOREFLAGS=-lrt')  # fixes make error on CentOS6
+            make('MOREFLAGS=-lrt', parallel=par)  # fixes make error on CentOS6
        else:
-            make()
+            make(parallel=par)

    def install(self, spec, prefix):
        """Run ``make install`` with ``PREFIX`` pointing at *prefix*."""
        prefix_arg = 'PREFIX={0}'.format(prefix)
        make('install', prefix_arg)

+    def patch(self):
+        """Strip flags not recognized by the NVIDIA compiler.
+
+        Only ``%nvhpc`` builds are affected.
+        """
+        if not self.spec.satisfies('%nvhpc'):
+            return
+
+        # (flag, makefile) pairs, listed in the order the edits are applied
+        removals = (
+            ('-fvisibility=hidden', 'Makefile'),
+            ('-fvisibility=hidden', 'lib/Makefile'),
+            ('-pedantic', 'Makefile'),
+        )
+        for flag, makefile in removals:
+            filter_file(flag, '', makefile)
+
    @run_after('install')
    def darwin_fix(self):
        if sys.platform == 'darwin':
--- a/var/spack/repos/builtin/packages/m4/nvhpc.patch
+++ b/var/spack/repos/builtin/packages/m4/nvhpc.patch
@ -0,0 +1,34 @@
+--- a/lib/xalloc-oversized.h	2020-08-07 11:04:56.154698639 -0700
+++ b/lib/xalloc-oversized.h	2020-08-07 11:06:11.667997389 -0700
+@@ -46,13 +46,13 @@
+    positive and N must be nonnegative.  This is a macro, not a
+    function, so that it works correctly even when SIZE_MAX < N.  */
+ 
+-#if 7 <= __GNUC__ || __has_builtin (__builtin_add_overflow_p)
+#if ((7 <= __GNUC__ || __has_builtin (__builtin_add_overflow_p)) && !defined __NVCOMPILER)
+ # define xalloc_oversized(n, s) \
+    __builtin_mul_overflow_p (n, s, (__xalloc_count_type) 1)
+ #elif ((5 <= __GNUC__ \
+         || (__has_builtin (__builtin_mul_overflow) \
+             && __has_builtin (__builtin_constant_p))) \
+-       && !__STRICT_ANSI__)
+       && !__STRICT_ANSI__ && !defined __NVCOMPILER)
+ # define xalloc_oversized(n, s) \
+    (__builtin_constant_p (n) && __builtin_constant_p (s) \
+     ? __xalloc_oversized (n, s) \
+--- a/lib/intprops.h	2020-08-07 11:06:15.508012580 -0700
+++ b/lib/intprops.h	2020-08-07 11:07:54.379403731 -0700
+@@ -242,11 +242,11 @@
+ 
+ /* True if __builtin_add_overflow (A, B, P) works when P is non-null.  */
+ #define _GL_HAS_BUILTIN_OVERFLOW \
+-  (5 <= __GNUC__ || __has_builtin (__builtin_add_overflow))
+  ((5 <= __GNUC__ || __has_builtin (__builtin_add_overflow)) && !defined __NVCOMPILER)
+ 
+ /* True if __builtin_add_overflow_p (A, B, C) works.  */
+ #define _GL_HAS_BUILTIN_OVERFLOW_P \
+-  (7 <= __GNUC__ || __has_builtin (__builtin_add_overflow_p))
+  ((7 <= __GNUC__ || __has_builtin (__builtin_add_overflow_p)) && !defined __NVCOMPILER)
+ 
+ /* The _GL*_OVERFLOW macros have the same restrictions as the
+    *_RANGE_OVERFLOW macros, except that they do not assume that operands
--- a/var/spack/repos/builtin/packages/m4/package.py
+++ b/var/spack/repos/builtin/packages/m4/package.py
@ -17,6 +17,7 @@ class M4(AutotoolsPackage, GNUMirrorPackage):

    patch('gnulib-pgi.patch', when='@1.4.18')
    patch('pgi.patch', when='@1.4.17')
+    patch('nvhpc.patch', when='%nvhpc')
    # from: https://github.com/Homebrew/homebrew-core/blob/master/Formula/m4.rb
    # Patch credit to Jeremy Huddleston Sequoia <jeremyhu@apple.com>
    patch('secure_snprintf.patch', when='os = highsierra')
--- a/var/spack/repos/builtin/packages/matio/package.py
+++ b/var/spack/repos/builtin/packages/matio/package.py
@ -47,3 +47,11 @@ def configure_args(self):
        if '+shared' not in self.spec:
            args.append("--disable-shared")
        return args
+
+    def patch(self):
+        """Drop the version-script linker option from ``configure``.
+
+        Workaround for an anonymous version tag linker error with the
+        NVIDIA compilers; only applied to ``%nvhpc`` builds.
+        """
+        if not self.spec.satisfies('%nvhpc'):
+            return
+
+        # Passed with string=True, so '$' and '{}' are not read as a regex.
+        version_script_opt = ('${wl}-version-script '
+                              '${wl}$output_objdir/$libname.ver')
+        filter_file(version_script_opt, '', 'configure', string=True)
--- a/var/spack/repos/builtin/packages/mfem/package.py
+++ b/var/spack/repos/builtin/packages/mfem/package.py
@ -669,6 +669,12 @@ def install(self, spec, prefix):
        if install_em:
            install_tree('data', join_path(prefix_share, 'data'))

+    def patch(self):
+        """Strip the byte order mark (U+FEFF) from selected source files.
+
+        The mark is removed because it messes with some compilers.
+        """
+        bom = u'\uFEFF'
+        for source in ('fem/gslib.hpp', 'fem/gslib.cpp', 'linalg/hiop.hpp'):
+            filter_file(bom, '', source)
+
    @property
    def suitesparse_components(self):
        """Return the SuiteSparse components needed by MFEM."""
--- a/var/spack/repos/builtin/packages/miniqmc/package.py
+++ b/var/spack/repos/builtin/packages/miniqmc/package.py
@ -23,11 +23,19 @@ class Miniqmc(CMakePackage):
    depends_on('mpi')
    depends_on('lapack')

+    # Add missing PGI compiler config
+    patch('pgi-cmake.patch', when='@:0.4 % nvhpc')
+
    def cmake_args(self):
        args = [
            '-DCMAKE_CXX_COMPILER=%s' % self.spec['mpi'].mpicxx,
            '-DCMAKE_C_COMPILER=%s' % self.spec['mpi'].mpicc
        ]
+
+        if self.spec.satisfies('%nvhpc'):
+            args.append('-DLAPACK_LIBRARIES={0}'.format(
+                self.spec['lapack'].libs.joined(';')))
+
        return args

    def install(self, spec, prefix):
--- a/var/spack/repos/builtin/packages/miniqmc/pgi-cmake.patch
+++ b/var/spack/repos/builtin/packages/miniqmc/pgi-cmake.patch
@ -0,0 +1,43 @@
+diff --git a/CMake/PGICompilers.cmake b/CMake/PGICompilers.cmake
+new file mode 100644
+index 000000000..ec9d976a9
+--- /dev/null
+++ b/CMake/PGICompilers.cmake
+@@ -0,0 +1,26 @@
+# Enable OpenMP
+# If just -mp is specified, OMP_NUM_THREADS must be set in order to run in parallel
+# Specifying 'allcores' will run on all cores if OMP_NUM_THREADS is not set (which seems
+#  to be the default for other OpenMP implementations)
+IF(QMC_OMP)
+  SET(ENABLE_OPENMP 1)
+  SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mp=allcores")
+ENDIF(QMC_OMP)
+
+ADD_DEFINITIONS( -Drestrict=__restrict__ )
+
+SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__forceinline=inline")
+
+# Set extra optimization specific flags
+SET( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -fast" )
+SET( CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -fast" )
+
+
+# Setting this to 'OFF' adds the -A flag, which enforces strict standard compliance
+#  and causes the compilation to fail with some GNU header files
+SET(CMAKE_CXX_EXTENSIONS ON)
+
+# Add static flags if necessary
+IF(QMC_BUILD_STATIC)
+    SET(CMAKE_CXX_LINK_FLAGS " -Bstatic")
+ENDIF(QMC_BUILD_STATIC)
+--- a/CMakeLists.txt	2020-09-16 13:15:18.296609121 -0700
+++ b/CMakeLists.txt	2020-09-16 13:15:52.661916219 -0700
+@@ -260,6 +260,8 @@
+     INCLUDE(${PROJECT_CMAKE}/IntelCompilers.cmake)
+   ELSEIF( ${COMPILER} MATCHES "GNU" )
+     INCLUDE(${PROJECT_CMAKE}/GNUCompilers.cmake)
+  ELSEIF( ${COMPILER} MATCHES "PGI" )
+    INCLUDE(${PROJECT_CMAKE}/PGICompilers.cmake)
+   ELSEIF( ${COMPILER} MATCHES "Clang" )
+     INCLUDE(${PROJECT_CMAKE}/ClangCompilers.cmake)
+   ELSE()
--- a/var/spack/repos/builtin/packages/mumps/package.py
+++ b/var/spack/repos/builtin/packages/mumps/package.py
@ -111,6 +111,7 @@ def write_makefile_inc(self):
        # Determine which compiler suite we are using
        using_gcc = self.compiler.name == "gcc"
        using_pgi = self.compiler.name == "pgi"
+        using_nvhpc = self.compiler.name == "nvhpc"
        using_intel = self.compiler.name == "intel"
        using_xl = self.compiler.name in ['xl', 'xl_r']
        using_fj = self.compiler.name == "fj"
@ -179,7 +180,7 @@ def write_makefile_inc(self):

        # TODO: change the value to the correct one according to the
        # compiler possible values are -DAdd_, -DAdd__ and/or -DUPPER
-        if using_intel or using_pgi or using_fj:
+        if using_intel or using_pgi or using_nvhpc or using_fj:
            # The Intel, PGI, NVIDIA, and Fujitsu Fortran compilers provide
            # the main() function, so C examples linked with the Fortran
            # compiler require a hack defined by _DMAIN_COMP
--- a/var/spack/repos/builtin/packages/namd/inherited-member-2.14.patch
+++ b/var/spack/repos/builtin/packages/namd/inherited-member-2.14.patch
@ -0,0 +1,16 @@
+--- a/src/colvarproxy_namd.C	2020-10-02 15:11:37.205197956 -0700
+++ b/src/colvarproxy_namd.C	2020-10-02 15:11:47.865071117 -0700
+@@ -94,12 +94,11 @@
+           "the output restart file could be defined, exiting.\n");
+   }
+ 
+  init_tcl_pointers();
+ 
+ #ifdef NAMD_TCL
+   have_scripts = true;
+ 
+-  init_tcl_pointers();
+-
+   // See is user-scripted forces are defined
+   if (Tcl_FindCommand(reinterpret_cast<Tcl_Interp *>(tcl_interp_),
+                       "calc_colvar_forces", NULL, 0) == NULL) {
--- a/var/spack/repos/builtin/packages/namd/inherited-member.patch
+++ b/var/spack/repos/builtin/packages/namd/inherited-member.patch
@ -0,0 +1,18 @@
+--- a/src/colvarproxy_namd.C	2020-09-04 12:07:22.649591658 -0700
+++ b/src/colvarproxy_namd.C	2020-09-04 12:08:41.724079519 -0700
+@@ -479,13 +479,13 @@
+ 
+ // Callback functions
+ 
+#ifdef NAMD_TCL
+ void colvarproxy_namd::init_tcl_pointers()
+ {
+-#ifdef NAMD_TCL
+   // Store pointer to NAMD's Tcl interpreter
+   _tcl_interp = reinterpret_cast<void *>(Node::Object()->getScript()->interp);
+-#endif
+ }
+#endif
+ 
+ int colvarproxy_namd::run_force_callback()
+ {
--- a/var/spack/repos/builtin/packages/namd/package.py
+++ b/var/spack/repos/builtin/packages/namd/package.py
@ -32,6 +32,12 @@ class Namd(MakefilePackage):
    variant('interface', default='none', values=('none', 'tcl', 'python'),
            description='Enables TCL and/or python interface')

+    # init_tcl_pointers() declaration and implementation are inconsistent
+    # "src/colvarproxy_namd.C", line 482: error: inherited member is not
+    # allowed
+    patch('inherited-member.patch', when='@:2.13')
+    patch('inherited-member-2.14.patch', when='@2.14:')
+
    depends_on('charmpp@6.10.1:', when="@2.14:")
    depends_on('charmpp@6.8.2', when="@2.13")
    depends_on('charmpp@6.7.1', when="@2.12")
--- a/var/spack/repos/builtin/packages/nasm/package.py
+++ b/var/spack/repos/builtin/packages/nasm/package.py
@ -26,3 +26,11 @@ class Nasm(AutotoolsPackage):

    conflicts('%intel@:14', when='@2.14:',
              msg="Intel 14 has immature C11 support")
+
+    def patch(self):
+        """Strip ``-Werror=...`` additions to ``CFLAGS`` in ``configure``.
+
+        Applied to ``%nvhpc`` builds only, to remove flags not recognized
+        by the NVIDIA compiler.
+        """
+        if not self.spec.satisfies('%nvhpc'):
+            return
+
+        # (regex, replacement): each assignment is reset to the saved flags
+        rewrites = [
+            (r'CFLAGS="\$pa_add_cflags__old_cflags -Werror=.*"',
+             'CFLAGS="$pa_add_cflags__old_cflags"'),
+            (r'CFLAGS="\$pa_add_flags__old_flags -Werror=.*"',
+             'CFLAGS="$pa_add_flags__old_flags"'),
+        ]
+        for regex, replacement in rewrites:
+            filter_file(regex, replacement, 'configure')
--- a/var/spack/repos/builtin/packages/numactl/numactl-2.0.14-symver.patch
+++ b/var/spack/repos/builtin/packages/numactl/numactl-2.0.14-symver.patch
@ -0,0 +1,9 @@
+--- a/util.h	2020-10-08 10:08:40.517167202 -0700
+++ b/util.h	2020-10-08 10:08:55.523301155 -0700
+@@ -22,5 +22,5 @@
+ #if HAVE_ATTRIBUTE_SYMVER
+ #define SYMVER(a,b) __attribute__ ((symver (b)))
+ #else
+-#define SYMVER(a,b) __asm__ (".symver " #a "," #b);
+#define SYMVER(a,b) __asm__ (".symver " a "," b " ");
+ #endif
--- a/var/spack/repos/builtin/packages/numactl/package.py
+++ b/var/spack/repos/builtin/packages/numactl/package.py
@ -12,12 +12,30 @@ class Numactl(AutotoolsPackage):
    homepage = "http://oss.sgi.com/projects/libnuma/"
    url      = "https://github.com/numactl/numactl/archive/v2.0.11.tar.gz"

+    version('2.0.14', sha256='1ee27abd07ff6ba140aaf9bc6379b37825e54496e01d6f7343330cf1a4487035')
    version('2.0.12', sha256='7c3e819c2bdeb883de68bafe88776a01356f7ef565e75ba866c4b49a087c6bdf')
    version('2.0.11', sha256='3e099a59b2c527bcdbddd34e1952ca87462d2cef4c93da9b0bc03f02903f7089')

    patch('numactl-2.0.11-sysmacros.patch', when="@2.0.11")
+    # https://github.com/numactl/numactl/issues/94
+    patch('numactl-2.0.14-symver.patch', when="@2.0.14")

    depends_on('autoconf', type='build')
    depends_on('automake', type='build')
    depends_on('libtool',  type='build')
    depends_on('m4',       type='build')
+
+    def patch(self):
+        """Adjust the sources for builds with the NVIDIA compilers."""
+        spec = self.spec
+
+        # Remove flags not recognized by the NVIDIA compiler
+        if spec.satisfies('%nvhpc'):
+            for old, new in (('-ffast-math -funroll-loops', ''),
+                             ('-std=gnu99', '-c99')):
+                filter_file(old, new, 'Makefile.am')
+
+        # Avoid undefined reference errors: drop the '_int' suffix from
+        # these names in libnuma.c
+        if spec.satisfies('@2.0.14 %nvhpc'):
+            for symbol in ('numa_sched_setaffinity_v1',
+                           'numa_sched_setaffinity_v2',
+                           'numa_sched_getaffinity_v2'):
+                filter_file(symbol + '_int', symbol, 'libnuma.c')
--- a/var/spack/repos/builtin/packages/openmpi/nvhpc-configure.patch
+++ b/var/spack/repos/builtin/packages/openmpi/nvhpc-configure.patch
@ -0,0 +1,75 @@
+--- a/configure	2020-08-20 14:10:40.050641106 -0700
+++ b/configure	2020-08-20 14:15:03.473674334 -0700
+@@ -38272,7 +38272,7 @@
+ ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5'
+ ac_compiler_gnu=$ac_cv_fc_compiler_gnu
+ if test -n "$ac_tool_prefix"; then
+-  for ac_prog in gfortran f95 fort xlf95 ifort ifc efc pgfortran pgf95 lf95 f90 xlf90 pgf90 epcf90 nagfor
+  for ac_prog in gfortran f95 fort xlf95 ifort ifc efc pgfortran nvfortran pgf95 lf95 f90 xlf90 pgf90 epcf90 nagfor
+   do
+     # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args.
+ set dummy $ac_tool_prefix$ac_prog; ac_word=$2
+@@ -38316,7 +38316,7 @@
+ fi
+ if test -z "$FC"; then
+   ac_ct_FC=$FC
+-  for ac_prog in gfortran f95 fort xlf95 ifort ifc efc pgfortran pgf95 lf95 f90 xlf90 pgf90 epcf90 nagfor
+  for ac_prog in gfortran f95 fort xlf95 ifort ifc efc pgfortran nvfortran pgf95 lf95 f90 xlf90 pgf90 epcf90 nagfor
+ do
+   # Extract the first word of "$ac_prog", so it can be a program name with args.
+ set dummy $ac_prog; ac_word=$2
+@@ -384411,7 +384411,7 @@
+ 	lt_prog_compiler_pic='-fPIC'
+ 	lt_prog_compiler_static='-static'
+ 	;;
+-      pgcc* | pgf77* | pgf90* | pgf95* | pgfortran*)
+      pgcc* | pgf77* | pgf90* | pgf95* | pgfortran* | nvc | nvfortran*)
+         # Portland Group compilers (*not* the Pentium gcc compiler,
+ 	# which looks to be a dead project)
+ 	lt_prog_compiler_wl='-Wl,'
+@@ -385050,11 +385050,11 @@
+ 	tmp_addflag=' $pic_flag'
+ 	tmp_sharedflag='-shared'
+ 	case $cc_basename,$host_cpu in
+-        pgcc*)				# Portland Group C compiler
+        pgcc* | nvc)			# Portland Group C compiler
+ 	  whole_archive_flag_spec='$wl--whole-archive`for conv in $convenience\"\"; do test  -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive'
+ 	  tmp_addflag=' $pic_flag'
+ 	  ;;
+-	pgf77* | pgf90* | pgf95* | pgfortran*)
+	pgf77* | pgf90* | pgf95* | pgfortran* | nvfortran*)
+ 					# Portland Group f77 and f90 compilers
+ 	  whole_archive_flag_spec='$wl--whole-archive`for conv in $convenience\"\"; do test  -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive'
+ 	  tmp_addflag=' $pic_flag -Mnomain' ;;
+@@ -391724,7 +391724,7 @@
+ 	lt_prog_compiler_pic_FC='-fPIC'
+ 	lt_prog_compiler_static_FC='-static'
+ 	;;
+-      pgcc* | pgf77* | pgf90* | pgf95* | pgfortran*)
+      pgcc* | pgf77* | pgf90* | pgf95* | pgfortran* | nvc | nvfortran*)
+         # Portland Group compilers (*not* the Pentium gcc compiler,
+ 	# which looks to be a dead project)
+ 	lt_prog_compiler_wl_FC='-Wl,'
+@@ -391766,7 +391766,7 @@
+ 	  lt_prog_compiler_pic_FC='-fPIC'
+ 	  lt_prog_compiler_static_FC='-static'
+ 	  ;;
+-	*Portland\ Group*)
+	*Portland\ Group* | *NVIDIA\ Compilers* | *PGI\ Compilers*)
+ 	  lt_prog_compiler_wl_FC='-Wl,'
+ 	  lt_prog_compiler_pic_FC='-fpic'
+ 	  lt_prog_compiler_static_FC='-Bstatic'
+@@ -392348,11 +392348,11 @@
+ 	tmp_addflag=' $pic_flag'
+ 	tmp_sharedflag='-shared'
+ 	case $cc_basename,$host_cpu in
+-        pgcc*)				# Portland Group C compiler
+        pgcc* | nvc)			# Portland Group C compiler
+ 	  whole_archive_flag_spec_FC='$wl--whole-archive`for conv in $convenience\"\"; do test  -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive'
+ 	  tmp_addflag=' $pic_flag'
+ 	  ;;
+-	pgf77* | pgf90* | pgf95* | pgfortran*)
+	pgf77* | pgf90* | pgf95* | pgfortran* | nvfortran*)
+ 					# Portland Group f77 and f90 compilers
+ 	  whole_archive_flag_spec_FC='$wl--whole-archive`for conv in $convenience\"\"; do test  -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive'
+ 	  tmp_addflag=' $pic_flag -Mnomain' ;;
--- a/var/spack/repos/builtin/packages/openmpi/nvhpc-libtool.patch
+++ b/var/spack/repos/builtin/packages/openmpi/nvhpc-libtool.patch
@ -0,0 +1,77 @@
+From b71206582131f88f6602a40e4c67e3d92b119229 Mon Sep 17 00:00:00 2001
+From: Tin Huynh <ahuynh@nvidia.com>
+Date: Mon, 27 Jul 2020 15:15:47 -0700
+Subject: [PATCH] Recognize new Nvidia compilers.
+
+With the upcoming release of HPC-SDK, updating Libtool to recognize Nvidia
+compilers (nvc, nvc++, nvfortran).
+---
+ m4/libtool.m4 | 21 ++++++++++-----------
+ 1 file changed, 10 insertions(+), 11 deletions(-)
+
+diff --git a/m4/libtool.m4 b/m4/libtool.m4
+index f2d1f39..4eac689 100644
+--- a/config/libtool.m4
+++ b/config/libtool.m4
+@@ -4402,8 +4402,8 @@ m4_if([$1], [CXX], [
+ 	    _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC'
+ 	    _LT_TAGVAR(lt_prog_compiler_static, $1)='-static'
+ 	    ;;
+-	  pgCC* | pgcpp*)
+-	    # Portland Group C++ compiler
+	  pgCC* | pgcpp* | pgc\+\+* |  nvc\+\+*)
+	    # NVIDIA HPC C++ compiler
+ 	    _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+ 	    _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fpic'
+ 	    _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+@@ -4739,9 +4739,8 @@ m4_if([$1], [CXX], [
+ 	_LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC'
+ 	_LT_TAGVAR(lt_prog_compiler_static, $1)='-static'
+ 	;;
+-      pgcc* | pgf77* | pgf90* | pgf95* | pgfortran*)
+-        # Portland Group compilers (*not* the Pentium gcc compiler,
+-	# which looks to be a dead project)
+      pgcc* | pgf77* | pgf90* | pgf95* | pgfortran* | nvc | nvfortran*)
+      # NVIDIA HPC Compilers
+ 	_LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+ 	_LT_TAGVAR(lt_prog_compiler_pic, $1)='-fpic'
+ 	_LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+@@ -4781,7 +4780,7 @@ m4_if([$1], [CXX], [
+ 	  _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC'
+ 	  _LT_TAGVAR(lt_prog_compiler_static, $1)='-static'
+ 	  ;;
+-	*Portland\ Group*)
+	*Portland\ Group* | *NVIDIA\ Compilers* | *PGI\ Compilers*)
+ 	  _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+ 	  _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fpic'
+ 	  _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+@@ -5209,12 +5208,12 @@ _LT_EOF
+ 	tmp_addflag=' $pic_flag'
+ 	tmp_sharedflag='-shared'
+ 	case $cc_basename,$host_cpu in
+-        pgcc*)				# Portland Group C compiler
+        pgcc* | nvc)				# NVIDIA HPC C++ Compiler
+ 	  _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive`for conv in $convenience\"\"; do test  -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive'
+ 	  tmp_addflag=' $pic_flag'
+ 	  ;;
+-	pgf77* | pgf90* | pgf95* | pgfortran*)
+-					# Portland Group f77 and f90 compilers
+	pgf77* | pgf90* | pgf95* | pgfortran* | nvfortran*)
+					# NVIDIA HPC Fortran Compilers
+ 	  _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive`for conv in $convenience\"\"; do test  -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive'
+ 	  tmp_addflag=' $pic_flag -Mnomain' ;;
+ 	ecc*,ia64* | icc*,ia64*)	# Intel C compiler on ia64
+@@ -7004,8 +7003,8 @@ if test yes != "$_lt_caught_CXX_error"; then
+ 	    _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl--export-dynamic'
+ 	    _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive$convenience $wl--no-whole-archive'
+ 	    ;;
+-          pgCC* | pgcpp*)
+-            # Portland Group C++ compiler
+          pgCC* | pgcpp* | pgc\+\+* | nvc\+\+*)
+            # NVIDIA HPC C++ compiler
+ 	    case `$CC -V` in
+ 	    *pgCC\ [[1-5]].* | *pgcpp\ [[1-5]].*)
+ 	      _LT_TAGVAR(prelink_cmds, $1)='tpldir=Template.dir~
+-- 
+2.7.4
+
--- a/var/spack/repos/builtin/packages/openmpi/package.py
+++ b/var/spack/repos/builtin/packages/openmpi/package.py
@ -168,6 +168,9 @@ class Openmpi(AutotoolsPackage):
    patch('nag_pthread/2.0.0_2.1.1.patch', when='@2.0.0:2.1.1%nag')
    patch('nag_pthread/1.10.4_1.10.999.patch', when='@1.10.4:1.10.999%nag')

+    # NOTE(review): in a Spack spec, '%nvhpc@develop' constrains the
+    # *compiler* version to 'develop'. If the libtool patch is meant for a
+    # development version of Open MPI itself, the version constraint belongs
+    # before the '%' -- TODO confirm the intent.
+    patch('nvhpc-libtool.patch', when='%nvhpc@develop')
+    patch('nvhpc-configure.patch', when='%nvhpc')
+
    # Fix MPI_Sizeof() in the "mpi" Fortran module for compilers that do not
    # support "IGNORE TKR" functionality (e.g. NAG).
    # The issue has been resolved upstream in two steps:
@ -618,6 +621,10 @@ def configure_args(self):
            else:
                config_args.append('--without-cuda')

+        if spec.satisfies('%nvhpc'):
+            # Workaround compiler issues
+            config_args.append('CFLAGS=-O1')
+
        if '+wrapper-rpath' in spec:
            config_args.append('--enable-wrapper-rpath')

--- a/var/spack/repos/builtin/packages/openssl/package.py
+++ b/var/spack/repos/builtin/packages/openssl/package.py
@ -112,6 +112,11 @@ def install(self, spec, prefix):
           'aarch64' in spack.architecture.sys_type():
            options.append('no-asm')

+        # The default glibc provided by CentOS 7 does not provide proper
+        # atomic support when using the NVIDIA compilers
+        if self.spec.satisfies('%nvhpc os=centos7'):
+            options.append('-D__STDC_NO_ATOMICS__')
+
        config = Executable('./config')
        config('--prefix=%s' % prefix,
               '--openssldir=%s' % join_path(prefix, 'etc', 'openssl'),
--- a/var/spack/repos/builtin/packages/perl/nvhpc.patch
+++ b/var/spack/repos/builtin/packages/perl/nvhpc.patch
@ -0,0 +1,88 @@
+--- a/Configure	2020-10-08 12:20:33.969182412 -0700
+++ b/Configure	2020-10-08 12:20:47.835314546 -0700
+@@ -4971,7 +4971,7 @@
+ # and usrinc directly from the compiler.
+ # Note that ccname for clang is also gcc.
+ case "$ccname" in
+-    gcc)
+    XXX)
+ 	$echo 'extern int foo;' > try.c
+ 	set X `$cppstdin -v try.c 2>&1 | $awk '/^#include </,/^End of search /'|$cppfilter $grep '/include'`
+ 	shift
+--- a/cflags.SH	2020-10-08 12:20:58.992420850 -0700
+++ b/cflags.SH	2020-10-08 12:24:29.783426846 -0700
+@@ -179,8 +179,6 @@
+ # -pedantic* before -Werror=d-a-s
+ #
+ *)  for opt in -std=c89 -ansi $pedantic \
+-		-Werror=declaration-after-statement \
+-	        -Werror=pointer-arith \
+ 		-Wextra -W \
+ 		-Wc++-compat -Wwrite-strings
+     do
+@@ -354,9 +352,7 @@
+   #
+   # -Werror=d-a-s option is valid for g++, by definition,
+   # but we remove it just for cleanliness and shorter command lines.
+-  for f in -Wdeclaration-after-statement \
+-		-Werror=declaration-after-statement \
+-		-Wc++-compat \
+  for f in -Wc++-compat \
+ 		-std=c89
+   do
+     case "$ccflags$warn" in
+@@ -370,8 +366,8 @@
+   ;;
+ esac
+ 
+-for f in -Wdeclaration-after-statement -Werror=declaration-after-statement \
+-         -Wpointer-arith -Werror=pointer-arith
+for f in -Wdeclaration-after-statement \
+         -Wpointer-arith
+ do
+   case "$cppflags" in
+   *"$f"*)
+--- a/ext/XS-APItest/Makefile.PL	2020-10-08 13:07:33.804290877 -0700
+++ b/ext/XS-APItest/Makefile.PL	2020-10-08 13:09:18.154336339 -0700
+@@ -15,7 +15,7 @@
+     'OBJECT'            => '$(BASEEXT)$(OBJ_EXT) XSUB-undef-XS_VERSION$(OBJ_EXT) XSUB-redefined-macros$(OBJ_EXT) $(O_FILES)'. $dtrace_o,
+     realclean => {FILES	=> 'const-c.inc const-xs.inc'},
+     ($Config{gccversion} && $Config{d_attribute_deprecated} ?
+-      (CCFLAGS => $Config{ccflags} . ' -Wno-deprecated-declarations') : ()),
+      (CCFLAGS => $Config{ccflags}) : ()),
+     depend => { 'core.o' => 'core_or_not.inc',
+ 		'notcore.o' => 'core_or_not.inc' },
+ );
+--- a/Makefile.SH	2020-10-08 15:24:40.457848934 -0700
+++ b/Makefile.SH	2020-10-08 15:25:21.289415093 -0700
+@@ -1140,8 +1140,8 @@
+ # $(PERL_EXE) and ext because pod_lib.pl needs Digest::MD5
+ # But also this ensures that all extensions are built before we try to scan
+ # them, which picks up Devel::PPPort's documentation.
+-pod/perltoc.pod: $(perltoc_pod_prereqs) $(PERL_EXE) $(ext) pod/buildtoc
+-	$(RUN_PERL) -f pod/buildtoc -q
+pod/perltoc.pod: $(perltoc_pod_prereqs) $(PERL_EXE) $(ext) $(MINIPERL_EXE) pod/buildtoc
+	$(MINIPERL) -f pod/buildtoc -q
+ 
+ pod/perlapi.pod: pod/perlintern.pod
+
+@@ -1198,8 +1198,8 @@
+         $spitshell >>$Makefile <<EOT
+ 
+ install_$name install-$name: \$(INSTALL_DEPENDENCE) installperl all installman
+-	\$(RUN_PERL) installperl --destdir=\$(DESTDIR) $flags \$(INSTALLFLAGS) \$(STRIPFLAGS)
+-	\$(RUN_PERL) installman --destdir=\$(DESTDIR) $flags
+	\$(MINIPERL) installperl --destdir=\$(DESTDIR) $flags \$(INSTALLFLAGS) \$(STRIPFLAGS)
+	\$(MINIPERL) installman --destdir=\$(DESTDIR) $flags
+ EOT
+     fi
+ 
+@@ -1217,7 +1217,7 @@
+ 	LOCAL_PERL='$(RUN_PERL)'
+ 	$spitshell >>$Makefile <<'!NO!SUBS!'
+ install.perl:	$(INSTALL_DEPENDENCE) installperl
+-	$(RUN_PERL) installperl --destdir=$(DESTDIR) $(INSTALLFLAGS) $(STRIPFLAGS)
+	$(MINIPERL) installperl --destdir=$(DESTDIR) $(INSTALLFLAGS) $(STRIPFLAGS)
+ 	-@test ! -s extras.lst || PATH="`pwd`:\${PATH}" PERL5LIB="`pwd`/lib" \$(RUN_PERL) -Ilib -MCPAN -e '@ARGV&&install(@ARGV)' `cat extras.lst`
+ 
+ !NO!SUBS!
--- a/var/spack/repos/builtin/packages/perl/package.py
+++ b/var/spack/repos/builtin/packages/perl/package.py
@ -77,6 +77,9 @@ class Perl(Package):  # Perl doesn't use Autotools, it should subclass Package
    # https://github.com/Perl/perl5/pull/17946
    patch('macos-11-version-check.patch', when='@5.24.1:5.32.0 platform=darwin')

+    # Enable builds with the NVIDIA compiler
+    patch('nvhpc.patch', when='%nvhpc')
+
    # Installing cpanm alongside the core makes it safe and simple for
    # people/projects to install their own sets of perl modules.  Not
    # having it in core increases the "energy of activation" for doing
--- a/var/spack/repos/builtin/packages/picsarlite/package.py
+++ b/var/spack/repos/builtin/packages/picsarlite/package.py
@ -70,7 +70,7 @@ def build_targets(self):

    def build(self, spec, prefix):
        with working_dir('PICSARlite'):
-            make(parallel=False)
+            make(parallel=False, *self.build_targets)

    def install(self, spec, prefix):
        mkdirp(prefix.docs)
--- a/var/spack/repos/builtin/packages/pkgconf/nvhpc.patch
+++ b/var/spack/repos/builtin/packages/pkgconf/nvhpc.patch
@ -0,0 +1,11 @@
+--- a/Makefile.in	2020-08-06 15:35:59.461116401 -0700
+++ b/Makefile.in	2020-08-06 15:36:13.010162558 -0700
+@@ -532,7 +532,7 @@
+ 		libpkgconf/personality.c	\
+ 		libpkgconf/parser.c
+ 
+-libpkgconf_la_LDFLAGS = -no-undefined -version-info 3:0:0 -export-symbols-regex '^pkgconf_'
+libpkgconf_la_LDFLAGS = -no-undefined -version-info 3:0:0
+ dist_man_MANS = \
+ 	man/pkgconf.1		\
+ 	man/pkg.m4.7		\
--- a/var/spack/repos/builtin/packages/pkgconf/package.py
+++ b/var/spack/repos/builtin/packages/pkgconf/package.py
@ -28,6 +28,9 @@ class Pkgconf(AutotoolsPackage):

    provides('pkgconfig')

+    # https://github.com/spack/spack/issues/11704
+    patch('nvhpc.patch', when='@1.7.3%nvhpc')
+
    # TODO: Add a package for the kyua testing framework
    # depends_on('kyua', type='test')

--- a/var/spack/repos/builtin/packages/popt/package.py
+++ b/var/spack/repos/builtin/packages/popt/package.py
@ -15,3 +15,9 @@ class Popt(AutotoolsPackage):
    version('1.16', sha256='e728ed296fe9f069a0e005003c3d6b2dde3d9cad453422a10d6558616d304cc8')

    depends_on('libiconv')
+
+    def patch(self):
+        """Drop ``-W`` from the ``CFLAGS`` line in ``configure`` for nvhpc.
+
+        Removes flags not recognized by the NVIDIA compilers.
+        """
+        if not self.spec.satisfies('%nvhpc'):
+            return
+
+        before = 'CFLAGS="$CFLAGS -Wall -W"'
+        after = 'CFLAGS="$CFLAGS -Wall"'
+        filter_file(before, after, 'configure', string=True)
--- a/var/spack/repos/builtin/packages/python/package.py
+++ b/var/spack/repos/builtin/packages/python/package.py
@ -224,6 +224,8 @@ class Python(AutotoolsPackage):
    conflicts('+tix', when='~tkinter',
              msg='python+tix requires python+tix+tkinter')

+    conflicts('%nvhpc')
+
    _DISTUTIL_VARS_TO_SAVE = ['LDSHARED']
    _DISTUTIL_CACHE_FILENAME = 'sysconfig.json'
    _distutil_vars = None
--- a/var/spack/repos/builtin/packages/qd/package.py
+++ b/var/spack/repos/builtin/packages/qd/package.py
@ -21,6 +21,10 @@ class Qd(AutotoolsPackage):
    depends_on('libtool',  type='build')
    depends_on('m4',       type='build')

+    def setup_build_environment(self, env):
+        """Append ``-fPIC`` to ``FCFLAGS`` when building with ``%nvhpc``."""
+        if not self.spec.satisfies('%nvhpc'):
+            return
+        env.append_flags('FCFLAGS', "-fPIC")
+
    def configure_args(self):
        """Return the extra arguments for ``configure``."""
        return ['--enable-shared']
--- a/var/spack/repos/builtin/packages/quantum-espresso/nvhpc.patch
+++ b/var/spack/repos/builtin/packages/quantum-espresso/nvhpc.patch
@ -0,0 +1,106 @@
+--- a/install/configure	2020-09-08 07:25:53.088725750 -0700
+++ b/install/configure	2020-09-08 07:35:22.637773050 -0700
+@@ -2349,7 +2349,7 @@
+ ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5'
+ ac_compiler_gnu=$ac_cv_fc_compiler_gnu
+ if test -n "$ac_tool_prefix"; then
+-  for ac_prog in gfortran g95 xlf95 f95 fort ifort ifc efc pgfortran pgf95 lf95 ftn nagfor xlf90 f90 pgf90 pghpf epcf90 g77 xlf f77 frt pgf77 cf77 fort77 fl32 af77
+  for ac_prog in gfortran g95 xlf95 f95 fort ifort ifc efc nvfortran pgfortran pgf95 lf95 ftn nagfor xlf90 f90 pgf90 pghpf epcf90 g77 xlf f77 frt pgf77 cf77 fort77 fl32 af77
+   do
+     # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args.
+ set dummy $ac_tool_prefix$ac_prog; ac_word=$2
+@@ -2393,7 +2393,7 @@
+ fi
+ if test -z "$FC"; then
+   ac_ct_FC=$FC
+-  for ac_prog in gfortran g95 xlf95 f95 fort ifort ifc efc pgfortran pgf95 lf95 ftn nagfor xlf90 f90 pgf90 pghpf epcf90 g77 xlf f77 frt pgf77 cf77 fort77 fl32 af77
+  for ac_prog in gfortran g95 xlf95 f95 fort ifort ifc efc nvfortran pgfortran pgf95 lf95 ftn nagfor xlf90 f90 pgf90 pghpf epcf90 g77 xlf f77 frt pgf77 cf77 fort77 fl32 af77
+ do
+   # Extract the first word of "$ac_prog", so it can be a program name with args.
+ set dummy $ac_prog; ac_word=$2
+@@ -2810,7 +2810,7 @@
+ # candidate compilers and flags based on architecture
+ case $arch in
+ ia32 | ia64 | x86_64 )
+-        try_f90="ifort pgf90 nagfor $try_f90"
+        try_f90="ifort nvfortran pgf90 nagfor $try_f90"
+         ;;
+ arm )
+         try_f90="$try_f90"
+@@ -3125,6 +3125,7 @@
+         echo $ECHO_N "checking version of $mpif90... $ECHO_C"
+         ifort_version=`$mpif90 -V 2>&1 | grep "Intel(R)"`
+         pgf_version=`$mpif90 -V 2>&1 | grep "^pgf"`
+        nvfortran_version=`$mpif90 -V 2>&1 | grep "^nvfortran"`
+         gfortran_version=`$mpif90 -v 2>&1 | grep "gcc version"`
+         nagfor_version=`$mpif90 -v 2>&1 | grep "NAG Fortran"`
+         xlf_version=`$mpif90 -v 2>&1 | grep "xlf"`
+@@ -3142,6 +3143,11 @@
+                 f90_in_mpif90="pgf90"
+                 # flag to test MKL with PGI
+                 MKL_FLAGS="-pgf90libs"
+        elif test "$nvfortran_version" != ""
+        then
+                version=`echo $nvfortran_version | cut -d ' ' -f2`
+                echo "${ECHO_T}nvfortran $version"
+                f90_in_mpif90="nvfortran"
+         elif test "$gfortran_version" != ""
+         then
+                 version=`echo $gfortran_version | cut -d ' ' -f3`
+@@ -3242,6 +3250,8 @@
+         f90_flavor=ifort
+     elif $f90 -V 2>&1 | grep -q "^pgf" ; then
+         f90_flavor=pgf
+    elif $f90 -V 2>&1 | grep -q "^nvfortran" ; then
+        f90_flavor=nvfortran
+     elif $f90 -v 2>&1 | grep -q "gcc version" ; then
+         f90_flavor=gfortran
+     elif $f90 -V 2>&1 | grep -q "Cray Fortran" ; then
+@@ -3300,6 +3310,9 @@
+ *:pgf90 )
+         try_cc="pgcc $try_cc"
+         ;;
+*:nvfortran )
+        try_cc="nvc $try_cc"
+        ;;
+ cray*:* )
+         try_cc="cc"
+         ;;
+@@ -4166,6 +4179,19 @@
+         try_dflags="$try_dflags -D__PGI"
+         have_cpp=1
+         ;;
+*:nvfortran* )
+	try_fflags_nomain="-Mnomain"
+        try_fflags="-fast"
+        try_fflags_openmp="-mp"
+        try_f90flags="-fast -Mcache_align -Mpreprocess -Mlarge_arrays"
+        try_foxflags="-fast -Mcache_align -Mpreprocess -Mlarge_arrays"
+        try_fflags_noopt="-O0"
+        try_ldflags=""
+        try_ldflags_openmp="-mp"
+        try_ldflags_static="-static"
+        try_dflags="$try_dflags -D__PGI"
+        have_cpp=1
+        ;;
+ *:*gfortran )
+ 	try_fflags="-O3 -g"
+         if test "$use_debug" -eq 1; then
+@@ -5044,7 +5070,7 @@
+                 done
+                 ;;
+ 
+-        x86_64:pgf* )
+        x86_64:pgf* | x86_64:nvfortran )
+                 try_libdirs="/opt/acml*/pathscale64/lib/"
+                 try_libdirs="$ld_library_path $libdirs $try_libdirs"
+ 
+@@ -6245,7 +6271,7 @@
+         if test "$have_blas" -eq 0
+         then
+         case "$f90" in
+-                pgf* )
+                pgf* | nvfortran )
+                 # check for PGI blas
+                 unset ac_cv_search_dgemm # clear cached value
+                 FFLAGS="$test_fflags"
--- a/var/spack/repos/builtin/packages/quantum-espresso/package.py
+++ b/var/spack/repos/builtin/packages/quantum-espresso/package.py
@ -219,6 +219,9 @@ class QuantumEspresso(Package):
          sha256='b1aa3179ee1c069964fb9c21f3b832aebeae54947ce8d3cc1a74e7b154c3c10f',
          when='+patch@6.4.1:6.5.0')

+    # Configure updated to work with NVIDIA compilers
+    patch('nvhpc.patch', when='@6.5 %nvhpc')
+
    # Spurious problems running in parallel the Makefile
    # generated by the configure
    parallel = False
--- a/var/spack/repos/builtin/packages/relion/package.py
+++ b/var/spack/repos/builtin/packages/relion/package.py
@ -48,7 +48,7 @@ class Relion(CMakePackage, CudaPackage):
    depends_on('libtiff')

    depends_on('cuda', when='+cuda')
-    depends_on('cuda@9:10.99', when='@3: +cuda')
+    depends_on('cuda@9:', when='@3: +cuda')

    def cmake_args(self):

@ -76,3 +76,8 @@ def cmake_args(self):
            args += ['-DMKLFFT=ON', '-DFORCE_OWN_TBB=ON', '-DALTCPU=ON']

        return args
+
+    def patch(self):
+        # Swap -std=c99 for -c99 when building with the NVIDIA compilers
+        if self.spec.satisfies('%nvhpc'):
+            filter_file('-std=c99', '-c99', 'src/apps/CMakeLists.txt')
--- a/var/spack/repos/builtin/packages/rsync/package.py
+++ b/var/spack/repos/builtin/packages/rsync/package.py
@ -23,5 +23,7 @@ class Rsync(AutotoolsPackage):
    depends_on('zstd', when='@3.2:')
    depends_on('lz4', when='@3.2:')

+    conflicts('%nvhpc')
+
    def configure_args(self):
        return ['--with-included-zlib=no']
--- a/var/spack/repos/builtin/packages/suite-sparse/package.py
+++ b/var/spack/repos/builtin/packages/suite-sparse/package.py
@ -49,6 +49,7 @@ class SuiteSparse(Package):

    # This patch removes unsupported flags for pgi compiler
    patch('pgi.patch', when='%pgi')
+    patch('pgi.patch', when='%nvhpc')

    # This patch adds '-lm' when linking libgraphblas and when using clang.
    # Fixes 'libgraphblas.so.2.0.1: undefined reference to `__fpclassify''
--- a/var/spack/repos/builtin/packages/swfft/package.py
+++ b/var/spack/repos/builtin/packages/swfft/package.py
@ -36,6 +36,10 @@ def build_targets(self):
        targets.append('DFFT_MPI_CXX=%s' % spec['mpi'].mpicxx)
        targets.append('DFFT_MPI_F90=%s' % spec['mpi'].mpifc)

+        if self.spec.satisfies('%nvhpc'):
+            # remove -Wno-deprecated -std=gnu99
+            targets.append('DFFT_MPI_CFLAGS=-g -O3 -Wall')
+
        return targets

    def install(self, spec, prefix):
--- a/var/spack/repos/builtin/packages/tar/nvhpc.patch
+++ b/var/spack/repos/builtin/packages/tar/nvhpc.patch
@ -0,0 +1,34 @@
+--- a/gnu/xalloc-oversized.h	2020-08-21 06:38:16.472440800 -0700
+++ b/gnu/xalloc-oversized.h	2020-08-21 06:39:22.717691266 -0700
+@@ -41,10 +41,10 @@
+    positive and N must be nonnegative.  This is a macro, not a
+    function, so that it works correctly even when SIZE_MAX < N.  */
+ 
+-#if 7 <= __GNUC__
+#if 7 <= __GNUC__ && !defined __NVCOMPILER
+ # define xalloc_oversized(n, s) \
+    __builtin_mul_overflow_p (n, s, (__xalloc_count_type) 1)
+-#elif 5 <= __GNUC__ && !defined __ICC && !__STRICT_ANSI__
+#elif 5 <= __GNUC__ && !defined __ICC && !__STRICT_ANSI__ && !defined __NVCOMPILER
+ # define xalloc_oversized(n, s) \
+    (__builtin_constant_p (n) && __builtin_constant_p (s) \
+     ? __xalloc_oversized (n, s) \
+--- a/gnu/intprops.h	2020-08-21 06:37:03.301158640 -0700
+++ b/gnu/intprops.h	2020-08-21 06:38:01.807384249 -0700
+@@ -219,14 +219,14 @@
+    : (max) >> (b) < (a))
+ 
+ /* True if __builtin_add_overflow (A, B, P) works when P is non-null.  */
+-#if 5 <= __GNUC__ && !defined __ICC
+#if 5 <= __GNUC__ && !defined __ICC && !defined __NVCOMPILER
+ # define _GL_HAS_BUILTIN_OVERFLOW 1
+ #else
+ # define _GL_HAS_BUILTIN_OVERFLOW 0
+ #endif
+ 
+ /* True if __builtin_add_overflow_p (A, B, C) works.  */
+-#define _GL_HAS_BUILTIN_OVERFLOW_P (7 <= __GNUC__)
+#define _GL_HAS_BUILTIN_OVERFLOW_P (7 <= __GNUC__ && !defined __NVCOMPILER)
+ 
+ /* The _GL*_OVERFLOW macros have the same restrictions as the
+    *_RANGE_OVERFLOW macros, except that they do not assume that operands
--- a/var/spack/repos/builtin/packages/tar/package.py
+++ b/var/spack/repos/builtin/packages/tar/package.py
@ -30,6 +30,7 @@ class Tar(AutotoolsPackage, GNUMirrorPackage):
    patch('se-selinux.patch', when='@:1.29')
    patch('argp-pgi.patch',   when='@:1.29')
    patch('gnutar-configure-xattrs.patch', when='@1.28')
+    patch('nvhpc.patch',      when='%nvhpc')

    @classmethod
    def determine_version(cls, exe):
--- a/var/spack/repos/builtin/packages/texinfo/nvhpc.patch
+++ b/var/spack/repos/builtin/packages/texinfo/nvhpc.patch
@ -0,0 +1,34 @@
+--- a/gnulib/lib/xalloc-oversized.h	2020-08-21 06:38:16.472440800 -0700
+++ b/gnulib/lib/xalloc-oversized.h	2020-08-21 06:39:22.717691266 -0700
+@@ -41,10 +41,10 @@
+    positive and N must be nonnegative.  This is a macro, not a
+    function, so that it works correctly even when SIZE_MAX < N.  */
+ 
+-#if 7 <= __GNUC__
+#if 7 <= __GNUC__ && !defined __NVCOMPILER
+ # define xalloc_oversized(n, s) \
+    __builtin_mul_overflow_p (n, s, (__xalloc_count_type) 1)
+-#elif 5 <= __GNUC__ && !defined __ICC && !__STRICT_ANSI__
+#elif 5 <= __GNUC__ && !defined __ICC && !__STRICT_ANSI__ && !defined __NVCOMPILER
+ # define xalloc_oversized(n, s) \
+    (__builtin_constant_p (n) && __builtin_constant_p (s) \
+     ? __xalloc_oversized (n, s) \
+--- a/gnulib/lib/intprops.h	2020-08-21 06:37:03.301158640 -0700
+++ b/gnulib/lib/intprops.h	2020-08-21 06:38:01.807384249 -0700
+@@ -219,14 +219,14 @@
+    : (max) >> (b) < (a))
+ 
+ /* True if __builtin_add_overflow (A, B, P) works when P is non-null.  */
+-#if 5 <= __GNUC__ && !defined __ICC
+#if 5 <= __GNUC__ && !defined __ICC && !defined __NVCOMPILER
+ # define _GL_HAS_BUILTIN_OVERFLOW 1
+ #else
+ # define _GL_HAS_BUILTIN_OVERFLOW 0
+ #endif
+ 
+ /* True if __builtin_add_overflow_p (A, B, C) works.  */
+-#define _GL_HAS_BUILTIN_OVERFLOW_P (7 <= __GNUC__)
+#define _GL_HAS_BUILTIN_OVERFLOW_P (7 <= __GNUC__ && !defined __NVCOMPILER)
+ 
+ /* The _GL*_OVERFLOW macros have the same restrictions as the
+    *_RANGE_OVERFLOW macros, except that they do not assume that operands
--- a/var/spack/repos/builtin/packages/texinfo/package.py
+++ b/var/spack/repos/builtin/packages/texinfo/package.py
@ -40,6 +40,8 @@ class Texinfo(AutotoolsPackage, GNUMirrorPackage):
    # Ref: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=902771
    patch('update_locale_handling.patch', when='@6.3:')

+    patch('nvhpc.patch', when='%nvhpc')
+
    @classmethod
    def determine_version(cls, exe):
        output = Executable(exe)('--version', output=str, error=str)
--- a/var/spack/repos/builtin/packages/vasp/package.py
+++ b/var/spack/repos/builtin/packages/vasp/package.py
@ -16,8 +16,9 @@ class Vasp(MakefilePackage):
    """

    homepage = "http://vasp.at"
-    url      = "file://{0}/vasp.5.4.4.tar.gz".format(os.getcwd())
+    url      = "file://{0}/vasp.5.4.4.pl2.tgz".format(os.getcwd())

+    version('5.4.4.pl2', sha256='98f75fd75399a23d76d060a6155f4416b340a1704f256a00146f89024035bc8e')
    version('5.4.4', sha256='5bd2449462386f01e575f9adf629c08cb03a13142806ffb6a71309ca4431cfb3')

    resource(name='vaspsol',
@ -42,6 +43,7 @@ class Vasp(MakefilePackage):
    depends_on('mpi', type=('build', 'link', 'run'))
    depends_on('netlib-scalapack', when='+scalapack')
    depends_on('cuda', when='+cuda')
+    depends_on('qd', when='%nvhpc')

    conflicts('%gcc@:8', msg='GFortran before 9.x does not support all features needed to build VASP')
    conflicts('+vaspsol', when='+cuda', msg='+vaspsol only available for CPU')
@ -52,6 +54,16 @@ def edit(self, spec, prefix):

        if '%gcc' in spec:
            make_include = join_path('arch', 'makefile.include.linux_gnu')
+        elif '%nvhpc' in spec:
+            make_include = join_path('arch', 'makefile.include.linux_pgi')
+            filter_file('pgcc', spack_cc, make_include)
+            filter_file('pgc++', spack_cxx, make_include, string=True)
+            filter_file('pgfortran', spack_fc, make_include)
+            filter_file('/opt/pgi/qd-2.3.17/install/include',
+                        spec['qd'].prefix.include, make_include)
+            filter_file('/opt/pgi/qd-2.3.17/install/lib',
+                        spec['qd'].prefix.lib, make_include)
+            filter_file('^SCALAPACK[ ]{0,}=.*$', 'SCALAPACK ?=', make_include)
        else:
            make_include = join_path('arch',
                                     'makefile.include.linux_' +
@ -92,10 +104,15 @@ def edit(self, spec, prefix):
    def setup_build_environment(self, spack_env):
        spec = self.spec

-        cpp_options = ['-DHOST=\\"LinuxGNU\\"', '-DMPI -DMPI_BLOCK=8000',
+        cpp_options = ['-DMPI -DMPI_BLOCK=8000',
                       '-Duse_collective', '-DCACHE_SIZE=4000',
                       '-Davoidalloc', '-Duse_bse_te',
                       '-Dtbdyn', '-Duse_shmem']
+        if '%nvhpc' in self.spec:
+            cpp_options.extend(['-DHOST=\\"LinuxPGI\\"', '-DPGI16',
+                                '-Dqd_emulate'])
+        else:
+            cpp_options.append('-DHOST=\\"LinuxGNU\\"')

        cflags = ['-fPIC', '-DADD_']

--- a/var/spack/repos/builtin/packages/xsbench/package.py
+++ b/var/spack/repos/builtin/packages/xsbench/package.py
@ -39,8 +39,11 @@ def build_directory(self):
    def build_targets(self):

        targets = []
+        cflags = ''

+        if not self.spec.satisfies('%nvhpc'):
            cflags = '-std=gnu99'
+
        if '+mpi' in self.spec:
            targets.append('CC={0}'.format(self.spec['mpi'].mpicc))
        else:
--- a/var/spack/repos/builtin/packages/xxhash/package.py
+++ b/var/spack/repos/builtin/packages/xxhash/package.py
@ -27,6 +27,19 @@ class Xxhash(MakefilePackage):
    version('0.5.1', sha256='0171af39eefa06be1e616bc43b250d13bba417e4741135ec85c1fe8dc391997d')
    version('0.5.0', sha256='9605cd18d40d798eb1262bc0c2a154e1a3c138a6a9a0c4c792e855d0c08c23e1')

+    @property
+    def build_targets(self):
+        targets = []
+
+        if '%nvhpc' in self.spec:
+            targets.append('CFLAGS=-O1')
+
+            if 'avx512' in self.spec.target:
+                # Pin XXH_VECTOR to AVX2: works around an AVX512 compiler issue
+                targets.append('CPPFLAGS=-DXXH_VECTOR=XXH_AVX2')
+
+        return targets
+
    def edit(self, spec, prefix):
        makefile = FileFilter("Makefile")
        makefile.filter('/usr/local', prefix)
--- a/var/spack/repos/builtin/packages/zstd/package.py
+++ b/var/spack/repos/builtin/packages/zstd/package.py
@ -36,3 +36,26 @@ def build(self, spec, prefix):

    def install(self, spec, prefix):
        make('install', 'PREFIX={0}'.format(prefix))
+
+    def patch(self):
+        # Strip lib/Makefile flags not understood by the NVIDIA compilers
+        if self.spec.satisfies('%nvhpc'):
+            filter_file('-fvisibility=hidden', '', 'lib/Makefile')
+            filter_file('-Wc++-compat', '', 'lib/Makefile', string=True)
+            filter_file('-Wcast-align', '', 'lib/Makefile')
+            filter_file('-Wcast-qual', '', 'lib/Makefile')
+            filter_file('-Wdeclaration-after-statement', '', 'lib/Makefile')
+            filter_file('-Wextra', '', 'lib/Makefile')
+            filter_file('-Wfloat-equal', '', 'lib/Makefile')
+            filter_file('-Wformat=2', '', 'lib/Makefile')
+            filter_file('-Winit-self', '', 'lib/Makefile')
+            filter_file('-Wmissing-prototypes', '', 'lib/Makefile')
+            filter_file('-Wpointer-arith', '', 'lib/Makefile')
+            filter_file('-Wredundant-decls', '', 'lib/Makefile')
+            filter_file('-Wshadow', '', 'lib/Makefile')
+            filter_file('-Wstrict-aliasing=1', '', 'lib/Makefile')
+            filter_file('-Wstrict-prototypes', '', 'lib/Makefile')
+            filter_file('-Wswitch-enum', '', 'lib/Makefile')
+            filter_file('-Wundef', '', 'lib/Makefile')
+            filter_file('-Wvla', '', 'lib/Makefile')
+            filter_file('-Wwrite-strings', '', 'lib/Makefile')