Elemental cublas (#4889)
* Added a package for the MDAnalysis toolkit. * Added a patch that allows Elemental to use cuBLAS internally. * Added support for LBANN to use the new cuBLAS extension in Elemental. * Added a proper variant for when LBANN does not want to use cuBLAS in elemental. * Added a package for the cnpy project and used it in the lbann package. * Removed unnecessary comment lines. * Removed blank lines. * Removed debug variant. * Added support for libjpeg-turbo. * Added additional variants for OpenCV features. Fixed bug when linking in TIFF support, where libtiff used the regular JPEG library and OpenCV used libjpeg-turbo. Now libtiff can use libjpeg-turbo. * Removed the variant for getting Elemental to use the cublas variant. Updated the requirements for OpenCV to add new options. * Fixed a flake8 error in OpenCV and added a path to find cnpy in lbann. * Fixed line too long flake8 error. * Added a flag to specify the datatype size in lbann and fixed a flake8 error. * Added a debug build variant using the new build_type. * Fixed flake8. * Fixed how the debug build is pushed to Elemental. * Fixed a bug in the Elemental package where the blas search flags were being overridden by the blas link flags. Changed how the sequential initialization variant is implemented in LBANN. * Added support via a variant to explicitly use mkl or openblas. This helps work around variant forwarding problems. * Updated package files to address pull request comments.
This commit is contained in:
parent
755081968f
commit
8ca7c77008
6 changed files with 804 additions and 42 deletions
34
var/spack/repos/builtin/packages/cnpy/package.py
Normal file
34
var/spack/repos/builtin/packages/cnpy/package.py
Normal file
|
@ -0,0 +1,34 @@
|
||||||
|
##############################################################################
|
||||||
|
# Copyright (c) 2013-2016, Lawrence Livermore National Security, LLC.
|
||||||
|
# Produced at the Lawrence Livermore National Laboratory.
|
||||||
|
#
|
||||||
|
# This file is part of Spack.
|
||||||
|
# Created by Todd Gamblin, tgamblin@llnl.gov, All rights reserved.
|
||||||
|
# LLNL-CODE-647188
|
||||||
|
#
|
||||||
|
# For details, see https://github.com/llnl/spack
|
||||||
|
# Please also see the NOTICE and LICENSE files for our notice and the LGPL.
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Lesser General Public License (as
|
||||||
|
# published by the Free Software Foundation) version 2.1, February 1999.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful, but
|
||||||
|
# WITHOUT ANY WARRANTY; without even the IMPLIED WARRANTY OF
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the terms and
|
||||||
|
# conditions of the GNU Lesser General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Lesser General Public
|
||||||
|
# License along with this program; if not, write to the Free Software
|
||||||
|
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||||
|
##############################################################################
|
||||||
|
from spack import *
|
||||||
|
|
||||||
|
|
||||||
|
class Cnpy(CMakePackage):
|
||||||
|
"""cnpy: library to read/write .npy and .npz files in C/C++."""
|
||||||
|
|
||||||
|
homepage = "https://github.com/rogersce/cnpy"
|
||||||
|
url = "https://github.com/rogersce/cnpy"
|
||||||
|
|
||||||
|
version('master', git='https://github.com/rogersce/cnpy.git', branch="master")
|
|
@ -0,0 +1,668 @@
|
||||||
|
diff -Naur a/include/El/blas_like/level3.hpp b/include/El/blas_like/level3.hpp
|
||||||
|
--- a/include/El/blas_like/level3.hpp 2017-06-08 07:30:43.180249917 -0700
|
||||||
|
+++ b/include/El/blas_like/level3.hpp 2017-06-08 07:35:27.325434602 -0700
|
||||||
|
@@ -31,6 +31,10 @@
|
||||||
|
}
|
||||||
|
using namespace GemmAlgorithmNS;
|
||||||
|
|
||||||
|
+void GemmUseGPU(int min_M, int min_N, int min_K);
|
||||||
|
+
|
||||||
|
+void GemmUseCPU();
|
||||||
|
+
|
||||||
|
template<typename T>
|
||||||
|
void Gemm
|
||||||
|
( Orientation orientA, Orientation orientB,
|
||||||
|
diff -Naur a/include/El/core/imports/blas.hpp b/include/El/core/imports/blas.hpp
|
||||||
|
--- a/include/El/core/imports/blas.hpp 2017-06-08 07:30:43.522016908 -0700
|
||||||
|
+++ b/include/El/core/imports/blas.hpp 2017-06-08 07:35:06.834030908 -0700
|
||||||
|
@@ -916,4 +916,63 @@
|
||||||
|
} // namespace blas
|
||||||
|
} // namespace El
|
||||||
|
|
||||||
|
+
|
||||||
|
+#if defined(EL_USE_CUBLAS)
|
||||||
|
+
|
||||||
|
+namespace El {
|
||||||
|
+
|
||||||
|
+#ifdef EL_USE_64BIT_BLAS_INTS
|
||||||
|
+typedef long long int BlasInt;
|
||||||
|
+#else
|
||||||
|
+typedef int BlasInt;
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+namespace cublas {
|
||||||
|
+
|
||||||
|
+// NOTE: templated routines are custom and not wrappers
|
||||||
|
+
|
||||||
|
+// Level 3 BLAS
|
||||||
|
+// ============
|
||||||
|
+template<typename T>
|
||||||
|
+void Gemm
|
||||||
|
+( char transA, char transB, BlasInt m, BlasInt n, BlasInt k,
|
||||||
|
+ const T& alpha,
|
||||||
|
+ const T* A, BlasInt ALDim,
|
||||||
|
+ const T* B, BlasInt BLDim,
|
||||||
|
+ const T& beta,
|
||||||
|
+ T* C, BlasInt CLDim );
|
||||||
|
+
|
||||||
|
+void Gemm
|
||||||
|
+( char transA, char transB, BlasInt m, BlasInt n, BlasInt k,
|
||||||
|
+ const float& alpha,
|
||||||
|
+ const float* A, BlasInt ALDim,
|
||||||
|
+ const float* B, BlasInt BLDim,
|
||||||
|
+ const float& beta,
|
||||||
|
+ float* C, BlasInt CLDim );
|
||||||
|
+void Gemm
|
||||||
|
+( char transA, char transB, BlasInt m, BlasInt n, BlasInt k,
|
||||||
|
+ const double& alpha,
|
||||||
|
+ const double* A, BlasInt ALDim,
|
||||||
|
+ const double* B, BlasInt BLDim,
|
||||||
|
+ const double& beta,
|
||||||
|
+ double* C, BlasInt CLDim );
|
||||||
|
+void Gemm
|
||||||
|
+( char transA, char transB, BlasInt m, BlasInt n, BlasInt k,
|
||||||
|
+ const scomplex& alpha,
|
||||||
|
+ const scomplex* A, BlasInt ALDim,
|
||||||
|
+ const scomplex* B, BlasInt BLDim,
|
||||||
|
+ const scomplex& beta,
|
||||||
|
+ scomplex* C, BlasInt CLDim );
|
||||||
|
+void Gemm
|
||||||
|
+( char transA, char transB, BlasInt m, BlasInt n, BlasInt k,
|
||||||
|
+ const dcomplex& alpha,
|
||||||
|
+ const dcomplex* A, BlasInt ALDim,
|
||||||
|
+ const dcomplex* B, BlasInt BLDim,
|
||||||
|
+ const dcomplex& beta,
|
||||||
|
+ dcomplex* C, BlasInt CLDim );
|
||||||
|
+
|
||||||
|
+} // namespace cublas
|
||||||
|
+} // namespace El
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
#endif // ifndef EL_IMPORTS_BLAS_DECL_HPP
|
||||||
|
diff -Naur a/src/blas_like/level3/Gemm.cpp b/src/blas_like/level3/Gemm.cpp
|
||||||
|
--- a/src/blas_like/level3/Gemm.cpp 2017-06-08 07:30:44.307096427 -0700
|
||||||
|
+++ b/src/blas_like/level3/Gemm.cpp 2017-06-08 07:34:23.062863489 -0700
|
||||||
|
@@ -16,6 +16,20 @@
|
||||||
|
|
||||||
|
namespace El {
|
||||||
|
|
||||||
|
+char gemm_cpu_gpu_switch = 'c';
|
||||||
|
+int min_M = 0, min_N = 0, min_K = 0;
|
||||||
|
+
|
||||||
|
+void GemmUseGPU(int _min_M, int _min_N, int _min_K) {
|
||||||
|
+ gemm_cpu_gpu_switch = 'g';
|
||||||
|
+ min_M = _min_M;
|
||||||
|
+ min_N = _min_N;
|
||||||
|
+ min_K = _min_K;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void GemmUseCPU() {
|
||||||
|
+ gemm_cpu_gpu_switch = 'c';
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
template<typename T>
|
||||||
|
void Gemm
|
||||||
|
( Orientation orientA, Orientation orientB,
|
||||||
|
@@ -59,11 +73,30 @@
|
||||||
|
const Int k = ( orientA == NORMAL ? A.Width() : A.Height() );
|
||||||
|
if( k != 0 )
|
||||||
|
{
|
||||||
|
+#if defined(EL_USE_CUBLAS)
|
||||||
|
+ if (gemm_cpu_gpu_switch == 'g' &&
|
||||||
|
+ m >= min_M &&
|
||||||
|
+ n >= min_N &&
|
||||||
|
+ k >= min_K) {
|
||||||
|
+ cublas::Gemm
|
||||||
|
+ ( transA, transB, m, n, k,
|
||||||
|
+ alpha, A.LockedBuffer(), A.LDim(),
|
||||||
|
+ B.LockedBuffer(), B.LDim(),
|
||||||
|
+ beta, C.Buffer(), C.LDim() );
|
||||||
|
+ } else {
|
||||||
|
+ blas::Gemm
|
||||||
|
+ ( transA, transB, m, n, k,
|
||||||
|
+ alpha, A.LockedBuffer(), A.LDim(),
|
||||||
|
+ B.LockedBuffer(), B.LDim(),
|
||||||
|
+ beta, C.Buffer(), C.LDim() );
|
||||||
|
+ }
|
||||||
|
+#else
|
||||||
|
blas::Gemm
|
||||||
|
( transA, transB, m, n, k,
|
||||||
|
alpha, A.LockedBuffer(), A.LDim(),
|
||||||
|
B.LockedBuffer(), B.LDim(),
|
||||||
|
beta, C.Buffer(), C.LDim() );
|
||||||
|
+#endif
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
diff -Naur a/src/core/imports/blas/Gemm.hpp b/src/core/imports/blas/Gemm.hpp
|
||||||
|
--- a/src/core/imports/blas/Gemm.hpp 2017-06-08 07:30:45.090529967 -0700
|
||||||
|
+++ b/src/core/imports/blas/Gemm.hpp 2017-06-08 07:34:46.503009958 -0700
|
||||||
|
@@ -41,6 +41,12 @@
|
||||||
|
|
||||||
|
} // extern "C"
|
||||||
|
|
||||||
|
+
|
||||||
|
+#if defined(EL_USE_CUBLAS)
|
||||||
|
+#include <cublas.h>
|
||||||
|
+#include <cub/util_allocator.cuh>
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
namespace El {
|
||||||
|
namespace blas {
|
||||||
|
|
||||||
|
@@ -515,3 +521,515 @@
|
||||||
|
|
||||||
|
} // namespace blas
|
||||||
|
} // namespace El
|
||||||
|
+
|
||||||
|
+
|
||||||
|
+#if EL_USE_CUBLAS
|
||||||
|
+
|
||||||
|
+#define USE_CUB 1
|
||||||
|
+
|
||||||
|
+namespace El {
|
||||||
|
+namespace cublas {
|
||||||
|
+
|
||||||
|
+#if USE_CUB
|
||||||
|
+cub::CachingDeviceAllocator g_allocator(true); // Caching allocator for device memory
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+template<typename T>
|
||||||
|
+void Gemm
|
||||||
|
+( char transA, char transB,
|
||||||
|
+ BlasInt m, BlasInt n, BlasInt k,
|
||||||
|
+ const T& alpha,
|
||||||
|
+ const T* A, BlasInt ALDim,
|
||||||
|
+ const T* B, BlasInt BLDim,
|
||||||
|
+ const T& beta,
|
||||||
|
+ T* C, BlasInt CLDim )
|
||||||
|
+{
|
||||||
|
+ // put something here
|
||||||
|
+ printf("integer version \n");
|
||||||
|
+}
|
||||||
|
+template void Gemm
|
||||||
|
+( char transA, char transB,
|
||||||
|
+ BlasInt m, BlasInt n, BlasInt k,
|
||||||
|
+ const Int& alpha,
|
||||||
|
+ const Int* A, BlasInt ALDim,
|
||||||
|
+ const Int* B, BlasInt BLDim,
|
||||||
|
+ const Int& beta,
|
||||||
|
+ Int* C, BlasInt CLDim );
|
||||||
|
+#ifdef EL_HAVE_QD
|
||||||
|
+template void Gemm
|
||||||
|
+( char transA, char transB,
|
||||||
|
+ BlasInt m, BlasInt n, BlasInt k,
|
||||||
|
+ const DoubleDouble& alpha,
|
||||||
|
+ const DoubleDouble* A, BlasInt ALDim,
|
||||||
|
+ const DoubleDouble* B, BlasInt BLDim,
|
||||||
|
+ const DoubleDouble& beta,
|
||||||
|
+ DoubleDouble* C, BlasInt CLDim );
|
||||||
|
+template void Gemm
|
||||||
|
+( char transA, char transB,
|
||||||
|
+ BlasInt m, BlasInt n, BlasInt k,
|
||||||
|
+ const QuadDouble& alpha,
|
||||||
|
+ const QuadDouble* A, BlasInt ALDim,
|
||||||
|
+ const QuadDouble* B, BlasInt BLDim,
|
||||||
|
+ const QuadDouble& beta,
|
||||||
|
+ QuadDouble* C, BlasInt CLDim );
|
||||||
|
+template void Gemm
|
||||||
|
+( char transA, char transB,
|
||||||
|
+ BlasInt m, BlasInt n, BlasInt k,
|
||||||
|
+ const Complex<DoubleDouble>& alpha,
|
||||||
|
+ const Complex<DoubleDouble>* A, BlasInt ALDim,
|
||||||
|
+ const Complex<DoubleDouble>* B, BlasInt BLDim,
|
||||||
|
+ const Complex<DoubleDouble>& beta,
|
||||||
|
+ Complex<DoubleDouble>* C, BlasInt CLDim );
|
||||||
|
+template void Gemm
|
||||||
|
+( char transA, char transB,
|
||||||
|
+ BlasInt m, BlasInt n, BlasInt k,
|
||||||
|
+ const Complex<QuadDouble>& alpha,
|
||||||
|
+ const Complex<QuadDouble>* A, BlasInt ALDim,
|
||||||
|
+ const Complex<QuadDouble>* B, BlasInt BLDim,
|
||||||
|
+ const Complex<QuadDouble>& beta,
|
||||||
|
+ Complex<QuadDouble>* C, BlasInt CLDim );
|
||||||
|
+#endif
|
||||||
|
+#ifdef EL_HAVE_QUAD
|
||||||
|
+template void Gemm
|
||||||
|
+( char transA, char transB,
|
||||||
|
+ BlasInt m, BlasInt n, BlasInt k,
|
||||||
|
+ const Quad& alpha,
|
||||||
|
+ const Quad* A, BlasInt ALDim,
|
||||||
|
+ const Quad* B, BlasInt BLDim,
|
||||||
|
+ const Quad& beta,
|
||||||
|
+ Quad* C, BlasInt CLDim );
|
||||||
|
+template void Gemm
|
||||||
|
+( char transA, char transB,
|
||||||
|
+ BlasInt m, BlasInt n, BlasInt k,
|
||||||
|
+ const Complex<Quad>& alpha,
|
||||||
|
+ const Complex<Quad>* A, BlasInt ALDim,
|
||||||
|
+ const Complex<Quad>* B, BlasInt BLDim,
|
||||||
|
+ const Complex<Quad>& beta,
|
||||||
|
+ Complex<Quad>* C, BlasInt CLDim );
|
||||||
|
+#endif
|
||||||
|
+#ifdef EL_HAVE_MPC
|
||||||
|
+template void Gemm
|
||||||
|
+( char transA, char transB,
|
||||||
|
+ BlasInt m, BlasInt n, BlasInt k,
|
||||||
|
+ const BigInt& alpha,
|
||||||
|
+ const BigInt* A, BlasInt ALDim,
|
||||||
|
+ const BigInt* B, BlasInt BLDim,
|
||||||
|
+ const BigInt& beta,
|
||||||
|
+ BigInt* C, BlasInt CLDim );
|
||||||
|
+template void Gemm
|
||||||
|
+( char transA, char transB,
|
||||||
|
+ BlasInt m, BlasInt n, BlasInt k,
|
||||||
|
+ const BigFloat& alpha,
|
||||||
|
+ const BigFloat* A, BlasInt ALDim,
|
||||||
|
+ const BigFloat* B, BlasInt BLDim,
|
||||||
|
+ const BigFloat& beta,
|
||||||
|
+ BigFloat* C, BlasInt CLDim );
|
||||||
|
+template void Gemm
|
||||||
|
+( char transA, char transB,
|
||||||
|
+ BlasInt m, BlasInt n, BlasInt k,
|
||||||
|
+ const Complex<BigFloat>& alpha,
|
||||||
|
+ const Complex<BigFloat>* A, BlasInt ALDim,
|
||||||
|
+ const Complex<BigFloat>* B, BlasInt BLDim,
|
||||||
|
+ const Complex<BigFloat>& beta,
|
||||||
|
+ Complex<BigFloat>* C, BlasInt CLDim );
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+void Gemm
|
||||||
|
+( char transA, char transB,
|
||||||
|
+ BlasInt m, BlasInt n, BlasInt k,
|
||||||
|
+ const float& alpha,
|
||||||
|
+ const float* A, BlasInt ALDim,
|
||||||
|
+ const float* B, BlasInt BLDim,
|
||||||
|
+ const float& beta,
|
||||||
|
+ float* C, BlasInt CLDim )
|
||||||
|
+{
|
||||||
|
+ EL_DEBUG_CSE
|
||||||
|
+ EL_DEBUG_ONLY(
|
||||||
|
+ if( std::toupper(transA) == 'N' )
|
||||||
|
+ {
|
||||||
|
+ if( ALDim < Max(m,1) )
|
||||||
|
+ LogicError("ALDim was too small: ALDim=",ALDim,",m=",m);
|
||||||
|
+ }
|
||||||
|
+ else
|
||||||
|
+ {
|
||||||
|
+ if( ALDim < Max(k,1) )
|
||||||
|
+ LogicError("ALDim was too small: ALDim=",ALDim,",k=",k);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if( std::toupper(transB) == 'N' )
|
||||||
|
+ {
|
||||||
|
+ if( BLDim < Max(k,1) )
|
||||||
|
+ LogicError("BLDim was too small: BLDim=",BLDim,",k=",k);
|
||||||
|
+ }
|
||||||
|
+ else
|
||||||
|
+ {
|
||||||
|
+ if( BLDim < Max(n,1) )
|
||||||
|
+ LogicError("BLDim was too small: BLDim=",BLDim,",n=",n);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if( CLDim < Max(m,1) )
|
||||||
|
+ LogicError("CLDim was too small: CLDim=",CLDim,",m=",m);
|
||||||
|
+ )
|
||||||
|
+ const char fixedTransA = ( std::toupper(transA) == 'C' ? 'T' : transA );
|
||||||
|
+ const char fixedTransB = ( std::toupper(transB) == 'C' ? 'T' : transB );
|
||||||
|
+
|
||||||
|
+ const mpi::Comm comm;
|
||||||
|
+ const Int commRank = mpi::Rank( comm );
|
||||||
|
+ if (commRank == 0) {
|
||||||
|
+ //printf("calling cublas Sgemm: m %d n %d k %d\n", m, n, k);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ BlasInt rowA, colA, rowB, colB, rowC, colC;
|
||||||
|
+ // device memory size for A, B and C
|
||||||
|
+ BlasInt sizeA, sizeB, sizeC;
|
||||||
|
+ float *devA=NULL, *devB=NULL, *devC=NULL;
|
||||||
|
+
|
||||||
|
+ rowA = fixedTransA == 'T' ? k : m;
|
||||||
|
+ colA = fixedTransA == 'T' ? m : k;
|
||||||
|
+ rowB = fixedTransB == 'T' ? n : k;
|
||||||
|
+ colB = fixedTransB == 'T' ? k : n;
|
||||||
|
+ rowC = m;
|
||||||
|
+ colC = n;
|
||||||
|
+ sizeA = rowA * colA;
|
||||||
|
+ sizeB = rowB * colB;
|
||||||
|
+ sizeC = rowC * colC;
|
||||||
|
+
|
||||||
|
+ cublasStatus stat;
|
||||||
|
+
|
||||||
|
+#if USE_CUB
|
||||||
|
+ CubDebugExit(g_allocator.DeviceAllocate((void**)&devA,
|
||||||
|
+ sizeof(float) * (sizeA+sizeB+sizeC) ));
|
||||||
|
+#else
|
||||||
|
+ stat = cublasAlloc(sizeA+sizeB+sizeC, sizeof(float), (void **) &devA);
|
||||||
|
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("Alloc A,B,C error\n"); }
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+ devB = devA + sizeA;
|
||||||
|
+ devC = devB + sizeB;
|
||||||
|
+
|
||||||
|
+ // copy matrix A, B and C to device
|
||||||
|
+ stat = cublasSetMatrix(rowA, colA, sizeof(float), A, ALDim, devA, rowA);
|
||||||
|
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("SetMatrix A error\n"); }
|
||||||
|
+
|
||||||
|
+ stat = cublasSetMatrix(rowB, colB, sizeof(float), B, BLDim, devB, rowB);
|
||||||
|
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("SetMatrix B error\n"); }
|
||||||
|
+
|
||||||
|
+ if (beta != 0.0)
|
||||||
|
+ {
|
||||||
|
+ stat = cublasSetMatrix(rowC, colC, sizeof(float), C, CLDim, devC, rowC);
|
||||||
|
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("SetMatrix C error\n"); }
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ // cublas<t>gemm
|
||||||
|
+ cublasSgemm
|
||||||
|
+ ( fixedTransA, fixedTransB, m, n, k,
|
||||||
|
+ alpha, devA, rowA, devB, rowB, beta, devC, rowC );
|
||||||
|
+
|
||||||
|
+ // copy matrix C to host
|
||||||
|
+ stat = cublasGetMatrix(rowC, colC, sizeof(float), devC, rowC, C, CLDim);
|
||||||
|
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("GetMatrix C error\n"); }
|
||||||
|
+
|
||||||
|
+ // free
|
||||||
|
+#if USE_CUB
|
||||||
|
+ CubDebugExit(g_allocator.DeviceFree(devA));
|
||||||
|
+#else
|
||||||
|
+ cublasFree(devA);
|
||||||
|
+#endif
|
||||||
|
+ //printf("CUBLAS float done ...\n");
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void Gemm
|
||||||
|
+( char transA, char transB,
|
||||||
|
+ BlasInt m, BlasInt n, BlasInt k,
|
||||||
|
+ const double& alpha,
|
||||||
|
+ const double* A, BlasInt ALDim,
|
||||||
|
+ const double* B, BlasInt BLDim,
|
||||||
|
+ const double& beta,
|
||||||
|
+ double* C, BlasInt CLDim )
|
||||||
|
+{
|
||||||
|
+ EL_DEBUG_CSE
|
||||||
|
+ EL_DEBUG_ONLY(
|
||||||
|
+ if( std::toupper(transA) == 'N' )
|
||||||
|
+ {
|
||||||
|
+ if( ALDim < Max(m,1) )
|
||||||
|
+ LogicError("ALDim was too small: ALDim=",ALDim,",m=",m);
|
||||||
|
+ }
|
||||||
|
+ else
|
||||||
|
+ {
|
||||||
|
+ if( ALDim < Max(k,1) )
|
||||||
|
+ LogicError("ALDim was too small: ALDim=",ALDim,",k=",k);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if( std::toupper(transB) == 'N' )
|
||||||
|
+ {
|
||||||
|
+ if( BLDim < Max(k,1) )
|
||||||
|
+ LogicError("BLDim was too small: BLDim=",BLDim,",k=",k);
|
||||||
|
+ }
|
||||||
|
+ else
|
||||||
|
+ {
|
||||||
|
+ if( BLDim < Max(n,1) )
|
||||||
|
+ LogicError("BLDim was too small: BLDim=",BLDim,",n=",n);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if( CLDim < Max(m,1) )
|
||||||
|
+ LogicError("CLDim was too small: CLDim=",CLDim,",m=",m);
|
||||||
|
+ )
|
||||||
|
+ const char fixedTransA = ( std::toupper(transA) == 'C' ? 'T' : transA );
|
||||||
|
+ const char fixedTransB = ( std::toupper(transB) == 'C' ? 'T' : transB );
|
||||||
|
+
|
||||||
|
+ const mpi::Comm comm;
|
||||||
|
+ const Int commRank = mpi::Rank( comm );
|
||||||
|
+ if (commRank == 0) {
|
||||||
|
+ //printf("calling cublas Dgemm: m %d n %d k %d\n", m, n, k);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ BlasInt rowA, colA, rowB, colB, rowC, colC;
|
||||||
|
+ // device memory size for A, B and C
|
||||||
|
+ BlasInt sizeA, sizeB, sizeC;
|
||||||
|
+ double *devA=NULL, *devB=NULL, *devC=NULL;
|
||||||
|
+
|
||||||
|
+ rowA = fixedTransA == 'T' ? k : m;
|
||||||
|
+ colA = fixedTransA == 'T' ? m : k;
|
||||||
|
+ rowB = fixedTransB == 'T' ? n : k;
|
||||||
|
+ colB = fixedTransB == 'T' ? k : n;
|
||||||
|
+ rowC = m;
|
||||||
|
+ colC = n;
|
||||||
|
+ sizeA = rowA * colA;
|
||||||
|
+ sizeB = rowB * colB;
|
||||||
|
+ sizeC = rowC * colC;
|
||||||
|
+
|
||||||
|
+ cublasStatus stat;
|
||||||
|
+
|
||||||
|
+#if USE_CUB
|
||||||
|
+ CubDebugExit(g_allocator.DeviceAllocate((void**)&devA,
|
||||||
|
+ sizeof(double) * (sizeA+sizeB+sizeC) ));
|
||||||
|
+#else
|
||||||
|
+ stat = cublasAlloc(sizeA+sizeB+sizeC, sizeof(double), (void **) &devA);
|
||||||
|
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("Alloc A,B,C error\n"); }
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+ devB = devA + sizeA;
|
||||||
|
+ devC = devB + sizeB;
|
||||||
|
+
|
||||||
|
+ // copy matrix A, B and C to device
|
||||||
|
+ stat = cublasSetMatrix(rowA, colA, sizeof(double), A, ALDim, devA, rowA);
|
||||||
|
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("SetMatrix A error\n"); }
|
||||||
|
+
|
||||||
|
+ stat = cublasSetMatrix(rowB, colB, sizeof(double), B, BLDim, devB, rowB);
|
||||||
|
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("SetMatrix B error\n"); }
|
||||||
|
+
|
||||||
|
+ if (beta != 0.0)
|
||||||
|
+ {
|
||||||
|
+ stat = cublasSetMatrix(rowC, colC, sizeof(double), C, CLDim, devC, rowC);
|
||||||
|
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("SetMatrix C error\n"); }
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ // cublas<t>gemm
|
||||||
|
+ cublasDgemm
|
||||||
|
+ ( fixedTransA, fixedTransB, m, n, k,
|
||||||
|
+ alpha, devA, rowA, devB, rowB, beta, devC, rowC );
|
||||||
|
+
|
||||||
|
+ // copy matrix C to host
|
||||||
|
+ stat = cublasGetMatrix(rowC, colC, sizeof(double), devC, rowC, C, CLDim);
|
||||||
|
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("GetMatrix C error\n"); }
|
||||||
|
+
|
||||||
|
+ // free
|
||||||
|
+#if USE_CUB
|
||||||
|
+ CubDebugExit(g_allocator.DeviceFree(devA));
|
||||||
|
+#else
|
||||||
|
+ cublasFree(devA);
|
||||||
|
+#endif
|
||||||
|
+ //printf("CUBLAS double done ...\n");
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void Gemm
|
||||||
|
+( char transA, char transB, BlasInt m, BlasInt n, BlasInt k,
|
||||||
|
+ const scomplex& alpha,
|
||||||
|
+ const scomplex* A, BlasInt ALDim,
|
||||||
|
+ const scomplex* B, BlasInt BLDim,
|
||||||
|
+ const scomplex& beta,
|
||||||
|
+ scomplex* C, BlasInt CLDim )
|
||||||
|
+{
|
||||||
|
+ EL_DEBUG_CSE
|
||||||
|
+ EL_DEBUG_ONLY(
|
||||||
|
+ if( std::toupper(transA) == 'N' )
|
||||||
|
+ {
|
||||||
|
+ if( ALDim < Max(m,1) )
|
||||||
|
+ LogicError("ALDim was too small: ALDim=",ALDim,",m=",m);
|
||||||
|
+ }
|
||||||
|
+ else
|
||||||
|
+ {
|
||||||
|
+ if( ALDim < Max(k,1) )
|
||||||
|
+ LogicError("ALDim was too small: ALDim=",ALDim,",k=",k);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if( std::toupper(transB) == 'N' )
|
||||||
|
+ {
|
||||||
|
+ if( BLDim < Max(k,1) )
|
||||||
|
+ LogicError("BLDim was too small: BLDim=",BLDim,",k=",k);
|
||||||
|
+ }
|
||||||
|
+ else
|
||||||
|
+ {
|
||||||
|
+ if( BLDim < Max(n,1) )
|
||||||
|
+ LogicError("BLDim was too small: BLDim=",BLDim,",n=",n);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if( CLDim < Max(m,1) )
|
||||||
|
+ LogicError("CLDim was too small: CLDim=",CLDim,",m=",m);
|
||||||
|
+ )
|
||||||
|
+
|
||||||
|
+ const char fixedTransA = transA;
|
||||||
|
+ const char fixedTransB = transB;
|
||||||
|
+
|
||||||
|
+ const mpi::Comm comm;
|
||||||
|
+ const Int commRank = mpi::Rank( comm );
|
||||||
|
+ if (commRank == 0) {
|
||||||
|
+ //printf("calling cublas Cgemm: m %d n %d k %d\n", m, n, k);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ BlasInt rowA, colA, rowB, colB, rowC, colC;
|
||||||
|
+ // device memory size for A, B and C
|
||||||
|
+ BlasInt sizeA, sizeB, sizeC;
|
||||||
|
+ cuComplex *devA=NULL, *devB=NULL, *devC=NULL;
|
||||||
|
+
|
||||||
|
+ rowA = fixedTransA == 'T' ? k : m;
|
||||||
|
+ colA = fixedTransA == 'T' ? m : k;
|
||||||
|
+ rowB = fixedTransB == 'T' ? n : k;
|
||||||
|
+ colB = fixedTransB == 'T' ? k : n;
|
||||||
|
+ rowC = m;
|
||||||
|
+ colC = n;
|
||||||
|
+ sizeA = rowA * colA;
|
||||||
|
+ sizeB = rowB * colB;
|
||||||
|
+ sizeC = rowC * colC;
|
||||||
|
+
|
||||||
|
+ cublasStatus stat;
|
||||||
|
+ stat = cublasAlloc(sizeA+sizeB+sizeC, sizeof(cuComplex), (void **) &devA);
|
||||||
|
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("Alloc A,B,C error\n"); }
|
||||||
|
+
|
||||||
|
+ devB = devA + sizeA;
|
||||||
|
+ devC = devB + sizeB;
|
||||||
|
+
|
||||||
|
+ // copy matrix A, B and C to device
|
||||||
|
+ stat = cublasSetMatrix(rowA, colA, sizeof(cuComplex), A, ALDim, devA, rowA);
|
||||||
|
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("SetMatrix A error\n"); }
|
||||||
|
+
|
||||||
|
+ stat = cublasSetMatrix(rowB, colB, sizeof(cuComplex), B, BLDim, devB, rowB);
|
||||||
|
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("SetMatrix B error\n"); }
|
||||||
|
+
|
||||||
|
+ if (beta.real() != 0.0 || beta.imag() != 0.0)
|
||||||
|
+ {
|
||||||
|
+ stat = cublasSetMatrix(rowC, colC, sizeof(cuComplex), C, CLDim, devC, rowC);
|
||||||
|
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("SetMatrix C error\n"); }
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ // cublas<t>gemm
|
||||||
|
+ cublasCgemm
|
||||||
|
+ ( fixedTransA, fixedTransB, m, n, k,
|
||||||
|
+ *((cuComplex*) &alpha), devA, rowA, devB, rowB, *((cuComplex*) &beta), devC, rowC );
|
||||||
|
+
|
||||||
|
+ // copy matrix C to host
|
||||||
|
+ stat = cublasGetMatrix(rowC, colC, sizeof(cuComplex), devC, rowC, C, CLDim);
|
||||||
|
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("GetMatrix C error\n"); }
|
||||||
|
+
|
||||||
|
+ // free
|
||||||
|
+ cublasFree(devA);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void Gemm
|
||||||
|
+( char transA, char transB, BlasInt m, BlasInt n, BlasInt k,
|
||||||
|
+ const dcomplex& alpha,
|
||||||
|
+ const dcomplex* A, BlasInt ALDim,
|
||||||
|
+ const dcomplex* B, BlasInt BLDim,
|
||||||
|
+ const dcomplex& beta,
|
||||||
|
+ dcomplex* C, BlasInt CLDim )
|
||||||
|
+{
|
||||||
|
+ EL_DEBUG_CSE
|
||||||
|
+ EL_DEBUG_ONLY(
|
||||||
|
+ if( std::toupper(transA) == 'N' )
|
||||||
|
+ {
|
||||||
|
+ if( ALDim < Max(m,1) )
|
||||||
|
+ LogicError("ALDim was too small: ALDim=",ALDim,",m=",m);
|
||||||
|
+ }
|
||||||
|
+ else
|
||||||
|
+ {
|
||||||
|
+ if( ALDim < Max(k,1) )
|
||||||
|
+ LogicError("ALDim was too small: ALDim=",ALDim,",k=",k);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if( std::toupper(transB) == 'N' )
|
||||||
|
+ {
|
||||||
|
+ if( BLDim < Max(k,1) )
|
||||||
|
+ LogicError("BLDim was too small: BLDim=",BLDim,",k=",k);
|
||||||
|
+ }
|
||||||
|
+ else
|
||||||
|
+ {
|
||||||
|
+ if( BLDim < Max(n,1) )
|
||||||
|
+ LogicError("BLDim was too small: BLDim=",BLDim,",n=",n);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if( CLDim < Max(m,1) )
|
||||||
|
+ LogicError("CLDim was too small: CLDim=",CLDim,",m=",m);
|
||||||
|
+ )
|
||||||
|
+
|
||||||
|
+ const char fixedTransA = transA;
|
||||||
|
+ const char fixedTransB = transB;
|
||||||
|
+
|
||||||
|
+ const mpi::Comm comm;
|
||||||
|
+ const Int commRank = mpi::Rank( comm );
|
||||||
|
+ if (commRank == 0) {
|
||||||
|
+ //printf("calling cublas Zgemm: m %d n %d k %d\n", m, n, k);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ BlasInt rowA, colA, rowB, colB, rowC, colC;
|
||||||
|
+ // device memory size for A, B and C
|
||||||
|
+ BlasInt sizeA, sizeB, sizeC;
|
||||||
|
+ cuDoubleComplex *devA=NULL, *devB=NULL, *devC=NULL;
|
||||||
|
+
|
||||||
|
+ rowA = fixedTransA == 'T' ? k : m;
|
||||||
|
+ colA = fixedTransA == 'T' ? m : k;
|
||||||
|
+ rowB = fixedTransB == 'T' ? n : k;
|
||||||
|
+ colB = fixedTransB == 'T' ? k : n;
|
||||||
|
+ rowC = m;
|
||||||
|
+ colC = n;
|
||||||
|
+ sizeA = rowA * colA;
|
||||||
|
+ sizeB = rowB * colB;
|
||||||
|
+ sizeC = rowC * colC;
|
||||||
|
+
|
||||||
|
+ cublasStatus stat;
|
||||||
|
+ stat = cublasAlloc(sizeA+sizeB+sizeC, sizeof(cuDoubleComplex), (void **) &devA);
|
||||||
|
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("Alloc A,B,C error\n"); }
|
||||||
|
+
|
||||||
|
+ devB = devA + sizeA;
|
||||||
|
+ devC = devB + sizeB;
|
||||||
|
+
|
||||||
|
+ // copy matrix A, B and C to device
|
||||||
|
+ stat = cublasSetMatrix(rowA, colA, sizeof(cuDoubleComplex), A, ALDim, devA, rowA);
|
||||||
|
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("SetMatrix A error\n"); }
|
||||||
|
+
|
||||||
|
+ stat = cublasSetMatrix(rowB, colB, sizeof(cuDoubleComplex), B, BLDim, devB, rowB);
|
||||||
|
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("SetMatrix B error\n"); }
|
||||||
|
+
|
||||||
|
+ if (beta.real() != 0.0 || beta.imag() != 0.0)
|
||||||
|
+ {
|
||||||
|
+ stat = cublasSetMatrix(rowC, colC, sizeof(cuDoubleComplex), C, CLDim, devC, rowC);
|
||||||
|
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("SetMatrix C error\n"); }
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ cublasZgemm
|
||||||
|
+ ( fixedTransA, fixedTransB, m, n, k,
|
||||||
|
+ *((cuDoubleComplex*) &alpha), devA, rowA, devB, rowB, *((cuDoubleComplex*) &beta),
|
||||||
|
+ devC, rowC );
|
||||||
|
+
|
||||||
|
+ // copy matrix C to host
|
||||||
|
+ stat = cublasGetMatrix(rowC, colC, sizeof(cuDoubleComplex), devC, rowC, C, CLDim);
|
||||||
|
+ if (stat != CUBLAS_STATUS_SUCCESS) { RuntimeError("GetMatrix C error\n"); }
|
||||||
|
+
|
||||||
|
+ // free
|
||||||
|
+ cublasFree(devA);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+} // namespace cublas
|
||||||
|
+} // namespace El
|
||||||
|
+
|
||||||
|
+#endif
|
||||||
|
+
|
|
@ -33,6 +33,7 @@ class Elemental(CMakePackage):
|
||||||
homepage = "http://libelemental.org"
|
homepage = "http://libelemental.org"
|
||||||
url = "https://github.com/elemental/Elemental/archive/v0.87.6.tar.gz"
|
url = "https://github.com/elemental/Elemental/archive/v0.87.6.tar.gz"
|
||||||
|
|
||||||
|
version('master', git='https://github.com/elemental/Elemental.git', branch='master')
|
||||||
version('0.87.7', '6c1e7442021c59a36049e37ea69b8075')
|
version('0.87.7', '6c1e7442021c59a36049e37ea69b8075')
|
||||||
version('0.87.6', '9fd29783d45b0a0e27c0df85f548abe9')
|
version('0.87.6', '9fd29783d45b0a0e27c0df85f548abe9')
|
||||||
|
|
||||||
|
@ -52,6 +53,8 @@ class Elemental(CMakePackage):
|
||||||
description='Enable quad precision')
|
description='Enable quad precision')
|
||||||
variant('int64', default=False,
|
variant('int64', default=False,
|
||||||
description='Use 64bit integers')
|
description='Use 64bit integers')
|
||||||
|
variant('cublas', default=False,
|
||||||
|
description='Enable cuBLAS for local BLAS operations')
|
||||||
# When this variant is set remove the normal dependencies since
|
# When this variant is set remove the normal dependencies since
|
||||||
# Elemental has to build BLAS and ScaLAPACK internally
|
# Elemental has to build BLAS and ScaLAPACK internally
|
||||||
variant('int64_blas', default=False,
|
variant('int64_blas', default=False,
|
||||||
|
@ -62,15 +65,21 @@ class Elemental(CMakePackage):
|
||||||
variant('build_type', default='Release',
|
variant('build_type', default='Release',
|
||||||
description='The build type to build',
|
description='The build type to build',
|
||||||
values=('Debug', 'Release'))
|
values=('Debug', 'Release'))
|
||||||
|
variant('blas', default='openblas', values=('openblas', 'mkl'),
|
||||||
|
description='Enable the use of OpenBlas/MKL')
|
||||||
|
|
||||||
# Note that this forces us to use OpenBLAS until #1712 is fixed
|
# Note that #1712 forces us to enumerate the different blas variants
|
||||||
depends_on('blas', when='~openmp_blas ~int64_blas')
|
depends_on('blas', when='~openmp_blas ~int64_blas')
|
||||||
# Hack to forward variant to openblas package
|
# Hack to forward variant to openblas package
|
||||||
# Allow Elemental to build internally when using 8-byte ints
|
# Allow Elemental to build internally when using 8-byte ints
|
||||||
depends_on('openblas +openmp', when='+openmp_blas ~int64_blas')
|
depends_on('openblas +openmp', when='blas=openblas +openmp_blas ~int64_blas')
|
||||||
|
|
||||||
|
depends_on('intel-mkl', when="blas=mkl ~openmp_blas ~int64_blas")
|
||||||
|
depends_on('intel-mkl +openmp', when='blas=mkl +openmp_blas ~int64_blas')
|
||||||
|
depends_on('intel-mkl@2017.1 +openmp +ilp64', when='blas=mkl +openmp_blas +int64_blas')
|
||||||
|
|
||||||
# Note that this forces us to use OpenBLAS until #1712 is fixed
|
# Note that this forces us to use OpenBLAS until #1712 is fixed
|
||||||
depends_on('lapack', when='~openmp_blas')
|
depends_on('lapack', when='blas=openblas ~openmp_blas')
|
||||||
depends_on('metis')
|
depends_on('metis')
|
||||||
depends_on('metis +int64', when='+int64')
|
depends_on('metis +int64', when='+int64')
|
||||||
depends_on('mpi')
|
depends_on('mpi')
|
||||||
|
@ -79,6 +88,8 @@ class Elemental(CMakePackage):
|
||||||
extends('python', when='+python')
|
extends('python', when='+python')
|
||||||
depends_on('python@:2.8', when='+python')
|
depends_on('python@:2.8', when='+python')
|
||||||
|
|
||||||
|
patch('elemental_cublas.patch', when='+cublas')
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def libs(self):
|
def libs(self):
|
||||||
shared = True if '+shared' in self.spec else False
|
shared = True if '+shared' in self.spec else False
|
||||||
|
@ -126,8 +137,7 @@ def cmake_args(self):
|
||||||
math_libs = spec['scalapack'].libs + math_libs
|
math_libs = spec['scalapack'].libs + math_libs
|
||||||
|
|
||||||
args.extend([
|
args.extend([
|
||||||
'-DMATH_LIBS:STRING={0}'.format(math_libs.search_flags),
|
'-DMATH_LIBS:STRING={0}'.format(math_libs.ld_flags)])
|
||||||
'-DMATH_LIBS:STRING={0}'.format(math_libs.link_flags)])
|
|
||||||
|
|
||||||
if '+python' in spec:
|
if '+python' in spec:
|
||||||
args.extend([
|
args.extend([
|
||||||
|
|
|
@ -39,37 +39,49 @@ class Lbann(CMakePackage):
|
||||||
variant('gpu', default=False, description='Builds with support for GPUs via CUDA and cuDNN')
|
variant('gpu', default=False, description='Builds with support for GPUs via CUDA and cuDNN')
|
||||||
variant('opencv', default=True, description='Builds with support for image processing routines with OpenCV')
|
variant('opencv', default=True, description='Builds with support for image processing routines with OpenCV')
|
||||||
variant('seq_init', default=False, description='Force serial initialization of weight matrices.')
|
variant('seq_init', default=False, description='Force serial initialization of weight matrices.')
|
||||||
|
variant('dtype', default=4, description='Size (bits) of floating point representation for weights')
|
||||||
|
variant('build_type', default='Release',
|
||||||
|
description='The build type to build',
|
||||||
|
values=('Debug', 'Release'))
|
||||||
|
|
||||||
depends_on('elemental +openmp_blas +scalapack +shared +int64')
|
depends_on('elemental +openmp_blas +scalapack +shared +int64')
|
||||||
|
depends_on('elemental +openmp_blas +scalapack +shared +int64 build_type=Debug',
|
||||||
|
when=('build_type=Debug'))
|
||||||
depends_on('cuda', when='+gpu')
|
depends_on('cuda', when='+gpu')
|
||||||
depends_on('mpi')
|
depends_on('mpi')
|
||||||
depends_on('opencv@3.2.0', when='+opencv')
|
depends_on('opencv@3.2.0: +openmp +core +highgui +imgproc +jpeg +png +tiff +zlib', when='+opencv')
|
||||||
depends_on('protobuf@3.0.2:')
|
depends_on('protobuf@3.0.2:')
|
||||||
|
depends_on('cnpy')
|
||||||
|
|
||||||
def cmake_args(self):
|
def cmake_args(self):
|
||||||
spec = self.spec
|
spec = self.spec
|
||||||
# Environment variables
|
# Environment variables
|
||||||
CPPFLAGS = []
|
CPPFLAGS = []
|
||||||
CPPFLAGS.append('-DLBANN_SET_EL_RNG')
|
CPPFLAGS.append('-DLBANN_SET_EL_RNG')
|
||||||
if '~seq_init' in spec:
|
|
||||||
CPPFLAGS.append('-DLBANN_PARALLEL_RANDOM_MATRICES')
|
CPPFLAGS.append('-DLBANN_DATATYPE={0}'.format(
|
||||||
|
int(spec.variants['dtype'].value)))
|
||||||
|
|
||||||
args = [
|
args = [
|
||||||
'-DCMAKE_INSTALL_MESSAGE=LAZY',
|
'-DCMAKE_INSTALL_MESSAGE=LAZY',
|
||||||
'-DCMAKE_CXX_FLAGS=%s' % ' '.join(CPPFLAGS),
|
'-DCMAKE_CXX_FLAGS=%s' % ' '.join(CPPFLAGS),
|
||||||
'-DWITH_CUDA:BOOL=%s' % ('+gpu' in spec),
|
'-DWITH_CUDA:BOOL=%s' % ('+gpu' in spec),
|
||||||
'-DWITH_CUDNN:BOOL=%s' % ('+gpu' in spec),
|
'-DWITH_CUDNN:BOOL=%s' % ('+gpu' in spec),
|
||||||
|
'-DELEMENTAL_USE_CUBLAS:BOOL=%s' % (
|
||||||
|
'+cublas' in spec['elemental']),
|
||||||
'-DWITH_TBINF=OFF',
|
'-DWITH_TBINF=OFF',
|
||||||
'-DWITH_VTUNE=OFF',
|
'-DWITH_VTUNE=OFF',
|
||||||
'-DElemental_DIR={0}'.format(self.spec['elemental'].prefix),
|
'-DElemental_DIR={0}'.format(spec['elemental'].prefix),
|
||||||
|
'-DCNPY_DIR={0}'.format(spec['cnpy'].prefix),
|
||||||
'-DELEMENTAL_MATH_LIBS={0}'.format(
|
'-DELEMENTAL_MATH_LIBS={0}'.format(
|
||||||
self.spec['elemental'].libs),
|
spec['elemental'].libs),
|
||||||
|
'-DSEQ_INIT:BOOL=%s' % ('+seq_init' in spec),
|
||||||
'-DVERBOSE=0',
|
'-DVERBOSE=0',
|
||||||
'-DLBANN_HOME=.',
|
'-DLBANN_HOME=.',
|
||||||
'-DLBANN_VER=spack']
|
'-DLBANN_VER=spack']
|
||||||
|
|
||||||
if '+opencv' in self.spec:
|
if '+opencv' in spec:
|
||||||
args.extend(['-DOpenCV_DIR:STRING={0}'.format(
|
args.extend(['-DOpenCV_DIR:STRING={0}'.format(
|
||||||
self.spec['opencv'].prefix)])
|
spec['opencv'].prefix)])
|
||||||
|
|
||||||
return args
|
return args
|
||||||
|
|
|
@ -35,6 +35,9 @@ class Libtiff(AutotoolsPackage):
|
||||||
version('4.0.6', 'd1d2e940dea0b5ad435f21f03d96dd72')
|
version('4.0.6', 'd1d2e940dea0b5ad435f21f03d96dd72')
|
||||||
version('4.0.3', '051c1068e6a0627f461948c365290410')
|
version('4.0.3', '051c1068e6a0627f461948c365290410')
|
||||||
|
|
||||||
depends_on('jpeg')
|
variant('turbo', default=False, description='use libjpeg-turbo')
|
||||||
|
|
||||||
|
depends_on('jpeg', when='-turbo')
|
||||||
|
depends_on('libjpeg-turbo', when='+turbo')
|
||||||
depends_on('zlib')
|
depends_on('zlib')
|
||||||
depends_on('xz')
|
depends_on('xz')
|
||||||
|
|
|
@ -42,8 +42,15 @@ class Opencv(CMakePackage):
|
||||||
homepage = 'http://opencv.org/'
|
homepage = 'http://opencv.org/'
|
||||||
url = 'https://github.com/Itseez/opencv/archive/3.1.0.tar.gz'
|
url = 'https://github.com/Itseez/opencv/archive/3.1.0.tar.gz'
|
||||||
|
|
||||||
version('3.2.0', 'a43b65488124ba33dde195fea9041b70')
|
version('master', git="https://github.com/opencv/opencv.git", branch="master")
|
||||||
version('3.1.0', '70e1dd07f0aa06606f1bc0e3fa15abd3')
|
version('3.2.0', 'a43b65488124ba33dde195fea9041b70')
|
||||||
|
version('3.1.0', '70e1dd07f0aa06606f1bc0e3fa15abd3')
|
||||||
|
version('2.4.13.2', 'fe52791ce523681a67036def4c25261b')
|
||||||
|
version('2.4.13.1', 'f6d354500d5013e60dc0fc44b07a63d1')
|
||||||
|
version('2.4.13', '8feb45a71adad89b8017a777477c3eff')
|
||||||
|
version('2.4.12.3', '2496a4a4caf8fecfbfc294fbe6a814b0')
|
||||||
|
version('2.4.12.2', 'bc0c60c2ea1cf4078deef99569912fc7')
|
||||||
|
version('2.4.12.1', '7192f51434710904b5e3594872b897c3')
|
||||||
|
|
||||||
variant('shared', default=True,
|
variant('shared', default=True,
|
||||||
description='Enables the build of shared libraries')
|
description='Enables the build of shared libraries')
|
||||||
|
@ -59,13 +66,21 @@ class Opencv(CMakePackage):
|
||||||
description='Enables the build of Python extensions')
|
description='Enables the build of Python extensions')
|
||||||
variant('java', default=False,
|
variant('java', default=False,
|
||||||
description='Activates support for Java')
|
description='Activates support for Java')
|
||||||
|
variant('openmp', default=False, description='Activates support for OpenMP threads')
|
||||||
|
variant('core', default=False, description='Include opencv_core module into the OpenCV build')
|
||||||
|
variant('highgui', default=False, description='Include opencv_highgui module into the OpenCV build')
|
||||||
|
variant('imgproc', default=False, description='Include opencv_imgproc module into the OpenCV build')
|
||||||
|
variant('jpeg', default=False, description='Include JPEG support')
|
||||||
|
variant('png', default=False, description='Include PNG support')
|
||||||
|
variant('tiff', default=False, description='Include TIFF support')
|
||||||
|
variant('zlib', default=False, description='Build zlib from source')
|
||||||
|
|
||||||
depends_on('eigen', when='+eigen', type='build')
|
depends_on('eigen', when='+eigen', type='build')
|
||||||
|
|
||||||
depends_on('zlib')
|
depends_on('zlib', when='+zlib')
|
||||||
depends_on('libpng')
|
depends_on('libpng', when='+png')
|
||||||
depends_on('libjpeg-turbo')
|
depends_on('libjpeg-turbo', when='+jpeg')
|
||||||
depends_on('libtiff')
|
depends_on('libtiff+turbo', when='+tiff')
|
||||||
|
|
||||||
depends_on('jasper', when='+jasper')
|
depends_on('jasper', when='+jasper')
|
||||||
depends_on('cuda', when='+cuda')
|
depends_on('cuda', when='+cuda')
|
||||||
|
@ -94,6 +109,22 @@ def cmake_args(self):
|
||||||
'ON' if '+vtk' in spec else 'OFF')),
|
'ON' if '+vtk' in spec else 'OFF')),
|
||||||
'-DBUILD_opencv_java:BOOL={0}'.format((
|
'-DBUILD_opencv_java:BOOL={0}'.format((
|
||||||
'ON' if '+java' in spec else 'OFF')),
|
'ON' if '+java' in spec else 'OFF')),
|
||||||
|
'-DBUILD_opencv_core:BOOL={0}'.format((
|
||||||
|
'ON' if '+core' in spec else 'OFF')),
|
||||||
|
'-DBUILD_opencv_highgui:BOOL={0}'.format((
|
||||||
|
'ON' if '+highgui' in spec else 'OFF')),
|
||||||
|
'-DBUILD_opencv_imgproc:BOOL={0}'.format((
|
||||||
|
'ON' if '+imgproc' in spec else 'OFF')),
|
||||||
|
'-DWITH_JPEG:BOOL={0}'.format((
|
||||||
|
'ON' if '+jpeg' in spec else 'OFF')),
|
||||||
|
'-DWITH_PNG:BOOL={0}'.format((
|
||||||
|
'ON' if '+png' in spec else 'OFF')),
|
||||||
|
'-DWITH_TIFF:BOOL={0}'.format((
|
||||||
|
'ON' if '+tiff' in spec else 'OFF')),
|
||||||
|
'-DWITH_ZLIB:BOOL={0}'.format((
|
||||||
|
'ON' if '+zlib' in spec else 'OFF')),
|
||||||
|
'-DWITH_OPENMP:BOOL={0}'.format((
|
||||||
|
'ON' if '+openmp' in spec else 'OFF')),
|
||||||
]
|
]
|
||||||
|
|
||||||
# Media I/O
|
# Media I/O
|
||||||
|
@ -115,31 +146,35 @@ def cmake_args(self):
|
||||||
'-DPNG_INCLUDE_DIR:PATH={0}'.format(libpng.prefix.include)
|
'-DPNG_INCLUDE_DIR:PATH={0}'.format(libpng.prefix.include)
|
||||||
])
|
])
|
||||||
|
|
||||||
libjpeg = spec['libjpeg-turbo']
|
if '+jpeg' in spec:
|
||||||
args.extend([
|
libjpeg = spec['libjpeg-turbo']
|
||||||
'-DJPEG_LIBRARY:FILEPATH={0}'.format(
|
cmake_options.extend([
|
||||||
join_path(libjpeg.prefix.lib,
|
'-DBUILD_JPEG:BOOL=OFF',
|
||||||
'libjpeg.{0}'.format(dso_suffix))),
|
'-DJPEG_LIBRARY:FILEPATH={0}'.format(
|
||||||
'-DJPEG_INCLUDE_DIR:PATH={0}'.format(libjpeg.prefix.include)
|
join_path(libjpeg.prefix.lib,
|
||||||
])
|
'libjpeg.{0}'.format(dso_suffix))),
|
||||||
|
'-DJPEG_INCLUDE_DIR:PATH={0}'.format(libjpeg.prefix.include)
|
||||||
|
])
|
||||||
|
|
||||||
libtiff = spec['libtiff']
|
if '+tiff' in spec:
|
||||||
args.extend([
|
libtiff = spec['libtiff']
|
||||||
'-DTIFF_LIBRARY_{0}:FILEPATH={1}'.format((
|
cmake_options.extend([
|
||||||
'DEBUG' if '+debug' in spec else 'RELEASE'),
|
'-DTIFF_LIBRARY_{0}:FILEPATH={1}'.format((
|
||||||
join_path(libtiff.prefix.lib,
|
'DEBUG' if '+debug' in spec else 'RELEASE'),
|
||||||
'libtiff.{0}'.format(dso_suffix))),
|
join_path(libtiff.prefix.lib,
|
||||||
'-DTIFF_INCLUDE_DIR:PATH={0}'.format(libtiff.prefix.include)
|
'libtiff.{0}'.format(dso_suffix))),
|
||||||
])
|
'-DTIFF_INCLUDE_DIR:PATH={0}'.format(libtiff.prefix.include)
|
||||||
|
])
|
||||||
|
|
||||||
jasper = spec['jasper']
|
if '+jasper' in spec:
|
||||||
args.extend([
|
jasper = spec['jasper']
|
||||||
'-DJASPER_LIBRARY_{0}:FILEPATH={1}'.format((
|
cmake_options.extend([
|
||||||
'DEBUG' if '+debug' in spec else 'RELEASE'),
|
'-DJASPER_LIBRARY_{0}:FILEPATH={1}'.format((
|
||||||
join_path(jasper.prefix.lib,
|
'DEBUG' if '+debug' in spec else 'RELEASE'),
|
||||||
'libjasper.{0}'.format(dso_suffix))),
|
join_path(jasper.prefix.lib,
|
||||||
'-DJASPER_INCLUDE_DIR:PATH={0}'.format(jasper.prefix.include)
|
'libjasper.{0}'.format(dso_suffix))),
|
||||||
])
|
'-DJASPER_INCLUDE_DIR:PATH={0}'.format(jasper.prefix.include)
|
||||||
|
])
|
||||||
|
|
||||||
# GUI
|
# GUI
|
||||||
if '+gtk' not in spec:
|
if '+gtk' not in spec:
|
||||||
|
|
Loading…
Reference in a new issue