Hand-merged CUDA support

This commit is contained in:
Hrvoje Jasak 2013-10-11 14:12:56 +01:00
parent 7e8f06837c
commit db4d4cefe7
23 changed files with 2313 additions and 5 deletions

View file

@ -2,7 +2,7 @@
# ========= |
# \\ / F ield | OpenFOAM: The Open Source CFD Toolbox
# \\ / O peration |
# \\ / A nd | Copyright (C) 2010-2010 OpenCFD Ltd.
# \\ / A nd | Copyright (C) 2013 Hrvoje Jasak
# \\/ M anipulation |
#------------------------------------------------------------------------------
# License
@ -62,6 +62,14 @@
# So build your ThirdParty directory accordingly.
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# System installed CUDA
#setenv CUDA_SYSTEM 1
#setenv CUDA_DIR path_to_system_installed_cuda
#setenv CUDA_BIN_DIR $CUDA_DIR/bin
#setenv CUDA_LIB_DIR $CUDA_DIR/lib
#setenv CUDA_INCLUDE_DIR $CUDA_DIR/include
#setenv CUDA_ARCH sm_20
# System installed Mesquite
#setenv MESQUITE_SYSTEM 1
#setenv MESQUITE_DIR path_to_system_installed_mesquite

View file

@ -2,7 +2,7 @@
# ========= |
# \\ / F ield | OpenFOAM: The Open Source CFD Toolbox
# \\ / O peration |
# \\ / A nd | Copyright (C) 2010-2010 OpenCFD Ltd.
# \\ / A nd | Copyright (C) 2013 Hrvoje Jasak
# \\/ M anipulation |
#------------------------------------------------------------------------------
# License
@ -64,6 +64,14 @@ compilerInstall=System
# So build your ThirdParty directory accordingly.
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# System installed CUDA
#export CUDA_SYSTEM=1
#export CUDA_DIR=path_to_system_installed_cuda
#export CUDA_BIN_DIR=$CUDA_DIR/bin
#export CUDA_LIB_DIR=$CUDA_DIR/lib
#export CUDA_INCLUDE_DIR=$CUDA_DIR/include
#export CUDA_ARCH=sm_20
# System installed Mesquite
#export MESQUITE_SYSTEM=1
#export MESQUITE_DIR=path_to_system_installed_mesquite

View file

@ -211,11 +211,11 @@ case SYSTEMOPENMPI:
endif
else
# Here, we assume your environment is already set for running
# and developping with openmpi.
# and developing with openmpi.
#
# Initialize OPENMPI_BIN_DIR using the path to mpicc
set mpicc_cmd=`which mpicc`
setenv OPENMPI_BIN_DIR `dirname $mpicc_cmd`
setenv OPENMPI_BIN_DIR `dirname $mpicc_cmd`
unset mpicc_cmd
endif
@ -382,6 +382,23 @@ else
setenv MPI_BUFFER_SIZE $minBufferSize
endif
# CUDA library
# ~~~~~~~~~~~~
if ( $?CUDA_SYSTEM == 0 && -e /usr/local/cuda-5.5/bin/nvcc ) then
setenv CUDA_DIR /usr/local/cuda-5.5
setenv CUDA_BIN_DIR $CUDA_DIR/bin
setenv CUDA_LIB_DIR $CUDA_DIR/lib64
setenv CUDA_INCLUDE_DIR $CUDA_DIR/include
setenv CUDA_ARCH sm_20
endif
if ( $?CUDA_BIN_DIR ) then
_foamAddPath $CUDA_BIN_DIR
endif
if ( $?CUDA_LIB_DIR ) then
_foamAddLib $CUDA_LIB_DIR
endif
# CGAL library if available
# ~~~~~~~~~~~~~~~~~~~~~~~~~

View file

@ -432,6 +432,18 @@ fi
export MPI_BUFFER_SIZE
# CUDA if available
# ~~~~~~~~~~~~~~~~~
[ -z "$CUDA_SYSTEM" ] && [ -e /usr/local/cuda-5.5/bin/nvcc ] && {
export CUDA_DIR=/usr/local/cuda-5.5
export CUDA_BIN_DIR=$CUDA_DIR/bin
export CUDA_LIB_DIR=$CUDA_DIR/lib64
export CUDA_INCLUDE_DIR=$CUDA_DIR/include
}
[ -d "$CUDA_LIB_DIR" ] && _foamAddPath $CUDA_BIN_DIR && _foamAddLib $CUDA_LIB_DIR
# CGAL library if available
# ~~~~~~~~~~~~~~~~~~~~~~~~~
[ -d "$CGAL_LIB_DIR" ] && _foamAddLib $CGAL_LIB_DIR
@ -472,7 +484,7 @@ export MPI_BUFFER_SIZE
# Load Metis library
# ~~~~~~~~~~~~~~~~~~
[ -z "$METIS_SYSTEM" ] && [ -d $WM_THIRD_PARTY_DIR/packages/metis-5.0pre2/platforms/$WM_OPTIONS ] && {
[ -z "$METIS_SYSTEM" ] && [ -e $WM_THIRD_PARTY_DIR/packages/metis-5.0pre2/platforms/$WM_OPTIONS ] && {
_foamSource $WM_THIRD_PARTY_DIR/packages/metis-5.0pre2/platforms/$WM_OPTIONS/etc/metis-5.0pre2.sh
}
[ "$FOAM_VERBOSE" -a "$PS1" ] && echo " METIS_DIR is initialized to: $METIS_DIR"

View file

@ -68,4 +68,7 @@ wmake libso engine
wmake libso equationReader
wmake libso multiSolver
( cd cudaSolvers ; ./Allwmake )
# ----------------------------------------------------------------- end-of-file

13
src/cudaSolvers/Allwmake Executable file
View file

@ -0,0 +1,13 @@
#!/bin/sh
cd ${0%/*} || exit 1 # run from this directory
if [ -f $CUDA_BIN_DIR/nvcc ]
then
echo "Found nvcc -- enabling CUDA support."
wmake libso
else
echo "No nvcc - CUDA not available."
fi
# ----------------------------------------------------------------- end-of-file

View file

@ -0,0 +1,12 @@
cudaSolver/cudaSolver.C
cudaCG/cgDiag.cu
cudaCG/cgAmg.cu
cudaCG/cgAinv.cu
cudaCG/cudaCG.C
cudaBiCGStab/bicgDiag.cu
cudaBiCGStab/bicgAinv.cu
cudaBiCGStab/cudaBiCGStab.C
LIB = $(FOAM_LIBBIN)/libcudaSolvers

View file

@ -0,0 +1,5 @@
EXE_INC = \
-I$(CUDA_INCLUDE_DIR)
LIB_LIBS = \
-L$(CUDA_LIB_DIR) -lcudart

View file

@ -0,0 +1,246 @@
/**********************************************************************\
______ __ __ _______ _______ __ __ __ __ __ ___
/ || | | | | ____|| ____|| | | | | \ | | | |/ /
| ,----'| | | | | |__ | |__ | | | | | \| | | ' /
| | | | | | | __| | __| | | | | | . ` | | <
| `----.| `--' | | | | | | `----.| | | |\ | | . \
\______| \______/ |__| |__| |_______||__| |__| \__| |__|\__\
Cuda For FOAM Link
cufflink is a library for linking numerical methods based on Nvidia's
Compute Unified Device Architecture (CUDA™) C/C++ programming language
and OpenFOAM®.
Please note that cufflink is not approved or endorsed by OpenCFD®
Limited, the owner of the OpenFOAM® and OpenCFD® trademarks and
producer of OpenFOAM® software.
The official web-site of OpenCFD® Limited is www.openfoam.com .
------------------------------------------------------------------------
This file is part of cufflink.
cufflink is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
cufflink is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with cufflink. If not, see <http:// www.gnu.org/licenses/>.
Author
Daniel P. Combest. All rights reserved.
Modifications by Dominik Christ, Wikki Ltd.
Description
Sparse approximate inverse preconditioned conjugate gradient stabilized
solver for asymmetric Matrices using a CUSP CUDA™ based solver.
\**********************************************************************/
#include "cudaTypes.H"
// CUSP Includes
#include <cusp/detail/config.h>
#include <cusp/verify.h>
#include <cusp/precond/ainv.h>
#include <cusp/coo_matrix.h>
#include <cusp/blas.h>
#include <cusp/multiply.h>
extern "C" void bicgAinv
(
cuspEquationSystem* ces,
cudaSolverPerformance* solverPerf,
ValueType drop_tolerance,
int nonzero_per_row,
bool lin_dropping,
int lin_param
)
{
cusp::coo_matrix<IndexType, ValueType, MemorySpace> A(ces->A);
cusp::array1d<ValueType, MemorySpace> X(ces->X);
cusp::array1d<ValueType, MemorySpace> B(ces->B);
// Fill in the rest of the diag (rows and col)
// Determine row indices of diagonal values and fill A COO matrix
thrust::sequence
(
A.row_indices.begin(),
A.row_indices.begin() + ces->nCells
);
// Determine column indices of diagonal values and fill A COO matrix
thrust::sequence
(
A.column_indices.begin(),
A.column_indices.begin() + ces->nCells
);
// Sorted coo by row and column. speeds code up a little bit more
A.sort_by_row_and_column();
// Set storage
// #include "cufflink/setGPUStorage.H"
if (solverPerf->debugCusp)
{
std::cout << " Using Ainv preconditioner\n";
}
cusp::precond::bridson_ainv<ValueType, MemorySpace> M
(
A,
drop_tolerance,
nonzero_per_row,
lin_dropping,
lin_param
);
// Start Krylov solver
assert(A.num_rows == A.num_cols); // Sanity check
const size_t N = A.num_rows;
// Allocate workspace
cusp::array1d<ValueType,MemorySpace> y(N);
cusp::array1d<ValueType,MemorySpace> p(N);
cusp::array1d<ValueType,MemorySpace> r(N);
cusp::array1d<ValueType,MemorySpace> r_star(N);
cusp::array1d<ValueType,MemorySpace> s(N);
cusp::array1d<ValueType,MemorySpace> Mp(N);
cusp::array1d<ValueType,MemorySpace> AMp(N);
cusp::array1d<ValueType,MemorySpace> Ms(N);
cusp::array1d<ValueType,MemorySpace> AMs(N);
// y <- Ax
cusp::multiply(A, X, y);
// Define the normalization factor
ValueType normFactor = 1.0;
# include "buildNormFactor.H"
// r <- b - A*x
cusp::blas::axpby(B, y, r, ValueType(1), ValueType(-1));
// p <- r
cusp::blas::copy(r, p);
// r_star <- r
cusp::blas::copy(r, r_star);
ValueType r_r_star_old = cusp::blas::dotc(r_star, r);
ValueType normR = cusp::blas::nrm2(r)/normFactor;
ValueType normR0 = normR; // Initial residual
solverPerf->iRes = normR0;
int count = 0;
while
(
normR > (solverPerf->tol)
&& count < (solverPerf->maxIter)
&& normR/normR0 >= (solverPerf->relTol)
|| count < solverPerf->minIter
)
{
// Mp = M*p
cusp::multiply(M, p, Mp);
// AMp = A*Mp
cusp::multiply(A, Mp, AMp);
// alpha = (r_j, r_star) / (A*M*p, r_star)
ValueType alpha = r_r_star_old/cusp::blas::dotc(r_star, AMp);
// s_j = r_j - alpha * AMp
cusp::blas::axpby(r, AMp, s, ValueType(1), ValueType(-alpha));
ValueType normS = cusp::blas::nrm2(s)/normFactor;
if
(
!(
normS > (solverPerf->tol)
&& normS/normR0 >= (solverPerf->relTol)
)
)
{
// is this right?
// x += alpha*M*p_j
cusp::blas::axpby(X, Mp, X, ValueType(1), ValueType(alpha));
// y <- Ax
cusp::multiply(A, X, y);
// r <- b - A*x
cusp::blas::axpby(B, y, r, ValueType(1), ValueType(-1));
// not sure we should be measuring r here...need to look again.
normR = cusp::blas::nrm2(r)/normFactor;
count++;
break;
}
// Ms = M*s_j
cusp::multiply(M, s, Ms);
// AMs = A*Ms
cusp::multiply(A, Ms, AMs);
// omega = (AMs, s) / (AMs, AMs)
ValueType omega = cusp::blas::dotc(AMs, s)/cusp::blas::dotc(AMs, AMs);
// x_{j+1} = x_j + alpha*M*p_j + omega*M*s_j
cusp::blas::axpbypcz(X, Mp, Ms, X, ValueType(1), alpha, omega);
// r_{j+1} = s_j - omega*A*M*s
cusp::blas::axpby(s, AMs, r, ValueType(1), -omega);
// beta_j = (r_{j+1}, r_star) / (r_j, r_star) * (alpha/omega)
ValueType r_r_star_new = cusp::blas::dotc(r_star, r);
ValueType beta = (r_r_star_new/r_r_star_old)*(alpha/omega);
r_r_star_old = r_r_star_new;
// p_{j+1} = r_{j+1} + beta*(p_j - omega*A*M*p)
cusp::blas::axpbypcz(r, p, AMp, p, ValueType(1), beta, -beta*omega);
// not dure we should be measuring r here...need to look again.
normR = cusp::blas::nrm2(r)/normFactor;
count++;
}
// End Krylov Solver
// Final residual
solverPerf->fRes = cusp::blas::nrm2(r)/normFactor;
solverPerf->nIterations = count;
// converged?
if
(
solverPerf->fRes<=solverPerf->tol
|| solverPerf->fRes/solverPerf->iRes <= solverPerf->relTol
)
{
solverPerf->converged = true;
}
else
{
solverPerf->converged = false;
}
// Pass the solution vector back
ces->X = X;
}

View file

@ -0,0 +1,235 @@
/**********************************************************************\
______ __ __ _______ _______ __ __ __ __ __ ___
/ || | | | | ____|| ____|| | | | | \ | | | |/ /
| ,----'| | | | | |__ | |__ | | | | | \| | | ' /
| | | | | | | __| | __| | | | | | . ` | | <
| `----.| `--' | | | | | | `----.| | | |\ | | . \
\______| \______/ |__| |__| |_______||__| |__| \__| |__|\__\
Cuda For FOAM Link
cufflink is a library for linking numerical methods based on Nvidia's
Compute Unified Device Architecture (CUDA™) C/C++ programming language
and OpenFOAM®.
Please note that cufflink is not approved or endorsed by OpenCFD®
Limited, the owner of the OpenFOAM® and OpenCFD® trademarks and
producer of OpenFOAM® software.
The official web-site of OpenCFD® Limited is www.openfoam.com .
------------------------------------------------------------------------
This file is part of cufflink.
cufflink is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
cufflink is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with cufflink. If not, see <http:// www.gnu.org/licenses/>.
Author
Daniel P. Combest. All rights reserved.
Modifications by Dominik Christ, Wikki Ltd.
Description
Sparse approximate inverse preconditioned conjugate gradient stabilized
solver for asymmetric Matrices using a CUSP CUDA™ based solver.
\**********************************************************************/
#include "cudaTypes.H"
// CUSP Includes
#include <cusp/detail/config.h>
#include <cusp/verify.h>
#include <cusp/precond/diagonal.h>
#include <cusp/coo_matrix.h>
#include <cusp/blas.h>
#include <cusp/multiply.h>
extern "C" void bicgDiag
(
cuspEquationSystem* ces,
cudaSolverPerformance* solverPerf
)
{
cusp::coo_matrix<IndexType, ValueType, MemorySpace> A(ces->A);
cusp::array1d<ValueType, MemorySpace> X(ces->X);
cusp::array1d<ValueType, MemorySpace> B(ces->B);
// Fill in the rest of the diag (rows and col)
// Determine row indices of diagonal values and fill A COO matrix
thrust::sequence
(
A.row_indices.begin(),
A.row_indices.begin() + ces->nCells
);
// Determine column indices of diagonal values and fill A COO matrix
thrust::sequence
(
A.column_indices.begin(),
A.column_indices.begin() + ces->nCells
);
// Sorted coo by row and column. speeds code up a little bit more
A.sort_by_row_and_column();
// Set storage
//# include "setGPUStorage.H"
if (solverPerf->debugCusp)
{
std::cout << " Using Ainv preconditioner\n";
}
cusp::precond::diagonal<ValueType, MemorySpace> M(A);
// Start Krylov solver
assert(A.num_rows == A.num_cols); // Sanity check
const size_t N = A.num_rows;
// Allocate workspace
cusp::array1d<ValueType,MemorySpace> y(N);
cusp::array1d<ValueType,MemorySpace> p(N);
cusp::array1d<ValueType,MemorySpace> r(N);
cusp::array1d<ValueType,MemorySpace> r_star(N);
cusp::array1d<ValueType,MemorySpace> s(N);
cusp::array1d<ValueType,MemorySpace> Mp(N);
cusp::array1d<ValueType,MemorySpace> AMp(N);
cusp::array1d<ValueType,MemorySpace> Ms(N);
cusp::array1d<ValueType,MemorySpace> AMs(N);
// y <- Ax
cusp::multiply(A, X, y);
// Define the normalization factor
ValueType normFactor = 1.0;
# include "buildNormFactor.H"
// r <- b - A*x
cusp::blas::axpby(B, y, r, ValueType(1), ValueType(-1));
// p <- r
cusp::blas::copy(r, p);
// r_star <- r
cusp::blas::copy(r, r_star);
ValueType r_r_star_old = cusp::blas::dotc(r_star, r);
ValueType normR = cusp::blas::nrm2(r)/normFactor;
ValueType normR0 = normR; // Initial residual
solverPerf->iRes = normR0;
int count = 0;
while
(
normR > (solverPerf->tol)
&& count < (solverPerf->maxIter)
&& normR/normR0 >= (solverPerf->relTol)
|| count < solverPerf->minIter
)
{
// Mp = M*p
cusp::multiply(M, p, Mp);
// AMp = A*Mp
cusp::multiply(A, Mp, AMp);
// alpha = (r_j, r_star) / (A*M*p, r_star)
ValueType alpha = r_r_star_old/cusp::blas::dotc(r_star, AMp);
// s_j = r_j - alpha * AMp
cusp::blas::axpby(r, AMp, s, ValueType(1), ValueType(-alpha));
ValueType normS = cusp::blas::nrm2(s)/normFactor;
if
(
!(
normS > (solverPerf->tol)
&& normS/normR0 >= (solverPerf->relTol)
)
)
{
// is this right?
// x += alpha*M*p_j
cusp::blas::axpby(X, Mp, X, ValueType(1), ValueType(alpha));
// y <- Ax
cusp::multiply(A, X, y);
// r <- b - A*x
cusp::blas::axpby(B, y, r, ValueType(1), ValueType(-1));
// not sure we should be measuring r here...need to look again.
normR = cusp::blas::nrm2(r)/normFactor;
count++;
break;
}
// Ms = M*s_j
cusp::multiply(M, s, Ms);
// AMs = A*Ms
cusp::multiply(A, Ms, AMs);
// omega = (AMs, s) / (AMs, AMs)
ValueType omega = cusp::blas::dotc(AMs, s)/cusp::blas::dotc(AMs, AMs);
// x_{j+1} = x_j + alpha*M*p_j + omega*M*s_j
cusp::blas::axpbypcz(X, Mp, Ms, X, ValueType(1), alpha, omega);
// r_{j+1} = s_j - omega*A*M*s
cusp::blas::axpby(s, AMs, r, ValueType(1), -omega);
// beta_j = (r_{j+1}, r_star) / (r_j, r_star) * (alpha/omega)
ValueType r_r_star_new = cusp::blas::dotc(r_star, r);
ValueType beta = (r_r_star_new/r_r_star_old)*(alpha/omega);
r_r_star_old = r_r_star_new;
// p_{j+1} = r_{j+1} + beta*(p_j - omega*A*M*p)
cusp::blas::axpbypcz(r, p, AMp, p, ValueType(1), beta, -beta*omega);
// not dure we should be measuring r here...need to look again.
normR = cusp::blas::nrm2(r)/normFactor;
count++;
}
// End Krylov Solver
// Final residual
solverPerf->fRes = cusp::blas::nrm2(r)/normFactor;
solverPerf->nIterations = count;
// converged?
if
(
solverPerf->fRes<=solverPerf->tol
|| solverPerf->fRes/solverPerf->iRes <= solverPerf->relTol
)
{
solverPerf->converged = true;
}
else
{
solverPerf->converged = false;
}
// Pass the solution vector back
ces->X = X;
}

View file

@ -0,0 +1,168 @@
/*---------------------------------------------------------------------------*\
========= |
\\ / F ield | OpenFOAM: The Open Source CFD Toolbox
\\ / O peration |
\\ / A nd | Copyright held by original author
\\/ M anipulation |
-------------------------------------------------------------------------------
License
This file is part of OpenFOAM.
OpenFOAM is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2 of the License, or (at your
option) any later version.
OpenFOAM is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU General Public License
along with OpenFOAM; if not, write to the Free Software Foundation,
Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
Description
Preconditioned Conjugate Gradient solver with run-time selectable
preconditioning
Author
Dominik Christ, Wikki Ltd.
Based on Cufflink library by Daniel P. Combest
\*---------------------------------------------------------------------------*/
#include "cudaBiCGStab.H"
// Preconditioner and solver are hardwired due to
// code structure of Cusp library. Below are
// external functions with solver/preconditioner combinations
// BiCG with diagonal preconditioning
extern "C" void bicgDiag
(
cuspEquationSystem* ces,
cudaSolverPerformance* solverParam
);
// BiCG with Ainv preconditioning
extern "C" void bicgAinv
(
cuspEquationSystem* ces,
cudaSolverPerformance* solverPerf,
ValueType drop_tolerance,
int nonzero_per_row,
bool lin_dropping,
int lin_param
);
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
namespace Foam
{
defineTypeNameAndDebug(cudaBiCGStab, 0);
lduSolver::addasymMatrixConstructorToTable<cudaBiCGStab>
addcudaBiCGStabAsymMatrixConstructorToTable_;
}
// * * * * * * * * * * * * * * * * Constructors * * * * * * * * * * * * * * //
// - Construct from matrix and solver data stream
Foam::cudaBiCGStab::cudaBiCGStab
(
const word& fieldName,
const lduMatrix& matrix,
const FieldField<Field, scalar>& coupleBouCoeffs,
const FieldField<Field, scalar>& coupleIntCoeffs,
const lduInterfaceFieldPtrsList& interfaces,
const dictionary& dict
)
:
cudaSolver
(
fieldName,
matrix,
coupleBouCoeffs,
coupleIntCoeffs,
interfaces,
dict
)
{}
// * * * * * * * * * * * * * * * Member Functions * * * * * * * * * * * * * //
Foam::lduSolverPerformance Foam::cudaBiCGStab::solve
(
scalarField& x,
const scalarField& b,
const direction cmpt
) const
{
// Initialize Cusp solver perfomance
cudaSolverPerformance solverPerf = cudaSolverPerformanceDefault();
solverPerf.minIter = minIter(); // Minimum iterations
solverPerf.maxIter = maxIter(); // Maximum iterations
solverPerf.relTol = relTolerance(); // Relative tolerance
solverPerf.tol = tolerance(); // Tolerance
if (lduMatrix::debug >= 2)
{
solverPerf.debugCusp = true;
}
// Initialize and copy matrix data to GPU
cuspEquationSystem ces = createAsymCuspMatrix(matrix(), x, b);
// Call solver externally
word preconName(dict().lookup("preconditioner"));
if (preconName == "diagonal")
{
bicgDiag(&ces, &solverPerf);
}
else if(preconName == "Ainv")
{
bicgAinv
(
&ces,
&solverPerf,
dict().lookupOrDefault<scalar>("dropTolerance", 0.1),
dict().lookupOrDefault<label>("nonzeroPerRow", -1),
dict().lookupOrDefault<bool>("LinDropping", false),
dict().lookupOrDefault<label>("LinParameter", 1)
);
}
else
{
FatalErrorIn("cudaBiCGStab::solver()")
<< "Unknown preconditioner name. "
<< "Options are:" << nl
<< "(" << nl
<< "diagonal" << nl
<< "Ainv" << nl
<< ")" << nl
<< abort(FatalError);
}
// copy the x vector back to Openfoam
thrust::copy(ces.X.begin(), ces.X.end(), x.begin());
// Return solver output
return lduSolverPerformance
(
typeName,
fieldName(),
solverPerf.iRes,
solverPerf.fRes,
solverPerf.nIterations,
solverPerf.converged,
solverPerf.singular
);
}
// ************************************************************************* //

View file

@ -0,0 +1,114 @@
/*---------------------------------------------------------------------------*\
========= |
\\ / F ield | OpenFOAM: The Open Source CFD Toolbox
\\ / O peration |
\\ / A nd | Copyright held by original author
\\/ M anipulation |
-------------------------------------------------------------------------------
License
This file is part of OpenFOAM.
OpenFOAM is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2 of the License, or (at your
option) any later version.
OpenFOAM is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU General Public License
along with OpenFOAM; if not, write to the Free Software Foundation,
Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
Class
cudaBiCGStab
Description
Preconditioned Conjugate Gradient solver with run-time selectable
preconditioning for use with GPU
Author
Dominik Christ, Wikki Ltd.
Based on Cufflink library by Daniel P. Combest
SourceFiles
cudaBiCGStab.C
\*---------------------------------------------------------------------------*/
#ifndef cudaBiCGStab_H
#define cudaBiCGStab_H
#include "cudaSolver.H"
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
namespace Foam
{
/*---------------------------------------------------------------------------*\
Class cudaBiCGStab Declaration
\*---------------------------------------------------------------------------*/
class cudaBiCGStab
:
public cudaSolver
{
// Private Member Functions
//- Disallow default bitwise copy construct
cudaBiCGStab(const cudaBiCGStab&);
//- Disallow default bitwise assignment
void operator=(const cudaBiCGStab&);
public:
//- Runtime type information
TypeName("cudaBiCGStab");
// Constructors
//- Construct from matrix components and solver data stream
cudaBiCGStab
(
const word& fieldName,
const lduMatrix& matrix,
const FieldField<Field, scalar>& coupleBouCoeffs,
const FieldField<Field, scalar>& coupleIntCoeffs,
const lduInterfaceFieldPtrsList& interfaces,
const dictionary& dict
);
// Destructor
virtual ~cudaBiCGStab()
{}
// Member Functions
//- Solve the matrix with this solver
virtual lduSolverPerformance solve
(
scalarField& x,
const scalarField& b,
const direction cmpt = 0
) const;
};
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
} // End namespace Foam
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
#endif
// ************************************************************************* //

View file

@ -0,0 +1,184 @@
/**********************************************************************\
______ __ __ _______ _______ __ __ __ __ __ ___
/ || | | | | ____|| ____|| | | | | \ | | | |/ /
| ,----'| | | | | |__ | |__ | | | | | \| | | ' /
| | | | | | | __| | __| | | | | | . ` | | <
| `----.| `--' | | | | | | `----.| | | |\ | | . \
\______| \______/ |__| |__| |_______||__| |__| \__| |__|\__\
Cuda For FOAM Link
cufflink is a library for linking numerical methods based on Nvidia's
Compute Unified Device Architecture (CUDA™) C/C++ programming language
and OpenFOAM®.
Please note that cufflink is not approved or endorsed by ESI-OpenCFD®
Limited, the owner of the OpenFOAM® and OpenCFD® trademarks and
producer of OpenFOAM® software.
The official web-site of OpenCFD® Limited is www.openfoam.com .
------------------------------------------------------------------------
This file is part of cufflink.
cufflink is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
cufflink is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with cufflink. If not, see <http:// www.gnu.org/licenses/>.
Author
Daniel P. Combest. All rights reserved.
Modifications by Dominik Christ, Wikki Ltd.
Description
Ainv preconditioned conjugate gradient
solver for symmetric Matrices using a CUSP CUDA™ based solver.
\**********************************************************************/
#include "cudaTypes.H"
// CUSP Includes
#include <cusp/detail/config.h>
#include <cusp/verify.h>
#include <cusp/precond/ainv.h>
#include <cusp/coo_matrix.h>
#include <cusp/blas.h>
#include <cusp/multiply.h>
extern "C" void cgAinv
(
cuspEquationSystem* ces,
cudaSolverPerformance* solverPerf,
ValueType drop_tolerance,
int nonzero_per_row,
bool lin_dropping,
int lin_param
)
{
// Populate A
#include "fillCOOMatrix.H"
// Set storage
// #include "cufflink/setGPUStorage.H"
if (solverPerf->debugCusp)
{
std::cout<<" Using Ainv preconditioner\n";
}
cusp::precond::bridson_ainv<ValueType, MemorySpace> M
(
A,
drop_tolerance,
nonzero_per_row,
lin_dropping,
lin_param
);
// Start the krylov solver
assert(A.num_rows == A.num_cols); // sanity check
const size_t N = A.num_rows;
// allocate workspace
cusp::array1d<ValueType,MemorySpace> y(N);
cusp::array1d<ValueType,MemorySpace> z(N);
cusp::array1d<ValueType,MemorySpace> r(N);
cusp::array1d<ValueType,MemorySpace> p(N);
// y <- Ax
cusp::multiply(A, X, y);
// Define the normalization factor
ValueType normFactor = 1.0;
# include "buildNormFactor.H"
// r <- b - A*x
cusp::blas::axpby(B, y, r, ValueType(1), ValueType(-1));
// z <- M*r
cusp::multiply(M, r, z);
// p <- z
cusp::blas::copy(z, p);
// rz = <r^H, z>
ValueType rz = cusp::blas::dotc(r, z);
ValueType normR = cusp::blas::nrm2(r)/normFactor;
ValueType normR0 = normR; // Initial residual
solverPerf->iRes = normR0;
int count = 0;
while
(
normR > (solverPerf->tol)
&& count < (solverPerf->maxIter)
&& normR/normR0 >= (solverPerf->relTol)
|| count < solverPerf->minIter
)
{
// y <- Ap
cusp::multiply(A, p, y);
// alpha <- <r,z>/<y,p>
ValueType alpha = rz / cusp::blas::dotc(y, p);
// x <- x + alpha * p
cusp::blas::axpy(p, X, alpha);
// r <- r - alpha * y
cusp::blas::axpy(y, r, -alpha);
// z <- M*r
cusp::multiply(M, r, z);
ValueType rz_old = rz;
// rz = <r^H, z>
rz = cusp::blas::dotc(r, z);
// beta <- <r_{i+1},r_{i+1}>/<r,r>
ValueType beta = rz / rz_old;
// p <- r + beta*p should be p <- z + beta*p
cusp::blas::axpby(z, p, p, ValueType(1), beta);
normR = cusp::blas::nrm2(r)/normFactor;
count++;
}
// End the krylov solver
// Final residual
solverPerf->fRes = normR;
solverPerf->nIterations = count;
// Converged?
if
(
solverPerf->fRes<=solverPerf->tol
|| solverPerf->fRes/solverPerf->iRes<=solverPerf->relTol
)
{
solverPerf->converged = true;
}
else
{
solverPerf->converged = false;
}
// Pass the solution vector back
ces->X = X;
}

View file

@ -0,0 +1,174 @@
/**********************************************************************\
______ __ __ _______ _______ __ __ __ __ __ ___
/ || | | | | ____|| ____|| | | | | \ | | | |/ /
| ,----'| | | | | |__ | |__ | | | | | \| | | ' /
| | | | | | | __| | __| | | | | | . ` | | <
| `----.| `--' | | | | | | `----.| | | |\ | | . \
\______| \______/ |__| |__| |_______||__| |__| \__| |__|\__\
Cuda For FOAM Link
cufflink is a library for linking numerical methods based on Nvidia's
Compute Unified Device Architecture (CUDA™) C/C++ programming language
and OpenFOAM®.
Please note that cufflink is not approved or endorsed by ESI-OpenCFD®
Limited, the owner of the OpenFOAM® and OpenCFD® trademarks and
producer of OpenFOAM® software.
The official web-site of OpenCFD® Limited is www.openfoam.com .
------------------------------------------------------------------------
This file is part of cufflink.
cufflink is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
cufflink is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with cufflink. If not, see <http://www.gnu.org/licenses/>.
Author
Daniel P. Combest. All rights reserved.
Modifications by Dominik Christ, Wikki Ltd.
Description
diagonal preconditioned conjugate gradient
solver for symmetric Matrices using a CUSP CUDA™ based solver.
\**********************************************************************/
#include "cudaTypes.H"
//CUSP Includes
#include <cusp/detail/config.h>
#include <cusp/verify.h>
#include <cusp/precond/aggregation/smoothed_aggregation.h>
#include <cusp/coo_matrix.h>
#include <cusp/blas.h>
#include <cusp/multiply.h>
extern "C" void cgAmg
(
cuspEquationSystem* ces,
cudaSolverPerformance* solverPerf,
ValueType theta
)
{
// Populate A
# include "fillCOOMatrix.H"
// Set storage
// #include "cufflink/setGPUStorage.H"
if(solverPerf->debugCusp)
{
std::cout << " Using amg preconditioner\n";
}
cusp::precond::aggregation::
smoothed_aggregation<IndexType, ValueType, MemorySpace> M(A);
// Start the krylov solver
assert(A.num_rows == A.num_cols); // sanity check
const size_t N = A.num_rows;
// allocate workspace
cusp::array1d<ValueType,MemorySpace> y(N);
cusp::array1d<ValueType,MemorySpace> z(N);
cusp::array1d<ValueType,MemorySpace> r(N);
cusp::array1d<ValueType,MemorySpace> p(N);
// y <- Ax
cusp::multiply(A, X, y);
//define the normalization factor
ValueType normFactor = 1.0;
# include "buildNormFactor.H"
// r <- b - A*x
cusp::blas::axpby(B, y, r, ValueType(1), ValueType(-1));
// z <- M*r
cusp::multiply(M, r, z);
// p <- z
cusp::blas::copy(z, p);
// rz = <r^H, z>
ValueType rz = cusp::blas::dotc(r, z);
ValueType normR = cusp::blas::nrm2(r)/normFactor;
ValueType normR0 = normR;//initial residual
solverPerf->iRes = normR0;
int count = 0;
while
(
normR > (solverPerf->tol)
&& count < (solverPerf->maxIter)
&& normR/normR0 >= (solverPerf->relTol)
|| count < solverPerf->minIter
)
{
// y <- Ap
cusp::multiply(A, p, y);
// alpha <- <r,z>/<y,p>
ValueType alpha = rz / cusp::blas::dotc(y, p);
// x <- x + alpha * p
cusp::blas::axpy(p, X, alpha);
// r <- r - alpha * y
cusp::blas::axpy(y, r, -alpha);
// z <- M*r
cusp::multiply(M, r, z);
ValueType rz_old = rz;
// rz = <r^H, z>
rz = cusp::blas::dotc(r, z);
// beta <- <r_{i+1},r_{i+1}>/<r,r>
ValueType beta = rz / rz_old;
// p <- r + beta*p should be p <- z + beta*p
cusp::blas::axpby(z, p, p, ValueType(1), beta);
normR = cusp::blas::nrm2(r)/normFactor;
count++;
}
// End the krylov solver
//final residual
solverPerf->fRes = normR;
solverPerf->nIterations = count;
//converged?
if
(
solverPerf->fRes<=solverPerf->tol
|| solverPerf->fRes/solverPerf->iRes<=solverPerf->relTol
)
{
solverPerf->converged = true;
}
else
{
solverPerf->converged = false;
}
// Pass the solution vector back
ces->X = X;
}

View file

@ -0,0 +1,173 @@
/**********************************************************************\
______ __ __ _______ _______ __ __ __ __ __ ___
/ || | | | | ____|| ____|| | | | | \ | | | |/ /
| ,----'| | | | | |__ | |__ | | | | | \| | | ' /
| | | | | | | __| | __| | | | | | . ` | | <
| `----.| `--' | | | | | | `----.| | | |\ | | . \
\______| \______/ |__| |__| |_______||__| |__| \__| |__|\__\
Cuda For FOAM Link
cufflink is a library for linking numerical methods based on Nvidia's
Compute Unified Device Architecture (CUDA™) C/C++ programming language
and OpenFOAM®.
Please note that cufflink is not approved or endorsed by ESI-OpenCFD®
Limited, the owner of the OpenFOAM® and OpenCFD® trademarks and
producer of OpenFOAM® software.
The official web-site of OpenCFD® Limited is www.openfoam.com .
------------------------------------------------------------------------
This file is part of cufflink.
cufflink is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
cufflink is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with cufflink. If not, see <http://www.gnu.org/licenses/>.
Author
Daniel P. Combest. All rights reserved.
Modifications by Dominik Christ, Wikki Ltd.
Description
diagonal preconditioned conjugate gradient
solver for symmetric Matrices using a CUSP CUDA™ based solver.
\**********************************************************************/
#include "cudaTypes.H"
//CUSP Includes
#include <cusp/detail/config.h>
#include <cusp/verify.h>
#include <cusp/precond/diagonal.h>
#include <cusp/coo_matrix.h>
#include <cusp/ell_matrix.h>
#include <cusp/blas.h>
#include <cusp/multiply.h>
extern "C" void cgDiag
(
cuspEquationSystem* ces,
cudaSolverPerformance* solverPerf
)
{
// Populate A
# include "fillCOOMatrix.H"
// Set storage
// #include "cufflink/setGPUStorage.H"
if(solverPerf->debugCusp)
{
std::cout << " Using Cusp_Diagonal preconditioner\n";
}
cusp::precond::diagonal<ValueType, MemorySpace> M(A);
// Start the krylov solver
assert(A.num_rows == A.num_cols); // sanity check
const size_t N = A.num_rows;
// allocate workspace
cusp::array1d<ValueType,MemorySpace> y(N);
cusp::array1d<ValueType,MemorySpace> z(N);
cusp::array1d<ValueType,MemorySpace> r(N);
cusp::array1d<ValueType,MemorySpace> p(N);
// y <- Ax
cusp::multiply(A, X, y);
//define the normalization factor
ValueType normFactor = 1.0;
#include "buildNormFactor.H"
// r <- b - A*x
cusp::blas::axpby(B, y, r, ValueType(1), ValueType(-1));
// z <- M*r
cusp::multiply(M, r, z);
// p <- z
cusp::blas::copy(z, p);
// rz = <r^H, z>
ValueType rz = cusp::blas::dotc(r, z);
ValueType normR = cusp::blas::nrm2(r)/normFactor;
ValueType normR0 = normR; // Initial residual
solverPerf->iRes = normR0;
int count = 0;
while
(
normR > (solverPerf->tol)
&& count < (solverPerf->maxIter)
&& normR/normR0 >= (solverPerf->relTol)
|| count < solverPerf->minIter
)
{
// y <- Ap
cusp::multiply(A, p, y);
// alpha <- <r,z>/<y,p>
ValueType alpha = rz / cusp::blas::dotc(y, p);
// x <- x + alpha * p
cusp::blas::axpy(p, X, alpha);
// r <- r - alpha * y
cusp::blas::axpy(y, r, -alpha);
// z <- M*r
cusp::multiply(M, r, z);
ValueType rz_old = rz;
// rz = <r^H, z>
rz = cusp::blas::dotc(r, z);
// beta <- <r_{i+1},r_{i+1}>/<r,r>
ValueType beta = rz / rz_old;
// p <- r + beta*p should be p <- z + beta*p
cusp::blas::axpby(z, p, p, ValueType(1), beta);
normR = cusp::blas::nrm2(r)/normFactor;
count++;
}
// End the krylov solver
// Final residual
solverPerf->fRes = normR;
solverPerf->nIterations = count;
// Converged?
if
(
solverPerf->fRes<=solverPerf->tol
|| solverPerf->fRes/solverPerf->iRes<=solverPerf->relTol
)
{
solverPerf->converged = true;
}
else
{
solverPerf->converged=false;
}
// Pass the solution vector back
ces->X = X;
}

View file

@ -0,0 +1,177 @@
/*---------------------------------------------------------------------------*\
========= |
\\ / F ield | OpenFOAM: The Open Source CFD Toolbox
\\ / O peration |
\\ / A nd | Copyright held by original author
\\/ M anipulation |
-------------------------------------------------------------------------------
License
This file is part of OpenFOAM.
OpenFOAM is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2 of the License, or (at your
option) any later version.
OpenFOAM is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU General Public License
along with OpenFOAM; if not, write to the Free Software Foundation,
Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
\*---------------------------------------------------------------------------*/
#include "cudaCG.H"
// Preconditioner and solver are hardwired due to
// code structure of Cusp library. Below are
// external functions with solver/preconditioner combinations
// CG with diagonal preconditioning
extern "C" void cgDiag
(
cuspEquationSystem* ces,
cudaSolverPerformance* solverParam
);
// CG with Ainv preconditioning
extern "C" void cgAinv
(
cuspEquationSystem* ces,
cudaSolverPerformance* solverPerf,
ValueType drop_tolerance,
int nonzero_per_row,
bool lin_dropping,
int lin_param
);
// CG with amg preconditioning
extern "C" void cgAmg
(
cuspEquationSystem* ces,
cudaSolverPerformance* solverParam,
ValueType theta
);
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
namespace Foam
{
defineTypeNameAndDebug(cudaCG, 0);
lduSolver::addsymMatrixConstructorToTable<cudaCG>
addcudaCGSymMatrixConstructorToTable_;
}
// * * * * * * * * * * * * * * * * Constructors * * * * * * * * * * * * * * //
// - Construct from matrix and solver data stream
Foam::cudaCG::cudaCG
(
const word& fieldName,
const lduMatrix& matrix,
const FieldField<Field, scalar>& coupleBouCoeffs,
const FieldField<Field, scalar>& coupleIntCoeffs,
const lduInterfaceFieldPtrsList& interfaces,
const dictionary& dict
)
:
cudaSolver
(
fieldName,
matrix,
coupleBouCoeffs,
coupleIntCoeffs,
interfaces,
dict
)
{}
// * * * * * * * * * * * * * * * Member Functions * * * * * * * * * * * * * //
Foam::lduSolverPerformance Foam::cudaCG::solve
(
scalarField& x,
const scalarField& b,
const direction cmpt
) const
{
// Initialize Cusp solver perfomance
cudaSolverPerformance solverPerf = cudaSolverPerformanceDefault();
solverPerf.minIter = minIter(); // Minimum iterations
solverPerf.maxIter = maxIter(); // Maximum iterations
solverPerf.relTol = relTolerance(); // Relative tolerance
solverPerf.tol = tolerance(); // Tolerance
if (lduMatrix::debug >= 2)
{
solverPerf.debugCusp = true;
}
// Initialize and copy matrix data to GPU
cuspEquationSystem ces = createSymCuspMatrix(matrix(), x, b);
// Call solver externally
word preconName(dict().lookup("preconditioner"));
if (preconName == "diagonal")
{
cgDiag(&ces, &solverPerf);
}
else if(preconName == "Ainv")
{
cgAinv
(
&ces,
&solverPerf,
dict().lookupOrDefault<scalar>("dropTolerance", 0.1),
dict().lookupOrDefault<label>("nonzeroPerRow", -1),
dict().lookupOrDefault<bool>("LinDropping", false),
dict().lookupOrDefault<label>("LinParameter", 1)
);
}
else if(preconName == "amg")
{
cgAmg
(
&ces,
&solverPerf,
dict().lookupOrDefault<scalar>("theta", 0)
);
}
else
{
FatalErrorIn("cudaCG::solver()")
<< "Unknown preconditioner name. "
<< "Options are:" << nl
<< "(" << nl
<< "diagonal" << nl
<< "Ainv" << nl
<< "amg" << nl
<< ")" << nl
<< abort(FatalError);
}
// Copy the x vector back to Openfoam
thrust::copy(ces.X.begin(), ces.X.end(), x.begin());
// Return solver output
return lduSolverPerformance
(
typeName,
fieldName(),
solverPerf.iRes,
solverPerf.fRes,
solverPerf.nIterations,
solverPerf.converged,
solverPerf.singular
);
}
// ************************************************************************* //

View file

@ -0,0 +1,114 @@
/*---------------------------------------------------------------------------*\
========= |
\\ / F ield | OpenFOAM: The Open Source CFD Toolbox
\\ / O peration |
\\ / A nd | Copyright held by original author
\\/ M anipulation |
-------------------------------------------------------------------------------
License
This file is part of OpenFOAM.
OpenFOAM is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2 of the License, or (at your
option) any later version.
OpenFOAM is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU General Public License
along with OpenFOAM; if not, write to the Free Software Foundation,
Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
Class
cudaCG
Description
Preconditioned Conjugate Gradient solver with run-time selectable
preconditioning for use with GPU
Author
Dominik Christ, Wikki Ltd.
Based on Cufflink library by Daniel P. Combest
SourceFiles
cudaCG.C
\*---------------------------------------------------------------------------*/
#ifndef cudaCG_H
#define cudaCG_H
#include "cudaSolver.H"
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
namespace Foam
{
/*---------------------------------------------------------------------------*\
Class cudaCG Declaration
\*---------------------------------------------------------------------------*/
class cudaCG
:
public cudaSolver
{
// Private Member Functions
//- Disallow default bitwise copy construct
cudaCG(const cudaCG&);
//- Disallow default bitwise assignment
void operator=(const cudaCG&);
public:
//- Runtime type information
TypeName("cudaCG");
// Constructors
//- Construct from matrix components and solver data stream
cudaCG
(
const word& fieldName,
const lduMatrix& matrix,
const FieldField<Field, scalar>& coupleBouCoeffs,
const FieldField<Field, scalar>& coupleIntCoeffs,
const lduInterfaceFieldPtrsList& interfaces,
const dictionary& dict
);
// Destructor
virtual ~cudaCG()
{}
// Member Functions
//- Solve the matrix with this solver
virtual lduSolverPerformance solve
(
scalarField& x,
const scalarField& b,
const direction cmpt = 0
) const;
};
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
} // End namespace Foam
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
#endif
// ************************************************************************* //

View file

@ -0,0 +1,239 @@
/*---------------------------------------------------------------------------*\
========= |
\\ / F ield | OpenFOAM: The Open Source CFD Toolbox
\\ / O peration |
\\ / A nd | Copyright held by original author
\\/ M anipulation |
-------------------------------------------------------------------------------
License
This file is part of OpenFOAM.
OpenFOAM is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2 of the License, or (at your
option) any later version.
OpenFOAM is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU General Public License
along with OpenFOAM; if not, write to the Free Software Foundation,
Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
\*---------------------------------------------------------------------------*/
#include "cudaSolver.H"
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
namespace Foam
{
defineTypeNameAndDebug(cudaSolver, 0);
}
// * * * * * * * * * * * * * * * * Constructors * * * * * * * * * * * * * * //
Foam::cudaSolver::cudaSolver
(
const word& fieldName,
const lduMatrix& matrix,
const FieldField<Field, scalar>& coupleBouCoeffs,
const FieldField<Field, scalar>& coupleIntCoeffs,
const lduInterfaceFieldPtrsList& interfaces,
const dictionary& dict
)
:
lduSolver
(
fieldName,
matrix,
coupleBouCoeffs,
coupleIntCoeffs,
interfaces,
dict
)
{}
// * * * * * * * * * * * * * * * Member Functions * * * * * * * * * * * * * //
cuspEquationSystem Foam::cudaSolver::createSymCuspMatrix
(
const lduMatrix& matrix,
const scalarField& x,
const scalarField& b
) const
{
cuspEquationSystem ces;
ces.nCells = x.size();
ces.nFaces = matrix.lower().size();
ces.A = cusp::coo_matrix<IndexType, ValueType, hostMemorySpace>
(
ces.nCells,
ces.nCells,
ces.nCells+2*ces.nFaces
);
ces.X = cusp::array1d< ValueType, hostMemorySpace>(ces.nCells);
ces.B = cusp::array1d< ValueType, hostMemorySpace>(ces.nCells);
// Copy values of lduMatrix diag to A COO matrix
thrust::copy
(
matrix.diag().begin(),
matrix.diag().end(),
ces.A.values.begin()
);
// Copy values of lduMatrix lower to A COO matrix
thrust::copy
(
matrix.lower().begin(),
matrix.lower().end(),
ces.A.values.begin()+ces.nCells
);
// Since matrix is symmetric, do not copy upper values to save time
// -> performed on GPU
// Copy row indices of lower into A COO matrix
thrust::copy
(
matrix.lduAddr().upperAddr().begin(),
matrix.lduAddr().upperAddr().end(),
ces.A.row_indices.begin()+ces.nCells
);
// Copy column indices of lower into A COO matrix
thrust::copy
(
matrix.lduAddr().lowerAddr().begin(),
matrix.lduAddr().lowerAddr().end(),
ces.A.column_indices.begin()+ces.nCells
);
// Do not initialize the row and column values of diag to save time
// -> performed on GPU
// Copy x of lower into x vector
thrust::copy(x.begin(), x.end(), ces.X.begin());
// Copy b of lower into b vector
thrust::copy(b.begin(), b.end(), ces.B.begin());
return ces;
}
cuspEquationSystem Foam::cudaSolver::createAsymCuspMatrix
(
const lduMatrix& matrix,
const scalarField& x,
const scalarField& b
) const
{
cuspEquationSystem ces;
ces.nCells = x.size();
ces.nFaces = matrix.lower().size();
ces.A = cusp::coo_matrix<IndexType, ValueType, hostMemorySpace>
(
ces.nCells,
ces.nCells,
ces.nCells + 2*ces.nFaces
);
ces.X = cusp::array1d< ValueType, hostMemorySpace>(ces.nCells);
ces.B = cusp::array1d< ValueType, hostMemorySpace>(ces.nCells);
// Copy values from the lduMatrix to our equation system
// Copy values of lduMatrix diag to A COO matrix
thrust::copy
(
matrix.diag().begin(),
matrix.diag().end(),
ces.A.values.begin()
);
// Copy values of lduMatrix lower to A COO matrix
thrust::copy
(
matrix.lower().begin(),
matrix.lower().end(),
ces.A.values.begin() + ces.nCells
);
// Copy values of lduMatrix upper to A COO matrix
thrust::copy
(
matrix.upper().begin(),
matrix.upper().end(),
ces.A.values.begin() + ces.nCells + ces.nFaces
);
// Copy row and column indices of lower and upper to our equations system
// do not initialize the row and column values of diag to save time
// -> performed on GPU
// Copy row indices of lower into A COO matrix
thrust::copy
(
matrix.lduAddr().upperAddr().begin(),
matrix.lduAddr().upperAddr().end(),
ces.A.row_indices.begin() + ces.nCells
);
// Copy column indices of lower into A COO matrix
thrust::copy
(
matrix.lduAddr().lowerAddr().begin(),
matrix.lduAddr().lowerAddr().end(),
ces.A.column_indices.begin() + ces.nCells
);
// Copy row indices of upper into A COO matrix
thrust::copy
(
matrix.lduAddr().lowerAddr().begin(),
matrix.lduAddr().lowerAddr().end(),
ces.A.row_indices.begin() + ces.nCells + ces.nFaces
);
// Copy column indices of upper into A COO matrix
thrust::copy
(
matrix.lduAddr().upperAddr().begin(),
matrix.lduAddr().upperAddr().end(),
ces.A.column_indices.begin() + ces.nCells + ces.nFaces
);
// Copy x of lower into x vector
thrust::copy(x.begin(), x.end(), ces.X.begin());
// Copy b of lower into b vector
thrust::copy(b.begin(), b.end(), ces.B.begin());
return ces;
}
cudaSolverPerformance Foam::cudaSolver::cudaSolverPerformanceDefault() const
{
cudaSolverPerformance csp;
csp.minIter = 0;
csp.maxIter = 1000;
csp.relTol = 0;
csp.tol = 1e-6;
csp.nIterations = 0;
csp.iRes = -1;
csp.fRes = -1;
csp.converged = false;
csp.singular = false;
csp.debugCusp = false;
return csp;
}
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
// ************************************************************************* //

View file

@ -0,0 +1,136 @@
/*---------------------------------------------------------------------------*\
========= |
\\ / F ield | OpenFOAM: The Open Source CFD Toolbox
\\ / O peration |
\\ / A nd | Copyright held by original author
\\/ M anipulation |
-------------------------------------------------------------------------------
License
This file is part of OpenFOAM.
OpenFOAM is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2 of the License, or (at your
option) any later version.
OpenFOAM is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU General Public License
along with OpenFOAM; if not, write to the Free Software Foundation,
Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
Class
Foam::cudaSolver
Description
Base class for GPU based solvers
SourceFiles
cudaSolver.C
Author
Dominik Christ, Wikki Ltd.
Based on Cufflink library by Daniel P. Combest
\*---------------------------------------------------------------------------*/
#ifndef cudaSolver_H
#define cudaSolver_H
#include "cudaTypes.H"
#include "lduMatrix.H"
#include "autoPtr.H"
#include "runTimeSelectionTables.H"
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
namespace Foam
{
/*---------------------------------------------------------------------------*\
Class cudaSolver Declaration
\*---------------------------------------------------------------------------*/
class cudaSolver
:
public lduSolver
{
// Private Member Functions
//- Disallow default bitwise copy construct
cudaSolver(const cudaSolver&);
//- Disallow default bitwise assignment
void operator=(const cudaSolver&);
public:
//- Runtime type information
TypeName("cudaSolver");
// Constructors
//- Construct from components
cudaSolver
(
const word& fieldName,
const lduMatrix& matrix,
const FieldField<Field, scalar>& coupleBouCoeffs,
const FieldField<Field, scalar>& coupleIntCoeffs,
const lduInterfaceFieldPtrsList& interfaces,
const dictionary& dict
);
// Destructor
virtual ~cudaSolver()
{}
// Member Functions
//- Solve the matrix with this solver
virtual lduSolverPerformance solve
(
scalarField& x,
const scalarField& b,
const direction cmpt = 0
) const = 0;
//- Create COO matrix from symmetric lduMatrix
cuspEquationSystem createSymCuspMatrix
(
const lduMatrix& matrix,
const scalarField& x,
const scalarField& b
) const;
//- Create COO matrix from asymmetric lduMatrix
cuspEquationSystem createAsymCuspMatrix
(
const lduMatrix& matrix,
const scalarField& x,
const scalarField& b
) const;
cudaSolverPerformance cudaSolverPerformanceDefault() const;
};
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
} // End namespace Foam
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
#endif
// ************************************************************************* //

View file

@ -0,0 +1,99 @@
/*---------------------------------------------------------------------------*\
========= |
\\ / F ield | OpenFOAM: The Open Source CFD Toolbox
\\ / O peration |
\\ / A nd | Copyright held by original author
\\/ M anipulation |
-------------------------------------------------------------------------------
License
This file is part of OpenFOAM.
OpenFOAM is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2 of the License, or (at your
option) any later version.
OpenFOAM is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU General Public License
along with OpenFOAM; if not, write to the Free Software Foundation,
Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
Class
Foam::cudaSolver
Description
Base class for GPU based solvers
SourceFiles
cudaSolver.C
Author
Dominik Christ, Wikki Ltd.
Based on Cufflink library by Daniel P. Combest
\*---------------------------------------------------------------------------*/
#ifndef cudaTypes_H
#define cudaTypes_H
#if defined(WM_DP)
#define MPI_SCALAR MPI_DOUBLE
typedef double ValueType;
#elif defined(WM_SP)
#define MPI_SCALAR MPI_FLOAT
typedef float ValueType;
#endif
#include <cusp/coo_matrix.h>
#include <cusp/memory.h>
#include <thrust/host_vector.h>
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
typedef cusp::device_memory MemorySpace;
typedef cusp::host_memory hostMemorySpace;
typedef int IndexType;
// used to prevent floating point exception
const ValueType SMALL = 1e-20;
struct cuspEquationSystem
{
cusp::coo_matrix<IndexType,ValueType,hostMemorySpace> A;
cusp::array1d< ValueType, hostMemorySpace> X;
cusp::array1d< ValueType, hostMemorySpace> B;
int nCells;
int nFaces;
};
struct cudaSolverPerformance
{
int minIter;
int maxIter;
double relTol;
double tol;
int nIterations;
double iRes;
double fRes;
bool converged;
bool singular;
bool debugCusp;
};
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
#endif
// ************************************************************************* //

View file

@ -0,0 +1,85 @@
/**********************************************************************\
______ __ __ _______ _______ __ __ __ __ __ ___
/ || | | | | ____|| ____|| | | | | \ | | | |/ /
| ,----'| | | | | |__ | |__ | | | | | \| | | ' /
| | | | | | | __| | __| | | | | | . ` | | <
| `----.| `--' | | | | | | `----.| | | |\ | | . \
\______| \______/ |__| |__| |_______||__| |__| \__| |__|\__\
Cuda For FOAM Link
cufflink is a library for linking numerical methods based on Nvidia's
Compute Unified Device Architecture (CUDA) C/C++ programming language
and OpenFOAM®.
Please note that cufflink is not approved or endorsed by OpenCFD®
Limited, the owner of the OpenFOAM® and OpenCFD® trademarks and
producer of OpenFOAM® software.
The official web-site of OpenCFD® Limited is www.openfoam.com .
------------------------------------------------------------------------
This file is part of cufflink.
cufflink is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
cufflink is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with cufflink. If not, see <http://www.gnu.org/licenses/>.
Author
Daniel P. Combest. All rights reserved.
Description
Builds normalization factor in exactly teh same way OpenFOAM® does
in order to normalize the residual to 1. See provided
documentation for exact formulation of normalization factor
\**********************************************************************/
// Build the normfactor as OF-1.6-ext does in file
// ($FOAM_SRC/OpenFOAM/matrices/lduMatrix/lduMatrix/lduMatrixSolver.C)
//all variables will be deleted once out of scope
{
//compute average of x vector
ValueType xRef = thrust::reduce
(
X.begin(),
X.begin() + A.num_rows
)/ValueType(A.num_rows);
//vector of average X values
cusp::array1d<ValueType,MemorySpace> xBar(N,xRef);
//holds A*xBar
cusp::array1d<ValueType,MemorySpace> yBar(N);
//compute ybar<-A*xBar
cusp::multiply(A, xBar, yBar);
cusp::array1d<ValueType,MemorySpace> ymyBar(N); // holds y - yBar
cusp::array1d<ValueType,MemorySpace> bmyBar(N); // holds b - yBar
// Calculate: ymyBar <- y - yBar
cusp::blas::axpby(y, yBar, ymyBar, ValueType(1), ValueType(-1));
// Calculate: bmyBar <- b - yBar
cusp::blas::axpby(B, yBar, bmyBar, ValueType(1), ValueType(-1));
//compute norm factor exactly as OpenFOAM does
normFactor = cusp::blas::nrm2(ymyBar) + cusp::blas::nrm2(bmyBar) + SMALL;
if (solverPerf->debugCusp)
{
std::cout<<" Normalisation factor = "<<normFactor<<"\n";
}
}

View file

@ -0,0 +1,85 @@
/**********************************************************************\
______ __ __ _______ _______ __ __ __ __ __ ___
/ || | | | | ____|| ____|| | | | | \ | | | |/ /
| ,----'| | | | | |__ | |__ | | | | | \| | | ' /
| | | | | | | __| | __| | | | | | . ` | | <
| `----.| `--' | | | | | | `----.| | | |\ | | . \
\______| \______/ |__| |__| |_______||__| |__| \__| |__|\__\
Cuda For FOAM Link
cufflink is a library for linking numerical methods based on Nvidia's
Compute Unified Device Architecture (CUDA) C/C + + programming language
and OpenFOAM®.
Please note that cufflink is not approved or endorsed by OpenCFD®
Limited, the owner of the OpenFOAM® and OpenCFD® trademarks and
producer of OpenFOAM® software.
The official web-site of OpenCFD® Limited is www.openfoam.com .
------------------------------------------------------------------------
This file is part of cufflink.
cufflink is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
cufflink is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with cufflink. If not, see <http://www.gnu.org/licenses/>.
Author
Daniel P. Combest. All rights reserved.
Description
Populate the COO matrix
\**********************************************************************/
// Fill in the rest of the diag (rows and col),
// upper and upper.rows and upper.cols
cusp::coo_matrix<IndexType, ValueType, MemorySpace> A(ces->A);
cusp::array1d<ValueType, MemorySpace> X(ces->X);
cusp::array1d<ValueType, MemorySpace> B(ces->B);
// Determine row indices of diagonal values and fill A COO matrix
thrust::sequence(A.row_indices.begin(), A.row_indices.begin() + ces->nCells);
// Determine column indices of diagonal values and fill A COO matrix
thrust::sequence
(
A.column_indices.begin(),
A.column_indices.begin() + ces->nCells
);
// Copy values of lower into upper in COO matrix
thrust::copy
(
A.values.begin() + ces->nCells,
A.values.begin() + ces->nCells + ces->nFaces,
A.values.begin() + ces->nCells + ces->nFaces
);
// Copy row indices of lower to columns of upper into A COO matrix
thrust::copy
(
A.row_indices.begin() + ces->nCells,
A.row_indices.begin() + ces->nCells + ces->nFaces,
A.column_indices.begin() + ces->nCells + ces->nFaces
);
// Copy column indices of lower to rows of upper into A COO matrix
thrust::copy
(
A.column_indices.begin() + ces->nCells,
A.column_indices.begin() + ces->nCells + ces->nFaces,
A.row_indices.begin() + ces->nCells + ces->nFaces
);
A.sort_by_row_and_column(); // Speeds code up a little bit more

View file

@ -8,3 +8,4 @@ include $(GENERAL_RULES)/standard
include $(RULES)/X
include $(RULES)/c
include $(RULES)/c++
include $(RULES)/nvcc