Hand-merged CUDA support
This commit is contained in:
parent
7e8f06837c
commit
db4d4cefe7
23 changed files with 2313 additions and 5 deletions
|
@ -2,7 +2,7 @@
|
|||
# ========= |
|
||||
# \\ / F ield | OpenFOAM: The Open Source CFD Toolbox
|
||||
# \\ / O peration |
|
||||
# \\ / A nd | Copyright (C) 2010-2010 OpenCFD Ltd.
|
||||
# \\ / A nd | Copyright (C) 2013 Hrvoje Jasak
|
||||
# \\/ M anipulation |
|
||||
#------------------------------------------------------------------------------
|
||||
# License
|
||||
|
@ -62,6 +62,14 @@
|
|||
# So build your ThirdParty directory accordingly.
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
# System installed CUDA
|
||||
#setenv CUDA_SYSTEM 1
|
||||
#setenv CUDA_DIR path_to_system_installed_cuda
|
||||
#setenv CUDA_BIN_DIR $CUDA_DIR/bin
|
||||
#setenv CUDA_LIB_DIR $CUDA_DIR/lib
|
||||
#setenv CUDA_INCLUDE_DIR $CUDA_DIR/include
|
||||
#setenv CUDA_ARCH sm_20
|
||||
|
||||
# System installed Mesquite
|
||||
#setenv MESQUITE_SYSTEM 1
|
||||
#setenv MESQUITE_DIR path_to_system_installed_mesquite
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
# ========= |
|
||||
# \\ / F ield | OpenFOAM: The Open Source CFD Toolbox
|
||||
# \\ / O peration |
|
||||
# \\ / A nd | Copyright (C) 2010-2010 OpenCFD Ltd.
|
||||
# \\ / A nd | Copyright (C) 2013 Hrvoje Jasak
|
||||
# \\/ M anipulation |
|
||||
#------------------------------------------------------------------------------
|
||||
# License
|
||||
|
@ -64,6 +64,14 @@ compilerInstall=System
|
|||
# So build your ThirdParty directory accordingly.
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
# System installed CUDA
|
||||
#export CUDA_SYSTEM=1
|
||||
#export CUDA_DIR=path_to_system_installed_cuda
|
||||
#export CUDA_BIN_DIR=$CUDA_DIR/bin
|
||||
#export CUDA_LIB_DIR=$CUDA_DIR/lib
|
||||
#export CUDA_INCLUDE_DIR=$CUDA_DIR/include
|
||||
#export CUDA_ARCH=sm_20
|
||||
|
||||
# System installed Mesquite
|
||||
#export MESQUITE_SYSTEM=1
|
||||
#export MESQUITE_DIR=path_to_system_installed_mesquite
|
||||
|
|
|
@ -211,7 +211,7 @@ case SYSTEMOPENMPI:
|
|||
endif
|
||||
else
|
||||
# Here, we assume your environment is already set for running
|
||||
# and developping with openmpi.
|
||||
# and developing with openmpi.
|
||||
#
|
||||
# Initialize OPENMPI_BIN_DIR using the path to mpicc
|
||||
set mpicc_cmd=`which mpicc`
|
||||
|
@ -382,6 +382,23 @@ else
|
|||
setenv MPI_BUFFER_SIZE $minBufferSize
|
||||
endif
|
||||
|
||||
# CUDA library
|
||||
# ~~~~~~~~~~~~
|
||||
if ( $?CUDA_SYSTEM == 0 && -e /usr/local/cuda-5.5/bin/nvcc ) then
|
||||
setenv CUDA_DIR /usr/local/cuda-5.5
|
||||
setenv CUDA_BIN_DIR $CUDA_DIR/bin
|
||||
setenv CUDA_LIB_DIR $CUDA_DIR/lib64
|
||||
setenv CUDA_INCLUDE_DIR $CUDA_DIR/include
|
||||
setenv CUDA_ARCH sm_20
|
||||
endif
|
||||
|
||||
if ( $?CUDA_BIN_DIR ) then
|
||||
_foamAddPath $CUDA_BIN_DIR
|
||||
endif
|
||||
|
||||
if ( $?CUDA_LIB_DIR ) then
|
||||
_foamAddLib $CUDA_LIB_DIR
|
||||
endif
|
||||
|
||||
# CGAL library if available
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
|
|
@ -432,6 +432,18 @@ fi
|
|||
export MPI_BUFFER_SIZE
|
||||
|
||||
|
||||
# CUDA if available
|
||||
# ~~~~~~~~~~~~~~~~~
|
||||
[ -z "$CUDA_SYSTEM" ] && [ -e /usr/local/cuda-5.5/bin/nvcc ] && {
|
||||
export CUDA_DIR=/usr/local/cuda-5.5
|
||||
export CUDA_BIN_DIR=$CUDA_DIR/bin
|
||||
export CUDA_LIB_DIR=$CUDA_DIR/lib64
|
||||
export CUDA_INCLUDE_DIR=$CUDA_DIR/include
|
||||
}
|
||||
|
||||
[ -d "$CUDA_LIB_DIR" ] && _foamAddPath $CUDA_BIN_DIR && _foamAddLib $CUDA_LIB_DIR
|
||||
|
||||
|
||||
# CGAL library if available
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
[ -d "$CGAL_LIB_DIR" ] && _foamAddLib $CGAL_LIB_DIR
|
||||
|
@ -472,7 +484,7 @@ export MPI_BUFFER_SIZE
|
|||
|
||||
# Load Metis library
|
||||
# ~~~~~~~~~~~~~~~~~~
|
||||
[ -z "$METIS_SYSTEM" ] && [ -d $WM_THIRD_PARTY_DIR/packages/metis-5.0pre2/platforms/$WM_OPTIONS ] && {
|
||||
[ -z "$METIS_SYSTEM" ] && [ -e $WM_THIRD_PARTY_DIR/packages/metis-5.0pre2/platforms/$WM_OPTIONS ] && {
|
||||
_foamSource $WM_THIRD_PARTY_DIR/packages/metis-5.0pre2/platforms/$WM_OPTIONS/etc/metis-5.0pre2.sh
|
||||
}
|
||||
[ "$FOAM_VERBOSE" -a "$PS1" ] && echo " METIS_DIR is initialized to: $METIS_DIR"
|
||||
|
|
|
@ -68,4 +68,7 @@ wmake libso engine
|
|||
|
||||
wmake libso equationReader
|
||||
wmake libso multiSolver
|
||||
|
||||
( cd cudaSolvers ; ./Allwmake )
|
||||
|
||||
# ----------------------------------------------------------------- end-of-file
|
||||
|
|
13
src/cudaSolvers/Allwmake
Executable file
13
src/cudaSolvers/Allwmake
Executable file
|
@ -0,0 +1,13 @@
|
|||
#!/bin/sh
|
||||
cd ${0%/*} || exit 1 # run from this directory
|
||||
|
||||
if [ -f $CUDA_BIN_DIR/nvcc ]
|
||||
then
|
||||
echo "Found nvcc -- enabling CUDA support."
|
||||
wmake libso
|
||||
else
|
||||
echo "No nvcc - CUDA not available."
|
||||
fi
|
||||
|
||||
|
||||
# ----------------------------------------------------------------- end-of-file
|
12
src/cudaSolvers/Make/files
Normal file
12
src/cudaSolvers/Make/files
Normal file
|
@ -0,0 +1,12 @@
|
|||
cudaSolver/cudaSolver.C
|
||||
|
||||
cudaCG/cgDiag.cu
|
||||
cudaCG/cgAmg.cu
|
||||
cudaCG/cgAinv.cu
|
||||
cudaCG/cudaCG.C
|
||||
|
||||
cudaBiCGStab/bicgDiag.cu
|
||||
cudaBiCGStab/bicgAinv.cu
|
||||
cudaBiCGStab/cudaBiCGStab.C
|
||||
|
||||
LIB = $(FOAM_LIBBIN)/libcudaSolvers
|
5
src/cudaSolvers/Make/options
Normal file
5
src/cudaSolvers/Make/options
Normal file
|
@ -0,0 +1,5 @@
|
|||
EXE_INC = \
|
||||
-I$(CUDA_INCLUDE_DIR)
|
||||
|
||||
LIB_LIBS = \
|
||||
-L$(CUDA_LIB_DIR) -lcudart
|
246
src/cudaSolvers/cudaBiCGStab/bicgAinv.cu
Normal file
246
src/cudaSolvers/cudaBiCGStab/bicgAinv.cu
Normal file
|
@ -0,0 +1,246 @@
|
|||
/**********************************************************************\
|
||||
______ __ __ _______ _______ __ __ __ __ __ ___
|
||||
/ || | | | | ____|| ____|| | | | | \ | | | |/ /
|
||||
| ,----'| | | | | |__ | |__ | | | | | \| | | ' /
|
||||
| | | | | | | __| | __| | | | | | . ` | | <
|
||||
| `----.| `--' | | | | | | `----.| | | |\ | | . \
|
||||
\______| \______/ |__| |__| |_______||__| |__| \__| |__|\__\
|
||||
|
||||
Cuda For FOAM Link
|
||||
|
||||
cufflink is a library for linking numerical methods based on Nvidia's
|
||||
Compute Unified Device Architecture (CUDA™) C/C++ programming language
|
||||
and OpenFOAM®.
|
||||
|
||||
Please note that cufflink is not approved or endorsed by OpenCFD®
|
||||
Limited, the owner of the OpenFOAM® and OpenCFD® trademarks and
|
||||
producer of OpenFOAM® software.
|
||||
|
||||
The official web-site of OpenCFD® Limited is www.openfoam.com .
|
||||
|
||||
------------------------------------------------------------------------
|
||||
This file is part of cufflink.
|
||||
|
||||
cufflink is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
cufflink is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with cufflink. If not, see <http:// www.gnu.org/licenses/>.
|
||||
|
||||
Author
|
||||
Daniel P. Combest. All rights reserved.
|
||||
Modifications by Dominik Christ, Wikki Ltd.
|
||||
|
||||
Description
|
||||
Sparse approximate inverse preconditioned conjugate gradient stabilized
|
||||
solver for asymmetric Matrices using a CUSP CUDA™ based solver.
|
||||
|
||||
\**********************************************************************/
|
||||
|
||||
|
||||
#include "cudaTypes.H"
|
||||
|
||||
// CUSP Includes
|
||||
#include <cusp/detail/config.h>
|
||||
#include <cusp/verify.h>
|
||||
#include <cusp/precond/ainv.h>
|
||||
#include <cusp/coo_matrix.h>
|
||||
#include <cusp/blas.h>
|
||||
#include <cusp/multiply.h>
|
||||
|
||||
extern "C" void bicgAinv
|
||||
(
|
||||
cuspEquationSystem* ces,
|
||||
cudaSolverPerformance* solverPerf,
|
||||
ValueType drop_tolerance,
|
||||
int nonzero_per_row,
|
||||
bool lin_dropping,
|
||||
int lin_param
|
||||
)
|
||||
{
|
||||
cusp::coo_matrix<IndexType, ValueType, MemorySpace> A(ces->A);
|
||||
cusp::array1d<ValueType, MemorySpace> X(ces->X);
|
||||
cusp::array1d<ValueType, MemorySpace> B(ces->B);
|
||||
|
||||
// Fill in the rest of the diag (rows and col)
|
||||
// Determine row indices of diagonal values and fill A COO matrix
|
||||
thrust::sequence
|
||||
(
|
||||
A.row_indices.begin(),
|
||||
A.row_indices.begin() + ces->nCells
|
||||
);
|
||||
|
||||
// Determine column indices of diagonal values and fill A COO matrix
|
||||
thrust::sequence
|
||||
(
|
||||
A.column_indices.begin(),
|
||||
A.column_indices.begin() + ces->nCells
|
||||
);
|
||||
|
||||
// Sorted coo by row and column. speeds code up a little bit more
|
||||
A.sort_by_row_and_column();
|
||||
|
||||
// Set storage
|
||||
// #include "cufflink/setGPUStorage.H"
|
||||
|
||||
if (solverPerf->debugCusp)
|
||||
{
|
||||
std::cout << " Using Ainv preconditioner\n";
|
||||
}
|
||||
|
||||
cusp::precond::bridson_ainv<ValueType, MemorySpace> M
|
||||
(
|
||||
A,
|
||||
drop_tolerance,
|
||||
nonzero_per_row,
|
||||
lin_dropping,
|
||||
lin_param
|
||||
);
|
||||
|
||||
// Start Krylov solver
|
||||
assert(A.num_rows == A.num_cols); // Sanity check
|
||||
|
||||
const size_t N = A.num_rows;
|
||||
|
||||
// Allocate workspace
|
||||
cusp::array1d<ValueType,MemorySpace> y(N);
|
||||
|
||||
cusp::array1d<ValueType,MemorySpace> p(N);
|
||||
cusp::array1d<ValueType,MemorySpace> r(N);
|
||||
cusp::array1d<ValueType,MemorySpace> r_star(N);
|
||||
cusp::array1d<ValueType,MemorySpace> s(N);
|
||||
cusp::array1d<ValueType,MemorySpace> Mp(N);
|
||||
cusp::array1d<ValueType,MemorySpace> AMp(N);
|
||||
cusp::array1d<ValueType,MemorySpace> Ms(N);
|
||||
cusp::array1d<ValueType,MemorySpace> AMs(N);
|
||||
|
||||
// y <- Ax
|
||||
cusp::multiply(A, X, y);
|
||||
|
||||
// Define the normalization factor
|
||||
ValueType normFactor = 1.0;
|
||||
|
||||
# include "buildNormFactor.H"
|
||||
|
||||
// r <- b - A*x
|
||||
cusp::blas::axpby(B, y, r, ValueType(1), ValueType(-1));
|
||||
|
||||
// p <- r
|
||||
cusp::blas::copy(r, p);
|
||||
|
||||
// r_star <- r
|
||||
cusp::blas::copy(r, r_star);
|
||||
|
||||
ValueType r_r_star_old = cusp::blas::dotc(r_star, r);
|
||||
|
||||
|
||||
ValueType normR = cusp::blas::nrm2(r)/normFactor;
|
||||
ValueType normR0 = normR; // Initial residual
|
||||
solverPerf->iRes = normR0;
|
||||
int count = 0;
|
||||
|
||||
while
|
||||
(
|
||||
normR > (solverPerf->tol)
|
||||
&& count < (solverPerf->maxIter)
|
||||
&& normR/normR0 >= (solverPerf->relTol)
|
||||
|| count < solverPerf->minIter
|
||||
)
|
||||
{
|
||||
// Mp = M*p
|
||||
cusp::multiply(M, p, Mp);
|
||||
|
||||
// AMp = A*Mp
|
||||
cusp::multiply(A, Mp, AMp);
|
||||
|
||||
// alpha = (r_j, r_star) / (A*M*p, r_star)
|
||||
ValueType alpha = r_r_star_old/cusp::blas::dotc(r_star, AMp);
|
||||
|
||||
// s_j = r_j - alpha * AMp
|
||||
cusp::blas::axpby(r, AMp, s, ValueType(1), ValueType(-alpha));
|
||||
|
||||
ValueType normS = cusp::blas::nrm2(s)/normFactor;
|
||||
|
||||
if
|
||||
(
|
||||
!(
|
||||
normS > (solverPerf->tol)
|
||||
&& normS/normR0 >= (solverPerf->relTol)
|
||||
)
|
||||
)
|
||||
{
|
||||
// is this right?
|
||||
// x += alpha*M*p_j
|
||||
cusp::blas::axpby(X, Mp, X, ValueType(1), ValueType(alpha));
|
||||
|
||||
// y <- Ax
|
||||
cusp::multiply(A, X, y);
|
||||
|
||||
// r <- b - A*x
|
||||
cusp::blas::axpby(B, y, r, ValueType(1), ValueType(-1));
|
||||
// not sure we should be measuring r here...need to look again.
|
||||
normR = cusp::blas::nrm2(r)/normFactor;
|
||||
|
||||
count++;
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
// Ms = M*s_j
|
||||
cusp::multiply(M, s, Ms);
|
||||
|
||||
// AMs = A*Ms
|
||||
cusp::multiply(A, Ms, AMs);
|
||||
|
||||
// omega = (AMs, s) / (AMs, AMs)
|
||||
ValueType omega = cusp::blas::dotc(AMs, s)/cusp::blas::dotc(AMs, AMs);
|
||||
|
||||
// x_{j+1} = x_j + alpha*M*p_j + omega*M*s_j
|
||||
cusp::blas::axpbypcz(X, Mp, Ms, X, ValueType(1), alpha, omega);
|
||||
|
||||
// r_{j+1} = s_j - omega*A*M*s
|
||||
cusp::blas::axpby(s, AMs, r, ValueType(1), -omega);
|
||||
|
||||
// beta_j = (r_{j+1}, r_star) / (r_j, r_star) * (alpha/omega)
|
||||
ValueType r_r_star_new = cusp::blas::dotc(r_star, r);
|
||||
ValueType beta = (r_r_star_new/r_r_star_old)*(alpha/omega);
|
||||
r_r_star_old = r_r_star_new;
|
||||
|
||||
// p_{j+1} = r_{j+1} + beta*(p_j - omega*A*M*p)
|
||||
cusp::blas::axpbypcz(r, p, AMp, p, ValueType(1), beta, -beta*omega);
|
||||
|
||||
// not dure we should be measuring r here...need to look again.
|
||||
normR = cusp::blas::nrm2(r)/normFactor;
|
||||
|
||||
count++;
|
||||
}
|
||||
// End Krylov Solver
|
||||
|
||||
// Final residual
|
||||
solverPerf->fRes = cusp::blas::nrm2(r)/normFactor;
|
||||
solverPerf->nIterations = count;
|
||||
|
||||
// converged?
|
||||
if
|
||||
(
|
||||
solverPerf->fRes<=solverPerf->tol
|
||||
|| solverPerf->fRes/solverPerf->iRes <= solverPerf->relTol
|
||||
)
|
||||
{
|
||||
solverPerf->converged = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
solverPerf->converged = false;
|
||||
}
|
||||
|
||||
// Pass the solution vector back
|
||||
ces->X = X;
|
||||
}
|
235
src/cudaSolvers/cudaBiCGStab/bicgDiag.cu
Normal file
235
src/cudaSolvers/cudaBiCGStab/bicgDiag.cu
Normal file
|
@ -0,0 +1,235 @@
|
|||
/**********************************************************************\
|
||||
______ __ __ _______ _______ __ __ __ __ __ ___
|
||||
/ || | | | | ____|| ____|| | | | | \ | | | |/ /
|
||||
| ,----'| | | | | |__ | |__ | | | | | \| | | ' /
|
||||
| | | | | | | __| | __| | | | | | . ` | | <
|
||||
| `----.| `--' | | | | | | `----.| | | |\ | | . \
|
||||
\______| \______/ |__| |__| |_______||__| |__| \__| |__|\__\
|
||||
|
||||
Cuda For FOAM Link
|
||||
|
||||
cufflink is a library for linking numerical methods based on Nvidia's
|
||||
Compute Unified Device Architecture (CUDA™) C/C++ programming language
|
||||
and OpenFOAM®.
|
||||
|
||||
Please note that cufflink is not approved or endorsed by OpenCFD®
|
||||
Limited, the owner of the OpenFOAM® and OpenCFD® trademarks and
|
||||
producer of OpenFOAM® software.
|
||||
|
||||
The official web-site of OpenCFD® Limited is www.openfoam.com .
|
||||
|
||||
------------------------------------------------------------------------
|
||||
This file is part of cufflink.
|
||||
|
||||
cufflink is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
cufflink is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with cufflink. If not, see <http:// www.gnu.org/licenses/>.
|
||||
|
||||
Author
|
||||
Daniel P. Combest. All rights reserved.
|
||||
Modifications by Dominik Christ, Wikki Ltd.
|
||||
|
||||
Description
|
||||
Sparse approximate inverse preconditioned conjugate gradient stabilized
|
||||
solver for asymmetric Matrices using a CUSP CUDA™ based solver.
|
||||
|
||||
\**********************************************************************/
|
||||
|
||||
|
||||
#include "cudaTypes.H"
|
||||
|
||||
// CUSP Includes
|
||||
#include <cusp/detail/config.h>
|
||||
#include <cusp/verify.h>
|
||||
#include <cusp/precond/diagonal.h>
|
||||
#include <cusp/coo_matrix.h>
|
||||
#include <cusp/blas.h>
|
||||
#include <cusp/multiply.h>
|
||||
|
||||
extern "C" void bicgDiag
|
||||
(
|
||||
cuspEquationSystem* ces,
|
||||
cudaSolverPerformance* solverPerf
|
||||
)
|
||||
{
|
||||
cusp::coo_matrix<IndexType, ValueType, MemorySpace> A(ces->A);
|
||||
cusp::array1d<ValueType, MemorySpace> X(ces->X);
|
||||
cusp::array1d<ValueType, MemorySpace> B(ces->B);
|
||||
|
||||
// Fill in the rest of the diag (rows and col)
|
||||
// Determine row indices of diagonal values and fill A COO matrix
|
||||
thrust::sequence
|
||||
(
|
||||
A.row_indices.begin(),
|
||||
A.row_indices.begin() + ces->nCells
|
||||
);
|
||||
|
||||
// Determine column indices of diagonal values and fill A COO matrix
|
||||
thrust::sequence
|
||||
(
|
||||
A.column_indices.begin(),
|
||||
A.column_indices.begin() + ces->nCells
|
||||
);
|
||||
|
||||
// Sorted coo by row and column. speeds code up a little bit more
|
||||
A.sort_by_row_and_column();
|
||||
|
||||
// Set storage
|
||||
//# include "setGPUStorage.H"
|
||||
|
||||
if (solverPerf->debugCusp)
|
||||
{
|
||||
std::cout << " Using Ainv preconditioner\n";
|
||||
}
|
||||
|
||||
cusp::precond::diagonal<ValueType, MemorySpace> M(A);
|
||||
|
||||
// Start Krylov solver
|
||||
assert(A.num_rows == A.num_cols); // Sanity check
|
||||
|
||||
const size_t N = A.num_rows;
|
||||
|
||||
// Allocate workspace
|
||||
cusp::array1d<ValueType,MemorySpace> y(N);
|
||||
|
||||
cusp::array1d<ValueType,MemorySpace> p(N);
|
||||
cusp::array1d<ValueType,MemorySpace> r(N);
|
||||
cusp::array1d<ValueType,MemorySpace> r_star(N);
|
||||
cusp::array1d<ValueType,MemorySpace> s(N);
|
||||
cusp::array1d<ValueType,MemorySpace> Mp(N);
|
||||
cusp::array1d<ValueType,MemorySpace> AMp(N);
|
||||
cusp::array1d<ValueType,MemorySpace> Ms(N);
|
||||
cusp::array1d<ValueType,MemorySpace> AMs(N);
|
||||
|
||||
// y <- Ax
|
||||
cusp::multiply(A, X, y);
|
||||
|
||||
// Define the normalization factor
|
||||
ValueType normFactor = 1.0;
|
||||
|
||||
# include "buildNormFactor.H"
|
||||
|
||||
// r <- b - A*x
|
||||
cusp::blas::axpby(B, y, r, ValueType(1), ValueType(-1));
|
||||
|
||||
// p <- r
|
||||
cusp::blas::copy(r, p);
|
||||
|
||||
// r_star <- r
|
||||
cusp::blas::copy(r, r_star);
|
||||
|
||||
ValueType r_r_star_old = cusp::blas::dotc(r_star, r);
|
||||
|
||||
|
||||
ValueType normR = cusp::blas::nrm2(r)/normFactor;
|
||||
ValueType normR0 = normR; // Initial residual
|
||||
solverPerf->iRes = normR0;
|
||||
int count = 0;
|
||||
|
||||
while
|
||||
(
|
||||
normR > (solverPerf->tol)
|
||||
&& count < (solverPerf->maxIter)
|
||||
&& normR/normR0 >= (solverPerf->relTol)
|
||||
|| count < solverPerf->minIter
|
||||
)
|
||||
{
|
||||
// Mp = M*p
|
||||
cusp::multiply(M, p, Mp);
|
||||
|
||||
// AMp = A*Mp
|
||||
cusp::multiply(A, Mp, AMp);
|
||||
|
||||
// alpha = (r_j, r_star) / (A*M*p, r_star)
|
||||
ValueType alpha = r_r_star_old/cusp::blas::dotc(r_star, AMp);
|
||||
|
||||
// s_j = r_j - alpha * AMp
|
||||
cusp::blas::axpby(r, AMp, s, ValueType(1), ValueType(-alpha));
|
||||
|
||||
ValueType normS = cusp::blas::nrm2(s)/normFactor;
|
||||
|
||||
if
|
||||
(
|
||||
!(
|
||||
normS > (solverPerf->tol)
|
||||
&& normS/normR0 >= (solverPerf->relTol)
|
||||
)
|
||||
)
|
||||
{
|
||||
// is this right?
|
||||
// x += alpha*M*p_j
|
||||
cusp::blas::axpby(X, Mp, X, ValueType(1), ValueType(alpha));
|
||||
|
||||
// y <- Ax
|
||||
cusp::multiply(A, X, y);
|
||||
|
||||
// r <- b - A*x
|
||||
cusp::blas::axpby(B, y, r, ValueType(1), ValueType(-1));
|
||||
// not sure we should be measuring r here...need to look again.
|
||||
normR = cusp::blas::nrm2(r)/normFactor;
|
||||
|
||||
count++;
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
// Ms = M*s_j
|
||||
cusp::multiply(M, s, Ms);
|
||||
|
||||
// AMs = A*Ms
|
||||
cusp::multiply(A, Ms, AMs);
|
||||
|
||||
// omega = (AMs, s) / (AMs, AMs)
|
||||
ValueType omega = cusp::blas::dotc(AMs, s)/cusp::blas::dotc(AMs, AMs);
|
||||
|
||||
// x_{j+1} = x_j + alpha*M*p_j + omega*M*s_j
|
||||
cusp::blas::axpbypcz(X, Mp, Ms, X, ValueType(1), alpha, omega);
|
||||
|
||||
// r_{j+1} = s_j - omega*A*M*s
|
||||
cusp::blas::axpby(s, AMs, r, ValueType(1), -omega);
|
||||
|
||||
// beta_j = (r_{j+1}, r_star) / (r_j, r_star) * (alpha/omega)
|
||||
ValueType r_r_star_new = cusp::blas::dotc(r_star, r);
|
||||
ValueType beta = (r_r_star_new/r_r_star_old)*(alpha/omega);
|
||||
r_r_star_old = r_r_star_new;
|
||||
|
||||
// p_{j+1} = r_{j+1} + beta*(p_j - omega*A*M*p)
|
||||
cusp::blas::axpbypcz(r, p, AMp, p, ValueType(1), beta, -beta*omega);
|
||||
|
||||
// not dure we should be measuring r here...need to look again.
|
||||
normR = cusp::blas::nrm2(r)/normFactor;
|
||||
|
||||
count++;
|
||||
}
|
||||
// End Krylov Solver
|
||||
|
||||
// Final residual
|
||||
solverPerf->fRes = cusp::blas::nrm2(r)/normFactor;
|
||||
solverPerf->nIterations = count;
|
||||
|
||||
// converged?
|
||||
if
|
||||
(
|
||||
solverPerf->fRes<=solverPerf->tol
|
||||
|| solverPerf->fRes/solverPerf->iRes <= solverPerf->relTol
|
||||
)
|
||||
{
|
||||
solverPerf->converged = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
solverPerf->converged = false;
|
||||
}
|
||||
|
||||
// Pass the solution vector back
|
||||
ces->X = X;
|
||||
}
|
168
src/cudaSolvers/cudaBiCGStab/cudaBiCGStab.C
Normal file
168
src/cudaSolvers/cudaBiCGStab/cudaBiCGStab.C
Normal file
|
@ -0,0 +1,168 @@
|
|||
/*---------------------------------------------------------------------------*\
|
||||
========= |
|
||||
\\ / F ield | OpenFOAM: The Open Source CFD Toolbox
|
||||
\\ / O peration |
|
||||
\\ / A nd | Copyright held by original author
|
||||
\\/ M anipulation |
|
||||
-------------------------------------------------------------------------------
|
||||
License
|
||||
This file is part of OpenFOAM.
|
||||
|
||||
OpenFOAM is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU General Public License as published by the
|
||||
Free Software Foundation; either version 2 of the License, or (at your
|
||||
option) any later version.
|
||||
|
||||
OpenFOAM is distributed in the hope that it will be useful, but WITHOUT
|
||||
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OpenFOAM; if not, write to the Free Software Foundation,
|
||||
Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
Description
|
||||
Preconditioned Conjugate Gradient solver with run-time selectable
|
||||
preconditioning
|
||||
|
||||
Author
|
||||
Dominik Christ, Wikki Ltd.
|
||||
Based on Cufflink library by Daniel P. Combest
|
||||
|
||||
\*---------------------------------------------------------------------------*/
|
||||
|
||||
#include "cudaBiCGStab.H"
|
||||
|
||||
// Preconditioner and solver are hardwired due to
|
||||
// code structure of Cusp library. Below are
|
||||
// external functions with solver/preconditioner combinations
|
||||
|
||||
// BiCG with diagonal preconditioning
|
||||
extern "C" void bicgDiag
|
||||
(
|
||||
cuspEquationSystem* ces,
|
||||
cudaSolverPerformance* solverParam
|
||||
);
|
||||
|
||||
// BiCG with Ainv preconditioning
|
||||
extern "C" void bicgAinv
|
||||
(
|
||||
cuspEquationSystem* ces,
|
||||
cudaSolverPerformance* solverPerf,
|
||||
ValueType drop_tolerance,
|
||||
int nonzero_per_row,
|
||||
bool lin_dropping,
|
||||
int lin_param
|
||||
);
|
||||
|
||||
|
||||
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
|
||||
|
||||
namespace Foam
|
||||
{
|
||||
defineTypeNameAndDebug(cudaBiCGStab, 0);
|
||||
|
||||
lduSolver::addasymMatrixConstructorToTable<cudaBiCGStab>
|
||||
addcudaBiCGStabAsymMatrixConstructorToTable_;
|
||||
}
|
||||
|
||||
|
||||
// * * * * * * * * * * * * * * * * Constructors * * * * * * * * * * * * * * //
|
||||
|
||||
// - Construct from matrix and solver data stream
|
||||
Foam::cudaBiCGStab::cudaBiCGStab
|
||||
(
|
||||
const word& fieldName,
|
||||
const lduMatrix& matrix,
|
||||
const FieldField<Field, scalar>& coupleBouCoeffs,
|
||||
const FieldField<Field, scalar>& coupleIntCoeffs,
|
||||
const lduInterfaceFieldPtrsList& interfaces,
|
||||
const dictionary& dict
|
||||
)
|
||||
:
|
||||
cudaSolver
|
||||
(
|
||||
fieldName,
|
||||
matrix,
|
||||
coupleBouCoeffs,
|
||||
coupleIntCoeffs,
|
||||
interfaces,
|
||||
dict
|
||||
)
|
||||
{}
|
||||
|
||||
|
||||
// * * * * * * * * * * * * * * * Member Functions * * * * * * * * * * * * * //
|
||||
|
||||
Foam::lduSolverPerformance Foam::cudaBiCGStab::solve
|
||||
(
|
||||
scalarField& x,
|
||||
const scalarField& b,
|
||||
const direction cmpt
|
||||
) const
|
||||
{
|
||||
// Initialize Cusp solver perfomance
|
||||
cudaSolverPerformance solverPerf = cudaSolverPerformanceDefault();
|
||||
solverPerf.minIter = minIter(); // Minimum iterations
|
||||
solverPerf.maxIter = maxIter(); // Maximum iterations
|
||||
solverPerf.relTol = relTolerance(); // Relative tolerance
|
||||
solverPerf.tol = tolerance(); // Tolerance
|
||||
|
||||
if (lduMatrix::debug >= 2)
|
||||
{
|
||||
solverPerf.debugCusp = true;
|
||||
}
|
||||
|
||||
// Initialize and copy matrix data to GPU
|
||||
cuspEquationSystem ces = createAsymCuspMatrix(matrix(), x, b);
|
||||
|
||||
// Call solver externally
|
||||
word preconName(dict().lookup("preconditioner"));
|
||||
if (preconName == "diagonal")
|
||||
{
|
||||
bicgDiag(&ces, &solverPerf);
|
||||
}
|
||||
else if(preconName == "Ainv")
|
||||
{
|
||||
bicgAinv
|
||||
(
|
||||
&ces,
|
||||
&solverPerf,
|
||||
dict().lookupOrDefault<scalar>("dropTolerance", 0.1),
|
||||
dict().lookupOrDefault<label>("nonzeroPerRow", -1),
|
||||
dict().lookupOrDefault<bool>("LinDropping", false),
|
||||
dict().lookupOrDefault<label>("LinParameter", 1)
|
||||
);
|
||||
}
|
||||
else
|
||||
{
|
||||
FatalErrorIn("cudaBiCGStab::solver()")
|
||||
<< "Unknown preconditioner name. "
|
||||
<< "Options are:" << nl
|
||||
<< "(" << nl
|
||||
<< "diagonal" << nl
|
||||
<< "Ainv" << nl
|
||||
<< ")" << nl
|
||||
<< abort(FatalError);
|
||||
}
|
||||
|
||||
// copy the x vector back to Openfoam
|
||||
thrust::copy(ces.X.begin(), ces.X.end(), x.begin());
|
||||
|
||||
// Return solver output
|
||||
return lduSolverPerformance
|
||||
(
|
||||
typeName,
|
||||
fieldName(),
|
||||
solverPerf.iRes,
|
||||
solverPerf.fRes,
|
||||
solverPerf.nIterations,
|
||||
solverPerf.converged,
|
||||
solverPerf.singular
|
||||
);
|
||||
|
||||
}
|
||||
|
||||
|
||||
// ************************************************************************* //
|
114
src/cudaSolvers/cudaBiCGStab/cudaBiCGStab.H
Normal file
114
src/cudaSolvers/cudaBiCGStab/cudaBiCGStab.H
Normal file
|
@ -0,0 +1,114 @@
|
|||
/*---------------------------------------------------------------------------*\
|
||||
========= |
|
||||
\\ / F ield | OpenFOAM: The Open Source CFD Toolbox
|
||||
\\ / O peration |
|
||||
\\ / A nd | Copyright held by original author
|
||||
\\/ M anipulation |
|
||||
-------------------------------------------------------------------------------
|
||||
License
|
||||
This file is part of OpenFOAM.
|
||||
|
||||
OpenFOAM is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU General Public License as published by the
|
||||
Free Software Foundation; either version 2 of the License, or (at your
|
||||
option) any later version.
|
||||
|
||||
OpenFOAM is distributed in the hope that it will be useful, but WITHOUT
|
||||
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OpenFOAM; if not, write to the Free Software Foundation,
|
||||
Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
Class
|
||||
cudaBiCGStab
|
||||
|
||||
Description
|
||||
Preconditioned Conjugate Gradient solver with run-time selectable
|
||||
preconditioning for use with GPU
|
||||
|
||||
Author
|
||||
Dominik Christ, Wikki Ltd.
|
||||
Based on Cufflink library by Daniel P. Combest
|
||||
|
||||
SourceFiles
|
||||
cudaBiCGStab.C
|
||||
|
||||
\*---------------------------------------------------------------------------*/
|
||||
|
||||
#ifndef cudaBiCGStab_H
|
||||
#define cudaBiCGStab_H
|
||||
|
||||
#include "cudaSolver.H"
|
||||
|
||||
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
|
||||
|
||||
namespace Foam
|
||||
{
|
||||
|
||||
/*---------------------------------------------------------------------------*\
|
||||
Class cudaBiCGStab Declaration
|
||||
\*---------------------------------------------------------------------------*/
|
||||
|
||||
class cudaBiCGStab
|
||||
:
|
||||
public cudaSolver
|
||||
{
|
||||
// Private Member Functions
|
||||
|
||||
//- Disallow default bitwise copy construct
|
||||
cudaBiCGStab(const cudaBiCGStab&);
|
||||
|
||||
//- Disallow default bitwise assignment
|
||||
void operator=(const cudaBiCGStab&);
|
||||
|
||||
|
||||
public:
|
||||
|
||||
//- Runtime type information
|
||||
TypeName("cudaBiCGStab");
|
||||
|
||||
|
||||
// Constructors
|
||||
|
||||
//- Construct from matrix components and solver data stream
|
||||
cudaBiCGStab
|
||||
(
|
||||
const word& fieldName,
|
||||
const lduMatrix& matrix,
|
||||
const FieldField<Field, scalar>& coupleBouCoeffs,
|
||||
const FieldField<Field, scalar>& coupleIntCoeffs,
|
||||
const lduInterfaceFieldPtrsList& interfaces,
|
||||
const dictionary& dict
|
||||
);
|
||||
|
||||
|
||||
// Destructor
|
||||
|
||||
virtual ~cudaBiCGStab()
|
||||
{}
|
||||
|
||||
|
||||
// Member Functions
|
||||
|
||||
//- Solve the matrix with this solver
|
||||
virtual lduSolverPerformance solve
|
||||
(
|
||||
scalarField& x,
|
||||
const scalarField& b,
|
||||
const direction cmpt = 0
|
||||
) const;
|
||||
};
|
||||
|
||||
|
||||
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
|
||||
|
||||
} // End namespace Foam
|
||||
|
||||
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
|
||||
|
||||
#endif
|
||||
|
||||
// ************************************************************************* //
|
184
src/cudaSolvers/cudaCG/cgAinv.cu
Normal file
184
src/cudaSolvers/cudaCG/cgAinv.cu
Normal file
|
@ -0,0 +1,184 @@
|
|||
/**********************************************************************\
|
||||
______ __ __ _______ _______ __ __ __ __ __ ___
|
||||
/ || | | | | ____|| ____|| | | | | \ | | | |/ /
|
||||
| ,----'| | | | | |__ | |__ | | | | | \| | | ' /
|
||||
| | | | | | | __| | __| | | | | | . ` | | <
|
||||
| `----.| `--' | | | | | | `----.| | | |\ | | . \
|
||||
\______| \______/ |__| |__| |_______||__| |__| \__| |__|\__\
|
||||
|
||||
Cuda For FOAM Link
|
||||
|
||||
cufflink is a library for linking numerical methods based on Nvidia's
|
||||
Compute Unified Device Architecture (CUDA™) C/C++ programming language
|
||||
and OpenFOAM®.
|
||||
|
||||
Please note that cufflink is not approved or endorsed by ESI-OpenCFD®
|
||||
Limited, the owner of the OpenFOAM® and OpenCFD® trademarks and
|
||||
producer of OpenFOAM® software.
|
||||
|
||||
The official web-site of OpenCFD® Limited is www.openfoam.com .
|
||||
|
||||
------------------------------------------------------------------------
|
||||
This file is part of cufflink.
|
||||
|
||||
cufflink is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
cufflink is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with cufflink. If not, see <http:// www.gnu.org/licenses/>.
|
||||
|
||||
Author
|
||||
Daniel P. Combest. All rights reserved.
|
||||
Modifications by Dominik Christ, Wikki Ltd.
|
||||
|
||||
Description
|
||||
Ainv preconditioned conjugate gradient
|
||||
solver for symmetric Matrices using a CUSP CUDA™ based solver.
|
||||
|
||||
\**********************************************************************/
|
||||
|
||||
|
||||
#include "cudaTypes.H"
|
||||
|
||||
// CUSP Includes
|
||||
#include <cusp/detail/config.h>
|
||||
#include <cusp/verify.h>
|
||||
#include <cusp/precond/ainv.h>
|
||||
#include <cusp/coo_matrix.h>
|
||||
#include <cusp/blas.h>
|
||||
#include <cusp/multiply.h>
|
||||
|
||||
extern "C" void cgAinv
|
||||
(
|
||||
cuspEquationSystem* ces,
|
||||
cudaSolverPerformance* solverPerf,
|
||||
ValueType drop_tolerance,
|
||||
int nonzero_per_row,
|
||||
bool lin_dropping,
|
||||
int lin_param
|
||||
)
|
||||
{
|
||||
// Populate A
|
||||
#include "fillCOOMatrix.H"
|
||||
|
||||
// Set storage
|
||||
// #include "cufflink/setGPUStorage.H"
|
||||
|
||||
if (solverPerf->debugCusp)
|
||||
{
|
||||
std::cout<<" Using Ainv preconditioner\n";
|
||||
}
|
||||
|
||||
cusp::precond::bridson_ainv<ValueType, MemorySpace> M
|
||||
(
|
||||
A,
|
||||
drop_tolerance,
|
||||
nonzero_per_row,
|
||||
lin_dropping,
|
||||
lin_param
|
||||
);
|
||||
|
||||
// Start the krylov solver
|
||||
assert(A.num_rows == A.num_cols); // sanity check
|
||||
|
||||
const size_t N = A.num_rows;
|
||||
|
||||
// allocate workspace
|
||||
cusp::array1d<ValueType,MemorySpace> y(N);
|
||||
cusp::array1d<ValueType,MemorySpace> z(N);
|
||||
cusp::array1d<ValueType,MemorySpace> r(N);
|
||||
cusp::array1d<ValueType,MemorySpace> p(N);
|
||||
|
||||
// y <- Ax
|
||||
cusp::multiply(A, X, y);
|
||||
|
||||
// Define the normalization factor
|
||||
ValueType normFactor = 1.0;
|
||||
|
||||
# include "buildNormFactor.H"
|
||||
|
||||
// r <- b - A*x
|
||||
cusp::blas::axpby(B, y, r, ValueType(1), ValueType(-1));
|
||||
|
||||
// z <- M*r
|
||||
cusp::multiply(M, r, z);
|
||||
|
||||
// p <- z
|
||||
cusp::blas::copy(z, p);
|
||||
|
||||
// rz = <r^H, z>
|
||||
ValueType rz = cusp::blas::dotc(r, z);
|
||||
|
||||
ValueType normR = cusp::blas::nrm2(r)/normFactor;
|
||||
ValueType normR0 = normR; // Initial residual
|
||||
solverPerf->iRes = normR0;
|
||||
int count = 0;
|
||||
|
||||
while
|
||||
(
|
||||
normR > (solverPerf->tol)
|
||||
&& count < (solverPerf->maxIter)
|
||||
&& normR/normR0 >= (solverPerf->relTol)
|
||||
|| count < solverPerf->minIter
|
||||
)
|
||||
{
|
||||
// y <- Ap
|
||||
cusp::multiply(A, p, y);
|
||||
|
||||
// alpha <- <r,z>/<y,p>
|
||||
ValueType alpha = rz / cusp::blas::dotc(y, p);
|
||||
|
||||
// x <- x + alpha * p
|
||||
cusp::blas::axpy(p, X, alpha);
|
||||
|
||||
// r <- r - alpha * y
|
||||
cusp::blas::axpy(y, r, -alpha);
|
||||
|
||||
// z <- M*r
|
||||
cusp::multiply(M, r, z);
|
||||
|
||||
ValueType rz_old = rz;
|
||||
|
||||
// rz = <r^H, z>
|
||||
rz = cusp::blas::dotc(r, z);
|
||||
|
||||
// beta <- <r_{i+1},r_{i+1}>/<r,r>
|
||||
ValueType beta = rz / rz_old;
|
||||
|
||||
// p <- r + beta*p should be p <- z + beta*p
|
||||
cusp::blas::axpby(z, p, p, ValueType(1), beta);
|
||||
|
||||
normR = cusp::blas::nrm2(r)/normFactor;
|
||||
|
||||
count++;
|
||||
}
|
||||
// End the krylov solver
|
||||
|
||||
// Final residual
|
||||
solverPerf->fRes = normR;
|
||||
solverPerf->nIterations = count;
|
||||
|
||||
// Converged?
|
||||
if
|
||||
(
|
||||
solverPerf->fRes<=solverPerf->tol
|
||||
|| solverPerf->fRes/solverPerf->iRes<=solverPerf->relTol
|
||||
)
|
||||
{
|
||||
solverPerf->converged = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
solverPerf->converged = false;
|
||||
}
|
||||
|
||||
// Pass the solution vector back
|
||||
ces->X = X;
|
||||
}
|
174
src/cudaSolvers/cudaCG/cgAmg.cu
Normal file
174
src/cudaSolvers/cudaCG/cgAmg.cu
Normal file
|
@ -0,0 +1,174 @@
|
|||
/**********************************************************************\
|
||||
______ __ __ _______ _______ __ __ __ __ __ ___
|
||||
/ || | | | | ____|| ____|| | | | | \ | | | |/ /
|
||||
| ,----'| | | | | |__ | |__ | | | | | \| | | ' /
|
||||
| | | | | | | __| | __| | | | | | . ` | | <
|
||||
| `----.| `--' | | | | | | `----.| | | |\ | | . \
|
||||
\______| \______/ |__| |__| |_______||__| |__| \__| |__|\__\
|
||||
|
||||
Cuda For FOAM Link
|
||||
|
||||
cufflink is a library for linking numerical methods based on Nvidia's
|
||||
Compute Unified Device Architecture (CUDA™) C/C++ programming language
|
||||
and OpenFOAM®.
|
||||
|
||||
Please note that cufflink is not approved or endorsed by ESI-OpenCFD®
|
||||
Limited, the owner of the OpenFOAM® and OpenCFD® trademarks and
|
||||
producer of OpenFOAM® software.
|
||||
|
||||
The official web-site of OpenCFD® Limited is www.openfoam.com .
|
||||
|
||||
------------------------------------------------------------------------
|
||||
This file is part of cufflink.
|
||||
|
||||
cufflink is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
cufflink is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with cufflink. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
Author
|
||||
Daniel P. Combest. All rights reserved.
|
||||
Modifications by Dominik Christ, Wikki Ltd.
|
||||
|
||||
Description
|
||||
diagonal preconditioned conjugate gradient
|
||||
solver for symmetric Matrices using a CUSP CUDA™ based solver.
|
||||
|
||||
\**********************************************************************/
|
||||
|
||||
#include "cudaTypes.H"
|
||||
|
||||
//CUSP Includes
|
||||
#include <cusp/detail/config.h>
|
||||
#include <cusp/verify.h>
|
||||
#include <cusp/precond/aggregation/smoothed_aggregation.h>
|
||||
#include <cusp/coo_matrix.h>
|
||||
#include <cusp/blas.h>
|
||||
#include <cusp/multiply.h>
|
||||
|
||||
extern "C" void cgAmg
|
||||
(
|
||||
cuspEquationSystem* ces,
|
||||
cudaSolverPerformance* solverPerf,
|
||||
ValueType theta
|
||||
)
|
||||
{
|
||||
// Populate A
|
||||
# include "fillCOOMatrix.H"
|
||||
|
||||
// Set storage
|
||||
// #include "cufflink/setGPUStorage.H"
|
||||
|
||||
if(solverPerf->debugCusp)
|
||||
{
|
||||
std::cout << " Using amg preconditioner\n";
|
||||
}
|
||||
|
||||
cusp::precond::aggregation::
|
||||
smoothed_aggregation<IndexType, ValueType, MemorySpace> M(A);
|
||||
|
||||
// Start the krylov solver
|
||||
assert(A.num_rows == A.num_cols); // sanity check
|
||||
|
||||
const size_t N = A.num_rows;
|
||||
|
||||
// allocate workspace
|
||||
cusp::array1d<ValueType,MemorySpace> y(N);
|
||||
cusp::array1d<ValueType,MemorySpace> z(N);
|
||||
cusp::array1d<ValueType,MemorySpace> r(N);
|
||||
cusp::array1d<ValueType,MemorySpace> p(N);
|
||||
|
||||
// y <- Ax
|
||||
cusp::multiply(A, X, y);
|
||||
|
||||
//define the normalization factor
|
||||
ValueType normFactor = 1.0;
|
||||
|
||||
# include "buildNormFactor.H"
|
||||
|
||||
// r <- b - A*x
|
||||
cusp::blas::axpby(B, y, r, ValueType(1), ValueType(-1));
|
||||
|
||||
// z <- M*r
|
||||
cusp::multiply(M, r, z);
|
||||
|
||||
// p <- z
|
||||
cusp::blas::copy(z, p);
|
||||
|
||||
// rz = <r^H, z>
|
||||
ValueType rz = cusp::blas::dotc(r, z);
|
||||
|
||||
ValueType normR = cusp::blas::nrm2(r)/normFactor;
|
||||
ValueType normR0 = normR;//initial residual
|
||||
solverPerf->iRes = normR0;
|
||||
int count = 0;
|
||||
|
||||
while
|
||||
(
|
||||
normR > (solverPerf->tol)
|
||||
&& count < (solverPerf->maxIter)
|
||||
&& normR/normR0 >= (solverPerf->relTol)
|
||||
|| count < solverPerf->minIter
|
||||
)
|
||||
{
|
||||
// y <- Ap
|
||||
cusp::multiply(A, p, y);
|
||||
|
||||
// alpha <- <r,z>/<y,p>
|
||||
ValueType alpha = rz / cusp::blas::dotc(y, p);
|
||||
|
||||
// x <- x + alpha * p
|
||||
cusp::blas::axpy(p, X, alpha);
|
||||
|
||||
// r <- r - alpha * y
|
||||
cusp::blas::axpy(y, r, -alpha);
|
||||
|
||||
// z <- M*r
|
||||
cusp::multiply(M, r, z);
|
||||
|
||||
ValueType rz_old = rz;
|
||||
|
||||
// rz = <r^H, z>
|
||||
rz = cusp::blas::dotc(r, z);
|
||||
|
||||
// beta <- <r_{i+1},r_{i+1}>/<r,r>
|
||||
ValueType beta = rz / rz_old;
|
||||
|
||||
// p <- r + beta*p should be p <- z + beta*p
|
||||
cusp::blas::axpby(z, p, p, ValueType(1), beta);
|
||||
|
||||
normR = cusp::blas::nrm2(r)/normFactor;
|
||||
|
||||
count++;
|
||||
}
|
||||
// End the krylov solver
|
||||
|
||||
//final residual
|
||||
solverPerf->fRes = normR;
|
||||
solverPerf->nIterations = count;
|
||||
|
||||
//converged?
|
||||
if
|
||||
(
|
||||
solverPerf->fRes<=solverPerf->tol
|
||||
|| solverPerf->fRes/solverPerf->iRes<=solverPerf->relTol
|
||||
)
|
||||
{
|
||||
solverPerf->converged = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
solverPerf->converged = false;
|
||||
}
|
||||
|
||||
// Pass the solution vector back
|
||||
ces->X = X;
|
||||
}
|
173
src/cudaSolvers/cudaCG/cgDiag.cu
Normal file
173
src/cudaSolvers/cudaCG/cgDiag.cu
Normal file
|
@ -0,0 +1,173 @@
|
|||
/**********************************************************************\
|
||||
______ __ __ _______ _______ __ __ __ __ __ ___
|
||||
/ || | | | | ____|| ____|| | | | | \ | | | |/ /
|
||||
| ,----'| | | | | |__ | |__ | | | | | \| | | ' /
|
||||
| | | | | | | __| | __| | | | | | . ` | | <
|
||||
| `----.| `--' | | | | | | `----.| | | |\ | | . \
|
||||
\______| \______/ |__| |__| |_______||__| |__| \__| |__|\__\
|
||||
|
||||
Cuda For FOAM Link
|
||||
|
||||
cufflink is a library for linking numerical methods based on Nvidia's
|
||||
Compute Unified Device Architecture (CUDA™) C/C++ programming language
|
||||
and OpenFOAM®.
|
||||
|
||||
Please note that cufflink is not approved or endorsed by ESI-OpenCFD®
|
||||
Limited, the owner of the OpenFOAM® and OpenCFD® trademarks and
|
||||
producer of OpenFOAM® software.
|
||||
|
||||
The official web-site of OpenCFD® Limited is www.openfoam.com .
|
||||
|
||||
------------------------------------------------------------------------
|
||||
This file is part of cufflink.
|
||||
|
||||
cufflink is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
cufflink is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with cufflink. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
Author
|
||||
Daniel P. Combest. All rights reserved.
|
||||
Modifications by Dominik Christ, Wikki Ltd.
|
||||
|
||||
Description
|
||||
diagonal preconditioned conjugate gradient
|
||||
solver for symmetric Matrices using a CUSP CUDA™ based solver.
|
||||
|
||||
\**********************************************************************/
|
||||
|
||||
#include "cudaTypes.H"
|
||||
|
||||
//CUSP Includes
|
||||
#include <cusp/detail/config.h>
|
||||
#include <cusp/verify.h>
|
||||
#include <cusp/precond/diagonal.h>
|
||||
#include <cusp/coo_matrix.h>
|
||||
#include <cusp/ell_matrix.h>
|
||||
#include <cusp/blas.h>
|
||||
#include <cusp/multiply.h>
|
||||
|
||||
extern "C" void cgDiag
|
||||
(
|
||||
cuspEquationSystem* ces,
|
||||
cudaSolverPerformance* solverPerf
|
||||
)
|
||||
{
|
||||
// Populate A
|
||||
# include "fillCOOMatrix.H"
|
||||
|
||||
// Set storage
|
||||
// #include "cufflink/setGPUStorage.H"
|
||||
|
||||
if(solverPerf->debugCusp)
|
||||
{
|
||||
std::cout << " Using Cusp_Diagonal preconditioner\n";
|
||||
}
|
||||
|
||||
cusp::precond::diagonal<ValueType, MemorySpace> M(A);
|
||||
|
||||
// Start the krylov solver
|
||||
assert(A.num_rows == A.num_cols); // sanity check
|
||||
|
||||
const size_t N = A.num_rows;
|
||||
|
||||
// allocate workspace
|
||||
cusp::array1d<ValueType,MemorySpace> y(N);
|
||||
cusp::array1d<ValueType,MemorySpace> z(N);
|
||||
cusp::array1d<ValueType,MemorySpace> r(N);
|
||||
cusp::array1d<ValueType,MemorySpace> p(N);
|
||||
|
||||
// y <- Ax
|
||||
cusp::multiply(A, X, y);
|
||||
|
||||
//define the normalization factor
|
||||
ValueType normFactor = 1.0;
|
||||
|
||||
#include "buildNormFactor.H"
|
||||
|
||||
// r <- b - A*x
|
||||
cusp::blas::axpby(B, y, r, ValueType(1), ValueType(-1));
|
||||
|
||||
// z <- M*r
|
||||
cusp::multiply(M, r, z);
|
||||
|
||||
// p <- z
|
||||
cusp::blas::copy(z, p);
|
||||
|
||||
// rz = <r^H, z>
|
||||
ValueType rz = cusp::blas::dotc(r, z);
|
||||
|
||||
ValueType normR = cusp::blas::nrm2(r)/normFactor;
|
||||
ValueType normR0 = normR; // Initial residual
|
||||
solverPerf->iRes = normR0;
|
||||
int count = 0;
|
||||
|
||||
while
|
||||
(
|
||||
normR > (solverPerf->tol)
|
||||
&& count < (solverPerf->maxIter)
|
||||
&& normR/normR0 >= (solverPerf->relTol)
|
||||
|| count < solverPerf->minIter
|
||||
)
|
||||
{
|
||||
// y <- Ap
|
||||
cusp::multiply(A, p, y);
|
||||
|
||||
// alpha <- <r,z>/<y,p>
|
||||
ValueType alpha = rz / cusp::blas::dotc(y, p);
|
||||
|
||||
// x <- x + alpha * p
|
||||
cusp::blas::axpy(p, X, alpha);
|
||||
|
||||
// r <- r - alpha * y
|
||||
cusp::blas::axpy(y, r, -alpha);
|
||||
|
||||
// z <- M*r
|
||||
cusp::multiply(M, r, z);
|
||||
|
||||
ValueType rz_old = rz;
|
||||
|
||||
// rz = <r^H, z>
|
||||
rz = cusp::blas::dotc(r, z);
|
||||
|
||||
// beta <- <r_{i+1},r_{i+1}>/<r,r>
|
||||
ValueType beta = rz / rz_old;
|
||||
|
||||
// p <- r + beta*p should be p <- z + beta*p
|
||||
cusp::blas::axpby(z, p, p, ValueType(1), beta);
|
||||
|
||||
normR = cusp::blas::nrm2(r)/normFactor;
|
||||
|
||||
count++;
|
||||
}
|
||||
// End the krylov solver
|
||||
|
||||
// Final residual
|
||||
solverPerf->fRes = normR;
|
||||
solverPerf->nIterations = count;
|
||||
|
||||
// Converged?
|
||||
if
|
||||
(
|
||||
solverPerf->fRes<=solverPerf->tol
|
||||
|| solverPerf->fRes/solverPerf->iRes<=solverPerf->relTol
|
||||
)
|
||||
{
|
||||
solverPerf->converged = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
solverPerf->converged=false;
|
||||
}
|
||||
|
||||
// Pass the solution vector back
|
||||
ces->X = X;
|
||||
}
|
177
src/cudaSolvers/cudaCG/cudaCG.C
Normal file
177
src/cudaSolvers/cudaCG/cudaCG.C
Normal file
|
@ -0,0 +1,177 @@
|
|||
/*---------------------------------------------------------------------------*\
|
||||
========= |
|
||||
\\ / F ield | OpenFOAM: The Open Source CFD Toolbox
|
||||
\\ / O peration |
|
||||
\\ / A nd | Copyright held by original author
|
||||
\\/ M anipulation |
|
||||
-------------------------------------------------------------------------------
|
||||
License
|
||||
This file is part of OpenFOAM.
|
||||
|
||||
OpenFOAM is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU General Public License as published by the
|
||||
Free Software Foundation; either version 2 of the License, or (at your
|
||||
option) any later version.
|
||||
|
||||
OpenFOAM is distributed in the hope that it will be useful, but WITHOUT
|
||||
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OpenFOAM; if not, write to the Free Software Foundation,
|
||||
Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
\*---------------------------------------------------------------------------*/
|
||||
|
||||
#include "cudaCG.H"
|
||||
|
||||
// Preconditioner and solver are hardwired due to
|
||||
// code structure of Cusp library. Below are
|
||||
// external functions with solver/preconditioner combinations
|
||||
|
||||
// CG with diagonal preconditioning
|
||||
extern "C" void cgDiag
|
||||
(
|
||||
cuspEquationSystem* ces,
|
||||
cudaSolverPerformance* solverParam
|
||||
);
|
||||
|
||||
// CG with Ainv preconditioning
|
||||
extern "C" void cgAinv
|
||||
(
|
||||
cuspEquationSystem* ces,
|
||||
cudaSolverPerformance* solverPerf,
|
||||
ValueType drop_tolerance,
|
||||
int nonzero_per_row,
|
||||
bool lin_dropping,
|
||||
int lin_param
|
||||
);
|
||||
|
||||
// CG with amg preconditioning
|
||||
extern "C" void cgAmg
|
||||
(
|
||||
cuspEquationSystem* ces,
|
||||
cudaSolverPerformance* solverParam,
|
||||
ValueType theta
|
||||
);
|
||||
|
||||
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
|
||||
|
||||
namespace Foam
|
||||
{
|
||||
defineTypeNameAndDebug(cudaCG, 0);
|
||||
|
||||
lduSolver::addsymMatrixConstructorToTable<cudaCG>
|
||||
addcudaCGSymMatrixConstructorToTable_;
|
||||
}
|
||||
|
||||
|
||||
// * * * * * * * * * * * * * * * * Constructors * * * * * * * * * * * * * * //
|
||||
|
||||
// - Construct from matrix and solver data stream
|
||||
Foam::cudaCG::cudaCG
|
||||
(
|
||||
const word& fieldName,
|
||||
const lduMatrix& matrix,
|
||||
const FieldField<Field, scalar>& coupleBouCoeffs,
|
||||
const FieldField<Field, scalar>& coupleIntCoeffs,
|
||||
const lduInterfaceFieldPtrsList& interfaces,
|
||||
const dictionary& dict
|
||||
)
|
||||
:
|
||||
cudaSolver
|
||||
(
|
||||
fieldName,
|
||||
matrix,
|
||||
coupleBouCoeffs,
|
||||
coupleIntCoeffs,
|
||||
interfaces,
|
||||
dict
|
||||
)
|
||||
{}
|
||||
|
||||
|
||||
// * * * * * * * * * * * * * * * Member Functions * * * * * * * * * * * * * //
|
||||
|
||||
Foam::lduSolverPerformance Foam::cudaCG::solve
|
||||
(
|
||||
scalarField& x,
|
||||
const scalarField& b,
|
||||
const direction cmpt
|
||||
) const
|
||||
{
|
||||
// Initialize Cusp solver perfomance
|
||||
cudaSolverPerformance solverPerf = cudaSolverPerformanceDefault();
|
||||
solverPerf.minIter = minIter(); // Minimum iterations
|
||||
solverPerf.maxIter = maxIter(); // Maximum iterations
|
||||
solverPerf.relTol = relTolerance(); // Relative tolerance
|
||||
solverPerf.tol = tolerance(); // Tolerance
|
||||
|
||||
if (lduMatrix::debug >= 2)
|
||||
{
|
||||
solverPerf.debugCusp = true;
|
||||
}
|
||||
|
||||
// Initialize and copy matrix data to GPU
|
||||
cuspEquationSystem ces = createSymCuspMatrix(matrix(), x, b);
|
||||
|
||||
// Call solver externally
|
||||
word preconName(dict().lookup("preconditioner"));
|
||||
if (preconName == "diagonal")
|
||||
{
|
||||
cgDiag(&ces, &solverPerf);
|
||||
}
|
||||
else if(preconName == "Ainv")
|
||||
{
|
||||
cgAinv
|
||||
(
|
||||
&ces,
|
||||
&solverPerf,
|
||||
dict().lookupOrDefault<scalar>("dropTolerance", 0.1),
|
||||
dict().lookupOrDefault<label>("nonzeroPerRow", -1),
|
||||
dict().lookupOrDefault<bool>("LinDropping", false),
|
||||
dict().lookupOrDefault<label>("LinParameter", 1)
|
||||
);
|
||||
}
|
||||
else if(preconName == "amg")
|
||||
{
|
||||
cgAmg
|
||||
(
|
||||
&ces,
|
||||
&solverPerf,
|
||||
dict().lookupOrDefault<scalar>("theta", 0)
|
||||
);
|
||||
}
|
||||
else
|
||||
{
|
||||
FatalErrorIn("cudaCG::solver()")
|
||||
<< "Unknown preconditioner name. "
|
||||
<< "Options are:" << nl
|
||||
<< "(" << nl
|
||||
<< "diagonal" << nl
|
||||
<< "Ainv" << nl
|
||||
<< "amg" << nl
|
||||
<< ")" << nl
|
||||
<< abort(FatalError);
|
||||
}
|
||||
|
||||
// Copy the x vector back to Openfoam
|
||||
thrust::copy(ces.X.begin(), ces.X.end(), x.begin());
|
||||
|
||||
// Return solver output
|
||||
return lduSolverPerformance
|
||||
(
|
||||
typeName,
|
||||
fieldName(),
|
||||
solverPerf.iRes,
|
||||
solverPerf.fRes,
|
||||
solverPerf.nIterations,
|
||||
solverPerf.converged,
|
||||
solverPerf.singular
|
||||
);
|
||||
|
||||
}
|
||||
|
||||
|
||||
// ************************************************************************* //
|
114
src/cudaSolvers/cudaCG/cudaCG.H
Normal file
114
src/cudaSolvers/cudaCG/cudaCG.H
Normal file
|
@ -0,0 +1,114 @@
|
|||
/*---------------------------------------------------------------------------*\
|
||||
========= |
|
||||
\\ / F ield | OpenFOAM: The Open Source CFD Toolbox
|
||||
\\ / O peration |
|
||||
\\ / A nd | Copyright held by original author
|
||||
\\/ M anipulation |
|
||||
-------------------------------------------------------------------------------
|
||||
License
|
||||
This file is part of OpenFOAM.
|
||||
|
||||
OpenFOAM is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU General Public License as published by the
|
||||
Free Software Foundation; either version 2 of the License, or (at your
|
||||
option) any later version.
|
||||
|
||||
OpenFOAM is distributed in the hope that it will be useful, but WITHOUT
|
||||
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OpenFOAM; if not, write to the Free Software Foundation,
|
||||
Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
Class
|
||||
cudaCG
|
||||
|
||||
Description
|
||||
Preconditioned Conjugate Gradient solver with run-time selectable
|
||||
preconditioning for use with GPU
|
||||
|
||||
Author
|
||||
Dominik Christ, Wikki Ltd.
|
||||
Based on Cufflink library by Daniel P. Combest
|
||||
|
||||
SourceFiles
|
||||
cudaCG.C
|
||||
|
||||
\*---------------------------------------------------------------------------*/
|
||||
|
||||
#ifndef cudaCG_H
|
||||
#define cudaCG_H
|
||||
|
||||
#include "cudaSolver.H"
|
||||
|
||||
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
|
||||
|
||||
namespace Foam
|
||||
{
|
||||
|
||||
/*---------------------------------------------------------------------------*\
|
||||
Class cudaCG Declaration
|
||||
\*---------------------------------------------------------------------------*/
|
||||
|
||||
class cudaCG
|
||||
:
|
||||
public cudaSolver
|
||||
{
|
||||
// Private Member Functions
|
||||
|
||||
//- Disallow default bitwise copy construct
|
||||
cudaCG(const cudaCG&);
|
||||
|
||||
//- Disallow default bitwise assignment
|
||||
void operator=(const cudaCG&);
|
||||
|
||||
|
||||
public:
|
||||
|
||||
//- Runtime type information
|
||||
TypeName("cudaCG");
|
||||
|
||||
|
||||
// Constructors
|
||||
|
||||
//- Construct from matrix components and solver data stream
|
||||
cudaCG
|
||||
(
|
||||
const word& fieldName,
|
||||
const lduMatrix& matrix,
|
||||
const FieldField<Field, scalar>& coupleBouCoeffs,
|
||||
const FieldField<Field, scalar>& coupleIntCoeffs,
|
||||
const lduInterfaceFieldPtrsList& interfaces,
|
||||
const dictionary& dict
|
||||
);
|
||||
|
||||
|
||||
// Destructor
|
||||
|
||||
virtual ~cudaCG()
|
||||
{}
|
||||
|
||||
|
||||
// Member Functions
|
||||
|
||||
//- Solve the matrix with this solver
|
||||
virtual lduSolverPerformance solve
|
||||
(
|
||||
scalarField& x,
|
||||
const scalarField& b,
|
||||
const direction cmpt = 0
|
||||
) const;
|
||||
};
|
||||
|
||||
|
||||
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
|
||||
|
||||
} // End namespace Foam
|
||||
|
||||
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
|
||||
|
||||
#endif
|
||||
|
||||
// ************************************************************************* //
|
239
src/cudaSolvers/cudaSolver/cudaSolver.C
Normal file
239
src/cudaSolvers/cudaSolver/cudaSolver.C
Normal file
|
@ -0,0 +1,239 @@
|
|||
/*---------------------------------------------------------------------------*\
|
||||
========= |
|
||||
\\ / F ield | OpenFOAM: The Open Source CFD Toolbox
|
||||
\\ / O peration |
|
||||
\\ / A nd | Copyright held by original author
|
||||
\\/ M anipulation |
|
||||
-------------------------------------------------------------------------------
|
||||
License
|
||||
This file is part of OpenFOAM.
|
||||
|
||||
OpenFOAM is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU General Public License as published by the
|
||||
Free Software Foundation; either version 2 of the License, or (at your
|
||||
option) any later version.
|
||||
|
||||
OpenFOAM is distributed in the hope that it will be useful, but WITHOUT
|
||||
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OpenFOAM; if not, write to the Free Software Foundation,
|
||||
Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
\*---------------------------------------------------------------------------*/
|
||||
|
||||
#include "cudaSolver.H"
|
||||
|
||||
|
||||
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
|
||||
|
||||
namespace Foam
|
||||
{
|
||||
defineTypeNameAndDebug(cudaSolver, 0);
|
||||
}
|
||||
|
||||
|
||||
// * * * * * * * * * * * * * * * * Constructors * * * * * * * * * * * * * * //
|
||||
|
||||
Foam::cudaSolver::cudaSolver
|
||||
(
|
||||
const word& fieldName,
|
||||
const lduMatrix& matrix,
|
||||
const FieldField<Field, scalar>& coupleBouCoeffs,
|
||||
const FieldField<Field, scalar>& coupleIntCoeffs,
|
||||
const lduInterfaceFieldPtrsList& interfaces,
|
||||
const dictionary& dict
|
||||
)
|
||||
:
|
||||
lduSolver
|
||||
(
|
||||
fieldName,
|
||||
matrix,
|
||||
coupleBouCoeffs,
|
||||
coupleIntCoeffs,
|
||||
interfaces,
|
||||
dict
|
||||
)
|
||||
{}
|
||||
|
||||
|
||||
// * * * * * * * * * * * * * * * Member Functions * * * * * * * * * * * * * //
|
||||
|
||||
cuspEquationSystem Foam::cudaSolver::createSymCuspMatrix
|
||||
(
|
||||
const lduMatrix& matrix,
|
||||
const scalarField& x,
|
||||
const scalarField& b
|
||||
) const
|
||||
{
|
||||
cuspEquationSystem ces;
|
||||
ces.nCells = x.size();
|
||||
ces.nFaces = matrix.lower().size();
|
||||
|
||||
ces.A = cusp::coo_matrix<IndexType, ValueType, hostMemorySpace>
|
||||
(
|
||||
ces.nCells,
|
||||
ces.nCells,
|
||||
ces.nCells+2*ces.nFaces
|
||||
);
|
||||
ces.X = cusp::array1d< ValueType, hostMemorySpace>(ces.nCells);
|
||||
ces.B = cusp::array1d< ValueType, hostMemorySpace>(ces.nCells);
|
||||
|
||||
// Copy values of lduMatrix diag to A COO matrix
|
||||
thrust::copy
|
||||
(
|
||||
matrix.diag().begin(),
|
||||
matrix.diag().end(),
|
||||
ces.A.values.begin()
|
||||
);
|
||||
|
||||
// Copy values of lduMatrix lower to A COO matrix
|
||||
thrust::copy
|
||||
(
|
||||
matrix.lower().begin(),
|
||||
matrix.lower().end(),
|
||||
ces.A.values.begin()+ces.nCells
|
||||
);
|
||||
|
||||
// Since matrix is symmetric, do not copy upper values to save time
|
||||
// -> performed on GPU
|
||||
// Copy row indices of lower into A COO matrix
|
||||
thrust::copy
|
||||
(
|
||||
matrix.lduAddr().upperAddr().begin(),
|
||||
matrix.lduAddr().upperAddr().end(),
|
||||
ces.A.row_indices.begin()+ces.nCells
|
||||
);
|
||||
// Copy column indices of lower into A COO matrix
|
||||
thrust::copy
|
||||
(
|
||||
matrix.lduAddr().lowerAddr().begin(),
|
||||
matrix.lduAddr().lowerAddr().end(),
|
||||
ces.A.column_indices.begin()+ces.nCells
|
||||
);
|
||||
|
||||
// Do not initialize the row and column values of diag to save time
|
||||
// -> performed on GPU
|
||||
// Copy x of lower into x vector
|
||||
thrust::copy(x.begin(), x.end(), ces.X.begin());
|
||||
// Copy b of lower into b vector
|
||||
thrust::copy(b.begin(), b.end(), ces.B.begin());
|
||||
|
||||
return ces;
|
||||
}
|
||||
|
||||
cuspEquationSystem Foam::cudaSolver::createAsymCuspMatrix
|
||||
(
|
||||
const lduMatrix& matrix,
|
||||
const scalarField& x,
|
||||
const scalarField& b
|
||||
) const
|
||||
{
|
||||
cuspEquationSystem ces;
|
||||
ces.nCells = x.size();
|
||||
ces.nFaces = matrix.lower().size();
|
||||
|
||||
ces.A = cusp::coo_matrix<IndexType, ValueType, hostMemorySpace>
|
||||
(
|
||||
ces.nCells,
|
||||
ces.nCells,
|
||||
ces.nCells + 2*ces.nFaces
|
||||
);
|
||||
ces.X = cusp::array1d< ValueType, hostMemorySpace>(ces.nCells);
|
||||
ces.B = cusp::array1d< ValueType, hostMemorySpace>(ces.nCells);
|
||||
|
||||
// Copy values from the lduMatrix to our equation system
|
||||
// Copy values of lduMatrix diag to A COO matrix
|
||||
thrust::copy
|
||||
(
|
||||
matrix.diag().begin(),
|
||||
matrix.diag().end(),
|
||||
ces.A.values.begin()
|
||||
);
|
||||
|
||||
// Copy values of lduMatrix lower to A COO matrix
|
||||
thrust::copy
|
||||
(
|
||||
matrix.lower().begin(),
|
||||
matrix.lower().end(),
|
||||
ces.A.values.begin() + ces.nCells
|
||||
);
|
||||
|
||||
// Copy values of lduMatrix upper to A COO matrix
|
||||
thrust::copy
|
||||
(
|
||||
matrix.upper().begin(),
|
||||
matrix.upper().end(),
|
||||
ces.A.values.begin() + ces.nCells + ces.nFaces
|
||||
);
|
||||
|
||||
// Copy row and column indices of lower and upper to our equations system
|
||||
// do not initialize the row and column values of diag to save time
|
||||
// -> performed on GPU
|
||||
|
||||
// Copy row indices of lower into A COO matrix
|
||||
thrust::copy
|
||||
(
|
||||
matrix.lduAddr().upperAddr().begin(),
|
||||
matrix.lduAddr().upperAddr().end(),
|
||||
ces.A.row_indices.begin() + ces.nCells
|
||||
);
|
||||
|
||||
// Copy column indices of lower into A COO matrix
|
||||
thrust::copy
|
||||
(
|
||||
matrix.lduAddr().lowerAddr().begin(),
|
||||
matrix.lduAddr().lowerAddr().end(),
|
||||
ces.A.column_indices.begin() + ces.nCells
|
||||
);
|
||||
|
||||
// Copy row indices of upper into A COO matrix
|
||||
thrust::copy
|
||||
(
|
||||
matrix.lduAddr().lowerAddr().begin(),
|
||||
matrix.lduAddr().lowerAddr().end(),
|
||||
ces.A.row_indices.begin() + ces.nCells + ces.nFaces
|
||||
);
|
||||
|
||||
// Copy column indices of upper into A COO matrix
|
||||
thrust::copy
|
||||
(
|
||||
matrix.lduAddr().upperAddr().begin(),
|
||||
matrix.lduAddr().upperAddr().end(),
|
||||
ces.A.column_indices.begin() + ces.nCells + ces.nFaces
|
||||
);
|
||||
|
||||
// Copy x of lower into x vector
|
||||
thrust::copy(x.begin(), x.end(), ces.X.begin());
|
||||
// Copy b of lower into b vector
|
||||
thrust::copy(b.begin(), b.end(), ces.B.begin());
|
||||
return ces;
|
||||
}
|
||||
|
||||
cudaSolverPerformance Foam::cudaSolver::cudaSolverPerformanceDefault() const
|
||||
{
|
||||
cudaSolverPerformance csp;
|
||||
|
||||
csp.minIter = 0;
|
||||
csp.maxIter = 1000;
|
||||
csp.relTol = 0;
|
||||
csp.tol = 1e-6;
|
||||
|
||||
csp.nIterations = 0;
|
||||
csp.iRes = -1;
|
||||
csp.fRes = -1;
|
||||
csp.converged = false;
|
||||
csp.singular = false;
|
||||
|
||||
csp.debugCusp = false;
|
||||
|
||||
return csp;
|
||||
}
|
||||
|
||||
|
||||
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
|
||||
|
||||
|
||||
// ************************************************************************* //
|
136
src/cudaSolvers/cudaSolver/cudaSolver.H
Normal file
136
src/cudaSolvers/cudaSolver/cudaSolver.H
Normal file
|
@ -0,0 +1,136 @@
|
|||
/*---------------------------------------------------------------------------*\
|
||||
========= |
|
||||
\\ / F ield | OpenFOAM: The Open Source CFD Toolbox
|
||||
\\ / O peration |
|
||||
\\ / A nd | Copyright held by original author
|
||||
\\/ M anipulation |
|
||||
-------------------------------------------------------------------------------
|
||||
License
|
||||
This file is part of OpenFOAM.
|
||||
|
||||
OpenFOAM is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU General Public License as published by the
|
||||
Free Software Foundation; either version 2 of the License, or (at your
|
||||
option) any later version.
|
||||
|
||||
OpenFOAM is distributed in the hope that it will be useful, but WITHOUT
|
||||
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OpenFOAM; if not, write to the Free Software Foundation,
|
||||
Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
Class
|
||||
Foam::cudaSolver
|
||||
|
||||
Description
|
||||
Base class for GPU based solvers
|
||||
|
||||
SourceFiles
|
||||
cudaSolver.C
|
||||
|
||||
Author
|
||||
Dominik Christ, Wikki Ltd.
|
||||
Based on Cufflink library by Daniel P. Combest
|
||||
|
||||
\*---------------------------------------------------------------------------*/
|
||||
|
||||
#ifndef cudaSolver_H
|
||||
#define cudaSolver_H
|
||||
|
||||
#include "cudaTypes.H"
|
||||
#include "lduMatrix.H"
|
||||
#include "autoPtr.H"
|
||||
#include "runTimeSelectionTables.H"
|
||||
|
||||
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
|
||||
|
||||
namespace Foam
|
||||
{
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*\
|
||||
Class cudaSolver Declaration
|
||||
\*---------------------------------------------------------------------------*/
|
||||
|
||||
class cudaSolver
|
||||
:
|
||||
public lduSolver
|
||||
{
|
||||
|
||||
// Private Member Functions
|
||||
|
||||
//- Disallow default bitwise copy construct
|
||||
cudaSolver(const cudaSolver&);
|
||||
|
||||
//- Disallow default bitwise assignment
|
||||
void operator=(const cudaSolver&);
|
||||
|
||||
|
||||
public:
|
||||
|
||||
//- Runtime type information
|
||||
TypeName("cudaSolver");
|
||||
|
||||
|
||||
// Constructors
|
||||
|
||||
//- Construct from components
|
||||
cudaSolver
|
||||
(
|
||||
const word& fieldName,
|
||||
const lduMatrix& matrix,
|
||||
const FieldField<Field, scalar>& coupleBouCoeffs,
|
||||
const FieldField<Field, scalar>& coupleIntCoeffs,
|
||||
const lduInterfaceFieldPtrsList& interfaces,
|
||||
const dictionary& dict
|
||||
);
|
||||
|
||||
|
||||
// Destructor
|
||||
|
||||
virtual ~cudaSolver()
|
||||
{}
|
||||
|
||||
|
||||
// Member Functions
|
||||
//- Solve the matrix with this solver
|
||||
virtual lduSolverPerformance solve
|
||||
(
|
||||
scalarField& x,
|
||||
const scalarField& b,
|
||||
const direction cmpt = 0
|
||||
) const = 0;
|
||||
|
||||
//- Create COO matrix from symmetric lduMatrix
|
||||
cuspEquationSystem createSymCuspMatrix
|
||||
(
|
||||
const lduMatrix& matrix,
|
||||
const scalarField& x,
|
||||
const scalarField& b
|
||||
) const;
|
||||
|
||||
//- Create COO matrix from asymmetric lduMatrix
|
||||
cuspEquationSystem createAsymCuspMatrix
|
||||
(
|
||||
const lduMatrix& matrix,
|
||||
const scalarField& x,
|
||||
const scalarField& b
|
||||
) const;
|
||||
|
||||
cudaSolverPerformance cudaSolverPerformanceDefault() const;
|
||||
|
||||
};
|
||||
|
||||
|
||||
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
|
||||
|
||||
} // End namespace Foam
|
||||
|
||||
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
|
||||
|
||||
#endif
|
||||
|
||||
// ************************************************************************* //
|
99
src/cudaSolvers/cudaSolver/cudaTypes.H
Normal file
99
src/cudaSolvers/cudaSolver/cudaTypes.H
Normal file
|
@ -0,0 +1,99 @@
|
|||
/*---------------------------------------------------------------------------*\
|
||||
========= |
|
||||
\\ / F ield | OpenFOAM: The Open Source CFD Toolbox
|
||||
\\ / O peration |
|
||||
\\ / A nd | Copyright held by original author
|
||||
\\/ M anipulation |
|
||||
-------------------------------------------------------------------------------
|
||||
License
|
||||
This file is part of OpenFOAM.
|
||||
|
||||
OpenFOAM is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU General Public License as published by the
|
||||
Free Software Foundation; either version 2 of the License, or (at your
|
||||
option) any later version.
|
||||
|
||||
OpenFOAM is distributed in the hope that it will be useful, but WITHOUT
|
||||
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OpenFOAM; if not, write to the Free Software Foundation,
|
||||
Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
Class
|
||||
Foam::cudaSolver
|
||||
|
||||
Description
|
||||
Base class for GPU based solvers
|
||||
|
||||
SourceFiles
|
||||
cudaSolver.C
|
||||
|
||||
Author
|
||||
Dominik Christ, Wikki Ltd.
|
||||
Based on Cufflink library by Daniel P. Combest
|
||||
|
||||
\*---------------------------------------------------------------------------*/
|
||||
#ifndef cudaTypes_H
|
||||
#define cudaTypes_H
|
||||
|
||||
#if defined(WM_DP)
|
||||
|
||||
#define MPI_SCALAR MPI_DOUBLE
|
||||
typedef double ValueType;
|
||||
|
||||
#elif defined(WM_SP)
|
||||
|
||||
#define MPI_SCALAR MPI_FLOAT
|
||||
typedef float ValueType;
|
||||
|
||||
#endif
|
||||
|
||||
#include <cusp/coo_matrix.h>
|
||||
#include <cusp/memory.h>
|
||||
#include <thrust/host_vector.h>
|
||||
|
||||
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
|
||||
|
||||
typedef cusp::device_memory MemorySpace;
|
||||
typedef cusp::host_memory hostMemorySpace;
|
||||
typedef int IndexType;
|
||||
|
||||
// used to prevent floating point exception
|
||||
const ValueType SMALL = 1e-20;
|
||||
|
||||
|
||||
struct cuspEquationSystem
|
||||
{
|
||||
cusp::coo_matrix<IndexType,ValueType,hostMemorySpace> A;
|
||||
cusp::array1d< ValueType, hostMemorySpace> X;
|
||||
cusp::array1d< ValueType, hostMemorySpace> B;
|
||||
int nCells;
|
||||
int nFaces;
|
||||
};
|
||||
|
||||
|
||||
struct cudaSolverPerformance
|
||||
{
|
||||
int minIter;
|
||||
int maxIter;
|
||||
double relTol;
|
||||
double tol;
|
||||
|
||||
int nIterations;
|
||||
double iRes;
|
||||
double fRes;
|
||||
bool converged;
|
||||
bool singular;
|
||||
|
||||
bool debugCusp;
|
||||
};
|
||||
|
||||
|
||||
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
|
||||
|
||||
#endif
|
||||
|
||||
// ************************************************************************* //
|
85
src/cudaSolvers/include/buildNormFactor.H
Normal file
85
src/cudaSolvers/include/buildNormFactor.H
Normal file
|
@ -0,0 +1,85 @@
|
|||
/**********************************************************************\
|
||||
______ __ __ _______ _______ __ __ __ __ __ ___
|
||||
/ || | | | | ____|| ____|| | | | | \ | | | |/ /
|
||||
| ,----'| | | | | |__ | |__ | | | | | \| | | ' /
|
||||
| | | | | | | __| | __| | | | | | . ` | | <
|
||||
| `----.| `--' | | | | | | `----.| | | |\ | | . \
|
||||
\______| \______/ |__| |__| |_______||__| |__| \__| |__|\__\
|
||||
|
||||
Cuda For FOAM Link
|
||||
|
||||
cufflink is a library for linking numerical methods based on Nvidia's
|
||||
Compute Unified Device Architecture (CUDA™) C/C++ programming language
|
||||
and OpenFOAM®.
|
||||
|
||||
Please note that cufflink is not approved or endorsed by OpenCFD®
|
||||
Limited, the owner of the OpenFOAM® and OpenCFD® trademarks and
|
||||
producer of OpenFOAM® software.
|
||||
|
||||
The official web-site of OpenCFD® Limited is www.openfoam.com .
|
||||
|
||||
------------------------------------------------------------------------
|
||||
This file is part of cufflink.
|
||||
|
||||
cufflink is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
cufflink is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with cufflink. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
Author
|
||||
Daniel P. Combest. All rights reserved.
|
||||
|
||||
Description
|
||||
Builds normalization factor in exactly teh same way OpenFOAM® does
|
||||
in order to normalize the residual to 1. See provided
|
||||
documentation for exact formulation of normalization factor
|
||||
|
||||
\**********************************************************************/
|
||||
|
||||
|
||||
// Build the normfactor as OF-1.6-ext does in file
|
||||
// ($FOAM_SRC/OpenFOAM/matrices/lduMatrix/lduMatrix/lduMatrixSolver.C)
|
||||
|
||||
//all variables will be deleted once out of scope
|
||||
{
|
||||
|
||||
//compute average of x vector
|
||||
ValueType xRef = thrust::reduce
|
||||
(
|
||||
X.begin(),
|
||||
X.begin() + A.num_rows
|
||||
)/ValueType(A.num_rows);
|
||||
|
||||
//vector of average X values
|
||||
cusp::array1d<ValueType,MemorySpace> xBar(N,xRef);
|
||||
|
||||
//holds A*xBar
|
||||
cusp::array1d<ValueType,MemorySpace> yBar(N);
|
||||
|
||||
//compute ybar<-A*xBar
|
||||
cusp::multiply(A, xBar, yBar);
|
||||
|
||||
cusp::array1d<ValueType,MemorySpace> ymyBar(N); // holds y - yBar
|
||||
cusp::array1d<ValueType,MemorySpace> bmyBar(N); // holds b - yBar
|
||||
|
||||
// Calculate: ymyBar <- y - yBar
|
||||
cusp::blas::axpby(y, yBar, ymyBar, ValueType(1), ValueType(-1));
|
||||
// Calculate: bmyBar <- b - yBar
|
||||
cusp::blas::axpby(B, yBar, bmyBar, ValueType(1), ValueType(-1));
|
||||
|
||||
//compute norm factor exactly as OpenFOAM does
|
||||
normFactor = cusp::blas::nrm2(ymyBar) + cusp::blas::nrm2(bmyBar) + SMALL;
|
||||
|
||||
if (solverPerf->debugCusp)
|
||||
{
|
||||
std::cout<<" Normalisation factor = "<<normFactor<<"\n";
|
||||
}
|
||||
}
|
85
src/cudaSolvers/include/fillCOOMatrix.H
Normal file
85
src/cudaSolvers/include/fillCOOMatrix.H
Normal file
|
@ -0,0 +1,85 @@
|
|||
/**********************************************************************\
|
||||
______ __ __ _______ _______ __ __ __ __ __ ___
|
||||
/ || | | | | ____|| ____|| | | | | \ | | | |/ /
|
||||
| ,----'| | | | | |__ | |__ | | | | | \| | | ' /
|
||||
| | | | | | | __| | __| | | | | | . ` | | <
|
||||
| `----.| `--' | | | | | | `----.| | | |\ | | . \
|
||||
\______| \______/ |__| |__| |_______||__| |__| \__| |__|\__\
|
||||
|
||||
Cuda For FOAM Link
|
||||
|
||||
cufflink is a library for linking numerical methods based on Nvidia's
|
||||
Compute Unified Device Architecture (CUDA™) C/C + + programming language
|
||||
and OpenFOAM®.
|
||||
|
||||
Please note that cufflink is not approved or endorsed by OpenCFD®
|
||||
Limited, the owner of the OpenFOAM® and OpenCFD® trademarks and
|
||||
producer of OpenFOAM® software.
|
||||
|
||||
The official web-site of OpenCFD® Limited is www.openfoam.com .
|
||||
|
||||
------------------------------------------------------------------------
|
||||
This file is part of cufflink.
|
||||
|
||||
cufflink is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
cufflink is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with cufflink. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
Author
|
||||
Daniel P. Combest. All rights reserved.
|
||||
|
||||
Description
|
||||
Populate the COO matrix
|
||||
|
||||
\**********************************************************************/
|
||||
|
||||
// Fill in the rest of the diag (rows and col),
|
||||
// upper and upper.rows and upper.cols
|
||||
cusp::coo_matrix<IndexType, ValueType, MemorySpace> A(ces->A);
|
||||
cusp::array1d<ValueType, MemorySpace> X(ces->X);
|
||||
cusp::array1d<ValueType, MemorySpace> B(ces->B);
|
||||
|
||||
// Determine row indices of diagonal values and fill A COO matrix
|
||||
thrust::sequence(A.row_indices.begin(), A.row_indices.begin() + ces->nCells);
|
||||
|
||||
// Determine column indices of diagonal values and fill A COO matrix
|
||||
thrust::sequence
|
||||
(
|
||||
A.column_indices.begin(),
|
||||
A.column_indices.begin() + ces->nCells
|
||||
);
|
||||
|
||||
// Copy values of lower into upper in COO matrix
|
||||
thrust::copy
|
||||
(
|
||||
A.values.begin() + ces->nCells,
|
||||
A.values.begin() + ces->nCells + ces->nFaces,
|
||||
A.values.begin() + ces->nCells + ces->nFaces
|
||||
);
|
||||
|
||||
// Copy row indices of lower to columns of upper into A COO matrix
|
||||
thrust::copy
|
||||
(
|
||||
A.row_indices.begin() + ces->nCells,
|
||||
A.row_indices.begin() + ces->nCells + ces->nFaces,
|
||||
A.column_indices.begin() + ces->nCells + ces->nFaces
|
||||
);
|
||||
|
||||
// Copy column indices of lower to rows of upper into A COO matrix
|
||||
thrust::copy
|
||||
(
|
||||
A.column_indices.begin() + ces->nCells,
|
||||
A.column_indices.begin() + ces->nCells + ces->nFaces,
|
||||
A.row_indices.begin() + ces->nCells + ces->nFaces
|
||||
);
|
||||
|
||||
A.sort_by_row_and_column(); // Speeds code up a little bit more
|
|
@ -8,3 +8,4 @@ include $(GENERAL_RULES)/standard
|
|||
include $(RULES)/X
|
||||
include $(RULES)/c
|
||||
include $(RULES)/c++
|
||||
include $(RULES)/nvcc
|
||||
|
|
Reference in a new issue