From db4d4cefe7deb048ed430fe27bf04a873948e58e Mon Sep 17 00:00:00 2001 From: Hrvoje Jasak Date: Fri, 11 Oct 2013 14:12:56 +0100 Subject: [PATCH] Hand-merged CUDA support --- etc/prefs.csh-EXAMPLE | 10 +- etc/prefs.sh-EXAMPLE | 10 +- etc/settings.csh | 21 +- etc/settings.sh | 14 +- src/Allwmake | 3 + src/cudaSolvers/Allwmake | 13 ++ src/cudaSolvers/Make/files | 12 + src/cudaSolvers/Make/options | 5 + src/cudaSolvers/cudaBiCGStab/bicgAinv.cu | 246 ++++++++++++++++++++ src/cudaSolvers/cudaBiCGStab/bicgDiag.cu | 235 +++++++++++++++++++ src/cudaSolvers/cudaBiCGStab/cudaBiCGStab.C | 168 +++++++++++++ src/cudaSolvers/cudaBiCGStab/cudaBiCGStab.H | 114 +++++++++ src/cudaSolvers/cudaCG/cgAinv.cu | 184 +++++++++++++++ src/cudaSolvers/cudaCG/cgAmg.cu | 174 ++++++++++++++ src/cudaSolvers/cudaCG/cgDiag.cu | 173 ++++++++++++++ src/cudaSolvers/cudaCG/cudaCG.C | 177 ++++++++++++++ src/cudaSolvers/cudaCG/cudaCG.H | 114 +++++++++ src/cudaSolvers/cudaSolver/cudaSolver.C | 239 +++++++++++++++++++ src/cudaSolvers/cudaSolver/cudaSolver.H | 136 +++++++++++ src/cudaSolvers/cudaSolver/cudaTypes.H | 99 ++++++++ src/cudaSolvers/include/buildNormFactor.H | 85 +++++++ src/cudaSolvers/include/fillCOOMatrix.H | 85 +++++++ wmake/rules/linux64Gcc/general | 1 + 23 files changed, 2313 insertions(+), 5 deletions(-) create mode 100755 src/cudaSolvers/Allwmake create mode 100644 src/cudaSolvers/Make/files create mode 100644 src/cudaSolvers/Make/options create mode 100644 src/cudaSolvers/cudaBiCGStab/bicgAinv.cu create mode 100644 src/cudaSolvers/cudaBiCGStab/bicgDiag.cu create mode 100644 src/cudaSolvers/cudaBiCGStab/cudaBiCGStab.C create mode 100644 src/cudaSolvers/cudaBiCGStab/cudaBiCGStab.H create mode 100644 src/cudaSolvers/cudaCG/cgAinv.cu create mode 100644 src/cudaSolvers/cudaCG/cgAmg.cu create mode 100644 src/cudaSolvers/cudaCG/cgDiag.cu create mode 100644 src/cudaSolvers/cudaCG/cudaCG.C create mode 100644 src/cudaSolvers/cudaCG/cudaCG.H create mode 100644 src/cudaSolvers/cudaSolver/cudaSolver.C create mode 100644 src/cudaSolvers/cudaSolver/cudaSolver.H create mode 100644 src/cudaSolvers/cudaSolver/cudaTypes.H create mode 100644 src/cudaSolvers/include/buildNormFactor.H create mode 100644 src/cudaSolvers/include/fillCOOMatrix.H diff --git a/etc/prefs.csh-EXAMPLE b/etc/prefs.csh-EXAMPLE index f6df78398..1fea9faa0 100644 --- a/etc/prefs.csh-EXAMPLE +++ b/etc/prefs.csh-EXAMPLE @@ -2,7 +2,7 @@ # ========= | # \\ / F ield | OpenFOAM: The Open Source CFD Toolbox # \\ / O peration | -# \\ / A nd | Copyright (C) 2010-2010 OpenCFD Ltd. +# \\ / A nd | Copyright (C) 2013 Hrvoje Jasak # \\/ M anipulation | #------------------------------------------------------------------------------ # License @@ -62,6 +62,14 @@ # So build your ThirdParty directory accordingly. # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# System installed CUDA +#setenv CUDA_SYSTEM 1 +#setenv CUDA_DIR path_to_system_installed_cuda +#setenv CUDA_BIN_DIR $CUDA_DIR/bin +#setenv CUDA_LIB_DIR $CUDA_DIR/lib +#setenv CUDA_INCLUDE_DIR $CUDA_DIR/include +#setenv CUDA_ARCH sm_20 + # System installed Mesquite #setenv MESQUITE_SYSTEM 1 #setenv MESQUITE_DIR path_to_system_installed_mesquite diff --git a/etc/prefs.sh-EXAMPLE b/etc/prefs.sh-EXAMPLE index 080826808..cf377cc21 100644 --- a/etc/prefs.sh-EXAMPLE +++ b/etc/prefs.sh-EXAMPLE @@ -2,7 +2,7 @@ # ========= | # \\ / F ield | OpenFOAM: The Open Source CFD Toolbox # \\ / O peration | -# \\ / A nd | Copyright (C) 2010-2010 OpenCFD Ltd. +# \\ / A nd | Copyright (C) 2013 Hrvoje Jasak # \\/ M anipulation | #------------------------------------------------------------------------------ # License @@ -64,6 +64,14 @@ compilerInstall=System # So build your ThirdParty directory accordingly. # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# System installed CUDA +#export CUDA_SYSTEM=1 +#export CUDA_DIR=path_to_system_installed_cuda +#export CUDA_BIN_DIR=$CUDA_DIR/bin +#export CUDA_LIB_DIR=$CUDA_DIR/lib +#export CUDA_INCLUDE_DIR=$CUDA_DIR/include +#export CUDA_ARCH=sm_20 + # System installed Mesquite #export MESQUITE_SYSTEM=1 #export MESQUITE_DIR=path_to_system_installed_mesquite diff --git a/etc/settings.csh b/etc/settings.csh index c37b66701..97db1a8c1 100644 --- a/etc/settings.csh +++ b/etc/settings.csh @@ -211,11 +211,11 @@ case SYSTEMOPENMPI: endif else # Here, we assume your environment is already set for running - # and developping with openmpi. + # and developing with openmpi. # # Initialize OPENMPI_BIN_DIR using the path to mpicc set mpicc_cmd=`which mpicc` - setenv OPENMPI_BIN_DIR `dirname $mpicc_cmd` + setenv OPENMPI_BIN_DIR `dirname $mpicc_cmd` unset mpicc_cmd endif @@ -382,6 +382,23 @@ else setenv MPI_BUFFER_SIZE $minBufferSize endif +# CUDA library +# ~~~~~~~~~~~~ +if ( $?CUDA_SYSTEM == 0 && -e /usr/local/cuda-5.5/bin/nvcc ) then + setenv CUDA_DIR /usr/local/cuda-5.5 + setenv CUDA_BIN_DIR $CUDA_DIR/bin + setenv CUDA_LIB_DIR $CUDA_DIR/lib64 + setenv CUDA_INCLUDE_DIR $CUDA_DIR/include + setenv CUDA_ARCH sm_20 +endif + +if ( $?CUDA_BIN_DIR ) then + _foamAddPath $CUDA_BIN_DIR +endif + +if ( $?CUDA_LIB_DIR ) then + _foamAddLib $CUDA_LIB_DIR +endif # CGAL library if available # ~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/etc/settings.sh b/etc/settings.sh index d9c15cb9f..a0565c297 100644 --- a/etc/settings.sh +++ b/etc/settings.sh @@ -432,6 +432,18 @@ fi export MPI_BUFFER_SIZE +# CUDA if available +# ~~~~~~~~~~~~~~~~~ +[ -z "$CUDA_SYSTEM" ] && [ -e /usr/local/cuda-5.5/bin/nvcc ] && { + export CUDA_DIR=/usr/local/cuda-5.5 + export CUDA_BIN_DIR=$CUDA_DIR/bin + export CUDA_LIB_DIR=$CUDA_DIR/lib64 + export CUDA_INCLUDE_DIR=$CUDA_DIR/include +} + +[ -d "$CUDA_LIB_DIR" ] && _foamAddPath $CUDA_BIN_DIR && _foamAddLib $CUDA_LIB_DIR + + # CGAL library if available # ~~~~~~~~~~~~~~~~~~~~~~~~~ [ -d "$CGAL_LIB_DIR" ] && _foamAddLib $CGAL_LIB_DIR @@ -472,7 +484,7 @@ export MPI_BUFFER_SIZE # Load Metis library # ~~~~~~~~~~~~~~~~~~ -[ -z "$METIS_SYSTEM" ] && [ -d $WM_THIRD_PARTY_DIR/packages/metis-5.0pre2/platforms/$WM_OPTIONS ] && { +[ -z "$METIS_SYSTEM" ] && [ -e $WM_THIRD_PARTY_DIR/packages/metis-5.0pre2/platforms/$WM_OPTIONS ] && { _foamSource $WM_THIRD_PARTY_DIR/packages/metis-5.0pre2/platforms/$WM_OPTIONS/etc/metis-5.0pre2.sh } [ "$FOAM_VERBOSE" -a "$PS1" ] && echo " METIS_DIR is initialized to: $METIS_DIR" diff --git a/src/Allwmake b/src/Allwmake index 188710d12..c0ad6e69e 100755 --- a/src/Allwmake +++ b/src/Allwmake @@ -68,4 +68,7 @@ wmake libso engine wmake libso equationReader wmake libso multiSolver + +( cd cudaSolvers ; ./Allwmake ) + # ----------------------------------------------------------------- end-of-file diff --git a/src/cudaSolvers/Allwmake b/src/cudaSolvers/Allwmake new file mode 100755 index 000000000..479f97dc5 --- /dev/null +++ b/src/cudaSolvers/Allwmake @@ -0,0 +1,13 @@ +#!/bin/sh +cd ${0%/*} || exit 1 # run from this directory + +if [ -f $CUDA_BIN_DIR/nvcc ] +then + echo "Found nvcc -- enabling CUDA support." + wmake libso +else + echo "No nvcc - CUDA not available." +fi + + +# ----------------------------------------------------------------- end-of-file diff --git a/src/cudaSolvers/Make/files b/src/cudaSolvers/Make/files new file mode 100644 index 000000000..06b2759fc --- /dev/null +++ b/src/cudaSolvers/Make/files @@ -0,0 +1,12 @@ +cudaSolver/cudaSolver.C + +cudaCG/cgDiag.cu +cudaCG/cgAmg.cu +cudaCG/cgAinv.cu +cudaCG/cudaCG.C + +cudaBiCGStab/bicgDiag.cu +cudaBiCGStab/bicgAinv.cu +cudaBiCGStab/cudaBiCGStab.C + +LIB = $(FOAM_LIBBIN)/libcudaSolvers diff --git a/src/cudaSolvers/Make/options b/src/cudaSolvers/Make/options new file mode 100644 index 000000000..d1800d1bb --- /dev/null +++ b/src/cudaSolvers/Make/options @@ -0,0 +1,5 @@ +EXE_INC = \ + -I$(CUDA_INCLUDE_DIR) + +LIB_LIBS = \ + -L$(CUDA_LIB_DIR) -lcudart diff --git a/src/cudaSolvers/cudaBiCGStab/bicgAinv.cu b/src/cudaSolvers/cudaBiCGStab/bicgAinv.cu new file mode 100644 index 000000000..15b6fbdc3 --- /dev/null +++ b/src/cudaSolvers/cudaBiCGStab/bicgAinv.cu @@ -0,0 +1,246 @@ +/**********************************************************************\ + ______ __ __ _______ _______ __ __ __ __ __ ___ + / || | | | | ____|| ____|| | | | | \ | | | |/ / +| ,----'| | | | | |__ | |__ | | | | | \| | | ' / +| | | | | | | __| | __| | | | | | . ` | | < +| `----.| `--' | | | | | | `----.| | | |\ | | . \ + \______| \______/ |__| |__| |_______||__| |__| \__| |__|\__\ + +Cuda For FOAM Link + +cufflink is a library for linking numerical methods based on Nvidia's +Compute Unified Device Architecture (CUDA™) C/C++ programming language +and OpenFOAM®. + +Please note that cufflink is not approved or endorsed by OpenCFD® +Limited, the owner of the OpenFOAM® and OpenCFD® trademarks and +producer of OpenFOAM® software. + +The official web-site of OpenCFD® Limited is www.openfoam.com . + +------------------------------------------------------------------------ +This file is part of cufflink. + + cufflink is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cufflink is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cufflink. If not, see . + + Author + Daniel P. Combest. All rights reserved. + Modifications by Dominik Christ, Wikki Ltd. + + Description + Sparse approximate inverse preconditioned conjugate gradient stabilized + solver for asymmetric Matrices using a CUSP CUDA™ based solver. + +\**********************************************************************/ + + +#include "cudaTypes.H" + +// CUSP Includes +#include +#include +#include +#include +#include +#include + +extern "C" void bicgAinv +( + cuspEquationSystem* ces, + cudaSolverPerformance* solverPerf, + ValueType drop_tolerance, + int nonzero_per_row, + bool lin_dropping, + int lin_param +) +{ + cusp::coo_matrix A(ces->A); + cusp::array1d X(ces->X); + cusp::array1d B(ces->B); + + // Fill in the rest of the diag (rows and col) + // Determine row indices of diagonal values and fill A COO matrix + thrust::sequence + ( + A.row_indices.begin(), + A.row_indices.begin() + ces->nCells + ); + + // Determine column indices of diagonal values and fill A COO matrix + thrust::sequence + ( + A.column_indices.begin(), + A.column_indices.begin() + ces->nCells + ); + + // Sorted coo by row and column. speeds code up a little bit more + A.sort_by_row_and_column(); + + // Set storage + // #include "cufflink/setGPUStorage.H" + + if (solverPerf->debugCusp) + { + std::cout << " Using Ainv preconditioner\n"; + } + + cusp::precond::bridson_ainv M + ( + A, + drop_tolerance, + nonzero_per_row, + lin_dropping, + lin_param + ); + + // Start Krylov solver + assert(A.num_rows == A.num_cols); // Sanity check + + const size_t N = A.num_rows; + + // Allocate workspace + cusp::array1d y(N); + + cusp::array1d p(N); + cusp::array1d r(N); + cusp::array1d r_star(N); + cusp::array1d s(N); + cusp::array1d Mp(N); + cusp::array1d AMp(N); + cusp::array1d Ms(N); + cusp::array1d AMs(N); + + // y <- Ax + cusp::multiply(A, X, y); + + // Define the normalization factor + ValueType normFactor = 1.0; + +# include "buildNormFactor.H" + + // r <- b - A*x + cusp::blas::axpby(B, y, r, ValueType(1), ValueType(-1)); + + // p <- r + cusp::blas::copy(r, p); + + // r_star <- r + cusp::blas::copy(r, r_star); + + ValueType r_r_star_old = cusp::blas::dotc(r_star, r); + + + ValueType normR = cusp::blas::nrm2(r)/normFactor; + ValueType normR0 = normR; // Initial residual + solverPerf->iRes = normR0; + int count = 0; + + while + ( + normR > (solverPerf->tol) + && count < (solverPerf->maxIter) + && normR/normR0 >= (solverPerf->relTol) + || count < solverPerf->minIter + ) + { + // Mp = M*p + cusp::multiply(M, p, Mp); + + // AMp = A*Mp + cusp::multiply(A, Mp, AMp); + + // alpha = (r_j, r_star) / (A*M*p, r_star) + ValueType alpha = r_r_star_old/cusp::blas::dotc(r_star, AMp); + + // s_j = r_j - alpha * AMp + cusp::blas::axpby(r, AMp, s, ValueType(1), ValueType(-alpha)); + + ValueType normS = cusp::blas::nrm2(s)/normFactor; + + if + ( + !( + normS > (solverPerf->tol) + && normS/normR0 >= (solverPerf->relTol) + ) + ) + { + // is this right? + // x += alpha*M*p_j + cusp::blas::axpby(X, Mp, X, ValueType(1), ValueType(alpha)); + + // y <- Ax + cusp::multiply(A, X, y); + + // r <- b - A*x + cusp::blas::axpby(B, y, r, ValueType(1), ValueType(-1)); + // not sure we should be measuring r here...need to look again. + normR = cusp::blas::nrm2(r)/normFactor; + + count++; + + break; + } + + // Ms = M*s_j + cusp::multiply(M, s, Ms); + + // AMs = A*Ms + cusp::multiply(A, Ms, AMs); + + // omega = (AMs, s) / (AMs, AMs) + ValueType omega = cusp::blas::dotc(AMs, s)/cusp::blas::dotc(AMs, AMs); + + // x_{j+1} = x_j + alpha*M*p_j + omega*M*s_j + cusp::blas::axpbypcz(X, Mp, Ms, X, ValueType(1), alpha, omega); + + // r_{j+1} = s_j - omega*A*M*s + cusp::blas::axpby(s, AMs, r, ValueType(1), -omega); + + // beta_j = (r_{j+1}, r_star) / (r_j, r_star) * (alpha/omega) + ValueType r_r_star_new = cusp::blas::dotc(r_star, r); + ValueType beta = (r_r_star_new/r_r_star_old)*(alpha/omega); + r_r_star_old = r_r_star_new; + + // p_{j+1} = r_{j+1} + beta*(p_j - omega*A*M*p) + cusp::blas::axpbypcz(r, p, AMp, p, ValueType(1), beta, -beta*omega); + + // not dure we should be measuring r here...need to look again. + normR = cusp::blas::nrm2(r)/normFactor; + + count++; + } + // End Krylov Solver + + // Final residual + solverPerf->fRes = cusp::blas::nrm2(r)/normFactor; + solverPerf->nIterations = count; + + // converged? + if + ( + solverPerf->fRes<=solverPerf->tol + || solverPerf->fRes/solverPerf->iRes <= solverPerf->relTol + ) + { + solverPerf->converged = true; + } + else + { + solverPerf->converged = false; + } + + // Pass the solution vector back + ces->X = X; +} diff --git a/src/cudaSolvers/cudaBiCGStab/bicgDiag.cu b/src/cudaSolvers/cudaBiCGStab/bicgDiag.cu new file mode 100644 index 000000000..2c63150ae --- /dev/null +++ b/src/cudaSolvers/cudaBiCGStab/bicgDiag.cu @@ -0,0 +1,235 @@ +/**********************************************************************\ + ______ __ __ _______ _______ __ __ __ __ __ ___ + / || | | | | ____|| ____|| | | | | \ | | | |/ / +| ,----'| | | | | |__ | |__ | | | | | \| | | ' / +| | | | | | | __| | __| | | | | | . ` | | < +| `----.| `--' | | | | | | `----.| | | |\ | | . \ + \______| \______/ |__| |__| |_______||__| |__| \__| |__|\__\ + +Cuda For FOAM Link + +cufflink is a library for linking numerical methods based on Nvidia's +Compute Unified Device Architecture (CUDA™) C/C++ programming language +and OpenFOAM®. + +Please note that cufflink is not approved or endorsed by OpenCFD® +Limited, the owner of the OpenFOAM® and OpenCFD® trademarks and +producer of OpenFOAM® software. + +The official web-site of OpenCFD® Limited is www.openfoam.com . + +------------------------------------------------------------------------ +This file is part of cufflink. + + cufflink is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cufflink is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cufflink. If not, see . + + Author + Daniel P. Combest. All rights reserved. + Modifications by Dominik Christ, Wikki Ltd. + + Description + Sparse approximate inverse preconditioned conjugate gradient stabilized + solver for asymmetric Matrices using a CUSP CUDA™ based solver. + +\**********************************************************************/ + + +#include "cudaTypes.H" + +// CUSP Includes +#include +#include +#include +#include +#include +#include + +extern "C" void bicgDiag +( + cuspEquationSystem* ces, + cudaSolverPerformance* solverPerf +) +{ + cusp::coo_matrix A(ces->A); + cusp::array1d X(ces->X); + cusp::array1d B(ces->B); + + // Fill in the rest of the diag (rows and col) + // Determine row indices of diagonal values and fill A COO matrix + thrust::sequence + ( + A.row_indices.begin(), + A.row_indices.begin() + ces->nCells + ); + + // Determine column indices of diagonal values and fill A COO matrix + thrust::sequence + ( + A.column_indices.begin(), + A.column_indices.begin() + ces->nCells + ); + + // Sorted coo by row and column. speeds code up a little bit more + A.sort_by_row_and_column(); + + // Set storage +//# include "setGPUStorage.H" + + if (solverPerf->debugCusp) + { + std::cout << " Using Ainv preconditioner\n"; + } + + cusp::precond::diagonal M(A); + + // Start Krylov solver + assert(A.num_rows == A.num_cols); // Sanity check + + const size_t N = A.num_rows; + + // Allocate workspace + cusp::array1d y(N); + + cusp::array1d p(N); + cusp::array1d r(N); + cusp::array1d r_star(N); + cusp::array1d s(N); + cusp::array1d Mp(N); + cusp::array1d AMp(N); + cusp::array1d Ms(N); + cusp::array1d AMs(N); + + // y <- Ax + cusp::multiply(A, X, y); + + // Define the normalization factor + ValueType normFactor = 1.0; + +# include "buildNormFactor.H" + + // r <- b - A*x + cusp::blas::axpby(B, y, r, ValueType(1), ValueType(-1)); + + // p <- r + cusp::blas::copy(r, p); + + // r_star <- r + cusp::blas::copy(r, r_star); + + ValueType r_r_star_old = cusp::blas::dotc(r_star, r); + + + ValueType normR = cusp::blas::nrm2(r)/normFactor; + ValueType normR0 = normR; // Initial residual + solverPerf->iRes = normR0; + int count = 0; + + while + ( + normR > (solverPerf->tol) + && count < (solverPerf->maxIter) + && normR/normR0 >= (solverPerf->relTol) + || count < solverPerf->minIter + ) + { + // Mp = M*p + cusp::multiply(M, p, Mp); + + // AMp = A*Mp + cusp::multiply(A, Mp, AMp); + + // alpha = (r_j, r_star) / (A*M*p, r_star) + ValueType alpha = r_r_star_old/cusp::blas::dotc(r_star, AMp); + + // s_j = r_j - alpha * AMp + cusp::blas::axpby(r, AMp, s, ValueType(1), ValueType(-alpha)); + + ValueType normS = cusp::blas::nrm2(s)/normFactor; + + if + ( + !( + normS > (solverPerf->tol) + && normS/normR0 >= (solverPerf->relTol) + ) + ) + { + // is this right? + // x += alpha*M*p_j + cusp::blas::axpby(X, Mp, X, ValueType(1), ValueType(alpha)); + + // y <- Ax + cusp::multiply(A, X, y); + + // r <- b - A*x + cusp::blas::axpby(B, y, r, ValueType(1), ValueType(-1)); + // not sure we should be measuring r here...need to look again. + normR = cusp::blas::nrm2(r)/normFactor; + + count++; + + break; + } + + // Ms = M*s_j + cusp::multiply(M, s, Ms); + + // AMs = A*Ms + cusp::multiply(A, Ms, AMs); + + // omega = (AMs, s) / (AMs, AMs) + ValueType omega = cusp::blas::dotc(AMs, s)/cusp::blas::dotc(AMs, AMs); + + // x_{j+1} = x_j + alpha*M*p_j + omega*M*s_j + cusp::blas::axpbypcz(X, Mp, Ms, X, ValueType(1), alpha, omega); + + // r_{j+1} = s_j - omega*A*M*s + cusp::blas::axpby(s, AMs, r, ValueType(1), -omega); + + // beta_j = (r_{j+1}, r_star) / (r_j, r_star) * (alpha/omega) + ValueType r_r_star_new = cusp::blas::dotc(r_star, r); + ValueType beta = (r_r_star_new/r_r_star_old)*(alpha/omega); + r_r_star_old = r_r_star_new; + + // p_{j+1} = r_{j+1} + beta*(p_j - omega*A*M*p) + cusp::blas::axpbypcz(r, p, AMp, p, ValueType(1), beta, -beta*omega); + + // not dure we should be measuring r here...need to look again. + normR = cusp::blas::nrm2(r)/normFactor; + + count++; + } + // End Krylov Solver + + // Final residual + solverPerf->fRes = cusp::blas::nrm2(r)/normFactor; + solverPerf->nIterations = count; + + // converged? + if + ( + solverPerf->fRes<=solverPerf->tol + || solverPerf->fRes/solverPerf->iRes <= solverPerf->relTol + ) + { + solverPerf->converged = true; + } + else + { + solverPerf->converged = false; + } + + // Pass the solution vector back + ces->X = X; +} diff --git a/src/cudaSolvers/cudaBiCGStab/cudaBiCGStab.C b/src/cudaSolvers/cudaBiCGStab/cudaBiCGStab.C new file mode 100644 index 000000000..14039a205 --- /dev/null +++ b/src/cudaSolvers/cudaBiCGStab/cudaBiCGStab.C @@ -0,0 +1,168 @@ +/*---------------------------------------------------------------------------*\ + ========= | + \\ / F ield | OpenFOAM: The Open Source CFD Toolbox + \\ / O peration | + \\ / A nd | Copyright held by original author + \\/ M anipulation | +------------------------------------------------------------------------------- +License + This file is part of OpenFOAM. + + OpenFOAM is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 2 of the License, or (at your + option) any later version. + + OpenFOAM is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with OpenFOAM; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +Description + Preconditioned Conjugate Gradient solver with run-time selectable + preconditioning + +Author + Dominik Christ, Wikki Ltd. + Based on Cufflink library by Daniel P. Combest + +\*---------------------------------------------------------------------------*/ + +#include "cudaBiCGStab.H" + +// Preconditioner and solver are hardwired due to +// code structure of Cusp library. Below are +// external functions with solver/preconditioner combinations + +// BiCG with diagonal preconditioning +extern "C" void bicgDiag +( + cuspEquationSystem* ces, + cudaSolverPerformance* solverParam +); + +// BiCG with Ainv preconditioning +extern "C" void bicgAinv +( + cuspEquationSystem* ces, + cudaSolverPerformance* solverPerf, + ValueType drop_tolerance, + int nonzero_per_row, + bool lin_dropping, + int lin_param +); + + +// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * // + +namespace Foam +{ + defineTypeNameAndDebug(cudaBiCGStab, 0); + + lduSolver::addasymMatrixConstructorToTable + addcudaBiCGStabAsymMatrixConstructorToTable_; +} + + +// * * * * * * * * * * * * * * * * Constructors * * * * * * * * * * * * * * // + +// - Construct from matrix and solver data stream +Foam::cudaBiCGStab::cudaBiCGStab +( + const word& fieldName, + const lduMatrix& matrix, + const FieldField& coupleBouCoeffs, + const FieldField& coupleIntCoeffs, + const lduInterfaceFieldPtrsList& interfaces, + const dictionary& dict +) +: + cudaSolver + ( + fieldName, + matrix, + coupleBouCoeffs, + coupleIntCoeffs, + interfaces, + dict + ) +{} + + +// * * * * * * * * * * * * * * * Member Functions * * * * * * * * * * * * * // + +Foam::lduSolverPerformance Foam::cudaBiCGStab::solve +( + scalarField& x, + const scalarField& b, + const direction cmpt +) const +{ + // Initialize Cusp solver perfomance + cudaSolverPerformance solverPerf = cudaSolverPerformanceDefault(); + solverPerf.minIter = minIter(); // Minimum iterations + solverPerf.maxIter = maxIter(); // Maximum iterations + solverPerf.relTol = relTolerance(); // Relative tolerance + solverPerf.tol = tolerance(); // Tolerance + + if (lduMatrix::debug >= 2) + { + solverPerf.debugCusp = true; + } + + // Initialize and copy matrix data to GPU + cuspEquationSystem ces = createAsymCuspMatrix(matrix(), x, b); + + // Call solver externally + word preconName(dict().lookup("preconditioner")); + if (preconName == "diagonal") + { + bicgDiag(&ces, &solverPerf); + } + else if(preconName == "Ainv") + { + bicgAinv + ( + &ces, + &solverPerf, + dict().lookupOrDefault("dropTolerance", 0.1), + dict().lookupOrDefault