Hand-merged CUDA support

2013-10-11 14:12:56 +01:00 · 2013-10-11 14:12:56 +01:00 · db4d4cefe7
commit db4d4cefe7
parent 7e8f06837c
23 changed files with 2313 additions and 5 deletions
--- a/etc/prefs.csh-EXAMPLE
+++ b/etc/prefs.csh-EXAMPLE
@ -2,7 +2,7 @@
 # =========                 |
 # \\      /  F ield         | OpenFOAM: The Open Source CFD Toolbox
 #  \\    /   O peration     |
-#   \\  /    A nd           | Copyright (C) 2010-2010 OpenCFD Ltd.
+#   \\  /    A nd           | Copyright (C) 2013 Hrvoje Jasak
 #    \\/     M anipulation  |
 #------------------------------------------------------------------------------
 # License
@ -62,6 +62,14 @@
 #     So build your ThirdParty directory accordingly.
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

+# System installed CUDA
+#setenv CUDA_SYSTEM 1
+#setenv CUDA_DIR path_to_system_installed_cuda
+#setenv CUDA_BIN_DIR $CUDA_DIR/bin
+#setenv CUDA_LIB_DIR $CUDA_DIR/lib
+#setenv CUDA_INCLUDE_DIR $CUDA_DIR/include
+#setenv CUDA_ARCH sm_20
+
 # System installed Mesquite
 #setenv MESQUITE_SYSTEM 1
 #setenv MESQUITE_DIR path_to_system_installed_mesquite
--- a/etc/prefs.sh-EXAMPLE
+++ b/etc/prefs.sh-EXAMPLE
@ -2,7 +2,7 @@
 # =========                 |
 # \\      /  F ield         | OpenFOAM: The Open Source CFD Toolbox
 #  \\    /   O peration     |
-#   \\  /    A nd           | Copyright (C) 2010-2010 OpenCFD Ltd.
+#   \\  /    A nd           | Copyright (C) 2013 Hrvoje Jasak
 #    \\/     M anipulation  |
 #------------------------------------------------------------------------------
 # License
@ -64,6 +64,14 @@ compilerInstall=System
 #     So build your ThirdParty directory accordingly.
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

+# System installed CUDA
+#export CUDA_SYSTEM=1
+#export CUDA_DIR=path_to_system_installed_cuda
+#export CUDA_BIN_DIR=$CUDA_DIR/bin
+#export CUDA_LIB_DIR=$CUDA_DIR/lib
+#export CUDA_INCLUDE_DIR=$CUDA_DIR/include
+#export CUDA_ARCH=sm_20
+
 # System installed Mesquite
 #export MESQUITE_SYSTEM=1
 #export MESQUITE_DIR=path_to_system_installed_mesquite
--- a/etc/settings.csh
+++ b/etc/settings.csh
@ -211,11 +211,11 @@ case SYSTEMOPENMPI:
         endif
    else
    # Here, we assume your environment is already set for running
-    # and developping with openmpi.
+    # and developing with openmpi.
    #
    # Initialize OPENMPI_BIN_DIR using the path to mpicc
        set mpicc_cmd=`which mpicc`
-    setenv OPENMPI_BIN_DIR `dirname $mpicc_cmd`
+        setenv OPENMPI_BIN_DIR `dirname $mpicc_cmd`
        unset mpicc_cmd
    endif

@ -382,6 +382,23 @@ else
    setenv MPI_BUFFER_SIZE $minBufferSize
 endif

+# CUDA library
+# ~~~~~~~~~~~~
+if ( $?CUDA_SYSTEM == 0 && -e /usr/local/cuda-5.5/bin/nvcc ) then
+    setenv CUDA_DIR /usr/local/cuda-5.5
+    setenv CUDA_BIN_DIR $CUDA_DIR/bin
+    setenv CUDA_LIB_DIR $CUDA_DIR/lib64
+    setenv CUDA_INCLUDE_DIR $CUDA_DIR/include
+    setenv CUDA_ARCH sm_20
+endif
+
+if ( $?CUDA_BIN_DIR ) then
+    _foamAddPath $CUDA_BIN_DIR
+endif
+
+if ( $?CUDA_LIB_DIR ) then
+    _foamAddLib $CUDA_LIB_DIR
+endif

 # CGAL library if available
 # ~~~~~~~~~~~~~~~~~~~~~~~~~
--- a/etc/settings.sh
+++ b/etc/settings.sh
@ -432,6 +432,18 @@ fi
 export MPI_BUFFER_SIZE


+# CUDA if available
+# ~~~~~~~~~~~~~~~~~
+[ -z "$CUDA_SYSTEM" ] && [ -e /usr/local/cuda-5.5/bin/nvcc ] && {
+    export CUDA_DIR=/usr/local/cuda-5.5
+    export CUDA_BIN_DIR=$CUDA_DIR/bin
+    export CUDA_LIB_DIR=$CUDA_DIR/lib64
+    export CUDA_INCLUDE_DIR=$CUDA_DIR/include
+}
+
+[ -d "$CUDA_LIB_DIR" ] && _foamAddPath $CUDA_BIN_DIR && _foamAddLib $CUDA_LIB_DIR
+
+
 # CGAL library if available
 # ~~~~~~~~~~~~~~~~~~~~~~~~~
 [ -d "$CGAL_LIB_DIR" ] && _foamAddLib $CGAL_LIB_DIR
@ -472,7 +484,7 @@ export MPI_BUFFER_SIZE

 # Load Metis library
 # ~~~~~~~~~~~~~~~~~~
-[ -z "$METIS_SYSTEM" ] && [ -d $WM_THIRD_PARTY_DIR/packages/metis-5.0pre2/platforms/$WM_OPTIONS ] && {
+[ -z "$METIS_SYSTEM" ] && [ -e $WM_THIRD_PARTY_DIR/packages/metis-5.0pre2/platforms/$WM_OPTIONS ] && {
    _foamSource $WM_THIRD_PARTY_DIR/packages/metis-5.0pre2/platforms/$WM_OPTIONS/etc/metis-5.0pre2.sh
 }
 [ "$FOAM_VERBOSE" -a "$PS1" ] && echo "    METIS_DIR is initialized to: $METIS_DIR"
--- a/src/Allwmake
+++ b/src/Allwmake
@ -68,4 +68,7 @@ wmake libso engine

 wmake libso equationReader
 wmake libso multiSolver
+
+( cd cudaSolvers ; ./Allwmake )
+
 # ----------------------------------------------------------------- end-of-file
--- a/src/cudaSolvers/Allwmake
+++ b/src/cudaSolvers/Allwmake
@ -0,0 +1,13 @@
+#!/bin/sh
+cd ${0%/*} || exit 1    # run from this directory
+
+if [ -f $CUDA_BIN_DIR/nvcc ]
+then
+    echo "Found nvcc  --  enabling CUDA support."
+    wmake libso
+else
+    echo "No nvcc - CUDA not available."
+fi
+
+
+# ----------------------------------------------------------------- end-of-file
--- a/src/cudaSolvers/Make/files
+++ b/src/cudaSolvers/Make/files
@ -0,0 +1,12 @@
+cudaSolver/cudaSolver.C
+
+cudaCG/cgDiag.cu
+cudaCG/cgAmg.cu
+cudaCG/cgAinv.cu
+cudaCG/cudaCG.C
+
+cudaBiCGStab/bicgDiag.cu
+cudaBiCGStab/bicgAinv.cu
+cudaBiCGStab/cudaBiCGStab.C
+
+LIB = $(FOAM_LIBBIN)/libcudaSolvers
--- a/src/cudaSolvers/Make/options
+++ b/src/cudaSolvers/Make/options
@ -0,0 +1,5 @@
+EXE_INC = \
+    -I$(CUDA_INCLUDE_DIR)
+
+LIB_LIBS = \
+    -L$(CUDA_LIB_DIR) -lcudart
--- a/src/cudaSolvers/cudaBiCGStab/bicgAinv.cu
+++ b/src/cudaSolvers/cudaBiCGStab/bicgAinv.cu
@ -0,0 +1,246 @@
+/**********************************************************************\
+  ______  __    __   _______  _______  __       __   __   __   __  ___
+ /      ||  |  |  | |   ____||   ____||  |     |  | |  \ |  | |  |/  /
+|  ,----'|  |  |  | |  |__   |  |__   |  |     |  | |   \|  | |  '  /
+|  |     |  |  |  | |   __|  |   __|  |  |     |  | |  . `  | |    <
+|  `----.|  `--'  | |  |     |  |     |  `----.|  | |  |\   | |  .  \
+ \______| \______/  |__|     |__|     |_______||__| |__| \__| |__|\__\
+
+Cuda For FOAM Link
+
+cufflink is a library for linking numerical methods based on Nvidia's
+Compute Unified Device Architecture (CUDA™) C/C++ programming language
+and OpenFOAM®.
+
+Please note that cufflink is not approved or endorsed by OpenCFD®
+Limited, the owner of the OpenFOAM® and OpenCFD® trademarks and
+producer of OpenFOAM® software.
+
+The official web-site of OpenCFD® Limited is www.openfoam.com .
+
+------------------------------------------------------------------------
+This file is part of cufflink.
+
+    cufflink is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    cufflink is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with cufflink.  If not, see <http:// www.gnu.org/licenses/>.
+
+    Author
+        Daniel P. Combest.  All rights reserved.
+        Modifications by Dominik Christ, Wikki Ltd.
+
+    Description
+        Sparse approximate inverse preconditioned conjugate gradient stabilized
+    solver for asymmetric Matrices using a CUSP CUDA™ based solver.
+
+\**********************************************************************/
+
+
+#include "cudaTypes.H"
+
+// CUSP Includes
+#include <cusp/detail/config.h>
+#include <cusp/verify.h>
+#include <cusp/precond/ainv.h>
+#include <cusp/coo_matrix.h>
+#include <cusp/blas.h>
+#include <cusp/multiply.h>
+
+extern "C" void bicgAinv
+(
+    cuspEquationSystem* ces,
+    cudaSolverPerformance* solverPerf,
+    ValueType drop_tolerance,
+    int nonzero_per_row,
+    bool lin_dropping,
+    int lin_param
+)
+{
+    cusp::coo_matrix<IndexType, ValueType, MemorySpace> A(ces->A);
+    cusp::array1d<ValueType, MemorySpace> X(ces->X);
+    cusp::array1d<ValueType, MemorySpace> B(ces->B);
+
+    // Fill in the rest of the diag (rows and col)
+    // Determine row indices of diagonal values and fill A COO matrix
+    thrust::sequence
+    (
+        A.row_indices.begin(),
+        A.row_indices.begin() + ces->nCells
+    );
+
+    // Determine column indices of diagonal values and fill A COO matrix
+    thrust::sequence
+    (
+        A.column_indices.begin(),
+        A.column_indices.begin() + ces->nCells
+    );
+
+    // Sorted coo by row and column. speeds code up a little bit more
+    A.sort_by_row_and_column();
+
+    // Set storage
+    // #include "cufflink/setGPUStorage.H"
+
+    if (solverPerf->debugCusp)
+    {
+        std::cout << "   Using Ainv preconditioner\n";
+    }
+
+    cusp::precond::bridson_ainv<ValueType, MemorySpace> M
+    (
+         A,
+         drop_tolerance,
+         nonzero_per_row,
+         lin_dropping,
+         lin_param
+    );
+
+    // Start Krylov solver
+    assert(A.num_rows == A.num_cols);   // Sanity check
+
+    const size_t N = A.num_rows;
+
+    // Allocate workspace
+    cusp::array1d<ValueType,MemorySpace> y(N);
+
+    cusp::array1d<ValueType,MemorySpace> p(N);
+    cusp::array1d<ValueType,MemorySpace> r(N);
+    cusp::array1d<ValueType,MemorySpace> r_star(N);
+    cusp::array1d<ValueType,MemorySpace> s(N);
+    cusp::array1d<ValueType,MemorySpace> Mp(N);
+    cusp::array1d<ValueType,MemorySpace> AMp(N);
+    cusp::array1d<ValueType,MemorySpace> Ms(N);
+    cusp::array1d<ValueType,MemorySpace> AMs(N);
+
+    // y <- Ax
+    cusp::multiply(A, X, y);
+
+    // Define the normalization factor
+    ValueType normFactor = 1.0;
+
+#   include "buildNormFactor.H"
+
+    // r <- b - A*x
+    cusp::blas::axpby(B, y, r, ValueType(1), ValueType(-1));
+
+    // p <- r
+    cusp::blas::copy(r, p);
+
+    // r_star <- r
+    cusp::blas::copy(r, r_star);
+
+    ValueType r_r_star_old = cusp::blas::dotc(r_star, r);
+
+
+    ValueType normR = cusp::blas::nrm2(r)/normFactor;
+    ValueType normR0 = normR;   // Initial residual
+    solverPerf->iRes    = normR0;
+    int count = 0;
+
+    while
+    (
+    	normR > (solverPerf->tol)
+     && count < (solverPerf->maxIter)
+     && normR/normR0 >= (solverPerf->relTol)
+     || count < solverPerf->minIter
+    )
+    {
+        // Mp = M*p
+        cusp::multiply(M, p, Mp);
+
+        // AMp = A*Mp
+        cusp::multiply(A, Mp, AMp);
+
+        // alpha = (r_j, r_star) / (A*M*p, r_star)
+        ValueType alpha = r_r_star_old/cusp::blas::dotc(r_star, AMp);
+
+        // s_j = r_j - alpha * AMp
+        cusp::blas::axpby(r, AMp, s, ValueType(1), ValueType(-alpha));
+
+        ValueType normS = cusp::blas::nrm2(s)/normFactor;
+
+        if
+        (
+           !(
+                normS > (solverPerf->tol)
+             && normS/normR0 >= (solverPerf->relTol)
+            )
+        )
+        {
+            // is this right?
+            // x += alpha*M*p_j
+            cusp::blas::axpby(X, Mp, X, ValueType(1), ValueType(alpha));
+
+            // y <- Ax
+            cusp::multiply(A, X, y);
+
+            // r <- b - A*x
+            cusp::blas::axpby(B, y, r, ValueType(1), ValueType(-1));
+            // not sure we should be measuring r here...need to look again.
+            normR = cusp::blas::nrm2(r)/normFactor;
+
+            count++;
+
+            break;
+        }
+
+        // Ms = M*s_j
+        cusp::multiply(M, s, Ms);
+
+        // AMs = A*Ms
+        cusp::multiply(A, Ms, AMs);
+
+        // omega = (AMs, s) / (AMs, AMs)
+        ValueType omega = cusp::blas::dotc(AMs, s)/cusp::blas::dotc(AMs, AMs);
+
+        // x_{j+1} = x_j + alpha*M*p_j + omega*M*s_j
+        cusp::blas::axpbypcz(X, Mp, Ms, X, ValueType(1), alpha, omega);
+
+        // r_{j+1} = s_j - omega*A*M*s
+        cusp::blas::axpby(s, AMs, r, ValueType(1), -omega);
+
+        // beta_j = (r_{j+1}, r_star) / (r_j, r_star) * (alpha/omega)
+        ValueType r_r_star_new = cusp::blas::dotc(r_star, r);
+        ValueType beta = (r_r_star_new/r_r_star_old)*(alpha/omega);
+        r_r_star_old = r_r_star_new;
+
+        // p_{j+1} = r_{j+1} + beta*(p_j - omega*A*M*p)
+        cusp::blas::axpbypcz(r, p, AMp, p, ValueType(1), beta, -beta*omega);
+
+        // not dure we should be measuring r here...need to look again.
+        normR = cusp::blas::nrm2(r)/normFactor;
+
+        count++;
+    }
+    // End Krylov Solver
+
+    // Final residual
+    solverPerf->fRes = cusp::blas::nrm2(r)/normFactor;
+    solverPerf->nIterations = count;
+
+    // converged?
+    if
+    (
+        solverPerf->fRes<=solverPerf->tol
+     || solverPerf->fRes/solverPerf->iRes <= solverPerf->relTol
+    )
+    {
+        solverPerf->converged = true;
+    }
+    else
+    {
+        solverPerf->converged = false;
+    }
+
+    // Pass the solution vector back
+    ces->X = X;
+}
--- a/src/cudaSolvers/cudaBiCGStab/bicgDiag.cu
+++ b/src/cudaSolvers/cudaBiCGStab/bicgDiag.cu
@ -0,0 +1,235 @@
+/**********************************************************************\
+  ______  __    __   _______  _______  __       __   __   __   __  ___
+ /      ||  |  |  | |   ____||   ____||  |     |  | |  \ |  | |  |/  /
+|  ,----'|  |  |  | |  |__   |  |__   |  |     |  | |   \|  | |  '  /
+|  |     |  |  |  | |   __|  |   __|  |  |     |  | |  . `  | |    <
+|  `----.|  `--'  | |  |     |  |     |  `----.|  | |  |\   | |  .  \
+ \______| \______/  |__|     |__|     |_______||__| |__| \__| |__|\__\
+
+Cuda For FOAM Link
+
+cufflink is a library for linking numerical methods based on Nvidia's
+Compute Unified Device Architecture (CUDA™) C/C++ programming language
+and OpenFOAM®.
+
+Please note that cufflink is not approved or endorsed by OpenCFD®
+Limited, the owner of the OpenFOAM® and OpenCFD® trademarks and
+producer of OpenFOAM® software.
+
+The official web-site of OpenCFD® Limited is www.openfoam.com .
+
+------------------------------------------------------------------------
+This file is part of cufflink.
+
+    cufflink is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    cufflink is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with cufflink.  If not, see <http:// www.gnu.org/licenses/>.
+
+    Author
+        Daniel P. Combest.  All rights reserved.
+        Modifications by Dominik Christ, Wikki Ltd.
+
+    Description
+        Sparse approximate inverse preconditioned conjugate gradient stabilized
+    solver for asymmetric Matrices using a CUSP CUDA™ based solver.
+
+\**********************************************************************/
+
+
+#include "cudaTypes.H"
+
+// CUSP Includes
+#include <cusp/detail/config.h>
+#include <cusp/verify.h>
+#include <cusp/precond/diagonal.h>
+#include <cusp/coo_matrix.h>
+#include <cusp/blas.h>
+#include <cusp/multiply.h>
+
+extern "C" void bicgDiag
+(
+    cuspEquationSystem* ces,
+    cudaSolverPerformance* solverPerf
+)
+{
+    cusp::coo_matrix<IndexType, ValueType, MemorySpace> A(ces->A);
+    cusp::array1d<ValueType, MemorySpace> X(ces->X);
+    cusp::array1d<ValueType, MemorySpace> B(ces->B);
+
+    // Fill in the rest of the diag (rows and col)
+    // Determine row indices of diagonal values and fill A COO matrix
+    thrust::sequence
+    (
+        A.row_indices.begin(),
+        A.row_indices.begin() + ces->nCells
+    );
+
+    // Determine column indices of diagonal values and fill A COO matrix
+    thrust::sequence
+    (
+        A.column_indices.begin(),
+        A.column_indices.begin() + ces->nCells
+    );
+
+    // Sorted coo by row and column. speeds code up a little bit more
+    A.sort_by_row_and_column();
+
+    // Set storage
+//#   include "setGPUStorage.H"
+
+    if (solverPerf->debugCusp)
+    {
+        std::cout << "   Using Ainv preconditioner\n";
+    }
+
+    cusp::precond::diagonal<ValueType, MemorySpace> M(A);
+
+    // Start Krylov solver
+    assert(A.num_rows == A.num_cols);   // Sanity check
+
+    const size_t N = A.num_rows;
+
+    // Allocate workspace
+    cusp::array1d<ValueType,MemorySpace> y(N);
+
+    cusp::array1d<ValueType,MemorySpace> p(N);
+    cusp::array1d<ValueType,MemorySpace> r(N);
+    cusp::array1d<ValueType,MemorySpace> r_star(N);
+    cusp::array1d<ValueType,MemorySpace> s(N);
+    cusp::array1d<ValueType,MemorySpace> Mp(N);
+    cusp::array1d<ValueType,MemorySpace> AMp(N);
+    cusp::array1d<ValueType,MemorySpace> Ms(N);
+    cusp::array1d<ValueType,MemorySpace> AMs(N);
+
+    // y <- Ax
+    cusp::multiply(A, X, y);
+
+    // Define the normalization factor
+    ValueType normFactor = 1.0;
+
+#   include "buildNormFactor.H"
+
+    // r <- b - A*x
+    cusp::blas::axpby(B, y, r, ValueType(1), ValueType(-1));
+
+    // p <- r
+    cusp::blas::copy(r, p);
+
+    // r_star <- r
+    cusp::blas::copy(r, r_star);
+
+    ValueType r_r_star_old = cusp::blas::dotc(r_star, r);
+
+
+    ValueType normR = cusp::blas::nrm2(r)/normFactor;
+    ValueType normR0 = normR;   // Initial residual
+    solverPerf->iRes    = normR0;
+    int count = 0;
+
+    while
+    (
+    	normR > (solverPerf->tol)
+     && count < (solverPerf->maxIter)
+     && normR/normR0 >= (solverPerf->relTol)
+     || count < solverPerf->minIter
+    )
+    {
+        // Mp = M*p
+        cusp::multiply(M, p, Mp);
+
+        // AMp = A*Mp
+        cusp::multiply(A, Mp, AMp);
+
+        // alpha = (r_j, r_star) / (A*M*p, r_star)
+        ValueType alpha = r_r_star_old/cusp::blas::dotc(r_star, AMp);
+
+        // s_j = r_j - alpha * AMp
+        cusp::blas::axpby(r, AMp, s, ValueType(1), ValueType(-alpha));
+
+        ValueType normS = cusp::blas::nrm2(s)/normFactor;
+
+        if
+        (
+           !(
+                normS > (solverPerf->tol)
+             && normS/normR0 >= (solverPerf->relTol)
+            )
+        )
+        {
+            // is this right?
+            // x += alpha*M*p_j
+            cusp::blas::axpby(X, Mp, X, ValueType(1), ValueType(alpha));
+
+            // y <- Ax
+            cusp::multiply(A, X, y);
+
+            // r <- b - A*x
+            cusp::blas::axpby(B, y, r, ValueType(1), ValueType(-1));
+            // not sure we should be measuring r here...need to look again.
+            normR = cusp::blas::nrm2(r)/normFactor;
+
+            count++;
+
+            break;
+        }
+
+        // Ms = M*s_j
+        cusp::multiply(M, s, Ms);
+
+        // AMs = A*Ms
+        cusp::multiply(A, Ms, AMs);
+
+        // omega = (AMs, s) / (AMs, AMs)
+        ValueType omega = cusp::blas::dotc(AMs, s)/cusp::blas::dotc(AMs, AMs);
+
+        // x_{j+1} = x_j + alpha*M*p_j + omega*M*s_j
+        cusp::blas::axpbypcz(X, Mp, Ms, X, ValueType(1), alpha, omega);
+
+        // r_{j+1} = s_j - omega*A*M*s
+        cusp::blas::axpby(s, AMs, r, ValueType(1), -omega);
+
+        // beta_j = (r_{j+1}, r_star) / (r_j, r_star) * (alpha/omega)
+        ValueType r_r_star_new = cusp::blas::dotc(r_star, r);
+        ValueType beta = (r_r_star_new/r_r_star_old)*(alpha/omega);
+        r_r_star_old = r_r_star_new;
+
+        // p_{j+1} = r_{j+1} + beta*(p_j - omega*A*M*p)
+        cusp::blas::axpbypcz(r, p, AMp, p, ValueType(1), beta, -beta*omega);
+
+        // not dure we should be measuring r here...need to look again.
+        normR = cusp::blas::nrm2(r)/normFactor;
+
+        count++;
+    }
+    // End Krylov Solver
+
+    // Final residual
+    solverPerf->fRes = cusp::blas::nrm2(r)/normFactor;
+    solverPerf->nIterations = count;
+
+    // converged?
+    if
+    (
+        solverPerf->fRes<=solverPerf->tol
+     || solverPerf->fRes/solverPerf->iRes <= solverPerf->relTol
+    )
+    {
+        solverPerf->converged = true;
+    }
+    else
+    {
+        solverPerf->converged = false;
+    }
+
+    // Pass the solution vector back
+    ces->X = X;
+}
--- a/src/cudaSolvers/cudaBiCGStab/cudaBiCGStab.C
+++ b/src/cudaSolvers/cudaBiCGStab/cudaBiCGStab.C
@ -0,0 +1,168 @@
+/*---------------------------------------------------------------------------*\
+  =========                 |
+  \\      /  F ield         | OpenFOAM: The Open Source CFD Toolbox
+   \\    /   O peration     |
+    \\  /    A nd           | Copyright held by original author
+     \\/     M anipulation  |
+-------------------------------------------------------------------------------
+License
+    This file is part of OpenFOAM.
+
+    OpenFOAM is free software; you can redistribute it and/or modify it
+    under the terms of the GNU General Public License as published by the
+    Free Software Foundation; either version 2 of the License, or (at your
+    option) any later version.
+
+    OpenFOAM is distributed in the hope that it will be useful, but WITHOUT
+    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+    FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+    for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with OpenFOAM; if not, write to the Free Software Foundation,
+    Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+Description
+    Preconditioned Conjugate Gradient solver with run-time selectable
+    preconditioning
+
+Author
+    Dominik Christ, Wikki Ltd.
+    Based on Cufflink library by Daniel P. Combest
+
+\*---------------------------------------------------------------------------*/
+
+#include "cudaBiCGStab.H"
+
+// Preconditioner and solver are hardwired due to
+// code structure of Cusp library. Below are
+// external functions with solver/preconditioner combinations
+
+// BiCG with diagonal preconditioning
+extern "C" void bicgDiag
+(
+    cuspEquationSystem* ces,
+    cudaSolverPerformance* solverParam
+);
+
+// BiCG with Ainv preconditioning
+extern "C" void bicgAinv
+(
+    cuspEquationSystem* ces,
+    cudaSolverPerformance* solverPerf,
+    ValueType drop_tolerance,
+    int nonzero_per_row,
+    bool lin_dropping,
+    int lin_param
+);
+
+
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
+
+namespace Foam
+{
+    defineTypeNameAndDebug(cudaBiCGStab, 0);
+
+    lduSolver::addasymMatrixConstructorToTable<cudaBiCGStab>
+        addcudaBiCGStabAsymMatrixConstructorToTable_;
+}
+
+
+// * * * * * * * * * * * * * * * * Constructors  * * * * * * * * * * * * * * //
+
+// - Construct from matrix and solver data stream
+Foam::cudaBiCGStab::cudaBiCGStab
+(
+    const word& fieldName,
+    const lduMatrix& matrix,
+    const FieldField<Field, scalar>& coupleBouCoeffs,
+    const FieldField<Field, scalar>& coupleIntCoeffs,
+    const lduInterfaceFieldPtrsList& interfaces,
+    const dictionary& dict
+)
+:
+    cudaSolver
+    (
+        fieldName,
+        matrix,
+        coupleBouCoeffs,
+        coupleIntCoeffs,
+        interfaces,
+        dict
+    )
+{}
+
+
+// * * * * * * * * * * * * * * * Member Functions  * * * * * * * * * * * * * //
+
+Foam::lduSolverPerformance Foam::cudaBiCGStab::solve
+(
+    scalarField& x,
+    const scalarField& b,
+    const direction cmpt
+) const
+{
+    // Initialize Cusp solver perfomance
+    cudaSolverPerformance solverPerf = cudaSolverPerformanceDefault();
+    solverPerf.minIter = minIter(); // Minimum iterations
+    solverPerf.maxIter = maxIter(); // Maximum iterations
+    solverPerf.relTol = relTolerance(); // Relative tolerance
+    solverPerf.tol = tolerance(); // Tolerance
+
+    if (lduMatrix::debug >= 2)
+    {
+         solverPerf.debugCusp = true;
+    }
+
+    // Initialize and copy matrix data to GPU
+    cuspEquationSystem ces = createAsymCuspMatrix(matrix(), x, b);
+
+    // Call solver externally
+    word preconName(dict().lookup("preconditioner"));
+    if (preconName == "diagonal")
+    {
+        bicgDiag(&ces, &solverPerf);
+    }
+    else if(preconName == "Ainv")
+    {
+        bicgAinv
+        (
+            &ces,
+            &solverPerf,
+            dict().lookupOrDefault<scalar>("dropTolerance", 0.1),
+            dict().lookupOrDefault<label>("nonzeroPerRow", -1),
+            dict().lookupOrDefault<bool>("LinDropping", false),
+            dict().lookupOrDefault<label>("LinParameter", 1)
+        );
+    }
+    else
+    {
+        FatalErrorIn("cudaBiCGStab::solver()")
+            << "Unknown preconditioner name. "
+            << "Options are:" << nl
+            << "(" << nl
+            << "diagonal" << nl
+            << "Ainv" << nl
+            << ")" << nl
+            << abort(FatalError);
+    }
+
+    // copy the x vector back to Openfoam
+	thrust::copy(ces.X.begin(), ces.X.end(), x.begin());
+
+    // Return solver output
+    return lduSolverPerformance
+    (
+        typeName,
+        fieldName(),
+        solverPerf.iRes,
+        solverPerf.fRes,
+        solverPerf.nIterations,
+        solverPerf.converged,
+        solverPerf.singular
+    );
+
+}
+
+
+// ************************************************************************* //
--- a/src/cudaSolvers/cudaBiCGStab/cudaBiCGStab.H
+++ b/src/cudaSolvers/cudaBiCGStab/cudaBiCGStab.H
@ -0,0 +1,114 @@
+/*---------------------------------------------------------------------------*\
+  =========                 |
+  \\      /  F ield         | OpenFOAM: The Open Source CFD Toolbox
+   \\    /   O peration     |
+    \\  /    A nd           | Copyright held by original author
+     \\/     M anipulation  |
+-------------------------------------------------------------------------------
+License
+    This file is part of OpenFOAM.
+
+    OpenFOAM is free software; you can redistribute it and/or modify it
+    under the terms of the GNU General Public License as published by the
+    Free Software Foundation; either version 2 of the License, or (at your
+    option) any later version.
+
+    OpenFOAM is distributed in the hope that it will be useful, but WITHOUT
+    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+    FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+    for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with OpenFOAM; if not, write to the Free Software Foundation,
+    Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+Class
+    cudaBiCGStab
+
+Description
+    Preconditioned Conjugate Gradient solver with run-time selectable
+    preconditioning for use with GPU
+
+Author
+    Dominik Christ, Wikki Ltd.
+    Based on Cufflink library by Daniel P. Combest
+
+SourceFiles
+    cudaBiCGStab.C
+
+\*---------------------------------------------------------------------------*/
+
+#ifndef cudaBiCGStab_H
+#define cudaBiCGStab_H
+
+#include "cudaSolver.H"
+
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
+
+namespace Foam
+{
+
+/*---------------------------------------------------------------------------*\
+			     Class cudaBiCGStab Declaration
+\*---------------------------------------------------------------------------*/
+
+class cudaBiCGStab
+:
+    public cudaSolver
+{
+    // Private Member Functions
+
+	//- Disallow default bitwise copy construct
+	cudaBiCGStab(const cudaBiCGStab&);
+
+	//- Disallow default bitwise assignment
+	void operator=(const cudaBiCGStab&);
+
+
+public:
+
+    //- Runtime type information
+    TypeName("cudaBiCGStab");
+
+
+    // Constructors
+
+	//- Construct from matrix components and solver data stream
+	cudaBiCGStab
+	(
+	    const word& fieldName,
+	    const lduMatrix& matrix,
+	    const FieldField<Field, scalar>& coupleBouCoeffs,
+	    const FieldField<Field, scalar>& coupleIntCoeffs,
+	    const lduInterfaceFieldPtrsList& interfaces,
+	    const dictionary& dict
+	);
+
+
+    // Destructor
+
+	virtual ~cudaBiCGStab()
+	{}
+
+
+    // Member Functions
+
+	//- Solve the matrix with this solver
+	virtual lduSolverPerformance solve
+	(
+	    scalarField& x,
+	    const scalarField& b,
+	    const direction cmpt = 0
+	) const;
+};
+
+
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
+
+} // End namespace Foam
+
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
+
+#endif
+
+// ************************************************************************* //
--- a/src/cudaSolvers/cudaCG/cgAinv.cu
+++ b/src/cudaSolvers/cudaCG/cgAinv.cu
@ -0,0 +1,184 @@
+/**********************************************************************\
+  ______  __    __   _______  _______  __       __   __   __   __  ___
+ /      ||  |  |  | |   ____||   ____||  |     |  | |  \ |  | |  |/  /
+|  ,----'|  |  |  | |  |__   |  |__   |  |     |  | |   \|  | |  '  /
+|  |     |  |  |  | |   __|  |   __|  |  |     |  | |  . `  | |    <
+|  `----.|  `--'  | |  |     |  |     |  `----.|  | |  |\   | |  .  \
+ \______| \______/  |__|     |__|     |_______||__| |__| \__| |__|\__\
+
+Cuda For FOAM Link
+
+cufflink is a library for linking numerical methods based on Nvidia's
+Compute Unified Device Architecture (CUDA™) C/C++ programming language
+and OpenFOAM®.
+
+Please note that cufflink is not approved or endorsed by ESI-OpenCFD®
+Limited, the owner of the OpenFOAM® and OpenCFD® trademarks and
+producer of OpenFOAM® software.
+
+The official web-site of OpenCFD® Limited is www.openfoam.com .
+
+------------------------------------------------------------------------
+This file is part of cufflink.
+
+    cufflink is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    cufflink is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with cufflink.  If not, see <http:// www.gnu.org/licenses/>.
+
+    Author
+    Daniel P. Combest.  All rights reserved.
+    Modifications by Dominik Christ, Wikki Ltd.
+
+    Description
+    Ainv preconditioned conjugate gradient
+    solver for symmetric Matrices using a CUSP CUDA™ based solver.
+
+\**********************************************************************/
+
+
+#include "cudaTypes.H"
+
+// CUSP Includes
+#include <cusp/detail/config.h>
+#include <cusp/verify.h>
+#include <cusp/precond/ainv.h>
+#include <cusp/coo_matrix.h>
+#include <cusp/blas.h>
+#include <cusp/multiply.h>
+
+extern "C" void cgAinv
+(
+    cuspEquationSystem* ces,
+    cudaSolverPerformance* solverPerf,
+    ValueType drop_tolerance,
+    int nonzero_per_row,
+    bool lin_dropping,
+    int lin_param
+)
+{
+    // Populate A
+    #include "fillCOOMatrix.H"
+
+    // Set storage
+    // #include "cufflink/setGPUStorage.H"
+
+    if (solverPerf->debugCusp)
+    {
+        std::cout<<"   Using Ainv preconditioner\n";
+    }
+
+    cusp::precond::bridson_ainv<ValueType, MemorySpace> M
+    (
+        A,
+        drop_tolerance,
+        nonzero_per_row,
+        lin_dropping,
+        lin_param
+    );
+
+     // Start the krylov solver
+    assert(A.num_rows == A.num_cols); // sanity check
+
+    const size_t N = A.num_rows;
+
+    // allocate workspace
+    cusp::array1d<ValueType,MemorySpace> y(N);
+    cusp::array1d<ValueType,MemorySpace> z(N);
+    cusp::array1d<ValueType,MemorySpace> r(N);
+    cusp::array1d<ValueType,MemorySpace> p(N);
+
+    // y <- Ax
+    cusp::multiply(A, X, y);
+
+    // Define the normalization factor
+    ValueType normFactor = 1.0;
+
+#   include "buildNormFactor.H"
+
+    // r <- b - A*x
+    cusp::blas::axpby(B, y, r, ValueType(1), ValueType(-1));
+
+    // z <- M*r
+    cusp::multiply(M, r, z);
+
+    // p <- z
+    cusp::blas::copy(z, p);
+
+    // rz = <r^H, z>
+    ValueType rz = cusp::blas::dotc(r, z);
+
+    ValueType normR = cusp::blas::nrm2(r)/normFactor;
+    ValueType normR0 = normR;   // Initial residual
+    solverPerf->iRes = normR0;
+    int count = 0;
+
+    while
+    (
+        normR > (solverPerf->tol)
+     && count < (solverPerf->maxIter)
+     && normR/normR0 >= (solverPerf->relTol)
+     || count < solverPerf->minIter
+    )
+    {
+        // y <- Ap
+        cusp::multiply(A, p, y);
+
+        // alpha <- <r,z>/<y,p>
+        ValueType alpha =  rz / cusp::blas::dotc(y, p);
+
+        // x <- x + alpha * p
+        cusp::blas::axpy(p, X, alpha);
+
+        // r <- r - alpha * y
+        cusp::blas::axpy(y, r, -alpha);
+
+        // z <- M*r
+        cusp::multiply(M, r, z);
+
+        ValueType rz_old = rz;
+
+        // rz = <r^H, z>
+        rz = cusp::blas::dotc(r, z);
+
+        // beta <- <r_{i+1},r_{i+1}>/<r,r>
+        ValueType beta = rz / rz_old;
+
+        // p <- r + beta*p should be p <- z + beta*p
+        cusp::blas::axpby(z, p, p, ValueType(1), beta);
+
+        normR = cusp::blas::nrm2(r)/normFactor;
+
+        count++;
+    }
+    // End the krylov solver
+
+    // Final residual
+    solverPerf->fRes = normR;
+    solverPerf->nIterations = count;
+
+    // Converged?
+    if
+    (
+        solverPerf->fRes<=solverPerf->tol
+     || solverPerf->fRes/solverPerf->iRes<=solverPerf->relTol
+    )
+    {
+        solverPerf->converged = true;
+    }
+    else
+    {
+        solverPerf->converged = false;
+    }
+
+    // Pass the solution vector back
+    ces->X = X;
+}
--- a/src/cudaSolvers/cudaCG/cgAmg.cu
+++ b/src/cudaSolvers/cudaCG/cgAmg.cu
@ -0,0 +1,174 @@
+/**********************************************************************\
+  ______  __    __   _______  _______  __       __   __   __   __  ___
+ /      ||  |  |  | |   ____||   ____||  |     |  | |  \ |  | |  |/  /
+|  ,----'|  |  |  | |  |__   |  |__   |  |     |  | |   \|  | |  '  /
+|  |     |  |  |  | |   __|  |   __|  |  |     |  | |  . `  | |    <
+|  `----.|  `--'  | |  |     |  |     |  `----.|  | |  |\   | |  .  \
+ \______| \______/  |__|     |__|     |_______||__| |__| \__| |__|\__\
+
+Cuda For FOAM Link
+
+cufflink is a library for linking numerical methods based on Nvidia's
+Compute Unified Device Architecture (CUDA™) C/C++ programming language
+and OpenFOAM®.
+
+Please note that cufflink is not approved or endorsed by ESI-OpenCFD®
+Limited, the owner of the OpenFOAM® and OpenCFD® trademarks and
+producer of OpenFOAM® software.
+
+The official web-site of OpenCFD® Limited is www.openfoam.com .
+
+------------------------------------------------------------------------
+This file is part of cufflink.
+
+    cufflink is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    cufflink is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with cufflink.  If not, see <http://www.gnu.org/licenses/>.
+
+    Author
+    Daniel P. Combest.  All rights reserved.
+    Modifications by Dominik Christ, Wikki Ltd.
+
+    Description
+    diagonal preconditioned conjugate gradient
+    solver for symmetric Matrices using a CUSP CUDA™ based solver.
+
+\**********************************************************************/
+
+#include "cudaTypes.H"
+
+//CUSP Includes
+#include <cusp/detail/config.h>
+#include <cusp/verify.h>
+#include <cusp/precond/aggregation/smoothed_aggregation.h>
+#include <cusp/coo_matrix.h>
+#include <cusp/blas.h>
+#include <cusp/multiply.h>
+
+extern "C" void cgAmg
+(
+    cuspEquationSystem* ces,
+    cudaSolverPerformance* solverPerf,
+    ValueType theta
+)
+{
+    // Populate A
+#   include "fillCOOMatrix.H"
+
+    // Set storage
+    // #include "cufflink/setGPUStorage.H"
+
+    if(solverPerf->debugCusp)
+    {
+      std::cout << "   Using amg preconditioner\n";
+    }
+
+    cusp::precond::aggregation::
+        smoothed_aggregation<IndexType, ValueType, MemorySpace> M(A);
+
+    // Start the krylov solver
+    assert(A.num_rows == A.num_cols); // sanity check
+
+    const size_t N = A.num_rows;
+
+    // allocate workspace
+    cusp::array1d<ValueType,MemorySpace> y(N);
+    cusp::array1d<ValueType,MemorySpace> z(N);
+    cusp::array1d<ValueType,MemorySpace> r(N);
+    cusp::array1d<ValueType,MemorySpace> p(N);
+
+    // y <- Ax
+    cusp::multiply(A, X, y);
+
+    //define the normalization factor
+    ValueType normFactor = 1.0;
+
+#   include "buildNormFactor.H"
+
+    // r <- b - A*x
+    cusp::blas::axpby(B, y, r, ValueType(1), ValueType(-1));
+
+    // z <- M*r
+    cusp::multiply(M, r, z);
+
+    // p <- z
+    cusp::blas::copy(z, p);
+
+    // rz = <r^H, z>
+    ValueType rz = cusp::blas::dotc(r, z);
+
+    ValueType normR = cusp::blas::nrm2(r)/normFactor;
+    ValueType normR0 = normR;//initial residual
+    solverPerf->iRes = normR0;
+    int count = 0;
+
+    while
+    (
+        normR > (solverPerf->tol)
+     && count < (solverPerf->maxIter)
+     && normR/normR0 >= (solverPerf->relTol)
+     || count < solverPerf->minIter
+    )
+    {
+        // y <- Ap
+        cusp::multiply(A, p, y);
+
+        // alpha <- <r,z>/<y,p>
+        ValueType alpha =  rz / cusp::blas::dotc(y, p);
+
+        // x <- x + alpha * p
+        cusp::blas::axpy(p, X, alpha);
+
+        // r <- r - alpha * y
+        cusp::blas::axpy(y, r, -alpha);
+
+        // z <- M*r
+        cusp::multiply(M, r, z);
+
+        ValueType rz_old = rz;
+
+        // rz = <r^H, z>
+        rz = cusp::blas::dotc(r, z);
+
+        // beta <- <r_{i+1},r_{i+1}>/<r,r>
+        ValueType beta = rz / rz_old;
+
+        // p <- r + beta*p should be p <- z + beta*p
+        cusp::blas::axpby(z, p, p, ValueType(1), beta);
+
+        normR = cusp::blas::nrm2(r)/normFactor;
+
+        count++;
+    }
+    // End the krylov solver
+
+    //final residual
+    solverPerf->fRes = normR;
+    solverPerf->nIterations = count;
+
+    //converged?
+    if
+    (
+        solverPerf->fRes<=solverPerf->tol
+     || solverPerf->fRes/solverPerf->iRes<=solverPerf->relTol
+    )
+    {
+        solverPerf->converged = true;
+    }
+    else
+    {
+        solverPerf->converged = false;
+    }
+
+    // Pass the solution vector back
+    ces->X = X;
+}
--- a/src/cudaSolvers/cudaCG/cgDiag.cu
+++ b/src/cudaSolvers/cudaCG/cgDiag.cu
@ -0,0 +1,173 @@
+/**********************************************************************\
+  ______  __    __   _______  _______  __       __   __   __   __  ___
+ /      ||  |  |  | |   ____||   ____||  |     |  | |  \ |  | |  |/  /
+|  ,----'|  |  |  | |  |__   |  |__   |  |     |  | |   \|  | |  '  /
+|  |     |  |  |  | |   __|  |   __|  |  |     |  | |  . `  | |    <
+|  `----.|  `--'  | |  |     |  |     |  `----.|  | |  |\   | |  .  \
+ \______| \______/  |__|     |__|     |_______||__| |__| \__| |__|\__\
+
+Cuda For FOAM Link
+
+cufflink is a library for linking numerical methods based on Nvidia's
+Compute Unified Device Architecture (CUDA™) C/C++ programming language
+and OpenFOAM®.
+
+Please note that cufflink is not approved or endorsed by ESI-OpenCFD®
+Limited, the owner of the OpenFOAM® and OpenCFD® trademarks and
+producer of OpenFOAM® software.
+
+The official web-site of OpenCFD® Limited is www.openfoam.com .
+
+------------------------------------------------------------------------
+This file is part of cufflink.
+
+    cufflink is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    cufflink is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with cufflink.  If not, see <http://www.gnu.org/licenses/>.
+
+    Author
+    Daniel P. Combest.  All rights reserved.
+    Modifications by Dominik Christ, Wikki Ltd.
+
+    Description
+    diagonal preconditioned conjugate gradient
+    solver for symmetric Matrices using a CUSP CUDA™ based solver.
+
+\**********************************************************************/
+
+#include "cudaTypes.H"
+
+//CUSP Includes
+#include <cusp/detail/config.h>
+#include <cusp/verify.h>
+#include <cusp/precond/diagonal.h>
+#include <cusp/coo_matrix.h>
+#include <cusp/ell_matrix.h>
+#include <cusp/blas.h>
+#include <cusp/multiply.h>
+
+extern "C" void cgDiag
+(
+    cuspEquationSystem* ces,
+    cudaSolverPerformance* solverPerf
+)
+{
+    // Populate A
+#   include "fillCOOMatrix.H"
+
+    // Set storage
+    // #include "cufflink/setGPUStorage.H"
+
+    if(solverPerf->debugCusp)
+    {
+        std::cout << "   Using Cusp_Diagonal preconditioner\n";
+    }
+
+    cusp::precond::diagonal<ValueType, MemorySpace> M(A);
+
+    // Start the krylov solver
+    assert(A.num_rows == A.num_cols); // sanity check
+
+    const size_t N = A.num_rows;
+
+    // allocate workspace
+    cusp::array1d<ValueType,MemorySpace> y(N);
+    cusp::array1d<ValueType,MemorySpace> z(N);
+    cusp::array1d<ValueType,MemorySpace> r(N);
+    cusp::array1d<ValueType,MemorySpace> p(N);
+
+    // y <- Ax
+    cusp::multiply(A, X, y);
+
+    //define the normalization factor
+    ValueType normFactor = 1.0;
+
+    #include "buildNormFactor.H"
+
+    // r <- b - A*x
+    cusp::blas::axpby(B, y, r, ValueType(1), ValueType(-1));
+
+    // z <- M*r
+    cusp::multiply(M, r, z);
+
+    // p <- z
+    cusp::blas::copy(z, p);
+
+    // rz = <r^H, z>
+    ValueType rz = cusp::blas::dotc(r, z);
+
+    ValueType normR = cusp::blas::nrm2(r)/normFactor;
+    ValueType normR0 = normR;   // Initial residual
+    solverPerf->iRes = normR0;
+    int count = 0;
+
+    while
+    (
+        normR > (solverPerf->tol)
+     && count < (solverPerf->maxIter)
+     && normR/normR0 >= (solverPerf->relTol)
+     || count < solverPerf->minIter
+    )
+    {
+        // y <- Ap
+        cusp::multiply(A, p, y);
+
+        // alpha <- <r,z>/<y,p>
+        ValueType alpha =  rz / cusp::blas::dotc(y, p);
+
+        // x <- x + alpha * p
+        cusp::blas::axpy(p, X, alpha);
+
+        // r <- r - alpha * y
+        cusp::blas::axpy(y, r, -alpha);
+
+        // z <- M*r
+        cusp::multiply(M, r, z);
+
+        ValueType rz_old = rz;
+
+        // rz = <r^H, z>
+        rz = cusp::blas::dotc(r, z);
+
+        // beta <- <r_{i+1},r_{i+1}>/<r,r>
+        ValueType beta = rz / rz_old;
+
+        // p <- r + beta*p should be p <- z + beta*p
+        cusp::blas::axpby(z, p, p, ValueType(1), beta);
+
+        normR = cusp::blas::nrm2(r)/normFactor;
+
+        count++;
+    }
+    // End the krylov solver
+
+    // Final residual
+    solverPerf->fRes = normR;
+    solverPerf->nIterations = count;
+
+    // Converged?
+    if
+    (
+        solverPerf->fRes<=solverPerf->tol
+     || solverPerf->fRes/solverPerf->iRes<=solverPerf->relTol
+    )
+    {
+        solverPerf->converged = true;
+    }
+    else
+    {
+        solverPerf->converged=false;
+    }
+
+    // Pass the solution vector back
+    ces->X = X;
+}
--- a/src/cudaSolvers/cudaCG/cudaCG.C
+++ b/src/cudaSolvers/cudaCG/cudaCG.C
@ -0,0 +1,177 @@
+/*---------------------------------------------------------------------------*\
+  =========                 |
+  \\      /  F ield         | OpenFOAM: The Open Source CFD Toolbox
+   \\    /   O peration     |
+    \\  /    A nd           | Copyright held by original author
+     \\/     M anipulation  |
+-------------------------------------------------------------------------------
+License
+    This file is part of OpenFOAM.
+
+    OpenFOAM is free software; you can redistribute it and/or modify it
+    under the terms of the GNU General Public License as published by the
+    Free Software Foundation; either version 2 of the License, or (at your
+    option) any later version.
+
+    OpenFOAM is distributed in the hope that it will be useful, but WITHOUT
+    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+    FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+    for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with OpenFOAM; if not, write to the Free Software Foundation,
+    Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+\*---------------------------------------------------------------------------*/
+
+#include "cudaCG.H"
+
+// Preconditioner and solver are hardwired due to
+// code structure of Cusp library. Below are
+// external functions with solver/preconditioner combinations
+
+// CG with diagonal preconditioning
+extern "C" void cgDiag
+(
+    cuspEquationSystem* ces,
+    cudaSolverPerformance* solverParam
+);
+
+// CG with Ainv preconditioning
+extern "C" void cgAinv
+(
+    cuspEquationSystem* ces,
+    cudaSolverPerformance* solverPerf,
+    ValueType drop_tolerance,
+    int nonzero_per_row,
+    bool lin_dropping,
+    int lin_param
+);
+
+// CG with amg preconditioning
+extern "C" void cgAmg
+(
+    cuspEquationSystem* ces,
+    cudaSolverPerformance* solverParam,
+    ValueType theta
+);
+
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
+
+namespace Foam
+{
+    defineTypeNameAndDebug(cudaCG, 0);
+
+    lduSolver::addsymMatrixConstructorToTable<cudaCG>
+        addcudaCGSymMatrixConstructorToTable_;
+}
+
+
+// * * * * * * * * * * * * * * * * Constructors  * * * * * * * * * * * * * * //
+
+// - Construct from matrix and solver data stream
+Foam::cudaCG::cudaCG
+(
+    const word& fieldName,
+    const lduMatrix& matrix,
+    const FieldField<Field, scalar>& coupleBouCoeffs,
+    const FieldField<Field, scalar>& coupleIntCoeffs,
+    const lduInterfaceFieldPtrsList& interfaces,
+    const dictionary& dict
+)
+:
+    cudaSolver
+    (
+        fieldName,
+        matrix,
+        coupleBouCoeffs,
+        coupleIntCoeffs,
+        interfaces,
+        dict
+    )
+{}
+
+
+// * * * * * * * * * * * * * * * Member Functions  * * * * * * * * * * * * * //
+
+Foam::lduSolverPerformance Foam::cudaCG::solve
+(
+    scalarField& x,
+    const scalarField& b,
+    const direction cmpt
+) const
+{
+    // Initialize Cusp solver perfomance
+    cudaSolverPerformance solverPerf = cudaSolverPerformanceDefault();
+    solverPerf.minIter = minIter(); // Minimum iterations
+    solverPerf.maxIter = maxIter(); // Maximum iterations
+    solverPerf.relTol = relTolerance(); // Relative tolerance
+    solverPerf.tol = tolerance(); // Tolerance
+
+    if (lduMatrix::debug >= 2)
+    {
+         solverPerf.debugCusp = true;
+    }
+
+    // Initialize and copy matrix data to GPU
+    cuspEquationSystem ces = createSymCuspMatrix(matrix(), x, b);
+
+    // Call solver externally
+    word preconName(dict().lookup("preconditioner"));
+    if (preconName == "diagonal")
+    {
+        cgDiag(&ces, &solverPerf);
+    }
+    else if(preconName == "Ainv")
+    {
+        cgAinv
+        (
+            &ces,
+            &solverPerf,
+            dict().lookupOrDefault<scalar>("dropTolerance", 0.1),
+            dict().lookupOrDefault<label>("nonzeroPerRow", -1),
+            dict().lookupOrDefault<bool>("LinDropping", false),
+            dict().lookupOrDefault<label>("LinParameter", 1)
+        );
+    }
+    else if(preconName == "amg")
+    {
+        cgAmg
+        (
+            &ces,
+            &solverPerf,
+            dict().lookupOrDefault<scalar>("theta", 0)
+        );
+    }
+    else
+    {
+        FatalErrorIn("cudaCG::solver()")
+            << "Unknown preconditioner name. "
+            << "Options are:" << nl
+            << "(" << nl
+            << "diagonal" << nl
+            << "Ainv" << nl
+            << "amg" << nl
+            << ")" << nl
+            << abort(FatalError);
+    }
+
+    // Copy the x vector back to Openfoam
+    thrust::copy(ces.X.begin(), ces.X.end(), x.begin());
+
+    // Return solver output
+    return lduSolverPerformance
+    (
+        typeName,
+        fieldName(),
+        solverPerf.iRes,
+        solverPerf.fRes,
+        solverPerf.nIterations,
+        solverPerf.converged,
+        solverPerf.singular
+    );
+
+}
+
+
+// ************************************************************************* //
--- a/src/cudaSolvers/cudaCG/cudaCG.H
+++ b/src/cudaSolvers/cudaCG/cudaCG.H
@ -0,0 +1,114 @@
+/*---------------------------------------------------------------------------*\
+  =========                 |
+  \\      /  F ield         | OpenFOAM: The Open Source CFD Toolbox
+   \\    /   O peration     |
+    \\  /    A nd           | Copyright held by original author
+     \\/     M anipulation  |
+-------------------------------------------------------------------------------
+License
+    This file is part of OpenFOAM.
+
+    OpenFOAM is free software; you can redistribute it and/or modify it
+    under the terms of the GNU General Public License as published by the
+    Free Software Foundation; either version 2 of the License, or (at your
+    option) any later version.
+
+    OpenFOAM is distributed in the hope that it will be useful, but WITHOUT
+    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+    FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+    for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with OpenFOAM; if not, write to the Free Software Foundation,
+    Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+Class
+    cudaCG
+
+Description
+    Preconditioned Conjugate Gradient solver with run-time selectable
+    preconditioning for use with GPU
+
+Author
+    Dominik Christ, Wikki Ltd.
+    Based on Cufflink library by Daniel P. Combest
+
+SourceFiles
+    cudaCG.C
+
+\*---------------------------------------------------------------------------*/
+
+#ifndef cudaCG_H
+#define cudaCG_H
+
+#include "cudaSolver.H"
+
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
+
+namespace Foam
+{
+
+/*---------------------------------------------------------------------------*\
+			     Class cudaCG Declaration
+\*---------------------------------------------------------------------------*/
+
+class cudaCG
+:
+    public cudaSolver
+{
+    // Private Member Functions
+
+	//- Disallow default bitwise copy construct
+	cudaCG(const cudaCG&);
+
+	//- Disallow default bitwise assignment
+	void operator=(const cudaCG&);
+
+
+public:
+
+    //- Runtime type information
+    TypeName("cudaCG");
+
+
+    // Constructors
+
+	//- Construct from matrix components and solver data stream
+	cudaCG
+	(
+	    const word& fieldName,
+	    const lduMatrix& matrix,
+	    const FieldField<Field, scalar>& coupleBouCoeffs,
+	    const FieldField<Field, scalar>& coupleIntCoeffs,
+	    const lduInterfaceFieldPtrsList& interfaces,
+	    const dictionary& dict
+	);
+
+
+    // Destructor
+
+	virtual ~cudaCG()
+	{}
+
+
+    // Member Functions
+
+	//- Solve the matrix with this solver
+	virtual lduSolverPerformance solve
+	(
+	    scalarField& x,
+	    const scalarField& b,
+	    const direction cmpt = 0
+	) const;
+};
+
+
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
+
+} // End namespace Foam
+
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
+
+#endif
+
+// ************************************************************************* //
--- a/src/cudaSolvers/cudaSolver/cudaSolver.C
+++ b/src/cudaSolvers/cudaSolver/cudaSolver.C
@ -0,0 +1,239 @@
+/*---------------------------------------------------------------------------*\
+  =========                 |
+  \\      /  F ield         | OpenFOAM: The Open Source CFD Toolbox
+   \\    /   O peration     |
+    \\  /    A nd           | Copyright held by original author
+     \\/     M anipulation  |
+-------------------------------------------------------------------------------
+License
+    This file is part of OpenFOAM.
+
+    OpenFOAM is free software; you can redistribute it and/or modify it
+    under the terms of the GNU General Public License as published by the
+    Free Software Foundation; either version 2 of the License, or (at your
+    option) any later version.
+
+    OpenFOAM is distributed in the hope that it will be useful, but WITHOUT
+    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+    FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+    for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with OpenFOAM; if not, write to the Free Software Foundation,
+    Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+\*---------------------------------------------------------------------------*/
+
+#include "cudaSolver.H"
+
+
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
+
+namespace Foam
+{
+    defineTypeNameAndDebug(cudaSolver, 0);
+}
+
+
+// * * * * * * * * * * * * * * * * Constructors  * * * * * * * * * * * * * * //
+
+Foam::cudaSolver::cudaSolver
+(
+    const word& fieldName,
+    const lduMatrix& matrix,
+    const FieldField<Field, scalar>& coupleBouCoeffs,
+    const FieldField<Field, scalar>& coupleIntCoeffs,
+    const lduInterfaceFieldPtrsList& interfaces,
+    const dictionary& dict
+)
+:
+    lduSolver
+    (
+    fieldName,
+    matrix,
+    coupleBouCoeffs,
+    coupleIntCoeffs,
+    interfaces,
+    dict
+    )
+{}
+
+
+// * * * * * * * * * * * * * * * Member Functions  * * * * * * * * * * * * * //
+
+cuspEquationSystem Foam::cudaSolver::createSymCuspMatrix
+(
+    const lduMatrix& matrix,
+    const scalarField& x,
+    const scalarField& b
+) const
+{
+    cuspEquationSystem ces;
+    ces.nCells = x.size();
+    ces.nFaces = matrix.lower().size();
+
+    ces.A = cusp::coo_matrix<IndexType, ValueType, hostMemorySpace>
+    (
+        ces.nCells,
+        ces.nCells,
+        ces.nCells+2*ces.nFaces
+    );
+    ces.X = cusp::array1d< ValueType, hostMemorySpace>(ces.nCells);
+    ces.B = cusp::array1d< ValueType, hostMemorySpace>(ces.nCells);
+
+    // Copy values of lduMatrix diag to A COO matrix
+    thrust::copy
+    (
+        matrix.diag().begin(),
+        matrix.diag().end(),
+        ces.A.values.begin()
+    );
+
+    // Copy values of lduMatrix lower to A COO matrix
+    thrust::copy
+    (
+        matrix.lower().begin(),
+        matrix.lower().end(),
+        ces.A.values.begin()+ces.nCells
+    );
+
+    // Since matrix is symmetric, do not copy upper values to save time
+    // -> performed on GPU
+    // Copy row indices of lower into A COO matrix
+    thrust::copy
+    (
+        matrix.lduAddr().upperAddr().begin(),
+        matrix.lduAddr().upperAddr().end(),
+        ces.A.row_indices.begin()+ces.nCells
+    );
+    // Copy column indices of lower into A COO matrix
+    thrust::copy
+    (
+        matrix.lduAddr().lowerAddr().begin(),
+        matrix.lduAddr().lowerAddr().end(),
+        ces.A.column_indices.begin()+ces.nCells
+    );
+
+    // Do not initialize the row and column values of diag to save time
+    // -> performed on GPU
+    // Copy x of lower into x vector
+    thrust::copy(x.begin(), x.end(), ces.X.begin());
+    // Copy b of lower into b vector
+    thrust::copy(b.begin(), b.end(), ces.B.begin());
+
+    return ces;
+}
+
+cuspEquationSystem Foam::cudaSolver::createAsymCuspMatrix
+(
+    const lduMatrix& matrix,
+    const scalarField& x,
+    const scalarField& b
+) const
+{
+    cuspEquationSystem ces;
+    ces.nCells = x.size();
+    ces.nFaces = matrix.lower().size();
+
+    ces.A = cusp::coo_matrix<IndexType, ValueType, hostMemorySpace>
+    (
+        ces.nCells,
+        ces.nCells,
+        ces.nCells + 2*ces.nFaces
+    );
+    ces.X = cusp::array1d< ValueType, hostMemorySpace>(ces.nCells);
+    ces.B = cusp::array1d< ValueType, hostMemorySpace>(ces.nCells);
+
+    // Copy values from the lduMatrix to our equation system
+    // Copy values of lduMatrix diag to A COO matrix
+    thrust::copy
+    (
+        matrix.diag().begin(),
+        matrix.diag().end(),
+        ces.A.values.begin()
+    );
+
+    // Copy values of lduMatrix lower to A COO matrix
+    thrust::copy
+    (
+        matrix.lower().begin(),
+        matrix.lower().end(),
+        ces.A.values.begin() + ces.nCells
+    );
+
+    // Copy values of lduMatrix upper to A COO matrix
+    thrust::copy
+    (
+        matrix.upper().begin(),
+        matrix.upper().end(),
+        ces.A.values.begin() + ces.nCells + ces.nFaces
+    );
+
+    // Copy row and column indices of lower and upper to our equations system
+    // do not initialize the row and column values of diag to save time
+    // -> performed on GPU
+
+    // Copy row indices of lower into A COO matrix
+    thrust::copy
+    (
+        matrix.lduAddr().upperAddr().begin(),
+        matrix.lduAddr().upperAddr().end(),
+        ces.A.row_indices.begin() + ces.nCells
+    );
+
+    // Copy column indices of lower into A COO matrix
+    thrust::copy
+    (
+        matrix.lduAddr().lowerAddr().begin(),
+        matrix.lduAddr().lowerAddr().end(),
+        ces.A.column_indices.begin() + ces.nCells
+    );
+
+    // Copy row indices of upper into A COO matrix
+    thrust::copy
+    (
+        matrix.lduAddr().lowerAddr().begin(),
+        matrix.lduAddr().lowerAddr().end(),
+        ces.A.row_indices.begin() + ces.nCells + ces.nFaces
+    );
+
+    // Copy column indices of upper into A COO matrix
+    thrust::copy
+    (
+        matrix.lduAddr().upperAddr().begin(),
+        matrix.lduAddr().upperAddr().end(),
+        ces.A.column_indices.begin() + ces.nCells + ces.nFaces
+    );
+
+    // Copy x of lower into x vector
+    thrust::copy(x.begin(), x.end(), ces.X.begin());
+    // Copy b of lower into b vector
+    thrust::copy(b.begin(), b.end(), ces.B.begin());
+    return ces;
+}
+
+cudaSolverPerformance Foam::cudaSolver::cudaSolverPerformanceDefault() const
+{
+    cudaSolverPerformance csp;
+
+    csp.minIter = 0;
+    csp.maxIter = 1000;
+    csp.relTol = 0;
+    csp.tol = 1e-6;
+
+    csp.nIterations = 0;
+    csp.iRes = -1;
+    csp.fRes = -1;
+    csp.converged = false;
+    csp.singular = false;
+
+    csp.debugCusp = false;
+
+    return csp;
+}
+
+
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
+
+
+// ************************************************************************* //
--- a/src/cudaSolvers/cudaSolver/cudaSolver.H
+++ b/src/cudaSolvers/cudaSolver/cudaSolver.H
@ -0,0 +1,136 @@
+/*---------------------------------------------------------------------------*\
+  =========                 |
+  \\      /  F ield         | OpenFOAM: The Open Source CFD Toolbox
+   \\    /   O peration     |
+    \\  /    A nd           | Copyright held by original author
+     \\/     M anipulation  |
+-------------------------------------------------------------------------------
+License
+    This file is part of OpenFOAM.
+
+    OpenFOAM is free software; you can redistribute it and/or modify it
+    under the terms of the GNU General Public License as published by the
+    Free Software Foundation; either version 2 of the License, or (at your
+    option) any later version.
+
+    OpenFOAM is distributed in the hope that it will be useful, but WITHOUT
+    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+    FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+    for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with OpenFOAM; if not, write to the Free Software Foundation,
+    Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+Class
+    Foam::cudaSolver
+
+Description
+    Base class for GPU based solvers
+
+SourceFiles
+    cudaSolver.C
+
+Author
+    Dominik Christ, Wikki Ltd.
+    Based on Cufflink library by Daniel P. Combest
+
+\*---------------------------------------------------------------------------*/
+
+#ifndef cudaSolver_H
+#define cudaSolver_H
+
+#include "cudaTypes.H"
+#include "lduMatrix.H"
+#include "autoPtr.H"
+#include "runTimeSelectionTables.H"
+
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
+
+namespace Foam
+{
+
+
+/*---------------------------------------------------------------------------*\
+                           Class cudaSolver Declaration
+\*---------------------------------------------------------------------------*/
+
+class cudaSolver
+:
+    public lduSolver
+{
+
+    // Private Member Functions
+
+        //- Disallow default bitwise copy construct
+        cudaSolver(const cudaSolver&);
+
+        //- Disallow default bitwise assignment
+        void operator=(const cudaSolver&);
+
+
+public:
+
+    //- Runtime type information
+    TypeName("cudaSolver");
+
+
+    // Constructors
+
+        //- Construct from components
+        cudaSolver
+        (
+            const word& fieldName,
+            const lduMatrix& matrix,
+            const FieldField<Field, scalar>& coupleBouCoeffs,
+            const FieldField<Field, scalar>& coupleIntCoeffs,
+            const lduInterfaceFieldPtrsList& interfaces,
+            const dictionary& dict
+        );
+
+
+    // Destructor
+
+        virtual ~cudaSolver()
+        {}
+
+
+    // Member Functions
+        //- Solve the matrix with this solver
+        virtual lduSolverPerformance solve
+        (
+            scalarField& x,
+            const scalarField& b,
+            const direction cmpt = 0
+        ) const = 0;
+
+        //- Create COO matrix from symmetric lduMatrix
+        cuspEquationSystem createSymCuspMatrix
+        (
+            const lduMatrix& matrix,
+            const scalarField& x,
+            const scalarField& b
+        ) const;
+
+        //- Create COO matrix from asymmetric lduMatrix
+        cuspEquationSystem createAsymCuspMatrix
+        (
+            const lduMatrix& matrix,
+            const scalarField& x,
+            const scalarField& b
+        ) const;
+
+        cudaSolverPerformance cudaSolverPerformanceDefault() const;
+
+};
+
+
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
+
+} // End namespace Foam
+
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
+
+#endif
+
+// ************************************************************************* //
--- a/src/cudaSolvers/cudaSolver/cudaTypes.H
+++ b/src/cudaSolvers/cudaSolver/cudaTypes.H
@ -0,0 +1,99 @@
+/*---------------------------------------------------------------------------*\
+  =========                 |
+  \\      /  F ield         | OpenFOAM: The Open Source CFD Toolbox
+   \\    /   O peration     |
+    \\  /    A nd           | Copyright held by original author
+     \\/     M anipulation  |
+-------------------------------------------------------------------------------
+License
+    This file is part of OpenFOAM.
+
+    OpenFOAM is free software; you can redistribute it and/or modify it
+    under the terms of the GNU General Public License as published by the
+    Free Software Foundation; either version 2 of the License, or (at your
+    option) any later version.
+
+    OpenFOAM is distributed in the hope that it will be useful, but WITHOUT
+    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+    FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+    for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with OpenFOAM; if not, write to the Free Software Foundation,
+    Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+Class
+    Foam::cudaSolver
+
+Description
+    Base class for GPU based solvers
+
+SourceFiles
+    cudaSolver.C
+
+Author
+    Dominik Christ, Wikki Ltd.
+    Based on Cufflink library by Daniel P. Combest
+
+\*---------------------------------------------------------------------------*/
+#ifndef cudaTypes_H
+#define cudaTypes_H
+
+#if defined(WM_DP)
+
+    #define MPI_SCALAR MPI_DOUBLE
+    typedef double ValueType;
+
+#elif defined(WM_SP)
+
+    #define MPI_SCALAR MPI_FLOAT
+    typedef float ValueType;
+
+#endif
+
+#include <cusp/coo_matrix.h>
+#include <cusp/memory.h>
+#include <thrust/host_vector.h>
+
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
+
+typedef cusp::device_memory MemorySpace;
+typedef cusp::host_memory hostMemorySpace;
+typedef int IndexType;
+
+// used to prevent floating point exception
+const ValueType SMALL = 1e-20;
+
+
+struct cuspEquationSystem
+{
+    cusp::coo_matrix<IndexType,ValueType,hostMemorySpace> A;
+    cusp::array1d< ValueType, hostMemorySpace> X;
+    cusp::array1d< ValueType, hostMemorySpace> B;
+    int nCells;
+    int nFaces;
+};
+
+
+struct cudaSolverPerformance
+{
+    int minIter;
+    int maxIter;
+    double relTol;
+    double tol;
+
+    int nIterations;
+    double iRes;
+    double fRes;
+    bool converged;
+    bool singular;
+
+    bool debugCusp;
+};
+
+
+// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
+
+#endif
+
+// ************************************************************************* //
--- a/src/cudaSolvers/include/buildNormFactor.H
+++ b/src/cudaSolvers/include/buildNormFactor.H
@ -0,0 +1,85 @@
+/**********************************************************************\
+  ______  __    __   _______  _______  __       __   __   __   __  ___
+ /      ||  |  |  | |   ____||   ____||  |     |  | |  \ |  | |  |/  /
+|  ,----'|  |  |  | |  |__   |  |__   |  |     |  | |   \|  | |  '  /
+|  |     |  |  |  | |   __|  |   __|  |  |     |  | |  . `  | |    <
+|  `----.|  `--'  | |  |     |  |     |  `----.|  | |  |\   | |  .  \
+ \______| \______/  |__|     |__|     |_______||__| |__| \__| |__|\__\
+
+Cuda For FOAM Link
+
+cufflink is a library for linking numerical methods based on Nvidia's
+Compute Unified Device Architecture (CUDA™) C/C++ programming language
+and OpenFOAM®.
+
+Please note that cufflink is not approved or endorsed by OpenCFD®
+Limited, the owner of the OpenFOAM® and OpenCFD® trademarks and
+producer of OpenFOAM® software.
+
+The official web-site of OpenCFD® Limited is www.openfoam.com .
+
+------------------------------------------------------------------------
+This file is part of cufflink.
+
+    cufflink is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    cufflink is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with cufflink.  If not, see <http://www.gnu.org/licenses/>.
+
+    Author
+	Daniel P. Combest.  All rights reserved.
+
+    Description
+	Builds normalization factor in exactly teh same way OpenFOAM® does
+	in order to normalize the residual to 1.  See provided
+	documentation for exact formulation of normalization factor
+
+\**********************************************************************/
+
+
+// Build the normfactor as OF-1.6-ext does in file
+// ($FOAM_SRC/OpenFOAM/matrices/lduMatrix/lduMatrix/lduMatrixSolver.C)
+
+//all variables will be deleted once out of scope
+{
+
+    //compute average of x vector
+    ValueType xRef = thrust::reduce
+    (
+        X.begin(),
+        X.begin() + A.num_rows
+    )/ValueType(A.num_rows);
+
+    //vector of average X values
+    cusp::array1d<ValueType,MemorySpace> xBar(N,xRef);
+
+    //holds A*xBar
+    cusp::array1d<ValueType,MemorySpace> yBar(N);
+
+    //compute ybar<-A*xBar
+    cusp::multiply(A, xBar, yBar);
+
+    cusp::array1d<ValueType,MemorySpace> ymyBar(N);  // holds y - yBar
+    cusp::array1d<ValueType,MemorySpace> bmyBar(N);  // holds b - yBar
+
+    // Calculate: ymyBar <- y - yBar
+    cusp::blas::axpby(y, yBar, ymyBar, ValueType(1), ValueType(-1));
+    // Calculate: bmyBar <- b - yBar
+    cusp::blas::axpby(B, yBar, bmyBar, ValueType(1), ValueType(-1));
+
+    //compute norm factor exactly as OpenFOAM does
+    normFactor = cusp::blas::nrm2(ymyBar) + cusp::blas::nrm2(bmyBar) + SMALL;
+
+    if (solverPerf->debugCusp)
+    {
+        std::cout<<"   Normalisation factor = "<<normFactor<<"\n";
+    }
+}
--- a/src/cudaSolvers/include/fillCOOMatrix.H
+++ b/src/cudaSolvers/include/fillCOOMatrix.H
@ -0,0 +1,85 @@
+/**********************************************************************\
+  ______  __    __   _______  _______  __       __   __   __   __  ___
+ /      ||  |  |  | |   ____||   ____||  |     |  | |  \ |  | |  |/  /
+|  ,----'|  |  |  | |  |__   |  |__   |  |     |  | |   \|  | |  '  /
+|  |     |  |  |  | |   __|  |   __|  |  |     |  | |  . `  | |    <
+|  `----.|  `--'  | |  |     |  |     |  `----.|  | |  |\   | |  .  \
+ \______| \______/  |__|     |__|     |_______||__| |__| \__| |__|\__\
+
+Cuda For FOAM Link
+
+cufflink is a library for linking numerical methods based on Nvidia's
+Compute Unified Device Architecture (CUDA™) C/C + + programming language
+and OpenFOAM®.
+
+Please note that cufflink is not approved or endorsed by OpenCFD®
+Limited, the owner of the OpenFOAM® and OpenCFD® trademarks and
+producer of OpenFOAM® software.
+
+The official web-site of OpenCFD® Limited is www.openfoam.com .
+
+------------------------------------------------------------------------
+This file is part of cufflink.
+
+    cufflink is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    cufflink is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with cufflink.  If not, see <http://www.gnu.org/licenses/>.
+
+    Author
+        Daniel P. Combest.  All rights reserved.
+
+    Description
+       Populate the COO matrix
+
+\**********************************************************************/
+
+// Fill in the rest of the diag (rows and col),
+// upper and upper.rows and upper.cols
+cusp::coo_matrix<IndexType, ValueType, MemorySpace> A(ces->A);
+cusp::array1d<ValueType, MemorySpace> X(ces->X);
+cusp::array1d<ValueType, MemorySpace> B(ces->B);
+
+// Determine row indices of diagonal values and fill A COO matrix
+thrust::sequence(A.row_indices.begin(), A.row_indices.begin() + ces->nCells);
+
+// Determine column indices of diagonal values and fill A COO matrix
+thrust::sequence
+(
+    A.column_indices.begin(),
+    A.column_indices.begin() + ces->nCells
+);
+
+// Copy values of lower into upper in COO matrix
+thrust::copy
+(
+    A.values.begin() + ces->nCells,
+	A.values.begin() + ces->nCells + ces->nFaces,
+	A.values.begin() + ces->nCells + ces->nFaces
+);
+
+// Copy row indices of lower to columns of upper into A COO matrix
+thrust::copy
+(
+    A.row_indices.begin() + ces->nCells,
+    A.row_indices.begin() + ces->nCells + ces->nFaces,
+    A.column_indices.begin() + ces->nCells + ces->nFaces
+);
+
+// Copy column indices of lower to rows of upper into A COO matrix
+thrust::copy
+(
+    A.column_indices.begin() + ces->nCells,
+    A.column_indices.begin() + ces->nCells + ces->nFaces,
+    A.row_indices.begin() + ces->nCells + ces->nFaces
+);
+
+A.sort_by_row_and_column(); // Speeds code up a little bit more
--- a/wmake/rules/linux64Gcc/general
+++ b/wmake/rules/linux64Gcc/general
@ -8,3 +8,4 @@ include $(GENERAL_RULES)/standard
 include $(RULES)/X
 include $(RULES)/c
 include $(RULES)/c++
+include $(RULES)/nvcc