Fast processor SAMG support without search

This commit is contained in:
Hrvoje Jasak 2017-06-08 17:46:02 +01:00
parent 774009e384
commit 41e28bb072

View file

@ -55,24 +55,17 @@ Foam::processorSAMGInterface::processorSAMGInterface
fineProcInterface_(refCast<const processorLduInterface>(fineInterface)), fineProcInterface_(refCast<const processorLduInterface>(fineInterface)),
comm_(fineProcInterface_.comm()), comm_(fineProcInterface_.comm()),
tag_(fineProcInterface_.tag()) tag_(fineProcInterface_.tag())
{
// MASTER processor
if (myProcNo() < neighbProcNo())
{ {
// READ FIRST // READ FIRST
//------------------------------------------------------------------------------ // This code is written for the FILTERED local matrix
// This code is written for the FILTERED local matrix. Why? // because the addressing is very natural. If this is not ok, filter the
// Because the addressing is very natural. If this is not ok, filter the
// big matrix here using fineInterface.faceCells(), transpose, and use // big matrix here using fineInterface.faceCells(), transpose, and use
// the same names for crMatrix arrays (then, there is no need to change // the same names for crMatrix arrays (then, there is no need to change
// the triple product code). // the triple product code).
//------------------------------------------------------------------------------
// Algorithm details: // Algorithm details:
// Go through the triple product similar to SAMG Policy - collect // Go through the triple product similar to SAMG Policy - collect
// coarse addressing on master - send it to slave to sort its addressing // coarse addressing on master and slave in the same order
// accordingly - HashTable containing labelPair(owner, neighbour) as key
// and coarse face index as entry
// The resulting contributions from triple product will be saved using // The resulting contributions from triple product will be saved using
// the following arrays: // the following arrays:
@ -89,206 +82,274 @@ Foam::processorSAMGInterface::processorSAMGInterface
// *Note: (A*B)^T = B^T*A^T, which is exactly what I have. Use the fact // *Note: (A*B)^T = B^T*A^T, which is exactly what I have. Use the fact
// on the slave processor!!! // on the slave processor!!!
// Addressing of coarse faces - for sorting faceCells_ on slave // Pair collection is always done looking for the master side
HashTable<label, labelPair, Hash<labelPair> > coarseLevelTable; // HJ, 8/Jun/2017
// Lists for saving addressing and weights - give size, but check at the // Look up neighbours for a master
// end! HashTable<DynamicList<label, 4>, label, Hash<label> > neighboursTable
DynamicList<label> dynFaceCells(5*fineProcInterface_.interfaceSize());
DynamicList<label> dynFineAddr(5*fineProcInterface_.interfaceSize());
DynamicList<scalar> dynRestrictW(5*fineProcInterface_.interfaceSize());
DynamicList<label> dynRestrictAddr(5*fineProcInterface_.interfaceSize());
// Filtered prolongation matrix from my side
crMatrix prolongationT = interfaceProlongation.T();
const labelList& rRowStart = prolongationT.crAddr().rowStart();
const labelList& rColumn = prolongationT.crAddr().column();
const scalarField& rCoeffs = prolongationT.coeffs();
const label rNRows = prolongationT.crAddr().nRows();
// Filtered prolongation matrix from neighbour - to obtain restriction,
// make a transpose
const labelList& pRowStart =
nbrInterfaceProlongation.crAddr().rowStart();
const labelList& pColumn = nbrInterfaceProlongation.crAddr().column();
const scalarField& pCoeffs = nbrInterfaceProlongation.coeffs();
const label pNCols = nbrInterfaceProlongation.crAddr().nCols();
labelList coeffMark(pNCols, -1);
label nCoarseCoeffs = 0;
label nCoarseContribs = 0;
// Row of R
for (label ir = 0; ir < rNRows; ir++)
{
// Restart coeffMark for each restriction row
coeffMark = -1;
// Row start addressing R
for
( (
label indexR = rRowStart[ir]; Foam::max(128, fineProcInterface_.interfaceSize())
indexR < rRowStart[ir + 1];
indexR++
)
{
// Column of coefficient in R
// This is an important information: it tells us which
// boundary coefficient this restriction coeff multiplies! It is
// the index of the boundary coeff in the boundary array.
const label jr = rColumn[indexR];
for
(
label indexP = pRowStart[jr];
indexP < pRowStart[jr + 1];
indexP++
)
{
// Column of P, goes into coarse address
label jp = pColumn[indexP];
// To which coeff do I add myself to? Does the address
// in this row already exist?
label identify = coeffMark[jp];
// If the coeff at this address doesn't exist
if (identify == -1)
{
// Found a new coarse coeff!
identify = nCoarseCoeffs;
coeffMark[jp] = nCoarseCoeffs;
dynFaceCells.append(ir);
// Save address and coeff into HashTable for sorting
// faceCells on slave
coarseLevelTable.insert
(
labelPair(ir, jp),
nCoarseCoeffs
); );
nCoarseCoeffs++;
}
dynFineAddr.append(jr); // Neighbour face-faces addressing for a face with multiple neighbours
dynRestrictW.append(rCoeffs[indexR]*pCoeffs[indexP]); HashTable<DynamicList<DynamicList<label, 4>, 4>, label, Hash<label> >
dynRestrictAddr.append(identify); faceFaceTable
(
Foam::max(128, fineProcInterface_.interfaceSize())
);
nCoarseContribs++; // Neighbour face-faces weights for a face with split neighbours
} HashTable<DynamicList<DynamicList<scalar, 4>, 4>, label, Hash<label> >
} faceFaceWeightsTable
} (
Foam::max(128, fineProcInterface_.interfaceSize())
);
// Check for fixed lists // Count the number of coarse faces
if (nCoarseContribs > 5*fineProcInterface_.interfaceSize()) label nCoarseFaces = 0;
// Count the number of agglomeration pairs
label nAgglomPairs = 0;
// Switching prolongation matrices
const crMatrix* masterP = NULL;
const crMatrix* neighbourP = NULL;
if (myProcNo() < neighbProcNo())
{ {
WarningIn("processorSAMGInterface::processorSAMGInterface(...)") // Grab prolongation matrix, master side
<< "Coarse SAMG processor siginificantly bigger than fine: " masterP = &interfaceProlongation;
<< "nCoarseFaces = " << nCoarseContribs neighbourP = &nbrInterfaceProlongation;
<< " nFineFaces = " << fineProcInterface_.interfaceSize()
<< endl;
} }
// Resize arrays to final size
faceCells_.transfer(dynFaceCells.shrink());
fineAddressing_.transfer(dynFineAddr.shrink());
restrictWeights_.transfer(dynRestrictW.shrink());
restrictAddressing_.transfer(dynRestrictAddr.shrink());
// Send to slave
OPstream toNbr(Pstream::blocking, neighbProcNo());
toNbr<< coarseLevelTable << nCoarseContribs;
}
// Slave processor
else else
{ {
// Slave must receive the hash table sent by the master to know how to // Grab prolongation matrix, slave side
// sort its faceCells_ masterP = &nbrInterfaceProlongation;
IPstream fromNbr(Pstream::blocking, neighbProcNo()); neighbourP = &interfaceProlongation;
}
HashTable<label, labelPair, Hash<labelPair> > masterCoarseLevel = label curMaster, curSlave;
HashTable<label, labelPair, Hash<labelPair> >(fromNbr);
const label nCoarseContribs = readLabel(fromNbr); // Loop through the interface and calculate triple product row-by-row
for(label faceI = 0; faceI < fineProcInterface_.interfaceSize(); faceI++)
// Filtered prolongation matrix from my side
crMatrix prolongationT = interfaceProlongation.T();
const labelList& rRowStart = prolongationT.crAddr().rowStart();
const labelList& rColumn = prolongationT.crAddr().column();
const scalarField& rCoeffs = prolongationT.coeffs();
const label rNRows = prolongationT.crAddr().nRows();
// Filtered prolongation matrix from neighbour - to obtain restriction,
// make
// a transpose
const labelList& pRowStart =
nbrInterfaceProlongation.crAddr().rowStart();
const labelList& pColumn = nbrInterfaceProlongation.crAddr().column();
const scalarField& pCoeffs = nbrInterfaceProlongation.coeffs();
const label pNCols = nbrInterfaceProlongation.crAddr().nCols();
faceCells_.setSize(masterCoarseLevel.size(), -1);
restrictWeights_.setSize(nCoarseContribs);
fineAddressing_.setSize(nCoarseContribs);
restrictAddressing_.setSize(nCoarseContribs);
labelList coeffMark(pNCols, -1);
label nCoarseEntries = 0;
// This loop is only for sorting the faceCells_ on slave side to match
// the order on master
// Row of R
for (label ir = 0; ir < rNRows; ir++)
{ {
// Restart coeffMark for each restriction row // Triple product for only one row of prolongation and restriction
coeffMark = -1;
// Row start addressing R
for for
( (
label indexR = rRowStart[ir]; label indexR = masterP->crAddr().rowStart()[faceI];
indexR < rRowStart[ir + 1]; indexR < masterP->crAddr().rowStart()[faceI + 1];
indexR++ indexR++
) )
{ {
// Column of coefficient in R // Grab weight from restriction
const label jr = rColumn[indexR]; scalar rWeight = masterP->coeffs()[indexR];
for for
( (
label indexP = pRowStart[jr]; label indexP = neighbourP->crAddr().rowStart()[faceI];
indexP < pRowStart[jr + 1]; indexP < neighbourP->crAddr().rowStart()[faceI + 1];
indexP++ indexP++
) )
{ {
// Column of P, into coarse address // Grab weight from prolongation
label jp = pColumn[indexP]; scalar pWeight = neighbourP->coeffs()[indexP];
// Array for marking new contributions in the row ir // Code in the current master and slave - used for
label identify = coeffMark[jp]; // identifying the face
curMaster = masterP->crAddr().column()[indexR];
curSlave = neighbourP->crAddr().column()[indexP];
if (identify == -1) if (neighboursTable.found(curMaster))
{ {
label address = masterCoarseLevel[labelPair(jp, ir)]; // This contribution already exists - add the result
// to the existing contribution
// Found a new coarse coeff! // This master side face already exists
identify = address;
coeffMark[jp] = address; // Check all current neighbours to see if the current
faceCells_[identify] = ir; // slave already exists. If so, add the coefficient.
DynamicList<label, 4>& curNbrs =
neighboursTable.find(curMaster)();
DynamicList<DynamicList<label, 4>, 4>& curFaceFaces =
faceFaceTable.find(curMaster)();
DynamicList<DynamicList<scalar, 4>, 4>& curFaceWeights =
faceFaceWeightsTable.find(curMaster)();
// Search for coded neighbour
bool nbrFound = false;
forAll (curNbrs, curNbrI)
{
// Check neighbour slave
if (curNbrs[curNbrI] == curSlave)
{
nbrFound = true;
curFaceFaces[curNbrI].append(faceI);
curFaceWeights[curNbrI].append(pWeight*rWeight);
// New agglomeration pair found in already
// existing pair
nAgglomPairs++;
break;
}
} }
restrictWeights_[nCoarseEntries] = if (!nbrFound)
rCoeffs[indexR]*pCoeffs[indexP]; {
fineAddressing_[nCoarseEntries] = jr; curNbrs.append(curSlave);
restrictAddressing_[nCoarseEntries] = identify;
nCoarseEntries++; DynamicList<label, 4> newFF;
newFF.append(faceI);
curFaceFaces.append(newFF);
DynamicList<scalar, 4> newFW;
newFW.append(pWeight*rWeight);
curFaceWeights.append(newFW);
// New coarse face created for existing master
nCoarseFaces++;
nAgglomPairs++;
}
}
else
{
// This master has got no neighbours yet.
// Add a neighbour and a coefficient as a
// new list, thus creating a new face
DynamicList<label, 4> newNbrs;
newNbrs.append(curSlave);
neighboursTable.insert
(
curMaster,
newNbrs
);
DynamicList<DynamicList<label, 4>, 4> newFF;
newFF.append(DynamicList<label, 4>());
newFF[0].append(faceI);
faceFaceTable.insert
(
curMaster,
newFF
);
DynamicList<DynamicList<scalar, 4>, 4> newFFWeights;
newFFWeights.append(DynamicList<scalar, 4>());
newFFWeights[0].append(pWeight*rWeight);
faceFaceWeightsTable.insert
(
curMaster,
newFFWeights
);
// New coarse face created for a new master
nCoarseFaces++;
nAgglomPairs++;
} }
} }
} }
} // end for all fine faces
// Since only local faces are analysed, lists can now be resized
faceCells_.setSize(nCoarseFaces);
fineAddressing_.setSize(nAgglomPairs);
restrictAddressing_.setSize(nAgglomPairs);
restrictWeights_.setSize(nAgglomPairs);
// Global faces shall be assembled by the increasing label of master
// cluster ID.
labelList contents = neighboursTable.toc();
// Sort makes sure the order is identical on both sides.
// HJ, 20/Feb/2009 and 6/Jun/2016
sort(contents);
// Note: Restriction is done on master side only
// Reset face counter for re-use
nCoarseFaces = 0;
nAgglomPairs = 0;
// Master side
if (myProcNo() < neighbProcNo())
{
// On master side, the owner addressing is stored in table of contents
forAll (contents, masterI)
{
DynamicList<label, 4>& curNbrs =
neighboursTable.find(contents[masterI])();
DynamicList<DynamicList<label, 4>, 4>& curFaceFaces =
faceFaceTable.find(contents[masterI])();
DynamicList<DynamicList<scalar, 4>, 4>& curFaceWeights =
faceFaceWeightsTable.find(contents[masterI])();
forAll (curNbrs, curNbrI)
{
// Get faces and weights
DynamicList<label, 4>& facesIter = curFaceFaces[curNbrI];
DynamicList<scalar, 4>& weightsIter = curFaceWeights[curNbrI];
// Record master cluster index
faceCells_[nCoarseFaces] = contents[masterI];
// Collect agglomeration data
forAll (facesIter, facesIterI)
{
fineAddressing_[nAgglomPairs] = facesIter[facesIterI];
restrictAddressing_[nAgglomPairs] = nCoarseFaces;
restrictWeights_[nAgglomPairs] = weightsIter[facesIterI];
nAgglomPairs++;
}
nCoarseFaces++;
}
}
}
// Slave side
else
{
// On slave side, the owner addressing is stored in linked lists
forAll (contents, masterI)
{
DynamicList<label, 4>& curNbrs =
neighboursTable.find(contents[masterI])();
DynamicList<DynamicList<label, 4>, 4>& curFaceFaces =
faceFaceTable.find(contents[masterI])();
DynamicList<DynamicList<scalar, 4>, 4>& curFaceWeights =
faceFaceWeightsTable.find(contents[masterI])();
forAll (curNbrs, curNbrI)
{
// Get faces and weights
DynamicList<label, 4>& facesIter = curFaceFaces[curNbrI];
DynamicList<scalar, 4>& weightsIter = curFaceWeights[curNbrI];
faceCells_[nCoarseFaces] = curNbrs[curNbrI];
forAll (facesIter, facesIterI)
{
fineAddressing_[nAgglomPairs] = facesIter[facesIterI];
restrictAddressing_[nAgglomPairs] = nCoarseFaces;
restrictWeights_[nAgglomPairs] = weightsIter[facesIterI];
nAgglomPairs++;
}
nCoarseFaces++;
}
}
} }
} }