2 changes: 2 additions & 0 deletions packages/muelu/src/CMakeLists.txt
@@ -586,3 +586,5 @@ TRIBITS_ADD_LIBRARY(
# touch CMakeLists.txt because a new file was created in Utils/ExplicitInstantiation of Utils/ForwardDeclaration
# touch CMakeLists.txt because a new file was created in Utils/ExplicitInstantiation of Utils/ForwardDeclaration
# touch CMakeLists.txt because a new file was created in Utils/ExplicitInstantiation of Utils/ForwardDeclaration
# touch CMakeLists.txt because a new file was created in Utils/ExplicitInstantiation of Utils/ForwardDeclaration
# touch CMakeLists.txt because a new file was created in Utils/ExplicitInstantiation of Utils/ForwardDeclaration
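The repeated "touch" comments are MueLu's way of forcing CMake to reconfigure so that the scripts regenerating Utils/ExplicitInstantiation and Utils/ForwardDeclaration pick up the new class. For orientation, a rough sketch of what the generated forward-declaration header for the new class typically looks like; the header name and exact contents are assumptions based on the usual MueLu pattern, not part of this diff:

```cpp
// Hypothetical: Utils/ForwardDeclaration/MueLu_RepartitionUtilities_fwd.hpp as the
// generator would produce it (illustrative only).
#ifndef MUELU_REPARTITIONUTILITIES_FWD_HPP
#define MUELU_REPARTITIONUTILITIES_FWD_HPP

namespace MueLu {
template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
struct RepartitionUtilities;
}

#ifndef MUELU_REPARTITIONUTILITIES_SHORT
#define MUELU_REPARTITIONUTILITIES_SHORT
#endif

#endif  // MUELU_REPARTITIONUTILITIES_FWD_HPP
```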
3 changes: 3 additions & 0 deletions packages/muelu/src/Headers/MueLu_UseShortNamesScalar.hpp
@@ -296,6 +296,9 @@ using RepartitionBlockDiagonalFactory [[maybe_unused]] = MueLu::RepartitionBlock
#ifdef MUELU_REPARTITIONHEURISTICFACTORY_SHORT
using RepartitionHeuristicFactory [[maybe_unused]] = MueLu::RepartitionHeuristicFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>;
#endif
#ifdef MUELU_REPARTITIONUTILITIES_SHORT
using RepartitionUtilities [[maybe_unused]] = MueLu::RepartitionUtilities<Scalar, LocalOrdinal, GlobalOrdinal, Node>;
#endif
#ifdef MUELU_REPLICATEPFACTORY_SHORT
using ReplicatePFactory [[maybe_unused]] = MueLu::ReplicatePFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>;
#endif
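A minimal sketch of how the new alias is consumed, assuming the usual MueLu pattern (the surrounding class is hypothetical and not part of this PR): the alias only takes effect once MUELU_REPARTITIONUTILITIES_SHORT has been defined, normally by the generated forward-declaration header, and MueLu_UseShortNames.hpp has been included inside the class body.

```cpp
// Illustrative only -- not part of this PR.
#include <tuple>
#include <Teuchos_Array.hpp>
#include <Teuchos_RCP.hpp>
#include "MueLu_RepartitionUtilities_fwd.hpp"  // assumed generated header; defines MUELU_REPARTITIONUTILITIES_SHORT

namespace MueLu {

template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
class SomeRebalanceHelper {  // hypothetical class, for illustration
#include "MueLu_UseShortNames.hpp"  // injects: using RepartitionUtilities = MueLu::RepartitionUtilities<Scalar, LO, GO, Node>
 public:
  static std::tuple<Teuchos::Array<GO>, GO> Run(Teuchos::RCP<GOVector> decomposition) {
    // Short name instead of the fully templated MueLu::RepartitionUtilities<...>.
    return RepartitionUtilities::ConstructGIDs(decomposition);
  }
};

}  // namespace MueLu
```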
packages/muelu/src/Rebalancing/MueLu_RepartitionFactory_decl.hpp
@@ -129,17 +129,6 @@ class RepartitionFactory : public SingleLevelFactoryBase {

}; // class RepartitionFactory

template <class Scalar = DefaultScalar,
class LocalOrdinal = DefaultLocalOrdinal,
class GlobalOrdinal = DefaultGlobalOrdinal,
class Node = DefaultNode>
struct RepartitionUtilities {
#undef MUELU_REPARTITIONFACTORY_SHORT
#include "MueLu_UseShortNames.hpp"

std::tuple<Array<GO>, GO> static ConstructGIDs(RCP<GOVector> decomposition);
};

} // namespace MueLu

#define MUELU_REPARTITIONFACTORY_SHORT
118 changes: 2 additions & 116 deletions packages/muelu/src/Rebalancing/MueLu_RepartitionFactory_def.hpp
@@ -41,6 +41,8 @@
#include "MueLu_Monitor.hpp"
#include "MueLu_PerfUtils.hpp"

#include "MueLu_RepartitionUtilities.hpp"

namespace MueLu {

template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
@@ -466,122 +468,6 @@ void RepartitionFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
decompEntries[i] = match[decompEntries[i]];
}

template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
std::tuple<Teuchos::Array<GlobalOrdinal>, GlobalOrdinal>
RepartitionUtilities<Scalar, LocalOrdinal, GlobalOrdinal, Node>::ConstructGIDs(RCP<GOVector> decomposition) {
ArrayRCP<const GO> decompEntries;
if (decomposition->getLocalLength() > 0)
decompEntries = decomposition->getData(0);

auto rowMap = decomposition->getMap();
auto comm = rowMap->getComm();
const auto myRank = comm->getRank();
const auto numProcs = comm->getSize();
Xpetra::UnderlyingLib lib = rowMap->lib();

Array<GO> myGIDs;
myGIDs.reserve(decomposition->getLocalLength());

// Step 0: Construct mapping
// part number -> GIDs I own which belong to this part
// NOTE: my own part GIDs are not part of the map
typedef std::map<GO, Array<GO> > map_type;
map_type sendMap;
for (LO i = 0; i < decompEntries.size(); i++) {
GO id = decompEntries[i];
GO GID = rowMap->getGlobalElement(i);

if (id == myRank)
myGIDs.push_back(GID);
else
sendMap[id].push_back(GID);
}
decompEntries = Teuchos::null;

int numSend = sendMap.size(), numRecv;

// Arrayify map keys
Array<GO> myParts(numSend), myPart(1);
int cnt = 0;
myPart[0] = myRank;
for (typename map_type::const_iterator it = sendMap.begin(); it != sendMap.end(); it++)
myParts[cnt++] = it->first;

const GO numLocalKept = myGIDs.size();

// Step 1: Find out how many processors send me data
// partsIndexBase starts from zero, as the processors ids start from zero
{
GO partsIndexBase = 0;
RCP<Map> partsIHave = MapFactory ::Build(lib, Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(), myParts(), partsIndexBase, comm);
RCP<Map> partsIOwn = MapFactory ::Build(lib, numProcs, myPart(), partsIndexBase, comm);
RCP<Export> partsExport = ExportFactory::Build(partsIHave, partsIOwn);

RCP<GOVector> partsISend = Xpetra::VectorFactory<GO, LO, GO, NO>::Build(partsIHave);
RCP<GOVector> numPartsIRecv = Xpetra::VectorFactory<GO, LO, GO, NO>::Build(partsIOwn);
if (numSend) {
ArrayRCP<GO> partsISendData = partsISend->getDataNonConst(0);
for (int i = 0; i < numSend; i++)
partsISendData[i] = 1;
}
(numPartsIRecv->getDataNonConst(0))[0] = 0;

numPartsIRecv->doExport(*partsISend, *partsExport, Xpetra::ADD);
numRecv = (numPartsIRecv->getData(0))[0];
}

RCP<const Teuchos::MpiComm<int> > tmpic = rcp_dynamic_cast<const Teuchos::MpiComm<int> >(comm);
TEUCHOS_TEST_FOR_EXCEPTION(tmpic == Teuchos::null, Exceptions::RuntimeError, "Cannot cast base Teuchos::Comm to Teuchos::MpiComm object.");
RCP<const Teuchos::OpaqueWrapper<MPI_Comm> > rawMpiComm = tmpic->getRawMpiComm();

// Step 2: Get my GIDs from everybody else
MPI_Datatype MpiType = Teuchos::Details::MpiTypeTraits<GO>::getType();
int msgTag = 12345; // TODO: use Comm::dup for all internal messaging

// Post sends
Array<MPI_Request> sendReqs(numSend);
cnt = 0;
for (typename map_type::iterator it = sendMap.begin(); it != sendMap.end(); it++)
MPI_Isend(static_cast<void*>(it->second.getRawPtr()), it->second.size(), MpiType, Teuchos::as<GO>(it->first), msgTag, *rawMpiComm, &sendReqs[cnt++]);

map_type recvMap;
size_t totalGIDs = myGIDs.size();
for (int i = 0; i < numRecv; i++) {
MPI_Status status;
MPI_Probe(MPI_ANY_SOURCE, msgTag, *rawMpiComm, &status);

// Get rank and number of elements from status
int fromRank = status.MPI_SOURCE, count;
MPI_Get_count(&status, MpiType, &count);

recvMap[fromRank].resize(count);
MPI_Recv(static_cast<void*>(recvMap[fromRank].getRawPtr()), count, MpiType, fromRank, msgTag, *rawMpiComm, &status);

totalGIDs += count;
}

// Do waits on send requests
if (numSend) {
Array<MPI_Status> sendStatuses(numSend);
MPI_Waitall(numSend, sendReqs.getRawPtr(), sendStatuses.getRawPtr());
}

// Merge GIDs
myGIDs.reserve(totalGIDs);
for (typename map_type::const_iterator it = recvMap.begin(); it != recvMap.end(); it++) {
int offset = myGIDs.size(), len = it->second.size();
if (len) {
myGIDs.resize(offset + len);
memcpy(myGIDs.getRawPtr() + offset, it->second.getRawPtr(), len * sizeof(GO));
}
}
// NOTE 2: The general sorting algorithm could be sped up by using the knowledge that original myGIDs and all received chunks
// (i.e. it->second) are sorted. Therefore, a merge sort would work well in this situation.
std::sort(myGIDs.begin(), myGIDs.end());

return std::make_tuple(myGIDs, numLocalKept);
}

} // namespace MueLu

#endif // ifdef HAVE_MPI
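The body of ConstructGIDs leaves this file unchanged and reappears verbatim in the new MueLu_RepartitionUtilities_def.hpp below; only the include added at the top of this hunk is new. The call site in RepartitionFactory::Build is untouched by the move and sits outside the visible hunks; roughly, it has this shape (variable names are assumptions):

```cpp
// Rough sketch of the (pre-existing, unchanged) call site; names are assumed.
Teuchos::Array<GO> myGIDs;
GO numLocalKept;
std::tie(myGIDs, numLocalKept) = RepartitionUtilities::ConstructGIDs(decomposition);
// myGIDs: sorted GIDs this rank owns after rebalancing; numLocalKept: GIDs it already owned.
```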
50 changes: 50 additions & 0 deletions packages/muelu/src/Rebalancing/MueLu_RepartitionUtilities_decl.hpp
@@ -0,0 +1,50 @@
// @HEADER
// *****************************************************************************
// MueLu: A package for multigrid based preconditioning
//
// Copyright 2012 NTESS and the MueLu contributors.
// SPDX-License-Identifier: BSD-3-Clause
// *****************************************************************************
// @HEADER

#ifndef MUELU_REPARTITIONUTILITIES_DECL_HPP
#define MUELU_REPARTITIONUTILITIES_DECL_HPP

#include "MueLu_ConfigDefs.hpp"
#ifdef HAVE_MPI

// Some classes are only used in the definition (_def.hpp) of this class
// but forward declarations are needed here to enable the UseShortNames mechanism.
#include <Xpetra_Map_fwd.hpp>
#include <Xpetra_MapFactory_fwd.hpp>
#include <Xpetra_Import_fwd.hpp>
#include <Xpetra_ImportFactory_fwd.hpp>
#include <Xpetra_Export_fwd.hpp>
#include <Xpetra_ExportFactory_fwd.hpp>
#include <Xpetra_Vector_fwd.hpp>

#include "MueLu_SingleLevelFactoryBase.hpp"
#include "MueLu_RepartitionFactory_fwd.hpp"
#include "MueLu_CloneRepartitionInterface_fwd.hpp"
#include "MueLu_Utilities_fwd.hpp"
#include "MueLu_PerfUtils_fwd.hpp"

namespace MueLu {

template <class Scalar = DefaultScalar,
class LocalOrdinal = DefaultLocalOrdinal,
class GlobalOrdinal = DefaultGlobalOrdinal,
class Node = DefaultNode>
struct RepartitionUtilities {
#undef MUELU_REPARTITIONFACTORY_SHORT
#include "MueLu_UseShortNames.hpp"

std::tuple<Array<GO>, GO> static ConstructGIDs(RCP<GOVector> decomposition);
};

} // namespace MueLu

#define MUELU_REPARTITIONUTILITIES_SHORT

#endif // ifdef HAVE_MPI
#endif // MUELU_REPARTITIONUTILITIES_DECL_HPP
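The declaration fixes the contract: ConstructGIDs is a collective call that takes the rank-valued decomposition vector (entry i holds the MPI rank that should own local row i) and returns the sorted list of GIDs this rank will own, together with the number of GIDs it keeps locally. A compile-oriented sketch with explicit template parameters; the ordinal and node choices are illustrative, not mandated by the PR:

```cpp
// Illustrative wrapper showing the call signature with concrete template parameters.
#include <tuple>
#include <Teuchos_Array.hpp>
#include <Teuchos_RCP.hpp>
#include <Tpetra_Map.hpp>
#include <Xpetra_Vector.hpp>
#include "MueLu_RepartitionUtilities_decl.hpp"

using SC = double;
using LO = int;
using GO = long long;                 // GlobalOrdinal choice is illustrative
using NO = Tpetra::Map<>::node_type;  // default Kokkos node

std::tuple<Teuchos::Array<GO>, GO>
computeNewOwnership(Teuchos::RCP<Xpetra::Vector<GO, LO, GO, NO>> decomposition) {
  return MueLu::RepartitionUtilities<SC, LO, GO, NO>::ConstructGIDs(decomposition);
}
```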
164 changes: 164 additions & 0 deletions packages/muelu/src/Rebalancing/MueLu_RepartitionUtilities_def.hpp
@@ -0,0 +1,164 @@
// @HEADER
// *****************************************************************************
// MueLu: A package for multigrid based preconditioning
//
// Copyright 2012 NTESS and the MueLu contributors.
// SPDX-License-Identifier: BSD-3-Clause
// *****************************************************************************
// @HEADER

#ifndef MUELU_REPARTITIONUTILITIES_DEF_HPP
#define MUELU_REPARTITIONUTILITIES_DEF_HPP

#include <algorithm>
#include <iostream>
#include <sstream>

#include "MueLu_RepartitionUtilities_decl.hpp" // TMP JG NOTE: before other includes, otherwise I cannot test the fwd declaration in _def

#ifdef HAVE_MPI
#include <Teuchos_DefaultMpiComm.hpp>
#include <Teuchos_CommHelpers.hpp>
#include <Teuchos_Details_MpiTypeTraits.hpp>

#include <Xpetra_Map.hpp>
#include <Xpetra_MapFactory.hpp>
#include <Xpetra_MultiVectorFactory.hpp>
#include <Xpetra_VectorFactory.hpp>
#include <Xpetra_Import.hpp>
#include <Xpetra_ImportFactory.hpp>
#include <Xpetra_Export.hpp>
#include <Xpetra_ExportFactory.hpp>

#include "MueLu_Utilities.hpp"

#include "MueLu_CloneRepartitionInterface.hpp"

#include "MueLu_Level.hpp"
#include "MueLu_MasterList.hpp"
#include "MueLu_Monitor.hpp"
#include "MueLu_PerfUtils.hpp"

namespace MueLu {

template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
std::tuple<Teuchos::Array<GlobalOrdinal>, GlobalOrdinal>
RepartitionUtilities<Scalar, LocalOrdinal, GlobalOrdinal, Node>::ConstructGIDs(RCP<GOVector> decomposition) {
ArrayRCP<const GO> decompEntries;
if (decomposition->getLocalLength() > 0)
decompEntries = decomposition->getData(0);

auto rowMap = decomposition->getMap();
auto comm = rowMap->getComm();
const auto myRank = comm->getRank();
const auto numProcs = comm->getSize();
Xpetra::UnderlyingLib lib = rowMap->lib();

Array<GO> myGIDs;
myGIDs.reserve(decomposition->getLocalLength());

// Step 0: Construct mapping
// part number -> GIDs I own which belong to this part
// NOTE: my own part GIDs are not part of the map
typedef std::map<GO, Array<GO> > map_type;
map_type sendMap;
for (LO i = 0; i < decompEntries.size(); i++) {
GO id = decompEntries[i];
GO GID = rowMap->getGlobalElement(i);

if (id == myRank)
myGIDs.push_back(GID);
else
sendMap[id].push_back(GID);
}
decompEntries = Teuchos::null;

int numSend = sendMap.size(), numRecv;

// Arrayify map keys
Array<GO> myParts(numSend), myPart(1);
int cnt = 0;
myPart[0] = myRank;
for (typename map_type::const_iterator it = sendMap.begin(); it != sendMap.end(); it++)
myParts[cnt++] = it->first;

const GO numLocalKept = myGIDs.size();

// Step 1: Find out how many processors send me data
// partsIndexBase starts from zero, as the processors ids start from zero
{
GO partsIndexBase = 0;
RCP<Map> partsIHave = MapFactory ::Build(lib, Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(), myParts(), partsIndexBase, comm);
RCP<Map> partsIOwn = MapFactory ::Build(lib, numProcs, myPart(), partsIndexBase, comm);
RCP<Export> partsExport = ExportFactory::Build(partsIHave, partsIOwn);

RCP<GOVector> partsISend = Xpetra::VectorFactory<GO, LO, GO, NO>::Build(partsIHave);
RCP<GOVector> numPartsIRecv = Xpetra::VectorFactory<GO, LO, GO, NO>::Build(partsIOwn);
if (numSend) {
ArrayRCP<GO> partsISendData = partsISend->getDataNonConst(0);
for (int i = 0; i < numSend; i++)
partsISendData[i] = 1;
}
(numPartsIRecv->getDataNonConst(0))[0] = 0;

numPartsIRecv->doExport(*partsISend, *partsExport, Xpetra::ADD);
numRecv = (numPartsIRecv->getData(0))[0];
}

RCP<const Teuchos::MpiComm<int> > tmpic = rcp_dynamic_cast<const Teuchos::MpiComm<int> >(comm);
TEUCHOS_TEST_FOR_EXCEPTION(tmpic == Teuchos::null, Exceptions::RuntimeError, "Cannot cast base Teuchos::Comm to Teuchos::MpiComm object.");
RCP<const Teuchos::OpaqueWrapper<MPI_Comm> > rawMpiComm = tmpic->getRawMpiComm();

// Step 2: Get my GIDs from everybody else
MPI_Datatype MpiType = Teuchos::Details::MpiTypeTraits<GO>::getType();
int msgTag = 12345; // TODO: use Comm::dup for all internal messaging

// Post sends
Array<MPI_Request> sendReqs(numSend);
cnt = 0;
for (typename map_type::iterator it = sendMap.begin(); it != sendMap.end(); it++)
MPI_Isend(static_cast<void*>(it->second.getRawPtr()), it->second.size(), MpiType, Teuchos::as<GO>(it->first), msgTag, *rawMpiComm, &sendReqs[cnt++]);

map_type recvMap;
size_t totalGIDs = myGIDs.size();
for (int i = 0; i < numRecv; i++) {
MPI_Status status;
MPI_Probe(MPI_ANY_SOURCE, msgTag, *rawMpiComm, &status);

// Get rank and number of elements from status
int fromRank = status.MPI_SOURCE, count;
MPI_Get_count(&status, MpiType, &count);

recvMap[fromRank].resize(count);
MPI_Recv(static_cast<void*>(recvMap[fromRank].getRawPtr()), count, MpiType, fromRank, msgTag, *rawMpiComm, &status);

totalGIDs += count;
}

// Do waits on send requests
if (numSend) {
Array<MPI_Status> sendStatuses(numSend);
MPI_Waitall(numSend, sendReqs.getRawPtr(), sendStatuses.getRawPtr());
}

// Merge GIDs
myGIDs.reserve(totalGIDs);
for (typename map_type::const_iterator it = recvMap.begin(); it != recvMap.end(); it++) {
int offset = myGIDs.size(), len = it->second.size();
if (len) {
myGIDs.resize(offset + len);
memcpy(myGIDs.getRawPtr() + offset, it->second.getRawPtr(), len * sizeof(GO));
}
}
// NOTE 2: The general sorting algorithm could be sped up by using the knowledge that original myGIDs and all received chunks
// (i.e. it->second) are sorted. Therefore, a merge sort would work well in this situation.
std::sort(myGIDs.begin(), myGIDs.end());

return std::make_tuple(myGIDs, numLocalKept);
}

} // namespace MueLu

#endif // ifdef HAVE_MPI

#endif // MUELU_REPARTITIONUTILITIES_DEF_HPP
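Step 2 of ConstructGIDs is a classic irregular exchange: each rank posts non-blocking sends of variable-length GID lists, and the receive side pairs MPI_Probe with MPI_Get_count so it never needs to know message sizes up front. A self-contained sketch of that pattern with plain MPI and std::vector, stripped of the Teuchos/Xpetra wrappers (tag value and GID type are illustrative):

```cpp
#include <mpi.h>
#include <map>
#include <vector>

// sendMap: destination rank -> GIDs to hand off; numRecv: how many ranks will
// send to me (determined beforehand, as in Step 1 of ConstructGIDs).
std::map<int, std::vector<long long>> exchangeGIDs(
    const std::map<int, std::vector<long long>>& sendMap, int numRecv, MPI_Comm comm) {
  const int tag = 12345;  // illustrative; production code should message over a dup'ed communicator
  std::vector<MPI_Request> sendReqs;
  sendReqs.reserve(sendMap.size());
  for (const auto& kv : sendMap) {
    sendReqs.emplace_back();
    MPI_Isend(kv.second.data(), static_cast<int>(kv.second.size()), MPI_LONG_LONG,
              kv.first, tag, comm, &sendReqs.back());
  }

  std::map<int, std::vector<long long>> recvMap;
  for (int i = 0; i < numRecv; ++i) {
    MPI_Status status;
    MPI_Probe(MPI_ANY_SOURCE, tag, comm, &status);  // block until any message arrives
    int count = 0;
    MPI_Get_count(&status, MPI_LONG_LONG, &count);  // its length, in GIDs
    std::vector<long long>& buf = recvMap[status.MPI_SOURCE];
    buf.resize(count);
    MPI_Recv(buf.data(), count, MPI_LONG_LONG, status.MPI_SOURCE, tag, comm,
             MPI_STATUS_IGNORE);
  }

  if (!sendReqs.empty())
    MPI_Waitall(static_cast<int>(sendReqs.size()), sendReqs.data(), MPI_STATUSES_IGNORE);
  return recvMap;
}
```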
3 changes: 2 additions & 1 deletion packages/muelu/src/Utils/ClassList/SC-LO-GO-NO.classList
@@ -93,6 +93,7 @@ ReorderBlockAFactory
RepartitionFactory - #ifdef HAVE_MPI
RepartitionBlockDiagonalFactory - #ifdef HAVE_MPI
RepartitionHeuristicFactory - #ifdef HAVE_MPI
RepartitionUtilities - #ifdef HAVE_MPI
ReplicatePFactory
RigidBodyModeFactory
SaPFactory
@@ -148,4 +149,4 @@ MultiPhys
Maxwell_Utils
SingleLevelMatlabFactory - #if defined(HAVE_MUELU_MATLAB)
TwoLevelMatlabFactory - #if defined(HAVE_MUELU_MATLAB)
MatlabSmoother - #if defined(HAVE_MUELU_MATLAB)
MatlabSmoother - #if defined(HAVE_MUELU_MATLAB)
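The classList entry above is what drives explicit template instantiation: the build generates one translation unit per enabled <Scalar, LO, GO, Node> combination, wrapped in the requested #ifdef HAVE_MPI guard. Purely as a hypothetical sketch (the generated file layout is an assumption, not taken from this PR), such a unit boils down to:

```cpp
// Hypothetical explicit-instantiation unit for one type combination; the real
// generated file in Utils/ExplicitInstantiation may differ in structure.
#include <Tpetra_Map.hpp>
#include "MueLu_RepartitionUtilities_def.hpp"

#ifdef HAVE_MPI
template struct MueLu::RepartitionUtilities<double, int, long long,
                                            Tpetra::Map<>::node_type>;
#endif
```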