From bedc2dc03063acaeed82145f4c41ddff041c4868 Mon Sep 17 00:00:00 2001 From: Keita Iwabuchi Date: Tue, 28 Sep 2021 11:39:08 -0700 Subject: [PATCH] Add MPI & YGM examples --- .gitignore | 3 +- CMakeLists.txt | 55 ++++++++++--- examples/CMakeLists.txt | 8 +- examples/dataframe/CMakeLists.txt | 4 +- examples/mpi/CMakeLists.txt | 7 ++ examples/mpi/noop.cpp | 33 ++++++++ examples/ygm_wordcounter/CMakeLists.txt | 15 ++++ examples/ygm_wordcounter/count_table.hpp | 35 +++++++++ examples/ygm_wordcounter/ygm_noop.cpp | 20 +++++ examples/ygm_wordcounter/ygm_top_k_words.cpp | 83 ++++++++++++++++++++ examples/ygm_wordcounter/ygm_wordcount.cpp | 66 ++++++++++++++++ include/clippy/clippy.hpp | 15 +++- 12 files changed, 328 insertions(+), 16 deletions(-) create mode 100644 examples/mpi/CMakeLists.txt create mode 100644 examples/mpi/noop.cpp create mode 100644 examples/ygm_wordcounter/CMakeLists.txt create mode 100644 examples/ygm_wordcounter/count_table.hpp create mode 100644 examples/ygm_wordcounter/ygm_noop.cpp create mode 100644 examples/ygm_wordcounter/ygm_top_k_words.cpp create mode 100644 examples/ygm_wordcounter/ygm_wordcount.cpp diff --git a/.gitignore b/.gitignore index 378eac2..858e8c5 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ -build +/build* +/.idea/ \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 97c64a9..b4dc362 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -20,6 +20,8 @@ if(CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME) # Let's ensure -std=c++xx instead of -std=g++xx set(CMAKE_CXX_EXTENSIONS OFF) + set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG") + # Let's nicely support folders in IDE's set_property(GLOBAL PROPERTY USE_FOLDERS ON) @@ -38,21 +40,56 @@ if(CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME) endif() endif() +# +# Threads +find_package(Threads REQUIRED) + # # Boost -find_package(Boost 1.75 REQUIRED COMPONENTS) +find_package(Boost 1.75 REQUIRED) -find_package(Threads REQUIRED) +include(FetchContent) # # Metall -include(FetchContent) -FetchContent_Declare( - Metall - GIT_REPOSITORY https://github.com/LLNL/metall.git - GIT_TAG develop -) -FetchContent_MakeAvailable(Metall) +find_package(Metall QUIET) +if (NOT Metall_FOUND) + set (JUST_INSTALL_HEADER TRUE) + FetchContent_Declare( + Metall + GIT_REPOSITORY https://github.com/LLNL/metall.git + GIT_TAG develop + ) + FetchContent_MakeAvailable(Metall) +endif () + +# +# cereal +FetchContent_Declare(cereal + URL https://github.com/USCiLab/cereal/archive/refs/tags/v1.3.0.zip + ) +FetchContent_GetProperties(cereal) +if (NOT cereal_POPULATED) + FetchContent_Populate(cereal) +endif () +set(cereal_INCLUDE_DIR "${cereal_SOURCE_DIR}/include") + +# +# MPI +find_package(MPI) + +# +# YGM +if (MPI_CXX_FOUND) + FetchContent_Declare(YGM + URL https://github.com/LLNL/ygm/archive/refs/heads/master.zip + ) + FetchContent_GetProperties(YGM) + if (NOT ygm_POPULATED) + FetchContent_Populate(YGM) + endif () + set(YGM_INCLUDE_DIR "${ygm_SOURCE_DIR}/include") +endif () ### Require out-of-source builds file(TO_CMAKE_PATH "${PROJECT_BINARY_DIR}/CMakeLists.txt" LOC_PATH) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 4402545..e4cd5fc 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -20,10 +20,10 @@ function ( add_example example_name ) set(example_exe "${example_name}") add_executable(${example_exe} ${example_source}) target_include_directories(${example_exe} PRIVATE ${Boost_INCLUDE_DIRS}) - target_link_libraries(${example_exe} Metall) - target_link_libraries(${example_exe} stdc++fs Threads::Threads) + target_link_libraries(${example_exe} PRIVATE Metall) + target_link_libraries(${example_exe} PRIVATE stdc++fs Threads::Threads) if (UNIX AND NOT APPLE) - target_link_libraries(${example_exe} rt) + target_link_libraries(${example_exe} PRIVATE rt) endif () include_directories(${CMAKE_CURRENT_SOURCE_DIR}) endfunction() @@ -38,3 +38,5 @@ add_example(sort_string_edges) add_subdirectory(wordcounter) add_subdirectory(dataframe) +add_subdirectory(mpi) +add_subdirectory(ygm_wordcounter) diff --git a/examples/dataframe/CMakeLists.txt b/examples/dataframe/CMakeLists.txt index 6872826..1583e27 100644 --- a/examples/dataframe/CMakeLists.txt +++ b/examples/dataframe/CMakeLists.txt @@ -5,13 +5,13 @@ function ( add_df_example example_name ) add_example(${example_name}) - target_link_libraries(${example_name} libcliprt) + target_link_libraries(${example_name} PRIVATE libcliprt) endfunction() include_directories(${Boost_INCLUDE_DIRS}) add_library(libcliprt STATIC clip) -target_link_libraries(libcliprt ${Boost_LIBRARIES}) +target_link_libraries(libcliprt PUBLIC ${Boost_LIBRARIES}) add_df_example(columninfo) add_df_example(columnquery) diff --git a/examples/mpi/CMakeLists.txt b/examples/mpi/CMakeLists.txt new file mode 100644 index 0000000..17a83b7 --- /dev/null +++ b/examples/mpi/CMakeLists.txt @@ -0,0 +1,7 @@ +if (MPI_CXX_FOUND) + add_example(noop) + target_include_directories(noop PUBLIC ${MPI_INCLUDE_PATH}) + target_link_libraries(noop PRIVATE MPI::MPI_CXX) +else() + message(STATUS "Will skip building the MPI examples") +endif() \ No newline at end of file diff --git a/examples/mpi/noop.cpp b/examples/mpi/noop.cpp new file mode 100644 index 0000000..a9f0d9f --- /dev/null +++ b/examples/mpi/noop.cpp @@ -0,0 +1,33 @@ +// Copyright 2020 Lawrence Livermore National Security, LLC and other CLIPPy Project Developers. +// See the top-level COPYRIGHT file for details. +// +// SPDX-License-Identifier: MIT + +#include +#include +#include + +#ifndef MPI_VERSION +#error "MPI_VERSION is not defined." +#endif + +int main(int argc, char **argv) { + + MPI_Init(&argc, &argv); + { + { + clippy::clippy clip("noop", ""); + clip.returns("Size"); + if (clip.parse(argc, argv)) { return 0; } + + int mpi_size; + MPI_Comm_size(MPI_COMM_WORLD, &mpi_size); + + clip.to_return(mpi_size); + } + MPI_Barrier(MPI_COMM_WORLD); + } + MPI_Finalize(); + + return 0; +} diff --git a/examples/ygm_wordcounter/CMakeLists.txt b/examples/ygm_wordcounter/CMakeLists.txt new file mode 100644 index 0000000..7d729a0 --- /dev/null +++ b/examples/ygm_wordcounter/CMakeLists.txt @@ -0,0 +1,15 @@ +function(add_ygm_example example_name) + add_example(${example_name}) + target_include_directories(${example_name} PUBLIC ${cereal_INCLUDE_DIR}) + target_include_directories(${example_name} PUBLIC ${YGM_INCLUDE_DIR}) + target_include_directories(${example_name} PUBLIC ${MPI_INCLUDE_PATH}) + target_link_libraries(${example_name} PRIVATE MPI::MPI_CXX) +endfunction() + +if (MPI_CXX_FOUND) + add_ygm_example(ygm_wordcount) + add_ygm_example(ygm_top_k_words) + add_ygm_example(ygm_noop) +else() + message(STATUS "Will skip building the YGM examples") +endif() \ No newline at end of file diff --git a/examples/ygm_wordcounter/count_table.hpp b/examples/ygm_wordcounter/count_table.hpp new file mode 100644 index 0000000..204b5d7 --- /dev/null +++ b/examples/ygm_wordcounter/count_table.hpp @@ -0,0 +1,35 @@ +// Copyright 2020 Lawrence Livermore National Security, LLC and other CLIPPy Project Developers. +// See the top-level COPYRIGHT file for details. +// +// SPDX-License-Identifier: MIT + +#ifndef CLIPPY_EXAMPLES_YGM_YGM_COUNT_TABLE_HPP_ +#define CLIPPY_EXAMPLES_YGM_YGM_COUNT_TABLE_HPP_ + +#include +#include +#include +#include +#include +#include + +namespace ygm_wordcount { +namespace { +namespace mc = metall::container; +} + +template +using alloc_t = metall::manager::allocator_type; + +template +using sc_alloc_t = mc::scoped_allocator_adaptor>; + +using pmem_str_t = mc::basic_string, sc_alloc_t>; +using pmem_count_table = mc::unordered_map, + std::equal_to<>, + sc_alloc_t>>; +} + +#endif //CLIPPY_EXAMPLES_YGM_YGM_COUNT_TABLE_HPP_ diff --git a/examples/ygm_wordcounter/ygm_noop.cpp b/examples/ygm_wordcounter/ygm_noop.cpp new file mode 100644 index 0000000..86b14ee --- /dev/null +++ b/examples/ygm_wordcounter/ygm_noop.cpp @@ -0,0 +1,20 @@ +// Copyright 2020 Lawrence Livermore National Security, LLC and other CLIPPy Project Developers. +// See the top-level COPYRIGHT file for details. +// +// SPDX-License-Identifier: MIT + +#include +#include + +int main(int argc, char **argv) { + ygm::comm world(&argc, &argv); + { + clippy::clippy clip("noop", "No operation"); + clip.returns("#of ranks"); + if (clip.parse(argc, argv)) { return 0; } + clip.to_return(world.size()); + } + world.barrier(); + + return 0; +} diff --git a/examples/ygm_wordcounter/ygm_top_k_words.cpp b/examples/ygm_wordcounter/ygm_top_k_words.cpp new file mode 100644 index 0000000..b954754 --- /dev/null +++ b/examples/ygm_wordcounter/ygm_top_k_words.cpp @@ -0,0 +1,83 @@ +// Copyright 2020 Lawrence Livermore National Security, LLC and other CLIPPy Project Developers. +// See the top-level COPYRIGHT file for details. +// +// SPDX-License-Identifier: MIT + +#include +#include +#include +#include +#include +#include +#include + +#include "count_table.hpp" + +using namespace ygm_wordcount; + +// The top one has the lowest count +using que_type = std::priority_queue, + std::vector>, + std::greater>>; + +static int k = 0; + +int main(int argc, char **argv) { + ygm::comm world(&argc, &argv); + + { + clippy::clippy clip("top_k_words", "Return the top k words"); + clip.add_required("path", "Data store path"); + clip.add_required("k", "k"); + clip.returns>("Top k words"); + if (clip.parse(argc, argv)) { return 0; } + + const auto path = clip.get("path") + "-" + std::to_string(world.rank()); + k = clip.get("k"); + + std::unique_ptr manager; + manager = std::make_unique(metall::open_read_only, path.c_str()); + static const pmem_count_table *table = manager->find(metall::unique_instance).first; + + // Find the local top k words + que_type local_top_k_queue; + for (const auto &elem: *table) { + if (local_top_k_queue.size() < k || local_top_k_queue.top().first < elem.second) { + local_top_k_queue.emplace(elem.second, elem.first.c_str()); + } + if (local_top_k_queue.size() > k) { + local_top_k_queue.pop(); + } + } + + // Find the global top k words + static que_type global_top_k_queue; + while (!local_top_k_queue.empty()) { + auto gather = [](auto pcomm, int from, const std::size_t count, const std::string &word) { + if (global_top_k_queue.size() < k || global_top_k_queue.top().first < count) { + global_top_k_queue.emplace(count, word); + } + if (global_top_k_queue.size() > k) { + global_top_k_queue.pop(); + } + }; + const auto &top = local_top_k_queue.top(); + world.async(0, gather, top.first, top.second); + local_top_k_queue.pop(); + } + + world.barrier(); + + std::vector top_k_words; + while (!global_top_k_queue.empty()) { + top_k_words.emplace_back(global_top_k_queue.top().second); + global_top_k_queue.pop(); + } + std::reverse(top_k_words.begin(), top_k_words.end()); + clip.to_return(top_k_words); + table = nullptr; + } + world.barrier(); + + return 0; +} diff --git a/examples/ygm_wordcounter/ygm_wordcount.cpp b/examples/ygm_wordcounter/ygm_wordcount.cpp new file mode 100644 index 0000000..d665d90 --- /dev/null +++ b/examples/ygm_wordcounter/ygm_wordcount.cpp @@ -0,0 +1,66 @@ +// Copyright 2020 Lawrence Livermore National Security, LLC and other CLIPPy Project Developers. +// See the top-level COPYRIGHT file for details. +// +// SPDX-License-Identifier: MIT + +#include +#include +#include + +#include +#include +#include + +#include "count_table.hpp" + +using namespace ygm_wordcount; + +int main(int argc, char **argv) { + ygm::comm world(&argc, &argv, 8 * 1024); + + { + clippy::clippy clip("wordcount", "Distributed word count example"); + clip.add_required("path", "Data store path"); + clip.add_required>("files", "Input word files"); + clip.returns("Total unique worlds"); + if (clip.parse(argc, argv)) { return 0; } + + const auto path = clip.get("path") + "-" + std::to_string(world.rank()); + + std::unique_ptr manager; + if (metall::manager::consistent(path.c_str())) { + manager = std::make_unique(metall::open_only, path.c_str()); + } else { + manager = std::make_unique(metall::create_only, path.c_str()); + } + static pmem_count_table *table = manager->find_or_construct(metall::unique_instance)(manager->get_allocator()); + + const auto word_files = clip.get>("files"); + for (int i = 0; i < word_files.size(); ++i) { + if (i % world.size() != world.rank()) continue; + + std::ifstream ifs(word_files[i]); + std::string word; + while (ifs >> word) { + auto counter = [](auto pcomm, int from, const std::string &w) { + pmem_str_t tmp(w.c_str(), table->get_allocator()); + if (table->count(tmp) == 0) { + table->emplace(std::move(tmp), 1); + } else { + table->at(tmp)++; + } + }; + const int dest = metall::utility::string_hash{}(word) % world.size(); + world.async(dest, counter, word); + } + } + world.barrier(); + + const auto total_words = world.all_reduce_sum(table->size()); + clip.to_return(total_words); + table = nullptr; + } + world.barrier(); + + return 0; +} diff --git a/include/clippy/clippy.hpp b/include/clippy/clippy.hpp index 43c922c..5ccbb62 100644 --- a/include/clippy/clippy.hpp +++ b/include/clippy/clippy.hpp @@ -14,6 +14,9 @@ #include #include +#if __has_include() +#include +#endif namespace clippy { @@ -25,7 +28,17 @@ class clippy { } ~clippy() { - if (return_values) { std::cout << m_json_return << std::endl; } + if (return_values) { + int rank = 0; +#ifdef MPI_VERSION + if (::MPI_Comm_rank(MPI_COMM_WORLD, &rank) != MPI_SUCCESS) { + MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); + } +#endif + if (rank == 0) { + std::cout << m_json_return << std::endl; + } + } } template