diff --git a/CMakeLists.txt b/CMakeLists.txt index c3bbabe..9b7506f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -20,6 +20,8 @@ if(CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME) # Let's ensure -std=c++xx instead of -std=g++xx set(CMAKE_CXX_EXTENSIONS OFF) + set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG") + # Let's nicely support folders in IDE's set_property(GLOBAL PROPERTY USE_FOLDERS ON) @@ -46,14 +48,13 @@ find_package(Threads REQUIRED) # Boost find_package(Boost 1.75 REQUIRED) +include(FetchContent) # loaded up front; the FetchContent_Declare() fallback below needs it + # # Metall find_package(Metall QUIET) -if (Metall_FOUND) - message(STATUS "Found Metall") -else () - message(STATUS "Could NOT find Metall locally. Download Metall.") - include(FetchContent) +if (NOT Metall_FOUND) + set (JUST_INSTALL_HEADER TRUE) FetchContent_Declare( Metall GIT_REPOSITORY https://github.com/LLNL/metall.git diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 474346f..9229443 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -39,4 +39,4 @@ add_example(sort_string_edges) add_subdirectory(wordcounter) add_subdirectory(dataframe) add_subdirectory(mpi) -add_subdirectory(ygm) +add_subdirectory(ygm_wordcounter) \ No newline at end of file diff --git a/examples/mpi/CMakeLists.txt b/examples/mpi/CMakeLists.txt index c061010..17a83b7 100644 --- a/examples/mpi/CMakeLists.txt +++ b/examples/mpi/CMakeLists.txt @@ -1,7 +1,7 @@ if (MPI_CXX_FOUND) - add_example(mpi_ranks) - target_include_directories(mpi_ranks PUBLIC ${MPI_INCLUDE_PATH}) - target_link_libraries(mpi_ranks PRIVATE MPI::MPI_CXX) + add_example(noop) + target_include_directories(noop PUBLIC ${MPI_INCLUDE_PATH}) + target_link_libraries(noop PRIVATE MPI::MPI_CXX) else() message(STATUS "Will skip building the MPI examples") endif() \ No newline at end of file diff --git a/examples/mpi/noop.cpp b/examples/mpi/noop.cpp new file mode 100644 index 0000000..a9f0d9f --- /dev/null +++ b/examples/mpi/noop.cpp @@ -0,0 +1,33 @@ +// 
Copyright 2020 Lawrence Livermore National Security, LLC and other CLIPPy Project Developers. +// See the top-level COPYRIGHT file for details. +// +// SPDX-License-Identifier: MIT + +#include +#include +#include + +#ifndef MPI_VERSION +#error "MPI_VERSION is not defined." +#endif + +int main(int argc, char **argv) { + + MPI_Init(&argc, &argv); + { + { + clippy::clippy clip("noop", ""); + clip.returns("Size"); + if (clip.parse(argc, argv)) { MPI_Finalize(); return 0; } // MPI_Init() above must be matched by MPI_Finalize() on this early exit as well + + int mpi_size; + MPI_Comm_size(MPI_COMM_WORLD, &mpi_size); + + clip.to_return(mpi_size); // hand the communicator size back through clippy + } + MPI_Barrier(MPI_COMM_WORLD); // reached only after clip has left its scope + } + MPI_Finalize(); + + return 0; +} diff --git a/examples/ygm_wordcounter/CMakeLists.txt b/examples/ygm_wordcounter/CMakeLists.txt new file mode 100644 index 0000000..7d729a0 --- /dev/null +++ b/examples/ygm_wordcounter/CMakeLists.txt @@ -0,0 +1,15 @@ +function(add_ygm_example example_name) + add_example(${example_name}) + target_include_directories(${example_name} PUBLIC ${cereal_INCLUDE_DIR}) + target_include_directories(${example_name} PUBLIC ${YGM_INCLUDE_DIR}) + target_include_directories(${example_name} PUBLIC ${MPI_INCLUDE_PATH}) + target_link_libraries(${example_name} PRIVATE MPI::MPI_CXX) +endfunction() + +if (MPI_CXX_FOUND) + add_ygm_example(ygm_wordcount) + add_ygm_example(ygm_top_k_words) + add_ygm_example(ygm_noop) +else() + message(STATUS "Will skip building the YGM examples") +endif() \ No newline at end of file diff --git a/examples/ygm_wordcounter/count_table.hpp b/examples/ygm_wordcounter/count_table.hpp new file mode 100644 index 0000000..204b5d7 --- /dev/null +++ b/examples/ygm_wordcounter/count_table.hpp @@ -0,0 +1,35 @@ +// Copyright 2020 Lawrence Livermore National Security, LLC and other CLIPPy Project Developers. +// See the top-level COPYRIGHT file for details. 
+// +// SPDX-License-Identifier: MIT + +#ifndef CLIPPY_EXAMPLES_YGM_YGM_COUNT_TABLE_HPP_ +#define CLIPPY_EXAMPLES_YGM_YGM_COUNT_TABLE_HPP_ + +#include +#include +#include +#include +#include +#include + +namespace ygm_wordcount { +// Short alias for Metall's container namespace. It is declared directly in +// ygm_wordcount: anonymous namespaces are for .cpp-local names, not headers. +namespace mc = metall::container; + +template +using alloc_t = metall::manager::allocator_type; + +template +using sc_alloc_t = mc::scoped_allocator_adaptor>; + +using pmem_str_t = mc::basic_string, sc_alloc_t>; +using pmem_count_table = mc::unordered_map, + std::equal_to<>, + sc_alloc_t>>; +} // namespace ygm_wordcount + +#endif //CLIPPY_EXAMPLES_YGM_YGM_COUNT_TABLE_HPP_ diff --git a/examples/ygm_wordcounter/ygm_noop.cpp b/examples/ygm_wordcounter/ygm_noop.cpp new file mode 100644 index 0000000..86b14ee --- /dev/null +++ b/examples/ygm_wordcounter/ygm_noop.cpp @@ -0,0 +1,20 @@ +// Copyright 2020 Lawrence Livermore National Security, LLC and other CLIPPy Project Developers. +// See the top-level COPYRIGHT file for details. +// +// SPDX-License-Identifier: MIT + +#include +#include + +int main(int argc, char **argv) { + ygm::comm world(&argc, &argv); + { + clippy::clippy clip("noop", "No operation"); + clip.returns("#of ranks"); + if (clip.parse(argc, argv)) { return 0; } + clip.to_return(world.size()); // the only result: world.size() + } + world.barrier(); // reached only after clip has left its scope + + return 0; +} diff --git a/examples/ygm_wordcounter/ygm_top_k_words.cpp b/examples/ygm_wordcounter/ygm_top_k_words.cpp new file mode 100644 index 0000000..b954754 --- /dev/null +++ b/examples/ygm_wordcounter/ygm_top_k_words.cpp @@ -0,0 +1,83 @@ +// Copyright 2020 Lawrence Livermore National Security, LLC and other CLIPPy Project Developers. +// See the top-level COPYRIGHT file for details. 
+// +// SPDX-License-Identifier: MIT + +#include +#include +#include +#include +#include +#include +#include + +#include "count_table.hpp" + +using namespace ygm_wordcount; + +// The top one has the lowest count +using que_type = std::priority_queue, + std::vector>, + std::greater>>; + +static int k = 0; // file-scope so the capture-less gather lambda below can read it + +int main(int argc, char **argv) { + ygm::comm world(&argc, &argv); + + { + clippy::clippy clip("top_k_words", "Return the top k words"); + clip.add_required("path", "Data store path"); + clip.add_required("k", "k"); + clip.returns>("Top k words"); + if (clip.parse(argc, argv)) { return 0; } + + const auto path = clip.get("path") + "-" + std::to_string(world.rank()); // one data store per rank: "<path>-<rank>" + k = clip.get("k"); + + std::unique_ptr manager; + manager = std::make_unique(metall::open_read_only, path.c_str()); + static const pmem_count_table *table = manager->find(metall::unique_instance).first; + + // Find the local top k words + que_type local_top_k_queue; + for (const auto &elem: *table) { + if (local_top_k_queue.size() < k || (!local_top_k_queue.empty() && local_top_k_queue.top().first < elem.second)) { // with k == 0 the queue stays empty, so top() must not be called + local_top_k_queue.emplace(elem.second, elem.first.c_str()); + } + if (local_top_k_queue.size() > k) { + local_top_k_queue.pop(); // drop the current lowest count to get back to k entries + } + } + + // Find the global top k words + static que_type global_top_k_queue; // static so the capture-less lambda can reach it + while (!local_top_k_queue.empty()) { + auto gather = [](auto pcomm, int from, const std::size_t count, const std::string &word) { + if (global_top_k_queue.size() < k || (!global_top_k_queue.empty() && global_top_k_queue.top().first < count)) { // same empty-queue guard as the local pass + global_top_k_queue.emplace(count, word); + } + if (global_top_k_queue.size() > k) { + global_top_k_queue.pop(); + } + }; + const auto &top = local_top_k_queue.top(); + world.async(0, gather, top.first, top.second); // every local candidate is sent to rank 0 + local_top_k_queue.pop(); + } + + world.barrier(); + + std::vector top_k_words; + while (!global_top_k_queue.empty()) { + top_k_words.emplace_back(global_top_k_queue.top().second); + global_top_k_queue.pop(); + } + std::reverse(top_k_words.begin(), top_k_words.end()); // popped lowest-first; flip to highest-first + 
clip.to_return(top_k_words); + table = nullptr; // clear the static pointer before manager goes out of scope + } + world.barrier(); + + return 0; +} diff --git a/examples/ygm_wordcounter/ygm_wordcount.cpp b/examples/ygm_wordcounter/ygm_wordcount.cpp new file mode 100644 index 0000000..d665d90 --- /dev/null +++ b/examples/ygm_wordcounter/ygm_wordcount.cpp @@ -0,0 +1,66 @@ +// Copyright 2020 Lawrence Livermore National Security, LLC and other CLIPPy Project Developers. +// See the top-level COPYRIGHT file for details. +// +// SPDX-License-Identifier: MIT + +#include +#include +#include + +#include +#include +#include + +#include "count_table.hpp" + +using namespace ygm_wordcount; + +int main(int argc, char **argv) { + ygm::comm world(&argc, &argv, 8 * 1024); + + { + clippy::clippy clip("wordcount", "Distributed word count example"); + clip.add_required("path", "Data store path"); + clip.add_required>("files", "Input word files"); + clip.returns("Total unique words"); + if (clip.parse(argc, argv)) { return 0; } + + const auto path = clip.get("path") + "-" + std::to_string(world.rank()); // one data store per rank: "<path>-<rank>" + + std::unique_ptr manager; + if (metall::manager::consistent(path.c_str())) { + manager = std::make_unique(metall::open_only, path.c_str()); + } else { + manager = std::make_unique(metall::create_only, path.c_str()); + } + static pmem_count_table *table = manager->find_or_construct(metall::unique_instance)(manager->get_allocator()); // static so the capture-less counter lambda can reach it + + const auto word_files = clip.get>("files"); + for (int i = 0; i < word_files.size(); ++i) { + if (i % world.size() != world.rank()) continue; // files are dealt round-robin across ranks + + std::ifstream ifs(word_files[i]); + std::string word; + while (ifs >> word) { + auto counter = [](auto pcomm, int from, const std::string &w) { + pmem_str_t tmp(w.c_str(), table->get_allocator()); + if (table->count(tmp) == 0) { + table->emplace(std::move(tmp), 1); + } else { + table->at(tmp)++; + } + }; + const int dest = metall::utility::string_hash{}(word) % world.size(); // the word's hash picks the rank that counts it + world.async(dest, counter, word); + } + } + world.barrier(); + + const auto total_words = 
world.all_reduce_sum(table->size()); + clip.to_return(total_words); + table = nullptr; // clear the static pointer before manager goes out of scope + } + world.barrier(); + + return 0; +}