diff --git a/perf/CMakeLists.txt b/perf/CMakeLists.txt
index 6c75deb0..2ad14c28 100644
--- a/perf/CMakeLists.txt
+++ b/perf/CMakeLists.txt
@@ -16,6 +16,45 @@ FetchContent_Declare(
 )
 FetchContent_MakeAvailable(fmt)
 
+set(GOOGLE_WEB_INPUT_FILE "${CMAKE_CURRENT_BINARY_DIR}/input_data/web-Google.txt")
+set(GOOGLE_WEB_INPUT_URL "https://snap.stanford.edu/data/web-Google.txt.gz")
+set(BERKELEY_STANFORD_INPUT_FILE "${CMAKE_CURRENT_BINARY_DIR}/input_data/web-BerkStan.txt")
+set(BERKELEY_STANFORD_INPUT_URL "https://snap-sentiment.stanford.edu/data/web-BerkStan.txt.gz")
+
+# Fetch a gzip-compressed benchmark dataset at configure time and unpack it to
+# DATASET_FILE. Nothing is done when DATASET_FILE already exists. Configuration
+# is aborted when the download or the decompression fails, and the incomplete
+# artefact is removed so that the next configure run retries.
+function(fetch_gzipped_dataset DATASET_URL DATASET_FILE)
+  if(EXISTS "${DATASET_FILE}")
+    return()
+  endif()
+
+  set(DATASET_ARCHIVE "${DATASET_FILE}.gz")
+  if(NOT EXISTS "${DATASET_ARCHIVE}")
+    file(DOWNLOAD "${DATASET_URL}" "${DATASET_ARCHIVE}" SHOW_PROGRESS STATUS DOWNLOAD_STATUS)
+    list(GET DOWNLOAD_STATUS 0 DOWNLOAD_CODE)
+    if(NOT DOWNLOAD_CODE EQUAL 0)
+      list(GET DOWNLOAD_STATUS 1 DOWNLOAD_MESSAGE)
+      file(REMOVE "${DATASET_ARCHIVE}")
+      message(FATAL_ERROR "Failed to download ${DATASET_URL}: ${DOWNLOAD_MESSAGE}")
+    endif()
+  endif()
+
+  execute_process(
+    COMMAND gzip -dc "${DATASET_ARCHIVE}"
+    OUTPUT_FILE "${DATASET_FILE}"
+    RESULT_VARIABLE GZIP_RESULT
+  )
+  if(NOT GZIP_RESULT EQUAL 0)
+    file(REMOVE "${DATASET_FILE}")
+    message(FATAL_ERROR "Failed to decompress ${DATASET_ARCHIVE}: ${GZIP_RESULT}")
+  endif()
+endfunction()
+
+fetch_gzipped_dataset("${GOOGLE_WEB_INPUT_URL}" "${GOOGLE_WEB_INPUT_FILE}")
+fetch_gzipped_dataset("${BERKELEY_STANFORD_INPUT_URL}" "${BERKELEY_STANFORD_INPUT_FILE}")
+
 file(GLOB PERF_SOURCES "graaflib/*.cpp" "graaflib/*/*.cpp")
 add_executable(
     ${PROJECT_NAME}_perf
@@ -30,4 +69,5 @@ target_link_libraries(
     PRIVATE
     benchmark
     fmt::fmt
-)
\ No newline at end of file
+)
+
diff --git a/perf/graaflib/kruskal_benchmark.cpp b/perf/graaflib/kruskal_benchmark.cpp
new file mode 100644
index 00000000..1323fbf8
--- /dev/null
+++ b/perf/graaflib/kruskal_benchmark.cpp
@@ -0,0 +1,56 @@
+// NOTE(review): the angle-bracket include targets below were absent from the
+// reviewed text and are reconstructed from the names this file uses; confirm
+// the graaflib header path against the library's include directory.
+#include <benchmark/benchmark.h>
+#include <graaflib/algorithm/minimum_spanning_tree/kruskal.h>
+
+#include <map>
+#include <utility>
+
+#include "utils/dataset_reader.h"
+
+namespace {
+
+/**
+ * Returns the graph parsed from the given dataset, reading the input file only
+ * the first time that dataset is requested.
+ *
+ * The cache is keyed by dataset: a single function-local static graph would be
+ * initialised by whichever registered benchmark runs first and then reused for
+ * every other dataset.
+ *
+ * @param dataset_name The dataset to load.
+ * @return A reference to the cached graph.
+ */
+const utils::graph_t& load_graph_once(const utils::dataset& dataset_name) {
+  static std::map<utils::dataset, utils::graph_t> graphs{};
+
+  auto it = graphs.find(dataset_name);
+  if (it == graphs.end()) {
+    auto graph{utils::construct_graph_from_file(dataset_name)};
+    it = graphs.emplace(dataset_name, std::move(graph)).first;
+  }
+  return it->second;
+}
+
+/**
+ * Measures graaf::algorithm::kruskal_minimum_spanning_tree on one dataset.
+ *
+ * @param state The benchmark state driving the measurement loop.
+ * @param dataset_name The dataset whose graph is used as input.
+ */
+void bm_kruskal(benchmark::State& state, const utils::dataset& dataset_name) {
+  const auto& graph{load_graph_once(dataset_name)};
+
+  for (auto _ : state) {
+    auto result = graaf::algorithm::kruskal_minimum_spanning_tree(graph);
+    // Keep the result observable so the call is not optimised away.
+    benchmark::DoNotOptimize(result);
+  }
+}
+
+}  // namespace
+
+// Register the benchmarks
+BENCHMARK_CAPTURE(bm_kruskal, web_google, utils::dataset::WEB_GOOGLE);
+BENCHMARK_CAPTURE(bm_kruskal, web_berkstan, utils::dataset::WEB_BERK_STAN);
diff --git a/perf/graaflib/prim_benchmark.cpp b/perf/graaflib/prim_benchmark.cpp
new file mode 100644
index 00000000..33d61f77
--- /dev/null
+++ b/perf/graaflib/prim_benchmark.cpp
@@ -0,0 +1,98 @@
+// NOTE(review): the angle-bracket include targets below were absent from the
+// reviewed text and are reconstructed from the names this file uses; confirm
+// the graaflib header paths against the library's include directory.
+#include <benchmark/benchmark.h>
+#include <graaflib/algorithm/graph_traversal/breadth_first_search.h>
+#include <graaflib/algorithm/minimum_spanning_tree/prim.h>
+
+#include <iostream>
+#include <map>
+#include <utility>
+
+#include "utils/dataset_reader.h"
+
+namespace {
+
+/**
+ * Returns the graph parsed from the given dataset, reading the input file only
+ * the first time that dataset is requested.
+ *
+ * The cache is keyed by dataset: a single function-local static graph would be
+ * initialised by whichever registered benchmark runs first and then reused for
+ * every other dataset.
+ *
+ * @param dataset_name The dataset to load.
+ * @return A reference to the cached graph.
+ */
+const utils::graph_t& load_graph_once(const utils::dataset& dataset_name) {
+  static std::map<utils::dataset, utils::graph_t> graphs{};
+
+  auto it = graphs.find(dataset_name);
+  if (it == graphs.end()) {
+    auto graph{utils::construct_graph_from_file(dataset_name)};
+    it = graphs.emplace(dataset_name, std::move(graph)).first;
+  }
+  return it->second;
+}
+
+/**
+ * Builds a graph from the edges that a breadth-first traversal starting at
+ * start_vertex reports, adding each edge's endpoints on first sight and giving
+ * every edge weight 1.
+ *
+ * @param graph The graph to traverse.
+ * @param start_vertex The vertex at which the traversal starts.
+ * @return The graph assembled from the traversed edges.
+ */
+[[nodiscard]] utils::graph_t compute_connected_subgraph(
+    const utils::graph_t& graph, const graaf::vertex_id_t start_vertex) {
+  utils::graph_t connected_subgraph{};
+
+  graaf::algorithm::breadth_first_traverse(
+      graph, start_vertex, [&connected_subgraph](const graaf::edge_id_t& edge) {
+        const auto [source, target](edge);
+
+        if (!connected_subgraph.has_vertex(source)) {
+          connected_subgraph.add_vertex(utils::no_data{}, source);
+        }
+
+        if (!connected_subgraph.has_vertex(target)) {
+          connected_subgraph.add_vertex(utils::no_data{}, target);
+        }
+
+        connected_subgraph.add_edge(source, target, 1);
+      });
+
+  return connected_subgraph;
+}
+
+/**
+ * Measures graaf::algorithm::prim_minimum_spanning_tree on the part of a
+ * dataset that the breadth-first traversal from start_vertex covers.
+ *
+ * @param state The benchmark state driving the measurement loop.
+ * @param dataset_name The dataset whose graph is used as input.
+ * @param start_vertex The vertex from which the subgraph and the tree are built.
+ */
+void bm_prim(benchmark::State& state, const utils::dataset& dataset_name,
+             const graaf::vertex_id_t start_vertex) {
+  const auto& graph{load_graph_once(dataset_name)};
+  const auto connected_subgraph{
+      compute_connected_subgraph(graph, start_vertex)};
+
+  std::cout << "number of vertices in connected subgraph: "
+            << connected_subgraph.vertex_count() << std::endl;
+
+  for (auto _ : state) {
+    auto result = graaf::algorithm::prim_minimum_spanning_tree(
+        connected_subgraph, start_vertex);
+    // Keep the result observable so the call is not optimised away.
+    benchmark::DoNotOptimize(result);
+  }
+}
+
+}  // namespace
+
+// Register the benchmarks
+BENCHMARK_CAPTURE(bm_prim, web_google, utils::dataset::WEB_GOOGLE, 1);
+BENCHMARK_CAPTURE(bm_prim, web_berkstan, utils::dataset::WEB_BERK_STAN, 1);
diff --git
a/perf/graaflib/utils/dataset_reader.cpp b/perf/graaflib/utils/dataset_reader.cpp
new file mode 100644
index 00000000..9478f2ef
--- /dev/null
+++ b/perf/graaflib/utils/dataset_reader.cpp
@@ -0,0 +1,100 @@
+#include "dataset_reader.h"
+
+// NOTE(review): the angle-bracket include targets and the template arguments
+// of DATASETS were absent from the reviewed text; they are reconstructed from
+// the names this file uses.
+#include <cstddef>
+#include <filesystem>
+#include <fstream>
+#include <stdexcept>
+#include <string>
+#include <unordered_map>
+
+namespace utils {
+
+namespace {
+
+/** Where an edge-list file lives and how many leading lines precede its edges. */
+struct graph_file {
+  std::filesystem::path filename{};
+  std::size_t number_of_header_lines{};
+};
+
+// Paths are relative, so they are resolved against the working directory of
+// the benchmark process.
+const std::unordered_map<dataset, graph_file> DATASETS{
+    {dataset::WEB_GOOGLE, graph_file{.filename = "input_data/web-Google.txt",
+                                     .number_of_header_lines = 4}},
+    {dataset::WEB_BERK_STAN,
+     graph_file{.filename = "input_data/web-BerkStan.txt",
+                .number_of_header_lines = 4}}};
+
+// Weight assigned to every edge that is read from a file.
+constexpr int UNIT_WEIGHT{1};
+
+}  // namespace
+
+/**
+ * Reads a tab-separated edge list and builds a graph from it.
+ *
+ * Each non-blank line after the header is expected to hold two vertex ids
+ * separated by a tab. Vertices are created on first sight and every edge gets
+ * UNIT_WEIGHT.
+ *
+ * @param dataset_name The dataset to load; must be present in DATASETS.
+ * @return The graph described by the file.
+ * @throws std::out_of_range If dataset_name is not present in DATASETS.
+ * @throws std::runtime_error If the file cannot be opened or a line has no tab.
+ * @throws std::invalid_argument If a vertex id is not a number.
+ */
+graph_t construct_graph_from_file(const dataset& dataset_name) {
+  const auto& input{DATASETS.at(dataset_name)};
+
+  std::ifstream file{input.filename};
+  // An assert is compiled out when NDEBUG is defined; a missing file would
+  // then go unnoticed and produce an empty graph.
+  if (!file.is_open()) {
+    throw std::runtime_error{"could not open dataset file '" +
+                             input.filename.string() +
+                             "' (resolved against the working directory)"};
+  }
+
+  std::string line;
+
+  // Skip the header lines
+  for (std::size_t i{0}; i < input.number_of_header_lines; ++i) {
+    std::getline(file, line);
+  }
+
+  graph_t graph{};
+
+  while (std::getline(file, line)) {
+    // Tolerate blank lines, e.g. an empty line at the end of the file.
+    if (line.find_first_not_of(" \t\r") == std::string::npos) {
+      continue;
+    }
+
+    const auto delim{line.find('\t')};
+    if (delim == std::string::npos) {
+      throw std::runtime_error{"malformed edge in '" +
+                               input.filename.string() + "': " + line};
+    }
+
+    // std::stoi skips leading whitespace and stops at the first non-digit.
+    const auto source{std::stoi(line.substr(0, delim))};
+    const auto target{std::stoi(line.substr(delim + 1))};
+
+    if (!graph.has_vertex(source)) {
+      graph.add_vertex(no_data{}, source);
+    }
+
+    if (!graph.has_vertex(target)) {
+      graph.add_vertex(no_data{}, target);
+    }
+
+    graph.add_edge(source, target, UNIT_WEIGHT);
+  }
+
+  return graph;
+}
+
+}  // namespace utils
diff --git a/perf/graaflib/utils/dataset_reader.h b/perf/graaflib/utils/dataset_reader.h
new file mode 100644
index 00000000..cca2f5eb
--- /dev/null
+++ b/perf/graaflib/utils/dataset_reader.h
@@ -0,0 +1,28 @@
+#pragma once
+
+// NOTE(review): the angle-bracket include target and the template arguments of
+// graph_t were absent from the reviewed text; they are reconstructed from how
+// the graph is used (no_data vertices, int edge weights) and should be confirmed.
+#include <graaflib/graph.h>
+
+#include <cstdint>
+
+namespace utils {
+
+/** The input datasets that construct_graph_from_file knows how to load. */
+enum class dataset : std::int64_t { WEB_GOOGLE, WEB_BERK_STAN };
+
+/** Empty payload for vertices that carry no data. */
+struct no_data {};
+using graph_t = graaf::undirected_graph<no_data, int>;
+
+/**
+ * Builds the graph stored in the input file that belongs to a dataset.
+ *
+ * @param dataset_name The dataset to load.
+ * @return The graph described by the dataset's input file.
+ * @throws std::runtime_error If the input file cannot be opened or parsed.
+ */
+[[nodiscard]] graph_t construct_graph_from_file(const dataset& dataset_name);
+
+}  // namespace utils