bobluppes · bobluppes · May 3, 2025 · May 3, 2025 · May 3, 2025 · May 3, 2025
diff --git a/perf/CMakeLists.txt b/perf/CMakeLists.txt
@@ -16,6 +16,48 @@ FetchContent_Declare(
 )
 FetchContent_MakeAvailable(fmt)
 
+# Downloads the gzip archive at URL (unless it is already on disk) and unpacks
+# it to DATASET_FILE. Failures are reported as warnings and any partial output
+# is removed, so that a later configure run retries instead of mistaking an
+# empty or truncated file for a valid dataset.
+function(fetch_benchmark_dataset URL DATASET_FILE)
+  if(EXISTS "${DATASET_FILE}")
+    return()
+  endif()
+
+  set(ARCHIVE_FILE "${DATASET_FILE}.gz")
+  if(NOT EXISTS "${ARCHIVE_FILE}")
+    file(DOWNLOAD "${URL}" "${ARCHIVE_FILE}" SHOW_PROGRESS STATUS DOWNLOAD_STATUS)
+    list(GET DOWNLOAD_STATUS 0 DOWNLOAD_CODE)
+    if(NOT DOWNLOAD_CODE EQUAL 0)
+      list(GET DOWNLOAD_STATUS 1 DOWNLOAD_ERROR)
+      file(REMOVE "${ARCHIVE_FILE}")
+      message(WARNING "Could not download ${URL}: ${DOWNLOAD_ERROR}")
+      return()
+    endif()
+  endif()
+
+  # The output file may be left behind even when gzip fails, so clean it up.
+  execute_process(
+    COMMAND gzip -dc "${ARCHIVE_FILE}"
+    OUTPUT_FILE "${DATASET_FILE}"
+    RESULT_VARIABLE GZIP_RESULT
+  )
+  if(NOT GZIP_RESULT EQUAL 0)
+    file(REMOVE "${DATASET_FILE}")
+    message(WARNING "Could not decompress ${ARCHIVE_FILE}: ${GZIP_RESULT}")
+  endif()
+endfunction()
+
+set(GOOGLE_WEB_INPUT_FILE "${CMAKE_CURRENT_BINARY_DIR}/input_data/web-Google.txt")
+set(GOOGLE_WEB_INPUT_URL "https://snap.stanford.edu/data/web-Google.txt.gz")
+fetch_benchmark_dataset("${GOOGLE_WEB_INPUT_URL}" "${GOOGLE_WEB_INPUT_FILE}")
+
+set(BERKELEY_STANFORD_INPUT_FILE "${CMAKE_CURRENT_BINARY_DIR}/input_data/web-BerkStan.txt")
+# Fetched from the same SNAP host as the web-Google dataset.
+set(BERKELEY_STANFORD_INPUT_URL "https://snap.stanford.edu/data/web-BerkStan.txt.gz")
+fetch_benchmark_dataset("${BERKELEY_STANFORD_INPUT_URL}" "${BERKELEY_STANFORD_INPUT_FILE}")
+
 file(GLOB PERF_SOURCES "graaflib/*.cpp" "graaflib/*/*.cpp")
 add_executable(
   ${PROJECT_NAME}_perf
@@ -30,4 +72,5 @@ target_link_libraries(
   PRIVATE
   benchmark
   fmt::fmt
-)
+)
+
diff --git a/perf/graaflib/kruskal_benchmark.cpp b/perf/graaflib/kruskal_benchmark.cpp
@@ -0,0 +1,46 @@
+#include <benchmark/benchmark.h>
+#include <graaflib/algorithm/minimum_spanning_tree/kruskal.h>
+
+#include <optional>
+
+#include "utils/dataset_reader.h"
+
+namespace {
+
+// Returns the graph of the requested dataset. The input file is only parsed
+// again when the dataset differs from the one served by the previous call.
+//
+// A single function-local static graph would be initialized by whichever
+// benchmark runs first, and every other dataset would silently reuse it.
+const utils::graph_t& load_graph(const utils::dataset& dataset_name) {
+  static std::optional<utils::dataset> cached_dataset{};
+  static std::optional<utils::graph_t> cached_graph{};
+
+  if (cached_dataset != dataset_name) {
+    // Release the previous graph before parsing the next one, so that this
+    // cache holds at most one graph at a time.
+    cached_dataset.reset();
+    cached_graph.reset();
+    cached_graph.emplace(utils::construct_graph_from_file(dataset_name));
+    cached_dataset = dataset_name;
+  }
+
+  return *cached_graph;
+}
+
+// Measures kruskal_minimum_spanning_tree on the graph of the given dataset.
+// Reading the dataset happens outside of the timed loop.
+void bm_kruskal(benchmark::State& state, const utils::dataset& dataset_name) {
+  const auto& graph{load_graph(dataset_name)};
+
+  for (auto _ : state) {
+    auto result = graaf::algorithm::kruskal_minimum_spanning_tree(graph);
+    benchmark::DoNotOptimize(result);
+  }
+}
+
+}  // namespace
+
+// Register the benchmarks
+BENCHMARK_CAPTURE(bm_kruskal, web_google, utils::dataset::WEB_GOOGLE);
+BENCHMARK_CAPTURE(bm_kruskal, web_berkstan, utils::dataset::WEB_BERK_STAN);
diff --git a/perf/graaflib/prim_benchmark.cpp b/perf/graaflib/prim_benchmark.cpp
@@ -0,0 +1,81 @@
+#include <benchmark/benchmark.h>
+#include <graaflib/algorithm/graph_traversal/breadth_first_search.h>
+#include <graaflib/algorithm/minimum_spanning_tree/prim.h>
+
+#include <iostream>
+#include <optional>
+
+#include "utils/dataset_reader.h"
+
+namespace {
+
+// Returns the graph of the requested dataset. The input file is only parsed
+// again when the dataset differs from the one served by the previous call.
+//
+// A single function-local static graph would be initialized by whichever
+// benchmark runs first, and every other dataset would silently reuse it.
+const utils::graph_t& load_graph(const utils::dataset& dataset_name) {
+  static std::optional<utils::dataset> cached_dataset{};
+  static std::optional<utils::graph_t> cached_graph{};
+
+  if (cached_dataset != dataset_name) {
+    // Release the previous graph before parsing the next one, so that this
+    // cache holds at most one graph at a time.
+    cached_dataset.reset();
+    cached_graph.reset();
+    cached_graph.emplace(utils::construct_graph_from_file(dataset_name));
+    cached_dataset = dataset_name;
+  }
+
+  return *cached_graph;
+}
+
+// Collects every edge that a breadth first traversal from start_vertex reports
+// into a new graph, adding the incident vertices on demand. All edges of the
+// result have weight 1.
+[[nodiscard]] utils::graph_t compute_connected_subgraph(
+    const utils::graph_t& graph, const graaf::vertex_id_t start_vertex) {
+  utils::graph_t connected_subgraph{};
+
+  const auto ensure_vertex{[&connected_subgraph](const graaf::vertex_id_t id) {
+    if (!connected_subgraph.has_vertex(id)) {
+      connected_subgraph.add_vertex(utils::no_data{}, id);
+    }
+  }};
+
+  graaf::algorithm::breadth_first_traverse(
+      graph, start_vertex,
+      [&connected_subgraph, &ensure_vertex](const graaf::edge_id_t& edge) {
+        const auto [source, target](edge);
+        ensure_vertex(source);
+        ensure_vertex(target);
+        connected_subgraph.add_edge(source, target, 1);
+      });
+
+  return connected_subgraph;
+}
+
+// Measures prim_minimum_spanning_tree on the subgraph that a breadth first
+// traversal from start_vertex yields for the given dataset. Loading the
+// dataset and extracting the subgraph happen outside of the timed loop.
+void bm_prim(benchmark::State& state, const utils::dataset& dataset_name,
+             const graaf::vertex_id_t start_vertex) {
+  const auto& graph{load_graph(dataset_name)};
+  const auto connected_subgraph{
+      compute_connected_subgraph(graph, start_vertex)};
+
+  std::cout << "number of vertices in connected subgraph: "
+            << connected_subgraph.vertex_count() << '\n';
+
+  for (auto _ : state) {
+    auto result = graaf::algorithm::prim_minimum_spanning_tree(
+        connected_subgraph, start_vertex);
+    benchmark::DoNotOptimize(result);
+  }
+}
+
+}  // namespace
+
+// Register the benchmarks
+BENCHMARK_CAPTURE(bm_prim, web_google, utils::dataset::WEB_GOOGLE, 1);
+BENCHMARK_CAPTURE(bm_prim, web_berkstan, utils::dataset::WEB_BERK_STAN, 1);
diff --git a/perf/graaflib/utils/dataset_reader.cpp b/perf/graaflib/utils/dataset_reader.cpp
@@ -0,0 +1,91 @@
+#include "dataset_reader.h"
+
+#include <cassert>
+#include <cstddef>
+#include <filesystem>
+#include <fstream>
+#include <stdexcept>
+#include <string>
+#include <unordered_map>
+
+namespace utils {
+
+namespace {
+
+// Describes where an edge list lives and how many leading lines to ignore.
+struct graph_file {
+  std::filesystem::path filename{};
+  std::size_t number_of_header_lines{};
+};
+
+// The paths are relative, so they are resolved against the working directory
+// of the benchmark process.
+const std::unordered_map<dataset, graph_file> DATASETS{
+    {dataset::WEB_GOOGLE, graph_file{.filename = "input_data/web-Google.txt",
+                                     .number_of_header_lines = 4}},
+    {dataset::WEB_BERK_STAN,
+     graph_file{.filename = "input_data/web-BerkStan.txt",
+                .number_of_header_lines = 4}}};
+
+constexpr int UNIT_WEIGHT{1};
+
+}  // namespace
+
+// Builds an undirected graph from the tab separated edge list of a dataset.
+//
+// The configured number of header lines is skipped, after which every line is
+// expected to hold "<source>\t<target>". Vertices are added on first use and
+// each edge gets UNIT_WEIGHT. Empty lines are ignored.
+//
+// Throws std::out_of_range when DATASETS has no entry for dataset_name and
+// std::runtime_error when the file cannot be opened or a line has no tab.
+// Vertex ids that std::stoi cannot parse surface as its exceptions.
+graph_t construct_graph_from_file(const dataset& dataset_name) {
+  const auto& file_info{DATASETS.at(dataset_name)};
+
+  // Checked at runtime: assert is compiled out when NDEBUG is defined, which
+  // would let a missing file pass as an empty graph.
+  std::ifstream file{file_info.filename};
+  if (!file.is_open()) {
+    throw std::runtime_error("unable to open dataset file: " +
+                             file_info.filename.string());
+  }
+
+  std::string line;
+
+  // Skip the header lines
+  for (std::size_t i{0}; i < file_info.number_of_header_lines; ++i) {
+    std::getline(file, line);
+  }
+
+  graph_t graph{};
+
+  while (std::getline(file, line)) {
+    if (line.empty()) {
+      continue;
+    }
+
+    const auto delim{line.find('\t')};
+    if (delim == std::string::npos) {
+      throw std::runtime_error("malformed edge line in " +
+                               file_info.filename.string() + ": " + line);
+    }
+
+    const auto source{std::stoi(line.substr(0, delim))};
+    const auto target{std::stoi(line.substr(delim + 1))};
+
+    if (!graph.has_vertex(source)) {
+      graph.add_vertex(no_data{}, source);
+    }
+
+    if (!graph.has_vertex(target)) {
+      graph.add_vertex(no_data{}, target);
+    }
+
+    graph.add_edge(source, target, UNIT_WEIGHT);
+  }
+
+  return graph;
+}
+
+}  // namespace utils
diff --git a/perf/graaflib/utils/dataset_reader.h b/perf/graaflib/utils/dataset_reader.h
@@ -0,0 +1,21 @@
+#pragma once
+
+#include <graaflib/graph.h>
+
+#include <cstdint>
+
+namespace utils {
+
+// Identifies the edge list datasets the benchmarks can run on.
+enum class dataset : std::int64_t { WEB_GOOGLE, WEB_BERK_STAN };
+
+// Empty payload for vertices that carry no data.
+struct no_data {};
+// Undirected graph with no_data vertices and int edges.
+using graph_t = graaf::undirected_graph<no_data, int>;
+
+// Reads the edge list file that belongs to dataset_name and returns it as a
+// graph.
+[[nodiscard]] graph_t construct_graph_from_file(const dataset& dataset_name);
+
+}  // namespace utils