diff --git a/pyproject.toml b/pyproject.toml index c5043c89..f8be075e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,6 +49,11 @@ developer = [ 'pre-commit==3.8.0', 'ruff==0.6.7', ] +heatmap = [ + 'matplotlib>=3.8', + 'seaborn>=0.13.2', + 'pandas>=2.0', +] test = [ 'pytest>=7.2', 'numpy>=1.23', diff --git a/timing/heatmap_betweenness_centrality_timing.png b/timing/heatmap_betweenness_centrality_timing.png index cadb1990..d7852113 100644 Binary files a/timing/heatmap_betweenness_centrality_timing.png and b/timing/heatmap_betweenness_centrality_timing.png differ diff --git a/timing/heatmap_is_reachable_timing.png b/timing/heatmap_is_reachable_timing.png index 2d70a7ce..7e9976a5 100644 Binary files a/timing/heatmap_is_reachable_timing.png and b/timing/heatmap_is_reachable_timing.png differ diff --git a/timing/heatmap_node_redundancy_timing.png b/timing/heatmap_node_redundancy_timing.png index 04532441..407a2db4 100644 Binary files a/timing/heatmap_node_redundancy_timing.png and b/timing/heatmap_node_redundancy_timing.png differ diff --git a/timing/timing_individual_function.py b/timing/timing_individual_function.py index 809315d0..1f2e04ff 100644 --- a/timing/timing_individual_function.py +++ b/timing/timing_individual_function.py @@ -1,110 +1,161 @@ -import time +""" +To generate heatmaps comparing the performance of nx-parallel and NetworkX implementations, make sure to run: + python3 -m pip install -e '.[heatmap]' +""" import networkx as nx -import pandas as pd +import nx_parallel as nxp +from matplotlib import pyplot as plt, patches as mpatches import seaborn as sns -from matplotlib import pyplot as plt +import numpy as np +import pandas as pd +import joblib +import timeit +import random +import types -import nx_parallel as nxp +# Default Config +joblib.parallel_config(n_jobs=-1) +# To use NetworkX's parallel backend, set the following configuration. 
# nx.config.backends.parallel.active = True
# nx.config.backends.parallel.n_jobs = -1

# Function names (matched against ``targetFunc.__name__``) that select the
# graph family used for benchmarking.
tournament_funcs = ["is_reachable", "tournament_is_strongly_connected"]
bipartite_funcs = ["node_redundancy"]
random.seed(42)


def time_individual_function(
    targetFunc, number_of_nodes, edge_prob, speedup_df, heatmap_annot, *, weighted=False
):
    """Time ``targetFunc`` on plain NetworkX graphs and on ``nxp.ParallelGraph``.

    Every generated graph is timed twice: once wrapped in
    ``nxp.ParallelGraph`` and once as the plain graph. The results are written
    in place into ``speedup_df`` and ``heatmap_annot``; nothing is returned.

    Parameters
    ----------
    targetFunc : callable
        Function under test. Its ``__name__`` selects the graph family: names
        in ``tournament_funcs`` get ``nx.tournament.random_tournament`` graphs,
        names in ``bipartite_funcs`` get directed
        ``nx.bipartite.random_graph`` graphs, and everything else gets
        directed ``nx.fast_gnp_random_graph`` graphs.
    number_of_nodes : sequence of int
        Total node count of each benchmark graph; used as the row label.
    edge_prob : sequence of float
        Edge probabilities; used as the column label. The tournament branch
        does not use a probability and records under ``edge_prob[0]`` only.
    speedup_df : pandas.DataFrame
        Receives ``stdTime / parallelTime`` at ``[num, p]``.
    heatmap_annot : pandas.DataFrame
        Receives the cell text (parallel runtime and speed-up) at ``[num, p]``.
    weighted : bool, keyword-only, default False
        If true, every edge gets a random ``"weight"`` attribute. Only applied
        in the non-tournament branch.
    """

    def measure_time(G, *args):
        """Return the fastest of five single runs of ``targetFunc(G, *args)``."""
        repeat = 5

        def wrapper():
            result = targetFunc(G, *args)
            # A generator does no work until consumed, so exhaust it inside
            # the timed region.
            if isinstance(result, types.GeneratorType):
                _ = dict(result)

        times = timeit.repeat(wrapper, repeat=repeat, number=1)
        return min(times)

    def record_result(stdTime, parallelTime, row, col):
        """Store the speed-up and its annotation text at ``[row, col]``."""
        timesFaster = stdTime / parallelTime
        speedup_df.at[row, col] = timesFaster
        heatmap_annot.at[row, col] = f"{parallelTime:.2g}s\n\n{timesFaster:.2g}x"

    def make_bipartite_graph(num, p):
        """Build a directed random bipartite graph with ``num`` nodes in total."""
        # Split the total roughly 2:1 between the two node sets. For the
        # default sizes (300, 600, 1200, 2400) this gives exactly the
        # 200/100, 400/200, 800/400 and 1600/800 partitions that used to be
        # hard-coded, but it also works for any other size or list length.
        bottom = num // 3
        top = num - bottom
        print(top + bottom)
        G = nx.bipartite.random_graph(top, bottom, p, directed=True)
        for cur_node in G.nodes:
            neighbors = set(G.neighbors(cur_node))
            # Make sure every node has at least 2 outgoing edges.
            while len(neighbors) < 2:
                new_neighbor = random.choice(
                    [
                        node
                        for node in G.nodes
                        if node != cur_node and node not in neighbors
                    ]
                )
                G.add_edge(cur_node, new_neighbor)
                neighbors.add(new_neighbor)
        return G

    func_name = targetFunc.__name__

    if func_name not in tournament_funcs:
        for p in edge_prob:
            for num in number_of_nodes:
                if func_name in bipartite_funcs:
                    G = make_bipartite_graph(num, p)
                else:
                    print(num)
                    G = nx.fast_gnp_random_graph(num, p, directed=True)

                # for weighted graphs
                if weighted:
                    # Reseed so the weights drawn for each graph start from
                    # the same RNG state.
                    random.seed(42)
                    for u, v in G.edges():
                        G[u][v]["weight"] = random.random()

                H = nxp.ParallelGraph(G)
                # time both versions and update speedup_df
                parallelTime = measure_time(H)
                print(parallelTime)
                stdTime = measure_time(G)
                print(stdTime)
                record_result(stdTime, parallelTime, num, p)
                print("Finished " + str(targetFunc))
    else:
        # for tournament graphs
        for num in number_of_nodes:
            print(num)
            G = nx.tournament.random_tournament(num)
            H = nxp.ParallelGraph(G)
            # ``is_reachable(G, s, t)`` needs a source and a target; the other
            # tournament function takes only the graph. The tournament's nodes
            # are 0..num-1, so the last valid target is ``num - 1`` (``num``
            # itself is not a node of the graph).
            args = (1, num - 1) if func_name == "is_reachable" else ()
            parallelTime = measure_time(H, *args)
            print(parallelTime)
            stdTime = measure_time(G, *args)
            print(stdTime)
            record_result(stdTime, parallelTime, num, edge_prob[0])
            print("Finished " + str(targetFunc))


def plot_timing_heatmap(targetFunc):
    """Benchmark ``targetFunc`` and save its speed-up heatmap as a PNG.

    The graph sizes and edge probabilities are chosen from
    ``targetFunc.__name__``: bipartite functions use larger node totals and
    tournament functions use a single probability column. Each heatmap cell is
    coloured by the speed-up and annotated with the parallel runtime and the
    speed-up factor.

    The image is written to ``timing/heatmap_<name>_timing.png``. The path is
    relative to the current working directory, so run this from the directory
    that contains ``timing/``.

    Parameters
    ----------
    targetFunc : callable
        Function to benchmark; passed through to ``time_individual_function``.
    """
    func_name = targetFunc.__name__
    number_of_nodes = (
        [200, 400, 800, 1600]
        if func_name not in bipartite_funcs
        else [300, 600, 1200, 2400]
    )
    edge_prob = [1, 0.8, 0.6, 0.4, 0.2] if func_name not in tournament_funcs else [1]

    # One frame holds the numeric speed-ups (drives the colours), the other
    # holds the text shown in each cell.
    speedup_df = pd.DataFrame(index=number_of_nodes, columns=edge_prob, dtype=float)
    heatmap_annot = pd.DataFrame(index=number_of_nodes, columns=edge_prob, dtype=object)

    time_individual_function(
        targetFunc, number_of_nodes, edge_prob, speedup_df, heatmap_annot
    )

    plt.rcParams["font.family"] = "Arial Rounded MT Bold"
    fig = plt.figure(figsize=(20, 6))
    # Transpose so node counts run along the x-axis and probabilities along y.
    ax = sns.heatmap(
        data=speedup_df.T,
        annot=heatmap_annot.T,
        annot_kws={"size": 12, "weight": "bold"},
        fmt="",  # annotations are already formatted strings
        cmap="Greens",
        cbar=True,
    )

    ax.set_xticks(np.arange(len(number_of_nodes)) + 0.5)
    ax.set_xticklabels(number_of_nodes, rotation=45)
    ax.set_yticks(np.arange(len(edge_prob)) + 0.5)
    ax.set_yticklabels(edge_prob, rotation=20)

    ax.set_xlabel("Number of Vertices", fontweight="bold", fontsize=12)
    ax.set_ylabel("Edge Probability", fontweight="bold", fontsize=12)

    n_jobs = nxp.get_n_jobs()
    ax.set_title(
        f"Small Scale Demo: Time Speedups of {targetFunc.__name__} compared to NetworkX on {n_jobs} cores",
        fontweight="bold",
        fontsize=14,
        loc="left",
    )

    # Colourless patches are used purely to get text-only legend entries that
    # explain the two lines of each cell annotation.
    legend_patches = [
        mpatches.Patch(color="none", label="Top Line: Parallel runtime (s)"),
        mpatches.Patch(color="none", label="Bottom Line: Speed-up"),
    ]
    ax.legend(
        handles=legend_patches,
        loc="lower right",
        bbox_to_anchor=(1.0, 1.02),
        title="Cell Annotation Format",
        prop={"size": 12, "weight": "bold"},
    )

    # Leave headroom at the top for the title and the legend placed above the axes.
    plt.tight_layout(rect=[0, 0, 1, 0.94])
    plt.savefig("timing/" + "heatmap_" + func_name + "_timing.png")
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)


# plot_timing_heatmap(nx.algorithms.tournament.is_reachable)