Skip to content

Fast parallel algorithm for diameter of undirected unweighted real-world graphs #97

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions _nx_parallel/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,13 @@ def get_info():
'get_chunks : str, function (default = "chunks")': "A function that takes in a list of all the nodes as input and returns an iterable `node_chunks`. The default chunking is done by slicing the `nodes` into `n_jobs` number of chunks."
},
},
"diameter": {
"url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/distance_measures.py#L11",
"additional_docs": "This alternative to the more general `diameter` function is faster and allows for an approximation tolerance, though the default is to find the exact zero-tolerance result. The function uses the Iterative Fringe Upper Bound (IFUB) algorithm [1]_ with parallel computation of BFSes for fringe vertices.",
"additional_parameters": {
'get_chunks : str, function (default = "chunks")': "A function that takes in a list of all the nodes as input and returns an iterable `node_chunks`. The default chunking is done by slicing the `nodes` into `n_jobs` number of chunks."
},
},
"edge_betweenness_centrality": {
"url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/centrality/betweenness.py#L96",
"additional_docs": "The parallel computation is implemented by dividing the nodes into chunks and computing edge betweenness centrality for each chunk concurrently.",
Expand Down
1 change: 1 addition & 0 deletions nx_parallel/algorithms/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@
from .tournament import *
from .vitality import *
from .cluster import *
from .distance_measures import *
139 changes: 139 additions & 0 deletions nx_parallel/algorithms/distance_measures.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
"""Graph diameter"""

import networkx as nx
import nx_parallel as nxp
from joblib import Parallel, delayed

__all__ = ["diameter"]


@nxp._configure_if_nx_active()
def diameter(G, e=None, usebounds=False, weight=None, get_chunks="chunks"):
    """This alternative to the more general `diameter` function is faster and
    allows for an approximation tolerance, though the default is to find the
    exact zero-tolerance result. The function uses the Iterative Fringe Upper
    Bound (IFUB) algorithm [1]_ with parallel computation of BFSes for fringe
    vertices.

    networkx.diameter : https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.distance_measures.diameter.html#networkx.algorithms.distance_measures.diameter

    Parameters
    ----------
    get_chunks : str, function (default = "chunks")
        A function that takes in a list of all the nodes as input and returns an
        iterable `node_chunks`. The default chunking is done by slicing the
        `nodes` into `n_jobs` number of chunks.

    Notes
    -----
    If a precomputed eccentricity dict `e` is supplied, its maximum value is
    returned directly and no search is performed. Otherwise `G` must be
    connected, or `NetworkXError` is raised. `usebounds` and `weight` are
    accepted for signature compatibility only: this implementation always
    measures distances in hops (BFS levels). The error tolerance is currently
    fixed at zero, so the returned value is exact.

    The IFUB algorithm first selects an approximate "central" node using
    the 4-sweep heuristic. Here the 4-sweep starts from a node of maximum
    degree and performs two double sweeps: BFS to a farthest node, BFS again
    from that node, then pick a node from the middle layer of the second BFS.
    The depth of each second BFS is a lower bound on the diameter. A BFS tree
    is then rooted at the selected node, and the eccentricities of the nodes
    of each layer are computed in parallel, from the deepest layer upwards.
    While processing layer ``i``, if the best lower bound found so far
    exceeds ``2 * (i - 1)``, no shallower layer can improve on it and the
    bound is returned as the diameter; otherwise the upper bound is lowered
    to ``2 * (i - 1)`` and the next layer is processed. IFUB is
    observed to compute diameters efficiently for real-world graphs [1]_.

    References
    ----------
    .. [1] Crescenzi, P., Grossi, R., Lanzi, L., & Marino, A.
       "On computing the diameter of real-world undirected graphs"
       Theoretical Computer Science 426 (2012): 34-52.
       https://doi.org/10.1016/j.tcs.2012.09.018
    """
    G = G.graph_object if isinstance(G, nxp.ParallelGraph) else G

    # Same contract as networkx.diameter: precomputed eccentricities win.
    if e is not None:
        return max(e.values())

    if not nx.is_connected(G):
        raise nx.NetworkXError("Cannot compute metric because graph is not connected.")

    # 4-sweep: two double sweeps, the second one seeded with the first midpoint.
    start_node = max(G.nodes(), key=G.degree)
    first_bound, pivot = _sweep_midpoint(G, start_node)
    second_bound, root = _sweep_midpoint(G, pivot)

    error_tolerance = 0
    layers = list(nx.bfs_layers(G, root))
    root_ecc = len(layers) - 1
    # Any two nodes are joined through the root, hence diameter <= 2 * ecc(root).
    upper_bound = 2 * root_ecc
    lower_bound = max(first_bound, second_bound, root_ecc)
    cur_level = root_ecc

    n_jobs = nxp.get_n_jobs()

    # Entering the loop implies root_ecc >= 1, so lower_bound >= 1 and the
    # early return below always fires no later than cur_level == 1.
    while upper_bound - lower_bound > error_tolerance:
        fringe_vertices = layers[cur_level]

        # Split the fringe so that each worker runs BFS for a subset of nodes
        if get_chunks == "chunks":
            vertex_chunks = nxp.create_iterables(G, "node", n_jobs, fringe_vertices)
        else:
            vertex_chunks = get_chunks(fringe_vertices)

        chunk_eccentricities = Parallel()(
            delayed(_calculate_eccentricities_for_nodes)(G, chunk)
            for chunk in vertex_chunks
        )

        # Flatten across chunks; the default covers "no chunks were produced"
        cur_max_ecc = max(
            (ecc for eccs in chunk_eccentricities for ecc in eccs.values()),
            default=0,
        )

        lower_bound = max(lower_bound, cur_max_ecc)
        remaining_bound = 2 * (cur_level - 1)
        if lower_bound > remaining_bound:
            return lower_bound

        upper_bound = remaining_bound
        cur_level -= 1

    return lower_bound


def _sweep_midpoint(G, source):
    """Run one double BFS sweep; return ``(depth, mid_node)``.

    BFS from `source` to find a farthest node, then BFS again from that node.
    ``depth`` is the number of levels below the second BFS root and
    ``mid_node`` is the first node of its middle layer.
    """
    farthest = list(nx.bfs_layers(G, source))[-1][0]
    layers = list(nx.bfs_layers(G, farthest))
    depth = len(layers) - 1
    return depth, layers[depth // 2][0]


def _calculate_eccentricities_for_nodes(G, nodes):
"""Calculate eccentricities for a subset of nodes."""
eccentricities = {-1: 0}
for node in nodes:
layers = list(nx.bfs_layers(G, node))
eccentricities[node] = len(layers) - 1
return eccentricities
32 changes: 32 additions & 0 deletions nx_parallel/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@
"approximate_all_pairs_node_connectivity",
# Connectivity
"connectivity.all_pairs_node_connectivity",
# Diameter : unweighted undirected graphs
"diameter",
]


Expand Down Expand Up @@ -96,3 +98,33 @@ def convert_to_nx(result, *, name=None):
if isinstance(result, ParallelGraph):
return result.graph_object
return result

@staticmethod
def can_run(name, args, kwargs):
"""Determine if the algorithm can be run with the given arguments."""
if name == "diameter":
# Extract the graph from args
if not args:
return False

graph = args[0]
if isinstance(graph, ParallelGraph):
graph = graph.graph_object

if graph.is_directed():
return (
"Parallel diameter implementation only supports undirected graphs"
)

if kwargs.get("weight") is not None:
return (
"Parallel diameter implementation only supports unweighted graphs"
)

for u, v, data in graph.edges(data=True):
if "weight" in data:
return "Parallel diameter implementation only supports unweighted graphs"

return True

return True # All other algorithms can run by default
Loading