Skip to content

Guard against pathologically connected dependency graphs #13424

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions news/13424.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Handle pathologically connected dependency graphs in reasonable time.
34 changes: 24 additions & 10 deletions src/pip/_internal/resolution/resolvelib/resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,13 +248,24 @@ def get_topological_weights(
requirement_keys.
"""
path: set[str | None] = set()
weights: dict[str | None, int] = {}
weights: dict[str | None, list[int]] = {}

def visit(node: str | None) -> None:
if node in path:
# We hit a cycle, so we'll break it here.
return

# The walk is exponential and for pathologically connected graphs (which
# are the ones most likely to contain cycles in the first place) it can
# take until the heat-death of the universe. To counter this we limit
# the number of attempts to visit (i.e. traverse through) any given
# node. We choose a value here which gives decent enough coverage for
# fairly well behaved graphs, and still limits the walk complexity to be
# linear in nature.
cur_weights = weights.get(node, [])
if len(cur_weights) >= 5:
return

# Time to visit the children!
path.add(node)
for child in graph.iter_children(node):
Expand All @@ -264,14 +275,14 @@ def visit(node: str | None) -> None:
if node not in requirement_keys:
return

last_known_parent_count = weights.get(node, 0)
weights[node] = max(last_known_parent_count, len(path))
cur_weights.append(len(path))
weights[node] = cur_weights

# Simplify the graph, pruning leaves that have no dependencies.
# This is needed for large graphs (say over 200 packages) because the
# `visit` function is exponentially slower then, taking minutes.
# Simplify the graph, pruning leaves that have no dependencies. This is
# needed for large graphs (say over 200 packages) because the `visit`
# function is slower for large/densely connected graphs, taking minutes.
# See https://github.com/pypa/pip/issues/10557
# We will loop until we explicitly break the loop.
# We repeat the pruning step until we have no more leaves to remove.
while True:
leaves = set()
for key in graph:
Expand All @@ -291,12 +302,13 @@ def visit(node: str | None) -> None:
for leaf in leaves:
if leaf not in requirement_keys:
continue
weights[leaf] = weight
weights[leaf] = [weight]
# Remove the leaves from the graph, making it simpler.
for leaf in leaves:
graph.remove(leaf)

# Visit the remaining graph.
# Visit the remaining graph, this will only have nodes to handle if the
# graph had a cycle in it, which the pruning step above could not handle.
# `None` is guaranteed to be the root node by resolvelib.
visit(None)

Expand All @@ -305,7 +317,9 @@ def visit(node: str | None) -> None:
difference = set(weights.keys()).difference(requirement_keys)
assert not difference, difference

return weights
# Now give back all the weights, choosing the largest ones from what we
# accumulated.
return {node: max(wgts) for (node, wgts) in weights.items()}


def _req_set_item_sorter(
Expand Down