Spec traversal: add option for topological ordering (#33817)

Spec traversals can now specify a topological ordering. A topologically-
ordered traversal with input specs X1, X2... will

* include all of X1, X2... and their children
* be ordered such that a given node is guaranteed to appear before any
  of its children in the traversal

Other notes:

* Input specs can be children of other input specs (this is useful if
  a user specifies a set of specs to uninstall: some of those specs
  might be children of others)
* `direction="parents"` will produce a reversed topological order
  (children always come before parents).
* `cover="edges"` will generate a list of edges L such that (a) input
  edges will always appear before output edges and (b) if you create
  a list with the destination of each edge in L the result is
  topologically ordered
This commit is contained in:
Harmen Stoppels 2022-11-22 03:33:35 +01:00 committed by GitHub
parent f97f37550a
commit 44c22a54c9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 371 additions and 99 deletions

View file

@ -25,64 +25,109 @@ def create_dag(nodes, edges):
@pytest.fixture()
def abstract_specs_dtuse():
nodes = [
"dtbuild1",
"dtbuild2",
"dtbuild3",
"dtlink1",
"dtlink2",
"dtlink3",
"dtlink4",
"dtlink5",
"dtrun1",
"dtrun2",
"dtrun3",
"dttop",
"dtuse",
]
edges = [
("dtbuild1", "dtbuild2", ("build")),
("dtbuild1", "dtlink2", ("build", "link")),
("dtbuild1", "dtrun2", ("run")),
("dtlink1", "dtlink3", ("build", "link")),
("dtlink3", "dtbuild2", ("build")),
("dtlink3", "dtlink4", ("build", "link")),
("dtrun1", "dtlink5", ("build", "link")),
("dtrun1", "dtrun3", ("run")),
("dtrun3", "dtbuild3", ("build")),
("dttop", "dtbuild1", ("build",)),
("dttop", "dtlink1", ("build", "link")),
("dttop", "dtrun1", ("run")),
("dtuse", "dttop", ("build", "link")),
]
return create_dag(nodes, edges)
return create_dag(
nodes=[
"dtbuild1",
"dtbuild2",
"dtbuild3",
"dtlink1",
"dtlink2",
"dtlink3",
"dtlink4",
"dtlink5",
"dtrun1",
"dtrun2",
"dtrun3",
"dttop",
"dtuse",
],
edges=[
("dtbuild1", "dtbuild2", ("build")),
("dtbuild1", "dtlink2", ("build", "link")),
("dtbuild1", "dtrun2", ("run")),
("dtlink1", "dtlink3", ("build", "link")),
("dtlink3", "dtbuild2", ("build")),
("dtlink3", "dtlink4", ("build", "link")),
("dtrun1", "dtlink5", ("build", "link")),
("dtrun1", "dtrun3", ("run")),
("dtrun3", "dtbuild3", ("build")),
("dttop", "dtbuild1", ("build",)),
("dttop", "dtlink1", ("build", "link")),
("dttop", "dtrun1", ("run")),
("dtuse", "dttop", ("build", "link")),
],
)
@pytest.fixture()
def abstract_specs_dt_diamond():
nodes = ["dt-diamond", "dt-diamond-left", "dt-diamond-right", "dt-diamond-bottom"]
edges = [
("dt-diamond", "dt-diamond-left", ("build", "link")),
("dt-diamond", "dt-diamond-right", ("build", "link")),
("dt-diamond-right", "dt-diamond-bottom", ("build", "link", "run")),
("dt-diamond-left", "dt-diamond-bottom", ("build")),
]
return create_dag(nodes, edges)
return create_dag(
nodes=["dt-diamond", "dt-diamond-left", "dt-diamond-right", "dt-diamond-bottom"],
edges=[
("dt-diamond", "dt-diamond-left", ("build", "link")),
("dt-diamond", "dt-diamond-right", ("build", "link")),
("dt-diamond-right", "dt-diamond-bottom", ("build", "link", "run")),
("dt-diamond-left", "dt-diamond-bottom", ("build")),
],
)
@pytest.fixture()
def abstract_specs_chain():
# Chain a -> b -> c -> d with skip connections
# from a -> c and a -> d.
nodes = ["chain-a", "chain-b", "chain-c", "chain-d"]
edges = [
("chain-a", "chain-b", ("build", "link")),
("chain-b", "chain-c", ("build", "link")),
("chain-c", "chain-d", ("build", "link")),
("chain-a", "chain-c", ("build", "link")),
("chain-a", "chain-d", ("build", "link")),
]
return create_dag(nodes, edges)
return create_dag(
nodes=["chain-a", "chain-b", "chain-c", "chain-d"],
edges=[
("chain-a", "chain-b", ("build", "link")),
("chain-b", "chain-c", ("build", "link")),
("chain-c", "chain-d", ("build", "link")),
("chain-a", "chain-c", ("build", "link")),
("chain-a", "chain-d", ("build", "link")),
],
)
@pytest.mark.parametrize("direction", ("children", "parents"))
@pytest.mark.parametrize("deptype", ("all", ("link", "build"), ("run", "link")))
def test_all_orders_traverse_the_same_nodes(direction, deptype, abstract_specs_dtuse):
# Test whether all graph traversal methods visit the same set of vertices.
# When testing cover=nodes, the traversal methods may reach the same vertices
# through different edges, so we're using traverse_nodes here to only verify the
# vertices.
#
# NOTE: root=False currently means "yield nodes discovered at depth > 0",
# meaning that depth first search will yield dtlink5 as it is first found through
# dtuse, whereas breadth first search considers dtlink5 at depth 0 and does not
# yield it since it is a root. Therefore, we only use root=True.
# (The inconsistency cannot be resolved by making root=False mean "don't yield
# vertices without in-edges", since this is not how it's used; it's typically used
# as "skip the input specs".)
specs = [abstract_specs_dtuse["dtuse"], abstract_specs_dtuse["dtlink5"]]
kwargs = {"root": True, "direction": direction, "deptype": deptype, "cover": "nodes"}
def nodes(order):
s = traverse.traverse_nodes(specs, order=order, **kwargs)
return sorted(list(s))
assert nodes("pre") == nodes("post") == nodes("breadth") == nodes("topo")
@pytest.mark.parametrize("direction", ("children", "parents"))
@pytest.mark.parametrize("root", (True, False))
@pytest.mark.parametrize("deptype", ("all", ("link", "build"), ("run", "link")))
def test_all_orders_traverse_the_same_edges(direction, root, deptype, abstract_specs_dtuse):
# Test whether all graph traversal methods visit the same set of edges.
# All edges should be returned, including the artificial edges to the input
# specs when root=True.
specs = [abstract_specs_dtuse["dtuse"], abstract_specs_dtuse["dtlink5"]]
kwargs = {"root": root, "direction": direction, "deptype": deptype, "cover": "edges"}
def edges(order):
s = traverse.traverse_edges(specs, order=order, **kwargs)
return sorted(list(s))
assert edges("pre") == edges("post") == edges("breadth") == edges("topo")
def test_breadth_first_traversal(abstract_specs_dtuse):
@ -169,18 +214,18 @@ def test_breadth_first_traversal_reverse(abstract_specs_dt_diamond):
]
def test_breadth_first_traversal_multiple_roots(abstract_specs_dt_diamond):
def test_breadth_first_traversal_multiple_input_specs(abstract_specs_dt_diamond):
# With DFS, the branch dt-diamond -> dt-diamond-left -> dt-diamond-bottom
# is followed, with BFS, dt-diamond-bottom should be traced through the second
# root dt-diamond-right at depth 1 instead.
roots = [
# input spec dt-diamond-right at depth 1 instead.
input_specs = [
abstract_specs_dt_diamond["dt-diamond"],
abstract_specs_dt_diamond["dt-diamond-right"],
]
gen = traverse.traverse_edges(roots, order="breadth", key=id, depth=True, root=False)
gen = traverse.traverse_edges(input_specs, order="breadth", key=id, depth=True, root=False)
assert [(depth, edge.parent.name, edge.spec.name) for (depth, edge) in gen] == [
(1, "dt-diamond", "dt-diamond-left"), # edge from first root "to" depth 1
(1, "dt-diamond-right", "dt-diamond-bottom"), # edge from second root "to" depth 1
(1, "dt-diamond", "dt-diamond-left"), # edge from first input spec "to" depth 1
(1, "dt-diamond-right", "dt-diamond-bottom"), # edge from second input spec "to" depth 1
]
@ -284,3 +329,93 @@ def test_breadth_first_versus_depth_first_printing(abstract_specs_chain):
^chain-d
"""
assert s.tree(depth_first=False, cover="edges", **args) == bfs_tree_edges
@pytest.fixture()
def abstract_specs_toposort():
# Create a graph that both BFS and DFS would not traverse in topo order, given the
# default edge ordering (by target spec name). Roots are {A, E} in forward order
# and {F, G} in backward order.
# forward: DFS([A, E]) traverses [A, B, F, G, C, D, E] (not topo since C < B)
# forward: BFS([A, E]) traverses [A, E, B, C, D, F, G] (not topo since C < B)
# reverse: DFS([F, G]) traverses [F, B, A, D, C, E, G] (not topo since D < A)
# reverse: BFS([F, G]) traverses [F, G, B, A, D, C, E] (not topo since D < A)
# E
# | A
# | | \
# | C |
# \ | |
# D |
# | /
# B
# / \
# F G
return create_dag(
nodes=["A", "B", "C", "D", "E", "F", "G"],
edges=(
("A", "B", "all"),
("A", "C", "all"),
("B", "F", "all"),
("B", "G", "all"),
("C", "D", "all"),
("D", "B", "all"),
("E", "D", "all"),
),
)
def test_traverse_nodes_topo(abstract_specs_toposort):
# Test whether we get topologically ordered specs when using traverse_nodes with
# order=topo and cover=nodes.
nodes = abstract_specs_toposort
def test_topo(input_specs, direction="children"):
# Ensure the invariant that all parents of specs[i] are in specs[0:i]
specs = list(
traverse.traverse_nodes(input_specs, order="topo", cover="nodes", direction=direction)
)
reverse = "parents" if direction == "children" else "children"
for i in range(len(specs)):
parents = specs[i].traverse(cover="nodes", direction=reverse, root=False)
assert set(list(parents)).issubset(set(specs[:i]))
# Traverse forward from roots A and E and a non-root D. Notice that adding D has no
# effect, it's just to make the test case a bit more complicated, as D is a starting
# point for traversal, but it's also discovered as a descendant of E and A.
test_topo([nodes["D"], nodes["E"], nodes["A"]], direction="children")
# Traverse reverse from leafs F and G and non-leaf D
test_topo([nodes["F"], nodes["D"], nodes["G"]], direction="parents")
def test_traverse_edges_topo(abstract_specs_toposort):
# Test the invariant that for each node in-edges precede out-edges when
# using traverse_edges with order=topo.
nodes = abstract_specs_toposort
input_specs = [nodes["E"], nodes["A"]]
# Collect pairs of (parent spec name, child spec name)
edges = [
(e.parent.name, e.spec.name)
for e in traverse.traverse_edges(input_specs, order="topo", cover="edges", root=False)
]
# See figure above, we have 7 edges (excluding artifical ones to the root)
assert set(edges) == set(
[
("A", "B"),
("A", "C"),
("B", "F"),
("B", "G"),
("C", "D"),
("D", "B"),
("E", "D"),
]
)
# Verify that all in-edges precede all out-edges
for node in nodes.keys():
in_edge_indices = [i for (i, (parent, child)) in enumerate(edges) if node == child]
out_edge_indices = [i for (i, (parent, child)) in enumerate(edges) if node == parent]
if in_edge_indices and out_edge_indices:
assert max(in_edge_indices) < min(out_edge_indices)

View file

@ -29,10 +29,10 @@ class BaseVisitor(object):
def __init__(self, deptype="all"):
self.deptype = deptype
def accept(self, node):
def accept(self, item):
"""
Arguments:
node (EdgeAndDepth): Provides the depth and the edge through which the
item (EdgeAndDepth): Provides the depth and the edge through which the
node was discovered
Returns:
@ -42,8 +42,8 @@ def accept(self, node):
"""
return True
def neighbors(self, node):
return sort_edges(node.edge.spec.edges_to_dependencies(deptype=self.deptype))
def neighbors(self, item):
return sort_edges(item.edge.spec.edges_to_dependencies(deptype=self.deptype))
class ReverseVisitor(object):
@ -53,13 +53,13 @@ def __init__(self, visitor, deptype="all"):
self.visitor = visitor
self.deptype = deptype
def accept(self, node):
return self.visitor.accept(node)
def accept(self, item):
return self.visitor.accept(item)
def neighbors(self, node):
def neighbors(self, item):
"""Return dependents, note that we actually flip the edge direction to allow
generic programming"""
spec = node.edge.spec
spec = item.edge.spec
return sort_edges(
[edge.flip() for edge in spec.edges_from_dependents(deptype=self.deptype)]
)
@ -73,19 +73,19 @@ def __init__(self, visitor, key=id, visited=None):
self.key = key
self.visited = set() if visited is None else visited
def accept(self, node):
def accept(self, item):
# Covering nodes means: visit nodes once and only once.
key = self.key(node.edge.spec)
key = self.key(item.edge.spec)
if key in self.visited:
return False
accept = self.visitor.accept(node)
accept = self.visitor.accept(item)
self.visited.add(key)
return accept
def neighbors(self, node):
return self.visitor.neighbors(node)
def neighbors(self, item):
return self.visitor.neighbors(item)
class CoverEdgesVisitor(object):
@ -96,18 +96,82 @@ def __init__(self, visitor, key=id, visited=None):
self.visited = set() if visited is None else visited
self.key = key
def accept(self, node):
return self.visitor.accept(node)
def accept(self, item):
return self.visitor.accept(item)
def neighbors(self, node):
def neighbors(self, item):
# Covering edges means: drop dependencies of visited nodes.
key = self.key(node.edge.spec)
key = self.key(item.edge.spec)
if key in self.visited:
return []
self.visited.add(key)
return self.visitor.neighbors(node)
return self.visitor.neighbors(item)
class TopoVisitor(object):
"""Visitor that can be used in :py:func:`depth-first traversal
<spack.traverse.traverse_depth_first_with_visitor>` to generate
a topologically ordered list of specs.
Algorithm based on "Section 22.4: Topological sort", Introduction to Algorithms
(2001, 2nd edition) by Cormen, Thomas H.; Leiserson, Charles E.; Rivest, Ronald L.;
Stein, Clifford.
Summary of the algorithm: prepend each vertex to a list in depth-first post-order,
not following edges to nodes already seen. This ensures all descendants occur after
their parent, yielding a topological order.
Note: in this particular implementation we collect the *edges* through which the
vertices are discovered, meaning that a topological order of *vertices* is obtained
by taking the specs pointed to: ``map(lambda edge: edge.spec, visitor.edges)``.
Lastly, ``all_edges=True`` can be used to retrieve a list of all reachable
edges, with the property that for each vertex all in-edges precede all out-edges.
"""
def __init__(self, visitor, key=id, root=True, all_edges=False):
"""
Arguments:
visitor: visitor that implements accept(), pre(), post() and neighbors()
key: uniqueness key for nodes
root (bool): Whether to include the root node.
all_edges (bool): when ``False`` (default): Each node is reached once,
and ``map(lambda edge: edge.spec, visitor.edges)`` is topologically
ordered. When ``True``, every edge is listed, ordered such that for
each node all in-edges precede all out-edges.
"""
self.visited = set()
self.visitor = visitor
self.key = key
self.root = root
self.reverse_order = []
self.all_edges = all_edges
def accept(self, item):
if self.key(item.edge.spec) not in self.visited:
return True
if self.all_edges and (self.root or item.depth > 0):
self.reverse_order.append(item.edge)
return False
def pre(self, item):
# You could add a temporary marker for cycle detection
# that's cleared in `post`, but we assume no cycles.
pass
def post(self, item):
self.visited.add(self.key(item.edge.spec))
if self.root or item.depth > 0:
self.reverse_order.append(item.edge)
def neighbors(self, item):
return self.visitor.neighbors(item)
@property
def edges(self):
"""Return edges in topological order (in-edges precede out-edges)."""
return list(reversed(self.reverse_order))
def get_visitor_from_args(cover, direction, deptype, key=id, visited=None, visitor=None):
@ -144,7 +208,7 @@ def get_visitor_from_args(cover, direction, deptype, key=id, visited=None, visit
return visitor
def root_specs(specs):
def with_artificial_edges(specs):
"""Initialize a list of edges from an imaginary root node to the root specs."""
return [
EdgeAndDepth(edge=spack.spec.DependencySpec(parent=None, spec=s, deptypes=()), depth=0)
@ -152,23 +216,31 @@ def root_specs(specs):
]
def traverse_depth_first_edges_generator(nodes, visitor, post_order=False, root=True, depth=False):
# This is a somewhat non-standard implementation, but the reason to start with
# edges is that we don't have to deal with an artificial root node when doing DFS
# on multiple (root) specs.
for node in nodes:
if not visitor.accept(node):
def traverse_depth_first_edges_generator(edges, visitor, post_order=False, root=True, depth=False):
"""Generator that takes explores a DAG in depth-first fashion starting from
a list of edges. Note that typically DFS would take a vertex not a list of edges,
but the API is like this so we don't have to create an artificial root node when
traversing from multiple roots in a DAG.
Arguments:
edges (list): List of EdgeAndDepth instances
visitor: class instance implementing accept() and neigbors()
post_order (bool): Whether to yield nodes when backtracking
root (bool): whether to yield at depth 0
depth (bool): when ``True`` yield a tuple of depth and edge, otherwise only the
edge.
"""
for edge in edges:
if not visitor.accept(edge):
continue
yield_me = root or node.depth > 0
yield_me = root or edge.depth > 0
# Pre
if yield_me and not post_order:
yield (node.depth, node.edge) if depth else node.edge
yield (edge.depth, edge.edge) if depth else edge.edge
neighbors = [
EdgeAndDepth(edge=edge, depth=node.depth + 1) for edge in visitor.neighbors(node)
]
neighbors = [EdgeAndDepth(edge=n, depth=edge.depth + 1) for n in visitor.neighbors(edge)]
# This extra branch is just for efficiency.
if len(neighbors) >= 0:
@ -179,22 +251,22 @@ def traverse_depth_first_edges_generator(nodes, visitor, post_order=False, root=
# Post
if yield_me and post_order:
yield (node.depth, node.edge) if depth else node.edge
yield (edge.depth, edge.edge) if depth else edge.edge
def traverse_breadth_first_edges_generator(queue, visitor, root=True, depth=False):
while len(queue) > 0:
node = queue.pop(0)
edge = queue.pop(0)
# If the visitor doesn't accept the node, we don't yield it nor follow its edges.
if not visitor.accept(node):
if not visitor.accept(edge):
continue
if root or node.depth > 0:
yield (node.depth, node.edge) if depth else node.edge
if root or edge.depth > 0:
yield (edge.depth, edge.edge) if depth else edge.edge
for edge in visitor.neighbors(node):
queue.append(EdgeAndDepth(edge, node.depth + 1))
for e in visitor.neighbors(edge):
queue.append(EdgeAndDepth(e, edge.depth + 1))
def traverse_breadth_first_with_visitor(specs, visitor):
@ -205,16 +277,39 @@ def traverse_breadth_first_with_visitor(specs, visitor):
visitor: object that implements accept and neighbors interface, see
for example BaseVisitor.
"""
queue = root_specs(specs)
queue = with_artificial_edges(specs)
while len(queue) > 0:
node = queue.pop(0)
edge = queue.pop(0)
# If the visitor doesn't accept the node, we don't traverse it further.
if not visitor.accept(node):
if not visitor.accept(edge):
continue
for edge in visitor.neighbors(node):
queue.append(EdgeAndDepth(edge, node.depth + 1))
for e in visitor.neighbors(edge):
queue.append(EdgeAndDepth(e, edge.depth + 1))
def traverse_depth_first_with_visitor(edges, visitor):
"""Traverse a DAG in depth-first fashion using a visitor, starting from
a list of edges. Note that typically DFS would take a vertex not a list of edges,
but the API is like this so we don't have to create an artificial root node when
traversing from multiple roots in a DAG.
Arguments:
edges (list): List of EdgeAndDepth instances
visitor: class instance implementing accept(), pre(), post() and neighbors()
"""
for edge in edges:
if not visitor.accept(edge):
continue
visitor.pre(edge)
neighbors = [EdgeAndDepth(edge=e, depth=edge.depth + 1) for e in visitor.neighbors(edge)]
traverse_depth_first_with_visitor(neighbors, visitor)
visitor.post(edge)
# Helper functions for generating a tree using breadth-first traversal
@ -275,6 +370,34 @@ def traverse_breadth_first_tree_nodes(parent_id, edges, key=id, depth=0):
yield item
# Topologic order
def traverse_edges_topo(
specs, direction="children", deptype="all", key=id, root=True, all_edges=False
):
"""
Returns a list of edges in topological order, in the sense that all in-edges of a
vertex appear before all out-edges. By default with direction=children edges are
directed from dependent to dependency. With directions=parents, the edges are
directed from dependency to dependent.
Arguments:
specs (list): List of root specs (considered to be depth 0)
direction (str): ``children`` (edges are directed from dependent to dependency)
or ``parents`` (edges are flipped / directed from dependency to dependent)
deptype (str or tuple): allowed dependency types
key: function that takes a spec and outputs a key for uniqueness test.
root (bool): Yield the root nodes themselves
all_edges (bool): When ``False`` only one in-edge per node is returned, when
``True`` all reachable edges are returned.
"""
visitor = BaseVisitor(deptype)
if direction == "parents":
visitor = ReverseVisitor(visitor, deptype)
visitor = TopoVisitor(visitor, key=key, root=root, all_edges=all_edges)
traverse_depth_first_with_visitor(with_artificial_edges(specs), visitor)
return visitor.edges
# High-level API: traverse_edges, traverse_nodes, traverse_tree.
@ -298,6 +421,7 @@ def traverse_edges(
root (bool): Yield the root nodes themselves
order (str): What order of traversal to use in the DAG. For depth-first
search this can be ``pre`` or ``post``. For BFS this should be ``breadth``.
For topological order use ``topo``
cover (str): Determines how extensively to cover the dag. Possible values:
``nodes`` -- Visit each unique node in the dag only once.
``edges`` -- If a node has been visited once but is reached along a
@ -320,7 +444,19 @@ def traverse_edges(
A generator that yields ``DependencySpec`` if depth is ``False``
or a tuple of ``(depth, DependencySpec)`` if depth is ``True``.
"""
root_edges = root_specs(specs)
if order == "topo":
if cover == "paths":
raise ValueError("cover=paths not supported for order=topo")
# TODO: There is no known need for topological ordering of traversals (edge or node)
# with an initialized "visited" set. Revisit if needed.
if visited is not None:
raise ValueError("visited set not implemented for order=topo")
return traverse_edges_topo(
specs, direction, deptype, key, root, all_edges=cover == "edges"
)
root_edges = with_artificial_edges(specs)
visitor = get_visitor_from_args(cover, direction, deptype, key, visited)
# Depth-first
@ -328,9 +464,10 @@ def traverse_edges(
return traverse_depth_first_edges_generator(
root_edges, visitor, order == "post", root, depth
)
elif order == "breadth":
return traverse_breadth_first_edges_generator(root_edges, visitor, root, depth)
# Breadth-first
return traverse_breadth_first_edges_generator(root_edges, visitor, root, depth)
raise ValueError("Unknown order {}".format(order))
def traverse_nodes(