Fix multiple quadratic complexity issues in environments (#38771)

1. Fix O(n^2) iteration in `_get_overwrite_specs`
2. Early exit `get_by_hash` on full hash
3. Fix O(n^2) double lookup in `all_matching_specs` with hashes
4. Fix some legibility issues
This commit is contained in:
Harmen Stoppels 2023-07-07 12:51:58 +02:00 committed by GitHub
parent ca9b52bbc5
commit a1d33e97ec
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 57 additions and 63 deletions

View file

@ -39,7 +39,6 @@
import spack.stage import spack.stage
import spack.store import spack.store
import spack.subprocess_context import spack.subprocess_context
import spack.traverse
import spack.user_environment as uenv import spack.user_environment as uenv
import spack.util.cpus import spack.util.cpus
import spack.util.environment import spack.util.environment
@ -51,6 +50,7 @@
import spack.util.spack_yaml as syaml import spack.util.spack_yaml as syaml
import spack.util.url import spack.util.url
import spack.version import spack.version
from spack import traverse
from spack.filesystem_view import SimpleFilesystemView, inverse_view_func_parser, view_func_parser from spack.filesystem_view import SimpleFilesystemView, inverse_view_func_parser, view_func_parser
from spack.installer import PackageInstaller from spack.installer import PackageInstaller
from spack.schema.env import TOP_LEVEL_KEY from spack.schema.env import TOP_LEVEL_KEY
@ -426,32 +426,6 @@ def _is_dev_spec_and_has_changed(spec):
return mtime > record.installation_time return mtime > record.installation_time
def _spec_needs_overwrite(spec, changed_dev_specs):
"""Check whether the current spec needs to be overwritten because either it has
changed itself or one of its dependencies have changed
"""
# if it's not installed, we don't need to overwrite it
if not spec.installed:
return False
# If the spec itself has changed this is a trivial decision
if spec in changed_dev_specs:
return True
# if spec and all deps aren't dev builds, we don't need to overwrite it
if not any(spec.satisfies(c) for c in ("dev_path=*", "^dev_path=*")):
return False
# If any dep needs overwrite, or any dep is missing and is a dev build then
# overwrite this package
if any(
((not dep.installed) and dep.satisfies("dev_path=*"))
or _spec_needs_overwrite(dep, changed_dev_specs)
for dep in spec.traverse(root=False)
):
return True
def _error_on_nonempty_view_dir(new_root): def _error_on_nonempty_view_dir(new_root):
"""Defensively error when the target view path already exists and is not an """Defensively error when the target view path already exists and is not an
empty directory. This usually happens when the view symlink was removed, but empty directory. This usually happens when the view symlink was removed, but
@ -636,18 +610,16 @@ def specs_for_view(self, concretized_root_specs):
From the list of concretized user specs in the environment, flatten From the list of concretized user specs in the environment, flatten
the dags, and filter selected, installed specs, remove duplicates on dag hash. the dags, and filter selected, installed specs, remove duplicates on dag hash.
""" """
dag_hash = lambda spec: spec.dag_hash()
# With deps, requires traversal # With deps, requires traversal
if self.link == "all" or self.link == "run": if self.link == "all" or self.link == "run":
deptype = ("run") if self.link == "run" else ("link", "run") deptype = ("run") if self.link == "run" else ("link", "run")
specs = list( specs = list(
spack.traverse.traverse_nodes( traverse.traverse_nodes(
concretized_root_specs, deptype=deptype, key=dag_hash concretized_root_specs, deptype=deptype, key=traverse.by_dag_hash
) )
) )
else: else:
specs = list(dedupe(concretized_root_specs, key=dag_hash)) specs = list(dedupe(concretized_root_specs, key=traverse.by_dag_hash))
# Filter selected, installed specs # Filter selected, installed specs
with spack.store.db.read_transaction(): with spack.store.db.read_transaction():
@ -1808,17 +1780,29 @@ def _add_concrete_spec(self, spec, concrete, new=True):
self.specs_by_hash[h] = concrete self.specs_by_hash[h] = concrete
def _get_overwrite_specs(self): def _get_overwrite_specs(self):
# Collect all specs in the environment first before checking which ones # Find all dev specs that were modified.
# to rebuild to avoid checking the same specs multiple times changed_dev_specs = [
specs_to_check = set() s
for dag_hash in self.concretized_order: for s in traverse.traverse_nodes(
root_spec = self.specs_by_hash[dag_hash] self.concrete_roots(), order="breadth", key=traverse.by_dag_hash
specs_to_check.update(root_spec.traverse(root=True)) )
if _is_dev_spec_and_has_changed(s)
changed_dev_specs = set(s for s in specs_to_check if _is_dev_spec_and_has_changed(s)) ]
# Collect their hashes, and the hashes of their installed parents.
# Notice: with order=breadth all changed dev specs are at depth 0,
# even if they occur as parents of one another.
return [ return [
s.dag_hash() for s in specs_to_check if _spec_needs_overwrite(s, changed_dev_specs) spec.dag_hash()
for depth, spec in traverse.traverse_nodes(
changed_dev_specs,
root=True,
order="breadth",
depth=True,
direction="parents",
key=traverse.by_dag_hash,
)
if depth == 0 or spec.installed
] ]
def _install_log_links(self, spec): def _install_log_links(self, spec):
@ -1925,7 +1909,7 @@ def install_specs(self, specs=None, **install_args):
def all_specs(self): def all_specs(self):
"""Return all specs, even those a user spec would shadow.""" """Return all specs, even those a user spec would shadow."""
roots = [self.specs_by_hash[h] for h in self.concretized_order] roots = [self.specs_by_hash[h] for h in self.concretized_order]
specs = [s for s in spack.traverse.traverse_nodes(roots, lambda s: s.dag_hash())] specs = [s for s in traverse.traverse_nodes(roots, key=traverse.by_dag_hash)]
specs.sort() specs.sort()
return specs return specs
@ -1971,13 +1955,18 @@ def concrete_roots(self):
roots *without* associated user spec""" roots *without* associated user spec"""
return [root for _, root in self.concretized_specs()] return [root for _, root in self.concretized_specs()]
def get_by_hash(self, dag_hash): def get_by_hash(self, dag_hash: str) -> List[Spec]:
matches = {} # If it's not a partial hash prefix we can early exit
roots = [self.specs_by_hash[h] for h in self.concretized_order] early_exit = len(dag_hash) == 32
for spec in spack.traverse.traverse_nodes(roots, key=lambda s: s.dag_hash()): matches = []
for spec in traverse.traverse_nodes(
self.concrete_roots(), key=traverse.by_dag_hash, order="breadth"
):
if spec.dag_hash().startswith(dag_hash): if spec.dag_hash().startswith(dag_hash):
matches[spec.dag_hash()] = spec matches.append(spec)
return list(matches.values()) if early_exit:
break
return matches
def get_one_by_hash(self, dag_hash): def get_one_by_hash(self, dag_hash):
"""Returns the single spec from the environment which matches the """Returns the single spec from the environment which matches the
@ -1989,11 +1978,14 @@ def get_one_by_hash(self, dag_hash):
def all_matching_specs(self, *specs: spack.spec.Spec) -> List[Spec]: def all_matching_specs(self, *specs: spack.spec.Spec) -> List[Spec]:
"""Returns all concretized specs in the environment satisfying any of the input specs""" """Returns all concretized specs in the environment satisfying any of the input specs"""
key = lambda s: s.dag_hash() # Look up abstract hashes ahead of time, to avoid O(n^2) traversal.
specs = [s.lookup_hash() for s in specs]
# Avoid double lookup by directly calling _satisfies.
return [ return [
s s
for s in spack.traverse.traverse_nodes(self.concrete_roots(), key=key) for s in traverse.traverse_nodes(self.concrete_roots(), key=traverse.by_dag_hash)
if any(s.satisfies(t) for t in specs) if any(s._satisfies(t) for t in specs)
] ]
@spack.repo.autospec @spack.repo.autospec
@ -2017,9 +2009,9 @@ def matching_spec(self, spec):
env_root_to_user = {root.dag_hash(): user for user, root in self.concretized_specs()} env_root_to_user = {root.dag_hash(): user for user, root in self.concretized_specs()}
root_matches, dep_matches = [], [] root_matches, dep_matches = [], []
for env_spec in spack.traverse.traverse_nodes( for env_spec in traverse.traverse_nodes(
specs=[root for _, root in self.concretized_specs()], specs=[root for _, root in self.concretized_specs()],
key=lambda s: s.dag_hash(), key=traverse.by_dag_hash,
order="breadth", order="breadth",
): ):
if not env_spec.satisfies(spec): if not env_spec.satisfies(spec):
@ -2093,8 +2085,8 @@ def _get_environment_specs(self, recurse_dependencies=True):
if recurse_dependencies: if recurse_dependencies:
specs.extend( specs.extend(
spack.traverse.traverse_nodes( traverse.traverse_nodes(
specs, root=False, deptype=("link", "run"), key=lambda s: s.dag_hash() specs, root=False, deptype=("link", "run"), key=traverse.by_dag_hash
) )
) )
@ -2103,9 +2095,7 @@ def _get_environment_specs(self, recurse_dependencies=True):
def _to_lockfile_dict(self): def _to_lockfile_dict(self):
"""Create a dictionary to store a lockfile for this environment.""" """Create a dictionary to store a lockfile for this environment."""
concrete_specs = {} concrete_specs = {}
for s in spack.traverse.traverse_nodes( for s in traverse.traverse_nodes(self.specs_by_hash.values(), key=traverse.by_dag_hash):
self.specs_by_hash.values(), key=lambda s: s.dag_hash()
):
spec_dict = s.node_dict_with_hashes(hash=ht.dag_hash) spec_dict = s.node_dict_with_hashes(hash=ht.dag_hash)
# Assumes no legacy formats, since this was just created. # Assumes no legacy formats, since this was just created.
spec_dict[ht.dag_hash.name] = s.dag_hash() spec_dict[ht.dag_hash.name] = s.dag_hash()
@ -2262,7 +2252,7 @@ def ensure_env_directory_exists(self, dot_env: bool = False) -> None:
def update_environment_repository(self) -> None: def update_environment_repository(self) -> None:
"""Updates the repository associated with the environment.""" """Updates the repository associated with the environment."""
for spec in spack.traverse.traverse_nodes(self.new_specs): for spec in traverse.traverse_nodes(self.new_specs):
if not spec.concrete: if not spec.concrete:
raise ValueError("specs passed to environment.write() must be concrete!") raise ValueError("specs passed to environment.write() must be concrete!")

View file

@ -396,17 +396,16 @@ def reset_string():
with envdir.as_cwd(): with envdir.as_cwd():
with open("spack.yaml", "w") as f: with open("spack.yaml", "w") as f:
f.write( f.write(
"""\ f"""\
spack: spack:
specs: specs:
- %s@0.0.0 - {test_spec}@0.0.0
develop: develop:
dev-build-test-install: dev-build-test-install:
spec: dev-build-test-install@0.0.0 spec: dev-build-test-install@0.0.0
path: %s path: {build_dir}
""" """
% (test_spec, build_dir)
) )
env("create", "test", "./spack.yaml") env("create", "test", "./spack.yaml")

View file

@ -554,3 +554,8 @@ def traverse_tree(specs, cover="nodes", deptype="all", key=id, depth_first=True)
return traverse_breadth_first_tree_nodes(None, edges) return traverse_breadth_first_tree_nodes(None, edges)
return traverse_edges(specs, order="pre", cover=cover, deptype=deptype, key=key, depth=True) return traverse_edges(specs, order="pre", cover=cover, deptype=deptype, key=key, depth=True)
def by_dag_hash(s: "spack.spec.Spec") -> str:
"""Used very often as a key function for traversals."""
return s.dag_hash()