performance: speed up existence checks in packages (#23661)
Spack doesn't require users to manually index their repos; it reindexes the indexes automatically when things change. To determine when to do this, it has to `stat()` all package files in each repository to make sure that indexes up to date with packages. We currently index virtual providers, patches by sha256, and tags on packages. When this was originally implemented, we ran the checker all the time, at startup, but that was slow (see #7587). But we didn't go far enough -- it still consults the checker and does all the stat operations just to see if a package exists (`Repo.exists()`). That might've been a wash in 2018, but as the number of packages has grown, it's gotten slower -- checking 5k packages is expensive and users see this for small operations. It's a win now to make `Repo.exists()` check files directly. **Fix:** This PR does a number of things to speed up `spack load`, `spack info`, and other commands: - [x] Make `Repo.exists()` check files directly again with `os.path.exists()` (this is the big one) - [x] Refactor `Spec.satisfies()` so that a checking for virtual packages only happens if needed (avoids some calls to exists()) - [x] Avoid calling `Repo.exists(spec)` in `Repo.get()`. `Repo.get()` will ultimately try to load a `package.py` file anyway; we can let the failure to load it indicate that the package doesn't exist, and avoid another call to exists(). - [x] Fix up some comments in spec parsing - [x] Call `UnknownPackageError` more consistently in `repo.py`
This commit is contained in:
parent
18b436bbb0
commit
fdfd1fed41
3 changed files with 37 additions and 24 deletions
|
@ -918,8 +918,12 @@ def _read_config(self):
|
||||||
@autospec
|
@autospec
|
||||||
def get(self, spec):
|
def get(self, spec):
|
||||||
"""Returns the package associated with the supplied spec."""
|
"""Returns the package associated with the supplied spec."""
|
||||||
if not self.exists(spec.name):
|
# NOTE: we only check whether the package is None here, not whether it
|
||||||
raise UnknownPackageError(spec.name)
|
# actually exists, because we have to load it anyway, and that ends up
|
||||||
|
# checking for existence. We avoid constructing FastPackageChecker,
|
||||||
|
# which will stat all packages.
|
||||||
|
if spec.name is None:
|
||||||
|
raise UnknownPackageError(None, self)
|
||||||
|
|
||||||
if spec.namespace and spec.namespace != self.namespace:
|
if spec.namespace and spec.namespace != self.namespace:
|
||||||
raise UnknownPackageError(spec.name, self.namespace)
|
raise UnknownPackageError(spec.name, self.namespace)
|
||||||
|
@ -1064,8 +1068,17 @@ def all_package_classes(self):
|
||||||
|
|
||||||
def exists(self, pkg_name):
|
def exists(self, pkg_name):
|
||||||
"""Whether a package with the supplied name exists."""
|
"""Whether a package with the supplied name exists."""
|
||||||
|
if pkg_name is None:
|
||||||
|
return False
|
||||||
|
|
||||||
|
# if the FastPackageChecker is already constructed, use it
|
||||||
|
if self._fast_package_checker:
|
||||||
return pkg_name in self._pkg_checker
|
return pkg_name in self._pkg_checker
|
||||||
|
|
||||||
|
# if not, check for the package.py file
|
||||||
|
path = self.filename_for_package_name(pkg_name)
|
||||||
|
return os.path.exists(path)
|
||||||
|
|
||||||
def last_mtime(self):
|
def last_mtime(self):
|
||||||
"""Time a package file in this repo was last updated."""
|
"""Time a package file in this repo was last updated."""
|
||||||
return self._pkg_checker.last_mtime()
|
return self._pkg_checker.last_mtime()
|
||||||
|
@ -1331,7 +1344,7 @@ def __init__(self, name, repo=None):
|
||||||
long_msg = None
|
long_msg = None
|
||||||
if name:
|
if name:
|
||||||
if repo:
|
if repo:
|
||||||
msg = "Package '{0}' not found in repository '{1}'"
|
msg = "Package '{0}' not found in repository '{1.root}'"
|
||||||
msg = msg.format(name, repo)
|
msg = msg.format(name, repo)
|
||||||
else:
|
else:
|
||||||
msg = "Package '{0}' not found.".format(name)
|
msg = "Package '{0}' not found.".format(name)
|
||||||
|
|
|
@ -3193,6 +3193,8 @@ def satisfies(self, other, deps=True, strict=False, strict_deps=False):
|
||||||
if other.concrete:
|
if other.concrete:
|
||||||
return self.concrete and self.dag_hash() == other.dag_hash()
|
return self.concrete and self.dag_hash() == other.dag_hash()
|
||||||
|
|
||||||
|
# If the names are different, we need to consider virtuals
|
||||||
|
if self.name != other.name and self.name and other.name:
|
||||||
# A concrete provider can satisfy a virtual dependency.
|
# A concrete provider can satisfy a virtual dependency.
|
||||||
if not self.virtual and other.virtual:
|
if not self.virtual and other.virtual:
|
||||||
try:
|
try:
|
||||||
|
@ -3204,16 +3206,12 @@ def satisfies(self, other, deps=True, strict=False, strict_deps=False):
|
||||||
|
|
||||||
if pkg.provides(other.name):
|
if pkg.provides(other.name):
|
||||||
for provided, when_specs in pkg.provided.items():
|
for provided, when_specs in pkg.provided.items():
|
||||||
if any(self.satisfies(when_spec, deps=False, strict=strict)
|
if any(self.satisfies(when, deps=False, strict=strict)
|
||||||
for when_spec in when_specs):
|
for when in when_specs):
|
||||||
if provided.satisfies(other):
|
if provided.satisfies(other):
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# Otherwise, first thing we care about is whether the name matches
|
|
||||||
if self.name != other.name and self.name and other.name:
|
|
||||||
return False
|
|
||||||
|
|
||||||
# namespaces either match, or other doesn't require one.
|
# namespaces either match, or other doesn't require one.
|
||||||
if (other.namespace is not None and
|
if (other.namespace is not None and
|
||||||
self.namespace is not None and
|
self.namespace is not None and
|
||||||
|
@ -4639,7 +4637,8 @@ def spec(self, name):
|
||||||
break
|
break
|
||||||
|
|
||||||
elif self.accept(HASH):
|
elif self.accept(HASH):
|
||||||
# Get spec by hash and confirm it matches what we already have
|
# Get spec by hash and confirm it matches any constraints we
|
||||||
|
# already read in
|
||||||
hash_spec = self.spec_by_hash()
|
hash_spec = self.spec_by_hash()
|
||||||
if hash_spec.satisfies(spec):
|
if hash_spec.satisfies(spec):
|
||||||
spec._dup(hash_spec)
|
spec._dup(hash_spec)
|
||||||
|
|
|
@ -129,6 +129,7 @@ def test_pkg_add(mock_pkg_git_repo):
|
||||||
finally:
|
finally:
|
||||||
shutil.rmtree('pkg-e')
|
shutil.rmtree('pkg-e')
|
||||||
# Removing a package mid-run disrupts Spack's caching
|
# Removing a package mid-run disrupts Spack's caching
|
||||||
|
if spack.repo.path.repos[0]._fast_package_checker:
|
||||||
spack.repo.path.repos[0]._fast_package_checker.invalidate()
|
spack.repo.path.repos[0]._fast_package_checker.invalidate()
|
||||||
|
|
||||||
with pytest.raises(spack.main.SpackCommandError):
|
with pytest.raises(spack.main.SpackCommandError):
|
||||||
|
|
Loading…
Reference in a new issue