From 910190f55bb5467305dd75a4dac8c60f1f51e283 Mon Sep 17 00:00:00 2001 From: Todd Gamblin Date: Mon, 6 Nov 2023 17:00:37 -0800 Subject: [PATCH] database: optimize query() by skipping unnecessary virtual checks (#40898) Most queries will end up calling `spec.satisfies(query)` on everything in the DB, which will cause Spack to ask whether the query spec is virtual if its name doesn't match the target spec's. This can be expensive, because it can cause Spack to check if any new virtuals showed up in *all* the packages it knows about. That can currently trigger thousands of `stat()` calls. We can avoid the virtual check for most successful queries if we consider that if there *is* a match by name, the query spec *can't* be virtual. This PR adds an optimization to the query loop to save any comparisons that would trigger a virtual check for last. - [x] Add a `deferred` list to the `query()` loop. - [x] First run through the `query()` loop *only* checks for name matches. - [x] Query loop now returns early if there's a name match, skipping most `satisfies()` calls. - [x] Second run through the `deferred()` list only runs if query spec is virtual. - [x] Fix up handling of concrete specs. - [x] Add test for querying virtuals in DB. - [x] Avoid allocating deferred if not necessary. --------- Co-authored-by: Harmen Stoppels --- lib/spack/spack/database.py | 43 +++++++++++++++++++++++++------- lib/spack/spack/test/database.py | 8 ++++++ 2 files changed, 42 insertions(+), 9 deletions(-) diff --git a/lib/spack/spack/database.py b/lib/spack/spack/database.py index f252fbc05d..ecda8c36b0 100644 --- a/lib/spack/spack/database.py +++ b/lib/spack/spack/database.py @@ -1522,14 +1522,18 @@ def _query( # TODO: like installed and known that can be queried? Or are # TODO: these really special cases that only belong here? - # Just look up concrete specs with hashes; no fancy search. - if isinstance(query_spec, spack.spec.Spec) and query_spec.concrete: - # TODO: handling of hashes restriction is not particularly elegant. - hash_key = query_spec.dag_hash() - if hash_key in self._data and (not hashes or hash_key in hashes): - return [self._data[hash_key].spec] - else: - return [] + if query_spec is not any: + if not isinstance(query_spec, spack.spec.Spec): + query_spec = spack.spec.Spec(query_spec) + + # Just look up concrete specs with hashes; no fancy search. + if query_spec.concrete: + # TODO: handling of hashes restriction is not particularly elegant. + hash_key = query_spec.dag_hash() + if hash_key in self._data and (not hashes or hash_key in hashes): + return [self._data[hash_key].spec] + else: + return [] # Abstract specs require more work -- currently we test # against everything. @@ -1537,6 +1541,9 @@ def _query( start_date = start_date or datetime.datetime.min end_date = end_date or datetime.datetime.max + # save specs whose name doesn't match for last, to avoid a virtual check + deferred = [] + for key, rec in self._data.items(): if hashes is not None and rec.spec.dag_hash() not in hashes: continue @@ -1561,8 +1568,26 @@ def _query( if not (start_date < inst_date < end_date): continue - if query_spec is any or rec.spec.satisfies(query_spec): + if query_spec is any: results.append(rec.spec) + continue + + # check anon specs and exact name matches first + if not query_spec.name or rec.spec.name == query_spec.name: + if rec.spec.satisfies(query_spec): + results.append(rec.spec) + + # save potential virtual matches for later, but not if we already found a match + elif not results: + deferred.append(rec.spec) + + # Checking for virtuals is expensive, so we save it for last and only if needed. + # If we get here, we didn't find anything in the DB that matched by name. + # If we did fine something, the query spec can't be virtual b/c we matched an actual + # package installation, so skip the virtual check entirely. If we *didn't* find anything, + # check all the deferred specs *if* the query is virtual. + if not results and query_spec is not any and deferred and query_spec.virtual: + results = [spec for spec in deferred if spec.satisfies(query_spec)] return results diff --git a/lib/spack/spack/test/database.py b/lib/spack/spack/test/database.py index 3033370ac6..ee3e5da81e 100644 --- a/lib/spack/spack/test/database.py +++ b/lib/spack/spack/test/database.py @@ -803,6 +803,14 @@ def test_query_spec_with_non_conditional_virtual_dependency(database): assert len(results) == 1 +def test_query_virtual_spec(database): + """Make sure we can query for virtuals in the DB""" + results = spack.store.STORE.db.query_local("mpi") + assert len(results) == 3 + names = [s.name for s in results] + assert all(name in names for name in ["mpich", "mpich2", "zmpi"]) + + def test_failed_spec_path_error(database): """Ensure spec not concrete check is covered.""" s = spack.spec.Spec("a")