spack gc: add options for environments and build dependencies (#41731)

This adds a few options to `spack gc`.

One to give you a little more control over dependencies:

* `-b` / `--keep-build-dependencies`: By default, `spack gc` considers build dependencies to be "no longer needed" once their dependents are installed. With this option, we'll keep build dependencies of needed installations as well.

And two more to make working with environments easier:

* `-E` / `--except-any-environment`: Garbage collect anything NOT needed by an environment. `spack gc -E` and `spack gc -bE` are now easy ways to get rid of everytihng not used by some environment.

* `-e` / `--except-environment` `ENV`: Instead of considering all environments, garbage collect everything not needed by a *specific* environment. Note that you can use this with `-E` to add directory environments to the list of considered envs, e.g.:

      spack gc -E -e /path/to/direnv1 -e /path/to/direnv2 #...

- [x] rework `unused_specs()` method on DB to add options for roots and deptypes
- [x] add `all_hashes()` method on DB
- [x] rework `spack gc` command to add 3 more options
- [x] tests
This commit is contained in:
Todd Gamblin 2023-12-26 14:52:10 -08:00 committed by GitHub
parent 441b68aca3
commit 24d12c632c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 301 additions and 74 deletions

View file

@ -21,10 +21,11 @@ def confirm_action(specs: List[spack.spec.Spec], participle: str, noun: str):
participle: action expressed as a participle, e.g. "uninstalled"
noun: action expressed as a noun, e.g. "uninstallation"
"""
tty.msg(f"The following {len(specs)} packages will be {participle}:\n")
spack.cmd.display_specs(specs, **display_args)
print("")
answer = tty.get_yes_or_no("Do you want to proceed?", default=False)
print()
answer = tty.get_yes_or_no(
f"{len(specs)} packages will be {participle}. Do you want to proceed?", default=False
)
if not answer:
tty.msg(f"Aborting {noun}")
sys.exit(0)

View file

@ -8,6 +8,7 @@
import spack.cmd.common.arguments
import spack.cmd.common.confirmation
import spack.cmd.uninstall
import spack.deptypes as dt
import spack.environment as ev
import spack.store
@ -17,31 +18,91 @@
def setup_parser(subparser):
subparser.add_argument(
"-E",
"--except-any-environment",
action="store_true",
help="remove everything unless needed by an environment",
)
subparser.add_argument(
"-e",
"--except-environment",
metavar="ENV",
action="append",
default=[],
help="remove everything unless needed by specified environment\n"
"you can list multiple environments, or specify directory\n"
"environments by path.",
)
subparser.add_argument(
"-b",
"--keep-build-dependencies",
action="store_true",
help="do not remove installed build-only dependencies of roots\n"
"(default is to keep only link & run dependencies)",
)
spack.cmd.common.arguments.add_common_arguments(subparser, ["yes_to_all"])
def roots_from_environments(args, active_env):
# if we're using -E or -e, make a list of environments whose roots we should consider.
all_environments = []
# -E will garbage collect anything not needed by any env, including the current one
if args.except_any_environment:
all_environments += list(ev.all_environments())
if active_env:
all_environments.append(active_env)
# -e says "also preserve things needed by this particular env"
for env_name_or_dir in args.except_environment:
print("HMM", env_name_or_dir)
if ev.exists(env_name_or_dir):
env = ev.read(env_name_or_dir)
elif ev.is_env_dir(env_name_or_dir):
env = ev.Environment(env_name_or_dir)
else:
tty.die(f"No such environment: '{env_name_or_dir}'")
all_environments.append(env)
# add root hashes from all considered environments to list of roots
root_hashes = set()
for env in all_environments:
root_hashes |= set(env.concretized_order)
return root_hashes
def gc(parser, args):
specs = spack.store.STORE.db.unused_specs
deptype = dt.LINK | dt.RUN
if args.keep_build_dependencies:
deptype |= dt.BUILD
# Restrict garbage collection to the active environment
# speculating over roots that are yet to be installed
env = ev.active_environment()
if env:
msg = 'Restricting the garbage collection to the "{0}" environment'
tty.msg(msg.format(env.name))
env.concretize()
roots = [s for s in env.roots()]
all_hashes = set([s.dag_hash() for r in roots for s in r.traverse()])
lr_hashes = set([s.dag_hash() for r in roots for s in r.traverse(deptype=("link", "run"))])
maybe_to_be_removed = all_hashes - lr_hashes
specs = [s for s in specs if s.dag_hash() in maybe_to_be_removed]
active_env = ev.active_environment()
if not specs:
msg = "There are no unused specs. Spack's store is clean."
tty.msg(msg)
return
# wrap the whole command with a read transaction to avoid multiple
with spack.store.STORE.db.read_transaction():
if args.except_environment or args.except_any_environment:
# if either of these is specified, we ignore the active environment and garbage
# collect anything NOT in specified environments.
root_hashes = roots_from_environments(args, active_env)
if not args.yes_to_all:
spack.cmd.common.confirmation.confirm_action(specs, "uninstalled", "uninstallation")
elif active_env:
# only gc what's in current environment
tty.msg(f"Restricting garbage collection to environment '{active_env.name}'")
root_hashes = set(spack.store.STORE.db.all_hashes()) # keep everything
root_hashes -= set(active_env.all_hashes()) # except this env
root_hashes |= set(active_env.concretized_order) # but keep its roots
else:
# consider all explicit specs roots (the default for db.unused_specs())
root_hashes = None
spack.cmd.uninstall.do_uninstall(specs, force=False)
specs = spack.store.STORE.db.unused_specs(root_hashes=root_hashes, deptype=deptype)
if not specs:
tty.msg("There are no unused specs. Spack's store is clean.")
return
if not args.yes_to_all:
spack.cmd.common.confirmation.confirm_action(specs, "uninstalled", "uninstall")
spack.cmd.uninstall.do_uninstall(specs, force=False)

View file

@ -277,7 +277,7 @@ def uninstall_specs(args, specs):
return
if not args.yes_to_all:
confirmation.confirm_action(uninstall_list, "uninstalled", "uninstallation")
confirmation.confirm_action(uninstall_list, "uninstalled", "uninstall")
# Uninstall everything on the list
do_uninstall(uninstall_list, args.force)

View file

@ -25,9 +25,20 @@
import socket
import sys
import time
from typing import Any, Callable, Dict, Generator, List, NamedTuple, Set, Type, Union
import spack.deptypes as dt
from typing import (
Any,
Callable,
Container,
Dict,
Generator,
List,
NamedTuple,
Optional,
Set,
Tuple,
Type,
Union,
)
try:
import uuid
@ -37,13 +48,13 @@
_use_uuid = False
pass
from typing import Optional, Tuple
import llnl.util.filesystem as fs
import llnl.util.tty as tty
import spack.deptypes as dt
import spack.hash_types as ht
import spack.spec
import spack.traverse as tr
import spack.util.lock as lk
import spack.util.spack_json as sjson
import spack.version as vn
@ -297,7 +308,7 @@ def __reduce__(self):
end_date (datetime.datetime or None): filters the query discarding
specs that have been installed after ``end_date``.
hashes (typing.Container): list or set of hashes that we can use to
hashes (Container): list or set of hashes that we can use to
restrict the search
in_buildcache (bool or None): Specs that are marked in
@ -1648,31 +1659,35 @@ def is_occupied_install_prefix(self, path):
with self.read_transaction():
return path in self._installed_prefixes
@property
def unused_specs(self):
"""Return all the specs that are currently installed but not needed
at runtime to satisfy user's requests.
Specs in the return list are those which are not either:
1. Installed on an explicit user request
2. Installed as a "run" or "link" dependency (even transitive) of
a spec at point 1.
"""
needed, visited = set(), set()
def all_hashes(self):
"""Return dag hash of every spec in the database."""
with self.read_transaction():
for key, rec in self._data.items():
if not rec.explicit:
continue
return list(self._data.keys())
# recycle `visited` across calls to avoid redundantly traversing
for spec in rec.spec.traverse(visited=visited, deptype=("link", "run")):
needed.add(spec.dag_hash())
def unused_specs(
self,
root_hashes: Optional[Container[str]] = None,
deptype: Union[dt.DepFlag, dt.DepTypes] = dt.LINK | dt.RUN,
) -> "List[spack.spec.Spec]":
"""Return all specs that are currently installed but not needed by root specs.
unused = [
rec.spec for key, rec in self._data.items() if key not in needed and rec.installed
]
By default, roots are all explicit specs in the database. If a set of root
hashes are passed in, they are instead used as the roots.
return unused
Arguments:
root_hashes: optional list of roots to consider when evaluating needed installations.
deptype: if a spec is reachable from a root via these dependency types, it is
considered needed. By default only link and run dependency types are considered.
"""
def root(key, record):
"""Whether a DB record is a root for garbage collection."""
return key in root_hashes if root_hashes is not None else record.explicit
with self.read_transaction():
roots = [rec.spec for key, rec in self._data.items() if root(key, rec)]
needed = set(id(spec) for spec in tr.traverse_nodes(roots, deptype=deptype))
return [rec.spec for rec in self._data.values() if id(rec.spec) not in needed]
def update_explicit(self, spec, explicit):
"""

View file

@ -793,7 +793,7 @@ def __init__(self, manifest_dir: Union[str, pathlib.Path]) -> None:
#: User specs from the last concretization
self.concretized_user_specs: List[Spec] = []
#: Roots associated with the last concretization, in order
self.concretized_order: List[Spec] = []
self.concretized_order: List[str] = []
#: Concretized specs by hash
self.specs_by_hash: Dict[str, Spec] = {}
#: Repository for this environment (memoized)

View file

@ -11,37 +11,140 @@
import spack.spec
gc = spack.main.SpackCommand("gc")
add = spack.main.SpackCommand("add")
install = spack.main.SpackCommand("install")
find = spack.main.SpackCommand("find")
pytestmark = pytest.mark.not_on_windows("does not run on windows")
@pytest.mark.db
def test_no_packages_to_remove(config, mutable_database, capsys):
with capsys.disabled():
output = gc("-y")
def test_gc_without_build_dependency(config, mutable_database):
output = gc("-yb")
assert "There are no unused specs." in output
output = gc("-y")
assert "There are no unused specs." in output
@pytest.mark.db
def test_packages_are_removed(config, mutable_database, capsys):
def test_gc_with_build_dependency(config, mutable_database):
s = spack.spec.Spec("simple-inheritance")
s.concretize()
s.package.do_install(fake=True, explicit=True)
with capsys.disabled():
output = gc("-y")
output = gc("-yb")
assert "There are no unused specs." in output
output = gc("-y")
assert "Successfully uninstalled cmake" in output
@pytest.mark.db
def test_gc_with_environment(config, mutable_database, mutable_mock_env_path, capsys):
def test_gc_with_environment(config, mutable_database, mutable_mock_env_path):
s = spack.spec.Spec("simple-inheritance")
s.concretize()
s.package.do_install(fake=True, explicit=True)
e = ev.create("test_gc")
e.add("cmake")
with e:
with capsys.disabled():
output = gc("-y")
assert "Restricting the garbage collection" in output
add("cmake")
install()
assert "cmake" in find()
output = gc("-y")
assert "Restricting garbage collection" in output
assert "There are no unused specs" in output
@pytest.mark.db
def test_gc_with_build_dependency_in_environment(config, mutable_database, mutable_mock_env_path):
s = spack.spec.Spec("simple-inheritance")
s.concretize()
s.package.do_install(fake=True, explicit=True)
e = ev.create("test_gc")
with e:
add("simple-inheritance")
install()
assert "simple-inheritance" in find()
output = gc("-yb")
assert "Restricting garbage collection" in output
assert "There are no unused specs" in output
with e:
assert "simple-inheritance" in find()
output = gc("-y")
assert "Restricting garbage collection" in output
assert "Successfully uninstalled cmake" in output
@pytest.mark.db
def test_gc_except_any_environments(config, mutable_database, mutable_mock_env_path):
s = spack.spec.Spec("simple-inheritance")
s.concretize()
s.package.do_install(fake=True, explicit=True)
assert "zmpi" in find()
e = ev.create("test_gc")
with e:
add("simple-inheritance")
install()
assert "simple-inheritance" in find()
output = gc("-yE")
assert "Restricting garbage collection" not in output
assert "Successfully uninstalled zmpi" in output
assert "zmpi" not in find()
with e:
output = gc("-yE")
assert "Restricting garbage collection" not in output
assert "There are no unused specs" not in find()
@pytest.mark.db
def test_gc_except_specific_environments(config, mutable_database, mutable_mock_env_path):
s = spack.spec.Spec("simple-inheritance")
s.concretize()
s.package.do_install(fake=True, explicit=True)
assert "zmpi" in find()
e = ev.create("test_gc")
with e:
add("simple-inheritance")
install()
assert "simple-inheritance" in find()
output = gc("-ye", "test_gc")
assert "Restricting garbage collection" not in output
assert "Successfully uninstalled zmpi" in output
assert "zmpi" not in find()
@pytest.mark.db
def test_gc_except_nonexisting_dir_env(config, mutable_database, mutable_mock_env_path, tmpdir):
output = gc("-ye", tmpdir.strpath, fail_on_error=False)
assert "No such environment" in output
gc.returncode == 1
@pytest.mark.db
def test_gc_except_specific_dir_env(config, mutable_database, mutable_mock_env_path, tmpdir):
s = spack.spec.Spec("simple-inheritance")
s.concretize()
s.package.do_install(fake=True, explicit=True)
assert "zmpi" in find()
e = ev.create_in_dir(tmpdir.strpath)
with e:
add("simple-inheritance")
install()
assert "simple-inheritance" in find()
output = gc("-ye", tmpdir.strpath)
assert "Restricting garbage collection" not in output
assert "Successfully uninstalled zmpi" in output
assert "zmpi" not in find()

View file

@ -801,13 +801,13 @@ def mock_low_high_config(tmpdir):
def _populate(mock_db):
r"""Populate a mock database with packages.
Here is what the mock DB looks like:
Here is what the mock DB looks like (explicit roots at top):
o mpileaks o mpileaks' o mpileaks''
|\ |\ |\
| o callpath | o callpath' | o callpath''
|/| |/| |/|
o | mpich o | mpich2 o | zmpi
o mpileaks o mpileaks' o mpileaks'' o externaltest o trivial-smoke-test
|\ |\ |\ |
| o callpath | o callpath' | o callpath'' o externaltool
|/| |/| |/| |
o | mpich o | mpich2 o | zmpi o externalvirtual
| | o | fake
| | |
| |______________/

View file

@ -25,6 +25,7 @@
from llnl.util.tty.colify import colify
import spack.database
import spack.deptypes as dt
import spack.package_base
import spack.repo
import spack.spec
@ -778,9 +779,39 @@ def test_query_unused_specs(mutable_database):
s.concretize()
s.package.do_install(fake=True, explicit=True)
unused = spack.store.STORE.db.unused_specs
assert len(unused) == 1
assert unused[0].name == "cmake"
si = s.dag_hash()
ml_mpich = spack.store.STORE.db.query_one("mpileaks ^mpich").dag_hash()
ml_mpich2 = spack.store.STORE.db.query_one("mpileaks ^mpich2").dag_hash()
ml_zmpi = spack.store.STORE.db.query_one("mpileaks ^zmpi").dag_hash()
externaltest = spack.store.STORE.db.query_one("externaltest").dag_hash()
trivial_smoke_test = spack.store.STORE.db.query_one("trivial-smoke-test").dag_hash()
def check_unused(roots, deptype, expected):
unused = spack.store.STORE.db.unused_specs(root_hashes=roots, deptype=deptype)
assert set(u.name for u in unused) == set(expected)
default_dt = dt.LINK | dt.RUN
check_unused(None, default_dt, ["cmake"])
check_unused(
[si, ml_mpich, ml_mpich2, ml_zmpi, externaltest],
default_dt,
["trivial-smoke-test", "cmake"],
)
check_unused(
[si, ml_mpich, ml_mpich2, ml_zmpi, externaltest],
dt.LINK | dt.RUN | dt.BUILD,
["trivial-smoke-test"],
)
check_unused(
[si, ml_mpich, ml_mpich2, externaltest, trivial_smoke_test],
dt.LINK | dt.RUN | dt.BUILD,
["mpileaks", "callpath", "zmpi", "fake"],
)
check_unused(
[si, ml_mpich, ml_mpich2, ml_zmpi],
default_dt,
["trivial-smoke-test", "cmake", "externaltest", "externaltool", "externalvirtual"],
)
@pytest.mark.regression("10019")
@ -1008,6 +1039,16 @@ def test_check_parents(spec_str, parent_name, expected_nparents, database):
assert len(edges) == expected_nparents
def test_db_all_hashes(database):
# ensure we get the right number of hashes without a read transaction
hashes = database.all_hashes()
assert len(hashes) == 17
# and make sure the hashes match
with database.read_transaction():
assert set(s.dag_hash() for s in database.query()) == set(hashes)
def test_consistency_of_dependents_upon_remove(mutable_database):
# Check the initial state
s = mutable_database.query_one("dyninst")

View file

@ -1177,7 +1177,7 @@ _spack_find() {
}
_spack_gc() {
SPACK_COMPREPLY="-h --help -y --yes-to-all"
SPACK_COMPREPLY="-h --help -E --except-any-environment -e --except-environment -b --keep-build-dependencies -y --yes-to-all"
}
_spack_gpg() {

View file

@ -1747,9 +1747,15 @@ complete -c spack -n '__fish_spack_using_command find' -l end-date -r -f -a end_
complete -c spack -n '__fish_spack_using_command find' -l end-date -r -d 'latest date of installation [YYYY-MM-DD]'
# spack gc
set -g __fish_spack_optspecs_spack_gc h/help y/yes-to-all
set -g __fish_spack_optspecs_spack_gc h/help E/except-any-environment e/except-environment= b/keep-build-dependencies y/yes-to-all
complete -c spack -n '__fish_spack_using_command gc' -s h -l help -f -a help
complete -c spack -n '__fish_spack_using_command gc' -s h -l help -d 'show this help message and exit'
complete -c spack -n '__fish_spack_using_command gc' -s E -l except-any-environment -f -a except_any_environment
complete -c spack -n '__fish_spack_using_command gc' -s E -l except-any-environment -d 'remove everything unless needed by an environment'
complete -c spack -n '__fish_spack_using_command gc' -s e -l except-environment -r -f -a except_environment
complete -c spack -n '__fish_spack_using_command gc' -s e -l except-environment -r -d 'remove everything unless needed by specified environment'
complete -c spack -n '__fish_spack_using_command gc' -s b -l keep-build-dependencies -f -a keep_build_dependencies
complete -c spack -n '__fish_spack_using_command gc' -s b -l keep-build-dependencies -d 'do not remove installed build-only dependencies of roots'
complete -c spack -n '__fish_spack_using_command gc' -s y -l yes-to-all -f -a yes_to_all
complete -c spack -n '__fish_spack_using_command gc' -s y -l yes-to-all -d 'assume "yes" is the answer to every confirmation request'