Fix bugs in spack monitor (#27511)

Updates to installer.py did not account for spack monitor, so as currently implemented
there are three cases of failure that spack monitor will not account for. To fix this we add additional
hooks, including an on cancel and also do a custom action on concretization fail.

Signed-off-by: vsoch <vsoch@users.noreply.github.com>

Co-authored-by: vsoch <vsoch@users.noreply.github.com>
This commit is contained in:
Vanessasaurus 2021-12-19 22:54:41 -07:00 committed by GitHub
parent ece1e3905e
commit da9e152ed1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 77 additions and 3 deletions

View file

@ -59,7 +59,7 @@ are available:
install_files : install file listing read from install_manifest.json
environment_variables : environment variables parsed from spack-build-env.txt
config_args : config args loaded from spack-configure-args.txt
abigail : Application Binary Interface (ABI) features for objects
libabigail : Application Binary Interface (ABI) features for objects
In the above, the first three are fairly simple - parsing metadata files from

View file

@ -671,6 +671,13 @@ If you need to write a hook that is relevant to a failure within a build
process, you would want to instead use ``on_phase_failure``.
"""""""""""""""""""""""""""
``on_install_cancel(spec)``
"""""""""""""""""""""""""""
The same, but triggered if a spec install is cancelled for any reason.
"""""""""""""""""""""""""""""""""""""""""""""""
``on_phase_success(pkg, phase_name, log_file)``
"""""""""""""""""""""""""""""""""""""""""""""""

View file

@ -403,6 +403,10 @@ def get_tests(specs):
except SpackError as e:
tty.debug(e)
reporter.concretization_report(e.message)
# Tell spack monitor about it
if args.use_monitor and abstract_specs:
monitor.failed_concretization(abstract_specs)
raise
# 2. Concrete specs from yaml files

View file

@ -90,6 +90,7 @@ def __call__(self, *args, **kwargs):
on_install_start = _HookRunner('on_install_start')
on_install_success = _HookRunner('on_install_success')
on_install_failure = _HookRunner('on_install_failure')
on_install_cancel = _HookRunner('on_install_cancel')
# Analyzer hooks
on_analyzer_save = _HookRunner('on_analyzer_save')

View file

@ -41,6 +41,17 @@ def on_install_failure(spec):
tty.verbose(result.get('message'))
def on_install_cancel(spec):
"""Triggered on cancel of an install
"""
if not spack.monitor.cli:
return
tty.debug("Running on_install_cancel for %s" % spec)
result = spack.monitor.cli.cancel_task(spec)
tty.verbose(result.get('message'))
def on_phase_success(pkg, phase_name, log_file):
"""Triggered on a phase success
"""

View file

@ -1198,6 +1198,7 @@ def _install_task(self, task):
except spack.build_environment.StopPhase as e:
# A StopPhase exception means that do_install was asked to
# stop early from clients, and is not an error at this point
spack.hooks.on_install_failure(task.request.pkg.spec)
pid = '{0}: '.format(self.pid) if tty.show_pid() else ''
tty.debug('{0}{1}'.format(pid, str(e)))
tty.debug('Package stage directory: {0}' .format(pkg.stage.source_path))
@ -1654,7 +1655,7 @@ def install(self):
err = 'Failed to install {0} due to {1}: {2}'
tty.error(err.format(pkg.name, exc.__class__.__name__,
str(exc)))
spack.hooks.on_install_failure(task.request.pkg.spec)
spack.hooks.on_install_cancel(task.request.pkg.spec)
raise
except (Exception, SystemExit) as exc:
@ -1918,6 +1919,9 @@ def _real_install(self):
except BaseException:
combine_phase_logs(pkg.phase_log_files, pkg.log_path)
spack.hooks.on_phase_error(pkg, phase_name, log_file)
# phase error indicates install error
spack.hooks.on_install_failure(pkg.spec)
raise
# We assume loggers share echo True/False

View file

@ -118,13 +118,16 @@ class SpackMonitorClient:
def __init__(self, host=None, prefix="ms1", allow_fail=False, tags=None,
save_local=False):
# We can control setting an arbitrary version if needed
sv = spack.main.get_version()
self.spack_version = os.environ.get("SPACKMON_SPACK_VERSION") or sv
self.host = host or "http://127.0.0.1"
self.baseurl = "%s/%s" % (self.host, prefix.strip("/"))
self.token = os.environ.get("SPACKMON_TOKEN")
self.username = os.environ.get("SPACKMON_USER")
self.headers = {}
self.allow_fail = allow_fail
self.spack_version = spack.main.get_version()
self.capture_build_environment()
self.tags = tags
self.save_local = save_local
@ -200,6 +203,14 @@ def capture_build_environment(self):
"""
from spack.util.environment import get_host_environment_metadata
self.build_environment = get_host_environment_metadata()
keys = list(self.build_environment.keys())
# Allow to customize any of these values via the environment
for key in keys:
envar_name = "SPACKMON_%s" % key.upper()
envar = os.environ.get(envar_name)
if envar:
self.build_environment[key] = envar
def require_auth(self):
"""
@ -413,6 +424,37 @@ def new_configuration(self, specs):
return configs
def failed_concretization(self, specs):
"""
Given a list of abstract specs, tell spack monitor concretization failed.
"""
configs = {}
# There should only be one spec generally (what cases would have >1?)
for spec in specs:
# update the spec to have build hash indicating that cannot be built
meta = spec.to_dict()['spec']
nodes = []
for node in meta.get("nodes", []):
for hashtype in ["build_hash", "full_hash"]:
node[hashtype] = "FAILED_CONCRETIZATION"
nodes.append(node)
meta['nodes'] = nodes
# We can't concretize / hash
as_dict = {"spec": meta,
"spack_version": self.spack_version}
if self.save_local:
filename = "spec-%s-%s-config.json" % (spec.name, spec.version)
self.save(as_dict, filename)
else:
response = self.do_request("specs/new/", data=sjson.dump(as_dict))
configs[spec.package.name] = response.get('data', {})
return configs
def new_build(self, spec):
"""
Create a new build.
@ -503,6 +545,11 @@ def fail_task(self, spec):
"""
return self.update_build(spec, status="FAILED")
def cancel_task(self, spec):
"""Given a spec, mark it as cancelled.
"""
return self.update_build(spec, status="CANCELLED")
def send_analyze_metadata(self, pkg, metadata):
"""
Send spack analyzer metadata to the spack monitor server.