Spack install: handle failed restore of backup (#25647)
Spack has logic to preserve an installation prefix when it is being overwritten: if the new install fails, the old files are restored. This PR adds error handling for when this backup restoration fails (i.e. the new install fails, and then some unexpected error prevents restoration from the backup).
This commit is contained in:
parent
df590bb6ee
commit
18760de972
3 changed files with 200 additions and 35 deletions
|
@ -656,6 +656,12 @@ def working_dir(dirname, **kwargs):
|
|||
os.chdir(orig_dir)
|
||||
|
||||
|
||||
class CouldNotRestoreDirectoryBackup(RuntimeError):
|
||||
def __init__(self, inner_exception, outer_exception):
|
||||
self.inner_exception = inner_exception
|
||||
self.outer_exception = outer_exception
|
||||
|
||||
|
||||
@contextmanager
|
||||
def replace_directory_transaction(directory_name, tmp_root=None):
|
||||
"""Moves a directory to a temporary space. If the operations executed
|
||||
|
@ -683,29 +689,33 @@ def replace_directory_transaction(directory_name, tmp_root=None):
|
|||
assert os.path.isabs(tmp_root)
|
||||
|
||||
tmp_dir = tempfile.mkdtemp(dir=tmp_root)
|
||||
tty.debug('TEMPORARY DIRECTORY CREATED [{0}]'.format(tmp_dir))
|
||||
tty.debug('Temporary directory created [{0}]'.format(tmp_dir))
|
||||
|
||||
shutil.move(src=directory_name, dst=tmp_dir)
|
||||
tty.debug('DIRECTORY MOVED [src={0}, dest={1}]'.format(
|
||||
directory_name, tmp_dir
|
||||
))
|
||||
tty.debug('Directory moved [src={0}, dest={1}]'.format(directory_name, tmp_dir))
|
||||
|
||||
try:
|
||||
yield tmp_dir
|
||||
except (Exception, KeyboardInterrupt, SystemExit):
|
||||
# Delete what was there, before copying back the original content
|
||||
if os.path.exists(directory_name):
|
||||
shutil.rmtree(directory_name)
|
||||
shutil.move(
|
||||
src=os.path.join(tmp_dir, directory_basename),
|
||||
dst=os.path.dirname(directory_name)
|
||||
)
|
||||
tty.debug('DIRECTORY RECOVERED [{0}]'.format(directory_name))
|
||||
except (Exception, KeyboardInterrupt, SystemExit) as inner_exception:
|
||||
# Try to recover the original directory, if this fails, raise a
|
||||
# composite exception.
|
||||
try:
|
||||
# Delete what was there, before copying back the original content
|
||||
if os.path.exists(directory_name):
|
||||
shutil.rmtree(directory_name)
|
||||
shutil.move(
|
||||
src=os.path.join(tmp_dir, directory_basename),
|
||||
dst=os.path.dirname(directory_name)
|
||||
)
|
||||
except Exception as outer_exception:
|
||||
raise CouldNotRestoreDirectoryBackup(inner_exception, outer_exception)
|
||||
|
||||
tty.debug('Directory recovered [{0}]'.format(directory_name))
|
||||
raise
|
||||
else:
|
||||
# Otherwise delete the temporary directory
|
||||
shutil.rmtree(tmp_dir)
|
||||
tty.debug('TEMPORARY DIRECTORY DELETED [{0}]'.format(tmp_dir))
|
||||
shutil.rmtree(tmp_dir, ignore_errors=True)
|
||||
tty.debug('Temporary directory deleted [{0}]'.format(tmp_dir))
|
||||
|
||||
|
||||
def hash_directory(directory, ignore=[]):
|
||||
|
|
|
@ -85,6 +85,15 @@
|
|||
STATUS_REMOVED = 'removed'
|
||||
|
||||
|
||||
class InstallAction(object):
|
||||
#: Don't perform an install
|
||||
NONE = 0
|
||||
#: Do a standard install
|
||||
INSTALL = 1
|
||||
#: Do an overwrite install
|
||||
OVERWRITE = 2
|
||||
|
||||
|
||||
def _check_last_phase(pkg):
|
||||
"""
|
||||
Ensures the specified package has a valid last phase before proceeding
|
||||
|
@ -1418,6 +1427,42 @@ def _init_queue(self):
|
|||
for dependent_id in dependents.difference(task.dependents):
|
||||
task.add_dependent(dependent_id)
|
||||
|
||||
def _install_action(self, task):
|
||||
"""
|
||||
Determine whether the installation should be overwritten (if it already
|
||||
exists) or skipped (if has been handled by another process).
|
||||
|
||||
If the package has not been installed yet, this will indicate that the
|
||||
installation should proceed as normal (i.e. no need to transactionally
|
||||
preserve the old prefix).
|
||||
"""
|
||||
# If we don't have to overwrite, do a normal install
|
||||
if task.pkg.spec.dag_hash() not in task.request.overwrite:
|
||||
return InstallAction.INSTALL
|
||||
|
||||
# If it's not installed, do a normal install as well
|
||||
rec, installed = self._check_db(task.pkg.spec)
|
||||
if not installed:
|
||||
return InstallAction.INSTALL
|
||||
|
||||
# Ensure install_tree projections have not changed.
|
||||
assert task.pkg.prefix == rec.path
|
||||
|
||||
# If another process has overwritten this, we shouldn't install at all
|
||||
if rec.installation_time >= task.request.overwrite_time:
|
||||
return InstallAction.NONE
|
||||
|
||||
# If the install prefix is missing, warn about it, and proceed with
|
||||
# normal install.
|
||||
if not os.path.exists(task.pkg.prefix):
|
||||
tty.debug("Missing installation to overwrite")
|
||||
return InstallAction.INSTALL
|
||||
|
||||
# Otherwise, do an actual overwrite install. We backup the original
|
||||
# install directory, put the old prefix
|
||||
# back on failure
|
||||
return InstallAction.OVERWRITE
|
||||
|
||||
def install(self):
|
||||
"""
|
||||
Install the requested package(s) and or associated dependencies.
|
||||
|
@ -1561,26 +1606,12 @@ def install(self):
|
|||
# Proceed with the installation since we have an exclusive write
|
||||
# lock on the package.
|
||||
try:
|
||||
if pkg.spec.dag_hash() in task.request.overwrite:
|
||||
rec, _ = self._check_db(pkg.spec)
|
||||
if rec and rec.installed:
|
||||
if rec.installation_time < task.request.overwrite_time:
|
||||
# If it's actually overwriting, do a fs transaction
|
||||
if os.path.exists(rec.path):
|
||||
with fs.replace_directory_transaction(
|
||||
rec.path):
|
||||
# fs transaction will put the old prefix
|
||||
# back on failure, so make sure to keep it.
|
||||
keep_prefix = True
|
||||
self._install_task(task)
|
||||
else:
|
||||
tty.debug("Missing installation to overwrite")
|
||||
self._install_task(task)
|
||||
else:
|
||||
# overwriting nothing
|
||||
self._install_task(task)
|
||||
else:
|
||||
action = self._install_action(task)
|
||||
|
||||
if action == InstallAction.INSTALL:
|
||||
self._install_task(task)
|
||||
elif action == InstallAction.OVERWRITE:
|
||||
OverwriteInstall(self, spack.store.db, task).install()
|
||||
|
||||
self._update_installed(task)
|
||||
|
||||
|
@ -1631,7 +1662,7 @@ def install(self):
|
|||
finally:
|
||||
# Remove the install prefix if anything went wrong during
|
||||
# install.
|
||||
if not keep_prefix:
|
||||
if not keep_prefix and not action == InstallAction.OVERWRITE:
|
||||
pkg.remove_prefix()
|
||||
|
||||
# The subprocess *may* have removed the build stage. Mark it
|
||||
|
@ -1883,6 +1914,35 @@ def build_process(pkg, install_args):
|
|||
return installer.run()
|
||||
|
||||
|
||||
class OverwriteInstall(object):
|
||||
def __init__(self, installer, database, task, tmp_root=None):
|
||||
self.installer = installer
|
||||
self.database = database
|
||||
self.task = task
|
||||
self.tmp_root = tmp_root
|
||||
|
||||
def install(self):
|
||||
"""
|
||||
Try to run the install task overwriting the package prefix.
|
||||
If this fails, try to recover the original install prefix. If that fails
|
||||
too, mark the spec as uninstalled. This function always the original
|
||||
install error if installation fails.
|
||||
"""
|
||||
try:
|
||||
with fs.replace_directory_transaction(self.task.pkg.prefix, self.tmp_root):
|
||||
self.installer._install_task(self.task)
|
||||
except fs.CouldNotRestoreDirectoryBackup as e:
|
||||
self.database.remove(self.task.pkg.spec)
|
||||
tty.error('Recovery of install dir of {0} failed due to '
|
||||
'{1}: {2}. The spec is now uninstalled.'.format(
|
||||
self.task.pkg.name,
|
||||
e.outer_exception.__class__.__name__,
|
||||
str(e.outer_exception)))
|
||||
|
||||
# Unwrap the actual installation exception.
|
||||
raise e.inner_exception
|
||||
|
||||
|
||||
class BuildTask(object):
|
||||
"""Class for representing the build task for a package."""
|
||||
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
|
||||
|
||||
import os
|
||||
import shutil
|
||||
|
||||
import py
|
||||
import pytest
|
||||
|
@ -1177,3 +1178,97 @@ def test_install_skip_patch(install_mockery, mock_fetch):
|
|||
|
||||
spec, install_args = const_arg[0]
|
||||
assert inst.package_id(spec.package) in installer.installed
|
||||
|
||||
|
||||
def test_overwrite_install_backup_success(temporary_store, config, mock_packages,
|
||||
tmpdir):
|
||||
"""
|
||||
When doing an overwrite install that fails, Spack should restore the backup
|
||||
of the original prefix, and leave the original spec marked installed.
|
||||
"""
|
||||
# Where to store the backups
|
||||
backup = str(tmpdir.mkdir("backup"))
|
||||
|
||||
# Get a build task. TODO: refactor this to avoid calling internal methods
|
||||
const_arg = installer_args(["b"])
|
||||
installer = create_installer(const_arg)
|
||||
installer._init_queue()
|
||||
task = installer._pop_task()
|
||||
|
||||
# Make sure the install prefix exists with some trivial file
|
||||
installed_file = os.path.join(task.pkg.prefix, 'some_file')
|
||||
fs.touchp(installed_file)
|
||||
|
||||
class InstallerThatWipesThePrefixDir:
|
||||
def _install_task(self, task):
|
||||
shutil.rmtree(task.pkg.prefix, ignore_errors=True)
|
||||
fs.mkdirp(task.pkg.prefix)
|
||||
raise Exception("Some fatal install error")
|
||||
|
||||
class FakeDatabase:
|
||||
called = False
|
||||
|
||||
def remove(self, spec):
|
||||
self.called = True
|
||||
|
||||
fake_installer = InstallerThatWipesThePrefixDir()
|
||||
fake_db = FakeDatabase()
|
||||
overwrite_install = inst.OverwriteInstall(
|
||||
fake_installer, fake_db, task, tmp_root=backup)
|
||||
|
||||
# Installation should throw the installation exception, not the backup
|
||||
# failure.
|
||||
with pytest.raises(Exception, match='Some fatal install error'):
|
||||
overwrite_install.install()
|
||||
|
||||
# Make sure the package is not marked uninstalled and the original dir
|
||||
# is back.
|
||||
assert not fake_db.called
|
||||
assert os.path.exists(installed_file)
|
||||
|
||||
|
||||
def test_overwrite_install_backup_failure(temporary_store, config, mock_packages,
|
||||
tmpdir):
|
||||
"""
|
||||
When doing an overwrite install that fails, Spack should try to recover the
|
||||
original prefix. If that fails, the spec is lost, and it should be removed
|
||||
from the database.
|
||||
"""
|
||||
# Where to store the backups
|
||||
backup = str(tmpdir.mkdir("backup"))
|
||||
|
||||
class InstallerThatAccidentallyDeletesTheBackupDir:
|
||||
def _install_task(self, task):
|
||||
# Remove the backup directory so that restoring goes terribly wrong
|
||||
shutil.rmtree(backup)
|
||||
raise Exception("Some fatal install error")
|
||||
|
||||
class FakeDatabase:
|
||||
called = False
|
||||
|
||||
def remove(self, spec):
|
||||
self.called = True
|
||||
|
||||
# Get a build task. TODO: refactor this to avoid calling internal methods
|
||||
const_arg = installer_args(["b"])
|
||||
installer = create_installer(const_arg)
|
||||
installer._init_queue()
|
||||
task = installer._pop_task()
|
||||
|
||||
# Make sure the install prefix exists
|
||||
installed_file = os.path.join(task.pkg.prefix, 'some_file')
|
||||
fs.touchp(installed_file)
|
||||
|
||||
fake_installer = InstallerThatAccidentallyDeletesTheBackupDir()
|
||||
fake_db = FakeDatabase()
|
||||
overwrite_install = inst.OverwriteInstall(
|
||||
fake_installer, fake_db, task, tmp_root=backup)
|
||||
|
||||
# Installation should throw the installation exception, not the backup
|
||||
# failure.
|
||||
with pytest.raises(Exception, match='Some fatal install error'):
|
||||
overwrite_install.install()
|
||||
|
||||
# Make sure that `remove` was called on the database after an unsuccessful
|
||||
# attempt to restore the backup.
|
||||
assert fake_db.called
|
||||
|
|
Loading…
Reference in a new issue