Environments: Avoid inconsistent state on failed write (#18538)

Fixes #18441 

When writing an environment, there are cases where the lock file for
the environment must be removed. Previously there was a window between
removing the lock file and writing the new manifest file during which
an exception would leave the manifest in its old state, so the lock
file and the manifest would be out of sync.

This adds a context manager which is used to restore the prior lock
file state in cases where the manifest file cannot be written.
This commit is contained in:
Massimiliano Culpo 2020-09-11 19:57:29 +02:00 committed by GitHub
parent e7040467f2
commit 8ad2cc2acf
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 206 additions and 19 deletions

View file

@ -974,6 +974,53 @@ def remove_linked_tree(path):
shutil.rmtree(path, True)
@contextmanager
def safe_remove(*files_or_dirs):
    """Context manager to remove the files passed as input, but restore
    them in case any exception is raised in the context block.

    Args:
        *files_or_dirs: glob expressions for files or directories
            to be removed

    Yields:
        Dictionary that maps deleted files to their temporary copy
        within the context block.
    """
    # Find all the files or directories that match
    glob_matches = [glob.glob(x) for x in files_or_dirs]
    # Sort them so that shorter paths like "/foo/bar" come before
    # nested paths like "/foo/bar/baz.yaml". This simplifies the
    # handling of temporary copies below
    sorted_matches = sorted([
        os.path.abspath(x) for x in itertools.chain(*glob_matches)
    ], key=len)

    # Copy files and directories in a temporary location
    removed, dst_root = {}, tempfile.mkdtemp()
    try:
        for idx, file_or_dir in enumerate(sorted_matches):
            # The glob expression at the top ensures that the file/dir exists
            # at the time we enter the loop. Double check here since it might
            # happen that a previous iteration of the loop already removed it.
            # This is the case, for instance, if we remove the directory
            # "/foo/bar" before the file "/foo/bar/baz.yaml".
            if not os.path.exists(file_or_dir):
                continue
            # The monotonic index is a simple way to make the filename
            # or directory name unique in the temporary folder
            # (note: renamed from "id" to avoid shadowing the builtin)
            basename = os.path.basename(file_or_dir) + '-{0}'.format(idx)
            temporary_path = os.path.join(dst_root, basename)
            shutil.move(file_or_dir, temporary_path)
            removed[file_or_dir] = temporary_path
        yield removed
    except BaseException:
        # Restore the files that were removed
        for original_path, temporary_path in removed.items():
            shutil.move(temporary_path, original_path)
        raise
    finally:
        # Don't leak the temporary directory: on success the moved copies
        # are exactly the removal we wanted; on failure everything has
        # already been moved back, so the tree holds nothing we need.
        shutil.rmtree(dst_root, ignore_errors=True)
def fix_darwin_install_name(path):
"""Fix install name of dynamic libraries on Darwin to have full path.

View file

@ -1514,13 +1514,26 @@ def write(self, regenerate_views=True):
# write the lock file last
with fs.write_tmp_and_move(self.lock_path) as f:
sjson.dump(self._to_lockfile_dict(), stream=f)
self._update_and_write_manifest(raw_yaml_dict, yaml_dict)
else:
if os.path.exists(self.lock_path):
os.unlink(self.lock_path)
with fs.safe_remove(self.lock_path):
self._update_and_write_manifest(raw_yaml_dict, yaml_dict)
# TODO: rethink where this needs to happen along with
# writing. For some of the commands (like install, which write
# concrete specs AND regen) this might as well be a separate
# call. But, having it here makes the views consistent witht the
# concretized environment for most operations. Which is the
# special case?
if regenerate_views:
self.regenerate_views()
def _update_and_write_manifest(self, raw_yaml_dict, yaml_dict):
"""Update YAML manifest for this environment based on changes to
spec lists and views and write it.
"""
# invalidate _repo cache
self._repo = None
# put any changes in the definitions in the YAML
for name, speclist in self.spec_lists.items():
if name == user_speclist_name:
@ -1547,14 +1560,12 @@ def write(self, regenerate_views=True):
for ayl in active_yaml_lists)]
list_for_new_specs = active_yaml_lists[0].setdefault(name, [])
list_for_new_specs[:] = list_for_new_specs + new_specs
# put the new user specs in the YAML.
# This can be done directly because there can't be multiple definitions
# nor when clauses for `specs` list.
yaml_spec_list = yaml_dict.setdefault(user_speclist_name,
[])
yaml_spec_list[:] = self.user_specs.yaml_list
# Construct YAML representation of view
default_name = default_view_name
if self.views and len(self.views) == 1 and default_name in self.views:
@ -1572,9 +1583,7 @@ def write(self, regenerate_views=True):
for name, view in self.views.items())
else:
view = False
yaml_dict['view'] = view
# Remove yaml sections that are shadowing defaults
# construct garbage path to ensure we don't find a manifest by accident
with fs.temp_cwd() as env_dir:
@ -1584,7 +1593,6 @@ def write(self, regenerate_views=True):
if yaml_dict[key] == config_dict(bare_env.yaml).get(key, None):
if key not in raw_yaml_dict:
del yaml_dict[key]
# if all that worked, write out the manifest file at the top level
# (we used to check whether the yaml had changed and not write it out
# if it hadn't. We can't do that anymore because it could be the only
@ -1598,15 +1606,6 @@ def write(self, regenerate_views=True):
with fs.write_tmp_and_move(self.manifest_path) as f:
_write_yaml(self.yaml, f)
# TODO: rethink where this needs to happen along with
# writing. For some of the commands (like install, which write
# concrete specs AND regen) this might as well be a separate
# call. But, having it here makes the views consistent witht the
# concretized environment for most operations. Which is the
# special case?
if regenerate_views:
self.regenerate_views()
def __enter__(self):
self._previous_active = _active_environment
activate(self)

View file

@ -2189,3 +2189,40 @@ def extract_build_hash(environment):
libelf_second_hash = extract_build_hash(e)
assert libelf_first_hash == libelf_second_hash
@pytest.mark.regression('18441')
def test_lockfile_not_deleted_on_write_error(tmpdir, monkeypatch):
    """A failure while writing the manifest must not delete spack.lock."""
    manifest_content = """
spack:
  specs:
  - dyninst
  packages:
    libelf:
      externals:
      - spec: libelf@0.8.13
        prefix: /usr
"""
    tmpdir.join('spack.yaml').write(manifest_content)
    lock_file = str(tmpdir.join('spack.lock'))

    # First pass: concretize so that a lockfile exists on disk
    with ev.Environment(str(tmpdir)):
        concretize()
    assert os.path.exists(lock_file)

    # Second pass: force the manifest write to fail and check that
    # the lockfile survives on disk
    def _always_fail(self, x, y):
        raise RuntimeError('some error')

    monkeypatch.setattr(
        ev.Environment, '_update_and_write_manifest', _always_fail
    )
    with ev.Environment(str(tmpdir)) as env:
        env.concretize(force=True)
        with pytest.raises(RuntimeError):
            env.clear()
            env.write()
        assert os.path.exists(lock_file)

View file

@ -4,13 +4,13 @@
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
"""Tests for ``llnl/util/filesystem.py``"""
import pytest
import os
import shutil
import stat
import sys
import pytest
import llnl.util.filesystem as fs
import spack.paths
@ -484,3 +484,107 @@ def test_filter_files_multiple(tmpdir):
assert '<malloc.h>' not in f.read()
assert '<string.h>' not in f.read()
assert '<stdio.h>' not in f.read()
# Each test input is a tuple of entries which prescribe
# - the 'subdirs' to be created from tmpdir
# - the 'files' in that directory
# - what is to be removed
@pytest.mark.parametrize('files_or_dirs', [
    # Remove a file over the two that are present
    [{'subdirs': None,
      'files': ['spack.lock', 'spack.yaml'],
      'remove': ['spack.lock']}],
    # Remove the entire directory where two files are stored
    [{'subdirs': 'myenv',
      'files': ['spack.lock', 'spack.yaml'],
      'remove': ['myenv']}],
    # Combine a mix of directories and files
    [{'subdirs': None,
      'files': ['spack.lock', 'spack.yaml'],
      'remove': ['spack.lock']},
     {'subdirs': 'myenv',
      'files': ['spack.lock', 'spack.yaml'],
      'remove': ['myenv']}],
    # Multiple subdirectories, remove root
    [{'subdirs': 'work/myenv1',
      'files': ['spack.lock', 'spack.yaml'],
      'remove': []},
     {'subdirs': 'work/myenv2',
      'files': ['spack.lock', 'spack.yaml'],
      'remove': ['work']}],
    # Multiple subdirectories, remove each one
    [{'subdirs': 'work/myenv1',
      'files': ['spack.lock', 'spack.yaml'],
      'remove': ['work/myenv1']},
     {'subdirs': 'work/myenv2',
      'files': ['spack.lock', 'spack.yaml'],
      'remove': ['work/myenv2']}],
    # Remove files with the same name in different directories
    [{'subdirs': 'work/myenv1',
      'files': ['spack.lock', 'spack.yaml'],
      'remove': ['work/myenv1/spack.lock']},
     {'subdirs': 'work/myenv2',
      'files': ['spack.lock', 'spack.yaml'],
      'remove': ['work/myenv2/spack.lock']}],
    # Remove first the directory, then a file within the directory
    [{'subdirs': 'myenv',
      'files': ['spack.lock', 'spack.yaml'],
      'remove': ['myenv', 'myenv/spack.lock']}],
    # Remove first a file within a directory, then the directory
    [{'subdirs': 'myenv',
      'files': ['spack.lock', 'spack.yaml'],
      'remove': ['myenv/spack.lock', 'myenv']}],
])
@pytest.mark.regression('18441')
def test_safe_remove(files_or_dirs, tmpdir):
    """Check that ``fs.safe_remove`` deletes the requested entries inside
    the context block and restores every file if an exception is raised.
    """
    # Create a fake directory structure as prescribed by test input
    to_be_removed, to_be_checked = [], []
    for entry in files_or_dirs:
        # Create relative dir
        subdirs = entry['subdirs']
        dir = tmpdir if not subdirs else tmpdir.ensure(
            *subdirs.split('/'), dir=True
        )
        # Create files in the directory
        files = entry['files']
        for f in files:
            abspath = str(dir.join(f))
            to_be_checked.append(abspath)
            fs.touch(abspath)
        # List of things to be removed
        for r in entry['remove']:
            to_be_removed.append(str(tmpdir.join(r)))
    # Assert that files are deleted in the context block,
    # mock a failure by raising an exception
    with pytest.raises(RuntimeError):
        with fs.safe_remove(*to_be_removed):
            for entry in to_be_removed:
                assert not os.path.exists(entry)
            raise RuntimeError('Mock a failure')
    # Assert that files are restored
    for entry in to_be_checked:
        assert os.path.exists(entry)
@pytest.mark.regression('18441')
def test_content_of_files_with_same_name(tmpdir):
    """Files sharing a basename must be restored with their own content."""
    # Two sibling directories, each holding a file with the same name
    # but different content
    lock_in_env1 = tmpdir.ensure('myenv1/spack.lock')
    lock_in_env2 = tmpdir.ensure('myenv2/spack.lock')
    lock_in_env1.write('file1')
    lock_in_env2.write('file2')

    # Remove both through 'safe_remove' and simulate a failure
    with pytest.raises(RuntimeError):
        with fs.safe_remove(str(lock_in_env1), str(lock_in_env2)):
            raise RuntimeError('Mock a failure')

    # After restoration each file must still carry its own content,
    # i.e. the two copies must not have been swapped
    assert lock_in_env1.read().strip() == 'file1'
    assert lock_in_env2.read().strip() == 'file2'