Environments: Avoid inconsistent state on failed write (#18538)

Fixes #18441 

When writing an environment, there are cases where the lock file for
the environment must be removed. Previously there was a window between
removing the lock file and writing the new manifest file during which
an exception would leave the manifest in its old state, so the lock
file and the manifest would be out of sync.

This adds a context manager which is used to restore the prior lock
file state in cases where the manifest file cannot be written.
This commit is contained in:
Massimiliano Culpo 2020-09-11 19:57:29 +02:00 committed by GitHub
parent e7040467f2
commit 8ad2cc2acf
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 206 additions and 19 deletions

View file

@ -974,6 +974,53 @@ def remove_linked_tree(path):
shutil.rmtree(path, True)
@contextmanager
def safe_remove(*files_or_dirs):
    """Context manager to remove the files passed as input, but restore
    them in case any exception is raised in the context block.

    Args:
        *files_or_dirs: glob expressions for files or directories
            to be removed

    Yields:
        Dictionary that maps deleted files to their temporary copy
        within the context block.
    """
    # Find all the files or directories that match
    glob_matches = [glob.glob(x) for x in files_or_dirs]
    # Sort them so that shorter paths like "/foo/bar" come before
    # nested paths like "/foo/bar/baz.yaml". This simplifies the
    # handling of temporary copies below
    sorted_matches = sorted([
        os.path.abspath(x) for x in itertools.chain(*glob_matches)
    ], key=len)

    # Copy files and directories in a temporary location
    removed, dst_root = {}, tempfile.mkdtemp()
    try:
        for idx, file_or_dir in enumerate(sorted_matches):
            # The glob expression at the top ensures that the file/dir exists
            # at the time we enter the loop. Double check here since it might
            # happen that a previous iteration of the loop already removed it.
            # This is the case, for instance, if we remove the directory
            # "/foo/bar" before the file "/foo/bar/baz.yaml".
            if not os.path.exists(file_or_dir):
                continue
            # The monotonic index is a simple way to make the filename
            # or directory name unique in the temporary folder
            # (note: renamed from "id" to avoid shadowing the builtin)
            basename = os.path.basename(file_or_dir) + '-{0}'.format(idx)
            temporary_path = os.path.join(dst_root, basename)
            shutil.move(file_or_dir, temporary_path)
            removed[file_or_dir] = temporary_path
        yield removed
    except BaseException:
        # Restore the files that were removed
        for original_path, temporary_path in removed.items():
            shutil.move(temporary_path, original_path)
        raise
    finally:
        # Don't leak the temporary directory: on success the moved copies
        # are exactly the removal we wanted; on failure everything has
        # already been moved back, so the tree holds nothing we need.
        shutil.rmtree(dst_root, ignore_errors=True)
def fix_darwin_install_name(path):
"""Fix install name of dynamic libraries on Darwin to have full path.

View file

@ -1514,13 +1514,26 @@ def write(self, regenerate_views=True):
# write the lock file last
with fs.write_tmp_and_move(self.lock_path) as f:
sjson.dump(self._to_lockfile_dict(), stream=f)
self._update_and_write_manifest(raw_yaml_dict, yaml_dict)
else:
if os.path.exists(self.lock_path):
os.unlink(self.lock_path)
with fs.safe_remove(self.lock_path):
self._update_and_write_manifest(raw_yaml_dict, yaml_dict)
# TODO: rethink where this needs to happen along with
# writing. For some of the commands (like install, which write
# concrete specs AND regen) this might as well be a separate
# call. But, having it here makes the views consistent witht the
# concretized environment for most operations. Which is the
# special case?
if regenerate_views:
self.regenerate_views()
def _update_and_write_manifest(self, raw_yaml_dict, yaml_dict):
"""Update YAML manifest for this environment based on changes to
spec lists and views and write it.
"""
# invalidate _repo cache
self._repo = None
# put any changes in the definitions in the YAML
for name, speclist in self.spec_lists.items():
if name == user_speclist_name:
@ -1547,14 +1560,12 @@ def write(self, regenerate_views=True):
for ayl in active_yaml_lists)]
list_for_new_specs = active_yaml_lists[0].setdefault(name, [])
list_for_new_specs[:] = list_for_new_specs + new_specs
# put the new user specs in the YAML.
# This can be done directly because there can't be multiple definitions
# nor when clauses for `specs` list.
yaml_spec_list = yaml_dict.setdefault(user_speclist_name,
[])
yaml_spec_list[:] = self.user_specs.yaml_list
# Construct YAML representation of view
default_name = default_view_name
if self.views and len(self.views) == 1 and default_name in self.views:
@ -1572,9 +1583,7 @@ def write(self, regenerate_views=True):
for name, view in self.views.items())
else:
view = False
yaml_dict['view'] = view
# Remove yaml sections that are shadowing defaults
# construct garbage path to ensure we don't find a manifest by accident
with fs.temp_cwd() as env_dir:
@ -1584,7 +1593,6 @@ def write(self, regenerate_views=True):
if yaml_dict[key] == config_dict(bare_env.yaml).get(key, None):
if key not in raw_yaml_dict:
del yaml_dict[key]
# if all that worked, write out the manifest file at the top level
# (we used to check whether the yaml had changed and not write it out
# if it hadn't. We can't do that anymore because it could be the only
@ -1598,15 +1606,6 @@ def write(self, regenerate_views=True):
with fs.write_tmp_and_move(self.manifest_path) as f:
_write_yaml(self.yaml, f)
# TODO: rethink where this needs to happen along with
# writing. For some of the commands (like install, which write
# concrete specs AND regen) this might as well be a separate
# call. But, having it here makes the views consistent witht the
# concretized environment for most operations. Which is the
# special case?
if regenerate_views:
self.regenerate_views()
def __enter__(self):
self._previous_active = _active_environment
activate(self)

View file

@ -2189,3 +2189,40 @@ def extract_build_hash(environment):
libelf_second_hash = extract_build_hash(e)
assert libelf_first_hash == libelf_second_hash
@pytest.mark.regression('18441')
def test_lockfile_not_deleted_on_write_error(tmpdir, monkeypatch):
    """A failure while writing the manifest must not delete spack.lock."""
    manifest_content = """
spack:
  specs:
  - dyninst
  packages:
    libelf:
      externals:
      - spec: libelf@0.8.13
        prefix: /usr
"""
    tmpdir.join('spack.yaml').write(manifest_content)
    lock_file = str(tmpdir.join('spack.lock'))

    # First pass: concretize so that a lockfile exists on disk
    with ev.Environment(str(tmpdir)):
        concretize()
    assert os.path.exists(lock_file)

    # Second pass: force the manifest write to fail and check that
    # the lockfile survives on disk
    def _always_fail(self, x, y):
        raise RuntimeError('some error')

    monkeypatch.setattr(
        ev.Environment, '_update_and_write_manifest', _always_fail
    )
    with ev.Environment(str(tmpdir)) as env:
        env.concretize(force=True)
        with pytest.raises(RuntimeError):
            env.clear()
            env.write()
        assert os.path.exists(lock_file)

View file

@ -4,13 +4,13 @@
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
"""Tests for ``llnl/util/filesystem.py``"""
import pytest
import os
import shutil
import stat
import sys
import pytest
import llnl.util.filesystem as fs
import spack.paths
@ -484,3 +484,107 @@ def test_filter_files_multiple(tmpdir):
assert '<malloc.h>' not in f.read()
assert '<string.h>' not in f.read()
assert '<stdio.h>' not in f.read()
# Each test input is a tuple of entries which prescribe
# - the 'subdirs' to be created from tmpdir
# - the 'files' in that directory
# - what is to be removed
@pytest.mark.parametrize('files_or_dirs', [
    # Remove a file over the two that are present
    [{'subdirs': None,
      'files': ['spack.lock', 'spack.yaml'],
      'remove': ['spack.lock']}],
    # Remove the entire directory where two files are stored
    [{'subdirs': 'myenv',
      'files': ['spack.lock', 'spack.yaml'],
      'remove': ['myenv']}],
    # Combine a mix of directories and files
    [{'subdirs': None,
      'files': ['spack.lock', 'spack.yaml'],
      'remove': ['spack.lock']},
     {'subdirs': 'myenv',
      'files': ['spack.lock', 'spack.yaml'],
      'remove': ['myenv']}],
    # Multiple subdirectories, remove root
    [{'subdirs': 'work/myenv1',
      'files': ['spack.lock', 'spack.yaml'],
      'remove': []},
     {'subdirs': 'work/myenv2',
      'files': ['spack.lock', 'spack.yaml'],
      'remove': ['work']}],
    # Multiple subdirectories, remove each one
    [{'subdirs': 'work/myenv1',
      'files': ['spack.lock', 'spack.yaml'],
      'remove': ['work/myenv1']},
     {'subdirs': 'work/myenv2',
      'files': ['spack.lock', 'spack.yaml'],
      'remove': ['work/myenv2']}],
    # Remove files with the same name in different directories
    [{'subdirs': 'work/myenv1',
      'files': ['spack.lock', 'spack.yaml'],
      'remove': ['work/myenv1/spack.lock']},
     {'subdirs': 'work/myenv2',
      'files': ['spack.lock', 'spack.yaml'],
      'remove': ['work/myenv2/spack.lock']}],
    # Remove first the directory, then a file within the directory
    [{'subdirs': 'myenv',
      'files': ['spack.lock', 'spack.yaml'],
      'remove': ['myenv', 'myenv/spack.lock']}],
    # Remove first a file within a directory, then the directory
    [{'subdirs': 'myenv',
      'files': ['spack.lock', 'spack.yaml'],
      'remove': ['myenv/spack.lock', 'myenv']}],
])
@pytest.mark.regression('18441')
def test_safe_remove(files_or_dirs, tmpdir):
    """Check that ``fs.safe_remove`` deletes the requested entries inside
    the context block and restores every file if an exception is raised.
    """
    # Create a fake directory structure as prescribed by test input
    to_be_removed, to_be_checked = [], []
    for entry in files_or_dirs:
        # Create relative dir
        subdirs = entry['subdirs']
        dir = tmpdir if not subdirs else tmpdir.ensure(
            *subdirs.split('/'), dir=True
        )
        # Create files in the directory
        files = entry['files']
        for f in files:
            abspath = str(dir.join(f))
            to_be_checked.append(abspath)
            fs.touch(abspath)
        # List of things to be removed
        for r in entry['remove']:
            to_be_removed.append(str(tmpdir.join(r)))
    # Assert that files are deleted in the context block,
    # mock a failure by raising an exception
    with pytest.raises(RuntimeError):
        with fs.safe_remove(*to_be_removed):
            for entry in to_be_removed:
                assert not os.path.exists(entry)
            raise RuntimeError('Mock a failure')
    # Assert that files are restored
    for entry in to_be_checked:
        assert os.path.exists(entry)
@pytest.mark.regression('18441')
def test_content_of_files_with_same_name(tmpdir):
    """Files sharing a basename must be restored with their own content."""
    # Two sibling directories, each holding a file with the same name
    # but different content
    lock_in_env1 = tmpdir.ensure('myenv1/spack.lock')
    lock_in_env2 = tmpdir.ensure('myenv2/spack.lock')
    lock_in_env1.write('file1')
    lock_in_env2.write('file2')

    # Remove both through 'safe_remove' and simulate a failure
    with pytest.raises(RuntimeError):
        with fs.safe_remove(str(lock_in_env1), str(lock_in_env2)):
            raise RuntimeError('Mock a failure')

    # After restoration each file must still carry its own content,
    # i.e. the two copies must not have been swapped
    assert lock_in_env1.read().strip() == 'file1'
    assert lock_in_env2.read().strip() == 'file2'